import json
import os
import os.path
import logging
import random
import stat
import tempfile
import urllib2
import urlparse

from sandbox.projects.common.nanny import nanny

from sandbox.sandboxsdk import errors
from sandbox.sandboxsdk import process

from sandbox import sdk2

from sandbox.projects.common import decorators
from sandbox.projects.common import dolbilka
from sandbox.projects.common import string
from sandbox.projects.common import utils
from sandbox.projects.common.dolbilka import resources as resources_dolbilka
from sandbox.projects.common.search.settings import CollectionsSettings
from sandbox.projects.images.resources import eventlog as resources_eventlog
from sandbox.projects import resource_types as resources_common


# Default Nanny service ids; used as defaults for the task parameters that
# select where basesearch and middlesearch instances are looked up.
_BASESEARCH_NANNY_SERVICE = "pdb_search_boards_base_priemka"
_MMETA_NANNY_SERVICE = "pdb_search_boards_mmeta_priemka"

# Eventlog file name templates, formatted with meta_type and port. The same
# name is used for the remote file and for the local copy.
# NOTE(review): the ".1" suffix presumably denotes the previous (rotated) log - confirm.
_EVENTLOG_CURR_PATH = 'current-eventlog-{meta_type}-{port}'
_EVENTLOG_PREV_PATH = 'current-eventlog-{meta_type}-{port}.1'
# Login and vault location of the SSH private key used by rsync-over-ssh.
_EVENTLOG_SSH_USER = "robot-imgdev"
_VAULT_OWNER = "IMAGES-SANDBOX"
_EVENTLOG_SSH_USER_PK_VAULT = "robot_imgdev_ssh_private_key"
# rsync remote prefix (formatted with host); the eventlog file name is joined to it.
_EVENTLOG_URL_PREFIX = "{host}:/logs"
# Keys of the per-instance info dictionaries built for basesearch instances.
_INDEX_TS = "index_timestamp"
_SHARD_ID = "shard_id"
# Keys into a record's "requests" dictionary; they have the "<field 6>-<field 7>"
# shape produced from SubSourceInit events.
_SEARCH_SOURCE = "PDB_SEARCH_BOARDS-search"
_SNIPPET_SOURCE = "PDB_SEARCH_BOARDS-fetch"

# Tags used to build resource descriptions, file names and attribute names.
_MMETA_APPHOST_TAG = "mmeta_apphost"
_BS_SEARCH_TAG = "main_search"
_BS_SNIPPET_TAG = "main_snippets"


def _read_eventlog(evlogdump):
    data = None
    sub_sources = {}

    for line in evlogdump:
        tabs = line.strip().split('\t')
        if len(tabs) < 3:
            continue
        event_type = tabs[2]
        if event_type == 'CreateYSRequest':
            data = {
                'url': '',
                'apphost': '',
                'collection': '',
                'subsources': {},
                'requests': {},
            }
            sub_sources = {}
        elif not data:  # all other cases depends on dictionary with data
            continue
        elif event_type == 'AppHostRequest' and len(tabs) > 5:
            data.update({
                'apphost': tabs[5],
            })
        elif event_type == 'EnqueueYSRequest':
            data.update({
                'collection': tabs[6].lstrip('/'),
            })
        elif event_type in ('ContextCreated', 'FastCacheHit'):
            data.update({
                'url': tabs[3],
            })
        elif event_type == 'SubSourceInit':
            source_num = int(tabs[3])
            sub_sources[source_num] = '{}-{}'.format(tabs[6], tabs[7])
            if sub_sources[source_num] not in data['requests']:
                data['requests'][sub_sources[source_num]] = {}
            data['requests'][sub_sources[source_num]][source_num] = {
                'request': None,
                'ok': False
            }
        elif event_type == 'SubSourceRequest':
            source_num = int(tabs[3])
            data['requests'][sub_sources[source_num]][source_num]['request'] = tabs[7]
        elif event_type in ('SubSourceOk'):
            source_num = int(tabs[3])
            data['requests'][sub_sources[source_num]][source_num]['ok'] = True
        elif event_type == 'EndOfFrame':
            yield data
            data = None


class EventlogPlainTextQueriesResource:
    """
        Basesearch query resources

        Owns a plain-text queries resource and a plan resource. Matching
        sub-source requests are appended to the queries file by write();
        close() finalizes the file and converts it into a plan.
    """

    queries_resource_type = resources_common.PLAIN_TEXT_QUERIES
    plan_resource_type = resources_common.BASESEARCH_PLAN
    plan_loader_type = "plain"

    def __init__(self, task, tag, attrs, source_name, source_instances):
        """
            :param task: task the resources are created for
            :param tag: used in descriptions and file names, and substituted
                into the attribute names
            :param attrs: mapping of attribute name template to value
            :param source_name: key of the record's "requests" dictionary
                whose requests are collected
            :param source_instances: container of accepted "host:port" values
        """
        self._attrs = dict((name.format(tag), attrs[name]) for name in attrs)

        queries_description = "collections board requests, {}".format(tag)
        queries_path = "{}.queries.txt".format(tag)
        self._queries_resource = self.queries_resource_type(
            task, queries_description, queries_path, **self._attrs
        )

        plan_description = "collections board requests plan, {}".format(tag)
        plan_path = "{}.requests.plan".format(tag)
        self._plan_resource = self.plan_resource_type(
            task, plan_description, plan_path, **self._attrs
        )

        self._source_name = source_name
        self._source_instances = source_instances

        # The file stays open until close() is called.
        queries_file_path = str(sdk2.ResourceData(self._queries_resource).path)
        self._queries_output = open(queries_file_path, "w")
        self._record_count = 0

    def write(self, record):
        """
            Writes at most one query per record: the first successful request
            of the configured source that was sent to a known instance.
        """
        candidates = record.get("requests", {}).get(self._source_name, {})
        for source_num in candidates:
            entry = candidates[source_num]
            if not entry.get("ok", False):
                continue
            request = entry.get("request", "")
            if not request:
                continue
            parsed = urlparse.urlparse(request)
            if parsed.netloc not in self._source_instances:
                continue
            self._record_count += 1
            # Only path and query are stored; scheme and host are dropped.
            self._queries_output.write(parsed.path + "?" + parsed.query + "\n")
            break

    def close(self):
        """
            Closes the queries file, stores the record count on both
            resources and converts the queries into a plan.
        """
        self._queries_output.close()

        written = self._record_count
        self._queries_resource.record_count = written
        self._plan_resource.record_count = written

        queries_path = str(sdk2.ResourceData(self._queries_resource).path)
        plan_path = str(sdk2.ResourceData(self._plan_resource).path)
        dolbilka.convert_queries_to_plan(
            queries_path,
            plan_path,
            loader_type=self.plan_loader_type
        )

class EventlogApphostQueriesResource(EventlogPlainTextQueriesResource):
    """
        Middlesearch apphost query resources

        Collects the apphost payload of records whose collection is "all" or
        "yandsearch". close() is inherited and builds the plan with the
        "phantom" loader.
    """

    queries_resource_type = resources_dolbilka.DOLBILKA_STPD_QUERIES
    plan_loader_type = "phantom"

    def __init__(self, task, tag, attrs):
        """
            :param task: task the resources are created for
            :param tag: used in descriptions and file names, and substituted
                into the attribute names
            :param attrs: mapping of attribute name template to value
        """
        # The base constructor is not called: this class needs neither a
        # source name nor a set of source instances.
        self._attrs = {attr.format(tag): attrs[attr] for attr in attrs}
        self._queries_resource = self.queries_resource_type(
            task, "collections board requests, {}".format(tag), "{}.queries.txt".format(tag), **self._attrs
        )
        self._plan_resource = self.plan_resource_type(
            task, "collections board requests plan, {}".format(tag), "{}.requests.plan".format(tag), **self._attrs
        )

        self._queries_output = open(str(sdk2.ResourceData(self._queries_resource).path), "w")
        self._record_count = 0

    def write(self, record):
        """
            Writes the record's apphost request when it is non-empty and the
            record belongs to the "all" or "yandsearch" collection.
        """
        collection = record["collection"]
        query = record["apphost"]
        # "and" binds tighter than "or": without explicit grouping an empty
        # query was still written for the "yandsearch" collection.
        if query and collection in ("all", "yandsearch"):
            self._queries_output.write(resources_eventlog._make_apphost_request(query))
            self._record_count += 1


class CollectionsBoardsLoadMiddlesearchResources(sdk2.Task):
    """
        Loads queries from production

        Picks a live middlesearch instance of the configured Nanny service,
        copies its eventlogs with rsync and turns them into query and plan
        resources for basesearch (search and snippets) and for middlesearch
        (apphost).
    """

    class Requirements(sdk2.Task.Requirements):
        disk_space = 60 * 1024

    class Parameters(sdk2.Task.Parameters):
        basesearch_nanny_service_name = sdk2.parameters.String("Basesearch Nanny Service Name", description="Nanny Service With Basesearch", default=_BASESEARCH_NANNY_SERVICE)
        mmeta_nanny_service_name = sdk2.parameters.String("Mmetasearch Nanny Service Name", description="Nanny Service With Middlesearch", default=_MMETA_NANNY_SERVICE)
        # Declared as a string parameter; converted to int where it is used.
        max_queries = sdk2.parameters.String("Max loading query count", default=10000)
        attributes = sdk2.parameters.String("Create and set attrs to data (e.g. attr1=v1, attr2=v2)")

    def on_execute(self):
        """
            Tries up to 20 randomly sampled live middlesearch instances and
            stops at the first one queries were loaded from.

            :raises errors.SandboxTaskFailureError: when loading failed for
                every tried instance
        """
        self.pk_file = None

        self.bs_instances = self._get_basesearch_instances_info()

        res_attrs = string.parse_attrs(self.Parameters.attributes)

        # load resources
        for middle_instance in self._get_live_instances(self._get_middlesearch_instances(), 20):
            logging.info('Using instance {}'.format(middle_instance))
            try:
                self._load_queries(middle_instance, res_attrs)
            except Exception as e:
                # Best effort: a failure on one instance is logged and the
                # next instance is tried.
                logging.error("Failed to acquire queries from {}: {}".format(middle_instance, str(e)))
            else:
                break
        else:
            raise errors.SandboxTaskFailureError("Failed to acquire production data. See logs for details")

    def __get_nanny_client(self):
        """
            Returns a Nanny client authorized with the token from the vault
        """
        return nanny.NannyClient(
            api_url='http://nanny.yandex-team.ru/',
            oauth_token=sdk2.Vault.data(_VAULT_OWNER, 'nanny-oauth-token')
        )

    @decorators.retries(max_tries=3, delay=10)
    def __get_nanny_instances(self, service_id):
        """
            Returns (container_hostname, port, itags) tuples for the current
            instances of the given Nanny service
        """
        nanny_client = self.__get_nanny_client()
        # Fetched once, so the logged instances are exactly the returned ones.
        current_instances = nanny_client.get_service_current_instances(service_id)['result']
        for instance in current_instances:
            logging.info("{} instance: {}".format(service_id, str(instance)))
        return [
            (instance['container_hostname'], instance['port'], instance['itags'])
            for instance in current_instances
        ]

    def _get_basesearch_instances(self):
        """
            Returns basesearch instances of the configured Nanny service

            :raises errors.SandboxTaskFailureError: when the service has none
        """
        instances = self.__get_nanny_instances(self.Parameters.basesearch_nanny_service_name)
        logging.info("basesearch instances={}".format(instances))
        if not instances:
            raise errors.SandboxTaskFailureError("No hosts found for basesearch")
        return instances

    def _get_middlesearch_instances(self):
        """
            Returns middlesearch instances of the configured Nanny service

            :raises errors.SandboxTaskFailureError: when the service has none
        """
        instances = self.__get_nanny_instances(self.Parameters.mmeta_nanny_service_name)
        logging.info("metasearch instances={}".format(instances))
        if not instances:
            raise errors.SandboxTaskFailureError("No hosts found for middlesearch")
        return instances

    def _get_basesearch_instances_info(self):
        """
            Returns basesearch instances info: shard, state

            The result maps "host:port" to a dictionary with the index
            timestamp and the shard id, or to an empty dictionary when the
            instance could not be queried.
        """

        instances = self._get_live_instances(self._get_basesearch_instances())

        def instance_info(host_and_port_itags):
            try:
                logging.info('Instance info for {}'.format(host_and_port_itags))
                # NOTE(review): the info server is queried on port + 3 -
                # presumably the controller port; confirm.
                reply = urllib2.urlopen('http://{}:{}/?command=get_info_server'.format(host_and_port_itags[0], int(host_and_port_itags[1]) + 3))
                data = reply.read()
                timestamp = 0
                if data and 'result' in data:
                    result = json.loads(data)['result']
                    indexes = result['indexes']
                    for index in indexes:
                        if indexes[index]['type'] == 'FINAL':
                            timestamp = int(indexes[index]['timestamp'])
                # The shard id is taken from the instance tags; -1 when absent.
                shard_id = -1
                for itag in host_and_port_itags[2]:
                    if itag.startswith('OPT_shardid='):
                        shard_id = int(itag[len('OPT_shardid='):])
                return {_INDEX_TS: timestamp, _SHARD_ID: shard_id}
            except IOError:
                # The handler used to reference an undefined name and raised
                # NameError instead of logging the failure.
                logging.info('Get instance info {} failed'.format(host_and_port_itags))
                return {}

        return {"{}:{}".format(instance[0], instance[1]): instance_info(instance) for instance in instances}

    def _get_live_instances(self, hosts, sample_size=-1):
        """
            Returns subset of live instances

            :param hosts: sequence of instances; item 0 is the host and
                item 1 is the port
            :param sample_size: when positive, at most this many randomly
                chosen live instances are returned
            :raises errors.SandboxTaskFailureError: when no instance responds
        """

        def ping_instance(host_and_port):
            try:
                logging.info('Pinging instance {}'.format(host_and_port))
                urllib2.urlopen('http://{}:{}/yandsearch'.format(host_and_port[0], host_and_port[1]))
                return True
            except IOError:
                logging.info('Pinging instance {} failed'.format(host_and_port))
                return False

        live_hosts = [h for h in hosts if ping_instance(h)]
        if not live_hosts:
            # NOTE(review): this helper is also used for basesearch
            # instances, so the message can name the wrong service.
            raise errors.SandboxTaskFailureError("Could not find any working instance for middlesearch")

        if sample_size > 0 and len(live_hosts) > sample_size:
            live_hosts = random.sample(live_hosts, sample_size)

        return live_hosts

    def _copy_rsync(self, remote_path, local_path="./"):
        """
            Copies remote file using rsync

            The SSH private key is read from the vault on first use and kept
            in a temporary file that is reused by later calls.
        """

        if not self.pk_file:
            # The file object is kept on the task so the temporary file is
            # not deleted while rsync still needs it.
            self.temp_pk_file = tempfile.NamedTemporaryFile()
            self.temp_pk_file.write(sdk2.Vault.data(_VAULT_OWNER, _EVENTLOG_SSH_USER_PK_VAULT))
            self.temp_pk_file.flush()
            self.temp_pk_file.seek(0)
            self.pk_file = self.temp_pk_file.name
            # the key file is passed to ssh via -i and must not be other-accessible
            os.chmod(self.pk_file, stat.S_IRUSR)

        cmd = [
            "rsync", "-vvv",
            "--rsh", "ssh -l {user} -i {pk_file}".format(user=_EVENTLOG_SSH_USER, pk_file=self.pk_file),
            remote_path, local_path
        ]
        logging.info("run>" + " ".join(cmd))
        process.run_process(cmd, wait=True, log_prefix="rsync")

    def _load_queries(self, middle_instance, attrs):
        """
            Downloads eventlogs of the middlesearch instance and writes the
            requests found there into the query resources.

            :param middle_instance: (host, port, itags) tuple
            :param attrs: resource attributes; "shard_instance" is added to
                this dictionary in place
            :raises ValueError: when the max_queries parameter is not a number
        """
        host, port, itags = middle_instance

        # The parameter is a string; comparing it with an int counter is
        # never true in Python 2, which silently disabled the limit.
        # Converted before any resource is created so a bad value fails early.
        max_request_count = int(self.Parameters.max_queries)

        # NOTE(review): never read below; kept in case constructing it has
        # side effects - confirm and drop.
        queries_files = []
        queries_files.append(resources_eventlog.QueriesFile("main", "yandsearch"))

        # Only requests sent to basesearch instances of shard 0 are collected.
        shard_id = 0
        shard_instance = ""
        bs_instances = {instance: self.bs_instances[instance] for instance in self.bs_instances if self.bs_instances[instance].get(_SHARD_ID) == shard_id}
        logging.info("instance with shard id == 0: {}".format(str(bs_instances)))
        if bs_instances:
            shard_prefix = CollectionsSettings().basesearch_database_prefix()
            # The index timestamp of the first matching instance is used.
            first_instance_info = next(iter(bs_instances.values()))
            shard_instance = "{}-{}-{}".format(shard_prefix, shard_id, first_instance_info[_INDEX_TS])
        attrs["shard_instance"] = shard_instance

        query_resources = [
            EventlogPlainTextQueriesResource(self, _BS_SEARCH_TAG, attrs, _SEARCH_SOURCE, bs_instances),
            EventlogPlainTextQueriesResource(self, _BS_SNIPPET_TAG, attrs, _SNIPPET_SOURCE, bs_instances),
            EventlogApphostQueriesResource(self, _MMETA_APPHOST_TAG, attrs),
        ]

        log_names = (_EVENTLOG_CURR_PATH, _EVENTLOG_PREV_PATH)

        request_count = 0

        try:
            for log_name in log_names:
                local_path = log_name.format(
                    meta_type="mmeta",
                    host=host,
                    port=port
                )
                try:
                    remote_path = os.path.join(_EVENTLOG_URL_PREFIX, log_name).format(
                        meta_type="mmeta",
                        host=host,
                        port=port
                    )
                    self._copy_rsync(remote_path, local_path)
                    with resources_eventlog.Evlogdump(local_path) as evlogdump:
                        for data in _read_eventlog(evlogdump):
                            for resource in query_resources:
                                resource.write(data)

                            request_count += 1
                            if request_count >= max_request_count:  # break inner loop
                                break
                finally:
                    # rsync may fail before the file is created; an
                    # unconditional remove would raise OSError here and hide
                    # the original error.
                    if os.path.exists(local_path):
                        os.remove(local_path)

                if request_count >= max_request_count:  # break outer loop
                    break
        finally:
            for resource in query_resources:
                resource.close()
