# -*- coding: utf-8 -*-

import re
import logging
import random
import traceback
import urllib2
import httplib
import itertools

import sandbox.common.types.client as ctc
from sandbox.projects import resource_types
from sandbox.sandboxsdk import errors
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import sandboxapi
from sandbox.projects.common import cms
from sandbox.projects.common import dolbilka
from sandbox.projects.common.search import settings as media_settings
from sandbox.projects.common import utils
from sandbox.projects.common import string
from sandbox.projects.common.search.eventlog import load_eventlog
from sandbox.projects.common.search.response import cgi as response_cgi

# Matches a source URL of the form http://<name>.yandex.ru:<port> or
# http://<name>.search.yandex.net:<port>; group 1 is the host name without
# the domain suffix, group 2 is the port. Used by _get_meta_sources.
_META_SOURCE_RE = re.compile(r'http://([\w\-]+)\.(?:yandex\.ru|search\.yandex\.net):(\d+)')
# Default value of the "*_queries_attributes" task parameters.
_DEFAULT_ATTRIBUTES = 'debug=yes'
# Presumably the instance tag of int instances that serve snippet requests;
# it is only referenced from the commented-out sort in on_execute.
_SNIPPET_INT_TAG = "itag_int_with_snippet_reqs"


class IndexTypeParameter(parameters.SandboxSelectParameter):
    """
        Index type (main or quick) to load queries for
    """

    name = 'index_type'
    description = 'Index type'
    # Each choice is a pair built from the same settings constant
    choices = [(media_settings.INDEX_MAIN, media_settings.INDEX_MAIN),
               (media_settings.INDEX_QUICK, media_settings.INDEX_QUICK)]
    default_value = media_settings.INDEX_MAIN


class RegionParameter(parameters.SandboxSelectParameter):
    """
        Region (VLA, SAS or MAN) whose instances are queried
    """

    name = 'region'
    description = 'Region'
    # Each choice is a pair built from the same settings constant
    choices = (
        (media_settings.VLA_REGION, media_settings.VLA_REGION),
        (media_settings.SAS_REGION, media_settings.SAS_REGION),
        (media_settings.MAN_REGION, media_settings.MAN_REGION),
    )
    default_value = media_settings.VLA_REGION


class LogNameParameter(parameters.SandboxStringParameter):
    """
        Name of the eventlog of the intermediate search (int or mmeta)
        that queries are read from

        The value is passed to _load_eventlog as eventlog_name.
    """

    name = 'log_name'
    description = 'Mmeta/Int log name'
    # NOTE(review): the {meta_type} and {port} placeholders are presumably
    # substituted inside LoadEventlog._load_eventlog - confirm there
    default_value = 'current-eventlog-{meta_type}-{port}'


class MinSearchQueriesParameter(parameters.SandboxIntegerParameter):
    """
        Minimum expected number of loaded search-stage queries

        If fewer queries than this value are obtained,
        the task fails
    """

    name = 'min_search_queries'
    description = 'Minimum queries (search)'
    default_value = 10000


class MinSnippetsQueriesParameter(parameters.SandboxIntegerParameter):
    """
        Minimum expected number of loaded snippet-stage queries

        If fewer queries than this value are obtained,
        the task fails
    """

    name = 'min_snippets_queries'
    description = 'Minimum queries (snippets)'
    default_value = 10000


class AllQueriesAttrsParameter(parameters.SandboxStringParameter):
    """
        Attributes to set on the created resource
        that contains all queries

        An empty value disables creation of this resource
        (see BaseLoadMediaBasesearchResources._load_queries).
    """

    name = 'all_queries_attributes'
    description = 'Set attrs to all queries (e.g. attr1=v1, attr2=v2)'
    default_value = _DEFAULT_ATTRIBUTES

    # Base name of the output files; '.txt' / '.plan.bin' is appended to it
    queries_file = 'all_queries'
    # Appended to the task description to form the resource description
    queries_description = 'all queries'
    # Matches any query
    queries_filter = re.compile(r".*")


class SearchQueriesAttrsParameter(parameters.SandboxStringParameter):
    """
        Attributes to set on the created resource
        that contains search-stage queries

        An empty value disables creation of this resource
        (see BaseLoadMediaBasesearchResources._load_queries).
    """

    name = 'search_queries_attributes'
    description = 'Set attrs to search queries (e.g. attr1=v1, attr2=v2)'
    default_value = _DEFAULT_ATTRIBUTES

    # Base name of the output files; '.txt' / '.plan.bin' is appended to it
    queries_file = 'search_queries'
    # Appended to the task description to form the resource description
    queries_description = 'only search queries'
    # Selects queries that carry the CGI parameter haha=da
    queries_filter = re.compile(r"(&|^)haha=da(&|$)")


class SnippetsQueriesAttrsParameter(parameters.SandboxStringParameter):
    """
        Attributes to set on the created resource
        that contains snippet-stage queries

        An empty value disables creation of this resource
        (see BaseLoadMediaBasesearchResources._load_queries).
    """

    name = 'snippets_queries_attributes'
    description = 'Set attrs to snippet queries (e.g. attr1=v1, attr2=v2)'
    default_value = _DEFAULT_ATTRIBUTES

    # Base name of the output files; '.txt' / '.plan.bin' is appended to it
    queries_file = 'snippets_queries'
    # Appended to the task description to form the resource description
    queries_description = 'only snippets queries'
    # Selects queries that carry the CGI parameter DF=da
    queries_filter = re.compile(r"(&|^)DF=da(&|$)")


class SnippetsFilterGenerationParameter(parameters.SandboxBoolParameter):
    """
        Whether to build an index generation filter

        When enabled, eventlog lines matched by the regular expression from
        _make_index_generation_filter are skipped while loading queries.
    """

    name = 'snippets_filter_generation'
    description = 'Filter snippet queries by index generation'
    default_value = False


class RemoveSupermindControls(parameters.SandboxBoolParameter):
    """
        Whether to pass every loaded query through
        response_cgi.remove_supermind_controls (SEARCH-2050)
    """

    name = 'remove_supermind_controls'
    description = 'Remove supermind controls (SEARCH-2050)'
    default_value = True
    group = 'LoadQueries subfields'


class BaseLoadMediaBasesearchResources(load_eventlog.LoadEventlog):
    """
        Task that loads queries from production
        and builds resources out of them

        Subclasses must implement _get_basesearch_tag, _get_middlesearch_tag
        and _get_plan_resource_type.
    """

    common_parameters = (
        IndexTypeParameter,
        RegionParameter,
        LogNameParameter,
        MinSearchQueriesParameter,
        MinSnippetsQueriesParameter,
        SnippetsFilterGenerationParameter,
        RemoveSupermindControls,
    )

    queries_parameters = (
        AllQueriesAttrsParameter,
        SearchQueriesAttrsParameter,
        SnippetsQueriesAttrsParameter,
    )

    input_parameters = common_parameters + queries_parameters
    client_tags = ctc.Tag.Group.LINUX

    def on_execute(self):
        """
            Tries shards one by one until queries are loaded from one of them

            Raises SandboxTaskFailureError when no base or meta instances
            are found, or when loading failed for every shard.
        """
        self._min_queries = self._get_min_queries()

        def key_function(h):
            return h['shard']

        region = self.ctx[RegionParameter.name]
        index_type = self.ctx[IndexTypeParameter.name]

        base_instances = self._get_base_instances(index_type, region)
        if not base_instances:
            raise errors.SandboxTaskFailureError('Basesearch not found')

        logging.info('Number of base instances: {}'.format(len(base_instances)))

        meta_type, meta_instances = self._get_meta_type_and_instances(index_type, region)
        if not meta_instances:
            raise errors.SandboxTaskFailureError('Meta not found')
        logging.info('Meta_instances: {}'.format(meta_instances))

        # groupby only merges adjacent items, hence the sort by the same key
        for shard_name, shard_instances in itertools.groupby(sorted(base_instances, key=key_function), key_function):
            shard_hosts = set((x['host'], x['port']) for x in shard_instances)

            logging.info('Trying shard {},\nShard hosts: {}'.format(shard_name, shard_hosts))

            shard_metas = self._filter_shard_metas(meta_instances, shard_hosts)

            logging.info('Shard metas: {}'.format(shard_metas))

            if not shard_metas:
                continue

            # Disabled: move int instances with snippets to the top
            # shard_metas.sort(
            #   key=lambda inst: not cms.is_instance_with_tag(inst["host"], inst["port"], _SNIPPET_INT_TAG))
            # logging.info('Shard metas (after sort): {}'.format(shard_metas))

            try:
                self._load_queries(shard_name, list(shard_hosts), shard_metas, meta_type)
            except Exception:
                # Deliberate best effort: a failure on one shard is logged
                # and the next shard is tried
                logging.info(traceback.format_exc())
            else:
                return

        raise errors.SandboxTaskFailureError('Failed to load queries')

    def _get_min_queries(self):
        """
            Returns the minimum number of queries for every query type,
            keyed by the name of the corresponding attributes parameter

            The minimum for "all queries" is the sum of the other two.
        """

        min_search = self.ctx[MinSearchQueriesParameter.name]
        min_snippets = self.ctx[MinSnippetsQueriesParameter.name]
        return {
            SearchQueriesAttrsParameter.name: min_search,
            SnippetsQueriesAttrsParameter.name: min_snippets,
            AllQueriesAttrsParameter.name: min_search + min_snippets,
        }

    def _get_basesearch_tag(self, index_type, region):
        """
            Returns the tag for the basesearch

            Must be implemented by subclasses; the result is unpacked
            as (cms_configuration, instance_tag_name).
        """

        raise NotImplementedError()

    def _get_middlesearch_tag(self, index_type, region):
        """
            Returns the tag for the middle metasearch

            Must be implemented by subclasses; the result is unpacked
            as (meta_type, cms_configuration, instance_tag_name).
        """

        raise NotImplementedError()

    def _get_meta_type_and_instances(self, index_type, region):
        """
            Returns the middle metasearch type and the list of its instances

            Every instance is a (host, port, sources) tuple, where sources
            is the set returned by _get_meta_sources for that instance.
        """

        meta_type, cms_configuration, instance_tag_name = self._get_middlesearch_tag(index_type, region)
        meta_instances = [
            (meta['host'], meta['port'], _get_meta_sources(meta['host'], meta['port']))
            for meta in cms.get_cms_instances(
                instance_tag_name=instance_tag_name, cms_configuration=cms_configuration)
        ]

        return meta_type, meta_instances

    def _get_base_instances(self, index_type, region):
        """
            Returns the basesearch instances used to filter the eventlog
        """

        cms_configuration, instance_tag_name = self._get_basesearch_tag(index_type, region)
        return cms.get_cms_instances(instance_tag_name=instance_tag_name, cms_configuration=cms_configuration)

    def _get_plan_resource_type(self):
        """
           Returns the resource type for the plan

           Must be implemented by subclasses.
        """

        raise NotImplementedError()

    def _get_plan_resource_path(self, queries_parameter):
        """
            Returns the absolute path of the binary plan file
            for the given queries parameter
        """
        return self.abs_path(queries_parameter.queries_file) + '.plan.bin'

    def _get_queries_resource_path(self, queries_parameter):
        """
            Returns the absolute path of the plain text queries file
            for the given queries parameter
        """
        return self.abs_path(queries_parameter.queries_file) + '.txt'

    def _save_queries(self, queries_parameter, attributes):
        """
            Creates a plain text queries resource and a plan resource,
            then converts the text queries into the plan
        """

        description = '{}, {}'.format(self.descr, queries_parameter.queries_description)
        text_resource = self.create_resource(
            description,
            self._get_queries_resource_path(queries_parameter),
            resource_types.PLAIN_TEXT_QUERIES,
            arch=sandboxapi.ARCH_ANY,
            attributes=attributes
        )
        bin_resource = self.create_resource(
            description,
            self._get_plan_resource_path(queries_parameter),
            self._get_plan_resource_type(),
            arch=sandboxapi.ARCH_ANY,
            attributes=attributes
        )
        dolbilka.convert_queries_to_plan(text_resource.path, bin_resource.path)

    def _load_queries(self, shard_name, base_hosts, meta_hosts, meta_type):
        """
            Loads queries for the given basesearch instances
            and saves them as resources

            Only query types whose attributes parameter is non-empty are
            written. Raises SandboxTaskFailureError when _load_eventlog
            reports that the required number of queries was not found.
        """
        index_generation_filter = None
        if self.ctx[SnippetsFilterGenerationParameter.name]:
            index_generation_filter = _make_index_generation_filter(base_hosts)

        enabled_parameters = [
            queries_parameter
            for queries_parameter in self.queries_parameters
            if self.ctx[queries_parameter.name]
        ]

        out_files = {}
        try:
            for queries_parameter in enabled_parameters:
                out_files[queries_parameter.name] = {
                    'file': open(self._get_queries_resource_path(queries_parameter), "w"),
                    're': queries_parameter.queries_filter,
                    'length': 0,
                }

            result = self._load_eventlog(
                meta_hosts, meta_type,
                eventlog_name=self.ctx[LogNameParameter.name],
                filter_args=(load_eventlog.make_url_filter(base_hosts), index_generation_filter, out_files)
            )
            if not result:
                raise errors.SandboxTaskFailureError('Failed to find required number of queries')
        finally:
            # Close every file that was opened, even if opening a later one failed
            for name, out in out_files.iteritems():
                logging.info('Total number of queries {}: {}'.format(name, out['length']))
                out['file'].close()

        # The "all queries" file exists only when that output is enabled;
        # shuffling a file that was never created would fail
        if AllQueriesAttrsParameter.name in out_files:
            all_queries_file = self._get_queries_resource_path(AllQueriesAttrsParameter)
            _shuffle_file(all_queries_file, all_queries_file)

        for queries_parameter in enabled_parameters:
            self._save_queries(
                queries_parameter,
                _make_attributes_dict(shard_name, self.ctx[queries_parameter.name])
            )

    def _filter_eventlog(self, eventlog_path, url_filter, index_generation_filter, out_files):
        """
            Extracts queries from eventlog lines and writes them to out_files

            eventlog_path is iterated line by line (NOTE(review): despite the
            name it is used as an iterable of lines - confirm against
            LoadEventlog). Group 1 of a url_filter match is taken as the
            query. Lines matched by index_generation_filter (when it is not
            None) and check-config requests are skipped.

            Returns True once every output except "all queries" has reached
            its minimum from self._min_queries, False otherwise. Counters
            live in out_files, so they accumulate across calls.
        """

        def _write(out, query):
            out['file'].write(query + '\n')
            out['length'] += 1

        # Loop invariants: the flag does not change while the log is read
        remove_supermind_controls = utils.get_or_default(self.ctx, RemoveSupermindControls)
        # None when the "all queries" output is disabled
        all_queries_out = out_files.get(AllQueriesAttrsParameter.name)

        all_queries_ready = False
        for line in eventlog_path:
            match = url_filter.search(line)
            if not match:
                continue
            query = match.group(1)

            query = response_cgi.remove_db_timestamp(query)

            if remove_supermind_controls:
                query = response_cgi.remove_supermind_controls(query)

            if utils.is_check_config_request(query):
                continue

            if index_generation_filter is not None and index_generation_filter.search(line):
                continue

            # NOTE(review): with no outputs besides "all queries" this stays
            # True and the loop stops after the first accepted line
            all_queries_ready = True
            for name, out in out_files.iteritems():
                # "All queries" is filled together with the special outputs below
                if name == AllQueriesAttrsParameter.name:
                    continue

                # Write special queries; a query that matches several filters
                # is appended to "all queries" once per matching filter
                if out['length'] < self._min_queries[name] and out['re'].search(query):
                    _write(out, query)
                    if all_queries_out is not None:
                        _write(all_queries_out, query)

                all_queries_ready = all_queries_ready and out['length'] >= self._min_queries[name]

            if all_queries_ready:
                break

        for name, out in out_files.iteritems():
            logging.info("Current number of queries {}: {}".format(name, out['length']))

        return all_queries_ready

    def _filter_shard_metas(self, meta_instances, shard_hosts):
        """
            Returns {"host", "port"} dicts for the meta instances whose
            sources intersect with shard_hosts

            meta_instances are (host, port, sources) tuples; shard_hosts is
            a set of (host, port) pairs.
        """
        shard_metas = [
            {"host": meta[0], "port": meta[1]}
            for meta in meta_instances if meta[2].intersection(shard_hosts)
        ]
        return shard_metas


def _get_index_generation(base_host, base_port):
    """
        Requests the index generation from a basesearch instance

        Fetches http://<host>:<port>/yandsearch?ms=proto, parses the body
        as meta_pb2.TReport and returns its Head.IndexGeneration.
        Returns None when the request fails with IOError or
        httplib.HTTPException; other errors propagate to the caller.
    """
    logging.info('Retrieving index generation from {}:{}'.format(base_host, base_port))
    try:
        base_url = 'http://{}:{}/yandsearch?ms=proto'.format(base_host, base_port)
        response = urllib2.urlopen(base_url)
        try:
            data = response.read()
        finally:
            # Release the connection explicitly instead of relying on garbage collection
            response.close()

        from sandbox.projects.common.base_search_quality.tree import meta_pb2
        report = meta_pb2.TReport()
        report.ParseFromString(data)
        return report.Head.IndexGeneration
    except (IOError, httplib.HTTPException) as e:
        logging.info('Retrieving index generation for {}:{} failed, {}'.format(base_host, base_port, e))
        return None


def _get_meta_sources(meta_host, meta_port):
    """
        Retrieves the sources mentioned in the config of a metasearch instance

        Fetches http://<host>:<port>/yandsearch?info=getconfig and returns
        the set of (host, port) pairs found by _META_SOURCE_RE, with the
        port converted to int. Returns an empty set when the request fails
        with IOError or httplib.HTTPException.
    """
    logging.info('Retrieving config for {}:{}'.format(meta_host, meta_port))
    try:
        meta_url = 'http://{}:{}/yandsearch?info=getconfig'.format(meta_host, meta_port)
        response = urllib2.urlopen(meta_url)
        try:
            data = response.read()
        finally:
            # Release the connection explicitly instead of relying on garbage collection
            response.close()
        return set((m.group(1), int(m.group(2))) for m in _META_SOURCE_RE.finditer(data))
    except (IOError, httplib.HTTPException) as e:
        logging.info('Retrieving config for {}:{} failed, {}'.format(meta_host, meta_port, e))
        return set()


def _make_index_generation_filter(base_hosts):
    """
        Builds a regular expression based on the current index generation

        The generation is taken from the first (host, port) pair in
        base_hosts for which _get_index_generation returns a value.
        The resulting pattern matches a "dh=<...>,<generation>:<...>"
        parameter whose generation part does not start with that value.
        Raises Exception when no instance reported a generation.
    """
    current_generation = None
    for host, port in base_hosts:
        current_generation = _get_index_generation(host, port)
        if current_generation is not None:
            break

    # Still None: base_hosts is empty or every instance failed to answer
    if current_generation is None:
        raise Exception('Failed to get current index generation')

    logging.info('Current index generation: {}'.format(current_generation))
    pattern = r"(^|&)dh=[^,]+,(?!{})[^:]+:[^&]+(&|$)".format(current_generation)
    return re.compile(pattern)


def _make_attributes_dict(shard_name, attributes_string):
    """
        Parses the attributes string and adds the shard name to the result

        The shard name is stored under
        media_settings.SHARD_INSTANCE_ATTRIBUTE_NAME, overriding any value
        parsed from the string for that key.
    """
    shard_key = media_settings.SHARD_INSTANCE_ATTRIBUTE_NAME
    resource_attributes = string.parse_attrs(attributes_string)
    resource_attributes[shard_key] = shard_name
    return resource_attributes


def _shuffle_file(in_filename, out_filename):
    """
        Writes the lines of in_filename to out_filename in random order

        The whole input is read into memory before the output is opened,
        so both names may refer to the same file. Returns None.
    """
    logging.info('Shuffle {} to {}'.format(in_filename, out_filename))

    with open(in_filename) as in_file:
        lines = in_file.readlines()

    random.shuffle(lines)

    with open(out_filename, "w") as out_file:
        # writelines returns None, so its result is not kept
        out_file.writelines(lines)
