import logging
import re
import subprocess

from sandbox.projects import resource_types
from sandbox.sandboxsdk import errors
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk import sandboxapi
from sandbox.sandboxsdk.channel import channel

from sandbox.projects.common import dolbilka
from sandbox.projects.common import string
from sandbox.projects.common import utils
from sandbox.projects.common.search import settings as media_settings
from sandbox.projects.common.search import performance as search_performance
from sandbox.projects.common.search.eventlog import eventlog as search_eventlog
from sandbox.projects.common.search.eventlog import load_eventlog
from sandbox.projects.common.search.response import cgi as search_cgi
from sandbox.projects.tank.load_resources import resources as tank_resources
from sandbox.projects.images.metasearch import task as metasearch_task
from sandbox.projects.images.resources import fuzzing


def _cgi_flag_filter(flag_name):
    """Compile a pattern matching ``<flag_name>=da`` as a complete CGI parameter.

    The parameter has to be delimited by ``&`` or by the start/end of the
    query string, so e.g. ``xhaha=da`` or ``haha=dada`` do not match ``haha``.
    """
    return re.compile(r"(&|^)" + flag_name + r"=da(&|$)")


# Output kind -> pattern a query has to match to be written to that output.
# "all" matches every query.
_FILTER_TYPES = {
    "all": re.compile(r".*"),
    "search": _cgi_flag_filter("haha"),
    "snippets": _cgi_flag_filter("DF"),
    "factors": _cgi_flag_filter("allfctrs"),
}

# Templates of the task context keys that hold the ids of the output
# resources; formatted with a key of _FILTER_TYPES.
QUERIES_RESOURCE_KEY = "{}_ptq_resource_id"
PLAN_RESOURCE_KEY = "{}_plan_resource_id"

# Current average speed on production machines
_DEFAULT_RPS = 300
# Minimal number of queries in each generated plan
_PLAN_SIZE_DEMAND = 1000
# Maximum total number of queries to read from eventlog
_PLAN_SIZE_TOTAL = 600000

# Module-level aliases of the component parameter groups declared in
# metasearch_task, exposed under the same names from this module.
SNIPPETIZER_PARAMS = metasearch_task.SNIPPETIZER_PARAMS
BASESEARCH_PARAMS = metasearch_task.BASESEARCH_PARAMS


class PlanParameter(parameters.ResourceSelector):
    """Mandatory selector of the plan resource the middlesearch is shot with."""

    name = "dolbilo_plan_resource_id"
    description = "Plan"
    required = True
    resource_type = resource_types.IMAGES_MIDDLESEARCH_PLAN
    # Shown together with the other dolbilka options.
    group = dolbilka.DOLBILKA_GROUP


class AttributesParameter(parameters.SandboxStringParameter):
    """Free-form ``key=value`` list of extra attributes for the output resources."""

    name = "attributes"
    do_not_copy = True
    description = "Set additional attrs to resources (ex.: attr1=v1, attr2=v2)"


class DemandedPlanTypesParameter(parameters.SandboxStringParameter):
    """Comma-separated plan types whose generated plans must not come out too small."""

    description = "Plans to check emptiness (ex.: search,snippets)"
    name = "demanded_plan_types"


class ImagesGenerateBasesearchRequests(search_performance.OldShootingTask,
                                       fuzzing.GenerateFuzzingTask,
                                       metasearch_task.BaseMiddlesearchTask):
    """
        Shoot basesearch via middlesearch and extract basesearch requests from eventlog
    """

    type = 'IMAGES_GENERATE_BASESEARCH_REQUESTS'

    # The plan selector goes first, followed by the generic shooting options.
    shoot_input_parameters = \
        (PlanParameter,) + \
        search_performance.OldShootingTask.shoot_input_parameters

    metasearch_task_input_parameters = metasearch_task.BaseMiddlesearchTask.input_parameters

    static_input_parameters = (AttributesParameter, DemandedPlanTypesParameter) + \
        fuzzing.GenerateFuzzingTask.input_parameters

    input_parameters = \
        static_input_parameters + metasearch_task_input_parameters + shoot_input_parameters

    execution_space = 200 * 1024
    early_database_auto_selection = False

    def initCtx(self):
        """Put the default ``kill_timeout`` (6 * 60 * 60) into the task context."""
        self.ctx['kill_timeout'] = 6 * 60 * 60

    def on_enqueue(self):
        """Run the parents' enqueue hooks and declare all output resources.

        One queries resource and one plan resource are created for every key
        of ``_FILTER_TYPES``; their ids are stored in the task context.
        """
        metasearch_task.BaseMiddlesearchTask.on_enqueue(self)
        fuzzing.GenerateFuzzingTask.on_enqueue(self)

        # Read the parameter the same way as the other string parameter of
        # this task, so that a missing context key falls back to the default
        # instead of raising KeyError.
        attributes = string.parse_attrs(utils.get_or_default(self.ctx, AttributesParameter))
        for filter_type in _FILTER_TYPES:
            self.__create_output_resources(filter_type, attributes)

    def _fuzzing_dir(self):
        """Return the fuzzing directory path matching the basesearch database index type.

        Raises:
            errors.SandboxTaskFailureError: the index type has no fuzzing directory.
        """
        index_type = media_settings.ImagesSettings.basesearch_database_index_type(
            self.ctx, metasearch_task.BASESEARCH_PARAMS.Database
        )
        if index_type == media_settings.INDEX_RQ:
            return "extsearch/images/related_queries/base/fuzzing"
        elif index_type == media_settings.INDEX_QUICK:
            return "extsearch/images/base/fuzzing/quick"
        elif index_type == media_settings.INDEX_MAIN:
            return "extsearch/images/base/fuzzing/main"
        elif index_type == media_settings.INDEX_CBIR_MAIN:
            return "extsearch/images/base/fuzzing/cbir"
        else:
            raise errors.SandboxTaskFailureError("fuzzing unssuported for index_type={}".format(index_type))

    def _get_queries_parameter(self):
        """Return the parameter class that selects the shooting plan."""
        return PlanParameter

    def _get_middlesearch(self):
        """Return the parent's middlesearch component with its cache disabled."""
        middlesearch = metasearch_task.BaseMiddlesearchTask._get_middlesearch(self)
        middlesearch.disable_cache()
        return middlesearch

    def _use_components(self, basesearch, snippetizer, intsearch, middlesearch):
        """Tag the output resources, prepare the tank virtualenv and shoot the middlesearch.

        Only ``middlesearch`` is used here; the other components are accepted
        to match the hook signature.
        """
        self.__update_output_resources()
        self._init_virtualenv(tank_resource_type=tank_resources.YANDEX_TANK_VIRTUALENV_19)
        self._old_shoot(middlesearch, self.ctx[PlanParameter.name])

    def _after_components(self, basesearch, snippetizer, intsearch, middlesearch):
        """Extract subsource queries from the middlesearch eventlog and build plans from them."""
        self.__extract_queries(
            middlesearch.get_event_log_path(),
            (("localhost", basesearch.port), ("localhost", snippetizer.port))
        )
        self.__generate_plans()

    def __extract_queries(self, eventlog_path, base_hosts):
        """Dump the eventlog and sort matching queries into per-type text files.

        Every line of the evlogdump output that matches the URL filter built
        from ``base_hosts`` yields one query. The query is appended to the
        output file of each ``_FILTER_TYPES`` entry whose pattern it matches
        and is also added to the fuzzing corpus. At most ``_PLAN_SIZE_TOTAL``
        queries are processed. Afterwards the fuzzing corpus is committed on
        a best-effort basis.

        Args:
            eventlog_path: path of the eventlog to dump.
            base_hosts: sequence of ``(host, port)`` pairs handed to
                ``load_eventlog.make_url_filter``.

        Raises:
            errors.SandboxTaskFailureError: a demanded plan type collected
                fewer than ``_PLAN_SIZE_DEMAND`` queries.
        """
        self._fuzzing_corpus_init()

        url_filter = load_eventlog.make_url_filter(base_hosts)

        # Counters exist only for the demanded plan types. A demanded type
        # that is not a key of _FILTER_TYPES is never incremented and thus
        # always fails the size check below.
        demanded_types = utils.get_or_default(self.ctx, DemandedPlanTypesParameter) or ""
        query_counter = {}
        for plan_type in demanded_types.split(","):
            plan_type = plan_type.strip()
            if plan_type:
                query_counter[plan_type] = 0

        out_files = {}
        try:
            # Open inside the try block so that files opened before a failing
            # open() are still closed.
            for filter_type in _FILTER_TYPES:
                out_files[filter_type] = open(self.__get_output_path(filter_type), "w")

            evlogdump = process.run_process(
                [search_eventlog.get_evlogdump(), "-o", "-i", "300", eventlog_path],
                stdout=subprocess.PIPE,
                wait=False,
                outputs_to_one_file=False,
                log_prefix="evlogdump"
            )
            try:
                total_query_count = 0
                for line in evlogdump.stdout:
                    match = url_filter.search(line)
                    if not match:
                        continue
                    total_query_count += 1
                    if total_query_count > _PLAN_SIZE_TOTAL:
                        break

                    query = search_cgi.remove_supermind_controls(match.group(1))
                    for filter_type, filter_predicate in _FILTER_TYPES.iteritems():
                        if filter_predicate.search(query):
                            out_files[filter_type].write(query + "\n")
                            if filter_type in query_counter:
                                query_counter[filter_type] += 1
                    self._fuzzing_corpus_add(query)
            finally:
                # The dump may still be running if the loop stopped early.
                rc = utils.terminate_process(evlogdump)
                logging.info("Exit code of evlogdump is {}".format(rc))
        finally:
            for file_obj in out_files.itervalues():
                file_obj.close()

        # Deliberately outside of the finally clause: raising from there would
        # replace a genuine error from the dump with a "small plans" failure.
        self.__check_demanded_plan_sizes(query_counter)

        # Commit fuzzing resources
        index_type = media_settings.ImagesSettings.basesearch_database_index_type(
            self.ctx, metasearch_task.BASESEARCH_PARAMS.Database
        )

        yamake_resources = [self.ctx[metasearch_task.BASESEARCH_PARAMS.Database.name]]
        models_resource_id = self.ctx[metasearch_task.BASESEARCH_PARAMS.ArchiveModel.name]

        if models_resource_id and index_type in (media_settings.INDEX_MAIN, media_settings.INDEX_QUICK):
            yamake_resources.append(models_resource_id)

        try:
            self._fuzzing_corpus_finish(*yamake_resources)
        except Exception as e:
            # Best effort: a failed corpus commit must not fail the task, but
            # keep the traceback in the log.
            logging.exception("Failed to commit fuzzing updates: %s", e)

    def __check_demanded_plan_sizes(self, query_counter):
        """Fail the task if any demanded plan type collected too few queries.

        Args:
            query_counter: mapping of demanded plan type to the number of
                queries written for it.

        Raises:
            errors.SandboxTaskFailureError: some count is below ``_PLAN_SIZE_DEMAND``.
        """
        too_small = [
            filter_type for filter_type, count in query_counter.iteritems()
            if count < _PLAN_SIZE_DEMAND
        ]
        if not too_small:
            return
        for filter_type in too_small:
            logging.info("Too few requests of type \"%s\" (less than %s)", filter_type, _PLAN_SIZE_DEMAND)
        logging.info("Demanded plans sizes: %s", repr(query_counter))
        raise errors.SandboxTaskFailureError("Small plans to be generated. Thinking of another try.")

    def __generate_plans(self):
        """Convert every per-type queries file into a plan file at ``_DEFAULT_RPS``."""
        logging.info("Generating plans.")
        for filter_type in _FILTER_TYPES:
            dolbilka.convert_queries_to_plan(
                self.__get_output_path(filter_type),
                self.__get_output_path(filter_type, binary=True),
                rps=_DEFAULT_RPS
            )
        logging.info("Plans are generated.")

    def _get_shard_instance_keys(self):
        """Return the shard instance attribute values of the basesearch and snippetizer databases.

        Returns:
            tuple ``(basesearch_value, snippetizer_value)``.
        """
        shard_attribute_name = media_settings.SHARD_INSTANCE_ATTRIBUTE_NAME
        basesearch_shard_instance = self._get_database_attribute(
            metasearch_task.BASESEARCH_PARAMS, shard_attribute_name
        )
        snippetizer_shard_instance = self._get_database_attribute(
            metasearch_task.SNIPPETIZER_PARAMS, shard_attribute_name
        )
        return basesearch_shard_instance, snippetizer_shard_instance

    # Note: Be careful with the 'shard_instance' attribute on 'all' queries.
    #       Its value will be correct for imgsearch only
    def __update_output_resources(self):
        """Copy the shard instance attribute from the databases to the output resources.

        "snippets" resources get the snippetizer database value, every other
        type gets the basesearch database value.
        """
        logging.info("Updating output resources.")
        basesearch_shard_instance_attr, snippetizer_shard_instance_attr = self._get_shard_instance_keys()
        output_attribute_name = media_settings.SHARD_INSTANCE_ATTRIBUTE_NAME
        for filter_type in _FILTER_TYPES:
            attribute_value = (
                snippetizer_shard_instance_attr if filter_type == "snippets" else basesearch_shard_instance_attr
            )
            channel.sandbox.set_resource_attribute(
                self.ctx[QUERIES_RESOURCE_KEY.format(filter_type)], output_attribute_name, attribute_value)
            channel.sandbox.set_resource_attribute(
                self.ctx[PLAN_RESOURCE_KEY.format(filter_type)], output_attribute_name, attribute_value)

    def __create_output_resources(self, filter_type, attributes):
        """Create the queries and plan resources of one filter type and remember their ids.

        Args:
            filter_type: key of ``_FILTER_TYPES``.
            attributes: mapping of attribute name to value; each name is
                passed through ``str.format`` with ``filter_type``, so it may
                contain a ``{}`` placeholder.
        """
        logging.info("Creating output resources with filter_type={}".format(filter_type))
        resource_attributes = {k.format(filter_type): v for k, v in attributes.iteritems()}
        description = "{}, {} requests".format(self.descr, filter_type)

        self.ctx[QUERIES_RESOURCE_KEY.format(filter_type)] = self.create_resource(
            description,
            self.__get_output_path(filter_type),
            resource_types.PLAIN_TEXT_QUERIES,
            arch=sandboxapi.ARCH_ANY,
            attributes=resource_attributes
        ).id
        self.ctx[PLAN_RESOURCE_KEY.format(filter_type)] = self.create_resource(
            description,
            self.__get_output_path(filter_type, binary=True),
            resource_types.BASESEARCH_PLAN,
            arch=sandboxapi.ARCH_ANY,
            attributes=resource_attributes
        ).id

    def __get_output_path(self, filter_type, binary=False):
        """Return the output file name: ``requests-<type>.plan`` if ``binary`` else ``requests-<type>.txt``."""
        return "requests-{}.{}".format(filter_type, "plan" if binary else "txt")

    def _get_database_attribute(self, basesearch_params, attribute_name):
        """Return the value of ``attribute_name`` on the selected database resource.

        Args:
            basesearch_params: parameter group whose ``Database`` parameter
                names the context key holding the database resource id.
            attribute_name: name of the resource attribute to read.

        Raises:
            errors.SandboxTaskFailureError: the resource has no such attribute.
        """
        logging.info("Getting database attribute.")
        database_resource = channel.sandbox.get_resource(self.ctx[basesearch_params.Database.name])
        if attribute_name not in database_resource.attributes:
            raise errors.SandboxTaskFailureError(
                "No '{}' attribute in database resource {}".format(attribute_name, database_resource.id))
        return database_resource.attributes[attribute_name]


# Module-level handle on the task class defined in this file.
# NOTE(review): presumably the Sandbox task loader looks this name up — confirm.
__Task__ = ImagesGenerateBasesearchRequests
