import time
import os.path
import logging
import numpy
import re
import subprocess

from sandbox.projects import resource_types
from sandbox.sandboxsdk import errors
from sandbox.sandboxsdk import paths
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk import sandboxapi

from sandbox.projects.common.search import settings as media_settings
from sandbox.projects.common import decorators
from sandbox.projects.common import string
from sandbox.projects.common import utils
from sandbox.projects.images.metasearch import resources as images_metasearch_resources
from sandbox.projects.images.resources import task as resources_task
from sandbox.projects.images.resources import eventlog as resources_eventlog


# Maps a middlesearch (meta) index type to the (base index type, collection)
# pairs for which the main query streams are created.
_INDEX_TYPES = {
    media_settings.INDEX_MIDDLE: (
       (media_settings.INDEX_ALL, media_settings.ImagesSettings.COLLECTION_ALL),
       (media_settings.INDEX_MAIN, media_settings.ImagesSettings.COLLECTION_MAIN),
       (media_settings.INDEX_QUICK, media_settings.ImagesSettings.COLLECTION_QUICK),
       (media_settings.INDEX_ULTRA, media_settings.ImagesSettings.COLLECTION_ULTRA),
       (media_settings.INDEX_GARBAGE, media_settings.ImagesSettings.COLLECTION_MAIN),
       (media_settings.INDEX_CBIR_MAIN, media_settings.ImagesSettings.COLLECTION_CBIR),
       # INDEX_CBIR_QUICK is disabled: urgent fix because production_sas_imgfcbr was stopped.
       # (media_settings.INDEX_CBIR_QUICK, media_settings.ImagesSettings.COLLECTION_CBIR),
       (media_settings.INDEX_CBIR_GARBAGE, media_settings.ImagesSettings.COLLECTION_CBIR),
    ),
    media_settings.INDEX_MIDDLE_RQ: (
       (media_settings.INDEX_RQ, media_settings.ImagesSettings.COLLECTION_MAIN),
    ),
}
# Matches a URL whose "relev=" parameter contains cbirmethod=2 ("=" is URL-encoded
# as %3D), optionally preceded by other ";"-terminated items of the same parameter.
_DUP_QUERY_RE = re.compile(r"relev=([^&]+;)?cbirmethod%3D2")
# Same as above, but for cbirmethod=4.
_SIMILAR_QUERY_RE = re.compile(r"relev=([^&]+;)?cbirmethod%3D4")

_WEB_SLOW_QUERY_THRESHOLD = 128.0  # web queries taking at least 128 ms in total are considered slow
_WEB_SLOW_SOURCE_THRESHOLD = 40.0  # a source whose median response time exceeds 40 ms is considered slow


# Task input parameter that selects which middlesearch index type to load resources for.
class IndexTypeParameter(parameters.SandboxStringParameter):
    # Key under which the selected value is stored in the task context (read as self.ctx[IndexTypeParameter.name]).
    name = 'index_type'
    description = 'Index type'
    # (label, value) pairs; the values are also the keys of _INDEX_TYPES.
    choices = [
        ('Main', media_settings.INDEX_MIDDLE),
        ('Related queries', media_settings.INDEX_MIDDLE_RQ),
    ]
    default_value = media_settings.INDEX_MIDDLE


class FuncFilteredQueriesFile(resources_eventlog.QueriesFile):
    """
        Queries file that additionally filters queries by a predicate applied to the query URL
    """

    def __init__(self, index_type, collection, variant, predicate):
        """
            :param index_type: forwarded to QueriesFile.__init__
            :param collection: forwarded to QueriesFile.__init__
            :param variant: forwarded to QueriesFile.__init__
            :param predicate: callable taking the query URL (data["url"]);
                a truthy result keeps the query
        """
        resources_eventlog.QueriesFile.__init__(self, index_type, collection, variant)
        self.__predicate = predicate

    def _test_query(self, data):
        """
            Return True only when both the base class check and the predicate accept the query.

            The predicate is not evaluated when the base class check rejects the query.
        """
        accepted_by_base = resources_eventlog.QueriesFile._test_query(self, data)
        if accepted_by_base and self.__predicate(data["url"]):
            return True
        return False

    def validate(self, min_counters):
        # Intentionally a no-op for now.
        # TODO: validate number of requests too
        pass


class ProcFilteredQueriesFile(resources_eventlog.QueriesFile):
    """
        Queries file whose content is piped through an external filter executable.

        Everything written to self._file goes to the stdin of the filter process;
        the stdout of the filter process is written to self.queries_path.
    """

    def open(self, task):
        """
            Start the filter process and make its stdin the target for written queries.

            :param task: task object used to sync the filter executable resource
                (its sync_resource method and arch attribute are used)
        """
        tool_path = task.sync_resource(utils.get_and_check_last_released_resource_id(
            resource_types.IMAGES_QUERY_FILTER_EXECUTABLE,
            arch=task.arch
        ))
        self._real_file = open(self.queries_path, "w")
        try:
            self._proc = process.run_process(
                [tool_path],
                stdin=subprocess.PIPE, stdout=self._real_file,
                log_prefix="query_filter", outputs_to_one_file=False,
                wait=False
            )
        except Exception:
            # do not leak the output file handle when the filter fails to start
            self._real_file.close()
            raise
        self._file = self._proc.stdin
        self._counter = 0

    def close(self):
        """
            Finish filtering: close the filter's stdin, wait for it to exit and close the output file.

            The filter gets up to 300 seconds to exit on its own after its stdin
            is closed; after that it is terminated. The output file is closed in
            every case, including when closing the pipe or waiting raises.
        """
        try:
            # closing stdin is inside the try block so that the output file
            # is still closed if flushing the pipe fails
            self._file.close()

            # try graceful shutdown first
            deadline = time.time() + 300
            while time.time() < deadline:
                rc = self._proc.poll()
                if rc is not None:
                    logging.info("Filter shutted down with code {}".format(rc))
                    return
                time.sleep(2)
            logging.warning("Failed to shutdown filter process, terminating")

            # terminate process
            utils.terminate_process(self._proc)
        finally:
            self._real_file.close()

    def validate(self, min_counters):
        # Intentionally a no-op for now.
        pass  # TODO: validate number of requests too


class WebSlowQueriesFile(resources_eventlog.QueriesFile):
    """
        Queries file that keeps only slow web queries.

        A query is kept when data["web"] is truthy, data["full_delta"] is at least
        _WEB_SLOW_QUERY_THRESHOLD and the median of the "IMAGES-search" entries in
        data["source_delta"] is greater than _WEB_SLOW_SOURCE_THRESHOLD.
    """

    def _test_query(self, data):
        """
            Return True when the query passes the base class check and is a slow web query.
        """
        if not resources_eventlog.QueriesFile._test_query(self, data):
            return False

        if not (data["web"] and data["full_delta"] >= _WEB_SLOW_QUERY_THRESHOLD):
            return False

        source_deltas = data["source_delta"].get("IMAGES-search", [])
        # The median of an empty sequence is NaN and numpy emits a RuntimeWarning
        # for it; a query without "IMAGES-search" timings is simply not slow.
        if numpy.size(source_deltas) == 0:
            return False

        return bool(numpy.median(source_deltas) > _WEB_SLOW_SOURCE_THRESHOLD)

    def validate(self, min_counters):
        # Intentionally a no-op for now.
        pass  # TODO: validate number of requests too


class ImagesLoadMiddlesearchResources(resources_task.ImagesProductionResourcesTask,
                                      resources_task.LoadMetasearchResourcesTask):
    """
        Loads queries, configuration files and rearrange data from production

        For the main middlesearch index type the configuration is additionally
        loaded from hamster instances.
    """

    type = 'IMAGES_LOAD_MIDDLESEARCH_RESOURCES'

    input_parameters = (IndexTypeParameter,) + resources_task.LoadMetasearchResourcesTask.input_parameters

    def on_enqueue(self):
        """
            Create the data and config resources and register one queries resource per queries file.
        """
        resources_task.LoadMetasearchResourcesTask.on_enqueue(self)

        meta_index_type = self.ctx[IndexTypeParameter.name]

        data_attributes = self.ctx[resources_task.DataAttrsParameter.name]
        data_attributes_dict = string.parse_attrs(data_attributes)
        self._create_data_resource(data_attributes_dict)
        self._create_config_resource(data_attributes_dict)

        if meta_index_type == media_settings.INDEX_MIDDLE:
            # The hamster config gets the same attributes, except that the value of the
            # first testenv attribute is moved under the hamster-specific key.
            # NOTE: the dictionary passed to the calls above is modified in place here.
            key = media_settings.ImagesSettings.testenv_resource_attributes(meta_index_type)[0]
            key_hamster = media_settings.ImagesSettings.testenv_resource_attributes(meta_index_type, is_hamster=True)[0]
            data_attributes_dict[key_hamster] = data_attributes_dict[key]
            del data_attributes_dict[key]
            self._create_config_resource(data_attributes_dict, is_hamster=True)

        queries_attributes = string.parse_attrs(self.ctx[resources_task.QueriesAttrsParameter.name])
        for queries_file in self.__get_queries_files():
            # attribute names are templates formatted with the variant of the queries file
            attributes = {k.format(queries_file.variant): v for k, v in queries_attributes.iteritems()}
            self._register_queries_resource(queries_file, attributes)

    def on_execute(self):
        """
            Load config, data and queries from the first usable instance and set shard attributes.

            Instances are tried one by one; a failure on one instance is logged
            and the next instance is tried.

            :raises errors.SandboxTaskFailureError: when no production instance
                (or, for the main index type, no hamster instance) could be used
        """
        meta_index_type = self.ctx[IndexTypeParameter.name]

        # Normalize names before filtering, so that whitespace-only items of the
        # comma-separated list do not end up in the set as empty strings.
        raw_archived = utils.get_or_default(self.ctx, resources_task.InstancesWithArchivedPlan)
        archived_instances = set(
            name for name in (item.strip().lower() for item in raw_archived.split(',')) if name
        )
        logging.info('Archived instances {}'.format(','.join(archived_instances)))

        # generate queries attributes
        queries_attributes = {}
        for queries_file in self.__get_queries_files():
            index_type = queries_file.index_type
            if index_type not in queries_attributes:
                queries_attributes[index_type] = self.__get_shard_attributes(index_type)

        # load resources
        for middle_instance in self._get_instances(meta_index_type, archived_instances=archived_instances):
            logging.info('Using instance {}'.format(middle_instance))
            try:
                self._load_config(middle_instance)
                self._load_data()
                archived_plan = ''
                instance_name = middle_instance[0]
                # archived_instances holds lowercased names, so compare case-insensitively
                if instance_name.lower() in archived_instances:
                    logging.info('Try to search archived plan for instance {}'.format(instance_name))
                    archived_plan = utils.get_and_check_last_resource_with_attribute(
                        images_metasearch_resources.IMAGES_MIDDLESEARCH_PLAN_ARCHIVE,
                        attr_name='instance', attr_value=instance_name.lower())
                else:
                    logging.info('No archived plan for instance {}'.format(instance_name))
                self._load_queries(middle_instance, meta_index_type, self.__get_queries_files(), archived_plan)
            except Exception as e:
                # best effort: keep the traceback in the log and try the next instance
                logging.exception("Failed to acquire queries from {}: {}".format(middle_instance, str(e)))
            else:
                break
        else:
            # the loop finished without a single successful instance
            raise errors.SandboxTaskFailureError("Failed to acquire production data. See logs for details")

        if meta_index_type == media_settings.INDEX_MIDDLE:
            for middle_instance in self._get_instances(meta_index_type, is_hamster=True):
                logging.info('Using instance {} from hamster'.format(middle_instance))
                try:
                    self._load_config(middle_instance, is_hamster=True)
                except Exception as e:
                    # only the config is loaded from hamster
                    logging.exception(
                        "Failed to acquire config from {} from hamster: {}".format(middle_instance, str(e))
                    )
                else:
                    break
            else:
                raise errors.SandboxTaskFailureError("Failed to acquire hamster data. See logs for details")

        # wait for possible database load tasks
        utils.check_subtasks_fails(fail_on_first_failure=True)

        # update resource attributes: new style
        for queries_file in self.__get_queries_files():
            attributes = queries_attributes[queries_file.index_type]
            if not attributes:
                # no shard attributes for this index type
                continue
            utils.set_resource_attributes(self.ctx[queries_file.queries_key], attributes)
            utils.set_resource_attributes(self.ctx[queries_file.plan_key], attributes)

    def _create_data_resource(self, attributes):
        """
            Create the IMAGES_MIDDLESEARCH_DATA resource located at the task data path.

            :param attributes: attributes to assign to the created resource
        """
        self.create_resource(
            self.descr,
            self.__get_data_path(),
            images_metasearch_resources.IMAGES_MIDDLESEARCH_DATA,
            arch=sandboxapi.ARCH_ANY,
            attributes=attributes
        )

    def _load_data(self):
        """
            Recreate the 'rearrange' and 'pure' folders and fill 'rearrange' with data resources.
        """
        meta_index_type = self.ctx[IndexTypeParameter.name]

        data_path = self.__get_data_path()
        rearrange_path = os.path.join(data_path, 'rearrange')
        pure_path = os.path.join(data_path, 'pure')

        paths.make_folder(rearrange_path, delete_content=True)
        paths.make_folder(pure_path, delete_content=True)

        # Merge additional ban resources (404, antispam, etc.)
        for resource_type in media_settings.ImagesSettings.middlesearch_data_resources(meta_index_type):
            resource_path = utils.sync_last_stable_resource(resource_type)
            target_path = os.path.join(rearrange_path, os.path.basename(resource_path))
            paths.copy_path(resource_path, target_path)
            paths.chmod(target_path, 0o777, recursively=True)

    def __get_shard_attributes(self, index_type):
        """
            Return middlesearch shard attributes for the index type.

            :return: None for the INDEX_ALL and INDEX_ULTRA index types,
                otherwise the attributes of the middlesearch shard
        """
        if index_type in (media_settings.INDEX_ALL, media_settings.INDEX_ULTRA):
            return None

        attributes = self._get_middlesearch_shard_attributes(index_type)

        # ensure that database available
        snippetizer_index_type = self._get_basesearch_snippetizer_index_type(index_type)
        self._get_basesearch_database(index_type, attributes[media_settings.SHARD_INSTANCE_ATTRIBUTE_NAME])
        self._get_basesearch_database(
            snippetizer_index_type, attributes[media_settings.SNIP_SHARD_INSTANCE_ATTRIBUTE_NAME]
        )
        return attributes

    def __get_data_path(self):
        """
            Return the absolute path of the middlesearch data folder of this task.
        """
        return self.abs_path("mmeta.data")

    @decorators.memoize
    def __get_queries_files(self):
        """
            Build the list of queries files for the selected middlesearch index type.

            The list always contains a plain and an apphost queries file for every
            entry of _INDEX_TYPES. For the main middlesearch index type it also
            contains the pattern-filtered CBIR subsets, the externally filtered
            subsets and the slow web queries subsets.

            NOTE(review): callers invoke this several times and appear to rely on
            decorators.memoize returning the same objects each time - confirm.
        """
        meta_index_type = self.ctx[IndexTypeParameter.name]
        images_settings = media_settings.ImagesSettings

        result = []

        # main streams
        for base_index_type, collection_type in _INDEX_TYPES[meta_index_type]:
            result.append(resources_eventlog.QueriesFile(base_index_type, collection_type))
            result.append(resources_eventlog.ApphostQueriesFile(base_index_type, collection_type))

        if meta_index_type != media_settings.INDEX_MIDDLE:
            return result

        # various subsets of main and garbage streams filtered by simple pattern
        pattern_filters = (
            (images_settings.SERP_TYPE_FILTER_DUPS, _DUP_QUERY_RE),
            (images_settings.SERP_TYPE_FILTER_SIMILAR, _SIMILAR_QUERY_RE),
        )
        for base_index_type in (media_settings.INDEX_CBIR_MAIN, media_settings.INDEX_CBIR_GARBAGE):
            for serp_type, query_re in pattern_filters:
                result.append(FuncFilteredQueriesFile(
                    base_index_type,
                    images_settings.COLLECTION_CBIR,
                    "_" + serp_type,
                    query_re.search
                ))

        main_and_garbage = (media_settings.INDEX_MAIN, media_settings.INDEX_GARBAGE)

        # various subsets of main and garbage streams filtered by external filters
        for base_index_type in main_and_garbage:
            result.append(ProcFilteredQueriesFile(
                base_index_type,
                images_settings.COLLECTION_MAIN,
                "_" + images_settings.SERP_TYPE_FILTER_ATTRS
            ))

        # slow web queries
        for base_index_type in main_and_garbage:
            result.append(WebSlowQueriesFile(
                base_index_type,
                images_settings.COLLECTION_MAIN,
                "_" + images_settings.SERP_TYPE_FILTER_WEB_SLOW
            ))

        return result


# Module-level alias of the task class; presumably the name the Sandbox task loader looks up - confirm.
__Task__ = ImagesLoadMiddlesearchResources
