# -*- coding: utf-8 -*-
import logging
import os.path
import json
import urllib2

from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk import paths

from sandbox.projects import resource_types

from sandbox.projects.common import config_processor as cfgproc
from sandbox.projects.common import dolbilka
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import utils

from sandbox.projects.common.search import bugbanner
from sandbox.projects.common.search import components as sc
from sandbox.projects.images.metasearch import resources as images_metasearch_resources


# Context key under which the parsed cachehit statistics are kept inline (see store_stats)
STATS_INLINE_ID = 'stats'
# Context key holding the id of the resource the statistics are dumped to
STATS_RESOURCE_ID = 'out_resource_id'

# Make ports separated a bit (see epic bug in SEARCH-2453)
_MIDSEARCH_PORT = sc.DEFAULT_MIDDLESEARCH_PORT
_INTSEARCH_PORT = sc.DEFAULT_MIDDLESEARCH_PORT + 10
_FAKEBASE_PORT = sc.DEFAULT_MIDDLESEARCH_PORT + 20

# Context keys for the eventlog / query cache paths and the ids of their resources
_EVENTLOG_PATH_CTX_ID = 'eventlog_out_path'
_EVENTLOG_RESOURCE_CTX_ID = 'eventlog_out_resource_id'
_QUERY_CACHE_PATH_CTX_ID = 'query_cache_path'
_QUERY_CACHE_RESOURCE_CTX_ID = 'query_cache_res_id'
# Context key for the number of requests reported by the dolbilka dumper after shooting
_ACTUAL_REQUESTS_CTX = 'actual_requests'

# Name of the parameter group used by the query-related task parameters
_QUERY_OPTIONS_GROUP = 'Query Options'


class QueryCacheMode:
    # Values of the EvlogCachehitParams.QueryCache option.
    # SAVE: a QUERIES_CACHE resource is registered for the query cache folder on enqueue
    SAVE = 'save_query_cache'
    # LOAD: an existing query cache resource is copied into the query cache folder before the run
    LOAD = 'load_query_cache'
    # AUTO: the run starts from a freshly created (emptied) query cache folder, nothing is saved
    AUTO = 'auto_query_cache'


class EvlogCachehitParams:
    """
        Input parameters shared by the evlogcachehit tasks.

        ``params`` lists every parameter class in the order it is added to a task's
        ``input_parameters``.
    """

    # Resource with the evlogcachehit executable that processes the eventlog
    class Binary(sp.ResourceSelector):
        name = 'middlesearch_evlogcachehit_resource_id'
        description = 'Evlogcachehit'
        resource_type = resource_types.EVLOGCACHEHIT_EXECUTABLE
        group = 'Middlesearch parameters'
        required = True

    # Comma separated region ids; when non-empty they are passed to the binary as --regions
    class Regions(sp.SandboxStringParameter):
        name = 'cachehit_by_regions'
        description = 'Separate cachehit for regions (e.g. "1,2,3")'
        group = _QUERY_OPTIONS_GROUP
        default_value = ''

    # Share of the actually shot requests that is passed to the binary as --skip_requests
    class SkipRequestsRatio(sp.SandboxFloatParameter):
        name = 'evlogcachehit_skip_requests'
        description = 'Skip requests ratio for computing stats, from [0, 1)'
        group = _QUERY_OPTIONS_GROUP
        default_value = 0.9

    # Legacy request limit; on_execute copies it into dolbilka.DolbilkaExecutorRequestsLimit
    # when that parameter is absent from the context
    class RequestLimit(sp.SandboxIntegerParameter):
        name = 'request_limit'
        description = 'Limit requests (0 - shoot all from plan)'
        group = _QUERY_OPTIONS_GROUP
        default_value = 1000000

    # Legacy thread limit; on_execute uses it as a fallback for
    # dolbilka.DolbilkaMaximumSimultaneousRequests
    class DexecutorThreads(sp.SandboxIntegerParameter):
        name = 'dexecutor_threads'
        description = 'Limit dexecutor threads (0 - equals to number of cpus)'
        group = _QUERY_OPTIONS_GROUP
        default_value = 0

    # Selects whether the binary gets --sources_count (BY_NUMBER) or --sources_names (BY_NAME)
    class AggregationMode(sp.SandboxStringParameter):
        BY_NAME = 'by_name'
        BY_NUMBER = 'by_number'

        name = 'aggregation_mode'
        description = 'Aggregate statistics'
        group = _QUERY_OPTIONS_GROUP
        choices = [('By source name', BY_NAME), ('By source number', BY_NUMBER)]
        default_value = BY_NUMBER

    # When set, an EVENTLOG_DUMP resource is registered for the eventlog on enqueue
    class SaveEventlog(sp.SandboxBoolParameter):
        name = 'save_eventlog'
        description = 'Save eventlog'
        group = _QUERY_OPTIONS_GROUP
        default_value = False

    # Query cache resource that is synced and copied when QueryCache is set to LOAD
    class QueryCacheResource(sp.ResourceSelector):
        name = _QUERY_CACHE_RESOURCE_CTX_ID
        description = 'Query cache resource id'
        resource_type = resource_types.QUERIES_CACHE
        group = _QUERY_OPTIONS_GROUP

    # Query cache handling mode, one of the QueryCacheMode values
    class QueryCache(sp.SandboxStringParameter):
        name = "query_cache"
        description = 'Query cache save/load option'
        choices = [
            ('Save query cache', QueryCacheMode.SAVE),
            ('Load existing query cache', QueryCacheMode.LOAD),
            ('Do nothing', QueryCacheMode.AUTO),
        ]
        # NOTE(review): presumably makes the resource selector visible only for LOAD -
        # confirm against the sandbox sdk semantics of sub_fields
        sub_fields = {QueryCacheMode.LOAD: [_QUERY_CACHE_RESOURCE_CTX_ID]}
        default_value = QueryCacheMode.AUTO
        group = _QUERY_OPTIONS_GROUP

    params = (
        Binary,
        Regions,
        SkipRequestsRatio,
        RequestLimit,
        DexecutorThreads,
        AggregationMode,
        SaveEventlog,
        QueryCache,
        QueryCacheResource,
        dolbilka.DolbilkaExecutorMode,
        dolbilka.DolbilkaFixedRps,
    )


# Selector of the config resource for the intsearch instance; its value is passed
# as config_id when TestMiddlesearchEvlogCachehit creates the int component
class IntConfigParameter(sp.ResourceSelector):
    name = 'intsearch_config_resource_id'
    description = 'Intsearch config:'
    # any of these middlesearch config resource types is accepted
    resource_type = [
        resource_types.MIDDLESEARCH_CONFIG,
        images_metasearch_resources.IMAGES_MIDDLESEARCH_CONFIG,
        resource_types.VIDEO_MIDDLESEARCH_CONFIG
    ]
    group = 'Middlesearch parameters'
    required = True


# TODO: replace RequestLimit with DolbilkaExecutorRequestsLimit
class MiddlesearchEvlogCachehitTask(bugbanner.BugBannerTask):
    """
        Base task for collecting metasearch cache hit statistics.
        Common class for shooting at production and test middlesearch configurations.
    """
    required_ram = 62 << 10

    @property
    def footer(self):
        """
            Build the task footer with a per-source cachehit table.

            Until the statistics are stored in the context a placeholder is returned.
            Otherwise the table has one column per source (sorted by name) with two rows:
            the cache miss count (with its percentage of incoming requests) and the
            number of subsource requests.

            :return: dict describing the footer content
        """
        if STATS_INLINE_ID not in self.ctx:
            return {"Calculating...": ""}

        overall_stats = self.ctx[STATS_INLINE_ID]["all"]
        incoming_requests = float(overall_stats["incoming_requests"])
        sources_stats = overall_stats["sources_stats"]

        header = [{"key": "stats", "title": "&nbsp;"}]
        body = {
            "stats": ["cache miss", "subsource requests"]
        }

        for source in sorted(sources_stats):
            source_stats = sources_stats[source]
            cache_miss_count = source_stats["cache_miss_count"]
            # with zero incoming requests the ratio is undefined; show 0% instead of
            # failing the whole footer with ZeroDivisionError
            if incoming_requests:
                cache_miss_percent = cache_miss_count / incoming_requests * 100.0
            else:
                cache_miss_percent = 0.0

            header.append({"key": source, "title": source})
            body[source] = [
                "{} <span style='color:blue'>({:0.2f}%)</span>".format(cache_miss_count, cache_miss_percent),
                source_stats["sub_source_requests"]
            ]

        return {
            "<h3>Cachehit stats</h3>": {
                "header": header,
                "body": body,
            }
        }

    def on_enqueue(self):
        """
            Validate the skip ratio and register the output resources.

            Always registers the cachehit statistics resource and stores the eventlog and
            query cache paths in the context. The eventlog resource is registered only when
            SaveEventlog is set, the query cache resource only in the SAVE query cache mode.
        """
        ratio = self.ctx[EvlogCachehitParams.SkipRequestsRatio.name]
        eh.ensure(0 <= ratio < 1, "Bad skip ratio value {}".format(ratio))
        bugbanner.BugBannerTask.on_enqueue(self)
        channel.task = self

        stats_resource = self.create_resource(
            self.descr,
            'cachehit_stats.txt',
            resource_types.MIDDLESEARCH_CACHEHIT_STATS,
            arch='any'
        )
        self.ctx[STATS_RESOURCE_ID] = stats_resource.id

        eventlog_path = self.abs_path('event.log')
        self.ctx[_EVENTLOG_PATH_CTX_ID] = eventlog_path
        if self.ctx[EvlogCachehitParams.SaveEventlog.name]:
            self.ctx[_EVENTLOG_RESOURCE_CTX_ID] = self.create_resource(
                self.descr,
                eventlog_path,
                resource_types.EVENTLOG_DUMP,
                arch='any'
            ).id

        query_cache_path = self.abs_path('query_cache')
        self.ctx[_QUERY_CACHE_PATH_CTX_ID] = query_cache_path
        if self.ctx.get(EvlogCachehitParams.QueryCache.name) == QueryCacheMode.SAVE:
            cache_description = 'saved query.cache for plan "{}"'.format(self._get_plan_name())
            self.ctx[_QUERY_CACHE_RESOURCE_CTX_ID] = self.create_resource(
                cache_description,
                query_cache_path,
                resource_types.QUERIES_CACHE,
                arch='any'
            ).id

    def on_execute(self):
        """
            Shoot the middlesearch with the plan and compute cachehit statistics.

            Steps: fill in the dolbilka parameters missing from the context, prepare the
            query cache folder according to the query cache mode, remove a stale eventlog,
            create/start the middlesearch, shoot it, stop it and run evlogcachehit over
            the produced eventlog.

            Subclasses have to provide _create_middlesearch_component(event_log, query_cache)
            in addition to the hooks declared in this class.
        """
        # For backward compatibility: tasks created with the legacy parameters only
        # get the dolbilka executor parameters derived from them
        requests_limit_key = dolbilka.DolbilkaExecutorRequestsLimit.name
        if requests_limit_key not in self.ctx:
            self.ctx[requests_limit_key] = self.ctx[EvlogCachehitParams.RequestLimit.name]

        # the context is keyed by parameter *names*; checking the parameter class itself
        # would never match and would overwrite an explicitly provided value
        max_requests_key = dolbilka.DolbilkaMaximumSimultaneousRequests.name
        if max_requests_key not in self.ctx:
            self.ctx[max_requests_key] = utils.get_or_default(
                self.ctx,
                EvlogCachehitParams.DexecutorThreads
            )
        # a zero/empty value falls back to the number of cpus of the client
        if not self.ctx.get(max_requests_key):
            self.ctx[max_requests_key] = self.client_info['ncpu']

        self.add_bugbanner(bugbanner.Banners.WebMiddleSearch)

        plan_file = self._get_plan_path()

        query_cache = self.ctx[_QUERY_CACHE_PATH_CTX_ID]

        query_cache_mode = self.ctx.get(EvlogCachehitParams.QueryCache.name)
        if query_cache_mode in (QueryCacheMode.SAVE, QueryCacheMode.AUTO):
            # start from an empty query cache folder
            paths.make_folder(query_cache, delete_content=True)
        elif query_cache_mode == QueryCacheMode.LOAD:
            # reuse a previously saved query cache; the copy has to be writable
            remote_path = self.sync_resource(self.ctx[_QUERY_CACHE_RESOURCE_CTX_ID])
            paths.copy_path(remote_path, query_cache)
            paths.add_write_permissions_for_path(query_cache)

        event_log = self.ctx[_EVENTLOG_PATH_CTX_ID]
        paths.remove_path(event_log)

        self._create_middlesearch_component(event_log, query_cache)

        self._start_middlesearch_component()
        self._shoot_middlesearch_component(plan_file)
        self._stop_middlesearch_component()

        self._run_evlogcachehit(event_log)

    def _get_ms_port_for_shooting(self):
        # Port passed to the dolbilka session in _shoot_middlesearch_component
        return sc.DEFAULT_MIDDLESEARCH_PORT

    def _start_middlesearch_component(self):
        """
            Starts the middlesearch (has to be overridden by subclasses)
        """

        raise NotImplementedError()

    def _stop_middlesearch_component(self):
        """
            Stops the middlesearch (has to be overridden by subclasses)
        """

        raise NotImplementedError()

    def _get_source_names(self):
        """
            Returns the list of source names obtained from the middlesearch configuration file
            (has to be overridden by subclasses)
        """

        raise NotImplementedError()

    def _get_plan_path(self):
        """
            Returns the path to the plan file (has to be overridden by subclasses)
        """

        raise NotImplementedError()

    def _get_plan_name(self):
        """
            Returns the name of the plan resource (has to be overridden by subclasses)
        """

        raise NotImplementedError()

    def _shoot_middlesearch_component(self, plan):
        """
            Shoot the middlesearch with the given plan.

            The number of requests reported by the dumper is stored in the context;
            the task fails if it is zero. The session dump is removed afterwards.

            :param plan: path to the plan file
        """
        shooter = dolbilka.DolbilkaExecutor()  # the executor reads its parameters from the context
        plan_basename = os.path.basename(plan)
        dump_path = self.abs_path(plan_basename) + '.dump'
        session_dump = shooter.run_session(plan, dump=dump_path, port=self._get_ms_port_for_shooting())

        stat_title = 'Dolbilka result stat ({0})'.format(self.descr)
        dumper_stat = shooter.run_dumper(dump_path, stat_title)

        requests_shot = int(dumper_stat['requests'])
        self.ctx[_ACTUAL_REQUESTS_CTX] = requests_shot
        eh.ensure(requests_shot, "Zero requests have been shot at middlesearch")
        os.unlink(session_dump)

    def _run_evlogcachehit(self, eventlog_path):
        """
            Run the evlogcachehit binary over the eventlog and store the statistics.

            The binary output (JSON on stdout) is parsed, source indexes are replaced with
            source names, and the result is stored via store_stats. The overall source
            statistics and the incoming request count are also put into the context under
            'cachehit' and 'incoming_requests'.

            :param eventlog_path: path to the eventlog written by the middlesearch
        """

        binary_path = self.sync_resource(self.ctx[EvlogCachehitParams.Binary.name])
        # the ratio is applied to the number of requests actually shot
        skip_requests = int(self.ctx[EvlogCachehitParams.SkipRequestsRatio.name] * self.ctx[_ACTUAL_REQUESTS_CTX])
        regions = self.ctx[EvlogCachehitParams.Regions.name]

        cmd = [
            binary_path,
            '--skip_requests', str(skip_requests),
            '--input', eventlog_path
        ]

        search_source_names = self._get_source_names()
        aggregation_mode = utils.get_or_default(self.ctx, EvlogCachehitParams.AggregationMode)

        if aggregation_mode == EvlogCachehitParams.AggregationMode.BY_NUMBER:
            cmd += ['--sources_count', str(len(search_source_names))]
        elif aggregation_mode == EvlogCachehitParams.AggregationMode.BY_NAME:
            cmd += ['--sources_names', ','.join(search_source_names)]

        if regions:
            # datadir must be a plain path string: a stray trailing comma here would turn
            # it into a 1-tuple and break os.path.join below
            datadir = self.sync_resource(channel.task.ctx[sc.DefaultMiddlesearchParams.Data.name])
            cmd += [
                '--regions', regions,
                '--geotree', os.path.join(datadir, 'pure', 'geo.c2p')
            ]

        temp_file = self.abs_path('evlog-cachehit.stats.temp')
        # "w+" lets the same handle be rewound and read back after the process has written to it
        with open(temp_file, "w+") as f:
            process.run_process(
                cmd,
                stdout=f,
                log_prefix='evlogcachehit',
                wait=True,
                timeout=1200,
                outputs_to_one_file=False,
            )
            f.seek(0)
            stats = json.load(f)

        # replace source indexes with a human readable names
        self._process_evlogcachehit_output(stats, search_source_names)
        store_stats(self, stats)

        self.ctx['cachehit'] = stats['all']['sources_stats']['all']
        self.ctx['incoming_requests'] = stats['all']['incoming_requests']

    def _process_evlogcachehit_output(self, stats, search_source_names):
        for region, region_stats in stats.iteritems():
            new_sources_stats = {}
            for client_num, source_stats in region_stats['sources_stats'].iteritems():
                if client_num != 'all':
                    client_num = search_source_names[int(client_num)]
                new_sources_stats[client_num] = source_stats
            region_stats['sources_stats'] = new_sources_stats

    def get_results(self):
        """
            Human readable summary of the overall cachehit statistics.

            :return: summary string, or None when the task is not completed or
                     no 'cachehit' statistics are stored in the context
        """
        if self.is_completed():
            overall = self.ctx.get('cachehit', None)
            if overall:
                return 'Subsource requests: %s. Cache miss count: %s' % (
                    overall['sub_source_requests'], overall['cache_miss_count']
                )
        return None

    def get_short_task_result(self):
        """
            Short task result: the overall number of subsource requests as a string.

            :return: string, or None when the task is not completed or
                     no 'cachehit' statistics are stored in the context
        """
        if not self.is_completed():
            return None

        overall = self.ctx.get('cachehit', None)
        return str(overall['sub_source_requests']) if overall else None


class TestMiddlesearchEvlogCachehit(MiddlesearchEvlogCachehitTask):
    """
        Base task for collecting metasearch cache hit statistics.

        The check is performed on a 'synthetic' test
        http://wiki.yandex-team.ru/JandeksPoisk/KachestvoPoiska/Middlesearch/Cachehit
    """

    input_parameters = (
        sc.DefaultMiddlesearchParams.params +
        (IntConfigParameter,) +
        EvlogCachehitParams.params
    )

    def _create_middlesearch_component(self, event_log, query_cache):
        """
            Create the middlesearch and the intsearch components.

            Every source of the middlesearch points to the local intsearch port, and the
            intsearch itself points to the fake basesearch port. Only the middlesearch
            writes the eventlog and uses the query cache.

            :param event_log: eventlog path for the middlesearch
            :param query_cache: query cache path for the middlesearch
        """
        midsearch_sources = []
        for source_name in self._get_source_names():
            midsearch_sources.append({
                'basesearch_type': source_name,
                'hosts_and_ports': [('localhost', _INTSEARCH_PORT)]
            })

        self.midsearch = sc.get_middlesearch(
            basesearches=midsearch_sources,
            disable_timeouts=True,
            event_log=event_log,
            load_log='/dev/null',
            query_cache=query_cache,
        )

        binary_resource_id = self.ctx[sc.DefaultMiddlesearchParams.Binary.name]
        int_config_resource_id = self.ctx[IntConfigParameter.name]
        fake_basesearches = [
            {'hosts_and_ports': [('localhost', _FAKEBASE_PORT)]}
        ]
        self.intsearch = sc.get_middlesearch_ex(
            is_int=True,
            binary_id=binary_resource_id,
            config_id=int_config_resource_id,
            data_id=None,
            archive_model_id=None,
            basesearches=fake_basesearches,
            disable_timeouts=True,
            port=_INTSEARCH_PORT,
            event_log='/dev/null',
            load_log='/dev/null',
            query_cache=None,
            use_verify_stderr=True,
        )

    def _get_ms_port_for_shooting(self):
        """
            Port to shoot at: the default middlesearch port, shifted by one
            when the apphost mode parameter is set.
        """
        apphost_mode = utils.get_or_default(self.ctx, sc.DefaultMiddlesearchParams.ApphostMode)
        port_offset = 1 if apphost_mode else 0
        return sc.DEFAULT_MIDDLESEARCH_PORT + port_offset

    def _start_middlesearch_component(self):
        # Launch both components first and only then wait for each of them,
        # so the second start is not delayed by the first wait
        self.midsearch.start()
        self.intsearch.start()

        self.midsearch.wait()
        self.intsearch.wait()

    def _stop_middlesearch_component(self):
        # Stop the middlesearch first, then the intsearch it sends requests to
        self.midsearch.stop()
        self.intsearch.stop()

    # Note: unique source names are needed only to perform a correct replacement in the fake config.
    # AuxSource entries will be removed automatically
    def _get_source_names(self):
        """
            Unique 'Collection/SearchSource/ServerDescr' values from the middlesearch config.

            :return: list of unique source names (order is that of set iteration)
        """
        config_resource_id = self.ctx[sc.DefaultMiddlesearchParams.Config.name]
        config_text = fu.read_file(self.sync_resource(config_resource_id))
        server_descriptions = cfgproc.get_parameter_values(config_text, 'Collection/SearchSource/ServerDescr')
        unique_names = set(server_descriptions)
        return list(unique_names)


def store_stats(task, stats):
    """
        Dump stats as JSON into the task's stats resource and keep them in the context

        :param task: task whose context holds the stats resource id
        :param stats: statistics object to store
    """
    stats_resource_id = task.ctx[STATS_RESOURCE_ID]
    stats_path = channel.sandbox.get_resource(stats_resource_id).path
    fu.json_dump(stats_path, stats, indent=4, sort_keys=True)
    task.ctx[STATS_INLINE_ID] = stats


def load_stats(task):
    """
        Return stats from the task context, falling back to downloading the stats resource

        :param task: task whose context holds the inline stats or the stats resource id
        :return: stats object, or None when the resource could not be downloaded
    """
    if STATS_INLINE_ID in task.ctx:
        return task.ctx[STATS_INLINE_ID]

    raw_stats = download_resource_file(task.ctx[STATS_RESOURCE_ID])
    if raw_stats is None:
        return None
    return json.loads(raw_stats)


def download_resource_file(resource_id, file_name=None):
    """
        Downloads a resource file. Used to show the resource text in the task view.
        The http links of the resource are tried one by one; the content from the first
        link that answers is returned. If the resource could not be downloaded,
        None is returned.

        :param resource_id: resource id
        :param file_name: name of the file inside the resource, if the resource is a directory
        :return: content of the resource file, or None
    """
    urls = channel.sandbox.get_resource_http_links(resource_id)
    logging.debug("Try to load resource from links: %s", urls)
    for base_url in urls:
        url = base_url if not file_name else "{}/{}".format(base_url, file_name)
        try:
            response = urllib2.urlopen(url, timeout=5)
            try:
                return response.read()
            finally:
                # do not leak the connection, whether the read succeeded or not
                response.close()
        except Exception:
            # best effort: any failure just moves on to the next link
            logging.debug("Can't load resource '%s' from %s", resource_id, url, exc_info=True)

    logging.info("Can't load resource '%s' from links", resource_id)
    return None
