# -*- coding: utf-8 -*-

import re
import cgi
import json
import urllib
import itertools
import logging
import os

from collections import defaultdict

from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.parameters import ResourceSelector
from sandbox.sandboxsdk.task import SandboxTask

from sandbox.projects.common import error_handlers as eh
import sandbox.projects.common.file_utils as fu
import sandbox.projects.common.search.response.cgi as sr_cgi
import sandbox.projects.common.utils as utils
from sandbox.projects.common.differ import printers, json_differ
from sandbox.projects import resource_types
from sandbox import sdk2
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk import paths

from sandbox.projects.websearch import CalcEventlogStats


class CompareEventlogStatsResult(sdk2.resource.AbstractResource):
    """
        Resource with the compared stats produced by the CompareEventlogStats task
        (the task dumps its comparison result as JSON into ``compare_result.json``
        when the store mode is 'resource').
    """
    auto_backup = True


class COMPARE_SUBSOURCE_REQUESTS_RESULT(sdk2.Resource):
    """
        Diff of subsource requests produced by the CompareEventlogStats task:
        a folder with one sub-folder of HTML diffs per request stage.

        https://st.yandex-team.ru/MIDDLE-103
    """
    any_arch = True


class CompareParams(object):
    """
        Input parameters of the CompareEventlogStats task.

        ``params`` lists all parameter classes declared here, in the order
        they are concatenated into the task's ``input_parameters``.
    """

    # First eventlog stats resource to compare (mandatory).
    class EventlogStats1(ResourceSelector):
        name = 'eventlog_stats_1'
        description = '1st stats'
        resource_type = resource_types.EVENTLOG_JSON_STATS
        required = True

    # Second eventlog stats resource to compare (mandatory).
    class EventlogStats2(ResourceSelector):
        name = 'eventlog_stats_2'
        description = '2nd stats'
        resource_type = resource_types.EVENTLOG_JSON_STATS
        required = True

    # First subsource requests dump; optional, read only when the
    # CalcEventlogStats.CompareSubSourceRequests flag is set in the task context.
    class SubSourceRequests1(ResourceSelector):
        name = 'subsource_requests_1'
        description = '1st subsource requests'
        resource_type = CalcEventlogStats.EVENTLOG_SUBSOURCE_REQUESTS
        required = False

    # Second subsource requests dump; optional, see SubSourceRequests1.
    class SubSourceRequests2(ResourceSelector):
        name = 'subsource_requests_2'
        description = '2nd subsource requests'
        resource_type = CalcEventlogStats.EVENTLOG_SUBSOURCE_REQUESTS
        required = False

    # Where the comparison result is stored: in the task context ('context')
    # or in a separate JSON resource ('resource', the default).
    class CompareEventlogStatsStoreMode(sp.SandboxStringParameter):
        name = 'compare_eventlog_stats_store_mode'
        description = 'where to store compared stats'
        choices = [
            ('context', 'context'),
            ('resource', 'resource'),
        ]
        default_value = 'resource'

    # All parameters above, in declaration order.
    params = (
        EventlogStats1,
        EventlogStats2,
        SubSourceRequests1,
        SubSourceRequests2,
        CompareEventlogStatsStoreMode,
    )


class CompareEventlogStats(SandboxTask):
    """
        **Description**

            Takes two files with eventlog statistics, compares them,
            and writes the result into a table.

        **Created resources**

            MIDDLESEARCH_PERFORMANCE_COMPARE_RESULT - stores the parameter with the largest deviation.
            COMPARE_SUBSOURCE_REQUESTS_RESULT - diff of requests to subsources (MIDDLE-43)
    """

    type = 'COMPARE_EVENTLOG_STATS'
    cores = 1
    input_parameters = CompareParams.params + (CalcEventlogStats.CompareSubSourceRequests,)
    execution_space = 5 * 1024  # 5 Gb
    # Placeholder only: on_execute rebinds ``self.result`` to a fresh dict,
    # so this shared class-level dict is never mutated.
    result = {}

    _RESULT_KEY = 'compare_result'
    _RESULT_RESOURCE_ID_KEY = 'result_resource_id'
    _SUBSOURCE_REQUESTS_DIFF_RESOURCE_ID_KEY = 'result_subsource_requests_diff'

    @property
    def footer(self):
        """
            Footer with the comparison table.

            Returns a one-element list with the footer description when the
            result is stored in the context, and None for any other store mode.
        """
        if self._store_mode != "context":
            return None
        if self._RESULT_KEY in self.ctx:
            content = self._get_footer_from_ctx(self.ctx[self._RESULT_KEY])
        else:
            content = "Sorry, result is not counted yet"
        return [{
            'helperName': '',
            'content': content
        }]

    @property
    def _store_mode(self):
        """Selected store mode ('context' or 'resource'), falling back to the parameter default."""
        return utils.get_or_default(self.ctx, CompareParams.CompareEventlogStatsStoreMode)

    @staticmethod
    def _get_footer_from_ctx(ctx_result):
        """
            Turn ``{stat name: [value1, value2, diff]}`` into three table rows.

            Row ``i`` maps every stat name (dots replaced with spaces) to the
            i-th element of its list.
        """
        return [
            {
                field.replace(".", " "): val_list[i] for field, val_list in ctx_result.iteritems()
            } for i in range(0, 3)  # assume, that results is a list of three elements
        ]

    def on_enqueue(self):
        """Pre-create the output resources and remember their ids in the context."""
        SandboxTask.on_enqueue(self)
        self.ctx['out_resource_id'] = self._create_resource(
            self.descr, 'compare_result.txt',
            resource_types.MIDDLESEARCH_PERFORMANCE_COMPARE_RESULT
        ).id
        if self._store_mode == 'resource':
            self.ctx[self._RESULT_RESOURCE_ID_KEY] = self._create_resource(
                self.descr, 'compare_result.json',
                CompareEventlogStatsResult
            ).id

        if self.ctx.get(CalcEventlogStats.CompareSubSourceRequests.name, False):
            self.ctx[self._SUBSOURCE_REQUESTS_DIFF_RESOURCE_ID_KEY] = self.create_resource(
                "{}, subsource requests diff".format(self.descr), "diff",
                COMPARE_SUBSOURCE_REQUESTS_RESULT
            ).id

    def on_execute(self):
        """Compare the two stats resources, save the result and optionally diff subsource requests."""
        json_stats_1 = fu.json_load(
            self.sync_resource(self.ctx[CompareParams.EventlogStats1.name])
        )
        json_stats_2 = fu.json_load(
            self.sync_resource(self.ctx[CompareParams.EventlogStats2.name])
        )
        self.result = _compare_stats(json_stats_1, json_stats_2)
        self._save_result()

        if self.ctx.get(CalcEventlogStats.CompareSubSourceRequests.name, False):
            self._compare_subsource_requests()

    def _save_result(self):
        """
            Persist ``self.result``.

            Writes the entry with the largest absolute diff into the
            MIDDLESEARCH_PERFORMANCE_COMPARE_RESULT resource, stores that diff in
            ``ctx['max_abs_diff']`` and saves the full result either in the
            context or in the JSON resource, depending on the store mode.
            Reports a failure via ``eh.check_failed`` when the result is empty
            or the store mode is unknown.
        """
        resource_path = channel.sandbox.get_resource(self.ctx['out_resource_id']).path
        if not self.result:
            self.ctx['max_abs_diff'] = 0
            fu.write_file(resource_path, "No results")
            eh.check_failed("Results are empty!")
        # Each item is (stat name, [value1, value2, diff]); a missing diff counts as 0.
        # A single-argument lambda is used instead of the Python-2-only
        # tuple-parameter form (removed by PEP 3113).
        max_abs_diff = max(
            self.result.items(),
            key=lambda item: abs(item[1][2]) if item[1][2] is not None else 0,
        )
        with open(resource_path, 'w') as f:
            f.write(str(max_abs_diff))
        self.ctx['max_abs_diff'] = max_abs_diff[1][2]
        if self._store_mode == 'context':
            self.ctx[self._RESULT_KEY] = self.result
        elif self._store_mode == 'resource':
            result_filename = channel.sandbox.get_resource(self.ctx[self._RESULT_RESOURCE_ID_KEY]).path
            fu.json_dump(result_filename, self.result)
        else:
            eh.check_failed('unknown compare_eventlog_stats_store_mode: "{}"'.format(self._store_mode))

    @staticmethod
    def _iter_stage_pairs(stage, frames1, frames2, request_titles):
        """
            Yield ``(json1, json2, title)`` for every request that has ``stage`` in at least one run.

            ``frames1`` / ``frames2`` hold one ``{stage: requests}`` dict per
            request, in the same order as ``request_titles``, so every pair
            stays attached to its own request. A stage missing on one side is
            dumped as an empty object.
        """
        for frame1, frame2, title in itertools.izip(frames1, frames2, request_titles):
            if stage not in frame1 and stage not in frame2:
                continue
            yield json.dumps(frame1.get(stage, {})), json.dumps(frame2.get(stage, {})), title

    def _compare_subsource_requests(self, requests_to_compare=100):
        """
            Build HTML diffs of subsource requests for incoming requests present in both runs.

            At most ``requests_to_compare`` common requests are compared. One
            sub-folder per known stage is written into the
            COMPARE_SUBSOURCE_REQUESTS_RESULT resource.
        """
        # requests_infos = list of objects { "request_cgi": <str>, "subsource_requests": { "<SubSourceType>": { "url": <str>, "source_num": <str> }... } }
        resource_ids = [
            self.ctx.get(CompareParams.SubSourceRequests1.name),
            self.ctx.get(CompareParams.SubSourceRequests2.name),
        ]
        if not all(resource_ids):
            # Both selectors are optional, so fail with a clear message instead of a KeyError.
            eh.check_failed('Both subsource requests resources must be set to compare subsource requests')
        requests_infos1 = fu.json_load(self.sync_resource(resource_ids[0]))
        requests_infos2 = fu.json_load(self.sync_resource(resource_ids[1]))

        infos1 = {info['request_cgi']: info for info in requests_infos1}
        infos2 = {info['request_cgi']: info for info in requests_infos2}
        request_cgis_common = set(infos1) & set(infos2)

        if len(request_cgis_common) < 10:
            # len(requests_infos1) == 200, the order of requests should roughly match
            logging.warning(
                'Failed to match two sequences of frames: len(sequence) == {}, '
                'but matched only {} frames'.format(len(requests_infos1), len(request_cgis_common))
            )

        request_cgis_common = list(request_cgis_common)[:requests_to_compare]

        # For each run: one {stage: requests} dict per common request, in request order.
        grouped_by_run = []
        stage_names = set()
        for infos in (infos1, infos2):
            per_request = [infos[request]['subsource_requests'] for request in request_cgis_common]
            grouped, requests_by_stage = transform_requests(per_request)
            grouped_by_run.append(grouped)
            stage_names.update(requests_by_stage)

        request_titles = [
            {
                'request (incoming)': cgi.escape(request),
                'frame_id1': infos1[request]['_debug_frame_id'],
                'frame_id2': infos2[request]['_debug_frame_id'],
            }
            for request in request_cgis_common
        ]

        # Pairs are built per request rather than by zipping the per-stage lists:
        # those lists skip requests that lack the stage, which would shift
        # every following pair onto the wrong request and title.
        cmp_data_generator_by_stage = {
            stage: self._iter_stage_pairs(stage, grouped_by_run[0], grouped_by_run[1], request_titles)
            for stage in stage_names
        }
        cmp_data_generator_by_stage['All'] = itertools.izip(
            map(json.dumps, grouped_by_run[0]),
            map(json.dumps, grouped_by_run[1]),
            request_titles
        )

        path_to_diffs_folder = channel.sandbox.get_resource(self.ctx[self._SUBSOURCE_REQUESTS_DIFF_RESOURCE_ID_KEY]).path
        paths.make_folder(path_to_diffs_folder)

        stages = ['Search', 'Allfactors', 'Snippets', 'Aux', 'All']
        for stage in cmp_data_generator_by_stage:
            if stage not in stages:
                logging.debug("_compare_subsource_requests: found unknown stage: %s", stage)
                continue
            path_to_diffs = os.path.join(path_to_diffs_folder, stage)
            paths.make_folder(path_to_diffs)
            printer = printers.PrinterToHtml(path_to_diffs, write_compact_diff=False)
            differ = json_differ.JsonDiffer(printer)
            differ.compare_pairs(cmp_data_generator_by_stage[stage])


def _soft_round(numberOrNone):
    if numberOrNone is None:
        return None
    try:
        return round(numberOrNone)
    except Exception as e:
        eh.log_exception('Expected number, found "{}"'.format(str(type(e))), e)
    return None


def _compare_stats(json_stats_1, json_stats_2):
    keys = set(json_stats_1.keys() + json_stats_2.keys())
    result = {}
    for key in keys:
        v1 = _soft_round(json_stats_1.get(key))
        v2 = _soft_round(json_stats_2.get(key))
        if v1 > 0 and v2 > 0:
            diff = round(100. * (v2 - v1) / v1, 2)
        elif v1 == 0 and v2 == 0:
            diff = 0
        else:
            diff = None
        result[key] = [v1, v2, diff]

    return result


###############################################################################
# helpers for the _compare_subsource_requests method

def transform_requests(subsource_requests_multiply_frames):
    """
        Parse subsource request urls and group them by request stage.

        ``subsource_requests_multiply_frames`` is a list with one object per frame:
        ``{ "<SubSourceType>": { "url": <str>, "source_num": <str> }... }``.
        The input objects are not modified.

        Keys like ``SubSourceSearch_<source>`` are grouped as
        ``{"Search": {"<source>": <parsed url>}}``; keys without an underscore
        are kept as is.

        Returns ``(result_all, results_by_stage)``:

        * ``result_all`` - one grouped dict per frame, in input order;
        * ``results_by_stage`` - ``{stage: [grouped requests, ...]}`` with an
          entry only for frames that have the stage, so these lists are not
          index-aligned with the input when some frame lacks a stage.
    """
    result_all = []
    results_by_stage = defaultdict(list)
    for subsource_requests in subsource_requests_multiply_frames:
        # group together SubSourceAux, SubSourceSearch, ...
        result = {}
        for key, request_info in subsource_requests.items():
            requests = url_to_json(request_info["url"])
            prefix, separator, source_name = key.partition('_')
            if not separator:
                result[key] = requests
            else:
                request_type = re.sub('^SubSource', '', prefix)
                result.setdefault(request_type, {})[source_name] = requests
        for stage in result:
            results_by_stage[stage].append(result[stage])
        result_all.append(result)
    return result_all, results_by_stage


def url_to_json(url):
    """
        Convert a request url into a dict of its cgi parameters, suitable for json diffing.

        * parameters from ``ruid``, ``push_port``, ``req_path`` are dropped;
        * ``pron``, ``relev``, ``rearr`` (values look like k1=v1;k2=v2) become
          ``{sub-parameter: [quoted values]}``, an empty value is stored as None;
        * ``text``, ``dh``, ``g`` become ``{quoted value: None}`` so that the
          order of repeated values is ignored;
        * any other parameter becomes a list of its quoted values.

        A list that consists of a single None is folded into plain None.
    """
    # Cgi params with value as k1=v1;k2=v2
    sub_param_cgis = {
        'pron',
        'relev',
        'rearr',
    }
    unordered_cgis = ['text', 'dh', "g"]
    skipped_cgis = ['ruid', 'push_port', 'req_path']

    def is_lone_none(item):
        # True for the list [None]: a parameter that was met once, with no value
        return isinstance(item, list) and len(item) == 1 and item[0] is None

    pairs = list(sr_cgi.split_to_cgi(
        url,
        unquote=True,
        url=True,
        encode_to_ascii=True,
        sub_param_names=sub_param_cgis))

    parsed = dict()
    for name, value in pairs:
        if name in skipped_cgis:
            continue
        if name in sub_param_cgis:
            for sub_name, sub_value in value:
                quoted = None if sub_value == '' else urllib.quote(sub_value)
                parsed.setdefault(name, dict()).setdefault(sub_name, list()).append(quoted)
            continue
        quoted = urllib.quote(value)
        if name in unordered_cgis:
            # order will be ignored
            parsed.setdefault(name, {})[quoted] = None
        else:
            parsed.setdefault(name, []).append(quoted)

    # fold [None] lists in just None for parameters with no values
    for name in parsed:
        value = parsed[name]
        if isinstance(value, dict):
            for sub_name in value:
                if is_lone_none(value[sub_name]):
                    value[sub_name] = None
        elif is_lone_none(value):
            parsed[name] = None
    return parsed


__Task__ = CompareEventlogStats
