# -*- coding: utf-8 -*-

import itertools
import logging
import json

import yaml
import os

import sandbox.projects.common.app_host.converter as apphost_converter
from sandbox.projects import resource_types
from sandbox.projects.common import apihelpers
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import link_builder as lb
from sandbox.projects.common import utils
from sandbox.projects.common.base_search_quality import basesearch_response_parser as brp
from sandbox.projects.common.base_search_quality import response_diff_task as rdt
from sandbox.projects.common.differ import printers
from sandbox.projects.common.sdk_compat import task_helper
from sandbox.projects.common.search import bugbanner
from sandbox.projects.common.search.response.diff import protodiff as prt
from sandbox.projects.websearch.middlesearch import resources as ms_res
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import paths
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.task import SandboxTask

# Ten-dash separator string. It is not referenced anywhere in this module;
# NOTE(review): presumably kept for external importers - confirm before removing.
SEPARATOR = "-" * 10


def responses_param(n):
    """Build a resource parameter class for the n-th set of binary responses.

    The returned class derives from ``parameters.LastReleasedResource``; its
    context key and its human-readable description both embed ``n``, and it
    accepts SEARCH_PROTO_RESPONSES as well as PROXY_WIZARD_ANSWERS resources.

    :param n: ordinal number of the responses set (inserted into name/description)
    :return: a freshly created parameter class (not an instance)
    """
    param_name = "basesearch_responses{}_resource_id".format(n)
    param_description = "Binary responses #{}".format(n)
    accepted_resource_types = [
        resource_types.SEARCH_PROTO_RESPONSES,
        resource_types.PROXY_WIZARD_ANSWERS,
    ]

    class Responses(parameters.LastReleasedResource):
        name = param_name
        description = param_description
        resource_type = accepted_resource_types

    return Responses


# The two response sets to be compared; each is a distinct parameter class
# produced by responses_param() with its own context key.
Responses1 = responses_param(1)
Responses2 = responses_param(2)


class WriteCompactDiff(parameters.SandboxBoolParameter):
    # Boolean task option; its value is forwarded to the HTML printer as
    # ``write_compact_diff`` when the diff is produced. Enabled by default.
    group = "Diff options"
    name = "write_compact_diff"
    description = "Write diff in compact form"
    default_value = True


class UseFakeQueries(parameters.SandboxBoolParameter):
    # Debug option: when set, the task does not look up the real queries and
    # pairs every response with an empty query string. Disabled by default.
    group = "Debug options"
    name = "use_fake_queries"
    description = "Use fake queries"
    default_value = False


class AllowDifferentQueries(parameters.SandboxBoolParameter):
    """Debug switch introduced for SEARCH-4373"""
    # When set, responses obtained from query files with different md5 are
    # still compared, and both queries are shown for each pair.
    group = "Debug options"
    name = "allow_different_queries"
    description = "Allow different queries"
    default_value = False


class CustomSkipFields(parameters.ResourceSelector):
    # Optional resource of type CUSTOM_PROTODIFF_SKIP_FIELDS; when provided,
    # its content is loaded with yaml and passed to protodiff as the skip-fields set.
    group = "Comparing parameters"
    name = "custom_skip_fields"
    description = "Custom SKIP_FIELDS for protodiff"
    resource_type = [resource_types.CUSTOM_PROTODIFF_SKIP_FIELDS]
    required = False


class FailOnDifferentQueries(parameters.SandboxBoolParameter):
    # Optional flag, on by default: when the two response sets cannot be
    # matched to a common query source, the task reports a failed check
    # instead of writing a "no diff" marker.
    group = "Comparing parameters"
    name = "fail_on_different_queries"
    description = "Whether to fail if queries are different (makes sense only if allow_different_queries == True)"
    required = False
    default_value = True


class RelevFactorsIndexesForSoftCheck(parameters.SandboxStringParameter):
    # Optional comma-separated list of integer factor indexes; the task turns
    # it into a set of ints and hands it to protodiff. Empty string by default.
    group = "Comparing parameters"
    name = "relev_factors_for_soft_check"
    description = "','-separated factors indexes. Use soft difference check for given factors in _RelevFactors field"
    required = False
    default_value = ""


class IgnoreDiffInCompressedAllFactors(parameters.SandboxBoolParameter):
    # Optional flag, off by default; forwarded to protodiff as the first
    # argument of set_factors_check_params().
    group = "Comparing parameters"
    name = "ignore_diff_in_compressed_all_factors"
    description = "Disable difference check for _CompressedAllFactors field"
    required = False
    default_value = False


class IgnoreDiffInDocRankingFactors(parameters.SandboxBoolParameter):
    # Optional flag, off by default; forwarded to protodiff as the second
    # argument of set_factors_check_params().
    group = "Comparing parameters"
    name = "ignore_diff_in_doc_ranking_factors"
    description = "Disable difference check for DocRankingFactors field"
    required = False
    default_value = False


class CompareBinaryResponses(bugbanner.BugBannerTask):
    """
        **Description**

            Compares basesearch responses in binary form.
            Produces a diff for every query as an html page. Optionally also produces a compact diff.
            Uses the results of the `GET_BASESEARCH_RESPONSES <https://nda.ya.ru/3R7cz6>`_ task

        **Resources**

            *Required*

            * Binary responses #1 (of type SEARCH_PROTO_RESPONSES or PROXY_WIZARD_ANSWERS)
            * Binary responses #2 (of type SEARCH_PROTO_RESPONSES or PROXY_WIZARD_ANSWERS)

        **Parameters**

        * **Write diff in compact form** - whether to create the diff in compact form,
        * **Write diff txt form (for debug)** - recommended only for debugging the html output,
        * **Queries number per 1 output file** - number of queries in one output diff file

    """
    type = "COMPARE_BINARY_RESPONSES"

    input_parameters = (
        Responses1,
        Responses2,
        WriteCompactDiff,
        rdt.QueriesPerFileParameter,
        rdt.FailOnDiffParameter,
        rdt.cmp_params.CompareCompleteResponsesOnly,
        rdt.cmp_params.IgnoreUnanswered,
        CustomSkipFields,
        FailOnDifferentQueries,
        UseFakeQueries,
        AllowDifferentQueries,
        RelevFactorsIndexesForSoftCheck,
        IgnoreDiffInCompressedAllFactors,
        IgnoreDiffInDocRankingFactors,
    )
    execution_space = 30 * 1024  # 30 Gb
    cores = 1
    environment = (environments.PipEnvironment('protobuf', '2.6.1', use_wheel=True),)

    @property
    def footer(self):
        """Describe the result table: relevant diff heads and a link to the diff resource.

        Falls back to "-" / resource id 0 when the corresponding context keys
        have not been filled yet.
        """
        return [{
            "helperName": "",
            "content": {"<h3>Result</h3>": {
                "header": [
                    {"key": "relevant_diff", "title": "Relevant diff props"},
                    {"key": "diff_resource", "title": "Diff resource link"},
                ],
                "body": {
                    "relevant_diff": ["</br>".join(sorted(self.ctx.get("relevant_diff", ["-"])))],
                    "diff_resource": [lb.resource_link(self.ctx.get("out_resource_id", 0))]
                }
            }}
        }]

    def on_enqueue(self):
        """Create the output diff resource up front and store its id in the context."""
        SandboxTask.on_enqueue(self)
        channel.task = self
        self.ctx["out_resource_id"] = self.create_resource(
            "{}, html diff".format(self.descr), "diff",
            resource_types.BASESEARCH_RESPONSES_COMPARE_RESULT
        ).id
        # self.ctx["profiler_result_resource_id"] = self.create_resource(
        #     "Profiler_result", "profiler_result.log",
        #     resource_types.PROFILE_LOG
        # ).id

    def on_execute(self):
        """Compare the two response resources and fill the result context keys.

        If ``utils.check_resources_by_md5`` reports the resources as equal,
        the comparison is marked successful right away and the detailed diff
        is skipped.
        """
        self.add_bugbanner(bugbanner.Banners.SearchTests)

        path_to_diffs = channel.sandbox.get_resource(self.ctx["out_resource_id"]).path
        paths.make_folder(path_to_diffs)
        self.ctx["diff_count"] = 0

        resp1 = channel.sandbox.get_resource(self.ctx[Responses1.name])
        resp2 = channel.sandbox.get_resource(self.ctx[Responses2.name])

        if utils.check_resources_by_md5(resp1, resp2, path_to_diffs):
            self.ctx["compare_result"] = True
            return

        self.diff_creator_parallel(path_to_diffs, resp1, resp2)

    def diff_creator_parallel(self, path_to_diffs, resp1, resp2):
        """New, fast parallel diff creator.

        Configures protodiff from the task parameters, feeds it
        (response1, response2, query) triples and stores the outcome in
        ``diff_count``, ``compare_result``, ``relevant_diff`` and
        ``relevant_diff_for_testenv`` context keys.

        :param path_to_diffs: directory the html diff is written to
        :param resp1: first responses resource
        :param resp2: second responses resource
        """
        meta_pb_pack_path = self.create_meta_pb_package()
        printer = printers.PrinterToHtml(
            path_to_diffs,
            write_compact_diff=self.ctx.get(WriteCompactDiff.name),
            pair_head_template="response {obj_index}",
            obj_head_template="query"
        )
        protodiff = prt.Protodiff(
            printer,
            only_complete=self.ctx.get(rdt.cmp_params.CompareCompleteResponsesOnly.name, False),
            ignore_unanswered=self.ctx.get(rdt.cmp_params.IgnoreUnanswered.name, False),
            meta_pb_pack_path=meta_pb_pack_path
        )
        protodiff.set_factor_names(rdt.get_and_check_factor_names(resp1.id, resp2.id))

        # Redefine protodiff.SKIP_FIELDS
        if self.ctx.get(CustomSkipFields.name):
            skip_fields_path = self.sync_resource(self.ctx[CustomSkipFields.name])
            # NOTE(review): yaml.load can construct arbitrary objects; consider
            # yaml.safe_load if this resource may come from an untrusted source.
            # The file is opened via a context manager so the handle is closed
            # deterministically instead of being leaked.
            with open(skip_fields_path) as skip_fields_file:
                protodiff.set_skip_fields(yaml.load(skip_fields_file))

        soft_check_factor_indexes = self._parse_factor_indexes(
            utils.get_or_default(self.ctx, RelevFactorsIndexesForSoftCheck)
        )
        protodiff.set_factors_check_params(
            utils.get_or_default(self.ctx, IgnoreDiffInCompressedAllFactors),
            utils.get_or_default(self.ctx, IgnoreDiffInDocRankingFactors),
            soft_check_factor_indexes,
            {}
        )

        self.ctx["diff_count"] = 0
        self.ctx["compare_result"] = True
        self.ctx["relevant_diff"] = []
        self.ctx["relevant_diff_for_testenv"] = []

        query_generator = self._query_generator(resp1, resp2)
        if query_generator is None:
            # Queries differ and comparing different queries is not allowed.
            if utils.get_or_default(self.ctx, FailOnDifferentQueries):
                eh.check_failed("Queries for responses1 and responses2 are different")
            no_diff_name = os.path.join(path_to_diffs, utils.NO_DIFF_NAME)
            fu.write_file(no_diff_name, utils.no_diff_detected())
            return

        cmp_data_bundle = itertools.izip(
            self._response_generator(resp1),
            self._response_generator(resp2),
            query_generator,
        )

        protodiff.compare_pairs(cmp_data_bundle)

        diff_count = protodiff.get_diff_count()
        self.ctx["diff_count"] = diff_count
        self.ctx["compare_result"] = diff_count == 0
        self.ctx["relevant_diff"] = find_relevant_diff(printer.get_compact_diff())
        self.ctx["relevant_diff_for_testenv"] = self.ctx["relevant_diff"]
        if len(json.dumps(self.ctx["relevant_diff_for_testenv"])) > 100 * 1024:
            # If the size of the field passed from sandbox to testenv ("relevant_diff")
            # exceeds some limit (256KB?), all tasks in the database (the one the test
            # belongs to) hang, so only a "there is a diff" flag is passed instead.
            self.ctx["relevant_diff_for_testenv"] = True

    @staticmethod
    def _parse_factor_indexes(raw_indexes):
        """Convert a ','-separated string of factor indexes into a set of ints.

        Empty tokens (empty input, leading/trailing or doubled commas,
        whitespace-only pieces) are ignored rather than crashing or silently
        discarding the whole list.

        :param raw_indexes: string such as ``"1,5,42"``; may be empty or None
        :return: set of int indexes
        :raises ValueError: if a non-empty token is not an integer
        """
        if not raw_indexes:
            return set()
        return set(int(token) for token in raw_indexes.split(",") if token.strip())

    def create_meta_pb_package(self):
        """Export the generated search IDL package from Arcadia and return its local path."""
        pack_path = self.abs_path("meta_pb_pack")
        Arcadia.export("arcadia:/arc/trunk/arcadia/search/idl/generated", pack_path)
        logging.debug("Pack path = %s", pack_path)
        # SEARCH-7148
        # copypasted this code from projects/RefreshTestingConfigGenerate/__init__.py
        # hope, that can help to use correct protobuf package
        # map(sys.modules.pop, [n for n in sys.modules if n.startswith("google.protobuf")])
        # google = sys.modules["google"] = types.ModuleType("google")
        # google.__dict__["__path__"] = [os.path.join(site.USER_SITE, "google")]
        return pack_path

    def _response_generator(self, response_res):
        """Sync the responses resource locally and return what brp.load_binary_responses yields for it."""
        path = self.sync_resource(response_res)
        return brp.load_binary_responses(path)

    def _get_queries_id(self, task):
        """Return the id of the queries resource used by ``task``.

        Reads ``queries_resource_id`` from the task context; if that access
        fails for any reason, falls back to ``task.Parameters.requests``.
        NOTE(review): the fallback presumably covers tasks without a ``ctx``
        attribute - confirm, then narrow the caught exception type.
        """
        try:
            return task.ctx.get("queries_resource_id")
        except Exception:
            return task.Parameters.requests

    def _query_generator(self, resp1, resp2):
        """Build an iterable of query texts matching the compared responses.

        :return: an endless iterable of empty strings when fake queries are
            requested or a queries resource cannot be determined; the queries
            themselves when both sides used the same queries file (by md5);
            "query1/query2" descriptions when the files differ and that is
            allowed; ``None`` when the files differ and that is not allowed.
        """
        if utils.get_or_default(self.ctx, UseFakeQueries):
            return itertools.repeat("")

        task1 = task_helper.task_obj(resp1.task_id)
        task2 = task_helper.task_obj(resp2.task_id)
        queries1_res_id = self._get_queries_id(task1)
        queries2_res_id = self._get_queries_id(task2)
        queries1_res = None if queries1_res_id is None else task_helper.resource_obj(queries1_res_id)
        queries2_res = None if queries2_res_id is None else task_helper.resource_obj(queries2_res_id)
        if queries1_res is None or queries2_res is None:
            return itertools.repeat("")

        if queries1_res.file_md5 == queries2_res.file_md5:
            return self._prepare_queries(queries1_res)

        if utils.get_or_default(self.ctx, AllowDifferentQueries):
            q1 = self._prepare_queries(queries1_res)
            q2 = self._prepare_queries(queries2_res)
            return ("query1: {}\nquery2: {}".format(i, j) for i, j in itertools.izip_longest(q1, q2))

        # Different queries that must not be compared: the caller checks for None.
        return None

    def _prepare_queries(self, queries_res):
        """Sync the queries resource and return its queries, converting apphost requests if needed."""
        queries_path = task_helper.resource_sync_path(queries_res)
        if queries_res.type == ms_res.WebMiddlesearchApphostRequests:
            return self._prepare_apphost_queries(queries_path)
        else:
            return self._prepare_cgi_queries(queries_path)

    def _prepare_cgi_queries(self, queries_path):
        """Return the queries file content line by line."""
        return fu.read_line_by_line(queries_path)

    def _prepare_apphost_queries(self, queries_path):
        """Return a lazy sequence of apphost queries run through the converter tool.

        The latest APP_HOST_TOOL_CONVERTER_EXECUTABLE resource is synced once
        and applied to every line of the queries file.
        """
        queries = fu.read_line_by_line(queries_path)
        apphost_converter_path = self.sync_resource(apihelpers.get_last_resource(
            resource_type=resource_types.APP_HOST_TOOL_CONVERTER_EXECUTABLE,
        ).id)
        return (apphost_converter.convert_input_request(apphost_converter_path, q) for q in queries)

    def get_short_task_result(self):
        """Return a short result label for a completed task, or None otherwise."""
        if not self.is_completed():
            return None
        if self.ctx["compare_result"]:
            return "no diff"
        return str(self.ctx.get("diff_count", " ")) + "diff"


def find_relevant_diff(compact_diff_items, paths_to_exclude=("debug",)):
    """Select the heads of compact diff items that are considered relevant.

    A diff whose head contains any of the ``paths_to_exclude`` substrings is
    assumed not to affect the search output and is therefore dropped.

    :param compact_diff_items: iterable of ``(head, value)`` pairs
    :param paths_to_exclude: substrings marking a head as irrelevant
    :return: list of heads, in input order, containing none of the excluded substrings
    """
    # A head is kept only when *none* of the excluded substrings occurs in it;
    # checking them one by one and keeping the head on the first miss would
    # wrongly keep heads once more than one exclusion is configured.
    return [
        head
        for head, _ in compact_diff_items
        if not any(excluded in head for excluded in paths_to_exclude)
    ]


# Module-level alias of the task class.
# NOTE(review): presumably the name the Sandbox task loader looks up - confirm.
__Task__ = CompareBinaryResponses
