# -*- coding: utf-8 -*-

import os
import six
import logging

from sandbox.sandboxsdk.paths import make_folder
from sandbox.sandboxsdk.paths import remove_path
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk import errors as se
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk import task

from . import realsearch
from . import basesearch_response_parser as brp
from . import response_diff
from .tree import htmldiff

from sandbox.common import errors
from sandbox.projects import resource_types
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import utils
from sandbox.projects.common import utils2
from sandbox.projects.common import apihelpers
from sandbox.projects.common.differ import coloring
from sandbox.projects.common.differ import printers
from sandbox.projects.common.search import bugbanner
from sandbox.projects.common.base_search_quality import node_patchers
from sandbox.projects.common.base_search_quality import node_types
from sandbox.projects.common.base_search_quality import threadPool
from sandbox.projects.common.search.response import state as response_state
from sandbox.projects.common.search.response.diff import side_by_side as sbs
from sandbox.projects.websearch.middlesearch import resources as ms_resources


class QueriesResourceParameter(sp.LastReleasedResource):
    """
        Optional input (required = False): resource with the queries that produced the responses.
        The task reads it through ctx[name]; when it is empty, on_execute asks _find_queries
        for a queries resource instead.
    """
    name = 'queries_resource_id'
    description = 'Queries (optional)'
    # resource types declared for this input
    resource_type = [
        resource_types.PLAIN_TEXT_QUERIES,
        ms_resources.WebMiddlesearchPlainTextQueries,
        resource_types.REQUESTS_FOR_NOAPACHEUPPER,
        resource_types.IMAGES_MIDDLESEARCH_PLAIN_TEXT_REQUESTS,
        resource_types.USERS_QUERIES,
        resource_types.VIDEO_MIDDLESEARCH_PLAIN_TEXT_REQUESTS,
    ]
    required = False


class BasesearchResponses1Parameter(sp.LastReleasedResource):
    """
        First of the two responses resources to compare.
        on_execute reads its id from ctx[name] and loads it with _load_responses.
    """
    name = 'basesearch_responses1_resource_id'
    description = 'Basesearch responses #1'
    # resource types declared for this input
    resource_type = [
        resource_types.BASESEARCH_HR_RESPONSES,
        resource_types.SEARCH_HR_RESPONSES,
    ]


class BasesearchResponses2Parameter(sp.LastReleasedResource):
    """
        Second of the two responses resources to compare.
        on_execute reads its id from ctx[name] and loads it with _load_responses.
    """
    name = 'basesearch_responses2_resource_id'
    description = 'Basesearch responses #2'
    # resource types declared for this input
    resource_type = [
        resource_types.BASESEARCH_HR_RESPONSES,
        resource_types.SEARCH_HR_RESPONSES,
    ]


class SaveSimpleDiffParameter(sp.SandboxBoolParameter):
    """
        When set, _save_simple_diff runs the `diff` utility over the two responses files
        and stores its output as diff.txt in the result resource. Off by default.
    """
    name = 'save_simple_diff'
    description = 'Save simple diff'
    group = 'Diff options'
    default_value = False


class SaveSideBySideDiffParameter(sp.SandboxBoolParameter):
    """
        When set, _save_side_by_side_diff generates a side-by-side html diff (sbs.diff)
        into the result resource. Off by default.
    """
    name = 'save_side_by_side_diff'
    description = 'Save side-by-side diff'
    group = 'Diff options'
    default_value = False


class QueriesPerFileParameter(sp.SandboxIntegerParameter):
    """
        Number of queries per output file of the full html diff.
        Passed to response_diff.write_html_diff as `queries_per_file`.
    """
    name = 'queries_per_file'
    description = 'Queries number per 1 output file'
    group = 'Diff options'
    default_value = 100


class MaxDiffSizeParameter(sp.SandboxIntegerParameter):
    """
        Limit on the diff size in bytes; per the description, 0 means unlimited.
        Passed to response_diff.write_html_diff as `max_diff_size`.
    """
    name = 'max_diff_size'
    description = 'Max diff size in bytes (0 - unlimited)'
    group = 'Diff options'
    default_value = 0


class MaxDiffFilesParameter(sp.SandboxIntegerParameter):
    """
        Limit on the number of diff files; per the description, 0 means unlimited.
        Passed to response_diff.write_html_diff as `max_diff_files`.
    """
    name = 'max_diff_files'
    description = 'Max diff files (0 - unlimited)'
    group = 'Diff options'
    default_value = 0


class FailOnDiffParameter(sp.SandboxBoolParameter):
    """
        When set, do_compare fails the task (eh.check_failed("diff found"))
        if the stored compare result is falsy. Off by default.
    """
    name = 'fail_on_diff'
    description = 'Fail on diff'
    group = 'Diff options'
    default_value = False


class FailOnUncomparable(sp.SandboxBoolParameter):
    """
        Controls _data_diff_found: when set (the default) uncomparable input fails the task,
        otherwise the task only reports the problem and writes a "no diff" marker file.
    """
    name = 'fail_on_uncomparable'
    description = 'Fail on uncomparable'
    group = 'Diff options'
    default_value = True


class MinResponseSizeParameter(sp.SandboxIntegerParameter):
    """
        Minimal response size in bytes.
        Passed to brp.parse_responses as `min_response_size` when responses are loaded.
    """
    name = 'min_response_size'
    description = 'Minimal response size in bytes (0 - unlimited)'
    group = 'Debug'
    default_value = 5


def create_comparing_responses_params(group_name='Comparing parameters'):
    """
        Build a namespace class holding the parameters that tune responses comparison.

        :param group_name: value assigned to the `group` attribute of every generated parameter
        :return: class `ComparingParams` exposing each parameter class as an attribute
            and all of them together as the `params` tuple
    """
    class ComparingParams(object):
        # skips the "queries/responses sets are equal" sanity checks in on_execute
        class OmitResponsesCheck(sp.SandboxBoolParameter):
            name = 'omit_responses_check'
            description = 'Do not check that responses are equal'
            group = group_name

        # adds realsearch.RemoveIndexGeneration to the response patchers
        class RemoveIndexGen(sp.SandboxBoolParameter):
            name = 'remove_index_generation'
            description = 'Remove Head/IndexGeneration field before compare'
            group = group_name

        # custom_comparer_with_info: response pairs with incomplete responses are not compared
        class CompareCompleteResponsesOnly(sp.SandboxBoolParameter):
            name = 'compare_complete_responses_only'
            description = 'Compare complete responses only'
            group = group_name

        # custom_comparer_with_info: still compare two unanswered responses
        # when their bad search sources are equal
        class CompareSameUnanswered(sp.SandboxBoolParameter):
            name = 'compare_same_unanswered'
            description = 'Compare same unanswered responses'
            group = group_name

        # custom_comparer_with_info: still compare two unanswered responses
        # when their "not respond" states are equal
        class CompareSameUpperUnanswered(sp.SandboxBoolParameter):
            name = 'compare_same_upper_unanswered'
            description = 'Compare same uppersearch unanswered responses'
            group = group_name

        # custom_comparer_with_info: pairs where at least one answer is not full are not compared
        class IgnoreUnanswered(sp.SandboxBoolParameter):
            name = 'ignore_unanswered'
            description = 'Ignore unanswered responses'
            # NOTE(review): presumably these fields are shown only when the flag is on - confirm
            sub_fields = {
                'true': [
                    'compare_same_unanswered',
                    'compare_same_upper_unanswered',
                ]
            }
            group = group_name
            default_value = False

        # custom_comparer_with_info: pairs with a not fetched answer are not compared
        class IgnoreUnansweredSnippets(sp.SandboxBoolParameter):
            name = 'ignore_unanswered_snippets'
            description = 'Ignore unanswered snippets'
            group = group_name

        # do_compare: allowed percentage of response pairs that were not compared
        class LimitUncomparedQueries(sp.SandboxIntegerParameter):
            name = 'limit_of_uncompared_queries'
            description = 'Limit of uncompared queries (in percent, no limit = 100)'
            group = group_name
            default_value = 100

        # do_compare: fail the task when the limit above is exceeded,
        # otherwise only mark the result as a diff
        class FailOnExceedLimitUncomparedQueries(sp.SandboxBoolParameter):
            name = 'fail_on_exceed_limit_of_uncompared_queries'
            description = (
                'Fail on exceed uncompared queries limit task, else only set diff mark '
                '(testenv can reset this for avoid tests suspending on errors)'
            )
            group = group_name
            default_value = True

        # all parameters in one tuple, ready to be appended to a task's input_parameters
        params = (
            OmitResponsesCheck,
            RemoveIndexGen,
            CompareCompleteResponsesOnly,
            IgnoreUnansweredSnippets,
            IgnoreUnanswered,
            CompareSameUnanswered,
            CompareSameUpperUnanswered,
            LimitUncomparedQueries,
            FailOnExceedLimitUncomparedQueries,
        )

    return ComparingParams


# Comparing parameters with the default group name; shared by the task's
# input_parameters and by custom_comparer_with_info.
cmp_params = create_comparing_responses_params()


class ChangedProps(htmldiff.ChangedProps):
    """
        changes in comparison with parent:
            - lists minimized by default
            - show amount of queries
            - show all queries
            - sort queries by number
    """

    # upper bound on the number of deltas rendered for one changed property
    max_deltas = 100

    def find_file_for_query(self, q_num, list_of_files):
        """
            Pick the html file holding the full diff for query number `q_num`.

            :param q_num: query number
            :param list_of_files: file names found in the diff directory
            :return: "index.html" when it is present, otherwise the first "queries" file
                whose numeric range contains `q_num`, otherwise None
        """
        if "index.html" in list_of_files:
            return "index.html"
        for name in list_of_files:
            if "queries" not in name:
                continue
            # NOTE(review): the slice drops 8 leading and 5 trailing characters, i.e. it expects
            # names shaped like "queries?<first>-<last>.html" - confirm against the diff writer
            borders = name[8:-5].split("-")
            try:
                first, last = int(borders[0]), int(borders[1])
            except (ValueError, IndexError):
                # some other file that merely has "queries" in its name
                continue
            if first <= q_num <= last:
                return name
        return None

    def combine(self, another):
        """
            Merge `another` into self: values stored under the same key are united.

            :param another: mapping with the same layout as self (key -> set of deltas)
        """
        # items() works on both python 2 and 3 and is already used by write()
        for key, values in another.items():
            self[key] = set(self.get(key, set()) | values)

    def write(self, file, full_path):
        """
            Render collected changed properties as html.

            :param file: writable file object receiving the html
            :param full_path: directory with the per-query diff files; its listing is used
                to build "go to full diff" links
            :raises Exception: when a delta carries None as its value
        """
        def write_body():
            names_of_htmls = os.listdir(full_path)
            # properties with the largest amount of deltas go first
            for key, values in sorted(self.items(), key=lambda x: len(x[1]), reverse=True):
                path, color = key
                if _filter_props(path):
                    continue
                if len(values) == 0:
                    htmldiff.WriteData(file, path, color)
                else:
                    file.write(htmldiff.StartBlock(
                        "{} (amount of deltas: {})".format(path, len(values)),
                        color,
                        open=False,
                    ))

                    for delta_number, delta_data in enumerate(sorted(values, key=lambda x: x[0])):
                        # delta_number is zero-based, so ">=" keeps exactly max_deltas entries
                        if delta_number >= self.max_deltas:
                            htmldiff.WriteData(file, "Too many deltas. See detailed logs.", "yellow")
                            break
                        query_number, value = delta_data
                        go_to_full_diff = "<a href=\"{}\">go to full diff</a>".format(
                            self.find_file_for_query(query_number, names_of_htmls)
                        )

                        # a single delta is written inline, several deltas get a block each
                        if len(values) > 1:
                            file.write(htmldiff.StartBlock(
                                "query: {}, {}".format(query_number, go_to_full_diff),
                                color,
                                open=False,
                            ))
                        else:
                            file.write("query: {}, {}".format(query_number, go_to_full_diff))

                        if isinstance(value, tuple):
                            # (old, new) pair: highlight what has changed between the two
                            values0, values1 = printers.match_string_color_changed(value[0], value[1])
                            file.write(printers.HtmlBlock.colored_data(values0))
                            file.write(printers.HtmlBlock.colored_data(values1))
                        else:
                            if value is None:
                                raise Exception(path)
                            htmldiff.WriteData(file, value, color)

                        if len(values) > 1:
                            file.write(htmldiff.EndBlock())

                    file.write(htmldiff.EndBlock())

        htmldiff.WriteDiff(file, write_body, addButtons=False, title="changed props")


class BaseCompareBasesearchResponses(bugbanner.BugBannerTask):
    """
        Compares two (base)search outputs obtained in human-readable (hr=da) format.
        Builds a diff by queries and by changed properties.
        Uses results of the GET_BASESEARCH_RESPONSES task.
    """

    input_parameters = (
        QueriesResourceParameter,
        BasesearchResponses1Parameter,
        BasesearchResponses2Parameter,
        SaveSimpleDiffParameter,
        SaveSideBySideDiffParameter,
        QueriesPerFileParameter,
        MaxDiffSizeParameter,
        MaxDiffFilesParameter,
        FailOnDiffParameter,
        FailOnUncomparable,
        MinResponseSizeParameter,
    ) + cmp_params.params + threadPool.PARAMS

    @property
    def footer(self):
        """
            Task footer: None until the task is completed, then either a "same md5" note
            or a summary line with a table of all `uncompared_*` context values.
        """
        if not self.is_completed():
            return None
        same_md5 = self.ctx.get('same_md5')
        if same_md5:
            return [{
                'helperName': '',
                'content': '<h4>Resources have same md5 = {}</h4>'.format(same_md5)
            }]
        diff_count = '<h4>{}</br>compared {} responses of {}</br><b>{}</b> <a href="{}">diff</a></h4>'.format(
            self.descr,
            self.ctx.get('compared_responses_count', '???'),
            self.ctx.get('num_of_queries', '???'),
            self.ctx.get('num_of_diffs', '???'),
            self.list_resources(resource_type='BASESEARCH_RESPONSES_COMPARE_RESULT')[0].remote_http()
        )
        if self.ctx.get('exceed_limit_uncompared_queries', False):
            diff_count += "\n<b style='color:{}'>Exceed limit uncompared queries</b>".format(coloring.DiffColors.bad)
        fields_to_show = sorted([key for key in self.ctx if 'uncompared_' in key])
        return [{
            'helperName': '',
            'content': {
                diff_count: {
                    "header": [
                        {"key": "Key", "title": "Key"},
                        {"key": "Value", "title": "Value"},
                    ],
                    "body": {
                        "Key": fields_to_show,
                        "Value": [self.ctx[k] for k in fields_to_show]
                    }
                }
            }
        }]

    def on_enqueue(self):
        """
            Create the output resource in advance and remember its id in ctx['out_resource_id'].
        """
        task.SandboxTask.on_enqueue(self)
        resource = self.create_resource(
            self.descr, 'compare_result',
            resource_types.BASESEARCH_RESPONSES_COMPARE_RESULT,
            attributes={'backup_task': True},
        )
        self.ctx['out_resource_id'] = resource.id

        # speed tuning
        te_db = self.ctx.get("testenv_database", "")
        if "ws-base-" in te_db:
            self.execution_space = 10000  # 10G should be enough

    def _data_diff_found(self, msg):
        """
            Handle input that cannot be compared: fail the task or, when FailOnUncomparable
            is off, report `msg` and write a "no diff" marker into the result directory.

            :param msg: human-readable reason
        """
        self.ctx['not_compatible_data'] = True
        if utils.get_or_default(self.ctx, FailOnUncomparable):
            eh.check_failed(msg)
        else:
            self.set_info("Comparison is impossible: " + msg)
            no_diff_name = os.path.join(self._get_diff_path(), utils.NO_DIFF_NAME)
            fu.write_file(no_diff_name, utils.no_diff_detected())

    @property
    def _omit_responses_check(self):
        """Value of the OmitResponsesCheck parameter."""
        return utils.get_or_default(self.ctx, cmp_params.OmitResponsesCheck)

    def on_execute(self):
        """
            Load both responses sets together with their queries, make sure the sets are
            comparable (unless the check is omitted) and run the comparison.
        """
        make_folder(self._get_diff_path())

        self.add_bugbanner(bugbanner.Banners.SearchTests, component="response_diff")

        responses1_resource_id = self.ctx[BasesearchResponses1Parameter.name]
        responses2_resource_id = self.ctx[BasesearchResponses2Parameter.name]

        resp1 = channel.sandbox.get_resource(responses1_resource_id)
        resp2 = channel.sandbox.get_resource(responses2_resource_id)

        # NOTE(review): presumably true when both resources have the same md5 - nothing to diff then
        if utils.check_resources_by_md5(resp1, resp2, self._get_diff_path()):
            self.ctx['compare_result'] = True
            return

        node_types.GLOBAL_FACTOR_NAMES = get_and_check_factor_names(responses1_resource_id, responses2_resource_id)

        responses1, responses1_resource = self._load_responses(responses1_resource_id)
        responses2, responses2_resource = self._load_responses(responses2_resource_id)

        queries_resource_id = self.ctx.get(QueriesResourceParameter.name)

        if not queries_resource_id:
            logging.info("Using queries from responses1 resources")
            queries1_resource_id = self._find_queries(responses1_resource)
            queries2_resource_id = self._find_queries(responses2_resource)
            if not self._omit_responses_check and queries1_resource_id != queries2_resource_id:
                self._data_diff_found("Queries for responses1 and responses2 are different")
                return
            queries_resource_id = queries1_resource_id

        responses_len = len(responses1)
        queries = self._load_queries(queries_resource_id, responses_len)

        if not self._omit_responses_check and len(responses1) != len(responses2):
            self._data_diff_found("len(responses1) != len(responses2)")
        elif not self._omit_responses_check and responses_len != len(queries):
            self._data_diff_found("len(responses) != len(queries)")
        else:
            self.do_compare(queries, responses1, responses1_resource, responses2, responses2_resource, responses_len)

    def do_compare(self, queries, responses1, responses1_resource, responses2, responses2_resource, responses_len):
        """
            Compare parsed responses, save all requested diffs and store statistics in the context.

            Fails the task (eh.check_failed) when the share of uncompared responses exceeds
            the limit and failing is enabled, or when a diff is found and FailOnDiffParameter is set.

            :param queries: queries, one per response
            :param responses1: parsed responses #1
            :param responses1_resource: resource the responses #1 were read from
            :param responses2: parsed responses #2
            :param responses2_resource: resource the responses #2 were read from
            :param responses_len: number of responses to compare
        """
        self.ctx['num_of_queries'] = responses_len
        # counters are filled by custom_comparer_with_info during the comparison
        uncompared_responses_count = {
            'total': 0,
            'snippets': 0,
            'snippets1': 0,
            'snippets2': 0,
            'bs_incomplete': 0,
            'incomplete': 0,
            'incomplete1': 0,
            'incomplete2': 0,
        }
        diff_indexes = []

        with utils.TimeCounter("getting compare_result"):
            # True - no diff
            self.ctx['compare_result'] = brp.compare_responses(
                responses1, responses2,
                diff_indexes=diff_indexes,
                custom_comparer_with_info={
                    "comparer": custom_comparer_with_info,
                    "info": uncompared_responses_count,
                },
            )

        self.ctx['num_of_diffs'] = len(diff_indexes)
        logging.info("Uncompared stats: %s", uncompared_responses_count)
        self._save_simple_diff(responses1_resource.path, responses2_resource.path)
        self._save_side_by_side_diff(queries, responses1, responses2)
        self._save_full_diff(queries, responses1, responses2, diff_indexes)
        self.mark_resource_ready(self.ctx['out_resource_id'])

        for key in uncompared_responses_count:
            self.ctx['uncompared_' + key] = uncompared_responses_count[key]

        self.ctx['compared_responses_count'] = responses_len - uncompared_responses_count['total']
        # empty responses sets must not crash the task with ZeroDivisionError
        if responses_len:
            uncompared_percent = (100.0 * uncompared_responses_count['total']) / responses_len
        else:
            uncompared_percent = 0.0

        if utils.get_or_default(self.ctx, cmp_params.LimitUncomparedQueries) < uncompared_percent:
            if utils.get_or_default(self.ctx, cmp_params.FailOnExceedLimitUncomparedQueries):
                eh.check_failed("Too many unanswered queries ({} per cent)".format(uncompared_percent))
            else:
                self.ctx['compare_result'] = False
                self.ctx['exceed_limit_uncompared_queries'] = True
                self._save_comparing_failed(
                    "Exceed limit uncompared queries: {}% > {}%<br>"
                    "You need to use another way of detecting diff (less synthetic - betas, etc...)".format(
                        uncompared_percent,
                        utils.get_or_default(self.ctx, cmp_params.LimitUncomparedQueries)
                    )
                )
        if (not self.ctx['compare_result']) and self.ctx.get(FailOnDiffParameter.name):
            eh.check_failed("diff found")

    def _get_diff_path(self):
        """Path of the output resource, i.e. the directory all diff files are written to."""
        return channel.sandbox.get_resource(self.ctx['out_resource_id']).path

    def _get_response_patchers(self):
        """List of patchers handed to brp.parse_responses."""
        patchers = [node_patchers.compress_formula_and_factors]
        if self.ctx.get(cmp_params.RemoveIndexGen.name):
            patchers.append(realsearch.RemoveIndexGeneration)
        return patchers

    def _get_custom_node_types(self):
        """Custom node types for the html diff; empty here."""
        return {}

    def _find_queries(self, responses_resource):
        """Id of the queries resource matching `responses_resource`; this implementation returns None."""
        return None

    def _load_responses(self, responses_resource_id):
        """
            Sync the responses resource and parse it.

            :param responses_resource_id: id of the responses resource
            :return: tuple (parsed responses, resource object)
        """
        self.sync_resource(responses_resource_id)
        responses_resource = channel.sandbox.get_resource(responses_resource_id)

        with utils.TimeCounter("Parsing responses"):
            responses = brp.parse_responses(
                responses_resource.path,
                remove_unstable_props=True,
                response_patchers=self._get_response_patchers(),
                min_response_size=utils.get_or_default(self.ctx, MinResponseSizeParameter),
                search_hr_responses=(responses_resource.type == resource_types.SEARCH_HR_RESPONSES),
                ctx=self.ctx,
            )

        return responses, responses_resource

    def _load_queries(self, queries_resource_id, responses_len):
        """
            Read at most `responses_len` queries from the resource.

            :param queries_resource_id: id of the queries resource or None
            :param responses_len: number of responses, i.e. maximal number of queries needed
            :return: list of stripped query lines; generated "query N" placeholders when there is
                no resource or it cannot be synced
        """
        if queries_resource_id is not None:
            try:
                queries_resource_path = self.sync_resource(queries_resource_id)
                with open(queries_resource_path) as queries_file:
                    return [line.strip() for line in queries_file][:responses_len]
            except errors.TaskError as e:
                logging.info('Cannot get queries from resource: %s', e)
        return ["query {}".format(n) for n in six.moves.xrange(responses_len)]

    def _save_simple_diff(self, responses1_file, responses2_file):
        """
            Store plain `diff` output of the two responses files as diff.txt (when requested).
            The diff is dropped if the process does not finish within the timeout.
        """
        # get_or_default, like the other parameter reads here, tolerates a missing context key
        if utils.get_or_default(self.ctx, SaveSimpleDiffParameter):
            cmd = ["diff", responses1_file, responses2_file]
            simple_diff_path = os.path.join(self._get_diff_path(), "diff.txt")
            with open(simple_diff_path, 'wb') as simple_diff_file:
                try:
                    process.run_process(cmd, stdout=simple_diff_file, wait=True, check=False, timeout=600)
                except se.SandboxSubprocessTimeoutError:
                    logging.warning("Simple diffing failed by timeout")
                    remove_path(simple_diff_path)

    def _save_side_by_side_diff(self, queries, responses1, responses2):
        """Generate the side-by-side html diff (when requested) and link it from the task info."""
        if not utils.get_or_default(self.ctx, SaveSideBySideDiffParameter):
            return
        with utils.TimeCounter("Generating side-by-side html diff"):
            if sbs.diff(queries, responses1, responses2, self._get_diff_path(), short_report=True):
                self.set_info(
                    utils2.resource_redirect_link(
                        self.ctx['out_resource_id'], "Side-by-side diff", path=sbs.OUTPUT_NAME_SHORT,
                    ),
                    do_escape=False,
                )

    def _save_full_diff(self, queries, responses1, responses2, diff_indexes):
        """
            Write the full html diff and the changed_props.html summary into the result directory.

            :param diff_indexes: indexes of differing responses collected by the comparison
        """
        changed_props = ChangedProps()
        diff_path = self._get_diff_path()

        with utils.TimeCounter("generating full html diff"):
            response_diff.write_html_diff(
                queries,
                responses1,
                responses2,
                changed_props,
                diff_indexes,
                diff_path=diff_path,
                custom_node_types_dict=self._get_custom_node_types(),
                # a zero/empty value falls back to the default of this very parameter
                queries_per_file=(
                    utils.get_or_default(self.ctx, QueriesPerFileParameter)
                    or QueriesPerFileParameter.default_value
                ),
                max_diff_size=utils.get_or_default(self.ctx, MaxDiffSizeParameter),
                max_diff_files=utils.get_or_default(self.ctx, MaxDiffFilesParameter),
                ctx=self.ctx,
            )

        with utils.TimeCounter("generating changed props"):
            with open(os.path.join(diff_path, "changed_props.html"), "w") as out_file:
                changed_props.write(out_file, diff_path)

    def _save_comparing_failed(self, warning_text):
        """Write comparing_failed.html with `warning_text` into the result directory."""
        diff_path = self._get_diff_path()
        with open(os.path.join(diff_path, "comparing_failed.html"), "w") as out_file:
            out_file.write("<html><h1 style='color:red'>{}</h1></html>".format(warning_text))

    def get_short_task_result(self):
        """
            One-line result of a completed task: "no diff", the number of diffs,
            or a note that the limit of uncompared responses was exceeded.
            None while the task is not completed.
        """
        if not self.is_completed():
            return None
        if 'compare_result' not in self.ctx:
            # the comparison never ran (_data_diff_found let the task finish without failing),
            # so there is no result to report
            return "not compared"
        if self.ctx['compare_result']:
            return "no diff"
        if self.ctx.get('exceed_limit_uncompared_queries', False):
            return 'exceed limit uncompared responses'
        return "{0} diff".format(str(self.ctx.get('num_of_diffs', '')))


def _filter_props(path):
    """
        Tell whether a changed property must be left out of the report.

        :param path: property path
        :return: True when the path contains one of the undesirable fragments
    """
    for fragment in ("/Ig", "/FailedPrimusList"):
        if fragment in path:
            return True
    return False


def custom_comparer_with_info(r1, r2, uncompared_responses_count=None):
    """
        Returns True when responses cannot be compared (incomplete, etc)

        Which checks are applied is controlled by the comparing parameters
        read from the context of the current task (channel.task.ctx).

        :param r1: response #1
        :param r2: response #2
        :param uncompared_responses_count: dict with statistics counters, updated in place;
            a throwaway dict is used when it is omitted
        :return: truthy value when the pair must be skipped
    """
    if uncompared_responses_count is None:
        # the default used to crash on the first counter update; count into a scratch dict instead
        uncompared_responses_count = dict.fromkeys(
            (
                'total',
                'snippets', 'snippets1', 'snippets2',
                'bs_incomplete',
                'incomplete', 'incomplete1', 'incomplete2',
            ),
            0,
        )

    cannot_compare = False
    context = channel.task.ctx
    if utils.get_or_default(context, cmp_params.CompareCompleteResponsesOnly):
        bs_incomplete = response_state.has_incomplete_responses(r1, r2)
        uncompared_responses_count['bs_incomplete'] += (1 if bs_incomplete else 0)
        cannot_compare = cannot_compare or bs_incomplete
    if not cannot_compare and utils.get_or_default(context, cmp_params.IgnoreUnanswered):
        is_ans1 = response_state.is_answer_full(r1)
        is_ans2 = response_state.is_answer_full(r2)
        uncompared_responses_count['incomplete1'] += (1 if not is_ans1 else 0)
        uncompared_responses_count['incomplete2'] += (1 if not is_ans2 else 0)
        uncompared_responses_count['incomplete'] += (1 if not (is_ans1 and is_ans2) else 0)
        cannot_compare = cannot_compare or not (is_ans1 and is_ans2)
        if not (is_ans1 or is_ans2):
            # both answers are not full: the pair may still be compared when the failure is the same
            if utils.get_or_default(context, cmp_params.CompareSameUnanswered):
                if response_state.get_bad_search_source(r1) == response_state.get_bad_search_source(r2):
                    cannot_compare = False
            if utils.get_or_default(context, cmp_params.CompareSameUpperUnanswered):
                if response_state.not_respond_state(r1) == response_state.not_respond_state(r2):
                    cannot_compare = False

    if not cannot_compare and utils.get_or_default(context, cmp_params.IgnoreUnansweredSnippets):
        snip1 = response_state.is_answer_not_fetched(r1)
        uncompared_responses_count['snippets1'] += (1 if snip1 else 0)
        snip2 = response_state.is_answer_not_fetched(r2)
        uncompared_responses_count['snippets2'] += (1 if snip2 else 0)
        uncompared_responses_count['snippets'] += (1 if (snip1 or snip2) else 0)
        cannot_compare = cannot_compare or snip1 or snip2

    if cannot_compare:
        uncompared_responses_count['total'] += 1
    return cannot_compare


def get_and_check_factor_names(resp_id1, resp_id2):
    """
        Fetch factor names for both responses resources and log how they relate.

        :param resp_id1: id of responses resource #1
        :param resp_id2: id of responses resource #2
        :return: tuple (factor names #1, factor names #2)
    """
    logging.info("getting factor names for responses: {} and {}".format(resp_id1, resp_id2))
    first_resource = _get_factor_resource(resp_id1)
    second_resource = _get_factor_resource(resp_id2)

    factor_names_1 = get_factor_names_for_one_response(first_resource)
    factor_names_2 = get_factor_names_for_one_response(second_resource)
    if not factor_names_1 or not factor_names_2:
        logging.info("WARNING! Empty factor names!")

    # names present in exactly one of the two lists
    only_in_one = set(factor_names_1).symmetric_difference(factor_names_2)
    if not only_in_one:
        logging.info("factors for responses are equal")
    else:
        logging.info("factors for responses have difference: %s", only_in_one)

    return factor_names_1, factor_names_2


def _get_factor_resource(resp_id):
    """
        Look up the last OTHER_RESOURCE whose `responses_id` attribute
        points at the given responses resource (SEARCH-1332).

        :param resp_id: id of the responses resource
        :return: whatever apihelpers.get_last_resource_with_attribute returns
    """
    responses_ref = "resource:{}".format(resp_id)
    return apihelpers.get_last_resource_with_attribute(
        resource_types.OTHER_RESOURCE,
        attribute_name="responses_id",
        attribute_value=responses_ref,
    )


def get_factor_names_for_one_response(response_resource):
    """
        Read factor names from the factor names resource.

        The last whitespace-separated token of every non-blank line is taken as a name.

        :param response_resource: factor names resource, or a falsy value when there is none
        :return: list of factor names; empty list when the resource is missing
    """
    if not response_resource:
        logging.info("WARNING! Cannot find factor_names resource!")
        return []

    factors_path = channel.task.sync_resource(response_resource)
    # "with" closes the file; lines from a file are never empty strings (they keep "\n"),
    # so blank lines have to be detected after splitting, otherwise [-1] raises IndexError
    with open(factors_path) as factors_file:
        return [tokens[-1] for tokens in (line.split() for line in factors_file) if tokens]
