# -*- coding: utf-8 -*-
import os
import imp
import sys
import shutil
import re
from sandbox.projects import resource_types
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk.paths import make_folder
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.projects.common.base_search_quality.tree import htmldiff
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import utils

# Group label shared by the pool-related input parameters declared below
# (cluster, the two pools and the two feature-name URLs).
input_group = "input params"


class Cluster(sp.SandboxStringParameter):
    """
        Required string parameter naming the YT cluster to use
        ("hahn" unless overridden).
    """
    group = input_group
    name = "cluster"
    description = 'used yt cluster'
    default_value = 'hahn'
    required = True


class InputPool1(sp.SandboxStringParameter):
    """
        Required string parameter: path to the first (baseline) pool.
    """
    group = input_group
    name = "first_pool"
    description = 'baseline pool'
    required = True


class InputPool2(sp.SandboxStringParameter):
    """
        Required string parameter: the second (tested) pool.
    """
    group = input_group
    name = "second_pool"
    description = "tested pool"
    required = True


class Pool1NamesUrl(sp.SandboxUrlParameter):
    """
        Optional URL parameter pointing at the feature names for pool 1.
    """
    group = input_group
    name = "names_1_url"
    description = "feature names for pool 1 given as url"
    required = False


class Pool2NamesUrl(sp.SandboxUrlParameter):
    """
        Optional URL parameter pointing at the feature names for pool 2.
    """
    group = input_group
    name = "names_2_url"
    description = "feature names for pool 2 by url"
    required = False


class ComparerExecutable(sp.LastReleasedResource):
    """
        Required resource parameter of type POOL_COMPARER_EXECUTABLE:
        the comparer binary.
    """
    group = "Custom params"
    resource_type = resource_types.POOL_COMPARER_EXECUTABLE
    name = "comparer_executable"
    description = "Comparer executable"
    required = True


class PackageResource(sp.ResourceSelector):
    """
        Required resource parameter of type OTHER_RESOURCE: the package that
        bundles pool_converter, mapreduce_yt and the run script.
        A fixed resource id is used as the default.
    """
    group = "Custom params"
    resource_type = resource_types.OTHER_RESOURCE
    name = "package_resource"
    description = "package with pool_converter, mapreduce_yt and run script"
    default_value = 238620073
    required = True


class AddDetailedJsons(sp.SandboxBoolParameter):
    """
        Optional boolean flag (off by default) requesting that detailed
        info be saved.
    """
    group = "Custom params"
    name = "add_detailed_jsons"
    description = "Save detailed info"
    default_value = False
    required = False


class CompareMrProtoPools(SandboxTask):
    """
        Compares two mr-protobuf pools.

        The task runs the packaged ``run.sh`` script with the comparer
        executable, converts the comparer output files to HTML reports inside
        the output resource and stores a summary of the differences in the
        task context (``changed_factors``, ``changed_aggregated_stats``,
        ``per_line_diff``).
    """
    type = 'COMPARE_MR_PROTO_POOLS'

    input_parameters = (
        Cluster,
        InputPool1,
        InputPool2,
        Pool1NamesUrl,
        Pool2NamesUrl,
        ComparerExecutable,
        PackageResource,
        AddDetailedJsons,
    )
    cores = 1
    execution_space = 3 * 1024  # 3 Gb

    # When more factors than this differ, the full list is written to a
    # separate resource and only a short message is kept in the context.
    _MAX_INLINE_CHANGED_FACTORS = 40

    def on_enqueue(self):
        """
            Creates the output resource up front and remembers its id in the
            context under ``out_resource_id``.
        """
        SandboxTask.on_enqueue(self)
        resource = self._create_resource(
            self.descr,
            'compare_result',
            resource_types.TM_LIMITS_MONITOR_COMPARE_RESULT,
        )
        self.ctx['out_resource_id'] = resource.id

    def load_converter(self, binary):
        """
            Exports ``pool_comparer_result_converter.py`` from Arcadia trunk
            into the task directory and imports it as a module.

            :param binary: unused; kept for interface compatibility
            :return: the imported ``pool_comparer_result_converter`` module
        """
        module_name = "pool_comparer_result_converter"
        Arcadia.export(
            Arcadia.trunk_url('/search/tools/idx_ops/comparer/to_html_converter/pool_comparer_result_converter.py'),
            self.abs_path('pool_comparer_result_converter.py'),
        )

        # The exported file is only importable while its directory is on
        # sys.path; restore sys.path even when the import fails.
        sys.path.append(self.abs_path("./"))
        try:
            fp, pathname, description = imp.find_module(module_name)
            try:
                return imp.load_module(module_name, fp, pathname, description)
            finally:
                # imp leaves closing the file object to the caller.
                if fp is not None:
                    fp.close()
        finally:
            sys.path.pop()

    @staticmethod
    def _shell_single_quote(value):
        """
            Returns ``value`` wrapped in single quotes for a POSIX shell.

            Inside single quotes the shell treats every character literally,
            so the only character needing care is the quote itself: it is
            emitted as ``'\\''`` (close quote, escaped quote, reopen quote).
        """
        return "'" + value.replace("'", "'\\''") + "'"

    def _load_feature_names(self, url_param, file_name):
        """
            Downloads feature names for one pool and returns them as a list.

            The last tab-separated column of every downloaded line is saved
            to ``file_name`` (relative to the current directory) and then
            read back, one stripped name per line.

            :param url_param: parameter class whose ``name`` keys the url in the context
            :param file_name: local file that receives the extracted names
            :return: list of names; empty if the url is absent or empty
            :raises RuntimeError: if the shell command exits with non-zero status
        """
        key = url_param.name
        url = self.ctx[key] if key in self.ctx else None
        if not url:
            # The url parameters are optional: no url means no names.
            return []

        # NOTE: the exit status of a pipeline is that of its last command
        # (awk), so a curl failure on its own is not detected here.
        command = "set -x -e\ncurl %s -L | awk -F \"\t\" '{print $NF}' > %s\n" % (
            self._shell_single_quote(url),
            file_name,
        )
        curl_status = os.system(command)
        if curl_status != 0:
            raise RuntimeError(
                "failed to download feature names from %r (exit status %d)" % (url, curl_status)
            )

        with open(file_name) as names_file:
            return [line.strip() for line in names_file]

    def on_execute(self):
        """
            Runs the comparison and fills the output resource.

            Steps: run the packaged script, load optional feature names,
            render the HTML reports with the converter module, store the list
            of changed factors and the diff flags in the context, and mark
            the output resource ready.
        """
        comparer = self._read_resource(self.ctx[ComparerExecutable.name])
        package = self._read_resource(self.ctx[PackageResource.name])

        package_dir = package.abs_path()
        mapreduce_bin = os.path.join(package_dir, "mapreduce-yt")
        converter_bin = os.path.join(package_dir, "pool_converter")
        script = os.path.join(package_dir, "run.sh")

        resource = self._read_resource(self.ctx['out_resource_id'], sync=False)
        result_dir = resource.abs_path()
        make_folder(result_dir)

        comparer_results_path = os.path.join(result_dir, "detailed_jsons")
        detailed_stats_path = os.path.join(result_dir, "detailed_stats")
        make_folder(comparer_results_path)
        make_folder(detailed_stats_path)

        # Files the comparer is asked to write (see the --dst-* options below).
        comparer_results = {
            "bl": os.path.join(comparer_results_path, "bl_stats"),
            "so": os.path.join(comparer_results_path, "so_stats"),
            "agg": os.path.join(comparer_results_path, "agg_diff"),
            "line": os.path.join(comparer_results_path, "line_diff"),
            "ds": os.path.join(comparer_results_path, "diff_stats"),
        }

        # All comparer options are packed into one single-quoted word so that
        # the script receives them as a single argument.
        comparer_args = "'%s'" % " ".join([
            "--dst-bl", comparer_results["bl"],
            "--dst-so", comparer_results["so"],
            "--dst-diff-agg", comparer_results["agg"],
            "--dst-diff-line", comparer_results["line"],
            "--dst-diff-stats", comparer_results["ds"],
        ])

        # NOTE(review): a list is passed together with shell=True; the
        # pre-quoted comparer_args suggests run_process joins the list into
        # one shell command line - confirm against the SDK.
        process.run_process(
            [
                script,
                mapreduce_bin,
                converter_bin,
                comparer.abs_path(),
                self.ctx[InputPool1.name],
                self.ctx[InputPool2.name],
                self.ctx['out_resource_id'],
                comparer_args,
            ],
            shell=True,
            log_prefix='run_script_for_calc_jsons',
            check=True,
            environment={
                "YT_PROXY": self.ctx[Cluster.name],
                "YT_TOKEN": self.get_vault_data("OTHER", "yt_token_for_robot_search_sandbox"),
            }
        )

        # HTML reports produced from the comparer output.
        converter_results = {
            "bl": os.path.join(detailed_stats_path, "baseline_stats.html"),
            "so": os.path.join(detailed_stats_path, "test_stats.html"),
            "agg": os.path.join(detailed_stats_path, "aggregations_diff.html"),
            "line": os.path.join(result_dir, "per_line_diff_features.html"),
            "line_full": os.path.join(result_dir, "per_line_diff_hits.html"),
            "ds": os.path.join(result_dir, "diff_stats.html"),
        }

        # Each pool gets its own url parameter and its own temporary file.
        bl_names = self._load_feature_names(Pool1NamesUrl, "n1.txt")
        so_names = self._load_feature_names(Pool2NamesUrl, "n2.txt")

        pc = self.load_converter(comparer.abs_path())

        ld = pc.TPrintLinedDiff(comparer_results["line"], htmldiff, True, bl_names, so_names)
        ld.print_lined_diff(converter_results["line"])

        ld2 = pc.TPrintLinedDiff(comparer_results["line"], htmldiff, False, bl_names, so_names)
        ld2.print_lined_diff(converter_results["line_full"])

        ad = pc.TPrintAggDiff(comparer_results["agg"], htmldiff, bl_names, so_names)
        ad.print_aggregated_diff(converter_results["agg"])

        # Union of the factor keys reported by the per-line and aggregated diffs.
        differed_features = ld.get_differed_f_keys()
        differed_features.update(ad.get_differed_f_keys())

        changed_features_list = [
            pc.NamesResolve(x, {"info": differed_features[x]}, bl_names, so_names, htmldiff)["full_name"]
            for x in differed_features
        ]

        if ld2.difference_in_hits:
            changed_features_list.append("TMHits")
        else:
            # No difference in hits: the hits report is not kept.
            os.remove(converter_results["line_full"])

        if len(differed_features) > self._MAX_INLINE_CHANGED_FACTORS:
            diff_features_list_resource = self._create_resource(
                self.descr + ": changed factors list",
                'changed_pool_factors_list',
                resource_types.OTHER_RESOURCE,
            )
            with open(diff_features_list_resource.abs_path(), "w") as f:
                # Same output as ``print >> f, changed_features_list``.
                f.write("%s\n" % (changed_features_list,))
            self.ctx['changed_factors'] = [
                "number of changed factors in pool is %d, see details in resource %d" % (
                    len(differed_features), diff_features_list_resource.id
                )
            ]
        else:
            self.ctx['changed_factors'] = changed_features_list

        baseline_stats = pc.TPrintStats(comparer_results["bl"], False, htmldiff, bl_names, so_names, differed_features)
        baseline_stats.print_stats(converter_results["bl"], "BaseLine Stats")

        test_stats = pc.TPrintStats(comparer_results["so"], False, htmldiff, bl_names, so_names, differed_features)
        test_stats.print_stats(converter_results["so"], "Testing one Stats")

        diff_stats = pc.TPrintStats(comparer_results["ds"], True, htmldiff, bl_names, so_names, differed_features)
        diff_stats.print_stats(converter_results["ds"], "Stats of difference")

        # The raw comparer output is kept only when explicitly requested.
        if not utils.get_or_default(self.ctx, AddDetailedJsons):
            shutil.rmtree(comparer_results_path)

        if not ad.has_diff and not ld.has_diff:
            fu.write_file(
                os.path.join(result_dir, 'noDifferences.txt'),
                'There are no differences between pools or '
                'pools are not intersected and all aggregation stats differ by less than 0.5%'
            )

        self.ctx['changed_aggregated_stats'] = ad.has_diff
        self.ctx['per_line_diff'] = ld.has_diff

        resource.mark_ready()

    def get_short_task_result(self):
        """
            Returns a short summary for a completed task: "diff" and/or
            "agg_diff" separated by a space, or "no diff" when neither flag
            is set. Returns None while the task is not completed.
        """
        if not self.is_completed():
            return None

        labels = []
        if self.ctx['per_line_diff']:
            labels.append("diff")
        if self.ctx['changed_aggregated_stats']:
            labels.append("agg_diff")

        return " ".join(labels) if labels else "no diff"


# Module-level alias for the task class.
# NOTE(review): presumably the name the Sandbox task loader looks up - confirm.
__Task__ = CompareMrProtoPools
