# -*- coding: utf-8 -*-
import logging
import json
import os
import errno
import itertools
import traceback
import math
from shutil import copy2
from collections import defaultdict

from sandbox import sdk2
from sandbox.common.types import task as ctt
import sandbox.common.errors
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.task import SandboxTask

import sandbox.sandboxsdk.environments as env
import sandbox.projects.ParallelDumpEventlog as pde
import sandbox.projects.release_machine.core.task_env as task_env

from sandbox.projects.CompareEventlogStats import CompareParams
from sandbox.projects.CompareEventlogStats import CompareEventlogStats
from sandbox.projects.common.noapacheupper import search_component as nsc
from sandbox.projects.release_machine import input_params as rm_params
from sandbox.projects.release_machine.core import const as rm_const
from sandbox.projects.release_machine.helpers import startrek_helper as st_helper
from sandbox.projects.common.search import compare_middle_utils as cmu
from sandbox.projects.common.search import bugbanner
from sandbox.projects.release_machine.components import all as rmc
from sandbox.projects.common.differ import coloring
from sandbox.projects.common import apihelpers
from sandbox.projects.common import stats as mst
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import link_builder as lb
from sandbox.projects.common import utils
from sandbox.projects.common import utils2
from sandbox.projects.common import kosher_release
from sandbox.projects import resource_types
from sandbox.projects.common.noapacheupper.standalone import GrpcClientExecutable
from sandbox.projects.websearch import CalcEventlogStats
from sandbox.projects.ydo.ydo_parallel_dump_eventlog import create_resource_params as ydo_create_resource_params
from sandbox.projects.websearch.models_proxy import search_component as mpsc

from sandbox.projects.websearch.middlesearch.CompareMiddlesearchBinaries \
    import meta_search_perf_result_renderer as renderer


# Input parameter groups for the two middlesearch instances being compared
# (index 1 is the baseline, index 2 is the experimental one).
res_params = [
    cmu.create_resource_params(1, 'Baseline (first) middlesearch', use_int=True),
    cmu.create_resource_params(2, 'Experimental (second) middlesearch', use_int=True)
]
# Input parameter groups for the two noapacheupper instances (used in Upper launch mode).
upper_res_params = [
    nsc.create_noapacheupper_params(
        n=1,
        neh_cache_mode='read',
        settings=False,
        add_requests=True,
        with_evlogdump=True,
        config_params=True,
    ),
    nsc.create_noapacheupper_params(
        n=2,
        neh_cache_mode='read',
        settings=False,
        add_requests=True,
        with_evlogdump=True,
        config_params=True,
    ),
]
# Input parameter groups for the two models proxy instances.
mp_res_params = [
    mpsc.create_models_proxy_params(n=1),
    mpsc.create_models_proxy_params(n=2),
]
# NOTE(review): created without an index; presumably the parameter names expected by
# the child PARALLEL_DUMP_EVENTLOG task -- confirm against the scheduling code.
dst_res_params = cmu.create_resource_params()
stat_params = cmu.create_eventlog_stat_params()
ydo_pde_params = ydo_create_resource_params()

# Keys under which comparison results are stored in the task context (self.ctx).
KEY_ANALYTICS = 'analytics'
KEY_MAIN_METRICS = 'main_metrics'
KEY_DIFFS = 'diffs'  # legacy for old tasks
KEY_MAJOR_DIFFS = 'major_diffs'
KEY_MINOR_DIFFS = 'minor_diffs'
KEY_WARNINGS = 'warnings'
KEY_AUTO_ACCEPTED = 'auto_accepted'
KEY_SIGNIFICANT_DIFF = 'significant_diff'
KEY_EXPLANATION = 'explanation'
KEY_SUMMARY = 'summary'
# Expressed in microseconds (24 000 000 us == 24 s).
MAX_ALLOWED_DELAY = 24000000  # 24 seconds = sum of timeouts for search, factors and fetch
# Default for FailOnStatisticsDeviationThreshold (a fraction of the mean).
DEFAULT_STATS_DEVIATION_THRESHOLD = 0.05
# Linked from the task footer header.
RELEASE_DUTY_WIKI = 'https://wiki.yandex-team.ru/sergejjkozunov/relizy/'


class CompareBinariesResultHtml(sdk2.Resource):
    """HTML resource holding rendered MiddleSearch comparison (diff) data.

    Created by CompareMiddlesearchBinaries._get_or_create_compare_result_html.
    """


class CtxDebugResource(sdk2.Resource):
    """Resource containing a dump of the task context; intended for debugging only."""


class TestEnvDiffTask(sp.SandboxBoolParameter):
    """Flag marking that the task runs in testenv diff mode.

    When set, on_execute() does not propagate failures of do_execute(): it records
    the traceback in the task info and sets the 'significant_diff' context key.
    """
    name = 'test_env_diff_task'
    description = 'indicates if the task is used in testenv diff mode'
    default_value = False


class LaunchMode(pde.ComponentName):
    """Selector of the component under test (middle, upper, models proxy or YDO).

    ``sub_fields`` maps every mode to the names of the input parameters that
    belong to that mode.
    """
    name = "launch_mode"
    description = "Launch mode (Middle or Upper)"
    sub_fields = {
        pde.ParallelComponent.MIDDLE: [p.name for p_group in res_params for p in p_group.params],
        pde.ParallelComponent.UPPER: [p.name for p_group in upper_res_params for p in p_group.params],
        pde.ParallelComponent.MODELS_PROXY: [p.name for p_group in mp_res_params for p in p_group.params],
        # NOTE(review): ydo_pde_params is iterated directly here, while input_parameters
        # uses ydo_pde_params.params -- confirm that both forms yield the same parameters.
        "YDO" : [p.name for p in ydo_pde_params],
    }


class CopyEventlogsParam(sp.SandboxBoolParameter):
    """When enabled, the task creates two EVENTLOG_DUMP resources on enqueue and
    copies the eventlogs into them during execution."""
    name = 'copy_eventlogs'
    description = 'Copy eventlogs'
    group = 'Other'
    default_value = False


class AutoModeParam(sp.SandboxRadioParameter):
    """Shooting mode of the comparison run.

    The selected value is exposed as ``CompareMiddlesearchBinaries._run_mode`` and
    is passed to the auto-accept logic (see auto_accept_middle, which treats
    'cache' and 'cache_compatibility' specially).
    """
    name = 'auto_mode'
    description = 'Metasearch comparation run mode'
    group = 'Other'
    # Each choice is a (human readable title, stored value) pair.
    choices = [
        ('Nocache (shoot once with nocache=da)', 'nocache'),
        ('Cache (shoot twice: first fill cache, then run again and measure)', 'cache'),
        ('Cachehit (shoot once allowing cache; prod-like)', 'cachehit'),
        (
            'Cache compatibility (shoot first metasearch not measuring; '
            'use its cache for measured shoots of both metasearches)',
            'cache_compatibility',
        ),
    ]
    default_value = 'nocache'


class AutoDiffMode(sp.SandboxRadioParameter):
    """Way of detecting a diff: 'binary' or 'formulas' (SEARCH-6236).

    NOTE(review): presumably selects between auto_accept_middle and
    auto_accept_formulas -- confirm in calc_auto_stat.
    """
    name = 'auto_diff_mode'
    description = 'Defines how to detect diff'
    group = 'Other'
    choices = [
        ('binary', 'binary'),
        ('formulas', 'formulas'),
    ]
    default_value = 'binary'


class AutoDiffParam(sp.SandboxBoolParameter):
    """Switch for automatic diff detection; exposed as
    ``CompareMiddlesearchBinaries._auto_diff`` and passed to calc_auto_stat."""
    name = 'enable_auto_diffs'
    description = 'Enable auto diff detection (try to check diff significance)'
    group = 'Other'
    default_value = True


class FailOnSlowdownThreshold(sp.SandboxIntegerParameter):
    """
    Q95 slowdown limit, in microseconds.

    When significant performance slowdown is detected it is better to fail task
    to be able to run testenv-based bisect tooling.
    The threshold checks in do_execute() run only when the value is positive,
    so the default of -1 disables them.
    """
    name = 'fail_on_slowdown_threshold_q95'
    description = 'Fail task when Q95 slowdown is more than given number of microseconds'
    group = 'Other'
    default_value = -1


class FailOnRearrangeSlowdownThreshold(sp.SandboxIntegerParameter):
    """
    Q95 slowdown limit for a single rearrange rule, in microseconds.

    When significant performance slowdown by any of rearrange rules is detected it is better to fail task
    to be able to run testenv-based bisect tooling.
    NOTE(review): do_execute() does not read this parameter directly; presumably it is
    read inside _check_rearrange_rule_failure_threshold -- confirm.
    """
    name = 'fail_on_rearrange_slowdown_threshold_q95'
    description = 'Fail task when Q95 slowdown by any of rearrange rules is more than given number of microseconds'
    group = 'Other'
    default_value = -1


class CheckSubSourceErrorCount(sp.SandboxBoolParameter):
    """Enables the SubSourceError event count check at the end of do_execute() (MIDDLE-43)."""
    name = 'check_sub_source_error_count'
    description = 'Check count of SubSourceError events'
    default_value = False


class UseNewCalcEventlogStats (sp.SandboxBoolParameter):
    """Selects the new stat calculator implementation.

    The value is also used as the "extended stats" flag: it adds extra columns to
    the footer tables and enables additional failure checks in do_execute().
    """
    name = 'use_new_calc_eventlog_stats'
    description = 'Use new implementation of stat calculator'
    default_value = False


class FailOnStatisticsDeviationThreshold(sp.SandboxFloatParameter):
    """
    Allowed statistics deviation, expressed as a fraction of the mean.

    When significant statistics deviation is detected it is better to fail task
    to be able to run testenv-based bisect tooling.
    """
    name = 'fail_on_statistics_deviation_threshold'
    description = 'Fail task when statistic deviation is more then [fail_on_statistics_deviation_threshold] * mean'
    group = 'Other'
    default_value = DEFAULT_STATS_DEVIATION_THRESHOLD


class ResourcesTaskMock(sp.TaskSelector):
    """Optional id of an earlier COMPARE_MIDDLESEARCH_BINARIES task to reuse.

    When set, do_execute() copies the child task ids (dump eventlog, calc stats,
    compare) from that task's context into the current one.
    """
    name = 'resources_task_mock'
    description = 'All needed resources will be collected from this task instead of scheduling our own tasks'
    task_type = ['COMPARE_MIDDLESEARCH_BINARIES']
    default_value = None
    group = 'Other'


class CompareBinariesResultResourceTTL(sp.SandboxIntegerParameter):
    """
    Set TTL for COMPARE_BINARIES_RESULT_HTML in CompareMiddlesearchBinaries

    The value is passed as the ``ttl`` attribute of the created HTML resources.
    """
    name = 'compare_binaries_result_resource_ttl'
    description = 'ttl for COMPARE_BINARIES_RESULT_HTML'
    default_value = 14


class StressLimiterSemaphore(sp.SandboxStringParameter):
    """Name of the semaphore that limits how many tasks run concurrently."""
    name = 'stress_limiter_semaphore'
    description = 'name of semaphore to limit concurrent running tasks'
    # NOTE(review): presumably prevents the value from being inherited by task copies -- confirm.
    do_not_copy = True


class StressLimiterCapacity(sp.SandboxIntegerParameter):
    """Capacity of the semaphore named by StressLimiterSemaphore."""
    name = 'stress_limiter_capacity'
    description = 'maximum number of concurrent running tasks with same stress_limiter_semaphore'
    # NOTE(review): presumably prevents the value from being inherited by task copies -- confirm.
    do_not_copy = True


def hardlink_or_copy(src, dst):
    """Make ``dst`` a hard link to ``src``, copying the file if linking across devices.

    Only the cross-device error (EXDEV) triggers the copy fallback; any other
    OSError raised by ``os.link`` is propagated to the caller.
    """
    try:
        os.link(src, dst)
        return
    except OSError as link_error:
        crosses_devices = link_error.errno == errno.EXDEV
        if not crosses_devices:
            raise
    # Hard links cannot span file systems, so fall back to a metadata-preserving copy.
    copy2(src, dst)


def allow_analytics(data, time_stat, all_stat):
    """Decide whether a statistics row should be included in the analytics.

    :param data: row dict; must contain "key" ("<name>.<stat>"), may contain "abs_diff_avg"
    :param time_stat: iterable of timing statistic names (e.g. "Avg", "Q95")
    :param all_stat: container of statistic names that are analysed at all
    :return: False when the row has to be skipped, True otherwise
    """
    stat_key = data["key"]
    if stat_key.rsplit(".", 1)[-1] not in all_stat:
        return False

    timing_suffixes = []
    for stage in ("-Merge", "-Fetch"):
        timing_suffixes.extend("{}.{}".format(stage, stat_name) for stat_name in time_stat)

        # call counters of rearrange stages are never analysed
        if stat_key.endswith(stage + ".Count"):
            return False

    # completely skip rearrange timings that differ by less than 0.3ms
    if data.get("abs_diff_avg", 1000) < 300 and stat_key.endswith(tuple(timing_suffixes)):
        return False

    return True


def walsh_avg(v):
    """Return the upper median of the pairwise averages of distinct elements of ``v``.

    A single-element sequence yields that element unchanged. An empty sequence
    has no pairs and raises IndexError.
    """
    if len(v) == 1:
        return v[0]

    pair_means = sorted((a + b) / 2.0 for a, b in itertools.combinations(v, 2))
    return pair_means[int(0.5 * len(pair_means))]


def calc_std_dev(values):
    """Return ``(mean, sample standard deviation)`` of the non-None items of ``values``.

    :param values: iterable of numbers; ``None`` entries are ignored
    :return: tuple ``(mean, std_dev)``; ``(0, 0)`` when there are no usable items,
        and a zero deviation when there is only one usable item

    Division is forced to floating point: the module runs under Python 2 without
    ``from __future__ import division``, so integer inputs would otherwise be
    silently truncated.
    """
    present = [v for v in values if v is not None]
    count = len(present)
    if count == 0:
        return (0, 0)

    mean = sum(present) / float(count)
    if count < 2:
        return (mean, 0)

    sq_diff_sum = sum((v - mean) * (v - mean) for v in present)
    # (count - 1) in the denominator: sample (unbiased) variance
    std_dev = math.sqrt(sq_diff_sum / float(count - 1))
    return (mean, std_dev)


def calc_significance(pair_vals):
    """Estimate the significance of a change from paired measurements (sign test).

    :param pair_vals: iterable of ``(first, second)`` value pairs
    :return: 2 - one-directional with more than 4 informative pairs,
             1 - low significance,
             0 - not significant,
            -1 - the minority direction holds more than a third of informative pairs
    """
    # madskills + sign test
    greater = 0
    less = 0
    for first, second in pair_vals:
        if first == second:
            # ties carry no information about the direction
            continue
        if second > first:
            greater += 1
        else:
            less += 1

    informative = greater + less
    minority = min(greater, less)

    if minority == 0:
        return 2 if informative > 4 else 1

    if 3 * minority > informative:
        return -1

    # largest minority size still accepted for the given number of informative pairs
    for upper_bound, allowed_minority in ((5, -1), (8, 0), (11, 1), (13, 2)):
        if informative < upper_bound:
            break
    else:
        allowed_minority = 3

    return 1 if minority <= allowed_minority else 0


def format_float(v):
    """Render a number without a fractional part; a missing value (None) becomes "?"."""
    if v is None:
        return "?"
    return "{:.0f}".format(v)


def format_significance(v):
    """Map a numeric significance level to its label: "high" (> 1), "low" (== 1) or "none"."""
    if v > 1:
        return "high"
    return "low" if v == 1 else "none"


def format_stat_key(data):
    """Return the table cell for the statistic name of a row.

    When the row has a "dimension", the key is wrapped into a ``<div>`` whose
    tooltip shows that dimension ("count" for the "Count" statistic). Rows
    without a "key" are shown as "???".
    """
    key = data.get("key", "???")
    if "dimension" not in data:
        return key

    is_counter = data.get("stat", "") == "Count"
    tooltip = "count" if is_counter else data["dimension"]
    return '<div title="{}">{}</div>'.format(tooltip, key)


def make_table(table_data, extended):
    """Build the footer table description for a list of statistic rows.

    :param table_data: sequence of row dicts (keys used: "key", "avg", "abs_diff_avg",
        "significance", "allowed", "values", "diff" and, for extended tables,
        "mean_diff", "avg_std_dev", "adjusted_dev", "mean", "std_dev")
    :param extended: when true, the mean/deviation columns are added as well
    :return: dict with "header" (list of column descriptors) and "body"
        (column key -> list of cells, one cell per row)

    Every column always gets exactly one cell per row: a row that has fewer
    measurements than the widest row is padded with empty cells, otherwise the
    cells of the following rows would shift up. An empty ``table_data`` yields a
    table with empty columns instead of an error.
    """
    table = {
        "header": [
            {"key": "stat", "title": ""},
            {"key": "diff", "title": "RelDiff, %"},
            {"key": "absdiff", "title": "AbsDiff"},
            {"key": "significance", "title": "Significance"},
            {"key": "allowed", "title": "Allowed"},
        ],
        "body": {
            "stat": [format_stat_key(data) for data in table_data],
            "diff": [coloring.color_diff(data.get("avg", 0)) for data in table_data],
            "absdiff": [format_float(data.get("abs_diff_avg", 0)) for data in table_data],
            "significance": [format_significance(data.get("significance", 0)) for data in table_data],
            "allowed": [str(data.get("allowed")) for data in table_data],
        }
    }

    if extended:
        table["header"].append({"key": "mean_diff", "title": "Mean Diff"})
        table["header"].append({"key": "avg_std_dev", "title": "Avg StdDev"})
        table["header"].append({"key": "adjusted_dev", "title": "Adjusted Dev"})
        table["body"]["mean_diff"] = [format_float(data.get("mean_diff", 0)) for data in table_data]
        table["body"]["avg_std_dev"] = [format_float(data.get("avg_std_dev", 0)) for data in table_data]
        table["body"]["adjusted_dev"] = [format_float(data.get("adjusted_dev", 0)) for data in table_data]

    # max() of an empty sequence raises ValueError, hence the [0] fallback
    max_vals = max([len(data.get("values", [])) for data in table_data] or [0])
    value_columns = [[] for _ in range(max_vals)]

    for data in table_data:
        vals = data.get("values", [[0, 0]] * max_vals)
        diffs = data.get("diff", [100] * max_vals)

        for i in range(max_vals):
            if i < len(vals):
                cell = "{}<br>{}<br>{}".format(
                    format_float(vals[i][0]),
                    format_float(vals[i][1]),
                    coloring.color_diff(diffs[i])
                )
            else:
                # keep the column aligned with the other columns of this row
                cell = ""
            value_columns[i].append(cell)

    for i, col in enumerate(value_columns):
        table["header"].append({"key": str(i), "title": str(i)})
        table["body"][str(i)] = col

    if extended:
        means_column = []
        std_dev_column = []
        for data in table_data:
            means = data.get("mean", [0.0, 0.0])
            # relative difference of the means; zero when the baseline mean is not positive
            means_prc_diff = ((means[1] - means[0]) / means[0]) if means[0] > 0.0 else 0.0
            std_devs = data.get("std_dev", [0, 0])
            means_column.append("{}<br>{}<br>{}".format(
                format_float(means[0]),
                format_float(means[1]),
                coloring.color_diff(means_prc_diff * 100)
            ))
            std_dev_column.append("{}<br>{}".format(
                format_float(std_devs[0]),
                format_float(std_devs[1])
            ))
        table["header"].append({"key": "means", "title": "Mean"})
        table["header"].append({"key": "std_devs", "title": "Std Dev"})
        table["body"]["means"] = means_column
        table["body"]["std_devs"] = std_dev_column

    return table


def append_table(footer, table_name, table_data, extended=False):
    """Append a titled table section to ``footer``; does nothing when ``table_data`` is empty.

    :param footer: list of footer sections, modified in place
    :param table_name: title shown above the table
    :param table_data: rows passed to make_table
    :param extended: forwarded to make_table
    """
    if not table_data:
        return

    heading = "<h4>{}</h4>".format(table_name)
    section = {
        "content": {heading: make_table(table_data, extended)},
        "helperName": "",
    }
    footer.append(section)


def sort_rows(rows, comp_name=pde.ParallelComponent.MIDDLE):
    """Return ``rows`` as a new list ordered for presentation.

    For the upper component rows are ordered by decreasing absolute diff only;
    otherwise allowed rows come first, then more significant ones, then rows
    with a larger absolute diff.
    """
    def by_abs_diff(row):
        return -abs(float(row.get("abs_diff_avg", 0)))

    def by_allowed_significance_diff(row):
        return (
            -row.get("allowed", 0),
            -row.get("significance", 0),
            by_abs_diff(row),
        )

    if comp_name == pde.ParallelComponent.UPPER:
        ranking = by_abs_diff
    else:
        ranking = by_allowed_significance_diff
    return sorted(rows, key=ranking)


def to_string(data):
    """Describe a row as "<key>: <relative diff via coloring.color_diff>, <absolute diff>"."""
    key = data["key"]
    rel_diff_text = coloring.color_diff(data["avg"])
    abs_diff_text = format_float(data["abs_diff_avg"])
    return "{}: {}, {}".format(key, rel_diff_text, abs_diff_text)


def auto_accept_formulas(all_data):
    """Auto-accept check used for formulas diffs (SEARCH-6236).

    :param all_data: dict "<name>.<stat>" -> row dict
    :return: ``(True, "")`` when accepted, otherwise ``(False, description of
        the first offending row)``
    """
    watched_stats = {"Avg", "MeanAvg", "HiAvg", "Q95"}
    watched_names = {"RequestTime", "MetaSearchTime", "CalcMetaRelevance", "FormulasBundle-RearrangeAfterMetaFeatures-TotalAsyncTime", "FormulasBundle-Fetch-TotalAsyncTime"}

    for data in all_data.values():
        if not data.get("allowed"):
            continue

        name, stat = data["key"].split(".")

        # skip all metrics that don't have enough hits to analyze
        count_key = name + ".Count"
        too_few_hits = count_key in all_data and all_data[count_key]["max_value"] < 100
        if too_few_hits:
            continue

        if name not in watched_names or stat not in watched_stats:
            continue

        # reject only when both the relative and the absolute diff are noticeable
        if data["avg"] >= 5 and data["abs_diff_avg"] > 100:
            return False, to_string(data)

    return True, ""


def auto_accept_middle(all_data, mode):
    """Auto-accept check for middlesearch statistics.

    :param all_data: dict "<name>.<stat>" -> row dict
    :param mode: run mode; "cache_compatibility" delegates to
        auto_accept_middle_cache_compat, "cache" relaxes the subsource checks
    :return: ``(True, "")`` when accepted, otherwise ``(False, explanation)``
    """
    subsource_names = {"SubSourceSearch", "SubSourceSnippets", "SubSourceAllfactors", "SubSourceAux"}

    if mode == "cache_compatibility":
        return auto_accept_middle_cache_compat(all_data.values(), subsource_names)

    time_stat = {"Avg", "MeanAvg", "HiAvg", "Q95"}
    time_names = {"RequestTime", "MetaSearchTime", "SearchTime", "FetchTime", "AllfactorsTime", "OtherTime"}

    for data in all_data.values():
        if not data.get("allowed"):
            continue

        name, stat = data["key"].split(".")

        # skip all metrics that don't have enough hits to analyze
        count_key = name + ".Count"
        if count_key in all_data and all_data[count_key]["max_value"] < 100:
            continue

        # skip all metrics that differ by less than 10 units
        if data['abs_diff_avg'] < 10:
            continue

        if name in time_names and stat in time_stat:
            slow_and_certain = data["abs_diff_avg"] > 3000 and data["significance"] > 1
            if slow_and_certain or (data["abs_diff_avg"] > 5000 and data["significance"] > 0):
                return False, to_string(data)

        is_aux = name.startswith("SubSourceAux")
        if not (name in subsource_names or is_aux):
            continue
        if stat not in {"Count"} | time_stat:
            continue

        # in cache mode rarely requested subsources (except search) are ignored
        if mode == "cache" and name != "SubSourceSearch" and all_data[count_key]["abs_avg"] < 25.0:
            continue

        # relative diff limits: the strong one applies to highly significant diffs
        strong_threshold = 0.5
        threshold = 2.0

        if "SAAS" in name:
            threshold += 1.0

        if is_aux:
            strong_threshold *= 2
            threshold *= 2

        certain_diff = data["avg"] > strong_threshold and data["significance"] > 1
        if certain_diff or (data["avg"] > threshold and data["significance"] > 0):
            return False, to_string(data)

    return True, ""


def auto_accept_middle_cache_compat(all_data, subsource_names):
    """Auto-accept check for the cache compatibility mode.

    Only highly significant ``<SourceType>_<TYPE>_ReqLen.Count`` rows are
    inspected; the task is rejected when the request count of a subsource
    changed by more than the limit defined for its source type.

    :param all_data: iterable of row dicts
    :param subsource_names: set of subsource type names to check (besides "SubSourceOther")
    :return: ``(True, "")`` when accepted, otherwise ``(False, explanation)``
    """
    # allowed change per source type, in percent
    limits = {
        "SubSourceSearch": 25.0,
        "SubSourceSnippets": 25.0,
        "SubSourceAllfactors": 25.0,
        "SubSourceAux": 50.0,
        "SubSourceOther": 75.0,
    }
    # fallback for a checked source type that has no dedicated limit
    fallback_limit = 100.0

    for data in all_data:
        if data["significance"] < 2:
            continue

        # https://nda.ya.ru/3VMcx9
        # We are interested in the SubSourceSearch_TYPE_ReqLen.Count statistic,
        # where TYPE is, for example, PLATINUM or QUICK_SAMOHOD_ON_MIDDLE
        name, statistic_type = data["key"].split(".")

        name_parts = name.split('_')
        if len(name_parts) < 3:
            # fewer than two underscores: not a per-source statistic
            continue
        source_type = name_parts[0]
        statistic_name = name_parts[-1]

        if statistic_type != "Count" or statistic_name != "ReqLen":
            continue
        if source_type not in {"SubSourceOther"} | subsource_names:
            continue

        if data["abs_avg"] > limits.get(source_type, fallback_limit):
            return False, "cache altered: {} ({})".format(name, coloring.color_diff(data["avg"]))

    return True, ""


class CompareMiddlesearchBinaries(bugbanner.BugBannerTask):
    """
        Runs PARALLEL_DUMP_EVENTLOG, which performs parallel shooting for
        2 upper-, mmeta- or int- searches and gets eventlogs.
        Then runs 2 CALC_EVENTLOG_STATS tasks to count performance stats from eventlogs.
        Then runs COMPARE_EVENTLOG_STATS to compare stats.
    """
    type = 'COMPARE_MIDDLESEARCH_BINARIES'


    # Shooting parameters, launch mode selector, per-component resource parameters
    # (middle, upper, YDO, models proxy), followed by this task's own options and
    # the eventlog statistics parameters.
    input_parameters = (
        pde.shooting_params.params + (LaunchMode, rm_params.ComponentName) +
        res_params[0].params +
        res_params[1].params +
        upper_res_params[0].params +
        upper_res_params[1].params +
        ydo_pde_params.params +
        mp_res_params[0].params +
        mp_res_params[1].params +
        (
            CopyEventlogsParam,
            AutoModeParam,
            AutoDiffParam,
            AutoDiffMode,
            FailOnSlowdownThreshold,
            FailOnRearrangeSlowdownThreshold,
            FailOnStatisticsDeviationThreshold,
            pde.RequestsTimeoutMilliseconds,
            CheckSubSourceErrorCount,
        ) +
        stat_params.params +
        (
            TestEnvDiffTask,
            CompareParams.CompareEventlogStatsStoreMode,
            CalcEventlogStats.CompareSubSourceRequests,
            UseNewCalcEventlogStats,
            ResourcesTaskMock,
            StressLimiterSemaphore,
            StressLimiterCapacity,
        )
    )
    execution_space = 3000  # 3 Gb, 2G detected
    default_arch = 'linux'
    cores = 1
    environment = [task_env.TaskRequirements.startrek_client]

    # Statistics collected during execution; reset to an empty dict in _prepare().
    _all_stats = None

    def _prepare(self):
        """Reset the per-execution state: collected statistics and the report URL map."""
        self._all_stats = {}
        # report URLs are grouped per "run,repeat" key
        self.ctx[self._KEY_REPORT_URLS] = defaultdict(list)

    @property
    def footer(self):
        """Build the task footer: summary, main metrics, diff links, stat urls and diff tables.

        A footer already stored in the context under 'FOOTER_CONTENT' is returned as is.

        :return: list of footer sections (dicts with "content" and optionally "helperName")
        """
        footer_content = self.ctx.get('FOOTER_CONTENT', [])

        if footer_content:
            return footer_content

        # Resolved before any branching: the flag is needed by the diff tables below
        # even when there are no main metrics. `.get` with the parameter default keeps
        # the footer renderable for tasks whose context has no such key.
        extended_stats = self.ctx.get(UseNewCalcEventlogStats.name, UseNewCalcEventlogStats.default_value)

        if self.ctx.get(KEY_MAIN_METRICS):
            footer_content += [
                {
                    "content": "<h1>Release duty <a href=\"{}\">wiki</a></h1>".format(RELEASE_DUTY_WIKI)
                },
                {
                    "content": "<h3>Auto accepted: {}; {}</h3>".format(
                        self.ctx.get(KEY_AUTO_ACCEPTED), self.ctx.get(KEY_EXPLANATION)
                    )
                },
                {
                    "content": "<h4>Summary: {}</h4>".format(self.ctx.get(KEY_SUMMARY))
                },
            ]
            append_table(footer_content, "Main metrics", self.ctx.get(KEY_MAIN_METRICS), extended_stats)

        html_diffs_footer = self._get_html_diffs_footer()
        if html_diffs_footer:
            footer_content.append(html_diffs_footer)

        report_urls = self.ctx.get(self._KEY_REPORT_URLS)
        if report_urls:
            full_stat_urls = "<h4>Full stat urls</h4>"
            for run_rep in sorted(report_urls.keys()):
                # the template needs two urls; pad so that a missing one does not break rendering
                urls = list(report_urls.get(run_rep) or [])
                urls += [""] * (2 - len(urls))
                full_stat_urls += (
                    "<em>run,repeat = {}</em>, "
                    "<a href=\"{}\">1st stat</a>, "
                    "<a href=\"{}\">2nd stat</a> <br>"
                ).format(run_rep, *urls)

            footer_content.append({'helperName': '', 'content': full_stat_urls})

        analytics = self.ctx.get(KEY_ANALYTICS)
        comp_name = utils.get_or_default(self.ctx, LaunchMode)
        if not analytics:
            return footer_content
        elif isinstance(analytics, dict):
            analytics = sort_rows(analytics.itervalues(), comp_name)
        else:
            analytics = sort_rows(analytics, comp_name)

        append_table(footer_content, "Diffs", self.ctx.get(KEY_DIFFS), extended_stats)
        append_table(footer_content, "Major diffs", self.ctx.get(KEY_MAJOR_DIFFS), extended_stats)
        append_table(footer_content, "Minor diffs", self.ctx.get(KEY_MINOR_DIFFS), extended_stats)

        append_table(footer_content, "Warnings", self.ctx.get(KEY_WARNINGS), extended_stats)
        append_table(footer_content, "All", analytics, extended_stats)

        return footer_content

    @staticmethod
    def _format_diff_footer(data_list):
        """Replace the third item of ``data_list`` with its colored representation.

        If coloring fails for any reason, a "no data" marker is stored instead.
        The list is modified in place and returned.
        """
        try:
            rendered = coloring.color_diff(data_list[2], max_diff=2)
        except Exception:
            rendered = "<em style='color:Cyan'>no data</em>"
        data_list[2] = rendered
        return data_list

    def on_enqueue(self):
        """Initialise the bookkeeping context keys and pre-create eventlog resources.

        The list of restarted tasks is always reset; the maps of child task ids
        and report urls are created only when they are missing or empty. When
        eventlog copying is requested, two EVENTLOG_DUMP resources are created
        and their ids are stored in the context.
        """
        SandboxTask.on_enqueue(self)
        self.ctx[self._KEY_RESTARTED_TASKS] = []

        context_defaults = (
            (self._KEY_CALC_STATS_TASKS, dict),
            (self._KEY_COMPARE_TASKS, dict),
            (self._KEY_REPORT_URLS, lambda: defaultdict(list)),
        )
        for ctx_key, make_default in context_defaults:
            if not self.ctx.get(ctx_key):
                self.ctx[ctx_key] = make_default()

        if not self.ctx[CopyEventlogsParam.name]:
            return

        for number in (1, 2):
            eventlog_resource = self._create_resource(
                '{0}, eventlog # {1}'.format(self.descr, number),
                'eventlog.{0}'.format(number),
                resource_types.EVENTLOG_DUMP
            )
            self.ctx['eventlog_output_resource{0}'.format(number)] = eventlog_resource.id

    @property
    def _auto_diff(self):
        """Value of the AutoDiffParam input parameter from the context (None when absent)."""
        return self.ctx.get(AutoDiffParam.name)

    @property
    def _run_mode(self):
        """Value of the AutoModeParam input parameter from the context (None when absent)."""
        return self.ctx.get(AutoModeParam.name)

    @property
    def _store_mode(self):
        """Store mode for compare tasks, resolved via utils.get_or_default from the context."""
        return utils.get_or_default(self.ctx, CompareParams.CompareEventlogStatsStoreMode)

    def do_execute(self):
        """Run the whole comparison pipeline.

        Steps, in order:
          1. obtain the dump-eventlog child task (reused from the context, taken
             from the mock task, or scheduled);
          2. for every "run,repeat" eventlog pair create the calc-stats tasks and
             schedule the compare task (or remember the existing one);
          3. check child tasks, collect reports, compute auto-accept statistics,
             render diffs and store the footer in the context;
          4. optionally report the result to Startrek and run the failure checks.

        NOTE(review): the method appears to be re-entered after child tasks finish
        (child task ids are looked up in the context before scheduling); the waiting
        itself presumably happens inside the scheduling helpers -- confirm.
        """
        self._prepare()
        self.add_bugbanner(bugbanner.Banners.SearchTests)
        self.add_bugbanner(bugbanner.Banners.WebMiddleSearch)
        self._check_test_parameters()
        comp_name = utils.get_or_default(self.ctx, LaunchMode)
        release_num = utils.get_or_default(self.ctx, rm_params.ReleaseNum)

        # Reuse child tasks of another COMPARE_MIDDLESEARCH_BINARIES task instead of scheduling new ones.
        mock_task_id = utils.get_or_default(self.ctx, ResourcesTaskMock)
        if mock_task_id:
            mock_task_ctx = channel.sandbox.get_task(mock_task_id).ctx
            self.ctx[self._KEY_COMPARE_TASKS] = mock_task_ctx.get(self._KEY_COMPARE_TASKS)
            self.ctx[self._KEY_CALC_STATS_TASKS] = mock_task_ctx.get(self._KEY_CALC_STATS_TASKS)
            # NOTE(review): repeats the assignment made two lines above; redundant.
            self.ctx[self._KEY_COMPARE_TASKS] = mock_task_ctx.get(self._KEY_COMPARE_TASKS)
            self.ctx[self._KEY_DUMP_EVENTLOG_TASK] = mock_task_ctx.get(self._KEY_DUMP_EVENTLOG_TASK)

        self.extended_stats = self.ctx[UseNewCalcEventlogStats.name]
        dump_eventlog_task_id = self.ctx.get(self._KEY_DUMP_EVENTLOG_TASK)
        component_name = utils.get_or_default(self.ctx, rm_params.ComponentName)
        if not dump_eventlog_task_id:
            # SEARCH-5281
            # Announce the start in the release ticket (only for release runs).
            if release_num and component_name:
                st = st_helper.STHelper(self.get_vault_data(rm_const.COMMON_TOKEN_OWNER, rm_const.COMMON_TOKEN_NAME))
                c_info = rmc.COMPONENTS[component_name]()
                st.write_grouped_comment(
                    rm_const.TicketGroups.PerfTest,
                    "",
                    "Blender Performance test vs Prod task {}: started".format(lb.task_wiki_link(self.id)),
                    release_num,
                    c_info,
                )
            dump_eventlog_task_id = self._schedule_dump_eventlog_task(comp_name)

        # evlogs[0] / evlogs[1]: "run,repeat" key -> eventlog resource id for each of the two binaries.
        evlogs =[]
        if comp_name == "YDO":
            # For YDO the eventlogs are found by resource attributes instead of the child task context.
            for i in xrange(2):
                bin_evlogs = {}
                for evlog in sdk2.Resource.find(type="EVENTLOG_DUMP", task_id=dump_eventlog_task_id, attrs={"binary_number" : i}).limit(20):
                    bin_evlogs[evlog.key] = evlog.id
                evlogs.append(bin_evlogs)
        else:
            logging.info("extraction evlogs")
            evlogs = channel.sandbox.get_task(dump_eventlog_task_id).ctx[cmu.KEY_OUT_EVENTLOGS]
        report_data = {}
        keys_to_compare_task_ids = {}
        for key, res_id1 in evlogs[0].items():
            run, repeat = [int(x) for x in key.split(',')]
            res_id2 = evlogs[1][key]

            calc_stats_task_id1 = self._make_calc_task(0, run, repeat, res_id1, comp_name)
            calc_stats_task_id2 = self._make_calc_task(1, run, repeat, res_id2, comp_name)

            compare_task_id = self.ctx[self._KEY_COMPARE_TASKS].get("{},{}".format(run, repeat))
            report_data[key] = {}
            if not compare_task_id:
                self._schedule_compare_task(key, calc_stats_task_id1, calc_stats_task_id2)
            else:
                keys_to_compare_task_ids[key] = compare_task_id

        if self.ctx[CopyEventlogsParam.name]:
            self._copy_resources_if_necessary()

        self.restart_broken()
        utils.check_subtasks_fails(fail_on_first_failure=True, fail_on_failed_children=not self.ctx.get("do_not_fail"))

        for key, compare_task_id in keys_to_compare_task_ids.items():
            self._get_report(key, compare_task_id, report_data)

        self._get_stats_url()
        self._forward_subsource_requests_diff_result()

        report_result = self._extract_results_from_report(report_data, comp_name)
        auto_stat_ctx, auto_stat_big = self.calc_auto_stat(report_result, self._run_mode, self._auto_diff, comp_name)
        self.ctx.update(auto_stat_ctx)
        self._all_stats.update(auto_stat_big)
        self._render_all_diffs(auto_stat_big=auto_stat_big)
        # Cache the rendered footer; the footer property returns it on later accesses.
        self.ctx['FOOTER_CONTENT'] = self.footer

        # Report the final verdict to the release ticket (only for release runs).
        if release_num and component_name:
            st = st_helper.STHelper(self.get_vault_data(rm_const.COMMON_TOKEN_OWNER, rm_const.COMMON_TOKEN_NAME))
            c_info = rmc.COMPONENTS[component_name]()
            if comp_name == pde.ParallelComponent.UPPER:
                result = not self.ctx.get("blender_diff")
            else:
                result = self.ctx.get(KEY_AUTO_ACCEPTED)
            st.write_grouped_comment(
                rm_const.TicketGroups.PerfTest, "",
                "Blender Performance test vs Prod task {} accepted result: {}".format(
                    lb.task_wiki_link(self.id), result
                ),
                release_num, c_info
            )

        # A non-positive threshold (the default is -1) disables all slowdown checks below.
        q95_threshold = int(utils.get_or_default(self.ctx, FailOnSlowdownThreshold))
        if q95_threshold > 0:
            self._check_failure_threshold(q95_threshold)
            # NOTE(review): the general Q95 threshold is passed to the rearrange and
            # statistics-deviation checks, although dedicated parameters
            # (FailOnRearrangeSlowdownThreshold, FailOnStatisticsDeviationThreshold) exist;
            # presumably those are read inside the helpers -- confirm.
            self._check_rearrange_rule_failure_threshold(q95_threshold)
            if self.extended_stats:
                self._check_grouping_rearrange_failure_threshold(q95_threshold)
                self._check_stats_deviation_failure(q95_threshold)
        self._check_response_time(comp_name)
        if self.ctx.get(CheckSubSourceErrorCount.name, False):
            self._check_subsource_errors(report_result)

    def on_execute(self):
        """Task entry point; wraps do_execute() with testenv diff mode error handling.

        In the normal mode do_execute() is called directly. In testenv diff mode
        a failure is not propagated: the traceback is stored in the task info and
        the 'significant_diff' context key is set, so that the failure is reported
        as a diff. Wait / NothingToWait are always re-raised untouched.
        """
        if self.ctx.get(TestEnvDiffTask.name):
            try:
                return self.do_execute()
            except (sandbox.common.errors.Wait, sandbox.common.errors.NothingToWait):
                # control-flow exceptions of the task engine, not failures
                raise
            except sandbox.common.errors.TaskFailure:
                logging.exception('Got TaskFailure in diff task')
                failure_info = 'Got TaskFailure in diff task: {}'.format(traceback.format_exc())
            except Exception:
                logging.exception('Got unknown Exception in diff task')
                failure_info = 'Got unknown Exception in diff task: {}'.format(traceback.format_exc())
            self.set_info(failure_info)
            self.ctx[KEY_SIGNIFICANT_DIFF] = True
            return None

        return self.do_execute()

    def restart_broken(self):
        """Restart child tasks stuck in the 'UNKNOWN' status, at most once each.

        Ids of restarted tasks are remembered in the context. Meeting a task that
        was already restarted stops the scan and fails the check. Errors raised
        while listing or restarting tasks are only logged.
        """
        stuck_task = None
        try:
            broken_tasks = channel.sandbox.list_tasks(parent_id=self.id, status="UNKNOWN")  # old status here
            logging.debug("Already restarted tasks %s", self.ctx[self._KEY_RESTARTED_TASKS])
            for child in broken_tasks:
                if child.id in self.ctx[self._KEY_RESTARTED_TASKS]:
                    # the second attempt is not allowed
                    stuck_task = child
                    break
                logging.debug("Restarting %s", child.id)
                self.ctx[self._KEY_RESTARTED_TASKS].append(child.id)
                channel.sandbox.server.restart_task(child.id)
        except Exception:
            logging.info("Unable to restart tasks:\n%s", eh.shifted_traceback())

        if stuck_task is not None:
            eh.check_failed("Child task's {} status is 'UNKNOWN' and it was already restarted.".format(stuck_task.id))

    def _get_html_diffs_footer(self):
        """Build the footer section with links to the rendered diff resources.

        Only entries of ``_DIFF_STAT_RESOURCES`` whose resource id is present in the
        context are listed. All three columns are filled together, so every row
        shows a metric next to its own links.

        :return: footer section dict, or None when no diff resource exists yet
        """
        header = [
            {
                "key": "metric",
                "title": "metric",
            },
            {
                "key": "resource_link",
                "title": "Resource view link",
            },
            {
                "key": "resource",
                "title": "resource link",
            },
        ]
        body = {i["key"]: [] for i in header}

        num_records = 0
        for key in self._DIFF_STAT_RESOURCES:
            diff_stat = self._DIFF_STAT_RESOURCES[key]
            if not self.ctx.get(diff_stat.resource_key):
                # no resource yet: skip the whole row to keep the columns aligned
                continue
            resource = channel.sandbox.get_resource(self.ctx[diff_stat.resource_key])
            body["metric"].append(diff_stat.resource_description)
            body["resource"].append(lb.resource_link(resource_id=resource.id, link_name=str(resource.id)))
            body["resource_link"].append(utils2.resource_redirect_link(resource_id=resource.id, title=str(resource.id)))
            num_records += 1
        if num_records == 0:
            return None
        return {
            "helperName": "",
            "content": {"<h3>Diff links:</h3>": {
                "header": header,
                "body": body,
            }}
        }

    def _get_or_create_resource(self, resource_key, resource_class, description, path, ttl=None):
        """
        Return the resource remembered under ``resource_key`` in the context, creating it on first use.

        :param resource_key: context key that stores the resource id
        :param resource_class: resource type passed to ``create_resource``
        :param description: description of the resource to create
        :param path: path of the resource to create
        :param ttl: optional value for the ``ttl`` attribute; omitted when falsy
        :return: the newly created resource, or the one fetched by the stored id
        """
        if self.ctx.get(resource_key):
            return channel.sandbox.get_resource(self.ctx[resource_key])

        attributes = {"ttl": ttl} if ttl else {}
        created_resource = self.create_resource(
            description=description,
            resource_path=path,
            resource_type=resource_class,
            attributes=attributes,
        )
        self.ctx[resource_key] = created_resource.id
        return created_resource

    def _get_or_create_compare_result_html(self, stat_key):
        """
        Get or create the html result resource for the ``stat_key`` entry of ``self._DIFF_STAT_RESOURCES``.

        The resource file is named ``<resource_key>.html`` and is placed in
        ``self._HTML_RESOURCES_DIR``.
        """
        diff_stat = self._DIFF_STAT_RESOURCES[stat_key]
        html_path = os.path.join(self._HTML_RESOURCES_DIR, diff_stat.resource_key + ".html")
        resource_ttl = utils.get_or_default(self.ctx, CompareBinariesResultResourceTTL)
        return self._get_or_create_resource(
            resource_key=diff_stat.resource_key,
            resource_class=CompareBinariesResultHtml,
            description=diff_stat.resource_description,
            path=html_path,
            ttl=resource_ttl,
        )

    @staticmethod
    def _get_template_key(resource_file_name):
        return os.path.join(os.path.dirname(__file__), "templates", resource_file_name)

    def _render_perf_result_template(self, json_table_stat, result_path):
        """
        Render ``json_table_stat`` with the ``diff_table.jinja2`` template.

        :param json_table_stat: table data passed to the renderer
        :param result_path: output file path passed to the renderer
        :return: whatever ``renderer.meta_search_perf_result_renderer`` returns
        """
        template_path = self._get_template_key("diff_table.jinja2")
        template_content = fu.read_file(template_path)
        return renderer.meta_search_perf_result_renderer(
            template_content=template_content,
            json_table_stat=json_table_stat,
            out_file_path=result_path,
        )

    def _make_calc_task(self, dump_1st_or_2nd, run, repeat, res_id, comp_name):
        calc_stats_task_id = self.ctx[self._KEY_CALC_STATS_TASKS].get(
            "{},{},{}".format(dump_1st_or_2nd, run, repeat)
        )
        if not calc_stats_task_id:
            calc_stats_task_id = self._schedule_calc_stats_task(
                dump_1st_or_2nd,
                run, repeat,
                res_id,
                comp_name
            )
        return calc_stats_task_id

    def _check_test_parameters(self):
        pass

    def _copy_resources_if_necessary(self):
        """
        Fill this task's eventlog output resources from the first calc-stats tasks.

        For the calc tasks stored under keys '0,0,0' and '1,0,0' the eventlog
        resource is copied (or hard-linked) into ``eventlog_output_resource1`` and
        ``eventlog_output_resource2`` respectively. Output resources that are
        already ready, and source resources that are not ready, are left alone.
        """
        eventlog_ctx_key = cmu.create_eventlog_resource_params().Eventlog.name
        calc_key_to_output_suffix = (('0,0,0', '1'), ('1,0,0', '2'))
        for calc_task_key, output_suffix in calc_key_to_output_suffix:
            output_res = channel.sandbox.get_resource(self.ctx['eventlog_output_resource' + output_suffix])
            if output_res.is_ready():
                continue

            calc_task_id = self.ctx[self._KEY_CALC_STATS_TASKS][calc_task_key]
            source_res_id = channel.sandbox.get_task(calc_task_id).ctx[eventlog_ctx_key]
            if not channel.sandbox.get_resource(source_res_id).is_ready():
                continue
            source_path = self.sync_resource(source_res_id)
            hardlink_or_copy(source_path, output_res.path)
            self.mark_resource_ready(output_res)

    @staticmethod
    def _check_dict_keys_equal(dict1, dict2):
        return not (set(dict1.keys()) ^ set(dict2.keys()))

    def _render_all_diffs(self, auto_stat_big):
        """
        Render an html resource for every non-empty statistic of ``auto_stat_big``.

        The keys of ``auto_stat_big`` must match the keys of
        ``self._DIFF_STAT_RESOURCES``. Rendering errors are logged and do not
        interrupt processing of the remaining statistics.
        """
        eh.ensure(
            self._check_dict_keys_equal(auto_stat_big, self._DIFF_STAT_RESOURCES),
            'auto_stat_big and _DIFF_STAT_RESOURCES keys are different'
        )
        extended_stats = self.ctx[UseNewCalcEventlogStats.name]
        html_dir = self._HTML_RESOURCES_DIR
        if not os.path.exists(html_dir):
            os.makedirs(html_dir)

        for stat_key in auto_stat_big:
            table_data = auto_stat_big[stat_key]
            if not table_data:
                continue
            logging.debug("_render_all_diffs(): rendering %s stat", stat_key)
            html_resource = self._get_or_create_compare_result_html(stat_key=stat_key)
            json_table_stat = make_table(table_data=table_data, extended=extended_stats)
            try:
                self._render_perf_result_template(
                    json_table_stat=json_table_stat,
                    result_path=html_resource.path,
                )
                self.mark_resource_ready(html_resource.id)
            except Exception:
                logging.exception("_render_perf_fresult_template failed")

    def _find_noapacheupper_queries_resource(self, stage):
        """
        Find the BLENDER_GRPC_CLIENT_PLAN resource released to ``stage``.

        A link to the found resource is added to the task info.

        :param stage: release stage passed to ``kosher_release.find_release``
        :return: id of the found resource
        :raises Exception: when no released resource is found
        """
        resource_type = 'BLENDER_GRPC_CLIENT_PLAN'
        resource_id = kosher_release.find_release(resource_type=resource_type, stage=stage)
        if not resource_id:
            error_text = "Cannot find any resource of type {} released to {}".format(
                resource_type,
                stage,
            )
            raise Exception(error_text)

        logging.info("%s resource found: %s", resource_type, resource_id)
        found_resource = sdk2.Resource[resource_id]
        info_html = '{res_type} resource found: <a href="{res_link}">{res_descr}</a>'.format(
            res_type=resource_type,
            res_link=found_resource.url,
            res_descr=found_resource.description,
        )
        self.set_info(info_html, do_escape=False)
        return resource_id

    def _schedule_dump_eventlog_task(self, comp_name):
        """
        Schedule the child task that dumps eventlogs for ``comp_name``.

        For the "YDO" component an sdk2 YDO_PARALLEL_DUMP_EVENTLOG task is created
        and enqueued; for any other component a PARALLEL_DUMP_EVENTLOG subtask is
        created. In both cases the id of the scheduled task is stored in
        ``self.ctx[self._KEY_DUMP_EVENTLOG_TASK]``.

        :param comp_name: component name, used as a key of ``pde.ParallelComponent.params``
        :return: id of the scheduled task
        """
        logging.info('scheduling PARALLEL_DUMP_EVENTLOG task')
        dst_ctx = {
            'notify_via': '',
            rm_params.ComponentName.name: comp_name,
            pde.RequestsTimeoutMilliseconds.name: self.ctx.get(pde.RequestsTimeoutMilliseconds.name),
        }

        # copy the per-component parameter groups from own context to the child context
        for p in pde.ParallelComponent.params[comp_name]:
            utils.copy_param_values(self.ctx, p, dst_ctx, p)
        if comp_name == "YDO":
            # YDO branch: the child is an sdk2 task and none of the shooting overrides below are applied
            utils.copy_param_values(self.ctx, ydo_pde_params, dst_ctx, ydo_pde_params)
            task = sdk2.Task["YDO_PARALLEL_DUMP_EVENTLOG"]
            subtask = task(task.current, **dst_ctx)
            subtask.enqueue()
            logging.info(subtask.id)
            self.ctx[self._KEY_DUMP_EVENTLOG_TASK] = subtask.id
            return subtask.id
        utils.copy_param_values(self.ctx, pde.shooting_params, dst_ctx, pde.shooting_params)

        # at least two runs are always requested
        dst_ctx[pde.shooting_params.Nruns.name] = max(
            dst_ctx[pde.shooting_params.Nruns.name], 2
        )
        dst_ctx[pde.shooting_params.Nrepeats.name] = 1
        dst_ctx[pde.shooting_params.ShuffleShootingOrder.name] = True
        # exactly one of the cache mode flags is set, depending on self._run_mode
        dst_ctx[pde.shooting_params.NoCacheMode.name] = self._run_mode == 'nocache'
        dst_ctx[pde.shooting_params.CacheMode.name] = self._run_mode == 'cache'
        dst_ctx[pde.shooting_params.CacheCompatibilityMode.name] = self._run_mode == 'cache_compatibility'

        # stress limiter settings are forwarded only when they are set in own context
        if self.ctx.get(StressLimiterSemaphore.name):
            dst_ctx[pde.StressLimiterSemaphore.name] = self.ctx[StressLimiterSemaphore.name]
        if self.ctx.get(StressLimiterCapacity.name):
            dst_ctx[pde.StressLimiterCapacity.name] = self.ctx[StressLimiterCapacity.name]

        if comp_name == pde.ParallelComponent.UPPER:
            # for each of the two parameter sets, fall back to the 'stable' released queries resource
            for index in xrange(2):
                requests_param_name = pde.ParallelComponent.params[comp_name][index].Requests.name
                if not dst_ctx.get(requests_param_name):
                    dst_ctx[requests_param_name] = self._find_noapacheupper_queries_resource('stable')
            # NOTE(review): hard-coded resource id; the string literal below holds the intended replacement
            dst_ctx[GrpcClientExecutable.name] = 1832049857
            """
            TODO(dima-zakharov): uncomment
            dst_ctx[GrpcClientExecutable.name] = utils.get_and_check_last_released_resource_id(
                GrpcClientExecutable.resource_type
            )
            """
        task = self.create_subtask(
            task_type='PARALLEL_DUMP_EVENTLOG',
            description=u'{0}, dump eventlogs for middlesearch'.format(self.descr),
            input_parameters=dst_ctx,
            host=self.ctx.get('dolbilka_target_host'),
            arch=self.default_arch
        )
        logging.info('task scheduled, id = %s, ctx has keys: %s', task.id, ', '.join(task.ctx.keys()))
        self.ctx[self._KEY_DUMP_EVENTLOG_TASK] = task.id
        return task.id

    def _schedule_calc_stats_task(self, dump_1st_or_2nd, run, repeat, eventlog_res_id, comp_name):
        """
        Schedule a subtask that calculates statistics for one eventlog.

        The subtask type is CALC_EVENTLOG_STATS, or CALC_EVENTLOG_STATS_2 when
        ``self.ctx['use_new_calc_eventlog_stats']`` is truthy. The id of the created
        task is stored in ``self.ctx[self._KEY_CALC_STATS_TASKS]`` under the key
        ``"<dump_1st_or_2nd>,<run>,<repeat>"``.

        :param dump_1st_or_2nd: 0 for the first parameter set, 1 for the second one
        :param run: run number (used in the log message, description and context key)
        :param repeat: repeat number (used in the log message, description and context key)
        :param eventlog_res_id: id of the eventlog resource to process
        :param comp_name: component name
        :return: id of the created subtask
        """
        logging.info(
            'scheduling CALC_EVENTLOG_STATS task for %s set, run # %s, repeat # %s',
            ['first', 'second'][dump_1st_or_2nd], run, repeat
        )
        dst_ctx = {
            'notify_via': '',
            CalcEventlogStats.CompareSubSourceRequests.name: self.ctx.get(CalcEventlogStats.CompareSubSourceRequests.name, False),
        }
        # calc_mode is set only for UPPER and YDO; other components get no explicit value
        if comp_name == pde.ParallelComponent.UPPER:
            dst_ctx['calc_mode'] = 'blender'
        elif comp_name == "YDO":
            dst_ctx['calc_mode'] = 'ydo'

        eventlog_ctx_key = cmu.create_eventlog_resource_params().Eventlog.name
        dst_ctx[eventlog_ctx_key] = eventlog_res_id

        if comp_name == "YDO":
            utils.copy_param_values(self.ctx, ydo_pde_params, dst_ctx, ydo_pde_params)
        else:
            # the evlogdump value is taken from the parameter set of the selected side
            dst_ctx[dst_res_params.Evlogdump.name] = self.ctx[
                pde.ParallelComponent.params[comp_name][dump_1st_or_2nd].Evlogdump.name
            ]
        utils.copy_param_values(self.ctx, stat_params, dst_ctx, stat_params)

        # the values assigned below are set after the copy, so they win over copied ones
        dst_ctx[stat_params.TableFields.name] = ""
        if comp_name != pde.ParallelComponent.UPPER:
            dst_ctx[stat_params.AdditionalStatCounters.name] = "Avg,Q50,Q95,Q99,HiAvg,MeanAvg"
        else:
            dst_ctx[cmu.create_eventlog_resource_params().OldRearrangeStagesOrder.name] = self.ctx[
                pde.ParallelComponent.params[comp_name][dump_1st_or_2nd].OldRearrangeStagesOrder.name
            ]
            dst_ctx[stat_params.AdditionalStatCounters.name] = "Q50,Q95,Q99"

        desc = '{0}, calc eventlog stats for {1} middlesearch, run#{2}, repeat#{3}'.format(
            self.descr,
            ['1st', '2nd'][dump_1st_or_2nd],
            run, repeat
        )

        calc_stats_task_type = 'CALC_EVENTLOG_STATS'

        if self.ctx['use_new_calc_eventlog_stats']:
            calc_stats_task_type = 'CALC_EVENTLOG_STATS_2'

        task = self.create_subtask(
            task_type=calc_stats_task_type,
            description=desc,
            input_parameters=dst_ctx,
            arch=self.default_arch
        )
        logging.info(
            'task scheduled, id = %s, ctx has keys: %s',
            task.id, ', '.join(task.ctx.keys())
        )
        # remember the task so that it can be found later by its (dump, run, repeat) key
        self.ctx[self._KEY_CALC_STATS_TASKS]["{},{},{}".format(dump_1st_or_2nd, run, repeat)] = task.id
        return task.id

    def _schedule_compare_task(self, key, task_id1, task_id2):
        """
        Schedule a COMPARE_EVENTLOG_STATS subtask for two calc-stats tasks.

        The json stats of both tasks are always compared; sub source requests are
        compared as well when the corresponding option is set in the context.
        The id of the created subtask is stored in ``self.ctx[self._KEY_COMPARE_TASKS][key]``.

        :return: id of the created subtask
        """
        logging.info('scheduling COMPARE_EVENTLOG_STATS task')

        first_calc_task = channel.sandbox.get_task(task_id1)
        second_calc_task = channel.sandbox.get_task(task_id2)

        input_res = {'notify_via': ''}
        input_res[CompareParams.EventlogStats1.name] = first_calc_task.ctx['json_stats']
        input_res[CompareParams.EventlogStats2.name] = second_calc_task.ctx['json_stats']
        input_res[CompareParams.CompareEventlogStatsStoreMode.name] = self._store_mode

        compare_subsource_key = CalcEventlogStats.CompareSubSourceRequests.name
        if self.ctx.get(compare_subsource_key, False):
            input_res[compare_subsource_key] = True
            input_res[CompareParams.SubSourceRequests1.name] = first_calc_task.ctx['subsource_requests']
            input_res[CompareParams.SubSourceRequests2.name] = second_calc_task.ctx['subsource_requests']

        description = 'compare results for tasks: {0} and {1}'.format(task_id1, task_id2)
        compare_task = self.create_subtask(
            task_type='COMPARE_EVENTLOG_STATS',
            description=description,
            input_parameters=input_res
        )

        self.ctx[self._KEY_COMPARE_TASKS][key] = compare_task.id
        return compare_task.id

    def _get_stats_url(self):
        """
        Collect urls of full eventlog stats of all calc-stats tasks.

        Tasks are visited in the order of their ``"<dump>,<run>,<repeat>"`` keys.
        The url of the first EVENTLOG_FULL_STATS resource of each task is appended to
        ``self.ctx[self._KEY_REPORT_URLS]`` under the ``"<run>,<repeat>"`` key.
        Nothing is returned.
        """
        # sort by the context key; a plain one-argument lambda is used instead of
        # tuple-parameter unpacking, which is Python-2-only syntax
        calc_tasks = sorted(self.ctx[self._KEY_CALC_STATS_TASKS].items(), key=lambda item: item[0])
        for calc_key, calc_task_id in calc_tasks:
            stats_res = apihelpers.list_task_resources(calc_task_id, resource_types.EVENTLOG_FULL_STATS)[0]
            # drop the leading "<dump>" component: both sides of a run share one report entry
            report_key = ",".join(calc_key.split(",")[1:])
            self.ctx[self._KEY_REPORT_URLS][report_key].append(stats_res.url)

    def _get_dimensions(self):
        """
        Merge the ``stat_dimensions`` json resources of all calc-stats tasks.

        Tasks without a ``stat_dimensions`` context value, and resources that are
        not ready, are skipped. On key clashes the value read later wins.

        :return: dict with the merged content of the json files
        """
        dimensions = {}

        for calc_task_id in self.ctx[self._KEY_CALC_STATS_TASKS].values():
            calc_ctx = channel.sandbox.get_task(calc_task_id).ctx
            if 'stat_dimensions' not in calc_ctx:
                continue
            dimensions_res_id = calc_ctx['stat_dimensions']
            if not channel.sandbox.get_resource(dimensions_res_id).is_ready():
                continue
            local_path = self.sync_resource(dimensions_res_id)
            with open(local_path, 'r') as dimensions_file:
                dimensions.update(json.load(dimensions_file).items())
        return dimensions

    def _forward_subsource_requests_diff_result(self):
        """
        Store (compare task id, sub source requests diff resource id) pairs in own context.

        Does nothing unless comparison of sub source requests is enabled in the context.
        """
        compare_enabled = self.ctx.get(CalcEventlogStats.CompareSubSourceRequests.name, False)
        if not compare_enabled:
            return

        self.ctx[self._KEY_SUBSOURCE_REQUESTS_DIFF] = [
            (
                compare_task_id,
                channel.sandbox.get_task(compare_task_id).ctx[
                    CompareEventlogStats._SUBSOURCE_REQUESTS_DIFF_RESOURCE_ID_KEY
                ],
            )
            for compare_task_id in self.ctx[self._KEY_COMPARE_TASKS].itervalues()
        ]

    def _get_report(self, key, task_id, report_data):
        """
        Put the result of a compare task into ``report_data[key]['data']``.

        The result is read from the ``compare_result`` context value of the task or,
        when it is absent, loaded from the resource referenced by ``result_resource_id``.
        Nothing is stored when the task is not in SUCCESS status or has no result.
        """
        task = channel.sandbox.get_task(task_id)
        if task.new_status != ctt.Status.SUCCESS:
            logging.debug("Task's status is not 'SUCCESS'! #{} - {}".format(task_id, task.new_status))
            return

        if 'compare_result' in task.ctx:
            compared_stats = task.ctx['compare_result']
        elif 'result_resource_id' in task.ctx:
            result_path = self.sync_resource(task.ctx['result_resource_id'])
            compared_stats = fu.json_load(result_path)
        else:
            compared_stats = None
            self.set_info(
                "Compare task '{}' hasn't results!".format(lb.task_link(task_id)),
                do_escape=False
            )

        if compared_stats is not None:
            report_data[key]['data'] = compared_stats

    def _save_run_data_to_yt(self, result):
        """
        Dump performance data of this run to a YT table (SEARCH-4816).

        The data (task ids, host of the dump task, testenv database and all entries
        of ``result`` with dots in keys replaced by underscores) is written to
        ``perf_data.json`` and then uploaded by ``dump_data_to_yt.py`` executed in a
        fresh virtual environment. Any failure is logged and does not fail the task.
        """
        try:
            dump_task_id = self.ctx.get(self._KEY_DUMP_EVENTLOG_TASK)
            perf_data = {
                "cmp_sandbox_task_id": self.id,
                "run_sandbox_task_id": dump_task_id,
                "run_sandbox_task_host": channel.sandbox.get_task(dump_task_id).host,
                "testenv_database": self.ctx.get("testenv_database"),
            }
            for stat_key, stat_data in result.iteritems():
                perf_data[stat_key.replace(".", "_")] = stat_data

            perf_data_path = "perf_data.json"
            with open(perf_data_path, "w") as perf_data_file:
                json.dump(perf_data, perf_data_file)

            with env.VirtualEnvironment() as venv:
                logging.info('Installing yt client')
                yt_packages = (
                    ('yandex-yt', "0.8.11-0"),
                    ('yandex-yt-yson-bindings-skynet', "0.2.25.post1"),
                )
                for package_name, package_version in yt_packages:
                    env.PipEnvironment(package_name, version=package_version, venv=venv, use_wheel=True).prepare()

                script_path = mst.get_module_path() + '/dump_data_to_yt.py'
                # NOTE(review): the vault token ends up in a shell command line built by plain joining
                cmd = [
                    venv.executable, script_path,
                    "--token", self.get_vault_data("SEARCH-ROBOT", "yt_blender_perf_token"),
                    "--data", perf_data_path,
                    "--table", "//home/search-runtime/test_data/blender/performance",
                ]
                run_process(" ".join(cmd), log_prefix="dump_perf_data_to_yt", shell=True)
        except Exception:
            logging.error("Unable to dump info to YT: %s", eh.shifted_traceback())

    def _extract_results_from_report(self, report, comp_name):
        """
        Turn per-run compare reports into per-statistic analytics records.

        :param report: dict ``run id -> {"data": {stat key: [value1, value2, diff]}}``;
            stat keys have the form ``"<name>.<stat>"``
        :param comp_name: component name; for UPPER the raw results are also dumped to YT
        :return: dict ``stat key -> record`` with the keys "diff", "values", "key", "name",
            "stat", "dimension", "avg", "abs_avg", "abs_diff_avg", "max_value",
            "significance", "allowed" and "diff_type"; when
            ``self.ctx['use_new_calc_eventlog_stats']`` is truthy, also "mean", "std_dev",
            "mean_diff", "avg_std_dev" and "adjusted_dev"
        """
        time_stat = {"Q95", "MeanAvg", "HiAvg"}
        all_stat = {"Count"} | time_stat
        # union of stat keys seen in any run
        keys = set()
        for v in report.values():
            keys.update(v["data"].keys())

        # read results
        result = defaultdict(dict)
        for k in keys:
            result[k]["diff"] = []
            result[k]["values"] = []
            # result[k]["run_ids"] = []
            # runs are taken in sorted order of run ids; a stat missing in a run
            # is represented as [None, None, 0]
            for run_id, shoot_stat in sorted(report.items()):
                d = shoot_stat["data"].get(k, [None, None, 0])
                # result[k]["run_ids"].append(run_id)
                result[k]["diff"].append(d[2])
                result[k]["values"].append(d[0:2])

        if comp_name == pde.ParallelComponent.UPPER:
            self._save_run_data_to_yt(result)

        dimensions = self._get_dimensions()

        # compare results, calc statistics
        for k, data in result.items():
            data["key"] = k
            name, stat = k.split(".")
            data["name"] = name
            data["stat"] = stat
            data["dimension"] = dimensions.get(name, "?")

            # the first run is excluded from the diff statistics
            # (presumably a warm-up run -- TODO confirm)
            diff_values = data["diff"][1:]

            if self.ctx['use_new_calc_eventlog_stats']:
                # unlike the statistics below, mean and deviation use all runs, including the first one
                first_shoot_values = [v[0] for v in data["values"]]
                second_shoot_values = [v[1] for v in data["values"]]
                (first_mean, first_std_dev) = calc_std_dev(first_shoot_values)
                (second_mean, second_std_dev) = calc_std_dev(second_shoot_values)
                if any(v is None for v in first_shoot_values):
                    logging.error("values with None: {}: {}".format(k, repr(first_shoot_values)))
                if any(v is None for v in second_shoot_values):
                    logging.error("values with None: {}: {}".format(k, repr(second_shoot_values)))
                data["mean"] = [first_mean, second_mean]
                data["std_dev"] = [first_std_dev, second_std_dev]
                data["mean_diff"] = second_mean - first_mean
                # quadratic mean of the two standard deviations
                data["avg_std_dev"] = math.sqrt(((first_std_dev * first_std_dev) + (second_std_dev * second_std_dev)) / 2)
                # the part of the mean difference that exceeds the averaged deviation, with the sign kept
                if abs(data["mean_diff"]) > data["avg_std_dev"]:
                    data["adjusted_dev"] = math.copysign(abs(data["mean_diff"]) - data["avg_std_dev"], data["mean_diff"])
                else:
                    data["adjusted_dev"] = 0

            # replace None (means increase from 0 to not 0) to 1000%
            prepared_diff_values = [1000.0 if v is None else v for v in diff_values]

            # count rel diff
            data["avg"] = walsh_avg(prepared_diff_values)
            data["abs_avg"] = abs(data["avg"])

            # count abs diff and max value
            abs_diff = []
            max_value = 0
            for v1, v2 in data["values"][1:]:
                # missing values are counted as zero
                if v1 is None:
                    v1 = 0
                if v2 is None:
                    v2 = 0

                abs_diff.append(v2 - v1)

                max_value = max(max_value, max(v1, v2))

            data["abs_diff_avg"] = walsh_avg(abs_diff)
            data["max_value"] = max_value

            data["significance"] = calc_significance(data["values"][1:])
            data["allowed"] = allow_analytics(data, time_stat, all_stat)
            # initial value; set to "diff" or "warning" by count_diffs_middle
            data["diff_type"] = "no diff"

        return result

    def get_significant_diff(self, data):
        """
        Return the value used to judge the size of a diff.

        It is ``data["adjusted_dev"]`` when ``self.extended_stats`` is truthy and
        ``data["abs_diff_avg"]`` otherwise.
        """
        if self.extended_stats:
            return data["adjusted_dev"]
        return data["abs_diff_avg"]

    def calc_auto_stat(self, result, run_mode, auto_diff, comp_name):
        """
        Classify diffs, decide on auto acceptance and build the resulting statistics.

        When there are major or minor diffs, an html resource describing them is
        created and its id is stored in ``self.ctx['diff_resource_id']``. For the
        UPPER component the blender diff flag is stored in ``self.ctx["blender_diff"]``.

        :return: tuple of two dicts: the summary (main metrics, auto acceptance,
            explanation, summary string) and the details (major diffs, minor diffs,
            warnings, sorted analytics)
        """
        main_metrics, major_diffs, minor_diffs, warnings = self.count_diffs_middle(auto_diff, result, comp_name)

        if comp_name == pde.ParallelComponent.UPPER:
            blender_diff = self.count_diffs_upper(result)
            self.ctx["blender_diff"] = blender_diff
            self.set_info("Diff: {}".format(blender_diff))

        formulas_mode = utils.get_or_default(self.ctx, AutoDiffMode) == "formulas"
        if formulas_mode:
            auto_accepted, explanation = auto_accept_formulas(result)
        else:
            auto_accepted, explanation = auto_accept_middle(result, run_mode)

        if major_diffs or minor_diffs:
            diff_resource = self.create_resource(
                '{}, diff stats'.format(self.descr), 'diff_stats.html', resource_types.OTHER_RESOURCE,
                arch='any'
            )
            diff_header = "COMPARE_MIDDLESEARCH_BINARIES task {diff_task_link} gives the following diff:<br />\n".format(
                diff_task_link=lb.task_link(self.id, str(self.id)),
            )
            diff_rows = [to_string(single_diff) for single_diff in itertools.chain(major_diffs, minor_diffs)]
            fu.write_file(diff_resource.path, diff_header + "\n<br />\n".join(diff_rows))
            self.ctx['diff_resource_id'] = diff_resource.id
            self.mark_resource_ready(diff_resource.id)

        summary_stat = {
            KEY_MAIN_METRICS: main_metrics,
            KEY_AUTO_ACCEPTED: auto_accepted,
            KEY_SIGNIFICANT_DIFF: not auto_accepted,
            KEY_EXPLANATION: explanation,
            KEY_SUMMARY: self.summarize(major_diffs, minor_diffs, warnings),
        }
        details_stat = {
            KEY_MAJOR_DIFFS: major_diffs,
            KEY_MINOR_DIFFS: minor_diffs,
            KEY_WARNINGS: warnings,
            KEY_ANALYTICS: sort_rows(result.itervalues(), comp_name),
        }
        return summary_stat, details_stat

    @staticmethod
    def summarize(major_diffs, minor_diffs, warnings):
        if major_diffs:
            summary = "has diff (" + to_string(major_diffs[0])
            if len(major_diffs) > 1:
                summary += "; ...)"
            else:
                summary += ")"
        elif minor_diffs:
            summary = "has minor diff (" + to_string(minor_diffs[0])
            if len(minor_diffs) > 1:
                summary += "; ...)"
            else:
                summary += ")"
        elif warnings:
            summary = "has warnings (" + to_string(warnings[0])
            if len(warnings) > 1:
                summary += "; ...)"
            else:
                summary += ")"
        else:
            summary = ""
        return summary

    def count_diffs_upper(self, result):
        """
        Tell whether any main RequestTime quantile (Q50, Q95, Q99) has a diff.

        A metric counts as changed when the absolute value of its ``abs_avg`` is
        greater than 1; metrics missing from ``result`` are treated as unchanged.
        """
        for metric_key in ("RequestTime.Q50", "RequestTime.Q95", "RequestTime.Q99"):
            metric = result.get(metric_key, {})
            if abs(metric.get("abs_avg", 0)) > 1:
                return True
        return False

    def count_diffs_middle(self, auto_diff, result, comp_name):
        """
        Classify every statistic of ``result`` as a major diff, a minor diff or a warning.

        A statistic is examined only when ``auto_diff`` is truthy and the record is
        marked as "allowed". It is skipped when its relative change (``abs_avg``) is
        below the applicable threshold or its significance is below 1. Q95 and HiAvg
        of the same name are reported only when both of them changed by at least
        ``q95_threshold``. A reported record gets ``diff_type`` "diff" or "warning";
        a "diff" measured in microseconds whose significant diff is smaller than
        500 in absolute value is considered minor.

        :param auto_diff: when falsy, only main metrics are collected
        :param result: dict ``"<name>.<stat>" -> record`` as built by ``_extract_results_from_report``
        :param comp_name: component name, passed to ``sort_rows``
        :return: tuple (main metrics in a fixed order, sorted major diffs,
            sorted minor diffs, sorted warnings)
        """
        main_metrics_keys = [
            "RequestTime.Count",
            "RequestTime.Avg",
            "RequestTime.Q50",
            "RequestTime.Q95",
            "RequestTime.Q99",
            "MetaSearchTime.Avg",
            "MetaSearchTime.Q50",
            "MetaSearchTime.Q95",
            "MetaSearchTime.Q99",
        ]
        # thresholds for the relative change, in percent
        default_threshold = 0.8
        q95_threshold = 1.5
        reask_threshold = 30.0
        strong_threshold = 5

        # try to guess a type of diff
        main_metrics = {}
        major_diffs = []
        minor_diffs = []
        warnings = []

        for key, data in result.items():
            name, stat = key.split(".")

            if key in main_metrics_keys:
                main_metrics[key] = data

            if not auto_diff or not data["allowed"]:
                continue

            abs_avg = data["abs_avg"]

            if (
                stat == "Q95" and
                "abs_avg" in result[name + ".HiAvg"] and
                (abs_avg < q95_threshold or result[name + ".HiAvg"]["abs_avg"] < q95_threshold)
            ):
                continue

            if (
                stat == "HiAvg" and
                "abs_avg" in result[name + ".Q95"] and
                (abs_avg < q95_threshold or result[name + ".Q95"]["abs_avg"] < q95_threshold)
            ):
                continue

            if abs_avg < default_threshold:
                continue

            if "ReAsk" in key and abs_avg < reask_threshold:
                continue

            if "UnanswersPercent" in key and abs_avg < strong_threshold:
                continue

            if data["significance"] < 1:
                continue

            if data["significance"] == 2 or abs_avg > strong_threshold:
                data["diff_type"] = "diff"
                # compare the magnitude of the diff: abs() must wrap the value,
                # not the result of the comparison
                if data["dimension"] == "microseconds" and abs(self.get_significant_diff(data)) < 500:
                    minor_diffs.append(data)
                else:
                    major_diffs.append(data)
            else:
                data["diff_type"] = "warning"
                warnings.append(data)

        # keep main metrics in the order of main_metrics_keys
        main_metrics_sorted = [main_metrics[key] for key in main_metrics_keys if key in main_metrics]

        return (
            main_metrics_sorted,
            sort_rows(major_diffs, comp_name),
            sort_rows(minor_diffs, comp_name),
            sort_rows(warnings, comp_name)
        )

    def _get_stat(self, key, default=None, check_exist=False):
        if key in self._all_stats:
            return self._all_stats[key]
        ctx_stat = self.ctx.get(key)
        if check_exist:
            eh.ensure(ctx_stat, "key {} must be non-empty".format(key))
        return ctx_stat if ctx_stat else default

    def _check_stats_deviation_failure(self, q95_threshold):
        """
        Fail the task when Q95 measurements are too unstable.

        The check is disabled when the deviation threshold taken from the context
        is not positive. A Q95 statistic (names ending with "Wait" are ignored) is
        unstable for a side when its mean is positive and its standard deviation
        exceeds both ``threshold * mean`` and ``q95_threshold``. All unstable
        statistics are reported through ``eh.check_failed``.

        :param q95_threshold: absolute lower bound for a deviation to be reported
        """
        stat_deviation_threshold = float(utils.get_or_default(self.ctx, FailOnStatisticsDeviationThreshold))
        if stat_deviation_threshold <= 0:
            return

        unstable_stats = {}
        analytics = self._get_stat(KEY_ANALYTICS, default=[], check_exist=True)
        for stat in analytics:
            if stat["stat"] != "Q95" or stat["name"].endswith("Wait"):
                continue
            means = stat.get("mean", [0, 0])
            devs = stat.get("std_dev", [0, 0])
            if len(means) != 2 or len(devs) != 2:
                continue
            first_mean, second_mean = means
            first_dev, second_dev = devs
            for side, mean, dev in (("first", first_mean, first_dev), ("second", second_mean, second_dev)):
                if mean > 0 and dev > (stat_deviation_threshold * mean) and dev > q95_threshold:
                    unstable_stats.setdefault(stat["key"], {})[side] = [mean, dev]

        if unstable_stats:
            error_message = ['Unstable measures']
            for stat_key, measures in unstable_stats.items():
                # a comprehension replaces map() with a tuple-parameter lambda (Python-2-only syntax)
                measure_lines = [
                    "  {}: {} > {}% of {}({})".format(
                        side, values[1], int(stat_deviation_threshold * 100), values[0],
                        stat_deviation_threshold * values[0]
                    )
                    for side, values in measures.items()
                ]
                error_message.append('{}:\n{}\n'.format(stat_key, '\n'.join(measure_lines)))
            eh.check_failed("\n".join(error_message))

    def _check_grouping_rearrange_failure_threshold(self, q95_threshold):
        """
        Fail the task when Q95 of a "<stage>-<grouping>" statistic degraded too much.

        Only statistics whose name consists of exactly two '-'-separated parts and
        whose first part is one of ``self._REARRANGE_STAGES`` are checked. All
        statistics with a significant diff of at least ``q95_threshold`` are
        reported through ``eh.check_failed``.

        :param q95_threshold: minimal significant diff that is treated as a failure
        """
        failed_groupings_stats = {stage: {} for stage in self._REARRANGE_STAGES}
        analytics = self._get_stat(KEY_ANALYTICS, default=[], check_exist=True)
        for stat in analytics:
            if stat["stat"] != "Q95":
                continue
            name_splitted = stat["name"].split('-')
            if len(name_splitted) != 2:
                continue
            stage, grouping = name_splitted
            if stage not in self._REARRANGE_STAGES:
                continue
            significant_diff = self.get_significant_diff(stat)
            logging.info(
                "Checking %s-%s performance degradation threshold: %.1f",
                stage, grouping, significant_diff
            )
            if significant_diff >= q95_threshold:
                failed_groupings_stats[stage][grouping] = significant_diff
        logging.info('Groupings rearrange degradation:\n%s', json.dumps(failed_groupings_stats, indent=2))

        if any(failed_groupings_stats.values()):
            error_message = ['Performance degrade is too heavy']
            for stage, groupings in failed_groupings_stats.items():
                if not groupings:
                    continue
                # a comprehension replaces map() with a tuple-parameter lambda (Python-2-only syntax)
                grouping_lines = [
                    "  {}: {} > {}".format(grp, diff, q95_threshold) for grp, diff in groupings.items()
                ]
                error_message.append('{}:\n{}\n'.format(stage, '\n'.join(grouping_lines)))
            eh.check_failed("\n".join(error_message))

    def _check_failure_threshold(self, q95_threshold):
        """
        Ensure that the significant diff of RequestTime.Q95 is below ``q95_threshold``.

        The check is done through ``eh.ensure`` for every analytics record with the
        "RequestTime.Q95" key.
        """
        analytics = self._get_stat(KEY_ANALYTICS, default=[], check_exist=True)
        request_time_stats = (stat for stat in analytics if stat["key"] == "RequestTime.Q95")
        for stat in request_time_stats:
            significant_diff = self.get_significant_diff(stat)
            logging.info("Checking performance degradation threshold: %.1f", significant_diff)
            failure_text = "Performance degrade is too heavy: {} > {}. ".format(significant_diff, q95_threshold)
            eh.ensure(significant_diff < q95_threshold, failure_text)
            logging.info("Checking performance degradation threshold passed")

    def _check_rearrange_rule_failure_threshold(self, q95_threshold):
        """
        Fail the task when Q95 of a "<rule>-<stage>" statistic degraded too much.

        Only statistics whose name consists of exactly two '-'-separated parts and
        whose second part is one of ``self._REARRANGE_STAGES`` are checked. All
        statistics with a significant diff of at least ``q95_threshold`` are
        reported through ``eh.check_failed``.

        :param q95_threshold: minimal significant diff that is treated as a failure
        """
        failed_rules_stats = {stage: {} for stage in self._REARRANGE_STAGES}
        analytics = self._get_stat(KEY_ANALYTICS, default=[], check_exist=True)
        for stat in analytics:
            if stat["stat"] != "Q95":
                continue
            name_splitted = stat["name"].split('-')
            if len(name_splitted) != 2:
                continue
            rule, stage = name_splitted
            if stage not in self._REARRANGE_STAGES:
                continue
            significant_diff = self.get_significant_diff(stat)
            logging.info(
                "Checking %s-%s performance degradation threshold: %.1f",
                rule, stage, significant_diff
            )
            if significant_diff >= q95_threshold:
                failed_rules_stats[stage][rule] = significant_diff

        logging.info('Rules degradation:\n%s', json.dumps(failed_rules_stats, indent=2))

        if any(failed_rules_stats.values()):
            error_message = ['Performance degrade is too heavy']
            for stage, rules in failed_rules_stats.items():
                if not rules:
                    continue
                # a comprehension replaces map() with a tuple-parameter lambda (Python-2-only syntax)
                rule_lines = [
                    "{}: {} > {}".format(rule, diff, q95_threshold) for rule, diff in rules.items()
                ]
                error_message.append('{}:\n{}\n'.format(stage, '\n'.join(rule_lines)))
            eh.check_failed("\n".join(error_message))

    def _check_response_time(self, comp_name):  # SEARCH-6879
        """
        Fail the task when a calc-stats task reports a too big response time.

        Nothing is checked for the UPPER component. For other components the
        ``first_requests_max_response_time`` context value of every calc-stats task
        is reported in the task info and compared with ``MAX_ALLOWED_DELAY``.
        """
        if comp_name == pde.ParallelComponent.UPPER:
            return

        for calc_task_id in self.ctx[self._KEY_CALC_STATS_TASKS].itervalues():
            calc_task = channel.sandbox.get_task(calc_task_id)
            logging.debug('checking calc_stats_task {}'.format(calc_task_id))
            max_response_time = calc_task.ctx.get('first_requests_max_response_time', 0)
            self.set_info(
                'Response time from task {}: {}'.format(lb.task_link(calc_task_id), max_response_time),
                do_escape=False
            )
            if max_response_time > MAX_ALLOWED_DELAY:
                slow_query_num = calc_task.ctx.get('first_requests_max_response_num', 0)
                eh.check_failed('Response delay of {} query is too big. Got {}, while limit is {}'.format(
                    slow_query_num, max_response_time, MAX_ALLOWED_DELAY
                ))

    def _check_subsource_errors(self, result):
        # type: (Mapping[str, Mapping[str, Any]]) -> None
        # result[key] = {"values": [[v1_old, v1_new], [v2_old, v2_new], ...], ...}

        def check_number_requests_error(percent_error, number_requests_total):
            if number_requests_total > 160:
                return percent_error < 0.20
            if number_requests_total > 80:
                return percent_error < 0.40
            if number_requests_total > 40:
                return percent_error < 0.60
            if number_requests_total > 20:
                return percent_error < 0.80
            if number_requests_total > 10:
                return percent_error < 1.00
            return True

        subsources = set(key[:key.rindex('_')] for key in result.iterkeys() if key.startswith('SubSource') and key.endswith('_Error.Count'))

        for subsource in subsources:
            # если считать requests_counts_error как (SubSourceTYPE_ReqLen.Count - SubSourceTYPE.Count),
            # то будут подсчитаны также hedged запросы (которые генерируют события SubSourceSkip), чего мы не хотим
            requests_counts_error = result[subsource + '_Error.Count']['values']
            # https://nda.ya.ru/3VMcx9
            requests_counts_ok = result[subsource + '.Count']['values'] if subsource + '.Count' in result else [[0, 0]] * len(requests_counts_error)

            for requests_count_error, requests_count_ok in zip(requests_counts_error, requests_counts_ok):
                # requests_count_error, requests_count_ok --- это парочки из двух значений (для старого и нового бинарника)
                # нас интересует только для нового

                requests_count_error = requests_count_error[1]
                requests_count_ok = requests_count_ok[1]
                if requests_count_error is None or requests_count_ok is None:
                    continue
                requests_count_total = requests_count_error + requests_count_ok
                percent_error = requests_count_error * 1.0 / max(requests_count_total, 1)

                if not check_number_requests_error(percent_error, requests_count_total):
                    error_message = 'Number of requests with errors for subsource {} is too big: {} requests of {} ({:.0f}%)'.format(
                        subsource, requests_count_error, requests_count_total, percent_error * 100
                    )
                    eh.check_failed(error_message)

    # String keys used by this task.
    # NOTE(review): in the visible part of the file only _KEY_CALC_STATS_TASKS is used
    # (as a key into self.ctx); the remaining ones are presumably context keys too — confirm
    # against their usages.
    _KEY_DUMP_EVENTLOG_TASK = 'dump_eventlog_task_id'
    # self.ctx[_KEY_CALC_STATS_TASKS] is a mapping whose values are calc-stats task ids
    _KEY_CALC_STATS_TASKS = 'calc_stats_task_ids'
    _KEY_REPORT_URLS = 'report_urls'
    _KEY_COMPARE_TASKS = 'compare_tasks'
    _KEY_RESTARTED_TASKS = 'restarted_broken_tasks'
    _KEY_SUBSOURCE_REQUESTS_DIFF = 'subsource_requests_diff'
    _KEY_ANALYTICS_RESOURCE_ID = 'analytics_resource_id'

    class TDiffStat(object):
        """
        Plain holder for one entry of the diff statistics resources table.

        Declared as a new-style class so that it behaves the same way under
        Python 2 as every other class in the module (descriptors, ``super``,
        ``type(instance)``), and given a ``__repr__`` so instances are readable
        in logs and debuggers.

        :param resource_key: string key associated with the resource
        :param resource_description: human-readable description of the resource
        """
        # class-level defaults, shadowed by the instance attributes set in __init__
        resource_key = ''
        resource_description = ''

        def __init__(self, resource_key, resource_description):
            self.resource_key = resource_key
            self.resource_description = resource_description

        def __repr__(self):
            return '{}(resource_key={!r}, resource_description={!r})'.format(
                type(self).__name__, self.resource_key, self.resource_description
            )

    _HTML_RESOURCES_DIR = 'html_diffs'
    _KEY_ANALYTICS_HTML = 'analytics_html'
    _KEY_WARNINGS_HTML = 'warnings_html'
    _KEY_MAJOR_DIFFS_HTML = 'major_diffs_html'
    _KEY_MINOR_DIFFS_HTML = 'minor_diffs_html'
    # Maps a diff statistics key to the TDiffStat describing its resource.
    # The resource keys reuse the _KEY_*_HTML constants above instead of repeating
    # the same string literals, so the two definitions cannot drift apart.
    _DIFF_STAT_RESOURCES = {
        KEY_ANALYTICS: TDiffStat(
            resource_key=_KEY_ANALYTICS_HTML,
            resource_description='analytics',
        ),
        KEY_MAJOR_DIFFS: TDiffStat(
            resource_key=_KEY_MAJOR_DIFFS_HTML,
            resource_description='major diffs',
        ),
        KEY_MINOR_DIFFS: TDiffStat(
            resource_key=_KEY_MINOR_DIFFS_HTML,
            resource_description='minor diffs',
        ),
        KEY_WARNINGS: TDiffStat(
            resource_key=_KEY_WARNINGS_HTML,
            resource_description='warnings',
        ),
    }

    # Stage names for which per-rule stats are compared with the q95 performance
    # degradation threshold: a "<rule>, <stage>" stat is only checked when its stage
    # is listed here (membership test `stage in self._REARRANGE_STAGES`).
    _REARRANGE_STAGES = [
        "RearrangeAfterMerge",
        "RearrangeAfterFetch",
        "CalcBlenderFeatures",
        "CalcBlenderMetaFeatures",
        "RearrangeAfterBlenderFactors",
        "RearrangeAfterIntentWeights",
        "RearrangeAfterBlend",
    ]


# Module-level alias for the task class defined above.
# NOTE(review): presumably the name through which the Sandbox framework discovers
# the task class in this module — confirm against the SDK conventions.
__Task__ = CompareMiddlesearchBinaries
