# -*- coding: utf-8 -*-

import json
import logging
import os
import socket
import time
import urlparse

import sandbox.common.types.client as ctc

from sandbox.projects import resource_types

from sandbox.projects.common import apihelpers
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import utils
from sandbox.projects.common.differ import coloring
from sandbox.projects.release_machine.core import const as rm_const
from sandbox.projects.release_machine import yappy as yappy_helper
from sandbox.projects.common.search import bugbanner
from sandbox.projects.common.search import queries as sq
from sandbox.projects.common.search import requester
from sandbox.projects.common.search.components import Middlesearch
from sandbox.projects.images.resources.task import ImagesProductionResourcesTask
from sandbox.projects.common.betas.beta_api import BetaApi

from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.task import SandboxTask


class Params:
    # Container for the input parameters of the CHECK_CACHEHIT_FOR_BETA task.
    # The `params` list at the bottom is what the task exposes as its input parameters.

    class CheckedBeta(parameters.SandboxStringParameter):
        # Report url of the beta under test (requests are appended to this url as is)
        name = 'checked_beta'
        description = 'Checked beta (default: http://upper-hamster.hamster.yandex.ru/search?)'
        default_value = 'http://upper-hamster.hamster.yandex.ru/search?'

    class SampleBeta(parameters.SandboxStringParameter):
        # Report url of the reference beta the checked one is compared with
        name = 'sample_beta'
        description = 'Sample beta (default: http://upper-hamster.hamster.yandex.ru/search?)'
        default_value = 'http://upper-hamster.hamster.yandex.ru/search?'

    class MMetaType(parameters.SandboxSelectParameter):
        # Kind of middlesearch to start locally; the description lists the values
        # that can actually be selected (it used to say IMAGES while the value is IMGS)
        name = 'mmeta_type'
        description = 'Mmeta type (WEB/IMGS)'
        default_value = 'WEB'
        choices = (
            ('WEB', 'WEB'),
            ('IMGS', 'IMGS'),
        )

    class MMetaTag(parameters.SandboxStringParameter):
        # Optional tag forwarded to the GET_MIDDLESEARCH_RESOURCES subtask
        name = 'mmeta_tag'
        description = 'Get mmeta resources from (skynet tag)'
        default_value = None

    class GetImgsResourcesViaMediasearchApi(parameters.SandboxBoolParameter):
        # IMGS only: take rearrange data/index from the task itself instead of the subtask
        name = 'imgs_res_via_mediasearch_api'
        description = 'Get IMGS resources via common/mediasearch api'
        default_value = True

    class UsersQueries(parameters.LastReleasedResource):
        # Resource with the queries used for every shooting
        name = 'users_queries_resource_id'
        description = 'Users queries'
        resource_type = 'USERS_QUERIES'
        default_value = 0

    class LimitUsersQueries(parameters.SandboxIntegerParameter):
        # Upper bound on the number of queries taken from the resource
        name = 'limit_users_queries'
        description = 'Limit used queries (0 == no limit)'
        default_value = 0

    class AdditionalCgiParamsForSampleBeta(parameters.SandboxStringParameter):
        # Extra cgi string appended to every request sent through the sample beta
        name = 'sample_beta_additional_cgi_params'
        description = 'Additional cgi params for sample betas'
        default_value = ''

    class AdditionalCgiParamsForCheckedBeta(parameters.SandboxStringParameter):
        # Extra cgi string appended to every request sent through the checked beta
        name = 'checked_beta_additional_cgi_params'
        description = 'Additional cgi params for checked betas'
        default_value = ''

    params = [
        CheckedBeta,
        SampleBeta,
        MMetaType,
        MMetaTag,
        GetImgsResourcesViaMediasearchApi,
        UsersQueries,
        LimitUsersQueries,
        AdditionalCgiParamsForSampleBeta,
        AdditionalCgiParamsForCheckedBeta,
    ]


class ReportRequester(requester.Requester):
    """
        Requester that sends requests to a report url instead of sending them
        straight to the search component in use.

        Every built request carries srcrwr/metahost2 cgi parameters holding
        "<this host>:<search component port + 1>".
    """

    def __init__(self, report_url, mmeta_type="WEB", skip_first_errors=0, additional_cgi_params=''):
        super(ReportRequester, self).__init__()
        self.report_url = report_url
        # our own host name, later placed into the source-override cgi parameters
        self.hostname = socket.gethostname()
        logging.info('redirect requests to host=' + self.hostname)
        # self.ignore_incomplete_read = True
        self.re_try_limit = 10
        self.mmeta_type = mmeta_type
        self.skip_errors = skip_first_errors
        self.additional_cgi_params = additional_cgi_params

    def build_request(self, req):
        """
            Return the full url for `req`: report url + request + override cgi + additional cgi.
            Raises Exception for an mmeta type other than WEB/IMGS.
        """
        endpoint = "{}:{}".format(self.hostname, int(self.search_component.port) + 1)
        if self.mmeta_type == "WEB":
            overrides = "".join(
                "&srcrwr={}:{}".format(source, endpoint)
                for source in ("WEB", "WEB_MISSPELL")
            )
            cgi_tail = "&no-tests=1" + overrides + "&timeout=4999999"
        elif self.mmeta_type == "IMGS":
            image_sources = (
                "IMAGES",
                "IMAGES_MISSPELL",
                "IMAGESP",
                "IMAGESP_MISSPELL",
                "IMAGESCBIR",
                "IMAGESQUICK",
                "IMAGESQUICKP",
            )
            overrides = "".join(
                "&metahost2={}:{}".format(source, endpoint)
                for source in image_sources
            )
            cgi_tail = "&no-tests=1&timeout=4999999" + overrides
        else:
            raise Exception('Not supported mmeta_type: {}'.format(self.mmeta_type))
        request = self.report_url + req + cgi_tail + self.additional_cgi_params
        logging.debug('Built request: %s', request)
        return request

    def raise_on_fail_request(self, nreq, err):
        """
            Override of the failure hook: the first N failures (N = skip_first_errors)
            are swallowed, which lowers the sensitivity of the test to report glitches.
            Once the budget is spent the parent implementation is called.
        """
        if self.skip_errors:
            self.skip_errors -= 1
            return None
        return super(ReportRequester, self).raise_on_fail_request(nreq, err)


class WebSourceWorker(requester.Worker):
    """
        Worker that replaces the raw report response with the network location
        of the first WEB source found in the eventlog of that response.
    """

    def _on_response(self, nreq, resp):
        """
            Parse `resp` (json with an 'eventlog' field), find the first TSourceStart
            event for the WEB source and pass the netloc of its url to the parent handler.

            Raises Exception when no WEB destination can be extracted. If the eventlog
            contains TSourceError events, each of them is reported to the task info and
            the worker pauses for 5 minutes (once per response) before raising.
        """
        eventlog = json.loads(resp)['eventlog']
        eventlog_lines = eventlog.split('\n')

        dst_host = None
        for log_line in eventlog_lines:
            logging.debug("EventLogFrame: %s", log_line)
            t = log_line.split('\t')
            if 'TSourceStart' not in t:
                continue
            start_idx = t.index('TSourceStart')
            # the source name is expected at +1 and its url at +3;
            # a truncated line cannot give us an address, so skip it instead of crashing
            if len(t) <= start_idx + 3:
                continue
            if t[start_idx + 1] == 'WEB':
                dst_host = urlparse.urlparse(t[start_idx + 3]).netloc
                break

        if not dst_host:
            last_error = ""
            for log_line in eventlog_lines:
                t = log_line.split('\t')
                if len(t) >= 3 and t[2] == "TSourceError":
                    channel.task.set_info(
                        'Error with request: \n{}\n Details are in common.log.'.format(log_line)
                    )
                    last_error = log_line
            if last_error:
                # Sleep once per failed response, not once per error line:
                # several errors in one eventlog must not multiply the delay
                logging.warning("Sleep for 5 minutes before next retry.")
                time.sleep(300)
            logging.error("Parsed event log without WEB subsource:\n%s\n\n\n", eventlog)
            raise Exception("Cannot extract WEB destination address, see logs for details. \n{}".format(last_error))

        super(WebSourceWorker, self)._on_response(nreq, dst_host)


def get_web_source_worker(qreqs, qresps, worker_id):
    """Create a WebSourceWorker for the given queues and run it."""
    worker = WebSourceWorker(worker_id, qreqs, qresps)
    worker.run()


def get_subdir(index_dir, subdir_prefix):
    """
        Return the path of a directory inside `index_dir` whose name starts with
        `subdir_prefix` (the first one met in os.listdir order).
        Raises Exception when there is no such directory.
    """
    for entry in os.listdir(index_dir):
        if not entry.startswith(subdir_prefix):
            continue
        candidate = os.path.join(index_dir, entry)
        if os.path.isdir(candidate):
            return candidate
    raise Exception('not found dir {}* in {}'.format(subdir_prefix, index_dir))


class ReportJsonEventlog(requester.Requester):
    """
        Requester that asks the report for its eventlog (json_dump=eventlog) and,
        through WebSourceWorker, collects the address of the WEB source the first
        subsource request was sent to.
    """

    def __init__(self, report_url, additional_cgi_params=''):
        super(ReportJsonEventlog, self).__init__(requester_worker=get_web_source_worker)
        self.report_url = report_url
        self.additional_cgi_params = additional_cgi_params
        self.re_try_limit = 5
        # request number -> WEB source address, filled in on_response
        self.web_sources = {}

    def build_request(self, req):
        """Return report url + request + eventlog cgi parameters + additional cgi."""
        eventlog_cgi = "&no-tests=1&json_dump=eventlog&timeout=4999999"
        base_request = self.report_url + req
        return base_request + eventlog_cgi + self.additional_cgi_params

    def on_response(self, nreq, resp):
        """Remember the response for request number `nreq`, then run the parent handler."""
        self.web_sources[nreq] = resp
        super(ReportJsonEventlog, self).on_response(nreq, resp)


class CacheHitsStat:
    # Per-collection counters; both are kept as floats:
    #   count_reqs - number of counted requests
    #   cache_hit  - accumulated cache hit value

    def __init__(self):
        self.count_reqs = self.cache_hit = 0.0

    def __repr__(self):
        # the instance is printed as the plain dict of its counters
        return repr(vars(self))


class CheckCachehitForBeta(bugbanner.BugBannerTask, ImagesProductionResourcesTask):
    """
        **Description**

         Check that identical requests are consistently distributed to the same middlesearches

         1. Shoot the beta and remember, for every request,
            the address the first request to the WEB subsource goes to on noapache
         2. Shoot the beta again, saving the WEB source addresses once more
         3. Check how often identical requests from the first and the second shooting go
            to the same middlesearch: if <95%, the task FAILs
            (NOTE: the limit actually used in _check_request_distribution is 86%).
        (the consistency check is not performed for IMGS - checking WEB is assumed to be enough)

         Check that requests hit the cache:

         1. Take the current (last released) version of middlesearch
         2. Take config/data for middlesearch from the hosts with the given tag
            (for example, hamster middlesearches: I@MSK_WEB_MMETA_HAMSTER)
         3. Start a middlesearch instance locally
         4. Shoot the two given addresses (sample + beta) with requests, adding a parameter
            (&metahost=...) that redirects requests to the started middlesearch

            1. First shooting through sample (fills the cache)
            2. Second shooting through sample again (collects statistics of hits into the filled cache -
               the CacheHit event of the eventlog is used)
            3. Third shooting through beta (collects cache hit statistics).
            4. Fourth shooting through beta (collects cache hit statistics) - a control run,
               - checks that the second pass over the beta does hit the cache, i.e. that unstable
               requests are not being sent to the middlesearch
               (random in pron-s, etc)

         5. Sum up/compare the statistics
         6. Check that the cache hit over all collections on the fourth shooting is >75%, otherwise FAIL the task
            (NOTE: _check_cachehit actually fails the task when the fourth-shooting hit ratio is more than
            10% lower, relatively, than the sample hit ratio, and only when the sample ratio exceeds 0.1)

        Result:

        ctx['web_req_distribution_matching'] holds the percentage of identical requests
        that were distributed to the same middlesearches.

        ctx['cache_hit_ratio_diff'] holds the change (in percent) of the share of requests whose results
        were found in the cache (the more negative, the worse) + a small table with more detailed
        statistics is built.

        The three lines of a table cell correspond to the 2nd, 3rd and 4th shootings
        (percents in brackets are relative to the result of the 2nd shooting - the first line of the cell)
    """

    type = 'CHECK_CACHEHIT_FOR_BETA'
    input_parameters = Params.params
    execution_space = 70000  # 70 Gb
    # 61 << 10 == 62464, i.e. 61 Gb if the unit is Mb (an earlier note said "64 Gb", which does not match)
    required_ram = 61 << 10
    client_tags = ctc.Tag.LINUX_PRECISE & ~ctc.Tag.LXC
    # set from the 'mmeta_type' input parameter at the start of on_execute
    mmeta_type = None

    @property
    def footer(self):
        """
            Return the footer table built from ctx['stat'] (one row per collection,
            collections sorted in reverse order), or None while there are no statistics.
        """
        stat = self.ctx.get("stat")
        if not stat:
            return None

        collection_names = sorted(stat, reverse=True)
        value_columns = (
            ("reqs", "Requests"),
            ("cache_hit", "Cache hits"),
            ("cache_hit_ratio", "Cache hits ratio"),
        )

        header = [{"key": "coll", "title": "Collection"}]
        body = {"coll": collection_names}
        for column_key, column_title in value_columns:
            header.append({"key": column_key, "title": column_title})
            body[column_key] = [
                self._format_foot(stat[name][column_key]) for name in collection_names
            ]

        table = {"header": header, "body": body}
        return [
            {
                'helperName': '',
                'content': {"<h4>Sample/checked beta statistics</h4>": table},
            }
        ]

    @staticmethod
    def _format_foot(array):
        """
            Render a table cell for a triple of values [sample, beta, beta control run].

            The first value is printed as is, the other two are printed together with
            their colored relative difference (in percent) to the first value, or "n/a"
            when the first value is zero. Anything that is not a triple is returned as str().
        """
        if len(array) != 3:
            return str(array)

        baseline = array[0]
        if baseline != 0:
            rel_diffs = [100 * (array[idx] - baseline) / baseline for idx in (1, 2)]
        else:
            rel_diffs = ["n/a", "n/a"]

        compared_row = '<span style="color:blue">{:2.2f}</span> ({})'
        template = '</br>'.join(('{:2.2f}', compared_row, compared_row))
        return template.format(
            round(baseline, 2),
            round(array[1], 2), coloring.color_diff(rel_diffs[0], max_diff=-0.5, round_to=2),
            round(array[2], 2), coloring.color_diff(rel_diffs[1], max_diff=-0.5, round_to=2),
        )

    def _wait_betas_consistency(self):
        """
            For the checked and the sample beta: make sure the beta exists
            (eh.check_failed otherwise) and wait for its consistency.
            The beta name is the part of the url between '//' and the first dot.
        """
        api = BetaApi.fromurl(
            token=self.get_vault_data(rm_const.COMMON_TOKEN_OWNER, rm_const.COMMON_TOKEN_NAME)
        )
        for beta_param in (Params.CheckedBeta, Params.SampleBeta):
            beta_url = utils.get_or_default(self.ctx, beta_param)
            host = beta_url.split('//')[-1]
            beta_name = host.split('.')[0]
            if not api.beta_exists(beta_name):
                eh.check_failed("Beta '{}' doesn't exist in sdms! Unable to check status!".format(beta_name))
            yappy_helper.wait_consistency(self, api, beta_name)

    def on_enqueue(self):
        """Make every non-empty additional cgi string in ctx start with '&'."""
        for param in (Params.AdditionalCgiParamsForSampleBeta, Params.AdditionalCgiParamsForCheckedBeta):
            cgi_params = self.ctx[param.name]
            if not cgi_params:
                continue
            if cgi_params[0] != '&':
                self.ctx[param.name] = '&' + cgi_params

    def on_execute(self):
        """
            Task entry point: wait for the betas, obtain resources, then run the
            request distribution check (WEB only) and the cache hit check.
        """
        self._wait_betas_consistency()
        self.add_bugbanner(bugbanner.Banners.NoapacheUpper)
        self.mmeta_type = self.ctx[Params.MMetaType.name]
        # self._get_resources_dbg() # debug plugin (production resources import does not work in local sandbox)
        self._get_resources()
        if self.mmeta_type == 'WEB':
            # request distribution is checked for WEB only
            # (it is assumed that IMGS behaves the same way: if WEB is ok, images are ok too)
            self._check_request_distribution()
        self._check_cachehit()

    def _check_request_distribution(self):
        """
            Shoot the checked beta twice and compare, request by request, the WEB source
            address chosen in each run.

            Stores the percentage of matching requests in ctx['web_req_distribution_matching'].
            Raises SandboxTaskFailureError when nothing was collected in the first run or
            when the percentage is below the limit.
        """
        logging.info('Check requests distribution')
        dist1 = self._get_beta_requests_distribution()
        dist2 = self._get_beta_requests_distribution()
        if not dist1:
            # nothing to compare; fail explicitly instead of dividing by zero below
            raise SandboxTaskFailureError(
                'Cannot check subsource WEB requests distribution: no responses were collected'
            )

        # compare distribution; a request missing from the second run counts as a mismatch
        coincidents_count = sum(
            1 for request, address in dist1.items()
            if request in dist2 and dist2[request] == address
        )

        match_percent = coincidents_count * 100.0 / len(dist1)
        self.ctx['web_req_distribution_matching'] = match_percent
        match_bottom_limit = 86.0  # 92 is still not enough, FIXME(mvel): see why
        if match_percent < match_bottom_limit:
            logging.debug("Request distribution1: %s", dist1)
            logging.debug("Request distribution2: %s", dist2)
            raise SandboxTaskFailureError(
                'Invalid subsource WEB requests distribution, - match only {:.2f}% , but expected > {:.2f}%'.format(
                    match_percent,
                    match_bottom_limit,
                )
            )

    def _get_beta_requests_distribution(self):
        """
            Shoot the checked beta with the users queries and
            return hash { request: address-first-asked-web-subsource }.

            Raises SandboxTaskFailureError when the requester reports an error.
        """
        rr = ReportJsonEventlog(
            self.ctx[Params.CheckedBeta.name],
            additional_cgi_params=self.ctx[Params.AdditionalCgiParamsForCheckedBeta.name],
        )
        requests_path = channel.task.sync_resource(self.ctx[Params.UsersQueries.name])
        # keep the queries file open only while requests are being sent, then close it
        with open(requests_path) as requests_file:
            requests_iterator = (requester.format_users_queries_line(line) for line in requests_file)
            rr.init(
                requests_iterator,
                process_count=5,
                requests_limit=self.ctx[Params.LimitUsersQueries.name]
            )
            rr.send_requests()
        if rr.error:
            raise SandboxTaskFailureError(rr.error)
        return rr.web_sources

    def _get_resources(self):
        """
            Start a GET_MIDDLESEARCH_RESOURCES subtask (unless a live one is already
            recorded in ctx), save the ids of the resources needed for shooting into ctx
            and wait for the subtask.
        """
        if 'resources_task' in self.ctx:
            known_task = channel.sandbox.get_task(self.ctx['resources_task'])
            if known_task.status != SandboxTask.Status.DELETED:
                return
            # handle deleted subtask case
            del self.ctx['resources_task']
            logging.debug("Resources task is deleted, start once more")

        logging.info('getting resources ...')
        if self.mmeta_type == 'WEB':
            get_resources_ctx = {
                'get_mmeta_cfg': True,
                'get_mmeta_data': True,
                'get_mmeta_models': True,
            }
            tag = self.ctx.get(Params.MMetaTag.name)
            if tag:
                get_resources_ctx['mmeta_tag'] = tag
            conf = self.ctx.get('mmeta_conf')
            if conf:
                get_resources_ctx['mmeta_conf'] = conf
        elif self.mmeta_type == 'IMGS':
            get_resources_ctx = {'get_img_mmeta_cfg': True}
            tag = self.ctx.get(Params.MMetaTag.name)
            if tag:
                get_resources_ctx['img_mmeta_tag'] = tag
            conf = self.ctx.get('img_mmeta_conf')
            if conf:
                get_resources_ctx['img_mmeta_conf'] = conf
            if not self._get_imgs_resources_via_mediaserch_api():
                get_resources_ctx['get_img_mmeta_img_data'] = True
                get_resources_ctx['get_img_mmeta_img_index'] = True
        else:
            get_resources_ctx = {}
            eh.fail("Unsupported mmeta type: {}".format(self.mmeta_type))

        task = self.create_subtask(
            task_type='GET_MIDDLESEARCH_RESOURCES',
            input_parameters=get_resources_ctx,
            description='{}, get resources'.format(self.descr)
        )

        # type-specific resources first, then the middlesearch binary of the matching type
        if self.mmeta_type == 'WEB':
            web_resources = (
                ('mmeta_cfg', "cfg"),
                ('mmeta_data', "data"),
                ('mmeta_models_archive', "models"),
            )
            for ctx_key, res_type in web_resources:
                self.ctx[ctx_key] = task.ctx[get_res_name(task, "mmeta", res_type)]
            binary_type = resource_types.RANKING_MIDDLESEARCH_EXECUTABLE
        elif self.mmeta_type == 'IMGS':
            self.ctx['mmeta_cfg'] = task.ctx[get_res_name(task, "img_mmeta", "cfg")]
            if self._get_imgs_resources_via_mediaserch_api():
                self.ctx['mmeta_rearrange_data'] = self._get_middlesearch_data()
                self.ctx['mmeta_rearrange_index'] = self._get_middlesearch_index()
            else:
                self.ctx['mmeta_rearrange_data'] = task.ctx[get_res_name(task, "img_mmeta", "img_data")]
                self.ctx['mmeta_rearrange_index'] = task.ctx[get_res_name(task, "img_mmeta", "img_index")]
            binary_type = resource_types.IMAGES_MIDDLESEARCH_EXECUTABLE
        else:
            binary_type = None

        if binary_type is not None:
            middle_binary = apihelpers.get_last_released_resource(binary_type, arch='linux')
            self.ctx['middle_binary'] = middle_binary.id

            # evlogdump is taken from the task that built the middlesearch binary
            evlogdump_resources = channel.sandbox.list_resources(
                resource_types.EVLOGDUMP_EXECUTABLE, omit_failed=True, task_id=middle_binary.task_id
            )
            self.ctx['evlogdump_binary'] = evlogdump_resources[0].id

        self.ctx['resources_task'] = task.id
        self.wait_all_tasks_completed([task.id])

    def _get_imgs_resources_via_mediaserch_api(self):
        """Return the value of the 'imgs_res_via_mediasearch_api' parameter (or its default)."""
        via_api = utils.get_or_default(self.ctx, Params.GetImgsResourcesViaMediasearchApi)
        return via_api

    def _get_resources_dbg(self):
        """Debug replacement for _get_resources: put hard-coded resource ids into ctx."""
        debug_resources = (
            ('mmeta_cfg', 65),
            ('mmeta_data', 63),
            ('middle_binary', 66),
            ('evlogdump_binary', 98),
            ('mmeta_models_archive', 2),
        )
        for ctx_key, resource_id in debug_resources:
            self.ctx[ctx_key] = resource_id

    def _check_cachehit(self):
        """
            Shoot the local middlesearch four times (sample, sample, checked beta, checked beta)
            and compare cache hit statistics of the last three runs.

            Results:
              * ctx['stat'] - {collection or 'ALL': {'reqs': [...], 'cache_hit': [...],
                'cache_hit_ratio': [...]}}, every list holding values for the
                sample / beta / beta control runs;
              * ctx['cache_hit_ratio_diff'] - (beta ratio - sample ratio) * 100 over 'ALL'.

            Raises SandboxTaskFailureError when the control-run ratio is more than 10% lower
            (relatively) than the sample ratio, provided the sample ratio is above 0.1.
        """
        logging.info('checking cachehit ...')
        sample_url = self.ctx[Params.SampleBeta.name]
        sample_cgi = self.ctx[Params.AdditionalCgiParamsForSampleBeta.name]
        checked_url = self.ctx[Params.CheckedBeta.name]
        checked_cgi = self.ctx[Params.AdditionalCgiParamsForCheckedBeta.name]

        # fill the cache
        self._shoot_middlesearch_thru_report(sample_url, sample_cgi)
        # collect cache hit statistics (the same requests are sent again)
        sample_stat = self._shoot_middlesearch_thru_report(sample_url, sample_cgi)
        # collect cache hit statistics when the beta is used
        checked_stat = self._shoot_middlesearch_thru_report(checked_url, checked_cgi)
        # collect control cache hit statistics when the beta is used
        checked_stat2 = self._shoot_middlesearch_thru_report(checked_url, checked_cgi)

        # compare beta statistics with the sample
        idx_sample = 0
        idx_beta = 1
        idx_beta2 = 2
        runs = (sample_stat, checked_stat, checked_stat2)  # order matches the indexes above

        def hit_ratios(coll_stat):
            # cache hits per request for every run; 0 when the run had no requests
            return [
                (hits / reqs) if reqs else 0.
                for reqs, hits in zip(coll_stat['reqs'], coll_stat['cache_hit'])
            ]

        totals = {'reqs': [0., 0., 0.], 'cache_hit': [0., 0., 0.]}
        stat = {'ALL': totals}
        empty_stat = CacheHitsStat()

        # take collections from ALL three runs: a collection seen only in the control run
        # must not be lost from the totals
        collections = set()
        for run_stat in runs:
            collections.update(run_stat)

        for collection in collections:
            per_run = [run_stat.get(collection, empty_stat) for run_stat in runs]
            coll_stat = {
                'reqs': [run.count_reqs for run in per_run],
                'cache_hit': [run.cache_hit for run in per_run],
            }
            stat[collection] = coll_stat
            for key, sums in totals.items():
                for idx, value in enumerate(coll_stat[key]):
                    sums[idx] += value

        for coll_stat in stat.values():
            coll_stat['cache_hit_ratio'] = hit_ratios(coll_stat)
        self.ctx['stat'] = stat

        all_ratios = stat['ALL']['cache_hit_ratio']
        sample_hit_ratio = all_ratios[idx_sample]
        beta_hit_ratio = all_ratios[idx_beta]
        beta2_hit_ratio = all_ratios[idx_beta2]
        self.ctx['cache_hit_ratio_diff'] = (beta_hit_ratio - sample_hit_ratio) * 100

        bottom_limit_reduce_cache_hit_ratio_perc = 10
        if beta2_hit_ratio < sample_hit_ratio and sample_hit_ratio > 0.1:
            lost_cache_hit_ratio_perc = (sample_hit_ratio - beta2_hit_ratio) * 100. / sample_hit_ratio
            if lost_cache_hit_ratio_perc > bottom_limit_reduce_cache_hit_ratio_perc:
                raise SandboxTaskFailureError('Reducing cache hit ratio exceed limit: {:.2f}% > {:.2f}%'.format(
                    lost_cache_hit_ratio_perc,
                    bottom_limit_reduce_cache_hit_ratio_perc,
                ))

    def _shoot_middlesearch_thru_report(self, report_url, additional_cgi_params):
        """
            Start a local middlesearch, send the users queries to `report_url`
            (with `additional_cgi_params` appended) through ReportRequester, then dump the
            middlesearch eventlog with evlogdump and return the statistics parsed from it
            ({collection: CacheHitsStat}).

            Raises SandboxTaskFailureError when the requester reports an error.
        """
        logging.info('shoot middlesearch thru {}'.format(report_url))
        # errors on 1% of requests are allowed (ignored)
        # (the real share may be bigger if there are fewer requests than the limit option says)
        skip_first_errors = self.ctx.get(Params.LimitUsersQueries.name, 0) // 100
        rr = ReportRequester(
            report_url,
            self.mmeta_type,
            skip_first_errors=skip_first_errors,
            additional_cgi_params=additional_cgi_params,
        )
        requests_path = channel.task.sync_resource(self.ctx[Params.UsersQueries.name])

        mmeta_exe_path = channel.task.sync_resource(self.ctx['middle_binary'])
        mmeta_cfg_path = channel.task.sync_resource(self.ctx['mmeta_cfg'])
        pure_dir = None
        archive_model_path = None
        rearrange_dir = None
        rearrange_index_dir = None
        evlogdump_exe_path = channel.task.sync_resource(self.ctx['evlogdump_binary'])

        if self.mmeta_type == 'WEB':
            mmeta_data_path = channel.task.sync_resource(self.ctx['mmeta_data'])
            archive_model_path = channel.task.sync_resource(self.ctx['mmeta_models_archive'])
            pure_dir = os.path.join(mmeta_data_path, 'pure')
            rearrange_dir = os.path.join(mmeta_data_path, 'rearrange')
        elif self.mmeta_type == 'IMGS':
            rearrange_dir = channel.task.sync_resource(self.ctx['mmeta_rearrange_data'])
            rearrange_index_dir = channel.task.sync_resource(self.ctx['mmeta_rearrange_index'])
            if self._get_imgs_resources_via_mediaserch_api():
                rearrange_dir = os.path.join(rearrange_dir, 'rearrange')
            else:
                rearrange_index_dir = get_subdir(rearrange_index_dir, 'imgmmeta-')

        search_component = Middlesearch(
            is_int=False,
            work_dir=channel.task.abs_path(),
            binary=mmeta_exe_path,
            config_file=mmeta_cfg_path,
            pure_dir=pure_dir,
            rearrange_dir=rearrange_dir,
            rearrange_index_dir=rearrange_index_dir,
            archive_model_path=archive_model_path,
            # port=8666, # for debug in local sandbox
        )
        # the queries file is needed only while the shooting runs; close it right after
        with open(requests_path) as requests_file:
            requests_iterator = (requester.format_users_queries_line(line) for line in requests_file)
            rr.use(
                requests_iterator,
                search_component,
                process_count=5,
                requests_limit=self.ctx[Params.LimitUsersQueries.name]
            )
        if rr.error:
            raise SandboxTaskFailureError(rr.error)

        # parse the eventlog
        proc = process.run_process(
            [
                evlogdump_exe_path,
                '-i',
                'EnqueueYSRequest,ContextCreated,CacheHit,MakePageEnd',
                search_component.get_event_log_path(),
            ],
            log_prefix="parse_evlog",
            wait=True,
            check=True,
            outputs_to_one_file=False,
        )

        return self._collect_stats(proc.stdout_path)

    @staticmethod
    def _collect_stats(stdout_path):
        """
            Parse tab-separated evlogdump output at `stdout_path` and return
            {collection: CacheHitsStat}; the 'admin' collection is dropped.

            Handled events (event name is the 3rd field):
              * EnqueueYSRequest - starts a request, the 7th field is the collection;
              * ContextCreated   - the 4th field is the cgi string; a request is counted once
                per (collection, text), repeated ones are ignored;
              * CacheHit         - the 5th field is added to the cache hit counter of the
                current, not yet handled, request;
              * MakePageEnd      - ends the current request.
        """
        stat = {}  # {'collection': cache_hits_stat() }
        collection = None
        text = None
        seen_requests = set()  # collection + '\t' + searched text
        line_counter = 0
        for evline in fu.read_line_by_line(stdout_path):
            line_counter += 1
            tk = evline.split('\t')
            if len(tk) < 3:
                # blank or truncated line - there is no event name to look at
                continue
            ev = tk[2]
            if ev == 'EnqueueYSRequest':
                collection = tk[6].lstrip('/')
                if collection not in stat:
                    stat[collection] = CacheHitsStat()
            elif ev == 'ContextCreated':
                cgi = sq.parse_cgi_params(tk[3])
                text_lst = cgi.get('text', None)
                # a context without a preceding EnqueueYSRequest cannot be attributed
                # to any collection, so it is skipped
                if text_lst and collection is not None:
                    text = text_lst[0]
                    req_mark = collection + '\t' + text
                    if req_mark in seen_requests:
                        text = None  # already processed req - ignore it
                    else:
                        seen_requests.add(req_mark)
                        stat[collection].count_reqs += 1
            elif ev == 'MakePageEnd':
                collection = None
                text = None
            elif ev == 'CacheHit':
                if text is not None:  # avoid duplicate handling statistics on same reqs
                    stat[collection].cache_hit += float(tk[4])
                collection = None
                text = None
        # service requests are ignored; there may be none of them at all
        stat.pop('admin', None)
        logging.info("%s lines of eventlog processed", line_counter)
        printable_stat = dict((name, vars(coll_stat)) for name, coll_stat in stat.items())
        logging.info("Stats collected:\n%s", json.dumps(printable_stat, indent=2, sort_keys=True))
        return stat


def get_res_name(task, host_type, res_type):
    """
        Return the ctx key "<name>_<res_type>", where <name> is task.ctx["<host_type>_conf"]
        with leading/trailing dashes stripped, or `host_type` itself when that is empty or absent.
    """
    conf_key = "{}_conf".format(host_type)
    res_name = task.ctx.get(conf_key, "").strip("-")
    if not res_name:
        res_name = host_type
    return '{}_{}'.format(res_name, res_type)


__Task__ = CheckCachehitForBeta
