# -*- coding: utf-8 -*-

import os
import re
import logging

import sandbox.common.types.client as ctc
import sandbox.projects.websearch.middlesearch.resources as ms_resources

from sandbox.projects import resource_types
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.projects.common.search import components as sc
from sandbox.projects.common import apihelpers
from sandbox.projects.common import dolbilka
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import time_utils as tu
from sandbox.projects.common import string
from sandbox.projects.common import utils
from sandbox.projects.common.search import settings as media_settings
from sandbox.projects.common.search import bugbanner
from sandbox.projects.common.search.eventlog import eventlog
from sandbox.projects.common.search.eventlog import evlog_queries
from sandbox.projects.websearch.middlesearch import resources as ms_res

# Quantile of the query-length distribution reported in the task stats
# (used as an index fraction into the sorted list of query lengths
# in GenerateAllPlans._count_queries_stats).
QUANTILE = 0.99


class PlanGenParams(object):
    """
        Namespace with the input parameters of the plan generation task.

        Every nested class declares one parameter; `params` lists all of them
        and is what GenerateAllPlans exposes as `input_parameters`.
    """

    # --- Flags selecting which basesearch plans are generated -------------
    # Each flag that evaluates to true adds one plan type in
    # GenerateAllPlans._get_list_of_types (type name is given in the comment).

    # plan type: 'search'
    class GenSearchPlan(parameters.SandboxBoolParameter):
        name = 'gen_search_plan'
        description = 'Generate search plan'
        default_value = True
        group = 'Plan generation parameters'

    # plan type: 'factors'
    class GenFactorsPlan(parameters.SandboxBoolParameter):
        name = 'gen_factors_plan'
        description = 'Generate factors plan'
        default_value = True
        group = 'Plan generation parameters'

    # plan type: 'snippets'
    class GenSnippetsPlan(parameters.SandboxBoolParameter):
        name = 'gen_snippets_plan'
        description = 'Generate snippets plan'
        default_value = True
        group = 'Plan generation parameters'

    # plan type: 'snippets_expl'
    class GenSnippetsExplPlan(parameters.SandboxBoolParameter):
        name = 'gen_snippets_explanation_plan'
        description = 'Generate snippets explanation plan'
        default_value = False
        group = 'Plan generation parameters'

    # plan type: 'snippets_sea'
    class GenSnippetsSeaPlan(parameters.SandboxBoolParameter):
        name = 'gen_snippets_extended_plan'
        description = 'Generate snippets extended plan'
        default_value = False
        group = 'Plan generation parameters'

    # plan type: 'all'
    class GenAllPlan(parameters.SandboxBoolParameter):
        name = 'gen_all_plan'
        description = 'Generate plan with all queries'
        default_value = False
        group = 'Plan generation parameters'

    # When true, every generated plan is additionally saved as a plain text queries resource
    # (see GenerateAllPlans._create_plan_for_basesearch). No default_value is declared here.
    class SavePlansAsQueries(parameters.SandboxBoolParameter):
        name = 'save_plans_as_queries'
        description = 'Save plans as queries'
        group = 'Plan generation parameters'

    # --- Middlesearch resources -------------------------------------------

    # When left empty, the task substitutes the last released RankingMiddlesearchExecutable
    # (see GenerateAllPlans._set_undefined_resources_with_defaults).
    class MiddleExe(parameters.ResourceSelector):
        name = 'middlesearch_executable'
        description = 'Ranking middlesearch executable (default = last released)'
        resource_type = (
            ms_resources.RankingMiddlesearchExecutable,
            resource_types.VIDEO_RANKING_MIDDLESEARCH_EXECUTABLE,
        )
        group = 'Middlesearch parameters'

    # When left empty, the task looks up the last resource of the first listed type
    # by the "TE_web_hamster_mmeta_cfg" attribute.
    class MiddleConfig(parameters.ResourceSelector):
        name = 'middlesearch_meta_config'
        description = 'Ranking middlesearch config'
        resource_type = (
            resource_types.MIDDLESEARCH_CONFIG,
            resource_types.VIDEO_MIDDLESEARCH_CONFIG,
        )
        group = 'Middlesearch parameters'

    class MiddleData(parameters.ResourceSelector):
        """
            Not required. If not specified, task tries to use last resource with 'TE_web_production_mmeta_data' attr
        """
        name = 'middlesearch_data'
        description = 'Ranking middlesearch data'
        resource_type = (
            resource_types.MIDDLESEARCH_DATA,
            resource_types.VIDEO_MIDDLESEARCH_DATA,
        )
        required = False
        group = 'Middlesearch parameters'

    # Optional; passed to the middlesearch component as `index_id` without any fallback lookup.
    class MiddleIndex(parameters.ResourceSelector):
        name = 'middlesearch_index'
        description = 'Ranking middlesearch index'
        resource_type = (
            resource_types.VIDEO_MIDDLESEARCH_INDEX,
        )
        required = False
        group = 'Middlesearch parameters'

    class MiddleModels(parameters.ResourceSelector):
        """
            Not required. If not specified, task tries to use last resource with 'current_production' attr
        """
        name = 'middlesearch_models_archive_resource_id'
        description = 'Middlesearch models archive'
        resource_type = (
            resource_types.DYNAMIC_MODELS_ARCHIVE,
            resource_types.VIDEO_MIDDLE_DYNAMIC_MODELS_ARCHIVE,
        )
        required = False
        group = 'Middlesearch parameters'

    # Source plan for the middlesearch; it is converted to queries, patched and re-planned
    # before shooting (see GenerateAllPlans._get_patched_plan_path).
    class MiddlePlan(parameters.ResourceSelector):
        name = 'dolbilo_plan_resource_id'
        description = 'Middlesearch mmeta plan'
        resource_type = (
            ms_res.MiddlesearchPlan,
            resource_types.BASESEARCH_PLAN,
            resource_types.VIDEO_MIDDLESEARCH_PLAN,
        )
        group = 'Middlesearch parameters'

    # --- Basesearch resources ---------------------------------------------

    # When left empty, the task substitutes the last released BASESEARCH_EXECUTABLE.
    class BasesearchExe(parameters.ResourceSelector):
        name = 'executable_resource_id'
        description = 'Basesearch executable (default = last released)'
        resource_type = (
            resource_types.BASESEARCH_EXECUTABLE,
            resource_types.RTYSERVER_EXECUTABLE,
            resource_types.VIDEOSEARCH_EXECUTABLE,
        )
        group = 'Basesearch parameters'

    # When left empty, the task looks up the last resource of the first listed type
    # by the "TE_web_base_prod_resources_PlatinumTier0" attribute.
    class BasesearchConfig(parameters.ResourceSelector):
        name = 'config_resource_id'
        description = 'Basesearch config'
        resource_type = (
            resource_types.SEARCH_CONFIG,
            resource_types.VIDEO_SEARCH_CONFIG,
        )
        group = 'Basesearch parameters'

    # The task reads this value without any fallback; the shard name for resource
    # descriptions and attributes is taken from this resource's attributes.
    class BasesearchDatabase(parameters.ResourceSelector):
        name = 'search_database_resource_id'
        description = 'Basesearch database'
        resource_type = (
            resource_types.SEARCH_DATABASE,
            resource_types.VIDEO_SEARCH_DATABASE,
        )
        group = 'Basesearch parameters'

    # --- Shooting parameters ----------------------------------------------

    # Number of middlesearch queries to shoot; a falsy value makes the task
    # use every query of the patched plan (see GenerateAllPlans._patch_queries).
    class RequestsNumber(parameters.SandboxIntegerParameter):
        name = 'requests_number'
        description = 'Middlesearch request count'
        default_value = 100000

    class GetUncachedQueries(parameters.SandboxBoolParameter):
        """
            Parameter for PRIEMKA_BASESEARCH_DATABASE
            allows to collect only queries with parameter '&pron=nocachehit'
        """
        name = 'get_only_uncached_queries'
        description = 'Create plan for basesearch only with uncached queries'

    # Written into the middlesearch config as 'MaxSnippetsPerRequest'.
    class MaxSnippetsPerRequest(parameters.SandboxIntegerParameter):
        name = 'max_snippets_per_request'
        description = 'Max snippets per request'
        default_value = 1000  # less values lead to overload our single basesearch

    # When true, the task calls disable_cache() on the middlesearch before shooting.
    class DisableCache(parameters.SandboxBoolParameter):
        name = 'ignore_cache'
        description = 'Disable cache'
        default_value = False

    # When true, "&ignoreGen=da" is appended to every snippets query.
    class IgnoreIndexGen(parameters.SandboxBoolParameter):
        name = 'ignore_index_gen'
        description = 'Ignore index generation'
        group = 'Parameters for snippets plan'
        default_value = True

    # Consulted by GenerateAllPlans._patch_queries only when some snippets plan is requested;
    # a falsy value leaves the grouping of the source queries untouched.
    class GroupingSize(parameters.SandboxIntegerParameter):
        name = 'grouping_size'
        description = 'Custom grouping size'
        group = 'Parameters for snippets plan'
        default_value = 1000

    # Reuses the dolbilka parameter as is and overrides only its default value.
    class MaxSimReqs(dolbilka.DolbilkaMaximumSimultaneousRequests):
        default_value = 4  # do not overload single basesearch

    # Parsed with string.parse_attrs; attribute names are passed through str.format
    # with the plan type, so a "{}" in a name is replaced by the plan type.
    class AddAttrs(parameters.SandboxStringParameter):
        name = 'resource_attrs'
        description = 'Set additional attrs to resources (ex.: attr1=v1, attr2=v2)'
        do_not_copy = True

    # When true, the patched middlesearch queries are saved as a separate resource.
    class SavePatchedQueries(parameters.SandboxBoolParameter):
        name = 'save_patched_queries'
        description = 'Save patched queries'
        group = 'Debug options'

    # When left empty, the task looks up the last DYNAMIC_MODELS_ARCHIVE_BASE resource.
    # Note: resource_type is a list here, while the other selectors use tuples.
    class BaseArchiveModel(parameters.ResourceSelector):
        name = "basesearch_models_archive_resource_id"
        description = "Base Models archive"
        resource_type = [resource_types.DYNAMIC_MODELS_ARCHIVE_BASE]
        group = "Basesearch parameters"

    # The string is appended verbatim to the end of every generated basesearch query.
    class BasesearchQueriesCustomParams(parameters.SandboxStringParameter):
        name = 'basesearch_queries_custom_params'
        description = 'extra params per each basesearch query'
        default_value = ''
        do_not_copy = True

    # Full list of task parameters, assigned to GenerateAllPlans.input_parameters.
    params = (
        BaseArchiveModel,
        GenSearchPlan,
        GenFactorsPlan,
        GenSnippetsPlan,
        GenSnippetsExplPlan,
        GenSnippetsSeaPlan,
        GenAllPlan,
        SavePlansAsQueries,
        MiddleExe,
        MiddleConfig,
        MiddleData,
        MiddleIndex,
        MiddleModels,
        MiddlePlan,
        BasesearchExe,
        BasesearchConfig,
        BasesearchDatabase,
        RequestsNumber,
        GetUncachedQueries,
        DisableCache,
        MaxSimReqs,
        MaxSnippetsPerRequest,
        AddAttrs,
        IgnoreIndexGen,
        GroupingSize,
        SavePatchedQueries,
        BasesearchQueriesCustomParams,
    )


# Task context key holding the list of plan types chosen for generation
# (filled by GenerateAllPlans._get_list_of_types, read by the other task stages).
_PLAN_TYPES_KEY = 'plans_to_generate'


class GenerateAllPlans(bugbanner.BugBannerTask):
    """
        **Описание**

        * Получает план для среднего поиска
        * Патчит его
        * Обстреливает средний полученным планом
        * Получает eventlog
        * Из eventlog-а извлекает все типы запросов (search, factors, snippets) на базовые
        * Из полученных запросов генерирует планы на базовые

        **Ресурсы**

            *Обязательные*:

            * basesearch + middlesearch configs and databases
            * middlesearch plan

            *Необязательные*:

            Если не задавать явно эти ресурсы, они будут взяты из последнего релиза.

            * basesearch+middlesearch executable

        **Параметры**

        * **Generate search plan**: получать ли поисковый план,
        * **Generate factors plan**: получать ли факторный план,
        * **Generate snippets plan**: получать ли сниппетный план,
        * **Generate snippets explanation plan**: получать ли сниппетный план с параметром, который
          запрашивает факторы кандидатов в сниппеты,
        * **Generate snippets extended plan**: получать ли сниппетный план с дополнительным cgi-параметром
          "&snip=exps=fstann2&gta=sea". Это включает отдачу расширенного ответа из сниппетовщика в gta-атрибуте "sea",
        * **Generate plan with all queries**: получать ли комбинированный план из запросов всех типов,
        * **Save plans as queries**: сохранять ли планы дополнительно в виде запросов
        * **Middlesearch request count**: сколько запросов брать из плана для среднего поиска
        * **Create plan for basesearch only with not cached queries**: для планов выбирать только
          некешированные запросы (нужно для таска PRIEMKA_BASESEARCH_DATABASE_2)
        * **Ignore index generation**: добавляет в сниппетные запросы дополнительный cgi-параметр "&ignoreGen=da"

        **Создаваемые ресурсы**

            BASESEARCH_PLAN
            PLAIN_TEXT_QUERIES
    """

    # Task type name
    type = 'GENERATE_ALL_PLANS'
    # 91 << 10 == 93184; presumably megabytes (~91 GiB) — TODO confirm units
    required_ram = 91 << 10
    # All parameters declared in PlanGenParams
    input_parameters = PlanGenParams.params
    # NOTE(review): units are not visible here (presumably megabytes) — confirm
    execution_space = 150000
    client_tags = ctc.Tag.LINUX_PRECISE

    def on_failure(self):
        """
            Failure hook: asks the eventlog helper to look for instability problems
            of the middlesearch executable, then runs the BugBannerTask failure handler.
        """
        middlesearch_exe_id = self.ctx[PlanGenParams.MiddleExe.name]
        eventlog.locate_instability_problems(self, search_resource_id=middlesearch_exe_id)
        bugbanner.BugBannerTask.on_failure(self)

    @property
    def footer(self):
        """
            Builds the footer table with per-plan statistics.

            :return: None while there are no stats or no plan types in the context,
                otherwise a one-element list with the table description
                (plan type, queries amount and queries length quantile columns)
        """
        stats = self.ctx.get('stats', {})
        plan_names = self.ctx.get(_PLAN_TYPES_KEY, [])
        if not (stats and plan_names):
            return None

        def column(stat_key):
            # one cell per plan type; "???" marks a missing value
            return [stats.get(plan, {}).get(stat_key, "???") for plan in plan_names]

        header = [
            {"key": "type", "title": "Query type"},
            {"key": "amount", "title": "Amount"},
            {"key": "quantile", "title": "Length quantile 99%"}
        ]
        body = {
            "type": plan_names,
            "amount": column("queries_amount"),
            "quantile": column("queries_len_quantile"),
        }
        table = {"<h4>Stats</h4>": {"header": header, "body": body}}
        return [{'helperName': '', 'content': table}]

    def on_enqueue(self):
        """
            Enqueue hook: runs the SandboxTask handler, creates the output plan resources
            and the eventlog resource, and stores the eventlog resource id in the context.
        """
        SandboxTask.on_enqueue(self)
        channel.task = self
        self._create_resources()
        eventlog_resource = self.create_resource(
            'eventlog', 'event.log', resource_types.EVENTLOG_DUMP, arch='any'
        )
        self.ctx['eventlog_out_resource_id'] = eventlog_resource.id

    # Per-run state; all three are None until on_execute assigns them.
    # Id of the basesearch database resource (taken from the task context)
    basesearch_database_id = None
    # Extra attributes for created resources (user-defined ones plus shard info)
    aux_resource_attrs = None
    # Shard name read from the attributes of the basesearch database resource
    shard_instance = None

    def on_execute(self):
        """
            Main stage of the task:

            * fills undefined resources with defaults,
            * collects additional attributes for the created resources,
            * creates basesearch and middlesearch, patches the middlesearch plan and shoots with it,
            * extracts basesearch queries from the middlesearch eventlog,
            * creates a basesearch plan for every requested plan type.
        """
        self.add_bugbanner(bugbanner.Banners.WebMiddleSearch)

        arch_error = (
            "Plan generator requires 'linux' arch, please choose it. "
            "Archs 'any' or 'freebsd' are not supported."
        )
        eh.ensure(self.arch == 'linux', arch_error)

        self.basesearch_database_id = self.ctx[PlanGenParams.BasesearchDatabase.name]
        self._set_undefined_resources_with_defaults()

        user_attrs = utils.get_or_default(self.ctx, PlanGenParams.AddAttrs)
        self.aux_resource_attrs = string.parse_attrs(user_attrs)
        database = channel.sandbox.get_resource(self.basesearch_database_id)
        # prefer the shard instance attribute, fall back to the shard name
        shard = database.attributes.get(media_settings.SHARD_INSTANCE_ATTRIBUTE_NAME)
        if not shard:
            shard = database.attributes.get('shard_name', 'unknown')
        self.shard_instance = shard
        self.aux_resource_attrs.update({
            media_settings.SHARD_INSTANCE_ATTRIBUTE_NAME: self.shard_instance,
            'source_basesearch_database_id': str(database.id),
        })

        basesearch, middlesearch = self._create_searchers()
        executor = dolbilka.DolbilkaExecutor()
        planner = dolbilka.DolbilkaPlanner()

        if self.ctx.get(PlanGenParams.DisableCache.name):
            middlesearch.disable_cache()

        middlesearch_plan_path = self._get_patched_plan_path(executor, planner, middlesearch.port)

        self._shoot(basesearch, middlesearch, executor, middlesearch_plan_path)
        queries_path = self.abs_path("extracted_queries.txt")
        self._extract_requests(middlesearch.get_event_log_path(), queries_path)

        self.ctx['stats'] = {}
        for plan_type in self.ctx[_PLAN_TYPES_KEY]:
            self._create_plan_for_basesearch(plan_type, queries_path, planner)

    @staticmethod
    def _personalize_attrs(attrs, q_type):
        """
            Работает только если в имени атрибута есть "{}", иначе возвращает исходные атрибуты
            :return: список атрибутов, персонализированных для каждого типа запросов
        """
        return {key.format(q_type): val for key, val in attrs.iteritems()}

    def _create_resources(self):
        """
            Creates one BASESEARCH_PLAN resource per requested plan type and stores
            its id (as a string) in the context under '<plan type>_plan_resource_id'.
            The shard name for descriptions is read from the basesearch database attributes.
        """
        database = channel.sandbox.get_resource(self.ctx[PlanGenParams.BasesearchDatabase.name])
        shard_instance = database.attributes.get('shard_instance')
        if not shard_instance:
            shard_instance = database.attributes.get('shard_name', 'UNKNOWN_SHARD_INSTANCE')

        self._get_list_of_types()
        for plan_type in self.ctx[_PLAN_TYPES_KEY]:
            logging.info('creating resource for "%s"', plan_type)
            description = 'Synt base {} plan {}, {}'.format(plan_type, shard_instance, tu.date_ymd())
            file_name = 'basesearch_{}.plan'.format(plan_type)
            plan_resource = self.create_resource(
                description, file_name, resource_types.BASESEARCH_PLAN, arch='any'
            )
            self.ctx['{}_plan_resource_id'.format(plan_type)] = str(plan_resource.id)

    def _get_list_of_types(self):
        """
            Converts plan generation flags into the list of plan types and stores
            the list in the context. Verification fails when no plan is chosen.
        """
        # the order of this table defines the order of plan types in the context
        flag_to_type = (
            (PlanGenParams.GenSearchPlan, 'search'),
            (PlanGenParams.GenFactorsPlan, 'factors'),
            (PlanGenParams.GenSnippetsPlan, 'snippets'),
            (PlanGenParams.GenSnippetsExplPlan, 'snippets_expl'),
            (PlanGenParams.GenSnippetsSeaPlan, 'snippets_sea'),
            (PlanGenParams.GenAllPlan, 'all'),
        )
        plan_types = [
            plan_type
            for flag, plan_type in flag_to_type
            if utils.get_or_default(self.ctx, flag)
        ]
        eh.verify(plan_types, 'There is no any chosen plan to generate')
        self.ctx[_PLAN_TYPES_KEY] = plan_types

    def _set_undefined_resources_with_defaults(self):
        """
            Fills resource parameters that are not set in the task context:

            * middlesearch and basesearch executables - last released resources,
            * middlesearch and basesearch configs - last resources with a fixed attribute,
            * basesearch models archive - last resource of the first allowed type.

            Parameters that already have a value are left untouched.
        """
        def last_resource_id(param, **lookup_kwargs):
            return utils.get_and_check_last_resource_with_attribute(
                resource_type=param.resource_type[0], **lookup_kwargs
            ).id

        # (parameter, log message, lazy getter of the default resource id)
        fallbacks = (
            (
                PlanGenParams.MiddleExe,
                "MiddleExe is not set, obtaining last stable middlesearch",
                self._get_last_released_middlesearch_id,
            ),
            (
                PlanGenParams.BasesearchExe,
                "BasesearchExe is not set, obtaining last stable basesearch",
                self._get_last_released_basesearch_id,
            ),
            (
                PlanGenParams.MiddleConfig,
                "MiddleConfig is not set, obtaining it by last prod attribute",
                lambda: last_resource_id(PlanGenParams.MiddleConfig, attr_name="TE_web_hamster_mmeta_cfg"),
            ),
            (
                PlanGenParams.BasesearchConfig,
                "BasesearchConfig is not set, obtaining it by last prod attribute",
                lambda: last_resource_id(
                    PlanGenParams.BasesearchConfig,
                    attr_name="TE_web_base_prod_resources_PlatinumTier0",
                ),
            ),
            (
                # NOTE(review): no attr_name is passed here although the message
                # mentions a prod attribute - confirm that this is intended
                PlanGenParams.BaseArchiveModel,
                "Basesearch Models Archive is not set, obtaining it by last prod attribute",
                lambda: last_resource_id(PlanGenParams.BaseArchiveModel),
            ),
        )
        for param, message, get_default_id in fallbacks:
            if self.ctx.get(param.name):
                continue
            logging.info(message)
            self.ctx[param.name] = get_default_id()

    def _get_type_and_tier(self):
        """
            Detects basesearch type and tier from the files of the basesearch database.

            The first 'SearchZone=' line of stamp.TAG is used when the file can be read:
            known zones give (type, zone), any other zone gives ('WEB', None).
            Otherwise the "%shard" line of shard.conf is checked for images, images CBIR
            and video shard name prefixes. 'WEB' without a tier is the final fallback.

            :return: tuple (basesearch type, basesearch tier or None)
        """
        zones_table = {
            'WebTier0': 'WEB',
            'WebTier1': 'WEB',
            'PlatinumTier0': 'PLATINUM',
        }
        zone_prefix = 'SearchZone='
        # shard name prefix -> basesearch type
        shard_types = (
            ("imgsidx", "IMAGES"),
            ("imgscbr", "IMAGESCBIR"),
            ("vidsidx", "VIDEO"),
        )

        self.sync_resource(self.basesearch_database_id)
        base_data = channel.sandbox.get_resource(self.basesearch_database_id)
        stamp_tag_path = os.path.join(base_data.path, 'stamp.TAG')
        logging.info('stamp_tag_path = {}'.format(stamp_tag_path))

        try:
            with open(stamp_tag_path) as stamp_file:
                for line in stamp_file:
                    line = line.strip()
                    if line.startswith(zone_prefix):
                        zone = line.replace(zone_prefix, '')
                        if zone in zones_table:
                            return zones_table[zone], zone
                        return 'WEB', None
        except Exception as error:
            # stamp.TAG is optional: keep the best-effort fallback to shard.conf,
            # but leave a trace of the reason in the log instead of hiding it
            logging.info(
                'Cannot get search zone from %s (%s), falling back to shard.conf',
                stamp_tag_path, error
            )

        shard_conf_path = os.path.join(base_data.path, 'shard.conf')
        with open(shard_conf_path) as shard_conf_file:
            for line in shard_conf_file:
                tabs = line.strip().split()
                # both the "%shard" keyword and the shard name are required;
                # a "%shard" line without a name is skipped instead of raising IndexError
                if len(tabs) < 2 or tabs[0] != "%shard":
                    continue
                for prefix, basesearch_type in shard_types:
                    if tabs[1].startswith(prefix):
                        return basesearch_type, None

        return 'WEB', None

    def _create_searchers(self):
        """
            Creates basesearch and middlesearch components from the task parameters.

            The middlesearch gets the single local basesearch as its source, writes its
            eventlog into the eventlog resource and has two config parameters replaced:
            'MaxSnippetsPerRequest' (task parameter) and 'Collection/ConnStatOptions' (emptied).

            :return: tuple (basesearch, middlesearch)
        """
        eventlog_resource = channel.sandbox.get_resource(self.ctx['eventlog_out_resource_id'])
        basesearch_type, basesearch_tier = self._get_type_and_tier()
        logging.info('basesearch_type = %s', basesearch_type)

        basesearch = sc.get_basesearch_ex(
            archive_model_id=self.ctx[PlanGenParams.BaseArchiveModel.name],
            binary_id=self.ctx[PlanGenParams.BasesearchExe.name],
            config_id=self.ctx[PlanGenParams.BasesearchConfig.name],
            database_id=self.ctx[PlanGenParams.BasesearchDatabase.name]
        )
        local_source = {
            'basesearch_type': basesearch_type,
            'basesearch_tier': basesearch_tier,
            'hosts_and_ports': [('localhost', basesearch.port)]
        }

        # ids are resolved in the same order as they are passed below
        middle_binary_id = self.ctx[PlanGenParams.MiddleExe.name]
        middle_config_id = self.ctx[PlanGenParams.MiddleConfig.name]
        middle_data_id = self._get_middle_data_id()
        middle_index_id = self.ctx.get(PlanGenParams.MiddleIndex.name)
        middle_models_id = self._get_middle_models_id()
        middlesearch = sc.get_middlesearch_ex(
            binary_id=middle_binary_id,
            config_id=middle_config_id,
            data_id=middle_data_id,
            index_id=middle_index_id,
            archive_model_id=middle_models_id,
            event_log=eventlog_resource.path,
            basesearches=[local_source],
            use_verify_stderr=True,
        )

        config_overrides = (
            ('MaxSnippetsPerRequest', utils.get_or_default(self.ctx, PlanGenParams.MaxSnippetsPerRequest)),
            ('Collection/ConnStatOptions', ''),
        )
        for config_key, config_value in config_overrides:
            middlesearch.replace_config_parameter(config_key, config_value)

        return basesearch, middlesearch

    def _get_patched_plan_path(self, d_executor, d_planner, middlesearch_port):
        """
            Prepares the plan used to shoot the middlesearch:

            * dumps queries of the source middlesearch plan,
            * patches the queries,
            * builds a new plan aimed at localhost:middlesearch_port.

            :return: path to the new plan
        """
        source_plan_path = self._get_plan_path()
        logging.info('size of mmeta_plan_path = %s bytes', os.path.getsize(source_plan_path))

        source_queries_path = self.path('source_queries.txt')
        patched_queries_path = self.path('patched_queries.txt')

        d_executor.get_plan_requests(plan=source_plan_path, result_file=source_queries_path)
        self._patch_queries(source_queries_path, patched_queries_path)

        logging.info('create plan with patched queries for middlesearch')
        patched_plan_path = self.path('patched_queries.plan')
        d_planner.create_plan(
            queries_path=patched_queries_path,
            result_path=patched_plan_path,
            host='localhost',
            port=middlesearch_port,
            rps=100
        )
        patched_plan_size = os.path.getsize(patched_plan_path)
        logging.info('Patched plan for middlesearch size = %s bytes', patched_plan_size)
        return patched_plan_path

    def _get_plan_path(self):
        """
            Syncs the middlesearch plan resource and returns what sync_resource gives back.
        """
        logging.info('getting plan path for middlesearch')
        plan_resource_id = self.ctx[PlanGenParams.MiddlePlan.name]
        return self.sync_resource(plan_resource_id)

    def _patch_queries(self, source_queries_path, patched_queries_path):
        """
            Patches source middlesearch queries and writes them to patched_queries_path.

            * '&haha=da' is removed and '&waitall=da&timeout=2000000' is appended.
            * When any snippets plan is requested and the grouping size parameter is set,
              grouping is replaced with '&g=1.<mode>.<grouping size>.1.'.
            * Only the first RequestsNumber queries are written; when RequestsNumber
              is not set, it is set to the amount of patched queries.
            * Patched queries are saved as a resource when SavePatchedQueries is set.

            :param source_queries_path: file with the queries dumped from the source plan
            :param patched_queries_path: output file for the patched queries
        """
        logging.info('Source queries size: %s bytes', os.path.getsize(source_queries_path))
        logging.info('Patch source queries: remove haha=da, add timeout and waitall=da')
        source_queries = fu.read_lines(source_queries_path)
        # append waitall=da and timeout 2s to prevent timeout failures, see SEARCH-596
        extra_params = '&waitall=da&timeout=2000000'
        patched_queries = [q.replace('&haha=da', '') + extra_params for q in source_queries]

        grouping_size = self.ctx.get(PlanGenParams.GroupingSize.name, 0)
        if grouping_size and any('snippets' in plan_type for plan_type in self.ctx[_PLAN_TYPES_KEY]):
            logging.info('Set grouping size to {}'.format(grouping_size))
            grouping_re = re.compile(r'&g=1\.([^.]+)\.\d+\.\d+\.')
            # use the requested grouping size instead of a hard-coded 1000;
            # \g<1> keeps the group reference unambiguous next to the digits
            grouping_replacement = r'&g=1.\g<1>.{}.1.'.format(int(grouping_size))
            patched_queries = [grouping_re.sub(grouping_replacement, q) for q in patched_queries]

        eh.verify(patched_queries, 'Cannot get correct patched queries')
        if not self.ctx.get(PlanGenParams.RequestsNumber.name):
            self.ctx[PlanGenParams.RequestsNumber.name] = len(patched_queries)
            logging.info("Set queries number = %s", self.ctx[PlanGenParams.RequestsNumber.name])
        fu.write_lines(patched_queries_path, patched_queries[:self.ctx[PlanGenParams.RequestsNumber.name]])

        if utils.get_or_default(self.ctx, PlanGenParams.SavePatchedQueries):
            evlog_queries.save_as_plain_text_queries_resource(
                task=self,
                queries_file_name=patched_queries_path,
                description='Patched queries',
            )

    def _shoot(self, basesearch, middlesearch, d_executor, plan_path):
        """
            Starts both searchers, shoots the middlesearch with plan_path
            in finger mode and stops the searchers afterwards.
            The responses are dumped to 'result.dump'.
        """
        # basesearch always goes first: on start, on wait and on stop
        searchers = (basesearch, middlesearch)

        logging.info('run basesearch and middlesearch')
        for searcher in searchers:
            searcher.start()
        for searcher in searchers:
            searcher.wait()

        logging.info('run dolbilka executor...')
        dump_path = self.abs_path('result.dump')
        d_executor.mode = dolbilka.DolbilkaExecutorMode.FINGER_MODE
        d_executor.max_simultaneous_requests = utils.get_or_default(self.ctx, PlanGenParams.MaxSimReqs)
        d_executor.requests = utils.get_or_default(self.ctx, PlanGenParams.RequestsNumber)
        d_executor.run_session(plan=plan_path, dump=dump_path, port=middlesearch.port)

        # NOTE(review): searchers are not stopped here when run_session raises
        for searcher in searchers:
            searcher.stop()

    def _create_plan_for_basesearch(self, type_queries, all_queries_path, d_planner):
        """
            Creates the basesearch plan of one type from the extracted queries.

            Queries are filtered by type, extended with custom parameters, optionally
            reduced to uncached ones, counted for stats and written to a file.
            The binary plan built from the file is moved to the path of the plan
            resource created earlier, and the resource gets personalized attributes.

            :param type_queries: plan type
            :param all_queries_path: file with all queries extracted from the eventlog
            :param d_planner: dolbilka planner used to build the binary plan
        """
        resource_attrs = self._personalize_attrs(self.aux_resource_attrs, type_queries)
        queries = self._filter_requests_by_type(type_queries, all_queries_path)

        custom_params = utils.get_or_default(self.ctx, PlanGenParams.BasesearchQueriesCustomParams)
        if custom_params:
            logging.info('custom params "%s" will be added to basesearch queries', custom_params)
            queries = [
                '{query}{custom_params}'.format(query=q, custom_params=custom_params)
                for q in queries
            ]

        if utils.get_or_default(self.ctx, PlanGenParams.GetUncachedQueries):
            queries = self._get_uncached_queries(queries)

        # also checks that the amount of queries is enough
        self._count_queries_stats(queries, type_queries)

        queries_path = self.abs_path('{}_queries.txt'.format(type_queries))
        fu.write_lines(queries_path, queries)
        if utils.get_or_default(self.ctx, PlanGenParams.SavePlansAsQueries):
            queries_description = 'Synt base {} queries {}, {}'.format(
                type_queries, self.shard_instance, tu.date_ymd()
            )
            evlog_queries.save_as_plain_text_queries_resource(
                task=self,
                queries_file_name=queries_path,
                queries=queries,
                description=queries_description,
                attributes=resource_attrs,
            )

        logging.info('create binary %s plan', type_queries)
        binary_plan_path = self.abs_path('{}_plan.bin'.format(type_queries))
        d_planner.create_plan(queries_path=queries_path, result_path=binary_plan_path, rps=100)

        logging.info('create BASESEARCH_PLAN resource')
        plan_resource_id = int(self.ctx['{}_plan_resource_id'.format(type_queries)])
        plan_resource = channel.sandbox.get_resource(plan_resource_id)
        os.rename(binary_plan_path, plan_resource.path)
        utils.set_resource_attributes(plan_resource, resource_attrs)

    @staticmethod
    def _get_uncached_queries(queries):
        """
            New option: can collect only queries with '&pron=nocachehit'
        """
        logging.info('collect only queries with "&pron=nocachehit"')
        return [q.replace('&pron=nocachehit', '') for q in queries if '&pron=nocachehit' in q]

    def _extract_requests(self, eventlog_path, path_to_extract):
        """
            Extracts requests from the middlesearch eventlog into path_to_extract.

            evlogdump is obtained for the PlanGenParams.MiddleExe resource and run with
            the '-o -i 300' options; its output is piped through awk (8th column) and sed
            (leading "http2" is replaced with "http"). Verification fails on empty output.
        """
        logging.info('extract all requests')
        middlesearch_exe_id = self.ctx[PlanGenParams.MiddleExe.name]
        evlogdump_path = eventlog.get_evlogdump(middlesearch_exe_id)

        eventlog.filter_eventlog(
            evlogdump_path,
            eventlog_path,
            filter_command='awk \'{print $8}\' | sed -e "s/^http2/http/"',
            output_path=path_to_extract,
            evlogdump_options='-o -i 300'
        )

        extracted_size = os.path.getsize(path_to_extract)
        eh.verify(extracted_size, "Can't get queries from evlog")
        logging.info("size of queries for basesearch = %s bytes", extracted_size)

    def _filter_requests_by_type(self, type_queries, all_queries_path):
        with open(all_queries_path) as f:
            if type_queries == "search":
                return [q.strip("\n\r") for q in f if "&dh=" not in q]
            elif type_queries == "factors":
                return [q.strip("\n\r") for q in f if "&allfctrs=da" in q]
            elif 'snippets' in type_queries:
                snip_reqs = [q.strip("\n\r") for q in f if "&DF=da" in q]
                if utils.get_or_default(self.ctx, PlanGenParams.IgnoreIndexGen):
                    logging.info('set additional cgi parameter "&ignoreGen=da" for %s', type_queries)
                    snip_reqs = [q + "&ignoreGen=da" for q in snip_reqs]
                if type_queries == "snippets":
                    return snip_reqs
                elif type_queries == 'snippets_expl':
                    return [
                        q.replace("&qtree", "&gta=_SnippetsExplanation&qtree") + "&snip=dump=da"
                        for q in snip_reqs
                    ]
                elif type_queries == 'snippets_sea':
                    return [q + "&snip=exps=fstann2&gta=sea" for q in snip_reqs]
            elif type_queries == 'all':
                return [q.strip("\n\r") for q in f if q.strip("\n\r")]
            else:
                eh.check_failed("Wrong type of queries: {}".format(type_queries))

    def _get_last_released_resource_id(self, resource_type):
        """
            Finds the last released resource of resource_type for the task arch.
            Verification fails when nothing is found.

            :return: id of the found resource
        """
        released = apihelpers.get_last_released_resource(resource_type=resource_type, arch=self.arch)
        not_found_message = "Can't get last stable released resource {}".format(resource_type)
        eh.verify(released, not_found_message)
        return released.id

    def _get_last_released_middlesearch_id(self):
        """
            :return: id of the last released RankingMiddlesearchExecutable resource
        """
        middlesearch_type = ms_resources.RankingMiddlesearchExecutable
        return self._get_last_released_resource_id(middlesearch_type)

    def _get_last_released_basesearch_id(self):
        """
            :return: id of the last released BASESEARCH_EXECUTABLE resource
        """
        basesearch_type = resource_types.BASESEARCH_EXECUTABLE
        return self._get_last_released_resource_id(basesearch_type)

    def _get_middle_data_id(self):
        """
            Returns the middlesearch data resource id from the context.

            When the parameter is absent or empty, the last MIDDLESEARCH_DATA resource
            with the "TE_web_production_mmeta_data" attribute is looked up and its id
            is stored in the context. Verification fails when no such resource is found.

            :return: middlesearch data resource id
        """
        param_name = PlanGenParams.MiddleData.name
        # the parameter is optional, so the key may be missing in the context
        if not self.ctx.get(param_name):
            attribute_name = "TE_web_production_mmeta_data"
            resource = apihelpers.get_last_resource_with_attribute(
                resource_types.MIDDLESEARCH_DATA, attribute_name=attribute_name
            )
            # presumably the lookup gives a falsy result when nothing matches;
            # report it clearly instead of failing on ".id"
            eh.verify(
                resource,
                "Can't get last MIDDLESEARCH_DATA resource with attribute {}".format(attribute_name)
            )
            self.ctx[param_name] = resource.id
        return self.ctx[param_name]

    def _get_middle_models_id(self):
        """
            Returns the middlesearch models archive resource id from the context.

            When the parameter is absent or empty, the last DYNAMIC_MODELS_ARCHIVE resource
            with the "current_production" attribute is looked up and its id is stored
            in the context. Verification fails when no such resource is found.

            :return: middlesearch models archive resource id
        """
        param_name = PlanGenParams.MiddleModels.name
        # the parameter is optional, so the key may be missing in the context
        if not self.ctx.get(param_name):
            attribute_name = "current_production"
            resource = apihelpers.get_last_resource_with_attribute(
                resource_types.DYNAMIC_MODELS_ARCHIVE, attribute_name=attribute_name
            )
            # presumably the lookup gives a falsy result when nothing matches;
            # report it clearly instead of failing on ".id"
            eh.verify(
                resource,
                "Can't get last DYNAMIC_MODELS_ARCHIVE resource with attribute {}".format(attribute_name)
            )
            self.ctx[param_name] = resource.id
        return self.ctx[param_name]

    def _count_queries_stats(self, queries_list, type_queries):
        """
            Checks that there are enough queries of the given type and stores
            their amount and length quantile in the 'stats' section of the context.

            The lower bound is RequestsNumber / 10 with disabled cache
            and RequestsNumber / 100 otherwise.
        """
        amount = len(queries_list)
        # in case of cache enabled there are only few SubSourceRequest
        cache_enabled = not self.ctx.get(PlanGenParams.DisableCache.name)
        multiplier = int(cache_enabled) + 1
        min_amount = self.ctx[PlanGenParams.RequestsNumber.name] / (10.0 ** multiplier)
        if amount < min_amount:
            eh.fail("Too few {} queries for basesearch: {}".format(type_queries, amount))

        sorted_lengths = sorted(len(q) for q in queries_list)
        quantile_index = int(QUANTILE * len(sorted_lengths))
        self.ctx['stats'][type_queries] = {
            'queries_amount': amount,
            'queries_len_quantile': sorted_lengths[quantile_index]
        }


# Module-level alias of the task class
# (presumably the name the Sandbox task loader looks for — TODO confirm).
__Task__ = GenerateAllPlans
