# -*- coding: utf-8 -*-

import logging
import requests
import subprocess
import json
import datetime
import random

from sandbox import sdk2
from sandbox.projects import resource_types
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.parameters import SandboxBoolParameter
from sandbox.sandboxsdk.parameters import SandboxIntegerParameter
from sandbox.sandboxsdk.parameters import ResourceSelector
from sandbox.sandboxsdk.parameters import LastReleasedResource
from sandbox.projects.common.search.components import DefaultBasesearchParams
from sandbox.projects.common.search.settings import WebSettings
from sandbox.projects.common import apihelpers
from sandbox.projects.common import requests_wrapper
from sandbox.projects.common import time_utils as tu
from sandbox.projects.common import utils
from sandbox.projects.GenerateAllPlans import PlanGenParams
from sandbox.projects.websearch.basesearch.TestBasesearchPerformance import PlanParameter
from sandbox.projects.websearch.basesearch.TestBasesearchPerformance import RunSpecialFinalSessionParameter
from sandbox.projects.common.dolbilka import DolbilkaExecutorRequestsLimit
from sandbox.projects.common.dolbilka import DolbilkaSessionsCount
from sandbox.projects.common.dolbilka import DolbilkaExecutorMode
from sandbox.projects.common.dolbilka import DolbilkaMaximumSimultaneousRequests
from sandbox.projects.common.search import memory_usage
from sandbox.projects.common.search import shards as search_shards
from sandbox.projects.common.nanny.nanny import NannyClient
from sandbox.projects.websearch.middlesearch import resources as ms_resources


class RunTestsWithFactorsPlan(SandboxBoolParameter):
    # Boolean task parameter. When it is truthy, on_enqueue() adds the 'factors'
    # plan type next to 'search', and the plan generator subtask is asked to
    # build a factors plan as well.
    name = 'run_tests_with_factors_plan'
    description = 'Run performance tests with factors plan too:'
    default_value = True


class MeasureMiddlesearchMemoryUsage(SandboxBoolParameter):
    # Boolean task parameter. When enabled, a memory usage subtask is created
    # for every shard and its 'memory_rss' / 'memory_vsz' context values are
    # collected into the results.
    name = 'measure_middlesearch_memory_usage'
    description = 'Measure middlesearch memory usage (SEARCH-1081)'
    default_value = False


class ExecutorRequestsLimit(SandboxIntegerParameter):
    # Integer task parameter forwarded to every TEST_BASESEARCH_PERFORMANCE
    # subtask under the DolbilkaExecutorRequestsLimit key.
    name = 'executor_requests_limit'
    description = 'Executor requests limit'
    default_value = 100000


class TierShardCount(SandboxIntegerParameter):
    # Integer task parameter: size of the random shard sample taken for each
    # tier and each database tag in _list_tier_tag_shards().
    name = 'tier_shard_count'
    description = 'Number of shards per tier'
    default_value = 30


class TestPlatinumTier0(SandboxBoolParameter):
    # Tier toggle. _get_tiers_for_test() reads it from the context through the
    # 'test_tier_<tier name>' key pattern.
    name = 'test_tier_PlatinumTier0'
    description = 'Test PlatinumTier0'
    group = 'Choose tiers for tests:'


class TestWebTier1(SandboxBoolParameter):
    # Tier toggle. _get_tiers_for_test() reads it from the context through the
    # 'test_tier_<tier name>' key pattern.
    name = 'test_tier_WebTier1'
    description = 'Test WebTier1'
    group = 'Choose tiers for tests:'


class BasesearchBinary(ResourceSelector):
    # Optional basesearch executable resource. When it is not set,
    # _get_resources() falls back to the last released executable.
    name = 'basesearch_executable_resource_id'
    description = 'Executable'
    resource_type = [
        resource_types.BASESEARCH_EXECUTABLE,
    ]
    group = 'Basesearch params'


class BasesearchConfig(ResourceSelector):
    # Optional basesearch config resource. When it is not set,
    # _get_resources() obtains the config via _get_production_basesearch_config_id().
    name = 'basesearch_config_resource_id'
    description = 'Config'
    resource_type = [
        resource_types.SEARCH_CONFIG,
    ]
    group = 'Basesearch params'


class MiddlesearchBinary(ResourceSelector):
    # Optional middlesearch executable resource passed to the plan generator.
    # When it is not set, the last released ranking middlesearch is used.
    name = 'middlesearch_executable_resource_id'
    description = 'Executable'
    resource_type = [
        ms_resources.RankingMiddlesearchExecutable,
    ]
    group = 'Middlesearch params'


class IssShards(LastReleasedResource):
    # Required resource with the iss_shards tool. _get_rbtorrent() syncs it and
    # runs it as `<tool> info --json <shard>` to find the rbtorrent url of a shard.
    name = 'iss_shards'
    description = 'iss_shards'
    resource_type = resource_types.ISS_SHARDS
    required = True
    group = 'Resources'


class OverrideNewDbTimestamp(SandboxIntegerParameter):
    # Optional integer: database generation to use for 'newdb' shards instead
    # of the one reported by get_pip_timestamp().
    name = 'override_newdb_timestamp'
    description = 'Use provided database generation for newdb shards'


class OverrideProdTimestamp(SandboxIntegerParameter):
    # Optional integer: database generation intended for 'production' shards
    # (see the description string below).
    name = 'override_prod_timestamp'
    description = 'Use provided database generation for prod shards'


# Tiers this task is able to test. Each name matches a 'test_tier_<name>'
# task parameter and is used as a key of the 'priemka_results' context dict.
BASESEARCH_DATABASE_TIERS = (
    'PlatinumTier0',
    'WebTier1'
)

# Base URL of the gencfg API.
# NOTE(review): not referenced in the visible part of this file -- presumably
# used by get_gencfg_shards(); confirm.
GENCFG_API = 'http://api.gencfg.yandex-team.ru'


class PriemkaBasesearchDatabaseJupiter(SandboxTask):
    """
        **Description**

            Checks database performance per tier.

            * Obtains plans for shooting at basesearch with the GENERATE_ALL_PLANS task
              (only cached queries are used)
            * For every tier the list of shards is taken for the current (production) database
              and for the database being accepted (newdb)
            * A random sample of shards is taken per tier and per database
              (``Number of shards per tier`` parameter, 30 by default)
            * TEST_BASESEARCH_PERFORMANCE subtasks are started
            * As soon as any subtask completes, its results are written to the context of the main task
            * The task page shows a table built from the results stored in the task context

        **Parameters**

        * **Run performance tests with factors plan too:** - additionally run all tests
          with factors plans
          (Attention!!! Doubles the number of child tasks!)
        * **Use d_neher** - use d_neher instead of d_executor in the basesearch performance test
          (NOTE(review): no such parameter is declared in input_parameters below -- confirm)
        * **Measure middlesearch memory usage** - measure rss (Resident Set Size) and vsz (Virtual Memory Size)
          for a configuration of one middlesearch and two basesearches for every shard
        * **Executor requests limit** - number of requests used for testing
        * **Choose tiers for tests:** - which databases should be tested
    """

    type = 'PRIEMKA_BASESEARCH_DATABASE_JUPITER'

    input_parameters = [
        RunTestsWithFactorsPlan,
        MeasureMiddlesearchMemoryUsage,
        ExecutorRequestsLimit,
        TierShardCount,
        TestPlatinumTier0,
        TestWebTier1,
        BasesearchBinary,
        BasesearchConfig,
        MiddlesearchBinary,
        IssShards,
        OverrideNewDbTimestamp,
        OverrideProdTimestamp,
    ]

    # kill_timeout (presumably seconds) passed to the shard download helpers
    db_shard_timeout = 18000
    # database tags compared against each other; used as keys of per-tier results
    db_tags = ['production', 'newdb']

    def on_enqueue(self):
        """
            Store the list of plan types to test in the task context.

            'search' is always present; 'factors' is appended when the
            RunTestsWithFactorsPlan parameter is enabled (it is by default).
        """
        plan_types = ['search']
        self.ctx['plan_types'] = plan_types

        with_factors = self.ctx.get(
            RunTestsWithFactorsPlan.name,
            RunTestsWithFactorsPlan.default_value
        )
        if with_factors:
            self.ctx['plan_types'].append('factors')

    def on_execute(self):
        self._check_tiers()

        if 'priemka_results' not in self.ctx:
            self._get_resources()
            self.ctx['priemka_results'] = self._get_results()

        self._check_results(self.ctx['priemka_results'])

    def _get_results(self):
        results = {
            tier_name: {db_tag: [] for db_tag in self.db_tags}
            for tier_name in self.ctx['test_tiers']
        }

        try:
            tier_tag_shards = self._list_tier_tag_shards()
        except (requests.RequestException, AssertionError) as ex:
            logging.exception(ex)
            self.wait_time(5 * 60)

        for tier_name in tier_tag_shards:
            for db_tag, shards in tier_tag_shards[tier_name].items():
                for shard in shards:
                    results[tier_name][db_tag].append(
                        self._create_tasks_for_shard(
                            tier_name, shard['real_shard_name'], shard['shard_torrent'], shard['unix_timestamp']
                        )
                    )
        return results

    def _list_tier_tag_shards(self):
        tier_tag_shards = {}
        for tier_name in self.ctx['test_tiers']:
            tier_tag_shards[tier_name] = {}

            for db_tag in self.db_tags:
                tier_tag_shards[tier_name][db_tag] = []

                if db_tag == 'newdb':
                    timestamp = self.ctx.get(OverrideNewDbTimestamp.name) or get_pip_timestamp()
                    all_shards, done_shards = get_shards_from_controller(tier_name, timestamp)
                else:
                    NANNY_API_URL = 'http://nanny.yandex-team.ru/'
                    nanny_client = NannyClient(
                        api_url=NANNY_API_URL,
                        oauth_token=self.get_vault_data('SEPE-PRIEMKA', 'JUBA_NANNY_TOKEN')
                    )
                    timestamp = get_prod_timestamp()

                    if not timestamp:
                        logging.info('Could not get timestamp for %s', db_tag)
                        self.wait_time(5 * 60)

                    all_shards = set()
                    for shard_name in get_shard_names(nanny_client, tier_name, db_tag):
                        all_shards.add(shard_name_to_real_shard_name(shard_name, timestamp))
                    done_shards = all_shards

                random_generator = random.Random(timestamp)
                selected_shards = random_generator.sample(sorted(all_shards), self.ctx[TierShardCount.name])
                selected_shards = set(selected_shards)
                if len(selected_shards & done_shards) / float(len(selected_shards)) < 0.9:
                    logging.warn('less than 90% shards ready')
                    self.wait_time(120)

                try:
                    if '-' in str(timestamp):
                        unix_timestamp = int(datetime.datetime.strptime(timestamp, '%Y%m%d-%H%M%S').strftime("%s"))
                    else:
                        unix_timestamp = int(timestamp)
                except TypeError:
                    unix_timestamp = 0

                for real_shard_name in selected_shards:
                    shard_torrent = self._get_rbtorrent(real_shard_name)
                    tier_tag_shards[tier_name][db_tag].append({
                        'real_shard_name': real_shard_name,
                        'shard_torrent': shard_torrent,
                        'unix_timestamp': unix_timestamp
                    })

        return tier_tag_shards

    def _get_rbtorrent(self, shard):
        """
            Find the rbtorrent url of a shard with the iss_shards tool.

            The tool resource is taken from the IssShards parameter, synced locally
            and run as ``<tool> info --json <shard>``; its stdout is parsed as JSON.

            :param shard: shard name passed to the tool
            :return: first url from ['full']['urls'] starting with 'rbtorrent:',
                or None if there is no such url or the output is not valid JSON
        """
        iss_shards_resource = channel.sandbox.get_resource(self.ctx.get(IssShards.name))
        resource_local_path = channel.task.sync_resource(iss_shards_resource.id)

        err = None
        try:
            out, err = subprocess.Popen([resource_local_path, 'info', '--json', shard],
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE).communicate()
            for url in json.loads(out)['full']['urls']:
                if url.startswith('rbtorrent:'):
                    return url
        except ValueError:
            logging.info('Error processing shard: {}'.format(shard))
            # stderr of the tool is the only hint about why its output was unusable
            if err:
                logging.info('iss_shards stderr for shard %s: %s', shard, err)

        return None

    def _create_tasks_for_shard(self, tier, shard_name, shard_torrent, timestamp):
        """
            Start everything needed to test one shard: the shard database resource,
            the plan generator subtask, one performance subtask per plan type and,
            optionally, the memory usage subtask.

            :param: tier: tier name
            :param: shard_name: real shard name
            :param: shard_torrent: value passed as database_path for the shard download
            :param: timestamp: unix timestamp of the database generation
            :return: dict with shard info, ids of created tasks and empty
                placeholders for the metrics filled in later
        """
        shard = search_shards.ShardDownloadTask(shard_name)
        shard.timestamp = timestamp

        shard_resource = search_shards.get_database_shard_resource(
            tier, shard,
            kill_timeout=self.db_shard_timeout,
            db_type='basesearch',
            check_spam=False,
            database_path=shard_torrent
        )

        result = {
            'shard_name': shard_name,
            'shard_torrent': shard_torrent,
            'shard_resource_id': shard_resource.id,
            'shard_resource_task_id': shard_resource.task_id,
        }

        plan_gen_task = self._create_plan_generator(
            shard_resource.id, shard.priority, tier, timestamp
        )
        tasks_info = self._create_basesearch_tests(
            shard_resource.id, shard.priority, plan_gen_task
        )

        memusage_requested = utils.get_or_default(self.ctx, MeasureMiddlesearchMemoryUsage)
        if memusage_requested:
            tasks_info['memusage_task_id'] = self._create_memusage_task(
                tier, shard_resource.id, shard.priority
            )

        result.update(tasks_info)
        result['plan_gen_task_id'] = plan_gen_task.id

        # metrics are unknown until the subtasks finish
        for plan_type in self.ctx['plan_types']:
            result['{}_rps'.format(plan_type)] = ''
            result['num_of_{}_queries'.format(plan_type)] = ''
            result['quantile_{}'.format(plan_type)] = ''

        return result

    def _create_plan_generator(self, base_shard_resource_id, priority, tier, timestamp):
        """
            Start a GENERATE_ALL_PLANS subtask that builds the search plan
            (and the factors plan, if enabled) for a basesearch shard.

            :param: base_shard_resource_id: id of the basesearch database resource
            :param: priority: priority for the subtask
            :param: tier: tier name, used to look up the middlesearch plan
            :param: timestamp: database generation, used to pick middlesearch data
            :return: task object
        """
        generate_factors_plan = self.ctx.get(
            RunTestsWithFactorsPlan.name,
            RunTestsWithFactorsPlan.default_value
        )
        middle_exe_id = (
            self.ctx.get('middlesearch_executable_resource_id')
            or self._get_last_released_ranking_middlesearch_id()
        )
        middle_config_id = self._get_mmeta_middlesearch_config_id()
        middle_data_id = self._get_mmeta_middlesearch_data_id(timestamp)
        middle_models_id = self._get_mmeta_middlesearch_models_id()
        basesearch_exe_id = self.ctx['basesearch_executable']
        basesearch_config_id = self.ctx['basesearch_config']
        middle_plan_id = self._get_mmeta_middlesearch_plan_id(tier)

        subtask_context = {
            'kill_timeout': 4 * 60 * 60,
            'notify_via': '',
            PlanGenParams.GenSearchPlan.name: True,
            PlanGenParams.GenFactorsPlan.name: generate_factors_plan,
            PlanGenParams.GenSnippetsPlan.name: False,
            PlanGenParams.GetUncachedQueries.name: False,
            PlanGenParams.MiddleExe.name: middle_exe_id,
            PlanGenParams.MiddleConfig.name: middle_config_id,
            PlanGenParams.MiddleData.name: middle_data_id,
            PlanGenParams.MiddleModels.name: middle_models_id,
            PlanGenParams.BasesearchExe.name: basesearch_exe_id,
            PlanGenParams.BasesearchConfig.name: basesearch_config_id,
            PlanGenParams.BasesearchDatabase.name: base_shard_resource_id,
            PlanGenParams.MiddlePlan.name: middle_plan_id,
            # use all requests in plan, but don't repeat them
            PlanGenParams.RequestsNumber.name: 0,  # SEARCH-1132
        }

        description = 'Generate plans for basesearch shard {}'.format(base_shard_resource_id)
        return self.create_subtask(
            task_type='GENERATE_ALL_PLANS',
            description=description,
            input_parameters=subtask_context,
            arch='linux',
            priority=priority,
        )

    def _create_basesearch_tests(self, base_shard_resource_id, priority, plan_gen_task):
        """
            Create tasks to test basesearch performance with search and factors plans

            :return: dict with info - their ids
        """
        result = {}
        for plan_type in self.ctx['plan_types']:
            test_task = self._create_test_basesearch_task(
                plan_gen_task, plan_type, base_shard_resource_id, priority
            )
            result['test_{}_task_id'.format(plan_type)] = test_task.id
        return result

    def _create_memusage_task(self, tier, base_shard_resource_id, priority):
        """
            Start a task measuring middlesearch memory usage with the given shard.

            The same shard resource is used for both basesearch databases; all other
            resources are the last released / last attributed ones.

            :param: tier: tier name, selects the basesearch config and middlesearch plan
            :param: base_shard_resource_id: id of the basesearch database resource
            :param: priority: not used by this method
            :return: result of memory_usage._create_measure_memory_task
                (a task id, according to the callers in this class)
        """
        logging.info('Create MEASURE_MIDDLESEARCH_MEMORY_USAGE task for database {}'.format(base_shard_resource_id))

        subtask_descr = 'Test basesearch for priemka shard database {}'.format(base_shard_resource_id)
        arch = 'linux'

        # basesearch part
        base_executable_id = utils.get_and_check_last_released_resource_id(
            resource_type=resource_types.BASESEARCH_EXECUTABLE,
            arch=arch,
        )
        base = _Basesearch(base_executable_id)
        base_config = utils.get_and_check_last_resource_with_attribute(
            resource_type=resource_types.SEARCH_CONFIG,
            attr_name='TE_web_base_prod_resources_{}'.format(tier),
        )
        base.config = base_config.id
        base.database1 = base.database2 = base_shard_resource_id
        base.models_base = utils.get_and_check_last_released_resource_id(
            resource_types.DYNAMIC_MODELS_ARCHIVE_BASE
        )

        # middlesearch part
        middle_executable_id = utils.get_and_check_last_released_resource_id(
            resource_type=ms_resources.RankingMiddlesearchExecutable,
            arch=arch
        )
        middle = _Middlesearch(middle_executable_id)
        middle_config = utils.get_and_check_last_resource_with_attribute(
            resource_type=resource_types.MIDDLESEARCH_CONFIG,
            attr_name=WebSettings.testenv_middle_cfg_attr_name("jupiter_mmeta")
        )
        middle.config = middle_config.id
        middle_data = utils.get_and_check_last_resource_with_attribute(
            resource_type=resource_types.MIDDLESEARCH_DATA,
            attr_name=WebSettings.testenv_middle_resources_attr_name("mmeta", "data")
        )
        middle.data = middle_data.id
        middle_models = utils.get_and_check_last_resource_with_attribute(
            resource_type=resource_types.DYNAMIC_MODELS_ARCHIVE,
            attr_name="current_production",
        )
        middle.models = middle_models.id
        middle.plan = self._get_mmeta_middlesearch_plan_id(tier)

        return memory_usage._create_measure_memory_task(base, middle, subtask_descr)

    def _create_test_basesearch_task(self, plan_gen_task, plan_type, db_id, priority):
        """
            Start a TEST_BASESEARCH_PERFORMANCE subtask for one shard and one plan type.

            :param: plan_gen_task: plan generator task; the plan resource id is read
                from its context key '<plan_type>_plan_resource_id' (0 if absent)
            :param: plan_type: can be (search, factors, snippets)
            :param: db_id: id of basesearch database
            :param: priority: priority of creating task
            :return: task object
        """
        plan_key = '{}_plan_resource_id'.format(plan_type)
        plan_id = plan_gen_task.ctx.get(plan_key, 0)
        logging.info(
            'Create TEST_BASESEARCH task for priemka... database %s, plan %s',
            db_id, plan_id
        )
        subtask_descr = 'Test basesearch for priemka with {} plan, shard database {}'.format(
            plan_type, db_id
        )

        ctx = self.ctx
        subtask_context = {
            'notify_via': '',
            'kill_timeout': 4 * 60 * 60,
            PlanParameter.name: plan_id,
            RunSpecialFinalSessionParameter.name: True,
            DefaultBasesearchParams.Binary.name: ctx['basesearch_executable'],
            DefaultBasesearchParams.Config.name: ctx['basesearch_config'],
            DefaultBasesearchParams.Database.name: db_id,
            DefaultBasesearchParams.ArchiveModel.name: ctx['models_archive_resource_id'],
            DefaultBasesearchParams.StartTimeout.name: 480,
            DolbilkaExecutorRequestsLimit.name: ctx[ExecutorRequestsLimit.name],
            DolbilkaSessionsCount.name: 8,
            DolbilkaExecutorMode.name: 'finger',
            DolbilkaMaximumSimultaneousRequests.name: 28,
        }

        subtask = self.create_subtask(
            task_type='TEST_BASESEARCH_PERFORMANCE',
            description=subtask_descr,
            input_parameters=subtask_context,
            model='e5-2650',
            arch='linux',  # smart
            priority=priority,
        )
        return subtask

    def _get_mmeta_middlesearch_plan_id(self, tier):
        """
            Find the middlesearch plan resource marked for this priemka.

            :param: tier: tier name (not used for the lookup)
            :return: id of the first resource returned by the search
            :raises SandboxTaskFailureError: if no resource is found
        """
        # look for resource with attribute 'priemka_basesearch_database_plan=yes'
        resources = channel.sandbox.list_resources(
            resource_type=ms_resources.MiddlesearchPlan,
            attribute_name='priemka_basesearch_database_plan',
            attribute_value='yes'
        )
        # "nothing found" may be reported as None or as an empty list;
        # both must end with a clear task failure rather than an IndexError
        if not resources:
            raise SandboxTaskFailureError(
                "Cannot find an MIDDLESEARCH_PLAN resource with priemka_basesearch_database_plan=yes attribute"
            )
        return resources[0].id

    def _get_mmeta_resources(self):
        """
            Start a GET_MIDDLESEARCH_RESOURCES subtask and remember the ids of its
            middlesearch config and models resources in the task context
            ('mmeta_middlesearch_config' and 'mmeta_middlesearch_models').
        """
        get_resources_task = self.create_subtask(
            task_type='GET_MIDDLESEARCH_RESOURCES',
            description='Get middlesearch resources',
            input_parameters={
                'get_mmeta_cfg': True,
                'get_mmeta_models': True,
                'notify_via': ''
            }
        )
        subtask_id = get_resources_task.id

        config_resources = channel.sandbox.list_resources(
            resource_type=resource_types.MIDDLESEARCH_CONFIG,
            task_id=subtask_id
        )
        middle_config = config_resources[0]

        models_resources = channel.sandbox.list_resources(
            resource_type=resource_types.DYNAMIC_MODELS_ARCHIVE,
            task_id=subtask_id
        )
        middle_models = models_resources[0]

        self.ctx['mmeta_middlesearch_config'] = middle_config.id
        self.ctx['mmeta_middlesearch_models'] = middle_models.id

    def _get_mmeta_middlesearch_config_id(self):
        if not self.ctx.get('mmeta_middlesearch_config'):
            self._get_mmeta_resources()

        return self.ctx['mmeta_middlesearch_config']

    def _get_mmeta_middlesearch_models_id(self):
        if not self.ctx.get('mmeta_middlesearch_models'):
            self._get_mmeta_resources()

        return self.ctx['mmeta_middlesearch_models']

    def _get_mmeta_middlesearch_data_id(self, timestamp):
        shard_name = 'rearr-jupiter-msuserdata-000-{}'.format(timestamp)
        if not self.ctx.get(shard_name):
            shard_torrent = self._get_rbtorrent(shard_name)
            sub_ctx = {
                'database_shard_name': shard_name,
                'kill_timeout': self.db_shard_timeout,
                'search_database_rsync_path': shard_torrent,
                'notify_via': '',
                'resource_attrs': '{}={}'.format(
                    WebSettings.testenv_middle_resources_attr_name('mmeta', 'data'),  # SEARCH-4078
                    tu.date_ymd(sep="_")
                ),
            }
            get_middledb_task = self.create_subtask(
                task_type='GET_MIDDLESEARCH_DATABASE',
                description='Get middlesearch database',
                input_parameters=sub_ctx
            )
            db = channel.sandbox.list_resources(
                resource_type=resource_types.MIDDLESEARCH_DATA,
                task_id=get_middledb_task.id
            )[0]
            self.ctx[shard_name] = db.id

        return self.ctx[shard_name]

    def _get_last_released_basesearch_id(self):
        """
            Look up the last released linux basesearch executable, store its id
            in ctx['basesearch_executable'] and return it.

            :raises SandboxTaskFailureError: if no released executable is found
        """
        released = apihelpers.get_last_released_resource(
            resource_types.BASESEARCH_EXECUTABLE, arch='linux'
        )
        if released is None:
            raise SandboxTaskFailureError(
                "Cannot find the last stable released basesearch executable"
            )

        executable_id = released.id
        self.ctx['basesearch_executable'] = executable_id
        return executable_id

    def _get_last_released_ranking_middlesearch_id(self):
        if self.ctx.get('ranking_middlesearch_executable'):
            return self.ctx.get('ranking_middlesearch_executable')
        last_released_task = apihelpers.get_last_released_resource(
            ms_resources.RankingMiddlesearchExecutable, arch='linux'
        )
        if last_released_task is None:
            raise SandboxTaskFailureError(
                "Cannot find the last stable released ranking middlesearch executable"
            )
        else:
            self.ctx['ranking_middlesearch_executable'] = last_released_task.id
            return last_released_task.id

    def _get_last_released_mnet_models_id(self):
        """
            Return the id of the last released DYNAMIC_MODELS_ARCHIVE resource.

            :raises SandboxTaskFailureError: if no released archive is found
        """
        models_archive = apihelpers.get_last_released_resource(resource_types.DYNAMIC_MODELS_ARCHIVE)
        if models_archive is not None:
            return models_archive.id
        raise SandboxTaskFailureError("Cannot find the last stable released models archive")

    def _get_production_basesearch_config_id(self):
        """
            Enqueue a GET_BASESEARCH_CONFIG_2 task for PlatinumTier0 and return the
            'search_config_for_PlatinumTier0' value from the context of that task.

            NOTE(review): the context is read right after enqueue() -- confirm that
            the value is already available at that moment.
        """
        logging.info('Get basesearch config from production')
        task_class = sdk2.Task['GET_BASESEARCH_CONFIG_2']
        parent_task = task_class.current
        description = 'Get basesearch production config for priemka database task #{}'.format(self.id)
        new_task = task_class(
            parent_task,
            description=description,
            owner=self.owner,
            priority=self.priority,
            db_types=['PlatinumTier0']
        )
        new_task_id = new_task.enqueue().id
        enqueued_cfg_task = sdk2.Task[new_task_id]
        return enqueued_cfg_task.Context.search_config_for_PlatinumTier0

    def _check_one_tier_group(self, tier_group):
        """
            Check tasks from tier group,
            update info about rps, queries, quantiles and memory usage

            :param: tier_group: info about tasks for this tier_group
            :return: True, if all tasks finished, else False
        """
        unfinished_tasks_ids = []
        for shard in tier_group:
            logging.info('Check shard: %s', shard['shard_name'])
            plan_gen_task = channel.sandbox.get_task(shard['plan_gen_task_id'])
            for plan_type in self.ctx['plan_types']:
                test_task = channel.sandbox.get_task(shard['test_{}_task_id'.format(plan_type)])
                shard['{}_rps'.format(plan_type)] = test_task.ctx.get('max_rps', '')

                queries_stats = plan_gen_task.ctx.get('stats', {}).get(plan_type, {})
                shard['num_of_{}_queries'.format(plan_type)] = queries_stats.get('queries_amount', '')
                shard['quantile_{}'.format(plan_type)] = queries_stats.get('queries_len_quantile', '')
                if not test_task.is_done():
                    unfinished_tasks_ids.append(test_task.id)

            if utils.get_or_default(self.ctx, MeasureMiddlesearchMemoryUsage):
                memusage_task = channel.sandbox.get_task(shard['memusage_task_id'])
                rss = memusage_task.ctx.get('memory_rss', None)
                vsz = memusage_task.ctx.get('memory_vsz', None)
                if rss and vsz and len(rss) >= 1 and len(vsz) >= 1:
                    shard['memory_rss'] = rss[0] / (1024 * 1024)
                    shard['memory_vsz'] = vsz[0] / (1024 * 1024)
                else:
                    shard['memory_rss'] = shard['memory_vsz'] = ''
                if not memusage_task.is_done():
                    unfinished_tasks_ids.append(memusage_task.id)

        return unfinished_tasks_ids

    def _check_results(self, results):
        unfinished_test_tasks = []
        for tier_name, tier_info in results.items():
            logging.info('Check %s tier tasks', tier_name)
            unfinished_test_tasks += self._check_one_tier_group(tier_info['production'])
            unfinished_test_tasks += self._check_one_tier_group(tier_info['newdb'])
        if unfinished_test_tasks:
            logging.info('Unfinished test_tasks: %s', unfinished_test_tasks)
            self.wait_any_task_completed(unfinished_test_tasks)
        else:
            logging.info('Priemka is finished successfully.')

    def _check_tiers(self):
        if 'test_tiers' not in self.ctx:
            self.ctx['test_tiers'] = self._get_tiers_for_test()
            if not self.ctx['test_tiers']:
                raise SandboxTaskFailureError('No tiers for test.')

    def _get_tiers_for_test(self):
        """
            Gets list of tested tiers from task's parameters

            A tier is selected when its 'test_tier_<tier>' context value is truthy.
            When no tier is selected, all known tiers are tested.

            :return: list of tiers, always in BASESEARCH_DATABASE_TIERS order
            example ['PlatinumTier0', 'WebTier1', ]
        """
        logging.info('Get tiers for tests')

        # Lists (not sets) keep the declaration order, so the result is deterministic
        specified_tiers_true = [
            tier for tier in BASESEARCH_DATABASE_TIERS
            if self.ctx.get('test_tier_{}'.format(tier))
        ]
        specified_tiers_false = [
            tier for tier in BASESEARCH_DATABASE_TIERS
            if tier not in specified_tiers_true
        ]

        logging.info(
            'From parameters:\n'
            '    test tiers: %s\n'
            '    do not test tiers: %s',
            specified_tiers_true, specified_tiers_false
        )

        # Always a list, as documented, even for the "test everything" fallback
        return specified_tiers_true or list(BASESEARCH_DATABASE_TIERS)

    def _get_resources(self):
        if 'basesearch_executable' not in self.ctx:
            self.ctx['basesearch_executable'] = (
                self.ctx.get('basesearch_executable_resource_id')
                or self._get_last_released_basesearch_id()
            )

        if 'basesearch_config' not in self.ctx:
            self.ctx['basesearch_config'] = (
                self.ctx.get('basesearch_config_resource_id')
                or self._get_production_basesearch_config_id()
            )

        if 'models_archive_resource_id' not in self.ctx:
            self.ctx['models_archive_resource_id'] = self._get_last_released_mnet_models_id()

    def _get_shard_group_by_shard_name(self, shard_name):
        return '-'.join(shard_name.split('-')[:-1])

    def _get_scatter_group_by_shard_name(self, shard_name):
        """
            primus001-123-12312335436 -> primus001
        """
        group = shard_name.split('-', 1)[0]
        return self.PRIMUS_GROUPS.get(group, '')

    def get_child_task(self, child_tasks, task_id):
        """
            Used in form.html

            :param: child_tasks: iterable of pairs whose second item is a task object
            :param: task_id: id of the task to find
            :return: the first task object whose id equals task_id
            :raises Exception: if there is no such task
        """
        matching = [child for _, child in child_tasks if child.id == task_id]
        if matching:
            return matching[0]
        raise Exception("cannot find child task %s. all child_tasks: %s" % (task_id, child_tasks))

    def get_priemka_results(self):
        """
            Used in form.html
        """
        priemka_results = self.ctx.get('priemka_results', [])
        shard_results_init = {
            'shard_group': '',
            'scatter_group': ''
        }
        for tag in self.db_tags:
            shard_results_init.update({
                '{}_shard_name'.format(tag): '',
                '{}_shard_task_id'.format(tag): '',
                '{}_shard_resource_id'.format(tag): '',
                '{}_memusage_task_id'.format(tag): '',
                '{}_memory_rss'.format(tag): '',
                '{}_memory_vsz'.format(tag): ''
            })
            for plan_type in self.ctx.get('plan_types', []):
                shard_results_init.update({
                    '{}_{}_task_id'.format(tag, plan_type): '',
                    '{}_{}_rps'.format(tag, plan_type): '',
                    '{}_num_of_{}_queries'.format(tag, plan_type): '',
                    '{}_quantile_{}'.format(tag, plan_type): ''
                })
        if priemka_results:
            results = []
            for tier_name in BASESEARCH_DATABASE_TIERS:
                if tier_name in priemka_results:
                    tier_results = priemka_results[tier_name]
                    result = {'tier_name': tier_name}

                    # get results
                    shards_results = []
                    rps_results, num_of_queries, quantile, memory_rss, memory_vsz = {}, {}, {}, {}, {}
                    for tag in self.db_tags:
                        for plan_type in self.ctx['plan_types']:
                            rps_results['{}_{}'.format(tag, plan_type)] = []
                            num_of_queries['{}_{}'.format(tag, plan_type)] = []
                            quantile['{}_{}'.format(tag, plan_type)] = []

                            for i, shard_result in enumerate(tier_results[tag]):
                                if i >= len(shards_results):
                                    shards_results.append(shard_results_init.copy())
                                shards_results[i].update(self._get_shard_results(shard_result, tag, plan_type))
                                try:
                                    rps_res = float(shards_results[i].get('{}_{}_rps'.format(tag, plan_type), ''))
                                    num_of_q = shards_results[i].get('{}_num_of_{}_queries'.format(tag, plan_type), '')
                                    quant = shards_results[i].get('{}_quantile_{}'.format(tag, plan_type), '')
                                    rps_results['{}_{}'.format(tag, plan_type)].append(rps_res)
                                    num_of_queries['{}_{}'.format(tag, plan_type)].append(float(num_of_q))
                                    quantile['{}_{}'.format(tag, plan_type)].append(float(quant))
                                except ValueError:
                                    pass
                                except TypeError:
                                    pass

                        memory_rss[tag] = []
                        memory_vsz[tag] = []
                        for i, shard_result in enumerate(tier_results[tag]):
                            try:
                                rss = shards_results[i].get('{}_memory_rss'.format(tag), '')
                                vsz = shards_results[i].get('{}_memory_vsz'.format(tag), '')
                                memory_rss[tag].append(int(rss))
                                memory_vsz[tag].append(int(vsz))
                            except ValueError:
                                pass
                            except TypeError:
                                pass

                            rps = rps_results['{}_{}'.format(tag, plan_type)]
                            num_of_q = num_of_queries['{}_{}'.format(tag, plan_type)]
                            quant = quantile['{}_{}'.format(tag, plan_type)]
                            mem_rss = memory_rss[tag]
                            mem_vsz = memory_vsz[tag]
                            result['{}_avg_num_of_{}_queries'.format(tag, plan_type)] = utils.count_avg(num_of_q, 1)
                            result['{}_avg_quantile_{}'.format(tag, plan_type)] = utils.count_avg(quant, 1)
                            result['{}_avg_{}_rps'.format(tag, plan_type)] = utils.count_avg(rps, 2)
                            result['{}_deviation_{}_rps'.format(tag, plan_type)] = utils.count_std_dev(rps, 2)
                            result['{}_avg_memory_rss'.format(tag)] = utils.count_avg(mem_rss, 1)
                            result['{}_avg_memory_vsz'.format(tag)] = utils.count_avg(mem_vsz, 1)

                    # sort tier_results by scatter_group
                    result['tier_results'] = enumerate(sorted(shards_results, key=lambda x: x['scatter_group']))
                    results.append(result)
            return results
        else:
            return []

    def _get_shard_results(self, shard_result, tag, plan_type):
        return {
            'shard_group': self._get_shard_group_by_shard_name(shard_result['shard_name']),
            'scatter_group': self._get_scatter_group_by_shard_name(shard_result['shard_name']),
            '{}_shard_name'.format(tag): shard_result.get('shard_name'),
            '{}_shard_task_id'.format(tag): shard_result.get('shard_resource_task_id'),
            '{}_shard_resource_id'.format(tag): shard_result.get('shard_resource_id'),
            '{}_{}_task_id'.format(tag, plan_type): shard_result.get(
                'test_{}_task_id'.format(plan_type)
            ),
            '{}_{}_rps'.format(tag, plan_type): shard_result.get('{}_rps'.format(plan_type)),
            '{}_num_of_{}_queries'.format(tag, plan_type): shard_result.get(
                'num_of_{}_queries'.format(plan_type)
            ),
            '{}_quantile_{}'.format(tag, plan_type): shard_result.get(
                'quantile_{}'.format(plan_type)
            ),
            '{}_memusage_task_id'.format(tag): shard_result.get('memusage_task_id'),
            '{}_memory_rss'.format(tag): shard_result.get('memory_rss'),
            '{}_memory_vsz'.format(tag): shard_result.get('memory_vsz')
        }


class _Middlesearch(object):
    """Minimal holder describing a middlesearch component by its executable."""

    def __init__(self, executable):
        # No stderr verification is configured at construction time.
        self.verify_stderr = None
        self.executable = executable


class _Basesearch(object):
    """Minimal holder describing a basesearch component by its executable."""

    def __init__(self, executable):
        self.executable = executable


# Base URL of the clusterstate controller; get_prod_timestamp,
# get_pip_timestamp and get_shards_from_controller build their endpoints from it.
MAIN_CTRL_URL = 'http://ctrl.clusterstate.yandex-team.ru'


def get_shard_names(nanny_client, tier, db_tag):
    """
    Return the list of shard names for the given tier.

    For 'PlatinumTier0' the names are generated locally (180 fixed-pattern
    names) and neither nanny_client nor db_tag is used. For any other tier
    the Nanny service matching (tier, db_tag) is looked up and the shards of
    all its gencfg groups are merged; the order of the result is unspecified.

    :param nanny_client: object providing get_service_runtime_attrs(service)
    :param tier: tier name, a key of the table below
    :param db_tag: 'production' or 'newdb'
    :raises KeyError: for a tier or db_tag missing from the table
    """
    if tier == 'PlatinumTier0':
        name_template = 'primus-PlatinumTier0-0-{}-0'
        return [name_template.format(number) for number in range(180)]

    # NOTE: the 'PlatinumTier0' row is never consulted because of the
    # early return above.
    services_by_tier = {
        'PlatinumTier0': {
            'production': 'vla_web_platinum_base',
            'newdb': 'platinum_jupiter_build',
        },
        'WebTier1': {
            'production': 'jupiter_base',
            'newdb': 'tier1_jupiter_build',
        }
    }
    service_name = services_by_tier[tier][db_tag]

    runtime_attrs = nanny_client.get_service_runtime_attrs(service_name)
    gencfg_groups = runtime_attrs['content']['instances']['extended_gencfg_groups']['groups']

    collected = set()
    for gencfg_group in gencfg_groups:
        collected.update(get_gencfg_shards(gencfg_group['name'], gencfg_group['release'], tier))

    return list(collected)


def get_prod_timestamp():
    """
    Ask the controller for the oldprod state and return its
    ['common']['yt_state'] value; the 'common' section is logged.
    """
    oldprod_ctrl_url = MAIN_CTRL_URL + '/web/prod'
    response = requests_wrapper.get(oldprod_ctrl_url + '/jupiter/oldprod')

    common_state = response.json()['common']
    logging.info('Get oldprod state %s', common_state)

    return common_state['yt_state']


def get_pip_timestamp():
    """
    Ask the controller for the pip state and return the largest 'timestamp'
    among the entries of ['common']['deployer']['target'].

    Fails with AssertionError when the target list is empty.
    """
    pip_url = MAIN_CTRL_URL + '/web/pip'
    state = requests_wrapper.get(pip_url + '/pip').json()

    timestamps = []
    for slot in state['common']['deployer']['target']:
        timestamps.append(slot['timestamp'])

    assert timestamps

    return max(timestamps)


def get_shards_from_controller(tier, timestamp):
    """
    Read the build progress of a tier from the controller.

    Only shards whose name contains str(timestamp) are considered.

    :return: tuple (all_shards, done_shards) of sets of shard names, where
        done_shards holds the shards reported with status 'DONE'; names
        lacking the 'primus-' prefix get it prepended
    """
    ready_status = 'DONE'
    progress_url = MAIN_CTRL_URL + '/web/prod' + '/view/build_progress/' + tier

    known, ready = set(), set()
    build_progress = requests_wrapper.get(progress_url).json()
    for name, info in build_progress.items():
        if str(timestamp) not in name:
            continue

        # Temporary crutch
        full_name = name if name.startswith('primus-') else 'primus-' + name

        known.add(full_name)
        if info['status'] == ready_status:
            ready.add(full_name)

    return known, ready


def get_gencfg_shards(group, tag, tier):
    """
    Collect the shard names of a gencfg group that belong to the given tier.

    The instances of `group` at release `tag` are requested from the gencfg
    API; an instance counts when the tier derived from its tags equals
    `tier`. Any response other than HTTP 200 yields an empty set.

    :return: set of shard names
    """
    tier_tag_prefix = 'a_tier_'

    def get_tier_by_tags(tags):
        # The first tier tag wins; prefix and 'OxygenExp' are dropped from it.
        tier_tag = next((t for t in tags if t.startswith(tier_tag_prefix)), None)
        if tier_tag is None:
            return None
        return tier_tag.replace(tier_tag_prefix, '').replace('OxygenExp', '')

    url = ''.join((GENCFG_API, '/', tag, '/searcherlookup/groups/', group, '/instances'))
    response = requests_wrapper.get(url)
    if response.status_code != 200:
        return set()

    return {
        instance['shard_name']
        for instance in response.json()['instances']
        if tier == get_tier_by_tags(instance['tags'])
    }


def shard_name_to_real_shard_name(shard_name, timestamp):
    """
    Replace the last '-'-separated field of a shard name with a timestamp.

    :param shard_name: shard name whose trailing field is a placeholder
    :param timestamp: value to put in place of the last field; a
        non-string value (e.g. an integer timestamp) is converted with str()
        instead of failing inside the join
    :return: the shard name with its last field replaced; when shard_name
        contains no '-', the timestamp alone is returned
    """
    # type(u'') is `unicode` on Python 2 and `str` on Python 3, so text
    # timestamps of either flavour are passed through untouched.
    if not isinstance(timestamp, (str, type(u''))):
        timestamp = str(timestamp)

    # rpartition yields ('', '', shard_name) when there is no separator,
    # which makes the result collapse to the bare timestamp.
    prefix, separator, _ = shard_name.rpartition('-')
    return prefix + separator + timestamp


# NOTE(review): presumably the module-level hook through which Sandbox
# discovers this module's task class — confirm against the SDK.
__Task__ = PriemkaBasesearchDatabaseJupiter
