# -*- coding: utf-8 -*-
import datetime
import itertools
import json
import logging
import re
import six
from datetime import timedelta

import sandbox.common.types.task as ctt
import sandbox.projects.yabs.qa.utils.importer as importer_utils
from sandbox import sdk2
from sandbox.common.types.misc import NotExists

from sandbox.sandboxsdk.environments import PipEnvironment

from sandbox.projects.common.yabs.server.db import yt_bases
from sandbox.projects.common.yabs.server.db.task.cs import YabsCSTask, SaveAllInputs, UseSaveInputFromCS, YtPool, FilterInputArchiveTablesByOrderID
from sandbox.projects.common.yabs.server.db.utils import calc_combined_settings_md5
from sandbox.projects.common.yabs.server.util.general import check_tasks

from sandbox.projects.yabs.qa.resource_types import (
    YABS_CS_INPUT_SPEC,
    YABS_MYSQL_ARCHIVE_CONTENTS,
    BS_RELEASE_YT,
    BS_RELEASE_TAR,
    YABS_CS_IMPORT_OUT_LOCATION,
    YABS_CS_SETTINGS_ARCHIVE,
    YABS_CS_SETTINGS_PATCH,
)
from sandbox.projects.yabs.qa.solomon.mixin import (
    SolomonTaskMixin,
    SolomonTaskMixinParameters,
)
from sandbox.projects.yabs.qa.tasks.YabsServerRealRunCSImport import IMPORT_DIGEST_KEY, IMPORT_PREFIX_KEY
from sandbox.projects.yabs.qa.tasks.cs_helpers import CSHelper
from sandbox.projects.yabs.qa.utils import task_run_type, yt_utils
from sandbox.projects.yabs.qa.utils.general import (
    get_json_md5,
    get_task_html_hyperlink,
    get_yt_path_html_hyperlink,
)
from sandbox.projects.yabs.qa.bases.sample_tables.parameters import (
    get_sampling_parameters_hash,
    SamplingStrategy,
    SamplingStrategyParameter,
    SamplingQueryTemplateResourceParameter,
    SamplingTablesResourceParameter,
    FindTablesForSampling,
    SamplingTablesBlackListResourceParameter,
    MinRowsCountForSamplingTables,
    KeysForSamplingTables,
)

from sandbox.projects.common.yabs.server.tracing import TRACE_WRITER_FACTORY
from sandbox.projects.yabs.sandbox_task_tracing import trace, trace_calls, trace_entry_point
from sandbox.projects.yabs.sandbox_task_tracing.wrappers.sandbox.generic import enqueue_task, new_resource
from sandbox.projects.yabs.sandbox_task_tracing.wrappers.sandbox.sdk2 import new_resource_data

from .utils import (
    get_importer_with_dependencies_version,
    get_importers_to_run,
    import_node_is_reusable,
    is_st_update_mode,
    separate_base_tags,
)


# Name of the task-context flag that marks the "real" run, i.e. the execution
# for which on_enqueue requests the YT quota semaphores.
_REAL_RUN_CTX_KEY = 'real_run'
# NOTE(review): not referenced in the visible part of this module; presumably
# a task tag used by callers to mark reused imports -- confirm.
IMPORT_REUSED_TAG = 'IMPORT_REUSED'
# TTL (seconds) passed to yt_utils.create_node when the import node is created.
IMPORT_NODE_INITIAL_TTL = timedelta(hours=12).total_seconds()

# YT account whose SSD quota is inspected when choosing the primary medium
# of a new import node.
YT_ACCOUNT = 'yabs-cs-sandbox'
# Minimum share (percent of the account SSD limit) that must be free for the
# import node to be placed on the 'ssd_blobs' medium.
YT_SSD_MINIMUM_FREE_PERCENT = 10


# Module-level logger.
logger = logging.getLogger(__name__)


class ImportWrapperInputParameters(SolomonTaskMixinParameters, sdk2.Parameters):
    # Input parameters of the cs_import wrapper task.
    #
    # The parameters are split into three groups:
    #   * 'Input data' -- resources and settings describing what to import;
    #   * 'Additional parameters for run cs import' -- importer selection,
    #     reuse of existing import results, digest calculation, YT pool;
    #   * 'Common mkdb parameters' -- binaries and task-level switches.
    # The Solomon parameters inherited from SolomonTaskMixinParameters are
    # re-declared at the bottom with task-specific defaults.

    # Root YT directory; the import node of a task is created under it.
    imports_root_dir = sdk2.parameters.String("Import results root directory", default=yt_bases.IMPORTS_ROOT)

    with sdk2.parameters.Group('Input data') as input_data:
        input_spec = sdk2.parameters.Resource("Input spec for cs_import", resource_type=YABS_CS_INPUT_SPEC, required=True)
        settings_archive = sdk2.parameters.Resource("CS settings archive", resource_type=YABS_CS_SETTINGS_ARCHIVE, required=False)
        cs_settings_patch = sdk2.parameters.Resource("Updates CS settings using jsondiff", resource_type=YABS_CS_SETTINGS_PATCH, required=False)
        settings_spec = sdk2.parameters.String("CS Settings spec (json) to be passed with --settings-spec")
        mysql_archive_contents = sdk2.parameters.Resource("MySQL archive contents.", resource_type=YABS_MYSQL_ARCHIVE_CONTENTS, required=True, multiple=False)
        bin_db_list = sdk2.parameters.String("Binary base tags (will run import for all bases if empty)", default='')
        oneshot_path = sdk2.parameters.String("Arcadia path to YT oneshot")
        oneshot_args = sdk2.parameters.String("Arguments for YT oneshot")
        exec_common_yt_oneshots = sdk2.parameters.Bool('Execute common yt oneshots', default=False)
        sampling_strategy = SamplingStrategyParameter('Sampling strategy')
        # The sampling-specific parameters below are declared only for the 'sampled' strategy.
        with sampling_strategy.value[SamplingStrategy.sampled.value]:
            sampling_query_template = SamplingQueryTemplateResourceParameter('Resource with sampling query template', required=True)
            sampling_tables = SamplingTablesResourceParameter('Resource with sampling tables', required=True)
            find_tables_for_sampling = FindTablesForSampling('Find big tables for sampling')
            sampling_tables_black_list = SamplingTablesBlackListResourceParameter('Resource with black list of tables for sampling', required=True)
            min_rows_count_for_sampling_tables = MinRowsCountForSamplingTables('Min rows count for sampling tables')
            sampling_tables_keys = KeysForSamplingTables('Keys for sampling tables')
        common_oneshots_md5 = sdk2.parameters.String("Common oneshots md5", default='')
        baseno_list = sdk2.parameters.List('Baseno list is being used by sandbox tests', default=[])
        filter_input_archive_tables_by_orderid = FilterInputArchiveTablesByOrderID()

    with sdk2.parameters.Group('Additional parameters for run cs import') as run_cs_import:
        use_cs_cycle = sdk2.parameters.Bool('Run cs_cycle to generate imports', default=False)
        # Importer selection differs between the two modes: a whitespace-separated
        # string (import_actions) without cs_cycle, a JSON list (importers) with it.
        with use_cs_cycle.value[False]:
            import_actions = sdk2.parameters.String(
                "Importers to run",
                description="If empty, run all importers present in 'cs import --print-info'"
            )
        with use_cs_cycle.value[True]:
            importers = sdk2.parameters.JSON(
                "List of importers to run, dependencies will be added automatically",
                description="If empty, list of importers will be calculated from bin_db_list parameter",
                default=[]
            )

        # Used (in days) as the TTL set on import nodes and as the TTL of the output resource.
        lower_reusable_ttl_limit = sdk2.parameters.Integer(
            "Lower limit for output TTL, days (does not work for runs with patch)",
            default=1)

        reuse_import_results = sdk2.parameters.Bool("Reuse import results", default=True)
        # Switches selecting which attributes take part in the search for reusable imports.
        with reuse_import_results.value[True]:
            search_import_by_settings_version = sdk2.parameters.Bool("Search import by settings version", default=True)
            search_import_by_code_version = sdk2.parameters.Bool("Search import by code version", default=True)
            search_import_by_mkdb_info_version = sdk2.parameters.Bool("Search import by mkdb_info version", default=True)
            search_import_by_cs_import_ver = sdk2.parameters.Bool("Search import by cs_import_ver", default=False)

        write_yt_debug_log = sdk2.parameters.Bool("Write YT Debug Log to the separate file", default=True)
        calc_digest = sdk2.parameters.Bool("Calculate digest of import result", default=False)
        with calc_digest.value[True]:
            wait_digest = sdk2.parameters.Bool("Wait for digest of import result", default=False)
            hashes_history_dir = sdk2.parameters.String("Copy digests to directory", default="//home/yabs-cs-sandbox/import-digest")
        drop_import_result = sdk2.parameters.Bool("Remove node with import result after finish", default=False)
        save_all_inputs = SaveAllInputs()
        use_save_input_from_cs = UseSaveInputFromCS()
        yt_pool = YtPool()

    with sdk2.parameters.Group('Common mkdb parameters') as common_params:
        bs_release_yt_resource = sdk2.parameters.Resource("BS release yt resource", resource_type=BS_RELEASE_YT)
        server_resource = sdk2.parameters.Resource(
            "Resource with yabs-server & yabs_mkdb (last stable BS_RELEASE_TAR if empty)",
            resource_type=BS_RELEASE_TAR)
        publish_spec = sdk2.parameters.Bool("Publish spec to be used for local debugging", default=True)
        propagate_tags = sdk2.parameters.Bool("Use own tags when creating child tasks", default=False)

    # Solomon parameters from the mixin, re-declared with defaults specific to this task.
    solomon_project = SolomonTaskMixinParameters.solomon_project(default="yabs_testing")
    solomon_cluster = SolomonTaskMixinParameters.solomon_cluster(default="imports")
    solomon_service = SolomonTaskMixinParameters.solomon_service(default="reuse")
    solomon_token_vault_name = SolomonTaskMixinParameters.solomon_token_vault_name(default="robot-yabs-cs-sb-mon-solomon-token")


class YabsServerRunCSImportWrapper(SolomonTaskMixin, sdk2.Task, CSHelper):  # pylint: disable=R0904

    name = 'YABS_SERVER_RUN_CS_IMPORT_WRAPPER'

    class Requirements(sdk2.Requirements):
        # Host resources requested for the task.
        cores = 16
        ram = 64 * 1024
        # Python packages installed into the task environment.
        environments = (
            PipEnvironment('jsondiff', version='1.2.0'),
            PipEnvironment('networkx', version='2.2', use_wheel=True),
            PipEnvironment('yandex-yt', use_wheel=True),
        )

        # Empty Caches declaration.
        # NOTE(review): presumably declares that the task needs no host caches -- confirm.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Parameters):
        # 30 minutes, expressed in seconds.
        kill_timeout = int(timedelta(minutes=30).total_seconds())
        # All input parameters are declared in ImportWrapperInputParameters.
        input_parameters = ImportWrapperInputParameters()

        # Output parameters of the task.
        with sdk2.parameters.Output:
            import_reused = sdk2.parameters.Bool("Import was reused", default=False)
            import_node_path = sdk2.parameters.String("YT node with imports")
            reused_importers = sdk2.parameters.JSON("Mapping from importer name to ImportWrapper task which generated it")

    class Context(sdk2.Task.Context):
        # Declared context field with a None default.
        # NOTE(review): not read anywhere in the visible part of this module -- confirm it is still needed.
        node_to_reuse = None

    # Name of the semaphore acquired in on_enqueue for the chunk-count estimate.
    YT_CHUNKS_QUOTA_SEMAPHORE = 'yabscs/quotas/yabs-cs-sandbox/chunks'
    # Estimates used by on_enqueue when no base tags are given.
    YT_CHUNKS_QUOTA_USAGE = 330000  # FIXME extract from somewhere; FIXME count for DirectContext importer only
    YT_POOL_QUOTA_USAGE = 12

    # Chunk-count estimate used when base tags are given: PER_ST for every
    # distinct "st" base, but not less than BASE.
    YT_CHUNKS_QUOTA_BASE = 3000
    YT_CHUNKS_QUOTA_PER_ST = 1500
    # NOTE(review): not referenced in the visible part of this module.
    YT_CHUNKS_QUOTA_PER_RESOURCE = 1500

    # Pool-weight estimate used when base tags are given: PER_ST for every
    # distinct "st" base, but not less than BASE.
    YT_POOL_QUOTA_BASE = 2
    YT_POOL_QUOTA_PER_ST = 0.255

    def on_enqueue(self):
        """Request YT quota semaphores sized by an estimate of the import's cost.

        No semaphores are requested (they are reset to ``None``) while the
        "real run" flag is absent from the context, and also once the real
        cs_import subtask has already been created.

        Otherwise the chunk-count and pool weights are estimated:

        * with base tags -- per distinct ``st<N>`` base found in the tags,
          bounded from below by the ``*_QUOTA_BASE`` constants;
        * without base tags -- the fixed ``*_QUOTA_USAGE`` constants.

        For the sampled strategy the pool weight is halved (but not below 1).
        """
        if getattr(self.Context, _REAL_RUN_CTX_KEY) is NotExists or self.Context.real_cs_import_subtask is not NotExists:
            self.Requirements.semaphores = None
            return

        base_tags = self.get_base_tags()[:]
        if base_tags:
            # Collect distinct "st" bases; an optional "yabs_"/"bs_" prefix is ignored.
            st_bases = set()
            for tag in base_tags:
                match = re.match(r'(yabs_|bs_)?(st\d+)', tag)
                if match:
                    st_bases.add(match.group(2))
            chunk_count_estimate = max(self.YT_CHUNKS_QUOTA_PER_ST * len(st_bases), self.YT_CHUNKS_QUOTA_BASE)
            pool_count_estimate = max(self.YT_POOL_QUOTA_PER_ST * len(st_bases), self.YT_POOL_QUOTA_BASE)
        else:
            chunk_count_estimate = self.YT_CHUNKS_QUOTA_USAGE
            pool_count_estimate = self.YT_POOL_QUOTA_USAGE

        if self.Parameters.sampling_strategy == SamplingStrategy.sampled.value:
            # Sampled imports require less CPU to operate https://st.yandex-team.ru/BSEFFECTIVE-72
            pool_count_estimate = max(pool_count_estimate / 2, 1)

        # The default pool (or an empty value) maps to the shared default semaphore;
        # any other pool name is used as the semaphore name itself.
        yt_pool = self.Parameters.yt_pool
        if not yt_pool or yt_pool == yt_bases.YT_POOL:
            yt_pool_semaphore = YabsCSTask.YT_POOL_DEFAULT_SEMAPHORE
        else:
            yt_pool_semaphore = yt_pool

        self.Requirements.semaphores = ctt.Semaphores(
            acquires=[
                ctt.Semaphores.Acquire(name=yt_pool_semaphore, weight=int(pool_count_estimate)),
                ctt.Semaphores.Acquire(name=self.YT_CHUNKS_QUOTA_SEMAPHORE, weight=int(chunk_count_estimate)),
            ],
            release=ctt.Status.Group.BREAK + ctt.Status.Group.FINISH
        )

    def set_import_node_ttl(self, prev_status, status):
        """Set the final TTL on the import node and optionally remove the node.

        The TTL is ``lower_reusable_ttl_limit`` days. Nothing is done when the
        task context holds no ``import_node_path``. When ``drop_import_result``
        is set, the node is removed recursively after the TTL update.

        :param prev_status: previous task status (unused)
        :param status: new task status (unused)
        """
        try:
            from yt.wrapper import YtClient
        except ImportError:
            # The YT client may be missing in this environment; install it on demand.
            PipEnvironment("yandex-yt", use_wheel=True).prepare()
            from yt.wrapper import YtClient

        ttl_seconds = timedelta(days=self.Parameters.lower_reusable_ttl_limit).total_seconds()
        node_path = self.Context.import_node_path
        if node_path is not NotExists:
            client = YtClient(proxy=yt_bases.YT_PROXY, token=yt_bases.get_jailed_yt_token())
            yt_utils.set_yt_node_ttl(node_path, ttl_seconds, client)
            logger.info('Set TTL=%d seconds for node %s', ttl_seconds, node_path)

            # NOTE(review): on_execute may store the path of a node reused from another
            # task in Context.import_node_path -- confirm that dropping it is intended.
            if self.Parameters.drop_import_result:
                logger.info('Drop node %s', node_path)
                client.remove(node_path, recursive=True)
        else:
            logger.info('No "import_node_path" in task\'s context')

    def on_finish(self, prev_status, status):
        """Run the parent ``on_finish`` hook, then finalize the import node via set_import_node_ttl."""
        super(YabsServerRunCSImportWrapper, self).on_finish(prev_status, status)
        self.set_import_node_ttl(prev_status, status)

    def on_break(self, prev_status, status):
        """Run the parent ``on_break`` hook, then finalize the import node via set_import_node_ttl."""
        super(YabsServerRunCSImportWrapper, self).on_break(prev_status, status)
        self.set_import_node_ttl(prev_status, status)

    @property
    def base_tags(self):
        """Base tags from ``get_base_tags()``, computed once and cached in the task context."""
        if self.Context.base_tags is not NotExists:
            return self.Context.base_tags

        self.Context.base_tags = self.get_base_tags()
        return self.Context.base_tags

    @property
    def importers_info(self):
        """Importers description from ``yt_bases.get_cs_import_info``, cached in the task context."""
        if self.Context.importers_info is not NotExists:
            return self.Context.importers_info

        update_mode = is_st_update_mode(self.base_tags)
        self.Context.importers_info = yt_bases.get_cs_import_info(
            self._get_yabscs(),
            self.cs_settings,
            update_mode,
        )
        return self.Context.importers_info

    @property
    def imports_root(self):
        """Root directory of import results (the ``imports_root_dir`` task parameter)."""
        return self.Parameters.imports_root_dir

    @property
    def cs_settings_md5(self):
        """MD5 of the combined CS settings (archive, patch and settings spec), cached in the task context."""
        if self.Context.cs_settings_md5 is not NotExists:
            return self.Context.cs_settings_md5

        self.Context.cs_settings_md5 = calc_combined_settings_md5(
            self.cs_settings_archive_res_id,
            self.cs_settings_patch_res_id,
            self.Parameters.settings_spec,
        )
        return self.Context.cs_settings_md5

    @property
    def importers(self):
        """Importers requested for this task, cached in the task context.

        * Without ``use_cs_cycle`` -- the whitespace-separated ``import_actions`` parameter.
        * With ``use_cs_cycle`` and a non-empty ``importers`` parameter -- those importers
          plus their dependencies.
        * With ``use_cs_cycle`` and an empty ``importers`` parameter -- importers (with
          dependencies) derived from the base tags.
        """
        if self.Context.importers is not NotExists:
            return self.Context.importers

        if not self.Parameters.use_cs_cycle:
            resolved = self.Parameters.import_actions.split()
        elif self.Parameters.importers:
            resolved = importer_utils.get_importers_with_dependencies(self.Parameters.importers, self.importers_info)
        else:
            resolved = importer_utils.get_bases_importers_with_dependencies(
                self.base_tags,
                self.importers_info,
                self.mkdb_info,
            )

        self.Context.importers = resolved
        return self.Context.importers

    @property
    def import_node(self):
        """YT path of this task's import node: ``<imports_root_dir>/<task id>``, cached in the task context."""
        from yt.wrapper import ypath_join

        if self.Context.import_node_path is not NotExists:
            return self.Context.import_node_path

        task_node_name = str(self.id)
        self.Context.import_node_path = ypath_join(self.Parameters.imports_root_dir, task_node_name)
        return self.Context.import_node_path

    @property
    def importer_mkdb_info_version(self):
        """Mapping ``importer -> mkdb_info version`` for the requested importers, cached in the task context."""
        if self.Context.importer_mkdb_info_version is not NotExists:
            return self.Context.importer_mkdb_info_version

        versions = {}
        for importer in self.importers:
            versions[importer] = importer_utils.get_importer_mkdb_info_version(
                self.importers_info[importer],
                self.mkdb_info,
            )
        self.Context.importer_mkdb_info_version = versions
        return self.Context.importer_mkdb_info_version

    @staticmethod
    def get_import_node_medium_attributes(path, yt_client):
        """Choose the account and primary medium attributes for a new import node.

        The node goes to the ``ssd_blobs`` medium when the free SSD space of the
        ``YT_ACCOUNT`` account is at least ``YT_SSD_MINIMUM_FREE_PERCENT`` percent
        of its SSD limit, and to the ``default`` medium otherwise.

        :param path: import node path (unused)
        :param yt_client: YT client used to read the account attributes
        :return: dict with ``account`` and ``primary_medium`` keys
        """
        account_attributes = yt_client.get(
            '//sys/accounts/{}/@'.format(YT_ACCOUNT),
            attributes=['resource_limits', 'recursive_resource_usage'],
        )
        logger.debug('Account resource attributes: %s', account_attributes)

        ssd_limit = account_attributes['resource_limits']['disk_space_per_medium']['ssd_blobs']
        ssd_usage = account_attributes['recursive_resource_usage']['disk_space_per_medium']['ssd_blobs']
        ssd_free = ssd_limit - ssd_usage
        ssd_required_free = ssd_limit * YT_SSD_MINIMUM_FREE_PERCENT // 100

        if ssd_free >= ssd_required_free:
            medium = 'ssd_blobs'
        else:
            medium = 'default'
        return {'account': YT_ACCOUNT, 'primary_medium': medium}

    @trace_calls(save_arguments=(1, 2, 'path', 'importers'))
    def create_import_node(self, path, importers, yt_client):
        """Create the YT import node with the attributes describing this import.

        Medium attributes are added on a best-effort basis: a failure to obtain
        them is logged and the node is created without them.

        :param path: YT path of the node to create
        :param importers: importers whose versions are stored in the node attributes
        :param yt_client: YT client
        """
        attributes = self.generate_import_node_attributes(importers)
        try:
            medium_attributes = self.get_import_node_medium_attributes(path, yt_client)
        except Exception:
            logger.exception('Error getting medium attributes')
        else:
            attributes.update(medium_attributes)

        logger.info('Create import node "%s" with attributes "%s"', path, attributes)
        yt_utils.create_node(
            path=path,
            yt_client=yt_client,
            attributes=attributes,
            ttl=IMPORT_NODE_INITIAL_TTL,
            ignore_existing=False,
        )

    @property
    def importer_full_settings_version(self):
        """Mapping ``importer -> settings version including dependencies``, cached in the task context."""
        if self.Context.full_settings_version is not NotExists:
            return self.Context.full_settings_version

        versions = {}
        for importer in self.importers:
            versions[importer] = get_importer_with_dependencies_version(
                importer,
                self.importers_info,
                self.cs_settings_version,
            )
        self.Context.full_settings_version = versions
        logger.debug("Importer with dependencies settings version: %s", self.Context.full_settings_version)
        return self.Context.full_settings_version

    @property
    def importer_full_code_version(self):
        """Mapping ``importer -> code version including dependencies``, cached in the task context."""
        if self.Context.full_code_version is not NotExists:
            return self.Context.full_code_version

        versions = {}
        for importer in self.importers:
            versions[importer] = get_importer_with_dependencies_version(
                importer,
                self.importers_info,
                self.importer_code_version,
            )
        self.Context.full_code_version = versions
        logger.debug("Importer with dependencies code version: %s", self.Context.full_code_version)
        return self.Context.full_code_version

    @property
    def importer_full_mkdb_info_version(self):
        """Mapping ``importer -> mkdb_info version including dependencies``, cached in the task context."""
        if self.Context.full_mkdb_info_version is not NotExists:
            return self.Context.full_mkdb_info_version

        versions = {}
        for importer in self.importers:
            versions[importer] = get_importer_with_dependencies_version(
                importer,
                self.importers_info,
                self.importer_mkdb_info_version,
            )
        self.Context.full_mkdb_info_version = versions
        logger.debug("Importer with dependencies mkdb_info version: %s", self.Context.full_mkdb_info_version)
        return self.Context.full_mkdb_info_version

    def generate_import_node_attributes(self, importers):
        """Build the attributes stored on a freshly created import node.

        :param importers: importers whose settings/code/mkdb_info versions are recorded
        :return: dict of YT node attributes; the import is marked as not completed
        """
        separated_base_tags = separate_base_tags(self.base_tags)

        if self.cs_import_ver is not None:
            cs_import_versions = [self.cs_import_ver]
        else:
            cs_import_versions = []

        if self.Parameters.mysql_archive_contents:
            mysql_archive_id = self.Parameters.mysql_archive_contents.id
        else:
            mysql_archive_id = None

        node_attributes = {}
        node_attributes[yt_bases.BIN_DBS_NAMES_ATTR] = separated_base_tags
        node_attributes[yt_bases.CS_IMPORT_VER_ATTR] = cs_import_versions
        node_attributes[yt_bases.MYSQL_ARCHIVE_ATTR] = mysql_archive_id
        node_attributes[yt_bases.INPUT_ARCHIVE_ATTR] = self.archive_root_path
        node_attributes[yt_bases.IS_REUSABLE_ATTR] = import_node_is_reusable(
            oneshot_path=self.Parameters.oneshot_path,
            drop_import_result=self.Parameters.drop_import_result,
            tags=self.Parameters.tags,
        )
        # TODO remove SETTINGS_SPEC_ATTR
        node_attributes[yt_bases.SETTINGS_SPEC_ATTR] = self.cs_settings
        node_attributes[yt_bases.SETTINGS_SPEC_MD5_ATTR] = self.cs_settings_md5
        node_attributes[yt_bases.CREATED_BY_TASK_ATTRIBUTE] = self.id
        node_attributes[yt_bases.IMPORT_COMPLETED_ATTRIBUTE] = False
        node_attributes[yt_bases.SAMPLING_STRATEGY_ATTRIBUTE] = self.Parameters.sampling_strategy
        node_attributes[yt_bases.COMMON_ONESHOTS_MD5_ATTR] = self.Parameters.common_oneshots_md5

        # Per-importer versions, each including the importer's dependencies.
        importer_versions = {}
        for importer in importers:
            importer_versions[importer] = {
                yt_bases.IMPORTER_SETTINGS_VERSION_ATTR: self.importer_full_settings_version[importer],
                yt_bases.IMPORTER_CODE_VERSION_ATTR: self.importer_full_code_version[importer],
                yt_bases.IMPORTER_MKDB_INFO_VERSION_ATTR: self.importer_full_mkdb_info_version[importer],
            }
        node_attributes[yt_bases.IMPORTERS_ATTR] = importer_versions

        # The hash of the sampling parameters is stored only for the sampled strategy.
        if self.Parameters.sampling_strategy == SamplingStrategy.sampled.value:
            node_attributes[yt_bases.SAMPLING_TABLES_PARAMS_HASH] = get_sampling_parameters_hash(
                self.Parameters.sampling_query_template.id,
                self.Parameters.sampling_tables.id,
                self.Parameters.min_rows_count_for_sampling_tables,
                self.Parameters.sampling_tables_keys,
                self.Parameters.sampling_tables_black_list.id,
                self.Parameters.find_tables_for_sampling,
            )
        return node_attributes

    def get_common_importer_attributes(self):
        """Build the node attributes shared by all importers.

        ``CS_IMPORT_VER_ATTR`` is included only when ``search_import_by_cs_import_ver``
        is set; the sampling parameters hash only for the sampled strategy.

        :return: dict of attributes
        """
        if self.Parameters.mysql_archive_contents:
            mysql_archive_id = self.Parameters.mysql_archive_contents.id
        else:
            mysql_archive_id = None

        attributes = {}
        # TODO use only bases which depend on particular importer
        attributes[yt_bases.BIN_DBS_NAMES_ATTR] = self.base_tags
        attributes[yt_bases.MYSQL_ARCHIVE_ATTR] = mysql_archive_id
        attributes[yt_bases.INPUT_ARCHIVE_ATTR] = self.archive_root_path
        attributes[yt_bases.SAMPLING_STRATEGY_ATTRIBUTE] = self.Parameters.sampling_strategy
        attributes[yt_bases.COMMON_ONESHOTS_MD5_ATTR] = self.Parameters.common_oneshots_md5

        if self.Parameters.search_import_by_cs_import_ver:
            attributes[yt_bases.CS_IMPORT_VER_ATTR] = self.cs_import_ver

        if self.Parameters.sampling_strategy == SamplingStrategy.sampled.value:
            attributes[yt_bases.SAMPLING_TABLES_PARAMS_HASH] = get_sampling_parameters_hash(
                self.Parameters.sampling_query_template.id,
                self.Parameters.sampling_tables.id,
                self.Parameters.min_rows_count_for_sampling_tables,
                self.Parameters.sampling_tables_keys,
                self.Parameters.sampling_tables_black_list.id,
                self.Parameters.find_tables_for_sampling,
            )

        return attributes

    def get_importer_attributes(self, importer):
        """Build the attributes used to search for an existing import of ``importer``.

        Starts from the common importer attributes and adds the per-importer
        versions enabled by the ``search_import_by_*`` parameters. When the search
        by settings version is disabled, the MD5 of the whole settings spec is
        used instead.

        :param importer: importer name
        :return: dict of attributes
        """
        attributes = self.get_common_importer_attributes()

        importer_versions = {}
        attributes[yt_bases.IMPORTERS_ATTR] = {importer: importer_versions}

        if not self.Parameters.search_import_by_settings_version:
            attributes[yt_bases.SETTINGS_SPEC_MD5_ATTR] = self.cs_settings_md5
        else:
            importer_versions[yt_bases.IMPORTER_SETTINGS_VERSION_ATTR] = self.importer_full_settings_version[importer]

        if self.Parameters.search_import_by_code_version:
            importer_versions[yt_bases.IMPORTER_CODE_VERSION_ATTR] = self.importer_full_code_version[importer]

        if self.Parameters.search_import_by_mkdb_info_version:
            importer_versions[yt_bases.IMPORTER_MKDB_INFO_VERSION_ATTR] = self.importer_full_mkdb_info_version[importer]

        return attributes

    def link_reusable_imports(self, yt_client):
        """Link the output tables of reused importers into this task's import node.

        All link targets are checked for existence first (in one batch); links
        are created (in a second batch) only when none is missing.

        :param yt_client: YT client
        :raises RuntimeError: when some of the target tables do not exist
        """
        from yt.wrapper import ypath_join

        # target table path -> path of the link to create
        links_by_target = {}
        for importer, source_node in self.Context.importers_to_reuse.items():
            output_tables = importer_utils.get_importer_output_tables(
                self.importers_info[importer],
                self.mkdb_info,
                self.base_tags,
            )
            logger.debug('Output tables of importer %s: %s', importer, output_tables)

            for table in output_tables:
                target = ypath_join(source_node, table)
                link = ypath_join(self.import_node, table)
                logger.debug('Create link "%s" -> "%s"', link, target)
                links_by_target[target] = link

        batch_client = yt_client.create_batch_client(raise_errors=True)

        exists_responses = {}
        for target in links_by_target:
            exists_responses[target] = batch_client.exists(target)
        batch_client.commit_batch()

        missing = sorted(
            target
            for target, response in six.iteritems(exists_responses)
            if not response.get_result()
        )
        if missing:
            raise RuntimeError('Missing: {}'.format(missing))

        for target, link in six.iteritems(links_by_target):
            batch_client.link(target, link, recursive=True)
        batch_client.commit_batch()

    def notify_about_imports_status(self):
        """Report in the task info which imports are reused and which importers will be run."""
        reused = self.Context.importers_to_reuse
        if reused:
            parts = ['Reused imports:\n']
            for importer, node in sorted(reused.items()):
                parts.append('{importer} {node_link}\n'.format(
                    importer=importer,
                    node_link=get_yt_path_html_hyperlink(proxy=yt_bases.YT_PROXY, path=node),
                ))
        else:
            parts = ['No importers to reuse\n']

        parts.append('\n')

        to_run = self.Context.importers_to_run
        if to_run:
            parts.append('Run importers {}'.format(', '.join(sorted(to_run))))
        else:
            parts.append('No importers to run\n')

        # The message contains HTML links, hence escaping is disabled.
        self.set_info(''.join(parts), do_escape=False)

    def get_digest(self, path):
        """Calculate the digest of the import results at ``path``.

        When ``wait_digest`` is set, an empty history directory is passed to
        ``calculate_digest``; otherwise the ``hashes_history_dir`` parameter is used.

        :param path: YT path with import results
        :return: whatever ``calculate_digest`` returns
        """
        wait_for_digest = self.Parameters.wait_digest
        history_dir = '' if wait_for_digest else self.Parameters.hashes_history_dir
        return self.calculate_digest(
            path,
            wait_task=wait_for_digest,
            hashes_history_dir=history_dir,
        )

    @trace_entry_point(writer_factory=TRACE_WRITER_FACTORY)
    def on_execute(self):
        """Find reusable import results, run the missing importers and publish the result.

        Steps:

        1. Renew the expiration time of the input archive described by ``input_spec``.
        2. Decide which importers are reused and which are run (stored in the
           context as ``importers_to_reuse`` / ``importers_to_run``). When the
           search is enabled it is done under a lock inside a YT transaction.
        3. Prolong the TTL of reused nodes and mark them as reused by this task.
        4. Link reused tables into this task's import node, unless everything
           is reused from a single node.
        5. Run the real cs_import subtask for the importers to run and mark
           the node as completed.
        6. Store the resulting path and digest in the context, in the output
           parameter, and (when no oneshot is given) in an output resource.
        7. Optionally push reuse metrics to Solomon.

        The method is re-entered after every wait; steps wrapped in
        ``memoize_stage`` and the context checks are there to avoid repeating work
        (NOTE(review): presumably each memoized stage body runs once per task -- confirm).
        """
        from yt.wrapper import YtClient
        from yt.wrapper.yson import yson_to_json

        yt_client = YtClient(proxy=yt_bases.YT_PROXY, token=yt_bases.get_jailed_yt_token())

        search_imports = should_reuse_import_results(
            self.Parameters.reuse_import_results,
            self.Parameters.oneshot_path
        )

        with self.memoize_stage.update_input_archive_ttl(), trace('update_input_archive_ttl'):
            # A dedicated client is used here on purpose: rebinding ``yt_client`` in this
            # stage would make every later YT call use a different token depending on
            # whether the stage body was executed in the current run.
            input_archive_yt_token = sdk2.Vault.data(self.owner, 'yabs-cs-sb-yt-token')
            input_archive_yt_client = YtClient(token=input_archive_yt_token, proxy=yt_bases.YT_PROXY)
            with open(str(new_resource_data(self.Parameters.input_spec).path), 'r') as input_spec_file:
                input_spec_data = json.load(input_spec_file)
            yt_bases.renew_input_spec_expiration_time(input_archive_yt_client, input_spec_data, ttl=yt_bases.DEFAULT_CS_INPUT_ARCHIVE_TTL)

        if self.Context.importers_to_run is NotExists:
            with trace('importers_to_run', info=dict(search_imports=search_imports)):
                if not search_imports:
                    # No search: run everything in a fresh node.
                    self.Context.importers_to_run = self.importers
                    self.create_import_node(self.import_node, self.Context.importers_to_run, yt_client)
                    self.Context.importers_to_reuse = {}
                else:
                    search_attributes = {
                        importer: self.get_importer_attributes(importer)
                        for importer in self.importers
                    }
                    # Touch the property so that its value is computed before the transaction starts.
                    _ = self.importer_mkdb_info_version
                    with yt_client.Transaction():
                        # The lock key is derived from the common attributes (without base tags)
                        # plus the settings MD5.
                        lock_attributes = self.get_common_importer_attributes()
                        lock_attributes.pop(yt_bases.BIN_DBS_NAMES_ATTR, None)
                        lock_attributes[yt_bases.SETTINGS_SPEC_MD5_ATTR] = self.cs_settings_md5

                        lock_for_search(yt_client, self.imports_root, lock_attributes)
                        existing_imports, tasks_to_wait = find_imports(
                            search_attributes,
                            self.Parameters.imports_root_dir,
                            yt_client)
                        if tasks_to_wait:
                            task_links = map(get_task_html_hyperlink, tasks_to_wait)
                            self.set_info('Wait for tasks {}'.format(', '.join(task_links)), do_escape=False)
                            check_tasks(self, tasks_to_wait, raise_on_fail=False, wait_all=True)

                        self.Context.importers_to_run = get_importers_to_run(self.importers, existing_imports, self.importers_info)

                        if self.Context.importers_to_run:
                            self.create_import_node(self.import_node, self.Context.importers_to_run, yt_client)

                        # Everything that is requested but not run is reused from an existing node.
                        self.Context.importers_to_reuse = {
                            importer: existing_imports[importer]
                            for importer in (set(self.importers) - set(self.Context.importers_to_run))
                        }

        reused_nodes = list(set(self.Context.importers_to_reuse.values()))
        with self.memoize_stage.update_reused_nodes_ttl(), trace('update_reused_nodes_ttl'):
            for node_path in reused_nodes:
                ttl = timedelta(days=self.Parameters.lower_reusable_ttl_limit).total_seconds()
                yt_utils.set_yt_node_ttl(node_path, ttl, yt_client)
                logger.info('Set TTL=%d seconds for node %s', ttl, node_path)

                self.mark_import_node_as_reused(node_path, yt_client)

        with self.memoize_stage.notify_about_imports_status(), trace('notify_about_imports_status'):
            self.notify_about_imports_status()

        with self.memoize_stage.link_reusable_imports(), trace('link_reusable_imports'):
            # Don't create links if reuse single node for all imports
            if not self.Context.importers_to_run and len(reused_nodes) == 1:
                logger.info('Don\'t create links, all imports reuse single node %s', reused_nodes[0])
            else:
                self.link_reusable_imports(yt_client)

        # Run importers
        if self.Context.importers_to_run:
            if self.Context.real_cs_import_subtask is NotExists:
                # Re-enqueues the task (by raising) until it runs with the semaphores acquired.
                self._ensure_real_run()
                self.Context.real_cs_import_subtask = self._run_real_cs_import(self.import_node, self.Context.importers_to_run)
                self.set_info('Run import task {}'.format(
                    get_task_html_hyperlink(self.Context.real_cs_import_subtask)
                ), do_escape=False)

            check_tasks(self, self.Context.real_cs_import_subtask)
            yt_client.set_attribute(self.import_node, yt_bases.IMPORT_COMPLETED_ATTRIBUTE, True)

        reused_single_node = not self.Context.importers_to_run and len(reused_nodes) == 1

        # When everything is reused from one node, that node is the result;
        # otherwise the result is this task's own import node.
        imports_path = self.import_node
        if reused_single_node:
            imports_path = reused_nodes[0]
        self.Context.import_node_path = imports_path
        self.Parameters.import_node_path = imports_path

        digest = None
        if reused_single_node:
            # Take the digest stored on the reused node (None when it has none).
            digest = yson_to_json(yt_client.get_attribute(imports_path, yt_bases.DIGEST_ATTR, None))
        elif self.Parameters.calc_digest:
            digest = self.get_digest(imports_path)
            yt_client.set_attribute(imports_path, yt_bases.DIGEST_ATTR, digest)
        setattr(self.Context, IMPORT_DIGEST_KEY, digest)

        setattr(self.Context, IMPORT_PREFIX_KEY, imports_path)
        imports_path_hyperlink = get_yt_path_html_hyperlink(proxy=yt_bases.YT_PROXY, path=imports_path)
        self.set_info('Imports path: {}'.format(imports_path_hyperlink), do_escape=False)

        if not self.Parameters.oneshot_path:
            # Publish the location (and digest) of the import results as a resource.
            cs_import_out_path = 'cs_import_out.json'
            with open(cs_import_out_path, 'w') as out_file:
                json.dump(
                    {
                        IMPORT_PREFIX_KEY: imports_path,
                        IMPORT_DIGEST_KEY: digest
                    },
                    out_file,
                    indent=2)

            new_resource(
                YABS_CS_IMPORT_OUT_LOCATION,
                self,
                description='cs_import output location for {}'.format(self.id),
                path=cs_import_out_path,
                ttl=self.Parameters.lower_reusable_ttl_limit)

        if self.Parameters.push_to_solomon:
            with trace('push_to_solomon'):
                run_type = task_run_type.get_task_run_type(self.Parameters.tags)
                testenv_database = task_run_type.get_task_testenv_database(self.Parameters.tags)
                metrics = collect_metrics(
                    self.Context.importers_to_reuse.keys(),
                    self.Context.importers_to_run,
                    run_type,
                    testenv_database
                )
                self.solomon_push_client.add(metrics)

    def _ensure_real_run(self):
        """Make sure the task is executed in the "real run" mode.

        Returns normally when the real-run flag is already set in the context.
        Otherwise sets the flag and re-enqueues the task by raising
        ``sdk2.WaitTime``, so that ``on_enqueue`` is invoked again with the flag
        present and requests the semaphores.

        :raises sdk2.WaitTime: on the first call, to re-enqueue the task
        """
        if getattr(self.Context, _REAL_RUN_CTX_KEY, False):
            logger.info("This is the real run, with the semaphores acquired")
            return

        setattr(self.Context, _REAL_RUN_CTX_KEY, True)
        self.set_info("Re-enqueuing the task to acquire the semaphores for real...")
        # Control never passes this line: the raise suspends the current execution.
        raise sdk2.WaitTime(10)

    @trace_calls
    def _run_real_cs_import(self, import_destination_path, importers):
        """Create and enqueue the ``YABS_SERVER_REAL_RUN_CS_IMPORT`` subtask.

        The subtask receives this task's parameters (tasks and resources are
        passed by id) plus the values specific to the real run.

        :param import_destination_path: YT node to write import results to
        :param importers: importers to run
        :return: id of the enqueued subtask
        """
        def _create_params_for_sdk1(task_parameters):
            # Replace task/resource objects with their ids.
            params = {}
            for key, value in task_parameters:
                if isinstance(value, (sdk2.Task, sdk2.Resource)):
                    params[key] = value.id
                else:
                    params[key] = value
            return params

        # A real list is required here: on Python 3 ``filter`` returns a lazy iterator
        # that is always truthy, which would force ``require_mysql`` to True.
        mysql_importers = [
            importer
            for importer in importers
            if importer_utils.is_mysql_importer(self.importers_info[importer])
        ]
        logger.debug("mysql importers: %s", mysql_importers)

        subtask_params = _create_params_for_sdk1(self.Parameters)
        subtask_params.update(
            import_destination_path=import_destination_path,
            sampling_strategy=self.Parameters.sampling_strategy,
            sampling_query_template=self.Parameters.sampling_query_template,
            sampling_tables=self.Parameters.sampling_tables,
            # Common oneshots are executed only when their md5 is known.
            exec_common_yt_oneshots=self.Parameters.exec_common_yt_oneshots and bool(self.Parameters.common_oneshots_md5),
            importers=importers,
            require_mysql=bool(mysql_importers),
            yt_pool=self.Parameters.yt_pool,
        )

        # Pass this task's tasks resource to the subtask, unless it is a SandboxTasksBinary.
        if self.Requirements.tasks_resource and self.Requirements.tasks_resource.type != sdk2.service_resources.SandboxTasksBinary:
            subtask_params['tasks_archive_resource'] = self.Requirements.tasks_resource.id
            subtask_params['__requirements__'] = {'tasks_resource': self.Requirements.tasks_resource}

        logger.debug('Run cs import task with parameters: %s', subtask_params)
        subtask = enqueue_task(sdk2.Task['YABS_SERVER_REAL_RUN_CS_IMPORT'](
            sdk2.Task.current,
            owner=self.owner,
            tags=self.Parameters.tags,
            hints=list(self.hints),
            description=self.Parameters.description,
            **subtask_params
        ))

        return subtask.id

    def mark_import_node_as_reused(self, path, yt_client):
        """Add this task's id to the list of tasks that reuse the node at ``path``.

        The read-modify-write of the attribute is done inside a YT transaction,
        under a shared lock on the attribute key, waiting for the lock up to
        ``10 * 1000`` (the ``wait_for`` value).

        :param path: YT path of the reused import node
        :param yt_client: YT client
        """
        attribute = yt_bases.REUSED_BY_TASKS_ATTRIBUTE
        with yt_client.Transaction():
            yt_client.lock(
                path,
                mode='shared',
                attribute_key=attribute,
                waitable=True,
                wait_for=10 * 1000,
            )
            # A set removes duplicates when the task is already recorded.
            task_ids = set(yt_client.get_attribute(path, attribute, default=[]))
            task_ids.add(self.id)
            yt_client.set_attribute(path, attribute, list(task_ids))


def collect_metrics(reused_importers, running_importers, run_type=None, testenv_database=None):
    """Build solomon metrics describing which importers were reused.

    Every importer produces two metrics with sensor ``reuse``: one with status
    ``reused`` and one with status ``not_reused``. Exactly one of them has
    value 1, the other has value 0. Metrics for reused importers come first,
    followed by metrics for importers that were run.

    :param reused_importers: Importers whose results were reused
    :type reused_importers: list
    :param running_importers: Importers that were run
    :type running_importers: list
    :param run_type: Value of the ``run_type`` label, ``'unknown'`` if empty
    :type run_type: sandbox.projects.yabs.qa.utils.task_run_type.RunType or None
    :param testenv_database: If set, added (as a string) as the ``testenv_database`` label
    :type testenv_database: string or None
    :return: Metrics in solomon format
    :rtype: list
    """
    shared_labels = {'run_type': run_type or 'unknown'}
    if testenv_database:
        shared_labels['testenv_database'] = str(testenv_database)

    def _metric(importer_name, status, value):
        # Each metric gets its own copy of the shared labels.
        labels = dict(shared_labels)
        labels.update(sensor='reuse', importer=importer_name, status=status)
        return {'labels': labels, 'value': value}

    metrics = []
    for was_reused, importer_names in ((True, reused_importers), (False, running_importers)):
        for importer_name in importer_names:
            metrics.append(_metric(importer_name, 'reused', int(was_reused)))
            metrics.append(_metric(importer_name, 'not_reused', int(not was_reused)))

    return metrics


@trace_calls
def lock_for_search(yt_client, lock_path, search_attributes):
    """Try to take a waitable shared lock on ``lock_path`` before searching for a node.

    The lock's ``child_key`` is the md5 of ``search_attributes`` as computed by
    ``get_json_md5``. Failure to acquire the lock is not fatal: the ``YtError``
    is logged as a warning and the function returns normally.

    :param yt_client: YT client used to take the lock
    :param lock_path: Path of the node to lock
    :param search_attributes: Attributes used to derive the lock's child key
    """
    from yt.common import YtError

    wait_ms = datetime.timedelta(minutes=15).total_seconds() * 1000

    logger.debug('Use node atributes to compute lock\'s child_key: %s', search_attributes)
    child_key = get_json_md5(search_attributes)
    logger.debug('Lock\'s child_key: %s', child_key)

    logger.info(
        'Try to acquire waitable (%d ms) shared lock on node "%s" with child_key="%s"',
        wait_ms, lock_path, child_key)
    try:
        yt_client.lock(
            lock_path,
            mode='shared',
            child_key=child_key,
            waitable=True,
            wait_for=wait_ms,
        )
    except YtError as e:
        # Best effort: the caller proceeds with the search even without the lock.
        logger.warning('Failed to acquire lock, will search a node to reuse.\n%s', e)
        return

    logger.info('Acquired lock')


def should_reuse_import_results(reuse_import_results, oneshot_path):
    """Tell whether previously computed import results may be reused.

    Reuse is allowed only when ``reuse_import_results`` is truthy and
    ``oneshot_path`` is empty. The evaluated conditions are logged at debug level.

    :param reuse_import_results: Flag that enables reuse
    :param oneshot_path: Oneshot path; any truthy value disables reuse
    :return: True if every condition holds, False otherwise
    :rtype: bool
    """
    reuse_enabled = ('reuse_import_results is enabled', reuse_import_results)
    no_oneshot = ('oneshot_path is not set', not oneshot_path)
    checks = [reuse_enabled, no_oneshot]
    logger.debug('Import reuse conditions: %s', checks)

    for _, satisfied in checks:
        if not satisfied:
            return False
    return True


@trace_calls
def find_imports(search_attributes, imports_root_dir, yt_client):
    """Look up a reusable import node or a running import task for every importer.

    For each importer ``find_import_node`` is called with that importer's
    attributes. A found node path is stored under the importer's name; otherwise,
    if a task id is returned, it is added to the set of tasks to wait for.

    :param search_attributes: Mapping of importer name to its search attributes
    :type search_attributes: dict
    :param imports_root_dir: Directory passed to ``find_import_node`` as the search root
    :param yt_client: YT client passed to ``find_import_node``
    :return: Mapping of importer name to found node path, and a list of unique
        task ids to wait for
    :rtype: tuple(dict, list)
    """
    reusable_nodes = {}
    pending_tasks = set()

    for importer, importer_attributes in search_attributes.items():
        logger.debug('Importer %s, filter nodes by "%s"', importer, importer_attributes)
        node_path, running_task = find_import_node(importer_attributes, imports_root_dir, yt_client)

        if node_path:
            reusable_nodes[importer] = node_path
            logger.debug('Importer %s, found node "%s"', importer, node_path)
            continue

        if running_task:
            pending_tasks.add(running_task)
            logger.debug('Importer %s, found running import task "%s"', importer, running_task)
            continue

        logger.debug('Importer %s, nothing found', importer)

    return reusable_nodes, list(pending_tasks)


# TODO pass list of yt nodes
# https://a.yandex-team.ru/review/1548245/files/4#file-0-52177187:R470
def find_import_node(search_attributes, imports_root_dir, yt_client):
    """Find an import node matching the given attributes.

    Delegates the search to ``yt_bases.find_node_to_reuse`` and interprets the
    returned node:

    * nothing found -> ``(None, None)``;
    * node has a truthy import-completed attribute -> ``(node_path, None)``;
    * otherwise -> ``(None, task_id)``, where ``task_id`` is read from the
      node's created-by-task attribute.

    :param search_attributes: Attributes the node is filtered by
    :param imports_root_dir: Directory to search in
    :param yt_client: YT client
    :return: Pair of (node path or None, task id or None)
    :rtype: tuple
    """
    candidate = yt_bases.find_node_to_reuse(
        yt_client,
        imports_root_dir,
        filter_attributes=search_attributes,
        check_task_status=True,
    )
    if not candidate:
        return None, None

    path = candidate['$value']
    attributes = candidate['$attributes']

    if not attributes.get(yt_bases.IMPORT_COMPLETED_ATTRIBUTE, False):
        # Import is not completed yet: report the task that created the node.
        return None, attributes[yt_bases.CREATED_BY_TASK_ATTRIBUTE]

    return path, None
