# -*- coding: utf-8 -*-
import os
import gzip
import time
import json
import shutil
import jinja2
import logging
import tarfile
import subprocess
from datetime import datetime, date, timedelta
from collections import OrderedDict

from sandbox import sdk2
from time import sleep, mktime
import sandbox.common.types.task as ctt
from sandbox.sdk2.vcs.svn import Arcadia
from sandbox.sandboxsdk import environments
from sandbox.projects.common.nanny import nanny
from sandbox.common.types.misc import NotExists
from sandbox.sandboxsdk.paths import make_folder
import sandbox.projects.resource_types as rtypes
from sandbox.projects.geosearch.tools import stat
from sandbox.projects.geosearch.tools import yappy
from sandbox.projects.geosearch.tools.misc import retry
from sandbox.sandboxsdk.paths import get_logs_folder
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.projects.geosearch import resource_types as geotypes
from sandbox.projects.common.geosearch.base_update import YtTablePath
from sandbox.projects.common.geosearch.startrek import StartrekClient
from sandbox.projects.common.geosearch.indexer_config import IndexerConfig
from sandbox.projects.common.geosearch.utils import unpack_files, unpack_file
from sandbox.projects.geosearch.tools.database_notifications import notify_by_telegram
from sandbox.projects.geosearch.RegisterAddrsBaseShard import RegisterAddrsBaseShard
from sandbox.projects.geosearch.snippets.AddrsSnippetsTaskManager import AddrsSnippetsTaskManager
from sandbox.projects.geosearch.AcceptanceGeobasesearchDatabase import AcceptanceGeobasesearchDatabase
from sandbox.projects.geosearch.BuildAddrsBaseAdditionalResources import BuildAddrsBaseAdditionalResources
from sandbox.projects.geosearch.snippets.AddrsSnipippetsPushToFerryman import AddrsSnipippetsPushToFerryman
from sandbox.projects.BuildMapsStaticFactorsDownloader import MapsRubricStaticFactors
from sandbox.projects.geosearch.BuildShardedAddrsWebIndexann import BuildShardedAddrsWebIndexann


# Task authors for which the task header does not show the
# "acceptance wouldn`t be launched automatically" warning
# (checked against ``self.author`` in ``AddrsBaseDatabaseYtBuild.head``).
USERS_WHITE_LIST = ['evelkin',
                    'ankopeliovich',
                    'karas-pv',
                    'sobols',
                    'yu-semichev',
                    'robot-geosearch',
                    'robot-thoth']


# Maps a name prefix to a tier name.
# NOTE(review): not referenced in the visible part of the file; presumably
# shard name prefix -> GenCfg tier name - confirm at the use site.
PREFIX_TO_TIER = {'AddrsBaseProd18Tier0': 'addrs_base_prod18',
                  'AddrsBaseOSMTier0': 'addrs_base_osm'}

# Threshold on the number of errors.
# NOTE(review): the use site is outside the visible part of the file.
CRIT_ERR_COUNT = 200    # Bold red message and release block


class ShardRegisteringException(Exception):
    """Signals a shard registering problem; adds nothing to ``Exception``."""


class ShardmapCreationException(Exception):
    """Signals a shardmap creation problem; adds nothing to ``Exception``."""


def _processess_finished(processes):
    for proc_name, proc in processes.iteritems():
        if proc.poll() is not None and proc.poll() != 0:
            msg = ('Process "%s" returned %s exit code! '
                   'Check process log for details.') % (proc_name,
                                                        proc.poll())
            raise SandboxTaskFailureError(msg)
    return all(proc.poll() is not None for proc in processes.values())


def _run_async(binary_resource, proc_env, cmd, log_file_path, error_msg):
    """Start a binary from a resource in the background and return its process.

    The binary is stripped first (see ``_strip_binary``), then launched
    through the shell with stdout and stderr redirected to ``log_file_path``.

    :param binary_resource: resource holding the executable.
    :param proc_env: environment mapping passed to the child process.
    :param cmd: command-line arguments appended after the binary path.
    :param log_file_path: file that receives the child's stdout and stderr.
    :param error_msg: message of the error raised when the start fails.
    :return: the started ``subprocess.Popen`` object (not waited for).
    :raises SandboxTaskFailureError: when the process cannot be started.
    """
    binary = _strip_binary(sdk2.ResourceData(binary_resource).path)
    cmd = '{binary} {cmd}'.format(binary=binary, cmd=cmd)
    logging.info('Running: %s' % cmd)
    # The parent's handle is closed on leaving the block; the child keeps
    # its own inherited copy of the descriptor.
    with open(log_file_path, 'w') as log_file:
        try:
            return subprocess.Popen(cmd,
                                    shell=True,
                                    env=proc_env,
                                    stdout=log_file,
                                    stderr=subprocess.STDOUT)
        except (OSError, ValueError):
            # Popen never raises CalledProcessError: a failed spawn is
            # reported as OSError and invalid arguments as ValueError.
            logging.exception('%s command failed' % cmd)
            raise SandboxTaskFailureError(error_msg)


def _strip_binary(binary_path):
    """Return the path of a stripped copy of ``binary_path``.

    The copy is placed in ``./binaries`` under the original base name. An
    already existing copy is reused without running ``strip`` again.

    :param binary_path: path to the original executable.
    :return: path to the stripped executable.
    :raises SandboxTaskFailureError: when ``strip`` exits with a non-zero code.
    """
    binary_dir = './binaries'
    binary_name = os.path.basename(str(binary_path))
    stripped_binary_path = os.path.join(binary_dir, binary_name)
    make_folder(binary_dir)
    if os.path.exists(stripped_binary_path):
        return stripped_binary_path
    cmd = 'strip {binary_path} -o {stripped_binary}'.format(binary_path=binary_path,
                                                            stripped_binary=stripped_binary_path)
    try:
        # check_call (unlike call) raises CalledProcessError on a non-zero
        # exit code, so a failed strip is no longer silently ignored.
        subprocess.check_call(cmd, shell=True)
    except subprocess.CalledProcessError:
        logging.exception('%s command failed' % cmd)
        raise SandboxTaskFailureError('Stripping {} failed'.format(binary_path))
    return stripped_binary_path


class AddrsBaseDatabaseYtBuild(nanny.ReleaseToNannyTask2, sdk2.Task):
    '''
        Build addrs base database on YT
    '''

    class Parameters(sdk2.task.Parameters):
        # Input parameters of the task. The commented-out
        # ``with sdk2.parameters.Group(...)`` lines below only mark the
        # logical sections; no groups are actually created.

        # 43200 = 12 * 3600 (12 hours, assuming the value is in seconds).
        kill_timeout = 43200
        need_acceptance = sdk2.parameters.Bool('Need to be accepted',
                                               default_value=False)
        shards_count = sdk2.parameters.Integer('Number of shards',
                                               default_value=4,
                                               required=True)
        tier_name = sdk2.parameters.String('GenCfg tier name',
                                           required=True)
        yt_table = YtTablePath('YT table',
                               default_value='//home/altay/db/export/current-state/snapshot/company',
                               required=True)
        # An alphanumeric value is treated as a resource id by prepare_export().
        export_path = YtTablePath('Export YT path or export resource id',
                                  default_value='//home/altay/db/export/current-state/',
                                  required=True)
        # with sdk2.parameters.Group('Build factor annotations parameters') as factor_annotations:
        maps_click_shows = YtTablePath('Maps click shows table',
                                       default_value='//home/geosearch-prod/stream_conveyor/production_data/ranking/maps_clicks_shows')
        org_data = YtTablePath('Org data table',
                               default_value='//home/geosearch-prod/stream_conveyor/production_data/ranking/org_data')
        mobile_clicks = YtTablePath('Mobile clicks table',
                                    default_value='//home/geosearch-prod/stream_conveyor/production_data/ranking/mobile_clicks')
        serp_clicks = YtTablePath('Serp clicks table',
                                  default_value='//home/geosearch-prod/stream_conveyor/production_data/ranking/serp_clicks')
        located_at_org_name = YtTablePath('Located at org name table',
                                          default_value='//home/geosearch-prod/stream_conveyor/production_data/ranking/located_at_org_name')
        hospital_name = YtTablePath('Hospital name table',
                                    default_value='//home/geosearch-prod/stream_conveyor/production_data/ranking/hospital_name')
        faculty_name = YtTablePath('Faculty name table',
                                   default_value='//home/geosearch-prod/stream_conveyor/production_data/ranking/faculty_name')
        org_reviews = YtTablePath('Org reviews',
                                  default_value='')
        org_reviews_for_inv = YtTablePath('Org reviews for main index',
                                          default_value='')
        # with sdk2.parameters.Group('Build static factors YT parameters') as static_factors_yt:
        count_factors = YtTablePath('Count factors table',
                                    default_value='//home/geosearch-prod/production_data/count_factors')
        paid_realty = YtTablePath('Paid realty table',
                                  default_value='//home/geosearch/paid_realty')
        departments = YtTablePath('Departments table',
                                  default_value='//home/geosearch-prod/production_data/departments')
        click_factors = YtTablePath('Click factors table',
                                    default_value='//home/geosearch-prod/production_data/click_factors')
        click_factors_document = YtTablePath('Click factors for document table',
                                              default_value='//home/geosearch-prod/stream_and_clicks_conveyor/production_data/click_annotations/click_factors_document')
        click_factors_search_group = YtTablePath('Click factors for document search group table',
                                                  default_value='//home/geosearch-prod/stream_and_clicks_conveyor/production_data/click_annotations/click_factors_search_group')
        click_factors_url = YtTablePath('Click factors for document url table',
                                         default_value='//home/geosearch-prod/stream_and_clicks_conveyor/production_data/click_annotations/click_factors_url')
        click_factors_rubric = YtTablePath('Click factors for document rubric table',
                                            default_value='//home/geosearch-prod/stream_and_clicks_conveyor/production_data/click_annotations/click_factors_rubric')
        click_factors_duplicate_stats = YtTablePath('Click factors for document duplicate stats table',
                                                     default_value='//home/geosearch-prod/stream_and_clicks_conveyor/production_data/click_annotations/click_factors_duplicate_stats')
        travel_factors = YtTablePath('Travel factors table',
                                     default_value='//home/travel/prod/factors/all/latest/output')
        # with sdk2.parameters.Group('Build geosearch click factors parameters') as geosearch_click_factors:
        clusterid_clicks = YtTablePath('clusterid_clicks',
                                       default_value='//home/geosearch-prod/stream_and_clicks_conveyor_v2/production_data/clicks_annotations/clusterid_clicks')
        clusterid_commit = YtTablePath('clusterid_commit',
                                       default_value='//home/geosearch-prod/stream_and_clicks_conveyor_v2/production_data/clicks_annotations/clusterid_commit')
        clusterid_stream = YtTablePath('clusterid_stream',
                                       default_value='')
        docurlids_clicks = YtTablePath('docurlids_clicks',
                                       default_value='//home/geosearch-prod/stream_and_clicks_conveyor_v2/production_data/clicks_annotations/docurlids_clicks')
        docurlids_export = YtTablePath('docurlids_export',
                                       default_value='//home/geosearch-prod/stream_and_clicks_conveyor_v2/production_data/clicks_annotations/docurlids_export')
        duplicate_clicks = YtTablePath('duplicate_clicks',
                                       default_value='//home/geosearch-prod/stream_and_clicks_conveyor_v2/production_data/clicks_annotations/duplicate_clicks')
        duplicate_commit = YtTablePath('duplicate_commit',
                                       default_value='//home/geosearch-prod/stream_and_clicks_conveyor_v2/production_data/clicks_annotations/duplicate_commit')
        duplicate_export = YtTablePath('duplicate_export',
                                       default_value='//home/geosearch-prod/stream_and_clicks_conveyor_v2/production_data/clicks_annotations/duplicate_export')
        duplicate_stream = YtTablePath('duplicate_stream',
                                       default_value='')
        rubricids_clicks = YtTablePath('rubricids_clicks',
                                       default_value='//home/geosearch-prod/stream_and_clicks_conveyor_v2/production_data/clicks_annotations/rubricids_clicks')
        rubricids_export = YtTablePath('rubricids_export',
                                       default_value='//home/geosearch-prod/stream_and_clicks_conveyor_v2/production_data/clicks_annotations/rubricids_export')
        advert_stats = YtTablePath('advert_stats',
                                   default_value='//home/geoadv/ranking/adv_stats/factors')

        review_count_table = YtTablePath('Table with review count',
                                         default_value='//home/robot-ugc/export/sprav/org_reviews_count')
        # Passed by make_yt_output_dir() as the TTL of the output directory.
        ttl_days = sdk2.parameters.Integer('Build artifacts TTL in days',
                                            default_value=7)
        yt_pool = sdk2.parameters.String('YT pool for executing operations')

        # Base path under which make_yt_output_dir() creates 'addrs_base_<task id>'.
        output_path = sdk2.parameters.String('Output YT path',
                                             default_value='//home/geosearch-prod/addrs_base')

        companies_limit = sdk2.parameters.Integer('Companies count in index (for testing purposes only)')
        common_features_pbs = sdk2.parameters.Bool('Use common features.pbs', default_value=True)

        # with sdk2.parameters.Group('Configs and Binaries') as binaries:
        standalone_indexer_executable = sdk2.parameters.Resource('Standalone YT indexer executable',
                                                                 resource_type=geotypes.GEOSEARCH_STANDALONE_INDEXER)
        index_downloader_executable = sdk2.parameters.Resource('Index downloader executable',
                                                               resource_type=geotypes.GEOSEARCH_INDEX_DOWNLOADER)
        # Tar archive unpacked by make_geortyserver_configs().
        indexer_configs_resource = sdk2.parameters.Resource(
            'Indexer configs archive',
            resource_type=geotypes.GEOSEARCH_YT_INDEXER_CONFIGS
        )
        # Binary executed by run_sync().
        data_preparer = sdk2.parameters.Resource('Prepare aux data executable',
                                                 resource_type=geotypes.GEOSEARCH_YT_DATA_PREPARER)
        mapper_memory_size = sdk2.parameters.String('Maximum memory size for mapper job in YT (Gb)')
        mapper_tmpfs_size = sdk2.parameters.String('Maximum tmpfs size for mapper job in YT (Gb)')
        merger_memory_size = sdk2.parameters.String('Maximum memory size for merger job in YT (Gb)',
                                                    default_value='20')
        merger_tmpfs_size = sdk2.parameters.String('Maximum tmpfs size for merger job in YT (Gb)',
                                                    default_value='80')
        # with sdk2.parameters.Group('Data sources') as data_sources:
        geo_stat = sdk2.parameters.Resource('Geo.stat',
                                            resource_type=rtypes.GEO_STAT)
        annotations = sdk2.parameters.Resource('Annotations',
                                               resource_type=rtypes.MAPS_WEB_INDEXANN)
        indexann_raw_data = sdk2.parameters.Resource('IndexAnn raw data',
                                                     resource_type=geotypes.MAPS_WEB_DOUBLEFRC)
        # The business_*similar_orgs* resources are read by _prepare_similar_orgs().
        business_similar_orgs = sdk2.parameters.Resource('Business similar orgs',
                                                         resource_type=geotypes.SIMILAR_ORGS_TABLE)
        business_advert_similar_orgs = sdk2.parameters.Resource('Business similar orgs with adverts',
                                                                resource_type=geotypes.SIMILAR_ORGS_TABLE)
        business_similar_orgs_exp1 = sdk2.parameters.Resource('Business similar orgs exp1',
                                                              resource_type=geotypes.SIMILAR_ORGS_TABLE)
        business_similar_orgs_exp2 = sdk2.parameters.Resource('Business similar orgs exp2',
                                                              resource_type=geotypes.SIMILAR_ORGS_TABLE)
        business_similar_orgs_exp3 = sdk2.parameters.Resource('Business similar orgs exp3',
                                                              resource_type=geotypes.SIMILAR_ORGS_TABLE)
        business_similar_orgs_exp4 = sdk2.parameters.Resource('Business similar orgs exp4',
                                                              resource_type=geotypes.SIMILAR_ORGS_TABLE)
        business_similar_orgs_exp5 = sdk2.parameters.Resource('Business similar orgs exp5',
                                                              resource_type=geotypes.SIMILAR_ORGS_TABLE)
        # Directory or '.tar.gz' archive, see _prepare_geo_user_factors().
        geo_user_factors = sdk2.parameters.Resource('Geo user factors',
                                                    resource_type=rtypes.MAPS_GEO_USER_FACTORS)
        geo_events_distance_factors = sdk2.parameters.Resource('Geo events distance factors',
                                                               resource_type=rtypes.MAPS_STATIC_EVENTS_DISTANCE_FACTORS)

        # NOTE: 'precomuted' is a typo for 'precomputed'; the attribute name
        # is part of the task interface, so it is left unchanged.
        precomuted_filters = sdk2.parameters.Resource('Precomuted filters file',
                                                      resource_type=rtypes.BUSINESS_COMPUTED_FILTERS)

        filtering_annotations_tbl = sdk2.parameters.String('Filtering annotations YT table',
                                                           default_value='//home/geosearch-prod/stream_conveyor/production_data/filtration/production_ready')
        queryrec_data = sdk2.parameters.Resource('Data for query recognizer',
                                                 resource_type=rtypes.MAPS_QUERYREC_DATA)
        # Its local path becomes the 'suggest_dicts' directory in make_indexer_config().
        suggest_dict = sdk2.parameters.Resource('Dictionaries for suggest filter',
                                                resource_type=rtypes.SUGGEST_DICT)
        export_svd = sdk2.parameters.Bool('Create svd.txt resource',
                                          default_value=True)
        export_canonizer = sdk2.parameters.Bool('Create canonizer.zz resource',
                                                default_value=True)
        geosearch_dssm_model = sdk2.parameters.Resource('Geosearch Dssm Model',
                                                        resource_type=rtypes.DSSM_MODEL)
        l2_bigrams_dssm = sdk2.parameters.Resource('L2 Bigrams Dssm Model',
                                                   resource_type=rtypes.DSSM_MODEL)
        rubrics_dssm_model = sdk2.parameters.Resource('Rubrics Dssm Model',
                                                      resource_type=rtypes.DSSM_MODEL)

        external_dssm_table = YtTablePath('External Dssm table', default_value='//home/geosearch-prod/production_data/dssm_embeddings')

        top_lists_tbl = sdk2.parameters.String('TopListsData YT table',
                                                           default_value='//home/sprav/food_aspects_classification/top_lists/for_geosearch')
        # TODO(pvlkhn): remove it
        additional_company_attrs = sdk2.parameters.List('Additional company attributes')
        # TODO(ppavel96): remove it
        experimental_pass_rubrics_table_to_dssm = sdk2.parameters.Bool('Pass rubrics table to dssm', default_value=False)

        moved_collapser_dump = sdk2.parameters.Resource('Moved collapser dump', resource_type=geotypes.GEOSEARCH_MOVED_COLLAPSER_DUMP)

        advert_chain_table = YtTablePath('advert_chain_table')

        # Base path under which make_yt_tmp_dir() creates 'addrs_base_<task id>'.
        working_dir_base = YtTablePath('working_dir basepath, default=//tmp',
                                       default_value='//tmp')

        # When set, make_yt_tmp_dir() creates the working dir without a TTL.
        keep_temps = sdk2.parameters.Bool('Do not remove working dir on YT after termination', default_value=False)

        duplicate_shards = sdk2.parameters.Bool('Duplicate shards in shardmap', default_value=False)

    class Requirements(sdk2.Task.Requirements):
        # Resources requested for the task.
        cores = 1
        # NOTE(review): presumably megabytes (8 GiB) - confirm against the sdk2 docs.
        ram = 8192

        # The 'yandex-yt' pip package provides the ``yt.wrapper`` module
        # imported by ``_import_yt``.
        environments = (environments.PipEnvironment('yandex-yt', use_wheel=True),)

        # Intentionally empty subclass.
        # NOTE(review): presumably declares that the task uses no shared caches - confirm.
        class Caches(sdk2.Requirements.Caches):
            pass

    @sdk2.header()
    def head(self):
        """Build the HTML snippet for the task header.

        The snippet contains a link to the YT operations page filtered by the
        task id and the time span from task creation until now, the number of
        indexed documents when ``Context.total_index_size`` is set, and a red
        warning when acceptance is requested by an author that is not in
        ``USERS_WHITE_LIST``.
        """
        started_at = time.mktime(self.created.timetuple())
        current_time = time.mktime(datetime.now().timetuple())
        url_template = ('https://yt.yandex-team.ru/hahn/#page=operation'
                        '&dataMode=archive'
                        '&from={start}'
                        '&to={now}'
                        '&filter={task_id}'
                        '&user=robot-geosearch'
                        '&userfilter=other'
                        '&state=all')
        operations_url = url_template.format(start=started_at,
                                             now=current_time,
                                             task_id=self.id)
        html = '<a href="{url}" target="_blank">YT Operations</a>'.format(url=operations_url)
        index_size = self.Context.total_index_size
        if index_size:
            html += '\n<p>Index contains {sz} documents</p>'.format(sz=index_size)
        if not self.Parameters.need_acceptance or self.author in USERS_WHITE_LIST:
            return html
        warning = ('<p style="color:red">WARNING! '
                   'Acceptance of this base wouldn`t be '
                   'launched automatically</p>\n')
        return warning + html

    def _unpack(self, path, target_dir):
        """Unpack ``path`` into ``target_dir`` (resolved through ``self.path``).

        After ``unpack_file`` puts the content into the folder,
        ``unpack_files`` is run over that folder. Returns the folder path.
        """
        destination = make_folder(str(self.path(target_dir)))
        unpack_file(path, destination)
        unpack_files(destination)
        return destination

    def _prepare_similar_orgs(self):
        """Collect ``'<experiment>:<table>'`` entries for similar orgs.

        For every similar-orgs resource parameter that is set, the whole
        content of the resource file is read and used as the table part.
        Parameters that are not set are skipped.

        :return: list of ``'<experiment>:<file content>'`` strings.
        """
        params = self.Parameters
        sources = {
            'baseline': params.business_similar_orgs,
            'advert': params.business_advert_similar_orgs,
            'similars_exp1': params.business_similar_orgs_exp1,
            'similars_exp2': params.business_similar_orgs_exp2,
            'similars_exp3': params.business_similar_orgs_exp3,
            'similars_exp4': params.business_similar_orgs_exp4,
            'similars_exp5': params.business_similar_orgs_exp5,
        }
        entries = []
        for exp_name, resource in sources.iteritems():
            if not resource:
                continue
            table_file = str(sdk2.ResourceData(resource).path)
            with open(table_file) as fd:
                entries.append('{exp}:{tbl}'.format(exp=exp_name, tbl=fd.read()))
        return entries

    def _prepare_geo_user_factors(self, factor_prefix):
        """Return a local directory with the geo user factors.

        A directory resource is returned as is. A ``.tar.gz`` file is
        partially extracted with ``tar``: only the ``./<factor_prefix>_*``
        members go into the ``geo_user_factors`` folder, and the tar output
        is written to the task logs folder.

        :raises SandboxTaskFailureError: when the resource is neither a
            directory nor a ``.tar.gz`` file.
        """
        factors_path = str(sdk2.ResourceData(self.Parameters.geo_user_factors).path)
        if os.path.isdir(factors_path):
            return factors_path
        if not (os.path.isfile(factors_path) and factors_path.endswith('.tar.gz')):
            raise SandboxTaskFailureError('unknown format of user factors')

        target_dir = make_folder('geo_user_factors')
        log_path = get_logs_folder() + '/unpack_geo_user_factors.out.txt'
        template = ('tar '
                    '--extract '
                    '--file {geo_user_factors_path} '
                    '--directory {output_dir} '
                    '--verbose '
                    '--wildcards '
                    './{prefix}_*')
        command = template.format(geo_user_factors_path=factors_path,
                                  output_dir=target_dir,
                                  prefix=factor_prefix)
        with open(log_path, 'w') as log_file:
            logging.info('Running %s' % command)
            subprocess.check_call(command,
                                  shell=True,
                                  stdout=log_file,
                                  stderr=subprocess.STDOUT)
        return target_dir

    def prepare_export(self):
        """Return the location of the export data.

        When ``Parameters.export_path`` is alphanumeric it is treated as a
        resource id: the known export files are extracted from the resource
        tar archive into ``./export`` and every ``*.gz`` file there is
        gunzipped next to itself. Otherwise the value is a YT path and the
        result of ``get_table_real_path()`` is returned.

        :return: local export directory or a YT path.
        """
        export_path = self.Parameters.export_path
        if not export_path.isalnum():
            return self.get_table_real_path()

        # resource id
        export_resource = sdk2.Resource[export_path]
        resource_path = str(sdk2.ResourceData(export_resource).path)
        export_files = ['providers2.xml.gz',
                        'companies2_hidden.xml.gz',
                        'chains2.xml.gz',
                        'features2.xml.gz',
                        'rubrics2.xml.gz',
                        'features2_fast.xml.gz',
                        'references2.xml.gz']
        export_dir = make_folder('./export')
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(resource_path) as archive:
            for file_name in export_files:
                logging.info('Untaring %s to %s' % (file_name, export_dir))
                archive.extract(file_name, export_dir)

        gz_suffix = '.gz'
        for zipped_file in os.listdir(export_dir):
            if not zipped_file.endswith(gz_suffix):
                # Not a gzip archive (e.g. an already unpacked file):
                # gunzipping it onto itself would truncate it.
                continue
            zipped_path = os.path.join(export_dir, zipped_file)
            # Cut the suffix explicitly: str.rstrip('.gz') strips any trailing
            # '.', 'g' and 'z' characters rather than the '.gz' suffix.
            unzipped_path = zipped_path[:-len(gz_suffix)]
            logging.info('Unzipping %s to %s' % (zipped_path,
                                                 unzipped_path))
            with gzip.open(zipped_path, 'rb') as gz, open(unzipped_path, 'wb') as out:
                shutil.copyfileobj(gz, out)
        return export_dir

    def make_indexer_config(self):
        """Write the indexer config to ``./config.xml`` and return that path.

        The config references the dynamic user factors file, the moved
        collapser dump (only when ``Context.moved_collapser_dump`` is set),
        the ``./source`` directory and the suggest dictionaries resource.
        """
        config = IndexerConfig()
        # Files
        config.files['dynamic_user_factors'] = os.path.join(self.Context.geo_user_factors_dir,
                                                            'biz_clicks_dynamic')
        collapser_dump = self.Context.moved_collapser_dump
        if collapser_dump:
            config.files['moved_collapser'] = collapser_dump
        # Directories
        config.directories['source'] = './source'
        suggest_dicts = sdk2.ResourceData(self.Parameters.suggest_dict).path
        config.directories['suggest_dicts'] = str(suggest_dicts)

        output_path = './config.xml'
        with open(output_path, 'w') as out:
            out.write(config.dump())
        return output_path

    def make_geortyserver_configs(self):
        """Unpack the indexer configs archive and list the config files.

        The whole archive is extracted into the current directory.

        :return: dict with the relative paths of the ``geortyserver`` and
            ``oxygen_opts`` configs plus ``oxygen_opts_splitted`` - the list
            of ``OxygenOptions_*.cfg`` files found in the config directory.
            ``None`` is returned when ``KeyError`` or ``ValueError`` is
            raised while processing the archive (the error is logged).
        """
        config_resource = str(sdk2.ResourceData(self.Parameters.indexer_configs_resource).path)
        config_dir = './'
        conf_path = 'extsearch/geo/indexer/business_indexer_yt/indexer_config/'
        conf_files = {'geortyserver': os.path.join(conf_path, 'rtyserver-config.conf'),
                      'oxygen_opts': os.path.join(conf_path, 'OxygenOptions.cfg')}
        # The context manager guarantees the archive is closed on every path.
        with tarfile.open(config_resource) as archive:
            try:
                archive.extractall(config_dir)
                logging.info('All config files extracted')

                conf_files['oxygen_opts_splitted'] = [
                    os.path.join(conf_path, file_name)
                    for file_name in os.listdir(os.path.join(config_dir, conf_path))
                    if file_name.startswith('OxygenOptions_') and file_name.endswith('.cfg')
                ]
                return conf_files
            except KeyError:
                logging.exception('Path %s not found in indexer_config resource'
                                  % conf_path)
                # TarFile.list() prints to stdout and returns None, so log
                # the member names instead.
                logging.info(archive.getnames())
            except ValueError:
                # logging.exception() requires a message argument.
                logging.exception('Failed to process indexer_config archive %s'
                                  % config_resource)
        return None

    def _import_yt(self):
        """Import ``yt.wrapper``, configure it and return the module.

        The token is taken from ``self.yt_token`` and the proxy URL is set
        to ``hahn.yt.yandex.net``.
        """
        import yt.wrapper as yt
        settings = yt.config
        settings['token'] = self.yt_token
        settings['proxy']['url'] = 'hahn.yt.yandex.net'
        return yt

    def _make_db_dir(self, base_directory, ttl_days=None):
        """Create the ``addrs_base_<task id>`` map node under ``base_directory``.

        :param base_directory: YT path of the parent directory.
        :param ttl_days: when not ``None``, the node gets an
            ``expiration_timeout`` attribute of that many days (in
            milliseconds).
        :return: YT path of the node; an existing node is not an error.
        """
        yt = self._import_yt()
        node_path = yt.ypath_join(base_directory, 'addrs_base_%s' % self.id)
        node_attributes = {}
        if ttl_days is not None:
            milliseconds_per_day = 1000 * 60 * 60 * 24
            node_attributes['expiration_timeout'] = ttl_days * milliseconds_per_day
        # As yt.mkdir() does not support creation and setting the attributes atomically, we use yt.create() instead.
        yt.create('map_node', node_path, attributes=node_attributes, ignore_existing=True)
        return node_path

    def make_yt_tmp_dir(self):
        """Create the working directory under ``Parameters.working_dir_base``.

        The directory gets a one-day TTL unless ``Parameters.keep_temps`` is set.
        """
        if self.Parameters.keep_temps:
            ttl_days = None
        else:
            ttl_days = 1
        return self._make_db_dir(self.Parameters.working_dir_base, ttl_days)

    def make_yt_output_dir(self):
        """Create the output directory under ``Parameters.output_path`` with a ``Parameters.ttl_days`` TTL."""
        params = self.Parameters
        return self._make_db_dir(params.output_path, params.ttl_days)

    def get_table_real_path(self):
        """Resolve the ``@target_path`` of the export directory.

        The path is ``Parameters.yt_table`` without ``/snapshot/company`` and
        with ``&`` appended. When YT answers with an error, the error is
        logged and ``Parameters.export_path`` is returned instead.
        """
        yt = self._import_yt()
        link_path = self.Parameters.yt_table.replace('/snapshot/company', '') + "&"
        logging.info('Trying to get %s real path' % link_path)
        try:
            target_path = yt.get(yt.ypath_join(link_path, '@target_path'))
        except yt.YtHttpResponseError:
            logging.exception('Error geting %s real path. Details:' % link_path)
            return self.Parameters.export_path
        logging.info('%s real path is %s' % (link_path, target_path))
        return target_path

    def copy_table(self, table_path, working_dir_path, polite=False):
        """Copy a YT node into ``<working_dir_path>/snapshot/<node name>``.

        :param table_path: source YT path.
        :param working_dir_path: YT directory that receives the copy.
        :param polite: when true, a YT error is only logged and ``None`` is
            returned; otherwise the error is re-raised as
            ``SandboxTaskFailureError``.
        :return: path of the copy, or ``None`` after a tolerated failure.
        """
        yt = self._import_yt()
        destination = os.path.join(working_dir_path,
                                   'snapshot',
                                   os.path.basename(table_path))
        logging.info('Copying %s to %s' % (table_path, destination))
        try:
            yt.copy(table_path, destination, recursive=True)
        except yt.YtHttpResponseError as err:
            if not polite:
                raise SandboxTaskFailureError(err)
            logging.info('Could not copy %s' % table_path)
            return None
        return destination

    def sort_table(self, table_name, destination_table=None):
        """Sort ``table_name`` by the ``permalink`` column via ``yt.run_sort``.

        ``destination_table`` is passed through to ``run_sort`` unchanged.
        """
        logging.info('Sorting %s by "permalink"' % table_name)
        self._import_yt().run_sort(table_name,
                                   sort_by=['permalink'],
                                   destination_table=destination_table)

    def get_table_attribute(self, table_name, attribute_name):
        """Return the value of a YT node attribute, requested with ``default=None``."""
        client = self._import_yt()
        value = client.get_attribute(table_name, attribute_name, default=None)
        return value

    def get_table_mtime(self):
        """Return the modification time of ``Parameters.yt_table`` as an integer timestamp.

        The ``modification_time`` attribute is parsed as
        ``%Y-%m-%dT%H:%M:%S.%fZ``, shifted by three hours and converted with
        ``mktime``.
        """
        raw_mtime = self.get_table_attribute(self.Parameters.yt_table, 'modification_time')
        parsed = datetime.strptime(raw_mtime, '%Y-%m-%dT%H:%M:%S.%fZ')
        # NOTE(review): the +3h shift combined with mktime() (which interprets
        # its argument as local time) looks like it assumes the host runs in
        # UTC+3 - confirm.
        shifted = parsed + timedelta(hours=3)
        return int(mktime(shifted.timetuple()))

    def _yt_exists(self, yt_path):
        """Return the result of ``yt.exists`` for ``yt_path``."""
        return self._import_yt().exists(yt_path)

    def collect_stat_info(self):
        """Store error statistics of the build in the task context.

        When ``<working_dir>/error_log`` exists, its length goes to
        ``Context.error_count`` and its path to ``Context.error_path``.
        Otherwise only ``Context.error_count`` is set, to 0.
        """
        error_table = '{dir}/error_log'.format(dir=self.Context.working_dir)
        if not self._yt_exists(error_table):
            self.Context.error_count = 0
            return
        self.Context.error_count = self.get_yt_table_length(error_table)
        self.Context.error_path = error_table

    def copy_tables_for_wizard_data(self):
        """Copy export nodes into the output directory, tolerating failures.

        The ``rubric``, ``feature``, ``feature_enum_value`` and ``chain``
        snapshot tables are copied first, then ``features2_fast.xml.gz``.
        Every copy is done in polite mode.
        """
        export_root = self.Parameters.export_path
        snapshot_tables = ('rubric', 'feature', 'feature_enum_value', 'chain')
        sources = [os.path.join(export_root, 'snapshot', name)
                   for name in snapshot_tables]
        sources.append(os.path.join(export_root, 'features2_fast.xml.gz'))
        for source in sources:
            self.copy_table(source, self.Context.output_dir, polite=True)

    def run_sync(self, cmd, log_file_path, error_msg):
        """Run the data preparer binary with ``cmd`` and wait for it.

        The binary from ``Parameters.data_preparer`` is stripped, then run
        through the shell with ``self.proc_env`` as environment; stdout and
        stderr go to ``log_file_path``.

        :raises SandboxTaskFailureError: with ``error_msg`` when the command
            exits with a non-zero code.
        """
        preparer_path = sdk2.ResourceData(self.Parameters.data_preparer).path
        full_cmd = '{binary} {cmd}'.format(binary=_strip_binary(preparer_path), cmd=cmd)
        logging.info('Running: %s' % full_cmd)
        with open(log_file_path, 'w') as log_file:
            call_options = dict(shell=True,
                                env=self.proc_env,
                                stdout=log_file,
                                stderr=subprocess.STDOUT)
            try:
                subprocess.check_call(full_cmd, **call_options)
            except subprocess.CalledProcessError:
                logging.exception('%s command failed' % full_cmd)
                raise SandboxTaskFailureError(error_msg)

    def find_duplicates(self):
        """Run ``companies_duplicates`` over the companies working copy.

        :return: YT path of the result table,
            ``<working_dir>/companies_duplicates``.
        """
        output_table = '{working_dir}/companies_duplicates'.format(working_dir=self.Context.working_dir)
        template = ('companies_duplicates '
                    '-s hahn '
                    '-o {duplicates_table} '
                    '-c {working_copy}')
        command = template.format(duplicates_table=output_table,
                                  working_copy=self.Context.companies_working_copy)
        log_path = get_logs_folder() + '/companies_duplicates.out.txt'
        self.run_sync(command, log_path, 'Building companies duplicates failed')
        return output_table

    def export_svd(self):
        """Run ``export_svd`` into ``./svd.txt`` and gzip the result.

        :return: path of the gzipped file, ``./svd.txt.gz``.
        """
        plain_path = './svd.txt'
        template = ('export_svd '
                    '-s hahn '
                    '-d {export_path} '
                    '-i {index_path} '
                    '--shards-count {shards_count}')
        command = template.format(export_path=plain_path,
                                  index_path=self.Context.index_table,
                                  shards_count=self.Parameters.shards_count)
        logging.info('Running: %s' % command)
        log_path = get_logs_folder() + '/export_svd.out.txt'
        self.run_sync(command, log_path, 'Building svd export failed')

        packed_path = plain_path + '.gz'
        with open(plain_path, 'rb') as source, gzip.open(packed_path, 'wb') as target:
            shutil.copyfileobj(source, target)
        return packed_path

    def export_canonizer(self):
        """Run the ``export_canonizer`` step synchronously.

        The step reads ``self.Context.index_table`` and writes ``./canonizer.zz``.

        :return: path of the exported file
        :raises SandboxTaskFailureError: propagated from ``run_sync`` on failure
        """
        target_path = './canonizer.zz'
        template = 'export_canonizer -s hahn -d {export_path} -i {index_path} --shards-count {shards_count}'
        cmd = template.format(export_path=target_path,
                              index_path=self.Context.index_table,
                              shards_count=self.Parameters.shards_count)
        logging.info('Running: %s', cmd)
        self.run_sync(cmd,
                      get_logs_folder() + '/export_canonizer.out.txt',
                      'Building canonizer export failed')
        return target_path

    def build_raw_user_factors(self, duplicates_table):
        """Run the ``export_tsv`` step for raw user factors synchronously.

        The input file is ``biz_clicks_static_raw`` inside
        ``self.Context.geo_user_factors_dir``; records are exported with the
        ``OrgFactorsRaw`` name.

        :param duplicates_table: table passed to the step with ``-d``
        :return: path of the output table ``<working_dir>/raw_user_factors``
        :raises SandboxTaskFailureError: propagated from ``run_sync`` on failure
        """
        raw_user_factors_table = '{working_dir}/raw_user_factors'.format(working_dir=self.Context.working_dir)
        cmd = ('export_tsv '
               '-o {output_table} '
               '-s hahn '
               '{raw_user_factors_file} '
               '-d {duplicates} '
               '-n OrgFactorsRaw').format(output_table=raw_user_factors_table,
                                          raw_user_factors_file=os.path.join(self.Context.geo_user_factors_dir, 'biz_clicks_static_raw'),
                                          duplicates=duplicates_table)
        # Dedicated log name: build_static_user_factors opens
        # prepare_static_user_factors.out.txt with 'w', so sharing that name
        # makes one step overwrite the other's output.
        log_file_path = get_logs_folder() + '/prepare_raw_user_factors.out.txt'
        self.run_sync(cmd, log_file_path, 'Building raw user factors index chunks failed')
        return raw_user_factors_table

    def cut_export(self, export_path, output_path):
        """Run the ``cut_export`` step when a positive companies limit is set.

        :param export_path: value passed to the step with ``-i``
        :param output_path: value passed to the step with ``-d``
        :return: True if the step was run, False when
            ``self.Parameters.companies_limit`` is empty or not positive
        :raises SandboxTaskFailureError: propagated from ``run_sync`` on failure
        """
        limit = self.Parameters.companies_limit
        if limit and limit > 0:
            template = 'cut_export -d {output_path} -s hahn -i {export_path} -n {count}'
            cmd = template.format(output_path=output_path,
                                  export_path=export_path,
                                  count=self.Parameters.companies_limit)
            self.run_sync(cmd,
                          get_logs_folder() + '/cut_export.out.txt',
                          'Cutting companies table failed')
            return True
        return False

    def _make_factor_annotations_params(self):
        factor_annotations_dict = {'maps_clicks_shows': self.Parameters.maps_click_shows,
                                   'org_data': self.Parameters.org_data,
                                   'mobile_clicks': self.Parameters.mobile_clicks,
                                   'serp_clicks': self.Parameters.serp_clicks,
                                   'located_at_org_name': self.Parameters.located_at_org_name,
                                   'hospital_name': self.Parameters.hospital_name,
                                   'faculty_name': self.Parameters.faculty_name,
                                   'org_reviews': self.Parameters.org_reviews,
                                   'duplicate_stream': self.Parameters.duplicate_stream,
                                   'clusterid_stream': self.Parameters.clusterid_stream}
        result = ''
        for key, value in factor_annotations_dict.iteritems():
            if value and value != '':
                result += '{0}:{1} '.format(key, value)
        return result

    def make_malls_catalog(self):
        """Start the ``malls`` data preparer step through ``_run_async``.

        :return: tuple ``(process, malls_table)`` where ``process`` is the value
            returned by ``_run_async`` and ``malls_table`` is
            ``<working_dir>/malls``
        """
        output_table = self.Context.working_dir + '/malls'
        template = 'malls -o {output} -s hahn -c {companies_table} -e {export_path}'
        cmd = template.format(output=output_table,
                              companies_table=self.Context.companies_working_copy,
                              export_path=self.Context.export_path)
        log_path = get_logs_folder() + '/make_malls_catalog.out.txt'
        started = _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                             log_path, 'Building malls table failed')
        return started, output_table

    def build_annotation_index_chunks(self, duplicates_table):
        """Unpack queryrec data and start the ``annotations`` step.

        The ``queryrec_data`` resource is unpacked into ``unpacked_queryrec``
        (relative path) before the step is started through ``_run_async``.

        :param duplicates_table: table passed to the step with ``-d``
        :return: tuple ``(process, annotations_table)`` where ``process`` is the
            value returned by ``_run_async``
        """
        queryrec_dir = 'unpacked_queryrec'
        packed_queryrec = str(sdk2.ResourceData(self.Parameters.queryrec_data).path)
        self._unpack(packed_queryrec, queryrec_dir)
        output_table = '{working_dir}/companies_annotations'.format(working_dir=self.Context.working_dir)
        template = ('annotations -o {annotations_table} -s hahn -d {duplicates_table} '
                    '-i {filtering_annotations} -q {queryrec_data}')
        cmd = template.format(annotations_table=output_table,
                              duplicates_table=duplicates_table,
                              filtering_annotations=self.Parameters.filtering_annotations_tbl,
                              queryrec_data=queryrec_dir)
        log_path = get_logs_folder() + '/make_annotation_index_chunks.out.txt'
        started = _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                             log_path, 'Building annotation index chunks failed')
        return started, output_table

    def build_factor_annotations(self, duplicates_table):
        """Start the ``factor_annotations`` step through ``_run_async``.

        The sources are taken from ``self.factor_annotations_params`` and the
        step is given a fixed ``--memory 12288`` argument.

        :param duplicates_table: table passed to the step with ``-d``
        :return: tuple ``(process, factor_annotations_table)`` where ``process``
            is the value returned by ``_run_async``
        """
        output_table = '{working_dir}/factor_annotations'.format(working_dir=self.Context.working_dir)
        template = ('factor_annotations -o {factorannotation_table} -s hahn --memory 12288 '
                    '{factor_annotation_params} -d {duplicates_table}')
        cmd = template.format(factorannotation_table=output_table,
                              factor_annotation_params=self.factor_annotations_params,
                              duplicates_table=duplicates_table)
        log_path = get_logs_folder() + '/make_factor_annotation_index_chunks.out.txt'
        started = _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                             log_path, 'Building factor annotation index chunks failed')
        return started, output_table

    def build_similar_orgs(self, duplicates_table):
        """Start the ``similar_orgs`` step through ``_run_async``.

        Every path returned by ``self._prepare_similar_orgs()`` is passed to the
        step as a separate ``--input`` argument.

        :param duplicates_table: table passed to the step with ``-d``
        :return: tuple ``(process, similar_orgs_table)`` where ``process`` is the
            value returned by ``_run_async``
        """
        input_paths = self._prepare_similar_orgs()
        output_table = '{working_dir}/similar_orgs'.format(working_dir=self.Context.working_dir)
        head = 'similar_orgs -o {similar_orgs_table} -s hahn -d {duplicates_table} '.format(
            similar_orgs_table=output_table,
            duplicates_table=duplicates_table,
        )
        # Each fragment keeps its trailing space, exactly as the command expects.
        cmd = head + ''.join('--input {path} '.format(path=item) for item in input_paths)
        log_path = get_logs_folder() + '/make_similar_orgs.out.txt'
        started = _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                             log_path, 'Building similar orgs failed')
        return started, output_table

    def build_static_factors_yt(self, duplicates_table):
        """Start the ``static_factors_yt`` step through ``_run_async``.

        The values of the factor-related task parameters listed below are
        joined with spaces and passed to the step as positional arguments.

        :param duplicates_table: table passed to the step with ``-d``
        :return: tuple ``(process, static_factors_yt_table)`` where ``process``
            is the value returned by ``_run_async``
        """
        params = self.Parameters
        factor_sources = ' '.join((
            params.count_factors,
            params.paid_realty,
            params.departments,
            params.click_factors,
            params.click_factors_document,
            params.click_factors_search_group,
            params.click_factors_url,
            params.click_factors_duplicate_stats,
            params.travel_factors,
            params.clusterid_clicks,
            params.clusterid_commit,
            params.duplicate_clicks,
            params.duplicate_commit,
            params.duplicate_export,
            params.advert_stats,
        ))
        output_table = '{working_dir}/static_factors_yt'.format(working_dir=self.Context.working_dir)
        template = 'static_factors_yt -o {output_table} -s hahn {static_factors_yt_params} -d {duplicates_table}'
        cmd = template.format(output_table=output_table,
                              static_factors_yt_params=factor_sources,
                              duplicates_table=duplicates_table)
        log_path = get_logs_folder() + '/prepare_static_factors_yt.out.txt'
        started = _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                             log_path, 'Building static factors index chunks failed')
        return started, output_table

    def build_static_rubric_factors_yt(self):
        """Start the ``static_factors_not_indexed_yt`` step for rubric factors.

        The step is started through ``_run_async`` with ``-k Permalink`` and
        writes to the relative file ``static_factors_rubric.mms``.

        :return: tuple ``(process, output_file)`` where ``process`` is the value
            returned by ``_run_async``
        """
        rubric_sources = ' '.join((self.Parameters.click_factors_rubric,
                                   self.Parameters.rubricids_export,
                                   self.Parameters.rubricids_clicks))
        output_file = 'static_factors_rubric.mms'
        template = ('static_factors_not_indexed_yt -o {output_file} -s hahn '
                    '{static_factors_not_indexed_yt_params} -k Permalink')
        cmd = template.format(output_file=output_file,
                              static_factors_not_indexed_yt_params=rubric_sources)
        log_path = get_logs_folder() + '/prepare_static_rubric_factors_yt.out.txt'
        started = _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                             log_path, 'Building static rubric factors index chunks failed')
        return started, output_file

    def build_static_docurl_factors_yt(self, duplicates_table):
        """Start the ``static_factors_docurlids`` step through ``_run_async``.

        The step reads ``self.Parameters.duplicate_export`` (``-i``), gets the
        shard count via ``--c`` and the ``docurl`` name via ``-m``.

        :param duplicates_table: table passed to the step with ``-d``
        :return: tuple ``(process, output_table, 'docurl')`` where ``process`` is
            the value returned by ``_run_async``
        """
        docurl_sources = ' '.join((self.Parameters.docurlids_export,
                                   self.Parameters.docurlids_clicks))
        output_table = '{working_dir}/static_docurl_factors_yt'.format(working_dir=self.Context.working_dir)
        mms_name = 'docurl'
        template = ('static_factors_docurlids -i {source_table} -o {output_table} --c {shards} '
                    '-m {outmms} -s hahn {static_factors_yt_params} -d {duplicates_table}')
        cmd = template.format(source_table=self.Parameters.duplicate_export,
                              output_table=output_table,
                              shards=self.Parameters.shards_count,
                              outmms=mms_name,
                              static_factors_yt_params=docurl_sources,
                              duplicates_table=duplicates_table)
        log_path = get_logs_folder() + '/prepare_static_docurl_factors_yt.out.txt'
        started = _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                             log_path, 'Building static docurl factors index chunks failed')
        return started, output_table, mms_name

    def build_static_doc_factors_yt(self, raw_user_factors_table):
        """Start the ``rich_user_factors`` step through ``_run_async``.

        Besides the raw factors table the step receives the companies working
        copy and ``self.Context.events_distance_factors_table``.

        :param raw_user_factors_table: table passed to the step with ``-f``
        :return: tuple ``(process, static_doc_factors_table)`` where ``process``
            is the value returned by ``_run_async``
        """
        output_table = '{working_dir}/static_doc_factors'.format(working_dir=self.Context.working_dir)
        template = ('rich_user_factors -o {output_table} -c hahn -s {working_copy} '
                    '-f {raw_factors} --event-dist-factors {event_factors}')
        cmd = template.format(output_table=output_table,
                              working_copy=self.Context.companies_working_copy,
                              raw_factors=raw_user_factors_table,
                              event_factors=self.Context.events_distance_factors_table)
        log_path = get_logs_folder() + '/prepare_static_doc_factors_yt.out.txt'
        started = _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                             log_path, 'Building static doc factors index chunks failed')
        return started, output_table

    def build_static_user_factors(self, duplicates_table):
        """Start the ``export_tsv`` step for static user factors.

        The input file is ``biz_clicks_static`` inside
        ``self.Context.geo_user_factors_dir``; records are exported with the
        ``OrgFactors`` name. The step is started through ``_run_async``.

        :param duplicates_table: table passed to the step with ``-d``
        :return: tuple ``(process, static_user_factors_table)`` where ``process``
            is the value returned by ``_run_async``
        """
        output_table = '{working_dir}/static_user_factors'.format(working_dir=self.Context.working_dir)
        source_file = os.path.join(self.Context.geo_user_factors_dir, 'biz_clicks_static')
        template = 'export_tsv -o {output_table} -s hahn {static_user_factors_file} -d {duplicates} -n OrgFactors'
        cmd = template.format(output_table=output_table,
                              static_user_factors_file=source_file,
                              duplicates=duplicates_table)
        log_path = get_logs_folder() + '/prepare_static_user_factors.out.txt'
        started = _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                             log_path, 'Building static user factors index chunks failed')
        return started, output_table

    def build_events_distance_factors(self, duplicates_table):
        """Start the ``export_tsv`` step for events distance factors.

        The input file is ``static_events_distance_factors_business`` inside the
        ``geo_events_distance_factors`` resource; records are exported with the
        ``EventsDistanceFactors`` name. The step is started through ``_run_async``.

        :param duplicates_table: table passed to the step with ``-d``
        :return: tuple ``(process, events_distance_factors_table)`` where
            ``process`` is the value returned by ``_run_async``
        """
        output_table = '{working_dir}/events_distance_factors'.format(working_dir=self.Context.working_dir)
        resource_dir = str(sdk2.ResourceData(self.Parameters.geo_events_distance_factors).path)
        source_file = os.path.join(resource_dir, 'static_events_distance_factors_business')
        template = ('export_tsv -o {output_table} -s hahn {events_distance_factors_file} '
                    '-d {duplicates} -n EventsDistanceFactors')
        cmd = template.format(output_table=output_table,
                              events_distance_factors_file=source_file,
                              duplicates=duplicates_table)
        log_path = get_logs_folder() + '/prepare_events_distance_factors.out.txt'
        started = _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                             log_path, 'Building events distance factors index chunks failed')
        return started, output_table

    def build_reviews(self):
        """Start the ``reviews`` step through ``_run_async``.

        :return: tuple ``(process, reviews_table)`` where ``process`` is the value
            returned by ``_run_async`` and ``reviews_table`` is
            ``<working_dir>/reviews``
        """
        output_table = '{working_dir}/reviews'.format(working_dir=self.Context.working_dir)
        cmd = ' '.join((
            'reviews',
            '-o', output_table,
            '-s', 'hahn',
            '--reviews', self.Parameters.review_count_table,
        ))
        log_path = get_logs_folder() + '/prepare_reviews.out.txt'
        started = _run_async(
            self.Parameters.data_preparer,
            self.proc_env,
            cmd,
            log_path,
            'Building reviews index chunks failed'
        )
        return started, output_table

    def build_geosearch_dssm(self):
        """Start the ``geosearch_dssm`` step through ``_run_async``.

        The command always carries the ``SerpFps`` model and the external DSSM
        table. When ``experimental_pass_rubrics_table_to_dssm`` is set, it also
        passes the rubrics table (``-r``) plus the ``WebFast`` and ``Rubrics``
        models. The command is assembled once, so the argument order is the
        same as in the two former templates.

        :return: tuple ``(process, geosearch_dssm_table)`` where ``process`` is
            the value returned by ``_run_async``
        """
        geosearch_dssm_table = '{working_dir}/geosearch_dssm'.format(working_dir=self.Context.working_dir)
        with_rubrics = self.Parameters.experimental_pass_rubrics_table_to_dssm

        parts = [
            'geosearch_dssm',
            '-o {0}'.format(geosearch_dssm_table),
            '-s hahn',
            '-i {0}'.format(self.Context.companies_working_copy),
            '-d {0}'.format(self.Context.duplicates_table),
        ]
        if with_rubrics:
            parts.append('-r {0}'.format(self.Parameters.export_path + "/exported/rubric"))
        parts.append('SerpFps:{0}'.format(sdk2.ResourceData(self.Parameters.geosearch_dssm_model).path))
        if with_rubrics:
            # Extra models are positional and must follow SerpFps, before -e.
            parts.append('WebFast:{0}'.format(sdk2.ResourceData(self.Parameters.l2_bigrams_dssm).path))
            parts.append('Rubrics:{0}'.format(sdk2.ResourceData(self.Parameters.rubrics_dssm_model).path))
        parts.append('-e {0}'.format(self.Parameters.external_dssm_table))
        cmd = ' '.join(parts)

        log_file_path = get_logs_folder() + '/prepare_geosearch_dssm.out.txt'
        process = _run_async(
            self.Parameters.data_preparer,
            self.proc_env,
            cmd,
            log_file_path,
            'Building geosearch dssm embedding failed'
        )
        return process, geosearch_dssm_table

    def build_precomputed_filters(self, precomputed_filters_path):
        """Start the ``precompute_filters`` step through ``_run_async``.

        ``-a <advert_chain_table>`` is appended only when that parameter is set.

        :param precomputed_filters_path: value passed to the step with ``-o``
        :return: the value returned by ``_run_async``
        """
        args = [
            'precompute_filters',
            '-s', 'hahn',
            '-c', self.Context.companies_working_copy,
            '-e', self.Parameters.export_path + "/exported",
            '-o', precomputed_filters_path,
            '--proto',
        ]
        if self.Parameters.advert_chain_table:
            args += ['-a', self.Parameters.advert_chain_table]
        log_path = os.path.join(get_logs_folder(), 'prepare_precomputed_filters.out.txt')
        return _run_async(self.Parameters.data_preparer, self.proc_env, ' '.join(args),
                          log_path, "Building precomputed filters")

    def build_search_text(self):
        """Start the ``search_text`` step through ``_run_async``.

        :return: tuple ``(process, search_text_table)`` where ``process`` is the
            value returned by ``_run_async`` and the table is
            ``<output_dir>/snapshot/search_text``
        """
        output_table = '{output_dir}/snapshot/search_text'.format(output_dir=self.Context.output_dir)
        args = [
            'search_text',
            '-s', 'hahn',
            '-c', self.Context.companies_working_copy,
            '-o', output_table,
        ]
        log_path = os.path.join(get_logs_folder(), 'prepare_search_text.out.txt')
        started = _run_async(self.Parameters.data_preparer, self.proc_env, ' '.join(args),
                             log_path, 'Building search text table')
        return started, output_table

    def build_features(self, features_path):
        """Start the ``features`` step through ``_run_async``.

        The step reads ``<export_path>/exported`` taken from ``self.Context``.

        :param features_path: value passed to the step with ``-o``
        :return: the value returned by ``_run_async``
        """
        args = [
            'features',
            '-s', 'hahn',
            '-e', self.Context.export_path + '/exported',
            '-o', features_path,
        ]
        log_path = os.path.join(get_logs_folder(), 'prepare_features.out.txt')
        return _run_async(self.Parameters.data_preparer, self.proc_env, ' '.join(args),
                          log_path, 'Building features.pbs')

    def build_source_proto(self):
        """Start the ``source_proto`` step through ``_run_async``.

        When the companies working copy path ends with ``/fresh_states`` it is
        passed as ``--fresh-states``; otherwise ``<export_path>/snapshot/company``
        is passed as ``--snapshot-company``.

        :return: tuple ``(process, source_proto_path)`` where ``process`` is the
            value returned by ``_run_async``
        """
        output_path = '{working_dir}/source_proto'.format(working_dir=self.Context.working_dir)
        working_copy = self.Context.companies_working_copy
        if working_copy.endswith('/fresh_states'):
            # freshness shard
            source_args = ['--fresh-states', self.Context.companies_working_copy]
        else:
            source_args = ['--snapshot-company', self.Context.export_path + '/snapshot/company']
        args = [
            'source_proto',
            '--server', 'hahn',
            '--output', output_path,
            '--mem-limit', '2048',
        ] + source_args

        log_path = os.path.join(get_logs_folder(), 'prepare_source_proto.out.txt')
        started = _run_async(self.Parameters.data_preparer, self.proc_env, ' '.join(args),
                             log_path, 'Building source_proto')
        return started, output_path

    def build_moved_data(self, out_path):
        """Start the ``moved`` step through ``_run_async``.

        The step receives the companies working copy and the duplicates table
        stored in ``self.Context``.

        :param out_path: value passed to the step with ``-o``
        :return: the value returned by ``_run_async``
        """
        cmd = ' '.join([
            'moved',
            '--cluster', 'hahn',
            '--companies', self.Context.companies_working_copy,
            '--duplicates', self.Context.duplicates_table,
            '-o', out_path,
        ])
        log_path = os.path.join(get_logs_folder(), 'prepare_moved.out.txt')
        return _run_async(self.Parameters.data_preparer, self.proc_env, cmd,
                          log_path, 'Building moved data')

    def build_index(self, additional_data_tables, indexer_config, geortyserver_config):
        """Run the standalone indexer synchronously and return the index table.

        NOTE: the ``indexer_config`` and ``geortyserver_config`` arguments are
        overwritten with configs generated by ``make_indexer_config`` and
        ``make_geortyserver_configs`` before they are used.

        :param additional_data_tables: tables, each passed with ``--append``
        :param indexer_config: ignored (see note above)
        :param geortyserver_config: ignored (see note above)
        :return: path of the index table ``<working_dir>/result``
        :raises SandboxTaskFailureError: when the indexer exits with non-zero status
        """
        logging.info('Making indexer config')
        indexer_config = self.make_indexer_config()
        logging.info('Making geortyserver config')
        merger_configs = self.make_geortyserver_configs()
        geortyserver_config = merger_configs.get('geortyserver')
        oxygen_configs = merger_configs.get('oxygen_opts_splitted')
        indexer_binary = _strip_binary(sdk2.ResourceData(self.Parameters.standalone_indexer_executable).path)
        index_table = '{working_dir}/result'.format(working_dir=self.Context.working_dir)

        fragments = [
            '{0}'.format(indexer_binary),
            '--tmp-dir={0}'.format(self.Context.working_dir),
            '--indexer-config={0}'.format(indexer_config),
            '--merger-config={0}'.format(geortyserver_config),
            '--export={0}'.format(self.Context.export_path),
            '--output-dir {0}'.format(index_table),
            '--shards-count {0}'.format(self.Parameters.shards_count),
            '--source-data={0}'.format(self.Context.companies_working_copy),
            '--merger-memory-size={0}'.format(self.Parameters.merger_memory_size),
            '--merger-tmpfs-size={0}'.format(self.Parameters.merger_tmpfs_size),
            '--malls-table={0}'.format(self.Context.working_dir + "/malls"),
        ]
        # Mapper limits are optional: skipped when unset or empty.
        if self.Parameters.mapper_memory_size not in (None, ''):
            fragments.append('--mapper-memory-size={0}'.format(self.Parameters.mapper_memory_size))
        if self.Parameters.mapper_tmpfs_size not in (None, ''):
            fragments.append('--mapper-tmpfs-size={0}'.format(self.Parameters.mapper_tmpfs_size))
        fragments.extend('--append {0}'.format(table) for table in additional_data_tables)
        fragments.extend('--oxygen-config {0}'.format(config) for config in oxygen_configs)
        cmd = ' '.join(fragments)

        logging.info('Running: %s', cmd)
        log_path = get_logs_folder() + '/build_index.out.txt'
        with open(log_path, 'w') as output:
            try:
                subprocess.check_call(
                    cmd,
                    shell=True,
                    env=self.proc_env,
                    stdout=output,
                    stderr=subprocess.STDOUT,
                )
            except subprocess.CalledProcessError:
                logging.exception('%s command failed', cmd)
                raise SandboxTaskFailureError('Building database failed')
        return index_table

    def register_shards(self, index_table_path):
        """Launch one RegisterAddrsBaseShard subtask per shard, then check them.

        The method has two modes, selected by ``self.Context.register_tasks``:

        * empty: a subtask is enqueued for every shard number in
          ``range(shards_count)``, their ids are stored in the context and
          ``sdk2.WaitTask`` is raised to wait for all of them;
        * non-empty: the tasks returned by ``self.find()`` are inspected and a
          ``ShardRegisteringException`` is raised if any of them is in a
          status of the BREAK group.

        :param index_table_path: passed to each subtask as ``index_table``
        :raises sdk2.WaitTask: after the subtasks have been enqueued
        :raises ShardRegisteringException: if a found task is in a BREAK status
        """
        if not self.Context.register_tasks:     # Subtasks not launched yet
            indexann_resources = []
            if self.Context.build_web_indexann_task:
                # Per-shard indexann resources produced by the web indexann task
                indexann_resources = sdk2.Resource[rtypes.MAPS_WEB_INDEXANN].find(task=sdk2.Task[self.Context.build_web_indexann_task]).limit(self.Parameters.shards_count)
            self.Context.register_tasks = []
            # An explicitly given precomputed filters parameter wins over the
            # resource stored in the context ("precomuted" is the parameter's
            # actual spelling).
            if self.Parameters.precomuted_filters:
                precomputed_filters_resource = self.Parameters.precomuted_filters
            else:
                precomputed_filters_resource = self.Context.precomputed_filters_resource
            if self.Parameters.common_features_pbs:
                features_resource = self.Context.features_resource
            else:
                features_resource = None
            for shard_number in range(0, self.Parameters.shards_count):
                # Default to the annotations parameter; replaced below if a
                # resource with a matching shard_id attribute is found.
                indexann_resource = self.Parameters.annotations
                for resource in indexann_resources:
                    # shard_id is compared as a string
                    if resource.shard_id == str(shard_number):
                        indexann_resource = resource
                        break
                register_task = RegisterAddrsBaseShard(self,
                                                       description='Registering shard # {shard_num} (#{parent_id} subtask)'.format(shard_num=shard_number, parent_id=self.id),
                                                       owner=self.owner,
                                                       index_table=index_table_path,
                                                       annotations=indexann_resource,
                                                       geo_stat=self.Parameters.geo_stat,
                                                       rubric_static_factors_yt=self.Context.rubric_static_factors_resource,
                                                       docurl_static_factors_yt=self.Context.docurl_static_factors_resource[shard_number],
                                                       precomputed_filters=precomputed_filters_resource,
                                                       features=features_resource,
                                                       shard_number=shard_number,
                                                       index_downloader_executable=self.Parameters.index_downloader_executable,
                                                       companies_mtime=self.Context.companies_mtime,
                                                       cooking_start_time=self.Context.cooking_start_time,
                                                       company_table_path=self.Context.export_real_path,
                                                       filtering_annotations_table_path=self.Parameters.filtering_annotations_tbl,
                                                       create_sub_task=True,
                                                       base_commit_id=self.Context.base_commit_id).enqueue()
                self.Context.register_tasks.append(register_task.id)
            # Wait until every subtask reaches a FINISH or BREAK status
            raise sdk2.WaitTask(self.Context.register_tasks,
                                ctt.Status.Group.FINISH | ctt.Status.Group.BREAK,
                                wait_all=True)
        else:
            # NOTE(review): only BREAK statuses are treated as errors here;
            # self.find() is called without filters, so presumably it covers
            # all child tasks - confirm.
            for task in self.find():
                if task.status in ctt.Status.Group.BREAK:
                    raise ShardRegisteringException('Registering shard failed')

    def _get_shard_rbtorrent(self, task):
        """Return ``skynet_id`` of the first ADDRS_BUSINESS_SHARD resource of ``task``.

        :param task: task whose resources are searched
        """
        shard_query = sdk2.Resource[rtypes.ADDRS_BUSINESS_SHARD].find(task=task)
        shard_resource = shard_query.first()
        return shard_resource.skynet_id

    @retry(tries=5, delay=10)
    def get_shards(self):
        """Collect shard identifiers from successful RegisterAddrsBaseShard tasks.

        :return: dict mapping ``shard_number`` of each found task to the value
            returned by ``_get_shard_rbtorrent`` for it
        :raises Exception: if the number of collected shards differs from
            ``self.Parameters.shards_count`` (the call is wrapped in ``retry``)
        """
        succeeded = self.find(task_type=sdk2.Task[RegisterAddrsBaseShard.type],
                              status=ctt.Status.Group.SUCCEED)
        logging.info('Child tasks: %s', [t.id for t in succeeded])
        shards = {}
        for task in succeeded:
            shards[task.Parameters.shard_number] = self._get_shard_rbtorrent(task)
        if len(shards) != self.Parameters.shards_count:
            message = 'Expected {count} shards, got {shards} from API'.format(
                count=self.Parameters.shards_count,
                shards=len(shards),
            )
            raise Exception(message)
        return shards

    def make_rbtorrent_shardmap(self):
        """Write the rbtorrent shardmap file and return its path.

        Each line has the form
        ``<prefix>-<NNN>-0000000000    <rbtorrent>(local_path=shard)    <tier_name>``
        where the prefix is looked up in ``PREFIX_TO_TIER`` by the tier name and
        ``NNN`` is the zero-padded shard number.

        :return: ``'./addrs_base_rbtorrent_shardmap'``
        :raises ShardmapCreationException: if the number of shards differs from
            ``self.Parameters.shards_count``
        """
        shards = self.get_shards()
        if len(shards) != self.Parameters.shards_count:
            template = ('{actual} shards found while {shard_count} needed\n'
                        'Check {task_type} subtasks for details')
            raise ShardmapCreationException(template.format(
                actual=len(shards),
                shard_count=self.Parameters.shards_count,
                task_type=RegisterAddrsBaseShard.type,
            ))
        target_path = './addrs_base_rbtorrent_shardmap'
        tier_name = self.Parameters.tier_name
        shard_prefix = PREFIX_TO_TIER.get(tier_name)
        line_template = '{gencfg_shard}    {rbtorrent}(local_path=shard)    {tier_name}\n'
        lines = [
            line_template.format(
                gencfg_shard='{prfx}-{nmbr:0>3d}-0000000000'.format(prfx=shard_prefix, nmbr=number),
                rbtorrent=torrent,
                tier_name=tier_name,
            )
            for number, torrent in shards.iteritems()
        ]
        logging.info('Shardmap content: %s', lines)
        with open(target_path, 'w+') as shardmap_file:
            shardmap_file.writelines(lines)
        logging.info('Shardmap written to %s file', target_path)
        return target_path

    def make_yp_shardmap_dict(self, shards):
        """Map 1-based pod labels to shard identifiers.

        With ``self.Parameters.duplicate_shards`` set, the shard sequence is
        repeated twice and labels run up to ``2 * shards_count``. The input is
        copied first, so the caller's sequence is never modified and any
        iterable (including a dict view) is accepted.

        :param shards: iterable of shard identifiers in label order
        :return: OrderedDict ``{label: shard}``; as with ``zip``, the shorter of
            the label range and the shard sequence bounds the result
        """
        copies = 2 if self.Parameters.duplicate_shards else 1
        # list() makes a private copy; the former in-place `*=` leaked the
        # duplication back into the caller's list.
        shard_list = list(shards) * copies
        labels = range(1, self.Parameters.shards_count * copies + 1)
        return OrderedDict(zip(labels, shard_list))

    def make_yp_shardmap(self):
        """Write the YP shardmap file and return its path.

        Shards from ``get_shards`` are ordered by shard number before labels
        are assigned by ``make_yp_shardmap_dict``, so the label-to-shard
        mapping does not depend on dict iteration order. Each line has the form
        ``pod_label:shard_id=<label>    <rbtorrent>(local_path=shard)``.

        :return: ``'./addrs_base_yp_shardmap'``
        """
        shards_by_number = self.get_shards()
        # Explicit ordering: dict.values() order is not guaranteed to follow
        # shard numbers.
        shards = [shards_by_number[number] for number in sorted(shards_by_number)]
        shardmap_dict = self.make_yp_shardmap_dict(shards)
        shardmap = [
            'pod_label:shard_id={label_id}    {rbtorrent}(local_path=shard)\n'.format(label_id=label_id,
                                                                                     rbtorrent=rbtorrent)
            for label_id, rbtorrent in shardmap_dict.items()
        ]
        logging.info('Shardmap content: %s', shardmap)
        shardmap_path = './addrs_base_yp_shardmap'
        with open(shardmap_path, 'w+') as f:
            f.writelines(shardmap)
        logging.info('Shardmap written to %s file', shardmap_path)
        return shardmap_path

    def build_additional_resources(self):
        """Enqueue a BuildAddrsBaseAdditionalResources subtask.

        The subtask receives the companies working copy, the first
        GEOSEARCH_YT_URL_PARSER resource released as stable and paths under
        ``<output_dir>/snapshot/``. Its id is stored in
        ``self.Context.build_additional_resources_task``.
        """
        logging.info('Build additional resources')
        url_parser = sdk2.Resource[geotypes.GEOSEARCH_YT_URL_PARSER].find(attrs={'released': 'stable'}).first()
        logging.info('YT URL parser: %s', url_parser)
        logging.info('Company working copy: %s', self.Context.companies_working_copy)
        logging.info('Tables for wizard data: %s/snapshot/', self.Context.output_dir)
        subtask_class = sdk2.Task[BuildAddrsBaseAdditionalResources.type]
        subtask_kwargs = {
            'owner': self.owner,
            'create_sub_task': True,
            'company_table_path': self.Context.companies_working_copy,
            'url_parser': url_parser.id,
            'tables_for_wizard_data': self.Context.output_dir + '/snapshot/',
            'fast_features_path': self.Context.output_dir + '/snapshot/features2_fast.xml.gz',
            'kill_timeout': self.Parameters.kill_timeout,
        }
        subtask = subtask_class(self, **subtask_kwargs)
        subtask.enqueue()
        self.Context.build_additional_resources_task = subtask.id

    def get_additional_resources(self):
        """Collect resource ids produced by the additional-resources task.

        :return: dict with the single key ``wizard_data_resource_id``, read
            from the context of the task stored in
            ``Context.build_additional_resources_task``
        """
        task_id = self.Context.build_additional_resources_task
        subtask = sdk2.Task[task_id]
        logging.info('Additional resources build task id: %s' % task_id)
        additional_resources = {
            'wizard_data_resource_id': subtask.Context.wizard_data_resource_id,
        }
        logging.info('Additionnal resources: %s' % additional_resources)
        return additional_resources

    def _find_geobasesearch(self):
        """Return the id of a GEOBASESEARCH_EXECUTABLE resource.

        If the standalone indexer executable was produced by a BUILD_SEARCH
        task, the executable built by that same task is used; otherwise the
        first stable-released one returned by the search is taken.
        """
        indexer_resource = sdk2.Resource[self.Parameters.standalone_indexer_executable]
        indexer_build_task = sdk2.Task[indexer_resource.task_id]
        if indexer_build_task.type == 'BUILD_SEARCH':
            query = sdk2.Resource[rtypes.GEOBASESEARCH_EXECUTABLE].find(task=indexer_build_task)
        else:
            query = sdk2.Resource[rtypes.GEOBASESEARCH_EXECUTABLE].find(attrs={'released': 'stable'})
        return query.first().id

    def get_yt_table_length(self, yt_path):
        """Return the ``@row_count`` attribute of a YT path.

        :param yt_path: YT path to inspect
        :return: the value of ``<yt_path>/@row_count``, or None when
            ``_yt_exists`` reports that the path does not exist
        """
        yt = self._import_yt()
        if not self._yt_exists(yt_path):
            return None
        return yt.get('{tbl}/@row_count'.format(tbl=yt_path))

    def _get_snippets_tasks(self):
        """Return task objects for the ids in the task manager's ``gen_snippets_tasks`` context field."""
        manager = sdk2.Task[self.Context.task_manager]
        snippet_tasks = []
        for task_id in manager.Context.gen_snippets_tasks:
            snippet_tasks.append(sdk2.Task[task_id])
        return snippet_tasks

    def snippets_info(self):
        """Build report paragraphs for the successfully finished snippet generators.

        :return: list of unicode strings, one per generator task whose status
            is ``'SUCCESS'``; each holds the task link, the row count of the
            ``processing_out`` table and the row count of the ``error_log``
            table (``'0'`` when a count is missing or zero)
        """
        paragraph_template = (
            u'Таск варки {snippet_name}: https://sandbox.yandex-team.ru/task/{task_id}/view\n'
            u'Количество сниппетов: {snippet_count}\n'
            u'Ошибки валидации: {error_count}\n'
        )
        report = []
        for generator in self._get_snippets_tasks():
            # The task description is parsed for every generator, successful or not.
            params = json.loads(generator.Parameters.snippet_task).get('params')
            name = params.get('snippet_name')
            if generator.status != 'SUCCESS':
                continue
            produced = self.get_yt_table_length(params.get('processing_out')) or '0'
            rejected = self.get_yt_table_length(params.get('error_log')) or '0'
            report.append(paragraph_template.format(snippet_name=name,
                                                    task_id=generator.id,
                                                    snippet_count=produced,
                                                    error_count=rejected))
        return report

    def launch_acceptance(self, shardmap_resource, additional_resources):
        """Create the Startrek acceptance ticket and the acceptance task, once each.

        When ``Context.startrek_acceptance_task`` is not set, a GEOACCEPT
        ticket is created and the value returned by the Startrek client is
        stored there. When ``Context.basesearch_acceptance_task`` is not set,
        an AcceptanceGeobasesearchDatabase task is enqueued and its id stored;
        otherwise the statuses of the tasks returned by ``self.find()`` are
        checked.

        :param shardmap_resource: resource whose ``id`` is used in the ticket
            summary and passed to the acceptance task as ``shardmap``
        :param additional_resources: dict providing ``wizard_data_resource_id``
        :raises SandboxTaskFailureError: when the acceptance task was already
            launched and a task returned by ``self.find()`` is not in SUCCESS
        """
        startrek_token = sdk2.Vault.data('robot-geosearch',
                                         'robot_geosearch_startrek_token')
        self.startrek = StartrekClient(startrek_token)
        if not self.Context.startrek_acceptance_task:
            description = ((u' Таск, которым варилась база: https://sandbox.yandex-team.ru/task/%s/view\n'
                            u' База сварена из экспорта https://yt.yandex-team.ru/hahn/#page=navigation&path=%s')
                           % (self.id, self.Context.export_real_path or self.Parameters.yt_table))
            if self.Context.error_count != 0:
                # Alert markers are only added once the error count reaches CRIT_ERR_COUNT.
                is_critical = self.Context.error_count >= CRIT_ERR_COUNT
                alert_start = '===!!(red)' if is_critical else ''
                alert_end = '!!' if is_critical else ''
                error_count_comment = (u'\n{alert_start}Количество организаций, не попавших в индекс: {err_count}{alert_end}'
                                       u'\n{alert_start}Лог ошибок: https://yt.yandex-team.ru/hahn/#page=navigation&path={err_table_path}{alert_end}').format(
                                           err_count=self.Context.error_count,
                                           err_table_path=self.Context.error_path,
                                           alert_start=alert_start,
                                           alert_end=alert_end)
                description += error_count_comment
            snippets_report = self.snippets_info()
            if snippets_report:
                # Unicode template: both the description and the report lines
                # hold non-ASCII text, and a byte-string .format() would try
                # to ASCII-encode them on Python 2.
                description = u'{old_description}\n{snpt_report}'.format(old_description=description,
                                                                         snpt_report=u'\n'.join(snippets_report))
            task_dict = {'queue': 'GEOACCEPT',
                         'summary': u'Приёмка базы геопоиска #%s %s' % (shardmap_resource.id, date.today().strftime('%d.%m.%Y')),
                         'description': description}
            self.startrek_task = self.startrek.create_task(task_dict)
            self.Context.startrek_acceptance_task = self.startrek_task
            self.Context.startrek_ticket_ids = [self.startrek_task]
        else:
            self.startrek_task = self.Context.startrek_acceptance_task
        if not self.Context.basesearch_acceptance_task:
            # The resource ids are stored in the context only when a fresher
            # prestable resource was picked; on_release reads them from there.
            surl_resource_id = self._fresher_prestable_resource_id(rtypes.MAPS_WIZARD_ORG_NAV_DATA)
            if surl_resource_id:
                self.Context.surl_resource = surl_resource_id

            routes_resource_id = self._fresher_prestable_resource_id(geotypes.GEOSEARCH_TRANSIT_GZT)
            if routes_resource_id:
                self.Context.routes_resource = routes_resource_id

            if self.Parameters.shards_count == 18:
                mlm_template_id = 'aa82863865d2232f0165d2d7726e0293'
            else:
                mlm_template_id = 'aa82863863876dc30163a690bbac12c4'
            acceptance_task_class = sdk2.Task[AcceptanceGeobasesearchDatabase.type]
            acceptance_task = acceptance_task_class(self,
                                                    startrek_task_id=self.startrek_task,
                                                    mlm_template=mlm_template_id,
                                                    shards_count=self.Parameters.shards_count,
                                                    geobasesearch_resource=self._find_geobasesearch(),
                                                    shardmap=shardmap_resource.id,
                                                    surl_resource=surl_resource_id,
                                                    routes_resource=routes_resource_id,
                                                    wizard_data_resource=additional_resources['wizard_data_resource_id'],
                                                    export_real_path=self.Context.export_real_path,
                                                    owner=self.owner,
                                                    kill_timeout=self.Parameters.kill_timeout)
            acceptance_task.enqueue()
            # Dashboard update is best effort and must not block the acceptance.
            try:
                client = self.get_client()
                data = {'build_task': str(self.id),
                        'yt_path': self.Context.export_real_path,
                        'st_ticket': self.Context.startrek_acceptance_task}
                stat.accepting(data, client)
            except Exception:
                logging.exception('Failed to post dashboard data')
            self.Context.basesearch_acceptance_task = acceptance_task.id
            with self.memoize_stage.acceptance_launched(commit_on_entrance=False):
                self.startrek.add_comment(self.startrek_task, u'Таск приемки базы геопоиска: https://sandbox.yandex-team.ru/task/%s/view' % acceptance_task.id)
        else:
            for task in self.find():
                if task.status != ctt.Status.SUCCESS:
                    raise SandboxTaskFailureError('Acceptance task failed')

    def _fresher_prestable_resource_id(self, resource_type):
        """Return the id of the prestable resource if it should replace the stable one.

        :param resource_type: resource type to search for
        :return: id of the first prestable-released resource when it was
            created later than the first stable-released one (or when no
            stable one was found); None otherwise
        """
        prestable = sdk2.Resource[resource_type].find(attrs={'released': 'prestable'}).first()
        stable = sdk2.Resource[resource_type].find(attrs={'released': 'stable'}).first()
        if prestable is None or prestable.id is None:
            return None
        # A missing stable resource used to raise AttributeError on ``.created``.
        if stable is None or prestable.created > stable.created:
            return prestable.id
        return None

    def link_photos_table(self):
        """Link ``//home/geosearch-prod/addrs_base/accepted_photos`` to this build's snippets table.

        The link target is ``Context.photo_snippets``, falling back to
        ``Context.snippets``. Best effort: any failure is logged and swallowed.
        """
        link_path = '//home/geosearch-prod/addrs_base/accepted_photos'
        try:
            yt = self._import_yt()
            target = self.Context.photo_snippets or self.Context.snippets
            yt.link(target, link_path, force=True)
        except Exception:
            logging.exception('Failed to link accepted photos table')

    def push_snippets_to_ferryman(self, snippet_type, snippets_table_path, namespace, snippet_params):
        """Enqueue an AddrsSnipippetsPushToFerryman task for one snippets table.

        After enqueueing, a comment linking the task is added to the Startrek
        ticket from ``Context.startrek_acceptance_task`` and the task id is
        recorded in ``Context.ferryman_tasks`` under ``snippet_type``.

        :param snippet_type: passed to the task as ``table_name``
        :param snippets_table_path: passed to the task as ``table_path``
        :param namespace: passed to the task as ``namespace``
        :param snippet_params: passed to the task as ``snippet_params``
        """
        push_task_class = sdk2.Task[AddrsSnipippetsPushToFerryman.type]
        low_user_priority = ctt.Priority(ctt.Priority.Class.USER,
                                         ctt.Priority.Subclass.LOW)
        push_task = push_task_class(
            self,
            table_name=snippet_type,
            table_path=snippets_table_path,
            ferryman_url='http://geo-snippets.ferryman.n.yandex-team.ru/',
            namespace=namespace,
            snippet_params=snippet_params,
            owner=self.owner,
            priority=low_user_priority,
            create_subtask=True
        )
        push_task.enqueue()
        comment_template = (u'Таск заливки (({table} {snpt_name})) в Ferryman: '
                            u'https://sandbox.yandex-team.ru/task/{tid}/view')
        comment = comment_template.format(table=snippets_table_path,
                                          snpt_name=snippet_type,
                                          tid=push_task.id)
        startrek_token = sdk2.Vault.data('robot-geosearch',
                                         'robot_geosearch_startrek_token')
        self.startrek = StartrekClient(startrek_token)
        self.startrek.add_comment(self.Context.startrek_acceptance_task, comment)
        self.Context.ferryman_tasks.update({snippet_type: push_task.id})

    def taskmanager_upload(self):
        """Push the output tables of successful snippet generators to Ferryman.

        For every generator task with status ``'SUCCESS'`` whose
        ``processing_out`` table exists on YT, ``push_snippets_to_ferryman``
        is called. Best effort: any failure is logged and swallowed, which
        also stops processing of the remaining generators.
        """
        try:
            for generator in self._get_snippets_tasks():
                if generator.status != 'SUCCESS':
                    continue
                task_description = json.loads(generator.Parameters.snippet_task)
                params = task_description.get('params')
                name = params.get('snippet_name')
                # Keep only the part after the last '//' and re-root it at '//'.
                table_path = '//%s' % params.get('processing_out').split('//')[-1]
                if not self._yt_exists(table_path):
                    continue
                self.push_snippets_to_ferryman(name,
                                               table_path,
                                               task_description.get('table_name'),
                                               json.dumps(params))
        except Exception:
            logging.exception('Failed to upload snippets generated by TaskManager')

    def run_taskmanager(self):
        """Render the task manager config and enqueue an AddrsSnippetsTaskManager task.

        The config directory is exported from Arcadia into ``./configs``, then
        ``base_sync.json`` is rendered as a Jinja2 template with the companies
        working copy and the real export path. The new task id is saved to
        ``Context.task_manager``.
        """
        configs_url = 'arcadia:/arc/trunk/arcadia/search/geo/tools/task_manager/configs/'
        template_name = 'base_sync.json'
        configs_dir = Arcadia.export(configs_url, './configs')
        logging.info('checkout_path = %s' % configs_dir)
        template_path = os.path.join(configs_dir, template_name)
        logging.info('json_file = %s' % template_path)
        jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(configs_dir),
                                       extensions=['jinja2.ext.do'])
        render_vars = {
            'company_table': self.Context.companies_working_copy,
            'export_path': self.Context.export_real_path,
        }
        rendered_config = jinja_env.get_template(template_name).render(render_vars)
        manager_class = sdk2.Task[AddrsSnippetsTaskManager.type]
        manager_task = manager_class(
            self,
            task_manager_config='',
            task_manager_config_text=rendered_config,
            task_manager_path='//home/geosearch-prod/snippets',
            saas_kv_host='saas-indexerproxy-maps-kv.yandex.net',
            mail_list=[],
            geobasesearch_db_sync=True,
            fast=True,
            owner=self.owner,
            create_subtask=True
        )
        manager_task.enqueue()
        self.Context.task_manager = manager_task.id

    def build_web_indexann(self):
        """Enqueue a BuildShardedAddrsWebIndexann task unless one is already recorded.

        Nothing happens when ``Context.build_web_indexann_task`` is already
        set or when ``Parameters.indexann_raw_data`` is empty. Otherwise the
        new task id is saved to ``Context.build_web_indexann_task``.
        """
        if self.Context.build_web_indexann_task:
            return
        if not self.Parameters.indexann_raw_data:
            return
        indexann_task_class = sdk2.Task[BuildShardedAddrsWebIndexann.type]
        indexann_task = indexann_task_class(
            self,
            description='Build GEOSEARCH_WEB_INDEXANN_SHARDED',
            data_preparer=self.Parameters.data_preparer,
            company_table_path=self.Context.companies_working_copy,
            shards_count=self.Parameters.shards_count,
            raw_indexann_data=self.Parameters.indexann_raw_data,
            owner=self.owner,
            create_sub_task=True
        )
        indexann_task.enqueue()
        self.Context.build_web_indexann_task = indexann_task.id

    @sdk2.footer()
    def footer(self):
        """Return ``Context.header_message`` for the task footer, or '' when it is not set."""
        message = self.Context.header_message
        if type(message) == NotExists:
            return ''
        return message

    def get_client(self):
        """Create a YT client for the ``hahn.yt.yandex.net`` proxy.

        The token is read from the ``GEOMETA-SEARCH`` / ``yt-token`` vault
        entry. ``yt.wrapper`` is imported inside the method rather than at
        module level.
        """
        import yt.wrapper as yt
        token = sdk2.Vault.data('GEOMETA-SEARCH', 'yt-token')
        return yt.YtClient(config={'proxy': {'url': 'hahn.yt.yandex.net'},
                                   'token': token})

    def on_prepare(self):
        """Run the base prepare hook, then store the prepared 'biz' geo user factors in the context."""
        sdk2.Task.on_prepare(self)
        factors_dir = self._prepare_geo_user_factors('biz')
        self.Context.geo_user_factors_dir = factors_dir

    def notify_by_telegram(self, event):
        """Forward ``event`` to the module-level ``notify_by_telegram`` helper.

        Nothing is sent unless ``Parameters.need_acceptance`` is set.
        """
        if not self.Parameters.need_acceptance:
            return
        notify_by_telegram(self, event)

    def remove_stats(self, keys):
        """Call ``stat._remove`` for each given key on this build's dashboard record.

        Does nothing unless ``Parameters.need_acceptance`` is set. Best
        effort: failures are logged and swallowed.

        :param keys: iterable of keys passed one by one to ``stat._remove``
        """
        if not self.Parameters.need_acceptance:
            return
        try:
            yt_client = self.get_client()
            record = {'build_task': str(self.id),
                      'yt_path': self.Context.export_real_path}
            for key in keys:
                stat._remove(record, key, yt_client)
        except Exception:
            logging.exception('Failed to post dashboard data')

    def stop_yappy_betas(self):
        """Stop the Yappy betas recorded in the acceptance task's context.

        Reads ``ref_base_data``, ``ref_wizard_data``, ``test_base_data`` and
        ``test_wizard_data`` from that context and stops each beta by its
        ``name``. Best effort: any failure is logged and swallowed.
        """
        try:
            acceptance_ctx = sdk2.Task[self.Context.basesearch_acceptance_task].Context
            beta_descriptions = (
                acceptance_ctx.ref_base_data,
                acceptance_ctx.ref_wizard_data,
                acceptance_ctx.test_base_data,
                acceptance_ctx.test_wizard_data,
            )
            yappy_token = sdk2.Vault.data('GEOSEARCH_PROD', 'GEOSEARCH_YAPPY_TOKEN')
            for beta in beta_descriptions:
                yappy.stop_beta(beta.get('name'), yappy_token)
        except Exception:
            logging.exception('Failed to stop betas')

    def on_timeout(self, prev_status):
        """Run the base timeout hook, drop the dashboard entries and notify about the timeout."""
        sdk2.Task.on_timeout(self, prev_status)
        stale_keys = ['building', 'accepting']
        self.remove_stats(stale_keys)
        self.notify_by_telegram('timed out')

    def on_failure(self, prev_status):
        """Run the base failure hook, drop the dashboard entries and notify about the failure."""
        sdk2.Task.on_failure(self, prev_status)
        stale_keys = ['building', 'accepting']
        self.remove_stats(stale_keys)
        self.notify_by_telegram('failed')

    def on_break(self, prev_status, status):
        """Drop the dashboard entries and notify when the task breaks.

        :param prev_status: forwarded to the base hook
        :param status: forwarded to the base hook
        """
        # Chain to the base hook, as on_timeout/on_failure/on_finish do.
        sdk2.Task.on_break(self, prev_status, status)
        self.remove_stats(['building', 'accepting'])
        self.notify_by_telegram('with exception')

    def on_finish(self, prev_status, status):
        """Run the base finish hook, then drop the 'building' dashboard entry."""
        sdk2.Task.on_finish(self, prev_status, status)
        finished_keys = ['building']
        self.remove_stats(finished_keys)

    def on_release(self, params):
        """Release the built base together with the resources tied to it.

        Steps, in order: refuse the release on too many indexing errors, run
        the base and Nanny release hooks, push release statistics, update the
        dashboard and upload snippets for stable releases, release the build
        tasks of the additional resources, stop the Yappy betas and tag the
        acceptance ticket.

        :param params: release parameters; the keys read here are
            ``release_comments``, ``release_subject``, ``release_status``,
            ``releaser`` and ``email_notifications``
        :raises Exception: when ``Context.error_count`` reaches
            CRIT_ERR_COUNT and ``release_comments`` lacks ``RELEASE_ANYWAY``
        """
        release_anyway = 'RELEASE_ANYWAY' in params['release_comments']
        if self.Context.error_count >= CRIT_ERR_COUNT and not release_anyway:
            raise Exception('Failed to release because of {err_count} indexing errors'.format(err_count=self.Context.error_count))
        self.yt_token = sdk2.Vault.data('GEOMETA-SEARCH', 'yt-token')
        sdk2.Task.on_release(self, params)
        comment_template = (u'База сварена из {real_path}\n'
                            u'Таск приемки: https://st.yandex-team.ru/{acceptance_task}\n'
                            u'{release_comments}')
        comment = comment_template.format(real_path=self.Context.export_real_path,
                                          acceptance_task=self.Context.startrek_acceptance_task,
                                          release_comments=params['release_comments'])
        subject = '{release_subject} {acceptance_task}'.format(release_subject=params['release_subject'],
                                                               acceptance_task=self.Context.startrek_acceptance_task)
        # NOTE: this extends the 'cc' list inside ``params`` itself.
        notifications = params['email_notifications']
        notifications['cc'].extend(['abolkhovityanov@yandex-team.ru'])
        release_dict = dict(releaser=params['releaser'],
                            release_status=params['release_status'],
                            release_subject=subject,
                            email_notifications=notifications,
                            release_comments=comment)
        nanny.ReleaseToNannyTask2.on_release(self, release_dict)
        if self.Parameters.need_acceptance:
            acceptance_task = sdk2.Task[self.Context.basesearch_acceptance_task]
            if not acceptance_task.Context.finish_time_sent:
                stat.push_to_stat_table('sprav.yandex.ru/db/db_by_stage',
                                        self.Context.export_real_path,
                                        'finish_geosearch_index_acceptance')
                acceptance_task.Context.finish_time_sent = True
            with self.memoize_stage.STAT_RELEASE_START(commit_on_entrance=False):
                stat.push_to_stat_table('sprav.yandex.ru/db/db_by_stage',
                                        self.Context.export_real_path,
                                        'start_geosearch_index_release')
        if params['release_status'] == 'stable':
            environments.PipEnvironment('yandex-yt', use_wheel=True).prepare()
            client = self.get_client()
            # Dashboard update is best effort.
            try:
                data = {'build_task': str(self.id),
                        'yt_path': self.Context.export_real_path,
                        'st_ticket': self.Context.startrek_acceptance_task}
                stat.production(data, client)
                stat._remove(data, 'accepting', client)
            except Exception:
                logging.exception('Failed to post dashboard data')

            self.link_photos_table()
            with self.memoize_stage.TASK_MANAGER_UPLOAD(commit_on_entrance=False):
                self.taskmanager_upload()
        if 'PURE_BASE' not in params['release_comments']:
            try:
                build_additional_resources_task = sdk2.Task[self.Context.build_additional_resources_task]
                if params['release_status'] == 'stable' and build_additional_resources_task.status in ctt.Status.Group.FINISH:
                    self.server.release(task_id=self.Context.build_additional_resources_task,
                                        type=params['release_status'],
                                        subject=subject,
                                        comments=params['release_comments'])
                # launch_acceptance stores these ids only when a fresher
                # prestable resource was picked, so either may be unset.
                # Skip unset ids instead of looking them up: a failed lookup
                # of the first one would abort this whole block and leave the
                # second one unreleased.
                # NOTE(review): confirm how sdk2.Resource[...] reacts to an unset id.
                for resource_id in (self.Context.surl_resource, self.Context.routes_resource):
                    if not resource_id:
                        continue
                    resource = sdk2.Resource[resource_id]
                    if resource:
                        self.server.release(task_id=resource.task.id,
                                            type=params['release_status'],
                                            subject=subject,
                                            comments=params['release_comments'])
            except Exception as err:
                self.Context.header_message = 'Releasing additional resources failed!\n %s' % err
        self.stop_yappy_betas()
        # Tagging the ticket is best effort.
        try:
            startrek_token = sdk2.Vault.data('robot-geosearch',
                                             'robot_geosearch_startrek_token')
            self.startrek = StartrekClient(startrek_token)
            tag = 'manual_release' if params['releaser'] != 'robot-thoth' else 'automatic_release'
            self.startrek.add_tag(self.Context.startrek_acceptance_task, tag)
        except Exception:
            logging.exception("Failed to set tags to acceptance ticket")
        logging.debug('Release parameters: {}'.format(params))

    def on_execute(self):
        self.proc_env = os.environ.copy()
        self.yt_token = sdk2.Vault.data('GEOMETA-SEARCH', 'yt-token')
        self.proc_env['YT_TOKEN'] = self.yt_token
        self.proc_env['YT_RETRY_COUNT'] = '50'
        if self.Parameters.yt_pool is not None:
            if self.Parameters.need_acceptance is True or self.Parameters.yt_pool != 'geosearch_high_priority':
                self.proc_env['YT_POOL'] = self.Parameters.yt_pool
        self.factor_annotations_params = self._make_factor_annotations_params()

        with self.memoize_stage.PREPARE_CONFIGS_AND_RESOURCES(commit_on_entrance=False):
            logging.info('Syncing resources')
            self.Context.export_path = self.prepare_export()
            self.Context.ferryman_tasks = {}
            self.Context.saaskv_tasks = {}
        with self.memoize_stage.CREATE_WORKING_COPY(commit_on_entrance=False):
            self.Context.working_dir = self.make_yt_tmp_dir()
            self.Context.output_dir = self.make_yt_output_dir()
            logging.info('Working directory on YT: %s' % self.Context.working_dir)
            self.Context.companies_mtime = self.get_table_mtime()
            cooking_time = self.get_table_attribute(self.Parameters.yt_table, '_cooking_start_time')
            self.Context.cooking_start_time = int(cooking_time or 0) / 1000
            self.Context.export_real_path = self.get_table_real_path()
            self.Context.base_commit_id = self.get_table_attribute(self.Parameters.export_path, 'db_commit_id')

            table_name = os.path.basename(self.Parameters.yt_table)
            result_path = os.path.join(self.Context.working_dir, 'snapshot', table_name)
            if self.cut_export(self.Parameters.yt_table, result_path):
                self.Context.companies_working_copy = result_path
            else:
                self.Context.companies_working_copy = self.copy_table(self.Parameters.yt_table,
                                                                      self.Context.working_dir,
                                                                      polite=False)
            self.Context.total_index_size = self.get_yt_table_length(self.Context.companies_working_copy)
            photo_snippets_path = self.Parameters.export_path + '/snippets'
            self.Context.photo_snippets = self.copy_table(photo_snippets_path,
                                                          self.Context.output_dir,
                                                          polite=True)
            if self.Parameters.need_acceptance:
                with self.memoize_stage.STAT_BUILD_START(commit_on_entrance=False):
                    stat.push_to_stat_table('sprav.yandex.ru/db/db_by_stage',
                                            self.Context.export_real_path,
                                            'start_geosearch_index_build')
            if self.Parameters.need_acceptance:
                try:
                    client = self.get_client()
                    data = {'build_task': str(self.id),
                            'yt_path': self.Context.export_real_path}
                    stat.building(data, client)
                except Exception:
                    logging.exception('Failed to post dashboard data')

                self.run_taskmanager()

            self.copy_tables_for_wizard_data()
            sorted_by = self.get_table_attribute(self.Context.companies_working_copy, 'sorted_by')
            if sorted_by is None or len(list(sorted_by)) == 0 or list(sorted_by)[0] != 'permalink':
                logging.info('companies table is not sorted by permalink')
                self.sort_table(self.Context.companies_working_copy)
            else:
                logging.info('companies table is sorted by permalink')
            logging.info('Companies working copy: %s' % self.Context.companies_working_copy)
        with self.memoize_stage.WEB_INDEXANN(commit_on_entrance=False):
            self.build_web_indexann()
        with self.memoize_stage.ADDITIONAL_RESOURCES(commit_on_entrance=False):
            if self.Parameters.need_acceptance:
                self.build_additional_resources()
        with self.memoize_stage.BUILD_DUPLICATES(commit_on_entrance=False):
            self.Context.duplicates_table = self.find_duplicates()
            logging.info('Duplicates table: %s' % self.Context.duplicates_table)
        with self.memoize_stage.BUILD_RAW_STATIC_FACTORS(commit_on_entrance=False):
            self.Context.raw_user_factors_table = self.build_raw_user_factors(self.Context.duplicates_table)
            logging.info('Static user factors YT table: %s' % self.Context.raw_user_factors_table)
            events_distance_factors_proc, self.Context.events_distance_factors_table = self.build_events_distance_factors(self.Context.duplicates_table)
            logging.info('Events distance factors YT table: %s' % self.Context.events_distance_factors_table)
        with self.memoize_stage.RUN_SYNCHRONOUS_PROCESSES(commit_on_entrance=False):
            synchronous_processes = {}
            annotations_index_proc, self.Context.annotations_index_table = self.build_annotation_index_chunks(self.Context.duplicates_table)
            synchronous_processes.update({'annotations_build': annotations_index_proc})
            logging.info('Annotation index table: %s' % self.Context.annotations_index_table)
            if self.factor_annotations_params is not None and self.factor_annotations_params != "":
                factor_annotations_proc, self.Context.factor_annotations_table = self.build_factor_annotations(self.Context.duplicates_table)
                synchronous_processes.update({'factor_annotations_build': factor_annotations_proc})
                logging.info('Factor annotation index table: %s' % self.Context.factor_annotations_table)
            similar_orgs_proc, self.Context.similar_orgs_table = self.build_similar_orgs(self.Context.duplicates_table)
            synchronous_processes.update({'similar_orgs_build': similar_orgs_proc})
            logging.info('Similar orgs table: %s' % self.Context.similar_orgs_table)
            static_factors_yt_proc, self.Context.static_factors_yt_table = self.build_static_factors_yt(self.Context.duplicates_table)
            synchronous_processes.update({'static_factors_build': static_factors_yt_proc})
            logging.info('Static factors YT table: %s' % self.Context.static_factors_yt_table)
            static_rubric_factors_yt_proc, self.Context.static_rubric_factors_file = self.build_static_rubric_factors_yt()
            synchronous_processes.update({'static_rubric_factors_build': static_rubric_factors_yt_proc})
            logging.info('Static rubric factors file: %s' % self.Context.static_rubric_factors_file)
            static_docurl_factors_yt_proc, self.Context.static_docurl_factors_yt_table, self.Context.static_docurl_factors_files = self.build_static_docurl_factors_yt(self.Context.duplicates_table)
            synchronous_processes.update({'static_docurl_factors_build': static_docurl_factors_yt_proc})
            logging.info('Static docurl factors YT table: %s' % self.Context.static_docurl_factors_yt_table)
            static_doc_factors_yt_proc, self.Context.static_doc_factors_table = self.build_static_doc_factors_yt(self.Context.raw_user_factors_table)
            synchronous_processes.update({'static_doc_factors_build': static_doc_factors_yt_proc})
            logging.info('Static doc factors YT table: %s' % self.Context.static_doc_factors_table)
            static_user_factors_proc, self.Context.static_user_factors_table = self.build_static_user_factors(self.Context.duplicates_table)
            synchronous_processes.update({'static_user_factors_build': static_user_factors_proc})
            logging.info('Static user factors YT table: %s' % self.Context.static_user_factors_table)
            reviews_proc, self.Context.reviews_table = self.build_reviews()
            synchronous_processes.update({'reviews_build': reviews_proc})
            logging.info('Reviews YT table: %s' % self.Context.reviews_table)
            malls_proc, malls_table = self.make_malls_catalog()
            synchronous_processes.update({'malls_build': malls_proc})
            logging.info('Malls Yt table: %s' % malls_table)
            if self.Parameters.geosearch_dssm_model is not None:
                geosearch_dssm_proc, self.Context.geosearch_dssm_table = self.build_geosearch_dssm()
                synchronous_processes.update({'geosearch_dssm_build': geosearch_dssm_proc})
                logging.info('Geosearch Dssm Yt table: %s' % self.Context.geosearch_dssm_table)
            else:
                self.Context.geosearch_dssm_table = None

            if self.Parameters.precomuted_filters:
                resource_path = str(sdk2.ResourceData(self.Parameters.precomuted_filters).path)
                self.Context.precomputed_filters_file = os.path.basename(resource_path)
                shutil.copy(resource_path, self.Context.precomputed_filters_file)
            else:
                self.Context.precomputed_filters_file = "precomputed_filters.pb"
                precomputed_filters_proc = self.build_precomputed_filters(self.Context.precomputed_filters_file)
                synchronous_processes.update({'precomputed_filters_build': precomputed_filters_proc})
            logging.info('Precomputed filters file: %s' % self.Context.precomputed_filters_file)

            if not self.Context.search_text_table:
                search_text_proc, self.Context.search_text_table = self.build_search_text()
                synchronous_processes.update({'search_text_build': search_text_proc})

            if self.Parameters.common_features_pbs:
                self.Context.features_file = 'features.pbs'
                features_proc = self.build_features(self.Context.features_file)
                synchronous_processes.update({'features_build': features_proc})

            source_proto_proc, self.Context.source_proto_table = self.build_source_proto()
            synchronous_processes.update({'source_proto_build': source_proto_proc})

            if self.Parameters.moved_collapser_dump:
                resource_path = str(sdk2.ResourceData(self.Parameters.moved_collapser_dump).path)
                self.Context.moved_collapser_dump = os.path.basename(resource_path)
                shutil.copy(resource_path, self.Context.moved_collapser_dump)
            else:
                self.Context.moved_collapser_dump = "moved.bin"
                moved_proc = self.build_moved_data(self.Context.moved_collapser_dump)
                synchronous_processes.update({'moved_build': moved_proc})
            logging.info('Moved collapser dump file: %s' % self.Context.moved_collapser_dump)

            while not _processess_finished(synchronous_processes):
                logging.info('Not all synchronous processes finished yet. Waiting')
                sleep(60)

            if self.Context.moved_collapser_dump and not os.path.exists(self.Context.moved_collapser_dump):
                self.Context.moved_collapser_dump = None

            # Creating resources with rubric static factors
            rubric_static_factors_resource = MapsRubricStaticFactors(self, "Rubric static factors file", self.Context.static_rubric_factors_file)
            rubric_static_factors_data = sdk2.ResourceData(rubric_static_factors_resource)
            rubric_static_factors_data.ready()
            self.Context.rubric_static_factors_resource = rubric_static_factors_resource.id

            # Creating resources with docurl static factors
            self.Context.docurl_static_factors_resource = []
            for shard_number in range(0, self.Parameters.shards_count):
                docurl_static_factors_resource = geotypes.GEOSEARCH_STATIC_FACTPRS_DOCURL(
                    self,
                    "Docurl static factors file",
                    self.Context.static_docurl_factors_files + "/" + str(shard_number) + "/static_factors_docurl.mms"
                )
                docurl_static_factors_data = sdk2.ResourceData(docurl_static_factors_resource)
                docurl_static_factors_data.ready()
                self.Context.docurl_static_factors_resource.append(docurl_static_factors_resource.id)

            # Creating resource with precomputed filters
            if not self.Parameters.precomuted_filters:
                precomputed_filters_resource = rtypes.BUSINESS_COMPUTED_FILTERS(self, "Business precomputed filters file", self.Context.precomputed_filters_file)
                precomputed_filters_data = sdk2.ResourceData(precomputed_filters_resource)
                precomputed_filters_data.ready()
                self.Context.precomputed_filters_resource = precomputed_filters_resource.id

            # Create resource with features
            if self.Parameters.common_features_pbs:
                features_resource = geotypes.GEOSEARCH_FEATURES(self, "Features information", self.Context.features_file)
                features_resource_data = sdk2.ResourceData(features_resource)
                features_resource_data.ready()
                self.Context.features_resource = features_resource.id

            # Creating resource with moved collapser dump
            if not self.Parameters.moved_collapser_dump:
                moved_collapser_dump_resource = geotypes.GEOSEARCH_MOVED_COLLAPSER_DUMP(
                    self, "Moved collapser dump file",
                    self.Context.moved_collapser_dump
                )
                resource_data = sdk2.ResourceData(moved_collapser_dump_resource)
                resource_data.ready()
                self.Context.moved_collapser_dump_resource = moved_collapser_dump_resource.id

        with self.memoize_stage.BUILD_INDEX(commit_on_entrance=False):
            additional_data_tables = [
                self.Context.annotations_index_table,
                self.Context.similar_orgs_table,
                self.Context.static_factors_yt_table,
                self.Context.static_docurl_factors_yt_table,
                self.Context.static_doc_factors_table,
                self.Context.static_user_factors_table,
                self.Context.events_distance_factors_table,
                self.Context.reviews_table,
                self.Context.source_proto_table,
            ]
            if self.factor_annotations_params is not None and self.factor_annotations_params != "":
                additional_data_tables.append(self.Context.factor_annotations_table)
            if self.Context.geosearch_dssm_table is not None:
                additional_data_tables.append(self.Context.geosearch_dssm_table)
            if self.Parameters.org_reviews_for_inv is not None and self.Parameters.org_reviews_for_inv != "":
                additional_data_tables.append(self.Parameters.org_reviews_for_inv)
            if self.Context.search_text_table and self._yt_exists(self.Context.search_text_table):
                additional_data_tables.append(self.Context.search_text_table)
            else:
                logging.info('search_text_table {} missing or not exists')

            if self.Parameters.top_lists_tbl is not None and self._yt_exists(self.Parameters.top_lists_tbl):
                additional_data_tables.append(self.Parameters.top_lists_tbl)

            for table in self.Parameters.additional_company_attrs:
                if self._yt_exists(table):
                    additional_data_tables.append(table)

            self.Context.index_table = self.build_index(additional_data_tables,
                                                        self.Context.indexer_config,
                                                        self.Context.geortyserver_config)
            logging.info('Index table: %s' % self.Context.index_table)
        with self.memoize_stage.WAIT_FOR_WEB_INDEXANN(commit_on_entrance=False):
            if self.Context.build_web_indexann_task:
                raise sdk2.WaitTask([self.Context.build_web_indexann_task],
                                    ctt.Status.Group.FINISH | ctt.Status.Group.BREAK,
                                    wait_all=True)
        with self.memoize_stage.REGISTER_SHARDS(commit_on_entrance=False):
            self.register_shards(self.Context.index_table)
        rbtorrent_shardmap_path = self.make_rbtorrent_shardmap()
        rbtorrent_shardmap_resource = sdk2.Resource[rtypes.ADDRS_BUSINESS_SHARDMAP]
        current_rbtorrent_shardmap_resource = rbtorrent_shardmap_resource(self,
                                                                          'Addrs basesearch shardmap',
                                                                          rbtorrent_shardmap_path)
        rbtorrent_shardmap_data = sdk2.ResourceData(current_rbtorrent_shardmap_resource)
        rbtorrent_shardmap_data.ready()
        if self.Parameters.shards_count > 1:
            yp_shardmap_path = self.make_yp_shardmap()
            yp_shardmap_resource = sdk2.Resource[geotypes.ADDRS_BASE_YP_SHARDMAP]
            current_yp_shardmap_resource = yp_shardmap_resource(self,
                                                                'Addrs base shardmap (YP)',
                                                                yp_shardmap_path)
            current_yp_shardmap_resource.backup_task = True
        if self.Parameters.need_acceptance and self.author in USERS_WHITE_LIST:
            current_rbtorrent_shardmap_resource.backup_task = True
            current_rbtorrent_shardmap_resource.addrs_base_shardmap_for_testenv = True
        if self.Parameters.export_svd is not False:
            svd_export_path = self.export_svd()
            svd_resource = sdk2.Resource[rtypes.MAPS_SVD_EXPORT]
            current_svd_resource = svd_resource(self,
                                                'svd.txt.gz',
                                                svd_export_path)
            svd_data = sdk2.ResourceData(current_svd_resource)
            svd_data.ready()
        if self.Parameters.export_canonizer is not False:
            canonizer_export_path = self.export_canonizer()
            canonizer_resource = sdk2.Resource[geotypes.GEOSEARCH_CANONIZER_DUMP]
            current_canonizer_resource = canonizer_resource(self,
                                                            'canonizer.zz',
                                                            canonizer_export_path)
            canonizer_data = sdk2.ResourceData(current_canonizer_resource)
            canonizer_data.ready()
        with self.memoize_stage.COLLECT_STAT(commit_on_entrance=False):
            self.collect_stat_info()
        if self.Parameters.need_acceptance:
            with self.memoize_stage.STAT_BUILD_FINISH(commit_on_entrance=False):
                stat.push_to_stat_table('sprav.yandex.ru/db/db_by_stage',
                                        self.Context.export_real_path,
                                        'finish_geosearch_index_build')
        with self.memoize_stage.ACCEPTANCE(commit_on_entrance=False):
            if self.Parameters.need_acceptance and self.author in USERS_WHITE_LIST:
                additional_resources = self.get_additional_resources()
                self.launch_acceptance(current_yp_shardmap_resource,
                                       additional_resources)

    def on_success(self, prev_status):
        """Remove the task's temporary YT working directory after success.

        Nothing is removed when the ``keep_temps`` parameter is set, or when
        no working directory is stored in the task context.

        :param prev_status: supplied by the caller of the hook; not used by
            this method.
        """
        if self.Parameters.keep_temps:
            return
        working_dir = self.Context.working_dir
        if not working_dir:
            # No path was recorded in the context, so there is nothing valid
            # to hand to YT; skip instead of failing the hook.
            logging.info('Cleanup: no YT working directory recorded, nothing to remove')
            return
        logging.info('Cleanup: removing %s from YT', working_dir)
        yt = self._import_yt()
        # force=True keeps the cleanup idempotent: an already-removed
        # directory must not turn a successful build into an error.
        yt.remove(working_dir, recursive=True, force=True)
