# -*-  coding: utf-8 -*-

import sys
import time
import json
import logging
import datetime
import traceback
from random import randint
import multiprocessing.dummy as multiprocessing

import yt.wrapper as yt_wrapper
from yt.wrapper import TablePath, YtClient
from yt.common import YtError
from yt.transfer_manager.client import TransferManager
from yt.tools.dump_restore_client import DumpRestoreClient  # , check_table_schema


class DumpRestoreError(Exception):
    """Base error raised by the dump/restore logic in this module."""


class NotImplementedYet(DumpRestoreError):
    """DumpRestoreError subclass for functionality that is not implemented."""


# Skynet python module directory and interpreter; both are referenced from
# DR_CLIENT_OPTIONS below.
SKY_MODULES_PATH = "/usr/lib/pyshared-skynet"
SKY_PYTHON_BINARY = "/skynet/python/bin/python"

# Default options: DumpRestoreTask passes the whole dict as keyword arguments
# to DumpRestoreClient and passes "default_client_config" as the YtClient
# config when restoring.
DR_CLIENT_OPTIONS = {
    "input_row_limit": 100000000,
    "output_row_limit": 100000000,
    "job_count": 20000,
    "job_memory_limit": 6 * 2**30,  # 6GB
    "user_slots": 75,
    "workload_descriptor": {"category": "batch"},
    "default_client_config": {
        "api_version": "v3",
        "driver_config_path": "/etc/ytdriver.conf",
        # IMPORTANT
        # The settings below are needed only when map-reduce is launched from
        # sandbox. For pickled mappers and reducers to work correctly we pass
        # the skynet python binary to YT.
        "pickling": {
            "python_binary": SKY_PYTHON_BINARY
        },
        # We also pass PYTHONPATH to mappers and reducers so that the skynet
        # python can find its bindings.
        # This is needed only when working with dynamic tables inside operations.
        "spec_defaults": {
            "max_failed_job_count": 100,
            "mapper": {
                "environment": {"PYTHONPATH": SKY_MODULES_PATH},
            },
            "reducer": {
                "environment": {"PYTHONPATH": SKY_MODULES_PATH},
            },
        }
    },
}

# Root directory of all backups and the path templates derived from it.
BASE_PATH = '//home/yabs/backup'
DEFAULT_DAY_DIR_FORMAT = '{base_path}/{origin_cluster}/{backup_date}'
DEFAULT_STATIC_FORMAT = DEFAULT_DAY_DIR_FORMAT+'/{task_id}'
# Templates for the "input_query" of the merge operations: the first selects
# all columns by a predicate, the second selects the given columns by a predicate.
DUMP_QUERY_TEMPLATE = '* WHERE {}'
ERASE_QUERY_TEMPLATE = '{} WHERE {}'
MAX_BACKUP_AGE = 3000  # TTL for {backup_date} directory, days
MAX_TMP_STATIC_AGE = 1  # TTL for {backup_date}/{task_id} directory, days

# Number of attempts given to the Retry decorator for YT operations.
RETRIES_COUNT = 3
# Sizes of the worker pools used for dumping/filtering and for restoring.
DUMP_PROCESSES_COUNT = 10
RESTORE_PROCESSES_COUNT = 10

# Prefix of the attributes this module writes on dump tables to remember the
# properties of the original table.
ATTRIBUTE_PREFIX = "_yt_dump_restore_"


class Retry(object):
    """Decorator that re-runs a callable while it raises ``exception``.

    Usage::

        @Retry(YtError, attempts=3, max_sleep=10)
        def operation():
            ...

    :param exception: exception class (or tuple of classes) that triggers a retry;
        any other exception propagates immediately.
    :param attempts: total number of calls before the last error is re-raised.
    :param max_sleep: upper bound, in seconds, of the random pause between
        attempts; no pause is made when it is not positive.
    :param verbose: when true, log a warning for every failed attempt and an
        error when all attempts are exhausted.

    Each decorated callable gets its own wrapper, so a single ``Retry`` instance
    can safely decorate several functions.
    """

    def __init__(self, exception, attempts=3, max_sleep=10, verbose=True):
        self.exception = exception
        self.attempts = attempts
        self.max_sleep = max_sleep
        self.verbose = verbose
        # Last callable passed to __call__; kept only so that the legacy
        # ``_retrier`` entry point keeps working.
        self.func = None

    def __call__(self, func):
        """Return a retrying wrapper bound to ``func``."""
        self.func = func

        def wrapper(*args, **kwargs):
            # ``func`` is captured by the closure, not read from ``self``, so
            # decorating another function later cannot redirect this wrapper.
            return self._run(func, args, kwargs)

        wrapper.__name__ = self._name_of(func)
        wrapper.__doc__ = getattr(func, "__doc__", None)
        return wrapper

    def _retrier(self, *args, **kwargs):
        """Legacy entry point: retry the most recently decorated callable."""
        return self._run(self.func, args, kwargs)

    @staticmethod
    def _name_of(func):
        """Return a printable name for ``func`` even if it has no ``__name__``."""
        return getattr(func, "__name__", repr(func))

    @staticmethod
    def _describe(error):
        """Return the error text; works for exceptions without ``.message``."""
        return getattr(error, "message", None) or str(error)

    def _run(self, func, args, kwargs):
        """Call ``func`` until it succeeds or ``self.attempts`` calls have failed."""
        func_name = self._name_of(func)
        cur_attempt = 0
        while True:
            cur_attempt += 1
            try:
                return func(*args, **kwargs)
            except self.exception as e:
                if cur_attempt >= self.attempts:
                    if self.verbose:
                        logging.error(
                            "Execution of %s failed %d times with error: %s",
                            func_name,
                            self.attempts,
                            self._describe(e),
                        )
                    raise
                if self.verbose:
                    logging.warning(
                        "Attempt %d of %s failed with error: %s \n %s",
                        cur_attempt,
                        func_name,
                        self._describe(e),
                        traceback.format_exc(),
                    )
                if self.max_sleep > 0:
                    logging.info("Some sleep")
                    # Random pause so that parallel workers do not retry in lockstep.
                    time.sleep(randint(1, self.max_sleep))


class DumpRestoreTask(object):
    def __init__(self, params):
        """Build a task description from a flat ``params`` mapping.

        Required keys: ``task_id``, ``yt_token``, ``mode``, ``src_cluster``,
        ``dst_cluster`` and ``delete_tmp``; in ``restore`` mode also
        ``origin_cluster`` and ``backup_date``. All other keys are optional
        and fall back to the defaults visible below.

        :param params: dict-like object with the task settings.
        :raises KeyError: when a required key is missing.
        """
        # Local import: only needed here to give each task private options.
        import copy

        self.task_id = params["task_id"]
        self.yt_token = params["yt_token"]
        self.mode = params["mode"]
        self.src_cluster = params["src_cluster"]
        self.dst_cluster = params["dst_cluster"]
        self.tables_list = params.get("tables_list", None)
        self.tables_dict = params.get("tables_dict", None)
        tables_list_input = params.get("tables_list_input", "")
        if params.get("use_tables_list_input", False) and tables_list_input:
            # One table per line; blank lines (e.g. a trailing newline) are
            # dropped so that they do not turn into empty table paths.
            self.tables_list = [
                line.strip() for line in tables_list_input.split('\n') if line.strip()
            ]
            self.tables_dict = dict(zip(self.tables_list, self.tables_list))

        self.delete_tmp = params["delete_tmp"]
        self.overwrite_dst_table = params.get("overwrite_dst_table", False)
        self.overwrite_backup = params.get("overwrite_backup", False)
        self.force_compaction = params.get("force_compaction", False)
        self.use_ssd_blobs = params.get("use_ssd_blobs", False)
        self.copy_attributes = params.get("copy_attributes", [])
        self.dump_query = params.get("dump_query", "")
        self.erase_query = params.get("erase_query", "")
        self.ttl = params.get("ttl", 0)
        self.erasure = params.get("erasure", False)
        self.queue_name = ''
        if params.get("use_transfer_queue", False):
            self.queue_name = params.get("queue_name", "")
        self.custom_tm_pool = ''
        if params.get("use_custom_pool", False):
            self.custom_tm_pool = params.get("custom_tm_pool", "")
        self.use_backbone = params.get("use_backbone", False)

        if self.mode == 'restore':
            self.origin_cluster = params["origin_cluster"]
            self.backup_date = params["backup_date"]
        else:
            self.origin_cluster = self.src_cluster
            self.backup_date = datetime.datetime.now().strftime('%Y-%m-%d')

        self.need_transfer = (self.src_cluster != self.dst_cluster)
        self.force_operations = True

        # Deep copy: the overrides below touch nested dicts, and the
        # module-level defaults must not be changed for other tasks.
        self.dr_client_options = copy.deepcopy(DR_CLIENT_OPTIONS)
        spec_defaults = self.dr_client_options["default_client_config"]["spec_defaults"]
        # Falsy or missing values keep the defaults.
        if params.get("job_count"):
            self.dr_client_options["job_count"] = params["job_count"]
        if params.get("user_slots"):
            self.dr_client_options["user_slots"] = params["user_slots"]
        if params.get("max_failed_job_count"):
            spec_defaults["max_failed_job_count"] = params["max_failed_job_count"]
        if params.get("job_time_limit"):
            spec_defaults["mapper"]["job_time_limit"] = params["job_time_limit"]

        if self.mode == 'advanced_backup':
            self.fill_patterns(self.tables_dict)

    def fill_patterns(self, tables_dict):
        """Expand the placeholders in destination paths and in both queries.

        Values of ``tables_dict`` are formatted in place with ``date``,
        ``task``, ``src`` and ``yesterday_date``. ``self.dump_query`` and
        ``self.erase_query`` are formatted with ``unixtime_today_start``,
        ``unixtime_today_end``, ``unixtime_yesterday_start`` and
        ``unixtime_yesterday_end`` (local-time day bounds as integer
        unix timestamps).

        :param tables_dict: mapping ``source table -> destination pattern``;
            ``None`` or an empty mapping is accepted and only the queries
            are expanded.
        """
        now = datetime.datetime.now()
        yesterday = now - datetime.timedelta(days=1)

        def _day_bounds(moment):
            # Whole-second precision: timetuple() carries no microseconds.
            start = moment.replace(hour=0, minute=0, second=0, microsecond=0)
            end = moment.replace(hour=23, minute=59, second=59, microsecond=0)
            return (
                int(time.mktime(start.timetuple())),
                int(time.mktime(end.timetuple())),
            )

        today_start, today_end = _day_bounds(now)
        yesterday_start, yesterday_end = _day_bounds(yesterday)
        # Computed once and shared by both queries.
        query_params = {
            "unixtime_today_start": today_start,
            "unixtime_today_end": today_end,
            "unixtime_yesterday_start": yesterday_start,
            "unixtime_yesterday_end": yesterday_end,
        }

        yesterday_backup_date = yesterday.strftime('%Y-%m-%d')
        for src in tables_dict or {}:
            tables_dict[src] = tables_dict[src].format(
                date=self.backup_date,
                task=self.task_id,
                src=self.src_cluster,
                yesterday_date=yesterday_backup_date,
            )
        self.dump_query = self.dump_query.format(**query_params)
        self.erase_query = self.erase_query.format(**query_params)

    def get_path_for_static(self, sb_task_id=None):
        """Return the per-task directory that holds temporary static dumps.

        :param sb_task_id: task id to build the path for; any falsy value
            means "use this task's own id".
        """
        path_parts = {
            "base_path": BASE_PATH,
            "origin_cluster": self.origin_cluster,
            "backup_date": self.backup_date,
            "task_id": sb_task_id or self.task_id,
        }
        return DEFAULT_STATIC_FORMAT.format(**path_parts)

    def get_path_day_dir(self):
        """Return the backup directory of the origin cluster for the backup date."""
        path_parts = {
            "base_path": BASE_PATH,
            "origin_cluster": self.origin_cluster,
            "backup_date": self.backup_date,
        }
        return DEFAULT_DAY_DIR_FORMAT.format(**path_parts)

    def get_static_pattern(self, din_table_path, sb_task_id=None):
        """Return the temporary dump path of a table inside the task directory.

        Slashes of the source path are replaced with dashes so that the whole
        path becomes a single node name.
        """
        flat_name = din_table_path.replace('/', '-')
        return '/'.join([self.get_path_for_static(sb_task_id), flat_name])

    def get_backup_pattern(self, din_table_path):
        """Return the final backup path of a table inside the day directory.

        Slashes of the source path are replaced with dashes so that the whole
        path becomes a single node name.
        """
        flat_name = din_table_path.replace('/', '-')
        return '/'.join([self.get_path_day_dir(), flat_name])

    @staticmethod
    def get_expiration_time(ttl):
        expiration_time = datetime.datetime.fromtimestamp(time.time() + ttl * 24 * 60 * 60).isoformat()
        return expiration_time

    def set_ttl_to_node(self, yt_client, node_path, ttl):
        """Set ``expiration_time`` on ``node_path`` to ``ttl`` days from now.

        Does nothing when ``ttl`` is falsy (zero or ``None``).
        """
        if not ttl:
            return
        expires_at = self.get_expiration_time(ttl)
        logging.info('Set expiration_time %s to %s', expires_at, node_path)
        yt_client.set_attribute(node_path, "expiration_time", expires_at)

    def _dump_one_table(self, tables):
        """Dump one table on the source cluster into a static table.

        :param tables: pair ``(din_table, stat_table)`` - the table to dump
            and the path of the static table to create.
        :return: path of the dump (an already existing one in ``backup`` mode
            without ``overwrite_backup``, otherwise ``stat_table``), or ``None``
            when the dump failed or the resulting table does not exist.
        """
        din_table = tables[0]
        stat_table = tables[1]

        # In plain backup mode reuse a dump found in any task directory of the
        # same day, unless overwriting was requested.
        if self.mode == 'backup' and not self.overwrite_backup:
            dump = self.get_existed_dump(self.src_cluster, din_table)
            if dump:
                return dump

        yt_client = YtClient(
            token=self.yt_token,
            proxy=self.src_cluster,
        )

        def _copy_attributes(attributes):
            # Copy the listed attributes of the source table onto the dump,
            # storing each one under ATTRIBUTE_PREFIX + name.
            for attr in attributes:
                attr_value = yt_client.get(din_table + "/@" + attr)
                logging.info('Get attribute %s from %s - %s', attr, din_table, attr_value)
                yt_client.set_attribute(stat_table, ATTRIBUTE_PREFIX + attr, attr_value)

        def _copy_user_attributes():
            # Copy all user attributes plus the list of their keys (the list
            # is what the restore side reads back); "forced_compaction_revision"
            # is skipped.
            user_attributes = yt_client.get_attribute(din_table, "user_attribute_keys")
            user_attributes.append("user_attribute_keys")
            if "forced_compaction_revision" in user_attributes:
                user_attributes.remove("forced_compaction_revision")
            for attr in user_attributes:
                yt_client.set_attribute(stat_table, ATTRIBUTE_PREFIX + attr, yt_client.get_attribute(din_table, attr))

        def _set_attributes(attributes_dict):
            # Store the given name -> value pairs on the dump under the prefix.
            for attr in attributes_dict:
                yt_client.set_attribute(stat_table, ATTRIBUTE_PREFIX + attr, attributes_dict[attr])

        # Only YtError is retried; DumpRestoreError propagates on the first raise.
        @Retry(YtError, attempts=RETRIES_COUNT, max_sleep=10)
        def _dump():
            logging.info('Dump %s to %s using predicate "%s"', din_table, stat_table, self.dump_query)
            if yt_client.get(din_table + "/@dynamic"):
                # Creation of the dump, attribute copying and the merge all
                # happen inside one transaction.
                with yt_client.Transaction():
                    yt_client.create_table(
                        stat_table,
                        recursive=True,
                        attributes={
                            "optimize_for": yt_client.get_attribute(din_table, "optimize_for", default="lookup"),
                            "schema": yt_client.get_attribute(din_table, "schema"),
                        }
                    )
                    _copy_attributes(["pivot_keys", "key_columns", "schema", "compression_codec"])  # save key_columns and schema for backward compatibility to DumpRestoreClient
                    _copy_user_attributes()
                    if self.copy_attributes:
                        logging.info('Copy attributes %s', self.copy_attributes)
                        _copy_attributes(self.copy_attributes)

                    # Take a snapshot lock, then remember the timestamp just
                    # below "unflushed_timestamp"; it is used to read the table.
                    yt_client.lock(din_table, mode="snapshot")
                    ts = yt_client.get_attribute(din_table, "unflushed_timestamp") - 1
                    _set_attributes({
                        "backup_timestamp": ts,
                        "backup_date": self.backup_date,
                        "origin_cluster": self.src_cluster,
                        "origin_table": din_table,
                        "sb_task_id": self.task_id,
                    })
                    if self.erasure:
                        yt_client.set_attribute(stat_table, "compression_codec", "brotli_6")
                        yt_client.set_attribute(stat_table, "erasure_codec", "lrc_12_2_2")

                    table_path = TablePath(din_table, attributes={"timestamp": ts}, client=yt_client)
                    # When the first tablet is frozen the table is read without
                    # an explicit timestamp.
                    if yt_client.get("{0}/@tablets/0/state".format(din_table)) == 'frozen':
                        table_path = TablePath(din_table, client=yt_client)
                    merge_spec = {
                        "data_size_per_job": 2 * 1024 * 1024 * 1024,
                        "force_transform": True,
                        "combine_chunks": True,
                        "pool": "yabs-backup",
                    }
                    if self.dump_query:
                        # Dump only the rows matching the predicate and record
                        # the predicate on the dump.
                        merge_spec["input_query"] = DUMP_QUERY_TEMPLATE.format(self.dump_query)
                        _set_attributes({"dump_query": self.dump_query})
                    yt_client.run_merge(
                        table_path,
                        stat_table,
                        mode="ordered",
                        spec=merge_spec
                    )
                # The TTL is set after the transaction block has exited.
                self.set_ttl_to_node(yt_client, stat_table, self.ttl)
            else:
                # Static source: a plain copy is possible only without a predicate.
                if not self.dump_query:
                    logging.warn('Table %s is static. Just copy it to %s', din_table, stat_table)
                    yt_client.copy(din_table, stat_table, recursive=True, force=self.force_operations)
                else:
                    logging.error("Can't dump static table %s with predicate", din_table)
                    raise DumpRestoreError("Can't dump static table with predicate")

        try:
            _dump()
        except (YtError, DumpRestoreError) as err:
            # A failed dump is reported as None instead of raising.
            logging.error(
                "Dump of %s failed with error: %s \n %s",
                din_table,
                err.message,
                traceback.format_exc()
            )
            return None

        if yt_client.exists(stat_table):
            return stat_table
        else:
            return None

    def dump_tables(self, match_list):
        """Dump all tables of ``match_list`` in parallel on the source cluster.

        Creates the per-task directory first and sets TTLs on it and on the
        day directory.

        :param match_list: list of ``[din_table, stat_table]`` pairs.
        :return: list with one entry per pair, as returned by
            ``_dump_one_table`` (dump path or ``None``).
        """
        yt_client = YtClient(
            token=self.yt_token,
            proxy=self.src_cluster
        )
        day_dir = self.get_path_day_dir()
        static_path = self.get_path_for_static()
        yt_client.mkdir(static_path, recursive=True)
        self.set_ttl_to_node(yt_client, static_path, MAX_TMP_STATIC_AGE)
        self.set_ttl_to_node(yt_client, day_dir, MAX_BACKUP_AGE)

        pool = multiprocessing.Pool(processes=DUMP_PROCESSES_COUNT)
        try:
            return pool.map(self._dump_one_table, match_list)
        finally:
            # Always release the workers, even when a worker raised.
            pool.close()
            pool.join()

    def get_match_list_for_dump(self):
        """Split the requested tables into "to dump" and "already backed up".

        Source tables come from the keys of ``tables_dict`` or, when that is
        empty, from ``tables_list``.

        :return: tuple ``(match_list, skip_list)``; ``match_list`` holds
            ``[table, temporary dump path]`` pairs, ``skip_list`` holds tables
            whose backup already exists on the destination cluster.
        """
        if self.tables_dict:
            candidates = self.tables_dict.keys()
        else:
            candidates = self.tables_list or []

        to_dump = []
        already_done = []
        for table_path in candidates:
            # The existence check is skipped entirely when overwriting.
            if self.overwrite_backup or not self.get_existed_backup(self.dst_cluster, table_path):
                to_dump.append([table_path, self.get_static_pattern(table_path)])
            else:
                logging.info("Backup for table %s already exist on %s, skip it", table_path, self.dst_cluster)
                already_done.append(table_path)
        return to_dump, already_done

    def get_match_list_for_clone(self):
        """Return ``[temporary dump path, destination table]`` pairs.

        One pair is produced per entry of ``tables_dict``; the result is empty
        when ``tables_dict`` is empty or ``None``.
        """
        if not self.tables_dict:
            return []
        # items() instead of iteritems(): works on both Python 2 and 3.
        return [
            [self.get_static_pattern(src_table), dst_table]
            for src_table, dst_table in self.tables_dict.items()
        ]

    def get_match_list_for_backup_move(self, dumps):
        """Pair every dump with its place one directory level higher.

        ``<dir>/<task>/<name>`` is mapped to ``<dir>/<name>``.

        :param dumps: iterable of dump paths; may be empty or ``None``.
        :return: list of ``[dump, target]`` pairs.
        :raises ValueError: when a path contains fewer than two slashes.
        """
        if not dumps:
            return []

        pairs = []
        for dump in dumps:
            # Split off the last two components; the unpacking fails with
            # ValueError for paths that are too short.
            parent_dir, _task_dir, table_name = dump.rsplit('/', 2)
            pairs.append([dump, parent_dir + '/' + table_name])
        return pairs

    def get_dumps_list_to_restore(self):
        """Return the backup paths of all tables in ``tables_list``.

        Backups are looked up on the source cluster. The process exits with
        status 1 when the backup of any requested table is missing.

        :return: list of backup table paths; empty when there is nothing to
            restore (``tables_list`` is empty or ``None``).
        """
        if not self.tables_list:
            logging.warning("No tables to restore")
            # Nothing to iterate over; tables_list may be None here.
            return []

        logging.info("Tables on %s to restore in it: %s", self.dst_cluster, self.tables_list)

        dumps_list = []
        for din_table in self.tables_list:
            stat_table = self.get_existed_backup(self.src_cluster, din_table)
            if not stat_table:
                logging.error("Not found dump %s for %s on %s", din_table, self.backup_date, self.src_cluster)
                sys.exit(1)
            dumps_list.append(stat_table)

        return dumps_list

    def get_existed_dump(self, cluster, din_table):
        """Find an existing temporary dump of ``din_table`` on ``cluster``.

        Looks through the sub-directories of the day directory whose names are
        integers, from the largest number down, and returns the first dump
        path that exists.

        :return: path of the dump or ``None`` when none exists.
        """
        client = YtClient(
            token=self.yt_token,
            proxy=cluster
        )

        task_ids = []
        for entry in client.list(self.get_path_day_dir(), attributes=['type']):
            if entry.attributes.get('type') != 'map_node':
                continue
            try:
                task_ids.append(int(entry))
            except ValueError:
                # Directory name is not a number - not a task directory.
                pass

        for task_id in sorted(task_ids, reverse=True):
            candidate = self.get_static_pattern(din_table, task_id)
            if client.exists(candidate):
                return candidate
        return None

    def get_existed_backup(self, cluster, din_table):
        """Return the backup path of ``din_table`` on ``cluster`` if it exists.

        :return: path inside the day directory, or ``None`` when the node
            does not exist.
        """
        client = YtClient(
            token=self.yt_token,
            proxy=cluster
        )
        backup_table = self.get_backup_pattern(din_table)
        return backup_table if client.exists(backup_table) else None

    def _restore_one_table(self, tables):
        """Restore one static dump into a dynamic table on the destination cluster.

        :param tables: pair ``(stat_table, din_table)`` - the dump and the
            table to restore into; a falsy ``din_table`` means "take the path
            from the dump's origin_table attribute".
        :return: path of the restored table, or ``None`` when the restore
            failed or the table does not exist afterwards.
        :raises DumpRestoreError: when no destination is given and the dump
            carries no origin_table attribute (raised outside the error
            handling below, so it reaches the caller).
        """
        stat_table = tables[0]
        din_table = tables[1]
        yt_client = YtClient(
            token=self.yt_token,
            proxy=self.dst_cluster,
            config=self.dr_client_options["default_client_config"]
        )

        if not din_table:
            if yt_client.exists(stat_table + "/@" + ATTRIBUTE_PREFIX + "origin_table"):
                din_table = yt_client.get_attribute(stat_table, ATTRIBUTE_PREFIX + "origin_table")
            else:
                raise DumpRestoreError("No dst table to restore")

        def _restore_user_attributes():
            # Restore the user attributes listed in the prefixed
            # "user_attribute_keys" attribute of the dump, if it is present;
            # "forced_compaction_revision" is skipped.
            if yt_client.exists("{table}/@{prefix}user_attribute_keys".format(table=stat_table, prefix=ATTRIBUTE_PREFIX)):
                user_attributes = yt_client.get_attribute(stat_table, ATTRIBUTE_PREFIX + "user_attribute_keys")
                if "forced_compaction_revision" in user_attributes:
                    user_attributes.remove("forced_compaction_revision")
                logging.info("Restore user attributes %s", user_attributes)
                for attr in user_attributes:
                    yt_client.set_attribute(din_table, attr, yt_client.get_attribute(stat_table,  ATTRIBUTE_PREFIX + attr))

        def _restore_attributes(user_attributes):
            # Restore each listed attribute that was saved on the dump under
            # the prefix; attributes that were not saved are silently skipped.
            for attr in user_attributes:
                if yt_client.exists("{table}/@{prefix}{attr}".format(table=stat_table, prefix=ATTRIBUTE_PREFIX, attr=attr)):
                    logging.info("Restore user attribute %s", attr)
                    yt_client.set_attribute(din_table, attr, yt_client.get_attribute(stat_table,  ATTRIBUTE_PREFIX + attr))

        def make_restore_mapper(schema):
            # Build a mapper that drops columns having an "expression" in the
            # schema; returns None when the schema has no such columns.
            calculated_columns = []
            for column in schema:
                if "expression" in column:
                    calculated_columns.append(column['name'])
            if not calculated_columns:
                return None

            def mapper(row):
                for calculated_column in calculated_columns:
                    if calculated_column in row:
                        row.pop(calculated_column)
                yield row
            return mapper

        # Only YtError is retried; DumpRestoreError propagates on the first raise.
        @Retry(YtError, attempts=RETRIES_COUNT, max_sleep=10)
        def _restore():
            if yt_client.exists(din_table):
                if not self.overwrite_dst_table:
                    # Path 1: the destination exists and is kept - rows are
                    # written into it.
                    logging.info('Table %s exists, restore to it', din_table)

                    if not yt_client.get(din_table + "/@dynamic"):
                        logging.error("Can't restore to static table %s", din_table)
                        raise DumpRestoreError("Can't restore to existing static table")

                    dr_client = DumpRestoreClient(
                        yt_client,
                        **self.dr_client_options
                    )

                    # if dr_client.get_schema_and_key_columns(din_table) != dr_client.get_schema_and_key_columns(stat_table):
                    #    logging.error("Schema of %s does not match schema of %s", din_table, stat_table)
                    #    raise DumpRestoreError("Schemas not match")

                    # key_columns = yt_client.get_attribute(stat_table, ATTRIBUTE_PREFIX + "key_columns")
                    # schema = yt_client.get_attribute(stat_table, ATTRIBUTE_PREFIX + "schema")
                    schema = yt_client.get(stat_table + "/@schema")
                    # Key columns are the schema columns that have a sort order.
                    key_columns = [column["name"] for column in schema if "sort_order" in column]
                    # optimize_for = yt_client.get_attribute(stat_table, "optimize_for", default="lookup")
                    # check_table_schema(din_table, schema, key_columns, optimize_for, yt=yt_client)

                    if self.erase_query:
                        logging.info('Erase from %s by %s', din_table, self.erase_query)
                        # dr_client.erase_table(
                        #    din_table,
                        #    self.erase_query
                        # )
                        # Select the non-computed key columns of the rows that
                        # match the erase predicate into a temporary table,
                        # then delete those keys from the destination.
                        keys_list = []
                        for column in schema:
                            if column['name'] in key_columns and "expression" not in column:
                                keys_list.append(column['name'])
                        key_columns_str = ','.join(keys_list)
                        merge_spec = {
                            "data_size_per_job": 2 * 1024 * 1024 * 1024,
                            "force_transform": True,
                            "pool": "yabs-backup",
                            "input_query": ERASE_QUERY_TEMPLATE.format(key_columns_str, self.erase_query),
                        }
                        with yt_client.TempTable() as keys_table:
                            logging.info('Prepare keys to erase from %s by %s', din_table, self.erase_query)
                            yt_client.run_merge(
                                din_table,
                                keys_table,
                                mode="ordered",
                                spec=merge_spec
                            )
                            logging.info('Delete rows from %s by %s', din_table, self.erase_query)
                            dr_client.run_map_dynamic(
                                None,
                                keys_table,
                                din_table,
                                insert=False  # to delete
                            )

                    logging.info('Restore from %s to %s', stat_table, din_table)

                    dr_client.run_map_dynamic(
                        make_restore_mapper(schema),
                        stat_table,
                        din_table)
                    return
                else:
                    # Overwrite requested: drop the old table and fall through
                    # to the "create from scratch" path below.
                    yt_client.remove(din_table)
                    logging.info("Delete old %s table", din_table)

            # Path 2: create the destination as a static table, fill it with a
            # merge, then turn it into a dynamic table.
            logging.info("Start restore %s", din_table)
            if yt_client.exists(stat_table + "/@schema"):
                schema = yt_client.get_attribute(stat_table, "schema")
            else:
                raise DumpRestoreError("Table {} don't have schema".format(stat_table))
            logging.info('Create %s and restore from %s to it', din_table, stat_table)
            yt_client.create_table(din_table, attributes={
                "optimize_for": yt_client.get_attribute(stat_table, "optimize_for", default="lookup"),
                "schema": schema,
                "external": False
            })
            if self.use_ssd_blobs:
                yt_client.set_attribute(din_table, 'primary_medium', 'ssd_blobs')
                logging.info('Add attr "primary_medium": "ssd_blobs" to %s', din_table)
            # Use the compression codec saved at dump time, if any.
            if yt_client.exists(stat_table+'/@'+ATTRIBUTE_PREFIX + "compression_codec"):
                yt_client.set_attribute(din_table, "compression_codec", yt_client.get_attribute(stat_table, ATTRIBUTE_PREFIX + "compression_codec"))
            yt_client.run_merge(
                stat_table,
                din_table,
                mode="ordered",
                spec={
                    "job_io": {"table_writer": {"block_size": 256 * 2**10, "desired_chunk_size": 100 * 2**20}},
                    "force_transform": True,
                    "pool": "yabs-restore",
                }
            )
            logging.info("Make %s dynamic", din_table)
            yt_client.alter_table(din_table, dynamic=True)
            logging.info("Reshard %s", din_table)
            # Reshard by the pivot keys saved at dump time.
            yt_client.reshard_table(din_table, yt_client.get_attribute(stat_table, ATTRIBUTE_PREFIX + "pivot_keys"))
            _restore_user_attributes()
            if self.copy_attributes:
                _restore_attributes(self.copy_attributes)
            logging.info("Mount table %s", din_table)
            yt_client.mount_table(din_table)
            # Poll once per second until every tablet is mounted; there is no
            # timeout on this wait.
            while not all(x["state"] == "mounted" for x in yt_client.get_attribute(din_table, "tablets")):
                time.sleep(1)

            if self.force_compaction:
                logging.info("Force compaction table %s", din_table)
                # NOTE(review): the attribute is set twice in a row; it is not
                # clear from this file whether the repetition is intentional - confirm.
                yt_client.set(din_table + "/@forced_compaction_revision", yt_client.get(din_table + "/@revision"))
                yt_client.set(din_table + "/@forced_compaction_revision", yt_client.get(din_table + "/@revision"))
                yt_client.remount_table(din_table)

        try:
            _restore()
        except (YtError, DumpRestoreError) as err:
            # A failed restore is reported as None instead of raising.
            logging.error(
                "Restore from %s to %s failed with error: %s \n %s",
                stat_table,
                din_table,
                err.message,
                traceback.format_exc()
            )
            return None
        if yt_client.exists(din_table):
            return din_table
        else:
            return None

    def restore_tables(self, match_list):
        """Restore all pairs of ``match_list`` in parallel.

        :param match_list: list of ``[stat_table, din_table]`` pairs.
        :return: list with one entry per pair, as returned by
            ``_restore_one_table`` (restored table path or ``None``).
        """
        pool = multiprocessing.Pool(processes=RESTORE_PROCESSES_COUNT)
        try:
            return pool.map(self._restore_one_table, match_list)
        finally:
            # _restore_one_table can raise (e.g. no destination table), so
            # release the workers on every exit path.
            pool.close()
            pool.join()

    def copy_tables(self, match_list):
        """Move or copy tables on the destination cluster.

        A pair is processed when the destination does not exist or
        ``overwrite_backup`` is set; tables are moved when ``delete_tmp`` is
        set and copied otherwise. The task TTL is applied to every
        destination that was written.

        :param match_list: iterable of ``(src, dst)`` pairs.
        :return: list of destinations that were written.
        """
        done = []
        client = YtClient(
            token=self.yt_token,
            proxy=self.dst_cluster,
        )
        for src, dst in match_list:
            # The existence check is skipped entirely when overwriting.
            if not self.overwrite_backup and client.exists(dst):
                logging.warning("Don't move %s to %s: dst table already exist", src, dst)
                continue

            if self.delete_tmp:
                client.move(src, dst, recursive=True, force=True)
                logging.info('Move %s to %s', src, dst)
            else:
                client.copy(src, dst, recursive=True, force=True)
                logging.info('Copy %s to %s', src, dst)
            self.set_ttl_to_node(client, dst, self.ttl)
            done.append(dst)
        return done

    def _filter_one_table(self, tables):
        """Copy the rows of a dump that match ``self.dump_query`` into a new table.

        :param tables: pair ``(src_table, dst_table)`` on the source cluster.
        :return: ``dst_table`` when it exists after the operation, ``None``
            when filtering failed or the table was not created.
        """
        src_table = tables[0]
        dst_table = tables[1]

        yt_client = YtClient(
            token=self.yt_token,
            proxy=self.src_cluster,
        )

        def _copy_backup_attributes(attributes):
            # Both sides use the prefixed name: the source is itself a dump
            # that already carries the saved attributes.
            for attr in attributes:
                yt_client.set_attribute(dst_table, ATTRIBUTE_PREFIX + attr, yt_client.get_attribute(src_table, ATTRIBUTE_PREFIX + attr))

        # Only YtError is retried.
        @Retry(YtError, attempts=RETRIES_COUNT, max_sleep=10)
        def _filter():
            logging.info('Filter %s to %s using predicate "%s"', src_table, dst_table, self.dump_query)
            # Table creation, attribute copying and the merge share one transaction.
            with yt_client.Transaction():
                yt_client.create_table(
                    dst_table,
                    recursive=True,
                    attributes={
                        "optimize_for": yt_client.get_attribute(src_table, "optimize_for", default="lookup"),
                        "schema": yt_client.get_attribute(src_table, "schema"),
                    }
                )
                _copy_backup_attributes(["pivot_keys", "key_columns", "schema"])  # save key_columns and schema for backward compatibility to DumpRestoreClient
                if self.copy_attributes:
                    logging.info('Copy attributes %s', self.copy_attributes)
                    _copy_backup_attributes(self.copy_attributes)

                table_path = TablePath(src_table, client=yt_client)
                merge_spec = {
                    "data_size_per_job": 2 * 1024 * 1024 * 1024,
                    "force_transform": True,
                    "pool": "yabs-backup",
                    "input_query": DUMP_QUERY_TEMPLATE.format(self.dump_query)
                }
                yt_client.run_merge(
                    table_path,
                    dst_table,
                    mode="ordered",
                    spec=merge_spec
                )
            # The filtered table gets the temporary-dump TTL, set after the
            # transaction block has exited.
            self.set_ttl_to_node(yt_client, dst_table, MAX_TMP_STATIC_AGE)

        try:
            _filter()
        except (YtError, DumpRestoreError) as err:
            # A failed filter is reported as None instead of raising.
            logging.error(
                "Filter %s failed with error: %s \n %s",
                src_table,
                err.message,
                traceback.format_exc()
            )
            return None

        if yt_client.exists(dst_table):
            return dst_table
        else:
            return None

    def filter_tables(self, tables_list):
        """Filter all tables of ``tables_list`` in parallel on the source cluster.

        Creates the per-task directory first and sets its TTL.

        :param tables_list: iterable of source table paths.
        :return: list with one entry per table, as returned by
            ``_filter_one_table`` (filtered table path or ``None``).
        """
        match_list = [
            [table_path, self.get_static_pattern(table_path)]
            for table_path in tables_list
        ]
        yt_client = YtClient(
            token=self.yt_token,
            proxy=self.src_cluster
        )
        static_path = self.get_path_for_static()
        yt_client.mkdir(static_path, recursive=True)
        self.set_ttl_to_node(yt_client, static_path, MAX_TMP_STATIC_AGE)

        pool = multiprocessing.Pool(processes=DUMP_PROCESSES_COUNT)
        try:
            return pool.map(self._filter_one_table, match_list)
        finally:
            # Always release the workers, even when a worker raised.
            pool.close()
            pool.join()

    def delete_tmp_dumps(self, cluster, dumps):
        """Force-remove every path of ``dumps`` on ``cluster`` and log the list."""
        client = YtClient(
            token=self.yt_token,
            proxy=cluster
        )
        for dump_path in dumps:
            client.remove(dump_path, force=True)
        logging.info('Removed tmp dumps: %s', dumps)

    def do_task(self):
        """Execute the task selected by ``self.mode`` and report success.

        Stages, in order:

        1. Collect the dumps to work with:
           * ``backup`` / ``advanced_backup`` / ``clone`` - dump the tables
             returned by ``get_match_list_for_dump()``;
           * ``restore`` - take ``get_dumps_list_to_restore()``;
           * ``advanced_restore`` - take the keys of ``tables_dict``, passed
             through ``filter_tables`` when ``dump_query`` is set.
        2. When ``need_transfer`` is set, transfer every dump from
           ``src_cluster`` to ``dst_cluster`` under the same path.
        3. Restore tables (``restore`` / ``clone`` / ``advanced_restore``)
           and/or copy them (``backup`` / ``advanced_backup``).
        4. When ``delete_tmp`` is set, remove the temporary dumps.

        :return: ``True`` on success, including the cases when there is
            nothing to do; ``False`` when some table could not be dumped,
            filtered or restored.
        """
        dump_modes = ('backup', 'advanced_backup', 'clone')

        logging.info("Start DumpRestore")
        mode = self.mode

        # Stage 1: work out the set of dumps.
        dumps = []
        if mode in dump_modes:
            dump_pairs, existed_backups = self.get_match_list_for_dump()
            if not dump_pairs:
                logging.info('All tables (%s) already in backup', existed_backups)
                return True

            logging.info('Tables to dump: %s', dump_pairs)
            dumps = self.dump_tables(dump_pairs)
            logging.info('Dumped tables: %s', dumps)
            dumps = set(dumps)
            if None in dumps:
                # A None entry marks a table that was not dumped.
                dumps.discard(None)
                expected_dumps = set(dict(dump_pairs).values())
                logging.error("Some tables not dumped: %s", expected_dumps - dumps)
                return False
        elif mode == 'restore':
            dumps = self.get_dumps_list_to_restore()
        elif mode == 'advanced_restore':
            src_tables = self.tables_dict.keys()
            if self.dump_query:
                logging.info('Tables to filter by "%s": %s', self.dump_query, src_tables)
                dumps = self.filter_tables(src_tables)
                logging.info('Filtered tables: %s', dumps)
                dumps = set(dumps)
                if None in dumps:
                    # A None entry marks a table that was not filtered.
                    dumps.discard(None)
                    logging.error("Some tables not filtered: %s", set(src_tables) - dumps)
                    return False
            else:
                dumps = src_tables
        logging.info("Dumps: %s", dumps)

        # Stage 2: move the dumps to the destination cluster, same paths.
        if self.need_transfer and dumps:
            src_dst_pairs = [(path, path) for path in dumps]
            logging.info(
                'Start transfer dumps from %s to %s',
                self.src_cluster,
                self.dst_cluster
            )
            tm_params = {}
            if self.queue_name:
                tm_params['queue_name'] = self.queue_name
            if self.custom_tm_pool:
                tm_params['pool'] = self.custom_tm_pool
            if self.use_backbone:
                tm_params['copy_spec'] = {'network_name': 'default'}
            tm_kwargs = {
                'src_dst_pairs': src_dst_pairs,
                'source_cluster': self.src_cluster,
                'destination_cluster': self.dst_cluster,
                'sync': True,
                'params': tm_params,
            }
            tm_client = TransferManager(token=self.yt_token)
            tm_client.add_tasks_from_src_dst_pairs(**tm_kwargs)

        # Nothing was dumped or selected: the remaining stages are skipped.
        if not dumps:
            logging.info("Finish DumpRestore")
            return True

        # Stage 3a: restore.
        if mode == 'restore':
            restore_pairs = dict.fromkeys(dumps, 0).items()
        elif mode == 'clone' or (mode == 'advanced_restore' and self.dump_query):
            restore_pairs = self.get_match_list_for_clone()
        elif mode == 'advanced_restore':
            restore_pairs = self.tables_dict.items()
        else:
            restore_pairs = []

        restored_tables = []
        if restore_pairs:
            logging.info('Tables to restore %s', restore_pairs)
            restored_tables = self.restore_tables(restore_pairs)
            logging.info('Restored tables: %s', restored_tables)

        # Stage 3b: copy (backup modes only).
        if mode == 'backup':
            copy_pairs = self.get_match_list_for_backup_move(dumps)
        elif mode == 'advanced_backup':
            copy_pairs = self.get_match_list_for_clone()
        else:
            copy_pairs = []

        if copy_pairs:
            logging.info('Copied tables: %s', self.copy_tables(copy_pairs))

        # Stage 4: cleanup; runs before the restore result is checked.
        if self.delete_tmp:
            logging.info('Start remove tmp dumps')
            if mode in dump_modes:
                self.delete_tmp_dumps(self.src_cluster, dumps)
            if self.need_transfer:
                self.delete_tmp_dumps(self.dst_cluster, dumps)

        if None in restored_tables:
            logging.error("Some tables not restored")
            return False

        logging.info("Finish DumpRestore")
        return True


if __name__ == "__main__":
    # The first command-line argument is a JSON document with task parameters.
    task_params = json.loads(sys.argv[1])
    logging.basicConfig(
        format="%(asctime)s %(levelname)s (%(module)s) %(message)s",
        level=logging.INFO,
    )
    logging.info("YT Wrapper version %s", yt_wrapper.__version__)

    task = DumpRestoreTask(task_params)
    started_at = datetime.datetime.now()
    succeeded = task.do_task()
    finished_at = datetime.datetime.now()
    logging.info("Time: %s", finished_at - started_at)

    # A failed task is reported through a non-zero exit code.
    if not succeeded:
        sys.exit(1)
