import re
import os
import time
import json
import uuid
import logging
import datetime
import subprocess
import errno
import signal
import itertools
from collections import defaultdict

from concurrent import futures

from sandbox import sdk2
from sandbox import common
from sandbox.common import errors as common_errors
from sandbox.common.utils import chain
import sandbox.common.types.task as ctt

from sandbox.projects.common.network import get_my_ipv6
from sandbox.projects.common.yabs.server.db.task.mysql import get_mysql_port
from sandbox.projects.yabs.qa.utils import yt_utils
from sandbox.projects.yabs.qa.utils.general import issubset_dict
import sandbox.sandboxsdk.paths as sdk_paths
import sandbox.sandboxsdk.process as sdk_process

from sandbox.projects.yabs.sandbox_task_tracing import trace_calls, trace_subprocess

from .utils import get_mysql_tag, _extract_tables, _get_tag_tables


# Keys of the auxiliary data section embedded into the input spec
# (read by renew_input_spec_expiration_time below).
AUXILLARY_DATA_IN_SPEC_KEY = '__SANDBOX_AUXILLARY_DATA__'
ARCHIVE_ROOT_KEY = 'archive_root'
ARCHIVE_ROOT_TTL_KEY = 'archive_root_ttl'
DEFAULT_CS_INPUT_ARCHIVE_TTL = 24 * 60 * 60  # 24 hours, in seconds

# Attribute names. The ones referenced in this file are read from YT nodes by
# find_node_to_reuse / skip_node_by_attributes; presumably the rest are YT node
# attributes as well -- confirm against callers.
DIGEST_ATTR = '__cs_import_out_digest'
CS_IMPORT_VER_ATTR = '__cs_import_ver'
CS_EXPORT_VER_ATTR = '__cs_export_ver'
CS_FETCH_VER_ATTR = '__cs_fetch_ver'
BIN_DBS_NAMES_ATTR = '__bin_dbs_names'
MYSQL_ARCHIVE_ATTR = '__mysql_archive_resource_id'
INPUT_ARCHIVE_ATTR = '__input_archive_path'
IS_REUSABLE_ATTR = '__is_reusable'
SETTINGS_SPEC_ATTR = '__settings_spec'
SETTINGS_SPEC_MD5_ATTR = '__settings_spec_md5'
CREATED_BY_TASK_ATTRIBUTE = '__created_by_task'
REUSED_BY_TASKS_ATTRIBUTE = '__reused_by_tasks'
IMPORT_COMPLETED_ATTRIBUTE = '__import_completed'
SAMPLING_STRATEGY_ATTRIBUTE = '__sampling_strategy'
SAMPLING_QUERY_ATTRIBUTE = '__sampling_query'
SAMPLING_TABLES_ATTRIBUTE = '__sampling_tables'
SAMPLING_TABLES_PARAMS_HASH = '__sampling_parameters_hash'
EXPORT_COMPLETED_ATTRIBUTE = '__export_completed'
COMMON_ONESHOTS_MD5_ATTR = '__common_oneshots_md5'

# IMPORTERS_ATTR maps importer name -> dict of per-importer attributes
# (see the IMPORTERS_ATTR branch of skip_node_by_attributes).
# The three names below look like keys of that per-importer dict -- TODO confirm.
IMPORTERS_ATTR = '__importers'
IMPORTER_SETTINGS_VERSION_ATTR = 'settings_version'
IMPORTER_MKDB_INFO_VERSION_ATTR = 'mkdb_info_version'
IMPORTER_CODE_VERSION_ATTR = 'importer_code_version'

# Root YT paths used by this module.
IMPORTS_ROOT = '//home/yabs-cs-sandbox/import'
EXPORTS_ROOT = '//home/yabs-cs-sandbox/export'
FETCH_ROOT = '//home/yabs-cs-sandbox/fetch'
EXPORT_DIR = 'export'

# All TTL values below are expressed in seconds (timedelta.total_seconds()).
# Minimal TTL applied to the import prefix when reduce() reuses it.
CS_IMPORT_OUTPUT_TTL = datetime.timedelta(hours=25).total_seconds()

# TTL of the fetch root created by fetch().
FETCH_OUTPUT_TTL = datetime.timedelta(days=2).total_seconds()
# TTL of the per-task reduce directory while reduce() is running ...
REDUCE_OUTPUT_TTL = datetime.timedelta(hours=12).total_seconds()
# ... and the TTL it is reset to once reduce() has collected chkdb data.
REDUCE_OUTPUT_TTL_FINAL = datetime.timedelta(hours=3).total_seconds()
# TTL of the temporary export prefix created by the import runners.
EXPORT_TMP_OUTPUT_TTL = datetime.timedelta(hours=5).total_seconds()

# YT cluster (proxy) and default scheduler pool used by the functions below.
YT_PROXY = 'hahn'
YT_POOL = 'robot-yabs-cs-sb'


class TemporaryOrFailure(common.errors.TemporaryError):
    """Error that is retried once and treated as a task failure afterwards.

    The conversion is performed by the `yabscs_failures_retried` decorator.
    """


def get_cs_cmdline(tooldir, tool, *args):
    """Build the argv list for a CS tool located in `tooldir`.

    The ``import`` tool is invoked as the ``import`` subcommand of the ``cs``
    binary; every other tool is run as ``<tooldir>/<tool>``.

    :param tooldir: directory containing the tool binaries
    :param tool: tool name
    :param args: extra command-line arguments appended as is
    :return: list with the executable path followed by the arguments
    """
    if tool == 'import':
        executable = [os.path.join(tooldir, 'cs'), 'import']
    else:
        executable = [os.path.join(tooldir, tool)]
    return executable + list(args)


def get_piggyback_info(tooldir):
    """Run ``cs_cycle piggyback-info`` from `tooldir` and return its stdout parsed as JSON."""
    cmdline = get_cs_cmdline(tooldir, 'cs_cycle', 'piggyback-info')
    process = sdk_process.run_process(cmdline, log_prefix='piggyback-info', outputs_to_one_file=False)
    with open(process.stdout_path) as stdout_file:
        return json.load(stdout_file)


def yabscs_failures_retried(method):
    """Decorate a task method so that TemporaryOrFailure is retried exactly once.

    The first TemporaryOrFailure raised by `method` is re-raised as
    ``common.errors.TemporaryError`` and the retry counter stored in
    ``self.ctx['__yabscs_retry_count']`` is incremented. Any later
    TemporaryOrFailure is re-raised as ``common.errors.TaskFailure``.

    :param method: task method; its ``self`` must expose a dict-like ``ctx``
    :return: wrapper that keeps the name and docstring of `method`
    """
    # Local import keeps the block self-contained without touching the module import list.
    import functools

    @functools.wraps(method)
    def wrapped(self, *args, **kwargs):
        try:
            return method(self, *args, **kwargs)
        except TemporaryOrFailure as err:
            logging.exception("TemporaryOrFailure:")
            retry_count = self.ctx.get('__yabscs_retry_count', 0)
            if retry_count > 0:
                # Already retried once: give up for good.
                raise common.errors.TaskFailure(err)
            self.ctx['__yabscs_retry_count'] = retry_count + 1
            raise common.errors.TemporaryError(err)
    return wrapped


@trace_calls(save_arguments=(2, 'base_tags'))
def fetch(
    yt_token,
    bs_release_yt_dir,
    base_tags,
    sandbox_client_fqdn,
    fetch_id,
    date,
    settings_spec=None,
    task_id=None,
    yt_pool=YT_POOL,
):
    """Run ``cs_cycle --fetch`` into a fresh YT directory and list its output tables.

    A unique prefix is created under ``FETCH_ROOT/<fetch_id>``; the TTL of that
    root is set to FETCH_OUTPUT_TTL.

    :param date: date in ``%Y%m%d`` format, passed to the tool as ``--now-unixtime``
    :return: tuple ``(root, outputs)`` where `outputs` are paths of tables named
        ``data*`` / ``meta*`` found in the ``tier*`` nodes of the prefix
    """
    from yt.wrapper import ypath_join, YtClient

    yt_client = YtClient(proxy=YT_PROXY, token=yt_token)

    fetch_root = ypath_join(FETCH_ROOT, str(fetch_id))
    prefix = ypath_join(fetch_root, str(uuid.uuid4()))

    yt_client.remove(prefix, recursive=True, force=True)
    yt_client.create('map_node', prefix, recursive=True)
    yt_utils.set_yt_node_ttl(fetch_root, FETCH_OUTPUT_TTL, yt_client)

    _prepare_mysqls(base_tags)

    fetch_timeout = str(int(datetime.timedelta(minutes=40).total_seconds()))
    now_unixtime = datetime.datetime.strptime(date, "%Y%m%d").strftime("%s")
    cs_cycle_args = [
        '--prefix', prefix,
        '--fetch',
        '--fetch-timeout', fetch_timeout,
        '--now-unixtime', now_unixtime,
        '--disable-cleanup-merge',
    ]
    cs_cycle_args.extend(itertools.chain.from_iterable(('--basename', tag) for tag in base_tags))

    run_cs_tool(
        yt_token, YT_PROXY, bs_release_yt_dir, 'cs_cycle', cs_cycle_args,
        cluster_map_tags=base_tags,
        settings_spec=settings_spec,
        task_id=task_id,
        yt_pool=yt_pool,
    )

    outputs = []
    for node in yt_client.list(prefix):
        if node.startswith('tier'):
            tier_path = prefix + '/' + node
            outputs.extend(
                tier_path + '/' + table
                for table in yt_client.list(tier_path)
                if table.startswith(('data', 'meta'))
            )

    return fetch_root, outputs


def _merge_rinput(client, prefix, rinput):
    """Merge reduce-input tables into one ``meta`` and one ``data`` table per tier.

    Every path in `rinput` must contain a ``tier_<N>`` component and end with
    ``meta`` or ``data<digits>``. Tables are grouped by (tier, kind) and each
    group is merged into ``<prefix>/merged_rinput/<tier>/<kind>``.

    :return: list of merged table paths, sorted
    :raises RuntimeError: if a table path does not match the expected layout
    """
    import yt.wrapper

    inputs_by_output = defaultdict(list)
    for table in rinput:
        match = re.match(r'.*?/(tier_\d+)/.*?(meta|data\d*)$', table)
        if match is None:
            raise RuntimeError("Cannot merge rinput: malformed table path {}".format(table))
        tier, suffix = match.groups()
        kind = 'data' if suffix != 'meta' else 'meta'
        output_path = '{}/merged_rinput/{}/{}'.format(prefix, tier, kind)
        inputs_by_output[output_path].append(table)

    logging.debug("Merging rinput:\n%s", json.dumps(inputs_by_output, indent=4, sort_keys=True))

    merge_spec = {
        'schema_inference_mode': 'from_output',
        'data_size_per_job': 2048 * (1 << 20),
        'weight': len(rinput),  # Probably not the best
    }

    output_paths = sorted(inputs_by_output)
    # All merges are started asynchronously and tracked by the OperationsTracker context.
    with yt.wrapper.OperationsTracker() as tracker:
        for output_path in output_paths:
            input_tables = inputs_by_output[output_path]
            output_table = yt.wrapper.ypath.TablePath(
                output_path,
                schema=_merge_schemas(client, input_tables),
            )
            client.create('table', output_table, recursive=True)

            logging.info("Starting rinput merge for %s", output_path)
            operation = client.run_merge(input_tables, output_table, spec=merge_spec, sync=False)
            tracker.add(operation)

    logging.info("Merge done")

    return output_paths


def _merge_schemas(client, tables):
    """Build a schema that covers the schemas of all `tables`.

    Columns carrying ``sort_order`` (key columns) must be identical for every
    table. The remaining columns are united by name; a column that appears in
    several tables must have the same definition in all of them.

    :return: key columns followed by the other columns sorted by name
    :raises RuntimeError: on differing key columns or conflicting column definitions
    """
    key_columns = None
    value_columns = {}

    for table in tables:
        schema = client.get_attribute(table, 'schema')
        table_keys = [column for column in schema if 'sort_order' in column]
        if key_columns is None:
            key_columns = table_keys
        elif key_columns != table_keys:
            raise RuntimeError("Tables have different key columns, cannot merge")

        # Everything after the key columns is treated as a value column.
        for column in schema[len(table_keys):]:
            name = column['name']
            if value_columns.setdefault(name, column) != column:
                raise RuntimeError("Table schemas have conflict in column %s, cannot merge" % name)

    return key_columns + [value_columns[name] for name in sorted(value_columns)]


@trace_calls(save_arguments=(2, 'base_tags'))
def reduce(
    yt_token,
    bs_release_yt_dir,
    base_tags,
    sandbox_client_fqdn,
    date,
    target_dir,
    expected_base_ver,
    import_prefix,
    input_spec_path,
    rinput=None,
    settings_spec=None,
    reduce_timeout_sec=3 * 60 * 60,
    task_id=None,
    yt_pool=YT_POOL,
):
    """Run ``cs_cycle --reduce`` for `base_tags` and download the bases to `target_dir`.

    Work is done in a fresh directory under
    ``//home/yabs-cs-sandbox/tmp/tasks/<task_id>``.

    :param date: date in ``%Y%m%d`` format, passed to the tool as ``--now-unixtime``
    :param import_prefix: if truthy, the import results are copied from this
        prefix (its expiration time is renewed first); otherwise ``--import`` is passed
    :param rinput: optional list of tables that are merged and passed via ``--rinput``
    :return: tuple ``(base_filenames, chkdb_data)``: a dict tag -> expected base
        file name built from `expected_base_ver`, and the data returned by
        ``_get_chkdb_data``
    """
    proxy = YT_PROXY

    import yt.wrapper.client
    client = yt.wrapper.client.Yt(proxy=proxy, config={"token": yt_token})

    task_prefix = '//home/yabs-cs-sandbox/tmp/tasks/{}'.format(task_id)
    client.create('map_node', task_prefix, recursive=True, ignore_existing=True)
    renew_expiration_time(client, task_prefix, REDUCE_OUTPUT_TTL)

    # Every call works in its own uuid-named subdirectory of the task directory.
    prefix = '{}/{}'.format(task_prefix, uuid.uuid4())
    client.create('map_node', prefix)

    cs_cycle_args = [
        '--prefix', prefix,
        '--force-copy-import',
        '--reduce',
        '--reduce-timeout', '{}'.format(reduce_timeout_sec),
        '--now-unixtime', (datetime.datetime.strptime(date, "%Y%m%d")).strftime("%s"),
        '--gen-empty-banner-user-choice',
    ]

    cs_cycle_args += ['--yt-upload', '--compressed-bases-dump', '--local-bases-dir', target_dir, '--download-bases']

    # FIXME set operation weight in run_job_ng properly, wait for release and remove this
    st_bases = sorted(tag for tag in base_tags if re.match(r'st\d+', tag) is not None)
    spec_update = {'reduce_weight': {'_'.join(st_bases): 1000 * len(st_bases)}}

    cs_cycle_args += ['--spec', _preprocess_spec(client, input_spec_path, spec_update)]

    if rinput:
        # FIXME do not allow passing fetch input via old prefix
        # (always use rinput only)
        merged = _merge_rinput(client, prefix, rinput)
        cs_cycle_args += ['--add-imports-to-rinput', '--single-reduce']
        for table in merged:
            cs_cycle_args += ['--rinput', table]

    if import_prefix:
        # Does not raise when the node is missing: the helper returns None in that case.
        renew_expiration_time_or_check_existence(client, import_prefix, CS_IMPORT_OUTPUT_TTL)

    cs_cycle_args += (['--copy-import', '--import-prefix', import_prefix] if import_prefix else ['--import', ])

    for tag in base_tags:
        cs_cycle_args += ['--basename', tag]

    run_cs_tool(yt_token, proxy, bs_release_yt_dir, 'cs_cycle', cs_cycle_args, settings_spec=settings_spec, task_id=task_id, yt_pool=yt_pool)
    chkdb_data = _get_chkdb_data(yt_token, proxy, prefix)
    logging.info('Chkdb data:')
    logging.info(json.dumps(chkdb_data, indent=4))
    # The results are collected: reset the task directory TTL to the final value
    # regardless of the TTL currently stored on the node.
    renew_expiration_time(client, task_prefix, REDUCE_OUTPUT_TTL_FINAL, ignore_current_ttl=True)

    base_filenames = {tag: '{}.{}.yabs.zstd_7'.format(expected_base_ver, tag) for tag in base_tags}

    # Dump the directory listing to the logs; failures of `ls` are ignored (check=False).
    sdk_process.run_process('ls -alth ' + target_dir, shell=True, check=False, log_prefix='target_dir_contents.log')

    return base_filenames, chkdb_data


class CSImportError(common.errors.TaskError):
    """Unexpected exception raised while running an import action."""

    def __init__(self, action, exc):
        # action: name of the import action; exc: the original exception
        self.action, self.exc = action, exc

    def __str__(self):
        message = 'Exception in {}: {}'
        return message.format(self.action, self.exc)


class CSImportFailure(common.errors.TaskFailure):
    """Task failure raised while running an import action."""

    def __init__(self, action, exc):
        # action: name of the import action; exc: the original exception
        self.action, self.exc = action, exc

    def __str__(self):
        message = '{} failed: {}'
        return message.format(self.action, self.exc)


@trace_calls(save_arguments=(3, 'base_tags'))
def run_save_input(
    yt_token,
    bs_release_yt_dir,
    archive_root,
    base_tags=None,
    settings_spec=None,
    task_id=None,
):
    """Run ``cs import --save-input <archive_root>`` for the given base tags.

    :param base_tags: base tags passed as ``--basename``; ``None`` means no tags
    :return: value returned by ``run_cs_tool`` (path of the file with the tool's stdout)
    """
    proxy = YT_PROXY

    args = ['--save-input', archive_root]
    # base_tags defaults to None; normalise it the same way the other runners do
    # instead of failing with TypeError on iteration.
    for tag in base_tags or []:
        args += ['--basename', tag]

    spec_path = run_cs_tool(
        yt_token=yt_token,
        proxy=proxy,
        cs_cycle_dir=bs_release_yt_dir,
        tool='import',
        args=args,
        log_suffix='save_input',
        settings_spec=settings_spec,
        task_id=task_id,
    )

    logging.info("Save input completed")

    return spec_path


def _get_import_spec(token, input_spec_path, tmp_path, update=None, action=None, yt_pool=None):
    """Prepare temporary YT directories and write the import spec that points at them.

    Creates ``yql_tmp_folder[/<action>]``, ``file_storage`` and ``table_storage``
    under `tmp_path`, then writes a copy of the spec from `input_spec_path`
    updated with those paths, the pool (YT_POOL when `yt_pool` is falsy) and
    the entries of `update` (which take precedence).

    :return: path of the processed spec file, as returned by ``_preprocess_spec``
    """
    from yt.wrapper import YtClient, ypath_join

    client = YtClient(proxy=YT_PROXY, token=token)

    yql_yt_tmp_folder = ypath_join(tmp_path, 'yql_tmp_folder')
    if action:
        yql_yt_tmp_folder = ypath_join(yql_yt_tmp_folder, action)
    yt_file_cache = ypath_join(tmp_path, 'file_storage')
    table_storage = ypath_join(tmp_path, 'table_storage')

    for directory in (yql_yt_tmp_folder, yt_file_cache, table_storage):
        client.create('map_node', directory, recursive=True, ignore_existing=True)

    # FIXME need YT-8468 to get rid of unique YQL tmp folders
    spec_update = {
        'yt_file_cache': yt_file_cache,
        'yql_yt_tmp_folder': yql_yt_tmp_folder,
        'pool': yt_pool or YT_POOL,
    }
    spec_update.update(update or {})
    return _preprocess_spec(client, input_spec_path, spec_update, action)


@trace_calls(save_arguments=(5, 'base_tags', 10, 'importers', 11, 'prepare_mysql'))
def run_cs_cycle_import(
        import_destination_path,
        yt_token,
        bs_release_yt_dir,
        input_spec_path,
        date,
        base_tags=None,
        settings_spec=None,
        write_yt_debug_log=False,
        log_path='.',
        task_id=None,
        importers=None,
        prepare_mysql=True,
        yt_pool=YT_POOL,
        **kwargs
):
    """
    Run cs_cycle --import
    Return import_prefix to use for binary base generation.
    This might be an import_prefix of some other task that produced identical tables, not of our task!

    Extra keyword arguments are accepted and ignored.
    """
    from yt.wrapper import YtClient, ypath_join

    yt_client = YtClient(proxy=YT_PROXY, token=yt_token)
    tmp_path = yt_utils.create_tmp_node(yt_client, import_destination_path)
    export_prefix = ypath_join(EXPORTS_ROOT, 'tmp', str(task_id))
    yt_utils.create_node(export_prefix, yt_client, ttl=EXPORT_TMP_OUTPUT_TTL)

    if prepare_mysql:
        logging.debug("Prepare MySQL")
        _prepare_mysqls()

    spec_path = _get_import_spec(
        yt_token,
        input_spec_path,
        tmp_path,
        update={
            'dependencies_path': import_destination_path,
            'destination_prefix': import_destination_path,
            'export_prefix': export_prefix,
        },
        yt_pool=yt_pool,
    )

    now_unixtime = datetime.datetime.strptime(date, "%Y%m%d").strftime("%s")
    cs_cycle_args = [
        '--import',
        '--import-spec', spec_path,
        '--prefix', import_destination_path,
        '--now-unixtime', now_unixtime,
        '--write-imports-outputs-to-file',
        '--write-imports-log-prefix', log_path,
    ]
    for tag in base_tags or []:
        cs_cycle_args.extend(('--basename', tag))

    # A cluster map (with the default instances only) is built only when MySQL was prepared.
    cluster_map_tags = [] if prepare_mysql else None

    run_cs_tool(
        yt_token=yt_token,
        proxy=YT_PROXY,
        cs_cycle_dir=bs_release_yt_dir,
        tool='cs_cycle',
        args=cs_cycle_args,
        cluster_map_tags=cluster_map_tags,
        settings_spec=settings_spec,
        tmp_path=tmp_path,
        yt_pool=yt_pool,
        write_yt_debug_log=write_yt_debug_log,
        allow_task_restart=False,
        task_id=task_id,
        importers=importers,
    )
    yt_client.remove(tmp_path, recursive=True, force=True)
    logging.info("Completed all imports.")
    return import_destination_path


@trace_calls(save_arguments=(5, 'actions', 6, 'base_tags', 10, 'prepare_mysql'))
def run_cs_import(
        import_destination_path,
        yt_token,
        bs_release_yt_dir,
        input_spec_path,
        date,
        actions=None,
        base_tags=None,
        settings_spec=None,
        write_yt_debug_log=False,
        task_id=None,
        prepare_mysql=True,
        yt_pool=YT_POOL,
        **kwargs
):
    """
    Run cs_import.
    Return import_prefix to use for binary base generation.
    This might be an import_prefix of some other task that produced identical tables, not of our task!

    Every action is run by a separate ``cs import`` process, all of them in
    parallel (one thread per action). The first failure is re-raised as soon as
    it is detected. ``actions=None`` or an empty list means there is nothing to
    run. Extra keyword arguments are accepted and ignored.

    :raises CSImportFailure: an action failed with ``TaskFailure``
    :raises CSImportError: an action failed with an unexpected exception
    :raises RuntimeError: waiting finished without failures while some action is still running
    """
    from yt.wrapper import YtClient, ypath_join

    client = YtClient(proxy=YT_PROXY, token=yt_token)
    tmp_path = yt_utils.create_tmp_node(client, import_destination_path)
    export_prefix = ypath_join(EXPORTS_ROOT, 'tmp', str(task_id))
    yt_utils.create_node(export_prefix, client, ttl=EXPORT_TMP_OUTPUT_TTL)

    if prepare_mysql:
        logging.debug("Prepare MySQL")
        _prepare_mysqls()

    base_tags = base_tags or []

    def run_action(action):
        args = [
            '--spec', _get_import_spec(yt_token, input_spec_path, tmp_path, action=action, yt_pool=yt_pool),
            '--destination-prefix', import_destination_path,
            '--export-prefix', export_prefix,
            '--now-unixtime', (datetime.datetime.strptime(date, "%Y%m%d")).strftime("%s"),
            action,
        ]
        for tag in base_tags:
            args += ['--basename', tag]
        try:
            run_cs_tool(
                yt_token=yt_token,
                proxy=YT_PROXY,
                cs_cycle_dir=bs_release_yt_dir,
                tool='import',
                args=args,
                cluster_map_tags=[] if prepare_mysql else None,
                log_suffix=action,
                settings_spec=settings_spec,
                tmp_path=tmp_path,
                yt_pool=yt_pool,
                write_yt_debug_log=write_yt_debug_log,
                allow_task_restart=False,
                task_id=task_id,
            )
        except common.errors.TaskFailure as exc:
            raise CSImportFailure(action, exc)
        except common.errors.TemporaryError:
            # Temporary errors are propagated untouched.
            raise
        except Exception as exc:
            raise CSImportError(action, exc)
        logging.info("Import %s completed", action)
        return action

    # `actions` defaults to None: treat it as "no actions" instead of crashing in sorted().
    actions = sorted(actions or [])

    # ThreadPoolExecutor rejects max_workers=0, so the pool is created only when there is work.
    if actions:
        pool = futures.ThreadPoolExecutor(max_workers=len(actions))
        import_fts = [pool.submit(run_action, action) for action in actions]
        futures.wait(import_fts, return_when=futures.FIRST_EXCEPTION)

        all_completed = True
        for ft, action in zip(import_fts, actions):
            try:
                # timeout=0: only inspect the state, never block on a running importer.
                ft.result(timeout=0)
            except futures.TimeoutError:
                logging.info("%s has not finished yet", action)
                all_completed = False
            except Exception:
                logging.exception("%s FAILED", action)
                raise
            else:
                logging.info("%s completed successfully", action)

        if not all_completed:
            raise RuntimeError(
                "No importer failed, but there still are running importers - "
                "most likely, this is a bug in the task code."
            )

    client.remove(tmp_path, recursive=True, force=True)
    logging.info("Completed all imports.")
    return import_destination_path


def fill_node_attributes(path, yt_client, node_attributes=None):
    """Set the given attributes on the YT node `path` inside a single transaction.

    For every attribute a waitable shared lock on the attribute key is
    requested (waiting up to 10 * 1000, passed as ``wait_for``) before the
    value is written.

    :param node_attributes: dict attribute name -> value; ``None`` means no attributes
    """
    from yt.wrapper import ypath_join

    attributes = node_attributes or {}
    lock_wait = 10 * 1000
    with yt_client.Transaction():
        for name, value in attributes.iteritems():
            yt_client.lock(path, mode='shared', attribute_key=name, waitable=True, wait_for=lock_wait)
            yt_client.set_attribute(path, name, value)
            logging.debug('Set attribute %s = %s', ypath_join(path, '@' + name), value)


def _wait_bases(filenames, target_dir, transport_process, transport_timeout):
    """Block until every name from `filenames` is present in `target_dir`.

    :param transport_process: object with ``poll()``; a non-None result means
        the transport has exited
    :param transport_timeout: maximal waiting time in seconds
    :raises RuntimeError: the transport exited while files are still missing
    :raises TemporaryOrFailure: the timeout expired while files are still missing
    """
    deadline = time.time() + transport_timeout
    pause = 5
    while True:
        present = os.listdir(target_dir)
        logging.debug("Files in %s:\n%s", target_dir, '\n'.join(present))
        missing = frozenset(filenames) - frozenset(present)
        if not missing:
            return
        logging.info("Waiting for %s", ', '.join(missing))

        retcode = transport_process.poll()
        if retcode is not None:
            raise RuntimeError("Transport died before arrival of all bases with exit code %s" % retcode)
        if time.time() > deadline:
            raise TemporaryOrFailure("Base arrival timed out: %s are still missing" % ', '.join(missing))

        time.sleep(pause)
        # NOTE(review): the cap equals the initial value, so the pause stays at 5 seconds.
        pause = min(pause * 2, 5)


# Credentials of the MySQL account that _prepare_mysqls() grants access to and
# that run_cs_tool() passes to the tools via --mysql-user / --mysql-password.
MYSQL_USER = 'dbmk'
MYSQL_PASSWORD = 'q12b53b8'


@trace_calls(save_arguments='all')
def _prepare_mysqls(base_tags=None):
    """Grant MYSQL_USER access to the local MySQL instance.

    Connects as ``root`` through the unix socket of the local instance and
    executes a GRANT statement for MYSQL_USER connecting from any host.

    :param base_tags: accepted for interface compatibility; not used by the body
    """
    base_tags = base_tags or []
    # All tables are stored as Sandbox resources, no reason to set strong password and store it securely.
    # However, we set non-trivial grants for integration testing (i.e. inserts by mkdb should fail)
    query = (
        "GRANT SELECT, INSERT, CREATE TEMPORARY TABLES, PROCESS, SHOW DATABASES, REPLICATION CLIENT "
        "ON *.* TO '{}'@'%' IDENTIFIED BY '{}';".format(MYSQL_USER, MYSQL_PASSWORD)
    )

    import MySQLdb

    socket_path = '/var/run/mysqld.yabs/mysqld.sock'
    logging.info("Granting remote access to MySQL for user %s to %s", MYSQL_USER, socket_path)
    connection = MySQLdb.connect(unix_socket=socket_path, user='root', db='yabsdb')
    # Close the cursor and the connection even when the query fails.
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(query)
            cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()


def prepare_yt_env(yt_token, tmp_path=None, yt_pool=None):
    """Build the environment for a CS tool subprocess.

    The result contains YT_TOKEN, optionally YT_FILE_STORAGE / YT_TEMP_DIR
    (derived from `tmp_path`) and YT_POOL, overlaid with the current process
    environment. Variables already present in ``os.environ`` win over the
    values computed here.

    The environment is logged without the YT_TOKEN entry.
    """
    defaults = {'YT_TOKEN': yt_token}
    if tmp_path:
        defaults["YT_FILE_STORAGE"] = tmp_path + '/file_storage'
        defaults["YT_TEMP_DIR"] = tmp_path + '/table_storage'
    if yt_pool:
        defaults["YT_POOL"] = yt_pool

    env = defaults
    env.update(os.environ)

    # Never write the token to the logs.
    printable_env = env.copy()
    del printable_env['YT_TOKEN']
    logging.info("YT environment:\n%s", printable_env)

    return env


def run_cs_tool(
    yt_token, proxy,
    cs_cycle_dir, tool, args,
    cluster_map_tags=None,
    log_suffix=None,
    settings_spec=None,
    tmp_path=None,
    yt_pool=None,
    write_yt_debug_log=False,
    allow_task_restart=False,
    task_id=None,
    importers=None,
    settings_file_name='settings_spec.json',
    owners=('yabs-cs', ),
):
    """Run a CS tool as a subprocess and wait for it and its process group to finish.

    The command line is `args` extended with ``--tooldir``, ``--shm-environment``
    and, depending on the parameters, ``--proxy``, ``--pool``, ``--cluster-map``
    (with MySQL credentials), ``--owner``, ``--settings-spec``, ``--task-id``,
    ``--importer`` and ``--yt-log``. Stdout and stderr are written to
    ``<logs>/<tool>[_<log_suffix>].out`` / ``.err``.

    :param cluster_map_tags: when not None, a cluster map file is generated for
        these tags and passed to the tool
    :param settings_spec: settings text; written to `settings_file_name` in the
        logs folder unless that file already exists
    :param allow_task_restart: selects the exception raised on a non-zero exit code
    :return: path of the file with the tool's stdout
    :raises common.errors.TemporaryError: tool failed and `allow_task_restart` is true
    :raises common.errors.TaskFailure: tool failed and `allow_task_restart` is false
    """

    my_addr = get_my_ipv6(raise_class=common.errors.TemporaryError)

    logs_dir = sdk_paths.get_logs_folder()
    log_name = tool if log_suffix is None else tool + '_' + log_suffix

    cluster_map_path = os.path.join(logs_dir, 'cluster_map_{}.json'.format(uuid.uuid4()))
    updated_args = args[:] + [
        '--tooldir', cs_cycle_dir,
        '--shm-environment', 'default',
    ]
    if proxy:
        updated_args.extend(['--proxy', proxy])
    if yt_pool:
        updated_args.extend(['--pool', yt_pool])
    if cluster_map_tags is not None:
        _build_cluster_map(cs_cycle_dir, cluster_map_tags, cluster_map_path, my_addr)
        updated_args.extend(['--cluster-map', cluster_map_path, '--mysql-user', MYSQL_USER, '--mysql-password', MYSQL_PASSWORD, ])
    if owners:
        for owner in owners:
            updated_args.extend(['--owner', owner])

    if settings_spec:
        settings_spec_path = os.path.join(logs_dir, settings_file_name)
        # The file is written once and reused by later calls with the same file name.
        if not os.path.exists(settings_spec_path):
            with open(settings_spec_path, "w") as settings_file:
                settings_file.write(settings_spec)
            logging.info("Saved CSSettings spec to %s", settings_spec_path)
        updated_args.extend(['--settings-spec', settings_spec_path])

    if task_id:
        updated_args.extend(['--task-id', task_id])

    if importers:
        updated_args += itertools.chain.from_iterable(['--importer', importer] for importer in importers)

    cmdline = get_cs_cmdline(
        cs_cycle_dir,
        tool,
        *updated_args
    )

    # Arguments may be non-strings (e.g. a numeric task id); Popen needs strings.
    cmd = [str(p) for p in cmdline]
    env = prepare_yt_env(yt_token, tmp_path, yt_pool)

    out_path = os.path.join(logs_dir, log_name + '.out')
    err_path = os.path.join(logs_dir, log_name + '.err')

    cmd_attempt = cmd
    if write_yt_debug_log:
        yt_log_path = os.path.join(logs_dir, log_name + '.yt-log.err')
        cmd_attempt = cmd + ['--yt-log', yt_log_path]

    logging.info('Run %s. Save stdout to %s, stderr to %s', ' '.join(cmd_attempt), out_path, err_path)
    try:
        with open(out_path, 'w') as out, open(err_path, 'w') as err, trace_subprocess(cmd_attempt):
            p = None
            try:
                # cs_cycle sends SIGINT to its process group, so the child must get
                # its own session/process group (setsid() before exec()). This also
                # makes the group id equal to p.pid, which _stop_cs_tool relies on
                # when it calls os.killpg(p.pid, ...).
                p = subprocess.Popen(cmd_attempt, stdout=out, stderr=err, env=env, preexec_fn=os.setsid)
                p.wait()
            finally:
                if p is not None:
                    # YT library stops jobs correctly only on SIGINT...
                    _stop_cs_tool(p, tool)
    except subprocess.CalledProcessError as exc:
        if allow_task_restart:
            raise common.errors.TemporaryError(str(exc))
        raise common.errors.TaskFailure(str(exc))
    else:
        return out_path


def _stop_cs_tool(proc, tool_name):
    """Make sure the process group of `proc` is gone, escalating the signals.

    A still running tool gets SIGINT; then the group is given 120 seconds to
    exit, after that SIGTERM and 10 more seconds, and finally SIGKILL.

    :param proc: ``subprocess.Popen`` object whose pid is also its process group id
    :param tool_name: tool name used in log messages and in the raised error
    :raises subprocess.CalledProcessError: from ``_wait_cs_tool`` when the
        group has exited and the exit code of `proc` is not zero
    """
    if proc.poll() is None:
        logging.info("Stopping %s gracefully...", tool_name)
        os.killpg(proc.pid, signal.SIGINT)
    if _wait_cs_tool(proc, tool_name, 120):
        return
    logging.warning("Failed to stop %s gracefully, senging SIGTERM", tool_name)
    os.killpg(proc.pid, signal.SIGTERM)
    if _wait_cs_tool(proc, tool_name, 10):
        return
    logging.warning("Failed to terminate %s, senging SIGKILL", tool_name)
    os.killpg(proc.pid, signal.SIGKILL)
    # Wait for the killed group as well: this collects the exit status of the
    # tool, so a tool that had to be killed is not silently reported as successful.
    if not _wait_cs_tool(proc, tool_name, 10):
        logging.error("Process group of %s is still alive after SIGKILL", tool_name)


def _wait_cs_tool(proc, tool_name, timeout=10, poll_interval=1):
    """Wait until the process group of `proc` disappears.

    :param proc: ``subprocess.Popen`` object whose pid is also its process group id
    :param timeout: maximal waiting time in seconds
    :param poll_interval: pause between checks in seconds
    :return: True if the group exited and `proc` finished with exit code 0,
        False if the group is still alive after `timeout`
    :raises subprocess.CalledProcessError: the group exited and the exit code
        of `proc` is not zero
    """
    deadline = time.time() + timeout
    while True:
        # Reap the tool if it has already exited: an unreaped (zombie) process
        # still counts as a member of its group, so without this call the group
        # would look alive until the timeout expires.
        proc.poll()
        if not _is_pg_alive(proc.pid):
            break
        if time.time() > deadline:
            logging.info("Waiting for %s timed out", tool_name)
            return False
        time.sleep(poll_interval)

    exitcode = proc.poll()
    if exitcode != 0:
        raise subprocess.CalledProcessError(exitcode, tool_name)
    logging.info("%s finished successfully", tool_name)

    return True


def _is_pg_alive(pid):
    """Return True if the process group `pid` exists.

    Signal 0 is used, so nothing is actually delivered to the group.
    ESRCH means the group does not exist; any other OSError is propagated.
    """
    try:
        os.killpg(pid, 0)
        return True
    except OSError as err:
        if err.errno == errno.ESRCH:
            return False
        raise


def _build_cluster_map(yabscs_path, base_tags, cluster_map_path, host):
    """Write a cluster map JSON file that points every MySQL instance at `host`.

    The instances are the MySQL tags of `base_tags` plus a fixed set of
    instances required by import actions. Every instance maps to the same
    single-element list describing `host` and the port from ``get_mysql_port()``.

    :raises common.errors.TemporaryError: the written file cannot be parsed back as JSON
    """
    instances = set(get_mysql_tag(yabscs_path, tag) for tag in base_tags)
    # FIXME required by import actions
    instances.update({'yabs', 'yabsistat', 'yabsistat01', 'yabsistat03'})

    port = get_mysql_port()
    endpoints = [{'host': host, 'port': port, 'url': '[{}]:{}'.format(host, port)}]
    cluster_map = dict((instance, endpoints) for instance in instances)

    with open(cluster_map_path, 'w') as output_file:
        json.dump(cluster_map, output_file, indent=4)

    # Read the file back to make sure that it contains valid JSON.
    try:
        with open(cluster_map_path) as written_file:
            json.load(written_file)
    except Exception:
        logging.exception("Generated broken cluster map file:")
        raise common.errors.TemporaryError("Generated broken cluster map file")


def _get_chkdb_data(token, proxy, prefix):
    """Collect chkdb rows from the ``load*`` nodes under `prefix`.

    In every ``load*`` node the first child (in sorted order) named ``base`` or
    ``chkdb`` is read. Rows are grouped as ``data[BaseName][path] -> list``:

    * ``Path == "bases.check_size"``: the row's ``Value``;
    * string ``Dump``: the dump itself, stored under ``"bases.dump"``;
    * string ``Path``: dict with Key, Value, ItemSize, ItemCount, Size, AllocatedSize.

    Rows lacking any of the required fields are skipped.

    :raises TemporaryOrFailure: nothing was collected
    """
    import yt.wrapper.client
    client = yt.wrapper.client.Yt(proxy=proxy, config={"token": token})

    stat_fields = ("Key", "Value", "ItemSize", "ItemCount", "Size", "AllocatedSize")
    data = {}

    for node in client.list(prefix):
        if not node.startswith('load'):
            continue
        load_path = '{}/{}'.format(prefix, node)
        table_name = next(name for name in sorted(client.list(load_path)) if name in ('base', 'chkdb'))
        table_path = load_path + '/' + table_name

        for row in client.read_table(table_path, format=yt.wrapper.JsonFormat()):
            try:
                tag = row['BaseName']
                row_path = row['Path']
                dump = row['Dump']
                if row_path == "bases.check_size":
                    key, val = row_path, row['Value']
                elif isinstance(dump, basestring):  # else it is NoneType
                    key, val = "bases.dump", dump
                elif isinstance(row_path, basestring):  # else it is NoneType
                    key = row_path
                    val = dict((field, row[field]) for field in stat_fields)
                else:
                    continue
            except KeyError:
                # The row lacks one of the required fields.
                continue
            data.setdefault(tag, {}).setdefault(key, []).append(val)

    if not data:
        raise TemporaryOrFailure('No chkdb data found in %s' % prefix)

    return data


def renew_expiration_time_or_check_existence(client, node, lower_ttl_limit, ignore_current_ttl=False):
    """Prolong the life of a YT node; tolerate a node that does not exist.

    The TTL is the maximum of `lower_ttl_limit` and the value stored in the
    ``__ttl_for_expiration_time`` attribute of the node (the stored value is
    skipped when `ignore_current_ttl` is true or when it is not a number).
    The TTL is written back to that attribute and ``expiration_time`` is set
    to the current UTC time plus the TTL. A lock conflict while writing the
    attributes is logged and otherwise ignored.

    :param lower_ttl_limit: minimal TTL in seconds
    :return: the TTL in seconds, or None if the operation failed and the node does not exist
    :raises Exception: the original error when the operation failed while the node exists
    """

    TTL_ATTR = '__ttl_for_expiration_time'

    try:
        if ignore_current_ttl:
            ttl = lower_ttl_limit
        else:
            ttl = client.get_attribute(node, TTL_ATTR, "0")
            try:
                ttl = float(ttl)
            except (KeyError, TypeError, ValueError):
                # TypeError covers attribute values that are neither strings nor
                # numbers; they are as "bad" as unparsable strings.
                logging.warning("Node %s has bad %s attribute: %s", node, TTL_ATTR, ttl)
                ttl = lower_ttl_limit
            ttl = max(ttl, lower_ttl_limit)

        expiration_time_str = (datetime.datetime.utcnow() + datetime.timedelta(seconds=ttl)).isoformat()

        import yt.wrapper.errors as errors
        try:
            client.set_attribute(node, TTL_ATTR, str(ttl))
            client.set_attribute(node, 'expiration_time', expiration_time_str)  # expiration_time is non-transactional
        except errors.YtCypressTransactionLockConflict:
            logging.warning('Cannot take lock, node attributes are already locked by another transaction')
            attributes_set = False
        else:
            attributes_set = True
    except Exception:
        if client.exists(node):
            logging.error("Failed to set expiration_time of %s (but node exists right now):", node, exc_info=True)
            raise
        logging.info("Failed to set expiration_time of %s (and node does not exist right now):", node, exc_info=True)
        return None
    # Do not report success when the attributes were left untouched because of a lock conflict.
    if attributes_set:
        logging.info("Successfully set expiration_time of %s to %s", node, expiration_time_str)
    return ttl


class NodeDoesNotExist(Exception):
    """Raised by `renew_expiration_time` when the YT node is missing."""


def renew_expiration_time(client, node, lower_ttl_limit, ignore_current_ttl=False):
    """Strict variant of `renew_expiration_time_or_check_existence`.

    :return: the TTL in seconds
    :raises NodeDoesNotExist: the underlying call returned None
    """
    applied_ttl = renew_expiration_time_or_check_existence(client, node, lower_ttl_limit, ignore_current_ttl)
    if applied_ttl is not None:
        return applied_ttl
    raise NodeDoesNotExist(node)


def renew_input_spec_expiration_time(yt_client, input_spec_data, ttl):
    """Renew the expiration time of the input archive referenced by the spec.

    The archive root is read from the auxiliary data section of
    `input_spec_data`. If it is not there, a warning is logged and nothing else happens.
    """
    auxiliary_data = input_spec_data.get(AUXILLARY_DATA_IN_SPEC_KEY, {})
    try:
        archive_root = auxiliary_data[ARCHIVE_ROOT_KEY]
    except KeyError as missing_key:
        logging.warning(
            "No %s in %s, cannot renew input archive expiration time. Dict was %s",
            missing_key, AUXILLARY_DATA_IN_SPEC_KEY, auxiliary_data
        )
        return
    renew_expiration_time(yt_client, archive_root, ttl)


def _preprocess_spec(client, input_spec_path, spec_update=None, spec_suffix=''):
    """Write a copy of the JSON spec with `spec_update` applied to the logs folder.

    :param client: not used by the body
    :param spec_update: top-level keys to set in the spec; None leaves the spec as is
    :param spec_suffix: suffix of the output file name ``processed_import_spec_<suffix>.json``
    :return: absolute path of the written file
    """
    with open(input_spec_path) as source_file:
        spec = json.load(source_file)
    if spec_update is not None:
        spec.update(spec_update)

    file_name = 'processed_import_spec_{}.json'.format(spec_suffix)
    output_spec_path = os.path.abspath(os.path.join(sdk_paths.get_logs_folder(), file_name))
    with open(output_spec_path, 'w') as target_file:
        json.dump(spec, target_file, indent=4)

    return output_spec_path


def skip_node_by_attributes(filter_attributes, node_attributes):
    """Tell whether a node must be skipped because its attributes do not fit the filter.

    A node fits when it is marked as reusable (the attribute is exactly True)
    and, for every filter attribute, the node has the attribute and:

    * CS_IMPORT_VER_ATTR: the filter value is contained in the node value;
    * BIN_DBS_NAMES_ATTR: the filter value is a subset of the node value;
    * IMPORTERS_ATTR: for each importer the filter dict passes ``issubset_dict``
      against the node's dict for that importer;
    * anything else: the values are equal.

    :return: True to skip the node, False if it fits
    """
    if node_attributes.get(IS_REUSABLE_ATTR, False) is not True:
        logging.debug('Node is not reusable')
        return True

    for attr, expected in filter_attributes.items():
        if attr not in node_attributes:
            logging.debug('node has no attribute "%s"', attr)
            return True

        actual = node_attributes[attr]

        if attr == CS_IMPORT_VER_ATTR:
            if expected not in actual:
                logging.debug('attribute %s: %s not in %s', attr, expected, actual)
                return True

        elif attr == BIN_DBS_NAMES_ATTR:
            if not set(expected).issubset(actual):
                logging.debug('attribute %s: %s is not subset of %s', attr, set(expected), actual)
                return True

        elif attr == IMPORTERS_ATTR:
            for importer_name, importer_filter in expected.items():
                importer_attributes = actual.get(importer_name, {})
                if not issubset_dict(importer_filter, importer_attributes):
                    logging.debug(
                        'attribute %s.%s: %s is not subset of %s',
                        attr, importer_name, importer_filter, importer_attributes
                    )
                    return True

        elif not (expected == actual):
            logging.debug('attribute %s: %s != %s', attr, expected, actual)
            return True

    return False


@trace_calls
def find_node_to_reuse(yt_client, path, filter_attributes=None, add_attributes=None, check_task_status=True, completed_attribute=IMPORT_COMPLETED_ATTRIBUTE):
    """Find a child node of ``path`` that can be reused.

    Children are listed in sorted order and examined from the last one to the
    first; the first child passing every check is returned.

    :param yt_client: YT client used to list ``path``
    :param path: path of the directory whose children are examined
    :param filter_attributes: attributes a node must match, see
        ``skip_node_by_attributes``; defaults to None (no extra filtering)
    :type filter_attributes: dict, optional
    :param add_attributes: names of extra attributes to request for every node,
        defaults to None
    :type add_attributes: iterable of str, optional
    :param check_task_status: also check the status of the task recorded in
        ``CREATED_BY_TASK_ATTRIBUTE``, defaults to True
    :type check_task_status: bool, optional
    :param completed_attribute: name of the attribute marking the node as
        completed, defaults to ``IMPORT_COMPLETED_ATTRIBUTE``
    :type completed_attribute: str, optional
    :return: the matching node converted with ``yson_to_json`` (a dict with
        ``'$value'`` and ``'$attributes'`` keys), or None if nothing matches
    """
    from yt.yson import yson_to_json

    filter_attributes = filter_attributes or {}
    add_attributes = add_attributes or []
    # list(...) instead of ``keys() + ...``: dict views cannot be concatenated
    # with a list on Python 3, and add_attributes may then be any iterable.
    attributes = list(filter_attributes) + list(add_attributes) + [IS_REUSABLE_ATTR, completed_attribute, CREATED_BY_TASK_ATTRIBUTE]

    logging.info("Finding node by attributes: %s", attributes)

    yson_nodes = yt_client.list(
        path=path,
        absolute=True,
        sort=True,
        attributes=attributes
    )

    # Walk the sorted listing backwards, so the last node in sort order wins.
    for yson_node in yson_nodes[::-1]:
        node = yson_to_json(yson_node)
        # presumably a node without attributes is converted to a plain string - verify
        if not isinstance(node, dict):
            continue

        node_path = node['$value']
        node_attributes = node['$attributes']

        if skip_node_by_attributes(filter_attributes, node_attributes):
            logging.debug('Skip node %s: attributes doesn\'t match', node_path)
            continue

        if check_task_status:
            try:
                task_id = node_attributes[CREATED_BY_TASK_ATTRIBUTE]
            except KeyError:
                logging.debug('Skip node %s: no attribute %s', node_path, CREATED_BY_TASK_ATTRIBUTE)
                continue
            try:
                task = sdk2.Task[task_id]
            except common_errors.TaskError as e:
                logging.debug('Skip node %s: failed to get task %s. %s', node_path, task_id, e)
                continue

            if task.status in chain(ctt.Status.FAILURE, ctt.Status.Group.BREAK):
                logging.debug('Skip node %s: task %s in bad status %s', node_path, task_id, task.status)
                continue

            # A finished task must have marked its node as completed; a task in
            # any other status is accepted even without the completion mark.
            if task.status in ctt.Status.Group.FINISH and not node_attributes.get(completed_attribute, False):
                logging.debug('Skip node %s: task %s finished but node is not marked as completed', node_path, task_id)
                continue

        return node

    return None


def get_jailed_yt_token():
    """Get YT token without access to production data.

    :return: the value read via ``sdk2.Vault.data`` for the
        ``'yabscs_jailed_yt_token'`` entry of ``'robot-yabs-cs-sbjail'``
    """
    vault_owner = 'robot-yabs-cs-sbjail'
    vault_item = 'yabscs_jailed_yt_token'
    return sdk2.Vault.data(vault_owner, vault_item)


@trace_calls
def get_cs_import_info(tooldir, settings_spec=None, st_update_mode=False, outputs_version=2):
    """Call 'cs import --print-info' and return its parsed output.

    The tool is run through ``run_cs_tool`` with an empty YT token and proxy;
    the file it reports is then read as JSON.

    :param tooldir: CS directory
    :type tooldir: str
    :param settings_spec: CS settings spec, defaults to None
    :param st_update_mode: add the ``--st-update-mode`` flag, defaults to False
    :type st_update_mode: bool, optional
    :param outputs_version: value for ``--outputs-version``, defaults to 2
    :type outputs_version: int, optional
    :return: content of the tool's output file parsed as JSON
    """
    # NOTE(review): outputs_version is forwarded as given (an int by default);
    # presumably run_cs_tool stringifies its arguments - confirm.
    cli_args = ['--print-info', '--outputs-version', outputs_version]
    cli_args += ['--st-update-mode'] if st_update_mode else []

    result_path = run_cs_tool(
        tool='import',
        cs_cycle_dir=tooldir,
        settings_spec=settings_spec,
        yt_token='',
        proxy='',
        log_suffix='print_info',
        args=cli_args,
    )

    with open(result_path) as result_file:
        parsed = json.load(result_file)
    return parsed


@trace_calls
def get_cs_settings_version(tooldir, settings_spec=None):
    """Call 'cs import --settings-version' and return its parsed output.

    The ``tooldir`` and ``basever`` settings are passed to the tool via
    ``--ignore-setting``. The tool is run through ``run_cs_tool`` with an empty
    YT token and proxy; the file it reports is then read as JSON.

    :param tooldir: CS directory
    :type tooldir: str
    :param settings_spec: CS settings spec, defaults to None
    :return: Mapping from importer's name to its settings version
    :rtype: dict
    """
    cli_args = ['--settings-version']
    for ignored_setting in ('tooldir', 'basever'):
        cli_args += ['--ignore-setting', ignored_setting]

    result_path = run_cs_tool(
        tool='import',
        cs_cycle_dir=tooldir,
        settings_spec=settings_spec,
        yt_token='',
        proxy='',
        log_suffix='settings_version',
        args=cli_args,
    )

    with open(result_path) as result_file:
        parsed = json.load(result_file)
    return parsed


def _get_cs_import_tables(tooldir):
    """Collect the MySQL tables queried by cs_import, grouped by instance.

    Runs ``get_cs_import_info`` and extracts table names from the ``SQL`` field
    of every entry in each action's ``queries`` list.

    :param tooldir: CS directory, passed to ``get_cs_import_info``
    :type tooldir: str
    :return: mapping from instance name to the set of tables used by its
        queries; queries without an ``Instance`` key are attributed to ``'yabs'``
    :rtype: collections.defaultdict
    :raises common.errors.TaskFailure: if walking the print-info output or
        extracting tables from a query fails for any reason
    """
    info = get_cs_import_info(tooldir)
    tables = defaultdict(set)
    try:
        # values() rather than itervalues(): the latter exists only on Python 2.
        for action_info in info.values():
            for item in action_info.get('queries', []):
                inst = item.get('Instance', 'yabs')
                query = item['SQL']
                query_tables = _extract_tables(query)
                tables[inst].update(query_tables)
    except Exception as exc:
        # Any failure here means the tool output has an unexpected shape;
        # report it as a task failure that points at the saved output.
        raise common.errors.TaskFailure(
            "Failed to extract list of SQL tables from output of cs_import --print_info:\n%s\n"
            "See cs_import_print_info.out.txt in task logs." % exc
        )

    return tables


def iter_db_tables(yabscs_path, base_tags=None, cs_import=False):
    """Yield the MySQL tables needed for the given base tags and, optionally, cs_import.

    :param yabscs_path: path passed to ``_get_tag_tables`` and
        ``_get_cs_import_tables``
    :param base_tags: tags whose tables are needed, defaults to None (no tags)
    :type base_tags: iterable, optional
    :param cs_import: also include the tables reported by
        ``_get_cs_import_tables``, defaults to False
    :type cs_import: bool, optional
    :return: generator of ``(instance, 'yabsdb', table)`` tuples; tables are
        gathered into per-instance sets, so the order is unspecified
    """
    needed_tables = defaultdict(set)
    for tag in base_tags or []:
        inst, tables = _get_tag_tables(yabscs_path, tag)
        needed_tables[inst].update(tables)

    if cs_import:
        # items() rather than iteritems(): the latter exists only on Python 2.
        for inst, tables in _get_cs_import_tables(yabscs_path).items():
            needed_tables[inst].update(tables)

    for inst, tables in needed_tables.items():
        for table in tables:
            yield inst, 'yabsdb', table
