"""Runner makes it possible to run SBYT's collector without the Coordinator."""

# TODO
# TODO
# TODO There is a counterpart of this file: /yabs/sbyt/supervisor/runner.py
# TODO When development becomes less active, the current file will be deleted and that file will be used instead.
# TODO For now, every change must be copied to both files.
# TODO
# TODO

import json
import os
import re
import subprocess

# from . import consts
# from . import util
# from . import ylogging


# logger = ylogging.get_logger()

INPUT_CTX = '_inputs'


def run(
    task,
    ytc,
    collector_group, collector_name,
    yt_proxy, input_pattern,
    app_options_file, options, invocation_id, stage,
    dest_path, dict_path, tmp_path, rpl_path, need_schematize,
    env,
    logs_chunk_size, should_remove_tmp_path,
):
    """
    Resolve the collector's input tables and launch the collector on them.

    The inputs are processed either in a single launch or in slices of
    ``logs_chunk_size`` tables. Every launch goes through ``run_collector``
    wrapped in ``util.Retry(subprocess.CalledProcessError, attempts=5)``.

    :param sandboxsdk.task.SandboxTask task: its ``ctx``, ``id`` and ``memoize_stage`` are used
    :param yt.wrapper ytc: used to look up inputs; also handed to ``run_collector``
    :param basestring collector_group:
    :param basestring collector_name:
    :param basestring yt_proxy:
    :param basestring input_pattern: passed to ``get_inputs`` when inputs are not cached yet
    :param file app_options_file: passed through ``update_app_options_file`` before use
    :param dict options: overrides merged into the options file (skipped when empty)
    :param int invocation_id: used for the single-launch case; when falsy an id
        is generated from ``task.id``
    :param basestring stage:
    :param basestring dest_path:
    :param basestring dict_path:
    :param basestring tmp_path:
    :param basestring rpl_path:
    :param bool need_schematize: forwarded to ``run_collector`` unchanged
    :param dict env:
    :param int logs_chunk_size: number of input tables per launch; ``0`` disables chunking
    :param bool should_remove_tmp_path:
    """
    app_options_file = update_app_options_file(app_options_file, options)

    # The resolved inputs are stored in the task context and reused from there:
    # after a task restart the pattern could match a different set of tables,
    # and feeding different input to the same invocation id leads to bad stats.
    if INPUT_CTX in task.ctx:
        inputs = task.ctx[INPUT_CTX]
    else:
        inputs = get_inputs(ytc, input_pattern)
        task.ctx[INPUT_CTX] = inputs

    from supervisor import ylogging
    ylogging.get_logger().info('collector inputs: %s', inputs)

    from supervisor import util
    retry_on_failed_process = util.Retry(subprocess.CalledProcessError, attempts=5)
    run_collector_ex = retry_on_failed_process(run_collector)

    def launch(input_tables, current_invocation_id):
        # Only the input tables and the invocation id differ between launches.
        run_collector_ex(
            ytc,
            collector_group, collector_name,
            yt_proxy, input_tables,
            app_options_file, current_invocation_id, stage,
            dest_path, dict_path, tmp_path, rpl_path, need_schematize,
            env,
            should_remove_tmp_path,
        )

    # NOTE(review): this module targets Python 2 (it uses xrange), where `/` on
    # ints truncates, so chunking starts only at two full chunks' worth of
    # inputs - confirm that this threshold is intended.
    split_into_chunks = logs_chunk_size != 0 and len(inputs) / logs_chunk_size > 1

    if not split_into_chunks:
        launch(inputs, invocation_id or generate_invocation_id(task.id))
        return

    for chunk_start_index in xrange(0, len(inputs), logs_chunk_size):
        input_tables = inputs[chunk_start_index:chunk_start_index + logs_chunk_size]
        # The stage name is built from the base names of the chunk's tables.
        # Presumably memoize_stage lets a restarted task skip finished chunks -
        # verify against the Sandbox SDK.
        memoize_stage_name = '_'.join(os.path.basename(i) for i in input_tables)
        with task.memoize_stage[memoize_stage_name](commit_on_entrance=False):
            launch(
                input_tables,
                generate_invocation_id(task.id, input_tables[0], chunk_start_index),
            )


def run_collector(
    ytc,
    collector_group, collector_name,
    yt_proxy, inputs,
    app_options_file, invocation_id, stage,
    dest_path, dict_path, tmp_path, rpl_path, need_schematize,
    env,
    should_remove_tmp_path,
):
    """
    Launch one collector process and wait for it to finish.

    The command line is ``<collector_group> run <collector_name>`` followed by
    the mandatory options, one ``--input`` per table and then the optional
    flags whose values are truthy.

    :param yt.wrapper ytc: used to create and optionally remove the temporary path
    :param basestring collector_group: first element of the command line
    :param basestring collector_name:
    :param basestring yt_proxy:
    :param list inputs: input tables, each passed as ``--input``
    :param app_options_file: value of ``--options-file``
    :param int invocation_id:
    :param basestring stage:
    :param basestring dest_path:
    :param basestring dict_path:
    :param basestring tmp_path: when falsy, ``generate_tmp_path(invocation_id)`` is used
    :param basestring rpl_path:
    :param bool need_schematize: adds ``--need-schematize`` when truthy
    :param dict env: environment passed to ``subprocess.check_call``
    :param bool should_remove_tmp_path: remove the temporary path after a successful run
    :raises subprocess.CalledProcessError: raised by ``check_call`` when the
        process exits with a non-zero code
    """
    if not tmp_path:
        tmp_path = generate_tmp_path(invocation_id)

    command = [collector_group, 'run', collector_name]
    command += ['--proxy', yt_proxy]
    command += ['--invocation-id', str(invocation_id)]
    command += ['--yt-tmp-path', tmp_path]
    command += ['--options-file', app_options_file]

    for input_table in inputs:
        command += ['--input', input_table]

    # Optional flags keep this order on the command line; falsy values are omitted.
    optional_flags = (
        ('--stage', stage),
        ('--yt-dest-path', dest_path),
        ('--yt-dict-path', dict_path),
        ('--yt-rpl-path', rpl_path),
    )
    for flag, value in optional_flags:
        if value:
            command += [flag, value]
    if need_schematize:
        command.append('--need-schematize')

    from supervisor import util
    util.yt_create_temp_directory(ytc, tmp_path)

    from supervisor import ylogging
    ylogging.get_logger().info('collector run:\nargs = %s', command)
    subprocess.check_call(command, env=env)

    # Reached only when check_call did not raise, so the temporary path of a
    # failed run is left in place.
    if should_remove_tmp_path:
        ytc.remove(tmp_path, recursive=True)


def get_inputs(ytc, input_pattern):
    """
    Return the sorted paths of the tables matching ``input_pattern``.

    ``input_pattern`` is split with ``os.path.split``: the head is the node
    searched via ``ytc.search`` and the tail is a regular expression applied
    with ``re.match`` to the base name of every candidate path.

    :param yt.wrapper ytc: object providing ``search(root, node_type=..., path_filter=...)``
    :param basestring input_pattern: ``<search root>/<regex for the table name>``
    :return: sorted list of matching paths converted with ``str``
    """
    search_root, name_regex = os.path.split(input_pattern)

    found = ytc.search(
        search_root,
        node_type=['table'],
        path_filter=lambda path: re.match(name_regex, os.path.basename(path)),
    )

    table_paths = [str(node) for node in found]
    table_paths.sort()
    return table_paths


def update_app_options_file(app_options_file, options):
    """
    Merge ``options`` into the JSON options file and return the file's path.

    The file is read, updated with ``dict.update`` (so keys from ``options``
    win) and written back to the same path. When ``options`` is falsy the file
    is not touched at all.

    :param app_options_file: path of a JSON file holding an object
    :param dict options: overrides to merge in; may be empty or ``None``
    :return: ``app_options_file`` unchanged
    """
    if options:
        with open(app_options_file) as source:
            merged_options = json.load(source)

        merged_options.update(options)

        # Overwrite the same file in place with the merged result.
        with open(app_options_file, 'w') as target:
            json.dump(merged_options, target)

    return app_options_file


def generate_invocation_id(task_id, input_table=None, input_index=0):
    """
    Derive an invocation id from the task id and, optionally, an input table.

    The base is ``task_id * 10 ** 9``. When ``input_table`` is given, its base
    name is stripped of ``-``, ``T`` and ``:``, then of a leading ``20`` and a
    trailing ``0000``; if the remainder parses as an integer it is added to the
    base, otherwise ``input_index`` is added instead.

    :param int task_id:
    :param basestring input_table: table path; falsy means "base id only"
    :param int input_index: fallback suffix for non-numeric table names
    :return: the invocation id
    """
    base_id = task_id * 10 ** 9
    if input_table:
        compact_name = os.path.basename(input_table)
        for separator in ('-', 'T', ':'):
            compact_name = compact_name.replace(separator, '')

        # Drop the leading "20" and the trailing "0000" when present.
        if compact_name[:2] == '20':
            compact_name = compact_name[2:]
        if compact_name[-4:] == '0000':
            compact_name = compact_name[:-4]

        try:
            suffix = int(compact_name)
        except ValueError:
            # The table name is not numeric: fall back to the table's index.
            suffix = input_index
        return base_id + suffix
    return base_id


def generate_tmp_path(invocation_id):
    """
    Build the default temporary path for an invocation.

    :param int invocation_id: converted with ``str`` and appended to the path
    :return: ``consts.YT_TEMP_PATH`` followed by ``/runner`` and the invocation id
    """
    from supervisor import consts
    temp_root = consts.YT_TEMP_PATH
    return temp_root + '/runner' + str(invocation_id)
