#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import codecs
import argparse
from nile.api.v1 import (
    clusters,
    filters as nf,
    extractors as ne,
    aggregators as na,
    Record,
    with_hints
)
from pytils import optionalize_schema, yql_run
import copy
import json
import qb2.api.v1.typing as qt
from yql.api.v1.client import YqlClient
import datetime
import requests


# Value passed as ``memory_limit`` to the nile joins and maps in main().
MEMORY_LIMIT = 16000
# Key under which form_dict() stores the "deleted" flag of a content group.
DELETED = 'OptionsDeleted'
# Row filter applied to the base table when --nodeleted is given.
NODELETE_FILTER = nf.equals(DELETED, False)
# Source tables read by the job.
base_table = '//home/video-hosting/base/ContentGroup'
content_type_table = '//home/video-hosting/base/ContentType'
content_resource_table = '//home/video-hosting/base/ContentResource'
# Output/working tables live under ``job_root``; update_global_vars()
# recomputes all of the paths below when --job_root overrides the default.
DEFAULT_JOB_ROOT = '//home/videolog/strm_meta/iron_branch'
job_root = DEFAULT_JOB_ROOT
current_table = '{}/current'.format(job_root)
full_table = '{}/full'.format(job_root)
full_work_table = '{}/full_work'.format(job_root)
full_stats_table = '{}/full_stats'.format(job_root)
alt_table = '{}/alt'.format(job_root)
concat_table = '{}/concat'.format(job_root)
# NOTE(review): ``fields_to_project`` and ``chmh`` are not referenced anywhere
# else in the visible part of this file - confirm before removing.
fields_to_project = 'ContentGroupID'
chmh = '43aa0f9f9d7c57bca56cff1ddca64a74'
yatv_channels_table = '{}/yatv_channels'.format(job_root)
ontoids_table = '{}/onto_ids_data'.format(job_root)
acg_table = '{}/active_cgs'.format(job_root)
# YQL template producing (ContentGroupID, special_type) rows for:
#   * "yatv"            - channels (ContentTypeID == 2) whose 'channel_type'
#                         resource value starts with 'yatv';
#   * "special_project" - channels flagged 'is_special_project' == '1' that
#                         are not in the yatv set;
#   * "youtube"         - broadcasts (ContentTypeID == 43) whose
#                         'content_source_url' contains 'youtube', plus their
#                         parent channels.
# The placeholders @pool, BASE_TABLE, CONTENT_RESOURCE_TABLE and OUTPUT_TABLE
# are substituted with str.replace() in main().
# NOTE(review): $yatv_list is declared but not used by any query below.
channels_yql = """
pragma yt.Pool = "@pool";
pragma yt.PoolTrees = "physical";
pragma yt.TentativePoolTrees = "cloud";
PRAGMA SimpleColumns;

$cg_table = "BASE_TABLE";
$cr_table = "CONTENT_RESOURCE_TABLE";
$output_table = "OUTPUT_TABLE";
$yatv_list = AsList("yatv", "yatv@yttv", "yatv@yttv@news");

$yatv = (
    select ContentGroupID, "yatv" as special_type
    from $cr_table
    where ResourceName == 'channel_type' and Value like 'yatv%'
);

$channels = (
    select distinct ContentGroupID from $cg_table
    where ContentTypeID == 2
);

$yatv_release = (
    select a.ContentGroupID as ContentGroupID, special_type
    from $yatv as a
    inner join $channels as b using (ContentGroupID)
);

$special_project = (
    select
        cr.ContentGroupID as ContentGroupID,
        "special_project" as special_type
    from $cr_table as cr
    inner join $channels as ch using (ContentGroupID)
    where cr.ResourceName == 'is_special_project' and cr.Value == '1'
);

$broadcasts_from_cg = (
    select
        ContentGroupID,
        ParentID
    from $cg_table
    where ContentTypeID == 43
);

$youtube_broadcasts = (
    select
        cr.ContentGroupID as ContentGroupID,
        ParentID,
        "youtube" as special_type
    from $cr_table as cr
    inner join $broadcasts_from_cg as ch using (ContentGroupID)
    where (
        cr.ResourceName == 'content_source_url' and
        cr.Value like '%youtube%'
    )
);

$youtube_broadcasts_release = (
    select
        ContentGroupID,
        special_type
    from $youtube_broadcasts
);

$youtube_channels = (
    select
        ch.ContentGroupID as ContentGroupID,
        special_type
    from $youtube_broadcasts as yb
    inner join $channels as ch
    on ch.ContentGroupID == yb.ParentID
);

$sp_only = (
    select
        sp.ContentGroupID as ContentGroupID,
        "special_project" as special_type
    from $special_project as sp
    left only join $yatv as yatv using (ContentGroupID)
);

insert into $output_table with truncate
select * from $yatv_release
union all
select * from $sp_only
union all
select * from $youtube_broadcasts_release
union all
select * from $youtube_channels;
"""


# YQL template that takes ContentResource rows with ResourceName 'onto_id'
# (and Value != "0") and left-joins them to the ontodb cards table to attach
# the card's "type/subtype" list (onto_type) and tag list (onto_tags).
# The placeholders @pool, @ontoids_table and @cr_table are substituted with
# str.replace() in main().
ontoids_yql = """
pragma yt.Pool = "@pool";
pragma yt.PoolTrees = "physical";
pragma yt.TentativePoolTrees = "cloud";

$all_ontoids = (
    SELECT
        key as onto_id,
        ListFlatMap(Yson::ConvertToList(Yson::ParseJson(value){"isa"}{"otype"}), ($y) -> {RETURN Yson::ConvertToString($y{"value"})||'/'||Yson::ConvertToString($y{"subvalue"})}) as type_subtype,
        ListFlatMap(Yson::ConvertToList(Yson::ParseJson(value){"isa"}{"tags"}), ($y) -> {RETURN Yson::ConvertToString($y{"value"})}) as tags
    FROM [//home/dict/ontodb/ver/daily/production/all_cards_final]
);


INSERT INTO [@ontoids_table] WITH TRUNCATE
SELECT
    cms.ContentGroupID as ContentGroupID,
    cms.Value as onto_id,
    ontodb.type_subtype as onto_type,
    ontodb.tags as onto_tags
FROM (
    SELECT *
    FROM [@cr_table]
    WHERE ResourceName == 'onto_id' and Value != "0")  as cms
LEFT JOIN $all_ontoids as ontodb
ON ontodb.onto_id == cms.Value
"""


# YQL template that writes one row per ContentGroupID (with some
# TMP_OvsServiceFlags value) for content groups that:
#   * have Options not matching '%deleted%',
#   * have a 'thumbnail' resource whose value contains '//',
#   * have at least one content version with an OutputStream row whose Data
#     contains '//'.
# The placeholders @pool, @acg_table, @cg_table and @cr_table are substituted
# with str.replace() in main().
active_content_groups_yql = """
pragma yt.Pool = "@pool";
pragma yt.PoolTrees = "physical";
pragma yt.TentativePoolTrees = "cloud";
PRAGMA yt.InferSchema = '1';

$output_stream = (
    SELECT
        ContentVersionID,
        SOME(Data) AS Data
    FROM [//home/video-hosting/base/OutputStream]
    WHERE Data like '%//%'
    GROUP BY ContentVersionID
);

$cvg = (
    SELECT
        ContentGroupID, cvg.ContentVersionID as ContentVersionID
    FROM [//home/video-hosting/base/ContentVersionGroup] as cvg
    INNER JOIN $output_stream as os using (ContentVersionID)
);

$cvg_filtered = (
    SELECT ContentGroupID, MAX(ContentVersionID)
    FROM $cvg
    GROUP BY ContentGroupID
);

INSERT INTO [@acg_table] WITH TRUNCATE
SELECT
    ContentGroup.ContentGroupID as ContentGroupID,
    SOME(ContentGroup.TMP_OvsServiceFlags) as TMP_OvsServiceFlags
FROM
    [@cg_table] as ContentGroup
JOIN
    [@cr_table] as ThumbnailResource
ON
    ContentGroup.ContentGroupID = ThumbnailResource.ContentGroupID
JOIN
    $cvg_filtered as ContentVersionGroup
ON
    ContentGroup.ContentGroupID = ContentVersionGroup.ContentGroupID
WHERE
    ContentGroup.Options not like '%deleted%'
    AND ThumbnailResource.ResourceName = 'thumbnail'
    AND ThumbnailResource.Value like '%//%'
GROUP BY
    ContentGroup.ContentGroupID
"""


def update_global_vars(job_root_):
    """Point ``job_root`` and every table path derived from it at a new root.

    Rebinds the module-level ``job_root`` to ``job_root_`` and recomputes
    each ``<job_root>/<leaf>`` table path global.
    """
    global job_root
    job_root = job_root_

    # (module-level variable name, last path component under job_root)
    derived_tables = (
        ('current_table', 'current'),
        ('full_table', 'full'),
        ('full_work_table', 'full_work'),
        ('full_stats_table', 'full_stats'),
        ('alt_table', 'alt'),
        ('concat_table', 'concat'),
        ('yatv_channels_table', 'yatv_channels'),
        ('ontoids_table', 'onto_ids_data'),
        ('acg_table', 'active_cgs'),
    )
    module_vars = globals()
    for var_name, leaf in derived_tables:
        module_vars[var_name] = '{}/{}'.format(job_root, leaf)


def form_dict(
    ContentGroupID, Name, ContentTypeID, ParentID, UUID, OptionsDeleted
):
    """Pack the columns of one content group into a chain-element dict.

    ``OptionsDeleted`` is a JSON document; the resulting ``DELETED`` entry is
    True when ``'deleted'`` is contained in the decoded value.
    """
    options = json.loads(OptionsDeleted)
    element = dict(
        ContentGroupID=ContentGroupID,
        Name=Name,
        ContentTypeID=ContentTypeID,
        ParentID=ParentID,
        UUID=UUID,
    )
    element[DELETED] = 'deleted' in options
    return element


def get_driver(cluster):
    """Return ``cluster.driver.yt_driver`` if present, else ``cluster.driver``."""
    driver = cluster.driver
    return getattr(driver, 'yt_driver', driver)


def get_rc(cluster, table):
    """Read the ``row_count`` attribute of ``table`` (0 is passed as default)."""
    client = get_driver(cluster).client
    return client.get_attribute(table, 'row_count', 0)


# Column types of the final output rows. Used in main() as the
# ``output_schema`` hint of the mappers/reducer and, with --no_yql, as the
# ``schema`` of the concat table.
# NOTE(review): optionalize_schema comes from pytils; presumably it wraps each
# type in qt.Optional - confirm against its implementation.
final_schema = optionalize_schema(qt, {
    'ContentGroupID': qt.UInt64,
    'JoinKey': qt.String,
    'UUID': qt.String,
    'chain': qt.Json,
    'channel_type': qt.String,
    'computed_channel': qt.String,
    'computed_program': qt.String,
    'deleted': qt.Integer,
    'detailed_tags': qt.String,
    'duration': qt.Integer,
    'finish_time': qt.Integer,
    'heur_category': qt.String,
    'hr_path': qt.String,
    'hr_path_unified': qt.String,
    'page_id': qt.String,
    'parent_chain': qt.Json,
    'parent_channel_uuid': qt.String,
    'parent_hr_path': qt.String,
    'parent_path': qt.String,
    'path': qt.String,
    'path_unified': qt.String,
    'project_alias': qt.String,
    'TMP_OvsServiceFlags': qt.String,
    'onto_id': qt.String,
    'onto_type': qt.List[qt.String],
    'onto_tags': qt.List[qt.String],
    'start_time': qt.Integer,
})


class Rename(object):
    """Mapper that re-keys a joined row onto the element found at ``step``.

    When the row carries a non-empty ``step<N>`` value, its ``ContentGroupID``
    and ``UUID`` replace the row's own; ``ParentID`` is always dropped.
    """

    def __init__(self, step):
        self.step = step

    def __call__(self, records):
        step_label = 'step{}'.format(self.step)
        for rec in records:
            row = rec.to_dict()
            element = row.get(step_label)
            if element:
                row['ContentGroupID'] = element['ContentGroupID']
                row['UUID'] = element['UUID']
            # ParentID may be absent; discard it silently when present.
            row.pop('ParentID', None)
            yield Record(**row)


def rename(self, records):
    """Yield every record without its ``ParentID`` field.

    Raises KeyError when a record has no ``ParentID``. The unused ``self``
    parameter is part of the existing signature.
    """
    for rec in records:
        row = rec.to_dict()
        del row['ParentID']
        yield Record(**row)


class AddPath(object):
    """Mapper that collapses the ``stepN`` columns into chain/path fields.

    For every record the non-empty ``stepN`` values are removed from the row
    and stored, ordered by step number, as ``chain``. ``path`` is the
    comma-joined ``ContentTypeID`` of the chain elements and ``hr_path`` is
    the same path translated through ``dct`` (see ``process_path``).
    """

    def __init__(self, dct):
        # Mapping from str(ContentTypeID) to a human-readable type name.
        self.dct = dct

    @staticmethod
    def _step_order(key):
        """Sort key placing ``stepN`` names in numeric order of N.

        Plain string sorting would put 'step10' before 'step2'; names with a
        non-numeric suffix sort after all numbered steps, alphabetically.
        """
        suffix = key[len('step'):]
        if suffix.isdigit():
            return (0, int(suffix), key)
        return (1, 0, key)

    def __call__(self, records):
        dct = self.dct
        for rec in records:
            result = rec.to_dict()
            srtk = sorted(
                (
                    x for x in result
                    if x.startswith('step') and result.get(x)
                ),
                key=self._step_order
            )
            srtv = [str(result[k]['ContentTypeID']) for k in srtk]
            result['chain'] = [
                result.pop(x) for x in srtk
            ]
            result['path'] = ','.join(srtv)
            result['hr_path'] = process_path(result['path'], dct)
            yield Record(**result)


def process_path(path, dct):
    """Translate each comma-separated item of ``path`` through ``dct``.

    Raises KeyError when an item is missing from ``dct``.
    """
    return ','.join(dct[type_id] for type_id in path.split(','))


def concat_path(*args):
    """Join the truthy arguments with ``'+'``; falsy ones are skipped."""
    return '+'.join(filter(None, args))


def add_ids_and_hr_ids(rec, chain_field, id_field, hr_id_field):
    """Annotate each chain element with its content type id and name.

    ``rec[id_field]`` and ``rec[hr_id_field]`` are comma-separated strings;
    the i-th item of each is written into the i-th element of
    ``rec[chain_field]`` as ``content_type_id`` / ``content_type_id_name``.
    The elements are modified in place and nothing is returned.
    """
    type_ids = rec[id_field].split(',')
    type_names = rec[hr_id_field].split(',')
    for position, link in enumerate(rec[chain_field]):
        link['content_type_id'] = type_ids[position]
        link['content_type_id_name'] = type_names[position]


def normalize_name(name):
    """Return ``name`` with leading and trailing whitespace removed."""
    return name.strip()


# Prefix put in front of a computed channel name, keyed by the
# ``special_type`` value found for the channel's ContentGroupID.
special_type_to_prefix = {
    'yatv': 'Яндекс.',
    'special_project': 'Спецпроекты.',
    'youtube': 'Youtube.'
}


def search_for_channel(rec, chain_field, yatv):
    """Pick a channel name from the chain stored in ``rec[chain_field]``.

    The chain is scanned from its last element backwards. The first element
    whose ``content_type_id_name`` is 'channel' or 'ntv-vod-library' is a
    strong match and ends the scan; the first 'type-folder' or
    'vod-broadcast' element seen before that is remembered as a weak match
    and used only when no strong match provides a name.

    ``yatv`` maps ContentGroupID to a special type. When the chosen element
    is in it, the matching prefix from ``special_type_to_prefix`` is
    prepended unless the name already starts with that prefix.

    Returns the whitespace-stripped name, or None when nothing matched.
    Raises KeyError for a special type missing from
    ``special_type_to_prefix``.
    """
    strong_name = None
    strong_cgid = None
    weak_name = None
    weak_cgid = None
    for element in rec[chain_field][::-1]:
        if element['content_type_id_name'] in {
            'channel', 'ntv-vod-library',
        } and not strong_name:
            strong_name = element['Name']
            strong_cgid = element['ContentGroupID']
            break
        elif element['content_type_id_name'] in {
            'type-folder', 'vod-broadcast'
        } and not weak_name:
            weak_name = element['Name']
            weak_cgid = element['ContentGroupID']
    name = strong_name or weak_name
    cgid = strong_cgid or weak_cgid
    if not name:
        return
    if cgid in yatv:
        prefix = special_type_to_prefix[yatv[cgid]]
        # Compare against the prefix that would be added, not the special
        # type key, so a name that is already prefixed is left unchanged.
        if not name.startswith(prefix):
            name = '{}{}'.format(prefix, name)
    return normalize_name(name)


def search_for_program(rec, chain_field):
    """Pick a program name from the chain stored in ``rec[chain_field]``.

    Scanning from the last element backwards, the first element whose
    ``content_type_id_name`` is one of the program-like types supplies the
    name; otherwise the name of the last chain element is used. The result
    is passed through ``normalize_name``.
    """
    program_types = {
        'episode', 'zen-episode', 'vod-library',
        'ntv-vod-series', 'tv-series', 'ott-trailer', 'ott-movie',
        'ntv-vod-movie', 'yandex-market-library', 'high-light',
        'zen-namespace', 'vod-broadcast-episode'
    }
    chain = rec[chain_field]
    for link in reversed(chain):
        if link['content_type_id_name'] in program_types:
            return normalize_name(link['Name'])
    return normalize_name(chain[-1]['Name'])


def cr_reducer(groups):
    """Reducer turning resource rows of one key into a single wide record.

    For each group the key fields are extended with one field per recognised
    ``ResourceName``: time/duration resources are converted with ``int`` (rows
    that fail to convert are ignored), the textual ones are copied as is.
    Groups that end up with only one field in total are not emitted.
    """
    numeric_resources = {'start_time', 'finish_time', 'duration'}
    text_resources = {
        'parent_channel_uuid', 'channel_type', 'project_alias',
        'page_id', 'detailed_tags'
    }
    for key, recs in groups:
        merged = key.to_dict()
        for rec in recs:
            if rec.ResourceName in numeric_resources:
                try:
                    merged[rec.ResourceName] = int(rec.Value)
                except (TypeError, AttributeError, ValueError):
                    pass
            elif rec.ResourceName in text_resources:
                merged[rec.ResourceName] = rec.Value
        if len(merged) != 1:
            yield Record(**merged)


class ChooseChannelAndProgramName(object):
    """Mapper that fills ``computed_channel`` and ``computed_program``.

    ``yatv`` maps ContentGroupID to a special type and is forwarded to
    ``search_for_channel``.

    Channel selection, in order:
      * 'ott', 'ya-news' or 'zen' when ``hr_path_unified`` contains 'ott-',
        'ya-news' or 'zen-' respectively;
      * otherwise the channel found in ``parent_chain`` (if the record has
        one), then the channel found in ``chain``;
      * 'UNKNOWN' when none of the above yields a name.

    The program is whatever ``search_for_program`` returns for ``chain``,
    or 'UNKNOWN' when that is empty.

    As a side effect the elements of ``chain`` / ``parent_chain`` gain
    ``content_type_id`` and ``content_type_id_name`` entries.
    """

    def __init__(self, yatv):
        self.yatv = yatv

    def __call__(self, records):
        for rec in records:
            result = rec.to_dict()
            has_parent_chain = result.get('parent_chain', {})
            if has_parent_chain:
                add_ids_and_hr_ids(
                    result, 'parent_chain', 'parent_path', 'parent_hr_path'
                )
            add_ids_and_hr_ids(result, 'chain', 'path', 'hr_path')

            hr_path_unified = result['hr_path_unified']
            if 'ott-' in hr_path_unified:
                computed_channel = 'ott'
            elif 'ya-news' in hr_path_unified:
                computed_channel = 'ya-news'
            elif 'zen-' in hr_path_unified:
                computed_channel = 'zen'
            else:
                parent_channel = ''
                if has_parent_chain:
                    parent_channel = search_for_channel(
                        result, 'parent_chain', self.yatv
                    )
                own_channel = search_for_channel(result, 'chain', self.yatv)
                # The parent's channel wins over the record's own one.
                computed_channel = parent_channel or own_channel or 'UNKNOWN'
            result['computed_channel'] = computed_channel

            # The former extra fallback here assigned ``computed_channel``
            # after it had already been stored, so it never had any effect;
            # search_for_program() itself already falls back to the name of
            # the last chain element.
            result['computed_program'] = (
                search_for_program(result, 'chain') or 'UNKNOWN'
            )
            yield Record(**result)


def get_cluster(args):
    """Create the nile cluster object described by ``args`` and environment.

    Reads ``YT_TOKEN`` and ``YT_PROXY`` (plus ``YQL_TOKEN`` unless
    ``args.no_yql`` is set) from the environment. The cluster class is looked
    up by the title-cased first dot-separated part of ``YT_PROXY`` on either
    ``clusters.yt`` or ``clusters.yql``; ``job_root`` and ``args.title`` are
    exposed as templates.
    """
    options = {'token': os.environ['YT_TOKEN']}
    if args.pool:
        options['pool'] = args.pool

    if args.no_yql:
        backend = clusters.yt
    else:
        backend = clusters.yql
        options['yql_token'] = os.environ['YQL_TOKEN']

    proxy_name = os.environ['YT_PROXY'].split('.')[0].title()
    factory = getattr(backend, proxy_name)
    connected = factory(**options)
    return connected.env(
        templates=dict(job_root=job_root, title=args.title)
    )


def tmp(s):
    """Return the temporary-table name for ``s`` (``s`` plus ``'_tmp'``)."""
    return ''.join((s, '_tmp'))


def remove_tmp_(cluster, *tables):
    """Best-effort removal of the ``tmp()`` counterpart of each table.

    Any exception raised by the removal is reported on stdout and does not
    stop processing of the remaining tables.
    """
    for table in tables:
        tmp_table = tmp(table)
        try:
            get_driver(cluster).remove(tmp_table)
            print('removed {}'.format(tmp_table))
        except Exception as e:
            # Report the path that was actually targeted (the tmp table),
            # matching the success message above.
            print('didn\'t remove {}: {}'.format(tmp_table, e))


def move_tmp_(cluster, *tables):
    """Move the ``tmp()`` counterpart of each table over the table itself.

    The move is forced; any exception is reported on stdout and does not
    stop processing of the remaining tables.
    """
    for destination in tables:
        source = tmp(destination)
        try:
            client = get_driver(cluster).client
            client.move(source, destination, force=True)
            print('moved {} -> {}'.format(source, destination))
        except Exception as e:
            print('didn\'t move {} -> {}: {}'.format(source, destination, e))


def get_heur_category(path):
    """Classify ``path`` as 'vod' or 'live_or_catchup'.

    Returns 'vod' when any of the known markers occurs in ``path``, and
    'live_or_catchup' otherwise.
    """
    # NOTE(review): 'yandex-disribution' looks like a misspelling of
    # 'yandex-distribution'; kept verbatim - confirm against the actual
    # content type names before changing it.
    vod_markers = (
        'vod', 'ott', 'music-clip', 'kp-trailer',
        'yandex-market', 'yandex-disribution'
    )
    if any(marker in path for marker in vod_markers):
        return 'vod'
    return 'live_or_catchup'


def main():
    """Build the content-group hierarchy tables under ``job_root``.

    Stages, in order:
      1. parse the command line, create the YQL client and the nile cluster;
      2. start the onto-id and active-content-group YQL queries;
      3. seed the working tables with the root content groups
         (``ParentID == 0``) as ``step0``;
      4. repeatedly join children onto the current level (``step1``,
         ``step2``, ...) until a step produces no rows or, unless ``--debug``
         is given, step 7 has been processed;
      5. run the channels YQL query, then compute paths, channel and program
         names into the ``full`` and ``full_stats`` tables;
      6. write the ``concat`` table, in which every row appears twice: keyed
         by the stringified ContentGroupID and by UUID (``JoinKey``);
      7. for the default job root only, request a transfer of ``concat``
         to the other clusters via transfer-manager.

    Requires ``YT_TOKEN``, ``YQL_TOKEN`` and ``YT_PROXY`` in the environment.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--nodeleted', action='store_true')
    parser.add_argument('--no_yql', action='store_true')
    parser.add_argument('--pool', default="pecheny")
    parser.add_argument('--title', default='Iron Branch')
    parser.add_argument('--job_root')
    args = parser.parse_args()
    yql_client = YqlClient(
        db=os.environ['YT_PROXY'].split('.')[0].lower(),
        token=os.environ['YQL_TOKEN']
    )
    if args.job_root:
        update_global_vars(args.job_root)
    start_time = datetime.datetime.now()

    cluster = get_cluster(args)

    def remove_tmp(*tables):
        return remove_tmp_(cluster, *tables)

    def move_tmp(*tables):
        return move_tmp_(cluster, *tables)

    # Arguments for ne.custom(): the function followed by its input columns.
    fields = [
        form_dict, 'ContentGroupID', 'Name',
        'ContentTypeID', 'ParentID', 'UUID', 'Options'
    ]

    remove_tmp(
        current_table,
        full_work_table,
        full_work_table + '_after_step0'
    )

    # --- Auxiliary YQL queries -------------------------------------------
    # Both are started here and awaited right before the final join that
    # reads their output, so they can run while the hierarchy is built.
    query = ontoids_yql.replace(
        '@ontoids_table', ontoids_table
    ).replace(
        '@cr_table', content_resource_table
    ).replace(
        '@pool', args.pool
    )
    ontoids_req = yql_client.query(query, title='Iron Branch | YQL')
    ontoids_req.run()

    query = active_content_groups_yql.replace(
        '@acg_table', acg_table
    ).replace(
        '@cg_table', base_table
    ).replace(
        '@cr_table', content_resource_table
    ).replace(
        '@pool', args.pool
    )
    acg_req = yql_client.query(query, title='Iron Branch | YQL')
    acg_req.run()

    # --- Step 0: root content groups ---------------------------------------
    job = cluster.job()

    filters = [
        nf.equals('ParentID', 0)
    ]
    if args.nodeleted:
        filters.append(NODELETE_FILTER)

    s = job.table(base_table).filter(
        *filters
    ).project(
        'ContentGroupID', 'UUID',
        step0=ne.custom(*fields).add_hints(type=qt.Json)
    )

    s.put(tmp(current_table))
    s.put(tmp(full_work_table))
    s.put(tmp(full_work_table + '_after_step0'))

    job.run()

    move_tmp(
        current_table,
        full_work_table,
        full_work_table + '_after_step0'
    )

    # --- Steps 1..N: attach children level by level ------------------------
    step = 1
    rc = 1

    # Without --debug the descent is capped at step 7.
    while rc and (args.debug or step <= 7):
        print('performing step {}'.format(step))

        remove_tmp(alt_table)

        job = cluster.job()

        current = job.table(current_table)

        step_label = 'step{}'.format(step)
        kwargs = {step_label: ne.custom(*fields).add_hints(type=qt.Json)}

        bt = job.table(base_table)

        # Children (left, by ParentID) are joined to the rows of the
        # previous level (right, by ContentGroupID).
        join_kwargs = dict(
            by_left='ParentID',
            by_right='ContentGroupID',
            assume_unique_right=True
        )
        if step <= 2:
            join_kwargs['memory_limit'] = MEMORY_LIMIT
            join_kwargs['assume_small'] = True

        step_schema = dict(
            UUID=qt.Optional[qt.String],
            ContentGroupID=qt.Optional[qt.UInt64],
        )
        for step_ in range(step + 1):
            step_schema['step{}'.format(step_)] = qt.Optional[qt.Json]

        if args.nodeleted:
            bt = bt.filter(NODELETE_FILTER)
        bt = bt.project(
            'ParentID', **kwargs
        )

        if args.debug:
            bt = bt.put(
                '{}/{}_intermediate1'.format(job_root, step)
            )

        bt = bt.join(
            current, **join_kwargs
        )

        if args.debug:
            bt = bt.put(
                '{}/{}_intermediate2'.format(job_root, step)
            )

        bt = bt.map(
            with_hints(
                output_schema=step_schema
            )(Rename(step)), intensity='data', memory_limit=MEMORY_LIMIT
        ).put(
            tmp(alt_table)
        )

        job.run()

        move_tmp(alt_table)

        rc = get_rc(cluster, alt_table)
        if rc:
            print('concatting tables before next step')
            remove_tmp(
                full_work_table,
                full_work_table + '_after_{}'.format(step_label)
            )
            job = cluster.job()

            ci = job.concat(
                job.table(full_work_table),
                job.table(alt_table)
            )

            ci.put(
                tmp(full_work_table)
            )

            ci.put(
                tmp(full_work_table + '_after_{}'.format(step_label))
            )
            job.run()
            move_tmp(
                full_work_table,
                full_work_table + '_after_{}'.format(step_label)
            )
            # The rows produced by this step become the level that the
            # next step joins against.
            get_driver(cluster).client.move(
                alt_table, current_table, force=True
            )
        else:
            print('nothing left, soon to be finished')
        step += 1

    # --- Content type names and special channels ---------------------------
    recs = [
        rec.to_dict() for rec in get_driver(cluster).read(
            content_type_table
        )
    ]
    content_type_dict = {str(x['ContentTypeID']): x['Name'] for x in recs}

    query = channels_yql.replace(
        "OUTPUT_TABLE", yatv_channels_table
    ).replace(
        "BASE_TABLE", base_table
    ).replace(
        "CONTENT_RESOURCE_TABLE", content_resource_table
    ).replace(
        '@pool', args.pool
    )
    req = yql_client.query(query, title='Iron Branch | YQL')
    req.run()
    req.wait_progress()

    yatv = {
        rec.ContentGroupID: rec.special_type
        for rec in get_driver(cluster).read(yatv_channels_table)
    }
    choose_channel_and_program_name = ChooseChannelAndProgramName(yatv)

    remove_tmp(
        full_table,
        full_stats_table
    )

    # The job below reads ontoids_table and acg_table, so the queries that
    # write them are awaited first, mirroring the wait_progress() call
    # after the channels query above.
    # NOTE(review): assumes run() only submits the query - confirm.
    ontoids_req.wait_progress()
    acg_req.wait_progress()

    # --- Final tables: full and full_stats ---------------------------------
    job = cluster.job()

    cfj_keys = [
        'ContentGroupID', 'parent_channel_uuid', 'channel_type',
        'project_alias', 'start_time', 'finish_time',
        'page_id', 'duration', 'detailed_tags'
    ]

    cr_for_join = job.table(
        content_resource_table
    ).groupby(
        'ContentGroupID'
    ).reduce(
        with_hints(
            output_schema=final_schema
        )(cr_reducer)
    ).project(
        *cfj_keys
    )

    ft = job.table(full_work_table).map(
        with_hints(output_schema=final_schema)(AddPath(content_type_dict))
    ).project(
        # Drop the columns that the joins below bring in.
        *list(
            set(final_schema.keys()) - set(cfj_keys[1:]) - {
                'onto_id', 'onto_tags', 'onto_type',
                'TMP_OvsServiceFlags'
            }
        )
    ).join(
        cr_for_join, by='ContentGroupID', type='left',
        memory_limit=MEMORY_LIMIT, assume_unique_right=True
    ).join(
        job.table(ontoids_table).project(
            'ContentGroupID', 'onto_id', 'onto_tags', 'onto_type'
        ), by='ContentGroupID', type='left',
        memory_limit=MEMORY_LIMIT
    ).join(
        job.table(acg_table).project(
            'ContentGroupID', 'TMP_OvsServiceFlags'
        ), by='ContentGroupID', type='left',
        memory_limit=MEMORY_LIMIT
    )

    # Parent-side view: the row's own path/chain exposed under parent_*
    # names, keyed by its UUID as ``parent_channel_uuid``.
    ftpj = ft.project(
        parent_path='path',
        parent_hr_path='hr_path',
        parent_chain='chain',
        parent_channel_uuid='UUID'
    )

    ftbs = ft.project(
        *list(
            set(final_schema.keys()) - {
                'parent_path', 'parent_hr_path', 'parent_chain',
                'path_unified', 'hr_path_unified'
            }
        )
    ).join(
        ftpj, by='parent_channel_uuid', type='left',
        memory_limit=MEMORY_LIMIT
    ).project(
        ne.all(),
        path_unified=ne.custom(
            concat_path, 'path', 'parent_path'
        ).add_hints(
            type=qt.String
        ),
        hr_path_unified=ne.custom(
            concat_path, 'hr_path', 'parent_hr_path'
        ).add_hints(
            type=qt.String
        ),
    )

    ftbs.sort(
        'ContentGroupID'
    ).map(
        with_hints(
            output_schema=final_schema
        )(choose_channel_and_program_name)
    ).put(
        tmp(full_table)
    )

    stats_schema = copy.deepcopy(final_schema)
    stats_schema['count'] = qt.Integer

    # One sample row per distinct hr_path_unified together with the number
    # of rows sharing that path.
    ftbs.groupby(
        'hr_path_unified'
    ).aggregate(
        count=na.count()
    ).join(
        ftbs.unique('hr_path_unified'), by='hr_path_unified', type='inner',
        memory_limit=MEMORY_LIMIT, assume_small=True,
    ).map(
        with_hints(
            output_schema=stats_schema
        )(choose_channel_and_program_name)
    ).sort(
        'count', 'hr_path_unified'
    ).put(
        tmp(full_stats_table)
    )

    job.run()

    move_tmp(
        full_table, full_stats_table
    )

    # --- Concat table keyed by JoinKey -------------------------------------
    remove_tmp(
        concat_table
    )
    print('concatting tables for join')

    job = cluster.job()

    to_concat = []

    ft = job.table(full_table).project(
        ne.all(exclude=['heur_category', 'deleted', 'JoinKey']),
        heur_category=ne.custom(
            get_heur_category,
            'hr_path_unified'
        ).add_hints(type=qt.String),
        # The deleted flag is taken from the last element of the chain.
        deleted=ne.custom(
            lambda x: int(x[-1][DELETED]), 'chain'
        ).add_hints(type=qt.Integer)
    )

    to_concat.append(ft.project(
        ne.all(), JoinKey=ne.custom(str, 'ContentGroupID').add_hints(
            type=qt.String
        )
    ))
    to_concat.append(ft.project(
        ne.all(), JoinKey='UUID'
    ))

    schema_kwargs = {}
    if args.no_yql:
        schema_kwargs = {
            'schema': final_schema
        }

    job.concat(*to_concat).sort(
        'JoinKey'
    ).put(
        tmp(concat_table), **schema_kwargs
    )

    job.run()
    move_tmp(concat_table)
    end_time = datetime.datetime.now()

    print('total time: {} minutes'.format(
        (end_time - start_time).total_seconds() / 60
    ))

    # --- Cross-cluster transfer (default job root only) --------------------
    if job_root != DEFAULT_JOB_ROOT:
        return
    for cl in ['arnold']:
        resp = requests.post(
            'http://transfer-manager.yt.yandex.net/api/v1/tasks/',
            json={
                'source_cluster': 'hahn',
                'source_table': concat_table,
                'destination_cluster': cl,
                'destination_table': concat_table,
            },
            headers={'Authorization': 'OAuth {}'.format(
                os.environ['YT_TOKEN'])}
        )
        if resp.status_code == 200:
            print(
                'Table transfer from hahn to {} started, '
                'you can look it up here:'
                'https://transfer-manager.yt.yandex-team.ru/task?id='
                '{}'.format(cl, resp.content)
            )
        else:
            print(
                'Got error while trying to transfer, status code {}, '
                'content: {}'.format(resp.status_code, resp.content)
            )


if __name__ == "__main__":
    main()
