# -*- coding: utf-8 -*-
from yt.wrapper import ypath_join, ypath_split
from datacloud.dev_utils.time.utils import assert_date_str
from datacloud.dev_utils.yt.yt_utils import get_yt_client, create_folders
from datacloud.launcher.lib.routines.save_crypta_matches import (
    DEFAULT_ID_MATCHES_PATH, DEFAULT_ID_MATCHES_SCHEMA
)
from datacloud.dev_utils.logging.logger import get_basic_logger

# Module-level logger named after this module.
logger = get_basic_logger(__name__)

# YT directory holding the blend tables; run_blend_crypta_matches writes its
# output to a child of this directory named after the task's date_str.
ID_VALUE_BLENDS_PATH = '//home/x-products/production/crypta_v2/id_value_blends'


def blend_reducer(_, recs):
    """Reducer that keeps exactly one record per reduce key.

    Emits the first record of the group and ignores the rest; emits nothing
    when the group is empty. The key argument is not used.
    """
    nothing = object()
    # Pull a single item only, so the rest of the group is never consumed.
    head = next(iter(recs), nothing)
    if head is not nothing:
        yield head


def run_blend_crypta_matches(task):
    """Build the id_value blend table for ``task.data['date_str']``.

    If ``ID_VALUE_BLENDS_PATH`` contains no blends yet, every table listed
    under ``DEFAULT_ID_MATCHES_PATH`` is reduced together, in reverse sorted
    order. Otherwise only the last (in sorted order) match table and the last
    (in sorted order) existing blend are reduced. ``blend_reducer`` keeps one
    record per ``id_value``; the result is then sorted by ``id_value``.

    Args:
        task: object providing ``data['date_str']`` and ``make_done()``.

    Returns:
        A one-element list containing ``task.make_done()``.

    Raises:
        IndexError: if blends already exist but ``DEFAULT_ID_MATCHES_PATH``
            has no children (there is no match table to pick).
    """
    date_str = task.data['date_str']
    assert_date_str(date_str)
    yt_client = get_yt_client()

    if not yt_client.exists(ID_VALUE_BLENDS_PATH):
        create_folders(ID_VALUE_BLENDS_PATH, yt_client=yt_client)

    output_table = yt_client.TablePath(
        ypath_join(ID_VALUE_BLENDS_PATH, date_str),
        attributes={
            'schema': DEFAULT_ID_MATCHES_SCHEMA,
            'compression_codec': 'brotli_6',
            'erasure_codec': 'lrc_12_2_2',
            'optimize_for': 'scan',
        }
    )

    # Sort both listings explicitly: the code below picks the "latest" table
    # with [-1], which must not depend on the order the listing comes back in.
    blends = sorted(yt_client.list(ID_VALUE_BLENDS_PATH, absolute=True))
    id_value_match_tables = sorted(yt_client.list(DEFAULT_ID_MATCHES_PATH, absolute=True))
    if not blends:
        logger.info('Initializing first id value blend')
        # Last-sorted tables go first. NOTE(review): presumably so that
        # blend_reducer, which keeps the first record of a key, prefers the
        # most recent match - confirm against YT reduce ordering guarantees.
        input_tables = id_value_match_tables[::-1]
    else:
        input_tables = [id_value_match_tables[-1], blends[-1]]

    # Operation title: "<first> ... <last>" for several inputs, otherwise the
    # single table name (or an empty string when there are no inputs).
    if len(input_tables) > 1:
        input_tables_title = '{} ... {}'.format(
            ypath_split(input_tables[0])[-1],
            ypath_split(input_tables[-1])[-1]
        )
    else:
        input_tables_title = ', '.join(
            ypath_split(it)[-1] for it in input_tables
        )

    # Reduce and the follow-up sort run inside one transaction.
    with yt_client.Transaction():
        yt_client.run_reduce(
            blend_reducer,
            input_tables,
            output_table,
            reduce_by='id_value',
            spec={'title': 'Blend id_value matches {}'.format(input_tables_title)}
        )
        yt_client.run_sort(
            output_table,
            sort_by='id_value',
            spec={'title': 'Blend id_value matches {} / sort after'.format(input_tables_title)}
        )

    return [task.make_done()]
