from yt.wrapper import ypath_join, TablePath
from datacloud.config.yt import MODELS_FOLDER, UNRELIABLE_TMP_FOLDER, EXPORT_FOLDER
from datacloud.dev_utils.yt import yt_utils, take_part
from datacloud.dev_utils.time import utils as time_utils
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.audience.lib.config_table import (
    AudienceConfigTable, AUDIENCE_CONFIG_TABLE_PATH)


# Module-level logger named after this module.
logger = get_basic_logger(__name__)
# Scratch folder (under the unreliable tmp area) where run() keeps the
# score-sorted copies of model tables.
TMP = ypath_join(UNRELIABLE_TMP_FOLDER, 'audience')
# Root of the export tree; run() writes each audience table into
# AUDIENCE_FOLDER/<export_folder_name>/<audience_name>.
AUDIENCE_FOLDER = ypath_join(EXPORT_FOLDER, 'audience')
# run() requires the produced audience table to have strictly more rows
# than this threshold; otherwise the export fails.
MIN_AUDIENCE_TABLE_SIZE = 1000000


def detect_ready(date_time, days=None):
    """Yield an export task description for every active export audience.

    Walks the active entries of the audience config table, keeps those whose
    ``additional`` mapping contains the ``is_export`` key, and for each one
    looks up the last table in ``MODELS_FOLDER/<partner_id>/<score_name>``.
    The last path component of that table is taken as the date string and
    validated with ``time_utils.assert_date_str``.

    Args:
        date_time: Not used by this function.
        days: Not used by this function.

    Yields:
        ``(key, data)`` pairs where ``key`` is built by ``_key`` from the
        audience name and date string, and ``data`` is a dict with the
        ``audience_name`` and ``date_str`` entries.
    """
    client = yt_utils.get_yt_client()
    active_audiences = AudienceConfigTable(
        client, AUDIENCE_CONFIG_TABLE_PATH).list_active()
    for entry in active_audiences:
        # Only audiences explicitly marked for export produce tasks.
        if 'is_export' in entry['additional']:
            name = entry['audience_name']
            partner = entry['partner_id']
            score = entry['score_name']
            models_dir = yt_utils.ypath_join(MODELS_FOLDER, partner, score)
            newest_table = yt_utils.get_last_table(models_dir, client)
            # The table name itself is the date of the model run.
            table_date = newest_table.rsplit('/', 1)[-1]
            time_utils.assert_date_str(table_date)
            task_data = dict(audience_name=name, date_str=table_date)
            yield _key(name, table_date), task_data


def run(task):
    """Build the export table for one audience from a dated model table.

    Reads ``audience_name`` and ``date_str`` from ``task.data``, loads the
    audience settings from the config table and, inside a single YT
    transaction:

    1. creates the tmp and result folders;
    2. sorts the model table by ``score`` into a tmp table if that table
       does not exist yet;
    3. checks that the tmp table is sorted by ``score``;
    4. extracts the configured ``[interval_start, interval_end]`` part into
       ``AUDIENCE_FOLDER/<export_folder_name>/<audience_name>``;
    5. checks that the result has more than ``MIN_AUDIENCE_TABLE_SIZE`` rows.

    Args:
        task: Task object exposing ``data`` (mapping with ``audience_name``
            and ``date_str``) and ``make_done()``.

    Returns:
        A one-element list containing ``task.make_done()``.

    Raises:
        AssertionError: If the audience has no ``export_folder_name`` or the
            resulting audience table is not larger than
            ``MIN_AUDIENCE_TABLE_SIZE`` rows.
        Exception: If the score table is not sorted by ``score``.
    """
    yt_client = yt_utils.get_yt_client()
    audience_name = task.data['audience_name']
    date_str = task.data['date_str']
    time_utils.assert_date_str(date_str)

    audience = AudienceConfigTable(yt_client, AUDIENCE_CONFIG_TABLE_PATH).get_audience(
        audience_name)
    partner_id, score_name = audience['partner_id'], audience['score_name']
    export_folder_name = audience['additional'].get('export_folder_name')
    # Raised explicitly (not via `assert`) so the check survives `python -O`;
    # the exception type and message are the same as before.
    if not export_folder_name:
        raise AssertionError('export_folder_name is expected for export segments')

    interval_start = audience['interval_start']
    interval_end = audience['interval_end']
    interval = take_part.Interval(interval_start, interval_end)

    result_folder = ypath_join(AUDIENCE_FOLDER, export_folder_name)
    input_table = yt_utils.ypath_join(MODELS_FOLDER, partner_id, score_name, date_str)
    # The sorted copy is keyed by partner/score/date, so it can be shared by
    # every audience built from the same model table.
    score_table = _define_score_table(
        ypath_join(TMP, '{}-{}-{}'.format(partner_id, score_name, date_str)))
    audience_table = ypath_join(result_folder, audience_name)

    with yt_client.Transaction():
        yt_utils.create_folders([TMP, result_folder], yt_client)
        if not yt_utils.check_table_exists(score_table, yt_client):
            _prepare_score_table(yt_client, input_table, score_table)
        if not yt_utils.is_sorted_by(yt_client, score_table, ['score']):
            # The message now names the column that is actually checked.
            raise Exception('Table must be sorted by score')
        _extract_interval(yt_client, score_table, audience_table, interval)
        # Validate inside the transaction so a failure propagates out of the
        # `with` block (presumably aborting the transaction -- confirm against
        # the YT client) instead of leaving a too-small table in place.
        if yt_client.row_count(audience_table) <= MIN_AUDIENCE_TABLE_SIZE:
            raise AssertionError('Audience table too small, need check')
    return [task.make_done()]


def _prepare_score_table(yt_client, input_table, score_table):
    """Sort the ``cid``/``score`` columns of ``input_table`` by ``score``.

    Args:
        yt_client: YT client used to run the sort operation.
        input_table: Path of the source table; only its ``cid`` and
            ``score`` columns are read.
        score_table: Destination table for the sorted rows.
    """
    # Restrict the read to the two columns the export needs.
    source = TablePath(input_table, columns=['cid', 'score'])
    yt_client.run_sort(source, score_table, sort_by='score')


def _extract_interval(yt_client, input_table, output_table, interval):
    """Write the ``interval`` part of ``input_table`` into ``output_table``.

    The output path is wrapped with the ``cid``/``score`` schema before it
    is handed to ``take_part.take_part``, which is called with ``'score'``
    as its last argument.

    Args:
        yt_client: YT client passed through to ``take_part.take_part``.
        input_table: Table to take the part from.
        output_table: Path of the table to write.
        interval: ``take_part.Interval`` describing the part to extract.
    """
    typed_output = _define_score_table(output_table)
    take_part.take_part(
        yt_client,
        input_table,
        typed_output,
        interval,
        'score',
    )


def _key(audience_name, date_str):
    return '{}#{}'.format(audience_name, date_str)


def _define_score_table(path):
    """Wrap ``path`` in a ``TablePath`` carrying the score-table schema.

    The schema has two columns: ``cid`` of type ``string`` and ``score`` of
    type ``double``.

    Args:
        path: Table path to wrap.

    Returns:
        A ``TablePath`` for ``path`` with the schema attached.
    """
    columns = (('cid', 'string'), ('score', 'double'))
    schema = [
        {'type': column_type, 'name': column_name}
        for column_name, column_type in columns
    ]
    return TablePath(path, schema=schema)
