import json
import logging
from itertools import tee, izip_longest


# Module-level logger shared by all helpers in this file.
logger = logging.getLogger(__name__)
# Column schema passed as the 'schema' attribute when a sorted table is
# created (merge_data uses it for the per-release tables).
#
# Columns that carry a 'sort_order' entry are the ones merge_data uses, in
# this order, as the key for pre-sorting rows before writing. The trailing
# columns (baseline_rps, test_rps, ammo_resource) have no 'sort_order' and
# are plain value columns.
# NOTE(review): 'type_v3' looks like the YT logical type description format;
# confirm against the YT schema documentation before changing any type.
DIFF_RPS_SORTED_SCHEMA = [
    {
        'name': 'role',
        'type_v3': {
            'type_name': 'string',
        },
        'sort_order': 'ascending',
    },
    {
        'name': 'meta_mode',
        'type_v3': {
            'type_name': 'string',
        },
        'sort_order': 'ascending',
    },
    {
        'name': 'sampling_strategy',
        'type_v3': {
            'type_name': 'string',
        },
        'sort_order': 'ascending',
    },
    {
        # Nullable column: rows may carry None here.
        'name': 'shard',
        'type_v3': {
            'type_name': 'optional',
            'item': 'uint8',
        },
        'sort_order': 'ascending',
    },
    {
        'name': 'handlers',
        'type_v3': {
            'type_name': 'list',
            'item': 'string',
        },
        'sort_order': 'ascending',
    },
    {
        'name': 'baseline_revision',
        'type_v3': {
            'type_name': 'uint32',
        },
        'sort_order': 'ascending',
    },
    {
        'name': 'test_revision',
        'type_v3': {
            'type_name': 'uint32',
        },
        'sort_order': 'ascending',
    },
    {
        'name': 'baseline_rps',
        'type_v3': {
            'type_name': 'float',
        },
    },
    {
        'name': 'test_rps',
        'type_v3': {
            'type_name': 'float',
        },
    },
    {
        # Nullable column: rows may carry None here.
        'name': 'ammo_resource',
        'type_v3': {
            'type_name': 'optional',
            'item': 'int64',
        },
    },
]
# Same columns as DIFF_RPS_SORTED_SCHEMA, in the same order, but with the
# 'sort_order' entry left out: only 'name' and 'type_v3' are carried over.
# The 'type_v3' dicts are shared with the sorted schema, not copied.
DIFF_RPS_UNSORTED_SCHEMA = [
    dict((key, f[key]) for key in ('name', 'type_v3'))
    for f in DIFF_RPS_SORTED_SCHEMA
]


def write_data(yt_client, table_path, data, schema=None):
    """Write rows into a YT table, creating the table first if it is missing.

    Every row is projected onto the columns of the schema that is actually
    stored on the table: keys that are not columns are dropped, and columns
    missing from a row are written as ``None``.

    :param yt_client: client object providing ``exists``, ``create``,
        ``get_attribute`` and ``write_table``.
    :param table_path: path of the destination table.
    :param data: iterable of dict-like rows (each must support ``.get``).
    :param schema: schema used only when the table has to be created;
        defaults to ``DIFF_RPS_UNSORTED_SCHEMA``.
    """
    if schema is None:
        schema = DIFF_RPS_UNSORTED_SCHEMA

    if not yt_client.exists(table_path):
        logger.debug('Will create %s', table_path)
        yt_client.create('table', table_path, attributes={'schema': schema}, recursive=True)

    # Re-read the schema from the table itself: for a table that already
    # existed it may differ from the ``schema`` argument.
    table_schema = yt_client.get_attribute(table_path, 'schema')
    column_names = [column['name'] for column in table_schema]
    filtered_data = [
        {name: datum.get(name) for name in column_names}
        for datum in data
    ]

    # Serialising the whole payload is expensive and may fail on values that
    # JSON cannot represent, so only do it when the message will be emitted.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('Will write %d rows into %s:\n%s', len(filtered_data), table_path, json.dumps(filtered_data, indent=2))
    yt_client.write_table(table_path, filtered_data)


def get_release_intervals(scope_base_revisions):
    """Map every release number to a ``(begin, end)`` pair of base revisions.

    Releases are ordered by release number. For each release ``end`` is its
    own base revision and ``begin`` is the base revision of the preceding
    release in that order; the first release has ``begin`` set to ``None``.

    Example: ``{1: 100, 2: 200}`` -> ``{1: (None, 100), 2: (100, 200)}``.

    :param scope_base_revisions: mapping of release number -> base revision.
    :return: dict of release number -> ``(begin_revision, end_revision)``;
        empty when the input mapping is empty.
    """
    sorted_release_numbers = sorted(scope_base_revisions)
    base_revisions = [scope_base_revisions[number] for number in sorted_release_numbers]
    # Shift the list by one position so that each release is paired with the
    # base revision of its predecessor; the first one has no predecessor.
    lower_bounds = [None] + base_revisions[:-1]
    release_intervals = {
        number: (lower, upper)
        for number, lower, upper in zip(sorted_release_numbers, lower_bounds, base_revisions)
    }
    logger.info('Got release intervals: %s', release_intervals)
    return release_intervals


def merge_data(yt_client, trunk_data, trunk_table_path, release_table_prefix, scope_base_revisions, min_last_processed_revision):
    """Move trunk rows that belong to a release into per-release tables.

    A release is considered only when no node named after it is listed under
    ``release_table_prefix`` and its interval end does not exceed
    ``min_last_processed_revision``. A row goes to the first such release
    whose interval (see ``get_release_intervals``) satisfies
    ``begin <= row['baseline_revision']`` and ``row['test_revision'] <= end``;
    a ``None`` bound means "unbounded" on that side. Rows that match no
    release stay in trunk.

    Each non-empty release group is sorted by the sort columns of
    ``DIFF_RPS_SORTED_SCHEMA`` and written to ``<release_table_prefix>/<release>``.
    The remaining rows are written to ``trunk_table_path + '.new'``, which is
    then moved over ``trunk_table_path`` with ``force=True``.

    :param yt_client: client object providing ``list`` and ``move`` plus
        everything ``write_data`` needs.
    :param trunk_data: iterable of dict rows currently stored in trunk.
    :param trunk_table_path: path of the trunk table to be replaced.
    :param release_table_prefix: directory holding the per-release tables.
    :param scope_base_revisions: mapping of release number -> base revision.
    :param min_last_processed_revision: releases whose interval end is greater
        than this revision are left in trunk for now.
    """
    from yt.wrapper import ypath_join

    release_intervals = get_release_intervals(scope_base_revisions)
    existing_tables = list(yt_client.list(release_table_prefix))

    # Whether a release is eligible does not depend on the row, so decide it
    # once here instead of once per row.
    candidate_intervals = []
    for scope_number, (begin_revision, end_revision) in release_intervals.items():
        table_name = str(scope_number)
        if table_name in existing_tables:
            continue
        # Check for None explicitly instead of relying on how None happens to
        # order against integers.
        if end_revision is not None and end_revision > min_last_processed_revision:
            continue
        candidate_intervals.append((table_name, begin_revision, end_revision))

    release_data = {}
    new_trunk_data = []
    for row in trunk_data:
        for table_name, begin_revision, end_revision in candidate_intervals:
            # begin_revision is None for the first release: no lower bound.
            if (end_revision is None or row['test_revision'] <= end_revision) and \
                    (begin_revision is None or row['baseline_revision'] >= begin_revision):
                release_data.setdefault(table_name, []).append(row)
                break
        else:
            new_trunk_data.append(row)

    sort_columns = [column['name'] for column in DIFF_RPS_SORTED_SCHEMA if 'sort_order' in column]

    def sort_key(row):
        # Pair every value with an "is set" flag so that None sorts before any
        # real value without ever being compared to it directly.
        # NOTE(review): presumably this matches how YT orders nulls in sorted
        # tables -- confirm against the YT documentation.
        return tuple((row[name] is not None, row[name]) for name in sort_columns)

    # Groups are created on first append, so none of them is empty.
    for release_number, rows in release_data.items():
        write_data(
            yt_client,
            ypath_join(release_table_prefix, release_number),
            sorted(rows, key=sort_key),
            schema=DIFF_RPS_SORTED_SCHEMA,
        )

    # Build the new trunk table next to the old one and swap it in afterwards.
    new_trunk_table_path = trunk_table_path + '.new'
    write_data(yt_client, new_trunk_table_path, new_trunk_data, schema=DIFF_RPS_UNSORTED_SCHEMA)
    yt_client.move(new_trunk_table_path, trunk_table_path, force=True)
