import logging
from yt.wrapper import ypath_join, YtHttpResponseError


logger = logging.getLogger(__name__)


# Default set of tables to sample, used by create_sampling_tables_config()
# when the caller does not pass its own `tables` mapping.
#
# Keys are table paths that get joined onto the input/output prefixes.
# Each value is a dict whose 'join_key' entry lists the column names handed to
# SamplingTableConfig as `event_join_key`.
TABLES_TO_SAMPLE = {
    'home/yabs/dict/replica/OrderInfo': {
        'join_key': ['OrderID'],
    },
    'home/yabs/dict/replica/MysqlBannerDontUse': {
        'join_key': ['BannerID'],
    },
    'home/yabs/stat/BannerPhraseStatInterval': {
        'join_key': ['BannerID'],
    },
    'home/yabs/stat/PageBannerStat': {
        'join_key': ['BannerID'],
    },
    'home/yabs-cs/ads_quality/BannerVectorsAdv': {
        'join_key': ['BannerID'],
    },
    'home/yabs/stat/OrderBannerPhraseModel': {
        'join_key': ['BannerID'],
    },
    'home/yabs/stat/BannerPhraseStatControl': {
        'join_key': ['BannerID'],
    },
    'home/yabs-cs/key_1/v2/broad_match_v1/BroadMatch': {
        'join_key': ['BannerID'],
    },
    'home/bs/broad_match/v7/BroadMatchWithPhraseTable': {
        'join_key': ['BannerID'],
    },
    'home/yabs-cs/key_1/v2/advmachine_export/Banners': {
        'join_key': ['BannerID'],
    },
}


class SamplingTableConfig(object):
    """Paths and key information describing how a single table is sampled.

    Attributes:
        relative_input_path: table path as given, without any prefix.
        absolute_input_path: ``input_prefix`` + '/' + relative path.
        absolute_output_path: ``output_prefix`` + '/' + relative path.
        intermediate_absolute_output_path: output path with '.sampled' suffix.
        intermediate_merged_absolute_output_path: output path with '.merged'
            suffix.
        event_join_key: join key columns passed by the caller.
        sort_key: sort key columns passed by the caller.
        need_mount: flag passed by the caller.
        is_dynamic: False until check_original_table_type() detects a truthy
            '@dynamic' attribute on the input table.
    """

    def __init__(self, relative_path, event_join_key, sort_key, input_prefix, output_prefix, need_mount):
        output_base = '/'.join((output_prefix, relative_path))

        # Where the original table lives.
        self.relative_input_path = relative_path
        self.absolute_input_path = '/'.join((input_prefix, relative_path))

        # Final location and the two intermediate stages derived from it.
        self.absolute_output_path = output_base
        self.intermediate_absolute_output_path = output_base + '.sampled'
        self.intermediate_merged_absolute_output_path = output_base + '.merged'

        # Key columns.
        self.event_join_key = event_join_key
        self.sort_key = sort_key

        # Table type flags; is_dynamic is refined by check_original_table_type().
        self.need_mount = need_mount
        self.is_dynamic = False

    def check_original_table_type(self, yt_client):
        """Read '@dynamic' of the input table and set ``is_dynamic`` if it is truthy.

        The flag is only ever raised here, never reset back to False.
        """
        dynamic_flag = yt_client.get(ypath_join(self.absolute_input_path, '@dynamic'))
        logger.debug('dynamic attribute is %s %s', type(dynamic_flag), dynamic_flag)
        if not dynamic_flag:
            return
        self.is_dynamic = True


def create_dynamic_table_empty_copy(source, target, yt_client):
    """Create an empty table at ``target`` that reuses the schema of ``source``.

    The schema of ``source`` is always fetched (and logged) first. The target
    is created with that schema and ``unique_keys=True``, creating missing
    parent nodes as needed.

    Returns:
        True if the table was created, False if ``target`` already existed
        (in which case nothing is changed).
    """
    source_schema = yt_client.get(ypath_join(source, '@schema'))
    logger.debug('Got schema: %s', source_schema)

    target_is_free = not yt_client.exists(target)
    if target_is_free:
        yt_client.create(
            'table',
            target,
            attributes={'schema': source_schema, 'unique_keys': True},
            recursive=True,
        )
    else:
        logger.warning('Target %s already exists', target)
    return target_is_free


def create_sampling_tables_config(input_prefix, output_prefix, yt_client, tables=None, need_mount_tables=None):
    """Build a SamplingTableConfig for every requested table present in the input archive.

    Args:
        input_prefix: prefix joined with each table path to locate the input.
        output_prefix: prefix under which the output paths are derived.
        yt_client: YT client used for existence checks and attribute reads.
        tables: mapping ``path -> {'join_key': [...]}``; when falsy (None or
            empty) TABLES_TO_SAMPLE is used instead.
        need_mount_tables: container of table paths that must be marked with
            ``need_mount=True``; None means no table needs mounting.

    Returns:
        List of SamplingTableConfig objects. Tables missing under
        ``input_prefix`` are skipped with a warning.
    """
    tables = tables or TABLES_TO_SAMPLE
    # Avoid a shared mutable default; an empty tuple gives the same "nothing
    # needs mounting" behaviour as the former `{}` default.
    if need_mount_tables is None:
        need_mount_tables = ()

    table_configs = []
    for path, config in tables.items():
        if not yt_client.exists(ypath_join(input_prefix, path)):
            logger.warning('Table %s is missing in input-archive', path)
            continue

        schema = yt_client.get(ypath_join(input_prefix, path, '@schema'))
        # Columns that carry a 'sort_order' make up the table sort key.
        sort_key = [column['name'] for column in schema if column.get('sort_order')]

        table_config = SamplingTableConfig(
            path,
            config['join_key'],
            sort_key,
            input_prefix,
            output_prefix,
            need_mount=path in need_mount_tables,
        )
        table_config.check_original_table_type(yt_client)
        table_configs.append(table_config)

    return table_configs


def check_sampled_intermediate_tables_existence(table_configs, yt_client):
    """Return the configs whose intermediate '.sampled' table does not exist yet.

    The relative order of ``table_configs`` is preserved in the result.
    """
    pending = []
    for config in table_configs:
        sampled_path = config.intermediate_absolute_output_path
        if yt_client.exists(sampled_path):
            logger.debug('Table %s already exists', sampled_path)
            continue
        logger.debug('Table %s does not exist', sampled_path)
        pending.append(config)
    return pending


def check_merged_intermediate_tables_existence(table_configs, yt_client):
    """Return the configs whose intermediate '.merged' table still has to be produced.

    A config is reported when its merged table is absent, or when the original
    table is dynamic and the existing merged table's schema is not strict.
    The relative order of ``table_configs`` is preserved in the result.
    """
    needs_merge = []
    for config in table_configs:
        merged_path = config.intermediate_merged_absolute_output_path

        if not yt_client.exists(merged_path):
            logger.debug('Table %s does not exist', merged_path)
            needs_merge.append(config)
            continue

        # The schema is only inspected for tables that were dynamic originally.
        if config.is_dynamic:
            merged_schema = yt_client.get(merged_path + '/@schema')
            if not merged_schema.attributes['strict']:
                logger.debug('Table %s exist, but does not have strict schema, so it needs merging', merged_path)
                needs_merge.append(config)
                continue

        logger.debug('Table %s already exists', merged_path)
    return needs_merge


def get_target_path(source_path, source_prefix, destination):
    """Map ``source_path`` to the corresponding path under ``destination``.

    If ``source_path`` starts with ``source_prefix`` the prefix is stripped and
    the remainder is joined onto ``destination``. Otherwise a warning is logged
    and the full source path (without leading slashes) is joined instead.

    Note: the prefix check is a plain string ``startswith``; it does not verify
    that the prefix ends on a path-component boundary.
    """
    if source_path.startswith(source_prefix):
        relative_path = source_path[len(source_prefix):]
    else:
        # The format arguments were previously missing, so the placeholders
        # were logged verbatim.
        logger.warning(
            'Source path \'%s\' does not start with source prefix \'%s\', will preserve full path',
            source_path,
            source_prefix,
        )
        relative_path = source_path
    return ypath_join(destination, relative_path.lstrip('/'))


def clone_archive_with_symlinks(source, source_prefix, destination_prefix, yt_client):
    """Recursively mirror ``source`` under ``destination_prefix`` using links.

    * map nodes are traversed child by child;
    * links are reproduced: the link's resolved path is cloned first and a new
      link to that clone is created;
    * every other node gets a link pointing at the original node.

    Non-map nodes whose destination already exists are left untouched.

    Returns:
        The destination path that corresponds to ``source``.
    """
    destination_path = get_target_path(source, source_prefix, destination_prefix)
    # The '&' suffix is used so that the type of the node itself is read.
    source_type = yt_client.get(ypath_join(source.rstrip('/') + '&', '@type'))

    if source_type == 'map_node':
        for child in yt_client.list(source):
            clone_archive_with_symlinks(ypath_join(source, child), source_prefix, destination_prefix, yt_client)
        return destination_path

    if yt_client.exists(destination_path):
        logger.debug('%s %s already exists', source_type, destination_path)
        return destination_path

    if source_type != 'link':
        logger.debug('Linking %s to %s', source, destination_path)
        yt_client.link(source, destination_path, recursive=True, ignore_existing=True)
        return destination_path

    # Link: clone whatever it resolves to, then point the new link at the clone.
    original_target = yt_client.get(ypath_join(source.rstrip('/'), '@path'))
    logger.debug('Cloning target path %s of a symlink %s', original_target, source)
    cloned_target = clone_archive_with_symlinks(original_target, source_prefix, destination_prefix, yt_client)
    logger.debug('Linking %s to %s', cloned_target, destination_path)
    yt_client.link(cloned_target, destination_path, recursive=True)
    return destination_path


def switch_symlinks(table_configs, yt_client):
    """Re-point every input path link at the corresponding output table.

    For each config the link at ``absolute_input_path`` is overwritten
    (``force=True``) so that it targets ``absolute_output_path``. An existing
    input node that is not a link is never overwritten: it is logged, recorded
    and skipped, while the remaining configs are still processed.

    Raises:
        RuntimeError: after all configs were processed, if at least one input
            path existed but was not a link.
    """
    errors = []
    for table_config in table_configs:
        input_path = table_config.absolute_input_path

        if yt_client.exists(input_path):
            # The '&' suffix is used so that the type of the node itself is read.
            node_type = yt_client.get(ypath_join(input_path + '&', '@type'))
            if node_type != 'link':
                # Both placeholders need a value; the type was missing before,
                # which made the logging call fail to format.
                logger.error('Node %s has type %s, should be \'link\'', input_path, node_type)
                errors.append(input_path)
                continue

        logger.info('Overwriting link %s -> %s', input_path, table_config.absolute_output_path)
        yt_client.link(table_config.absolute_output_path, input_path, force=True)

    if errors:
        # Exceptions do not interpolate their arguments, so format explicitly.
        raise RuntimeError('Tables %s could not be switched' % (errors,))


def get_tables_for_sampling(yt_client, archive_root, min_rows_count, source_tables, keys, black_list_tables, need_mount_tables):
    """Select the archive tables that are large enough and have a usable sampling key.

    Args:
        yt_client: YT client used for attribute reads.
        archive_root: archive root; tables are looked up under
            ``<archive_root>/yt_tables``.
        min_rows_count: only tables with strictly more rows are considered.
        source_tables: iterable of table paths (leading slashes are ignored).
        keys: candidate key column names, in order of preference.
        black_list_tables: container of table paths to skip.
        need_mount_tables: set of table paths that need mounting. NOTE: it is
            updated in place when a selected table is replaced by the path its
            '@path' attribute reports.

    Returns:
        Dict ``table -> {'join_key': [key]}`` for every selected table.
    """
    # Loop-invariant, so computed once.
    input_archive_path = ypath_join(archive_root, 'yt_tables')

    tables = {}
    for table in source_tables:
        # str.lstrip takes a set of characters, so a single '/' is sufficient.
        table = table.lstrip('/')

        need_mount = table in need_mount_tables

        if table in tables or table in black_list_tables or table.endswith('/'):
            continue

        full_table_name = ypath_join(input_archive_path, table)

        try:
            rows_count = yt_client.get(full_table_name + '/@chunk_row_count')
        except YtHttpResponseError as e:
            # logger.warn is a deprecated alias; the rest of the module uses warning().
            logger.warning("Exception when getting @chunk_row_count attribute of %s: %s", full_table_name, e.message)
            continue

        if rows_count <= min_rows_count:
            continue

        key = find_key_for_sampling(full_table_name, keys, yt_client)
        if not key:
            logger.info("Couldn't find a key for table %s", table)
            continue

        # If the node reports a different path (e.g. it is a link), register
        # the table under that path, relative to the archive.
        link_table = yt_client.get(full_table_name + '/@path')
        if link_table and link_table != full_table_name:
            table = link_table.replace(input_archive_path + '/', '')

        logger.info('Table %s will be sampled by key %s', table, key)
        tables[table] = {"join_key": [key]}
        if need_mount:
            need_mount_tables.add(table)
    return tables


def find_key_for_sampling(table, keys, yt_client):
    """Pick the first candidate from ``keys`` that the table can be sampled by.

    The table's '@key_columns' are consulted first; if none of the candidates
    is found there (or the attribute is empty), all column names from
    '@schema' are tried.

    Returns:
        The matching key name, or None when no candidate is present.
    """
    sorted_columns = yt_client.get(table + '/@key_columns')
    if sorted_columns:
        for candidate in keys:
            if candidate in sorted_columns:
                return candidate

    logger.info("Try to find key in schema for table %s", table)
    column_names = [column['name'] for column in yt_client.get(table + '/@schema')]
    for candidate in keys:
        if candidate in column_names:
            return candidate

    return None


def link_resulting_tables(table_configs, yt_client):
    """Link each final output path to its intermediate '.merged' result.

    Configs whose merged path is missing are logged as errors and skipped;
    configs whose output path already exists are left untouched.
    """
    for config in table_configs:
        merged_path = config.intermediate_merged_absolute_output_path
        output_path = config.absolute_output_path

        if not yt_client.exists(merged_path):
            logger.error('Path %s does not exist, sampling or merge probably failed', merged_path)
            continue
        if yt_client.exists(output_path):
            logger.debug('Path %s already exists', output_path)
            continue

        # Link straight to the merged node when it is a table itself; otherwise
        # use the path reported by the node.
        # NOTE(review): with the '&' suffix '@path' presumably describes the
        # node itself rather than a link's target - confirm this is intended.
        if yt_client.get(merged_path + '&/@type') == 'table':
            link_target_path = merged_path
        else:
            link_target_path = yt_client.get(merged_path + '&/@path')

        logger.info('Will link %s to %s', link_target_path, output_path)
        yt_client.link(link_target_path, output_path, force=True)
