from functools import partial

import yt.wrapper as yt

from utils import mr_utils as mr
from utils import utils
from rtcconf import config
from v2.soup import soup_config
from v2.soup import soup_edge_type
from v2.soup.soup_tables import SoupDumpTable, SoupStorageTable

# Maps each renamed id type (new name) to the name it had before the rename.
# Id types that do not appear here are used unchanged by the code below.
new_to_old_id_types = {
    'email_md5': 'email_hash',
    'phone_md5': 'phone_hash',
    'vk_id': 'vk',
    'ok_id': 'ok',
    'avito_id': 'avito',
    'kp_id': 'kp_uid',
    'distr_ui': 'ui',
    'distr_r1': 'r1',
    'yandexuid': 'yuid',
}

# Reverse lookup: old id-type name -> new id-type name.
# ``items()`` (rather than the Python-2-only ``iteritems()``) keeps this line
# working on both Python 2 and Python 3; the mapping is tiny, so building a
# list on Python 2 costs nothing.
old_to_new_id_types = {old: new for new, old in new_to_old_id_types.items()}


def convert_id_types(rec):
    """Mapper: re-emit one edge record with its id types translated to new names.

    Reads ``id1Type``/``id2Type`` from ``rec``, replaces each with its entry in
    ``old_to_new_id_types`` when one exists (otherwise keeps it as is), resolves
    the edge type via ``soup_edge_type.get_edge_type`` and yields a single
    record built by ``SoupDumpTable.make_rec``.
    """
    translated = []
    for type_col in ('id1Type', 'id2Type'):
        # Fall back to the stored value when the id type has no new name.
        translated.append(old_to_new_id_types.get(rec[type_col]) or rec[type_col])

    edge = soup_edge_type.get_edge_type(
        translated[0],
        translated[1],
        rec['sourceType'],
        rec['logSource'],
    )

    yield SoupDumpTable.make_rec(rec['id1'], rec['id2'], edge, rec['dates'])


def map_device_id_join_key(rec, key_col):
    """Mapper: add a ``join_key`` column holding the lower-cased ``rec[key_col]``.

    The record is modified in place and yielded back once.
    """
    lowered = rec[key_col].lower()
    rec.update(join_key=lowered)
    yield rec


def reduce_replace_device_id(join_key, recs, device_id_col):
    """Reducer: overwrite ``device_id_col`` of edge rows with the reference id.

    Rows with ``@table_index == 0`` are reference device-id rows; all other
    rows are edge rows. The first row of the group must be a reference row,
    otherwise the whole group is dropped. Every following edge row gets
    ``device_id_col`` replaced by the ``id`` of that first reference row, has
    its ``@table_index`` reset to 0, loses the ``join_key`` column and is
    yielded. Additional reference rows in the same group are ignored.

    ``join_key`` is unused; it is only part of the reducer signature.
    """
    rows = iter(recs)

    reference = next(rows, None)
    if reference is None or reference['@table_index'] != 0:
        # Empty group, or no reference row for this key: emit nothing.
        return

    for row in rows:
        if row['@table_index'] == 0:
            # A second reference row for the same key is silently skipped.
            continue

        row[device_id_col] = reference['id']
        row['@table_index'] = 0
        del row['join_key']
        yield row


def _convert_renamed_edge_tables(workdir):
    """Map daily edge tables stored under old id-type names into ``workdir``.

    For every edge type in ``soup_config.ALL_EDGES`` with DAILY supply type
    whose id1 or id2 type has an entry in ``new_to_old_id_types``, the table
    named with the old id types under ``soup_config.SOUP_DIR`` is run through
    ``convert_id_types`` into a table named with the new id types under
    ``workdir``. Map operations are started asynchronously inside a single
    transaction and awaited together before the transaction is left.
    """
    with yt.Transaction() as tr:
        ops = []

        for et in soup_config.ALL_EDGES:
            if not et.supply_type == soup_edge_type.SupplyType.DAILY:
                continue

            id1_old = new_to_old_id_types.get(et.id1_type)
            id2_old = new_to_old_id_types.get(et.id2_type)
            if not (id1_old or id2_old):
                # Neither id type was renamed, so the table name is unchanged.
                continue

            id1_old = id1_old or et.id1_type
            id2_old = id2_old or et.id2_type

            old_table = soup_config.SOUP_DIR + '%s_%s_%s_%s' % (id1_old, id2_old, et.source, et.log_source)
            new_table = workdir + '%s_%s_%s_%s' % (et.id1_type, et.id2_type, et.source, et.log_source)

            # The original check was `old_table in tables`, but `tables` is not
            # defined in this function; ask YT directly whether the node exists.
            if not yt.exists(old_table):
                continue

            print(old_table)
            print(new_table)

            mr.create_table_with_schema(new_table, SoupDumpTable.schema, tr, strict=True)
            ops.append(yt.run_map(
                convert_id_types,
                old_table,
                new_table,
                job_count=mr.calculate_optimized_mr_partition_count(old_table),
                sync=False
            ))

            print('')

        utils.wait_all(ops)


def _replace_device_ids(workdir):
    """Rewrite device ids in daily device_id edge tables from the reference table.

    The reference table ``app_metrica_month`` is copied into
    ``<workdir>device_id/`` with a lower-cased ``join_key`` column and sorted by
    it. Each daily edge table with a ``device_id`` endpoint gets the same
    ``join_key`` treatment and is then reduced together with the reference table
    by ``reduce_replace_device_id`` into ``<workdir>device_id/final/``.
    """
    devid_workdir = workdir + 'device_id/'
    devid_workdir_final = workdir + 'device_id/final/'
    mr.mkdir(devid_workdir)
    mr.mkdir(devid_workdir_final)

    reference_table = devid_workdir + 'app_metrica_month'
    yt.run_map(partial(map_device_id_join_key, key_col='id'),
               '//home/crypta/production/ids_storage/device_id/app_metrica_month',
               reference_table)
    yt.run_sort(reference_table, sort_by='join_key')

    for et in soup_config.ALL_EDGES:
        if not et.supply_type == soup_edge_type.SupplyType.DAILY:
            continue

        if et.id1_type == 'device_id':
            key_col = 'id1'
        elif et.id2_type == 'device_id':
            key_col = 'id2'
        else:
            continue

        table_name = et.name()

        # Prefer the table already converted into workdir; otherwise read the
        # table of the same name from the soup directory.
        new_table = workdir + table_name
        if not yt.exists(new_table):
            new_table = soup_config.SOUP_DIR + table_name

        print(new_table)

        keyed_table = devid_workdir + table_name
        final_table = devid_workdir_final + table_name

        with yt.Transaction() as tr:
            yt.run_map(partial(map_device_id_join_key, key_col=key_col),
                       new_table,
                       keyed_table)
            yt.run_sort(keyed_table, sort_by='join_key')

            mr.create_table_with_schema(final_table, SoupDumpTable.schema, tr, strict=True)
            # Input order matters: the reducer treats table index 0 as the
            # reference device-id table.
            yt.run_reduce(
                partial(reduce_replace_device_id, device_id_col=key_col),
                [
                    reference_table,
                    keyed_table
                ],
                final_table,
                reduce_by='join_key')


def _copy_workdir_to_soup(workdir):
    """Copy every direct child of ``workdir`` to the same name under SOUP_DIR."""
    # NOTE(review): the listing is not filtered by node type, so the
    # `device_id` node created under workdir is copied too, while the tables in
    # `device_id/final/` are not copied individually - confirm this is intended.
    for t in yt.list(workdir[:-1]):
        print(workdir + t)
        print(soup_config.SOUP_DIR + t)
        yt.copy(workdir + t, soup_config.SOUP_DIR + t)
        print('')


def migration_2017_11_14():
    """Run the 2017-11-14 soup migration.

    Three phases, all staged in a fixed working directory:

    1. convert daily edge tables named with old id types to the new names;
    2. replace device ids in daily device_id edge tables using the
       ``app_metrica_month`` reference table;
    3. copy the contents of the working directory into ``soup_config.SOUP_DIR``.
    """
    workdir = '//home/crypta/team/artembelov/soup_migration/'
    mr.mkdir(workdir)

    _convert_renamed_edge_tables(workdir)
    _replace_device_ids(workdir)
    _copy_workdir_to_soup(workdir)


def map_soup_rec(rec):
    """Mapper: rebuild one edge record in the ``SoupStorageTable`` record format.

    The edge type is resolved by ``soup_edge_type.get_edge_type`` from the
    record's ``id1Type``, ``id2Type``, ``sourceType`` and ``logSource`` columns;
    exactly one record is yielded.
    """
    edge_key = (rec['id1Type'], rec['id2Type'], rec['sourceType'], rec['logSource'])
    resolved = soup_edge_type.get_edge_type(*edge_key)
    yield SoupStorageTable.make_rec(rec['id1'], rec['id2'], resolved, rec['dates'])


if __name__ == '__main__':
    yt.config.set_proxy(config.MR_SERVER)
    yt.config["tabular_data_format"] = yt.YsonFormat(process_table_index=True)


    tmp_dir = '//home/crypta/testing/state/graph/v2/soup/migration/'
    mr.mkdir(tmp_dir)

    with yt.Transaction() as tr:
        ops = []
        tables = yt.list('//home/crypta/testing/state/graph/v2/soup', absolute=True)
        for t in tables:
            node_type = yt.get_attribute(t, 'type')
            if node_type == 'table':
                dst_t = tmp_dir + t[t.rfind("/") + 1:]
                print t, dst_t

                # mr.create_table_with_schema(dst_t, SoupStorageTable("", "").schema, tr, strict=True)
                # op = yt.run_map(map_soup_rec, t, dst_t, sync=False)
                # ops.append(op)

                yt.copy(dst_t, t, force=True)

        utils.wait_all(ops)


