import itertools

import yt.wrapper as yt

from utils import mr_utils as mr
from utils import utils
from rtcconf import config

# Upper bound on how many records of one crypta_id are counted by
# add_crypta_id_size; the reported crypta_id_size never exceeds this value.
LARGE_CRYPTA_ID_LIMIT = 10000

def map_vertices_yamr_to_yt_format(rec):
    """Mapper: turn one YaMR-style vertex record into a YT-style row.

    Fields are extracted from ``rec['value']`` with ``mr.get_field_value``;
    the record key is emitted both as ``id`` and as ``key``. The optional
    ``component`` and ``orig_crypta_id`` columns are added only when their
    extracted value is truthy. Yields exactly one row.
    """
    packed = rec['value']

    # (output column, field name inside the packed value)
    field_map = (
        ('crypta_id', 'c'),
        ('id_type', 'id_type'),
        ('sources', 's'),
        ('component', 'component'),
        ('orig_crypta_id', 'orig_c'),
        ('cid_type', 't'),
    )
    extracted = {}
    for column, field in field_map:
        extracted[column] = mr.get_field_value(field, packed)

    vertex_id = rec['key']
    row = {'id': vertex_id, 'key': vertex_id}
    for column in ('id_type', 'crypta_id', 'cid_type', 'sources'):
        row[column] = extracted[column]

    # present only for clustered vertices
    for column in ('component', 'orig_crypta_id'):
        if extracted[column]:
            row[column] = extracted[column]

    yield row


def map_vertices_yt_to_yamr_format(rec):
    """Mapper: pack a YT-style vertex row back into a YaMR-style record.

    The value is a tab-separated ``name=value`` string holding the sources
    (written as both ``s`` and ``orig_s``), crypta id, cid type and id type,
    followed by ``d=y`` when the id type starts with ``yuid`` and ``d=d``
    otherwise. Yields one record with subkey ``'1'``.
    """
    sources = rec['sources']
    crypta_id = rec['crypta_id']
    cid_type = rec['cid_type']
    id_type = rec['id_type']

    kind_flag = 'y' if id_type.startswith('yuid') else 'd'
    template = 's=%s\torig_s=%s\tc=%s\tt=%s\tid_type=%s\td=%s'
    packed = template % (sources, sources, crypta_id, cid_type, id_type, kind_flag)

    yield {'key': rec['key'], 'subkey': '1', 'value': packed}


def yield_yuid_or_devid_rec(pair, idx, ids, pair_type, pair_source):
    """Build the lookup record for one side of a pair.

    Despite its name this returns a single dict (it is not a generator).
    When ``pair_type[idx]`` is ``'y'`` the id is stored under ``yuid`` and
    the record is routed to output table 1; for any other type it is stored
    under ``key`` and routed to output table 2.
    """
    kind = pair_type[idx]
    current_id = ids[idx]

    out = {'pair': pair, 'id_idx': idx, 'pair_source': pair_source}
    if kind == 'y':
        out['yuid'] = current_id
        out['@table_index'] = 1
    else:  # d
        out['key'] = current_id
        out['@table_index'] = 2
    return out


def map_pairs_to_edges(rec):
    """Mapper: fan one pair record out into two id lookup records and an edge.

    ``rec['key']`` holds the pair as ``<id1>_<id2>`` and ``rec['pair_type']``
    the matching ``<type1>_<type2>``. Yields three records: one lookup record
    per id (built by ``yield_yuid_or_devid_rec``) and finally the input
    record itself, updated in place and routed to output table 0.
    """
    pair = rec['key']
    ids = pair.split('_')
    id_kinds = rec['pair_type'].split('_')
    source = rec['pair_source']

    # one lookup record per pair side, later joined with the id dictionaries
    for position in (0, 1):
        yield yield_yuid_or_devid_rec(pair, position, ids, id_kinds, source)

    # The edge itself: 'key' now holds the left id instead of the pair so the
    # edge can be joined back to vertices; the pair is kept under 'pair'.
    rec.update({
        'key': ids[0],
        'pair': pair,
        'id1': ids[0],
        'id2': ids[1],
        '@table_index': 0,
    })
    yield rec


def map_devid_yuid_dict(rec):
    """Mapper: add a ``pair`` column of the form ``<devid>_<yuid>``.

    The input record is updated in place and yielded.
    """
    rec['pair'] = '_'.join((rec['devid'], rec['yuid']))
    yield rec


def add_crypta_id_size(crypta_key, recs):
    """Reducer: stamp every record of a group with ``crypta_id_size``.

    Only the first ``LARGE_CRYPTA_ID_LIMIT`` records are buffered and
    counted, so the reported size is capped at that limit; the remaining
    records are streamed through with the same (capped) value.

    :param crypta_key: reduce key of the group (not used here).
    :param recs: iterable of the group's records; each is updated in place.
    :return: generator over all input records, in input order.
    """
    # Pin a single iterator: the tail chained below must resume where the
    # islice stopped. A re-iterable input (e.g. a list) would otherwise be
    # restarted from the beginning and its first records emitted twice.
    recs = iter(recs)
    head = list(itertools.islice(recs, LARGE_CRYPTA_ID_LIMIT))

    crypta_id_size = len(head)  # count size up to LARGE_CRYPTA_ID_LIMIT only
    for r in itertools.chain(head, recs):
        r['crypta_id_size'] = crypta_id_size
        yield r


def join_crypta_id_from_vertices_to_edges(id_key, recs):
    """Reducer: copy crypta id info from the single vertex onto its edges.

    The group is split by ``mr.split_left_right`` into vertex records and
    edge records. When there is exactly one vertex, each edge receives its
    ``crypta_id`` and ``crypta_id_size`` and goes to output table 0.
    Otherwise a single diagnostic record (the reduce key plus a message and
    the vertex count) goes to output table 1 and the edges are dropped.
    """
    vertices, edges = mr.split_left_right(recs, oom_check=False)
    vertices_count = len(vertices)

    if vertices_count == 1:
        vertex = vertices[0]
        for edge in edges:
            edge['crypta_id'] = vertex['crypta_id']
            edge['crypta_id_size'] = vertex['crypta_id_size']
            edge['@table_index'] = 0
            yield edge
        return

    problem = dict(id_key)
    problem.update({
        'message': 'vertices problem',
        'vertices_count': vertices_count,
        '@table_index': 1,
    })
    yield problem


def join_yuid_all(yuid_key, recs):
    """Reducer: attach yuid dictionary info to the pair-id records of a yuid.

    The group is split by ``mr.split_left_right`` into pair-id records and
    yuid dictionary records. When a dictionary record exists and its
    ``good`` flag passes ``utils.is_true``, every pair-id record gets
    ``ua_profile``, ``browser_profile``, ``sex``, ``main_region`` and, when
    the dictionary has a non-empty ``<pair_source>_dates`` column,
    ``id_dates``. Otherwise the pair-id records are passed through
    unchanged to output table 1.
    """
    pair_id_recs, yuid_with_all = mr.split_left_right(recs, oom_check=False)

    if not (yuid_with_all and utils.is_true(yuid_with_all[0]['good'])):
        # no usable dictionary entry: route to the second output table
        for unmatched in pair_id_recs:
            unmatched['@table_index'] = 1
            yield unmatched
        return

    info = yuid_with_all[0]
    for target in pair_id_recs:
        target['ua_profile'] = info.get('ua_profile')

        browser = info.get('browser')
        version = info.get('browser_version') or ''
        if browser:
            target['browser_profile'] = browser + '|' + version
        else:
            target['browser_profile'] = ''

        target['sex'] = info.get('sex')
        target['main_region'] = info.get('main_region')

        # dates are stored per pair source, e.g. '<pair_source>_dates'
        dates = info.get(target['pair_source'] + '_dates')
        if dates:
            target['id_dates'] = dates

        yield target


def join_devid_ua_profile(devid_key, recs):
    """Reducer: attach the device ``ua_profile`` to pair-id records.

    The group is split by ``mr.split_left_right`` into pair-id records and
    device info records. With device info present, ``ua_profile`` is read
    from the first info record's ``value`` via ``mr.get_field_value`` and
    set on every pair-id record. Without it, the pair-id records are passed
    through unchanged to output table 1.
    """
    pair_id_recs, dev_info = mr.split_left_right(recs, oom_check=False)

    if not dev_info:
        for unmatched in pair_id_recs:
            unmatched['@table_index'] = 1
            yield unmatched
        return

    device_rec = dev_info[0]
    for target in pair_id_recs:
        target['ua_profile'] = mr.get_field_value('ua_profile', device_rec['value'])
        yield target


def join_devid_yuid_dict(devid_yuid_key, recs):
    """Reducer: add devid-yuid dictionary info to ``d_y`` edges.

    The group is split by ``mr.split_left_right`` into edge records and
    devid-yuid dictionary records. For an edge whose ``pair_type`` is
    ``'d_y'`` and whose group has a dictionary record, ``devid_yuid_dates``
    and ``match_chain`` are taken from the first dictionary record's
    ``<source_type>_dates`` and ``<source_type>_match_chain`` columns
    (``None`` when absent). Every edge is yielded, enriched or not.
    """
    pair_recs, devid_yuid_recs = mr.split_left_right(recs, oom_check=False)

    for edge in pair_recs:
        if edge['pair_type'] == 'd_y' and devid_yuid_recs:
            dict_rec = devid_yuid_recs[0]
            # NOTE(review): other functions in this file read 'pair_source';
            # confirm edge records really carry 'source_type' as well.
            prefix = edge['source_type']
            edge['devid_yuid_dates'] = dict_rec.get(prefix + '_dates')
            edge['match_chain'] = dict_rec.get(prefix + '_match_chain')

        yield edge


def info_to_edges(pair_key, recs):
    """Reducer: merge per-id info records back onto the edges of one pair.

    Records with ``@table_index == 0`` are edges; all other records are
    per-id info records. For each info record, ``id_idx == 0`` fills the
    ``id1_*`` columns and any other index fills the ``id2_*`` columns:
    ``ua`` (required ``ua_profile``), ``browser`` (``browser_profile``,
    default ``''``), ``sex``, ``dates`` (``id_dates``) and ``region``
    (``main_region``). If several info records target the same side, the
    last one wins.

    :param pair_key: reduce key of the group (not used here).
    :param recs: iterable of the group's records.
    :return: generator over the edges, updated in place; nothing is yielded
        when the group has no edge record.
    """
    edges = []
    id_infos = []
    for r in recs:
        if r['@table_index'] == 0:
            edges.append(r)
        else:
            id_infos.append(r)

    for edge in edges:
        for info in id_infos:
            # both sides share one code path; only the column prefix differs
            prefix = 'id1' if info['id_idx'] == 0 else 'id2'
            edge[prefix + '_ua'] = info['ua_profile']
            edge[prefix + '_browser'] = info.get('browser_profile', '')
            edge[prefix + '_sex'] = info.get('sex')
            edge[prefix + '_dates'] = info.get('id_dates')
            edge[prefix + '_region'] = info.get('main_region')
        yield edge


def enrich_vertices(key, recs):
    """Reducer: enrich a vertex with dictionary info for its id.

    Records are told apart by ``@table_index``: 0 is the vertex, 1 the yuid
    dictionary record, 2 the device info record (the last record of each
    kind wins). Nothing is yielded when the group has no vertex.

    * ``deviceid`` vertices get ``ua_profile`` parsed from the device info
      record's ``value``, when such a record exists.
    * Other vertices, when a yuid dictionary record exists, get
      ``ua_profile``, ``browser`` (``<browser>|<version>`` or ``''``),
      ``sex``, ``main_region`` and ``id_values``: for every pair type in
      ``config.YUID_PAIR_TYPES_EXACT`` with a non-empty ``<id_type>_dates``
      column, the list of that column's keys.

    :param key: reduce key of the group (not used here).
    :param recs: iterable of the group's records.
    :return: generator yielding the (in-place updated) vertex, if any.
    """
    vertices_rec = None
    yuid_with_all_rec = None
    devid_info_rec = None

    for r in recs:
        table_index = r['@table_index']
        if table_index == 0:
            vertices_rec = r
        elif table_index == 1:
            yuid_with_all_rec = r
        elif table_index == 2:
            devid_info_rec = r

    if not vertices_rec:
        return

    if vertices_rec['id_type'] == 'deviceid':
        if devid_info_rec:
            vertices_rec['ua_profile'] = mr.get_field_value('ua_profile', devid_info_rec['value'])

    elif yuid_with_all_rec:
        vertices_rec['ua_profile'] = yuid_with_all_rec.get('ua_profile')

        browser = yuid_with_all_rec.get('browser')
        if browser:
            # 'or' also covers a column that is present but null, which a
            # .get() default would not; same handling as in join_yuid_all.
            browser_version = yuid_with_all_rec.get('browser_version') or ''
            vertices_rec['browser'] = browser + '|' + browser_version
        else:
            vertices_rec['browser'] = ''

        vertices_rec['sex'] = yuid_with_all_rec.get('sex')
        vertices_rec['main_region'] = yuid_with_all_rec.get('main_region')

        id_values = dict()
        for pt in config.YUID_PAIR_TYPES_EXACT:
            dates = yuid_with_all_rec.get(pt.id_type + '_dates')
            if dates:
                # materialize as a plain list: on Python 3 keys() is a view
                id_values[pt.id_type] = list(dates.keys())
        vertices_rec['id_values'] = id_values

    yield vertices_rec




def prettify(workdir, in_vertices_table, in_pairs_table, dict_f, devid_dict_f, out_vertices_table, out_edges_table):
    """Build enriched vertices and edges tables from raw vertices and pairs.

    Steps, each batch of YT operations being awaited before the next:

    1. Convert vertices to YT format with ``crypta_id_size``; split pairs
       into edges plus per-id lookup tables; add ``pair`` to the devid-yuid
       dictionary.
    2. Sort the intermediate tables for the joins.
    3. Join dictionary info to the lookup tables, the edges and the vertices.
    4. Merge the per-id info back onto the edges.
    5. Join ``crypta_id`` / ``crypta_id_size`` from vertices onto edges and
       sort the result by ``key``.

    Intermediate tables are dropped at the end; the ``debug1``, ``debug2``
    and ``debug4`` tables written under ``workdir`` are left in place.

    :param workdir: path prefix for intermediate tables; table names are
        appended to it directly.
    :param in_vertices_table: input vertices table (YaMR-style records).
    :param in_pairs_table: input pairs table.
    :param dict_f: path prefix of the ``yuid_with_all``,
        ``yuid_with_all_by_key`` and ``dev_info`` dictionary tables.
    :param devid_dict_f: path prefix of the ``devid_yuid_all`` table.
    :param out_vertices_table: output table for enriched vertices.
    :param out_edges_table: output table for enriched edges.
    """
    mr.mkdir(workdir)

    # dicts
    yuid_with_all = dict_f + 'yuid_with_all'  # all dict info for yuids
    yuid_with_all_by_key = dict_f + 'yuid_with_all_by_key'  # all dict info for yuids sorted by key
    dev_info = dict_f + 'dev_info'  # ua info for all devids
    devid_yuid_all = devid_dict_f + 'devid_yuid_all'  # dict info for perfect pairs

    # import locally
    utils.wait_all([
        yt.run_map_reduce(map_vertices_yamr_to_yt_format, add_crypta_id_size,
                          in_vertices_table,
                          out_vertices_table,
                          reduce_by='crypta_id', sync=False),
        yt.run_map(map_pairs_to_edges,
                   in_pairs_table,
                   [workdir + 'edges',
                    workdir + 'edges_yuid_id', workdir + 'edges_devid_id'],
                   sync=False),
        yt.run_map(map_devid_yuid_dict,
                   devid_yuid_all,
                   workdir + 'devid_yuid_all', sync=False)
    ])

    # For devids dict info is stored per pair, whereas for yuids it is stored
    # per yuid. We join dict info to the left ids, the right ids and the
    # edges themselves separately.
    utils.wait_all([
        # yt.run_sort(yuid_with_all, sort_by='yuid', sync=False), # assume sorted
        yt.run_sort(workdir + 'edges_yuid_id', sort_by='yuid', sync=False),

        # yt.run_sort(dev_info, sort_by='key', sync=False), # assume sorted
        yt.run_sort(workdir + 'edges_devid_id', sort_by='key', sync=False),

        # yt.run_sort(yuid_with_all_by_key, sort_by='key', sync=False), # assume sorted
        # sync=False like its siblings: a synchronous call here would block
        # while the list is being built and delay launching the sorts below.
        yt.run_sort(out_vertices_table, sort_by='key', sync=False),

        yt.run_sort(workdir + 'devid_yuid_all', sort_by='pair', sync=False),
        yt.run_sort(workdir + 'edges', sort_by='pair', sync=False)
    ])

    utils.wait_all([
        yt.run_reduce(join_yuid_all,
                      [workdir + 'edges_yuid_id', yuid_with_all],
                      [workdir + 'edges_yuid_id_ua', workdir + 'debug1'],
                      reduce_by='yuid', sync=False),
        yt.run_reduce(join_devid_ua_profile,
                      [workdir + 'edges_devid_id', dev_info],
                      [workdir + 'edges_devid_id_ua', workdir + 'debug2'],
                      reduce_by='key', sync=False),
        yt.run_reduce(join_devid_yuid_dict,
                      [workdir + 'edges', workdir + 'devid_yuid_all'],
                      workdir + 'edges',
                      reduce_by='pair', sync=False),
        yt.run_reduce(enrich_vertices,
                      [out_vertices_table, yuid_with_all_by_key, dev_info],
                      out_vertices_table,
                      reduce_by='key', sync=False)
    ])

    # put all ua back to edges
    mr.sort_all([
        workdir + 'edges',
        workdir + 'edges_yuid_id_ua', workdir + 'edges_devid_id_ua'
    ], sort_by=['pair', 'pair_source'])

    yt.run_reduce(info_to_edges,
                  [workdir + 'edges',
                   workdir + 'edges_yuid_id_ua', workdir + 'edges_devid_id_ua'],
                  workdir + 'edges', reduce_by=['pair', 'pair_source'])

    # Join vertices. Joining on one id only seems to be enough, because both
    # ids of a pair must belong to a single crypta_id.
    mr.sort_all([
        out_vertices_table, workdir + 'edges'
    ], sort_by='key')

    yt.run_reduce(join_crypta_id_from_vertices_to_edges,
                  [out_vertices_table, workdir + 'edges'],
                  [out_edges_table, workdir + 'debug4'],
                  reduce_by='key')

    yt.run_sort(out_edges_table, sort_by='key')

    # cleanup (debug tables are intentionally not listed here)
    mr.drop(workdir + 'edges')
    mr.drop(workdir + 'devid_yuid_all')
    mr.drop(workdir + 'edges_yuid_id')
    mr.drop(workdir + 'edges_yuid_id_ua')
    mr.drop(workdir + 'edges_devid_id')
    mr.drop(workdir + 'edges_devid_id_ua')


# When run as a script, point the YT client at the proxy from
# config.MR_SERVER.
if __name__ == '__main__':
    yt.config.set_proxy(config.MR_SERVER)



