from utils import mr_utils as mr
import ctypes
import re
import yt.wrapper as yt
import crypta_id_generator


def reduce_add_cid(key, recs):
    """Emit one record carrying an initial crypta id for the reduced key.

    Only input records whose ``subkey`` is ``'a'`` are considered; when there
    are none, nothing is emitted.  For each such record the ``d`` and ``s``
    fields are read from ``rec['value']`` through ``mr.get_field_value``
    (presumably a parser for tab-separated ``name=value`` strings -- confirm
    in mr_utils).  The comma-separated ``s`` values are unioned over all
    records, while ``d`` ends up being the value from the last record.

    :param key: reduce key; ``key['key']`` is the identifier itself.
    :param recs: iterable of records with ``subkey`` and ``value``.
    :return: generator of at most one record with ``key``, ``subkey`` and a
        ``value`` of the form ``s=...\\tc=...\\tt=...\\td=...``.
    """
    a_recs = [r for r in recs if r['subkey'] == 'a']
    if not a_recs:
        return

    d = ''
    source = set()
    for a_rec in a_recs:
        packed = a_rec['value']
        d = mr.get_field_value('d', packed)
        source |= set(mr.get_field_value('s', packed).split(','))

    if d == 'y':
        # Yandexuid: the crypta id is derived from the yuid itself.
        cid = crypta_id_generator.yuid_to_crypta_id(key['key'])
    elif d == 'd':
        # DeviceId, this cid will be replaced by cid of connected yuid during graph processing
        cid = '9999999999999999999'
    else:
        # Any other value of d produces no output.
        return
    cid_type = 'g'

    fields = ['s=' + ','.join(source), 'c=' + cid, 't=' + cid_type, 'd=' + d]
    yield {'key': key['key'], 'subkey': '1', 'value': '\t'.join(fields)}


def reduce_old_cids(yuid, recs):
    """Pair every vertex crypta id of a yuid with the id it should keep.

    ``mr.split_left_right`` divides ``recs`` into vertex records and records
    carrying an already known crypta id (presumably by input table -- confirm
    in mr_utils).  The known id is taken from the first such record's
    ``value``; a value starting with ``'-'`` is reinterpreted as an unsigned
    64-bit integer.  When no known id exists, the first vertex's own
    ``crypta_id`` is used for this and all following vertices.

    :param yuid: reduce key; ``yuid['key']`` is copied into the output.
    :param recs: mixed iterable of vertex and known-cid records.
    :return: generator of ``{'key', 'fakecid', 'oldcid'}`` records, one per
        vertex record.
    """
    vertex_recs, known_cid_recs = mr.split_left_right(recs, oom_check=False)

    known_cid = ''
    if known_cid_recs:
        raw = known_cid_recs[0]['value']
        # A leading minus means the id was stored as a signed 64-bit number.
        known_cid = str(ctypes.c_uint64(int(raw)).value) if raw.startswith('-') else raw

    for vertex in vertex_recs:
        fakecid = vertex['crypta_id']
        # Fall back to the vertex's own id once, then keep that choice.
        known_cid = known_cid or fakecid
        yield dict(key=yuid['key'], fakecid=fakecid, oldcid=known_cid)


def map_cid_pairs(rec):
    """Emit a ``cid_pair`` record of the form ``<fakecid>_<oldcid>``.

    Records whose ``oldcid`` is falsy (e.g. an empty string) are dropped.

    :param rec: record with ``fakecid`` and ``oldcid``.
    :return: generator of zero or one ``{'cid_pair': ...}`` records.
    """
    old = rec['oldcid']
    if not old:
        return
    yield {'cid_pair': rec['fakecid'] + '_' + old}


def count_cid_intersection_sizes_and_ts(cid_pair_key, recs):
    """Count the records of one (fakecid, oldcid) group and extract a timestamp.

    The last ten characters of ``oldcid`` are parsed as an integer timestamp;
    when that is impossible (non-numeric text, or a value that cannot be
    sliced such as ``None``) the sentinel ``9999999999`` is used instead.

    :param cid_pair_key: reduce key with ``fakecid`` and ``oldcid``.
    :param recs: iterable of the records in the group; only their number matters.
    :return: generator of a single record with ``fakecid``, ``oldcid``,
        ``intersect_size`` (the NEGATED record count) and ``oldcid_ts``.
    """
    count = sum(1 for _ in recs)
    fakecid = cid_pair_key['fakecid']
    oldcid = cid_pair_key['oldcid']

    try:
        ts = int(oldcid[-10:])
    except (TypeError, ValueError):
        # Only parsing failures fall back to the sentinel; a bare ``except``
        # would also swallow KeyboardInterrupt / SystemExit.
        ts = 9999999999

    yield dict(
        fakecid=fakecid,
        oldcid=oldcid,
        intersect_size=-count,
        oldcid_ts=ts)


def reduce_find_best_oldcids(fakecid, recs):
    """Keep only the first record of the group.

    The caller is expected to sort the group so that the proper record comes
    first.  Only that record is pulled from ``recs``; the rest is left
    unconsumed.

    An empty group yields nothing.  The loop form is used instead of a bare
    ``next(recs)`` because a ``StopIteration`` escaping a generator body is
    turned into ``RuntimeError`` by PEP 479; it also accepts any iterable,
    not just iterators.

    :param fakecid: reduce key (unused).
    :param recs: iterable of records.
    :return: generator of at most one record.
    """
    for best in recs:
        yield best
        return


def reduce_fix_duplicate_oldcid(oldc, recs):
    """Let only one fakecid keep a contested oldcid; mark the others.

    The first row of the group (expected to be the one with the largest
    intersection) decides the winner.  Rows sharing the winner's ``fakecid``
    get ``newcid`` set to the winner's ``oldcid``; every other row gets the
    placeholder ``'generate'``.  Rows are modified in place and re-emitted.

    :param oldc: reduce key (unused).
    :param recs: iterable of rows with ``fakecid`` and ``oldcid``.
    :return: generator of the same rows with ``newcid`` added.
    """
    winner_fakecid = None
    winner_oldcid = None
    for row in recs:
        if winner_fakecid is None:
            # first row with largest intersection
            winner_fakecid, winner_oldcid = row['fakecid'], row['oldcid']

        row['newcid'] = winner_oldcid if winner_fakecid == row['fakecid'] else 'generate'
        yield row


def mk_uniq_cid_reducer(reduce_field, collect_field):
    """Build a reducer reporting keys that do not map to exactly one value.

    The returned reducer gathers the distinct ``collect_field`` values in a
    group.  If there is exactly one, nothing is emitted; otherwise one record
    per distinct value is emitted.

    :param reduce_field: name of the field the reduce is keyed by.
    :param collect_field: name of the field whose distinct values are collected.
    :return: ``reducer(key, recs)`` generator function.
    """
    def reducer(key, recs):
        distinct = {row[collect_field] for row in recs}
        if len(distinct) == 1:
            return
        for value in distinct:
            yield {reduce_field: key[reduce_field], collect_field: value}

    return reducer


def join_reassigned_cid_back(key, recs):
    """Write a reassigned crypta id back onto its vertex record.

    ``mr.split_left_right`` divides ``recs`` into vertex records and
    reassignment records (presumably by input table -- confirm in mr_utils);
    only the first record of each side is used.

    If the reassignment's ``newcid`` differs from the vertex's current
    ``crypta_id``, the vertex gets:

    * ``origin`` = ``'reassign_is'`` and ``cid_type`` = ``'i'`` when
      ``newcid`` equals the reassignment's ``oldcid``, otherwise
      ``origin`` = ``'reassign_gen'``;
    * the previous id stored under
      ``crypta_id_history['before_is_reassign']``;
    * ``crypta_id`` replaced by ``newcid``.

    :param key: reduce key (unused).
    :param recs: mixed iterable of vertex and reassignment records.
    :return: generator of the single, possibly updated, vertex record.
    :raises Exception: if the vertex ``crypta_id`` does not match the
        reassignment's ``fakecid``.
    :raises IndexError: if either side of the split is empty.
    """
    vertices_recs, reassigned_recs = mr.split_left_right(recs, oom_check=False)
    vertices_rec = vertices_recs[0]
    reassigned_rec = reassigned_recs[0]

    if vertices_rec['crypta_id'] != reassigned_rec['fakecid']:
        # The two placeholders used to be left unformatted, so the message
        # never showed the mismatching ids.
        raise Exception('%s != %s, crypta id from nowhere!'
                        % (vertices_rec['crypta_id'], reassigned_rec['fakecid']))

    if reassigned_rec['newcid'] != vertices_rec['crypta_id']:  # crypta id changed comparing to previous

        if reassigned_rec['oldcid'] == reassigned_rec['newcid']:
            vertices_rec['origin'] = 'reassign_is'  # reassigned with something we took from is
            vertices_rec['cid_type'] = 'i'
        else:
            vertices_rec['origin'] = 'reassign_gen'  # reassigned with something we generated offline

        # Remember the id we are about to overwrite.
        history = vertices_rec.setdefault('crypta_id_history', dict())
        history['before_is_reassign'] = vertices_rec['crypta_id']

        vertices_rec['crypta_id'] = reassigned_rec['newcid']

    yield vertices_rec


def map_reassign_cids_join_order(rec):
    """Tag a record with a join-order marker and route it to output table 0.

    Records that already carry ``newcid`` get ``jord = '1-newcid'``; all
    others get ``jord = '2-vertices'``.  The record is modified in place.

    :param rec: input record.
    :return: generator of the same record with ``jord`` and ``@table_index`` set.
    """
    marker = '1-newcid' if 'newcid' in rec else '2-vertices'
    rec.update({'jord': marker, '@table_index': 0})
    yield rec


def reduce_reassign_cids(fakecid, recs):
    """Copy the group's ``newcid`` onto every vertex row, streaming.

    Rows are distinguished by ``jord``: a ``'1-newcid'`` row supplies the
    value and is not emitted; every other row receives the most recently seen
    ``newcid`` and is emitted.  The ``newcid`` row must therefore come before
    the vertex rows within the group.

    :param fakecid: reduce key; ``fakecid['fakecid']`` is used in the error text.
    :param recs: iterable of rows carrying ``jord``.
    :return: generator of vertex rows with ``newcid`` set.
    :raises Exception: if a vertex row is met before any ``newcid`` row.
    """
    current_newcid = None
    for row in recs:
        if row['jord'] == '1-newcid':
            current_newcid = row['newcid']
            continue

        if current_newcid is None:
            raise Exception("This can't happen: expected to find some newcid for fakecid = " + fakecid['fakecid'] +
                            ', found none.')

        row['newcid'] = current_newcid
        yield row


def mk_cid_gen_reducer(date):
    """Build a reducer that replaces ``'generate'`` placeholders with new ids.

    Generated ids are decimal strings starting at ``YYMMDD0000000000000``
    (two-digit year counted from 2000, then month and day, then thirteen
    zeros) and increasing by one for each placeholder replaced.

    NOTE(review): the numbering restarts on every call of the returned
    reducer, so uniqueness across reduce keys depends on how the operation
    is keyed -- confirm at the call site.

    :param date: object with ``year``, ``month`` and ``day`` attributes.
    :return: ``reducer(key, recs)`` generator function that re-emits every row.
    """
    first_cid = int('%02d%02d%02d0000000000000' % (date.year - 2000, date.month, date.day))

    def reducer(key, recs):
        issued = 0
        for row in recs:
            if row['newcid'] == 'generate':
                row['newcid'] = str(first_cid + issued)
                issued += 1
            yield row

    return reducer


def identity_map(rec):
    """Yield the input record unchanged."""
    yield rec


def cc_mr_iter(id_from, edges):
    """
        This is one iteration of CC-MR algorithm (that finds Connected Components on Map-Reduce).

        ``edges`` must be an iterator of records with ``value`` for the vertex
        ``id_from['key']``.  The first edge is treated as the reference
        neighbour and consecutive edges with equal ``value`` are collapsed
        (assumes the group is sorted by ``value`` -- confirm in the reduce spec).

        If the vertex is smaller than its first neighbour, its edges are
        re-emitted unchanged to table 0.  Otherwise every further distinct
        neighbour is linked to the first neighbour in both directions
        (table 0) and a ``{'key': vertex}`` record is written to table 1
        (presumably a "still changing" marker -- confirm with the driver);
        finally the edge to the first neighbour is kept if the vertex is
        smaller than the last distinct neighbour.
    """
    vertex = id_from['key']
    head_value = next(edges)['value']

    is_loc_max = vertex < head_value
    if is_loc_max:
        yield {'key': vertex, 'value': head_value, '@table_index': 0}

    seen_value = head_value
    for edge in edges:
        value = edge['value']
        if seen_value == value:
            # Duplicate of the previous neighbour.
            continue
        seen_value = value

        if is_loc_max:
            yield {'key': vertex, 'value': value, '@table_index': 0}
            continue

        # Re-attach this neighbour to the first neighbour, both directions.
        yield {'key': head_value, 'value': value, '@table_index': 0}
        yield {'key': value, 'value': head_value, '@table_index': 0}
        yield {'key': vertex, '@table_index': 1}

    if vertex < seen_value and not is_loc_max:
        yield {'key': vertex, 'value': head_value, '@table_index': 0}


def cc_mr_remove_backwards_edges(rec):
    """Keep an edge only when its ``key`` is strictly less than its ``value``.

    :param rec: edge record with ``key`` and ``value``.
    :return: generator of zero or one records.
    """
    is_forward = rec['key'] < rec['value']
    if not is_forward:
        return
    yield rec


def reduce_add_component_center(key, edges):
    """Prepend a self-edge for the reduce key, then pass all edges through.

    :param key: reduce key; ``key['key']`` becomes both ends of the self-edge.
    :param edges: iterable of edge records, re-emitted unchanged.
    :return: generator of the self-edge followed by the input edges.
    """
    center = key['key']
    yield dict(key=center, value=center)
    for passthrough in edges:
        yield passthrough


def map_fake_cid_ts(edge):
    """Attach a ``ts`` field parsed from the tail of ``edge['value']``.

    When the whole value parses as an integer, ``ts`` is the integer formed
    by its last ten characters; otherwise ``ts`` is the sentinel
    ``9999999999``.  The record is modified in place.

    :param edge: edge record with a string ``value``.
    :return: generator of the same record with ``ts`` set.
    """
    value = edge['value']
    try:
        # TODO: hackish, it's better to just put id types into the table edges, or reduce id types from vertices
        int(value)  # validation only: the whole value must be numeric
        timestamp = int(value[-10:])
    except ValueError:
        timestamp = 9999999999
    edge['ts'] = timestamp
    yield edge


def reduce_fake_cid(key, recs):
    """Assign one fake crypta id to every member of a group.

    The id is derived with ``crypta_id_generator.yuid_to_crypta_id`` from the
    ``value`` of the FIRST record (the caller is expected to sort the group
    so that this is the yuid with the smallest timestamp -- confirm in the
    reduce spec).  ``recs`` must be an iterator.

    :param key: reduce key (unused).
    :param recs: iterator of records with ``key`` and ``value``.
    :return: generator of ``{'key', 'subkey', 'fake_cid', 'component_center'}``
        records, one per input record, where the output ``key`` is the input
        ``value`` and ``component_center`` is the input ``key``.
    """
    head = next(recs)
    fake_cid = crypta_id_generator.yuid_to_crypta_id(head['value'])

    def member_row(rec):
        # Same output shape for the first record and all the others.
        return {'key': rec['value'], 'subkey': '0', 'fake_cid': fake_cid, 'component_center': rec['key']}

    yield member_row(head)
    for other in recs:
        yield member_row(other)


def join_edges_vertices_cid(key, recs):
    """Substitute the fake crypta id into the matching vertex record.

    The first record of the group must carry ``fake_cid`` and the second is
    the vertex record; every ``c=<digits>`` occurrence in the vertex's
    ``value`` is rewritten to ``c=<fake_cid>``.  Any further records are
    ignored.

    A group with no records, or with a cid record but no vertex record,
    yields nothing.  This is handled explicitly rather than with a bare
    ``next(recs)``, because a ``StopIteration`` escaping a generator body is
    turned into ``RuntimeError`` by PEP 479.

    :param key: reduce key (unused).
    :param recs: iterator over the group's records, cid record first.
    :return: generator of at most one vertex record with ``@table_index`` = 0.
    :raises KeyError: if the first record has no ``fake_cid``.
    """
    cid_rec = next(recs, None)
    if cid_rec is None:
        return
    cid = cid_rec['fake_cid']

    vertex_rec = next(recs, None)
    if vertex_rec is None:
        # A cid without a vertex to attach it to: nothing to emit.
        return

    vertex_rec['value'] = re.sub(r'c=\d+', 'c=' + cid, vertex_rec['value'])
    vertex_rec['@table_index'] = 0
    yield vertex_rec


def get_existing_cids(vertices, output_table):
    """Extract the crypta ids of yuid vertices into ``output_table``.

    Runs a YT map over ``vertices`` that keeps only records whose ``id_type``
    starts with ``'yuid'``, emitting ``{'key', 'subkey': 'cryptaid', 'value'}``
    for each, and then merges ``output_table`` into itself with
    ``combine_chunks`` enabled.

    :param vertices: source table passed to ``yt.run_map``.
    :param output_table: destination table.
    """
    def mapper(rec):
        id_type, crypta_id, yuid = rec['id_type'], rec['crypta_id'], rec['key']
        if not id_type.startswith('yuid'):
            return
        yield dict(key=yuid, subkey='cryptaid', value=crypta_id)

    yt.run_map(mapper, vertices, output_table)
    yt.run_merge(output_table, output_table, spec={'combine_chunks': True})


def map_oldcid_ts(rec):
    """Attach ``oldcid_ts`` parsed from the last ten characters of ``oldcid``.

    Falls back to the sentinel ``9999999999`` when ``oldcid`` is missing,
    cannot be sliced (e.g. ``None``) or its tail is not an integer.  The
    record is modified in place.

    :param rec: record, normally carrying a string ``oldcid``.
    :return: generator of the same record with ``oldcid_ts`` set.
    """
    try:
        ts = int(rec['oldcid'][-10:])
    except (KeyError, TypeError, ValueError):
        # Best-effort parsing only; a bare ``except`` would also swallow
        # KeyboardInterrupt / SystemExit.
        ts = 9999999999

    rec['oldcid_ts'] = ts
    yield rec
