# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime
import libra
import urllib, re,random

def Reduce(key, recs):
    """Emit one record per experiment web request that has no BS blocks.

    The records of one key are parsed into a libra session using the
    './blockstat.dict' dictionary. Every request in the session that is a
    'TYandexWebRequest', carries test id '23455' and returns no items from
    GetBSBlocks() produces one output record:

        key    -- the request's ReqID
        subkey -- 'US'
        value  -- uid, 'exp 23455', the request date (YYYY-MM-DD) and '0',
                  joined with tabs

    key  -- reduce key, used as the uid in the output value
            (presumably the user id -- confirm against the source table).
    recs -- records of that key, handed to libra.ParseSession as is.

    Sessions that libra fails to parse are skipped without output.
    """
    uid = key

    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: an unparsable session is dropped. Exception (rather
        # than a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return

    test_id = '23455'
    slot = 'exp ' + test_id  # same for every emitted record

    for request in session:
        if not request.IsA('TYandexWebRequest'):
            continue
        if not request.HasTestID(test_id):
            continue

        # Only emptiness matters, so stop at the first block found.
        if any(True for _ in request.GetBSBlocks()):
            continue

        # Local-time calendar date of the request, e.g. '2016-05-15'.
        day = datetime.fromtimestamp(request.Timestamp).date().isoformat()

        # The trailing field is the BS block count, which is always 0 here.
        yield Record(request.ReqID, 'US',
                     uid + '\t' + slot + '\t' + day + '\t' + '0')


def aggr(key, recs):
    """Count the records of one key.

    Yields a single Record with the same key, an empty subkey and the
    number of records in recs as a decimal string.
    """
    total = sum(1 for _ in recs)
    yield Record(key, '', str(total))

def map_bs(rec):
    """Re-key a record by the uid found in its tab-separated value.

    The value is split on tabs; field 10 is taken as the request id and
    field 11 as the uid. A field that is missing because the value has too
    few columns is replaced with the placeholder '??'.

    Yields one Record:
        key    -- the uid with every 'y' character removed
        subkey -- ''
        value  -- request id, a tab, then the original value unchanged
    """
    value = rec.value
    fields = value.split('\t')

    # Explicit length checks instead of a bare except: only the
    # "column is absent" case falls back to the placeholder.
    reqid = fields[10] if len(fields) > 10 else '??'
    uid = fields[11] if len(fields) > 11 else '??'

    # NOTE(review): replace() strips every 'y', not just a leading one;
    # map_us normalizes uids the same way, so the join keys stay in sync.
    yield Record(uid.replace('y', ''), '', reqid + '\t' + value)

def map_us(rec):
    """Re-key a record by the uid stored in the first field of its value.

    Yields one Record:
        key    -- first tab-separated field of the value, with every 'y'
                  character removed
        subkey -- the input subkey, unchanged
        value  -- the input key (used as the request id), a tab, then the
                  original value
    """
    payload = rec.value
    first_field = payload.split('\t', 1)[0]
    new_key = first_field.replace('y', '')

    yield Record(new_key, rec.subkey, rec.key + '\t' + payload)

def join(key, recs):
    """Split the records of one key into 'joined' and 'errors' outputs.

    Records are bucketed by subkey: 'US' versus everything else.

    * Both buckets non-empty: every non-'US' record is written to output
      table 0 (joined); the 'US' records themselves are not emitted.
    * Only 'US' records present: they are written to output table 1
      (errors).
    * No 'US' record at all: nothing is emitted.

    All records of the key are buffered in memory before anything is
    yielded.
    """
    us_side = []
    other_side = []
    for rec in recs:
        bucket = us_side if rec.subkey == 'US' else other_side
        bucket.append(rec)

    if not us_side:
        return

    if other_side:
        selected, table = other_side, 0  # joined
    else:
        selected, table = us_side, 1  # errors

    for rec in selected:
        yield Record(rec.key, rec.subkey, rec.value, tableIndex=table)

def main():
    """Configure the MapReduce client and run the re-key + join steps.

    For each date suffix in the list:
      1. map_us over 'ensuetina/NODE/joined' writes
         'ensuetina/NODE/joined_uids_in_keys'.
      2. join over that table together with 'ensuetina/NODE/us_data'
         writes 'ensuetina/NODE/joined_1' (output table 0) and
         'ensuetina/NODE/not_joined_1' (output table 1).
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode=True,
    )

    for day in ['0515']:
        # Source logs and the blockstat table are used only by the
        # disabled map_bs step below.
        blockstat_log = 'blockstat_log/2016' + day
        user_sessions = 'user_sessions/2016' + day
        bs_table = 'ensuetina/NODE/bs_data'

        us_table = 'ensuetina/NODE/us_data'
        joined = 'ensuetina/NODE/joined'
        not_joined = 'ensuetina/NODE/not_joined'
        rekeyed = joined + '_uids_in_keys'

        # Disabled step:
        # MapReduce.runMap(map_bs,
        #                  srcTable=blockstat_log,
        #                  dstTable=bs_table,
        #                  sortMode=True)

        MapReduce.runMap(
            map_us,
            srcTable=joined,
            dstTable=rekeyed,
            sortMode=True,
        )
        MapReduce.runReduce(
            join,
            srcTables=[rekeyed, us_table],
            dstTables=[joined + '_1', not_joined + '_1'],
            sortMode=True,
        )


#    MapReduce.runReduce(aggr,
#                        srcTable = dt,
#                        dstTable = dt + '_uids',
#                        sortMode = True
#                       )
#    MapReduce.runReduce(aggr,
#                        srcTable = dt1 + '_mapped',
#                        dstTable = dt1 + 'mapped__aggr',
#                        sortMode = True
#                       )
#    MapReduce.runReduce(aggr,
#                        srcTable = j1,
#                        dstTable = j1 + '_uids',
#                        sortMode = True
#                       )
#    MapReduce.runReduce(aggr,
#                        srcTable = j2,
#                        dstTable = j2 + '_uids',
#                        sortMode = True
#                       )


# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
