# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime, timedelta, date
import libra
import urllib, re,random

def _mask_mc_vars(raw_vars):
    """Return a new list with the 'mc' / '-mc' entries of *raw_vars* masked.

    An entry for which ``'mc' in entry`` holds is replaced by ``['mc']``,
    otherwise an entry for which ``'-mc' in entry`` holds is replaced by
    ``['-mc']``; every other entry is kept unchanged.
    """
    masked = []
    for entry in raw_vars:
        if 'mc' in entry:
            masked.append(['mc'])
        elif '-mc' in entry:
            # NOTE(review): reachable only when entries are containers
            # (membership test).  If GetVars() yields plain strings, any
            # entry containing '-mc' already matched the 'mc' test above --
            # confirm the element type.
            masked.append(['-mc'])
        else:
            masked.append(entry)
    return masked


def Reduce(key, recs):
    """Emit click and block rows for web requests that belong to the A/B test.

    The records of one key are parsed into a libra session.  Requests that
    are not ``TYandexWebRequest`` or that carry neither of the two test ids
    are skipped.  For every remaining request:

    * each click yields a record into output table 1 with the value
      ``query, slot, date, misc flag, converted path, masked vars``;
    * each blockstat block yields a record into output table 0 with the value
      ``query, slot, date, path, masked vars``.

    Value fields are tab-separated; the record key is *key* and the subkey is
    empty.

    Args:
        key: reduce key, copied into every emitted record.
        recs: records of that key, handed to ``libra.ParseSession``.

    Yields:
        ``Record`` objects as described above.
    """
    uid = key

    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed is skipped.  Only
        # ``Exception`` is caught so that KeyboardInterrupt / SystemExit
        # still propagate instead of being silently swallowed.
        return

    control_id = '23454'
    exp_id = '23455'

    for request in session:
        if not request.IsA('TYandexWebRequest'):
            continue

        if request.HasTestID(control_id):
            slot = 'control ' + control_id
        elif request.HasTestID(exp_id):
            slot = 'exp ' + exp_id
        else:
            continue

        # Calendar date (YYYY-MM-DD) of the request in the local timezone.
        day = datetime.fromtimestamp(request.Timestamp).date().isoformat()
        query = str(request.Query)

        for click in request.GetClicks():
            misc = 1 if click.IsA('TMiscResultClick') else 0
            path = click.ConvertedPath
            masked_vars = _mask_mc_vars(click.GetVars())
            value = '\t'.join(
                [query, slot, day, str(misc), path, str(masked_vars)])
            yield Record(uid, '', value, tableIndex=1)

        for block in request.GetBSBlocks():
            path = block.Path
            masked_vars = _mask_mc_vars(block.GetVars())
            value = '\t'.join([query, slot, day, path, str(masked_vars)])
            yield Record(uid, '', value, tableIndex=0)

def map_shows(rec):
    """Re-key a record by the leading part of its value.

    Yields a single ``Record`` whose key is ``rec.value[:200]`` and whose
    subkey and value are both empty strings.
    """
    truncated_value = rec.value[:200]
    mapped = Record(truncated_value, '', '')
    yield mapped

def aggr(key, recs):
    """Count the records of one key.

    Yields a single ``Record`` keyed by *key*, with an empty subkey and the
    number of items in *recs* (as a decimal string) as its value.
    """
    total = sum(1 for _ in recs)
    yield Record(key, '', str(total))


def main(extract_sessions=False):
    """Configure MapReduce and run the shows aggregation for each date.

    For every configured date the shows table is mapped with ``map_shows``
    into ``<shows>_mapped`` and then reduced with ``aggr`` into
    ``<shows>_aggr``.

    Args:
        extract_sessions: when true, the shows/clicks tables are first
            (re)built from ``user_sessions/<date>`` with ``Reduce``.  The
            default ``False`` keeps the previous behaviour, where that stage
            was skipped by an unconditional ``continue`` and the existing
            shows table was aggregated as is.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        #testMode = True,
    )

    shows_table = 'ensuetina/DESKTOP_NODEJS/shows'
    clicks_table = 'ensuetina/DESKTOP_NODEJS/clicks'

    dates = ['20160515']
    for day in dates:
        if extract_sessions:
            # Runs before the aggregation below because that stage reads
            # the shows table written here (Reduce output table 0).
            MapReduce.runReduce(Reduce,
                                srcTable='user_sessions/' + day,
                                dstTables=[shows_table, clicks_table],
                                files=['/home/ensuetina/data/blockstat.dict'],
                                #appendMode = True,
                                sortMode=True
                                )

        MapReduce.runMap(map_shows,
                         srcTable=shows_table,
                         dstTable=shows_table + '_mapped',
                         sortMode=True
                         )
        MapReduce.runReduce(aggr,
                            srcTable=shows_table + '_mapped',
                            dstTable=shows_table + '_aggr',
                            sortMode=True
                            )

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
