# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime
import libra
import urllib, re, random, sys

def Reduce(key, recs):
    """Emit one record per Russian touch web request that is in the experiment.

    Only keys starting with 'y' are processed. The session for the key is
    parsed with ``libra.ParseSession``; for every ``TTouchYandexWebRequest``
    whose ``ServiceDomRegion`` is 'ru' and which carries one of the two
    test ids, a record is yielded whose value is::

        <slot> TAB <YYYY-MM-DD> TAB <0|1>

    where the last field is 1 when ``GetSuggest()`` is truthy. Control
    requests go to ``tableIndex`` 0, experiment requests to ``tableIndex`` 1.

    key  -- user id used as the reduce key.
    recs -- raw session records for that key, handed to libra unchanged.
    """
    uid = key
    # startswith() also rejects an empty key, where uid[0] would raise
    # IndexError and abort the whole reduce job.
    if not uid.startswith('y'):
        return

    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed is skipped. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        return

    control_id = '17982'
    exp_id = '17983'

    for req in session:
        if not req.IsA('TTouchYandexWebRequest'):
            continue

        if req.ServiceDomRegion != 'ru':
            continue

        if req.HasTestID(control_id):
            table_index = 0
            slot = 'control ' + control_id
        elif req.HasTestID(exp_id):
            table_index = 1
            slot = 'exp ' + exp_id
        else:
            continue

        # Format the date only for requests that are actually emitted.
        # fromtimestamp() converts to the local time zone of the worker.
        day = datetime.fromtimestamp(req.Timestamp).date().isoformat()

        is_sugg = 1 if req.GetSuggest() else 0

        yield Record(uid, '', '\t'.join([slot, day, str(is_sugg)]),
                     tableIndex=table_index)


def aggr(key,recs):
    """Yield a single record with an empty subkey and value for ``key``.

    ``recs`` is not read, so any number of input records for the key
    produces exactly one output record.
    """
    collapsed = Record(key, '', '')
    yield collapsed

def parse_redir(rec):
    """Parse one '@@'-separated redirect-log line into a per-user record.

    The user id is 'y' plus the last field, and the timestamp is the third
    field from the end. Fields of the form ``name=value`` are collected into
    a lookup table. Lines are dropped when the user id is shorter than 10
    characters, when the timestamp is missing or not convertible to a date,
    or when ``path`` starts with neither 'morda_ru' nor 'serp_ru'.

    Yields at most one record whose value is::

        date TAB ts TAB path TAB service TAB exprt TAB region TAB suggest_reqid

    Missing ``exprt``/``region``/``suggest_reqid`` fields are written as the
    literal string 'None' (the result of ``str(None)``); such lines are kept.

    rec -- input record; only ``rec.value`` (the raw log line) is read.
    """
    # Split once and reuse; the original re-split the line for every lookup.
    fields = rec.value.split('@@')

    uid = 'y' + fields[-1]
    if len(uid) < 10:
        return

    try:
        ts = fields[-3]
        # fromtimestamp() converts to the local time zone of the worker.
        date = datetime.fromtimestamp(float(ts)).date().isoformat()
    except (IndexError, ValueError, OverflowError, OSError):
        # Too few fields, a non-numeric timestamp, or one outside the range
        # the platform can represent: skip the line.
        return

    data = dict(field.split('=', 1) for field in fields if '=' in field)

    path = str(data.get('path'))
    if path.startswith('morda_ru'):
        service = 'morda'
    elif path.startswith('serp_ru'):
        service = 'serp'
    else:
        return

    suggest_reqid = str(data.get('suggest_reqid'))
    exprt = str(data.get('exprt'))
    reg = str(data.get('region'))

    yield Record(uid, '', '\t'.join(
        [date, ts, path, service, exprt, reg, suggest_reqid]))

#    yield Record(uid,'',path + '\t' + exprt + '\t' + reg,tableIndex = 1)

def join(key, recs):
    """Pass through a key's data records only if a marker record is present.

    A record whose value contains no tab character counts as a marker and is
    never emitted itself. Every other record is buffered. If at least one
    marker was seen for the key, each buffered record is re-emitted with its
    value unchanged; otherwise nothing is yielded.
    """
    marker_seen = False
    payload = []

    for item in recs:
        if '\t' in item.value:
            payload.append(item)
        else:
            # Single-field value: this is the membership marker.
            marker_seen = True

    if not marker_seen:
        return

    for item in payload:
        yield Record(key, '', item.value)


def main():
    """Configure the MapReduce client and run the session-extraction jobs.

    For every date suffix in ``dd`` the ``Reduce`` function is run over
    'user_sessions/2015<suffix>' with two destination tables (``dt0`` and
    ``dt1``), with ``appendMode`` and ``sortMode`` enabled. The script then
    calls ``sys.exit(1)``, so the two ``join`` jobs at the end of the
    function are never executed in the current state of the code.
    """

    MapReduce.useDefaults(
                            server   = 'sakura.search.yandex.net:8013',
                            username = 'userstats',
                            mrExec   = '/Berkanavt/bin/mapreduce-dev',
                            verbose  = True,
                            #testMode = True,
                         )

    # NOTE(review): only the last assignment to `dd` takes effect; the two
    # lists above it are overwritten before use.
    dd = ['1029','1030','1031','1101','1102','1103','1104','1105','1106','1107','1108','1109','1110','1111']
    dd = ['1105']
    dd = ['1101','1102','1103','1104','1106']
    for d in dd:
        # Source table for this date suffix.
        src = 'user_sessions/2015' + d
#        redir = 'redir_log/2015' + d

        # Only referenced by the commented-out runMap call below.
        dst = 'ensuetina/TOUCH_SUGGEST_EXP/1/suggests_exp'

        # Destination tables of the active runReduce call; presumably index 0
        # receives the records Reduce emits with tableIndex 0 and index 1
        # those with tableIndex 1 -- confirm against mapreducelib.
        dt0 = 'ensuetina/TOUCH_SUGGEST_EXP/libra/c'
        dt1 = 'ensuetina/TOUCH_SUGGEST_EXP/libra/e'

        # d0..d7 are only referenced by the commented-out dstTables argument
        # below; they are unused by the active call.
        d0 = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/urls_control'
        d1 = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/urls_exp'
        d2 = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/reqids_control'
        d3 = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/reqids_exp'
        d4 = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/sug_reqid_serp_control'
        d5 = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/sug_reqid_serp_exp'
        d6 = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/sug_reqid_morda_control'
        d7 = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/sug_reqid_morda_exp'

#        continue

#        MapReduce.runMap(parse_redir,
#                            srcTable = redir,
#                            dstTable = dst,
#                            appendMode = True,
#                            sortMode = True
#                           )

#        continue

        # The dictionary file is shipped with the job; Reduce opens it as
        # './blockstat.dict'.
        MapReduce.runReduce(Reduce,
                            srcTable = src,
#                            dstTables = [d0,d1,d2,d3,d4,d5,d6,d7],
                            dstTables = [dt0,dt1],
                            files = ['/home/ensuetina/data/blockstat.dict'],
                            appendMode = True,
                            sortMode = True
                            )


    # NOTE(review): unconditional exit with status 1 (conventionally a failure
    # code) after the loop. Everything below this line is unreachable; it
    # looks like a manual switch for skipping the join stage -- confirm
    # whether the non-zero status is intended.
    sys.exit(1)
    d0 = 'ensuetina/TOUCH_SUGGEST_EXP/uids_control'
    d1 = 'ensuetina/TOUCH_SUGGEST_EXP/uids_exp'

    d = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/all_us_suggests'

#    MapReduce.runReduce(aggr,
#                        srcTable = d0,
#                        dstTable = d0 + '_aggr',
#                        sortMode = True
#                       )
#    MapReduce.runReduce(aggr,
#                        srcTable = d1,
#                        dstTable = d1 + '_aggr',
#                        sortMode = True
#                       )

#    dt0 = 'ensuetina/TOUCH_SUGGEST_EXP/1/suggest_control'
#    dt1 = 'ensuetina/TOUCH_SUGGEST_EXP/1/suggest_exp'

    # Join the combined table `d` with the control uid table, then with the
    # experiment uid table, writing one output table per slot.
    MapReduce.runReduce(join,
                        srcTables = [d, d0],
                        dstTable = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/s_control',
                        sortMode = True
                       )
    MapReduce.runReduce(join,
                        srcTables = [d, d1],
                        dstTable = 'ensuetina/TOUCH_SUGGEST_EXP/TRY/s_exp',
                        sortMode = True
                       )


# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
