# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime
import libra
import urllib, re,random
import sys

def Reduce(key, recs):
    """Tag the web requests of one user session with their experiment slot.

    Only keys that start with 'y' are processed.  The grouped records are
    parsed with ``libra.ParseSession``; for every ``TYandexWebRequest`` whose
    ``ServiceDomRegion`` is 'tr' one record is emitted:

    * table 0, value ``'control 18036'`` when the request has test id 18036;
    * table 1, value ``'exp 18037'`` when it has test id 18037 (checked only
      if the control id is absent).

    Requests carrying neither test id are skipped.

    Args:
        key: session key (the uid).
        recs: records grouped under ``key``, passed to libra unchanged.

    Yields:
        ``Record(uid, '', slot, tableIndex=...)`` per matching request.
    """
    uid = key
    # startswith() also copes with an empty key, where uid[0] would raise.
    if not uid.startswith('y'):
        return

    control_id = '18036'
    exp_id = '18037'

    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Deliberate best effort: a session that cannot be parsed is dropped.
        # Unlike a bare ``except:`` this lets KeyboardInterrupt / SystemExit
        # propagate.
        return

    for request in session:
        if not request.IsA('TYandexWebRequest'):
            continue

        if request.ServiceDomRegion != 'tr':
            continue

        if request.HasTestID(control_id):
            table_index = 0
            slot = 'control ' + control_id
        elif request.HasTestID(exp_id):
            table_index = 1
            slot = 'exp ' + exp_id
        else:
            continue

        yield Record(uid, '', slot, tableIndex=table_index)


def aggr(key, recs):
    """Emit a single marker record for ``key``.

    The grouped records in ``recs`` are not inspected; each call produces
    exactly one record whose subkey and value are both empty strings.
    """
    marker = Record(key, '', '')
    yield marker

def get_installs(rec):
    """Extract 'homesearchextchrome' tech.yaelements events from a log line.

    ``rec.value`` is split on '@@'.  The last field, prefixed with 'y',
    becomes the uid and the third field from the end is read as a Unix
    timestamp (converted to a local-time ``YYYY-MM-DD`` date).  Fields that
    contain '=' are treated as ``name=value`` pairs; ``path``, ``vars`` and
    ``slots`` are required.  ``vars`` is itself a comma-separated list of
    ``name=value`` pairs.

    A line is dropped when it cannot be parsed, when the uid is shorter than
    11 characters, when a required field is missing, when ``path`` does not
    contain 'tech.yaelements', or when the '-productname' variable does not
    contain 'homesearchextchrome'.

    Yields:
        ``Record(ui, '', value)`` where ``value`` is the tab-joined sequence
        uid, date, path, vars, slots, dayuse, clid, product, ver, ui,
        str(vars-dict).
    """
    line = rec.value
    try:
        parts = line.split('@@')
        uid = 'y' + str(parts[-1])
        ts = str(parts[-3])
        date = str(datetime.fromtimestamp(float(ts)).isoformat()).split('T')[0]
    except Exception:
        # Deliberate best effort: malformed lines (too few fields, bad or
        # out-of-range timestamp) are skipped rather than failing the job.
        return

    if len(uid) < 11:
        return

    data = dict(item.split('=', 1) for item in parts if '=' in item)
    try:
        path = data['path']
        vv = data['vars']
        slots = data['slots']
    except KeyError:
        return

    if 'tech.yaelements' not in path:
        return

    vdict = dict(item.split('=', 1) for item in vv.split(',') if '=' in item)

    # str() is kept on purpose: a missing variable is written out as the
    # literal text 'None', exactly as before.
    dayuse = str(vdict.get('-dayuse'))
    clid = str(vdict.get('-clid1'))
    product = str(vdict.get('-productname'))
    ver = str(vdict.get('-ver'))
    ui = str(vdict.get('-ui'))

    if 'homesearchextchrome' not in product:
        return

    out_fields = [uid, date, path, str(vv), slots, dayuse, clid, product,
                  ver, ui, str(vdict)]
    yield Record(ui, '', '\t'.join(out_fields))

def cut_day0_and_transform(rec):
    """Split install rows into a day-0 table and a table re-keyed by ``ui``.

    ``rec.value`` is a tab-separated row; column 4 is read as ``dayuse`` and
    column 8 as ``ui``.  Rows with fewer than nine columns are dropped.

    NOTE(review): ``get_installs`` in this file writes dayuse at column 5 and
    ui at column 9 of its value, so this mapper presumably expects a row
    layout without the leading uid column -- confirm against the real input.

    Yields:
        * ``Record(uid, '', line, tableIndex=0)`` when dayuse is '0';
        * ``Record(ui, '', uid + '\\t' + line, tableIndex=1)`` for every
          well-formed row.
    """
    uid = rec.key
    line = rec.value
    columns = line.split('\t')

    # Explicit guard instead of a bare ``except:`` around the indexing.
    if len(columns) < 9:
        return

    dayuse = columns[4]
    ui = columns[8]

    if dayuse == '0':
        yield Record(uid, '', line, tableIndex=0)  # day0 table

    yield Record(ui, '', uid + '\t' + line, tableIndex=1)  # transformed table

def join(key, recs):
    """Intersect, by uid, the dayuse=0 rows with the sampled uids.

    A record with an empty value acts as the "uid is in the sample" marker
    (``aggr`` in this file emits such records); every other record is
    buffered as a data row.  Data rows are emitted only when a marker was
    seen for this key.

    Each emitted row is re-keyed by its column 8 (``ui``), with column 0
    (``date``) as the subkey and the uid prepended to the value.  Rows with
    fewer than nine tab-separated columns are skipped.

    Yields:
        ``Record(ui, date, uid + '\\t' + value)`` -- only the dayuse=0 rows
        whose uid was present in the sample.
    """
    uid = key
    in_sample = False
    rows = []
    for rec in recs:
        if not rec.value:
            in_sample = True
            continue
        rows.append(rec)

    if not in_sample:
        return

    for rec in rows:
        columns = rec.value.split('\t')
        # Explicit guard instead of a bare ``except:`` around the indexing.
        if len(columns) < 9:
            continue

        date = columns[0]
        ui = columns[8]
        yield Record(ui, date, uid + '\t' + rec.value)

def join_redir(key, recs):
    """Intersect the sampled dayuse=0 table with the table of all dayuse rows.

    Every record of the group is buffered.  A record with a non-empty subkey
    supplies ``mindate`` (the date of dayuse=0); if several do, the last one
    wins.  When no record carries a subkey nothing is emitted.

    Yields:
        ``Record(rec.key, '', mindate + '\\t' + rec.value)`` for every
        buffered record, i.e. all dayuse rows of sampled users with the
        mindate field prepended to the value.
    """
    buffered = []
    mindate = None

    for rec in recs:
        if len(rec.subkey) > 0:
            mindate = rec.subkey
        buffered.append(rec)

    # mindate is only ever set from a non-empty subkey, so None means
    # "no subkey-carrying record was seen in this group".
    if mindate is None:
        return

    for rec in buffered:
        yield Record(rec.key, '', mindate + '\t' + rec.value)

def join_redir2(key, recs):
    """Keep activity rows only for keys that also have a day-0 row.

    A record whose value starts with the character '2' is treated as the
    day-0 marker (presumably because such rows begin with a date like
    2015-... -- confirm against the source tables) and is not emitted.
    All other records are buffered and emitted only if a marker was seen.

    An empty value is treated as an ordinary (non-marker) row instead of
    raising IndexError.

    Yields:
        ``Record(ui, '', rec.value)`` for every buffered activity row.
    """
    ui = key
    has_day0 = False
    activity = []

    for rec in recs:
        # Same test as "first tab-separated field starts with '2'", but safe
        # when the value is empty.
        if rec.value.startswith('2'):
            has_day0 = True
            continue
        activity.append(rec)

    if has_day0:
        for rec in activity:
            yield Record(ui, '', rec.value)  # only activity for dayuse0 ui

def separate_dayuses(rec):
    """Route a row to an output table chosen by the date in column 0.

    The row (``rec.value``, tab-separated) is re-keyed by its column 8.
    Dates 2015-10-28 .. 2015-11-04 map to tables 0..7 in order; any other
    value goes to table 8.

    Yields:
        ``Record(ui, '', line, tableIndex=...)`` -- exactly one per input.
    """
    line = rec.value
    columns = line.split('\t')
    day = columns[0]
    ui = columns[8]

    known_days = (
        '2015-10-28',
        '2015-10-29',
        '2015-10-30',
        '2015-10-31',
        '2015-11-01',
        '2015-11-02',
        '2015-11-03',
        '2015-11-04',
    )
    if day in known_days:
        table = known_days.index(day)
    else:
        # Catch-all table placed right after the per-day ones.
        table = len(known_days)

    yield Record(ui, '', line, tableIndex=table)

def separate_dayuses2(rec):
    """Route a row to an output table chosen by the date in column 1.

    The record keeps its key.  Dates 2015-11-05 .. 2015-11-11 map to tables
    0..6 in order; any other value goes to table 7.

    Yields:
        ``Record(ui, '', line, tableIndex=...)`` -- exactly one per input.
    """
    ui = rec.key
    line = rec.value
    day = line.split('\t')[1]

    known_days = (
        '2015-11-05',
        '2015-11-06',
        '2015-11-07',
        '2015-11-08',
        '2015-11-09',
        '2015-11-10',
        '2015-11-11',
    )
    if day in known_days:
        table = known_days.index(day)
    else:
        # Catch-all table placed right after the per-day ones.
        table = len(known_days)

    yield Record(ui, '', line, tableIndex=table)


def main():
    """Drive the retention pipeline; stages are enabled and disabled by hand.

    Stages are switched off by position: an early ``continue``, an early
    ``sys.exit(1)`` or commenting a call out.  As the file stands, the live
    path is:

    1. configure the MapReduce defaults;
    2. split the ``redir_dayuse0`` table into per-day tables with
       ``separate_dayuses`` (eight dated tables plus an "other" table);
    3. run ``join_redir2`` seven times, each run appending to the same
       ``output/redir_lines`` table;
    4. terminate via ``sys.exit(1)``.

    Everything after the first ``sys.exit(1)`` is unreachable.
    """

    MapReduce.useDefaults(
                            server   = 'sakura.search.yandex.net:8013',
                            username = 'userstats',
                            mrExec   = '/Berkanavt/bin/mapreduce-dev',
                            verbose  = True,
                            #testMode = True,
                         )

    # ddd: date suffixes (MMDD) used to name the per-day dayuse0 tables.
    # dd:  date suffixes used for source tables and, below, indexed both for
    #      the day-0 table (dd[0..6]) and for the activity tables (dd[7..13]).
    ddd = ['1028','1029','1030','1031','1101','1102','1103','1104']
    dd = ['1029','1030','1031','1101','1102','1103','1104','1105','1106','1107','1108','1109','1110','1111']
    for d in dd:
        src = 'user_sessions/2015' + d
        redir = 'redir_log/2015' + d

        dt = 'ensuetina/EXTCHROME_retention/1/redir_lines_2'

        # Stage switch: this ``continue`` makes the rest of the loop body
        # unreachable, so the loop currently only leaves d/src/redir/dt bound
        # to the values of the last entry of dd.
        continue

#        dt0 = 'ensuetina/EXTCHROME_retention/1/all_reqs_control'
#        dt1 = 'ensuetina/EXTCHROME_retention/1/all_reqs_exp'

#        d0 = 'ensuetina/EXTCHROME_retention/1/uids_control'
#        d1 = 'ensuetina/EXTCHROME_retention/1/uids_exp'

#        continue

#        MapReduce.runReduce(Reduce,
#                            srcTable = src,
#                            dstTables = [dt0,dt1],
#                            files = ['/home/ensuetina/data/blockstat.dict'],
#                            appendMode = True,
#                            sortMode = True
#                            )

        # NOTE(review): unreachable while the ``continue`` above is in place.
        # ``dstTable = d`` passes the date suffix (e.g. '1029') as the
        # destination table; presumably ``dt`` was intended -- confirm before
        # re-enabling this stage.
        MapReduce.runMap(get_installs,
                         srcTable = redir,
                         dstTable = d,
                         appendMode = True,
                         sortMode = True
                        )

#    sys.exit(1)

    cut0 = 'ensuetina/EXTCHROME_retention/1/redir_dayuse0'

    # One destination per entry of ddd plus a trailing "other" table: nine
    # tables, matching tableIndex 0..8 produced by separate_dayuses.
    dst = []
    for d in ddd:
        dst.append('ensuetina/EXTCHROME_retention/1/days/1/dayuse0_' + d)

    dst.append('ensuetina/EXTCHROME_retention/1/days/1/dayuse0_other')

    MapReduce.runMap(separate_dayuses,
                     srcTable = cut0,
                     dstTables = dst,
                     sortMode = True
                    )

    # For i = 0..6 join the day-0 table of dd[i] with the redir_lines tables
    # of dd[7] .. dd[7+i]; every run appends to the same output table.
    # NOTE(review): the day-0 tables were created from ddd (starting at
    # '1028') but are read here via dd (starting at '1029'), so
    # 'dayuse0_1028' is never used -- confirm this is intended.
    i = 0
    while i < 7:
        dayuse0 = 'ensuetina/EXTCHROME_retention/1/days/1/dayuse0_' + dd[i]

        r = [dayuse0]
        j = 0
        while j < i+1:
            r.append('ensuetina/EXTCHROME_retention/1/days/1/redir_lines_' + dd[7+j])
            j += 1

        dst = 'ensuetina/EXTCHROME_retention/1/days/1/output/redir_lines'

        # Python 2 print statements: log the source tables of this run.
        print r
        print '\n'

        MapReduce.runReduce(join_redir2,
                            srcTables = r,
                            dstTable = dst,
                            appendMode = True,
                            sortMode = True
                           )
        i += 1

    # Stage switch: the script stops here.  Note the process exits with
    # status 1 even though the stages above completed.
    sys.exit(1)

#    cut0 = 'ensuetina/EXTCHROME_retention/1/redir_dayuse0'


#    dst = []
#    dst2 = []

#    for d in dd:
#        dst.append('ensuetina/EXTCHROME_retention/1/days/1/dayuse0_' + d)

#    dst.append('ensuetina/EXTCHROME_retention/1/days/1/dayuse0_other')

#    MapReduce.runMap(separate_dayuses,
#                     srcTable = cut0,
#                     dstTables = dst,
#                     sortMode = True
#                    )


#    for d in ddd:
#        dst2.append('ensuetina/EXTCHROME_retention/1/days/1/redir_lines_' + d)

#    dst2.append('ensuetina/EXTCHROME_retention/1/days/1/redir_lines_other')

#    print d

#    print dst2

#    MapReduce.runMap(separate_dayuses2,
#                     srcTable = dt,
#                     dstTables = dst2,
#                     sortMode = True
#                    )
#    MapReduce.runMap(separate_dayuses,
#                     srcTable = final_exp,
#                     dstTables = e,
#                     sortMode = True
#                    )


    # Unreachable: a second stage switch left over from an earlier run.
    sys.exit(1)


#    MapReduce.runReduce(aggr,
#                        srcTable = dt0,
#                        dstTable = d0,
#                        sortMode = True
#                       )
#    MapReduce.runReduce(aggr,
#                        srcTable = dt1,
#                        dstTable = d1,
#                        sortMode = True
#                       )

    # Unreachable earlier stages, kept as the recipe that built the inputs.
    # NOTE(review): ``d`` here would be the last date suffix from the loops
    # above, and ``d0`` / ``d1`` are only defined in commented-out code, so
    # re-enabling this section as-is would fail with NameError.
    cut0 = 'ensuetina/EXTCHROME_retention/1/redir_dayuse0'
    transformed = 'ensuetina/EXTCHROME_retention/1/redir_ui_keys'
    MapReduce.runMap(cut_day0_and_transform,
                     srcTable = d,
                     dstTables = [cut0,transformed],
                     sortMode = True
                    )

    redir_control = 'ensuetina/EXTCHROME_retention/1/redir_dayuse0_control'
    redir_exp = 'ensuetina/EXTCHROME_retention/1/redir_dayuse0_exp'

    MapReduce.runReduce(join,
                        srcTables = [d0,cut0],
                        dstTable = redir_control,
                        sortMode = True
                       )
    MapReduce.runReduce(join,
                        srcTables = [d1,cut0],
                        dstTable = redir_exp,
                        sortMode = True
                       )

    final_control = 'ensuetina/EXTCHROME_retention/1/redir_lines_control'
    final_exp = 'ensuetina/EXTCHROME_retention/1/redir_lines_exp'

    MapReduce.runReduce(join_redir,
                        srcTables = [redir_control,transformed],
                        dstTable = final_control,
                        sortMode = True
                       )
    MapReduce.runReduce(join_redir,
                        srcTables = [redir_exp,transformed],
                        dstTable = final_exp,
                        sortMode = True
                       )


# Run the pipeline only when this file is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()
