# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime, timedelta, date
import libra
import urllib, re,random

def Reduce(key, recs):
    """Emit promo "install" clicks for users in the control or experiment slots.

    The records of one key are parsed with ``libra.ParseSession``. For every
    ``TYandexWebRequest`` whose ``ServiceDomRegion`` is ``'ru'`` and which
    carries the control test id or one of the experiment test ids, each click
    whose converted path points at a promo block and contains ``'install'`` is
    emitted.

    Args:
        key: user id; only keys starting with ``'y'`` are processed.
        recs: records of that key, passed straight to ``libra.ParseSession``.

    Yields:
        ``Record(uid, '', '<slot>\\t<YYYY-MM-DD>\\t<click path>')`` where
        ``<slot>`` is ``'control <id>'`` or ``'exp <id>'``.
    """
    uid = key
    # startswith() also copes with an empty key, where uid[0] would raise.
    if not uid.startswith('y'):
        return

    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed is skipped, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return

    control = '19937'
    experiments = ['19918', '19919', '19920', '19923', '19924', '19917']
    promo_markers = ('footer/promo', 'stripe/', 'teaser/', 'wiz/default_search')

    for request in session:
        if not request.IsA('TYandexWebRequest'):
            continue

        if request.ServiceDomRegion != 'ru':
            continue

        # Date part (YYYY-MM-DD) of the request time, in the local timezone.
        day = datetime.fromtimestamp(request.Timestamp).isoformat().split('T')[0]

        # The control id is checked first; otherwise the first matching
        # experiment id in list order wins.
        slot = ''
        if request.HasTestID(control):
            slot = 'control ' + control
        else:
            for exp in experiments:
                if request.HasTestID(exp):
                    slot = 'exp ' + exp
                    break

        if not slot:
            continue

        for click in request.GetClicks():
            path = click.ConvertedPath
            if 'install' in path and any(marker in path for marker in promo_markers):
                yield Record(uid, '', slot + '\t' + day + '\t' + path)

class get_installs:  # take installs from the experiment and all dayuses
    """Mapper that extracts ``tech.yaelements`` records from '@@'-separated lines.

    The constructor stores seven collections of user ids. According to the
    slot lookup that used to live in ``__call__`` (currently disabled) they
    stand for: ``c`` - control 19937, ``e1`` - exp 19917, ``e2`` - exp 19918,
    ``e3`` - exp 19919, ``e4`` - exp 19920, ``e5`` - exp 19923,
    ``e6`` - exp 19924. With the lookup disabled, the emitted slot field is
    always an empty string.
    """

    def __init__(self, c, e1, e2, e3, e4, e5, e6):
        self.c = c
        self.e1 = e1
        self.e2 = e2
        self.e3 = e3
        self.e4 = e4
        self.e5 = e5
        self.e6 = e6

    def __call__(self, rec):
        """Parse one record and yield it to the "all dayuses" output.

        ``rec.value`` is split on ``'@@'``: the last field (prefixed with
        ``'y'``) is the user id, the third field from the end is read as a
        Unix timestamp, and ``key=value`` fields supply ``path``, ``vars``
        and ``slots``. ``vars`` is itself a comma-separated ``key=value`` list.

        Records are skipped silently when the line cannot be parsed, the user
        id is shorter than 11 characters, any of ``path``/``vars``/``slots``
        is missing, or ``path`` does not contain ``'tech.yaelements'``.

        Yields:
            ``Record(ui, 'ALL', <tab-separated fields>, tableIndex=1)``.
        """
        line = rec.value
        try:
            fields = line.split('@@')
            uid = 'y' + str(fields[-1])
            ts = str(fields[-3])
            # Date part (YYYY-MM-DD) of the timestamp, in the local timezone.
            day = datetime.fromtimestamp(float(ts)).isoformat().split('T')[0]
        except Exception:
            # Best effort: malformed lines are dropped, but KeyboardInterrupt
            # and SystemExit are allowed to propagate.
            return

        if len(uid) < 11:
            return

        # Slot assignment from the id collections is disabled (see the class
        # docstring), so the slot column is emitted empty.
        slot = ''

        data = dict([d.split('=', 1) for d in fields if '=' in d])
        try:
            path = data['path']
            vv = data['vars']
            slots = data['slots']
        except KeyError:
            return

        if 'tech.yaelements' not in path:
            return

        vdict = dict([d.split('=', 1) for d in vv.split(',') if '=' in d])

        # Missing vars become the literal string 'None' because of str().
        dayuse = str(vdict.get('-dayuse'))
        clid = str(vdict.get('-clid1'))
        product = str(vdict.get('-productname'))
        ver = str(vdict.get('-ver'))
        ui = str(vdict.get('-ui'))

        value = '\t'.join([
            uid, slot, day, path, str(vv), slots,
            dayuse, clid, product, ver, ui, str(vdict),
        ])
        # all dayuses; tableIndex=1 presumably selects the second destination
        # table of the map call - confirm against mapreducelib.
        yield Record(ui, 'ALL', value, tableIndex=1)


def join(key, recs):
    """Prefix every non-'EXP' record of a key with the slot of its 'EXP' record.

    All records of the key are read first. The slot is the second
    tab-separated field of an 'EXP' record's value (the last such record wins
    if there are several). Nothing is emitted for keys without an 'EXP'
    record.

    Yields:
        ``Record(rec.key, rec.subkey, slot + '\\t' + rec.value)`` for every
        buffered non-'EXP' record.
    """
    exp_slot = None
    pending = []
    for rec in recs:
        if rec.subkey != 'EXP':
            pending.append(rec)
            continue
        exp_slot = rec.value.split('\t')[1]

    # No 'EXP' record seen for this key: drop everything.
    if exp_slot is None:
        return

    for rec in pending:
        yield Record(rec.key, rec.subkey, exp_slot + '\t' + rec.value)

def _read_slot_file(path):
    """Return the contents of *path* split on newlines, carriage returns removed."""
    with open(path) as f:
        return f.read().replace('\r', '').split('\n')


def main():
    """Run the daily map over redir logs, then join the results by experiment slot.

    For each day from 2016-01-18 up to (not including) 2016-01-22 the
    ``get_installs`` mapper is run over ``redir_log/<YYYYMMDD>`` and appended
    to the destination tables. After the loop a single ``join`` reduce is run
    over the ``dayuses0`` and ``all_dayuses`` tables.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode=True,
    )

    # User-id lists: control first, then the six experiment slots.
    slot_files = [
        'slots/c.txt',
        'slots/19917.txt',
        'slots/19918.txt',
        'slots/19919.txt',
        'slots/19920.txt',
        'slots/19923.txt',
        'slots/19924.txt',
    ]
    c, e1, e2, e3, e4, e5, e6 = [_read_slot_file(p) for p in slot_files]

    # Table names do not depend on the day; defining them before the loop
    # keeps the final reduce valid even if the date range is empty.
    d0 = 'ensuetina/LTV_INSTALLS/dayuses0'
    d1 = 'ensuetina/LTV_INSTALLS/all_dayuses'

    day = datetime.strptime('20160118', '%Y%m%d').date()
    end_day = datetime.strptime('20160122', '%Y%m%d').date()  # exclusive

    while day < end_day:
        cur_d = day.strftime('%Y%m%d')
        print(cur_d)

        # Disabled stage: reduce over user sessions to collect promo installs.
        # src = 'user_sessions/' + cur_d
        # dt = 'ensuetina/LTV_INSTALLS/installs'
        # MapReduce.runReduce(Reduce,
        #                     srcTable=src,
        #                     dstTable=dt,
        #                     files=['/home/ensuetina/data/blockstat.dict'],
        #                     appendMode=True,
        #                     sortMode=True)

        redir = 'redir_log/' + cur_d

        # NOTE(review): the map writes to d0 + '_tmp' while the reduce below
        # reads d0 - confirm that d0 is produced elsewhere.
        MapReduce.runMap(get_installs(c, e1, e2, e3, e4, e5, e6),
                         srcTable=redir,
                         dstTables=[d0 + '_tmp', d1],
                         appendMode=True,
                         sortMode=True)

        day += timedelta(days=1)

    MapReduce.runReduce(join,
                        srcTables=[d0, d1],
                        dstTable=d1 + '_exp',
                        sortMode=True)


# Run the job only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
