# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime, timedelta, date
import libra
import urllib, re,random


class Reduce:
    """Reducer that emits one row per touch web request seen in an experiment.

    Instances are callable as ``reducer(key, recs)`` and yield ``Record``
    objects. Requests carrying the control test id go to output table 0,
    requests carrying the experiment test id go to output table 1.

    NOTE(review): the constructor stores ``control`` and ``exp`` but
    ``__call__`` uses the hard-coded test ids '19710' / '19711' instead.
    Left as is because callers are not visible here -- confirm which one is
    intended.
    """

    # Keys whose trailing 10-digit unix timestamp is greater than this value
    # (2015-12-21 21:00:00 UTC) are labelled 'NEW', all others 'OLD'.
    _NEW_UID_THRESHOLD = 1450731600

    def __init__(self, control, exp):
        """Remember the control / experiment identifiers passed by the caller."""
        self.c = control
        self.e = exp

    def __call__(self, key, recs):
        """Yield one tab-separated row per matching request of the session.

        Args:
            key: user id; only keys starting with 'y' whose last ten
                characters parse as an integer timestamp are processed.
            recs: raw session records, handed to ``libra.ParseSession``.

        Yields:
            ``Record(uid, '', row, tableIndex=...)`` where ``row`` is
            ``slot, uid_day, request_day, age, clicks, has_click, referer,
            user_agent`` joined by tabs.

        The previous version also put an undefined name ``csp`` into the row,
        which raised ``NameError`` on the first matching request. Nothing in
        this file computes that value, so the column is no longer emitted.
        """
        uid = key
        # Slice rather than index so an empty key is skipped instead of
        # raising IndexError.
        if uid[:1] != 'y':
            return

        # Slicing never fails; it is the integer conversion that can, so that
        # is what has to be guarded. Malformed ids are skipped.
        try:
            uid_created = int(uid[-10:])
        except ValueError:
            return

        if uid_created > self._NEW_UID_THRESHOLD:
            age = 'NEW'
        else:
            age = 'OLD'
        uid_ts = datetime.fromtimestamp(float(uid_created)).date().isoformat()

        # Best effort: a session that cannot be parsed is dropped. Catch
        # Exception (not a bare except) so interpreter-exit signals still
        # propagate.
        try:
            session = libra.ParseSession(recs, './blockstat.dict')
        except Exception:
            return

        control_id = '19710'
        exp_id = '19711'

        for request in session:
            if not request.IsA('TTouchYandexWebRequest'):
                continue

            ts = datetime.fromtimestamp(request.Timestamp).date().isoformat()

            if request.HasTestID(control_id):
                slot = 'control ' + control_id
                table_index = 0
            elif request.HasTestID(exp_id):
                slot = 'exp ' + exp_id
                table_index = 1
            else:
                continue

            # Keep only the part of the referer before the query string.
            ref = str(request.Referer).split('?')[0]
            ua = str(request.UserAgent)

            click_count = sum(1 for _ in request.GetClicks())

            # 1 if any main block whose main result is not a TDirectResult
            # has at least one click, else 0.
            has_click = 0
            for block in request.GetMainBlocks():
                if block.GetMainResult().IsA('TDirectResult'):
                    continue
                if any(True for _ in block.GetClicks()):
                    has_click = 1
                    break

            row = '\t'.join([
                slot,
                uid_ts,
                ts,
                age,
                str(click_count),
                str(has_click),
                ref,
                ua,
            ])
            yield Record(uid, '', row, tableIndex=table_index)



def aggr(key, recs):
    """Collapse all records of ``key`` into a single empty-valued record.

    ``recs`` is not read; exactly one ``Record(key, '', '')`` is yielded
    per call.
    """
    collapsed = Record(key, '', '')
    yield collapsed


def map_spy(rec):
    """Map one log record to ``Record(uid, day, url)`` for TRAFFIC events.

    Args:
        rec: input record exposing ``key`` (user id), ``subkey`` (unix
            timestamp as text) and ``value`` (tab-separated ``name=value``
            fields).

    Yields:
        At most one ``Record`` whose key is the user id, whose subkey is the
        local calendar day (``YYYY-MM-DD``) of the timestamp and whose value
        is the ``url`` field of the line (``None`` if the line has no such
        field).

    Records are skipped when the key does not start with 'y', when the line
    does not contain ``type=TRAFFIC``, or when the subkey is not a number.
    """
    uid = rec.key
    # Slice rather than index so an empty key is skipped instead of raising
    # IndexError.
    if uid[:1] != 'y':
        return

    line = rec.value
    # Apply the cheap substring filter before any parsing so that discarded
    # rows cost nothing and cannot fail on a malformed timestamp.
    if 'type=TRAFFIC' not in line:
        return

    try:
        event_time = float(rec.subkey)
    except (TypeError, ValueError):
        # Malformed timestamp: drop the row rather than abort the whole job.
        return
    day = datetime.fromtimestamp(event_time).date().isoformat()

    # Split each "name=value" field on the first '=' only, so values that
    # themselves contain '=' (e.g. URLs with query strings) stay intact.
    fields = dict(
        part.split('=', 1) for part in line.split('\t') if '=' in part
    )

    yield Record(uid, day, fields.get('url'))


def main():
    """Run ``map_spy`` over every daily watch log of the experiment window.

    Configures the MapReduce client, then for each day from 2015-12-22 up to
    (but not including) 2016-01-12 prints the day as ``YYYYMMDD`` and appends
    the mapped, sorted output of ``user_sessions/<day>/watch_log`` to the
    ``ensuetina/SM_OFF/spy_data`` table.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode=True,
    )

    first_day = date(2015, 12, 22)
    end_day = date(2016, 1, 12)  # exclusive upper bound
    one_day = timedelta(days=1)
    target_table = 'ensuetina/SM_OFF/spy_data'

    day = first_day
    while day < end_day:
        day_tag = day.strftime('%Y%m%d')
        print(day_tag)

        MapReduce.runMap(
            map_spy,
            srcTable='user_sessions/' + day_tag + '/watch_log',
            dstTable=target_table,
            appendMode=True,
            sortMode=True,
        )

        day = day + one_day


if __name__ == '__main__':
    main()
