# -*- coding: UTF-8 -*-
from nile import (
    clusters,
    Record,
    filters as nf,
    aggregators as na
)
import nile
import uatraits, libra

from datetime import datetime

import urlparse

def Reduce(groups):
    """Emit one Record per touch web request that reported a smartbanner prop.

    ``groups`` yields ``(key, records)`` pairs. ``key.key`` is used as the
    uid, and ``records`` is handed to ``libra.ParseSession`` together with
    'blockstat.dict'.

    A group is skipped when its uid does not start with 'y', when the last
    10 characters of the uid are not an integer, or when the session cannot
    be parsed. Within a session a request is skipped unless it is a
    ``TTouchYandexWebRequest``, carries the control or the experiment test
    id, and has a search prop whose name contains 'REPORT.smartbanner'.

    Yielded Record fields:
        uid      -- the group's uid
        age      -- 'NEW' / 'OLD', from the number in the uid's last 10 chars
        ts       -- request date (YYYY-MM-DD, local time of the worker)
        slot     -- 'control <id>' or 'exp <id>'
        OS, BR   -- OSName / BrowserName detected from the User-Agent,
                    '' when the detector does not report them
        SM       -- '<prop name>=<prop value>' of the first matching prop
        smClick  -- 1 if any click path contains 'smartbanner/'
        sm_close -- 1 if such a click path also contains '/close'
        smsh     -- 1 if any blockstat block path contains 'smartbanner'
    """
    control_id = '23151'
    experiment_id = '23152'
    # uids whose trailing number is greater than this are labelled 'NEW'.
    # As a Unix timestamp the value is 2015-12-21 21:00:00 UTC.
    new_uid_threshold = 1450731600

    detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    for key, records in groups:
        uid = key.key
        # startswith() also rejects an empty uid, where uid[0] would raise.
        if not uid.startswith('y'):
            continue

        # The slice itself never fails; the int() conversion is what breaks
        # on malformed uids, so that is the call that has to be guarded.
        # NOTE(review): the trailing 10 chars are presumably the uid's
        # creation time in Unix seconds -- confirm.
        try:
            uid_created = int(uid[-10:])
        except ValueError:
            continue
        age = 'NEW' if uid_created > new_uid_threshold else 'OLD'

        # Best effort: a session that cannot be parsed is dropped rather
        # than failing the whole job.
        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            continue

        for r in session:
            if not r.IsA('TTouchYandexWebRequest'):
                continue

            if r.HasTestID(control_id):
                slot = 'control ' + control_id
            elif r.HasTestID(experiment_id):
                slot = 'exp ' + experiment_id
            else:
                continue

            # First search prop whose name mentions the smartbanner report.
            props = r.SearchPropsValues
            SM = None
            for name in props:
                if 'REPORT.smartbanner' in name:
                    SM = name + '=' + str(props[name])
                    break
            if SM is None:
                continue

            ts = datetime.fromtimestamp(r.Timestamp).date().isoformat()

            traits = detector.detect(str(r.UserAgent))
            OS = traits['OSName'] if 'OSName' in traits else ''
            BR = traits['BrowserName'] if 'BrowserName' in traits else ''

            smClick = 0
            sm_close = 0
            for click in r.GetClicks():
                path = click.ConvertedPath
                if 'smartbanner/' in path:
                    smClick = 1
                    if '/close' in path:
                        sm_close = 1

            smsh = 0
            for block in r.GetBSBlocks():
                if 'smartbanner' in block.Path:
                    smsh = 1
                    break

            yield Record(
                uid=uid,
                age=age,
                ts=ts,
                slot=slot,
                OS=OS,
                BR=BR,
                smClick=smClick,
                SM=SM,
                smsh=smsh,
                sm_close=sm_close,
            )



# YT cluster handle. job_root is the value referenced as '$job_root' in the
# table paths below.
cluster = clusters.YT(proxy="hahn.yt.yandex.net").env(
    job_root='home/search-research/ensuetina/SMARTBANNER_OFF_2'
)

# Days (MM-DD, year 2016) whose daily session table is read from the
# '<date>.broken/clean' path instead of '<date>/clean'.
broken_dates = ['04-01', '04-02', '04-03', '04-04', '04-05', '04-06', '04-07']
dates = [
    '03-28', '03-29', '03-30', '03-31',
    '04-01', '04-02', '04-03', '04-04', '04-05', '04-06', '04-07',
    '04-08', '04-09', '04-10', '04-11', '04-12', '04-13', '04-14',
    '04-15', '04-16',
]

# Stage 1: run Reduce over every daily session table and collect its
# records in $job_root/all_sm1_reqs.
# Disabled by default. This stage used to be switched off by a bare
# `continue` at the top of the loop, which left the body unreachable; the
# flag keeps the same default but makes the intent visible.
# NOTE(review): the output is written with append=True, so enabling this
# against an already filled table presumably duplicates rows -- confirm
# before flipping the flag.
RUN_EXTRACTION = False

if RUN_EXTRACTION:
    for date in dates:
        suffix = '.broken/clean' if date in broken_dates else '/clean'
        job = cluster.job()
        us = job.table('user_sessions/pub/search/daily/2016-' + date + suffix)
        us.groupby('key').sort('subkey').reduce(
            Reduce,
            files=[
                nile.files.LocalFile('blockstat.dict'),
                nile.files.LocalFile('libra.so'),
            ],
            memory_limit=2000,
        ).put('$job_root/all_sm1_reqs', append=True)
        job.run()

# Stage 2: aggregate the collected requests.
job = cluster.job()

t = job.table('$job_root/all_sm1_reqs')

# Request count per (slot, day, uid).
t.groupby('slot', 'ts', 'uid').aggregate(
    reqs=na.count()
).put('$job_root/uids')

# Request count per (browser, OS, slot, smartbanner flags, day, uid).
t.groupby(
    'BR', 'OS', 'slot', 'smClick', 'sm_close', 'smsh', 'ts', 'uid'
).aggregate(
    reqs=na.count()
).put('$job_root/all_data')

job.run()



