# -*- coding: UTF-8 -*-
from nile import (
    clusters,
    Record,
    filters as nf,
    aggregators as na
)
import nile
import uatraits, libra

from datetime import datetime

import urlparse
import cgi

def Reduce(groups, reqs, clicks):
    """Extract per-request and per-click rows for the stripe/inline experiment.

    For every user whose key starts with 'y', the user's records are parsed
    with libra and each touch web request that belongs to the control or the
    experiment slot and that showed a 'stripe/inline' block is reported.

    Args:
        groups: iterable of (key, records) pairs; ``key.key`` is the user id.
        reqs: callable receiving one Record per reported request.
        clicks: callable receiving one Record per click on a non-direct
            main block of a reported request.

    Sessions that libra fails to parse are skipped (best effort).
    """
    control_id = '22096'
    exp_id = '22097'
    # A gap of more than this many timestamp units opens a new session
    # (presumably seconds, i.e. 30 minutes - confirm against libra).
    session_gap = 1800

    detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    for key, records in groups:
        uid = key.key
        # startswith() also copes with an empty key, where uid[0] would raise.
        if not uid.startswith('y'):
            continue

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Deliberate best effort: drop sessions libra cannot parse, but
            # let KeyboardInterrupt / SystemExit propagate.
            continue

        prev = -1
        sess = 0
        sess_len = 0

        for r in session:
            if not r.IsA('TTouchYandexWebRequest'):
                continue

            curr = r.Timestamp
            ts = datetime.fromtimestamp(curr).date().isoformat()

            if prev == -1:
                prev = curr

            # NOTE(review): sess / sess_len are updated for every touch
            # request, while prev only advances for requests that are
            # actually reported (see the end of the loop body). Skipped
            # requests can therefore bump sess several times in a row.
            # Kept as is - confirm whether this is intended.
            if curr - prev > session_gap:
                sess += 1
                delta = curr - prev
                sess_len = 1
            else:
                delta = '-'
                sess_len += 1

            if r.HasTestID(control_id):
                slot = 'control ' + control_id
            elif r.HasTestID(exp_id):
                slot = 'exp ' + exp_id
            else:
                continue

            # Only requests where the stripe/inline block was shown matter.
            if not any('stripe/inline' in bl.Path for bl in r.GetBSBlocks()):
                continue

            reqid = r.ReqID
            page = str(r.PageNo)
            ref = str(r.Referer)

            # User-agent detection is done only for requests that survive
            # the filters above.
            d = detector.detect(str(r.UserAgent))
            OS = d['OSName'] if 'OSName' in d else ''
            BR = d['BrowserName'] if 'BrowserName' in d else ''

            all_cl = 0
            direct_cl = 0

            for bl in r.GetMainBlocks():
                if bl.GetMainResult().IsA('TDirectResult'):
                    direct_cl += sum(1 for _ in bl.GetClicks())
                    continue

                for cl in bl.GetClicks():
                    all_cl += 1
                    clicks(Record(
                        uid=uid,
                        slot=slot,
                        ts=ts,
                        reqid=reqid,
                        page=page,
                        ref=ref,
                        BR=BR,
                        OS=OS,
                        p=cl.ConvertedPath,
                        dw=cl.DwellTimeOnService,
                        sess=sess,
                        cl_ref=cl.Referer,
                    ))

            prev = curr

            reqs(Record(
                uid=uid,
                slot=slot,
                ts=ts,
                reqid=reqid,
                page=page,
                sess=sess,
                delta=delta,
                sess_len=sess_len,
                ref=ref,
                BR=BR,
                OS=OS,
                all_cl=all_cl,
                direct_cl=direct_cl,
            ))



# Driver script: stage 1 extracts per-request / per-click rows from daily user
# sessions with Reduce; stage 2 aggregates the extracted requests.
cluster = clusters.YT(proxy="hahn.yt.yandex.net").env(
    job_root='home/search-research/ensuetina/MOB_PODVAL'
)

# 20160310-20160323
earlier_dates = [
    '2016-03-10', '2016-03-11', '2016-03-12', '2016-03-13', '2016-03-14',
    '2016-03-15', '2016-03-16', '2016-03-17', '2016-03-18', '2016-03-19',
    '2016-03-20', '2016-03-21', '2016-03-22',
]
# Only this day is currently selected for extraction; the rest of the period
# is kept above for reference (presumably extracted by earlier runs - confirm).
dates = ['2016-03-23']

# Stage 1 is switched off by default, so only the aggregation below runs.
# Set to True to extract the days in `dates`; the outputs are written with
# append=True, so re-running a day adds its rows again.
RUN_EXTRACTION = False

if RUN_EXTRACTION:
    for date in dates:
        job = cluster.job().env(date=date)

        us = job.table('user_sessions/pub/search/daily/$date/clean')

        reqs, clicks = us.groupby('key').sort('subkey').reduce(
            Reduce,
            files=[
                nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
                nile.files.LocalFile('/home/ensuetina/lib/libra.so'),
            ],
            memory_limit=1000,
        )

        reqs.put('$job_root/reqs', append=True)
        clicks.put('$job_root/clicks', append=True)

        job.run()

# Stage 2: aggregate the extracted requests.
job = cluster.job()

reqs = job.table('$job_root/reqs')

# One row per (slot, day, session number, user): request and click totals.
reqs.groupby('slot', 'ts', 'sess', 'uid').aggregate(
    sess_len=na.count(),
    clicks=na.sum('all_cl'),
    direct_clicks=na.sum('direct_cl'),
).put('$job_root/sessions')

# Number of requests per (slot, day, delta) value.
reqs.groupby('slot', 'ts', 'delta').aggregate(
    reqs=na.count(),
).put('$job_root/deltas')

job.run()

