# -*- coding: UTF-8 -*-
from nile import (
    filters as nf,
    aggregators as na,
    extractors as ne
)
from nile.api.v1 import clusters
from nile.processing import Record
import nile
import uatraits, libra

from qb2.api.v1 import extractors as se, filters as sf

from datetime import datetime

import urlparse
import cgi

def Reduce(groups, clck, allcl):
    """Turn grouped user-session rows into per-click and per-request records.

    Groups whose user id does not start with 'y' are ignored.  The rows of
    each remaining group are parsed with ``libra.ParseSession``; a group that
    fails to parse is skipped.  Only requests that are a
    ``TTouchYandexWebRequest`` are processed.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` is used as
            the user id.
        clck: output callable, called with one ``Record`` per click of a
            request (fields: uid, p, ts, time, hour, mins, BR, misc, dyn, ref).
        allcl: output callable, called with one ``Record`` per request
            (fields: uid, ts, time, hour, mins, BR, all_cl).  ``all_cl`` is
            the number of clicks in main blocks whose main result is not a
            ``TDirectResult``.
    """
    detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    for key, records in groups:
        uid = key.key
        # Guard against an empty/missing uid before inspecting its first char.
        if not uid or uid[0] != 'y':
            continue

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: skip sessions that cannot be parsed, but do not
            # swallow KeyboardInterrupt / SystemExit like a bare except would.
            continue

        for r in session:
            if not r.IsA('TTouchYandexWebRequest'):
                continue

            # Format the request timestamp once (fromtimestamp yields local
            # time) and split it into its date and time-of-day parts.
            iso_stamp = datetime.fromtimestamp(r.Timestamp).isoformat()
            date_part, _sep, time_part = iso_stamp.partition('T')
            hour, mins = time_part.split(':')[:2]

            traits = detector.detect(str(r.UserAgent))
            browser = traits['BrowserName'] if 'BrowserName' in traits else ''

            # Count clicks in main blocks, leaving out blocks whose main
            # result is a Direct result.
            all_cl = 0
            for bl in r.GetMainBlocks():
                if bl.GetMainResult().IsA('TDirectResult'):
                    continue
                all_cl += sum(1 for _click in bl.GetClicks())

            for cl in r.GetClicks():
                clck(Record(
                    uid=uid,
                    p=cl.ConvertedPath,
                    ts=date_part,
                    time=time_part,
                    hour=hour,
                    mins=mins,
                    BR=browser,
                    misc=1 if cl.IsA('TMiscResultClick') else 0,
                    dyn=str(cl.IsDynamic),
                    ref=str(cl.Referer)
                ))

            allcl(Record(
                uid=uid,
                ts=date_part,
                time=time_part,
                hour=hour,
                mins=mins,
                BR=browser,
                all_cl=all_cl
            ))


# Handle for the Hahn YT cluster.  The `job_root` template is what the
# '$job_root/...' table paths used further down refer to.
cluster = clusters.yt.Hahn().env(
    templates={
        'job_root': 'home/search-research/ensuetina/HTTPS_NEW_UIDS_2/https_check/march'
    }
)

# Stage 1: per-day extraction of click and request records from user sessions.
# NOTE(review): the unconditional `continue` at the top of the loop skips every
# date, so none of the statements after it ever run.  Presumably the stage was
# switched off once the output tables had been filled (the puts below use
# append=True, so re-running would add the same days again) -- confirm before
# removing the `continue`.
dates = ['2016-03-21', '2016-03-14', '2016-03-22']
for date in dates:
    continue
    job = cluster.job()

    sessions_path = 'user_sessions/pub/search/daily/' + date + '/clean'
    reducer_files = [
        nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
        nile.files.LocalFile('/home/ensuetina/lib/libra.so'),
    ]

    # Reduce writes to two outputs: the first receives per-click records,
    # the second per-request records.
    cl1, cl2 = (
        job.table(sessions_path)
        .groupby('key')
        .sort('subkey')
        .reduce(Reduce, files=reducer_files)
    )

    cl1.put('$job_root/all_clicks', append=True)
    cl2.put('$job_root/all_serps', append=True)

    job.run()

# Stage 2: aggregate the stored click records into click counts.
job = cluster.job()

click_fields = ('p', 'ts', 'hour', 'mins', 'BR', 'misc', 'dyn')

# Keep the click fields and cut `ref` down to the text before the first '//'
# (for a value such as 'https://host/path' that is 'https:').
cl1 = job.table('$job_root/all_clicks').project(
    *click_fields,
    ref=ne.custom(lambda referer: referer.split('//')[0], 'ref')
)

# One output row per distinct combination of the fields, with its click count.
cl1.groupby(*(click_fields + ('ref',))).aggregate(
    clicks=na.count()
).put('$job_root/clicks_aggr')

# Per-request aggregation over all_serps is currently disabled:
#cl2 = job.table('$job_root/all_serps',ignore_missing=True).groupby('ts','hour','BR','mins').aggregate(cl=na.sum('all_cl'),reqs=na.count()).put('$job_root/serps_aggr')

job.run()

