#-*-coding: utf8 -*-

from nile import (
    clusters,
    Record,
    filters as nf,
    aggregators as na
)
import nile
import uatraits, libra
from datetime import datetime
import urlparse

def Reduce(groups):
    """Yield one click-statistics Record per web request that is in a test slot.

    ``groups`` is an iterable of ``(key, records)`` pairs. The records of each
    group are parsed into a libra session using ``blockstat.dict``; groups
    whose session cannot be parsed are skipped. Every ``TYandexWebRequest`` of
    the session that carries one of the configured test ids produces a
    ``Record`` with these fields:

    * ``uid`` -- ``key.key`` of the group;
    * ``slot`` -- ``'control <id>'`` or ``'exp <id>'``;
    * ``ts`` -- date part (``YYYY-MM-DD``) of ``datetime.fromtimestamp`` applied
      to the request timestamp, i.e. in the local time zone of the worker;
    * ``isDirect`` -- 1 if any main block has a ``TDirectResult`` main result;
    * ``directClicks``, ``directClicks30``, ``directClicks120`` -- clicks on
      those blocks: all, with dwell time > 30, with dwell time > 120;
    * ``isCl`` -- 1 if any other main block was clicked;
    * ``cl30``, ``cl120`` -- clicks on the other main blocks with dwell time
      > 30 and > 120;
    * ``OS``, ``BR``, ``BV`` -- OS name, browser name and browser version
      detected from the user agent (``' '`` when not detected);
    * ``ref`` -- the referer up to the first ``'?'``.
    """
    # (test id, slot label prefix) pairs, checked in this order; the first
    # test id present on the request decides the slot. Add further experiment
    # ids here.
    slots = (
        ('20008', 'control '),
        ('20009', 'exp '),
    )

    def count_clicks(block):
        """Return (all, dwell > 30, dwell > 120) click counts of ``block``."""
        # NOTE(review): thresholds are presumably seconds -- confirm against
        # the definition of DwellTimeOnService in libra.
        total = over30 = over120 = 0
        for click in block.GetClicks():
            dwell = int(click.DwellTimeOnService)
            total += 1
            if dwell > 30:
                over30 += 1
            if dwell > 120:
                over120 += 1
        return total, over30, over120

    # Built once per reducer call; reused for every request.
    detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    for key, records in groups:
        uid = key.key

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: a group that cannot be parsed is dropped. Unlike a
            # bare ``except:``, this lets KeyboardInterrupt and SystemExit
            # propagate.
            continue

        for r in session:
            if not r.IsA('TYandexWebRequest'):
                continue

            slot = None
            for test_id, label in slots:
                if r.HasTestID(test_id):
                    slot = label + test_id
                    break
            if slot is None:
                # The request belongs to none of the tracked slots.
                continue

            isDirect = 0
            directClicks = 0
            directClicks30 = 0
            directClicks120 = 0

            isCl = 0
            cl30 = 0
            cl120 = 0

            for bl in r.GetMainBlocks():
                is_direct_block = bl.GetMainResult().IsA('TDirectResult')
                total, over30, over120 = count_clicks(bl)
                if is_direct_block:
                    isDirect = 1
                    directClicks += total
                    directClicks30 += over30
                    directClicks120 += over120
                else:
                    if total:
                        isCl = 1
                    cl30 += over30
                    cl120 += over120

            day = datetime.fromtimestamp(r.Timestamp).date().isoformat()

            ua = r.UserAgent
            traits = detector.detect(ua) if ua else {}

            # A single space marks a trait that could not be detected.
            OS = traits['OSName'] if 'OSName' in traits else ' '
            BR = traits['BrowserName'] if 'BrowserName' in traits else ' '
            BV = traits['BrowserVersion'] if 'BrowserVersion' in traits else ' '

            # Keep only the part of the referer before the query string.
            ref = str(r.Referer).split('?')[0]

            yield Record(
                uid=uid,
                slot=slot,
                ts=day,
                isDirect=isDirect,
                directClicks=directClicks,
                directClicks30=directClicks30,
                directClicks120=directClicks120,
                isCl=isCl,
                cl30=cl30,
                cl120=cl120,
                OS=OS,
                BR=BR,
                BV=BV,
                ref=ref,
            )

# User whose directory under home/search-research/ receives the output.
username = 'ensuetina'

# $job_root in the table paths below expands to this directory on plato,
# where the results are stored.
cluster = clusters.Plato().env(
    job_root='home/search-research/' + username + '/' + 'nile/DIRECT_BNO',
)

# One job is built and run per date produced by this range.
dates = nile.api.path.DateRange('20160114', '20160127')
for date in dates:
    job = cluster.job().env(date=date)

    log = job.table('userdata/user_sessions/$date')

    # Group the sessions log by 'key', order each group by 'subkey', run
    # Reduce over the groups (blockstat.dict and libra.so are attached to the
    # operation) and append the produced records to the shared result table.
    queries = (
        log.groupby('key')
        .sort('subkey')
        .reduce(
            Reduce,
            files=[
                nile.files.LocalFile('blockstat.dict'),
                nile.files.LocalFile('libra.so'),
            ],
        )
        .put('$job_root/result', append=True)
    )

    job.run()

