#-*-coding: utf8 -*-

from nile import (
    clusters,
    Record,
    filters as nf,
    aggregators as na
)
import nile
import uatraits, libra

from qb2.api.v1 import (
    extractors as se,
    filters as sf
)

import urlparse
from datetime import datetime

def Reduce(groups):
    """Yield one Record per qualifying web request in each group's session.

    NOTE: another function named ``Reduce`` is defined later in this file and
    rebinds the name, so this definition is shadowed once the module has been
    fully executed.

    For every ``(key, records)`` pair the records are parsed with
    ``libra.ParseSession(records, 'blockstat.dict')``. Groups whose parsing
    raises are skipped. A request is reported only when all of these hold:

    * it is a desktop, touch or pad Yandex web request;
    * its ``ServiceDomRegion`` equals ``'ru'``;
    * it carries one of the test ids 19029 (control), 19030 or 19031.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` is used as
            the uid of the emitted records.

    Yields:
        Record with fields ``uid``, ``ui`` ('DESKTOP', 'TOUCH' or 'PAD'),
        ``ts`` (``YYYY-MM-DD`` date of the request timestamp, local time),
        ``reqid``, ``slot`` ('control 19029', 'exp 19030' or 'exp 19031'),
        ``ON`` and ``DENIED`` (path of the last matching tech event, or ``''``
        when no tech event matched).
    """
    # (libra request type, UI label); checked in this order, first match wins.
    ui_by_request_type = (
        ('TYandexWebRequest', 'DESKTOP'),
        ('TTouchYandexWebRequest', 'TOUCH'),
        ('TPadYandexWebRequest', 'PAD'),
    )
    # (test id, slot label); checked in this order, first match wins.
    slot_by_test_id = (
        ('19029', 'control 19029'),
        ('19030', 'exp 19030'),
        ('19031', 'exp 19031'),
    )
    # Substrings of a tech event path that mark the "on" / "denied" events.
    on_markers = (
        '690.781.10',
        'tech/userlocation/on',
        'tech.userlocation.on',
    )
    denied_markers = (
        'tech/userlocation/denied',
        'tech.userlocation.denied',
        '690.781.2032',
    )

    for key, records in groups:
        uid = key.key
        # No uid filtering is applied here (a former check for a leading 'y'
        # is disabled).

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: drop sessions that cannot be parsed, but let
            # KeyboardInterrupt / SystemExit propagate.
            continue

        for request in session:
            ui = None
            for type_name, ui_label in ui_by_request_type:
                if request.IsA(type_name):
                    ui = ui_label
                    break
            if ui is None:
                continue

            if request.ServiceDomRegion != 'ru':
                continue

            slot = None
            for test_id, slot_label in slot_by_test_id:
                if request.HasTestID(test_id):
                    slot = slot_label
                    break
            if slot is None:
                continue

            reqid = request.ReqID
            # Calendar date (local time) of the request, as 'YYYY-MM-DD'.
            ts = datetime.fromtimestamp(request.Timestamp).date().isoformat()

            is_on = ''
            is_denied = ''
            for tech in request.GetYandexTechEvents():
                if not tech.IsA('TYandexTechEvent'):
                    continue
                path = tech.Path
                # The two checks are independent: one path may set both
                # values, and a later matching event overwrites an earlier one.
                if any(marker in path for marker in on_markers):
                    is_on = path
                if any(marker in path for marker in denied_markers):
                    is_denied = path

            yield Record(uid=uid, ui=ui, ts=ts, reqid=reqid, slot=slot,
                         ON=is_on, DENIED=is_denied)

def Reduce(groups):
    """Emit, for every group, a Record holding the number of its records.

    This is the later of the two ``Reduce`` definitions in this file, so it is
    the one the name refers to after the module has been executed.

    Args:
        groups: iterable of ``(key, records)`` pairs.

    Yields:
        ``Record(key, reqids=<number of records in the group>)``.
    """
    for group_key, group_records in groups:
        # Consume the records iterator completely just to count its items.
        total = sum(1 for _ in group_records)
        yield Record(group_key, reqids=total)

def get_slot(slot):
    """Return the first known experiment test id contained in *slot*.

    The ids are checked in a fixed order: '19029', then '19030', then '19031'.
    The first one found in *slot* is returned.

    Args:
        slot: value to search with the ``in`` operator, or ``None``.

    Returns:
        The matching test id as a string, or ``None`` when *slot* is empty,
        ``None``, or contains none of the known ids.
    """
    # A missing value has no slot; without this guard ``in`` raises TypeError
    # for None.
    if not slot:
        return None

    for test_id in ('19029', '19030', '19031'):
        if test_id in slot:
            return test_id
    return None



# User whose directory under home/search-research receives the results.
username = 'ensuetina'
# job_root is the directory on plato where the results are stored.
cluster = clusters.Plato().env(
    job_root='home/search-research/' + username + '/nile/YP_UserSessions'
)
dates = nile.api.path.DateRange('2015-12-15', '2015-12-22')
for date in dates:
    # The per-day collection below is switched off: this unconditional
    # `continue` skips the rest of the loop body for every date.
    continue
    job = cluster.job().env(date=date)

    # Disabled user-sessions pipeline:
    # log = job.table('userdata/user_sessions/$date')
    # queries = log.groupby('key').sort('subkey').reduce(Reduce,files=[nile.files.LocalFile('blockstat.dict'),nile.files.LocalFile('libra.so')]).put('$job_root/result', append=True)

    redir_log = job.table('statbox/redir-log/$date')

    # Extract the listed fields from the redir-log and append the rows to
    # $job_root/redir_result. 'slot' is computed by get_slot from the 'slots'
    # field; rows need reqid, normal_path and slot to be defined.
    clicks = redir_log.qb2(
        log='redir-log',
        fields=[
            'reqid',
            'normal_path',
            'date',
            se.custom('yuid', lambda yandexuid: 'y' + yandexuid),
            se.log_field('slots'),
            se.custom('slot', get_slot, 'slots'),
        ],
        filters=[
            sf.defined('reqid', 'normal_path', 'slot'),
            sf.contains('path', '690.781.'),
        ]
    ).put('$job_root/redir_result', append=True)

    # Disabled follow-up steps:
    # queries.groupby('ui','reg').aggregate(freq=na.count()).put('$job_root/aggr_result')
    # aggr_queries = queries.groupby('query').aggregate(freq=aggregators.count()).put('$job_root/queries')
    # filtering freq > 1000 and store filtered table:
    # filtered_queries = aggr_queries.filter(
    #                        nf.custom(lambda x: x > 1000, 'freq')
    #                    ).put('$job_root/queries_freq_more_1K')
    # join table of hosts with table of queries:
    # join1 = tr.join(filtered_queries, by='query').filter( nf.custom(bool, 'freq')).put('$job_root/join1')
    job.run()

# Aggregation job over the rows stored in $job_root/redir_result.
job = cluster.job()

t = job.table('$job_root/redir_result')

# Disabled alternative: distinct yuid count per (normal_path, slot, date).
#t.groupby('normal_path','slot','date').aggregate(uids=na.count_distinct('yuid')).put('$job_root/uids')

# Step 1: group by (normal_path, slot, date, reqid) with a row count, which
# presumably leaves one row per distinct reqid within each group.
reqids = (
    t.groupby('normal_path', 'slot', 'date', 'reqid')
    .aggregate(count=na.count())
)
# Step 2: count those rows per (normal_path, slot, date). `Reduce` here is the
# record-counting function, the last definition of that name in this file.
(
    reqids.groupby('normal_path', 'slot', 'date')
    .reduce(Reduce)
    .put('$job_root/aggr_result')
)

job.run()

#days = t.groupby('ui','ts','slot','ON','DENIED').aggregate(reqids=na.count()).put('$job_root/days')
#full = t.groupby('ui','slot','ON','DENIED').aggregate(uids=na.count_distinct('uid'),reqids=na.count()).put('$job_root/uids')

#job.run()

