#-*-coding: utf8 -*-
from nile import (
    clusters,
    Record,
    filters as nf,
    aggregators as na
)
import nile
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)

import uatraits
import libra
from datetime import datetime

def Reduce(groups):
    """Yield one Record per web request that carries a tracked test id.

    For every (key, records) group whose ``key.key`` starts with 'y', the
    records are parsed with ``libra.ParseSession`` (using 'blockstat.dict').
    Each ``TYandexWebRequest`` in the session that has one of the tracked
    test ids produces a Record with fields:

      uid   -- the group key
      ts    -- date of ``r.Timestamp`` as 'YYYY-MM-DD' (``datetime.fromtimestamp``
               without tz, i.e. the worker's local time)
      slot  -- '<label> <test id>', e.g. 'control 22672'
      isWiz -- 1 if any click's ConvertedPath contains
               'wiz/default_search/set/install', else 0
      ref   -- ``r.Referer``

    Groups whose session cannot be parsed are skipped.
    """
    # (label, test id) pairs, checked in order: the first matching id wins.
    slots = (
        ('control', '22672'),
        ('exp1', '22673'),
        ('exp2', '22674'),
        ('exp3', '22675'),
    )
    install_path = 'wiz/default_search/set/install'

    for key, records in groups:
        uid = key.key
        # Slice instead of uid[0] so an empty uid is skipped rather than
        # raising IndexError.
        if uid[:1] != 'y':
            continue

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: drop sessions that fail to parse, but do not
            # swallow KeyboardInterrupt / SystemExit like a bare except would.
            continue

        for r in session:
            if not r.IsA('TYandexWebRequest'):
                continue

            slot = None
            for label, test_id in slots:
                if r.HasTestID(test_id):
                    slot = label + ' ' + test_id
                    break
            if slot is None:
                # Request is not in any tracked slot.
                continue

            # Formatted only for requests that are actually emitted.
            ts = datetime.fromtimestamp(r.Timestamp).date().isoformat()
            ref = r.Referer
            isWiz = int(any(install_path in cl.ConvertedPath
                            for cl in r.GetClicks()))

            yield Record(uid=uid, ts=ts, slot=slot, isWiz=isWiz, ref=ref)


# Owner of the job; the same name appears in the job_root path below.
username = 'ensuetina'

# YT cluster handle; '$job_root' in table paths used by the jobs below
# refers to the job_root set here.
cluster = clusters.YT(proxy="plato.yt.yandex.net").env(
    job_root='home/search-research/ensuetina/PARANJA'
)


# Day-of-month suffixes (March 2016) to process; one job is run per day.
dates = list(map(str, range(17, 24)))
#dates = ['23']
for date in dates:
#    continue
    job = cluster.job()
    #redir_log = job.table('statbox/redir-log/2016-03-' + date)
    #bs_log = job.table('statbox/blockstat-log/2016-03-18')
#    us = job.table('user_sessions/pub/search/daily/2016-03-' + date + '/clean')

    us = job.table('userdata/user_sessions/201603' + date)

    # Local files shipped with the reducer: the blockstat dictionary that
    # Reduce passes to libra.ParseSession, and the libra shared library.
    reduce_files = [
        nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
        nile.files.LocalFile('/home/ensuetina/lib/libra.so'),
    ]

    # Group by 'key', order each group by 'subkey', run Reduce, and append
    # the result to the shared reqs table (append=True on every day's run).
    by_key = us.groupby('key').sort('subkey')
    reduced = by_key.reduce(Reduce, files=reduce_files, memory_limit=1000)
    reqs = reduced.put('$job_root/1/reqs', append=True)


    #reqids = sessions.map(get_reqids).put('$job_root/data/reqids') # collect reqids and OS
#    uids = reqids.groupby('yuid','r_type','OS','BR','dd').aggregate(hits=na.count()).put('$job_root/data/uids')

#    shows = bs_log.qb2(log='blockstat-log',
#                       fields=['reqid',
#                               se.custom('yuid', lambda yandexuid: 'y' + yandexuid),
#                            ],
#                       filters=[sf.contains('blocks','wiz/default_search/set/learn_popup'), # keep only rows where web/item/checkbutton was present and had vars
                                #sf.defined('count_buttons')
#                               ]
#                    ).put('$job_root/data/bs_shows')
    #shows.groupby('reqid').aggregate(freq=na.count()).put('$job_root/reqids')

#    clicks = redir_log.qb2(log='redir-log',
#                           fields=['reqid', 'normal_path', 'normal_vars','path','vars','date',
#                                   se.custom('yuid', lambda yandexuid: 'y' + yandexuid),
#                                  ],
#                           filters = [sf.defined('reqid', 'path'), # the fields we need must be present and the click counter must match our element
                                      #sf.contains('path', '12.1620.1920.2147')
#                                      sf.contains('path', '295.714.1651.2147')
#                                     ]
#                          ).put('$job_root/redir_clicks_footer',append=True)

    job.run()

# Post-processing job over the per-request table filled by the daily runs.
job = cluster.job()

reqs = job.table('$job_root/1/reqs')

# One row per (uid, ts, slot), keeping the maximum isWiz seen in that group.
aggr_reqs = (
    reqs
    .groupby('uid', 'ts', 'slot')
    .aggregate(isWiz=na.max('isWiz'))
    .put('$job_root/1/aggr_reqs')
)

# Per uid, count the aggregated rows whose isWiz equals 1.
has_install_click = nf.custom(lambda flag: flag == 1, 'isWiz')
freqs = (
    aggr_reqs
    .filter(has_install_click)
    .groupby('uid')
    .aggregate(freq=na.count())
    .put('$job_root/1/freqs')
)

# Inner join on uid: keeps only the request rows of uids present in freqs.
distrib_reqs = reqs.join(freqs, by='uid', type='inner')
distrib_reqs.put('$job_root/1/distrib_reqs')

#cl = job.table('$job_root/redir_clicks_footer')

#cl.groupby('yuid','date','normal_path').aggregate(clicks=na.count()).put('$job_root/aggr_clicks_footer')
#cl.groupby('date').aggregate(clicks=na.count()).put('$job_root/aggr_clicks_dates_footer')

job.run()

