#-*-coding: utf8 -*-

from nile import (
    clusters,
    Record,
    filters as nf,
    aggregators as na
)
import nile

from qb2.api.v1 import (
    extractors as se,
    filters as sf
)

import uatraits, libra

import urlparse, cgi, urllib
from datetime import datetime


def Reduce(groups):
    """Yield one ``Record(uid=..., slot=...)`` per user seen in a test slot.

    For every ``(key, records)`` group whose ``key.key`` starts with ``'y'``,
    the records are parsed with ``libra.ParseSession`` and the requests are
    scanned in order.  The first web request (desktop, touch or pad) that
    carries one of the known test ids decides the slot label; at most one
    record is emitted per group.

    Groups with an empty/missing key, a key not starting with ``'y'``, or a
    session that libra fails to parse are skipped silently.

    :param groups: iterable of ``(key, records)`` pairs, where ``key`` has a
        ``key`` attribute holding the user id.
    :return: generator of ``Record`` objects with ``uid`` and ``slot`` fields.
    """
    web_request_types = (
        'TYandexWebRequest',
        'TTouchYandexWebRequest',
        'TPadYandexWebRequest',
    )
    # Checked in this order; the first test id present on the request wins.
    slot_by_test_id = (
        ('19029', '19029 control'),
        ('19030', '19030 exp refresh 900'),
        ('19031', 'exp refresh 60'),
    )

    for key, records in groups:
        uid = key.key
        # An empty key used to raise IndexError on uid[0]; treat it like any
        # other key that does not start with 'y' and skip it.
        if not uid or uid[0] != 'y':
            continue

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: skip sessions that cannot be parsed, but do not
            # swallow KeyboardInterrupt / SystemExit as a bare except would.
            continue

        for r in session:
            if not any(r.IsA(type_name) for type_name in web_request_types):
                continue

            slot = next(
                (label for test_id, label in slot_by_test_id
                 if r.HasTestID(test_id)),
                None
            )
            if slot is None:
                continue

            yield Record(uid=uid, slot=slot)
            # One record per user is enough: stop at the first match.
            break


def get_UI(req):
    """Classify a request string into a UI label.

    Everything from the first ``'?'`` onwards is ignored.  The remaining
    part is matched against known substrings, in this priority order:
    ``'TOUCH'``, ``'TABLET'``, ``'APP'``, ``'DESKTOP'``.  If nothing matches,
    ``'unknown - <path>'`` is returned so unexpected paths stay visible.

    :param req: request string, optionally with a query string.
    :return: the UI label as a string.
    """
    path = req.partition('?')[0]

    def mentions(*needles):
        # True when at least one of the needles occurs in the path.
        return any(needle in path for needle in needles)

    if mentions('/touchsearch', '/search/touch'):
        return 'TOUCH'
    if mentions('/padsearch', '/search/pad'):
        return 'TABLET'
    if mentions('jsonproxy', 'searchapp', 'searchapi'):
        return 'APP'
    # Note: the bare search path must match exactly, not as a substring.
    if '/yandsearch' in path or path == '/search/':
        return 'DESKTOP'
    return 'unknown - ' + path

def get_geo_ts(geo):
    """Extract the timestamp part from a colon-separated geo value.

    Takes the fifth ``':'``-separated field of ``geo`` and cuts it at the
    first ``','`` and then at the first ``'#'``.

    On any failure (for example ``geo`` is ``None`` or has fewer than five
    fields) the text of the exception is returned instead of raising, so the
    reason shows up in the output column.

    :param geo: geo value string, or ``None``.
    :return: the extracted field as a string, or the error message.
    """
    try:
        ts_field = geo.split(':')[4]
        trimmed = ts_field.partition(',')[0].partition('#')[0]
    except Exception as err:
        return str(err)
    else:
        return trimmed

def get_delta(ts, yp_ts):
    """Return ``int(yp_ts) - int(ts)``, or a diagnostic string on bad input.

    When either value cannot be converted to an integer (``None``, an error
    message, a non-numeric string, ...) the function does not raise; it
    returns ``'no delta - <yp_ts> - <ts>'`` so the offending values remain
    visible downstream.

    :param ts: first timestamp (number or numeric string).
    :param yp_ts: second timestamp (number or numeric string).
    :return: integer difference, or the diagnostic string.
    """
    try:
        return int(yp_ts) - int(ts)
    except (TypeError, ValueError):
        # Only conversion failures are expected here; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        return 'no delta - ' + str(yp_ts) + ' - ' + str(ts)

def delta_class(delta):
    """Bucket a time difference in seconds into a human-readable class.

    The absolute value of ``delta`` is compared against 15 minutes, 1 hour,
    1 day and 7 days (upper bounds are exclusive).  If ``delta`` is not a
    number (for example ``None`` or a diagnostic string) it is returned
    unchanged.

    :param delta: difference in seconds, or a non-numeric placeholder.
    :return: bucket label, or ``delta`` itself when it is not numeric.
    """
    try:
        magnitude = abs(delta)
    except TypeError:
        # Non-numeric input is passed through as-is.
        return delta

    buckets = (
        (900, '< 15 mins'),
        (3600, '15 mins - 1 hour'),
        (86400, '1 hour - 1 day'),
        (604800, '1 day - 7 days'),
    )
    for upper_bound, label in buckets:
        if magnitude < upper_bound:
            return label
    return '> 7 days'

def isGeo(geo):
    """Tell whether a geo value is present.

    Only ``None`` counts as missing; any other value, including an empty
    string, counts as present.  The check uses identity (``is None``) so that
    objects with a permissive ``__eq__`` are not mistaken for ``None``.

    :param geo: geo value or ``None``.
    :return: ``'no geo'`` when ``geo`` is ``None``, otherwise ``'some geo'``.
    """
    return 'no geo' if geo is None else 'some geo'


# Account whose home directory holds the job output.
username = 'ensuetina'

# Cluster handle with the job root set under that home directory.
# NOTE(review): presumably this is what '$job_root' in the output table
# paths expands to - confirm against the nile documentation.
cluster = clusters.Plato().env(
    job_root='home/search-research/' + username + '/nile/YP_EXP'
)

# Dates to process, as 'MM-DD' strings.
# Earlier ranges, kept for reference only (they used to be assignments that
# were immediately overwritten, so they never took effect):
#   '12-03' .. '12-05'
#   '12-06'
#   '12-07' .. '12-09'
dates = [
    '12-10', '12-11', '12-12', '12-13', '12-14', '12-15', '12-16',
    '12-17', '12-18', '12-19', '12-20', '12-21', '12-22',
]
# Build and run one job per date: take access-log rows, attach the experiment
# slot of each user (derived from user sessions), and write per-slice counts.
for d in dates:
    job = cluster.job()

    # Input tables for this date: the access log uses 'YYYY-MM-DD' names,
    # user sessions use 'YYYYMMDD'.
    access = job.table('statbox/access-log/2015-' + d)
    us = job.table('userdata/user_sessions/2015' + d.replace('-',''))

    # Parse the access log with qb2: plain fields plus derived columns.
    #   yuid        - yandexuid prefixed with 'y', to match the 'y...' keys
    #                 that Reduce emits as `uid`
    #   UI          - interface label computed from the request by get_UI
    #   geo         - the 'gpauto' item taken from parsed_yp
    #   yp_ts, isGeo, delta, delta_class - computed from geo / timestamp by
    #                 the helper functions defined above
    # NOTE(review): allow_null_dependency() presumably lets the extractor run
    # even when its input field is missing - confirm in the qb2 docs.
    acc = access.qb2(log = 'access-log',
                     fields = ['canonized_vhost','user_agent','status','request','cookies','cookie_yp','timestamp','os_name','browser','domain',
                               se.custom('yuid', lambda yandexuid: 'y' + yandexuid),
                               se.custom('UI',get_UI,'request'),
                               se.dictitem('gpauto',from_='parsed_yp').rename('geo'),
                               se.custom('yp_ts',get_geo_ts,'geo').allow_null_dependency(),
                               se.custom('isGeo',isGeo,'geo').allow_null_dependency(),
                               se.custom('delta',get_delta,'timestamp','yp_ts').allow_null_dependency(),
                               se.custom('delta_class',delta_class,'delta').allow_null_dependency()
                              ],
                    filters = [sf.default_filtering('access-log'),
                               sf.defined('canonized_vhost','request'),
                               sf.projects(['Yandex_RU','Search_Mobile_All','Search_Mobile_App','Mobile_Soft_Yandex']),
                               sf.equals('domain','ru')
                              ]
        )

    # One (uid, slot) record per user: sessions are grouped by key, sorted by
    # subkey and passed to Reduce; blockstat.dict and libra.so are shipped
    # with the job as local files.
    uids = us.groupby('key').sort('subkey').reduce(Reduce,files=[nile.files.LocalFile('blockstat.dict'),nile.files.LocalFile('libra.so')])

    exp_acc = acc.join(uids, by_left='yuid',by_right='uid').filter(nf.custom(bool,'slot')).put('$job_root/access/' + d) # keep only the accesses from the experiment and control slots

    aggr = exp_acc.groupby('UI').aggregate(count=na.count()).put('$job_root/check/ui_from_acc_' + d) # sanity check: make sure all the expected UIs are present
    t = exp_acc.groupby('os_name','browser','UI','geo','isGeo','delta','delta_class','slot').aggregate(freq=na.count()).put('$job_root/all_exp/' + d) # group the data by the required slices

    result = t.filter(nf.equals('isGeo','some geo')).put('$job_root/filtered/' + d) # keep only the slice where the yp cookie value exists


    # Execute the job built above for this date.
    job.run()

