# -*- coding: UTF-8 -*-
from nile.api.v1 import (
    clusters,
    Record,
    filters as nf,
    aggregators as na,
    extractors as ne
)
import nile
import uatraits, libra

from datetime import datetime

import urlparse

def Reduce(groups):
    """Emit one Record per Yandex Images request that is part of the experiment.

    Every incoming group is parsed into a libra session; groups that libra
    cannot parse are skipped.  Inside a session only requests of one of the
    image-search request types are considered, and of those only the ones
    carrying the control or the experiment test id are emitted.

    Args:
        groups: iterable of ``(key, records)`` pairs.  ``key.key`` is used as
            the user id and ``records`` is passed to ``libra.ParseSession``.

    Yields:
        Record with the fields ``uid``, ``ui``, ``slot``, ``ts``, ``serpid``,
        ``BR``, ``OS`` and ``ua``.
    """
    control_id = '28164'
    experiment_id = '28165'

    # Checked top to bottom, first match wins.  The order mirrors the
    # original if/elif chain and matters if a request can satisfy more than
    # one type, so keep the generic 'other img' entry last.
    ui_by_request_type = (
        ('TYandexImagesRequest', 'desktop img'),
        ('TTouchYandexImagesRequest', 'touch img'),
        ('TPadYandexImagesRequest', 'pad img'),
        ('TMobileYandexImagesRequest', 'mobile img'),
        ('TImagesRequestProperties', 'other img'),
    )

    detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    for key, records in groups:
        uid = key.key

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: a group that fails to parse is dropped.  Catching
            # Exception instead of using a bare ``except`` lets
            # KeyboardInterrupt / SystemExit propagate.
            continue

        for request in session:
            ui = next(
                (label for type_name, label in ui_by_request_type
                 if request.IsA(type_name)),
                None,
            )
            if ui is None:
                # Not an image-search request.
                continue

            if request.HasTestID(control_id):
                slot = 'control ' + control_id
            elif request.HasTestID(experiment_id):
                slot = 'exp ' + experiment_id
            else:
                # Request belongs to neither experiment slot.
                continue

            # Calendar date (YYYY-MM-DD) of the request.  fromtimestamp()
            # without a tz argument converts to the local timezone of the
            # machine running the job.
            ts = datetime.fromtimestamp(request.Timestamp).date().isoformat()

            # NOTE: a filter that kept only requests whose ReqID equals
            # SerpID used to be applied here; it is currently disabled.
            serpid = request.SerpID

            ua = str(request.UserAgent)
            traits = detector.detect(ua)

            # str() is applied to whatever .get() returns; presumably a
            # missing trait comes back as None and is stored as the string
            # 'None' -- TODO confirm against uatraits.
            browser = str(traits.get('BrowserName'))
            os_family = str(traits.get('OSFamily'))

            yield Record(
                uid=uid,
                ui=ui,
                slot=slot,
                ts=ts,
                serpid=serpid,
                BR=browser,
                OS=os_family,
                ua=ua,
            )


def map_tech_events(lines):
    """Yield a Record for every 'tech.portal-ads' TECH event of images.yandex.

    Each input value is treated as a tab-separated list of ``name=value``
    pairs; chunks without an ``=`` are ignored and, when a name repeats, the
    last occurrence wins.  A line is kept only when its ``type`` is ``TECH``,
    its ``service`` is ``images.yandex`` and its ``path`` contains
    ``tech.portal-ads``.

    Args:
        lines: iterable of objects exposing ``key`` (used as the user id) and
            ``value`` (the raw tab-separated string).

    Yields:
        Record with the fields ``uid``, ``uiterf``, ``path``, ``vv``,
        ``service`` and ``val`` (the unmodified raw value).
    """
    for line in lines:
        val = line.value

        # Split on the first '=' only, so values may themselves contain '='.
        data = dict(
            chunk.split('=', 1) for chunk in val.split('\t') if '=' in chunk
        )

        # Missing fields become the string 'None' (str(None)), which never
        # satisfies any of the three conditions below.
        ttype = str(data.get('type'))
        service = str(data.get('service'))
        path = str(data.get('path'))

        if (ttype != 'TECH'
                or service != 'images.yandex'
                or 'tech.portal-ads' not in path):
            continue

        yield Record(
            uid=line.key,
            uiterf=str(data.get('ui')),
            path=path,
            vv=str(data.get('vars')),
            service=service,
            val=val,
        )


# Cluster handle for the job; '$job_root' in table paths below is expanded
# from the template defined here.
cluster = clusters.yt.Hahn(pool='search-research_ensuetina').env(
    templates={'job_root': 'home/search-research/ensuetina/DAAS_IMAGES'}
)

job = cluster.job()

# Input: cleaned search user sessions for a single day.
us = job.table('user_sessions/pub/search/daily/2016-08-02/clean')

# Local files attached to the reduce operation; Reduce() opens
# 'blockstat.dict' by name through libra.ParseSession.
reduce_files = [
    nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
    nile.files.LocalFile('/home/ensuetina/libra.so'),
]

# Stage 1: per-request records for image-search requests in the experiment.
grouped_sessions = us.groupby('key').sort('subkey')
reqs = grouped_sessions.reduce(Reduce, files=reduce_files).put('$job_root/reqs')

# Stage 2: tech events are read from an existing table.  Presumably it was
# produced earlier with us.map(map_tech_events).put('$job_root/techs') --
# that step is not executed by this script.
techs = job.table('$job_root/techs')

# Stage 3: per (slot, uid, strange) counts of distinct serpids and requests.
# NOTE(review): ne.custom is given no source field names here, so it is
# unclear which column the 'None' check is meant to inspect -- confirm
# against the nile extractors API.
flagged = reqs.project(
    ne.all(),
    strange=ne.custom(lambda x: 'strange' if x == 'None' else 'ok')
)
per_serp = flagged.groupby('slot', 'uid', 'strange', 'serpid').aggregate(
    reqs_all=na.count()
)
per_user = per_serp.groupby('slot', 'uid', 'strange').aggregate(
    serpids=na.count(),
    reqs=na.sum('reqs_all')
)
uids = per_user.put('$job_root/exp_uids')

# Stage 4: attach tech events to the experiment users (left join on uid).
j = uids.join(techs, by='uid', type='left').put('$job_root/joined_techs')

job.run()



