# -*- coding: UTF-8 -*-
from nile.api.v1 import (
    clusters,
    Record,
    filters as nf,
    aggregators as na,
    extractors as ne
)
import nile
import uatraits, libra

from datetime import datetime

import urlparse

def Reduce(groups, reqs, techs):
    """Emit video request records and their tech events for two test ids.

    Each group's records are parsed into a libra session with the
    'blockstat.dict' dictionary.  Only Yandex video requests (touch or
    desktop) that carry test id 28705 (labelled control) or 28706
    (labelled exp) are kept.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` becomes
            the ``uid`` field of every emitted record.
        reqs: callable that receives one Record per kept request, with the
            fields uid, ui, slot, ts, reqid and serpid.
        techs: callable that receives one Record per 'TYandexTechEvent' of
            a kept request, with the fields uid, ui, slot, ts and p (the
            event path).
    """
    control_id = '28705'
    experiment_id = '28706'

    for key, records in groups:
        uid = key.key

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: a session that cannot be parsed is skipped.
            # Catching Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit still stop the job.
            continue

        for r in session:
            if r.IsA('TTouchYandexVideoRequest'):
                ui = 'touch video'
            elif r.IsA('TYandexVideoRequest'):
                ui = 'desktop video'
            else:
                continue

            # Calendar date (YYYY-MM-DD) of the request, in local time.
            ts = datetime.fromtimestamp(r.Timestamp).date().isoformat()

            if r.HasTestID(control_id):
                slot = 'control ' + control_id
            elif r.HasTestID(experiment_id):
                slot = 'exp ' + experiment_id
            else:
                continue

            reqs(Record(uid=uid, ui=ui, slot=slot, ts=ts,
                        reqid=r.ReqID, serpid=r.SerpID))

            for tech in r.GetYandexTechEvents():
                if not tech.IsA('TYandexTechEvent'):
                    continue

                techs(Record(uid=uid, ui=ui, slot=slot, ts=ts, p=tech.Path))

def map_video_reqs(lines):
    """Yield one Record per video.yandex REQUEST line in test 28705 or 28706.

    Each input line exposes ``key`` (used as uid) and ``value``, a
    tab-separated string of ``name=value`` fields; chunks without '=' are
    ignored.  Missing fields become the string 'None' because every
    extracted value goes through ``str()``.
    """
    field_names = ('type', 'service', 'ui', 'query',
                   'test-buckets', 'reqid', 'serpid')
    known_slots = ('28705', '28706')

    for row in lines:
        uid = row.key
        raw = row.value

        fields = dict(
            chunk.split('=', 1) for chunk in raw.split('\t') if '=' in chunk
        )
        kind, service, ui, query, buckets, reqid, serpid = [
            str(fields.get(name)) for name in field_names
        ]

        if not (kind == 'REQUEST' and service == 'video.yandex'):
            continue

        # First test id found as a substring of the buckets field wins.
        # NOTE(review): substring match; confirm it cannot hit longer ids.
        slot = next((s for s in known_slots if s in buckets), None)
        if slot is None:
            continue

        yield Record(uid=uid, reqid=reqid, ui=ui, q=query, slot=slot,
                     serpid=serpid, service=service)

def map_tech_events(lines):
    """Yield one Record per video.yandex TECH line on a portal-ads path.

    Each input line exposes ``key`` (used as uid) and ``value``, a
    tab-separated string of ``name=value`` fields; chunks without '=' are
    ignored.  A line is kept only when its type is 'TECH', its service is
    'video.yandex' and its path contains 'tech.portal-ads'.  The raw value
    is passed through in the ``val`` field of the emitted Record.
    """
    field_names = ('type', 'service', 'ui', 'path', 'vars', 'reqid')

    for row in lines:
        uid = row.key
        raw = row.value

        fields = dict(
            chunk.split('=', 1) for chunk in raw.split('\t') if '=' in chunk
        )
        # str() turns a missing field into the literal string 'None'.
        kind, service, ui, path, vv, reqid = [
            str(fields.get(name)) for name in field_names
        ]

        is_portal_ads_event = (
            kind == 'TECH'
            and service == 'video.yandex'
            and 'tech.portal-ads' in path
        )
        if not is_portal_ads_event:
            continue

        yield Record(uid=uid, reqid=reqid, ui=ui, path=path, vv=vv,
                     service=service, val=raw)


# Hahn YT cluster in the 'search-research_ensuetina' pool.  The 'job_root'
# template is what the '$job_root' prefix in the table paths below refers to.
cluster = clusters.yt.Hahn(pool='search-research_ensuetina').env(
    templates={'job_root': 'home/search-research/ensuetina/DAAS_VIDEO'}
)

job = cluster.job()

# Input table: cleaned daily search user sessions for 2016-08-04.
us = job.table('user_sessions/pub/search/daily/2016-08-04/clean')

# Disabled pipelines built on map_video_reqs / map_tech_events, kept for
# reference.

#video_reqs = us.map(map_video_reqs).put('$job_root/video_reqs')

#v1 = video_reqs.filter(nf.custom(lambda x,y: x == y,'reqid','serpid')).put('$job_root/filtered_video_reqs')

#v1.groupby('slot').aggregate(reqs=na.count()).put('$job_root/aggr_video_filtered')
#video_reqs.groupby('slot').aggregate(reqs=na.count()).put('$job_root/aggr_video_reqs')

#techs = us.map(map_tech_events).put('$job_root/mapped_tech_events')

#techs = job.table('$job_root/mapped_tech_events').filter(nf.equals('path','tech.portal-ads.smart-banner')).put('$job_root/filtered_sm_shows')

#techs.groupby('path').aggregate(techs=na.count()).put('$job_root/aggr_tech_events')

#example = job.table('$job_root/example')

# Group sessions by 'key', order each group by 'subkey' and run Reduce over
# them.  The blockstat dictionary and the libra shared library are attached
# to the operation as local files.
reqs, techs = us.groupby('key').sort('subkey').reduce(
    Reduce,
    files=[
        nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
        nile.files.LocalFile('/home/ensuetina/libra.so'),
    ],
)
reqs.put('$job_root/reqs')
techs.put('$job_root/techs')

job.run()

#job = cluster.job()

#reqs = job.table('$job_root/reqs').groupby('uid',
#                                           'slot',
#                                           'ui'
#                                          ).aggregate(reqs=na.count()).groupby('slot',
#                                                                               'ui'
#                                                                              ).aggregate(reqs=na.sum('reqs'),
#                                                                                          uids=na.count()
#                                                                                         ).put('$job_root/aggr_reqs')

#techs = job.table('$job_root/techs').groupby('uid',
#                                             'slot',
#                                             'ui',
#                                             'p'
#                                            ).aggregate(techs=na.count()).groupby('slot',
#                                                                                  'ui',
#                                                                                  'p'
#                                                                                  ).aggregate(techs=na.sum('techs'),
#                                                                                              uids=na.count()
#                                                                                             ).put('$job_root/aggr_techs')
#
#job.run()


