#-*-coding: utf8 -*-
from nile import (
    clusters,
    Record,
    filters as nf,
    aggregators as na
)
import nile
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)

import uatraits
import libra
from datetime import datetime
import urlparse

def Reduce(groups):
    """Yield one Record per navigation click made inside the experiment.

    Each group whose user id starts with ``'y'`` is parsed into a session
    with ``libra.ParseSession``. Web, images, video and maps requests that
    carry one of the four test ids below are scanned for clicks whose
    converted path contains ``'serp/navig/'`` or whose raw path contains
    ``'471.208.'``; every such click produces one output record.

    Args:
        groups: iterable of ``(key, records)`` pairs. ``key.key`` is used as
            the user id and ``records`` is passed to ``libra.ParseSession``
            unchanged.

    Yields:
        Record with the fields:
            uid: user id of the group.
            ui: label of the request kind ('web request', 'images', ...).
            ts: request date as ``YYYY-MM-DD`` (``datetime.fromtimestamp``,
                i.e. local time of the machine running the job).
            slot: label of the experiment slot the request belongs to.
            p: converted path of the click.
            delta_prev: time between the click and the preceding event of
                the same request (the request itself or the latest
                non-navigation click seen before it).
            delta_req: ``DelayAfterRequest`` of the click.
            dw: ``DwellTimeOnService`` of the click.
    """
    # Checked in order; the first matching request type wins.
    request_kinds = (
        ('TYandexWebRequest', 'web request'),
        ('TYandexImagesRequest', 'images'),
        ('TYandexVideoRequest', 'video'),
        ('TYandexMapsObjectRequest', 'maps'),
    )
    # Checked in order; the first test id present on the request wins.
    slots = (
        ('21167', 'control 21167'),
        ('21168', 'exp 5 tabs 4px 21168'),
        ('21169', 'exp 5 tabs 5px 21169'),
        ('21170', 'exp 7 tabs 4px 21170'),
    )

    for key, records in groups:
        uid = key.key
        # startswith() also copes with an empty uid, where uid[0] would raise.
        if not uid.startswith('y'):
            continue

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: a session that cannot be parsed is skipped.
            # SystemExit / KeyboardInterrupt are deliberately not swallowed.
            continue

        for r in session:
            ui = next(
                (label for kind, label in request_kinds if r.IsA(kind)),
                None,
            )
            if ui is None:
                continue

            slot = next(
                (label for test_id, label in slots if r.HasTestID(test_id)),
                None,
            )
            if slot is None:
                continue

            req_t = r.Timestamp
            ts = datetime.fromtimestamp(req_t).date().isoformat()

            # Time of the latest event preceding a navigation click; starts
            # at the request itself and moves forward on every other click.
            # NOTE(review): presumably GetClicks() returns clicks in
            # chronological order - confirm against libra.
            prev_t = req_t

            for cl in r.GetClicks():
                p = str(cl.ConvertedPath)
                pp = str(cl.Path)
                secs = cl.DelayAfterRequest

                if 'serp/navig/' in p or '471.208.' in pp:
                    nav_t = req_t + secs
                    yield Record(
                        uid=uid,
                        ui=ui,
                        ts=ts,
                        slot=slot,
                        p=p,
                        delta_prev=nav_t - prev_t,
                        delta_req=secs,
                        dw=cl.DwellTimeOnService,
                    )
                else:
                    prev_t = req_t + secs


def map_lines_reqs(lines):
    """Turn tab-separated request lines into Records.

    Each input line exposes ``key`` (used as the user id) and ``value``, a
    tab-separated string whose first five fields are read in this order:
    ts, slot, page, referrer URL, click count.

    Yields:
        Record(uid, ts, slot, page, ref, all_cl) where ``ref`` is the
        referrer without its query string and without URL schemes, and
        ``all_cl`` is the click count converted to int.
    """
    for line in lines:
        uid = line.key
        fields = line.value.split('\t')

        # Drop the query string, then the scheme. str.replace removes every
        # occurrence of the scheme text, not only a leading one.
        referrer = fields[3].split('?')[0]
        for scheme in ('https://', 'http://'):
            referrer = referrer.replace(scheme, '')

        yield Record(
            uid=uid,
            ts=fields[0],
            slot=fields[1],
            page=fields[2],
            ref=referrer,
            all_cl=int(fields[4]),
        )


# Cluster connection; $job_root in table paths below expands to job_root.
cluster = clusters.YT(proxy="hahn.yt.yandex.net").env(
    job_root='home/search-research/ensuetina/TABBAR/new'
)

job = cluster.job()

# Input table: user sessions for a single, hard-coded day.
us = job.table('user_sessions/pub/search/daily/2016-03-10/clean')

# Group by 'key', order each group by 'subkey' and run Reduce over it.
# The two local files are attached to the reduce step: the dictionary name
# matches the one Reduce passes to libra.ParseSession, and libra.so is
# presumably the libra module itself - confirm.
t = (
    us.groupby('key')
    .sort('subkey')
    .reduce(
        Reduce,
        files=[
            nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
            nile.files.LocalFile('/home/ensuetina/lib/libra.so'),
        ]
    )
)

# append=True is passed so the results are added to the output table.
t.put('$job_root/clicks', append=True)

job.run()



