# -*- coding: UTF-8 -*-
from nile import (
    filters as nf,
    aggregators as na
)

from nile.api.v1 import clusters
from nile.processing import Record
import nile
import uatraits, libra

from datetime import datetime

import urlparse

def Reduce(groups, reqs, clicks):
    """Emit request and click records for web requests that show a Kinopoisk thumb.

    For every group whose key starts with 'y', the group's records are parsed
    into a session with ``libra.ParseSession``.  Only ``TYandexWebRequest``
    requests that carry one of the two hard-coded test ids and contain a
    blockstat block whose path includes 'snippet/kinopoisk/thumb' are reported.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` is used as
            the uid.
        reqs: callable receiving one ``Record`` per reported request with the
            fields uid, slot, ts, reqid, page, ref, all_cl, direct_cl,
            isReask and q.
        clicks: callable receiving one ``Record`` per click on a non-direct
            main block with the fields uid, slot, ts, reqid, page, ref, p,
            dw, cl_ref, cl_url and q.

    Groups whose session cannot be parsed are skipped silently.
    """
    control_id = '21013'
    exp_id = '21016'
    kp_thumb_path = 'snippet/kinopoisk/thumb'
    # Maximum gap between two reported requests for the later one to count as
    # a re-ask; same unit as r.Timestamp (which is fed to fromtimestamp below).
    reask_window = 120

    for key, records in groups:
        uid = key.key
        # Slice instead of uid[0]: an empty key is skipped rather than
        # aborting the whole reducer with IndexError.
        if uid[:1] != 'y':
            continue

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: drop sessions libra cannot parse, but do not
            # swallow KeyboardInterrupt / SystemExit as a bare except would.
            continue

        # Timestamp and non-direct click count of the reference request for
        # re-ask detection; -1 means "nothing reported yet".
        prev = -1
        prev_all_cl = -1

        for r in session:
            if not r.IsA('TYandexWebRequest'):
                continue

            curr = r.Timestamp
            # Calendar date (YYYY-MM-DD) of the request in local time.
            ts = datetime.fromtimestamp(curr).date().isoformat()

            if r.HasTestID(control_id):
                slot = 'control ' + control_id
            elif r.HasTestID(exp_id):
                slot = 'exp ' + exp_id
            else:
                continue

            if prev == -1:
                prev = curr

            # A re-ask is a request issued shortly after a reported request
            # that received no non-direct clicks.
            if curr - prev < reask_window and prev_all_cl == 0:
                isReask = 1
            else:
                isReask = 0

            reqid = r.ReqID
            page = str(r.PageNo)
            ref = str(r.Referer)
            q = str(r.Query)

            # Only requests that showed the Kinopoisk thumb are of interest.
            if not any(kp_thumb_path in bl.Path for bl in r.GetBSBlocks()):
                continue

            all_cl = 0
            direct_cl = 0

            for bl in r.GetMainBlocks():
                if bl.GetMainResult().IsA('TDirectResult'):
                    # Direct-result clicks are only counted, not emitted.
                    direct_cl += sum(1 for _ in bl.GetClicks())
                    continue

                for cl in bl.GetClicks():
                    all_cl += 1
                    clicks(Record(
                        uid=uid,
                        slot=slot,
                        ts=ts,
                        reqid=reqid,
                        page=page,
                        ref=ref,
                        p=cl.ConvertedPath,
                        dw=cl.DwellTimeOnService,
                        cl_ref=cl.Referer,
                        cl_url=str(cl.Url),
                        q=q,
                    ))

            reqs(Record(
                uid=uid,
                slot=slot,
                ts=ts,
                reqid=reqid,
                page=page,
                ref=ref,
                all_cl=all_cl,
                direct_cl=direct_cl,
                isReask=isReask,
                q=q,
            ))

            # Only reported requests move the re-ask reference point forward.
            prev_all_cl = all_cl
            prev = curr



# Cluster handle with a 'job_root' template; the '$job_root/...' output paths
# below are written relative to it.
cluster = clusters.yt.Hahn().env(
    templates=dict(job_root='home/search-research/ensuetina/BIG_THUMB/KP_only')
)

# Days of February 2016 to process, one job per day.
# Alternative kept for reference:
# dates = nile.api.path.DateRange('2016-02-03', '2016-03-03')
dates = [
    '20',
    '21',
    '22',
    '23',
    '24',
    '25',
    '26',
]

for date in dates:
    job = cluster.job()

    sessions = job.table('user_sessions/pub/search/daily/2016-02-' + date + '/clean')
    grouped = sessions.groupby('key').sort('subkey')

    # Files passed to the reducer: the blockstat dictionary and the libra
    # shared library.
    reducer_files = [
        nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
        nile.files.LocalFile('/home/ensuetina/lib/libra.so'),
    ]
    reqs, clicks = grouped.reduce(Reduce, files=reducer_files)

    # Each day's output is appended to the same pair of tables.
    reqs.put('$job_root/reqs', append=True)
    clicks.put('$job_root/clicks', append=True)

    job.run()

#job = cluster.job()

#t = job.table('$job_root/reqs').groupby('slot','isReask').aggregate(count=na.count()).put('$job_root/reasks')

#job.run()

