#-*-coding: utf8 -*-
from nile.api.v1 import (
    Record,
    aggregators as na,
    filters as nf,
    extractors as ne,
    clusters
)
import nile
from datetime import datetime
import uatraits, cgi, urlparse
import random
import libra

def Reduce(groups,hosts,reqs):
    """Emit one record per touch web request that shows an entity-search badge.

    For every (key, records) group the records are parsed into a session with
    ``libra.ParseSession`` using the ``blockstat.dict`` dictionary. Each
    request in the session that is a ``TTouchYandexWebRequest`` is scanned for
    blockstat blocks whose path contains
    ``snippet/entity_search/object-badge``; requests without such a block are
    skipped.

    Args:
        groups: iterable of ``(key, records)`` pairs supplied by the reduce
            operation.
        hosts: output stream callable. It is kept in the signature because
            the caller unpacks two output streams, but this reducer never
            writes to it: the click-level extraction that used to feed it
            was unreachable (it sat behind an unconditional ``continue`` and
            referenced an undefined ``ids`` list) and has been removed.
        reqs: output stream callable; receives
            ``Record(q=..., oo_type=..., about=...)`` for every matching
            request.

    Groups whose session cannot be parsed are skipped silently (best effort).
    """
    for _key, records in groups:
        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: an unparsable session is dropped. ``Exception``
            # (instead of a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate.
            continue

        for r in session:
            if not r.IsA('TTouchYandexWebRequest'):
                continue

            has_badge = False
            oo_type = ''
            for bl in r.GetBSBlocks():
                if 'snippet/entity_search/object-badge' not in bl.Path:
                    continue

                has_badge = True
                # Each var is indexed as v[1] for its value; the last var
                # matching '-type' within a block wins.
                for v in bl.GetVars():
                    if '-type' in v:
                        oo_type = v[1]

                # Stop at the first badge block that yields a non-empty type;
                # a badge block without a type keeps the scan going.
                if oo_type != '':
                    break

            if not has_badge:
                continue

            # Coarse category derived from substrings of the badge type.
            if 'Film' in oo_type:
                about = 'Film'
            elif 'Hum' in oo_type:
                about = 'Human'
            else:
                about = 'not relevant'

            reqs(Record(q=str(r.Query), oo_type=oo_type, about=about))


# Cluster handle; '$job_root' in the table paths below is the template
# defined here.
cluster = clusters.yt.Hahn(pool='search-research').env(
    templates=dict(
        job_root='home/search-research/ensuetina/KP_QUERIES/touch',
    ),
)


# Stage 1: for every listed day, run Reduce over that day's user sessions
# and store its two output streams.
dates = ['2016-06-25']
for date in dates:
    job = cluster.job()

    us = job.table('user_sessions/pub/search/daily/%s/clean' % date)
    sorted_sessions = us.groupby('key').sort('subkey')

    # Local files passed to the reduce operation via ``files=``: the
    # blockstat dictionary named inside Reduce and the libra shared library.
    reducer_files = [
        nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
        nile.files.LocalFile('/home/ensuetina/libra.so'),
    ]
    hosts, reqs = sorted_sessions.reduce(
        Reduce,
        files=reducer_files,
        memory_limit=2000,
    )

    hosts.put('$job_root/clicks')
    reqs = reqs.put('$job_root/reqs')

    job.run()

# Stage 2: aggregate the stored request records.
job = cluster.job()

reqs = job.table('$job_root/reqs')

# Number of records per (query, category) pair.
query_counts = reqs.groupby('q', 'about').aggregate(freq=na.count())
aggr = query_counts.put('$job_root/aggr_queries')

# Up to 1000 rows per category, selected by 'freq' with mode='max'.
top_per_about = aggr.groupby('about').top(1000, by='freq', mode='max')
top_per_about.put('$job_root/top_queries')

# Number of records per (badge type, category) pair, written to
# 'check_types'.
type_counts = reqs.groupby('oo_type', 'about').aggregate(freq=na.count())
type_counts.put('$job_root/check_types')

job.run()



