# -*- coding: UTF-8 -*-
from nile import (
    filters as nf,
    aggregators as na,
    extractors as ne
)
from nile.api.v1 import clusters
from nile.processing import Record
import nile
import uatraits, libra

#from qb2.api.v1 import extractors as se, filters as sf

from datetime import datetime

import urlparse
import cgi

def Reduce(groups):
    """Emit one Record per touch web request that carries an experiment test id.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` is used as
            the user id and ``records`` is handed to ``libra.ParseSession``
            together with the ``'blockstat.dict'`` file.

    Yields:
        Record with fields ``uid``, ``slot`` (``'control <id>'`` or
        ``'exp <id>'``), ``ts`` (``YYYY-MM-DD`` from the request timestamp,
        local time), ``reqid``, ``page``, ``ref``, ``url`` and ``rel_us``
        (the raw value following ``'&reload='`` in the full request, or
        ``'?'`` when that marker is absent).
    """
    control_id = '24469'
    experiment_id = '24461'

    for key, records in groups:
        uid = key.key

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: a session that cannot be parsed is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            continue

        for request in session:
            if not request.IsA('TTouchYandexWebRequest'):
                continue

            if request.HasTestID(control_id):
                slot = 'control ' + control_id
            elif request.HasTestID(experiment_id):
                slot = 'exp ' + experiment_id
            else:
                continue

            # Only requests that passed both filters need a date string.
            ts = datetime.fromtimestamp(request.Timestamp).date().isoformat()

            url = str(request.FullRequest)
            if '&reload=' in url:
                rel_us = url.split('&reload=')[1].split('&')[0]
            else:
                rel_us = '?'

            yield Record(
                uid=uid,
                slot=slot,
                ts=ts,
                reqid=request.ReqID,
                page=str(request.PageNo),
                ref=str(request.Referer),
                url=url,
                rel_us=rel_us,
            )


def map_sess(lines):
    """Emit one Record per ``www.yandex`` ACCESS line that has a query string.

    Args:
        lines: iterable of objects with ``key`` (used as uid), ``subkey``
            (a unix timestamp parseable by ``float``) and ``value`` (a
            tab-separated string of ``name=value`` items).

    Yields:
        Record with fields ``uid``, ``ts`` (``YYYY-MM-DD``, local time),
        ``reqid``, ``rel`` (``str()`` of the parsed ``reload`` parameter list,
        i.e. ``'None'`` when the parameter is absent), ``rr`` (the raw text
        following ``'&reload='`` in the request, or ``'?'`` when that marker
        is absent) and ``request``.
    """
    for line in lines:
        uid = line.key
        subkey = line.subkey

        # Items without '=' are ignored; only the first '=' splits name/value.
        fields = dict(
            item.split('=', 1) for item in line.value.split('\t') if '=' in item
        )

        rtype = fields.get('type')
        service = fields.get('service')
        if not rtype or not service:
            continue
        if rtype != 'ACCESS' or service != 'www.yandex':
            continue

        request = fields.get('request')
        if not request:
            continue

        parts = request.split('?', 1)
        if len(parts) < 2:
            continue

        # urlparse.parse_qs replaces the deprecated cgi.parse_qs wrapper.
        params = urlparse.parse_qs(parts[1])
        rel = str(params.get('reload'))

        if '&reload=' in request:
            rr = request.split('&reload=')[1].split('&')[0]
        else:
            rr = '?'

        # Converted only for emitted rows rather than for every log line.
        ts = datetime.fromtimestamp(float(subkey)).date().isoformat()

        yield Record(
            uid=uid,
            ts=ts,
            reqid=fields.get('reqid'),
            rel=rel,
            rr=rr,
            request=request,
        )



# Cluster handle shared by every job in this script. The 'job_root' template
# is presumably what '$job_root' in the table paths below expands to.
cluster = clusters.yt.Hahn().env(
    templates=dict(
        job_root='home/search-research/ensuetina/LEGO_EXP/reloads',
    ),
)

# Stage 1 (disabled): the loop body is skipped by `continue`, so nothing runs
# here. The commented-out code is kept as a record of how, for each date, the
# tables '$job_root/us_acc_reloads', '$job_root/us_reqs' and
# '$job_root/acc_reloads' were written (all with append=True).
# NOTE: the acc_reloads step uses `se` / `sf`, whose qb2 import is also
# commented out at the top of the file.
dates = ['2016-05-07','2016-05-08','2016-05-09']
for date in dates:
    continue
#    job = cluster.job()

#    us = job.table('user_sessions/pub/search/daily/' + date + '/clean')
#    acc = job.table('statbox/access-log/' + date)

#    reloads = us.map(map_sess).put('$job_root/us_acc_reloads',append=True)
#    reqs = us.groupby('key').sort('subkey').reduce(Reduce,files=[nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
#                                                                 nile.files.LocalFile('/home/ensuetina/lib/libra.so')]).put('$job_root/us_reqs', append=True)

#    acc_reloads = acc.qb2(log = 'access-log',
#                          fields = ['request',
#                                    se.dictitem('req_id', from_='parsed_log_line').rename('reqid'),
#                                    se.parameter('reload')
#                                   ],
#                          filters = [sf.defined('reqid','reload')]
#                        ).put('$job_root/acc_reloads', append=True)

#    job.run()


# Stage 2 (disabled): the loop body is skipped by `continue`, so nothing runs
# here. The commented-out code read 'statbox/redir-log/2016-05-0<d>' for each
# day suffix, kept rows whose dtype equals 'jserror', and appended them to
# '$job_root/jserrors'.
# NOTE: it uses `sf`, whose qb2 import is commented out at the top of the file.
dd = ['7','8','9']
for d in dd:
    continue
#    job = cluster.job().env(default_mode='yamr_lines')

#    redir = job.table('statbox/redir-log/2016-05-0' + d)

#    t = redir.qb2(log='redir-log',
#                  fields = ['date','dtype','uid','log_line'],
#                  filters = [sf.defined('dtype','uid'),
#                             sf.equals('dtype','jserror')
#                            ]
#                ).put('$job_root/jserrors', append=True)

#    job.run()

# Final stage: join per-user request counts with the JS error rows and store
# the result in '$job_root/joined_errors'.
job = cluster.job()

# Request count per (slot, uid); every 'y' character is removed from uid
# before it is used as the join key.
reqs = (
    job.table('$job_root/us_reqs')
    .groupby('slot', 'uid')
    .aggregate(reqs=na.count())
    .project(
        'slot',
        'reqs',
        uid=ne.custom(lambda x: x.replace('y', ''), 'uid'),
    )
)

# Disabled alternative projection for the errors table:
#   .project('date','dtype',ll= 'log_line' ,yuid = ne.custom(lambda x: 'y' + x, 'uid'))
errors = job.table('$job_root/jserrors')

(
    reqs
    .join(errors, by='uid', type='inner')
    .put('$job_root/joined_errors')
)

# Disabled: earlier joins of the request table with the reload tables.
#reloads = job.table('$job_root/us_acc_reloads', ignore_missing=True).project('reqid','rel','rr',us_acc_request='request')
#reqs = job.table('$job_root/us_reqs', ignore_missing=True)

#acc_reloads = job.table('$job_root/acc_reloads', ignore_missing=True)

#j1 = reqs.join(reloads,by='reqid',type='inner').put('$job_root/reqs_with_reloads')
#j2 = reqs.join(acc_reloads,by='reqid',type='inner').put('$job_root/reqs_with_acc_reloads')

#j1.join(acc_reloads,by='reqid',type='inner').put('$job_root/reqs_with_reloads_total_join')
#t = job.table('$job_root/reqs').groupby('ts').aggregate(count=na.count()).put('$job_root/dates')

job.run()

