# -*- coding: UTF-8 -*-
from nile import (
    filters as nf,
    aggregators as na,
    extractors as ne
)
from nile.api.v1 import clusters
from nile.processing import Record
import nile
import uatraits, libra

from datetime import datetime

from qb2.api.v1 import extractors as se, filters as sf

import urlparse
import cgi

def Reduce(groups, reqs, clicks):
    """Emit per-request and per-click records for touch requests in the experiment.

    Each group is parsed into a libra session. Every request in the session
    that is a ``TTouchYandexWebRequest`` and carries one of the two test ids
    produces one record on ``reqs`` and one record on ``clicks`` per click on
    a non-direct main block.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` is the uid
            and ``records`` is handed unchanged to ``libra.ParseSession``.
        reqs: callable that receives one ``Record`` per matching request.
        clicks: callable that receives one ``Record`` per click on a main
            block whose main result is not a ``TDirectResult``.

    A group is skipped silently when its uid does not start with ``'y'``,
    when the last 10 characters of the uid are not an integer, or when the
    session cannot be parsed.
    """
    control_id = '23815'
    exp_id = '23816'

    # Unix-time threshold separating 'NEW' from 'OLD' uids:
    # 1460494800 == 2016-04-12 21:00:00 UTC.
    new_uid_cutoff = 1460494800

    detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    for key, records in groups:
        uid = key.key
        # startswith() also rejects an empty uid instead of raising IndexError.
        if not uid.startswith('y'):
            continue

        # The last 10 characters of the uid are read as a Unix timestamp
        # (presumably the moment the uid was issued -- confirm). Slicing
        # never fails; the integer conversion is what can, so guard that.
        try:
            uid_created = int(uid[-10:])
        except ValueError:
            continue

        age = 'NEW' if uid_created > new_uid_cutoff else 'OLD'

        # Calendar day (YYYY-MM-DD) in the local time zone of the worker.
        uid_ts = datetime.fromtimestamp(float(uid_created)).date().isoformat()

        # Best effort: a group that cannot be parsed is dropped, not fatal.
        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            continue

        for r in session:
            if not r.IsA('TTouchYandexWebRequest'):
                continue

            if r.HasTestID(control_id):
                slot = 'control ' + control_id
            elif r.HasTestID(exp_id):
                slot = 'exp ' + exp_id
            else:
                continue

            ts = datetime.fromtimestamp(r.Timestamp).date().isoformat()

            reqid = r.ReqID
            ua = str(r.UserAgent)
            page = str(r.PageNo)
            ref = str(r.Referer)
            login = str(r.YandexLoginHash)
            # NOTE(review): if YandexLoginHash can be None, str() turns it
            # into 'None' and the request counts as logged in -- confirm.
            isLogin = 1 if login else 0

            # Missing traits are reported as empty strings.
            traits = detector.detect(ua)
            OS = traits['OSName'] if 'OSName' in traits else ''
            OSV = traits['OSVersion'] if 'OSVersion' in traits else ''
            BR = traits['BrowserName'] if 'BrowserName' in traits else ''
            BV = traits['BrowserVersion'] if 'BrowserVersion' in traits else ''

            # Fields shared by the request record and every click record.
            common = dict(
                uid=uid, slot=slot, ts=ts, reqid=reqid, page=page, ref=ref,
                BR=BR, BV=BV, OS=OS, OSV=OSV,
                isLogin=isLogin, age=age, uid_ts=uid_ts,
            )

            all_cl = 0
            direct_cl = 0

            for bl in r.GetMainBlocks():
                if bl.GetMainResult().IsA('TDirectResult'):
                    # Direct clicks are only counted, never emitted.
                    direct_cl += sum(1 for _ in bl.GetClicks())
                    continue

                for cl in bl.GetClicks():
                    all_cl += 1
                    clicks(Record(
                        p=cl.ConvertedPath,
                        dw=cl.DwellTimeOnService,
                        cl_ref=cl.Referer,
                        **common
                    ))

            reqs(Record(
                all_cl=all_cl, direct_cl=direct_cl, login=login,
                **common
            ))


def map_sess(lines):
    """Yield one Record per www.yandex ACCESS row whose request has a ``reload`` parameter.

    Args:
        lines: iterable of rows exposing ``key`` (uid), ``subkey`` (Unix
            timestamp as text) and ``value`` (tab-separated ``name=value``
            fields).

    Yields:
        Record with ``uid``, ``ts`` (YYYY-MM-DD of the subkey, local time),
        ``reqid``, ``rel`` (``str()`` of the parsed list of ``reload``
        values), ``rr`` (raw text after the first ``&reload=`` up to the
        next ``&``, or ``'?'``) and ``request`` (the full request string).

    Rows are skipped when ``type`` is not ``ACCESS``, ``service`` is not
    ``www.yandex``, ``request`` is missing or has no query string, or the
    query string has no non-empty ``reload`` value.
    """
    for line in lines:
        uid = line.key
        sk = line.subkey

        # Fields without '=' are ignored; only the first '=' splits.
        fields = dict(
            f.split('=', 1) for f in line.value.split('\t') if '=' in f
        )

        if fields.get('type') != 'ACCESS':
            continue
        if fields.get('service') != 'www.yandex':
            continue

        rrequest = fields.get('request')
        if not rrequest:
            continue

        _, has_query, qs = rrequest.partition('?')
        if not has_query:
            continue

        # cgi.parse_qs is the deprecated alias of urlparse.parse_qs; it
        # drops parameters with blank values by default.
        dd = cgi.parse_qs(qs)
        if 'reload' not in dd:
            continue

        # NOTE(review): when reload is the first query parameter
        # ('?reload=...') there is no '&reload=' and rr falls back to '?'.
        # Kept as is -- confirm whether that is intended.
        if '&reload=' in rrequest:
            rr = rrequest.split('&reload=')[1].split('&')[0]
        else:
            rr = '?'

        # Convert the timestamp only for rows that are actually emitted, so
        # a malformed subkey on a discarded row cannot abort the mapper.
        ts = datetime.fromtimestamp(float(sk)).date().isoformat()

        yield Record(
            uid=uid,
            ts=ts,
            reqid=fields.get('reqid'),
            rel=str(dd['reload']),
            rr=rr,
            request=rrequest,
        )



# Cluster handle. The 'job_root' template is what the '$job_root/...' table
# paths below refer to.
cluster = clusters.Plato().env(
    templates={'job_root': 'home/search-research/ensuetina/HTTPS_NEW_UIDS_2'}
)

# Days to process, one job per day.
# Alternative kept from the original:
# nile.api.path.DateRange('2016-03-15', '2016-03-23')
dates = [
    '2016-04-14',
    '2016-04-15',
    '2016-04-16',
    '2016-04-17',
    '2016-04-18',
    '2016-04-19',
    '2016-04-20',
]

# Stage 1: for each day, pull the 'jserror' rows out of the redir log and
# append them to $job_root/jserrors.
for date in dates:
    # Uncomment to skip stage 1 entirely:
#    continue
    job = cluster.job().env(default_mode='yamr_lines')

    redir = job.table('statbox/redir-log/' + date)

    t = redir.qb2(
        log='redir-log',
        fields=['date', 'dtype', 'uid', 'log_line'],
        filters=[
            sf.defined('dtype', 'uid'),
            sf.equals('dtype', 'jserror'),
        ],
    ).put('$job_root/jserrors', append=True)

    # Disabled steps, kept for reference: they fill $job_root/reloads,
    # $job_root/reqs and $job_root/clicks from user_sessions.
#    us = job.table('userdata/user_sessions/' + date)

#    reloads = us.map(map_sess).put('$job_root/reloads',append=True)
#    reqs, clicks = us.groupby('key').sort('subkey').reduce(Reduce,files=[nile.files.LocalFile('/home/ensuetina/blockstat.dict'),nile.files.LocalFile('/home/ensuetina/lib/libra.so')])

#    reqs.put('$job_root/reqs',append=True)
#    clicks.put('$job_root/clicks', append=True)

    job.run()

# Stage 2: count requests per (uid, slot) and inner-join them with the
# collected JS errors by uid.
job = cluster.job()

#reloads = job.table('$job_root/reloads').project('reqid','rel','request')

# Every 'y' is removed from the uid before the join -- presumably so it
# matches the uid format of the redir log (confirm).
reqs = (
    job.table('$job_root/reqs')
    .groupby('uid', 'slot')
    .aggregate(reqs=na.count())
    .project(
        'slot',
        'reqs',
        uid=ne.custom(lambda x: x.replace('y',''), 'uid'),
    )
)
errors = job.table('$job_root/jserrors')

reqs.join(errors, by='uid', type='inner').put('$job_root/joined_errors')
#t = job.table('$job_root/reqs').groupby('ts').aggregate(count=na.count()).put('$job_root/dates')

job.run()

