# -*- coding: UTF-8 -*-
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record
)
import nile
import uatraits, libra, urllib

from qb2.api.v1 import extractors as se, filters as sf

from datetime import datetime

import urlparse
import cgi

def Reduce(groups):
    """Yield one record per touch web request that is in the experiment.

    Each group's records are parsed into a libra session using the
    ``blockstat.dict`` dictionary. Within a session only requests of type
    ``TTouchYandexWebRequest`` that carry the control or the experiment
    test id are kept; everything else is skipped.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` is
            emitted as ``uid``.

    Yields:
        Record with fields ``uid``, ``slot``, ``ts`` (``YYYY-MM-DD``),
        ``reqid``, ``page``, ``ref``, ``url`` and ``rel_us`` (value of the
        ``&reload=`` URL parameter, or ``'?'`` when it is absent).
    """
    control_id = '24469'
    experiment_id = '24461'

    for key, records in groups:
        uid = key.key

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: a group that cannot be parsed is dropped.
            # ``Exception`` (not a bare ``except``) so that
            # KeyboardInterrupt / SystemExit still propagate.
            continue

        for r in session:
            if not r.IsA('TTouchYandexWebRequest'):
                continue

            # Control is checked first, so it wins if both ids are present.
            if r.HasTestID(control_id):
                slot = 'control ' + control_id
            elif r.HasTestID(experiment_id):
                slot = 'exp ' + experiment_id
            else:
                continue

            # Naive local-time conversion (time zone of the executing
            # host); only the calendar date is kept.
            ts = datetime.fromtimestamp(r.Timestamp).date().isoformat()

            url = str(r.FullRequest)
            if '&reload=' in url:
                # Text between the first '&reload=' and the next '&'.
                rel_us = url.partition('&reload=')[2].partition('&')[0]
            else:
                rel_us = '?'

            yield Record(
                uid=uid,
                slot=slot,
                ts=ts,
                reqid=r.ReqID,
                page=str(r.PageNo),
                ref=str(r.Referer),
                url=url,
                rel_us=rel_us
            )



# Cluster handle shared by the jobs below. Registers the ``job_root``
# template, which the '$job_root/...' table paths in this file refer to.
cluster = clusters.yt.Hahn().env(
    templates={
        'job_root': 'home/search-research/ensuetina/NO_MDA_TOUCH',
    }
)


def del_urls(recs):
    """Flatten selected ``pvd`` entries of each record into string fields.

    For every input record the values stored under a fixed set of keys in
    ``rec.pvd`` are stringified (a missing key becomes ``'None'``), and the
    user agent (key ``'1042'``) is run through the uatraits detector to
    obtain the browser name and OS family.

    Args:
        recs: iterable of records exposing ``pvd``, ``slot`` and ``domain``.

    Yields:
        Record with ``slot``, ``domain``, the original mapping as ``pv``,
        the stringified ``pvd`` fields, and ``BR`` / ``OSF`` from the
        detector.
    """
    ua_detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    # (output field name, key looked up in ``pvd``), in lookup order.
    pvd_fields = (
        ('url', '13'),
        ('message', '1304'),
        ('ua', '1042'),
        ('page', '143'),
        ('static_host', '2358'),
        ('has_global', '2359'),
        ('col', '2356'),
        ('line', '2355'),
        ('stack', '2360'),
        ('anonym', '<anonymous>'),
        ('tmp1', ' like Gecko; Google Page Speed Insights) Chrome/27.0.1453 Mobile Safari/537.36'),
        ('tmp2', 'like Gecko) Mobile/13G36 TwonkyBeamBrowser/3.4.4-39 (iPhone iOS Ver'),
    )

    for rec in recs:
        pv = rec.pvd
        out = {'slot': rec.slot, 'domain': rec.domain, 'pv': pv}

        for field_name, pvd_key in pvd_fields:
            out[field_name] = str(pv.get(pvd_key))

        traits = ua_detector.detect(out['ua'])
        out['BR'] = str(traits.get('BrowserName'))
        out['OSF'] = str(traits.get('OSFamily'))

        yield Record(**out)

def get_unique_keys(groups):
    """Collect the distinct ``pvd`` keys seen across all incoming records.

    Args:
        groups: iterable of ``(key, records)`` pairs; the group key itself
            is not used. Each record must expose a ``pvd`` mapping.

    Yields:
        A single Record whose ``keys`` field is the list of distinct
        ``pvd`` keys in first-seen order (an empty list when there is no
        input).
    """
    keys = []
    # Companion set gives O(1) membership tests; the list alone made every
    # check a linear scan. The list is kept to preserve first-seen order.
    seen = set()

    for _, recs in groups:
        for rec in recs:
            for k in rec.pvd.keys():
                if k not in seen:
                    seen.add(k)
                    keys.append(k)

    yield Record(keys=keys)

def parse_line(recs):
    """Attach the ``reqid`` parsed from ``log_line`` to every record.

    ``log_line`` is treated as tab-separated ``name=value`` pairs; pieces
    without ``=`` are ignored and, for a repeated name, the last value
    wins. A missing ``reqid`` yields the string ``'None'``.

    Args:
        recs: iterable of records exposing ``log_line``.

    Yields:
        Record built from the input record plus a ``reqid`` string field.
    """
    for rec in recs:
        parsed = {}
        for piece in rec.log_line.split('\t'):
            name, sep, value = piece.partition('=')
            if sep:
                parsed[name] = value

        yield Record(rec, reqid=str(parsed.get('reqid')))

def sl(line):
    """Map a slots string to its experiment label.

    Returns ``'control 30167'`` if ``'30167'`` occurs in ``line``,
    otherwise ``'exp 30168'`` if ``'30168'`` occurs, otherwise ``'-'``.
    Matching is by plain substring.
    """
    # Checked in order, so the control id takes precedence.
    labels = (
        ('30167', 'control 30167'),
        ('30168', 'exp 30168'),
    )
    for test_id, label in labels:
        if test_id in line:
            return label
    return '-'

def iserr(vv):
    """Return ``'error'`` if ``'1201.1030'`` occurs in ``vv``, else ``'-'``."""
    return 'error' if '1201.1030' in vv else '-'

# Dates (YYYY-MM-DD) of the redir-log tables to process.
dates = ['2016-09-03']
for date in dates:
    # continue
    # NOTE(review): this loop only builds the job; the ``job.run()`` call
    # further down is commented out -- confirm that is intentional.
    job = cluster.job().env(default_mode='yamr_lines')

    redir = job.table('statbox/redir-log/%s' % date)

    # Parse the redir log, derive ``is_error`` (from ``vars``) and ``test``
    # (from ``slots``), keep rows with a defined uid/test whose path
    # contains '690.1201', and store them.
    t = redir.qb2(
        log='redir-log',
        fields=[
            'date',
            'dtype',
            'uid',
            'path',
            'log_line',
            'normal_path',
            'vars',
            'normal_vars',
            se.custom('is_error', iserr, 'vars'),
            se.dictitem('reqid', from_='parsed_log_line'),
            se.dictitem('slots', from_='parsed_log_line'),
            se.custom('test', sl, 'slots'),
        ],
        filters=[
            sf.defined('uid', 'test'),
            sf.contains('path', '690.1201')
            # sf.or_(sf.contains('path', '690.2361'),
            #        sf.contains('path', '690.2354')
            #       )
            # sf.equals('dtype', 'jserror')
        ]
    ).put('$job_root/more_errors')

    # Drop rows that matched neither test id (``sl`` returned '-').
    tt = t.filter(
        sf.not_(sf.equals('test', '-'))
    ).put('$job_root/1/err2_exp')

    # Row counts per (test, is_error) pair.
    tt.groupby(
        'test', 'is_error'
    ).aggregate(
        freq=na.count()
    ).put('$job_root/1/aggr')

#    uids = job.table('$job_root/reqs').project('slot','domain','country',uid=ne.custom(lambda x: x.replace('y',''),'uid')).groupby('uid',
#                                                                                                                                   'slot',
#                                                                                                                                   'domain',
#                                                                                                                                   'country'
#                                                                                                                                  ).aggregate(reqs=na.count()).put('$job_root/uids')

#    uids = job.table('$job_root/uids')

 #   t = job.table('$job_root/more_errors')
 #   t1 = job.table('$job_root/jserrors2')

#    t.join(uids,by='uid',type='inner').put('$job_root/1/exp_ajax')
  #  t1.join(uids,by='uid',type='inner').put('$job_root/exp_jserrors')

#    job.run()





