# -*- coding: UTF-8 -*-
from nile import (
    filters as nf,
    aggregators as na,
    extractors as ne
)
from nile.api.v1 import clusters
from nile.processing import Record
import nile
import uatraits, libra

from qb2.api.v1 import extractors as se, filters as sf

from datetime import datetime

import urlparse, urllib
import cgi

def Reduce(groups):
    """Emit one record per web request that belongs to the A/B experiment.

    Each group's records are parsed into a session with
    ``libra.ParseSession``. Every ``TYandexWebRequest`` in the session that
    carries test id 16960 (labelled ``control``) or 16961 (labelled ``exp``)
    and whose full request has a query string produces one output record.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` is the
            user id. Only user ids starting with ``'y'`` are processed.

    Yields:
        Record with fields ``uid``, ``slot`` (``'control 16960'`` or
        ``'exp 16961'``), ``ts`` (``YYYY-MM-DD`` in the worker's local time
        zone), ``reqid``, ``rel`` and ``url``. ``rel`` is the list of
        ``reload`` query-string values, or the string ``'-'`` when the
        parameter is absent.
    """
    control_id = '16960'
    experiment_id = '16961'

    for key, records in groups:
        uid = key.key
        # startswith() instead of uid[0] so an empty uid is skipped rather
        # than raising IndexError.
        if not uid.startswith('y'):
            continue

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: a session that cannot be parsed is skipped.
            # Narrowed from a bare ``except`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            continue

        for request in session:
            if not request.IsA('TYandexWebRequest'):
                continue

            if request.HasTestID(control_id):
                slot = 'control ' + control_id
            elif request.HasTestID(experiment_id):
                slot = 'exp ' + experiment_id
            else:
                # Request is in neither experiment slot.
                continue

            url = str(request.FullRequest)
            _, separator, query = url.partition('?')
            if not separator:
                # No query string at all.
                continue

            # parse_qs maps every parameter name to a list of its values.
            params = urlparse.parse_qs(query)
            # NOTE: `rel` is a list when present and the string '-' otherwise;
            # kept as-is because later stages may rely on that shape.
            rel = params.get('reload', '-')

            # Formatted only for requests that are actually emitted.
            ts = datetime.fromtimestamp(request.Timestamp).date().isoformat()

            yield Record(
                uid=uid,
                slot=slot,
                ts=ts,
                reqid=request.ReqID,
                rel=rel,
                url=url,
            )


def map_sess(lines):
    """Extract ``www.yandex`` ACCESS rows whose request has a ``reload`` parameter.

    Args:
        lines: iterable of rows with ``key`` (user id), ``subkey`` (Unix
            timestamp, parsed with ``float``) and ``value`` (tab-separated
            ``name=value`` fields).

    Yields:
        Record with fields ``uid``, ``ts`` (``YYYY-MM-DD`` in the worker's
        local time zone), ``reqid``, ``reloadd`` (``str()`` of the list of
        ``reload`` values) and ``request``.
    """
    for row in lines:
        # Chunks without '=' are ignored; the value may itself contain '='.
        fields = dict(
            chunk.split('=', 1)
            for chunk in row.value.split('\t')
            if '=' in chunk
        )

        # Only ACCESS rows of the www.yandex service are of interest.
        if fields.get('type') != 'ACCESS':
            continue
        if fields.get('service') != 'www.yandex':
            continue

        request = fields.get('request')
        if not request:
            continue

        _, separator, query = request.partition('?')
        if not separator:
            # No query string, hence no `reload` parameter.
            continue

        # parse_qs maps every parameter name to a list of its values.
        params = urlparse.parse_qs(query)
        if 'reload' not in params:
            continue

        # The subkey is parsed only for rows that are emitted, so a malformed
        # subkey on a discarded row no longer aborts the mapper.
        ts = datetime.fromtimestamp(float(row.subkey)).date().isoformat()

        # The output field name `reloadd` is kept unchanged on purpose.
        yield Record(
            uid=row.key,
            ts=ts,
            reqid=fields.get('reqid'),
            reloadd=str(params['reload']),
            request=request,
        )

def map_errors(lines):
    """Convert raw error-log rows into records keyed by user id.

    Each row's ``value`` is URL-decoded and then treated as a sequence of
    chunks separated by ``'@@'``.

    Args:
        lines: iterable of rows exposing a ``value`` attribute.

    Yields:
        Record with fields:
            uid: ``'y'`` followed by the last ``'@@'``-separated chunk.
            dtype: ``str()`` of the ``dtype`` field, i.e. the string
                ``'None'`` when that field is missing.
            reqid: text after the first ``'&reqid='`` up to the next ``'&'``,
                or ``'?'`` when the marker does not occur.
            line: the URL-decoded value.
    """
    reqid_marker = '&reqid='

    for row in lines:
        decoded = urllib.unquote(row.value)
        chunks = decoded.split('@@')

        # name=value chunks; chunks without '=' carry no named field.
        fields = dict(chunk.split('=', 1) for chunk in chunks if '=' in chunk)

        if reqid_marker in decoded:
            tail = decoded.partition(reqid_marker)[2]
            reqid = tail.partition('&')[0]
        else:
            reqid = '?'

        yield Record(
            uid='y' + chunks[-1],
            dtype=str(fields.get('dtype')),
            reqid=reqid,
            line=decoded,
        )




# Handle to the Hahn YT cluster. The `job_root` template is presumably what
# the '$job_root' prefix in the table paths below expands to — confirm against
# the nile documentation.
cluster = clusters.yt.Hahn().env(
    templates={
        'job_root': 'home/search-research/ensuetina/HTTPS_NEW_UIDS_2/desktop_check',
    }
)

# Dates (YYYY-MM-DD) for the per-day preparation stage below; the longer list
# is kept commented out.
#dates = ['2016-04-14','2016-04-15','2016-04-16','2016-04-17','2016-04-18','2016-04-19','2016-04-20']
dates = ['2015-10-05']
for date in dates:
    # NOTE(review): this unconditional `continue` skips the rest of the loop
    # body, so nothing below it ever runs. It looks like a manual switch for
    # turning this stage off — confirm before removing either the `continue`
    # or the unreachable code.
    continue
    job = cluster.job()

    # Earlier steps of this stage, kept as commented-out code: reading the
    # daily user sessions table and running map_sess / Reduce over it.
#    us = job.table('user_sessions/pub/search/daily/' + date + '/clean')

#    reloads = us.map(map_sess).put('$job_root/reloads',append=True)
#    reqs = us.groupby('key').sort('subkey').reduce(Reduce,files=[nile.files.LocalFile('/home/ensuetina/blockstat.dict'),nile.files.LocalFile('/home/ensuetina/lib/libra.so')]).put('$job_root/us_reqs')
#    clicks.put('$job_root/clicks', append=True)

    # Projects us_reqs into us_project; `rld` is presumably str() applied to
    # the `rel` column — verify against the nile extractors API.
    t = job.table('$job_root/us_reqs').project('uid','slot','ts','reqid','url',rld=ne.custom(lambda x: str(x), 'rel')).put('$job_root/us_project')

    job.run()


# Main job: map the raw JS errors, count requests per (uid, slot) and join the
# two results on uid.
job = cluster.job()

t = (
    job.table('$job_root/jserrors')
    .map(map_errors)
    .put('$job_root/mapped_errors')
)
reqs = (
    job.table('$job_root/us_project')
    .groupby('uid', 'slot')
    .aggregate(reqs=na.count())
    .put('$job_root/us_uids')
)

# Inner join: only uids present in both tables reach joined_errors.
reqs.join(t, by='uid', type='inner').put('$job_root/joined_errors')

job.run()


#job = cluster.job()

#reloads = job.table('$job_root/reloads').project('reqid','rel','request')
#reqs = job.table('$job_root/reqs')

#reqs.join(reloads,by='reqid',type='inner').put('$job_root/reqs_with_reloads')
#t = job.table('$job_root/reqs').groupby('ts').aggregate(count=na.count()).put('$job_root/dates')

#job.run()

