# -*- coding: UTF-8 -*-
from nile import (
    filters as nf,
    aggregators as na,
    extractors as ne
)
from nile.api.v1 import clusters
from nile.processing import Record
import nile
import uatraits, libra

from qb2.api.v1 import extractors as se, filters as sf

from datetime import datetime

import urlparse
import cgi

def Reduce(groups):
    """Extract whitelisted queries of region '213' from user sessions.

    Reducer over user-session rows grouped by key. For every group the
    records are parsed into a libra session; each 'TYandexWebRequest' whose
    user region is '213' and whose query exactly matches one of the
    whitelisted phrases produces one output record.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` is used as
            the user id and ``records`` is handed to ``libra.ParseSession``.

    Yields:
        Record with fields ``uid``, ``q`` (query text) and ``lr`` (region id
        as a string).
    """
    # Exact-match whitelist; a frozenset gives constant-time membership tests.
    words = frozenset([
        'игры для девочек',
        'игры для мальчиков',
        'игры онлайн',
    ])

    for key, records in groups:
        uid = key.key

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best-effort: sessions libra cannot parse are skipped, while
            # KeyboardInterrupt / SystemExit are allowed to propagate.
            continue

        for r in session:
            if not r.IsA('TYandexWebRequest'):
                continue

            # Cheap region filter first, so the query is only converted
            # for requests that can still match.
            lr = str(r.UserRegion)
            if lr != '213':
                continue

            q = str(r.Query)
            if q not in words:
                continue

            yield Record(uid=uid, q=q, lr=lr)


def map_passp(lines):
    """Turn tab-separated ``key=value`` lines into (uid, login) records.

    Each input item exposes its raw text as ``.value``. The text is split on
    tabs; fields without '=' are ignored, and the rest are split on the first
    '=' into key and value (a repeated key keeps its last value). Lines that
    lack either the 'login' or the 'uid' key produce no output.

    Args:
        lines: iterable of objects with a string ``value`` attribute.

    Yields:
        Record with fields ``uid`` and ``login``.
    """
    for row in lines:
        fields = {}
        for chunk in row.value.split('\t'):
            if '=' not in chunk:
                continue
            name, _, content = chunk.partition('=')
            fields[name] = content

        if 'login' in fields and 'uid' in fields:
            yield Record(uid=fields['uid'], login=fields['login'])

# Cluster handle with a 'job_root' template; the table paths below refer to
# it as '$job_root'.
cluster = clusters.yt.Hahn().env(
    templates={'job_root': 'home/search-research/ensuetina/GET_LOGINS'},
)

dates = ['2016-05-13']
for date in dates:
    job = cluster.job()

    # --- Disabled stages, kept for reference -------------------------------
    # They are the only users of `date`, and they write the '$job_root/reqs'
    # and '$job_root/puids' tables that the live stages below read.
    #
    # us = job.table('user_sessions/pub/search/daily/' + date + '/clean')
    # acc = job.table('statbox/access-log/' + date)
    #
    # reqs = us.groupby('key').sort('subkey').reduce(
    #     Reduce,
    #     files=[nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
    #            nile.files.LocalFile('/home/ensuetina/lib/libra.so')]
    # ).groupby('uid', 'q', 'lr').aggregate(freq=na.count()).put('$job_root/reqs')
    #
    # hits = acc.qb2(
    #     log='access-log',
    #     fields=[
    #         'uid', 'puid'
    #     ],
    #     filters=[
    #         sf.projects('Yandex_RU'),
    #         sf.defined('uid', 'puid'),
    #     ]
    # ).groupby('uid', 'puid').aggregate(lines=na.count()).project('uid', 'puid').put('$job_root/puids')
    # -----------------------------------------------------------------------

    reqs = job.table('$job_root/reqs')

    # 'uid' gets a 'y' prefix before the join -- presumably to match the uid
    # format stored in the reqs table (TODO confirm).
    hits = job.table('$job_root/puids').project(
        'puid',
        uid=ne.custom(lambda x: 'y' + x, 'uid'),
    )

    reqs_puids = (
        reqs
        .join(hits, by='uid', type='inner')
        .put('$job_root/reqs_puids')
    )

    # Passport accounts are parsed by map_passp into (uid, login) rows.
    # A disabled qb2-based alternative to the mapper, kept for reference:
    #
    # .qb2(
    #     log='generic-tskv-log',
    #     fields=[se.log_fields('uid', 'login')],
    #     filters=[sf.defined('uid', 'login')],
    # ).put('$job_root/passports')
    passport_accounts = (
        job.table('statbox/sql_passport_accounts')
        .map(map_passp)
        .put('$job_root/passports')
    )

    # Match the puid on the requests side against the passport uid.
    (
        reqs_puids
        .join(passport_accounts, by_left='puid', by_right='uid', type='inner')
        .put('$job_root/users_with_login')
    )

    job.run()

