#-*-coding: utf8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf

import nile
import libra
from datetime import datetime
import uatraits
import urllib, re, random, urlparse

def get_queries(groups):
    """Reducer: emit web requests that follow a long pause and come from search.

    For every (key, records) group the records are parsed into a libra
    session.  Within a session only Yandex web requests (touch / pad /
    mobile app / desktop) are considered; every other request type is
    ignored and does not affect the "previous request" timestamp.

    A record is emitted for a request when both conditions hold:
      * more than 1800 timestamp units (presumably seconds, i.e. 30
        minutes -- confirm against libra's Timestamp) passed since the
        previous considered request of the same session;
      * its referer contains one of the Yandex search page markers, or its
        full request string contains 'callback'.

    The first considered request of a session is never emitted because it
    has no predecessor to measure the gap against.

    Args:
        groups: iterable of (key, records) pairs; `key.key` is used as the
            user id and `records` is handed to libra.ParseSession as is.

    Yields:
        Record with fields:
            uid  - key.key of the group,
            ui   - 'touch', 'pad', 'app' or 'desktop',
            BR   - str() of the detected 'BrowserName' ('None' if absent),
            OSF  - str() of the detected 'OSFamily' ('None' if absent),
            ref  - referer with everything from the first '?' removed,
            url  - full request with everything from the first '?' removed,
            curr - timestamp of the emitted request,
            prev - timestamp of the previous considered request.
    """
    detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    # Checked in this order, first match wins.  The generic
    # 'TYandexWebRequest' must stay last so the more specific types are
    # tried before it (same order as an if/elif chain would use).
    request_kinds = (
        ('TTouchYandexWebRequest', 'touch'),
        ('TPadYandexWebRequest', 'pad'),
        ('TMobileAppYandexWebRequest', 'app'),
        ('TYandexWebRequest', 'desktop'),
    )

    # Substrings of the referer that mark a Yandex search results page.
    # 'yandex.ru/searchapp' is already covered by 'yandex.ru/search'; it is
    # listed explicitly only to document the intent.
    search_referers = (
        'yandex.ru/search',
        'yandex.ru/touchsearch',
        'yandex.ru/searchapp',
    )

    # Minimal gap between two consecutive requests for the later one to be
    # reported (same units as r.Timestamp).
    min_gap = 1800

    for key, records in groups:
        uid = key.key

        # Best effort: a session that libra cannot parse is skipped.
        # `except Exception` (not a bare except) so that KeyboardInterrupt,
        # SystemExit and GeneratorExit still propagate.
        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            continue

        prev = None  # timestamp of the previous considered request, if any

        for r in session:
            ui = None
            for type_name, label in request_kinds:
                if r.IsA(type_name):
                    ui = label
                    break
            if ui is None:
                # Not a Yandex web request: ignore, do not touch `prev`.
                continue

            curr = r.Timestamp

            # The cheap numeric check goes first; string conversion and
            # user-agent detection are only done for the few requests
            # that actually follow a long pause.
            if prev is not None and curr - prev > min_gap:
                ref = str(r.Referer)
                url = str(r.FullRequest)

                from_search = any(marker in ref for marker in search_referers)
                if from_search or 'callback' in url:
                    traits = detector.detect(str(r.UserAgent))
                    yield Record(
                        uid=uid,
                        ui=ui,
                        BR=str(traits.get('BrowserName')),
                        OSF=str(traits.get('OSFamily')),
                        ref=ref.split('?')[0],
                        url=url.split('?')[0],
                        curr=curr,
                        prev=prev
                    )

            prev = curr

# ---------------------------------------------------------------------------
# Job definition: run get_queries over one day of search user sessions.
# ---------------------------------------------------------------------------

username = 'ensuetina'

# Cluster handle.  The 'job_root' template is referenced below as
# '$job_root' in the output table path.  Note that the pool name spells the
# user name out literally instead of using `username`.
cluster = clusters.yt.Hahn(pool='search-research_ensuetina').env(
    templates={
        'job_root': 'home/search-research/%s/RESEARCH-2025_returns_to_tabs' % username,
    }
)

# Local files attached to the reduce operation: get_queries opens
# 'blockstat.dict' by its bare name, and libra.so is presumably what
# `import libra` resolves to on the worker side -- confirm.
full_path_to_libra = '/home/ensuetina/libra.so'
full_path_to_blockstat = '/home/ensuetina/blockstat.dict'

job = cluster.job()

# Input: a single day of the search user sessions log.
log = job.table('user_sessions/pub/search/daily/2016-08-19/clean')

# Group by 'key', order each group by 'subkey', reduce with get_queries and
# store the result under the job root.
reqs = (
    log
    .groupby('key')
    .sort('subkey')
    .reduce(
        get_queries,
        files=[
            nile.files.LocalFile(full_path_to_blockstat),
            nile.files.LocalFile(full_path_to_libra),
        ],
        memory_limit=2000
    )
    .put('$job_root/result')
)

job.run()

