#-*-coding: utf8 -*-
from nile.api.v1 import (
    Record,
    clusters,
    aggregators as na,
    filters as nf,
    extractors as ne
)

import nile
from datetime import datetime
import uatraits, cgi, urlparse
import random
import libra

import pandas as pd

def Reduce(groups):
    """Emit per-request records describing the permanent cookie of each user.

    For every ``(key, records)`` group the records are parsed into a libra
    session using the ``blockstat.dict`` dictionary. Each request in the
    session that is a web, images or video request produces:

    * one record with ``cookie='all'``;
    * one extra record with ``cookie='los.1'`` (emitted first) when the string
      form of the request's ``YandexPermanentCookie`` contains ``'los.1'``.

    Requests of any other type are ignored, and groups whose session cannot
    be parsed are skipped entirely.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` is used as
            the user id.

    Yields:
        ``Record`` objects with the fields ``uid``, ``ui``, ``cookie`` and
        ``domain``.
    """
    for key, records in groups:
        uid = key.key

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: a session libra cannot parse is dropped. Only
            # ``Exception`` is caught so that KeyboardInterrupt / SystemExit
            # still stop the job instead of being silently swallowed.
            continue

        for r in session:
            # Map the libra request type to the short label stored in ``ui``.
            if r.IsA('TYandexWebRequest'):
                ui = 'search'
            elif r.IsA('TYandexImagesRequest'):
                ui = 'images'
            elif r.IsA('TYandexVideoRequest'):
                ui = 'video'
            else:
                continue

            domain = r.ServiceDomRegion

            cookie = str(r.YandexPermanentCookie)
            if 'los.1' in cookie:
                yield Record(uid=uid, ui=ui, cookie='los.1', domain=domain)
            # Every matched request is also counted in the 'all' bucket.
            yield Record(uid=uid, ui=ui, cookie='all', domain=domain)



# Cluster handle for YT Hahn; the ``job_root`` template is what the
# '$job_root/...' table paths below refer to.
cluster = clusters.yt.Hahn(pool='search-research_ensuetina').env(
    templates=dict(job_root='home/search-research/ensuetina/LOS_COOKIE')
)

date_format = '%Y-%m-%d'
# Days to process, rendered as strings in ``date_format``.
dates = [day.strftime(date_format) for day in pd.date_range('2016-10-10', '2016-10-10')]

# Stage 1: for each day, reduce the user sessions into per-request records.
# NOTE(review): every iteration writes to the same '$job_root/reqs' path, so
# with more than one date later days presumably replace earlier ones - confirm.
for date in dates:
    job = cluster.job()

    us = job.table('user_sessions/pub/search/daily/' + date + '/clean')

    # Local files shipped with the reduce operation.
    shipped_files = [
        nile.files.LocalFile('/home/ensuetina/blockstat.dict'),
        nile.files.LocalFile('/home/ensuetina/libra.so'),
    ]

    # Group by 'key' and order by 'subkey' before handing groups to Reduce.
    by_user = us.groupby('key').sort('subkey')
    reduced = by_user.reduce(Reduce, files=shipped_files, memory_limit=2000)
    t = reduced.put('$job_root/reqs')

    job.run()

# Stage 2: aggregate the per-request records.
job = cluster.job()

# First collapse to one row per (uid, ui, cookie, domain) with its request
# count, then count those rows per (ui, cookie, domain).
per_user = job.table('$job_root/reqs').groupby('uid', 'ui', 'cookie', 'domain').aggregate(freq=na.count())
per_bucket = per_user.groupby('ui', 'cookie', 'domain').aggregate(uids=na.count())
t = per_bucket.put('$job_root/uids_with_los1')

job.run()


