from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record
)
from datetime import datetime, timedelta
from urlparse import parse_qs

# Cluster handle used to build every job in this script. The 'job_root'
# template is presumably what '$job_root' in the table paths resolves to.
cluster = clusters.yt.Hahn().env(
    templates={'job_root': 'home/search-research/ensuetina/STOLEN_COOKIES'},
)

# NOTE(review): neither constant is referenced in the visible part of this
# script; confirm whether they are still needed.
statuses = (1, -1)
lifetime = 10


def extract_useful_values(records):
    """Yield one record per non-empty cookie value found in a request.

    For each input record the text following ``xml?`` in the ``request``
    field is parsed as a query string. Its ``bro`` parameter is a
    dot-separated list of names and its ``ck`` parameter a dot-separated list
    of values; the two lists are paired positionally and every pair with a
    non-empty value produces one output record.

    Records whose request has no ``xml?`` marker, or whose ``bro``/``ck``
    parameter is missing or blank, are skipped instead of raising, so a
    single malformed log line cannot abort the whole map operation.

    Args:
        records: iterable of input records exposing ``iso_eventtime`` and
            ``get('request')``.

    Yields:
        Record with fields ``yandexuid`` (first ``yandexuid`` query value, or
        ``''`` when absent), ``uid`` (the cookie value), ``BR`` (the paired
        name from ``bro``) and ``ts`` (the part of ``iso_eventtime`` before
        the first space).
    """
    for rec in records:
        request_parts = rec.get('request').split('xml?')
        if len(request_parts) < 2:
            # No query-string marker: nothing to parse for this record.
            continue
        data = parse_qs(request_parts[1])

        # parse_qs drops blank values by default, so a bare 'bro=' or 'ck='
        # in the request leaves the key absent here.
        bro_values = data.get('bro')
        ck_values = data.get('ck')
        if not bro_values or not ck_values:
            continue

        # 'Mail.ru' contains the separator itself; collapse it first so the
        # names stay aligned with the cookie values after splitting on '.'.
        bros = bro_values[0].replace('Mail.ru', 'Mailru').split('.')
        cookies = ck_values[0].split('.')

        # parse_qs maps each key to a list of values; take the first one
        # rather than stringifying the whole list.
        yandexuid = data.get('yandexuid', [''])[0]
        ts = rec.iso_eventtime.split(' ')[0]

        for name, uid in zip(bros, cookies):
            if uid:
                yield Record(yandexuid=yandexuid, uid=uid, BR=name, ts=ts)


# Days of the access log to process; each day gets its own job run.
dates = [
    '2016-07-27',
    '2016-07-28',
    '2016-07-29',
    '2016-07-30',
]

for date in dates:
    job = cluster.job()

    # Access-log table for the current day.
    t = job.table('statbox/export-access-log/' + date)

    # Condition 1: the request carries stat=dayuse or stat=background
    # together with yasoft=yabro.
    stat_from_yabro = nf.custom(
        lambda x: ('stat=dayuse' in x or 'stat=background' in x) and 'yasoft=yabro' in x,
        'request'
    )
    # Condition 2: the request carries both parameters the extractor reads.
    has_cookie_params = nf.custom(
        lambda x: 'ck=' in x and 'bro=' in x,
        'request'
    )

    # Rows passing both conditions are appended to the shared filtered log.
    # To redo only the extraction step, `result` could instead be read back
    # with job.table('$job_root/filtered_log').
    filtered = t.filter(stat_from_yabro, has_cookie_params)
    result = filtered.put('$job_root/filtered_log', append=True)

    # Expand each filtered request into per-cookie rows and append them to
    # the table that is aggregated after the loop.
    extracted = result.map(extract_useful_values)
    data = extracted.put('$job_root/stolen_cookies', append=True)

    job.run()

# Final job: count rows per (yandexuid, uid) pair over everything collected
# above, sort by that count and write the aggregate table.
job = cluster.job()

collected = job.table('$job_root/stolen_cookies')
counted = collected.groupby('yandexuid', 'uid').aggregate(lines=na.count())
data = counted.sort('lines').put('$job_root/stolen_cookies_aggr')

job.run()

