#!/usr/bin/env python
# -*- coding: utf-8 -*-

import yt.wrapper as yt
import sys
import libra
import argparse

# 259200 == 3 * 24 * 3600, i.e. three days if the unit is seconds.
# NOTE(review): not referenced by any code visible in this file; presumably a
# freshness-age threshold left over from another script - confirm or remove.
fage=259200

def HandleOption():
    """Build the command-line parser for this script.

    Both flags are optional and fall back to built-in defaults:

    * ``--server`` (stored as ``server``): mapreduce server address.
    * ``--bs`` (stored as ``blockstat``): path to blockstat.dict.

    Returns:
        The configured ``argparse.ArgumentParser``; the caller is expected
        to invoke ``parse_args()`` on it.
    """
    # (flag, destination attribute, help text, default value)
    option_specs = (
        ("--server", "server", "mapreduce server",
         'hahn.yt.yandex.net:80'),
        ("--bs", "blockstat", "path to blockstat.dict",
         '/home/itajn/serploader/blockstat.dict'),
    )
    cli = argparse.ArgumentParser()
    for flag, dest, help_text, default in option_specs:
        cli.add_argument(flag, dest=dest, help=help_text, default=default,
                         required=False)
    return cli

def countsurplus(key, recs):
    """Reducer: emit hockey-related queries from web search requests.

    Args:
        key: reduce key of the current group. It is required by the reducer
            signature but is not used by the filter.
        recs: records of one group; handed to ``libra.ParseSession``
            together with the dictionary at './blockstat.dict'.

    Yields:
        ``{'query': <lower-cased query>}`` for every request in the parsed
        session that
        * is a TYandexWebRequest, TMobileYandexWebRequest or
          TTouchYandexWebRequest,
        * has ``ServiceDomRegion == 'ua'``, and
        * contains one of the hockey spellings listed below.

    A session that libra fails to parse is skipped silently (best effort).
    """
    web_request_types = (
        "TYandexWebRequest",
        "TMobileYandexWebRequest",
        "TTouchYandexWebRequest",
    )
    # Correct and misspelled forms of the word, in two grammatical cases.
    hockey_spellings = ('хоккей', 'хокей', 'хоккею', 'хокею')

    # The dictionary is opened by a path relative to the job's working
    # directory. NOTE(review): main() ships the file given by --bs through
    # local_files; presumably it lands there under its basename, so the file
    # must be named "blockstat.dict" - confirm.
    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Skip unparsable sessions, but (unlike a bare ``except:``) let
        # SystemExit / KeyboardInterrupt propagate.
        return

    for request in session:
        if not any(request.IsA(type_name) for type_name in web_request_types):
            continue
        if not request.ServiceDomRegion == 'ua':
            continue

        # NOTE(review): under Python 2, lower() on a byte string only affects
        # ASCII letters, so capitalised Cyrillic queries may not match the
        # spellings above - confirm whether libra already normalises Query.
        query = request.Query.lower()
        if any(spelling in query for spelling in hockey_spellings):
            yield {'query' : query}


def main():
    """Run the hockey-query reduce for each hard-coded day and print counts.

    For every date in the list below:
      1. make sure the per-day output table exists,
      2. run ``countsurplus`` as a reduce (by 'key') over that day's clean
         user-sessions table, shipping the blockstat dictionary given by
         ``--bs`` to the jobs,
      3. read the output table back and print "<day> <row count>".
    """
    options = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': options.server}})

    dates = [
        '2016-04-17', '2016-04-18', '2016-04-19', '2016-04-20',
        '2016-04-21', '2016-04-22', '2016-04-23', '2016-04-24',
        '2016-05-06', '2016-05-07', '2016-05-08', '2016-05-09',
        '2016-05-10', '2016-05-11', '2016-05-12', '2016-05-13',
        '2016-05-14', '2016-05-15', '2016-05-16', '2016-05-17',
        '2016-05-18', '2016-05-19', '2016-05-20', '2016-05-21',
        '2016-05-22', '2016-05-23',
    ]

    for day in dates:
        source_path = '//user_sessions/pub/search/daily/' + day + '/clean'
        result_path = '//home/freshness/staff/itajn/FU-3461/' + day

        if not yt.exists(result_path):
            yt.create_table(path=result_path, recursive=True)

        yt.run_reduce(
            countsurplus,
            source_table=source_path,
            destination_table=result_path,
            local_files=[options.blockstat],
            reduce_by='key',
            spec={'data_size_per_job': 16000000000},  # ~16GB
        )

        # Count the emitted rows by streaming the whole output table.
        total = sum(1 for _ in yt.read_table(result_path))
        # Parenthesised single-string form prints the same "<day> <total>"
        # line under the Python 2 print statement.
        print("%s %s" % (day, total))

# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
