#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import yt.wrapper as yt
import sys
import libra
import argparse

def HandleOption():
    """Build the command-line parser for this script.

    Options:
        --server  YT server address, stored as ``server``
                  (default: hahn.yt.yandex.net).
        --bs      Path to blockstat.dict, stored as ``blockstat``
                  (default: /home/itajn/serploader/blockstat.dict).

    Returns:
        argparse.ArgumentParser: the configured parser; arguments are not
        parsed here.
    """
    # (flag, dest, help, default) for every supported option; all are optional.
    option_specs = (
        ("--server", "server", "yt server", 'hahn.yt.yandex.net'),
        ("--bs", "blockstat", "path to blockstat.dict",
         '/home/itajn/serploader/blockstat.dict'),
    )

    cli = argparse.ArgumentParser()
    for flag, dest, help_text, default in option_specs:
        cli.add_argument(flag, dest=dest, help=help_text, default=default,
                         required=False)
    return cli

def countsurplus(key, recs):
    """Reducer: emit one row per recognised request that has blender factors.

    The records are parsed into a session with ``libra.ParseSession``. Each
    request in the session is classified by its class:

    * ``TYandexWebRequest`` -> ``'web'``
    * ``TMobileYandexWebRequest`` / ``TTouchYandexWebRequest`` /
      ``TPadYandexWebRequest`` -> ``'touch'``
    * ``TMobileAppYandexWebRequest`` -> ``'app'``

    Requests of any other class are skipped, as are requests whose search
    props lack ``'UPPER.ApplyBlender.factors'``.

    Args:
        key: The reduce key. Not used by this function.
        recs: Records for that key, passed straight to ``libra.ParseSession``.

    Yields:
        dict: ``{'type': 'web'|'touch'|'app', 'inf': 'True'|'False'}``.
        ``'inf'`` is the string ``'True'`` when the space-separated factors
        value contains the token ``'inf'``, otherwise ``'False'``.
    """
    # NOTE(review): the dictionary path is relative to the working directory;
    # presumably the caller ships a file with this exact name to the job --
    # confirm against the operation's file settings.
    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed produces no rows.
        # Deliberately not a bare ``except`` so that KeyboardInterrupt,
        # SystemExit and GeneratorExit still propagate.
        return

    factors_prop = 'UPPER.ApplyBlender.factors'
    for request in session:
        # The order of the checks is kept as-is; request classes may be related.
        if request.IsA("TYandexWebRequest"):
            platform = 'web'
        elif (request.IsA("TMobileYandexWebRequest")
              or request.IsA("TTouchYandexWebRequest")
              or request.IsA("TPadYandexWebRequest")):
            platform = 'touch'
        elif request.IsA("TMobileAppYandexWebRequest"):
            platform = 'app'
        else:
            continue

        sprops = request.SearchPropsValues
        if factors_prop not in sprops:
            continue

        # Exact-token match: 'inf' must be a whole space-separated item.
        has_inf = 'inf' in sprops[factors_prop].split(' ')
        yield {'type': platform, 'inf': 'True' if has_inf else 'False'}


def main():
    """Run the reducer for a fixed list of days and print per-day totals.

    For each date in the hard-coded list this:

    1. creates the output table if it does not exist yet,
    2. runs ``countsurplus`` as a reduce over that day's user-sessions table,
    3. reads the output table back and counts, per ``type`` value
       (``web``, ``touch``, ``app``), all rows and the rows whose ``inf``
       column equals ``'True'``,
    4. prints one space-separated line:
       ``day web touch app inf_web inf_touch inf_app``.
    """
    args = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': args.server}})

    dates = ['2016-10-12', '2016-10-13', '2016-10-14', '2016-10-15',
             '2016-10-16', '2016-10-17', '2016-10-18', '2016-10-19']
    # Fixes the column order of the printed line.
    platforms = ('web', 'touch', 'app')

    for day in dates:
        usersessions = '//user_sessions/pub/search/daily/' + day + '/clean'
        output = '//home/freshness/staff/itajn/FR-2435/' + day

        if not yt.exists(output):
            yt.create_table(path=output, recursive=True)

        # NOTE(review): countsurplus opens './blockstat.dict', so the file
        # given with --bs presumably has to be named blockstat.dict -- confirm
        # how local_files names files inside the job.
        yt.run_reduce(
            countsurplus,
            source_table=usersessions,
            destination_table=output,
            local_files=[args.blockstat],
            reduce_by='key',
            spec={'data_size_per_job': 16000000000},  # ~16 GB
        )

        count = dict.fromkeys(platforms, 0)
        inf = dict.fromkeys(platforms, 0)
        for row in yt.read_table(output):
            count[row['type']] += 1
            if row['inf'] == 'True':
                inf[row['type']] += 1

        columns = [day]
        columns.extend(count[name] for name in platforms)
        columns.extend(inf[name] for name in platforms)
        # A single pre-joined string prints the same line under the Python 2
        # print statement and the Python 3 print function.
        print(' '.join(str(value) for value in columns))

# Run the job only when executed as a script, not when imported as a module.
if __name__ ==  '__main__':
    main()
