#!/usr/bin/env python
# -*- coding: utf-8 -*-


import sys
import libra
import yt.wrapper as yt
import argparse

# Inclusive upper bound for a result's integer "FreshAge" marker (see findurl).
# 259200 == 3 * 24 * 60 * 60, so presumably three days in seconds -- TODO confirm unit.
fage=259200
# Test ID passed to request.HasTestID() in findurl to select requests.
testid='33169'

def HandleOption():
    """Build the command-line parser for this script.

    Both options are optional and fall back to their defaults:

    * ``--server`` (stored as ``server``): yt server.
    * ``--bs`` (stored as ``blockstat``): path to blockstat.dict.

    Returns:
        The configured ``argparse.ArgumentParser``; the caller is expected
        to invoke ``parse_args()`` on it.
    """
    # (flag, dest, help, default) for every supported option.
    option_table = (
        ("--server", "server", "yt server", 'hahn.yt.yandex.net'),
        ("--bs", "blockstat", "path to blockstat.dict", '/home/itajn/serploader/blockstat.dict'),
    )

    cli = argparse.ArgumentParser()
    for flag, dest_name, help_text, fallback in option_table:
        cli.add_argument(
            flag,
            dest=dest_name,
            help=help_text,
            default=fallback,
            required=False,
        )
    return cli


def findurl(key, recs):
    """Reducer: yield fresh web results shown for matching touch requests.

    The records are parsed into a session with ``libra.ParseSession``. A
    request is kept only if all of the following hold, checked in this order:

    * it is a ``TMobileYandexWebRequest`` or a ``TTouchYandexWebRequest``;
    * its ``ServiceDomRegion`` equals ``'ru'``;
    * it is a ``TMiscRequestProperties`` and ``HasTestID(testid)`` is true;
    * its search props contain ``QUICK.Lua.TouchVideoBan_merge_worked``
      with the value ``'1'``;
    * the lower-cased query contains the substring ``'серия'``.

    For every main block of a kept request whose main result is a
    ``TWebResult`` carrying a ``FreshAge`` marker not greater than ``fage``,
    one output row is produced.

    Args:
        key: Reduce key of the current group (unused here).
        recs: Records of the group, handed to ``libra.ParseSession`` as is.

    Yields:
        dict: ``{'url': <result url>, 'query': <request query>}``.
    """
    try:
        # NOTE(review): the dictionary path is fixed to the job's working
        # directory, while main() ships the file named by --bs via
        # local_files; this presumably only works when that file's basename
        # is blockstat.dict -- confirm.
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a group that cannot be parsed is skipped and produces
        # no rows. Exception (rather than a bare except) keeps
        # KeyboardInterrupt and SystemExit propagating.
        return

    merge_flag = 'QUICK.Lua.TouchVideoBan_merge_worked'

    for request in session:
        # Only the two touch/mobile web request kinds are of interest.
        if not (request.IsA("TMobileYandexWebRequest") or request.IsA("TTouchYandexWebRequest")):
            continue
        if request.ServiceDomRegion != 'ru':
            continue
        # The request must belong to the experiment identified by testid ...
        if not (request.IsA('TMiscRequestProperties') and request.HasTestID(testid)):
            continue
        # ... and the merge flag must be reported as '1' in its search props.
        sprops = request.SearchPropsValues
        if not ((merge_flag in sprops) and (sprops[merge_flag] == '1')):
            continue
        query = request.Query
        # NOTE(review): if Query is a byte string (Python 2), lower() may not
        # lower-case Cyrillic letters, so capitalised spellings would be
        # missed -- confirm the type of Query.
        if 'серия' not in query.lower():
            continue
        for block in request.GetMainBlocks():
            res = block.GetMainResult()
            if not res.IsA("TWebResult"):
                continue
            markers = res.Markers
            if ("FreshAge" in markers) and (int(markers['FreshAge']) <= fage):
                yield {'url': res.Url, 'query': query}

def main():
    """Run the findurl reduce once per hard-coded day of user sessions.

    Parses the command line, points the yt client at the requested proxy,
    and for each day creates the output table if it does not exist yet
    before reducing that day's session table by ``key`` with ``findurl``.
    The file named by ``--bs`` is attached to every job through
    ``local_files``.
    """
    options = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': options.server}})

    days = [
        '2016-10-13',
        '2016-10-14',
        '2016-10-15',
        '2016-10-16',
        '2016-10-17',
        '2016-10-18',
        '2016-10-19',
    ]
    for day in days:
        src_table = '//user_sessions/pub/search/daily/' + day + '/clean'
        dst_table = '//home/freshness/staff/itajn/FR-2153/' + day

        # Create the destination (and any missing parents) on first use only.
        if not yt.exists(dst_table):
            yt.create_table(path=dst_table, recursive=True)

        job_spec = {'data_size_per_job': 16000000000}  # ~16GB
        yt.run_reduce(
            findurl,
            source_table=src_table,
            destination_table=dst_table,
            local_files=[options.blockstat],
            reduce_by='key',
            spec=job_spec,
        )

# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
