#!/usr/bin/env python
# -*- coding: utf-8 -*-

import yt.wrapper as yt
import sys
import libra
import argparse
import datetime
import json

# Largest "FreshAge" marker value for which countsurplus still treats a web
# result as 'fresh'.  259200 == 3 * 24 * 3600, i.e. three days if FreshAge is
# expressed in seconds -- TODO confirm the units against the libra docs.
fage=259200

def HandleOption():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: a parser with the required ``--ts`` and
        ``--out`` options and the optional ``--server``, ``--bs`` and
        ``--mode`` options (each optional one carries a default).  The
        caller is expected to invoke ``parse_args`` on it.
    """
    # (flag, add_argument keyword arguments), listed in registration order so
    # the generated help text keeps the same layout.
    option_specs = [
        ("--ts", dict(dest="timestamp",
                      help="fast logs timestamp",
                      required=True)),
        ("--server", dict(dest="server",
                          help="yt server",
                          default='hahn.yt.yandex.net',
                          required=False)),
        ("--bs", dict(dest="blockstat",
                      help="path to blockstat.dict",
                      default='/home/itajn/serploader/blockstat.dict',
                      required=False)),
        ("--mode", dict(dest="mode",
                        help="mode: hour/day",
                        default='hour',
                        required=False)),
        ("--out", dict(dest="output",
                       help="file for the output queries",
                       required=True)),
    ]

    cli = argparse.ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli

def countsurplus(key, recs):
    """Reducer: emit per-request win/loss click counts for each vertical shown.

    The records of one reduce group are parsed into a libra session using
    './blockstat.dict'.  For every request in the session that is a Yandex
    web request (desktop, mobile or touch) whose ServiceDomRegion is 'ru' or
    None, the main blocks are walked in order and clicks with a DwellTime of
    at least 15 are counted (presumably seconds -- TODO confirm units).

    Each block is classified as one of the verticals 'news', 'images',
    'video' (wizard results with that Name) or 'fresh' (web results whose
    'FreshAge' marker is <= the module-level ``fage``), or as none of them.

    * Clicks on a vertical's own block are added to its ``win``.
    * After a vertical's block has been seen, the clicks of the next block
      that is not that vertical are added to its ``loss`` and the vertical
      is released.  At most one vertical is charged per block, checked in
      the order fresh, news, video, images.

    Args:
        key: the reduce key; not used.
        recs: the records of the group, passed to ``libra.ParseSession``.

    Yields:
        dict: ``{'query', 'type', 'win', 'loss'}`` for every vertical that
        appeared in a request.  ``query`` is the lower-cased request query
        cut to at most 4095 characters.  Nothing is yielded if the session
        cannot be parsed.
    """
    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best-effort: a session libra cannot parse is dropped, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return

    # Order matters: it is the priority used when charging a loss.
    verticals = ('fresh', 'news', 'video', 'images')
    wizard_names = ('news', 'images', 'video')
    web_request_types = ("TYandexWebRequest",
                         "TMobileYandexWebRequest",
                         "TTouchYandexWebRequest")

    for request in session:
        if not any(request.IsA(t) for t in web_request_types):
            continue

        region = request.ServiceDomRegion
        if region is not None and region != 'ru':
            continue

        # Slicing is a no-op for queries that are already short enough.
        query = request.Query.lower()[:4095]

        win = dict.fromkeys(verticals, 0)
        loss = dict.fromkeys(verticals, 0)
        # hasit: the vertical's block was seen and no loss charged since.
        hasit = dict.fromkeys(verticals, False)
        # was: the vertical appeared at least once in this request.
        was = dict.fromkeys(verticals, False)

        for block in request.GetMainBlocks():
            long_clicks = 0
            for click in block.GetClicks():
                if int(click.DwellTime) >= 15:
                    long_clicks += 1

            res = block.GetMainResult()
            this = None
            if res.IsA("TBlenderWizardResult") or res.IsA("TWizardResult"):
                for name in wizard_names:
                    if res.Name == name:
                        this = name
                        break
            elif res.IsA("TWebResult"):
                markers = res.Markers
                if ("FreshAge" in markers) and (int(markers['FreshAge']) <= fage):
                    this = 'fresh'

            if this is not None:
                win[this] += long_clicks
                hasit[this] = True
                was[this] = True

            # Charge this block's clicks to the first pending vertical that
            # is not the block's own one, then stop (one charge per block).
            for tp in verticals:
                if hasit[tp] and this != tp:
                    loss[tp] += long_clicks
                    hasit[tp] = False
                    break

        for tp in verticals:
            if was[tp]:
                yield {'query': query,
                       'type': tp,
                       'win': win[tp],
                       'loss': loss[tp]
                       }


def aggsurplus(key, recs):
    """Reducer: combine win/loss counts of one query into a surplus per type.

    Args:
        key: mapping holding the group's ``"query"``.
        recs: iterable of mappings with ``"type"``, ``"win"`` and ``"loss"``;
            records whose type is not fresh/news/video/images are ignored.

    Yields:
        dict: ``{'query', 'type', 'surplus'}`` for every type that occurred
        in the group, where ``surplus = win - loss * k`` with ``k == 1`` for
        'fresh' and 'news' and ``k == 2`` for the other types.
    """
    query_text = key["query"]

    won = {'fresh':0,'news':0,'video':0,'images':0}
    lost = {'fresh':0,'news':0,'video':0,'images':0}
    seen = {'fresh':False,'news':False,'video':False,'images':False}

    for row in recs:
        kind = row["type"]
        if kind in won:
            won[kind] += row["win"]
            lost[kind] += row["loss"]
            seen[kind] = True

    for kind in won:
        if not seen[kind]:
            continue
        # Losses of video/images count double.
        weight = 1 if kind in ('fresh', 'news') else 2
        yield {'query': query_text,
               'type': kind,
               'surplus': won[kind] - lost[kind] * weight
               }

def main():
    """Run the surplus computation on YT and dump the result as JSON lines.

    Steps:
      1. Parse the command line and point the YT client at ``--server``.
      2. Pick the user-sessions table: the daily one (named by date) when
         ``--mode`` is 'day', otherwise the fast one (named by the first
         ten characters of ``--ts``).
      3. In a temporary table: reduce sessions with ``countsurplus``, sort
         by query, reduce with ``aggsurplus``.
      4. Write one JSON object per result row to the ``--out`` file.
    """
    options = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': options.server}})

    # Only the first ten characters of the timestamp are used.
    stamp = options.timestamp[:10]
    if options.mode == 'day':
        stamp = datetime.datetime.fromtimestamp(int(stamp)).strftime('%Y-%m-%d')
        sessions_table = ''.join(('//user_sessions/pub/search/daily/', stamp, '/clean'))
    else:
        sessions_table = ''.join(('//user_sessions/pub/search/fast/', stamp, '/clean'))

    with yt.TempTable(prefix="surplus_tmp") as scratch:
        # Stage 1: per-request win/loss counts; the blockstat dictionary is
        # shipped to the job alongside the reducer.
        yt.run_reduce(
            countsurplus,
            source_table=sessions_table,
            destination_table=scratch,
            local_files=[options.blockstat],
            reduce_by='key',
        )
        # Stage 2: group by query (sorted in place) and aggregate.
        yt.run_sort(
            source_table=scratch,
            destination_table=scratch,
            sort_by='query',
        )
        yt.run_reduce(
            aggsurplus,
            source_table=scratch,
            destination_table=scratch,
            reduce_by='query',
        )

        rows = yt.read_table(scratch)
        with open(options.output, 'w') as sink:
            for row in rows:
                record = {'query':row["query"],'type':row["type"],'surplus':row["surplus"]}
                sink.write(json.dumps(record, ensure_ascii=False) + '\n')

# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
