#!/usr/bin/env python
# -*- coding: utf-8 -*-

import yt.wrapper as yt
import sys
import libra
import argparse

# Upper bound on a web result's "FreshAge" marker for the result to be
# counted as "fresh" by countsurplus().
# NOTE(review): presumably seconds (3 days = 259200) -- confirm the unit of
# the FreshAge marker.
fage = 3 * 24 * 60 * 60

def HandleOption():
    """Build the command-line parser for this script.

    Both options are optional and fall back to their defaults:

    --server  mapreduce server address, stored as ``server``
    --bs      path to blockstat.dict, stored as ``blockstat``

    Returns:
        The configured ``argparse.ArgumentParser`` (arguments not parsed yet).
    """
    # (flag, destination attribute, help text, default value)
    option_specs = (
        ("--server", "server", "mapreduce server", 'hahn.yt.yandex.net:80'),
        ("--bs", "blockstat", "path to blockstat.dict", '/home/itajn/serploader/blockstat.dict'),
    )
    cli = argparse.ArgumentParser()
    for flag, target, description, fallback in option_specs:
        cli.add_argument(flag, dest=target, help=description,
                         default=fallback, required=False)
    return cli

def countsurplus(key, recs):
    """Reducer: emit fresh/news click statistics per qualifying request.

    The records are parsed into a session with ``libra.ParseSession``.
    A request is considered only if it is a ``TYandexWebRequest`` (type
    "web") or a ``TMobileYandexWebRequest``/``TTouchYandexWebRequest``
    (type "touch"). A row is yielded for it only when the block at
    position 0 is a wizard named "sport" with 'olympiad' in its path.

    Within a request, blocks at positions up to 3 can be classified as:

    * "news"  -- a wizard result named "news";
    * "fresh" -- a ``TWebResult`` whose "FreshAge" marker is <= ``fage``.

    Blocks at position 4 are never classified, but they can still count as
    the "following block" described below. Blocks past position 4 are
    ignored entirely.

    For each category the reducer tracks:

    * shown   -- ``False`` if no such block was seen; otherwise the number
                 of fresh blocks, or 1 for news regardless of how many;
    * clicked -- total clicks on blocks of that category;
    * surplus -- long clicks (DwellTime >= 15) on blocks of that category
                 minus long clicks on the first considered block of another
                 kind that follows a block of that category.

    Args:
        key: Reduce key. Not used by the computation.
        recs: Records for that key, passed straight to libra.

    Yields:
        dict with keys 'type' ("web"/"touch"), 'query' (at most 1000
        characters), and 'fresh' / 'news', each a tab-joined string
        "shown<TAB>clicked<TAB>surplus". ``shown`` is serialised with
        ``str()``, so an unseen category appears as the literal "False".
    """
    try:
        # NOTE(review): './blockstat.dict' is presumably the file shipped to
        # the job via local_files by the caller -- confirm.
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best-effort: records libra cannot parse are skipped for this key.
        # Exception (rather than a bare except) keeps KeyboardInterrupt and
        # SystemExit propagating.
        return

    for request in session:
        if request.IsA("TYandexWebRequest"):
            qt = 'web'
        elif request.IsA("TMobileYandexWebRequest") or request.IsA("TTouchYandexWebRequest"):
            qt = 'touch'
        else:
            continue

        # Slicing is a no-op for queries of 1000 characters or fewer.
        query = request.Query[:1000]

        was = False  # olympiad sport wizard seen at position 0
        win = {'fresh': 0, 'news': 0}
        loss = {'fresh': 0, 'news': 0}
        hasit = {'fresh': False, 'news': False}  # category awaiting its "following block"
        shown = {'fresh': False, 'news': False}
        clicked = {'fresh': 0, 'news': 0}

        for block in request.GetMainBlocks():
            # Blocks past position 4 contribute nothing, so skip them before
            # doing any per-block work.
            if block.Position > 4:
                continue

            cl = 0  # all clicks on this block
            lc = 0  # long clicks (DwellTime >= 15) on this block
            for click in block.GetClicks():
                cl += 1
                if int(click.DwellTime) >= 15:
                    lc += 1

            res = block.GetMainResult()
            is_wizard = res.IsA("TBlenderWizardResult") or res.IsA("TWizardResult")

            if block.Position == 0 and is_wizard:
                if res.Name == 'sport' and 'olympiad' in res.Path:
                    was = True

            this = 'none'
            if is_wizard:
                if res.Name == "news" and block.Position != 4:
                    win['news'] += lc
                    clicked['news'] += cl
                    hasit['news'] = True
                    this = 'news'
                    # News is recorded as a flag-like 1, not a running count.
                    shown['news'] = 1
            elif res.IsA("TWebResult"):
                m = res.Markers
                if ("FreshAge" in m) and (int(m['FreshAge']) <= fage) and block.Position != 4:
                    win['fresh'] += lc
                    clicked['fresh'] += cl
                    hasit['fresh'] = True
                    this = 'fresh'
                    # shown starts as False so that "never shown" can be
                    # told apart downstream; switch to a count on first hit.
                    if shown['fresh']:
                        shown['fresh'] += 1
                    else:
                        shown['fresh'] = 1

            # The first block of a different kind after a fresh/news block
            # has its long clicks charged as that category's loss.
            if hasit['fresh'] and this != 'fresh':
                loss['fresh'] += lc
                hasit['fresh'] = False
            elif hasit['news'] and this != 'news':
                loss['news'] += lc
                hasit['news'] = False

        if was:
            yield {'type': qt,
                   'query': query,
                   'fresh': '\t'.join([str(shown['fresh']),
                                       str(clicked['fresh']),
                                       str(win['fresh'] - loss['fresh'])]),
                   'news': '\t'.join([str(shown['news']),
                                      str(clicked['news']),
                                      str(win['news'] - loss['news'])])
                   }

def main():
    """Run the countsurplus reduce for each day and print aggregated totals.

    For every date from 2016-08-05 through 2016-08-23 this:

    1. creates the per-day output table if it does not exist yet;
    2. runs ``countsurplus`` as a reduce (by 'key') over that day's
       user-sessions table, shipping the blockstat dictionary given by
       ``--bs`` as a local file;
    3. reads the output table back and prints one tab-separated line per
       (request type, category) pair:

           day, type, category, requests, shown total, clicks, surplus

    Category is "all", "fresh" or "news". The "all" line carries only the
    request count; its remaining columns stay 0.
    """
    args = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': args.server}})

    def new_counters():
        # One zeroed accumulator per request type and category.
        return {'touch': {'all': 0, 'fresh': 0, 'news': 0},
                'web': {'all': 0, 'fresh': 0, 'news': 0}}

    dates = ['2016-08-%02d' % day_of_month for day_of_month in range(5, 24)]
    for day in dates:
        usersessions = '//user_sessions/pub/search/daily/' + day + '/clean'
        output = '//home/freshness/staff/itajn/FR-2337/' + day + '_top3'
        if not yt.exists(output):
            yt.create_table(path=output, recursive=True)
        yt.run_reduce(countsurplus,
                      source_table=usersessions,
                      destination_table=output,
                      local_files=[args.blockstat],
                      reduce_by='key')

        total = new_counters()      # number of requests
        shown_sum = new_counters()  # sum of the per-request "shown" values
        clicked = new_counters()    # sum of clicks
        surplus = new_counters()    # sum of (win - loss) long clicks

        for row in yt.read_table(output):
            req_type = row["type"]
            total[req_type]["all"] += 1
            for tp in ["news", "fresh"]:
                fields = row[tp].split('\t')
                # countsurplus serialises "never shown" as str(False);
                # such requests are left out of this category's stats.
                if fields[0] != 'False':
                    total[req_type][tp] += 1
                    shown_sum[req_type][tp] += int(fields[0])
                    clicked[req_type][tp] += int(fields[1])
                    surplus[req_type][tp] += int(fields[2])

        for req_type in total:
            for tp in total[req_type]:
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print('\t'.join([day, req_type, tp,
                                 str(total[req_type][tp]),
                                 str(shown_sum[req_type][tp]),
                                 str(clicked[req_type][tp]),
                                 str(surplus[req_type][tp])]))

# Run the job only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
