#!/usr/bin/env python
# -*- coding: utf-8 -*-

from mapreducelib import MapReduce, Record, TemporaryTable
import sys
import libra
import random
import argparse

# Upper bound (inclusive) on a web result's "FreshAge" marker for the result
# to be counted as fresh in countsurplus.
# NOTE(review): 259200 == 3 * 24 * 3600, so this is presumably 3 days in
# seconds -- confirm the unit of the FreshAge marker.
fage=259200
# Test ID whose requests countsurplus reports with the key 'Desktop'.
testiddesk='23258'
# Test ID whose requests countsurplus reports with the key 'Touch'.
testidtouch='23284'

def HandleOption():
    """Build the command-line parser for this script.

    All options are optional and fall back to the defaults listed below.

    Returns:
        argparse.ArgumentParser with the options --server, --user, --mr and
        --bs (the latter stored under the attribute name ``blockstat``).
    """
    # (flag, destination attribute, help text, default value)
    option_specs = (
        ("--server", "server", "mapreduce server", 'sakura.search.yandex.net:8013'),
        ("--user", "user", "mapreduce user", 'freshness'),
        ("--mr", "mr", "mapreduce binary", '/Berkanavt/bin/mapreduce-dev'),
        ("--bs", "blockstat", "path to blockstat.dict", '/home/itajn/serploader/blockstat.dict'),
    )
    option_parser = argparse.ArgumentParser()
    for flag, dest_name, help_text, default_value in option_specs:
        option_parser.add_argument(
            flag,
            dest=dest_name,
            help=help_text,
            default=default_value,
            required=False,
        )
    return option_parser

def _long_click_count(block):
    """Return the number of clicks on *block* with DwellTime of at least 15."""
    return sum(1 for click in block.GetClicks() if click.DwellTime >= 15)

def _is_fresh_web_result(res):
    """Return True if *res* is a TWebResult whose FreshAge marker is <= fage."""
    if not res.IsA("TWebResult"):
        return False
    markers = res.Markers
    return ("FreshAge" in markers) and (int(markers['FreshAge']) <= fage)

def _lowercase_query(raw_query):
    """Return *raw_query* lower-cased as text.

    On Python 2 a byte string's lower() only affects ASCII letters, so a
    query containing capital Cyrillic letters would never match the
    lower-case keywords.  Byte strings are therefore decoded first.
    NOTE(review): assumes queries are UTF-8 encoded (the same encoding the
    keywords had as byte literals in this utf-8 source file) -- confirm.
    Undecodable bytes are replaced rather than raising.
    """
    if isinstance(raw_query, bytes):
        raw_query = raw_query.decode('utf-8', 'replace')
    return raw_query.lower()

def countsurplus(key, recs):
    """Reduce one session into per-request fresh-result click statistics.

    The session is parsed with libra; sessions that fail to parse are
    reported on stderr and skipped.  A request is considered only if it

    * is a TYandexWebRequest or TTouchYandexWebRequest,
    * has ServiceDomRegion == 'ru',
    * is a TMiscRequestProperties (otherwise 'No relev' is written to
      stderr), and
    * carries the test ID ``testiddesk`` or ``testidtouch``.

    For such a request the main blocks are scanned in order.  "Long" clicks
    (DwellTime >= 15) on fresh web results are summed into ``win``; long
    clicks on the block that directly follows a fresh web result and is not
    itself fresh are summed into ``loss``.

    Args:
        key: reduce key of the session; used only in the parse-failure
            diagnostic.
        recs: raw session records, passed to libra.ParseSession as is.

    Yields:
        One Record per request whose first fresh web result is at a
        position in [0, 12]:
        Record(qtype, str(freshpos), value), where qtype is 'Desktop' or
        'Touch' and value is the tab-joined string of
        wordcnt, win, loss, nav, tv, news_rub, newswiz.
    """
    try:
        # Takes the whole raw session and blockstat.dict and returns a list of
        # TRequest objects.
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception as err:
        # Best effort: skip unparsable sessions, but leave a trace and do not
        # swallow SystemExit / KeyboardInterrupt as a bare except would.
        # %r keeps the message ASCII-safe on Python 2.
        sys.stderr.write('ParseSession failed for key %r: %r\n' % (key, err))
        return

    for request in session:
        if not (request.IsA("TYandexWebRequest") or request.IsA("TTouchYandexWebRequest")):
            continue
        if request.ServiceDomRegion != 'ru':
            continue
        if not request.IsA('TMiscRequestProperties'):
            sys.stderr.write('No relev\n')
            continue
        relev = request.RelevValues
        nav = ('is_nav' in relev) and (relev['is_nav'] == '1')

        query = _lowercase_query(request.Query)
        tv = u'серия' in query
        news_rub = u'новости' in query

        if request.HasTestID(testiddesk):
            qtype = 'Desktop'
        elif request.HasTestID(testidtouch):
            qtype = 'Touch'
        else:
            continue

        newswiz = False
        win = 0
        loss = 0
        freshpos = -241  # negative sentinel: no fresh web result seen yet
        prev_fresh = False  # True while the previous block was a fresh web result
        for block in request.GetMainBlocks():
            long_clicks = _long_click_count(block)
            res = block.GetMainResult()
            if (res.IsA("TBlenderWizardResult") or res.IsA("TWizardResult")) and res.Name == "news":
                newswiz = True
            if _is_fresh_web_result(res):
                win += long_clicks
                prev_fresh = True
                if freshpos < 0:
                    freshpos = block.Position
                continue
            if prev_fresh:
                # Only the first non-fresh block after a fresh one counts.
                loss += long_clicks
                prev_fresh = False

        if 0 <= freshpos <= 12:
            wordcnt = len(query.split(' '))
            fields = [wordcnt, win, loss, nav, tv, news_rub, newswiz]
            yield Record(qtype, str(freshpos), '\t'.join([str(field) for field in fields]))

def main():
    """Run the countsurplus reduce once per day for 2016-05-16 .. 2016-05-30.

    Connection settings come from the command line (see HandleOption).  For
    every day the table 'user_sessions/<day>' is reduced into
    'itajn/FR-1999/<day>', with the blockstat dictionary shipped to the job
    via ``files``.
    """
    options = HandleOption().parse_args()
    MapReduce.useDefaults(
        server=options.server,
        username=options.user,
        mrExec=options.mr,
        verbose=True,
    )
    # Days 16..30 of May 2016, formatted as YYYYMMDD.
    # An earlier variant read 'fast_logs/user_sessions/<timestamp>' instead
    # (e.g. "1466508600").
    for day_of_month in range(16, 31):
        day = "201605%02d" % day_of_month
        source_table = 'user_sessions/' + day
        result_table = 'itajn/FR-1999/' + day
        MapReduce.runReduce(
            countsurplus,
            srcTable=source_table,
            dstTable=result_table,
            files=[options.blockstat],
            sortMode=True,
        )

# Run the job only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
