# -*- coding: UTF-8 -*-
from mapreducelib import MapReduce, Record
from collections import defaultdict
import sys
from datetime import datetime
import libra
import urllib, cgi
import random

def grepdecay(rec):
    """Map step: pull the query and three numeric signals out of one log line.

    A line is processed only if it contains all three substrings
    'type=REQUEST', 'service=www.yandex' and 'dom-region=ru'; any other line
    produces no output.

    The line is split on tabs and each field is inspected:
      * ``query=...``        -- the query text (the last such field wins).
      * ``search-props=...`` -- split on ',' and searched for
        ``Fresh.QuickShowsDoppLastDayDecay=`` (decay) and
        ``ApplyBlender.IntentWeight/FRESH=`` (ip).
      * ``reqrelev=...``     -- split on ';' and searched for ``Cpo=`` (cpo).

    Args:
        rec: input record; only ``rec.value`` (the raw log line) is read.

    Yields:
        One ``Record(query, '1', value)`` where ``value`` is the tab-joined
        string ``decay, cpo, ip``. The query is truncated to 4096 characters.
        Signals that are not found keep their defaults ('-241', '-241', '0.0').
        A line with no ``query=`` field is emitted with an empty key (the
        reduce step in this file skips empty keys) instead of raising
        UnboundLocalError as it did before.

    NOTE(review): the whole field, including its ``search-props=`` /
    ``reqrelev=`` prefix, is split, so the first element of each split still
    carries that prefix and a prop in first position is never matched.
    Behaviour is kept as-is -- confirm whether that is intended.
    """
    line = rec.value
    if 'type=REQUEST' not in line:
        return
    if 'service=www.yandex' not in line:
        return
    if 'dom-region=ru' not in line:
        return

    query_prefix = 'query='
    decay_prefix = 'Fresh.QuickShowsDoppLastDayDecay='
    ip_prefix = 'ApplyBlender.IntentWeight/FRESH='
    cpo_prefix = 'Cpo='

    # Defaults used when the corresponding field/prop is absent from the line.
    # `query` must have one too: not every matching line is guaranteed to
    # carry a `query=` field.
    query = ''
    decay = '-241'
    cpo = '-241'
    ip = '0.0'

    for field in line.split('\t'):
        if field.startswith(query_prefix):
            query = field[len(query_prefix):]
        elif field.startswith('search-props='):
            for prop in field.split(','):
                # Two independent checks, as in the original (not if/elif).
                if prop.startswith(decay_prefix):
                    decay = prop[len(decay_prefix):]
                if prop.startswith(ip_prefix):
                    ip = prop[len(ip_prefix):]
        elif field.startswith('reqrelev='):
            for prop in field.split(';'):
                if prop.startswith(cpo_prefix):
                    cpo = prop[len(cpo_prefix):]

    # Cap the key length at 4096 characters.
    yield Record(query[:4096], '1', decay + '\t' + cpo + '\t' + ip)

def reduce(key, recs):
    """Reduce step: aggregate map records per query.

    For every distinct non-empty ``rec.key`` seen in ``recs``:
      * the count is the sum of ``int(rec.subkey)`` over its records;
      * decay is the maximum of ``float`` of the first tab-separated value
        field;
      * cpo (``int`` of the second field) and ip (``float`` of the third) are
        taken from the first record seen for that query and never updated.

    Args:
        key: the reduce key passed by the framework; not used here, each
            record's own ``key`` is used instead.
        recs: iterable of records exposing ``key``, ``subkey`` and ``value``.

    Yields:
        ``Record(query, str(count), value)`` per query, where ``value`` is the
        tab-joined string of decay, cpo and ip.
    """
    # query -> [count, max decay, cpo of first record, ip of first record]
    per_query = {}
    for rec in recs:
        query = rec.key
        if query == '':
            continue
        fields = rec.value.split('\t')
        stats = per_query.get(query)
        if stats is None:
            per_query[query] = [
                int(rec.subkey),
                float(fields[0]),
                int(fields[1]),
                float(fields[2]),
            ]
        else:
            stats[0] += int(rec.subkey)
            candidate = float(fields[0])
            if stats[1] < candidate:
                stats[1] = candidate

    for query, (count, decay, cpo, ip) in per_query.items():
        yield Record(query, str(count), '\t'.join([str(decay), str(cpo), str(ip)]))

def main():
    """Configure MapReduce and run the map + reduce jobs for each day.

    For every date in the hard-coded list, ``grepdecay`` is run over
    ``user_sessions/<day>`` into ``itajn/FR-2024/<day>_map``, and ``reduce`` is
    then run over that intermediate table into ``itajn/FR-2024/<day>``.
    """
    blockstat_path = '/home/itajn/serploader/blockstat.dict'

    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='freshness',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode=True,
    )

    for day in ('20160225', '20160215'):
        source_table = 'user_sessions/' + day
        result_table = 'itajn/FR-2024/' + day
        mapped_table = result_table + '_map'

        MapReduce.runMap(
            grepdecay,
            srcTable=source_table,
            dstTables=[mapped_table],
            # Passed to the map job; the mapper in this file does not open it.
            files=[blockstat_path],
            sortMode=True,
        )
        MapReduce.runReduce(
            reduce,
            srcTable=mapped_table,
            dstTables=[result_table],
            sortMode=True,
        )


# Script entry point: run the jobs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
