# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
import libra
import urllib

# Module-level request filter shared by process(): requests for which
# fltRnk.Filter(request) is false are skipped there.
# The single "search_props_filter" rule lists three WEB.RankingModel values
# (tr, tr.long, tr.regional) joined with "||" -- presumably a logical OR in
# libra's filter expression syntax; TODO confirm against the libra docs.
fltRnk = libra.TRequestFilter()
fltRnk.Add("search_props_filter", "WEB.RankingModel==tr||WEB.RankingModel==tr.long||WEB.RankingModel==tr.regional")
# Init() is called once after all rules have been added and before any use.
fltRnk.Init()

def process(key, recs):
    """Emit one show/click record per organic result of qualifying requests.

    The raw records are parsed into requests with libra.ParseSession.  A
    request is kept only if it is a ``TYandexWebRequest``, passes the
    module-level ``fltRnk`` filter, has ``PageNo == 0`` and has
    ``ServiceDomRegion == 'tr'``.  For every main block of a kept request
    whose main result is a ``TWebResult``, a Record is yielded with:

        key    -- query, position and url joined by TAB characters
        subkey -- empty string
        value  -- "1", a TAB, then "1" if the result has at least one
                  click and "0" otherwise

    Records whose key is 300 characters or longer are dropped.  If the
    session cannot be parsed, nothing is yielded.

    key  -- key of the incoming group (not used by this function).
    recs -- raw records of the group, passed to libra.ParseSession as is.
    """
    try:
        # ParseSession takes the whole raw session plus blockstat.dict and
        # returns a list of TRequest objects.
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that fails to parse is skipped.  Only
        # Exception is caught so KeyboardInterrupt/SystemExit still propagate.
        return

    for request in session:
        # Keep only requests to Yandex desktop web search (not video search
        # etc.).  Per the original author's note, .IsA('TMobileYandexWebRequest')
        # and .IsA('TTouchYandexWebRequest') are the checks for mobile and
        # for smartphone/tablet requests respectively.
        if not request.IsA("TYandexWebRequest"):
            continue
        if not fltRnk.Filter(request):
            continue
        if request.PageNo != 0 or request.ServiceDomRegion != 'tr':
            continue

        # Other request attributes referenced by earlier (commented-out)
        # versions of this code: ReqId, FullRequest, Referer, UserIP, Timestamp.
        query = request.Query

        # GetMainBlocks() gives the SERP blocks; each block's main result
        # tells whether it is an organic, direct or wizard result.
        for block in request.GetMainBlocks():
            main_result = block.GetMainResult()
            if not main_result.IsA('TWebResult'):
                # Only organic web results are counted.
                continue

            url = str(main_result.Url)
            pos = str(main_result.Position)
            key_structure = query + '\t' + pos + '\t' + url
            if len(key_structure) >= 300:
                # Over-long keys are dropped (presumably a key size limit of
                # the MapReduce tables -- TODO confirm).
                continue

            # Each occurrence counts as one show; clicked is 1 or 0.
            clicked = int(len(main_result.GetClicks()) > 0)
            yield Record(key_structure, '', '1' + '\t' + str(clicked))

def Count_CTR(key, recs):
    """Sum shows and clicks for one key and yield its click-through rate.

    Each record's ``value`` must contain at least two TAB-separated integer
    fields: shows first, clicks second.  Any further fields are ignored.

    key  -- key of the group; passed through unchanged to the output.
    recs -- iterable of records exposing a ``value`` string attribute.

    Yields a single ``(key, '', ctr)`` tuple, where ``ctr`` is
    ``str(total_clicks / float(total_shows))``.  Nothing is yielded when the
    total number of shows is zero (for example when ``recs`` is empty), so
    the function no longer raises ZeroDivisionError in that case.
    """
    total_shows = 0
    total_clicks = 0
    for rec in recs:
        # Split once per record; fields[0] is shows, fields[1] is clicks.
        fields = rec.value.split('\t')
        total_shows += int(fields[0])
        total_clicks += int(fields[1])

    if total_shows == 0:
        # A ratio is undefined without shows; emit nothing instead of crashing.
        return

    # float() keeps this a true division under Python 2 as well.
    ctr = str(total_clicks / float(total_shows))
    # NOTE(review): process() yields Record objects while this yields a plain
    # tuple -- confirm that mapreducelib accepts tuples as output records.
    yield (key, '', ctr)

def main():
    """Configure the MapReduce client and run the two reduce stages in order.

    Stage 1 runs process() over the source sessions table into the "mapped"
    table; stage 2 runs Count_CTR() over the "mapped" table into the
    "reduced" table.  Both stages are started with the same options.
    """
    MapReduce.useDefaults(
        server='cedar00.search.yandex.net:8013',
        username='ranking',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode = True,
    )

    sessions_table = 'user_sessions/20150401'
    mapped_table = 'cansucullu/ANSEARCH-397/mapped'
    reduced_table = 'cansucullu/ANSEARCH-397/reduced'

    # (reducer, source table, destination table) for each stage, in run order.
    stages = (
        (process, sessions_table, mapped_table),
        (Count_CTR, mapped_table, reduced_table),
    )
    for reducer, src, dst in stages:
        MapReduce.runReduce(
            reducer,
            srcTable=src,
            dstTable=dst,
            # blockstat.dict is shipped with the job; process() opens it as
            # './blockstat.dict'.
            files=['/home/chikachoff/data/blockstat.dict'],
            appendMode=True,
            sortMode=True,
        )

# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
