# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
import sys
from datetime import datetime
import libra
import urllib
import random

#***************#
# libra is a library for parsing user sessions (from the user_sessions log) in a convenient way.
# Documentation is here: https://wiki.yandex-team.ru/JandeksPoisk/KachestvoPoiska/UserData/LibRA/.ru-en
# Basically you need to write a reduce function; into it you pass your sessions from the user_sessions log and blockstat.dict.
# blockstat.dict is needed to convert counters from dot-separated numeric form to readable words (for example 80.22.82 --> web/item/title).
# libra parses your raw session and gives you a list of useful structures of type TRequest.
# TRequest has fields and methods that you can access.
# Here is an example of a script using libra. It is pretty intuitive. It goes through the session and gets serp results, and yields the position of the facebook result (if there is one) and all clicks for this position.
# It also yields the position of the images wizard and clicks on this position.
# Then it checks if a link to video is present in the left column of the serp (its counter is serp/navig/video) and, if so, stores the query.
# NOTE: the Reduce function in this file does not perform the facebook / images wizard / video checks described above; it emits shows and clicks for every organic web result of the filtered requests.
#***************#

def Reduce(key, recs):
    """Emit shows and clicks of organic web results found in one raw session.

    The raw session is parsed with libra into a list of TRequest objects.
    Only requests that satisfy all of the following are processed:

    * the request is a ``TYandexWebRequest`` (desktop web search; the
      original example notes that ``TMobileYandexWebRequest`` and
      ``TTouchYandexWebRequest`` can be checked the same way for mobile and
      touch devices);
    * ``ServiceDomRegion`` is ``'tr'`` (Turkey);
    * ``SearchProps`` contains ``"rearr=fon=mn50815"`` (presumably the
      experiment marker -- confirm with the experiment owner).

    Args:
        key: reduce key of the session (not used by this function).
        recs: raw session records, passed to ``libra.ParseSession`` as is.

    Yields:
        ``Record(query + "@" + url, '', position, tableIndex=0)`` for every
        shown main block whose main result is a ``TWebResult`` (``url`` is
        the result URL), followed by
        ``Record(query + "@" + click_url, '', position, tableIndex=1)`` for
        every click on that block (``click_url`` is the click URL,
        ``position`` is still the position of the block's main result).
        Shows and clicks go to different output tables via ``tableIndex``.
    """
    try:
        # ParseSession takes the whole raw session plus blockstat.dict and
        # returns a list of TRequest objects.
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed is skipped silently.
        # Only Exception is caught so that KeyboardInterrupt / SystemExit
        # are no longer swallowed, as they were by a bare ``except:``.
        return

    for request in session:
        # Keep only desktop Yandex web search requests.
        if not request.IsA("TYandexWebRequest"):
            continue

        # Keep only Turkey results.
        if request.ServiceDomRegion != 'tr':
            continue

        if "rearr=fon=mn50815" not in request.SearchProps:
            continue

        # Other TRequest fields mentioned by the original example:
        # UserRegion, ReqId, FullRequest, Referer, UserIP, Timestamp.
        query = request.Query

        # GetMainBlocks() is used to walk the serp results in their logical
        # order; every block exposes its main result and its clicks.
        for block in request.GetMainBlocks():
            main_result = block.GetMainResult()

            # Skip everything that is not an organic web result.
            if not main_result.IsA('TWebResult'):
                continue

            show_key = query + "@" + str(main_result.Url)
            position = main_result.Position

            # The show of the result goes to the first output table ...
            yield Record(show_key, '', position, tableIndex=0)

            # ... and each click on the block goes to the second one.
            # Other TClick fields mentioned by the original example:
            # ConvertedPath, DwellTime, DelayAfterRequest.
            for click in block.GetClicks():
                click_key = query + "@" + str(click.Url)
                yield Record(click_key, '', position, tableIndex=1)



def main():
    """Run the Reduce job over the selected March 2015 user_sessions tables.

    Configures the MapReduce client defaults, then launches one reduce run
    per day, appending all output to the same destination table.
    """
    MapReduce.useDefaults(server='cedar00.search.yandex.net:8013', verbose=True)

    output_table = 'cansucullu/ANSEARCH-389/EXPERIMENT-4066-map'

    # Day-of-month suffixes of the source tables (user_sessions/201503DD).
    for day_suffix in ('17', '18'):
        input_table = 'user_sessions/201503' + day_suffix

        MapReduce.runReduce(
            Reduce,
            srcTable=input_table,
            dstTable=output_table,
            # blockstat.dict is shipped with the job; Reduce reads it as
            # ./blockstat.dict.
            files=['/home/cansucullu/bin/blockstat.dict'],
            appendMode=True,
            sortMode=True
        )

# Launch the job only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
