# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
import sys
import os
import libra

# Experiment test-id; collect() keeps only requests for which
# request.HasTestID(testIdExp) is true.
testIdExp = "12365"

# Module-level request filter used by collect() via fltRnk.Filter(request).
# NOTE(review): the expression presumably selects requests whose
# WEB.RankingModel search property is "tr" or "tr.regional" -- confirm
# against the libra TRequestFilter documentation.
fltRnk = libra.TRequestFilter()
fltRnk.Add("search_props_filter", "WEB.RankingModel==tr||WEB.RankingModel==tr.regional")
fltRnk.Init()

def collect(key, recs):
    """Turn one raw user session into per-(query, url) click/show records.

    The session is parsed with libra; only first-page requests that pass the
    module-level ``fltRnk`` filter, are ``TYandexWebRequest`` instances, have
    ``ServiceDomRegion == 'tr'`` and carry the ``testIdExp`` test-id are
    considered.  For each such request, every main block whose main result is
    a ``TWebResult`` with a ``'TdiSrc'`` marker at position 0 produces a record.

    Args:
        key: reduce key of the session (not used by this function).
        recs: raw session records, passed straight to ``libra.ParseSession``.

    Yields:
        ``Record(query + '\\t' + url, '', click + '\\t' + show + '\\t' +
        ranking_model + '\\t' + TdiSrc marker)`` where ``click`` is 1 if the
        result has at least one click (else 0) and ``show`` is always 1.
        Records whose key is 4096 characters or longer are dropped.
    """
    try:
        # ParseSession takes the whole raw session plus blockstat.dict and
        # returns a list of TRequest objects.
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: skip sessions that libra cannot parse, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except hid them).
        return

    for request in session:
        # Keep only requests to Yandex desktop web search (not video search
        # etc.).  .IsA('TMobileYandexWebRequest') covers mobiles and
        # .IsA('TTouchYandexWebRequest') covers smartphones and tablets; check
        # all of them together to accept every kind of search request.
        if not request.IsA("TYandexWebRequest") or not fltRnk.Filter(request) or request.PageNo != 0 or request.ServiceDomRegion != 'tr':
            continue

        # Reset per request: previously this name was either unbound
        # (NameError) or leaked the value of an earlier request whenever the
        # current request was not a TMiscRequestProperties.
        ranking_model = ""
        if request.IsA("TMiscRequestProperties"):
            ranking_model = request.SearchPropsValues.get("WEB.RankingModel", "")

        if not request.HasTestID(testIdExp):
            continue

        query = request.Query
        for block in request.GetMainBlocks():
            # The main TResult tells whether the block is an organic, direct
            # or wizard result.
            main_result = block.GetMainResult()
            if not main_result.IsA('TWebResult'):
                continue  # only organic results are counted
            if 'TdiSrc' not in main_result.Markers.keys():
                continue  # only results carrying a TdiSrc marker are counted

            # Only position 0 is collected for now.
            if str(main_result.Position) != '0':
                continue

            url = str(main_result.Url)
            show = 1
            click = int(len(main_result.GetClicks()) > 0)
            key_ = query + '\t' + url
            value_ = str(click) + '\t' + str(show) + '\t' + ranking_model + '\t' + main_result.Markers['TdiSrc']

            if len(key_) < 4096:
                yield Record(key_, '', value_)

def count_exp(key, recs):
    """Sum clicks and shows for one key, keeping it only if a "WEB" row exists.

    Each record value is expected to be
    ``click \\t show \\t ranking_model \\t marker``.

    Args:
        key: reduce key, reused unchanged as the output key.
        recs: records of the group; only ``.value`` is read.

    Yields:
        A single ``Record(key, '', total_clicks + '\\t' + total_shows)`` when
        at least one record of the group has marker ``"WEB"``; nothing
        otherwise.
    """
    clicks_sum = 0
    shows_sum = 0
    seen_wanted_marker = False

    for record in recs:
        fields = record.value.split('\t')
        # fields[2] is the ranking model; it is not needed for the totals.
        click_field, show_field, marker = fields[0], fields[1], fields[3]

        clicks_sum += int(click_field)
        shows_sum += int(show_field)

        # Switch the literal to "WEB_EXPERIMENTAL" to select groups by the
        # other marker value (experiment vs. not).
        seen_wanted_marker = seen_wanted_marker or marker == "WEB"

    if not seen_wanted_marker:
        return

    yield Record(key, '', '\t'.join((str(clicks_sum), str(shows_sum))))


def combine_relevance(key, records):
    """Join click/show totals with a relevance mark for the same key.

    Records are told apart by the number of tab-separated fields in their
    value: two fields is a sessions row (clicks, shows), one field is a
    relevance row (mark).  When several rows of one kind are present the last
    one wins.

    Args:
        key: reduce key of the group (the output key is taken from the last
            record instead).
        records: records of the group from both source tables.

    Yields:
        ``Record(last_record.key, '', clicks + '\\t' + shows + '\\t' + mark)``
        if both kinds of row were seen; nothing otherwise.
    """
    have_sessions_row = False
    have_relevance_row = False

    for row in records:
        parts = row.value.split('\t')
        field_count = len(parts)

        if field_count == 2:  # sessions table
            clicks, shows = parts
            have_sessions_row = True
        elif field_count == 1:  # relevance table
            r_mark = parts[0]
            have_relevance_row = True

    if not (have_sessions_row and have_relevance_row):
        return

    yield Record(row.key, '', '\t'.join((clicks, shows, r_mark)))


def url_first(key, records):
    """Re-key ``query \\t url`` records by url alone.

    Args:
        key: reduce key of the group (not used; each record's own key is
            split instead).
        records: records whose key is ``query \\t url`` and whose value is
            ``clicks \\t shows \\t r_mark``.

    Yields:
        ``Record(url, '', query + '\\t' + clicks + '\\t' + shows + '\\t' +
        r_mark)`` for every input record.

    Raises:
        ValueError: if a key or value does not split into exactly the expected
            number of tab-separated fields.
    """
    for row in records:
        query, url = row.key.split('\t')
        clicks, shows, r_mark = row.value.split('\t')
        # The query moves from the key into the front of the value.
        moved = '\t'.join((query, clicks, shows, r_mark))
        yield Record(url, '', moved)


def combine_trustworthiness(key, records):
    """Join url-keyed query statistics with a trustworthiness row.

    Records are told apart by the number of tab-separated fields in their
    value: four fields is a query row (query, clicks, shows, r_mark), two
    fields is a trustworthiness row (host, tw_mark).  When several rows of one
    kind are present the last one wins.

    Args:
        key: reduce key of the group (the output key is taken from the last
            record instead).
        records: records of the group from both source tables.

    Yields:
        ``Record(last_record.key, '', query \\t clicks \\t shows \\t r_mark
        \\t tw_mark \\t host)`` if both kinds of row were seen; nothing
        otherwise.
    """
    have_query_row = False
    have_tw_row = False

    for row in records:
        parts = row.value.split('\t')
        field_count = len(parts)

        if field_count == 4:  # query table
            query, clicks, shows, r_mark = parts
            have_query_row = True
        elif field_count == 2:  # tw table
            host, tw_mark = parts
            have_tw_row = True

    if not (have_query_row and have_tw_row):
        return

    joined = '\t'.join((query, clicks, shows, r_mark, tw_mark, host))
    yield Record(row.key, '', joined)




def prepare(key, records):
    """Re-key RELEVANT_PLUS rows by ``host \\t tw_mark`` for CTR aggregation.

    Each well-formed record value has six tab-separated fields:
    ``query, clicks, shows, r_mark, tw_mark, host``.

    The decision is made per record.  The previous implementation kept one
    sticky flag for the whole group and tested it inside the loop, so after
    the first RELEVANT_PLUS row every later row was emitted too, and rows
    without six fields re-emitted the values of an earlier row.

    Args:
        key: reduce key of the group (not used).
        records: records of the group; only ``.value`` is read.

    Yields:
        ``Record(host + '\\t' + tw_mark, '', clicks + '\\t' + shows)`` for
        every six-field record whose ``r_mark`` is ``'RELEVANT_PLUS'``.
    """
    for rec in records:
        items = rec.value.split('\t')
        if len(items) != 6:
            continue  # not a combined row; nothing valid to emit

        # items[0] is the query, which is not needed downstream.
        clicks, shows, r_mark, tw_mark, host = items[1:]
        if r_mark != 'RELEVANT_PLUS':
            continue

        yield Record(host + '\t' + tw_mark, '', clicks + '\t' + shows)

def finalize(key, records):
    """Aggregate clicks and shows of one group and compute its CTR.

    Args:
        key: reduce key of the group (the output key is taken from the last
            record, as before).
        records: records whose value is ``click \\t show`` (both integers).

    Yields:
        One ``Record(last_record.key, '', total_clicks \\t total_shows \\t
        CTR)``.  Nothing is yielded for an empty group, and the CTR is
        reported as 0.0 when the total number of shows is zero; both cases
        previously raised ZeroDivisionError.

    Raises:
        ValueError: if a value does not consist of exactly two tab-separated
            integers.
    """
    big_total_clicks = 0
    big_total_shows = 0
    last_key = None
    seen_any = False

    for rec in records:
        click, show = rec.value.split('\t')
        big_total_clicks += int(click)
        big_total_shows += int(show)
        last_key = rec.key
        seen_any = True

    if not seen_any:
        return

    if big_total_shows:
        # float() keeps true division under Python 2 as well.
        big_CTR = float(big_total_clicks) / float(big_total_shows)
    else:
        big_CTR = 0.0

    value = str(big_total_clicks) + '\t' + str(big_total_shows) + '\t' + str(big_CTR)
    yield Record(last_key, '', value)


def prepare_all(key, records):
    """Re-key every combined row by ``host \\t r_mark \\t tw_mark``.

    Each well-formed record value has six tab-separated fields:
    ``query, clicks, shows, r_mark, tw_mark, host``.

    Rows are handled independently.  The previous implementation used a
    sticky flag tested inside the loop, so a row without six fields that
    followed a valid one re-emitted the earlier row's values and inflated the
    totals computed downstream.

    Args:
        key: reduce key of the group (not used).
        records: records of the group; only ``.value`` is read.

    Yields:
        ``Record(host + '\\t' + r_mark + '\\t' + tw_mark, '', clicks + '\\t'
        + shows)`` for every six-field record.
    """
    for rec in records:
        items = rec.value.split('\t')
        if len(items) != 6:
            continue  # not a combined row; nothing valid to emit

        # items[0] is the query, which is not needed downstream.
        clicks, shows, r_mark, tw_mark, host = items[1:]
        yield Record(host + '\t' + r_mark + '\t' + tw_mark, '', clicks + '\t' + shows)



def main(position):
    """Run the reduce pipeline that produces per-host CTR tables.

    Stages, in order: count_exp -> combine_relevance -> url_first ->
    combine_trustworthiness -> prepare_all -> finalize.  The initial
    ``collect`` stage over the raw session tables and the
    RELEVANT_PLUS-only ``prepare``/``finalize`` branch are currently disabled
    and kept below as comments.

    Args:
        position: value substituted into the ``p{0}`` part of the
            per-position table names.
    """
    MapReduce.useDefaults(server='cedar00.search.yandex.net:8013', username='ranking',
                          mrExec='/Berkanavt/bin/mapreduce-dev', verbose=True)

    def hosts_table(template):
        # All per-position tables live under the same "hosts/" directory.
        return ('cansucullu/ANSEARCH-397/hosts/' + template).format(position)

    # Inputs that do not depend on the position.
    sessions_prefix = 'user_sessions/201503'
    relevance_marks = 'cansucullu/ANSEARCH-397/new-assessors-relevance'
    trust_marks = 'cansucullu/ANSEARCH-397/new-assessors-trustworthiness-ready'

    # Per-position intermediate and result tables.
    collected = hosts_table('p{0}-map')
    counted = hosts_table('ctrl-p{0}-reduce')
    with_relevance = hosts_table('ctrl-p{0}-relevance-combined')
    by_url = hosts_table('ctrl-p{0}-relevance-combined-url-first')
    with_trust = hosts_table('ctrl-p{0}-relevance-trustworthiness-combined-url-first')
    relevant_prepared = hosts_table('ctrl-p{0}-final-prepare')
    relevant_results = hosts_table('ctrl-p{0}-final-results')
    all_prepared = hosts_table('ctrl-p{0}-final-all-prepare')
    all_results = hosts_table('ctrl-p{0}-final-all-results')

    # Disabled: build `collected` from the daily session tables.
    #
    # for day in range(17, 25):
    #     MapReduce.runReduce(collect, srcTable=sessions_prefix + str(day), dstTable=collected,
    #                         files=['/home/cansucullu/bin/blockstat.dict'],  # we pass blockstat.dict
    #                         appendMode=True, sortMode=True)

    MapReduce.runReduce(count_exp, srcTable=collected, dstTable=counted,
                        appendMode=True, sortMode=True)
    MapReduce.runReduce(combine_relevance, srcTables=[counted, relevance_marks],
                        dstTable=with_relevance, appendMode=True, sortMode=True)
    MapReduce.runReduce(url_first, srcTable=with_relevance, dstTable=by_url,
                        appendMode=True, sortMode=True)
    MapReduce.runReduce(combine_trustworthiness, srcTables=[by_url, trust_marks],
                        dstTable=with_trust, sortMode=True)

    # Disabled: RELEVANT_PLUS-only branch.
    #
    # MapReduce.runReduce(prepare, srcTable=with_trust, dstTable=relevant_prepared, sortMode=True)
    # MapReduce.runReduce(finalize, srcTable=relevant_prepared, dstTable=relevant_results, sortMode=True)

    MapReduce.runReduce(prepare_all, srcTable=with_trust, dstTable=all_prepared, sortMode=True)
    MapReduce.runReduce(finalize, srcTable=all_prepared, dstTable=all_results, sortMode=True)


if __name__ == '__main__':
    # The position is a required argument; it is used to name the
    # per-position tables in main().  Exit with a usage message instead of an
    # IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: {0} <position>'.format(sys.argv[0]))
    # sys.argv entries are already strings, so no conversion is needed.
    main(sys.argv[1])
