# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
import libra

# Module-level request filter used by MyMap (via fltRnk.Filter). It is
# configured with a "search_props_filter" expression on WEB.RankingModel
# that names the values `tr` and `tr.regional`.
# NOTE(review): the exact matching semantics of the expression are defined by
# libra.TRequestFilter - confirm against the libra documentation.
fltRnk = libra.TRequestFilter()
fltRnk.Add("search_props_filter", "WEB.RankingModel==tr||WEB.RankingModel==tr.regional")
# Init() is called once, after the filter expression has been added.
fltRnk.Init()

def MyMap(key, recs):
    """Emit per-result click/show counters for one raw user session.

    The raw session records are parsed with libra; for every request that
    passes the checks below, each main block whose main result is a
    'TWebResult' at position 0-9 produces two records that share the key
    "<query>\\t<position>\\t<url>":

    * tableIndex=0: "<click>\\t1", click is 1 if the main result has clicks;
    * tableIndex=1: "<click>\\t1", click is 1 if the whole block has clicks.

    The trailing 1 is the show counter (one show per emitted result).

    Args:
        key: session key supplied by the MapReduce runtime (not used).
        recs: raw records of the session, passed to libra.ParseSession.

    Yields:
        Record objects as described above. Sessions that cannot be parsed
        yield nothing.
    """
    try:
        # ParseSession takes the whole raw session plus blockstat.dict and
        # returns a list of TRequest objects.
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: unparsable sessions are skipped. Catching Exception
        # instead of using a bare `except:` lets KeyboardInterrupt and
        # SystemExit propagate.
        return

    # Positions are compared as strings because they are embedded in the key.
    top_positions = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')

    for request in session:
        # Keep only first-page Yandex web requests accepted by the
        # ranking-model filter and served for the 'tr' domain region.
        if (not request.IsA("TYandexWebRequest")
                or not fltRnk.Filter(request)
                or request.PageNo != 0
                or request.ServiceDomRegion != 'tr'):
            continue

        query = request.Query

        for block in request.GetMainBlocks():
            main_result = block.GetMainResult()
            if not main_result.IsA('TWebResult'):
                continue

            url = str(main_result.Url)
            pos = str(main_result.Position)

            # Keep only the positions 0-9.
            if pos not in top_positions:
                continue

            clicked_result = int(len(main_result.GetClicks()) > 0)
            clicked_block = int(len(block.GetClicks()) > 0)
            show = 1

            key_ = query + '\t' + pos + '\t' + url
            value_1 = str(clicked_result) + '\t' + str(show)
            value_2 = str(clicked_block) + '\t' + str(show)

            # Overlong keys are dropped.
            # NOTE(review): 4096 presumably mirrors a key-size limit of the
            # MapReduce storage - confirm.
            if len(key_) < 4096:
                yield Record(key_, '', value_1, tableIndex=0)
                yield Record(key_, '', value_2, tableIndex=1)


def MyReduce(key, recs):
    """Sum the click and show counters of all records that share a key.

    Each record value is expected to look like "<click>\\t<show>" with
    integer fields; only the first two tab-separated fields are read.

    Args:
        key: the group key; reused unchanged for the output record.
        recs: iterable of records whose `value` holds the counters.

    Yields:
        A single Record(key, '', "<total_clicks>\\t<total_shows>").
    """
    clicks_sum = 0
    shows_sum = 0

    for record in recs:
        fields = record.value.split('\t')
        click_field, show_field = fields[0], fields[1]
        clicks_sum += int(click_field)
        shows_sum += int(show_field)

    yield Record(key, '', '%d\t%d' % (clicks_sum, shows_sum))


def map(key, records):
    """Identity mapper: re-emit every record with an empty subkey.

    Args:
        key: key supplied by the MapReduce runtime (not used; each record's
            own key is copied instead).
        records: iterable of input records.

    Yields:
        Record(record.key, '', record.value) for every input record.
    """
    for record in records:
        yield Record(record.key, '', record.value)

def combine(key, records):
    """Join the 3-field and the 2-field record of one key into one record.

    Record values are split on tabs. A value with three fields is taken as
    (clicks, shows, ctr) and a value with two fields as (r_mark, t_mark);
    values of any other width are ignored. If several records of the same
    width occur, the last one wins.

    Args:
        key: key supplied by the MapReduce runtime (not used; the key of the
            last record seen is emitted instead).
        records: iterable of records for this key.

    Yields:
        One Record whose value is
        "<clicks>\\t<shows>\\t<ctr>\\t<r_mark>\\t<t_mark>", and only when
        both a 3-field and a 2-field value were seen.
    """
    stats = None   # (clicks, shows, ctr) from the latest 3-field value
    marks = None   # (r_mark, t_mark) from the latest 2-field value
    last = None    # last record iterated; provides the output key

    for last in records:
        fields = last.value.split('\t')
        width = len(fields)
        if width == 3:
            # NOTE(review): the original author flagged an unresolved
            # problem with a tab in the "clicks=" field here - see the
            # upstream table format before relying on this branch.
            stats = tuple(fields)
        elif width == 2:
            marks = tuple(fields)

    if stats is None or marks is None:
        return

    yield Record(last.key, '', '\t'.join(stats + marks))


def reverse(key, records):
    """Re-key records by their (r_mark, t_mark) pair.

    Each record key must split into exactly two tab-separated fields and each
    value into exactly five (clicks, shows, ctr, r_mark, t_mark); otherwise
    the unpacking raises ValueError. The clicks and shows fields are expected
    in "<name>=<number>" form; the part after the first '=' is emitted.

    Args:
        key: key supplied by the MapReduce runtime (not used).
        records: iterable of input records.

    Yields:
        Record("<r_mark>\\t<t_mark>", '', "<clicks>\\t<shows>") per record.
    """
    for record in records:
        # Unpacked only to enforce the two-field key shape; the parts
        # themselves are not used.
        _query, _url = record.key.split('\t')

        fields = record.value.split('\t')
        clicks, shows, _ctr, r_mark, t_mark = fields

        clicks_num = clicks.split('=')[1]
        shows_num = shows.split('=')[1]

        yield Record('\t'.join((r_mark, t_mark)), '',
                     '\t'.join((clicks_num, shows_num)))

def finalize(key, records):
    """Aggregate clicks and shows for one key and compute the overall CTR.

    Each record value must be "<clicks>\\t<shows>" with integer fields.

    Args:
        key: key supplied by the MapReduce runtime (not used; the key of the
            last record seen is emitted instead).
        records: iterable of records for this key.

    Yields:
        One Record whose value is "<total_clicks>\\t<total_shows>\\t<ctr>",
        where ctr = total_clicks / total_shows as a float. Nothing is
        yielded for an empty `records`; a group whose shows sum to zero gets
        a CTR of 0.0 instead of raising ZeroDivisionError.
    """
    clicks_total = 0
    shows_total = 0
    last_rec = None  # last record iterated; provides the output key

    for last_rec in records:
        click, show = last_rec.value.split('\t')
        clicks_total += int(click)
        shows_total += int(show)

    if last_rec is None:
        # No input records: there is no key to emit and nothing to divide.
        return

    # Avoid dividing by zero when the group recorded no shows at all.
    if shows_total:
        ctr = float(clicks_total) / float(shows_total)
    else:
        ctr = 0.0

    value = str(clicks_total) + '\t' + str(shows_total) + '\t' + str(ctr)
    yield Record(last_rec.key, '', value)


def main():
    """Run the extraction job over the daily tables, then both aggregations.

    Step 1 runs MyMap over 'tr/user_sessions/20150414' through
    'tr/user_sessions/20150420', appending its two output streams to the
    "map-1" and "map-2" tables. Step 2 runs MyReduce over each of those
    tables, writing to the matching "reduce-1" / "reduce-2" table.
    """
    MapReduce.useDefaults(
        server='cedar00.search.yandex.net:8013',
        username='ranking',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
    )

    sessions_prefix = 'tr/user_sessions/201504'
    map_out_1 = 'cansucullu/ANSEARCH-397/QUERY/kiraz-mevsimi-facebook-map-1'
    reduce_out_1 = 'cansucullu/ANSEARCH-397/QUERY/kiraz-mevsimi-facebook-reduce-1'
    map_out_2 = 'cansucullu/ANSEARCH-397/QUERY/kiraz-mevsimi-facebook-map-2'
    reduce_out_2 = 'cansucullu/ANSEARCH-397/QUERY/kiraz-mevsimi-facebook-reduce-2'

    # Step 1: one job per day (14..20); MyMap's tableIndex 0/1 map onto the
    # two destination tables in this order.
    for day in range(14, 21):
        MapReduce.runReduce(
            MyMap,
            srcTable=sessions_prefix + str(day),
            dstTables=[map_out_1, map_out_2],
            files=['/home/cansucullu/bin/blockstat.dict'],
            appendMode=True,
            sortMode=True,
        )

    # Step 2: aggregate each intermediate table into its result table.
    for mapped, reduced in ((map_out_1, reduce_out_1),
                            (map_out_2, reduce_out_2)):
        MapReduce.runReduce(
            MyReduce,
            srcTable=mapped,
            dstTable=reduced,
            appendMode=True,
            sortMode=True,
        )


if __name__ == '__main__':
    # Script entry point: run the whole pipeline when executed directly.
    # NOTE(review): the disabled line below would read a position from the
    # command line; re-enabling it also requires `import sys`.
    #position = sys.argv[1]
    main()
