# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime
import libra
import urllib, re, random, sys, cgi

def Reduce(key, recs):
    """Count suggest-bearing touch web requests in one user's libra session.

    Only keys starting with ``'y'`` are processed.  The records are parsed
    with ``libra.ParseSession`` using ``./blockstat.dict``; sessions that
    fail to parse are skipped.  Within the session only
    ``TTouchYandexWebRequest`` requests whose ``ServiceDomRegion`` is
    ``'ru'`` and that carry one of the two test ids are considered.

    Args:
        key: the reduce key (user id string).
        recs: the records grouped under ``key``.

    Yields:
        At most one ``Record(uid, 'LIBRA', <count>)`` where ``<count>`` is
        the number of matching requests for which ``GetSuggest()`` is truthy.
        The record goes to table 0 when the last matching request carried
        the control test id and to table 1 when it carried the experiment
        test id.  Nothing is yielded when the count is zero.
    """
    uid = key
    # startswith() also rejects an empty key, where uid[0] would raise
    # IndexError.
    if not uid.startswith('y'):
        return

    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session libra cannot parse is skipped, but
        # SystemExit / KeyboardInterrupt are no longer swallowed.
        return

    control_id = '17982'
    exp_id = '17983'

    suggest_count = 0
    table_index = -1  # -1 means no request with a known test id was seen

    for request in session:
        if not request.IsA('TTouchYandexWebRequest'):
            continue

        if request.ServiceDomRegion != 'ru':
            continue

        # The last matching request decides the destination table.
        if request.HasTestID(control_id):
            table_index = 0
        elif request.HasTestID(exp_id):
            table_index = 1
        else:
            continue

        if request.GetSuggest():
            suggest_count += 1

    if table_index != -1 and suggest_count > 0:
        yield Record(uid, 'LIBRA', str(suggest_count), tableIndex=table_index)

class getSess:
    """Reducer counting raw ``type=SUGGEST`` records per user.

    The instance is constructed with two collections of user ids
    (presumably the control and experiment groups -- confirm against the
    ``c.txt`` / ``e.txt`` inputs).  Users from the first collection are
    written to table 0, users from the second to table 1; all other users
    are dropped.
    """

    def __init__(self,c,e):
        """Store the two uid collections.

        Args:
            c: iterable of uid strings routed to table 0.
            e: iterable of uid strings routed to table 1.
        """
        # The reducer runs once per key, so membership tests must be O(1);
        # a list lookup would scan the whole uid list for every user.
        self.c = frozenset(c)
        self.e = frozenset(e)

    def __call__(self,key,recs):
        """Yield one ``Record(uid, 'US', <count>)`` for a tracked user.

        Args:
            key: the reduce key (user id string); empty keys are ignored.
            recs: records for this user; each must expose ``.value``.

        Yields:
            A single record whose value is the number of records containing
            ``'type=SUGGEST'``, or nothing when that number is zero or the
            user belongs to neither collection.
        """
        uid = key
        if uid == '':
            return

        if uid in self.c:
            table_index = 0
        elif uid in self.e:
            table_index = 1
        else:
            return

        # Count while streaming; the records themselves are not needed, so
        # they are not buffered in memory.
        suggest_count = 0
        for rec in recs:
            if 'type=SUGGEST' in rec.value:
                suggest_count += 1

        if suggest_count == 0:
            return

        yield Record(uid,'US',str(suggest_count), tableIndex = table_index)

def map_sess(rec):
    """Project a raw ``key=value`` log record onto a fixed column layout.

    The record value is split on tabs; every chunk containing ``'='`` is
    treated as a ``name=value`` pair.  Depending on the ``type`` field one
    record is emitted with the same key and subkey and a tab-separated
    value:

    * ``SUGGEST``: type, input, status, query, suggest_reqid
    * ``REQUEST``: type, query, reqid
    * ``ACCESS``:  type, reqid, request

    Missing fields are rendered as the string ``'None'``.  Records of any
    other type produce no output.
    """
    uid = rec.key
    sk = rec.subkey
    raw = rec.value

    fields = dict(chunk.split('=', 1) for chunk in raw.split('\t') if '=' in chunk)
    rtype = fields.get('type')

    # Column order of the emitted value for each supported record type.
    if rtype == 'SUGGEST':
        columns = ('input', 'status', 'query', 'suggest_reqid')
    elif rtype == 'REQUEST':
        columns = ('query', 'reqid')
    elif rtype == 'ACCESS':
        columns = ('reqid', 'request')
    else:
        return

    parts = [rtype]
    for name in columns:
        parts.append(str(fields.get(name)))
    yield Record(uid, sk, '\t'.join(parts))

def map_keys(rec):
    """Re-key records produced by ``map_sess`` for cross-validation tables.

    The record value is expected in the tab-separated layout that
    ``map_sess`` emits (first column is the record type).  Output tables:

    * table 0 -- keyed by reqid (for SUGGEST records: by suggest_reqid)
    * table 1 -- keyed by suggest_reqid
    * table 2 -- keyed by query text
    * table 3 -- keyed by uid: ACCESS lines that contain ``'callback'`` but
      not ``'suggest_reqid'``

    Every key is truncated to 200 characters.  Values in tables 0-2 are
    ``uid + '\\t' + line``; table 3 stores the bare line.

    Args:
        rec: record exposing ``key`` (uid), ``subkey`` and ``value``.

    Yields:
        ``Record`` objects routed with ``tableIndex`` as described above.
    """
    uid = rec.key
    sk = rec.subkey
    line = rec.value

    fields = line.split('\t')
    rtype = fields[0]

    # All re-keyed tables share one value layout.  The query table used to
    # get uid + line without the tab, which fused the uid with the type.
    value = uid + '\t' + line

    if rtype == 'SUGGEST':
        suggest_reqid = fields[4]
        yield Record(suggest_reqid[:200], sk, value, tableIndex = 0)
        yield Record(suggest_reqid[:200], sk, value, tableIndex = 1)

        query = fields[3]
        yield Record(query[:200], sk, value, tableIndex = 2)

    elif rtype == 'REQUEST':
        query = fields[1]
        reqid = fields[2]
        yield Record(reqid[:200], sk, value, tableIndex = 0)
        yield Record(query[:200], sk, value, tableIndex = 2)

    elif rtype == 'ACCESS':
        if 'callback' in line and 'suggest_reqid' not in line:
            yield Record(uid[:200], sk, line, tableIndex = 3)

        reqid = fields[1]
        yield Record(reqid[:200], sk, value, tableIndex = 0)

        # Only requests with a query string can carry a suggest_reqid.
        request = fields[2]
        _path, sep, qs = request.partition('?')
        if not sep:
            return

        # cgi.parse_qs is a deprecated alias of urlparse.parse_qs; import
        # the real function (with a Python 3 fallback).
        try:
            from urlparse import parse_qs
        except ImportError:
            from urllib.parse import parse_qs

        params = parse_qs(qs)
        if 'suggest_reqid' not in params:
            return

        suggest_reqid = params['suggest_reqid'][0]
        yield Record(suggest_reqid[:200], sk, value, tableIndex = 1)

def aggr(key,recs):
    """Pass through only the groups that contain more than one record.

    Keys shorter than 5 characters are dropped without reading the
    records.  For the remaining keys the whole group is collected and, if
    it holds at least two records, each one is re-emitted as
    ``Record(key, subkey, value)``.
    """
    if len(key) < 5:
        return

    group = list(recs)
    if len(group) <= 1:
        return

    for item in group:
        yield Record(item.key, item.subkey, item.value)

def join(key,recs):
    """Compare the ``LIBRA`` and ``US`` values recorded for one user.

    Records are told apart by their subkey.  Output tables:

    * table 0 -- both values present and equal ("ok")
    * table 1 -- both values present but different ("not ok")
    * table 2 -- the user had more than two records (emitted in addition
      to whichever of the other outputs applies)
    * table 3 -- one or both sources missing; the value holds the two
      presence flags as ``'<libra>\\t<us>'``
    """
    uid = key

    has_libra = False
    has_us = False
    libra_value = 0
    us_value = 0
    seen = 0

    for rec in recs:
        source = rec.subkey
        if source == 'LIBRA':
            has_libra = True
            libra_value = rec.value
        elif source == 'US':
            has_us = True
            us_value = rec.value
        seen += 1

    if seen > 2:
        yield Record(uid,'','',tableIndex = 2) # errors

    if not (has_libra and has_us):
        flags = str(int(has_libra)) + '\t' + str(int(has_us))
        yield Record(uid,'',flags, tableIndex = 3) # errors 2
        return

    pair = str(libra_value) + '\t' + str(us_value)
    if libra_value == us_value:
        yield Record(uid,'',pair, tableIndex = 0) # ok
    else:
        yield Record(uid,'',pair, tableIndex = 1) # not ok


def main():
    """Run the suggest-comparison MapReduce pipeline.

    Configures the MapReduce client and loads two uid lists from ``c.txt``
    and ``e.txt``.  For every date in the date list it then:

    1. counts SUGGEST records per user from the raw table (``getSess``),
    2. counts suggests per user from libra sessions (``Reduce``),
    3. joins both counts per group (``join``).

    The process then terminates through ``sys.exit(1)``; the ``aggr``
    stage written after that call is never executed.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        #testMode = True,
    )

    def read_lines(path):
        # Strip carriage returns and split on newlines; a trailing newline
        # leaves an empty last element.
        with open(path) as handle:
            return handle.read().replace('\r','').split('\n')

    control_uids = read_lines('c.txt')
    exp_uids = read_lines('e.txt')

    for day in ['1105']:
        src = 'user_sessions/2015' + day

        dt0 = 'ensuetina/TOUCH_SUGGEST_EXP/compare_suggests/c'
        dt1 = 'ensuetina/TOUCH_SUGGEST_EXP/compare_suggests/e'

        # Stage 1: suggest counts taken from the raw records.
        MapReduce.runReduce(
            getSess(control_uids, exp_uids),
            srcTable=src,
            dstTables=[dt0 + '_raw_us', dt1 + '_raw_us'],
            #files = ['/home/ensuetina/data/blockstat.dict'],
            #appendMode = True,
            sortMode=True,
        )

        # Stage 2: suggest counts taken from parsed libra sessions.
        MapReduce.runReduce(
            Reduce,
            srcTable=src,
            dstTables=[dt0 + '_libra', dt1 + '_libra'],
            files=['/home/ensuetina/data/blockstat.dict'],
            sortMode=True,
        )

        d0 = 'ensuetina/TOUCH_SUGGEST_EXP/compare_suggests/joins/ok'
        d1 = 'ensuetina/TOUCH_SUGGEST_EXP/compare_suggests/joins/not_ok'
        d2 = 'ensuetina/TOUCH_SUGGEST_EXP/compare_suggests/joins/errors_more_2_lines'
        d3 = 'ensuetina/TOUCH_SUGGEST_EXP/compare_suggests/joins/errors_some_missing'

        # Stage 3: join both counts, first for the c tables, then for e.
        for base, suffix in ((dt0, '_c'), (dt1, '_e')):
            MapReduce.runReduce(
                join,
                srcTables=[base + '_raw_us', base + '_libra'],
                dstTables=[d0 + suffix, d1 + suffix, d2 + suffix, d3 + suffix],
                sortMode=True,
            )

    # Disabled stage: projecting the raw session tables with map_sess.
#    MapReduce.runMap(map_sess,
#                     srcTable = dt0,
#                     dstTable = dt0 + '_mapped',
#                     sortMode = True
#                    )
#    MapReduce.runMap(map_sess,
#                     srcTable = dt1,
#                     dstTable = dt1 + '_mapped',
#                     sortMode = True
#                    )

    # NOTE(review): the script stops here with a non-zero exit status, so
    # everything below is unreachable -- confirm whether this is intended.
    sys.exit(1)

    d0 = 'ensuetina/TOUCH_SUGGEST_EXP/validate/reqids_'
    d1 = 'ensuetina/TOUCH_SUGGEST_EXP/validate/suggest_reqids_'
    d2 = 'ensuetina/TOUCH_SUGGEST_EXP/validate/queries_'
    d3 = 'ensuetina/TOUCH_SUGGEST_EXP/validate/callbacks_'

    # Disabled stage: re-keying the mapped tables with map_keys.
#    MapReduce.runMap(map_keys,
#                     srcTable = dt0 + '_mapped',
#                     dstTables = [d0 + 'c', d1 + 'c', d2 + 'c', d3 + 'c'],
#                     sortMode = True
#                    )
#    MapReduce.runMap(map_keys,
#                     srcTable = dt1 + '_mapped',
#                     dstTables = [d0 + 'e', d1 + 'e', d2 + 'e', d3 + 'e'],
#                     sortMode = True
#                    )

    d_0 = 'ensuetina/TOUCH_SUGGEST_EXP/validate/aggr/1/reqids_'
    d_1 = 'ensuetina/TOUCH_SUGGEST_EXP/validate/aggr/1/suggest_reqids_'
    d_2 = 'ensuetina/TOUCH_SUGGEST_EXP/validate/aggr/1/queries_'
    d_3 = 'ensuetina/TOUCH_SUGGEST_EXP/validate/aggr/1/callbacks_'

    # Aggregate every validation table: all four c tables first, then e.
    table_pairs = ((d0, d_0), (d1, d_1), (d2, d_2), (d3, d_3))
    for group in ('c', 'e'):
        for src_prefix, dst_prefix in table_pairs:
            MapReduce.runReduce(
                aggr,
                srcTable=src_prefix + group,
                dstTable=dst_prefix + group,
                sortMode=True,
            )


# Run the pipeline only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
