# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime
import libra
import urllib, re,random

# Lazily built cache for build_translation(); None until the first call.
TRANSLATION = None


def build_translation():
    """Return the table used to blank out punctuation, building it once.

    The table maps code points to a single space (u' ') and is suitable for
    ``unicode.translate``. It contains every code point in
    ``xrange(sys.maxunicode)`` whose Unicode category starts with 'P', plus
    the control/symbol characters listed explicitly below. The result is
    cached in the module-level TRANSLATION, so later calls return the same
    dict object.
    """
    global TRANSLATION
    if TRANSLATION is not None:
        return TRANSLATION

    import sys
    import unicodedata

    table = {}
    for code_point in xrange(sys.maxunicode):
        if unicodedata.category(unichr(code_point)).startswith('P'):
            table[code_point] = u' '

    # Characters that are not in a 'P*' category but are blanked as well.
    for symbol in u'\t\n\x0b\x0c\r$+<=>^`|~':
        table[ord(symbol)] = u' '

    TRANSLATION = table
    return TRANSLATION


def normalize_query(query):
    """Normalize a UTF-8 encoded query string.

    The query is decoded from UTF-8, every character present in the
    build_translation() table is replaced by a space, the text is
    lower-cased and stripped, and runs of two or more whitespace characters
    are collapsed into one space. The result is returned UTF-8 encoded.

    Returns None when the input is not valid UTF-8.
    """
    try:
        text = query.decode('utf8')
    except UnicodeDecodeError:
        return None

    cleaned = text.translate(build_translation()).lower().strip()
    collapsed = re.sub(r'\s\s+', ' ', cleaned)
    return collapsed.encode('utf8')


# Test-ids of the two slots compared by this job.
_CONTROL_TEST_ID = '17369'
_EXPERIMENT_TEST_ID = '17370'

# A click is treated as "long" when int(DwellTimeOnService) exceeds this.
_LONG_DWELL = 15

# Per-slot settings, checked in this order (control first):
#   (test-id, output table index, slot label,
#    marker searched in blockstat block paths to find the video wizard,
#    markers searched in click paths to count wizard clicks,
#    whether dynamic clicks are ignored on the result below the wizard)
_SLOTS = (
    (_CONTROL_TEST_ID, 0, 'control ' + _CONTROL_TEST_ID,
     'snippet/video/', ('snippet/video/',), False),
    (_EXPERIMENT_TEST_ID, 1, 'exp ' + _EXPERIMENT_TEST_ID,
     'snippet/video/clip',
     ('snippet/video/clip', 'snippet/video/player/click'), True),
)


def _slot_for(request):
    """Return the first _SLOTS entry whose test-id the request has, else None."""
    for slot in _SLOTS:
        if request.HasTestID(slot[0]):
            return slot
    return None


def _find_video_wizard(request, block_marker):
    """Return (path, pos) of the video wizard among the blockstat blocks.

    ``path`` is the path of the last matching block visited (None when no
    block matches). ``pos`` is the value of that block's 'pos' variable with
    every 'p' removed, or the int -1 when no matching block carried one.
    Scanning stops at the first matching block after which a position is known.
    """
    wizard_path = None
    pos = -1
    for block in request.GetBSBlocks():
        path = block.Path
        if block_marker not in path:
            continue
        wizard_path = path
        for var in block.GetVars():
            if var[0] == 'pos':
                pos = var[1].replace('p', '')
        # pos stays the int sentinel until a 'pos' variable was seen; compare
        # against the sentinel instead of relying on str-vs-int ordering.
        if pos != -1:
            break
    return wizard_path, pos


def _count_wizard_clicks(request, click_markers):
    """Return (is_win, all_clicks, dynamic_clicks) for clicks on the wizard.

    A click belongs to the wizard when its ConvertedPath contains any of
    ``click_markers``. ``is_win`` is 1 when at least one such click is long.
    """
    is_win = 0
    all_clicks = 0
    dynamic_clicks = 0
    for click in request.GetClicks():
        path = click.ConvertedPath
        if not any(marker in path for marker in click_markers):
            continue
        all_clicks += 1
        if 'dynamic_click' in str(click.GetVars()):
            dynamic_clicks += 1
        if int(click.DwellTimeOnService) > _LONG_DWELL:
            is_win = 1
    return is_win, all_clicks, dynamic_clicks


def _long_click_below_wizard(request, pos_index, skip_dynamic):
    """Return 1 if the result right below the wizard got a long click, else 0.

    Main blocks whose main result is a 'TDirectResult' are not counted when
    locating the block with index ``pos_index + 1``.
    """
    index = 0
    for block in request.GetMainBlocks():
        if block.GetMainResult().IsA('TDirectResult'):
            continue
        if index != pos_index + 1:
            index += 1
            continue
        is_loss = 0
        for click in block.GetClicks():
            if skip_dynamic and 'dynamic_click' in str(click.GetVars()):
                continue
            if int(click.DwellTimeOnService) > _LONG_DWELL:
                is_loss = 1
        return is_loss
    return 0


def _result_heights(request, pos, pos_index):
    """Return (heights, wizard_height, next_height) from the tech events.

    Uses the first matching tech event that carries a '-res-heights'
    variable. Without such an event the result is ('-', '', ''). When the
    wizard position is missing from the heights both heights are '-'. A
    missing next-result height is reported as the string 'None', exactly as
    the output format has always done.
    """
    heights = '-'
    wizard_height = ''
    next_height = ''
    for tech in request.GetYandexTechEvents():
        if not tech.IsA('TYandexTechEvent'):
            continue
        path = tech.Path
        if '71.143.1007' not in path and 'serp.page.loaded' not in path:
            continue
        variables = dict(
            item.split('=', 1) for item in tech.Vars.split(',') if '=' in item
        )
        if '-res-heights' not in variables:
            continue
        heights = dict(
            item.split(':', 1)
            for item in variables['-res-heights'].split(';')
            if ':' in item
        )
        if str(pos) in heights:
            wizard_height = heights[str(pos)]
            next_height = str(heights.get(str(pos_index + 1)))
        else:
            wizard_height = '-'
            next_height = '-'
        break
    return heights, wizard_height, next_height


def Reduce(key, recs):
    """Emit one row per 'tr' web request that showed the video wizard.

    Only keys starting with 'y' are processed. The records are parsed into a
    session with libra; sessions that fail to parse are skipped. For every
    'TYandexWebRequest' with ServiceDomRegion 'tr' that belongs to the
    control or experiment test-id and contains a video wizard block with a
    non-negative position, a Record keyed by ``key`` is yielded to table 0
    (control) or table 1 (experiment). Its value is tab-separated:

        date, normalized query, ReqID, slot label, wizard path, isWin,
        isLoss, wizard clicks, dynamic wizard clicks, wizard position,
        wizard height, next result height, heights dict, mut, muvt

    Requests whose wizard position is not an integer, or whose query cannot
    be normalized, are skipped instead of aborting the whole reduce.
    """
    uid = key
    # startswith() also copes with an empty key, where uid[0] would raise.
    if not uid.startswith('y'):
        return

    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: an unparsable session is dropped, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return

    for request in session:
        if not request.IsA('TYandexWebRequest'):
            continue
        if request.ServiceDomRegion != 'tr':
            continue

        slot = _slot_for(request)
        if slot is None:
            continue
        (_test_id, table_index, slot_label,
         block_marker, click_markers, skip_dynamic) = slot

        wizard_path, pos = _find_video_wizard(request, block_marker)
        try:
            pos_index = int(pos)
        except ValueError:
            # Malformed 'pos' variable: skip the request.
            continue
        if pos_index <= -1:
            # No wizard block, or none that reported a position.
            continue

        query = normalize_query(request.Query)
        if query is None:
            # Query was not valid UTF-8; it cannot be written to the row.
            continue

        is_win, all_clicks, dynamic_clicks = _count_wizard_clicks(
            request, click_markers)
        is_loss = _long_click_below_wizard(request, pos_index, skip_dynamic)
        heights, wizard_height, next_height = _result_heights(
            request, pos, pos_index)

        day = datetime.fromtimestamp(request.Timestamp).date().isoformat()
        relev = request.RelevValues

        fields = [
            day,
            query,
            request.ReqID,
            slot_label,
            wizard_path,
            str(is_win),
            str(is_loss),
            str(all_clicks),
            str(dynamic_clicks),
            str(pos),
            str(wizard_height),
            next_height,
            str(heights),
            str(relev.get('mut')),
            str(relev.get('muvt')),
        ]
        yield Record(uid, '', '\t'.join(fields), tableIndex=table_index)



def transform(rec):
    """Yield one Record built from ``rec.value`` and two empty strings."""
    rekeyed = Record(rec.value, '', '')
    yield rekeyed

def aggr(key, recs):
    """Yield one Record built from ``key`` and two empty strings.

    ``recs`` is not read, so each key produces exactly one output record.
    """
    collapsed = Record(key, '', '')
    yield collapsed


def main():
    """Run Reduce over the user_sessions tables 20150926 .. 20151002.

    Sets the MapReduce defaults, then runs one reduce per source table.
    Every run appends to the same two destination tables; their order
    (control, exp) matches the tableIndex values 0 and 1 emitted by Reduce.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode=True,  # left disabled, as in the original configuration
    )

    control_table = 'ensuetina/PlayerSurplus/NEWCALC/control'
    experiment_table = 'ensuetina/PlayerSurplus/NEWCALC/exp'
    days = ['0926', '0927', '0928', '0929', '0930', '1001', '1002']

    for day in days:
        MapReduce.runReduce(
            Reduce,
            srcTable='user_sessions/2015' + day,
            dstTables=[control_table, experiment_table],
            files=['/home/ensuetina/data/blockstat.dict'],
            appendMode=True,
            sortMode=True,
        )


# Run the job only when the file is executed as a script.
if __name__ == '__main__':
    main()
