# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime
import libra
import urllib, re,random

# Lazily built {code point: u' '} table; filled in by build_translation().
TRANSLATION = None


def build_translation():
    """Return the shared translation table that blanks out punctuation.

    The table maps the ordinal of every code point whose Unicode general
    category starts with 'P', plus each of the characters
    TAB LF VT FF CR $ + < = > ^ ` | ~, to a single space.  It is built on
    the first call and cached in the module-level TRANSLATION; later calls
    return that same dict object.

    Returns:
        dict mapping integer code points to u' ', usable as the argument of
        the unicode ``translate`` method.
    """
    global TRANSLATION
    if TRANSLATION is None:
        import sys
        import unicodedata

        # Python 2 spells these xrange/unichr, Python 3 range/chr; pick
        # whichever pair the running interpreter provides.
        try:
            code_points, to_char = xrange, unichr
        except NameError:
            code_points, to_char = range, chr

        category = unicodedata.category
        # sys.maxunicode is itself a valid code point, hence the "+ 1".
        table = {
            index: u' '
            for index in code_points(sys.maxunicode + 1)
            if category(to_char(index)).startswith('P')
        }
        for char in u'\t\n\x0b\x0c\r$+<=>^`|~':
            table[ord(char)] = u' '

        # Publish only the finished table so the cache is never partial.
        TRANSLATION = table
    return TRANSLATION


def normalize_query(query):
    """Canonicalise a UTF-8 encoded query.

    The query is decoded from UTF-8, translated through the table returned
    by build_translation(), lower-cased, stripped, and every run of two or
    more whitespace characters is replaced by one space.

    Returns:
        The normalised query re-encoded as UTF-8, or None when the input
        cannot be decoded as UTF-8.
    """
    try:
        text = query.decode('utf8')
    except UnicodeDecodeError:
        return None

    cleaned = text.translate(build_translation()).lower().strip()
    collapsed = re.sub(r'\s\s+', ' ', cleaned)
    return collapsed.encode('utf8')


def Reduce(key, recs):
    """Emit the raw 'GATEWAY.test-ids' value of each 'ru' touch web request.

    Args:
        key: user id string.  Only keys that start with 'y' and whose last
            10 characters parse as an integer are processed (presumably the
            cookie creation Unix timestamp -- confirm); any other key yields
            nothing.
        recs: the records grouped under ``key``; passed unchanged to
            libra.ParseSession together with './blockstat.dict'.

    Yields:
        Record(key, '', test_ids) for every request in the parsed session
        that is a 'TTouchYandexWebRequest', has ServiceDomRegion == 'ru' and
        carries 'GATEWAY.test-ids' in its SearchPropsValues.

    NOTE: the former split into control / experiment slots (test ids
    '17415' / '17414') with one output table per slot is disabled; only the
    raw test-id strings are written.
    """
    uid = key
    # Slice instead of index so an empty key is skipped rather than raising.
    if uid[:1] != 'y':
        return

    # The conversion is what can fail on a malformed key, not the slice,
    # so this is the statement that must be guarded.
    try:
        int(uid[-10:])
    except ValueError:
        return

    # Best effort: a session libra cannot parse is skipped.  Exception
    # (rather than a bare except) lets KeyboardInterrupt / GeneratorExit
    # propagate.
    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        return

    for request in session:
        if not request.IsA('TTouchYandexWebRequest'):
            continue
        if request.ServiceDomRegion != 'ru':
            continue

        search_props = request.SearchPropsValues
        if 'GATEWAY.test-ids' in search_props.keys():
            yield Record(uid, '', search_props['GATEWAY.test-ids'])



def transform(rec):
    """Yield one Record whose first field is ``rec.value``.

    The remaining two Record fields are empty strings.
    """
    rekeyed = Record(rec.value, '', '')
    yield rekeyed

def aggr(key, recs):
    """Yield a single Record(key, '', '') for the group; ``recs`` is ignored."""
    collapsed = Record(key, '', '')
    yield collapsed


def main():
    """Configure the MapReduce client and run Reduce over the selected days.

    For every selected day the table 'user_sessions/201510<day>' is reduced
    with Reduce into 'ensuetina/HTTPS_EXPERIMENTS/try_gateway', shipping
    blockstat.dict alongside the job.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode=True,
    )

    # Full list of days; currently overridden, so only day 26 is processed.
    all_days = ['20', '21', '22', '23', '24', '25', '26', '27']
    days = ['26']

    # Per-slot tables that belong to the disabled dstTables option below.
    slot_tables = [
        'ensuetina/HTTPS_EXPERIMENTS/TOUCH/new/tag_control',
        'ensuetina/HTTPS_EXPERIMENTS/TOUCH/new/tag_exp',
        'ensuetina/HTTPS_EXPERIMENTS/TOUCH/new/sp_control',
        'ensuetina/HTTPS_EXPERIMENTS/TOUCH/new/sp_exp',
    ]

    output_table = 'ensuetina/HTTPS_EXPERIMENTS/try_gateway'

    # NOTE(review): with more than one day every run targets the same
    # dstTable while appendMode is disabled -- confirm whether later runs
    # replace earlier output.
    for day in days:
        MapReduce.runReduce(
            Reduce,
            srcTable='user_sessions/201510' + day,
            dstTable=output_table,
            # dstTables=slot_tables,
            files=['/home/ensuetina/data/blockstat.dict'],
            # appendMode=True,
            sortMode=True,
        )


if __name__ == '__main__':
    main()
