# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime, timedelta, date
import libra
import urllib, re,random

# Lazily built cache for the table returned by build_translation().
TRANSLATION = None


def build_translation():
    """Return the code-point -> u' ' table used to blank out characters.

    The table maps every code point below ``sys.maxunicode`` whose Unicode
    category starts with 'P' (punctuation), plus a fixed set of ASCII
    control/symbol characters, to a single space. It is suitable for
    ``unicode.translate``. The table is built on the first call only and
    kept in the module-level ``TRANSLATION`` afterwards.
    """
    global TRANSLATION
    if TRANSLATION is not None:
        return TRANSLATION

    import sys
    import unicodedata

    blank = u' '
    table = {}
    for code_point in xrange(sys.maxunicode):
        if unicodedata.category(unichr(code_point))[0] == 'P':
            table[code_point] = blank

    # Whitespace controls and symbols that are not in a 'P*' category
    # but must be blanked as well.
    table.update((ord(symbol), blank) for symbol in u'\t\n\x0b\x0c\r$+<=>^`|~')

    TRANSLATION = table
    return table


def normalize_query(query):
    """Normalize a UTF-8 encoded query for exact/substring comparison.

    The query is decoded, every character listed in the table from
    ``build_translation()`` is replaced by a space, the text is lowercased
    and stripped, and each run of two or more whitespace characters (as
    matched by ``\\s``) is collapsed to one space.

    Returns the normalized query re-encoded as UTF-8, or ``None`` when the
    input is not valid UTF-8.
    """
    try:
        text = query.decode('utf8')
    except UnicodeDecodeError:
        return None

    cleaned = text.translate(build_translation()).lower().strip()
    collapsed = re.sub(r'\s\s+', ' ', cleaned)
    return collapsed.encode('utf8')


def Reduce(key, recs):
    """Emit click statistics for 'яндекс браузер' web requests of one user.

    Args:
        key: user id of the session; only ids starting with 'y' are
            processed.
        recs: raw session records, passed to ``libra.ParseSession``
            together with the local ``./blockstat.dict`` file.

    Yields:
        ``Record(uid, '', value)`` for every ``TYandexWebRequest`` with
        ``ServiceDomRegion == 'ru'`` whose normalized query either equals
        'яндекс браузер', or contains it while the request has a blockstat
        block whose path contains 'parallel/object-answer/badge'.
        ``value`` is tab-separated:

            date (YYYY-MM-DD, local time of the worker)
            normalized query
            1 if any non-direct main block was clicked, else 0
            number of clicks on the first non-direct TWebResult main block
            number of clicks whose path contains 'wiz/pseudo_fast/browser__one_'
            number of clicks whose path contains 'stripe/promobar/download'
            number of clicks whose path contains 'bs_stats/banners/mkb'

    Sessions that fail to parse and queries that are empty or not valid
    UTF-8 are skipped silently.
    """
    uid = key
    # startswith() (unlike uid[0]) does not raise on an empty key.
    if not uid.startswith('y'):
        return

    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: an unparsable session is dropped, but interpreter
        # level signals (KeyboardInterrupt, SystemExit) still propagate.
        return

    target = 'яндекс браузер'

    for request in session:
        if not request.IsA('TYandexWebRequest'):
            continue
        if request.ServiceDomRegion != 'ru':
            continue

        day = datetime.fromtimestamp(request.Timestamp).date().isoformat()

        query = normalize_query(request.Query)
        # normalize_query returns None for invalid UTF-8 and '' for a
        # query consisting only of punctuation/whitespace; skip both.
        if not query:
            continue

        if query != target:
            # A non-exact query qualifies only if it mentions the target
            # AND the response carries the object-answer badge block.
            if target not in query:
                continue
            has_badge = any(
                'parallel/object-answer/badge' in blk.Path
                for blk in request.GetBSBlocks()
            )
            if not has_badge:
                continue

        any_click = 0
        first_organic_clicks = 0
        seen_organic = False

        for blk in request.GetMainBlocks():
            result = blk.GetMainResult()
            if result.IsA('TDirectResult'):
                continue
            click_count = sum(1 for _ in blk.GetClicks())
            # Only the first TWebResult block contributes to this counter,
            # even when it received no clicks.
            if not seen_organic and result.IsA('TWebResult'):
                seen_organic = True
                first_organic_clicks += click_count
            if click_count:
                any_click = 1

        pseudo_fast = 0
        promobar = 0
        bs_stats = 0

        for click in request.GetClicks():
            path = click.ConvertedPath
            if 'wiz/pseudo_fast/browser__one_' in path:
                pseudo_fast += 1
            elif 'stripe/promobar/download' in path:
                promobar += 1
            elif 'bs_stats/banners/mkb' in path:
                bs_stats += 1

        fields = [
            day,
            query,
            str(any_click),
            str(first_organic_clicks),
            str(pseudo_fast),
            str(promobar),
            str(bs_stats),
        ]
        yield Record(uid, '', '\t'.join(fields))



def main():
    """Run ``Reduce`` over a fixed list of January 2016 session tables.

    Configures the MapReduce client defaults, then for each listed day
    prints the day and reduces ``user_sessions/201601<day>`` into the
    single destination table in append mode, shipping the blockstat
    dictionary file with the job.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode=True,
    )

    # NOTE: an earlier (disabled) variant walked consecutive dates starting
    # at 2016-01-05 up to 2016-01-18 with timedelta(days=1); the explicit
    # day list below is what actually runs.
    destination = 'ensuetina/ASSOC_CLOSURE/data'

    for day in ['09', '10', '12', '14', '15']:
        # Parenthesised single argument: same output as the bare
        # Python 2 print statement.
        print(day)

        MapReduce.runReduce(
            Reduce,
            srcTable='user_sessions/201601' + day,
            dstTable=destination,
            files=['/home/ensuetina/data/blockstat.dict'],
            appendMode=True,
            sortMode=True,
        )


# Launch the jobs only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
