# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime
import libra
import urllib, re,random

# Module-level cache for the character translation table.  It stays None
# until build_translation() is called for the first time, which fills it in.
TRANSLATION = None

def build_translation():
    """Return the shared "character -> space" translation table.

    The table is a dict keyed by code point (int) whose values are all
    ``u' '``.  It contains every code point below ``sys.maxunicode`` whose
    Unicode category starts with ``'P'`` (punctuation), plus the characters
    in the explicit extra list below.  It is meant to be passed to
    ``unicode.translate``.

    The table is built on the first call only and stored in the module-level
    ``TRANSLATION`` variable; later calls return that same dict.
    """
    global TRANSLATION
    if TRANSLATION is not None:
        return TRANSLATION

    # Imported lazily: only needed for the one-off table construction.
    import sys
    import unicodedata

    table = {}
    for code_point in xrange(sys.maxunicode):
        if unicodedata.category(unichr(code_point)).startswith('P'):
            table[code_point] = u' '

    # Control whitespace and symbol characters that are not in a 'P*'
    # category but should be blanked out as well.
    for extra in u'\t\n\x0b\x0c\r$+<=>^`|~':
        table[ord(extra)] = u' '

    TRANSLATION = table
    return TRANSLATION


def normalize_query(query):
    """Normalize a UTF-8 encoded query string.

    Steps: decode from UTF-8, replace every character listed in the
    ``build_translation()`` table with a space, lowercase, strip surrounding
    whitespace and collapse each run of two or more whitespace characters
    into a single space.

    Returns the normalized query encoded back to UTF-8, or ``None`` when the
    input is not valid UTF-8.
    """
    try:
        text = query.decode('utf8')
    except UnicodeDecodeError:
        # Undecodable input: signal it to the caller with None.
        return None

    text = text.translate(build_translation()).lower().strip()
    collapsed = re.sub(r'\s\s+', ' ', text)
    return collapsed.encode('utf8')


def Reduce(key, recs):
    """Emit football-related Turkish web requests and their promobar clicks.

    Args:
        key: reduce key, used as the user id.  Keys that do not start with
            ``'y'`` (including an empty key) are ignored.
        recs: the records for this key; handed to ``libra.ParseSession``
            together with the ``./blockstat.dict`` dictionary.

    Yields:
        ``Record(uid, '', value, tableIndex=0)`` for every
        ``TYandexWebRequest`` with ``ServiceDomRegion == 'tr'`` whose
        normalized query contains one of the team names below.  ``value`` is
        tab-separated: date, time, normalized query, stripe flag, group-1
        flag, group-2 flag, request id, matched group-1 word, matched
        group-2 word.

        ``Record(..., tableIndex=1)`` for every click of such a request whose
        converted path contains ``'stripe/promobar/'``; same fields with the
        click path inserted after the request id.

    Sessions that fail to parse and requests whose query is not valid UTF-8
    are skipped.
    """
    uid = key
    # startswith() also rejects an empty key instead of raising IndexError.
    if not uid.startswith('y'):
        return

    # Two groups of team names; the first match inside each group wins.
    words1 = ['akhisar belediyespor', 'antalyaspor', 'konyaspor', 'gençlerbirliği']
    words2 = ['beşiktaş', 'kasımpaşa']
    promobar = 'stripe/promobar/'

    try:
        s = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed is dropped, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return

    for r in s:
        if not r.IsA('TYandexWebRequest'):
            continue

        if r.ServiceDomRegion != 'tr':
            continue

        # Naive local-time ISO timestamp split into its date and time parts.
        dd, tt = datetime.fromtimestamp(r.Timestamp).isoformat().split('T')

        q = normalize_query(r.Query)
        if q is None:
            # normalize_query() returns None for queries that are not valid
            # UTF-8; skip them instead of failing on the substring tests.
            continue
        reqid = r.ReqID

        w1 = next((word for word in words1 if word in q), '')
        w2 = next((word for word in words2 if word in q), '')
        if not (w1 or w2):
            continue

        isFootball1 = 1 if w1 else 0
        isFootball2 = 1 if w2 else 0

        # 1 when any blockstat block of the request belongs to the promobar.
        isStripe = int(any(promobar in bl.Path for bl in r.GetBSBlocks()))

        common = [dd, tt, q, str(isStripe), str(isFootball1), str(isFootball2), reqid]

        # Table 0: all matching requests.
        yield Record(uid, '', '\t'.join(common + [w1, w2]), tableIndex=0)

        # Table 1: promobar clicks of those requests.
        for cl in r.GetClicks():
            p = cl.ConvertedPath
            if promobar in p:
                yield Record(uid, '', '\t'.join(common + [p, w1, w2]), tableIndex=1)



def main(run_daily=False):
    """Configure the MapReduce client and run ``Reduce`` over the fast logs.

    The main pass reads the ``fast_logs/user_sessions/<timestamp>`` tables
    for every 1800-second step in ``[1446228000, 1446285600)`` and writes to
    two destination tables: ``..._1030`` (requests) and ``..._1030_clicks``
    (promobar clicks).  The table count and names are printed before the run.

    Args:
        run_daily: when true, first run ``Reduce`` over the daily
            ``user_sessions/201510<day>`` tables as well.  Defaults to
            False, which matches the previous behaviour where that pass was
            skipped by an unconditional ``continue``.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode = True,
    )

    # Defined up front so the main pass does not depend on the optional
    # daily loop having executed.
    dst_prefix = 'ensuetina/TR_FOOTBALL/result_'
    blockstat_dict = '/home/ensuetina/data/blockstat.dict'

    if run_daily:
        for day in ['28']:
            MapReduce.runReduce(
                Reduce,
                srcTable='user_sessions/201510' + day,
                dstTable=dst_prefix + 'us',
                files=[blockstat_dict],
                sortMode=True,
            )

    start = 1446228000
    finish = 1446285600
    step = 1800  # seconds between consecutive fast-log tables

    tables = [
        'fast_logs/user_sessions/' + str(ts)
        for ts in range(start, finish, step)
    ]

    # Single-argument print(...) behaves the same as the print statement.
    print(len(tables))
    for t in tables:
        print(t)

    MapReduce.runReduce(
        Reduce,
        srcTables=tables,
        dstTables=[dst_prefix + '1030', dst_prefix + '1030_clicks'],
        files=[blockstat_dict],
        sortMode=True,
    )


# Launch the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
