# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
import sys
from datetime import datetime
import libra
import urllib
import random

def SimpleNormalize(q):
    """Return a lower-cased query with punctuation reduced to single spaces.

    Every character from a fixed punctuation set is replaced by a space,
    runs of spaces are collapsed into one, surrounding whitespace is
    stripped and the result is lower-cased.
    """
    punctuation = '!@#$%^&*()_+[]{}~,.\\/?<>:;\'"-='
    single_space = ' '
    double_space = single_space * 2

    cleaned = q
    for symbol in punctuation:
        cleaned = cleaned.replace(symbol, single_space)

    # Each pass halves the runs of spaces; stop once no pair is left.
    while cleaned.find(double_space) != -1:
        cleaned = cleaned.replace(double_space, single_space)

    return cleaned.strip().lower()

def Reduce(key, recs):
    """Summarise Eurovision wizard shows and clicks for one user.

    Only keys starting with 'y' are processed. The records are parsed with
    ``libra.ParseSession`` and only ``TYandexWebRequest`` requests whose
    ``ServiceDomRegion`` is 'ru' are considered.

    Args:
        key: user id the records are grouped by.
        recs: records of that user, handed unchanged to
            ``libra.ParseSession``.

    Yields:
        At most one ``Record(key, '', value)``, emitted only when at least
        one matching block was shown. ``value`` is three tab-separated
        fields:
          * the local-time date (YYYY-MM-DD) of the last considered
            request, whether or not that request had a show;
          * the number of blocks whose path contains a show marker;
          * 1 if any click on a request with a show has a converted path
            containing a click marker, otherwise 0.

    Raises:
        RuntimeError: re-raised from ``libra.ParseSession`` unless its
            message contains 'fat user' or 'ParseSession can'; in those
            two cases the user is silently skipped.
    """
    uid = key
    # startswith() also rejects an empty key instead of raising IndexError.
    if not uid.startswith('y'):
        return

    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except RuntimeError as e:
        message = str(e)
        if 'fat user' in message or 'ParseSession can' in message:
            return
        # A bare raise keeps the original traceback.
        raise

    show_markers = (
        'wiz/pseudo/eurovision_wizard',
        'big-video/thumb',
    )
    click_markers = (
        'parallel/object-answer/upper/p',
        'parallel/object-answer/badge/link',
        'parallel/object-answer/badge/wikifact/p',
        'parallel/object-answer/sources/p',
        'wiz/pseudo/eurovision_wizard/p7/click',
        'big-video/thumb',
    )

    shows = 0
    clicked = 0
    last_timestamp = None

    for r in session:
        if not r.IsA("TYandexWebRequest"):
            continue

        if r.ServiceDomRegion != 'ru':
            continue

        # Remembered for every considered request, shown or not; the date
        # is formatted once at the end instead of on every iteration.
        last_timestamp = r.Timestamp

        shown_here = 0
        for bl in r.GetBSBlocks():
            path = bl.Path
            if any(marker in path for marker in show_markers):
                shown_here += 1

        if not shown_here:
            continue
        shows += shown_here

        # Clicks only count on requests that had a show; once one is found
        # there is nothing more to learn from further clicks.
        if not clicked:
            for cl in r.GetClicks():
                path = cl.ConvertedPath
                if any(marker in path for marker in click_markers):
                    clicked = 1
                    break

    if shows > 0:
        day = datetime.fromtimestamp(last_timestamp).date().isoformat()
        yield Record(uid, '', day + '\t' + str(shows) + '\t' + str(clicked))


def count_days(key, recs):
    """Count the distinct days present in the records of one key.

    The day is the first tab-separated field of each record's ``value``.
    Fields that do not start with '2' (including empty ones) are ignored.

    Args:
        key: key the records are grouped by; copied to the output record.
        recs: iterable of records exposing a ``value`` string.

    Yields:
        Exactly one ``Record(key, '', value)`` where ``value`` is the
        number of distinct days, a tab, and ``str()`` of the list of those
        days in order of first appearance.
    """
    days = []
    seen = set()
    for rec in recs:
        day = rec.value.split('\t')[0]
        # startswith() skips an empty first field instead of raising
        # IndexError on day[0].
        if not day.startswith('2'):
            continue

        # The set gives O(1) membership; the list keeps first-seen order.
        if day not in seen:
            seen.add(day)
            days.append(day)

    yield Record(key, '', str(len(days)) + '\t' + str(days))


def main():
    """Run ``Reduce`` over the selected daily user-session tables.

    Sets the MapReduce client defaults, then for each selected day runs
    ``Reduce`` on ``user_sessions/2015<MMDD>`` and appends the output to a
    single destination table.
    """
    client_defaults = dict(
        server='cedar00.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode = True,
    )
    MapReduce.useDefaults(**client_defaults)

    # Only these three days are processed. A wider list (0519-0523,
    # 0525-0531) used to be assigned first and immediately overwritten.
    selected_days = ['0522', '0524', '0526']
    destination = 'ensuetina/EUROVISION/uid_aband1'

    for day in selected_days:
        MapReduce.runReduce(
            Reduce,
            srcTable='user_sessions/2015' + day,
            dstTable=destination,
            files=['/home/ensuetina/data/blockstat.dict'],
            appendMode=True,
            sortMode=True,
        )

# Start the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
