#!/usr/bin/env python2

from mapreducelib import Record, MapReduce as MR
import libra
from mymrutils import *
import random
import re
from hashlib import md5

# Hosts of interest: getData emits per-host records only for search results
# whose host (as returned by getHost) is a member of this set.
evhosts = {
    'afisha.tut.by',
    'concert.ru',
    'ponominalu.ru',
    'actionlist.ru',
    'bravo.israelinfo.co.il',
    'rockgig.net',
    'viagogo.ru',
    'bilesuserviss.lv',
    'mariinsky.ru',
    'ticketpro.by',
    'kassy.by',
    'order.muzbilet.ru',
    'anshlag.co.il',
    'milanticketsopera.com',
}

def main():
    """Run the whole pipeline and write the result to 'likhomanov/event_3215'.

    For every 'user_sessions/<date>' table produced from strdaterange, the
    getData reducer appends its records to a temporary table, which is then
    reduced in place with Summarizer. Once all dates are processed, cleankey
    strips the leading key field and a final Summarizer pass writes the
    destination table.

    NOTE(review): Summarizer comes from outside this file; presumably it
    aggregates the values of records sharing a key -- confirm.
    """
    MR.useDefaults(
        username='snippets',
        server='cedar00.search.yandex.net:8013',
        files=['blockstat.dict'],
        verbose=True,
    )
    result_table = 'likhomanov/event_3215'
    with mktmp() as scratch:
        daily_tables = [
            'user_sessions/{}'.format(day)
            for day in strdaterange((2015, 7, 13), (2015, 7, 20))
        ]
        for daily_table in daily_tables:
            # Append this day's raw records, then reduce the scratch table
            # onto itself before the next day is added.
            MR.runReduce(getData, srcTable=daily_table, dstTable=scratch, appendMode=True)
            MR.runReduce(Summarizer(), srcTable=scratch, dstTable=scratch)
        # Drop the random key prefix added by getData, then aggregate again
        # under the cleaned keys.
        MR.runCombine(cleankey, srcTable=scratch, dstTable=scratch)
        MR.runReduce(Summarizer(), srcTable=scratch, dstTable=result_table)

def getData(key, recs):
    """Reducer: emit counting records for web results from hosts in evhosts.

    The session records in `recs` are parsed with libra.ParseSession. Only
    requests that are one of the three Yandex web request types (labelled
    'desk', 'touch', 'mobile') and whose ServiceDomRegion is 'ru' are used.
    `key` is not used.

    Every emitted key is tab-joined and starts with a random number in
    [0, 256) drawn once per request, followed by the platform label. Per
    request the following records are yielded:

      * <pfx> <platf> doc <host> <face>   value '1', one per TWebResult
        whose host is in evhosts; <face> is 'main' when getInnerPath
        returns None, '' or '/', otherwise 'inner';
      * <pfx> <platf> doc ALL             value = number of TWebResult
        results in the request (only if that number is non-zero);
      * <pfx> <platf> serp ALL            value '1' (same condition);
      * <pfx> <platf> serp <host> <face>  value '1', once per distinct
        host/face pair seen in the request (same condition).

    NameError, AttributeError, TypeError and ValueError are re-raised; any
    other Exception silently stops processing of the session (records
    already yielded are kept).
    """
    # Checked in this order; the first matching type decides the label.
    platform_by_type = (
        ('TYandexWebRequest', 'desk'),
        ('TTouchYandexWebRequest', 'touch'),
        ('TMobileYandexWebRequest', 'mobile'),
    )
    try:
        for req in libra.ParseSession(recs, 'blockstat.dict'):
            platf = None
            for type_name, label in platform_by_type:
                if req.IsA(type_name):
                    platf = label
                    break
            if platf is None or req.ServiceDomRegion != 'ru':
                continue

            pfx = str(random.randrange(256))
            key_head = [pfx, platf]
            web_results = 0
            host_faces = set()
            for block in req.GetMainBlocks():
                res = block.GetMainResult()
                if not res.IsA('TWebResult'):
                    continue
                web_results += 1
                host = getHost(res.Url)
                if host in evhosts:
                    if getInnerPath(res.Url) in (None, '', '/'):
                        face = 'main'
                    else:
                        face = 'inner'
                    host_faces.add('\t'.join((host, face)))
                    yield Record('\t'.join(key_head + ['doc', host, face]), '', '1')

            if web_results:
                yield Record('\t'.join(key_head + ['doc', 'ALL']), '', str(web_results))
                yield Record('\t'.join(key_head + ['serp', 'ALL']), '', '1')
                for host_face in host_faces:
                    yield Record('\t'.join(key_head + ['serp', host_face]), '', '1')
    except (NameError, AttributeError, TypeError, ValueError):
        # These usually indicate a bug in this code, so let them surface.
        raise
    except Exception:
        # Best effort: anything else ends this session quietly.
        pass

def cleankey(recs):
    """Yield each record with the first tab-separated key field removed.

    The dropped field is the random prefix that getData puts at the start
    of every key. The value is passed through unchanged and the middle
    Record argument is set to ''. A key without a tab raises ValueError.
    """
    for record in recs:
        _prefix, bare_key = record.key.split('\t', 1)
        yield Record(bare_key, '', record.value)

# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()

