#!/usr/bin/env python2

from mapreducelib import Record, MapReduce as MR
import libra
from mymrutils import *
import random
import re
import itertools
from collections import Counter

# Experiment (test) ids of interest: getData keeps only sessions that carry at
# least one of these ids and emits one record per matching id.
testids = {15623, 15624, 15625, 15791, 15792, 16173, 16175, 16188, 16189, 16584, 16585}
# Date strings used by main() to build the 'user_sessions/<date>' source table
# names; two separate ranges are concatenated.
# NOTE(review): strdaterange is presumably provided by the mymrutils star
# import -- confirm its output format and whether the end date is inclusive.
# NOTE: itertools.chain returns a one-shot iterator, so `dates` is exhausted
# after a single pass.
dates = itertools.chain(strdaterange((2015, 7, 28), (2015, 8, 21)), strdaterange((2015, 9, 9), (2015, 9, 13)))
# Largest signed 64-bit integer (2**63 - 1). getData drops dwell times equal to
# this value -- presumably libra's "no value" sentinel; TODO confirm.
MAX64 = 9223372036854775807

def main():
    """Build per-test-id dwell-time histograms in the 'likhomanov/longtouch' table.

    For every date in the module-level ``dates`` iterator the matching
    'user_sessions/<date>' table is reduced with getData and the output is
    appended to the destination table.  After each source table a progress
    line is written to the local file 'log_3365'.  Whenever the source name
    ends in '0' or '5' the destination table is compacted in place with
    summarize.

    Once all dates are processed the table is summarized, the random key
    prefix added by getData is removed with stripPfx, and the table is
    summarized once more so that records sharing a bare test id are merged.

    ``dates`` is a one-shot iterator, so a second call in the same process
    would see no source tables.
    """
    MR.useDefaults(username='snippets', server='sakura00.search.yandex.net', files=['blockstat.dict'], verbose=True)
    DST = 'likhomanov/longtouch'
    with open('log_3365', 'w') as logf:
        for src in ['user_sessions/{}'.format(d) for d in dates]:
            MR.runReduce(getData, srcTable=src, dstTable=DST, appendMode=True)
            # Flush after every line so the progress log can be followed while
            # the job is still running and survives an abrupt termination.
            logf.write(src + ' read\n')
            logf.flush()
            # Intermediate compaction on dates ending in 0 or 5.
            if src[-1] in "05":
                MR.runReduce(summarize, srcTable=DST, dstTable=DST)
        MR.runReduce(summarize, srcTable=DST, dstTable=DST)
        MR.runCombine(stripPfx, srcTable=DST, dstTable=DST)
        # Keys that differed only by prefix are now equal: merge them.
        MR.runReduce(summarize, srcTable=DST, dstTable=DST)

def getData(key, recs):
    """Reduce one user session into dwell-time histograms, one per matching test id.

    key  -- reduce key of the session; not used.
    recs -- raw session records, handed to libra.ParseSession together with
            'blockstat.dict'.

    Nothing is emitted unless the first request of the session is a
    'TTouchYandexWebRequest' and the session carries at least one id from
    ``testids``.  Otherwise the DwellTimeOnService of every click on a
    'TWebResult' main result is counted, skipping values equal to MAX64, and
    for each matching test id one record is yielded:

        Record('<random 0..999>\\t<test id>', '', repr(Counter))

    All records of one session share the same random prefix.

    NameError, AttributeError, TypeError and ValueError are re-raised; any
    other Exception ends processing of the session silently.
    """
    try:
        session = libra.ParseSession(recs, 'blockstat.dict')
        first_request = session[0]
        if not first_request.IsA('TTouchYandexWebRequest'):
            return

        seen_ids = set(int(info.TestID) for request in session for info in request.GetTestInfo())
        matched_ids = seen_ids & testids
        if not matched_ids:
            return

        # Random key prefix; removed again later by stripPfx.
        salt = str(random.randrange(1000)) + '\t'

        histogram = Counter()
        for request in session:
            for block in request.GetMainBlocks():
                result = block.GetMainResult()
                if result.IsA('TWebResult'):
                    for click in result.GetClicks():
                        dwell = click.DwellTimeOnService
                        if dwell != MAX64:
                            histogram[dwell] += 1

        if not histogram:
            return

        serialized = repr(histogram)
        for tid in matched_ids:
            yield Record(salt + str(tid), '', serialized)
    except (NameError, AttributeError, TypeError, ValueError):
        # Treated as programming errors: let them propagate.
        raise
    except Exception:
        # Anything else: drop the session without output.
        pass

def summarize(key, recs):
    """Merge all histograms stored under one key into a single record.

    key  -- reduce key; copied unchanged to the output record.
    recs -- records whose ``value`` is the ``repr`` of a Counter, in the form
            written by getData or by an earlier summarize pass.

    Yields exactly one Record(key, '', repr(merged)), where ``merged`` holds
    the element-wise sum of all input counters.
    """
    merged = Counter()
    for rec in recs:
        # NOTE(security): eval executes whatever text is stored in the table.
        # Acceptable only while every row is written by this script; do not
        # point this reducer at tables with untrusted content.
        #
        # update() adds the counts in place.  Python 2.7's Counter has no
        # __iadd__, so `merged += ...` rebuilt the whole accumulator for every
        # record.  The result is the same for the strictly positive counts
        # this job produces.
        merged.update(eval(rec.value))
    yield Record(key, '', repr(merged))

def stripPfx(recs):
    """Remove the '<prefix>\\t' part that getData puts in front of each key.

    recs -- iterable of records with ``key`` and ``value`` attributes.

    Yields one Record per input record with everything up to and including
    the first tab removed from the key; the subkey is '' and the value is
    passed through unchanged.  A key that contains no tab is kept as is.
    """
    for rec in recs:
        # [-1] rather than [1]: a key without a tab (e.g. a row whose prefix
        # was already stripped) passes through instead of raising IndexError.
        yield Record(rec.key.split('\t', 1)[-1], '', rec.value)

# Run the pipeline only when this file is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()

