#!/usr/bin/env python2

from mapreducelib import Record, MapReduce as MR
import libra
from mymrutils import *
from collections import defaultdict
import re
from hashlib import md5

def main():
    """Run the whole pipeline: per-day extraction, then aggregation.

    For every 'user_sessions/<date>' table produced by ``strdaterange`` the
    ``getData`` reducer is run with two destinations (a temporary table and
    the counters table), after which the counters table is re-reduced onto
    itself with ``Summarizer()``.  Once all dates are processed, the
    temporary table is reduced with ``summarize`` into the result table,
    which is finally reduced onto itself with ``Limiter(5000)``.
    """
    MR.useDefaults(
        username='snippets',
        server='cedar00.search.yandex.net:8013',
        files=['blockstat.dict'],
        verbose=True
    )
    result_table = 'likhomanov/auto_tr'
    counters_table = 'likhomanov/auto_tr_cnt'
    with mktmp() as scratch_table:
        # Source table names are materialised up front, before any job runs.
        session_tables = [
            'user_sessions/{}'.format(day)
            for day in strdaterange((2015, 4, 22), (2015, 4, 25))
        ]
        for session_table in session_tables:
            # Output index 0 -> scratch table, index 1 -> counters table
            # (matches the tableIndex values used in getData).
            MR.runReduce(
                getData,
                srcTable=session_table,
                dstTables=[scratch_table, counters_table],
                appendMode=True
            )
            # NOTE(review): Summarizer is not defined in this file; presumably
            # it folds the appended '1' records into totals -- confirm.
            MR.runReduce(
                Summarizer(),
                srcTable=counters_table,
                dstTable=counters_table
            )
        MR.runReduce(summarize, srcTable=scratch_table, dstTable=result_table)
        # NOTE(review): Limiter is not defined in this file; presumably it
        # caps the number of records kept per key at 5000 -- confirm.
        MR.runReduce(Limiter(5000), srcTable=result_table, dstTable=result_table)

def getData(key, recs):
    """Reducer: extract auto-site results from Turkish web/touch requests.

    ``recs`` is handed to ``libra.ParseSession`` together with
    'blockstat.dict'; ``key`` is not used.  Requests that are neither
    ``TYandexWebRequest`` ('www') nor ``TTouchYandexWebRequest`` ('touch'),
    or whose ``ServiceDomRegion`` is not 'tr', are skipped.

    Yields, per remaining request:
      * tableIndex=0 -- one record for every main ``TWebResult`` whose host
        is one of the tracked sites and whose inner path is not empty or
        '/'.  First field: '<stype> <host>\\t<md5 of value>'; third field:
        '<query>\\t<url>'.
      * tableIndex=1 -- '<stype>\\tserp' with value '1' if the request had at
        least one main ``TWebResult``, and '<stype>\\tauto' with value '1' if
        at least one tableIndex=0 record was produced for it.

    NOTE(review): the positional Record arguments are presumably
    (key, subkey, value) -- confirm against mapreducelib.
    """
    tracked_hosts = ('otoraba.com', 'araba.com', 'arabam.com')
    try:
        for request in libra.ParseSession(recs, 'blockstat.dict'):
            if request.IsA('TYandexWebRequest'):
                platform = 'www'
            elif request.IsA('TTouchYandexWebRequest'):
                platform = 'touch'
            else:
                continue
            if request.ServiceDomRegion != 'tr':
                continue

            has_web_result = False
            has_auto_result = False
            for block in request.GetMainBlocks():
                result = block.GetMainResult()
                if not result.IsA('TWebResult'):
                    continue
                has_web_result = True
                url = result.Url
                site = getHost(url)
                # Keep only tracked sites, and only non-root pages on them.
                if site in tracked_hosts and getInnerPath(url) not in (None, '', '/'):
                    has_auto_result = True
                    label = platform + ' ' + site
                    payload = request.Query + '\t' + url
                    # The md5 of the payload makes the key unique per
                    # (query, url) pair within a platform/site.
                    yield Record(label + '\t' + md5(payload).hexdigest(), '', payload, tableIndex=0)

            # Per-request counters, emitted in a fixed order: serp, then auto.
            for flag, suffix in ((has_web_result, '\tserp'), (has_auto_result, '\tauto')):
                if flag:
                    yield Record(platform + suffix, '', '1', tableIndex=1)
    except (NameError, AttributeError, TypeError):
        # These are let through so programming mistakes are not hidden.
        raise
    except Exception:
        # Any other failure silently ends processing of this session;
        # records already yielded for it are kept.
        pass

def summarize(key, recs):
    """Reducer: collapse all records sharing a key into one counted record.

    ``key`` is split on tab and only its first field is kept as the output
    key (for keys written by ``getData`` that is '<stype> <host>').  The
    value of the first record is used as the representative payload and the
    remaining records are only counted.

    Yields a single Record whose second field is ``str(10000000000 - n)``
    and whose third field is '<n>\\t<payload>', where ``n`` is the number of
    input records.

    NOTE(review): the inverted count in the second field presumably makes
    records sort by descending frequency -- confirm against how
    mapreducelib orders records.
    """
    payload = next(recs).value
    group = key.split('\t')[0]
    # One record was already consumed above; count the remainder lazily.
    count = 1 + sum(1 for _ in recs)
    order_field = str(10000000000 - count)
    value = str(count) + '\t' + payload
    yield Record(group, order_field, value)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

