#!/usr/bin/env python2

from mapreducelib import Record, MapReduce as MR
from logparse import parseReqans
from mymrutils import *
from itertools import islice
from collections import defaultdict

def main():
    """Run the combine step over each daily reqans table, then reduce the result.

    Every source table is combined with ``getstype`` and appended to one
    temporary table; that table is then reduced with ``Summarizer()`` into
    the destination table.
    """
    MR.useDefaults(
        username='snippets',
        server='cedar00.search.yandex.net:8013',
        verbose=True,
    )
    dst_table = 'likhomanov/specsnips'
    first_day = (2015, 2, 1)
    last_day = (2015, 2, 4)
    with mktmp() as tmp:
        # Build the complete list of source tables before launching any job.
        sources = ['reqans_log/{}'.format(day)
                   for day in strdaterange(first_day, last_day)]
        for table in sources:
            # appendMode accumulates the output of every day in the same tmp table.
            MR.runCombine(getstype, srcTable=table, dstTable=tmp, appendMode=True)
        MR.runReduce(Summarizer(), srcTable=tmp, dstTable=dst_table)

def getstype(recs):
    """Count results per language and snippet type over a batch of records.

    Each record's ``value`` is parsed with ``parseReqans`` into a request and
    its results. Requests are skipped unless their ``stype`` is one of
    'www', 'www-smart', 'gateway', their ``is_yandex`` field is not '1', and
    ``serpLang(req)`` is one of 'ru', 'ua', 'by', 'kz', 'com.tr'
    ('com.tr' is counted under 'tr').

    For every result of an accepted request two counters are incremented:
    ``'<lang> ALL'`` and ``'<lang> <snippets_type>'``. A missing
    ``snippets_type`` is counted as 'None' and an empty one as 'empty_field'.

    Args:
        recs: iterable of records exposing a ``value`` attribute.

    Yields:
        ``Record(key, '', str(count))`` for every counter accumulated over
        ``recs``; nothing when no result was counted.
    """
    accepted_stypes = ('www', 'www-smart', 'gateway')
    accepted_langs = ('ru', 'ua', 'by', 'kz', 'com.tr')

    counts = defaultdict(int)
    for rec in recs:
        req, ress = parseReqans(rec.value)
        if req.get('stype') not in accepted_stypes:
            continue
        # NOTE(review): presumably filters out Yandex-internal traffic -- confirm.
        if req.get('is_yandex', '0') == '1':
            continue
        lang = serpLang(req)
        if lang not in accepted_langs:
            continue
        # NOTE: a disabled variant counted each result under several buckets
        # (e.g. 'ru' and 'kubr'); only this single-bucket form is active.
        if lang == 'com.tr':
            lang = 'tr'
        total_key = lang + ' ALL'
        for res in ress:
            stype = res.get('snippets_type')
            if stype is None:
                stype = 'None'
            elif stype == '':
                stype = 'empty_field'
            counts[total_key] += 1
            counts[lang + ' ' + stype] += 1
    # items() instead of the Python-2-only iteritems(): same pairs, and the
    # function also runs on Python 3. The dict holds only a few keys.
    for key, count in counts.items():
        yield Record(key, '', str(count))

# Run the job only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

