#!/usr/bin/env python2

from mapreducelib import Record, TemporaryTable, MapReduce as MR
from logparse import parseReqans
from mymrutils import *
from itertools import islice
from collections import defaultdict

# Normalized URLs read from the local ``beakurls`` file, one entry per line.
# The load happens at import time, so the file must be present in the working
# directory of every process that imports this module.
beakurls = []
with open('beakurls') as beak_file:  # ``with`` closes the handle deterministically
    for raw_line in beak_file:
        beak_url = raw_line.strip()
        # An entry with no path component (no '/' at all, or nothing but the
        # host after 'https://') gets a trailing slash.
        if '/' not in beak_url or (beak_url.startswith('https://') and '/' not in beak_url[8:]):
            beak_url += '/'
        # Everything that is not https is given an explicit http:// scheme.
        # NOTE(review): an entry that already starts with 'http://' would end
        # up as 'http://http://...'; presumably the file never contains such
        # entries -- confirm against the data.
        if not beak_url.startswith('https:'):
            beak_url = 'http://' + beak_url
        beakurls.append(beak_url)
#beakurls = [l.strip() for l in open('beakurls')]

def main():
    """Run the snippet-type counting job over the daily ``reqans_log`` tables.

    Configures the MapReduce client, runs ``getstype`` as a combine step over
    each daily source table (appending into one temporary table), and then
    reduces that temporary table with ``Summarizer`` into the destination
    table.
    """
    dst_table = 'likhomanov/specnips_sep_tr'
    MR.useDefaults(
        username='snippets',
        server='cedar00.search.yandex.net:8013',
        verbose=True,
        files=['beakurls']
    )
    with mktmp() as tmp:
        # Whether the end date is included is decided by strdaterange, which
        # is defined outside this file.
        days = strdaterange((2014, 9, 22), (2014, 9, 29))
        sources = ['reqans_log/{}'.format(day) for day in days]
        for source in sources:
            # appendMode=True accumulates every day's output in the same table.
            MR.runCombine(getstype, srcTable=source, dstTable=tmp, appendMode=True)
        MR.runReduce(Summarizer(), srcTable=tmp, dstTable=dst_table)

def getstype(recs):
    """Count search results by snippet type, split by "beak" URL membership.

    Each record's ``value`` is parsed with ``parseReqans`` into a request and
    its results. Only requests with ``stype == 'www'``, with ``is_yandex`` not
    equal to ``'1'``, and for which ``serpLang`` returns ``'com.tr'`` are
    considered. Results without a truthy ``snippets_type`` are skipped.

    Counter keys have the form ``'<group> <type>'`` where ``<group>`` is
    ``ALL``, ``beak`` (the result URL is in ``beakurls``) or ``nonbeak``, and
    ``<type>`` is the snippet type or ``ALL``.

    Args:
        recs: iterable of records exposing a ``value`` attribute.

    Yields:
        ``Record(key, '', str(count))`` for every counter accumulated.

    Raises:
        KeyError: if a result with a snippet type has no ``'url'`` entry.
    """
    # Build the lookup set once per call: testing membership against the
    # module-level list would rescan the whole list for every single result.
    beak_lookup = frozenset(beakurls)
    counts = defaultdict(int)
    for rec in recs:
        req, ress = parseReqans(rec.value)
        if req.get('stype') != 'www':
            continue
        if req.get('is_yandex', '0') == '1':
            continue
        if serpLang(req) != 'com.tr':
            continue
        for res in ress:
            stype = res.get('snippets_type')
            if not stype:
                continue
            group = 'beak' if res['url'] in beak_lookup else 'nonbeak'
            counts['ALL ALL'] += 1
            counts['ALL ' + stype] += 1
            counts[group + ' ALL'] += 1
            counts[group + ' ' + stype] += 1
    for key, count in counts.iteritems():
        yield Record(key, '', str(count))

# Start the job only when executed as a script; a plain import of this module
# defines the functions and loads ``beakurls`` without calling main().
if __name__ == '__main__':
    main()

