#!/usr/bin/env python2

from mapreducelib import Record, MapReduce as MR
from logparse import parseReqans
from mymrutils import *
import re
from collections import defaultdict

def main():
    """Combine each daily reqans log into a temp table, then reduce it into the result table."""
    MR.useDefaults(username='snippets', server='cedar00.search.yandex.net:8013', verbose=True)
    DST = 'likhomanov/tr_2761'
    firstDay = (2015, 1, 21)
    lastDay = (2015, 1, 28)
    with mktmp() as tmp:
        sources = ['reqans_log/{}'.format(day) for day in strdaterange(firstDay, lastDay)]
        for table in sources:
            # Per-day partial counters from getData are appended to the same temp table.
            MR.runCombine(getData, srcTable=table, dstTable=tmp, appendMode=True)
            # NOTE(review): the temp table is re-sorted after every append; a single
            # sort right before the reduce may be enough -- confirm against the MR API.
            mrsort(tmp)
        MR.runReduce(Summarizer(), srcTable=tmp, dstTable=DST)

# Patterns for path components 1..3 of a sabah.com.tr URL; presumably
# year / month / day of a dated article -- confirm against real URLs.
_FOUR_DIGITS_RE = re.compile(r'^\d{4}$')
_ONE_OR_TWO_DIGITS_RE = re.compile(r'^\d{1,2}$')
# First path components that never count as 'ok', even if the rest matches.
_SKIPPED_SECTIONS = ('sinema', 'basinilankurumu', 'fotohaber')


def _isDatedArticlePath(path):
    """Return True if the split inner path has the shape <section>/NNNN/N[N]/N[N]/<last>[/]."""
    # Five components, or six when a trailing slash produced an empty last one.
    if not (len(path) == 5 or (len(path) == 6 and path[-1] == '')):
        return False
    if path[0] in _SKIPPED_SECTIONS:
        return False
    return bool(_FOUR_DIGITS_RE.match(path[1])
                and _ONE_OR_TWO_DIGITS_RE.match(path[2])
                and _ONE_OR_TWO_DIGITS_RE.match(path[3]))


def getData(recs):
    """Count snippet results and SERPs for sabah.com.tr over a stream of reqans records.

    Each record's value is parsed with parseReqans into a request and its
    results. Requests are skipped when 'is_yandex' is '1', when 'stype' is
    not 'www' or 'www-smart', or when serpLang(req) is not 'com.tr'.

    Counters accumulated over the remaining requests:
      ALL       -- results that carry a 'snippets_type' field
      sabah     -- of those, results whose host is sabah.com.tr
      ok        -- of those, results whose path passes _isDatedArticlePath
      serp      -- requests with at least one 'ALL' result
      serpsabah -- requests with at least one 'sabah' result
      serpok    -- requests with at least one 'ok' result

    Yields one Record(counter_name, '', str(count)) per counter that was
    touched; nothing is yielded for empty input.
    """
    d = defaultdict(int)
    for rec in recs:
        req, ress = parseReqans(rec.value)
        if req.get('is_yandex') == '1':
            continue
        if req.get('stype') not in ('www', 'www-smart'):
            continue
        if serpLang(req) != 'com.tr':
            continue
        haveSnip = haveSabah = haveOk = False
        for res in ress:
            if 'snippets_type' not in res:
                continue
            haveSnip = True
            d['ALL'] += 1
            url = res['url']
            if getHost(url) != 'sabah.com.tr':
                continue
            d['sabah'] += 1
            haveSabah = True
            # Drop the component before the first '/' of the inner path.
            path = getInnerPath(url).split('/')[1:]
            if _isDatedArticlePath(path):
                d['ok'] += 1
                haveOk = True
        # Per-request counters: each request contributes at most once.
        if haveSnip:
            d['serp'] += 1
        if haveSabah:
            d['serpsabah'] += 1
        if haveOk:
            d['serpok'] += 1
    # items() instead of iteritems(): same output on Python 2, and it also
    # works on Python 3 where iteritems() does not exist.
    for k, v in d.items():
        yield Record(k, '', str(v))

def presort(rec):
    """Re-key a (name, count) record so that larger counts get smaller second fields.

    Yields a single Record with key '0', second field str(100000000 - count)
    and value '<original key>\\t<count>'.
    NOTE(review): not referenced in the visible part of this file; the second
    field is a plain str, so ordering by it is presumably lexicographic -- confirm.
    """
    count = int(rec.value)
    invertedCount = str(100000000 - count)
    payload = '{}\t{}'.format(rec.key, count)
    yield Record('0', invertedCount, payload)

# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

