#!/usr/bin/env python2

from mapreducelib import Record, MapReduce as MR
from logparse import parseReqans
from mymrutils import *
from collections import defaultdict
import re

def main():
    """Run the whole pipeline: collect, summarize, re-key and sort.

    Steps, in order:
      1. combine every daily ``reqans_log/<date>`` table through ``getData``
         into one temporary table (append mode, so days accumulate);
      2. reduce the temporary table with ``Summarizer()`` into the result
         table (``Summarizer`` is not defined in this file; presumably it
         turns the per-record '1' values into counts, since ``presort``
         parses the value as an integer -- TODO confirm);
      3. map the result table onto itself through ``presort``;
      4. sort the result table with ``mrsort``.
    """
    MR.useDefaults(verbose=True, server='cedar00.search.yandex.net:8013', username='snippets')
    result_table = 'likhomanov/sahi_2952'
    # Date bounds handed to strdaterange; whether the end date is inclusive
    # depends on that helper.
    first_day, last_day = (2015, 4, 12), (2015, 4, 14)
    with mktmp() as scratch:
        day_tables = ['reqans_log/{}'.format(day) for day in strdaterange(first_day, last_day)]
        for day_table in day_tables:
            MR.runCombine(getData, srcTable=day_table, dstTable=scratch, appendMode=True)
        MR.runReduce(Summarizer(), srcTable=scratch, dstTable=result_table)
        # Source and destination are the same table: records are rewritten in place.
        MR.runMap(presort, srcTable=result_table, dstTable=result_table)
        mrsort(result_table)

def getData(recs):
    """Emit one record per interesting sahibinden.com result in the log.

    Each input record's ``value`` is parsed with ``parseReqans`` into a
    request and its list of results. A request is skipped when its
    'is_yandex' field equals '1', when its 'stype' is not 'www', or when
    ``serpLang`` does not return 'com.tr'.

    Within a kept request, a result is skipped when it has no URL, when
    ``getHost`` of the URL is not 'sahibinden.com', when its
    'snippets_type' is 'yaca' or 'sahibinden_template', or when
    ``getInnerPath`` of the URL is None, empty or '/'.

    Yields:
        ``Record(url + '\\t' + snippets_type, '', '1')`` for every
        surviving result.
    """
    ignored_snippet_types = ('yaca', 'sahibinden_template')
    trivial_paths = (None, '', '/')

    for record in recs:
        request, results = parseReqans(record.value)

        if request.get('is_yandex', '0') == '1':
            continue
        if request.get('stype') != 'www':
            continue
        if serpLang(request) != 'com.tr':
            continue

        for result in results:
            link = result.get('url')
            if not link:
                continue
            if getHost(link) != 'sahibinden.com':
                continue
            # Hard lookup on purpose: a result without this field raises KeyError.
            snippet_type = result['snippets_type']
            if snippet_type in ignored_snippet_types:
                continue
            if getInnerPath(link) in trivial_paths:
                continue
            yield Record(link + '\t' + snippet_type, '', '1')

def presort(rec):
    """Re-key a summarized record so that larger counts get smaller sort fields.

    ``rec.value`` is parsed as an integer count. The emitted record has the
    constant first field '0', the second field ``str(1000000000 - count)``
    and the value ``'<count>\\t<original key>'``.

    NOTE(review): the second field is a string, so if the later sort
    compares it as text the ordering matches numeric order only while
    ``1000000000 - count`` keeps a fixed number of digits -- confirm the
    counts stay within that range.
    """
    count = int(rec.value)
    inverted = 1000000000 - count
    payload = '{}\t{}'.format(count, rec.key)
    yield Record('0', str(inverted), payload)

# Start the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

