#!/usr/bin/env python2

from mapreducelib import Record, MapReduce as MR
from logparse import parseReqans
from mymrutils import *
from hashlib import md5

def main():
    """Run the forum-snippet extraction pipeline.

    Steps, in order:
      1. configure the MapReduce client defaults;
      2. run ``getData`` as a combine step over the reqans log table,
         writing into the destination table;
      3. run ``Limiter(1)`` as a reduce step over the destination table,
         in place (presumably keeps one record per key -- TODO confirm
         against mymrutils);
      4. sort the destination table with ``mrsort``.
    """
    MR.useDefaults(
        username='snippets',
        server='cedar00.search.yandex.net:8013',
        verbose=True,
    )

    source_table = 'reqans_log/20150227'
    result_table = 'likhomanov/ftopic27'

    MR.runCombine(getData, srcTable=source_table, dstTable=result_table)
    # The reduce step reads and writes the same table.
    MR.runReduce(Limiter(1), srcTable=result_table, dstTable=result_table)
    mrsort(result_table)

def getData(recs):
    """Yield one Record per forum-snippet result found in the reqans records.

    Each input record's ``value`` is parsed with ``parseReqans`` into a
    request and its results. Requests that fail ``iswww`` are skipped, as
    are requests whose ``serpLang`` is neither 'ru' nor 'com.tr'.

    For 'ru' requests only results with ``snippets_type == 'forum_topic'``
    are kept; for 'com.tr' requests (emitted with the tag 'tr') only
    ``'forum_forums'`` results are kept.

    Every kept result produces ``Record(key, '0', payload)`` where
    ``payload`` is ``lang<TAB>req['req']<TAB>res['url']`` and ``key`` is the
    md5 hex digest of that payload.
    """
    for record in recs:
        req, ress = parseReqans(record.value)
        if not iswww(req):
            continue

        # Decide once per request which language tag to emit and which
        # snippet type is of interest for that language.
        serp = serpLang(req)
        if serp == 'com.tr':
            lang, wanted_type = 'tr', 'forum_forums'
        elif serp == 'ru':
            lang, wanted_type = serp, 'forum_topic'
        else:
            continue

        for res in ress:
            if res.get('snippets_type') != wanted_type:
                continue
            s = '{}\t{}\t{}'.format(lang, req['req'], res['url'])
            # Key by content hash, so identical payloads get the same key.
            yield Record(md5(s).hexdigest(), '0', s)

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

