#!/usr/bin/env python2

from mapreducelib import Record, TemporaryTable, MapReduce as MR
from reqansparse import parseRARecord
from mymrutils import *
from itertools import islice
import re
from hashlib import md5
import random

def main():
    """Run the chords-host pipeline: map two daily logs, then reduce.

    Each source table ('reqans_log/20140628' and 'reqans_log/20140629') is
    mapped with `getchords`, appending the output into a single temporary
    table. That table is then reduced with `count` into
    'likhomanov/chordsites'. The temporary table is used as a context
    manager, so its cleanup is left to `TemporaryTable` itself.
    """
    MR.useDefaults(username='snippets', server='cedar00.search.yandex.net:8013', verbose=True)
    sources = ['reqans_log/201406{:02}'.format(day) for day in (28, 29)]
    with TemporaryTable(project='likhomanov') as tmp:
        for src in sources:
            # appendMode=True so that the second day's output is added to the
            # first day's instead of replacing it.
            MR.runMap(getchords, srcTable=src, dstTable=tmp.name, appendMode=True)
        # NOTE(review): `count` is not defined in this file; presumably it is
        # provided by `from mymrutils import *` -- confirm.
        MR.runReduce(count, srcTable=tmp.name, dstTable='likhomanov/chordsites')

def getchords(rec):
    """Map step: yield one record per 'chords' result that has a host.

    `rec.value` is parsed with `parseRARecord`. The record is skipped
    entirely unless `serpLang` of the request's 'serp_url' is one of
    'ru', 'ua', 'by', 'kz'. For every result whose 'snippets_type' is
    'chords', the host of its 'url' is extracted with `getHost`; a truthy
    host is emitted as `Record(host, '', '')`.
    """
    parsed = parseRARecord(rec.value)
    serp_lang = serpLang(parsed['request'].get('serp_url'))
    if serp_lang in ('ru', 'ua', 'by', 'kz'):
        for result in parsed['results']:
            if result.get('snippets_type') == 'chords':
                site = getHost(result['url'])
                if site:
                    # Only the first field carries data; the other two are
                    # left as empty strings.
                    yield Record(site, '', '')

# Run the job only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

