#!/usr/bin/env python2

from mapreducelib import Record, MapReduce as MR
import libra
from mymrutils import *
from itertools import islice
from collections import defaultdict
import re

# Matches URLs that start with http:// or https://, an optional "www." prefix,
# "youtube.com", and then a path beginning with "/user/" or "/channel/".
youtube = re.compile(r'^https?://(www\.)?youtube\.com/(user|channel)/')

def main():
    """Run the whole job: per-day extraction, accumulation, final summary.

    For every ``user_sessions/<day>`` table produced from ``strdaterange``
    between the two hard-coded dates, ``getdata`` is run as a reduce into one
    temporary table and ``mrg`` is run as a combine from that table into a
    second temporary table in append mode.  After the loop a ``Summarizer``
    reduce writes the accumulated table to ``likhomanov/youtube_us``.

    NOTE(review): ``mktmp``, ``strdaterange`` and ``Summarizer`` are not
    defined in this file; presumably they come from the ``mymrutils`` star
    import -- confirm.
    """
    MR.useDefaults(
        username='snippets',
        server='cedar00.search.yandex.net:8013',
        verbose=True,
        files=['blockstat.dict'],
    )
    result_table = 'likhomanov/youtube_us'
    first_day = (2014, 11, 10)
    last_day = (2014, 11, 17)

    with mktmp() as per_day, mktmp() as accumulated:
        # Build the full list of source tables up front, then process them
        # one by one.
        session_tables = [
            'user_sessions/{}'.format(day)
            for day in strdaterange(first_day, last_day)
        ]
        for session_table in session_tables:
            MR.runReduce(getdata, srcTable=session_table, dstTable=per_day)
            MR.runCombine(
                mrg,
                srcTable=per_day,
                dstTable=accumulated,
                appendMode=True,
            )
        MR.runReduce(Summarizer(), srcTable=accumulated, dstTable=result_table)

def getdata(key, recs):
    """Reduce callback: emit SERP/result counters for the requests in ``recs``.

    ``recs`` is parsed with ``libra.ParseSessionWithFat`` using
    ``blockstat.dict``.  Only requests that are ``TYandexWebRequest`` and whose
    ``ServiceDomRegion`` is ``'ru'`` or ``'tr'`` are considered.  For each such
    request the main results of type ``TWebResult`` are counted, together with
    how many of their URLs match the module-level ``youtube`` pattern.

    For a request with at least one web result, these records are yielded
    (key, empty string, value):

    * ``'<region> serp ALL'`` -> ``'1'``
    * ``'<region> snip ALL'`` -> number of web results
    * ``'<region> serp YT'``  -> ``'1'`` (only if some URL matched)
    * ``'<region> snip YT'``  -> number of matching URLs (only if some matched)

    ``key`` is not used.

    ``NameError``, ``AttributeError`` and ``TypeError`` are re-raised; any
    other ``Exception`` silently stops processing of this call, leaving
    whatever was already yielded in place.
    """
    try:
        for request in libra.ParseSessionWithFat(recs, 'blockstat.dict'):
            if not request.IsA('TYandexWebRequest'):
                continue
            region = request.ServiceDomRegion
            if region not in ('ru', 'tr'):
                continue

            web_results = 0
            youtube_results = 0
            for block in request.GetMainBlocks():
                result = block.GetMainResult()
                if result.IsA('TWebResult'):
                    web_results += 1
                    if youtube.match(result.Url):
                        youtube_results += 1

            # Requests without any web result contribute nothing.
            if not web_results:
                continue

            yield Record(region + ' serp ALL', '', '1')
            yield Record(region + ' snip ALL', '', str(web_results))
            if youtube_results:
                yield Record(region + ' serp YT', '', '1')
                yield Record(region + ' snip YT', '', str(youtube_results))
    except (NameError, AttributeError, TypeError):
        # These are let through rather than swallowed by the handler below.
        raise
    except Exception:
        # Best effort: every other failure is ignored.
        pass

def mrg(recs):
    """Combine callback: sum the integer values of ``recs`` per key.

    Each record's ``value`` is converted with ``int`` and added to the running
    total for its ``key``.  After all records are consumed, one
    ``Record(key, '', str(total))`` is yielded per distinct key.
    """
    totals = {}
    for rec in recs:
        name = rec.key
        totals[name] = totals.get(name, 0) + int(rec.value)
    for name, total in totals.iteritems():
        yield Record(name, '', str(total))

# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

