#!/usr/bin/env python2
# coding=UTF-8
from mapreducelib import Record, MapReduce as MR
from mymrutils import *
import random
import libra

def main():
    """Run the whole pipeline that fills and orders the sitelinks host table.

    Steps, in order:
      1. configure the MapReduce client defaults (user, server, shipped
         files, path to the mapreduce binary);
      2. for every day returned by ``strdaterange`` run ``getData`` as a
         reducer over ``user_sessions/<day>``, appending to the destination
         table;
      3. reduce the destination table in place with ``Summarizer()``;
      4. map it in place with ``presort`` and sort it with ``mrsort``.

    NOTE(review): whether the end date (2015-07-08) is included depends on
    ``strdaterange`` - confirm against mymrutils.
    """
    MR.useDefaults(
        username='sitelinks',
        server='sakura.search.yandex.net:8013',
        verbose=True,
        files=['blockstat.dict'],
        mrExec='/home/pesitnikova/mapreduce/mapreduce',
    )

    dst_table = 'pesitnikova/tophosts_with_sitelinks_jul'

    # Materialize the day list up front, then process one source table per day.
    days = list(strdaterange((2015, 7, 1), (2015, 7, 8)))
    for day in days:
        src_table = 'user_sessions/{}'.format(day)
        MR.runReduce(getData, srcTable=src_table, dstTable=dst_table,
                     appendMode=True)

    # Post-processing happens in place: every step reads and rewrites the
    # same destination table.
    MR.runReduce(Summarizer(), srcTable=dst_table, dstTable=dst_table)
    MR.runMap(presort, srcTable=dst_table, dstTable=dst_table)
    mrsort(dst_table)

def getData(key, recs):
    """Reducer: emit one record per sitelinks snippet shown on a 'ru' web SERP.

    The records of one key are parsed with ``libra.ParseSession`` (using the
    shipped ``blockstat.dict``).  For every request that is a
    ``TYandexWebRequest`` with ``ServiceDomRegion == 'ru'`` the blockstat
    blocks with path ``/snippet/sitelinks`` are inspected; for each of their
    ``pos`` variables the organic web result at that position is looked up
    and its host is emitted.

    Args:
        key:  reduce key; not used by this function.
        recs: records of that key, passed straight to ``libra.ParseSession``.

    Yields:
        ``Record(host, '', '1')`` for every sitelinks position that matches
        an organic ``TWebResult``.

    Raises:
        NameError, AttributeError, TypeError: re-raised, so that programming
        errors are not hidden.  Any other ``Exception`` silently stops the
        processing of this key; records yielded before it are kept.
    """
    try:
        for req in libra.ParseSession(recs, 'blockstat.dict'):
            if not req.IsA('TYandexWebRequest'):
                continue
            if req.ServiceDomRegion != 'ru':
                continue

            sitelink_blocks = [
                block for block in req.GetBSBlocks()
                if block.Path == "/snippet/sitelinks"
            ]
            if not sitelink_blocks:
                continue

            # The organic result list depends only on the request, so it is
            # built at most once per request - and only when a 'pos' variable
            # is actually found, exactly where the original code first
            # evaluated it.
            organic_web_results = None

            for block in sitelink_blocks:
                for var in block.GetVars():
                    if var[0] != 'pos':
                        continue

                    # The first character of the value is dropped before the
                    # integer conversion (presumably a one-letter prefix -
                    # TODO confirm against the blockstat format).
                    pos = int(var[1][1:])

                    if organic_web_results is None:
                        # NOTE(review): GetMainResult() is now called once
                        # per block instead of twice; assumes it has no side
                        # effects.
                        main_results = (
                            main_block.GetMainResult()
                            for main_block in req.GetMainBlocks()
                        )
                        organic_web_results = [
                            result for result in main_results
                            if result.IsA("TWebResult")
                        ]

                    matching = [
                        result for result in organic_web_results
                        if result.Position == pos
                    ]
                    if not matching:
                        continue

                    yield Record(getHost(matching[0].Url), '', '1')

    except (NameError, AttributeError, TypeError):
        # Most likely a bug in this script rather than bad input: fail loudly.
        raise
    except Exception:
        # Deliberate best effort: whatever else goes wrong while parsing or
        # walking the session only drops the rest of this key.
        pass

def presort(rec):
    """Mapper: re-key a (host, count) record so that sorting orders by count.

    The input record carries the host in ``rec.key`` and an integer count in
    ``rec.value``.  The output record has the constant key ``'0'``, the
    subkey ``str(10000000000 - count)`` and the value ``"<count>\\t<host>"``.
    A larger count gives a smaller subkey.

    NOTE(review): the subkey is a plain decimal string; it has a fixed width
    of 10 digits only while 1 <= count <= 9000000000.  Presumably the
    following sort compares subkeys as strings - confirm if counts can fall
    outside that range.
    """
    count = int(rec.value)
    sort_key = str(10000000000 - count)
    payload = '{}\t{}'.format(count, rec.key)
    yield Record('0', sort_key, payload)

# Run the pipeline only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
        main()
