#!/usr/bin/env python2
# coding=UTF-8
from mapreducelib import Record, MapReduce as MR
from mymrutils import *
import random
import libra

def main():
    """Build the 'pesitnikova/addr_urls' table from two days of user sessions.

    Every step after the first two reads and rewrites the same table:

    1. Reduce 'user_sessions/20150824' with processUSS24 into the table.
    2. Reduce 'user_sessions/20150828' with processUSS28, appending to it.
    3. Reduce with Summarizer(useSubkey=True) from mymrutils (presumably
       sums the '1' values per key/subkey -- TODO confirm).
    4. Reduce with mergeTables, which keeps keys that have a '24.08'
       record and no '28.08' record.
    5. Map with presort, then call mrsort (from mymrutils) on the table.
    """
    MR.useDefaults(
        username='sitelinks',
        server='sakura.search.yandex.net:8013',
        verbose=True,
        files=['blockstat.dict'],
        mrExec='/home/pesitnikova/mapreduce/mapreduce_new'
    )

    dst = 'pesitnikova/addr_urls'

    # Collect per-day URL hits; the second run appends instead of replacing.
    MR.runReduce(processUSS24, srcTable='user_sessions/20150824', dstTable=dst)
    MR.runReduce(processUSS28, srcTable='user_sessions/20150828', dstTable=dst,
                 appendMode=True)

    # Aggregate, filter and re-key the table in place.
    MR.runReduce(Summarizer(useSubkey=True), srcTable=dst, dstTable=dst)
    MR.runReduce(mergeTables, srcTable=dst, dstTable=dst)
    MR.runMap(presort, srcTable=dst, dstTable=dst)
    mrsort(dst)
	
def mergeTables(key, recs):
    """Reduce: emit a key only if it has a '24.08' record and no '28.08' one.

    Args:
        key: reduce key (unused; the key of the last record read is emitted).
        recs: iterable of records exposing .key, .subkey and .value.

    Yields:
        At most one Record(key, '', value), where value is taken from the
        last '24.08' record seen in the group.
    """
    seen_old = False
    seen_new = False
    old_value = ''
    last = None

    for last in recs:
        day = last.subkey
        if day == '24.08':
            seen_old = True
            old_value = last.value
        elif day == '28.08':
            seen_new = True

    # Nothing to report unless the key is present on 24.08 and gone on 28.08.
    if seen_new or not seen_old:
        return

    yield Record(last.key, '', old_value)

def _emitAddressButtonUrls(recs, dayLabel):
    """Yield one record per web result that carried an address button.

    Shared body of processUSS24 and processUSS28, which differ only in the
    subkey label they emit.

    Args:
        recs: raw session records, passed to libra.ParseSession together
            with the 'blockstat.dict' dictionary file.
        dayLabel: string written as the subkey of every emitted record.

    Yields:
        Record(url, dayLabel, '1') for each TWebResult whose Position is one
        of the positions reported by an address-button blockstat block of
        the same request.

    Raises:
        NameError, AttributeError, TypeError: re-raised unchanged. Any other
        exception is swallowed and ends processing of this session.
    """
    try:
        for req in libra.ParseSession(recs, 'blockstat.dict'):
            if not req.IsA('TTouchYandexWebRequest'):
                continue

            # NOTE(review): 'adress_button' is spelled as in the original
            # code; presumably it matches the real blockstat path -- confirm
            # before "fixing" it.
            buttons = [b for b in req.GetBSBlocks()
                       if b.Path.startswith('/snippet/adress_button/')]
            if not buttons:
                continue

            # The 'pos' var's first character is dropped and the rest is
            # parsed as an integer position (presumably a one-letter prefix
            # such as 'p3' -- TODO confirm).
            poses = set()
            for button in buttons:
                for var in button.GetVars():
                    if var[0] == 'pos':
                        poses.add(int(var[1][1:]))

            for block in req.GetMainBlocks():
                res = block.GetMainResult()
                if not res.IsA('TWebResult'):
                    continue
                if res.Position in poses:
                    yield Record(res.Url, dayLabel, '1')

    except (NameError, AttributeError, TypeError):
        # These are let through rather than hidden (presumably because they
        # indicate coding mistakes rather than bad input).
        raise
    except Exception:
        # Deliberate best effort, kept from the original: any other failure
        # silently drops the remainder of this session.
        pass


def processUSS24(key, recs):
    """Reduce one user session, emitting address-button URLs labelled '24.08'.

    Args:
        key: reduce key (unused).
        recs: raw session records for this key.

    Yields:
        Record(url, '24.08', '1'); see _emitAddressButtonUrls.
    """
    for out in _emitAddressButtonUrls(recs, '24.08'):
        yield out


def processUSS28(key, recs):
    """Reduce one user session, emitting address-button URLs labelled '28.08'.

    Args:
        key: reduce key (unused).
        recs: raw session records for this key.

    Yields:
        Record(url, '28.08', '1'); see _emitAddressButtonUrls.
    """
    for out in _emitAddressButtonUrls(recs, '28.08'):
        yield out

def presort(rec):
    """Map: re-key a record so that its subkey shrinks as its count grows.

    Args:
        rec: record whose .value parses as an integer count and whose .key
            is carried over into the output value.

    Yields:
        Record('0', str(10000000000 - count), '<count>\\t<key>').
    """
    count = int(rec.value)
    # Inverted count as subkey (presumably so that the later sort lists the
    # largest counts first -- TODO confirm against mrsort).
    inverted = str(10000000000 - count)
    payload = '{}\t{}'.format(count, rec.key)
    yield Record('0', inverted, payload)

# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
        main()
