#!/usr/bin/env python2
# coding=UTF-8
from mapreducelib import Record, MapReduce as MR
from mymrutils import *
import libra

def main():
    """Run the whole pipeline for the tables under ``pesitnikova/3426``.

    Steps, in execution order:

    1. Set the MapReduce client defaults (user, server, verbosity, the
       ``blockstat.dict`` file to ship, and the path to the mapreduce binary).
    2. For every date yielded by ``strdaterange((2015, 8, 12), (2015, 8, 21))``
       reduce ``user_sessions/<date>`` twice: with ``processControl`` into the
       control table and with ``processExp`` into the experiment table, both
       in append mode.
    3. Reduce the experiment table, then the control table, onto themselves
       with ``Summarizer(useSubkey=True)``.
    4. Copy control and then (appending) experiment into one combined table,
       reduce it with ``join``, map the result with ``trans``, reduce that
       table onto itself with ``Summarizer()`` and finally call ``mrsort``
       on it.

    ``strdaterange``, ``Summarizer`` and ``mrsort`` are not defined in this
    file; presumably they come from the ``mymrutils`` star import -- their
    exact semantics (e.g. whether the end date is inclusive) should be
    checked there.
    """
    MR.useDefaults(
        username='sitelinks',
        server='sakura.search.yandex.net:8013',
        verbose=True,
        files=['blockstat.dict'],
        mrExec='/home/pesitnikova/mapreduce/mapreduce'
    )

    # Every table the pipeline touches, named once.
    control_tbl = 'pesitnikova/3426/shows_urls_control'
    exp_tbl = 'pesitnikova/3426/shows_urls_exp'
    both_tbl = 'pesitnikova/3426/shows_urls_both_samples'
    typed_tbl = 'pesitnikova/3426/shows_urls_exp_with_types'
    by_type_tbl = 'pesitnikova/3426/shows_by_types_exp'

    # Per-day extraction from the session logs; results accumulate.
    for day in strdaterange((2015, 8, 12), (2015, 8, 21)):
        sessions_tbl = 'user_sessions/{}'.format(day)
        MR.runReduce(processControl, srcTable=sessions_tbl,
                     dstTable=control_tbl, appendMode=True)
        MR.runReduce(processExp, srcTable=sessions_tbl,
                     dstTable=exp_tbl, appendMode=True)

    # In-place summarisation: experiment table first, then control.
    for tbl in (exp_tbl, control_tbl):
        MR.runReduce(Summarizer(useSubkey=True), srcTable=tbl, dstTable=tbl)

    # Concatenate both samples so that ``join`` sees them under one key.
    MR.copyTable(srcTable=control_tbl, dstTable=both_tbl)
    MR.copyTable(srcTable=exp_tbl, dstTable=both_tbl, appendMode=True)

    MR.runReduce(join, srcTable=both_tbl, dstTable=typed_tbl)
    MR.runMap(trans, srcTable=typed_tbl, dstTable=by_type_tbl)
    MR.runReduce(Summarizer(), srcTable=by_type_tbl, dstTable=by_type_tbl)
    mrsort(by_type_tbl)

def trans(rec):
    """Map step: emit ``rec`` with its key and subkey swapped.

    The emitted record uses the input subkey as key and the input key as
    subkey; the value is passed through unchanged.
    """
    swapped_key, swapped_subkey = rec.subkey, rec.key
    yield Record(swapped_key, swapped_subkey, rec.value)

# (control subkey, emitted subkey) pairs, listed in the order in which
# ``join`` emits its output records.
_JOIN_SUBKEY_PAIRS = (
    ('control_sample_schema_movie', 'exp_schema_movie'),
    ('control_sample_org', 'exp_org'),
    ('control_sample_product_offer', 'exp_product_offer'),
    ('control_sample_recipe', 'exp_recipe'),
    ('control_sample_soft', 'exp_soft'),
    ('control_sample_creative_work', 'exp_creative_work'),
    ('control_sample_question', 'exp_question'),
)
_JOIN_CONTROL_SUBKEYS = frozenset(pair[0] for pair in _JOIN_SUBKEY_PAIRS)


def join(key, recs):
    """Reduce step: tag a key seen in the experiment with its control types.

    ``recs`` is the group of records sharing one key. Records are recognised
    purely by subkey:

    * ``'exp'`` marks the key as present in the experiment sample;
    * each ``'control_sample_*'`` subkey listed in ``_JOIN_SUBKEY_PAIRS``
      marks one control type for the key.

    Records with any other subkey are ignored. If an ``'exp'`` record is
    present, one record ``(key, 'exp_<type>', value)`` is yielded for every
    control type present, in the fixed order of ``_JOIN_SUBKEY_PAIRS``.
    Nothing is yielded when the group has no ``'exp'`` record or no control
    record.

    The yielded value is the value of the control record *of that same type*
    (the last one, should a type occur more than once). Previously a single
    variable was overwritten by every control record, so with several control
    types in one group all outputs carried the value of whichever control
    record happened to come last.

    NOTE(review): the value of the ``'exp'`` record itself is never emitted;
    confirm that the control value is really the one wanted downstream.
    NOTE(review): ``processExp`` / ``processControl`` in this file emit the
    query text as subkey, not the labels matched here -- verify that the
    input table actually contains these labels.

    Args:
        key: group key supplied by the MapReduce runner (unused; the key of
            the emitted records is taken from the records themselves).
        recs: iterable of records exposing ``key``, ``subkey`` and ``value``.

    Yields:
        ``Record(key, 'exp_<type>', control_value)`` as described above.
    """
    seen_in_exp = False
    control_values = {}   # control subkey -> value of its (last) record
    out_key = None        # key of the most recently read record

    for rec in recs:
        out_key = rec.key
        subkey = rec.subkey
        if subkey == 'exp':
            seen_in_exp = True
        elif subkey in _JOIN_CONTROL_SUBKEYS:
            control_values[subkey] = rec.value

    if not seen_in_exp:
        return

    for control_subkey, exp_subkey in _JOIN_SUBKEY_PAIRS:
        if control_subkey in control_values:
            yield Record(out_key, exp_subkey, control_values[control_subkey])



def processExp(key, recs):
    """Reduce step over one session: emit web results of test-id 16175 requests.

    The session records are parsed with ``libra.ParseSession`` using
    ``blockstat.dict``. Only requests for which
    ``IsA('TTouchYandexWebRequest')`` and ``HasTestID('16175')`` both hold are
    considered. For each of their main blocks whose main result satisfies
    ``IsA('TWebResult')`` one record is yielded with the result URL as key,
    the request query as subkey and the string ``'1'`` as value.

    Error handling: ``NameError``, ``AttributeError`` and ``TypeError`` are
    re-raised; any other ``Exception`` silently ends processing of this
    session (records already yielded are kept).

    Args:
        key: group key supplied by the MapReduce runner (unused).
        recs: the session's records, passed straight to ``libra.ParseSession``.
    """
    try:
        for request in libra.ParseSession(recs, 'blockstat.dict'):
            # Same check order as two separate guards: type first, test-id second.
            relevant = (request.IsA('TTouchYandexWebRequest')
                        and request.HasTestID('16175'))
            if not relevant:
                continue

            for main_block in request.GetMainBlocks():
                result = main_block.GetMainResult()
                if result.IsA('TWebResult'):
                    yield Record(result.Url, request.Query, '1')
    except (NameError, AttributeError, TypeError):
        # Let these three exception types propagate instead of being swallowed below.
        raise
    except Exception:
        # Best effort: stop processing this session and move on.
        pass


# Snippet types that make a web result count for the control sample.
# A tuple (not a set) keeps the plain ``==`` comparison semantics.
_CONTROL_SNIPPET_TYPES = (
    'schema_movie',
    'question',
    'creativework_snip',
    'software',
    'recipe',
    'productoffer_snip',
)


def processControl(key, recs):
    """Reduce step over one session: emit selected web results of test-id 16173.

    The session records are parsed with ``libra.ParseSession`` using
    ``blockstat.dict``. Only requests for which
    ``IsA('TTouchYandexWebRequest')`` and ``HasTestID('16173')`` both hold are
    considered. For each of their main blocks whose main result satisfies
    ``IsA('TWebResult')``, a record (result URL as key, request query as
    subkey, ``'1'`` as value) is yielded when either

    * the result's ``SnippetType`` is one of ``_CONTROL_SNIPPET_TYPES``, or
    * the request has at least one block from ``GetBSBlocks()`` whose
      ``Path`` starts with ``'/wiz/org_'``.

    The second condition depends only on the request, so it is evaluated
    lazily and at most once per request instead of once per main block.

    NOTE(review): the emitted record is the same whichever condition matched,
    so the matched type is not recorded, while ``join`` in this file looks
    for ``'control_sample_*'`` subkeys -- confirm which side is intended.

    Error handling: ``NameError``, ``AttributeError`` and ``TypeError`` are
    re-raised; any other ``Exception`` silently ends processing of this
    session (records already yielded are kept).

    Args:
        key: group key supplied by the MapReduce runner (unused).
        recs: the session's records, passed straight to ``libra.ParseSession``.
    """
    try:
        for req in libra.ParseSession(recs, 'blockstat.dict'):
            if not req.IsA('TTouchYandexWebRequest'):
                continue
            if not req.HasTestID('16173'):
                continue

            has_org_block = None  # unknown until first needed for this request

            for block in req.GetMainBlocks():
                res = block.GetMainResult()
                if not res.IsA('TWebResult'):
                    continue

                if res.SnippetType in _CONTROL_SNIPPET_TYPES:
                    yield Record(res.Url, req.Query, '1')
                    continue

                if has_org_block is None:
                    has_org_block = any(
                        bs_block.Path.startswith('/wiz/org_')
                        for bs_block in req.GetBSBlocks())
                if has_org_block:
                    yield Record(res.Url, req.Query, '1')
    except (NameError, AttributeError, TypeError):
        # Let these three exception types propagate instead of being swallowed below.
        raise
    except Exception:
        # Best effort: stop processing this session and move on.
        pass

# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
