#!/usr/bin/env python2
# coding=UTF-8
from mapreducelib import Record, MapReduce as MR
from mymrutils import *
import libra

def main():
    """Run the whole dwell-click pipeline on the MapReduce cluster.

    Steps, in order:
      1. For every date yielded by strdaterange, reduce the user-session
         table with processControl / processExp and append the results to
         the control / experiment URL tables.
      2. Sum duplicate (key, subkey) records in both URL tables.
      3. Concatenate both URL tables and reduce them with join, which tags
         experiment URLs with the snippet type seen in the control table.
      4. Aggregate by snippet type and merge with the externally prepared
         click tables, separately for experiment and control.

    All tables are read and written in place on the cluster; nothing is
    returned.
    """
    # Each table path is spelled out exactly once so that a typo cannot make
    # two steps silently address different tables.
    urls_control = 'pesitnikova/3426/dwell_clicks_urls_control'
    urls_exp = 'pesitnikova/3426/dwell_clicks_urls_exp'
    urls_both = 'pesitnikova/3426/dwell_clicks_urls_both_samples'
    urls_exp_typed = 'pesitnikova/3426/dwell_clicks_urls_exp_with_types'
    by_types_exp = 'pesitnikova/3426/dwell_clicks_by_types_exp'
    by_types_control = 'pesitnikova/3426/dwell_clicks_by_types_control'
    all_clicks_exp = 'pesitnikova/3426/clicks_by_types_exp'
    all_clicks_control = 'pesitnikova/3426/clcks_control'
    rate_exp = 'pesitnikova/3426/rate_dwell_clicks_exp'
    rate_control = 'pesitnikova/3426/rate_dwell_clicks_control'

    MR.useDefaults(
        username='sitelinks',
        server='sakura.search.yandex.net:8013',
        verbose=True,
        files=['blockstat.dict'],
        mrExec='/home/pesitnikova/mapreduce/mapreduce')

    # Collect long clicks from the control and the experiment samples.
    # NOTE(review): both destinations are written with appendMode=True, so
    # re-running the script presumably accumulates on top of earlier output.
    for date in strdaterange((2015, 8, 12), (2015, 8, 21)):
        uss = 'user_sessions/{}'.format(date)
        MR.runReduce(processControl, srcTable=uss, dstTable=urls_control, appendMode=True)
        MR.runReduce(processExp, srcTable=uss, dstTable=urls_exp, appendMode=True)
    MR.runReduce(Summarizer(useSubkey=True), srcTable=urls_exp, dstTable=urls_exp)
    MR.runReduce(Summarizer(useSubkey=True), srcTable=urls_control, dstTable=urls_control)

    # Prepare for the join: merge the two click sets into a single table.
    MR.copyTable(srcTable=urls_control, dstTable=urls_both)
    MR.copyTable(srcTable=urls_exp, dstTable=urls_both, appendMode=True)
    MR.runReduce(join, srcTable=urls_both, dstTable=urls_exp_typed)

    # Keep only snippet types with their click counts to obtain the total
    # number of clicks per snippet type (experiment sample).
    MR.runMap(trans, srcTable=urls_exp_typed, dstTable=by_types_exp)
    MR.runReduce(Summarizer(), srcTable=by_types_exp, dstTable=by_types_exp)
    MR.runMap(trans2, srcTable=by_types_exp, dstTable=by_types_exp)
    mrsort(by_types_exp)
    MR.mergeTables(srcTables=[by_types_exp, all_clicks_exp], dstTable=rate_exp)
    mrsort(rate_exp)

    # Same per-type totals, but for the control sample.
    # NOTE(review): unlike the experiment branch there is no trans2 pass
    # here, and the merged table is named 'clcks_control' rather than
    # 'clicks_by_types_control' - confirm both are intentional.
    MR.runMap(trans, srcTable=urls_control, dstTable=by_types_control)
    MR.runReduce(Summarizer(), srcTable=by_types_control, dstTable=by_types_control)
    mrsort(by_types_control)
    MR.mergeTables(srcTables=[by_types_control, all_clicks_control], dstTable=rate_control)
    mrsort(rate_control)

def trans2(rec):
    """Map step: move the record's value into the subkey slot.

    Yields a single Record with the same key, the incoming value as the
    subkey, and an empty string as the value.
    """
    same_key, former_value = rec.key, rec.value
    yield Record(same_key, former_value, '')

def trans(rec):
    """Map step: re-key a record by its subkey.

    Yields a single Record whose key is the incoming subkey, whose subkey
    is an empty string, and whose value is carried over unchanged. The
    incoming key is dropped.
    """
    new_key, payload = rec.subkey, rec.value
    yield Record(new_key, '', payload)

# Control-sample subkey -> subkey emitted for the experiment record.
# The order of this tuple is the order in which join emits its records.
_CONTROL_TO_EXP_SUBKEY = (
    ('control_sample_schema_movie', 'exp_schema_movie'),
    ('control_sample_org', 'exp_org'),
    ('control_sample_product_offer', 'exp_product_offer'),
    ('control_sample_recipe', 'exp_recipe'),
    ('control_sample_soft', 'exp_soft'),
    ('control_sample_creative_work', 'exp_creative_work'),
    ('control_sample_question', 'exp_question'),
)


def join(key, recs):
    """Reduce step: tag an experiment record with control-sample snippet types.

    Scans all records of one reduce group. If the group contains a record
    with subkey 'exp', one output record is emitted for every known
    'control_sample_*' subkey that is also present in the group. Each output
    carries the matching 'exp_*' subkey and the value of the 'exp' record
    (the last one, if several are present).

    Args:
        key: reduce key supplied by the framework; not used directly, the
            output key is read from the records themselves.
        recs: iterable of records exposing .key, .subkey and .value.

    Yields:
        Record(key, 'exp_<type>', exp_value) for every matched snippet type;
        nothing when the group has no 'exp' record or no known control
        subkey.
    """
    exp_value = ''
    has_exp = False
    seen_subkeys = set()
    group_key = None

    for rec in recs:
        # Remember the key explicitly rather than relying on the loop
        # variable still being bound after the loop ends.
        group_key = rec.key
        if rec.subkey == 'exp':
            has_exp = True
            exp_value = rec.value
        else:
            seen_subkeys.add(rec.subkey)

    if not has_exp:
        return

    for control_subkey, exp_subkey in _CONTROL_TO_EXP_SUBKEY:
        if control_subkey in seen_subkeys:
            yield Record(group_key, exp_subkey, exp_value)



def processExp(key, recs):
    """Reduce step: emit long-clicked web result URLs for the experiment sample.

    Parses the group's records with libra.ParseSession and looks only at
    requests that are 'TTouchYandexWebRequest' and carry test id '16175'.
    For such a request, every main block whose main result is a
    'TWebResult' and whose URL received a click with DwellTimeOnService
    greater than 30 produces one record.

    Args:
        key: reduce key supplied by the framework; unused.
        recs: records of the group, passed straight to libra.ParseSession
            (presumably one user's session log - confirm against the
            user_sessions table layout).

    Yields:
        Record(url, 'exp', '1') per matching result block.

    Raises:
        NameError, AttributeError, TypeError: re-raised so that programming
            errors fail the job instead of being hidden.
    """
    try:
        for req in libra.ParseSession(recs, 'blockstat.dict'):
            if not req.IsA('TTouchYandexWebRequest'):
                continue
            if not req.HasTestID('16175'):
                continue

            long_click_urls = set(
                clck.Url for clck in req.GetClicks()
                if clck.DwellTimeOnService > 30)
            if not long_click_urls:
                # Nothing can be emitted for this request, so skip the
                # block scan entirely.
                continue

            for block in req.GetMainBlocks():
                res = block.GetMainResult()
                if res.IsA('TWebResult') and res.Url in long_click_urls:
                    yield Record(res.Url, 'exp', '1')
    except (NameError, AttributeError, TypeError):
        raise
    except Exception:
        # Deliberate best effort: any other failure (presumably a session
        # libra cannot parse) drops the rest of this group silently.
        pass


# SnippetType of a web result -> subkey emitted for the control sample.
_CONTROL_SUBKEY_BY_SNIPPET = {
    'schema_movie': 'control_sample_schema_movie',
    'question': 'control_sample_question',
    'creativework_snip': 'control_sample_creative_work',
    'software': 'control_sample_soft',
    'recipe': 'control_sample_recipe',
    'productoffer_snip': 'control_sample_product_offer',
}


def processControl(key, recs):
    """Reduce step: emit long-clicked, typed web result URLs for the control sample.

    Parses the group's records with libra.ParseSession and looks only at
    requests that are 'TTouchYandexWebRequest' and carry test id '16173'.
    For every main block whose main result is a 'TWebResult' and whose URL
    received a click with DwellTimeOnService greater than 30:

      * if the result's SnippetType is one of the known types, a record
        with the corresponding 'control_sample_*' subkey is emitted;
      * otherwise, if the request has any blockstat block whose Path starts
        with '/wiz/org_', a 'control_sample_org' record is emitted.

    Args:
        key: reduce key supplied by the framework; unused.
        recs: records of the group, passed straight to libra.ParseSession
            (presumably one user's session log - confirm against the
            user_sessions table layout).

    Yields:
        Record(url, 'control_sample_<type>', '1') per matching result block.

    Raises:
        NameError, AttributeError, TypeError: re-raised so that programming
            errors fail the job instead of being hidden.
    """
    try:
        for req in libra.ParseSession(recs, 'blockstat.dict'):
            if not req.IsA('TTouchYandexWebRequest'):
                continue
            if not req.HasTestID('16173'):
                continue

            long_click_urls = set(
                clck.Url for clck in req.GetClicks()
                if clck.DwellTimeOnService > 30)
            if not long_click_urls:
                # Nothing can be emitted for this request, so skip the
                # block scan entirely.
                continue

            # The org check depends only on the request, so it is evaluated
            # at most once per request, and only if some result needs it.
            has_org_wizard = None

            for block in req.GetMainBlocks():
                res = block.GetMainResult()
                if not res.IsA('TWebResult'):
                    continue
                if res.Url not in long_click_urls:
                    continue

                subkey = _CONTROL_SUBKEY_BY_SNIPPET.get(res.SnippetType)
                if subkey is None:
                    # NOTE(review): this is a request-level test, so every
                    # long-clicked result without a recognised snippet type
                    # is tagged as org when the request has an org wizard
                    # block - confirm this is the intended attribution.
                    if has_org_wizard is None:
                        has_org_wizard = any(
                            bs_block.Path.startswith('/wiz/org_')
                            for bs_block in req.GetBSBlocks())
                    if not has_org_wizard:
                        continue
                    subkey = 'control_sample_org'

                yield Record(res.Url, subkey, '1')
    except (NameError, AttributeError, TypeError):
        raise
    except Exception:
        # Deliberate best effort: any other failure (presumably a session
        # libra cannot parse) drops the rest of this group silently.
        pass

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
        main()
