#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
from mapreducelib import MapReduce, Record, TemporaryTable
import sys
import libra
import re
import argparse

# Pattern (used with re.match, so anchored at the start of the URL) for an
# Instagram URL with a non-empty first path segment, over http or https and
# with an optional "www." prefix.  Written as a raw string so the backslashes
# reach the regex engine untouched, and with the dot in "instagram.com"
# escaped so that hosts such as "instagramXcom" no longer match.
instagram = r'https?://(?:www\.)?instagram\.com/[^/]+'

# Test ids passed to request.HasTestID() in countsocial to label a request
# as belonging to the 'Experiment' or the 'Control' group.
testidexp = '25497'
testidcnt = '25496'

def HandleOption():
    """Build the command-line parser for this script.

    Every option is optional and carries a default value.

    Returns:
        argparse.ArgumentParser: a parser exposing ``server``, ``user``,
        ``mr`` and ``blockstat`` on the parsed namespace.
    """
    # (flag, destination attribute, help text, default value)
    option_specs = (
        ("--server", "server", "mapreduce server",
         'sakura.search.yandex.net:8013'),
        ("--user", "user", "mapreduce user",
         'freshness'),
        ("--mr", "mr", "mapreduce binary",
         '/Berkanavt/bin/mapreduce-dev'),
        ("--bs", "blockstat", "path to blockstat.dict",
         '/home/itajn/serploader/blockstat.dict'),
    )
    parser = argparse.ArgumentParser()
    for flag, dest, help_text, default in option_specs:
        parser.add_argument(
            flag,
            dest=dest,
            help=help_text,
            default=default,
            required=False,
        )
    return parser

def countsocial(key, recs):
    """Reduce step: emit long-click counts for Instagram results in a session.

    The records of one reduce key are parsed into a libra session.  For every
    web request tagged with the experiment or the control test id, each main
    block whose main result is a web result with an Instagram URL produces
    one output record.

    Args:
        key: reduce key (not used by this function).
        recs: records for that key, handed to ``libra.ParseSession``.

    Yields:
        Record(url, group, clicks) where ``group`` is 'Experiment' or
        'Control' and ``clicks`` is the stringified number of clicks on the
        block whose DwellTime is at least 15 (presumably seconds -- confirm
        against the libra documentation).
    """
    try:
        # NOTE(review): the dictionary is read from the working directory,
        # so the file shipped to the job must be named 'blockstat.dict'.
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed is skipped.  Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return

    for request in session:
        if not request.IsA("TYandexWebRequest"):
            continue
        # The experiment id takes precedence if a request carries both.
        if request.HasTestID(testidexp):
            group = 'Experiment'
        elif request.HasTestID(testidcnt):
            group = 'Control'
        else:
            continue

        for block in request.GetMainBlocks():
            res = block.GetMainResult()
            if not res.IsA("TWebResult"):
                continue
            url = res.Url
            if not re.match(instagram, url):
                continue
            # Clicks are only inspected for blocks that will be emitted.
            long_clicks = sum(
                1 for click in block.GetClicks()
                if int(click.DwellTime) >= 15
            )
            yield Record(url, group, str(long_clicks))

def aggregate(key, recs):
    """Reduce step: total shows and clicks per group for one key.

    Each incoming record counts as one show; its ``value`` is parsed as an
    integer and added to the click total.  Records whose ``subkey`` is
    'Experiment' go to the experiment totals, every other record goes to the
    control totals.

    Args:
        key: reduce key, reused as the key of both output records.
        recs: iterable of records exposing ``subkey`` and ``value``.

    Yields:
        Record(key, 'Experiment', "<shows>\\t<clicks>") followed by
        Record(key, 'Control', "<shows>\\t<clicks>"); both are always
        emitted, even when a group saw no records.
    """
    shows = {'Experiment': 0, 'Control': 0}
    clicks = {'Experiment': 0, 'Control': 0}

    for rec in recs:
        # Anything that is not explicitly 'Experiment' is treated as control.
        bucket = 'Experiment' if rec.subkey == 'Experiment' else 'Control'
        shows[bucket] += 1
        clicks[bucket] += int(rec.value)

    for bucket in ('Experiment', 'Control'):
        totals = '\t'.join((str(shows[bucket]), str(clicks[bucket])))
        yield Record(key, bucket, totals)

def main():
    """Run the per-day extraction jobs, then aggregate their outputs.

    Parses the command line, configures MapReduce defaults, runs
    ``countsocial`` as a reduce over each day's ``user_sessions`` table and
    finally runs ``aggregate`` over all per-day output tables, writing the
    result to 'itajn/EXP-8289/allurls_long'.
    """
    options = HandleOption().parse_args()
    MapReduce.useDefaults(
        server=options.server,
        username=options.user,
        mrExec=options.mr,
        verbose=True,
    )

    dates = [
        '20160525',
        '20160526',
        '20160527',
        '20160528',
        '20160529',
        '20160530',
        '20160531',
    ]

    per_day_tables = []
    for date in dates:
        # Progress output; the single-argument call form prints the same
        # text as the Python 2 print statement.
        print(date)
        day_output = 'itajn/EXP-8289/' + date
        day_sessions = 'user_sessions/' + date
        MapReduce.runReduce(
            countsocial,
            srcTable=day_sessions,
            dstTable=day_output,
            files=[options.blockstat],
            sortMode=True,
        )
        per_day_tables.append(day_output)

    MapReduce.runReduce(
        aggregate,
        srcTables=per_day_tables,
        dstTable='itajn/EXP-8289/allurls_long',
        files=[options.blockstat],
        sortMode=True,
    )

# Start the job only when this file is executed as a script; importing the
# module does not call main().
if __name__ == '__main__':
    main()
