#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
from mapreducelib import MapReduce, Record, TemporaryTable
import sys
import libra
import re
import argparse

# URL patterns used to classify a search result, keyed by social-network name.
# They are applied with re.match, so each one is anchored at the start of the
# URL.  The literals are plain raw strings (not the Python-2-only ur'' prefix):
# the patterns are pure ASCII, so matching is unaffected and the table parses
# on any Python version.
#
# NOTE(review): the trailing `[^$]` is a character class ("any character but a
# literal '$'"), not an end-of-string test; presumably it was meant as "at
# least one more character follows" -- kept as is, confirm before changing.

# Pages treated as a user/community profile.
profile = {
    # Any vk.com path that does not start with one of the post prefixes
    # recognised by post['vk'].  The negative lookahead replaces four
    # per-character negated classes, which wrongly rejected every path whose
    # first letters merely overlapped with wall/topic/video/photo
    # (e.g. /pavel) and every path shorter than four characters (e.g. /id1).
    'vk': r'http(|s):\/\/(www\.|)vk\.com\/(?!wall|topic|video|photo).',
    'google+': r'http(|s):\/\/(www\.|)plus\.google\.com\/(u\/0\/[0-9]+|[^/]+)\/(posts|about)(\/|)$',
    'smotra': r'http(|s):\/\/(www\.|)smotra\.ru\/[^$]',
}

# Pages treated as an individual post / piece of content.
post = {
    # The dot in "vk.com" is escaped so that it no longer matches any character.
    'vk': r'http(|s):\/\/(www\.|)vk\.com\/(wall|topic|video|photo)',
    'fb': r'http(|s):\/\/(www\.|ru-ru\.|)facebook\.com\/[^/]+(\/|\.php)[^$]',
    'twitter': r'http(|s):\/\/(www\.|)twitter\.com/[^/]+\/[^$]',
    'google+': r'http(|s):\/\/(www\.|)plus\.google\.com\/(u\/0\/[0-9]+|[^/]+)\/posts\/[^$]',
}


def HandleOption():
    """Build the command-line parser for this script.

    All options are optional and carry defaults:
      --server  mapreduce server          (stored as ``server``)
      --user    mapreduce user            (stored as ``user``)
      --mr      mapreduce binary          (stored as ``mr``)
      --bs      path to blockstat.dict    (stored as ``blockstat``)

    Returns the configured ``argparse.ArgumentParser``; the caller is
    expected to invoke ``parse_args()`` on it.
    """
    # (flag, destination attribute, help text, default value)
    option_table = (
        ("--server", "server", "mapreduce server", 'sakura.search.yandex.net:8013'),
        ("--user", "user", "mapreduce user", 'freshness'),
        ("--mr", "mr", "mapreduce binary", '/Berkanavt/bin/mapreduce-dev'),
        ("--bs", "blockstat", "path to blockstat.dict", '/home/itajn/serploader/blockstat.dict'),
    )

    option_parser = argparse.ArgumentParser()
    for flag, attribute, description, fallback in option_table:
        option_parser.add_argument(
            flag,
            dest=attribute,
            help=description,
            default=fallback,
            required=False,
        )
    return option_parser

def countsocial(key, recs):
    """Reduce function: emit one record per social-network result shown.

    The records of one reduce key are parsed into a session with
    ``libra.ParseSession`` using the dictionary file ``./blockstat.dict``
    (relative to the working directory of the job).  For every request of
    type TYandexWebRequest, TMobileYandexWebRequest or TTouchYandexWebRequest
    whose ``ServiceDomRegion`` is ``'ru'``, each main block whose main result
    is a ``TWebResult`` has its URL matched against the module-level
    ``profile`` and ``post`` pattern tables.

    Args:
        key: the reduce key; not used by the computation
            (presumably a user id -- confirm against the source table).
        recs: the records for ``key``, passed unchanged to
            ``libra.ParseSession``.

    Yields:
        ``Record(site, 'profile' | 'post', '<clicks>\\t<position>')`` for every
        matching pattern, where ``<clicks>`` is the number of clicks on the
        block and ``<position>`` is ``block.Position``.  A URL that matches
        several patterns produces several records.
    """
    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed is skipped.  Only
        # Exception is caught so that KeyboardInterrupt / SystemExit still
        # propagate instead of being silently swallowed.
        return

    accepted_requests = (
        "TYandexWebRequest",
        "TMobileYandexWebRequest",
        "TTouchYandexWebRequest",
    )
    # Pattern tables in emission order: profile records first, then posts.
    pattern_tables = (('profile', profile), ('post', post))

    for request in session:
        if not any(request.IsA(name) for name in accepted_requests):
            continue
        if request.ServiceDomRegion != 'ru':
            continue

        for block in request.GetMainBlocks():
            res = block.GetMainResult()
            if not res.IsA("TWebResult"):
                continue

            url = res.Url
            clicks = sum(1 for _ in block.GetClicks())
            # The value is the same for every pattern the URL matches.
            value = '\t'.join([str(clicks), str(block.Position)])

            for label, patterns in pattern_tables:
                for site in patterns:
                    if re.match(patterns[site], url):
                        yield Record(site, label, value)

def main():
    """Parse the command line and run the ``countsocial`` reduce per day.

    Configures ``MapReduce`` defaults from the parsed options, then, for each
    entry of a hard-coded list of day stamps (currently only '20160518'),
    prints the day and runs ``countsocial`` as a reduce from
    ``user_sessions/<day>`` into ``itajn/FR-2129/<day>``, passing the
    blockstat dictionary path in ``files``.
    """
    options = HandleOption().parse_args()

    MapReduce.useDefaults(
        server=options.server,
        username=options.user,
        mrExec=options.mr,
        verbose=True,
    )

    for day in ['20160518']:
        # Progress marker on stdout; print(x) with a single argument prints
        # the same text as the statement form.
        print(day)
        source_table = 'user_sessions/' + day
        result_table = 'itajn/FR-2129/' + day
        MapReduce.runReduce(
            countsocial,
            srcTable=source_table,
            dstTable=result_table,
            files=[options.blockstat],
            sortMode=True,
        )

# Script entry point: run the job only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
