#!/usr/bin/env python 
# -*- coding: utf-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from urlparse import parse_qs, urlparse
import datetime
import sys
import os
import optparse
import re

def parseOptions(argv):
    """Parse the command-line arguments of the script.

    Args:
        argv: full argument vector in ``sys.argv`` style, i.e. with the
            program name at index 0.  When ``None``, ``sys.argv`` is used.

    Returns:
        The ``optparse`` options object; its ``date`` attribute holds the
        value passed with ``-d``.

    Raises:
        SystemExit: raised by ``parser.error`` when ``-d`` is missing or the
            arguments cannot be parsed.
    """
    if argv is None:
        argv = sys.argv

    usage = 'usage: %prog <args> test-ids to parse'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-d", dest="date", type='string', help="date")

    # Parse the vector handed in by the caller (minus the program name)
    # instead of implicitly re-reading sys.argv.  Positional arguments are
    # accepted but not used by this script.
    options, _ = parser.parse_args(list(argv[1:]))

    if not options.date:
        parser.error('Please, specify date')

    return options

class Extractor:
    """Reduce callable that summarises the records of one key.

    For every key starting with ``'y'`` it counts portal ("morda") access
    records and search request records and remembers which experiment
    markers were seen.  A single ``Record`` is emitted for keys where at
    least one experiment marker was found.
    """

    # Substring identifying the experiment of interest in the ``exp`` field
    # of portal access records.
    MORDA_EXP_MARKER = 'geocontext_fuid2'

    # Test ids looked for (as ``test-ids=<id>``) in the ``search-props``
    # field of search request records.
    SERP_TEST_IDS = ('15690', '15700')

    def __init__(self, date):
        """Remember the processed date (not read by ``__call__``)."""
        self.date = date

    @staticmethod
    def _parse_fields(value):
        """Turn a tab-separated ``name=value`` line into a dict.

        Fields without ``=`` are ignored; only the first ``=`` splits.
        """
        return dict(field.split('=', 1)
                    for field in value.split('\t')
                    if '=' in field)

    def __call__(self, key, recs):
        """Process all records of one key.

        Args:
            key: the reduce key; keys not starting with ``'y'`` (including
                an empty key) are skipped.
            recs: iterable of objects whose ``value`` attribute is a
                tab-separated ``name=value`` string.

        Yields:
            At most one ``Record`` built from ``(uid,
            "<morda_reqs>\\t<serp_reqs>", "<morda_exp>\\t<serp_exp>\\t<ua>")``
            -- presumably key, subkey and value; confirm against
            mapreducelib.  Values never seen are written as ``'None'``.
        """
        uid = key
        # Slicing instead of indexing so that an empty key is skipped
        # rather than raising IndexError.
        if uid[:1] != 'y':
            return

        morda_exp = None
        serp_exp = None

        morda_reqs = 0
        serp_reqs = 0

        ua = None

        for rec in recs:
            data = self._parse_fields(rec.value)

            # Records missing a field must not abort the whole reduce, so
            # every lookup below tolerates absent keys.
            rec_type = data.get('type')
            service = data.get('service')

            if rec_type == 'ACCESS' and service == 'portal.yandex':
                morda_reqs += 1
                # Keep the previously seen user agent if this record has none.
                ua = data.get('u-agent', ua)
                for exp in data.get('exp', '').split(','):
                    if self.MORDA_EXP_MARKER in exp:
                        morda_exp = exp  # the last matching entry wins

            elif rec_type == 'REQUEST' and service == 'www.yandex':
                serp_reqs += 1
                search_props = data.get('search-props', '')
                for exp in self.SERP_TEST_IDS:
                    if 'test-ids=' + exp in search_props:
                        serp_exp = exp  # the last matching test id wins

        if morda_exp or serp_exp:
            yield Record(uid,
                         '\t'.join([str(morda_reqs), str(serp_reqs)]),
                         '\t'.join([str(morda_exp), str(serp_exp), str(ua)]))

def main(options):
    """Configure MapReduce and run the Extractor reduce for one date.

    The job reads ``user_sessions/<date>`` and writes
    ``shining/touch_morda_v2/<date>``, where ``<date>`` is ``options.date``.
    """
    # Connection settings of the cluster the job runs on.
    # Alternatives that were used before:
    #   server=os.environ['DEF_MR_SERVER'], username='tmp'
    #   MapReduce.useDefaults(testMode=True)
    cluster_settings = dict(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
    )
    MapReduce.useDefaults(**cluster_settings)

    day = options.date

    # Sampled alternative input: 'sample_by_yuid_1p/user_sessions/' + date
    source_table = 'user_sessions/' + day
    # Earlier alternative output: 'shining/clicks_minutes_1p/' + date
    target_table = 'shining/touch_morda_v2/' + day

    # Alternative YT configuration kept for reference:
    #   MapReduce.useDefaults(server='plato.yt.yandex.net',
    #                         mrExec='mapreduce-yt',
    #                         verbose=True,
    #                         saveSource=True,
    #                         lenvalMode=True,
    #                         loggerName=None,
    #                         )
    #   in_table  = '//userdata/user_sessions/' + date
    #   out_table = '//tmp/shining/touch_morda_v2/' + date

    reducer = Extractor(day)
    shipped_files = ['/home/shining/data/blockstat.dict']
    MapReduce.runReduce(
        reducer,
        srcTable=source_table,
        dstTable=target_table,
        sortMode=True,
        files=shipped_files,
    )

if __name__ == '__main__':
    # Script entry point: parse the command line, then launch the job.
    main(parseOptions(sys.argv))
