#!/usr/bin/env python 
# -*- coding: utf-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from urlparse import parse_qs, urlparse
import datetime
import sys
import os
import optparse
import re

def parseOptions(argv):
    """Parse the command line and return the parsed option values.

    argv -- full argument vector with the program name at index 0 (the
            shape of ``sys.argv``).  When ``None``, optparse falls back to
            ``sys.argv[1:]`` on its own.

    Returns the optparse values object; its ``date`` attribute is always
    set.  Positional arguments (described as test-ids in the usage string)
    are accepted but not returned.

    Exits through ``parser.error`` (SystemExit) when ``-d`` is missing.
    """
    usage = 'usage: %prog <args> test-ids to parse'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-d", dest="date", type='string', help="date")

    # Parse the vector we were handed instead of silently re-reading
    # sys.argv; skip the program name, as optparse expects.
    args = argv[1:] if argv is not None else None
    options, _testids = parser.parse_args(args)

    if not options.date:
        parser.error('Please, specify date')

    return options

class Extractor:
    """Reduce callable counting experiment requests for one user key.

    For every key starting with 'y' it scans the key's records, parses each
    record value as tab-separated ``name=value`` fields and, for REQUEST
    records of the 'www.yandex' service, counts per experiment id:

    * report hits   -- ``'test-ids=<id>'`` occurs in the 'search-props' field;
    * balancer hits -- the 'test-buckets' field starts with ``<id>``.

    The 'u-agent' field of the first matching ACCESS record that carries a
    non-empty value is remembered as well.  One Record is yielded per key,
    and only when at least one hit was counted.
    """

    # Experiment ids checked in every request.
    _EXPERIMENTS = ('15690', '15700')
    # Only records of this service are taken into account.
    _SERVICE = 'www.yandex'

    def __init__(self, date):
        # Kept on the instance; the reduce logic below does not read it.
        self.date = date

    def _parse_fields(self, value):
        """Turn a tab-separated ``name=value`` line into a dict.

        Chunks without '=' are dropped; only the first '=' splits a chunk.
        """
        return dict(chunk.split('=', 1)
                    for chunk in value.split('\t') if '=' in chunk)

    def __call__(self, key, recs):
        """Yield at most one Record summarising the records of *key*.

        key  -- reduce key (string); keys not starting with 'y' are skipped.
        recs -- iterable of objects exposing a string ``value`` attribute.

        The Record receives three positional arguments: the key, the
        gateway counter as a string, and a tab-joined string of
        balancer[15690], balancer[15700], report[15690], report[15700]
        and the user agent ('None' when no user agent was seen).
        """
        uid = key
        # Slice instead of index so an empty key is skipped, not a crash.
        if uid[:1] != 'y':
            return

        has_reqs = False
        balancer_exp = defaultdict(int)
        report_exp = defaultdict(int)
        rpt_gateway = 0
        ua = None

        for rec in recs:
            data = self._parse_fields(rec.value)

            # .get() throughout: a record lacking a field is treated as a
            # non-match rather than aborting the reduce with KeyError.
            if data.get('service') != self._SERVICE:
                continue
            rec_type = data.get('type')

            if rec_type == 'REQUEST':
                search_props = data.get('search-props', '')
                test_buckets = data.get('test-buckets', '')
                is_gateway = 'rpt=gateway' in data.get('full-request', '')

                for exp in self._EXPERIMENTS:
                    # NOTE(review): the gateway counter is bumped once per
                    # matching condition, so a single request can add more
                    # than one to it -- confirm this is intended.
                    if 'test-ids=' + exp in search_props:
                        report_exp[exp] += 1
                        has_reqs = True
                        if is_gateway:
                            rpt_gateway += 1
                    if test_buckets.startswith(exp):
                        balancer_exp[exp] += 1
                        has_reqs = True
                        if is_gateway:
                            rpt_gateway += 1
            elif rec_type == 'ACCESS' and not ua:
                # Keep looking on later ACCESS records while ua is empty.
                ua = data.get('u-agent')

        if has_reqs:
            yield Record(uid, str(rpt_gateway),
                         '\t'.join([str(balancer_exp['15690']),
                                    str(balancer_exp['15700']),
                                    str(report_exp['15690']),
                                    str(report_exp['15700']),
                                    str(ua)]))

def main(options):
    """Configure the MapReduce client and run the Extractor reduce.

    options -- object with a ``date`` attribute; the date is appended to
               both the source and the destination table paths.
    """
    day = options.date
    source_table = 'user_sessions/' + day
    target_table = 'shining/touch_morda_v2_wrong_condition/' + day

    # Alternatives that used to live here as commented-out code:
    #   server=os.environ['DEF_MR_SERVER'], username='tmp', testMode=True
    #   source 'sample_by_yuid_1p/user_sessions/<date>'
    #   target 'shining/clicks_minutes_1p/<date>'
    #   YT: server='plato.yt.yandex.net', mrExec='mapreduce-yt',
    #       saveSource=True, lenvalMode=True, loggerName=None,
    #       source '//userdata/user_sessions/<date>',
    #       target '//tmp/shining/touch_morda_v2/<date>'
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
    )

    reducer = Extractor(day)
    # NOTE(review): the Extractor visible in this file does not open the
    # attached dictionary file -- confirm it is still needed.
    attached_files = ['/home/shining/data/blockstat.dict']
    MapReduce.runReduce(
        reducer,
        srcTable=source_table,
        dstTable=target_table,
        sortMode=True,
        files=attached_files,
    )

if __name__ == '__main__':
    # Script entry point: parse the command line, then launch the job.
    main(parseOptions(sys.argv))
