#!/usr/bin/env python 
# -*- coding: utf-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from urlparse import parse_qs, urlparse
import httpagentparser
import datetime
import sys
import os
import optparse
import re
import libra

def parseOptions(argv):
    """Parse the command line into options and the list of test ids.

    Args:
        argv: Full argument vector with the program name at index 0
            (normally ``sys.argv``). ``None`` lets optparse fall back to
            ``sys.argv[1:]``.

    Returns:
        A ``(options, testids)`` tuple. ``options.date`` holds the value
        given with ``-d``; ``testids`` is the non-empty list of positional
        arguments.

    Calls ``parser.error`` (which exits the process) when the date or the
    test ids are missing.
    """
    usage = 'usage: %prog <args> test-ids to parse'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-d", dest="date", type='string', help="date")

    # Parse the vector we were handed rather than implicitly re-reading
    # sys.argv; index 0 is the program name and is not an argument.
    args = None if argv is None else list(argv[1:])
    options, testids = parser.parse_args(args)

    if not options.date:
        parser.error('Please, specify date')

    if not testids:
        parser.error('Please, specify at least one testid')

    return options, testids

def get_1min_bucket(timestamp):
    """Return the ``HH:MM`` label of the minute that contains *timestamp*.

    *timestamp* may be anything ``int()`` accepts and is read as Unix
    seconds; the label is rendered in the process's local time zone.
    """
    seconds = int(timestamp)
    moment = datetime.datetime.fromtimestamp(seconds)
    return moment.strftime('%H:%M')

def get_5min_bucket(timestamp):
    """Return the ``HH:MM`` label of the 5-minute bucket containing *timestamp*.

    *timestamp* may be anything ``int()`` accepts and is read as Unix
    seconds; the label is rendered in the process's local time zone. The
    minute part is rounded down to a multiple of five and zero-padded, so
    labels are always five characters long and sort chronologically within
    a day (``'12:05'`` rather than ``'12:5'``).
    """
    moment = datetime.datetime.fromtimestamp(int(timestamp))
    bucket_minute = moment.minute - moment.minute % 5
    return '%02d:%02d' % (moment.hour, bucket_minute)

class Extractor:
    """Reduce callable that emits request and click records for chosen test ids.

    For every ``TYandexWebRequest`` in a user's session whose
    ``ServiceDomRegion`` is ``'ru'`` and which carries one of ``testids``,
    a ``'request'`` Record is yielded, followed by one ``'direct'`` or
    ``'web'`` Record per click on the request's main and parallel blocks.

    The Record value is a tab-separated string of: test id, browser name,
    browser version, ``'ajax'``/``'plain'``, timestamp, 1-minute bucket,
    5-minute bucket and the raw User-Agent.
    """

    def __init__(self, date, testids):
        """Store the processing date and the test ids to look for."""
        self.testids = testids
        self.date = date

    def __call__(self, key, recs):
        """Yield Records for one reduce key.

        Args:
            key: The reduce key (user id). Keys that do not start with
                ``'y'`` (including the empty key) are skipped.
            recs: The records of this key, passed to ``libra.ParseSession``.

        Raises:
            RuntimeError: re-raised from ``libra.ParseSession`` unless its
                message contains ``'fat user'`` or ``'ParseSession can'``,
                in which case the key is silently skipped.
        """
        uid = key
        # Slice instead of uid[0] so that an empty key is skipped rather
        # than aborting the job with IndexError.
        if uid[:1] != 'y':
            return

        try:
            s = libra.ParseSession(recs, './blockstat.dict')
        except RuntimeError as e:
            message = str(e)
            if 'fat user' in message or 'ParseSession can' in message:
                return
            # Bare raise keeps the original traceback.
            raise

        for r in s:
            if not r.IsA("TYandexWebRequest"):
                continue

            if r.ServiceDomRegion != 'ru':
                continue

            # No break: when several ids match, the last one in
            # self.testids wins.
            testid = None
            for ti in self.testids:
                if r.HasTestID(ti):
                    testid = ti
            if not testid:
                continue

            raw_ua = r.UserAgent
            ua = httpagentparser.detect(raw_ua)
            try:
                ua_data = ua['browser']['name'] + '\t' \
                    + ua['browser']['version']
            except KeyError:
                # Keep the column count stable when the browser is unknown.
                ua_data = 'ua_not_parsed\tua_not_parsed'

            url = r.FullRequest
            # A 'callback' query parameter marks the request as ajax.
            if 'callback' in parse_qs(urlparse(url).query):
                ajax = 'ajax'
            else:
                ajax = 'plain'

            value = '\t'.join([testid,
                               ua_data,
                               ajax,
                               str(r.Timestamp),
                               get_1min_bucket(r.Timestamp),
                               get_5min_bucket(r.Timestamp),
                               raw_ua,
                               ])

            yield Record(uid, 'request', value)

            for rec in self._click_records(uid, r.GetMainBlocks(), value):
                yield rec

            for rec in self._click_records(uid, r.GetParallelBlocks(), value):
                yield rec

    def _click_records(self, uid, blocks, value):
        """Yield one 'direct' or 'web' Record per click on each of *blocks*."""
        for b in blocks:
            m = b.GetMainResult()
            clicks = b.GetClicks()
            if m.IsA('TDirectResult'):
                kind = 'direct'
            elif m.IsA('TWebResult'):
                kind = 'web'
            else:
                # Other result types produce no click records.
                continue

            for _ in clicks:
                yield Record(uid, kind, value)

def main(options, testids):
    """Run the Extractor reduce over the user sessions of ``options.date``.

    Reads ``//userdata/user_sessions/<date>`` and writes
    ``//tmp/shining/https_msie_ajax/<date>``, shipping the blockstat
    dictionary alongside the job.

    Args:
        options: Parsed options; only ``options.date`` is read.
        testids: Test ids handed to the Extractor.
    """
    # NOTE: an earlier configuration used to live here as commented-out
    # code: server taken from the DEF_MR_SERVER environment variable, the
    # /Berkanavt/bin/mapreduce-dev binary, input from user_sessions/<date>
    # (or the sample_by_yuid_1p sample) and output to
    # shining/clicks_minutes[_1p]/<date>.
    cluster_defaults = {
        'server': 'plato.yt.yandex.net',
        'mrExec': 'mapreduce-yt',
        'verbose': True,
        'saveSource': True,
        'lenvalMode': False,
        'loggerName': None,
    }
    MapReduce.useDefaults(**cluster_defaults)

    source_table = '//userdata/user_sessions/' + options.date
    result_table = '//tmp/shining/https_msie_ajax/' + options.date

    reducer = Extractor(options.date, testids)
    # The dictionary is uploaded with the job; the reducer opens it as
    # './blockstat.dict'.
    shipped_files = ['/home/shining/data/blockstat.dict']
    MapReduce.runReduce(
        reducer,
        srcTable=source_table,
        dstTable=result_table,
        sortMode=True,
        files=shipped_files,
    )

if __name__ == '__main__':
    # Script entry point: parse the command line, then launch the job.
    main(*parseOptions(sys.argv))
