#!/usr/bin/env python 
# -*- coding: utf-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from urlparse import parse_qs, urlparse
import datetime
import sys
import os
import optparse
import re
import libra

def parseOptions(argv):
    """Parse command-line arguments and return the option values.

    Args:
        argv: full argument vector in ``sys.argv`` layout (program name
            first, then the actual arguments). ``None`` falls back to the
            process's own ``sys.argv``.

    Returns:
        The ``optparse`` values object; its ``date`` attribute is always
        set. Positional arguments (test-ids) are accepted but discarded.

    Exits through ``parser.error`` (``SystemExit``) when ``-d`` is missing.
    """
    usage='usage: %prog <args> test-ids to parse'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-d", dest="date", type='string', help="date")

    # Parse the vector we were handed instead of implicitly re-reading
    # sys.argv; element 0 is the program name and must be skipped.
    # Passing None keeps optparse's default of sys.argv[1:].
    args = None if argv is None else list(argv[1:])
    options, _testids = parser.parse_args(args)

    if not options.date:
        parser.error('Please, specify date')

    return options

# Experiment test-ids that Extractor looks for on each web request.
TESTIDS = [
    '17374',
    '17375',
]

class Extractor:
    """Reduce callable that collects, per user, requests made under TESTIDS.

    For every key starting with ``'y'`` the records are parsed into a
    session with ``libra.ParseSession``; each ``TYandexWebRequest`` that
    carries one of the ids in ``TESTIDS`` contributes a query descriptor.
    """

    def __init__(self, date):
        # Kept on the instance; __call__ itself does not read it.
        self.date = date

    def __call__(self, key, recs):
        """Process all records of one key.

        Args:
            key: reduce key, used as the user id. Keys that are empty or do
                not start with ``'y'`` are skipped.
            recs: the records for that key, handed to ``libra.ParseSession``.

        Yields:
            At most one ``Record(uid, test_ids, queries)`` where ``test_ids``
            is the space-joined, sorted set of matched test-ids and
            ``queries`` is the ``'@@'``-joined list of
            ``'[ <query> ] <page> <timestamp>'`` descriptors. Nothing is
            yielded when no request matched.

        Raises:
            RuntimeError: re-raised from ``libra.ParseSession`` unless its
                message contains ``'fat user'`` or ``'ParseSession can'``.
        """
        uid = key
        # Guard against an empty key before indexing into it.
        if not uid or uid[0] != 'y':
            return

        try:
            s = libra.ParseSession(recs, './blockstat.dict')
        except RuntimeError as e:
            message = str(e)
            # These two failure kinds are deliberately skipped.
            if 'fat user' in message or 'ParseSession can' in message:
                return
            # Bare raise keeps the original traceback intact.
            raise

        queries = []
        testids = []
        for r in s:
            if not r.IsA('TYandexWebRequest'):
                continue

            # If a request carries several ids, the last one in TESTIDS wins.
            req_testid = None
            for ti in TESTIDS:
                if r.HasTestID(ti):
                    req_testid = ti
            if not req_testid:
                continue
            testids.append(req_testid)

            query_key = '[ ' + r.Query + ' ] ' + str(r.PageNo) + ' ' + str(r.Timestamp)
            queries.append(query_key)

        if testids:
            # Sort the distinct ids so the output does not depend on set
            # iteration order.
            yield Record(uid, ' '.join(sorted(set(testids))), '@@'.join(queries))

def main(options):
    """Configure the MapReduce client and run the Extractor reduce.

    Reads ``user_sessions/<date>`` and writes
    ``shining/EXPERIMENTS-5624/<date>``, where ``<date>`` is
    ``options.date``.

    Args:
        options: object exposing a ``date`` string attribute, e.g. the
            value returned by ``parseOptions``.
    """
    # Settings that were previously kept here as commented-out alternatives:
    #   * MapReduce.useDefaults(testMode=True)
    #   * server=os.environ['DEF_MR_SERVER'], username='tmp'
    #   * input 'sample_by_yuid_1p/user_sessions/<date>',
    #     output 'shining/clicks_minutes_1p/<date>'
    #   * server='plato.yt.yandex.net', mrExec='mapreduce-yt', verbose=True,
    #     saveSource=True, lenvalMode=True, loggerName=None with
    #     input '//userdata/user_sessions/<date>' and
    #     output '//tmp/shining/touch_morda_v2/<date>'
    client_defaults = dict(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
    )
    MapReduce.useDefaults(**client_defaults)

    day = options.date
    source_table = 'user_sessions/' + day
    result_table = 'shining/EXPERIMENTS-5624/' + day

    MapReduce.runReduce(
        Extractor(day),
        srcTable=source_table,
        dstTable=result_table,
        sortMode=True,
        # Extractor opens './blockstat.dict'; presumably this list is what
        # makes the file available to the job - confirm against mapreducelib.
        files=['/home/shining/data/blockstat.dict'],
    )

# Script entry point: parse the command line, then launch the job.
if __name__ == '__main__':
    main(parseOptions(sys.argv))
