import yt.wrapper
import sys
import urlparse
import re
import urllib
import urllib2
import math
import json
import time
import datetime
from datetime import date, datetime, timedelta
import argparse
import ast
import libra

def parse_args():
    """Parse the command-line options of the script.

    Recognised options (all optional):
      --timestamp  date timestamp for calculation (no default)
      --from_date  first day of the range, YYYY-MM-DD (default 2017-02-28)
      --to_date    last day of the range, YYYY-MM-DD (default 2017-02-28)
      --bs         path to blockstat.dict, stored as ``blockstat``

    Returns the ``argparse.Namespace`` produced from ``sys.argv``.
    """
    # (flags, keyword options) in the order they should appear in --help.
    option_specs = (
        (('--timestamp',),
         {'help': 'date timestamp for calculation'}),
        (('--from_date',),
         {'default': '2017-02-28',
          'help': 'from date for calculation (format: YYYY-MM-DD)'}),
        (('--to_date',),
         {'default': '2017-02-28',
          'help': 'to date for calculation (format: YYYY-MM-DD)'}),
        (('--bs',),
         {'dest': 'blockstat',
          'help': 'path to blockstat.dict',
          'default': '/home/galamaj/blockstat.dict',
          'required': False}),
        # Options that are currently disabled:
        # (('--mr_server',), {'default': 'hahn', 'help': 'MR server (hahn, banach, ...)'}),
        # (('--yt_pool',), {'default': 'robot-suggestor-dev', 'help': 'YT pool'}),
    )

    parser = argparse.ArgumentParser(add_help=True, description='Suggest metrics calc')
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    return parser.parse_args()

def get_dates(timestamp, from_date, to_date):
    """Build the list of days (as 'YYYY-MM-DD' strings) to process.

    timestamp -- optional string of digits; only its first 10 characters are
                 converted (so e.g. a 13-digit millisecond value is cut down
                 to seconds) and the resulting local-time day comes first.
    from_date -- optional first day of an inclusive range, 'YYYY-MM-DD'.
    to_date   -- optional last day of the range; when from_date is given
                 without to_date, the current day is used instead.

    The timestamp day and the range are simply concatenated, so the same day
    can appear twice. Returns an empty list when nothing is specified.
    """
    day_format = '%Y-%m-%d'
    one_day = timedelta(1)
    result = []

    if timestamp:
        moment = datetime.fromtimestamp(int(timestamp[:10]))
        result.append(moment.strftime(day_format))

    # An open-ended range runs up to today.
    if from_date and not to_date:
        to_date = datetime.now().strftime(day_format)

    if not (from_date and to_date):
        return result

    cursor = datetime.strptime(from_date, day_format)
    last_day = datetime.strptime(to_date, day_format)
    while cursor <= last_day:
        result.append(cursor.strftime(day_format))
        cursor += one_day
    return result

def count_actions(key, recs):
    """Reducer: count clicks by path for requests in the tracked experiments.

    The records of one reduce key are parsed with ``libra.ParseSession``
    (using ``./blockstat.dict``). Only ``TTouchYandexWebRequest`` requests
    carrying test id 41631, 41632 or 41633 (checked in that order) are used.
    For each such request one row is yielded containing the experiment id,
    the first two items returned by ``GetBrowser()`` and the click counters.

    NOTE(review): the counters are created once per call and never reset, so
    every yielded row holds running totals over all matching requests seen so
    far for this key, not per-request counts -- confirm this is intended.

    ``key`` is not used. If a ``RuntimeError`` is raised while parsing or
    iterating, the generator stops silently and yields nothing further.
    """
    # Click path -> output column incremented when a click has that path.
    column_by_path = {
        '183.2155.2154': "start",
        '183.2155.1309': "init",
        '183.2155.529': "stop",
        '183.2155.2368': "data",
        '183.2155.1030': "error",
        '183.2155.2040': "continue",
        '183.2155.1827.529': "paranja_stop",
        '183.2155.1827.2040': "paranja_continue",
        '183.2155.177': "help",
    }
    experiment_ids = ("41631", "41632", "41633")
    totals = dict.fromkeys(column_by_path.values(), 0)

    try:
        for request in libra.ParseSession(recs, './blockstat.dict'):
            if not request.IsA("TTouchYandexWebRequest"):
                continue
            # A referer filter on "query_source=voice" used to sit here; it
            # is disabled.
            browser = request.GetBrowser()
            bro_name, bro_version = browser[0], browser[1]

            # First matching test id wins; requests outside the experiments
            # are ignored.
            exprt = None
            for test_id in experiment_ids:
                if request.HasTestID(test_id):
                    exprt = test_id
                    break
            if exprt is None:
                continue

            for click in request.GetClicks():
                column = column_by_path.get(click.Path)
                if column is not None:
                    totals[column] += 1

            # Emit a fresh dict each time so later increments do not alter
            # rows that were already handed out.
            row = {
                "exprt": exprt,
                "browser": bro_name,
                "browser_version": bro_version,
            }
            row.update(totals)
            yield row
    except RuntimeError:
        return


def calculations(date):
    """Run the ``count_actions`` reduce for a single day.

    date -- day as a string; it is inserted into both table paths.

    Reads ``//user_sessions/pub/search/daily/<date>/clean`` and writes to
    ``//home/suggest-dev/galamaj/analytics/web_voice/only_exprt_check_<date>``.
    The output table is created first (an existing one is accepted), the
    global YT ``memory_limit`` config value is set, and a progress line is
    printed before the reduce starts.
    """
    source_table = '//user_sessions/pub/search/daily/' + date + '/clean'
    target_table = '//home/suggest-dev/galamaj/analytics/web_voice/only_exprt_check_' + date

    # 100 * 1024^3; set on the shared yt.wrapper config.
    yt.wrapper.config['memory_limit'] = 100 * 1024 ** 3

    yt.wrapper.create(
        "table",
        path=target_table,
        recursive=True,
        ignore_existing=True,
        attributes=None,
    )
    target_path = yt.wrapper.TablePath(target_table)

    print("%s   %s -> %s" % (str(datetime.now()), source_table, target_table))

    # blockstat.dict is shipped with the job; count_actions opens it by the
    # same relative path.
    yt.wrapper.run_reduce(
        count_actions,
        source_table,
        target_path,
        reduce_by='key',
        files=["./blockstat.dict"],
    )


if __name__ == '__main__':
    # Script entry point: work out which days to process from the command
    # line, show them, then run the reduce for each day in order.
    args = parse_args()
    dates = get_dates(
        timestamp=args.timestamp,
        from_date=args.from_date,
        to_date=args.to_date,
    )
    print(dates)
    for date in dates:
        calculations(date)









