import yt.wrapper
import sys
import urlparse
import re
import urllib
import urllib2
import math
import json
import time
import datetime
from datetime import date, datetime, timedelta
import argparse
import ast
# import libra

def parse_args():
    """Parse the script's command-line options from ``sys.argv``.

    Recognised options:
        --timestamp  date timestamp for calculation (no default).
        --from_date  first day, ``YYYY-MM-DD`` (default ``2017-02-28``).
        --to_date    last day, ``YYYY-MM-DD`` (default ``2017-02-28``).
        --bs         path to blockstat.dict, stored as ``blockstat``.

    Returns:
        argparse.Namespace with the attributes ``timestamp``, ``from_date``,
        ``to_date`` and ``blockstat``.
    """
    parser = argparse.ArgumentParser(
        add_help=True,
        description='Suggest metrics calc',
    )
    parser.add_argument(
        '--timestamp',
        help='date timestamp for calculation',
    )
    parser.add_argument(
        '--from_date',
        default='2017-02-28',
        help='from date for calculation (format: YYYY-MM-DD)',
    )
    parser.add_argument(
        '--to_date',
        default='2017-02-28',
        help='to date for calculation (format: YYYY-MM-DD)',
    )
    parser.add_argument(
        "--bs",
        dest="blockstat",
        default='/home/galamaj/blockstat.dict',
        required=False,
        help="path to blockstat.dict",
    )
    # Options below are currently disabled.
    # parser.add_argument('--mr_server', default='hahn', help='MR server (hahn, banach, ...)')
    # parser.add_argument('--yt_pool', default='robot-suggestor-dev', help='YT pool')
    return parser.parse_args()

def get_dates(timestamp, from_date, to_date):
    """Build the list of days to process, as ``YYYY-MM-DD`` strings.

    Args:
        timestamp: optional Unix timestamp given as a string. Only its first
            ten characters are used, so a millisecond-precision value is cut
            down to seconds. It is converted with ``datetime.fromtimestamp``,
            i.e. in the local time zone.
        from_date: optional first day of an inclusive range (``YYYY-MM-DD``).
        to_date: optional last day of the range (``YYYY-MM-DD``). When
            ``from_date`` is given without ``to_date``, today is used.
            A ``to_date`` without a ``from_date`` contributes nothing.

    Returns:
        list of str: the timestamp's day first (if a timestamp was given),
        followed by every day from ``from_date`` to ``to_date`` inclusive.
        The range part is empty when ``from_date`` is later than ``to_date``.

    Raises:
        ValueError: if the timestamp or one of the dates cannot be parsed.
    """
    date_format = '%Y-%m-%d'
    dates = []
    if timestamp:
        # Named `moment` so the imported `date` class is not shadowed.
        moment = datetime.fromtimestamp(int(timestamp[:10]))
        dates.append(moment.strftime(date_format))
    if from_date and not to_date:
        to_date = datetime.now().strftime(date_format)
    if from_date and to_date:
        current_day = datetime.strptime(from_date, date_format)
        # Parse the upper bound once instead of on every loop iteration.
        last_day = datetime.strptime(to_date, date_format)
        one_day = timedelta(days=1)
        while current_day <= last_day:
            dates.append(current_day.strftime(date_format))
            current_day += one_day
    return dates


# Maps the exact value of the "path" field to the output counter it raises.
_REDIR_PATH_FLAGS = {
    '183.2155.2154': "start",
    '183.2155.1309': "init",
    '183.2155.529': "stop",
    '183.2155.2368': "data",
    '183.2155.1030': "error",
    '183.2155.2040': "continue",
    '183.2155.1827.529': "paranja_stop",
    '183.2155.1827.2040': "paranja_continue",
    '183.2155.177': "help",
}


def parse_redir(row):
    """Mapper: turn one redir-log row into a per-event record of 0/1 flags.

    ``row["value"]`` is expected to be a tab-separated string of ``key=value``
    items. Rows are considered only when the value is a ``str`` containing
    ``path=183.2155``. A record is emitted only when the ``ids`` field
    mentions experiment ``41632`` or ``41633`` (``41633`` wins if both are
    present) and a ``yandexuid`` field exists.

    Args:
        row: mapping with a ``"value"`` entry holding the raw log line.

    Yields:
        dict with ``uid`` (``"y"`` + yandexuid), ``exprt`` and the counters
        ``start``, ``init``, ``stop``, ``data``, ``error``, ``continue``,
        ``paranja_stop``, ``paranja_continue``, ``help``. At most one counter
        is 1: the one whose path equals the row's ``path`` field.

    Raises:
        KeyError: if ``row`` has no ``"value"`` entry.
    """
    value = row["value"]
    # Check the type first: calling string methods on a non-string value
    # would fail before the row could be filtered out.
    if not isinstance(value, str) or "path=183.2155" not in value:
        return

    data = {}
    for item in value.split("\t"):
        # Split on the first "=" only, so values that themselves contain
        # "=" (e.g. URLs with query strings) are kept whole.
        field, separator, content = item.partition("=")
        if separator:
            data[field] = content

    ids = data.get("ids")
    if ids is None or "yandexuid" not in data:
        # Without experiment ids or a user id the event cannot be attributed.
        return

    # NOTE(review): substring match, so an id such as "141632" would also
    # match; the exact format of "ids" is not visible here - confirm.
    exprt = None
    if "41632" in ids:
        exprt = "41632"
    if "41633" in ids:
        exprt = "41633"
    if exprt is None:
        return

    record = dict.fromkeys(_REDIR_PATH_FLAGS.values(), 0)
    flag = _REDIR_PATH_FLAGS.get(data.get("path"))
    if flag is not None:
        record[flag] = 1
    record["uid"] = "y" + data["yandexuid"]
    record["exprt"] = exprt
    yield record

def reduce_by_uid(key, recs):
    """Reducer: sum the event counters of all records in one group.

    Args:
        key: the reduce key supplied by the framework (not used).
        recs: iterable of records shaped like the output of ``parse_redir``
            (``uid``, ``exprt`` and the nine counters).

    Yields:
        One dict with the summed counters plus ``uid`` and ``exprt`` taken
        from the last record of the group. Nothing is yielded for an empty
        group.

    Raises:
        KeyError: if a record lacks one of the expected fields.
    """
    counter_names = (
        "start", "init", "stop", "data", "error",
        "continue", "paranja_stop", "paranja_continue", "help",
    )
    totals = dict.fromkeys(counter_names, 0)
    uid = None
    exprt = None
    seen_any = False
    for rec in recs:
        seen_any = True
        uid = rec["uid"]
        exprt = rec["exprt"]
        for name in counter_names:
            totals[name] += rec[name]
    if not seen_any:
        # No records: there is no uid/exprt to report.
        return
    totals["uid"] = uid
    totals["exprt"] = exprt
    yield totals



def count_actions(key, recs):
    """Reducer: collect the browser of each experiment user in a session.

    The records are parsed with ``libra.ParseSession`` using
    ``./blockstat.dict``. Only requests that are ``TTouchYandexWebRequest``
    and carry one of the test ids 41631, 41632 or 41633 are kept; for each
    such request the browser reported by ``GetBrowser()`` is remembered per
    ``request.UID`` (the last matching request wins).

    NOTE(review): ``libra`` must be importable where this runs; the
    module-level ``import libra`` in this file is commented out.

    Args:
        key: the reduce key supplied by the framework (not used).
        recs: raw session records handed to ``libra.ParseSession``.

    Yields:
        dicts ``{"uid": <uid>, "browser": [<name>, <version>]}``. Nothing is
        yielded when parsing raises ``RuntimeError``.
    """
    test_ids = ("41631", "41632", "41633")
    browsers = {}
    try:
        session = libra.ParseSession(recs, './blockstat.dict')
        for request in session:
            if not request.IsA("TTouchYandexWebRequest"):
                continue
            # Keep only requests that belong to one of the experiments.
            # The membership test is evaluated per request, so one matching
            # request does not affect how later requests are treated.
            if not any(request.HasTestID(test_id) for test_id in test_ids):
                continue
            browser = request.GetBrowser()
            browsers[request.UID] = [browser[0], browser[1]]
    except RuntimeError:
        # A session that fails to parse is dropped as a whole.
        return
    for uid, browser in browsers.items():
        yield {
            "uid": uid,
            "browser": browser,
        }

def join_sessions(key, recs):
    """Reducer: join browser records with event-counter records of one user.

    A record with a ``browser`` entry supplies the browser name (first
    element) and version (last element). Any other record is treated as a
    counter record shaped like the output of ``reduce_by_uid``; its values
    overwrite (they are not added to) those seen earlier in the group.

    Args:
        key: the reduce key supplied by the framework (not used).
        recs: iterable of browser records and counter records.

    Yields:
        One dict with ``exprt``, ``browser``, ``browser_version`` and the
        nine counters, but only when the group contained both a browser
        record and a counter record with a non-zero ``exprt``.
    """
    # Field names as written by parse_redir / reduce_by_uid. Note the keys
    # are "continue" and "paranja_continue", not "contin"/"paranja_contin".
    counter_names = (
        "start", "init", "stop", "data", "error",
        "continue", "paranja_stop", "paranja_continue", "help",
    )
    counters = dict.fromkeys(counter_names, 0)
    bro = None
    bro_version = None
    exprt = 0
    for rec in recs:
        if 'browser' in rec:
            bro = rec["browser"][0]
            bro_version = rec["browser"][-1]
            continue
        try:
            exprt = rec["exprt"]
            for name in counter_names:
                counters[name] = rec[name]
        except KeyError:
            # Incomplete counter record: keep what was read so far.
            continue
    if bro is not None and exprt != 0:
        joined = {
            "exprt": exprt,
            "browser": bro,
            "browser_version": bro_version,
        }
        joined.update(counters)
        yield joined

def group_by_browser(key, recs):
    """Reducer: total the event counters of one (experiment, browser) group.

    Args:
        key: the reduce key supplied by the framework (not used).
        recs: iterable of records shaped like the output of
            ``join_sessions``.

    Yields:
        One dict with ``exprt`` and ``browser`` (taken from the last
        record), the nine summed counters, and ``conversion``: the summed
        ``data`` as a percentage of the summed ``start``, or ``None`` when
        ``start`` is zero. Nothing is yielded for an empty group.

    Raises:
        KeyError: if a record lacks one of the expected fields.
    """
    counter_names = (
        "start", "init", "stop", "data", "error",
        "continue", "paranja_stop", "paranja_continue", "help",
    )
    totals = dict.fromkeys(counter_names, 0)
    bro = None
    exprt = None
    seen_any = False
    for rec in recs:
        seen_any = True
        bro = rec["browser"]
        exprt = rec["exprt"]
        for name in counter_names:
            totals[name] += rec[name]
    if not seen_any:
        # No records: there is no browser/exprt to report.
        return

    starts = totals["start"]
    # No starts means the ratio is undefined; report it as None.
    conversion = totals["data"] / float(starts) * 100 if starts else None

    summary = {
        "exprt": exprt,
        "browser": bro,
        "conversion": conversion,
    }
    summary.update(totals)
    yield summary


def group_by_version(key, recs):
    """Reducer: total the counters of one (experiment, browser, version) group.

    Args:
        key: the reduce key supplied by the framework (not used).
        recs: iterable of records shaped like the output of
            ``join_sessions``.

    Yields:
        One dict with ``exprt``, ``browser`` and ``browser_version`` (taken
        from the last record), the nine summed counters, and ``conversion``:
        the summed ``data`` as a percentage of the summed ``start``, or
        ``None`` when ``start`` is zero. Nothing is yielded for an empty
        group.

    Raises:
        KeyError: if a record lacks one of the expected fields.
    """
    counter_names = (
        "start", "init", "stop", "data", "error",
        "continue", "paranja_stop", "paranja_continue", "help",
    )
    totals = dict.fromkeys(counter_names, 0)
    bro = None
    bro_version = None
    exprt = None
    seen_any = False
    for rec in recs:
        seen_any = True
        bro = rec["browser"]
        bro_version = rec["browser_version"]
        exprt = rec["exprt"]
        for name in counter_names:
            totals[name] += rec[name]
    if not seen_any:
        # No records: there is no browser/version/exprt to report.
        return

    starts = totals["start"]
    # No starts means the ratio is undefined; report it as None.
    conversion = totals["data"] / float(starts) * 100 if starts else None

    summary = {
        "exprt": exprt,
        "browser": bro,
        "browser_version": bro_version,
        "conversion": conversion,
    }
    summary.update(totals)
    yield summary




def calculations(date):
    """Run the redir-log map-reduce for a single day.

    Sets the yt.wrapper ``memory_limit`` config value to 100 GiB, makes sure
    the first three output tables exist, then runs ``parse_redir`` /
    ``reduce_by_uid`` over the day's redir log, reducing by ``exprt`` and
    ``uid``. The session, join and grouping steps are kept below as
    commented-out code and are not executed.

    Args:
        date: day to process, as the string used in the table names.
    """
    inputTable = '//home/logfeller/logs/redir-log/1d/' + date
    inputTable2 = '//user_sessions/pub/search/daily/' + date + '/clean'

    outputTable = '//home/suggest-dev/galamaj/analytics/web_voice/from_redir_' + date + "fin"
    outputTable2 = '//home/suggest-dev/galamaj/analytics/web_voice/from_sessions_' + date + "fin"
    outputTable3 = '//home/suggest-dev/galamaj/analytics/web_voice/joined_' + date
    outputTable4 = '//home/suggest-dev/galamaj/analytics/web_voice/browsers_' + date
    outputTable5 = '//home/suggest-dev/galamaj/analytics/web_voice/versions_' + date

    yt.wrapper.config['memory_limit'] = 100 * 1024 * 1024 * 1024

    # Only the first three output tables are created up front.
    for table in (outputTable, outputTable2, outputTable3):
        yt.wrapper.create(
            "table",
            path=table,
            recursive=True,
            ignore_existing=True,
            attributes=None,
        )

    outputPath, outputPath2, outputPath3, outputPath4, outputPath5 = [
        yt.wrapper.TablePath(table)
        for table in (outputTable, outputTable2, outputTable3, outputTable4, outputTable5)
    ]

    print("%s   %s -> %s" % (str(datetime.now()), inputTable, outputTable))
    yt.wrapper.run_map_reduce(
        parse_redir,
        reduce_by_uid,
        inputTable,
        outputPath,
        reduce_by=['exprt', 'uid'],
    )
    #print "%s   %s -> %s" % (str(datetime.now()), inputTable2, outputTable2)
    # yt.wrapper.run_reduce(count_actions, inputTable2, outputPath2, reduce_by='key', files=["./blockstat.dict"])
    # yt.wrapper.run_sort(outputPath,sort_by=['uid'])
    # yt.wrapper.run_sort(outputPath2,sort_by=['uid'])
    # yt.wrapper.run_reduce(join_sessions, [outputPath, outputPath2], outputPath3, reduce_by='uid')
    # print "%s   %s -> %s" % (str(datetime.now()), outputTable3, outputTable4)
    # yt.wrapper.run_sort(outputPath3,sort_by=['exprt', 'browser'])
    # yt.wrapper.run_reduce(group_by_browser, outputPath3, outputPath4, reduce_by=['exprt', 'browser'])
    # yt.wrapper.run_sort(outputPath3,sort_by=['exprt', 'browser', 'browser_version'])
    # yt.wrapper.run_reduce(group_by_version, outputPath3, outputPath5, reduce_by=['exprt', 'browser', 'browser_version'])




if __name__ == '__main__':
    # Script entry point: work out which days to process from the
    # command-line options and run the calculation for each of them.
    args = parse_args()
    dates = get_dates(args.timestamp, args.from_date, args.to_date)
    print(dates)
    # The loop variable is called `day` so that it does not rebind the
    # module-level name `date` imported from datetime.
    for day in dates:
        calculations(day)









