#!/usr/bin/env python2
#
import sys
import argparse
import yt.wrapper as ytw
from traceback import format_exception


# YT paths of the antifraud log, keyed by time-scale name.
# NOTE(review): nothing in the visible code reads this mapping; the scale
# names only appear in the commented-out '--scale' option below -- confirm
# whether it is still needed.
ANTIFRAUD_YT_LOG = {
    "daily": "//home/logfeller/logs/mail-so-antifraud-log/1d/",
    "30min": "//home/logfeller/logs/mail-so-antifraud-log/30min/",
    "5min":  "//home/logfeller/logs/mail-so-antifraud-log/stream/5min/"
}

# Maps a log type name (the value of the -t/--log_type option) to the
# `channel_uri` value a record must carry to be kept by AntifraudLogMapper.
ANTIFRAUD_LOG_TYPE = {
    "bnpl": "acquiring_market_bnpl"
}


def getTraceback():
    """Format the exception currently being handled as an indented string.

    Every chunk returned by ``traceback.format_exception`` for the current
    ``sys.exc_info()`` is stripped of surrounding whitespace, prefixed with a
    tab and terminated with a newline; the chunks are concatenated in order.

    Returns:
        The formatted traceback text. Chunks that cannot be appended are
        skipped, so the result may be partial.
    """
    tb = ''
    for step in format_exception(*sys.exc_info()):
        try:
            tb += "\t" + step.strip() + "\n"
        except Exception:
            # Best effort: a chunk that cannot be concatenated (e.g. a
            # byte/unicode mix on Python 2) is dropped rather than letting
            # the logging helper itself fail. Unlike a bare ``except``, this
            # does not swallow KeyboardInterrupt or SystemExit.
            continue
    return tb


def log(s, isTB=False):
    """Print a message to stderr and flush the stream right away.

    s    -- the message to print.
    isTB -- when true, the text returned by getTraceback() is appended to
            the message before it is printed.
    """
    text = s
    if isTB:
        text += getTraceback()
    print >>sys.stderr, text
    sys.stderr.flush()


@ytw.with_context
class AntifraudLogMapper:
    """Mapper that keeps only the records belonging to one antifraud log type.

    An instance is passed to ``ytw.run_map`` by the script below. A record is
    emitted unchanged when its ``channel_uri`` field is non-empty and equals
    ``ANTIFRAUD_LOG_TYPE[log_type]``; every other record is dropped.
    """

    def __init__(self, log_type):
        # Key into ANTIFRAUD_LOG_TYPE; resolved lazily, per record, in __call__.
        self.log_type = log_type

    def __call__(self, record, context):
        # `context` is accepted because of the with_context decorator but is
        # not used by this mapper.
        uri = record.get('channel_uri', "")
        if not uri:
            # Missing or empty channel_uri: nothing to emit, and the log type
            # lookup below is never reached for such records.
            return
        if uri == ANTIFRAUD_LOG_TYPE[self.log_type]:
            yield record


if __name__ == "__main__":
    # Script entry point: filter the source table down to the records of the
    # requested log type (map into dst_table_path), then merge the result
    # into <dst_folder_path>/<date> with an explicit schema, falling back to
    # a plain copy when the merge leaves no usable destination table.
    parser = argparse.ArgumentParser()
    # Every option is dereferenced unconditionally below, so all of them are
    # required; argparse then reports a missing option clearly instead of the
    # script failing later on a None value.
    parser.add_argument('-s', '--src_table_path',  type=str, required=True, help="Artifact: new path to table, which will be parsed here")
    parser.add_argument('-d', '--dst_table_path',  type=str, required=True, help="Path to destination table")
    parser.add_argument('-f', '--dst_folder_path', type=str, required=True, help="Path to destination folder")
    # Restricting the value to known log types fails fast, before any YT
    # operation is started, instead of raising KeyError inside the map jobs.
    parser.add_argument('-t', '--log_type',        type=str, required=True, choices=sorted(ANTIFRAUD_LOG_TYPE), help="Destination Antifraud log's type")
    # NOTE: a '--scale' option ('daily', '30min' or '5min') used to be sketched
    # here; its short flag '-s' would clash with --src_table_path.
    args = parser.parse_known_args()[0]

    # The last component of the source path names the table; ':' is replaced
    # with '.', and the part before 'T' is used as the destination table name.
    DATETIME = args.src_table_path.rsplit("/", 1)[-1].replace(":", ".")
    DATE = DATETIME.split('T')[0]
    # Drop a single trailing slash so the join below does not double it.
    DST_FOLDER = args.dst_folder_path[:-1] if args.dst_folder_path.endswith('/') else args.dst_folder_path
    DST_TABLE = DST_FOLDER + '/' + DATE
    ytw.config["read_parallel"]["enable"] = True

    log("[%s] Start to gather of %s log. There are %s rows in source." % (DATE, args.log_type, ytw.row_count(args.src_table_path)))
    ytw.run_map(AntifraudLogMapper(args.log_type), args.src_table_path, args.dst_table_path, job_count=5000)
    rowsCount = ytw.row_count(args.dst_table_path)

    # Output schema of the merged table, as (column name, column type) pairs.
    # It is rendered into the attribute prefix of the destination path.
    schema_columns = [
        ("channel", "string"),
        ("channel_uri", "string"),
        ("unixtime", "uint64"),
        ("comments", "string"),
        ("lua_resolution", "any"),
        ("aggregates", "any"),
        ("rbl", "any"),
        ("request", "string"),
        ("request_data", "any"),
        ("script_log", "any"),
        ("structured_aggregates", "any"),
        ("subchannel", "string"),
        ("_rest", "any"),
    ]
    # Plain concatenation on purpose: the literal contains '%false', which
    # must not be fed through %-formatting.
    schema_attr = (
        "<schema = <strict=%false>["
        + "; ".join("{name = %s; type = %s}" % column for column in schema_columns)
        + "]>"
    )
    try:
        ytw.run_merge(
            args.dst_table_path,
            schema_attr + DST_TABLE,
            mode='ordered',
            spec={
                'combine_chunks': 'true',
                'data_size_per_job': 1453936477,
                'schema_inference_mode': 'from_output',
                'force_transform': True,
            },
        )
    except Exception:
        # Best effort: a failed merge is logged (now with its traceback) and
        # the copy fallback below is attempted instead of aborting.
        log("Exception while merging of YT table '%s' to '%s'.\n" % (args.dst_table_path, DST_TABLE), isTB=True)

    # Fallback: when the merge left no destination table, or an empty one,
    # try to copy the unmerged map output to the destination path.
    if not ytw.exists(DST_TABLE) or ytw.row_count(DST_TABLE) < 1:
        try:
            ytw.copy(args.dst_table_path, DST_TABLE)
        except Exception:
            log("Exception while YT copy table '%s' to '%s'.\n" % (args.dst_table_path, DST_TABLE), isTB=True)
    # rowsCount is the size of the map output (dst_table_path), taken before
    # the merge/copy step.
    log("[%s] Gathering of log DONE: %s rows" % (DATE, rowsCount))
