#!/usr/bin/python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import sys, re, argparse, subprocess, json
import yt.wrapper as ytw
from urllib import urlopen
from datetime import date
from traceback import format_exception
import nirvana.mr_job_context as nv

YT_OUTPUT_MESSAGES_PATH = "//home/so_fml/nirvana/tmp/messages_"
YT_TMP_MESSAGES_PATH = "//home/so_fml/nirvana/tmp/messages_tmp_"
GET_FORMULA_FILE_FROM_FML_URL = "https://fml.yandex-team.ru/download/computed/formula?id=%s&file=%s"
SAVE_MODEL_STATUS_URL = "https://so-web.n.yandex-team.ru/ml/save_model_status/?workflow_id=%s&workflow_instance_id=%s&formula_id=%s&status=%s&route=%s"
SAVE_ACCEPTANCE_METRICS_INFO_URL = "https://so-web.n.yandex-team.ru/ml/save_acceptance_metrics_info/?formula_id=%s&%s"
GET_ML_SETTINGS_URL = "https://so-web.n.yandex-team.ru/ml/get_ml_settings/?route=%s"
MX_OPS_EXECUTABLE = './mx_ops'

def get_traceback():
    exc_type, exc_value, exc_traceback = sys.exc_info()
    tb = ''
    for step in format_exception(exc_type, exc_value, exc_traceback):
        try:
            tb += "\t" + step.strip() + "\n"
        except:
            pass
    return tb

def matrixnet(mx_ops, matrixnetfile, src, dst):
    p = subprocess.Popen("%s info %s" % (mx_ops, matrixnetfile), shell = True, stdin = subprocess.PIPE, stdout = subprocess.PIPE)
    m = re.search(r"Slices:\t(.*?)\n", p.stdout.read())
    slice_info = ''
    if m and m.group(1):
        slice_info = m.group(1)
        f = open("./slicesinfo", "w")
        f.write(slice_info)
        f.close()
        slice_info = "--slices-info ./slicesinfo"
    cmd = '%s calc -s 4 --mr-server hahn.yt.yandex.net --mr-user so_fml --mr-src "%s" --mr-dst "%s" %s %s' % (mx_ops, src, dst, slice_info, matrixnetfile)
    p = subprocess.Popen(cmd, shell = True, stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
    p.wait()
    mx_ops_stdout = p.stdout.read()
    if mx_ops_stdout:
        print "MX_OPS stdout: %s\nMX_OPS output's end" % mx_ops_stdout
    mx_ops_stderr = p.stderr.read()
    if mx_ops_stderr:
        print "MX_OPS stderr: %s\nMX_OPS errors output's end" % mx_ops_stderr

def check_mn(ref_value, mn_value):
    """Name the confusion-matrix cell for one prediction.

    ref_value: reference label, 0 or 1.
    mn_value:  predicted label, 0 or 1.

    Returns "tp", "tn", "fp" or "fn": the first letter says whether the
    prediction agrees with the reference ("t") or not ("f"), the second
    whether the prediction itself is positive ("p") or negative ("n").
    """
    # XOR of the two labels is 0 when they agree and 1 when they differ.
    agreement = ("t", "f")[ref_value ^ mn_value]
    polarity = ("n", "p")[mn_value]
    return agreement + polarity

class MessagesMapper:
    """Mapper turning a scored record into a (queueid, target) row.

    The input record carries a tab-separated string under "value"; its second
    field is used as the queue id and its last field is parsed as a float
    score. The emitted target is 1 when the score is strictly greater than
    the threshold and 0 otherwise.
    """

    def __init__(self, threshold):
        # Scores strictly above this value are mapped to target 1.
        self.threshold = threshold

    def __call__(self, record):
        fields = str(record["value"]).split("\t")
        queue_id = fields[1]
        score = float(fields[-1])
        if score > self.threshold:
            target = 1
        else:
            target = 0
        yield {'queueid': queue_id, 'target': target}

@ytw.with_context
def messagesReducer(key, records, context):
    """Merge the records of one key into a single row carrying a 'target'.

    Records for which context.table_index equals 1 contribute only their
    'target' value (the last one seen wins); records from any other input
    are merged field by field into the output row. When no record with
    table index 1 is present the target stays at -1.

    Yields exactly one dict per key.
    """
    merged = {}
    target = -1
    for row in records:
        if context.table_index != 1:
            merged.update(row)
            continue
        target = row['target']
    # Assigned last so that it overrides any 'target' field of the merged rows.
    merged['target'] = target
    yield merged

def doRequest(url, prompt):
    """Fetch url and return the response body, or "" on failure.

    url:    address to request.
    prompt: short description of the request, used as the prefix of the
            diagnostics written to stderr.

    A response with a code other than 200 and any exception raised while
    requesting are reported on stderr and result in an empty string, so the
    caller never has to handle request errors itself.
    """
    try:
        f = urlopen(url)
        try:
            if f.getcode() == 200:
                return f.read()
            # NOTE: the value logged under "body" is f.info(), not the payload.
            print >>sys.stderr, '{0} response HTTP code: {1}, body: {2}'.format(prompt, f.getcode(), f.info())
        finally:
            # Release the connection on every path instead of leaving it to
            # the garbage collector.
            f.close()
    except Exception as e:
        print >>sys.stderr, '%s HTTP request failed: %s' % (prompt, str(e))
    return ""

def writeTextFile(filename, msg = "1"):
    """Write msg to the local text file filename, replacing its content.

    filename: path of the file to (re)create.
    msg:      text to store; defaults to "1".

    The file name and the message are logged to stderr first. Any error
    (including an unusable filename) is reported on stderr together with the
    traceback and is not propagated to the caller.
    """
    try:
        print >>sys.stderr, "File '%s': %s" % (filename, msg)
        # The with-block closes the file even when write() fails.
        with open(filename, 'wt') as f:
            f.write(msg)
    except Exception as e:
        print >>sys.stderr, "Writing local file failed: %s.%s" % (str(e), get_traceback())

if __name__ == "__main__":
    # Script entry point. Steps, in order:
    #   1. parse and validate the command line;
    #   2. load the formula id and the model parameters from local JSON files;
    #   3. download the formula file and apply it to the features table with mx_ops;
    #   4. count tp/tn/fp/fn over the mx_ops output and derive the acceptance metrics;
    #   5. join the thresholded predictions with the messages table by queueid;
    #   6. report the metrics, fetch the ML settings and write the indicator/result files.
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--formula',         type = str, help = "Input trained formula")
    parser.add_argument('-c', '--deepness',        type = int, help = "Deepness of cross comparison analysis: number of old models to which pools new model wil be applied")
    parser.add_argument('-d', '--dlvlog_pool',     type = str, help = "Table in YT with pool (features) obtained from the recent week of delivery-log")
    parser.add_argument('-m', '--messages',        type = str, help = "Table in YT with pool (dlvlog messages) which contains recent week of delivery-log")
    parser.add_argument('-p', '--model_params',    type = str, help = "Input JSON-file with model parameters")
    parser.add_argument('-s', '--output_messages', type = str, help = "Output table in YT with pool (dlvlog messages) for statistics as an acceptance metrics")
    parser.add_argument('-o', '--output_params',   type = str, help = "Output JSON-file with acceptance metrics parameters")
    parser.add_argument('-g', '--offline_test',    type = str, help = "Output text file for indicator: whether or not perform offline test of the new model")
    parser.add_argument('-n', '--online_test',     type = str, help = "Output text file for indicator: whether or not perform online test of the new model")
    parser.add_argument('-z', '--send_to_prod',    type = str, help = "Output text file for indicator: whether or not send new model to prod (after all testing step)")
    parser.add_argument('-x', '--mx_ops',          type = str, help = "Path to mx_ops executable")
    parser.add_argument('-t', '--prod', action = 'store_true', help = "Whether model must be sent to prod or not")
    parser.add_argument('-r', '--route',           type = str, help = "The type of mail for which the model is calculated")
    # parse_known_args() is used so that unrecognised options are ignored.
    args, formula_id, model_params = parser.parse_known_args()[0], '', {}
    ROUTE = args.route if args.route else 'in'
    if not args.formula:
        print >>sys.stderr, "Input formula's file name must be specified!"
        sys.exit(1)
    if args.dlvlog_pool:
        YT_FEATURES = args.dlvlog_pool
    else:
        # The message used to be a copy of the --model_params one.
        print >>sys.stderr, "Input table in YT with pool of features (dlvlog_pool) must be specified!"
        sys.exit(1)
    if args.messages:
        YT_MESSAGES = args.messages
    else:
        print >>sys.stderr, "Input table in YT with pool dlvlog messages must be specified!"
        sys.exit(1)
    if not args.model_params:
        print >>sys.stderr, "Input JSON-file name with model params must be specified!"
        sys.exit(1)
    if args.mx_ops:
        MX_OPS_EXECUTABLE = args.mx_ops
    else:
        print >>sys.stderr, "Path to mx_ops executable must be specified!"
        sys.exit(1)

    YT_OUTPUT_MESSAGES_PATH = args.output_messages if args.output_messages else YT_OUTPUT_MESSAGES_PATH + date.today().isoformat()
    # Without the formula id or the model parameters every later step is bound
    # to fail, so stop right away instead of starting the mx_ops calculation.
    try:
        with open(args.formula) as f:
            formula_id = json.loads(f.read())['id']
    except Exception as e:
        print >>sys.stderr, "Error: %s.%s" % (str(e), get_traceback())
        sys.exit(1)
    try:
        with open(args.model_params) as f:
            model_params = json.loads(f.read())
    except Exception as e:
        print >>sys.stderr, "Error: %s.%s" % (str(e), get_traceback())
        sys.exit(1)

    MATRIXNET_FILE = './matrixnet.info'
    # All working tables live next to the output messages table.
    YT_FEATURES_PATH = YT_OUTPUT_MESSAGES_PATH[:YT_OUTPUT_MESSAGES_PATH.rfind('/')]
    YT_ACCEPTANCE_METRICS = YT_FEATURES_PATH + '/features_acceptance_metrics_' + str(formula_id)
    YT_TMP_MESSAGES_PATH = YT_FEATURES_PATH + YT_TMP_MESSAGES_PATH[YT_TMP_MESSAGES_PATH.rfind('/'):] + date.today().isoformat()

    with open(MATRIXNET_FILE, 'w') as f:
        # NOTE: the print statement appends a newline after the downloaded payload.
        print >>f, doRequest(GET_FORMULA_FILE_FROM_FML_URL % (formula_id, "matrixnet.info"), "Retrieving formula's file from FML")
    if ytw.exists(YT_ACCEPTANCE_METRICS):
        ytw.remove(YT_ACCEPTANCE_METRICS, force = True)
    matrixnet(MX_OPS_EXECUTABLE, MATRIXNET_FILE, YT_FEATURES, YT_ACCEPTANCE_METRICS)
    # mx_ops failures are not raised, so success is judged by the output table:
    # it must exist and contain one row per input row.
    if not (ytw.exists(YT_ACCEPTANCE_METRICS) and ytw.row_count(YT_ACCEPTANCE_METRICS) == ytw.row_count(YT_FEATURES)):
        print >>sys.stderr, "MX_OPS error: output YT table does not exist or its row count differs from the input pool!"
        sys.exit(1)
    res = {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0, 'total': 0}
    threshold = model_params['threshold']
    for record in ytw.read_table(YT_ACCEPTANCE_METRICS, format = ytw.JsonFormat(), raw = False):
        # First field is used as the reference label, last field as the score.
        rec_list = str(record["value"]).split("\t")
        res[check_mn(int(rec_list[0]), 1 if float(rec_list[-1]) > threshold else 0)] += 1
        res['total'] += 1
    ctx = nv.context()
    meta = ctx.get_meta()
    # Every ratio falls back to 0 when its denominator is 0 (e.g. empty tables).
    output_params = {
        'precision':            res['tp'] * 1.0 / (res['tp'] + res['fp']) if res['tp'] + res['fp'] > 0 else 0,
        'recall':               res['tp'] * 1.0 / (res['tp'] + res['fn']) if res['tp'] + res['fn'] > 0 else 0,
        'accuracy':             (res['tp'] + res['tn']) * 1.0 / res['total'] if res['total'] > 0 else 0,
        'workflow_id':          meta.get_workflow_uid(),
        'workflow_instance_id': meta.get_workflow_instance_uid()
    }
    output_params['f_measure'] = 2.0 * output_params['precision'] * output_params['recall'] / (output_params['precision'] + output_params['recall']) if output_params['precision'] + output_params['recall'] > 0 else 0
    # Attach the thresholded prediction to every message: map the scores to
    # (queueid, target) rows, sort both tables by queueid and reduce them together.
    ytw.run_map(MessagesMapper(threshold), YT_ACCEPTANCE_METRICS, YT_TMP_MESSAGES_PATH, job_count = 1000)
    ytw.run_sort(YT_TMP_MESSAGES_PATH, sort_by = "queueid")
    ytw.run_sort(YT_MESSAGES, sort_by = "queueid")
    ytw.run_reduce(messagesReducer, [YT_MESSAGES, YT_TMP_MESSAGES_PATH], YT_OUTPUT_MESSAGES_PATH, reduce_by = ["queueid"],
                       spec = {"job_io": {"control_attributes": {"enable_row_index": True}}})
    ytw.remove(YT_TMP_MESSAGES_PATH, force = True)
    # NOTE(review): the metric values are put into the query string without URL-encoding.
    metrics_query = '&'.join("%s=%s" % item for item in output_params.items())
    doRequest(SAVE_ACCEPTANCE_METRICS_INFO_URL % (formula_id, metrics_query), 'Saving acceptance metrics info')
    # The settings URL has a placeholder for the route; it used to be requested
    # unformatted, i.e. with a literal "%s" as the route.
    s, settings = doRequest(GET_ML_SETTINGS_URL % ROUTE, 'Retrieving of ML settings'), {}
    if s:
        try:
            settings = json.loads(s)
        except Exception as e:
            print >>sys.stderr, "Error: %s.%s" % (str(e), get_traceback())
    print >>sys.stderr, "Settings: %s" % str(settings)
    # Only the first enabled stage gets its indicator file, in the order
    # offline test, online test, send to prod.
    try:
        if 'offline_test' in settings and settings['offline_test']:
            print >>sys.stderr, "Start offline test"
            doRequest(SAVE_MODEL_STATUS_URL % (meta.get_workflow_uid(), meta.get_workflow_instance_uid(), formula_id, 'models_offline_test', ROUTE), 'Saving model status')
            writeTextFile(args.offline_test)
        elif 'online_test' in settings and settings['online_test']:
            print >>sys.stderr, "Sending formula %s with threshold %s to online testing." % (formula_id, threshold)
            writeTextFile(args.online_test)
        elif 'send_to_prod' in settings and settings['send_to_prod']:
            print >>sys.stderr, "Sending formula %s with threshold %s to prod." % (formula_id, threshold)
            writeTextFile(args.send_to_prod)
    except Exception as e:
        print >>sys.stderr, 'Exception: %s.%s' % (str(e), get_traceback())
    try:
        writeTextFile(args.output_params, json.dumps(output_params))
    except Exception as e:
        print >>sys.stderr, 'Saving result file error: %s.%s' % (str(e), get_traceback())
