#!/usr/bin/python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import sys, re, argparse, subprocess, json
import yt.wrapper as ytw
import nirvana.mr_job_context as nv
from urllib import urlopen
from traceback import format_exception

DOWNLOAD_FORMULA_FILE_URL = "https://fml.yandex-team.ru/download/computed/formula?%s"
SAVE_APPLIED_POOL_PARAMS_URL = "https://so-web.n.yandex-team.ru/ml/save_applied_pool_params/?%s"
OUTPUT_JSON_FILE = './output_params.json'
MX_OPS_EXECUTABLE = './mx_ops'

def get_traceback():
    """Return the traceback of the exception currently being handled as text.

    The exception is taken from ``sys.exc_info()``, so this is meant to be
    called from inside an ``except`` block.  Every chunk produced by
    ``traceback.format_exception`` is stripped of surrounding whitespace,
    prefixed with a tab and terminated with a newline, which lets the result
    be appended directly to a one-line error message.

    Returns:
        The formatted traceback as a single string.
    """
    exc_type, exc_value, exc_traceback = sys.exc_info()
    tb = ''
    for step in format_exception(exc_type, exc_value, exc_traceback):
        try:
            tb += "\t" + step.strip() + "\n"
        except Exception:
            # Best effort: a chunk that cannot be concatenated (e.g. a
            # str/unicode mix with non-ASCII data) is kept in its repr()
            # form instead of being silently dropped.  Catching Exception
            # rather than everything lets KeyboardInterrupt/SystemExit
            # propagate.
            tb += "\t" + repr(step) + "\n"
    return tb

def matrixnet(mx_ops, matrixnetfile, src, dst):
    """Apply a matrixnet model to a YT table by running ``mx_ops calc``.

    First ``mx_ops info`` is run on the model file; if its output contains a
    non-empty ``Slices:`` line, that value is written to ``./slicesinfo`` and
    passed to the calculation through ``--slices-info``.  Then ``mx_ops calc``
    is run against ``hahn.yt.yandex.net`` as user ``so_fbl``, and whatever it
    printed to stdout/stderr is echoed to this process' stdout.

    Args:
        mx_ops: path to the mx_ops executable.
        matrixnetfile: path to the model file.
        src: source YT table (passed as ``--mr-src``).
        dst: destination YT table (passed as ``--mr-dst``).

    Returns:
        None.  The exit status of mx_ops is not inspected here.
    """
    # NOTE: both commands go through the shell with interpolated arguments,
    # so the arguments must come from a trusted source.
    info_proc = subprocess.Popen("%s info %s" % (mx_ops, matrixnetfile), shell=True,
                                 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() drains the pipe, closes stdin and reaps the child.
    info_stdout = info_proc.communicate()[0]
    m = re.search(r"Slices:\t(.*?)\n", info_stdout)
    slice_info = ''
    if m and m.group(1):
        with open("./slicesinfo", "w") as f:
            f.write(m.group(1))
        slice_info = "--slices-info ./slicesinfo"
    cmd = '%s calc -s 4 --mr-server hahn.yt.yandex.net --mr-user so_fbl --mr-src "%s" --mr-dst "%s" %s %s' % (mx_ops, src, dst, slice_info, matrixnetfile)
    p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # wait() followed by read() can deadlock once the child fills a pipe
    # buffer; communicate() reads both streams while waiting for the exit.
    mx_ops_stdout, mx_ops_stderr = p.communicate()
    if mx_ops_stdout:
        print("MX_OPS stdout: %s\nMX_OPS output's end" % mx_ops_stdout)
    if mx_ops_stderr:
        print("MX_OPS stderr: %s\nMX_OPS errors output's end" % mx_ops_stderr)

def check_mn(ref_value, mn_value):
    """Name the confusion-matrix cell for one reference/prediction pair.

    The first letter is 't' when ``ref_value`` and ``mn_value`` agree
    (their XOR is 0) and 'f' otherwise; the second letter is 'p' when
    ``mn_value`` is 1 and 'n' when it is 0.

    Returns:
        One of 'tp', 'tn', 'fp', 'fn' for 0/1 inputs.
    """
    correctness = "tf"[ref_value ^ mn_value]
    polarity = "np"[mn_value]
    return correctness + polarity

def doRequest(url_template, params, prompt):
    """Perform an HTTP GET request and return the response body.

    Args:
        url_template: URL containing a single ``%s`` placeholder that
            receives the query string.
        params: dict of query parameters; keys and values are URL-encoded,
            so values may contain spaces and other reserved characters.
        prompt: short description of the request, used as a prefix in
            error messages.

    Returns:
        The response body when the HTTP status code is 200; otherwise an
        empty string.  Failures are reported on stderr and never raised.
    """
    # Local import keeps this function self-contained (Python 2 urllib).
    from urllib import urlencode
    try:
        f = urlopen(url_template % urlencode(params))
        try:
            if f.getcode() == 200:
                return f.read()
            # NOTE: the value logged under "body" is f.info(), i.e. the
            # response meta-information, not the payload.
            print >>sys.stderr, '{0} response HTTP code: {1}, body: {2}'.format(prompt, f.getcode(), f.info())
        finally:
            # Always release the connection, on success and on error alike.
            f.close()
    except Exception as e:
        print >>sys.stderr, '%s HTTP request failed: %s.%s' % (prompt, str(e), get_traceback())
    return ""

if __name__ == "__main__":
    # Script entry point.  Steps:
    #   1. create a per-workflow temporary folder in YT;
    #   2. parse and validate command-line arguments;
    #   3. read the formula id and download its matrixnet.info file;
    #   4. apply the formula to the pool table with mx_ops;
    #   5. compute precision/recall/accuracy/F-measure against the threshold;
    #   6. report the result to the web service and write it to a JSON file.
    ctx = nv.context()
    meta = ctx.get_meta()
    MATRIXNET_FILE, YT_FEATURES = './matrixnet.info', ''
    YT_TMP_FOLDER = "//home/so_fml/nirvana/tmp/%s_%s" % (meta.get_workflow_uid(), meta.get_workflow_instance_uid())
    if not ytw.exists(YT_TMP_FOLDER):
        ytw.create('map_node', path = YT_TMP_FOLDER)
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--formula',       type = str, help = "Input trained formula (model)")
    parser.add_argument('-d', '--dlvlog_pool',   type = str, help = "Table in YT with pool (features) to which the model will be applied")
    parser.add_argument('-p', '--pool_formula',  type = str, help = "Formula ID for the pool which undergoing application")
    parser.add_argument('-m', '--main_formula',  type = str, help = "Main formula ID, for which comparison is performing")
    parser.add_argument('-t', '--threshold',     type = float, help = "Threshold of the model")
    parser.add_argument('-o', '--output_params', type = str, help = "Output JSON-file with parameters of pool with applied formula")
    parser.add_argument('-x', '--mx_ops',        type = str, help = "Path to mx_ops executable")
    parser.add_argument('-r', '--route',         type = str, help = "The type of mail for which the model is calculated")
    # Unknown arguments are tolerated (parse_known_args) and ignored.
    args, formula_id = parser.parse_known_args()[0], ''
    ROUTE = args.route if args.route else 'in'
    if not args.formula:
        print >>sys.stderr, "Input formula's file name must be specified!"
        sys.exit(1)
    if args.dlvlog_pool:
        YT_FEATURES = args.dlvlog_pool
    else:
        print >>sys.stderr, "Input YT table with pool (features) must be specified!"
        sys.exit(1)
    # Compare with None explicitly: a threshold of 0.0 is a valid value and
    # must not be mistaken for a missing argument.
    if args.threshold is None:
        print >>sys.stderr, "Threshold of the model must be specified!"
        sys.exit(1)
    if args.mx_ops:
        MX_OPS_EXECUTABLE = args.mx_ops
    else:
        print >>sys.stderr, "Path to mx_ops executable must be specified!"
        sys.exit(1)
    if args.output_params:
        OUTPUT_JSON_FILE = args.output_params
    # The formula file is JSON with an 'id' field.  On failure the error is
    # logged and the script goes on with an empty formula id.
    try:
        with open(args.formula) as f:
            formula_id = json.load(f)['id']
    except Exception as e:
        print >>sys.stderr, "Error: %s.%s" % (str(e), get_traceback())
    with open(MATRIXNET_FILE, 'w') as f:
        print >>f, doRequest(DOWNLOAD_FORMULA_FILE_URL, {"id": formula_id, "file": 'matrixnet.info'}, "Downloading matrixnet.info file")
    YT_TMP_OUTPUT = "%s/applied_formula_%s_2pool_%s" % (YT_TMP_FOLDER, formula_id, args.pool_formula)
    if ytw.exists(YT_TMP_OUTPUT):
        ytw.remove(YT_TMP_OUTPUT, force = True)
    matrixnet(MX_OPS_EXECUTABLE, MATRIXNET_FILE, YT_FEATURES, YT_TMP_OUTPUT)
    # The run is considered successful only when the output table exists and
    # has exactly as many rows as the input pool.
    if not (ytw.exists(YT_TMP_OUTPUT) and ytw.row_count(YT_TMP_OUTPUT) == ytw.row_count(YT_FEATURES)):
        info = "MX_OPS error: output YT table does not exist or is empty!"
        doRequest(SAVE_APPLIED_POOL_PARAMS_URL, {'formula1': formula_id, 'formula2': args.pool_formula, 'status': 'failed', 'info': info, 'main_formula_id': args.main_formula, 'route': ROUTE}, 'Saving parameters of pool with applied formula')
        print >>sys.stderr, info
        sys.exit(1)
    res = {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0, 'total': 0}
    for record in ytw.read_table(YT_TMP_OUTPUT, format = ytw.JsonFormat(), raw = False):
        # "value" is a tab-separated string: the first field is used as the
        # reference label, the last one as the model output.
        rec_list = str(record["value"]).split("\t")
        res[check_mn(int(rec_list[0]), 1 if float(rec_list[-1]) > args.threshold else 0)] += 1
        res['total'] += 1
    output_params = {
        'formula1':  formula_id,
        'formula2':  args.pool_formula,
        'precision': res['tp'] * 1.0 / (res['tp'] + res['fp']) if res['tp'] + res['fp'] > 0 else 0,
        'recall':    res['tp'] * 1.0 / (res['tp'] + res['fn']) if res['tp'] + res['fn'] > 0 else 0,
        # Guarded like precision/recall: an empty pool passes the row-count
        # check above (0 == 0) and would otherwise divide by zero here.
        'accuracy':  (res['tp'] + res['tn']) * 1.0 / res['total'] if res['total'] > 0 else 0,
        'status':    'completed',
        'main_formula_id': args.main_formula,
        'route':     ROUTE
    }
    output_params['f_measure'] = 2.0 * output_params['precision'] * output_params['recall'] / (output_params['precision'] + output_params['recall']) if output_params['precision'] + output_params['recall'] > 0 else 0
    ytw.remove(YT_TMP_FOLDER, recursive = True, force = True)
    doRequest(SAVE_APPLIED_POOL_PARAMS_URL, output_params, 'Saving parameters of pool with applied formula')
    try:
        with open(OUTPUT_JSON_FILE, 'wt') as f:
            print >>f, json.dumps(output_params)
    except Exception as e:
        print >>sys.stderr, 'Saving result file error: %s.%s' % (str(e), get_traceback())
