#!/usr/bin/python
# -*- coding: utf8 -*-

"""
It is essential to run this script using **system python**
as scipy module was not compiled into skynet python

Author: yuraust@, mklimushkin@, inspired by ekrokhalev@ article
Adapted for Sandbox by mvel@
"""

from __future__ import print_function

import sys
import json
import numpy as np


def _p(line):
    print(line, file=sys.stderr)


def _cut(rps):
    # zip(*A) allows to iterate over transposed matrix
    # cut first 2 shoots because they show warm-up rps, which could be "slow"
    return list(zip(*rps))[2:]


def _calc(top_runs, rps, index):
    """Reduce one result set to a per-shoot sample and its median.

    The pairwise diagnostics of check_rps() are logged first. Then, for every
    shoot left after the warm-up cut done by _cut(), the ``top_runs`` largest
    values are averaged.

    :param top_runs: how many of the largest values per shoot to average
    :param rps: sequence of rows of rps values (presumably one row per run,
        one column per shoot)
    :param index: label of the result set, used in log output only
    :return: tuple ``(sample, avg_rps)`` where ``sample`` is the list of
        per-shoot averages and ``avg_rps`` is ``np.median(sample)``
    """
    check_rps(rps, index)
    # Divide by a float: without "from __future__ import division" Python 2
    # would floor the average when all rps values are integers.
    divisor = float(top_runs)
    sample = [sum(sorted(shoot)[-top_runs:]) / divisor for shoot in _cut(rps)]
    _p("Sample {}: {}".format(index, sample))
    return sample, np.median(sample)


def t_test(rps1, rps2):
    """Compare two rps result sets with an unequal-variance t-test.

    Each result set is reduced by _calc() to a per-shoot sample and its
    median; the two samples are then passed to
    scipy.stats.mstats.ttest_ind(..., equal_var=False).

    :param rps1: first result set (the reference for the relative difference)
    :param rps2: second result set
    :return: dict with keys ``t_stat``, ``p_value`` (rounded to 4 digits),
        ``avg_rps_1``, ``avg_rps_2`` and ``diff_per_cent_for_avg``
    """
    # scipy is imported lazily, only when a test is actually performed
    from scipy import stats as st

    # With more than 3 rows in both result sets, average the 2 best values
    # of every shoot; otherwise just take the single best one (simple max).
    fewest_rows = min(len(rps1), len(rps2))
    if fewest_rows > 3:
        top_runs = 2
    else:
        top_runs = 1

    sample1, avg_rps1 = _calc(top_runs, rps1, 1)
    sample2, avg_rps2 = _calc(top_runs, rps2, 2)
    t_stat, p_val = st.mstats.ttest_ind(sample1, sample2, equal_var=False)

    result = {"t_stat": t_stat}
    result["p_value"] = round(p_val, 4)
    result["avg_rps_1"] = avg_rps1
    result["avg_rps_2"] = avg_rps2
    result["diff_per_cent_for_avg"] = (avg_rps2 - avg_rps1) * 100 / float(avg_rps1)
    return result


def check_rps(rps, n):
    """Log pairwise statistics for every pair of rows of one result set.

    For each unordered pair of rows, the first 2 values of both rows are
    skipped (the same warm-up cut that _cut() applies) and the following is
    written to stderr: the indices of the two rows, their medians, the
    relative difference of the medians in per cent, their standard
    deviations, and the p-value of an unequal-variance t-test together with
    ``(1 - p_value) * 100``.

    Nothing is returned; the function is purely diagnostic.

    :param rps: sequence of rows of rps values
    :param n: label of the result set, used in the log header only
    """
    from scipy import stats as st

    _p("check rps {}".format(n))
    for i, rps_list_i in enumerate(rps):
        # Start counting at i + 1 so that j is the real index of the second
        # row in rps, not its offset inside the slice.
        for j, rps_list_j in enumerate(rps[i + 1:], i + 1):
            first = rps_list_i[2:]
            second = rps_list_j[2:]
            _, p_val = st.mstats.ttest_ind(first, second, equal_var=False)
            median1 = np.median(first)
            median2 = np.median(second)
            _p("{}, {}".format(i, j))
            _p("  median1 ={}, median2 ={}".format(median1, median2))
            _p("  diff_per_cent ={}".format(round((median2 - median1) * 100 / float(median1), 2)))
            _p("  std1 ={}, std2 ={}".format(
                np.std(first),
                np.std(second),
            ))
            _p("  p_val ={}, diff_probability={}".format(
                round(p_val, 4),
                round((1 - p_val) * 100, 2),
            ))


def load(file_name):
    """Open *file_name* and return its contents parsed as JSON."""
    with open(file_name) as source:
        return json.load(source)


def main():
    """Command-line entry point: compare two rps result files.

    Expects two positional arguments, each the path of a JSON file readable
    by load(). The dict returned by t_test() is printed to stdout as one JSON
    document. If fewer than two paths are given, a usage line is written to
    stderr and the process exits with status 1.
    """
    if len(sys.argv) < 3:
        # stdout is reserved for the JSON result, so the usage text goes to
        # stderr like every other diagnostic message of this script.
        _p("Usage: {} <res1.json> <res2.json>".format(sys.argv[0]))
        sys.exit(1)

    first = load(sys.argv[1])
    second = load(sys.argv[2])

    print(json.dumps(t_test(first, second)))


if __name__ == "__main__":
    # Run the comparison only when executed as a script, not on import.
    main()
