from jafar.tests import JafarTestCase
from jafar.utils import fast

import numpy as np
from sklearn import metrics


class CythonScoringTestCase(JafarTestCase):
    """
    Verifying Cython scoring functions.

    Every test compares a function from ``jafar.utils.fast`` with a reference
    value computed in plain Python/NumPy inside the test itself.

    Per-user data is stored flat: each ``*_idx`` array holds one
    ``[start, stop]`` row per user, and that row slices the matching flat
    array (``items[start:stop]``) to obtain the user's items.
    """

    @staticmethod
    def _user_slices(items, bounds):
        """
        Split a flat array into per-user chunks.

        :param items: flat sequence holding the items of all users
        :param bounds: iterable of ``[start, stop]`` pairs, one per user
        :return: list with ``items[start:stop]`` for every pair in ``bounds``
        """
        return [items[start:stop] for start, stop in bounds]

    def test_roc_auc_score_avg(self):
        """
        The averaged ROC AUC must equal the mean of sklearn's ROC AUC
        computed separately on every ``[start, stop]`` slice.
        """
        y_true = np.float32([0, 1, 0, 1])
        y_score = np.float32([0.3, 0.3, 0.2, 0.4])
        indices = np.int32([
            [0, 2],
            [2, 4]
        ])
        expected = np.mean([
            metrics.roc_auc_score(y_true[start:stop], y_score[start:stop])
            for start, stop in indices
        ])
        self.assertAlmostEqual(
            fast.roc_auc_score_avg(y_true, y_score, indices),
            expected
        )

    def test_unexpectedness_score(self):
        """
        User1: expected [1, 2], predicted [7, 1, 2] (unexp 1 / 2)
        User2: expected [3, 4], predicted [1] (unexp 1)
        User3: expected [5], predicted [5] (unexp 0)
        """
        expected = np.int32([1, 2, 3, 4, 5])
        expected_idx = np.int32([
            [0, 2],
            [2, 4],
            [4, 5]
        ])
        predicted = np.int32([7, 1, 2, 1, 5])
        predicted_idx = np.int32([
            [0, 3],
            [3, 4],
            [4, 5]
        ])
        # calculate by hand: share of predicted items that were not expected,
        # normalised by the shorter of the two lists
        values = [
            len(set(user_predicted) - set(user_expected)) /
            float(min(len(user_predicted), len(user_expected)))
            for user_expected, user_predicted in zip(
                self._user_slices(expected, expected_idx),
                self._user_slices(predicted, predicted_idx)
            )
        ]

        self.assertAlmostEqual(
            fast.unexpectedness_score(expected, expected_idx, predicted, predicted_idx),
            np.mean(values)
        )

    def test_map_score(self):
        """
        Average precision per user, as computed by ``map_check`` below:

        User1: actual [1, 2], predicted [7, 1, 2] (AP (1 / 2 + 2 / 3) / 3)
        User2: actual [3, 4], predicted [1] (AP 0)
        User3: actual [5], predicted [5] (AP 1)
        """
        actual = np.int32([1, 2, 3, 4, 5])
        actual_idx = np.int32([
            [0, 2],
            [2, 4],
            [4, 5]
        ])
        predicted = np.int32([7, 1, 2, 1, 5])
        predicted_idx = np.int32([
            [0, 3],
            [3, 4],
            [4, 5]
        ])

        def map_check(actual, predicted):
            """
            Couldn't get sklearn's average_precision_score to work:
            therefore carefully implementing the function from
            http://fastml.com/what-you-wanted-to-know-about-mean-average-precision/

            In short: sum i=1:x of (precision at i * change in recall at i)
            """
            # binarize predictions: 1 for a hit, 0 for a miss
            predicted_bin = [1 if item in actual else 0 for item in predicted]
            score = 0.0
            for k, hit in enumerate(predicted_bin):
                if hit:
                    # precision at (1-based) position k + 1
                    score += np.mean(predicted_bin[:k + 1])
            return score / len(predicted_bin)

        # test with test_case(1:5, [6 4 7 1 2], 2, 0.25) from FastML
        self.assertAlmostEqual(map_check([1, 2, 3, 4, 5], [6, 4]), 0.25)
        self.assertAlmostEqual(fast.map_user(
            np.int32([1, 2, 3, 4, 5]), np.int32([6, 4]), 2
        ), 0.25)

        values = [
            map_check(user_actual, user_predicted)
            for user_actual, user_predicted in zip(
                self._user_slices(actual, actual_idx),
                self._user_slices(predicted, predicted_idx)
            )
        ]

        self.assertAlmostEqual(
            fast.map_score(actual, actual_idx, predicted, predicted_idx, 3),
            np.mean(values)
        )

    def test_ndcg_score(self):
        """
        NDCG of the Cython implementation must match the reference
        ``ndcg_check`` for a single user and averaged over all users.
        """
        actual = np.int32([1, 2, 3, 4, 5])
        actual_idx = np.int32([
            [0, 2],
            [2, 4],
            [4, 5]
        ])
        predicted = np.int32([7, 1, 2, 1, 5])
        predicted_idx = np.int32([
            [0, 3],
            [3, 4],
            [4, 5]
        ])

        def ndcg_check(actual, predicted):
            """
            Again, implemented from https://gist.github.com/bwhite/3726239 (method 1)
            """
            predicted_bin = [1 if item in actual else 0 for item in predicted]

            def dcg(relevance):
                # discount position i (1-based) by log2(i + 1)
                return np.sum(relevance / np.log2(np.arange(2, len(relevance) + 2)))

            # ideal ordering: all hits first
            dcg_max = dcg(sorted(predicted_bin, reverse=True))
            if not dcg_max:
                return 0.
            return dcg(predicted_bin) / dcg_max

        self.assertAlmostEqual(
            ndcg_check([1, 2, 3, 4, 5], [6, 4, 0, 1]),
            0.65092092980713256
        )
        self.assertAlmostEqual(fast.ndcg_user(
            np.int32([1, 2, 3, 4, 5]), np.int32([6, 4, 0, 1]), 4
        ), 0.65092092980713256)

        values = [
            ndcg_check(user_actual, user_predicted)
            for user_actual, user_predicted in zip(
                self._user_slices(actual, actual_idx),
                self._user_slices(predicted, predicted_idx)
            )
        ]

        self.assertAlmostEqual(
            fast.ndcg_score(actual, actual_idx, predicted, predicted_idx, 10),
            np.mean(values)
        )

    def test_mrr_score(self):
        """
        Reciprocal rank of the Cython implementation must match the reference
        ``mrr_check`` for a single user and averaged over all users.
        """
        actual = np.int32([1, 2, 3, 4, 5])
        actual_idx = np.int32([
            [0, 2],
            [2, 4],
            [4, 5]
        ])
        predicted = np.int32([7, 1, 2, 1, 5])
        predicted_idx = np.int32([
            [0, 3],
            [3, 4],
            [4, 5]
        ])

        def mrr_check(actual, predicted):
            """
            Again, implemented from https://gist.github.com/bwhite/3726239
            """
            predicted_bin = [1 if item in actual else 0 for item in predicted]
            # positions (0-based) of all hits; the first one defines the rank
            r = np.asarray(predicted_bin).nonzero()[0]
            return 1. / (r[0] + 1) if r.size else 0.

        self.assertAlmostEqual(
            mrr_check([1, 2, 3, 4, 5], [6, 4, 0, 1]),
            0.5
        )
        self.assertAlmostEqual(fast.mrr_user(
            np.int32([1, 2, 3, 4, 5]), np.int32([6, 4, 0, 1]), 4
        ), 0.5)

        values = [
            mrr_check(user_actual, user_predicted)
            for user_actual, user_predicted in zip(
                self._user_slices(actual, actual_idx),
                self._user_slices(predicted, predicted_idx)
            )
        ]

        self.assertAlmostEqual(
            fast.mrr_score(actual, actual_idx, predicted, predicted_idx, 4),
            np.mean(values)
        )

    def test_precision_score(self):
        """
        Predictions are cut off at 2 items before scoring:

        User1: actual [1, 2], predicted [7, 1, 2] -> [7, 1] (precision 1 / 2)
        User2: actual [3, 4], predicted [1] (precision 0)
        User3: actual [5], predicted [5] (precision 1)
        """
        actual = np.int32([1, 2, 3, 4, 5])
        actual_idx = np.int32([
            [0, 2],
            [2, 4],
            [4, 5]
        ])
        predicted = np.int32([7, 1, 2, 1, 5])
        predicted_idx = np.int32([
            [0, 3],
            [3, 4],
            [4, 5]
        ])

        def precision_check(actual, predicted):
            """Share of predicted items that are present in actual."""
            return float(len(set(actual) & set(predicted))) / len(predicted)

        self.assertAlmostEqual(precision_check([1, 2, 3, 4, 5], [6, 4]), 0.5)
        self.assertAlmostEqual(fast.precision_user(
            np.int32([1, 2, 3, 4, 5]), np.int32([6, 4]), 2
        ), 0.5)

        # only the first 2 predictions count, matching the cutoff passed below
        values = [
            precision_check(user_actual, user_predicted[:2])
            for user_actual, user_predicted in zip(
                self._user_slices(actual, actual_idx),
                self._user_slices(predicted, predicted_idx)
            )
        ]

        self.assertAlmostEqual(
            fast.precision_score(actual, actual_idx, predicted, predicted_idx, 2),
            np.mean(values)
        )

    def test_recall_score(self):
        """
        Predictions are cut off at 2 items before scoring:

        User1: actual [1, 2], predicted [7, 1, 2] -> [7, 1] (recall 1 / 2)
        User2: actual [3, 4], predicted [1] (recall 0)
        User3: actual [4, 5], predicted [5] (recall 1 / 2)
        """
        actual = np.int32([1, 2, 3, 4, 5])
        # note: the last user overlaps the previous one (items [4, 5])
        actual_idx = np.int32([
            [0, 2],
            [2, 4],
            [3, 5]
        ])
        predicted = np.int32([7, 1, 2, 1, 5])
        predicted_idx = np.int32([
            [0, 3],
            [3, 4],
            [4, 5]
        ])

        def recall_check(actual, predicted):
            """Share of actual items that are present in predicted."""
            return float(len(set(actual) & set(predicted))) / len(actual)

        self.assertAlmostEqual(recall_check([1, 2, 3, 4, 5], [6, 4]), 0.2)
        self.assertAlmostEqual(fast.recall_user(
            np.int32([1, 2, 3, 4, 5]), np.int32([6, 4]), 2
        ), 0.2)

        # only the first 2 predictions count, matching the cutoff passed below
        values = [
            recall_check(user_actual, user_predicted[:2])
            for user_actual, user_predicted in zip(
                self._user_slices(actual, actual_idx),
                self._user_slices(predicted, predicted_idx)
            )
        ]

        self.assertAlmostEqual(
            fast.recall_score(actual, actual_idx, predicted, predicted_idx, 2),
            np.mean(values)
        )
