# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from scipy.stats import norm
from collections import Counter


def get_iv(y, pred):
    """Estimate the information value (IV) of a score against binary labels.

    ``pred`` is cut into 10 quantile bins with ``pd.qcut``.  For every
    occupied bin the add-one-smoothed share of positives (``y == 1``) and of
    negatives (``y == 0``) is computed, and ``(g - b) * log(g / b)`` is
    summed over the bins.

    :param y: labels aligned with ``pred``.  Entries equal to 1 count as
        positives, entries equal to 0 as negatives, anything else is
        ignored.  Plain sequences (list/tuple) are accepted in addition to
        numpy arrays and pandas Series.
    :param pred: 1-D numeric scores to be binned.
    :return: the IV summed over all occupied bins (0 when no label equals
        0 or 1).
    :raises ValueError: propagated from ``pd.qcut``, e.g. when heavy ties in
        ``pred`` make the quantile edges non-unique.
    """
    # A plain list/tuple compared with ``== 1`` yields a single bool rather
    # than an element-wise mask, so coerce those to an array first.  Arrays
    # and Series are left untouched to keep their own indexing semantics.
    if not isinstance(y, (np.ndarray, pd.Series)):
        y = np.asarray(y)

    bins = pd.qcut(pred, 10)
    good = Counter(bins[y == 1])
    bad = Counter(bins[y == 0])

    # The class totals are the same for every bin, so compute them once.
    # Float totals keep the divisions below true divisions on Python 2.
    good_total = sum(good.values()) + 1.0
    bad_total = sum(bad.values()) + 1.0

    iv = 0
    for key in good + bad:
        # The "+ 1" smoothing keeps a bin that is empty for one class from
        # producing log(0) or a division by zero.
        g = (good[key] + 1) / good_total
        b = (bad[key] + 1) / bad_total
        iv += (g - b) * np.log(g / b)
    return iv


def get_iv_by_binom(auc):
    """Convert an AUC into an information value as ``2 * norm.ppf(auc) ** 2``.

    NOTE(review): this equals the IV of two equal-variance normal score
    distributions (AUC = Phi(d / sqrt(2)), IV = d ** 2) -- presumably the
    model the function name refers to; confirm.

    :param auc: AUC value(s), expected in (0, 1); passed unchanged to
        ``scipy.stats.norm.ppf``.
    :return: twice the squared standard-normal quantile of ``auc``.  Because
        the quantile is squared, ``auc`` and ``1 - auc`` give the same
        result up to rounding.

    >>> float(get_iv_by_binom(0.5))
    0.0
    >>> round(float(get_iv_by_binom(0.7)), 6)
    0.549992
    """
    return norm.ppf(auc) ** 2 * 2


if __name__ == '__main__':
    # Demo: one score evaluated against a strongly and a weakly related
    # label.  ``np.random.random`` draws uniformly from [0, 1), so the
    # positive rate of y1 rises from 0.5 to 1.0 across x, while that of y2
    # only rises from 0.5 to 0.6.
    x = np.arange(0, 1, 0.00001)
    y1 = np.random.random(x.shape) + x * 0.5 > 0.5
    y2 = np.random.random(x.shape) + x * 0.1 > 0.5
    # A single parenthesised argument parses as a print statement on
    # Python 2 and as a function call on Python 3, with identical output,
    # so the module stays importable on both.
    print('%s %s' % (np.mean(y1 == 1), np.mean(y2 == 1)))
    print('%s %s' % (get_iv(y1, x), get_iv(y2, x)))
