# -*- coding: utf-8 -*-
import pandas as pd
from datacloud.ml_utils.hyperopt_wrapper.nirvana_cube.load_table import init_data
from datacloud.ml_utils.hyperopt_wrapper.validator import KFoldValidator
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import Ridge
from tqdm import tqdm


def score_feature_selection_ridge(X, y, nfolds=3, random_state=42, alpha=0.7):
    """Cross-validate a Ridge model on ``(X, y)`` with ROC AUC as the metric.

    A ``KFoldValidator`` is built around ``roc_auc_score`` and asked to
    validate a freshly constructed ``Ridge`` estimator.

    :param X: feature matrix passed straight to the validator.
    :param y: target vector passed straight to the validator.
    :param nfolds: number of splits handed to ``KFoldValidator``.
    :param random_state: seed used for both the validator and the model.
    :param alpha: Ridge regularization strength (0.7 by default, the value
        that used to be hard-coded).
    :return: the ``(loss, std)`` pair produced by ``KFoldValidator.validate``.
        NOTE(review): presumably the mean and standard deviation of the
        per-fold ROC AUC -- confirm against ``KFoldValidator``. The caller in
        this file treats a larger first value as better.
    """
    validator = KFoldValidator(roc_auc_score, n_splits=nfolds, random_state=random_state)
    # copy_X=True keeps the estimator from modifying the caller's matrix in place.
    model = Ridge(alpha=alpha, random_state=random_state, copy_X=True)
    loss, std = validator.validate(model, X, y)

    return loss, std


def forward_select_features(X, y, score_fn, max_features=None, progress=None):
    """Greedy forward feature selection over the columns of ``X``.

    Every round tries each not-yet-selected column together with the columns
    selected so far, scores the candidate set with ``score_fn`` and keeps the
    column that yields the highest score. The best-scoring prefix seen over
    all rounds is remembered and reported.

    :param X: ``pandas.DataFrame`` whose columns are the candidate features.
    :param y: target passed unchanged as the second argument of ``score_fn``.
    :param score_fn: callable ``score_fn(values, y) -> (score, std)``; it
        receives ``X[columns].values`` and only the first returned item is
        used (larger is better).
    :param max_features: optional cap on the number of selection rounds;
        ``None`` means "continue until every column has been selected".
    :param progress: optional callable wrapping the per-round candidate
        iterable (for progress reporting); defaults to ``tqdm``.
    :return: ``(selected, best_pack, best_score)`` where ``selected`` is the
        list of columns in the order they were picked, ``best_pack`` is the
        prefix of ``selected`` with the highest score (``None`` if nothing
        was scored) and ``best_score`` is that score.
    """
    if progress is None:
        progress = tqdm

    # A list (not a set) keeps the column order, so ties between equally
    # scored candidates are always resolved the same way.
    remaining = list(X.columns)
    selected = []
    best_score = float('-inf')
    best_pack = None

    while remaining and (max_features is None or len(selected) < max_features):
        round_feature = None
        round_score = float('-inf')
        for candidate in progress(remaining):
            score, _ = score_fn(X[selected + [candidate]].values, y)
            if score > round_score:
                round_score = score
                round_feature = candidate

        if round_feature is None:
            # No candidate produced a comparable score (e.g. all NaN);
            # stop instead of trying to remove ``None`` from the candidates.
            break

        selected.append(round_feature)
        remaining.remove(round_feature)

        if round_score > best_score:
            print('{} improved best auc up to {}'.format(selected, round_score))
            best_score = round_score
            best_pack = list(selected)

    return selected, best_pack, best_score


if __name__ == '__main__':
    path = '//projects/scoring/otpbank/XPROD-1066/features_prod'
    target_name = 'target1'

    X, y = init_data(path, target_name, verbose=300000)

    X = pd.DataFrame(X)
    y = pd.DataFrame(y)

    # The first target column is extracted once instead of on every scoring call.
    features_batch, best_features_pack, best_score = forward_select_features(
        X, y.values[:, 0], score_feature_selection_ridge)

    print('final batch is {}'.format(features_batch))
    print('best features pack is {} with auc {}'.format(best_features_pack, best_score))

    # loss, std = score_feature_selection_ridge(X.values, y.values[:, 0])
    # best_score = loss
    # print('Best score on full dataset is {}'.format(best_score))
    # best_local_score = np.inf

    # while best_score < best_local_score and train_features:
    #     f_to_erase = None
    #     best_local_score = -1
    #     for f in tqdm(train_features):
    #         f_to_try = list(train_features ^ set([f]))
    #         loss, std = score_feature_selection_ridge(X[f_to_try].values, y.values[:, 0])
    #         if loss > best_local_score:
    #             best_local_score = loss
    #             f_to_erase = f
    #     train_features.remove(f_to_erase)
    #     print('{} is savoe slaboe zveno. Proschaite!'.format(f_to_erase))

    #     if best_local_score > best_score:
    #         print('{} erase improved best auc up to {}'.format(f_to_erase, best_local_score))
    #         best_score = best_local_score
