# -*- coding: utf-8 -*-
import argparse
import pandas as pd
from collections import Counter

from datacloud.input_pipeline.input_checker.constants import delimiter_vars

def compute_default_rate(target_counter):
    """Return the share of 1-values among the 0/1 values of a target column.

    Args:
        target_counter: a ``collections.Counter`` of the column's values.
            Only the counts stored under the keys ``1`` and ``0`` are used;
            any other value (e.g. NaN or unexpected labels) is ignored.

    Returns:
        ``count(1) / (count(1) + count(0))`` as a float, or ``None`` when the
        column holds neither 0 nor 1, in which case the rate is undefined.
    """
    # Counter yields 0 for missing keys, so these lookups never raise.
    positives = target_counter[1]
    total = positives + target_counter[0]
    if total == 0:
        # Guard against ZeroDivisionError for empty / non-binary columns.
        return None
    return float(positives) / total


if __name__ == '__main__':
    # Command-line entry point: print a quick overview of a delimited file
    # and the default rate of every column whose name starts with 'target'.
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file', help='<Required> File to info', required=True)
    parser.add_argument(
        '-d',
        '--delimiter',
        default='tab',
        choices=delimiter_vars.keys(),
        help='delimiter of csv file',
    )

    args = parser.parse_args()
    file_path = args.file
    print(file_path)
    # Translate the symbolic delimiter name chosen on the CLI via delimiter_vars.
    delimiter = delimiter_vars[args.delimiter]
    print(delimiter)

    df = pd.read_csv(file_path, sep=delimiter)

    print(df.head())
    print(df.describe())

    # The frame is not modified inside the loop, so iterate the columns directly.
    for column in df.columns:
        if not column.startswith('target'):
            continue
        print(column)
        target_counter = Counter(df[column].values)
        print(target_counter)
        rate = compute_default_rate(target_counter)
        if rate is None:
            # Keep reporting the remaining columns instead of crashing.
            print('Default rate for {} is undefined (no 0/1 values)'.format(column))
        else:
            print('Default rate for {} is {:.2f}%'.format(column, rate * 100))
