# -*- coding: utf-8 -*-
import argparse

import numpy as np
import pandas as pd


# Columns that must be parsed as strings rather than have a dtype inferred.
# The builtin ``str`` is used directly: ``np.str`` was only an alias for it,
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
COLUMN_TYPES = {
    'url': str,
    'ip': str,
    'yandexuid': str,
}


def sample(input_filename, output_filename, target_0_n, target_1_n):
    """Write a random per-class subsample of a tab-separated dataset.

    The input file is read as TSV and must contain a ``target`` column.
    ``target_0_n`` rows are drawn at random from rows with ``target <= 0``
    and ``target_1_n`` rows from rows with ``target >= 1``; the two draws are
    concatenated, shuffled and written to ``output_filename`` as TSV with a
    header, without the index, and with floats formatted as ``%.3f``.
    The class counts before and after sampling are printed to stdout.

    Rows whose target is missing or lies strictly between 0 and 1 match
    neither condition and are therefore never part of the output.

    pandas raises ``ValueError`` when more rows are requested for a class
    than that class contains (sampling is done without replacement).
    """
    data = pd.read_csv(input_filename, sep='\t', index_col=None, dtype=COLUMN_TYPES)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Targets:')
    print(data.target.value_counts())
    # Boolean-mask selection through .loc; the .ix indexer was removed in
    # pandas 1.0.
    target_0 = data.loc[data.target <= 0].sample(n=target_0_n)
    target_1 = data.loc[data.target >= 1].sample(n=target_1_n)
    result = pd.concat([target_0, target_1])
    # Shuffle all rows so the two classes are interleaved in the output;
    # sample(frac=1) does not rely on the index labels being unique.
    result = result.sample(frac=1)
    print('New targets:')
    print(result.target.value_counts())
    result.to_csv(output_filename, sep='\t', header=True, index=False, float_format='%.3f')


def main():
    """Parse the command-line arguments and run ``sample`` with them."""
    parser = argparse.ArgumentParser(
        description='Sample a given number of rows per target class from a '
                    'tab-separated file')
    # The input path is mandatory: without it the CSV reader fails with an
    # unrelated-looking error instead of a usage message.
    parser.add_argument('-i', '--input', metavar='INPUT', type=str, required=True,
                        help='path of the tab-separated input file')
    parser.add_argument('-o', '--output', metavar='OUTPUT', type=str,
                        help='path the sampled tab-separated file is written to')
    parser.add_argument('-f', '--target_0', type=int,
                        help='number of rows to draw with target <= 0')
    parser.add_argument('-t', '--target_1', type=int,
                        help='number of rows to draw with target >= 1')
    args = parser.parse_args()
    sample(args.input, args.output, args.target_0, args.target_1)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
