from __future__ import division

import numpy as np
import matplotlib.pyplot as plt

__author__ = 'cansucullu'

def convert_file(filename):
    """Write a copy of a tab-separated file with a dot-decimal last column.

    Each line of ``filename`` is stripped of trailing whitespace and split on
    tabs.  Commas in the *last* field are replaced with dots (``0,25`` becomes
    ``0.25``); all other fields are copied unchanged.  The fields are then
    re-joined with tabs and written, one line per input line, to a new file
    named ``filename + '-ok'`` (an existing file of that name is overwritten).

    :param filename: path of the tab-separated input file
    :return: path of the converted copy (``filename + '-ok'``)
    """
    return_file = filename + '-ok'

    # Context managers guarantee both handles are closed even when reading or
    # writing raises part-way through.
    with open(filename) as source, open(return_file, 'w') as target:
        for line in source:
            items = line.rstrip().split('\t')
            items[-1] = items[-1].replace(',', '.')
            # After rstrip() the last field never ends in whitespace, so a
            # plain join yields the line without any trailing tab.
            target.write('\t'.join(items) + '\n')

    return return_file


def main_backup():
    """Variant of the CTR analysis restricted to a band of show counts.

    Converts ``'rp-highest'`` with ``convert_file`` and loads the result as a
    structured array with the columns ``url``, ``click``, ``show`` and ``ctr``
    (the first row is skipped).  Only rows whose ``show`` value is strictly
    between 100 and 1000 are kept.  The function then

    * prints the selected ``show`` values and how many there are,
    * prints the mean, standard deviation and variance of ``ctr``,
    * displays a 20-bin histogram of ``ctr``,
    * prints ``"Use this: "`` followed by ``mean + 2 * std`` of ``ctr``.

    :return: None
    """
    filename = convert_file('rp-highest')
    a = np.loadtxt(
        filename,
        dtype={'names': ('url', 'click', 'show', 'ctr'),
               'formats': ('U300', 'i4', 'i4', 'f4')},
        delimiter='\t',
        skiprows=1)

    data = a[(a['show'] < 1000) & (a['show'] > 100)]
    x = data['show']
    # Single-argument print(...) produces the same output whether print is a
    # statement (Python 2) or a function (Python 3).
    print(x)
    print(len(x))

    y = data['ctr']
    # Computed once and reused for the summary and the threshold below.
    mean_ctr = np.mean(y)
    std_ctr = np.std(y)
    print(mean_ctr)
    print(std_ctr)
    print(np.var(y))

    plt.hist(y, bins=20)
    plt.show()

    # "%s %s" reproduces the single space that print puts between two
    # operands, so the emitted text is unchanged.
    print("%s %s" % ("Use this: ", mean_ctr + 2 * std_ctr))


def main(inputfilename='rp-high', min_show=100):
    """Print CTR summary statistics for a report and plot their histogram.

    The report is converted with ``convert_file`` and loaded as a structured
    array with the columns ``url``, ``click``, ``show`` and ``ctr`` (the first
    row is skipped).  Only rows with strictly more than ``min_show`` shows are
    used.  The file name, the mean and standard deviation of ``ctr`` and
    ``mean + 2 * std`` are printed, then a 25-bin histogram of ``ctr`` titled
    with the file name is displayed.

    :param inputfilename: path of the raw tab-separated report
    :param min_show: rows are kept only if their ``show`` value exceeds this
    :return: None
    """
    filename = convert_file(inputfilename)
    a = np.loadtxt(
        filename,
        dtype={'names': ('url', 'click', 'show', 'ctr'),
               'formats': ('U300', 'i4', 'i4', 'f4')},
        delimiter='\t',
        skiprows=1)

    y = a[a['show'] > min_show]['ctr']

    # Computed once and reused for all three summary lines.
    mean_ctr = np.mean(y)
    std_ctr = np.std(y)

    # Single-argument print(...) produces the same output whether print is a
    # statement (Python 2) or a function (Python 3); "%s %s" reproduces the
    # single space that print puts between two operands.
    print(inputfilename)
    print("%s %s" % ("mean: ", mean_ctr))
    print("%s %s" % ("std: ", std_ctr))
    print("%s %s" % ("mean + 2 sigma: ", mean_ctr + 2 * std_ctr))

    plt.hist(y, bins=25)
    plt.title(inputfilename)
    plt.show()



# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
