from __future__ import division
__author__ = 'cansucullu'

import operator
import numpy as np

# Mark labels, listed in order of ascending score (position-aligned with SCORES).
MARKS = ['LOWEST', 'LOW', 'MIDDLE', 'HIGH', 'HIGHEST']
# Numeric score for the label at the same index in MARKS.
SCORES = [0., 0.25, 0.5, 0.75, 1]

# Lookup table: mark label -> numeric score.
MarkDict = {label: value for label, value in zip(MARKS, SCORES)}


def _closest_mark(score, ranked_marks):
    """Return the mark label whose score is nearest to *score*.

    ``ranked_marks`` is a list of ``(label, score)`` pairs sorted by ascending
    score.  ``min`` keeps the first minimum it sees, so a value exactly halfway
    between two marks deterministically resolves to the lower one.
    """
    return min(ranked_marks, key=lambda pair: abs(pair[1] - score))[0]


def _write_rows(path, rows):
    """Write every row (a sequence of values) to *path* as a tab-separated line."""
    with open(path, 'w') as out:
        for row in rows:
            out.write('\t'.join(str(field) for field in row) + '\n')


def main(filename='tw-avg-no-404-hosts'):
    """Aggregate per-URL marks into per-host scores and write report files.

    Reads ``filename``, a tab-separated file whose lines have the form
    ``host<TAB>url<TAB>mark`` where ``mark`` is a key of ``MarkDict``, and
    writes five tab-separated files named after the input:

    * ``<filename>-scores``: host, mean score of the host's lines
    * ``<filename>-marks``: host, mark whose score is closest to that mean
    * ``<filename>-combined``: every input line with its host's mark appended
    * ``<filename>-counts``: host, number of lines, host mark, mean score
    * ``<filename>-statistics``: host, ``np.mean``, ``np.std``, number of
      lines, host mark

    Blank lines in the input are skipped.

    Args:
        filename: path of the input file; also the prefix of every output
            file.  Defaults to the originally hard-coded name.

    Raises:
        ValueError: if a non-blank line does not split into exactly three
            tab-separated fields.
        KeyError: if a line's mark is not a key of ``MarkDict``.
    """
    # Collect the numeric scores of each host in a plain list; appending to a
    # list is O(1), unlike growing a numpy array line by line.
    host_scores = {}
    with open(filename) as source:
        for line in source:
            stripped = line.strip()
            if not stripped:
                continue
            host, _url, mark = stripped.split('\t')
            host_scores.setdefault(host, []).append(MarkDict[mark])

    # Sorting by score makes the nearest-mark lookup independent of dict
    # iteration order (which is arbitrary on Python 2).
    ranked_marks = sorted(MarkDict.items(), key=operator.itemgetter(1))

    hosts = list(host_scores)
    averages = {}
    host_marks = {}
    for host in hosts:
        scores = host_scores[host]
        # True division: guaranteed on Python 2 by the module's
        # ``from __future__ import division``.
        averages[host] = sum(scores) / len(scores)
        host_marks[host] = _closest_mark(averages[host], ranked_marks)

    _write_rows(filename + '-scores',
                ((host, averages[host]) for host in hosts))
    _write_rows(filename + '-marks',
                ((host, host_marks[host]) for host in hosts))

    # Second pass over the input: echo each line with its host's final mark.
    with open(filename) as source, open(filename + '-combined', 'w') as combined:
        for line in source:
            stripped = line.strip()
            if not stripped:
                continue
            host = stripped.split('\t')[0]
            combined.write(line.rstrip() + '\t' + host_marks[host] + '\n')

    _write_rows(filename + '-counts',
                ((host, len(host_scores[host]), host_marks[host], averages[host])
                 for host in hosts))

    def _statistics_rows():
        for host in hosts:
            values = np.asarray(host_scores[host], dtype=float)
            yield (host, np.mean(values), np.std(values), len(values),
                   host_marks[host])

    _write_rows(filename + '-statistics', _statistics_rows())


# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
