#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import sys
import argparse
import json
import datetime
import time
import random
from collections import defaultdict

def HandleOption():
    """Build the command-line parser for this script.

    Every option is mandatory and stored as a plain string under an
    attribute of the same name: ``--results``, ``--weights``, ``--ts``,
    ``--metrics`` and ``--urls``.

    Returns:
        The configured ``argparse.ArgumentParser`` (not yet parsed).
    """
    option_names = ("results", "weights", "ts", "metrics", "urls")
    parser = argparse.ArgumentParser()
    for option in option_names:
        parser.add_argument("--" + option, dest=option, required=True)
    return parser


def _load_weights(path):
    """Read a tab-separated weights file into a ``{url: int_weight}`` dict.

    Each non-blank line must hold the URL in the first tab-separated field
    and an integer weight in the second; blank lines are skipped.

    Raises:
        IndexError: a non-blank line has no second field.
        ValueError: the second field is not an integer.
    """
    weight = {}
    with open(path) as weights:
        for line in weights:
            line = line.strip()
            if line:
                fields = line.split("\t")
                weight[fields[0]] = int(fields[1])
    return weight


def _warn_missing_weight(url):
    """Report on stderr a URL that has no (or a zero) weight."""
    message = "%s\n" % (url,)
    # On Python 2 JSON strings are ``unicode``; encode explicitly so that
    # non-ASCII URLs do not break when stderr has no encoding (e.g. a pipe).
    if not isinstance(message, str):
        message = message.encode("utf-8")
    sys.stderr.write(message)


def _collect_votes(data, weight):
    """Group assessor answers by URL.

    Records carrying a non-empty ``knownSolutions`` list are honeypots and
    are tallied separately from regular tasks.

    Args:
        data: iterable of result records (as loaded from the results JSON).
        weight: ``{url: weight}`` mapping; URLs absent from it get weight 0.

    Returns:
        ``(marked, honeypots)`` -- two dicts keyed by URL.  Every value has
        ``count`` (number of answers) and ``marks`` (number of "News"
        answers); regular entries also have ``weight``, honeypot entries
        have ``correct`` (1 if the known answer is "News", else 0).
    """
    marked = {}
    honeypots = {}
    for res in data:
        url = res["inputValues"]["url"]
        mark = 1 if res["outputValues"]["result"] == "News" else 0
        known = res.get("knownSolutions")
        if known:
            entry = honeypots.get(url)
            if entry is None:
                expected = known[0]["outputValues"]["result"]
                entry = honeypots[url] = {
                    "correct": 1 if expected == "News" else 0,
                    "count": 0,
                    "marks": 0,
                }
        else:
            entry = marked.get(url)
            if entry is None:
                url_weight = weight.get(url) or 0
                if not url_weight:
                    # Warn once per URL, the first time it is seen.
                    _warn_missing_weight(url)
                entry = marked[url] = {
                    "count": 0,
                    "marks": 0,
                    "weight": url_weight,
                }
        entry["count"] += 1
        entry["marks"] += mark
    return marked, honeypots


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or None if undefined."""
    if not denominator:
        return None
    return float(numerator) / denominator


def _build_report(marked, honeypots, ts):
    """Compute aggregate metrics and per-URL verdict lines.

    Args:
        marked: regular-task tallies as produced by ``_collect_votes``.
        honeypots: honeypot tallies as produced by ``_collect_votes``.
        ts: timestamp string; only its first 10 characters are parsed as
            integer seconds (so longer, e.g. millisecond, stamps also work).

    Returns:
        ``(out_stat, out_urls)`` -- the metrics dict and a list of
        newline-terminated JSON strings, one per regular URL.  A share whose
        denominator is zero (no URLs, no total weight, no honeypots) is
        reported as ``None`` instead of raising ``ZeroDivisionError``.
    """
    total_w = 0
    marks_m = 0
    marks_w = 0
    out_urls = []
    for url, info in marked.items():
        count = info["count"]
        marks = info["marks"]
        url_weight = info["weight"]
        # A URL is "News" only on a strict majority of "News" answers.
        is_news = float(marks) / count > 0.5
        total_w += url_weight
        if is_news:
            marks_m += 1
            marks_w += url_weight
        out_urls.append(json.dumps({
            "url": url,
            "weight": url_weight,
            "marks_number": count,
            "marked_ok": marks,
            "marked_bad": count - marks,
            "verdict": "News" if is_news else "Not news",
        }) + "\n")

    correct_hp = 0
    for info in honeypots.values():
        share = float(info["marks"]) / info["count"]
        # An exact 50/50 split matches neither branch, so a tied honeypot is
        # never counted as answered correctly.
        if share > 0.5 and info["correct"]:
            correct_hp += 1
        elif share < 0.5 and not info["correct"]:
            correct_hp += 1

    news_share = _safe_ratio(marks_m, len(marked))
    news_share_weighted = _safe_ratio(marks_w, total_w)
    fielddate = datetime.datetime.fromtimestamp(int(ts[:10])).strftime("%Y-%m-%d")
    out_stat = {
        "fielddate": fielddate,
        "bad_share": None if news_share is None else 1 - news_share,
        "bad_share_weighted": (None if news_share_weighted is None
                               else 1 - news_share_weighted),
        "honeypot_quality": _safe_ratio(correct_hp, len(honeypots)),
    }
    return out_stat, out_urls


def main():
    """Aggregate assessor answers and write metrics plus per-URL verdicts.

    Reads the weights file (``--weights``) and the results JSON
    (``--results``), then writes the aggregate metrics as indented JSON to
    ``--metrics`` and one JSON object per regular URL to ``--urls``.  URLs
    with no or zero weight are listed on stderr.
    """
    args = HandleOption().parse_args()

    weight = _load_weights(args.weights)
    with open(args.results, "r") as results:
        data = json.load(results)

    marked, honeypots = _collect_votes(data, weight)
    out_stat, out_urls = _build_report(marked, honeypots, args.ts)

    with open(args.metrics, "w") as metrics:
        json.dump(out_stat, metrics, indent=4)
    with open(args.urls, "w") as urls:
        urls.writelines(out_urls)

# Run the aggregation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
