#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import argparse
import datetime
import os
from collections import defaultdict, Counter
try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse
from nile.api.v1 import clusters
from videolog_common import get_cluster, StatPusher
from searchable_players import count_searchable_players


def generate_table(counter):
    """Render status counts as a text table delimited by ``#|`` and ``|#``.

    Args:
        counter: a ``collections.Counter`` mapping status to its count.

    Returns:
        A newline-joined string: the ``#|`` opener, a
        ``||Status|Count|Share||`` header, one ``||status|count|share||``
        row per entry in ``most_common()`` order (share is the count's
        fraction of the sum of all counts, formatted as a percentage with
        two decimals), and the ``|#`` closer.
    """
    total = sum(counter.values())
    rows = ["#|", "||Status|Count|Share||"]
    for key, value in counter.most_common():
        # A counter may hold entries whose counts sum to zero; report a 0%
        # share for them instead of raising ZeroDivisionError.
        share = value / total if total else 0.0
        rows.append("||{}|{}|{:.2%}||".format(key, value, share))
    rows.append("|#")
    return "\n".join(rows)


# Statuses exported to the report: generate_report_record() emits one
# "status_<lowercased name>" field per entry listed here. Statuses that occur
# in the input but are missing from this list still appear in the text tables,
# but get no dedicated report field.
ALLOWED_STATUSES = [
    "BAD_URL",
    "CRAWLED",
    "DEAD_URL",
    "DELETED",
    "DELETED_MIDDLESEARCH",
    "INDEXED_CANOURL",
    "NO_THUMB",
    "NOT_CRAWLED",
    "NOT_CRAWLED_CANOURL",
    "NOT_DISCOVERED",
    "NOT_INDEXED",
    "SEARCHABLE_WITH_ATTRS",
    "THUMB_404",
    "THUMB_NOT_CRAWLABLE",
]


def generate_report_record(c, host, fielddate, args):
    """Build the report record for a single host.

    Args:
        c: mapping of host name to a ``Counter`` of status counts; the
            special key ``"_total_"`` holds the aggregate over all hosts.
        host: key of ``c`` to build the record for.
        fielddate: date string stored under the ``"fielddate"`` key.
        args: parsed command-line arguments; only ``average_hosts`` is read.

    Returns:
        dict with ``"fielddate"``, ``"host"``, one ``"status_<name>"`` count
        per entry of ``ALLOWED_STATUSES`` and ``"sbr_metric"``.

        ``"sbr_metric"`` is ``count_searchable_players()`` applied to the
        host's counter. For the ``"_total_"`` record with
        ``args.average_hosts`` set it is instead the mean of that value over
        the real hosts, or 0 when there are none.
    """
    host_counts = c[host]
    result = {
        "fielddate": fielddate,
        "host": host,
    }
    for status in ALLOWED_STATUSES:
        result["status_{}".format(status.lower())] = host_counts[status]

    if host == "_total_" and args.average_hosts:
        # Average over real hosts only: "_total_" is the aggregate bucket,
        # not a host, and including it would skew the mean. The loop variable
        # is deliberately not called `host`, since Python 2 list
        # comprehensions leak their variable into the enclosing scope.
        sbr_metrics = [
            count_searchable_players(counts)
            for name, counts in c.items()
            if name != "_total_"
        ]
        if not sbr_metrics:
            result["sbr_metric"] = 0
        else:
            result["sbr_metric"] = sum(sbr_metrics) / float(len(sbr_metrics))
    else:
        result["sbr_metric"] = count_searchable_players(host_counts)
    return result


def generate_report_data(c, args):
    """Return one report record per key of ``c``, stamped with today's date.

    Args:
        c: mapping of host name to a ``Counter`` of status counts.
        args: parsed command-line arguments, forwarded unchanged to
            ``generate_report_record``.

    Returns:
        list of record dicts, in the iteration order of ``c``.
    """
    # Compute the date once so every record of a run carries the same value.
    fielddate = str(datetime.date.today())
    return [
        generate_report_record(c, name, fielddate, args)
        for name in c
    ]



def main():
    """Tally statuses per host from a tab-separated file and publish them.

    Each input line with at least three tab-separated fields contributes one
    count: the first field is parsed as a URL and its network location is the
    host, the last field (stripped) is the status. Every count is also added
    to the ``"_total_"`` bucket. One table per bucket is written to the
    ``--output`` file; when ``--report`` is given, the per-host records are
    additionally pushed through ``StatPusher``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("filename")
    cli.add_argument("--report")
    cli.add_argument("--average_hosts", action="store_true")
    cli.add_argument("--output", default="result.txt")
    args = cli.parse_args()

    c = defaultdict(Counter)
    with open(args.filename) as source:
        for raw_line in source:
            fields = raw_line.split("\t")
            if len(fields) >= 3:
                netloc = urlparse(fields[0]).netloc
                state = fields[-1].strip()
                # Count the status for the host itself and for the aggregate.
                for bucket in (netloc, "_total_"):
                    c[bucket][state] += 1

    sections = []
    for name, statuses in c.items():
        sections.extend(("== {}".format(name), generate_table(statuses)))

    with open(args.output, "w") as sink:
        sink.write("\n\n".join(sections))

    if not args.report:
        return

    # Build the records before touching the cluster, as the push is the last
    # step of the run.
    records = generate_report_data(c, args)
    pusher = StatPusher(cluster=get_cluster(clusters, {}), report=args.report)
    pusher.push(records)

# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
