#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import argparse
import json
import sys
import datetime
import time
from bs4 import BeautifulSoup


def HandleOptions():
    """Build the command-line parser for this script.

    All options are optional as far as argparse is concerned; only ``--dev``
    carries a default (``"touch"``), every other destination is ``None`` when
    the flag is omitted.

    Returns:
        argparse.ArgumentParser: the configured parser (arguments are not
        parsed here).
    """
    # (flag, add_argument keyword arguments), kept in the order they should
    # appear in the generated --help output.
    option_specs = (
        ("--ya", {"dest": "ya", "help": "File with Yandex results"}),
        ("--g", {"dest": "g", "help": "File with Google results"}),
        ("--freq", {"dest": "freq_file", "help": "file with queries frequencies"}),
        ("--outm", {"dest": "outm", "help": "Output metrics"}),
        ("--outq", {"dest": "outq", "help": "Output queries"}),
        ("--ts", {"dest": "ts", "help": "timestamp"}),
        ("--name", {"dest": "name", "help": "name"}),
        ("--dev", {"dest": "device", "help": "Mode: touch|desktop", "default": "touch"}),
    )

    cli = argparse.ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli


def get_query_freq(freq_file):
    """Load per-query frequencies from a tab-separated file.

    Each non-blank line must hold a query and an integer frequency separated
    by a tab. The line is split on the LAST tab, so a query that itself
    contains tab characters is still read correctly. Blank lines (for example
    a trailing empty line at the end of the file) are skipped.

    Args:
        freq_file: path to the frequency file, or ``None`` when no
            frequencies were supplied.

    Returns:
        dict: query text -> integer frequency; empty when ``freq_file`` is
        ``None``. If a query occurs more than once, the last line wins.

    Raises:
        ValueError: a non-blank line has no tab separator or its frequency
            is not an integer.
    """
    query_freq = {}
    if freq_file is None:
        return query_freq

    with open(freq_file, "r") as f:
        for line_no, raw_line in enumerate(f, 1):
            if not raw_line.strip():
                # Tolerate empty lines instead of failing on tuple unpacking.
                continue
            query, sep, val = raw_line.rstrip("\r\n").rpartition("\t")
            if not sep:
                raise ValueError(
                    "%s:%d: expected '<query><TAB><frequency>', got %r"
                    % (freq_file, line_no, raw_line)
                )
            query_freq[query] = int(val)
    return query_freq


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    if not denominator:
        return 0.0
    return numerator / denominator


def _has_ya_news(ya_serp):
    """Return True if the parsed Yandex SERP has a WIZARD_NEWS wizard component."""
    components = ya_serp["serp-page"]["parser-results"]["components"]
    return any(
        component["type"] == "WIZARD" and component["wizard-type"] == "WIZARD_NEWS"
        for component in components
    )


def _has_g_news(g_serp, header_class):
    """Return True if the raw Google HTML has a ``header_class`` element whose
    text is exactly the Russian news-block title."""
    soup = BeautifulSoup(g_serp["serp-page"]["raw-content"], 'html.parser')
    return any(
        header.get_text() == u"Главные новости"
        for header in soup.find_all(attrs={"class": header_class})
    )


def main():
    """Compare news-block presence on Yandex and Google SERPs and write metrics.

    Reads two JSON files with SERPs (``--ya`` and ``--g``), pairs them by
    position, and records for every unique query whether Yandex showed a
    WIZARD_NEWS wizard and whether Google showed the news header. Then writes:

    * ``--outm``: six tab-separated lines ``<name>_<metric>_<device>``,
      value, timestamp. The metrics are the share of queries with the
      Yandex block, the share with the Google block, and the share of
      Google-block queries that also have the Yandex block, each both
      weighted by query frequency and unweighted.
    * ``--outq``: a ``query / Ya / G`` table with one row per unique query.

    Every query contributes to each counter at most once, so a SERP with
    several news wizards or a query repeated in the input cannot push a
    ratio above 1. A ratio whose denominator is zero (no queries at all, or
    no query with the Google block) is reported as 0.0 instead of raising
    ZeroDivisionError.

    Exits through ``parser.error`` (status 2) when a required option is
    missing or ``--dev`` is not a known mode.
    """
    parser = HandleOptions()
    args = parser.parse_args()

    # CSS class of the header element searched for in Google's HTML, per device mode.
    G_HEADER = {"touch": "N60dNb", "desktop": "e2BEnf"}

    # The parser declares every option as optional; fail early with a usage
    # message rather than with a TypeError/KeyError deep in the processing.
    required = (
        ("--ya", args.ya),
        ("--g", args.g),
        ("--outm", args.outm),
        ("--outq", args.outq),
        ("--ts", args.ts),
        ("--name", args.name),
    )
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error("missing required option(s): " + ", ".join(missing))
    if args.device not in G_HEADER:
        parser.error("--dev must be one of: " + "|".join(sorted(G_HEADER)))

    # Only the first 10 characters of the timestamp are written out.
    timestamp = args.ts[:10]
    header_class = G_HEADER[args.device]
    query_freq = get_query_freq(args.freq_file)

    # Load both inputs up front so the files are not held open while parsing HTML.
    with open(args.ya, "r") as ya, open(args.g, "r") as g:
        ya_results = json.load(ya)
        g_results = json.load(g)

    # NOTE(review): SERPs are paired purely by position and zip() silently
    # drops the tail of the longer list - confirm both files share the same
    # query order and length.
    querystat = {}
    for ya_serp, g_serp in zip(ya_results, g_results):
        query = ya_serp["serpRequestExplained"]["per-query-parameters"]["query-text"]
        if not isinstance(query, str):
            # Python 2: json yields unicode; use UTF-8 bytes so the key matches
            # the frequency file and can be written as-is. On Python 3 the
            # value is already a native str and must not become bytes.
            query = query.encode("utf-8")

        flags = querystat.setdefault(query, {"ya": 0, "g": 0})
        if _has_ya_news(ya_serp):
            flags["ya"] = 1
        if _has_g_news(g_serp, header_class):
            flags["g"] = 1

    # Derive all counters from the per-query flags: one contribution per query.
    totalq = len(querystat)
    totalq_weighted = 0
    ya_total = ya_total_weighted = 0
    g_total = g_total_weighted = 0
    ya_google_cover = ya_google_cover_weighted = 0
    for query, flags in querystat.items():
        weight = query_freq.get(query, 1)  # queries without a known frequency weigh 1
        totalq_weighted += weight
        if flags["ya"]:
            ya_total += 1
            ya_total_weighted += weight
        if flags["g"]:
            g_total += 1
            g_total_weighted += weight
            if flags["ya"]:
                ya_google_cover += 1
                ya_google_cover_weighted += weight

    metrics = [
        ("_ya_total_recall_weighted_", _safe_ratio(ya_total_weighted, totalq_weighted)),
        ("_g_total_recall_weighted_", _safe_ratio(g_total_weighted, totalq_weighted)),
        ("_ya_google_recall_weighted_", _safe_ratio(ya_google_cover_weighted, g_total_weighted)),
        ("_ya_total_recall_", _safe_ratio(ya_total, totalq)),
        ("_g_total_recall_", _safe_ratio(g_total, totalq)),
        ("_ya_google_recall_", _safe_ratio(ya_google_cover, g_total)),
    ]
    with open(args.outm, "w") as outm:
        outm.writelines(
            args.name + metric + args.device + "\t" + str(value) + "\t" + timestamp + "\n"
            for metric, value in metrics
        )

    with open(args.outq, "w") as outq:
        outq.write("query\tYa\tG\n")
        for q, flags in querystat.items():
            outq.write("\t".join([q, str(flags["ya"] > 0), str(flags["g"] > 0)]) + "\n")


# Run the comparison only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
