#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division

import argparse
import datetime
import io
import json
import sys
import time

from bs4 import BeautifulSoup

def HandleOptions():
    """Build the command-line parser for this script.

    Nothing is parsed here; the caller is expected to invoke
    ``parse_args()`` on the returned object.

    Returns:
        argparse.ArgumentParser: parser that knows the options ``--ya``,
        ``--g``, ``--outm``, ``--outq``, ``--ts``, ``--name`` and ``--dev``
        (stored as ``device``, defaulting to ``"touch"``).
    """
    # (flag, destination attribute, help text) for every option that has
    # no default value; registration order defines the order in --help.
    plain_options = (
        ("--ya", "ya", "File with Yandex results"),
        ("--g", "g", "File with Google results"),
        ("--outm", "outm", "Output metrics"),
        ("--outq", "outq", "Output queries"),
        ("--ts", "ts", "timestamp"),
        ("--name", "name", "name"),
    )

    cli = argparse.ArgumentParser()
    for flag, target, description in plain_options:
        cli.add_argument(flag, dest=target, help=description)
    cli.add_argument(
        "--dev",
        dest="device",
        help="Mode: touch|desktop",
        default="touch",
    )
    return cli


# Value of the HTML ``class`` attribute searched for in the Google page,
# keyed by the --dev mode.
_G_HEADER_CLASS = {"touch": "N60dNb", "desktop": "e2BEnf"}

# Text (Russian for "Top news") an element of that class must carry for the
# Google page to count as showing the news block.
_G_NEWS_TITLE = u"Главные новости"

# Lines that hold JSON-array punctuation instead of a SERP object.
_ARRAY_PUNCTUATION = frozenset([u"[", u"]", u","])


def _iter_serps(path):
    """Yield one parsed SERP object per line of *path*.

    Lines consisting only of ``[``, ``]`` or ``,`` are skipped, and so are
    blank lines (which would otherwise make ``json.loads`` fail).
    """
    with io.open(path, "r", encoding="utf-8") as dump:
        for line in dump:
            text = line.strip()
            if not text or text in _ARRAY_PUNCTUATION:
                continue
            yield json.loads(text)


def _query_text(serp):
    """Return the query text stored in a SERP object."""
    return serp["serpRequestExplained"]["per-query-parameters"]["query-text"]


def _new_stat():
    """Return a fresh per-query record with nothing found yet."""
    return {"ya": 0, "ya_pos": None, "g": 0, "g_pos": None}


def _news_wizard_position(components):
    """Return the index of the first WIZARD_NEWS component, or None."""
    for pos, result in enumerate(components):
        if result["type"] == "WIZARD" and result["wizard-type"] == "WIZARD_NEWS":
            return pos
    return None


def _first_resource_html(serp):
    """Return the content of the first SERP resource, or None if absent."""
    page = serp.get("serp-page") or {}
    resources = (page.get("serp-resources") or {}).get("resources") or []
    if not resources:
        return None
    return resources[0].get("content")


def _has_google_news_header(html, header_class):
    """Tell whether *html* has a *header_class* element with the news title."""
    soup = BeautifulSoup(html, 'html.parser')
    return any(
        header.get_text() == _G_NEWS_TITLE
        for header in soup.find_all(attrs={"class": header_class})
    )


def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is 0."""
    if denominator == 0:
        return 0.
    return numerator / denominator


def main():
    """Compare news-wizard presence in Yandex and Google SERP dumps.

    Reads the two dumps named by ``--ya`` and ``--g`` (one JSON object per
    line), records per query whether a news block was found and at which
    component index, then writes:

    * ``--outm``: a JSON list with a single record of aggregate metrics;
    * ``--outq``: a tab-separated file with one row per query.

    Exits through ``parser.error`` (status 2) when a required option is
    missing or ``--dev`` is not a known mode. Ratios whose denominator is
    zero are reported as 0.0 instead of raising ZeroDivisionError.
    """
    parser = HandleOptions()
    args = parser.parse_args()

    # Fail with a usage message instead of a TypeError/KeyError deep in the
    # processing when an option the script cannot work without is absent.
    required = (
        ("--ya", args.ya),
        ("--g", args.g),
        ("--outm", args.outm),
        ("--outq", args.outq),
        ("--ts", args.ts),
    )
    missing = [flag for flag, value in required if not value]
    if missing:
        parser.error("missing required option(s): " + ", ".join(missing))
    if args.device not in _G_HEADER_CLASS:
        parser.error("--dev must be one of: " + ", ".join(sorted(_G_HEADER_CLASS)))

    # Only the first 10 characters of the timestamp are used (presumably to
    # drop a sub-second suffix). Converted up front so that a malformed
    # value fails before any file is read. fromtimestamp() uses local time.
    timestamp = args.ts[:10]
    fielddate = datetime.datetime.fromtimestamp(int(timestamp)).strftime("%Y-%m-%d")

    querystat = {}

    # Yandex: the news block is detected from the parsed components only.
    for serp in _iter_serps(args.ya):
        stat = querystat.setdefault(_query_text(serp), _new_stat())
        pos = _news_wizard_position(serp["serp-page"]["parser-results"]["components"])
        if pos is not None:
            stat["ya"] = 1
            stat["ya_pos"] = pos

    # Google: the position comes from the parsed components (when present),
    # while presence is decided independently from the raw HTML.
    header_class = _G_HEADER_CLASS[args.device]
    for serp in _iter_serps(args.g):
        stat = querystat.setdefault(_query_text(serp), _new_stat())

        page = serp.get("serp-page") or {}
        components = (page.get("parser-results") or {}).get("components") or []
        pos = _news_wizard_position(components)
        if pos is not None:
            stat["g_pos"] = pos

        # The page or its resources may be missing just like the parsed
        # components; such a SERP simply counts as "no news block".
        html = _first_resource_html(serp)
        if html and _has_google_news_header(html, header_class):
            stat["g"] = 1

    totalq = len(querystat)
    ya_total = 0
    g_total = 0
    sumpos = 0
    sumgpos = 0
    for stat in querystat.values():
        if stat["ya"]:
            ya_total += 1
            sumpos += stat["ya_pos"]
        if stat["g"]:
            g_total += 1
            if stat["g_pos"] is not None:
                sumgpos += stat["g_pos"]

    # NOTE(review): google_average_pos divides by every query with a Google
    # news header, including those for which no position was found; kept
    # as-is to stay comparable with previously produced metrics.
    out = [{
        "fielddate": fielddate,
        "device": args.device,
        "bucket": args.name,
        "weighted": "no",
        "ya_total_recall": _ratio(ya_total, totalq),
        "g_total_recall": _ratio(g_total, totalq),
        "ya_google_recall": _ratio(ya_total, g_total),
        "average_pos": _ratio(sumpos, ya_total),
        "google_average_pos": _ratio(sumgpos, g_total),
    }]
    with open(args.outm, "w") as outfile:
        json.dump(out, outfile, indent=4)

    # Written as UTF-8 text so the same code works on Python 2 and 3.
    # NOTE(review): the header names three columns while every row has five
    # (query, Ya, G, ya_pos, g_pos); left unchanged in case a consumer
    # depends on the exact header line.
    with io.open(args.outq, "w", encoding="utf-8") as outq:
        outq.write(u"query\tYa\tG\n")
        for query, stat in querystat.items():
            fields = [
                query,
                str(stat["ya"] > 0),
                str(stat["g"] > 0),
                str(stat["ya_pos"]),
                str(stat["g_pos"]),
            ]
            outq.write(u"\t".join(fields) + u"\n")

# Run the comparison only when the file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()
