#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import yt.wrapper as yt
import sys
import argparse
import json
import datetime
from collections import defaultdict


def safe_divide(n, d):
    """Return ``n / d``, or ``0`` when the denominator ``d`` is falsy (e.g. 0 or None)."""
    if not d:
        return 0
    return n / d


def HandleOption():
    """Build the command-line parser for this script.

    ``--server`` is optional and defaults to ``hahn.yt.yandex.net:80``;
    every other option is required.

    Returns:
        argparse.ArgumentParser: the configured (not yet parsed) parser.
    """
    option_parser = argparse.ArgumentParser()
    option_parser.add_argument(
        "--server",
        dest="server",
        help="mapreduce server",
        default="hahn.yt.yandex.net:80",
        required=False
    )

    # (flag, destination attribute, help text) for each mandatory option,
    # registered in the same order they appear in the usage message.
    mandatory_options = (
        ("--date", "date", "date"),
        ("--rc_desk", "recall_desktop", "recall"),
        ("--rc_touch", "recall_touch", "recall"),
        ("--outr", "outr", "File for metrics"),
        ("--ts", "timestamp", "timestamp"),
    )
    for flag, destination, help_text in mandatory_options:
        option_parser.add_argument(flag, dest=destination, help=help_text, required=True)

    return option_parser


def _new_query_record(region, src):
    """Build a per-query record with every show/SbS/position field at its default."""
    return {
        "region": region,
        "src": src,
        "show_Ya_desktop": False,
        "show_Ya_touch": False,
        "show_G_desktop": False,
        "show_G_touch": False,
        "sbs_win_touch": 0,
        "sbs_win_desktop": 0,
        "sbs_loss_touch": 0,
        "sbs_loss_desktop": 0,
        "ya_position_touch": None,
        "ya_position_desktop": None,
        "g_position_touch": None,
        "g_position_desktop": None,
    }


def _parse_position(field):
    """Convert a recall-file position column to int; the literal "None" maps to None."""
    return None if field == "None" else int(field)


def main():
    """Merge per-device recall files into the daily bucket table and dump metrics.

    Steps:
      1. Read ``<date>/integral_bucket`` from YT and index its rows by
         lower-cased query.
      2. For each device (desktop, touch) read the tab-separated recall file
         (columns: query, Ya shown, G shown, Ya position, G position) and
         update the per-query flags and the per-device counters.
      3. Write the per-query rows back to the same YT table.
      4. Write per-device aggregate metrics as JSON to the ``--outr`` file.

    Raises:
        IndexError: if a non-blank recall line has fewer than five columns.
        ValueError: if a position column or the timestamp is not an integer.
    """
    args = HandleOption().parse_args()
    yt.update_config({"proxy": {"url": args.server}})

    table = "//home/freshness/pool/news_wizard/daily/" + args.date + "/integral_bucket"
    queries = {}
    for row in yt.read_table(table):
        queries[row["query"].decode("utf-8").lower()] = _new_query_record(row["region"], row["src"])

    recall_files = {"desktop": args.recall_desktop, "touch": args.recall_touch}
    metric_names = ("ya_shows", "g_shows", "sbs", "ya_pos", "g_pos")
    metrics = {dev: dict.fromkeys(metric_names, 0) for dev in recall_files}
    # The recall denominator counts only queries present in the YT table;
    # it is taken before recall-only queries are added below.
    totalq = len(queries)
    # Only the first 10 characters are used -- presumably this trims a
    # millisecond epoch down to seconds (TODO confirm with the caller).
    timestamp = args.timestamp[:10]

    for dev, recall_path in recall_files.items():
        dev_metrics = metrics[dev]
        show_ya_key = "show_Ya_" + dev
        show_g_key = "show_G_" + dev
        with open(recall_path) as f:
            for line in f:
                line = line.strip()
                # Skip blank lines (e.g. a trailing newline) and the header.
                if not line or line == "query\tYa\tG":
                    continue
                tmp = line.split("\t")
                query = tmp[0].decode("utf-8").lower()
                if query not in queries:
                    # This query has no table row, so its region is unknown.
                    # Store None instead of reusing the region of whichever
                    # table row happened to be read last.
                    queries[query] = _new_query_record(None, "info_missing")
                record = queries[query]

                # A show is counted only when the flag flips from False.
                ya = tmp[1] == "True"
                if ya and not record[show_ya_key]:
                    dev_metrics["ya_shows"] += 1
                g = tmp[2] == "True"
                if g and not record[show_g_key]:
                    dev_metrics["g_shows"] += 1
                record[show_ya_key] = ya
                record[show_g_key] = g

                pos = _parse_position(tmp[3])
                record["ya_position_" + dev] = pos
                if pos is not None:
                    dev_metrics["ya_pos"] += pos
                g_pos = _parse_position(tmp[4])
                record["g_position_" + dev] = g_pos
                if g_pos is not None:
                    dev_metrics["g_pos"] += g_pos

    # Position fields are tracked in memory but are not written to the table.
    output_fields = (
        "src", "region",
        "show_Ya_desktop", "show_Ya_touch", "show_G_desktop", "show_G_touch",
        "sbs_win_desktop", "sbs_loss_desktop", "sbs_win_touch", "sbs_loss_touch",
    )
    out = []
    for query, record in queries.items():
        out_row = {"query": query}
        for field in output_fields:
            out_row[field] = record[field]
        out.append(out_row)
    # NOTE: this overwrites the same table that was read above.
    yt.write_table(yt.TablePath(table), out)

    fielddate = datetime.datetime.fromtimestamp(int(timestamp)).strftime("%Y-%m-%d")
    out_stat = []
    for device in recall_files:
        dev_metrics = metrics[device]
        out_stat.append({
            "fielddate": fielddate,
            "device": device,
            "bucket": "integral",
            "weighted": "no",
            "ya_total_recall": safe_divide(dev_metrics["ya_shows"], totalq),
            "g_total_recall": safe_divide(dev_metrics["g_shows"], totalq),
            "ya_google_recall": safe_divide(dev_metrics["ya_shows"], dev_metrics["g_shows"]),
            "average_pos": safe_divide(dev_metrics["ya_pos"], dev_metrics["ya_shows"]),
            "google_average_pos": safe_divide(dev_metrics["g_pos"], dev_metrics["g_shows"]),
        })
    with open(args.outr, "w") as outr:
        json.dump(out_stat, outr, indent=4)


# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
