#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import sys
import argparse
import yt.wrapper as yt
import datetime
import json
import random
from collections import defaultdict


def HandleOption():
    """Build the argparse parser describing this script's command line.

    ``--server`` is optional and has a default value; every other option
    is mandatory.  Note that ``--dev`` is stored under the attribute
    ``mode`` and ``--ts`` under ``timestamp``.

    Returns:
        argparse.ArgumentParser: the configured (not yet applied) parser.
    """
    # (flag, destination attribute, help text) for each mandatory option,
    # listed in the order they should appear in the generated usage text.
    mandatory = (
        ("--sbs", "sbs", "by-query results"),
        ("--outs", "outs", "File for metrics"),
        ("--dev", "mode", "Device: desktop/touch"),
        ("--ts", "timestamp", "timestamp"),
        ("--weight", "weight", "pre-calculated table with sample"),
    )

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--server",
        dest="server",
        required=False,
        default="hahn.yt.yandex.net:80",
        help="mapreduce server",
    )
    for flag, attribute, description in mandatory:
        cli.add_argument(flag, dest=attribute, help=description, required=True)
    return cli


# Number of matching weight-table rows kept after shuffling.
_SAMPLE_SIZE = 1000

# Ordered (sys_id, sys_id) pairs mapped to the suffix of the per-query
# "win_<suffix>" / "loss_<suffix>" counters.  Wins of the first system of a
# pair are counted as "win", wins of the second one as "loss".
_PAIR_SUFFIXES = {("0", "1"): "01", ("2", "1"): "21"}


def _sample_query_weights(rows, ui, sample_size=_SAMPLE_SIZE):
    """Count query occurrences in a random sample of the rows matching *ui*.

    Rows with a falsy "query" are skipped; the remaining rows whose "ui"
    equals *ui* are shuffled and the first *sample_size* of them are counted.

    Returns:
        defaultdict(int): query -> number of occurrences in the sample.
    """
    queries = [row["query"] for row in rows if row["query"] and row["ui"] == ui]
    random.shuffle(queries)
    weights = defaultdict(int)
    for query in queries[:sample_size]:
        weights[query] += 1
    return weights


def _collect_sbs_results(sbs, weights):
    """Accumulate per-query win/loss counters from a parsed SBS document.

    Only comparisons between systems "0"/"1" and "2"/"1" are counted, in
    either left/right order; all other comparisons are ignored.  Each query
    gets a "weight" of its sampled count plus one, so that queries missing
    from the sample are not zero-weighted.

    NOTE(review): SBS query texts are lowercased before the weight lookup,
    while the weight table queries are used as-is -- confirm the table
    already stores lowercase queries.

    Returns:
        dict: lowercased query -> {"win_01", "loss_01", "win_21", "loss_21",
        "weight"}.
    """
    results = {}
    custom_results = sbs["sbs_metric_results"][0]["sbs_metric_values"]["custom_results"]
    for res in custom_results:
        query = res["query"]["text"].lower()
        if query not in results:
            # .get() instead of indexing: do not grow the defaultdict as a
            # side effect of a lookup.
            sampled = weights.get(query, 0)
            results[query] = {"win_01": 0, "loss_01": 0, "win_21": 0, "loss_21": 0,
                              "weight": sampled + 1}
            # Works on both Python 2 and Python 3, unlike `print >> sys.stderr`.
            sys.stderr.write("%s %s\n" % (query, sampled))
        stats = results[query]
        for comparison in res["comparisons"]:
            left, right = comparison["left"], comparison["right"]
            pair = (left["sys_id"], right["sys_id"])
            if pair in _PAIR_SUFFIXES:
                first, second = left, right
            elif pair[::-1] in _PAIR_SUFFIXES:
                # Same pair shown in the opposite left/right order.
                pair = pair[::-1]
                first, second = right, left
            else:
                continue
            suffix = _PAIR_SUFFIXES[pair]
            stats["win_" + suffix] += first["wins"]
            stats["loss_" + suffix] += second["wins"]
    return results


def _aggregate(results):
    """Sum plain and weight-multiplied wins/totals over all queries.

    Returns:
        tuple: (win, total) dicts keyed by "01", "21", "01_w", "21_w".
    """
    keys = ("01", "21", "01_w", "21_w")
    total = dict.fromkeys(keys, 0)
    win = dict.fromkeys(keys, 0)
    for stats in results.values():
        weight = stats["weight"]
        for suffix in ("01", "21"):
            wins = stats["win_" + suffix]
            losses = stats["loss_" + suffix]
            total[suffix] += wins + losses
            total[suffix + "_w"] += wins * weight + losses * weight
            win[suffix] += wins
            win[suffix + "_w"] += wins * weight
    return win, total


def _ratio(wins, total, label):
    """Return wins / total, or None (with a stderr warning) if total is 0."""
    if not total:
        sys.stderr.write("no comparisons counted for %s; ratio is undefined\n" % label)
        return None
    return wins / total


def _difference(first, second):
    """Return first - second, or None when either operand is undefined."""
    if first is None or second is None:
        return None
    return first - second


def main():
    """Compute SBS win-rate metrics and write them to the --outs JSON file.

    Steps:
      1. Read the --weight table through yt, keep rows matching --dev and
         count query occurrences in a random sample of them.
      2. Load the --sbs JSON file and accumulate per-query wins/losses for
         the system pairs 0 vs 1 and 2 vs 1.
      3. Write a one-element JSON list with the plain and weighted win rate
         of pair 0/1 and its difference from the win rate of pair 2/1.

    A ratio whose pair has no counted comparisons is written as null
    instead of aborting with ZeroDivisionError; a difference involving such
    a ratio is null as well.

    NOTE(review): args.server is parsed but never applied to the yt client
    here -- confirm the proxy is meant to come from the environment.
    NOTE(review): fielddate is derived with datetime.fromtimestamp, i.e. in
    the local timezone of the machine -- confirm this is intended.
    """
    args = HandleOption().parse_args()

    # Keep the first 10 characters only, i.e. drop any sub-second digits of
    # a longer (e.g. millisecond) epoch timestamp.
    timestamp = args.timestamp[:10]

    weights = _sample_query_weights(yt.read_table(args.weight), args.mode)

    with open(args.sbs) as sbs_file:
        sbs = json.load(sbs_file)
    sbs_result = _collect_sbs_results(sbs, weights)

    win, total = _aggregate(sbs_result)

    rate_01 = _ratio(win["01"], total["01"], "01")
    rate_21 = _ratio(win["21"], total["21"], "21")
    rate_01_w = _ratio(win["01_w"], total["01_w"], "01 (weighted)")
    rate_21_w = _ratio(win["21_w"], total["21_w"], "21 (weighted)")

    out = [{"fielddate": datetime.datetime.fromtimestamp(int(timestamp)).strftime("%Y-%m-%d"),
            "device": args.mode,
            "google_winrate": rate_01,
            "diff_without": _difference(rate_01, rate_21),
            "google_winrate_weighted": rate_01_w,
            "diff_weighted": _difference(rate_01_w, rate_21_w),
            }]
    with open(args.outs, "w") as outfile:
        json.dump(out, outfile, indent=4)

# Run the metric computation only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
