#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import sys
import argparse
import yt.wrapper as yt
import datetime
import time
from urlparse import urlparse
import json
import random
from collections import defaultdict


def _parse_flag(value):
    """Interpret a command-line string as a boolean.

    Only explicit "false-like" spellings (empty string, "0", "false", "no",
    "off", "none"; case-insensitive) map to False.  Every other value maps to
    True, so callers that already pass something like ``--is_google 1`` keep
    their behaviour.
    """
    return value.strip().lower() not in ("", "0", "false", "no", "off", "none")


def HandleOption():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser exposing ``server``, ``timestamp``,
        ``mode``, ``out``, ``is_google`` and ``weight``.  ``is_google`` is
        always a bool after parsing (False when the option is omitted).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--server", dest="server", help="mapreduce server",
                        default="hahn.yt.yandex.net:80", required=False)
    parser.add_argument("--ts", dest="timestamp", help="timestamp",
                        required=True)
    parser.add_argument("--mode", dest="mode", help="touch/desktop",
                        required=True)
    parser.add_argument("--out", dest="out", help="file with output",
                        required=True)
    # Without a converter argparse stores the raw string, and any non-empty
    # string (including "False" or "0") is truthy when tested later.
    parser.add_argument("--is_google", dest="is_google", type=_parse_flag,
                        help="boolean flag, e.g. 1/0 or true/false",
                        default=False, required=False)
    parser.add_argument("--weight", dest="weight",
                        help="pre-calculated table with sample", required=True)
    return parser

def get_nw(rec):
    """Yield the record's query once if its blender data has a "news" element.

    Args:
        rec: mapping with a JSON string under "value" (an object that must
            contain "results" and "query") and, optionally, a JSON string
            under "blender_data".

    Yields:
        dict: ``{"query": <query truncated to 1024 characters>}`` -- at most
        one item per record, emitted on the first element named "news".

    Nothing is yielded when "results", "blender_data" or "blender_results" is
    missing.  Results without "blender_elements" and elements without "name"
    are skipped.
    """
    session = json.loads(rec["value"])
    if "results" not in session:
        return
    query = session["query"][:1024]
    if "blender_data" not in rec:
        return
    blender = json.loads(rec["blender_data"])
    if "blender_results" not in blender:
        return
    for result in blender["blender_results"]:
        # A result without elements is skipped, mirroring how a missing
        # "name" is handled, instead of aborting the record with a KeyError.
        for element in result.get("blender_elements") or ():
            if "name" in element and element["name"] == "news":
                yield {"query": query}
                # One hit is enough; stop scanning this record.
                return


# Labels that take part in the defect-rate computation; rows carrying any
# other label are ignored.
_LABELS = ("fit", "not_fit", "almost_fit")


def _sample_query_weights(table, mode, sample_size=1000):
    """Count query occurrences in a random sample of ``table`` rows for ``mode``."""
    queries = [row["query"] for row in yt.read_table(table)
               if row["query"] and row["ui"] == mode]
    random.shuffle(queries)
    weights = defaultdict(int)
    for query in queries[:sample_size]:
        weights[query] += 1
    return weights


def _tally_labels(path, weights):
    """Return (plain, weighted) per-label counters for the rows of ``path``."""
    plain = dict.fromkeys(_LABELS, 0)
    weighted = dict.fromkeys(_LABELS, 0)
    for row in yt.read_table(path):
        query = row["query"]
        label = row["label"]
        if label not in _LABELS:
            continue
        plain[label] += 1
        # .get() keeps the lookup read-only; indexing the defaultdict would
        # insert an entry for every query that is not in the sample.
        weighted[label] += 1 + weights.get(query, 0)
    return plain, weighted


def main():
    """Compute the defect rate for one day and write it as JSON.

    Reads the labelled table for the requested day/platform and a weight
    table from YT, then writes a one-element JSON list with the plain and
    the sample-weighted share of "not_fit" rows to ``--out``.

    Raises:
        ZeroDivisionError: when the labelled table has no row with a known
            label, so no rate can be computed.
    """
    args = HandleOption().parse_args()
    yt.update_config({"proxy": {"url": args.server}})

    # Only the first 10 characters of the timestamp are used -- presumably
    # to drop a sub-second suffix; confirm with the caller.
    day = datetime.datetime.fromtimestamp(int(args.timestamp[:10]))
    google_suffix = "_google" if args.is_google else ""
    path = ("//home/search-functionality/shinyserp/news_wizard/"
            + day.strftime("%Y%m%d") + google_suffix + "_" + args.mode)

    weights = _sample_query_weights(args.weight, args.mode)
    plain, weighted = _tally_labels(path, weights)
    sys.stderr.write("%s %s %s\n" % (plain["fit"], plain["not_fit"],
                                     plain["almost_fit"]))

    total = sum(plain.values())
    if not total:
        # Same exception type the bare division used to raise, but with a
        # message that names the offending table.
        raise ZeroDivisionError(
            "no rows labelled fit/not_fit/almost_fit in %s" % path)
    # Every weighted increment is at least 1, so this is non-zero as well.
    total_weighted = sum(weighted.values())

    out = [{
        "fielddate": day.strftime("%Y-%m-%d"),
        "platform": args.mode + google_suffix,
        "defect_rate": plain["not_fit"] / total,
        "defect_rate_weighted": weighted["not_fit"] / total_weighted,
    }]
    with open(args.out, "w") as outfile:
        json.dump(out, outfile, indent=4)

# Run the report only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
