#-*- coding: UTF-8 -*-
import argparse
import codecs
import json
from collections import Counter

# Minimum number of (non-skipped) URL rows a query needs in order to get its
# own entry in the output.
MIN_QUERY_URL_COUNT = 5


def _aggregate_by_query(data, field, mode, min_count=MIN_QUERY_URL_COUNT):
    """Aggregate ``field`` per query and over all kept rows.

    Rows whose "url" contains "frontend.vh.yandex.ru" are ignored entirely.

    Args:
        data: iterable of dicts, each providing "url", "query" and ``field``.
        field: key of the numeric value to aggregate; it is also used as the
            key of the aggregated value in every output entry.
        mode: "mean" or "max".
        min_count: minimum number of kept rows a query needs to be reported.

    Returns:
        A list of ``{"query": <query>, field: <value>}`` dicts, one for each
        query with at least ``min_count`` kept rows, followed by a final
        entry with query "OTHER" holding the aggregate over *all* kept rows
        (including rows of the queries reported individually).

    Raises:
        ValueError: if ``mode`` is neither "mean" nor "max".
    """
    if mode not in ("mean", "max"):
        raise ValueError("Unknown mode!")

    total_by_query = Counter()
    count_by_query = Counter()
    # NOTE: maxima start from 0 (the Counter default and ``overall_max``), so
    # a group whose values are all negative reports 0.
    max_by_query = Counter()
    overall_total = 0.
    overall_count = 0
    overall_max = 0.

    for url_info in data:
        if "frontend.vh.yandex.ru" in url_info["url"]:
            continue
        query = url_info["query"]
        value = float(url_info[field])

        overall_total += value
        overall_count += 1
        overall_max = max(overall_max, value)

        total_by_query[query] += value
        count_by_query[query] += 1
        max_by_query[query] = max(max_by_query[query], value)

    results = []
    for query, count in count_by_query.items():
        # The threshold must be checked against this query's own row count.
        if count < min_count:
            continue
        if mode == "mean":
            aggregated = total_by_query[query] / count
        else:
            aggregated = max_by_query[query]
        results.append({"query": query, field: aggregated})

    if mode == "mean":
        # With no kept rows there is nothing to average; emit 0.0 (the same
        # value "max" mode yields for empty input) instead of dividing by 0.
        other = overall_total / overall_count if overall_count else 0.
    else:
        other = overall_max
    results.append({"query": "OTHER", field: other})
    return results


def main():
    """Read URL records from JSON, aggregate a field per query, write JSON.

    Command-line arguments:
        --input   path to a UTF-8 JSON file containing a list of records.
        --field   name of the numeric record key to aggregate.
        --output  path the resulting JSON list is written to (UTF-8).
        --mode    "mean" (default) or "max".

    Raises:
        ValueError: if ``--mode`` is neither "mean" nor "max".
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True)
    parser.add_argument('--field', required=True)
    parser.add_argument('--output', required=True)
    parser.add_argument('--mode', type=str, default="mean")
    args = parser.parse_args()

    with codecs.open(args.input, "r", "utf8") as inp:
        data = json.load(inp)

    results = _aggregate_by_query(data, args.field, args.mode)

    with codecs.open(args.output, "w", "utf8") as out:
        json.dump(results, out, ensure_ascii=False)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
