#-*- coding: UTF-8 -*-

import requests
import json
import codecs
import argparse
import re
import datetime
import os.path
import urllib, urllib2
import sys
import time

def parse_args():
    """Parse the script's command-line options from ``sys.argv``.

    Required options: ``--input``, ``--queries`` and ``--ratings`` (strings)
    and ``--rel_plus_value`` / ``--rel_minus_value`` (floats).
    Optional options: ``--signals``, ``--signals_coefs``, ``--signals_porno``
    and ``--signals_porno_coefs``; each takes one or more values and defaults
    to an empty list. Their values are left as strings.

    Returns:
        The ``argparse.Namespace`` holding the parsed values.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--input", type=str, required=True)

    # Mandatory float values used by the rating formula.
    for option in ("--rel_plus_value", "--rel_minus_value"):
        parser.add_argument(option, type=float, required=True)

    # Optional multi-value lists (signal names and their coefficients).
    for option in ("--signals",
                   "--signals_coefs",
                   "--signals_porno",
                   "--signals_porno_coefs"):
        parser.add_argument(option, nargs='+', default=[])

    # Mandatory output paths.
    for option in ("--queries", "--ratings"):
        parser.add_argument(option, type=str, required=True)

    return parser.parse_args()

def calculate_formula(relevance,
                      rel_plus_value,
                      rel_minus_value,
                      signals,
                      coef_by_signal):
    """Combine a relevance label with weighted signals into one rating.

    Args:
        relevance: Numeric relevance label. Only values within 1e-5 of 0.5
            or 0.05 get a non-zero base value.
        rel_plus_value: Base value used when ``relevance`` is ~0.5.
        rel_minus_value: Base value used when ``relevance`` is ~0.05.
        signals: Mapping from signal name to a value convertible by
            ``float()``; must contain every key of ``coef_by_signal``.
        coef_by_signal: Mapping from signal name to its numeric weight.

    Returns:
        ``(base + scale * weighted_signals / max(weight_sum, 1)) / divisor``
        where ``divisor`` is 2 when the weights sum to a positive number and
        1 otherwise. ``scale`` is 1 for the ~0.5 label,
        ``rel_plus_value - rel_minus_value`` for the ~0.05 label and 0 for
        any other label (which also has a base of 0).
    """
    weighted_signals = sum(float(signals[name]) * weight
                           for name, weight in coef_by_signal.items())
    total_weight = sum(weight for weight in coef_by_signal.values())

    # With positive total weight the result is halved; otherwise it is
    # returned undivided.
    divisor = 2. if total_weight > 0 else 1.
    # Never divide the signal contribution by less than 1.
    signal_norm = max(total_weight, 1)

    if abs(relevance - 0.5) < 1e-5:
        base, scale = rel_plus_value, 1.
    elif abs(relevance - 0.05) < 1e-5:
        base, scale = rel_minus_value, rel_plus_value - rel_minus_value
    else:
        base, scale = 0, 0

    return (base + scale * weighted_signals / signal_norm) / divisor

def _dump_lines(path, lines):
    """Write ``lines`` to ``path`` as UTF-8, newline-separated, no trailing newline."""
    with codecs.open(path, 'w', encoding="utf-8") as out:
        out.write('\n'.join(lines))


def main():
    """Convert the input JSON into a queries file and a ratings file.

    The input file (``--input``) is a JSON list of records. Each record is
    read for the keys ``region_id``, ``query``, ``qid``, ``relevance`` and
    ``url``, an optional ``is_porno`` flag, and one key per configured
    signal name.

    Outputs:
        * ``--queries``: one ``qid<TAB>query<TAB>region_id`` line per distinct
          ``qid``, in first-seen order.
        * ``--ratings``: one ``qid<TAB>url<TAB>value`` line per record, sorted
          as plain strings, where ``value`` comes from ``calculate_formula``.

    Both files are written as UTF-8 without a trailing newline.
    """
    args = parse_args()

    # NOTE: zip() stops at the shorter list, so a signal name without a
    # matching coefficient (or vice versa) is silently ignored.
    coef_by_signal = {
        signal: float(coef)
        for signal, coef in zip(args.signals, args.signals_coefs)
    }
    porno_coef_by_signal = {
        signal: float(coef)
        for signal, coef in zip(args.signals_porno, args.signals_porno_coefs)
    }

    # Close the input handle deterministically instead of leaking it.
    with codecs.open(args.input, 'r', encoding="utf-8") as source:
        records = json.load(source)

    seen_qids = set()
    queries = []
    ratings = []

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Start calculating....")

    for elem in records:
        qid = elem["qid"]
        if qid not in seen_qids:
            seen_qids.add(qid)
            queries.append(
                str(qid) + '\t' + elem["query"] + '\t' + str(elem["region_id"]))

        # Records flagged as porno use their own set of signal weights.
        if elem.get("is_porno", 0):
            coefs = porno_coef_by_signal
        else:
            coefs = coef_by_signal
        value = calculate_formula(elem["relevance"],
                                  args.rel_plus_value,
                                  args.rel_minus_value,
                                  elem,
                                  coefs)
        ratings.append(str(qid) + '\t' + elem["url"] + '\t' + str(value))

    print("Dumping queries data....")
    _dump_lines(args.queries, queries)

    print("Prepare ratings.tsv data....")
    print("Sorting ratings data....")
    ratings.sort()
    print("Dumping ratings data....")
    # Written as UTF-8 like the queries file: URLs come from JSON as unicode,
    # and a plain open() would fail on non-ASCII characters under Python 2.
    _dump_lines(args.ratings, ratings)

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
