#!/usr/bin/env python
# coding: utf-8


import sys
# Python 2 only: the interpreter's startup (site module) removes
# sys.setdefaultencoding, so sys is reloaded to get it back and the implicit
# str <-> unicode conversion codec is then switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")

import random
import numpy as np
from collections import defaultdict

from swiss import Url, Game, Tournament
from helpers import doc_key, query_key, aggregate_round_results

# Upper bound on the number of query groups main() processes; it slices
# queries_grouped[:QUERIES_LIMIT].
QUERIES_LIMIT = 5000
# Minimum lead in points_won (winner minus loser) a pair must have before
# main() scores it against the ranking; pairs with a smaller lead are skipped.
WIN_POINTS_THRESHHOLD = 1

def calc_pscore(url):
    """Return the value stored under ``'pscore'`` in ``url.data``.

    Raises ``KeyError`` if the mapping has no ``'pscore'`` entry.
    """
    url_data = url.data
    return url_data['pscore']


def calc_dbd_score(url):
    """Combine three ranking signals of ``url`` into a single sort score.

    The score is the sum of, in decreasing weight:
      1) ``url.points_won`` multiplied by 5000,
      2) ``url.get_buchholz_coef()`` multiplied by 200,
      3) ``url.metric`` multiplied by 50 and truncated with ``int``.
    """
    points_term = url.points_won * 5000
    buchholz_term = url.get_buchholz_coef() * 200
    metric_term = int(url.metric * 50)
    return points_term + buchholz_term + metric_term


def _group_tasks_by_round(toloka_data):
    """Index Toloka tasks by Swiss round and, within a round, by query key.

    Returns a ``(tasks_by_round, max_rounds)`` pair where
    ``tasks_by_round[round][query_key]`` is the list of tasks and
    ``max_rounds`` is the largest ``task['round']`` seen (0 for no tasks).
    Both levels are defaultdicts, so a round or query without tasks yields
    an empty list instead of raising ``KeyError``.
    """
    tasks_by_round = defaultdict(lambda: defaultdict(list))
    max_rounds = 0
    for task in toloka_data:
        swiss_round = task['round']
        max_rounds = max(max_rounds, swiss_round)
        tasks_by_round[swiss_round][query_key(task['inputValues'])].append(task)
    return tasks_by_round, max_rounds


def _count_vote_outcomes(tasks):
    """Count votes per ``(won_key, lost_key)`` document pair for ``tasks``."""
    outcomes = defaultdict(int)
    for task in tasks:
        inputs = task['inputValues']
        left_key = doc_key(inputs, 'left_')
        right_key = doc_key(inputs, 'right_')
        label = task['outputValues']['label']
        # Any label other than 'left' makes the right document the winner and
        # any label other than 'right' makes the right document the loser.
        won_key = left_key if label == 'left' else right_key
        lost_key = left_key if label == 'right' else right_key
        outcomes[(won_key, lost_key)] += 1
    return outcomes


def main(*args):
    """Report how often the tournament ranking agrees with Toloka votes.

    Expects exactly six positional arguments:
    ``queries_grouped, toloka_data, in3, token, any_param, html_file``.
    Only the first two are used. ``queries_grouped`` is a sequence of groups
    (each a sequence of items sharing a query); at most ``QUERIES_LIMIT``
    groups are processed. ``toloka_data`` is an iterable of task dicts with
    ``'round'``, ``'inputValues'`` and ``'outputValues'`` entries.

    For every group a ``Tournament`` is filled with the games of all rounds.
    Then, for each vote level in (4, 5, 6), every document pair from the
    final round whose winner got at least that many votes is counted, and
    pairs whose ``points_won`` lead reaches ``WIN_POINTS_THRESHHOLD`` are
    checked against the ordering produced by ``Tournament.sort_urls``.

    Two summary lines per vote level are printed to stdout; percentages are
    printed as ``nan`` when there is nothing to divide by. Problems while
    building a round are reported on stderr and the rest of that round is
    skipped. Always returns an empty list.

    Raises ``ValueError`` if the number of positional arguments is not six.
    """
    queries_grouped, toloka_data, in3, token, any_param, html_file = args

    toloka_agg, max_rounds = _group_tasks_by_round(toloka_data)

    sort_types = (calc_dbd_score,)
    win_point_levels = (4, 5, 6)

    # All counters are keyed by (sort_name, win_points).
    candidate_pairs = defaultdict(int)  # pairs with enough votes, both URLs found
    strong_pairs = defaultdict(int)     # candidates whose points lead passes the threshold
    correct_pairs = defaultdict(int)    # strong pairs ranked in the voted order

    for group in queries_grouped[:QUERIES_LIMIT]:
        if not group:
            # Nothing to rank and no group[0] to read the query from.
            continue

        query = group[0]['query'].decode('utf-8')
        region_id = group[0]['region_id']
        group_key = query_key(group[0])

        urls = {doc_key(item): Url(item, i) for i, item in enumerate(group)}

        # Create the tournament.
        t = Tournament()
        t.set_urls(urls.values())

        games = []
        for swiss_round in range(1, max_rounds + 1):
            games_played, wins = aggregate_round_results(toloka_agg[swiss_round][group_key])

            try:
                # Create the games of the current round.
                for i, (left_key, right_key) in enumerate(games_played.keys()):
                    game = Game(urls[left_key], urls[right_key])
                    game.add_score(wins[left_key], wins[right_key])
                    game.round = swiss_round
                    game.id = i
                    games.append(game)
            except Exception as exc:
                # Best effort: the first failing game ends this round's loop,
                # but the failure is reported instead of silently dropped.
                sys.stderr.write(
                    'Skipping rest of round {} for {} {}: {!r}\n'.format(
                        swiss_round, query, region_id, exc))

            # The tournament receives the cumulative list of games after
            # every round.
            t.set_games(games)

        # Aggregate match results from the Toloka labels.
        # NOTE(review): the previous loop rebuilt this tally for every round
        # and kept only the last one, so only the final round is scored.
        # Confirm whether accumulating over all rounds was intended.
        if max_rounds >= 1:
            round_results = _count_vote_outcomes(toloka_agg[max_rounds][group_key])
        else:
            round_results = defaultdict(int)

        for sort_type in sort_types:
            sort_name = sort_type.__name__
            sorted_by_dbd = Tournament.sort_urls(t.urls, sort_type)

            for win_points in win_point_levels:
                counter_key = (sort_name, win_points)
                for (won_key, lost_key), votes in round_results.items():
                    if votes < win_points:
                        continue
                    try:
                        # Looks URLs up by key via list.index; presumably
                        # Url compares equal to its document key - TODO confirm.
                        won = t.urls[t.urls.index(won_key)]
                        lost = t.urls[t.urls.index(lost_key)]

                        candidate_pairs[counter_key] += 1
                        if won.points_won - lost.points_won < WIN_POINTS_THRESHHOLD:
                            continue
                        strong_pairs[counter_key] += 1

                        if sorted_by_dbd.index(won) < sorted_by_dbd.index(lost):
                            correct_pairs[counter_key] += 1
                    except Exception:
                        print('Exception {} {}, won_key {}, lost_key {}'.format(query, region_id, won_key, lost_key))

    for sort_type in sort_types:
        sort_name = sort_type.__name__
        for win_points in win_point_levels:
            counter_key = (sort_name, win_points)
            candidates = candidate_pairs[counter_key]
            strong = strong_pairs[counter_key]
            correct = correct_pairs[counter_key]

            # Guard the ratios: with no counted pairs the share is undefined.
            if strong:
                accuracy = float(correct) / strong * 100
            else:
                accuracy = float('nan')
            if candidates:
                strong_share = 100.0 * strong / candidates
            else:
                strong_share = float('nan')

            # print(single_string) gives identical output on Python 2 and
            # also parses on Python 3.
            print('Score {} predicts {:0.2f}% toloka games with {}-{} points'.format(sort_name, accuracy, win_points, 6 - win_points))
            print('strong pairs: {} / total pairs: {} = {:0.2f}% for WIN_POINTS_THRESHHOLD={} \n'.format(strong, candidates, strong_share, WIN_POINTS_THRESHHOLD))

    return []
