#!/usr/bin/env python
# coding: utf-8

import os
import sys
# Python 2 only: reload(sys) brings back sys.setdefaultencoding(), which the
# interpreter removes from the module at startup, so that the process-wide
# default codec can be switched to UTF-8.  main() formats unicode query strings
# into byte-string templates, which performs implicit unicode -> str
# conversions through this default codec.
reload(sys)
sys.setdefaultencoding("utf-8")

import copy
import math
import random
import datetime
import numpy as np
from pprint import pprint
from collections import defaultdict, OrderedDict

import operator

from helpers import aggregate_toloka_data, get_targets_metrics_structure, upload_file_to_hurge
from helpers import get_sorters_html, get_html_table_header, play_tournament_games
from helpers import boost_3cg, boost_corsa, boost_corsax, boost_corsax_viewport2, boost_utility1, \
    boost_utility_clickopt1_without_rel04

# Maximum number of query groups (taken from the front of the input) that
# main() considers when rendering per-query result blocks; main() lowers it
# to 20 when it detects a Darwin (macOS) host.
QUERIES_LIMIT = 5000
# Threshold passed by main() as the second argument of
# tournament.get_sort_pairs_count(); its exact meaning is defined by that helper.
WIN_POINTS_THRESHHOLD = 1
# How many queries main() lists in each "top" and each "bottom" section
# of the query index, per target.
SHOW_QUERIES = 50

def calc_dbd_score(url):
    """Return the combined tournament score of *url*.

    The score is a weighted sum of three terms, added in this order:
    points won (x5000), the Buchholz coefficient (x200) and the metric
    value (x50, truncated to an integer).
    """
    points_term = url.points_won * 5000
    buchholz_term = url.get_buchholz_coef() * 200
    metric_term = int(url.metric * 50)
    return points_term + buchholz_term + metric_term

def calc_pscore(url):
    """Return the value stored under ``'pscore'`` in ``url.data``."""
    features = url.data
    return features['pscore']

def calc_click_boost(url):
    """Return the value stored under ``'click_boost'`` in ``url.data``."""
    features = url.data
    return features['click_boost']

def boost_relevance(url):
    """Return the value stored under ``'relevance'`` in ``url.data``."""
    features = url.data
    return features['relevance']

def boost_calc_3cg(url):
    """Score *url* by passing its ``data`` to ``helpers.boost_3cg``."""
    features = url.data
    return boost_3cg(features)

def boost_corsa1(url):
    """Score *url* by passing its ``data`` to ``helpers.boost_corsa``."""
    features = url.data
    return boost_corsa(features)

def boost_utility(url):
    """Score *url* by passing its ``data`` to ``helpers.boost_utility1``."""
    features = url.data
    return boost_utility1(features)

def boost_utility_clickopt_without_rel04(url):
    """Score *url* via ``helpers.boost_utility_clickopt1_without_rel04``.

    Only ``url.data`` is handed to the helper.
    """
    features = url.data
    return boost_utility_clickopt1_without_rel04(features)

def boost_corsax_1(url, rel_minus_weight=0.75, boost_weight=0.3):
    """Score *url* via ``helpers.boost_corsax``.

    ``url.data`` and the two weights are forwarded positionally, in the
    order ``(data, rel_minus_weight, boost_weight)``.
    """
    features = url.data
    return boost_corsax(features, rel_minus_weight, boost_weight)

def boost_corsax_1_viewport2(url, rel_minus_weight=0.75, boost_weight=0.3):
    """Score *url* via ``helpers.boost_corsax_viewport2``.

    ``url.data`` and the two weights are forwarded positionally, in the
    order ``(data, rel_minus_weight, boost_weight)``.
    """
    features = url.data
    return boost_corsax_viewport2(features, rel_minus_weight, boost_weight)


# Name -> scoring function for every available ordering.  main() hands this
# mapping to get_sorters_html() and to tournament.print_gallery(sorts=...).
boost_types = OrderedDict([
    ('relevance', boost_relevance),
    ('3cg', boost_calc_3cg),
    ('corsa', boost_corsa1),
    ('corsax_1', boost_corsax_1),
    ('corsax_1_viewport2', boost_corsax_1_viewport2),
    ('click_boost', calc_click_boost),
    ('utility_opt', boost_utility),
    ('utility_clickopt', boost_utility_clickopt_without_rel04),
    ('dbd', calc_dbd_score)
])

# Orderings for which main() collects pair statistics.  main() also reads the
# 'dbd' and 'click_boost' keys by name when it fills the stats tables, so both
# entries must stay present.
targets = OrderedDict([
    ('click_boost', calc_click_boost),
    ('dbd', calc_dbd_score),
])

# NOTE(review): not referenced anywhere in the visible part of this file.
best_boosts = OrderedDict([
    ('corsax_1_viewport2', boost_corsax_1_viewport2),
    # ('corsa', boost_corsa),
])


def _escape_html(text):
    """Return *text* with HTML-special characters (including ``"``) escaped."""
    try:
        from cgi import escape  # Python 2, which this script targets
    except ImportError:
        from html import escape  # cgi.escape does not exist on newer Pythons
    return escape(text, True)


def _format_stats_row(indent, dbd_stats, click_boost_stats):
    """Render one ``<tr>`` of the pair-statistics table.

    indent            -- whitespace placed before the ``<tr>`` line (cells get
                         four more spaces); only affects source formatting.
    dbd_stats         -- ``(correct, total, percent)`` for the 'dbd' target.
    click_boost_stats -- ``(correct, total, percent)`` for 'click_boost'.
    """
    template = """
{i}<tr>
{i}    <td>qwerty</td>

{i}    <td>{pairs_correct_dbd:6.0f}</td>
{i}    <td>{pairs_total_dbd:6.0f}</td>
{i}    <td>{pairs_dbd:0.2f}</td>

{i}    <td>{pairs_correct_click_boost:6.0f}</td>
{i}    <td>{pairs_total_click_boost:6.0f}</td>
{i}    <td>{pairs_click_boost:0.2f}</td>
{i}</tr>
{i}"""
    return template.format(
        i=indent,
        pairs_correct_dbd=dbd_stats[0],
        pairs_total_dbd=dbd_stats[1],
        pairs_dbd=dbd_stats[2],
        pairs_correct_click_boost=click_boost_stats[0],
        pairs_total_click_boost=click_boost_stats[1],
        pairs_click_boost=click_boost_stats[2],
    )


def main(*args):
    """Build the HTML report comparing target orderings with tournament results.

    Expects exactly six positional arguments:
    ``queries_grouped, toloka_data, sampled_queries, token, any_param,
    html_file``.  Only ``queries_grouped`` (a sliceable sequence of groups whose
    first item carries ``'query'`` and ``'region_id'``), ``toloka_data`` and
    ``html_file`` (anything with ``write``) are used; the rest are accepted
    and ignored.

    Steps:
      1. play a tournament per query group and record, for every entry of
         ``targets``, the correct / total pair counts and their percentage;
      2. per target, pick the ``SHOW_QUERIES`` best and worst queries and
         render them as a clickable list;
      3. render a result block (stats table, sorters, gallery) for every
         picked query found among the first ``QUERIES_LIMIT`` groups,
         followed by a table of totals;
      4. write the static header, the query list and the results to
         ``html_file``.

    Reads ``static_header.html`` from the current directory, seeds ``random``
    and ``numpy.random``, prints progress to stdout and always returns ``[]``.

    Single-argument ``print(...)`` is used throughout: under Python 2 it is
    the ordinary print statement with a parenthesised expression.
    """
    global QUERIES_LIMIT

    queries_grouped, toloka_data, sampled_queries, token, any_param, html_file = args

    DEBUG = os.uname()[0] == 'Darwin'
    SEED = 30
    random.seed(SEED)
    np.random.seed(SEED)
    np.set_printoptions(linewidth=240)
    np.set_printoptions(threshold=np.inf)

    if DEBUG:
        # Deliberately rebinds the module-level constant (pre-existing behaviour).
        QUERIES_LIMIT = 20

    with open('static_header.html') as f:
        html_header = f.read()

    toloka_agg, _max_rounds = aggregate_toloka_data(toloka_data)

    boosts = get_targets_metrics_structure(targets)

    # NOTE(review): nothing in this function increments the counter, so the
    # final message always reports 0.  Kept so that stdout stays unchanged.
    errors_count = 0

    # =============== a separate tournament for every query ===============
    # NOTE(review): this pass covers all groups while the rendering pass below
    # is capped by QUERIES_LIMIT, so a listed query may have no result block.
    for idx, group in enumerate(queries_grouped):
        # Decoded once; the unicode value is reused everywhere below.
        query = group[0]['query'].decode('utf-8')
        region_id = group[0]['region_id']
        key = (query, region_id)
        if not toloka_agg[1][key]:
            # No aggregated data for this query: we run on a sample of queries.
            continue

        t = play_tournament_games(group, toloka_agg)

        print('{:<4}) Query: {:<40} region {:<5} items {}'.format(idx, query, region_id, len(group)))

        for target_fn in targets.values():
            t.cache_sort(target_fn)

        for target, target_fn in targets.items():
            pairs_result = t.get_sort_pairs_count(target_fn, WIN_POINTS_THRESHHOLD)
            correct, total = pairs_result[0], pairs_result[1]
            target_boosts = boosts[target]
            target_boosts['pairs_correct'][key] = correct
            target_boosts['pairs_total'][key] = total
            # -1 marks tournaments with fewer than 10 URLs.
            target_boosts['pairs_predicted'][key] = \
                (100 * correct / float(total or 1)) if len(t.urls) >= 10 else -1
            target_boosts['pairs_correct']['sum'] += correct
            target_boosts['pairs_total']['sum'] += total

    # =============== pick the top queries for every metric ===============
    sampled_queries_list = set()
    html_queries = ['<ul class="queries">\n']
    for target in reversed(list(targets)):
        target_boosts = boosts[target]
        html_queries.append('<span class="category">=== Predict {cat} ===</span>'.format(cat=target))
        print('Top queries with pairs {}'.format(target))
        html_queries.append('<span class="category">{cat}</span>'.format(cat=target))
        # 45 == C(10, 2); presumably "all pairs of a 10-URL tournament" -- confirm.
        items = [(k, v) for k, v in target_boosts['pairs_predicted'].items()
                 if target_boosts['pairs_total'][k] >= 45]
        sorted_items = sorted(items, key=operator.itemgetter(1), reverse=True)

        # With fewer than 2 * SHOW_QUERIES items the two slices overlap and the
        # same query is listed in both sections (pre-existing behaviour).
        sections = (
            ('top', sorted_items[:SHOW_QUERIES]),
            ('bottom', sorted_items[-SHOW_QUERIES:]),
        )
        for label, section_items in sections:
            html_queries.append('<span class="category category_small">{}</span>'.format(label))
            for (q, r), pairs in section_items:
                print('   query: {} {}, pairs: {}'.format(q, r, pairs))
                html_queries.append(
                    '<li class="queries__item" data-query="{q}" data-region="{r}">{q} {p:0.2f}% ({oks}/{totals})</li>'.format(
                        # Queries are free text: escape them for attribute and text use.
                        q=_escape_html(q),
                        r=r,
                        p=pairs,
                        oks=target_boosts['pairs_correct'][(q, r)],
                        totals=target_boosts['pairs_total'][(q, r)],
                    )
                )
                sampled_queries_list.add((q, r))
    html_queries.append('</ul>\n\n')

    # =============== build the results HTML ===============
    # NOTE(review): <div class="results"> is never closed by this function;
    # left as is because the caller owns html_file and may append to it.
    html = ['<div class="results">\n']
    for group in queries_grouped[:QUERIES_LIMIT]:
        query = group[0]['query'].decode('utf-8')
        region_id = group[0]['region_id']
        key = (query, region_id)
        if not toloka_agg[1][key]:
            # No aggregated data for this query: we run on a sample of queries.
            continue
        if key not in sampled_queries_list:
            continue

        # The tournament is replayed rather than cached from the first pass.
        t = play_tournament_games(group, toloka_agg)

        safe_query = _escape_html(query)
        html.append('<div class="results__item" data-query="{q}" data-region="{region}">\n'.format(
            q=safe_query,
            region=region_id,
        ))
        html.append('<h3>Query: "{q}", Region: {region}</h3>\n'.format(
            q=safe_query,
            region=region_id,
        ))

        html.append(get_html_table_header())
        html.append(_format_stats_row(
            ' ' * 12,
            (boosts['dbd']['pairs_correct'][key],
             boosts['dbd']['pairs_total'][key],
             boosts['dbd']['pairs_predicted'][key]),
            (boosts['click_boost']['pairs_correct'][key],
             boosts['click_boost']['pairs_total'][key],
             boosts['click_boost']['pairs_predicted'][key]),
        ))
        html.append('</tbody></table>')

        html.append(get_sorters_html(boost_types))

        html.append('<div class="gallery">\n')
        html.append(t.print_gallery(sorts=boost_types))
        html.append('</div>\n\n')
        html.append('</div>\n\n')

    # =============== totals over all queries ===============
    html.append('<div class="results__item results__item_visible" data-query="{q}">\n'.format(
        q='Total stats table',
    ))

    html.append('<h3>Total stats table</h3>')
    html.append(get_html_table_header())

    totals = {}
    for target in ('dbd', 'click_boost'):
        correct_sum = boosts[target]['pairs_correct']['sum']
        total_sum = boosts[target]['pairs_total']['sum']
        # "or 1" guards against division by zero when no pairs were counted.
        percent = float(correct_sum) / (total_sum or 1) * 100
        totals[target] = (correct_sum, total_sum, percent)

    html.append(_format_stats_row(' ' * 4, totals['dbd'], totals['click_boost']))

    html.append('</tbody></table>')
    html.append('</div>')

    html_file.write(html_header)
    html_file.write(''.join(html_queries))
    html_file.write(''.join(html))

    print('Errors count: {}, key not found in queries_list'.format(errors_count))

    return []
