#!/usr/bin/env python
# coding: utf-8

import os
import sys
reload(sys)
sys.setdefaultencoding("utf-8")

import copy
import math
import random
import datetime
import numpy as np
import sys
import codecs
from pprint import pprint
from collections import defaultdict, OrderedDict
from itertools import combinations

import operator

from swiss import Tournament
from helpers import aggregate_toloka_data, get_boosts_metrics_structure, upload_file_to_hurge
from helpers import get_sorters_html, get_html_table_header, play_tournament_games
from helpers import boost_3cg, boost_corsa, boost_corsax, boost_corsax_viewport2, boost_utility1, \
    boost_utility_clickopt1_without_rel04, \
    boost_greenurl, boost_visual_quality, boost_grayscale_v2, boost_utility_only, boost_kernel, boost_viewport

def calc_dbd_score(url):
    """Return the combined DbD score of a tournament URL.

    The score is a weighted sum of three terms, listed from the largest
    weight to the smallest:

    1. ``url.points_won`` multiplied by 5000;
    2. ``url.get_buchholz_coef()`` multiplied by 200;
    3. ``url.metric`` multiplied by 50 and truncated with ``int``.
    """
    points_term = url.points_won * 5000
    buchholz_term = url.get_buchholz_coef() * 200
    metric_term = int(url.metric * 50)
    return points_term + buchholz_term + metric_term

def calc_pscore(url):
    """Return the ``'pscore'`` entry of the URL's ``data`` mapping."""
    features = url.data
    return features['pscore']

def calc_click_boost(url):
    """Return the ``'click_boost'`` entry of the URL's ``data`` mapping."""
    features = url.data
    return features['click_boost']

def boost_relevance(url):
    """Return the ``'relevance'`` entry of the URL's ``data`` mapping."""
    features = url.data
    return features['relevance']

def boost_calc_3cg(url):
    """Apply the ``boost_3cg`` helper to the URL's ``data`` and return its result."""
    features = url.data
    return boost_3cg(features)

def boost_corsa1(url):
    """Apply the ``boost_corsa`` helper to the URL's ``data`` and return its result."""
    features = url.data
    return boost_corsa(features)

def boost_utility(url):
    """Apply the ``boost_utility1`` helper to the URL's ``data`` and return its result."""
    features = url.data
    return boost_utility1(features)

def boost_utility_clickopt_without_rel04(url):
    """Apply ``boost_utility_clickopt1_without_rel04`` to the URL's ``data``."""
    features = url.data
    return boost_utility_clickopt1_without_rel04(features)

def boost_corsax_1(url, rel_minus_weight=0.75, boost_weight=0.3):
    """Apply the ``boost_corsax`` helper to the URL's ``data``.

    ``rel_minus_weight`` and ``boost_weight`` are forwarded unchanged as the
    second and third positional arguments of the helper.
    """
    features = url.data
    return boost_corsax(features, rel_minus_weight, boost_weight)

def boost_corsax_1_viewport2(url, rel_minus_weight=0.75, boost_weight=0.3):
    """Apply the ``boost_corsax_viewport2`` helper to the URL's ``data``.

    ``rel_minus_weight`` and ``boost_weight`` are forwarded unchanged as the
    second and third positional arguments of the helper.
    """
    features = url.data
    return boost_corsax_viewport2(features, rel_minus_weight, boost_weight)


def boost_greenurl_wrapped(url, rel_minus_weight=0.75, boost_weight=0.3):
    """Apply the ``boost_greenurl`` helper to the URL's ``data``.

    ``rel_minus_weight`` and ``boost_weight`` are forwarded unchanged as the
    second and third positional arguments of the helper.
    """
    features = url.data
    return boost_greenurl(features, rel_minus_weight, boost_weight)


def boost_visual_quality_wrapped(url, rel_minus_weight=0.75, boost_weight=0.3):
    """Apply the ``boost_visual_quality`` helper to the URL's ``data``.

    ``rel_minus_weight`` and ``boost_weight`` are forwarded unchanged as the
    second and third positional arguments of the helper.
    """
    features = url.data
    return boost_visual_quality(features, rel_minus_weight, boost_weight)


def boost_grayscale_v2_wrapped(url, rel_minus_weight=0.75, boost_weight=0.3):
    """Apply the ``boost_grayscale_v2`` helper to the URL's ``data``.

    ``rel_minus_weight`` and ``boost_weight`` are forwarded unchanged as the
    second and third positional arguments of the helper.
    """
    features = url.data
    return boost_grayscale_v2(features, rel_minus_weight, boost_weight)


def boost_utility_only_wrapped(url, rel_minus_weight=0.75, boost_weight=0.3):
    """Apply the ``boost_utility_only`` helper to the URL's ``data``.

    ``rel_minus_weight`` and ``boost_weight`` are forwarded unchanged as the
    second and third positional arguments of the helper.
    """
    features = url.data
    return boost_utility_only(features, rel_minus_weight, boost_weight)

def boost_viewport_wrapped(url, rel_minus_weight=0.75, boost_weight=0.3):
    """Apply the ``boost_viewport`` helper to the URL's ``data``.

    ``rel_minus_weight`` and ``boost_weight`` are forwarded unchanged as the
    second and third positional arguments of the helper.
    """
    features = url.data
    return boost_viewport(features, rel_minus_weight, boost_weight)

def boost_kernel_wrapped(url, rel_minus_weight=0.75, boost_weight=0.3):
    """Apply the ``boost_kernel`` helper to the URL's ``data``.

    ``rel_minus_weight`` and ``boost_weight`` are forwarded unchanged as the
    second and third positional arguments of the helper.
    """
    features = url.data
    return boost_kernel(features, rel_minus_weight, boost_weight)


# Ordered registry of scoring functions, keyed by name. Every value is a
# callable that accepts a URL object. The insertion order below is preserved
# by OrderedDict.
boost_types = OrderedDict()
boost_types['relevance'] = boost_relevance
boost_types['3cg'] = boost_calc_3cg
boost_types['corsa'] = boost_corsa1
boost_types['corsax_1'] = boost_corsax_1
boost_types['corsax_1_viewport2'] = boost_corsax_1_viewport2
boost_types['click_boost'] = calc_click_boost
boost_types['utility_opt'] = boost_utility
boost_types['utility_clickopt'] = boost_utility_clickopt_without_rel04
boost_types['dbd'] = calc_dbd_score
# NOTE(review): the key below is spelled 'utlity_only' (sic). It is a runtime
# key that consumers may look up or display, so the spelling is kept as-is.
boost_types['utlity_only'] = boost_utility_only_wrapped
boost_types['greenurl'] = boost_greenurl_wrapped
boost_types['grayscale_v2'] = boost_grayscale_v2_wrapped

# Ordered registry of target scoring functions; passed together with
# ``boost_types`` to ``get_boosts_metrics_structure`` in ``main``.
targets = OrderedDict()
targets['click_boost'] = calc_click_boost
targets['dbd'] = calc_dbd_score

# Ordered registry of the preferred boost functions. As in ``boost_types``,
# every value takes a URL object. 'corsa' therefore maps to the
# ``boost_corsa1`` wrapper rather than the raw ``boost_corsa`` helper, which
# expects ``url.data`` and would fail when handed a URL object.
best_boosts = OrderedDict([
    ('corsa', boost_corsa1),
    ('corsax_1_viewport2', boost_corsax_1_viewport2),
])


def main(*args):
    queries_grouped, toloka_data, sampled_queries, token, any_param, html_file = args

    DEBUG = os.uname()[0] == 'Darwin'
    N_TOP_DOCUMENTS = int(any_param)
    SEED = 30
    random.seed(SEED)
    np.random.seed(SEED)
    np.set_printoptions(linewidth=240)
    np.set_printoptions(threshold=np.inf)

    html = ''
    with open('static_header.html') as f:
        html_header = f.read()

    toloka_agg, max_rounds = aggregate_toloka_data(toloka_data)

    boosts = get_boosts_metrics_structure(boost_types, targets)

    # =============== отдельные турниры по каждому запросу ===============
    idx = -1
    errors_count = 0

    documents_stat = []

    for group in queries_grouped:
        idx += 1
        query = group[0]['query'].decode('utf-8')
        region_id = group[0]['region_id']
        if not toloka_agg[1][(query, region_id)]:
            # тестируем на семпле запросов
            continue

        t = play_tournament_games(group, toloka_agg)

        dbd_urls = t.sort_urls(t.urls, calc_dbd_score)
        corsa_urls = t.sort_urls(t.urls, boost_corsax_1_viewport2)
        print >>sys.stderr, query
        print >>sys.stderr, len(dbd_urls), dbd_urls
        print >>sys.stderr, len(corsa_urls), corsa_urls

        urls_diff = []
        urls_cnt = 0

        for i, url1 in enumerate(dbd_urls):
            try:
                j = corsa_urls.index(url1)
            except ValueError:
                continue
            urls_diff.append((url1, i, j))
            urls_cnt += 1

        for url, dbd_rank, corsa_rank in urls_diff:
            documents_stat.append((url, dbd_rank, corsa_rank, query, region_id, urls_cnt))

    documents_stat.sort(key=lambda x: float(x[2] - x[1]) / (x[5] + 1))

    uniq_queries = set()

    html += '<header><h2>Corsa is better</h2></header>'
    for url, dbd_rank, corsa_rank, query, region_id, urls_cnt in documents_stat[:N_TOP_DOCUMENTS]:
        uniq_queries.add(query)
        html += Tournament.get_html_gallery_item(url,
                                                sorts=boost_types,
                                                extra_info=['Query: <a href="index.html#{q}">{q}</a>, region_id {r}'.format(q=query, r=region_id),
                                                            'Ranks: diff {}, total {}, dbd {}, corsa {}'.format(\
                                                                    corsa_rank - dbd_rank, urls_cnt, dbd_rank, corsa_rank),
                                                            'Kernel {}, viewport {}'.format(boost_kernel_wrapped(url), boost_viewport_wrapped(url))
                                                            ])

    html += '<header><h2>DbD is better</h2></header>'
    for url, dbd_rank, corsa_rank, query, region_id, urls_cnt in documents_stat[-N_TOP_DOCUMENTS:]:
        uniq_queries.add(query)
        html += Tournament.get_html_gallery_item(url,
                                                sorts=boost_types,
                                                extra_info=['Query: <a href="index.html#{q}">{q}</a>, region_id {r}'.format(q=query, r=region_id),
                                                            'Ranks: diff {}, total {}, dbd {}, corsa {}'.format(\
                                                                    corsa_rank - dbd_rank, urls_cnt, dbd_rank, corsa_rank),
                                                            'Kernel {}, viewport {}'.format(boost_kernel_wrapped(url), boost_viewport_wrapped(url))
                                                            ])

    html += '</tbody></table>'
    html += '</div>'

    html_file.write(html_header)
    html_file.write(html)

    out_queries_json = [{"category": "all", "queries": list(uniq_queries)}]

    print 'Dumping queries to json'
    with codecs.open('all_queries.json', 'w', 'utf8') as f:
        json.dump(out_queries_json, f, ensure_ascii=False, indent=4)

    print 'Errors count: {}, key not found in queries_list'.format(errors_count)
    # if not DEBUG:
    #     print 'Hurge url to html results: {}'.format(upload_file_to_hurge('index.html'))

    return []
