# -*- coding: utf-8 -*-

import math
import json
import subprocess
from collections import defaultdict
from datetime import datetime
from swiss import Url, Game, Tournament


def doc_key(doc, pref=''):
    """Return the tuple that identifies a document.

    The tuple is ``(query, region_id, image_url, page_url)``.  ``pref`` is
    prepended to the ``image_url`` / ``page_url`` field names, which lets the
    same helper read the ``left_*`` / ``right_*`` fields of a comparison task.
    """
    field_names = ('query', 'region_id', pref + 'image_url', pref + 'page_url')
    return tuple(doc[name] for name in field_names)


def query_key(item):
    """Return the ``(query, region_id)`` pair read from ``item``."""
    query, region_id = item['query'], item['region_id']
    return (query, region_id)


def parse_task(task):
    """Convert one labelled comparison task into ``(won_i, lost_i)``.

    The left and right document keys are built from ``task['inputValues']``
    and mapped to indices through the module-level ``data_idx_by_key`` mapping
    (not defined in the visible part of this file -- it has to be provided
    elsewhere before this function is called).
    """
    values = task['inputValues']
    left, right = doc_key(values, 'left_'), doc_key(values, 'right_')
    label = task['outputValues']['label']

    # Any label other than 'left' makes the right document the winner, and
    # any label other than 'right' makes the right document the loser.
    winner = left if label == 'left' else right
    loser = left if label == 'right' else right

    return (data_idx_by_key[winner], data_idx_by_key[loser])


def aggregate_toloka_data(toloka_data):
    """Group tasks by round number and then by query key.

    Returns a pair ``(toloka_agg, max_rounds)``: ``toloka_agg`` maps each
    ``task['round']`` value to a ``defaultdict(list)`` keyed by
    ``query_key(task['inputValues'])``; ``max_rounds`` is the largest round
    value seen (0 when ``toloka_data`` is empty).
    """
    by_round = {}
    highest_round = 0
    for task in toloka_data:
        round_no = task['round']
        highest_round = max(highest_round, round_no)

        per_query = by_round.get(round_no)
        if per_query is None:
            per_query = by_round[round_no] = defaultdict(list)
        per_query[query_key(task['inputValues'])].append(task)
    return by_round, highest_round


def aggregate_round_results(toloka_agg_results, overlap=2):
    """Summarise the labelled tasks of one round for one query.

    Returns ``(games_played, wins)``:

    * ``games_played`` -- dict whose keys are ``(smaller_key, larger_key)``
      pairs of document keys that met in at least one task (values are True);
    * ``wins`` -- ``defaultdict(int)`` mapping a document key to its win
      total.  With ``overlap == 2`` raw per-pair counts are summed; for any
      other ``overlap`` each per-pair count is first passed through the
      conversion table below (counts above 6 are not in the table).
    """
    # Conversion applied to a pair's raw win count whenever overlap != 2.
    wins_map_3_to_2 = {0: 0, 1: 1, 2: 1, 3: 2, 4: 3, 5: 3, 6: 4}

    # Aggregate match results from the Toloka labels.
    games_played = {}
    pair_wins = defaultdict(int)
    for task in toloka_agg_results:
        values = task['inputValues']
        left = doc_key(values, 'left_')
        right = doc_key(values, 'right_')
        label = task['outputValues']['label']
        winner = left if label == 'left' else right
        loser = left if label == 'right' else right

        pair_wins[(winner, loser)] += 1
        # Touch the reversed pair so the loser is present with a zero count.
        pair_wins[(loser, winner)] += 0
        games_played[(min(left, right), max(left, right))] = True

    wins = defaultdict(int)
    for (winner, _loser), count in pair_wins.items():
        if overlap == 2:
            wins[winner] += count
        else:
            wins[winner] += wins_map_3_to_2[count]

    return games_played, wins


def play_tournament_games(group, toloka_agg=None):
    """Build a ``Tournament`` for one group of documents and replay its rounds.

    ``group`` is a non-empty sequence of document dicts; the first item's
    ``query`` / ``region_id`` identify the group, and every item becomes a
    ``Url`` participant.  When ``toloka_agg`` (the per-round mapping built by
    ``aggregate_toloka_data``) is given, rounds 1..99 are replayed in order
    until the first round that has no tasks for this query.  After each round
    the accumulated list of games is handed to ``Tournament.set_games``.

    Returns the ``Tournament`` instance.
    """
    first = group[0]
    query = first['query']
    # Only byte strings need decoding; calling .decode() on text fails on
    # Python 3 and triggers an implicit ASCII encode on Python 2.
    if isinstance(query, bytes):
        query = query.decode('utf-8')
    # The same key is used for the membership test and for the lookup below.
    # Previously the lookup used the undecoded query, so for non-ASCII byte
    # queries it could miss and silently get an empty list from the
    # defaultdict instead of the round's tasks.
    round_key = (query, first['region_id'])

    # Create the tournament.
    t = Tournament()
    urls = {doc_key(item): Url(item, i) for i, item in enumerate(group)}
    t.set_urls(urls.values())

    if toloka_agg:
        games = []
        for swiss_round in range(1, 100):
            if swiss_round not in toloka_agg or round_key not in toloka_agg[swiss_round]:
                break

            games_played, wins = aggregate_round_results(toloka_agg[swiss_round][round_key])

            # Create the games of the current round.
            for i, (left_key, right_key) in enumerate(games_played.keys()):
                try:
                    game = Game(urls[left_key], urls[right_key])
                    game.add_score(wins[left_key], wins[right_key])
                    game.round = swiss_round
                    game.id = i
                except Exception:
                    # Best effort: a game that cannot be built (e.g. a task
                    # referring to a document that is not in ``group``) is
                    # skipped.  ``Exception`` rather than a bare ``except`` so
                    # that KeyboardInterrupt / SystemExit still propagate.
                    continue
                games.append(game)

            t.set_games(games)

    return t


def get_boosts_metrics_structure(boost_types, targets):
    """Create the empty ``boost -> target -> metric -> {key: float}`` structure.

    Only the keys of ``boost_types`` and ``targets`` are used.  Every leaf is
    its own ``defaultdict(float)``, so a value can be stored as e.g.
    ``boosts['rel_col_size']['dwelltime']['mse']['300 spartans'] = 45.67``.
    """
    metric_names = ['pairs_correct', 'pairs_total', 'pairs_predicted', 'mse', 'spearmanr']
    return {
        boost_name: {
            target: {metric: defaultdict(float) for metric in metric_names}
            for target in targets.keys()
        }
        for boost_name in boost_types.keys()
    }


def get_boost_metrics_structure(boost_types, targets):
    """Create the empty ``boost -> target -> {key: float}`` structure.

    Only the keys of ``boost_types`` and ``targets`` are used; each leaf is a
    separate ``defaultdict(float)``.
    """
    return {
        boost_name: {target: defaultdict(float) for target in targets.keys()}
        for boost_name in boost_types.keys()
    }


def get_targets_metrics_structure(targets):
    """Create the empty ``target -> metric -> {key: float}`` structure.

    Only the keys of ``targets`` are used; each leaf is a separate
    ``defaultdict(float)``.
    """
    metric_names = ['pairs_correct', 'pairs_total', 'pairs_predicted', 'mse', 'spearmanr']
    return {
        target: {metric: defaultdict(float) for metric in metric_names}
        for target in targets.keys()
    }


def upload_file_to_hurge(filename):
    """Pack ``filename`` into ``index.tar.gz``, publish it and return its URL.

    Both steps run external commands (``tar`` and ``curl``) through the shell
    in the current working directory.  The publish endpoint is expected to
    answer with a JSON object that has a ``url`` field; the returned value is
    ``'https://' + url``.

    Raises whatever ``json.loads`` raises when the response is not valid JSON
    and ``KeyError`` when the response has no ``url`` field.

    NOTE(review): ``filename`` is interpolated into a shell command unquoted,
    so it must come from a trusted source.  ``curl -k`` also disables TLS
    certificate verification.
    """
    # archive
    archiver = subprocess.Popen(
        'tar czf index.tar.gz {}'.format(filename),
        shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )
    # communicate() drains and closes the pipe while waiting; a plain wait()
    # with stdout=PIPE can deadlock once the child fills the pipe buffer.
    archiver.communicate()

    # upload
    upload_cmd = 'curl -k -X POST --data-binary @index.tar.gz https://hurge.qloud.yandex-team.ru/api/v0/publish 2>/dev/null'
    uploader = subprocess.Popen(upload_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    response, _ = uploader.communicate()

    # The pipe yields bytes; decode explicitly so parsing does not depend on
    # json.loads accepting bytes.
    data = json.loads(response.decode('utf-8'))
    return 'https://' + data['url']


def upload_html_to_hurge(html):
    """Write ``html`` to ``index.html`` in the current directory and publish it.

    Returns the value returned by ``upload_file_to_hurge``.
    """
    page_path = 'index.html'
    with open(page_path, 'w') as page:
        page.write(html)
    return upload_file_to_hurge(page_path)


def get_sorters_html(boosts):
    """Render the "Sort by:" paragraph with one ``<span>`` per key of ``boosts``.

    The span for the key ``'dbd'`` additionally gets the ``sorter__active``
    class.
    """
    span_template = ' <span class="sorter {1}" data-sortby="{0}">{0}</span>'
    spans = [
        span_template.format(name, 'sorter__active' if name == 'dbd' else '')
        for name in boosts.keys()
    ]
    return '  <p>Sort by:' + ''.join(spans) + '  </p>'


def get_html_table_header():
    """Return the opening markup of the stats table.

    The returned fragment contains the ``<table>`` tag, the complete two-row
    ``<thead>`` and an opening ``<tbody>``; the caller is responsible for the
    rows and for closing ``<tbody>`` / ``<table>``.
    """
    header_markup = """
    <table class="stats_table">
    <thead>
        <tr>
            <td rowspan="2" data-sort="0">Sort by boost:</td>
            <td colspan="3">dbd score</td>
            <td colspan="3">click_boost</td>
        </tr>
        <tr>
            <td title="Количество правильно отсортированных пар">correct pairs</td>
            <td title="Количество пар всего">total pairs</td>
            <td data-sort="3" title="Доля правильно отсортированных пар. Больше — лучше. ">% correct pairs</td>
            <td title="Количество правильно отсортированных пар">correct pairs</td>
            <td title="Количество пар всего">total pairs</td>
            <td data-sort="6" title="Доля правильно отсортированных пар. Больше — лучше">% correct pairs</td>
        </tr>
    </thead>
    <tbody>
    """
    return header_markup


def format_queries_item(query, region_id='', selected=False):
    """Render one ``<li>`` entry of the queries list.

    The link text, ``data-query`` and the ``href`` anchor are taken from
    ``query``.  A truthy ``region_id`` adds a ``data-region`` attribute and a
    ``_<region_id>`` suffix to the anchor; ``selected`` adds the
    ``queries__item_selected`` class.
    """
    selected_class = ' queries__item_selected' if selected else ''
    if region_id:
        region_attr = ' data-region="{region_id}"'.format(region_id=region_id)
        anchor_suffix = '_{region_id}'.format(region_id=region_id)
    else:
        region_attr = ''
        anchor_suffix = ''
    anchor = '{query}{reg}'.format(query=query, reg=anchor_suffix)

    item_template = '<li class="queries__li"><a class="queries__item{selected}" data-query="{query}"{region} href="#{hashtag}">{query}</a></li>'
    return item_template.format(
        query=query,
        selected=selected_class,
        region=region_attr,
        hashtag=anchor,
    )


def get_html_queries(sampled_queries, queries_grouped, queries_divider=50, add_random=True):
    """Render the ``<ul class="queries">`` navigation list.

    The list starts with a selected "Total stats table" entry, followed by
    one section per item of ``sampled_queries`` (each item provides
    ``category`` and ``queries``).  With ``add_random`` a "random sample"
    section is appended that contains every ``queries_divider``-th group of
    ``queries_grouped`` (indices 0, divider, 2*divider, ...), described by the
    ``query_text`` / ``query_region_id`` of the group's first element.

    Returns ``(html, sampled_queries_list)`` where the list holds all query
    texts that were put into the markup, in order, except the total entry.
    """
    category_template = '<span class="category">{cat}</span>'
    parts = [
        '<ul class="queries">\n',
        format_queries_item('Total stats table', selected=True),
        '<br/>',
    ]

    chosen = []
    if len(sampled_queries) > 0:
        # hand-picked queries, grouped by category
        chosen = [q for cat in sampled_queries for q in cat['queries']]
        for cat in sampled_queries:
            parts.append(category_template.format(cat=cat['category']))
            parts.extend([format_queries_item(q) for q in cat['queries']])

    if add_random:
        # random sample
        parts.append(category_template.format(cat='random sample'))
        for idx, q in enumerate(queries_grouped):
            if idx % queries_divider != 0:
                continue
            head = q[0]
            chosen.append(head['query_text'])
            parts.append(format_queries_item(head['query_text'], head['query_region_id']))

    parts.append('</ul>\n\n')
    return ''.join(parts), chosen

def grayscale_scale(max_gray_deviation):
    """Map a maximum gray deviation onto a stepped score in ``[0.0, 1.0]``.

    The argument is converted with ``float()``.  The score is the one paired
    with the first lower bound (checked from the largest down) that the value
    strictly exceeds; values of 1000 or less give 0.0.
    """
    deviation = float(max_gray_deviation)

    # (exclusive lower bound, score), from the highest bound down
    steps = (
        (10000, 1.0),
        (5000, 0.8),
        (4000, 0.6),
        (3000, 0.4),
        (2000, 0.2),
        (1000, 0.1),
    )
    for lower_bound, score in steps:
        if deviation > lower_bound:
            return score
    return 0.0


def grayscale_avg_scale(mark):
    """Map an average gray deviation mark onto a stepped score in ``[0.0, 1.0]``.

    The placeholder marks ``"NOT_JUDGED"``, ``"-1"`` and ``""`` give 0.0.
    Any other mark is converted with ``float()`` and asserted to be >= 0; the
    score is the one paired with the first lower bound (checked from the
    largest down) that the value strictly exceeds, and 0.0 for values of 100
    or less.
    """
    if mark in ("NOT_JUDGED", "-1", ""):
        return 0.0

    deviation = float(mark)
    assert (deviation >= 0), "Grayscale must be a positive float number: {}".format(mark)

    # (exclusive lower bound, score), from the highest bound down
    steps = (
        (1000, 1.0),
        (600, 0.8),
        (400, 0.6),
        (250, 0.4),
        (150, 0.2),
        (100, 0.1),
    )
    for lower_bound, score in steps:
        if deviation > lower_bound:
            return score
    return 0.0


def visual_quality_scale(mark):
    """Translate a visual quality mark into a score.

    ``"GOOD"`` -> 1.0, ``"OK"`` -> 0.5, ``"BAD"`` and ``"404"`` -> 0.0.  Any
    other mark trips the assertion.
    """
    score_by_mark = {"GOOD": 1.0, "OK": 0.5, "BAD": 0.0, "404": 0.0}
    is_known = mark in score_by_mark
    assert is_known, "Unknown mark: {}".format(mark)
    return score_by_mark.get(mark)


def square_scale(width, height):
    """Return a logistic score of the image area ``width * height``.

    Both arguments are converted with ``float()``.  The result is
    ``1 / (1 + exp(3 - area / 100000))``: 0.5 at an area of 300000 and
    approaching 1.0 for large areas.
    """
    area = float(width) * float(height)
    exponent = -area / 100000.0 + 3
    return 1.0 / (1 + math.exp(exponent))


def desktop_viewport_factor(iw, ih):
    """Return the weighted share of the viewport an ``iw`` x ``ih`` image fills.

    For every screen in the table the viewport is the screen size minus 530
    horizontally and 250 vertically.  The image is shrunk (never enlarged) to
    fit that viewport, and the ratio of the fitted image area to the viewport
    area is added to the result with the screen's weight.

    NOTE(review): the weights look like usage shares of the resolutions and
    the subtracted margins like page chrome -- confirm with the data source.
    """
    # (screen width, screen height, weight)
    screens = (
        (1366, 768, 0.39),
        (1920, 1080, 0.19),
        (1280, 1024, 0.15),
        (1600, 900, 0.09),
        (1440, 900, 0.06),
        (1024, 768, 0.05),
        (1680, 1050, 0.04),
        (1280, 800, 0.03),
    )
    total = 0
    for screen_w, screen_h, weight in screens:
        fw = screen_w - 530
        fh = screen_h - 250
        area_ratio = (iw * ih) / float(fw * fh)
        # scale needed to fit the image into the viewport, capped at 1
        shrink = min(fw / float(iw), fh / float(ih), 1)
        total += weight * (area_ratio * shrink ** 2)
    return total


def desktop_viewport_factor2(iw, ih):
    """Return the weighted share of the viewport an ``iw`` x ``ih`` image fills.

    Same computation as ``desktop_viewport_factor`` but with a different
    screen table, weights normalised to sum to 1.0, and a viewport of
    ``width - 45*2 - 340`` by ``height - 20 - 26 - 101``.

    NOTE(review): two resolutions (1280x800, 1280x720) appear twice in the
    table; they are kept as separate entries exactly as in the source data.
    """
    # (screen width, screen height, weight)
    screens = (
        (1366, 768,  0.292),
        (1920, 1080, 0.188),
        (1280, 1024, 0.102),
        (1600, 900,  0.071),
        (1440, 900,  0.040),
        (1280, 800,  0.033),
        (1280, 800,  0.032),
        (1024, 768,  0.030),
        (1536, 864,  0.030),
        (1680, 1050, 0.028),
        (1280, 720,  0.019),
        (1280, 720,  0.019),
        (1360, 768,  0.016),
    )
    # normalise the weights so that they sum to 1.0
    weight_sum = sum([entry[2] for entry in screens])
    coef = 1.0 / weight_sum

    total = 0
    for screen_w, screen_h, weight in screens:
        fw = screen_w - 45 * 2 - 340
        fh = screen_h - 20 - 26 - 101
        area_ratio = (iw * ih) / float(fw * fh)
        # scale needed to fit the image into the viewport, capped at 1
        shrink = min(fw / float(iw), fh / float(ih), 1)
        total += weight * coef * (area_ratio * shrink ** 2)
    return total


def touch_viewport_factor(iw, ih):
    """Return the share of a 750 x 1334 screen an ``iw`` x ``ih`` image fills.

    The image is shrunk (never enlarged) to fit the screen before the area
    ratio is taken.
    """
    screen_w, screen_h = 750, 1334
    area_ratio = (iw * ih) / float(screen_w * screen_h)
    # scale needed to fit the image into the screen, capped at 1
    shrink = min(screen_w / float(iw), screen_h / float(ih), 1)
    return area_ratio * shrink ** 2


def image_create_time_factor(t):
    """Score an image by its age, given its creation time as a Unix timestamp.

    A falsy or NaN ``t`` gives 0.0.  Otherwise the age in whole days relative
    to ``datetime.now()`` is mapped as: up to 30 days -> 1.0, up to 90 -> 0.9,
    up to 365 -> 0.75, up to 730 -> 0.5, up to 1095 -> 0.25, older -> 0.0.
    """
    # A zero timestamp is falsy, so it is covered by ``not t``.
    if not t or math.isnan(t):
        return 0.0

    age_days = (datetime.now() - datetime.fromtimestamp(int(t))).days

    month, year = 30, 365
    # (inclusive upper bound on the age in days, score), youngest first
    steps = (
        (month, 1.0),
        (3 * month, 0.9),
        (year, 0.75),
        (2 * year, 0.5),
        (3 * year, 0.25),
    )
    for max_age, score in steps:
        if age_days <= max_age:
            return score
    return 0.0

def boost_corsa(row):
    """Return ``row['relevance']`` plus weighted image-quality bonuses.

    Added on top of the relevance: grayscale score * 0.1, area score * 0.1,
    desktop viewport factor * 0.025, visual quality score * 0.05 and a flat
    0.03 when ``page_binary_relevance`` is ``'RELEVANT'``.
    """
    relevance = row['relevance']
    gray_part = grayscale_scale(row['max_gray_deviation']) * 0.1
    size_part = square_scale(row['image_width'], row['image_height']) * 0.1
    viewport_part = desktop_viewport_factor(row['image_width'], row['image_height']) * 0.025
    quality_part = visual_quality_scale(row['image_visual_quality']) * 0.05

    total = relevance + gray_part + size_part + viewport_part + quality_part
    page_mark = row.get('page_binary_relevance', 'NO_MARK')
    if page_mark == 'RELEVANT':
        total += 0.03
    return total

def boost_3cg(row):
    """Return ``row['relevance']`` plus three weighted image bonuses.

    Added on top of the relevance: grayscale score / 6, area score / 12 and
    visual quality score / 12.
    """
    relevance = row['relevance']
    gray_part = grayscale_scale(row['max_gray_deviation']) * 1 / 6.0
    size_part = square_scale(row['image_width'], row['image_height']) * 1 / 12.0
    quality_part = visual_quality_scale(row['image_visual_quality']) * 1 / 12.0
    return relevance + gray_part + size_part + quality_part

def boost_utility1(row):
    """Utility-optimised target.

    Weighted sum of relevance (0.24), area score (0.04), grayscale score
    (0.05), visual quality score (0.05), desktop viewport factor (0.06), a
    flat 0.03 for a ``'RELEVANT'`` page mark, and utility (0.52).  When the
    row has no ``utility`` value (missing or ``'NO_MARK'``) 0.4 is used.
    """
    relevance_part = row['relevance'] * 0.24
    size_part = square_scale(row['image_width'], row['image_height']) * 0.04
    gray_part = grayscale_scale(row['max_gray_deviation']) * 0.05
    quality_part = visual_quality_scale(row['image_visual_quality']) * 0.05
    viewport_part = desktop_viewport_factor(row['image_width'], row['image_height']) * 0.06

    total = relevance_part + size_part + gray_part + quality_part + viewport_part
    if row.get('page_binary_relevance', 'NO_MARK') == 'RELEVANT':
        total += 0.03

    has_utility = row.get('utility', 'NO_MARK') != 'NO_MARK'
    utility = row['utility'] if has_utility else 0.4
    total += utility * 0.52
    return total

def boost_utility_clickopt1_without_rel04(row):
    """Click-optimised target.

    The base is ``row['relevance']``, except that a relevance of exactly 0.5
    is replaced by 0.75.  Added on top: grayscale score * 0.14 * 0.4 and
    desktop viewport factor * 0.86 * 0.4.
    """
    relevance = row['relevance']
    base = 0.75 if relevance == 0.5 else relevance
    gray_part = grayscale_scale(row['max_gray_deviation']) * 0.14 * 0.4
    viewport_part = desktop_viewport_factor(row['image_width'], row['image_height']) * 0.86 * 0.4
    return base + gray_part + viewport_part

def boost_corsax(row, rel_minus_weight=0.75, boost_weight=0.3):
    """Return the relevance base plus ``boost_weight`` times the combined boost.

    The base is ``row['relevance']``, except that a relevance of exactly 0.5
    is replaced by ``rel_minus_weight``.  The boost is the sum of:
    ``grayscale_factor_v2`` * 0.05 (0 when absent; it is used instead of the
    older ``grayscale_scale(max_gray_deviation)`` term), visual quality score
    * 0.1, desktop viewport factor * 0.1, area score * 0.2, ``kernel_mart``
    * 0.10 (0.5 when None or absent), a flat 0.10 for a ``'RELEVANT'`` page
    mark, and utility * 0.35 (0.4 when missing or ``'NO_MARK'``).
    """
    relevance = row['relevance']
    base = rel_minus_weight if relevance == 0.5 else relevance

    terms = [
        row.get('grayscale_factor_v2', 0) * 0.05,
        visual_quality_scale(row['image_visual_quality']) * 0.1,
        desktop_viewport_factor(row['image_width'], row['image_height']) * 0.1,
        square_scale(row['image_width'], row['image_height']) * 0.2,
    ]

    kernel_mart = row.get('kernel_mart')
    terms.append((0.5 if kernel_mart is None else kernel_mart) * 0.10)

    if row.get('page_binary_relevance', 'NO_MARK') == 'RELEVANT':
        terms.append(0.10)

    has_utility = row.get('utility', 'NO_MARK') != 'NO_MARK'
    terms.append((row['utility'] if has_utility else 0.4) * 0.35)

    # sum() adds the terms left to right starting from 0
    return base + sum(terms) * boost_weight

def boost_corsax_viewport2(row, rel_minus_weight=0.75, boost_weight=0.3):
    """Return the relevance base plus ``boost_weight`` times the combined boost.

    Same formula as ``boost_corsax`` but the viewport term is computed with
    ``desktop_viewport_factor2``.  The base is ``row['relevance']``, with a
    relevance of exactly 0.5 replaced by ``rel_minus_weight``.  The boost is
    the sum of: ``grayscale_factor_v2`` * 0.05 (0 when absent), visual
    quality score * 0.1, viewport factor * 0.1, area score * 0.2,
    ``kernel_mart`` * 0.10 (0.5 when None or absent), a flat 0.10 for a
    ``'RELEVANT'`` page mark, and utility * 0.35 (0.4 when missing or
    ``'NO_MARK'``).
    """
    relevance = row['relevance']
    base = rel_minus_weight if relevance == 0.5 else relevance

    terms = [
        row.get('grayscale_factor_v2', 0) * 0.05,
        visual_quality_scale(row['image_visual_quality']) * 0.1,
        desktop_viewport_factor2(row['image_width'], row['image_height']) * 0.1,
        square_scale(row['image_width'], row['image_height']) * 0.2,
    ]

    kernel_mart = row.get('kernel_mart')
    terms.append((0.5 if kernel_mart is None else kernel_mart) * 0.10)

    if row.get('page_binary_relevance', 'NO_MARK') == 'RELEVANT':
        terms.append(0.10)

    has_utility = row.get('utility', 'NO_MARK') != 'NO_MARK'
    terms.append((row['utility'] if has_utility else 0.4) * 0.35)

    # sum() adds the terms left to right starting from 0
    return base + sum(terms) * boost_weight


def boost_kernel(row, rel_minus_weight=0.75, boost_weight=0.3):
    """Return only the ``kernel_mart`` term of the boost.

    The value is ``kernel_mart * 0.10 * boost_weight``, with 0.5 used when
    ``kernel_mart`` is None or absent.  ``rel_minus_weight`` is accepted but
    not used.
    """
    kernel_mart = row.get('kernel_mart')
    if kernel_mart is None:
        kernel_mart = 0.5
    return kernel_mart * 0.10 * boost_weight


def boost_viewport(row, rel_minus_weight=0.75, boost_weight=0.3):
    """Return only the viewport term of the boost.

    The value is ``desktop_viewport_factor2(width, height) * 0.1 *
    boost_weight``.  ``rel_minus_weight`` is accepted but not used.
    """
    viewport = desktop_viewport_factor2(row['image_width'], row['image_height'])
    return viewport * 0.1 * boost_weight


def boost_greenurl(row, rel_minus_weight=0.75, boost_weight=0.3):
    """Return only the page-relevance term of the boost.

    The value is ``0.10 * boost_weight`` when ``page_binary_relevance`` is
    ``'RELEVANT'`` and zero otherwise.  ``rel_minus_weight`` is accepted but
    not used.
    """
    page_mark = row.get('page_binary_relevance', 'NO_MARK')
    bonus = 0.10 if page_mark == 'RELEVANT' else 0
    return bonus * boost_weight


def boost_visual_quality(row, rel_minus_weight=0.75, boost_weight=0.3):
    """Return only the visual-quality term of the boost.

    The value is ``visual_quality_scale(mark) * 0.1 * boost_weight``.
    ``rel_minus_weight`` is accepted but not used.
    """
    quality = visual_quality_scale(row['image_visual_quality'])
    return quality * 0.1 * boost_weight


def boost_grayscale_v2(row, rel_minus_weight=0.75, boost_weight=0.3):
    """Return only the ``grayscale_factor_v2`` term of the boost.

    The value is ``grayscale_factor_v2 * 0.05 * boost_weight``, with 0 used
    when the field is absent.  ``rel_minus_weight`` is accepted but not used.
    """
    gray_factor = row.get('grayscale_factor_v2', 0)
    return gray_factor * 0.05 * boost_weight


def boost_utility_only(row, rel_minus_weight=0.75, boost_weight=0.3):
    """Return only the utility term of the boost.

    The value is ``utility * 0.35 * boost_weight``, with 0.4 used when the
    row has no ``utility`` value (missing or ``'NO_MARK'``).
    ``rel_minus_weight`` is accepted but not used.
    """
    has_utility = row.get('utility', 'NO_MARK') != 'NO_MARK'
    utility = row['utility'] if has_utility else 0.4
    return utility * 0.35 * boost_weight
