#!/usr/bin/env python
# -*- coding: utf-8 -*-

import cgi
import pandas as pd
from string import Template
from scipy.stats import ttest_1samp
from collections import defaultdict, Counter

from helpers import extract_systems, get_user_actions_from_json, get_user_actions_count, calc_user_temperature, \
    get_cards_positions_clicks, is_worker_cheater, get_top_position

# HTML fragment for one card, filled in by main() when it renders the
# per-worker viewer. Placeholders:
#   $idx          - index of the task within the worker's group (used in the
#                   CSS class and in data-idx)
#   $selected     - extra CSS class text for cards the worker selected
#   $color        - CSS class that colours the card
#   $card_id      - card identifier (data attribute, link target and link text)
#   $avatars_url  - image URL used as the CSS background
#   $board_title, $description, $page_domain - text shown in the card footer
card_template = Template("""
<div class="view view_idx_$idx $selected $color" data-cardid="$card_id" data-idx="$idx">
    <div class="image">
        <div class="image__div" style="background-image: url($avatars_url);" /></div>
    </div>
    <div class="view__footer">
        <div class="board_title">$board_title</div>
        <div class="view__info">
            <div class="view__title">$description</div>
            <p class="view__source">$page_domain</p>
        </div>
        <div class="view__ref"><a href="https://yandex.ru/collections/card/$card_id/" target="_blank">$card_id</a></div>
    </div>
</div>
""")


def main(*args):
    """Render the HTML report for an offline Toloka "system A vs system B" pool.

    The six positional arguments are unpacked from ``*args``:

    * ``toloka_data`` -- sequence of task-result dicts. The code reads
      ``inputValues.json_input`` (``type``, ``systems[i].name``),
      ``outputValues.json_output`` (``cards`` with ``selected``,
      ``system_name``, ``card_id``, ``card``; ``userStats``), ``workerId``,
      an optional ``puid``, plus the ``taskId``, ``taskSuiteId``,
      ``assignmentId`` and ``submitTs`` columns once loaded into pandas.
    * ``in2``, ``in3``, ``token`` -- accepted but not used by this function.
    * ``need_workers_viewer`` -- the string ``'0'`` skips the per-worker
      viewer section; any other value renders it.
    * ``html_file`` -- object with a ``write`` method that receives the report.

    Side effects: reads ``template.html`` from the current directory, writes
    the report to ``html_file`` and mutates the items of ``toloka_data``
    (adds ``json_output['type']`` and, when the viewer is rendered,
    ``system1_wins``).

    Returns:
        Always an empty list.

    Raises:
        ValueError: if ``args`` does not contain exactly six items.
        StopIteration: if no item of ``toloka_data`` has type ``'task'``.
    """
    toloka_data, in2, in3, token, need_workers_viewer, html_file = args

    if not len(toloka_data):
        return []

    def out(data):
        """Write ``str(data)`` to the report, followed by a newline."""
        html_file.write(str(data) + '\n')

    with open('template.html') as f:
        html_header = Template(f.read())

    # Copy the task type into outputValues so it travels with the answers.
    for task in toloka_data:
        task['outputValues']['json_output']['type'] = task['inputValues']['json_input']['type']

    df = pd.DataFrame(toloka_data)
    df['cards_count'] = df['outputValues'].apply(lambda x: len(x['json_output']['cards']))
    # Only tasks with exactly 6 cards are analysed. The copy makes the
    # column assignments below write to an independent frame, not a slice.
    df = df.loc[df['cards_count'] == 6].copy()

    # The compared system names are taken from the first regular task.
    task = next(x for x in toloka_data if x['inputValues']['json_input']['type'] == 'task')
    system1_name = task['inputValues']['json_input']['systems'][0]['name']
    system2_name = task['inputValues']['json_input']['systems'][1]['name']

    title = '"{}" vs "{}" :: Collections offline toloka TDI 3/6'.format(system1_name, system2_name)
    out(html_header.substitute(title=title))

    # ======================== Overall pool statistics ========================

    out('<pre>')
    out('<h1>{}</h1>'.format(title))

    tasks_total = df.taskId.describe()['count']
    tasksuites_total = df.taskSuiteId.describe()['unique']
    workers_total = df.workerId.describe()['unique']

    out('<h3>1. Статистика разметки пула в толоке</h3>')
    out("""
всего микротасков выполнено:     {tasks}
всего тасксьютов выполнено:      {tasksuites}
заданий в одном таск-сьюте:      {tasks_per_ts}
всего толокеров:                 {workers}
заданий выполнено на 1 толокера: {ts_per_worker:6.4f}\n\n""".format(
        tasks=tasks_total,
        tasksuites=tasksuites_total,
        tasks_per_ts=tasks_total / tasksuites_total,
        workers=workers_total,
        ts_per_worker=tasksuites_total / float(workers_total),
    ))

    df['systems'] = df['outputValues'].apply(extract_systems)
    df['system1_name'] = df['systems'].apply(lambda x: x[0][0])
    df['system2_name'] = df['systems'].apply(lambda x: x[1][0])
    df['system1_wins'] = df['systems'].apply(lambda x: x[0][1])
    df['system2_wins'] = df['systems'].apply(lambda x: x[1][1])

    systems_wins = Counter()
    for _, row in df.iterrows():
        systems_wins[row['system1_name']] += row['system1_wins']
        systems_wins[row['system2_name']] += row['system2_wins']

    out('Всего выбрано карточек: \n' + str(systems_wins.most_common()).replace('),', '),\n') + '\n')

    # ======================== Worker temperature ========================
    # One row per worker. Selecting the column and calling agg() on it
    # replaces the nested-dict renaming form of agg(), which newer pandas
    # versions reject.
    workers = df \
        .sort_values(by='submitTs', ascending=False) \
        .groupby('workerId')['outputValues'] \
        .agg(get_user_actions_count) \
        .to_frame('stats') \
        .reset_index()

    # 'stats' is indexed positionally; negative values are clamped to 0.
    for position, column in enumerate(['likes', 'cards', 'subs', 'actions', 'tasks_count']):
        workers[column] = workers['stats'].apply(
            lambda x, i=position: x[i] if x[i] >= 0 else 0
        )

    workers['temperature'] = workers['stats'].apply(calc_user_temperature)

    out('Распределение толокеров по температуре:')
    out(workers['temperature'].value_counts())

    out('\nКоличество действий у толокеров (без extremal):')
    out(workers.loc[(workers.cards <= 100) & (workers.subs <= 100)].describe())

    out('\nКоличество заданий на толокера:')
    out(workers.tasks_count.describe())

    # ======================== Click distribution by position ========================
    positions = range(1, 6 + 1)

    df['positions_clicks'] = df['outputValues'].apply(get_cards_positions_clicks)
    # list() keeps the stored value a real list on Python 3 as well.
    df['positions_clicks_list'] = df['positions_clicks'].apply(lambda x: list(x.values()))
    df['positions_clicks_str'] = df['positions_clicks_list'].apply(lambda x: '-'.join([str(i) for i in x]))

    for p in positions:
        df['pos{}_clicks'.format(p)] = df['positions_clicks'].apply(lambda x, p=p: x[p])

    out('\n\nРаспределение кликов по позициям')
    for p in positions:
        out(str(p) + ' ' + str(df['pos{}_clicks'.format(p)].sum()))

    out(df.positions_clicks_str.describe())
    out(df.positions_clicks_str.value_counts())

    # ======================== Counting "123" cheaters ========================
    # One row per assignment: its most frequent click combination and owner.
    cheaters = df.groupby('assignmentId').agg({
        'positions_clicks_str': get_top_position,
        'workerId': 'first'
    }).reset_index()

    cheaters['top_combination'] = cheaters['positions_clicks_str'].apply(lambda x: x[0])
    cheaters['top_combination_count'] = cheaters['positions_clicks_str'].apply(lambda x: x[1])
    cheaters['is_cheater'] = cheaters.apply(is_worker_cheater, axis=1)
    # One row per worker: the flag of the worker's first assignment row.
    cheaters_list = cheaters.groupby('workerId').agg({
        'is_cheater': 'first'
    }).reset_index()

    out('Читеры, которые часто выбирают 123, 456, 125:')
    out(cheaters_list.is_cheater.value_counts())

    # NOTE(review): these two columns have no "2" suffix, while the result
    # table and the viewer read 'ya_collections_feed_exam2' / '..._main2'.
    # Confirm which names the input data really carries.
    if 'ya_collections_feed_exam' in df.columns:
        out('\nНавык толокеров по экзамену:')
        out(df.ya_collections_feed_exam.describe())
    if 'ya_collections_feed_main' in df.columns:
        out('\nНавык толокеров по боевым пулам:')
        out(df.ya_collections_feed_main.describe())

    # ======================== Results ========================

    dft = pd.merge(df, workers[['workerId', 'temperature', 'tasks_count']], on=['workerId'])
    # Merge the per-worker flag (one row per worker). Merging the
    # per-assignment `cheaters` frame on workerId would repeat every task row
    # once per assignment of that worker and inflate all counts below.
    dft = pd.merge(dft, cheaters_list[['workerId', 'is_cheater']], on=['workerId'])

    def aggregate_systems_by_worker(groups):
        """Sum selected cards per system over one worker's non-honeypot tasks.

        ``groups`` is the worker's ``outputValues`` Series; the result is a
        plain dict mapping system name to the number of selected cards.
        """
        systems = Counter()
        for output_values in groups:
            if output_values['json_output']['type'] == 'hp':
                continue

            for card in output_values['json_output']['cards']:
                systems[card['system_name']] += int(card['selected'])
        return dict(systems)

    def calc_scores(df_in, best=False):
        """Write the score cells and the closing ``</tr>`` for one slice.

        The caller has already written the opening ``<tr>`` and the slice
        title cell. ``best`` adds the CSS class ``b`` to the delta and
        p-value cells.
        """
        per_worker = df_in.groupby('workerId').agg({
            'outputValues': aggregate_systems_by_worker
        })

        per_worker['system1'] = per_worker.outputValues.apply(lambda x: x.get(system1_name, 0))
        per_worker['system2'] = per_worker.outputValues.apply(lambda x: x.get(system2_name, 0))
        per_worker['formulas_diff'] = per_worker['system1'] - per_worker['system2']

        system1_sum = sum(per_worker['system1'])
        system2_sum = sum(per_worker['system2'])

        if system1_sum == 0 or system2_sum == 0:
            out('<td></td><td></td><td></td><td></td><td></td><td></td><td></td><td></td><td>0</td></tr>')
            return

        # One-sample t-test of the per-worker (system1 - system2) differences
        # against zero; computed only when the row is actually rendered.
        pvalue = ttest_1samp(per_worker['formulas_diff'], 0).pvalue
        pvalue_color = '#fff'
        for threshold, shade in ((0.05, '#FEF3CB'), (0.01, '#FEE898'), (0.001, '#FED64B')):
            if pvalue < threshold:
                pvalue_color = shade

        total = system1_sum + system2_sum
        emphasis = 'b' if best else ''

        out('        <td>{:4.2f}%</td>'.format(100.0 * system1_sum / total))
        out('        <td>{:4.2f}%</td>'.format(100.0 * system2_sum / total))
        out('        <td>{}</td>'.format(system1_sum))
        out('        <td>{}</td>'.format(system2_sum))
        out('        <td>{}</td>'.format(system2_sum - system1_sum))
        out('        <td class="{}">{:4.2f}%</td>'.format(emphasis, 50 - 100.0 * system1_sum / total))
        out('        <td class="{}" style="background-color: {};">{:7.6f}</td>'.format(emphasis, pvalue_color, pvalue))
        out('        <td>{} ({:2.0f}% of tasks)</td>'.format(df_in.shape[0], 100.0 * len(df_in) / len(dft)))
        out('        <td>{}</td>'.format(per_worker.shape[0]))
        out('    </tr>')

    out('\n\n<h3>2. Итоговые результаты</h3>')
    out('<table class="stats_table">')
    out('    <thead><tr>')
    out('        <th rowspan="2">Срез</th>')
    out('        <th colspan="2">Winrate</th>')
    out('        <th colspan="2">Cards</th>')
    out('        <th rowspan="2">Δ cards</th>')
    out('        <th rowspan="2">Δ %</th>')
    out('        <th rowspan="2">p-value</th>')
    out('        <th colspan="2">filtered</th>')
    out('    </tr>')
    out('    <tr>')
    out('        <th>A\n<span class="system_name">"{}"</span></th>'.format(system1_name))
    out('        <th>B\n<span class="system_name">"{}"</span></th>'.format(system2_name))
    out('        <th>A</th>')
    out('        <th>B</th>')
    out('        <th>tasks</th>')
    out('        <th>workers</th>')
    out('    </tr></thead><tbody>')

    # Base slice shared by the "total" rows: no extremal workers, no cheaters.
    honest = (dft.temperature != 'extremal') & (dft.is_cheater == 0)

    out('    <tr><td>Всего, без extremal, без читеров</td>')
    calc_scores(dft.loc[honest])

    if 'ya_collections_feed_main2' in df.columns:
        for min_skill in (80, 85, 90, 95):
            # The skill >= 85 row is the highlighted one.
            best = min_skill == 85
            row_attrs = ' style="background-color: #d3ffd5;"' if best else ''
            out('    <tr{}><td>Всего, без extremal, без читеров, skill >= {}</td>'.format(row_attrs, min_skill))
            calc_scores(dft.loc[honest & (dft.ya_collections_feed_main2 >= min_skill)], best=best)

    for t in ['cold', 'warm', 'hot', 'very_hot', 'boiling', 'extremal']:
        out('    <tr><td>{}</td>'.format(t))
        calc_scores(dft.loc[
                        (dft.temperature == t)
                        & (dft.is_cheater == 0)
                    ])

    out('</tbody></table>')

    if need_workers_viewer == '0':
        return []

    # ======================== Per-worker viewer ========================

    out('\n<h3>3. Вьюер результатов каждого толокера</h3>')
    out('\nПоказаны задания каждого пользователя. Порядок — по уменьшению побед системы Б. Чтобы увидеть карточки — кликнуть по юзеру. Чтобы скрыть — [S]')

    out("""
Фильтр пользователей по активностям: <select onchange="filterUsers(this.options[this.selectedIndex].value)">
      <option value="all">Все пользователи</option>
      <option value="cold">cold (likes, cards, subs) <= (0, 0, 0)</option>
      <option value="warm">warm (likes, cards, subs) <= (8, 5, 3)</option>
      <option value="hot">hot (likes, cards, subs) <= (20, 10, 8)</option>
      <option value="very_hot">very_hot (likes, cards, subs) <= (40, 20, 20)</option>
      <option value="boiling">boiling (likes, cards, subs) <= (100, 100, 100)</option>
      <option value="extremal">extremal</option>
    </select>
    """)
    out('</pre>')

    # Group every task (not only the 6-card ones) by worker and remember how
    # many of its selected cards belong to system 1.
    tasks_by_user = defaultdict(list)
    for task in toloka_data:
        task['system1_wins'] = sum(
            1 for card in task['outputValues']['json_output']['cards']
            if card['selected'] and card['system_name'] == system1_name
        )
        tasks_by_user[task['workerId']].append(task)

    # Workers are listed in ascending order of their total system-1 wins.
    for group in sorted(tasks_by_user.values(), key=lambda x: sum([t['system1_wins'] for t in x])):
        first_task = group[0]
        worker_id = first_task['workerId']

        systems = Counter()
        hp_correct = 0
        hp_total = 0
        for task in group:
            task_systems = Counter()
            for card in task['outputValues']['json_output']['cards']:
                systems[card['system_name']] += int(card['selected'])
                task_systems[card['system_name']] += int(card['selected'])
            if task['outputValues']['json_output']['type'] == 'hp':
                # For honeypots, selections from the first listed system
                # count as correct; each honeypot contributes 3 to the total.
                hp_correct += task_systems[task['inputValues']['json_input']['systems'][0]['name']]
                hp_total += 3

        user_actions = get_user_actions_from_json(first_task['outputValues']['json_output']['userStats'])
        temperature = calc_user_temperature(user_actions)
        out('<div class="user" data-temperature="{}">'.format(temperature))
        out('<div class="user_info" onclick="this.nextSibling.classList.toggle(\'cards_visible\');">')

        skill_exam = -1
        skill_main = -1
        is_cheater = 0
        worker_rows = dft[dft.workerId == worker_id]
        if worker_rows.shape[0] > 0:
            worker_row = worker_rows.iloc[0]
            # Plain scalars: the former trailing commas produced 1-tuples
            # that were rendered as "(value,)" in the title.
            skill_exam = worker_row.get('ya_collections_feed_exam2', -1)
            skill_main = worker_row.get('ya_collections_feed_main2', -1)
            is_cheater = worker_row.get('is_cheater', 0)

        out('<pre>')

        out("""<div class="user_title" title="Нажмите, чтобы показать карточки">workerId: "{workerId}", {puid} temperature: "{temperature}". {likes} likes, {cards} cards, {subs} subs, skill_exam: {skill_exam}, skill_main: {skill_main}
cards: <span class="title_system1">"{system1_name}"</span> {system1_score} vs {system2_score} <span class="title_system2">"{system2_name}"</span>, <span class="title_honeypots">honeypots correct: {hp_correct} / {hp_total}</span>, cheater: {cheater}</div>""".format(
            workerId=worker_id,
            puid='puid: "{}"'.format(first_task['puid']) if 'puid' in first_task else '',
            temperature=temperature,
            likes=user_actions[0], cards=user_actions[1], subs=user_actions[2],
            skill_exam=skill_exam, skill_main=skill_main,

            system1_name=system1_name, system1_score=systems[system1_name],
            system2_name=system2_name, system2_score=systems[system2_name],
            hp_correct=hp_correct, hp_total=hp_total,
            cheater=is_cheater,
        ))
        out('</pre>')
        out('</div><div class="user_cards">')

        for idx, task in enumerate(group):
            json_output = task['outputValues']['json_output']
            out('<div>')
            # Unselected cards sort first; a separator is written before the
            # fourth card.
            for task_idx, val in enumerate(sorted(json_output['cards'], key=lambda x: x['selected'])):
                if task_idx == 3:
                    out('<div class="separator"></div>')
                card = val['card']

                if json_output['type'] == 'task':
                    color = 'color_system1' if val['system_name'] == system1_name else 'color_system2'
                elif val['system_name'] == task['inputValues']['json_input']['systems'][0]['name']:
                    color = 'color_hp_correct'
                else:
                    color = 'color_hp_wrong'

                # The result is encoded before out() calls str() on it --
                # presumably to avoid implicit ASCII encoding of unicode text
                # on Python 2; confirm before porting to Python 3.
                out(card_template.substitute(
                    idx=idx,
                    selected=' view_selected' if val['selected'] else '',
                    color=color,
                    card_id=val['card_id'],
                    avatars_url=card['img'],
                    board_title=cgi.escape(card.get('title', '')),
                    description=cgi.escape(card.get('descr', '')),
                    # Escaped like the other free-text fields of the card.
                    page_domain=cgi.escape(card.get('page', '') or ''),
                ).encode('utf-8'))
            out('</div>')
        out('</div>')  # .user_cards
        out('<hr/>')
        out('</div>')  # .user

    return []
