# -*- coding: utf-8 -*-

import random

from decorators import render_to, jsonify, user_has_permissions
from collections import namedtuple

import yt.wrapper as yt

# Base YT directory; the requested snippet type is appended to it to get the
# directory whose tables are read.
DATA_DIR = '//home/shinyserp/featured_snippet/regular_toloka_assessment'

# Keys of a row's ``TolokaRaw`` mapping that mark an answer as incorrect.
# Any other key in that mapping is ignored by ``get_reasons``.
INCORRECT_REASONS = [
    'bad_query',
    'bad_typography',
    'drugs',
    'lie',
    'medicine',
    'politics',
    'swear',
    'unethical',
    'useless',
]

# Minimum count a reason needs in ``TolokaRaw`` to be attached to a query.
MAJORITY_VOTE_VAL = 2


# One displayed item. ``tags`` holds the sorted reasons and ``date`` holds the
# name of the table the row was read from.
Query = namedtuple('Query', ('query', 'answer', 'tags', 'date'))


def get_reasons(toloka):
    """Return the set of rejection reasons that reached the vote threshold.

    ``toloka`` is a mapping whose keys are labels and whose values are
    compared against ``MAJORITY_VOTE_VAL``. Keys that are not listed in
    ``INCORRECT_REASONS`` are ignored entirely.

    Returns a set with every known reason whose value is at least
    ``MAJORITY_VOTE_VAL``, or ``{'correct'}`` when no reason qualifies.
    """
    # The membership test comes first so values of unknown labels are never
    # compared with the threshold.
    flagged = {
        label
        for label, votes in toloka.items()
        if label in INCORRECT_REASONS and votes >= MAJORITY_VOTE_VAL
    }
    return flagged if flagged else {'correct'}


def _int_param(params, name, default, minimum):
    """Read an integer query parameter, tolerating bad input.

    Returns ``default`` when the parameter is missing or is not a valid
    integer literal; the result is never smaller than ``minimum``.
    """
    try:
        value = int(params.get(name, default))
    except ValueError:
        value = default
    return max(value, minimum)


def _iter_matching_queries(yc, data_path, last_tables, whitelist, blacklist):
    """Lazily yield ``Query`` items that pass the whitelist/blacklist filters.

    Tables under ``data_path`` are visited in reverse-sorted name order and
    only the first ``last_tables`` of them are read. Rows without a
    ``TolokaRaw`` column are skipped.
    """
    newest_tables = sorted(yc.list(data_path), reverse=True)[:last_tables]
    for table in newest_tables:
        for row in yc.read_table(yt.ypath_join(data_path, table)):
            if 'TolokaRaw' not in row:
                continue
            reasons = get_reasons(row['TolokaRaw'])
            # A whitelist requires every listed reason to be present.
            if whitelist and not whitelist.issubset(reasons):
                continue
            # A blacklist rejects the row if any listed reason is present.
            if blacklist and blacklist & reasons:
                continue
            yield Query(row['Query'], row['Answer'], sorted(reasons), table)


@user_has_permissions()
@render_to(t='featured_snippet.html')
def index(request, userinfo):
    """Build the column layout of assessed queries for the snippet page.

    Query-string parameters (all optional):
        type        -- sub-directory of ``DATA_DIR`` to read (default 'all').
        last_tables -- how many tables to read, taken from the end of the
                       sorted table list (default 10, minimum 0).
        columns_cnt -- number of columns to split queries into
                       (default 6, minimum 1).
        queries_cnt -- maximum number of queries to return
                       (default 60, minimum 0).
        whitelist   -- comma-separated reasons that must all be present.
        blacklist   -- comma-separated reasons none of which may be present.

    Malformed integer values fall back to their defaults and out-of-range
    values are clamped, so a bad query string cannot trigger a division by
    zero or an index error.

    ``userinfo`` is not used here; presumably it is supplied by the
    ``user_has_permissions`` decorator -- TODO confirm.

    Returns a dict with ``columns`` (a list of ``columns_cnt`` lists of
    ``Query``) and ``width`` (``100.0 / columns_cnt``); presumably it is used
    as the template context by ``render_to`` -- TODO confirm.
    """
    params = request.GET
    # NOTE(review): fs_type comes straight from the query string and is joined
    # into a YT path below; consider validating it against the known types.
    fs_type = params.get('type', 'all')
    last_tables = _int_param(params, 'last_tables', 10, 0)
    # At least one column is required: the count is used as a divisor.
    columns_count = _int_param(params, 'columns_cnt', 6, 1)
    queries_count = _int_param(params, 'queries_cnt', 60, 0)
    whitelist = {tag for tag in params.get('whitelist', '').split(',') if tag}
    blacklist = {tag for tag in params.get('blacklist', '').split(',') if tag}

    queries = []
    # Skip YT entirely when nothing was requested.
    if queries_count > 0:
        yc = yt.YtClient(proxy='hahn')
        data_path = yt.ypath_join(DATA_DIR, fs_type)
        matching = _iter_matching_queries(
            yc, data_path, last_tables, whitelist, blacklist)
        for query in matching:
            queries.append(query)
            # Stop reading as soon as the limit is reached.
            if len(queries) >= queries_count:
                break

    # Order by table name; the random component shuffles queries that come
    # from the same table.
    queries.sort(key=lambda item: (item.date, random.random()))

    # Deal queries round-robin into the columns.
    columns = [[] for _ in range(columns_count)]
    for position, query in enumerate(queries):
        columns[position % columns_count].append(query)

    return {
        'columns': columns,
        'width': 100.0 / columns_count,
    }
