# -*- coding: utf-8 -*-

import datetime
import random

from decorators import render_to, jsonify, user_has_permissions
from collections import namedtuple

import yt.wrapper as yt

# Base YT directory. ``index`` joins the requested platform name onto it and
# lists the tables found under the resulting path.
DATA_DIR = '//home/shinyserp/featured_snippet/regular_toloka_assessment/uzhas'

# Keys of a row's ``TolokaRaw`` mapping that ``get_uzhas_verdicts`` may report
# as verdicts; any other key in ``TolokaRaw`` is ignored.
UZHAS_REASONS = [
    'bad_typography',
    'drugs',
    'lie',
    'provocative_query',
    'swear',
    'unethical',
    'outdated',
    'politics',
    'all',
]
# Minimum count a reason needs in ``TolokaRaw`` to be reported as a verdict.
MAJORITY_VOTE_VAL = 2


def get_uzhas_verdicts(row):
    """Return the set of "uzhas" reasons that apply to *row*.

    A reason is reported when it is listed in ``UZHAS_REASONS`` and its count
    in ``row['TolokaRaw']`` is at least ``MAJORITY_VOTE_VAL``.  When the row's
    ``actuality`` equals ``'outdated'``, the ``'outdated'`` reason is given a
    count of ``2 * MAJORITY_VOTE_VAL`` regardless of the stored count.

    Rows with a missing or empty ``TolokaRaw`` yield an empty set, even when
    their ``actuality`` is ``'outdated'``.

    :param row: mapping with an optional ``TolokaRaw`` mapping of
        reason -> count and an optional ``actuality`` value.
    :return: set of reason names.

    The row (including its ``TolokaRaw`` mapping) is left unmodified.
    """
    toloka_raw = row.get('TolokaRaw')
    if not toloka_raw:
        return set()

    # Work on a copy so that forcing the 'outdated' count below does not leak
    # into the caller's data.
    counts = dict(toloka_raw)
    # ``.get`` mirrors the tolerant handling of ``TolokaRaw``: a row without
    # an ``actuality`` column is simply not treated as outdated.
    if row.get('actuality') == 'outdated':
        counts['outdated'] = 2 * MAJORITY_VOTE_VAL

    return {
        reason
        for reason, count in counts.items()
        if reason in UZHAS_REASONS and count >= MAJORITY_VOTE_VAL
    }


# One displayed entry: query text, answer, sorted verdict tags, table date
# formatted as YYYY-MM-DD, and source name.
Query = namedtuple('Query', ['query', 'answer', 'tags', 'date', 'source'])


def _csv_param(request, name):
    """Return the non-empty comma-separated items of GET parameter *name* as a set."""
    return set(item for item in request.GET.get(name, '').split(',') if item)


def _int_param(request, name, default, minimum):
    """Return GET parameter *name* as an int that is at least *minimum*.

    Falls back to *default* when the parameter is absent or is not a valid
    integer, so a malformed query string does not fail the whole page.
    """
    try:
        value = int(request.GET.get(name, default))
    except (TypeError, ValueError):
        value = default
    return max(value, minimum)


@user_has_permissions()
@render_to(t='uzhas.html')
def index(request, userinfo):
    """Collect rows that have "uzhas" verdicts and lay them out in columns.

    GET parameters:

    * ``platform`` (default ``'search'``) -- sub-path of ``DATA_DIR`` to read.
    * ``last_days`` (default 2, minimum 0) -- how many of the newest tables
      (by numeric table name) to scan.
    * ``column_cnt`` (default 6, minimum 1) -- number of output columns.
    * ``query_cnt`` (default 60, minimum 0) -- maximum number of rows collected.
    * ``verdict_whitelist`` -- comma-separated verdicts; a row is kept only if
      it has all of them.
    * ``verdict_blacklist`` -- comma-separated verdicts; a row having any of
      them is dropped.
    * ``source_whitelist`` / ``source_blacklist`` -- comma-separated source
      names to keep / drop.  A row with an empty source is matched as ``'null'``.

    Integer parameters that cannot be parsed fall back to their defaults.

    :return: dict with ``columns`` (list of lists of ``Query``), ``width``
        (``100.0 / column_cnt``) and ``platform``.
    """
    # NOTE(review): ``platform`` comes straight from the query string and is
    # joined into a YT path below -- consider validating it against the set of
    # known platforms.
    platform = request.GET.get('platform', 'search')
    last_tables = _int_param(request, 'last_days', 2, 0)
    # At least one column: it is used as a divisor and a modulus below.
    columns_count = _int_param(request, 'column_cnt', 6, 1)
    queries_count = _int_param(request, 'query_cnt', 60, 0)
    verdict_whitelist = _csv_param(request, 'verdict_whitelist')
    verdict_blacklist = _csv_param(request, 'verdict_blacklist')
    source_whitelist = _csv_param(request, 'source_whitelist')
    source_blacklist = _csv_param(request, 'source_blacklist')

    yc = yt.YtClient(proxy='hahn')
    data_path = yt.ypath_join(DATA_DIR, platform)
    # Table names are integers; the largest ones are scanned first.
    newest_tables = sorted(map(int, yc.list(data_path)), reverse=True)[:last_tables]

    queries = []
    for table in newest_tables:
        # Checked before reading so that a limit of 0 collects nothing.
        if len(queries) >= queries_count:
            break
        # The table name is interpreted as a Unix timestamp (UTC); the
        # formatted date is the same for every row of the table.
        table_date = datetime.datetime.utcfromtimestamp(float(table)).strftime('%Y-%m-%d')
        for row in yc.read_table(yt.ypath_join(data_path, str(table))):
            verdicts = get_uzhas_verdicts(row)
            if not verdicts:
                continue
            # Keep only rows that carry every whitelisted verdict.
            if verdict_whitelist and not verdict_whitelist <= verdicts:
                continue
            if verdict_blacklist and verdict_blacklist & verdicts:
                continue
            # A missing, null or blank source is reported as 'null'.
            source = (row.get('source') or '').strip() or 'null'
            if source_whitelist and source not in source_whitelist:
                continue
            if source_blacklist and source in source_blacklist:
                continue
            queries.append(Query(
                row['query'],
                row['answer'],
                sorted(verdicts),
                table_date,
                source,
            ))
            if len(queries) >= queries_count:
                break

    # Newest dates first; rows sharing a date come out in random order.
    queries.sort(key=lambda x: (x.date, random.random()), reverse=True)

    # Deal the rows out round-robin across the columns.
    columns = [[] for _ in range(columns_count)]
    for position, query in enumerate(queries):
        columns[position % columns_count].append(query)

    return {
        'columns': columns,
        'width': 100.0 / columns_count,
        'platform': platform,
    }
