# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os
import copy
import random
import collections


# YT cluster handle shared by every job in this script: Hahn cluster, the
# 'vika-pavlova' pool, a `job_root` path template and default operation spec.
# NOTE(review): tentative_pool_trees presumably lets operations spill over to
# the "cloud" pool tree -- confirm against the YT scheduler documentation.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        'job_root': '//home/videolog/vika-pavlova/video_recommendations',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10,
)
def parse_results(recs):
    """Split every assignment's tasks into regular tasks and honeypots.

    Mapper. For each input record the JSON fields ``task_suite_raw_tasks``
    and ``assignment_raw_solutions`` are decoded and paired by position.
    Tasks are then distributed over three lists:

    * ``pers_hps`` -- tasks whose ``known_solutions`` has more than one entry;
    * ``cold_hps`` -- tasks with a non-empty ``known_solutions`` of at most
      one entry;
    * ``tasks``    -- tasks without known solutions.

    NOTE(review): "pers" / "cold" presumably stand for personalised and
    cold-start honeypots -- confirm with the project owner.

    Yields the input record extended with the three lists plus the decoded
    ``raw_tasks`` and ``results``.
    """
    for rec in recs:
        raw_tasks = json.loads(rec["task_suite_raw_tasks"])
        results = json.loads(rec["assignment_raw_solutions"])

        tasks, pers_hps, cold_hps = [], [], []

        for idx, task in enumerate(raw_tasks):
            known = task['known_solutions']

            if not known:
                # Ordinary task: nothing to compare the answer against.
                tasks.append({
                    'inputValues': task['input_values'],
                    'outputValues': results[idx]['output_values'],
                })
                continue

            # Honeypot: the number of known solutions decides its kind.
            bucket = pers_hps if len(known) > 1 else cold_hps
            bucket.append({
                'inputValues': task['input_values'],
                'knownSolutions': task['known_solutions'],
                'outputValues': results[idx]['output_values'],
            })

        yield Record(
            rec,
            tasks=tasks,
            pers_hps=pers_hps,
            cold_hps=cold_hps,
            raw_tasks=raw_tasks,
            results=results,
        )

def aggr_results(groups):
    """Reduce parsed assignments into per-worker honeypot skill scores.

    Reducer over records grouped by worker. Honeypot answers are turned into
    'ok' / 'bad' verdicts:

    * ``pers_hps``: one verdict per honeypot; a worker label greater than 0
      counts as 'bad', anything else as 'ok'.
    * ``cold_hps``: one verdict per input record. Worker labels are summed
      separately for honeypots whose reference label is '+2' and for all
      other honeypots; the record is 'ok' when the first sum is not smaller
      than the second.

    Each partial skill (``hp_skill``, ``cold_skill``) is the rounded
    percentage of 'ok' among the first 15 verdicts, or -1 when there are 3 or
    fewer verdicts. The final ``skill`` is ``0.7 * hp_skill + 0.3 *
    cold_skill`` when both are available, otherwise whichever one is
    available, otherwise -1.

    Yields one Record per worker with ``worker_id``, both verdict lists and
    the three skill values.
    """

    def percent_ok(verdicts):
        # Too few observations to judge: report the "unknown" sentinel.
        if len(verdicts) <= 3:
            return -1
        sample = verdicts[:15]
        return round(sample.count('ok') * 100. / len(sample))

    for key, recs in groups:
        pers_hps = []
        cold_hps = []
        for rec in recs:
            # A missing / empty honeypot list contributes no verdicts.
            for item in rec['pers_hps'] or []:
                if int(item['outputValues']['label']) > 0:
                    pers_hps.append('bad')
                else:
                    pers_hps.append('ok')

            # The guard matters here: without cold honeypots both sums would
            # be 0 and the record would wrongly add an 'ok' verdict.
            if rec['cold_hps']:
                good_score = 0
                bad_score = 0
                for item in rec['cold_hps']:
                    if item['knownSolutions'][0]['output_values']['label'] == '+2':
                        good_score += int(item['outputValues']['label'])
                    else:
                        bad_score += int(item['outputValues']['label'])
                cold_hps.append('ok' if good_score >= bad_score else 'bad')

        hp_skill = percent_ok(pers_hps)
        cold_skill = percent_ok(cold_hps)

        # -1 is the only "no data" marker; a real score of 0 is valid and must
        # take part in the result (comparing with `> 0` used to discard it).
        has_hp = hp_skill >= 0
        has_cold = cold_skill >= 0
        if has_hp and has_cold:
            skill = round(0.7 * hp_skill + 0.3 * cold_skill)
        elif has_hp:
            skill = hp_skill
        else:
            skill = cold_skill

        yield Record(worker_id=key.worker_id, pers_hps=pers_hps, cold_hps=cold_hps,
                     hp_skill=hp_skill, cold_skill=cold_skill, skill=skill)


def recalc_skills(start_date, end_date):
    """Rebuild the per-worker skill table from exported Toloka results.

    Reads the export tables for the ``start_date..end_date`` range, keeps
    assignments of projects '29402' and '30573' approved after the timestamp
    1590537600 (2020-05-27 00:00:00 UTC) that have a non-empty solutions
    list, parses them, aggregates skills per worker and writes the result,
    sorted by ``skill``, to the ``tolokers_skills`` table.

    :param start_date: first table name of the range, as a string.
    :param end_date: last table name of the range, as a string.
    """
    job = cluster.job()

    exported = job.table(
        '//home/toloka/prod/export/results/{' + start_date + '..' + end_date + '}'
    )

    # Only the two target projects, and only sufficiently recent approvals.
    recent_project_rows = exported.filter(
        sf.custom(
            lambda x, y: x in ['29402', '30573'] and y > 1590537600,
            "project_id", "assignment_approve_time",
        )
    )

    narrowed = recent_project_rows.project(
        "worker_id",
        "task_suite_raw_tasks",
        "assignment_raw_solutions",
        "assignment_correctness_results",
    )

    # "[]" is the serialized form of an assignment without any solutions.
    raw = narrowed.filter(
        sf.custom(lambda x: x != "[]", "assignment_raw_solutions")
    )

    parsed = raw.map(parse_results)

    per_worker = parsed.groupby("worker_id").reduce(aggr_results)
    per_worker.sort('skill').put(
        '//home/videolog/vika-pavlova/video_recommendations/tolokers_skills'
    )

    job.run()


def main():
    """Parse the command line and recalculate skills for a 60-day window.

    ``--date`` (YYYY-MM-DD) is the last day of the window; the first day is
    60 days earlier. ``--carousel_type`` is required by the command line but
    its value is not used by this function.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--date', '--carousel_type'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    end_date = options.date
    window_start = (
        datetime.datetime.strptime(end_date, '%Y-%m-%d')
        - datetime.timedelta(days=60)
    )
    start_date = window_start.date().isoformat()

    recalc_skills(start_date, end_date)

# Run the skill recalculation only when executed as a script, not on import.
if __name__ == '__main__':
    main()
