# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os
import copy
import random


# Module-level YT cluster handle (Hahn, pool 'vika-pavlova'); get_tolokers()
# creates its job from it. The environment registers a `job_root` path
# template, sets the default pool trees for operations and caps the number
# of operations running in parallel.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        'job_root': '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/offline_recomms',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10,
)

def _skill_columns(stream, prefix):
    """Keep ``worker_id`` and copy the skill fields under prefixed names.

    ``skill_id`` is copied to ``<prefix>_skill`` and ``skill_value`` to
    ``<prefix>_value`` so that the per-skill streams can later be joined on
    ``worker_id`` with distinct column names.
    """
    prefixed = {
        prefix + '_skill': ne.custom(lambda x: x, 'skill_id'),
        prefix + '_value': ne.custom(lambda x: x, 'skill_value'),
    }
    return stream.project('worker_id', **prefixed)


def get_tolokers():
    """Build and store the table of currently selected Toloka workers.

    The job:

    1. Reads the worker skill log, keeps the three tracked skills and takes
       the top-1 record by ``timestamp`` per (worker, skill).
    2. Keeps workers whose exam skill value is at least the threshold and
       whose old/new skill values are either absent or at least the
       threshold.
    3. Counts APPROVED assignments submitted within the last two weeks in
       the tracked projects per (worker, project) and right-joins that
       count onto the selected workers.
    4. Takes the 1000 rows with the highest count plus 500 random rows for
       workers not present in that top, attaches uids and writes
       ``workerId`` / ``puid`` to the output table.

    Returns nothing; the result is the table written by ``job.run()``.
    """
    exam_skill_id = 10461
    old_skill_id = 10460
    new_skill_id = 14170
    tracked_skill_ids = (exam_skill_id, old_skill_id, new_skill_id)
    min_skill_value = 80

    tracked_project_ids = ('30573', '19656', '29402')
    activity_window_sec = 14 * 24 * 60 * 60  # two weeks == 1209600 seconds
    top_size = 1000
    random_size = 500

    # Computed once while the job graph is built, so every row is compared
    # against the same cutoff instead of calling time.time() per record.
    submit_cutoff = time.time() - activity_window_sec

    job = cluster.job()

    # All available tolokers: one record per (worker, tracked skill),
    # presumably the most recent one since it is the top-1 by timestamp.
    raw = job.table(
        '//home/toloka/prod/export/skills/worker_skill_logs'
    ).filter(
        sf.custom(lambda x: x in tracked_skill_ids, 'skill_id')
    ).groupby(
        'worker_id', 'skill_id'
    ).top(1, by='timestamp')

    # Explicit None checks replace the reliance on Python 2 ordering, where
    # `None >= 80` is silently False (and a TypeError on Python 3).
    exam = _skill_columns(
        raw.filter(
            sf.custom(
                lambda x, y: (x == exam_skill_id and
                              y is not None and y >= min_skill_value),
                'skill_id', 'skill_value'
            )
        ),
        'exam'
    )
    old_skill = _skill_columns(
        raw.filter(sf.custom(lambda x: x == old_skill_id, 'skill_id')),
        'old'
    )
    new_skill = _skill_columns(
        raw.filter(sf.custom(lambda x: x == new_skill_id, 'skill_id')),
        'new'
    )

    # A worker must have passed the exam; the old/new skills may be missing
    # (no row after the left join) but must not be below the threshold.
    tolokers = exam.join(
        old_skill, by='worker_id', type='left'
    ).join(
        new_skill, by='worker_id', type='left'
    ).filter(
        sf.custom(
            lambda x, y: ((x is None or x >= min_skill_value) and
                          (y is None or y >= min_skill_value)),
            'new_value', 'old_value'
        )
    )

    # Submitted tasks
    tasks_raw = job.table(
        '//home/toloka/prod/export/assignments/results_v56'
    ).project(
        "project_id", "assignment_assignment_id", "assignment_status",
        "task_suite_id", "worker_id", "assignment_submit_time",
        "pool_training"
    )

    all_assignments = tasks_raw.filter(
        sf.and_(
            sf.equals("assignment_status", "APPROVED"),
            sf.custom(lambda x: x is not None and x > submit_cutoff,
                      "assignment_submit_time"),
            sf.custom(lambda x: x in tracked_project_ids, "project_id")
        )
    )

    # NOTE(review): activity is keyed by (worker_id, project_id), so a worker
    # active in several projects yields several rows here and may therefore
    # appear more than once in the output -- confirm this is intended.
    activity = all_assignments.groupby(
        "worker_id", "project_id"
    ).aggregate(
        reqs=na.count()
    ).join(tolokers, by="worker_id", type='right')

    top = activity.top(top_size, by='reqs')

    # Named `random_sample` so the imported `random` module is not shadowed.
    # The 'left_only' join presumably keeps only activity rows whose worker
    # is absent from `top`.
    random_sample = activity.join(
        top, by="worker_id", type='left_only'
    ).random(random_size)

    current_tolokers = job.concat(top, random_sample)

    # Add puids
    workers_puids = job.table('//home/toloka/prod/export/workers/puids')

    current_tolokers.join(
        workers_puids, by="worker_id"
    ).project(
        workerId=ne.custom(lambda x: x, "worker_id"),
        puid=ne.custom(lambda x: x, 'uid')
    ).put('//home/videolog/vika-pavlova/video_recommendations/current_tolokers')

    job.run()


def main():
    """Command-line entry point: check the flags, then rebuild the table.

    ``--date`` and ``--carousel_type`` are both mandatory string flags.
    They are parsed, so a missing flag aborts the run, but their values are
    not used by this function.
    """
    cli = argparse.ArgumentParser()
    for flag in ('--date', '--carousel_type'):
        cli.add_argument(flag, type=str, required=True)
    cli.parse_args()

    get_tolokers()


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
