#-*- coding: UTF-8 -*-
import nile
import argparse
import time
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)
from copy import deepcopy
import uatraits
import urllib
from datetime import datetime as dt, timedelta
import os
import sys
import codecs
from random import random
import hashlib
import urllib2
import json
import requests

# URL template for the per-user video "vitrina" request; the single `{}`
# placeholder (the `vitrina_puid` experiment flag) is filled with the user's
# puid via str.format. The response is parsed as JSON by the caller.
VITRINA_REQUEST_TEMPLATE = "https://yandex.ru/video/?&exp_flags=video_new_morda_redirect=0&exp_flags=vitrina_puid={}&exp_flags=vitrina_sp=relev=rand_enable=0&exp_flags=vitrina_filter=vh&json_dump"
# YT table path read to build the onto_id -> uuid mapping for "ott-movie" rows.
VIDEO_HOSTING_EPISODES = "//home/video-hosting/ya-video/episodes"

# Seconds to wait for one vitrina response before skipping that user; without
# a timeout a single stalled connection would hang the whole run.
VITRINA_REQUEST_TIMEOUT = 60


def _load_vh_film_uuids(cluster):
    """Return a dict onto_id -> uuid built from "ott-movie" rows of the episodes table.

    Rows with a falsy ``onto_id`` are ignored.
    """
    film_uuids = {}
    for rec in cluster.driver.read(VIDEO_HOSTING_EPISODES):
        if rec["onto_id"] and rec["content_type"] == "ott-movie":
            film_uuids[rec["onto_id"]] = rec["uuid"]
    return film_uuids


def _select_film(response, vh_film_uuid, min_kp_rating):
    """Return the first acceptable query of the top vitrina category, or None.

    Only the first category of the response is inspected, and only when its
    type is CATEG_FILM or CATEG_ANIM_FILM. A query is acceptable when its
    ``ontoid`` is a key of ``vh_film_uuid``, its ``cgi["personal"]`` flag is
    set, and the ``original_rating_value`` of its first rating entry is not
    below ``min_kp_rating``.

    Raises KeyError / IndexError / TypeError / AttributeError when the
    response does not have the expected shape; the caller treats that as
    "skip this user".
    """
    top_category = response["tmpl_data"]["blocks"]["contents"]["netflix"]["netflix"]["categories"][0]
    if top_category["type"] not in ("CATEG_FILM", "CATEG_ANIM_FILM"):
        return None
    for query in top_category["queries"]:
        if query["ontoid"] not in vh_film_uuid:
            continue
        # Deliberately `== False` rather than `not ...`: only an absent or
        # explicitly false/zero flag rejects the query, exactly as before.
        if query["cgi"].get("personal", False) == False:  # noqa: E712
            continue
        if query["rating"][0].get("original_rating_value", 0) < min_kp_rating:
            continue
        return query
    return None


def main():
    """Collect one personal film recommendation per user and write them to YT.

    Command-line arguments:
      --input_table    YT table whose rows carry a ``puid`` column.
      --min_kp_rating  minimal accepted ``original_rating_value`` (default 7.5).
      --limit          maximum number of recommendation records to write.
      --output_table   YT table that receives the resulting records.

    For each input row the vitrina is requested for that puid and the first
    acceptable film of the top category (see ``_select_film``) is stored as a
    record with ``puid``, ``film``, ``stream_id``, ``object_id`` and ``thumb``.
    Users whose request fails or whose response has an unexpected shape are
    skipped with a note on stderr (best-effort processing).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_table', type=str, required=True)
    parser.add_argument('--min_kp_rating', type=float, default=7.5)
    parser.add_argument('--limit', type=int, required=True)
    parser.add_argument('--output_table', type=str, required=True)
    args = parser.parse_args()

    cluster = clusters.yt.Hahn().env(parallel_operations_limit=10,
                                     yt_spec_defaults=dict(
                                         pool_trees=["physical"],
                                         tentative_pool_trees=["cloud"]
                                     ),
                                     templates=dict(
                                         tmp_root='//tmp',
                                         title='GetFilmRecommendation'
                                     ))
    vh_film_uuid = _load_vh_film_uuids(cluster)

    # Per-user failures that mean "no recommendation for this user". Listed
    # explicitly so that KeyboardInterrupt / SystemExit and genuine
    # programming errors (e.g. NameError) are no longer swallowed.
    skippable_errors = (
        requests.RequestException,  # network failure or timeout
        ValueError,                 # response body is not valid JSON
        KeyError, IndexError, TypeError, AttributeError,  # unexpected shape
    )

    to_write = []
    for rec in cluster.driver.read(args.input_table):
        # `>=` so that at most `limit` records are written; the previous
        # `>` check let one extra record through.
        if len(to_write) >= args.limit:
            break
        puid = rec["puid"]
        try:
            response = requests.get(VITRINA_REQUEST_TEMPLATE.format(puid),
                                    timeout=VITRINA_REQUEST_TIMEOUT).json()
            query = _select_film(response, vh_film_uuid, args.min_kp_rating)
            if query is None:
                continue
            record = Record(puid=puid,
                            film=query["title"],
                            stream_id=vh_film_uuid[query["ontoid"]],
                            object_id=query["ontoid"],
                            # Swap the last 8 characters of the thumbnail URL
                            # for the fixed "S120x120Top" suffix.
                            thumb="https:" + query["thumb_url"][:-8] + "S120x120Top")
        except skippable_errors as err:
            sys.stderr.write("skipping puid {}: {!r}\n".format(puid, err))
            continue
        to_write.append(record)
        # Progress indicator: number of records collected so far.
        print(len(to_write))
    cluster.driver.write(args.output_table, to_write)

# Run the job only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
