#-*- coding: UTF-8 -*-
from common import *


# 64 MiB expressed in bytes; main() passes this as the "max_row_weight"
# option of the table writer.
MAX_ROW_WEIGHT_BYTES = 64 * 1024 * 1024


def get_genre_ru(value):
    """Return the first genre of a film object that is relevant for Russia.

    ``value`` is a JSON-encoded object.  Genres are looked up only when
    ``isa.Wtype`` equals ``"Film"``.  Every entry of ``film_genres`` holds a
    ``formatted`` list; an item of that list is accepted when its
    ``RelevLocale`` contains ``ru``, ``xussr`` or ``universe``.

    The genre text is the second ``|``-separated field of the item's
    ``value`` with its last two characters dropped (presumably the closing
    ``]]`` of a ``[[id|label]]`` link -- TODO confirm against real data).

    Returns the genre of the first accepted item, or ``None`` when the
    object is not a film or no item is accepted.  Raises ``KeyError`` /
    ``IndexError`` if the accepted item has no ``value`` or no ``|`` in it.
    """
    v = json.loads(value)
    if v.get("isa", {}).get("Wtype") != "Film":
        return None

    accepted_locales = ('ru', 'xussr', 'universe')
    for genre_info in v.get("film_genres", []):
        for formatted in genre_info.get("formatted", []):
            locales = formatted.get('RelevLocale', [])
            if any(locale in accepted_locales for locale in locales):
                # Only the first match is ever used, so stop right here
                # instead of parsing (and possibly failing on) later items.
                return formatted["value"].split('|')[1][:-2]
    return None


def get_year(value):
    """Return the ``value`` field of the first ``InitDate`` entry.

    ``value`` is a JSON-encoded object.  Returns ``None`` when ``InitDate``
    is missing, ``null`` or an empty list, so one object without dates does
    not raise.  Raises ``KeyError`` if the first entry has no ``value``.
    """
    init_dates = json.loads(value).get("InitDate")
    if not init_dates:
        return None
    return init_dates[0]["value"]

def get_rating(value):
    """Return the ``value`` of the first rating whose ``type`` is ``kinopoisk``.

    ``value`` is a JSON-encoded object with an optional ``rating`` list.
    Returns ``None`` when the list is absent or has no ``kinopoisk`` entry.
    """
    ratings = json.loads(value).get("rating", [])
    kinopoisk_values = (
        entry["value"] for entry in ratings if entry.get("type") == "kinopoisk"
    )
    # The generator is lazy, so only entries up to the first match are read.
    return next(kinopoisk_values, None)


def is_animation(value):
    """Tell whether any ``isa.tags`` entry mentions ``Animation``.

    ``value`` is a JSON-encoded object.  Returns ``True`` when the ``value``
    of at least one tag contains the substring ``Animation``; otherwise
    ``False``, including when ``isa`` or ``isa.tags`` is absent.
    """
    # Default to an empty dict so objects without "isa" yield False instead
    # of failing on None.get(...).
    tags = json.loads(value).get("isa", {}).get("tags", [])
    return any("Animation" in tag["value"] for tag in tags)


def get_thumb(value):
    """Build the thumbnail URL for an object's ``vertical_film`` image.

    ``value`` is a JSON-encoded object with an optional ``Image`` list.
    Only images whose ``avatar_type`` is ``vertical_film`` are considered.
    The first one with ``ru`` in ``RelevLocale`` wins; if there is none, the
    last other ``vertical_film`` image is used.

    Returns the URL string, or ``""`` when no usable image id is found.
    """
    localized_id = None
    fallback_id = None
    for image in json.loads(value).get("Image", []):
        if image.get("avatar_type", "") != "vertical_film":
            continue
        if "ru" in image.get("RelevLocale", []):
            localized_id = image["mds_avatar_id"]
            break
        # No "ru" locale on this image: remember it, later ones overwrite it.
        fallback_id = image["mds_avatar_id"]

    chosen_id = localized_id or fallback_id or ""
    if chosen_id:
        return "https://avatars.mds.yandex.net/get-entity_search/{0}/S120x120Top".format(chosen_id)
    return ""


def filter_unused_blogger_fields(recs):
    """Yield a trimmed ``Record`` for every input record.

    Each output record keeps ``Name``, ``UUID``, ``ContentGroupID``,
    ``UpdateTime``, ``ParentUUID`` and ``ParentName`` unchanged and remaps
    the rest:

    * ``Value`` receives the input's ``ContentSourceUrl``;
    * ``Resources`` becomes ``{"thumbnail": <input Value>}``;
    * ``ParentResources`` is reduced to its ``content_source_url`` key
      (``None`` when the key is absent).
    """
    for rec in recs:
        trimmed = dict(
            Name=rec["Name"],
            UUID=rec["UUID"],
            ContentGroupID=rec["ContentGroupID"],
            # The source URL takes over the "Value" slot ...
            Value=rec["ContentSourceUrl"],
            UpdateTime=rec["UpdateTime"],
            ParentUUID=rec["ParentUUID"],
            ParentName=rec["ParentName"],
            ParentResources={
                "content_source_url": rec["ParentResources"].get("content_source_url"),
            },
            # ... while the incoming "Value" is stored as the thumbnail.
            Resources={"thumbnail": rec["Value"]},
        )
        yield Record(**trimmed)

def main():
    """Declare and run the ``UpdateVHInfo`` job on the Hahn YT cluster.

    Two pipelines are declared on a single job before it is run:

    * CONTENT_RESOURCE joined with ACTUAL_URLS on ``ContentGroupID``, kept
      only for ``content_source_url`` resources whose ``Value`` starts with
      ``www.youtube.com/watch?v=``, then joined with CONTENT_RESOURCE again
      and narrowed to ``thumbnail`` resources.  The rows are sorted by
      ``UUID``, passed through ``filter_unused_blogger_fields`` and written
      to VH_BLOGGERS_INFO.
    * EPISODES with ``content_type`` ``ott-episode`` or ``ott-movie`` joined
      with ONTODB_BASE (``onto_id`` against ``key``).  ``genre``, ``year``,
      ``is_animation``, ``thumb`` and ``rating`` are computed from the
      joined ``value`` column; the rows are sorted by ``uuid`` and written
      to VH_OTT_INFO.
    """
    writer_options = {"table_writer": {"max_row_weight": MAX_ROW_WEIGHT_BYTES}}

    hahn = clusters.yt.Hahn()
    cluster = hahn.env(
        parallel_operations_limit=10,
        yt_spec_defaults=dict(
            job_io=writer_options,
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"],
        ),
        templates=dict(
            tmp_root='//tmp',
            title='UpdateVHInfo',
        ),
    )

    job = cluster.job()

    # --- Blogger (YouTube) videos ------------------------------------------
    youtube_sources = (
        job.table(CONTENT_RESOURCE)
        .join(job.table(ACTUAL_URLS), by='ContentGroupID')
        .filter(sf.and_(
            sf.equals('ResourceName', 'content_source_url'),
            sf.custom(lambda x: x.startswith('www.youtube.com/watch?v='), 'Value'),
        ))
        # Keep the URL under its own name: the second join below brings in a
        # fresh 'Value' column from CONTENT_RESOURCE.
        .project(
            'Name', 'UUID', 'ContentGroupID', 'UpdateTime',
            'ParentUUID', 'ParentName', 'ParentResources',
            ContentSourceUrl=ne.custom(lambda Value: Value, 'Value'),
        )
    )
    youtube_thumbnails = (
        youtube_sources
        .join(job.table(CONTENT_RESOURCE), by='ContentGroupID')
        .filter(sf.equals('ResourceName', 'thumbnail'))
        .sort('UUID')
        .project(
            'Name', 'UUID', 'ContentGroupID', 'ContentSourceUrl', 'Value',
            'UpdateTime', 'ParentUUID', 'ParentName', 'ParentResources',
        )
    )
    youtube_thumbnails.map(filter_unused_blogger_fields).put(VH_BLOGGERS_INFO)

    # --- OTT episodes and movies -------------------------------------------
    ott_items = (
        job.table(EPISODES)
        .filter(sf.or_(
            sf.equals("content_type", "ott-episode"),
            sf.equals("content_type", "ott-movie"),
        ))
        .project(
            'content_type', 'season_number', 'episode_number', 'full_title',
            'episode_name', 'uuid', 'onto_id', 'series_name',
        )
    )
    ott_enriched = (
        ott_items
        .join(job.table(ONTODB_BASE), by_left='onto_id', by_right='key')
        .project(
            ne.all(),
            genre=ne.custom(get_genre_ru, 'value'),
            year=ne.custom(get_year, 'value'),
            is_animation=ne.custom(is_animation, 'value'),
            thumb=ne.custom(get_thumb, 'value'),
            rating=ne.custom(get_rating, 'value'),
        )
        .sort('uuid')
        .project(
            'onto_id', 'content_type', 'season_number', 'episode_number',
            'full_title', 'episode_name', 'uuid', 'series_name',
            'genre', 'year', 'is_animation', 'thumb', 'rating',
        )
    )
    ott_enriched.put(VH_OTT_INFO)

    job.run()


# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

