#-*- coding: UTF-8 -*-
from common import *

# Number of leading "Assoc" entries inspected per object when collecting
# associations (see get_vh_associations).
TOP_ASSOCATIONS_COUNT = 10
# NOTE(review): not referenced in the visible part of this file; presumably the
# minimal association count required to keep a recommendation - confirm.
MIN_ASSOCIATIONS_COUNT = 1
# Upper bound on the "watched_objects" entries kept per recommendation
# (see prepare_recoms).
MAX_WATCHED_ASSOCIATIONS_COUNT = 5
# Values stored in the "watch_source" field of watched-object records.
TV_ONLINE_ETHER_SOURCE = "tv_online_ether"
TV_ONLINE_OTHER_SOURCES = "tv_online_other"
OTHER_SOURCES = "other"

# Minimal "tvt" for OTT content to count as watched
# (see get_tv_online_watched_objects). Presumably seconds, since it is compared
# together with HEARTBEAT_TICK_SECONDS - TODO confirm.
MIN_FILM_CONTENT_TVT = 5 * 60

# Minimal "tvt" for blogger/channel content to count as watched
# (see get_tv_online_watched_bloggers and get_tv_online_watched_channels).
MIN_CONTENT_TVT = 3 * 60

# Subtracted from the content duration when computing the watch threshold, so
# content shorter than the minimal "tvt" can still count as watched.
HEARTBEAT_TICK_SECONDS = 30

# NOTE(review): not referenced in the visible part of this file.
NEWS_CHANNEL_ID = "1542781193"

# NOTE(review): not referenced in the visible part of this file; presumably a
# per-operation memory limit (units unknown) - confirm against the caller.
OP_MEMORY_LIMIT = 8192

def get_genre(value):
    """Derive a single genre label from a JSON-encoded object description.

    The label depends on ``isa.Wtype``:
      * "Film" - the first genre extracted from ``film_genres[*].formatted[*]``
        entries whose ``RelevLocale`` contains "us"; the genre is the second
        '|'-separated part of ``value`` without its last two characters;
      * "Food" - "food";
      * "Auto" - "auto";
      * "Soft" - "game" when some ``isa.otype`` entry has value "Soft" and
        subvalue "VideoGame@on".

    Returns None when no genre can be derived.
    """
    card = json.loads(value)
    wtype = card.get("isa", {}).get("Wtype")

    if wtype == "Film":
        # Every matching entry is parsed (not only the first one), so malformed
        # entries still surface as errors.
        film_genres = [
            formatted["value"].split('|')[1][:-2]
            for genre_info in card.get("film_genres", [])
            for formatted in genre_info.get("formatted", [])
            if "us" in formatted["RelevLocale"]
        ]
        if film_genres:
            return film_genres[0]
        return None

    if wtype == "Food":
        return "food"

    if wtype == "Auto":
        return "auto"

    if wtype == "Soft":
        for otype in card.get("isa", {}).get("otype", []):
            if otype.get("value") == "Soft" and otype.get("subvalue") == "VideoGame@on":
                return "game"

    return None

def get_title(value):
    """Return the first title whose locales include 'ru', 'xussr' or 'universe'.

    ``value`` is a JSON string; its "Title" entries are scanned in order and
    the "value" of the first entry having a matching "RelevLocale" item is
    returned. Returns None when nothing matches.
    """
    card = json.loads(value)
    accepted_locales = ('ru', 'xussr', 'universe')

    for entry in card.get("Title", []):
        if any(locale in accepted_locales for locale in entry.get("RelevLocale", [])):
            return entry["value"]
    return None

def get_type(value):
    """Return "<value>/<subvalue>" of the first ``isa.otype`` entry.

    ``value`` is a JSON string. Missing "value"/"subvalue" keys are replaced
    with empty strings. An empty string is returned when the object has no
    "isa" section or its "otype" list is missing, null or empty (previously an
    empty list raised IndexError).
    """
    card = json.loads(value)
    otypes = (card.get("isa") or {}).get("otype")
    if not otypes:
        return ""
    first = otypes[0]
    return first.get("value", "") + "/" + first.get("subvalue", "")

def get_bloggers_info(records):
    """Group blogger episode records by their parent (blogger) UUID.

    Returns a dict ``ParentUUID -> {"name", "channel", "episodes"}`` where
    "name" and "channel" come from the first record seen for that parent and
    "episodes" lists one entry per record, in input order. Thumbnails that do
    not start with "https://" get the "https:" prefix prepended.
    """
    by_parent = {}
    for rec in records:
        thumb = rec["Resources"]["thumbnail"]
        if not thumb.startswith("https://"):
            thumb = "https:" + thumb

        episode = {
            "name": rec["Name"],
            "uuid": rec["UUID"],
            "content_group_id": str(rec["ContentGroupID"]),
            "source": rec["Value"],
            "thumb": thumb,
            "ts": rec["UpdateTime"],
        }

        parent_uuid = rec["ParentUUID"]
        if parent_uuid not in by_parent:
            # Blogger-level fields are taken from the first record only.
            by_parent[parent_uuid] = {
                "name": rec["ParentName"],
                "channel": rec["ParentResources"].get("content_source_url"),
                "episodes": [episode],
            }
        else:
            by_parent[parent_uuid]["episodes"].append(episode)
    return by_parent

def get_ott_info(records):
    """Split OTT catalogue records into serial and film lookups.

    Records with a falsy "onto_id" are ignored, as are records whose
    "content_type" is neither "ott-episode" nor "ott-movie".

    Returns ``(serial_info, film_info)``, both keyed by "onto_id":
      * a serial entry keeps the metadata of the last episode record seen, the
        list of all its episodes ("episodes_info", in input order) and the
        uuid/season/episode/name/title of its earliest episode, ordered by
        (season, episode);
      * a film entry is built from the last "ott-movie" record for that id.
    """
    serials = {}
    films = {}

    for rec in records:
        onto_id = rec["onto_id"]
        if not onto_id:
            continue

        content_type = rec["content_type"]
        if content_type == "ott-episode":
            episode = {
                "season": rec["season_number"],
                "episode": rec["episode_number"],
                "title": rec["full_title"],
                "episode_name": rec["episode_name"],
                "uuid": rec["uuid"],
            }
            known_episodes = serials.get(onto_id, {}).get("episodes_info", [])
            # The entry is rebuilt on every episode: metadata comes from the
            # latest record, the episode list is carried over.
            serials[onto_id] = {
                "serial_name": rec["series_name"],
                "episodes_info": known_episodes + [episode],
                "genre": rec["genre"],
                "year": rec["year"],
                "is_animation": rec['is_animation'],
                "thumb": rec['thumb'],
                "rating": rec["rating"],
            }
        elif content_type == "ott-movie":
            films[onto_id] = {
                "title": rec["episode_name"],
                "uuid": rec["uuid"],
                "genre": rec["genre"],
                "year": rec["year"],
                "is_animation": rec['is_animation'],
                "thumb": rec['thumb'],
                "rating": rec["rating"],
            }

    for entry in serials.values():
        # Earliest episode by (season, episode); the first one wins on ties.
        first = min(entry["episodes_info"], key=lambda ep: (ep["season"], ep["episode"]))
        entry["uuid"] = first["uuid"]
        entry["first_season"] = first["season"]
        entry["first_episode"] = first["episode"]
        entry["first_episode_name"] = first["episode_name"]
        entry["first_episode_title"] = first["title"]

    return serials, films

def get_vh_info(cluster, serial_info, film_info, bloggers_info):
    """Load the VH bloggers and OTT tables and sanity-check their sizes.

    The ``serial_info``, ``film_info`` and ``bloggers_info`` arguments are kept
    only for backward compatibility: they are neither read nor modified.
    Previously the loaded data was bound to these local names and silently
    dropped; it is now returned so that callers can actually use it.

    Returns:
        ``(serial_info, film_info, bloggers_info)`` built from the tables read
        through ``cluster.driver``.

    Raises:
        Exception: when fewer than 100 films+serials or fewer than 30 bloggers
        were loaded.
    """
    loaded_bloggers = get_bloggers_info(cluster.driver.read(VH_BLOGGERS_INFO))
    loaded_serials, loaded_films = get_ott_info(cluster.driver.read(VH_OTT_INFO))

    if len(loaded_films) + len(loaded_serials) < 100 or len(loaded_bloggers) < 30:
        raise Exception("Problems with data")

    return loaded_serials, loaded_films, loaded_bloggers

def get_search_watched_objects(recs):
    """Yield one watched-object record per (uid, object) found in search stats.

    For every input record the objects of "web_requests_stats" and
    "video_requests_stats" are merged (UNDEFINED_OBJECT_ID is ignored):
      * "timestamp" is the latest timestamp seen for the object in either
        source;
      * "series" lists the episodes parsed from the video stats; each series
        item is split on '_' and parts 1 and 3 are read as season and episode
        numbers (presumably items look like "season_<n>_episode_<m>" - TODO
        confirm); every episode carries the latest video timestamp;
      * "watch_source" is always OTHER_SOURCES.

    Records whose "web_requests_stats" is None are skipped. A missing or null
    "video_requests_stats" is treated as empty (a null value used to raise
    TypeError).
    """
    for rec in recs:
        web_stats = rec["web_requests_stats"]
        if web_stats is None:
            continue
        # .get() returns None for a present-but-null field, hence the "or".
        video_stats = rec.get("video_requests_stats") or {}

        uid = rec["uid"]

        watched_objects = {}
        for object_id in web_stats:
            if object_id == UNDEFINED_OBJECT_ID:
                continue
            watched_objects[object_id] = {"series": [],
                                          "watch_source": OTHER_SOURCES,
                                          "timestamp": max(web_stats[object_id]["timestamps"])}

        for object_id in video_stats:
            if object_id == UNDEFINED_OBJECT_ID:
                continue
            stats = video_stats[object_id]
            timestamp = max(stats["timestamps"])
            series = []
            for elem in stats["series"]:
                parts = elem.split('_')
                series.append({"season": int(parts[1]),
                               "episode": int(parts[3]),
                               "timestamp": timestamp})
            entry = watched_objects.setdefault(object_id, {"series": [],
                                                           "watch_source": OTHER_SOURCES,
                                                           "timestamp": timestamp})
            entry["timestamp"] = max(entry["timestamp"], timestamp)
            entry["series"] += series

        for object_id in watched_objects:
            entry = watched_objects[object_id]
            yield Record(uid=uid,
                         object_id=object_id,
                         series=entry["series"],
                         watch_source=entry["watch_source"],
                         timestamp=int(entry["timestamp"]))

@with_hints(files=[nfi.TableFile(VH_OTT_INFO, 'ott_info')])
def get_vitrine_stats(recs):
    """Keep only the vitrine stats that refer to known OTT serials or films.

    Yields ``Record(uid, vitrine_stats)`` for every input record that has at
    least one object id present in the attached 'ott_info' table. Records
    whose "vitrine_stats" is None are skipped.
    """
    serial_info, film_info = get_ott_info(get_records_from_file('ott_info'))
    for rec in recs:
        all_stats = rec["vitrine_stats"]
        if all_stats is None:
            continue

        known_stats = {}
        for object_id in all_stats:
            if object_id in serial_info or object_id in film_info:
                known_stats[object_id] = all_stats[object_id]

        if known_stats:
            yield Record(uid=rec["uid"], vitrine_stats=known_stats)

@with_hints(files=[nfi.TableFile(VH_BLOGGERS_INFO, 'bloggers_info')])
def get_browser_watched_bloggers(recs):
    """Attribute browser views to bloggers by matching the viewed URLs.

    A URL belongs to a blogger when it equals one of the blogger's episode
    sources (as is, or prefixed with "https://" or "http://") or the blogger's
    channel URL. For each input record with at least one matched URL a record
    is yielded with:
      * browser_blogger_view_time - blogger id -> sum of ``views[url]``;
      * browser_blogger_watches - blogger id -> list of matched URLs.

    Records whose "views" is None are skipped.
    """
    bloggers_info = get_bloggers_info(get_records_from_file('bloggers_info'))

    url_to_blogger = {}
    for blogger_id in bloggers_info:
        blogger = bloggers_info[blogger_id]
        for episode in blogger["episodes"]:
            source = episode["source"]
            for url in (source, "https://" + source, "http://" + source):
                url_to_blogger[url] = blogger_id
        channel = blogger["channel"]
        if channel:
            url_to_blogger[channel] = blogger_id

    for rec in recs:
        views = rec["views"]
        if views is None:
            continue

        view_time = {}
        watches = {}
        for url in views:
            blogger_id = url_to_blogger.get(url)
            if not blogger_id:
                continue
            view_time[blogger_id] = view_time.get(blogger_id, 0) + views[url]
            watches.setdefault(blogger_id, []).append(url)

        if view_time:
            yield Record(uid=rec["uid"],
                         browser_blogger_view_time=view_time,
                         browser_blogger_watches=watches)

@with_hints(files=[nfi.TableFile(VH_BLOGGERS_INFO, 'bloggers_info')])
def get_tv_online_watched_bloggers(recs):
    """Attribute TV-online views to bloggers by episode uuid / content group id.

    A content item counts as watched when its "tvt" is at least
    ``min(MIN_CONTENT_TVT, duration - HEARTBEAT_TICK_SECONDS)``. For each input
    record with at least one watched blogger item a record is yielded with:
      * tv_online_blogger_view_time - blogger id -> summed "tvt";
      * tv_online_blogger_watches - blogger id -> list of watched content ids.

    Records whose "tv_online_stats" is None are skipped.

    The accumulators used to be read with a stale ``blogger_id`` left over from
    the index-building loop, which mixed up totals of different bloggers; they
    are now read with the blogger that owns the current content item.
    """
    bloggers_info = get_bloggers_info(get_records_from_file('bloggers_info'))

    blogger_id_by_uuid = {}
    for owner_id in bloggers_info:
        for episode_info in bloggers_info[owner_id]["episodes"]:
            blogger_id_by_uuid[episode_info["uuid"]] = owner_id
            blogger_id_by_uuid[episode_info["content_group_id"]] = owner_id

    for rec in recs:
        tv_online_stats = rec["tv_online_stats"]
        if tv_online_stats is None:
            continue

        tv_online_blogger_view_time = {}
        tv_online_blogger_watches = {}

        for content_id in tv_online_stats:
            stats = tv_online_stats[content_id]
            # Content shorter than MIN_CONTENT_TVT still counts when it was
            # watched almost to the end (one heartbeat tick of slack).
            if stats["tvt"] < min(MIN_CONTENT_TVT, stats["duration"] - HEARTBEAT_TICK_SECONDS):
                continue
            if content_id not in blogger_id_by_uuid:
                continue
            blogger_id = blogger_id_by_uuid[content_id]
            tv_online_blogger_view_time[blogger_id] = tv_online_blogger_view_time.get(blogger_id, 0) + stats["tvt"]
            tv_online_blogger_watches[blogger_id] = tv_online_blogger_watches.get(blogger_id, []) + [content_id]

        if len(tv_online_blogger_watches) > 0:
            yield Record(uid=rec["uid"],
                         tv_online_blogger_view_time=tv_online_blogger_view_time,
                         tv_online_blogger_watches=tv_online_blogger_watches)

class get_best_uid_puid_map(object):
    """Reducer picking, per uid, the record with the longest first field.

    ``fields`` lists the record fields to copy to the output; the length of
    ``fields[0]`` is the score. The first record with the highest score wins.
    A group without records yields a record that carries only the uid.
    """

    def __init__(self, fields):
        self.fields = fields

    def __call__(self, groups):
        for key, recs in groups:
            best_score = -1
            best_stats = {}
            for rec in recs:
                score = len(rec[self.fields[0]])
                if score <= best_score:
                    continue
                best_score = score
                # Values are copied right away, while the record is current.
                best_stats = dict((field, rec[field]) for field in self.fields)
            yield Record(uid=key["uid"], **best_stats)

@with_hints(files=[nfi.TableFile(VH_BLOGGERS_INFO, 'bloggers_info'),
                   nfi.TableFile(VH_OTT_INFO, 'ott_info')])
def get_total_tv_online_stats(recs):
    """Emit per-user watch-time records for serials, films, channels and bloggers.

    For every input record the "tv_online_stats" items are scanned and the
    following records are yielded, in this order:
      * content_type='series' - one per watched OTT episode uuid; ``info``
        describes the serial (with the data of its first episode) and ``id``
        is the serial object id;
      * content_type='film' - one per watched OTT film uuid;
      * content_type='channel' - one per channel id, with the summed "tvt";
      * content_type='blogger' - one per blogger, with the summed "tvt" and
        the blogger's most recently updated episode.

    Serial and film records are produced only for items whose
    "computed_channel" is "ott". Records whose "tv_online_stats" is None are
    skipped, like in the other tv_online mappers of this file (they used to
    raise TypeError here).
    """
    bloggers_info = get_bloggers_info(get_records_from_file('bloggers_info'))
    serial_info, film_info = get_ott_info(get_records_from_file('ott_info'))

    # Output templates keyed by content uuid; "tvt" is filled in per record.
    serial_stat_info = {}
    for object_id in serial_info:
        serial = serial_info[object_id]
        for episode_info in serial["episodes_info"]:
            serial_stat_info[episode_info["uuid"]] = {"serial_name": serial["serial_name"],
                                                      "uuid": serial["uuid"],
                                                      "object_id": object_id,
                                                      "season": serial["first_season"],
                                                      "episode": serial["first_episode"],
                                                      "episode_name": serial["first_episode_name"],
                                                      "title": serial["first_episode_title"],
                                                      "genre": serial["genre"],
                                                      "year": serial["year"],
                                                      "is_animation": serial["is_animation"],
                                                      "thumb": serial["thumb"],
                                                      "rating": serial["rating"],
                                                      "tvt": 0}

    film_stat_info = {}
    for object_id in film_info:
        film = film_info[object_id]
        film_stat_info[film["uuid"]] = {"uuid": film["uuid"],
                                        "object_id": object_id,
                                        "genre": film["genre"],
                                        "year": film["year"],
                                        "title": film["title"],
                                        "is_animation": film["is_animation"],
                                        "thumb": film["thumb"],
                                        "rating": film["rating"],
                                        "tvt": 0}

    blogger_id_by_uuid = {}
    for blogger_id in bloggers_info:
        for episode_info in bloggers_info[blogger_id]["episodes"]:
            blogger_id_by_uuid[episode_info["uuid"]] = blogger_id
            blogger_id_by_uuid[episode_info["content_group_id"]] = blogger_id

    for rec in recs:
        tv_online_stats = rec["tv_online_stats"]
        if tv_online_stats is None:
            continue

        bloggers_stat = {}
        channels_stat = {}
        watched_films = []    # (content uuid, tvt) pairs
        watched_serials = []  # (content uuid, tvt) pairs
        for content_id in tv_online_stats:
            stats = tv_online_stats[content_id]
            tvt = stats["tvt"]

            channel_id = stats.get("channel_id")
            if channel_id:
                channels_stat[channel_id] = channels_stat.get(channel_id, 0) + tvt

            blogger_id = blogger_id_by_uuid.get(content_id)
            if blogger_id:
                bloggers_stat[blogger_id] = bloggers_stat.get(blogger_id, 0) + tvt

            if stats["computed_channel"] != "ott":
                continue

            if content_id in film_stat_info:
                watched_films.append((content_id, tvt))
            if content_id in serial_stat_info:
                watched_serials.append((content_id, tvt))

        for content_id, tvt in watched_serials:
            # Copy the template so the shared lookup is never mutated.
            info = deepcopy(serial_stat_info[content_id])
            info["tvt"] = tvt
            yield Record(info=info,
                         tvt=tvt,
                         id=info['object_id'],
                         content_type='series')

        for content_id, tvt in watched_films:
            info = deepcopy(film_stat_info[content_id])
            info["tvt"] = tvt
            yield Record(info=info,
                         tvt=tvt,
                         id=info['object_id'],
                         content_type='film')

        for channel_id in channels_stat:
            yield Record(info={"channel_id": channel_id, "tvt": channels_stat[channel_id]},
                         tvt=channels_stat[channel_id],
                         id=channel_id,
                         content_type='channel')

        for blogger_id in bloggers_stat:
            episodes = bloggers_info[blogger_id]["episodes"]
            if not episodes:
                continue
            # Most recently updated episode; the first one wins on ties.
            latest = max(episodes, key=lambda episode: episode["ts"])
            yield Record(info={"blogger": bloggers_info[blogger_id]["name"],
                               "blogger_id": blogger_id,
                               "uuid": latest["uuid"],
                               "episode_name": latest["name"],
                               "thumb": latest["thumb"],
                               "tvt": bloggers_stat[blogger_id]},
                         tvt=bloggers_stat[blogger_id],
                         id=blogger_id,
                         content_type='blogger')

@with_hints(files=[nfi.TableFile(VH_OTT_INFO, 'ott_info')])
def get_tv_online_watched_objects(recs):
    """Yield watched OTT objects (serials and films) per user from tv_online stats.

    A content item counts as watched when its "tvt" is at least
    ``min(MIN_FILM_CONTENT_TVT, duration - HEARTBEAT_TICK_SECONDS)`` and its id
    is a known episode or film uuid. Items are grouped by object id; for each
    object a record is yielded with:
      * series - watched episodes ({"season", "episode", "timestamp"}), empty
        for films;
      * watch_source - TV_ONLINE_ETHER_SOURCE if any item of the object has a
        "source" listed in ETHER_VIEW_SOURCES, else TV_ONLINE_OTHER_SOURCES;
      * timestamp - the latest item timestamp, as int.

    Records whose "tv_online_stats" is None are skipped.
    """
    serial_info, film_info = get_ott_info(get_records_from_file('ott_info'))

    object_id_by_uuid = {}
    episode_by_uuid = {}
    for serial_id in serial_info:
        for episode_info in serial_info[serial_id]["episodes_info"]:
            uuid = episode_info["uuid"]
            object_id_by_uuid[uuid] = serial_id
            episode_by_uuid[uuid] = {"season": episode_info["season"],
                                     "episode": episode_info["episode"]}
    for film_id in film_info:
        object_id_by_uuid[film_info[film_id]["uuid"]] = film_id

    for rec in recs:
        tv_online_stats = rec["tv_online_stats"]
        if tv_online_stats is None:
            continue

        watched_objects = {}
        for content_id in tv_online_stats:
            stats = tv_online_stats[content_id]
            if stats["tvt"] < min(MIN_FILM_CONTENT_TVT, stats["duration"] - HEARTBEAT_TICK_SECONDS):
                continue
            if content_id not in object_id_by_uuid:
                continue

            is_ether = stats.get("source") in ETHER_VIEW_SOURCES
            object_id = object_id_by_uuid[content_id]
            watched_at = int(stats["timestamp"])

            if object_id not in watched_objects:
                watched_objects[object_id] = {
                    "series": [],
                    "watch_source": TV_ONLINE_ETHER_SOURCE if is_ether else TV_ONLINE_OTHER_SOURCES,
                    "timestamp": watched_at,
                }
            entry = watched_objects[object_id]

            # A single ether view is enough to mark the whole object as ether.
            if is_ether:
                entry["watch_source"] = TV_ONLINE_ETHER_SOURCE
            if watched_at > entry["timestamp"]:
                entry["timestamp"] = watched_at

            episode = episode_by_uuid.get(content_id)
            if episode:
                watched_episode = deepcopy(episode)
                watched_episode["timestamp"] = watched_at
                entry["series"].append(watched_episode)

        for object_id in watched_objects:
            entry = watched_objects[object_id]
            yield Record(uid=rec["uid"],
                         object_id=object_id,
                         series=entry["series"],
                         watch_source=entry["watch_source"],
                         timestamp=int(entry["timestamp"]))

def get_tv_online_watched_channels(recs):
    """Yield per-user channel watch time, split by item timestamp.

    A content item counts when its "tvt" is at least
    ``min(MIN_CONTENT_TVT, duration - HEARTBEAT_TICK_SECONDS)`` and it has a
    truthy "channel_id". The yielded ``channels_stat`` maps
    channel id -> {str(timestamp) -> summed "tvt"}. Records whose
    "tv_online_stats" is None, or without any counted item, produce nothing.
    """
    for rec in recs:
        tv_online_stats = rec["tv_online_stats"]
        if tv_online_stats is None:
            continue

        per_channel = {}
        for content_id in tv_online_stats:
            stats = tv_online_stats[content_id]
            tvt = stats["tvt"]
            if tvt < min(MIN_CONTENT_TVT, stats["duration"] - HEARTBEAT_TICK_SECONDS):
                continue

            channel_id = stats.get("channel_id")
            timestamp = str(stats["timestamp"])
            if not channel_id:
                continue

            by_timestamp = per_channel.setdefault(channel_id, {})
            by_timestamp[timestamp] = by_timestamp.get(timestamp, 0) + tvt

        if per_channel:
            yield Record(uid=rec["uid"],
                         channels_stat=per_channel)

def aggregate_blogggers_info(groups):
    """Merge browser and tv_online blogger stats of one uid into a single record.

    For each group the last record with a truthy "browser_blogger_view_time"
    supplies both browser fields, and the last record with a truthy
    "tv_online_blogger_view_time" supplies both tv_online fields. Fields that
    no record supplies are emitted as empty dicts.
    """
    field_pairs = (
        ("browser_blogger_view_time", "browser_blogger_watches"),
        ("tv_online_blogger_view_time", "tv_online_blogger_watches"),
    )
    for key, recs in groups:
        merged = {}
        for time_field, watches_field in field_pairs:
            merged[time_field] = {}
            merged[watches_field] = {}

        for rec in recs:
            for time_field, watches_field in field_pairs:
                if rec.get(time_field):
                    merged[time_field] = rec[time_field]
                    merged[watches_field] = rec[watches_field]

        yield Record(uid=key["uid"], **merged)

def aggregate_series_info(groups):
    """Combine all watched-object records of one (uid, object_id) pair.

    The output record concatenates the "series" lists in input order, keeps
    the largest "timestamp" (0 when the group is empty) and reports
    TV_ONLINE_ETHER_SOURCE as "watch_source" if any input record has it,
    OTHER_SOURCES otherwise.
    """
    for key, recs in groups:
        all_series = []
        latest_timestamp = 0
        seen_on_ether = False

        for rec in recs:
            all_series.extend(rec["series"])
            if rec["timestamp"] > latest_timestamp:
                latest_timestamp = rec["timestamp"]
            if rec["watch_source"] == TV_ONLINE_ETHER_SOURCE:
                seen_on_ether = True

        yield Record(uid=key["uid"],
                     object_id=key["object_id"],
                     series=all_series,
                     watch_source=TV_ONLINE_ETHER_SOURCE if seen_on_ether else OTHER_SOURCES,
                     timestamp=latest_timestamp)

def update_film_recoms(film_info, film_recoms, object_id, count=1, watched_object_title=None, watch_timestamp=0, watch_source="ether"):
    """Register ``object_id`` as a film recommendation, in place.

    Nothing happens when ``object_id`` is not a key of ``film_info``.
    Otherwise the entry in ``film_recoms`` is created from the film metadata
    (with "count" set to ``count``) or, if it already exists, its "count" is
    increased by ``count``. When ``watched_object_title`` is truthy, a
    {"title", "timestamp", "source"} item describing the watched object that
    led to the recommendation is appended to the entry's "watched_objects".

    Returns True if ``object_id`` is a known film, False otherwise.
    """
    if object_id not in film_info:
        return False

    if object_id in film_recoms:
        recom = film_recoms[object_id]
        recom["count"] += count
    else:
        film = film_info[object_id]
        recom = {}
        for field in ("title", "uuid", "genre", "year", "is_animation", "thumb", "rating"):
            recom[field] = film[field]
        recom["count"] = count
        film_recoms[object_id] = recom

    if watched_object_title:
        reason = {"title": watched_object_title,
                  "timestamp": watch_timestamp,
                  "source": watch_source}
        # A new list is built on purpose: previously stored lists stay intact.
        recom["watched_objects"] = recom.get("watched_objects", []) + [reason]

    return True

def update_serial_recoms(serial_info, serial_recoms, object_id, count=1, watched_object_title=None, watch_timestamp=0, watch_source="ether"):
    """Register ``object_id`` as a serial recommendation, in place.

    Nothing happens when ``object_id`` is not a key of ``serial_info``.
    Otherwise the entry in ``serial_recoms`` is created from the serial
    metadata and the data of its first episode (with "count" set to
    ``count``) or, if it already exists, its "count" is increased by
    ``count``. When ``watched_object_title`` is truthy, a
    {"title", "timestamp", "source"} item describing the watched object that
    led to the recommendation is appended to the entry's "watched_objects".

    Returns True if ``object_id`` is a known serial, False otherwise.
    """
    if object_id not in serial_info:
        return False

    if object_id in serial_recoms:
        recom = serial_recoms[object_id]
        recom["count"] += count
    else:
        serial = serial_info[object_id]
        # (output field, serial_info field) pairs
        field_map = (
            ("serial_name", "serial_name"),
            ("uuid", "uuid"),
            ("episode_name", "first_episode_name"),
            ("season", "first_season"),
            ("episode", "first_episode"),
            ("title", "first_episode_title"),
            ("genre", "genre"),
            ("year", "year"),
            ("is_animation", "is_animation"),
            ("thumb", "thumb"),
            ("rating", "rating"),
        )
        recom = {}
        for target, source in field_map:
            recom[target] = serial[source]
        recom["count"] = count
        serial_recoms[object_id] = recom

    if watched_object_title:
        reason = {"title": watched_object_title,
                  "timestamp": watch_timestamp,
                  "source": watch_source}
        # A new list is built on purpose: previously stored lists stay intact.
        recom["watched_objects"] = recom.get("watched_objects", []) + [reason]

    return True

def get_next_seria(watched_series, serial_info, object_id):
    """Describe the episode that follows the last watched one.

    ``watched_series`` is a list of {"season", "episode"[, "timestamp"]} items;
    only its last element defines the position (callers are expected to pass
    it sorted). The next episode is the following episode of the same season
    or, when there is none, episode 1 of the next season.

    Returns a dict with the serial metadata, "series_watched" (number of
    watched items), the optional "timestamp" of the last watched item and the
    "title", "uuid", "episode_name", "season" and "episode" of the next
    episode. Returns an empty dict when nothing was watched, when the last
    watched season is 0 or when no next episode is known.

    "episode_name" used to be stored as a one-element tuple because of a
    stray trailing comma; it is now the plain value.
    """
    if not watched_series:
        return {}

    last_watched = watched_series[-1]
    season = last_watched["season"]
    episode = last_watched["episode"]
    if season == 0:
        return {}

    serial = serial_info[object_id]

    # Candidates in order of preference: next episode of the same season,
    # then the first episode of the next season.
    next_episode = None
    next_position = None
    for wanted_season, wanted_episode in ((season, episode + 1), (season + 1, 1)):
        for episode_info in serial["episodes_info"]:
            if episode_info["season"] == wanted_season and episode_info["episode"] == wanted_episode:
                # No break: with duplicated entries the last one wins.
                next_episode = episode_info
                next_position = (wanted_season, wanted_episode)
        if next_episode is not None:
            break
    if next_episode is None:
        return {}

    next_seria_info = {"serial_name": serial["serial_name"],
                       "series_watched": len(watched_series),
                       "genre": serial["genre"],
                       "year": serial["year"],
                       "is_animation": serial["is_animation"],
                       "thumb": serial["thumb"],
                       "rating": serial["rating"]}
    if last_watched.get("timestamp"):
        next_seria_info["timestamp"] = last_watched["timestamp"]

    next_seria_info["title"] = next_episode["title"]
    next_seria_info["uuid"] = next_episode["uuid"]
    next_seria_info["episode_name"] = next_episode["episode_name"]
    next_seria_info["season"] = next_position[0]
    next_seria_info["episode"] = next_position[1]

    return next_seria_info

def prepare_recoms(recoms, watched_objects, min_count):
    """Return deep copies of the recommendations worth showing.

    A recommendation is kept when its object id is not in ``watched_objects``
    and its "count" is at least ``min_count``. The "watched_objects" list of a
    kept recommendation (if non-empty) is ordered with TV_ONLINE_ETHER_SOURCE
    items first, newest first within each group, and cut to
    MAX_WATCHED_ASSOCIATIONS_COUNT items. ``recoms`` itself is not modified.
    """
    def reason_order(reason):
        # False sorts before True, so ether-sourced reasons come first.
        return (reason["source"] != TV_ONLINE_ETHER_SOURCE, -reason["timestamp"])

    selected = {}
    for object_id in recoms:
        if object_id in watched_objects:
            continue
        candidate = recoms[object_id]
        if candidate["count"] >= min_count:
            kept = deepcopy(candidate)
            selected[object_id] = kept
            if kept.get("watched_objects"):
                ordered = sorted(kept["watched_objects"], key=reason_order)
                kept["watched_objects"] = ordered[:MAX_WATCHED_ASSOCIATIONS_COUNT]
    return selected

@with_hints(files=[nfi.TableFile(VH_BLOGGERS_INFO, 'bloggers_info')])
def get_bloggers_recommendations(recs):
    """Recommend, per user, the newest unseen episode of each watched blogger.

    Browser and tv_online view times and watch lists of the input record are
    summed per blogger. Bloggers are processed in order of decreasing total
    view time; for each one the most recently updated episode whose uuid,
    content group id and source are all absent from the user's watch list is
    recommended. A record is yielded only when at least one recommendation
    was found.
    """
    bloggers_info = get_bloggers_info(get_records_from_file('bloggers_info'))
    stat_fields = (
        ("browser_blogger_view_time", "browser_blogger_watches"),
        ("tv_online_blogger_view_time", "tv_online_blogger_watches"),
    )

    for rec in recs:
        view_times = {}
        watches = {}
        for time_field, watches_field in stat_fields:
            for blogger_id in rec[time_field]:
                view_times[blogger_id] = view_times.get(blogger_id, 0) + rec[time_field][blogger_id]
                watches[blogger_id] = watches.get(blogger_id, []) + rec[watches_field][blogger_id]

        ranked = sorted(view_times.items(), key=lambda item: -item[1])

        to_recommend = []
        for blogger_id, view_time in ranked:
            newest_first = sorted(bloggers_info[blogger_id]["episodes"], key=lambda ep: -ep["ts"])
            watched = watches[blogger_id]
            for episode in newest_first:
                already_seen = (episode["uuid"] in watched
                                or episode["content_group_id"] in watched
                                or episode["source"] in watched)
                if already_seen:
                    continue

                to_recommend.append({"blogger": bloggers_info[blogger_id]["name"],
                                     "uuid": episode["uuid"],
                                     "episode_name": episode["name"],
                                     "thumb": episode["thumb"],
                                     "blogger_id": blogger_id,
                                     "view_time": view_time,
                                     "watched_episodes": watched})
                # Only one episode per blogger is recommended.
                break

        if to_recommend:
            yield Record(uid=rec["uid"], bloggers=to_recommend)

@with_hints(files=[nfi.TableFile(VH_OTT_INFO, 'ott_info')])
def get_vh_associations(recs):
    """Keep, per object, the associations that are known OTT serials or films.

    The first TOP_ASSOCATIONS_COUNT items of ``Assoc.ru`` from the JSON in
    "value" are inspected; the object id is the item's "value" without its
    first three and last two characters. A record with the input "key" and
    the matching ids is yielded when at least one id is known.
    """
    serial_info, film_info = get_ott_info(get_records_from_file('ott_info'))
    for rec in recs:
        top_associations = json.loads(rec["value"]).get("Assoc", {}).get("ru", [])[:TOP_ASSOCATIONS_COUNT]
        candidate_ids = [elem["value"][3:-2] for elem in top_associations]
        known_ids = [object_id for object_id in candidate_ids
                     if object_id in serial_info or object_id in film_info]
        if known_ids:
            yield Record(key=rec['key'], associations=known_ids)

@with_hints(files=[nfi.TableFile(VH_OTT_INFO, 'ott_info')])
def get_films_and_channels_recommendations(groups):
    """Reducer: merge every kind of per-uid input into one recommendations record.

    `groups` yields (key, recs) pairs.  Each record is dispatched on the first
    truthy field among "object_id", "channels_stat", "genres_stat" and
    "bloggers"; a record with none of them is read as a "vitrine_stats"
    carrier.  One Record per group is yielded, and only when at least one
    input contributed something.
    """
    serial_info, film_info = get_ott_info(get_records_from_file('ott_info'))

    for key, recs in groups:
        has_payload = False
        assoc_films = {}
        assoc_serials = {}
        next_episodes = {}
        channels = {}
        genres = {}
        watched = {}
        vitrine_films = {}
        vitrine_serials = {}
        blogger_recoms = []

        for rec in recs:
            # Watched object (film/serial), possibly with associated objects.
            if rec.get("object_id"):
                watched_id = rec["object_id"]
                for assoc_id in rec.get("associations", []):
                    # Both updaters are always called: each one fills its own dict.
                    film_hit = update_film_recoms(film_info, assoc_films, assoc_id, 1,
                                                  rec["title"], rec["timestamp"], rec["watch_source"])
                    serial_hit = update_serial_recoms(serial_info, assoc_serials, assoc_id, 1,
                                                      rec["title"], rec["timestamp"], rec["watch_source"])
                    if film_hit or serial_hit:
                        has_payload = True

                watched[watched_id] = {"timestamp" : rec["timestamp"],
                                       "title" : rec["title"],
                                       "object_type" : rec["object_type"],
                                       "series" : rec["series"]}

                if watched_id in serial_info:
                    ordered_series = sorted(rec["series"],
                                            key=lambda seria: (seria["season"], seria["episode"]))
                    upcoming = get_next_seria(ordered_series, serial_info, watched_id)
                    if len(upcoming) != 0:
                        has_payload = True
                        next_episodes[watched_id] = upcoming
                continue

            if rec.get("channels_stat"):
                channels = rec["channels_stat"]
                has_payload = has_payload or len(channels) > 0
                continue

            if rec.get("genres_stat"):
                genres = rec["genres_stat"]
                has_payload = has_payload or len(genres) > 0
                continue

            if rec.get("bloggers"):
                blogger_recoms = rec["bloggers"]
                has_payload = has_payload or len(blogger_recoms) > 0
                continue

            # Anything else is expected to carry vitrine statistics.
            vitrine = rec["vitrine_stats"]
            for vitrine_id in vitrine:
                film_hit = update_film_recoms(film_info, vitrine_films, vitrine_id, vitrine[vitrine_id])
                serial_hit = update_serial_recoms(serial_info, vitrine_serials, vitrine_id, vitrine[vitrine_id])
                if film_hit or serial_hit:
                    has_payload = True

        assoc_films = prepare_recoms(assoc_films, watched, MIN_ASSOCIATIONS_COUNT)
        assoc_serials = prepare_recoms(assoc_serials, watched, MIN_ASSOCIATIONS_COUNT)
        vitrine_films = prepare_recoms(vitrine_films, watched, 1)
        vitrine_serials = prepare_recoms(vitrine_serials, watched, 1)

        if not has_payload:
            continue

        yield Record(uid=key["uid"],
                     film_recoms=assoc_films,
                     serial_recoms=assoc_serials,
                     next_seria_recoms=next_episodes,
                     channels_stat=channels,
                     genres_stat=genres,
                     watched_objects=watched,
                     vitrine_film_recoms=vitrine_films,
                     vitrine_serial_recoms=vitrine_serials,
                     bloggers=blogger_recoms)

def get_genres_stats(groups):
    """Reducer: count, per group, how many records carry each "genre" value.

    `groups` yields (key, recs) pairs; one Record(uid, genres_stat) is
    yielded per group, where genres_stat maps genre -> number of records.
    """
    for key, recs in groups:
        per_genre = {}
        for rec in recs:
            genre = rec["genre"]
            if genre in per_genre:
                per_genre[genre] += 1
            else:
                per_genre[genre] = 1
        yield Record(uid=key["uid"], genres_stat=per_genre)

def delete_keys_from_dict(dictionary, keys):
    """Return a deep copy of `dictionary` with every key from `keys` removed.

    The input is left untouched.  Raises KeyError if any of `keys` is not
    present in the dictionary.
    """
    pruned = deepcopy(dictionary)
    for unwanted in keys:
        # pop() without a default raises KeyError for a missing key.
        pruned.pop(unwanted)
    return pruned

def get_recommendations(recomms_stats, total_stats, recom_type, reason):
    """Convert per-object recommendation stats into an ordered list.

    Every entry of `recomms_stats` gets a "tvt" field written into it in
    place: the "tvt" of the same object id in `total_stats`, or 0 when the id
    is absent there.  Entries are ordered by "count" descending, then by
    "tvt" descending.

    Returns a list of dicts with the keys "info" (a copy of the entry without
    "tvt" and "count", plus "reason"), "type" (`recom_type`) and
    "from_default" (always False).
    """
    for object_id, stats in recomms_stats.items():
        stats["tvt"] = total_stats[object_id]["tvt"] if object_id in total_stats else 0

    ranked = sorted(recomms_stats.items(),
                    key=lambda item: (-item[1]["count"], -item[1]["tvt"]))

    recoms = []
    for _, stats in ranked:
        info = delete_keys_from_dict(stats, ['tvt', 'count'])
        info["reason"] = reason
        recoms.append({"info" : info, "type" : recom_type, "from_default" : False})
    return recoms

def main():
    """Build and run the job that produces per-uid recommendations.

    Command-line flags (all required): --date (parsed as %Y-%m-%d),
    --days_count_for_film_recommend, --days_count_for_cold_start,
    --total_tv_online_stats (table the aggregated tv-online stats are put
    to) and --output_table (table the final per-uid records are put to).
    """
    arg_parser = argparse.ArgumentParser()
    for flag, flag_type in (('--date', str),
                            ('--days_count_for_film_recommend', int),
                            ('--days_count_for_cold_start', int),
                            ('--total_tv_online_stats', str),
                            ('--output_table', str)):
        arg_parser.add_argument(flag, type=flag_type, required=True)
    args = arg_parser.parse_args()

    end_date = dt.strptime(args.date, "%Y-%m-%d")

    spec_defaults = dict(pool_trees=["physical"],
                         tentative_pool_trees=["cloud"])
    job_templates = dict(tmp_root=TMP_HITMAN_TV_ONLINE_RECOMMENDATIONS,
                         title='GetRecommendationsForUids')
    cluster = clusters.yt.Hahn().env(parallel_operations_limit=10,
                                     yt_spec_defaults=spec_defaults,
                                     templates=job_templates)

    stat_fields = ['web_requests_stats', 'video_requests_stats', 'vitrine_stats',
                   'tv_online_stats', 'push_stats', 'urls', 'titles', 'views']

    job = cluster.job()

    # One row per uid, taking any value of each stat field.
    any_per_field = {field: na.any(field) for field in stat_fields}
    stats = (job.table(STATS_PREFIX + AGGREGATED_STATS_SUFFIX)
                .groupby('uid')
                .aggregate(memory_limit=OP_MEMORY_LIMIT, **any_per_field))

    # Independent projections of the same per-uid stats.
    search_objects = stats.map(get_search_watched_objects, memory_limit=OP_MEMORY_LIMIT)
    vitrine_stats = stats.map(get_vitrine_stats, memory_limit=OP_MEMORY_LIMIT)
    desktop_browser_bloggers = stats.map(get_browser_watched_bloggers, memory_limit=OP_MEMORY_LIMIT)
    tv_online_objects = stats.map(get_tv_online_watched_objects, memory_limit=OP_MEMORY_LIMIT)
    tv_online_channels = stats.map(get_tv_online_watched_channels, memory_limit=OP_MEMORY_LIMIT)

    watched_sources = job.concat(search_objects, tv_online_objects)
    objects = watched_sources.groupby('uid', 'object_id').reduce(aggregate_series_info)

    # Keep only ontodb entries for which genre, title and type were all extracted.
    ontodb_projection = job.table(ONTODB_BASE).project('key',
                                                       genre=ne.custom(get_genre, 'value'),
                                                       title=ne.custom(get_title, 'value'),
                                                       object_type=ne.custom(get_type, 'value'))
    ontodb_base = ontodb_projection.filter(
        sf.custom(lambda genre, title, object_type : genre and title and object_type,
                  'genre', 'title', 'object_type'))

    ontodb_vh_associations = job.table(ONTODB_ASSOC_PATH).map(get_vh_associations,
                                                              memory_limit=OP_MEMORY_LIMIT)

    # Left join: ontodb entries without associations are kept.
    ontodb_with_assocs = ontodb_base.join(ontodb_vh_associations, by='key', type='left')
    objects_with_oo_joined = ontodb_with_assocs.join(objects, by_left='key', by_right='object_id')

    genres_stat = objects_with_oo_joined.groupby('uid').reduce(get_genres_stats)

    existing_tables_percentage = 0.5

    # One daily table per day, going back from end_date.
    daily_table_paths = [TV_ONLINE_STATS_PREFIX + (end_date - timedelta(days_back)).strftime("%Y-%m-%d")
                         for days_back in range(args.days_count_for_cold_start)]
    available_paths = get_tables_paths_ignore_missing(cluster,
                                                      daily_table_paths,
                                                      existing_tables_percentage,
                                                      IgnorePolicy.PERCENT_OF_ALL)
    daily_tables = [job.table(path) for path in available_paths]

    (job.concat(*daily_tables)
        .groupby('uid')
        .reduce(aggregate_tv_online_stats, files=nfi_common)
        .map(get_total_tv_online_stats, memory_limit=OP_MEMORY_LIMIT)
        .groupby('content_type', 'id')
        .aggregate(info=na.any('info'),
                   tvt=na.sum('tvt'))
        .sort('content_type')
        .put(args.total_tv_online_stats))

    tv_online_bloggers = stats.map(get_tv_online_watched_bloggers, memory_limit=OP_MEMORY_LIMIT)
    bloggers = (job.concat(desktop_browser_bloggers, tv_online_bloggers)
                   .groupby('uid')
                   .reduce(aggregate_blogggers_info)
                   .map(get_bloggers_recommendations, memory_limit=OP_MEMORY_LIMIT))

    # Final per-uid merge of every recommendation source.
    (job.concat(objects_with_oo_joined, tv_online_channels, genres_stat, vitrine_stats, bloggers)
        .groupby('uid')
        .reduce(get_films_and_channels_recommendations, memory_limit=OP_MEMORY_LIMIT)
        .sort('uid')
        .put(args.output_table))
    job.run()

# Script entry point: build and run the job only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()
