#-*- coding: UTF-8 -*-
import nile
import argparse
import time
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)
from copy import deepcopy
import uatraits
import urllib
from datetime import datetime as dt, timedelta
import os
import sys
import codecs
from random import random
import hashlib
import urllib2
import json

# Path prefixes of the per-day input tables; main() appends a "%Y-%m-%d"
# formatted date to each prefix to obtain the table path for one day.
SEARCH_STATS_PREFIX = "//home/ether_prod/pushes/stats_for_pushes/web_video_"
TV_ONLINE_STATS_PREFIX = "//home/ether_prod/pushes/stats_for_pushes/tv_online_"

def update_event_stats(stats_to_update, requests_stats,
                       stats=("timestamps", "watches", "series"), dict_stats=()):
    """Merge per-object statistics from ``requests_stats`` into ``stats_to_update``.

    ``stats_to_update`` is modified in place; ``requests_stats`` is never
    modified.

    Args:
        stats_to_update: dict ``object_id -> {stat_name: value}`` acting as the
            accumulator.
        requests_stats: dict of the same shape holding the values to add.
        stats: names of the stats to merge. Two names are special-cased:
            ``"teams_stats"`` is merged recursively (nested per-object dict
            with the stats ``matched_titles``, ``website_visits`` and
            ``related_sites_visits``), and ``"shown_websites"`` is merged as a
            ``website -> count`` dict. Every other stat is combined with
            ``+=`` (numbers are summed, lists are concatenated).
        dict_stats: unused; kept so existing callers keep working.

    Raises:
        KeyError: if an object in ``requests_stats`` lacks one of ``stats``, or
            an already accumulated object lacks it.
    """
    for object_id in requests_stats:
        incoming = requests_stats[object_id]

        if object_id not in stats_to_update:
            # First sighting of this object: take a private copy of every stat.
            # Copying (instead of storing a reference) keeps later in-place
            # merges from leaking into the caller's input, and handling the
            # object once here ensures its values are not added a second time.
            stats_to_update[object_id] = dict(
                (stat, deepcopy(incoming[stat])) for stat in stats
            )
            continue

        current = stats_to_update[object_id]
        for stat in stats:
            if stat == "teams_stats":
                update_event_stats(current[stat], incoming[stat],
                                   ["matched_titles", "website_visits", "related_sites_visits"])
            elif stat == "shown_websites":
                shown = current[stat]
                for website in incoming[stat]:
                    shown[website] = shown.get(website, 0) + incoming[stat][website]
            else:
                current[stat] += incoming[stat]

def aggregate_search_stats(groups):
    """Reducer: fold all search-stat records of one uid into a single Record.

    For every ``(key, records)`` group the four stat dicts of each record are
    merged with ``update_event_stats``; ``browser`` and ``os_family`` are
    taken from the last record of the group ("unknown" if the group is empty).
    A leading ``'y'`` or, failing that, a leading ``'uu/'`` is stripped from
    the uid before it is emitted.
    """
    for group_key, group_recs in groups:
        user_id = group_key["uid"]
        # Only one prefix is ever removed: 'y' is tried first, then 'uu/'.
        for prefix in ('y', 'uu/'):
            if user_id.startswith(prefix):
                user_id = user_id[len(prefix):]
                break

        last_browser = "unknown"
        last_os_family = "unknown"
        special_events = {}
        sport_leagues = {}
        video_requests = {}
        web_requests = {}
        # (accumulator, record field, stat names), listed in merge order.
        merge_plan = (
            (special_events, "special_events_stats", ["requests"]),
            (sport_leagues, "sport_leagues_stats", ["requests", "shown_websites"]),
            (video_requests, "video_requests_stats", ["timestamps", "requests", "watches", "series"]),
            (web_requests, "web_requests_stats", ["timestamps", "requests", "series"]),
        )
        for row in group_recs:
            last_browser = row["browser"]
            last_os_family = row["os_family"]
            for accumulator, field, stat_names in merge_plan:
                update_event_stats(accumulator, row[field], stat_names)

        yield Record(uid=user_id, os_family=last_os_family, browser=last_browser,
                     special_events_stats=special_events,
                     sport_leagues_stats=sport_leagues,
                     video_requests_stats=video_requests,
                     web_requests_stats=web_requests)

def aggregate_tv_online_stats(groups):
    """Reducer: fold all TV-online records of one uid into a single Record.

    Per content id, ``tvt`` values are summed and the largest ``timestamp`` is
    kept; ``computed_channel`` and ``computed_program`` come from the first
    record in which the content id appears.
    """
    for group_key, group_recs in groups:
        user_id = group_key["uid"]
        merged = {}
        for row in group_recs:
            row_stats = row["tv_online_stats"]
            for content_id in row_stats:
                incoming = row_stats[content_id]
                current = merged.get(content_id)
                if current is None:
                    # First occurrence: start a fresh entry for this content id.
                    merged[content_id] = {
                        "computed_channel": incoming["computed_channel"],
                        "computed_program": incoming["computed_program"],
                        "tvt": incoming["tvt"],
                        "timestamp": incoming["timestamp"],
                    }
                    continue
                current["tvt"] += incoming["tvt"]
                current["timestamp"] = max(current["timestamp"], incoming["timestamp"])
        yield Record(uid=user_id, tv_online_stats=merged)

def have_season_episode(serial_info, season, episode):
    """Look up an episode of a series by season and episode number.

    Returns ``(True, uuid)`` for the first entry of ``serial_info["episodes"]``
    whose ``episode`` and ``season`` both match, otherwise ``(False, None)``.
    """
    for candidate in serial_info["episodes"]:
        is_wanted = candidate["episode"] == episode and candidate["season"] == season
        if not is_wanted:
            continue
        return True, candidate["uuid"]
    return False, None

class get_next_episode_for_uids_by_search_stats(object):
    """Mapper that recommends the next episode based on aggregated search stats.

    For each input record the series with the largest number of watched
    episodes (among those for which a next episode is known) is selected and
    one Record ``(uid, stream_id, series, episode, season, object_id)`` is
    emitted. Records without any candidate produce no output.

    Args:
        min_watched_series_count: minimal (inclusive) number of entries in a
            series' ``"series"`` list required to consider it.
        vh_serial_info: dict ``object_id -> {"series_name": ..., "episodes":
            [{"season", "episode", "uuid", ...}, ...]}``.
    """

    def __init__(self, min_watched_series_count, vh_serial_info):
        self.min_watched_series_count = min_watched_series_count
        self.vh_serial_info = vh_serial_info

    def __call__(self, recs):
        for rec in recs:
            video_stats = rec["video_requests_stats"]
            to_recommend = {}
            for object_id in video_stats:
                # Skip unknown objects before parsing so that malformed
                # entries of unrelated objects cannot break the mapper.
                if object_id not in self.vh_serial_info:
                    continue

                # Entries are split on '_' and fields 1 and 3 are read as
                # season and episode (presumably "<tag>_<season>_<tag>_<episode>"
                # -- TODO confirm the exact format with the producer).
                series = []
                for item in video_stats[object_id]["series"]:
                    parts = item.split('_')
                    series.append([int(parts[1]), int(parts[3])])

                if not series or len(series) < self.min_watched_series_count:
                    continue

                # Latest watched episode = largest (season, episode) pair.
                season, episode = max(series)
                if season == 0 or episode == 0:
                    continue

                have_next_seria, uuid = have_season_episode(self.vh_serial_info[object_id],
                                                            season,
                                                            episode + 1)
                if have_next_seria:
                    to_recommend[object_id] = {"uuid": uuid,
                                               "episode": episode + 1,
                                               "season": season,
                                               "series": self.vh_serial_info[object_id]["series_name"],
                                               "watches": len(series)}

            if not to_recommend:
                continue

            # Pick the candidate with the most watched episodes; on ties the
            # first one in iteration order wins.
            best_object_id = max(to_recommend,
                                 key=lambda oid: to_recommend[oid]["watches"])
            best = to_recommend[best_object_id]
            yield Record(uid=rec["uid"],
                         stream_id=best["uuid"],
                         series=best["series"],
                         episode=best["episode"],
                         season=best["season"],
                         object_id=best_object_id)

class get_next_episode_for_uids_by_tv_online_stats(object):
    """Mapper that recommends the next episode based on aggregated TV-online stats.

    An episode counts as watched when its content id is a known episode uuid
    and its accumulated ``tvt`` is at least ``MIN_TVT``. For each input record
    the series with the most watched episodes (among those for which a next
    episode is known) is selected and one Record ``(uid, stream_id, series,
    episode, season, object_id)`` is emitted. Records without any candidate
    produce no output.

    Args:
        min_watched_series_count: a series is considered only when the number
            of watched episodes is strictly greater than this value.
        vh_serial_info: dict ``object_id -> {"series_name": ..., "episodes":
            [{"season", "episode", "uuid", ...}, ...]}``.
    """

    # Minimal accumulated "tvt" for an episode to count as watched
    # (presumably seconds of viewing time -- TODO confirm the unit).
    MIN_TVT = 300

    def __init__(self, min_watched_series_count, vh_serial_info):
        self.min_watched_series_count = min_watched_series_count
        self.vh_serial_info = vh_serial_info

    def __call__(self, recs):
        # Reverse index: episode uuid -> owning series and position in it.
        serial_info_by_uuid = {}
        for object_id in self.vh_serial_info:
            for episode_info in self.vh_serial_info[object_id]["episodes"]:
                serial_info_by_uuid[episode_info["uuid"]] = {"object_id": object_id,
                                                             "season": episode_info["season"],
                                                             "episode": episode_info["episode"]}

        for rec in recs:
            # object_id -> list of watched episode descriptors
            watched_serials = {}
            tv_stats = rec["tv_online_stats"]
            for content_id in tv_stats:
                if content_id in serial_info_by_uuid and tv_stats[content_id]["tvt"] >= self.MIN_TVT:
                    watched = serial_info_by_uuid[content_id]
                    watched_serials.setdefault(watched["object_id"], []).append(watched)

            to_recommend = {}
            for object_id in watched_serials:
                episodes = watched_serials[object_id]
                # NOTE(review): strict '>' here, whereas the search-stats
                # mapper uses '>=' for its threshold -- confirm which is intended.
                if len(episodes) > self.min_watched_series_count:
                    last_seria = max(episodes,
                                     key=lambda x: (int(x["season"]), int(x["episode"])))
                    if last_seria["season"] == 0 or last_seria["episode"] == 0:
                        continue
                    have_next_seria, uuid = have_season_episode(self.vh_serial_info[object_id],
                                                                last_seria["season"],
                                                                last_seria["episode"] + 1)
                    if have_next_seria:
                        # The recommended item is the episode following the
                        # last watched one, within the same season.
                        to_recommend[object_id] = {"uuid": uuid,
                                                   "episode": last_seria["episode"] + 1,
                                                   "season": last_seria["season"],
                                                   "series": self.vh_serial_info[object_id]["series_name"],
                                                   "watches": len(episodes)}

            if not to_recommend:
                continue

            # Pick the candidate with the most watched episodes; on ties the
            # first one in iteration order wins.
            best_object_id = max(to_recommend,
                                 key=lambda oid: to_recommend[oid]["watches"])
            best = to_recommend[best_object_id]
            yield Record(uid=rec["uid"],
                         stream_id=best["uuid"],
                         series=best["series"],
                         episode=best["episode"],
                         season=best["season"],
                         object_id=best_object_id)

def get_thumb(json_str):
    """Build a thumbnail URL from a JSON-encoded card.

    Scans the card's ``"Image"`` list for entries whose ``avatar_type`` is
    ``"vertical_film"``. The first such entry whose ``RelevLocale`` contains
    ``"ru"`` stops the scan and is preferred; otherwise the last other
    ``"vertical_film"`` entry seen is used. Returns ``""`` when no usable
    ``mds_avatar_id`` is found.
    """
    localized_id = None
    fallback_id = None
    card = json.loads(json_str)
    for image in card.get("Image", []):
        if image.get("avatar_type", "") != "vertical_film":
            continue
        if "ru" not in image.get("RelevLocale", []):
            fallback_id = image["mds_avatar_id"]
            continue
        localized_id = image["mds_avatar_id"]
        break

    chosen_id = localized_id or fallback_id or ""
    if chosen_id:
        return "https://avatars.mds.yandex.net/get-entity_search/{0}/S120x120Top".format(chosen_id)
    return ""

# Table read in main() to build the per-series episode index (vh_serial_info).
VIDEO_HOSTING_EPISODES = "//home/video-hosting/ya-video/episodes"
# Table joined in main() on its 'key' column; its 'value' column is passed to get_thumb().
ONTODB_BASE = "//home/dict/ontodb/ver/main/production/all_cards_final"

def main():
    """Command-line entry point: build the next-episode recommendation table.

    Steps:
      1. Parse ``--date``, ``--days_count``, the two minimal watched-count
         thresholds and ``--output_table``.
      2. Read VIDEO_HOSTING_EPISODES and index "ott-episode" rows by onto_id.
      3. Concatenate ``days_count`` daily search and TV-online tables ending
         at ``--date``, aggregate each per uid and map both through the
         corresponding next-episode mapper.
      4. Join the result with ONTODB_BASE, add a thumbnail URL and write the
         projection to ``--output_table``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--date', type=str, required=True)
    arg_parser.add_argument('--days_count', type=int, required=True)
    arg_parser.add_argument('--min_watched_search_series_count', type=int, default=1e10)
    arg_parser.add_argument('--min_watched_tv_online_series_count', type=int, default=1e10)
    arg_parser.add_argument('--output_table')
    args = arg_parser.parse_args()

    end_date = dt.strptime(args.date, "%Y-%m-%d")

    yt_spec = dict(pool_trees=["physical"], tentative_pool_trees=["cloud"])
    job_templates = dict(tmp_root='//tmp', title='GetNextSeries')
    cluster = clusters.yt.Hahn().env(parallel_operations_limit=10,
                                     yt_spec_defaults=yt_spec,
                                     templates=job_templates)

    # onto_id -> {"series_name": ..., "episodes": [episode descriptors]}
    vh_serial_info = {}
    for row in cluster.driver.read(VIDEO_HOSTING_EPISODES):
        is_series_episode = row["onto_id"] and row["series_id"] and row["content_type"] == "ott-episode"
        if not is_series_episode:
            continue
        episode_info = {"season": row["season_number"],
                        "episode": row["episode_number"],
                        "name": row["full_title"],
                        "uuid": row["uuid"]}
        onto_id = row["onto_id"]
        if onto_id not in vh_serial_info:
            vh_serial_info[onto_id] = {"series_name": row["series_name"],
                                       "episodes": [episode_info]}
        else:
            vh_serial_info[onto_id]["episodes"].append(episode_info)

    job = cluster.job()

    # Date suffixes of the daily tables, newest (end_date) first.
    day_suffixes = [(end_date - timedelta(offset)).strftime("%Y-%m-%d")
                    for offset in range(args.days_count)]
    search_stats_to_concat = [job.table(SEARCH_STATS_PREFIX + suffix) for suffix in day_suffixes]
    tv_online_stats_to_concat = [job.table(TV_ONLINE_STATS_PREFIX + suffix) for suffix in day_suffixes]

    search_mapper = get_next_episode_for_uids_by_search_stats(
        args.min_watched_search_series_count, vh_serial_info)
    search = (job.concat(*search_stats_to_concat)
              .groupby('uid')
              .reduce(aggregate_search_stats)
              .map(search_mapper))

    tv_online_mapper = get_next_episode_for_uids_by_tv_online_stats(
        args.min_watched_tv_online_series_count, vh_serial_info)
    tv_online = (job.concat(*tv_online_stats_to_concat)
                 .groupby('uid')
                 .reduce(aggregate_tv_online_stats)
                 .map(tv_online_mapper))

    recommendations = job.concat(search, tv_online)
    with_cards = recommendations.join(job.table(ONTODB_BASE), by_left='object_id', by_right='key')
    (with_cards
     .sort('uid')
     .project('uid', 'stream_id', 'series', 'episode', 'season',
              thumb=ne.custom(get_thumb, 'value'))
     .put(args.output_table))

    job.run()

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
