#-*- coding: UTF-8 -*-
import nile
import argparse
import codecs
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    files as nfi,
    Record
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)
from copy import deepcopy
import urllib
from datetime import datetime, timedelta
import os
import json
import sys
from time import time, sleep, mktime
import requests
from random import shuffle
from collections import defaultdict

def get_channel_programs(parent_id, date_from, date_to):
    """Fetch the episodes of a channel from the frontend.vh episodes endpoint.

    :param parent_id: value substituted into the ``parent_id`` query parameter.
    :param date_from: value substituted into ``end_date__from``.
    :param date_to: value substituted into ``start_date__to``.
    :return: the value stored under the ``set`` key of the JSON response, or
        an empty dict when that key is missing, null or empty.
    """
    url = 'https://frontend.vh.yandex.ru/episodes?parent_id=%s&end_date__from=%s&start_date__to=%s&geo_id=213&locale=ru' % (
        parent_id, date_from, date_to)

    resp = retry_request('get', kwargs=dict(url=url))
    data = json.loads(resp.text)

    res = data.get('set')

    # `set` may be absent from the response (None); a truthiness check covers
    # both that case and an empty collection without raising on len(None).
    if not res:
        return {}

    return res

def retry_request(request_type, args=None, kwargs=None):
    """Perform an HTTP request through ``requests``, retrying on failure.

    An attempt fails when the call raises or when the response status code
    is >= 300.  At most 10 attempts are made, with a 60 second pause between
    consecutive attempts.

    :param request_type: name of the ``requests`` function to call
        (e.g. ``'get'``).
    :param args: positional arguments for that function (optional).
    :param kwargs: keyword arguments for that function (optional).
    :return: the first response with status < 300; otherwise the last
        response received, or None when every attempt raised.
    """
    if not args:
        args = []
    if not kwargs:
        kwargs = {}

    max_attempts = 10
    retry_delay = 60

    req = None
    for attempt in range(max_attempts):
        try:
            req = getattr(requests, request_type)(*args, **kwargs)
        except Exception:
            # Deliberate best-effort: any failure of the call is retried.
            pass
        else:
            if req.status_code < 300:
                return req
        # Count and pause after *every* failed attempt, including bad HTTP
        # statuses, so an error response cannot cause an endless busy loop.
        if attempt + 1 < max_attempts:
            sleep(retry_delay)
    return req

# File list handed to map operations through their `files=` argument
# (see calc_content_stats); it contains the local file `common.py`.
nfi_common = [
    nfi.LocalFile('common.py')
]

# Storage paths used by the jobs of this module.
# NOTE(review): presumably YT table paths - confirm against the cluster setup.
ACTUAL_URLS = "//home/video-hosting/ya-video/actual_urls"
ACTUAL_URLS_FULL = "//home/video-hosting/ya-video/actual_urls.full"
CONTENT_GROUP = "//home/video-hosting/base/ContentGroup"
CONTENT_RESOURCE = "//home/video-hosting/base/ContentResource"
CONTENT_VERSION = "//home/video-hosting/base/ContentVersion"
CONTENT_VERSION_GROUP = "//home/video-hosting/base/ContentVersionGroup"
OUTPUT_STREAM = "//home/video-hosting/base/OutputStream"
# Joined with the per-content view counts in calc_content_stats (by `JoinKey`).
STRM_META_PATH = "//home/videolog/strm_meta/iron_branch/concat"
# Output of calc_content_stats; its `_end_date` attribute records the last
# date the stats were calculated for.
CONTENT_STATS_PATH = "//home/videolog/AUTO_CHANNELS/content_stats"
# Prefix of the daily stats tables; get_stats_tables appends a date
# formatted as %Y-%m-%d to it.
TV_ONLINE_STATS_PREFIX = "//home/ether_prod/pushes/stats_for_pushes/tv_online_"

# filter_bitrate keeps only records whose bitrate is strictly greater than this.
MIN_BITRATE = 1000000 #min quality 480p
# NOTE(review): unit is presumably seconds - TODO confirm where it is used.
MIN_CONTENT_DURATION = 30

def get_stats_tables(cluster, job, stats_prefix, date, days):
    """Collect the existing daily stats tables for a window of days.

    Paths are built as ``stats_prefix`` + the day formatted as %Y-%m-%d,
    starting at ``date`` and stepping back one day at a time for ``days``
    days.  Only paths for which ``cluster.driver.exists`` is true are kept.

    :return: list of ``job.table(path)`` objects, newest day first.
    """
    candidate_paths = (
        stats_prefix + datetime.strftime(date - timedelta(offset), "%Y-%m-%d")
        for offset in range(days)
    )
    return [
        job.table(path)
        for path in candidate_paths
        if cluster.driver.exists(path)
    ]

def make_schedule_item(rec, content_group, resources, stream):
    """Build a schedule item dict from a schedule record and its metadata.

    Starts from ``rec.to_dict()`` and overlays the metadata fields, so a
    metadata key overrides a key of the same name coming from the record.

    :param rec: schedule record; only ``to_dict()`` is used.
    :param content_group: mapping with the content group columns
        (``ContentGroupID`` is required, the others are read with ``get``).
    :param resources: mapping of content resources, read with ``get``.
    :param stream: mapping with stream attributes, or None when there is no
        stream (the stream-derived fields are then None).
    :return: the resulting dict.
    """
    item = rec.to_dict()
    group_id = content_group["ContentGroupID"]
    has_stream = stream is not None
    page_id = resources.get('page_id')

    # Fields set to a literal None below are never populated by this
    # builder; they are emitted so every item carries the same set of keys.
    item.update({
        'resources': resources,
        'onto_genre': resources.get('onto_genre', '-'),
        'onto_id': resources.get('onto_id', '-'),
        'onto_category': resources.get('onto_category', '-'),
        'faas': resources.get('faas_answer'),
        'importThumbnail': resources.get('import_thumbnail'),
        'thumbnail': resources.get('thumbnail'),
        'thumbnailInfo': resources.get('thumbnail_info'),
        'vh_url': "frontend.vh.yandex.ru/player/{}".format(group_id),
        'name': content_group.get('Name'),
        'masterPlaylistUri': stream.get('master_playlist_uri') if has_stream else None,
        'contentTypeId': content_group.get('ContentTypeID'),
        'contentUuid': content_group.get('UUID'),
        'pageId': page_id,
        'restrictionAge': resources.get('restriction_age'),
        'comment': content_group.get('Comment'),
        'trackingEvents': None,
        'actors': None,
        'averageDuration': int(resources.get('duration', 0)),
        'countries': resources.get('program_countries'),
        'cover': None,
        'directors': None,
        # Unlike 'onto_genre' above, this one has no '-' fallback.
        'genres': resources.get('onto_genre'),
        'imdbRating': None,
        'importPoster': resources.get('import_onto_poster'),
        'kinopoiskId': None,
        'kpRating': resources.get('onto_rating_kp'),
        'medianColor': None,
        'monetizationModel': None,
        'originalName': None,
        'playlistGeneration': stream.get('playlist_generation') if has_stream else None,
        'primeRating': None,
        'producers': None,
        'releaseDate': resources.get('release_date'),
        'released': None,
        'streamType': stream.get('stream_type') if has_stream else None,
        'technicalName': None,
        'tvSeriesEpisodeNumber': None,
        'tvSeriesSeasonNumber': None,
        'videoPlatformDuration': None,
        'year': None,
        'years': None,
        'yatvChannelPageId': page_id,
        'ContentGroupID': group_id
    })

    return item

class map_au(object):
    """Mapper that flattens content records into schedule-item style records.

    ``stream_data`` is a nested mapping ``{UUID: {master playlist uri:
    {'playlist_generation': ..., 'stream_type': ...}}}`` used to look up the
    playlist generation and stream type of each record.
    """
    def __init__(self, stream_data):
        self.stream_data = stream_data

    def __call__(self, recs):
        """Yield one output Record per input record.

        Each input record is expected to expose ``Resources``,
        ``GroupingUrl``, ``OutputStreamData``, ``ContentTypeID``, ``UUID``,
        ``Name``, ``Comment`` and ``ContentGroupID``.
        """
        for rec in recs:
            r = rec.Resources

            # OutputStreamData is either a container holding the uri under
            # 'Data' or the uri itself; fall back to the raw value only when
            # the subscript is not applicable, instead of swallowing every
            # possible exception.
            try:
                master_playlist_uri = rec.OutputStreamData['Data']
            except (TypeError, KeyError):
                master_playlist_uri = rec.OutputStreamData

            # Stream attributes are known only when both the UUID and the
            # playlist uri are present in stream_data.
            playlist_generation = None
            stream_type = None
            if rec.UUID in self.stream_data:
                uuid_streams = self.stream_data.get(rec.UUID)
                if master_playlist_uri in uuid_streams:
                    stream_info = uuid_streams.get(master_playlist_uri)
                    playlist_generation = stream_info.get('playlist_generation')
                    stream_type = stream_info.get('stream_type')

            page_id = r.get('page_id')

            # Fields passed as a literal None are never populated by this
            # mapper; they are emitted so that every record has the same
            # set of columns.
            yield Record(resources=r,
                        onto_genre=r.get('onto_genre', '-'),
                        onto_id=r.get('onto_id', '-'),
                        onto_category=r.get('onto_category', '-'),
                        faas=r.get('faas_answer'),
                        importThumbnail=r.get('import_thumbnail'),
                        thumbnail=r.get('thumbnail'),
                        thumbnailInfo=r.get('thumbnail_info'),
                        vh_url=rec.GroupingUrl,
                        name=rec.Name,
                        masterPlaylistUri=master_playlist_uri,
                        contentTypeId=rec.ContentTypeID,
                        contentUuid=rec.UUID,
                        pageId=page_id,
                        restrictionAge=r.get('restriction_age'),
                        comment=rec.Comment,
                        trackingEvents=None,
                        actors=None,
                        averageDuration=int(r.get('duration', 0)),
                        countries=r.get('program_countries'),
                        cover=None,
                        directors=None,
                        genres=r.get('onto_genre'),
                        imdbRating=None,
                        importPoster=r.get('import_onto_poster'),
                        kinopoiskId=None,
                        kpRating=r.get('onto_rating_kp'),
                        medianColor=None,
                        monetizationModel=None,
                        originalName=None,
                        playlistGeneration=playlist_generation,
                        primeRating=None,
                        producers=None,
                        releaseDate=r.get('release_date'),
                        released=None,
                        streamType=stream_type,
                        technicalName=None,
                        tvSeriesEpisodeNumber=None,
                        tvSeriesSeasonNumber=None,
                        videoPlatformDuration=None,
                        year=None,
                        years=None,
                        yatvChannelPageId=page_id,
                        ContentGroupID=rec.ContentGroupID
                    )

def has_intersection_with_live(start_ts, end_ts, lives_timestamps):
    """Tell whether the slot [start_ts, end_ts) overlaps any live broadcast.

    :param start_ts: slot start timestamp.
    :param end_ts: slot end timestamp.
    :param lives_timestamps: iterable of ``[live_start, live_end]`` pairs.
    :return: True when at least one live overlaps the slot, False otherwise
        (always a bool, never an implicit None).
    """
    return any(
        # The live starts inside the slot, or the slot starts inside the live.
        (start_ts <= live[0] < end_ts) or (live[0] <= start_ts < live[1])
        for live in lives_timestamps
    )

def find_next_schedule_start_ts(current_ts, lives_timestamps):
    """Return the end of the nearest live starting at or after ``current_ts``.

    :param current_ts: timestamp from which to look ahead.
    :param lives_timestamps: iterable of ``[live_start, live_end]`` pairs.
    :return: ``live_end`` of the live with the smallest ``live_start`` that
        is >= ``current_ts``; 1e20 when there is no such live.

    NOTE(review): lives already in progress at ``current_ts`` (started
    before it) are ignored, as in the original implementation.
    """
    no_live_ahead = 1e20

    nearest_start = None
    resume_ts = no_live_ahead
    for live in lives_timestamps:
        live_start, live_end = live[0], live[1]
        if live_start < current_ts:
            continue
        # Track the nearest *start* separately from the returned *end*:
        # comparing starts against a previously stored end picks the wrong
        # live when several lives lie ahead.
        if nearest_start is None or live_start < nearest_start:
            nearest_start = live_start
            resume_ts = live_end

    return resume_ts

def make_schedule(possible_content_ids_stats, start_ts,
                  end_ts, repeatability, with_ads,
                  ads_period, lives_timestamps=None):
    """Fill [start_ts, end_ts) with randomly ordered top-viewed content.

    The pool of candidates is the most viewed content, taken in descending
    ``views`` order until its total duration exceeds ``repeatability``.  The
    pool is reshuffled for every pass and its items are laid out back to
    back; items that would overlap a live broadcast are skipped.  When every
    item of a pass overlaps a live, scheduling resumes at the timestamp
    returned by ``find_next_schedule_start_ts``.

    :param possible_content_ids_stats: mapping ``UUID -> stats`` where stats
        has at least ``views`` and ``duration``.
    :param start_ts: timestamp of the first slot.
    :param end_ts: scheduling stops once the current timestamp reaches it.
    :param repeatability: total pool duration after which no more content is
        added to the pool.
    :param with_ads: ignored - the flag is recomputed for every item.
    :param ads_period: an item gets ``with_ads=True`` when the accumulated
        scheduled duration crosses a multiple of this period while it plays.
    :param lives_timestamps: optional list of ``[live_start, live_end]``.
    :return: tuple ``(records, timestamp reached)``.
    """
    if lives_timestamps is None:
        lives_timestamps = []

    possible_content_ids = sorted(possible_content_ids_stats.items(), key=lambda x : -x[1]["views"])
    best_content_ids = []
    current_duration = 0
    for elem in possible_content_ids:
        best_content_ids.append(elem)
        current_duration += elem[1]["duration"]
        if current_duration > repeatability:
            break

    rasp = []
    current_ts = start_ts
    current_duration = 0
    while current_ts < end_ts:
        shuffle(best_content_ids)
        # Avoid starting a pass with the item that was scheduled last.  Pool
        # entries are (UUID, stats) tuples while `rasp` holds Records, so the
        # UUIDs have to be compared; with a single-item pool a repeat is
        # unavoidable and reshuffling would never terminate.
        while (len(best_content_ids) > 1 and len(rasp) > 0
               and best_content_ids[0][0] == rasp[-1].UUID):
            shuffle(best_content_ids)

        all_elems_intersects_with_live = True
        for elem in best_content_ids:
            duration = elem[1]["duration"]
            if has_intersection_with_live(current_ts, current_ts + duration, lives_timestamps):
                continue
            all_elems_intersects_with_live = False

            # Floor division on purpose: the flag marks the items during
            # which the accumulated duration crosses an ads_period boundary.
            with_ads = (current_duration + duration) // ads_period > current_duration // ads_period
            rasp.append(Record(begin=current_ts,
                               end=current_ts + duration,
                               UUID=elem[0],
                               with_ads=with_ads,
                               dt=datetime.fromtimestamp(current_ts).strftime('%Y-%m-%d %H:%M:%S')))
            current_ts += duration
            current_duration += duration
            if current_ts >= end_ts:
                break

        if all_elems_intersects_with_live:
            # Nothing fits before the live: jump past it.
            current_ts = find_next_schedule_start_ts(current_ts, lives_timestamps)

    return rasp, current_ts

def make_schedule_with_live(content_ids, start_ts,
                            ads_period, live_start, live_end):
    rasp = []

    current_ts = start_ts
    lives_timestamps = [[live_start, live_end]]

    print "new schedule part start time:", current_ts

    total_duration = 0
    for elem in content_ids:
        total_duration += elem[1]["duration"]

    if has_intersection_with_live(current_ts, current_ts + total_duration, lives_timestamps):
        print "have intersection with live"
        content_ids_before_live = []
        duration = 0
        while True:
            duration += content_ids[0][1]["duration"]
            content_ids_before_live.append(content_ids[0])
            del content_ids[0]
            if current_ts + duration >= live_start:
                break

        cut_time = duration - (live_start - current_ts)
        first_elem_duration = content_ids_before_live[0][1]["duration"] - cut_time
        current_duration = 0
        with_ads = (current_duration + first_elem_duration) / ads_period > current_duration / ads_period
        rasp.append(Record(
            begin=current_ts,
            end=current_ts + first_elem_duration,
            UUID=content_ids_before_live[0][0],
            with_ads=with_ads,
            dt=datetime.fromtimestamp(current_ts).strftime('%Y-%m-%d %H:%M:%S')
        ))
        current_ts += first_elem_duration
        current_duration += first_elem_duration
        del content_ids_before_live[0]

        for elem in content_ids_before_live:
            with_ads = (current_duration + elem[1]["duration"]) / ads_period > current_duration / ads_period
            rasp.append(Record(
                begin=current_ts,
                end=current_ts + elem[1]["duration"],
                UUID=elem[0],
                with_ads=with_ads,
                dt=datetime.fromtimestamp(current_ts).strftime('%Y-%m-%d %H:%M:%S')
            ))
            current_ts += elem[1]["duration"]
            current_duration += elem[1]["duration"]

        current_ts = live_end
    else:
        print "no intersection with live"

    current_duration = 0
    for elem in content_ids:
        with_ads = (current_duration + elem[1]["duration"]) / ads_period > current_duration / ads_period
        rasp.append(Record(
            begin=current_ts,
            end=current_ts + elem[1]["duration"],
            UUID=elem[0],
            with_ads=with_ads,
            dt=datetime.fromtimestamp(current_ts).strftime('%Y-%m-%d %H:%M:%S')
        ))
        current_ts += elem[1]["duration"]
        current_duration += elem[1]["duration"]

    print "new schedule part end time:", current_ts

    return rasp, current_ts

def make_repeating_schedule(content_ids, repeating_schedule, ads_period):
    rasp = []

    schedule_size = len(repeating_schedule)

    print "building repeating schedule"

    current_duration = 0
    schedule_index = 0
    prevEnd = 0
    for elem in content_ids:
        if prevEnd > repeating_schedule[schedule_index]:
            raise Exception("repating schedule items intersect")

        with_ads = (current_duration + elem[1]["duration"]) / ads_period > current_duration / ads_period
        rasp.append(Record(
            begin=repeating_schedule[schedule_index],
            end=repeating_schedule[schedule_index] + elem[1]["duration"],
            UUID=elem[0],
            with_ads=with_ads,
            dt=datetime.fromtimestamp(repeating_schedule[schedule_index]).strftime('%Y-%m-%d %H:%M:%S')
        ))
        current_duration += elem[1]["duration"]
        prevEnd = repeating_schedule[schedule_index] + elem[1]["duration"]

        repeating_schedule[schedule_index] += 24 * 60 * 60
        schedule_index += 1
        if schedule_index == schedule_size:
            schedule_index = 0

    print "repeating schedule end time:", prevEnd

    return rasp, prevEnd

def filter_bitrate(recs):
    """Yield the records whose bitrate is strictly above MIN_BITRATE.

    The bitrate is read from the JSON stored in ``Resources['faas_answer']``:
    ``bitrate`` when it is present and truthy, otherwise ``bit_rate``
    (0 when both are missing).
    """
    for rec in recs:
        faas = json.loads(rec["Resources"].get("faas_answer", "{}"))
        raw_bitrate = faas["bitrate"] if faas.get("bitrate") else faas.get("bit_rate", 0)
        if int(raw_bitrate) > MIN_BITRATE:
            yield rec

class make_event_id(object):
    """Mapper that adds an ``eventId`` field to every record.

    The id is the channel id, ``begin`` and ``end`` of the record joined
    with the literal character ``0``.
    """
    def __init__(self, channel_id):
        self.channel_id = channel_id

    def __call__(self, recs):
        for rec in recs:
            fields = rec.to_dict()
            fields.update(
                eventId="{}0{}0{}".format(self.channel_id, rec["begin"], rec["end"])
            )
            yield Record(**fields)

class filter_by_tags(object):
    """Filter keeping the records tagged with at least one whitelisted tag.

    A record matches a tag when ``'[' + tag + ']'`` occurs in its
    ``detailed_tags`` value.
    """
    def __init__(self, tags_white_list):
        self.tags_white_list = tags_white_list

    def __call__(self, recs):
        for rec in recs:
            if any('[' + tag + ']' in rec["detailed_tags"]
                   for tag in self.tags_white_list):
                yield rec

class filter_by_uuids(object):
    """Filter keeping the records whose ``UUID`` is in the given collection."""
    def __init__(self, uuids):
        self.uuids = uuids

    def __call__(self, recs):
        for rec in recs:
            if rec["UUID"] not in self.uuids:
                continue
            yield rec

class filter_active(object):
    """Filter keeping the records whose ``Active`` value is truthy and equals 1."""
    def __call__(self, recs):
        for rec in recs:
            if not rec["Active"]:
                # Missing/zero/None flag: not active.
                continue
            if rec["Active"] == 1:
                yield rec

def get_content_stats(recs):
    """Explode ``tv_online_stats`` into one record per content id.

    For every key of ``rec['tv_online_stats']`` a record
    ``(content_id, views)`` is yielded, where ``views`` is 1 when the
    entry's ``tvt`` is greater than 0 and 0 otherwise.
    """
    for rec in recs:
        per_content = rec["tv_online_stats"]
        for content_id in per_content:
            was_watched = per_content[content_id]["tvt"] > 0
            yield Record(content_id=content_id, views=int(was_watched))

def calc_content_stats(cluster, end_date, days_to_calc_stats):
    """Recalculate the per-UUID view stats table when it is out of date.

    The ``_end_date`` attribute of CONTENT_STATS_PATH (default
    ``2018-01-01``) holds the date of the last calculation.  Nothing is done
    unless ``end_date`` is later than it.  Otherwise the daily stats tables
    of the last ``days_to_calc_stats`` days are concatenated, views are
    summed per content id, joined with STRM_META_PATH, summed again per
    UUID and written to CONTENT_STATS_PATH sorted by views; the attribute
    is then set to ``end_date``.

    :param end_date: datetime of the last day to include.
    """
    last_calculated = datetime.strptime(
        cluster.driver.client.get_attribute(CONTENT_STATS_PATH, '_end_date', '2018-01-01'),
        "%Y-%m-%d"
    )
    if not end_date > last_calculated:
        return

    job = cluster.job()
    daily_tables = get_stats_tables(cluster, job, TV_ONLINE_STATS_PREFIX, end_date, days_to_calc_stats)

    views_by_content = (
        job.concat(*daily_tables)
        .map(get_content_stats, files=nfi_common)
        .groupby('content_id')
        .aggregate(views=na.sum('views'))
    )
    views_by_uuid = (
        views_by_content
        .join(job.table(STRM_META_PATH), by_left='content_id', by_right='JoinKey')
        .project('views', 'UUID', 'chain')
        .groupby('UUID')
        .aggregate(views=na.sum('views'),
                   chain=na.any('chain'))
    )
    views_by_uuid.sort('views').put(CONTENT_STATS_PATH)

    job.run()
    # Remember the covered date only after the job has finished.
    cluster.driver.client.set_attribute(CONTENT_STATS_PATH, '_end_date', end_date.strftime("%Y-%m-%d"))

def get_old_schedule_part(cluster, old_schedule, old_schedule_part, future_gap, current_ts, schema=None):
    """Extract the part of the old schedule that has to be kept.

    When ``old_schedule`` exists, its records with
    ``begin < current_ts + future_gap`` are written, sorted by ``begin``, to
    ``old_schedule_part``.  Otherwise ``old_schedule_part`` is created
    empty: through ``driver.write`` when no schema is given, or through
    ``client.create`` with the given schema.

    :param schema: optional table schema used when creating the empty part.
    :return: ``end`` of the last record of the kept part, or ``current_ts``
        when the kept part is empty or the old schedule does not exist.
    """
    if not cluster.driver.exists(old_schedule):
        if not schema:
            cluster.driver.write(old_schedule_part, [])
        else:
            cluster.driver.client.create("table", old_schedule_part, attributes={"schema": schema})
        return current_ts

    job = cluster.job()
    job.table(old_schedule) \
       .filter(sf.custom(lambda x : x < current_ts + future_gap, 'begin')) \
       .sort('begin') \
       .put(old_schedule_part)
    job.run()

    # Fall back to current_ts when the filter kept nothing; otherwise the
    # name would be unbound at the return below.
    last_schedule_ts = current_ts
    for rec in cluster.driver.read(old_schedule_part):
        last_schedule_ts = rec['end']
    return last_schedule_ts
