#-*- coding: UTF-8 -*-
from common import *

# 365 days expressed in seconds; main() passes it to get_old_schedule_part().
FUTURE_GAP = 60 * 60 * 24 * 365


def _optional_column(column_name, column_type):
    """Return the schema entry of a non-required column."""
    return {"name": column_name, "required": False, "type": column_type}


# Column schema used by main() when it creates the schedule tables.
# Every column is optional ("required": False); the order below is the
# column order of the resulting tables.
RESULT_SCHEMA = [
    _optional_column("begin", "int64"),
    _optional_column("ContentGroupID", "uint64"),
    _optional_column("UUID", "string"),
    _optional_column("actors", "string"),
    _optional_column("averageDuration", "int64"),
    _optional_column("comment", "string"),
    _optional_column("contentTypeId", "uint64"),
    _optional_column("contentUuid", "string"),
    _optional_column("countries", "string"),
    _optional_column("cover", "string"),
    _optional_column("directors", "string"),
    _optional_column("dt", "string"),
    _optional_column("end", "int64"),
    _optional_column("eventId", "string"),
    _optional_column("faas", "string"),
    _optional_column("genres", "string"),
    _optional_column("imdbRating", "double"),
    _optional_column("importPoster", "string"),
    _optional_column("importThumbnail", "string"),
    _optional_column("kinopoiskId", "uint64"),
    _optional_column("kpRating", "string"),
    _optional_column("masterPlaylistUri", "string"),
    _optional_column("medianColor", "string"),
    _optional_column("monetizationModel", "string"),
    _optional_column("name", "string"),
    _optional_column("onto_category", "string"),
    _optional_column("onto_genre", "string"),
    _optional_column("onto_id", "string"),
    _optional_column("originalName", "string"),
    _optional_column("pageId", "string"),
    _optional_column("playlistGeneration", "string"),
    _optional_column("primeRating", "double"),
    _optional_column("producers", "string"),
    _optional_column("releaseDate", "string"),
    _optional_column("released", "string"),
    _optional_column("resources", "any"),
    _optional_column("restrictionAge", "string"),
    _optional_column("streamType", "string"),
    _optional_column("technicalName", "string"),
    _optional_column("thumbnail", "string"),
    _optional_column("thumbnailInfo", "string"),
    _optional_column("trackingEvents", "string"),
    _optional_column("tvSeriesEpisodeNumber", "string"),
    _optional_column("tvSeriesSeasonNumber", "string"),
    _optional_column("vh_url", "string"),
    _optional_column("videoPlatformDuration", "uint64"),
    _optional_column("with_ads", "boolean"),
    _optional_column("withoutTimeline", "string"),
    _optional_column("yatvChannelPageId", "string"),
    _optional_column("year", "string"),
    _optional_column("years", "string"),
]

class filter_vods(object):
    """Mapper that keeps only records whose ``ContentTypeID`` equals 45.

    NOTE(review): judging by the class name, 45 is presumably the VOD
    content type id -- confirm against the content type dictionary.
    """

    def __call__(self, recs):
        """Yield, in input order, each record of ``recs`` with ContentTypeID 45."""
        matching = (record for record in recs if record["ContentTypeID"] == 45)
        for record in matching:
            yield record

class filter_by_content_resource_tags(object):
    """Mapper that keeps 'detailed_tags' resource records matching a whitelist.

    A record passes when its ``ResourceName`` is ``'detailed_tags'`` and the
    JSON document stored in its ``Value`` contains at least one whitelisted
    tag (checked with the ``in`` operator on the decoded value).
    """

    def __init__(self, tags_white_list):
        """Remember the iterable of tags that make a record acceptable."""
        self.tags_white_list = tags_white_list

    def __call__(self, recs):
        """Yield each matching record of ``recs`` once, in input order."""
        for record in recs:
            if record['ResourceName'] == 'detailed_tags':
                # Value is only decoded for 'detailed_tags' rows.
                parsed_tags = json.loads(record['Value'])
                if any(tag in parsed_tags for tag in self.tags_white_list):
                    yield record

def _parse_args():
    """Declare the command-line options of the schedule builder and parse them."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--table', type=str, required=True)
    parser.add_argument('--days_to_calc_stats', type=int, required=True)
    parser.add_argument('--channel_id', type=str, required=True)
    parser.add_argument('--ads_period', type=int, required=True)
    parser.add_argument('--content_uuid_table', type=str, default=None)
    parser.add_argument('--tags', nargs='+', default=None)
    parser.add_argument('--channel_start', type=int, default=0)
    parser.add_argument('--current_start', type=int, default=0)
    parser.add_argument('--repeating_schedule', type=str, default=None)
    parser.add_argument('--repeating_schedule_start_date', type=str, default=None)
    parser.add_argument('--live_start', type=int, required=True)
    parser.add_argument('--live_end', type=int, required=True)
    parser.add_argument('--default_gmt', type=int, default=0)
    parser.add_argument('--current_gmt', type=int, default=0)
    parser.add_argument('--auto_fields', type=str, default=None)
    parser.add_argument('--page_id', type=str, default=None)
    parser.add_argument('--min_content_views_count', type=int, default=None)
    parser.add_argument('--repeatability', type=int, default=None)
    parser.add_argument('--schedule_gap', type=int, default=None)
    parser.add_argument('--with_ads', type=int, default=1)
    return parser.parse_args()


def main():
    """Rebuild the schedule table named by ``--table``.

    Steps, in the order they run:

    1. Parse the options and shift the start/live timestamps by
       ``(default_gmt - current_gmt)`` hours.
    2. Collect the content UUIDs, either from ``--content_uuid_table`` or,
       when only ``--tags`` is given, from content groups whose
       ``detailed_tags`` resource contains one of the tags.
    3. Load content group rows, resources and streams of those UUIDs into
       in-memory dictionaries, and read per-UUID view counts.
    4. Build the new schedule part with ``make_schedule`` (tags mode),
       ``make_schedule_with_live`` or ``make_repeating_schedule``.
    5. Concatenate it with the old schedule part, sort by ``begin``, replace
       ``--table`` with the result and store the schedule end timestamp in
       its ``_max_ts`` attribute.

    Raises:
        Exception: if only one of ``--repeating_schedule`` and
            ``--repeating_schedule_start_date`` is given; if ``--tags`` is
            given without ``--schedule_gap``; if neither
            ``--content_uuid_table`` nor ``--tags`` is given; if the live
            schedule mode is selected while both ``--current_start`` and
            ``--channel_start`` are 0.
    """
    args = _parse_args()

    # NOTE(review): the "* 60 * 60" suggests both GMT options are hour offsets.
    shift_time = (args.default_gmt - args.current_gmt) * 60 * 60
    # 0 means "not set" for the two start options, so it is left unshifted.
    if args.channel_start != 0:
        args.channel_start += shift_time
    if args.current_start != 0:
        args.current_start += shift_time
    args.live_start += shift_time
    args.live_end += shift_time

    # The two repeating-schedule options only make sense together.
    if (args.repeating_schedule is None) != (args.repeating_schedule_start_date is None):
        raise Exception("repeating_schedule and repeating_schedule_start_date are inconsistent")

    # Tags mode computes "current_ts + schedule_gap" at the very end; fail
    # before any cluster job is started instead of with a late TypeError.
    if args.tags is not None and args.schedule_gap is None:
        raise Exception("schedule_gap is required when tags are given")

    repeating_schedule = None
    if args.repeating_schedule is not None:
        start_date = datetime.strptime(args.repeating_schedule_start_date, '%Y-%m-%d')
        start_date_timestamp = int(mktime(start_date.timetuple())) + shift_time
        # Each JSON entry is an offset from the start date, multiplied by
        # 3600 to become an absolute timestamp.
        repeating_schedule = [
            start_date_timestamp + offset * 60 * 60
            for offset in json.loads(args.repeating_schedule)
        ]

    # Use a private copy so the module-level RESULT_SCHEMA is never mutated,
    # and one flag for both the schema column and the row field so they
    # cannot disagree (an empty --auto_fields used to add the field to the
    # rows but not the column to the schema).
    result_schema = list(RESULT_SCHEMA)
    has_auto_fields = bool(args.auto_fields)
    if has_auto_fields:
        result_schema.append({"name": "auto_fields", "required": False, "type": "string"})

    cluster = clusters.yt.Hahn().env(
        parallel_operations_limit=10,
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"]
        ),
        templates=dict(
            tmp_root='//tmp',
            title='GetYandexChannelSchedule'
        )
    )

    uuid_to_resource_table = "//tmp/alex0512/uuid_to_resource_table_" + str(time()) + "_" + str(args.current_gmt)
    uuid_to_stream_table = "//tmp/alex0512/uuid_to_stream_table_" + str(time()) + "_" + str(args.current_gmt)
    content_group_table = "//tmp/alex0512/content_group_table_" + str(time()) + "_" + str(args.current_gmt)

    # An explicit UUID table takes precedence over tag-based selection.
    if args.content_uuid_table is not None:
        uuids = [rec["UUID"] for rec in cluster.driver.read(args.content_uuid_table)]
    elif args.tags is not None:
        uuids_temp_table = "//tmp/alex0512/uuids_temp_table_" + str(time()) + "_" + str(args.current_gmt)
        job = cluster.job()
        job.table(CONTENT_GROUP) \
            .map(filter_vods(), files=nfi_common) \
            .join(job.table(CONTENT_RESOURCE), by='ContentGroupID', type='inner') \
            .map(filter_by_content_resource_tags(args.tags), files=nfi_common) \
            .project('UUID') \
            .put(uuids_temp_table)
        job.run()
        uuids = [rec["UUID"] for rec in cluster.driver.read(uuids_temp_table)]
    else:
        raise Exception("no content source for the schedule")

    # One job produces three tables: content groups, their resources and
    # their output streams, all restricted to the selected UUIDs.
    job = cluster.job()
    content_ids = job.table(CONTENT_GROUP) \
        .map(filter_by_uuids(uuids), files=nfi_common)

    job.table(CONTENT_RESOURCE) \
        .join(content_ids, by='ContentGroupID', type='inner') \
        .project('UUID', 'ResourceName', 'Value') \
        .put(uuid_to_resource_table)

    job.table(CONTENT_VERSION_GROUP) \
        .join(content_ids, by='ContentGroupID', type='inner') \
        .join(job.table(CONTENT_VERSION), by='ContentVersionID', type='inner') \
        .map(filter_active(), files=nfi_common) \
        .join(job.table(OUTPUT_STREAM), by='ContentVersionID', type='inner') \
        .project('UUID', 'Data', 'Options', 'PlaylistGeneration', 'StreamType') \
        .put(uuid_to_stream_table)

    content_ids.put(content_group_table)
    job.run()

    # The three lookups below are read later with .get(uuid), which yields
    # None (and inserts nothing) for UUIDs that have no row.
    uuid_to_content_group = defaultdict(dict)
    for rec in cluster.driver.read(content_group_table):
        uuid_to_content_group[rec["UUID"]] = rec

    uuid_to_resource = defaultdict(dict)
    for rec in cluster.driver.read(uuid_to_resource_table):
        uuid_to_resource[rec["UUID"]][rec["ResourceName"]] = rec["Value"]

    uuid_to_stream = defaultdict(dict)
    for rec in cluster.driver.read(uuid_to_stream_table):
        # NOTE(review): only rows that have Data AND mention 'deleted' in
        # Options are skipped; rows with Data == None are still stored and
        # may overwrite an earlier stream of the same UUID. Kept as is --
        # confirm this is intended.
        if rec['Data'] is not None:
            options = rec['Options']
            if options is not None and options.find('deleted') >= 0:
                continue
        stream = defaultdict(dict)
        stream["master_playlist_uri"] = rec['Data']
        stream["playlist_generation"] = rec['PlaylistGeneration']
        stream["stream_type"] = rec['StreamType']
        uuid_to_stream[rec["UUID"]] = stream

    date = datetime.now()
    calc_content_stats(cluster, date, args.days_to_calc_stats)

    views_table = "//tmp/alex0512/views_table_" + str(time()) + "_" + str(args.current_gmt)
    job = cluster.job()
    job.table(CONTENT_STATS_PATH) \
        .map(filter_by_uuids(uuids), files=nfi_common) \
        .project('UUID', 'views') \
        .put(views_table)
    job.run()

    # A missing views value counts as 0 views.
    views_dict = {}
    for rec in cluster.driver.read(views_table):
        views_dict[rec['UUID']] = rec['views'] if rec['views'] is not None else 0

    min_views = args.min_content_views_count
    total_duration = 0
    possible_content_ids_stats = []
    for uuid in uuids:
        resources = uuid_to_resource.get(uuid)
        # Content without a 'duration' resource cannot be scheduled.
        if resources is None or 'duration' not in resources:
            continue
        duration = int(resources['duration'])
        views = views_dict.get(uuid, 0)
        if args.tags is not None:
            # Without --min_content_views_count there is no views threshold
            # (the same outcome the Python 2 "int > None" comparison gave).
            enough_views = min_views is None or views > min_views
            if not (enough_views and duration > MIN_CONTENT_DURATION):
                continue
        possible_content_ids_stats.append((uuid, {'duration': duration, 'views': views}))
        total_duration += duration

    # Single-argument call form: prints the same text on Python 2 and 3.
    print("Possible videos count {}. Total content duration {}".format(len(possible_content_ids_stats), total_duration))

    current_ts = int(time())

    old_schedule_part = "//tmp/alex0512/old_schedule_part_" + str(current_ts) + "_" + str(args.current_gmt)
    last_schedule_ts = get_old_schedule_part(cluster, args.table, old_schedule_part, FUTURE_GAP, current_ts, result_schema)
    # NOTE(review): a result equal to current_ts presumably means there is
    # no usable old schedule; the channel start (if set) is used instead.
    if last_schedule_ts == current_ts and args.channel_start != 0:
        last_schedule_ts = args.channel_start

    if args.tags is not None:
        new_schedule_end_ts = current_ts + args.schedule_gap
        possible_content_ids_stats_dict = dict(possible_content_ids_stats)
        new_schedule_part, new_schedule_end_ts = make_schedule(
            possible_content_ids_stats_dict,
            last_schedule_ts,
            new_schedule_end_ts,
            args.repeatability,
            args.with_ads,
            args.ads_period
        )
    elif repeating_schedule is None:
        # An explicit current start wins; otherwise continue from the last
        # known schedule timestamp, but only when a channel start is set.
        if args.current_start != 0:
            start_ts = args.current_start
        elif args.channel_start != 0:
            start_ts = last_schedule_ts
        else:
            raise Exception('current_start and channel_start both are 0')

        new_schedule_part, new_schedule_end_ts = make_schedule_with_live(
            possible_content_ids_stats,
            start_ts,
            args.ads_period,
            args.live_start,
            args.live_end
        )
    else:
        new_schedule_part, new_schedule_end_ts = make_repeating_schedule(
            possible_content_ids_stats,
            repeating_schedule,
            args.ads_period
        )

    schedule_items = []
    for rec in new_schedule_part:
        uuid = rec.UUID
        item = make_schedule_item(rec, uuid_to_content_group.get(uuid), uuid_to_resource.get(uuid), uuid_to_stream.get(uuid))
        if args.page_id is not None:
            item["pageId"] = args.page_id
            item["yatvChannelPageId"] = args.page_id
        item["withoutTimeline"] = "0"
        item["begin"] = int(rec.begin)
        item["end"] = int(rec.end)
        item["eventId"] = "{}0{}0{}".format(args.channel_id, item["begin"], item["end"])
        if has_auto_fields:
            item["auto_fields"] = args.auto_fields
        schedule_items.append(Record(**item))

    new_schedule_part_table = "//tmp/alex0512/new_schedule_part" + str(current_ts) + "_" + str(args.current_gmt)
    merged_table = new_schedule_part_table + "_merged"
    cluster.driver.client.create("table", new_schedule_part_table, attributes={"schema": result_schema})
    cluster.driver.write(new_schedule_part_table, schedule_items)

    job = cluster.job()
    cluster.driver.client.create("table", merged_table, attributes={"schema": result_schema})
    job.concat(job.table(old_schedule_part), job.table(new_schedule_part_table)) \
        .sort('begin') \
        .put(merged_table)

    job.run()

    # Replace the target table with the merged schedule.
    if cluster.driver.exists(args.table):
        cluster.driver.remove(args.table)
    cluster.driver.copy(merged_table, args.table)

    cluster.driver.client.set_attribute(args.table, '_max_ts', new_schedule_end_ts)

# Run the schedule builder only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
