#-*- coding: UTF-8 -*-
from common import *

# Gap value handed to get_old_schedule_part() together with the current
# timestamp; evaluates to 180000.
# NOTE(review): presumably seconds (i.e. 50 hours), because it is used next to
# an int(time()) timestamp -- confirm against get_old_schedule_part().
FUTURE_GAP = 50 * 3600

def _parse_args():
    """Parse the command-line options; every option is required."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--table', type=str, required=True)
    parser.add_argument('--date', type=str, required=True)
    parser.add_argument('--days_to_calc_stats', type=int, required=True)
    parser.add_argument('--min_content_views_count', type=int, required=True)
    parser.add_argument('--channel_id', type=str, required=True)
    parser.add_argument('--repeatability', type=int, required=True)
    parser.add_argument('--schedule_gap', type=int, required=True)
    parser.add_argument('--with_ads', type=int, required=True)
    parser.add_argument('--ads_period', type=int, required=True)
    parser.add_argument('--tags', nargs='+', required=True)
    return parser.parse_args()


def _make_cluster():
    """Build the Hahn cluster handle with the pool and template settings used by every job."""
    return clusters.yt.Hahn().env(
        parallel_operations_limit=10,
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"],
        ),
        templates=dict(
            tmp_root='//tmp',
            title='GetYandexChannelSchedule',
        ),
    )


def _build_possible_content_table(cluster, tags):
    """Create and fill the temp table of candidate content; return its path.

    Rows of ACTUAL_URLS are passed through filter_bitrate and
    filter_by_tags(tags), left-joined with CONTENT_STATS_PATH by UUID and
    sorted by 'views'.
    """
    table_path = "//tmp/msvvitaly/possible_content_ids_stats_table_" + str(time())
    schema = [
        {"name": "ContentGroupID", "required": False, "type": "uint64"},
        {"name": "UUID", "required": False, "type": "string"},
        {"name": "ComputedName", "required": False, "type": "string"},
        {"name": "duration", "required": False, "type": "string"},
        {"name": "views", "required": False, "type": "int64"},
    ]
    cluster.driver.client.create("table", table_path, attributes={"schema": schema})

    job = cluster.job()
    (
        job.table(ACTUAL_URLS)
        .map(filter_bitrate, files=nfi_common)
        .map(filter_by_tags(tags), files=nfi_common)
        .project('UUID', 'ContentGroupID', 'ComputedName',
                 duration=ne.custom(lambda x : x['duration'], 'Resources'))
        # Left join: content without a stats row is kept, with views unset.
        .join(job.table(CONTENT_STATS_PATH), by='UUID', type='left')
        .project('UUID', 'ContentGroupID', 'ComputedName', 'duration', 'views')
        .sort('views')
        .put(table_path)
    )
    job.run()
    return table_path


def _build_stream_data_table(cluster, possible_content_table):
    """Create and fill the temp table of output-stream rows for candidate content; return its path."""
    table_path = "//tmp/alex0512/stream_data_table_old_" + str(time())
    schema = [
        {"name": "UUID", "required": False, "type": "string"},
        {"name": "Data", "required": False, "type": "string"},
        {"name": "Options", "required": False, "type": "string"},
        {"name": "PlaylistGeneration", "required": False, "type": "string"},
        {"name": "StreamType", "required": False, "type": "string"},
        {"name": "UpdateTime", "required": False, "type": "uint64"},
    ]
    cluster.driver.client.create("table", table_path, attributes={"schema": schema})

    job = cluster.job()
    (
        job.table(CONTENT_VERSION_GROUP)
        .join(job.table(possible_content_table), by='ContentGroupID', type='inner')
        .join(job.table(CONTENT_VERSION), by='ContentVersionID', type='inner')
        .map(filter_active(), files=nfi_common)
        .join(job.table(OUTPUT_STREAM), by='ContentVersionID', type='inner')
        .project('UUID', 'Data', 'Options', 'PlaylistGeneration', 'StreamType', 'UpdateTime')
        .put(table_path)
    )
    job.run()
    return table_path


def _load_stream_data(records):
    """Fold stream rows into {UUID: {Data: {'playlist_generation', 'stream_type'}}}.

    For each (UUID, Data) pair a row is dropped only when its UpdateTime is
    strictly lower than the one already stored; otherwise it overwrites the
    stored entry.
    """
    stream_data = defaultdict(dict)
    update_times = defaultdict(dict)
    for rec in records:
        uuid = rec['UUID']
        stream = rec['Data']

        # NOTE(review): the 'deleted' check only runs when Data is not None, so
        # rows without Data are stored under the key None -- confirm intended.
        if stream is not None:
            options = rec['Options']
            if options is not None and 'deleted' in options:
                continue

        # Membership is tested before indexing so the defaultdicts do not
        # auto-create entries for rows that end up being skipped.
        if (uuid in stream_data and stream in stream_data[uuid]
                and rec['UpdateTime'] < update_times[uuid][stream]):
            continue

        if uuid not in stream_data:
            stream_data[uuid] = defaultdict(dict)
            update_times[uuid] = defaultdict(dict)
        if stream not in stream_data[uuid]:
            stream_data[uuid][stream] = defaultdict(dict)
        entry = stream_data[uuid][stream]
        entry['playlist_generation'] = rec['PlaylistGeneration']
        entry['stream_type'] = rec['StreamType']
        update_times[uuid][stream] = rec['UpdateTime']
    return stream_data


def _load_possible_content_stats(records, min_views):
    """Select content that is viewed enough and long enough.

    Returns ({UUID: {'views', 'duration'}}, total duration of the selection).
    A row is kept when views > min_views and duration > MIN_CONTENT_DURATION.
    """
    stats = {}
    total_duration = 0
    for rec in records:
        # Missing/NULL views (no stats row matched in the left join) count as 0.
        views = int(rec.get("views") or 0)

        # 'duration' is a nullable column; int(None) / int('') would abort the
        # whole run, so rows without a usable duration are skipped instead.
        raw_duration = rec.get("duration")
        if raw_duration is None or raw_duration == '':
            continue
        duration = int(raw_duration)

        if views > min_views and duration > MIN_CONTENT_DURATION:
            total_duration += duration
            stats[rec["UUID"]] = {'views' : views, 'duration' : duration}
    return stats, total_duration


def _merge_and_publish(cluster, target_table, channel_id, stream_data,
                       old_schedule_part, new_schedule_part_table, new_schedule_end_ts):
    """Join the new schedule with ACTUAL_URLS, merge it with the old part and replace target_table."""
    merged_table = new_schedule_part_table + "_merged"

    job = cluster.job()
    new_schedule_part_joined = (
        job.table(ACTUAL_URLS)
        .map(map_au(stream_data), files=nfi_common)
        .join(job.table(new_schedule_part_table), by_left='contentUuid', by_right='UUID')
        .project(ne.all(exclude=('pageId', 'yatvChannelPageId', 'begin', 'end')),
                 # Page ids are blanked out when the row's with_ads is falsy.
                 pageId=ne.custom(lambda x, y : x if y else '', 'pageId', 'with_ads'),
                 yatvChannelPageId=ne.custom(lambda x, y : x if y else '', 'yatvChannelPageId', 'with_ads'),
                 withoutTimeline=ne.const('0'),
                 begin=ne.custom(lambda x : int(x), 'begin'),
                 end=ne.custom(lambda x : int(x), 'end'))
        .map(make_event_id(channel_id), files=nfi_common)
    )
    job.concat(job.table(old_schedule_part), new_schedule_part_joined) \
       .sort('begin') \
       .put(merged_table)
    job.run()

    # NOTE(review): remove followed by copy is not atomic -- if the copy fails,
    # target_table no longer exists. The merged temp table is still available.
    if cluster.driver.exists(target_table):
        cluster.driver.remove(target_table)
    cluster.driver.copy(merged_table, target_table)

    cluster.driver.client.set_attribute(target_table, '_max_ts', new_schedule_end_ts)


def main():
    """Extend the channel schedule stored in --table.

    Steps, in order:
      1. calc_content_stats() for the --days_to_calc_stats window ending at --date.
      2. Build a temp table of candidate content and a temp table of its streams.
      3. Read both back, keeping content with more than
         --min_content_views_count views and duration above MIN_CONTENT_DURATION.
      4. Take the old schedule part via get_old_schedule_part() and generate a
         new part with make_schedule() up to now + --schedule_gap.
      5. Merge both parts, replace --table with the result and store the end
         timestamp returned by make_schedule() in its '_max_ts' attribute.
    """
    args = _parse_args()
    cluster = _make_cluster()

    end_date = datetime.strptime(args.date, "%Y-%m-%d")
    calc_content_stats(cluster, end_date, args.days_to_calc_stats)

    possible_content_table = _build_possible_content_table(cluster, args.tags)
    stream_data_table = _build_stream_data_table(cluster, possible_content_table)

    stream_data = _load_stream_data(cluster.driver.read(stream_data_table))
    possible_content_ids_stats, total_duration = _load_possible_content_stats(
        cluster.driver.read(possible_content_table), args.min_content_views_count)

    # Single-argument call form: prints the same text under Python 2 and 3.
    print("Possible videos count {}. Total content duration {}".format(
        len(possible_content_ids_stats), total_duration))

    current_ts = int(time())

    old_schedule_part = "//tmp/msvvitaly/old_schedule_part_" + str(current_ts)
    last_schedule_ts = get_old_schedule_part(cluster, args.table, old_schedule_part, FUTURE_GAP, current_ts)

    # make_schedule() returns its own end timestamp, which replaces the requested one.
    new_schedule_part, new_schedule_end_ts = make_schedule(possible_content_ids_stats,
                                                           last_schedule_ts,
                                                           current_ts + args.schedule_gap,
                                                           args.repeatability,
                                                           args.with_ads,
                                                           args.ads_period)
    new_schedule_part_table = "//tmp/msvvitaly/new_schedule_part" + str(current_ts)
    cluster.driver.write(new_schedule_part_table, new_schedule_part)

    _merge_and_publish(cluster, args.table, args.channel_id, stream_data,
                       old_schedule_part, new_schedule_part_table, new_schedule_end_ts)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
