use hahn;
pragma yson.DisableStrict;
pragma yt.Pool = "@[pool]";
pragma AnsiInForEmptyOrNullableItemsCollections;
pragma library("get_ugc_meta.sql");
import get_ugc_meta symbols $generate_vcid;
pragma library("stability_common.sql");
import stability_common symbols $mskDateFromTs;

-- Report date. "@[date]" looks like a template placeholder that is substituted
-- before the query runs — TODO confirm the substituted format; the value is
-- compared with create_date and spliced into the sessions path below.
$date = "@[date]";

-- Path of the sessions table for $date.
$sessions_table = "//cubes/video-strm/" || $date || "/sessions";

-- Expands one element of "/Streams" into a list of <|height, bytesize|> structs:
-- one struct per entry of the stream's "/Resolutions" list (height rendered as
-- a string), followed by a single "_total_" struct whose bytesize is the sum
-- over those entries.
$parseInner = ($stream) -> {
    $per_resolution = ListMap(
        Yson::ConvertToList(Yson::YPath($stream, "/Resolutions")),
        ($item) -> {
            return <|
                height: CAST(Yson::LookupUint64($item, "Height") AS String),
                bytesize: Yson::LookupUint64($item, "ByteSize"),
            |>
        }
    );
    -- Synthetic entry carrying the stream-wide byte size.
    $total_row = <|
        height: "_total_",
        bytesize: ListSum(ListMap($per_resolution, ($item) -> ($item.bytesize))),
    |>;
    return ListExtend($per_resolution, [$total_row])
};

-- Parses a transcoder_info JSON string and returns the concatenation of
-- $parseInner's output over every element of its "/Streams" list.
-- Note: each stream contributes its own "_total_" entry.
$parse_transcoder = ($x) -> (
    ListFlatMap(
        Yson::ConvertToList(Yson::YPath(Yson::ParseJson($x), "/Streams")),
        $parseInner
    )
);

-- Video files of service "ugc" that have at least one parsed size entry.
-- Columns: video_file_id, sizes (list built by $parse_transcoder) and
-- duration_sec ("DurationMs" divided by 1000 and cast to UInt64).
-- The size list is computed in the inner select and filtered in the outer one.
$file_data = (
    SELECT
        video_file_id,
        sizes,
        duration_sec
    FROM (
        SELECT
            `video_file.id` AS video_file_id,
            $parse_transcoder(`video_file.transcoder_info`) AS sizes,
            CAST(
                CAST(Yson::LookupUint64(Yson::ParseJson(`video_file.transcoder_info`), "DurationMs") AS Double) / 1000.0
                AS UInt64
            ) AS duration_sec
        FROM `//home/video-hosting/ugc_replica/ugc_video_file`
        WHERE `video_file.service` = "ugc"
    )
    WHERE ListLength(sizes) > 0
);

-- Projection of the ugc_video_meta replica: ids, the UUID produced by
-- $generate_vcid, the creation date produced by $mskDateFromTs, and a flag
-- that is true when moderation_status equals "success".
$meta_data = (
    SELECT
        `video_meta.video_file_id` AS video_file_id,
        `video_meta.id` AS video_meta_id,
        `video_meta.channel_id` AS channel_id,
        $generate_vcid(`video_meta.id`) AS `UUID`,
        $mskDateFromTs(`video_meta.create_time`) AS create_date,
        (`video_meta.moderation_status` = "success") AS passed_moderation
    FROM `//home/video-hosting/ugc_replica/ugc_video_meta`
);

-- Maps a duration in seconds to a bucket label.
-- NULL or non-positive values give "unknown"; otherwise the first matching
-- upper bound wins: 1, 5, 20, 40, 60 minutes, and "60m+" beyond that.
$durationCat = ($secs) -> {
    return CASE
        WHEN $secs IS NULL OR $secs <= 0 THEN "unknown"
        WHEN $secs < 60 THEN "<01m"
        WHEN $secs < 300 THEN "<05m"    -- 5 minutes
        WHEN $secs < 1200 THEN "<20m"   -- 20 minutes
        WHEN $secs < 2400 THEN "<40m"   -- 40 minutes
        WHEN $secs < 3600 THEN "<60m"   -- 60 minutes
        ELSE "60m+"
    END
};

-- Attaches meta attributes to every file row via video_file_id.
-- duration_cat is a two-element list: the file's own duration bucket and
-- "_total_"; it is flattened later so each file also feeds the total bucket.
$joined = (
    SELECT
        f.*,
        m.video_meta_id AS video_meta_id,
        m.`UUID` AS `UUID`,
        m.create_date AS create_date,
        m.channel_id AS channel_id,
        m.passed_moderation AS passed_moderation,
        [$durationCat(f.duration_sec), "_total_"] AS duration_cat
    FROM $file_data AS f
    INNER JOIN ANY $meta_data AS m USING (video_file_id)
);

-- Maps a height label (decimal string, or the literal "_total_") to a
-- resolution bucket.
--   "_total_"            -> "_total_" (passed through)
--   NULL / non-numeric   -> "unknown"
--   numeric              -> "<144", "144-239", ..., "1080", ">1080"
-- The explicit "unknown" branch matters: for a NULL height the CAST yields
-- NULL, every comparison below evaluates to NULL, and without this branch the
-- row would silently land in the ELSE bucket ">1080".
$res_cat = ($res_) -> {
    $res = CAST($res_ as UInt64);
    return case
    when $res_ = "_total_" then "_total_"
    when $res is null then "unknown"
    when $res < 144 then "<144"
    when $res between 144 and 239 then "144-239"
    when $res between 240 and 359 then "240-359"
    when $res between 360 and 575 then "360-575"
    when $res between 576 and 719 then "576-719"
    when $res between 720 and 1079 then "720-1079"
    when $res == 1080 then "1080"
    else ">1080"
    end
};

-- Two sequential flatten steps: first one row per element of sizes (with the
-- height mapped to a resolution bucket), then one row per element of
-- duration_cat. The result is the product of size entries and duration labels.
$flattened = (
    SELECT *
    FROM (
        SELECT
            $res_cat(sizes.height) AS resolution,
            sizes.bytesize AS bytesize,
            s.*
        WITHOUT sizes
        FROM $joined AS s
        FLATTEN LIST BY sizes
    )
    FLATTEN LIST BY duration_cat
);

-- Per-UUID totals from the sessions table of the report date:
-- views = number of sessions with view_time > 0, tvt = sum of view_time.
-- Only UUIDs starting with "v" are kept (presumably the prefix produced by
-- $generate_vcid — confirm).
$views = (
    SELECT
        `UUID`,
        sum(view_time) AS tvt,
        count_if(view_time > 0) AS views
    FROM $sessions_table
    WHERE `UUID` LIKE "v%"
    GROUP BY `UUID`
);

-- Adds view counters to every flattened row, defaulting to 0 when a UUID has
-- no match: views/tvt come from $views, views_3mon/tvt_3mon from the
-- MMA-5428 "top" table (the "3mon" naming suggests a three-month window —
-- TODO confirm against that table's producer).
$join_views = (
    SELECT
        j.*,
        v.views ?? 0 AS views,
        v.tvt ?? 0 AS tvt,
        v_at.views ?? 0 AS views_3mon,
        v_at.tvt ?? 0 AS tvt_3mon
    FROM $flattened AS j
    LEFT JOIN ANY $views AS v
        ON (j.`UUID` = v.`UUID`)
    LEFT JOIN ANY `//home/videolog/MMA-5428-top-ugc-videos/top` AS v_at
        ON (j.`UUID` = v_at.`UUID`)
);

-- Projection of the ugc_channel replica: channel id, owner uid, title and
-- the creation date produced by $mskDateFromTs.
$channel_data = (
    SELECT
        `channel.id` AS channel_id,
        `channel.title` AS channel_name,
        `channel.user_uid` AS owner_id,
        $mskDateFromTs(`channel.create_time`) AS channel_create_date
    FROM `//home/video-hosting/ugc_replica/ugc_channel`
);

-- Adds a display name "<channel_id> <title>" to every row. A missing channel
-- id renders as an empty string and a missing title as "-".
$add_info = (
    SELECT
        (CAST(s.channel_id AS String) ?? "") || " " || (ch.channel_name ?? "-") AS channel_name,
        s.*
    FROM $join_views AS s
    LEFT JOIN ANY $channel_data AS ch USING (channel_id)
);

-- List of up to 100 channel ids with the largest summed bytesize, computed
-- over the rows where both resolution and duration_cat are "_total_".
$top100_by_size = (
    SELECT
        top_by(channel_id, total_bytesize, 100)
    FROM (
        SELECT
            channel_id,
            sum(bytesize) AS total_bytesize
        FROM $add_info
        WHERE duration_cat = "_total_" AND resolution = "_total_"
        GROUP BY channel_id
    )
);

-- List of up to 100 channel ids with the most rows where both resolution and
-- duration_cat are "_total_".
-- NOTE(review): $parseInner emits one "_total_" entry per stream, so a file
-- with several streams is counted several times here — confirm intended.
$top100_by_videos = (
    SELECT
        top_by(channel_id, videos, 100)
    FROM (
        SELECT
            channel_id,
            count(*) AS videos
        FROM $add_info
        WHERE duration_cat = "_total_" AND resolution = "_total_"
        GROUP BY channel_id
    )
);

-- List of up to 100 channel ids with the largest summed views_3mon, computed
-- over the rows where both resolution and duration_cat are "_total_".
$top100_by_views = (
    SELECT
        top_by(channel_id, views_3mon, 100)
    FROM (
        SELECT
            channel_id,
            sum(views_3mon) AS views_3mon
        FROM $add_info
        WHERE duration_cat = "_total_" AND resolution = "_total_"
        GROUP BY channel_id
    )
);

-- Keeps the display name only for channels that appear in at least one of the
-- three top-100 lists (by size, by video count, by views); every other
-- channel is relabelled "other". All remaining columns pass through.
$add_other = (
    SELECT
        IF(
            channel_id IN unwrap($top100_by_size)
                OR channel_id IN unwrap($top100_by_views)
                OR channel_id IN unwrap($top100_by_videos),
            channel_name,
            "other"
        ) AS channel_name,
        r.*
    WITHOUT channel_name
    FROM $add_info AS r
);

-- Returns a two-element list for a row: the row itself, followed by a copy
-- whose channel_name is replaced with "_total_".
$totalize = ($row) -> {
    $as_total = AddMember(RemoveMember($row, "channel_name"), "channel_name", "_total_");
    return ListUnionAll([$row], [$as_total])
};

-- Runs $totalize over every row of $add_other, so each input row is emitted
-- once under its own channel_name and once under "_total_".
$totalized = PROCESS $add_other USING $totalize(TableRow());

-- Aggregates per (channel_name, resolution, duration_cat).
-- "*_today" measures use this date's sessions (views/tvt) or rows whose
-- create_date equals $date (videos_today/size_today).
-- NOTE(review): count(*) counts rows, so a file that contributes several size
-- entries to the same resolution bucket is counted more than once in
-- total_videos — confirm intended.
-- NOTE(review): some(channel_id) picks an arbitrary channel for the "other"
-- and "_total_" groups, which span many channels — confirm downstream use.
$grouped = (
    SELECT
        channel_name,
        resolution,
        duration_cat,
        some(channel_id) AS channel_id,
        count(*) AS total_videos,
        sum(bytesize) AS total_bytesize,
        sum(duration_sec) AS total_duration,
        count_if(NOT passed_moderation) AS videos_failed_moderation,
        sum(views) AS views_today,
        sum(tvt) AS tvt_today,
        sum(views_3mon) AS views_3mon,
        sum(tvt_3mon) AS tvt_3mon,
        count_if(views = 0) AS videos_without_views_today,
        sum_if(bytesize, views = 0) AS total_bytesize_without_views_today,
        count_if(create_date = $date) AS videos_today,
        sum_if(bytesize, create_date = $date) AS size_today
    FROM $totalized
    GROUP BY channel_name, resolution, duration_cat
);

-- Adds owner_id and channel_create_date to every aggregated row by matching
-- the group's channel_id against $channel_data.
$join_channel_metadata = (
    SELECT
        g.*,
        c.owner_id AS owner_id,
        c.channel_create_date AS channel_create_date
    FROM $grouped AS g
    LEFT JOIN ANY $channel_data AS c
        ON (g.channel_id = c.channel_id)
);


-- Final shape of the report rows: constant bucket "ugc", fielddate = $date,
-- the three dimensions unwrapped to non-optional values, and the four
-- "today" measures defaulted to 0. Every other column of
-- $join_channel_metadata passes through unchanged.
$transform_jcm = (
    SELECT
        "ugc" AS bucket,
        $date AS fielddate,
        unwrap(channel_name) AS channel_name,
        unwrap(resolution) AS resolution,
        unwrap(duration_cat) AS duration_cat,
        views_today ?? 0 AS views_today,
        tvt_today ?? 0 AS tvt_today,
        size_today ?? 0 AS size_today,
        total_bytesize_without_views_today ?? 0 AS total_bytesize_without_views_today,
        s.*
    WITHOUT
        channel_name,
        resolution,
        duration_cat,
        views_today,
        tvt_today,
        size_today,
        total_bytesize_without_views_today
    FROM $join_channel_metadata AS s
);

-- Writes the report rows to the stat report Video/Others/Strm/MMA-5355-s3/daily.
-- NOTE(review): ERASE BY (fielddate) presumably replaces rows already stored
-- for the same fielddate, making reruns for a date idempotent — confirm
-- against the stat provider's documentation.
upsert into stat.`Video/Others/Strm/MMA-5355-s3/daily` ERASE BY (fielddate)
select * from $transform_jcm;
