use hahn;
pragma library("stability_common.sql");
PRAGMA AnsiInForEmptyOrNullableItemsCollections;
pragma yson.DisableStrict;
pragma yt.MaxRowWeight = "64M";
pragma DqEngine = "disable";
import stability_common symbols $refFromWrapper, $getVideoContentId, $stationMaxCrutch, $getPlatformExt;
pragma library("quality_report_avglog_common.sql");
import quality_report_avglog_common symbols $parsePlayerState, $getPlayerStateFromData;

-- Lower and upper table-name bounds for the log range scanned by $tmp.
-- Both are set to the same value, so only tables named $date fall in the range.
-- $date is not defined in this file — presumably passed in as a query
-- parameter (TODO confirm).
$date_from = $date;
$date_to = $date;

-- Returns true when the stream URL contains one of the ad markers
-- ("-canvas-" or "-bsvideo-"). A NULL url yields false, because FIND returns
-- NULL for it and the IS NOT NULL checks then evaluate to false.
$isAd = ($streamUrl) -> {
    $has_canvas = FIND($streamUrl, "-canvas-") IS NOT NULL;
    $has_bsvideo = FIND($streamUrl, "-bsvideo-") IS NOT NULL;
    RETURN $has_canvas OR $has_bsvideo;
};

-- PlayerAlive events read from the `logs/strm-gogol-log/1d` tables in the
-- [$date_from, $date_to] range, one output row per log event.
--   fielddate        – name of the source table (TableName()).
--   state            – list of parsed player states; it is exploded into
--                      separate rows by $tmp_flattened.
--   video_content_id – "AD" for ad streams (see $isAd), otherwise the logged
--                      videoContentId.
$tmp = (
select
    TableName() as fielddate,
    vsid as VSID,
    version,
    labels_from,
    additionalParameters_from_block,
    additionalParameters_stream_block,
    userAgent,
    service,
    -- First try the single state extracted by $getPlayerStateFromData; the
    -- right-hand side of `??` parses every element of /data/states instead.
    -- NOTE(review): AsList(...) looks non-nullable, in which case the `??`
    -- fallback would never be used — confirm against the definitions of
    -- $parsePlayerState / $getPlayerStateFromData in the imported library.
    AsList($parsePlayerState(
        $getPlayerStateFromData(data)
    )) ?? ListMap(Yson::ConvertToList(Yson::YPath(
        Yson::ParseJson(data), "/data/states"
    )), $parsePlayerState) as state,
    IF($isAd(streamUrl), "AD", videoContentId) as video_content_id,
    yandexuid as Yandexuid
from range(
    `logs/strm-gogol-log/1d`, $date_from, $date_to
-- WITH COLUMNS declares yandexuid as an optional String column of the input.
) with columns Struct<yandexuid:String?>
where eventName == "PlayerAlive"
);

-- Explodes the `state` list of every $tmp row: one output row per player
-- state, with all other columns repeated.
$tmp_flattened = (
    SELECT
        t.*
    FROM $tmp AS t
    FLATTEN LIST BY state
);

-- Shortens zen referrers: a value starting with "zen:" is cut down to its
-- first two colon-separated parts ("zen:a:b" -> "zen:a"); any other value is
-- returned unchanged.
$wrapZen = ($ref_from) -> {
    $leading_parts = ListTake(String::SplitToList($ref_from, ":"), 2);
    $shortened = String::JoinFromList($leading_parts, ":");
    RETURN IF($ref_from LIKE "zen:%", $shortened, $ref_from);
};

-- Builds the list of ref_from values a session is attributed to.
--   $rf – raw ref_from label, $ua – user agent,
--   $fb / $sb – the "from block" / "stream block" additional parameters.
-- The list contains, skipping NULL entries:
--   1. the zen-shortened ref_from ($wrapZen);
--   2. its $refFromWrapper form, when that differs from (1);
--   3. "zen/<fb>" when ref_from is "zen" and $fb is set;
--   4. "zen/<fb>/<sb>" when additionally $sb is set.
-- Every entry is finally passed through $stationMaxCrutch together with $ua.
$processRefFrom = ($rf, $ua, $fb, $sb) -> {
    $base = $wrapZen($rf);
    $wrapped = $refFromWrapper($base);
    $zen_with_block = ($base = "zen" and $fb is not null);
    $zen_with_both = ($base = "zen" and $fb is not null and $sb is not null);
    $candidates = ListNotNull([
        $base,
        IF($base != $wrapped, $wrapped),
        IF($zen_with_block, "zen/" || $fb),
        IF($zen_with_both, "zen/" || $fb || "/" || $sb),
    ]);
    RETURN ListMap($candidates, ($ref) -> ($stationMaxCrutch($ref, $ua)));
};

-- Classifies a session by the bandwidthEstimate values of its states:
--   "unknown" – no non-NULL estimates;
--   "slow"    – at least 60% of the estimates are <= 500000;
--   "normal"  – otherwise.
$getBandwidthCat = ($states) -> {
    $known = ListNotNull(ListMap($states, ($s) -> ($s.bandwidthEstimate)));
    $known_cnt = ListLength($known);
    $slow_cnt = ListLength(ListFilter($known, ($bw) -> ($bw <= 500000)));
    RETURN CASE
        WHEN $known_cnt = 0 OR $known IS NULL THEN "unknown"
        WHEN $slow_cnt / CAST($known_cnt AS Double) >= 0.6 THEN "slow"
        ELSE "normal"
    END;
};

-- One row per playback session (VSID, video_content_id, fielddate).
-- Only states that have timestamp, watchedTime, stalledTime and stalledCount
-- are kept. Scalar attributes are collapsed with MAX; the states are gathered
-- into `unprocessed_states`, sorted by timestamp and capped at 5000 entries.
-- `bandwidth_cat` is computed from all gathered states, before the cap.
$src_ = (
    SELECT
        VSID,
        video_content_id,
        fielddate,
        MAX(version) AS version,
        MAX(Yandexuid) AS yandexuid,
        MAX(additionalParameters_from_block) AS additionalParameters_from_block,
        MAX(additionalParameters_stream_block) AS additionalParameters_stream_block,
        $getPlatformExt(MAX(service), MAX(userAgent), VSID) AS platform,
        $processRefFrom(
            MAX(labels_from),
            MAX(userAgent),
            MAX(additionalParameters_from_block),
            MAX(additionalParameters_stream_block)
        ) AS ref_from,
        $getBandwidthCat(AGGREGATE_LIST(state)) AS bandwidth_cat,
        ListTake(
            ListSort(AGGREGATE_LIST(state), ($s) -> ($s.`timestamp`)),
            5000
        ) AS unprocessed_states
    FROM $tmp_flattened
    WHERE
        state.`timestamp` IS NOT NULL
        AND state.watchedTime IS NOT NULL
        AND state.stalledTime IS NOT NULL
        AND state.stalledCount IS NOT NULL
    GROUP BY
        VSID,
        video_content_id,
        fielddate
);
-- Same rows as $src_, but ad sessions (video_content_id = "AD") get the
-- "_Ads" suffix appended to their platform (a NULL platform becomes "_Ads").
$src = (
    SELECT
        IF(
            s.video_content_id = "AD",
            (s.platform ?? "") || "_Ads",
            s.platform
        ) AS platform,
        s.* WITHOUT platform
    FROM $src_ AS s
);

-- Element type of the state list accepted by the Python $mapper: it is used
-- as List<$unprocessed_state_type> in the Callable signature of $mapper.
-- All fields are optional except `is_ad`.
-- NOTE(review): the states fed to $mapper come from $parsePlayerState
-- (imported); this struct presumably has to match its output — confirm when
-- either side changes.
$unprocessed_state_type = Struct<
    'capHeight':Int64?,
    'capWidth':Int64?,
    'height':Int64?,
    'isMuted':Bool?,
    'isVisible':Bool?,
    'is_ad':Bool,
    'maxHeight':Int64?,
    'maxWidth':Int64?,
    'stalledReason': String?,
    'stalledCount':Int64?,
    'stalledTime':Double?,
    'state':String?,
    'timestamp':Uint64?,
    'watchedTime':Double?,
    'width':Int64?
>;

-- Return type declared for the Python $mapper: an optional 3-tuple.
--   .0 – list of entries per (res_cat, muted_cat) with `tvt` and a list of
--        `avglogs` values; read as resolution_info in $tmp_g.
--   .1 – list of entries per stalled_reason with stall count and stall time;
--        read as stalled_info in $tmp_g.
--   .2 – optional struct with res_cat and a single avglog; read as
--        first_avglog_info in $tmp_g.
-- A NULL tuple means the session is dropped ($tmp_g filters `mapped is not null`).
$output_type = Tuple<
    List<Struct<
        "res_cat": String,
        "muted_cat": String,
        "tvt": Double,
        "avglogs": List<Double>
    >>,
    List<Struct<
        "stalled_reason": String,
        "stalled_count": Uint64,
        "stalled_time": Double,
    >>,
    Struct<
        "res_cat": String,
        "avglog": Double,
    >?
>?;

-- Python UDF taken from the attached file list_processor.py (function name
-- `mapper`). It takes the list of session states and returns $output_type.
-- The script itself is not part of this file; its logic is not described here.
$mapper = Python::mapper(
    Callable<(List<$unprocessed_state_type>)->$output_type>, FileContent("list_processor.py")
);

-- Replaces non-finite numbers (as judged by Math::IsFinite) with NULL and
-- passes finite values through.
$nullwrap = ($x) -> {
    $is_finite = Math::IsFinite($x);
    RETURN IF($is_finite, $x);
};

-- Runs the Python $mapper over the state list of every session and keeps the
-- session keys and slicing dimensions next to the mapper output (`mapped`).
$tmp_g_ = (
    SELECT
        s.fielddate AS fielddate,
        s.VSID AS VSID,
        s.video_content_id AS video_content_id,
        s.platform AS platform,
        s.ref_from AS ref_from,
        s.bandwidth_cat AS bandwidth_cat,
        $mapper(s.unprocessed_states) AS mapped
    FROM $src AS s
);

-- Unpacks the mapper tuple into named columns and drops sessions for which
-- the mapper returned NULL:
--   mapped.0 -> resolution_info, mapped.1 -> stalled_info,
--   mapped.2 -> first_avglog_info (left optional).
$tmp_g = (
    SELECT
        Unwrap(mapped.0) AS resolution_info,
        Unwrap(mapped.1) AS stalled_info,
        mapped.2 AS first_avglog_info,
        t.* WITHOUT mapped
    FROM $tmp_g_ AS t
    WHERE mapped IS NOT NULL
);

-- Produces the platform "total" copies of a row. Every row yields a copy
-- with platform "_total_"; rows whose platform matches "StreamPlayer_%" or
-- "AndroidPlayer_%" additionally yield a copy with platform
-- "StreamPlayer_total" / "AndroidPlayer_total".
-- The input row itself is not part of the result.
$totalizePlatform = ($x) -> {
    $without_platform = RemoveMember($x, "platform");
    $overall = AddMember($without_platform, "platform", "_total_");
    $stream_total = AddMember($without_platform, "platform", "StreamPlayer_total");
    $android_total = AddMember($without_platform, "platform", "AndroidPlayer_total");
    RETURN CASE
        WHEN $x.platform LIKE "StreamPlayer_%" THEN [$overall, $stream_total]
        WHEN $x.platform LIKE "AndroidPlayer_%" THEN [$overall, $android_total]
        ELSE [$overall]
    END;
};

-- Grouping key of a resolution_info entry: the (res_cat, muted_cat) pair.
$_microreduce_get_key = ($ri) -> {
    RETURN ($ri.res_cat, $ri.muted_cat);
};

-- Collapses resolution_info entries that share one (res_cat, muted_cat) key
-- into a single entry: categories come from the first element, tvt is the
-- sum (0 for no data), avglogs is the average of the per-entry values with
-- non-finite results replaced by NULL.
$_microreduce_reduce_list = ($list) -> {
    $first = ListHead($list);
    $tvt_values = ListMap($list, ($item) -> ($item.tvt));
    $avglog_values = ListMap($list, ($item) -> ($item.avglogs));
    RETURN AsStruct(
        Unwrap($first.res_cat) AS res_cat,
        Unwrap($first.muted_cat) AS muted_cat,
        (ListSum($tvt_values) ?? 0) AS tvt,
        $nullwrap(ListAvg($avglog_values)) AS avglogs
    );
};

-- Groups resolution_info entries by (res_cat, muted_cat) and reduces every
-- group to one entry with $_microreduce_reduce_list. The result has one entry
-- per distinct key.
$microreduce = ($resolution_info) -> {
    $distinct_keys = ListUniq(ListMap($resolution_info, $_microreduce_get_key));
    RETURN ListMap($distinct_keys, ($key) -> {
        $bucket = ListFilter(
            $resolution_info,
            ($item) -> ($_microreduce_get_key($item) = $key)
        );
        RETURN $_microreduce_reduce_list($bucket);
    });
};

-- Expands one session row into all the "total" slices it contributes to.
-- Steps:
--   1. resolution_info: the avglogs list of every entry is replaced by its
--      average, "_total_" copies are added for muted_cat and then for
--      res_cat, and entries with equal keys are merged by $microreduce.
--   2. stalled_info: a "_total_" stalled_reason entry with the summed stall
--      time and count is appended.
--   3. ref_from: one output row per ref_from value plus "_total_", and
--      "_total_without_disk_" when "disk" is not among the values.
--   4. platform: the totals from $totalizePlatform are appended.
--   5. bandwidth_cat: a "_total_" copy of every row is appended.
$totalize = ($row) -> {
    -- 1. resolution_info
    $averaged = ListMap($row.resolution_info, ($item) -> (AddMember(
        RemoveMember($item, "avglogs"), "avglogs", ListAvg($item.avglogs)
    )));
    $muted_totals = ListMap($averaged, ($item) -> (AddMember(
        RemoveMember($item, "muted_cat"), "muted_cat", "_total_"
    )));
    $with_muted_totals = ListUnionAll($averaged, $muted_totals);
    $res_totals = ListMap($with_muted_totals, ($item) -> (AddMember(
        RemoveMember($item, "res_cat"), "res_cat", "_total_"
    )));
    $with_all_totals = ListUnionAll($with_muted_totals, $res_totals);
    $row_with_res = AddMember(
        RemoveMember($row, "resolution_info"),
        "resolution_info",
        $microreduce($with_all_totals)
    );

    -- 2. stalled_info
    $stalled_total = [<|
        "stalled_time": ListSum(ListMap($row_with_res.stalled_info, ($x)->($x.stalled_time))) ?? 0.0,
        "stalled_count": ListSum(ListMap($row_with_res.stalled_info, ($x)->($x.stalled_count))) ?? 0,
        "stalled_reason": "_total_",
    |>];
    $row_with_stalled = AddMember(
        RemoveMember($row_with_res, "stalled_info"),
        "stalled_info",
        ListUnionAll($row_with_res.stalled_info, $stalled_total)
    );

    -- 3. ref_from
    $own_refs = $row_with_stalled.ref_from ?? ListCreate(String);
    $all_refs = IF(
        "disk" in $own_refs,
        ListExtend($own_refs, ["_total_"]),
        ListExtend($own_refs, ["_total_", "_total_without_disk_"]),
    );
    $by_ref = ListMap($all_refs, ($ref) -> (
        AddMember(RemoveMember($row_with_stalled, "ref_from"), "ref_from", $ref)
    ));

    -- 4. platform
    $by_platform = ListUnionAll($by_ref, ListFlatMap($by_ref, $totalizePlatform));

    -- 5. bandwidth_cat
    $bandwidth_totals = ListMap($by_platform, ($r) -> (
        AddMember(RemoveMember($r, "bandwidth_cat"), "bandwidth_cat", "_total_")
    ));
    RETURN ListUnionAll($by_platform, $bandwidth_totals);
};

-- Expands one "first avglog" row into all the "total" slices it contributes
-- to, in this order: ref_from (own values plus "_total_", and
-- "_total_without_disk_" when "disk" is not among them), platform totals
-- from $totalizePlatform, a "_total_" bandwidth_cat copy, a "_total_"
-- res_cat copy.
$totalizeFirstAvglog = ($row) -> {
    $own_refs = $row.ref_from ?? ListCreate(String);
    $all_refs = IF(
        "disk" in $own_refs,
        ListExtend($own_refs, ["_total_"]),
        ListExtend($own_refs, ["_total_", "_total_without_disk_"]),
    );
    $by_ref = ListMap($all_refs, ($ref) -> (
        AddMember(RemoveMember($row, "ref_from"), "ref_from", $ref)
    ));

    $by_platform = ListUnionAll($by_ref, ListFlatMap($by_ref, $totalizePlatform));

    $bandwidth_totals = ListMap($by_platform, ($r) -> (
        AddMember(RemoveMember($r, "bandwidth_cat"), "bandwidth_cat", "_total_")
    ));
    $by_bandwidth = ListUnionAll($by_platform, $bandwidth_totals);

    $res_totals = ListMap($by_bandwidth, ($r) -> (
        AddMember(RemoveMember($r, "res_cat"), "res_cat", "_total_")
    ));
    RETURN ListUnionAll($by_bandwidth, $res_totals);
};

-- Every session row of $tmp_g expanded by $totalize into its "total" slices;
-- $totalize returns a list of rows per input row.
$totalized = process $tmp_g using $totalize(TableRow());

-- Sessions that have a first_avglog_info value, flattened to plain columns
-- and expanded by $totalizeFirstAvglog into their "total" slices.
$totalized_first_avglog = (
    PROCESS (
        SELECT
            fielddate,
            VSID,
            video_content_id,
            platform,
            ref_from,
            bandwidth_cat,
            first_avglog_info.res_cat AS res_cat,
            first_avglog_info.avglog AS avglog
        FROM $tmp_g
        WHERE first_avglog_info IS NOT NULL
    )
    USING $totalizeFirstAvglog(TableRow())
);

-- Turns an empty string into NULL; any other value (including NULL) is
-- returned as is. Used with `?? "-"` to label empty dimensions.
$w = ($x) -> {
    $is_empty = ($x = "");
    RETURN IF($is_empty, NULL, $x);
};

-- Resolution report: $totalized rows exploded by resolution_info entry and
-- aggregated per (fielddate, ref_from, platform, bandwidth_cat, res_cat,
-- muted_cat). Empty or NULL dimension values are reported as "-".
-- The GROUP BY aliases reuse the source column names, so the plain names in
-- the SELECT list refer to the normalized grouping keys.
$grouped_resolution = (
    select
        fielddate,
        ref_from,
        platform,
        bandwidth_cat,
        res_cat,
        muted_cat,
        -- approximate number of distinct (VSID, video_content_id) sessions
        CountDistinctEstimate(AsTuple(VSID, video_content_id)) as `count`,
        sum(resolution_info.tvt) as tvt,
        -- NOTE(review): SUM skips NULL avglogs while COUNT(*) counts every row,
        -- so rows with NULL avglogs lower this mean — confirm this is intended
        -- (AVG would ignore them).
        SUM(resolution_info.avglogs) / CAST(COUNT(*) as Double) as avglog,
        $nullwrap(percentile(resolution_info.avglogs, 0.25)) as avglog_p25,
        $nullwrap(percentile(resolution_info.avglogs, 0.50)) as avglog_p50,
        $nullwrap(percentile(resolution_info.avglogs, 0.75)) as avglog_p75,
        $nullwrap(percentile(resolution_info.avglogs, 0.80)) as avglog_p80,
        $nullwrap(percentile(resolution_info.avglogs, 0.85)) as avglog_p85,
        $nullwrap(percentile(resolution_info.avglogs, 0.90)) as avglog_p90,
        $nullwrap(percentile(resolution_info.avglogs, 0.95)) as avglog_p95,
        $nullwrap(percentile(resolution_info.avglogs, 0.99)) as avglog_p99,
    from $totalized
    flatten list by (resolution_info)
    group by
        $w(fielddate) ?? "-" as fielddate,
        $w(ref_from) ?? "-" as ref_from,
        $w(platform) ?? "-" as platform,
        $w(bandwidth_cat) ?? "-" as bandwidth_cat,
        $w(resolution_info.res_cat) ?? "-" as res_cat,
        $w(resolution_info.muted_cat) ?? "-" as muted_cat
);

-- "First avglog" report: $totalized_first_avglog aggregated per (fielddate,
-- ref_from, platform, bandwidth_cat, res_cat). Empty or NULL dimension values
-- are reported as "-". The GROUP BY aliases reuse the source column names, so
-- the plain names in the SELECT list refer to the normalized grouping keys.
$grouped_first_avglog = (
    select
        fielddate,
        ref_from,
        platform,
        bandwidth_cat,
        res_cat,
        -- approximate number of distinct (VSID, video_content_id) sessions
        CountDistinctEstimate(AsTuple(VSID, video_content_id)) as `count`,
        -- NOTE(review): non-finite avglog values become NULL and are skipped by
        -- SUM, but COUNT(*) still counts their rows — confirm this is intended.
        SUM($nullwrap(avglog)) / CAST(COUNT(*) as Double) as avglog,
        $nullwrap(percentile(avglog, 0.25)) as avglog_p25,
        $nullwrap(percentile(avglog, 0.50)) as avglog_p50,
        $nullwrap(percentile(avglog, 0.75)) as avglog_p75,
        $nullwrap(percentile(avglog, 0.80)) as avglog_p80,
        $nullwrap(percentile(avglog, 0.85)) as avglog_p85,
        $nullwrap(percentile(avglog, 0.90)) as avglog_p90,
        $nullwrap(percentile(avglog, 0.95)) as avglog_p95,
        $nullwrap(percentile(avglog, 0.99)) as avglog_p99,
    from $totalized_first_avglog
    group by
        $w(fielddate) ?? "-" as fielddate,
        $w(ref_from) ?? "-" as ref_from,
        $w(platform) ?? "-" as platform,
        $w(bandwidth_cat) ?? "-" as bandwidth_cat,
        $w(res_cat) ?? "-" as res_cat
);

-- Rows of $grouped_resolution for the grand-total resolution slice
-- (res_cat and muted_cat both "_total_"); joined to the stall report in
-- $add_totals to supply `count` and `tvt`.
$totals = (
    SELECT *
    FROM $grouped_resolution
    WHERE muted_cat = "_total_" AND res_cat = "_total_"
);

-- Session watch time in minutes: tvt of the single resolution_info entry
-- whose res_cat and muted_cat are both "_total_", divided by 60.
-- Returns 0.0 when there is not exactly one such entry.
$getTvtMins = ($res_info) -> {
    $grand_total = ListFilter(
        $res_info,
        ($item) -> ($item.res_cat = "_total_" AND $item.muted_cat = "_total_")
    );
    $has_single_total = (ListLength($grand_total) = 1);
    RETURN IF(
        $has_single_total,
        Unwrap($grand_total[0].tvt ?? 0.0) / 60.0,
        0.0
    );
};

-- Share of stalled time in the whole session (stalled + watched), scaled by
-- 60. Both arguments must be in the same unit; NULL arguments count as 0.
-- A zero-length session gives a non-finite result (callers wrap the call in
-- $nullwrap).
$stalledTimePerMin = ($stalledTime, $watchedTime) -> {
    $stalled = $stalledTime ?? 0;
    $watched = $watchedTime ?? 0;
    $session_length = CAST($stalled + $watched AS Double);
    RETURN $stalled / $session_length * 60.0;
};

-- Stall report: $totalized rows exploded by stalled_info entry and aggregated
-- per (fielddate, ref_from, platform, bandwidth_cat, stalled_reason). Empty
-- or NULL dimension values are reported as "-".
-- Outputs the summed stall time and count plus percentiles of three
-- per-session metrics computed in the inner query.
$grouped_stalled = (
    select
        fielddate,
        ref_from,
        platform,
        bandwidth_cat,
        stalled_reason,
        sum(stalled_info.stalled_time) as stalled_time_total,
        sum(stalled_info.stalled_count) as stalled_count_total,

        $nullwrap(percentile(stalled_count_per_min, 0.25)) as stalled_count_per_min_p25,
        $nullwrap(percentile(stalled_count_per_min, 0.50)) as stalled_count_per_min_p50,
        $nullwrap(percentile(stalled_count_per_min, 0.75)) as stalled_count_per_min_p75,
        $nullwrap(percentile(stalled_count_per_min, 0.95)) as stalled_count_per_min_p95,
        $nullwrap(percentile(stalled_count_per_min, 0.99)) as stalled_count_per_min_p99,

        $nullwrap(percentile(stalled_time_per_min, 0.25)) as stalled_time_per_min_p25,
        $nullwrap(percentile(stalled_time_per_min, 0.50)) as stalled_time_per_min_p50,
        $nullwrap(percentile(stalled_time_per_min, 0.75)) as stalled_time_per_min_p75,
        $nullwrap(percentile(stalled_time_per_min, 0.95)) as stalled_time_per_min_p95,
        $nullwrap(percentile(stalled_time_per_min, 0.99)) as stalled_time_per_min_p99,
        $nullwrap(percentile(stalled_time_per_min_hb, 0.25)) as stalled_time_per_min_hb_p25,
        $nullwrap(percentile(stalled_time_per_min_hb, 0.50)) as stalled_time_per_min_hb_p50,
        $nullwrap(percentile(stalled_time_per_min_hb, 0.75)) as stalled_time_per_min_hb_p75,
        $nullwrap(percentile(stalled_time_per_min_hb, 0.95)) as stalled_time_per_min_hb_p95,
        $nullwrap(percentile(stalled_time_per_min_hb, 0.99)) as stalled_time_per_min_hb_p99,
    from (
        select
            -- stall count divided by the session watch time in minutes
            $nullwrap(stalled_info.stalled_count / $getTvtMins(resolution_info)) as stalled_count_per_min,
            -- stalled_time is divided by 60 so that both arguments are in the
            -- same unit as $getTvtMins (minutes)
            $nullwrap($stalledTimePerMin(stalled_info.stalled_time / 60.0, $getTvtMins(resolution_info))) as stalled_time_per_min,
            -- same metric, but only for sessions with at least 0.5 minutes of
            -- watch time; NULL for shorter sessions
            IF(
                $getTvtMins(resolution_info) >= 0.5,
                $nullwrap($stalledTimePerMin(stalled_info.stalled_time / 60.0, $getTvtMins(resolution_info)))
            ) as stalled_time_per_min_hb,
            t.*
        from $totalized as t
        flatten list by stalled_info
    )
    group by
        $w(fielddate) ?? "-" as fielddate,
        $w(ref_from) ?? "-" as ref_from,
        $w(platform) ?? "-" as platform,
        $w(bandwidth_cat) ?? "-" as bandwidth_cat,
        $w(stalled_info.stalled_reason) ?? "-" as stalled_reason
);

-- Stall report enriched with the session count and total watch time of the
-- matching grand-total resolution slice ($totals), plus two slice-level
-- ratios computed from the summed values:
--   stalled_time_per_min  – $stalledTimePerMin over the slice totals;
--   stalled_count_per_min – stall count per minute of watch time.
-- stalled_time_total and stalled_count_total themselves are not exported.
-- Slices without a matching $totals row keep NULL count/tvt/ratios
-- (LEFT JOIN ANY takes at most one $totals row per key).
$add_totals = (
    select
        `count`,
        tvt,
        -- Both arguments are converted to minutes: $stalledTimePerMin divides
        -- stalled time by (stalled + watched), so they must share one unit.
        -- The per-session call in $grouped_stalled converts both the same way.
        $nullwrap($stalledTimePerMin(stalled_time_total / 60.0, tvt / 60.0)) as stalled_time_per_min,
        $nullwrap(stalled_count_total / (tvt / 60.0)) as stalled_count_per_min,
        s.* without s.stalled_time_total, s.stalled_count_total
    from $grouped_stalled as s
    left join any $totals as t using (fielddate, ref_from, platform, bandwidth_cat)
);

-- Publish the three daily reports. `erase by (fielddate)` presumably replaces
-- the rows already stored for the same fielddate, making re-runs for a date
-- idempotent (TODO confirm against the stat upsert documentation).

-- v5r: resolution / mute slices.
upsert into stat.`Video/Others/Strm/Stability/MMA-3930-quality-v5r/daily` erase by (fielddate)
select * from $grouped_resolution;

-- v5f: first-avglog slices.
upsert into stat.`Video/Others/Strm/Stability/MMA-3930-quality-v5f/daily` erase by (fielddate)
select * from $grouped_first_avglog;

-- v5s: stall slices with slice-level totals.
upsert into stat.`Video/Others/Strm/Stability/MMA-3930-quality-v5s/daily` erase by (fielddate)
select * from $add_totals;
