-- VQS dashboard
use hahn;
pragma AnsiInForEmptyOrNullableItemsCollections;
pragma yt.DefaultMaxJobFails = "1";
pragma yson.DisableStrict;
pragma DqEngine = "disable";
pragma yt.Pool = 'video_player_abt';
pragma library('common.sql');
import common symbols $getQuality;


-- Input logs: one daily table per log, selected by $date.
-- NOTE(review): $date is not defined in this file; presumably it is injected
-- by the caller/runner as a string — confirm.
$access_table = "//logs/strm-access-log/1d/" || $date;
$gogol_table = "//logs/strm-gogol-log/1d/" || $date;

-- Destination table, fully overwritten by the final INSERT.
$dst_table = "home/videoquality/vh_analytics/korovinvas/day_stat/_tmp_table";

-- Quality ladder that numeric values are snapped to.
$QUALITIES = [144, 240, 360, 480, 576, 720, 1080, 1440, 2160, 2880, 4320];

-- Normalises a quality label.
--   $x: quality as a string (may be NULL or non-numeric).
-- Returns:
--   * the closest entry of $QUALITIES (as a string) when $x parses to a positive Uint32;
--   * $x unchanged when it is not a positive integer;
--   * "-" when the result would be NULL.
$chooseQuality = ($x) -> {
    $numeric = cast($x as Uint32);
    $target = cast($numeric as Double);
    -- Sort the ladder by distance to the target and take the closest entry.
    $nearest = ListHead(
        ListSort($QUALITIES, ($candidate) -> (ABS($candidate - $target)))
    );
    $snapped = IF(
        Math::IsFinite($numeric) and $numeric > 0,
        cast($nearest as String),
        $x
    );
    return $snapped ?? "-"
};

-- Picks one service name out of a list of candidates.
--   $list: list of (optional) service names.
-- Returns the maximum non-NULL name that is not "VAS"; if only "VAS" entries
-- exist, returns the maximum of those; NULL when there are no non-NULL names.
$chooseService = ($list) -> {
    $services = ListNotNull($list);
    $non_vas = ListFilter($services, ($name) -> ($name != "VAS"));
    -- ListMax of an empty list is NULL, so the second operand is used only
    -- when nothing but "VAS" (or nothing at all) is present.
    return ListMax($non_vas) ?? ListMax($services)
};

-- Extracts the OS family from a user-agent string.
--   $ua: raw user-agent string.
-- Returns UserAgent::Parse($ua).OSFamily, with the value "Unknown" replaced by NULL.
$wrapOsFamily = ($ua) -> {
    $os_family = UserAgent::Parse($ua).OSFamily;
    return case
        when $os_family = "Unknown" then null
        else $os_family
    end
};

-- Classifies a user-agent string into a device bucket.
--   $ua: raw user-agent string.
-- Returns 'IsTablet', 'IsMobile' or 'IsTV' for the first matching parser flag
-- (checked in that order), otherwise 'Desktop'.
$device_type = ($ua) -> {
    $parsed = UserAgent::Parse($ua);
    return case
        when $parsed.isTablet then 'IsTablet'
        when $parsed.isMobile then 'IsMobile'
        when $parsed.isTV then 'IsTV'
        else 'Desktop'
    end
};

-- Collapses an OS family into the buckets used downstream.
--   $os: OS family name, or NULL when it could not be determined.
-- Returns:
--   * NULL for NULL or "Unknown" input;
--   * the name itself for Windows / Android / iOS / Linux / MacOS;
--   * "Other_OS" for any other known family.
-- NULL must be tested explicitly: in a CASE a NULL condition counts as not
-- matched, so without the check a NULL input would reach the ELSE branch and
-- be reported as "Other_OS", and the downstream `?? ...` fallbacks on
-- os_family would never fire for an undetected OS.
$os_aggregate = ($os) -> {
    return case
        when $os is null or $os = "Unknown" then null
        when $os in ("Windows", "Android", "iOS", "Linux", "MacOS") then $os
        else "Other_OS"
    end
};

-- Compiled pattern capturing the digits that follow "abcID=" as group `abc_id`.
$re_abcid = Re2::Capture("abcID=(?P<abc_id>[0-9]+)");

-- Extracts the abcID value from a string (used on stream URLs).
--   $x: string to search.
-- Returns the captured number as Int32, or NULL when nothing is captured
-- or the cast fails.
$gogol_abcid = ($x) -> {
    $captured = $re_abcid($x);
    return cast($captured.abc_id as Int32)
};

-- Per-request projection of the access log, restricted to user traffic.
-- Derives the quality label, the `from` CGI parameter, the OS bucket and the
-- device bucket for every request.
$access_map = (
    SELECT
        abc_id,
        vsid,
        $chooseQuality($getQuality(request)) AS label,
        Url::GetCGIParam(request, "from") AS labels_from,
        $os_aggregate($wrapOsFamily(user_agent)) AS os_family,
        $device_type(user_agent) AS device,
        CAST(bytes_sent AS UInt64) AS bytes_sent
    FROM $access_table
    WHERE traffic_type = 'users'
);

-- Access-log requests that carry a vsid.
$access_vsid = (
    SELECT *
    FROM $access_map
    WHERE vsid IS NOT NULL
);

-- Bytes sent per (vsid, label, abc_id) for requests that have a vsid.
$access_reduce_traffic = (
    SELECT
        vsid,
        abc_id,
        label,
        SUM(bytes_sent) AS bytes_sent
    FROM $access_vsid
    GROUP BY
        vsid,
        label,
        abc_id
);

-- One row of descriptive attributes per vsid.
-- Reduced by vsid alone: reducing by vsid + label could in theory yield
-- different device / labels_from values for one vsid, which is not wanted.
$access_reduce_other = (
    SELECT
        vsid,
        MAX(device) AS device,
        MAX(labels_from) AS labels_from,
        COALESCE(MAX(os_family), "Unknown_OS") AS os_family
    FROM $access_vsid
    GROUP BY vsid
);

-- Bytes sent by requests without a vsid, grouped by every available attribute.
$access_reduce_no_vsid = (
    SELECT
        device,
        abc_id,
        label,
        os_family,
        labels_from,
        SUM(bytes_sent) AS bytes_sent
    FROM $access_map
    WHERE vsid IS NULL
    GROUP BY
        abc_id,
        os_family,
        labels_from,
        device,
        label
);

-- Derives a content id from the "/"-separated segments of a URL.
--   $request: URL string.
-- Looks at the segments that follow "vod-content":
--   * if at least two segments follow a "kaltura" segment, returns the second of them;
--   * else if the first segment starts with "sid", returns the segment after it;
--   * otherwise returns the first segment.
-- Returns NULL when the selected segment does not exist.
$getVideoContentId = ($request) -> {
    $segments = String::SplitToList($request, "/");
    $after_vod = ListSkipWhileInclusive($segments, ($part) -> ($part != "vod-content"));
    $after_kaltura = ListSkipWhileInclusive($after_vod, ($part) -> ($part != "kaltura"));
    return IF(
        ListLength($after_kaltura) >= 2,
        $after_kaltura[1],
        IF($after_vod[0] like "sid%", $after_vod[1], $after_vod[0])
    )
};

-- Subquery over the gogol log: one row per "PlayerAlive" event with the
-- fields needed for the watched-time calculation.
--   videoContentId: taken from the log, or derived from streamUrl when NULL
--   streamPrj:      labels_videoType, forced to 'VOD' when streamType is 'vod'
--                   but the label says 'LIVE', or when the label is none of
--                   'VOD' / 'LIVE' / 'EVENT' / '-'
--   quality:        smaller of the reported width/height, snapped by $chooseQuality
DEFINE SUBQUERY $gogol_map_pa() AS
    SELECT
        vsid,
        COALESCE(videoContentId, $getVideoContentId(streamUrl)) AS videoContentId,
        CASE
            WHEN `streamType` = 'vod' AND labels_videoType = 'LIVE' THEN 'VOD'
            WHEN labels_videoType NOT IN ('VOD', 'LIVE', 'EVENT', '-')
                AND labels_videoType IS NOT NULL THEN 'VOD'
            ELSE labels_videoType
        END AS streamPrj,
        $gogol_abcid(streamUrl) AS abc_id,
        $chooseQuality(
            CAST(ListMin(ListNotNull([data_state_width, data_state_height])) AS String)
        ) AS quality,
        data_state_watchedTime AS wt,
        clientTimestamp AS ts
    FROM $gogol_table
    WHERE eventName = "PlayerAlive";
END DEFINE;

-- Turns a stream of player heartbeat rows into watched time per quality.
--   $source: subquery template yielding vsid, videoContentId, abc_id,
--            streamPrj, quality, wt (watched-time counter) and ts (client timestamp).
-- Yields one row per (vsid, videoContentId, q, abc_id, streamPrj), where wt_d
-- is the sum of the credited deltas between consecutive rows.
DEFINE SUBQUERY $count_watched_time($source) AS
    -- Attach the previous wt value within each (vsid, videoContentId) stream.
    $with_prev = (
        SELECT
            s.*,
            LAG(wt, 1) OVER by_stream AS prev_wt
        FROM $source() AS s
        WINDOW by_stream AS (
            PARTITION BY vsid, videoContentId
            ORDER BY ts
        )
    );

    -- Keep the first row of every stream and the rows where wt changed.
    $changed = (
        SELECT *
        FROM $with_prev
        WHERE prev_wt IS NULL OR wt != prev_wt
    );

    -- A timestamp whose decimal form has 10 characters is multiplied by 1000;
    -- presumably this converts seconds to milliseconds — confirm.
    $to_millis = ($ts) -> (IF(LENGTH(CAST($ts AS String)) = 10, $ts * 1000, $ts));

    -- Builds (quality, credited delta, abc_id) for a row, or NULL when no
    -- time is credited. Time is credited only when wt grew and the timestamp
    -- gap is within (0, 15] seconds; the credit is the smaller of the wt
    -- growth and the gap.
    $delta_tuple = ($row, $prev_wt, $prev_ts) -> {
        $elapsed = ($to_millis($row.ts) - $to_millis($prev_ts)) / 1000.0;
        $watched = IF($row.wt > $prev_wt, $row.wt - $prev_wt);
        $credited = CASE
            WHEN $watched > 0 AND $elapsed > 0 AND $elapsed <= 15.0
                THEN min_of($watched, $elapsed)
            ELSE NULL
        END;
        RETURN IF(
            $credited IS NOT NULL,
            AsTuple($row.quality, $credited, $row.abc_id)
        )
    };

    -- Compare every remaining row with its predecessor in the same stream.
    $with_delta = (
        SELECT
            s.*,
            $delta_tuple(
                TableRow(),
                LAG(wt, 1) OVER by_stream,
                LAG(ts, 1) OVER by_stream
            ) AS wt_d
        FROM $changed AS s
        WINDOW by_stream AS (
            PARTITION BY vsid, videoContentId
            ORDER BY ts
        )
    );

    -- Sum the credited deltas; quality and abc_id come from the tuple.
    $per_quality = (
        SELECT
            vsid,
            videoContentId,
            abc_id,
            streamPrj,
            q,
            SUM(wt_d.1) AS wt_d
        FROM $with_delta
        WHERE wt_d.0 IS NOT NULL
        GROUP BY
            vsid,
            videoContentId,
            wt_d.0 AS q,
            wt_d.2 AS abc_id,
            streamPrj
    );

    SELECT * FROM $per_quality;
END DEFINE;

-- Watched time per session and quality. The columns are renamed here
-- (q -> label, wt_d -> tvt) for convenience rather than inside the subquery.
-- Rows with more than 86400.0 of accumulated time are dropped.
$watched_time = (
    SELECT
        q AS label,
        wt_d AS tvt,
        s.*
    WITHOUT
        q,
        wt_d
    FROM $count_watched_time($gogol_map_pa) AS s
    WHERE wt_d <= 86400.0
);

-- Descriptive attributes of every gogol event that has a vsid.
-- abc_id is deliberately not extracted here: the `data` column is huge and
-- should not be read unless really necessary. The raw userAgent is not
-- selected either, as it does not seem to be used downstream.
$gogol_other_map = (
    SELECT
        vsid,
        service,
        CASE
            WHEN `streamType` = 'vod' AND labels_videoType = 'LIVE' THEN 'VOD'
            WHEN labels_videoType NOT IN ('VOD', 'LIVE', 'EVENT', '-')
                AND labels_videoType IS NOT NULL THEN 'VOD'
            ELSE labels_videoType
        END AS streamPrj,
        additionalParameters_from_block AS from_block,
        additionalParameters_stream_block AS stream_block,
        labels_from,
        $device_type(userAgent) AS device,
        $os_aggregate($wrapOsFamily(userAgent)) AS os_family
    FROM $gogol_table
    WHERE vsid IS NOT NULL
);

-- One row of gogol attributes per vsid.
-- MAX is used instead of SOME wherever possible so that the calculation is
-- reproducible. abc_id is not taken from gogol (that would be wrong), and the
-- raw user agent is not carried along because it seems unnecessary.
$gogol_reduce_other = (
    SELECT
        vsid,
        MAX(streamPrj) AS streamPrj,
        $chooseService(AGGREGATE_LIST(service)) AS service,
        MAX(from_block) AS from_block,
        MAX(stream_block) AS stream_block,
        MAX(labels_from) AS labels_from,
        MAX(device) AS device,
        MAX(os_family) AS os_family
    FROM $gogol_other_map
    GROUP BY vsid
);

-- Keeps only the part of a `from` label that precedes the '%?' delimiter.
--   $from: raw label (may be NULL).
-- Returns the first element of the split, or NULL for NULL input.
$patch_labels_from = ($from) -> {
    $pieces = String::SplitToList($from, '%?');
    return ListHead($pieces)
};

-- Per-vsid attributes for sessions seen in the access log, enriched with gogol
-- data where a matching vsid exists. Gogol values take precedence for
-- labels_from, os_family and device; the access-log value is the fallback.
$access_reduce_other_patch = (
    SELECT
        a.vsid AS vsid,
        $patch_labels_from(COALESCE(g.labels_from, a.labels_from)) AS labels_from,
        g.streamPrj AS streamPrj,
        COALESCE(g.os_family, a.os_family) AS os_family,
        g.stream_block AS stream_block,
        g.from_block AS from_block,
        COALESCE(g.device, a.device) AS device,
        g.service AS service
    FROM $access_reduce_other AS a
    LEFT JOIN ANY $gogol_reduce_other AS g
        USING (vsid)
);
-- Gogol sessions whose vsid does not appear in the access log at all.
-- NOTE(review): labels_from is taken as-is here, while sessions matched with
-- the access log get it through $patch_labels_from — confirm this is intended.
$gogol_reduce_other_only = (
    SELECT *
    FROM $gogol_reduce_other AS g
    LEFT ONLY JOIN $access_reduce_other AS a
        USING (vsid)
);
-- All known per-vsid attribute rows: access-log sessions (patched with gogol
-- data) plus gogol-only sessions.
$vsid_other_concat = (
    SELECT *
    FROM $access_reduce_other_patch
    UNION ALL
    SELECT *
    FROM $gogol_reduce_other_only
);

-- Traffic per (vsid, label, abc_id) with the per-vsid attributes attached.
$traffic_join = (
    SELECT
        t.*,
        p.labels_from AS labels_from,
        p.os_family AS os_family,
        p.stream_block AS stream_block,
        p.streamPrj AS streamPrj,
        p.from_block AS from_block,
        p.device AS device,
        p.service AS service
    FROM $access_reduce_traffic AS t
    LEFT JOIN ANY $vsid_other_concat AS p
        USING (vsid)
);

-- Watched time with the per-vsid attributes and the ABC service name attached.
-- streamPrj comes from the watched-time rows themselves (via t.*).
$tvt_join = (
    SELECT
        t.*,
        $date AS fielddate,
        COALESCE(services.name, '-') AS abc_service_name,
        p.labels_from AS labels_from,
        p.os_family AS os_family,
        p.stream_block AS stream_block,
        p.from_block AS from_block,
        p.device AS device,
        p.service AS service
    FROM $watched_time AS t
    LEFT JOIN ANY $vsid_other_concat AS p
        ON (t.vsid = p.vsid)
    LEFT JOIN ANY `//home/videoquality/strm_meta/abc_services` AS services
        ON (t.abc_id = services.abc_id)
);

-- All traffic rows (with and without vsid) with the ABC service name attached.
-- gogol_service falls back to "Unknown_Service_" + os_family when the session
-- has no gogol service.
$traffic_after_join = (
    SELECT
        s.abc_id AS abc_id,
        COALESCE(services.name, '-') AS abc_service_name,
        s.stream_block AS stream_block,
        s.label AS label,
        s.from_block AS from_block,
        s.vsid AS vsid,
        s.os_family AS os_family,
        s.bytes_sent AS bytes_sent,
        s.streamPrj AS streamPrj,
        s.device AS device,
        s.labels_from AS labels_from,
        COALESCE(s.service, "Unknown_Service_" || s.os_family) AS gogol_service
    FROM (
        SELECT *
        FROM $traffic_join
        UNION ALL
        SELECT *
        FROM $access_reduce_no_vsid
    ) AS s
    LEFT JOIN ANY `//home/videoquality/strm_meta/abc_services` AS services
        ON (s.abc_id = services.abc_id)
);

-- Traffic rows shaped for the final aggregation: adds the report date,
-- defaults a missing os_family to "-" and guarantees a non-NULL labels_from
-- ("with_vsid_<os>" / "no_vsid_<os>" when the label itself is missing).
$traffic_map = (
    SELECT
        $date AS fielddate,
        abc_service_name,
        gogol_service,
        label,
        COALESCE(os_family, "-") AS os_family,
        stream_block,
        from_block,
        streamPrj,
        vsid,
        device,
        Unwrap(
            CASE
                WHEN labels_from IS NOT NULL THEN labels_from
                WHEN vsid IS NOT NULL THEN "with_vsid_" || COALESCE(os_family, "-")
                WHEN vsid IS NULL THEN "no_vsid_" || COALESCE(os_family, "-")
                ELSE "-"
            END
        ) AS labels_from,
        bytes_sent
    FROM $traffic_after_join
);

-- Converts a byte count to units of 1024^4 bytes, rounded with precision -2.
$to_tb = ($b) -> {
    $scaled = cast($b as Double) / Math::Pow(1024, 4);
    return Math::Round($scaled, -2)
};
-- Replaces an empty string with NULL; any other value is returned unchanged.
$w = ($x) -> {
    return case
        when $x = "" then null
        else $x
    end
};

-- Watched time summed per session and dashboard dimension.
-- Every dimension is normalised first: empty strings and NULLs become "-".
$tvt_grouped = (
    SELECT
        vsid,
        fielddate,
        label,
        streamPrj,
        abc_service_name,
        gogol_service,
        from_block,
        stream_block,
        os_family,
        labels_from,
        device,
        SUM(tvt) AS tvt
    FROM $tvt_join
    GROUP BY
        COALESCE($w(vsid), "-") AS vsid,
        COALESCE($w(streamPrj), "-") AS streamPrj,
        COALESCE($w(label), "-") AS label,
        COALESCE($w(from_block), "-") AS from_block,
        COALESCE($w(stream_block), "-") AS stream_block,
        COALESCE($w(device), "-") AS device,
        COALESCE($w(abc_service_name), "-") AS abc_service_name,
        COALESCE($w(fielddate), "-") AS fielddate,
        COALESCE($w(service), "-") AS gogol_service,
        COALESCE($w(os_family), "-") AS os_family,
        COALESCE($w(labels_from), "-") AS labels_from
);

-- Bytes sent summed per session and dashboard dimension.
-- Every dimension is normalised first: empty strings and NULLs become "-".
$traffic_grouped = (
    SELECT
        vsid,
        fielddate,
        streamPrj,
        label,
        abc_service_name,
        gogol_service,
        from_block,
        stream_block,
        os_family,
        labels_from,
        device,
        SUM(bytes_sent) AS bytes_sent
    FROM $traffic_map
    GROUP BY
        COALESCE($w(vsid), "-") AS vsid,
        COALESCE($w(label), "-") AS label,
        COALESCE($w(streamPrj), "-") AS streamPrj,
        COALESCE($w(from_block), "-") AS from_block,
        COALESCE($w(stream_block), "-") AS stream_block,
        COALESCE($w(device), "-") AS device,
        COALESCE($w(abc_service_name), "-") AS abc_service_name,
        COALESCE($w(fielddate), "-") AS fielddate,
        COALESCE($w(gogol_service), "-") AS gogol_service,
        COALESCE($w(os_family), "-") AS os_family,
        COALESCE($w(labels_from), "-") AS labels_from
);

-- Final dashboard aggregate: traffic and watched-time rows are stacked and
-- summed per dimension combination.
--   vsids: number of distinct sessions in the group
--   tib:   bytes sent, converted by $to_tb (0.0 when the group has no traffic rows)
--   tvt:   watched time (0.0 when the group has no watched-time rows)
-- Both inputs replace a missing vsid with the placeholder "-". The placeholder
-- is mapped back to NULL before counting, so traffic without a session does
-- not add a phantom session to `vsids`. The mapping is done in a subquery
-- because DISTINCT aggregation is applied to a plain column.
$final_join = (
    select
        fielddate,
        label,
        abc_service_name,
        gogol_service,
        from_block,
        streamPrj,
        stream_block,
        os_family,
        labels_from,
        device,
        count(distinct session_vsid) as vsids,
        $to_tb(sum(bytes_sent)) ?? 0.0 as tib,
        sum(tvt) ?? 0.0 as tvt
    from (
        select
            u.*,
            IF(vsid != "-", vsid) as session_vsid -- NULL for the "-" placeholder
        from (
            select * from $traffic_grouped
            union all
            select * from $tvt_grouped
        ) as u
    )
    group by
        $w(streamPrj) ?? "-" as streamPrj,
        $w(label) ?? "-" as label,
        $w(from_block) ?? "-" as from_block,
        $w(stream_block) ?? "-" as stream_block,
        $w(device) ?? "-" as device,
        $w(abc_service_name) ?? "-" as abc_service_name,
        $w(fielddate) ?? "-" as fielddate,
        $w(gogol_service) ?? "-" as gogol_service,
        $w(os_family) ?? "-" as os_family,
        $w(labels_from) ?? "-" as labels_from
);


-- Overwrite the destination table with the final aggregate.
INSERT INTO $dst_table WITH TRUNCATE
SELECT *
FROM $final_join;

