use hahn;
pragma AnsiInForEmptyOrNullableItemsCollections;
pragma yson.DisableStrict;
pragma yt.Pool = "@[pool]";

-- Run parameters. The @[...] markers are presumably filled in by whatever
-- templating layer launches this query (not visible here -- confirm).
$date_from = "@[date_from]";
$date_to = "@[date_to]";
$output_root = "@[output_root]";

-- Both output tables use the same "<date_from>_<date_to>" leaf name,
-- under the "tmp" and "stat" subdirectories of $output_root respectively.
$period_suffix = $date_from || "_" || $date_to;
$output_table_tmp = $output_root || "/tmp/" || $period_suffix;
$output_table_stat = $output_root || "/stat/" || $period_suffix;

-- Normalizes a "from" marker: yields NULL when the value is one of
-- "", "other" or "unknown", and the value itself otherwise.
-- A NULL input falls through to the ELSE branch and therefore stays NULL.
$nullIfBad = ($val) -> (
    CASE
        WHEN $val IN ("", "other", "unknown") THEN NULL
        ELSE $val
    END
);

-- Event-level rows: "CreatePlayer" / "SetSource" events of the "StreamPlayer"
-- service that have a vsid, read from the `logs/strm-gogol-log/1d` tables
-- in the [$date_from, $date_to] range.
-- Each row keeps the raw "from" markers (labels_from, and the "from" CGI
-- parameter of streamUrl) next to their $nullIfBad-normalized n_* variants.
$tmp = (
    SELECT
        TableName() AS fielddate,  -- name of the source table the row was read from
        vsid,
        labels_from,
        $nullIfBad(labels_from) AS n_labels_from,
        streamUrl,
        Url::GetCGIParam(streamUrl, "from") AS streamUrl_from,
        $nullIfBad(Url::GetCGIParam(streamUrl, "from")) AS n_streamUrl_from,
        referrer,
        topReferrer,
        location,
        topLocation
    FROM RANGE(`logs/strm-gogol-log/1d`, $date_from, $date_to)
    WHERE
        vsid IS NOT NULL
        AND service == "StreamPlayer"
        AND eventName IN ("CreatePlayer", "SetSource")
);

-- One row per (fielddate, vsid), aggregated over that session's rows in $tmp.
-- Every MAX() below simply picks the largest non-NULL value seen in the group.
$grouped = (
    SELECT
        fielddate,
        vsid,
        -- Prefer the normalized labels_from marker; fall back to the normalized
        -- "from" taken from the stream URL. NULL means neither was usable.
        COALESCE(MAX(n_labels_from), MAX(n_streamUrl_from)) AS best_possible_ref_from,
        MAX(labels_from) AS labels_from,
        MAX(streamUrl_from) AS streamUrl_from,
        -- Number of events in the group whose normalized stream-URL "from" is NULL.
        COALESCE(COUNT_IF(n_streamUrl_from IS NULL), 0) AS bad_events,
        MAX(streamUrl) AS streamUrl,
        MAX(referrer) AS referrer,
        MAX(topReferrer) AS topReferrer,
        MAX(location) AS location,
        MAX(topLocation) AS topLocation
    FROM $tmp
    GROUP BY
        fielddate,
        vsid
);

-- Writes (replacing any previous contents) the sessions for which no usable
-- "from" marker was found, i.e. best_possible_ref_from is NULL.
-- The columns are listed explicitly so the schema of the output table does not
-- silently change if $grouped gains or loses columns.
INSERT INTO $output_table_tmp WITH TRUNCATE
SELECT
    fielddate,
    vsid,
    best_possible_ref_from,
    labels_from,
    streamUrl_from,
    bad_events,
    streamUrl,
    referrer,
    topReferrer,
    location,
    topLocation
FROM $grouped
WHERE best_possible_ref_from IS NULL;

-- Per-fielddate summary over $grouped, written with truncation of any previous contents:
--   total_vsids               - number of (fielddate, vsid) groups;
--   bad_vsids                 - groups with no usable "from" marker at all;
--   vsids_with_bad_stream_url - groups with at least one event counted in bad_events.
INSERT INTO $output_table_stat WITH TRUNCATE
SELECT
    fielddate,
    COUNT(*) AS total_vsids,
    COUNT_IF(best_possible_ref_from IS NULL) AS bad_vsids,
    COUNT_IF(bad_events > 0) AS vsids_with_bad_stream_url
FROM $grouped
GROUP BY
    fielddate;
