use hahn;
pragma yt.Pool = "@pool";
pragma yt.PoolTrees = "physical";
pragma yt.TentativePoolTrees = "cloud";
pragma SimpleColumns;

-- Null-coalescing helper: yields $value unless it is NULL, in which case
-- $fallback is returned instead.
$ornull = ($value, $fallback) -> {
    RETURN IF($value IS NULL, $fallback, $value)
};

-- Returns $x unless it is the empty string, in which case the caller-supplied
-- fallback $y is returned.
--   $x: value to check (a NULL $x makes the comparison NULL, so the fallback
--       branch of IF is taken as well).
--   $y: replacement value. It used to be ignored in favour of a hard-coded "-";
--       every existing call passes "-", so results are unchanged.
$orempty = ($x, $y) -> {
    RETURN IF($x != "", $x, $y)
};

-- Captures a player version of the form "1.0-NNN" (exactly three digits after
-- the dash) anywhere inside the given string; the whole match is exposed as
-- the `_0` member of the result.
-- The digit class has to be written with square brackets: back-ticks are
-- ordinary literal characters for RE2, so a pattern using them would never
-- match a real version string.
$pv_capture = Re2::Capture("1\.0-[0-9]{3}");

-- Tries to recover a player version from the Yson blob $add_info: first from
-- its "referer" string, then from its "request" string. NULL when the
-- $pv_capture pattern matches neither.
$extract_pv = ($add_info) -> {
    $from_referer = $pv_capture(Yson::LookupString($add_info, "referer"))._0;
    $from_request = $pv_capture(Yson::LookupString($add_info, "request"))._0;
    RETURN COALESCE($from_referer, $from_request)
};

-- Picks the player version to report for an event: the logged value $pv when
-- it is present and is not the "-" placeholder, otherwise whatever
-- $extract_pv finds in $add_info, and finally "-" when nothing is found.
$process_player_version = ($pv, $add_info) -> {
    $has_explicit_version = $pv IS NOT NULL AND $pv != "-";
    $recovered_version = $extract_pv($add_info) ?? "-";
    RETURN IF($has_explicit_version, $pv, $recovered_version)
};


--@ref_from


-- Raw player events read from the `preprocessed` tables of the requested date
-- range. "playlist", "chunk" and "create_player" events are excluded.
$tmp1 = (
    select
        vsid,
        `timestamp`,
        -- For error events the specific error id is reported instead of "error".
        if(event == "error", error_id, event) as event,
        -- The date is the second-to-last component of the table path.
        ListReverse(String::SplitToList(TablePath(), "/"))[1] as fielddate,
        provider,
        add_info,
        browser_name as browser,
        -- Aliased explicitly: without an alias the computed column only gets
        -- an auto-generated name and cannot be referenced meaningfully.
        $process_player_version(player_version, add_info) as player_version,
        country,
        os_family
    from RANGE(
        `cubes/video-strm`,
        `@date_from`, `@date_to`, `preprocessed`
    )
    where
        event != "playlist" and
        event != "chunk" and
        event != "create_player"
);

-- Connection state of every event, read from add_info at
-- /error_details/connection; "-" is used when the value is missing.
$conn_quality_1 = (
    SELECT
        fielddate,
        vsid,
        $ornull(
            Yson::ConvertToString(Yson::YPath(add_info, "/error_details/connection")),
            "-"
        ) AS connection
    FROM $tmp1
);

-- One connection state per (fielddate, vsid): the most frequent value among
-- the session's "OK" / "SLOW" observations, as reported by TOPFREQ.
$conn_quality_2 = (
    SELECT
        fielddate,
        vsid,
        TOPFREQ(connection, 1)[0].Value AS connection
    FROM $conn_quality_1
    WHERE connection IN ("OK", "SLOW")
    GROUP BY fielddate, vsid
);

-- Events of each session (fielddate, vsid) ordered by timestamp, annotated
-- with the following event and the preceding one. Missing dimension values
-- are replaced with "-".
$tmp2 = (
    SELECT
        fielddate,
        $ornull(provider, "-") AS provider,
        $ornull(browser, "-") AS browser,
        $ornull(country, "-") AS country,
        $ornull(os_family, "-") AS os_family,
        -- $ornull(player_version, "-") as player_version,
        vsid,
        LEAD(event) OVER session_events AS next_event,
        event,
        LAG(event, 1) OVER session_events AS event1
        -- lag(event, 2) over w as event2,
        -- lag(event, 3) over w as event3,
        -- lag(event, 4) over w as event4,
        -- lag(event, 5) over w as event5
    FROM $tmp1 AS t
    -- left join $conn_quality_2 as c
    -- using (fielddate, vsid)
    WINDOW session_events AS (
        PARTITION BY fielddate, vsid
        ORDER BY `timestamp`
    )
);

-- Returns 1 (as Int64) when the Yson dictionary $x has at least one key and
-- 0 when it is empty.
$has_heartbeat = ($x) -> {
    $heartbeat_dict = Yson::ConvertToDict($x);
    RETURN CAST(DictLength($heartbeat_dict) > 0 AS Int64)
};

-- $vsid_to_ref_from_tmp = (
--     select
--         ListReverse(String::SplitToList(TablePath(), "/"))[1] as fielddate,
--         vsid,
--         $ref_from_preprocess(ref_from) as ref_from,
--         player_version as player_version,
--         $has_heartbeat(heartbeats) as has_heartbeat
--     from RANGE(
--         `cubes/video-strm`,
--         `@date_from`, `@date_to`, `sessions`
--     )
-- );

-- Session-level attributes read from the `sessions` tables of the requested
-- date range. Rows whose stream_block contains "initial" are skipped.
$vsid_to_ref_from_tmp = (
    SELECT
        -- The date is the second-to-last component of the table path.
        ListReverse(String::SplitToList(TablePath(), "/"))[1] AS fielddate,
        vsid,
        ref_from,
        player_version,
        -- 1 when the "sources_aggr" string in add_info mentions "heartbeat".
        IF(
            Yson::LookupString(add_info, "sources_aggr") LIKE "%heartbeat%",
            1,
            0
        ) AS has_heartbeat
    FROM RANGE(
        `cubes/video-strm`,
        `@date_from`, `@date_to`, `sessions`
    )
    WHERE stream_block NOT LIKE "%initial%"
);

-- Collapses a list of ref_from values into one string: "-" and "" entries are
-- turned into NULL, every entry is passed through $ref_from_preprocess, and
-- the single element is returned when the list has exactly one item;
-- otherwise (or when that element is NULL) the result is "multiple".
$getRefFromValue = ($x) -> {
    $blank_to_null = ($item) -> {
        RETURN IF($item == "-" OR $item == "", NULL, $item)
    };
    $prepared = ListMap(ListMap($x, $blank_to_null), $ref_from_preprocess);
    RETURN CAST(
        IF(ListLength($prepared) == 1, $prepared[0], "multiple") ?? "multiple" AS String
    )
};

-- Collapses a list of values into one string: "-" entries are turned into
-- NULL, and the single element is returned when the list has exactly one
-- item; otherwise (or when the result is NULL) "multiple" is returned.
$getValue = ($x) -> {
    $dash_to_null = ($item) -> {
        RETURN IF($item == "-", NULL, $item)
    };
    $cleaned = ListMap($x, $dash_to_null);
    RETURN CAST(
        IF(ListLength($cleaned) == 1, $cleaned[0], "multiple") AS String
    ) ?? "multiple"
};

-- Runs $ref_from_preprocess on $x and hides the "other" bucket by turning it
-- into NULL; any other result is passed through unchanged.
$rppw = ($x) -> {
    $normalized = $ref_from_preprocess($x);
    RETURN IF($normalized != "other", $normalized, NULL)
};

-- One row per (fielddate, vsid). ref_from is the maximum of the session's
-- distinct ref_from values after $rppw (which maps "other" to NULL), falling
-- back to "other" when ListMax yields NULL; player_version and has_heartbeat
-- are the per-session maxima.
$vsid_to_ref_from = (
    SELECT
        fielddate,
        vsid,
        COALESCE(
            ListMax(ListFlatMap(AGGREGATE_LIST_DISTINCT(ref_from), $rppw)),
            "other"
        ) AS ref_from,
        MAX(player_version) AS player_version,
        MAX(has_heartbeat) AS has_heartbeat
    FROM $vsid_to_ref_from_tmp
    GROUP BY fielddate, vsid
);

-- insert into `home/videolog/tmp/ref_froms` with truncate
-- select * from $vsid_to_ref_from;

-- One row per session: the row of $tmp2 that has no following event
-- (next_event IS NULL), enriched with session attributes from
-- $vsid_to_ref_from (inner join, so sessions without a match are dropped)
-- and with the connection state from $conn_quality_2 when one is available.
$tmp2a = (
    SELECT
        t.fielddate AS fielddate,
        $orempty(t.provider, "-") AS provider,
        $orempty(t.browser, "-") AS browser,
        $orempty(t.country, "-") AS country,
        $orempty(t.os_family, "-") AS os_family,
        COALESCE(v.player_version, "-") AS player_version,
        t.vsid AS vsid,
        v.ref_from AS ref_from,
        IF(v.has_heartbeat == 1, "true", "false") AS has_heartbeat,
        COALESCE(c.connection, "-") AS connection,
        t.event AS event,
        t.event1 AS event1
    FROM $tmp2 AS t
    INNER JOIN $vsid_to_ref_from AS v
        ON (t.fielddate == v.fielddate AND t.vsid == v.vsid)
    LEFT JOIN $conn_quality_2 AS c
        ON (t.fielddate == c.fielddate AND t.vsid == c.vsid)
    WHERE t.next_event IS NULL
);

-- Per-session result: overwrite the by-vsid table with the rows of $tmp2a.
INSERT INTO `@by_vsid_table` WITH TRUNCATE
SELECT *
FROM $tmp2a;

-- Number of sessions for every combination of date, dimensions and the
-- last / penultimate event of the session.
$tmp3 = (
    SELECT
        fielddate,
        ref_from,
        provider,
        browser,
        country,
        os_family,
        player_version,
        connection,
        has_heartbeat,
        event AS last_event,
        event1 AS penultimate_event,
        COUNT(*) AS `count`
    FROM $tmp2a
    GROUP BY
        fielddate,
        ref_from,
        provider,
        browser,
        country,
        os_family,
        player_version,
        connection,
        event,
        event1,
        has_heartbeat
);

-- Aggregated result: overwrite the output table, largest groups first.
INSERT INTO `@output_table` WITH TRUNCATE
SELECT *
FROM $tmp3
ORDER BY `count` DESC