-- YT execution settings for this query.
-- NOTE(review): the "@[...]" values look like template placeholders that an
-- external launcher substitutes before execution — confirm against the caller.
pragma yt.PoolTrees = "physical";
pragma yt.UseDefaultTentativePoolTrees;
pragma yt.Pool = "@[pool]";
pragma yt.InferSchema;
-- The Yson lookups below rely on NULL checks and `??` defaults; presumably this
-- pragma makes malformed fields yield NULL instead of failing — confirm.
pragma yson.DisableStrict;

-- Bounds handed to RANGE() over the daily log tables.
$date_from = "@[date_from]";
$date_to = "@[date_to]";
-- Destination tables: per-session rows and per-(fielddate, host) aggregates.
$output_table_raw = "@[output_table_raw]";
$output_table_grouped = "@[output_table_grouped]";

-- Log-scaled view time: values below 25 map to 0, everything else to
-- Math::Log of the value.
-- NOTE(review): the unit of $view_time is presumably seconds — confirm.
$countLvt = ($view_time) -> {
    $below_threshold = $view_time < 25;
    $log_view_time = Math::Log($view_time);
    RETURN IF($below_threshold, 0, $log_view_time)
};

-- Builds one flat struct describing a single video entry.
--   $inner_list_elem: Yson node of one entry (an element of the "data" list).
--   $url:             URL of the page the entry was reported for.
-- Returns a struct with the fields host, frame_url, page_url, guid, size,
-- duration, view_time, buffer_infos and rebufferings.
$extract_video_info_new = ($inner_list_elem, $url) -> {
    -- Missing fields fall back to "" / 0.0 rather than NULL.
    $guid = Yson::LookupString($inner_list_elem, "uid") ?? "";
    $view_time = Yson::LookupDouble(
        $inner_list_elem, "total_played_duration"
    ) ?? 0.0;

    -- Prefer the frame URL when the entry carries one, otherwise use the page URL.
    $frame_url_candidate = Yson::LookupString(
        $inner_list_elem, "frame_url"
    );
    $frame_url = IF(
        $frame_url_candidate IS NOT NULL,
        $frame_url_candidate,
        $url
    );

    -- Keep only buffer records that have both "size" and "duration";
    -- returning NULL from the lambda makes ListFlatMap drop the record.
    $raw_buffer_infos = Yson::ConvertToList(Yson::YPath($inner_list_elem, "/buffer_queue_info"));
    $buffer_infos = ListFlatMap(
        $raw_buffer_infos, ($x) -> {
            $chunk_size = Yson::LookupInt64($x, "size");
            $chunk_duration = Yson::LookupDouble($x, "duration");
            RETURN IF(
                $chunk_size IS NOT NULL AND $chunk_duration IS NOT NULL,
                AsStruct(
                    Unwrap($chunk_size) AS size,
                    Unwrap($chunk_duration) AS duration
                ), NULL
            )
        }
    );

    -- Totals over the retained buffer records. The reported duration is the
    -- buffered-duration sum; the entry's own "duration" field is not read
    -- (the previous lookup of it was overwritten before use, i.e. dead code).
    $size = ListSum(ListMap($buffer_infos, ($x) -> {RETURN $x.size})) ?? 0;
    $duration = ListSum(ListMap($buffer_infos, ($x) -> {RETURN $x.duration})) ?? 0;
    -- Number of elements under "/rebuffering_times"; 0 when the path is absent.
    $rebufferings = ListLength(Yson::ConvertToList(Yson::YPath($inner_list_elem, "/rebuffering_times"))) ?? 0;

    RETURN AsStruct(
        Url::GetHost($frame_url) AS host,
        $frame_url AS frame_url,
        $url AS page_url,
        $guid AS guid,
        $size AS size,
        $duration AS duration,
        $view_time AS view_time,
        $buffer_infos AS buffer_infos,
        $rebufferings AS rebufferings
    )
};

-- Expands one top-level element (a dict with "url" and "data") into a list of
-- video-info structs, one per item of "data", all tagged with the element's URL.
$process_list_elem_new = ($elem) -> {
    $fields = Yson::ConvertToDict($elem);
    $page_url = Yson::ConvertToString($fields["url"]);
    $entries = Yson::ConvertToList($fields["data"]);
    RETURN ListMap(
        $entries,
        ($entry) -> {RETURN $extract_video_info_new($entry, $page_url)}
    )
};

-- Parses $decoded_vc as JSON, treats the result as a list and flat-maps
-- $function over its elements.
$parseVc = ($decoded_vc, $function) -> {
    RETURN ListFlatMap(
        Yson::ConvertToList(Yson::ParseJson($decoded_vc)),
        $function
    )
};

-- Parses a decoded_vc payload with the "new" per-element processor.
$parseVcNew = ($decoded_vc) -> {
    $video_infos = $parseVc($decoded_vc, $process_list_elem_new);
    RETURN $video_infos
};

-- Classifies a dotted version string by its first three numeric components:
-- returns 1 when they compare below (72, 0, 3626), otherwise 2.
-- NOTE(review): components that fail the Int32 cast become NULL; such versions
-- presumably fall through to 2 because the comparison is not true — confirm.
$version_filter = ($version) -> {
    $components = ListMap(
        String::SplitToList($version, "."), ($part) -> {RETURN CAST($part AS Int32)}
    );
    $leading_three = ListTake($components, 3);
    $is_before_cutoff = $leading_three < AsList(72, 0, 3626);
    RETURN IF($is_before_cutoff, 1, 2)
};

-- Returns $s unchanged when it already starts with "?", otherwise prefixes it
-- with "?".
-- NOTE(review): presumably needed so Url::GetCGIParam sees a query string — confirm.
$ensureParsing = ($s) -> {
    $has_leading_mark = $s LIKE "?%";
    $prefixed = "?" || $s;
    RETURN IF($has_leading_mark, $s, $prefixed)
};

-- pragma yt.DataSizePerJob = "512M";
-- Reads one CGI parameter from a raw http_params string.
$step1_cgi_param = ($http_params, $name) -> {
    RETURN Url::GetCGIParam($ensureParsing($http_params), $name)
};

-- Last "/"-separated component of a table path (used as the row's fielddate).
$step1_table_name = ($table_path) -> {
    RETURN ListReverse(String::SplitToList($table_path, "/"))[0]
};

-- Step 1: read the daily log tables in [$date_from, $date_to], keep rows that
-- carry a decoded_vc payload from a version classified as 2 and a usable
-- yandexuid, and parse the payload into a list of video-info structs.
$step1 = (
    SELECT
        $parseVcNew(Url::Decode($step1_cgi_param(http_params, "decoded_vc"))) AS parsed_vc,
        yandexuid AS user_id,
        $step1_table_name(TablePath()) AS fielddate,
        unixtime AS `timestamp`
    FROM RANGE(`//logs/bar-navig-log/1d`, $date_from, $date_to)
    WHERE http_params LIKE '%decoded_vc=%'
        AND http_params LIKE '%frame_size%'
        AND $version_filter($step1_cgi_param(http_params, "ver")) == 2
        AND yandexuid IS NOT NULL
        AND yandexuid != "-"
);

-- select * from $step1 limit 10

-- Step 2: expand parsed_vc so that each of its elements gets its own row,
-- carrying the user, date and timestamp of the source log row.
$step2 = (
    SELECT
        user_id,
        parsed_vc,
        fielddate,
        `timestamp`
    FROM $step1
    FLATTEN BY parsed_vc
);

-- Sets the YT default memory limit to 2G.
-- NOTE(review): presumably required by the list aggregation in the later
-- steps; whether placement mid-file limits its scope is unclear — confirm.
pragma yt.DefaultMemoryLimit = "2G";

-- Step 3: unpack the video-info struct into plain columns.
$step3 = (
    SELECT
        fielddate,
        user_id,
        `timestamp`,
        parsed_vc.guid         AS guid,
        parsed_vc.view_time    AS view_time,
        parsed_vc.host         AS host,
        parsed_vc.size         AS size,
        parsed_vc.duration     AS duration,
        parsed_vc.rebufferings AS rebufferings,
        parsed_vc.buffer_infos AS buffer_infos,
        parsed_vc.page_url     AS page_url,
        parsed_vc.frame_url    AS frame_url
    FROM $step2
);

-- Step 4: collect all events of one video session (fielddate, user_id, guid)
-- into event_list, sorted by ascending view_time, and pick an arbitrary
-- page_url / frame_url seen for the session.
$step4 = (
    select
        fielddate,
        user_id,
        guid,
        some(page_url) as page_url,
        some(frame_url) as frame_url,
        listsort(Aggregate_list(
            asstruct(
                view_time as view_time,
                rebufferings as rebufferings,
                host as host,
                size as size,
                duration as duration
            )
        ), ($x) -> {RETURN $x.view_time}) as event_list
    from $step3
    -- $extract_video_info_new substitutes "" for a missing uid, so a NULL
    -- check alone never rejects anything; the empty string must be excluded
    -- too, otherwise all uid-less entries of a user/day merge into one
    -- pseudo-session.
    where guid is not null
        and guid != ""
    group by fielddate, user_id, guid
);

-- Last element of a session's event list. The list is sorted by ascending
-- view_time upstream, so this is the event with the largest view_time.
$step5_last_event = ($events) -> {
    RETURN Unwrap($events[ListLength($events) - 1])
};

-- Step 5: represent each session by its last event and keep only sessions
-- whose last event has a host and positive size, duration and view_time.
$step5 = (
    SELECT
        fielddate,
        user_id,
        guid,
        frame_url,
        page_url,
        Unwrap($step5_last_event(event_list).host) AS host,
        Unwrap($step5_last_event(event_list).size) AS size,
        Unwrap($step5_last_event(event_list).duration) AS duration,
        Unwrap($step5_last_event(event_list).rebufferings) AS rebufferings,
        Unwrap($step5_last_event(event_list).view_time) AS view_time
    FROM $step4
    WHERE ListLength(event_list) > 0
        AND $step5_last_event(event_list).host IS NOT NULL
        AND $step5_last_event(event_list).size IS NOT NULL
        AND $step5_last_event(event_list).size > 0
        AND $step5_last_event(event_list).rebufferings IS NOT NULL
        AND $step5_last_event(event_list).duration IS NOT NULL
        AND $step5_last_event(event_list).duration > 0
        AND $step5_last_event(event_list).view_time IS NOT NULL
        AND $step5_last_event(event_list).view_time > 0
);

-- Step 6: add derived per-session metrics to every column of step 5:
--   rebufferings_per_sec = rebufferings / view_time
--   mtbr                 = view_time / (rebufferings + 1)
--   abr_kbps             = size / duration / 128
-- NOTE(review): the /128 presumably converts bytes per second to kbit/s
-- (8 / 1024) — confirm the units of size and duration.
$step6 = (
    SELECT
        t.*,
        rebufferings / CAST(view_time AS Double) AS rebufferings_per_sec,
        view_time / CAST(rebufferings + 1 AS Double) AS mtbr,
        size / CAST(duration AS Double) / 128.0 AS abr_kbps
    FROM $step5 AS t
);

-- Replace the contents of the per-session output table with step 6.
INSERT INTO $output_table_raw WITH TRUNCATE
SELECT
    *
FROM $step6;

-- Per-(fielddate, host) aggregates over the sessions of step 6: session count,
-- total view time, total rebufferings and percentiles of the derived metrics.
$grouped = (
    SELECT
        fielddate,
        host,
        COUNT(*) AS sessions,
        SUM(view_time) AS tvt,
        SUM(rebufferings) AS total_rebufferings,
        -- rebufferings per second of viewing
        PERCENTILE(rebufferings_per_sec, 0.75) AS rebufferings_per_sec_p75,
        PERCENTILE(rebufferings_per_sec, 0.9) AS rebufferings_per_sec_p90,
        PERCENTILE(rebufferings_per_sec, 0.95) AS rebufferings_per_sec_p95,
        PERCENTILE(rebufferings_per_sec, 0.99) AS rebufferings_per_sec_p99,
        -- mtbr = view_time / (rebufferings + 1)
        PERCENTILE(mtbr, 0.1) AS mtbr_p10,
        PERCENTILE(mtbr, 0.01) AS mtbr_p01,
        PERCENTILE(mtbr, 0.5) AS mtbr_p50,
        PERCENTILE(mtbr, 0.75) AS mtbr_p75,
        PERCENTILE(mtbr, 0.90) AS mtbr_p90,
        PERCENTILE(mtbr, 0.95) AS mtbr_p95,
        PERCENTILE(mtbr, 0.99) AS mtbr_p99,
        -- abr_kbps = size / duration / 128
        PERCENTILE(abr_kbps, 0.01) AS abr_p01,
        PERCENTILE(abr_kbps, 0.05) AS abr_p05,
        PERCENTILE(abr_kbps, 0.1) AS abr_p10,
        PERCENTILE(abr_kbps, 0.25) AS abr_p25,
        PERCENTILE(abr_kbps, 0.5) AS abr_p50,
        PERCENTILE(abr_kbps, 0.9) AS abr_p90,
        PERCENTILE(abr_kbps, 0.99) AS abr_p99
    FROM $step6
    GROUP BY fielddate, host
);

-- Keep host groups with at least 2000 sessions, plus any host whose name
-- contains "netflix" or "ivi" regardless of volume.
-- NOTE(review): '%ivi%' matches every host containing that substring, which
-- may be broader than intended — confirm.
$grouped_after = (
    SELECT
        *
    FROM $grouped
    WHERE sessions >= 2000
        OR host LIKE '%netflix%'
        OR host LIKE '%ivi%'
);

-- Replace the contents of the aggregated output table with the filtered groups.
INSERT INTO $output_table_grouped WITH TRUNCATE
SELECT
    *
FROM $grouped_after;