-- Execution settings for the YT operations started by this query.
-- The "@[...]" tokens look like template placeholders filled in before the query
-- is submitted (NOTE(review): confirm against the tool that launches this script).
pragma yt.PoolTrees = "physical";
pragma yt.UseDefaultTentativePoolTrees;
pragma yt.Pool = "@[pool]";
-- Non-strict mode for the Yson functions used below (presumably type mismatches
-- yield NULL instead of an error; verify against the YQL Yson documentation).
pragma yson.DisableStrict;

-- Bounds passed to range() over the daily log tables; the table name is later
-- exposed as `fielddate`.
$date_from = "@[date_from]";
$date_to = "@[date_to]";
-- Destination of the per-row result (written with truncate).
$output_table_raw = "@[output_table_raw]";
-- Destination of the per-(fielddate, host) aggregates (written with truncate).
$output_table_grouped = "@[output_table_grouped]";

-- Average bitrate of one playback row, computed from its "buffers" payload.
-- Input:  Json? holding a JSON *string* whose content is itself a JSON array of
--         objects with "duration" and "size" fields (hence the double decode).
-- Output: sum(size) / sum(duration) / 128.0, or NULL when the payload cannot be
--         decoded, is not a list, or the usable elements have zero total duration.
--         The result is stored as abr_kbps, so the units are presumably bytes and
--         seconds (128 = 1024 / 8) -- TODO confirm with the log producer.
$abr = Python::get_abr(
    ParseType("(Json?)->Double?"), @@
import json


def get_abr(s):
    """Return size/duration/128.0 over all usable buffer elements, or None."""
    try:
        # The payload is JSON wrapped in a JSON string, so it is decoded twice.
        obj = json.loads(json.loads(s))
    except (TypeError, ValueError):
        return None
    # Anything but a list cannot be iterated as buffer elements; treat it like
    # an undecodable payload instead of failing the whole query.
    if not isinstance(obj, list):
        return None
    # Float accumulators keep the final division a true division even if the
    # UDF is executed by a Python 2 runtime.
    duration = 0.0
    size = 0.0
    for element in obj:
        try:
            # Read both fields before accumulating so that an element missing
            # one of them contributes to neither total.
            element_duration = float(element["duration"])
            element_size = float(element["size"])
        except (KeyError, TypeError, ValueError):
            # Missing key, non-object element or non-numeric value: skip it.
            continue
        duration += element_duration
        size += element_size
    if duration:
        return size / duration / 128.0
    return None
@@
);

-- Expands one input row with playback metrics parsed out of its EventValue JSON.
-- Added members: play_length, buffers, host, abr_kbps, rebs_len, mtbr,
-- rebufferings_per_sec. All members of the incoming row are kept.
$getRebufferings = ($row) -> {
    $event = Yson::ParseJson($row.EventValue);

    -- "play length" is looked up as a string and converted to Double.
    $play_length = CAST(Yson::LookupString($event, "play length") as Double);

    -- "rebufferings" is a string that itself contains JSON; only the number of
    -- list elements is used below.
    $rebufferings_json = Yson::LookupString($event, "rebufferings");
    $rebs_count = ListLength(Yson::ConvertToList(Yson::ParseJson($rebufferings_json)));

    -- The "buffers" node is re-serialized to JSON so it can be handed to $abr.
    $buffers_json = Yson::SerializeJson(Yson::YPath($event, "/buffers"));
    $host = Url::GetHost(Yson::LookupString($event, "frame url"));

    RETURN ExpandStruct(
        $row,
        $play_length as play_length,
        $buffers_json as buffers,
        $host as host,
        $abr($buffers_json) as abr_kbps,
        $rebs_count as rebs_len,
        -- play length divided by (rebufferings + 1); presumably "mean time
        -- between rebufferings" -- confirm the naming with the metric owner.
        $play_length / CAST($rebs_count + 1 as Double) as mtbr,
        $rebs_count / $play_length as rebufferings_per_sec
    )
};

-- Row filter shared by both log sources: a "video statistics" event whose raw
-- EventValue text mentions "rebufferings" (a substring check, done before any
-- JSON parsing).
$is_rebuffering_stat = ($event_name, $event_value) -> (
    $event_name == "video statistics"
    and String::Contains($event_value, "rebufferings")
);

-- Matching events from the browser and superapp daily logs within
-- [$date_from, $date_to]; the source table name is exposed as `fielddate`.
$preselect = (
    select
        TableName() as fielddate,
        EventValue
    from range(
        `//home/logfeller/logs/browser-metrika-mobile-log/1d`,
        $date_from,
        $date_to
    )
    where $is_rebuffering_stat(EventName, EventValue)

    union all

    select
        TableName() as fielddate,
        EventValue
    from range(
        `//home/logfeller/logs/superapp-metrika-mobile-log/1d`,
        $date_from,
        $date_to
    )
    where $is_rebuffering_stat(EventName, EventValue)
);

-- Enrich every preselected row with the metrics computed by $getRebufferings.
$process = (
    process $preselect
    using $getRebufferings(TableRow())
);

-- Keep rows with a positive play length whose host is not "incognito".
-- Rows where host or play_length is NULL are dropped as well, because the
-- comparison then evaluates to NULL rather than true.
$postselect = (
    select *
    from $process
    where
        host != "incognito"
        and play_length > 0
);

-- Per-row output; the target table is overwritten on every run.
insert into $output_table_raw with truncate
select *
from $postselect;

-- Per-(fielddate, host) aggregates over the filtered rows.
$grouped = (
    select
        fielddate,
        host,

        -- Volume: row count, summed play length and summed rebuffering count.
        count(*)                                 as sessions,
        sum(play_length)                         as tvt,
        sum(rebs_len)                            as total_rebufferings,

        -- Distribution of rebufferings per unit of play length (upper tail).
        percentile(rebufferings_per_sec, 0.75)   as rebufferings_per_sec_p75,
        percentile(rebufferings_per_sec, 0.9)    as rebufferings_per_sec_p90,
        percentile(rebufferings_per_sec, 0.95)   as rebufferings_per_sec_p95,
        percentile(rebufferings_per_sec, 0.99)   as rebufferings_per_sec_p99,

        -- Distribution of mtbr (play length / (rebufferings + 1)).
        percentile(mtbr, 0.01)                   as mtbr_p01,
        percentile(mtbr, 0.1)                    as mtbr_p10,
        percentile(mtbr, 0.5)                    as mtbr_p50,
        percentile(mtbr, 0.75)                   as mtbr_p75,
        percentile(mtbr, 0.9)                    as mtbr_p90,
        percentile(mtbr, 0.95)                   as mtbr_p95,
        percentile(mtbr, 0.99)                   as mtbr_p99,

        -- Distribution of the average bitrate returned by $abr.
        percentile(abr_kbps, 0.01)               as abr_p01,
        percentile(abr_kbps, 0.05)               as abr_p05,
        percentile(abr_kbps, 0.1)                as abr_p10,
        percentile(abr_kbps, 0.25)               as abr_p25,
        percentile(abr_kbps, 0.5)                as abr_p50,
        percentile(abr_kbps, 0.9)                as abr_p90,
        percentile(abr_kbps, 0.99)               as abr_p99
    from $postselect
    group by
        fielddate,
        host
);

-- Publish only groups with at least 100000 rows, plus any host whose name
-- contains "netflix" or "ivi" regardless of volume.
-- NOTE(review): '%ivi%' matches every host containing "ivi", not just one
-- service -- confirm that this breadth is intended.
$grouped_after = (
    select *
    from $grouped
    where
        sessions >= 100000
        or host like '%netflix%'
        or host like '%ivi%'
);

-- Aggregated output; the target table is overwritten on every run.
insert into $output_table_grouped with truncate
select *
from $grouped_after