-- Default cluster for every table path referenced below.
use hahn;
-- "@[pool]" looks like a template placeholder substituted before the query
-- is submitted — TODO confirm against the launcher.
pragma yt.Pool = "@[pool]";
-- Pool-tree selection for the YT operations spawned by this query.
pragma yt.PoolTrees = "physical";
pragma yt.UseDefaultTentativePoolTrees;
-- Presumably needed because the source log tables are read without a strict
-- schema (see the WeakField calls further down) — verify.
pragma yt.InferSchema;

-- Inclusive bounds of the processed log range. Both values are "@[...]"
-- placeholders as written in this file.
$date_from = "@[date_from]";
$date_to = "@[date_to]";

-- Directory suffix: the date itself when both bounds are equal,
-- otherwise "<date_from>_<date_to>".
$suffix = CASE
    WHEN $date_from == $date_to THEN $date_from
    ELSE $date_from || "_" || $date_to
END;

-- Per-range output directory and the table written at the end of the script.
$root = "//home/videoquality/vh_analytics/strm_perf_dash/" || $suffix;
$output_table = $root || "/strm_perf_joined";

-- Predicate applied to the `request` column of both log sources.
-- Accepts a request only when it is non-null, its length is between 1 and 499,
-- and it contains none of the marker substrings listed below.
$common_filter = ($request) -> {
    $length_in_bounds = LENGTH($request) BETWEEN 1 AND 499;
    $has_excluded_marker =
        String::Contains($request, "PlayerAlive")
        OR String::Contains($request, "for-regional-cache=1")
        OR String::Contains($request, "monitoring=1")
        OR String::Contains($request, "hide_stub=1")
        OR String::Contains($request, "dvrpy=1");
    RETURN $request IS NOT NULL
        AND $length_in_bounds
        AND NOT $has_excluded_marker;
};

-- Builds a dictionary from the "key=value" segments of a URL path.
-- The part of the URL starting at the first "?" is cut off first; the rest is
-- split on "/" and only segments containing "=" are kept. For each kept
-- segment the text before the first "=" is the key and the text between the
-- first and second "=" (or the end of the segment) is the value.
$parseSignedUrl = ($url) -> {
    -- NOTE(review): when there is no "?", FIND yields NULL and the whole
    -- string is presumably kept — confirm SUBSTRING semantics for NULL length.
    $without_query = SUBSTRING($url, 0, FIND($url, "?"));
    $segments = String::SplitToList($without_query, "/");
    $pairs = ListFilter($segments, ($segment) -> {
        RETURN FIND($segment, "=") IS NOT NULL;
    });
    $to_entry = ($pair) -> {
        $parts = String::SplitToList($pair, "=");
        RETURN AsTuple(unwrap($parts[0]), unwrap($parts[1]));
    };
    RETURN ToDict(ListMap($pairs, $to_entry));
};

-- Chunk-name matchers used by $getQuality: "-a<digit>" / "-v<digit>" followed
-- by at least one character and then "ts" or "m4s" somewhere later in the
-- string. The patterns carry no anchors and do not require a "." before the
-- extension. Presumably "-a" marks audio and "-v" video tracks — confirm.
$re_audio_chunk = Re2::Grep("-a[0-9].+(ts|m4s)");
$re_video_chunk = Re2::Grep("-v[0-9].+(ts|m4s)");

-- Derives a quality label from a request path. Priority order:
--   1. the "res" key of the key=value path segments, if present;
--   2. "audio" when the path contains "audio", or matches the audio chunk
--      pattern without matching the video one;
--   3. the first height from $known_heights whose "<height>p" marker occurs
--      in the path (list order is the lookup priority);
--   4. NULL when nothing matched.
$getQuality = ($request) -> {
    $signed_res = $parseSignedUrl($request)["res"];
    $looks_like_audio =
        String::Contains($request, "audio")
        OR ($re_audio_chunk($request) AND NOT $re_video_chunk($request));
    $known_heights = AsList(
        "720", "1080", "360", "540", "480", "240", "576",
        "432", "408", "504", "270", "600", "144"
    );
    -- ListFilter keeps list order, so ListHead picks the highest-priority hit.
    $height_from_name = ListHead(ListFilter($known_heights, ($height) -> {
        RETURN String::Contains($request, $height || "p");
    }));
    RETURN CASE
        WHEN $signed_res IS NOT NULL THEN $signed_res
        WHEN $looks_like_audio THEN "audio"
        ELSE $height_from_name
    END;
};

-- Classifies a full request URL by its path and returns a struct with:
--   chunk_type: "HLS" when the path contains ".ts", "DASH/MSS" when it
--               contains ".m4s" (".ts" wins if both occur), otherwise NULL;
--   view_type:  "live" when the path contains "/kal/", otherwise "vod";
--   quality:    result of $getQuality applied to the path.
$getData = ($request) -> {
    $url_path = Url::Parse($request).Path;
    RETURN AsStruct(
        IF(
            String::Contains($url_path, ".ts"),
            "HLS",
            IF(String::Contains($url_path, ".m4s"), "DASH/MSS", NULL)
        ) AS chunk_type,
        CASE
            WHEN String::Contains($url_path, "/kal/") THEN "live"
            ELSE "vod"
        END AS view_type,
        $getQuality($url_path) AS quality
    );
};

-- Maps a parsed user-agent struct to a device class.
-- isTV is checked before isMobile; anything else is reported as "desktop".
$getDeviceType = ($parsed) -> {
    RETURN IF(
        $parsed.isTV,
        "tv",
        IF($parsed.isMobile, "mobile", "desktop")
    );
};

-- Access-log side of the join: one row per (ip, request).
-- Inner query: successful (200/206) requests that pass $common_filter,
-- enriched with chunk/view/quality classification, geo data and TCP stats
-- cast to Double. Outer query: drops rows lacking classification or geo
-- data, collapses duplicates with per-column MAX (so values within one
-- output row may originate from different log lines) and discards keys
-- that occur 1000 or more times.
$strm_map = (
    SELECT
        ip,
        request,
        MAX(request_id) AS request_id_strm,
        MAX(CAST(`timestamp` AS UInt64)) AS `timestamp`,
        MAX(user_agent) AS user_agent,
        UserAgent::Parse(MAX(user_agent)).OSFamily AS os_family,
        UserAgent::Parse(MAX(user_agent)).BrowserName AS browser_name,
        $getDeviceType(UserAgent::Parse(MAX(user_agent))) AS device_type,
        MAX(chunk_type) AS chunk_type,
        MAX(view_type) AS view_type,
        MAX(quality) AS quality,
        MAX(country) AS country,
        MAX(isp) AS isp,
        MAX(tcpinfo_rtt) AS tcpinfo_rtt,
        MAX(tcpinfo_rttvar) AS tcpinfo_rttvar,
        MAX(tcpinfo_snd_cwnd) AS tcpinfo_snd_cwnd,
        MAX(tcpinfo_total_retrans) AS tcpinfo_total_retrans
    FROM (
        SELECT
            ip,
            `timestamp`,
            request_id,
            request,
            -- The logged request is prefixed with a host so that $getData
            -- can parse it as a full URL.
            $getData("https://strm.yandex.ru" || request).chunk_type AS chunk_type,
            $getData("https://strm.yandex.ru" || request).view_type AS view_type,
            $getData("https://strm.yandex.ru" || request).quality AS quality,
            Geo::RoundRegionByIp(ip, "country").short_en_name AS country,
            Geo::GetIspNameByIp(ip) AS isp,
            user_agent,
            CAST(tcpinfo_rtt AS Double) AS tcpinfo_rtt,
            CAST(tcpinfo_rttvar AS Double) AS tcpinfo_rttvar,
            CAST(tcpinfo_snd_cwnd AS Double) AS tcpinfo_snd_cwnd,
            -- Values above 400000000 are replaced with NULL
            -- (presumably bogus counters — confirm).
            CASE
                WHEN CAST(tcpinfo_total_retrans AS Double) > 400000000 THEN NULL
                ELSE CAST(tcpinfo_total_retrans AS Double)
            END AS tcpinfo_total_retrans
        FROM RANGE(`logs/strm-access-log/1d`, $date_from, $date_to)
        WHERE $common_filter(request)
            AND status IN ("200", "206")
    )
    WHERE request IS NOT NULL
        AND chunk_type IS NOT NULL
        AND quality IS NOT NULL
        AND country IS NOT NULL
        AND isp IS NOT NULL
    GROUP BY ip, request
    HAVING COUNT(*) < 1000
);

-- Reduces a full URL to "<path>?<query>", or to just "<path>" when the URL
-- has no query part. The scheme and host are dropped.
$getPerfRequest = ($url) -> {
    $parts = Url::Parse($url);
    RETURN CASE
        WHEN $parts.Query IS NULL THEN $parts.Path
        ELSE $parts.Path || "?" || $parts.Query
    END;
};

-- Throughput for one transfer: (transfer_size / duration) * 8.0.
-- Returns NULL when the duration is NULL, zero or negative. Without this
-- guard a zero duration produces Inf/NaN (floating-point division) and a
-- negative one produces a negative value; either would then be picked up by
-- min(throughput) below. NULLs are ignored by the aggregate instead.
-- The factor 8.0 presumably converts bytes to bits — confirm units.
$safeThroughput = ($transfer_size, $duration) -> {
    RETURN IF(
        $duration > 0,
        ($transfer_size / $duration) * 8.0,
        NULL
    );
};

-- Perf-log side of the join: one row per (ip, request).
-- Inner query: rows passing $common_filter, with the request reduced to
-- "<path>?<query>" via $getPerfRequest and timing fields cast to Double.
-- Outer query: collapses duplicates (max size, max duration, min throughput)
-- and discards keys that occur 1000 or more times.
$perf_map = (
    select
        ip,
        request,
        max(request_id) as request_id_perf,
        max(transferSize) as transferSize,
        max(duration) as duration,
        min(throughput) as throughput
    from (
        select
            -- The client address column is named remote_addr or remote_ip
            -- depending on the row; take whichever is present.
            WeakField(remote_addr, "String") ?? WeakField(remote_ip, "String") as ip,
            request_id,
            $getPerfRequest(request) as request,
            CAST(responseEnd as Double) - cast(requestStart as Double) as duration,
            cast(transferSize as Double) as transferSize,
            $safeThroughput(
                CAST(transferSize as Double),
                CAST(responseEnd as Double) - CAST(requestStart as Double)
            ) as throughput
        from range(
            `logs/strm-perf-log/1d`,
            $date_from,
            $date_to
        )
        -- Filters on the raw logged request, not on the rewritten alias.
        where $common_filter(request)
    )
    group by ip, request
    having count(*) < 1000
);

-- Keeps only the (ip, request) keys present in both the access-log and the
-- perf-log aggregates and exposes every column from both sides.
$joined = (
    SELECT *
    FROM $strm_map AS s
    INNER JOIN $perf_map AS p
        USING (ip, request)
);

-- Replace the contents of the output table with the joined result.
INSERT INTO $output_table WITH TRUNCATE
SELECT *
FROM $joined;
