--- variables
$pool = "@pool";
$input_table = "@input_table";
$errors_input_table = "@errors_input_table";
$iron_branch = "//home/videolog/strm_meta/iron_branch/concat";
$output_table = "@output_table";
---

use hahn;
pragma yt.Pool = $pool;
pragma yt.PoolTrees = "physical";
pragma yt.TentativePoolTrees = "cloud";
pragma SimpleColumns;

-- Returns the "yandexuid" entry of the value produced by Dsv::Parse($x, "; ").
-- Used further down on the `cookies` column.
$getYandexuid = ($x) -> {
    $fields = Dsv::Parse($x, "; ");
    RETURN $fields{"yandexuid"}
};

-- Resolves an IP to a region id rendered as String.
-- When the country-level lookup reports short_en_name "RU" the id of the
-- "region"-level lookup is used; otherwise the id of the country itself.
$getRegion = ($ip) -> {
    $by_country = Geo::RoundRegionByIp($ip, "country");
    $by_region = Geo::RoundRegionByIp($ip, "region");
    $chosen_id = IF(
        $by_country.short_en_name == "RU", $by_region.id, $by_country.id
    );
    RETURN CAST($chosen_id AS String)
};

-- Re2 capture for a vsid embedded in a string: the literal "vsid", then one of
-- "=", "%253D" or "%3D" (group 1), then a run of lowercase hex characters
-- (group 2, read as ._2 by $getVsidWrapper).
$vsid_capture = Re2::Capture(".*vsid(=|%253D|%3D)([0-9a-f]+).*");

-- Null-coalescing helper: yields $x unless it is NULL, in which case $y.
$ornull = ($x, $y) -> {
    RETURN IF($x IS NULL, $y, $x)
};

-- Extracts a vsid (second group of $vsid_capture) from $x, falling back to
-- the same extraction on $y when $x yields NULL.
$getVsidWrapper = ($x, $y) -> {
    $primary = $vsid_capture($x)._2;
    $fallback = $vsid_capture($y)._2;
    RETURN IF($primary IS NOT NULL, $primary, $fallback)
};

-- Request-path patterns shared by $channelFromRequest, $viewTypeFromRequest
-- and $tsFromRequest. In each, ._0 is the whole match and is used as the
-- "did it match" test.
-- "/dvr/<group 1>/...-<digits>": group 1 is read as the channel, group 2 as a number.
$request_capture = Re2::Capture("^/dvr/(.+?)/.*-([0-9]+)");
-- "/kal/<group 1>/...-<digits>-": same two groups for the /kal/ prefix.
$request_kal_capture = Re2::Capture("^/kal/(.+?)/.+-([0-9]+)-");
-- "vh-<group 1>-converted" anywhere in the request.
$request_vod1_capture = Re2::Capture("vh-(.+?)-converted");
-- "/<group 1>-vod/" anywhere in the request.
$request_vod2_capture = Re2::Capture("/(.+?)-vod/");

-- Derives a channel name from the request path.
-- Priority: a request containing "cdn1tvru" is "1tv"; otherwise the first
-- group of the first matching pattern among dvr, kal, vod1, vod2; otherwise
-- NULL. If the extracted value contains "/", only the part after the last
-- "/" is returned.
$channelFromRequest = ($request) -> {
    $dvr = $request_capture($request);
    $kal = $request_kal_capture($request);
    $vod1 = $request_vod1_capture($request);
    $vod2 = $request_vod2_capture($request);
    $raw_channel = CASE
        WHEN $request LIKE "%cdn1tvru%" THEN "1tv"
        WHEN $dvr._0 IS NOT NULL THEN $dvr._1
        WHEN $kal._0 IS NOT NULL THEN $kal._1
        WHEN $vod1._0 IS NOT NULL THEN $vod1._1
        WHEN $vod2._0 IS NOT NULL THEN $vod2._1
        ELSE NULL
    END;
    $has_slash = $raw_channel IS NOT NULL AND $raw_channel LIKE "%/%";
    $last_segment = ListReverse(String::SplitToList($raw_channel, "/")){0};
    RETURN IF($has_slash, $last_segment, $raw_channel)
};

-- Classifies a request as "live" or "vod" where that can be told from the
-- path; NULL otherwise. The "cdn1tvru/live" and "cdn1tvru/vod" substrings are
-- checked first, then either vod capture pattern.
$viewTypeFromRequest = ($request) -> {
    $vod1 = $request_vod1_capture($request);
    $vod2 = $request_vod2_capture($request);
    $matches_vod_pattern = $vod1._0 IS NOT NULL OR $vod2._0 IS NOT NULL;
    RETURN CASE
        WHEN $request LIKE "%cdn1tvru/live%" THEN "live"
        WHEN $request LIKE "%cdn1tvru/vod%" THEN "vod"
        WHEN $matches_vod_pattern THEN "vod"
        ELSE NULL
    END
};

-- Derives a numeric timestamp from the trailing number in the request path.
-- dvr requests: the number divided by 1000; kal requests: the number times 5.
-- NOTE(review): the scale factors presumably normalise both to seconds
-- (milliseconds and 5-second chunk indexes respectively) - confirm.
$tsFromRequest = ($request) -> {
    $dvr = $request_capture($request);
    $kal = $request_kal_capture($request);
    RETURN CASE
        WHEN $dvr._0 IS NOT NULL THEN CAST($dvr._2 AS UInt64) / 1000
        WHEN $kal._0 IS NOT NULL THEN CAST($kal._2 AS UInt64) * 5
        ELSE NULL
    END
};

-- True when $x is neither NULL nor the empty string.
$ok = ($x) -> {
    RETURN $x IS NOT NULL AND $x != ""
};

-- Category id lookup: the "video_category_id" CGI parameter of the request
-- wins; otherwise the "category" CGI parameter of the referer; otherwise NULL.
-- Empty values are treated as missing.
$getCategoryId = ($request, $referer) -> {
    $from_request = Url::GetCGIParam($request, "video_category_id");
    $from_referer = Url::GetCGIParam($referer, "category");
    RETURN CASE
        WHEN $ok($from_request) THEN $from_request
        WHEN $ok($from_referer) THEN $from_referer
        ELSE NULL
    END
};

-- URL-decoded value of the referer's "stream_url" CGI parameter.
$getStreamUrl = ($referer) -> {
    $encoded = Url::GetCGIParam($referer, "stream_url");
    RETURN Url::Decode($encoded)
};

-- URL-decoded value of the request's "src" CGI parameter.
$getSrcUrl = ($request) -> {
    $encoded = Url::GetCGIParam($request, "src");
    RETURN Url::Decode($encoded)
};

-- Looks up the string field $param inside the JSON object carried
-- (URL-encoded) in the referer's "additional_params" CGI parameter.
$getAdditionalParam = ($referer, $param) -> {
    $encoded = Url::GetCGIParam($referer, "additional_params");
    $parsed = Yson::ParseJson(Url::Decode($encoded));
    RETURN Yson::LookupString($parsed, $param)
};

-- URL-decoded value of the referer's "mq_url" CGI parameter.
$getMqUrl = ($referer) -> {
    $encoded = Url::GetCGIParam($referer, "mq_url");
    RETURN Url::Decode($encoded)
};

-- Finds CGI parameter $param by trying, in order, and returning the first
-- value that is neither NULL nor empty:
--   1. the request itself
--   2. the referer
--   3. the URL in the referer's "stream_url" parameter
--   4. the referer's "additional_params" JSON
--   5. the URL in the referer's "mq_url" parameter
--   6. the URL in the request's "src" parameter
-- NULL when none of them carries a usable value.
$parserWrapper = ($request, $referer, $param) -> {
    $from_request = Url::GetCGIParam($request, $param);
    $from_referer = Url::GetCGIParam($referer, $param);
    $from_stream_url = Url::GetCGIParam($getStreamUrl($referer), $param);
    $from_additional = $getAdditionalParam($referer, $param);
    $from_mq_url = Url::GetCGIParam($getMqUrl($referer), $param);
    $from_src_url = Url::GetCGIParam($getSrcUrl($request), $param);
    RETURN CASE
        WHEN $ok($from_request) THEN $from_request
        WHEN $ok($from_referer) THEN $from_referer
        WHEN $ok($from_stream_url) THEN $from_stream_url
        WHEN $ok($from_additional) THEN $from_additional
        WHEN $ok($from_mq_url) THEN $from_mq_url
        WHEN $ok($from_src_url) THEN $from_src_url
        ELSE NULL
    END
};

-- MD5 hex digest of $x with a fixed 32-character hex string appended.
$getHash = ($x) -> {
    $suffixed = $x || "e0440ebc0786e3d2cff6ef51319bc226";
    RETURN Digest::Md5Hex($suffixed)
};

-- Null-coalescing helper: yields $x unless it is NULL, in which case $y.
-- NOTE(review): a definition of $ornull with the same behavior already
-- appears earlier in this file (before $getVsidWrapper); one of the two is
-- redundant and can be dropped.
$ornull = ($x, $y) -> {
    RETURN IF($x IS NULL, $y, $x)
};

--- for errors only

-- URL-decoded value of the request's "details" CGI parameter.
$getDetails = ($request) -> {
    $encoded = Url::GetCGIParam($request, "details");
    RETURN Url::Decode($encoded)
};

-- Builds the error id for a stalled-playback event: "Stalled_<reason>" when
-- the JSON in the request's "details" parameter has a non-empty "reason"
-- string, plain "Stalled" otherwise.
-- $event_id is accepted but not used by the body.
$getStalled = ($event_id, $request) -> {
    $details_json = Yson::ParseJson($getDetails($request));
    $reason = Yson::LookupString($details_json, "reason");
    RETURN CASE
        WHEN $ok($reason) THEN "Stalled_" || $reason
        ELSE "Stalled"
    END
};

-- Appends "_fatal" to $error_id when $fatal is true; returns $error_id
-- unchanged otherwise (including when $fatal is NULL).
$ifFatal = ($error_id, $fatal) -> {
    RETURN CASE
        WHEN $fatal THEN $error_id || "_fatal"
        ELSE $error_id
    END
};

-- Derives a normalised error id from a log request's CGI parameters.
--   * "event_id" in the pass-through list -> the event id as is
--   * "event_id" in the stalled list      -> $getStalled(...)
--   * otherwise, when "error_id" is set   -> error id, suffixed by $ifFatal
--     when the "fatal" parameter equals "true"
--   * otherwise NULL
$getErrorId = ($request) -> {
    $event_id = Url::GetCGIParam($request, "event_id");
    $error_id = Url::GetCGIParam($request, "error_id");
    $is_fatal = Url::GetCGIParam($request, "fatal") == "true";
    $passthrough_events = AsList(
        'Buffer.Empty', 'InvalidFragDuration',
        'RecoverStreamError', 'BufferEmptyCritical',
        'Buffer.Empty.Critical'
    );
    $stalled_events = AsList(
        "PlayerStalled", "PlayedStalled", "Stalled"
    );
    RETURN CASE
        WHEN $event_id IS NOT NULL AND ListHas($passthrough_events, $event_id)
            THEN $event_id
        WHEN $event_id IS NOT NULL AND ListHas($stalled_events, $event_id)
            THEN $getStalled($event_id, $request)
        WHEN $error_id IS NOT NULL
            THEN $ifFatal($error_id, $is_fatal)
        ELSE NULL
    END
};

-- Finds the "vsid" for an error-log row by trying, in order, and returning
-- the first value that is neither NULL nor empty:
--   1. the request's own "vsid" parameter
--   2. the referer's "vsid" parameter
--   3. the URL in the request's "src" parameter
--   4. the URL in the referer's "stream_url" parameter
--   5. the referer's "additional_params" JSON
--   6. the URL in the referer's "mq_url" parameter
-- NULL when none of them carries a usable value.
-- The original listed the "mq_url" branch twice; the second copy could never
-- be reached and has been removed.
$getErrorVsid = ($request, $referer) -> {
    $from_request = Url::GetCGIParam($request, "vsid");
    $from_referer = Url::GetCGIParam($referer, "vsid");
    $from_src_url = Url::GetCGIParam($getSrcUrl($request), "vsid");
    $from_stream_url = Url::GetCGIParam($getStreamUrl($referer), "vsid");
    $from_additional = $getAdditionalParam($referer, "vsid");
    $from_mq_url = Url::GetCGIParam($getMqUrl($referer), "vsid");
    RETURN CASE
        WHEN $ok($from_request) THEN $from_request
        WHEN $ok($from_referer) THEN $from_referer
        WHEN $ok($from_src_url) THEN $from_src_url
        WHEN $ok($from_stream_url) THEN $from_stream_url
        WHEN $ok($from_additional) THEN $from_additional
        WHEN $ok($from_mq_url) THEN $from_mq_url
        ELSE NULL
    END
};

-- Row filter shared by the chunk and error selections: the request must be
-- non-empty, the status must be "200" or "206", and the request must not
-- contain any of the excluded markers listed below.
$common_filter = ($request, $status) -> {
    $has_request = LENGTH($ornull($request, "")) > 0;
    $status_accepted = ($status == "200" or $status == "206");
    $not_excluded =
        $request NOT LIKE "%PlayerAlive%" AND
        $request NOT LIKE '%for-regional-cache=1%' AND
        $request NOT LIKE '%monitoring=1%' AND
        $request NOT LIKE '%hide_stub=1%' AND
        $request NOT LIKE '%dvrpy=1%' AND
        $request NOT LIKE '%/vh-bsvideo-converted/%';
    RETURN $has_request AND $status_accepted AND $not_excluded
};

-- Returns the text after the last "." in the part of $x that precedes the
-- first "?" (i.e. the file extension of a request path with its query
-- string stripped).
$getExtension = ($x) -> {
    $before_query = String::SplitToList($x, "?"){0};
    $dot_parts = String::SplitToList($before_query, ".");
    RETURN ListReverse($dot_parts){0}
};

-- One row per media-chunk request read from $input_table, with the fields
-- that are later aggregated per viewing session.
-- A row is kept only when it passes $common_filter, a channel can be derived
-- from the request, the request's extension is "ts" or "mp4", the request
-- does not start with "/log", and the resolved vsid is exactly 64 characters.
$chunks = (
    SELECT
    `timestamp`,
    $getYandexuid(cookies) as yandexuid_from_cookies,
    ip,
    request,
    referer,
    user_agent,
    CAST(_other{"tcpinfo_total_retrans"} AS Uint64) as tcpinfo_total_retrans,
    CAST(bytes_sent AS Uint64) as bytes_sent,
    $channelFromRequest(request) as channel_from_request,
    $viewTypeFromRequest(request) as view_type_from_request,
    $tsFromRequest(request) as ts_from_request,
    $getCategoryId(request, referer) as category_id,
    -- Everything below is looked up through the request/referer fallback
    -- chain implemented by $parserWrapper.
    $parserWrapper(request, referer, "vsid") as vsid,
    $parserWrapper(request, referer, "from") as ref_from,
    $parserWrapper(request, referer, "from_block") as ref_from_block,
    $parserWrapper(request, referer, "source") as ref_source,
    $parserWrapper(request, referer, "slots") as slots,
    $parserWrapper(request, referer, "reqid") as ref_reqid,
    $parserWrapper(request, referer, "yandexuid") as ref_yandexuid,
    $parserWrapper(request, referer, "hash") as ref_yandexuid_hash,
    $parserWrapper(request, referer, "partner_id") as ref_partner_id,
    $parserWrapper(request, referer, "video_content_id") as video_content_id,
    Url::Decode($parserWrapper(request, referer, "video_content_name")) as ref_video_content_name
FROM $input_table
WHERE
    $common_filter(request, status) AND
    $ok($channelFromRequest(request)) AND
    ListHas(AsList("ts", "mp4"), $getExtension(request)) AND
    request NOT LIKE '/log%' AND
    LENGTH(
        $ornull($parserWrapper(request, referer, "vsid"), "")
    ) == 64
);

-- One row per player error/event log request read from $errors_input_table.
-- Only requests starting with "/log" that pass $common_filter and yield both
-- a non-empty vsid and a non-empty error id are kept.
-- The referer argument is supplied by a template placeholder that is
-- substituted before the query runs.
$errors = (
    SELECT
        $parserWrapper(request, @error_referer, "video_content_id") as video_content_id,
        $getErrorVsid(request, @error_referer) as vsid,
        -- Per-error payload; collected into a list per session by $errors_reduce.
        AsStruct(
            $getDetails(request) as error_details,
            $getErrorId(request) as error_id,
            Url::GetCGIParam(request, "fatal") as is_fatal,
            `timestamp` as `timestamp`,
            $parserWrapper(request, @error_referer, "video_content_id") as video_content_id
        ) as error_data
    FROM $errors_input_table
    WHERE
        $common_filter(request, status) AND
        request LIKE '/log%' AND
        $ok($getErrorVsid(request, @error_referer)) AND
        $ok($getErrorId(request))
);

-- Aggregates $chunks into one row per (ref_from, vsid, video_content_id).
-- Timestamps become min/max ranges, counters are summed, and every other
-- attribute is taken as the most frequent value in the group (first element
-- of TOPFREQ).
$chunks_reduce = (
    SELECT
        ref_from,
        vsid,
        video_content_id,
        CAST(MIN(`timestamp`) AS UInt64) as start_ts,
        CAST(MAX(`timestamp`) AS UInt64) as end_ts,
        CAST(MIN(ts_from_request) AS UInt64) as start_ts_from_request,
        CAST(MAX(ts_from_request) AS UInt64) as end_ts_from_request,
        SUM(tcpinfo_total_retrans) as tcpinfo_total_retrans,
        SUM(bytes_sent) as bytes_sent,
        COUNT(*) as chunks_count,
        TOPFREQ(yandexuid_from_cookies){0}.Value AS yandexuid_from_cookies,
        TOPFREQ(ip){0}.Value AS ip,
        -- Region is resolved from the most frequent ip of the group.
        $getRegion(TOPFREQ(ip){0}.Value) AS reg,
        TOPFREQ(channel_from_request){0}.Value AS channel_from_request,
        TOPFREQ(view_type_from_request){0}.Value AS view_type_from_request,
        TOPFREQ(category_id){0}.Value  AS category_id,
        TOPFREQ(ref_from_block){0}.Value AS ref_from_block,
        TOPFREQ(ref_source){0}.Value AS ref_source,
        TOPFREQ(slots){0}.Value AS slots,
        TOPFREQ(ref_reqid){0}.Value AS ref_reqid,
        TOPFREQ(ref_yandexuid){0}.Value AS ref_yandexuid,
        TOPFREQ(ref_yandexuid_hash){0}.Value AS ref_yandexuid_hash,
        TOPFREQ(ref_partner_id){0}.Value AS ref_partner_id,
        TOPFREQ(ref_video_content_name){0}.Value AS ref_video_content_name,
        TOPFREQ(user_agent){0}.Value as user_agent,
        -- Browser/OS fields are parsed from the most frequent user agent.
        UserAgent::Parse(TOPFREQ(user_agent){0}.Value).BrowserName AS browser_name,
        UserAgent::Parse(TOPFREQ(user_agent){0}.Value).BrowserVersion AS browser_version,
        UserAgent::Parse(TOPFREQ(user_agent){0}.Value).OSFamily AS os_family
    FROM $chunks
    GROUP BY ref_from, vsid, video_content_id
);

-- Collects all error structs of $errors into one list per
-- (vsid, video_content_id). Groups with more than 2000 error rows are
-- dropped entirely by the HAVING clause.
$errors_reduce = (
    SELECT
        vsid, video_content_id,
        LIST(error_data) AS errors_all
    FROM $errors
    GROUP BY vsid, video_content_id
    HAVING COUNT(*) <= 2000
);

-- Final per-session rows: every $chunks_reduce row, left-joined with its
-- error list from $errors_reduce (on vsid and video_content_id) and with
-- $iron_branch (on video_content_id == JoinKey).
-- NOTE(review): computed_channel, computed_program, channel_type and path are
-- unqualified; presumably they come from the $iron_branch table - confirm.
$joined = (
    SELECT
        computed_channel as computed_channel,
        computed_program as computed_program,
        channel_type as channel_type,
        path as path,
        c.*,
        -- The join keys are already present via c.*, so they are excluded here.
        e.* WITHOUT e.vsid, e.video_content_id
    FROM $chunks_reduce as c
    LEFT JOIN $errors_reduce as e ON (
        c.vsid == e.vsid AND
        c.video_content_id == e.video_content_id
    )
    LEFT JOIN $iron_branch as i ON (
        c.video_content_id == i.JoinKey
    )
);

-- Writes the joined result to $output_table, replacing its previous contents.
insert into $output_table with truncate
select * from $joined;