-- Script parameters.
--   $src_folder: folder whose tables are read as billing input.
--   $dst_folder: folder where the per-table billing output is written;
--                a table name present here counts as already processed.
DECLARE $src_folder AS String;
DECLARE $dst_folder AS String;

-- YT execution settings for the operations spawned by this script.
-- NOTE(review): presumably these select the scheduler pool / pool trees —
-- confirm the exact semantics against the YT provider pragma documentation.
PRAGMA yt.Pool = "strm";
PRAGMA yt.PoolTrees = "physical";
PRAGMA yt.UseDefaultTentativePoolTrees;

-- first, get the tables that we haven't handled yet
-- Returns the part of $path that follows its last "/" (the table's own name).
$table_basename = ($path) -> {
    $last_slash = RFIND($path, "/");
    RETURN SUBSTRING($path, $last_slash + 1u);
};
-- Converts a table name into a Datetime.
-- The name is parsed as an ISO 8601 timestamp after a fixed "+03:00" offset is
-- appended to it, i.e. the name itself is expected to carry no offset.
-- NOTE(review): "+03:00" presumably stands for Moscow time — confirm.
$tablename_to_datetime = ($tablename) -> {
    $with_offset = $tablename || "+03:00";
    $parsed = DateTime::ParseIso8601($with_offset);
    RETURN DateTime::MakeDatetime($parsed);
};

-- Sorted list of table names that exist in $src_folder but have no table with
-- the same name in $dst_folder, restricted to names whose timestamp is:
--   * not earlier than 2021-08-24 00:00 Europe/Moscow, and
--   * between 7 days ago and 6 hours ago (both bounds inclusive).
$tables_diff = (
    SELECT
        ListSort(AGGREGATE_LIST($table_basename(src.Path)))
    FROM FOLDER($src_folder) AS src
    LEFT ONLY JOIN FOLDER($dst_folder) AS dst
        ON ($table_basename(src.Path) = $table_basename(dst.Path))
    WHERE
        src.Type = "table"
        -- absolute lower bound
        AND $tablename_to_datetime($table_basename(src.Path))
            >= TzDatetime("2021-08-24T00:00:00,Europe/Moscow")
        -- sliding window relative to the current moment
        AND $tablename_to_datetime($table_basename(src.Path))
            >= CurrentUtcDatetime() - Interval("P7D")
        AND $tablename_to_datetime($table_basename(src.Path))
            <= CurrentUtcDatetime() - Interval("PT6H")
);

-- Output the list of tables that are about to be processed.
SELECT $tables_diff;

-- then transform and save tables
-- Converts a numeric timestamp in seconds into a Datetime.
-- The value is first cast to Uint32.
$get_datetime = ($timestamp) -> {
    $seconds = CAST($timestamp AS Uint32);
    RETURN DateTime::MakeDatetime(DateTime::FromSeconds($seconds));
};
-- Converts a date/time value into a number of seconds via DateTime::ToSeconds.
$timestamp_to_unixtime = ($timestamp) -> {
    RETURN DateTime::ToSeconds($timestamp);
};

-- Builds one billing record as a struct.
--
-- Arguments:
--   $schema, $version  - copied to the record as is
--   $type              - String, stored as tags.type
--   $abc_id            - stored as Int32 abc_id
--   $write_time        - stored in seconds as source_wt
--   $start_time,
--   $finish_time       - stored in seconds as usage.start / usage.finish
--   $quantity          - stored as Uint64 usage.quantity
--   $unit              - String, stored as usage.unit
--
-- The record id is a freshly generated UUID rendered as a string.
-- NOTE(review): the arguments passed to RandomUuid presumably only serve to
-- make every call distinct — confirm against the YQL RandomUuid documentation.
$make_billing_struct = (
    $schema,
    $version,
    $type,
    $abc_id,
    $write_time,
    $start_time,
    $finish_time,
    $quantity,
    $unit
) -> {
    $record_id = CAST(
        RandomUuid($type, $abc_id, $start_time, $finish_time, $quantity, $unit) AS String
    );

    $tags = AsStruct(
        EnsureType($type, String) AS type
    );

    $usage = AsStruct(
        $timestamp_to_unixtime($start_time) AS start,
        $timestamp_to_unixtime($finish_time) AS finish,
        BITCAST(EnsureConvertibleTo($quantity, Uint64) AS Uint64) AS quantity,
        "delta" AS type,
        EnsureType($unit, String) AS unit
    );

    RETURN AsStruct(
        BITCAST(EnsureConvertibleTo($abc_id, Int32) AS Int32) AS abc_id,
        $record_id AS id,
        $version AS version,
        $schema AS schema,
        "yt" AS source_id,
        $timestamp_to_unixtime($write_time) AS source_wt,
        $tags AS tags,
        $usage AS usage
    );
};

-- Billing record for egress traffic, version "v1alpha1":
-- schema "strm.cdn.traffic.v1", type "strm.cdn.traffic.egress.edge", unit "byte".
$make_billing_struct_traffic_v1 = ($abc_id, $write_time, $start_time, $finish_time, $quantity) -> {
    $schema = "strm.cdn.traffic.v1";
    $version = "v1alpha1";
    $type = "strm.cdn.traffic.egress.edge";
    $unit = "byte";

    RETURN $make_billing_struct(
        $schema, $version, $type,
        $abc_id, $write_time, $start_time, $finish_time, $quantity,
        $unit
    );
};

-- Billing record for egress traffic, version "v2":
-- schema "strm.cdn.traffic.v1", unit "byte"; the record type is
-- "strm.cdn.traffic.egress." followed by $tariff.
$make_billing_struct_traffic_v2 = ($tariff, $abc_id, $write_time, $start_time, $finish_time, $quantity) -> {
    $schema = "strm.cdn.traffic.v1";
    $version = "v2";
    $type = "strm.cdn.traffic.egress." || $tariff;
    $unit = "byte";

    RETURN $make_billing_struct(
        $schema, $version, $type,
        $abc_id, $write_time, $start_time, $finish_time, $quantity,
        $unit
    );
};

-- Billing record for playlist requests, version "v1":
-- schema "strm.cdn.requests.v1", type "strm.cdn.requests.playlist", unit "request".
$make_billing_struct_playlist_v1 = ($abc_id, $write_time, $start_time, $finish_time, $quantity) -> {
    $schema = "strm.cdn.requests.v1";
    $version = "v1";
    $type = "strm.cdn.requests.playlist";
    $unit = "request";

    RETURN $make_billing_struct(
        $schema, $version, $type,
        $abc_id, $write_time, $start_time, $finish_time, $quantity,
        $unit
    );
};

-- geo values for which traffic is billed with the "cold" tariff.
-- NOTE(review): presumably data-center location codes — confirm.
$dc_locations = AsList(
    "man",
    "sas",
    "vla",
    "myt",
    "iva",
    "vlx",
    "klg"
);

-- Matches requests whose "/ysign…" path segment contains ",no_cache=1,".
$re_nocache = Re2::Grep(@@/ysign\d[^/]+,no_cache=1,@@);

-- Matches values ending in "-m3u8", "-mpd" or "-mss".
$re_playlist = Re2::Grep(@@-(m3u8|mpd|mss)$@@);

-- Computes billing records for one source table and writes them to $dst_table,
-- replacing whatever the destination already contains (WITH TRUNCATE).
--
-- Only rows with traffic_type = 'users' and a positive abc_id are considered.
-- Every output row has a single column `value` holding a JSON-serialised
-- billing record. Three kinds of records are produced:
--   * traffic v1  - total bytes_sent per abc_id;
--   * traffic v2  - total bytes_sent per (tariff, abc_id), where the tariff is
--                   "cold" when geo is listed in $dc_locations or the request
--                   matches $re_nocache, and "cached" otherwise;
--   * playlist v1 - number of rows per abc_id whose unistat_prj matches
--                   $re_playlist.
-- For every record start/finish are the minimal/maximal row timestamps of the
-- group and the write time is the current UTC time.
DEFINE ACTION $calculate_billing($src_table, $dst_table) AS
    -- Serialises a billing struct into JSON.
    $to_json = ($record) -> (
        Unwrap(Yson::SerializeJson(Yson::From($record)))
    );

    -- Billable rows of the source table with the timestamp converted to Datetime.
    $source = (
        SELECT
            $get_datetime(timestamp) AS timestamp,
            abc_id,
            bytes_sent,
            geo,          -- used for the tariff
            request,      -- used for the tariff
            unistat_prj   -- used to detect playlist requests
        FROM $src_table
        WHERE
            traffic_type = 'users'
            AND abc_id IS NOT NULL
            AND abc_id > 0
    );

    -- Total traffic per abc_id.
    $traffic_total = (
        SELECT
            abc_id,
            MIN(timestamp) AS start_time,
            MAX(timestamp) AS finish_time,
            SUM(bytes_sent) AS bytes_sent
        FROM $source
        GROUP BY abc_id
    );

    -- Traffic per tariff and abc_id.
    $traffic_by_tariff = (
        SELECT
            tariff,
            abc_id,
            MIN(timestamp) AS start_time,
            MAX(timestamp) AS finish_time,
            SUM(bytes_sent) AS bytes_sent
        FROM $source
        GROUP BY
            IF(ListHas($dc_locations, geo) OR $re_nocache(request), "cold", "cached") AS tariff,
            abc_id
    );

    -- Playlist request count per abc_id.
    $playlist_requests = (
        SELECT
            abc_id,
            MIN(timestamp) AS start_time,
            MAX(timestamp) AS finish_time,
            COUNT(*) AS cnt
        FROM $source
        WHERE $re_playlist(unistat_prj)
        GROUP BY abc_id
    );

    INSERT INTO $dst_table WITH TRUNCATE
    -- traffic v1
    SELECT
        $to_json(
            $make_billing_struct_traffic_v1(
                Unwrap(abc_id),
                CurrentUTCDateTime(),
                start_time,
                finish_time,
                Unwrap(bytes_sent)
            )
        ) AS value
    FROM $traffic_total
    UNION ALL

    -- traffic v2
    SELECT
        $to_json(
            $make_billing_struct_traffic_v2(
                tariff,
                Unwrap(abc_id),
                CurrentUTCDateTime(),
                start_time,
                finish_time,
                Unwrap(bytes_sent)
            )
        ) AS value
    FROM $traffic_by_tariff
    UNION ALL

    -- playlist v1
    SELECT
        $to_json(
            $make_billing_struct_playlist_v1(
                Unwrap(abc_id),
                CurrentUTCDateTime(),
                start_time,
                finish_time,
                Unwrap(cnt)
            )
        ) AS value
    FROM $playlist_requests
END DEFINE;

-- Run the billing calculation for every table name in $tables_diff, reading
-- the table from $src_folder and writing the same-named table in $dst_folder.
EVALUATE FOR $table_name IN $tables_diff
    DO $calculate_billing(
        $src_folder || "/" || $table_name,
        $dst_folder || "/" || $table_name
    );
