-- Query template: every `?` is a placeholder; presumably substituted by the
-- calling code before the query is submitted (TODO confirm the mechanism).

-- YT pool setting for this query.
PRAGMA yt.Pool = ?;

-- Comma-separated list of source table paths (split on ',' and read via EACH).
$src_tables = ?;
-- Table the final (sampled) result is inserted into.
$dst_table = ?;
-- Constant value written to the `service` column of every output row.
$service = ?;
-- Target number of output rows; used only to derive the SAMPLE fraction.
$limit = ?;

-- Turns a bracketed, comma-separated string (e.g. '[a,b,c]') into a list of
-- strings.
--   $str: the serialized list; may be NULL.
-- Returns the items in their original order, or an empty list when $str is
-- NULL. Items are not trimmed: whitespace around commas is kept as-is.
$parse_content_categories = ($str) -> {
    -- Remove every '[' and ']' so only the comma-separated payload remains.
    $payload = String::ReplaceAll(String::ReplaceAll($str, '[', ''), ']', '');
    -- SkipEmpty: without it '[]' (or '') splits into a single empty-string
    -- item instead of an empty list, which would surface as a bogus '' tag.
    RETURN IF(
        $str IS NOT NULL,
        String::SplitToList($payload, ',', TRUE AS SkipEmpty),
        ListCreate(String)
    )
};

-- Extracts the id values from a serialized array of single-key objects of the
-- exact textual form '[{"id": X}, {"id": Y}]' and returns them as strings.
--   $str: the serialized array; may be NULL.
-- Returns the ids in their original order, or an empty list when $str is NULL
-- or an empty array. This is plain text matching, not JSON parsing: input that
-- deviates from the spacing/key layout above is not recognised.
$parse_brandsafety = ($str) -> {
    -- Strip the leading '[{"id": ' and the trailing '}]'; what is left is the
    -- ids separated by '}, {"id": '.
    $payload = String::ReplaceAll(String::ReplaceAll($str, '[{\"id\": ', ''), '}]', '');
    -- An empty array '[]' matches neither pattern above and would otherwise be
    -- emitted as the literal tag '[]', so it is routed to the empty-list branch.
    -- SkipEmpty additionally keeps '' from producing an empty-string tag.
    RETURN IF(
        $str IS NOT NULL AND $str != '[]',
        String::SplitToList($payload, '}, {\"id\": ', TRUE AS SkipEmpty),
        ListCreate(String)
    )
};

-- Raw rows from every source table, tagged with the path of the table each
-- row was read from. The two serialized tag columns are parsed and
-- concatenated (content categories first, then brand-safety ids).
$tagged_rows = (
    SELECT
        TablePath() AS path,
        CAST(urlmd5 AS UInt64) AS urlmd5,
        GroupingUrl AS url,
        ListExtend(
            $parse_content_categories(ContentCategoriesOutstream),
            $parse_brandsafety(BrandSafetyTagsOutstream)
        ) AS BrandSafetyTags
    FROM EACH(String::SplitToList($src_tables, ','))
);

-- Stage one row per urlmd5 in the temporary table @data. Rows without a url
-- are dropped; for each key, url and tags are taken from the row with the
-- greatest source table path.
INSERT INTO @data
SELECT
    urlmd5,
    MAX_BY(url, path) AS url,
    MAX_BY(BrandSafetyTags, path) AS BrandSafetyTags,
    $service AS service
FROM $tagged_rows
WHERE url IS NOT NULL
GROUP BY urlmd5;

-- Make @data available to the statements that follow.
COMMIT;

-- Number of rows in the staged, deduplicated table. The sampling fraction has
-- to be relative to the table that is actually sampled: counting the raw
-- source rows instead (which include duplicates and rows without a url) makes
-- the fraction too small, so the output undershoots $limit, and it also costs
-- a second full scan of the sources.
$row_count = SELECT COUNT(*) FROM @data;

-- Write roughly $limit rows to the destination. SAMPLE is probabilistic, so
-- the row count is approximate; when @data has no more than $limit rows the
-- fraction is 1.0 and everything is written.
INSERT INTO $dst_table
    SELECT
        urlmd5,
        COALESCE(url, "") AS url,
        BrandSafetyTags,
        service
    FROM @data
    SAMPLE IF($limit < $row_count, CAST($limit AS Double) / $row_count, 1.0);
