-- NOTE(review): SimpleColumns is expected to make `alias.*` selections over a
-- JOIN yield unprefixed column names; the actions below rely on plain names
-- such as id1Type / id_type in ORDER BY — confirm against the YQL pragma docs.
PRAGMA SimpleColumns;
-- ========================================================================= --
-- Exclusive upper bound on the number of rows (vertices) a component may have
-- to remain eligible for sampling; components at or above it are dropped in
-- $prepare_components.
$MAX_VERTEX = 1000000000ul;

-- $prepare_components($all_components)
--   $all_components: path of the table holding the component rows; it must
--                    expose a component_id column.
-- Produces two temporary tables:
--   @components_sorted - every input row, ordered by component_id;
--   @components        - the distinct component_id values whose row count is
--                        strictly below $MAX_VERTEX, ordered by component_id.
DEFINE ACTION $prepare_components($all_components) AS
    -- Stage an ordered copy of the input.
    INSERT INTO @components_sorted
    SELECT
        *
    FROM $all_components
    ORDER BY
        component_id;

    -- The staged copy is read by the next statement.
    COMMIT;

    -- One row per component that is small enough to be sampled.
    INSERT INTO @components
    SELECT
        component_id
    FROM @components_sorted
    GROUP BY
        component_id
    HAVING
        COUNT(*) < $MAX_VERTEX
    ORDER BY
        component_id;
END DEFINE;


-- $make_sample($percent)
--   $percent: sampling rate; it is multiplied by 100 before being handed to
--             TABLESAMPLE BERNOULLI, so it is presumably a fraction in [0, 1]
--             (verify against the caller).
-- Reads @components and @components_sorted, and writes @soup_sample: the
-- vertices (id_type, id) of the chosen components, ordered by (id1Type, id1).
-- Each vertex is emitted under two pairs of column names so that the same
-- table can be joined by (id1Type, id1) and by (id_type, id).
DEFINE ACTION $make_sample($percent) AS
    -- Components selected for the sample.
    $white_list = (
        SELECT component_id
        FROM @components
        {% if crypta_env == "test" %}
        -- Without an explicit order LIMIT 1 may return any row; ordering by
        -- the (unique) component_id makes the test sample reproducible.
        ORDER BY component_id
        LIMIT 1 -- TODO: switch to TABLESAMPLE ... REPEATABLE once YT-9851 is resolved
        {% else %}
        TABLESAMPLE BERNOULLI($percent * 100)
        {% endif %}
    );

    -- Keep only the vertices that belong to a selected component.
    INSERT INTO @soup_sample
    SELECT
        components.id_type AS id1Type,
        components.id_type AS id_type,
        components.id AS id1,
        components.id AS id
    FROM @components_sorted AS components
    LEFT SEMI JOIN $white_list AS white
    USING (component_id)
    ORDER BY id1Type, id1;
END DEFINE;

-- $prepare_soup($soup_table, $vertices_properties)
--   $soup_table:          path of the edges table (needs id1Type and id1).
--   $vertices_properties: path of the vertex properties table (needs id_type
--                         and id).
-- Copies both inputs into temporary tables ordered by the keys that
-- $select_soup_sample later joins on. The two copies are independent.
DEFINE ACTION $prepare_soup($soup_table, $vertices_properties) AS
    -- Vertex properties, ordered by (id_type, id).
    INSERT INTO @vertices_properties_sorted
    SELECT
        *
    FROM $vertices_properties
    ORDER BY
        id_type,
        id;

    -- Edges, ordered by their first endpoint (id1Type, id1).
    INSERT INTO @soup_sorted
    SELECT
        *
    FROM $soup_table
    ORDER BY
        id1Type,
        id1;
END DEFINE;

-- $select_soup_sample($output_edges, $output_properties)
--   $output_edges:      path of the table that receives the sampled edges.
--   $output_properties: path of the table that receives the properties of the
--                       sampled vertices.
-- Both outputs are overwritten (WITH TRUNCATE). Reads the temporary tables
-- @soup_sorted, @vertices_properties_sorted and @soup_sample.
DEFINE ACTION $select_soup_sample($output_edges, $output_properties) AS
    -- YT provider setting; presumably caps the input size handled by one job
    -- (see the YQL YT pragma docs).
    PRAGMA yt.DataSizePerJob = "2G";

    -- Edges whose first endpoint (id1Type, id1) is a sampled vertex.
    $sampled_edges = (
        SELECT soup.*
        FROM @soup_sorted AS soup
        LEFT SEMI JOIN @soup_sample AS picked
        USING (id1Type, id1)
    );

    -- Property rows of the sampled vertices.
    $sampled_properties = (
        SELECT properties.*
        FROM @vertices_properties_sorted AS properties
        LEFT SEMI JOIN @soup_sample AS picked
        USING (id_type, id)
    );

    INSERT INTO $output_edges WITH TRUNCATE
    SELECT *
    FROM $sampled_edges
    ORDER BY id1Type, id2Type, id1, id2;

    -- NOTE(review): the output is ordered by (id, id_type) while the staged
    -- properties table is ordered by (id_type, id) - confirm this is intended.
    INSERT INTO $output_properties WITH TRUNCATE
    SELECT *
    FROM $sampled_properties
    ORDER BY id, id_type;
END DEFINE;
-- ========================================================================= --
-- Pipeline driver. Table paths and the sampling rate are substituted by the
-- template engine before the query is submitted.
-- Step 1: build @components_sorted and @components from the components table.
DO $prepare_components("{{ components_table }}");
-- The sampling step reads the temporary tables written above; COMMIT
-- presumably acts as the barrier that makes them available.
COMMIT;
-- Step 2: choose the sampled vertices (@soup_sample) ...
DO $make_sample({{ percent }});
-- ... and stage ordered copies of the edges and vertex properties. These two
-- actions do not read each other's outputs, so no COMMIT separates them.
DO $prepare_soup("{{ soup_edges_table }}", "{{ vertices_properties_table }}");
-- The final step joins the temporary tables produced by both actions above.
COMMIT;
-- Step 3: write the sampled edges and vertex properties to the output tables.
DO $select_soup_sample("{{ output_edges }}", "{{ output_properties }}");
