-- Emit columns selected via `alias.*` (and join results) without the
-- `alias.` prefix, so downstream ORDER BY / USING clauses can refer to them
-- by their bare names.
PRAGMA SimpleColumns;
-- ========================================================================= --

-- Builds @staff_component_ids: the component_id of every row of
-- $all_components whose (id_type, id) pair occurs in $staff.
--
-- Parameters:
--   $all_components  table with (at least) id_type, id and component_id columns.
--   $staff           table whose `data` column holds YSON that is converted to
--                    Dict<String, List<String>>.
--
-- Temporary tables written: @staff_normalized, @sorted_components,
-- @staff_component_ids. The final INSERT is not followed by a COMMIT inside
-- this action.
DEFINE ACTION $prepare_components($all_components, $staff) AS
    -- Decode the YSON payload of every staff row into a typed dictionary.
    $staff_fields = (
        SELECT
            Yson::ConvertTo(
                Yson::Parse(data),
                ParseType('Dict<String,List<String>>')
            ) AS fields
        FROM $staff
    );

    -- One row per dictionary entry: the key becomes id_type, the value
    -- (still a list at this point) becomes id.
    $staff_by_type = (
        SELECT
            fields.0 AS id_type,
            fields.1 AS id
        FROM $staff_fields
        FLATTEN DICT BY fields
    );

    -- Expand the list so that each row carries a single id, and store the
    -- result sorted by the join key.
    INSERT INTO @staff_normalized
    SELECT *
    FROM $staff_by_type
    FLATTEN LIST BY id
    ORDER BY id_type, id;

    COMMIT;

    -- Copy of the components table sorted by the same key.
    INSERT INTO @sorted_components
    SELECT *
    FROM $all_components
    ORDER BY id_type, id;

    COMMIT;

    -- Keep component ids of rows that have a matching staff identifier;
    -- the semi join only filters, it never multiplies left-side rows.
    INSERT INTO @staff_component_ids
    SELECT comp.component_id AS component_id
    FROM @sorted_components AS comp
    LEFT SEMI JOIN @staff_normalized AS staff_ids
    USING (id_type, id)
    ORDER BY component_id;
END DEFINE;


-- Builds @staff_components: every identifier of $all_components whose
-- component_id is listed in @staff_component_ids.
--
-- Parameters:
--   $all_components  table with (at least) id_type, id and component_id columns.
--
-- Each identifier is written under two pairs of column names,
-- (id1Type, id1) and (id_type, id); $select_soup_sample joins on both pairs.
DEFINE ACTION $make_sample($all_components) AS
    -- Copy of the components table sorted by the join key used below.
    INSERT INTO @components_sorted
    SELECT *
    FROM $all_components
    ORDER BY component_id;

    COMMIT;

    -- Semi join: keep only rows belonging to the selected components.
    INSERT INTO @staff_components
    SELECT
        comp.id_type AS id1Type,
        comp.id      AS id1,
        comp.id_type AS id_type,
        comp.id      AS id
    FROM @components_sorted AS comp
    LEFT SEMI JOIN @staff_component_ids AS selected
    USING (component_id)
    ORDER BY id1Type, id1;
END DEFINE;


-- Materializes sorted copies of the two input tables.
--
-- Parameters:
--   $soup_edges           copied to @soup_sorted, ordered by (id1Type, id1).
--   $vertices_properties  copied to @vertices_properties_sorted, ordered by
--                         (id_type, id).
--
-- The two inserts are independent of each other; no COMMIT is issued here.
DEFINE ACTION $prepare_soup($soup_edges, $vertices_properties) AS
    -- Vertex properties, sorted by the (id_type, id) key.
    INSERT INTO @vertices_properties_sorted
    SELECT *
    FROM $vertices_properties
    ORDER BY id_type, id;

    -- Edges, sorted by the key of their first endpoint.
    INSERT INTO @soup_sorted
    SELECT *
    FROM $soup_edges
    ORDER BY id1Type, id1;
END DEFINE;

-- Writes the sampled edges and vertex properties to the output tables.
--
-- Parameters:
--   $output_edges       receives rows of @soup_sorted whose (id1Type, id1)
--                       occurs in @staff_components; existing content is
--                       replaced (WITH TRUNCATE).
--   $output_properties  receives rows of @vertices_properties_sorted whose
--                       (id_type, id) occurs in @staff_components; existing
--                       content is replaced (WITH TRUNCATE).
--
-- Reads @soup_sorted, @vertices_properties_sorted and @staff_components.
DEFINE ACTION $select_soup_sample($output_edges, $output_properties) AS
    -- NOTE(review): presumably raises the per-job input size for the YT
    -- operations started below — confirm against the YT provider settings.
    PRAGMA yt.DataSizePerJob = "2G";

    -- Edges whose first endpoint belongs to the sample.
    $sampled_edges = (
        SELECT soup.*
        FROM @soup_sorted AS soup
        LEFT SEMI JOIN @staff_components AS picked
        USING (id1Type, id1)
    );

    -- Properties of the vertices that belong to the sample.
    $sampled_properties = (
        SELECT properties.*
        FROM @vertices_properties_sorted AS properties
        LEFT SEMI JOIN @staff_components AS picked
        USING (id_type, id)
    );

    INSERT INTO $output_edges WITH TRUNCATE
    SELECT *
    FROM $sampled_edges
    ORDER BY id1Type, id2Type, id1, id2;

    -- Note the sort key here is (id, id_type), not (id_type, id).
    INSERT INTO $output_properties WITH TRUNCATE
    SELECT *
    FROM $sampled_properties
    ORDER BY id, id_type;
END DEFINE;

-- ========================================================================= --

-- Pipeline driver. The `{{ ... }}` placeholders are presumably substituted
-- with table paths by a templating step before the query is submitted.
--
-- 1. Collect component ids that contain at least one staff identifier
--    (writes @staff_component_ids).
DO $prepare_components("{{ components_table }}", "{{ staff_table }}");
-- 2. Expand those components into the full identifier list
--    (writes @staff_components; the action commits internally before it
--    reads @staff_component_ids).
DO $make_sample("{{ components_table }}");
-- 3. Sort the edge and vertex-property tables by their join keys.
DO $prepare_soup("{{ soup_edges_table }}", "{{ vertices_properties_table }}");
-- Barrier: the temporary tables written above must be complete before the
-- final step reads them.
COMMIT;
-- 4. Filter edges and properties by the sample and write the outputs.
DO $select_soup_sample("{{ output_edges }}", "{{ output_properties }}");
