{% if not is_embedded %}
PRAGMA Library = 'aggregation_lib.sql';
{% endif %}

IMPORT {% if is_embedded %}.lib.{% endif %}aggregation_lib SYMBOLS
    $aggregate_uniq_arrays;

-- ========================================================================= --

-- Path of the table that receives edges rejected by validation for this day.
-- When a log source name is supplied, it is appended as a `_<name>` suffix.
{% set validation_errors_suffix = ('_' ~ log_source_name) if log_source_name else '' %}
$validation_errors = '{{ soup_dir }}/day/{{ date }}/validation/incremental_errors{{ validation_errors_suffix }}';

-- Template variable holding a `WITH SCHEMA` clause that is spliced after the
-- table reads in this file. It declares every edge column as optional and
-- lists both `dates` and `date`, because a source table may carry either one.
{% set WITH_SCHEMA = """
    -- Bad table schema `yandexuid--distr_* (yasoft)`
    WITH SCHEMA Struct<
        `id1`: String?,
        `id1Type`: String?,
        `id2`: String?,
        `id2Type`: String?,
        `sourceType`: String?,
        `logSource`: String?,
        -- may be dates list, or single date string
        `dates`: List<String>?,
        `date`: String?
    >
""" %}

-- Raw edges read from the input tables, brought to one shape: whichever of
-- `date` / `dates` a row provides is exposed as a non-null `dates` list.
-- In stream mode the tables come from `input_tables`; otherwise, for every
-- edge type, the daily table and the graph-stream table are read.
$soup_daily_data = (
    SELECT
        id1,
        id1Type,
        id2,
        id2Type,
        logSource,
        sourceType,
        -- a single `date` wins over `dates`; an empty list is used when
        -- neither column is filled
        COALESCE(
            CASE
                WHEN `date` IS NULL THEN dates
                ELSE AsList(Unwrap(`date`))
            END,
            ListCreate(String)
        ) AS dates
    FROM EACH(AsList(
{% if not stream %}
{% for et in edge_types %}
{% set et_name = '_'.join([et.Id1Type.Name, et.Id2Type.Name, et.SourceType.Name, et.LogSource.Name]) %}
'{{ soup_dir }}/day/{{ date }}/{{ et_name }}',
'{{ graph_stream_dir }}/soup/{{ et_name }}',
{% endfor %}
{% else %}
'{{ (input_tables) | join("',\n'") | safe }}'
{% endif %}
    ))
    {{ WITH_SCHEMA }}
);

-- Daily edges where a pair of identifiers of the same type is put into a
-- fixed (min, max) order, so that both directions of such an edge share one
-- key. Pairs of different types are left as they are.
-- NOTE(review): the ordering is applied to the raw ids, while normalization
-- happens later in GROUP BY; confirm that normalization cannot change the
-- relative order of two ids, otherwise mirrored edges may stay unmerged.
$soup_daily_ordered = (
    SELECT
        IF(id1Type == id2Type, MIN_OF(id1, id2), id1) AS id1,
        id1Type,
        IF(id1Type == id2Type, MAX_OF(id1, id2), id2) AS id2,
        id2Type,
        logSource,
        sourceType,
        dates
    FROM $soup_daily_data
);

-- One row per (normalized id1, id1Type, normalized id2, id2Type, logSource,
-- sourceType) with the sorted union of all dates seen for that key.
-- IsValid is evaluated on the normalized ids and tells whether both of them
-- are significant.
$all = (
    SELECT
        id1,
        id1Type,
        id2,
        id2Type,
        logSource,
        sourceType,
        (
            Identifiers::IsSignificant(id1Type, id1)
            AND Identifiers::IsSignificant(id2Type, id2)
        ) AS IsValid,
        ListSort(
            AGGREGATE_BY(dates ?? [], $aggregate_uniq_arrays) ?? []
        ) AS dates
    FROM $soup_daily_ordered
    GROUP BY
        Identifiers::Normalize(id1Type, id1) AS id1,
        id1Type,
        Identifiers::Normalize(id2Type, id2) AS id2,
        id2Type,
        logSource,
        sourceType
);

-- Aggregated edges that passed validation; the helper IsValid column is
-- removed from the result.
$all_normed = (
    SELECT
        * WITHOUT IsValid
    FROM
        $all
    WHERE
        IsValid
);

-- Store every aggregated edge that is not accepted as valid in the
-- validation errors table (the table is overwritten on each run).
-- The flag is coalesced to FALSE first: with a plain `NOT IsValid`, a row
-- whose IsValid is NULL would satisfy neither `IsValid` nor `NOT IsValid`
-- and would silently disappear from both the valid set and this table.
-- NOTE(review): whether IsValid can actually be NULL depends on how the
-- Identifiers UDFs treat NULL arguments; confirm.
INSERT INTO $validation_errors WITH TRUNCATE
SELECT * WITHOUT IsValid
FROM $all
WHERE NOT (IsValid ?? FALSE);

{% if throw_before_date %}
-- Safety guard for the `throw_before_date` mode (cutting old edges out of
-- the soup storage). The mode is allowed only when crypta_env is "testing",
-- or when running in stream mode against a soup_dir that ends with
-- "state/graph/stream/soup". The condition is checked twice: once while the
-- template is rendered and once more inside the query itself.
{%
    if not (
        (crypta_env == "testing")
            or (stream and soup_dir.endswith("state/graph/stream/soup"))
        )
%}
-- Dividing a string by zero is a deliberate way to make template rendering
-- fail, so no query is produced at all for a forbidden configuration.
{{ "WARNING! SHOULD NEVER BE ON PROD!!!" / 0 }}
{% endif %}
-- ON prestable (and stream) cut last Xd for soup storage
-- WARNING! SHOULD NEVER BE ON PROD!!!
-- Second line of defence: the query aborts at run time if the same
-- condition does not hold. `stream` is rendered as an explicit TRUE/FALSE
-- literal by its truthiness (the same way the template check above treats
-- it), so an undefined or None value cannot produce invalid SQL here.
DISCARD SELECT
    Ensure(
        NULL,
        ('{{ crypta_env }}' == 'testing')
        OR (
            {{ 'TRUE' if stream else 'FALSE' }}
            AND String::EndsWith('{{ soup_dir }}', 'state/graph/stream/soup')
        ),
        "SHOULD NEVER BE ON PROD!!!"
    )
;
{% endif %}

{% for et in edge_types %}
{% set et_name = '_'.join([et.Id1Type.Name, et.Id2Type.Name, et.SourceType.Name, et.LogSource.Name]) %}
{% set storage_table = soup_dir ~ '/' ~ et_name %}
-- Rewrite the accumulated table of this edge type from its current content
-- merged with the validated edges of the same type.
INSERT INTO `{{ storage_table }}` WITH TRUNCATE
SELECT *
FROM (
{% if not normalize_lazy %}
    -- Eager mode: stored and new rows are stacked together and grouped again
    -- by normalized ids, so stored ids are re-normalized as well; date lists
    -- of one key are merged and sorted.
    SELECT
        id1,
        id1Type,
        id2,
        id2Type,
        logSource,
        sourceType,
        ListSort(
            AGGREGATE_BY(dates ?? [], $aggregate_uniq_arrays) ?? []
        ) AS dates
    FROM (
        SELECT *
        FROM $all_normed
        WHERE id1Type == '{{ et.Id1Type.Name }}'
            AND id2Type == '{{ et.Id2Type.Name }}'
            AND sourceType == '{{ et.SourceType.Name }}'
            AND logSource == '{{ et.LogSource.Name }}'

        UNION ALL

        SELECT *
        FROM EACH(AsList('{{ storage_table }}'))
        {{ WITH_SCHEMA }}
    )
    GROUP BY
        Identifiers::Normalize(id1Type, id1) AS id1,
        id1Type,
        Identifiers::Normalize(id2Type, id2) AS id2,
        id2Type,
        logSource,
        sourceType
{% else %}
    -- Lazy mode: stored rows are matched with new rows on (id1, id2) as they
    -- are, without normalizing the stored ids again. Attributes are taken
    -- from the stored row when it exists, otherwise from the new one; the
    -- two date lists are concatenated, de-duplicated and sorted.
    SELECT
        Unwrap(COALESCE(stored.id1, fresh.id1)) AS id1,
        Unwrap(COALESCE(stored.id2, fresh.id2)) AS id2,
        Unwrap(COALESCE(stored.id1Type, fresh.id1Type)) AS id1Type,
        Unwrap(COALESCE(stored.id2Type, fresh.id2Type)) AS id2Type,
        Unwrap(COALESCE(stored.sourceType, fresh.sourceType)) AS sourceType,
        Unwrap(COALESCE(stored.logSource, fresh.logSource)) AS logSource,
        ListSort(ListUniq(ListExtend(
            COALESCE(stored.dates, ListCreate(String)),
            COALESCE(fresh.dates, ListCreate(String))
        ))) AS dates
    FROM EACH(AsList('{{ storage_table }}'))
    {{ WITH_SCHEMA }} AS stored
    FULL OUTER JOIN (
        SELECT *
        FROM $all_normed
        WHERE id1Type == '{{ et.Id1Type.Name }}'
            AND id2Type == '{{ et.Id2Type.Name }}'
            AND sourceType == '{{ et.SourceType.Name }}'
            AND logSource == '{{ et.LogSource.Name }}'
    ) AS fresh
    USING (id1, id2)
{% endif %}
)
{% if throw_before_date %}
-- dates are sorted ascending, so the last element is the most recent one
WHERE ListLast(dates) >= '{{ throw_before_date }}'
{% endif %}
;
{% endfor %}
