
{% include soup_filter %} -- method $soup_filter ($row -> bool)

-- Destination tables for the INSERT statements at the end of the script.
-- Both values are filled in by the template engine before the query runs.
$output_all_pairs = '{{ output_all_pairs }}';
$output_stats = '{{ output_stats }}';

-- Build $data out of the configured table ranges. Every loop iteration
-- redefines $data; has_data is empty only until the first source has been
-- emitted, after which each new source is unioned with the previous $data.
{% set has_data = [] %}
{% for table_range in input_ranges %}
$data = (
  SELECT * FROM RANGE("{{ table_range[0] }}", "{{ table_range[1] }}", "{{ table_range[2] }}")
  {% if not has_data %}{% do has_data.append(1) %}
  {% else %}
  UNION ALL SELECT * FROM $data
  {% endif %}
);
{% endfor %}

-- Add every explicitly listed table to $data. has_data is non-empty once any
-- earlier source has been emitted, so only the very first source overall is
-- selected without a UNION ALL against the previous $data.
{% for table_path in input_paths %}
$data = (
  SELECT * FROM `{{ table_path }}`
  {% if not has_data %}{% do has_data.append(1) %}
  {% else %}
  UNION ALL SELECT * FROM $data
  {% endif %}
);
{% endfor %}

-- Merges a list of dictionaries into one dictionary: the result has every key
-- that occurs in any input dictionary, mapped to the ListSum of all values
-- seen for that key.
$to_dict = ($list) -> {
    -- One list of (key, value) tuples per input dictionary.
    $items_per_dict = ListMap($list, ($dict) -> (DictItems($dict)));
    -- All (key, value) tuples in a single flat list.
    $all_items = ListFlatten($items_per_dict);
    -- (key, list of values) tuples, one per distinct key.
    $values_by_key = DictItems(ToMultiDict($all_items));
    $summed_items = ListMap(
        $values_by_key,
        ($entry) -> (AsTuple($entry.0, ListSum($entry.1)))
    );
    RETURN ToDict($summed_items);
};

-- Collapse $data to one row per (id1, id2, id1Type, id2Type) group; sspid is
-- an additional grouping key when the template is rendered with with_sspid.
$pairs = (
    SELECT
        id1,
        id2,
        id1Type,
        id2Type,
        SUM(cnt) AS cnt,
        -- Per-key sum over all fpTypes dictionaries of the group.
        $to_dict(AGGREGATE_LIST(fpTypes)) AS fpTypes,
        MIN(firstTimestamp) AS firstTimestamp,
        MAX(lastTimestamp) AS lastTimestamp,
        MAX(trueIdfa) AS trueIdfa,
        MAX(withIdfa) AS withIdfa,
        -- idfa1 / idfa2 are taken from the row with the greatest
        -- (trueIdfa, idfaN) tuple.
        MAX_BY(idfa1, AsTuple(trueIdfa, idfa1)) AS idfa1,
        MAX_BY(idfa2, AsTuple(trueIdfa, idfa2)) AS idfa2,
        -- withIdfa of the row with the greatest (lastTimestamp, withIdfa) tuple.
        MAX_BY(withIdfa, AsTuple(lastTimestamp, withIdfa)) AS lastWithIdfa,

        {% if with_cryptaid %}
        -- CryptaID counterparts of the idfa aggregates above.
        MAX(trueCryptaID) AS trueCryptaID,
        MAX(withCryptaID) AS withCryptaID,
        MAX_BY(cryptaId1, AsTuple(trueCryptaID, cryptaId1)) AS cryptaId1,
        MAX_BY(cryptaId2, AsTuple(trueCryptaID, cryptaId2)) AS cryptaId2,
        {% endif %}
        {% if with_sspid %}
        sspid,
        {% endif %}
    FROM $data
    GROUP BY (
        id1,
        id2,
        id1Type,
        id2Type
        {% if with_sspid %}
        , sspid
        {% endif %}
    )
);

-- Keep only pairs where both identifiers are present and attach isGood, the
-- result of $soup_filter applied to the whole row (passed via TableRow()).
$pairs = (
    SELECT
        id1,
        id2,
        id1Type,
        id2Type,
        cnt,
        fpTypes,
        firstTimestamp,
        lastTimestamp,
        trueIdfa,
        withIdfa,
        idfa1,
        idfa2,
        lastWithIdfa,
        $soup_filter(TableRow()) AS isGood,

        {% if with_cryptaid %}
        trueCryptaID,
        withCryptaID,
        cryptaId1,
        cryptaId2,
        {% endif %}
        {% if with_sspid %}
        sspid,
        {% endif %}
    FROM $pairs
    WHERE id1 IS NOT NULL
        AND id2 IS NOT NULL
);

-- Summary counters over the pairs for which withIdfa is true:
--   cnt1        - number of such pairs
--   cnt2..cnt5  - pairs whose cnt exceeds 1, 2, 3 and 4 respectively
--   ok1..ok5    - the same counters restricted to pairs where trueIdfa is true
--   firstTimestamp / lastTimestamp - overall time span of those pairs
$stats = (
    SELECT
        COUNT(*)                        AS cnt1,
        COUNT_IF(cnt > 1)               AS cnt2,
        COUNT_IF(cnt > 2)               AS cnt3,
        COUNT_IF(cnt > 3)               AS cnt4,
        COUNT_IF(cnt > 4)               AS cnt5,
        COUNT_IF(trueIdfa)              AS ok1,
        COUNT_IF(trueIdfa AND cnt > 1)  AS ok2,
        COUNT_IF(trueIdfa AND cnt > 2)  AS ok3,
        COUNT_IF(trueIdfa AND cnt > 3)  AS ok4,
        COUNT_IF(trueIdfa AND cnt > 4)  AS ok5,
        MIN(firstTimestamp)             AS firstTimestamp,
        MAX(lastTimestamp)              AS lastTimestamp
    FROM $pairs
    WHERE withIdfa
);

-- Turn the okN counters into rates: rateN = okN / cntN. The leading `1.`
-- factor makes the division floating-point instead of integer.
-- NOTE(review): when a cntN is 0 this presumably evaluates 0.0 / 0 and yields
-- NaN rather than NULL - confirm that consumers of the stats table accept it.
$stats = (
    SELECT
        cnt1,
        cnt2,
        cnt3,
        cnt4,
        cnt5,
        1. * ok1 / cnt1 AS rate1,
        1. * ok2 / cnt2 AS rate2,
        1. * ok3 / cnt3 AS rate3,
        1. * ok4 / cnt4 AS rate4,
        1. * ok5 / cnt5 AS rate5,
        firstTimestamp,
        lastTimestamp
    FROM $stats
);

-- Return the statistics as a query result.
SELECT *
FROM $stats;

-- Overwrite the pairs table with the current contents of $pairs.
INSERT INTO $output_all_pairs WITH TRUNCATE
SELECT *
FROM $pairs;

-- Overwrite the statistics table with the same statistics returned above.
INSERT INTO $output_stats WITH TRUNCATE
SELECT *
FROM $stats;
