-- CONTEXT: The daily job that pings the extensions backend API for a metadata snapshot does not always
-- finish writing its data to the Redshift database, so some days have no snapshot.
-- For each day without data, use the most recent snapshot on or before that day (looking back at most 7 days).
-- Technique: https://stackoverflow.com/questions/2142907/sql-join-on-nearest-less-than-date
-- Remove any previous build of the mapping table so the CREATE TABLE AS below can rebuild it.
-- IF EXISTS keeps the script rerunnable: without it the DROP errors when the table is absent
-- (first run, or a rerun after a failed build) and the script stops before the CREATE.
DROP TABLE IF EXISTS analysis.dsds_389_extensions_metadata_everything_date_mapping;
-- Builds a lookup from each calendar day to the snapshot day whose metadata should be used for it.
-- Output columns:
--   the_day                                 calendar day in the reporting window
--   the_day_extensions_metadata_everything  latest snapshot day on or before the_day (at most 7 days earlier)
--   rows                                    number of rows in the source table for that snapshot day
-- Calendar days with no snapshot in the trailing 7-day window do not appear in the output (INNER JOIN).
CREATE TABLE analysis.dsds_389_extensions_metadata_everything_date_mapping AS
WITH
-- Single-row CTE holding the bounds of the reporting window.
date_window AS (
    SELECT
        CAST('2018-12-13' AS DATE) AS start_date,
        -- NOTE(review): the 7-hour shift presumably aligns the cluster clock with a local business day -- confirm.
        -- For a fixed window, a literal end date such as CAST('2019-02-26' AS DATE) can be substituted here.
        DATE_TRUNC('day', (SYSDATE - INTERVAL '7 hours')) AS end_date
),
-- One row per calendar day inside the window (both bounds inclusive).
calendar_days AS (
    SELECT
        cal.date AS the_day
    FROM metadata.date AS cal
    WHERE cal.date >= (SELECT start_date FROM date_window)
      AND cal.date <= (SELECT end_date FROM date_window)
    GROUP BY cal.date
),
-- One row per day that actually has snapshot data, with its row count.
snapshot_days AS (
    SELECT
        src.the_date AS the_day,
        COUNT(*) AS rows
    FROM analysis.extensions_metadata_everything AS src
    WHERE src.the_date >= (SELECT start_date FROM date_window)
      AND src.the_date <= (SELECT end_date FROM date_window)
    GROUP BY src.the_date
),
-- Pair every calendar day with each snapshot day in its trailing 7-day window
-- and rank the candidates from newest (1) to oldest.
ranked_candidates AS (
    SELECT
        cal.the_day AS the_day,
        snap.the_day AS the_day_extensions_metadata_everything,
        snap.rows,
        ROW_NUMBER() OVER (
            PARTITION BY cal.the_day
            ORDER BY snap.the_day DESC
        ) AS recency_rank
    FROM calendar_days AS cal
    INNER JOIN snapshot_days AS snap
        ON snap.the_day >= cal.the_day - INTERVAL '7 days'
       AND snap.the_day <= cal.the_day
)
-- Keep only the newest candidate snapshot for each calendar day.
SELECT
    the_day,
    the_day_extensions_metadata_everything,
    rows
FROM ranked_candidates
WHERE recency_rank = 1
;
