
-- -- One-time setup: create the rollup table with the explicit schema below.
-- -- (OPTIONAL) Uncomment and run the DROP/CREATE statements only the FIRST time this script is run.
-- -- On every later run keep this chunk commented out (or remove it); otherwise the DROP TABLE discards the rows loaded by earlier runs.
-- DROP TABLE IF EXISTS analysis.dsds_281_extension_daily_channel_presence_new;
-- CREATE TABLE IF NOT EXISTS analysis.dsds_281_extension_daily_channel_presence_new (
--   the_day                         TIMESTAMP WITHOUT TIME ZONE sortkey
--   , extension_id                  VARCHAR(32) distkey
--   , extension_version             VARCHAR(25)
--   , extension_anchor              VARCHAR(50)
--   , channel_id                    BIGINT
--   , flag_install_activate_status  VARCHAR(25)
--   , time_most_recent_install      TIMESTAMP WITHOUT TIME ZONE
--   , time_most_recent_activate     TIMESTAMP WITHOUT TIME ZONE
--   , time_most_recent_deactivate   TIMESTAMP WITHOUT TIME ZONE
--   , time_most_recent_uninstall    TIMESTAMP WITHOUT TIME ZONE
--   , time_most_recent_delete       TIMESTAMP WITHOUT TIME ZONE
--   , bool_extension_activated      BOOLEAN
--   , bool_extension_installed      BOOLEAN
-- );




-- Stage the computed rows in a session-scoped table whose column definitions are copied from the target table.
CREATE TEMPORARY TABLE the_temp_table (
  LIKE analysis.dsds_281_extension_daily_channel_presence_new
);

-- Prepared statement: computes one snapshot of extension presence per channel and appends it to the_temp_table.
--
-- Parameter
--   $1 (VARCHAR) : a calendar date such as '2018-08-13' (cast to TIMESTAMP).
--                  It is matched against metadata.date to obtain end_date, which is the EXCLUSIVE upper
--                  bound of the event window: only events with '2017-05-01' <= "time" < end_date are read,
--                  and every produced row carries the_day = end_date - 1 day.
--                  NOTE(review): presumably metadata.date has exactly one row per calendar day; if it has no
--                  row for $1 the scalar subqueries return NULL and nothing is inserted -- confirm.
--
-- Output columns (inserted positionally, so the order must match the_temp_table):
--   the_day, extension_id, extension_version, extension_anchor, channel_id, flag_install_activate_status,
--   time_most_recent_install, time_most_recent_activate, time_most_recent_deactivate,
--   time_most_recent_uninstall, time_most_recent_delete, bool_extension_activated, bool_extension_installed
PREPARE the_insert_function (VARCHAR) AS
INSERT INTO the_temp_table (

  WITH
  --one-day window [range_start, range_end) derived from the parameter
  the_parameter_constants AS (
    SELECT
      $1::TIMESTAMP AS range_start
      , DATEADD(day, 1, $1::TIMESTAMP) AS range_end
  )
  --used to get the time frame to look at the activation/deactivation/uninstall/delete logs
  , the_basetable_date AS (
    SELECT
      '2017-05-01'::TIMESTAMP AS start_date --start from may 1st to capture all possible events
      , date::TIMESTAMP AS end_date
    FROM metadata.date
    WHERE date>=(SELECT range_start FROM the_parameter_constants) AND date<(SELECT range_end FROM the_parameter_constants)
    GROUP BY 1,2
  )


  ------------------------------------------------------------------------------------------------
  -- INSTALL
  ------------------------------------------------------------------------------------------------
  --get raw install logs inside the window and remove exact duplicates (GROUP BY on every column)
  , install_logs_step0 AS (
    SELECT
      (SELECT start_date::DATE FROM the_basetable_date) AS start_date
      , (SELECT end_date::DATE FROM the_basetable_date) AS end_date
      , "time"::TIMESTAMP, extension_id::VARCHAR, extension_version::VARCHAR, channel_id::BIGINT
    FROM logs.extension_install
    WHERE extension_id IS NOT NULL AND extension_version IS NOT NULL
    AND (SELECT start_date FROM the_basetable_date)<="time" AND "time"<(SELECT end_date FROM the_basetable_date)
    GROUP BY 1,2,3,4,5,6
  )
  --the same event can fire twice, once with the real extension_version and once with 'N/A';
  --'N/A' is mapped to '0.0.0' so that MAX() keeps the real version whenever one exists
  , install_logs_step1 AS (
    SELECT start_date, end_date, "time", extension_id
      , MAX(CASE WHEN extension_version='N/A' THEN '0.0.0' ELSE extension_version END) AS extension_version
      , channel_id
    FROM install_logs_step0
    GROUP BY 1,2,3,4,6
  )
  --for each channel and extension and version combo, get the last timestamp that the action occurred
  , install_logs_step2 AS (
    SELECT start_date, end_date, extension_id, extension_version, channel_id, MAX("time") AS last_time_install
    FROM install_logs_step1
    GROUP BY 1,2,3,4,5
  )
  --for each channel and extension, return the 1 row that is the most recent install.
  --step1 leaves a single version per ("time", extension_id, channel_id), so two versions can never share the max timestamp here
  , install_logs_step3 AS (
    SELECT final.start_date, final.end_date, final.extension_id, final.extension_version, final.channel_id, final.last_time_install
    FROM (
      SELECT *, MAX(last_time_install) OVER (PARTITION BY start_date, end_date, channel_id, extension_id) AS max_last_time_install --this max timestamp is extension_version agnostic
      FROM install_logs_step2
    ) AS final
    WHERE last_time_install = max_last_time_install --only keep the install record that matches the max timestamp
  )


  ------------------------------------------------------------------------------------------------
  -- ACTIVATE
  ------------------------------------------------------------------------------------------------
  --get raw activate logs inside the window and remove exact duplicates
  , activate_logs_step0 AS (
    SELECT
      (SELECT start_date::DATE FROM the_basetable_date) AS start_date
      , (SELECT end_date::DATE FROM the_basetable_date) AS end_date
      , "time"::TIMESTAMP, extension_id::VARCHAR, extension_version::VARCHAR, channel_id, extension_anchor::VARCHAR
    FROM logs.extension_activate
    WHERE extension_id IS NOT NULL AND extension_version IS NOT NULL
    AND (SELECT start_date FROM the_basetable_date)<="time" AND "time"<(SELECT end_date FROM the_basetable_date)
    GROUP BY 1,2,3,4,5,6,7
  )
  --collapse the real-version / 'N/A' duplicate events (see install_logs_step1)
  , activate_logs_step1 AS (
    SELECT start_date, end_date, "time", extension_id
      , MAX(CASE WHEN extension_version='N/A' THEN '0.0.0' ELSE extension_version END) AS extension_version
      , channel_id, extension_anchor
    FROM activate_logs_step0
    GROUP BY 1,2,3,4,6,7
  )
  --for each channel and extension and version and anchor combo, get the last timestamp that the action occurred
  , activate_logs_step2 AS (
    SELECT start_date, end_date, extension_id, extension_version, channel_id, extension_anchor, MAX("time") AS last_time_activate
    FROM activate_logs_step1
    GROUP BY 1,2,3,4,5,6
  )
  --for each channel and extension, return exactly 1 row that is the most recent activation.
  --Unlike the other event types, step2 is also split by extension_anchor, so several anchors can share the
  --same latest timestamp. Filtering on "= MAX(...)" would keep all of them and produce duplicate
  --(the_day, extension_id, channel_id) rows downstream, so ROW_NUMBER() with a deterministic tie-break is used instead.
  , activate_logs_step3 AS (
    SELECT final.start_date, final.end_date, final.extension_id, final.extension_version, final.channel_id, final.extension_anchor, final.last_time_activate
    FROM (
      SELECT *
        , ROW_NUMBER() OVER (
            PARTITION BY start_date, end_date, channel_id, extension_id --extension_version agnostic
            ORDER BY last_time_activate DESC, extension_version DESC, extension_anchor
          ) AS recency_rank
      FROM activate_logs_step2
    ) AS final
    WHERE final.recency_rank = 1
  )


  ------------------------------------------------------------------------------------------------
  -- INSTALL + ACTIVATE
  ------------------------------------------------------------------------------------------------
  --join install and activate data together; FULL OUTER JOIN keeps pairs that have only one of the two events
  , install_activate_logs AS (
    SELECT
      NVL(install.start_date, activate.start_date) AS start_date
      , NVL(install.end_date, activate.end_date) AS end_date
      , (NVL(install.end_date, activate.end_date) - INTERVAL '1 day')::DATE AS the_day --end_date is exclusive, so the snapshot day is the day before it
      , NVL(install.extension_id, activate.extension_id) AS extension_id
      , NVL(install.extension_version, activate.extension_version) AS extension_version --the install version wins when both exist
      , (NVL(install.channel_id, activate.channel_id))::BIGINT AS channel_id
      , activate.extension_anchor
      , install.last_time_install AS time_most_recent_install
      , activate.last_time_activate AS time_most_recent_activate
      , CASE
          WHEN install.last_time_install IS NULL AND activate.last_time_activate IS NOT NULL THEN 'activate_only'
          WHEN install.last_time_install IS NOT NULL AND activate.last_time_activate IS NULL THEN 'install_only'
          WHEN install.last_time_install IS NULL AND activate.last_time_activate IS NULL THEN 'error_both_null'
          WHEN install.last_time_install IS NOT NULL AND activate.last_time_activate IS NOT NULL THEN 'activate_and_install'
          ELSE 'error_weird'
        END AS flag_install_activate_status
    FROM install_logs_step3 AS install
    FULL OUTER JOIN activate_logs_step3 AS activate
      ON install.channel_id = activate.channel_id
      AND install.extension_id = activate.extension_id
      -- AND install.extension_version = activate.extension_version --IS THIS NEEDED? PROBABLY NOT
  )


  ------------------------------------------------------------------------------------------------
  -- DEACTIVATE
  ------------------------------------------------------------------------------------------------
  --get raw deactivate logs inside the window and remove exact duplicates
  , deactivate_logs_step0 AS (
    SELECT
      (SELECT start_date::DATE FROM the_basetable_date) AS start_date
      , (SELECT end_date::DATE FROM the_basetable_date) AS end_date
      , "time"::TIMESTAMP, extension_id::VARCHAR, extension_version::VARCHAR, channel_id
    FROM logs.extension_deactivate
    WHERE extension_id IS NOT NULL AND extension_version IS NOT NULL
    AND (SELECT start_date FROM the_basetable_date)<="time" AND "time"<(SELECT end_date FROM the_basetable_date)
    GROUP BY 1,2,3,4,5,6
  )
  --collapse the real-version / 'N/A' duplicate events (see install_logs_step1)
  , deactivate_logs_step1 AS (
    SELECT start_date, end_date, "time", extension_id
      , MAX(CASE WHEN extension_version='N/A' THEN '0.0.0' ELSE extension_version END) AS extension_version
      , channel_id
    FROM deactivate_logs_step0
    GROUP BY 1,2,3,4,6
  )
  --for each channel and extension and version combo, get the last timestamp that the action occurred
  , deactivate_logs_step2 AS (
    SELECT start_date, end_date, extension_id, extension_version, channel_id, MAX("time") AS last_time_deactivate
    FROM deactivate_logs_step1
    GROUP BY 1,2,3,4,5
  )
  --for each channel and extension, return the 1 row that is the most recent deactivation
  , deactivate_logs_step3 AS (
    SELECT final.start_date, final.end_date, final.extension_id, final.extension_version, final.channel_id, final.last_time_deactivate
    FROM (
      SELECT *, MAX(last_time_deactivate) OVER (PARTITION BY start_date, end_date, channel_id, extension_id) AS max_last_time_deactivate --this max timestamp is extension_version agnostic
      FROM deactivate_logs_step2
    ) AS final
    WHERE last_time_deactivate = max_last_time_deactivate --only keep the deactivate record that matches the max timestamp
  )


  ------------------------------------------------------------------------------------------------
  -- UNINSTALL
  ------------------------------------------------------------------------------------------------
  --get raw uninstall logs inside the window and remove exact duplicates
  , uninstall_logs_step0 AS (
    SELECT
      (SELECT start_date::DATE FROM the_basetable_date) AS start_date
      , (SELECT end_date::DATE FROM the_basetable_date) AS end_date
      , "time"::TIMESTAMP, extension_id::VARCHAR, extension_version::VARCHAR, channel_id
    FROM logs.extension_uninstall
    WHERE extension_id IS NOT NULL AND extension_version IS NOT NULL
    AND (SELECT start_date FROM the_basetable_date)<="time" AND "time"<(SELECT end_date FROM the_basetable_date)
    GROUP BY 1,2,3,4,5,6
  )
  --collapse the real-version / 'N/A' duplicate events (see install_logs_step1)
  , uninstall_logs_step1 AS (
    SELECT start_date, end_date, "time", extension_id
      , MAX(CASE WHEN extension_version='N/A' THEN '0.0.0' ELSE extension_version END) AS extension_version
      , channel_id
    FROM uninstall_logs_step0
    GROUP BY 1,2,3,4,6
  )
  --for each channel and extension and version combo, get the last timestamp that the action occurred
  , uninstall_logs_step2 AS (
    SELECT start_date, end_date, extension_id, extension_version, channel_id, MAX("time") AS last_time_uninstall
    FROM uninstall_logs_step1
    GROUP BY 1,2,3,4,5
  )
  --for each channel and extension, return the 1 row that is the most recent uninstall
  , uninstall_logs_step3 AS (
    SELECT final.start_date, final.end_date, final.extension_id, final.extension_version, final.channel_id, final.last_time_uninstall
    FROM (
      SELECT *, MAX(last_time_uninstall) OVER (PARTITION BY start_date, end_date, channel_id, extension_id) AS max_last_time_uninstall --this max timestamp is extension_version agnostic
      FROM uninstall_logs_step2
    ) AS final
    WHERE last_time_uninstall = max_last_time_uninstall --only keep the uninstall record that matches the max timestamp
  )


  ------------------------------------------------------------------------------------------------
  -- DELETE (these logs are read without channel_id: one row per extension / version)
  ------------------------------------------------------------------------------------------------
  --latest delete time per extension_id and extension_version; joined when the base row has a real version (not '0.0.0')
  , delete_logs_v1 AS (
    SELECT
      (SELECT start_date::DATE FROM the_basetable_date) AS start_date
      , (SELECT end_date::DATE FROM the_basetable_date) AS end_date
      , MAX("time")::TIMESTAMP AS "time", extension_id::VARCHAR, extension_version::VARCHAR
    FROM logs.extension_delete
    WHERE extension_id IS NOT NULL AND extension_version IS NOT NULL
    AND (SELECT start_date FROM the_basetable_date)<="time" AND "time"<(SELECT end_date FROM the_basetable_date)
    GROUP BY 1,2,4,5
  )
  --latest delete time per extension_id regardless of version; joined when the base row's version is '0.0.0' (i.e. was 'N/A')
  , delete_logs_v2 AS (
    SELECT start_date, end_date, MAX("time") AS "time", extension_id, MAX(extension_version) AS extension_version
    FROM delete_logs_v1
    GROUP BY 1,2,4
  )


  ------------------------------------------------------------------------------------------------
  -- COMBINE
  ------------------------------------------------------------------------------------------------
  --attach the latest deactivate / uninstall / delete timestamps to every install_activate_logs row
  , all_join_logs_step0 AS (
    SELECT base.start_date, base.end_date, base.the_day
      , base.extension_id, base.extension_version, base.extension_anchor, base.channel_id
      , base.flag_install_activate_status, base.time_most_recent_install, base.time_most_recent_activate
      , deact.last_time_deactivate AS time_most_recent_deactivate, uninstall.last_time_uninstall AS time_most_recent_uninstall
      , NVL(delete_v1."time", delete_v2."time") AS time_most_recent_delete
    FROM install_activate_logs AS base
    LEFT JOIN deactivate_logs_step3 AS deact ON base.channel_id = deact.channel_id AND base.extension_id = deact.extension_id
    LEFT JOIN uninstall_logs_step3 AS uninstall ON base.channel_id = uninstall.channel_id AND base.extension_id = uninstall.extension_id
    LEFT JOIN delete_logs_v1 AS delete_v1 --when extension_version!='N/A', then we do join against extension_version
      ON base.extension_version!='0.0.0' AND base.extension_id = delete_v1.extension_id AND base.extension_version=delete_v1.extension_version
    LEFT JOIN delete_logs_v2 AS delete_v2 --when extension_version='N/A', then we don't join against extension_version
      ON base.extension_version='0.0.0' AND base.extension_id = delete_v2.extension_id
  )
  --use timestamps of the different events to determine if the extension is activated; calculated first because
  --bool_extension_activated is used by the next step. A comparison against a NULL timestamp is never true,
  --so a missing deactivate/uninstall/delete simply falls through to the next WHEN.
  , all_join_logs_step1 AS (
    SELECT the_day, extension_id, extension_version, extension_anchor, channel_id, flag_install_activate_status
      , time_most_recent_install, time_most_recent_activate, time_most_recent_deactivate
      , time_most_recent_uninstall, time_most_recent_delete
      , CASE
          WHEN time_most_recent_activate IS NULL THEN false --if no activate timestamp then can't be activated
          WHEN time_most_recent_deactivate >= time_most_recent_activate THEN false --deactivate happened at/after activate, then not activated
          WHEN time_most_recent_uninstall >= time_most_recent_activate THEN false --uninstall happened at/after activate, then not activated
          WHEN time_most_recent_delete >= time_most_recent_activate THEN false --delete happened at/after activate, then not activated
          ELSE true
      END AS bool_extension_activated
    FROM all_join_logs_step0
  )
  --use timestamps of the different events to determine if the extension is installed, uses bool_extension_activated from before
  , all_join_logs_step2 AS (
    SELECT *
      , CASE
          WHEN bool_extension_activated IS TRUE THEN true --if extension is activated based on logic, then it must be installed even if the timestamps don't have it
          WHEN time_most_recent_install IS NULL THEN false --if no install timestamp then can't be installed
          WHEN time_most_recent_uninstall >= time_most_recent_install THEN false --uninstall happened at/after install, then not installed
          WHEN time_most_recent_delete >= time_most_recent_install THEN false --delete happened at/after install, then not installed
          ELSE true
      END AS bool_extension_installed
    FROM all_join_logs_step1
  )


  , final_data AS (
    SELECT * FROM all_join_logs_step2 --WHERE bool_extension_installed IS TRUE OR bool_extension_activated IS TRUE
  )
  SELECT * FROM final_data

);










-- Run the prepared insert once per date to load; repeat the EXECUTE with other dates to load more days.
-- The argument is the exclusive end of the event window, so the rows produced are stamped with the day before it.
EXECUTE the_insert_function('2018-08-13');














-- Merge the_temp_table into the target table by replacing existing rows:
--   1) delete target rows whose (the_day, extension_id, channel_id) also exists in the_temp_table
--   2) insert every row of the_temp_table
-- Both statements run inside one transaction so that a failure in the INSERT cannot leave the target
-- table with the old rows already deleted and the new rows missing.
-- NOTE(review): the equality on channel_id never matches NULL, so if channel_id can be NULL those rows
-- would not be deleted and a re-run would load them twice -- confirm whether NULL channel_ids occur.
BEGIN;

DELETE FROM analysis.dsds_281_extension_daily_channel_presence_new
USING the_temp_table
WHERE the_temp_table.the_day = analysis.dsds_281_extension_daily_channel_presence_new.the_day
AND the_temp_table.extension_id = analysis.dsds_281_extension_daily_channel_presence_new.extension_id
AND the_temp_table.channel_id = analysis.dsds_281_extension_daily_channel_presence_new.channel_id
;

-- the_temp_table was created LIKE the target table, so the positional SELECT * lines up column for column
INSERT INTO analysis.dsds_281_extension_daily_channel_presence_new
(SELECT * FROM the_temp_table);

COMMIT;

-- Release the prepared statement now that all loads have been executed.
DEALLOCATE PREPARE the_insert_function;




-- Spot check: show every loaded row for one specific extension / channel pair.
SELECT *
FROM analysis.dsds_281_extension_daily_channel_presence_new AS presence
WHERE presence.channel_id = 174523291
  AND presence.extension_id = 'h7683cdyci11yov0tlac9f6lkvgtzi'
;


-- Duplicate check: count rows per (channel_id, extension_id, the_day) and keep the keys that occur more than once,
-- then list every row of any channel that owns at least one such duplicated key.
WITH
weird_channel_id AS (
  SELECT channel_id, extension_id, the_day, COUNT(1) AS row_count
  FROM analysis.dsds_281_extension_daily_channel_presence_new
  GROUP BY channel_id, extension_id, the_day
  HAVING COUNT(1) > 1
)

SELECT presence.*
FROM analysis.dsds_281_extension_daily_channel_presence_new AS presence
WHERE EXISTS (
  SELECT 1
  FROM weird_channel_id AS dup
  WHERE dup.channel_id = presence.channel_id
)
ORDER BY presence.channel_id, presence.extension_id, presence.time_most_recent_activate
;
