# Idempotency guard: remove any rows already stored for this record_date before the new
# aggregations are inserted, so the DAG run can be repeated for the same date with the same result.
# Bound parameter: record_date.
INCREMENTAL_DELETE = (
    "\n"
    "DELETE FROM extensions_overview_v2 WHERE record_date = %(record_date)s"
    "\n"
)

# Common conditions to partition Tahoe events by date and filter by time_utc for the given record_date.
# The record_date param is a date in UTC (e.g. "2018-07-22"), but Tahoe "date" partitions are in PT (Pacific Time),
# so each filter loads one extra PT day partition before the window to fit every UTC day of the window in it.
#
# Every window ends at the end of record_date (exclusive upper bound: record_date + 1 day):
#   * time_utc_in_record_date:      1 UTC day,  [record_date,      record_date + 1)
#   * time_utc_in_record_date_7d:   7 UTC days, [record_date - 6,  record_date + 1)
#   * time_utc_in_record_date_30d: 30 UTC days, [record_date - 29, record_date + 1)
# The lower bounds used to be -7 and -30, which selected 8 and 31 days respectively.
time_utc_in_record_date = """
        "date" >= DATEADD(day, -1, %(record_date)s) AND "date"   < DATEADD(day, 1, %(record_date)s)
        AND time_utc >= %(record_date)s             AND time_utc < DATEADD(day, 1, %(record_date)s)
"""
time_utc_in_record_date_7d = """
        "date" >= DATEADD(day, -7, %(record_date)s)       AND "date"   < DATEADD(day, 1, %(record_date)s)
        AND time_utc >= DATEADD(day, -6, %(record_date)s) AND time_utc < DATEADD(day, 1, %(record_date)s)
"""
time_utc_in_record_date_30d = """
        "date" >= DATEADD(day, -30, %(record_date)s)       AND "date"   < DATEADD(day, 1, %(record_date)s)
        AND time_utc >= DATEADD(day, -29, %(record_date)s) AND time_utc < DATEADD(day, 1, %(record_date)s)
"""

# Query Redshift to extract daily metrics for extensions in the time interval.
# The result is driven from extension_metadata (released, non-deleted extensions) and LEFT JOINs every
# metric CTE on extension_id, so metrics with no events for an extension default to 0 (or '' for the name).
# The column order of the final SELECT must stay in sync with INCREMENTAL_FIELDS.
#
# Templating notes:
#   * The {time_utc_in_record_date}, {time_utc_in_record_date_7d} and {time_utc_in_record_date_30d} fields are
#     filled in by the str.format() call at the end of this statement.
#   * %(record_date)s is left in place as a bound query parameter.
#   * Literal percent signs are written doubled (see the LIKE patterns and the split_part call).
#     NOTE(review): presumably because the DB driver applies %-style parameter substitution; confirm with the caller.
#   * For the same reasons, do not use braces or single percent signs in SQL comments inside this string.
INCREMENTAL_SELECT = """
WITH

installs AS (
    SELECT extension_id,
        COUNT(1) AS count
    FROM tahoe."extension_install"
    WHERE {time_utc_in_record_date}
    GROUP BY 1
),

uninstalls AS (
    SELECT extension_id,
        COUNT(1) AS count
    FROM tahoe."extension_uninstall"
    WHERE {time_utc_in_record_date}
    GROUP BY 1
),

activates AS (
    SELECT extension_id,
        COUNT(1) AS count
    FROM tahoe."extension_activate"
    WHERE {time_utc_in_record_date}
    GROUP BY 1
),

-- A render event happens when the channel page with the extension is loaded or refreshed.
renders AS (
    SELECT extension_id,
        COUNT(1)                   AS count,
        COUNT(DISTINCT device_id)  AS uniq_devices,
        COUNT(DISTINCT channel_id) AS uniq_channels
    FROM tahoe."extension_render"
    WHERE {time_utc_in_record_date} AND extension_mode = 'viewer'
    GROUP BY 1
),

renders_7d AS (
    SELECT extension_id,
        COUNT(DISTINCT device_id)  AS uniq_devices,
        COUNT(DISTINCT channel_id) AS uniq_channels
    FROM tahoe."extension_render"
    WHERE {time_utc_in_record_date_7d} AND extension_mode = 'viewer'
    GROUP BY 1
),

renders_30d AS (
    SELECT extension_id,
        COUNT(DISTINCT device_id)  AS uniq_devices,
        COUNT(DISTINCT channel_id) AS uniq_channels
    FROM tahoe."extension_render"
    WHERE {time_utc_in_record_date_30d} AND extension_mode = 'viewer'
    GROUP BY 1
),

-- A view event happens when the extension iframe is at least 75 percent visible.
-- Only panel extensions track view events.
views_panels AS (
    SELECT extension_id,
        COUNT(1)                  AS count,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_view"
    WHERE {time_utc_in_record_date} AND extension_mode = 'viewer'
        AND extension_anchor = 'panel'
        AND pct_view_visible = 75
    GROUP BY 1
),

views_panels_7d AS (
    SELECT extension_id,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_view"
    WHERE {time_utc_in_record_date_7d} AND extension_mode = 'viewer'
        AND extension_anchor = 'panel'
        AND pct_view_visible = 75
    GROUP BY 1
),

views_panels_30d AS (
    SELECT extension_id,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_view"
    WHERE {time_utc_in_record_date_30d} AND extension_mode = 'viewer'
        AND extension_anchor = 'panel'
        AND pct_view_visible = 75
    GROUP BY 1
),

-- For video_overlay and component extensions, views are the same as renders (view events are not available).
views_video AS (
    SELECT extension_id,
        COUNT(1)                  AS count,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_render"
    WHERE {time_utc_in_record_date} AND extension_mode = 'viewer'
        AND extension_anchor != 'panel' -- video_overlay and component
    GROUP BY 1
),

views_video_7d AS (
    SELECT extension_id,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_render"
    WHERE {time_utc_in_record_date_7d} AND extension_mode = 'viewer'
        AND extension_anchor != 'panel' -- video_overlay and component
    GROUP BY 1
),

views_video_30d AS (
    SELECT extension_id,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_render"
    WHERE {time_utc_in_record_date_30d} AND extension_mode = 'viewer'
        AND extension_anchor != 'panel' -- video_overlay and component
    GROUP BY 1
),

clicks AS (
    SELECT extension_id,
        COUNT(1)                  AS count,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_click"
    WHERE {time_utc_in_record_date} AND extension_mode = 'viewer'
    GROUP BY 1
),

clicks_7d AS (
    SELECT extension_id,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_click"
    WHERE {time_utc_in_record_date_7d} AND extension_mode = 'viewer'
    GROUP BY 1
),

clicks_30d AS (
    SELECT extension_id,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_click"
    WHERE {time_utc_in_record_date_30d} AND extension_mode = 'viewer'
    GROUP BY 1
),

mouseenters AS (
    SELECT extension_id,
        COUNT(1)                  AS count,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_mouseenter"
    WHERE {time_utc_in_record_date}
    GROUP BY 1
),

mouseenters_7d AS (
    SELECT extension_id,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_mouseenter"
    WHERE {time_utc_in_record_date_7d}
    GROUP BY 1
),

mouseenters_30d AS (
    SELECT extension_id,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_mouseenter"
    WHERE {time_utc_in_record_date_30d}
    GROUP BY 1
),

-- Extensions hidden by viewers in the video player
minimizns AS (
    SELECT extension_id,
        COUNT(1)                  AS count,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_viewer_overlay_settings_toggle"
    WHERE {time_utc_in_record_date}
        AND "date" >= '2018-06-22' -- events start on 6/19, but data is broken until 6/22
        AND visibility_state='off'
    GROUP BY 1
),

unminimizns AS (
    SELECT extension_id,
        COUNT(1)                  AS count,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."extension_viewer_overlay_settings_toggle"
    WHERE {time_utc_in_record_date}
        AND "date" >= '2018-06-22' -- events start on 6/19, but data is broken until 6/22
        AND visibility_state='on'
    GROUP BY 1
),

-- bits transactions have some duplicates sometimes, make sure to de-duplicate them.
-- It won't work if for whatever reason there are two rows with the same transaction ID,
-- but different extension_id, amount, or user_id. Thankfully that should never happen since transaction_id is a UUID.
bits_deduped AS (
    SELECT extension_id, amount, transaction_id, user_id
    FROM tahoe."extension_use_bits_complete"
    WHERE {time_utc_in_record_date}
    GROUP BY 1,2,3,4
),

bits AS (
    SELECT extension_id,
        -- sum absolute amounts because of a data issue around 2019-10-28 that registered amounts with the wrong sign
        SUM(ABS(amount))               AS bits_spent,
        -- transaction_ids are duplicated when the purchase involves multiple bits bundles (cost_basis)
        COUNT(DISTINCT transaction_id) AS uniq_transactions,
        COUNT(DISTINCT user_id)        AS uniq_users
    FROM bits_deduped
    GROUP BY 1
),

bits_7d AS (
    SELECT extension_id,
        COUNT(DISTINCT user_id)        AS uniq_users
    FROM tahoe."extension_use_bits_complete"
    WHERE {time_utc_in_record_date_7d}
    GROUP BY 1
),

bits_30d AS (
    SELECT extension_id,
        COUNT(DISTINCT user_id)        AS uniq_users
    FROM tahoe."extension_use_bits_complete"
    WHERE {time_utc_in_record_date_30d}
    GROUP BY 1
),

identity_links AS (
    SELECT extension_id,
        COUNT(DISTINCT user_id) AS uniq_users
    FROM tahoe."extension_identity_link"
    WHERE {time_utc_in_record_date}
    GROUP BY 1
),

identity_unlinks AS (
    SELECT extension_id,
        COUNT(DISTINCT user_id) AS uniq_users
    FROM tahoe."extension_identity_unlink"
    WHERE {time_utc_in_record_date}
    GROUP BY 1
),

-- Extension Details page views, are tracked from the pageview event, that has the extension_id in the page URL.
-- New URLs look like this:
--   * http://dashboard.twitch.tv/extensions/4ntuott0zqw008atk0wy60phfa8jyf
--   * http://dashboard.twitch.tv/extensions/4ntuott0zqw008atk0wy60phfa8jyf-0.0.9
--   * http://dashboard.twitch.tv/extensions/4ntuott0zqw008atk0wy60phfa8jyf-0.0.9?no-mobile-redirect=true#foobar
-- Old URLs (before Dec-13-2019) look like this:
--   * http://www.twitch.tv/ext/4ntuott0zqw008atk0wy60phfa8jyf
--   * http://www.twitch.tv/ext/4ntuott0zqw008atk0wy60phfa8jyf-0.0.9?foo=bar#foobar
-- Some URLs are invalid
-- The extension_id is extracted from URL with some basic validation.
-- There is no guarantee that extension ids are valid, but previous analysis shows the vast majority is valid.
ext_page_views AS (
    SELECT
        split_part(split_part(split_part(split_part(split_part(url,'?',1),'#',1),'%%',1),'/',5),'-',1) AS extension_id,
        COUNT(1)                  AS count,
        COUNT(DISTINCT device_id) AS uniq_devices
    FROM tahoe."pageview"
    WHERE {time_utc_in_record_date}
        AND location='extension_details'
        AND ( -- double check URL format where the extension id is extracted
            url LIKE '%%//www.twitch.tv/ext/%%' -- before Dec-13-2019
            OR
            url LIKE '%%//dashboard.twitch.tv/extensions/%%' -- after Dec-13-2019
        )
        AND len(extension_id) = 30 AND extension_id ~ '^[0-9a-zA-Z]+$' -- filter out some typos and invalid ids
    GROUP BY 1
    HAVING "count" >= 2 -- little threshold helps removing single pageviews that may be typos
),

-- Released extensions that should be considered to generate reports.
-- Read from dbsnapshots to get released extension and their names.
-- NOTE: this is updated with latest data every day, expect drift when doing report backfills on old data.
extension_versions AS (
    SELECT id, version, details, is_valid_json(Details) AS flag_valid_json
    FROM dbsnapshots.extensions_extension_versions
),
valid_extension_versions AS (
    SELECT * FROM extension_versions WHERE flag_valid_json = TRUE
),
extension_version_name AS (
    SELECT id AS extension_id,
        version AS extension_version,
        json_extract_path_text(details, 'Name') AS extension_name
    FROM valid_extension_versions
),
released_extensions_list AS (
    SELECT id AS extension_id,
        releasedversion AS extension_version
    FROM dbsnapshots.extensions_extension_summaries
    WHERE releasedversion IS NOT NULL AND deleted = FALSE
    GROUP BY 1,2
),
extension_metadata AS (
    SELECT rel.extension_id, rel.extension_version, extension_name
    FROM released_extensions_list AS rel
    LEFT JOIN extension_version_name AS ee ON rel.extension_id = ee.extension_id
        AND rel.extension_version = ee.extension_version
)

SELECT %(record_date)s::DATE                            AS "record_date",
    e.extension_id                                      AS "extension_id",
    COALESCE(e.extension_name, '')                      AS "extension_name",
    COALESCE(installs.count, 0)                         AS "installs",
    COALESCE(uninstalls.count, 0)                       AS "uninstalls",
    COALESCE(activates.count, 0)                        AS "activates",
    COALESCE(renders.count, 0)                          AS "renders",
    COALESCE(renders.uniq_devices, 0)                   AS "renders_uniq_devices",
    COALESCE(renders_7d.uniq_devices, 0)                AS "renders_uniq_devices_7d",
    COALESCE(renders_30d.uniq_devices, 0)               AS "renders_uniq_devices_30d",
    COALESCE(renders.uniq_channels, 0)                  AS "renders_uniq_channels",
    COALESCE(renders_7d.uniq_channels, 0)               AS "renders_uniq_channels_7d",
    COALESCE(renders_30d.uniq_channels, 0)              AS "renders_uniq_channels_30d",
    COALESCE(views_panels.count, 0)            + COALESCE(views_video.count, 0)            AS "views",
    COALESCE(views_panels.uniq_devices, 0)     + COALESCE(views_video.uniq_devices, 0)     AS "views_uniq_devices",
    COALESCE(views_panels_7d.uniq_devices, 0)  + COALESCE(views_video_7d.uniq_devices, 0)  AS "views_uniq_devices_7d",
    COALESCE(views_panels_30d.uniq_devices, 0) + COALESCE(views_video_30d.uniq_devices, 0) AS "views_uniq_devices_30d",
    COALESCE(mouseenters.count, 0)                      AS "mouseenters",
    COALESCE(mouseenters.uniq_devices, 0)               AS "mouseenters_uniq_devices",
    COALESCE(mouseenters_7d.uniq_devices, 0)            AS "mouseenters_uniq_devices_7d",
    COALESCE(mouseenters_30d.uniq_devices, 0)           AS "mouseenters_uniq_devices_30d",
    COALESCE(clicks.count, 0)                           AS "clicks",
    COALESCE(clicks.uniq_devices, 0)                    AS "clicks_uniq_devices",
    COALESCE(clicks_7d.uniq_devices, 0)                 AS "clicks_uniq_devices_7d",
    COALESCE(clicks_30d.uniq_devices, 0)                AS "clicks_uniq_devices_30d",
    COALESCE(minimizns.count, 0)                        AS "minimizns",
    COALESCE(minimizns.uniq_devices, 0)                 AS "minimizns_uniq_devices",
    COALESCE(unminimizns.count, 0)                      AS "unminimizns",
    COALESCE(unminimizns.uniq_devices, 0)               AS "unminimizns_uniq_devices",
    COALESCE(bits.bits_spent, 0)                        AS "bits_spent",
    COALESCE(bits.uniq_transactions, 0)                 AS "bits_transactions",
    COALESCE(bits.uniq_users, 0)                        AS "bits_uniq_users",
    COALESCE(bits_7d.uniq_users, 0)                     AS "bits_uniq_users_7d",
    COALESCE(bits_30d.uniq_users, 0)                    AS "bits_uniq_users_30d",
    COALESCE(identity_links.uniq_users, 0)              AS "identity_links_uniq_users",
    COALESCE(identity_unlinks.uniq_users, 0)            AS "identity_unlinks_uniq_users",
    COALESCE(ext_page_views.count, 0)                   AS "ext_page_views",
    COALESCE(ext_page_views.uniq_devices, 0)            AS "ext_page_views_uniq_devices"
FROM extension_metadata AS e
LEFT JOIN installs          ON e.extension_id = installs.extension_id
LEFT JOIN uninstalls        ON e.extension_id = uninstalls.extension_id
LEFT JOIN activates         ON e.extension_id = activates.extension_id
LEFT JOIN renders           ON e.extension_id = renders.extension_id
LEFT JOIN renders_7d        ON e.extension_id = renders_7d.extension_id
LEFT JOIN renders_30d       ON e.extension_id = renders_30d.extension_id
LEFT JOIN views_panels      ON e.extension_id = views_panels.extension_id
LEFT JOIN views_panels_7d   ON e.extension_id = views_panels_7d.extension_id
LEFT JOIN views_panels_30d  ON e.extension_id = views_panels_30d.extension_id
LEFT JOIN views_video       ON e.extension_id = views_video.extension_id
LEFT JOIN views_video_7d    ON e.extension_id = views_video_7d.extension_id
LEFT JOIN views_video_30d   ON e.extension_id = views_video_30d.extension_id
LEFT JOIN mouseenters       ON e.extension_id = mouseenters.extension_id
LEFT JOIN mouseenters_7d    ON e.extension_id = mouseenters_7d.extension_id
LEFT JOIN mouseenters_30d   ON e.extension_id = mouseenters_30d.extension_id
LEFT JOIN clicks            ON e.extension_id = clicks.extension_id
LEFT JOIN clicks_7d         ON e.extension_id = clicks_7d.extension_id
LEFT JOIN clicks_30d        ON e.extension_id = clicks_30d.extension_id
LEFT JOIN minimizns         ON e.extension_id = minimizns.extension_id
LEFT JOIN unminimizns       ON e.extension_id = unminimizns.extension_id
LEFT JOIN bits              ON e.extension_id = bits.extension_id
LEFT JOIN bits_7d           ON e.extension_id = bits_7d.extension_id
LEFT JOIN bits_30d          ON e.extension_id = bits_30d.extension_id
LEFT JOIN identity_links    ON e.extension_id = identity_links.extension_id
LEFT JOIN identity_unlinks  ON e.extension_id = identity_unlinks.extension_id
LEFT JOIN ext_page_views    ON e.extension_id = ext_page_views.extension_id
""".format(
    time_utc_in_record_date=time_utc_in_record_date,
    time_utc_in_record_date_7d=time_utc_in_record_date_7d,
    time_utc_in_record_date_30d=time_utc_in_record_date_30d)


# Column names for the rows INSERTed in Aurora from the Redshift SELECT query.
# ORDER matters: the sequence below must be exactly the same as the column order of the SELECT statement.
INCREMENTAL_FIELDS = [
    # row identity
    "record_date", "extension_id", "extension_name",
    # install lifecycle
    "installs", "uninstalls", "activates",
    # renders
    "renders",
    "renders_uniq_devices", "renders_uniq_devices_7d", "renders_uniq_devices_30d",
    "renders_uniq_channels", "renders_uniq_channels_7d", "renders_uniq_channels_30d",
    # views
    "views",
    "views_uniq_devices", "views_uniq_devices_7d", "views_uniq_devices_30d",
    # mouseenters
    "mouseenters",
    "mouseenters_uniq_devices", "mouseenters_uniq_devices_7d", "mouseenters_uniq_devices_30d",
    # clicks
    "clicks",
    "clicks_uniq_devices", "clicks_uniq_devices_7d", "clicks_uniq_devices_30d",
    # minimize / unminimize toggles
    "minimizns", "minimizns_uniq_devices",
    "unminimizns", "unminimizns_uniq_devices",
    # bits
    "bits_spent", "bits_transactions",
    "bits_uniq_users", "bits_uniq_users_7d", "bits_uniq_users_30d",
    # identity links
    "identity_links_uniq_users", "identity_unlinks_uniq_users",
    # extension details page views
    "ext_page_views", "ext_page_views_uniq_devices",
]

UPDATE_DOMAIN_REPORT_META_DATA = """
BEGIN;
DELETE FROM latest_domain_report_meta_data WHERE domain = 'extensions' AND report = 'overview_v2';
INSERT INTO latest_domain_report_meta_data
SELECT %(end_date)s
    , 'extensions'
    , 'overview_v2'
    , extension_id
    , MAX(extension_name)
    , MIN(record_date)
    , MAX(record_date)
FROM extensions_overview_v2
WHERE record_date >= %(start_date)s AND record_date < %(end_date)s
GROUP BY extension_id;
COMMIT;
"""
