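-- Notes on the columns written to mgst.play_sessions_qos:
-- num_minutes is expected to be > 0
-- buffer_empties is never null and is >= 0 (missing counts are coalesced to 0)
-- buffering_time is in seconds; the raw sum can exceed num_minutes * 60, so it is capped at num_minutes * 60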


-- Clear the target window [start_date - 2 days, end_date) so the INSERT that
-- follows can repopulate it without leaving duplicate rows.
DELETE FROM mgst.play_sessions_qos
WHERE date < %(end_date)s
    AND date >= CAST(%(start_date)s AS DATE) - INTERVAL '2 days';

-- Repopulate mgst.play_sessions_qos for the window [start_date - 2 days, end_date):
-- one row per non-duplicated play session from cubes.play_sessions, enriched with
-- per-session buffering / latency metrics and the device's lifecycle stage.
-- NOTE(review): the INSERT has no column list, so the column order of the final
-- SELECT must match the column order of the target table -- do not reorder it.
INSERT INTO mgst.play_sessions_qos (

WITH

-- Lifecycle stage per device and day; joined to sessions on (device_id, date).
lifecycle_stage AS (
    SELECT
        device_id,
        lifecycle_stage,
        day
    FROM cubes.viewers_daily_by_lifecycle_stage_2020
    WHERE day >= CAST(%(start_date)s AS DATE) - INTERVAL '2 days'
        AND day < %(end_date)s
),

-- Per (play_session_id, device_id): count of minute-watched rows (num_minutes)
-- and the median broadcaster HLS latency. Only rows classified as 'live' with a
-- latency between 1 and 1000 * 60 contribute to the median
-- (presumably milliseconds, i.e. at most one minute -- TODO confirm units).
mw AS (
    SELECT
        play_session_id,
        device_id,
        MEDIAN(
            CASE
                WHEN f_sql_get_video_product_type_v1(broadcaster_software, vod_type, content_mode, live) = 'live'
                    AND hls_latency_broadcaster BETWEEN 1 AND 1000 * 60
                THEN hls_latency_broadcaster
                ELSE NULL
            END
        ) AS median_hls_latency_broadcaster,
        COUNT(*) AS num_minutes
    FROM spade."minute-watched"
    WHERE date >= CAST(%(start_date)s AS DATE) - INTERVAL '2 days'
        AND date < %(end_date)s
        AND country IN ('US', 'BR', 'MX', 'AR', 'CL', 'CO', 'PY', 'UY', 'VE', 'PE', 'HN', 'SV', 'BO', 'CR', 'IN')
    GROUP BY
        play_session_id,
        device_id
),

-- Number of buffer-empty events per (device_id, play_session_id).
be AS (
    SELECT
        device_id,
        play_session_id,
        COUNT(*) AS buffer_events
    FROM tahoe."buffer-empty"
    WHERE date >= CAST(%(start_date)s AS DATE) - INTERVAL '2 days'
        AND date < %(end_date)s
        AND country IN ('US', 'BR', 'MX', 'AR', 'CL', 'CO', 'PY', 'UY', 'VE', 'PE', 'HN', 'SV', 'BO', 'CR', 'IN')
    GROUP BY
        device_id,
        play_session_id
),

-- Total buffering time per (device_id, play_session_id) from buffer-refill events.
-- Each event is clamped to [0, 60] seconds before summing to remove outliers,
-- e.g. buffer-refill.buffering_time > 60 seconds on Android:
-- https://jira.twitch.com/browse/AND-8675
br AS (
    SELECT
        device_id,
        play_session_id,
        SUM(
            CASE
                WHEN buffering_time > 60 THEN 60
                WHEN buffering_time < 0 THEN 0
                ELSE buffering_time
            END
        ) AS buffering_time
    FROM spade."buffer-refill"
    WHERE date >= CAST(%(start_date)s AS DATE) - INTERVAL '2 days'
        AND date < %(end_date)s
        AND country IN ('US', 'BR', 'MX', 'AR', 'CL', 'CO', 'PY', 'UY', 'VE', 'PE', 'HN', 'SV', 'BO', 'CR', 'IN')
    GROUP BY
        device_id,
        play_session_id
),

-- Alternative buffering-time estimate per (device_id, play_session_id): every
-- buffer_2_seconds row is counted as 2 seconds
-- (presumably one event per 2 seconds of buffering -- TODO confirm).
b2s AS (
    SELECT
        device_id,
        play_session_id,
        COUNT(*) * 2 AS b2s_buffering_time
    FROM spade.buffer_2_seconds
    WHERE date >= CAST(%(start_date)s AS DATE) - INTERVAL '2 days'
        AND date < %(end_date)s
        AND country IN ('US', 'BR', 'MX', 'AR', 'CL', 'CO', 'PY', 'UY', 'VE', 'PE', 'HN', 'SV', 'BO', 'CR', 'IN')
    GROUP BY
        device_id,
        play_session_id
),

-- video-play rows with their playback attributes. play_index numbers the rows of
-- each (device_id, play_session_id) from the most recent "time" backwards, so
-- play_index = 1 is the latest video-play of the session. This handles
-- play_session_id re-use (Android): https://jira.twitch.com/browse/AND-8677
vp AS (
    SELECT
        play_session_id,
        device_id,
        backend,
        -- 'none' and NULL are both reported as 'unknown'
        CASE
            WHEN mobile_connection_type = 'none' OR mobile_connection_type IS NULL THEN 'unknown'
            ELSE mobile_connection_type
        END AS mobile_connection_type,
        CASE WHEN low_latency THEN 'low' ELSE 'normal' END AS latency_mode,
        broadcast_id,
        time_since_load_start, -- https://jira.twitch.com/browse/AND-8677
        ROW_NUMBER() OVER (
            PARTITION BY device_id, play_session_id
            ORDER BY "time" DESC
        ) AS play_index
    FROM spade."video-play"
    WHERE date >= CAST(%(start_date)s AS DATE) - INTERVAL '2 days'
        AND date < %(end_date)s
        AND country IN ('US', 'BR', 'MX', 'AR', 'CL', 'CO', 'PY', 'UY', 'VE', 'PE', 'HN', 'SV', 'BO', 'CR', 'IN')
),

-- One row per (device_id, play_session_id) taken from the latest video-play row,
-- with the per-session metrics attached. Metrics with no matching rows become 0;
-- median_hls_latency_broadcaster stays NULL when there is no minute-watched match.
session_qos AS (
    SELECT
        vp.device_id,
        vp.play_session_id,
        vp.backend,
        vp.mobile_connection_type,
        vp.latency_mode,
        vp.broadcast_id,
        vp.time_since_load_start,
        mw.median_hls_latency_broadcaster,
        COALESCE(br.buffering_time, 0) AS buffering_time,
        COALESCE(b2s.b2s_buffering_time, 0) AS b2s_buffering_time,
        COALESCE(be.buffer_events, 0) AS buffer_events,
        COALESCE(mw.num_minutes, 0) AS num_minutes
    FROM vp
    LEFT JOIN mw
        ON mw.play_session_id = vp.play_session_id
        AND mw.device_id = vp.device_id
    LEFT JOIN be
        ON be.play_session_id = vp.play_session_id
        AND be.device_id = vp.device_id
    -- br is deliberately joined through be: buffer-refill time is only taken
    -- when the session also has buffer-empty events.
    LEFT JOIN br
        ON be.play_session_id = br.play_session_id
        AND be.device_id = br.device_id
    LEFT JOIN b2s
        ON b2s.play_session_id = vp.play_session_id
        AND b2s.device_id = vp.device_id
    WHERE vp.play_index = 1
),

-- Play sessions in the window for the selected countries. Only the columns that
-- are written to the target table are read, so new columns on
-- cubes.play_sessions cannot collide with the QoS column names used below.
ps AS (
    SELECT
        date,
        play_session_id,
        device_id,
        tab_session_id,
        login,
        player,
        platform,
        live,
        game_on_session_start,
        channel,
        play_time,
        country,
        referral_source,
        vod_format,
        vod_id,
        channel_id,
        content_mode_and_type,
        user_id,
        url,
        browser,
        content_mode,
        referrer,
        referrer_domain,
        referrer_path_root,
        referrer_path_stem,
        referrer_url,
        content,
        medium,
        broadcaster_software,
        host_channel,
        video_product_type,
        video_product_subtype,
        asn,
        cluster,
        app_version,
        item_tracking_id,
        item_position,
        row_name,
        row_position,
        app_session_id,
        search_session_id,
        search_query_id,
        opps_preroll,
        imps_preroll,
        opps_midroll,
        imps_midroll,
        opps_postroll,
        imps_postroll
    FROM cubes.play_sessions
    WHERE date >= CAST(%(start_date)s AS DATE) - INTERVAL '2 days'
        AND date < %(end_date)s
        AND country IN ('US', 'BR', 'MX', 'AR', 'CL', 'CO', 'PY', 'UY', 'VE', 'PE', 'HN', 'SV', 'BO', 'CR', 'IN')
),

-- play_sessions is affected by play_session_id re-use: collect every
-- play_session_id that occurs more than once in ps (counted by play_session_id
-- alone, regardless of device_id) so those sessions can be excluded below.
dup_filter AS (
    SELECT
        play_session_id
    FROM ps
    GROUP BY play_session_id
    HAVING COUNT(*) > 1
)

SELECT
    play_sessions.date,
    play_sessions.play_session_id,
    play_sessions.device_id,
    play_sessions.tab_session_id,
    play_sessions.login,
    play_sessions.player,
    play_sessions.platform,
    play_sessions.live,
    play_sessions.game_on_session_start,
    play_sessions.channel,
    play_sessions.play_time,
    play_sessions.country,
    play_sessions.referral_source,
    play_sessions.vod_format,
    play_sessions.vod_id,
    play_sessions.channel_id,
    play_sessions.content_mode_and_type,
    play_sessions.user_id,
    play_sessions.url,
    play_sessions.browser,
    play_sessions.content_mode,
    play_sessions.referrer,
    play_sessions.referrer_domain,
    play_sessions.referrer_path_root,
    play_sessions.referrer_path_stem,
    play_sessions.referrer_url,
    play_sessions.content,
    play_sessions.medium,
    play_sessions.broadcaster_software,
    play_sessions.host_channel,
    play_sessions.video_product_type,
    play_sessions.video_product_subtype,
    play_sessions.asn,
    play_sessions.cluster,
    play_sessions.app_version,
    play_sessions.item_tracking_id,
    play_sessions.item_position,
    play_sessions.row_name,
    play_sessions.row_position,
    play_sessions.app_session_id,
    play_sessions.search_session_id,
    play_sessions.search_query_id,
    -- NOTE(review): NULL (not 0) when the session has no video-play match.
    session_qos.num_minutes,
    play_sessions.opps_preroll,
    play_sessions.imps_preroll,
    play_sessions.opps_midroll,
    play_sessions.imps_midroll,
    play_sessions.opps_postroll,
    play_sessions.imps_postroll,
    COALESCE(session_qos.buffer_events, 0) AS buffer_empties,
    session_qos.broadcast_id,
    session_qos.backend,
    -- Buffering time in seconds, capped at the watched time (num_minutes * 60).
    -- NOTE(review): when there is no video-play match num_minutes is NULL, so the
    -- result depends on how LEAST treats NULL arguments -- confirm for the engine.
    LEAST(COALESCE(session_qos.buffering_time, 0), session_qos.num_minutes * 60) AS buffering_time,
    session_qos.latency_mode,
    session_qos.mobile_connection_type,
    session_qos.median_hls_latency_broadcaster,
    session_qos.time_since_load_start,
    lifecycle_stage.lifecycle_stage,
    -- NOTE(review): unlike buffer_empties and buffering_time this is not coalesced
    -- here, so it is NULL when the session has no video-play match -- confirm intended.
    session_qos.b2s_buffering_time
FROM ps AS play_sessions
LEFT JOIN dup_filter
    ON dup_filter.play_session_id = play_sessions.play_session_id
LEFT JOIN session_qos
    ON session_qos.play_session_id = play_sessions.play_session_id
    AND session_qos.device_id = play_sessions.device_id
LEFT JOIN lifecycle_stage
    ON lifecycle_stage.device_id = play_sessions.device_id
    AND lifecycle_stage.day = play_sessions.date
-- Anti-join: keep only sessions whose play_session_id is not duplicated.
WHERE dup_filter.play_session_id IS NULL
);
