-- AUDIENCE

-- Geo dimension restricted to hierarchy nodes of type 'agglomeration' under root 'br_root'.
-- Adds the country / population groupings that later serve as reporting levels.
DROP TABLE IF EXISTS geo;
CREATE TEMP TABLE geo AS
SELECT
    hier.tariff_zone,
    hier.name_en,
    hier.name_ru,
    -- Populated only for the three listed size tiers; NULL for every other population_group.
    CASE
        WHEN hier.population_group IN ('RU_1M+', 'RU_500K+', 'RU_300K+') THEN hier.name_en
    END AS agglomeration_name,
    -- Large tiers keep their own label, smaller RU tiers collapse to 'RU_other',
    -- anything else (including NULL) becomes 'Other'.
    CASE
        WHEN hier.population_group IN ('RU_MSC', 'RU_SPB', 'RU_1M+', 'RU_500K+', 'RU_300K+')
            THEN hier.population_group
        WHEN hier.population_group IN ('RU_200K+', 'RU_100K+', 'RU_50K+', 'RU_50K-')
            THEN 'RU_other'
        ELSE 'Other'
    END AS population_group,
    hier.tz_country_name_en AS country,
    -- Three-way split: Russia / CIS / International.
    CASE
        WHEN hier.tz_country_name_ru = 'Россия' THEN 'Russia'
        WHEN hier.tz_country_name_ru IN (
            'Казахстан', 'Белоруссия', 'Азербайджан', 'Армения',
            'Грузия', 'Киргизия', 'Молдавия', 'Узбекистан'
        ) THEN 'CIS'
        ELSE 'International'
    END AS country_group1,
    -- Two-way split: Russia vs. everything else.
    CASE
        WHEN hier.tz_country_name_ru = 'Россия' THEN 'Russia'
        ELSE 'CIS+Int'
    END AS country_group2
FROM taxi_cdm_geo.v_dim_full_geo_hierarchy AS hier
WHERE hier.node_type = 'agglomeration'
    AND hier.root_node_id = 'br_root'
DISTRIBUTED RANDOMLY;
ANALYZE geo;



-- Filter the order fact and keep only B2C (segments 'C2C' and 'SMB').
-- Each kept order is enriched with geo attributes, MSK-shifted (UTC + 3 hours)
-- date buckets and a B2B / SMB / C2C segment label.
DROP TABLE IF EXISTS deliveries_fact_tbl;
CREATE TEMP TABLE deliveries_fact_tbl AS
SELECT
    *
FROM (SELECT
        fo.order_id as order_id,
        fo.user_phone_pd_id AS user_phone_pd_id,
        fo.tariff_geo_zone_code as tariff_geo_zone_code,
        geo.country_group2 as country_group2,
        geo.country_group1 as country_group1,
        geo.country as country,
        geo.population_group as population_group,
        geo.agglomeration_name as agglomeration_name,

        -- TRUE for non-corporate orders; NULL otherwise (there is no ELSE branch).
        CASE
            when not fo.corp_order_flg THEN TRUE
        END AS is_b2c,

        -- Order creation time shifted from UTC by +3 hours, plus day / week / month buckets
        -- (with the last day of the week and of the month) derived from the shifted value.
        (fo.utc_order_created_dttm + INTERVAL '3 hours') as msk_order_created_dttm,
        DATE_TRUNC('day', (fo.utc_order_created_dttm + INTERVAL '3 hours'))::DATE AS day,
        DATE_TRUNC('week', (fo.utc_order_created_dttm + INTERVAL '3 hours'))::DATE AS week,
        (DATE_TRUNC('week', (fo.utc_order_created_dttm + INTERVAL '3 hours')) + INTERVAL '6 days')::DATE AS week_end,
        DATE_TRUNC('month', (fo.utc_order_created_dttm + INTERVAL '3 hours'))::DATE AS month,
        (DATE_TRUNC('month', (fo.utc_order_created_dttm + INTERVAL '3 hours')) + INTERVAL '1 month - 1 day')::DATE AS month_end,

        -- Segment label. NOTE: the output column reuses the name corp_order_flg but is text;
        -- the original boolean flag is exposed separately as cog.
        CASE
            WHEN fo.corp_order_flg THEN 'B2B'
            WHEN tier4.user_phone_pd_id IS NOT NULL THEN 'SMB'
            ELSE 'C2C'
        END AS corp_order_flg,

        fo.corp_order_flg as cog,
        fo.corp_client_id as corp_client_id,

        fo.order_source_code as order_source_code,
        fo.delivery_point_cnt as deliveries

        FROM taxi_cdm_marketplace.fct_order AS fo

        -- Currently valid SMB flags (utc_valid_to_dt = '9999-12-31').
        -- DISTINCT guarantees at most one match per phone id, so an order can never be
        -- duplicated by this join even if the flag table holds several current rows per user.
        LEFT JOIN (
            SELECT DISTINCT user_phone_pd_id
            FROM snb_delivery.smb_users_flags
            WHERE utc_valid_to_dt = '9999-12-31'
            ) AS tier4 ON tier4.user_phone_pd_id = fo.user_phone_pd_id

        LEFT JOIN geo as geo on geo.tariff_zone = fo.tariff_geo_zone_code

        -- NOTE(review): the 'ubernight' cut-off uses fo.msk_order_created_dttm while the
        -- 'uberlux' cut-off uses fo.utc_order_created_dttm — confirm the mix is intended.
        WHERE ((fo.tariff_class_code in ('cargo', 'express', 'courier', 'cargocorp'))
            or (fo.tariff_class_code = 'ubernight' and fo.msk_order_created_dttm >= '2021-09-01') -- for now only Delivery and Cargo
            or (fo.tariff_class_code = 'uberlux' and fo.utc_order_created_dttm >= '2022-03-01'))
            AND (fo.utc_order_created_dttm + INTERVAL '3 hours')::DATE BETWEEN '2019-01-01' AND (CURRENT_DATE - INTERVAL '1 day')
            AND fo.delivery_point_cnt > 0
            AND NOT fo.fake_order_flg
            AND NOT fo.mqc_order_flg
            AND NOT fo.user_fraud_flg
            -- NOT EXISTS instead of NOT IN: a single NULL order_id in self_orders would make
            -- NOT IN evaluate to UNKNOWN for every row and silently empty the whole table.
            AND NOT EXISTS (
                SELECT 1
                FROM snb_delivery.self_orders AS so
                WHERE so.order_id = fo.order_id
            )

      ) AS tmp1
-- Drop the 'B2B' segment; only 'C2C' and 'SMB' orders remain.
WHERE tmp1.corp_order_flg IN ('C2C', 'SMB')

DISTRIBUTED RANDOMLY;
ANALYZE deliveries_fact_tbl;



-- Find newcomers: attach to every order the timestamp of the same user's
-- earliest order present in deliveries_fact_tbl.
DROP TABLE IF EXISTS deliveries_fact_tbl_1;
CREATE TEMP TABLE deliveries_fact_tbl_1 AS
SELECT
    ord.*,
    -- Newcomers overall: earliest msk_order_created_dttm within the user's partition.
    MIN(ord.msk_order_created_dttm) OVER (
        PARTITION BY ord.user_phone_pd_id
    ) AS all_newbies
FROM deliveries_fact_tbl AS ord
DISTRIBUTED RANDOMLY;
ANALYZE deliveries_fact_tbl_1;



-- Order-level base for the aggregates: orders with day >= 2021-01-01 from the
-- 'yandex' / 'cargo' sources with no corporate client, plus money metrics
-- multiplied by currency_rate (0 when no metric row matches).
DROP TABLE IF EXISTS c2c_id_month_ag;
CREATE TEMP TABLE c2c_id_month_ag AS
SELECT
    ord.*,
    COALESCE(money.order_cost * money.currency_rate, 0) AS gmv,
    COALESCE(money.net_inflow_amt * money.currency_rate, 0) AS net_inflow
FROM deliveries_fact_tbl_1 AS ord
LEFT JOIN (
    -- Only the columns used above; same date floor and tariff list as the order side.
    SELECT
        metric.order_id,
        metric.order_cost,
        metric.net_inflow_amt,
        metric.currency_rate
    FROM taxi_cdm_marketplace.fct_order_metric AS metric
    WHERE (metric.utc_order_created_dttm + INTERVAL '3 hours')::DATE >= '2021-01-01'
        AND metric.tariff_class_code IN ('cargo', 'express', 'courier', 'cargocorp', 'ubernight', 'uberlux')
) AS money
    ON money.order_id = ord.order_id
WHERE ord.day >= '2021-01-01'
    AND ord.order_source_code IN ('yandex', 'cargo')
    AND ord.corp_client_id IS NULL
    AND NOT ord.cog
DISTRIBUTED RANDOMLY;
ANALYZE c2c_id_month_ag;



-- Weekly aggregates, level 1: all geographies combined under the label 'TOTAL'.
-- Rows are split by is_newbies (order week equals the week of the user's first order).
DROP TABLE IF EXISTS final_total_WAU;
CREATE TEMP TABLE final_total_WAU AS
SELECT
    src.date,
    src.scale,
    src.level,
    src.geo,
    src.is_newbies,
    COUNT(DISTINCT src.user_phone_pd_id)::float AS au,
    SUM(src.deliveries)::float AS deliveries,
    SUM(src.gmv)::float AS gmv,
    SUM(src.net_inflow)::float AS net_inflow
FROM (
    -- Project the grouping keys once so the outer query can group by name.
    SELECT
        week AS date,
        'week'::text AS scale,
        'level 1'::text AS level,
        'TOTAL'::text AS geo,
        COALESCE(week = DATE_TRUNC('week', all_newbies), FALSE) AS is_newbies,
        user_phone_pd_id,
        deliveries,
        gmv,
        net_inflow
    FROM c2c_id_month_ag
) AS src
GROUP BY src.date, src.scale, src.level, src.geo, src.is_newbies
ORDER BY src.date, src.scale, src.level, src.geo, src.is_newbies
DISTRIBUTED RANDOMLY;
ANALYZE final_total_WAU;



-- Weekly aggregates, level 2: one geo value per country_group2.
-- Rows are split by is_newbies (order week equals the week of the user's first order).
DROP TABLE IF EXISTS final_country_group2_WAU;
CREATE TEMP TABLE final_country_group2_WAU AS
SELECT
    src.date,
    src.scale,
    src.level,
    src.geo,
    src.is_newbies,
    COUNT(DISTINCT src.user_phone_pd_id)::float AS au,
    SUM(src.deliveries)::float AS deliveries,
    SUM(src.gmv)::float AS gmv,
    SUM(src.net_inflow)::float AS net_inflow
FROM (
    -- Project the grouping keys once so the outer query can group by name.
    SELECT
        week AS date,
        'week'::text AS scale,
        'level 2'::text AS level,
        country_group2 AS geo,
        COALESCE(week = DATE_TRUNC('week', all_newbies), FALSE) AS is_newbies,
        user_phone_pd_id,
        deliveries,
        gmv,
        net_inflow
    FROM c2c_id_month_ag
) AS src
GROUP BY src.date, src.scale, src.level, src.geo, src.is_newbies
ORDER BY src.date, src.scale, src.level, src.geo, src.is_newbies
DISTRIBUTED RANDOMLY;
ANALYZE final_country_group2_WAU;



-- Weekly aggregates, level 3: one geo value per country_group1.
-- Rows are split by is_newbies (order week equals the week of the user's first order).
DROP TABLE IF EXISTS final_country_group1_WAU;
CREATE TEMP TABLE final_country_group1_WAU AS
SELECT
    src.date,
    src.scale,
    src.level,
    src.geo,
    src.is_newbies,
    COUNT(DISTINCT src.user_phone_pd_id)::float AS au,
    SUM(src.deliveries)::float AS deliveries,
    SUM(src.gmv)::float AS gmv,
    SUM(src.net_inflow)::float AS net_inflow
FROM (
    -- Project the grouping keys once so the outer query can group by name.
    SELECT
        week AS date,
        'week'::text AS scale,
        'level 3'::text AS level,
        country_group1 AS geo,
        COALESCE(week = DATE_TRUNC('week', all_newbies), FALSE) AS is_newbies,
        user_phone_pd_id,
        deliveries,
        gmv,
        net_inflow
    FROM c2c_id_month_ag
) AS src
GROUP BY src.date, src.scale, src.level, src.geo, src.is_newbies
ORDER BY src.date, src.scale, src.level, src.geo, src.is_newbies
DISTRIBUTED RANDOMLY;
ANALYZE final_country_group1_WAU;



-- Weekly aggregates, level 4: one geo value per country.
-- Rows are split by is_newbies (order week equals the week of the user's first order).
DROP TABLE IF EXISTS final_country_WAU;
CREATE TEMP TABLE final_country_WAU AS
SELECT
    src.date,
    src.scale,
    src.level,
    src.geo,
    src.is_newbies,
    COUNT(DISTINCT src.user_phone_pd_id)::float AS au,
    SUM(src.deliveries)::float AS deliveries,
    SUM(src.gmv)::float AS gmv,
    SUM(src.net_inflow)::float AS net_inflow
FROM (
    -- Project the grouping keys once so the outer query can group by name.
    SELECT
        week AS date,
        'week'::text AS scale,
        'level 4'::text AS level,
        country AS geo,
        COALESCE(week = DATE_TRUNC('week', all_newbies), FALSE) AS is_newbies,
        user_phone_pd_id,
        deliveries,
        gmv,
        net_inflow
    FROM c2c_id_month_ag
) AS src
GROUP BY src.date, src.scale, src.level, src.geo, src.is_newbies
ORDER BY src.date, src.scale, src.level, src.geo, src.is_newbies
DISTRIBUTED RANDOMLY;
ANALYZE final_country_WAU;



-- Weekly aggregates, level 5: one geo value per population_group.
-- Rows are split by is_newbies (order week equals the week of the user's first order).
DROP TABLE IF EXISTS final_population_group_WAU;
CREATE TEMP TABLE final_population_group_WAU AS
SELECT
    src.date,
    src.scale,
    src.level,
    src.geo,
    src.is_newbies,
    COUNT(DISTINCT src.user_phone_pd_id)::float AS au,
    SUM(src.deliveries)::float AS deliveries,
    SUM(src.gmv)::float AS gmv,
    SUM(src.net_inflow)::float AS net_inflow
FROM (
    -- Project the grouping keys once so the outer query can group by name.
    SELECT
        week AS date,
        'week'::text AS scale,
        'level 5'::text AS level,
        population_group AS geo,
        COALESCE(week = DATE_TRUNC('week', all_newbies), FALSE) AS is_newbies,
        user_phone_pd_id,
        deliveries,
        gmv,
        net_inflow
    FROM c2c_id_month_ag
) AS src
GROUP BY src.date, src.scale, src.level, src.geo, src.is_newbies
ORDER BY src.date, src.scale, src.level, src.geo, src.is_newbies
DISTRIBUTED RANDOMLY;
ANALYZE final_population_group_WAU;



-- Weekly aggregates, level 6: one geo value per agglomeration_name
-- (orders whose agglomeration_name is NULL fall into a single NULL-geo group).
-- Rows are split by is_newbies (order week equals the week of the user's first order).
DROP TABLE IF EXISTS final_agglomeration_name_WAU;
CREATE TEMP TABLE final_agglomeration_name_WAU AS
SELECT
    src.date,
    src.scale,
    src.level,
    src.geo,
    src.is_newbies,
    COUNT(DISTINCT src.user_phone_pd_id)::float AS au,
    SUM(src.deliveries)::float AS deliveries,
    SUM(src.gmv)::float AS gmv,
    SUM(src.net_inflow)::float AS net_inflow
FROM (
    -- Project the grouping keys once so the outer query can group by name.
    SELECT
        week AS date,
        'week'::text AS scale,
        'level 6'::text AS level,
        agglomeration_name AS geo,
        COALESCE(week = DATE_TRUNC('week', all_newbies), FALSE) AS is_newbies,
        user_phone_pd_id,
        deliveries,
        gmv,
        net_inflow
    FROM c2c_id_month_ag
) AS src
GROUP BY src.date, src.scale, src.level, src.geo, src.is_newbies
ORDER BY src.date, src.scale, src.level, src.geo, src.is_newbies
DISTRIBUTED RANDOMLY;
ANALYZE final_agglomeration_name_WAU;



-- Stack the six per-level weekly tables and add previous-period values for each metric.
-- The lag offset is 7 rows when scale = 'day' and 1 row otherwise; it counts rows inside
-- the (scale, level, geo, is_newbies) series, not calendar periods.
DROP TABLE IF EXISTS pre_final_table;
CREATE TEMP TABLE pre_final_table AS
SELECT
    lvl.*,
    LAG(lvl.au, CASE WHEN lvl.scale = 'day' THEN 7 ELSE 1 END) OVER series AS prev_au,
    LAG(lvl.deliveries, CASE WHEN lvl.scale = 'day' THEN 7 ELSE 1 END) OVER series AS prev_deliveries,
    LAG(lvl.gmv, CASE WHEN lvl.scale = 'day' THEN 7 ELSE 1 END) OVER series AS prev_gmv,
    -- Column name kept exactly as published (including its spelling) so consumers keep working.
    LAG(lvl.net_inflow, CASE WHEN lvl.scale = 'day' THEN 7 ELSE 1 END) OVER series AS prev_net_inlow
FROM (
    SELECT * FROM final_total_WAU
    UNION ALL
    SELECT * FROM final_country_group2_WAU
    UNION ALL
    SELECT * FROM final_country_group1_WAU
    UNION ALL
    SELECT * FROM final_country_WAU
    UNION ALL
    SELECT * FROM final_population_group_WAU
    UNION ALL
    SELECT * FROM final_agglomeration_name_WAU
) AS lvl
WINDOW
    -- One time series per scale / level / geo / newcomer flag, oldest period first.
    series AS (
        PARTITION BY lvl.scale, lvl.level, lvl.geo, lvl.is_newbies
        ORDER BY lvl.date ASC
    )
DISTRIBUTED RANDOMLY;
ANALYZE pre_final_table;


-- Publish the report table, excluding the period that contains NOW() for each scale
-- (current month / week / day), so only periods that started earlier are kept.
DROP TABLE IF EXISTS snb_delivery.go_b2c_audience_taxi_template;
CREATE TABLE snb_delivery.go_b2c_audience_taxi_template AS
SELECT rpt.*
FROM pre_final_table AS rpt
-- Each guard reads: "the row is not of this scale, or it starts before the current period".
WHERE (rpt.scale <> 'month' OR rpt.date < DATE_TRUNC('month', NOW()))
    AND (rpt.scale <> 'week' OR rpt.date < DATE_TRUNC('week', NOW()))
    AND (rpt.scale <> 'day' OR rpt.date < DATE_TRUNC('day', NOW()))
DISTRIBUTED RANDOMLY;
ANALYZE snb_delivery.go_b2c_audience_taxi_template;
