use hahn;
PRAGMA yt.Pool='taxi-delivery';
PRAGMA SimpleColumns;
PRAGMA AnsiInForEmptyOrNullableItemsCollections;

-- в `//home/taxi-delivery/analytics/production/taxi_template/clients_old` живёт признак старого клиента (до 2022); пересчитывать её не надо
-- в `//home/taxi-delivery/analytics/production/taxi_template/clients_raw` живёт разбивка клиент - тарифная зона - неделя
-- в `//home/taxi-delivery/analytics/production/taxi_template/clients_aggr` живёт агрегация до level - geo_name(зависит от level) - клиент - неделя


-- Dates travel through this script as strings in this format.
$dt_format = '%Y-%m-%d';

-- Parses a date string in $dt_format into a Date value.
$str_to_dt = ($date_str) -> {
    RETURN DateTime::MakeDate(DateTime::Parse($dt_format)($date_str))
};

-- Formats a date value back into a string in $dt_format.
$dt_to_str = DateTime::Format($dt_format);

-- Date of the start of the week (as defined by DateTime::StartOfWeek)
-- that contains the given date string.
$week_start = ($date_str) -> {
    RETURN DateTime::MakeDate(DateTime::StartOfWeek($str_to_dt($date_str)))
};

-- Number of days from $from_str to $to_str, divided by 7.
-- A NULL argument propagates to a NULL result.
$week_diff = ($from_str, $to_str) -> {
    RETURN DateTime::ToDays($str_to_dt($to_str) - $str_to_dt($from_str)) / 7
};

-- !!! добавить признак CRM, чтобы избежать совпадения company_id в amocrm и amocrm_international !!! --
-- (contract number, company_id) pairs from the amocrm lead table.
-- Rows without a company_id or without a non-empty contract number are skipped.
$crm_cis = (
    select
        contract_num as corp_contract_id,
        company_id
    from `//home/delivery-dwh/ods/amocrm/lead/lead`
    where company_id is not null
        and contract_num is not null
        and contract_num <> ''
);
-- (contract number, company_id) pairs from the amocrm_international lead table.
-- Rows without a company_id or without a non-empty contract number are skipped.
$crm_int = (
    select
        contract_num as corp_contract_id,
        company_id
    from `//home/delivery-dwh/ods/amocrm_international/lead/lead`
    where company_id is not null
        and contract_num is not null
        and contract_num <> ''
);
-- Exactly one company_id per contract number, over the union of both CRM sources.
-- NOTE(review): some() keeps an arbitrary company_id when a contract number
-- maps to several (within one CRM or across the two), so the chosen value is
-- not guaranteed to be stable between runs — confirm this is acceptable.
$crm = (
    select
        leads.corp_contract_id as corp_contract_id,
        some(leads.company_id) as company_id
    from (
        select corp_contract_id, company_id
        from $crm_cis
        union all
        select corp_contract_id, company_id
        from $crm_int
    ) as leads
    group by leads.corp_contract_id
);

---- data till 2022 --
-- $dlv_old = (
--     select
--         company_id,
--         tariff_geo_zone_code,
--         sum(delivery_point_cnt) as deliveries_cnt
--     from range(`//home/taxi-dwh/cdm/marketplace/fct_order`,`2020-01-01`,`2022-01-01`) as fct_order
--       join $crm as crm
--         on crm.corp_contract_id = fct_order.corp_contract_id
--     where delivery_point_cnt > 0
--       and msk_order_created_dt <= '2022-01-02'
--     group by crm.company_id as company_id,
--         fct_order.tariff_geo_zone_code as tariff_geo_zone_code
-- );
-- insert into `//home/taxi-delivery/analytics/production/taxi_template/clients_old` with truncate
-- select * from $dlv_old;
-- commit;
---- endof data till 2022 --

-- Delivery points summed per company, tariff zone and calendar week.
-- Source: fct_order tables from `2022-01-01` onwards, restricted to orders with
-- msk_order_created_dt >= '2022-01-03' and a positive delivery_point_cnt.
-- The inner join keeps only orders whose corp_contract_id is present in $crm.
$dlv = (
    select
        company_id,
        tariff_geo_zone_code,
        calweek,
        sum(delivery_point_cnt) as deliveries_cnt
    from range(`//home/taxi-dwh/cdm/marketplace/fct_order`, `2022-01-01`) as fct_order
    inner join $crm as crm
        on fct_order.corp_contract_id = crm.corp_contract_id
    where msk_order_created_dt >= '2022-01-03'
        and delivery_point_cnt > 0
    group by
        crm.company_id as company_id,
        fct_order.tariff_geo_zone_code as tariff_geo_zone_code,
        -- week bucket: start-of-week date of the order, as a '%Y-%m-%d' string
        $dt_to_str($week_start(fct_order.msk_order_created_dt)) as calweek
);

-- Rewrite clients_raw from scratch: one row per company / tariff zone / week.
-- old_client_flg is true when the same (company_id, tariff_geo_zone_code) pair
-- is present in clients_old.
insert into `//home/taxi-delivery/analytics/production/taxi_template/clients_raw` with truncate
select
    dlv.company_id as company_id,
    dlv.tariff_geo_zone_code as tariff_geo_zone_code,
    dlv.calweek as calweek,
    dlv.deliveries_cnt as deliveries_cnt,
    clients_old.company_id is not null as old_client_flg
from $dlv as dlv
left join `//home/taxi-delivery/analytics/production/taxi_template/clients_old` as clients_old
    on dlv.company_id = clients_old.company_id
    and dlv.tariff_geo_zone_code = clients_old.tariff_geo_zone_code;
-- Barrier: statements below read clients_raw and must run after this write.
commit;


-- Geo attributes per tariff zone, taken from the 'agglomeration' nodes under
-- root 'br_root' of the geo hierarchy. Each attribute is one aggregation level
-- used by $aggr.
$geo = (
    select
        tariff_zone,
        -- NOTE(review): 'RU_MSC' and 'RU_SPB' are absent from this list although
        -- population_group below handles them, so such rows fall through to the
        -- country-based branches here — confirm that is intended.
        case
            when population_group in ('RU_1M+','RU_500K+','RU_300K+') then name_en
            when tz_country_name_ru = 'Россия' then 'RU_other'
            else 'Other'
        end as agglomeration_name,
        case
            when population_group in ('RU_MSC','RU_SPB','RU_1M+','RU_500K+','RU_300K+') then population_group
            when tz_country_name_ru = 'Россия' then 'RU_other'
            else 'Other'
        end as population_group,
        tz_country_name_en as country,
        -- three-way split: Russia / CIS / International
        case
            when tz_country_name_ru = 'Россия' then 'Russia'
            when tz_country_name_ru in ('Казахстан','Белоруссия','Азербайджан','Армения','Грузия','Киргизия','Молдавия','Узбекистан') then 'CIS'
            else 'International'
        end as country_group1,
        -- two-way split: Russia / everything else
        case
            when tz_country_name_ru = 'Россия' then 'Russia'
            else 'CIS+Int'
        end as country_group2
    from `//home/taxi-dwh/cdm/geo/v_dim_full_geo_hierarchy/v_dim_full_geo_hierarchy`
    where node_type = 'agglomeration'
        and root_node_id = 'br_root'
);

-- clients_raw rows enriched with the geo attributes of their tariff zone.
-- Inner join: rows whose tariff_geo_zone_code has no match in $geo are dropped,
-- so levels 2-6 below can cover fewer rows than level 1.
$clients_raw_geo = (
    select
        clients_raw.company_id as company_id,
        clients_raw.calweek as calweek,
        clients_raw.deliveries_cnt as deliveries_cnt,
        clients_raw.old_client_flg as old_client_flg,
        geo.country_group2 as country_group2,
        geo.country_group1 as country_group1,
        geo.country as country,
        geo.population_group as population_group,
        geo.agglomeration_name as agglomeration_name
    from `//home/taxi-delivery/analytics/production/taxi_template/clients_raw` as clients_raw
    inner join $geo as geo
        on clients_raw.tariff_geo_zone_code = geo.tariff_zone
);

-- Weekly deliveries per company at six geo granularities, stacked into one
-- result keyed by (level, geo_name, company_id, calweek). old_client_flg is
-- true when any contributing clients_raw row has it set.
$aggr = (
    -- level 1: everything in clients_raw, no geo join
    select
        'level 1' as level,
        cast('TOTAL' as String) as geo_name,
        company_id,
        calweek,
        sum(deliveries_cnt) as deliveries_cnt,
        bool_or(old_client_flg) as old_client_flg
    from `//home/taxi-delivery/analytics/production/taxi_template/clients_raw`
    group by company_id, calweek

    union all
    -- level 2: country_group2
    select
        'level 2' as level,
        geo_name,
        company_id,
        calweek,
        sum(deliveries_cnt) as deliveries_cnt,
        bool_or(old_client_flg) as old_client_flg
    from $clients_raw_geo
    group by company_id, calweek, country_group2 as geo_name

    union all
    -- level 3: country_group1
    select
        'level 3' as level,
        geo_name,
        company_id,
        calweek,
        sum(deliveries_cnt) as deliveries_cnt,
        bool_or(old_client_flg) as old_client_flg
    from $clients_raw_geo
    group by company_id, calweek, country_group1 as geo_name

    union all
    -- level 4: country
    select
        'level 4' as level,
        geo_name,
        company_id,
        calweek,
        sum(deliveries_cnt) as deliveries_cnt,
        bool_or(old_client_flg) as old_client_flg
    from $clients_raw_geo
    group by company_id, calweek, country as geo_name

    union all
    -- level 5: population_group
    select
        'level 5' as level,
        geo_name,
        company_id,
        calweek,
        sum(deliveries_cnt) as deliveries_cnt,
        bool_or(old_client_flg) as old_client_flg
    from $clients_raw_geo
    group by company_id, calweek, population_group as geo_name

    union all
    -- level 6: agglomeration_name
    select
        'level 6' as level,
        geo_name,
        company_id,
        calweek,
        sum(deliveries_cnt) as deliveries_cnt,
        bool_or(old_client_flg) as old_client_flg
    from $clients_raw_geo
    group by company_id, calweek, agglomeration_name as geo_name
);

-- Rewrite clients_aggr from scratch with the per-level weekly aggregation.
insert into `//home/taxi-delivery/analytics/production/taxi_template/clients_aggr` with truncate
select * from $aggr;
-- Barrier: the statements below read clients_aggr. Without a commit the read
-- is not ordered after this write and may not see the rows inserted here
-- (same pattern as the commit that follows the clients_raw insert).
commit;

-- clients_aggr rows annotated with the client's cohort:
--   calweek_init - earliest calweek of the company within (level, geo_name);
--                  NULL when old_client_flg is set
--   week_id      - $week_diff(calweek_init, calweek); NULL when calweek_init is NULL
$t = (
    select
        level,
        geo_name,
        company_id,
        calweek,
        deliveries_cnt,
        calweek_init,
        $week_diff(calweek_init, calweek) as week_id
    from (
        select
            level,
            geo_name,
            company_id,
            calweek,
            deliveries_cnt,
            case when not old_client_flg then min(calweek) over w else NULL end as calweek_init
        from `//home/taxi-delivery/analytics/production/taxi_template/clients_aggr` as clients_aggr
        window w as (partition by level, geo_name, company_id)
    )
);

-- Per cohort (level, geo_name, calweek_init):
--   clients_cnt_15_prev               - number of $t rows with week_id = 0 (cohort size)
--   clients_cnt_15_prev_w1_retention  - rows with week_id = 1 divided by cohort size
--   clients_cnt_15_prev_w15_retention - rows with week_id = 15 divided by cohort size
-- Ratios are passed through Math::Round(..., -4).
$retention = (
    select
        level,
        geo_name,
        calweek_init,
        cohort_size as clients_cnt_15_prev,
        Math::Round(cast(active_w1 as double) / cohort_size, -4) as clients_cnt_15_prev_w1_retention,
        Math::Round(cast(active_w15 as double) / cohort_size, -4) as clients_cnt_15_prev_w15_retention
    from (
        select
            level,
            geo_name,
            calweek_init,
            count_if(week_id = 0) as cohort_size,
            count_if(week_id = 1) as active_w1,
            count_if(week_id = 15) as active_w15
        from $t
        group by level, geo_name, calweek_init
    )
);

-- Per (calweek, level, geo_name):
--   clients_cnt     - number of $t rows in the group
--   clients_new_cnt - number of those rows with week_id = 0
--   calweek_prev    - calweek shifted back by 7 days, as a string
--   calweek_15_prev - calweek shifted back by 7*15 days, as a string
$grouped = (
    select
        calweek,
        level,
        geo_name,
        count(*) as clients_cnt,
        count_if(week_id = 0) as clients_new_cnt,
        $dt_to_str($str_to_dt(calweek) - DateTime::IntervalFromDays(7)) as calweek_prev,
        $dt_to_str($str_to_dt(calweek) - DateTime::IntervalFromDays(7*15)) as calweek_15_prev
    from $t
    group by
        calweek,
        level,
        geo_name
);

-- Rewrite output_clients: weekly client counts per (level, geo_name) with
-- week-over-week changes and the retention of the cohort whose calweek_init
-- equals this row's calweek_15_prev.
--   cw - the week being reported
--   lw - the same (level, geo_name) at cw.calweek_prev
-- *_wow is current / previous - 1, or NULL when the previous value is not > 0.
insert into `//home/taxi-delivery/analytics/production/taxi_template/output_clients` with truncate
select
    cw.calweek as calweek,
    cw.level as level,
    cw.geo_name as geo_name,
    -- totals and week-over-week dynamics
    cw.clients_cnt as clients_cnt,
    case
        when lw.clients_cnt>0 then Math::Round(cast(cw.clients_cnt as double)/lw.clients_cnt-1., -4)
        else NULL
    end as clients_cnt_wow,
    cw.clients_new_cnt as clients_new_cnt,
    case
        when lw.clients_new_cnt>0 then Math::Round(cast(cw.clients_new_cnt as double)/lw.clients_new_cnt-1., -4)
        else NULL
    end as clients_new_cnt_wow,
    -- cohort columns; NULL when no matching row exists in $retention
    retention.clients_cnt_15_prev as clients_cnt_15_prev,
    retention.clients_cnt_15_prev_w1_retention as clients_cnt_15_prev_w1_retention,
    retention.clients_cnt_15_prev_w15_retention as clients_cnt_15_prev_w15_retention
from $grouped as cw
left join $grouped as lw
    on cw.level = lw.level
    and cw.geo_name = lw.geo_name
    and cw.calweek_prev = lw.calweek
left join $retention as retention
    on cw.level = retention.level
    and cw.geo_name = retention.geo_name
    and cw.calweek_15_prev = retention.calweek_init
order by calweek, level, geo_name;
