use hahn;
PRAGMA yt.Pool='taxi-delivery';
PRAGMA yson.DisableStrict;
PRAGMA AnsiInForEmptyOrNullableItemsCollections;


-- Input: supply overview tables; read further down as a table range starting at $supply_month_start.
$source             = '//home/taxi-delivery/analytics/production/product/supply_overview';
-- Output: courier-level rows for the two compared weeks (overwritten on every run).
$couriers_source    = '//home/taxi-delivery/analytics/production/taxi_template/tmp_supply';
-- Output: courier counts per geo level for the current week (overwritten on every run).
$couriers_output    = '//home/taxi-delivery/analytics/production/taxi_template/output_couriers';

-- NOTE(review): $date_parse is not referenced anywhere in the visible script - confirm before removing.
$date_parse         = DateTime::Parse("%Y-%m-%d");
-- Renders a date value as a 'YYYY-MM-DD' string.
$date_format        = Datetime::Format('%Y-%m-%d');
-- Upper bound of the weekly calendar.
$end_dt             = Unwrap(DateTime::MakeDate(CurrentUtcDate() - DateTime::IntervalFromDays(7))); -- reference point: one week ago
-- Lower bound of the weekly calendar: start of the week that contains 2021-01-01.
$start_dt           = Unwrap(DateTime::MakeDate(DateTime::StartOfWeek(Date('2021-01-01'))));


-- Weekly calendar for the reporting period: one row per 7-day step starting at $start_dt
-- and bounded by $end_dt. Each row carries the week start, the week end (start + 6 days)
-- and the first day of the month the week starts in, all as 'YYYY-MM-DD' strings.
-- rnk numbers the weeks from the most recent one (rnk = 1) backwards.
$all_dates = (
    SELECT
        $date_format(utc_dt)                                             AS week_start,
        $date_format(utc_dt + Interval('P6D'))                           AS week_end,
        $date_format(DateTime::MakeDate(DateTime::StartOfMonth(utc_dt))) AS month_start,
        RANK() OVER (ORDER BY utc_dt DESC)                               AS rnk
    FROM (
        SELECT
            ListFromRange($start_dt, $end_dt, Interval("P7D")) AS utc_dt
    )
    FLATTEN BY utc_dt
);

-- Week anchors picked from $all_dates by rank (rnk = 1 is the most recent generated week).
-- The quoted dates in the trailing comments are sample values from a past run.
-- NOTE(review): $prev_2w_date, $prev_3w_date, $prev_y_date and $prev_15w_date are not
-- referenced in the visible script - presumably kept for sibling template queries; confirm.
$curr_date          = (SELECT week_start  FROM $all_dates WHERE rnk = 1);  -- e.g. '2022-06-27': reported week
$prev_w_date        = (SELECT week_start  FROM $all_dates WHERE rnk = 2);  -- e.g. '2022-06-20': for WoW
$prev_2w_date       = (SELECT week_start  FROM $all_dates WHERE rnk = 3);  -- e.g. '2022-06-13': for 4-week sparklines
$prev_3w_date       = (SELECT week_start  FROM $all_dates WHERE rnk = 4);  -- e.g. '2022-06-06': for 4-week sparklines
$prev_y_date        = (SELECT week_start  FROM $all_dates WHERE rnk = 53); -- e.g. '2021-06-28': for YoY
$supply_month_start = (SELECT month_start FROM $all_dates WHERE rnk = 2);  -- e.g. '2022-06-01': first supply partition needed to cover the WoW week
$prev_15w_date      = (SELECT week_start  FROM $all_dates WHERE rnk = 16);
$curr_date_end      = (SELECT week_end    FROM $all_dates WHERE rnk = 1);  -- last day of the reported week

-- Geo dimension: agglomeration nodes under 'br_root', each mapped to the reporting
-- buckets used by the aggregation levels (agglomeration, population group, country,
-- and two country groupings).
-- NOTE(review): 'RU_MSC' and 'RU_SPB' are handled in population_group but are absent from
-- the first branch of agglomeration_name, so those agglomerations fall into 'Other' there.
-- Confirm this is intentional.
$geo = (
    SELECT DISTINCT
        name_en,
        name_ru,

        CASE
            WHEN population_group IN ('RU_1M+','RU_500K+','RU_300K+')
                THEN name_en
            WHEN population_group IN ('RU_200K+','RU_100K+','RU_50K+','RU_50K-')
                THEN 'RU_other'
            ELSE 'Other'
        END AS agglomeration_name,

        CASE
            WHEN population_group IN ('RU_MSC','RU_SPB','RU_1M+','RU_500K+','RU_300K+')
                THEN population_group
            WHEN population_group IN ('RU_200K+','RU_100K+','RU_50K+','RU_50K-')
                THEN 'RU_other'
            ELSE 'Other'
        END AS population_group,

        tz_country_name_en AS country,

        -- Three-way split: Russia / CIS / everything else.
        CASE
            WHEN tz_country_name_ru IN ('Россия')
                THEN 'Russia'
            WHEN tz_country_name_ru IN ('Казахстан','Белоруссия','Азербайджан','Армения','Грузия','Киргизия','Молдавия','Узбекистан')
                THEN 'CIS'
            ELSE 'International'
        END AS country_group1,

        -- Two-way split: Russia / everything else.
        CASE
            WHEN tz_country_name_ru IN ('Россия')
                THEN 'Russia'
            ELSE 'CIS+Int'
        END AS country_group2
    FROM `//home/taxi-dwh/cdm/geo/v_dim_full_geo_hierarchy/v_dim_full_geo_hierarchy` AS g
    WHERE root_node_id = 'br_root'
        AND node_type = 'agglomeration'
);


-- Courier-level source for the report: one row per courier, week and geo/transport/segment
-- combination, covering the previous week and the reported week.
--   date   : $curr_date for rows dated on or after $curr_date, otherwise $prev_w_date
--   is_new : MAX over the group of (activity_window_start_date = utc_date)
--   trips  : sum of the cargo, courier, express and other-delivery trip counters
$curr = (
    SELECT
        date,
        unique_driver_id,
        country_group2,
        country_group1,
        country,
        population_group,
        agglomeration_name,
        transport_type,
        courier_segmentation_label,
        MAX(activity_window_start_date = utc_date) AS is_new,
        SUM(
            number_of_trips_cargo
            + number_of_trips_courier
            + number_of_trips_express
            + number_of_trips_other_delivery
        ) AS trips
    FROM range($source, $supply_month_start) AS s
    INNER JOIN $geo AS g
        ON s.agglomeration = g.name_ru
    WHERE utc_date BETWEEN $prev_w_date AND $curr_date_end
    GROUP BY
        -- Collapse every day to the start of the week it belongs to.
        CASE
            WHEN utc_date >= $curr_date THEN $curr_date
            ELSE $prev_w_date
        END AS date,
        s.unique_driver_id           AS unique_driver_id,
        g.country_group2             AS country_group2,
        g.country_group1             AS country_group1,
        g.country                    AS country,
        g.population_group           AS population_group,
        g.agglomeration_name         AS agglomeration_name,
        s.transport_type             AS transport_type,
        s.courier_segmentation_label AS courier_segmentation_label
);

-- Persist the courier-level rows, replacing the previous contents of the table.
-- Columns are listed explicitly; they are exactly the columns produced by $curr.
INSERT INTO $couriers_source WITH TRUNCATE
SELECT
    date,
    unique_driver_id,
    country_group2,
    country_group1,
    country,
    population_group,
    agglomeration_name,
    transport_type,
    courier_segmentation_label,
    is_new,
    trips
FROM $curr;


-- Courier counts per week at six aggregation levels, stacked with UNION ALL:
--   level 6 = agglomeration_name, level 5 = population_group, level 4 = country,
--   level 3 = country_group1,     level 2 = country_group2,   level 1 = grand total.
-- Every branch keeps only couriers with trips > 0 and computes the same measures with
-- CountDistinctEstimate over unique_driver_id (total, new, core segment, and a split by
-- transport type). The *_prev columns take the value of the preceding date inside the same
-- geo via LAG; $curr holds only the previous and the reported week, so on the reported-week
-- row this is the previous week's value (NULL when the geo has no previous-week row).
$city_prev_act = (
    -- level 6: agglomeration
    SELECT
        'level 6' AS level,
        date,
        geo,
        CountDistinctEstimate(unique_driver_id) AS couriers_total,
        CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL)) AS new_couriers,
        LAG(CountDistinctEstimate(unique_driver_id)) OVER prev_week AS couriers_total_prev,
        LAG(CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL))) OVER prev_week AS new_couriers_prev,
        CountDistinctEstimate(IF(courier_segmentation_label = 'delivery_fulltimer_core', unique_driver_id, NULL)) AS couriers_core,
        CountDistinctEstimate(IF(transport_type IN ('Скорее Такси','Скорее Авто-Курьер','Строго Авто-Курьер'), unique_driver_id, NULL)) AS couriers_auto,
        CountDistinctEstimate(IF(transport_type IN ('Велосипед','Мотоцикл'), unique_driver_id, NULL)) AS couriers_moto,
        CountDistinctEstimate(IF(transport_type LIKE 'Грузовой%', unique_driver_id, NULL)) AS couriers_cargo,
        CountDistinctEstimate(IF(transport_type = 'Пешеход', unique_driver_id, NULL)) AS couriers_walk
    FROM $curr AS a
    WHERE trips > 0 -- active couriers only
    GROUP BY
        date,
        agglomeration_name AS geo
    WINDOW prev_week AS (
        PARTITION BY agglomeration_name
        ORDER BY date
    )

    UNION ALL

    -- level 5: population group
    SELECT
        'level 5' AS level,
        date,
        geo,
        CountDistinctEstimate(unique_driver_id) AS couriers_total,
        CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL)) AS new_couriers,
        LAG(CountDistinctEstimate(unique_driver_id)) OVER prev_week AS couriers_total_prev,
        LAG(CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL))) OVER prev_week AS new_couriers_prev,
        CountDistinctEstimate(IF(courier_segmentation_label = 'delivery_fulltimer_core', unique_driver_id, NULL)) AS couriers_core,
        CountDistinctEstimate(IF(transport_type IN ('Скорее Такси','Скорее Авто-Курьер','Строго Авто-Курьер'), unique_driver_id, NULL)) AS couriers_auto,
        CountDistinctEstimate(IF(transport_type IN ('Велосипед','Мотоцикл'), unique_driver_id, NULL)) AS couriers_moto,
        CountDistinctEstimate(IF(transport_type LIKE 'Грузовой%', unique_driver_id, NULL)) AS couriers_cargo,
        CountDistinctEstimate(IF(transport_type = 'Пешеход', unique_driver_id, NULL)) AS couriers_walk
    FROM $curr AS a
    WHERE trips > 0 -- active couriers only
    GROUP BY
        date,
        population_group AS geo
    WINDOW prev_week AS (
        PARTITION BY population_group
        ORDER BY date
    )

    UNION ALL

    -- level 4: country
    SELECT
        'level 4' AS level,
        date,
        geo,
        CountDistinctEstimate(unique_driver_id) AS couriers_total,
        CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL)) AS new_couriers,
        LAG(CountDistinctEstimate(unique_driver_id)) OVER prev_week AS couriers_total_prev,
        LAG(CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL))) OVER prev_week AS new_couriers_prev,
        CountDistinctEstimate(IF(courier_segmentation_label = 'delivery_fulltimer_core', unique_driver_id, NULL)) AS couriers_core,
        CountDistinctEstimate(IF(transport_type IN ('Скорее Такси','Скорее Авто-Курьер','Строго Авто-Курьер'), unique_driver_id, NULL)) AS couriers_auto,
        CountDistinctEstimate(IF(transport_type IN ('Велосипед','Мотоцикл'), unique_driver_id, NULL)) AS couriers_moto,
        CountDistinctEstimate(IF(transport_type LIKE 'Грузовой%', unique_driver_id, NULL)) AS couriers_cargo,
        CountDistinctEstimate(IF(transport_type = 'Пешеход', unique_driver_id, NULL)) AS couriers_walk
    FROM $curr AS a
    WHERE trips > 0 -- active couriers only
    GROUP BY
        date,
        country AS geo
    WINDOW prev_week AS (
        PARTITION BY country
        ORDER BY date
    )

    UNION ALL

    -- level 3: Russia / CIS / International
    SELECT
        'level 3' AS level,
        date,
        geo,
        CountDistinctEstimate(unique_driver_id) AS couriers_total,
        CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL)) AS new_couriers,
        LAG(CountDistinctEstimate(unique_driver_id)) OVER prev_week AS couriers_total_prev,
        LAG(CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL))) OVER prev_week AS new_couriers_prev,
        CountDistinctEstimate(IF(courier_segmentation_label = 'delivery_fulltimer_core', unique_driver_id, NULL)) AS couriers_core,
        CountDistinctEstimate(IF(transport_type IN ('Скорее Такси','Скорее Авто-Курьер','Строго Авто-Курьер'), unique_driver_id, NULL)) AS couriers_auto,
        CountDistinctEstimate(IF(transport_type IN ('Велосипед','Мотоцикл'), unique_driver_id, NULL)) AS couriers_moto,
        CountDistinctEstimate(IF(transport_type LIKE 'Грузовой%', unique_driver_id, NULL)) AS couriers_cargo,
        CountDistinctEstimate(IF(transport_type = 'Пешеход', unique_driver_id, NULL)) AS couriers_walk
    FROM $curr AS a
    WHERE trips > 0 -- active couriers only
    GROUP BY
        date,
        country_group1 AS geo
    WINDOW prev_week AS (
        PARTITION BY country_group1
        ORDER BY date
    )

    UNION ALL

    -- level 2: Russia / CIS+Int (for grand totals)
    SELECT
        'level 2' AS level,
        date,
        geo,
        CountDistinctEstimate(unique_driver_id) AS couriers_total,
        CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL)) AS new_couriers,
        LAG(CountDistinctEstimate(unique_driver_id)) OVER prev_week AS couriers_total_prev,
        LAG(CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL))) OVER prev_week AS new_couriers_prev,
        CountDistinctEstimate(IF(courier_segmentation_label = 'delivery_fulltimer_core', unique_driver_id, NULL)) AS couriers_core,
        CountDistinctEstimate(IF(transport_type IN ('Скорее Такси','Скорее Авто-Курьер','Строго Авто-Курьер'), unique_driver_id, NULL)) AS couriers_auto,
        CountDistinctEstimate(IF(transport_type IN ('Велосипед','Мотоцикл'), unique_driver_id, NULL)) AS couriers_moto,
        CountDistinctEstimate(IF(transport_type LIKE 'Грузовой%', unique_driver_id, NULL)) AS couriers_cargo,
        CountDistinctEstimate(IF(transport_type = 'Пешеход', unique_driver_id, NULL)) AS couriers_walk
    FROM $curr AS a
    WHERE trips > 0 -- active couriers only
    GROUP BY
        date,
        country_group2 AS geo
    WINDOW prev_week AS (
        PARTITION BY country_group2
        ORDER BY date
    )

    UNION ALL

    -- level 1: single 'TOTAL' row per week
    SELECT
        'level 1' AS level,
        date,
        'TOTAL' AS geo,
        CountDistinctEstimate(unique_driver_id) AS couriers_total,
        CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL)) AS new_couriers,
        LAG(CountDistinctEstimate(unique_driver_id)) OVER prev_week AS couriers_total_prev,
        LAG(CountDistinctEstimate(IF(is_new = true, unique_driver_id, NULL))) OVER prev_week AS new_couriers_prev,
        CountDistinctEstimate(IF(courier_segmentation_label = 'delivery_fulltimer_core', unique_driver_id, NULL)) AS couriers_core,
        CountDistinctEstimate(IF(transport_type IN ('Скорее Такси','Скорее Авто-Курьер','Строго Авто-Курьер'), unique_driver_id, NULL)) AS couriers_auto,
        CountDistinctEstimate(IF(transport_type IN ('Велосипед','Мотоцикл'), unique_driver_id, NULL)) AS couriers_moto,
        CountDistinctEstimate(IF(transport_type LIKE 'Грузовой%', unique_driver_id, NULL)) AS couriers_cargo,
        CountDistinctEstimate(IF(transport_type = 'Пешеход', unique_driver_id, NULL)) AS couriers_walk
    FROM $curr AS a
    WHERE trips > 0 -- active couriers only
    GROUP BY
        date
    WINDOW prev_week AS (
        ORDER BY date
    )
);


-- Publish the reported week's courier metrics for every geo level, replacing the previous
-- contents of the output table.
-- The *_wow columns are week-over-week changes: (current / previous) - 1.
-- They are NULL when the previous-week value is missing or zero: the ratio is computed in
-- floating point (note the * 1.0), so an unguarded zero denominator would produce inf/NaN
-- instead of a missing value.
INSERT INTO $couriers_output WITH TRUNCATE
SELECT
    $curr_date AS date,
    level,
    geo,
    couriers_total,
    couriers_total_prev,
    CASE
        WHEN couriers_total_prev > 0
            THEN couriers_total * 1.0 / couriers_total_prev - 1
        ELSE NULL
    END AS couriers_total_wow,
    new_couriers,
    new_couriers_prev,
    CASE
        WHEN new_couriers_prev > 0
            THEN new_couriers * 1.0 / new_couriers_prev - 1
        ELSE NULL
    END AS new_couriers_wow,
    couriers_core,
    couriers_auto,
    couriers_moto,
    couriers_cargo,
    couriers_walk
FROM $city_prev_act
WHERE date = $curr_date; -- previous-week rows are only needed to feed the LAG columns
COMMIT;



