/*
    Author: ikhomyanin@
    About: Parse AmoCRM raw logs for Delivery corp client funnel
*/

use hahn;
PRAGMA yson.Strict = 'false';
PRAGMA yt.QueryCacheMode = 'disable';

-- Destination table; the final INSERT ... WITH TRUNCATE in this script
-- overwrites it completely.
$SAVE_TO = '//home/taxi-delivery/analytics/production/corp_client_funnel/leads';

-- Raw history of AmoCRM lead documents that $raw_leads reads from.
-- NOTE(review): the path is pinned to the "2022" node - confirm that later
-- periods are not expected to be picked up by this script.
$LEADS_HIST = '//home/delivery-dwh/etl/raw_history/amocrm/leads/2022';
-- ODS tables that are left-joined to the parsed leads in $final_raw.
$USER = '//home/delivery-dwh/ods/amocrm/user/user';
$COMPANY = '//home/delivery-dwh/ods/amocrm/company/company';
$PIPELINE_STATUS = '//home/delivery-dwh/ods/amocrm/lead_pipeline_status/lead_pipeline_status';
$LEAD = '//home/delivery-dwh/ods/amocrm/lead/lead';

-- Builds a single '; '-separated string out of the "name" attribute of every
-- tag found under /_embedded/tags of a lead document.
$extract_lead_tags = ($doc) -> {
    $tag_names = ListMap(
        Yson::YPathList($doc, '/_embedded/tags'),
        ($tag) -> { RETURN Yson::YPathString($tag, '/name'); }
    );

    RETURN ListConcat($tag_names, '; ');
};

-- Normalizes a raw funnel stage name to its numbered form.
--   * NULL                       -> 'NO STATUS'
--   * already numbered / final   -> returned unchanged
--   * known un-numbered variants -> their numbered counterpart
--   * '(old) Отложить'           -> 'WRONG STATUS'
--   * anything else              -> 'UNKNOWN STATUS'
$map_funnel_stage = ($funnel_stage) -> {
    RETURN CASE
        WHEN $funnel_stage IS NULL THEN 'NO STATUS'

        -- Un-numbered variants that have a numbered counterpart.
        WHEN $funnel_stage == 'Идея' THEN '1. Идея'
        WHEN $funnel_stage == 'Провести встречу' THEN '2. Провести встречу'
        WHEN $funnel_stage == 'Сделано предложение' THEN '2. Провести презентацию'
        WHEN $funnel_stage == 'Переговоры' THEN '3. Переговоры'
        WHEN $funnel_stage == 'Заключение договора' THEN '4. Заключение договора'
        WHEN $funnel_stage == 'Старт' THEN '5. Старт'
        WHEN $funnel_stage == 'Развитие' THEN '7. Развитие'
        WHEN $funnel_stage == 'Проиграно' THEN 'Проигрыш'
        WHEN $funnel_stage == '(old) Отложить' THEN 'WRONG STATUS'

        -- Values that are already in the target form pass through as is.
        WHEN $funnel_stage IN (
            '1. Идея',
            '2. Провести встречу',
            '3. Переговоры',
            '4. Заключение договора',
            '5. Старт',
            '6. Разгон',
            '7. Развитие',
            '8. Удержание',
            'Проигрыш',

            -- SDD
            '2. Провести презентацию'
        ) THEN $funnel_stage

        ELSE 'UNKNOWN STATUS'
    END;
};

-- Returns the value of the custom field named $field from a lead document
-- as a string.
--   $doc   - lead document (Yson) with a /custom_fields_values list
--   $field - value to look for in the "field_name" attribute
-- Result by field_type:
--   text / numeric / url / select / textarea - "value" of the first entry
--   date                                     - Int64 "value" of the first entry, cast to String
--   multiselect / multitext                  - all "value"s, sorted, joined with '; '
--   no field_type (e.g. field not found)     - NULL
--   any other type                           - 'UNKNOWN FIELD TYPE'
$extract_from_custom_fields = ($doc, $field) -> {
    $all_fields = Yson::YPathList($doc, '/custom_fields_values');

    -- First entry whose field_name equals $field.
    $matched_field = ListHead(
        ListSkipWhile(
            $all_fields,
            ($item) -> { RETURN Yson::YPathString($item, '/field_name') != $field; }
        )
    );

    $kind = Yson::YPathString($matched_field, '/field_type');
    $entries = Yson::YPathList($matched_field, '/values');

    $first_entry = ListHead($entries);
    $joined_entries = ListConcat(
        ListSort(
            ListMap($entries, ($entry) -> { RETURN Yson::YPathString($entry, '/value'); })
        ),
        '; '
    );

    RETURN CASE
        WHEN $kind IN ('text', 'numeric', 'url', 'select', 'textarea')
            THEN Yson::YPathString($first_entry, '/value')
        WHEN $kind == 'date'
            THEN CAST(Yson::YPathInt64($first_entry, '/value') AS String)
        WHEN $kind IN ('multiselect', 'multitext')
            THEN $joined_entries
        WHEN $kind IS NULL
            THEN NULL
        ELSE 'UNKNOWN FIELD TYPE'
    END;
};

-- Returns the Int64 "id" of the first entry under /_embedded/companies
-- of a lead document.
$extract_company = ($doc) -> {
    $linked_company = ListHead(Yson::YPathList($doc, '/_embedded/companies'));

    RETURN Yson::YPathInt64($linked_company, '/id');
};

-- Renders a seconds value as text.
--   $sec        - value that is cast to UInt32 and passed to DateTime::FromSeconds
--   $formatting - format string handed to DateTime::Format
$format_sec = ($sec, $formatting) -> {
    $render = DateTime::Format($formatting);
    $moment = DateTime::FromSeconds(CAST($sec AS UInt32));

    RETURN $render($moment);
};

-- One row per lead and update date (first 10 characters of utc_updated_dttm):
-- keeps the document with the greatest utc_updated_dttm within that date.
$raw_leads = (
    SELECT
        id,
        utc_updated_dt,
        MAX_BY(doc, utc_updated_dttm) AS doc,
        MAX(utc_updated_dttm) AS utc_updated_dttm
    FROM $LEADS_HIST
    GROUP BY
        id,
        Substring(utc_updated_dttm, 0, 10) AS utc_updated_dt
);

-- Flattens every lead document from $raw_leads into typed columns.
-- Top-level attributes are read by Yson path; everything else is looked up
-- by custom field name via $extract_from_custom_fields.
$parsed_leads = (
    SELECT
        id AS lead_id,
        utc_updated_dt,
        utc_updated_dttm,

        -- Common
        $extract_from_custom_fields(doc, '№ договора') AS lead_corp_contract_id,
        Yson::YPathInt64(doc, '/account_id') AS lead_account_id,
        Yson::YPathInt64(doc, '/group_id') AS lead_group_id,
        Yson::YPathBool(doc, '/is_deleted') AS lead_is_deleted,
        $format_sec(Yson::YPathInt64(doc, '/created_at'), '%Y-%m-%d') AS lead_created_dt,
        Yson::YPathString(doc, '/name') AS lead_name,
        Yson::YPathInt64(doc, '/pipeline_id') AS lead_pipeline_id,
        Yson::YPathInt64(doc, '/price') AS lead_price,
        Yson::YPathInt64(doc, '/responsible_user_id') AS lead_responsible_user_id,
        Yson::YPathInt64(doc, '/status_id') AS lead_status_id,
        $extract_from_custom_fields(doc, 'Канал привлечения') AS lead_acquisition_channel,
        $extract_from_custom_fields(doc, 'Продукт') AS lead_products,
        $format_sec($extract_from_custom_fields(doc, 'Дата проверки лидом'), '%Y-%m-%d') AS lead_checked_by_lead_dt,
        $format_sec($extract_from_custom_fields(doc, 'Дата проверки КК'), '%Y-%m-%d') AS lead_checked_by_qc_dt,
        $extract_company(doc) AS company_id,
        $extract_lead_tags(doc) AS lead_tags_list,

        -- Legal
        $extract_from_custom_fields(doc, 'Тип договора') AS lead_contract_type,
        $extract_from_custom_fields(doc, 'Тип оплаты') AS lead_payment_method,
        $extract_from_custom_fields(doc, 'Тикет LegalTaxi') AS lead_taxi_legal_ticket,

        -- Integration
        $extract_from_custom_fields(doc, 'Менеджер интеграции') AS lead_intergration_manager,
        $extract_from_custom_fields(doc, 'Тикет на интеграцию') AS lead_intergration_ticket,
        $extract_from_custom_fields(doc, 'Тикет на тестовый кабинет') AS lead_test_user_ticket,
        $extract_from_custom_fields(doc, 'Информационные системы клиента (CMS, CRM, ERP)') AS lead_client_modules,
        $extract_from_custom_fields(doc, 'Фактический модуль интеграции') AS lead_integration_module,

        -- Express
        $extract_from_custom_fields(doc, 'Потенциал Экспресс') AS lead_express_potential_per_month,
        $extract_from_custom_fields(doc, 'Подключенные опции Экспресс') AS lead_express_options,
        $extract_from_custom_fields(doc, 'Этап воронки Экспресс') AS lead_express_funnel_stage,
        $extract_from_custom_fields(doc, 'Причина проигрыша Экспресс') AS lead_express_loss_reason,
        $extract_from_custom_fields(doc, 'Статус интеграции Экспресс') AS lead_express_intergration_status,
        $extract_from_custom_fields(doc, 'Динамика Экспресс') AS lead_express_dynamics,
        $extract_from_custom_fields(doc, 'Причина оттока Экспресс') AS lead_express_churn_reason,
        $extract_from_custom_fields(doc, 'Wishlist Экспресс') AS lead_express_wishlist,
        $extract_from_custom_fields(doc, 'Тикет LOGISTARIFF Экспресс') AS lead_express_tariff_ticket,

        -- NDD
        $extract_from_custom_fields(doc, 'Потенциал NDD') AS lead_ndd_potential_per_month,
        $extract_from_custom_fields(doc, 'Подключенные опции NDD') AS lead_ndd_options,
        $extract_from_custom_fields(doc, 'Этап воронки NDD') AS lead_ndd_funnel_stage,
        $extract_from_custom_fields(doc, 'Причина проигрыша NDD') AS lead_ndd_loss_reason,
        $extract_from_custom_fields(doc, 'Статус интеграции NDD') AS lead_ndd_intergration_status,
        $extract_from_custom_fields(doc, 'Динамика NDD') AS lead_ndd_dynamics,
        $extract_from_custom_fields(doc, 'Причина оттока NDD') AS lead_ndd_churn_reason,
        $extract_from_custom_fields(doc, 'Wishlist NDD') AS lead_ndd_wishlist,
        $extract_from_custom_fields(doc, 'Тикет LOGISTARIFF NDD') AS lead_ndd_tariff_ticket,

        -- SDD
        $extract_from_custom_fields(doc, 'Потенциал SDD') AS lead_sdd_potential_per_month,
        $extract_from_custom_fields(doc, 'Подключенные опции SDD') AS lead_sdd_options,
        $extract_from_custom_fields(doc, 'Этап воронки SDD') AS lead_sdd_funnel_stage,
        $extract_from_custom_fields(doc, 'Причина проигрыша SDD') AS lead_sdd_loss_reason,
        $extract_from_custom_fields(doc, 'Статус интеграции SDD') AS lead_sdd_intergration_status,
        $extract_from_custom_fields(doc, 'Динамика SDD') AS lead_sdd_dynamics,
        $extract_from_custom_fields(doc, 'Причина оттока SDD') AS lead_sdd_churn_reason,
        $extract_from_custom_fields(doc, 'Wishlist SDD') AS lead_sdd_wishlist,
        $extract_from_custom_fields(doc, 'Тикет LOGISTARIFF SDD') AS lead_sdd_tariff_ticket,

        -- Cargo
        $extract_from_custom_fields(doc, 'Потенциал Грузовой') AS lead_cargo_potential_per_month,
        $extract_from_custom_fields(doc, 'Подключенные опции Грузовой') AS lead_cargo_options,
        $extract_from_custom_fields(doc, 'Этап воронки Грузовой') AS lead_cargo_funnel_stage,
        $extract_from_custom_fields(doc, 'Причина проигрыша Грузовой') AS lead_cargo_loss_reason,
        $extract_from_custom_fields(doc, 'Статус интеграции Грузовой') AS lead_cargo_intergration_status,
        $extract_from_custom_fields(doc, 'Динамика Грузовой') AS lead_cargo_dynamics,
        $extract_from_custom_fields(doc, 'Причина оттока Грузовой') AS lead_cargo_churn_reason,
        $extract_from_custom_fields(doc, 'Wishlist Грузовой') AS lead_cargo_wishlist,
        $extract_from_custom_fields(doc, 'Тикет LOGISTARIFF Грузовой') AS lead_cargo_tariff_ticket
    FROM $raw_leads
);

-- Adds next_utc_updated_dt: the utc_updated_dt of the row that immediately
-- follows the current one for the same lead (NULL for the last row).
$parsed_leads_tmp = (
    SELECT
        t1.*,
        SOME(utc_updated_dt) OVER w1 AS next_utc_updated_dt
    FROM $parsed_leads AS t1
    WINDOW w1 AS (
        PARTITION BY lead_id
        ORDER BY utc_updated_dt ASC
        -- Frame of exactly one row: the next one in the ordering.
        ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING
    )
);

-- Python3 UDF that expands a start date into the list of all calendar dates
-- from that date up to the current date (both ends inclusive).
$script = @@
import datetime as dt

# Evaluated once, when the module is loaded; used as the default end date.
today = str(dt.date.today())


def generate_dates(date_from, date_to=today):
    """
    Generate the list of ISO-formatted dates from date_from to date_to,
    both ends inclusive.

    date_from / date_to may be str or a bytes-like value; anything that is
    not str is decoded as UTF-8 first.

    Returns an empty list when either bound is None or when date_from is
    later than date_to. Returns a single-element list holding an error
    message when a bound is not a valid ISO date.
    """
    # The callable is declared with an optional argument, so NULL (None) is a
    # legal input; without this guard None.decode() raises AttributeError.
    if date_from is None or date_to is None:
        return []
    if not isinstance(date_from, str):
        date_from = date_from.decode('utf-8')
    if not isinstance(date_to, str):
        date_to = date_to.decode('utf-8')
    try:
        current = dt.date.fromisoformat(date_from)
        finish = dt.date.fromisoformat(date_to)
    except ValueError:
        return ["Плохой формат даты"]
    returnarray = []
    while current <= finish:
        returnarray.append(str(current))
        current += dt.timedelta(days=1)
    return returnarray
@@;

-- Only the first parameter is exposed to YQL, so the end date is always the
-- Python-side default.
$generate_series = Python3::generate_dates(Callable<(String?)->List<string>>, $script);

-- First date of the generated calendar.
$START_DATE = '2022-03-23';

-- Single row holding the whole list of dates produced by $generate_series.
$dates = (
    SELECT $generate_series($START_DATE) AS dates
);

-- One row per date.
$dates_flatten = (
    SELECT *
    FROM $dates
    FLATTEN BY dates
);

-- Expands every lead snapshot to one row per calendar date (utc_dt) on which
-- it was the current one: from its own update date (inclusive) up to the next
-- snapshot's update date (exclusive). The last snapshot of a lead is
-- open-ended via the '9999-12-31' sentinel.
$parsed_leads_long = (
    SELECT
        t1.*,
        t2.dates AS utc_dt
    WITHOUT
        t1.next_utc_updated_dt
    FROM $parsed_leads_tmp AS t1
    CROSS JOIN $dates_flatten AS t2
    WHERE t2.dates >= t1.utc_updated_dt
        AND t2.dates < COALESCE(t1.next_utc_updated_dt, '9999-12-31')
);

-- Columns of the user table that are needed downstream.
$users = (
    SELECT
        user_group_name,
        user_id,
        user_login,
        user_name
    FROM $USER
);

-- Company attributes, renamed with a company_ prefix where the source column
-- does not have one. company_utc_created_dt is the first 10 characters of
-- utc_created_dttm.
$companies = (
    SELECT
        city_name AS company_city,
        company_group_name,
        company_group_type,
        company_id,
        company_name,
        country_name AS company_country,
        geography_name_list AS company_geography_list,
        market_segment_code AS company_market_segment,
        responsible_user_id AS company_responsible_user_id,
        tag_name_list AS company_tags_list,
        tier_code AS company_tier,
        Substring(utc_created_dttm, 0, 10) AS company_utc_created_dt
    FROM $COMPANY
);

-- Pipeline / status names keyed by (pipeline_id, pipeline_status_id).
$pipeline_statuses = (
    SELECT
        pipeline_id,
        pipeline_name,
        pipeline_status,
        pipeline_status_id
    FROM $PIPELINE_STATUS
);

-- Tells whether $product is present in a Yson list of product names.
$lead_has_product = ($product_names, $product) -> {
    RETURN $product IN Yson::ConvertToStringList($product_names);
};

-- Per-lead product flags derived from product_name_list, plus corp_client_id.
$lead_products = (
    SELECT
        lead_id,
        $lead_has_product(product_name_list, 'NDD') AS ndd_flg,
        $lead_has_product(product_name_list, 'SDD') AS sdd_flg,
        $lead_has_product(product_name_list, 'Грузовой') AS cargo_flg,
        $lead_has_product(product_name_list, 'Экспресс') AS express_flg,
        $lead_has_product(product_name_list, 'Магистраль') AS magistal_flg,
        $lead_has_product(product_name_list, 'Старая Яндекс Доставка') AS old_yandex_delivery_flg,
        product_name_list IS NULL AS null_product_flg,
        corp_client_id
    FROM $LEAD
);

-- Daily lead rows enriched with the responsible user, the company, the
-- pipeline / status names and the per-lead product flags. All lookups are
-- LEFT JOINs. The join keys duplicated on the right side (c.company_id,
-- lp.lead_id) are dropped from the output.
$final_raw = (
    SELECT
        l.*,
        u.user_group_name AS lead_responsible_user_group_name,
        u.user_login AS lead_responsible_user_login,
        u.user_name AS lead_responsible_user_name,
        c.*,
        ps.pipeline_name AS lead_pipeline_name,
        ps.pipeline_status AS lead_status_name,
        lp.*
    WITHOUT
        c.company_id,
        lp.lead_id
    FROM $parsed_leads_long AS l
    LEFT JOIN $users AS u
        ON l.lead_responsible_user_id = u.user_id
    LEFT JOIN $companies AS c
        ON c.company_id = l.company_id
    LEFT JOIN $pipeline_statuses AS ps
        ON l.lead_pipeline_id = ps.pipeline_id
        AND l.lead_status_id = ps.pipeline_status_id
    LEFT JOIN $lead_products AS lp
        ON l.lead_id = lp.lead_id
);

-- Monthly potential as Int64; a NULL raw value becomes 0.
$monthly_potential = ($raw_potential) -> {
    RETURN IF($raw_potential IS NULL, 0, CAST($raw_potential AS Int64));
};

-- Monthly potential spread over an average month of 365/12 days.
$daily_potential = ($raw_potential) -> {
    RETURN 1.0 * IF($raw_potential IS NULL, 0, CAST($raw_potential AS Int64)) / (365.0 / 12);
};

-- Number of '; '-separated items in a wishlist string.
$wish_count = ($wishlist) -> {
    RETURN ListLength(String::SplitToList($wishlist, '; '));
};

-- Adds derived columns on top of $final_raw: normalized funnel stages,
-- company attributes with NULL placeholders, NULL indicators, numeric
-- potentials (per month and per day) and wishlist item counts.
$final_processed = (
    SELECT
        t1.*,

        -- Normalized funnel stages
        $map_funnel_stage(t1.lead_ndd_funnel_stage) AS clean_lead_ndd_funnel_stage,
        $map_funnel_stage(t1.lead_sdd_funnel_stage) AS clean_lead_sdd_funnel_stage,
        $map_funnel_stage(t1.lead_express_funnel_stage) AS clean_lead_express_funnel_stage,
        $map_funnel_stage(t1.lead_cargo_funnel_stage) AS clean_lead_cargo_funnel_stage,

        -- Company attributes with explicit placeholders instead of NULL
        COALESCE(company_market_segment, 'NO MARKET SEGMENT') AS clean_company_market_segment,
        COALESCE(company_city, 'NO CITY') AS clean_company_city,
        COALESCE(company_country, 'NO COUNTRY') AS clean_company_country,
        COALESCE(company_tier, 'NO TIER') AS clean_company_tier,

        -- Potential: NULL indicators
        lead_cargo_potential_per_month IS NULL AS null_cargo_potential,
        lead_ndd_potential_per_month IS NULL AS null_ndd_potential,
        lead_sdd_potential_per_month IS NULL AS null_sdd_potential,
        lead_express_potential_per_month IS NULL AS null_express_potential,

        -- Potential: per month
        $monthly_potential(lead_cargo_potential_per_month) AS clean_lead_cargo_potential_per_month,
        $monthly_potential(lead_ndd_potential_per_month) AS clean_lead_ndd_potential_per_month,
        $monthly_potential(lead_sdd_potential_per_month) AS clean_lead_sdd_potential_per_month,
        $monthly_potential(lead_express_potential_per_month) AS clean_lead_express_potential_per_month,

        -- Potential: per day
        $daily_potential(lead_cargo_potential_per_month) AS clean_lead_cargo_potential_per_day,
        $daily_potential(lead_ndd_potential_per_month) AS clean_lead_ndd_potential_per_day,
        $daily_potential(lead_sdd_potential_per_month) AS clean_lead_sdd_potential_per_day,
        $daily_potential(lead_express_potential_per_month) AS clean_lead_express_potential_per_day,

        -- Wishlist: NULL indicators
        lead_ndd_wishlist IS NULL AS null_ndd_wishlist,
        lead_sdd_wishlist IS NULL AS null_sdd_wishlist,
        lead_express_wishlist IS NULL AS null_express_wishlist,
        lead_cargo_wishlist IS NULL AS null_cargo_wishlist,

        -- Wishlist: item counts
        $wish_count(lead_ndd_wishlist) AS lead_ndd_wish_cnt,
        $wish_count(lead_sdd_wishlist) AS lead_sdd_wish_cnt,
        $wish_count(lead_express_wishlist) AS lead_express_wish_cnt,
        $wish_count(lead_cargo_wishlist) AS lead_cargo_wish_cnt
    FROM $final_raw AS t1
);

-- Leads to exclude from the output: those whose most recent name (by utc_dt)
-- starts with '---' or contains 'удалить' / 'удаление' in any letter case.
$delete_leads = (
    SELECT lead_id
    FROM (
        SELECT
            lead_id,
            MAX_BY(lead_name, utc_dt) AS latest_lead_name
        FROM $final_processed
        GROUP BY lead_id
    )
    WHERE latest_lead_name LIKE '---%'
        OR Unicode::ToLower(CAST(latest_lead_name AS Utf8)) LIKE '%удалить%'
        OR Unicode::ToLower(CAST(latest_lead_name AS Utf8)) LIKE '%удаление%'
);

-- Overwrite the destination table with every processed row except those of
-- leads listed in $delete_leads.
INSERT INTO $SAVE_TO WITH TRUNCATE
SELECT *
FROM $final_processed
WHERE lead_id NOT IN $delete_leads;
