/*
    Ticket: https://st.yandex-team.ru/LOGDATA-2060
    Author: ikhomyanin@
    About: Parse AmoCRM raw logs for NDD corp client funnel
*/

use hahn;
PRAGMA yson.Strict = 'false';
PRAGMA yt.QueryCacheMode = 'disable';

-- Input tables (AmoCRM data in delivery-dwh).
-- $LEADS_HIST is read below with columns id, doc, utc_updated_dttm;
-- presumably it holds per-update snapshots of leads for 2022 - TODO confirm.
$LEADS_HIST = '//home/delivery-dwh/etl/raw_history/amocrm/leads/2022';
-- The remaining inputs are read below as (id, doc) pairs, where doc is a Yson document.
$LEADS = '//home/delivery-dwh/raw/amocrm/leads/leads';
$USERS = '//home/delivery-dwh/raw/amocrm/users/users';
$COMPANIES = '//home/delivery-dwh/raw/amocrm/companies/companies';
$PIPELINES = '//home/delivery-dwh/raw/amocrm/leads_pipelines/leads_pipelines';

-- Output tables; both are overwritten (INSERT ... WITH TRUNCATE) at the end of the script.
-- $SAVE_TO_FLATTENED receives the same rows flattened by the `wish` list column.
$SAVE_TO = '//home/taxi-delivery/analytics/production/ndd/corp_client_funnel/full_data';
$SAVE_TO_FLATTENED = '//home/taxi-delivery/analytics/production/ndd/corp_client_funnel/full_data_flattened';

-- Converts the raw "potential" custom-field value to Int64.
--   $potential: value to convert (callers pass the extracted custom-field text).
--   Returns 0 when $potential is NULL, otherwise CAST($potential AS Int64).
--   Note: CAST yields NULL for text that is not a valid integer, so such
--   values still come out as NULL rather than 0.
$clean_ndd_potential = ($potential) -> {
    $as_int = cast($potential as int64);

    return if($potential is not null, $as_int, 0);
};

-- Maps a sales manager's full name to a sales group label.
--   $sales_name: manager name (may be NULL).
--   Returns 'ГПП' for the names listed in the first branch and 'ГМП' for
--   everything else, including NULL and the two explicitly listed '!'-names.
$sales_group_mapping = ($sales_name) -> {
    return case
        when $sales_name in (
            'Валентина Зуйкова',
            'Екатерина Шарапова',
            'Анастасия Щеблякова',
            'Павел Дехтерев',
            'Анна Овчаренко',
            'Дарья Савченко',
            'Анна Ювкина',
            'Николай Куликов',
            'Григорий Прокопенко',
            'Дмитрий Иванютин',
            'Татьяна Книгина',
            'Илья Бижов'
        ) then 'ГПП'
        -- Listed explicitly although they resolve to the same label as the default.
        when $sales_name in ('!Вуббо Окелс', '!Тоёхиро Акияма') then 'ГМП'
        else 'ГМП'
    end;
};

-- Normalizes a funnel stage name to its canonical numbered label.
--   $status: stage name as stored in the custom field (may be NULL).
--   Returns:
--     'NO STATUS'     when $status is NULL;
--     the numbered label ('1. Идея' ... '8. Удержание') for both the numbered
--     and the listed un-numbered spelling of a stage;
--     'Проигрыш'      for either spelling of the lost stage;
--     'WRONG STATUS'  for the postponed/churn stages;
--     'STATUS ERROR'  for any other value.
$status_mapping = ($status) -> {
    return case
        when $status is null then 'NO STATUS'
        when $status in ('Идея', '1. Идея') then '1. Идея'
        when $status in ('Провести встречу', '2. Провести встречу') then '2. Провести встречу'
        when $status in ('Переговоры', '3. Переговоры') then '3. Переговоры'
        when $status in ('Заключение договора', '4. Заключение договора') then '4. Заключение договора'
        when $status in ('Старт', '5. Старт') then '5. Старт'
        -- Only the numbered spelling of this stage is recognized.
        when $status = '6. Разгон' then '6. Разгон'
        when $status in ('Развитие', '7. Развитие') then '7. Развитие'
        when $status in ('Удержание', '8. Удержание') then '8. Удержание'
        when $status in ('Проиграно', 'Проигрыш') then 'Проигрыш'
        when $status in ('Отложить', 'Отток', '(old) Отложить', '(old) Отток') then 'WRONG STATUS'
        else 'STATUS ERROR'
    end;
};

-- Extracts one custom field from an AmoCRM document as a string.
--   $doc:   Yson document with a '/custom_fields_values' list.
--   $field: value of '/field_name' to look for.
--   Returns, depending on the entry's '/field_type':
--     text, numeric, url, select, textarea - '/value' of the first element of '/values';
--     date                                 - the first '/value' read as Int64 and cast to String;
--     multiselect, multitext               - all '/value' strings sorted and joined with '; ';
--     NULL type (e.g. field not found)     - NULL;
--     any other type                       - 'UNKNOWN FIELD TYPE'.
$extract_from_custom_fields = ($doc, $field) -> {
    -- Skip entries while their name differs from $field, then take the first remaining one.
    $entry = ListHead(
        ListSkipWhile(
            Yson::YPathList($doc, '/custom_fields_values'),
            ($item) -> {return Yson::YPathString($item, '/field_name') != $field}
        )
    );

    $kind = Yson::YPathString($entry, '/field_type');
    $values = Yson::YPathList($entry, '/values');

    $first_value = ListHead($values);
    $all_values_joined = ListConcat(
        ListSort(ListMap($values, ($item) -> {return Yson::YPathString($item, '/value')})),
        '; '
    );

    return case
        when $kind in ('text', 'numeric', 'url', 'select', 'textarea')
            then Yson::YPathString($first_value, '/value')
        when $kind = 'date'
            then cast(Yson::YPathInt64($first_value, '/value') as String)
        when $kind in ('multiselect', 'multitext')
            then $all_values_joined
        when $kind is null then null
        else 'UNKNOWN FIELD TYPE'
    end;
};

-- Returns the '/id' (Int64) of the first entry in the document's
-- '/_embedded/companies' list; NULL when there is no such entry.
$extract_company = ($doc) -> {
    return Yson::YPathInt64(
        ListHead(Yson::YPathList($doc, '/_embedded/companies')),
        '/id'
    );
};

-- Formats a Unix timestamp given in seconds.
--   $sec:        seconds since the epoch; it is cast to UInt32 first, so a
--                value that cannot be cast produces NULL.
--   $formatting: format string passed to DateTime::Format (e.g. '%Y-%m-%d').
--   Returns the formatted string.
$format_sec = ($sec, $formatting) -> {
    $seconds = cast($sec as UInt32);

    return DateTime::Format($formatting)(DateTime::FromSeconds($seconds));
};

-- Source of the Python3 UDF that builds a day-by-day list of dates.
$script = @@
import datetime as dt

# Default upper bound of the series; evaluated once, when this module is loaded.
today = str(dt.date.today())


def generate_dates(date_from, date_to=today):
    """
    Build the list of ISO dates (YYYY-MM-DD) from date_from to date_to, inclusive.

    Each argument may be a str or UTF-8 encoded bytes.

    Returns a list of date strings; the list is empty when date_from is later
    than date_to. When an argument is missing (None), is not valid UTF-8 or is
    not an ISO date, the single-element list ["Плохой формат даты"] is returned
    instead of raising.
    """
    # The YQL signature declares the argument as String?, so NULL arrives as None.
    if date_from is None or date_to is None:
        return ["Плохой формат даты"]
    try:
        if not isinstance(date_from, str):
            date_from = date_from.decode('utf-8')
        if not isinstance(date_to, str):
            date_to = date_to.decode('utf-8')
        start = dt.date.fromisoformat(date_from)
        finish = dt.date.fromisoformat(date_to)
    except ValueError:
        # Also covers UnicodeDecodeError, which is a subclass of ValueError.
        return ["Плохой формат даты"]
    span_days = (finish - start).days
    # A negative span gives an empty range and therefore an empty list.
    return [str(start + dt.timedelta(days=offset)) for offset in range(span_days + 1)]
@@;

-- YQL wrapper: takes the start date and returns the list of dates up to the
-- Python-side default upper bound (`today`).
$generate_series = Python3::generate_dates(Callable<(String?)->List<string>>, $script);

-- First date of the generated calendar.
$START_DATE = '2022-03-23';

-- Single row holding the whole list of dates starting at $START_DATE.
$dates = (
    SELECT
        $generate_series($START_DATE) AS dates
);

-- One row per date: the `dates` list column is unrolled into scalar values.
$dates_flatten = (
    SELECT
        *
    FROM $dates
    FLATTEN BY dates
);

-- Ids of leads from $LEADS whose 'Продукт' custom field contains 'NDD'
-- and whose name does not start with '---'.
$ndd_leads = (
    SELECT
        id
    FROM $LEADS
    WHERE $extract_from_custom_fields(doc, 'Продукт') LIKE '%NDD%'
        AND Yson::YPathString(doc, '/name') NOT LIKE '---%'
);

-- Latest snapshot per lead and per update day.
-- Rows of $LEADS_HIST for the selected leads are grouped by id and by the
-- first 10 characters of utc_updated_dttm (the date part); for every group
-- the doc with the greatest utc_updated_dttm and that timestamp are kept.
$raw_leads = (
    SELECT
        id,
        utc_updated_dt,
        MAX_BY(doc, utc_updated_dttm) AS doc,
        MAX(utc_updated_dttm) AS utc_updated_dttm
    FROM $LEADS_HIST
    WHERE id IN $ndd_leads
    GROUP BY
        id,
        Substring(utc_updated_dttm, 0, 10) AS utc_updated_dt
);

-- Adds to every daily snapshot the update date of the same lead's next
-- snapshot (ordered by utc_updated_dt); NULL for the lead's last snapshot.
$raw_leads_tmp = (
    SELECT
        id,
        doc,
        utc_updated_dttm,
        utc_updated_dt,
        LEAD(utc_updated_dt, 1) OVER next_snapshot AS next_utc_updated_dt
    FROM $raw_leads
    WINDOW next_snapshot AS (
        PARTITION BY id
        ORDER BY utc_updated_dt ASC
    )
);

-- Expands every snapshot into one row per calendar date on which it was the
-- current state of the lead: from its own update date (inclusive) up to the
-- next snapshot's update date (exclusive). A snapshot without a successor
-- covers every remaining generated date.
$raw_leads_long = (
    SELECT
        t1.id AS id,
        t1.doc AS doc,
        t1.utc_updated_dttm AS utc_updated_dttm,
        t1.utc_updated_dt AS utc_updated_dt,
        t2.dates AS utc_dt
    FROM $raw_leads_tmp AS t1
    CROSS JOIN $dates_flatten AS t2
    WHERE t2.dates >= t1.utc_updated_dt
        AND t2.dates < COALESCE(t1.next_utc_updated_dt, '9999-12-31')
);

-- Flat representation of every (lead, date) row: top-level attributes are
-- read from the Yson doc directly, custom fields through
-- $extract_from_custom_fields. Output column names are part of the saved
-- table schema and are kept exactly as they were.
$parsed_leads = (
    SELECT
        id AS lead_id,
        utc_updated_dt,
        utc_updated_dttm,
        utc_dt,
        $extract_company(doc) AS company_id,
        -- Top-level lead attributes.
        Yson::YPathString(doc, '/name') AS lead_name,
        Yson::YPathInt64(doc, '/group_id') AS lead_group_id,
        Yson::YPathInt64(doc, '/account_id') AS lead_account_id,
        Yson::YPathInt64(doc, '/pipeline_id') AS lead_pipeline_id,
        Yson::YPathInt64(doc, '/status_id') AS lead_status_id,
        Yson::YPathBool(doc, '/is_deleted') AS lead_is_deleted,
        Yson::YPathInt64(doc, '/responsible_user_id') AS lead_responsible_user_id,
        $format_sec(Yson::YPathInt64(doc, '/created_at'), '%Y-%m-%d') AS lead_created_dt,
        -- Custom fields.
        $format_sec($extract_from_custom_fields(doc, 'Дата проверки лидом'), '%Y-%m-%d') AS lead_cheacked_by_lead_dt,
        $extract_from_custom_fields(doc, 'Продукт') AS lead_product,
        $extract_from_custom_fields(doc, 'Потенциал NDD') AS lead_ndd_potential,
        -- Cleaned potential divided by 30, as a floating-point value.
        1.0 * $clean_ndd_potential($extract_from_custom_fields(doc, 'Потенциал NDD')) / 30 AS clean_lead_ndd_potential,
        -- Integer variant, currently disabled:
        -- cast(1.0*$clean_ndd_potential($extract_from_custom_fields(doc, 'Потенциал NDD')) / 30 as Int64) as clean_lead_ndd_potential,
        $extract_from_custom_fields(doc, '№ договора') AS lead_corp_contract_id,
        $extract_from_custom_fields(doc, 'Тикет на интеграцию') AS lead_intergration_ticket,
        $extract_from_custom_fields(doc, 'Менеджер интеграции') AS lead_intergration_manager,
        $extract_from_custom_fields(doc, 'Статус интеграции NDD') AS lead_ndd_intergration_stage,
        $extract_from_custom_fields(doc, 'Тип договора') AS lead_corp_contract_type,
        $extract_from_custom_fields(doc, 'Тип оплаты') AS lead_payment_type,
        $status_mapping($extract_from_custom_fields(doc, 'Этап воронки NDD')) AS lead_ndd_funnel_stage,
        $extract_from_custom_fields(doc, 'Причина проигрыша NDD') AS lead_ndd_lose_reason,
        $extract_from_custom_fields(doc, 'Канал привлечения') AS lead_acquisition_channel,
        $extract_from_custom_fields(doc, 'Wishlist (старое поле)') AS lead_wishlist_old,
        $extract_from_custom_fields(doc, 'Wishlist NDD') AS lead_ndd_wishlist
    FROM $raw_leads_long
);

-- User id and display name from $USERS.
-- NOTE(review): coalesce(doc, null) is kept verbatim; it looks like a no-op,
-- but it may affect the argument's type - confirm before simplifying.
$parsed_users = (
    SELECT
        id,
        Yson::YPathString(coalesce(doc, null), '/name') AS name
    FROM $USERS
);

-- Flat representation of companies from $COMPANIES: top-level attributes are
-- read from the Yson doc, the rest through $extract_from_custom_fields.
$parsed_companies = (
    SELECT
        id AS company_id,
        -- Top-level company attributes.
        Yson::YPathBool(doc, '/is_deleted') AS company_is_deleted,
        Yson::YPathString(doc, '/name') AS company_name,
        Yson::YPathInt64(doc, '/responsible_user_id') AS company_responsible_user_id,
        $format_sec(Yson::YPathInt64(doc, '/created_at'), '%Y-%m-%d') AS company_created_dt,
        -- Custom fields.
        $extract_from_custom_fields(doc, 'ИНН/БИН') AS company_inn,
        $extract_from_custom_fields(doc, 'КПП') AS company_kpp,
        $extract_from_custom_fields(doc, 'Web') AS company_web,
        $extract_from_custom_fields(doc, 'Адрес') AS company_address,
        $extract_from_custom_fields(doc, 'Email') AS company_email,
        $extract_from_custom_fields(doc, 'Телефон') AS company_telephone,
        $extract_from_custom_fields(doc, 'Tier') AS company_tier,
        $extract_from_custom_fields(doc, 'Сегмент рынка') AS company_market_segment,
        $extract_from_custom_fields(doc, 'Группа компаний') AS company_holding,
        $extract_from_custom_fields(doc, 'Страна') AS company_country,
        $extract_from_custom_fields(doc, 'Город') AS company_city
    FROM $COMPANIES
);

-- Pipelines from $PIPELINES with their name and the list of status nodes
-- found under '/_embedded/statuses'.
$pipelines_raw = (
    SELECT
        id,
        doc,
        Yson::YPathString(doc, '/name') AS pipeline_name,
        Yson::YPathList(doc, '/_embedded/statuses') AS pipeline_statuses
    FROM $PIPELINES
);

-- One row per (pipeline, status): the status list is unrolled and the name
-- and id of each status node are extracted.
$parsed_pipelines = (
    SELECT
        id AS pipeline_id,
        pipeline_name,
        Yson::YPathString(pipeline_statuses, '/name') AS status_name,
        Yson::YPathInt64(pipeline_statuses, '/id') AS status_id
    FROM $pipelines_raw
    FLATTEN LIST BY pipeline_statuses
);

-- Final data set: every (lead, date) row enriched with company attributes,
-- responsible user names and their sales groups (for the lead and for the
-- company), pipeline and status names, and the number of '; '-separated
-- items in the lead's NDD wishlist. All joins are LEFT JOINs, so lead rows
-- without a match are kept.
$final = (
    SELECT
        pl.*,
        pc.*,
        pu1.name AS lead_responsible_user_name,
        $sales_group_mapping(pu1.name) AS lead_sales_group,
        pu2.name AS company_responsible_user_name,
        $sales_group_mapping(pu2.name) AS company_sales_group,
        pp.pipeline_name AS pipeline_name,
        pp.status_name AS status_name,
        ListLength(String::SplitToList(pl.lead_ndd_wishlist, '; ')) AS lead_wish_cnt
    FROM $parsed_leads AS pl
    LEFT JOIN $parsed_companies AS pc
        ON pl.company_id = pc.company_id
    -- The Int64 ids taken from the docs are cast to String to match the id columns.
    LEFT JOIN $parsed_users AS pu1
        ON cast(pl.lead_responsible_user_id AS String) = pu1.id
    LEFT JOIN $parsed_users AS pu2
        ON cast(pc.company_responsible_user_id AS String) = pu2.id
    LEFT JOIN $parsed_pipelines AS pp
        ON cast(pl.lead_pipeline_id AS String) = pp.pipeline_id
        AND pl.lead_status_id = pp.status_id
);

-- Overwrite the main output table with the final data set.
INSERT INTO $SAVE_TO WITH TRUNCATE
SELECT
    *
FROM $final;

-- Final data set plus `wish`: the NDD wishlist split on '; ' into a list.
$final_tmp = (
    SELECT
        f.*,
        String::SplitToList(f.lead_ndd_wishlist, '; ') AS wish
    FROM $final AS f
);

-- Overwrite the flattened output table: `wish` is unrolled so that each
-- wishlist item gets its own row.
INSERT INTO $SAVE_TO_FLATTENED WITH TRUNCATE
SELECT
    *
FROM $final_tmp
FLATTEN BY wish;
