use hahn; 
PRAGMA AnsiInForEmptyOrNullableItemsCollections;
PRAGMA yson.DisableStrict;

-- Input tables (YT paths on the cluster selected by the `use` statement).
-- Each constant is read by exactly one of the named subqueries below, except
-- where noted.
-- Lead history snapshot; read by $lead_info.
$lead_hist = '//home/delivery-dwh/ods/amocrm/lead_hist/2022-01-01';
-- Pipeline status dictionary; joined to the lead history by pipeline_status_id.
$lead_pipeline_status = '//home/delivery-dwh/ods/amocrm/lead_pipeline_status/lead_pipeline_status';
-- Mapping between deals (deal_id) and telemarketing tickets (tm_ticket_key).
$tm_ticket_to_deal = '//home/taxi-delivery/analytics/dev/nemici/tier3_analytics/tm_ticket_to_deal';
-- Telemarketing call/event log; read by $telemarketing_info.
$telemarketing = '//home/taxi-delivery/analytics/dev/fedorivashkin/touch/Telemarketing';
-- Directory of employer history tables; read through range() by $employers_info.
$employers_history = '//home/taxi/testing/export/taxi-logistic-platform-production/employers_history';
-- Directory of contract snapshot tables; read through range() by $contract_snp_info.
$dm_contract_snp = '//home/taxi-dwh/cdm/b2b/dm_contract_snp';
-- Contract attributes (types, payment type, dates); read by $corporate_contracts_and_debts_info.
$corporate_contracts_and_debts = '//home/taxi-dwh/import/yandex-balance/corporate_contracts_and_debts'; 
-- Delivery requests datamart; read by $request_datamart_info.
$request_datamart = '//home/taxi-delivery/analytics/production/ndd/requests_datamart';
-- Directory of order-form tables; read through range() by $web_lk_info.
$web_lk = '//home/taxi-delivery/analytics/production/ndd/product/lk/order_form';
-- Directory of web visit tables; read through range() by $METRICA_VISITS_raw.
$web_visits = '//home/taxi-delivery/analytics/production/metrica/web_dostavka_visits';

-- Lower bound (inclusive, compared as a 'YYYY-MM-DD' string) used by the filters
-- on lead creation, call, delivery and visit timestamps.
$START_DT = '2022-07-15';
-- First table name passed to range() when reading $web_visits.
$month_start = '2022-07-01';

-- Output table for the per-lead (non-aggregated) funnel; overwritten on every run.
$SAVE_to_table_without_agg = '//home/taxi-delivery/analytics/production/ndd/product/lk/funnel_of_self-registration_without_agg';

-- Output table for the per-day aggregated funnel joined with web metrics; overwritten on every run.
$SAVE_to_table_with_agg = '//home/taxi-delivery/analytics/production/ndd/product/lk/funnel_of_self-registration_with_agg';

-- Date/time helpers shared by the funnel and the web-metrica sections.
-- Layout of full timestamps and of plain dates handled by this script.
$dttm_format = '%Y-%m-%d %H:%M:%S';
$dt_format = '%Y-%m-%d';
-- Parser and formatter for full 'YYYY-MM-DD HH:MM:SS' timestamps.
$datetime_parse = DateTime::Parse($dttm_format);
$date_format = DateTime::Format($dttm_format);
-- Parses a 'YYYY-MM-DD HH:MM:SS' string into a Datetime value.
$str_to_dttm = ($dttm_str) -> {
    $parsed = $datetime_parse($dttm_str);
    RETURN DateTime::MakeDatetime($parsed);
};
-- Turns a 'YYYY-MM-DD HH:MM:SS' string into its 'YYYY-MM-DD' date string.
$utc_dttm_to_dt = ($utc_dttm) -> {
    $as_dttm = $str_to_dttm($utc_dttm);
    RETURN DateTime::Format($dt_format)($as_dttm);
};



-- Completed telemarketing calls, one row per (ticket_code, lead_id).
$telemarketing_info = (
    SELECT
        ticket_code,
        lead_id,
        MIN(call_dt) AS min_call_dt,
        -- first 10 characters of the earliest call timestamp
        SUBSTRING(MIN(call_dttm), 0, 10) AS min_call_dttm,
        MIN(event_dttm) AS min_event_dttm,
        MAX(event_dttm) AS max_event_dttm,
        -- w_tax_result_value taken from the row with the latest event_dttm
        MAX_BY(w_tax_result_value, event_dttm) AS max_w_tax_result_value,
        -- earliest contact moment: the call timestamp cut to 19 characters,
        -- or the event timestamp when the call timestamp is missing
        MIN(
            IF(call_dttm IS NOT NULL, SUBSTRING(call_dttm, 0, 19), event_dttm)
        ) AS full_call_dttm
    FROM $telemarketing
    WHERE call_status = 'Звонок состоялся'
    GROUP BY
        ticket_code,
        lead_id
);

-- Leads from the 'САМООБРАЩЕНИЕ' channel whose product list contains 'NDD',
-- created on/after $START_DT. One row per (lead_id, channel_name, contract_num,
-- tm_ticket_key); "latest" attributes are taken from the history row with the
-- greatest utc_updated_dttm, and telemarketing call data is attached through
-- the deal-to-ticket mapping.
$lead_info = (
    SELECT
        lead_id,
        channel_name,
        contract_num,
        MAX_BY(pipeline_express_phase_name, utc_updated_dttm) AS max_pipeline_express_phase_name,
        MAX_BY(pipeline_ndd_phase_name, utc_updated_dttm) AS max_pipeline_ndd_phase_name,
        MAX_BY(pipeline_sdd_phase_name, utc_updated_dttm) AS pipeline_sdd_phase_name,
        MAX_BY(lp.pipeline_status_id, utc_updated_dttm) AS max_pipeline_status_id,
        MAX_BY(Yson::ConvertToStringList(product_name_list), utc_updated_dttm) AS product_name_list,
        MAX(utc_created_dttm) AS max_utc_created_dttm,
        MIN(utc_created_dttm) AS min_utc_created_dttm,
        MAX(utc_updated_dttm) AS max_utc_updated_dttm,
        MIN(utc_updated_dttm) AS min_utc_updated_dttm,
        MAX_BY(pipeline_status, utc_updated_dttm) AS max_pipeline_status,
        MAX_BY(pipeline_name, utc_updated_dttm) AS max_pipeline_name,
        tm_ticket_key,
        -- telemarketing attributes: any value from the joined call rows
        SOME(min_call_dt) AS min_call_dt,
        SOME(min_call_dttm) AS min_call_dttm,
        SOME(min_event_dttm) AS min_event_dttm,
        SOME(max_event_dttm) AS max_event_dttm,
        SOME(max_w_tax_result_value) AS max_w_tax_result_value,
        MIN(full_call_dttm) AS full_call_dttm
    FROM $lead_hist AS lh
    LEFT JOIN $lead_pipeline_status AS lp
        ON lh.pipeline_status_id = lp.pipeline_status_id
    LEFT JOIN $tm_ticket_to_deal AS t
        ON lh.lead_id = t.deal_id
    LEFT JOIN $telemarketing_info AS tl
        ON t.tm_ticket_key = tl.ticket_code
    WHERE ListHas(Yson::ConvertToStringList(product_name_list), 'NDD')
        AND utc_created_dttm >= $START_DT
        AND channel_name = 'САМООБРАЩЕНИЕ'
    GROUP BY
        lh.lead_id AS lead_id,
        lh.channel_name AS channel_name,
        lh.contract_num AS contract_num,
        t.tm_ticket_key AS tm_ticket_key
);


-- Latest known state of every employer, one row per (employer_id, employer_code).
-- Attribute values come from the history row with the greatest history_event_id;
-- the earliest history_timestamp is exposed under two names.
$employers_info = (
    SELECT
        employer_id,
        employer_code,
        MAX_BY(Yson::ConvertToString(unpacked_data.brand_name), history_event_id) AS brand_name,
        MAX_BY(Yson::ConvertToString(unpacked_data.brand_name_new.brand_name_eng), history_event_id) AS brand_name_eng,
        MAX_BY(Yson::ConvertToString(unpacked_data.brand_name_new.brand_name_ru), history_event_id) AS brand_name_ru,
        MAX_BY(Yson::ConvertToString(unpacked_data.corp_client_id), history_event_id) AS corp_client_id,
        MAX_BY(Yson::ConvertToBool(unpacked_data.self_registered), history_event_id) AS self_registered,
        MIN(history_timestamp) AS start_dttm_sec,
        MIN(history_timestamp) AS start_dttm
    FROM RANGE($employers_history, '2022-07-10')
    GROUP BY
        employer_id,
        employer_code
);


-- Distinct employer rows extended with the contract ids found for the same
-- corp_client_id in the contract snapshot tables (employers without a match are kept).
$contract_snp_info = (
    SELECT DISTINCT
        e.corp_client_id AS corp_client_id,
        employer_code,
        employer_id,
        brand_name,
        brand_name_eng,
        brand_name_ru,
        corp_contract_id,
        self_registered,
        start_dttm_sec
    FROM $employers_info AS e
    LEFT JOIN RANGE($dm_contract_snp, '2022') AS d
        ON e.corp_client_id = d.corp_client_id
);

-- Contract attributes for the contracts referenced by $lead_info, together with
-- the derived activation date.
--
-- activated_dt rules:
--   * prepaid contract that is not an offer contract -> start_dt
--   * offer contract                                 -> min_payment_date_v2
--   * anything else -> earliest of signed_dt / faxed_dt / booked_dt, where a
--     missing date is replaced by the sentinel '9999-12-31' so that it cannot
--     win the LEAST(); the result is the sentinel only when all three are missing.
--
-- Fix: the sentinel used to be the first non-NULL argument of COALESCE
-- (COALESCE(null, '9999-12-31', signed_dt)), so every term evaluated to
-- '9999-12-31' and the real dates were never used.
--
-- activated_dt_old keeps the previous rule set (offer checked first, no sentinel).
$corporate_contracts_and_debts_info = (
    SELECT
        contract,
        CASE
            WHEN payment_type = 'предоплата' AND contract_type != 'Договор-оферта' THEN start_dt
            WHEN contract_type = 'Договор-оферта' THEN min_payment_date_v2
            ELSE LEAST(
                COALESCE(signed_dt, '9999-12-31'),
                COALESCE(faxed_dt, '9999-12-31'),
                COALESCE(booked_dt, '9999-12-31')
            )
        END AS activated_dt,
        CASE
            WHEN contract_type = 'Договор-оферта' THEN min_payment_date_v2
            WHEN payment_type = 'предоплата' THEN start_dt
            ELSE LEAST(signed_dt, faxed_dt, booked_dt)
        END AS activated_dt_old,
        contract_type,
        min_payment_date_v2,
        payment_type,
        start_dt AS start_dt_con,
        signed_dt AS signed_dt_con,
        faxed_dt AS faxed_dt_con,
        booked_dt AS booked_dt_con
    FROM $corporate_contracts_and_debts
    WHERE contract IN (SELECT contract_num FROM $lead_info)
);
-- Delivered requests per corporate client (clients present in $employers_info only),
-- restricted to requests created on/after $START_DT.
$request_datamart_info = (
    SELECT
        corp_client_id,
        MIN(SUBSTRING(created_dttm, 0, 10)) AS first_delivery_dt,
        MIN(created_dttm) AS first_delivery_dttm,
        MAX(SUBSTRING(created_dttm, 0, 10)) AS last_delivery_dt,
        COUNT(DISTINCT request_id) AS cnt_deliveries
    FROM $request_datamart
    WHERE corp_client_id IS NOT NULL
        AND corp_client_id != ''
        -- fully or partially delivered requests only
        AND mapped_event_type IN ('Доставлен', 'Доставлен частично')
        AND corp_client_id IN (SELECT corp_client_id FROM $employers_info)
        AND SUBSTRING(created_dttm, 0, 10) >= $START_DT
    GROUP BY
        corp_client_id
);

-- Order-form activity per clientID: number of distinct forms and the earliest
-- visit start, for visits starting on/after $START_DT.
$web_lk_info = (
    SELECT
        clientID,
        COUNT(DISTINCT form_id) AS cnt_forms,
        MIN(utc_visit_start_dttm) AS min_utc_visit_start_dttm
    FROM RANGE($web_lk, '2022-07-01')
    WHERE clientID IS NOT NULL
        AND SUBSTRING(utc_visit_start_dttm, 0, 10) >= $START_DT
    GROUP BY
        clientID
);

-- Funnel base: every lead from $lead_info with a call on/after $START_DT,
-- enriched (when a match exists) with employer, contract, delivery and
-- order-form data. Employers and contracts are matched by contract number,
-- deliveries and order forms by the employer's corp_client_id.
$total_with_web = (
    SELECT
        l.*,
        e.*,
        c.*,
        r.*,
        w.*
    FROM $lead_info AS l
    LEFT JOIN $contract_snp_info AS e
        ON l.contract_num = e.corp_contract_id
    LEFT JOIN $corporate_contracts_and_debts_info AS c
        ON l.contract_num = c.contract
    LEFT JOIN $request_datamart_info AS r
        ON e.corp_client_id = r.corp_client_id
    LEFT JOIN $web_lk_info AS w
        ON e.corp_client_id = w.clientID
    WHERE full_call_dttm >= $START_DT
);

-- Converts a 'YYYY-MM-DD HH:MM:SS' string to epoch seconds.
-- The result is NULL when the input is NULL or cannot be parsed.
$dttm_str_to_epoch_sec = ($dttm_str) -> {
    RETURN DateTime::ToSeconds(DateTime::MakeDatetime($datetime_parse($dttm_str)));
};

-- Per-lead funnel: one row per row of $total_with_web with the timestamp of
-- every funnel step (as a string and as epoch seconds) and the delays between
-- the call and the later steps.
--
-- Fix: diff_call_web and diff_ferst_dev_call are computed in Int64.
-- DateTime::ToSeconds returns an unsigned 32-bit value, so when the visit or
-- the first delivery happened BEFORE the call the unsigned subtraction wrapped
-- around to a huge positive number instead of giving a negative delay.
$info_without_agg = (
    SELECT
        lead_id,
        -- step: the call got through
        full_call_dttm,
        $dttm_str_to_epoch_sec(full_call_dttm) AS full_call_dttm_sec,
        -- step: the client was created
        $date_format(DateTime::FromSeconds(CAST(start_dttm_sec AS Uint32))) AS start_dttm,
        start_dttm_sec,
        -- step: the contract was activated; the '9999-12-31' sentinel means
        -- "no activation date found", in which case the contract start date is used
        IF(activated_dt = '9999-12-31', start_dt_con, activated_dt) AS activated_dt,
        $dttm_str_to_epoch_sec(
            IF(activated_dt = '9999-12-31', start_dt_con, activated_dt) || ' 00:00:00'
        ) AS activated_dt_sec,
        -- step: first visit to the web personal account
        min_utc_visit_start_dttm,
        $dttm_str_to_epoch_sec(min_utc_visit_start_dttm) AS min_utc_visit_start_dttm_sec,
        -- step: first delivery
        first_delivery_dttm,
        $dttm_str_to_epoch_sec(first_delivery_dttm) AS first_delivery_dttm_sec,
        COALESCE(cnt_deliveries, 0) AS cnt_deliveries,
        self_registered,
        COALESCE(cnt_forms, 0) AS cnt_forms,
        -- seconds from the call to the first web visit (negative when the visit came first)
        CAST($dttm_str_to_epoch_sec(min_utc_visit_start_dttm) AS Int64)
            - CAST($dttm_str_to_epoch_sec(full_call_dttm) AS Int64) AS diff_call_web,
        -- seconds from the call to the first delivery (negative when the delivery came first)
        CAST($dttm_str_to_epoch_sec(first_delivery_dttm) AS Int64)
            - CAST($dttm_str_to_epoch_sec(full_call_dttm) AS Int64) AS diff_ferst_dev_call,
        contract_num,
        corp_client_id,
        corp_contract_id,
        employer_code,
        employer_id,
        tm_ticket_key,
        min_utc_created_dttm AS min_utc_created_dttm_CRM
    FROM $total_with_web
);

-- Store the per-lead funnel, replacing the previous contents of the target table.
INSERT INTO $SAVE_to_table_without_agg WITH TRUNCATE
SELECT
    *
FROM $info_without_agg;

----------------------------------- aggregated info -----------------------------
-- Daily funnel counters: leads are grouped by the date part (first 10
-- characters) of the call timestamp, and each counter is the number of leads
-- that reached the corresponding funnel step.
$info_with_agg = (
    SELECT
        full_call_dt,
        COUNT_IF(full_call_dttm IS NOT NULL) AS cnt_full_call,
        COUNT_IF(start_dttm_sec IS NOT NULL) AS cnt_start,
        COUNT_IF(activated_dt IS NOT NULL) AS cnt_activated,
        COUNT_IF(min_utc_visit_start_dttm IS NOT NULL) AS cnt_visit_start,
        COUNT_IF(first_delivery_dttm IS NOT NULL) AS cnt_first_delivery,
        COUNT_IF(cnt_deliveries >= 5) AS cnt_5_deliveris,
        COUNT_IF(cnt_deliveries >= 10) AS cnt_10_deliveries
    FROM $info_without_agg
    GROUP BY
        SUBSTRING(full_call_dttm, 0, 10) AS full_call_dt
);
---------------------------- web metrica ----------------------------
-- Maps a numeric traffic source id to its display name.
-- A NULL id is reported the same way as id 0; ids outside -1..11 give 'UNKNOWN'.
$map_trafic_source_id = ($trafic_source_id) -> {
    RETURN CASE
        WHEN $trafic_source_id IS NULL THEN 'Прямые заходы'
        WHEN $trafic_source_id == -1 THEN 'Внутренние переходы'
        WHEN $trafic_source_id == 0 THEN 'Прямые заходы'
        WHEN $trafic_source_id == 1 THEN 'Переходы по ссылкам на сайтах'
        WHEN $trafic_source_id == 2 THEN 'Переходы из поисковых систем'
        WHEN $trafic_source_id == 3 THEN 'Переходы по рекламе'
        WHEN $trafic_source_id == 4 THEN 'Переходы с сохранённых страниц'
        WHEN $trafic_source_id == 5 THEN 'Не определён'
        WHEN $trafic_source_id == 6 THEN 'Переходы по внешним ссылкам'
        WHEN $trafic_source_id == 7 THEN 'Переходы с почтовых рассылок'
        WHEN $trafic_source_id == 8 THEN 'Переходы из социальных сетей'
        WHEN $trafic_source_id == 9 THEN 'Переходы из рекомендательных систем'
        WHEN $trafic_source_id == 10 THEN 'Переходы из мессенджеров'
        WHEN $trafic_source_id == 11 THEN 'Переходы по QR-коду'
        ELSE 'UNKNOWN'
    END;
};

-- Position of the entry equal to 2 inside the serialized utmmodel list
-- (NULL when the list has no such entry). The same position is used to pick
-- the matching element from every other serialized trafficsourse_* list.
$attr_model_pos = ($utmmodel) -> {
    RETURN ListIndexOf(Yson::ConvertToInt64List(Yson::Parse($utmmodel)), 2);
};

-- Element of a serialized string list at the position found by $attr_model_pos;
-- an empty string is replaced with 'non'.
$attr_model_text = ($packed_list, $utmmodel) -> {
    $picked = Yson::ConvertToStringList(Yson::Parse($packed_list)) [$attr_model_pos($utmmodel)];
    RETURN CASE WHEN $picked = '' THEN 'non' ELSE $picked END;
};

-- Visits that started on dostavka.yandex.ru outside of the /route pages, with
-- the traffic source name and UTM tags picked at the utmmodel = 2 position
-- (the *_la columns).
$METRICA_VISITS_raw = (
    SELECT
        counter_id,
        deleted_flg,
        end_url,
        event_id,
        Yson::ConvertToInt64List(goals_event_time) AS goals_event_time,
        Yson::ConvertToInt64List(goals_id) AS goals_id,
        goals_order_id,
        is_bounce,
        is_mobile_flg,
        is_robot_internal,
        is_robot,
        link_url,
        metrica_user_id,
        order_nr,
        referer,
        region_id,
        search_engine_id,
        search_phrase,
        start_url,
        start_url_domain_name,
        $map_trafic_source_id(
            Yson::ConvertToInt64List(Yson::Parse(trafficsourse_id)) [$attr_model_pos(trafficsourse_utmmodel)]
        ) AS trafficsourse_id_la,
        $attr_model_text(trafficsourse_utmcampain, trafficsourse_utmmodel) AS utmcampain_la,
        $attr_model_text(trafficsourse_utmcontent, trafficsourse_utmmodel) AS utmcontent_la,
        $attr_model_text(trafficsourse_utmmedium, trafficsourse_utmmodel) AS utmmedium_la,
        $attr_model_text(trafficsourse_utmmodel, trafficsourse_utmmodel) AS utmmodel_la,
        $attr_model_text(trafficsourse_utmsource, trafficsourse_utmmodel) AS utmsource_la,
        $attr_model_text(trafficsourse_utmterm, trafficsourse_utmmodel) AS utmterm_la,
        user_agent_id,
        utc_first_visit_dttm,
        $utc_dttm_to_dt(utc_visit_start_dttm) AS utc_visit_start_dt,
        visit_id,
        yandex_uid
    FROM RANGE($web_visits, $month_start)
    WHERE start_url NOT LIKE 'https://dostavka.yandex.ru/route%'
        AND start_url_domain_name = 'dostavka.yandex.ru'
);
-- Turns a list of 2-element tuples into a list of structs
-- with members `goal` (first element) and `time` (second element).
$list_to_tuples = ($list) -> {
    $pair_to_struct = ($pair) -> {
        RETURN AsStruct($pair.0 AS goal, $pair.1 AS time);
    };
    RETURN ListMap($list, $pair_to_struct);
};
-- Adds, for every visit, the list of (goal id, goal event time) pairs built by
-- zipping the two parallel goal lists.
$zip_goals = (
    SELECT
        visits.*,
        ListZip(goals_id, goals_event_time) AS zip_goals_id_goals_event_time
    FROM $METRICA_VISITS_raw AS visits
);
-- Converts the zipped (goal id, event time) pairs into {goal, time} structs.
$tuples = (
    SELECT
        zipped.*,
        $list_to_tuples(zip_goals_id_goals_event_time) AS list_zip_goals_id_goals_event_time
    FROM $zip_goals AS zipped
);
-- One output row per {goal, time} struct of a visit.
$flatten_goals = (
    SELECT
        *
    FROM $tuples
    FLATTEN BY list_zip_goals_id_goals_event_time
);
-- Expands the struct column into separate columns (`goal` and `time`).
$flatten_goal_to_colums = (
    SELECT
        *
    FROM $flatten_goals
    FLATTEN COLUMNS
);

-- Goal-level rows: the visit attributes plus the reached goal id and its event
-- time; the helper list columns used to build them are left out.
$raw = (
    SELECT
        -- visit attributes
        counter_id,
        deleted_flg,
        end_url,
        event_id,
        goals_order_id,
        is_bounce,
        is_mobile_flg,
        is_robot_internal,
        is_robot,
        link_url,
        metrica_user_id,
        order_nr,
        referer,
        region_id,
        search_engine_id,
        search_phrase,
        start_url,
        start_url_domain_name,
        -- traffic source and UTM tags
        trafficsourse_id_la,
        utmcampain_la,
        utmcontent_la,
        utmmedium_la,
        utmmodel_la,
        utmsource_la,
        utmterm_la,
        user_agent_id,
        utc_first_visit_dttm,
        utc_visit_start_dt,
        visit_id,
        yandex_uid,
        -- reached goal
        goal,
        time
    FROM $flatten_goal_to_colums
);

-- Number of reached goals per visit date, traffic source group, UTM tags and
-- traffic source name; every counter corresponds to one goal id.
$cnt_click = (
    SELECT
        utc_visit_start_dt,
        group_source,
        utmcampain_la,
        utmcontent_la,
        utmmedium_la,
        utmsource_la,
        utmterm_la,
        trafficsourse_id_la,
        COUNT_IF(goal = 227138371) AS cnt_click_connect,
        COUNT_IF(goal = 235747406) AS cnt_click_connecthead,
        COUNT_IF(goal = 235747446) AS cnt_click_connecttexpress,
        COUNT_IF(goal = 235807397) AS cnt_click_contract,
        COUNT_IF(goal = 230110699) AS cnt_click_fastconnect,
        COUNT_IF(goal = 235747482) AS cnt_click_ndd,
        COUNT_IF(goal = 228670569) AS cnt_click_order
    FROM $raw
    GROUP BY
        utc_visit_start_dt,
        -- coarse source group; 'direct_serp' traffic is counted as ad traffic
        CASE
            WHEN utmsource_la = 'direct_serp' OR trafficsourse_id_la = 'Переходы по рекламе'
                THEN 'Переходы по рекламе'
            WHEN trafficsourse_id_la = 'Переходы из поисковых систем'
                THEN 'Переходы из поисковых систем'
            WHEN trafficsourse_id_la = 'Прямые заходы'
                THEN 'Прямые заходы'
            WHEN trafficsourse_id_la = 'Переходы по ссылкам на сайтах'
                THEN 'Переходы по ссылкам на сайтах'
            ELSE 'other'
        END AS group_source,
        utmcampain_la,
        utmcontent_la,
        utmmedium_la,
        utmsource_la,
        utmterm_la,
        trafficsourse_id_la
);


-- Number of visits (and of visits flagged is_robot) per visit date, traffic
-- source group, UTM tags and traffic source name.
$cnt_visits = (
    SELECT
        utc_visit_start_dt,
        group_source,
        utmcampain_la,
        utmcontent_la,
        utmmedium_la,
        utmsource_la,
        utmterm_la,
        trafficsourse_id_la,
        COUNT(visit_id) AS cnt_visit,
        COUNT_IF(is_robot) AS is_robot
    FROM $METRICA_VISITS_raw
    GROUP BY
        utc_visit_start_dt,
        -- coarse source group; 'direct_serp' traffic is counted as ad traffic
        CASE
            WHEN utmsource_la = 'direct_serp' OR trafficsourse_id_la = 'Переходы по рекламе'
                THEN 'Переходы по рекламе'
            WHEN trafficsourse_id_la = 'Переходы из поисковых систем'
                THEN 'Переходы из поисковых систем'
            WHEN trafficsourse_id_la = 'Прямые заходы'
                THEN 'Прямые заходы'
            WHEN trafficsourse_id_la = 'Переходы по ссылкам на сайтах'
                THEN 'Переходы по ссылкам на сайтах'
            ELSE 'other'
        END AS group_source,
        utmcampain_la,
        utmcontent_la,
        utmmedium_la,
        utmsource_la,
        utmterm_la,
        trafficsourse_id_la
);
-- Visit counters extended with the click counters of the same
-- (date, source group, UTM tags, traffic source name) combination.
--
-- Fix: the UTM columns are NULL for visits that have no attribution entry at
-- the utmmodel = 2 position, and NULL keys never match in an equi-join, so the
-- clicks of those visits were silently dropped. The UTM keys are therefore
-- compared after replacing NULL with ''. This cannot collide with a real value:
-- an empty tag is already stored as 'non' upstream.
$final_web_1 = (
    SELECT
        v.*,
        cl.cnt_click_connect AS cnt_click_connect,
        cl.cnt_click_connecthead AS cnt_click_connecthead,
        cl.cnt_click_connecttexpress AS cnt_click_connecttexpress,
        cl.cnt_click_contract AS cnt_click_contract,
        cl.cnt_click_fastconnect AS cnt_click_fastconnect,
        cl.cnt_click_ndd AS cnt_click_ndd,
        cl.cnt_click_order AS cnt_click_order
    FROM $cnt_visits AS v
    LEFT JOIN $cnt_click AS cl
        ON cl.utc_visit_start_dt = v.utc_visit_start_dt
        AND cl.group_source = v.group_source
        AND (cl.utmcampain_la ?? '') = (v.utmcampain_la ?? '')
        AND (cl.utmcontent_la ?? '') = (v.utmcontent_la ?? '')
        AND (cl.utmmedium_la ?? '') = (v.utmmedium_la ?? '')
        AND (cl.utmsource_la ?? '') = (v.utmsource_la ?? '')
        AND (cl.utmterm_la ?? '') = (v.utmterm_la ?? '')
        AND cl.trafficsourse_id_la = v.trafficsourse_id_la
);

-- Daily web totals: visit and click counters summed over all sources and UTM
-- tags, for visit dates on/after $START_DT.
$final_web_2 = (
    SELECT
        utc_visit_start_dt,
        SUM(cnt_visit) AS cnt_visit,
        SUM(cnt_click_connect) AS cnt_click_connect,
        SUM(cnt_click_connecthead) AS cnt_click_connecthead,
        SUM(cnt_click_connecttexpress) AS cnt_click_connecttexpress,
        SUM(cnt_click_contract) AS cnt_click_contract,
        SUM(cnt_click_fastconnect) AS cnt_click_fastconnect,
        SUM(cnt_click_ndd) AS cnt_click_ndd,
        SUM(cnt_click_order) AS cnt_click_order
    FROM $final_web_1
    WHERE utc_visit_start_dt >= $START_DT
    GROUP BY
        utc_visit_start_dt
);



-- Store the daily report: web totals for every visit date plus, when present,
-- the funnel counters of the leads called on that date. The previous contents
-- of the target table are replaced.
INSERT INTO $SAVE_to_table_with_agg WITH TRUNCATE
SELECT
    f.*,
    a.*
FROM $final_web_2 AS f
LEFT JOIN $info_with_agg AS a
    ON f.utc_visit_start_dt = a.full_call_dt
;
