-- Default cluster for every table path used in this script.
USE hahn;
-- Source cube: read for the per-client rows, the per-client average money and the final industry listing.
$read_path = "statbox/cube/daily/comdep/cd_reporting/cd_all_with_tiers";
-- Contract table: read to build the "pro" and "base" agency ID lists.
$prof_agency_path = "home/balance/prod/bo/v_contract_apex_full";
-- Output table: fully overwritten (INSERT ... WITH TRUNCATE) by the final statement.
$write_path = "home/geo-analytics/serge-ssk/agency_metrics/industry_distribution";

-- "Pro" agencies: a single-row result holding the list of distinct non-NULL agency IDs
-- whose contract row has finish_dt >= "2021-12-01" and one of the two scale types below.
-- NOTE(review): finish_dt is compared against a string literal — presumably an ISO-formatted
-- string column; confirm.
$agency_pro_list = (
    SELECT
        AGGREGATE_LIST(DISTINCT agency_id) AS AgencyID
    FROM $prof_agency_path
    WHERE wsale_ag_prm_awrd_sc_tp IN ("Премиум 2020", "РМП РФ")
        AND finish_dt >= "2021-12-01"
        AND agency_id IS NOT NULL
);

-- "Base" agencies: a single-row result holding the list of distinct non-NULL agency IDs
-- whose contract row has finish_dt >= "2021-12-01" and one of the two scale types below.
-- NOTE(review): finish_dt is compared against a string literal — presumably an ISO-formatted
-- string column; confirm.
$agency_base_list = (
    SELECT
        AGGREGATE_LIST(DISTINCT agency_id) AS AgencyID
    FROM $prof_agency_path
    WHERE wsale_ag_prm_awrd_sc_tp IN ("Базовая 2015", "Базовая лайт")
        AND finish_dt >= "2021-12-01"
        AND agency_id IS NOT NULL
);

-- Hard-coded IDs of the agencies that are treated as aggregators.
$agency_agregators = (
    SELECT [
        523581, 28047771, 2371403, 2124001, 67140973,
        9830514, 1332316, 35786724, 32805081, 5946133,
        1621799, 322057, 472729, 36002706
    ]
);

-- Monthly 2021 cube rows with non-NULL money, restricted to the agency / client / elama
-- operator roles. Non-"client" rows are kept only when their agency is an aggregator or
-- appears in the pro or base agency list.
$select_prepare = (
    SELECT
        period AS Month,
        clientid AS ClientID,
        agency_id AS AgencyID,
        curr_industry AS Industry,
        operator_role AS OperatorRole
    FROM $read_path
    WHERE scale == "months"
        AND period LIKE '2021%'
        AND money IS NOT NULL
        AND operator_role IN ("agency", "client", "elama")
        AND (
            operator_role == "client"
            OR ListHas($agency_agregators, agency_id)
            OR ListHas($agency_pro_list, agency_id)
            OR ListHas($agency_base_list, agency_id)
        )
);

-- Split rows into the direct and the agency channel: the "elama" and "agency" operator
-- roles map to the agency channel, everything else to the direct channel.
$select_agency_vs_client = (
    SELECT
        Month,
        ClientID,
        AgencyID,
        Industry,
        CASE
            WHEN OperatorRole IN ("elama", "agency") THEN "Агентский канал"
            ELSE "Прямой канал"
        END AS TypeClient
    FROM $select_prepare
);

-- Refine the channel label into four groups. Branches are checked in priority order:
-- direct channel, aggregators, "pro" agencies; every remaining row is labelled "base".
$select_type_clients = (
    SELECT
        Month,
        ClientID,
        AgencyID,
        CASE
            WHEN TypeClient == "Прямой канал" THEN "Прямой канал"
            WHEN ListHas($agency_agregators, AgencyID) THEN "Агрегаторы"
            WHEN ListHas($agency_pro_list, AgencyID) THEN "Агентский канал (про)"
            ELSE "Агентский канал (база)"
        END AS TypeClient,
        Industry
    FROM $select_agency_vs_client
);

-- Per-client average of money over the 2021 monthly "Общая выручка" rows.
-- NOTE(review): AVG runs over matching rows, not over months — if the cube holds several
-- rows per client and month, this is not a per-month average; confirm the cube grain.
$mean_budget_clients = (
    SELECT
        clientid,
        AVG(money) AS MeanTotalRevenue
    FROM $read_path
    WHERE product == "Общая выручка"
        AND scale == "months"
        AND period LIKE '2021%'
        AND money IS NOT NULL
    GROUP BY clientid
);

-- Attach a size bucket to every typed client row, based on the client's average revenue.
-- Only clients with an average strictly between 20 000 and 50 000 000 are kept.
$select_budget_type_clients = (
    SELECT
        typed.Month AS Month,
        typed.ClientID AS ClientID,
        typed.TypeClient AS TypeClient,
        typed.Industry AS Industry,
        CASE
            WHEN budget.MeanTotalRevenue < 500 * 1000 THEN '20K-500K'
            -- Reaching this branch already implies MeanTotalRevenue >= 500 000.
            WHEN budget.MeanTotalRevenue < 1000 * 1000 THEN '500K-1M'
            ELSE '1M-50M'
        END AS ClientSize
    FROM $select_type_clients AS typed
    INNER JOIN $mean_budget_clients AS budget
        ON typed.ClientID == budget.clientid
    WHERE budget.MeanTotalRevenue > 20 * 1000
        AND budget.MeanTotalRevenue < 50 * 1000 * 1000
);

-- Number of distinct clients in every (Industry, TypeClient, ClientSize) cell.
$select_aux = (
    SELECT
        Industry,
        TypeClient,
        ClientSize,
        COUNT(DISTINCT ClientID) AS CountClients
    FROM $select_budget_type_clients
    GROUP BY
        Industry,
        TypeClient,
        ClientSize
);

-- Per (TypeClient, ClientSize) segment: the sum of the per-industry client counts.
-- It serves as the denominator when the counts are normalised.
$select_aux_aux = (
    SELECT
        TypeClient,
        ClientSize,
        SUM(CountClients) AS SumCountClients
    FROM $select_aux
    GROUP BY
        TypeClient,
        ClientSize
);


-- Overwrite the output table with one row per (Industry, TypeClient, ClientSize) cell:
-- the distinct-client count and its share within the (TypeClient, ClientSize) segment,
-- rounded to 4 decimal places.
INSERT INTO $write_path WITH TRUNCATE
SELECT
    cell.Industry AS Industry,
    cell.TypeClient AS TypeClient,
    cell.ClientSize AS ClientSize,
    cell.CountClients AS CountClients,
    -- Both operands are integer counts. Integer "/" is integer division, which would
    -- truncate the share to 0 or 1 before rounding, so divide as Double instead.
    Math::Round(
        CAST(cell.CountClients AS Double) / CAST(segment.SumCountClients AS Double),
        -4
    ) AS NormCountClients
FROM $select_aux AS cell
INNER JOIN $select_aux_aux AS segment
    ON cell.TypeClient == segment.TypeClient
    AND cell.ClientSize == segment.ClientSize;

-- Return every distinct curr_industry value found in the source cube.
-- NOTE(review): this looks like a leftover exploratory query — it reads the cube with no
-- scale/period filter and does not feed the output table; confirm it is still needed.
SELECT
    curr_industry
FROM $read_path
GROUP BY curr_industry;
