-- Session settings for this YQL script (semantics per the YQL PRAGMA docs — confirm
-- against the deployed YQL version).
-- Infer the schema of schemaless YT tables from their first row(s).
PRAGMA yt.InferSchema = '1';
-- Make IN follow ANSI semantics when either side can be NULL or the collection is empty.
PRAGMA AnsiInForEmptyOrNullableItemsCollections;
-- Let Yson values be converted automatically to the requested type.
PRAGMA yson.AutoConvert; 
-- Produce result column names without the table-alias prefix after JOINs.
PRAGMA SimpleColumns;

-- All table paths below are resolved on this cluster.
use hahn;

-- Query parameters supplied by the caller; only the "branch" key is read here.
DECLARE $param_dict AS Dict<String, String>;

-- Branch name looked up once and reused below.
$branch = $param_dict["branch"];

-- Root directory for all tables of this run:
--   branch = "prod"  -> the production tree,
--   anything else    -> a per-branch subdirectory of the dev tree.
$base_dir = IF(
  $branch = "prod",
  "//home/vipplanners/sow",
  "//home/vipplanners/sow_dev" || '/' || $branch
);

-- input
-- To get a stronger signal we take the full intersection
-- (rather than only the final set of verdicts).
$intersection = $base_dir || '/' || 'dict/intersection/latest';

-- SPARK company table; the query below reads spark_id, name and main_okved2_name from it.
$spark_schematized = '//home/comdep-analytics/zedlaa/spark/all_2';
-- Domain/tier scheme, used to rank by the number of distinct counterparties.
$tiers = '//home/comdep-analytics/public/client_tiers/fact/latest_v2';

-- output
-- Destination table, located under the branch-dependent $base_dir.
$result = $base_dir || '/' || 'dict/agency_bl';

-- Clients whose total cost over all hypercube tables from 2018-01-01 onwards is positive.
-- The aggregate is computed in the inner query and filtered in the outer one.
$client_stat = (
  SELECT client_id
  FROM (
    SELECT
      client_id
    , SUM(cost) AS total_cost
    FROM RANGE(`//home/comdep-cubes/direct/production/hypercubes/clients`, `2018-01-01`)
    GROUP BY client_id
  )
  WHERE total_cost > 0
);

-- Companies whose main OKVED2 activity is one of the two advertising categories.
$ad_companies = (
  SELECT DISTINCT spark_id, name, main_okved2_name
  FROM $spark_schematized
  WHERE main_okved2_name IN (
      "Деятельность рекламная",
      "Деятельность рекламных агентств"
  )
);

-- One row per (intersection row x matching tier row) for clients that:
--   * appear in $client_stat (semi join: filters only, adds no columns),
--   * are linked to an advertising company by spark_id,
--   * have a tier row with a positive agency_id.
-- NOTE(review): curr_counterparty_id is unqualified; it comes from either
-- $intersection or $tiers — confirm which.
$agency_links = (
  SELECT
    a.spark_id AS spark_id
  , a.client_id AS client_id
  , name
  , main_okved2_name
  , curr_counterparty_id
  FROM $intersection AS a
  LEFT SEMI JOIN $client_stat AS cwl
    ON a.client_id = cwl.client_id
  JOIN $ad_companies AS b
    ON a.spark_id = b.spark_id
  JOIN $tiers AS c
    ON a.client_id = c.client_id
  WHERE c.agency_id > 0
);

-- Overwrite the result table with advertising companies that cover more than
-- 10 distinct counterparties, sorted by spark_id.
INSERT INTO $result WITH TRUNCATE
SELECT
  spark_id
, name
, main_okved2_name
, COUNT(DISTINCT curr_counterparty_id) AS counterparties_covered
FROM $agency_links
GROUP BY
  spark_id
, name
, main_okved2_name
HAVING COUNT(DISTINCT curr_counterparty_id) > 10
ORDER BY spark_id
;
