-- OPERANALYTICS-479
-- регулярный отчет по пейджам-клиентам
-- только коммерция
-- Компоненты:
--  1. Регулярный отчет с подневной статистикой
--  2. Датасет над списком таблиц с помесячной агрегацией
-- YT execution settings and the cluster used by this query.
PRAGMA yt.ParallelOperationsLimit = "7";  -- the default per-user limit is 10
PRAGMA yt.DefaultOperationWeight = "0.5"; -- the default is 1.0
PRAGMA yt.PoolTrees = "physical";
PRAGMA yt.TentativePoolTrees = "cloud";
PRAGMA AnsiInForEmptyOrNullableItemsCollections;
USE hahn;

-- Run parameters.
-- $FIRST_TABLE: tables whose name sorts below this value are ignored in both folders.
$FIRST_TABLE = "2019-12-31";
$FORCE_UPDATE =  Nothing(String?);  -- "YYYY-mm-dd" or Nothing(String?)
$TASK_MAX_TABLES = 60;  -- how many of the latest source tables are examined for update in one run
$HISTORY_MAX_TABLES = 200; -- how many tables to keep
$SRC_BS_FOLDER = "//cooked_logs/bs-chevent-cooked-log/1d";
$DST_FOLDER = "//home/vipplanners/dashboard_geoproducts_clients/daily/chevent";

-- Whitelist of page ids; source rows are kept only when pageid is in this list.
-- https://wiki.yandex-team.ru/users/ntumasheva/medijjka-navi/
$pages_id_wl = (
  355417, 355501,
  472668, 472667, 593113, 593111,  -- Navigator native pages
  588231, 588230, 591292, 591289   -- Navigator YAN pages
);

-- Lists the table nodes of folder $x whose name is not below $first_table.
-- Output columns:
--   table_mod_time - the modification_time attribute parsed into a Timestamp
--   table_path     - Path as returned by FOLDER
--   table_name     - table name extracted from Path by TableName(Path, "yt")
DEFINE SUBQUERY $get_folder_tables($x, $first_table) AS
  SELECT
    DateTime::MakeTimestamp(
      DateTime::ParseIso8601(Yson::LookupString(Attributes, "modification_time"))
    ) AS table_mod_time,
    Path AS table_path,
    TableName(Path, "yt") AS table_name
  FROM FOLDER($x, modification_time)
  WHERE Type == "table"
    AND TableName(Path, "yt") >= $first_table;
END DEFINE;

-- TRUE when the destination modification time is NULL or earlier than the
-- source modification time, i.e. the destination table needs recomputing.
$is_old_src_dst_mod_time = ($src_mt, $dst_mt) -> (
  ($dst_mt IS NULL) OR ($dst_mt < $src_mt)
);

-- Builds the list of table names to (re)compute in this run.
-- 1. Take the latest $TASK_MAX_TABLES tables from the source folder.
-- 2.a If $FORCE_UPDATE is set, keep only those dated on or after it; otherwise 2.b.
-- 2.b Keep only those that have no destination table yet or whose source
--     was modified after the destination table.
$to_update = (
-- Keep the name ($x.0) of every tuple whose flag ($x.1) is true; the rest are dropped.
SELECT ListFlatMap(table_names, ($x) -> (IF($x.1, $x.0))) AS table_names
FROM (
  -- TOP aggregates (table_name, needs_update) tuples and keeps the
  -- $TASK_MAX_TABLES greatest ones; the name is the first tuple element,
  -- so presumably the greatest names win (tuple ordering - TODO confirm).
  SELECT TOP(
      (src.table_name
      , IF(
          $FORCE_UPDATE IS NOT NULL
        , CAST($FORCE_UPDATE AS Date) <= CAST(src.table_name AS Date)
        , $is_old_src_dst_mod_time(src.table_mod_time, dst.table_mod_time))
      )
    , $TASK_MAX_TABLES
    ) AS table_names
  FROM $get_folder_tables($SRC_BS_FOLDER, $FIRST_TABLE) AS src
  -- LEFT JOIN: dst.table_mod_time is NULL when the destination table does not exist.
  LEFT JOIN $get_folder_tables($DST_FOLDER, $FIRST_TABLE) AS dst
    ON src.table_name == dst.table_name
));


-- Recomputes one destination table from the source table of the same name.
-- $x: table name shared by the source and destination folders; it is also
--     written into the table_date column of every output row.
DEFINE ACTION $process_table($x) AS
  $src_bs_table_path = $SRC_BS_FOLDER || '/' || $x;
  $dst_table_path = $DST_FOLDER || '/' || $x;

  -- WITH TRUNCATE: the destination table is overwritten, not appended to.
  INSERT INTO $dst_table_path WITH TRUNCATE
  SELECT
    pageid,
    impid,
    orderid,
    $x AS table_date,
    eventdate,
    -- NOTE(review): 1e6, 1.18 and 30. presumably convert eventcost into
    -- rubles without VAT (see the column name) - confirm the constants.
    SUM(e.eventcost / 1e6) / 1.18 * 30. AS cost_rub_wo_nds
  FROM $src_bs_table_path AS e
  WHERE e.pageid IN COMPACT $pages_id_wl
    AND e.options_commerce
    AND NOT e.`autobudgetoptions_paid-actions`
    AND e.fraudbits = 0
  GROUP BY
    e.pageid AS pageid,
    e.impid AS impid,
    e.orderid AS orderid,
    -- eventtime is cast to Uint32 seconds and reduced to a calendar date
    DateTime::MakeDate(DateTime::FromSeconds(CAST(e.eventtime AS Uint32))) AS eventdate
  ORDER BY
    pageid,
    impid
  ;
  COMMIT;
END DEFINE;


-- Run $process_table once for every table name selected in $to_update.
EVALUATE FOR $x IN $to_update DO $process_table($x)
;
COMMIT;
-- Removal of old tables: keep the $HISTORY_MAX_TABLES tables with the greatest
-- names in $DST_FOLDER and drop every other table.
$to_remove = (
SELECT AGGREGATE_LIST(Path)
FROM (
  SELECT Path, TableName(Path, "yt") AS table_name
  FROM FOLDER($DST_FOLDER)
  -- Only table nodes take part in retention: other node types must neither
  -- use up the retention window nor be passed to DROP TABLE.
  WHERE Type == "table"
  ORDER BY table_name DESC
  -- NOTE(review): 100000 presumably acts as an "unbounded" LIMIT that lets
  -- OFFSET skip the tables to keep - confirm.
  LIMIT 100000
  OFFSET $HISTORY_MAX_TABLES
  )
);
-- Drop the selected tables one by one, committing after each drop.
EVALUATE FOR $t IN $to_remove
DO BEGIN
  DROP TABLE $t; COMMIT;
END DO;
