-- Run every statement below against the `hahn` cluster.
USE hahn;

-- Query parameter: name of the YT pool to run in (supplied by the caller).
DECLARE $yt_pool AS String;

-- Relax strict Yson handling for the whole query.
-- NOTE(review): no Yson:: calls are visible in this script; confirm the pragma is still needed.
PRAGMA yson.DisableStrict = '1';
-- Switch off automatic merging of temporary tables.
PRAGMA yt.TemporaryAutoMerge = 'disabled';
-- Use the pool passed in through $yt_pool.
PRAGMA yt.Pool = $yt_pool;


-- Weighted query counts summed per (cluster, engine, ui, day).
-- The source column `engine_ui` is renamed to `ui` inside GROUP BY so that it
-- lines up with the `ui` key of the other datasets joined later.
$flow_agg = (
    SELECT
        cluster,
        engine,
        ui,
        day,
        SUM(queries_w)   AS queries_w,
        SUM(queries_w99) AS queries_w99,
        SUM(queries_wC2) AS queries_wC2
    FROM `//home/searchshare/dashboard/yandex_google_datasets/flow_aggregation_weighted`
    GROUP BY
        day,
        cluster,
        engine,
        engine_ui AS ui
);


-- Revenue summed per (cluster, ui, day); one row per key after grouping.
$money_yandex = (
    SELECT
        cluster,
        ui,
        day,
        SUM(revenue) AS revenue
    FROM `//home/searchshare/dashboard/cube_datasets/cluster_data_by_day_and_ui`
    GROUP BY
        day,
        cluster,
        ui
);


-- `surplus_v7_sum` summed per (cluster, ui, day); one row per key after grouping.
$surplus_yandex = (
    SELECT
        cluster,
        ui,
        day,
        SUM(surplus_v7_sum) AS surplus_v7
    FROM `//home/searchshare/dashboard/cube_datasets/surplus`
    GROUP BY
        day,
        cluster,
        ui
);


-- Flow rows enriched with revenue and surplus.
-- Both metrics are joined on (cluster, ui, day) only, so they would match every
-- engine row for that key; the IF() keeps them on "Yandex" rows and leaves
-- NULL on rows of any other engine.
$data = (
    SELECT
        f.*,
        IF(f.engine = "Yandex", m.revenue)    AS revenue_yandex,
        IF(f.engine = "Yandex", s.surplus_v7) AS surplus_v7_yandex
    FROM $flow_agg AS f
    LEFT JOIN ANY $money_yandex AS m
        ON  f.cluster = m.cluster
        AND f.ui = m.ui
        AND f.day = m.day
    LEFT JOIN ANY $surplus_yandex AS s
        ON  f.cluster = s.cluster
        AND f.ui = s.ui
        AND f.day = s.day
);


-- Cluster hierarchy lookup; `cluster_id` is exposed as `cluster` so it can be
-- used directly as a join key.
$tree = (
    SELECT
        cluster_id AS cluster,
        cluster_name,
        middle_cluster_name,
        parent_cluster_name
    FROM `//home/searchshare/dashboard/cluster_tree`
);

-- Every data row plus the hierarchy names of its cluster.
-- LEFT JOIN keeps rows whose cluster has no entry in the tree; their name
-- columns are NULL.
$data_with_tree = (
    SELECT
        *
    FROM $data AS d
    LEFT JOIN ANY $tree AS t
        USING (cluster)
);


-- One row per (day, cluster, ui): the per-engine rows are folded into
-- side-by-side Yandex / Google query columns.
-- The *_yandex money columns are plain SUMs because they are already NULL on
-- non-Yandex rows; the name columns are taken with SOME() since they come
-- from a per-cluster lookup.
$cluster_info = (
    SELECT
        day,
        cluster,
        ui,
        SOME(cluster_name)        AS cluster_name,
        SOME(middle_cluster_name) AS middle_cluster_name,
        SOME(parent_cluster_name) AS parent_cluster_name,
        SUM(surplus_v7_yandex)    AS surplus_v7_yandex,
        SUM(revenue_yandex)       AS revenue_yandex,
        SUM_IF(queries_w,   engine = "Yandex") AS queries_yandex,
        SUM_IF(queries_wC2, engine = "Yandex") AS queries_C2_yandex,
        SUM_IF(queries_w99, engine = "Yandex") AS queries_99_yandex,
        SUM_IF(queries_w,   engine = "Google") AS queries_google,
        SUM_IF(queries_wC2, engine = "Google") AS queries_C2_google,
        SUM_IF(queries_w99, engine = "Google") AS queries_99_google
    FROM $data_with_tree
    GROUP BY
        cluster,
        ui,
        day
);

-- Overwrite the published per-cluster table, sorted by (cluster, day).
INSERT INTO `//home/searchshare/dashboard/yandex_google_datasets/cluster_info` WITH TRUNCATE
SELECT
    *
FROM $cluster_info
ORDER BY
    cluster,
    day;


-- Per-cluster rows labelled with a display category:
--   "<parent>"             when there is no middle cluster name,
--   "<parent> - <middle>"  otherwise.
-- Rows whose cluster is 'all_clusters' are left out.
$source = (
    SELECT
        a.*,
        a.parent_cluster_name
            || IF(a.middle_cluster_name IS NOT NULL, " - " || a.middle_cluster_name, "") AS category
    FROM $cluster_info AS a
    WHERE a.cluster != 'all_clusters'
);


-- Rolling totals per (ui, category) over a window of `$len` rows ending at each
-- day, together with the same totals for the window shifted 365 rows back
-- (the `*_prev` columns).
--
-- Parameter:
--   $len - window length; also emitted as `interval_days`.
--
-- Output: one row per (ui, category, day), with `day` exposed as `last_date`.
--
-- The frames are ROWS-based, not date-based. They equal "`$len` days" and
-- "one year earlier" only if each (ui, category) partition has exactly one row
-- for every consecutive day.
-- NOTE(review): confirm the upstream data has no missing days.
DEFINE SUBQUERY $interval_stats($len) AS

    -- Collapse clusters into categories: one row per (ui, category, day).
    $t = SELECT
        category,
        ui,
        day,
        SOME(parent_cluster_name) AS parent_cluster_name,
        SOME(middle_cluster_name) AS middle_cluster_name,
        SUM(queries_yandex) AS queries_yandex,
        SUM(queries_C2_yandex) AS queries_C2_yandex,
        SUM(queries_99_yandex) AS queries_99_yandex,
        SUM(queries_google) AS queries_google,
        SUM(queries_C2_google) AS queries_C2_google,
        SUM(queries_99_google) AS queries_99_google,
        SUM(revenue_yandex) AS revenue,
        SUM(surplus_v7_yandex) AS surplus_v7
    FROM $source AS s
    GROUP BY ui, category, day;

    SELECT
        category,
        ui,
        parent_cluster_name,
        middle_cluster_name,
        day AS last_date,
        $len AS interval_days,
        -- Current window: the row itself plus the ($len - 1) rows before it.
        SUM(queries_yandex) OVER w AS queries_yandex,
        SUM(queries_C2_yandex) OVER w AS queries_C2_yandex,
        SUM(queries_99_yandex) OVER w AS queries_99_yandex,
        SUM(queries_google) OVER w AS queries_google,
        SUM(queries_C2_google) OVER w AS queries_C2_google,
        SUM(queries_99_google) OVER w AS queries_99_google,
        SUM(revenue) OVER w AS revenue,
        SUM(surplus_v7) OVER w AS surplus_v7,
        -- Comparison window: same length, ending 365 rows before the current row.
        SUM(queries_yandex) OVER w_prev AS queries_yandex_prev,
        SUM(queries_C2_yandex) OVER w_prev AS queries_C2_yandex_prev,
        SUM(queries_99_yandex) OVER w_prev AS queries_99_yandex_prev,
        SUM(queries_google) OVER w_prev AS queries_google_prev,
        SUM(queries_C2_google) OVER w_prev AS queries_C2_google_prev,
        SUM(queries_99_google) OVER w_prev AS queries_99_google_prev,
        SUM(revenue) OVER w_prev AS revenue_prev,
        SUM(surplus_v7) OVER w_prev AS surplus_v7_prev
    FROM $t AS s
    WINDOW w AS (
        PARTITION BY ui, category
        ORDER BY day
        ROWS BETWEEN ($len - 1) PRECEDING AND CURRENT ROW
    ), w_prev AS (
        PARTITION BY ui, category
        ORDER BY day
        ROWS BETWEEN ($len + 364) PRECEDING AND 365 PRECEDING
    );

END DEFINE;


-- Overwrite the published per-category table with the 14/30/90/365 windows.
-- The filter is applied to the finished window output, so only rows ending on
-- the first day of a month from 2020 onwards are stored, while earlier days
-- still contribute to the sums.
INSERT INTO `//home/searchshare/dashboard/yandex_google_datasets/category_info` WITH TRUNCATE
SELECT
    *
FROM (
    SELECT * FROM $interval_stats(14)
    UNION ALL
    SELECT * FROM $interval_stats(30)
    UNION ALL
    SELECT * FROM $interval_stats(90)
    UNION ALL
    SELECT * FROM $interval_stats(365)
)
WHERE DateTime::GetYear(last_date) >= 2020
    AND DateTime::GetDayOfMonth(last_date) = 1;
