-- Session settings for this YQL script.
--
-- The DQ query-analysis switch below is intentionally left commented out.
-- NOTE(review): presumably kept for ad-hoc debugging; confirm it is still needed.
-- PRAGMA dq.AnalyzeQuery = '1';
-- Sets the YT provider's DefaultMaxJobFails to '1'.
-- NOTE(review): presumably makes an operation fail on its first failed job — confirm against the YQL pragma docs.
PRAGMA yt.DefaultMaxJobFails = '1';
-- Sets the YT provider's TemporaryAutoMerge mode to 'disabled'.
-- NOTE(review): presumably turns off auto-merging of temporary tables — confirm against the YQL pragma docs.
PRAGMA yt.TemporaryAutoMerge = 'disabled';

-- Cluster against which every table path in this script is resolved.
USE hahn;

-- Input: per-cluster raw rows; read once, keeping only rows with a non-NULL cluster.
$clusters_table = "//home/searchshare/dashboard/yandex_google_datasets/flow_aggregation_raw";
-- Input: weights joined to the raw rows by date, source log, UI and entry system.
$weights_table = "//home/searchshare/dashboard/yandex_google_datasets/flow_weights";
-- Output: fully overwritten (INSERT ... WITH TRUNCATE) by the final statement of the script.
$output_table = "//home/searchshare/dashboard/yandex_google_datasets/flow_aggregation_weighted";

-- Raw rows from $clusters_table, restricted to rows that have a cluster.
-- NOTE: the name $source is rebound further down to the weighted aggregate;
-- this definition is only consumed by $target_data.
$source =
    SELECT *
    FROM $clusters_table
    WHERE cluster IS NOT NULL;


-- Raw rows enriched with their matching weight row and a final multiplier `w`.
-- A raw row is matched to $weights_table on (date, source log, UI, entry system);
-- raw rows without a matching weight row are dropped by the inner join.
$target_data =
    SELECT
        raw.*,
        wt.*,
        -- Align queries and click-throughs with Traf, taking the ui into account:
        -- https://st.yandex-team.ru/SEARCHSHARE-76#5fd0d69e24022e1b0fff8e02
        -- Desktop targets get a factor of 2, every other target system 1.6.
        IF(target_system = "desktop", 2, 1.6) * weight AS w
    FROM $source AS raw
    INNER JOIN $weights_table AS wt
        ON  raw.source = wt.source_log
        AND raw.ui = wt.source_ui
        AND raw.enter_type = wt.source_system
        -- the weights table keys its date as a string, hence the double cast
        AND CAST(CAST(raw.day AS Date) AS String) = wt.fielddate;


-- Weighted query totals per (day, cluster, engine, engine_ui).
-- Rebinds $source: from here on it denotes this aggregate, not the raw rows.
-- target_system / target_ui are exposed under the names engine / engine_ui.
$source =
    SELECT
        day,
        cluster,
        engine,
        engine_ui,
        -- each queries* counter is scaled by the row multiplier w before summing
        SUM(queries * w)    AS queries_w,
        SUM(queries_90 * w) AS queries_w90,
        SUM(queries_99 * w) AS queries_w99,
        SUM(queries_C2 * w) AS queries_wC2
    FROM $target_data
    GROUP BY
        day,
        cluster,
        target_system AS engine,
        target_ui AS engine_ui;


-- Latest day present in the weighted aggregate; used below to discard
-- year-over-year rows whose projected day lies beyond the available data.
$max_day =
    SELECT MAX(day)
    FROM $source;


-- Year-over-year view: each (day, cluster, engine, engine_ui) slice paired with
-- the same slice 365 days earlier.
-- The FULL JOIN keeps slices that exist on only one side: the absent side's
-- metrics default to 0.0, and a slice known only from the earlier year is placed
-- on its projected day (earlier day + 365 days).
-- The outer filter then drops projected days later than $max_day.
$source_yoy =
    SELECT *
    FROM (
        SELECT
            COALESCE(
                cur.day,
                CAST(prev.day AS Date) + DateTime::IntervalFromDays(365)
            ) AS day,
            COALESCE(cur.cluster, prev.cluster)     AS cluster,
            COALESCE(cur.engine, prev.engine)       AS engine,
            COALESCE(cur.engine_ui, prev.engine_ui) AS engine_ui,
            COALESCE(cur.queries_w, 0.0)    AS queries_w,
            COALESCE(prev.queries_w, 0.0)   AS queries_w_year_ago,
            COALESCE(cur.queries_wC2, 0.0)  AS queries_wC2,
            COALESCE(prev.queries_wC2, 0.0) AS queries_wC2_year_ago,
            COALESCE(cur.queries_w99, 0.0)  AS queries_w99,
            COALESCE(prev.queries_w99, 0.0) AS queries_w99_year_ago
        FROM $source AS cur
        FULL JOIN $source AS prev
            ON  cur.day = prev.day + DateTime::IntervalFromDays(365)
            AND cur.cluster = prev.cluster
            AND cur.engine = prev.engine
            AND cur.engine_ui = prev.engine_ui
    )
    WHERE day <= $max_day;


-- Dense grid: every distinct (day, engine, engine_ui) seen in $source_yoy
-- combined with every distinct cluster seen there.
$combos =
    SELECT *
    FROM (
        SELECT DISTINCT
            day,
            engine,
            engine_ui
        FROM $source_yoy
    ) AS slices
    CROSS JOIN (
        SELECT DISTINCT cluster
        FROM $source_yoy
    ) AS clusters;


-- Final per-cluster metrics on the dense $combos grid, plus per-slice totals.
--
-- Every (day, engine, engine_ui, cluster) combination from $combos gets a row.
-- Combinations with no row in $source_yoy come out of the LEFT JOIN with NULL
-- metrics; they are defaulted to 0.0 here, matching how $source_yoy already
-- treats a missing current or year-ago side.
-- The *_total columns sum each metric over all clusters of the same
-- (day, engine, engine_ui) slice; the 0.0 defaults do not change those sums.
$res = SELECT
    day,
    engine,
    engine_ui,
    cluster,
    queries_w,
    queries_w_year_ago,
    queries_wC2,
    queries_wC2_year_ago,
    queries_w99,
    queries_w99_year_ago,
    SUM(queries_w) OVER w AS queries_w_total,
    SUM(queries_w_year_ago) OVER w AS queries_w_year_ago_total,
    SUM(queries_wC2) OVER w AS queries_wC2_total,
    SUM(queries_wC2_year_ago) OVER w AS queries_wC2_year_ago_total,
    SUM(queries_w99) OVER w AS queries_w99_total,
    SUM(queries_w99_year_ago) OVER w AS queries_w99_year_ago_total
FROM (
    SELECT
        -- grid keys always come from $combos, so they are never NULL-filled by the join
        c.day AS day,
        c.engine AS engine,
        c.engine_ui AS engine_ui,
        c.cluster AS cluster,
        -- filler rows (no match in $source_yoy) report zero instead of NULL
        y.queries_w ?? 0.0 AS queries_w,
        y.queries_w_year_ago ?? 0.0 AS queries_w_year_ago,
        y.queries_wC2 ?? 0.0 AS queries_wC2,
        y.queries_wC2_year_ago ?? 0.0 AS queries_wC2_year_ago,
        y.queries_w99 ?? 0.0 AS queries_w99,
        y.queries_w99_year_ago ?? 0.0 AS queries_w99_year_ago
    FROM $combos AS c
    LEFT JOIN $source_yoy AS y
    ON  c.day = y.day AND
        c.engine = y.engine AND
        c.engine_ui = y.engine_ui AND
        c.cluster = y.cluster
) AS r
WINDOW w AS (
    PARTITION BY day, engine, engine_ui
);

-- Publish the result: $output_table is overwritten (WITH TRUNCATE) with every
-- $res row, enriched with cluster names from the cluster tree.
-- LEFT JOIN keeps $res rows whose cluster has no entry in the tree.
-- NOTE(review): ANY on the right side presumably limits the match to one tree
-- row per cluster so rows are not multiplied — confirm against the YQL JOIN docs.
INSERT INTO $output_table WITH TRUNCATE
SELECT *
FROM $res AS res
LEFT JOIN ANY (
    SELECT
        cluster_id AS cluster,
        cluster_name,
        middle_cluster_name,
        parent_cluster_name
    FROM `//home/searchshare/dashboard/cluster_tree`
) AS tree
USING (cluster);
