-- Default cluster for every table path referenced below.
USE hahn;

-- YT execution settings: job-failure limit set to 1, auto-merge of temporary tables disabled.
PRAGMA yt.DefaultMaxJobFails = '1';
PRAGMA yt.TemporaryAutoMerge = 'disabled';

-- Parameters supplied by the caller.
--   $yt_pool    - YT pool name, applied through PRAGMA yt.Pool below.
--   $start_date - first date of the period to recalculate.
--   $end_date   - last date of the period to recalculate.
-- Both dates are CAST to Date further down, so they presumably arrive as
-- `YYYY-MM-DD` strings - TODO confirm with the caller.
DECLARE $yt_pool AS String;
DECLARE $start_date AS String;
DECLARE $end_date AS String;

-- Must come after the DECLARE above because it reads $yt_pool.
PRAGMA yt.Pool = $yt_pool;

-- Year-over-year offset, in days.
$YEAR = 365;

-- Renders a date as `YYYY-MM`; shared by the three month boundaries below.
$month_fmt = DateTime::Format("%Y-%m");

-- Months containing the start and the end of the requested period.
$start_month = $month_fmt(CAST($start_date AS Date));
$end_month = $month_fmt(CAST($end_date AS Date));

-- Month containing the day that lies $YEAR days before the period start.
$start_month_ago = $month_fmt(
    CAST($start_date AS Date) - DateTime::IntervalFromDays($YEAR)
);


-- Raw cube rows from the tables of //home/searchshare/squeeze_cube whose names fall
-- between $start_month_ago and $end_month (presumably one table per `YYYY-MM` month -
-- TODO confirm). Rows without a query or without a cluster are discarded.
$source = (
    SELECT
        *
    FROM RANGE("//home/searchshare/squeeze_cube", $start_month_ago, $end_month)
    WHERE cluster IS NOT NULL
        AND query IS NOT NULL
);

-- Per (day, cluster, ui) aggregates over $source.
--   queries / revenue                - row count and summed revenue;
--   queries_with_transaction         - rows whose revenue is non-zero;
--   queries_C2 / revenue_C2          - the same sums weighted by crypta_C2_prob;
--   queries_99 / revenue_99          - restricted to rows with prism_cluster "99".
-- The cluster key is cast to String before grouping.
$clusters = (
    SELECT
        day,
        cluster,
        ui,
        COUNT(*) AS queries,
        COUNT_IF(revenue <> 0) AS queries_with_transaction,
        SUM(revenue) AS revenue,
        SUM(1 * crypta_C2_prob) AS queries_C2,
        SUM(revenue * crypta_C2_prob) AS revenue_C2,
        COUNT_IF(prism_cluster == "99") AS queries_99,
        SUM_IF(revenue, prism_cluster == "99") AS revenue_99
    FROM $source
    GROUP BY
        day,
        CAST(cluster AS String) AS cluster,
        ui
);


-- Latest day present in the aggregated data.
$max_day = (
    SELECT MAX(day)
    FROM $clusters
);


-- Year-over-year view of $clusters: every (day, cluster, ui) row is paired with the row
-- of the same cluster and ui dated $YEAR days earlier.
--
-- A FULL JOIN keeps both unmatched sides:
--   * "now" rows without a year-ago counterpart get zeros in the *_year_ago columns;
--   * "ago" rows without a current counterpart are projected $YEAR days forward and get
--     zeros in the current-period columns.
-- Projected rows whose day would lie beyond the latest observed day ($max_day) are dropped.
-- queries_99 / revenue_99 have no year-ago counterpart in the output.
$clusters_yoy = SELECT * FROM (
    SELECT
        -- For "ago"-only rows the output day is the year-ago day shifted forward by $YEAR.
        now.day ?? (ago.day + DateTime::IntervalFromDays($YEAR)) AS day,
        now.cluster ?? ago.cluster AS cluster,
        now.ui ?? ago.ui AS ui,
        now.queries ?? 0 AS queries,
        ago.queries ?? 0 AS queries_year_ago,
        now.queries_with_transaction ?? 0 AS queries_with_transaction,
        ago.queries_with_transaction ?? 0 AS queries_with_transaction_year_ago,
        now.revenue ?? 0 AS revenue,
        ago.revenue ?? 0 AS revenue_year_ago,
        now.queries_C2 ?? 0 AS queries_C2,
        ago.queries_C2 ?? 0 AS queries_C2_year_ago,
        now.revenue_C2 ?? 0 AS revenue_C2,
        ago.revenue_C2 ?? 0 AS revenue_C2_year_ago,
        now.queries_99 ?? 0 AS queries_99,
        now.revenue_99 ?? 0 AS revenue_99
    FROM $clusters AS now
    FULL JOIN $clusters AS ago
    ON (
        now.cluster = ago.cluster
        AND now.ui = ago.ui
        -- Same offset as in the `day` projection above; both must use $YEAR so the
        -- join key and the projected day can never drift apart.
        AND now.day = ago.day + DateTime::IntervalFromDays($YEAR)
    )
)
WHERE day <= $max_day;


-- Full grid: every distinct (day, ui) pair crossed with every distinct cluster
-- seen in $clusters_yoy.
$combos = (
    SELECT
        *
    FROM (
        SELECT DISTINCT day, ui
        FROM $clusters_yoy
    ) AS day_ui
    CROSS JOIN (
        SELECT DISTINCT cluster
        FROM $clusters_yoy
    ) AS cl
);


-- Year-over-year metrics laid out on the full (day, ui, cluster) grid, with per-(day, ui)
-- totals attached to every row. Grid cells that have no row in $clusters_yoy come out of
-- the LEFT JOIN with NULL metrics.
$res = (
    SELECT
        filled.*,
        SUM(queries) OVER day_ui AS queries_total,
        SUM(revenue) OVER day_ui AS revenue_total,
        SUM(revenue_C2) OVER day_ui AS revenue_C2_total,
        SUM(queries_C2) OVER day_ui AS queries_C2_total,
        SUM(queries_year_ago) OVER day_ui AS queries_year_ago_total,
        SUM(revenue_year_ago) OVER day_ui AS revenue_year_ago_total,
        SUM(revenue_C2_year_ago) OVER day_ui AS revenue_C2_year_ago_total,
        SUM(queries_C2_year_ago) OVER day_ui AS queries_C2_year_ago_total,
        SUM(revenue_99) OVER day_ui AS revenue_99_total,
        SUM(queries_99) OVER day_ui AS queries_99_total
    FROM (
        SELECT
            *
        FROM $combos AS grid
        LEFT JOIN $clusters_yoy AS yoy
        USING (day, ui, cluster)
    ) AS filled
    -- Totals are taken across all clusters of the same day and ui.
    WINDOW day_ui AS (PARTITION BY day, ui)
);


-- Destination table; it is both read (to keep untouched months) and rewritten below.
$output = "//home/searchshare/dashboard/cube_datasets/cluster_data_by_day_and_ui";


-- Existing rows whose month lies outside [$start_month, $end_month]; they are carried
-- over unchanged.
$old_data = (
    SELECT
        *
    FROM $output
    WHERE NOT (
        DateTime::Format("%Y-%m")(CAST(day AS Date)) BETWEEN $start_month AND $end_month
    )
);


-- Freshly computed rows for the months [$start_month, $end_month], enriched with the
-- cluster names from the cluster tree. ANY on the right side keeps at most one tree row
-- per cluster, so the join cannot multiply result rows.
$new_data = (
    SELECT
        *
    FROM $res AS r
    LEFT JOIN ANY (
        SELECT
            cluster_id AS cluster,
            cluster_name,
            middle_cluster_name,
            parent_cluster_name
        FROM `//home/searchshare/dashboard/cluster_tree`
    ) AS t
    USING (cluster)
    WHERE DateTime::Format("%Y-%m")(CAST(day AS Date)) BETWEEN $start_month AND $end_month
);


-- Rewrite the output table in a single statement: rows outside the recalculated months
-- are carried over from the existing table ($old_data), rows inside them come from the
-- fresh calculation ($new_data). WITH TRUNCATE replaces the previous table contents.
-- NOTE(review): the ORDER BY follows the last SELECT of the UNION ALL; presumably it
-- sorts the combined result by (cluster, day) - confirm against YQL semantics.
INSERT INTO $output WITH TRUNCATE
SELECT * FROM $old_data
UNION ALL
SELECT * FROM $new_data
ORDER BY cluster, day;
