-- Run all statements below against the `hahn` cluster.
USE hahn;

-- YT provider settings for this query.
-- NOTE(review): presumably '1' makes an operation fail on its first failed job
-- instead of retrying — confirm against the YT provider docs.
PRAGMA yt.DefaultMaxJobFails = '1';
-- Turns off the provider's automatic merging of temporary tables.
PRAGMA yt.TemporaryAutoMerge = 'disabled';

-- Query parameters supplied by the caller.
-- $yt_pool:    name of the YT pool the operations are run in (see yt.Pool below).
-- $start_date: first date of the recomputed range; cast to Date further down,
--              so it must be a string that CAST(... AS Date) accepts.
-- $end_date:   last date of the recomputed range; same format as $start_date.
DECLARE $yt_pool AS String;
DECLARE $start_date AS String;
DECLARE $end_date AS String;

-- Must come after the DECLARE above because it reads the parameter value.
PRAGMA yt.Pool = $yt_pool;

-- Month keys ("YYYY-MM") bounding the recomputed range. They are used both as
-- table-name bounds for RANGE() over the monthly source tables and as the
-- filter that decides which rows of the existing output are kept.
--
-- CAST(String AS Date) yields NULL for a malformed value. A NULL bound would
-- make the "keep old rows" filter evaluate to NULL for every row, and the
-- final INSERT ... WITH TRUNCATE would then drop all previously stored data.
-- Unwrap turns that silent NULL into an immediate, descriptive failure.
$start_month = DateTime::Format("%Y-%m")(
    Unwrap(CAST($start_date AS Date), "start_date is not a valid date (expected YYYY-MM-DD)")
);
$end_month = DateTime::Format("%Y-%m")(
    Unwrap(CAST($end_date AS Date), "end_date is not a valid date (expected YYYY-MM-DD)")
);

-- All rows of the source tables under squeeze_cube whose names fall between
-- $start_month and $end_month (inclusive table-name range).
$source = (
    SELECT *
    FROM RANGE("//home/searchshare/squeeze_cube", $start_month, $end_month)
);

-- One row per (source row, revenue_dict entry): the dict key becomes `service`
-- and the dict value becomes `revenue`.
$revenue_items = (
    SELECT
        day,
        cluster,
        ui,
        revenue_item.0 AS service,
        revenue_item.1 AS revenue
    FROM $source
    FLATTEN DICT BY revenue_dict AS revenue_item
);

-- Revenue and number of flattened entries per (day, cluster, ui, service).
$service_revenue = (
    SELECT
        day,
        cluster,
        ui,
        service,
        SUM(revenue) AS revenue,
        COUNT(*) AS service_transactions
    FROM $revenue_items
    GROUP BY day, cluster, ui, service
);

-- Number of source rows per (day, cluster, ui), independent of service.
$cluster_queries = (
    SELECT
        day,
        cluster,
        ui,
        COUNT(*) AS queries
    FROM $source
    GROUP BY day, cluster, ui
);

-- Per-service revenue rows enriched with the query count of their
-- (day, cluster, ui) group. `cluster` is exposed as String; every other column
-- comes from the per-service aggregate. Only groups present on both sides of
-- the inner join are kept.
$cluster_data = (
    SELECT
        CAST(svc.cluster AS String) AS cluster,
        q.queries AS queries,
        svc.* WITHOUT svc.cluster
    FROM $service_revenue AS svc
    INNER JOIN $cluster_queries AS q
    USING (day, cluster, ui)
);


-- Distinct (service, day, ui) triples seen in the aggregated data.
$service_day_ui = (
    SELECT DISTINCT service, day, ui
    FROM $cluster_data
);

-- Distinct clusters seen in the aggregated data.
$clusters = (
    SELECT DISTINCT cluster
    FROM $cluster_data
);

-- Full grid: every observed (service, day, ui) paired with every observed
-- cluster, so that clusters without data for a triple still get a row later.
$combos = (
    SELECT *
    FROM $service_day_ui AS sdu
    CROSS JOIN $clusters AS cl
);


-- The full (service, day, ui, cluster) grid with measures attached where
-- $cluster_data has a matching row; unmatched grid cells keep NULL measures.
$combo_rows = (
    SELECT
        *
    FROM $combos AS combo
    LEFT JOIN $cluster_data AS stats
    USING (service, day, ui, cluster)
);

-- Adds totals over all clusters of the same (service, day, ui) to every row.
-- NOTE(review): grid cells without a match carry NULL `queries`, which SUM
-- ignores, so queries_total only covers clusters that had rows for this
-- service — confirm this is the intended denominator.
$res = (
    SELECT
        filled.*,
        SUM(queries) OVER per_service_day_ui AS queries_total,
        SUM(revenue) OVER per_service_day_ui AS revenue_total
    FROM $combo_rows AS filled
    WINDOW per_service_day_ui AS (
        PARTITION BY service, day, ui
    )
);

-- Destination table of the dataset; it is both read (to keep rows outside the
-- recomputed range) and rewritten by this script.
$output = "//home/searchshare/dashboard/cube_datasets/cluster_revenue";

-- "YYYY-MM" key of a stored `day` value.
$month_of = ($day) -> {
    RETURN DateTime::Format("%Y-%m")(CAST($day AS Date));
};

-- Existing output rows whose month lies outside [$start_month, $end_month].
-- Rows whose `day` cannot be cast to Date produce a NULL predicate and are
-- therefore not carried over.
$old_data = (
    SELECT *
    FROM $output
    WHERE NOT ($month_of(day) BETWEEN $start_month AND $end_month)
);


-- Freshly computed rows enriched with cluster names from the cluster tree.
-- ANY on the right side keeps at most one tree row per cluster, so the join
-- cannot multiply result rows.
$fresh_data = (
    SELECT *
    FROM $res AS r
    LEFT JOIN ANY (
        SELECT
            cluster_id AS cluster,
            cluster_name,
            middle_cluster_name,
            parent_cluster_name
        FROM `//home/searchshare/dashboard/cluster_tree`
    ) AS t
    USING (cluster)
);

-- Rows kept from the previous version of the table plus the recomputed range.
$combined = (
    SELECT * FROM $old_data
    UNION ALL
    SELECT * FROM $fresh_data
);

-- Rewrite the whole output table. The sort is applied to the combined result
-- explicitly: a trailing ORDER BY written directly after UNION ALL binds
-- either to the whole union or only to the last SELECT, depending on the
-- AnsiOrderByLimitInUnionAll setting, which would leave the kept rows unsorted.
INSERT INTO $output WITH TRUNCATE
SELECT *
FROM $combined
ORDER BY cluster, day;
