use hahn;

pragma yt.InferSchema = '1';
pragma yt.PoolTrees = 'physical';
pragma yt.TentativePoolTrees = 'cloud';

-- Script parameters.
-- Table read as the source of raw rows to be annotated with a cluster.
declare $input_path as String;
-- Table that receives the annotated rows; it is overwritten (with truncate).
declare $output_path as String;
-- Table with newly clustered queries; the QueryRaw and Model0 columns are read from it.
declare $new_query_to_cluster_path as String;
-- Table with the already known query -> cluster mapping, unioned with the new one.
declare $query_to_cluster_path as String;
-- Optional YT pool; "default" is used when it is null.
declare $yt_pool as String?;
-- Passed to $get_query together with each input row to obtain the query value.
declare $query_column as String;
-- Optional; when not null the new query -> cluster mapping is also written to a
-- path derived from this value and $service by $get_new_query_path.
declare $new_query_to_cluster_folder as String?;
-- Passed to $get_new_query_path when building the path for the new mapping.
declare $service as String;

-- Run in the pool supplied by the caller; fall back to "default" when none is given.
$yt_pool = coalesce($yt_pool, "default");
pragma yt.Pool = $yt_pool;

pragma library("common.sql");
pragma library("define_cluster_lib.sql");

import define_cluster_lib symbols $get_query, $get_new_query_path;

-- Newly clustered queries: one row per distinct QueryRaw (exposed as `query`),
-- with ClusterID taken from one Model0 value of the group (Some aggregate)
-- and cast to Int64.
$new_query_to_cluster = (
    select
        QueryRaw as query,
        cast(some(Model0) as Int64) as ClusterID
    from $new_query_to_cluster_path
    group by QueryRaw
);

-- Combined query -> cluster mapping: the newly clustered queries plus the
-- previously stored mapping.
-- Columns are listed explicitly instead of `select *`: union all matches
-- columns by name, so any extra or renamed column in the stored table would
-- otherwise silently widen the result. Only `query` (join key) and `ClusterID`
-- are consumed by the join that uses this expression.
-- NOTE(review): assumes the stored mapping table exposes `query` and
-- `ClusterID` columns — confirm against the table schema.
-- NOTE(review): a query present in both sources yields two rows here; the
-- consumer joins with `any`, so which ClusterID wins is not defined.
$query_to_cluster = (
    select query, ClusterID from $new_query_to_cluster
    union all
    select query, ClusterID from $query_to_cluster_path
);

-- Input rows with a (re)computed `query` column; rows whose query is null are dropped.
$raw_queries = (
    select *
    from (
        select
            -- The whole input row as a struct with any existing `query` member removed,
            -- so it does not clash with the computed `query` column after flattening.
            ForceRemoveMembers(TableRow(), ["query"]),
            -- Query value obtained by the library helper from the row and $query_column.
            $get_query(TableRow(), $query_column) as query,
        from $input_path
    )
    -- Expand the row struct back into ordinary columns next to `query`.
    flatten columns
    where query is not null
);

-- Overwrite the output table with every raw row plus its ClusterID.
-- Left join keeps rows without a known cluster (ClusterID is null for them);
-- `any` takes at most one mapping row per query.
insert into $output_path with truncate
select
    src.*,
    clusters.ClusterID as ClusterID
from $raw_queries as src
left join any $query_to_cluster as clusters
using (query);

-- Optionally export the newly clustered queries: runs only when
-- $new_query_to_cluster_folder is provided.
evaluate if $new_query_to_cluster_folder is not null do
begin
    -- Target path is built by the library helper from the folder and the service name.
    $new_query_path = $get_new_query_path($new_query_to_cluster_folder, $service);
    -- NOTE(review): unlike the main output insert there is no `with truncate` here,
    -- so an existing table at this path would presumably be appended to — confirm
    -- that $get_new_query_path always yields a fresh path.
    insert into $new_query_path
    select query, ClusterID
    from $new_query_to_cluster
    order by query;
end do;
