import common SYMBOLS $normalize_query;

-- Subquery template: enriches every row produced by $stream() with cluster
-- ("class") information looked up in four tables located under $common_path
-- on the YT cluster $cluster:
--   <common_path>/competition_clusters  (joined on s.enter_host = host)
--   <common_path>/host_clusters         (joined on s.end_host   = host)
--   <common_path>/query_to_cluster      (joined on s.norm_query = query)
--   <common_path>/clusters              (cluster info, joined by ClusterID)
--
-- Parameters:
--   $stream      - subquery that is invoked as $stream(); the body reads its
--                  enter_host, end_host and norm_query columns.
--   $start_date, $end_date - not referenced anywhere in this body.
--   $cluster     - YT cluster name used in the yt:$cluster.<path> references.
--   $common_path - path prefix of the four tables listed above.
--
-- Result: all columns of $stream() plus session_class, c_clusters, h_clusters,
-- h_fathers, h_host, cl_father and q_cluster.
define subquery $define_cluster($stream, $start_date, $end_date, $cluster, $common_path) AS
    -- Per-host dictionary for the "competitor" (enter) host.
    -- NOTE(review): presumably top_papas maps a cluster name to its share - confirm against the table schema.
    $competition_clusters_path = $common_path || "/competition_clusters";
    $competitors = (
        SELECT
            host AS c_host,
            Cast(ToDict(top_papas) AS Dict<String,Double>) AS c_clusters,
        FROM yt:$cluster.$competition_clusters_path
    );

    -- Builds a converter for one list item: (Value, Frequency / $shows).
    -- The 1.0 factor forces floating-point division.
    $mapper = ($shows) -> (
        ($v) -> (($v.Value, 1.0 * $v.Frequency / $shows))
    );

    -- Per-host dictionaries Value -> Frequency / shows, built from the
    -- top_cluster and top_father lists of the host_clusters table.
    $host_clusters_path = $common_path || "/host_clusters";
    $host_by_class = (
        SELECT
            host AS h_host,
            ToDict(ListMap(top_cluster, $mapper(shows))) AS h_clusters,
            ToDict(ListMap(top_father, $mapper(shows))) AS h_fathers
        FROM yt:$cluster.$host_clusters_path
    );

    -- Query text -> (cluster id, id of the first FatherCluster entry).
    -- LEFT JOIN: queries whose ClusterID has no row in the clusters table
    -- are kept, with NULL q_cluster and cl_father.
    $query_to_cluster_path = $common_path || "/query_to_cluster";
    $clusters_path = $common_path || "/clusters";
    $query_by_class = (
        SELECT
            info.ClusterID AS q_cluster,
            Yson::ConvertToString(info.FatherCluster[0]["id"]) AS cl_father,
            query.query AS q_norm_query,
        FROM yt:$cluster.$query_to_cluster_path AS query
        LEFT JOIN yt:$cluster.$clusters_path AS info
        ON info.ClusterID = CAST(query.ClusterID AS String)
    );

    -- Picks the dominant key of a key -> weight dictionary:
    --   NULL dictionary                 -> NULL
    --   largest weight greater than 0.5 -> the key carrying that weight
    --   anything else                   -> "multiclass"
    -- Items are flipped to (weight, key) so that ListMax orders them by
    -- weight first.
    $get_best_class = ($values) -> {
        $best = ListMax(
            ListMap(
                DictItems($values ?? {}),
                ($x) -> (($x.1, $x.0))
            )
        );
        RETURN CASE
            WHEN $values IS NULL THEN NULL
            WHEN $best.0 > 0.5 THEN $best.1
            ELSE "multiclass"
        END;
    };

    -- Chooses the session class: the first candidate, in the order
    -- query father -> main competitor class -> main host father, that is
    -- not NULL, "", "None" or "Unknown"; otherwise "Unknown".
    -- $query_father is a NULL placeholder, so while it stays NULL the first
    -- WHEN cannot match. $main_host_class is computed but not used in the
    -- RETURN expression.
    $get_session_class = ($c_clusters, $h_clusters, $h_fathers) -> {
        $query_father = NULL;  --TODO: check
        $main_competitor_class = $get_best_class($c_clusters);
        $main_host_class = $get_best_class($h_clusters);  --TODO: check
        $main_host_father = $get_best_class($h_fathers);
        RETURN CASE
            WHEN ($query_father ?? "") NOT IN ("", "None", "Unknown") THEN $query_father
            WHEN ($main_competitor_class ?? "") NOT IN ("", "None", "Unknown") THEN $main_competitor_class
            WHEN ($main_host_father ?? "") NOT IN ("", "None", "Unknown") THEN $main_host_father
            ELSE "Unknown"
        END
    };

    -- Stream rows joined with the host dictionaries. cl_father and q_cluster
    -- are added as NULL placeholders here; the final SELECT below replaces
    -- them for rows that have a non-empty norm_query.
    $sessions_with_hosts = (
        select
            $get_session_class(c.c_clusters, h.h_clusters, h.h_fathers) AS session_class,
            c.c_clusters AS c_clusters,
            h.h_clusters AS h_clusters,
            h.h_fathers AS h_fathers,
            h.h_host AS h_host,
            Null AS cl_father,
            Null AS q_cluster,
            s.*,
        from $stream() as s
        LEFT JOIN ANY $competitors AS c ON s.enter_host = c.c_host
        LEFT JOIN ANY $host_by_class AS h ON s.end_host = h.h_host
    );

    -- Branch 1: rows with a non-empty norm_query take cl_father / q_cluster
    -- from $query_by_class; the NULL placeholders are dropped via WITHOUT.
    SELECT
        q.cl_father AS cl_father,
        q.q_cluster AS q_cluster,
        s.*,
    WITHOUT s.cl_father, s.q_cluster
    FROM (select * from $sessions_with_hosts where norm_query is not NUll and norm_query!="") AS s
    LEFT JOIN ANY $query_by_class AS q ON s.norm_query = q.q_norm_query

    union all

    -- Branch 2: rows with a NULL or empty norm_query pass through unchanged,
    -- keeping the NULL cl_father / q_cluster placeholders.
    select *
    from $sessions_with_hosts
    where norm_query is NUll or norm_query=="";
end define;

-- DSSM model used by $get_cluster_id. It is loaded from the file attached
-- to the query under the alias "model", so a query that uses
-- $get_cluster_id has to attach that file itself by adding the line
-- pragma File("model", "https://proxy.sandbox.yandex-team.ru/1941102488");
$model = Dssm::LoadModel(FilePath("model"));
-- Casts the input to Utf8 and applies SearchRequest::NormalizeBert to it;
-- yields "" when the normalization result is NULL.
$BNorm = ($x) -> {
    $as_utf8 = CAST($x AS Utf8);
    RETURN SearchRequest::NormalizeBert($as_utf8) ?? "";
};
-- Position (as reported by ListIndexOf) of the largest element of $lst.
$argmax = ($lst) -> {
    $largest = ListMax($lst);
    RETURN ListIndexOf($lst, $largest);
};

-- Builds a space-separated string from Geo::GetParents of the country that
-- Geo::FindCountry returns for $user_region (cast to int32 first); every
-- element is cast to String before joining.
-- NOTE(review): presumably the result is the chain of parent region ids of
-- the user's country - confirm against the Geo UDF contract.
$regions = ($user_region) -> {
    $country_id = Geo::FindCountry(cast($user_region as int32));
    $chain = cast(Geo::GetParents($country_id) as List<String>);
    RETURN String::JoinFromList($chain, " ");
};

-- Applies the DSSM model to a query and returns the position of the largest
-- value in its "logits_v_0" output.
--   $query          - raw query text; normalized with $BNorm, "" when NULL.
--   $ext_qfuf_top10 - passed as the "extensions" feature, "" when NULL.
--   $user_region    - region id; turned into the "base_region_chain"
--                     feature by $regions.
-- Requires the model file to be attached to the query under the alias
-- "model" (that is where $model is loaded from).
$get_cluster_id = ($query, $ext_qfuf_top10, $user_region) -> {
    $features = AsStruct(
        $BNorm($query) ?? "" as query,
        $ext_qfuf_top10 ?? "" as extensions,
        $regions($user_region) as base_region_chain
    );
    $logits = Dssm::Apply($model, $features, "logits_v_0");
    RETURN $argmax($logits);
};

-- Region id used when a row carries no usable region.
$default_region = 225;

-- Reads the region id from member $column of $row.
-- Returns $default_region when the member is absent, when its value cannot
-- be cast to int32, or when it equals 0; otherwise returns the value as
-- int32.
$get_region = ($row, $column) -> {
    $raw_region = TryMember($row, $column, $default_region);
    $region_id = cast($raw_region as int32) ?? 0;
    RETURN CASE
        WHEN $region_id = 0 THEN $default_region
        ELSE $region_id
    END;
};
-- Reads member $column of $row (NULL when the member is absent) and passes
-- it through the imported $normalize_query.
$get_query = ($row, $column) -> {
    $raw_query = TryMember($row, $column, NULL);
    RETURN $normalize_query($raw_query);
};
-- Normalizes a query in two steps - SearchRequest::NormalizeConsistent,
-- then SearchRequest::NormalizeDopp - on the Utf8 form of $q, and returns
-- the result cast back to String.
$normalize_dopp = ($q) -> {
    $consistent = SearchRequest::NormalizeConsistent(Cast($q as Utf8));
    $dopp = SearchRequest::NormalizeDopp($consistent);
    RETURN Cast($dopp as String);
};
-- Builds a fresh path of the form
--   <new_query_folder>/<service>/<date>_<uuid>
-- where <date> is the first 10 characters of the string form of the current
-- date in the Europe/Moscow time zone and <uuid> is a random UUID.
$get_new_query_path = ($new_query_folder, $service) -> {
    $moscow_today = Cast(CurrentTzDate("Europe/Moscow") as String);
    $day = SUBSTRING($moscow_today, 0, 10);
    $suffix = Cast(RandomUuid(CurrentUtcTimestamp()) as String);
    $leaf = $day || "_" || $suffix;
    RETURN $new_query_folder || "/" || $service || "/" || $leaf;
};

export $define_cluster, $get_cluster_id, $get_query, $get_region, $normalize_dopp, $get_new_query_path;
