# YQL query template that prepares batch application of a CatBoost model.
#
# Placeholders to be filled in by the caller:
#   {catboost_model}    - URL of the model file, attached to the query as 'model.bin'
#   {input_table}       - table providing FloatFeatures, CatFeatures and PassThrough columns
#   {number_of_classes} - passed as the third argument of CatBoost::EvaluateBatch
#
# The doubled braces around the lambda body are escaped literal braces, so the
# template is presumably rendered with str.format -- confirm against the caller.
#
# The template only defines the named expressions $evaluator, $data and
# $processed; it contains no SELECT/INSERT over $processed, so callers are
# presumably expected to append their own statements that consume it.
apply_catboost_common_query = """
-- specify the URL(s) where to get the models
PRAGMA file(
    'model.bin',
    '{catboost_model}'
);

-- Initialize CatBoost FormulaEvaluator with given models:
$evaluator = CatBoost::LoadModel(
    FilePath('model.bin')
);

-- Prepare the data:
$data = (
    SELECT
        ListMap(
            Yson::ConvertToDoubleList(FloatFeatures),
            ($x) -> {{ RETURN Cast($x AS Float); }}
        ) AS FloatFeatures,
        Yson::ConvertToStringList(CatFeatures) AS CatFeatures,
        PassThrough
    FROM `{input_table}`
);

-- Apply Catboost in batch mode
$processed = (
    PROCESS $data
    USING CatBoost::EvaluateBatch(
        $evaluator,
        TableRows(),
        {number_of_classes},
        4096  -- Batch size
    )
);
"""


# YQL query template that splits the CatBoost output into two tables depending
# on whether a row's `id` has a matching `yandexuid` in the yandexuid -> crypta_id
# table.
#
# Placeholders to be filled in by the caller:
#   {catboost_applied_table}        - input rows, joined on their `id` column
#   {yandexuid_cryptaid_table}      - mapping table with `yandexuid` and `crypta_id`
#   {output_with_cryptaid_table}    - receives matched rows plus `crypta_id`,
#                                     sorted by `crypta_id`
#   {output_without_cryptaid_table} - receives rows with no match (LEFT ONLY JOIN
#                                     is YQL's anti-join), without extra columns
#
# Both outputs are written WITH TRUNCATE, i.e. existing contents are replaced.
split_by_cryptaid_presence_query = """
INSERT INTO `{output_with_cryptaid_table}`
WITH TRUNCATE

SELECT
    catboost_applied.*,
    yandexuid_cryptaid.crypta_id AS crypta_id
FROM `{catboost_applied_table}` AS catboost_applied
INNER JOIN `{yandexuid_cryptaid_table}` AS yandexuid_cryptaid
ON catboost_applied.id == yandexuid_cryptaid.yandexuid
ORDER BY crypta_id;

INSERT INTO `{output_without_cryptaid_table}`
WITH TRUNCATE

SELECT catboost_applied.*
FROM `{catboost_applied_table}` AS catboost_applied
LEFT ONLY JOIN `{yandexuid_cryptaid_table}` AS yandexuid_cryptaid
ON catboost_applied.id == yandexuid_cryptaid.yandexuid;
"""


def get_slices_for_percentile_segmentation(yt, table, percentiles):
    """Build one row-range table path per percentile segment.

    Args:
        yt: client object exposing ``row_count(table)`` and
            ``TablePath(name=..., start_index=..., end_index=...)``.
        table: table whose rows are being sliced; passed through as ``name``.
        percentiles: mapping of segment name to a ``(start, end)`` pair. Each
            bound is multiplied by the table's row count and truncated with
            ``int`` to obtain a row index (so bounds are presumably fractions
            in [0, 1] -- confirm against callers). Segment names are not used
            here.

    Returns:
        A list of ``yt.TablePath`` results, in the iteration order of
        ``percentiles``.
    """
    total_rows = yt.row_count(table)

    def _to_row_index(bound):
        # Scale a relative bound to an absolute row index, truncating toward zero.
        return int(bound * total_rows)

    return [
        yt.TablePath(
            name=table,
            start_index=_to_row_index(lower_bound),
            end_index=_to_row_index(upper_bound),
        )
        for _, (lower_bound, upper_bound) in percentiles.items()
    ]
