PRAGMA yt.InferSchema;
PRAGMA yt.QueryCacheMode = "normal";

{% include 'yql/lib.sql' %}

-- Pair list used by the rest of the script:
--   * one self-pair (u, u) for every distinct u of the input table, followed by
--   * every original (u, v) row of the input table (duplicates are kept,
--     hence "union all").
-- No "order by" here on purpose: an ORDER BY without LIMIT inside a subquery
-- is ignored by YQL (it only produces a warning), and every consumer below
-- either aggregates or applies its own ordering.
$normed_data = (
    select u as u, u as v
    from (
        select distinct u
        from `{{ input_table }}`
    )
    union all
    select u as u, v as v
    from `{{ input_table }}`
);

-- Per-u tally: cv is the number of non-NULL v values that appear together
-- with a given u in $normed_data.
$vs_by_u = (
    select u, count(v) as cv
    from $normed_data
    group by u
);

-- Scalar cut-off: the 0.99 percentile of cv over all rows of $vs_by_u,
-- cast to int64. Used below as an upper bound on cv.
$comp_size_limit = (
    select cast(percentile(cv, 0.99) as int64)
    from $vs_by_u
);

-- Overwrite the output table with the (u, v) pairs from $normed_data whose u
-- has a cv not exceeding $comp_size_limit; rows are written ordered by v.
insert into `{{ output_table }}` with truncate
select
    pairs.u as u,
    pairs.v as v
from $normed_data as pairs
inner join $vs_by_u as counts
    on counts.u = pairs.u
where counts.cv <= $comp_size_limit
order by v;
