PRAGMA yt.InferSchema;
PRAGMA yt.QueryCacheMode = "normal";

{% include 'yql/lib.sql' %}
{% include 'yql/domain_lib.sql' %}
{% include 'yql/parse_lib.sql' %}

-- Ordering key used with MIN_BY to choose one yuid/source per domain_cookie.
--   $yuid     - yuid passed to $extract_ts
--   $source   - source label of the row; "watch" is treated specially
--   $seen_you - boolean flag of the row (callers pass it with NULL -> false)
-- Rows that have seen_you set, or whose source is "watch", get the
-- $extract_ts value increased by 1; all other rows use the value unchanged,
-- so on an equal base value an unmarked row sorts first.
-- NOTE(review): $extract_ts is defined in an included library; presumably it
-- returns the timestamp embedded in the yuid - confirm.
$min_by_ts_and_seen = ($yuid, $source, $seen_you) -> {
    $ts = $extract_ts($yuid);
    RETURN CASE
        WHEN $seen_you OR $source == "watch" THEN $ts + 1
        ELSE $ts
    END;
};

-- Collapses the concatenated input tables to one row per domain_cookie.
--   * descriptive columns (os, browser, domain, ...) take an arbitrary value
--     from the group via SOME;
--   * yuid and source are taken from the row with the smallest
--     $min_by_ts_and_seen key;
--   * last_seen is the maximum over the group;
--   * seen_you is the MIN over the flag with NULL replaced by false.
$new_data_uniq = (
    SELECT
        SOME(osversion) AS osversion,
        SOME(osfamily) AS osfamily,
        SOME(browser_name) AS browser_name,
        SOME(browser_version) AS browser_version,
        MIN_BY(
            yuid,
            $min_by_ts_and_seen(yuid, source, seen_you ?? false)
        ) AS yuid,
        MAX(last_seen) AS last_seen,
        SOME(domain) AS domain,
        SOME(domain_cookie) AS domain_cookie,
        SOME(original_domain) AS original_domain,
        MIN(seen_you ?? false) AS seen_you,
        MIN_BY(
            source,
            $min_by_ts_and_seen(yuid, source, seen_you ?? false)
        ) AS source
    FROM CONCAT({{input_tables}})
    GROUP BY domain_cookie
);

-- Combines the per-cookie rows of $new_data_uniq with the rows of the table
-- supplied as single_table, matched on domain_cookie.
-- The LEFT JOIN keeps every new row; when there is no match the right-hand
-- values fall back to 0 (last_seen) and "" (yuid, source).
--   * last_seen - the larger of the two sides;
--   * yuid / source - resolved by $select_older_yuid and
--     $get_source_for_older_yuid, both defined in an included library
--     (NOTE(review): presumably they prefer the yuid with the earlier
--     timestamp - confirm);
--   * every other column is taken from the new row.
$export_data = (
    SELECT
        new_rows.osversion AS osversion,
        new_rows.osfamily AS osfamily,
        new_rows.browser_name AS browser_name,
        new_rows.browser_version AS browser_version,
        MAX_OF(new_rows.last_seen, COALESCE(base.last_seen, 0)) AS last_seen,
        $select_older_yuid(new_rows.yuid, COALESCE(base.yuid, "")) AS yuid,
        new_rows.domain AS domain,
        new_rows.original_domain AS original_domain,
        new_rows.domain_cookie AS domain_cookie,
        new_rows.seen_you AS seen_you,
        $get_source_for_older_yuid(
            new_rows.yuid,
            new_rows.source,
            COALESCE(base.yuid, ""),
            COALESCE(base.source, "")
        ) AS source
    FROM $new_data_uniq AS new_rows
    LEFT JOIN `{{ single_table }}` AS base
        ON new_rows.domain_cookie = base.domain_cookie
);

-- Number of $export_data rows that share each yuid.
$duids_on_yuid = (
    SELECT
        yuid,
        COUNT(*) AS duids_on_yuid
    FROM $export_data
    GROUP BY yuid
);

-- Scalar cut-off: the 0.99 percentile of the per-yuid row counts from
-- $duids_on_yuid, cast to Int64.
$threshold = (
    SELECT
        CAST(PERCENTILE(duids_on_yuid, 0.99) AS Int64)
    FROM $duids_on_yuid
);

-- Keeps only the $export_data rows whose yuid has fewer rows than $threshold,
-- i.e. drops yuids with an unusually large number of attached rows.
-- NOTE(review): the comparison is strict; if the 0.99 percentile ever equals
-- the smallest possible count (1), nothing passes this filter - confirm that
-- "<" rather than "<=" is intended.
$filtered_by_size = (
    SELECT
        rec.osversion AS osversion,
        rec.osfamily AS osfamily,
        rec.browser_name AS browser_name,
        rec.browser_version AS browser_version,
        rec.last_seen AS last_seen,
        rec.yuid AS yuid,
        rec.domain AS domain,
        rec.original_domain AS original_domain,
        rec.domain_cookie AS domain_cookie,
        rec.seen_you AS seen_you,
        rec.source AS source
    FROM $export_data AS rec
    INNER JOIN $duids_on_yuid AS cnt
        ON rec.yuid = cnt.yuid
    WHERE cnt.duids_on_yuid < $threshold
);

-- Identifiers to exclude from the result, read from a fixed table; its uid
-- column is exposed as yuid so it can be joined against the export rows.
-- NOTE(review): this path is hard-coded, unlike the other tables in this
-- query, which are supplied through template variables.
$blacklist = (
    SELECT
        uid AS yuid
    FROM `//home/search-functionality/romariogi/TURBO-4370/blacklist_v9.1`
);

-- Anti-join: keeps the rows of $filtered_by_size whose yuid does not occur
-- in $blacklist (LEFT ONLY JOIN returns left rows without a match).
$filtered_by_blacklist = (
    SELECT *
    FROM $filtered_by_size AS a
    LEFT ONLY JOIN $blacklist AS b
    USING (yuid)
);

-- Overwrites the output table with the fully filtered rows, sorted by
-- domain_cookie.
INSERT INTO `{{ output_table }}` WITH TRUNCATE
SELECT *
FROM $filtered_by_blacklist
ORDER BY domain_cookie;

-- Overwrites the export table with the (yuid, duid) pairs of the filtered
-- rows; domain_cookie is published under the name duid.
INSERT INTO `{{ export_table }}` WITH TRUNCATE
SELECT
    yuid,
    domain_cookie AS duid
FROM $filtered_by_blacklist;
