PRAGMA yt.InferSchema;
PRAGMA yt.QueryCacheMode = "normal";

{% include 'yql/lib.sql' %}
{% include 'yql/domain_lib.sql' %}
{% include 'yql/parse_lib.sql' %}

-- Builds a lowercase hex digest from the result of $cut_domain_from_url($url)
-- concatenated with $duid.
-- The MurMurHash value is rendered with String::Hex, and everything from
-- offset 2 of that rendering onward is kept before lowercasing
-- (presumably this strips a "0x" prefix; verify against String::Hex output).
$generate_hash_from_url_and_duid = ($url, $duid) -> {
    $hash_input = $cut_domain_from_url($url) || $duid;
    $hex_digest = String::Hex(Digest::MurMurHash($hash_input));

    RETURN String::ToLower(SUBSTRING($hex_digest, 2));
};

-- Joins the chevents inputs with the hits inputs on hitlogid and projects the
-- fields used by the later grouping step.
-- macrouserhash_port is a SipHash (both key arguments are 0) over the
-- concatenation of the parsed client IP, the user agent and the client port.
-- Rows are kept only when every filter helper below accepts them; the helpers
-- come from the included libraries.
$prepare_to_group = (
    SELECT
        CAST(hits.unixtime AS Uint64) AS last_seen,
        hits.useragent AS useragent,
        chevents.rawuniqid AS uniqid,
        chevents.duid AS duid,
        hits.referer AS referer,
        Digest::SipHash(
            0,
            0,
            Ip::FromString(chevents.clientip6)
                || hits.useragent
                || CAST(chevents.clientport AS String)
        ) AS macrouserhash_port
    FROM
        CONCAT({{ input_tables }}) AS chevents
    INNER JOIN
        CONCAT({{ join_tables }}) AS hits
    USING (hitlogid)
    WHERE
        $is_good_useragent(hits.useragent)
        AND $is_good_domain($cut_domain_from_url(hits.referer))
        AND $not_gdpr(chevents.regionid)
        AND $not_private_relay(chevents.clientip6)
        AND $is_valid_fpc(chevents.duid)
);

-- Collapses $prepare_to_group to one row per macrouserhash_port.
--   uniqids   : distinct uniqid values accepted by $is_valid_yuid (rejected
--               values are replaced with NULL before aggregation)
--   duids     : distinct (domain of referer, duid) tuples
--   last_seen : the largest last_seen in the group
--   browser_* / os* : fields of the parsed user agent, each taken from an
--               arbitrary row of the group via SOME
-- $max_list_values is passed as the second argument of both list aggregates.
$candidates = (
    SELECT
        AGGREGATE_LIST_DISTINCT(
            IF($is_valid_yuid(uniqid), uniqid, NULL),
            $max_list_values
        ) AS uniqids,
        AGGREGATE_LIST_DISTINCT(
            ($cut_domain_from_url(referer), duid),
            $max_list_values
        ) AS duids,
        MAX(last_seen) AS last_seen,
        SOME(UserAgent::Parse(useragent).BrowserName) AS browser_name,
        SOME(UserAgent::Parse(useragent).BrowserVersion) AS browser_version,
        SOME(UserAgent::Parse(useragent).OSFamily) AS os,
        SOME(UserAgent::Parse(useragent).OSVersion) AS os_version
    FROM
        $prepare_to_group
    GROUP BY
        macrouserhash_port
);

-- Candidates for which $bestYandexuidByTs picks a non-NULL value from uniqids.
-- That value becomes yuid, source is the constant "port", and the os columns
-- are renamed to osfamily / osversion.
-- NOTE(review): $is_good_list_length(duids, 0, 100) presumably bounds the
-- size of duids; confirm against the helper's definition.
$yuid_duids = (
    SELECT
        $bestYandexuidByTs(uniqids) AS yuid,
        "port" AS source,
        duids,
        os AS osfamily,
        os_version AS osversion,
        browser_name,
        browser_version,
        last_seen
    FROM
        $candidates
    WHERE
        $bestYandexuidByTs(uniqids) IS NOT NULL
        AND $is_good_list_length(duids, 0, 100)
);

-- Candidates for which $bestYandexuidByTs returns NULL for uniqids.
-- The first element of duids is exposed as duid, and the remaining elements
-- stay in duids.
-- NOTE(review): $is_good_list_length(duids, 1, 100) presumably bounds the
-- size of duids; confirm against the helper's definition.
$duid_duids = (
    SELECT
        last_seen,
        duids[0] AS duid,
        ListSkip(duids, 1) AS duids
    FROM
        $candidates
    WHERE
        $bestYandexuidByTs(uniqids) IS NULL
        AND $is_good_list_length(duids, 1, 100)
);

-- Writes one output row per element of duids in $yuid_duids.
-- After flattening, duids is a (domain, duid) tuple: element 0 fills both
-- domain and original_domain, element 1 fills domain_cookie, and hash is
-- $generate_hash over their concatenation. seen_you is always FALSE here.
INSERT INTO `{{output_table}}`
SELECT
    yuid,
    source,
    last_seen,
    osfamily,
    osversion,
    browser_name,
    browser_version,
    duids.0 AS domain,
    duids.0 AS original_domain,
    duids.1 AS domain_cookie,
    $generate_hash(duids.0 || duids.1) AS hash,
    FALSE AS seen_you
FROM $yuid_duids
    FLATTEN LIST BY duids;
