PRAGMA yt.InferSchema;
PRAGMA yt.QueryCacheMode = "normal";

{% include 'yql/lib.sql' %}
{% include 'yql/domain_lib.sql' %}
{% include 'yql/parse_lib.sql' %}

-- Chooses which of the two logged client addresses to use.
--   $ip4 - IPv4 address as text
--   $ip6 - IPv6 address as text
-- Yields $ip6 when $ip4 is exactly "0.0.0.0"; otherwise yields $ip4 unchanged.
-- NOTE(review): "0.0.0.0" is presumably the placeholder written when the
-- client has no IPv4 address - confirm against the log format.
$select_ip = ($ip4, $ip6) -> {
    return case
        when $ip4 = "0.0.0.0" then $ip6
        else $ip4
    end;
};

-- Strips the trailing Zen referrer marker from a URL.
--   $url - URL text to clean up
-- When $url ends with "?utm_referrer=https%3A%2F%2Fzen.yandex.com" the result is
-- $url without that suffix; any other $url is returned as is.
$remove_zen_postfix = ($url) -> {
    $zen_suffix = "?utm_referrer=https%3A%2F%2Fzen.yandex.com";
    -- Only used in the branch where the suffix is known to be present,
    -- so the subtraction cannot go below zero there.
    $kept_length = Length($url) - Length($zen_suffix);

    return case
        when String::EndsWith($url, $zen_suffix) then Substring($url, 0, $kept_length)
        else $url
    end;
};

-- Watch-log rows reduced to the fields needed to match them against $zen_data.
$watchlog_data = (
    select
        -- Join keys; matched against the identically named columns of $zen_data.
        -- The URL hash is taken after dropping the trailing Zen utm_referrer marker.
        Digest::CityHash($remove_zen_postfix(url)) as urlhash,
        Digest::SipHash(0, 0, Ip::FromString($select_ip(clientip, clientip6)) || useragent) as macrouserhash,

        -- Identifiers and event time.
        uniqid,
        domainuserid as fpc,
        CAST(unixtime as Int64) as last_seen,

        -- Domain as the normalized URL host, and as produced by $cut_domain_from_url.
        Url::GetHost(Url::Normalize(url)) as original_domain,
        $cut_domain_from_url(url) as domain,

        -- Fields extracted from the parsed user agent.
        UserAgent::Parse(useragent).OSVersion as osversion,
        UserAgent::Parse(useragent).OSFamily as osfamily,
        UserAgent::Parse(useragent).BrowserName as browser_name,
        UserAgent::Parse(useragent).BrowserVersion as browser_version,

        "zen" as source
    from
        CONCAT({{input_watch_tables}})
    where
        $is_good_useragent(useragent)
        -- A NULL counterid is treated as '0', so such rows are not excluded here.
        and (counterid ?? '0') not in ('3', '48547472')
        -- NOTE(review): 'pv' = '1' presumably marks a page-view hit - confirm.
        and Metrika::ParseBrowserinfo(browserinfo)['pv'] = '1'
        and $https_match(url)
        and $is_good_domain($cut_domain_from_url(url))
        and $is_valid_fpc(domainuserid)
        and $not_upload_or_changed(browserinfo) = true
        and $not_gdpr(regionid)
        and $not_private_relay(clientip6)
        -- Keep rows whose uniqid is the literal "0" or equals the domain cookie.
        and (uniqid = "0" or uniqid = domainuserid)
);

-- Zen events with a usable yandexuid, reduced to the join keys, event time and uid.
$zen_data = (
    select
        -- Join keys; matched against the identically named columns of $watchlog_data.
        Digest::CityHash(url) as urlhash,
        Digest::SipHash(0, 0, (Ip::FromString(ip) || user_agent)) as macrouserhash,

        -- NOTE(review): ts is presumably in milliseconds, making eventtime
        -- comparable with last_seen from the watch log - confirm.
        (ts / 1000) as eventtime,
        CAST(yandexuid as String) as yandexuid
    from
        CONCAT({{input_zen_tables}})
    where
        url is not null
        and yandexuid is not null
        and yandexuid > 0
        and yandex_internal = false
        and item_type = "url"
        and $is_good_useragent(user_agent)
);

-- Overwrites the output table with (domain cookie, yandexuid) pairs obtained by
-- matching watch-log rows to Zen events on URL hash and ip + user-agent hash.
insert into
    `{{output_table}}`
with truncate
select
    source,
    last_seen,
    original_domain,
    domain,
    fpc as domain_cookie,
    yandexuid as yuid,
    osversion,
    osfamily,
    browser_name,
    browser_version,
    false as seen_you
from
    $watchlog_data as watch
inner join
    $zen_data as zen
using
    (urlhash, macrouserhash)
where
    -- The watch-log hit must not precede the Zen event and must follow it by
    -- at most 300 (both bounds inclusive).
    -- NOTE(review): the unit is presumably seconds - confirm.
    (last_seen - eventtime) between 0 and 300
;
