{% include 'yql/common.sql' %}
{% include 'yql/lib.sql' %}
{% include 'yql/domain_lib.sql' %}
{% include 'yql/parse_lib.sql' %}

-- Switch the Yson:: functions used in this file to non-strict mode by default
-- (see the YQL Yson UDF documentation for the exact semantics).
PRAGMA yson.DisableStrict;

-- Callable that replaces text matching "TA/<digits and dots> " (note the trailing space).
-- It is invoked below as $remove_ta($ua, ''), i.e. the matched token is removed from the user agent.
-- NOTE(review): what the "TA/" token stands for is not visible in this file — confirm.
$remove_ta = Re2::Replace('TA/[0-9\.]+ ');

-- Builds the IP-based join key, tagged with source "serp".
--
-- Parameters:
--   $ip  - textual IP address; parsed with Ip::FromString.
--   $ua  - user agent string; the "TA/<version> " token is stripped via $remove_ta.
--   $url - page URL.
--
-- Returns a struct <hash, source> or NULL. NULL is returned when
--   * $not_private_relay($ip) does not hold, or
--   * the hash cannot be computed (any input is NULL or the IP does not parse).
-- Without the second guard a struct with a NULL hash would pass through
-- ListNotNull in the callers and end up as a meaningless join key.
$make_ip_join_hash = ($ip, $ua, $url) -> {
    -- `||` yields NULL as soon as one operand is NULL, and SipHash propagates that NULL.
    $fingerprint = Ip::FromString($ip) || $remove_ta($ua, '') || $url;
    $hash = Digest::SipHash(0, 0, $fingerprint);

    RETURN CASE
        WHEN $not_private_relay($ip) AND $hash IS NOT NULL
            THEN AsStruct(
                $hash AS hash,
                "serp" AS source
            )
        ELSE NULL
    END;
};

-- Wraps a ysclid value into a join key: its SipHash (both seed halves 0) plus the source tag "ysclid".
-- No validation happens here; callers decide whether the ysclid is usable.
$ysclid_join_hash = ($ysclid) -> {
    $digest = Digest::SipHash(0, 0, $ysclid);
    RETURN AsStruct($digest AS hash, "ysclid" AS source);
};

-- Returns the ysclid join key when Identifiers::IsSignificantYsclid accepts the value, NULL otherwise.
$make_ysclid_join_hash = ($ysclid) -> {
    -- IF without an else branch produces NULL when the condition is not true.
    RETURN IF(
        Identifiers::IsSignificantYsclid($ysclid),
        $ysclid_join_hash($ysclid)
    );
};

-- Returns the ysclid join key only when all of the following hold, NULL otherwise:
--   * $is_yandex_domain($referer) is true,
--   * Identifiers::IsSignificantYsclid($ysclid) is true,
--   * $client_time casts to UInt64, is not earlier than the timestamp extracted from the ysclid,
--     and is less than 20 units later than it.
-- NOTE(review): the time unit is presumably seconds — confirm against Identifiers::TimestampYsclid.
$make_ysclid_join_hash_ref = ($ysclid, $referer, $client_time) -> {
    $max_click_delay = 20;
    $issued_at = Identifiers::TimestampYsclid($ysclid);
    $seen_at = CAST($client_time AS UInt64);

    $within_window =
        $seen_at IS NOT NULL                            -- client time parsed as a number
        AND ($seen_at >= $issued_at)                    -- event is not older than the ysclid
        AND ($seen_at - $issued_at < $max_click_delay)  -- event falls inside the allowed window
    ;

    $eligible =
        $is_yandex_domain($referer)
        AND Identifiers::IsSignificantYsclid($ysclid)
        AND $within_window
    ;

    -- IF without an else branch produces NULL when the condition is not true.
    RETURN IF($eligible, $ysclid_join_hash($ysclid));
};

-- Watch-log side of the join: one row per accepted hit, carrying the list of
-- candidate join keys (joinhash) plus cookie, URL and user-agent attributes.
DEFINE SUBQUERY $watchlog_data() AS
    SELECT
        uniqid,
        -- Candidate join keys; entries that evaluate to NULL are dropped by ListNotNull.
        ListNotNull([
            -- When clientip is '0.0.0.0' the address is taken from clientip6 instead.
            $make_ip_join_hash(
                IF(clientip = '0.0.0.0', clientip6, clientip),
                useragent,
                url
            ),
            $make_ysclid_join_hash_ref(
                Url::GetCGIParam(url, "ysclid"),
                Url::GetSignificantDomain(referer),
                Metrika::ParseBrowserinfo(browserinfo)['et']
            )
        ]) AS joinhash,
        CAST(unixtime AS Int64) AS eventtime,
        Metrika::ParseBrowserinfo(browserinfo)['u'] AS fpc,
        $generate_hash_from_url_and_bi(url, browserinfo) AS hash,
        UserAgent::Parse(useragent).OSVersion AS osversion,
        UserAgent::Parse(useragent).OSFamily AS osfamily,
        Url::GetHost(Url::Normalize(url)) AS original_domain,
        $cut_domain_from_url(url) AS domain,
        UserAgent::Parse(useragent).BrowserName AS browser_name,
        UserAgent::Parse(useragent).BrowserVersion AS browser_version
    FROM CONCAT({{input_watch_tables}})
    WHERE
        -- user-agent based filters
        $is_good_useragent(useragent)
        AND NOT UserAgent::Parse(useragent).inAppBrowser
        AND NOT UserAgent::Parse(useragent).isRobot
        -- counter and page filters
        AND (counterid ?? '0') NOT IN ('3', '48547472')
        AND $https_match(url)
        AND $is_good_domain($cut_domain_from_url(url))
        -- not an internal visit: referer and page must differ in significant domain
        -- NOTE(review): a NULL referer domain makes this comparison NULL, so such rows are dropped — confirm intended.
        AND Url::GetSignificantDomain(referer) != Url::GetSignificantDomain(url)
        -- cookie, browserinfo and region filters
        AND $is_valid_fpc(Metrika::ParseBrowserinfo(browserinfo)['u'])
        AND $not_upload_or_changed(browserinfo) = true
        AND $not_gdpr(regionid)
        -- keep the row if the IP check passes or the URL carries a ysclid parameter
        AND ($not_private_relay(clientip6) OR Url::GetCGIParam(url, "ysclid") IS NOT NULL)
        AND (NOT $is_valid_yuid(uniqid) OR uniqid = domainuserid)
    ;
END DEFINE;

-- Redirect-log side of the join: one row per accepted record with its timestamp,
-- yandexuid and the list of candidate join keys (joinhash).
DEFINE SUBQUERY $redir_data() AS
    SELECT
        CAST(`dict`['timestamp'] AS Uint64) AS eventtime,
        -- IP-based key followed by one ysclid-based key per entry of the "events" JSON;
        -- keys that evaluate to NULL are dropped by ListNotNull.
        ListNotNull(
            ListExtend(
                [
                    $make_ip_join_hash(
                        -- only the first element of the comma-separated "ip" value is used
                        String::SplitToList(`dict`['ip'], ',')[0],
                        `dict`['user_agent'],
                        `dict`['url']
                    )
                ],
                ListMap(
                    Yson::ConvertToList(Yson::ParseJson(Url::Decode(`dict`["events"]))),
                    ($evt) -> {
                        RETURN $make_ysclid_join_hash(Yson::ConvertToString($evt.data.ysclid));
                    }
                )
            )
        ) AS joinhash,
        `dict`['yandexuid'] AS yandexuid
    FROM CONCAT({{input_redir_tables}})
    WHERE
        $is_good_useragent(`dict`['user_agent'])
        AND `dict`['yandexuid'] != ""
        -- NOTE(review): $is_yandex_domain receives a full URL here, while the watch-log path
        -- passes it a significant domain — confirm the helper accepts both.
        AND NOT $is_yandex_domain(`dict`['url'])
        AND $https_match(`dict`['url'])
    ;
END DEFINE;

-- Generic wrapper: runs the subquery passed as $subq and expands its list column
-- `joinhash`, so that each list element gets its own output row.
-- SELECT * is deliberate here: the wrapper is applied to subqueries with different column sets.
DEFINE SUBQUERY $flatten($subq) AS
    SELECT *
    FROM $subq()
    FLATTEN LIST BY joinhash;
END DEFINE;

-- Final result: watch-log hits (a) matched to redirect-log records (b) on the same join key.
-- The output table is overwritten on every run.
INSERT INTO `{{output_table}}` WITH TRUNCATE
SELECT
    a.joinhash.source AS source,           -- tag of the key that produced the match
    a.eventtime AS last_seen,              -- time of the watch-log hit
    a.original_domain AS original_domain,
    a.domain AS domain,
    a.fpc AS domain_cookie,
    b.yandexuid AS yuid,
    a.hash AS hash,
    a.osversion AS osversion,
    a.osfamily AS osfamily,
    a.browser_name AS browser_name,
    a.browser_version AS browser_version,
    FALSE AS seen_you
FROM $flatten($watchlog_data) AS a
INNER JOIN $flatten($redir_data) AS b
USING (joinhash);
