{% include 'yql/common.sql' %}
{% include 'yql/lib.sql' %}
{% include 'yql/domain_lib.sql' %}
{% include 'yql/parse_lib.sql' %}

{% if not is_embedded %}
PRAGMA File('libtcp_options_udf.so', 'yt://hahn/home/crypta/public/udfs/stable/libtcp_options_udf.so');
PRAGMA Udf('libtcp_options_udf.so');
{% endif %}

-- Restores a duid from its masked form.
--   $mask        - optional string; unwrapped, so it must not be null.
--   $masked_duid - optional string; unwrapped, so it must not be null.
-- Bytes of the two strings are paired by position and each pair is turned into
-- the ASCII digit (10 + masked_byte - mask_byte) % 10.
-- NOTE(review): assumes both arguments contain only ASCII digits and have the
-- same length; nothing in this lambda checks that.
$unmask = ($mask, $masked_duid) -> {
    $ascii_zero = 48;
    $mask_bytes = String::ToByteList(unwrap($mask));
    $masked_bytes = String::ToByteList(unwrap($masked_duid));

    $restore_digit = ($byte_pair) -> {
        $masked_byte = $byte_pair.0;
        $mask_byte = $byte_pair.1;
        -- Adding 10 before the subtraction keeps the value non-negative for digit inputs.
        return unwrap(cast((10 + $masked_byte - $mask_byte) % 10 + $ascii_zero as uint8));
    };

    return String::FromByteList(ListMap(ListZip($masked_bytes, $mask_bytes), $restore_digit));
};

-- Accepts strings made of 16 to 21 decimal digits.
-- https://a.yandex-team.ru/arc_vcs/crypta/graph/rtmr/lib/parse_adstat_log/unmask/unmask.cpp?rev=r9220018#L13
$is_valid_mask = Re2::Match(@@^[0-9]{16,21}$@@);

-- Extracts the duid encoded in the "mask" and "id" CGI parameters of $uri.
-- Returns null unless both parameters are present, have equal length and both
-- pass $is_valid_mask; otherwise returns $unmask(mask, id).
$extract_duid = ($uri) -> {
    $mask = Url::GetCGIParam($uri, "mask");
    $masked_duid = Url::GetCGIParam($uri, "id");

    $can_unmask =
        $mask is not null
        and $masked_duid is not null
        and length($mask) = length($masked_duid)
        and $is_valid_mask($mask)
        and $is_valid_mask($masked_duid);

    return if($can_unmask, $unmask($mask, $masked_duid), null);
};

-- Returns the host of the URL carried in the "ref" CGI parameter of $uri:
-- the parameter value is URL-decoded and then passed to Url::GetHost.
$extract_host = ($uri) -> (
    Url::GetHost(Url::Decode(Url::GetCGIParam($uri, "ref")))
);

-- Captures the value of the `yandexuid` cookie from a Cookie header string.
-- The value must be 17 to 20 digits with a non-zero first digit.
-- The cookie name must start the string or follow "; ", and the value must be
-- followed by ";" or the end of the string. The previous pattern demanded a
-- literal "; " before and ";" after, so a yandexuid in the first or last
-- position of the header was never extracted.
-- The pattern is anchored with ^ and $, so it does not matter whether the
-- matcher is anchored or unanchored.
$yandexuid_capture = Re2::Capture(@@^(?:.*; )?yandexuid=([1-9][0-9]{16,19})(?:;.*)?$@@);

-- Returns the yandexuid found in $http_cookies, or null when there is no match.
$extract_yandexuid = ($http_cookies) -> {
    return $yandexuid_capture($http_cookies)._1;
};

-- Ext id values that are discarded (the name suggests they are test identifiers).
$test_ext_ids = AsList(
    "ffffffffffffffffffffffffffffffff",
    "00000000000000000000000000000000",
    "0000000000000000",
    "ffffffffffffffff"
);

-- Picks the ext id out of parsed TCP SYN options: the value under key 40 is
-- preferred, key 65 is the fallback. Returns null when the chosen value is one
-- of $test_ext_ids or when neither key yields a value.
$get_ext_id_from_tcp_options = ($tcp_syn_options) -> {
    $options = TcpOptions::Parse($tcp_syn_options);
    $ext_id = $options[40] ?? $options[65];

    return case
        when $ext_id in $test_ext_ids then null
        else $ext_id
    end;
};

-- Per-record fields extracted from the concatenated input tables.
$parsed = (
    select
        -- `timestamp` is cast to Double first and then to Uint64.
        cast(cast(`timestamp` as Double) as Uint64) as ts,

        -- Identifiers.
        $get_ext_id_from_tcp_options(tcp_syn_options) as ext_id,
        $extract_yandexuid(http_cookie) as yandexuid,
        $extract_duid(request_uri) as duid,

        -- Request context.
        $extract_host(request_uri) as domain,
        http_user_agent as user_agent,
    from concat({{ input_tables }})
);

-- Keeps only records that have an ext id, a valid duid, an acceptable domain
-- (yastatic.net is excluded) and an acceptable user agent.
-- A yandexuid that fails validation is replaced with null; the record is kept.
$filtered = (
    select
        ts,
        ext_id,
        case
            when Identifiers::IsValidYandexuid(yandexuid) then yandexuid
            else null
        end as yandexuid,
        duid,
        domain as original_domain,
        user_agent,
    from $parsed
    where
        -- identifier checks
        ext_id is not null
        and Identifiers::IsValidDuid(duid)
        -- domain checks
        and domain != "yastatic.net"
        and $is_valid_domain(domain)
        -- user agent check
        and $is_good_useragent(user_agent)
);

-- Upper bound on the number of distinct values collected per aggregated list.
$max_list_values = 100;

-- One row per (ext_id, user_agent) pair:
--   last_seen - the greatest ts in the group;
--   uniqid    - result of $bestYandexuidByTs over the distinct yandexuids;
--   duids     - distinct (original_domain, duid) tuples, with records whose
--               domain satisfies $is_yandex_domain left out;
--   the remaining columns are fields of the parsed user agent.
$candidates = (
    select
        max(ts) as last_seen,
        $bestYandexuidByTs(
            aggregate_list_distinct(yandexuid, $max_list_values)
        ) as uniqid,
        aggregate_list_distinct(
            case
                when $is_yandex_domain(original_domain) then null
                else (original_domain, duid)
            end,
            $max_list_values
        ) as duids,
        UserAgent::Parse(user_agent).OSVersion as osversion,
        UserAgent::Parse(user_agent).OSFamily as osfamily,
        UserAgent::Parse(user_agent).BrowserName as browser_name,
        UserAgent::Parse(user_agent).BrowserVersion as browser_version,
    from $filtered
    group by
        ext_id,
        user_agent
);

-- Expands every candidate into one row per (domain, duid) tuple from `duids`.
-- Candidates without a uniqid are dropped.
$yuids_duids = (
    select
        uniqid as yuid,
        "adstat" as source,
        last_seen,
        osfamily,
        osversion,
        browser_name,
        browser_version,
        -- After flattening, `duids` is a single (original_domain, duid) tuple.
        $cut_domain(duids.0) as domain,
        duids.0 as original_domain,
        duids.1 as domain_cookie,
        false as seen_you
    from $candidates
    flatten by duids
    where uniqid is not null
);

-- Replaces the contents of the output table with the result, ordered by domain_cookie.
insert into `{{output_table}}` with truncate
select
    *
from
    $yuids_duids
order by
    domain_cookie;
