{% if not is_embedded %}
PRAGMA Library = 'config.sql';
PRAGMA Library = 'metrica_lib.sql';
PRAGMA Library = 'sn_id_parse_lib.sql';
{% endif %}

PRAGMA yt.DataSizePerJob = '6G';

PRAGMA yson.DisableStrict;

IMPORT {% if is_embedded %}.lib.{% endif %}config SYMBOLS
    $config
;

IMPORT {% if is_embedded %}.lib.{% endif %}metrica_lib SYMBOLS
    $fp_capture_login,
    $force_null,
    $lower_strip,
    $clean_ip,
    $ClientUserID,
    -- $ym_uid_regexp,
    $get_duid,
    $uniqid_was_changed,
    $get_domain,
    $check_phone
;

IMPORT {% if is_embedded %}.lib.{% endif %}sn_id_parse_lib SYMBOLS
    $fp_parse_vk_id_from_url,
    $fp_parse_ok_id_from_url
;


-- TODO: Python3
-- Python2 UDF `parse_ecommerce`, loaded from the attached file 'wl_udf.py'.
-- Declared signature: optional string in, list of structs with the string
-- fields action / name / category / revenue out.
$ecommerce_parser = Python2::parse_ecommerce(
    Callable<
        (String?) ->
        List<
            Struct<
                action:String,
                name:String,
                category:String,
                revenue:String
            >
        >
    >,
    FileContent('wl_udf.py')
);
-- Python2 UDF `decode_social_id` from the same file: optional string in,
-- optional string out.
-- NOTE(review): not referenced in the visible part of this script - confirm
-- whether it is still needed.
$decode_mmetr_id = Python2::decode_social_id(
    Callable<(String?)->String?>,
    FileContent('wl_udf.py')
);

-- Input / output locations. The template is rendered in one of two variants:
-- `stream` (all paths are supplied by the caller) or the daily variant
-- (paths are derived from the output folders and $date).
{% if stream %}

-- List of input watch log tables.
$watchlog = AsList(
    '{{ input | join("', '") | safe }}'
);
-- Soup output table.
$watchlog_soup = '{{ output }}';
-- Directory prefix (with trailing slash) for idstorage outputs.
$watchlog_idstorage_dir = '{{ output_idstorage_dir }}/';
-- Identifier used as the table name inside per-run output directories.
$uniqid = '{{ uniqid }}';
$ecommerce = '{{ ecommerce_table }}';
$yuid_purchase_log = '{{ output_extra_data_dir }}/yuid_purchase_log/{{ uniqid }}';
$user_params_log = '{{ output_extra_data_dir }}/user_params_log/{{ uniqid }}';

{% else %}

-- Date of the processed day, also used as the `dt` value of every row.
$date = '{{ date }}';

-- Single input table: the watch log of that day.
$watchlog = AsList('{{ bswatch_log_dir }}/{{ date }}');

$yt_output_folder = '{{ graph_output_dir }}';
$indevice_output_folder = '{{ indevice_output_dir }}';

-- Directory prefixes (each ends with a slash) under the per-day folders.
$raw_links = $yt_output_folder || '/' || $date || '/raw_links/';
$ecommerce = $yt_output_folder || '/' || $date || '/ecommerce/';
$devid_raw = $indevice_output_folder || '/' || $date || '/perfect/devid_raw_day/';
-- Soup output table, named after the WATCH_LOG log source.
$watchlog_soup = $yt_output_folder || '/v2/soup/day/tmp/' || $date || '/' || LogSource::WATCH_LOG();
$watchlog_idstorage_dir = $yt_output_folder || '/v2/soup/day/tmp/' || $date || '/';

$yuid_purchase_log = $raw_links || 'yuid_purchase_log';
$user_params_log = $raw_links || 'user_params_log';

-- Builds the path
--   <output>/<date>/<cookie>_raw/<cookie>_with_<id_type>_<source_type>
-- of a raw "cookie with id" table.
$get_table_with = ($cookie, $id_type, $source_type) -> {
    $cookie_dir = $yt_output_folder || '/' || $date || '/' || $cookie || '_raw';
    $table_name = $cookie || '_with_' || $id_type || '_' || $source_type;
    RETURN $cookie_dir || '/' || $table_name;
};

{% endif %}
-- ========================================================================== --
-- $date_regexp = Re2::Capture(@@.*(?P<dt>\d{4}-\d{2}-\d{2}).*@@);

$cookies = AsList('yuid', 'icookie');

-- === DEFINE ACTIONS TO PARSE WATCHLOG ==================================== --

-- YQL disallows variables as column names, so separate actions are used
-- for icookie and yuid. TODO: refactor once https://st.yandex-team.ru/YQL-5748 is resolved

-- All rows of every input watch log table, reduced to the columns
-- that the rest of the script reads.
$watchlog_combined_all = (
    SELECT
        uniqid, cookiei, passportuid, domainuserid,
        headerargs, useragent, browserinfo,
        url, referer, params,
        iso_eventtime, eventtime,
        counterid,
    FROM EACH($watchlog)
);

-- The same rows, keeping only those with a non-null $force_null(uniqid)
-- that are not flagged by $uniqid_was_changed(browserinfo).
$watchlog_combined = (
    SELECT *
    FROM $watchlog_combined_all
    WHERE
        $force_null(uniqid) IS NOT NULL
        AND NOT $uniqid_was_changed(browserinfo)  -- CRYPTA-8393
);


-- Unfiltered projection of the watch log with the derived columns
-- yuid, icookie, duid, dt and ts added.
$preparsed_watchlog_all = (
    SELECT
        uniqid,
        CAST($force_null(uniqid) AS String) AS yuid,
        $force_null(cookiei) AS icookie,
        CAST($get_duid(domainuserid, headerargs) AS String) AS duid,
        passportuid,
        counterid,
        headerargs,
        useragent,
        browserinfo,
        url,
        referer,
        params,
        CAST(eventtime AS Uint64) AS ts,
        -- dt: the stream variant takes the part of iso_eventtime before the
        -- first space. The daily variant uses the $date literal instead:
        -- taking dt from the log itself made the CI tests fail (the test
        -- data is not consistent), and the former python implementation
        -- also used the task date literal.
        {% if stream %}
        String::SplitToList(iso_eventtime, ' ')[0] AS dt
        {% else %}
        $date AS dt
        {% endif %}
    FROM $watchlog_combined_all
);

-- Pre-parsed rows restricted to hits with a non-null $force_null(uniqid)
-- that are not flagged by $uniqid_was_changed(browserinfo).
$preparsed_watchlog = (
    SELECT *
    FROM $preparsed_watchlog_all
    WHERE
        $force_null(uniqid) IS NOT NULL
        AND NOT $uniqid_was_changed(browserinfo)  -- CRYPTA-8393
);

-- Second argument of every TopFreq(icookie, ...) call in the raw tables.
$top_freq_icookies = 5;

DEFINE ACTION $select_page_titles($cookie) AS
    -- Extracts e-mail-looking strings from the browserinfo value stored under
    -- key 't' (the page title, per the original comment) for hits on a fixed
    -- list of hosts, then writes yuid <-> email pairs.
    -- $cookie - user id name, 'yuid' or 'icookie'; it is only used to build
    --           the raw table path in the daily (non-stream) variant.
    $find_emails = Re2::FindAndConsume(@@([_\w\d]\S+@\S+\.\w+)@@);
    $title_hosts = AsList(
        'e.mail.ru',
        'light.mail.ru',
        'mail.rambler.ru',
        'mail.ngs.ru',
        'aeroflot.ru',
        'lingualeo.ru'
    );

    $email_id_type = $config.ID_TYPE_EMAIL;
    $title_source_type = $config.ID_SOURCE_TYPE_PAGE_TITLE;

    -- One row per (hit, e-mail found in its title).
    $title_emails = (
        SELECT * FROM (
            SELECT
                yuid,
                icookie,
                $find_emails(DictLookup(Metrika::ParseBrowserinfo(browserinfo), 't')) AS emails,
                dt AS id_date,
                ts
            FROM $preparsed_watchlog
            WHERE
                browserinfo REGEXP @@(^|\:)t\:.+$@@
                AND ListHas($title_hosts, Url::GetHost(url) ?? '')
        ) FLATTEN LIST BY emails AS email
    );

    {% if not stream %}

    -- Daily raw table: hit counts per (yuid, email, date).
    $raw_table = $get_table_with($cookie, $email_id_type, $title_source_type);

    INSERT INTO $raw_table WITH TRUNCATE
    SELECT
        yuid,
        $email_id_type AS id_type,
        $title_source_type AS source_type,
        id_value,
        id_date,
        COUNT(1) AS id_count,
        TopFreq(icookie, $top_freq_icookies) AS icookies
    FROM $title_emails
    GROUP BY
        yuid,
        email AS id_value,
        id_date
    ORDER BY yuid;

    {% endif %}

    -- Soup edges yandexuid -> email.
    INSERT INTO $watchlog_soup
    SELECT
        LogSource::WATCH_LOG() AS logSource,
        SourceType::PAGE_TITLE() AS sourceType,
        yuid AS id1,
        IdType::YANDEXUID() AS id1Type,
        Just(email) AS id2,
        IdType::EMAIL() AS id2Type,
        -- make nullable fields to allow python append into soup
        {% if stream %}
        AGGREGATE_LIST_DISTINCT(id_date) AS dates
        {% else %}
        Nothing(Int64?) AS ts,
        Just(COUNT(1)) AS hits
        {% endif %}
    FROM $title_emails
    GROUP BY yuid, email;

END DEFINE;

DEFINE ACTION $select_vmetro($cookie) AS
    -- Extracts a MAC address from the `mac=` parameter of referers whose
    -- second-level domain is 'wi-fi.ru' and writes yuid <-> mac pairs.
    -- $cookie - user id name; only used for the raw table path in the
    --           daily (non-stream) variant.

    $capture_mac = Re2::Capture(@@mac=(?P<g>\w{2}-\w{2}-\w{2}-\w{2}-\w{2}-\w{2})@@);

    -- Captured MAC passed through $lower_strip with '-' and ':' removed;
    -- empty and all-zero values become NULL.
    $referer_mac = ($referer) -> {
        $normalized = String::RemoveAll(
            $lower_strip($capture_mac($referer).g),
            '-:'
        );
        RETURN CASE
            WHEN $normalized IN ('', '0', '000000000000') THEN NULL
            ELSE $normalized
        END;
    };

    $mac_id_type = $config.ID_TYPE_MAC;
    $vmetro_source_type = $config.ID_SOURCE_TYPE_VMETRO;

    $mac_hits = (
        SELECT * FROM (
            SELECT
                yuid,
                icookie,
                $referer_mac(referer) AS id_value,
                dt AS id_date,
                ts
            FROM $preparsed_watchlog
            WHERE Url::GetDomain(referer, 2) == 'wi-fi.ru'
        )
        WHERE id_value IS NOT NULL
    );

    {% if not stream %}

    -- Daily raw table: hit counts per (yuid, mac, date).
    $raw_table = $get_table_with($cookie, $mac_id_type, $vmetro_source_type);

    INSERT INTO $raw_table WITH TRUNCATE
    SELECT
        yuid,
        $mac_id_type AS id_type,
        $vmetro_source_type AS source_type,
        id_value,
        id_date,
        COUNT(1) AS id_count,
        TopFreq(icookie, $top_freq_icookies) AS icookies
    FROM $mac_hits
    GROUP BY
        yuid,
        id_value,
        id_date
    ORDER BY yuid;

    {% endif %}

    -- Soup edges yandexuid -> mac.
    INSERT INTO $watchlog_soup
    SELECT
        LogSource::WATCH_LOG() AS logSource,
        SourceType::VMETRO() AS sourceType,
        yuid AS id1,
        IdType::YANDEXUID() AS id1Type,
        id_value AS id2,
        IdType::MAC() AS id2Type,
        -- make nullable fields to allow python append into soup
        {% if stream %}
        AGGREGATE_LIST_DISTINCT(id_date) AS dates
        {% else %}
        Nothing(Int64?) AS ts,
        Just(COUNT(1)) AS hits
        {% endif %}
    FROM $mac_hits
    GROUP BY yuid, id_value;

END DEFINE;

DEFINE ACTION $select_ecommerce($cookie) AS
    -- Parse ecommerce purchases from watchlog params.
    -- Writes one row per parsed ecommerce item to $yuid_purchase_log and, in
    -- the daily (non-stream) variant, a per (yuid, host:port, date) summary
    -- table built from that log.
    -- $cookie is not used inside this action.
    $id_type = $config.ID_TYPE_PURCHASE_DOMAIN;
    $source_type = $config.ID_SOURCE_TYPE_ECOMMERCE_LOG;

    INSERT INTO $yuid_purchase_log WITH TRUNCATE
    SELECT
        yuid,
        icookie,
        ecommerce.action AS action,
        ecommerce.category AS category,
        ecommerce.name AS name,
        ecommerce.revenue AS revenue,
        url,
        id_value,
        dt
    FROM (
        SELECT
            yuid,
            icookie,
            -- list of structs produced by the python UDF; flattened below
            $ecommerce_parser(params) AS ecommerce,
            url,
            Url::GetHostPort(url) ?? '' AS id_value,
            dt
        FROM $preparsed_watchlog
        -- cheap substring pre-filter before calling the UDF
        WHERE params LIKE '%ecommerce%' AND params LIKE '%purchase%'
    ) FLATTEN LIST BY ecommerce
    ORDER BY yuid, id_value;

    {% if not stream %}

    $yuid_with_purchase_day = $ecommerce || 'yuid_with_purchase_day';

    -- The purchase log written above is read back below, hence the COMMIT.
    COMMIT;

    INSERT INTO $yuid_with_purchase_day WITH TRUNCATE
    SELECT
        yuid,
        $id_type AS id_type,
        $source_type AS source_type,
        id_value,
        id_date,
        COUNT(1) AS id_count,
        TopFreq(icookie, $top_freq_icookies) AS icookies
    FROM $yuid_purchase_log
    GROUP BY yuid, id_value, dt AS id_date
    ORDER BY yuid, id_value;

    {% endif %}

END DEFINE;

DEFINE ACTION $parse_search_lib_uuid($cookie) AS
    -- Parse search lib uuid: extracts the value of a `uuid=` parameter from
    -- the referer (preferred) or the url of hits on yandex domains and
    -- writes yuid <-> uuid pairs.
    -- Soup edges are split by user agent: mobile / tablet / TV agents go to
    -- APP_URL_REDIR, all other agents to APP_URL_REDIR_DESKTOP.
    -- $cookie is not used inside this action.

    $capture_uuid = Re2::Capture(@@.*uuid=(?P<uuid>\w[\w-]+\w).*@@);

    -- Single definition of the "mobile-like user agent" predicate shared by
    -- both soup inserts below, so the two branches cannot drift apart.
    $is_mobile_like_ua = ($useragent) -> {
        $ua = UserAgent::Parse($useragent);
        RETURN $ua.isMobile OR $ua.isTablet OR $ua.isTV;
    };

    $id_type = $config.ID_TYPE_UUID;
    $source_type = $config.ID_SOURCE_TYPE_WATCH_LOG;

    $base_query = (
        SELECT
            `uuid`,
            yuid,
            ts,
            icookie,
            String::Contains(url, 'ru.yandex.searchplugin') AS click,
            String::Contains(url, 'clck.yandex.ru')
                OR String::Contains(url, 'yandex.ru/clck') AS searchplugin,
            url,
            Url::GetHost(url) AS domain,
            dt,
            useragent
        FROM (
            SELECT
                yuid,
                icookie,
                -- the referer match wins over the url match
                uuid_referer ?? uuid_url AS `uuid`,
                -- url is the address the uuid was actually taken from
                CASE
                    WHEN uuid_referer IS NOT NULL THEN referer
                    WHEN uuid_url IS NOT NULL THEN url
                    ELSE NULL
                END AS url,
                ts,
                dt,
                useragent
            FROM (
                SELECT
                    yuid,
                    icookie,
                    dt,
                    ts,
                    $capture_uuid(referer).`uuid` AS uuid_referer,
                    $capture_uuid(url).`uuid` AS uuid_url,
                    referer,
                    url,
                    useragent
                FROM $preparsed_watchlog
                -- cheap substring / domain pre-filter before the regexp
                WHERE (referer LIKE '%uuid%' AND Url::GetSignificantDomain(referer) LIKE '%yandex%')
                    OR (url LIKE '%uuid%' AND Url::GetSignificantDomain(url) LIKE '%yandex%')
            )
        ) WHERE yuid IS NOT NULL
            AND `uuid` IS NOT NULL
            AND url IS NOT NULL
    );

    {% if not stream %}

    -- Daily raw table: hit counts per (yuid, uuid, date).
    $table = $devid_raw || 'uuid_yuid_' || $source_type;

    INSERT INTO $table WITH TRUNCATE
    SELECT
        yuid,
        `uuid`,
        dt,
        TopFreq(icookie, $top_freq_icookies) AS icookies,
        COUNT(1) AS hits_count,
        -- NOTE: last_ts is stored negated; kept exactly as before
        -MAX(CAST(ts AS Int64)) AS last_ts,
        $source_type AS source_type
    FROM $base_query
    GROUP BY yuid, `uuid`, dt
    ORDER BY yuid, `uuid`, dt;

    {% endif %}

    -- pairs seen with a mobile / tablet / TV user agent
    INSERT INTO $watchlog_soup
    SELECT
        LogSource::WATCH_LOG() AS logSource,
        SourceType::APP_URL_REDIR() AS sourceType,
        yuid AS id1,
        IdType::YANDEXUID() AS id1Type,
        `uuid` AS id2,
        IdType::UUID() AS id2Type,
        -- make nullable fields to allow python append into soup
        {% if stream %}
        AGGREGATE_LIST_DISTINCT(dt) as dates
        {% else %}
        Nothing(Int64?) AS ts,
        Just(COUNT(1)) AS hits
        {% endif %}
    FROM $base_query
    WHERE $is_mobile_like_ua(useragent)
    GROUP BY yuid, `uuid`;

    -- pass `uuid` with desktop's user agent in special table
    INSERT INTO $watchlog_soup
    SELECT
        LogSource::WATCH_LOG() AS logSource,
        SourceType::APP_URL_REDIR_DESKTOP() AS sourceType,
        yuid AS id1,
        IdType::YANDEXUID() AS id1Type,
        `uuid` AS id2,
        IdType::UUID() AS id2Type,
        -- make nullable fields to allow python append into soup
        {% if stream %}
        AGGREGATE_LIST_DISTINCT(dt) as dates
        {% else %}
        Nothing(Int64?) AS ts,
        Just(COUNT(1)) AS hits
        {% endif %}
    FROM $base_query
    WHERE NOT $is_mobile_like_ua(useragent)
    GROUP BY yuid, `uuid`;

END DEFINE;

DEFINE ACTION $select_icookie($cookie) AS
    -- Writes yandexuid <-> icookie soup edges for hits that carry an icookie
    -- different from the yuid. $cookie is not used inside this action.
    $icookie_hits = (
        SELECT
            yuid,
            icookie AS id_value,
            dt AS id_date,
            ts
        FROM $preparsed_watchlog
        WHERE icookie IS NOT NULL
    );

    INSERT INTO $watchlog_soup
    SELECT
        LogSource::WATCH_LOG() AS logSource,
        SourceType::HTTP_HEADER_COOKIE() AS sourceType,
        yuid AS id1,
        IdType::YANDEXUID() AS id1Type,
        id_value AS id2,
        IdType::ICOOKIE() AS id2Type,
        -- make nullable fields to allow python append into soup
        {% if stream %}
        AGGREGATE_LIST_DISTINCT(id_date) AS dates
        {% else %}
        Nothing(Int64?) AS ts,
        Just(COUNT(1)) AS hits
        {% endif %}
    FROM $icookie_hits
    -- keep only pairs where the two ids differ
    WHERE yuid != id_value
    GROUP BY yuid, id_value;

END DEFINE;

DEFINE ACTION $parse_did($cookie) AS
    -- Extracts the `did` value of the `yp` cookie from headerargs and writes
    -- yuid <-> device id pairs, separately for the 'android' and 'ios'
    -- OS families reported by the user agent.
    -- $cookie is not used inside this action.

    $did_pattern = @@yp=[^;]*\.did\.(?P<did>[a-zA-Z0-9\-]+)@@;
    $extract_did = Re2::Capture($did_pattern);

    -- Hits that carry a did, with the OS family passed through $lower_strip.
    $did_hits = (
        SELECT
            yuid,
            icookie,
            dt,
            ts,
            mmetric_devid,
            $lower_strip(UserAgent::Parse(useragent).OSFamily) AS os
        FROM (
            SELECT
                yuid,
                icookie,
                ts,
                dt,
                $extract_did(headerargs).did AS mmetric_devid,
                useragent
            FROM $preparsed_watchlog
            WHERE headerargs REGEXP $did_pattern
        )
        WHERE mmetric_devid IS NOT NULL
    );

    DEFINE ACTION $parse_did_for_os($os) AS
        -- Any value other than 'ios' gets the android source types.
        $is_ios = ($os == 'ios');

        $raw_source_type = IF(
            $is_ios,
            $config.ID_SOURCE_TYPE_WATCH_YP_IOS,
            $config.ID_SOURCE_TYPE_WATCH_YP_ANDROID
        );

        $soup_source_type = IF(
            $is_ios,
            SourceType::WATCH_YP_COOKIE_IOS(),
            SourceType::WATCH_YP_COOKIE_ANDROID()
        );

        {% if not stream %}

        -- Daily raw table: hit counts per (yuid, device id).
        $raw_table = $devid_raw || 'mmetric_devid_yuid_' || $raw_source_type;

        INSERT INTO $raw_table WITH TRUNCATE
        SELECT
            yuid,
            TopFreq(icookie, $top_freq_icookies) AS icookies,
            mmetric_devid,
            $date AS dt,
            COUNT(1) AS hits_count,
            MAX(CAST(ts AS Int64)) AS last_ts,
            $raw_source_type AS source_type
        FROM $did_hits
        WHERE os == $os
        GROUP BY yuid, mmetric_devid;

        {% endif %}

        -- Soup edges yandexuid -> device id.
        INSERT INTO $watchlog_soup
        SELECT
            LogSource::WATCH_LOG() AS logSource,
            $soup_source_type AS sourceType,
            yuid AS id1,
            IdType::YANDEXUID() AS id1Type,
            mmetric_devid AS id2,
            IdType::MM_DEVICE_ID() AS id2Type,
            -- make nullable fields to allow python append into soup
            {% if stream %}
            AGGREGATE_LIST_DISTINCT(dt) AS dates
            {% else %}
            Nothing(Int64?) AS ts,
            Just(COUNT(1)) AS hits
            {% endif %}
        FROM $did_hits
        WHERE os == $os
        GROUP BY yuid, mmetric_devid;

    END DEFINE;

    DO $parse_did_for_os('android');
    DO $parse_did_for_os('ios');

END DEFINE;

DEFINE ACTION $decrypt_metrica_sockets() AS
    -- Decrypts the browserinfo value stored under key 'di' with
    -- SocketDecryptor (secure param 'token:metrika') and links the hit's
    -- yandexuid - or its duid when the yandexuid is not valid - to the
    -- ifa / huawei_aid / google_aid / uuid entries of the decrypted value.
    $socket_decryptor = SocketDecryptor::Decryptor(SecureParam('token:metrika'));

    $socket_hits = (
        SELECT
            yuid,
            duid,
            Identifiers::IsValidYandexuid(yuid) AS validYuid,
            $socket_decryptor(DictLookup(Metrika::ParseBrowserinfo(browserinfo), 'di')) AS di,
            $lower_strip(UserAgent::Parse(useragent).OSFamily) AS os,
            dt,
        FROM $preparsed_watchlog_all
        WHERE
            browserinfo REGEXP @@(^|\:)di\:[^\:]+($|\:)@@
            AND NOT $uniqid_was_changed(browserinfo)
            AND (
                Identifiers::IsValidYandexuid(yuid)
                OR Identifiers::IsValidDuid(duid)
            )
    );

    -- One row per hit with the candidate linked ids collected into a list.
    $socket_edges = (
        SELECT
            IF(validYuid, yuid, duid) AS id,
            IF(validYuid, IdType::YANDEXUID(), IdType::DUID()) AS idType,
            -- every OS family other than "ios" is reported as android
            IF(
                os == "ios",
                SourceType::APP_METRICA_SOCKETS_IOS(),
                SourceType::APP_METRICA_SOCKETS_ANDROID()
            ) AS sourceType,
            dt,
            AsList(
                AsStruct(IdType::IDFA() AS idType, di["ifa"] AS id),
                AsStruct(IdType::OAID() AS idType, di["huawei_aid"] AS id),
                AsStruct(IdType::GAID() AS idType, di["google_aid"] AS id),
                AsStruct(IdType::UUID() AS idType, di["uuid"] AS id),
            ) AS linked
        FROM $socket_hits
        WHERE di IS NOT NULL
    );

    INSERT INTO $watchlog_soup
    SELECT
        [dt ?? ""] AS dates,
        id AS id1,
        idType AS id1Type,
        linked.id AS id2,
        linked.idType AS id2Type,
        LogSource::WATCH_LOG() AS logSource,
        sourceType,
    FROM $socket_edges
    FLATTEN LIST BY linked
    WHERE linked.id IS NOT NULL;

END DEFINE;

DEFINE ACTION $select_idstorage($cookie) AS
    -- Writes one row per value of the column named by $cookie ('yuid' or
    -- 'icookie'): first / last date seen, last timestamp and the parsed
    -- traits of the user agent taken from the hit with the greatest ts.
    $cookie_id_type = (
        CASE $cookie
            WHEN 'yuid' THEN IdType::YANDEXUID()
            WHEN 'icookie' THEN IdType::ICOOKIE()
            ELSE 'unknown'
        END
    );

    $hits = (
        SELECT
            t.$cookie AS id,
            $cookie_id_type AS id_type,
            t.useragent AS useragent,
            t.ts AS ts,
            t.dt AS dt
        FROM $preparsed_watchlog AS t
        WHERE t.$cookie IS NOT NULL
    );

    $per_id = (
        SELECT
            id,
            id_type,
            MIN(dt) AS date_begin,
            MAX(dt) AS date_end,
            MAX_BY(useragent, ts) AS useragent,
            MAX(ts) AS ts
        FROM $hits
        GROUP BY id, id_type
    );

    -- The user agent is parsed once per id, after the aggregation.
    $traits = (
        SELECT
            id,
            id_type,
            ua.BrowserName AS browser_name,
            ua.BrowserVersion AS browser_version,
            ua.OSName AS os_name,
            ua.OSFamily AS os_family,
            ua.OSVersion AS os_version,
            ua.isEmulator AS is_emulator,
            ua.isBrowser AS is_browser,
            ua.isMobile AS is_mobile,
            ua.isTablet AS is_tablet,
            ua.isTouch AS is_touch,
            ua.isRobot AS is_robot,
            ua.isTV AS is_tv,
            date_begin,
            date_end,
            ts
        FROM (
            SELECT
                id,
                id_type,
                UserAgent::Parse(useragent) AS ua,
                date_begin,
                date_end,
                ts
            FROM $per_id
        )
    );

    {% if stream %}
    $idstorage_table = $watchlog_idstorage_dir || $cookie_id_type || '/' || $uniqid;
    {% else %}
    $idstorage_table = $watchlog_idstorage_dir || 'idstorage_' || $cookie_id_type;
    {% endif %}

    INSERT INTO $idstorage_table WITH TRUNCATE
    SELECT * FROM $traits;
END DEFINE;

-- === RUN WATCHLOG PARSING ================================================ --
-- Runs every per-cookie parsing action with the same $cookie argument.
-- The path hints below name the daily (non-stream) raw tables; every action
-- except $select_ecommerce also appends to the soup table.
DEFINE ACTION $parse_watchlog_for_cookie($cookie) AS
    -- <cookie>_raw/<cookie>_with_<email id type>_<page title source type>
    DO $select_page_titles($cookie);
    -- <cookie>_raw/<cookie>_with_<mac id type>_<vmetro source type>
    DO $select_vmetro($cookie);
    -- purchase log + ecommerce/yuid_with_purchase_day
    DO $select_ecommerce($cookie);
    -- perfect/devid_raw_day/uuid_yuid_<source type>
    DO $parse_search_lib_uuid($cookie);
    -- yuid <-> icookie soup edges only (no raw table)
    DO $select_icookie($cookie);
    -- perfect/devid_raw_day/mmetric_devid_yuid_<source type>
    DO $parse_did($cookie);
END DEFINE;

DEFINE ACTION $fp_soup() AS
    -- Writes yandexuid soup edges to the passport uid and to the VK / OK ids
    -- parsed from the referer (preferred) or the url of the hit.
    $fp_ids = (
        SELECT
            {% if stream %}
            String::SplitToList(iso_eventtime, ' ')[0] AS dt,
            {% else %}
            $date AS dt,
            {% endif %}
            $fp_capture_login(headerargs).g AS login,
            $force_null($fp_parse_vk_id_from_url(referer))
                ?? $force_null($fp_parse_vk_id_from_url(url)) AS vk_com_id,
            $force_null($fp_parse_ok_id_from_url(referer))
                ?? $force_null($fp_parse_ok_id_from_url(url)) AS ok_ru_id,
            $force_null(passportuid) AS puid,
            Identifiers::NormalizeYandexuid(uniqid) AS yuid
        FROM $watchlog_combined
    );

    INSERT INTO $watchlog_soup
    SELECT
        id1,
        id1Type,
        id2,
        id2Type,
        sourceType,
        logSource,
        AGGREGATE_LIST_DISTINCT(dt) AS dates
    FROM (
        -- yandexuid -> puid
        SELECT
            yuid AS id1,
            puid AS id2,
            IdType::YANDEXUID() AS id1Type,
            IdType::PUID() AS id2Type,
            SourceType::PASSPORT_AUTH() AS sourceType,
            LogSource::WATCH_LOG() AS logSource,
            dt
        FROM $fp_ids

        UNION ALL

        -- yandexuid -> vk id
        SELECT
            yuid AS id1,
            vk_com_id AS id2,
            IdType::YANDEXUID() AS id1Type,
            IdType::VK_ID() AS id2Type,
            SourceType::VK_APPS_REDIR() AS sourceType,
            LogSource::WATCH_LOG() AS logSource,
            dt
        FROM $fp_ids

        UNION ALL

        -- yandexuid -> ok id
        SELECT
            yuid AS id1,
            ok_ru_id AS id2,
            IdType::YANDEXUID() AS id1Type,
            IdType::OK_ID() AS id2Type,
            SourceType::OK_APPS_REDIR() AS sourceType,
            LogSource::WATCH_LOG() AS logSource,
            dt
        FROM $fp_ids
    )
    -- rows where either side could not be extracted are dropped
    WHERE id1 IS NOT NULL
        AND id2 IS NOT NULL
    GROUP BY
        id1,
        id2,
        id1Type,
        id2Type,
        sourceType,
        logSource;
END DEFINE;

DEFINE ACTION $duid_index() AS
    -- Writes the last timestamp seen for every (duid, yandexuid, counter id)
    -- triple whose duid and yandexuid both pass the "significant" checks.
    {% if stream %}
    $duid_table = $watchlog_idstorage_dir || 'duid/' || $uniqid;
    {% else %}
    $duid_table = $watchlog_idstorage_dir || 'idstorage_duid';
    {% endif %}

    INSERT INTO $duid_table WITH TRUNCATE
    SELECT
        duid,
        yandexuid,
        counter_id,
        MAX(ts) AS last_ts,
    FROM (
        SELECT
            -- values that fail the cast are stored as 0
            CAST(duid AS UInt64) ?? 0 AS duid,
            CAST(yuid AS UInt64) ?? 0 AS yandexuid,
            CAST(counterid AS UInt64) ?? 0 AS counter_id,
            ts,
        FROM $preparsed_watchlog
        WHERE Identifiers::IsSignificantDuid(duid)
            AND Identifiers::IsSignificantYandexuid(yuid)
    )
    GROUP BY duid, yandexuid, counter_id
    ORDER BY duid, yandexuid, counter_id;
END DEFINE;


DEFINE ACTION $parse_client_user_id_as_id($approved_domens, $id_type, $is_valid, $normalize) AS
    -- Treats $ClientUserID(params) as an identifier of type $id_type() for
    -- hits whose $get_domain(url) is in $approved_domens.
    --   $approved_domens - collection of allowed domains (right side of IN)
    --   $id_type         - callable returning the soup id type of the value
    --   $is_valid        - predicate applied to the raw client user id
    --   $normalize       - normaliser applied to the raw client user id
    -- Writes yandexuid and duid edges to the soup and the duid edges
    -- (with the domain) to $user_params_log.

    -- yandexuid edges
    INSERT INTO $watchlog_soup
    SELECT
        LogSource::WATCH_LOG() AS logSource,
        SourceType::METRICA_USER_PARAMS() AS sourceType,
        id1,
        IdType::YANDEXUID() AS id1Type,
        id2,
        $id_type() AS id2Type,
        -- make nullable fields to allow python append into soup
        {% if stream %}
        AGGREGATE_LIST_DISTINCT(dt) AS dates
        {% else %}
        Nothing(Int64?) AS ts,
        Just(COUNT(1)) AS hits
        {% endif %}
    FROM $preparsed_watchlog
    WHERE
        $get_domain(url) IN $approved_domens
        AND $is_valid($ClientUserID(params))
    GROUP BY
        yuid AS id1,
        $normalize($ClientUserID(params)) AS id2;

    -- duid edges
    $duid_edges = (
        SELECT
            LogSource::WATCH_LOG() AS logSource,
            SourceType::METRICA_USER_PARAMS() AS sourceType,
            id1,
            IdType::DUID() AS id1Type,
            id2,
            $id_type() AS id2Type,
            SOME(domain) AS domain,
            -- make nullable fields to allow python append into soup
            {% if stream %}
            AGGREGATE_LIST_DISTINCT(dt) AS dates
            {% else %}
            Nothing(Int64?) AS ts,
            Just(COUNT(1)) AS hits
            {% endif %}
        FROM (
            SELECT
                CAST(duid AS String) AS id1,
                $normalize($ClientUserID(params)) AS id2,
                $get_domain(url) AS domain,
                dt,
            FROM $preparsed_watchlog_all
            WHERE
                $get_domain(url) IN $approved_domens
                AND Identifiers::IsSignificantDuid(duid)
                AND (yuid IS NULL OR yuid == duid) -- get itp slice, see more CRYPTA-14256
                AND $is_valid($ClientUserID(params))
        )
        GROUP BY id1, id2
    );

    -- the soup gets the edges without the helper domain column
    INSERT INTO $watchlog_soup
    SELECT * WITHOUT domain
    FROM $duid_edges;

    -- the user params log keeps every column, including the domain
    INSERT INTO $user_params_log
    SELECT *
    FROM $duid_edges;

END DEFINE;

DEFINE ACTION $fill_params_soup($parsed_wl_f_rows, $source_type) AS
    -- Turns the rows of the subquery $parsed_wl_f_rows (columns yuid, duid,
    -- id2, id2Type, uwc, dt) into yandexuid and duid soup edges with the
    -- given $source_type.

    -- yandexuid edges: skip rows flagged in `uwc`
    $yuid_edges = (
        SELECT
            id1,
            IdType::YANDEXUID() AS id1Type,
            id2,
            id2Type,
            $source_type AS sourceType,
            LogSource::WATCH_LOG() AS logSource,
            AGGREGATE_LIST_DISTINCT(dt) AS dates
        FROM $parsed_wl_f_rows()
        WHERE
            NOT uwc
            AND Identifiers::IsSignificantYandexuid(yuid)
            AND Identifiers::IsValid(id2Type, id2)
        GROUP BY yuid AS id1, id2, id2Type
    );

    -- duid edges: the `uwc` flag is not checked here
    $duid_edges = (
        SELECT
            id1,
            IdType::DUID() AS id1Type,
            id2,
            id2Type,
            $source_type AS sourceType,
            LogSource::WATCH_LOG() AS logSource,
            AGGREGATE_LIST_DISTINCT(dt) AS dates
        FROM $parsed_wl_f_rows()
        WHERE
            Identifiers::IsSignificantDuid(duid)
            AND Identifiers::IsValid(id2Type, id2)
        GROUP BY duid AS id1, id2, id2Type
    );

    -- Unwrap strips optionality from the key columns before the append.
    INSERT INTO $watchlog_soup
    SELECT
        Unwrap(id1) AS id1,
        Unwrap(id2) AS id2,
        Unwrap(id1Type) AS id1Type,
        Unwrap(id2Type) AS id2Type,
        Unwrap(sourceType) AS sourceType,
        Unwrap(logSource) AS logSource,
        dates ?? [] AS dates,
    FROM (
        SELECT * FROM $yuid_edges
        UNION ALL
        SELECT * FROM $duid_edges
    );

END DEFINE;


DEFINE ACTION $parse_fi() AS
    -- Builds soup edges (source type ANTIVISOR) from the `/__ym/fi` value of
    -- the hit params. The value is matched against
    -- <word>(<0|1>)<word>(<path>)<word>(<value>); type "0" maps to
    -- PHONE_SHA256 and type "1" to EMAIL_SHA256.

    $fi_id_types = {
        "0": IdType::PHONE_SHA256(),
        "1": IdType::EMAIL_SHA256(),
    };

    -- base64 text -> lower-case hex text
    $to_hex = ($encoded) -> {
        RETURN String::AsciiToLower(String::HexEncode(String::Base64Decode($encoded)));
    };

    $fi_pattern = Re2::Capture(@@^\w+\((?P<type>[01])\)\w+\((?P<path>\S+)\)\w+\((?P<value>\S+)\)$@@);

    -- (type, value) struct for a matching fi string, NULL otherwise
    $fi_to_id = ($fi) -> {
        $parts = $fi_pattern($fi);
        RETURN CASE
            WHEN $parts._0 IS NULL THEN NULL
            ELSE AsStruct(
                $fi_id_types[$parts.type] AS type,
                $to_hex($parts.value) AS value
            )
        END;
    };

    -- list with zero or one id struct for the given params json
    $extract_fi_ids = ($params) -> {
        $fi = Yson::ConvertToString(Yson::YPath(Yson::ParseJson($params), "/__ym/fi"));
        RETURN ListNotNull(ListMap([$fi], $fi_to_id));
    };

    DEFINE SUBQUERY $fi_id_rows() AS
        SELECT
            yuid,
            duid,
            fi_row.value AS id2,
            fi_row.type AS id2Type,
            uwc,
            dt,
        FROM (
            SELECT
                yuid,
                duid,
                $extract_fi_ids(params) AS fi_rows,
                $uniqid_was_changed(browserinfo) AS uwc,
                dt
            FROM $preparsed_watchlog_all
            -- cheap substring pre-filter before the json parsing
            WHERE params LIKE '%__ym%fi%'
        )
        FLATTEN LIST BY fi_rows AS fi_row
    END DEFINE;

    DO $fill_params_soup($fi_id_rows, SourceType::ANTIVISOR());

END DEFINE;


DEFINE ACTION $parse_fpp() AS
    -- Builds soup edges (source type FIRST_PARTY_PARAMS) from the list under
    -- `/__ym/fpp` in the hit params. Each list item is read as a string list
    -- whose first element names the kind and whose second element is the
    -- value; only the kinds listed in $fpp_id_types are kept.

    $fpp_id_types = {
        "phone_number": IdType::PHONE_SHA256(),
        "email": IdType::EMAIL_SHA256(),
        "yandex_cid": IdType::PUID(),
    };

    -- "yandex_cid" values are taken as is; the other kinds are converted
    -- from base64 text to lower-case hex text
    $decode_fpp_value = ($kind, $raw) -> {
        RETURN CASE
            WHEN $kind == "yandex_cid" THEN $raw
            ELSE String::AsciiToLower(String::HexEncode(String::Base64Decode($raw)))
        END;
    };

    -- (type, value) struct for a known kind, NULL otherwise
    $fpp_to_id = ($item) -> {
        $pair = Yson::ConvertToStringList($item);
        $id_type = $fpp_id_types[$pair[0]];
        RETURN CASE
            WHEN $id_type IS NULL THEN NULL
            ELSE AsStruct(
                $id_type AS type,
                $decode_fpp_value($pair[0], $pair[1]) AS value
            )
        END;
    };

    -- list of id structs for the given params json
    $extract_fpp_ids = ($params) -> {
        $items = Yson::ConvertToList(Yson::YPath(Yson::ParseJson($params), "/__ym/fpp"));
        RETURN ListNotNull(ListMap($items, $fpp_to_id));
    };

    DEFINE SUBQUERY $fpp_id_rows() AS
        SELECT
            yuid,
            duid,
            fpp_row.value AS id2,
            fpp_row.type AS id2Type,
            uwc,
            dt,
        FROM (
            SELECT
                yuid,
                duid,
                $extract_fpp_ids(params) AS fpp_rows,
                $uniqid_was_changed(browserinfo) AS uwc,
                dt
            FROM $preparsed_watchlog_all
            -- cheap substring pre-filter before the json parsing
            WHERE params LIKE '%__ym%fpp%'
        )
        FLATTEN LIST BY fpp_rows AS fpp_row
    END DEFINE;

    DO $fill_params_soup($fpp_id_rows, SourceType::FIRST_PARTY_PARAMS());

END DEFINE;


-- Disabled: running the per-cookie parsing for every entry of $cookies.
-- Only 'yuid' is parsed below.
-- EVALUATE FOR $cookie IN $cookies
--     DO $parse_watchlog_for_cookie($cookie);

-- Table with the columns `domen`, `email`, `email_md5`, `email_sha256` and
-- `phone` that are read below.
$approved_domens = "{{ approved_domens_table }}";

-- One set of domains per identifier kind: the rows where the corresponding
-- flag column is true.
$EMAIL_APPROVED_DOMENS = SELECT ToSet(AGGREGATE_LIST(domen)) FROM $approved_domens WHERE email;
$EMAIL_MD5_APPROVED_DOMENS = SELECT ToSet(AGGREGATE_LIST(domen)) FROM $approved_domens WHERE email_md5;
$EMAIL_SHA256_APPROVED_DOMENS = SELECT ToSet(AGGREGATE_LIST(domen)) FROM $approved_domens WHERE email_sha256;
$PHONE_APPROVED_DOMENS = SELECT ToSet(AGGREGATE_LIST(domen)) FROM $approved_domens WHERE phone;


-- Both tables are only appended to (INSERT INTO without TRUNCATE) by the
-- actions below, so they are dropped first.
DROP TABLE $watchlog_soup; COMMIT;
DROP TABLE $user_params_log; COMMIT;

DO $decrypt_metrica_sockets();
DO $parse_watchlog_for_cookie('yuid');
DO $select_idstorage('yuid');
DO $select_idstorage('icookie');
DO $duid_index();

-- Client user ids interpreted as e-mail / e-mail hash / phone, each limited
-- to its own set of approved domains.
DO $parse_client_user_id_as_id($EMAIL_APPROVED_DOMENS, IdType::EMAIL, Identifiers::IsValidEmail, Identifiers::NormalizeEmail);
DO $parse_client_user_id_as_id($EMAIL_MD5_APPROVED_DOMENS, IdType::EMAIL_MD5, Identifiers::IsValidEmailMd5, Identifiers::NormalizeEmailMd5);
DO $parse_client_user_id_as_id($EMAIL_SHA256_APPROVED_DOMENS, IdType::EMAIL_SHA256, Identifiers::IsValidEmailSha256, Identifiers::NormalizeEmailSha256);
DO $parse_client_user_id_as_id($PHONE_APPROVED_DOMENS, IdType::PHONE, $check_phone, Identifiers::NormalizePhone);

DO $parse_fi();
DO $parse_fpp();
DO $fp_soup();
