PRAGMA yson.DisableStrict;
{% if not is_embedded %}
PRAGMA Library = 'aggregation_lib.sql';
PRAGMA Library = 'metrica_lib.sql';
PRAGMA Library = 'ut_utils_lib.sql';
{% endif %}

IMPORT {% if is_embedded %}.lib.{% endif %}aggregation_lib SYMBOLS
    $aggregate_uniq_arrays,
    $aggregate_sum_dict,
    $aggregate_sum_dict_dicts,
    $aggregate_merge_dicts,
    $aggregate_merge_multi_dicts,
    $flat_multi_dict,
    $dump_as_dict_int
;

IMPORT {% if is_embedded %}.lib.{% endif %}metrica_lib SYMBOLS
    $strip,
    $force_null,
    $get_connection_type,
    $clean_ip,
    $dev_features,
    $parse_macs,
    $get_device_type,
    $get_main_regions,
    $get_os,

    $dict_join,
    $dict_dict_join,

    $seconds_from_table
;

IMPORT {% if is_embedded %}.lib.{% endif %}ut_utils_lib SYMBOLS
    $encode_startup
;

-- YT operation tuning for this query.
-- NOTE(review): the two TentativeTree* settings presumably bound when jobs may run in a
-- tentative pool tree -- confirm against the YT scheduler documentation.
PRAGMA yt.TentativeTreeEligibilityMaxJobDurationRatio = '2.5';
PRAGMA yt.TentativeTreeEligibilityMinJobDuration = '{{ 1 * 60 * 1000 }}';  -- in milliseconds (the template arithmetic renders to 60000)

-- PRAGMA yt.DataSizePerJob = '4G';
PRAGMA yt.MaxSpeculativeJobCountPerTask = '50';
-- PRAGMA yt.UseTmpfs = 'True';
-- Per-job input size; a later PRAGMA in this file sets the same option to '2G'.
PRAGMA yt.DataSizePerJob = '1G';
PRAGMA yt.DefaultOperationWeight = '2';
-- PRAGMA yt.MaxRowWeight = '32M';
-- PRAGMA yt.PublishedCompressionCodec = 'zstd_3';
PRAGMA yt.TemporaryAutoMerge = 'disabled';

{% set group_by_clause = 'GROUP BY' %}

-- $date = '{{ date }}';
-- Formatter producing YYYY-MM-DD strings; used below to derive the dt column from event timestamps.
$date_format = DateTime::Format("%Y-%m-%d");

-- tables to use
-- input tables
-- Each list is rendered from a template sequence joined into quoted string literals.
-- NOTE(review): an empty template sequence renders a single empty string here; the statements
-- that read these lists appear to be guarded by template conditions -- confirm for every input.
$metrica_log_tables = AsList(
    '{{ metrica_input | join("', '") | safe }}'
);
$yabro_log_tables = AsList(
    '{{ yabro_input | join("', '") | safe }}'
);
$pp_log_tables = AsList(
    '{{ pp_input | join("', '") | safe }}'
);
-- Join sources for the cross-profile (yandexuid to uuid) edges.
$cross_mobmet = "{{ cross_mobmet }}";
$metrika_params_owners = "{{ metrika_params_owners }}";

-- output tables
-- The soup table is the main output; the remaining outputs live under the extra-data
-- directory, partitioned by date and by the per-run unique id.
$metrica_soup = "{{ output }}";
$dev_info_table = "{{ output_extra_data_dir }}/{{ date }}/dev_info_table/{{ uniqid }}";
$uuid_info_table = "{{ output_extra_data_dir }}/{{ date }}/uuid_info_table/{{ uniqid }}";
$fuzzy2_metrica = "{{ output_extra_data_dir }}/{{ date }}/fuzzy2_metrica/{{ uniqid }}";
$am_log_table = "{{ output_extra_data_dir }}/{{ date }}/am_log_table/{{ uniqid }}";

-- ========================================================================= --
-- for custom user attributes

{% include 'for_custom_user_attributes.sql.j2' %}

-- ========================================================================= --

-- Normalizes an identifier of the given type; yields NULL when the identifiers
-- library does not consider the value significant.
$clean = ($type, $value) -> (
    IF(
        Identifiers::IsSignificant($type, $value),
        Identifiers::Normalize($type, $value),
        NULL
    )
);

-- Predicate shared by the four custom-attribute columns below: the event carries at
-- least one custom attribute id and its API key belongs to the approved set.
-- NOTE(review): $EMAIL_PHONE_CUSTOM_ATTRIBUTE_APPROVED_APIKey is not defined in this file;
-- presumably it comes from the included custom-attributes template -- confirm.
$has_approved_custom_attributes = ($attribute_ids, $api_key) -> (
    ListLength($attribute_ids) > 0
    AND $api_key IN $EMAIL_PHONE_CUSTOM_ATTRIBUTE_APPROVED_APIKey
);

-- First parsing stage: reads the raw AppMetrica log tables and keeps only the
-- columns the rest of the query needs, with identifiers cleaned and NULL lists
-- replaced by empty ones.
$metrica_required_fields = (
    SELECT
        _logfeller_timestamp AS ts,
        -- event time floored to a 600-second bucket
        _logfeller_timestamp - _logfeller_timestamp % 600 AS timestamp_10m,

        -- device identifiers
        CAST(DeviceIDHash AS String) AS mmetric_device_id_hash,
        $clean(IdType::MM_DEVICE_ID(), DeviceID) AS mmetric_device_id,
        $clean(IdType::IDFA(), OriginalDeviceID) AS idfa,
        $clean(IdType::IFV(), IFV) AS ifv,
        $clean(IdType::ANDROID_ID(), AndroidID) AS android_id,
        $clean(IdType::GAID(), ADVID) AS google_adv_id,
        $clean(IdType::OAID(), OAID) AS open_id,
        COALESCE(Imei, []) AS imeis,

        -- sim cards: operator names plus (country code, operator id) pairs
        COALESCE(SimCards_OperatorsNames, []) AS simcards_operators_names,
        ListZip(
            COALESCE(CAST(SimCards_CountriesCodes AS List<Uint64>), []),
            COALESCE(CAST(SimCards_OperatorsIDs AS List<Uint64>), [])
        ) AS simcards_operators_ids,

        -- hardware and OS description
        $strip(Model) AS model,
        $strip(Manufacturer) AS manufacturer,
        $get_os(OperatingSystem) AS os,
        $strip(OSVersion) AS os_version,
        $get_device_type(DeviceType) AS device_type,
        CAST(ScreenWidth AS Int64) AS screen_width,
        CAST(ScreenHeight AS Int64) AS screen_height,

        Locale AS locale,
        $get_connection_type(COALESCE(ConnectionType, 0)) AS connection_type,
        CAST(RegionID AS Int32) AS region_id,

        -- application names reported by identity events (EventType 8, "EVENT_IDENTITY");
        -- an empty list for every other event
        COALESCE(
            IF(
                EventType == 8,
                Yson::ConvertToStringList(
                    Yson::YPath(Yson::ParseJson(EventValue), "/dfid/apps/names")
                )
            ),
            []
        ) AS event_apps,
        NetworksInterfaces_Macs,
        NetworksInterfaces_Names,

        -- application instance
        `UUID` AS `uuid`,
        AppVersionName AS app_version,
        APIKey AS api_key,
        AppID AS app_id,

        $clean_ip(Ip::ToString(ClientIP)) AS ip,
        -- Custom profile id of user issued by the seller an application developer via SDK setUserProfileID()
        ProfileID AS profile_id,

        -- Custom Attributes https://appmetrica.yandex.ru/docs/data-collection/profile-attributes.html#custom
        -- Kept only for approved API keys; otherwise replaced by an empty list.
        IF(
            $has_approved_custom_attributes(CustomAttribute_Ids, APIKey),
            CustomAttribute_Ids,
            ListCreate(Uint64)
        ) AS custom_attribute_ids,
        IF(
            $has_approved_custom_attributes(CustomAttribute_Ids, APIKey),
            CustomAttribute_Types,
            ListCreate(Uint32)
        ) AS custom_attribute_types,
        IF(
            $has_approved_custom_attributes(CustomAttribute_Ids, APIKey),
            CustomAttribute_StringValues,
            ListCreate(String)
        ) AS custom_attribute_strings,
        IF(
            $has_approved_custom_attributes(CustomAttribute_Ids, APIKey),
            CustomAttribute_NumberValues,
            ListCreate(Double)
        ) AS custom_attribute_doubles

    FROM EACH($metrica_log_tables)
    -- CRYPTR-1533
    WHERE AppID != 'ru.yandex.botan'
        {% if filter_system_events|default(True) %}
        AND COALESCE(EventSource, 0) != 3
        AND (
            -- excluded event types, as named by the original notes:
            -- 'EVENT_STATBOX', 'EVENT_CLIENT', 'EVENT_%_NOTIFICATION'
            COALESCE(EventType, 0) NOT IN {11, 4, 15, 31, 32}
            OR EventName LIKE 'AM_System AM info%'
        ){% endif %}
);

-- Extracts (mmetric_device_id, yandexuid, dt) rows from 'url opened' client events.
--   $logs     - list of log table paths to read
--   $api_keys - API keys whose events are taken
-- yandexuid is NULL when the event payload holds no valid yandexuid, so the
-- "yandexuid IS NOT NULL" filters applied by the consumers of this subquery drop such rows.
DEFINE SUBQUERY $UrlOpenEvent($logs, $api_keys) AS
    -- Picks the yandexuid from the JSON event payload: the "yandexuid.ru" key is
    -- preferred, then "yandexuid"; each must pass Identifiers::IsValidYandexuid.
    $get_uopen_yuid = ($EventValue) -> {
        -- non-strict parsing with auto-conversion
        $yoptions = Yson::Options(False AS Strict, True AS AutoConvert);
        $parsed = Yson::ParseJson($EventValue, $yoptions);
        $yuid = Yson::LookupString($parsed, "yandexuid", $yoptions);
        $ru = Yson::LookupString($parsed, "yandexuid.ru", $yoptions);
        RETURN CASE
            WHEN Identifiers::IsValidYandexuid($ru) THEN $ru
            WHEN Identifiers::IsValidYandexuid($yuid) THEN $yuid
            -- NULL rather than a "0" sentinel: a sentinel string would pass every
            -- IS NOT NULL check downstream and end up as a bogus identifier.
            ELSE NULL
        END;
    };

    SELECT
        $clean(IdType::MM_DEVICE_ID(), DeviceID) AS mmetric_device_id,
        $get_uopen_yuid(EventValue) AS yandexuid,
        -- calendar date of the event; Unwrap fails the query if the timestamp
        -- is NULL or does not fit into UInt32
        $date_format(DateTime::FromSeconds(
            Unwrap(CAST(_logfeller_timestamp AS UInt32))
        )) AS dt,

    FROM EACH($logs)
    WHERE
        EventType == 4 -- 'EVENT_CLIENT'
        AND APIKey IN $api_keys AND EventName == 'url opened';
END DEFINE;

-- 'url opened' events from the yabro log tables, restricted to the API keys
-- that the original notes attribute to yabro on iOS and Android.
DEFINE SUBQUERY $YaBroEvents() AS
    $yabro_api_keys = [
        -- yabro ios
        19531,
        19534,
        -- yabro android
        106400,
        106450,
        149390
    ];

    SELECT *
    FROM $UrlOpenEvent($yabro_log_tables, $yabro_api_keys);
END DEFINE;

-- 'url opened' events from the pp log tables, restricted to a fixed set of API keys.
DEFINE SUBQUERY $PpEvents() AS
    $pp_api_keys = [10321, 372509, 10318];

    SELECT *
    FROM $UrlOpenEvent($pp_log_tables, $pp_api_keys);
END DEFINE;

-- Second parsing stage over the app metrica log: passes the required fields
-- through and adds the derived columns (ua_profile, connection flags, features, macs).
$parse_log = (
    SELECT
        -- event time
        fields.ts AS ts,
        fields.timestamp_10m AS timestamp_10m,

        -- device identifiers
        fields.mmetric_device_id_hash AS mmetric_device_id_hash,
        fields.mmetric_device_id AS mmetric_device_id,
        fields.idfa AS idfa,
        fields.ifv AS ifv,
        fields.android_id AS android_id,
        fields.open_id AS open_id,
        fields.google_adv_id AS google_adv_id,
        fields.imeis AS imeis,
        fields.simcards_operators_names AS simcards_operators_names,
        fields.simcards_operators_ids AS simcards_operators_ids,

        -- user agent description and its encoded profile
        fields.model AS model,
        fields.manufacturer AS manufacturer,
        fields.os AS os,
        fields.os_version AS os_version,
        fields.device_type AS device_type,
        fields.screen_width AS screen_width,
        fields.screen_height AS screen_height,
        $encode_startup(
            fields.os,
            fields.os_version,
            fields.manufacturer,
            fields.device_type,
            fields.model
        ) AS ua_profile,

        -- locale, connection and region
        fields.locale AS locale,
        fields.connection_type AS connection_type,
        -- each flag carries the 10-minute bucket when the event used that
        -- connection type and is NULL otherwise
        IF(fields.connection_type == 'cell', fields.timestamp_10m) AS connection_cell_flag,
        IF(fields.connection_type == 'wifi', fields.timestamp_10m) AS connection_wifi_flag,
        fields.region_id AS region_id,

        -- device features
        $dev_features(fields.ts, fields.app_id) AS features,

        -- applications
        fields.event_apps AS event_apps,
        fields.app_id AS app_id,

        -- MAC addresses parsed from the network interface lists
        $parse_macs(fields.NetworksInterfaces_Macs, fields.NetworksInterfaces_Names) AS macs,

        -- application instance (uuid)
        fields.`uuid` AS `uuid`,
        fields.app_version AS app_version,
        fields.api_key AS api_key,

        fields.ip AS ip,
        fields.profile_id AS profile_id,

        -- custom attributes
        fields.custom_attribute_ids AS custom_attribute_ids,
        fields.custom_attribute_types AS custom_attribute_types,
        fields.custom_attribute_strings AS custom_attribute_strings,
        fields.custom_attribute_doubles AS custom_attribute_doubles

    FROM $metrica_required_fields AS fields
);

-- Final per-event view used by every aggregation below. Adds device_id (the first
-- non-NULL identifier), the event date and the combined application list.
$parsed_app_metrica = (
    SELECT
        parsed.idfa AS idfa,
        parsed.ifv AS ifv,
        parsed.open_id AS open_id,
        parsed.google_adv_id AS google_adv_id,
        parsed.mmetric_device_id AS mmetric_device_id,
        parsed.mmetric_device_id_hash AS mmetric_device_id_hash,
        -- first available identifier in this order of preference
        COALESCE(
            parsed.idfa,
            parsed.ifv,
            parsed.open_id,
            parsed.google_adv_id,
            parsed.mmetric_device_id
        ) AS device_id,

        -- Unwrap fails the query on a NULL timestamp or one that does not fit UInt32
        Unwrap(parsed.ts) AS ts,
        $date_format(
            DateTime::FromSeconds(Unwrap(CAST(parsed.ts AS UInt32)))
        ) AS dt,
        parsed.timestamp_10m AS timestamp_10m,

        parsed.android_id AS android_id,
        parsed.imeis AS imeis,
        parsed.simcards_operators_names AS simcards_operators_names,
        parsed.simcards_operators_ids AS simcards_operators_ids,

        parsed.model AS model,
        parsed.manufacturer AS manufacturer,
        parsed.os AS os,
        parsed.os_version AS os_version,
        parsed.device_type AS device_type,
        parsed.screen_width AS screen_width,
        parsed.screen_height AS screen_height,
        parsed.ua_profile AS ua_profile,
        parsed.locale AS locale,

        parsed.connection_cell_flag AS connection_cell_flag,
        parsed.connection_wifi_flag AS connection_wifi_flag,
        COALESCE(parsed.macs, []) AS macs,

        parsed.features AS features,

        -- applications from the event payload plus the reporting application itself
        IF(
            parsed.app_id IS NULL,
            parsed.event_apps,
            ListExtend(parsed.event_apps, AsList(Unwrap(parsed.app_id)))
        ) AS metrica_apps,
        parsed.app_id AS app_id,

        parsed.`uuid` AS `uuid`,
        parsed.app_version AS app_version,
        parsed.api_key AS api_key,
        parsed.ip AS ip,

        parsed.profile_id AS profile_id,

        parsed.custom_attribute_ids AS custom_attribute_ids,
        parsed.custom_attribute_types AS custom_attribute_types,
        parsed.custom_attribute_strings AS custom_attribute_strings,
        parsed.custom_attribute_doubles AS custom_attribute_doubles,

        parsed.region_id AS region_id
    FROM $parse_log AS parsed
);

-- Sets the per-job input size to '2G'; the same option is set to '1G' near the top of the file.
-- NOTE(review): presumably only operations declared after this point pick up the new value --
-- confirm against YQL pragma scoping rules.
PRAGMA yt.DataSizePerJob = '2G';

-- enable compression to default
-- PRAGMA yt.PublishedCompressionCodec = 'zstd_4';

-- === Final parse ========================================================= --

-- One row per application instance (uuid). Every MAX_BY below is ordered by ts,
-- with the ordering key set to NULL on rows where the aggregated value itself is NULL.
$uuid_info = (
    SELECT
        `uuid` AS id,
        'uuid' AS id_type,
        'mm' AS source,

        `uuid` AS `uuid`,

        -- application and OS
        MAX_BY(app_id, IF(app_id IS NOT NULL, ts)) AS app_id,
        MAX_BY(app_version, IF(app_version IS NOT NULL, ts)) AS app_version,
        MAX_BY(os, IF(os IS NOT NULL, ts)) AS os,

        -- device identifiers and user agent profile
        MAX_BY(device_id, IF(device_id IS NOT NULL, ts)) AS device_id,
        MAX_BY(idfa, IF(idfa IS NOT NULL, ts)) AS idfa,
        MAX_BY(ifv, IF(ifv IS NOT NULL, ts)) AS ifv,
        MAX_BY(android_id, IF(android_id IS NOT NULL, ts)) AS android_id,
        MAX_BY(open_id, IF(open_id IS NOT NULL, ts)) AS open_id,
        MAX_BY(google_adv_id, IF(google_adv_id IS NOT NULL, ts)) AS google_adv_id,
        MAX_BY(mmetric_device_id_hash, IF(mmetric_device_id_hash IS NOT NULL, ts)) AS mmetric_device_id_hash,
        MAX_BY(mmetric_device_id, IF(mmetric_device_id IS NOT NULL, ts)) AS mmetric_device_id,
        MAX_BY(ua_profile, IF(ua_profile IS NOT NULL, ts)) AS ua_profile,

        -- API keys aggregated with the sum-dict factory, then dumped via $dump_as_dict_int
        $dump_as_dict_int(AGGREGATE_BY(api_key, $aggregate_sum_dict)) AS api_keys,

        AGGREGATE_LIST_DISTINCT(dt) AS dates,
        MAX(ts) AS ts

    FROM $parsed_app_metrica
    WHERE device_id IS NOT NULL
        AND `uuid` IS NOT NULL
    {{ group_by_clause }} `uuid`
);

/* ======================================================================= */

-- Builds a single-entry dict from an optional key and an optional value.
-- When either of them is NULL the result is an empty dict of the same key/value types.
$to_dict_skip_nulls = ($k, $v) -> (
    IF(
        $k IS NULL OR $v IS NULL,
        DictCreate(OptionalItemType(TypeOf($k)), OptionalItemType(TypeOf($v))),
        AsDict((Unwrap($k), Unwrap($v)))
    )
);

-- Aggregates per-device information for one identifier kind and inserts the
-- result into $dev_info_table (no TRUNCATE, so every call adds its own rows).
--   $group_by - identifier kind; compared against 'idfa', 'ifv', 'gaid', 'oaid'
--               and 'mm_device_id'. Any other value selects no rows.
DEFINE ACTION $group_device_info($group_by) AS

    -- One row per identifier value of the requested kind.
    $device_info_reduce = (
        SELECT
            id,  -- grouping key chosen by $group_by, see the GROUP BY expression below
            MAX(ts) AS ts,
            AGGREGATE_LIST_DISTINCT(dt) AS dates,

            -- identifiers: MAX_BY ordered by ts, with the ordering key set to NULL
            -- on rows where the value itself is NULL
            MAX_BY(idfa, IF(idfa IS NOT NULL, ts)) AS idfa,
            MAX_BY(ifv, IF(ifv IS NOT NULL, ts)) AS ifv,
            MAX_BY(android_id, IF(android_id IS NOT NULL, ts)) AS android_id,
            MAX_BY(open_id, IF(open_id IS NOT NULL, ts)) AS open_id,
            MAX_BY(google_adv_id, IF(google_adv_id IS NOT NULL, ts)) AS google_adv_id,

            -- mmetric_device_id -> hash; rows where either side is NULL contribute nothing
            AGGREGATE_BY(
                $to_dict_skip_nulls(mmetric_device_id, mmetric_device_id_hash),
                $aggregate_merge_dicts
            ) AS mmetric_device_ids_to_hash,

            COALESCE(
                AGGREGATE_BY(COALESCE(metrica_apps, []), $aggregate_uniq_arrays),
                []
            ) AS metrica_apps,

            -- sim cards
            COALESCE(
                AGGREGATE_BY(COALESCE(simcards_operators_names, []), $aggregate_uniq_arrays),
                []
            ) AS simcards_operators_names,
            AGGREGATE_BY(
                ToMultiDict(ListZip(simcards_operators_ids, simcards_operators_names)),
                $aggregate_merge_multi_dicts
            ) AS simcards_operators,

            -- device description, same MAX_BY scheme as the identifiers
            MAX_BY(device_type, IF(device_type IS NOT NULL, ts)) AS device_type,
            MAX_BY(manufacturer, IF(manufacturer IS NOT NULL, ts)) AS manufacturer,
            MAX_BY(model, IF(model IS NOT NULL, ts)) AS model,
            MAX_BY(os, IF(os IS NOT NULL, ts)) AS os,
            MAX_BY(os_version, IF(os_version IS NOT NULL, ts)) AS os_version,
            MAX_BY(screen_width, IF(screen_width IS NOT NULL, ts)) AS screen_width,
            MAX_BY(screen_height, IF(screen_height IS NOT NULL, ts)) AS screen_height,
            MAX_BY(ua_profile, IF(ua_profile IS NOT NULL, ts)) AS ua_profile,
            MAX_BY(locale, IF(locale IS NOT NULL, ts)) AS locale,

            ListSort(
                COALESCE(AGGREGATE_BY(COALESCE(macs, []), $aggregate_uniq_arrays), [])
            ) AS macs,

            -- HyperLogLog estimate of the distinct 10-minute buckets seen per connection type
            AsDict(
                ('cell', COALESCE(HyperLogLog(connection_cell_flag, 6), 0)),
                ('wifi', COALESCE(HyperLogLog(connection_wifi_flag, 6), 0))
            ) AS connection_hist,

            -- regions: the aggregate itself and the main regions derived from it
            AGGREGATE_BY(region_id, $aggregate_sum_dict) AS region_ids,
            $get_main_regions(
                AGGREGATE_BY(region_id, $aggregate_sum_dict)
            ) AS main_regions,

            AGGREGATE_BY(features, $aggregate_sum_dict_dicts) AS features

        FROM $parsed_app_metrica
        -- A row is taken for a kind only when that identifier is present and the
        -- other identifiers listed in the same branch are absent.
        WHERE (
            CASE
                WHEN $group_by == 'idfa' THEN (
                    idfa IS NOT NULL
                    AND google_adv_id IS NULL
                )
                WHEN $group_by == 'ifv' THEN (
                    idfa IS NULL
                    AND ifv IS NOT NULL
                    AND google_adv_id IS NULL
                )
                WHEN $group_by == 'gaid' THEN (
                    idfa IS NULL
                    AND ifv IS NULL
                    AND google_adv_id IS NOT NULL
                )
                WHEN $group_by == 'oaid' THEN (
                    idfa IS NULL
                    AND ifv IS NULL
                    AND google_adv_id IS NULL
                    AND open_id IS NOT NULL
                )
                WHEN $group_by == 'mm_device_id' THEN (
                    idfa IS NULL
                    AND ifv IS NULL
                    AND google_adv_id IS NULL
                    AND open_id IS NULL
                    AND mmetric_device_id IS NOT NULL
                )
                ELSE FALSE
            END
        )
        {{ group_by_clause }}
            CASE
                WHEN $group_by == 'idfa' THEN idfa
                WHEN $group_by == 'ifv' THEN ifv
                WHEN $group_by == 'gaid' THEN google_adv_id
                WHEN $group_by == 'oaid' THEN open_id
                ELSE mmetric_device_id
            END AS id
    );

    -- Output shape: serializes the aggregated dictionaries and sorts the lists.
    $device_info = (
        SELECT
            'mm' AS source,
            $group_by AS id_type,

            id,
            id AS device_id,

            idfa,
            ifv,
            android_id,
            open_id,
            google_adv_id,

            Yson::Serialize(Yson::From(mmetric_device_ids_to_hash)) AS mmetric_device_ids_to_hash,
            ListSort(DictKeys(mmetric_device_ids_to_hash)) AS mmetric_device_ids,

            ListSort(metrica_apps) AS metrika_apps,
            ListSort(simcards_operators_names) AS simcards_operators_names,
            COALESCE(ToMultiDict($flat_multi_dict(simcards_operators)), {}) AS simcards_operators,

            COALESCE($dict_dict_join(features, ';', '-', ',', ':'), '') AS features,

            device_type,
            manufacturer,
            model,
            os,
            os_version,
            screen_width,
            screen_height,
            locale,
            ua_profile,
            macs,

            $dict_join(connection_hist, ',', ':') AS connection_hist,
            $dump_as_dict_int(region_ids) AS region_ids,

            dates,
            ts,

            main_regions.main_region_country AS main_region_country,
            main_regions.main_region_obl AS main_region_obl,
            main_regions.main_region_city AS main_region_city,
            main_regions.main_region AS main_region

        FROM $device_info_reduce AS device_info
    );

    -- Only identifiers accepted by the identifiers library are written.
    INSERT INTO $dev_info_table
    SELECT *
    FROM $device_info
    WHERE Identifiers::IsValid(id_type, id);

END DEFINE;

-- Builds every identifier-pair edge table from the parsed logs and writes their
-- union to $metrica_soup (WITH TRUNCATE). Each edge row carries id1/id1Type,
-- id2/id2Type, the distinct dates the pair was seen on, logSource and sourceType.
-- Which edge tables take part in the final union is decided by template conditions
-- on the available inputs (see the INSERT at the end).
DEFINE ACTION $make_metrica_soup() AS
    -- Edge tables: each subquery below groups by one identifier pair and collects
    -- the distinct dt values of that pair.

    -- idfa <-> mm_device_id
    $idfa_mm_device_id = (
        SELECT
            idfa AS id1,
            IdType::IDFA() AS id1Type,
            mmetric_device_id AS id2,
            IdType::MM_DEVICE_ID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA() AS sourceType
        FROM $parsed_app_metrica
        WHERE
            mmetric_device_id IS NOT Null
            AND idfa IS NOT Null
        {{ group_by_clause }} idfa, mmetric_device_id
    );

    -- ifv <-> mm_device_id
    $ifv_mm_device_id = (
        SELECT
            ifv AS id1,
            IdType::IFV() AS id1Type,
            mmetric_device_id AS id2,
            IdType::MM_DEVICE_ID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA() AS sourceType
        FROM $parsed_app_metrica
        WHERE
            mmetric_device_id IS NOT Null
            AND ifv IS NOT Null
        {{ group_by_clause }} ifv, mmetric_device_id
    );

    -- gaid <-> mm_device_id
    $gaid_mm_device_id = (
        SELECT
            google_adv_id AS id1,
            IdType::GAID() AS id1Type,
            mmetric_device_id AS id2,
            IdType::MM_DEVICE_ID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA() AS sourceType
        FROM $parsed_app_metrica
        WHERE
            mmetric_device_id IS NOT Null
            AND google_adv_id IS NOT Null
        {{ group_by_clause }} google_adv_id, mmetric_device_id
    );

    -- mm_device_id <-> uuid
    $uuid_mm_device_id = (
        SELECT
            mmetric_device_id AS id1,
            IdType::MM_DEVICE_ID() AS id1Type,
            `uuid` AS id2,
            IdType::UUID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA() AS sourceType
        FROM $parsed_app_metrica
        WHERE
            mmetric_device_id IS NOT Null
            AND `uuid` IS NOT Null
        {{ group_by_clause }} `uuid`, mmetric_device_id
    );

    -- mm_device_id <-> mac; the macs list is flattened to one row per address
    $mac_mm_device_id = (
        SELECT
            mmetric_device_id AS id1,
            IdType::MM_DEVICE_ID() AS id1Type,
            mac AS id2,
            IdType::MAC() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA() AS sourceType
        FROM $parsed_app_metrica
        FLATTEN LIST BY macs AS mac
        WHERE
            mmetric_device_id IS NOT Null
            AND mac IS NOT Null
        {{ group_by_clause }} mac, mmetric_device_id
    );

    -- mm_device_id <-> android_id
    $android_mm_device_id = (
        SELECT
            mmetric_device_id AS id1,
            IdType::MM_DEVICE_ID() AS id1Type,
            android_id AS id2,
            IdType::ANDROID_ID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA() AS sourceType
        FROM $parsed_app_metrica
        WHERE
            mmetric_device_id IS NOT Null
            AND android_id IS NOT Null
        {{ group_by_clause }} android_id, mmetric_device_id
    );

    -- mm_device_id <-> imei; the imeis list is flattened to one row per value
    $imei_mm_device_id = (
        SELECT
            mmetric_device_id AS id1,
            IdType::MM_DEVICE_ID() AS id1Type,
            imei AS id2,
            IdType::IMEI() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA() AS sourceType
        FROM $parsed_app_metrica
        FLATTEN LIST BY imeis AS imei
        WHERE
            mmetric_device_id IS NOT Null
            AND imei IS NOT Null
        {{ group_by_clause }} imei, mmetric_device_id
    );

    -- yandexuid <-> mm_device_id from the yabro 'url opened' events
    $yuid_mm_device_id_yb = (
        SELECT
            yandexuid AS id1,
            IdType::YANDEXUID() AS id1Type,
            mmetric_device_id AS id2,
            IdType::MM_DEVICE_ID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::YABRO_EVENT() AS sourceType
        FROM $YaBroEvents()
        WHERE
            mmetric_device_id IS NOT Null
            AND yandexuid IS NOT Null
        {{ group_by_clause }} yandexuid, mmetric_device_id
    );

    -- yandexuid <-> mm_device_id from the pp 'url opened' events
    $yuid_mm_device_id_pp = (
        SELECT
            yandexuid AS id1,
            IdType::YANDEXUID() AS id1Type,
            mmetric_device_id AS id2,
            IdType::MM_DEVICE_ID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::PP_EVENT() AS sourceType
        FROM $PpEvents()
        WHERE
            mmetric_device_id IS NOT Null
            AND yandexuid IS NOT Null
        {{ group_by_clause }} yandexuid, mmetric_device_id
    );

    -- oaid <-> mm_device_id
    $oaid_mm_device_id = (
        SELECT
            open_id AS id1,
            IdType::OAID() AS id1Type,
            mmetric_device_id AS id2,
            IdType::MM_DEVICE_ID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA() AS sourceType
        FROM $parsed_app_metrica
        WHERE
            mmetric_device_id IS NOT Null
            AND open_id IS NOT Null
        {{ group_by_clause }} open_id, mmetric_device_id
    );

    ---------------------------------------
    -- Make edges email-uuid from custom profile_id
    -- A profile_id that validates as an email is normalized and used as id1.

    $profile_email_uuid = (
        SELECT
            id1,
            IdType::EMAIL() AS id1Type,
            `uuid` AS id2,
            IdType::UUID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA_USER_PROFILE() AS sourceType
        FROM $parsed_app_metrica
        WHERE
            `uuid` IS NOT Null
            AND Identifiers::IsValidEmail(profile_id)
        {{ group_by_clause }} Identifiers::NormalizeEmail(profile_id) AS id1, `uuid`
    );

    ---------------------------------------
    -- Make edges phone-uuid from custom profile_id
    -- Unlike the email variant above, this one is additionally restricted by API key.
    -- NOTE(review): $PHONE_PROFILEID_APPROVED_APIKey and $is_valid_phone are not defined in this
    -- file; presumably they come from the included custom-attributes template -- confirm.

    $profile_phone_uuid = (
        SELECT
            id1,
            IdType::PHONE() AS id1Type,
            `uuid` AS id2,
            IdType::UUID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA_USER_PROFILE() AS sourceType
        FROM $parsed_app_metrica
        WHERE
            `uuid` IS NOT NULL
            AND api_key IN $PHONE_PROFILEID_APPROVED_APIKey
            AND $is_valid_phone(profile_id)
        {{ group_by_clause }} Identifiers::NormalizePhone(profile_id) AS id1, `uuid`
    );

    ---------------------------------------
    -- Make edges email-uuid from custom attributes

    -- Zips attribute ids with their string values and keeps the pairs whose id is a
    -- known email attribute and whose value validates as an email.
    $filter_emails = ($ids, $strings) -> {
        $structs = ListMap(
            ListZipAll($ids, $strings),
            ($t) -> ( <|
               id: $t.0, -- UInt64
               value: $t.1, -- string
            |> )
        );
        RETURN ListFilter($structs, ($s) -> ( $s.id IN $EMAIL_CUSTOM_ATTRIBUTE_IDS AND Identifiers::IsValidEmail($s.value) ));
    };

    -- Normalized emails of the attributes accepted by $filter_emails.
    $get_emails = ($ids, $strings) -> {
        RETURN ListMap(
            $filter_emails($ids, $strings),
            ($s) -> ( Identifiers::NormalizeEmail($s.value) )
        );
    };

    -- (id, name) of the attributes accepted by $filter_emails; below it is used only
    -- to check that at least one attribute was accepted.
    $get_email_ids = ($ids, $strings) -> {
        RETURN ListMap(
            $filter_emails($ids, $strings),
            ($s) -> ( <| id: $s.id, name: $EMAIL_CUSTOM_ATTRIBUTE_IDS[$s.id] |> )
        );
    };

    -- Distinct (dt, uuid, email) triples, one row per extracted email.
    $custom_emails = (
    SELECT DISTINCT
        dt,
        `uuid`,
        email,
    FROM (
        SELECT
            dt,
            `uuid`,
            $get_emails(custom_attribute_ids, custom_attribute_strings) AS custom_emails
        FROM $parsed_app_metrica
        WHERE `uuid` IS NOT NULL
            AND api_key IN $EMAIL_CUSTOM_ATTRIBUTE_APPROVED_APIKey
            AND ListLength(custom_attribute_ids) > 0
            AND ListLength($get_email_ids(custom_attribute_ids, custom_attribute_strings)) > 0
    )
    FLATTEN LIST BY custom_emails AS email
    );


    $custom_attribute_email_uuid = (
        SELECT
            email AS id1,
            IdType::EMAIL() AS id1Type,
            `uuid` AS id2,
            IdType::UUID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA_CUSTOM_ATTRIBUTE() AS sourceType
        FROM $custom_emails
        {{ group_by_clause }} email, `uuid`
    );

    ---------------------------------------
    -- Make edges phone_uuid from custom attributes

    -- Zips ids, types, string values and number values; the candidate value is the
    -- number cast to String when the attribute type is 1 and the string value otherwise.
    -- Keeps the pairs whose id is a known phone attribute and whose value passes $is_valid_phone.
    $filter_phones = ($ids, $types, $strings, $numbers) -> {
        $structs = ListMap(
            ListZipAll($ids, $types, $strings, $numbers),
            ($t) -> ( <|
               id: $t.0, -- UInt64
               value: IF($t.1 == 1, CAST($t.3 AS String), $t.2) -- string
               --  type = $t.1: 0=string 1=number 2=bool 3=counter 4=price
            |> )
        );
        RETURN ListFilter($structs, ($s) -> ( $s.id IN $PHONE_CUSTOM_ATTRIBUTE_IDS AND $is_valid_phone($s.value) ));
    };

    -- Normalized phones of the attributes accepted by $filter_phones.
    $get_phones = ($ids, $types, $strings, $numbers) -> {
        RETURN ListMap(
            $filter_phones($ids, $types, $strings, $numbers),
            ($s) -> ( Identifiers::NormalizePhone($s.value) )
        );
    };

    -- (id, name) of the attributes accepted by $filter_phones; below it is used only
    -- to check that at least one attribute was accepted.
    $get_phone_ids = ($ids, $types, $strings, $numbers) -> {
        RETURN ListMap(
            $filter_phones($ids, $types, $strings, $numbers),
            ($s) -> ( <| id: $s.id, name: $PHONE_CUSTOM_ATTRIBUTE_IDS[$s.id] |> )
        );
    };


    -- Distinct (dt, uuid, phone) triples, one row per extracted phone.
    $custom_phones = (
    SELECT DISTINCT
        dt,
        `uuid`,
        phone
    FROM (
        SELECT
            dt,
            `uuid`,
            $get_phones(custom_attribute_ids, custom_attribute_types, custom_attribute_strings, custom_attribute_doubles) AS custom_phones
        FROM $parsed_app_metrica
        WHERE `uuid` IS NOT NULL
            AND api_key IN $PHONE_CUSTOM_ATTRIBUTE_APPROVED_APIKey
            AND ListLength(custom_attribute_ids) > 0
            AND ListLength($get_phone_ids(custom_attribute_ids, custom_attribute_types, custom_attribute_strings, custom_attribute_doubles)) > 0
    )
    FLATTEN LIST BY custom_phones AS phone
    );

    $custom_attribute_phone_uuid = (
        SELECT
            phone AS id1,
            IdType::PHONE() AS id1Type,
            `uuid` AS id2,
            IdType::UUID() AS id2Type,

            AGGREGATE_LIST_DISTINCT(dt) AS dates,
            LogSource::METRIKA_MOBILE_LOG() AS logSource,
            SourceType::APP_METRICA_CUSTOM_ATTRIBUTE() AS sourceType
        FROM $custom_phones
        {{ group_by_clause }} phone, `uuid`
    );

    ---------------------------------------
    -- Make cross profile edges yandexuid-uuid

    -- AppMetrica events with a profile id on android/ios, joined with the
    -- $cross_mobmet table on (AppID, OperatingSystem) to obtain CounterID.
    $filtered_am_profile =
    SELECT
        am.`uuid` AS `uuid`,
        am.profile_id AS ProfileID,
        am.dt AS dt,
        mobmet.CounterID AS CounterID,
    FROM (
        SELECT
            `uuid`,
            profile_id,
            app_id AS AppID,
            os AS OperatingSystem,
            dt,
        FROM $parsed_app_metrica
            WHERE `uuid` IS NOT NULL
            AND $force_null(profile_id) IS NOT NULL
            AND os IN {"android", "ios"}
    ) AS am
    JOIN $cross_mobmet AS mobmet
    USING(AppID, OperatingSystem);

    -- Matches the profiles above with the $metrika_params_owners table on (ProfileID, CounterID).
    $cross_profile_edges =
    SELECT
        metrika.yandexuid AS yandexuid,
        appmetrica.`uuid` AS `uuid`,
        appmetrica.dt AS dt,
    FROM $metrika_params_owners AS metrika
    JOIN $filtered_am_profile AS appmetrica
    USING(ProfileID, CounterID);

    $yandexuid_uuid_cross_profile =
    SELECT
        yandexuid AS id1,
        IdType::YANDEXUID() AS id1Type,
        `uuid` AS id2,
        IdType::UUID() AS id2Type,

        AGGREGATE_LIST_DISTINCT(dt) AS dates,
        LogSource::METRIKA_MOBILE_LOG() AS logSource,
        SourceType::CROSS_PROFILE() AS sourceType
    FROM $cross_profile_edges
    {{ group_by_clause }} yandexuid, `uuid`;

    ---------------------------------------
    -- Final union. The first SELECT is limited to zero rows, so it contributes no data;
    -- it keeps the statement valid when no input-specific branch is rendered.

    INSERT INTO $metrica_soup WITH TRUNCATE
        SELECT * FROM (SELECT ['{{ date }}'] AS dates LIMIT 0)
    {% if metrica_input %}
        UNION ALL SELECT * FROM $idfa_mm_device_id
        UNION ALL SELECT * FROM $ifv_mm_device_id
        UNION ALL SELECT * FROM $oaid_mm_device_id
        UNION ALL SELECT * FROM $gaid_mm_device_id
        UNION ALL SELECT * FROM $uuid_mm_device_id
        UNION ALL SELECT * FROM $mac_mm_device_id
        UNION ALL SELECT * FROM $android_mm_device_id
        UNION ALL SELECT * FROM $imei_mm_device_id
        UNION ALL SELECT * FROM $profile_email_uuid
        UNION ALL SELECT * FROM $profile_phone_uuid
        UNION ALL SELECT * FROM $custom_attribute_email_uuid
        UNION ALL SELECT * FROM $custom_attribute_phone_uuid
        UNION ALL SELECT * FROM $yandexuid_uuid_cross_profile
    {% endif %}
    {% if yabro_input %}
        UNION ALL SELECT * FROM $yuid_mm_device_id_yb
    {% endif %}
    {% if pp_input %}
        UNION ALL SELECT * FROM $yuid_mm_device_id_pp
    {% endif %}
    ;

END DEFINE;

/*/ ======================================================================= /*/

-- The soup is always built; the action itself decides which edge tables
-- take part depending on the available inputs.
DO $make_metrica_soup();

{% if metrica_input %}
-- Device info: one call per identifier kind, each inserting into $dev_info_table.
DO $group_device_info(IdType::IDFA());
DO $group_device_info(IdType::IFV());
DO $group_device_info(IdType::OAID());
DO $group_device_info(IdType::GAID());
DO $group_device_info(IdType::MM_DEVICE_ID());

/*/ ======================================================================= /*/

-- Per-uuid info, restricted to identifiers accepted by the identifiers library.
INSERT INTO $uuid_info_table WITH TRUNCATE
SELECT *
FROM $uuid_info
WHERE Identifiers::IsValid(id_type, id);

-- Filter AM LOG
-- 'AM_System AM info' events keyed by GAID (taken from ADVID) ...
$am_info_gaid_events = (
    SELECT
        $strip(ADVID) AS device_id,
        IdType::GAID() AS id_type,
        EventName,
        EventValue,
        DeviceID,
        _logfeller_timestamp,
        _logfeller_timestamp AS `timestamp`
    FROM EACH($metrica_log_tables)
    WHERE $strip(ADVID) IS NOT NULL
        AND EventName LIKE 'AM_System AM info%'
);

-- ... and the same events keyed by OAID. An event carrying both identifiers
-- produces one row in each set.
$am_info_oaid_events = (
    SELECT
        $strip(OAID) AS device_id,
        IdType::OAID() AS id_type,
        EventName,
        EventValue,
        DeviceID,
        _logfeller_timestamp,
        _logfeller_timestamp AS `timestamp`
    FROM EACH($metrica_log_tables)
    WHERE $strip(OAID) IS NOT NULL
        AND EventName LIKE 'AM_System AM info%'
);

INSERT INTO $am_log_table WITH TRUNCATE
SELECT * FROM $am_info_gaid_events
UNION ALL
SELECT * FROM $am_info_oaid_events;

-- Filter for Fuzzy2
-- For every (ip, ua_profile) pair: TopFreq over each identifier column with a limit of 15.
INSERT INTO $fuzzy2_metrica WITH TRUNCATE
SELECT
    ip,
    ua_profile,
    TopFreq(device_id, 15) AS devices,
    TopFreq(google_adv_id, 15) AS gaids,
    TopFreq(open_id, 15) AS oaids,
    TopFreq(idfa, 15) AS idfas,
    TopFreq(ifv, 15) AS ifvs
FROM $parsed_app_metrica
WHERE device_id IS NOT NULL
    AND ip IS NOT NULL
    AND ua_profile IS NOT NULL
{{ group_by_clause }} ip, ua_profile
ORDER BY ip, ua_profile;
{% endif %}
