{% if not is_embedded %}
PRAGMA Library = 'aggregation_lib.sql';
PRAGMA Library = 'metrica_lib.sql';
{% endif %}

IMPORT {% if is_embedded %}.lib.{% endif %}aggregation_lib SYMBOLS
    $aggregate_uniq_arrays,
    $aggregate_limited_uniq_arrays,
    $aggregate_sum_dict,
    $aggregate_sum_dicts,
    $aggregate_sum_dict_dicts,
    $aggregate_merge_dicts,
    $aggregate_merge_multi_dicts,
    $flat_multi_dict,
    $dump_as_dict_int
;

IMPORT {% if is_embedded %}.lib.{% endif %}metrica_lib SYMBOLS
    $dict_join,
    $dict_dict_join,
    $dict_from_string_int,
    $dict_dict_from_string_int,
    $get_main_geo,
    $get_main_regions
;

-- Query-wide YT setting: per-row weight limit (value is passed to YT as is).
PRAGMA yt.MaxRowWeight = '32M';
-- PRAGMA yt.DataSizePerJob = '6G';
-- PRAGMA yt.MaxJobCount = '10000';

-- Every dated path below is built from the template variable date_start;
-- date_end is only used as the upper bound of the month-mode RANGE scans.
{% set date = date_start %}
$date_start = '{{ date_start }}';
$date_end = '{{ date_end }}';

-- input
-- Stream directories; each one is read with RANGE(), i.e. as a set of tables.
$stream_dir = "{{ graph_stream_dir }}/extra_data/AppMetrikaTask/{{ date }}";
$am_stream = $stream_dir || "/am_log_table";
$dev_stream = $stream_dir || "/dev_info_table";
$uuid_stream = $stream_dir || "/uuid_info_table";
$fuzzy2_stream = $stream_dir || "/fuzzy2_metrica";

$rtb_stream_dir = "{{ graph_stream_dir }}/extra_data/RTBLogTask/{{ date }}/ssp_apps_info_table";
$postback_stream_dir = "{{ graph_stream_dir }}/extra_data/PostbackLogTask/{{ date }}/postback_apps_table";

-- constants
-- Upper bounds handed to $aggregate_limited_uniq_arrays when the per-device
-- app lists and mac lists are merged in $app_metrica_month_devices.
$APPS_LIMIT_NUMBER = 2000;
$MACS_LIMIT_NUMBER = 200;

-- output
$am_log_table = "{{ graph_output_dir }}/{{ date }}/mobile/account_manager/am_log";

$fuzzy2_metrica = "{{ indevice_output_dir }}/{{ date }}/fuzzy/fuzzy2_metrica";
$dev_info_table  = "{{ graph_output_dir }}/{{ date }}/mobile/dev_info_yt";
$uuid_info_table = "{{ graph_output_dir }}/{{ date }}/mobile/uuid_info_yt";

-- tables to use
$crypta_graph_dir = '{{ graph_output_dir }}';
$crypta_ids_storage_dir = '{{ ids_storage_dir }}';

-- output tables
-- $ids_tables = AsList(
--     $crypta_ids_storage_dir || '/idfa/app_metrica_month',
--     $crypta_ids_storage_dir || '/gaid/app_metrica_month',
--     $crypta_ids_storage_dir || '/mm_device_id/app_metrica_month',
-- );
-- Destination of the merged device table. Month mode writes into the ids
-- storage and additionally defines $ids_hash (it exists in month mode only);
-- otherwise the result goes to the dated dev_info table.
{% if is_month %}
    $ids_devices = $crypta_ids_storage_dir || '/device_id/app_metrica_month';
    $ids_hash = $crypta_ids_storage_dir || '/device_id/hash_month';
{% else %}
    $ids_devices = $dev_info_table;
{% endif %}

-- Device records every device-level action reads from.
-- Month mode: the 'mobile/dev_info_yt' tables under $crypta_graph_dir for the
-- names between $date_start and $date_end.
-- Otherwise: the AppMetrika device stream together with the RTB and postback
-- streams.
$dev_info_yt_tables = (
{% if is_month %}
    SELECT * FROM RANGE(
        $crypta_graph_dir,
        $date_start,
        $date_end,
        'mobile/dev_info_yt'
    )
{% else %}
    SELECT * FROM RANGE($dev_stream)
    UNION ALL SELECT * FROM RANGE($rtb_stream_dir)
    UNION ALL SELECT * FROM RANGE($postback_stream_dir)
{% endif %}
);

DEFINE ACTION $merge_simple() AS
    -- The account-manager log needs no aggregation: the rows of all tables
    -- found under $am_stream are written to $am_log_table unchanged.
    $am_stream_rows = (
        SELECT *
        FROM RANGE($am_stream)
    );

    INSERT INTO $am_log_table WITH TRUNCATE
    SELECT *
    FROM $am_stream_rows;
END DEFINE;

DEFINE ACTION $merge_fuzzy2() AS
    -- Rebuilds the fuzzy2 table with a single row per (ip, ua_profile).

    -- Merges a list of lists of <Value, Frequency> structs into one list:
    -- the frequencies of equal values are summed and the result is ordered
    -- by frequency, largest first.
    $combine_tpfreq = ($tpfr_list) -> {
        -- list of lists -> one flat list of structs
        $flat_items = ListFlatMap(
            $tpfr_list,
            ($chunk) -> {
                RETURN $chunk;
            }
        );

        -- value -> every frequency recorded for that value
        $frequencies_by_value = ToMultiDict(
            ListMap(
                $flat_items,
                ($entry) -> {
                    RETURN AsTuple($entry.Value, $entry.Frequency);
                }
            )
        );

        -- back to structs, one per distinct value
        $summed = ListMap(
            DictItems($frequencies_by_value),
            ($kv) -> {
                RETURN AsStruct(
                    $kv.0 AS Value,
                    ListSum($kv.1) AS Frequency
                );
            }
        );

        RETURN ListSortDesc(
            $summed,
            ($entry) -> {
                RETURN $entry.Frequency;
            }
        );
    };

    -- every id column is merged with the same combiner
    INSERT INTO $fuzzy2_metrica WITH TRUNCATE
    SELECT
        ip,
        ua_profile,
        $combine_tpfreq(AGGREGATE_LIST(devices)) AS devices,
        $combine_tpfreq(AGGREGATE_LIST(gaids)) AS gaids,
        $combine_tpfreq(AGGREGATE_LIST(oaids)) AS oaids,
        $combine_tpfreq(AGGREGATE_LIST(idfas)) AS idfas,
        $combine_tpfreq(AGGREGATE_LIST(ifvs)) AS ifvs
    FROM RANGE($fuzzy2_stream)
    GROUP BY
        ip,
        ua_profile
    ORDER BY
        ip,
        ua_profile;
END DEFINE;

DEFINE ACTION $collect_ids_month() AS
    -- Fills the temporary edge tables "idfa / ifv / gaid / oaid -> mm device id"
    -- (@idfa_month, @ifv_month, @gaid_month, @oaid_month) from
    -- $dev_info_yt_tables, plus the @fake_month placeholder.

    -- One row per element of mmetric_device_ids. Rows without a device id
    -- cannot form an edge, so they are dropped here once for all tables below.
    $id_edges = (
        SELECT
            idfa,
            ifv,
            google_adv_id AS gaid,
            open_id AS oaid,
            mmetric_device_id
        FROM $dev_info_yt_tables
        FLATTEN LIST BY mmetric_device_ids AS mmetric_device_id
        WHERE mmetric_device_id IS NOT NULL
    );

    -- idfa edges
    INSERT INTO @idfa_month WITH TRUNCATE
    SELECT
        id,
        mmetric_device_id
    FROM $id_edges
    WHERE idfa IS NOT NULL
    GROUP BY
        idfa AS id,
        mmetric_device_id
    ORDER BY
        id,
        mmetric_device_id;

    -- ifv edges
    INSERT INTO @ifv_month WITH TRUNCATE
    SELECT
        id,
        mmetric_device_id
    FROM $id_edges
    WHERE ifv IS NOT NULL
    GROUP BY
        ifv AS id,
        mmetric_device_id
    ORDER BY
        id,
        mmetric_device_id;

    -- gaid edges
    INSERT INTO @gaid_month WITH TRUNCATE
    SELECT
        id,
        mmetric_device_id
    FROM $id_edges
    WHERE gaid IS NOT NULL
    GROUP BY
        gaid AS id,
        mmetric_device_id
    ORDER BY
        id,
        mmetric_device_id;

    -- oaid edges; rows that also carry a gaid are skipped so that only
    -- OAID-only ids get here
    INSERT INTO @oaid_month WITH TRUNCATE
    SELECT
        id,
        mmetric_device_id
    FROM $id_edges
    WHERE oaid IS NOT NULL
        AND gaid IS NULL
    GROUP BY
        oaid AS id,
        mmetric_device_id
    ORDER BY
        id,
        mmetric_device_id;

    -- placeholder edge table holding one row of empty strings;
    -- $app_metrica_month_devices falls back to it for other id kinds
    INSERT INTO @fake_month WITH TRUNCATE (id, mmetric_device_id) VALUES ('', '');

END DEFINE;

-- Builds the temporary table @<$group_by> holding one aggregated row per
-- device identifier of the requested kind.
--
-- Parameter:
--   $group_by - id kind to aggregate by; must be one of
--               'idfa', 'ifv', 'gaid', 'oaid', 'mm_device_id'.
--
-- Reads $dev_info_yt_tables and the edge tables written by
-- $collect_ids_month (@idfa_month, @ifv_month, @gaid_month, @oaid_month,
-- @fake_month), so that action has to be committed before this one runs.
DEFINE ACTION $app_metrica_month_devices($group_by) AS
    -- $out_table = $crypta_ids_storage_dir || '/' || $id_type || '/app_metrica_month';

    -- Parameter check: Ensure fails with the message below when $group_by is
    -- not one of the supported id kinds.
    DISCARD SELECT Ensure(
        'dummy',
        $group_by IN ('idfa', 'ifv', 'gaid', 'oaid', 'mm_device_id'),
        'id_type should be one of ("idfa", "ifv", "gaid", "oaid", "mm_device_id")');

    -- only mm_id devices (never seen with idfa/ifv/gaid/oaid):
    -- records with all four ids empty whose id is absent from every edge
    -- table (LEFT ONLY JOIN keeps only the left rows that have no match)
    $only_mm_devices = (
        SELECT
            devices.id AS id,
            $group_by AS id_type,

            devices.idfa AS idfa,
            devices.ifv AS ifv,
            devices.android_id AS android_id,
            devices.google_adv_id AS google_adv_id,
            devices.open_id AS open_id,
            devices.device_id AS device_id,

            devices.model AS model,
            devices.manufacturer AS manufacturer,
            devices.os AS os,
            devices.os_version AS os_version,
            devices.ua_profile AS ua_profile,
            devices.device_type AS device_type,
            devices.locale AS locale,

            devices.metrika_apps AS metrika_apps,
            devices.ssp_apps AS ssp_apps,
            devices.postback_apps AS postback_apps,
            devices.simcards_operators_names AS simcards_operators_names,
            devices.simcards_operators AS simcards_operators,

            devices.features AS features,
            devices.connection_hist AS connection_hist,
            devices.macs AS macs,
            devices.dates AS dates,

            devices.mmetric_device_ids AS mmetric_device_ids,
            devices.mmetric_device_ids_to_hash AS mmetric_device_ids_to_hash,

            devices.region_ids AS region_ids,

            devices.source AS source,

            devices.screen_width AS screen_width,
            devices.screen_height AS screen_height,

            devices.ts AS ts
        FROM (
            SELECT * FROM $dev_info_yt_tables
            WHERE idfa IS Null
                AND ifv IS Null
                AND google_adv_id IS Null
                AND open_id IS Null
        ) AS devices LEFT ONLY JOIN (
            SELECT mmetric_device_id AS id FROM @idfa_month
            UNION ALL
            SELECT mmetric_device_id AS id FROM @ifv_month
            UNION ALL
            SELECT mmetric_device_id AS id FROM @gaid_month
            UNION ALL
            SELECT mmetric_device_id AS id FROM @oaid_month
        ) AS ids USING (id)
    );

    -- pure devices with idfa/ifv/gaid/oaid as is.
    -- The branches make the id kinds mutually exclusive for one record:
    -- idfa needs an empty gaid, ifv also needs an empty idfa, gaid needs
    -- empty idfa and ifv, oaid needs all three of the others to be empty.
    -- Any other $group_by value selects nothing.
    $primary_devices = (
        SELECT * FROM $dev_info_yt_tables
        WHERE (
            CASE $group_by
                WHEN 'idfa' THEN (
                    idfa IS NOT Null
                    AND google_adv_id IS Null
                )
                WHEN 'ifv' THEN (
                    idfa IS Null
                    AND ifv IS NOT Null
                    AND google_adv_id IS Null
                )
                WHEN 'gaid' THEN (
                    idfa IS Null
                    AND ifv IS Null
                    AND google_adv_id IS NOT Null
                )
                WHEN 'oaid' THEN (
                    idfa IS Null
                    AND ifv IS Null
                    AND google_adv_id IS Null
                    AND open_id IS NOT Null
                )
                -- WHEN 'mm_device_id' THEN (
                --     idfa IS Null
                --     AND google_adv_id IS Null
                --     AND open_id IS Null
                --     AND mmetric_device_id IS NOT Null
                -- )
                ELSE False
            END
        )
    );

    -- devices without an id of their own, with idfa/ifv/gaid/oaid restored
    -- through the edge table of the requested kind
    $ids_month_table = CASE $group_by
        WHEN 'idfa' THEN 'idfa_month'
        WHEN 'ifv'  THEN 'ifv_month'
        WHEN 'gaid' THEN 'gaid_month'
        WHEN 'oaid' THEN 'oaid_month'
        ELSE 'fake_month'  -- should never be used for the four kinds above
    END;

    -- Records with all four ids empty, joined to the edge table on the
    -- mm device id; the id column of the requested kind is taken from the
    -- edge, the other three id columns stay NULL.
    $secondary_devices = (
        SELECT
            ids.id AS id,
            $group_by AS id_type,

            IF($group_by == 'idfa', ids.id) AS idfa,
            IF($group_by == 'ifv', ids.id) AS ifv,
            devices.android_id AS android_id,
            IF($group_by == 'gaid', ids.id) AS google_adv_id,
            IF($group_by == 'oaid', ids.id) AS open_id,
            devices.device_id AS device_id,

            devices.model AS model,
            devices.manufacturer AS manufacturer,
            devices.os AS os,
            devices.os_version AS os_version,
            devices.ua_profile AS ua_profile,
            devices.device_type AS device_type,
            devices.locale AS locale,

            devices.metrika_apps AS metrika_apps,
            devices.ssp_apps AS ssp_apps,
            devices.postback_apps AS postback_apps,
            devices.simcards_operators_names AS simcards_operators_names,
            devices.simcards_operators AS simcards_operators,

            devices.features AS features,
            devices.connection_hist AS connection_hist,
            devices.macs AS macs,
            devices.dates AS dates,

            devices.mmetric_device_ids AS mmetric_device_ids,
            devices.mmetric_device_ids_to_hash AS mmetric_device_ids_to_hash,

            devices.region_ids AS region_ids,

            devices.source AS source,

            devices.screen_width AS screen_width,
            devices.screen_height AS screen_height,

            devices.ts AS ts
        FROM (
            SELECT * FROM $dev_info_yt_tables
            WHERE idfa IS Null
                AND ifv IS Null
                AND google_adv_id IS Null
                AND open_id IS Null
        ) AS devices INNER JOIN @$ids_month_table AS ids
        ON (devices.id == ids.mmetric_device_id)
    );

    -- All three sources are always unioned; the predicates depend only on
    -- $group_by, so for a given call each branch contributes either all of
    -- its rows or none.
    $devices_all = (
        -- for idfa/ifv/gaid/oaid TRUE on first two queries
        SELECT * FROM $primary_devices WHERE EvaluateExpr($group_by IN ('idfa', 'ifv', 'gaid', 'oaid', ))
        UNION ALL
        SELECT * FROM $secondary_devices WHERE EvaluateExpr($group_by IN ('idfa', 'ifv', 'gaid', 'oaid', ))
        UNION ALL
        -- for mm_device_id TRUE on last query
        SELECT * FROM $only_mm_devices WHERE EvaluateExpr($group_by == 'mm_device_id')
    );

    -- Picks the grouping id and turns the serialized columns into dicts so
    -- that they can be merged by the aggregation factories below.
    $devices_with_id = (
        SELECT
            CASE $group_by
                WHEN 'idfa' THEN idfa
                WHEN 'ifv'  THEN ifv
                WHEN 'gaid' THEN google_adv_id
                WHEN 'oaid' THEN open_id
                ELSE id -- id is already the mmetric device id
            END AS id,
            id_type,

            idfa,
            ifv,
            android_id,
            google_adv_id,
            open_id,
            device_id,

            model,
            manufacturer,
            os,
            os_version,
            ua_profile,
            device_type,
            locale,

            metrika_apps,
            ssp_apps,
            postback_apps,
            simcards_operators_names,
            simcards_operators,

            -- presumably the inverse of the $dict_dict_join call in the final
            -- SELECT (same separators) - confirm in metrica_lib
            $dict_dict_from_string_int(
                features,
                ';', '-', ',', ':'
            ) AS features,
            -- a missing history stays NULL; the final SELECT substitutes a
            -- default for it
            IF(connection_hist IS NULL,
                NULL,
                $dict_from_string_int(
                connection_hist,
                ',', ':'
            )) AS connection_hist,
            macs,
            dates,

            mmetric_device_ids,
            Yson::ConvertToStringDict(
                mmetric_device_ids_to_hash) AS mmetric_device_ids_to_hash,

            Just(Yson::ConvertToInt64Dict(region_ids)) AS region_ids,

            screen_width,
            screen_height,

            Abs(ts) AS ts,
            source
        FROM $devices_all
    );

    -- group by id
    -- Scalar attributes use MAX_BY keyed by ts only where the attribute is
    -- not NULL, so presumably the most recent non-empty value wins
    -- (assumes MAX_BY skips NULL keys - confirm).
    $devices_groupped = (
        SELECT
            id,
            $group_by AS id_type,

            SOME(source) AS source,
            MAX(ts) AS ts,

            MAX_BY(idfa, IF(idfa IS NOT Null, ts)) AS idfa,
            MAX_BY(ifv, IF(ifv IS NOT Null, ts)) AS ifv,
            MAX_BY(android_id, IF(android_id IS NOT Null, ts)) AS android_id,
            MAX_BY(google_adv_id, IF(google_adv_id IS NOT Null, ts)) AS google_adv_id,
            MAX_BY(open_id, IF(open_id IS NOT Null, ts)) AS open_id,
            -- the output device_id is the grouping id itself, not the
            -- device_id column of the source records
            id AS device_id,

            MAX_BY(locale, IF(locale IS NOT Null, ts)) AS locale,

            MAX_BY(device_type, IF(device_type IS NOT Null, ts)) AS device_type,
            MAX_BY(model, IF(model IS NOT Null, ts)) AS model,
            MAX_BY(manufacturer, IF(manufacturer IS NOT Null, ts)) AS manufacturer,
            MAX_BY(os, IF(os IS NOT Null, ts)) AS os,
            MAX_BY(os_version, IF(os_version IS NOT Null, ts)) AS os_version,
            MAX_BY(ua_profile, IF(ua_profile IS NOT Null, ts)) AS ua_profile,

            MAX_BY(screen_width, IF(screen_width IS NOT Null, ts)) AS screen_width,
            MAX_BY(screen_height, IF(screen_height IS NOT Null, ts)) AS screen_height,
            -- sum lists uniq

            AGGREGATE_BY(metrika_apps ?? [], $aggregate_limited_uniq_arrays($APPS_LIMIT_NUMBER)) ?? [] as metrika_apps,
            AGGREGATE_BY(ssp_apps ?? [], $aggregate_limited_uniq_arrays($APPS_LIMIT_NUMBER)) ?? [] as ssp_apps,
            AGGREGATE_BY(postback_apps ?? [], $aggregate_limited_uniq_arrays($APPS_LIMIT_NUMBER)) ?? [] as postback_apps,

            AGGREGATE_BY(simcards_operators_names ?? [], $aggregate_uniq_arrays) ?? [] AS simcards_operators_names,
            AGGREGATE_BY(simcards_operators, $aggregate_merge_multi_dicts) AS simcards_operators,

            AGGREGATE_BY(macs ?? [], $aggregate_limited_uniq_arrays($MACS_LIMIT_NUMBER)) ?? [] as macs,

            AGGREGATE_BY(dates ?? [], $aggregate_uniq_arrays) ?? [] as dates,

            AGGREGATE_BY(mmetric_device_ids ?? [], $aggregate_uniq_arrays) ?? [] as mmetric_device_ids,

            AGGREGATE_BY(features, $aggregate_sum_dict_dicts) as features,
            AGGREGATE_BY(region_ids, $aggregate_sum_dicts) as region_ids,
            -- main regions are derived from the same summed region dict
            $get_main_regions(AGGREGATE_BY(region_ids, $aggregate_sum_dicts)) AS main_regions,

            AGGREGATE_BY(mmetric_device_ids_to_hash, $aggregate_merge_dicts) as mmetric_device_ids_to_hash,
            AGGREGATE_BY(connection_hist, $aggregate_sum_dicts) as connection_hist

        FROM $devices_with_id
        WHERE id IS NOT Null
        GROUP BY id
    );

    -- Final shape: lists are sorted, dicts are serialized back to the
    -- string / Yson form, and ids rejected by Identifiers::IsValid are dropped.
    -- The result lands in the temporary table named after $group_by.
    -- INSERT INTO $out_table WITH TRUNCATE
    INSERT INTO @$group_by WITH TRUNCATE
    SELECT
        id,
        id_type,

        source,

        ts,

        idfa,
        ifv,
        android_id,
        google_adv_id,
        open_id,
        device_id,

        locale,
        device_type,
        model,
        manufacturer,
        os,
        os_version,
        ua_profile,

        ListSort(metrika_apps) AS metrika_apps,
        ListSort(ssp_apps) AS ssp_apps,
        ListSort(postback_apps) AS postback_apps,
        ListSort(simcards_operators_names) AS simcards_operators_names,
        ToMultiDict($flat_multi_dict(simcards_operators)) AS simcards_operators,
        -- union of the three app lists without duplicates
        ListSort(ListUniq(ListExtend(metrika_apps, ssp_apps, postback_apps))) AS apps,
        ListSort(macs) AS macs,
        ListSort(dates) AS dates,
        ListSort(mmetric_device_ids) AS mmetric_device_ids,

        $dict_dict_join(
            features,
            ';', '-', ',', ':'
        ) ?? '' AS features,
        $dump_as_dict_int(region_ids) AS region_ids,
        Yson::Serialize(Yson::From(mmetric_device_ids_to_hash)) AS mmetric_device_ids_to_hash,
        $dict_join(
            connection_hist ?? AsDict(
                -- For ssp_apps cell:0, wifi:0
                (Just('cell'), 0), (Just('wifi'), 0)),
            ',', ':'
        ) AS connection_hist,

        screen_width,
        screen_height,

        -- get geo info
        main_regions.main_region_country AS main_region_country,
        main_regions.main_region_obl AS main_region_obl,
        main_regions.main_region_city AS main_region_city,
        main_regions.main_region AS main_region
    FROM $devices_groupped
    WHERE Identifiers::IsValid(id_type, id)
    -- ORDER BY id, id_type
    ;

END DEFINE;

DEFINE ACTION $app_metrica_month_uuids() AS
    -- Builds the uuid table: reads the uuid_info records, collapses them to
    -- one row per (id, id_type) and writes the result to $out_table.
    $id_type = 'uuid';
{% if is_month %}
    $out_table = $crypta_ids_storage_dir || '/' || $id_type || '/app_metrica_month';
{% else %}
    $out_table = $uuid_info_table;
{% endif %}

    -- Source rows. Month mode scans the 'mobile/uuid_info_yt' tables between
    -- $date_start and $date_end, otherwise the uuid stream is read.
    -- Rows without an id or with a non-UTF app_id are discarded right away.
    $uuid_records = (
        SELECT
            id,
            id_type,

            Yson::ConvertToInt64Dict(api_keys) AS api_keys,
            app_id,
            app_version,
            dates,

            os,

            device_id,
            idfa,
            ifv,
            android_id,
            google_adv_id,
            open_id,

            mmetric_device_id,
            mmetric_device_id_hash,

            ua_profile,
            `uuid`,

            Abs(ts) AS ts,
            source
        FROM RANGE(
{% if is_month %}
            $crypta_graph_dir,
            $date_start,
            $date_end,
            'mobile/uuid_info_yt'
{% else %}
            $uuid_stream
{% endif %}
        )
        WHERE id IS NOT NULL
            AND (app_id IS NULL OR Unicode::IsUtf(app_id))
    );

    -- One row per (id, id_type). Each scalar attribute is chosen by MAX_BY
    -- with ts as the key, the key being set only where the attribute is
    -- not NULL.
    $uuid_merged = (
        SELECT
            id,
            id_type,

            SOME(source) AS source, -- always mm
            MAX(ts) AS ts,

            MAX_BY(app_id, IF(app_id IS NOT NULL, ts)) AS app_id,
            MAX_BY(app_version, IF(app_version IS NOT NULL, ts)) AS app_version,

            MAX_BY(os, IF(os IS NOT NULL, ts)) AS os,

            MAX_BY(device_id, IF(device_id IS NOT NULL, ts)) AS device_id,
            MAX_BY(idfa, IF(idfa IS NOT NULL, ts)) AS idfa,
            MAX_BY(ifv, IF(ifv IS NOT NULL, ts)) AS ifv,
            MAX_BY(android_id, IF(android_id IS NOT NULL, ts)) AS android_id,
            MAX_BY(google_adv_id, IF(google_adv_id IS NOT NULL, ts)) AS google_adv_id,
            MAX_BY(open_id, IF(open_id IS NOT NULL, ts)) AS open_id,

            MAX_BY(mmetric_device_id, IF(mmetric_device_id IS NOT NULL, ts)) AS mmetric_device_id,
            MAX_BY(mmetric_device_id_hash, IF(mmetric_device_id_hash IS NOT NULL, ts)) AS mmetric_device_id_hash,

            MAX_BY(ua_profile, IF(ua_profile IS NOT NULL, ts)) AS ua_profile,
            MAX_BY(`uuid`, IF(`uuid` IS NOT NULL, ts)) AS `uuid`,

            -- date lists are merged without duplicates
            AGGREGATE_BY(dates ?? [], $aggregate_uniq_arrays) ?? [] AS dates,

            -- api key dicts are summed
            AGGREGATE_BY(api_keys, $aggregate_sum_dicts) AS api_keys

        FROM $uuid_records
        GROUP BY
            id,
            id_type
    );

    -- Output: dates sorted, api_keys dumped back from the dict form;
    -- only ids accepted by Identifiers::IsValid are kept.
    INSERT INTO $out_table WITH TRUNCATE
    SELECT
        id,
        id_type,

        source,
        ts,

        app_id,
        app_version,

        os,

        device_id,
        idfa,
        ifv,
        android_id,
        google_adv_id,
        open_id,

        mmetric_device_id,
        mmetric_device_id_hash,

        ua_profile,
        `uuid`,

        ListSort(dates) AS dates,
        $dump_as_dict_int(api_keys) AS api_keys
    FROM $uuid_merged
    WHERE Identifiers::IsValid(id_type, id)
    ORDER BY
        id,
        id_type
    ;

END DEFINE;

-- ========================================================================= --

-- Step 1: the edge tables must be committed before the device actions
-- read them.
DO $collect_ids_month();
COMMIT;

-- Step 2: stream-only merges, skipped in month mode.
{% if not is_month %}
DO $merge_simple();
DO $merge_fuzzy2();
{% endif %}

-- Step 3: one temporary table per id kind (the tables are read back below
-- as @idfa, @ifv, @gaid, @oaid and @mm_device_id), then the uuid table.
DO $app_metrica_month_devices(IdType::IDFA());
DO $app_metrica_month_devices(IdType::IFV());
DO $app_metrica_month_devices(IdType::GAID());
DO $app_metrica_month_devices(IdType::OAID());
DO $app_metrica_month_devices(IdType::MM_DEVICE_ID());

DO $app_metrica_month_uuids();

COMMIT;

-- Step 4: all per-kind device tables glued together.
$dev_common = (
    SELECT * FROM @idfa
    UNION ALL
    SELECT * FROM @ifv
    UNION ALL
    SELECT * FROM @gaid
    UNION ALL
    SELECT * FROM @oaid
    UNION ALL
    SELECT * FROM @mm_device_id
);

INSERT INTO $ids_devices WITH TRUNCATE
SELECT *
FROM $dev_common
ORDER BY
    id,
    id_type
;

{% if is_month %}
-- Month mode only: one row per (id, mm device id, hash), unpacked from the
-- mmetric_device_ids_to_hash dict of every device.
$dev_hash_pairs = (
    SELECT
        id,
        id_type,
        DictItems(Yson::ConvertToStringDict(mmetric_device_ids_to_hash)) AS id_hash_pairs
    FROM $dev_common
);

INSERT INTO $ids_hash WITH TRUNCATE
SELECT
    id,
    id_type,
    id_hash_pair.0 AS mm_device_id,
    id_hash_pair.1 AS hash
FROM $dev_hash_pairs
FLATTEN LIST BY id_hash_pairs AS id_hash_pair
ORDER BY
    id,
    id_type
;
{% endif %}
