{% if not is_embedded %}
PRAGMA Library = 'metrica_lib.sql';
PRAGMA Library = 'ut_utils_lib.sql';
{% endif %}

IMPORT {% if is_embedded %}.lib.{% endif %}metrica_lib SYMBOLS
    $clean_ip,
    $force_null
;
IMPORT {% if is_embedded %}.lib.{% endif %}ut_utils_lib SYMBOLS
    $ua_profile_str
;

-- YT job sizing for the operations started by this script:
-- amount of input data per job and a cap on the number of jobs.
PRAGMA yt.DataSizePerJob = '5G';
PRAGMA yt.MaxJobCount = '10000';

-- ========================================================================= --

-- Processing window. Both bounds are substituted by the template renderer;
-- $date is also the day whose tables are read/written by the daily action.
$from_date = '{{ date_start }}';
$date = '{{ date_end }}';

-- (ip, ua_profile) groups of the bs-watch table with more yuids than this
-- are skipped when devices are matched to yuids.
$max_yuids_per_device = 35;

-- Root folders, substituted by the template renderer.
$indevice_output_folder = '{{ indevice_output_dir }}';
$yt_output_folder = '{{ graph_output_dir }}';

-- Folder with all fuzzy tables of $date: <indevice root>/<date>/fuzzy
$daily_fuzzy_dir = $indevice_output_folder || '/' || $date || '/fuzzy';

-- input tables
$bswatch_log = '{{ bswatch_log_dir }}/' || $date;
$yuid_all_info = '{{ ids_storage_dir }}/yandexuid/yuid_with_all_info_no_socdem';
$fuzzy2_metrica = $daily_fuzzy_dir || '/fuzzy2_metrica';
-- written by $fuzzy2_daily_table and read back in the same action
$fuzzy2_bswatch = $daily_fuzzy_dir || '/fuzzy2_bswatch';

-- output tables
$fuzzy2_table = $daily_fuzzy_dir || '/dev_yuid_fuzzy2';
$unperfect_table = $daily_fuzzy_dir || '/dev_yuid_unperfect';
$soup_dir = $yt_output_folder || '/v2/soup/dumps';

-- ========================================================================= --

-- Daily step.
--   1. Aggregates the bs-watch log of $date into $fuzzy2_bswatch:
--      one row per (ip, ua_profile) with the list of yuids seen on it.
--   2. Joins that table with $fuzzy2_metrica on (ip, ua_profile) and writes
--      (id_value, id_type, yuid, dt, hits) rows into $fuzzy2_table.
DEFINE ACTION $fuzzy2_daily_table() AS

    -- ip / ua_profile / yuid extracted from raw bs-watch rows with the
    -- imported helpers. Rows whose browserinfo contains a `cy:2` token
    -- (delimited by ':' or the string boundaries) are excluded.
    -- NOTE(review): rows with NULL browserinfo presumably fail the
    -- NOT REGEXP predicate as well and are dropped - confirm this is intended.
    $bswatch_hits = (
        SELECT
            $clean_ip(clientip6) AS ip,
            $ua_profile_str(UserAgent::Parse(useragent)) AS ua_profile,
            $force_null(uniqid) AS yuid
        FROM $bswatch_log
        WHERE browserinfo NOT REGEXP @@(^|\:)cy\:2($|\:)@@  -- CRYPTR-1978
    );

    -- TopFreq keeps up to 100 of the most frequent yuids per group;
    -- each list item carries Value and Frequency.
    INSERT INTO $fuzzy2_bswatch WITH TRUNCATE
    SELECT
        ip,
        ua_profile,
        TopFreq(yuid, 100) AS yuids
    FROM $bswatch_hits
    WHERE ip IS NOT Null
        AND ua_profile IS NOT Null
        AND yuid IS NOT Null
    GROUP BY ip, ua_profile
    ORDER BY ip, ua_profile
    ;

    -- $fuzzy2_bswatch is read back below, so the insert must be committed first.
    COMMIT;

    -- bs-watch groups that hold at most $max_yuids_per_device yuids.
    $bswatch_small = (
        SELECT ip, ua_profile, yuids
        FROM $fuzzy2_bswatch
        WHERE ListLength(yuids) <= $max_yuids_per_device
    );

    -- For every device id type build $<id_type>_yuid:
    -- one row per (device id, yuid) pair sharing the same (ip, ua_profile).
    {% for id_type_var in ("idfa", "gaid", "oaid", "ifv") %}
    -- metrica groups with exactly one {{ id_type_var }}
    $metrica_single_{{ id_type_var }} = (
        SELECT ip, ua_profile, {{ id_type_var }}s
        FROM $fuzzy2_metrica
        WHERE ListLength({{ id_type_var }}s) == 1
    );

    $matched_{{ id_type_var }} = (
        SELECT
            Unwrap(mm.{{ id_type_var }}s) AS {{ id_type_var }}s,
            Unwrap(bs.yuids) AS yuids
        FROM $metrica_single_{{ id_type_var }} AS mm
        INNER JOIN $bswatch_small AS bs
        ON (mm.ip == bs.ip AND mm.ua_profile == bs.ua_profile)
    );

    ${{ id_type_var }}_yuid = (
        SELECT
            {{ id_type_var }}s[0].Value AS id_value,
            '{{ id_type_var }}' AS id_type,
            yuid.Value AS yuid,
            yuid.Frequency AS hits
        FROM $matched_{{ id_type_var }}
        FLATTEN LIST BY yuids AS yuid
    );
    {% endfor %}

    -- Merge all id types; hits of identical (id_value, id_type, yuid)
    -- triples are summed and every row is stamped with $date.
    INSERT INTO $fuzzy2_table WITH TRUNCATE
    SELECT
        id_value,
        id_type,
        yuid,
        $date AS dt,
        SUM(hits) AS hits
    FROM (
        SELECT id_value, id_type, yuid, hits FROM $idfa_yuid
        UNION ALL
        SELECT id_value, id_type, yuid, hits FROM $ifv_yuid
        UNION ALL
        SELECT id_value, id_type, yuid, hits FROM $oaid_yuid
        UNION ALL
        SELECT id_value, id_type, yuid, hits FROM $gaid_yuid
    )
    GROUP BY id_value, id_type, yuid
    ORDER BY id_value, id_type, yuid
    ;

END DEFINE;

-- Window step.
-- Combines the daily dev_yuid_fuzzy2 tables of [$from_date, $date], keeps
-- only the yuids accepted by $correct_yuids and writes:
--   * one soup table per device id type (idfa, ifv, oaid, gaid) under $soup_dir;
--   * the legacy key/subkey/value table $unperfect_table.
DEFINE ACTION $make_fuzzy2_soup() AS

    -- All daily fuzzy2 rows of the window.
    $fuzzy2_history = (
        SELECT id_value, id_type, yuid, hits, dt
        FROM RANGE (
            $indevice_output_folder,
            $from_date,
            $date,
            `fuzzy/dev_yuid_fuzzy2`
        )
    );

    -- yuids that were linked to exactly one id_value over the whole window.
    $single_id_yuids = (
        SELECT yuid
        FROM $fuzzy2_history
        GROUP BY yuid
        HAVING COUNT(DISTINCT id_value) == 1
    );

    -- yuids whose ip_activity_type is neither 'private' nor 'not_active';
    -- is_un marks ua_profiles that contain the '|un|' component.
    $active_info = (
        SELECT
            id AS yuid,
            ua_profile LIKE '%|un|%' AS is_un
        FROM $yuid_all_info
        WHERE ip_activity_type NOT IN ('private', 'not_active')
    );

    -- Intersection of the two sets above. When the template flag
    -- fuzzy2_use_un is explicitly false, '|un|' profiles are excluded too.
    $correct_yuids = (
        SELECT
            candidates.yuid AS yuid
        FROM $single_id_yuids AS candidates
        INNER JOIN $active_info AS info
        USING (yuid)
        {% if not fuzzy2_use_un|default(True) %}
        WHERE NOT info.is_un
        {% endif %}
    );

    -- One row per (id_value, id_type, yuid): total hits and the sorted list
    -- of days on which the pair was observed.
    $pre_soup = (
        SELECT
            hist.id_value AS id_value,
            hist.id_type AS id_type,
            hist.yuid AS yuid,
            SUM(hist.hits) AS hits,
            ListSort(AGGREGATE_LIST(hist.dt)) AS dates
        FROM $fuzzy2_history AS hist
        LEFT SEMI JOIN $correct_yuids AS accepted
        USING (yuid)
        GROUP BY
            hist.id_value,
            hist.id_type,
            hist.yuid
    );

    -- Writes the soup table "<id_type> -> yandexuid" for the given id type.
    -- Table names under $soup_dir:
    --   idfa_yandexuid_fuzzy2_fuzzy2-indevice
    --   ifv_yandexuid_fuzzy2_fuzzy2-indevice
    --   oaid_yandexuid_fuzzy2_fuzzy2-indevice
    --   gaid_yandexuid_fuzzy2_fuzzy2-indevice
    DEFINE ACTION $filter_soup($id_type) AS

        $yandexuid_type = IdType::YANDEXUID();
        $source_type = SourceType::PROBABILISTIC2();
        $log_source = LogSource::FUZZY2_INDEVICE();

        $soup_table = $soup_dir || '/' || String::JoinFromList(
            AsList($id_type, $yandexuid_type, $source_type, $log_source),
            '_'
        );

        INSERT INTO $soup_table WITH TRUNCATE
        SELECT
            $source_type AS sourceType,
            $log_source AS logSource,
            id_value AS id1,
            id_type AS id1Type,
            yuid AS id2,
            $yandexuid_type AS id2Type,
            dates
        FROM $pre_soup
        WHERE id_type == $id_type
        ;

    END DEFINE;

    -- Legacy indevice export: key = device id, subkey = yuid,
    -- value = tab-separated "type / prob / perfect" string.
    DEFINE ACTION $old_indevice() AS

        -- prob = 0.75 + <number of days> / 30, so it reaches or exceeds 1.0
        -- from 8 days on; this is kept as is for historical reasons.
        $get_value_str = ($days_seen) -> {
            RETURN "type=mn\tprob=" || CAST(0.75 + $days_seen / 30.0 AS String) || "\tperfect=0";
        };

        INSERT INTO $unperfect_table WITH TRUNCATE
        SELECT
            id_value AS key,
            yuid AS subkey,
            $get_value_str(ListLength(dates)) AS value
        FROM $pre_soup
        ORDER BY key, subkey;

    END DEFINE;

    DO $filter_soup('idfa');
    DO $filter_soup('ifv');
    DO $filter_soup('oaid');
    DO $filter_soup('gaid');

    DO $old_indevice();

END DEFINE;

-- ========================================================================== --

-- Entry point: the rendered value of the `is_day_task` template variable
-- selects the action to run - the daily table build when it is true,
-- the window-wide soup / unperfect export otherwise.
EVALUATE IF {{ is_day_task }}
    DO $fuzzy2_daily_table()
ELSE
    DO $make_fuzzy2_soup()
;
