-- Folder that $apply_offline lists (via FOLDER) to pick the PublicProfilesDump table to read.
$profiles_dump_dir = '//home/bigb/production/public/profiles';


-- Builds a mapper that tags a struct with one more link type.
--   $link_type - value appended to the end of the struct's link_types list.
-- Returns a lambda: struct with a link_types member -> the same struct whose
-- link_types is the previous list followed by $link_type.
$add_link_type = ($link_type) -> {
    RETURN ($entry) -> (
        ReplaceMember(
            $entry,
            'link_types',
            ListExtend($entry.link_types, AsList($link_type))
        )
    );
};


-- Indexes a list of structs by their user_id member.
--   $actions - list of structs, each having a user_id member.
-- Returns a dict keyed by user_id whose payload is the original struct.
$get_dict = ($actions) -> {
    $keyed_actions = ListMap(
        $actions,
        ($action) -> { RETURN AsTuple($action.user_id, $action); }
    );
    RETURN ToDict($keyed_actions);
};


-- Extends the action list of every hit with actions of linked user ids taken from PublicProfilesDump.
--
-- Parameters:
--   $hit_log     - subquery template; its rows are read for hit_id, duplicates, UniqID and actions
--                  (a list of structs with user_id and link_types members).
--   $idfy_users  - subquery template; its rows are read for UniqID and user_ids (list of linked ids).
--   $date        - string that the name of the profiles dump table must contain.
--   $keyword     - passed as is to $get_actions.
--   $link_type   - label put into link_types of actions attributed to this linkage.
--   $get_actions - callable invoked as $get_actions(SerializedProto, $keyword); its result is treated
--                  as a dict and the entry under key -1 is taken.
-- Result columns: hit_id, duplicates, UniqID, actions.
DEFINE SUBQUERY $apply_offline($hit_log, $idfy_users, $date, $keyword, $link_type, $get_actions) AS
    -- Step 1: join hits with crypta (linked user ids).

    $hits_with_links = (
        SELECT
            hit.hit_id AS hit_id,
            hit.duplicates AS duplicates,
            hit.UniqID AS UniqID,
            -- linked ids that have no action in the hit yet; they are looked up in the dump below
            DictKeys(
                SetDifference(ToSet(idfy.user_ids ?? []), $get_dict(hit.actions))
            ) AS go_to_dump,
            ListExtend(
                -- actions whose user_id is among the linked ids additionally get $link_type
                ListMap(
                    DictPayloads(
                        SetIntersection(
                            ToSet(idfy.user_ids ?? []),
                            $get_dict(hit.actions),
                            ($key, $from_links, $from_actions) -> ($from_actions)
                        )
                    ),
                    $add_link_type($link_type)
                ),
                -- every other action is kept unchanged
                DictPayloads(
                    SetDifference($get_dict(hit.actions), ToSet(idfy.user_ids ?? []))
                )
            ) AS actions,
        FROM $hit_log() AS hit
        LEFT JOIN ANY $idfy_users() AS idfy USING (UniqID)
    );

    -- Step 2: join with PublicProfilesDump and take the required keyword events from it.

    -- one row per (hit, missing linked id); the id gets the 'y' prefix to match the dump key
    $hit_dump_keys = (
        SELECT
            hit_id,
            'y' || user_id AS user_id
        FROM $hits_with_links
        FLATTEN LIST BY go_to_dump AS user_id
    );

    -- the maximum (ListMax) table name in the dump folder among those containing $date
    $dump_table_name = (
        SELECT
            ListMax(
                ListFilter(
                    AGGREGATE_LIST(TableName(Path, "yt")),
                    ($name) -> ($name LIKE '%' || $date || '%')
                )
            ) AS table_name
        FROM FOLDER($profiles_dump_dir)
    );

    -- same name, guarded by Ensure so that a missing table is reported with an explicit message
    $verified_dump_table_name = (
        SELECT Ensure(
            $dump_table_name,
            $dump_table_name IS NOT NULL,
            "There are no PublicProfilesDump for " || $date || ". Observed path: " || $profiles_dump_dir
        )
    );

    $dump_actions = (
        SELECT
            UniqID AS user_id,
            -- in PublicProfilesDump all actions have source_uniq_index=-1
            $get_actions(SerializedProto, $keyword)[-1] AS ids,
        FROM LIKE($profiles_dump_dir, $verified_dump_table_name)
        WHERE DictHasItems($get_actions(SerializedProto, $keyword))
    );

    -- dump rows restricted to the ids requested by at least one hit
    $dump_actions_for_links = (
        SELECT
            dump.user_id AS user_id,
            AsStruct(
                SUBSTRING(dump.user_id, 1) AS user_id, -- strip the leading 'y'
                [$link_type] AS link_types, -- mark which linkage these events came from
                dump.ids AS ids,
            ) AS actions
        FROM $dump_actions AS dump
        LEFT SEMI JOIN (
            SELECT DISTINCT user_id
            FROM $hit_dump_keys
        ) AS addition
        USING (user_id)
    );

    -- Each hit has its own set of cookies (even when UniqID values coincide),
    -- so the additional events are collected separately for every hit.

    $extra_actions_per_hit = (
        SELECT
            hit_id,
            AGGREGATE_LIST(dump.actions, 8000) AS actions,
        FROM $hit_dump_keys AS addition
        INNER JOIN $dump_actions_for_links AS dump USING (user_id)
        GROUP COMPACT BY addition.hit_id AS hit_id -- max number of keys is tens of thousands < 1M, hence COMPACT
    );

    -- Step 3: append the new keyword events that came from the additional offline cookies of the linkage.

    SELECT
        hit.hit_id AS hit_id,
        hit.duplicates AS duplicates,
        hit.UniqID AS UniqID,
        ListUnionAll(hit.actions, addition.actions ?? []) AS actions,
    FROM $hits_with_links AS hit
    LEFT JOIN $extra_actions_per_hit AS addition USING (hit_id);

END DEFINE;

-- EXPORT $apply_offline;
