{# Root of the Crypta tree for the environment named by the crypta_env template variable. #}
{% set base_path = "//home/crypta/" + crypta_env %}
{# Disabled alternative staff_dir that points at a personal team directory; left commented out. #}
{#% set staff_dir = "//home/crypta/team/mskorokhod/CRYPTA-16300/staff" %#}
{# Directory holding the staff dump that is read and the staff tables that are written by this query. #}
{% set staff_dir = base_path + "/ids_storage/staff" %}
{# Directory for the `staff` export table written by this query. #}
{% set export_dir = base_path + "/state/graph/profiles" %}

-- Table-valued subquery: returns all rows of one table from the folder $dir.
-- The table is chosen as the greatest captured name (string MAX) among the
-- folder entries of type 'table' whose row_count attribute is above zero.
DEFINE SUBQUERY $LAST_TABLE($dir) AS
    -- Extracts the `name` group from a path: a run of characters after a slash
    -- that contains neither slashes nor dots.
    $path_to_name = Re2::Capture(@@^.*\/(?P<name>[^\/\.]+).*$@@);

    $latest_name = (
        SELECT
            MAX($path_to_name(Path).name) AS name
        FROM FOLDER($dir, 'row_count')
        WHERE
            Type == 'table'
            AND Yson::LookupUint64(Attributes, 'row_count') > 0
    );

    -- LIKE restricts the read to the tables in $dir matching the chosen name.
    SELECT
        *
    FROM LIKE($dir, $latest_name);
END DEFINE;

-- Accounts from the latest passport userdata table whose sid collection
-- contains 669, minus logins that start with one of the prefixes below.
-- Output columns: puid (the source uid) and the normalized login.
$puid_heavy = (
    SELECT
        userdata.uid AS puid,
        Identifiers::NormalizeLogin(userdata.login) AS login
    FROM $LAST_TABLE('//home/passport/production/userdata') AS userdata
    WHERE
        -- When is_inside_test is set, sid is first converted from Yson to an Int64 list.
        669 IN {% if is_inside_test %}Yson::ConvertToInt64List(userdata.sid){% else %}userdata.sid{% endif %}
        -- Prefix checks are applied to the raw login, not to the normalized one.
        AND NOT (
            userdata.login LIKE "robbiter-%"
            OR userdata.login LIKE "robbitter-%"
            OR userdata.login LIKE "yandex-team-%"
        )
);

-- Predicate that matches strings containing "yandex-team" or "yamoney".
$exclude = Pire::Grep("yandex-team|yamoney");

-- Parses $data as Yson, takes the node at YPath $key, converts it to a list of
-- strings and keeps only the elements that $exclude does not match.
$yson_parse_to_list = ($data, $key) -> {
    $node = Yson::YPath(Yson::Parse($data), $key);
    $items = Yson::ConvertToStringList($node);
    $is_kept = ($id) -> {
        RETURN NOT $exclude($id);
    };
    RETURN ListFilter($items, $is_kept);
};

-- Runs login normalization over $email and returns the result only when it
-- contains no "@"; in every other case the result is NULL.
$email_to_login = ($email) -> {
    $normalized = Identifiers::NormalizeLogin($email);
    $has_at_sign = String::Contains($normalized, "@");
    RETURN IF(NOT $has_at_sign, $normalized, NULL);
};

-- One row per (staff_login, id) built from the staff dump. For every dump row
-- the id list holds the normalized passport login followed by the MD5 hashes
-- of the phones and e-mails kept by $yson_parse_to_list.
$staff_to_join = (
    SELECT
        staff_login,
        id
    FROM (
        SELECT
            staff_login,
            ListExtend(
                AsList(Identifiers::NormalizeLogin(passport_login)),
                ListMap(
                    $yson_parse_to_list(data, "/phone"),
                    Identifiers::HashMd5Phone
                ),
                ListMap(
                    $yson_parse_to_list(data, "/email"),
                    Identifiers::HashMd5Email
                )
            ) AS join_ids
        FROM `{{ staff_dir }}/dump`
    )
    -- Unroll the list so that every identifier gets its own row.
    FLATTEN LIST BY join_ids AS id
);

-- One row per (staff_login, login) built from the staff dump. Candidate logins
-- are the normalized passport login plus every e-mail that $email_to_login
-- turns into a login; NULL candidates are dropped before flattening.
$staff_to_login = (
    SELECT
        staff_login,
        login
    FROM (
        SELECT
            staff_login,
            ListNotNull(
                ListExtend(
                    AsList(Identifiers::NormalizeLogin(passport_login)),
                    ListMap(
                        $yson_parse_to_list(data, "/email"),
                        $email_to_login
                    )
                )
            ) AS login_candidates
        FROM `{{ staff_dir }}/dump`
    )
    -- Unroll the list so that every login gets its own row.
    FLATTEN LIST BY login_candidates AS login
);

-- Maps staff logins to puids through the puid/login soup table: id2 is matched
-- against the login and id1 is returned as puid.
-- The join is a LEFT join, so logins without a match stay in the result with a
-- NULL puid.
$staff_to_puid = (
    SELECT
        logins.staff_login AS staff_login,
        edges.id1 AS puid
    FROM $staff_to_login AS logins
    LEFT JOIN ANY `{{ base_path }}/state/graph/v2/soup/puid_login_passport-profile_passport-dict` AS edges
        ON logins.login == edges.id2
);

-- Table-valued subquery: (id, crypta_id) pairs from the matching vertices
-- table, limited to rows whose id_type is contained in $id_types.
-- The source column cryptaId is exposed as crypta_id.
DEFINE SUBQUERY $crypta_id_to_join($id_types) AS
    SELECT
        vertices.id AS id,
        vertices.cryptaId AS crypta_id
    FROM `{{ base_path }}/state/graph/v2/matching/vertices_no_multi_profile_by_id_type` AS vertices
    WHERE vertices.id_type IN $id_types
END DEFINE;

-- Links staff logins to crypta ids by joining the staff identifiers with the
-- vertices of types email_md5, phone_md5 and login on the identifier value.
-- Inner join: staff identifiers without a vertex are dropped.
$staff_to_crypta_id = (
    SELECT
        staff_ids.staff_login AS staff,
        vertices.crypta_id AS crypta_id
    FROM $staff_to_join AS staff_ids
    INNER JOIN ANY $crypta_id_to_join({"email_md5", "phone_md5", "login"}) AS vertices
        ON staff_ids.id == vertices.id
);

-- Distinct crypta ids collected from two sources:
--   1. crypta ids linked to staff identifiers;
--   2. crypta ids of puid vertices whose puid is present in $puid_heavy.
$all_staff_crypta_ids = (
    SELECT
        crypta_id
    FROM (
        -- Source 1: staff identifiers matched to crypta ids.
        SELECT
            linked.crypta_id AS crypta_id
        FROM $staff_to_crypta_id AS linked
        WHERE linked.crypta_id IS NOT NULL

        UNION ALL

        -- Source 2: puid vertices that also appear in the passport selection.
        SELECT
            vertices.crypta_id AS crypta_id
        FROM $crypta_id_to_join({"puid"}) AS vertices
        LEFT SEMI JOIN $puid_heavy AS heavy
            ON vertices.id == heavy.puid
    )
    -- Grouping by the only selected column removes duplicates.
    GROUP BY crypta_id
);

-------------------------------------------------------------------------------

-- Rewrites the staff -> crypta_id link table: one row per distinct
-- (staff, crypta_id) pair, sorted by id and target_id. Both date columns are
-- written as empty optional strings.
INSERT INTO `{{ staff_dir }}/crypta_id` WITH TRUNCATE
SELECT
    id,
    "staff" AS id_type,
    target_id,
    "crypta_id" AS target_id_type,
    Nothing(String?) AS date_begin,
    Nothing(String?) AS date_end
FROM $staff_to_crypta_id
GROUP BY
    staff AS id,
    crypta_id AS target_id
ORDER BY
    id,
    target_id;

-- Rewrites the staff -> login link table: one row per distinct
-- (staff_login, login) pair, sorted by id and target_id. Both date columns are
-- written as empty optional strings.
INSERT INTO `{{ staff_dir }}/login` WITH TRUNCATE
SELECT
    id,
    "staff" AS id_type,
    target_id,
    "login" AS target_id_type,
    Nothing(String?) AS date_begin,
    Nothing(String?) AS date_end
FROM $staff_to_login
GROUP BY
    staff_login AS id,
    login AS target_id
ORDER BY
    id,
    target_id;

-- Rewrites the staff -> puid link table: one row per distinct
-- (staff_login, puid) pair, sorted by id and target_id. Both date columns are
-- written as empty optional strings.
INSERT INTO `{{ staff_dir }}/puid` WITH TRUNCATE
SELECT
    id,
    "staff" AS id_type,
    target_id,
    "puid" AS target_id_type,
    Nothing(String?) AS date_begin,
    Nothing(String?) AS date_end
FROM $staff_to_puid
GROUP BY
    staff_login AS id,
    puid AS target_id
ORDER BY
    id,
    target_id;

-- Rewrites the passport_userdata table with the (puid, login) rows selected by
-- $puid_heavy, sorted by both columns.
INSERT INTO `{{ staff_dir }}/passport_userdata` WITH TRUNCATE
SELECT
    heavy.puid AS puid,
    heavy.login AS login
FROM $puid_heavy AS heavy
ORDER BY
    puid,
    login;

-- Rewrites the yandexuid_ext table: distinct yandexuid values (vertex ids cast
-- to UInt64) of the yandexuid vertices whose crypta id is present in
-- $all_staff_crypta_ids, sorted by yandexuid.
INSERT INTO `{{ staff_dir }}/yandexuid_ext` WITH TRUNCATE
SELECT
    yandexuid
FROM $crypta_id_to_join({"yandexuid"}) AS vertices
LEFT SEMI JOIN $all_staff_crypta_ids AS staff_ids
    ON staff_ids.crypta_id == vertices.crypta_id
GROUP BY
    CAST(vertices.id AS UInt64) AS yandexuid
ORDER BY
    yandexuid;

-- One row per puid with two origin flags:
--   from_staff    - the puid is a vertex whose crypta id is in $all_staff_crypta_ids;
--   from_userdata - the puid is present in $puid_heavy.
-- Each UNION ALL branch sets only its own flag, and MAX merges the branches per puid.
-- NOTE(review): a flag that no branch set for a puid is expected to be NULL
-- rather than false, assuming UNION ALL fills missing columns with NULL - confirm.
$puid_ext_subq = (
    SELECT
        puid,
        MAX(from_staff) AS from_staff,
        MAX(from_userdata) AS from_userdata
    FROM (
        -- Branch 1: puids reached through staff crypta ids.
        SELECT
            puid,
            True AS from_staff
        FROM $crypta_id_to_join({"puid"}) AS vertices
        LEFT SEMI JOIN $all_staff_crypta_ids AS staff_ids
            ON staff_ids.crypta_id == vertices.crypta_id
        GROUP BY
            vertices.id AS puid

        UNION ALL

        -- Branch 2: puids taken directly from the passport selection.
        SELECT
            heavy.puid AS puid,
            True AS from_userdata
        FROM $puid_heavy AS heavy
    )
    GROUP BY puid
);

-- Rewrites the puid_ext table with the puid column of $puid_ext_subq, sorted by puid.
INSERT INTO `{{ staff_dir }}/puid_ext` WITH TRUNCATE
SELECT
    ext.puid AS puid
FROM $puid_ext_subq AS ext
ORDER BY
    puid;

-- Rewrites the export `staff` table: every puid from $puid_ext_subq with its
-- two origin flags, the id type "puid" and the constant keyword 1140, sorted
-- by id and id_type.
INSERT INTO `{{ export_dir }}/staff` WITH TRUNCATE
SELECT
    puid AS id,
    "puid" AS id_type,
    from_staff,
    from_userdata,
    1140 AS keyword
FROM $puid_ext_subq
ORDER BY
    id,
    id_type;
