-- ========================================================================= --
-- Map-side JOIN tuning for the YT backend.
-- NOTE(review): per the YQL yt.* pragma reference, MapJoinLimit presumably caps the
-- size of the smaller JOIN input that may still use the map-side strategy, and
-- MapJoinShardCount the number of shards that input may be split into — confirm
-- against the docs before changing either value.
PRAGMA yt.MapJoinLimit = '4096M';
PRAGMA yt.MapJoinShardCount = '8';
-- ========================================================================= --
-- NOTE(review): $TOPFREQ_BUFF is not referenced in the visible part of this
-- script — confirm whether it is still needed.
$TOPFREQ_BUFF = 250;
-- Size threshold for a household: groups in the crypta_id-to-crypta_id table
-- are whitelisted only while their row count is strictly below this value.
$MAX_CRYPTA_ID_PER_HH = 10;
-- ========================================================================= --
-- Root directories; {{ household_dir }} is a template placeholder
-- (presumably rendered before the query is submitted).
$work_dir = '{{ household_dir }}/workdir';
$output_dir = '{{ household_dir }}/output';

-- Input tables (read from the work directory).
$crypta_id_crypta_id = $work_dir || '/crypta_id_crypta_id';
$crypta_id_yuid = $work_dir || '/crypta_id_yuid';
$homeless_tvs_tbl = $work_dir || '/homeless_tvs';
$all_yuids_tbl = $work_dir || '/all_yuids';

-- Output tables (written to the output directory).
$output_hh = $output_dir || '/hh_match';
$output_enrich = $output_dir || '/hh_enrich';
$output_enrich_reversed = $output_dir || '/hh_reversed';
$output_enrich_crypta_id = $output_dir || '/hh_crypta_id';
$output_enrich_reversed_crypta_id = $output_dir || '/hh_crypta_id_reversed';
-- ========================================================================== --

-- Households (keyed by id1 of the crypta_id-to-crypta_id table) together with
-- their row count; only groups with fewer than $MAX_CRYPTA_ID_PER_HH rows pass.
-- NOTE(review): the comparison is strict, so a group of exactly
-- $MAX_CRYPTA_ID_PER_HH rows is excluded — confirm this is intended.
$whitelist_households = (
    SELECT
        hhid,
        COUNT(*) AS size
    FROM $crypta_id_crypta_id
    GROUP BY
        id1 AS hhid
    HAVING
        COUNT(*) < $MAX_CRYPTA_ID_PER_HH
);

-- (hhid, crypta_id) pairs taken from the crypta_id-to-crypta_id table
-- (id1 -> hhid, id2 -> crypta_id), restricted to whitelisted households.
$hh_id_from_ccid = (
    SELECT
        pairs.hhid AS hhid,
        pairs.crypta_id AS crypta_id
    FROM (
        SELECT
            id1 AS hhid,
            id2 AS crypta_id
        FROM $crypta_id_crypta_id
    ) AS pairs
    -- Semi join: keep a pair only when its household is in the whitelist.
    LEFT SEMI JOIN $whitelist_households AS allowed
    ON pairs.hhid = allowed.hhid
);

-- Every (crypta_id, yuid) row annotated with a household id.
-- When the crypta_id has no whitelisted household, the crypta_id itself
-- is used as the hhid.
$output_hh_query = (
    SELECT
        COALESCE(hh.hhid, cy.crypta_id) AS hhid,
        cy.crypta_id AS crypta_id,
        cy.yuid AS yuid
    FROM $crypta_id_yuid AS cy
    LEFT JOIN $hh_id_from_ccid AS hh
    ON cy.crypta_id = hh.crypta_id
);

-- Distinct (crypta_id, hhid) pairs of $output_hh_query; the GROUP BY is used
-- purely for de-duplication (one row per pair regardless of how many yuids).
$uniq_crypta_id_hh = (
    SELECT
        crypta_id,
        hhid
    FROM $output_hh_query
    GROUP BY
        crypta_id,
        hhid
);

-- TV yuids from $homeless_tvs_tbl attached to households through their ccid.
-- The ccid serves only as the join key: crypta_id is emitted as NULL on
-- purpose, so these rows are skipped by any consumer that filters on
-- "crypta_id IS NOT NULL".
-- NOTE(review): an earlier variant emitted tv.crypta_id here — confirm that
-- NULL is still the intended value.
$homeless_tvs_hh = (
    SELECT
        tv.yuid AS yuid,
        NULL AS crypta_id,
        pairs.hhid AS hhid
    FROM (
        SELECT
            yuid,
            ccid AS crypta_id
        FROM $homeless_tvs_tbl
    ) AS tv
    INNER JOIN $uniq_crypta_id_hh AS pairs
    ON tv.crypta_id = pairs.crypta_id
);

-- ========================================================================== --

-- hh_match: rows of $output_hh_query whose hhid is a whitelisted household,
-- flagged with is_tv from $all_yuids_tbl (False when the yuid has no match).
-- Rows whose hhid fell back to the crypta_id survive only if that value is
-- itself present in the whitelist.
INSERT INTO $output_hh WITH TRUNCATE
SELECT
    matched.hhid AS hhid,
    matched.crypta_id AS crypta_id,
    matched.yuid AS yuid,
    COALESCE(yuid_info.is_tv, False) AS is_tv
FROM (
    SELECT
        candidate.hhid AS hhid,
        candidate.crypta_id AS crypta_id,
        candidate.yuid AS yuid
    FROM $output_hh_query AS candidate
    LEFT SEMI JOIN $whitelist_households AS allowed
    ON candidate.hhid = allowed.hhid
) AS matched
LEFT JOIN $all_yuids_tbl AS yuid_info
ON matched.yuid = yuid_info.yuid
ORDER BY
    hhid,
    crypta_id,
    yuid;

-- Household rows from $output_hh_query plus the "homeless" TV rows from
-- $homeless_tvs_hh, each flagged with is_tv from $all_yuids_tbl
-- (False when the yuid has no match there).
-- Original author's note: these rows are needed for "smotrelka"
-- (NOTE(review): presumably a downstream consumer — confirm).
$hh_enrich_query = (
    SELECT
        hh.hhid AS hhid,
        hh.crypta_id AS crypta_id,
        hh.yuid AS yuid,
        yuid_info.is_tv ?? False AS is_tv
    FROM (
        -- Columns are listed explicitly and in the same order on both sides:
        -- the two inputs declare them in different orders, so the union must
        -- not depend on "SELECT *" column ordering.
        SELECT hhid, crypta_id, yuid FROM $output_hh_query
        UNION ALL
        SELECT hhid, crypta_id, yuid FROM $homeless_tvs_hh
    ) AS hh
    LEFT JOIN $all_yuids_tbl AS yuid_info
    USING (yuid)
);

-- ========================================================================= --

-- hh_enrich: the full enriched set, sorted household-first.
INSERT INTO $output_enrich WITH TRUNCATE
SELECT
    hhid,
    crypta_id,
    yuid,
    is_tv
FROM $hh_enrich_query
ORDER BY
    hhid,
    crypta_id,
    yuid;

-- hh_reversed: same rows as hh_enrich, sorted yuid-first for reverse lookups.
INSERT INTO $output_enrich_reversed WITH TRUNCATE
SELECT
    hhid,
    crypta_id,
    yuid,
    is_tv
FROM $hh_enrich_query
ORDER BY
    yuid,
    crypta_id,
    hhid;

-- hh_crypta_id: distinct (hhid, crypta_id) pairs, sorted household-first.
-- Rows with a NULL crypta_id are left out; GROUP BY only de-duplicates.
INSERT INTO $output_enrich_crypta_id WITH TRUNCATE
SELECT
    hhid,
    crypta_id
FROM $hh_enrich_query
WHERE crypta_id IS NOT NULL
GROUP BY
    hhid,
    crypta_id
ORDER BY
    hhid,
    crypta_id;

-- hh_crypta_id_reversed: distinct (hhid, crypta_id) pairs, sorted
-- crypta_id-first. Rows with a NULL crypta_id are left out; GROUP BY only
-- de-duplicates.
INSERT INTO $output_enrich_reversed_crypta_id WITH TRUNCATE
SELECT
    hhid,
    crypta_id
FROM $hh_enrich_query
WHERE crypta_id IS NOT NULL
GROUP BY
    hhid,
    crypta_id
ORDER BY
    crypta_id,
    hhid;
