BEGIN;

-- Remove any earlier definition so the CREATE FUNCTION later in this
-- transaction can change the result column types.
-- PostgreSQL identifies a function by its name and *input* argument types
-- only; OUT arguments are ignored by DROP FUNCTION, so the zero-argument
-- signature below targets the same function regardless of which OUT column
-- types the previous version declared.
DROP FUNCTION IF EXISTS bloated_tables();

-- bloated_tables(): list tables whose estimated bloat exceeds fixed thresholds.
--
-- The estimate is computed entirely from snapshot tables that are defined
-- outside this file: other_stats_start, pg_stats_start,
-- information_schema_columns_start, pg_class_start and pg_namespace_start.
-- NOTE(review): none of the joins below filter on a database column; if the
-- snapshot tables hold rows for more than one database, confirm that
-- schema/table names and namespace oids cannot collide across databases.
--
-- Result columns (one row per reported table):
--   databasename  dbname of the matching pg_namespace_start row
--   schemaname    schema of the table
--   tablename     name of the table
--   pct_bloat     estimated bloat as a percentage of the table size,
--                 rounded to a whole number
--   mb_bloat      estimated bloat in MB (bytes / 1024^2), 2 decimals
--   table_mb      table size in MB (bytes / 1024^2), 3 decimals
--
-- Maintenance note: PL/pgSQL treats the OUT parameter names as variables and
-- by default raises an "ambiguous" error when a bare name could be either a
-- variable or a column.  Every column called databasename, schemaname,
-- tablename, pct_bloat, mb_bloat or table_mb must therefore stay
-- table-qualified inside the body (column aliases are unaffected).
CREATE FUNCTION bloated_tables(OUT databasename TEXT,
                               OUT schemaname TEXT,
                               OUT tablename TEXT,
                               OUT pct_bloat NUMERIC,
                               OUT mb_bloat NUMERIC,
                               OUT table_mb NUMERIC) RETURNS SETOF RECORD AS

$bloated_tables$

BEGIN
  RETURN QUERY
    -- new table bloat query
    -- still needs work; is often off by +/- 20%
    WITH constants AS (
        -- Sizing constants referenced by the CTEs below, kept in one place
        -- for easy maintenance.
        --   bs  : numeric value of the 'current_setting' row in
        --         other_stats_start (presumably the block size in bytes;
        --         confirm against whatever populates that table)
        --   hdr : 23 (presumably the per-row header size in bytes)
        --   ma  : 8  (presumably the alignment boundary in bytes)
        -- NOTE(review): this CTE is cross-joined below, so more than one
        -- matching row in other_stats_start would multiply the estimates.
        SELECT info::numeric AS bs, 23 AS hdr, 8 AS ma
        FROM other_stats_start
        WHERE name = 'current_setting'
    ),
    no_stats AS (
        -- Tables having at least one column without a statistics row
        -- (for example JSON columns); these are excluded further down
        -- because their row width cannot be estimated.
        SELECT isc.table_schema, isc.table_name
        FROM information_schema_columns_start AS isc
        LEFT JOIN pg_stats_start AS pss
            ON isc.table_schema = pss.schemaname
            AND isc.table_name = pss.tablename
            AND isc.column_name = pss.attname
        WHERE pss.attname IS NULL
            AND isc.table_schema NOT IN ('pg_catalog', 'information_schema')
        GROUP BY isc.table_schema, isc.table_name
    ),
    null_headers AS (
        -- Per-table null-header size and average data width, skipping
        -- tables with incomplete stats (anti-join on no_stats) and tables
        -- that do not appear in information_schema_columns_start at all.
        SELECT
            -- integer division by 8 is intentional: one header byte per
            -- eight columns that can contain NULLs
            constants.hdr + 1
                + (SUM(CASE WHEN pgs.null_frac <> 0 THEN 1 ELSE 0 END) / 8) AS nullhdr,
            SUM((1 - pgs.null_frac) * pgs.avg_width) AS datawidth,
            MAX(pgs.null_frac) AS maxfracsum,
            pgs.schemaname,
            pgs.tablename,
            constants.hdr,
            constants.ma,
            constants.bs
        FROM pg_stats_start AS pgs
        CROSS JOIN constants
        LEFT JOIN no_stats
            ON pgs.schemaname = no_stats.table_schema
            AND pgs.tablename = no_stats.table_name
        WHERE pgs.schemaname NOT IN ('pg_catalog', 'information_schema')
            AND no_stats.table_name IS NULL
            AND EXISTS (
                SELECT 1
                FROM information_schema_columns_start AS isc
                WHERE pgs.schemaname = isc.table_schema
                    AND pgs.tablename = isc.table_name
            )
        GROUP BY pgs.schemaname, pgs.tablename,
                 constants.hdr, constants.ma, constants.bs
    ),
    data_headers AS (
        -- Estimate header and row size, padding each piece up to the next
        -- multiple of ma.
        SELECT
            nh.ma,
            nh.bs,
            nh.hdr,
            nh.schemaname,
            nh.tablename,
            (nh.datawidth
                + (nh.hdr + nh.ma
                    - (CASE WHEN nh.hdr % nh.ma = 0
                            THEN nh.ma ELSE nh.hdr % nh.ma END)))::numeric AS datahdr,
            (nh.maxfracsum
                * (nh.nullhdr + nh.ma
                    - (CASE WHEN nh.nullhdr % nh.ma = 0
                            THEN nh.ma ELSE nh.nullhdr % nh.ma END))) AS nullhdr2
        FROM null_headers AS nh
    ),
    table_estimates AS (
        -- Actual size (relpages * bs) versus the size the table should
        -- have given its row count, estimated row width and page size.
        -- Only relkind = 'r' rows of pg_class_start are considered.
        SELECT
            dh.schemaname,
            dh.tablename,
            n.dbname AS databasename,
            c.reltuples,
            c.relpages * dh.bs AS table_bytes,
            CEIL((c.reltuples *
                    (dh.datahdr + dh.nullhdr2 + 4 + dh.ma -
                        (CASE WHEN dh.datahdr % dh.ma = 0
                            THEN dh.ma ELSE dh.datahdr % dh.ma END)
                    ) / (dh.bs - 20))) * dh.bs AS expected_bytes
        FROM data_headers AS dh
        INNER JOIN pg_class_start AS c
            ON dh.tablename = c.relname
        INNER JOIN pg_namespace_start AS n
            ON c.relnamespace = n.oid
            AND dh.schemaname = n.nspname
        WHERE c.relkind = 'r'
    ),
    table_estimates_plus AS (
        -- Add reusable derived values: whether an estimate is possible at
        -- all, whether the table is smaller than expected (possibly
        -- compressed), and the bloat in bytes.  Non-positive sizes become
        -- NULL so the divisions in bloat_data yield NULL instead of failing.
        SELECT
            te.databasename,
            te.schemaname,
            te.tablename,
            te.reltuples AS est_rows,
            CASE WHEN te.expected_bytes > 0 AND te.table_bytes > 0
                THEN TRUE ELSE FALSE END AS can_estimate,
            CASE WHEN te.expected_bytes > te.table_bytes
                THEN TRUE ELSE FALSE END AS is_compressed,
            CASE WHEN te.table_bytes > 0
                THEN te.table_bytes::NUMERIC
                ELSE NULL::NUMERIC END AS table_bytes,
            CASE WHEN te.expected_bytes > 0
                THEN te.expected_bytes::NUMERIC
                ELSE NULL::NUMERIC END AS expected_bytes,
            -- bloat is reported as 0 when it cannot be estimated or when
            -- the table is smaller than expected
            CASE WHEN te.expected_bytes > 0 AND te.table_bytes > 0
                    AND te.expected_bytes <= te.table_bytes
                THEN (te.table_bytes - te.expected_bytes)::NUMERIC
                ELSE 0::NUMERIC END AS bloat_bytes
        FROM table_estimates AS te
    ),
    bloat_data AS (
        -- Final math and formatting.  can_estimate and is_compressed are
        -- not returned by the function; they are kept here for ad-hoc
        -- debugging of the estimate.
        SELECT
            tep.databasename,
            tep.schemaname,
            tep.tablename,
            tep.can_estimate,
            tep.is_compressed,
            tep.table_bytes,
            ROUND(tep.table_bytes / (1024^2)::NUMERIC, 3) AS table_mb,
            tep.expected_bytes,
            ROUND(tep.expected_bytes / (1024^2)::NUMERIC, 3) AS expected_mb,
            ROUND(tep.bloat_bytes * 100 / tep.table_bytes) AS pct_bloat,
            ROUND(tep.bloat_bytes / (1024::NUMERIC^2), 2) AS mb_bloat
        FROM table_estimates_plus AS tep
    )
    SELECT
        bloat_data.databasename::TEXT,
        bloat_data.schemaname::TEXT,
        bloat_data.tablename::TEXT,
        bloat_data.pct_bloat,
        bloat_data.mb_bloat,
        bloat_data.table_mb
    FROM bloat_data
    -- This WHERE clause decides which tables are reported.  A table is
    -- listed when it is at least 50% bloated with at least 10 MB of bloat,
    -- or at least 25% bloated with at least 1000 MB of bloat.  Both size
    -- thresholds apply to the amount of bloat, not to the table size.
    WHERE (bloat_data.pct_bloat >= 50 AND bloat_data.mb_bloat >= 10)
        OR (bloat_data.pct_bloat >= 25 AND bloat_data.mb_bloat >= 1000)
    -- worst offenders first; the name columns make the order deterministic
    -- when several tables share the same percentage
    ORDER BY bloat_data.pct_bloat DESC,
             bloat_data.databasename,
             bloat_data.schemaname,
             bloat_data.tablename;
END;
$bloated_tables$
  LANGUAGE plpgsql;

COMMIT;