__author__ = 'kaceymiriholston'

import glob, subprocess

from config.settings import *
from settings import *
import reports
import util.db

def bloated_tables():
    """Write a bloated-tables report for each monitored schema.

    Every immediate subdirectory of MONITOR_PATH_NAME is treated as a schema
    name, except "server", which is skipped. For each schema the search path
    is switched to that schema and the result of the SQL function
    ``bloated_tables()`` is handed to ``reports.text_report``.

    If MONITOR_PATH_NAME does not exist or has no subdirectories, nothing is
    reported.
    """
    # Only the first os.walk() entry is needed: its dirnames list holds the
    # top-level schema directories. Taking just that entry avoids traversing
    # the whole tree, and the default covers a missing/unreadable path, for
    # which os.walk() yields nothing at all.
    schema_names = next(os.walk(MONITOR_PATH_NAME), (None, [], None))[1]

    for schema_name in schema_names:
        if schema_name == "server":
            continue
        # NOTE(review): the connection is left open because it is not visible
        # here whether util.db.connect() returns a fresh or a shared
        # connection -- confirm and close it if this function owns it.
        db = util.db.connect()
        cur = db.cursor()
        try:
            cur.execute("SET search_path=%s, public" % schema_name)
            cur.execute("SELECT * from bloated_tables()")

            # NOTE(review): every schema reports to the same file path;
            # whether later schemas overwrite or append depends on
            # reports.text_report -- confirm.
            reports.text_report(cur, REPORT_DIR_PATH + '/bloated_tables.csv')
        finally:
            # Release the cursor even when a query or the report fails.
            cur.close()

def bloated_indexes():
    """Write a bloated-indexes report for each monitored schema.

    Every immediate subdirectory of MONITOR_PATH_NAME is treated as a schema
    name, except "server", which is skipped. For each schema the search path
    is set to ``<schema>, server, public`` and a bloat-estimation query is
    run; its cursor is handed to ``reports.text_report``.

    The query works in stages (one CTE each):
      * btree_index_atts  -- one row per attribute of every btree index
      * index_item_sizes  -- per-index tuple header size and data width
      * index_aligned     -- the same widths padded to ``maxalign``
      * otta_calc         -- expected number of pages for each index
      * raw_bloat         -- wasted bytes and bloat percentage vs. actual pages
      * format_bloat      -- rounded percentages and sizes in MB
    Only indexes with bloat_pct > 50 and bloat_mb > 10 are returned, largest
    bloat first.

    If MONITOR_PATH_NAME does not exist or has no subdirectories, nothing is
    reported.
    """
    # The query text does not depend on the schema (the schema is selected
    # through search_path), so build it once outside the loop.
    query = """
            WITH btree_index_atts AS (
                SELECT nspname, relname, reltuples, relpages, indrelid, relam,
                    regexp_split_to_table(indkey::text, ' ')::smallint AS attnum,
                    indexrelid as index_oid
                FROM pg_index_start
                JOIN pg_class_start ON pg_class_start.oid=pg_index_start.indexrelid
                JOIN pg_namespace_start ON pg_namespace_start.oid = pg_class_start.relnamespace
                JOIN current_pg_am ON pg_class_start.relam = current_pg_am.oid
                WHERE current_pg_am.amname = 'btree'
                ),
            index_item_sizes AS (
                SELECT
                i.nspname, i.relname, i.reltuples, i.relpages, i.relam,
                s.starelid, a.attrelid AS table_oid, a.relation_size AS table_bytes, index_oid,
                oss.info::numeric AS bs,
                8 AS maxalign,
                24 AS pagehdr,
                /* per tuple header: add index_attribute_bm if some cols are null-able */
                CASE WHEN max(coalesce(s.stanullfrac,0)) = 0
                    THEN 2
                    ELSE 6
                END AS index_tuple_hdr,
                /* data len: we remove null values save space using it fractionnal part from stats */
                sum( (1-coalesce(s.stanullfrac, 0)) * coalesce(s.stawidth, 2048) ) AS nulldatawidth
                FROM pg_attribute_start AS a
                JOIN pg_statistic_start AS s ON s.starelid=a.attrelid AND s.staattnum = a.attnum
                JOIN btree_index_atts AS i ON i.indrelid = a.attrelid AND a.attnum = i.attnum,
                other_stats_start oss
                WHERE a.attnum > 0 AND oss.name = 'current_setting'
                GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
            ),
            index_aligned AS (
                SELECT maxalign, bs, nspname, relname AS index_name, reltuples,
                    relpages, relam, table_oid, table_bytes, index_oid,
                  ( 2 +
                      maxalign - CASE /* Add padding to the index tuple header to align on MAXALIGN */
                        WHEN index_tuple_hdr%maxalign = 0 THEN maxalign
                        ELSE index_tuple_hdr%maxalign
                      END
                    + nulldatawidth + maxalign - CASE /* Add padding to the data to align on MAXALIGN */
                        WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign
                        ELSE nulldatawidth::integer%maxalign
                      END
                  )::numeric AS nulldatahdrwidth, pagehdr
                FROM index_item_sizes AS s1
            ),
            otta_calc AS (
              SELECT bs, nspname, table_oid, table_bytes, index_oid, index_name, relpages, coalesce(
                ceil((reltuples*(4+nulldatahdrwidth))/(bs-pagehdr::float)) +
                  CASE WHEN am.amname IN ('hash','btree') THEN 1 ELSE 0 END , 0 -- btree and hash have a metadata reserved block
                ) AS otta
              FROM index_aligned AS s2
                LEFT JOIN current_pg_am am ON s2.relam = am.oid
            ),
            raw_bloat AS (
                SELECT stat.dbname as dbname, nspname, table_bytes, c.relname AS table_name, index_name,
                    bs*(sub.relpages)::bigint AS totalbytes, otta as expected,
                    CASE
                        WHEN sub.relpages <= otta THEN 0
                        ELSE bs*(sub.relpages-otta)::bigint END
                        AS wastedbytes,
                    CASE
                        WHEN sub.relpages <= otta
                        THEN 0 ELSE bs*(sub.relpages-otta)::bigint * 100 / (bs*(sub.relpages)::bigint) END
                        AS realbloat,
                    stat.idx_scan as index_scans
                FROM otta_calc AS sub
                JOIN pg_class_start AS c ON c.oid=sub.table_oid
                JOIN pg_stat_user_indexes_start AS stat ON sub.index_oid = stat.indexrelid
            ),
            format_bloat AS (
            SELECT dbname as database_name, nspname as schema_name, table_name, index_name,
                    round(realbloat) as bloat_pct, round(wastedbytes/(1024^2)::NUMERIC) as bloat_mb,
                    round(totalbytes/(1024^2)::NUMERIC,3) as index_mb,
                    round(table_bytes/(1024^2)::NUMERIC,3) as table_mb,
                    index_scans
            FROM raw_bloat
            )
            -- final query outputting the bloated indexes
            -- change the where and order by to change
            -- what shows up as bloated
            SELECT *
            FROM format_bloat
            WHERE ( bloat_pct > 50 and bloat_mb > 10 )
            ORDER BY bloat_mb DESC;
        """

    # Only the first os.walk() entry is needed: its dirnames list holds the
    # top-level schema directories. Taking just that entry avoids traversing
    # the whole tree, and the default covers a missing/unreadable path, for
    # which os.walk() yields nothing at all.
    schema_names = next(os.walk(MONITOR_PATH_NAME), (None, [], None))[1]

    for schema_name in schema_names:
        if schema_name == "server":
            continue
        # NOTE(review): the connection is left open because it is not visible
        # here whether util.db.connect() returns a fresh or a shared
        # connection -- confirm and close it if this function owns it.
        db = util.db.connect()
        cur = db.cursor()
        try:
            cur.execute("SET search_path=%s, server, public" % schema_name)
            cur.execute(query)

            # NOTE(review): every schema reports to the same file path;
            # whether later schemas overwrite or append depends on
            # reports.text_report -- confirm.
            reports.text_report(cur, REPORT_DIR_PATH + '/bloated_indexes.csv')
        finally:
            # Release the cursor even when a query or the report fails.
            cur.close()