#!/usr/bin/env python
from pyPgSQL import PgSQL
from decimal import Decimal
import re, time, sys, traceback, StringIO

def finish(exit_status, errtxt):
    """Print the result message and terminate the process.

    exit_status -- integer handed to sys.exit(); this script uses 0 for
                   success and 1, 2 or 3 for the various failure cases
                   (looks like the Nagios plugin convention -- confirm).
    errtxt      -- single-line message written to standard output.

    Never returns: sys.exit() raises SystemExit.
    """
    # The parenthesised single-argument form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(errtxt)
    sys.exit(exit_status)

# Command line: <script> host::database::user::password[,host::...] [timeout] [idle]
#
# argv[1] -- comma-separated server specs, each made of '::'-separated fields.
# argv[2] -- optional threshold in seconds for the long-running query check.
# argv[3] -- optional; the mere presence of a third argument sets `idle`.
if len(sys.argv) > 1:
    connections = sys.argv[1].split(',')
    servers = [con.split('::') for con in connections]
    # Reject incomplete specs up front; the message deliberately does not echo
    # the spec itself because it carries the password.
    for position, fields in enumerate(servers):
        if len(fields) < 4:
            finish(3, 'Server spec #%d is malformed; expected host::database::user::password' % (position + 1))
else:
    finish(3, 'No server(s) specified.')

if len(sys.argv) > 2:
    try:
        timeout = int(sys.argv[2])
    except ValueError:
        # Exit cleanly instead of dying with a traceback on a bad argument.
        finish(3, 'Invalid timeout %r; expected a whole number of seconds.' % sys.argv[2])
else:
    # Same type as a user-supplied timeout (previously the string '300').
    timeout = 300

idle = len(sys.argv) > 3

def _exc_text():
    """Return the traceback of the exception being handled, on one line."""
    return traceback.format_exc().replace('\n', '').replace('\r', '')


def _fetch_all(conn, sql):
    """Execute *sql* on a new cursor of *conn* and return all result rows."""
    cur = conn.cursor()
    cur.execute(sql)
    return cur.fetchall()


# Run every check against each configured server. The first failing check
# ends the whole script through finish(); only if all servers pass every
# check is the final OK line printed.
for server in servers:
    if len(server) < 4:
        # Do not echo the spec: it contains the password.
        finish(3, 'Malformed server specification; expected host::database::user::password')
    host, database, user, password = server[:4]
    # Label used in messages instead of the raw `server` list, which would
    # print the password into the check output.
    target = '%s@%s' % (database, host)

    started = time.time()
    try:
        db_conn = PgSQL.connect(host=host, database=database, user=user, password=password)
    except Exception:
        finish(2, 'Unable to connect to database %s on host %s' % (database, host))

    ct = time.time() - started
    if ct > 5:
        finish(1, 'Took too long to connect to %s (%.2fs)' % (target, ct))

    # Which version of postgres? Some column names in pg_stat_activity changed in 9.1 -> 9.2
    try:
        db_rows = _fetch_all(db_conn, "select version()")
        pg_version = re.match(r"^PostgreSQL (\d+\.\d+)[\.\d]* ", db_rows[0][0]).group(1)
        pg_version = Decimal(pg_version)
    except Exception:
        finish(2, 'Unable to get postgres version on %s! Reason: %s' % (target, _exc_text()))

    if pg_version >= Decimal('9.2'):
        PROCPID = 'pid'
        CURRENT_QUERY = 'query'
    else:
        PROCPID = 'procpid'
        CURRENT_QUERY = 'current_query'

    # Locks
    # NOTE(review): this counts ungranted locks whose query started LESS than
    # two seconds ago; a '>' comparison may have been intended -- confirm.
    try:
        sql = "select count(*) from pg_locks join pg_stat_activity on pg_stat_activity.{procpid} = pg_locks.pid where not granted and now() - query_start < '00:00:02'::interval".format(
                    procpid=PROCPID)
        n = _fetch_all(db_conn, sql)[0][0]
    except Exception:
        finish(2, 'Unable to get lock count on %s! Reason: %s' % (target, _exc_text()))
    if n >= 2:
        finish(1, "%s ungranted locks in the database." % n)

    # Long running queries
    try:
        template = ("select {current_query}, {procpid} from pg_stat_activity"
                    " where now() - xact_start > '{timeout} seconds'::interval"
                    " and {current_query} not like '%%analyze%%'"
                    " and {current_query} not like '%%vacuum%%'"
                    " and {current_query} not like '%%COPY%%'")
        if idle:
            # NOTE(review): on 9.2+ the idle marker presumably lives in the
            # `state` column rather than the query text, which would make
            # this filter a no-op there -- confirm.
            template += " and {current_query} not like '%%<IDLE> in transaction%%'"
        query = (template + ";").format(timeout=timeout, procpid=PROCPID, current_query=CURRENT_QUERY)
        queries = _fetch_all(db_conn, query)
    except Exception:
        finish(2, 'Unable to read long running query data from %s! Reason: %s' % (target, _exc_text()))
    if len(queries) > 0:
        finish(1, "%s queries running for more than %s seconds: %s" % (len(queries), timeout, queries))

    # Connection usage
    try:
        cur_conns = int(_fetch_all(db_conn, "select count(*) from pg_stat_activity")[0][0])
        max_conns = int(_fetch_all(db_conn, "show max_connections")[0][0])

        # Convert before dividing: int / int truncates on Python 2, which
        # made the ratio 0 for anything short of a completely full server.
        perc_used = float(cur_conns) / max_conns
        remaining = max_conns - cur_conns
    except Exception:
        finish(2, 'Unable to run connection check on %s!' % target)

    # Status 2 conditions are tested before the status 1 threshold so the
    # lower threshold cannot mask them.
    if perc_used > 0.95:
        finish(2, '%.2f of db connections used on %s' % ((perc_used * 100.0), target))

    if remaining < 10:
        finish(2, 'Only %s db connections left on %s, cur_conns = %s max_conns = %s' % (remaining, target, cur_conns, max_conns))

    if perc_used > 0.90:
        finish(1, '%.2f of db connections used on %s' % ((perc_used * 100.0), target))

    # Write check: create, fill and drop a scratch table, committing each step
    # on its own cursor.
    try:
        for statement in ("create table test_table (test char(50))",
                          "insert into test_table (test) values ('test')",
                          "drop table test_table"):
            db_cur = db_conn.cursor()
            db_cur.execute(statement)
            db_cur.execute("commit")
    except Exception:
        finish(2, 'Unable to create test table on %s!' % target)

    # Release the connection before moving on to the next server. Best
    # effort: a failure to close must not turn a passing check into an error.
    try:
        db_conn.close()
    except Exception:
        pass

finish(0, "Database OK - %s server(s) checked" % len(servers))
