#!/bin/bash
# Nagios check: goes WARNING/CRITICAL when too many instances of a process have been running for 2 or more days.
# Usage: check_process_age.sh "<process_regex>"   (grep regex; defaults to "hls[_]quicksync.py")

# We could make these args, but this script is just for fun. ;)
# WARNING fires when more than WARN_COUNT matching processes are old,
# CRITICAL when more than CRIT_COUNT are.
WARN_COUNT=1
CRIT_COUNT=3
# Minimum age, in whole days, for a process to count as old.
# Can only be 1-9: it becomes the lower bound of a one-digit regex range below.
DAYS_OLD=2

# Print the number of lines in $1 (0 for an empty string).
# grep -c counts a final line that lacks a trailing newline, which
# `echo -n ... | wc -l` does not (that form under-reports by one).
count_lines() {
    printf '%s' "$1" | grep -c ''
}

# $1 is a grep regular expression matched against each "etime command" line.
# Writing one character as a bracket expression (e.g. "[_]") stops the pattern
# from matching a command line that contains the pattern text itself, such as
# this script's own when the pattern is passed as an argument.
process=${1:-"hls[_]quicksync.py"}

# One "etime command" line per process, sorted by start time.
# tail drops the ps header so it is neither counted nor matched.
all_processes=$(ps ax -o etime,command --sort=start_time | tail -n +2)
all_proc_count=$(count_lines "$all_processes")

# Quote the pattern so the shell neither word-splits nor glob-expands it.
our_processes=$(printf '%s\n' "$all_processes" | grep -e "$process")
our_proc_count=$(count_lines "$our_processes")

# etime is formatted [[DD-]hh:]mm:ss. Keep lines whose day field is either two
# or more digits (>= 10 days) or a single digit in DAYS_OLD..9. The match is
# anchored to the start of the line so only the etime column is examined.
old_processes=$(printf '%s\n' "$our_processes" \
    | grep -E "^[[:space:]]*([0-9][0-9]+|[${DAYS_OLD}-9])-[0-9]+:[0-9]+:[0-9]+")
old_proc_count=$(count_lines "$old_processes")

MSG="OK! $process processes: $our_proc_count (none older than ${DAYS_OLD} days)"
STATUS=0
if [ "$old_proc_count" -gt "$CRIT_COUNT" ]; then
    MSG="CRITICAL! $process process has $old_proc_count instances running longer than ${DAYS_OLD} days."
    STATUS=2
elif [ "$old_proc_count" -gt "$WARN_COUNT" ]; then
    MSG="WARNING! $process process has $old_proc_count instances running longer than ${DAYS_OLD} days."
    STATUS=1
fi

# Add pnp data (Nagios perfdata: 'label'=value pairs after the pipe).
MSG="$MSG | '$process'=$our_proc_count 'old_$process'=$old_proc_count 'all_procs'=$all_proc_count"

# Quoted so the message is printed verbatim; the pattern may contain glob
# characters that an unquoted expansion would expand against the filesystem.
printf '%s\n' "$MSG"
exit "$STATUS"
