#!/bin/bash
# Simple Nagios check for Amazon SSM agent health, based on how recently the
# agent wrote to the systemd journal. Can be used with NRPE or NRDP.
# OK: the agent has logged within the last $warn minutes
# WARN: the agent has not logged anything for more than $warn minutes
# CRIT: the agent has not logged anything for more than $crit minutes

# Stop on any unhandled command failure (-e) and on use of an unset
# variable (-u).
set -eu

# systemd unit that is queried; the same string is searched for in the
# journal output.
NAME='amazon-ssm-agent'

# Default thresholds, in minutes. Overridable with -w and -c.
warn=10
crit=20

#######################################
# Print an error message followed by the usage line, then terminate.
# Arguments: $1 - explanation of what was wrong with the command line
# Outputs:   the message, a blank line and the usage line, on stdout
#            (the stream Nagios takes plugin output from)
# Exits:     3, the Nagios UNKNOWN state. Exit status 1 would be shown
#            as WARNING, making a misconfigured check look like an
#            agent problem.
#######################################
usage() {
    # Quoted so the message is printed verbatim (no word splitting/globbing).
    printf '%s\n\n' "${1-}"
    # ${0##*/} is the script's base name; safe for paths containing spaces.
    printf 'Usage: %s [-c crit_minutes] [-w warn_minutes]\n' "${0##*/}"
    exit 3
}

#######################################
# Parse command-line options into the global thresholds.
# Globals:   crit (written by -c), warn (written by -w)
# Arguments: the script's command line ("$@")
# Exits:     via usage() on an unknown option or when an option value
#            is missing or is not a positive decimal integer
#######################################
parse_args() {
    local opt value
    while (( $# > 0 )); do
        opt=$1
        case "$opt" in
            -c|-w)
                value=${2:-}
                # Accept digits only BEFORE any arithmetic: bash evaluates
                # non-numeric text inside (( )) as variable names or
                # expressions, which can abort under `set -u` or even run
                # embedded command substitutions. 10# forces base 10 so a
                # value such as "08" is not rejected as invalid octal.
                if ! [[ $value =~ ^[0-9]+$ ]] || (( 10#$value < 1 )); then
                    usage "$opt takes integer positive argument"
                fi
                if [[ $opt == -c ]]; then
                    crit=$(( 10#$value ))
                else
                    warn=$(( 10#$value ))
                fi
                shift 2
                ;;
            *)
                usage "unknown option $opt"
                ;;
        esac
    done
}

parse_args "$@"

#######################################
# Tell whether the unit's journal mentions $NAME within a recent window.
# Globals:   NAME (read)
# Arguments: $1 - look-back window in minutes
# Returns:   0 if at least one journal line in the window contains
#            $NAME, 1 otherwise
# Exits:     3 (Nagios UNKNOWN) when sudo/journalctl itself fails, so a
#            permissions or tooling problem is not reported as a stale
#            agent
#######################################
agent_logged_within() {
    local minutes=$1
    local journal
    # Capture the output instead of piping into `grep -q`, so that the
    # exit status of sudo/journalctl can be checked.
    if ! journal=$(sudo /bin/journalctl -u "$NAME" --since "-${minutes}m"); then
        echo "UNKNOWN: unable to read the journal for $NAME"
        exit 3
    fi
    # Matching on the unit name skips journalctl's informational lines.
    grep -qF -- "$NAME" <<<"$journal"
}

# Check the wider (critical) window first, then the warning window.
if ! agent_logged_within "$crit"; then
    echo "CRITICAL: last amazon ssm agent activity was more than $crit minutes ago"
    exit 2
fi

if ! agent_logged_within "$warn"; then
    echo "WARNING: last amazon ssm agent activity was more than $warn minutes ago"
    exit 1
fi

echo "OK: amazon ssm agent is healthy"
exit 0
