#!/bin/sh

# Short name of this check, derived from the invocation path. It is used as
# the prefix of the state files kept under $TMP.
me=${0##*/} # strip path
me=${me%.*} # strip extension

export HOME=/home/monitor

# Extra exclusion pattern applied to dmesg lines. The default is a placeholder
# that is not expected to match anything; the -r option swaps in a real
# pattern when /proc/mdstat mentions md devices.
IGNORE_RAID_PAT="raid_not_ignored"
# Directory holding the state files between runs.
TMP="/tmp"
# Fixed search path for every external command used below.
PATH=/bin:/sbin:/usr/bin:/usr/sbin
# Epoch seconds of this run; tags newly seen messages and anchors the
# WATCH_LAST reporting window.
TSTAMP=$(date '+%s')
# NOTE(review): the patterns below escape spaces as "\ "; recent GNU grep
# releases reportedly warn about a stray backslash before whitespace --
# confirm on the target hosts before changing anything.
# Extended regex (matched case-insensitively) selecting dmesg lines worth
# looking at.
ALARM_PAT='error|warning|fail|\(da[0-9]+:[a-z0-9]+:[0-9]+:[0-9]+:[0-9]+\)|mfi[0-9]|ip_conntrack:\ table\ full|messages\ suppressed|logical\ blocks:\ \([0-9].[1-9][1-9]\ TB|task\ abort'
# Extended regex (matched case-sensitively) of known-harmless messages that
# are removed from the ALARM_PAT matches.
IGNORE_PAT='acpi0:\ reservation|ACPI\ _OSC\ request\ failed|AcpiOsExecute:\ failed\ to\ enqueue\ task|acpi_throttle|acpi_throttle[0-9]:\ failed\ to\ attach\ P_CNT|ACPI\ Warning|aer_init:\ AER\ service\ init\ |aer_init:\ AER\ service\ init\ fails|aer:\ probe\ of|arplookup|assertion\ \(\(int\)tcp_packets_in_flight|(at [0-9a-f]+)?\ rip[: ][0-9a-f]+\ rsp[: ][0-9a-f]+\ error[: ]|Attempt\ to\ query\ device\ size\ failed|barrier-based\ sync\ failed|check_tsc_sync_source\ failed|ehci_hcd|FPDMA\ QUEUED|failed\ SYNCOOKIE\ authentication|Failed\ to\ disable\ AUX\ port|failed\ to\ enable\ AA|Failed\ to\ enable\ APEI\ firmware\ first\ mode|Fast\ TSC\ calibration\ failed|GHES:\ Poll\ interval|ifa_add_loopback_route:\ insertion\ failed|igb:|ipfw:\ pullup\ failed|Marking\ TSCmfi|MOD_LOAD|module_register_init:\ MOD_LOAD|MSI\ interrupts|nfs\ send\ error\ 32\ |NON-FATAL|NO_REBOOT|optimal|page\ allocation\ failure|PCI\ error\ interrupt|pid\ [0-9]+|rebuild|reset\ failed|Resume\ from\ disk\ failed|revalidation\ failed|rwsem_down|smb|softreset\ failed|START_STOP\ FAILED|swap_pager_getswapspace|thr_sleep|uhub[0-9]|ukbd|usbd|USB_ERR_TIMEOUT|uses\ 32-bit\ capabilities|ZFS\ is\ considered|ipmi_si:\ Interface\ detection\ failed|ICMPv6\ checksum\ failed|ocraas|push-client|Tikaite|segfault|ERST|checktime\ reached,\ running\ e2fsck\ is\ recommended|maximal\ mount\ count\ reached,\ running\ e2fsck\ is\ recommended|SATA\ NCQ\ Fail\ All\ Commands\ After\ Error|failed:\ 0|dev\ error\ handler|critical\ medium\ error'
# Extended regex (matched case-insensitively) that escalates a logged message
# from status 1 to status 2.
CRIT_PAT='REJECT|I/O|defect|mechanical|retrying|broken|degraded|offline|failed|unconfigured_bad|domain_links.*segfault|EXT4-fs.*Remounting\ filesystem\ read-only|EXT4-fs\ error'

# Parse command-line options:
#   -w SECONDS  length of the reporting window (stored in WATCH_LAST)
#   -r          when /proc/mdstat mentions md devices, exclude dmesg lines
#               matching sd[a-z] (stored in IGNORE_RAID_PAT)
#   -s FILE     source FILE into this shell if it is a regular file
while getopts "w:rs:" OPTION
do
    case "$OPTION" in
        w)
            WATCH_LAST="$OPTARG"
        ;;
        r)
            # The pattern is a fixed word, so plain grep is enough (egrep is
            # obsolescent). A missing /proc/mdstat just means "no md RAID",
            # hence the discarded stderr.
            if grep -q 'md' /proc/mdstat 2>/dev/null
            then
                IGNORE_RAID_PAT="sd[a-z]"
            fi
        ;;
        s)
            if [ -f "$OPTARG" ]
            then
                # "." resolves a slash-less name through $PATH, not the
                # current directory, so anchor such names explicitly to load
                # the very file that was just tested.
                case "$OPTARG" in
                    */*) . "$OPTARG" ;;
                    *)   . "./$OPTARG" ;;
                esac
            fi
        ;;
        *)
            # Unknown option or missing argument: getopts has already
            # complained; keep processing the remaining options.
        ;;
    esac
done

# Reporting window in seconds. Falls back to 3600 (one hour) when neither -w
# nor a sourced settings file provided a value.
WATCH_LAST=${WATCH_LAST:-3600}

# Make sure the state directory and both state files exist, so the diff and
# awk steps further down always have something to read.
[ -d "$TMP" ] || mkdir -p "$TMP"
[ -s "$TMP/$me.dmesg.prev" ] || touch "$TMP/$me.dmesg.prev"
[ -s "$TMP/$me.msg.prev" ] || touch "$TMP/$me.msg.prev"

# Print the check result as "<status>;<message>" on stdout and terminate.
# Arguments: $1 - status code
#            $2 - message text
# The process always exits 0; the status travels only in the output line.
die () {
    # printf emits the message verbatim, whereas echo in some /bin/sh
    # implementations expands backslash sequences found in the text.
    printf '%s;%s\n' "$1" "$2"
    exit 0
}

# Short-circuit on hosts that the local autodetect helper marks as virtual.
# The helper is sourced with its output discarded and failures tolerated;
# is_virtual_host starts at 0 so the test below has a value even if the
# helper never assigns one.
if [ -f /usr/local/sbin/autodetect_environment ]
then
    is_virtual_host=0
    . /usr/local/sbin/autodetect_environment >/dev/null 2>&1 || true
    [ "$is_virtual_host" -eq 1 ] && die 0 "OK"
fi

# KVM guests report OK without any dmesg inspection. A guest is recognised
# either by "KVM" in the CPU model name or, failing that, by the
# paravirtualized boot banner in /var/log/dmesg (read through sudo, with
# errors discarded).
if grep 'model name' /proc/cpuinfo | grep -q 'KVM' ||
   sudo grep /var/log/dmesg -e 'Booting paravirtualized kernel on KVM' -q 2>/dev/null
then
    die 0 "OK"
fi

# Snapshot the alarming part of the kernel ring buffer: skip the first line,
# keep ALARM_PAT matches (case-insensitive), then drop everything matching
# IGNORE_PAT or IGNORE_RAID_PAT.
dmesg | tail -n +2 \
    | grep -i -E "$ALARM_PAT" \
    | grep -v -E "$IGNORE_PAT" \
    | grep -v -P "$IGNORE_RAID_PAT" > "$TMP/$me.dmesg.cur"

# Append the lines that are new since the previous snapshot to the message
# log, each prefixed with this run's timestamp. Only diff's "> " additions
# count; the match is anchored so that removed ("< ") lines which merely
# contain "> " somewhere in their text are not mistaken for new messages.
diff "$TMP/$me.dmesg.prev" "$TMP/$me.dmesg.cur" 2>/dev/null \
    | sed -n -e 's/^> //p' \
    | awk -v t="$TSTAMP" '{print t, $0}' >> "$TMP/$me.msg.prev"

# Expire log entries older than WATCH_LAST seconds, then rotate both state
# files so the next run compares against this one.
awk -v p="$((TSTAMP-WATCH_LAST))" '$1>=p {print}' "$TMP/$me.msg.prev" > "$TMP/$me.msg.cur"
mv "$TMP/$me.dmesg.cur" "$TMP/$me.dmesg.prev" 2>/dev/null
mv "$TMP/$me.msg.cur" "$TMP/$me.msg.prev" 2>/dev/null

# Nothing left inside the window: all clear.
[ -s "$TMP/$me.msg.prev" ] || die 0 "OK"

# Report the most recent critical message (status 2) if there is one,
# otherwise the most recent message of any kind (status 1). The leading
# timestamp column is stripped from the reported text.
c_err=$(grep -i -E "$CRIT_PAT" "$TMP/$me.msg.prev" | tail -n 1 | sed -e 's/^[0-9]* //')
if [ -n "$c_err" ]
then
    die 2 "$c_err"
fi
err=$(tail -n 1 "$TMP/$me.msg.prev" | sed -e 's/^[0-9]* //')
die 1 "$err"
