#!/bin/bash
## Checks LDAP server replication status by querying, with ldapsearch, both the local server and ldap-write-aws for the cn=Replication,cn=Monitor object.
## That object contains server sync info (CSN); the lag of the last update is derived from it and alerted on accordingly.

# Set default time of 1hr for sync warning, 1day for crit
SYNC_WARN_TIME=3600
SYNC_CRIT_TIME=86400
# Seconds allowed for each LDAP connection attempt (passed to ldapsearch as nettimeout)
CON_TIMEOUT=10
# Server queried as the "local" side of the sync comparison
LDAP_SERVER='localhost'
# Due to new ACLs in DS6.5+ we need to auth for replication stats
REPLICATION_DN='cn=replicationadmin,cn=Administrators,cn=admin data'
# File holding the bind password; handed to ldapsearch via -y
REPLICATION_PW_FILE=/var/lib/sandstorm-agent/secrets/resource-opendj_user_replicationadmin.sandstorm
# Server whose replication state the local one is compared against
REMOTE_SERVER='ldap-write-aws.internal.justin.tv'
# Updated by WARN: the status to exit with and the warning text to print
EXIT_CODE=0
EXIT_MSG=""

# Print an "OK: <message>" status line and terminate with exit status 0.
# Arguments: $1 - text to show after the "OK: " prefix
OK() {
    printf 'OK: %s\n' "${1}"
    exit 0
}

# Record a warning without exiting, so later checks still run.
# The text is stored in EXIT_MSG (appended, comma-separated, when an earlier
# warning is already recorded) and EXIT_CODE is set to 1.
# Arguments: $1 - warning text
WARN() {
    local entry="WARNING: ${1}"
    if (( EXIT_CODE > 0 )); then
        # A previous warning exists: keep it and add this one after it
        entry="${EXIT_MSG}, ${entry}"
    fi
    EXIT_MSG=$entry
    EXIT_CODE=1
}

# Print a "CRITICAL: <message>" status line and terminate with exit status 2.
# Arguments: $1 - description of the failure
CRIT() {
    printf 'CRITICAL: %s\n' "${1}"
    exit 2
}

# Print an "UNKNOWN: <message>" status line and terminate with exit status 3.
# Arguments: $1 - explanation of why the state could not be determined
UNKN() {
    printf 'UNKNOWN: %s\n' "${1}"
    exit 3
}

# Print the warning text accumulated in EXIT_MSG and exit with EXIT_CODE.
# Both expansions are quoted so the message is emitted verbatim (no word
# splitting, no glob expansion of characters such as '*').
function EXIT_WARN {
    printf '%s\n' "$EXIT_MSG"
    exit "$EXIT_CODE"
}

# Print the option summary, then exit through UNKN (status 3).
# All help text is sent to stderr so that the "UNKNOWN: ..." line printed by
# UNKN is the only thing written to stdout.
function usage {
    {
        echo "$0 [-w sec] [-c sec] [-h host] [-t sec]"
        echo "  This checks LDAP servers "
        echo "  -w sec   Set WARN seconds of sync lag. Currently=${SYNC_WARN_TIME}"
        echo "  -c sec   Set CRITICAL seconds of sync lag. Currently=${SYNC_CRIT_TIME}"
        echo "  -h host  Which LDAP server to connect to. Currently=${LDAP_SERVER}"
        echo "  -t sec   Set timeout for LDAP connection attempt. Currently=${CON_TIMEOUT}"
    } >&2
    UNKN "Invalid optarg passed to check"
}

# Parse command-line options, overriding the default settings.
#   -c sec   critical sync-lag threshold  (SYNC_CRIT_TIME)
#   -w sec   warning sync-lag threshold   (SYNC_WARN_TIME)
#   -h host  LDAP server to connect to    (LDAP_SERVER)
#   -t sec   LDAP connection timeout      (CON_TIMEOUT)
# Any other option, or a missing option argument, ends the check via usage.
# Arguments: the script's positional parameters
parse_args() {
    # OPTIND is localised so the function can be called more than once
    local opt OPTIND=1
    while getopts "c:w:h:t:" opt; do
        case $opt in
            c) SYNC_CRIT_TIME=$OPTARG ;;
            w) SYNC_WARN_TIME=$OPTARG ;;
            h) LDAP_SERVER=$OPTARG ;;
            t) CON_TIMEOUT=$OPTARG ;;
            *) usage ;;
        esac
    done
}

parse_args "$@"

## If no opendj on this machine, return OK
dpkg -l 2>/dev/null | grep -q opendj || OK "(${LDAP_SERVER} not an LDAP server)"

## Check if we are in the ldap-ro DNS round-robin
service_ip=$(facter ipaddress_team0)
if [[ -z "$service_ip" ]]; then
    # Without an address there is nothing to look for; an empty pattern must
    # never reach grep (it would take the next argument as the pattern).
    WARN "Could not determine service IP from facter ipaddress_team0"
elif ! grep -qFw -- "$service_ip" <(nslookup ldap-ro.internal.justin.tv); then
    # -F: the dots in the address are literal, not regex wildcards
    # -w: 10.1.2.3 must not match 10.1.2.30
    WARN "${service_ip} Not in DNS ldap-ro round robbin"
fi

## Check that we can connect to LDAP within CON_TIMEOUT seconds
## Check that the Server status timestamp is within SYNC_* times

# Print the newest replication timestamp (first 10 digits, used as seconds)
# found in an ldapsearch response; prints nothing when no server-state line
# can be parsed.
# Arguments: $1 - raw LDIF text returned by ldapsearch
#            $2 - optional; "skip-csn" drops 'server-state: 01' lines when
#                 parsing the older (non ds-mon-*) format
latest_sync_time() {
    local resp=$1 mode=${2:-}
    if grep -q '^ds-mon-server-state' <<<"$resp"; then
        # upgraded DS server, different timestamp, lots more work to convert from hex to sec:
        # 12 hex digits starting at character 5 of the value are converted to
        # decimal and the first 10 digits kept (presumably a millisecond
        # timestamp -- TODO confirm).
        # xargs -r: on empty input run nothing, instead of letting a bare
        # printf "%d" emit a fabricated 0.
        grep 'server-state: 01' <<<"$resp" \
            | awk '{print "0x" substr($2,5,12)}' \
            | xargs -r printf "%d\n" \
            | sort -n | tail -1 | cut -c-10
    elif [[ $mode == skip-csn ]]; then
        grep server-state <<<"$resp" \
            | grep -v 'server-state: 01' \
            | sort -nk9 | tail -1 | awk '{print substr($9,1,10)}'
    else
        grep server-state <<<"$resp" \
            | sort -nk9 | tail -1 | awk '{print substr($9,1,10)}'
    fi
}

# Bind/transport options shared by both queries
ldap_query_opts=(-D "$REPLICATION_DN" -y "$REPLICATION_PW_FILE" -o "nettimeout=${CON_TIMEOUT}" -o ldif-wrap=no -s sub)

local_ldap_resp=$(ldapsearch -x -H "ldaps://${LDAP_SERVER}" "${ldap_query_opts[@]}" -b "cn=Replication,cn=monitor" '*' '+' 2>/dev/null) \
    || CRIT "Failed to connect to LDAP server on ${LDAP_SERVER}"
local_sync_time=$(latest_sync_time "$local_ldap_resp" skip-csn)

primary_ldap_resp=$(ldapsearch -x -H "ldaps://${REMOTE_SERVER}" "${ldap_query_opts[@]}" -b "cn=replicas,cn=Replication,cn=monitor" '*' '+') \
    || CRIT "Failed to connect to LDAP server on ${REMOTE_SERVER}"
primary_sync_time=$(latest_sync_time "$primary_ldap_resp")

# Compare the primary and local sync timestamps and report the final status.
# Arguments: $1 - newest sync timestamp read from the primary (seconds)
#            $2 - newest sync timestamp read from the local server (seconds)
# Reads SYNC_CRIT_TIME, SYNC_WARN_TIME and EXIT_CODE.
# Ends the script through UNKN, CRIT, EXIT_WARN or OK.
report_sync_status() {
    local primary_ts=$1 local_ts=$2
    local sync_drift last_change
    local int_re='^(0|[1-9][0-9]*)$'

    # Filter out empty and weird results BEFORE doing arithmetic: inside
    # $(( )) an empty variable evaluates to 0, so checking the computed drift
    # afterwards can never catch a failed parse (it would show up as lag 0).
    if ! [[ $primary_ts =~ $int_re && $local_ts =~ $int_re ]]; then
        UNKN "Sync Time or status unknown (primary='${primary_ts}' local='${local_ts}')"
    fi

    sync_drift=$(( primary_ts - local_ts ))
    last_change=$(( $(date +%s) - primary_ts ))

    # Only the size of the gap matters, not which side is ahead
    if (( sync_drift < 0 )); then
        sync_drift=$(( -sync_drift ))
    fi

    if (( sync_drift > SYNC_CRIT_TIME )); then
        CRIT "Sync time ${sync_drift} is > ${SYNC_CRIT_TIME}"
    fi
    if (( sync_drift > SYNC_WARN_TIME )); then
        WARN "Sync time ${sync_drift} is > ${SYNC_WARN_TIME}"
    fi
    # Emit any warning recorded here or by earlier checks
    if (( EXIT_CODE != 0 )); then
        EXIT_WARN
    fi
    OK "sync lag ${sync_drift}s last change age ${last_change}s"
}

report_sync_status "$primary_sync_time" "$local_sync_time"
