#!/bin/bash

# DN of the watchdog entry that is read (base-scope search, attribute cn) from
# the replication master and from every local instance. The timestamp is taken
# from the second "_"-separated field of the returned cn line.
CHECK_USER_DN="uid=cauth_watchdog,ou=people,dc=yandex,dc=net"

# Symas OpenLDAP instances service name (queried with "service ... status")
SERVICE_NAME="slapd-instances"

# Symas OpenLDAP init script configuration file
# (source of num_instances and insN_SLAPD_SERVICES)
INSTANCES_CONFIG="/etc/default/slapd-instances-config"

# Symas OpenLDAP configuration file include with replication settings
# (the "provider=" value in it is used as the replication master URI)
REPLICATION_CONFIG="/etc/cauth-ldap-slave-slapd-config/replication-settings.conf"

# Maximum acceptable replication lag in seconds. An instance is considered
# healthy only while its lag behind the master is below this value; otherwise
# it is counted as a problem and the check reports critical.
MAX_POSSIBLE_LAG=300




# Function: Report the check result and stop the script
# Usage: die [012] "Description"
#   0 - OK
#   1 - Warning
#   2 - Critical
# Prints a single "<code>;<description>" line to stdout. The result code is
# carried only in that line: the process itself always exits with status 0.
die () {
	local result_code=$1
	local description=$2

	printf '%s;%s\n' "$result_code" "$description"
	exit 0
}


# Function: Get a value from the instances configuration file
# Usage: valuef NAME
# Looks for an assignment of the form NAME="value" in ${INSTANCES_CONFIG} and
# prints the text between the first pair of double quotes on that line.
# Prints nothing when NAME is not assigned (or its value is not double-quoted).
#   - Only lines that start (after optional blanks) with NAME= are used, so
#     commented-out lines and longer names that merely contain NAME
#     (e.g. max_NAME=) are ignored.
#   - A trailing "# comment" after the value does not hide the assignment.
#   - When NAME is assigned more than once the last assignment is used, as it
#     would be if the file were sourced by a shell.
#   - The value is printed verbatim: no word splitting or glob expansion.
# Globals: INSTANCES_CONFIG (read)
valuef() {
	local name=$1

	# [ \t] is used instead of [[:space:]] because some awk implementations
	# (e.g. old mawk) do not understand POSIX character classes.
	awk -F '"' -v name="$name" '
		{
			line = $0
			sub(/^[ \t]+/, "", line)
			# index() is a literal comparison: NAME is never treated as a regexp
			if (index(line, name "=") == 1) {
				value = $2
				found = 1
			}
		}
		END {
			if (found)
				print value
		}
	' "${INSTANCES_CONFIG}"
}



# Every LDAP lookup below goes through /usr/bin/ldapsearch, so report critical
# straight away when that file does not exist.
[[ -f /usr/bin/ldapsearch ]] || die 2 "/usr/bin/ldapsearch utility not found"


# Ask the init system for the status of the slapd instances service. Its output
# is discarded; only the exit code matters, and anything but 0 is reported as
# critical without attempting the replication check.
if ! service "$SERVICE_NAME" status &> /dev/null
then
	die 2 "Skipping replication check: service $SERVICE_NAME status exit code is not 0, ensure that all LDAP instances are runnig"
fi


# Get number of instances from init script configuration file.
# The value drives the per-instance loop and the final verdict, so it must be
# a plain non-negative integer. Without this check a missing or malformed
# num_instances makes the loop run zero times and the script end up reporting
# OK although nothing was checked.
num_instances_in_config=$( valuef "num_instances" )
if [[ ! "$num_instances_in_config" =~ ^(0|[1-9][0-9]*)$ ]]
then
	die 2 "Cannot determine number of LDAP instances: check num_instances in $INSTANCES_CONFIG"
fi


# Determine the LDAP replication master: take what follows "provider=" (matched
# case-insensitively, up to the next space) in the replication settings, and
# accept it only if it has the shape ldap://HOST:PORT,
# e.g. "ldap://vs-cauth01mht.dev.os.yandex-team.ru:389".
replication_master_uri_pattern='^ldap://[^[:space:]]+:[[:digit:]]+$'
replication_master_uri=$(grep -ioP "(?<=provider\=)\S*(?=\ .*)" "$REPLICATION_CONFIG")
if ! [[ "$replication_master_uri" =~ $replication_master_uri_pattern ]]
then
	die 2 "Cannot determine LDAP replication URI: failed checking it against regexp 'ldap://FQDN:PORT'. Check replication config file"
fi


# Get current watchdog timestamp from LDAP master server.
# The watchdog entry's cn line is split on "_" and its second field is taken
# as the timestamp. ldapsearch's stderr is merged into the pipeline (2>&1);
# anything that does not contain "cn:" is dropped by the grep, so a failed
# lookup normally leaves the value empty.
master_watchdog_timestamp=$(/usr/bin/ldapsearch -xLLL -H "$replication_master_uri" -b "$CHECK_USER_DN" -s base cn 2>&1 | grep 'cn:' | awk -F "_" '{print $2}')

# Accept only a plain non-negative integer. The value is later used in shell
# integer arithmetic to compute the lag, so anything else (empty, several
# lines, fractional seconds, stray text) must be reported here instead of
# breaking that calculation.
if [[ ! "$master_watchdog_timestamp" =~ ^[0-9]+$ ]]
then
	die 2 "Cannot determine watchdog timestamp from LDAP master"
fi


# Cycle through each instance, determine the LDAP host:port it is listening on
# from its insN_SLAPD_SERVICES setting, read the watchdog timestamp from it and
# compare it with the master's. An instance is counted as a problem when
#   - no ldap:// listener can be found in its settings,
#   - no numeric timestamp can be read from it, or
#   - its lag behind the master is not below MAX_POSSIBLE_LAG seconds.
instances_with_problems_num=0
for (( instance = 1; instance <= num_instances_in_config; instance++ ))
do
	instance_is_healthy=0
	instance_daemon_options=$( valuef "ins${instance}_SLAPD_SERVICES" )

	# Only the first ldap:// listener is queried; without "head" several
	# listeners would produce a multi-line value and a broken URI.
	instance_ldap_host_port=$(echo "$instance_daemon_options" | grep -oP "(?<=ldap://)\S+(?=\/)" | head -n 1)

	# An empty host:port would turn the URI into a bare "ldap://", which does
	# not address this instance, so the query is skipped in that case.
	if [[ -n "$instance_ldap_host_port" ]]
	then
		instance_watchdog_timestamp=$(/usr/bin/ldapsearch -xLLL -H "ldap://$instance_ldap_host_port" -b "$CHECK_USER_DN" -s base cn 2>&1 | grep 'cn:' | awk -F "_" '{print $2}')

		# Do the arithmetic only on a real number: an empty or garbled value
		# would otherwise be silently evaluated as 0 or abort the expression.
		if [[ "$instance_watchdog_timestamp" =~ ^[0-9]+$ ]]
		then
			instance_lag=$(( master_watchdog_timestamp - instance_watchdog_timestamp ))
			if (( instance_lag < MAX_POSSIBLE_LAG ))
			then
				instance_is_healthy=1
			fi
		fi
	fi

	if (( ! instance_is_healthy ))
	then
		instances_with_problems_num=$(( instances_with_problems_num + 1 ))
	fi
done


# Final verdict: critical as soon as at least one instance was counted as a
# problem, OK (together with the master's timestamp) otherwise. die never
# returns, so the OK report is reached only when the critical one was not sent.
if test "$instances_with_problems_num" -gt 0
then
	die 2 "$instances_with_problems_num from total of $num_instances_in_config instances have replication lag above $MAX_POSSIBLE_LAG seconds or other problems."
fi
die 0 "OK, master timestamp: $master_watchdog_timestamp"

exit 0
