#!/usr/bin/env bash
#
# Check if the given redis cluster node is healthy.
#
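# Usage: check_redis_node.sh [-h|--host host] [-p|--port port]
#   -h, --host  Redis host (default 127.0.0.1)
#   -p, --port  Redis port (default 6379)
#
# Exit status: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = invalid command line.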
#

# Abort if we use an undeclared variable.
set -u

# Option specification handed to getopt: both flags take a mandatory argument.
OPTIONS="h:p:"
LONGOPTIONS="host:,port:"

# Connection defaults, overridden by -h/--host and -p/--port.
HOST="127.0.0.1"
PORT="6379"

# Parse the command line flags into the HOST and PORT globals.
#
# Arguments: the script's command line ("$@").
# Globals:   OPTIONS, LONGOPTIONS (read); HOST, PORT (written).
# Exits:     3 if getopt rejects the command line or an option has no
#            handler below. Positional arguments after "--" are ignored.
parse_args() {
	local parsed

	# getopt prints its own diagnostic on stderr when it rejects the input.
	if ! parsed=$(getopt --options="$OPTIONS" --longoptions="$LONGOPTIONS" --name "$0" -- "$@"); then
		exit 3
	fi

	# getopt emits a shell-quoted argument list terminated by "--";
	# re-load it as this function's positional parameters.
	eval set -- "$parsed"

	while (( $# > 0 )); do
		case "$1" in
			-h|--host)
				HOST="$2"
				shift 2
				;;
			-p|--port)
				PORT="$2"
				shift 2
				;;
			--)
				shift
				break
				;;
			*)
				# An option accepted by getopt but not handled above would
				# otherwise never be shifted away and loop forever.
				echo "UNKNOWN - unhandled option: $1"
				exit 3
				;;
		esac
	done
}

parse_args "$@"

# Send the node a PING and require a PONG in response.
# timeout(1) caps the call at 1 second; stderr is folded into OUTPUT so that
# connection errors end up in the alert text.
OUTPUT=$(timeout 1 redis-cli -h "$HOST" -p "$PORT" PING 2>&1)
STATUS=$?

if [[ $STATUS == 124 ]]; then
	# Exit status 124 is how timeout reports that it killed the command.
	echo "CRITICAL - PING timed out after 1 second"
	exit 2
elif [[ $STATUS != 0 ]] || [[ "$OUTPUT" == ERR* ]]; then
	echo "CRITICAL - $OUTPUT"
	exit 2
elif [[ "$OUTPUT" != "PONG" ]]; then
	# The exit status alone is not proof of health: any reply other than
	# an exact PONG (empty output, a non-ERR error reply, ...) is a failure.
	echo "CRITICAL - unexpected PING reply: $OUTPUT"
	exit 2
fi

# Ask the node for CLUSTER INFO, giving up after 1 second.
# stderr is merged into OUTPUT so failures can be echoed in the alert.
OUTPUT=$(timeout 1 redis-cli -h "$HOST" -p "$PORT" CLUSTER INFO 2>&1)
STATUS=$?

case "$STATUS" in
	124)
		# The 1 second limit was hit.
		echo "CRITICAL - CLUSTER INFO timed out after 1 second"
		exit 2
		;;
	0)
		# The command ran, but the reply itself may still be an error.
		if [[ "$OUTPUT" == ERR* ]]; then
			echo "CRITICAL - $OUTPUT"
			exit 2
		fi
		;;
	*)
		# Any other non-zero status: report whatever was printed.
		echo "CRITICAL - $OUTPUT"
		exit 2
		;;
esac

# Keep the reply for the parsing steps that follow.
CLUSTER_INFO="$OUTPUT"

# Pull the cluster_state value out of the CLUSTER INFO reply.
# A reply without a recognisable cluster_state field is itself critical.
if ! [[ "$CLUSTER_INFO" =~ cluster_state:([a-z]+) ]]; then
	echo "CRITICAL - failed to parse cluster state"
	exit 2
fi
CLUSTER_STATE="${BASH_REMATCH[1]}"

# Only the literal state "ok" is accepted.
if ! [[ "$CLUSTER_STATE" == "ok" ]]; then
	echo "CRITICAL - cluster_state is $CLUSTER_STATE"
	exit 2
fi

# Pull the cluster_slots_ok counter out of the CLUSTER INFO reply.
if ! [[ "$CLUSTER_INFO" =~ cluster_slots_ok:([0-9]+) ]]; then
	echo "CRITICAL - failed to parse cluster slots"
	exit 2
fi
CLUSTER_SLOTS="${BASH_REMATCH[1]}"

# 16384 is not arbitrary: it is the number the spec prescribes, so any
# other count of ok slots is reported as critical.
if ! [[ "$CLUSTER_SLOTS" == "16384" ]]; then
	echo "CRITICAL - $CLUSTER_SLOTS of 16384 slots are ok"
	exit 2
fi

# Ask the node for CLUSTER NODES, giving up after 1 second.
# stderr is merged into OUTPUT so failures can be echoed in the alert.
OUTPUT=$(timeout 1 redis-cli -h "$HOST" -p "$PORT" CLUSTER NODES 2>&1)
STATUS=$?

case "$STATUS" in
	124)
		# The 1 second limit was hit.
		echo "CRITICAL - CLUSTER NODES timed out after 1 second"
		exit 2
		;;
	0)
		# The command ran, but the reply itself may still be an error.
		if [[ "$OUTPUT" == ERR* ]]; then
			echo "CRITICAL - $OUTPUT"
			exit 2
		fi
		;;
	*)
		# Any other non-zero status: report whatever was printed.
		echo "CRITICAL - $OUTPUT"
		exit 2
		;;
esac

# Keep the node table for the checks that follow.
CLUSTER_NODES="$OUTPUT"

# TODO Implement `redis-cli --latency --raw` once we have upgraded to Redis 4.0

# Our own entry in the node table is the line that mentions "myself".
MYSELF=$(grep 'myself' <<< "$CLUSTER_NODES")
[[ -n "$MYSELF" ]] || {
	echo "CRITICAL - failed to find myself"
	exit 2
}

# Columns 9 onwards of our entry list the slots we own (empty if none).
SLOTS=$(printf '%s\n' "$MYSELF" | cut -d ' ' -f 9-)

# Column 3 of our entry holds the flags, such as master or slave.
MYSELF_FLAGS=$(printf '%s\n' "$MYSELF" | cut -d ' ' -f 3)

# Print the CLUSTER NODES lines describing healthy replicas of a master.
#
# Arguments: $1 - node ID of the master
#            $2 - complete CLUSTER NODES output
# Outputs:   every line whose master column (4th) equals $1 and whose flags
#            column (3rd) contains "slave" but none of fail (this also covers
#            "fail?"), noaddr or handshake.
#
# Matching on columns rather than on the whole line keeps text in other
# columns (for example a host name containing "fail") from hiding a replica.
healthy_replicas_of() {
	awk -v id="$1" '
		$4 == id && $3 ~ /slave/ && $3 !~ /fail|noaddr|handshake/
	' <<< "$2"
}

# If we're a master with slots, make sure somebody replicates us.
if [[ "$MYSELF_FLAGS" =~ master ]] && [[ -n "${SLOTS}" ]]; then
	# Get the 1st column which is our ID.
	MYSELF_ID=$(cut -d' ' -f 1 <<< "$MYSELF")

	# Healthy slaves that claim to replicate our ID.
	REPLICAS=$(healthy_replicas_of "$MYSELF_ID" "$CLUSTER_NODES")

	# A master without any healthy replica is reported as a warning
	# (exit 1), not as critical.
	if [[ -z "$REPLICAS" ]]; then
		echo "WARNING - node is a master with no replicas"
		exit 1
	fi
fi

# No check above bailed out, so report success.
printf '%s\n' "OK"
exit 0
