#!/usr/bin/env bash

# Number of consecutive probes in which consul looked unhealthy; the main
# loop resets it to 0 whenever a probe is skipped or succeeds.
count=0
# Unhealthy probes tolerated before a restart is considered. Nothing in the
# script reassigns it, so it is marked read-only.
readonly MAX=30

# Write one timestamped line to stdout.
# Arguments: the message words; they are joined by the first character of
#            IFS (a space by default).
# Outputs:   "<output of date>: <message>" followed by a newline.
log() {
    local stamp
    stamp=$(date)
    printf '%s: %s\n' "$stamp" "$*"
}

# Watchdog loop; runs until the script is killed. Each pass:
#   1. skips (and resets the retry counter) while used memory is under 20GB,
#   2. skips (and resets) while this host is listed as the raft leader,
#   3. otherwise probes `consul members`; once count has reached MAX and the
#      probe fails again, it restarts the consul service, provided enough
#      raft peers are still reported.
# Globals: count (read/written), MAX (read). Uses log() for all output.
while : ; do
    # only proceed if we are using a lot of memory
    # LC_ALL=C keeps the "Mem:" label untranslated; column 3 is "used"
    # (assumes free's default unit of KiB, as the original arithmetic did).
    used_kib=$(LC_ALL=C free | awk '/^Mem:/ { print $3; exit }')

    # Validate before doing arithmetic: an empty or non-numeric value would
    # make the arithmetic below a syntax error, which can abort this whole
    # loop rather than just the current pass.
    if [[ ! "$used_kib" =~ ^[0-9]+$ ]]; then
        log "Unable to read memory usage from free, Continue..."
        sleep 10
        count=0
        continue
    fi

    # KiB -> GiB with integer division.
    if (( used_kib / 1024 / 1024 < 20 )); then
        log "Memory usage under 20GB threshold, Continue..."
        sleep 10
        count=0
        continue
    fi

    # -F matches the hostname literally (dots in an FQDN are not regex
    # wildcards); -- protects against a hostname starting with '-'.
    if consul operator raft list-peers \
        | grep -F -- "$(hostname)" \
        | grep -q leader; then
        log "Currently a leader, just continue..."
        sleep 60
        count=0
        continue
    fi

    # Healthy means `consul members` printed at least one line on stdout;
    # its stderr is discarded on purpose so a down agent stays quiet.
    member_lines=$(consul members 2>/dev/null | wc -l)

    if (( member_lines > 0 )); then
        log "Good; proceeding..."
        count=0
        sleep 5
        continue
    fi

    if (( count < MAX )); then
        # Plain assignment instead of (( count++ )), whose exit status is
        # non-zero when the old value is 0.
        count=$(( count + 1 ))

        log "consul potentially not in a good state, retrying..: $count of $MAX"
        sleep 2
        continue
    fi

    log "Consul cluster has been in a bad state after $count tries"

    log "Validate that cluster has 3 other members"
    # Counts list-peers lines containing "true" (the Voter column); more
    # than 3 are required before restarting.
    voters=$(consul operator raft list-peers | grep -c true)
    if (( voters > 3 )); then
        log "Safe to restart cluster"
        if ! systemctl restart consul; then
            # Surface the failure instead of silently carrying on.
            log "systemctl restart consul failed"
        fi
    else
        log "Not safe to restart cluster. Bailing..."
    fi

    # reset
    count=0
done
