#!/bin/bash

# Work from the services directory.
cd /etc/service

# Read the command-line options that describe what to monitor:
#   -s <name>  stored in $service
#   -e <env>   stored in $environment
#   -k <key>   stored in $pd_key
# Any other flag is reported by getopts itself and otherwise ignored.
parse_options() {
  while getopts "s:e:k:" flag; do
    case "$flag" in
      s)
        service="$OPTARG"
        ;;
      e)
        environment="$OPTARG"
        ;;
      k)
        pd_key="$OPTARG"
        ;;
    esac
  done
}
parse_options "$@"

# A service name is mandatory: it is part of both the log path and the PID
# file path below. Check it BEFORE building those paths so that a missing -s
# does not create "/var/run//_flap.pid" or log to "/var/log//_flap.log".
# With no usable log file yet, the error goes to stderr.
if [[ -z "${service:-}" ]]; then
    echo "$(date): No service specified" >&2
    exit 1
fi

# Per-service log file used by everything that follows.
logfile="/var/log/${service}/${service}_flap.log"

# Record this watcher's own PID.
echo "$$" > "/var/run/${service}/${service}_flap.pid"

# Flap monitoring only runs when the environment is exactly "prod"; anywhere
# else the script exits successfully without doing anything further.
if [[ "${environment:-}" != "prod" ]]; then
    exit 0
fi

# Start of the current observation window (epoch seconds).
start_time=$(date +%s)
echo "$(date): starting at ${start_time}" >> "$logfile"

# PID the service is running under right now, taken from the "(pid N)" part of
# the svstat status line. The path is quoted so the service name is never
# word-split or glob-expanded.
# NOTE(review): when the status line has no "(pid N)" segment the sed
# substitution does not match and the whole line is kept as the "pid".
current_pid=$(sudo svstat "/etc/service/${service}" | sed -e 's/.* (pid \([0-9]*\)) .*/\1/')

# An alert is raised when more than this many flaps occur in one window.
threshold=3
flap_count=0
host=$(hostname --fqdn)

# PIDs observed during the current window, kept for the log and the alert text.
pids=( "$current_pid" )

# Set after each window evaluation so the next poll re-baselines the PID.
reset=false

# Main watch loop. Every 5 seconds the service's PID is sampled and any change
# is counted as a flap. Once more than 60 seconds have passed since the window
# started, the window is evaluated: if more than $threshold flaps were counted
# a PagerDuty trigger event is sent, the PID history is logged, and the
# counters are reset regardless of the outcome.
while true; do
    current_time=$(date +%s)

    if (( current_time - start_time > 60 )); then
        pidsDesc="[ ${pids[*]} ]"

        # More flaps than the threshold in this window: trigger an incident.
        if (( flap_count > threshold )); then
            alert_msg="Flap threshold surpassed on $host,$service flapped ${flap_count} times at $(date). $service has run on pids $pidsDesc and is now on $current_pid environment is ${environment}"
            payload="{\"service_key\": \"$pd_key\",\"incident_key\":\"$(date '+%d%H')\",\"event_type\": \"trigger\",\"description\": \"$alert_msg\",\"client\": \"${service} Flap Watch Script\"}"

            echo "$(date) sending payload: ${payload}" >> "$logfile"

            # Send to PagerDuty. The incident_key is the day of month plus the
            # hour, so triggers sent within the same hour carry the same key
            # and can be grouped on the PagerDuty side.
            # The payload is fed on stdin (-d @-) rather than through a
            # here-document nested inside a process substitution, whose
            # "EOF)" terminator was not a clean delimiter line.
            curl -XPOST https://events.pagerduty.com/generic/2010-04-15/create_event.json -d @- <<< "$payload" >> "$logfile"

            # Capture curl's status once and test the captured value; testing
            # $? again would only see the (always successful) assignment.
            curl_exit_code=$?
            if (( curl_exit_code != 0 )); then
                echo "$(date): failed to send pagerduty message: ${curl_exit_code}" >> "$logfile"
            fi
            echo "$(date): flap triggered at $(date)" >> "$logfile"
        fi

        # Append the PID history unless the last log line already shows the
        # same list. The pattern only recognises a bracketed list holding a
        # single run of digits, so this de-duplicates the steady state where
        # the PID did not change and logs in every other case.
        previous_pids=$(tail -1 "$logfile" | sed 's/.* \(\[ [0-9]* \]\).*/\1/')
        if [[ "$previous_pids" != "$pidsDesc" ]]; then
            echo "${service}'s pids are $pidsDesc, last updated at $(date)" >> "$logfile"
        fi

        # Start a fresh window.
        reset=true
        start_time=$current_time
        flap_count=0
    fi

    # Sample the service's current PID (path quoted against word-splitting).
    latest_pid=$(sudo svstat "/etc/service/${service}" | sed -e 's/.* (pid \([0-9]*\)) .*/\1/')
    if $reset; then
        # First poll of a new window: re-baseline without counting a flap.
        pids=( "$latest_pid" )
        current_pid="$latest_pid"
        reset=false
    elif [[ "$current_pid" != "$latest_pid" ]]; then
        # The PID changed since the previous poll: count it as a flap.
        echo "$(date): flapped" >> "$logfile"
        flap_count=$(( flap_count + 1 ))
        pids+=( "$latest_pid" )
        current_pid=$latest_pid
    fi
    sleep 5
done
