#!/bin/bash


#######################################
# Append one timestamped entry to the watchdog log.
# Globals:   log_file (read) - path of the file the entry is appended to
# Arguments: $1 - message text
# Outputs:   a "[YYYY-MM-DD HH:MM:SS] <message>" line appended to ${log_file}
#######################################
log_message() {
    local timestamp
    timestamp=$(date +'%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "${timestamp}" "$1" >> "$log_file"
}



# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# File that log_message appends its entries to.
log_file='/var/log/mworker-watchdog.log'

# Base directory of the watchdog; every run works in its own sub-directory
# named after the current date and minute.
working_dir='/var/tmp/mworker-watchdog'
tmp_dir="${working_dir}/$(date '+%Y-%m-%d_%H-%M')"

# When this file exists, inactive masters are only reported, never signalled.
stop_kill_file="${working_dir}/stop_kill"

# Age threshold, in seconds, compared against (now - mtime of /proc/<pid>/cmdline).
older_diff=60

# Master PIDs seen in the requests log, plus their `ps -f` snapshot.
active_master_pids_file="${tmp_dir}/active_master_pids"
active_master_ps_file="${tmp_dir}/active_master_ps"

# Master PIDs read from the worker pid files, plus their `ps -f` snapshot.
running_master_pids_file="${tmp_dir}/running_master_pids"
running_master_ps_file="${tmp_dir}/running_master_ps"

# Running PIDs that are missing from the requests log, plus their `ps -f` snapshot.
inactive_master_pids_file="${tmp_dir}/inactive_master_pids"
inactive_master_ps_file="${tmp_dir}/inactive_master_ps"

# Subset of the inactive PIDs that exceed the age threshold.
old_inactive_master_pids_file="${tmp_dir}/old_inactive_master_pids"

# Make sure the per-run directory exists before anything is written into it.
mkdir -p "${tmp_dir}"



# Extract the distinct "pid=<N>" values from recent requests-log lines that
# mention INSPECT_, receive and amqp, and store them numerically sorted.
# NOTE(review): `timetail -n 60` presumably limits input to the last 60
# seconds of the log - confirm against the timetail documentation.
timetail -t java -n 60 /var/log/mpfs/requests-tskv.log \
    | grep 'INSPECT_' \
    | grep receive \
    | grep amqp \
    | grep -oP "pid=\K[0-9]+" \
    | sort -n \
    | uniq > "${active_master_pids_file}"

# Snapshot `ps -f` for those PIDs (header line stripped). `ps -p` fails when
# it is given no PID at all, so only call it for a non-empty list, and pass
# the list as one comma-separated argument instead of relying on word-splitting.
if [[ -s "${active_master_pids_file}" ]]; then
    ps -f -p "$(paste -sd, "${active_master_pids_file}")" \
        | grep -vP '^UID' > "${active_master_ps_file}"
else
    : > "${active_master_ps_file}"
fi

log_message "MPFS requests log: $(wc -l < "${active_master_pids_file}") master pids were found"


# Read the PIDs recorded in the worker pid files and store them numerically
# sorted, one per line.
running_pid_files=(/var/run/mpfs/worker*.pid)
if [[ -e "${running_pid_files[0]}" ]]; then
    # awk emits the first field of every non-blank line followed by a newline:
    # a pid file without a trailing newline can no longer fuse with the next
    # file's PID, and surrounding whitespace cannot break the later comparison
    # against the digits-only active list.
    awk 'NF { print $1 }' "${running_pid_files[@]}" | sort -n > "${running_master_pids_file}"
else
    # The glob matched nothing (the array still holds the literal pattern).
    : > "${running_master_pids_file}"
fi

# Snapshot `ps -f` for those PIDs (header line stripped). `ps -p` fails when
# it is given no PID at all, so only call it for a non-empty list, and pass
# the list as one comma-separated argument instead of relying on word-splitting.
if [[ -s "${running_master_pids_file}" ]]; then
    ps -f -p "$(paste -sd, "${running_master_pids_file}")" \
        | grep -vP '^UID' > "${running_master_ps_file}"
else
    : > "${running_master_ps_file}"
fi

log_message "Running processes: $(wc -l < "${running_master_pids_file}") master pids were found"


# Inactive masters: PIDs present in the running list but absent from the
# active list, i.e. the lines diff marks with "> " (only in the second file).
diff -d "${active_master_pids_file}" "${running_master_pids_file}" \
    | grep -oP '^> \K[0-9]+' > "${inactive_master_pids_file}"
inactive_master_pids_count=$(wc -l < "${inactive_master_pids_file}")

current_ts=$(date "+%s")

# Start from an empty list so that a second run within the same minute (same
# tmp_dir) cannot inherit PIDs appended by the previous run.
: > "${old_inactive_master_pids_file}"

# Keep only the inactive PIDs whose /proc/<pid>/cmdline mtime is more than
# ${older_diff} seconds in the past.
# NOTE(review): that mtime is used as a stand-in for the process start time -
# confirm it is reliable on the target kernel.
while IFS= read -r pid; do
    # The process may have exited since the PID lists were built. Without this
    # check an empty start_ts would evaluate to 0 in the arithmetic below, so
    # the vanished PID would look extremely old and be queued for killing.
    # stat's error output is dropped on purpose: a missing /proc entry is an
    # expected condition here.
    if ! start_ts=$(stat --printf '%Y' "/proc/${pid}/cmdline" 2>/dev/null) \
        || [[ -z "${start_ts}" ]]; then
        continue
    fi

    age_sec=$((current_ts - start_ts))
    if (( age_sec > older_diff )); then
        printf '%s\n' "${pid}" >> "${old_inactive_master_pids_file}"
    fi
done < "${inactive_master_pids_file}"

old_inactive_master_pids_count=$(wc -l < "${old_inactive_master_pids_file}")

# Report inactive masters and, unless the stop-kill file is present, terminate
# the ones recorded in the old-inactive list: SIGTERM first, then SIGKILL for
# whatever is still alive after a short grace period.
if [[ "${inactive_master_pids_count}" -gt 0 ]]; then
    # Keep a `ps -f` snapshot of the inactive masters for later inspection;
    # the PID list is passed as one comma-separated argument.
    ps -f -p "$(paste -sd, "${inactive_master_pids_file}")" > "${inactive_master_ps_file}"
    log_message "Found ${inactive_master_pids_count} inactive master pids, check tmp_dir (${tmp_dir})"

    if [[ -e "${stop_kill_file}" ]]; then
        # Nothing is signalled; the script still continues past this block.
        log_message "Stop-kill file was found, exit now"
    elif [[ "${old_inactive_master_pids_count}" -gt 0 ]]; then
        mapfile -t old_inactive_master_pids < "${old_inactive_master_pids_file}"

        log_message "Trying to send SIGTERM"
        kill "${old_inactive_master_pids[@]}"
        sleep 5

        # Only processes that ignored SIGTERM get SIGKILL. `kill -0` sends no
        # signal, it just reports whether the PID can still be signalled.
        surviving_pids=()
        for pid in "${old_inactive_master_pids[@]}"; do
            if kill -0 "${pid}" 2>/dev/null; then
                surviving_pids+=("${pid}")
            fi
        done

        if (( ${#surviving_pids[@]} > 0 )); then
            log_message "Trying to send SIGKILL"
            kill -9 "${surviving_pids[@]}"
        fi
    else
        log_message "No older than ${older_diff} sec inactive workers were found"
    fi

else
    # NOTE(review): the message mentions cleaning up tmp_dir, but the removal
    # below is commented out, so the directory is kept - confirm the intent.
    log_message "All running master pids were found in logs. Cleaning up tmp_dir."
    #rm -rf ${tmp_dir}

fi

# cleanup: remove directories below working_dir that match find's `-mtime +2`
# age test. The guard keeps find from scanning the current directory when
# working_dir is empty or missing. `-prune` stops find from descending into a
# directory that is about to be deleted, and `-exec ... {} +` hands the paths
# to rm directly, so names with whitespace are safe.
if [[ -n "${working_dir}" && -d "${working_dir}" ]]; then
    find "${working_dir}" -mindepth 1 -type d -mtime +2 -prune -exec rm -rf -- {} +
fi


