#!/bin/sh

#
# runtime cloud log rotating tool
# https://bb.yandex-team.ru/projects/SEARCH_INFRA/repos/rotmissingdocs
#
# vim: set tabstop=4 shiftwidth=4 noexpandtab:
#

#
# Subroutines
#

# Checks that a file is safe to source or execute.
# Arguments: $1 - path to the file
# Returns:   0 if running on Linux, the file is owned by the effective UID
#            and GID, and "others" have no write permission; 1 otherwise
check_perms() {
	local _file _mode
	_file=${1}
	[ "$(uname)" = Linux ] || return 1 # FreeBSD not supported
	[ -O "${_file}" ] && [ -G "${_file}" ] || return 1 # owner and group match effective IDs
	_mode=$(stat --format=%a "${_file}") || return 1
	case "${_mode}" in
		''|*[!0-7]*) return 1 ;; # refuse anything that is not an octal mode
	esac
	# Test the "others write" bit (octal 002) instead of comparing a digit picked
	# by position: %a prints 1 to 4 digits (e.g. "4755", "40"), and digits 2 and 3
	# also mean "writable".
	[ $(( 0${_mode} & 2 )) -eq 0 ] || return 1 # others not allowed to write
	return 0
}

# Removes regular files under a directory that were not modified for more
# than the given number of days and are not opened by any process, then
# removes old empty subdirectories (the top directory itself is kept).
# Arguments: $1 - directory to clean
#            $2 - age threshold in days (passed to find -mtime +N)
# Returns:   0 on success; 1 on missing arguments, if the file listing fails,
#            or if any removal fails
erase_unchanged_older_than() {
	local _age_days _dir _file _files _pids _ret
	[ -n "$1" ] || return 1
	_dir="$1"
	[ -n "$2" ] || return 1
	_age_days="$2"
	_files=$(find "$_dir" -type f -mtime "+${_age_days}") || return 1
	_ret=0
	# The list is fed through a here-document rather than a pipe: a piped loop
	# runs in a subshell and the _ret assignment would be lost.
	while IFS= read -r _file; do # support files with spaces
		[ -e "$_file" ] || continue
		_pids=$(file_get_attached_pids "$_file" "[a-z]")
		if [ -n "${_pids}" ]; then
			log WARN "Skiping '${_file}' (still opened by ${_pids})"
			continue # don't remove opened files
		fi
		log DEBUG "Removing '${_file}' (unchanged more than ${_age_days} days)"
		rm -f "$_file" || _ret=1
	done <<EOF
${_files}
EOF
	# remove remaining empty dirs; -mindepth 1 protects the top directory
	find "$_dir" -mindepth 1 -type d -mtime "+${_age_days}" -empty -delete || _ret=1
	return $_ret
}

# Asks vmtouch to evict a directory's files from the page cache.
# Arguments: $1 - directory to evict
# Returns:   0 right away on non-Linux systems; 1 if $1 is not a directory;
#            otherwise the exit status of vmtouch
evict() {
	case "$(uname)" in
		Linux) ;;
		*) return 0 ;; # nothing to do on other systems
	esac
	if [ ! -d "$1" ]; then
		return 1
	fi
	/usr/sbin/vmtouch -e -f -m100g -q "$1"
}

# Prints PIDs of processes that keep a file opened, as reported by lsof.
# Arguments: $1 - path to an existing regular file
#            $2 - regex matched against the lsof FD column; all modes by
#                 default, specify 'r' for read or 'w' for write mode
# Outputs:   unique PIDs on one line, space separated, highest first
#            (an empty line if nobody holds the file)
# Returns:   1 if $1 is empty or not a regular file, 0 otherwise
file_get_attached_pids() {
	local _file _mode
	[ -n "$1" ] && [ -f "$1" ] || return 1
	_file="${1}"
	_mode="${2:-[a-z]}"

	# there is no reason to check exit code, lsof returns 1 for all errors
	# including case when all ok, but there is no procs opened file
	#
	# The mode is handed to awk as a variable (not spliced into the program),
	# the header is skipped by position so a command name containing "PID" is
	# not lost, and -u collapses processes holding several descriptors.
	# The command substitution is left unquoted on purpose: echo joins the PIDs
	# into a single line.
	echo $(lsof "${_file}" | awk -v mode="${_mode}" 'NR > 1 && $4 ~ mode {print $2}' | sort -rnu)
}

# Prints the value of an arbitrary named variable (name composed at run time).
# Arguments: $1 - variable name; must be a plain shell identifier
# Outputs:   the value followed by a newline (an empty line if unset)
# Returns:   1 without any output if $1 is not a valid identifier
get_val() {
	case "${1}" in
		''|[0-9]*|*[!A-Za-z0-9_]*) return 1 ;; # never eval anything but a bare name
	esac
	# printf instead of echo: values like "-n" or ones with backslashes survive
	eval "printf '%s\\n' \"\${${1}}\""
}

# Prints section names of an INI-style file, one per line.
# Arguments: $1 - path to the file
# Outputs:   section names in file order; nothing for a missing/unreadable file
# Returns:   exit status of the Python interpreter, or 1 if none is found
ini_get_sections() {
	local _py
	for _py in python python3 python2; do
		command -v "${_py}" >/dev/null 2>&1 && break
		_py=""
	done
	[ -n "${_py}" ] || return 1
	# The path travels as argv[1] instead of being pasted into the source, so
	# spaces and quotes in it cannot break (or inject into) the Python code.
	"${_py}" -c '
import sys
try:
    from configparser import RawConfigParser  # Python 3
    config = RawConfigParser(strict=False)
except ImportError:
    from ConfigParser import RawConfigParser  # Python 2
    config = RawConfigParser()
config.read(sys.argv[1])
for section in config.sections():
    print("%s" % section)
' "${1}"
}

# Sources a shell conf file after sanity checks.
# Arguments: $1 - path to the conf file
# Returns:   0 when the file is absent or gets ignored (failed check_perms or
#            a syntax error; an ERROR is logged in both cases); otherwise the
#            status of sourcing the file
load_conf_file() {
	if [ ! -f "${1}" ]; then
		return 0
	fi
	if ! check_perms "${1}"; then
		log ERROR "Ignoring conf file '${1}' (wrong permissions)"
		return 0
	fi
	# parse-only pass, nothing is executed yet
	if ! sh -n "${1}" >/dev/null 2>&1; then
		log ERROR "Ignoring conf file '${1}' (wrong syntax)"
		return 0
	fi
	. "${1}" # ok, source it
}

# Writes a timestamped message to stderr (only when stdout is a terminal)
# and appends it to ${SELF_LOG} (only when that variable is non-empty).
# Arguments: $1   - class: CRIT|CRITICAL|ERR|ERROR (always emitted),
#                   WARN|WARNING (VERBOSE >= 1), INFO (VERBOSE >= 2),
#                   DEBUG (VERBOSE >= 3)
#            $2.. - message words; when $2 is empty, lines are read from
#                   stdin and each one is logged as "<class> ---> <line>"
# Returns:   1 for an unsupported class, 0 otherwise (also when the message
#            is filtered out by VERBOSE)
log() {
	local _line _min_verbosity _msg
	case ${1} in
		CRIT|CRITICAL|ERR|ERROR) _min_verbosity=0 ;;
		WARN|WARNING) _min_verbosity=1 ;;
		INFO) _min_verbosity=2 ;;
		DEBUG) _min_verbosity=3 ;;
		*) echo >&2 "unsupported log class '${1}' passed"; return 1 ;;
	esac
	if [ "${_min_verbosity}" -gt 0 ]; then
		[ -n "${VERBOSE}" ] && [ "${VERBOSE}" -ge "${_min_verbosity}" ] || return 0
	fi
	if [ -n "${2}" ]; then # arg mode
		_msg="[$(date +"%Y-%m-%d %H:%M:%S") $$] $*"
		if [ -t 1 ]; then echo >&2 "${_msg}"; fi
		if [ -n "${SELF_LOG}" ]; then echo >> "${SELF_LOG}" "${_msg}"; fi
	else
		# IFS= and -r keep leading blanks and backslashes of the logged lines;
		# the extra -n test picks up a last line without trailing newline
		while IFS= read -r _line || [ -n "${_line}" ]; do # stdin mode
			_msg="[$(date +"%Y-%m-%d %H:%M:%S") $$] ${1} ---> ${_line}"
			if [ -t 1 ]; then echo >&2 "${_msg}"; fi
			if [ -n "${SELF_LOG}" ]; then echo >> "${SELF_LOG}" "${_msg}"; fi
		done
	fi
	return 0 # a skipped sink (no tty, no SELF_LOG) is not an error
}

# Runs every executable regular file of a hooks directory that passes
# check_perms, wrapped into timeout when ${tout_bin} is set.
# Arguments: $1 - hooks directory
#            $2 - optional space-separated list of log files; each hook is
#                 run once per listed file with the file as its argument
# Globals:   tout_bin (read), HOOKS_TIMEOUT (read)
# Returns:   1 if $1 is empty or not a directory; otherwise the status of
#            the last command executed in the loop
run_hooks() {
	local _dir _hook _log_files _log_name
	[ -n "$1" ] && [ -d "$1" ] || return 1
	_dir="${1}"
	_log_files="${2}"
	# The positional parameters are reused as the argv of the timeout wrapper:
	# sh has no arrays and this keeps every word intact. They stay empty when
	# timeout doesn't exist (FBSD for example), so hooks are run directly.
	if [ -n "${tout_bin}" ]; then
		set -- "${tout_bin}" --kill-after=5 "${HOOKS_TIMEOUT:-180}"
	else
		set --
	fi
	for _hook in "${_dir}"/*; do
		[ -f "${_hook}" ] && [ -x "${_hook}" ] || continue
		check_perms "${_hook}" || continue
		if [ -n "${_log_files}" ]; then
			for _log_name in ${_log_files}; do # list is split on purpose
				log INFO "Running '$* ${_hook} ${_log_name}'"
				"$@" "${_hook}" "${_log_name}"
			done
		else # run once if no log files specified (useful for future hooks)
			"$@" "${_hook}"
		fi
	done
}

#
# Main code
#

# Loading conf (if exists), SEPE-10150
load_conf_file "/etc/rotmissingdocs/rotmissingdocs.conf"

# Command-line options are parsed after the conf file, so they override it;
# whatever stays unset gets its default further below.
while getopts "bd:D:efhlnNvus:" optname ; do
	case "$optname" in
		d)
			ERASE_ABAND_LOGS_OLDER_THAN=${OPTARG}
		;;
		D)
			ERASE_ABAND_CACHES_OLDER_THAN=${OPTARG}
		;;
		f) # useful for manual run
			FORCE=yes
		;;
		N|b|e|l|n|v)
			# Dummy for backward compatibility
		;;
		s)
			CURRENT_LOG_SIZE_MAX=${OPTARG}
		;;
		h|*)
			# NOTE(review): 'u' is accepted by the optstring but has no arm of
			# its own, so it ends up here as well -- confirm that is intended.
			# The defaults quoted below mirror the ones assigned in the
			# Defaults section (30 days for logs, 3 days for caches).
			echo "$(basename "$0") -- runtime cloud log rotating tool"
			echo "usage: $(basename "$0") [-dDfhs]"
			echo "    options:"
			echo "        -d <days> - erase abandoned log files older than specified amount of days, by default - 30 days, empty value disables erasion"
			echo "        -D <days> - same as above, but for mmeta caches, by default - 3 days"
			echo "        -f - force (don't check avail space, rotate logs anyway)"
			echo "        -h - print this message"
			echo "        -s <Mb> - size in megabytes (max total size of current* logs)"
			exit 1
		;;
	esac
done

# Locate timeout(1). 'command -v' is a shell builtin: unlike 'which' it needs
# no external binary and prints nothing when the utility is missing.
tout_bin=$(command -v timeout)
[ -z "${tout_bin}" ] && [ -e "/usr/bin/timeout" ] && tout_bin="/usr/bin/timeout" # try direct path in case prev command was killed, SEPE-15803
if [ -n "${tout_bin}" ]; then
	tout_cmd="${tout_bin} --kill-after=180 300" # default timeout cmd
else
	tout_cmd="" # don't use if not exists (FBSD for example)
	log ERROR "Timeout util not found, hooks and some other commands will be runned without timeouts"
fi

# Defaults
# ${VAR=value} assigns only when VAR is unset, so values that were set to an
# empty string on purpose (conf file, command line) are kept as they are.
# The expansions are quoted to keep the no-op ':' from globbing or splitting
# them, and basename gets quoted operands to survive paths with spaces.
: "${CACHEDIR=/db/bsconfig/webcache}"
: "${CURRENT_LOG_SIZE_MAX=65536}" # Mb
: "${ERASE_ABAND_CACHES_OLDER_THAN=3}" # in days
: "${ERASE_ABAND_LOGS_OLDER_THAN=30}" # in days
: "${ERASE_ABAND_LOGS_FIND_MAXDEPTH=1024}" # a non-negative integer
: "${ERASE_ABAND_LOGS_BACKUPS_OLDER_THAN=14}" # in days
: "${HOOKS_ENABLED=NO}"
: "${HOOKS_TIMEOUT=180}" # seconds
: "${LOGDIR=/usr/local/www/logs}"
: "${LOGS_PREFIX=current-}"
: "${LOGBACKUPDIR=/usr/local/www/backup_logs/}"
: "${MIN_FREE_SPACE=1024}"
: "${SELF_LOG=${LOGDIR}/${LOGS_PREFIX}$(basename "$0")-0.log}"
: "${SELF_LOG_MATCH=$(basename "${SELF_LOG}")}"
: "${SELF_LOG_SIZE=10}" # Mb
: "${VERBOSE=3}" # 0(CRITs and ERRORs) 1(WARNs) 2(INFOs) 3(DEBUGs)

[ -t 1 ] || exec 2>>"${SELF_LOG}" # write stderr to logfile if not a terminal

log INFO "=== Started as $0 $*, PATH=$PATH ==="

[ -d "$LOGDIR" ] && [ -w "$LOGDIR" ] || { echo >&2 "'$LOGDIR' is not a dir or isn't writeable"; exit 1; }
cd "$LOGDIR" || { echo >&2 "Cannot cd to '$LOGDIR'"; exit 1; }

log DEBUG 'Replace new lines in file names (if any) to "--SEPE-15917--"' # SEPE-15917
# The pattern is "anything, a newline, anything". -depth handles directory
# contents before the directory itself gets renamed, and the /g flag replaces
# every newline of a name, not only the first one.
NL_PTTRN="*
*"
find . -depth -name "$NL_PTTRN" -exec rename 's/\n/--SEPE-15917--/g' {} \;

log DEBUG "Looking for files with prefix '${LOGS_PREFIX}' in '${LOGDIR}'"
generic_logs_list=$(find "$LOGDIR" -maxdepth 1 -name "${LOGS_PREFIX}*" -type f)

# Decide whether rotation is needed: it is skipped (exit 0) only when free
# space is above MIN_FREE_SPACE and current logs are below CURRENT_LOG_SIZE_MAX.
if [ "${FORCE}" = "yes" ]; then
	log INFO "Rotating logs (forced by opt)"
else
	# -P keeps every filesystem on a single output line (long device names are
	# not wrapped), so the 4th field of the last line is always "available"
	FREE_SPACE=$(df -Pm "${LOGDIR}" | tail -1 | awk '{print $4};')
	if [ -n "${generic_logs_list}" ]; then
		# the list is split on purpose: one operand per log file
		USED_BY_LOGS="$(du -mc $generic_logs_list 2>/dev/null | awk '$2 == "total" {print $1}' | tail -1)"
	else
		USED_BY_LOGS=0 # du without operands would measure the whole current dir
	fi
	# Undetermined values count as 0: no known free space forces a rotation
	if [ "${FREE_SPACE:-0}" -gt "${MIN_FREE_SPACE}" ] && [ "${USED_BY_LOGS:-0}" -lt "${CURRENT_LOG_SIZE_MAX}" ]; then
		log INFO "Exit 0. (Free space: ${FREE_SPACE} Mb (> ${MIN_FREE_SPACE} Mb); Used by logs: ${USED_BY_LOGS} Mb (< ${CURRENT_LOG_SIZE_MAX} Mb)"
		exit 0
	fi
	log INFO "Rotating logs (Free space: ${FREE_SPACE} Mb (threshold: ${MIN_FREE_SPACE} Mb); Used by logs: ${USED_BY_LOGS} Mb (threshold: ${CURRENT_LOG_SIZE_MAX} Mb)"
fi

log DEBUG "Evicting logs from page cache"
evict "$LOGDIR"

# Moved files whose writers have to be checked afterwards (space separated)
CHECKFUSER=

log INFO "Moving currents to PREVs"
for file in $generic_logs_list; do # list is split on purpose
	newname=$(basename "$file")
	# Logs matching <GROUP>_MATCH (an extended regex) are rotated only after
	# they outgrow <GROUP>_SIZE megabytes
	for grp in ${CUSTOM_GROUPS} "SELF_LOG"; do
		grp_match=$(get_val "${grp}_MATCH")
		[ -n "${grp_match}" ] || continue # group without a pattern matches nothing
		if printf '%s\n' "${newname}" | grep -Eq -- "${grp_match}"; then # custom size log
			grp_size=$(get_val "${grp}_SIZE")
			# relative name is fine here: the current dir is LOGDIR
			size="$(du -mc "${newname}" 2>/dev/null | awk '$2 == "total" {print $1}' | tail -1)"
			if [ "${size}" -le "${grp_size}" ]; then
				log DEBUG "Skiping '${file}' (less than it's personal size limit: ${size} Mb vs. ${grp_size} Mb)"
				continue 2 # 'next' for outer loop
			fi
		fi
	done
	newname="${newname##${LOGS_PREFIX}}"
	newname="$LOGDIR/$newname.PREV"
	# whoever still holds the previous .PREV file is killed before it is replaced
	pids=$(file_get_attached_pids "$newname" "[a-z]")
	if [ -n "${pids}" ]; then
		log DEBUG "kill -KILL ${pids} (still keep opened '${newname}')"
		kill -KILL ${pids} >/dev/null 2>&1
	fi
	log DEBUG "Moving '$file' to '$newname'"
	mv -f "$file" "$newname"
	[ "${file}" = "${SELF_LOG}" ] && continue # exclude self log to avoid self killing
	CHECKFUSER="$CHECKFUSER $newname"
done

# Ask writers to reopen their logs; this is done only when at least one file
# was moved. HOOKS_ENABLED is quoted so an empty value compares as "not YES"
# instead of making the test fail with a syntax error.
log INFO "Sending reopenlog signals"
if [ -n "$CHECKFUSER" ] ; then
	if [ "${HOOKS_ENABLED}" = "YES" ]; then
		run_hooks /etc/rotmissingdocs/hooks/prerotatefile "$CHECKFUSER"
	fi
	if [ -e /db/bin/bsconfig ]; then
		log DEBUG "Running 'bsconfig reopenlog'"
		su loadbase -c "${tout_cmd} /db/bin/bsconfig reopenlog --batch-mode"
	else
		log DEBUG "/db/bin/bsconfig absent, skipped"
	fi
	log DEBUG "Asking iss-agent to reopenlogs"
	if [ -s /place/db/iss3/iss-agent.jar ]; then
		# ISS response time often longer than minute, so, try to connect several times,
		# but if connected - wait for reply to the end of times (to avoid multiple reopen requests)
		# (tout_cmd is left unquoted on purpose: it holds a command with its arguments)
		${tout_cmd} wget -qO /dev/null --server-response --tries=8 --retry-connrefused --connect-timeout=8 --read-timeout=180 \
			"http://localhost:25536/instances/reopenlogs" \
			|| log ERROR "reopenlogs request to iss-agent has failed. Code $?"
	fi
	if [ "${HOOKS_ENABLED}" = "YES" ]; then
		run_hooks /etc/rotmissingdocs/hooks/postrotatefile "$CHECKFUSER"
	fi
	sleep 10 # give time for some slow process (such as push-client) to release it
fi

# Processes that still have a moved file opened for writing are killed.
log INFO "Looking for processes still writing to moved files"
for file in $CHECKFUSER ; do
	pids=$(file_get_attached_pids "$file" w)
	if [ -z "${pids}" ]; then
		continue
	fi
	log WARN "Kill procs for '${file}' (-KILL ${pids})"
	kill -KILL ${pids} >/dev/null 2>&1
done

log INFO "Looking for STD{ERR,OUT} files in active bsconfig/ISS instances dirs"
iss_meta=$(wget -q --tries=1 --timeout=5 --read-timeout=20 -O - "http://localhost:25536/instances/active")
if [ -n "${iss_meta}" ]; then
	iss_dirs=$(printf '%s\n' "${iss_meta}" | jq --raw-output 'map(.instanceDir) | .[]')
else
	iss_dirs="" # no reply from iss-agent, only bsconfig instances are processed
fi
hardcoded="loop-httpsearch.debug httpsearch.err httpsearch.out yandex.log" # may be hardcoded somewhere else, so keep them
for instancedir in /db/bsconfig/configactive/*/*:* ${iss_dirs}; do
	# an unmatched glob stays as a literal pattern; skip it (and any other
	# non-directory) instead of reporting a bogus parse failure
	[ -d "${instancedir}" ] || continue
	[ -f "${instancedir}/instancectl.conf" ] && instancectl_conf="instancectl.conf" || instancectl_conf="loop.conf"
	files=$(ini_get_sections "${instancedir}/${instancectl_conf}")
	if [ "${?}" -eq 0 ] && [ -n "${files}" ] ; then
		# every section is expected to have <section>.err and <section>.out
		files=$(for section in ${files}; do echo ${section}.err ${section}.out; done)
	else
		log ERROR "Failed to get section names from '${instancedir}/${instancectl_conf}'"
		files="" # don't treat partial output as file names
	fi
	for fname in $(echo ${files} ${hardcoded} | tr " " "\n" | sort -u); do # go through uniq names
		if [ -f "$instancedir/$fname" ] && [ -s "$instancedir/$fname" ]; then
			# loop scripts and daemons don't reopen theirs stderr/out, that's why cp and truncate here
			log DEBUG "Rotating (cp && truncate) '$instancedir/$fname'"
			# the file is truncated even if the copy fails (freeing space has
			# priority), but the failure is at least recorded
			cp -pf "$instancedir/$fname" "$instancedir/$fname.PREV" || \
				log ERROR "Failed to copy '$instancedir/$fname' to '$instancedir/$fname.PREV'"
			cat /dev/null > "$instancedir/$fname"
		fi
	done
done

# Remove log files that nobody touched for ERASE_ABAND_LOGS_OLDER_THAN days
# and that no process keeps opened; an empty value disables this step.
if [ -n "$ERASE_ABAND_LOGS_OLDER_THAN" ]; then
	log INFO "Looking for abandoned (unchanged more than ${ERASE_ABAND_LOGS_OLDER_THAN} days) files in '${LOGDIR}'"
	# Names are read line by line so spaces in them survive; the mode pattern
	# is quoted because the current dir is LOGDIR and a bare [a-z] would be
	# expanded to any single-letter file name found there.
	find "$LOGDIR" -maxdepth "${ERASE_ABAND_LOGS_FIND_MAXDEPTH}" -type f -mtime "+${ERASE_ABAND_LOGS_OLDER_THAN}" |
	while IFS= read -r file; do
		pids=$(file_get_attached_pids "${file}" "[a-z]")
		if [ -z "${pids}" ]; then
			log DEBUG "Removing abandoned file '${file}'"
			rm -f "$file"
		else
			log WARN "Skipping '${file}' (not abandoned, still opened by ${pids} in spite of outdated mtime)"
		fi
	done
fi

# Remove at most one cache dir per run: a dir qualifies when none of its files
# was modified during the last ERASE_ABAND_CACHES_OLDER_THAN days and none is
# opened by a process; an empty value disables this step.
if [ -n "$ERASE_ABAND_CACHES_OLDER_THAN" ]; then
	log INFO "Looking for abandoned (unchanged more than ${ERASE_ABAND_CACHES_OLDER_THAN} days) cache dirs in '${CACHEDIR}'"
	for dir in "$CACHEDIR"/*; do
		[ -e "$dir" ] || continue
		# The status of find itself is checked here: testing $? after a
		# '[ ... ] || continue' line only sees the status of '['.
		fresh_files=$(find "$dir" -type f -mtime "-${ERASE_ABAND_CACHES_OLDER_THAN}") || \
			continue # don't erase if can't check
		[ -z "${fresh_files}" ] || \
			continue # don't erase dir if at least one file in it newer than threshold
		cache_files=$(find "$dir" -type f) || continue
		cache_busy=""
		# here-document instead of a pipe keeps the loop in the current shell,
		# so the flag set inside is visible afterwards
		while IFS= read -r file; do
			[ -n "$file" ] || continue
			pids=$(file_get_attached_pids "$file" "[a-z]")
			if [ -n "${pids}" ]; then
				log WARN "Skipping cache '${dir}' (at least '${file}' still opened by ${pids})"
				cache_busy=yes
				break
			fi
		done <<EOF
${cache_files}
EOF
		[ -z "${cache_busy}" ] || continue
		log DEBUG "Recursively removing abandoned cache ${dir}"
		rm -rf "$dir" && break # erase only one dir per run (check is slow, dont stall for long time)
	done
fi

# Old log backups are cleaned only when a threshold is configured and the
# backup dir exists; the script then always finishes with status 0.
if [ -n "${ERASE_ABAND_LOGS_BACKUPS_OLDER_THAN}" ] && [ -d "$LOGBACKUPDIR" ]; then
	log INFO "Looking for abandoned (unchanged more than ${ERASE_ABAND_LOGS_BACKUPS_OLDER_THAN} days) logs backups (SEPE-6758, SEPE-6762)"
	erase_unchanged_older_than "$LOGBACKUPDIR" "${ERASE_ABAND_LOGS_BACKUPS_OLDER_THAN}"
fi

log INFO "All done. Exit 0"
exit 0
