#!/bin/sh
#
# $Id$
# $HeadURL$
#
# Script for cleaning garbage in tmp dirs.
#
# Script has 3 stages:
# 1) script rm's files older than 10 days
# 2) If that's not enough --- rm's files older than 3 days
# 3) If even that's not enough --- script will take the least recently
#    modified files and delete them one by one until the list ends or
#    required free space is reached.

# Stop at the first command that fails outside of a tested context.
set -o errexit

# Fixed search path, exported so that every child process sees it too.
PATH="/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin"
export PATH

#-- Subroutines --------------------------------------------------------

# Print the command-line help to stderr and terminate the script.
#
# Globals read: default_first_pass_threshold, default_second_pass_threshold,
#               default_dir, default_space_threshold, default_delete_count,
#               default_dont_clean_threshold (shown as the option defaults).
# Exit status:  always 1, also when help was requested explicitly with -h.
usage()
{
  local _thiscmd

  # Strip the directory part without forking basename; safe for paths
  # that contain whitespace.
  _thiscmd=${0##*/}

  # One here-document with a single redirection, so no help line can end
  # up on stdout by accident.
  cat 1>&2 <<EOF
Usage: ${_thiscmd} [ -a time ] [ -b time ] [-d dir] [-f threshold] [ -n count ] [-p threshold] [-q] [-h]
Options:
  -a time        First pass delete time threshold (default: ${default_first_pass_threshold})
  -b time        Second pass delete time threshold (default: ${default_second_pass_threshold})
  -d dir         Directory to clean (default: ${default_dir})
  -f threshold   Free space threshold, after which more intensive cleanup will occur. Units -- Gbytes. (default: ${default_space_threshold})
  -n count       Delete at most <count> files in last stage. (default: ${default_delete_count})
  -p threshold   Preserve files if target dir is less than treshold Gbytes. (default: ${default_dont_clean_threshold})
  -q             Suppress verbose output
  -h             This help message
EOF

  exit 1
}

# Test whether the variable whose NAME is passed in $1 holds a "true" value.
#
# Returns 0 for yes/true/on (any letter case) or 1.
# Returns 1 for everything else, including no/false/off/0, an empty
# value and an unset variable.
checkyesno()
{
  local _setting

  # $1 is a variable name, so the value has to be fetched indirectly.
  eval "_setting=\"\${$1}\""

  case "${_setting}" in
    1|[Oo][Nn]|[Yy][Ee][Ss]|[Tt][Rr][Uu][Ee])
      return 0
      ;;
  esac

  return 1
}

# Report a fatal error on stderr and terminate the script.
#
# $1     exit status to terminate with
# $2...  words of the error message
err()
{
  local _status

  _status=$1
  shift

  # Message format: "<script name>: ERROR: <message words>".
  echo "$0: ERROR: $@" 1>&2
  exit $_status
}

# Terminate the script because of an error, honouring quiet mode.
#
# $1     exit status handed to err (used only when not in quiet mode)
# $2...  error message handed to err
#
# In quiet mode (variable "quiet" is true per checkyesno) nothing is
# printed and the script exits with status 1 regardless of $1.
quiet_err()
{
  if checkyesno quiet; then
    exit 1
  else
    # Quoted so the message reaches err exactly as given: no word
    # splitting, no glob expansion of characters such as "*" or "[".
    err "$@"
  fi
}

# Print a progress message on stderr, indented by four spaces.
# Does nothing when quiet mode is on (variable "quiet" is true per checkyesno).
#
# $@  words of the message
verbose()
{
  if checkyesno quiet; then
    return 0
  fi

  echo "    $@" 1>&2
}

# Parse the command line into global option variables.
#
# $@  the script's command-line arguments
#
# Globals written (only for options actually given): dir, space_threshold,
# delete_count, quiet, first_pass_threshold, second_pass_threshold,
# dont_clean_threshold.
# Calls usage for -h, for an unknown option and for any leftover
# non-option argument.
get_options()
{
  local _flag

  while getopts "d:f:n:hqa:b:p:" _flag; do
    case "${_flag}" in
      a) first_pass_threshold="${OPTARG}" ;;
      b) second_pass_threshold="${OPTARG}" ;;
      d) dir="${OPTARG}" ;;
      f) space_threshold="${OPTARG}" ;;
      n) delete_count="${OPTARG}" ;;
      p) dont_clean_threshold="${OPTARG}" ;;
      q) quiet="YES" ;;
      h|*) usage ;;
    esac
  done

  # Drop everything getopts has consumed; nothing may remain.
  shift $((OPTIND - 1))

  [ $# -eq 0 ] || usage
}

# Fill in defaults for options that were not given and validate them.
#
# Globals read:    default_dir, default_space_threshold, default_delete_count,
#                  default_quiet, default_first_pass_threshold,
#                  default_second_pass_threshold, default_dont_clean_threshold
# Globals written: dir, space_threshold, delete_count, quiet,
#                  first_pass_threshold, second_pass_threshold,
#                  dont_clean_threshold
#
# Terminates via quiet_err when the target directory does not exist and
# via usage when a numeric option is not an integer.
check_options()
{
  # Quoted so that ":" never sees word-split or glob-expanded values.
  : "${dir:=${default_dir}}"
  : "${space_threshold:=${default_space_threshold}}"
  : "${delete_count:=${default_delete_count}}"
  : "${quiet:=${default_quiet}}"
  : "${first_pass_threshold:=${default_first_pass_threshold}}"
  : "${second_pass_threshold:=${default_second_pass_threshold}}"
  : "${dont_clean_threshold:=${default_dont_clean_threshold}}"

  # fix symlink troubles in find: a trailing slash makes find descend
  # into the directory the link points to
  if [ -L "${dir}" ]; then
    dir="${dir}/"
  fi

  if [ ! -d "${dir}" ]; then
    quiet_err 1 "Directory [ ${dir} ] does not exist !"
  fi

  # Free space threshold: non-negative integer.
  case "${space_threshold}" in
    ''|*[!0-9]*) usage ;;
  esac

  # delete_count and dont_clean_threshold are later compared with
  # "[ ... -le/-gt ... ]", which fails on non-integers. A single leading
  # minus sign is tolerated because those comparisons handle it.
  case "${delete_count#-}" in
    ''|*[!0-9]*) usage ;;
  esac

  case "${dont_clean_threshold#-}" in
    ''|*[!0-9]*) usage ;;
  esac
}


# Delete one regular file after killing every process that has it open.
#
# Input (stdin): a single line "<number> <path>"; only <path> is used.
# Anything that is not a regular file (already gone, a directory, ...)
# is skipped with status 0. An input line without a path is reported
# through quiet_err, which terminates the calling (sub)shell.
profound_delete()
{
  local _line _item

  # Take the line verbatim (-r, empty IFS) so that backslashes and
  # trailing blanks in the file name survive. "|| true" keeps a final
  # line without newline, or empty input, from tripping "set -e" before
  # the explicit check below.
  _line=
  IFS= read -r _line || true

  # The path is everything after the first space.
  case "${_line}" in
    *" "*) _item=${_line#* } ;;
    *)     _item= ;;
  esac

  if [ -z "${_item}" ]; then
    # quiet_err expects the exit status as its first argument.
    quiet_err 1 "profound_delete: got empty _item"
  fi

  if [ ! -f "${_item}" ]; then
    # $_item was either deleted or is a directory. It's a bad idea to delete
    # directories here, as it may break a `find` that is still scanning them.
    # Anyway, the `find` feeding us was supposed to be run with '-type f'.
    return 0
  fi

  # Best effort: SIGKILL the holders of the file (PID is column 2 of the
  # lsof output, line 1 is the header). A failure here, e.g. a process
  # that exited meanwhile, must not abort the deletion under "set -e".
  lsof -- "${_item}" 2>/dev/null \
    | awk 'NR > 1 { print $2; }' \
    | uniq \
    | xargs -r -n 1 kill -9 2>/dev/null \
    || true

  verbose "profound_delete: deleting ${_item}"
  rm -f -- "${_item}"
}

# Split a line stream: feed each of the first N lines to a command,
# pass all remaining lines through to stdout unchanged.
#
# $1     command to run once per line for the first N lines; it gets the
#        line on its stdin
# $2     N, the number of leading lines handled by that command
# stdin  lines to process
# stdout lines N+1..., plus whatever the command itself writes to stdout
pipe_head()
{
  local _line _line_number _pipe_cmd _head_lines
  _line_number=0
  _pipe_cmd="$1"
  shift
  _head_lines="$1"
  shift

  # Lines are read verbatim (-r, empty IFS) and written with printf so
  # that backslashes and blanks in file names are not altered. The second
  # condition keeps a final line that lacks a trailing newline.
  while IFS= read -r _line || [ -n "${_line}" ]; do
    _line_number=$((_line_number + 1))
    if [ "${_line_number}" -le "${_head_lines}" ]; then
      # Deliberately unquoted: the command may carry its own arguments.
      printf '%s\n' "${_line}" | $_pipe_cmd
    else
      printf '%s\n' "${_line}"
    fi
  done
}

# Delete regular files under a directory whose ctime is older than a
# given number of days.
#
# $1  directory to clean
# $2  age threshold, handed to find as "-ctime +<age>"
#
# Globals read: stat_args (stat format that prints "<mtime> <path>").
#
# The matches are sorted by modification time. The 50 oldest go through
# profound_delete (which also kills processes holding them); the rest
# are removed in batches of 10.
# Limitation: paths containing a newline are not handled correctly,
# because the list is line-oriented.
time_cleanup()
{
  local _age _dir
  _dir="$1"
  shift
  _age="$1"
  shift

  # pipe_head emits "<mtime> <path>" lines. Strip the mtime field and
  # hand the paths NUL-separated to xargs, so rm gets exactly one
  # argument per file, whatever blanks or quotes the name contains.
  # Plain "rm -f": only regular files are listed, and a recursive delete
  # of a mis-split path could wipe a whole directory.
  find "${_dir}" -depth -mindepth 1 -ctime "+${_age}" -type f \
      -exec stat "${stat_args}" {} + 2>/dev/null \
    | sort -n \
    | pipe_head profound_delete 50 \
    | sed 's/^[^ ]* //' \
    | tr '\n' '\0' \
    | xargs -0 -n 10 rm -f -- 2>/dev/null
}

# Remove empty directories below a directory whose ctime is older than
# a given number of days. The directory itself is never removed
# (-mindepth 1).
#
# $1  directory to scan
# $2  age threshold, handed to find as "-ctime +<age>"
remove_old_directories()
{
  local _root _days

  verbose "remove_old_directories: start"

  _root="$1"
  shift
  _days="$1"
  shift

  # find's own diagnostics are discarded.
  find "${_root}" -depth -mindepth 1 -ctime +$_days -type d -empty -delete 2>/dev/null

  verbose "remove_old_directories: end"
}

# Remove symbolic links below a directory whose ctime is older than a
# given number of days. Only the links are removed, never their targets.
#
# $1  directory to scan
# $2  age threshold, handed to find as "-ctime +<age>"
remove_old_symlinks()
{
  verbose "remove_old_symlinks: start"

  local _dir _age
  _dir="$1"
  shift
  _age="$1"
  shift

  # No "-empty" here: that test is only ever true for regular files and
  # directories, so combined with "-type l" it matched nothing at all.
  find "$_dir" -depth -mindepth 1 -ctime "+${_age}" -type l -delete 2>/dev/null

  verbose "remove_old_symlinks: end"
}

# Decide whether more aggressive cleanup of a directory is warranted.
#
# $1  directory to check
# $2  free space threshold in GiB for the filesystem holding $1
# $3  "don't clean" threshold: no cleanup unless the directory's own
#     size exceeds this value
#
# Returns 0 (cleanup needed) when the filesystem has less free space
# than $2 AND the directory is larger than $3; returns 1 otherwise.
need_cleanup()
{
  local _dir _space_threshold _dont_clean_threshold _vartmp_size _free_space
  _dir="$1"
  shift
  _space_threshold="$1"
  shift
  _dont_clean_threshold=$1
  shift

  # -k fixes the unit to 1024-byte blocks on every platform and -P keeps
  # each filesystem on a single line, so column 4 of line 2 is always
  # the available space in KiB. KiB/1024/1024 = GiB.
  # A symbolic link as $_dir does not break this.
  _free_space=$( df -Pk "${_dir}" | awk 'NR == 2 { print int($4/1024/1024); }' )

  # If df gave no usable answer, or there is enough free space, do
  # nothing.
  if [ -z "${_free_space}" ] || [ "${_free_space}" -ge "${_space_threshold}" ]; then
    return 1
  fi

  # NOTE(review): bytes/1024/1024 is MiB, while the usage text documents
  # the -p threshold in Gbytes -- confirm which unit is intended.
  _vartmp_size=$( du -B1 -sx "${_dir}" 2>/dev/null | awk '{ print int($1/1024/1024); }' )
  # If du produced nothing, assume the directory is big so that cleanup
  # still happens.
  : "${_vartmp_size:=10000}"

  if [ "${_vartmp_size}" -gt "${_dont_clean_threshold}" ]; then
    return 0
  fi

  verbose "target dir: \"${_dir}\" uses ${_vartmp_size} which is less then don't delete threshold ${_dont_clean_threshold}"
  return 1
}

# Last-resort cleanup: delete files one at a time, re-checking before
# every deletion whether cleanup is still needed.
#
# $1  directory to clean
# $2  free space threshold, passed on to need_cleanup
# $3  maximum number of files to delete; a value <= 0 disables this stage
# $4  "don't clean" threshold, passed on to need_cleanup
#
# Globals read: stat_args (stat format that prints "<mtime> <path>").
#
# Candidates are all regular files under $1, sorted by modification
# time, least recently modified first; at most $3 of them are handed to
# profound_delete.
wary_cleanup()
{
  local _dir _space_threshold _delete_count _line _dont_clean_threshold
  _dir="$1"
  shift
  _space_threshold="$1"
  shift
  _delete_count="$1"
  shift
  _dont_clean_threshold=$1
  shift

  if [ "${_delete_count}" -le 0 ]; then
    return
  fi

  # Lines are passed on verbatim (read -r with empty IFS, printf) so
  # that backslashes and blanks in file names reach profound_delete
  # unmodified.
  find "${_dir}" -type f -exec stat "${stat_args}" {} + 2>/dev/null \
    | sort -n \
    | head -n "${_delete_count}" \
    | while IFS= read -r _line; do
        # Stop as soon as enough space has been reclaimed.
        if ! need_cleanup "${_dir}" "${_space_threshold}" "${_dont_clean_threshold}"; then
          break
        fi
        printf '%s\n' "${_line}" | profound_delete
      done
}

# Run the whole cleanup sequence on one directory.
#
# $1  directory to clean
# $2  free space threshold, passed on to need_cleanup
# $3  maximum number of files the last stage may delete
#
# Globals read: first_pass_threshold, second_pass_threshold,
#               dont_clean_threshold
#
# Stage 1 always runs: old empty directories, old symlinks and files
# older than first_pass_threshold are removed. Stages 2 (files older
# than second_pass_threshold) and 3 (wary_cleanup) run only while
# need_cleanup still reports that cleanup is needed.
clean_garbage()
{
  local _dir _space_threshold _delete_count
  _dir="$1"
  shift
  _space_threshold="$1"
  shift
  _delete_count="$1"
  shift

  remove_old_directories "${_dir}" "${first_pass_threshold}"
  remove_old_symlinks "${_dir}" "${first_pass_threshold}"
  time_cleanup "${_dir}" "${first_pass_threshold}"

  if need_cleanup "${_dir}" "${_space_threshold}" "${dont_clean_threshold}"; then
    verbose "clean_garbage: deleting +${second_pass_threshold} files"
    time_cleanup "${_dir}" "${second_pass_threshold}"
  fi

  if need_cleanup "${_dir}" "${_space_threshold}" "${dont_clean_threshold}"; then
    verbose "clean_garbage: using wary cleanup"
    # Use this function's own arguments, not the globals "dir" and
    # "delete_count", so the last stage works on what the caller passed.
    wary_cleanup "${_dir}" "${_space_threshold}" "${_delete_count}" "${dont_clean_threshold}"
  fi
}

#-- Platform-dependent variables ---------------------------------------

# Pick the stat(1) format for this platform. On both supported systems
# it prints "<modification time, seconds since the epoch> <file name>".
ya_os=$(uname -s | tr '[:upper:]' '[:lower:]')
if [ "${ya_os}" = "linux" ]; then
  stat_args="-c%Y %n"
elif [ "${ya_os}" = "freebsd" ]; then
  stat_args="-f%m %N"
else
  err 1 "Can't determinate OS type. Exiting."
fi

#-- Variables ----------------------------------------------------------

# Directory to clean (-d).
default_dir='/var/tmp/'

# Age thresholds for the first (-a) and second (-b) pass; they are
# handed to find as "-ctime +N".
default_first_pass_threshold='10'
default_second_pass_threshold='3'

# Free space threshold (-f) and "don't clean" threshold (-p).
default_space_threshold='10'
default_dont_clean_threshold='0'

# Upper bound on files deleted in the last stage (-n).
default_delete_count='50'

# Verbose output stays on unless -q is given.
default_quiet='NO'

#-- Main ---------------------------------------------------------------

# Parse and validate the command line, then run the cleanup.
# "$@" is quoted so that an argument containing blanks (for example a
# directory given with -d) reaches get_options as a single word.
get_options "$@"
check_options

verbose "Started cleanup"
clean_garbage "$dir" "$space_threshold" "$delete_count"
verbose "Finished cleanup"
