#!/usr/bin/env bash
# Usage: . ghe-backup-config
# GitHub Enterprise backup shell configuration.
#
# This file is sourced by the various utilities under bin and share/github-backup-utils to
# load in backup configuration and ensure things are configured properly.
#
# All commands in share/github-backup-utils/ should start with the following:
#
#     . $( dirname "${BASH_SOURCE[0]}" )/ghe-backup-config
#
# And all commands in bin/ should start with the following:
#
#     . $( dirname "${BASH_SOURCE[0]}" )/../share/github-backup-utils/ghe-backup-config
#
# This file relies on bash-only syntax (here-strings, [[ ]], process
# substitution), so make sure POSIX mode is off in the sourcing shell.
set +o posix
# Assume this script lives in share/github-backup-utils/ when setting the root
GHE_BACKUP_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Get the version from the version file.
BACKUP_UTILS_VERSION="$(cat "$GHE_BACKUP_ROOT/share/github-backup-utils/version")"

# If a version check was requested, show the current version and exit.
# This file is sourced, so the exit below terminates the sourcing script.
if [ -n "$GHE_SHOW_VERSION" ]; then
  echo "GitHub backup-utils v$BACKUP_UTILS_VERSION"
  exit 0
fi

# Print the usage text embedded in the running script and exit.
#
# The usage text is every line of "$0" that starts with "#/"; the first three
# characters of each such line are dropped before printing.
#
# Arguments: $1 - exit status to terminate with (optional, defaults to 1)
# shellcheck disable=SC2120 # the script name is always referenced
print_usage() {
  local exit_status="${1:-1}"

  grep '^#/' <"$0" | cut -c 4-
  exit "$exit_status"
}

# Show usage when GHE_SHOW_HELP is set, or when "--help" / "-h" appears among
# the positional parameters visible to this sourced file. print_usage exits,
# so nothing below runs once help has been requested.
if [ -n "$GHE_SHOW_HELP" ]; then
  print_usage
else
  for a in "$@"; do
    if [ "$a" = "--help" ] || [ "$a" = "-h" ]; then
      print_usage
    fi
  done
fi

# Add the bin and share/github-backup-utils dirs to PATH
PATH="$GHE_BACKUP_ROOT/bin:$GHE_BACKUP_ROOT/share/github-backup-utils:$PATH"
# shellcheck source=share/github-backup-utils/bm.sh
. "$GHE_BACKUP_ROOT/share/github-backup-utils/bm.sh"

# Save off GHE_HOSTNAME from the environment since we want it to override the
# backup.config value when set.
GHE_HOSTNAME_PRESERVE="$GHE_HOSTNAME"

# Source in the backup config file from the copy specified in the environment
# first and then fall back to the backup-utils root, home directory and system.
# Only the first candidate that exists as a regular file is sourced; an unset
# or empty GHE_BACKUP_CONFIG simply fails the -f test and is skipped.
# GHE_BACKUP_CONFIG is updated to the path that was actually loaded.
config_found=false
for f in "$GHE_BACKUP_CONFIG" "$GHE_BACKUP_ROOT/backup.config" \
  "$HOME/.github-backup-utils/backup.config" "/etc/github-backup-utils/backup.config"; do
  if [ -f "$f" ]; then
    GHE_BACKUP_CONFIG="$f"
    # shellcheck disable=SC1090 # This is a user-supplied value that can't be predicted
    . "$GHE_BACKUP_CONFIG"
    config_found=true
    break
  fi
done

# Validate and configure moreutils parallel support.
#
# Returns 0 immediately unless GHE_PARALLEL_ENABLED is "yes". Otherwise:
#   - sets GHE_PARALLEL_COMMAND to the first executable moreutils variant found
#     under /usr/bin, falling back to plain "parallel";
#   - exits 1 if the -h output of that command lacks the moreutils usage line;
#   - builds GHE_PARALLEL_COMMAND_OPTIONS and GHE_PARALLEL_RSYNC_COMMAND_OPTIONS
#     from GHE_PARALLEL_MAX_JOBS, GHE_PARALLEL_RSYNC_MAX_JOBS and
#     GHE_PARALLEL_MAX_LOAD.
ghe_parallel_check() {
  [ "$GHE_PARALLEL_ENABLED" = "yes" ] || return 0

  # Machines can carry both moreutils parallel and GNU parallel, with the
  # moreutils one installed under a distribution-specific name. Probe the
  # known names before settling for whatever "parallel" is on PATH.
  GHE_PARALLEL_COMMAND="parallel"
  local candidate
  for candidate in \
    /usr/bin/parallel.moreutils \
    /usr/bin/parallel_moreutils \
    /usr/bin/moreutils.parallel \
    /usr/bin/moreutils_parallel; do
    if [ -x "$candidate" ]; then
      GHE_PARALLEL_COMMAND="$candidate"
      break
    fi
  done

  # The usage line is what tells moreutils parallel apart from GNU parallel.
  if ! $GHE_PARALLEL_COMMAND -h | grep -q "parallel \[OPTIONS\] command -- arguments"; then
    echo "Error: moreutils not found. Please install https://joeyh.name/code/moreutils" 1>&2
    exit 1
  fi

  if [ -n "$GHE_PARALLEL_MAX_JOBS" ]; then
    GHE_PARALLEL_COMMAND_OPTIONS="-j $GHE_PARALLEL_MAX_JOBS"
    # The rsync job limit follows GHE_PARALLEL_MAX_JOBS unless it was set
    # explicitly. This is only applicable to ghe-restore-repositories currently.
    if [ -z "$GHE_PARALLEL_RSYNC_MAX_JOBS" ]; then
      GHE_PARALLEL_RSYNC_MAX_JOBS="$GHE_PARALLEL_MAX_JOBS"
    fi
  fi

  if [ -n "$GHE_PARALLEL_RSYNC_MAX_JOBS" ]; then
    GHE_PARALLEL_RSYNC_COMMAND_OPTIONS="-j $GHE_PARALLEL_RSYNC_MAX_JOBS"
  fi

  # The load limit is appended to both option strings.
  if [ -n "$GHE_PARALLEL_MAX_LOAD" ]; then
    GHE_PARALLEL_COMMAND_OPTIONS="${GHE_PARALLEL_COMMAND_OPTIONS} -l $GHE_PARALLEL_MAX_LOAD"
    GHE_PARALLEL_RSYNC_COMMAND_OPTIONS="${GHE_PARALLEL_RSYNC_COMMAND_OPTIONS} -l $GHE_PARALLEL_MAX_LOAD"
  fi
}

# Abort with exit status 2 if none of the candidate config files existed (and
# so none was sourced above), listing every location that was tried. The
# GHE_BACKUP_CONFIG line is only printed when that variable is non-empty.
if ! $config_found; then
  echo "Error: No backup configuration file found. Tried:" 1>&2
  [ -n "$GHE_BACKUP_CONFIG" ] && echo " - $GHE_BACKUP_CONFIG" 1>&2
  echo " - $GHE_BACKUP_ROOT/backup.config" 1>&2
  echo " - $HOME/.github-backup-utils/backup.config" 1>&2
  echo " - /etc/github-backup-utils/backup.config" 1>&2
  exit 2
fi

# If verbose logging is enabled, redirect fd 3 to stdout or the specified log file;
# otherwise, redirect it to /dev/null. Write verbose output to fd 3.
if [ -n "$GHE_VERBOSE" ]; then
  if [ -n "$GHE_VERBOSE_LOG" ]; then
    if [ "$GHE_PARALLEL_ENABLED" != "yes" ]; then
      # Plain append to the log file.
      exec 3>>"$GHE_VERBOSE_LOG"
    else
      # In parallel mode every line is prefixed with a timestamp and the name
      # of the calling script before being appended to the log. That needs an
      # awk with strftime(), so probe for it first.
      if ! echo | awk '{ print strftime("%b %d %H:%M:%S"); fflush(); }' &>/dev/null; then
        echo "Error: awk command failed. Please install https://www.gnu.org/software/gawk" 1>&2
        exit 1
      fi
      # Basename of the script that sourced this file, taken from `caller`.
      calling_script_name="$(caller | sed 's:.*/::')"
      # fd 3 feeds an awk process that does the prefixing and appends to the log.
      exec 3> >(awk -v c="$calling_script_name" '{ print strftime("%b %d %H:%M:%S"), c":", $0; fflush(); }' >>"$GHE_VERBOSE_LOG")
    fi
  else
    # Verbose without a log file: fd 3 is a copy of stdout.
    exec 3>&1
  fi
else
  exec 3>/dev/null
fi

# Restore saved off hostname: a GHE_HOSTNAME that was already set before the
# config file was sourced wins over the value from the config file.
[ -n "$GHE_HOSTNAME_PRESERVE" ] && GHE_HOSTNAME="$GHE_HOSTNAME_PRESERVE"

# Check that the GHE hostname is set.
if [ -z "$GHE_HOSTNAME" ]; then
  echo "Error: GHE_HOSTNAME not set in config file." 1>&2
  exit 2
fi

# Check that the GHE data directory is set.
if [ -z "$GHE_DATA_DIR" ]; then
  echo "Error: GHE_DATA_DIR not set in config file." 1>&2
  exit 2
fi

# Convert the data directory path to an absolute path, basing any relative
# paths on the backup-utils root, and using readlink, if available, to
# canonicalize the path. If the cd or `readlink -m` fails, the path is simply
# prefixed with the backup-utils root instead.
if [ "${GHE_DATA_DIR:0:1}" != "/" ]; then
  GHE_DATA_DIR="$(cd "$GHE_BACKUP_ROOT" && readlink -m "$GHE_DATA_DIR" 2>/dev/null || echo "$GHE_BACKUP_ROOT/$GHE_DATA_DIR")"
fi
export GHE_DATA_DIR

# Assign the Release File path if it hasn't been provided (eg: by test suite)
: "${GHE_RELEASE_FILE:="/etc/github/enterprise-release"}"

# Check that utils are not being run directly on GHE appliance. The presence
# of the release file is what identifies the appliance.
if [ -f "$GHE_RELEASE_FILE" ]; then
  echo "Error: Backup Utils cannot be run on the GitHub Enterprise host." 1>&2
  echo "       The backup utilities should be run on a host dedicated to" 1>&2
  echo "       long-term permanent storage and must have network connectivity" 1>&2
  echo "       with the GitHub Enterprise appliance." 1>&2
  exit 1
fi

# Creating the data directory is on by default; any value other than "yes"
# disables it.
GHE_CREATE_DATA_DIR=${GHE_CREATE_DATA_DIR:-yes}

# Check that the data directory is set and create it if it doesn't exist.
if [ ! -d "$GHE_DATA_DIR" ] && [ "$GHE_CREATE_DATA_DIR" = "yes" ]; then
  echo "Creating the backup data directory ..." 1>&3
  mkdir -p "$GHE_DATA_DIR"
fi

# Still missing (creation disabled or mkdir failed): give up with status 8.
if [ ! -d "$GHE_DATA_DIR" ]; then
  echo "Error: GHE_DATA_DIR $GHE_DATA_DIR does not exist." >&2
  exit 8
fi

# Set some defaults if needed.
#
# Every default below uses the `: "${VAR:=value}"` idiom: the value is assigned
# only when VAR is unset or empty. The expansion is quoted so that the result
# is not word-split or glob-expanded while being passed to `:`.
: "${GHE_NUM_SNAPSHOTS:=10}"

# Generate a backup timestamp if one has not already been generated.
# We export the variable so the process group shares the same value.
: "${GHE_SNAPSHOT_TIMESTAMP:=$(date +"%Y%m%dT%H%M%S")}"
export GHE_SNAPSHOT_TIMESTAMP

# Set the current snapshot directory to <data-dir>/<timestamp>. This is where
# all backups should be written for the current invocation.
GHE_SNAPSHOT_DIR="$GHE_DATA_DIR/$GHE_SNAPSHOT_TIMESTAMP"
export GHE_SNAPSHOT_DIR

# The root filesystem location. This must be used so that tests can override
# the root as a local directory location.
: "${GHE_REMOTE_ROOT_DIR:=}"

# The root location of persistent data and applications on the remote side. This
# is always "/data" for GitHub instances. Use of this variable allows
# the location to be overridden in tests.
: "${GHE_REMOTE_DATA_DIR:=/data}"

# The root location of user data stores such as git repositories, pages sites,
# elasticsearch indices, etc. This is "/data" under 1.x filesystem layouts and
# "/data/user" under the 2.x filesystem layout. The location is adjusted
# dynamically in ghe_remote_version_config() immediately after obtaining the
# remote version. Utilities that transfer data in and out of the appliance
# should use this variable to ensure proper behavior under different versions.
: "${GHE_REMOTE_DATA_USER_DIR:=$GHE_REMOTE_DATA_DIR}"

# The location of the license file on the remote side. This is always
# "/data/enterprise/enterprise.ghl" for GitHub instances. Use of this variable
# allows the location to be overridden in tests.
: "${GHE_REMOTE_LICENSE_FILE:=$GHE_REMOTE_DATA_DIR/enterprise/enterprise.ghl}"

# The number of seconds to wait for in progress git-gc processes to complete
# before starting the sync of git data. See share/github-backup-utils/ghe-backup-repositories-rsync
# for more information. Default: 10 minutes.
: "${GHE_GIT_COOLDOWN_PERIOD:=600}"

# Set "true" to get verbose logging of all ssh commands on stderr
: "${GHE_VERBOSE_SSH:=false}"

# The location of the cluster configuration file on the remote side.
# This is always "/data/user/common/cluster.conf" for GitHub Cluster instances.
# Use of this variable allows the location to be overridden in tests.
: "${GHE_REMOTE_CLUSTER_CONF_FILE:=$GHE_REMOTE_DATA_DIR/user/common/cluster.conf}"

# The location of the file used to disable GC operations on the remote side.
: "${SYNC_IN_PROGRESS_FILE:=$GHE_REMOTE_DATA_USER_DIR/repositories/.sync_in_progress}"

# Base path for temporary directories and files.
: "${TMPDIR:=/tmp}"

###############################################################################
### Dynamic remote version config

# Switch the remote user-data location to "$GHE_REMOTE_DATA_DIR/user" and
# export the remote path variables so child processes inherit them. This is
# called from ghe_remote_version_required() right after the remote version has
# been obtained. Any arguments passed in are ignored.
ghe_remote_version_config() {
  export GHE_REMOTE_DATA_DIR
  export GHE_REMOTE_DATA_USER_DIR="$GHE_REMOTE_DATA_DIR/user"
  export GHE_REMOTE_LICENSE_FILE
}

###############################################################################
### Utility functions

# If we don't have a readlink command, define a minimal stand-in that parses
# `ls -ld` output.
#
# Arguments: $1 - path to inspect
# Outputs:   the link target (everything after the last "-> ") on stdout
# Returns:   1, printing nothing, when $1 is not a symbolic link
if ! type readlink 1>/dev/null 2>&1; then
  readlink() {
    # -L (is a symlink) is the right test here: only symlinks have a "-> target"
    # part in their ls output, and the link target need not be executable or
    # even exist.
    if [ -L "$1" ]; then
      ls -ld "$1" | sed 's/.*-> //'
    else
      return 1
    fi
  }
fi

# Run ghe-host-check and establish the version of the remote GitHub instance in
# the exported GHE_REMOTE_VERSION variable. If the remote version has already
# been established then don't perform the host check again. Utilities in share/github-backup-utils
# that need the remote version should use this function instead of calling
# ghe-host-check directly to reduce ssh roundtrips. The top-level ghe-backup and
# ghe-restore commands establish the version for all subcommands.
#
# Arguments: passed through unchanged to ghe-host-check
# Outputs:   the ghe-host-check output on stdout (only when the check runs)
# Globals:   sets and exports GHE_HOSTNAME and GHE_REMOTE_VERSION, then calls
#            ghe_parse_remote_version and ghe_remote_version_config
# Returns:   0
ghe_remote_version_required() {
  # Kept local so the captured output does not leak into the sourcing script.
  local host_check_output

  if [ -z "$GHE_REMOTE_VERSION" ]; then
    host_check_output=$(ghe-host-check "$@")
    printf '%s\n' "$host_check_output"

    # override hostname w/ ghe-host-check output because the port could have
    # been autodetected to 122.
    GHE_HOSTNAME=$(printf '%s\n' "$host_check_output" | sed 's/Connect \(.*:[0-9]*\) OK.*/\1/')
    export GHE_HOSTNAME

    # The version is whatever sits inside the trailing parentheses.
    GHE_REMOTE_VERSION=$(printf '%s\n' "$host_check_output" | sed 's/.*(\(.*\))/\1/')
    export GHE_REMOTE_VERSION

    ghe_parse_remote_version "$GHE_REMOTE_VERSION"
    ghe_remote_version_config "$GHE_REMOTE_VERSION"
  fi
  true
}

# Parse a version string into major, minor and patch parts and echo them as
# "<major> <minor> <patch>".
#
# A leading "v" is dropped, any components beyond the third are ignored, and
# the patch part is cut at its first letter (so "0rc1" becomes "0"). Missing
# components come out empty.
#
# Arguments: $1 - version string, e.g. "v2.19.3"
# Outputs:   the three parts on stdout, separated by single spaces
ghe_parse_version() {
  local version_major version_minor version_patch _extra

  # Split on dots in the shell itself: no subprocesses, and a string without
  # any dot yields empty minor/patch parts.
  IFS=. read -r version_major version_minor version_patch _extra <<<"${1#v}"
  version_patch=${version_patch%%[a-zA-Z]*}

  echo "$version_major $version_minor $version_patch"
}
# Parse major, minor, and patch parts of the remote appliance version and store
# in GHE_VERSION_MAJOR, GHE_VERSION_MINOR, and GHE_VERSION_PATCH variables,
# which are exported. This is called automatically from
# ghe_remote_version_required so shouldn't be used directly.
#
# Scripts use these variables to alter behavior based on what's supported on the
# appliance version.
#
# Arguments: $1 - remote version string, handed to ghe_parse_version as one word
ghe_parse_remote_version() {
  # $1 is quoted so it reaches ghe_parse_version unsplit and unglobbed; `read`
  # then splits the space-separated reply into the three variables.
  read -r GHE_VERSION_MAJOR GHE_VERSION_MINOR GHE_VERSION_PATCH <<<"$(ghe_parse_version "$1")"
  export GHE_VERSION_MAJOR GHE_VERSION_MINOR GHE_VERSION_PATCH
}

# Print the <host> part of a "<host>:<port>" or plain "<host>" string.
# Everything before the last colon counts as the host; a string without a
# colon is printed unchanged. Used to break hostspecs with non-standard ports
# down for rsync commands.
ssh_host_part() {
  case "$1" in
    *:*)
      echo "${1%:*}"
      ;;
    *)
      echo "$1"
      ;;
  esac
}

# Print the <port> part of a "<host>:<port>" or plain "<host>" string.
# Everything after the last colon counts as the port; a string without a
# colon yields the default port 22. Used to break hostspecs with non-standard
# ports down for rsync commands.
ssh_port_part() {
  case "$1" in
    *:*)
      echo "${1##*:}"
      ;;
    *)
      echo 22
      ;;
  esac
}

# Usage: ghe_remote_logger <message>...
# Send a message to `logger -t backup-utils` on the remote instance through
# ghe-ssh. Failures are deliberately ignored, so this always returns 0.
# Note: Use sparingly. Remote logging requires an ssh connection per invocation.
ghe_remote_logger() {
  if ! echo "$@" | ghe-ssh "$GHE_HOSTNAME" -- logger -t backup-utils; then
    # Best effort only: a logging failure must never fail the caller.
    true
  fi
}

# Usage: ghe_verbose <message>
# Write the message to fd 3 when verbose mode is enabled (GHE_VERBOSE is
# non-empty); otherwise do nothing.
ghe_verbose() {
  [ -z "$GHE_VERBOSE" ] || echo "$@" 1>&3
}

# Usage: ghe_debug <message> OR echo <message> | ghe_debug
# Log to fd 3 if debug mode is enabled (GHE_DEBUG is non-empty).
#
# With arguments, they are joined into one "Debug: ..." line; backslash
# escapes in the message are interpreted (echo -e). Without arguments, and only
# when stdin is a pipe, a blank separator is written and then every input line
# is echoed verbatim behind a "Debug: " prefix.
ghe_debug() {
  if [ -z "$GHE_DEBUG" ]; then
    return 0
  fi

  local line
  if [ $# -ne 0 ]; then
    echo -e "Debug: $*" 1>&3
  elif [ -p /dev/stdin ]; then
    # Two newlines separate the piped block from whatever preceded it.
    printf '\n\n' 1>&3
    # IFS= and -r keep leading whitespace and backslashes intact; the extra
    # -n test picks up a final line that lacks a trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
      printf 'Debug: %s\n' "$line" 1>&3
    done </dev/stdin
  fi
}

# Turn a dotted version string (optionally prefixed with "v") into a single
# integer-like string suitable for numeric comparison: the first component is
# printed as-is and the next three are zero-padded to three digits each, e.g.
# "2.16.23" -> "2016023000".
version() {
  echo "${@#v}" |
    awk 'BEGIN { FS = "." } { printf "%d%03d%03d%03d\n", $1, $2, $3, $4 }'
}

# Filter: reads repository paths on stdin and writes each path's parent
# directory on stdout.
#
# The list of gists returned by the source changed in 2.16.23, 2.17.14,
# 2.18.8, and 2.19.3. On remote versions (GHE_REMOTE_VERSION) at or past those
# releases, lines containing "gist" are passed through unmodified and only the
# other paths are truncated; on older versions every line is truncated.
#
# Globals: sets GIST_FILTER on the newer versions, unsets it otherwise.
fix_paths_for_ghe_version() {
  # Start from "old version" and opt in when any of the thresholds is met.
  unset GIST_FILTER
  if [[ "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.19.3)" ]] ||
    [[ "$GHE_REMOTE_VERSION" =~ 2.18. && "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.18.8)" ]] ||
    [[ "$GHE_REMOTE_VERSION" =~ 2.17. && "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.17.14)" ]] ||
    [[ "$GHE_REMOTE_VERSION" =~ 2.16. && "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.16.23)" ]]; then
    # sed: on lines matching /gist/, branch past the remaining commands.
    GIST_FILTER="-e /gist/b"
  fi

  # The sed program below does what `dirname` would do for each line, minus
  # the fork+exec per path:
  #   1. drop one trailing slash
  #   2. a result without any slash becomes "."
  #   3. cut everything from the final slash onwards
  # GIST_FILTER is intentionally unquoted: it must expand to two words, or to
  # nothing at all when unset.
  sed $GIST_FILTER -e 's/\/$//; s/^[^\/]*$/./; s/\/[^\/]*$//'
}

# Ask the remote instance, via ghe-ssh, whether the "mysql.backup.binary"
# setting is true. The exit status is that of the ghe-ssh call.
is_binary_backup_feature_on() {
  local setting="mysql.backup.binary"
  ghe-ssh "$GHE_HOSTNAME" ghe-config --true "$setting"
}

# Succeeds when the directory given as $1 contains the
# "mysql-binary-backup-sentinel" file that marks a binary backup.
is_binary_backup() {
  [ -f "${1}/mysql-binary-backup-sentinel" ]
}

# Check if a given service is managed externally on the appliance or in a snapshot.
# Usage: is_service_external $service [$config_file]
# Pass in the config file to check if the service is managed externally in the snapshot.
#
# Only the "mysql" service is known. With a config file, the
# "mysql.external.enabled" key is read from that file with `git config` and
# must equal "true"; without one, the remote instance is asked through ghe-ssh.
#
# Returns: 0 when the service is external, non-zero otherwise (always 1 for an
#          unknown service).
is_service_external(){
  # Locals keep these common names from clobbering variables of the caller.
  local service=$1
  local config_file=$2
  local enabled

  case "$service" in
    "mysql")
      if [ -n "$config_file" ]; then
        enabled=$(GIT_CONFIG="$config_file" git config mysql.external.enabled)
        [ "$enabled" == "true" ]
      else
        ghe-ssh "$GHE_HOSTNAME" -- ghe-config --true "mysql.external.enabled"
      fi
      ;;
    *)
      return 1
      ;;
  esac
}

# Succeeds when "$GHE_REMOTE_ROOT_DIR/etc/github/configured" exists as a
# regular file on the remote instance; the test itself runs remotely via
# ghe-ssh.
is_instance_configured(){
  local marker_file="$GHE_REMOTE_ROOT_DIR/etc/github/configured"
  ghe-ssh "$GHE_HOSTNAME" -- "[ -f '$marker_file' ]"
}

# Succeeds when an external-database (BYODB) snapshot is being restored onto
# an appliance that uses the internal MySQL database, i.e. when:
# - is_external_database_target fails (target is NOT BYODB), and
# - is_external_database_snapshot succeeds (snapshot is BYODB).
external_database_snapshot_to_internal_database(){
  if is_external_database_target; then
    return 1
  fi
  is_external_database_snapshot
}

# Succeeds when an internal-database snapshot is being restored onto an
# appliance that uses an external MySQL database (BYODB), i.e. when:
# - is_external_database_target succeeds (target is BYODB), and
# - is_external_database_snapshot fails (snapshot is NOT BYODB).
internal_database_snapshot_to_external_database(){
  # A failing target check decides the result; its status is passed on as-is.
  is_external_database_target || return
  ! is_external_database_snapshot
}

# Succeeds when an external MySQL database is involved in the restore.
#
# When settings are being restored (RESTORE_SETTINGS is exactly "true"), only
# the snapshot is considered. In every other case, including RESTORE_SETTINGS
# being unset or empty, the snapshot is checked first and then the target
# appliance.
is_external_database_target_or_snapshot(){
  # Compare the flag as a string instead of executing its value as a command:
  # an unset or empty flag must not count as "true".
  if [ "${RESTORE_SETTINGS:-false}" = "true" ]; then
    # If restoring settings, only check if the snapshot being restored was from an appliance with external DB configured.
    is_external_database_snapshot
  else
    # Check if restoring a snapshot with an external database configured, or restoring
    # to an appliance with an external database configured.
    is_external_database_snapshot || is_external_database_target
  fi
}

# Succeeds when is_service_external reports the "mysql" service as external
# on the target appliance (no config file is passed).
is_external_database_target(){
  local service_name="mysql"
  is_service_external "$service_name"
}

# Succeeds when is_service_external reports the "mysql" service as external
# according to the settings.json stored in the snapshot being restored
# ($GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT).
is_external_database_snapshot(){
  local snapshot_settings="$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/settings.json"
  is_service_external "mysql" "$snapshot_settings"
}

# Succeeds when the snapshot is an external-database snapshot AND the directory
# given as $1 holds the "logical-external-database-backup-sentinel" file. That
# file exists if the backup was taken via our logical backup strategy.
is_default_external_database_snapshot(){
  is_external_database_snapshot || return
  [ -f "$1/logical-external-database-backup-sentinel" ]
}

# Print a message and require the user to type "yes" before continuing.
#
# Arguments: $1 - message shown above the prompt
# Input:     answers are read line by line from stdin; "yes", "Yes" or "YES"
#            confirms, an empty line repeats the prompt
# Exits:     1 (after printing "Restore aborted." on stderr) on any other
#            answer, and also when stdin ends before a confirmation was given
prompt_for_confirmation(){
  local response
  local confirmed=false

  echo "$1"
  printf "Type 'yes' to continue: "

  # The extra -n test accepts a final answer that lacks a trailing newline.
  while read -r response || [ -n "$response" ]; do
    case $response in
      yes|Yes|YES)
        confirmed=true
        break
        ;;
      '')
        printf "Type 'yes' to continue: "
        ;;
      *)
        echo "Restore aborted." 1>&2
        exit 1
        ;;
    esac
  done

  echo

  # Running out of input is not consent: without an explicit "yes" we stop.
  if ! $confirmed; then
    echo "Restore aborted." 1>&2
    exit 1
  fi
}

# Function to restore a secret setting stored in a file.
#   restore-secret <description> <file-name> <setting-name>
#
# If "$GHE_RESTORE_SNAPSHOT_PATH/<file-name>" exists, its content is applied on
# the remote instance by piping a `ghe-config <setting-name> <value>` command
# to /bin/bash over ghe-ssh. Nothing happens when the file is missing.
restore-secret() {
  local description=$1
  local secret_file="$GHE_RESTORE_SNAPSHOT_PATH/$2"
  local setting_name=$3

  if [ -f "$secret_file" ]; then
    echo "Restoring $description ..."
    # %q quotes both words for the remote bash, so quotes, spaces, "$" or
    # newlines inside the secret can neither break nor extend the command.
    printf 'ghe-config %q %q\n' "$setting_name" "$(cat "$secret_file")" |
      ghe-ssh "$GHE_HOSTNAME" -- /bin/bash
  fi
}
