#!/usr/bin/env bash
#/ Usage: ghe-restore-storage <host>
#/
#/ Restore storage objects from an rsync snapshot.
#/
#/ Note: This script typically isn't called directly. It's invoked by the
#/ ghe-restore command.
set -e

# Bring in the backup configuration
# shellcheck source=share/github-backup-utils/ghe-backup-config
. "$( dirname "${BASH_SOURCE[0]}" )/ghe-backup-config"

# Check to make sure moreutils parallel is installed and working properly
ghe_parallel_check

# Show usage and bail with no arguments
[ -z "$*" ] && print_usage

# NOTE: "$(basename $0)" is kept exactly as written so this label stays
# identical to the one passed to the matching bm_end at the end of the script.
bm_start "$(basename $0)"

# Grab host arg
GHE_HOSTNAME="$1"

# The snapshot to restore should be set by the ghe-restore command but this lets
# us run this script directly.
: "${GHE_RESTORE_SNAPSHOT:=current}"

# Find the objects to restore. Each output line is what `wc -c <file>` prints,
# i.e. "<size in bytes> <path relative to the snapshot>".
#
# A snapshot without a storage/ directory yields an empty list (so the warning
# below is printed and we exit 0) instead of tripping `set -e` on find's
# "No such file or directory" failure. A snapshot directory that cannot be
# entered at all is still a hard error, as is any other find failure.
storage_paths=$(
  cd "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/" || exit 1
  [ -d storage ] || exit 0
  find storage -mindepth 4 -maxdepth 4 -type f -exec wc -c {} \;
)

# No need to restore anything, early exit
if [ -z "$storage_paths" ]; then
  echo "Warning: Storage backup missing. Skipping ..."
  exit 0
fi

# Verify the remote host first; this also populates the GHE_REMOTE_XXX variables.
ghe_remote_version_required "$GHE_HOSTNAME"

# Break the host argument into its port and host components.
port=$(ssh_port_part "$GHE_HOSTNAME")
host=$(ssh_host_part "$GHE_HOSTNAME")

# Login name for ssh's -l option: whatever precedes the "@" in the host part,
# or "admin" when the host part carries no "user@" prefix.
user=${host%@*}
if [ "$user" = "$host" ]; then
  user="admin"
fi

# Single-host defaults; the cluster branch below overrides them.
ssh_config_file_opt=
opts="$GHE_EXTRA_SSH_OPTS"
hostnames=$host

# Local scratch directory and the files kept in it.
tempdir=$(mktemp -d -t backup-utils-restore-XXXXXX)
tmp_list=$tempdir/tmp_list
routes_list=$tempdir/routes_list

# Matching scratch directory on the remote host.
remote_tempdir=$(ghe-ssh "$GHE_HOSTNAME" -- mktemp -d -t backup-utils-restore-XXXXXX)
remote_tmp_list=$remote_tempdir/remote_tmp_list
remote_routes_list=$remote_tempdir/remote_routes_list

# In cluster mode, target every storage-server node and talk to them through a
# generated ssh config file with host-key and password prompts disabled.
if $CLUSTER; then
  ssh_config_file="$tempdir/ssh_config"
  ssh_config_file_opt="-F $ssh_config_file"
  opts="$opts -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no"
  hostnames=$(ghe-cluster-nodes "$GHE_HOSTNAME" "storage-server")
  ghe-ssh-config "$GHE_HOSTNAME" "$hostnames" > "$ssh_config_file"
fi

# Remove the local and remote scratch directories used by this restore.
#
# Globals:  tempdir (read), remote_tempdir (read), GHE_HOSTNAME (read)
# Returns:  always 0 -- cleanup is best-effort and must never change the
#           outcome of the script when it runs from the EXIT trap.
cleanup() {
  # Quote the path so a TMPDIR containing whitespace removes the intended
  # directory only, and skip the call entirely if the variable was never set.
  if [ -n "$tempdir" ]; then
    rm -rf -- "$tempdir" || true
  fi

  # Only contact the remote host when there is a remote directory to remove.
  # A failed ssh here is deliberately ignored (best-effort cleanup).
  if [ -n "$remote_tempdir" ]; then
    ghe-ssh "$GHE_HOSTNAME" -- rm -rf "$remote_tempdir" || true
  fi

  return 0
}

trap 'cleanup' EXIT

# Find the routes (servers) for each storage object available locally.
# Sends a list of "<oid> <size>" tuples with the following format:
#
# # OID bytes
# b8a48b6b122b4ef8175348d1d6fbd846d3b3ccc8fd7552b79f91125c4958e43b 5592001
# b851fd1f147c644a9de778f19090ea785b415c69e2a2fba35a65144fa2753ab9 7340032
# b65f657194ca6202c17b5062e4afc11843fc892a3f2febef8ac10971db7689a8 5591634
# b63c30f6f885e59282c2aa22cfca846516b5e72621c10a58140fb04d133e2c17 5592492
# ...
bm_start "$(basename "$0") - Building object list"
# storage_paths lines are "<size> <path>": swap the two columns, then keep only
# the last path component (the OID) in front of the size.
echo "$storage_paths" | awk '{print $2 " " $1}' | awk -F/ '{print $NF }' > "$tmp_list"
bm_end "$(basename "$0") - Building object list"

# The server returns the list of servers where the objects will be sent:
#
# # OID SERVER1 SERVER2 SERVER3
# b8a48b6b122b4ef8175348d1d6fbd846d3b3ccc8fd7552b79f91125c4958e43b server1 server2 server3
# bc4cdd292e6b5387df2a42a907fcd5f3b6804a5d5ab427184faea5ef118d635e server1 server2 server3
# ...
#
# One route per line.
#
# NOTE: The route generation is performed on the appliance as it is considerably
# more performant than performing over an SSH pipe.
#
# NOTE(review): the `cat ... | ghe_debug` pipelines below are kept as pipes on
# purpose. ghe_debug is defined outside this file and may depend on its stdin
# being a pipe -- confirm before replacing them with input redirections.
bm_start "$(basename "$0") - Transferring object list"
cat "$tmp_list" | ghe-ssh "$GHE_HOSTNAME" -- sponge "$remote_tmp_list"
cat "$tmp_list" | ghe_debug
bm_end "$(basename "$0") - Transferring object list"

bm_start "$(basename "$0") - Generating routes"
echo "cat $remote_tmp_list | github-env ./bin/storage-cluster-restore-routes > $remote_routes_list" | ghe-ssh "$GHE_HOSTNAME" /bin/bash
ghe-ssh "$GHE_HOSTNAME" -- cat "$remote_routes_list" | ghe_debug
bm_end "$(basename "$0") - Generating routes"

bm_start "$(basename "$0") - Fetching routes"
# Compress on the remote side and decompress locally to cut transfer size.
ghe-ssh "$GHE_HOSTNAME" -- gzip -c "$remote_routes_list" | gzip -d > "$routes_list"
cat "$routes_list" | ghe_debug
bm_end "$(basename "$0") - Fetching routes"

bm_start "$(basename "$0") - Processing routes"
# For every "<oid> <server>..." route line, write the object's relative path
# (<1st char>/<first 2 chars>/<chars 3-4>/<oid>) into one file list per
# server: $tempdir/<server>.rsync
awk -v tempdir="$tempdir" '{ for(i=2;i<=NF;i++){ print substr($1,1,1) "/" substr($1,1,2) "/" substr($1,3,2) "/" $1 > (tempdir"/"$i".rsync") }}' < "$routes_list"
# Look up the generated per-server lists once and reuse the result for both
# the debug output and the emptiness check. A failing find is tolerated here
# (treated like "no lists"), matching the previous inline substitutions.
rsync_lists=$(find "$tempdir" -maxdepth 1 -name '*.rsync') || true
ghe_debug "\n$rsync_lists"
bm_end "$(basename "$0") - Processing routes"

if [ -z "$rsync_lists" ]; then
  echo "Warning: no routes found, skipping storage restore ..."
  exit 0
fi

# rsync all the objects to the storage server where they belong.
# One rsync invocation per server available.
bm_start "$(basename "$0") - Restoring objects"

# Start from an empty list so only the commands built below are ever run.
rsync_commands=()
for file_list in "$tempdir"/*.rsync; do
  # In cluster mode the file list is named after its target node
  # (<server>.rsync); otherwise everything goes to the single host.
  if $CLUSTER; then
    server=$(basename "$file_list" .rsync)
  else
    server=$host
  fi

  # Owner of the remote storage directory; rsync is run as that user on the
  # receiving side. Falls back to "git" when the remote stat fails.
  # $ssh_config_file_opt is intentionally unquoted: it is either empty or the
  # two words "-F <file>".
  storage_user=$(ghe-ssh $ssh_config_file_opt "$server:$port" -- stat -c %U /data/user/storage || echo git)

  # Each entry is a complete shell snippet; variables are expanded now, and the
  # resulting text is later run via eval or handed to the parallel command.
  rsync_commands+=("
  if [ -n \"$GHE_VERBOSE\" ]; then
    echo \"* Transferring data to $server ...\" 1>&3
  fi

  ghe-rsync -arvHR --delete \
    -e \"ssh -q $opts -p $port $ssh_config_file_opt -l $user\" \
    --rsync-path=\"sudo -u $storage_user rsync\" \
    --files-from=\"$file_list\" \
    --size-only \
    \"$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/storage/./\" \
    \"$server:$GHE_REMOTE_DATA_USER_DIR/storage/\" 1>&3
    ")
done

if [ "$GHE_PARALLEL_ENABLED" = "yes" ]; then
  # Both variables are intentionally unquoted so they split into the command
  # name and its individual options.
  $GHE_PARALLEL_COMMAND $GHE_PARALLEL_RSYNC_COMMAND_OPTIONS -- "${rsync_commands[@]}"
else
  # Sequential fallback: run the generated snippets one after another.
  for c in "${rsync_commands[@]}"; do
    eval "$c"
  done
fi

bm_end "$(basename "$0") - Restoring objects"

# In cluster mode, finish the restore by feeding the routes back to the
# appliance once the objects have been copied.
#
# The heredoc below is unquoted, so $remote_routes_list and $remote_tempdir are
# expanded locally before the script is sent, while the backslash-escaped
# \$(...) and \$chunks are evaluated by the remote /bin/bash. Remotely it:
#   1. splits the routes list into 1000-line files named chunk* inside the
#      remote scratch directory,
#   2. collects those chunk files with find,
#   3. uses parallel to pipe each chunk into
#      `github-env ./bin/storage-cluster-restore-finalize`.
# Output of the remote shell is sent to file descriptor 3.
if $CLUSTER; then
  bm_start "$(basename $0) - Finalizing routes"
  ghe_verbose "Finalizing routes"
  ghe-ssh "$GHE_HOSTNAME" -- /bin/bash >&3 <<EOF
    split -l 1000 $remote_routes_list $remote_tempdir/chunk
    chunks=\$(find $remote_tempdir/ -name chunk\*)
    parallel -i /bin/sh -c "cat {} | github-env ./bin/storage-cluster-restore-finalize" -- \$chunks
EOF
  bm_end "$(basename $0) - Finalizing routes"
fi

# Close the benchmark opened with the same label near the top of the script.
bm_end "$(basename $0)"
