Add 'k3os/restore_k3os.sh'
parent
83623dc317
commit
343702ab49
@ -0,0 +1,216 @@
|
|||||||
|
#!/bin/bash

# ==============================================================================
# K3s Snapshot Restore Script (with Verification)
# Purpose: Download remote backup, extract, inspect timestamp, and apply restore.
# ==============================================================================

# Configuration Variables
# These are constants for the whole run, so they are marked readonly to keep
# later code from overwriting them by accident.

# Local directory the downloaded archive is stored in and extracted into.
readonly LOCAL_DB_DIR="/var/lib/rancher/k3s/server/db"
# Sub-directory of LOCAL_DB_DIR that must exist after extraction and is
# searched for the snapshot to restore.
readonly SNAPSHOT_SUBDIR="snapshots"
# Private key passed to ssh/scp via -i.
readonly SSH_KEY_PATH="/root/.ssh/backup-k3os.pem"
# Login user on the backup server.
readonly REMOTE_USER="ubuntu"
# Remote directory that is listed for *.tar.gz backup archives.
readonly REMOTE_BASE_DIR="/home/ubuntu"
# Service that is stopped before, and started after, the cluster reset.
readonly SERVICE_NAME="k3s-service"
|
||||||
|
|
||||||
|
# Refuse to continue unless the effective user ID is 0 (root).
if (( EUID != 0 )); then
  printf '%s\n' "Error: This script must be run as root."
  exit 1
fi

# Opening banner.
printf '%s\n' \
  "Starting Restoration Routine..." \
  "------------------------------------------------"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
# 1. Setup & Connectivity
# ------------------------------------------------------------------------------

# The key at SSH_KEY_PATH must exist as a regular file; otherwise explain and
# abort before any connection is attempted.
[[ -f "$SSH_KEY_PATH" ]] || {
  printf '%s\n' \
    "Error: SSH key not found at $SSH_KEY_PATH." \
    "Please ensure the key exists (or run the backup script to generate it)."
  exit 1
}
|
||||||
|
|
||||||
|
# Ask the operator for the backup server address; an empty answer is fatal.
read -p "Enter the IP address of the backup server: " REMOTE_IP
[[ -n "$REMOTE_IP" ]] || {
  printf '%s\n' "Error: IP address required."
  exit 1
}

printf '%s\n' "------------------------------------------------"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
# 2. Remote File Search & Selection
# ------------------------------------------------------------------------------
echo "Fetching file list from remote server..."

# Get list of tar.gz files from remote server.
# The glob is expanded by the remote shell; ls's own error output (e.g. when
# nothing matches) is discarded there, which simply leaves RAW_FILES empty.
# NOTE(review): StrictHostKeyChecking=no relaxes host-key verification for this
# connection - confirm that is acceptable for the backup network.
RAW_FILES=$(ssh -i "$SSH_KEY_PATH" -o StrictHostKeyChecking=no \
  "${REMOTE_USER}@${REMOTE_IP}" "ls ${REMOTE_BASE_DIR}/*.tar.gz 2>/dev/null")
SSH_STATUS=$?

# ssh exits with 255 when ssh itself failed (host unreachable, authentication
# rejected, ...). Report that as a connection problem rather than letting it
# fall through to the misleading "no files found" message below.
if [ "$SSH_STATUS" -eq 255 ]; then
  echo "Error: SSH connection to ${REMOTE_USER}@${REMOTE_IP} failed."
  exit 1
fi

if [ -z "$RAW_FILES" ]; then
  echo "Error: No .tar.gz files found in ${REMOTE_BASE_DIR} on remote server."
  exit 1
fi
|
||||||
|
|
||||||
|
# Selection Loop
# Lists the remote archives (optionally narrowed by a filter) and keeps asking
# until the operator picks one by its index.
#   Reads:  RAW_FILES - newline-separated remote archive paths.
#   Writes: SELECTED_FULL_PATH - chosen remote path,
#           SELECTED_FILE      - its basename.
SELECTED_FILE=""
SELECTED_FULL_PATH=""
CURRENT_FILTER=""

while [ -z "$SELECTED_FILE" ]; do
  echo -e "\nAvailable Backups (Remote):"

  if [ -z "$CURRENT_FILTER" ]; then
    MATCHING_FILES="$RAW_FILES"
  else
    # "--" stops a filter that begins with "-" from being read as a grep option.
    MATCHING_FILES=$(grep -- "$CURRENT_FILTER" <<< "$RAW_FILES")
  fi

  # Keep only non-empty lines so the numbers shown to the user are exactly the
  # array indexes (a here-string of empty text still yields one empty line).
  FILE_ARRAY=()
  while IFS= read -r f; do
    if [ -n "$f" ]; then
      FILE_ARRAY+=("$f")
    fi
  done <<< "$MATCHING_FILES"
  count=${#FILE_ARRAY[@]}

  if [ "$count" -eq 0 ]; then
    if [ -z "$CURRENT_FILTER" ]; then
      # Nothing to list and no filter to clear: looping again cannot help.
      echo "Error: No backup files available to select."
      exit 1
    fi
    echo "No files match your search."
    CURRENT_FILTER=""
    continue
  fi

  for i in "${!FILE_ARRAY[@]}"; do
    echo "[$i] $(basename "${FILE_ARRAY[$i]}")"
  done

  echo "------------------------------------------------"
  echo "Instructions:"
  echo " - Enter a NUMBER to select a file."
  echo " - Enter TEXT to filter/search the list."
  echo " - Enter 'ALL' to clear filters."

  # -r keeps backslashes in a filter intact. A failed read means end of input;
  # without this check the loop would spin forever re-printing the list.
  if ! read -r -p "Selection: " USER_INPUT; then
    echo
    echo "Error: No input received; aborting selection."
    exit 1
  fi

  if [[ "$USER_INPUT" =~ ^[0-9]+$ ]]; then
    # Force base 10: in arithmetic context (including array subscripts) a
    # leading 0 means octal, so "010" would silently pick entry 8 and "08"
    # would raise an error. Capping at 9 significant digits avoids overflow.
    index=-1
    if [[ "$USER_INPUT" =~ ^0*([0-9]{1,9})$ ]]; then
      index=$(( 10#${BASH_REMATCH[1]} ))
    fi

    if (( index >= 0 && index < count )); then
      SELECTED_FULL_PATH="${FILE_ARRAY[$index]}"
      SELECTED_FILE=$(basename "$SELECTED_FULL_PATH")
    else
      echo "Invalid selection number."
    fi
  elif [ "$USER_INPUT" == "ALL" ]; then
    CURRENT_FILTER=""
  else
    CURRENT_FILTER="$USER_INPUT"
    echo "Filtering for '$CURRENT_FILTER'..."
  fi
done

echo "Selected Backup: $SELECTED_FILE"
echo "------------------------------------------------"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
# 3. Download and Extract
# ------------------------------------------------------------------------------

# Create the target directory if needed; the archive is stored inside it
# under its original file name.
mkdir -p "$LOCAL_DB_DIR"
LOCAL_TAR_PATH="${LOCAL_DB_DIR}/${SELECTED_FILE}"

# Copy the chosen archive from the backup server.
printf '%s\n' "Downloading $SELECTED_FILE to $LOCAL_DB_DIR..."
if ! scp -i "$SSH_KEY_PATH" \
  "${REMOTE_USER}@${REMOTE_IP}:${SELECTED_FULL_PATH}" "$LOCAL_TAR_PATH"; then
  printf '%s\n' "Error: Download failed."
  exit 1
fi

# Unpack the archive in place, directly into LOCAL_DB_DIR.
printf '%s\n' "Extracting archive..."
if ! tar -xzf "$LOCAL_TAR_PATH" -C "$LOCAL_DB_DIR"; then
  printf '%s\n' "Error: Extraction failed."
  exit 1
fi

printf '%s\n' \
  "Extraction complete." \
  "------------------------------------------------"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
# 4. Identify Latest Snapshot & Verify
# ------------------------------------------------------------------------------
# Writes: SNAPSHOT_DIR_FULL, RESTORE_PATH, RESTORE_FILE_NAME, SNAPSHOT_DATE.
SNAPSHOT_DIR_FULL="${LOCAL_DB_DIR}/${SNAPSHOT_SUBDIR}"

if [ ! -d "$SNAPSHOT_DIR_FULL" ]; then
  echo "Error: Expected directory $SNAPSHOT_DIR_FULL not found after extraction."
  exit 1
fi

# Find the newest regular file in the snapshots folder by modification time.
# A glob plus -nt is used instead of parsing `ls -t`, so sub-directories are
# never chosen and unusual file names cannot confuse the result.
# NOTE(review): any file already present in this directory before extraction
# takes part in the comparison too - confirm the newest file is always the
# one that came from the downloaded archive.
RESTORE_PATH=""
for candidate in "$SNAPSHOT_DIR_FULL"/*; do
  # Also skips the literal pattern that is left when the glob matches nothing.
  [ -f "$candidate" ] || continue
  if [ -z "$RESTORE_PATH" ] || [ "$candidate" -nt "$RESTORE_PATH" ]; then
    RESTORE_PATH="$candidate"
  fi
done

if [ -z "$RESTORE_PATH" ]; then
  echo "Error: No snapshot files found inside $SNAPSHOT_DIR_FULL."
  exit 1
fi

RESTORE_FILE_NAME="${RESTORE_PATH##*/}"

# Get Timestamp of the extracted snapshot file (its last-modification time).
SNAPSHOT_DATE=$(date -r "$RESTORE_PATH" "+%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
# 5. Pre-Restore Confirmation Prompt
# ------------------------------------------------------------------------------
# Show exactly which snapshot is about to be applied, then require an explicit
# go-ahead from the operator.
printf '%s\n' \
  "CRITICAL CONFIRMATION REQUIRED" \
  "------------------------------------------------" \
  "You are about to reset the Kubernetes cluster using the following snapshot:" \
  "" \
  " File Name: $RESTORE_FILE_NAME" \
  " Snapshot Date: $SNAPSHOT_DATE" \
  " Full Path: $RESTORE_PATH" \
  "" \
  "Warning: This will STOP '$SERVICE_NAME' and reset the cluster database." \
  "------------------------------------------------"

read -p "Do you want to perform this restore now? (y/N): " CONFIRM

# Only a single 'y' or 'Y' continues; any other answer (including none) cancels.
case "$CONFIRM" in
  y | Y) ;;
  *)
    printf '%s\n' \
      "Restore cancelled by user." \
      "The downloaded files remain at: $LOCAL_DB_DIR"
    exit 0
    ;;
esac

printf '%s\n' \
  "Confirmed. Proceeding with restore..." \
  "------------------------------------------------"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
# 6. Perform Restore
# ------------------------------------------------------------------------------
# Stops the service, runs the cluster reset against RESTORE_PATH, and starts
# the service again only if the reset succeeded.
echo "Stopping Service: $SERVICE_NAME..."
if ! service "$SERVICE_NAME" stop; then
  # Deliberately non-fatal: the reset is still attempted.
  echo "Warning: Failed to stop service. Attempting to proceed, but this may fail."
fi

echo "Running Cluster Reset..."
k3s server --cluster-reset --cluster-reset-restore-path="$RESTORE_PATH"
RESTORE_STATUS=$?

if [ "$RESTORE_STATUS" -ne 0 ]; then
  echo "CRITICAL ERROR: Cluster reset failed."
  echo "Service was NOT restarted to preserve logs/state for debugging."
  exit 1
fi

echo "Cluster reset successful."

echo "Starting Service: $SERVICE_NAME..."
# Check the start result so success is not announced for a service that did
# not actually come back up.
if ! service "$SERVICE_NAME" start; then
  echo "Error: Cluster reset succeeded, but '$SERVICE_NAME' failed to start."
  echo "Start the service manually and check its logs."
  exit 1
fi

echo "------------------------------------------------"
echo "RESTORE COMPLETE. Please verify cluster health with 'kubectl get nodes'."
|
||||||
Loading…
Reference in New Issue