#!/bin/bash
# ==============================================================================
# K3s Snapshot Restore Script (with Verification)
# Purpose: Download remote backup, extract, inspect timestamp, and apply restore.
#
# Flow:
#   1. Verify root privileges and the SSH key, then prompt for the backup host.
#   2. List *.tar.gz archives on the remote host and let the operator pick one.
#   3. Download the archive into the local k3s db directory and extract it.
#   4. Locate the newest snapshot file and read its modification time.
#   5. Ask for explicit confirmation before touching the cluster.
#   6. Stop the service, run the k3s cluster reset, and start the service again.
# ==============================================================================

# Failures are handled explicitly at each step (some are deliberately only
# warnings), so `-e` is not enabled; unset variables are still treated as bugs.
set -uo pipefail

# Configuration Variables
readonly LOCAL_DB_DIR="/var/lib/rancher/k3s/server/db"
readonly SNAPSHOT_SUBDIR="snapshots"
readonly SSH_KEY_PATH="/root/.ssh/backup-k3os.pem"
readonly REMOTE_USER="ubuntu"
readonly REMOTE_BASE_DIR="/home/ubuntu"
readonly SERVICE_NAME="k3s-service"

# Options shared by ssh and scp so both connections behave the same way.
# NOTE(review): StrictHostKeyChecking=no disables host key verification and
# allows a man-in-the-middle to impersonate the backup server. It is kept for
# backward compatibility; consider pinning the host key instead.
SSH_OPTS=(-i "$SSH_KEY_PATH" -o StrictHostKeyChecking=no)
readonly SSH_OPTS

# Print each argument as one line on stderr, then abort with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

# Print the most recently modified regular file among the given paths.
# Prints nothing and returns non-zero when no argument is a regular file.
newest_regular_file() {
  local candidate newest=""
  for candidate in "$@"; do
    [[ -f "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest="$candidate"
    fi
  done
  [[ -n "$newest" ]] && printf '%s\n' "$newest"
}

# Ensure running as root
if [[ "$EUID" -ne 0 ]]; then
  die "Error: This script must be run as root."
fi

echo "Starting Restoration Routine..."
echo "------------------------------------------------"

# ------------------------------------------------------------------------------
# 1. Setup & Connectivity
# ------------------------------------------------------------------------------

# Check for SSH Key
if [[ ! -f "$SSH_KEY_PATH" ]]; then
  die "Error: SSH key not found at $SSH_KEY_PATH." \
    "Please ensure the key exists (or run the backup script to generate it)."
fi

# Prompt for Remote IP (EOF on stdin is treated the same as an empty answer)
REMOTE_IP=""
read -r -p "Enter the IP address of the backup server: " REMOTE_IP || REMOTE_IP=""
if [[ -z "$REMOTE_IP" ]]; then
  die "Error: IP address required."
fi

echo "------------------------------------------------"

# ------------------------------------------------------------------------------
# 2. Remote File Search & Selection
# ------------------------------------------------------------------------------
echo "Fetching file list from remote server..."

# Get list of tar.gz files from remote server
RAW_FILES=$(ssh "${SSH_OPTS[@]}" "${REMOTE_USER}@${REMOTE_IP}" \
  "ls ${REMOTE_BASE_DIR}/*.tar.gz 2>/dev/null")
SSH_STATUS=$?

# ssh reserves exit status 255 for its own errors, which lets a connection
# failure be told apart from a remote `ls` that simply found nothing.
if [[ "$SSH_STATUS" -eq 255 ]]; then
  die "Error: Could not connect to ${REMOTE_USER}@${REMOTE_IP}."
fi

if [[ -z "$RAW_FILES" ]]; then
  die "Error: No .tar.gz files found in ${REMOTE_BASE_DIR} on remote server."
fi

# Keep only non-empty lines so displayed indices always match array indices.
ALL_FILES=()
while IFS= read -r remote_path; do
  if [[ -n "$remote_path" ]]; then
    ALL_FILES+=("$remote_path")
  fi
done <<< "$RAW_FILES"

# Selection Loop
SELECTED_FILE=""
SELECTED_FULL_PATH=""
CURRENT_FILTER=""

while [[ -z "$SELECTED_FILE" ]]; do
  printf '\n%s\n' "Available Backups (Remote):"

  # The filter is matched as a literal substring of the remote path, so user
  # input is never interpreted as a regular expression or a command option.
  FILE_ARRAY=()
  for remote_path in "${ALL_FILES[@]}"; do
    if [[ -z "$CURRENT_FILTER" || "$remote_path" == *"$CURRENT_FILTER"* ]]; then
      FILE_ARRAY+=("$remote_path")
    fi
  done

  count=${#FILE_ARRAY[@]}
  if [[ "$count" -eq 0 ]]; then
    echo "No files match your search."
    CURRENT_FILTER=""
    continue
  fi

  for i in "${!FILE_ARRAY[@]}"; do
    echo "[$i] ${FILE_ARRAY[i]##*/}"
  done

  echo "------------------------------------------------"
  echo "Instructions:"
  echo " - Enter a NUMBER to select a file."
  echo " - Enter TEXT to filter/search the list."
  echo " - Enter 'ALL' to clear filters."

  # Abort on EOF; otherwise the loop would spin forever on a closed stdin.
  if ! read -r -p "Selection: " USER_INPUT; then
    die "Error: No selection received (end of input)."
  fi

  if [[ "$USER_INPUT" =~ ^[0-9]+$ ]]; then
    # Force base 10 so an input such as "08" is not rejected as invalid octal
    # when it is used in an arithmetic context (the array subscript).
    index=$((10#$USER_INPUT))
    if ((index >= 0 && index < count)); then
      SELECTED_FULL_PATH="${FILE_ARRAY[index]}"
      SELECTED_FILE="${SELECTED_FULL_PATH##*/}"
    else
      echo "Invalid selection number."
    fi
  elif [[ "$USER_INPUT" == "ALL" ]]; then
    CURRENT_FILTER=""
  else
    CURRENT_FILTER="$USER_INPUT"
    echo "Filtering for '$CURRENT_FILTER'..."
  fi
done

echo "Selected Backup: $SELECTED_FILE"
echo "------------------------------------------------"

# ------------------------------------------------------------------------------
# 3. Download and Extract
# ------------------------------------------------------------------------------

# Ensure local directory exists
mkdir -p "$LOCAL_DB_DIR" || die "Error: Could not create $LOCAL_DB_DIR."

LOCAL_TAR_PATH="${LOCAL_DB_DIR}/${SELECTED_FILE}"

echo "Downloading $SELECTED_FILE to $LOCAL_DB_DIR..."
if ! scp "${SSH_OPTS[@]}" \
  "${REMOTE_USER}@${REMOTE_IP}:${SELECTED_FULL_PATH}" "$LOCAL_TAR_PATH"; then
  die "Error: Download failed."
fi

echo "Extracting archive..."
if ! tar -xzf "$LOCAL_TAR_PATH" -C "$LOCAL_DB_DIR"; then
  die "Error: Extraction failed."
fi

echo "Extraction complete."
echo "------------------------------------------------"

# ------------------------------------------------------------------------------
# 4. Identify Latest Snapshot & Verify
# ------------------------------------------------------------------------------
SNAPSHOT_DIR_FULL="${LOCAL_DB_DIR}/${SNAPSHOT_SUBDIR}"

if [[ ! -d "$SNAPSHOT_DIR_FULL" ]]; then
  die "Error: Expected directory $SNAPSHOT_DIR_FULL not found after extraction."
fi

# Prefer files that actually came from the downloaded archive: the snapshot
# directory may already hold local snapshots that are newer than the backup,
# and restoring one of those would silently ignore the selected archive.
ARCHIVE_SNAPSHOTS=()
if ARCHIVE_LISTING=$(tar -tzf "$LOCAL_TAR_PATH" 2>/dev/null); then
  while IFS= read -r member; do
    member="${member#./}"
    member_name="${member#"${SNAPSHOT_SUBDIR}/"}"
    # Accept only direct children of the snapshot directory.
    if [[ "$member" == "${SNAPSHOT_SUBDIR}/"* && -n "$member_name" \
      && "$member_name" != */* ]]; then
      ARCHIVE_SNAPSHOTS+=("${SNAPSHOT_DIR_FULL}/${member_name}")
    fi
  done <<< "$ARCHIVE_LISTING"
fi

RESTORE_PATH=""
if [[ "${#ARCHIVE_SNAPSHOTS[@]}" -gt 0 ]]; then
  RESTORE_PATH=$(newest_regular_file "${ARCHIVE_SNAPSHOTS[@]}")
fi

# Fall back to the newest regular file in the directory when the archive
# layout could not be mapped onto the snapshot directory.
if [[ -z "$RESTORE_PATH" ]]; then
  echo "Warning: Could not identify snapshot files from the archive listing;" \
    "using the newest file in $SNAPSHOT_DIR_FULL." >&2
  RESTORE_PATH=$(newest_regular_file "$SNAPSHOT_DIR_FULL"/*)
fi

if [[ -z "$RESTORE_PATH" ]]; then
  die "Error: No snapshot files found inside $SNAPSHOT_DIR_FULL."
fi

RESTORE_FILE_NAME="${RESTORE_PATH##*/}"

# Get Timestamp of the extracted snapshot file
SNAPSHOT_DATE=$(date -r "$RESTORE_PATH" "+%Y-%m-%d %H:%M:%S")

# ------------------------------------------------------------------------------
# 5. Pre-Restore Confirmation Prompt
# ------------------------------------------------------------------------------
echo "CRITICAL CONFIRMATION REQUIRED"
echo "------------------------------------------------"
echo "You are about to reset the Kubernetes cluster using the following snapshot:"
echo ""
echo " File Name: $RESTORE_FILE_NAME"
echo " Snapshot Date: $SNAPSHOT_DATE"
echo " Full Path: $RESTORE_PATH"
echo ""
echo "Warning: This will STOP '$SERVICE_NAME' and reset the cluster database."
echo "------------------------------------------------"

CONFIRM=""
read -r -p "Do you want to perform this restore now? (y/N): " CONFIRM || CONFIRM=""

# Check user input (only 'y' or 'Y' allows progress)
if [[ ! "$CONFIRM" =~ ^[yY]$ ]]; then
  echo "Restore cancelled by user."
  echo "The downloaded files remain at: $LOCAL_DB_DIR"
  exit 0
fi

echo "Confirmed. Proceeding with restore..."
echo "------------------------------------------------"

# ------------------------------------------------------------------------------
# 6. Perform Restore
# ------------------------------------------------------------------------------
echo "Stopping Service: $SERVICE_NAME..."
if ! service "$SERVICE_NAME" stop; then
  # Deliberately non-fatal: the reset is still attempted.
  echo "Warning: Failed to stop service. Attempting to proceed, but this may fail." >&2
fi

echo "Running Cluster Reset..."
k3s server --cluster-reset --cluster-reset-restore-path="$RESTORE_PATH"
RESTORE_STATUS=$?

if [[ "$RESTORE_STATUS" -ne 0 ]]; then
  die "CRITICAL ERROR: Cluster reset failed." \
    "Service was NOT restarted to preserve logs/state for debugging."
fi

echo "Cluster reset successful."
echo "Starting Service: $SERVICE_NAME..."
if ! service "$SERVICE_NAME" start; then
  die "Error: Cluster reset succeeded, but '$SERVICE_NAME' failed to start."
fi

echo "------------------------------------------------"
echo "RESTORE COMPLETE. Please verify cluster health with 'kubectl get nodes'."