#!/bin/bash

# ==============================================================================
# K3s Snapshot Restore Script (with Verification)
# File:    k3os/restore_k3os.sh
# Purpose: Download remote backup, extract, inspect timestamp, and apply restore.
#
# Usage:   Run interactively as root. The script prompts for:
#            1. the IP address of the backup server,
#            2. which remote *.tar.gz backup to download (with text filtering),
#            3. a final y/N confirmation before the cluster reset is executed.
#
# Flow:    list remote archives over ssh -> download with scp -> extract into
#          LOCAL_DB_DIR -> pick the newest snapshot file shipped in the archive
#          -> confirm -> stop SERVICE_NAME -> `k3s server --cluster-reset` ->
#          start SERVICE_NAME.
#
# Exit:    0 on success or when the user cancels at the confirmation prompt;
#          1 on any error.
# ==============================================================================

# Configuration Variables
LOCAL_DB_DIR="/var/lib/rancher/k3s/server/db"
SNAPSHOT_SUBDIR="snapshots"
SSH_KEY_PATH="/root/.ssh/backup-k3os.pem"
REMOTE_USER="ubuntu"
REMOTE_BASE_DIR="/home/ubuntu"
SERVICE_NAME="k3s-service"

# Options shared by ssh and scp so that both treat the remote host key the same
# way (previously only the ssh call carried the StrictHostKeyChecking option).
# NOTE(review): StrictHostKeyChecking=no disables host key verification; kept
# for backward compatibility -- confirm this is acceptable for your network.
SSH_OPTS=(-i "$SSH_KEY_PATH" -o StrictHostKeyChecking=no)

# Print every argument on its own line to stderr, then abort with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

# Ensure running as root
if [ "$EUID" -ne 0 ]; then
  die "Error: This script must be run as root."
fi

echo "Starting Restoration Routine..."
echo "------------------------------------------------"

# ------------------------------------------------------------------------------
# 1. Setup & Connectivity
# ------------------------------------------------------------------------------

# Check for SSH Key
if [ ! -f "$SSH_KEY_PATH" ]; then
  die "Error: SSH key not found at $SSH_KEY_PATH." \
    "Please ensure the key exists (or run the backup script to generate it)."
fi

# Prompt for Remote IP (an empty answer or end-of-input is rejected below)
read -r -p "Enter the IP address of the backup server: " REMOTE_IP
if [ -z "$REMOTE_IP" ]; then
  die "Error: IP address required."
fi

echo "------------------------------------------------"

# ------------------------------------------------------------------------------
# 2. Remote File Search & Selection
# ------------------------------------------------------------------------------
echo "Fetching file list from remote server..."

# Get list of tar.gz files from remote server.
# stdin is redirected from /dev/null so ssh cannot swallow answers that were
# piped into this script for the later prompts.
RAW_FILES=$(ssh "${SSH_OPTS[@]}" "${REMOTE_USER}@${REMOTE_IP}" \
  "ls ${REMOTE_BASE_DIR}/*.tar.gz 2>/dev/null" < /dev/null)
SSH_STATUS=$?

# ssh itself exits with 255 on connection/authentication errors; report that
# separately instead of claiming that the remote directory has no backups.
if [ "$SSH_STATUS" -eq 255 ]; then
  die "Error: Could not connect to ${REMOTE_USER}@${REMOTE_IP} via SSH."
fi

if [ -z "$RAW_FILES" ]; then
  die "Error: No .tar.gz files found in ${REMOTE_BASE_DIR} on remote server."
fi

# One remote path per array element.
mapfile -t ALL_FILES <<< "$RAW_FILES"

# Selection Loop
SELECTED_FILE=""
SELECTED_FULL_PATH=""
CURRENT_FILTER=""

while [ -z "$SELECTED_FILE" ]; do
  echo -e "\nAvailable Backups (Remote):"

  # Rebuild the visible list. The filter is a literal, case-sensitive substring
  # matched against the file name only, so regex metacharacters, a leading '-'
  # or text that happens to occur in the directory part cannot skew the result.
  FILE_ARRAY=()
  for f in "${ALL_FILES[@]}"; do
    [ -n "$f" ] || continue
    filename=${f##*/}
    if [ -z "$CURRENT_FILTER" ] || [[ "$filename" == *"$CURRENT_FILTER"* ]]; then
      FILE_ARRAY+=("$f")
    fi
  done
  count=${#FILE_ARRAY[@]}

  if [ "$count" -eq 0 ]; then
    # With no filter active there is nothing to fall back to; stop instead of
    # looping forever.
    if [ -z "$CURRENT_FILTER" ]; then
      die "Error: No .tar.gz files found in ${REMOTE_BASE_DIR} on remote server."
    fi
    echo "No files match your search."
    CURRENT_FILTER=""
    continue
  fi

  # The printed number is the index into FILE_ARRAY used for the selection.
  for i in "${!FILE_ARRAY[@]}"; do
    echo "[$i] ${FILE_ARRAY[$i]##*/}"
  done

  echo "------------------------------------------------"
  echo "Instructions:"
  echo " - Enter a NUMBER to select a file."
  echo " - Enter TEXT to filter/search the list."
  echo " - Enter 'ALL' to clear filters."

  # A failed read means stdin was closed; without this check the loop would
  # redraw the menu endlessly.
  if ! read -r -p "Selection: " USER_INPUT; then
    echo >&2
    die "Error: No selection received (end of input)."
  fi

  if [[ "$USER_INPUT" =~ ^[0-9]+$ ]]; then
    # Force base 10: in arithmetic context a leading zero means octal, so "010"
    # would pick entry 8 and "08" would be a syntax error. The length guard
    # keeps the conversion clear of integer overflow.
    if [ "${#USER_INPUT}" -le 9 ] && [ "$((10#$USER_INPUT))" -lt "$count" ]; then
      index=$((10#$USER_INPUT))
      SELECTED_FULL_PATH="${FILE_ARRAY[$index]}"
      SELECTED_FILE=${SELECTED_FULL_PATH##*/}
    else
      echo "Invalid selection number."
    fi
  elif [ -z "$USER_INPUT" ] || [ "$USER_INPUT" == "ALL" ]; then
    # Empty input clears the filter as well (and just redraws the list).
    CURRENT_FILTER=""
  else
    CURRENT_FILTER="$USER_INPUT"
    echo "Filtering for '$CURRENT_FILTER'..."
  fi
done

echo "Selected Backup: $SELECTED_FILE"
echo "------------------------------------------------"

# ------------------------------------------------------------------------------
# 3. Download and Extract
# ------------------------------------------------------------------------------
# NOTE: extraction happens before the confirmation prompt, so files are written
# into LOCAL_DB_DIR even if the restore is cancelled afterwards.

# Ensure local directory exists
mkdir -p "$LOCAL_DB_DIR" || die "Error: Could not create $LOCAL_DB_DIR."

LOCAL_TAR_PATH="${LOCAL_DB_DIR}/${SELECTED_FILE}"

echo "Downloading $SELECTED_FILE to $LOCAL_DB_DIR..."
if ! scp "${SSH_OPTS[@]}" \
  "${REMOTE_USER}@${REMOTE_IP}:${SELECTED_FULL_PATH}" "$LOCAL_TAR_PATH"; then
  die "Error: Download failed."
fi

echo "Extracting archive..."
if ! tar -xzf "$LOCAL_TAR_PATH" -C "$LOCAL_DB_DIR"; then
  die "Error: Extraction failed."
fi

echo "Extraction complete."
echo "------------------------------------------------"

# ------------------------------------------------------------------------------
# 4. Identify Latest Snapshot & Verify
# ------------------------------------------------------------------------------
SNAPSHOT_DIR_FULL="${LOCAL_DB_DIR}/${SNAPSHOT_SUBDIR}"

if [ ! -d "$SNAPSHOT_DIR_FULL" ]; then
  die "Error: Expected directory $SNAPSHOT_DIR_FULL not found after extraction."
fi

# Pick the newest snapshot among the files that came out of the downloaded
# archive. Looking only at the directory could select a snapshot that already
# existed locally and is newer than anything in the backup.
RESTORE_PATH=""
while IFS= read -r entry; do
  entry=${entry#./}
  case "$entry" in
    "${SNAPSHOT_SUBDIR}"/*) ;;
    *) continue ;;
  esac
  # Only direct children of the snapshot directory are considered.
  member=${entry#"${SNAPSHOT_SUBDIR}/"}
  if [ -z "$member" ] || [[ "$member" == */* ]]; then
    continue
  fi
  candidate="${SNAPSHOT_DIR_FULL}/${member}"
  [ -f "$candidate" ] || continue
  if [ -z "$RESTORE_PATH" ] || [ "$candidate" -nt "$RESTORE_PATH" ]; then
    RESTORE_PATH="$candidate"
  fi
done < <(tar -tzf "$LOCAL_TAR_PATH")

# Fallback for archives whose member names do not start with SNAPSHOT_SUBDIR/:
# use the newest regular file in the snapshot directory.
if [ -z "$RESTORE_PATH" ]; then
  echo "Warning: Could not match archive contents to $SNAPSHOT_DIR_FULL;" \
    "using the newest file in that directory." >&2
  for candidate in "$SNAPSHOT_DIR_FULL"/*; do
    [ -f "$candidate" ] || continue
    if [ -z "$RESTORE_PATH" ] || [ "$candidate" -nt "$RESTORE_PATH" ]; then
      RESTORE_PATH="$candidate"
    fi
  done
fi

if [ -z "$RESTORE_PATH" ]; then
  die "Error: No snapshot files found inside $SNAPSHOT_DIR_FULL."
fi

RESTORE_FILE_NAME=${RESTORE_PATH##*/}

# Get Timestamp (modification time) of the extracted snapshot file
SNAPSHOT_DATE=$(date -r "$RESTORE_PATH" "+%Y-%m-%d %H:%M:%S")

# ------------------------------------------------------------------------------
# 5. Pre-Restore Confirmation Prompt
# ------------------------------------------------------------------------------
echo "CRITICAL CONFIRMATION REQUIRED"
echo "------------------------------------------------"
echo "You are about to reset the Kubernetes cluster using the following snapshot:"
echo ""
echo " File Name: $RESTORE_FILE_NAME"
echo " Snapshot Date: $SNAPSHOT_DATE"
echo " Full Path: $RESTORE_PATH"
echo ""
echo "Warning: This will STOP '$SERVICE_NAME' and reset the cluster database."
echo "------------------------------------------------"

read -r -p "Do you want to perform this restore now? (y/N): " CONFIRM

# Check user input (only 'y' or 'Y' allows progress; end-of-input cancels)
if [[ ! "$CONFIRM" =~ ^[yY]$ ]]; then
  echo "Restore cancelled by user."
  echo "The downloaded files remain at: $LOCAL_DB_DIR"
  exit 0
fi

echo "Confirmed. Proceeding with restore..."
echo "------------------------------------------------"

# ------------------------------------------------------------------------------
# 6. Perform Restore
# ------------------------------------------------------------------------------
echo "Stopping Service: $SERVICE_NAME..."
# Best effort: a failed stop is reported but does not abort the restore.
if ! service "$SERVICE_NAME" stop; then
  echo "Warning: Failed to stop service. Attempting to proceed, but this may fail." >&2
fi

echo "Running Cluster Reset..."
if ! k3s server --cluster-reset --cluster-reset-restore-path="$RESTORE_PATH"; then
  echo "CRITICAL ERROR: Cluster reset failed." >&2
  echo "Service was NOT restarted to preserve logs/state for debugging." >&2
  exit 1
fi

echo "Cluster reset successful."

echo "Starting Service: $SERVICE_NAME..."
# Do not announce completion if the service did not come back up.
if ! service "$SERVICE_NAME" start; then
  die "Error: Cluster reset succeeded, but '$SERVICE_NAME' failed to start." \
    "Start the service manually and check its logs."
fi

echo "------------------------------------------------"
echo "RESTORE COMPLETE. Please verify cluster health with 'kubectl get nodes'."