#!/bin/bash

# ==============================================================================
# Multi-Master K3s Snapshot Retrieval & Offload
# Repository path: k3os/backup_etcd_from_masters.sh
#
# Purpose: Retrieve the latest snapshot from every Control Plane node via the
#          'del-log' system pod running on that node, upload the files to a
#          central backup server over SCP and verify each upload by file size.
#
# Usage:   Run interactively as root. The script prompts for the SSH private
#          key (only when no non-empty key file exists yet) and for the
#          address of the backup server.
#
# Exit:    0 when every downloaded snapshot was uploaded and verified,
#          1 on any download/upload/verification error or fatal precondition.
# ==============================================================================

# -u / pipefail catch typos and broken pipelines. -e is deliberately NOT set:
# a problem on one node must not abort the backup of the remaining nodes, so
# every critical command is checked explicitly instead.
set -uo pipefail

# Variables
readonly SNAPSHOT_DIR="/var/lib/rancher/k3s/server/db/snapshots"
readonly SSH_KEY_PATH="/root/.ssh/backup-k3os.pem"
readonly REMOTE_USER="ubuntu"
readonly REMOTE_DEST_DIR="/home/ubuntu"
readonly POD_NAMESPACE="kube-system"
readonly POD_NAME_PATTERN="del-log"
readonly SEPARATOR="------------------------------------------------"

# One option set for both scp and ssh so host-key handling is consistent.
# accept-new records unknown hosts automatically but still rejects a host
# whose key has CHANGED (unlike StrictHostKeyChecking=no).
declare -a SSH_OPTS=(-i "$SSH_KEY_PATH" -o StrictHostKeyChecking=accept-new)

LOCAL_STAGING_DIR=""          # private temp dir, created in main()
REMOTE_IP=""                  # filled in by prompt_remote_ip()
KEEP_STAGING=0                # set to 1 when staged files must survive exit
FAILURES=0                    # number of failed downloads/uploads/verifications
declare -a DOWNLOADED_FILES=() # local paths of successfully downloaded files

#######################################
# Print a message to stderr.
# Arguments: message words
#######################################
warn() {
  printf '%s\n' "$*" >&2
}

#######################################
# Print a message to stderr and terminate the script with status 1.
# Arguments: message words
#######################################
die() {
  warn "$*"
  exit 1
}

#######################################
# EXIT trap: remove the staging directory, unless an upload failed and the
# downloaded files are being kept for a manual retry.
# Globals: LOCAL_STAGING_DIR (read), KEEP_STAGING (read)
#######################################
cleanup() {
  if [[ -n "$LOCAL_STAGING_DIR" && -d "$LOCAL_STAGING_DIR" ]]; then
    if (( KEEP_STAGING )); then
      warn "Keeping downloaded snapshots in $LOCAL_STAGING_DIR for manual retry."
    else
      rm -rf -- "${LOCAL_STAGING_DIR:?}"
    fi
  fi
}

#######################################
# 1. SSH Key Initialization
# Ensure a non-empty private key exists at SSH_KEY_PATH with mode 0600,
# prompting the operator to paste it when it is missing.
# Globals: SSH_KEY_PATH (read)
#######################################
init_ssh_key() {
  local key_dir
  key_dir=$(dirname "$SSH_KEY_PATH")
  mkdir -p "$key_dir" || die "Error: Cannot create directory $key_dir."

  # -s rather than -f: an empty file left behind by an aborted paste is not a
  # usable key and must trigger the prompt again.
  if [[ ! -s "$SSH_KEY_PATH" ]]; then
    echo "SSH Key not found at $SSH_KEY_PATH."
    echo "Please paste the private key content below."
    echo "Press 'Ctrl+D' on a new line when finished:"

    # Remove any empty leftover so the file is created fresh, then read the
    # multi-line input under a restrictive umask: the key is never readable
    # by group/others, not even before the chmod below.
    rm -f -- "$SSH_KEY_PATH"
    ( umask 077 && cat > "$SSH_KEY_PATH" )

    # Verify file was written
    if [[ -s "$SSH_KEY_PATH" ]]; then
      echo "Key saved."
    else
      rm -f -- "$SSH_KEY_PATH"
      die "Error: Key file is empty."
    fi
  fi

  # Set strict permissions (also repairs a pre-existing key file)
  chmod 0600 "$SSH_KEY_PATH" || die "Error: Cannot set permissions on $SSH_KEY_PATH."
  echo "SSH Key permissions set to 0600."
}

#######################################
# 2. Configuration Prompts
# Ask for the backup server address and store it in REMOTE_IP.
# Globals: REMOTE_IP (written)
#######################################
prompt_remote_ip() {
  read -r -p "Enter the IP address of the backup server: " REMOTE_IP

  if [[ -z "$REMOTE_IP" ]]; then
    die "Error: IP address cannot be empty."
  fi
  # Whitespace would split the scp/ssh destination into several words.
  if [[ "$REMOTE_IP" =~ [[:space:]] ]]; then
    die "Error: IP address must not contain whitespace."
  fi
}

#######################################
# Find the access pod scheduled on a given node.
# The node is matched exactly through a field selector; grepping the
# 'kubectl get pod -o wide' table would also match node names that merely
# contain the wanted name (e.g. master-1 vs master-10).
# Arguments: $1 - node name
# Outputs:   name of the first Running pod whose name contains
#            POD_NAME_PATTERN, or nothing when there is none
# Returns:   non-zero when the kubectl query itself fails
#######################################
find_access_pod() {
  local node=$1
  local pod_names

  pod_names=$(kubectl get pods -n "$POD_NAMESPACE" \
    --field-selector "spec.nodeName=${node},status.phase=Running" \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}') || return 1

  # Print only the first match so the result is always a single pod name.
  awk -v pat="$POD_NAME_PATTERN" 'index($0, pat) { print; exit }' \
    <<<"$pod_names"
}

#######################################
# 3./4. Locate the access pod on one node, find the newest entry in
# SNAPSHOT_DIR and copy it into the local staging directory.
# A missing pod or an empty snapshot directory is reported as a warning and
# skipped; a failed copy is counted in FAILURES.
# Globals:   SNAPSHOT_DIR, LOCAL_STAGING_DIR (read),
#            DOWNLOADED_FILES, FAILURES (written)
# Arguments: $1 - node name
#######################################
fetch_node_snapshot() {
  local node=$1
  local pod snapshot timestamp local_path

  echo "Processing Node: $node"

  pod=$(find_access_pod "$node")
  if [[ -z "$pod" ]]; then
    warn "  Warning: No 'del-log' pod found running on $node. Skipping."
    return 0
  fi
  echo "  Access pod identified: $pod"

  # List the snapshot directory inside the pod, newest first, keep the top
  # entry. The directory is handed over as a positional argument so it is
  # quoted correctly by the remote shell.
  snapshot=$(kubectl exec -n "$POD_NAMESPACE" "$pod" -- \
    sh -c 'ls -t "$1" | head -n 1' sh "$SNAPSHOT_DIR")
  if [[ -z "$snapshot" ]]; then
    warn "  Warning: No snapshots found in $SNAPSHOT_DIR on $node."
    return 0
  fi

  # Modification time of the file, read inside the pod with 'date -r'.
  # Purely informational, so a failure here does not stop the download.
  timestamp=$(kubectl exec -n "$POD_NAMESPACE" "$pod" -- \
    date -r "${SNAPSHOT_DIR}/${snapshot}" "+%Y-%m-%d %H:%M:%S") \
    || timestamp="unknown"

  echo "  Found Newest Snapshot: $snapshot"
  echo "  Timestamp: $timestamp"

  # Prepend the node name to prevent collisions between masters.
  local_path="${LOCAL_STAGING_DIR}/${node}-${snapshot}"

  echo "  Downloading to local staging area..."

  # Besides the exit status, require a non-empty regular file: only that can
  # be uploaded and size-verified afterwards.
  if kubectl cp -n "$POD_NAMESPACE" \
      "${pod}:${SNAPSHOT_DIR}/${snapshot}" "$local_path" \
      && [[ -f "$local_path" && -s "$local_path" ]]; then
    echo "  Download successful."
    DOWNLOADED_FILES+=("$local_path")
  else
    warn "  Error: Failed to download file from pod."
    FAILURES=$((FAILURES + 1))
  fi
  echo "  ---"
}

#######################################
# 5./6. Upload one staged file to the backup server and verify it by
# comparing the local and remote file sizes (a size check only, not a
# content checksum).
# Globals:   SSH_OPTS, REMOTE_USER, REMOTE_IP, REMOTE_DEST_DIR (read)
# Arguments: $1 - local path of the file to upload
# Returns:   0 when uploaded and sizes match, 1 otherwise
#######################################
upload_and_verify() {
  local file_path=$1
  local name local_size remote_size remote_cmd
  local status=0

  name=$(basename "$file_path")
  echo "Uploading: $name"

  if scp "${SSH_OPTS[@]}" "$file_path" \
      "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_DEST_DIR}"; then
    echo "  SCP reported success."
    echo "  Verifying integrity..."

    local_size=$(stat -c%s "$file_path")

    # %q shell-quotes the path for the remote shell.
    printf -v remote_cmd 'stat -c%%s %q' "${REMOTE_DEST_DIR}/${name}"
    remote_size=$(ssh "${SSH_OPTS[@]}" "${REMOTE_USER}@${REMOTE_IP}" \
      "$remote_cmd")

    if [[ -z "$local_size" || -z "$remote_size" ]]; then
      # An empty value means stat/ssh failed; a numeric comparison would only
      # produce a confusing "integer expression expected" error.
      warn "  FAILURE: Could not determine file sizes (Local: ${local_size:-unknown} vs Remote: ${remote_size:-unknown})."
      status=1
    elif [[ "$local_size" == "$remote_size" ]]; then
      echo "  SUCCESS: File sizes match ($local_size bytes)."
    else
      warn "  FAILURE: File sizes differ (Local: $local_size vs Remote: $remote_size)."
      status=1
    fi
  else
    warn "  Error: SCP upload failed."
    status=1
  fi
  echo "  ---"
  return "$status"
}

#######################################
# Entry point: preconditions, key setup, prompts, download from every
# control plane node, upload, summary.
#######################################
main() {
  local tool node_list node file_path
  local -a master_nodes=()

  # Ensure running as root
  if [[ "$EUID" -ne 0 ]]; then
    die "Error: This script must be run as root."
  fi

  for tool in kubectl scp ssh; do
    command -v "$tool" >/dev/null \
      || die "Error: Required command '$tool' not found in PATH."
  done

  # Private (mode 0700), unpredictable staging directory: datastore snapshots
  # may contain cluster secrets and must not sit in a guessable,
  # world-accessible path under /tmp.
  LOCAL_STAGING_DIR=$(mktemp -d "${TMPDIR:-/tmp}/k3s_master_backups.XXXXXX") \
    || die "Error: Cannot create local staging directory."
  trap cleanup EXIT

  echo "Starting Backup Retrieval Routine..."
  echo "$SEPARATOR"

  init_ssh_key
  echo "$SEPARATOR"

  prompt_remote_ip
  echo "$SEPARATOR"

  # ----------------------------------------------------------------------------
  # 3. Identify Masters and Map to Pods
  # ----------------------------------------------------------------------------
  echo "Querying Kubernetes API for Control Plane nodes..."

  node_list=$(kubectl get nodes \
    --selector='node-role.kubernetes.io/control-plane' \
    -o jsonpath='{.items[*].metadata.name}') \
    || die "Error: Failed to query the Kubernetes API for nodes."

  # The jsonpath output is one space-separated line; split it into an array.
  read -r -a master_nodes <<<"$node_list"
  if (( ${#master_nodes[@]} == 0 )); then
    die "Error: No control plane nodes found with selector 'node-role.kubernetes.io/control-plane'."
  fi

  for node in "${master_nodes[@]}"; do
    fetch_node_snapshot "$node"
  done

  echo "$SEPARATOR"

  # ----------------------------------------------------------------------------
  # 5. Upload to Remote Backup Server
  # ----------------------------------------------------------------------------
  if (( ${#DOWNLOADED_FILES[@]} == 0 )); then
    die "Error: No snapshots were downloaded; nothing to upload."
  fi

  echo "Starting Upload Phase to ${REMOTE_IP}..."

  for file_path in "${DOWNLOADED_FILES[@]}"; do
    if ! upload_and_verify "$file_path"; then
      FAILURES=$((FAILURES + 1))
      # Keep the local copies so a failed upload can be retried by hand.
      KEEP_STAGING=1
    fi
  done

  # The staging area is removed (or kept) by the EXIT trap.
  if (( FAILURES > 0 )); then
    warn "Backup operation finished with $FAILURES failure(s)."
    exit 1
  fi

  echo "Backup operation complete."
}

main "$@"