bastion: discover-first PXE provisioning with multi-arch support
Rewrote bastion from install-only to discover-first flow:
- Default mode discovers hardware (PXE boot → inventory → poweroff)
- Discovered machines promoted to install via subcommand
- Per-MAC iPXE dispatch (/dispatch?mac=) routes discover vs install
- Python HTTP server with discovery API, state management, kickstart gen
- Added full DHCP mode (DHCP_MODE=full) for isolated/test networks
- Added arm64 UEFI support (client-arch 11, iPXE arm64 binary)
- Added QEMU test script (aarch64+KVM on Asahi Linux)
- All API endpoints unit tested and working

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
242
test-bastion.sh
Executable file
242
test-bastion.sh
Executable file
@@ -0,0 +1,242 @@
|
||||
#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────
# test-bastion.sh — End-to-end test of PXE bastion using QEMU
#
# Creates an isolated virtual network, starts the bastion in full DHCP
# mode, and PXE boots a QEMU VM to test the discovery flow.
#
# Uses aarch64 + KVM on Apple Silicon for near-native speed.
#
# Usage:
#   sudo bash test-bastion.sh              # discover test (default)
#   sudo bash test-bastion.sh --install    # discover + install test
#   sudo bash test-bastion.sh --cleanup    # remove test artifacts
#
# Requirements: qemu-system-aarch64, qemu-img, edk2-aarch64, dnsmasq,
#               python3, curl
# ─────────────────────────────────────────────────────────────────────
set -euo pipefail

# Absolute directory of this script; bastion.sh is invoked from the same
# directory later on.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# The first positional argument selects the run mode; with no argument the
# script runs the discover-only test.
MODE="${1:---discover}"

# Reject anything that is not a known mode. Without this check a mistyped
# flag (e.g. "--instal") would silently run the discover-only test.
case "$MODE" in
  --discover | --install | --cleanup) ;;
  *)
    printf 'Unknown option: %s\n' "$MODE" >&2
    printf 'Usage: %s [--discover|--install|--cleanup]\n' "${0##*/}" >&2
    exit 2
    ;;
esac

# Virtual network config
# BRIDGE is the host-side bridge of the test network and TAP is the tap
# device that gets attached to it; both are created and deleted by this
# script.
BRIDGE="lab-br0"
TAP="lab-tap0"
# Address assigned to the bridge; the bastion HTTP endpoint is probed on it.
BRIDGE_IP="10.99.0.1"
BRIDGE_CIDR="${BRIDGE_IP}/24"
# Network address of the /24, shown in log output.
BRIDGE_NET="10.99.0.0"

# Test dir
# Everything the test writes (bastion log, VM disk, UEFI vars copy) lives
# under TEST_DIR so that `--cleanup` can remove it in one go.
TEST_DIR="/tmp/lab-bastion-test"
BASTION_LOG="$TEST_DIR/bastion.log"
DISK="$TEST_DIR/test-disk.qcow2"

# UEFI firmware images. The defaults can be overridden from the environment
# for hosts that keep the firmware elsewhere, e.g.
#   sudo OVMF_CODE=/path/to/code.fd bash test-bastion.sh
OVMF_CODE="${OVMF_CODE:-/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw}"
OVMF_VARS_TEMPLATE="${OVMF_VARS_TEMPLATE:-/usr/share/AAVMF/AAVMF_VARS.fd}"
# Writable per-test copy of the vars template.
OVMF_VARS="$TEST_DIR/AAVMF_VARS.fd"

# Colors
# Kept as literal backslash sequences (single-quoted). They only become real
# terminal escape codes when printed with escape interpretation enabled
# (e.g. `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m' # reset ("no color")

# Print an informational line on stdout, tagged with a green "[test]" prefix.
# Backslash escapes in the message (colors, \n) are interpreted.
log() {
  printf '%b\n' "${GREEN}[test]${NC} $*"
}
# Print a warning line on stdout, tagged with a yellow "[test]" prefix.
# Backslash escapes in the message (colors, \n) are interpreted.
warn() {
  printf '%b\n' "${YELLOW}[test]${NC} $*"
}
# Print an error line on stderr, tagged with a red "[test]" prefix, then
# terminate the script with exit status 1. Backslash escapes in the message
# (colors, \n) are interpreted.
die() {
  printf '%b\n' "${RED}[test]${NC} $*" >&2
  exit 1
}
# ──── Cleanup subcommand ──────────────────────────────────────────
# `--cleanup` removes the virtual interfaces and the test directory left by
# an earlier run, then exits without running any test.
if [[ "$MODE" == "--cleanup" ]]; then
  # The interface deletions below discard their errors, so without this
  # check a non-root invocation would skip them silently.
  [[ $EUID -eq 0 ]] || die "Run as root: sudo bash test-bastion.sh --cleanup"

  echo "Cleaning up test artifacts..."
  # Best-effort: the interfaces may simply not exist.
  ip link del "$TAP" 2>/dev/null || true
  ip link del "$BRIDGE" 2>/dev/null || true
  # ${TEST_DIR:?} aborts instead of expanding to an empty path.
  rm -rf -- "${TEST_DIR:?}"
  echo "Done."
  exit 0
fi

# ──── Preflight ───────────────────────────────────────────────────
# Verify privileges, tools, firmware files and KVM before anything is
# created, so a missing prerequisite never leaves a half-built environment.
[[ $EUID -eq 0 ]] || die "Run as root: sudo bash test-bastion.sh"

# Collect every missing tool so they are all reported in a single message.
MISSING=""
for tool in qemu-system-aarch64 qemu-img dnsmasq python3 curl; do
  command -v "$tool" >/dev/null || MISSING="$MISSING $tool"
done

[[ -z "$MISSING" ]] || die "Missing:$MISSING\n Install with: sudo dnf install$MISSING"
[[ -f "$OVMF_CODE" ]] || die "UEFI firmware not found: $OVMF_CODE\n Install with: sudo dnf install edk2-aarch64"
# The vars template is only copied after the network and the bastion are
# already up; checking it here makes a missing file fail early.
[[ -f "$OVMF_VARS_TEMPLATE" ]] || die "UEFI vars template not found: $OVMF_VARS_TEMPLATE"
[[ -e /dev/kvm ]] || die "/dev/kvm not available — KVM required for aarch64 testing"

mkdir -p "$TEST_DIR"

# ──── Cleanup handler ─────────────────────────────────────────────
# PIDs of the background helpers; empty until the matching process has been
# started, which lets cleanup() skip whatever is not running yet.
BASTION_PID=""
TAIL_PID=""

#######################################
# Stop the background processes and remove the virtual interfaces.
# Every step is best-effort: errors are discarded so that one failure does
# not prevent the remaining teardown.
# Globals:  TAIL_PID, BASTION_PID, TAP, BRIDGE, BASTION_LOG, TEST_DIR (read)
# Outputs:  progress messages via log()
#######################################
cleanup() {
  echo ""
  log "Cleaning up..."
  if [[ -n "$TAIL_PID" ]]; then
    kill "$TAIL_PID" 2>/dev/null || true
  fi
  if [[ -n "$BASTION_PID" ]]; then
    kill "$BASTION_PID" 2>/dev/null || true
  fi
  # Short pause before the interfaces are removed.
  sleep 1
  ip link set "$TAP" down 2>/dev/null || true
  ip link del "$TAP" 2>/dev/null || true
  ip link set "$BRIDGE" down 2>/dev/null || true
  ip link del "$BRIDGE" 2>/dev/null || true
  log "Done. Logs: $BASTION_LOG State: $TEST_DIR/bastion/state.json"
}

# cleanup runs from the EXIT trap only. INT and TERM are converted into a
# plain exit (128 + signal number) so that the EXIT trap fires once; if
# cleanup were installed on the signals directly, the script would resume
# after the handler returned and cleanup would run a second time at exit.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# ──── Create isolated virtual network ─────────────────────────────
log "Creating virtual network ${BOLD}${BRIDGE_NET}/24${NC} ..."

# Clean up leftovers from previous runs
ip link del "$TAP" 2>/dev/null || true
ip link del "$BRIDGE" 2>/dev/null || true

# Bridge: carries the test network and holds the host-side address.
ip link add "$BRIDGE" type bridge
ip addr add "$BRIDGE_CIDR" dev "$BRIDGE"
ip link set "$BRIDGE" up

# Tap: attached to the bridge; QEMU is pointed at it by name later on.
ip tuntap add dev "$TAP" mode tap
ip link set "$TAP" master "$BRIDGE"
ip link set "$TAP" up

log "Bridge ${BOLD}$BRIDGE${NC} at ${BOLD}$BRIDGE_IP${NC}, tap ${BOLD}$TAP${NC}"

# ──── Start bastion ───────────────────────────────────────────────
log "Starting bastion (full DHCP mode, aarch64)..."

# Create the log file up front so the `tail` below always finds it, without
# having to sleep until the background redirection has created it.
: > "$BASTION_LOG"

# Override ARCH to aarch64 for the test VM
# The settings are passed as per-command environment variables, so they
# apply to this bastion.sh invocation only.
IFACE="$BRIDGE" \
DHCP_MODE="full" \
ARCH="aarch64" \
BASTION_DIR="$TEST_DIR/bastion" \
HTTP_PORT=8080 \
  bash "$SCRIPT_DIR/bastion.sh" serve > "$BASTION_LOG" 2>&1 &
BASTION_PID=$!

# Tail bastion output
# --pid makes tail terminate on its own once the bastion process is gone.
tail --pid="$BASTION_PID" -f "$BASTION_LOG" 2>/dev/null &
TAIL_PID=$!

# Wait for bastion HTTP to be ready
# Polls the boot script URL about once per second for at most 60 seconds and
# stops early if the bastion process has died.
log "Waiting for bastion to start..."
READY=false
deadline=$((SECONDS + 60))
while ((SECONDS < deadline)); do
  # --max-time bounds a single probe so a hung connection cannot stretch
  # the overall wait far beyond the deadline.
  if curl -sf --max-time 2 "http://${BRIDGE_IP}:8080/boot.ipxe" >/dev/null 2>&1; then
    READY=true
    break
  fi
  # kill -0 sends no signal; it only tests whether the process still exists.
  if ! kill -0 "$BASTION_PID" 2>/dev/null; then
    echo ""
    log "Bastion failed to start. Last 20 lines:"
    tail -n 20 "$BASTION_LOG"
    die "Bastion exited unexpectedly"
  fi
  sleep 1
done

[[ "$READY" == true ]] || die "Bastion HTTP not responding after 60s"
log "Bastion is ready!"

# ──── Prepare UEFI vars and disk ──────────────────────────────────
# Always start from the pristine vars template. A vars file left behind by
# an earlier run may carry boot settings that make the VM boot from disk
# instead of PXE (the install phase resets the file for the same reason).
cp -- "$OVMF_VARS_TEMPLATE" "$OVMF_VARS"

# The disk image is reused across runs; it is only created when missing.
if [[ ! -f "$DISK" ]]; then
  log "Creating 20G test disk..."
  qemu-img create -f qcow2 "$DISK" 20G >/dev/null
fi

# ──── Boot QEMU VM ────────────────────────────────────────────────
echo ""
log "${BOLD}Booting QEMU VM (aarch64 + KVM — PXE network boot)${NC}"
log "UEFI firmware will attempt PXE boot automatically."
log "Watch for ${BOLD}'NEW MACHINE DISCOVERED'${NC} in bastion output."
echo ""
echo -e "${CYAN}──── QEMU console ────${NC}"
echo ""

# aarch64 UEFI PXE boot with KVM acceleration
#   - virtio-net-pci for networking (UEFI has PXE driver)
#   - pflash for UEFI firmware (code + vars)
#   - no disk boot priority → falls through to PXE
# The arguments live in an array so that every option stays exactly one
# word, whatever characters the expanded paths contain.
qemu_args=(
  -machine virt,gic-version=3
  -cpu host
  --enable-kvm
  -m 2048
  -smp 2
  -drive "if=pflash,format=raw,readonly=on,file=$OVMF_CODE"
  -drive "if=pflash,format=raw,file=$OVMF_VARS"
  -drive "if=virtio,format=qcow2,file=$DISK"
  -netdev "tap,id=net0,ifname=$TAP,script=no,downscript=no"
  -device virtio-net-pci,netdev=net0
  -boot n
  -nographic
)
# Runs in the foreground; the script continues once QEMU exits.
qemu-system-aarch64 "${qemu_args[@]}"

# ──── Post-test ───────────────────────────────────────────────────
#######################################
# Count the entries of the "discovered" object in a JSON document.
# Inputs:   JSON text on stdin
# Outputs:  the number of entries on stdout; "0" when the input cannot be
#           parsed or has no "discovered" object
#######################################
_count_discovered() {
  python3 -c '
import sys, json
state = json.load(sys.stdin)
print(len(state.get("discovered", {})))
' 2>/dev/null || echo "0"
}

echo ""
log "QEMU exited. Checking bastion state..."

# Fall back to an empty JSON object when the API cannot be reached, so the
# count below becomes 0 instead of aborting the script.
STATE=$(curl -sf --max-time 5 "http://${BRIDGE_IP}:8080/api/machines" 2>/dev/null || echo '{}')
DISCOVERED=$(printf '%s\n' "$STATE" | _count_discovered)
# Guard the numeric comparisons below against anything that is not a
# plain non-negative integer.
[[ "$DISCOVERED" =~ ^[0-9]+$ ]] || DISCOVERED=0

echo ""
if [[ "$DISCOVERED" -gt 0 ]]; then
  log "${GREEN}${BOLD}SUCCESS — $DISCOVERED machine(s) discovered!${NC}"
  # Prefer the bastion's own listing; if that fails, pretty-print the raw
  # JSON that was fetched above.
  HTTP_PORT=8080 bash "$SCRIPT_DIR/bastion.sh" list 2>/dev/null \
    || printf '%s\n' "$STATE" | python3 -m json.tool
else
  warn "No machines discovered. Check bastion log: $BASTION_LOG"
fi

# ──── Install phase (if requested) ────────────────────────────────
# Runs only for `--install`, and only when the discover phase found at least
# one machine: the first discovered MAC is queued for install and the VM is
# booted a second time.
if [[ "$MODE" == "--install" && "$DISCOVERED" -gt 0 ]]; then
  # First key of the "discovered" object. The `|| MAC=""` keeps `set -e`
  # from aborting on a parse failure, so the check below can report it.
  MAC=$(printf '%s\n' "$STATE" | python3 -c '
import sys, json
state = json.load(sys.stdin)
print(list(state.get("discovered", {}).keys())[0])
' 2>/dev/null) || MAC=""

  if [[ -n "$MAC" ]]; then
    echo ""
    log "Install mode: queuing ${BOLD}$MAC${NC} as ${BOLD}test-node${NC}..."
    HTTP_PORT=8080 bash "$SCRIPT_DIR/bastion.sh" install "$MAC" test-node

    # Reset UEFI vars so it PXE boots again (not from disk)
    cp -- "$OVMF_VARS_TEMPLATE" "$OVMF_VARS"

    echo ""
    log "Re-booting QEMU for install phase..."
    echo ""
    echo -e "${CYAN}──── QEMU console (install phase) ────${NC}"
    echo ""

    # Same VM definition as the discover boot. The arguments live in an
    # array so that every option stays exactly one word.
    qemu_args=(
      -machine virt,gic-version=3
      -cpu host
      --enable-kvm
      -m 2048
      -smp 2
      -drive "if=pflash,format=raw,readonly=on,file=$OVMF_CODE"
      -drive "if=pflash,format=raw,file=$OVMF_VARS"
      -drive "if=virtio,format=qcow2,file=$DISK"
      -netdev "tap,id=net0,ifname=$TAP,script=no,downscript=no"
      -device virtio-net-pci,netdev=net0
      -boot n
      -nographic
    )
    qemu-system-aarch64 "${qemu_args[@]}"

    echo ""
    log "Install phase complete."
  else
    warn "Could not read a MAC address from bastion state; skipping install phase."
  fi
fi

echo ""
log "Test finished."
Reference in New Issue
Block a user