#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────
# Lab PXE Bastion — discover-first bare-metal provisioning
#
# Default mode: DISCOVER. Any machine that PXE boots gets inventoried
# and powered off. You review what appeared, then promote to install.
#
# Usage:
#   sudo bash bastion.sh                             # start bastion (discover mode)
#   bash bastion.sh install <mac> <hostname>         # queue discovered machine for install
#   bash bastion.sh reprovision <mac> <hostname>     # re-queue a machine and try an SSH-triggered PXE reboot
#   bash bastion.sh list                             # show discovered/queued machines
#
# Flow:
#   1. Start bastion     → sudo bash bastion.sh
#   2. Power on machine  → PXE boots, hardware discovered, powers off
#   3. Queue for install → bash bastion.sh install aa:bb:cc:dd:ee:ff puppet
#   4. Power on again    → PXE boots, Fedora installed, reboots into OS
#
# Requirements: Fedora/RHEL host with dnsmasq, python3, curl
# ─────────────────────────────────────────────────────────────────────
set -euo pipefail

# ──── Configuration (override via environment) ────────────────────
# Each setting keeps a non-empty value exported by the caller and otherwise
# falls back to the default shown here.
FEDORA_VERSION="${FEDORA_VERSION:-43}"
ARCH="${ARCH:-x86_64}"
HTTP_PORT="${HTTP_PORT:-8080}"
TIMEZONE="${TIMEZONE:-Europe/London}"
LOCALE="${LOCALE:-en_GB.UTF-8}"
BASTION_DIR="${BASTION_DIR:-/tmp/lab-bastion}"
DOMAIN="${DOMAIN:-ad.itaz.eu}"            # internal domain for hostnames
DHCP_MODE="${DHCP_MODE:-proxy}"           # proxy (alongside existing DHCP) or full (bastion IS the DHCP server)
DHCP_RANGE_START="${DHCP_RANGE_START:-}"  # only for full mode, auto-derived if empty
DHCP_RANGE_END="${DHCP_RANGE_END:-}"
# ──── Colors ──────────────────────────────────────────────────────
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'
CYAN='\033[0;36m'; BOLD='\033[1m'; NC='\033[0m'

# Logging helpers. Each prints a coloured "[bastion]" prefix followed by its
# arguments. `echo -e` is deliberate: callers embed colour escapes such as
# ${BOLD} in the message text.
#   log  — informational, stdout
#   warn — warning, stdout
#   err  — error, stderr
#   die  — err, then exit with status 1
log() { echo -e "${GREEN}[bastion]${NC} $*"; }
warn() { echo -e "${YELLOW}[bastion]${NC} $*"; }
err() { echo -e "${RED}[bastion]${NC} $*" >&2; }
die() { err "$@"; exit 1; }
# ──── Subcommand handling ─────────────────────────────────────────
# install, reprovision and list are thin HTTP clients for an already running
# bastion on localhost:$HTTP_PORT and always exit; serve falls through to the
# rest of the script.

#######################################
# Print the JSON body for POST /api/install.
# Values reach Python through argv, never through string interpolation, so
# quotes or backslashes in an argument cannot break or alter the program.
# Arguments: $1 mac, $2 hostname, $3 disk (may be empty), $4 role (may be empty)
# Outputs:   one JSON object on stdout; empty fields are omitted
#######################################
build_install_payload() {
  python3 -c '
import json
import sys

names = ("mac", "hostname", "disk", "role")
print(json.dumps({k: v for k, v in zip(names, sys.argv[1:5]) if v}))
' "${1:-}" "${2:-}" "${3:-}" "${4:-}"
}

#######################################
# Parse "[--role R] [--disk D]" into the globals ROLE and DISK.
# Globals:   DISK, ROLE (written; ROLE defaults to "worker")
# Arguments: the option words remaining after <mac> <hostname>
# Exits 1 on an unknown option or an option without a value.
#######################################
parse_install_options() {
  DISK="" ROLE="worker"
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --disk | --role)
        # Without this check a trailing "--disk" dies on an unbound "$2".
        [[ $# -ge 2 ]] || { echo "Option $1 requires a value"; exit 1; }
        if [[ "$1" == "--disk" ]]; then DISK="$2"; else ROLE="$2"; fi
        shift 2
        ;;
      *) echo "Unknown option: $1"; exit 1 ;;
    esac
  done
}

#######################################
# POST an install request to the running bastion and print its reply.
# Globals:   HTTP_PORT (read)
# Arguments: $1 mac, $2 hostname, $3 disk, $4 role
#######################################
queue_install() {
  local payload reply
  payload="$(build_install_payload "$1" "$2" "$3" "$4")"
  reply="$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
    -H "Content-Type: application/json" \
    -d "$payload" 2>&1)" || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
  # Pretty-print when the reply is JSON, otherwise show it verbatim.
  echo "$reply" | python3 -m json.tool 2>/dev/null || echo "$reply"
}

#######################################
# Render the /api/machines JSON (read from stdin) as three text sections:
# DISCOVERED, INSTALL QUEUE and INSTALLED.
#######################################
render_machine_list() {
  python3 -c "
import sys, json
state = json.load(sys.stdin)
discovered = state.get('discovered', {})
queue = state.get('install_queue', {})
installed = state.get('installed', {})
print()
print('\033[1mDISCOVERED\033[0m')
if discovered:
    print(f' {\"MAC\":<20} {\"CPU\":<32} {\"CORES\":<6} {\"RAM\":<8} {\"ARCH\":<10} {\"PRODUCT\"}')
    for mac, hw in discovered.items():
        status = ' [QUEUED]' if mac in queue else ''
        print(f' {mac:<20} {hw.get(\"cpu_model\",\"?\"):<32} {hw.get(\"cpu_cores\",\"?\"):<6} {str(hw.get(\"memory_gb\",\"?\"))+\"GB\":<8} {hw.get(\"arch\",\"?\"):<10} {hw.get(\"product\",\"?\")}{status}')
else:
    print(' (none — PXE boot a machine to discover it)')
print()
print('\033[1mINSTALL QUEUE\033[0m')
if queue:
    for mac, cfg in queue.items():
        print(f' {mac:<20} → hostname={cfg.get(\"hostname\",\"?\")}')
else:
    print(' (none)')
print()
print('\033[1mINSTALLED\033[0m')
if installed:
    for mac, info in installed.items():
        ip = info.get('ip', '')
        ip_str = f' ip={ip}' if ip else ''
        print(f' {mac:<20} → {info.get(\"hostname\",\"?\")} role={info.get(\"role\",\"?\")}{ip_str} ({info.get(\"installed_at\",\"?\")})')
else:
    print(' (none)')
print()
"
}

CMD="${1:-serve}"
case "$CMD" in
  install)
    [[ $# -ge 3 ]] || { echo "Usage: bastion.sh install <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
    MAC="$2"
    TARGET_HOSTNAME="$3"
    shift 3
    parse_install_options "$@"
    queue_install "$MAC" "$TARGET_HOSTNAME" "$DISK" "$ROLE"
    echo ""
    echo "Power on the machine to start Fedora installation."
    exit 0
    ;;
  list)
    RESULT=$(curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>&1) || \
      die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
    # Fall back to the raw reply if it cannot be rendered.
    echo "$RESULT" | render_machine_list 2>/dev/null || echo "$RESULT"
    exit 0
    ;;
  reprovision)
    [[ $# -ge 3 ]] || { echo "Usage: bastion.sh reprovision <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
    MAC="$2"
    TARGET_HOSTNAME="$3"
    shift 3
    parse_install_options "$@"
    # Queue the install
    queue_install "$MAC" "$TARGET_HOSTNAME" "$DISK" "$ROLE"
    # Try to find IP from installed state and SSH in to trigger PXE reboot.
    # The MAC is handed to Python as argv[1] rather than spliced into its source.
    IP=$(curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>/dev/null | \
      python3 -c 'import sys, json; d = json.load(sys.stdin); print(d.get("installed", {}).get(sys.argv[1], {}).get("ip", ""))' "$MAC" 2>/dev/null || echo "")
    ADMIN_USER="${SUDO_USER:-${USER:-}}"
    if [[ "$ADMIN_USER" == "root" ]]; then
      ADMIN_USER=""
    fi
    if [[ -n "$IP" && -n "$ADMIN_USER" ]]; then
      echo ""
      echo "Attempting SSH reboot into PXE ($ADMIN_USER@$IP)..."
      # The remote script is single-quoted on purpose: it must expand on the
      # target machine, not here.
      # shellcheck disable=SC2016
      if ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "$ADMIN_USER@$IP" \
        'sudo efibootmgr 2>/dev/null; PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi' 2>&1; then
        echo ""
        echo "Machine is rebooting into PXE. Install will start automatically."
      else
        echo ""
        echo "SSH failed. Reboot the machine manually into PXE (e.g. via IPMI/KVM)."
      fi
    else
      echo ""
      echo "No IP known for this machine. Reboot it manually into PXE."
    fi
    exit 0
    ;;
  serve) ;; # continue below
  *)
    echo "Usage: bastion.sh [serve|install|reprovision|list]"
    exit 1
    ;;
esac
# ══════════════════════════════════════════════════════════════════
# SERVE MODE — start the bastion
# ══════════════════════════════════════════════════════════════════
# ──── Kill old instances ──────────────────────────────────────────
# Find and terminate any bastion dnsmasq / HTTP server left over from a
# previous run. Matching is on the full command line (pgrep -f). The patterns
# follow $BASTION_DIR; with the default they are identical to the previously
# hard-coded /tmp/lab-bastion paths.
# NOTE(review): assumes both daemons are started with paths under
# $BASTION_DIR — confirm against the launch commands further down.
OLD_BASTION_DIR="${BASTION_DIR:-/tmp/lab-bastion}"
OLD_DNSMASQ=$(pgrep -f "dnsmasq --no-daemon --conf-file=${OLD_BASTION_DIR}" 2>/dev/null || true)
OLD_HTTP=$(pgrep -f "python3 ${OLD_BASTION_DIR}/server.py" 2>/dev/null || true)
if [[ -n "$OLD_DNSMASQ" || -n "$OLD_HTTP" ]]; then
  warn "Killing old bastion processes..."
  if [[ -n "$OLD_DNSMASQ" ]]; then
    # pgrep may return several PIDs, one per line: split them on purpose.
    # shellcheck disable=SC2086
    kill $OLD_DNSMASQ 2>/dev/null && log " Stopped old dnsmasq (PID ${OLD_DNSMASQ//$'\n'/ })" || true
  fi
  if [[ -n "$OLD_HTTP" ]]; then
    # shellcheck disable=SC2086
    kill $OLD_HTTP 2>/dev/null && log " Stopped old HTTP server (PID ${OLD_HTTP//$'\n'/ })" || true
  fi
  # Give the old processes a moment to release their ports.
  sleep 1
fi
# ──── Preflight ───────────────────────────────────────────────────
# Root is required; python3 and curl must already be present. dnsmasq, the
# iPXE boot images and the gnu-efi headers are installed on demand.
[[ $EUID -eq 0 ]] || die "Must run as root (need DHCP/TFTP ports). Use: sudo bash bastion.sh"
command -v python3 >/dev/null || die "python3 not found"
command -v curl >/dev/null || die "curl not found"

# Collect the packages whose marker binary/file is missing.
INSTALL_PKGS=()
command -v dnsmasq >/dev/null || INSTALL_PKGS+=(dnsmasq)
[[ -f /usr/share/ipxe/undionly.kpxe ]] || INSTALL_PKGS+=(ipxe-bootimgs-x86)
[[ -f /usr/share/ipxe/arm64-efi/snponly.efi ]] || INSTALL_PKGS+=(ipxe-bootimgs-aarch64)
[[ -f /usr/include/efi/efi.h ]] || INSTALL_PKGS+=(gnu-efi-devel)

if [[ ${#INSTALL_PKGS[@]} -gt 0 ]]; then
  log "Installing ${INSTALL_PKGS[*]}..."
  # NOTE(review): the package names above look Fedora-specific; the apt-get
  # branch passes the same names — confirm they exist on the target distro.
  if command -v dnf >/dev/null; then
    dnf install -y "${INSTALL_PKGS[@]}" || die "Package installation failed: ${INSTALL_PKGS[*]}"
  elif command -v apt-get >/dev/null; then
    apt-get install -y "${INSTALL_PKGS[@]}" || die "Package installation failed: ${INSTALL_PKGS[*]}"
  else
    die "Cannot install packages — install manually: ${INSTALL_PKGS[*]}"
  fi
fi
# ──── Auto-detect network ────────────────────────────────────────
# IFACE may be preset in the environment; otherwise the interface of the
# first default route is used. Each probe ends in `|| true` so that a failing
# `ip` call reaches the explicit checks below instead of tripping
# `set -e`/`pipefail` with no message.
IFACE="${IFACE:-$(ip route 2>/dev/null | awk '/default/ {print $5; exit}' || true)}"
[[ -n "$IFACE" ]] || die "Cannot detect the default-route interface. Set IFACE=<name> and retry."
SERVER_IP="$(ip -4 addr show "$IFACE" 2>/dev/null | awk '/inet / {split($2,a,"/"); print a[1]; exit}' || true)"
[[ -n "$SERVER_IP" ]] || die "Cannot detect IP on interface $IFACE"
# The network address is the server IP with the last octet zeroed, i.e. a
# /24 is assumed regardless of the real prefix length.
NETWORK="${SERVER_IP%.*}.0"
GATEWAY="$(ip route 2>/dev/null | awk '/default/ {print $3; exit}' || true)"
log "Interface: ${BOLD}$IFACE${NC} IP: ${BOLD}$SERVER_IP${NC} Network: ${BOLD}$NETWORK${NC}"
# ──── Auto-detect SSH keys ───────────────────────────────────────
# Keys are taken from the invoking user's home (the sudo caller's, when run
# through sudo), in this order:
#   1. ~/.ssh/authorized_keys (comment and empty lines dropped)
#   2. local public keys id_ed25519/id_rsa/id_ecdsa, unless already listed
#   3. a freshly generated ed25519 keypair under $BASTION_DIR, if 1+2 are empty
REAL_HOME="${HOME:-}"
if [[ -n "${SUDO_USER:-}" ]]; then
  SUDO_HOME="$(getent passwd "$SUDO_USER" | cut -d: -f6 || true)"
  [[ -n "$SUDO_HOME" ]] && REAL_HOME="$SUDO_HOME"
fi

SSH_KEYS_CONTENT=""
SSH_KEY_SOURCE=""
if [[ -f "$REAL_HOME/.ssh/authorized_keys" ]]; then
  # awk always exits 0 here; the former `grep -v | grep -v` returned 1 on a
  # file with no key lines, which aborted the script under set -e/pipefail.
  SSH_KEYS_CONTENT="$(awk '!/^#/ && !/^$/' "$REAL_HOME/.ssh/authorized_keys")"
  if [[ -n "$SSH_KEYS_CONTENT" ]]; then
    SSH_KEY_SOURCE="$REAL_HOME/.ssh/authorized_keys"
  fi
fi

# Also include local pubkey files (they may not be in authorized_keys)
for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do
  [[ -s "$keyfile" ]] || continue
  KEY_DATA="$(cat "$keyfile")"
  # Second field of a public key line is the base64 key blob; it is used as
  # the identity for de-duplication.
  KEY_FP="$(awk '{print $2}' "$keyfile")"
  if [[ -z "$SSH_KEYS_CONTENT" ]]; then
    SSH_KEYS_CONTENT="$KEY_DATA"
    SSH_KEY_SOURCE="$keyfile"
  elif ! grep -qF -- "$KEY_FP" <<<"$SSH_KEYS_CONTENT"; then
    # Add only if not already present
    SSH_KEYS_CONTENT="$SSH_KEYS_CONTENT"$'\n'"$KEY_DATA"
    SSH_KEY_SOURCE="${SSH_KEY_SOURCE} + $keyfile"
  fi
done

# Last resort: generate a keypair
if [[ -z "$SSH_KEYS_CONTENT" ]]; then
  GENERATED_KEY="$BASTION_DIR/bastion_ed25519"
  if [[ ! -f "$GENERATED_KEY" ]]; then
    log "No SSH keys found — generating ed25519 keypair..."
    # $BASTION_DIR is only populated further down, so make sure it exists.
    mkdir -p "$BASTION_DIR"
    ssh-keygen -t ed25519 -f "$GENERATED_KEY" -N "" -C "bastion-generated@$(hostname)" >/dev/null 2>&1 \
      || die "ssh-keygen failed to create $GENERATED_KEY"
  fi
  SSH_KEYS_CONTENT="$(cat "${GENERATED_KEY}.pub")"
  SSH_KEY_SOURCE="$GENERATED_KEY (generated)"
  warn "Using generated keypair: ${BOLD}$GENERATED_KEY${NC}"
  warn "Save this private key — it's the only way to access installed machines."
fi

SSH_KEY_COUNT="$(wc -l <<<"$SSH_KEYS_CONTENT")"
log "SSH keys: ${BOLD}${SSH_KEY_COUNT} key(s)${NC} from ${BOLD}${SSH_KEY_SOURCE}${NC}"
# ──── Detect admin username ──────────────────────────────────────
#######################################
# Print the account that invoked the script: $SUDO_USER when set, otherwise
# $USER. Prints nothing when that account is root or when neither variable
# is set (the latter would otherwise trip `set -u`).
# Globals: SUDO_USER, USER (read)
# Outputs: the user name, without a trailing newline
#######################################
detect_admin_user() {
  local candidate="${SUDO_USER:-${USER:-}}"
  if [[ "$candidate" == "root" ]]; then
    candidate=""
  fi
  printf '%s' "$candidate"
}

ADMIN_USER="$(detect_admin_user)"
if [[ -n "$ADMIN_USER" ]]; then
  log "Admin user: ${BOLD}${ADMIN_USER}${NC} (will be created on installed machines)"
fi
# ──── Prepare directories ────────────────────────────────────────
# Working tree under $BASTION_DIR: tftp/ and http/ hold the files served to
# booting machines, state.json records discovered/queued/installed machines.
TFTPDIR="$BASTION_DIR/tftp"
HTTPDIR="$BASTION_DIR/http"
STATEFILE="$BASTION_DIR/state.json"
mkdir -p "$TFTPDIR" "$HTTPDIR"

# Initialize state if not present. -s (rather than -f) also re-seeds a
# zero-byte file, which is not valid JSON.
if [[ ! -s "$STATEFILE" ]]; then
  printf '%s\n' '{"discovered":{},"install_queue":{},"installed":{}}' > "$STATEFILE"
fi
# ──── Cleanup handler ─────────────────────────────────────────────
DNSMASQ_PID=""
HTTP_PID=""
FW_OPENED=false
cleanup() {
echo ""
log "Shutting down..."
[[ -n "$HTTP_PID" ]] && kill "$HTTP_PID" 2>/dev/null && log "Stopped HTTP server"
2026-03-16 00:00:13 +00:00
[[ -n "$DNSMASQ_PID" ]] && kill "$DNSMASQ_PID" 2>/dev/null && log "Stopped dnsmasq"
2026-03-15 23:50:43 +00:00
if $FW_OPENED && command -v firewall-cmd >/dev/null; then
log "Removing firewall rules..."
firewall-cmd --quiet ${FW_ZONE_FLAG:-} --remove-service=dhcp 2>/dev/null || true
firewall-cmd --quiet ${FW_ZONE_FLAG:-} --remove-service=tftp 2>/dev/null || true
firewall-cmd --quiet ${FW_ZONE_FLAG:-} --remove-port=${HTTP_PORT}/tcp 2>/dev/null || true
firewall-cmd --quiet ${FW_ZONE_FLAG:-} --remove-port=4011/udp 2>/dev/null || true
2026-03-15 23:50:43 +00:00
fi
2026-03-16 00:00:13 +00:00
log "State preserved in $STATEFILE"
log "Restart bastion with: sudo bash bastion.sh"
2026-03-15 23:50:43 +00:00
}
trap cleanup EXIT INT TERM
# ──── Prepare boot artifacts ─────────────────────────────────────
#######################################
# Fetch a URL into a file unless a non-empty copy already exists.
# The transfer goes to "<dest>.part" and is renamed only after curl succeeds,
# so an interrupted download is never mistaken for a cached file.
# Arguments: $1 - source URL
#            $2 - destination path
#            $3 - human-readable label for log messages
# Exits via die when the download fails.
#######################################
download() {
  local url="$1" dest="$2" label="$3"
  local partial="${dest}.part"
  if [[ -s "$dest" ]]; then
    log " ${label} — cached"
    return
  fi
  log " ${label} — downloading..."
  if ! curl -# -L -f -o "$partial" "$url"; then
    rm -f -- "$partial"
    die "Failed to download $label from $url"
  fi
  mv -f -- "$partial" "$dest"
}
#######################################
# Copy a local file into place unless the destination already exists.
# Arguments: $1 - source path
#            $2 - destination path
#            $3 - human-readable label for log messages
# Exits via die when the destination is absent and the source is missing.
#######################################
copy_if_missing() {
  local origin="$1" target="$2" tag="$3"
  if [[ ! -f "$target" ]]; then
    if [[ ! -f "$origin" ]]; then
      die "${tag}: source not found at $origin"
    fi
    cp "$origin" "$target"
    log " ${tag} — copied from $origin"
  else
    log " ${tag} — cached"
  fi
}
#######################################
# Compile a C source into an x86_64 UEFI application with gnu-efi
# (gcc → ld → objcopy), unless the output file already exists.
# Globals:   BASTION_DIR (read; intermediates go to $BASTION_DIR/pxeloader-build)
#            GNUEFI_LIB, GNUEFI_INC (read, optional overrides; the defaults are
#            the previously hard-coded /usr/lib and /usr/include/efi)
# Arguments: $1 - C source file
#            $2 - output .efi path
#            $3 - human-readable label for log messages
# Exits via die when any build step fails.
# NOTE(review): nothing in the visible part of the script calls this
# function — confirm it is still needed.
#######################################
build_pxeloader() {
  local src="$1" dest="$2" label="$3"
  if [[ -f "$dest" ]]; then
    log " ${label} — cached ($(stat -c%s "$dest") bytes)"
    return
  fi
  log " ${label} — building UEFI PXE loader stub..."
  local builddir="$BASTION_DIR/pxeloader-build"
  mkdir -p "$builddir"
  local gnuefi_lib gnuefi_inc
  gnuefi_lib="${GNUEFI_LIB:-/usr/lib}"
  gnuefi_inc="${GNUEFI_INC:-/usr/include/efi}"
  # Compile
  gcc -I"$gnuefi_inc" -I"$gnuefi_inc/x86_64" -I"$gnuefi_inc/protocol" \
    -DGNU_EFI_USE_MS_ABI -fPIC -fshort-wchar -ffreestanding \
    -fno-stack-protector -mno-red-zone -maccumulate-outgoing-args \
    -Wall -Os -c -o "$builddir/pxeloader.o" "$src" || die "PXE loader compile failed"
  # Link
  ld -nostdlib -znocombreloc -shared -Bsymbolic \
    -T "$gnuefi_lib/elf_x86_64_efi.lds" \
    "$gnuefi_lib/crt0-efi-x86_64.o" \
    "$builddir/pxeloader.o" \
    -o "$builddir/pxeloader.so" \
    -lgnuefi -lefi -L"$gnuefi_lib" || die "PXE loader link failed"
  # Convert to PE/COFF EFI binary. The section patterns are quoted so that
  # objcopy receives the wildcards instead of the shell expanding them
  # against files in the current directory.
  objcopy -j .text -j .sdata -j .data -j .dynamic -j .rodata -j .dynsym \
    -j .rel -j .rela -j '.rel.*' -j '.rela.*' -j '.rel*' -j '.rela*' \
    -j .reloc --target efi-app-x86_64 \
    "$builddir/pxeloader.so" "$dest" || die "PXE loader objcopy failed"
  local size
  size="$(stat -c%s "$dest")"
  log " ${label} — built (${size} bytes / $((size/1024)) KB)"
}
FEDORA_MIRROR="https://download.fedoraproject.org/pub/fedora/linux/releases/${FEDORA_VERSION}/Everything/${ARCH}/os"
log "Preparing boot artifacts (Fedora ${FEDORA_VERSION} ${ARCH})..."

# iPXE binaries served over TFTP.
copy_if_missing "/usr/share/ipxe/undionly.kpxe" "$TFTPDIR/undionly.kpxe" "iPXE BIOS"
# UEFI x86_64: serve iPXE directly via TFTP (UEFI has no TFTP size limit)
copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe.efi" "iPXE UEFI x86_64"
copy_if_missing "/usr/share/ipxe/arm64-efi/snponly.efi" "$TFTPDIR/ipxe-arm64.efi" "iPXE UEFI arm64"

# Fedora installer kernel + initrd, served over HTTP.
download "${FEDORA_MIRROR}/images/pxeboot/vmlinuz" "$HTTPDIR/vmlinuz" "Fedora kernel"
download "${FEDORA_MIRROR}/images/pxeboot/initrd.img" "$HTTPDIR/initrd.img" "Fedora initrd"

# Symlink iPXE binaries into HTTP dir (UEFI HTTP Boot downloads via HTTP, not TFTP)
for f in "$TFTPDIR"/*.efi; do
  # An unmatched glob stays literal; skip it instead of linking "*.efi".
  [[ -e "$f" ]] || continue
  ln -sf "$f" "$HTTPDIR/${f##*/}" 2>/dev/null || true
done
# ──── Generate discovery kickstart ────────────────────────────────
# Boots the Fedora installer environment, collects hardware info, POSTs it to
# the bastion and reboots from %pre, before Anaconda can touch the disk.
# The heredoc is quoted, so nothing in it is expanded here; the only
# substitution is the __BASTION_URL__ placeholder patched in afterwards.
# NOTE(review): the file header describes discovered machines as "powered
# off", but this script reboots them (sysrq "b", reboot -f) — confirm which
# behaviour is intended.
cat > "$HTTPDIR/discover.ks" << 'DISCOVER_KS'
# Lab Bastion — Discovery Mode
# Collects hardware inventory and reboots. Does NOT install anything.

%pre --erroronfail --log=/tmp/discover.log
#!/bin/bash
set -x

# ── Collect hardware info from /proc, /sys, and available tools ──
MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
PRODUCT=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo "unknown")
BOARD=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo "unknown")
SERIAL=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo "unknown")
MANUFACTURER=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo "unknown")
CPUMODEL=$(grep -m1 'model name' /proc/cpuinfo | cut -d: -f2 | sed 's/^ //')
CPUCORES=$(grep -c '^processor' /proc/cpuinfo)
MEMGB=$(awk '/MemTotal/ {printf "%d", $2/1024/1024}' /proc/meminfo)
ARCHTYPE=$(uname -m)

# Disk info — lsblk is available in Anaconda
DISKS_JSON=$(lsblk -Jb -o NAME,SIZE,TYPE,MODEL 2>/dev/null | python3 -c "
import sys, json
data = json.load(sys.stdin)
disks = [d for d in data.get('blockdevices', []) if d.get('type') == 'disk']
result = []
for d in disks:
    size_gb = round(int(d.get('size', 0)) / 1073741824, 1)
    result.append({
        'name': d.get('name', '?'),
        'size_gb': size_gb,
        'model': (d.get('model') or 'unknown').strip()
    })
print(json.dumps(result))
" 2>/dev/null || echo '[]')

# Network interfaces
NICS_JSON=$(ip -j link show 2>/dev/null | python3 -c "
import sys, json
nics = json.load(sys.stdin)
result = []
for n in nics:
    if n.get('link_type') == 'loopback':
        continue
    result.append({
        'name': n.get('ifname', '?'),
        'mac': n.get('address', '?'),
        'state': n.get('operstate', '?')
    })
print(json.dumps(result))
" 2>/dev/null || echo '[]')

# ── Build and POST discovery payload ──
# Values are handed to Python through the environment instead of being
# spliced into its source, so quotes or backslashes in DMI strings cannot
# break the payload.
PAYLOAD=$(
  D_MAC="$MAC" D_PRODUCT="$PRODUCT" D_BOARD="$BOARD" D_SERIAL="$SERIAL" \
  D_MANUFACTURER="$MANUFACTURER" D_CPUMODEL="$CPUMODEL" D_CPUCORES="$CPUCORES" \
  D_MEMGB="$MEMGB" D_ARCH="$ARCHTYPE" D_DISKS="$DISKS_JSON" D_NICS="$NICS_JSON" \
  python3 -c '
import json
import os


def text(name):
    return os.environ.get(name, "")


def number(name):
    try:
        return int(text(name) or 0)
    except ValueError:
        return 0


def array(name):
    try:
        return json.loads(text(name) or "[]")
    except ValueError:
        return []


print(json.dumps({
    "mac": text("D_MAC"),
    "product": text("D_PRODUCT"),
    "board": text("D_BOARD"),
    "serial": text("D_SERIAL"),
    "manufacturer": text("D_MANUFACTURER"),
    "cpu_model": text("D_CPUMODEL"),
    "cpu_cores": number("D_CPUCORES"),
    "memory_gb": number("D_MEMGB"),
    "arch": text("D_ARCH"),
    "disks": array("D_DISKS"),
    "nics": array("D_NICS"),
}))
'
)

# POST to bastion — try curl first, fall back to python3 urllib
BASTION_URL="__BASTION_URL__/api/discover"
if command -v curl >/dev/null 2>&1; then
  curl -sf -X POST "$BASTION_URL" \
    -H "Content-Type: application/json" \
    -d "$PAYLOAD" || true
else
  BASTION_URL="$BASTION_URL" PAYLOAD="$PAYLOAD" python3 -c '
import os
import urllib.request

req = urllib.request.Request(
    os.environ["BASTION_URL"],
    data=os.environ["PAYLOAD"].encode(),
    headers={"Content-Type": "application/json"})
try:
    urllib.request.urlopen(req, timeout=10)
except Exception as e:
    print(f"POST failed: {e}")
'
fi

# ── Reboot — do NOT let Anaconda proceed ──
echo ""
echo "=== Discovery complete, rebooting ==="
echo ""
sleep 3
# Immediate reboot through magic SysRq; reboot -f is the fallback.
echo 1 > /proc/sys/kernel/sysrq
echo b > /proc/sysrq-trigger
sleep 5
reboot -f
%end

# Anaconda should never get here, but just in case:
reboot
DISCOVER_KS
# Patch in the bastion URL
sed -i "s|__BASTION_URL__|http://${SERVER_IP}:${HTTP_PORT}|g" "$HTTPDIR/discover.ks"
# Save SSH keys and admin user for the HTTP server to use.
# printf is used instead of echo so that a value that happens to look like an
# echo option (e.g. "-n") is still written verbatim.
printf '%s\n' "$SSH_KEYS_CONTENT" > "$BASTION_DIR/ssh_keys"
printf '%s\n' "$ADMIN_USER" > "$BASTION_DIR/admin_user"
# ──── Generate iPXE boot script ───────────────────────────────────
# The first-stage script only prints a banner and chains to /dispatch with
# the client's MAC; the server then decides per machine between discover and
# install. ${net0/mac} is an iPXE setting and must reach the file literally,
# hence the single-quoted printf format.
# shellcheck disable=SC2016
{
  printf '%s\n' \
    '#!ipxe' \
    'echo' \
    'echo ============================================' \
    'echo Lab PXE Bastion' \
    'echo Contacting server for instructions...' \
    'echo ============================================' \
    'echo'
  printf 'chain http://%s:%s/dispatch?mac=${net0/mac}\n' "$SERVER_IP" "$HTTP_PORT"
} > "$HTTPDIR/boot.ipxe"
# ──── Write the HTTP server ──────────────────────────────────────
cat > "$BASTION_DIR/server.py" << 'PYSERVER'
#!/usr/bin/env python3
"""Lab PXE Bastion — HTTP server with discovery API and per-MAC iPXE dispatch."""
import json
import os
import sys
import time
import fcntl
from http.server import HTTPServer, SimpleHTTPRequestHandler
from urllib.parse import urlparse, parse_qs
from datetime import datetime
# Config from argv
# Arguments 1-6 are required; 7-11 are optional and fall back to the
# defaults shown.
HTTP_DIR = sys.argv[1]
STATE_FILE = sys.argv[2]
SERVER_IP = sys.argv[3]
HTTP_PORT = int(sys.argv[4])
FEDORA_VER = sys.argv[5]
FEDORA_MIRROR = sys.argv[6]
SSH_KEYS_FILE = sys.argv[7] if len(sys.argv) > 7 else ""
TIMEZONE = sys.argv[8] if len(sys.argv) > 8 else "Europe/London"
LOCALE = sys.argv[9] if len(sys.argv) > 9 else "en_GB.UTF-8"
DOMAIN = sys.argv[10] if len(sys.argv) > 10 else "ad.itaz.eu"
ADMIN_USER = sys.argv[11] if len(sys.argv) > 11 else ""

# Load SSH keys from file: one key per line, blank lines and comments
# skipped. The comment test runs on the stripped line so an indented "#"
# line is not mistaken for a key.
SSH_KEYS = []
if SSH_KEYS_FILE and os.path.isfile(SSH_KEYS_FILE):
    with open(SSH_KEYS_FILE) as f:
        SSH_KEYS = [
            entry
            for entry in (line.strip() for line in f)
            if entry and not entry.startswith('#')
        ]
# ── State management (file-backed; writes replace the file atomically) ──
def load_state():
    """Return the bastion state read from STATE_FILE.

    The result always contains the "discovered", "install_queue" and
    "installed" keys. A missing or unparsable file, or a file whose top-level
    JSON value is not an object, yields the empty default state.
    """
    state = {"discovered": {}, "install_queue": {}, "installed": {}}
    try:
        with open(STATE_FILE) as f:
            loaded = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return state
    if isinstance(loaded, dict):
        # Keys present in the file win; missing ones keep their empty default.
        state.update(loaded)
    return state
def save_state(state):
    """Write `state` to STATE_FILE as indented JSON.

    The JSON goes to a sibling ".tmp" file first and is then moved over
    STATE_FILE with os.replace, so the state file is swapped in one step.
    """
    scratch_path = STATE_FILE + ".tmp"
    with open(scratch_path, 'w') as out:
        json.dump(state, out, indent=2)
    os.replace(scratch_path, STATE_FILE)
# ── Kickstart generation ─────────────────────────────────────────
def generate_kickstart(hostname, disk="", ssh_keys=None, domain="", role="worker", admin_user=""):
    """Return the text of a Fedora kickstart file for one machine.

    hostname   -- short host name; combined with `domain` into the FQDN
    disk       -- target disk as a kernel name ("sda") or device path
                  ("/dev/sda"); when empty the generated %pre script picks
                  the first of nvme0n1, sda, vda that exists
    ssh_keys   -- public key lines; the first is installed through the
                  `sshkey` directive, all of them through %post. Without keys
                  root gets the plaintext password "changeme".
    domain     -- DNS domain appended to hostname (optional)
    role       -- "worker" adds a growing longhorn logical volume on a fresh
                  install; any other value does not
    admin_user -- optional account created locked, in group wheel, with the
                  same keys and passwordless sudo

    The %pre script writes /tmp/part.ks. If volume group "labvg" already
    exists it removes only the root/var/varlog/swap LVs and reuses existing
    home/srv/longhorn LVs (reprovision); otherwise it wipes the disk.
    Uses the module-level FEDORA_VER, LOCALE, TIMEZONE, SERVER_IP and
    HTTP_PORT.
    """
    ssh_keys = ssh_keys or []
    fqdn = f"{hostname}.{domain}" if domain else hostname
    vg = "labvg"
    # The generated shell always prefixes the disk with /dev/, so accept both
    # "sda" and "/dev/sda" and normalise to the bare kernel name.
    disk = (disk or "").strip()
    if disk.startswith("/dev/"):
        disk = disk[len("/dev/"):]
    # ── Auth ──
    if ssh_keys:
        auth = f'rootpw --lock\nsshkey --username=root "{ssh_keys[0]}"'
    else:
        auth = 'rootpw --plaintext changeme'
    # ── Admin user (kickstart directive) ──
    user_directive = ""
    if admin_user:
        user_directive = f'user --name={admin_user} --groups=wheel --lock'
    # ── SSH keys for %post (root + admin user) ──
    all_keys = "\n".join(ssh_keys)
    ssh_post_block = ""
    if ssh_keys:
        ssh_post_block = f"""
# Set up SSH keys for root
mkdir -p /root/.ssh && chmod 700 /root/.ssh
cat > /root/.ssh/authorized_keys << 'SSHKEYS'
{all_keys}
SSHKEYS
chmod 600 /root/.ssh/authorized_keys"""
    if admin_user and ssh_keys:
        ssh_post_block += f"""
# Set up SSH keys for {admin_user}
ADMIN_HOME=$(getent passwd {admin_user} | cut -d: -f6)
mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh"
cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
chown -R {admin_user}:{admin_user} "$ADMIN_HOME/.ssh"
chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"
# Fix SELinux contexts for SSH
restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
# Passwordless sudo for {admin_user}
echo '{admin_user} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/{admin_user}
chmod 440 /etc/sudoers.d/{admin_user}"""
    # ── Determine disk (auto-detect first NVMe/SDA if not specified) ──
    disk_line = f'DISK="{disk}"' if disk else '''
DISK=""
for d in /dev/nvme0n1 /dev/sda /dev/vda; do
[ -b "$d" ] && { DISK="$(basename $d)"; break; }
done
[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }
'''
    # ── Longhorn data volume only on worker nodes ──
    has_longhorn = (role == "worker")
    return f"""# Lab Bastion -- Fedora {FEDORA_VER} server install
# Generated: {datetime.now().isoformat()}
# Target: {fqdn} (role={role})
text
reboot
lang {LOCALE}
keyboard uk
timezone {TIMEZONE} --utc
network --bootproto=dhcp --activate --hostname={fqdn}
{auth}
{user_directive}
bootloader --append="console=tty0 console=ttyS0,115200n8"
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch
# Partitioning is generated dynamically by %pre (supports longhorn preservation)
%include /tmp/part.ks
%pre --log=/tmp/pre-partition.log
#!/bin/bash
set -x
# Progress callback helper
bastion_progress() {{
local stage="$1" detail="${{2:-}}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
-H "Content-Type: application/json" \
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
}}
bastion_progress "partitioning" "preparing disk layout"
VG="{vg}"
{disk_line}
REPROVISION=no
# Check if VG exists (reprovision scenario)
if vgs $VG &>/dev/null; then
echo "=== Existing VG found - reprovision mode ==="
REPROVISION=yes
# Detect which data LVs to preserve
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no
lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME"
# Remove only OS logical volumes (keep data LVs)
for lv in root var varlog swap; do
lvremove -f $VG/$lv 2>/dev/null || true
done
fi
if [ "$REPROVISION" = "yes" ]; then
# Find existing boot partitions by type
EFI_PART=$(blkid -t TYPE=vfat -o device /dev/${{DISK}}* 2>/dev/null | head -1)
BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/${{DISK}}* 2>/dev/null | head -1)
# Disks whose kernel name ends in a digit (nvme0n1, mmcblk0) put a "p"
# between the disk name and the partition number.
case "$DISK" in
*[0-9]) PART_SEP=p ;;
*) PART_SEP="" ;;
esac
EFI_PART=${{EFI_PART:-/dev/${{DISK}}${{PART_SEP}}1}}
BOOT_PART=${{BOOT_PART:-/dev/${{DISK}}${{PART_SEP}}2}}
echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"
# Build partition config reusing existing PV/VG
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --none
part /boot/efi --onpart=$EFI_PART --fstype=efi
part /boot --onpart=$BOOT_PART --fstype=ext4
volgroup {vg} --useexisting --noformat
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
PARTEOF
# Preserve or recreate data LVs
if [ "$PRESERVE_HOME" = "yes" ]; then
echo "logvol /home --vgname={vg} --name=home --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks
fi
if [ "$PRESERVE_SRV" = "yes" ]; then
echo "logvol /srv --vgname={vg} --name=srv --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks
fi
if [ "$PRESERVE_LONGHORN" = "yes" ]; then
echo "logvol /var/lib/longhorn --vgname={vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
fi
else
# Fresh install
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --all --initlabel --drives=$DISK
part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
part /boot --fstype=ext4 --size=3072 --ondisk=$DISK
part pv.01 --size=1 --grow --ondisk=$DISK
volgroup {vg} pv.01
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240
logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480
{"logvol /var/lib/longhorn --vgname=" + vg + " --name=longhorn --fstype=xfs --grow --size=1" if has_longhorn else ""}
PARTEOF
fi
echo "=== Generated partition config ==="
cat /tmp/part.ks
echo "==================================="
bastion_progress "partitioning" "layout ready, starting install"
%end
%packages
@core
openssh-server
vim-enhanced
tmux
git
curl
wget
python3
lshw
dmidecode
dnf-plugins-core
# Networking and diagnostics
NetworkManager
bind-utils
net-tools
iproute
iputils
traceroute
tcpdump
htop
iotop
strace
jq
# k3s prerequisites
container-selinux
iptables-nft
nftables
policycoreutils-python-utils
chrony
tar
socat
conntrack-tools
ethtool
# Boot management
efibootmgr
# Puppet prerequisites
ruby
ruby-libs
# Exclude desktop
-@workstation-product
-@gnome-desktop
-gnome-shell
-gdm
-PackageKit
-PackageKit-glib
%end
%post --log=/root/bastion-post-install.log
#!/bin/bash
set -x
# Progress callback helper
bastion_progress() {{
local stage="$1" detail="${{2:-}}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
-H "Content-Type: application/json" \
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
}}
bastion_progress "post-install" "configuring system"
# ── SSH ──
systemctl enable --now sshd
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
{ssh_post_block}
# ── Hostname and domain ──
hostnamectl set-hostname {fqdn}
# ── tmpfs for /tmp ──
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
# ── Kernel modules for k3s ──
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
br_netfilter
overlay
ip_conntrack
MODULES
modprobe br_netfilter || true
modprobe overlay || true
# ── Sysctl for k3s networking ──
cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
net.ipv6.conf.all.forwarding = 1
fs.inotify.max_user_instances = 524288
fs.inotify.max_user_watches = 1048576
SYSCTL
sysctl --system || true
# ── Disable firewalld (k3s manages its own iptables rules) ──
systemctl disable --now firewalld || true
# ── Enable chronyd for time sync ──
systemctl enable --now chronyd
# ── Set boot order: local disk first, PXE after ──
if command -v efibootmgr >/dev/null 2>&1; then
# Find the Fedora boot entry and move it first
FEDORA_ENTRY=$(efibootmgr | grep -i fedora | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
if [ -n "$FEDORA_ENTRY" ]; then
CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
# Put Fedora first, keep rest
NEW_ORDER="$FEDORA_ENTRY,$(echo "$CURRENT_ORDER" | sed "s/$FEDORA_ENTRY,\\?//;s/,$//")"
efibootmgr -o "$NEW_ORDER" || true
echo "Boot order set: Fedora first ($NEW_ORDER)"
fi
fi
# ── Provisioning metadata ──
cat > /etc/lab-provisioned << PROVEOF
hostname: {fqdn}
role: {role}
provisioned: $(date -Iseconds)
bastion: {SERVER_IP}
PROVEOF
cat > /root/README << 'README'
# Lab Node -- {fqdn} (role: {role})
#
# Next steps:
# 1. Install puppet agent:
# dnf install -y puppet-agent
#
# 2. Install k3s:
# curl -sfL https://get.k3s.io | sh -
#
# 3. Or join existing cluster:
# curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
README
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {{split($2,a,"/"); print a[1]; exit}}')
bastion_progress "complete" "ready at $IP_ADDR"
%end
"""
# ── Pretty terminal output ────────────────────────────────────────
# ANSI SGR escape sequences used by the print_* helpers below.
RESET = "\x1b[0m"
BOLD = "\x1b[1m"
GREEN = "\x1b[0;32m"
YELLOW = "\x1b[1;33m"
CYAN = "\x1b[0;36m"
RED = "\x1b[0;31m"
def print_discovery(mac, hw, is_new):
    """Print a banner describing a discovered machine on the bastion terminal.

    mac    -- MAC address the machine reported
    hw     -- hardware dict; missing fields are shown as "?"
    is_new -- True for a first sighting (green banner), False for a repeat
              (yellow banner)
    """
    if is_new:
        label, color = "NEW MACHINE DISCOVERED", GREEN
    else:
        label, color = "MACHINE RE-DISCOVERED", YELLOW

    # One "<size>GB <model>" entry per disk
    disk_parts = [
        f"{d.get('size_gb', '?')}GB {d.get('model', '?')}"
        for d in hw.get('disks', [])
    ]
    disk_str = ", ".join(disk_parts) or "none detected"
    # Interface names only
    nic_names = [n.get('name', '?') for n in hw.get('nics', [])]
    nic_str = ", ".join(nic_names) or "none"

    rule = '═' * 60
    print(f"\n{color}{BOLD}{rule}")
    print(f" {label}")
    print(f"{rule}{RESET}")

    rows = (
        ("MAC", mac),
        ("Product", f"{hw.get('manufacturer', '?')} {hw.get('product', '?')}"),
        ("CPU", f"{hw.get('cpu_model', '?')} ({hw.get('cpu_cores', '?')} cores)"),
        ("RAM", f"{hw.get('memory_gb', '?')} GB"),
        ("Arch", hw.get('arch', '?')),
        ("Disks", disk_str),
        ("NICs", nic_str),
        ("Serial", hw.get('serial', '?')),
    )
    for title, value in rows:
        print(f" {BOLD}{title}:{RESET} {value}")

    print()
    print(f" {CYAN}To install Fedora on this machine:{RESET}")
    print(f" {BOLD}bash bastion.sh install {mac} <hostname>{RESET}")
    print(f"\n{'─' * 60}\n", flush=True)
def print_install_queued(mac, hostname):
    """Tell the operator that `mac` is queued for install as `hostname`."""
    divider = '─' * 60
    message = (
        f"\n{GREEN}{BOLD} INSTALL QUEUED{RESET}",
        f" {mac} → hostname={BOLD}{hostname}{RESET}",
        " PXE boot the machine to start Fedora installation.",
    )
    for line in message:
        print(line)
    # Only the final write is flushed, which pushes out everything above it.
    print(f"\n{divider}\n", flush=True)
def print_install_started(mac, hostname):
    """Tell the operator that the installer is now being served to `mac`."""
    divider = '─' * 60
    message = (
        f"\n{CYAN}{BOLD} INSTALL STARTED{RESET}",
        f" {mac} → {BOLD}{hostname}{RESET}",
        f" Serving Fedora {FEDORA_VER} installer + kickstart...",
    )
    for line in message:
        print(line)
    # Only the final write is flushed, which pushes out everything above it.
    print(f"\n{divider}\n", flush=True)
# Marker printed in front of each install-progress line, keyed by stage name.
# The three in-flight stages show one, two and three diamonds respectively.
_STEP = "◆"
PROGRESS_ICONS = {
    "partitioning": _STEP,
    "installing": _STEP * 2,
    "post-install": _STEP * 3,
    "complete": "✔",
    "error": "✘",
}
def print_progress(mac, stage, detail=""):
    """Print one install-progress line for `mac`.

    stage  -- stage name; selects the icon (unknown stages get "·") and the
              colour (green for "complete", red for "error", else yellow).
    detail -- optional free text appended after " -- ". For the "complete"
              stage it is also used to derive the address for an ssh hint.
    """
    if stage == "complete":
        color = GREEN
    elif stage == "error":
        color = RED
    else:
        color = YELLOW
    icon = PROGRESS_ICONS.get(stage, "·")
    suffix = f" -- {detail}" if detail else ""
    print(f" {color}{icon}{RESET} {mac} {BOLD}{stage}{RESET}{suffix}", flush=True)

    # The ssh hint is only shown once the install has finished and reported
    # some detail text.
    if stage != "complete" or not detail:
        return
    # Whatever remains after removing the "ready at " prefix is treated as
    # the address.
    ip = detail.replace("ready at ", "").strip()
    if not ip:
        return
    admin = ADMIN_USER or "root"
    print(f"\n {GREEN}{BOLD} ssh {admin}@{ip}{RESET}\n", flush=True)
# ── HTTP Handler ──────────────────────────────────────────────────
class BastionHandler(SimpleHTTPRequestHandler):
    """HTTP front end of the bastion.

    GET  /dispatch?mac=..  iPXE script: install, boot local disk, or discover
    GET  /ks?mac=..        kickstart generated for the queued machine
    GET  /api/machines     full state as JSON
    GET  /ipxe*.efi        iPXE binaries from the sibling "tftp" directory
    GET  anything else     static file from HTTP_DIR
    POST /api/discover     hardware report from a PXE-booted machine
    POST /api/install      queue a machine for installation
    POST /api/progress     progress callback from the running kickstart

    MAC addresses are normalised the same way on every endpoint (trimmed,
    lower-case, hyphens turned into colons) so a machine is always found
    under the same state key regardless of how the client spelled it.
    """

    protocol_version = "HTTP/1.1"

    # Paths served from the TFTP directory instead of HTTP_DIR.
    EFI_PATHS = ("/ipxe.efi", "/ipxe-real.efi", "/ipxe-arm64.efi")
    VALID_ROLES = ("worker", "infra")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, directory=HTTP_DIR, **kwargs)

    def log_message(self, format, *args):
        """Log HTTP requests to help debug boot issues."""
        print(f" HTTP: {self.client_address[0]} {self.command} {self.path}", flush=True)

    # ── Response helpers ──

    def send_text(self, code, text, content_type="text/plain"):
        """Send `text` as a complete response and ask the client to disconnect."""
        data = text.encode()
        self.send_response(code)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(data)))
        self.send_header("Connection", "close")
        self.end_headers()
        self.wfile.write(data)

    def send_json(self, code, data):
        """Send `data` serialised as indented JSON."""
        self.send_text(code, json.dumps(data, indent=2), "application/json")

    # ── Input helpers ──

    @staticmethod
    def _normalize_mac(value, default=""):
        """Return `value` trimmed, lower-cased and colon-separated.

        Anything that is not a non-empty string yields `default`.
        """
        if not isinstance(value, str):
            return default
        return value.strip().lower().replace("-", ":") or default

    @staticmethod
    def _valid_hostname(name):
        """True if `name` is a plain DNS name (letters, digits, '-', '.').

        The hostname is interpolated into the iPXE script and the kickstart,
        so whitespace, newlines and shell metacharacters must never get in.
        """
        if not isinstance(name, str) or not 0 < len(name) <= 253:
            return False
        allowed = set("abcdefghijklmnopqrstuvwxyz0123456789-")
        for label in name.lower().split("."):
            if not label or len(label) > 63:
                return False
            if label[0] == "-" or label[-1] == "-":
                return False
            if not set(label) <= allowed:
                return False
        return True

    def _query_mac(self, parsed):
        """Extract the normalised `mac` query parameter ('' when absent)."""
        params = parse_qs(parsed.query)
        return self._normalize_mac(params.get("mac", [""])[0])

    def _read_body(self):
        """Read the request body; on a bad Content-Length send 400, return None."""
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            length = -1
        if length < 0:
            # A negative length would make read() block until EOF.
            self.send_json(400, {"error": "invalid Content-Length"})
            return None
        return self.rfile.read(length)

    def _parse_json_object(self, body):
        """Parse `body` as a JSON object; on failure send 400, return None."""
        try:
            data = json.loads(body)
        except ValueError:
            # Covers json.JSONDecodeError and undecodable (non-UTF-8) bytes.
            self.send_json(400, {"error": "invalid JSON"})
            return None
        if not isinstance(data, dict):
            self.send_json(400, {"error": "JSON object expected"})
            return None
        return data

    # ── GET ──

    def do_GET(self):
        parsed = urlparse(self.path)
        if parsed.path == "/dispatch":
            self._handle_dispatch(parsed)
            return
        if parsed.path == "/ks":
            self._handle_kickstart(parsed)
            return
        if parsed.path == "/api/machines":
            self.send_json(200, load_state())
            return
        # A missing EFI file falls through to the static handler below.
        if parsed.path in self.EFI_PATHS and self._serve_efi(parsed.path):
            return
        # Static files (vmlinuz, initrd, discover.ks, etc.)
        super().do_GET()

    def _handle_dispatch(self, parsed):
        """Answer iPXE with the script matching this MAC's current state."""
        mac = self._query_mac(parsed)
        state = load_state()
        base = f"http://{SERVER_IP}:{HTTP_PORT}"

        if mac in state.get("install_queue", {}):
            hostname = state["install_queue"][mac].get("hostname", "lab-node")
            print_install_started(mac, hostname)
            script = f"""#!ipxe
echo
echo =============================================
echo Lab PXE Bastion - INSTALLING Fedora {FEDORA_VER}
echo Target: {hostname}
echo MAC: {mac}
echo =============================================
echo
kernel {base}/vmlinuz inst.ks={base}/ks?mac={mac} inst.repo={FEDORA_MIRROR} inst.text
initrd {base}/initrd.img
boot
"""
        elif mac in state.get("installed", {}):
            hostname = state["installed"][mac].get("hostname", "?")
            print(f" {GREEN}PXE request from {mac} ({hostname}) - already installed, booting local disk{RESET}", flush=True)
            # "exit" hands control back to the firmware's next boot entry.
            script = f"""#!ipxe
echo
echo =============================================
echo Lab PXE Bastion - {hostname}
echo Already installed, booting from local disk
echo =============================================
echo
sleep 3
exit
"""
        else:
            print(f" {YELLOW}PXE request from {mac} → discovery mode{RESET}", flush=True)
            script = f"""#!ipxe
echo
echo =============================================
echo Lab PXE Bastion - DISCOVERY MODE
echo MAC: {mac}
echo Collecting hardware info...
echo =============================================
echo
kernel {base}/vmlinuz inst.ks={base}/discover.ks inst.stage2={FEDORA_MIRROR} inst.text
initrd {base}/initrd.img
boot
"""
        self.send_text(200, script)

    def _handle_kickstart(self, parsed):
        """Serve the kickstart built from this MAC's install-queue entry."""
        mac = self._query_mac(parsed)
        cfg = load_state().get("install_queue", {}).get(mac, {})
        ks = generate_kickstart(
            hostname=cfg.get("hostname", "lab-node"),
            disk=cfg.get("disk", ""),
            ssh_keys=SSH_KEYS,
            domain=DOMAIN,
            role=cfg.get("role", "worker"),
            admin_user=ADMIN_USER,
        )
        self.send_text(200, ks)

    def _serve_efi(self, path):
        """Send an iPXE EFI binary (for UEFI HTTP Boot); False if it is missing."""
        tftp_dir = os.path.join(os.path.dirname(HTTP_DIR), "tftp")
        fpath = os.path.join(tftp_dir, path.lstrip("/"))
        if not os.path.isfile(fpath):
            return False
        # Read first so Content-Length always matches the bytes sent and an
        # unreadable file cannot fail after a 200 status has gone out.
        with open(fpath, "rb") as f:
            payload = f.read()
        self.send_response(200)
        self.send_header("Content-Type", "application/efi")
        self.send_header("Content-Length", str(len(payload)))
        # Same as send_text: do not keep the connection open afterwards.
        self.send_header("Connection", "close")
        self.end_headers()
        self.wfile.write(payload)
        return True

    # ── POST ──

    def do_POST(self):
        parsed = urlparse(self.path)
        handlers = {
            "/api/discover": self._handle_discover,
            "/api/install": self._handle_install,
            "/api/progress": self._handle_progress,
        }
        body = self._read_body()
        if body is None:
            return
        handler = handlers.get(parsed.path)
        if handler is None:
            self.send_json(404, {"error": "not found"})
            return
        data = self._parse_json_object(body)
        if data is None:
            return
        handler(data)

    def _handle_discover(self, data):
        """Store a discovery report from a PXE-booted machine."""
        mac = self._normalize_mac(data.get("mac"), default="unknown")
        now = datetime.now().isoformat()
        data["last_seen"] = now
        state = load_state()
        discovered = state.setdefault("discovered", {})
        is_new = mac not in discovered
        # first_seen survives re-discovery; only last_seen moves forward.
        data["first_seen"] = now if is_new else discovered[mac].get("first_seen", now)
        discovered[mac] = data
        save_state(state)
        print_discovery(mac, data, is_new)
        self.send_json(200, {"status": "ok", "mac": mac, "new": is_new})

    def _handle_install(self, data):
        """Queue a machine for install after validating the request."""
        mac = self._normalize_mac(data.get("mac"))
        hostname = data.get("hostname", "lab-node")
        disk = data.get("disk", "")
        role = data.get("role", "worker")

        if not mac:
            self.send_json(400, {"error": "mac is required"})
            return
        if not self._valid_hostname(hostname):
            self.send_json(400, {"error": "hostname must be a valid DNS name"})
            return
        # disk is passed to generate_kickstart as well: allow only a single token.
        if not isinstance(disk, str) or any(c.isspace() or not c.isprintable() for c in disk):
            self.send_json(400, {"error": "disk must be a device name without whitespace"})
            return
        if role not in self.VALID_ROLES:
            self.send_json(400, {"error": "role must be 'worker' or 'infra'"})
            return

        state = load_state()
        state.setdefault("install_queue", {})[mac] = {
            "hostname": hostname,
            "disk": disk,
            "role": role,
            "queued_at": datetime.now().isoformat(),
        }
        save_state(state)
        print_install_queued(mac, hostname)
        self.send_json(200, {
            "status": "queued",
            "mac": mac,
            "hostname": hostname,
            "role": role,
            "message": f"PXE boot the machine to start installation (role={role})",
        })

    def _handle_progress(self, data):
        """Record an install progress callback sent by the kickstart."""
        mac = self._normalize_mac(data.get("mac"), default="unknown")
        # Coerce to str: both values are used as text and stage as a dict key.
        stage = str(data.get("stage", "unknown"))
        detail = str(data.get("detail") or "")
        print_progress(mac, stage, detail)

        state = load_state()
        queue = state.get("install_queue", {})
        if mac in queue:
            entry = queue[mac]
            entry["progress"] = stage
            entry["progress_at"] = datetime.now().isoformat()
            if detail:
                entry["progress_detail"] = detail
            # A finished machine leaves the queue so that its next PXE boot
            # is answered with "boot from local disk".
            if stage == "complete":
                cfg = queue.pop(mac)
                ip = detail.replace("ready at ", "").strip() if detail else ""
                state.setdefault("installed", {})[mac] = {
                    "hostname": cfg.get("hostname", "?"),
                    "role": cfg.get("role", "?"),
                    "ip": ip,
                    "installed_at": datetime.now().isoformat(),
                }
            save_state(state)
        self.send_json(200, {"status": "ok"})
def run_server():
    """Bind BastionHandler to HTTP_PORT on all interfaces and serve forever."""
    bind_address = ("0.0.0.0", HTTP_PORT)
    httpd = HTTPServer(bind_address, BastionHandler)
    print(f"HTTP server listening on :{HTTP_PORT}", flush=True)
    httpd.serve_forever()


if __name__ == "__main__":
    run_server()
PYSERVER
# ──── Generate dnsmasq config ─────────────────────────────────────
# Full mode only: when no explicit pool was configured, default to the
# .100–.200 hosts of the bastion's network. Replacing the last octet of
# $NETWORK assumes a /24, matching the 255.255.255.0 netmask written into
# the dhcp-range line of the generated config.
if [[ "$DHCP_MODE" == "full" ]]; then
  : "${DHCP_RANGE_START:=${NETWORK%.*}.100}"
  : "${DHCP_RANGE_END:=${NETWORK%.*}.200}"
fi
# Write the dnsmasq configuration used for this bastion run. The heredoc is
# unquoted on purpose: ${IFACE}, ${SERVER_IP}, etc. are expanded now, and the
# $(if …) substitution selects the DHCP section for the chosen DHCP_MODE.
cat > "$BASTION_DIR/dnsmasq.conf" << DNSMASQ
# Lab PXE Bastion — dnsmasq config
# Disable DNS (we only want DHCP/TFTP)
port=0
# Listen on the right interface
interface=${IFACE}
bind-dynamic
$(if [[ "$DHCP_MODE" == "full" ]]; then
cat << FULL_DHCP
# Full DHCP mode — bastion is the only DHCP server on this network
dhcp-range=${DHCP_RANGE_START},${DHCP_RANGE_END},255.255.255.0,12h
dhcp-option=3,${GATEWAY}
dhcp-option=6,${GATEWAY}
FULL_DHCP
else
cat << PROXY_DHCP
# ProxyDHCP — works alongside existing DHCP (UniFi etc)
dhcp-range=${NETWORK},proxy
PROXY_DHCP
fi)
# TFTP for initial PXE boot
enable-tftp
tftp-root=${TFTPDIR}
tftp-no-blocksize
# Detect client architecture — PXE (TFTP) clients
dhcp-match=set:bios,option:client-arch,0
dhcp-match=set:efi-x86_64,option:client-arch,7
dhcp-match=set:efi-x86_64,option:client-arch,9
dhcp-match=set:efi-arm64,option:client-arch,11
# Detect client architecture — UEFI HTTP Boot clients (no TFTP size limit)
dhcp-match=set:httpboot-x86_64,option:client-arch,16
dhcp-match=set:httpboot-arm64,option:client-arch,20
# Detect iPXE clients (already chainloaded)
dhcp-userclass=set:ipxe,iPXE
# UEFI HTTP Boot → serve full iPXE EFI via HTTP (no TFTP size limit)
dhcp-boot=tag:httpboot-x86_64,http://${SERVER_IP}:${HTTP_PORT}/ipxe-real.efi
dhcp-boot=tag:httpboot-arm64,http://${SERVER_IP}:${HTTP_PORT}/ipxe-arm64.efi
# Echo vendor class back to HTTP Boot clients (required by UEFI HTTP Boot spec)
dhcp-option-force=tag:httpboot-x86_64,60,HTTPClient
dhcp-option-force=tag:httpboot-arm64,60,HTTPClient
# First PXE boot → serve iPXE binary via TFTP (BIOS and UEFI fallback)
dhcp-boot=tag:bios,tag:!ipxe,undionly.kpxe
dhcp-boot=tag:efi-x86_64,tag:!ipxe,ipxe.efi
dhcp-boot=tag:efi-arm64,tag:!ipxe,ipxe-arm64.efi
# iPXE clients → chain to boot script via HTTP
dhcp-boot=tag:ipxe,http://${SERVER_IP}:${HTTP_PORT}/boot.ipxe
# PXE service directives (needed for proxy DHCP to respond properly)
pxe-service=tag:!ipxe,x86PC,"PXE Boot",undionly.kpxe
pxe-service=tag:!ipxe,X86-64_EFI,"PXE Boot",ipxe.efi
pxe-service=tag:!ipxe,BC_EFI,"PXE Boot",ipxe.efi
pxe-service=tag:!ipxe,ARM64_EFI,"PXE Boot",ipxe-arm64.efi
# Verbose logging
log-dhcp
DNSMASQ
# ──── Open firewall ──────────────────────────────────────────────
# Only when firewalld is installed and running. The rules are added without
# --permanent, i.e. to the runtime configuration only.
if command -v firewall-cmd >/dev/null && firewall-cmd --state >/dev/null 2>&1; then
  # Detect the zone for our interface (may differ from default zone). An
  # empty result means no --zone flag is passed and firewall-cmd picks the
  # default zone itself.
  FW_ZONE="$(firewall-cmd --get-zone-of-interface="${IFACE}" 2>/dev/null || echo "")"
  FW_ZONE_FLAG=""
  if [[ -n "$FW_ZONE" ]]; then
    FW_ZONE_FLAG="--zone=${FW_ZONE}"
  fi
  log "Opening firewall ports (DHCP, TFTP, HTTP:${HTTP_PORT})${FW_ZONE:+ in zone ${FW_ZONE}}..."
  # ${VAR:+"$VAR"} yields the flag as exactly one word when it is set and no
  # word at all when it is empty, so nothing is left to word splitting.
  firewall-cmd --quiet ${FW_ZONE_FLAG:+"$FW_ZONE_FLAG"} --add-service=dhcp
  firewall-cmd --quiet ${FW_ZONE_FLAG:+"$FW_ZONE_FLAG"} --add-service=tftp
  firewall-cmd --quiet ${FW_ZONE_FLAG:+"$FW_ZONE_FLAG"} --add-port="${HTTP_PORT}/tcp"
  # 4011/udp is opened best-effort: a failure here is deliberately ignored.
  firewall-cmd --quiet ${FW_ZONE_FLAG:+"$FW_ZONE_FLAG"} --add-port=4011/udp 2>/dev/null || true
  FW_OPENED=true
fi
# ──── Stop conflicting services ───────────────────────────────────
# If the distribution's dnsmasq unit is currently active, stop it before the
# bastion launches its own dnsmasq instance.
dnsmasq_unit="dnsmasq"
if systemctl --quiet is-active "$dnsmasq_unit" 2>/dev/null; then
  warn "System dnsmasq is running — stopping it temporarily"
  systemctl stop "$dnsmasq_unit"
fi
# ──── Start HTTP server ──────────────────────────────────────────
log "Starting HTTP server on :${HTTP_PORT}..."
# Arguments handed to server.py. An array keeps every value a single word
# and avoids a fragile chain of backslash continuations.
# NOTE(review): presumably read positionally by server.py — keep the order.
server_args=(
  "$HTTPDIR"
  "$STATEFILE"
  "$SERVER_IP"
  "$HTTP_PORT"
  "$FEDORA_VERSION"
  "$FEDORA_MIRROR"
  "$BASTION_DIR/ssh_keys"
  "$TIMEZONE"
  "$LOCALE"
  "$DOMAIN"
  "$ADMIN_USER"
)
python3 "$BASTION_DIR/server.py" "${server_args[@]}" &
HTTP_PID=$!
# Wait briefly, then check that the background process is still alive;
# kill -0 only tests for existence and sends no signal.
sleep 1
if ! kill -0 "$HTTP_PID" 2>/dev/null; then
  die "HTTP server failed to start — is port ${HTTP_PORT} in use?"
fi
# ──── Start dnsmasq ──────────────────────────────────────────────
# Describe the DHCP mode actually in use: the start-up message used to say
# "proxyDHCP" unconditionally, and the full-mode range was printed with the
# start and end addresses run together.
if [[ "$DHCP_MODE" == "full" ]]; then
  pxe_mode_label="full DHCP"
  dhcp_detail=" (${DHCP_RANGE_START} - ${DHCP_RANGE_END})"
else
  pxe_mode_label="proxyDHCP"
  dhcp_detail=" (alongside existing DHCP)"
fi

log "Starting PXE server (${pxe_mode_label} on ${IFACE})..."
echo ""
echo -e "${CYAN}${BOLD}════════════════════════════════════════════════════════════${NC}"
echo -e "${CYAN}${BOLD} Lab PXE Bastion — Discovery Mode${NC}"
echo -e "${CYAN}${BOLD}════════════════════════════════════════════════════════════${NC}"
echo ""
echo -e " Network: ${BOLD}${NETWORK}/24${NC} via ${BOLD}${IFACE}${NC}"
echo -e " DHCP: ${BOLD}${DHCP_MODE}${NC}${dhcp_detail}"
echo -e " HTTP: ${BOLD}http://${SERVER_IP}:${HTTP_PORT}/${NC}"
echo -e " OS: ${BOLD}Fedora ${FEDORA_VERSION} (${ARCH})${NC}"
echo -e " Domain: ${BOLD}${DOMAIN}${NC}"
echo -e " State: ${BOLD}${STATEFILE}${NC}"
echo ""
echo -e " ${YELLOW}PXE boot any machine on this network.${NC}"
echo -e " ${YELLOW}It will be inventoried and powered off automatically.${NC}"
echo ""
echo -e " Commands (from another terminal):"
echo -e " ${BOLD}bash bastion.sh list${NC} — show machines"
echo -e " ${BOLD}bash bastion.sh install <mac> <hostname>${NC} — queue install"
echo ""
echo -e " Press ${BOLD}Ctrl-C${NC} to stop."
echo ""
echo -e "${CYAN}──── Waiting for PXE boot requests... ────${NC}"
echo ""

# Run dnsmasq in the foreground of a background job so its PID is known,
# then block on it. A non-zero exit status from wait is reported as an
# unexpected exit.
dnsmasq --no-daemon --conf-file="$BASTION_DIR/dnsmasq.conf" &
DNSMASQ_PID=$!
wait "$DNSMASQ_PID" || {
  err "dnsmasq exited unexpectedly. Check if another DHCP/TFTP service is running."
  err "Try: ss -ulnp | grep -E ':(67|69|4011) '"
  exit 1
}