Files
lab/bastion.sh
Michal fac14b6d4a feat: server kickstart with LVM, user creation, progress callbacks, reprovision
- LVM partition layout: /, /var, /var/log, /home, /srv, swap, tmpfs /tmp
  plus /var/lib/longhorn for worker role (grows to fill disk)
- Reprovision preserves /home, /srv, /var/lib/longhorn via %pre detection
- Admin user created matching the user running the bastion script
  with SSH keys from authorized_keys + local pubkeys, passwordless sudo
- Progress callbacks from %pre and %post to /api/progress endpoint
  with IP reported on completion (ssh command printed)
- Installed machines boot from local disk (iPXE exit) instead of
  re-entering discovery mode
- --role worker|infra flag (infra skips longhorn partition)
- reprovision subcommand: queues install + SSH reboot into PXE
- Self-cleanup: kills old bastion instances on start
- Domain config (DOMAIN env, default ad.itaz.eu)
- efibootmgr in %post to set local disk first in boot order
- k3s prereqs: kernel modules, sysctl, firewalld disabled, chrony
- VM reprovision test script (test-reprovision.sh)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 02:40:40 +00:00

1365 lines
50 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────
# Lab PXE Bastion — discover-first bare-metal provisioning
#
# Default mode: DISCOVER. Any machine that PXE boots gets inventoried
# and powered off. You review what appeared, then promote to install.
#
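# Usage:
# sudo bash bastion.sh # start bastion (discover mode)
# bash bastion.sh install <mac> <hostname> # queue discovered machine for install
# bash bastion.sh reprovision <mac> <hostname> # queue a reinstall and try an SSH reboot into PXE
# bash bastion.sh list # show discovered/queued/installed machines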
#
# Flow:
# 1. Start bastion → sudo bash bastion.sh
# 2. Power on machine → PXE boots, hardware discovered, powers off
# 3. Queue for install → bash bastion.sh install aa:bb:cc:dd:ee:ff puppet
# 4. Power on again → PXE boots, Fedora installed, reboots into OS
#
# Requirements: Fedora/RHEL host with dnsmasq, python3, curl
# ─────────────────────────────────────────────────────────────────────
set -euo pipefail
# ──── Configuration (override via environment) ────────────────────
# Every setting keeps a non-empty value inherited from the environment and
# otherwise falls back to the default written here.
: "${FEDORA_VERSION:=43}"
: "${ARCH:=x86_64}"
: "${HTTP_PORT:=8080}"
: "${TIMEZONE:=Europe/London}"
: "${LOCALE:=en_GB.UTF-8}"
: "${BASTION_DIR:=/tmp/lab-bastion}"
: "${DOMAIN:=ad.itaz.eu}"        # internal domain for hostnames
: "${DHCP_MODE:=proxy}"          # proxy (alongside existing DHCP) or full (bastion IS the DHCP server)
: "${DHCP_RANGE_START:=}"        # only for full mode, auto-derived if empty
: "${DHCP_RANGE_END:=}"
# ──── Colors ──────────────────────────────────────────────────────
# ANSI escape sequences, stored unexpanded; `echo -e` interprets them.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

# Informational message on stdout, green "[bastion]" tag.
log() {
  echo -e "${GREEN}[bastion]${NC} $*"
}

# Warning on stdout, yellow "[bastion]" tag.
warn() {
  echo -e "${YELLOW}[bastion]${NC} $*"
}

# Error message on stderr, red "[bastion]" tag.
err() {
  echo -e "${RED}[bastion]${NC} $*" >&2
}

# Report an error through err() and terminate the script with status 1.
die() {
  err "$@"
  exit 1
}
# ──── Subcommand handling ─────────────────────────────────────────
# install / list / reprovision are thin clients of an already running bastion:
# they talk to its HTTP API on localhost:${HTTP_PORT} and exit. "serve" (the
# default when no subcommand is given) falls through to the code below.

# Parse "<subcommand> <mac> <hostname> [--role R] [--disk D]".
# Arguments: the script's full argument list (subcommand first).
# Globals written: MAC, HOSTNAME, DISK (default ""), ROLE (default "worker").
# Exits 1 with a usage/diagnostic line on malformed input.
parse_install_args() {
  local subcmd="$1"
  shift
  if [[ $# -lt 2 ]]; then
    echo "Usage: bastion.sh ${subcmd} <mac> <hostname> [--role worker|infra] [--disk <dev>]"
    exit 1
  fi
  MAC="$1"
  HOSTNAME="$2"
  shift 2
  DISK=""
  ROLE="worker"
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --disk | --role)
        # Without this check a trailing "--disk" dies under `set -u` with an
        # opaque "unbound variable" message.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value"
          exit 1
        fi
        if [[ "$1" == "--disk" ]]; then DISK="$2"; else ROLE="$2"; fi
        shift 2
        ;;
      *)
        echo "Unknown option: $1"
        exit 1
        ;;
    esac
  done
}

# Print the JSON body for POST /api/install; empty fields are omitted.
# Arguments: $1 mac, $2 hostname, $3 disk, $4 role.
# Values travel as argv, never as Python source text, so quotes or
# backslashes in them cannot break (or inject into) the Python snippet.
build_install_payload() {
  python3 -c '
import json, sys
keys = ("mac", "hostname", "disk", "role")
print(json.dumps({k: v for k, v in zip(keys, sys.argv[1:]) if v}))
' "$1" "$2" "$3" "$4"
}

# POST the install request built from MAC/HOSTNAME/DISK/ROLE and pretty-print
# the server's reply (raw reply if it is not JSON). Dies if curl fails.
queue_install() {
  local payload result
  payload="$(build_install_payload "$MAC" "$HOSTNAME" "$DISK" "$ROLE")"
  result="$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
    -H "Content-Type: application/json" \
    -d "$payload" 2>&1)" || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
  echo "$result" | python3 -m json.tool 2>/dev/null || echo "$result"
}

# Print the "ip" recorded under installed[<mac>] by GET /api/machines, or an
# empty line when the lookup fails or no IP is recorded.
# Arguments: $1 mac.
lookup_installed_ip() {
  curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>/dev/null \
    | python3 -c '
import json, sys
state = json.load(sys.stdin)
print(state.get("installed", {}).get(sys.argv[1], {}).get("ip", ""))
' "$1" 2>/dev/null || echo ""
}

CMD="${1:-serve}"
case "$CMD" in
  install)
    parse_install_args "$@"
    queue_install
    echo ""
    echo "Power on the machine to start Fedora installation."
    exit 0
    ;;
  list)
    RESULT=$(curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>&1) \
      || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
    # Render the three state tables; if rendering fails, dump the raw reply.
    echo "$RESULT" | python3 -c "
import sys, json
state = json.load(sys.stdin)
discovered = state.get('discovered', {})
queue = state.get('install_queue', {})
installed = state.get('installed', {})
print()
print('\033[1mDISCOVERED\033[0m')
if discovered:
    print(f' {\"MAC\":<20} {\"CPU\":<32} {\"CORES\":<6} {\"RAM\":<8} {\"ARCH\":<10} {\"PRODUCT\"}')
    for mac, hw in discovered.items():
        status = ' [QUEUED]' if mac in queue else ''
        print(f' {mac:<20} {hw.get(\"cpu_model\",\"?\"):<32} {hw.get(\"cpu_cores\",\"?\"):<6} {str(hw.get(\"memory_gb\",\"?\"))+\"GB\":<8} {hw.get(\"arch\",\"?\"):<10} {hw.get(\"product\",\"?\")}{status}')
else:
    print(' (none — PXE boot a machine to discover it)')
print()
print('\033[1mINSTALL QUEUE\033[0m')
if queue:
    for mac, cfg in queue.items():
        print(f' {mac:<20} → hostname={cfg.get(\"hostname\",\"?\")}')
else:
    print(' (none)')
print()
print('\033[1mINSTALLED\033[0m')
if installed:
    for mac, info in installed.items():
        ip = info.get('ip', '')
        ip_str = f' ip={ip}' if ip else ''
        print(f' {mac:<20} → {info.get(\"hostname\",\"?\")} role={info.get(\"role\",\"?\")}{ip_str} ({info.get(\"installed_at\",\"?\")})')
else:
    print(' (none)')
print()
" 2>/dev/null || echo "$RESULT"
    exit 0
    ;;
  reprovision)
    parse_install_args "$@"
    # Queue the install
    queue_install
    # Try to find IP from installed state and SSH in to trigger PXE reboot
    IP="$(lookup_installed_ip "$MAC")"
    # ${USER:-} keeps `set -u` from aborting when USER is not exported.
    ADMIN_USER="${SUDO_USER:-${USER:-}}"
    if [[ "$ADMIN_USER" == "root" ]]; then
      ADMIN_USER=""
    fi
    if [[ -n "$IP" && -n "$ADMIN_USER" ]]; then
      echo ""
      echo "Attempting SSH reboot into PXE ($ADMIN_USER@$IP)..."
      # The remote snippet is single-quoted on purpose: it must expand on the
      # target machine, not here.
      if ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "$ADMIN_USER@$IP" \
        'sudo efibootmgr 2>/dev/null; PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi' 2>&1; then
        echo ""
        echo "Machine is rebooting into PXE. Install will start automatically."
      else
        echo ""
        echo "SSH failed. Reboot the machine manually into PXE (e.g. via IPMI/KVM)."
      fi
    else
      echo ""
      echo "No IP known for this machine. Reboot it manually into PXE."
    fi
    exit 0
    ;;
  serve) ;; # continue below
  *)
    echo "Usage: bastion.sh [serve|install|reprovision|list]"
    exit 1
    ;;
esac
# ══════════════════════════════════════════════════════════════════
# SERVE MODE — start the bastion
# ══════════════════════════════════════════════════════════════════
# ──── Kill old instances ──────────────────────────────────────────
# Look up a previous bastion's dnsmasq and HTTP server by command line and
# send them the default kill signal.
# NOTE(review): both patterns hard-code /tmp/lab-bastion rather than using
# $BASTION_DIR — confirm whether a customised BASTION_DIR should match too.
OLD_DNSMASQ="$(pgrep -f 'dnsmasq --no-daemon --conf-file=/tmp/lab-bastion' 2>/dev/null || true)"
OLD_HTTP="$(pgrep -f 'python3 /tmp/lab-bastion/server.py' 2>/dev/null || true)"
if [[ -n "${OLD_DNSMASQ}${OLD_HTTP}" ]]; then
  warn "Killing old bastion processes..."
  # The PID lists are expanded unquoted on purpose: pgrep may return several
  # PIDs, one per line, and kill needs each as its own argument.
  # shellcheck disable=SC2086
  if [[ -n "$OLD_DNSMASQ" ]] && kill $OLD_DNSMASQ 2>/dev/null; then
    log " Stopped old dnsmasq (PID $OLD_DNSMASQ)"
  fi
  # shellcheck disable=SC2086
  if [[ -n "$OLD_HTTP" ]] && kill $OLD_HTTP 2>/dev/null; then
    log " Stopped old HTTP server (PID $OLD_HTTP)"
  fi
  sleep 1
fi
# ──── Preflight ───────────────────────────────────────────────────
# Refuse to run without root or the two mandatory tools, then install any
# missing optional packages through dnf or apt-get.
if [[ $EUID -ne 0 ]]; then
  die "Must run as root (need DHCP/TFTP ports). Use: sudo bash bastion.sh"
fi
if ! command -v python3 >/dev/null; then
  die "python3 not found"
fi
if ! command -v curl >/dev/null; then
  die "curl not found"
fi

# A package is queued when its command or marker file is absent.
INSTALL_PKGS=()
if ! command -v dnsmasq >/dev/null; then
  INSTALL_PKGS+=(dnsmasq)
fi
if [[ ! -f /usr/share/ipxe/undionly.kpxe ]]; then
  INSTALL_PKGS+=(ipxe-bootimgs-x86)
fi
if [[ ! -f /usr/share/ipxe/arm64-efi/snponly.efi ]]; then
  INSTALL_PKGS+=(ipxe-bootimgs-aarch64)
fi
if [[ ! -f /usr/include/efi/efi.h ]]; then
  INSTALL_PKGS+=(gnu-efi-devel)
fi

if (( ${#INSTALL_PKGS[@]} > 0 )); then
  log "Installing ${INSTALL_PKGS[*]}..."
  if command -v dnf >/dev/null; then
    dnf install -y "${INSTALL_PKGS[@]}"
  elif command -v apt-get >/dev/null; then
    apt-get install -y "${INSTALL_PKGS[@]}"
  else
    die "Cannot install packages — install manually: ${INSTALL_PKGS[*]}"
  fi
fi
# ──── Auto-detect network ────────────────────────────────────────
# Derive interface, server IP, network and gateway from the default route
# unless IFACE is supplied through the environment.
#
# Each probe ends in "|| true": under `set -euo pipefail` a failing `ip`
# (for example no default route, or an unknown interface) would otherwise
# abort the script inside the assignment, before the explanatory die below
# could run.
if [[ -z "${IFACE:-}" ]]; then
  IFACE="$(ip route | awk '/default/ {print $5; exit}' || true)"
fi
[[ -n "$IFACE" ]] || die "Cannot detect the default network interface — set IFACE=<device>"
SERVER_IP="$(ip -4 addr show "$IFACE" | awk '/inet / {split($2,a,"/"); print a[1]; exit}' || true)"
[[ -n "$SERVER_IP" ]] || die "Cannot detect IP on interface $IFACE"
# NETWORK is the server address with its last octet zeroed.
# NOTE(review): this matches the real network only for /24 prefixes — confirm.
NETWORK="$(echo "$SERVER_IP" | awk -F. '{print $1"."$2"."$3".0"}')"
GATEWAY="$(ip route | awk '/default/ {print $3; exit}' || true)"
log "Interface: ${BOLD}$IFACE${NC} IP: ${BOLD}$SERVER_IP${NC} Network: ${BOLD}$NETWORK${NC}"
# ──── Auto-detect SSH keys ───────────────────────────────────────
# Builds SSH_KEYS_CONTENT (one public key per line) and SSH_KEY_SOURCE (a
# human-readable description of where the keys came from) out of:
#   1. ~/.ssh/authorized_keys of the invoking user (comments/blank lines dropped)
#   2. that user's local id_*.pub files that are not already included
#   3. a keypair generated under $BASTION_DIR when 1 and 2 yield nothing
# When run through sudo, "the invoking user" is $SUDO_USER, not root.
REAL_HOME="${HOME}"
if [[ -n "${SUDO_USER:-}" ]]; then
  REAL_HOME="$(getent passwd "$SUDO_USER" | cut -d: -f6)"
fi
SSH_KEYS_CONTENT=""
SSH_KEY_SOURCE=""

# Source 1: authorized_keys.
if [[ -f "$REAL_HOME/.ssh/authorized_keys" ]]; then
  # awk exits 0 even when nothing survives the filter. A grep -v pipeline
  # exits 1 on an empty or comment-only file, which under `set -e` with
  # pipefail would terminate the whole script here.
  SSH_KEYS_CONTENT="$(awk '!/^#/ && !/^$/' "$REAL_HOME/.ssh/authorized_keys")"
  SSH_KEY_SOURCE="$REAL_HOME/.ssh/authorized_keys"
fi

# Source 2: local pubkey files (they may not be in authorized_keys).
for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do
  [[ -f "$keyfile" ]] || continue
  KEY_DATA="$(cat "$keyfile")"
  # Second field of the .pub line (the base64 key material, not a real
  # fingerprint); used as the de-duplication needle.
  KEY_FP="$(awk '{print $2}' "$keyfile")"
  if [[ -z "$SSH_KEYS_CONTENT" ]]; then
    SSH_KEYS_CONTENT="$KEY_DATA"
    SSH_KEY_SOURCE="$keyfile"
  elif ! grep -qF -- "$KEY_FP" <<<"$SSH_KEYS_CONTENT"; then
    # Add only if not already present
    SSH_KEYS_CONTENT="$SSH_KEYS_CONTENT"$'\n'"$KEY_DATA"
    SSH_KEY_SOURCE="${SSH_KEY_SOURCE} + $keyfile"
  fi
done

# Source 3: generate a keypair.
if [[ -z "$SSH_KEYS_CONTENT" ]]; then
  GENERATED_KEY="$BASTION_DIR/bastion_ed25519"
  if [[ ! -f "$GENERATED_KEY" ]]; then
    log "No SSH keys found — generating ed25519 keypair..."
    # The bastion directory is only created further down the script, so make
    # sure it exists before ssh-keygen writes into it.
    mkdir -p "$BASTION_DIR"
    ssh-keygen -t ed25519 -f "$GENERATED_KEY" -N "" -C "bastion-generated@$(hostname)" >/dev/null 2>&1 \
      || die "ssh-keygen failed to create $GENERATED_KEY"
  fi
  SSH_KEYS_CONTENT="$(cat "${GENERATED_KEY}.pub")"
  SSH_KEY_SOURCE="$GENERATED_KEY (generated)"
  warn "Using generated keypair: ${BOLD}$GENERATED_KEY${NC}"
  warn "Save this private key — it's the only way to access installed machines."
fi
SSH_KEY_COUNT="$(echo "$SSH_KEYS_CONTENT" | wc -l)"
log "SSH keys: ${BOLD}${SSH_KEY_COUNT} key(s)${NC} from ${BOLD}${SSH_KEY_SOURCE}${NC}"
# ──── Detect admin username ──────────────────────────────────────
# The account behind sudo (or the current user otherwise) becomes the admin
# account name; root is never used for that, it maps to "no admin user".
ADMIN_USER="${SUDO_USER:-$USER}"
if [[ "$ADMIN_USER" == "root" ]]; then
  ADMIN_USER=""
fi
if [[ -n "$ADMIN_USER" ]]; then
  log "Admin user: ${BOLD}${ADMIN_USER}${NC} (will be created on installed machines)"
fi
# ──── Prepare directories ────────────────────────────────────────
# Working tree under $BASTION_DIR: tftp/ and http/ directories plus the JSON
# state file.
STATEFILE="$BASTION_DIR/state.json"
TFTPDIR="$BASTION_DIR/tftp"
HTTPDIR="$BASTION_DIR/http"
mkdir -p "$TFTPDIR" "$HTTPDIR"
# Seed an empty state on first run; an existing state file is left untouched.
if [[ ! -f "$STATEFILE" ]]; then
  echo '{"discovered":{},"install_queue":{},"installed":{}}' > "$STATEFILE"
fi
# ──── Cleanup handler ─────────────────────────────────────────────
DNSMASQ_PID=""
HTTP_PID=""
FW_OPENED=false
CLEANUP_DONE=false

#######################################
# Stop the background services and undo the firewall changes.
# Globals:   HTTP_PID, DNSMASQ_PID, FW_OPENED, FW_ZONE_FLAG, HTTP_PORT,
#            STATEFILE (read); CLEANUP_DONE (written)
# Outputs:   progress lines through log()
# Runs at most once per process, however many traps fire.
#######################################
cleanup() {
  if $CLEANUP_DONE; then
    return 0
  fi
  CLEANUP_DONE=true
  echo ""
  log "Shutting down..."
  if [[ -n "$HTTP_PID" ]] && kill "$HTTP_PID" 2>/dev/null; then
    log "Stopped HTTP server"
  fi
  if [[ -n "$DNSMASQ_PID" ]] && kill "$DNSMASQ_PID" 2>/dev/null; then
    log "Stopped dnsmasq"
  fi
  if $FW_OPENED && command -v firewall-cmd >/dev/null; then
    log "Removing firewall rules..."
    # FW_ZONE_FLAG stays unquoted so that an empty value adds no argument.
    # shellcheck disable=SC2086
    firewall-cmd --quiet ${FW_ZONE_FLAG:-} --remove-service=dhcp 2>/dev/null || true
    # shellcheck disable=SC2086
    firewall-cmd --quiet ${FW_ZONE_FLAG:-} --remove-service=tftp 2>/dev/null || true
    # shellcheck disable=SC2086
    firewall-cmd --quiet ${FW_ZONE_FLAG:-} --remove-port="${HTTP_PORT}/tcp" 2>/dev/null || true
    # shellcheck disable=SC2086
    firewall-cmd --quiet ${FW_ZONE_FLAG:-} --remove-port=4011/udp 2>/dev/null || true
  fi
  log "State preserved in $STATEFILE"
  log "Restart bastion with: sudo bash bastion.sh"
}
# A handler that merely returns lets the script carry on after Ctrl-C and then
# run cleanup a second time at exit. Turn the signals into a real exit and let
# the single EXIT trap do the work.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# ──── Prepare boot artifacts ─────────────────────────────────────
#######################################
# Fetch a URL into a file unless the file already exists.
# Arguments: $1 - source URL
#            $2 - destination path
#            $3 - label used in log and error messages
# Outputs:   progress through log(); curl's progress bar on stderr
# Exits:     through die() when the transfer fails
#######################################
download() {
  local url="$1" dest="$2" label="$3"
  local partial="${dest}.part"
  if [[ -f "$dest" ]]; then
    log " ${label} — cached"
    return
  fi
  log " ${label} — downloading..."
  # Download next to the destination and rename on success. Writing straight
  # to $dest would leave a truncated file behind after an interrupted
  # transfer, and the existence check above would then treat it as cached.
  if ! curl -# -L -f -o "$partial" "$url"; then
    rm -f -- "$partial"
    die "Failed to download $label from $url"
  fi
  mv -f -- "$partial" "$dest"
}
#######################################
# Copy a local file into place unless the destination already exists.
# Arguments: $1 - source path
#            $2 - destination path
#            $3 - label used in log and error messages
# Exits:     through die() when the destination is absent and the source
#            does not exist either
#######################################
copy_if_missing() {
  local source_path="$1" target_path="$2" what="$3"
  if [[ ! -f "$target_path" ]]; then
    if [[ ! -f "$source_path" ]]; then
      die "${what}: source not found at $source_path"
    fi
    cp "$source_path" "$target_path"
    log " ${what} — copied from $source_path"
  else
    log " ${what} — cached"
  fi
}
#######################################
# Build a UEFI PXE loader stub (x86_64 EFI application) from a C source file
# with gcc, ld and objcopy against the gnu-efi headers and libraries.
# Arguments: $1 - C source file
#            $2 - destination path of the EFI binary
#            $3 - label used in log messages
# Outputs:   progress through log(); intermediate objects are written to
#            $BASTION_DIR/pxeloader-build
# Exits:     through die() when the compile, link or objcopy step fails
# Nothing is rebuilt when the destination file already exists.
#######################################
build_pxeloader() {
local src="$1" dest="$2" label="$3"
if [[ -f "$dest" ]]; then
log " ${label} — cached ($(stat -c%s "$dest") bytes)"
return
fi
log " ${label} — building UEFI PXE loader stub..."
local builddir="$BASTION_DIR/pxeloader-build"
mkdir -p "$builddir"
# Locations of the gnu-efi linker script, startup object, libraries and headers.
# NOTE(review): these paths are fixed — confirm they match where the host's
# gnu-efi package installs its files.
local gnuefi_lib gnuefi_inc
gnuefi_lib="/usr/lib"
gnuefi_inc="/usr/include/efi"
# Compile
gcc -I"$gnuefi_inc" -I"$gnuefi_inc/x86_64" -I"$gnuefi_inc/protocol" \
-DGNU_EFI_USE_MS_ABI -fPIC -fshort-wchar -ffreestanding \
-fno-stack-protector -mno-red-zone -maccumulate-outgoing-args \
-Wall -Os -c -o "$builddir/pxeloader.o" "$src" || die "PXE loader compile failed"
# Link
ld -nostdlib -znocombreloc -shared -Bsymbolic \
-T "$gnuefi_lib/elf_x86_64_efi.lds" \
"$gnuefi_lib/crt0-efi-x86_64.o" \
"$builddir/pxeloader.o" \
-o "$builddir/pxeloader.so" \
-lgnuefi -lefi -L"$gnuefi_lib" || die "PXE loader link failed"
# Convert to PE/COFF EFI binary
# NOTE(review): the -j patterns containing * are unquoted, so the shell could
# glob them against files in the current directory before objcopy sees them.
objcopy -j .text -j .sdata -j .data -j .dynamic -j .rodata -j .dynsym \
-j .rel -j .rela -j .rel.* -j .rela.* -j .rel* -j .rela* \
-j .reloc --target efi-app-x86_64 \
"$builddir/pxeloader.so" "$dest" || die "PXE loader objcopy failed"
# Report the size of the finished binary in bytes and whole KB.
local size
size="$(stat -c%s "$dest")"
log " ${label} — built (${size} bytes / $((size/1024)) KB)"
}
# Base URL of the Fedora release tree for the configured version and arch.
FEDORA_MIRROR="https://download.fedoraproject.org/pub/fedora/linux/releases/${FEDORA_VERSION}/Everything/${ARCH}/os"
log "Preparing boot artifacts (Fedora ${FEDORA_VERSION} ${ARCH})..."

# iPXE binaries are taken from the host's /usr/share/ipxe into the TFTP root.
copy_if_missing "/usr/share/ipxe/undionly.kpxe" "$TFTPDIR/undionly.kpxe" "iPXE BIOS"
# UEFI x86_64: serve iPXE directly via TFTP (UEFI has no TFTP size limit)
copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe.efi" "iPXE UEFI x86_64"
copy_if_missing "/usr/share/ipxe/arm64-efi/snponly.efi" "$TFTPDIR/ipxe-arm64.efi" "iPXE UEFI arm64"

# Installer kernel and initrd are fetched from the mirror into the HTTP root.
download "${FEDORA_MIRROR}/images/pxeboot/vmlinuz" "$HTTPDIR/vmlinuz" "Fedora kernel"
download "${FEDORA_MIRROR}/images/pxeboot/initrd.img" "$HTTPDIR/initrd.img" "Fedora initrd"

# Symlink iPXE binaries into HTTP dir (UEFI HTTP Boot downloads via HTTP, not TFTP)
for efi_binary in "$TFTPDIR"/*.efi; do
  ln -sf "$efi_binary" "$HTTPDIR/${efi_binary##*/}" 2>/dev/null || true
done
# ──── Generate discovery kickstart ────────────────────────────────
# Boots the Fedora installer environment, collects hardware info, POSTs it to
# the bastion and then force-reboots from %pre, so Anaconda never reaches the
# installation phase. Never touches the disk.
#
# The heredoc delimiter is quoted: everything up to DISCOVER_KS is written
# verbatim and only expands when the kickstart runs on the target machine.
# The __BASTION_URL__ placeholder is substituted by the sed call afterwards.
cat > "$HTTPDIR/discover.ks" << 'DISCOVER_KS'
# Lab Bastion — Discovery Mode
# Collects hardware inventory and reboots. Does NOT install anything.
%pre --erroronfail --log=/tmp/discover.log
#!/bin/bash
set -x
# ── Collect hardware info from /proc, /sys, and available tools ──
MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
PRODUCT=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo "unknown")
BOARD=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo "unknown")
SERIAL=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo "unknown")
MANUFACTURER=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo "unknown")
CPUMODEL=$(grep -m1 'model name' /proc/cpuinfo | cut -d: -f2 | sed 's/^ //')
CPUCORES=$(grep -c '^processor' /proc/cpuinfo)
MEMGB=$(awk '/MemTotal/ {printf "%d", $2/1024/1024}' /proc/meminfo)
ARCHTYPE=$(uname -m)
# Disk info — lsblk is available in Anaconda
DISKS_JSON=$(lsblk -Jb -o NAME,SIZE,TYPE,MODEL 2>/dev/null | python3 -c "
import sys, json
data = json.load(sys.stdin)
disks = [d for d in data.get('blockdevices', []) if d.get('type') == 'disk']
result = []
for d in disks:
    size_gb = round(int(d.get('size', 0)) / 1073741824, 1)
    result.append({
        'name': d.get('name', '?'),
        'size_gb': size_gb,
        'model': (d.get('model') or 'unknown').strip()
    })
print(json.dumps(result))
" 2>/dev/null || echo '[]')
# Network interfaces
NICS_JSON=$(ip -j link show 2>/dev/null | python3 -c "
import sys, json
nics = json.load(sys.stdin)
result = []
for n in nics:
    if n.get('link_type') == 'loopback':
        continue
    result.append({
        'name': n.get('ifname', '?'),
        'mac': n.get('address', '?'),
        'state': n.get('operstate', '?')
    })
print(json.dumps(result))
" 2>/dev/null || echo '[]')
# ── Build and POST discovery payload ──
# Values reach Python through the environment, not by being pasted into the
# Python source: a quote or backslash in a DMI string can then no longer break
# the snippet and silently lose the discovery.
PAYLOAD=$(DISC_MAC="$MAC" DISC_PRODUCT="$PRODUCT" DISC_BOARD="$BOARD" \
DISC_SERIAL="$SERIAL" DISC_MANUFACTURER="$MANUFACTURER" \
DISC_CPUMODEL="$CPUMODEL" DISC_CPUCORES="$CPUCORES" DISC_MEMGB="$MEMGB" \
DISC_ARCHTYPE="$ARCHTYPE" DISC_DISKS="$DISKS_JSON" DISC_NICS="$NICS_JSON" \
python3 -c "
import json, os

def text(name):
    return os.environ.get(name, '')

def number(name):
    try:
        return int(text(name) or 0)
    except ValueError:
        return 0

def listing(name):
    try:
        return json.loads(text(name) or '[]')
    except ValueError:
        return []

print(json.dumps({
    'mac': text('DISC_MAC'),
    'product': text('DISC_PRODUCT'),
    'board': text('DISC_BOARD'),
    'serial': text('DISC_SERIAL'),
    'manufacturer': text('DISC_MANUFACTURER'),
    'cpu_model': text('DISC_CPUMODEL'),
    'cpu_cores': number('DISC_CPUCORES'),
    'memory_gb': number('DISC_MEMGB'),
    'arch': text('DISC_ARCHTYPE'),
    'disks': listing('DISC_DISKS'),
    'nics': listing('DISC_NICS')
}))
")
# POST to bastion — try curl first, fall back to python3 urllib
BASTION_URL="__BASTION_URL__/api/discover"
if command -v curl >/dev/null 2>&1; then
curl -sf -X POST "$BASTION_URL" \
-H "Content-Type: application/json" \
-d "$PAYLOAD" || true
else
DISC_URL="$BASTION_URL" DISC_PAYLOAD="$PAYLOAD" python3 -c "
import os, urllib.request
req = urllib.request.Request(os.environ['DISC_URL'],
    data=os.environ['DISC_PAYLOAD'].encode(),
    headers={'Content-Type': 'application/json'})
try:
    urllib.request.urlopen(req, timeout=10)
except Exception as e:
    print(f'POST failed: {e}')
"
fi
# ── Reboot — do NOT let Anaconda proceed ──
echo ""
echo "=== Discovery complete, rebooting ==="
echo ""
sleep 3
echo 1 > /proc/sys/kernel/sysrq
echo b > /proc/sysrq-trigger
sleep 5
reboot -f
%end
# Anaconda should never get here, but just in case:
reboot
DISCOVER_KS
# Patch in the bastion URL
sed -i "s|__BASTION_URL__|http://${SERVER_IP}:${HTTP_PORT}|g" "$HTTPDIR/discover.ks"
# Save SSH keys and admin user for the HTTP server to use
# (plain files under $BASTION_DIR; ssh_keys holds one public key per line,
# admin_user holds a single line that is empty when no admin user was detected).
echo "$SSH_KEYS_CONTENT" > "$BASTION_DIR/ssh_keys"
echo "$ADMIN_USER" > "$BASTION_DIR/admin_user"
# ──── Generate iPXE boot script ───────────────────────────────────
# Initial iPXE script chains to /dispatch with the MAC, so the server
# can route to discover or install mode per machine.
# The heredoc delimiter is unquoted: ${SERVER_IP} and ${HTTP_PORT} are expanded
# now, while the backslash keeps ${net0/mac} literal so that iPXE substitutes
# it on the booting machine.
cat > "$HTTPDIR/boot.ipxe" << IPXE
#!ipxe
echo
echo ============================================
echo Lab PXE Bastion
echo Contacting server for instructions...
echo ============================================
echo
chain http://${SERVER_IP}:${HTTP_PORT}/dispatch?mac=\${net0/mac}
IPXE
# ──── Write the HTTP server ──────────────────────────────────────
cat > "$BASTION_DIR/server.py" << 'PYSERVER'
#!/usr/bin/env python3
"""Lab PXE Bastion — HTTP server with discovery API and per-MAC iPXE dispatch."""
import json
import os
import sys
import time
import fcntl
from http.server import HTTPServer, SimpleHTTPRequestHandler
from urllib.parse import urlparse, parse_qs
from datetime import datetime
# Config from argv: six mandatory positionals, then optional ones that fall
# back to built-in defaults when the caller passes fewer arguments.
def _optional_arg(position, fallback=""):
    """Return sys.argv[position], or fallback when that argument is absent."""
    if len(sys.argv) > position:
        return sys.argv[position]
    return fallback


HTTP_DIR = sys.argv[1]
STATE_FILE = sys.argv[2]
SERVER_IP = sys.argv[3]
HTTP_PORT = int(sys.argv[4])
FEDORA_VER = sys.argv[5]
FEDORA_MIRROR = sys.argv[6]
SSH_KEYS_FILE = _optional_arg(7)
TIMEZONE = _optional_arg(8, "Europe/London")
LOCALE = _optional_arg(9, "en_GB.UTF-8")
DOMAIN = _optional_arg(10, "ad.itaz.eu")
ADMIN_USER = _optional_arg(11)

# Load SSH keys from file: one key per non-blank line, skipping lines that
# begin with '#'.
SSH_KEYS = []
if SSH_KEYS_FILE and os.path.isfile(SSH_KEYS_FILE):
    with open(SSH_KEYS_FILE) as keys_file:
        for raw_line in keys_file:
            if raw_line.strip() and not raw_line.startswith('#'):
                SSH_KEYS.append(raw_line.strip())
# ── State management (file-backed, lock-protected) ───────────────
def load_state():
    """Return the state stored in STATE_FILE.

    A missing file or one that does not contain valid JSON yields a fresh
    state with empty "discovered", "install_queue" and "installed" maps.
    """
    try:
        with open(STATE_FILE) as handle:
            state = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError):
        state = {"discovered": {}, "install_queue": {}, "installed": {}}
    return state
def save_state(state):
    """Write state to STATE_FILE as indented JSON.

    The data goes to "<STATE_FILE>.tmp" first and is then moved over the real
    file with os.replace.
    """
    scratch_path = f"{STATE_FILE}.tmp"
    with open(scratch_path, 'w') as handle:
        json.dump(state, handle, indent=2)
    os.replace(scratch_path, STATE_FILE)
# ── Kickstart generation ─────────────────────────────────────────
def generate_kickstart(hostname, disk="", ssh_keys=None, domain="", role="worker", admin_user=""):
    """Render the Fedora server kickstart for one machine and return it as text.

    Parameters:
        hostname:   short host name; combined with ``domain`` into the FQDN.
        disk:       target disk, either a bare name ("sda") or a device path
                    ("/dev/sda"). Empty means the %pre script picks the first
                    of /dev/nvme0n1, /dev/sda, /dev/vda that exists.
        ssh_keys:   list of public key lines. The first one goes into the
                    ``sshkey`` directive, all of them into authorized_keys.
                    Without keys root gets the plaintext password "changeme".
        domain:     DNS domain appended to ``hostname`` (may be empty).
        role:       "worker" adds a growing /var/lib/longhorn volume on a
                    fresh install; any other value omits it.
        admin_user: optional account created in group wheel with the same
                    SSH keys and passwordless sudo.

    The template also reads the module-level FEDORA_VER, LOCALE, TIMEZONE,
    SERVER_IP and HTTP_PORT.
    """
    ssh_keys = ssh_keys or []
    fqdn = f"{hostname}.{domain}" if domain else hostname
    vg = "labvg"
    # The %pre script builds device paths as /dev/$DISK, so a caller passing
    # "/dev/sda" would end up with "/dev//dev/sda". Reduce to the bare name.
    disk = disk or ""
    if disk.startswith("/dev/"):
        disk = disk[len("/dev/"):]
    # ── Auth ──
    if ssh_keys:
        auth = f'rootpw --lock\nsshkey --username=root "{ssh_keys[0]}"'
    else:
        auth = 'rootpw --plaintext changeme'
    # ── Admin user (kickstart directive) ──
    user_directive = ""
    if admin_user:
        user_directive = f'user --name={admin_user} --groups=wheel --lock'
    # ── SSH keys for %post (root + admin user) ──
    all_keys = "\n".join(ssh_keys)
    ssh_post_block = ""
    if ssh_keys:
        ssh_post_block = f"""
# Set up SSH keys for root
mkdir -p /root/.ssh && chmod 700 /root/.ssh
cat > /root/.ssh/authorized_keys << 'SSHKEYS'
{all_keys}
SSHKEYS
chmod 600 /root/.ssh/authorized_keys"""
    if admin_user and ssh_keys:
        ssh_post_block += f"""
# Set up SSH keys for {admin_user}
ADMIN_HOME=$(getent passwd {admin_user} | cut -d: -f6)
mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh"
cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
chown -R {admin_user}:{admin_user} "$ADMIN_HOME/.ssh"
chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"
# Fix SELinux contexts for SSH
restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
# Passwordless sudo for {admin_user}
echo '{admin_user} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/{admin_user}
chmod 440 /etc/sudoers.d/{admin_user}"""
    # ── Determine disk (auto-detect first NVMe/SDA if not specified) ──
    disk_line = f'DISK="{disk}"' if disk else '''
DISK=""
for d in /dev/nvme0n1 /dev/sda /dev/vda; do
[ -b "$d" ] && { DISK="$(basename $d)"; break; }
done
[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }
'''
    # ── Optional longhorn volume (worker role, fresh install only) ──
    # Computed here so the template needs no quoted expression inside the
    # f-string.
    has_longhorn = (role == "worker")
    longhorn_line = (
        f"logvol /var/lib/longhorn --vgname={vg} --name=longhorn --fstype=xfs --grow --size=1"
        if has_longhorn else ""
    )
    return f"""# Lab Bastion -- Fedora {FEDORA_VER} server install
# Generated: {datetime.now().isoformat()}
# Target: {fqdn} (role={role})
text
reboot
lang {LOCALE}
keyboard uk
timezone {TIMEZONE} --utc
network --bootproto=dhcp --activate --hostname={fqdn}
{auth}
{user_directive}
bootloader --append="console=tty0 console=ttyS0,115200n8"
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch
# Partitioning is generated dynamically by %pre (supports longhorn preservation)
%include /tmp/part.ks
%pre --log=/tmp/pre-partition.log
#!/bin/bash
set -x
# Progress callback helper
bastion_progress() {{
local stage="$1" detail="${{2:-}}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
-H "Content-Type: application/json" \
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
}}
bastion_progress "partitioning" "preparing disk layout"
VG="{vg}"
{disk_line}
REPROVISION=no
# Check if VG exists (reprovision scenario)
if vgs $VG &>/dev/null; then
echo "=== Existing VG found - reprovision mode ==="
REPROVISION=yes
# Detect which data LVs to preserve
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no
lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME"
# Remove only OS logical volumes (keep data LVs)
for lv in root var varlog swap; do
lvremove -f $VG/$lv 2>/dev/null || true
done
fi
if [ "$REPROVISION" = "yes" ]; then
# Find existing boot partitions by type
EFI_PART=$(blkid -t TYPE=vfat -o device /dev/${{DISK}}* 2>/dev/null | head -1)
BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/${{DISK}}* 2>/dev/null | head -1)
# Fallback names: disks whose name ends in a digit (nvme0n1, mmcblk0) use a
# "p" separator before the partition number, others (sda, vda) do not.
case "$DISK" in
*[0-9]) PART_PREFIX="/dev/${{DISK}}p" ;;
*) PART_PREFIX="/dev/${{DISK}}" ;;
esac
EFI_PART=${{EFI_PART:-${{PART_PREFIX}}1}}
BOOT_PART=${{BOOT_PART:-${{PART_PREFIX}}2}}
echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"
# Build partition config reusing existing PV/VG
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --none
part /boot/efi --onpart=$EFI_PART --fstype=efi
part /boot --onpart=$BOOT_PART --fstype=ext4
volgroup {vg} --useexisting --noformat
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
PARTEOF
# Preserve or recreate data LVs
if [ "$PRESERVE_HOME" = "yes" ]; then
echo "logvol /home --vgname={vg} --name=home --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks
fi
if [ "$PRESERVE_SRV" = "yes" ]; then
echo "logvol /srv --vgname={vg} --name=srv --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks
fi
if [ "$PRESERVE_LONGHORN" = "yes" ]; then
echo "logvol /var/lib/longhorn --vgname={vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
fi
else
# Fresh install
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --all --initlabel --drives=$DISK
part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
part /boot --fstype=ext4 --size=3072 --ondisk=$DISK
part pv.01 --size=1 --grow --ondisk=$DISK
volgroup {vg} pv.01
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240
logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480
{longhorn_line}
PARTEOF
fi
echo "=== Generated partition config ==="
cat /tmp/part.ks
echo "==================================="
bastion_progress "partitioning" "layout ready, starting install"
%end
%packages
@core
openssh-server
vim-enhanced
tmux
git
curl
wget
python3
lshw
dmidecode
dnf-plugins-core
# Networking and diagnostics
NetworkManager
bind-utils
net-tools
iproute
iputils
traceroute
tcpdump
htop
iotop
strace
jq
# k3s prerequisites
container-selinux
iptables-nft
nftables
policycoreutils-python-utils
chrony
tar
socat
conntrack-tools
ethtool
# Boot management
efibootmgr
# Puppet prerequisites
ruby
ruby-libs
# Exclude desktop
-@workstation-product
-@gnome-desktop
-gnome-shell
-gdm
-PackageKit
-PackageKit-glib
%end
%post --log=/root/bastion-post-install.log
#!/bin/bash
set -x
# Progress callback helper
bastion_progress() {{
local stage="$1" detail="${{2:-}}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
-H "Content-Type: application/json" \
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
}}
bastion_progress "post-install" "configuring system"
# ── SSH ──
systemctl enable --now sshd
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
{ssh_post_block}
# ── Hostname and domain ──
hostnamectl set-hostname {fqdn}
# ── tmpfs for /tmp ──
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
# ── Kernel modules for k3s ──
# Connection tracking is provided by nf_conntrack; current kernels have no
# module named ip_conntrack, so listing that name only makes module loading
# fail at boot.
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
br_netfilter
overlay
nf_conntrack
MODULES
modprobe br_netfilter || true
modprobe overlay || true
# ── Sysctl for k3s networking ──
cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
net.ipv6.conf.all.forwarding = 1
fs.inotify.max_user_instances = 524288
fs.inotify.max_user_watches = 1048576
SYSCTL
sysctl --system || true
# ── Disable firewalld (k3s manages its own iptables rules) ──
systemctl disable --now firewalld || true
# ── Enable chronyd for time sync ──
systemctl enable --now chronyd
# ── Set boot order: local disk first, PXE after ──
if command -v efibootmgr >/dev/null 2>&1; then
# Find the Fedora boot entry and move it first
FEDORA_ENTRY=$(efibootmgr | grep -i fedora | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
if [ -n "$FEDORA_ENTRY" ]; then
CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
# Put Fedora first, keep rest
NEW_ORDER="$FEDORA_ENTRY,$(echo "$CURRENT_ORDER" | sed "s/$FEDORA_ENTRY,\\?//;s/,$//")"
efibootmgr -o "$NEW_ORDER" || true
echo "Boot order set: Fedora first ($NEW_ORDER)"
fi
fi
# ── Provisioning metadata ──
cat > /etc/lab-provisioned << PROVEOF
hostname: {fqdn}
role: {role}
provisioned: $(date -Iseconds)
bastion: {SERVER_IP}
PROVEOF
cat > /root/README << 'README'
# Lab Node -- {fqdn} (role: {role})
#
# Next steps:
# 1. Install puppet agent:
# dnf install -y puppet-agent
#
# 2. Install k3s:
# curl -sfL https://get.k3s.io | sh -
#
# 3. Or join existing cluster:
# curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
README
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {{split($2,a,"/"); print a[1]; exit}}')
bastion_progress "complete" "ready at $IP_ADDR"
%end
"""
# ── Pretty terminal output ────────────────────────────────────────
# ANSI escape sequences shared by the print_* helpers.
BOLD, RESET = "\033[1m", "\033[0m"
GREEN, YELLOW = "\033[0;32m", "\033[1;33m"
CYAN, RED = "\033[0;36m", "\033[0;31m"
def print_discovery(mac, hw, is_new):
    """Print a discovered machine to the bastion terminal.

    mac is the machine's MAC address, hw the reported hardware dict (every key
    is optional and shown as '?' when absent), and is_new selects between the
    "new" and "re-discovered" banner.
    """
    if is_new:
        label, color = "NEW MACHINE DISCOVERED", GREEN
    else:
        label, color = "MACHINE RE-DISCOVERED", YELLOW

    # One "<size>GB <model>" entry per disk, one interface name per NIC.
    disk_entries = [
        f"{disk.get('size_gb', '?')}GB {disk.get('model', '?')}"
        for disk in hw.get('disks', [])
    ]
    disk_str = ", ".join(disk_entries) or "none detected"
    nic_names = [nic.get('name', '?') for nic in hw.get('nics', [])]
    nic_str = ", ".join(nic_names) or "none"

    heavy_rule = '═' * 60
    print(f"\n{color}{BOLD}{heavy_rule}")
    print(f" {label}")
    print(f"{heavy_rule}{RESET}")

    product = f"{hw.get('manufacturer', '?')} {hw.get('product', '?')}"
    cpu = f"{hw.get('cpu_model', '?')} ({hw.get('cpu_cores', '?')} cores)"
    memory = f"{hw.get('memory_gb', '?')} GB"
    rows = (
        ("MAC:", mac),
        ("Product:", product),
        ("CPU:", cpu),
        ("RAM:", memory),
        ("Arch:", hw.get('arch', '?')),
        ("Disks:", disk_str),
        ("NICs:", nic_str),
        ("Serial:", hw.get('serial', '?')),
    )
    for title, value in rows:
        print(f" {BOLD}{title}{RESET} {value}")

    print()
    print(f" {CYAN}To install Fedora on this machine:{RESET}")
    print(f" {BOLD}bash bastion.sh install {mac} <hostname>{RESET}")
    print(f"\n{'─' * 60}\n", flush=True)
def print_install_queued(mac, hostname):
    """Announce on the bastion terminal that an install was queued for mac."""
    banner = f"\n{GREEN}{BOLD} INSTALL QUEUED{RESET}"
    target = f" {mac} → hostname={BOLD}{hostname}{RESET}"
    hint = " PXE boot the machine to start Fedora installation."
    for line in (banner, target, hint):
        print(line)
    print(f"\n{'─' * 60}\n", flush=True)
def print_install_started(mac, hostname):
    """Announce on the bastion terminal that the installer is being served to mac."""
    banner = f"\n{CYAN}{BOLD} INSTALL STARTED{RESET}"
    target = f" {mac} → {BOLD}{hostname}{RESET}"
    serving = f" Serving Fedora {FEDORA_VER} installer + kickstart..."
    for line in (banner, target, serving):
        print(line)
    print(f"\n{'─' * 60}\n", flush=True)
# Marker printed in front of a progress line, keyed by stage name.
PROGRESS_ICONS = dict([
    ("partitioning", "◆"),
    ("installing", "◆◆"),
    ("post-install", "◆◆◆"),
    ("complete", "✔"),
    ("error", "✘"),
])
def print_progress(mac, stage, detail=""):
    """Print a one-line install progress update for a machine.

    The line is green for "complete", red for "error" and yellow otherwise.
    When the stage is "complete" and detail is non-empty, the text left after
    removing "ready at " from detail is treated as the address and a
    ready-to-paste ssh command is printed as well.
    """
    icon = PROGRESS_ICONS.get(stage, "·")
    if stage == "complete":
        color = GREEN
    elif stage == "error":
        color = RED
    else:
        color = YELLOW
    suffix = f" -- {detail}" if detail else ""
    print(f" {color}{icon}{RESET} {mac} {BOLD}{stage}{RESET}{suffix}", flush=True)

    # Only a completed install with a detail string yields an ssh hint.
    if stage != "complete" or not detail:
        return
    ip = detail.replace("ready at ", "").strip()
    if not ip:
        return
    admin = ADMIN_USER or "root"
    print(f"\n {GREEN}{BOLD} ssh {admin}@{ip}{RESET}\n", flush=True)
# ── HTTP Handler ──────────────────────────────────────────────────
class BastionHandler(SimpleHTTPRequestHandler):
    """HTTP endpoint of the PXE bastion.

    GET routes:
      /dispatch?mac=...  iPXE script chosen by the MAC's state: queued install,
                         already installed (boot local disk), or discovery
      /ks?mac=...        kickstart generated from that MAC's install_queue entry
      /api/machines      the whole state as JSON
      /ipxe*.efi         iPXE binaries read from the "tftp" directory that sits
                         next to HTTP_DIR
      anything else      static file from HTTP_DIR

    POST routes (the body must be a JSON object):
      /api/discover      store a hardware report under state["discovered"]
      /api/install       add a machine to state["install_queue"]
      /api/progress      record install progress; stage "complete" moves the
                         machine from install_queue to installed

    MAC addresses are normalized to lower case with ":" separators on every
    route so that all state lookups use the same key form.
    """

    protocol_version = "HTTP/1.1"

    # Paths answered from the tftp directory rather than from HTTP_DIR.
    EFI_BINARIES = ("/ipxe.efi", "/ipxe-real.efi", "/ipxe-arm64.efi")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, directory=HTTP_DIR, **kwargs)

    def log_message(self, format, *args):
        """Log HTTP requests to help debug boot issues."""
        # The base class also reports errors through this method, in some cases
        # before the request line has been parsed, so command/path may be
        # missing or None at that point.
        command = getattr(self, "command", None) or "-"
        path = getattr(self, "path", None) or "-"
        print(f" HTTP: {self.client_address[0]} {command} {path}", flush=True)

    def send_text(self, code, text, content_type="text/plain"):
        """Send text as a complete response with a "Connection: close" header."""
        data = text.encode()
        self.send_response(code)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(data)))
        self.send_header("Connection", "close")
        self.end_headers()
        self.wfile.write(data)

    def send_json(self, code, data):
        """Send data serialized as indented JSON."""
        self.send_text(code, json.dumps(data, indent=2), "application/json")

    # ── Shared helpers ──

    @staticmethod
    def _normalize_mac(value):
        """Return value lower-cased with "-" replaced by ":"; "" if not a string."""
        if not isinstance(value, str):
            return ""
        return value.lower().replace("-", ":")

    def _query_mac(self, parsed):
        """Return the normalized ?mac= query parameter ("" when absent)."""
        return self._normalize_mac(parse_qs(parsed.query).get("mac", [""])[0])

    def _read_body(self):
        """Read the request body; return None after answering 400 on a bad length."""
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            length = -1
        if length < 0:
            # A negative length would make read() wait for end of stream.
            self.send_json(400, {"error": "invalid Content-Length"})
            return None
        return self.rfile.read(length)

    def _parse_json_object(self, body):
        """Decode body as a JSON object; return None after answering 400 otherwise."""
        try:
            data = json.loads(body)
        except ValueError:  # covers JSONDecodeError and undecodable bytes
            self.send_json(400, {"error": "invalid JSON"})
            return None
        if not isinstance(data, dict):
            self.send_json(400, {"error": "JSON object expected"})
            return None
        return data

    # ── GET ──

    def do_GET(self):
        """Route GET requests; unknown paths are served as static files."""
        parsed = urlparse(self.path)
        route = parsed.path
        if route == "/dispatch":
            self._get_dispatch(parsed)
            return
        if route == "/ks":
            self._get_kickstart(parsed)
            return
        if route == "/api/machines":
            self.send_json(200, load_state())
            return
        if route in self.EFI_BINARIES and self._send_efi_binary(route):
            return
        # ── Static files (vmlinuz, initrd, discover.ks, etc.) ──
        super().do_GET()

    def _get_dispatch(self, parsed):
        """iPXE dispatch: route to install, local boot or discovery based on MAC."""
        mac = self._query_mac(parsed)
        state = load_state()
        base = f"http://{SERVER_IP}:{HTTP_PORT}"

        if mac in state.get("install_queue", {}):
            cfg = state["install_queue"][mac]
            hostname = cfg.get("hostname", "lab-node")
            print_install_started(mac, hostname)
            script = f"""#!ipxe
echo
echo =============================================
echo Lab PXE Bastion - INSTALLING Fedora {FEDORA_VER}
echo Target: {hostname}
echo MAC: {mac}
echo =============================================
echo
kernel {base}/vmlinuz inst.ks={base}/ks?mac={mac} inst.repo={FEDORA_MIRROR} inst.text
initrd {base}/initrd.img
boot
"""
        elif mac in state.get("installed", {}):
            info = state["installed"][mac]
            hostname = info.get("hostname", "?")
            print(f" {GREEN}PXE request from {mac} ({hostname}) - already installed, booting local disk{RESET}", flush=True)
            script = f"""#!ipxe
echo
echo =============================================
echo Lab PXE Bastion - {hostname}
echo Already installed, booting from local disk
echo =============================================
echo
sleep 3
exit
"""
        else:
            print(f" {YELLOW}PXE request from {mac} → discovery mode{RESET}", flush=True)
            script = f"""#!ipxe
echo
echo =============================================
echo Lab PXE Bastion - DISCOVERY MODE
echo MAC: {mac}
echo Collecting hardware info...
echo =============================================
echo
kernel {base}/vmlinuz inst.ks={base}/discover.ks inst.stage2={FEDORA_MIRROR} inst.text
initrd {base}/initrd.img
boot
"""
        self.send_text(200, script)

    def _get_kickstart(self, parsed):
        """Per-MAC kickstart for install mode."""
        mac = self._query_mac(parsed)
        cfg = load_state().get("install_queue", {}).get(mac, {})
        # NOTE(review): a MAC that is not in install_queue still receives a
        # kickstart built from the defaults below; confirm this is intended.
        ks = generate_kickstart(
            hostname=cfg.get("hostname", "lab-node"),
            disk=cfg.get("disk", ""),
            ssh_keys=SSH_KEYS,
            domain=DOMAIN,
            role=cfg.get("role", "worker"),
            admin_user=ADMIN_USER,
        )
        self.send_text(200, ks)

    def _send_efi_binary(self, route):
        """Stream an iPXE EFI binary (for UEFI HTTP Boot) from the tftp directory.

        Returns False without sending anything when the file cannot be opened,
        so the caller can fall back to static file handling.
        """
        import shutil

        tftp_dir = os.path.join(os.path.dirname(HTTP_DIR), "tftp")
        fpath = os.path.join(tftp_dir, route.lstrip("/"))
        try:
            f = open(fpath, "rb")
        except OSError:
            return False
        with f:
            # Size is taken from the already-open file so it matches what is sent.
            size = os.fstat(f.fileno()).st_size
            self.send_response(200)
            self.send_header("Content-Type", "application/efi")
            self.send_header("Content-Length", str(size))
            self.end_headers()
            shutil.copyfileobj(f, self.wfile)
        return True

    # ── POST ──

    def do_POST(self):
        """Route POST requests; every known route expects a JSON object body."""
        route = urlparse(self.path).path
        # The body is consumed before routing, also for unknown paths.
        body = self._read_body()
        if body is None:
            return
        handlers = {
            "/api/discover": self._post_discover,
            "/api/install": self._post_install,
            "/api/progress": self._post_progress,
        }
        handler = handlers.get(route)
        if handler is None:
            self.send_json(404, {"error": "not found"})
            return
        data = self._parse_json_object(body)
        if data is None:
            return
        handler(data)

    def _post_discover(self, data):
        """Discovery report from a PXE-booted machine."""
        mac = self._normalize_mac(data.get("mac")) or "unknown"
        now = datetime.now().isoformat()
        data["last_seen"] = now
        state = load_state()
        known = state.setdefault("discovered", {})
        is_new = mac not in known
        # Keep the original first_seen when the machine reports in again.
        data["first_seen"] = now if is_new else known[mac].get("first_seen", now)
        known[mac] = data
        save_state(state)
        print_discovery(mac, data, is_new)
        self.send_json(200, {"status": "ok", "mac": mac, "new": is_new})

    def _post_install(self, data):
        """Queue a machine for install."""
        mac = self._normalize_mac(data.get("mac", ""))
        hostname = data.get("hostname", "lab-node")
        disk = data.get("disk", "")
        role = data.get("role", "worker")
        if not mac:
            self.send_json(400, {"error": "mac is required"})
            return
        if role not in ("worker", "infra"):
            self.send_json(400, {"error": "role must be 'worker' or 'infra'"})
            return
        state = load_state()
        state.setdefault("install_queue", {})[mac] = {
            "hostname": hostname,
            "disk": disk,
            "role": role,
            "queued_at": datetime.now().isoformat(),
        }
        save_state(state)
        print_install_queued(mac, hostname)
        self.send_json(200, {
            "status": "queued",
            "mac": mac,
            "hostname": hostname,
            "role": role,
            "message": f"PXE boot the machine to start installation (role={role})",
        })

    def _post_progress(self, data):
        """Install progress callback from kickstart."""
        mac = self._normalize_mac(data.get("mac")) or "unknown"
        # Coerce to text so that printing and the string handling below cannot
        # fail on a report that carries numbers or null.
        stage = str(data.get("stage", "unknown"))
        detail = data.get("detail") or ""
        if not isinstance(detail, str):
            detail = str(detail)
        print_progress(mac, stage, detail)

        # Progress is recorded only for machines that are in the install queue.
        state = load_state()
        queue = state.get("install_queue", {})
        if mac in queue:
            entry = queue[mac]
            now = datetime.now().isoformat()
            entry["progress"] = stage
            entry["progress_at"] = now
            if detail:
                entry["progress_detail"] = detail
            # Move to installed on completion
            if stage == "complete":
                queue.pop(mac)
                state.setdefault("installed", {})[mac] = {
                    "hostname": entry.get("hostname", "?"),
                    "role": entry.get("role", "?"),
                    "ip": detail.replace("ready at ", "").strip(),
                    "installed_at": now,
                }
            save_state(state)
        self.send_json(200, {"status": "ok"})
def run_server():
    """Bind BastionHandler to HTTP_PORT on all IPv4 interfaces and serve forever."""
    bind_addr = ("0.0.0.0", HTTP_PORT)
    httpd = HTTPServer(bind_addr, BastionHandler)
    print(f"HTTP server listening on :{HTTP_PORT}", flush=True)
    httpd.serve_forever()


if __name__ == "__main__":
    run_server()
PYSERVER
# ──── Generate dnsmasq config ─────────────────────────────────────
# Derive DHCP range for full mode
# Unless already set, the range defaults to .100 through .200 on the prefix
# obtained by stripping the last dotted component from NETWORK.
if [[ "$DHCP_MODE" == "full" ]]; then
DHCP_RANGE_START="${DHCP_RANGE_START:-${NETWORK%.*}.100}"
DHCP_RANGE_END="${DHCP_RANGE_END:-${NETWORK%.*}.200}"
fi
# Write the dnsmasq configuration. The heredoc delimiter is unquoted, so every
# ${...} below is expanded now, and the embedded command substitution emits
# either the full-DHCP or the proxy-DHCP stanza depending on DHCP_MODE.
cat > "$BASTION_DIR/dnsmasq.conf" << DNSMASQ
# Lab PXE Bastion — dnsmasq config
# Disable DNS (we only want DHCP/TFTP)
port=0
# Listen on the right interface
interface=${IFACE}
bind-dynamic
$(if [[ "$DHCP_MODE" == "full" ]]; then
cat << FULL_DHCP
# Full DHCP mode — bastion is the only DHCP server on this network
dhcp-range=${DHCP_RANGE_START},${DHCP_RANGE_END},255.255.255.0,12h
dhcp-option=3,${GATEWAY}
dhcp-option=6,${GATEWAY}
FULL_DHCP
else
cat << PROXY_DHCP
# ProxyDHCP — works alongside existing DHCP (UniFi etc)
dhcp-range=${NETWORK},proxy
PROXY_DHCP
fi)
# TFTP for initial PXE boot
enable-tftp
tftp-root=${TFTPDIR}
tftp-no-blocksize
# Detect client architecture — PXE (TFTP) clients
dhcp-match=set:bios,option:client-arch,0
dhcp-match=set:efi-x86_64,option:client-arch,7
dhcp-match=set:efi-x86_64,option:client-arch,9
dhcp-match=set:efi-arm64,option:client-arch,11
# Detect client architecture — UEFI HTTP Boot clients (no TFTP size limit)
dhcp-match=set:httpboot-x86_64,option:client-arch,16
dhcp-match=set:httpboot-arm64,option:client-arch,20
# Detect iPXE clients (already chainloaded)
dhcp-userclass=set:ipxe,iPXE
# UEFI HTTP Boot → serve full iPXE EFI via HTTP (no TFTP size limit)
dhcp-boot=tag:httpboot-x86_64,http://${SERVER_IP}:${HTTP_PORT}/ipxe-real.efi
dhcp-boot=tag:httpboot-arm64,http://${SERVER_IP}:${HTTP_PORT}/ipxe-arm64.efi
# Echo vendor class back to HTTP Boot clients (required by UEFI HTTP Boot spec)
dhcp-option-force=tag:httpboot-x86_64,60,HTTPClient
dhcp-option-force=tag:httpboot-arm64,60,HTTPClient
# First PXE boot → serve iPXE binary via TFTP (BIOS and UEFI fallback)
dhcp-boot=tag:bios,tag:!ipxe,undionly.kpxe
dhcp-boot=tag:efi-x86_64,tag:!ipxe,ipxe.efi
dhcp-boot=tag:efi-arm64,tag:!ipxe,ipxe-arm64.efi
# iPXE clients → chain to boot script via HTTP
dhcp-boot=tag:ipxe,http://${SERVER_IP}:${HTTP_PORT}/boot.ipxe
# PXE service directives (needed for proxy DHCP to respond properly)
pxe-service=tag:!ipxe,x86PC,"PXE Boot",undionly.kpxe
pxe-service=tag:!ipxe,X86-64_EFI,"PXE Boot",ipxe.efi
pxe-service=tag:!ipxe,BC_EFI,"PXE Boot",ipxe.efi
pxe-service=tag:!ipxe,ARM64_EFI,"PXE Boot",ipxe-arm64.efi
# Verbose logging
log-dhcp
DNSMASQ
# ──── Open firewall ──────────────────────────────────────────────
# When firewalld is installed and running, allow DHCP, TFTP, the bastion HTTP
# port and UDP 4011 in the zone that owns IFACE. No --permanent flag is passed.
# FW_ZONE, FW_ZONE_FLAG and FW_OPENED are left set for later use
# (presumably by cleanup code elsewhere in this script; confirm).
if command -v firewall-cmd >/dev/null && firewall-cmd --state >/dev/null 2>&1; then
  # Detect the zone for our interface (may differ from default zone)
  FW_ZONE="$(firewall-cmd --get-zone-of-interface="${IFACE}" 2>/dev/null || echo "")"
  FW_ZONE_FLAG=""
  # Common arguments live in an array so that the optional zone flag is simply
  # absent when no zone was found, without relying on unquoted word splitting.
  fw_args=(--quiet)
  if [[ -n "$FW_ZONE" ]]; then
    FW_ZONE_FLAG="--zone=${FW_ZONE}"
    fw_args+=("$FW_ZONE_FLAG")
  fi
  log "Opening firewall ports (DHCP, TFTP, HTTP:${HTTP_PORT})${FW_ZONE:+ in zone ${FW_ZONE}}..."
  firewall-cmd "${fw_args[@]}" --add-service=dhcp
  firewall-cmd "${fw_args[@]}" --add-service=tftp
  firewall-cmd "${fw_args[@]}" --add-port="${HTTP_PORT}/tcp"
  # Best effort: a failure to open 4011/udp is deliberately ignored.
  firewall-cmd "${fw_args[@]}" --add-port=4011/udp 2>/dev/null || true
  FW_OPENED=true
fi
# ──── Stop conflicting services ───────────────────────────────────
# If the system-wide dnsmasq unit is active, stop it before the bastion
# launches its own dnsmasq instance.
system_unit="dnsmasq"
if systemctl is-active --quiet "$system_unit" 2>/dev/null; then
  warn "System dnsmasq is running — stopping it temporarily"
  systemctl stop "$system_unit"
fi
# ──── Start HTTP server ──────────────────────────────────────────
log "Starting HTTP server on :${HTTP_PORT}..."
# Positional arguments for server.py, in the order it is invoked with.
server_args=(
  "$HTTPDIR"
  "$STATEFILE"
  "$SERVER_IP"
  "$HTTP_PORT"
  "$FEDORA_VERSION"
  "$FEDORA_MIRROR"
  "$BASTION_DIR/ssh_keys"
  "$TIMEZONE"
  "$LOCALE"
  "$DOMAIN"
  "$ADMIN_USER"
)
python3 "$BASTION_DIR/server.py" "${server_args[@]}" &
HTTP_PID=$!
# Wait a second, then confirm the background process is still alive.
sleep 1
kill -0 "$HTTP_PID" 2>/dev/null \
  || die "HTTP server failed to start — is port ${HTTP_PORT} in use?"
# ──── Start dnsmasq ──────────────────────────────────────────────
# Print the startup banner, then run dnsmasq in the background with
# --no-daemon and wait on it; a non-zero wait status is reported as an
# unexpected exit.
if [[ "$DHCP_MODE" == "full" ]]; then
  # Describe the mode that is really in use and separate the range bounds.
  dhcp_summary=" (${DHCP_RANGE_START} - ${DHCP_RANGE_END})"
  log "Starting PXE server (full DHCP on ${IFACE})..."
else
  dhcp_summary=" (alongside existing DHCP)"
  log "Starting PXE server (proxyDHCP on ${IFACE})..."
fi
echo ""
echo -e "${CYAN}${BOLD}════════════════════════════════════════════════════════════${NC}"
echo -e "${CYAN}${BOLD} Lab PXE Bastion — Discovery Mode${NC}"
echo -e "${CYAN}${BOLD}════════════════════════════════════════════════════════════${NC}"
echo ""
echo -e " Network: ${BOLD}${NETWORK}/24${NC} via ${BOLD}${IFACE}${NC}"
echo -e " DHCP: ${BOLD}${DHCP_MODE}${NC}${dhcp_summary}"
echo -e " HTTP: ${BOLD}http://${SERVER_IP}:${HTTP_PORT}/${NC}"
echo -e " OS: ${BOLD}Fedora ${FEDORA_VERSION} (${ARCH})${NC}"
echo -e " Domain: ${BOLD}${DOMAIN}${NC}"
echo -e " State: ${BOLD}${STATEFILE}${NC}"
echo ""
echo -e " ${YELLOW}PXE boot any machine on this network.${NC}"
echo -e " ${YELLOW}It will be inventoried and powered off automatically.${NC}"
echo ""
echo -e " Commands (from another terminal):"
echo -e " ${BOLD}bash bastion.sh list${NC} — show machines"
echo -e " ${BOLD}bash bastion.sh install <mac> <hostname>${NC} — queue install"
echo ""
echo -e " Press ${BOLD}Ctrl-C${NC} to stop."
echo ""
echo -e "${CYAN}──── Waiting for PXE boot requests... ────${NC}"
echo ""
dnsmasq --no-daemon --conf-file="$BASTION_DIR/dnsmasq.conf" &
DNSMASQ_PID=$!
wait "$DNSMASQ_PID" || {
  err "dnsmasq exited unexpectedly. Check if another DHCP/TFTP service is running."
  err "Try: ss -ulnp | grep -E ':(67|69|4011) '"
  exit 1
}