- LVM partition layout: /, /var, /var/log, /home, /srv, swap, tmpfs /tmp plus /var/lib/longhorn for worker role (grows to fill disk)
- Reprovision preserves /home, /srv, /var/lib/longhorn via %pre detection
- Admin user created matching the user running the bastion script with SSH keys from authorized_keys + local pubkeys, passwordless sudo
- Progress callbacks from %pre and %post to /api/progress endpoint with IP reported on completion (ssh command printed)
- Installed machines boot from local disk (iPXE exit) instead of re-entering discovery mode
- --role worker|infra flag (infra skips longhorn partition)
- reprovision subcommand: queues install + SSH reboot into PXE
- Self-cleanup: kills old bastion instances on start
- Domain config (DOMAIN env, default ad.itaz.eu)
- efibootmgr in %post to set local disk first in boot order
- k3s prereqs: kernel modules, sysctl, firewalld disabled, chrony
- VM reprovision test script (test-reprovision.sh)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1365 lines
50 KiB
Bash
Executable File
1365 lines
50 KiB
Bash
Executable File
#!/usr/bin/env bash
|
||
# ─────────────────────────────────────────────────────────────────────
|
||
# Lab PXE Bastion — discover-first bare-metal provisioning
|
||
#
|
||
# Default mode: DISCOVER. Any machine that PXE boots gets inventoried
|
||
# and powered off. You review what appeared, then promote to install.
|
||
#
|
||
# Usage:
|
||
# sudo bash bastion.sh # start bastion (discover mode)
|
||
# bash bastion.sh install <mac> <hostname> # queue discovered machine for install
|
||
# bash bastion.sh list # show discovered/queued machines
|
||
#
|
||
# Flow:
|
||
# 1. Start bastion → sudo bash bastion.sh
|
||
# 2. Power on machine → PXE boots, hardware discovered, powers off
|
||
# 3. Queue for install → bash bastion.sh install aa:bb:cc:dd:ee:ff puppet
|
||
# 4. Power on again → PXE boots, Fedora installed, reboots into OS
|
||
#
|
||
# Requirements: Fedora/RHEL host with dnsmasq, python3, curl
|
||
# ─────────────────────────────────────────────────────────────────────
|
||
set -euo pipefail

# ──── Configuration (override via environment) ────────────────────
# Every setting keeps a value already present in the environment and
# otherwise falls back to the default shown here.
FEDORA_VERSION="${FEDORA_VERSION:-43}"
ARCH="${ARCH:-x86_64}"
HTTP_PORT="${HTTP_PORT:-8080}"
TIMEZONE="${TIMEZONE:-Europe/London}"
LOCALE="${LOCALE:-en_GB.UTF-8}"
BASTION_DIR="${BASTION_DIR:-/tmp/lab-bastion}"
DOMAIN="${DOMAIN:-ad.itaz.eu}"            # internal domain for hostnames
DHCP_MODE="${DHCP_MODE:-proxy}"           # proxy (alongside existing DHCP) or full (bastion IS the DHCP server)
DHCP_RANGE_START="${DHCP_RANGE_START:-}"  # only for full mode, auto-derived if empty
DHCP_RANGE_END="${DHCP_RANGE_END:-}"

# ──── Colors ──────────────────────────────────────────────────────
# Stored as backslash sequences; they are only turned into real escape
# characters by whatever prints them.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'
|
||
|
||
# Logging helpers. Each prints its arguments joined by spaces behind a
# coloured "[bastion]" tag; %b expands the backslash colour sequences the
# same way `echo -e` does.
#   log  — stdout, green tag
#   warn — stdout, yellow tag
#   err  — stderr, red tag
#   die  — err, then exit with status 1
log() { printf '%b\n' "${GREEN}[bastion]${NC} $*"; }
warn() { printf '%b\n' "${YELLOW}[bastion]${NC} $*"; }
err() { printf '%b\n' "${RED}[bastion]${NC} $*" >&2; }
die() {
  err "$@"
  exit 1
}
|
||
|
||
# ──── Subcommand handling ─────────────────────────────────────────
|
||
CMD="${1:-serve}"

#######################################
# Parse the optional flags shared by `install` and `reprovision`.
# Globals:   DISK, ROLE (written; ROLE defaults to "worker")
# Arguments: the command-line words that follow <mac> <hostname>
# Returns:   exits 1 on an unknown flag or a flag without a value
#######################################
parse_install_opts() {
  DISK=""
  ROLE="worker"
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --disk | --role)
        # Checked explicitly: under `set -u` a bare "$2" would abort with
        # an unhelpful "unbound variable" message.
        [[ $# -ge 2 ]] || { echo "Option $1 requires a value"; exit 1; }
        if [[ "$1" == "--disk" ]]; then DISK="$2"; else ROLE="$2"; fi
        shift 2
        ;;
      *)
        echo "Unknown option: $1"
        exit 1
        ;;
    esac
  done
}

#######################################
# Print the JSON body for /api/install, omitting empty fields.
# Values reach Python as argv, never as source text, so quotes or
# backslashes in them cannot break or inject code.
# Arguments: $1 mac, $2 hostname, $3 disk, $4 role
#######################################
build_install_payload() {
  python3 -c '
import json, sys
fields = dict(zip(("mac", "hostname", "disk", "role"), sys.argv[1:5]))
print(json.dumps({k: v for k, v in fields.items() if v}))
' "${1:-}" "${2:-}" "${3:-}" "${4:-}"
}

#######################################
# POST an install request to the local bastion and echo its reply
# (pretty-printed when it is valid JSON).
# Arguments: $1 mac, $2 hostname, $3 disk, $4 role
# Returns:   dies when the request fails
#######################################
queue_install() {
  local payload reply
  payload=$(build_install_payload "$@")
  reply=$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
    -H "Content-Type: application/json" \
    -d "$payload" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
  printf '%s\n' "$reply" | python3 -m json.tool 2>/dev/null || printf '%s\n' "$reply"
}

#######################################
# Print the IP recorded under "installed" for a MAC, or an empty line.
# Arguments: $1 mac
#######################################
lookup_installed_ip() {
  curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>/dev/null \
    | python3 -c '
import sys, json
d = json.load(sys.stdin)
print(d.get("installed", {}).get(sys.argv[1], {}).get("ip", ""))
' "$1" 2>/dev/null \
    || echo ""
}

#######################################
# Render the /api/machines JSON read from stdin as three text sections.
#######################################
render_machines() {
  local program
  # Quoted heredoc: the Python text is taken literally, no shell escaping.
  program=$(cat <<'PY'
import sys, json
state = json.load(sys.stdin)

discovered = state.get('discovered', {})
queue = state.get('install_queue', {})
installed = state.get('installed', {})

print()
print('\033[1mDISCOVERED\033[0m')
if discovered:
    print(f' {"MAC":<20} {"CPU":<32} {"CORES":<6} {"RAM":<8} {"ARCH":<10} {"PRODUCT"}')
    for mac, hw in discovered.items():
        status = ' [QUEUED]' if mac in queue else ''
        print(f' {mac:<20} {hw.get("cpu_model","?"):<32} {hw.get("cpu_cores","?"):<6} {str(hw.get("memory_gb","?"))+"GB":<8} {hw.get("arch","?"):<10} {hw.get("product","?")}{status}')
else:
    print(' (none — PXE boot a machine to discover it)')

print()
print('\033[1mINSTALL QUEUE\033[0m')
if queue:
    for mac, cfg in queue.items():
        print(f' {mac:<20} → hostname={cfg.get("hostname","?")}')
else:
    print(' (none)')

print()
print('\033[1mINSTALLED\033[0m')
if installed:
    for mac, info in installed.items():
        ip = info.get('ip', '')
        ip_str = f' ip={ip}' if ip else ''
        print(f' {mac:<20} → {info.get("hostname","?")} role={info.get("role","?")}{ip_str} ({info.get("installed_at","?")})')
else:
    print(' (none)')
print()
PY
  )
  python3 -c "$program"
}

case "$CMD" in
  install)
    [[ $# -ge 3 ]] || { echo "Usage: bastion.sh install <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
    MAC="$2"
    TARGET_HOSTNAME="$3"
    shift 3
    parse_install_opts "$@"
    queue_install "$MAC" "$TARGET_HOSTNAME" "$DISK" "$ROLE"
    echo ""
    echo "Power on the machine to start Fedora installation."
    exit 0
    ;;
  list)
    RESULT=$(curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>&1) \
      || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
    # Fall back to the raw reply if it cannot be rendered.
    printf '%s\n' "$RESULT" | render_machines 2>/dev/null || printf '%s\n' "$RESULT"
    exit 0
    ;;
  reprovision)
    [[ $# -ge 3 ]] || { echo "Usage: bastion.sh reprovision <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
    MAC="$2"
    TARGET_HOSTNAME="$3"
    shift 3
    parse_install_opts "$@"

    # Queue the install
    queue_install "$MAC" "$TARGET_HOSTNAME" "$DISK" "$ROLE"

    # Try to find IP from installed state and SSH in to trigger PXE reboot
    IP=$(lookup_installed_ip "$MAC")
    ADMIN_USER="${SUDO_USER:-${USER:-}}"
    if [[ "$ADMIN_USER" == "root" ]]; then
      ADMIN_USER=""
    fi

    if [[ -n "$IP" && -n "$ADMIN_USER" ]]; then
      echo ""
      echo "Attempting SSH reboot into PXE ($ADMIN_USER@$IP)..."
      # Runs on the target: pick the first PXE-looking boot entry as
      # BootNext when one exists, then reboot either way.
      REMOTE_CMD='sudo efibootmgr 2>/dev/null; PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi'
      if ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "$ADMIN_USER@$IP" "$REMOTE_CMD" 2>&1; then
        echo ""
        echo "Machine is rebooting into PXE. Install will start automatically."
      else
        echo ""
        echo "SSH failed. Reboot the machine manually into PXE (e.g. via IPMI/KVM)."
      fi
    else
      echo ""
      echo "No IP known for this machine. Reboot it manually into PXE."
    fi
    exit 0
    ;;
  serve) ;; # continue below
  *)
    echo "Usage: bastion.sh [serve|install|reprovision|list]"
    exit 1
    ;;
esac
|
||
|
||
# ══════════════════════════════════════════════════════════════════
|
||
# SERVE MODE — start the bastion
|
||
# ══════════════════════════════════════════════════════════════════
|
||
|
||
# ──── Kill old instances ──────────────────────────────────────────
|
||
# Find and kill any previous bastion dnsmasq and HTTP server
|
||
# Previous instances are recognised by command lines that reference paths
# under BASTION_DIR, so a non-default BASTION_DIR is cleaned up as well.
# pgrep exits non-zero when nothing matches; that is not an error here.
OLD_DNSMASQ=$(pgrep -f "dnsmasq --no-daemon --conf-file=${BASTION_DIR}" 2>/dev/null || true)
OLD_HTTP=$(pgrep -f "python3 ${BASTION_DIR}/server.py" 2>/dev/null || true)

if [[ -n "$OLD_DNSMASQ" || -n "$OLD_HTTP" ]]; then
  warn "Killing old bastion processes..."
  if [[ -n "$OLD_DNSMASQ" ]]; then
    # pgrep prints one PID per line; split into an array for kill.
    mapfile -t OLD_DNSMASQ_PIDS <<<"$OLD_DNSMASQ"
    if kill "${OLD_DNSMASQ_PIDS[@]}" 2>/dev/null; then
      log " Stopped old dnsmasq (PID ${OLD_DNSMASQ_PIDS[*]})"
    fi
  fi
  if [[ -n "$OLD_HTTP" ]]; then
    mapfile -t OLD_HTTP_PIDS <<<"$OLD_HTTP"
    if kill "${OLD_HTTP_PIDS[@]}" 2>/dev/null; then
      log " Stopped old HTTP server (PID ${OLD_HTTP_PIDS[*]})"
    fi
  fi
  sleep 1
fi
|
||
|
||
# ──── Preflight ───────────────────────────────────────────────────
|
||
[[ $EUID -eq 0 ]] || die "Must run as root (need DHCP/TFTP ports). Use: sudo bash bastion.sh"

# Tools that must already be present; they are not installed automatically.
for required_tool in python3 curl; do
  command -v "$required_tool" >/dev/null || die "$required_tool not found"
done

# Packages to install, each keyed off a command or file it should provide.
INSTALL_PKGS=()
command -v dnsmasq >/dev/null || INSTALL_PKGS+=(dnsmasq)
[[ -f /usr/share/ipxe/undionly.kpxe ]] || INSTALL_PKGS+=(ipxe-bootimgs-x86)
[[ -f /usr/share/ipxe/arm64-efi/snponly.efi ]] || INSTALL_PKGS+=(ipxe-bootimgs-aarch64)
[[ -f /usr/include/efi/efi.h ]] || INSTALL_PKGS+=(gnu-efi-devel)

if [[ ${#INSTALL_PKGS[@]} -gt 0 ]]; then
  log "Installing ${INSTALL_PKGS[*]}..."
  if command -v dnf >/dev/null; then
    dnf install -y "${INSTALL_PKGS[@]}"
  elif command -v apt-get >/dev/null; then
    # NOTE(review): the package names above look Fedora-specific — confirm
    # they resolve under apt before relying on this branch.
    apt-get install -y "${INSTALL_PKGS[@]}"
  else
    die "Cannot install packages — install manually: ${INSTALL_PKGS[*]}"
  fi
fi
|
||
|
||
# ──── Auto-detect network ────────────────────────────────────────
|
||
# Interface of the default route unless IFACE is supplied by the caller.
IFACE="${IFACE:-$(ip route | awk '/default/ {print $5; exit}')}"
[[ -n "$IFACE" ]] || die "Cannot detect default network interface — set IFACE explicitly"

# `|| true`: a failing `ip` must reach the explicit check below instead of
# aborting silently through set -e / pipefail.
SERVER_IP="$(ip -4 addr show "$IFACE" 2>/dev/null | awk '/inet / {split($2,a,"/"); print a[1]; exit}' || true)"
[[ -n "$SERVER_IP" ]] || die "Cannot detect IP on interface $IFACE"

# First three octets of SERVER_IP followed by ".0".
NETWORK="${SERVER_IP%.*}.0"
GATEWAY="$(ip route | awk '/default/ {print $3; exit}')"

log "Interface: ${BOLD}$IFACE${NC} IP: ${BOLD}$SERVER_IP${NC} Network: ${BOLD}$NETWORK${NC}"
|
||
|
||
# ──── Auto-detect SSH keys ───────────────────────────────────────
|
||
#######################################
# Gather SSH public keys from <home>/.ssh: non-comment, non-empty lines of
# authorized_keys, plus id_ed25519/id_rsa/id_ecdsa .pub files whose key
# blob (second field) is not already present.
# Globals:   SSH_KEYS_CONTENT, SSH_KEY_SOURCE (written)
# Arguments: $1 - home directory to inspect
# Returns:   0 always; SSH_KEYS_CONTENT is empty when nothing was found
#######################################
collect_ssh_keys() {
  local ssh_dir="$1/.ssh" keyfile key_line key_blob
  SSH_KEYS_CONTENT=""
  SSH_KEY_SOURCE=""

  if [[ -f "$ssh_dir/authorized_keys" ]]; then
    # grep exits 1 when every line is filtered out; that only means
    # "no keys here" and must not trip set -e.
    SSH_KEYS_CONTENT="$(grep -v -e '^#' -e '^$' "$ssh_dir/authorized_keys" || true)"
    if [[ -n "$SSH_KEYS_CONTENT" ]]; then
      SSH_KEY_SOURCE="$ssh_dir/authorized_keys"
    fi
  fi

  # Also include local pubkey files (they may not be in authorized_keys)
  for keyfile in "$ssh_dir/id_ed25519.pub" "$ssh_dir/id_rsa.pub" "$ssh_dir/id_ecdsa.pub"; do
    [[ -f "$keyfile" ]] || continue
    key_line="$(cat "$keyfile")"
    key_blob="$(awk '{print $2}' "$keyfile")"
    if [[ -z "$SSH_KEYS_CONTENT" ]]; then
      SSH_KEYS_CONTENT="$key_line"
      SSH_KEY_SOURCE="$keyfile"
    elif ! grep -qF -- "$key_blob" <<<"$SSH_KEYS_CONTENT"; then
      # Add only if not already present
      SSH_KEYS_CONTENT="$SSH_KEYS_CONTENT"$'\n'"$key_line"
      SSH_KEY_SOURCE="${SSH_KEY_SOURCE} + $keyfile"
    fi
  done
  return 0
}

# Under sudo, look in the invoking user's home rather than root's.
REAL_HOME="${HOME}"
if [[ -n "${SUDO_USER:-}" ]]; then
  REAL_HOME="$(getent passwd "$SUDO_USER" | cut -d: -f6 || true)"
  REAL_HOME="${REAL_HOME:-$HOME}"
fi

collect_ssh_keys "$REAL_HOME"

# Last resort: generate a keypair
if [[ -z "$SSH_KEYS_CONTENT" ]]; then
  GENERATED_KEY_DIR="${BASTION_DIR:-/tmp/lab-bastion}"
  GENERATED_KEY="$GENERATED_KEY_DIR/bastion_ed25519"
  if [[ ! -f "$GENERATED_KEY" ]]; then
    log "No SSH keys found — generating ed25519 keypair..."
    # The working directory is only created further down the script.
    mkdir -p "$GENERATED_KEY_DIR"
    ssh-keygen -t ed25519 -f "$GENERATED_KEY" -N "" -C "bastion-generated@$(hostname)" >/dev/null 2>&1 \
      || die "ssh-keygen failed for $GENERATED_KEY"
  fi
  SSH_KEYS_CONTENT="$(cat "${GENERATED_KEY}.pub")"
  SSH_KEY_SOURCE="$GENERATED_KEY (generated)"
  warn "Using generated keypair: ${BOLD}$GENERATED_KEY${NC}"
  warn "Save this private key — it's the only way to access installed machines."
fi

# Counts lines, one key per line.
SSH_KEY_COUNT="$(grep -c '' <<<"$SSH_KEYS_CONTENT")"
log "SSH keys: ${BOLD}${SSH_KEY_COUNT} key(s)${NC} from ${BOLD}${SSH_KEY_SOURCE}${NC}"
|
||
|
||
# ──── Detect admin username ──────────────────────────────────────
|
||
# The account to create on installed machines: the user who invoked sudo,
# else the current user. root is never used — it leaves ADMIN_USER empty.
ADMIN_USER="${SUDO_USER:-${USER:-$(id -un)}}"
if [[ "$ADMIN_USER" == "root" ]]; then
  ADMIN_USER=""
fi
if [[ -n "$ADMIN_USER" ]]; then
  log "Admin user: ${BOLD}${ADMIN_USER}${NC} (will be created on installed machines)"
fi
|
||
|
||
# ──── Prepare directories ────────────────────────────────────────
|
||
TFTPDIR="$BASTION_DIR/tftp"
HTTPDIR="$BASTION_DIR/http"
STATEFILE="$BASTION_DIR/state.json"
mkdir -p "$TFTPDIR" "$HTTPDIR"

# Initialize state if not present; an existing file is left untouched.
if [[ ! -f "$STATEFILE" ]]; then
  printf '%s\n' '{"discovered":{},"install_queue":{},"installed":{}}' > "$STATEFILE"
fi
|
||
|
||
# ──── Cleanup handler ─────────────────────────────────────────────
|
||
DNSMASQ_PID=""
HTTP_PID=""
FW_OPENED=false
CLEANUP_DONE=false

#######################################
# Stop the background services and undo firewall changes.
# Safe to call more than once; only the first call does any work.
# Globals:   HTTP_PID, DNSMASQ_PID, FW_OPENED, FW_ZONE_FLAG, HTTP_PORT,
#            STATEFILE (read); CLEANUP_DONE (written)
#######################################
cleanup() {
  if [[ "${CLEANUP_DONE:-false}" == true ]]; then
    return 0
  fi
  CLEANUP_DONE=true

  echo ""
  log "Shutting down..."
  if [[ -n "$HTTP_PID" ]] && kill "$HTTP_PID" 2>/dev/null; then
    log "Stopped HTTP server"
  fi
  if [[ -n "$DNSMASQ_PID" ]] && kill "$DNSMASQ_PID" 2>/dev/null; then
    log "Stopped dnsmasq"
  fi

  if [[ "$FW_OPENED" == true ]] && command -v firewall-cmd >/dev/null; then
    log "Removing firewall rules..."
    local rule
    for rule in --remove-service=dhcp --remove-service=tftp \
      "--remove-port=${HTTP_PORT}/tcp" --remove-port=4011/udp; do
      # FW_ZONE_FLAG is deliberately unquoted: it may be empty or expand
      # to one or more option words.
      # shellcheck disable=SC2086
      firewall-cmd --quiet ${FW_ZONE_FLAG:-} "$rule" 2>/dev/null || true
    done
  fi

  log "State preserved in $STATEFILE"
  log "Restart bastion with: sudo bash bastion.sh"
}

# Signals only request an exit; the EXIT trap then runs cleanup exactly
# once. Trapping INT/TERM with cleanup itself lets the script carry on
# after the handler and run cleanup a second time on exit.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
|
||
|
||
# ──── Prepare boot artifacts ─────────────────────────────────────
|
||
#######################################
# Fetch a URL to a local file unless the file already exists.
# The transfer goes to "<dest>.part" and is renamed only on success, so an
# interrupted or failed download is never mistaken for a cached file.
# Arguments: $1 - source URL
#            $2 - destination path
#            $3 - human-readable label for log messages
# Returns:   dies when curl fails
#######################################
download() {
  local url="$1" dest="$2" label="$3"
  local partial="${dest}.part"

  if [[ -f "$dest" ]]; then
    log " ${label} — cached"
    return
  fi

  log " ${label} — downloading..."
  if ! curl -# -L -f -o "$partial" "$url"; then
    rm -f -- "$partial"
    die "Failed to download $label from $url"
  fi
  mv -f -- "$partial" "$dest"
}
|
||
|
||
#######################################
# Copy a local file into place unless the destination already exists.
# Arguments: $1 - source path
#            $2 - destination path
#            $3 - human-readable label for log messages
# Returns:   dies when the destination is absent and the source is missing
#######################################
copy_if_missing() {
  local src="$1" dest="$2" label="$3"

  if [[ -f "$dest" ]]; then
    log " ${label} — cached"
    return
  fi

  [[ -f "$src" ]] || die "${label}: source not found at $src"
  cp -- "$src" "$dest"
  log " ${label} — copied from $src"
}
|
||
|
||
#######################################
# Compile a C source into an x86_64 UEFI application with gnu-efi, unless
# the output file already exists.
# Globals:   BASTION_DIR (read; intermediates go to pxeloader-build/)
# Arguments: $1 - C source file
#            $2 - output .efi path
#            $3 - human-readable label for log messages
# Returns:   dies when compile, link or objcopy fails
#######################################
build_pxeloader() {
  local src="$1" dest="$2" label="$3"

  if [[ -f "$dest" ]]; then
    log " ${label} — cached ($(stat -c%s "$dest") bytes)"
    return
  fi

  log " ${label} — building UEFI PXE loader stub..."

  local builddir="$BASTION_DIR/pxeloader-build"
  mkdir -p "$builddir"

  local gnuefi_lib="/usr/lib"
  local gnuefi_inc="/usr/include/efi"

  # Compile
  gcc -I"$gnuefi_inc" -I"$gnuefi_inc/x86_64" -I"$gnuefi_inc/protocol" \
    -DGNU_EFI_USE_MS_ABI -fPIC -fshort-wchar -ffreestanding \
    -fno-stack-protector -mno-red-zone -maccumulate-outgoing-args \
    -Wall -Os -c -o "$builddir/pxeloader.o" "$src" || die "PXE loader compile failed"

  # Link
  ld -nostdlib -znocombreloc -shared -Bsymbolic \
    -T "$gnuefi_lib/elf_x86_64_efi.lds" \
    "$gnuefi_lib/crt0-efi-x86_64.o" \
    "$builddir/pxeloader.o" \
    -o "$builddir/pxeloader.so" \
    -lgnuefi -lefi -L"$gnuefi_lib" || die "PXE loader link failed"

  # Convert to PE/COFF EFI binary. The wildcard section names are for
  # objcopy itself; quoting stops the shell from expanding them against
  # files in the current directory.
  local -a sections=(
    .text .sdata .data .dynamic .rodata .dynsym
    .rel .rela '.rel.*' '.rela.*' '.rel*' '.rela*'
    .reloc
  )
  local -a keep_flags=()
  local section
  for section in "${sections[@]}"; do
    keep_flags+=(-j "$section")
  done
  objcopy "${keep_flags[@]}" --target efi-app-x86_64 \
    "$builddir/pxeloader.so" "$dest" || die "PXE loader objcopy failed"

  local size
  size="$(stat -c%s "$dest")"
  log " ${label} — built (${size} bytes / $((size / 1024)) KB)"
}
|
||
|
||
FEDORA_MIRROR="https://download.fedoraproject.org/pub/fedora/linux/releases/${FEDORA_VERSION}/Everything/${ARCH}/os"

log "Preparing boot artifacts (Fedora ${FEDORA_VERSION} ${ARCH})..."
copy_if_missing "/usr/share/ipxe/undionly.kpxe" "$TFTPDIR/undionly.kpxe" "iPXE BIOS"

# UEFI x86_64: serve iPXE directly via TFTP (UEFI has no TFTP size limit)
copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe.efi" "iPXE UEFI x86_64"

copy_if_missing "/usr/share/ipxe/arm64-efi/snponly.efi" "$TFTPDIR/ipxe-arm64.efi" "iPXE UEFI arm64"
download "${FEDORA_MIRROR}/images/pxeboot/vmlinuz" "$HTTPDIR/vmlinuz" "Fedora kernel"
download "${FEDORA_MIRROR}/images/pxeboot/initrd.img" "$HTTPDIR/initrd.img" "Fedora initrd"

# Symlink iPXE binaries into HTTP dir (UEFI HTTP Boot downloads via HTTP, not TFTP)
for efi_binary in "$TFTPDIR"/*.efi; do
  # With no match the glob stays a literal pattern; skip it.
  [[ -e "$efi_binary" ]] || continue
  ln -sf "$efi_binary" "$HTTPDIR/$(basename "$efi_binary")" 2>/dev/null || true
done
|
||
|
||
# ──── Generate discovery kickstart ────────────────────────────────
|
||
# Boots Fedora installer env, collects hardware info, POSTs to bastion, powers off.
|
||
# Never touches the disk.
|
||
# The heredoc delimiter is quoted, so everything below is written
# literally; $VARS in it are expanded later by the %pre script on the
# machine being discovered, not by this script.
cat > "$HTTPDIR/discover.ks" << 'DISCOVER_KS'
# Lab Bastion — Discovery Mode
# Collects hardware inventory, reports it to the bastion and force-reboots.
# Does NOT install anything.

%pre --erroronfail --log=/tmp/discover.log
#!/bin/bash
set -x

# ── Collect hardware info from /proc, /sys, and available tools ──

MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
PRODUCT=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo "unknown")
BOARD=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo "unknown")
SERIAL=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo "unknown")
MANUFACTURER=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo "unknown")
CPUMODEL=$(grep -m1 'model name' /proc/cpuinfo | cut -d: -f2 | sed 's/^ //')
CPUCORES=$(grep -c '^processor' /proc/cpuinfo)
MEMGB=$(awk '/MemTotal/ {printf "%d", $2/1024/1024}' /proc/meminfo)
ARCHTYPE=$(uname -m)

# Disk info — lsblk is available in Anaconda
DISKS_JSON=$(lsblk -Jb -o NAME,SIZE,TYPE,MODEL 2>/dev/null | python3 -c '
import sys, json
data = json.load(sys.stdin)
result = []
for d in data.get("blockdevices", []):
    if d.get("type") != "disk":
        continue
    result.append({
        "name": d.get("name", "?"),
        "size_gb": round(int(d.get("size", 0)) / 1073741824, 1),
        "model": (d.get("model") or "unknown").strip(),
    })
print(json.dumps(result))
' 2>/dev/null || echo '[]')

# Network interfaces
NICS_JSON=$(ip -j link show 2>/dev/null | python3 -c '
import sys, json
result = []
for n in json.load(sys.stdin):
    if n.get("link_type") == "loopback":
        continue
    result.append({
        "name": n.get("ifname", "?"),
        "mac": n.get("address", "?"),
        "state": n.get("operstate", "?"),
    })
print(json.dumps(result))
' 2>/dev/null || echo '[]')

# ── Build and POST discovery payload ──

# Values go to Python through the environment instead of being pasted into
# its source, so quotes or backslashes in firmware strings cannot break
# the payload.
export MAC PRODUCT BOARD SERIAL MANUFACTURER CPUMODEL CPUCORES MEMGB ARCHTYPE
export DISKS_JSON NICS_JSON

PAYLOAD=$(python3 -c '
import json, os

def text(name):
    return os.environ.get(name, "")

def number(name):
    try:
        return int(text(name) or 0)
    except ValueError:
        return 0

def parsed(name):
    try:
        return json.loads(text(name) or "[]")
    except ValueError:
        return []

print(json.dumps({
    "mac": text("MAC"),
    "product": text("PRODUCT"),
    "board": text("BOARD"),
    "serial": text("SERIAL"),
    "manufacturer": text("MANUFACTURER"),
    "cpu_model": text("CPUMODEL"),
    "cpu_cores": number("CPUCORES"),
    "memory_gb": number("MEMGB"),
    "arch": text("ARCHTYPE"),
    "disks": parsed("DISKS_JSON"),
    "nics": parsed("NICS_JSON"),
}))
')

# POST to bastion — try curl first, fall back to python3 urllib
BASTION_URL="__BASTION_URL__/api/discover"

if command -v curl >/dev/null 2>&1; then
    curl -sf -X POST "$BASTION_URL" \
        -H "Content-Type: application/json" \
        -d "$PAYLOAD" || true
else
    export BASTION_URL PAYLOAD
    python3 -c '
import os
import urllib.request
req = urllib.request.Request(os.environ["BASTION_URL"],
    data=os.environ.get("PAYLOAD", "").encode(),
    headers={"Content-Type": "application/json"})
try:
    urllib.request.urlopen(req, timeout=10)
except Exception as e:
    print(f"POST failed: {e}")
'
fi

# ── Reboot — do NOT let Anaconda proceed ──
echo ""
echo "=== Discovery complete, rebooting ==="
echo ""
sleep 3
echo 1 > /proc/sys/kernel/sysrq
echo b > /proc/sysrq-trigger
sleep 5
reboot -f

%end

# Anaconda should never get here, but just in case:
reboot
DISCOVER_KS

# Patch in the bastion URL
sed -i "s|__BASTION_URL__|http://${SERVER_IP}:${HTTP_PORT}|g" "$HTTPDIR/discover.ks"
|
||
|
||
# Save SSH keys and admin user for the HTTP server to use
|
||
# printf rather than echo: the values are written verbatim whatever they
# happen to start with.
printf '%s\n' "$SSH_KEYS_CONTENT" > "$BASTION_DIR/ssh_keys"
printf '%s\n' "$ADMIN_USER" > "$BASTION_DIR/admin_user"
|
||
|
||
# ──── Generate iPXE boot script ───────────────────────────────────
|
||
# Initial iPXE script chains to /dispatch with the MAC, so the server
|
||
# can route to discover or install mode per machine.
|
||
# Unquoted heredoc: SERVER_IP and HTTP_PORT are filled in now, while the
# escaped \${net0/mac} is left for iPXE to expand at boot time.
cat > "$HTTPDIR/boot.ipxe" << IPXE
#!ipxe

echo
echo ============================================
echo Lab PXE Bastion
echo Contacting server for instructions...
echo ============================================
echo

chain http://${SERVER_IP}:${HTTP_PORT}/dispatch?mac=\${net0/mac}
IPXE
|
||
|
||
# ──── Write the HTTP server ──────────────────────────────────────
|
||
cat > "$BASTION_DIR/server.py" << 'PYSERVER'
|
||
#!/usr/bin/env python3
|
||
"""Lab PXE Bastion — HTTP server with discovery API and per-MAC iPXE dispatch."""
|
||
|
||
import json
|
||
import os
|
||
import sys
|
||
import time
|
||
import fcntl
|
||
from http.server import HTTPServer, SimpleHTTPRequestHandler
|
||
from urllib.parse import urlparse, parse_qs
|
||
from datetime import datetime
|
||
|
||
# Config from argv
|
||
HTTP_DIR = sys.argv[1]
|
||
STATE_FILE = sys.argv[2]
|
||
SERVER_IP = sys.argv[3]
|
||
HTTP_PORT = int(sys.argv[4])
|
||
FEDORA_VER = sys.argv[5]
|
||
FEDORA_MIRROR = sys.argv[6]
|
||
SSH_KEYS_FILE = sys.argv[7] if len(sys.argv) > 7 else ""
|
||
TIMEZONE = sys.argv[8] if len(sys.argv) > 8 else "Europe/London"
|
||
LOCALE = sys.argv[9] if len(sys.argv) > 9 else "en_GB.UTF-8"
|
||
DOMAIN = sys.argv[10] if len(sys.argv) > 10 else "ad.itaz.eu"
|
||
ADMIN_USER = sys.argv[11] if len(sys.argv) > 11 else ""
|
||
|
||
# Load SSH keys from file
|
||
SSH_KEYS = []
|
||
if SSH_KEYS_FILE and os.path.isfile(SSH_KEYS_FILE):
|
||
with open(SSH_KEYS_FILE) as f:
|
||
SSH_KEYS = [l.strip() for l in f if l.strip() and not l.startswith('#')]
|
||
|
||
# ── State management (file-backed, lock-protected) ───────────────
|
||
|
||
def load_state():
    """Return the state dict stored in STATE_FILE.

    A missing or unparsable file yields a fresh empty state. A parsed
    state always has the three top-level sections, so a file that lacks
    one of them, or is not a JSON object, is still usable.
    """
    empty = {"discovered": {}, "install_queue": {}, "installed": {}}
    try:
        with open(STATE_FILE) as f:
            state = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return empty
    if not isinstance(state, dict):
        return empty
    for section, default in empty.items():
        state.setdefault(section, default)
    return state
|
||
|
||
def save_state(state):
    """Write state to STATE_FILE as indented JSON.

    The data goes to STATE_FILE + ".tmp" first and is renamed over the
    real file afterwards, so readers never see a half-written file. The
    temp file is flushed and fsynced before the rename so the renamed
    file holds the complete contents.
    """
    tmp = STATE_FILE + ".tmp"
    with open(tmp, 'w') as f:
        json.dump(state, f, indent=2)
        f.flush()
        os.fsync(f.fileno())
    os.replace(tmp, STATE_FILE)
|
||
|
||
# ── Kickstart generation ─────────────────────────────────────────
|
||
|
||
def generate_kickstart(hostname, disk="", ssh_keys=None, domain="", role="worker", admin_user=""):
|
||
ssh_keys = ssh_keys or []
|
||
fqdn = f"{hostname}.{domain}" if domain else hostname
|
||
vg = "labvg"
|
||
|
||
# ── Auth ──
|
||
if ssh_keys:
|
||
auth = f'rootpw --lock\nsshkey --username=root "{ssh_keys[0]}"'
|
||
else:
|
||
auth = 'rootpw --plaintext changeme'
|
||
|
||
# ── Admin user (kickstart directive) ──
|
||
user_directive = ""
|
||
if admin_user:
|
||
user_directive = f'user --name={admin_user} --groups=wheel --lock'
|
||
|
||
# ── SSH keys for %post (root + admin user) ──
|
||
all_keys = "\n".join(ssh_keys)
|
||
ssh_post_block = ""
|
||
if ssh_keys:
|
||
ssh_post_block = f"""
|
||
# Set up SSH keys for root
|
||
mkdir -p /root/.ssh && chmod 700 /root/.ssh
|
||
cat > /root/.ssh/authorized_keys << 'SSHKEYS'
|
||
{all_keys}
|
||
SSHKEYS
|
||
chmod 600 /root/.ssh/authorized_keys"""
|
||
|
||
if admin_user and ssh_keys:
|
||
ssh_post_block += f"""
|
||
|
||
# Set up SSH keys for {admin_user}
|
||
ADMIN_HOME=$(getent passwd {admin_user} | cut -d: -f6)
|
||
mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh"
|
||
cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
|
||
chown -R {admin_user}:{admin_user} "$ADMIN_HOME/.ssh"
|
||
chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"
|
||
|
||
# Fix SELinux contexts for SSH
|
||
restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
|
||
|
||
# Passwordless sudo for {admin_user}
|
||
echo '{admin_user} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/{admin_user}
|
||
chmod 440 /etc/sudoers.d/{admin_user}"""
|
||
|
||
# ── Determine disk (auto-detect first NVMe/SDA if not specified) ──
|
||
disk_line = f'DISK="{disk}"' if disk else '''
|
||
DISK=""
|
||
for d in /dev/nvme0n1 /dev/sda /dev/vda; do
|
||
[ -b "$d" ] && { DISK="$(basename $d)"; break; }
|
||
done
|
||
[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }
|
||
'''
|
||
|
||
# ── LVM layout sizes (MB) ──
|
||
has_longhorn = (role == "worker")
|
||
|
||
return f"""# Lab Bastion -- Fedora {FEDORA_VER} server install
|
||
# Generated: {datetime.now().isoformat()}
|
||
# Target: {fqdn} (role={role})
|
||
|
||
text
|
||
reboot
|
||
|
||
lang {LOCALE}
|
||
keyboard uk
|
||
timezone {TIMEZONE} --utc
|
||
|
||
network --bootproto=dhcp --activate --hostname={fqdn}
|
||
|
||
{auth}
|
||
{user_directive}
|
||
|
||
bootloader --append="console=tty0 console=ttyS0,115200n8"
|
||
|
||
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch
|
||
|
||
# Partitioning is generated dynamically by %pre (supports longhorn preservation)
|
||
%include /tmp/part.ks
|
||
|
||
%pre --log=/tmp/pre-partition.log
|
||
#!/bin/bash
|
||
set -x
|
||
|
||
# Progress callback helper
|
||
bastion_progress() {{
|
||
local stage="$1" detail="${{2:-}}"
|
||
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
|
||
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
|
||
-H "Content-Type: application/json" \
|
||
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
|
||
}}
|
||
|
||
bastion_progress "partitioning" "preparing disk layout"
|
||
|
||
VG="{vg}"
|
||
{disk_line}
|
||
|
||
REPROVISION=no
|
||
|
||
# Check if VG exists (reprovision scenario)
|
||
if vgs $VG &>/dev/null; then
|
||
echo "=== Existing VG found - reprovision mode ==="
|
||
REPROVISION=yes
|
||
|
||
# Detect which data LVs to preserve
|
||
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no
|
||
lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
|
||
lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
|
||
lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
|
||
|
||
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME"
|
||
|
||
# Remove only OS logical volumes (keep data LVs)
|
||
for lv in root var varlog swap; do
|
||
lvremove -f $VG/$lv 2>/dev/null || true
|
||
done
|
||
fi
|
||
|
||
if [ "$REPROVISION" = "yes" ]; then
|
||
# Find existing boot partitions by type
|
||
EFI_PART=$(blkid -t TYPE=vfat -o device /dev/${{DISK}}* 2>/dev/null | head -1)
|
||
BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/${{DISK}}* 2>/dev/null | head -1)
|
||
EFI_PART=${{EFI_PART:-/dev/${{DISK}}1}}
|
||
BOOT_PART=${{BOOT_PART:-/dev/${{DISK}}2}}
|
||
echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"
|
||
|
||
# Build partition config reusing existing PV/VG
|
||
cat > /tmp/part.ks << PARTEOF
|
||
ignoredisk --only-use=$DISK
|
||
clearpart --none
|
||
part /boot/efi --onpart=$EFI_PART --fstype=efi
|
||
part /boot --onpart=$BOOT_PART --fstype=ext4
|
||
volgroup {vg} --useexisting --noformat
|
||
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
|
||
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
|
||
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
|
||
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
|
||
PARTEOF
|
||
|
||
# Preserve or recreate data LVs
|
||
if [ "$PRESERVE_HOME" = "yes" ]; then
|
||
echo "logvol /home --vgname={vg} --name=home --useexisting --noformat" >> /tmp/part.ks
|
||
else
|
||
echo "logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks
|
||
fi
|
||
|
||
if [ "$PRESERVE_SRV" = "yes" ]; then
|
||
echo "logvol /srv --vgname={vg} --name=srv --useexisting --noformat" >> /tmp/part.ks
|
||
else
|
||
echo "logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks
|
||
fi
|
||
|
||
if [ "$PRESERVE_LONGHORN" = "yes" ]; then
|
||
echo "logvol /var/lib/longhorn --vgname={vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
|
||
fi
|
||
|
||
else
|
||
# Fresh install
|
||
cat > /tmp/part.ks << PARTEOF
|
||
ignoredisk --only-use=$DISK
|
||
clearpart --all --initlabel --drives=$DISK
|
||
part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
|
||
part /boot --fstype=ext4 --size=3072 --ondisk=$DISK
|
||
part pv.01 --size=1 --grow --ondisk=$DISK
|
||
volgroup {vg} pv.01
|
||
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
|
||
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
|
||
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
|
||
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
|
||
logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240
|
||
logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480
|
||
{"logvol /var/lib/longhorn --vgname=" + vg + " --name=longhorn --fstype=xfs --grow --size=1" if has_longhorn else ""}
|
||
PARTEOF
|
||
fi
|
||
|
||
echo "=== Generated partition config ==="
|
||
cat /tmp/part.ks
|
||
echo "==================================="
|
||
|
||
bastion_progress "partitioning" "layout ready, starting install"
|
||
|
||
%end
|
||
|
||
%packages
|
||
@core
|
||
openssh-server
|
||
vim-enhanced
|
||
tmux
|
||
git
|
||
curl
|
||
wget
|
||
python3
|
||
lshw
|
||
dmidecode
|
||
dnf-plugins-core
|
||
|
||
# Networking and diagnostics
|
||
NetworkManager
|
||
bind-utils
|
||
net-tools
|
||
iproute
|
||
iputils
|
||
traceroute
|
||
tcpdump
|
||
htop
|
||
iotop
|
||
strace
|
||
jq
|
||
|
||
# k3s prerequisites
|
||
container-selinux
|
||
iptables-nft
|
||
nftables
|
||
policycoreutils-python-utils
|
||
chrony
|
||
tar
|
||
socat
|
||
conntrack-tools
|
||
ethtool
|
||
|
||
# Boot management
|
||
efibootmgr
|
||
|
||
# Puppet prerequisites
|
||
ruby
|
||
ruby-libs
|
||
|
||
# Exclude desktop
|
||
-@workstation-product
|
||
-@gnome-desktop
|
||
-gnome-shell
|
||
-gdm
|
||
-PackageKit
|
||
-PackageKit-glib
|
||
%end
|
||
|
||
%post --log=/root/bastion-post-install.log
|
||
#!/bin/bash
|
||
set -x
|
||
|
||
# Progress callback helper
|
||
bastion_progress() {{
|
||
local stage="$1" detail="${{2:-}}"
|
||
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
|
||
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
|
||
-H "Content-Type: application/json" \
|
||
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
|
||
}}
|
||
|
||
bastion_progress "post-install" "configuring system"
|
||
|
||
# ── SSH ──
|
||
systemctl enable --now sshd
|
||
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
||
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||
{ssh_post_block}
|
||
|
||
# ── Hostname and domain ──
|
||
hostnamectl set-hostname {fqdn}
|
||
|
||
# ── tmpfs for /tmp ──
|
||
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
|
||
|
||
# ── Kernel modules for k3s ──
|
||
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
|
||
br_netfilter
|
||
overlay
|
||
ip_conntrack
|
||
MODULES
|
||
modprobe br_netfilter || true
|
||
modprobe overlay || true
|
||
|
||
# ── Sysctl for k3s networking ──
|
||
cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
|
||
net.bridge.bridge-nf-call-iptables = 1
|
||
net.bridge.bridge-nf-call-ip6tables = 1
|
||
net.ipv4.ip_forward = 1
|
||
net.ipv6.conf.all.forwarding = 1
|
||
fs.inotify.max_user_instances = 524288
|
||
fs.inotify.max_user_watches = 1048576
|
||
SYSCTL
|
||
sysctl --system || true
|
||
|
||
# ── Disable firewalld (k3s manages its own iptables rules) ──
|
||
systemctl disable --now firewalld || true
|
||
|
||
# ── Enable chronyd for time sync ──
|
||
systemctl enable --now chronyd
|
||
|
||
# ── Set boot order: local disk first, PXE after ──
|
||
if command -v efibootmgr >/dev/null 2>&1; then
|
||
# Find the Fedora boot entry and move it first
|
||
FEDORA_ENTRY=$(efibootmgr | grep -i fedora | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
|
||
if [ -n "$FEDORA_ENTRY" ]; then
|
||
CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
|
||
# Put Fedora first, keep rest
|
||
NEW_ORDER="$FEDORA_ENTRY,$(echo "$CURRENT_ORDER" | sed "s/$FEDORA_ENTRY,\\?//;s/,$//")"
|
||
efibootmgr -o "$NEW_ORDER" || true
|
||
echo "Boot order set: Fedora first ($NEW_ORDER)"
|
||
fi
|
||
fi
|
||
|
||
# ── Provisioning metadata ──
|
||
cat > /etc/lab-provisioned << PROVEOF
|
||
hostname: {fqdn}
|
||
role: {role}
|
||
provisioned: $(date -Iseconds)
|
||
bastion: {SERVER_IP}
|
||
PROVEOF
|
||
|
||
cat > /root/README << 'README'
|
||
# Lab Node -- {fqdn} (role: {role})
|
||
#
|
||
# Next steps:
|
||
# 1. Install puppet agent:
|
||
# dnf install -y puppet-agent
|
||
#
|
||
# 2. Install k3s:
|
||
# curl -sfL https://get.k3s.io | sh -
|
||
#
|
||
# 3. Or join existing cluster:
|
||
# curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
|
||
README
|
||
|
||
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {{split($2,a,"/"); print a[1]; exit}}')
|
||
bastion_progress "complete" "ready at $IP_ADDR"
|
||
|
||
%end
|
||
"""
|
||
|
||
# ── Pretty terminal output ────────────────────────────────────────
|
||
|
||
# ANSI SGR escape sequences used to colorize the bastion's terminal output.
RESET = "\033[0m"      # clear every attribute set by the codes below
BOLD = "\033[1m"
GREEN = "\033[0;32m"
YELLOW = "\033[1;33m"  # yellow with the bold attribute
CYAN = "\033[0;36m"
RED = "\033[0;31m"
|
||
|
||
def print_discovery(mac, hw, is_new):
    """Print a discovered machine to the bastion terminal.

    Args:
        mac: MAC address the report was stored under.
        hw: mapping of hardware facts from the discovery report. Every key
            read here is optional; a missing value is printed as "?".
        is_new: True for a first sighting (green banner), False for a
            repeat sighting (yellow banner).
    """
    label = "NEW MACHINE DISCOVERED" if is_new else "MACHINE RE-DISCOVERED"
    color = GREEN if is_new else YELLOW

    # Format disk summary. "or []" also covers a key that is present but null,
    # which would otherwise make the generator below fail on iteration.
    disks = hw.get('disks') or []
    disk_str = ", ".join(
        f"{d.get('size_gb', '?')}GB {d.get('model', '?')}"
        for d in disks
    ) or "none detected"

    # Format NIC summary
    nics = hw.get('nics') or []
    nic_str = ", ".join(n.get('name', '?') for n in nics) or "none"

    print(f"\n{color}{BOLD}{'═' * 60}")
    print(f" {label}")
    print(f"{'═' * 60}{RESET}")
    print(f" {BOLD}MAC:{RESET} {mac}")
    print(f" {BOLD}Product:{RESET} {hw.get('manufacturer', '?')} {hw.get('product', '?')}")
    print(f" {BOLD}CPU:{RESET} {hw.get('cpu_model', '?')} ({hw.get('cpu_cores', '?')} cores)")
    print(f" {BOLD}RAM:{RESET} {hw.get('memory_gb', '?')} GB")
    print(f" {BOLD}Arch:{RESET} {hw.get('arch', '?')}")
    print(f" {BOLD}Disks:{RESET} {disk_str}")
    print(f" {BOLD}NICs:{RESET} {nic_str}")
    print(f" {BOLD}Serial:{RESET} {hw.get('serial', '?')}")
    print()
    print(f" {CYAN}To install Fedora on this machine:{RESET}")
    print(f" {BOLD}bash bastion.sh install {mac} <hostname>{RESET}")
    # Flush once at the end so the whole card appears together.
    print(f"\n{'─' * 60}\n", flush=True)
|
||
|
||
def print_install_queued(mac, hostname):
    """Announce on the bastion terminal that a machine was queued for install.

    Args:
        mac: MAC address that was queued.
        hostname: hostname the machine will be installed with.
    """
    print(f"\n{GREEN}{BOLD} INSTALL QUEUED{RESET}")
    print(f" {mac} → hostname={BOLD}{hostname}{RESET}")
    print(" PXE boot the machine to start Fedora installation.")
    print(f"\n{'─' * 60}\n", flush=True)
|
||
|
||
def print_install_started(mac, hostname):
    """Announce on the bastion terminal that an install script is being served.

    Args:
        mac: MAC address of the machine that requested its boot script.
        hostname: hostname taken from the machine's install-queue entry.
    """
    print(f"\n{CYAN}{BOLD} INSTALL STARTED{RESET}")
    print(f" {mac} → {BOLD}{hostname}{RESET}")
    print(f" Serving Fedora {FEDORA_VER} installer + kickstart...")
    print(f"\n{'─' * 60}\n", flush=True)
|
||
|
||
# Marker printed in front of each progress line, keyed by the stage name sent
# to /api/progress. Stages missing from this table are looked up with a
# fallback by print_progress().
PROGRESS_ICONS = {
    "partitioning": "◆",
    "installing": "◆◆",
    "post-install": "◆◆◆",
    "complete": "✔",
    "error": "✘",
}
|
||
|
||
def print_progress(mac, stage, detail=""):
    """Print one install-progress line, plus an ssh hint on completion.

    Args:
        mac: MAC address the progress report came from.
        stage: stage name; selects the icon from PROGRESS_ICONS ("·" when the
            stage is not listed) and the color (green for "complete", red
            for "error", yellow otherwise).
        detail: optional free text appended after the stage name.
    """
    icon = PROGRESS_ICONS.get(stage, "·")
    color = GREEN if stage == "complete" else (RED if stage == "error" else YELLOW)
    detail_str = f" -- {detail}" if detail else ""
    print(f" {color}{icon}{RESET} {mac} {BOLD}{stage}{RESET}{detail_str}", flush=True)

    if stage == "complete" and detail:
        # The completion detail has the form "ready at <ip>"; what is left
        # after removing that prefix is used as the address to ssh to.
        ip = detail.replace("ready at ", "").strip()
        if ip:
            admin = ADMIN_USER or "root"
            print(f"\n {GREEN}{BOLD} ssh {admin}@{ip}{RESET}\n", flush=True)
|
||
|
||
# ── HTTP Handler ──────────────────────────────────────────────────
|
||
|
||
class BastionHandler(SimpleHTTPRequestHandler):
    """HTTP front end of the bastion: iPXE dispatch, kickstarts and JSON API.

    Routes handled here (any other GET is served as a static file from
    HTTP_DIR by SimpleHTTPRequestHandler):

        GET  /dispatch?mac=..  iPXE script chosen from the MAC's state
        GET  /ks?mac=..        kickstart rendered by generate_kickstart()
        GET  /api/machines     the whole state document as JSON
        GET  /ipxe*.efi        iPXE binaries read from the sibling tftp dir
        POST /api/discover     hardware report, stored in state["discovered"]
        POST /api/install      queue a MAC in state["install_queue"]
        POST /api/progress     install progress; "complete" moves the MAC
                               from install_queue to installed
    """

    protocol_version = "HTTP/1.1"

    # Binaries that may be fetched over HTTP instead of TFTP.
    _EFI_PATHS = ("/ipxe.efi", "/ipxe-real.efi", "/ipxe-arm64.efi")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, directory=HTTP_DIR, **kwargs)

    def log_message(self, format, *args):
        """Log HTTP requests to help debug boot issues."""
        print(f" HTTP: {self.client_address[0]} {self.command} {self.path}", flush=True)

    # ── Response helpers ──────────────────────────────────────────

    def send_text(self, code, text, content_type="text/plain"):
        """Send a complete response with the given status, body and type.

        The connection is closed after every response: the server handles
        one connection at a time, so an idle keep-alive connection would
        hold up every other client.
        """
        data = text.encode()
        self.send_response(code)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(data)))
        self.send_header("Connection", "close")
        self.end_headers()
        self.wfile.write(data)

    def send_json(self, code, data):
        """Send data serialized as indented JSON."""
        self.send_text(code, json.dumps(data, indent=2), "application/json")

    # ── Input helpers ─────────────────────────────────────────────

    @staticmethod
    def _normalize_mac(value):
        """Return a MAC as lower-case, colon-separated text (None becomes "")."""
        text = "" if value is None else str(value)
        return text.lower().replace("-", ":")

    def _query_mac(self, parsed):
        """Return the normalized "mac" query parameter, or "" when absent."""
        params = parse_qs(parsed.query)
        return self._normalize_mac(params.get("mac", [""])[0])

    def _json_object(self, body):
        """Decode a request body that must be a JSON object.

        Returns the dict, or None after a 400 response has already been sent.
        """
        try:
            data = json.loads(body)
        except ValueError:
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised for bodies that are not valid UTF.
            self.send_json(400, {"error": "invalid JSON"})
            return None
        if not isinstance(data, dict):
            # Every handler reads fields with .get(); a list or scalar
            # would otherwise raise instead of producing a response.
            self.send_json(400, {"error": "JSON object expected"})
            return None
        return data

    # ── GET ───────────────────────────────────────────────────────

    def do_GET(self):
        """Route GET requests; unknown paths fall through to static files."""
        parsed = urlparse(self.path)

        if parsed.path == "/dispatch":
            self._send_dispatch(parsed)
            return

        if parsed.path == "/ks":
            self._send_kickstart(parsed)
            return

        if parsed.path == "/api/machines":
            self.send_json(200, load_state())
            return

        if parsed.path in self._EFI_PATHS and self._send_efi(parsed.path):
            return

        # Static files (vmlinuz, initrd, discover.ks, etc.)
        super().do_GET()

    def _send_dispatch(self, parsed):
        """Answer /dispatch with the iPXE script matching the MAC's state.

        A queued MAC gets the installer, an installed MAC gets a script that
        exits iPXE, and every other MAC is sent into discovery.
        """
        mac = self._query_mac(parsed)
        state = load_state()

        if mac in state.get("install_queue", {}):
            cfg = state["install_queue"][mac]
            hostname = cfg.get("hostname", "lab-node")
            print_install_started(mac, hostname)
            script = f"""#!ipxe

echo
echo =============================================
echo Lab PXE Bastion - INSTALLING Fedora {FEDORA_VER}
echo Target: {hostname}
echo MAC: {mac}
echo =============================================
echo

kernel http://{SERVER_IP}:{HTTP_PORT}/vmlinuz inst.ks=http://{SERVER_IP}:{HTTP_PORT}/ks?mac={mac} inst.repo={FEDORA_MIRROR} inst.text
initrd http://{SERVER_IP}:{HTTP_PORT}/initrd.img
boot
"""

        elif mac in state.get("installed", {}):
            info = state["installed"][mac]
            hostname = info.get("hostname", "?")
            print(f" {GREEN}PXE request from {mac} ({hostname}) - already installed, booting local disk{RESET}", flush=True)
            script = f"""#!ipxe

echo
echo =============================================
echo Lab PXE Bastion - {hostname}
echo Already installed, booting from local disk
echo =============================================
echo
sleep 3
exit
"""

        else:
            print(f" {YELLOW}PXE request from {mac} → discovery mode{RESET}", flush=True)
            script = f"""#!ipxe

echo
echo =============================================
echo Lab PXE Bastion - DISCOVERY MODE
echo MAC: {mac}
echo Collecting hardware info...
echo =============================================
echo

kernel http://{SERVER_IP}:{HTTP_PORT}/vmlinuz inst.ks=http://{SERVER_IP}:{HTTP_PORT}/discover.ks inst.stage2={FEDORA_MIRROR} inst.text
initrd http://{SERVER_IP}:{HTTP_PORT}/initrd.img
boot
"""

        self.send_text(200, script)

    def _send_kickstart(self, parsed):
        """Answer /ks with the kickstart for the MAC's install-queue entry."""
        mac = self._query_mac(parsed)
        state = load_state()
        # NOTE(review): a MAC that is not queued still receives a kickstart
        # built from the defaults below; confirm that is intended.
        cfg = state.get("install_queue", {}).get(mac, {})
        ks = generate_kickstart(
            hostname=cfg.get("hostname", "lab-node"),
            disk=cfg.get("disk", ""),
            ssh_keys=SSH_KEYS,
            domain=DOMAIN,
            role=cfg.get("role", "worker"),
            admin_user=ADMIN_USER,
        )
        self.send_text(200, ks)

    def _send_efi(self, url_path):
        """Serve an iPXE binary from the tftp directory next to HTTP_DIR.

        Returns True when a response was sent, False when the file does not
        exist so the caller can fall back to static-file handling.
        """
        tftp_dir = os.path.join(os.path.dirname(HTTP_DIR), "tftp")
        fpath = os.path.join(tftp_dir, url_path.lstrip("/"))
        if not os.path.isfile(fpath):
            return False

        # Read first so Content-Length always matches the bytes written.
        with open(fpath, "rb") as f:
            payload = f.read()

        self.send_response(200)
        self.send_header("Content-Type", "application/efi")
        self.send_header("Content-Length", str(len(payload)))
        # Same reason as in send_text(): do not keep the connection open.
        self.send_header("Connection", "close")
        self.end_headers()
        self.wfile.write(payload)
        return True

    # ── POST ──────────────────────────────────────────────────────

    def do_POST(self):
        """Route POST requests to the JSON API; unknown paths get a 404."""
        parsed = urlparse(self.path)
        try:
            # A negative length would make read() wait for end of stream.
            content_length = max(int(self.headers.get("Content-Length", 0)), 0)
        except (TypeError, ValueError):
            self.send_json(400, {"error": "invalid Content-Length"})
            return
        body = self.rfile.read(content_length)

        if parsed.path == "/api/discover":
            self._post_discover(body)
        elif parsed.path == "/api/install":
            self._post_install(body)
        elif parsed.path == "/api/progress":
            self._post_progress(body)
        else:
            self.send_json(404, {"error": "not found"})

    def _post_discover(self, body):
        """Store a discovery report and print it on the terminal."""
        data = self._json_object(body)
        if data is None:
            return

        mac = self._normalize_mac(data.get("mac", "unknown"))
        data["last_seen"] = datetime.now().isoformat()

        state = load_state()
        is_new = mac not in state.get("discovered", {})
        if is_new:
            data["first_seen"] = data["last_seen"]
        else:
            # Keep the timestamp of the very first report for this MAC.
            data["first_seen"] = state["discovered"][mac].get("first_seen", data["last_seen"])

        state.setdefault("discovered", {})[mac] = data
        save_state(state)

        print_discovery(mac, data, is_new)

        self.send_json(200, {"status": "ok", "mac": mac, "new": is_new})

    def _post_install(self, body):
        """Queue a machine for install after validating mac and role."""
        data = self._json_object(body)
        if data is None:
            return

        mac = self._normalize_mac(data.get("mac", ""))
        hostname = data.get("hostname", "lab-node")
        disk = data.get("disk", "")
        role = data.get("role", "worker")

        if not mac:
            self.send_json(400, {"error": "mac is required"})
            return

        if role not in ("worker", "infra"):
            self.send_json(400, {"error": "role must be 'worker' or 'infra'"})
            return

        state = load_state()
        state.setdefault("install_queue", {})[mac] = {
            "hostname": hostname,
            "disk": disk,
            "role": role,
            "queued_at": datetime.now().isoformat(),
        }
        save_state(state)

        print_install_queued(mac, hostname)

        self.send_json(200, {
            "status": "queued",
            "mac": mac,
            "hostname": hostname,
            "role": role,
            "message": f"PXE boot the machine to start installation (role={role})",
        })

    def _post_progress(self, body):
        """Record an install progress report; finish the install on "complete"."""
        data = self._json_object(body)
        if data is None:
            return

        mac = self._normalize_mac(data.get("mac", "unknown"))
        stage = str(data.get("stage") or "unknown")
        detail = str(data.get("detail") or "")

        print_progress(mac, stage, detail)

        # Update state with progress. Reports from a MAC that is not queued
        # are printed above but change nothing in the state.
        state = load_state()
        queue = state.get("install_queue", {})
        if mac in queue:
            entry = queue[mac]
            entry["progress"] = stage
            entry["progress_at"] = datetime.now().isoformat()
            if detail:
                entry["progress_detail"] = detail

            # Move to installed on completion
            if stage == "complete":
                cfg = queue.pop(mac)
                ip = detail.replace("ready at ", "").strip() if detail else ""
                state.setdefault("installed", {})[mac] = {
                    "hostname": cfg.get("hostname", "?"),
                    "role": cfg.get("role", "?"),
                    "ip": ip,
                    "installed_at": datetime.now().isoformat(),
                }

        save_state(state)

        self.send_json(200, {"status": "ok"})
|
||
|
||
|
||
def run_server():
    """Serve BastionHandler on all interfaces at HTTP_PORT until stopped."""
    # The context manager closes the listening socket when serve_forever()
    # is left through an exception such as KeyboardInterrupt.
    with HTTPServer(("0.0.0.0", HTTP_PORT), BastionHandler) as server:
        print(f"HTTP server listening on :{HTTP_PORT}", flush=True)
        server.serve_forever()


if __name__ == "__main__":
    run_server()
|
||
PYSERVER
|
||
|
||
# ──── Generate dnsmasq config ─────────────────────────────────────
# Derive DHCP range for full mode: unless already set, the pool is .100–.200
# on the first three octets of $NETWORK.
if [[ "$DHCP_MODE" == "full" ]]; then
    DHCP_RANGE_START="${DHCP_RANGE_START:-${NETWORK%.*}.100}"
    DHCP_RANGE_END="${DHCP_RANGE_END:-${NETWORK%.*}.200}"
fi

# The heredoc delimiter is unquoted on purpose: every ${...} and the $(...)
# block that picks the DHCP section are expanded now, so the written file
# holds concrete values for this run.
# HTTP Boot architectures follow the IANA processor-architecture registry:
# 16 (0x10) is x64 UEFI HTTP and 19 (0x13) is arm64 UEFI HTTP.
cat > "$BASTION_DIR/dnsmasq.conf" << DNSMASQ
# Lab PXE Bastion — dnsmasq config

# Disable DNS (we only want DHCP/TFTP)
port=0

# Listen on the right interface
interface=${IFACE}
bind-dynamic

$(if [[ "$DHCP_MODE" == "full" ]]; then
    cat << FULL_DHCP
# Full DHCP mode — bastion is the only DHCP server on this network
dhcp-range=${DHCP_RANGE_START},${DHCP_RANGE_END},255.255.255.0,12h
dhcp-option=3,${GATEWAY}
dhcp-option=6,${GATEWAY}
FULL_DHCP
else
    cat << PROXY_DHCP
# ProxyDHCP — works alongside existing DHCP (UniFi etc)
dhcp-range=${NETWORK},proxy
PROXY_DHCP
fi)

# TFTP for initial PXE boot
enable-tftp
tftp-root=${TFTPDIR}
tftp-no-blocksize

# Detect client architecture — PXE (TFTP) clients
dhcp-match=set:bios,option:client-arch,0
dhcp-match=set:efi-x86_64,option:client-arch,7
dhcp-match=set:efi-x86_64,option:client-arch,9
dhcp-match=set:efi-arm64,option:client-arch,11

# Detect client architecture — UEFI HTTP Boot clients (no TFTP size limit)
dhcp-match=set:httpboot-x86_64,option:client-arch,16
dhcp-match=set:httpboot-arm64,option:client-arch,19

# Detect iPXE clients (already chainloaded)
dhcp-userclass=set:ipxe,iPXE

# UEFI HTTP Boot → serve full iPXE EFI via HTTP (no TFTP size limit)
dhcp-boot=tag:httpboot-x86_64,http://${SERVER_IP}:${HTTP_PORT}/ipxe-real.efi
dhcp-boot=tag:httpboot-arm64,http://${SERVER_IP}:${HTTP_PORT}/ipxe-arm64.efi
# Echo vendor class back to HTTP Boot clients (required by UEFI HTTP Boot spec)
dhcp-option-force=tag:httpboot-x86_64,60,HTTPClient
dhcp-option-force=tag:httpboot-arm64,60,HTTPClient

# First PXE boot → serve iPXE binary via TFTP (BIOS and UEFI fallback)
dhcp-boot=tag:bios,tag:!ipxe,undionly.kpxe
dhcp-boot=tag:efi-x86_64,tag:!ipxe,ipxe.efi
dhcp-boot=tag:efi-arm64,tag:!ipxe,ipxe-arm64.efi

# iPXE clients → chain to boot script via HTTP
dhcp-boot=tag:ipxe,http://${SERVER_IP}:${HTTP_PORT}/boot.ipxe

# PXE service directives (needed for proxy DHCP to respond properly)
pxe-service=tag:!ipxe,x86PC,"PXE Boot",undionly.kpxe
pxe-service=tag:!ipxe,X86-64_EFI,"PXE Boot",ipxe.efi
pxe-service=tag:!ipxe,BC_EFI,"PXE Boot",ipxe.efi
pxe-service=tag:!ipxe,ARM64_EFI,"PXE Boot",ipxe-arm64.efi

# Verbose logging
log-dhcp
DNSMASQ
|
||
|
||
# ──── Open firewall ──────────────────────────────────────────────
# Only when firewalld is installed and running. No --permanent flag is used,
# so the rules are added to the runtime configuration only.
if command -v firewall-cmd >/dev/null && firewall-cmd --state >/dev/null 2>&1; then
    # Detect the zone for our interface (may differ from default zone)
    FW_ZONE="$(firewall-cmd --get-zone-of-interface="${IFACE}" 2>/dev/null || echo "")"
    FW_ZONE_FLAG=""
    # Build the command as an array so an empty zone flag adds no argument
    # and nothing depends on unquoted word splitting.
    fw_cmd=(firewall-cmd --quiet)
    if [[ -n "$FW_ZONE" ]]; then
        FW_ZONE_FLAG="--zone=${FW_ZONE}"
        fw_cmd+=("$FW_ZONE_FLAG")
    fi
    log "Opening firewall ports (DHCP, TFTP, HTTP:${HTTP_PORT})${FW_ZONE:+ in zone ${FW_ZONE}}..."
    "${fw_cmd[@]}" --add-service=dhcp
    "${fw_cmd[@]}" --add-service=tftp
    "${fw_cmd[@]}" --add-port="${HTTP_PORT}/tcp"
    # Best effort: a failure to open 4011/udp is deliberately ignored.
    "${fw_cmd[@]}" --add-port=4011/udp 2>/dev/null || true
    FW_OPENED=true
fi
|
||
|
||
# ──── Stop conflicting services ───────────────────────────────────
# A dnsmasq already running as a system service is stopped before the
# bastion starts its own instance further down.
if systemctl is-active --quiet dnsmasq 2>/dev/null; then
    warn "System dnsmasq is running — stopping it temporarily"
    systemctl stop dnsmasq
fi
|
||
|
||
# ──── Start HTTP server ──────────────────────────────────────────
# server.py takes its configuration as positional arguments, so the order
# below must not change.
log "Starting HTTP server on :${HTTP_PORT}..."
python3 "$BASTION_DIR/server.py" \
    "$HTTPDIR" \
    "$STATEFILE" \
    "$SERVER_IP" \
    "$HTTP_PORT" \
    "$FEDORA_VERSION" \
    "$FEDORA_MIRROR" \
    "$BASTION_DIR/ssh_keys" \
    "$TIMEZONE" \
    "$LOCALE" \
    "$DOMAIN" \
    "$ADMIN_USER" &
HTTP_PID=$!

# Give the server a moment, then check that the process is still alive.
sleep 1

if ! kill -0 "$HTTP_PID" 2>/dev/null; then
    die "HTTP server failed to start — is port ${HTTP_PORT} in use?"
fi
|
||
|
||
# ──── Start dnsmasq ──────────────────────────────────────────────
# Report the DHCP mode actually in use instead of always saying proxyDHCP.
log "Starting PXE server (${DHCP_MODE} DHCP on ${IFACE})..."
echo ""
echo -e "${CYAN}${BOLD}════════════════════════════════════════════════════════════${NC}"
echo -e "${CYAN}${BOLD} Lab PXE Bastion — Discovery Mode${NC}"
echo -e "${CYAN}${BOLD}════════════════════════════════════════════════════════════${NC}"
echo ""
echo -e " Network: ${BOLD}${NETWORK}/24${NC} via ${BOLD}${IFACE}${NC}"
echo -e " DHCP: ${BOLD}${DHCP_MODE}${NC}$(if [[ "$DHCP_MODE" == "full" ]]; then echo " (${DHCP_RANGE_START}–${DHCP_RANGE_END})"; else echo " (alongside existing DHCP)"; fi)"
echo -e " HTTP: ${BOLD}http://${SERVER_IP}:${HTTP_PORT}/${NC}"
echo -e " OS: ${BOLD}Fedora ${FEDORA_VERSION} (${ARCH})${NC}"
echo -e " Domain: ${BOLD}${DOMAIN}${NC}"
echo -e " State: ${BOLD}${STATEFILE}${NC}"
echo ""
echo -e " ${YELLOW}PXE boot any machine on this network.${NC}"
echo -e " ${YELLOW}It will be inventoried and powered off automatically.${NC}"
echo ""
echo -e " Commands (from another terminal):"
echo -e " ${BOLD}bash bastion.sh list${NC} — show machines"
echo -e " ${BOLD}bash bastion.sh install <mac> <hostname>${NC} — queue install"
echo ""
echo -e " Press ${BOLD}Ctrl-C${NC} to stop."
echo ""
echo -e "${CYAN}──── Waiting for PXE boot requests... ────${NC}"
echo ""

# Run dnsmasq in the foreground of a background job so its PID is known,
# then block on it; a non-zero exit status is reported as an error.
dnsmasq --no-daemon --conf-file="$BASTION_DIR/dnsmasq.conf" &
DNSMASQ_PID=$!

wait "$DNSMASQ_PID" || {
    err "dnsmasq exited unexpectedly. Check if another DHCP/TFTP service is running."
    err "Try: ss -ulnp | grep -E ':(67|69|4011) '"
    exit 1
}
|