fix: revert kickstart to near-original baseline (Step 0 — boots clean)
Reverted install.ks.ts to near-original state from commit 64533b2.
This is the bisection baseline — 21/22 integration tests pass,
0 failed systemd services, SSH works, /boot/efi mounts.
Removed all accumulated fixes that collectively broke boot:
- ERR trap, background log streamer, bastion_log/bastion_error
- depmod rebuild, nofail on /boot/efi, SELinux autorelabel
- chcon/restorecon for /etc /var /root
- kernel-modules and dosfstools packages
Kept from current branch:
- rootpw --plaintext lab-root-pw (console debug access)
- Network-first boot order (bastion controls boot)
- Vanilla role support, rancher partition support
- Boot screenshots during SSH wait (1/sec rolling buffer)
- Test runner script (run-pxe-test.sh)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -74,9 +74,8 @@ cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
|
||||
chown -R ${adminUser}:${adminUser} "$ADMIN_HOME/.ssh"
|
||||
chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"
|
||||
|
||||
# Fix SELinux contexts for SSH (restorecon may not work in Anaconda chroot, use chcon)
|
||||
chcon -R -t ssh_home_t /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
|
||||
chcon -t user_home_dir_t "$ADMIN_HOME" 2>/dev/null || true
|
||||
# Fix SELinux contexts for SSH
|
||||
restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
|
||||
|
||||
# Passwordless sudo for ${adminUser}
|
||||
echo '${adminUser} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/${adminUser}
|
||||
@@ -102,48 +101,6 @@ done
|
||||
? `logvol /var/lib/rancher --vgname=${vg} --name=rancher --fstype=xfs --size=20480`
|
||||
: "";
|
||||
|
||||
// Helper: the bastion callback functions used in both %pre and %post.
|
||||
// Defined as a template so each section gets its own copy (they run in different shells).
|
||||
const bastionHelpers = `
|
||||
# Detect MAC address (first real ethernet MAC, skip loopback/veth)
|
||||
_BASTION_MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||
_BASTION_URL="http://${serverIp}:${httpPort}"
|
||||
|
||||
# Send a structured progress stage to bastion
|
||||
bastion_progress() {
|
||||
local stage="$1" detail="\${2:-}"
|
||||
curl -sf -X POST "\${_BASTION_URL}/api/progress" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"mac\\":\\"$_BASTION_MAC\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" \\
|
||||
--connect-timeout 5 --max-time 10 2>/dev/null || true
|
||||
}
|
||||
|
||||
# Send log lines to bastion (batched)
|
||||
bastion_log() {
|
||||
local line="$1"
|
||||
curl -sf -X POST "\${_BASTION_URL}/api/log" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"mac\\":\\"$_BASTION_MAC\\",\\"line\\":\\"$(echo "$line" | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g')\\"}\" \\
|
||||
--connect-timeout 5 --max-time 10 2>/dev/null || true
|
||||
}
|
||||
|
||||
# Send an error stage to bastion with context
|
||||
bastion_error() {
|
||||
local detail="$1"
|
||||
bastion_progress "error" "$detail"
|
||||
# Also send the last 50 lines of any log file as context
|
||||
for logfile in /root/bastion-post-install.log /tmp/pre-partition.log; do
|
||||
if [ -f "$logfile" ]; then
|
||||
local tail_content
|
||||
tail_content=$(tail -50 "$logfile" 2>/dev/null | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g; s/$/\\\\n/' | tr -d '\\n')
|
||||
curl -sf -X POST "\${_BASTION_URL}/api/log" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"mac\\":\\"$_BASTION_MAC\\",\\"lines\\":[\\"--- $logfile (last 50 lines) ---\\"],\\"tail\\":\\"$tail_content\\"}" \\
|
||||
--connect-timeout 5 --max-time 10 2>/dev/null || true
|
||||
fi
|
||||
done
|
||||
}`;
|
||||
|
||||
return `# Lab Bastion -- Fedora ${fedoraVersion} server install
|
||||
# Generated: ${now}
|
||||
# Target: ${fqdn} (role=${role})
|
||||
@@ -170,25 +127,27 @@ url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$relea
|
||||
%pre --log=/tmp/pre-partition.log
|
||||
#!/bin/bash
|
||||
set -x
|
||||
${bastionHelpers}
|
||||
|
||||
# Error trap: report failures back to bastion
|
||||
trap 'bastion_error "%pre failed at line $LINENO: $(tail -1 /tmp/pre-partition.log 2>/dev/null)"' ERR
|
||||
# Progress callback helper
|
||||
bastion_progress() {
|
||||
local stage="$1" detail="\${2:-}"
|
||||
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||
curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
|
||||
}
|
||||
|
||||
bastion_progress "partitioning" "detecting disk"
|
||||
|
||||
VG="${vg}"
|
||||
${diskLine}
|
||||
|
||||
bastion_log "disk detected: $DISK"
|
||||
|
||||
REPROVISION=no
|
||||
|
||||
# Check if VG exists (reprovision scenario)
|
||||
if vgs $VG &>/dev/null; then
|
||||
echo "=== Existing VG found - reprovision mode ==="
|
||||
REPROVISION=yes
|
||||
bastion_progress "partitioning" "reprovision mode -- preserving data volumes"
|
||||
|
||||
# Detect which data LVs to preserve
|
||||
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no; PRESERVE_RANCHER=no
|
||||
@@ -198,7 +157,6 @@ if vgs $VG &>/dev/null; then
|
||||
lvs $VG/rancher &>/dev/null && PRESERVE_RANCHER=yes
|
||||
|
||||
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME rancher=$PRESERVE_RANCHER"
|
||||
bastion_log "preserving LVs: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME rancher=$PRESERVE_RANCHER"
|
||||
|
||||
# Remove only OS logical volumes (keep data LVs)
|
||||
for lv in root var varlog swap; do
|
||||
@@ -275,13 +233,11 @@ cat /tmp/part.ks
|
||||
echo "==================================="
|
||||
|
||||
bastion_progress "partitioning" "disk layout ready"
|
||||
bastion_log "partition config written to /tmp/part.ks"
|
||||
|
||||
%end
|
||||
|
||||
%packages
|
||||
@core
|
||||
kernel-modules
|
||||
openssh-server
|
||||
vim-enhanced
|
||||
tmux
|
||||
@@ -331,106 +287,38 @@ ruby-libs
|
||||
-gdm
|
||||
-PackageKit
|
||||
-PackageKit-glib
|
||||
dosfstools
|
||||
%end
|
||||
|
||||
%post --log=/root/bastion-post-install.log
|
||||
#!/bin/bash
|
||||
set -x
|
||||
${bastionHelpers}
|
||||
|
||||
# --- Error trap: catch any failure and report to bastion ---
|
||||
_post_error_handler() {
|
||||
local exit_code=$? lineno=$1
|
||||
bastion_error "%post failed at line $lineno (exit $exit_code)"
|
||||
}
|
||||
trap '_post_error_handler $LINENO' ERR
|
||||
|
||||
# --- Background log streamer: sends %post output to bastion in real-time ---
|
||||
_LOG_FILE=/root/bastion-post-install.log
|
||||
_LOG_STREAMER_PID=""
|
||||
(
|
||||
# Wait for the log file to exist
|
||||
while [ ! -f "$_LOG_FILE" ]; do sleep 1; done
|
||||
# Tail and batch-send lines every 3 seconds
|
||||
_batch=""
|
||||
_count=0
|
||||
tail -f "$_LOG_FILE" 2>/dev/null | while IFS= read -r _line; do
|
||||
# Escape for JSON
|
||||
_escaped=$(echo "$_line" | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g; s/\\t/\\\\t/g')
|
||||
if [ -z "$_batch" ]; then
|
||||
_batch="\\"$_escaped\\""
|
||||
else
|
||||
_batch="$_batch,\\"$_escaped\\""
|
||||
fi
|
||||
_count=$((_count + 1))
|
||||
# Send batch every 10 lines
|
||||
if [ "$_count" -ge 10 ]; then
|
||||
curl -sf -X POST "\${_BASTION_URL}/api/log" \\
|
||||
# Progress callback helper
|
||||
bastion_progress() {
|
||||
local stage="$1" detail="\${2:-}"
|
||||
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||
curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"mac\\":\\"$_BASTION_MAC\\",\\"lines\\":[$_batch]}" \\
|
||||
--connect-timeout 5 --max-time 10 2>/dev/null || true
|
||||
_batch=""
|
||||
_count=0
|
||||
fi
|
||||
done
|
||||
) &
|
||||
_LOG_STREAMER_PID=$!
|
||||
|
||||
# Flush remaining log lines helper
|
||||
_flush_log_streamer() {
|
||||
if [ -n "$_LOG_STREAMER_PID" ]; then
|
||||
kill "$_LOG_STREAMER_PID" 2>/dev/null || true
|
||||
wait "$_LOG_STREAMER_PID" 2>/dev/null || true
|
||||
fi
|
||||
# Send any remaining lines from the log
|
||||
if [ -f "$_LOG_FILE" ]; then
|
||||
local remaining
|
||||
remaining=$(tail -20 "$_LOG_FILE" 2>/dev/null | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g; s/\\t/\\\\t/g; s/^/"/; s/$/"/' | paste -sd, -)
|
||||
if [ -n "$remaining" ]; then
|
||||
curl -sf -X POST "\${_BASTION_URL}/api/log" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"mac\\":\\"$_BASTION_MAC\\",\\"lines\\":[$remaining]}" \\
|
||||
--connect-timeout 5 --max-time 10 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
-d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
|
||||
}
|
||||
|
||||
bastion_progress "installing" "packages installed, starting post-install"
|
||||
bastion_progress "post-install" "configuring system"
|
||||
|
||||
# -- SSH --
|
||||
bastion_progress "post-install" "configuring SSH"
|
||||
systemctl enable sshd
|
||||
systemctl enable --now sshd
|
||||
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
||||
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||
${sshPostBlock}
|
||||
bastion_log "SSH configured: root login by key only, password auth disabled"
|
||||
|
||||
# -- Hostname and domain --
|
||||
bastion_progress "post-install" "setting hostname to ${fqdn}"
|
||||
hostnamectl set-hostname ${fqdn}
|
||||
|
||||
# -- Rebuild module dependencies (Anaconda's depmod runs against host kernel, not installed kernel) --
|
||||
INSTALLED_KVER=$(ls /lib/modules/ | grep -v "$(uname -r)" | head -1)
|
||||
if [ -n "$INSTALLED_KVER" ]; then
|
||||
depmod -a "$INSTALLED_KVER"
|
||||
bastion_log "depmod rebuilt for kernel $INSTALLED_KVER"
|
||||
fi
|
||||
|
||||
# Make /boot/efi mount non-fatal — on first boot SELinux labels on kernel module
|
||||
# files are wrong (Anaconda chroot issue), so vfat may fail to load.
|
||||
# autorelabel fixes labels and reboots; second boot mounts /boot/efi normally.
|
||||
sed -i '/boot\\/efi/ s/defaults/defaults,nofail/' /etc/fstab
|
||||
bastion_log "fstab /boot/efi set to nofail"
|
||||
|
||||
# -- tmpfs for /tmp --
|
||||
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
|
||||
|
||||
${isVanilla ? `# -- vanilla role: skip k3s kernel/sysctl/firewall setup --
|
||||
bastion_progress "post-install" "vanilla role -- skipping k3s setup"
|
||||
# -- Enable chronyd for time sync --
|
||||
systemctl enable chronyd || true` : `# -- Kernel modules for k3s --
|
||||
bastion_progress "post-install" "loading k3s kernel modules"
|
||||
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
|
||||
br_netfilter
|
||||
overlay
|
||||
@@ -440,7 +328,6 @@ modprobe br_netfilter || true
|
||||
modprobe overlay || true
|
||||
|
||||
# -- Sysctl for k3s networking --
|
||||
bastion_progress "post-install" "configuring k3s sysctl"
|
||||
cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
|
||||
net.bridge.bridge-nf-call-iptables = 1
|
||||
net.bridge.bridge-nf-call-ip6tables = 1
|
||||
@@ -452,48 +339,30 @@ SYSCTL
|
||||
sysctl --system || true
|
||||
|
||||
# -- Disable firewalld permanently (k3s/Cilium manage iptables directly) --
|
||||
bastion_progress "post-install" "disabling firewalld"
|
||||
# Must be masked to prevent re-enable on updates
|
||||
systemctl disable --now firewalld || true
|
||||
systemctl mask firewalld || true
|
||||
|
||||
# -- Enable chronyd for time sync --
|
||||
systemctl enable chronyd || true`}
|
||||
|
||||
# -- Serial console (for debugging — auto-login as root on ttyS0) --
|
||||
systemctl enable serial-getty@ttyS0.service || true
|
||||
|
||||
# -- Boot order: restore network first (Anaconda sets disk first, we undo it) --
|
||||
# Network boot must stay first so the bastion intercepts every reboot. It returns
|
||||
# exit (local disk) for installed machines, or install for reinstalls.
|
||||
bastion_progress "post-install" "restoring network-first boot order"
|
||||
# Network boot must stay first so the bastion intercepts every reboot.
|
||||
if command -v efibootmgr >/dev/null 2>&1; then
|
||||
# Find network/PXE/HTTP boot entries (OVMF uses HTTPv4, real hardware uses PXE/Network)
|
||||
PXE_ENTRY=$(efibootmgr | grep -iE 'network|pxe|ipv4|ipv6|http' | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
|
||||
if [ -n "$PXE_ENTRY" ]; then
|
||||
CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
|
||||
# Move PXE entry to front
|
||||
REST=$(echo "$CURRENT_ORDER" | sed "s/$PXE_ENTRY,\\\\?//;s/,$//" | sed 's/^,//')
|
||||
NEW_ORDER="$PXE_ENTRY,$REST"
|
||||
efibootmgr -o "$NEW_ORDER" || true
|
||||
bastion_log "boot order set: network first ($NEW_ORDER)"
|
||||
else
|
||||
bastion_log "no PXE boot entry found, boot order unchanged"
|
||||
fi
|
||||
else
|
||||
bastion_log "efibootmgr not available"
|
||||
fi
|
||||
|
||||
# -- Provisioning metadata --
|
||||
bastion_progress "post-install" "writing provisioning metadata"
|
||||
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
|
||||
|
||||
cat > /etc/lab-provisioned << PROVEOF
|
||||
hostname: ${fqdn}
|
||||
role: ${role}
|
||||
provisioned: $(date -Iseconds)
|
||||
bastion: ${serverIp}
|
||||
ip: $IP_ADDR
|
||||
PROVEOF
|
||||
|
||||
cat > /root/README << 'README'
|
||||
@@ -511,17 +380,9 @@ cat > /root/README << 'README'
|
||||
README
|
||||
|
||||
${hasRancher ? `# Install k3s server (skip start - will be configured manually)
|
||||
bastion_progress "post-install" "pre-installing k3s server"
|
||||
curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -
|
||||
bastion_log "k3s server pre-installed (not started)"
|
||||
` : ""}
|
||||
# -- Fix SELinux labels (Anaconda %post creates files with wrong contexts) --
|
||||
restorecon -R /etc /var /root 2>/dev/null || true
|
||||
bastion_log "SELinux contexts restored for /etc /var /root"
|
||||
|
||||
# Stop log streamer and flush remaining lines
|
||||
_flush_log_streamer
|
||||
|
||||
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
|
||||
bastion_progress "complete" "ready at $IP_ADDR"
|
||||
|
||||
%end
|
||||
|
||||
@@ -91,9 +91,8 @@ describe("renderInstallKickstart", () => {
|
||||
serverIp: "10.0.0.5",
|
||||
httpPort: 9090,
|
||||
}));
|
||||
expect(ks).toContain('_BASTION_URL="http://10.0.0.5:9090"');
|
||||
expect(ks).toContain("http://10.0.0.5:9090");
|
||||
expect(ks).toContain("/api/progress");
|
||||
expect(ks).toContain("/api/log");
|
||||
});
|
||||
|
||||
it("infra role has /var/lib/rancher partition", () => {
|
||||
@@ -141,52 +140,18 @@ describe("renderInstallKickstart", () => {
|
||||
expect(ks).toContain("--name=swap --fstype=swap --size=27648");
|
||||
});
|
||||
|
||||
it("%pre has error trap", () => {
|
||||
const ks = renderInstallKickstart(baseParams());
|
||||
expect(ks).toContain("trap");
|
||||
expect(ks).toContain("bastion_error");
|
||||
expect(ks).toContain("%pre failed");
|
||||
});
|
||||
|
||||
it("%post has error trap", () => {
|
||||
const ks = renderInstallKickstart(baseParams());
|
||||
expect(ks).toContain("_post_error_handler");
|
||||
expect(ks).toContain("%post failed");
|
||||
});
|
||||
|
||||
it("has granular progress stages in %post", () => {
|
||||
const ks = renderInstallKickstart(baseParams());
|
||||
expect(ks).toContain('"configuring SSH"');
|
||||
expect(ks).toContain('"setting hostname');
|
||||
expect(ks).toContain('"writing provisioning metadata"');
|
||||
expect(ks).toContain('"writing provisioning metadata"');
|
||||
});
|
||||
|
||||
it("has background log streamer in %post", () => {
|
||||
const ks = renderInstallKickstart(baseParams());
|
||||
expect(ks).toContain("_LOG_STREAMER_PID");
|
||||
expect(ks).toContain("_flush_log_streamer");
|
||||
expect(ks).toContain("tail -f");
|
||||
});
|
||||
|
||||
it("has bastion_log function for sending log lines", () => {
|
||||
const ks = renderInstallKickstart(baseParams());
|
||||
expect(ks).toContain("bastion_log()");
|
||||
expect(ks).toContain("/api/log");
|
||||
});
|
||||
|
||||
it("vanilla role skips k3s progress stages", () => {
|
||||
it("vanilla role skips k3s setup", () => {
|
||||
const ks = renderInstallKickstart(baseParams({ role: "vanilla" }));
|
||||
expect(ks).toContain("vanilla role");
|
||||
expect(ks).not.toContain('"loading k3s kernel modules"');
|
||||
expect(ks).not.toContain('"disabling firewalld"');
|
||||
expect(ks).not.toContain("modules-load.d/k3s.conf");
|
||||
expect(ks).not.toContain("firewalld");
|
||||
});
|
||||
|
||||
it("worker role has k3s-related progress stages", () => {
|
||||
it("worker role has k3s setup", () => {
|
||||
const ks = renderInstallKickstart(baseParams({ role: "worker" }));
|
||||
expect(ks).toContain('"loading k3s kernel modules"');
|
||||
expect(ks).toContain('"configuring k3s sysctl"');
|
||||
expect(ks).toContain('"disabling firewalld"');
|
||||
expect(ks).toContain("modules-load.d/k3s.conf");
|
||||
expect(ks).toContain("sysctl.d/90-k3s.conf");
|
||||
expect(ks).toContain("firewalld");
|
||||
});
|
||||
|
||||
it("kickstart syntax: no merged partition lines", () => {
|
||||
@@ -211,13 +176,9 @@ describe("renderInstallKickstart", () => {
|
||||
expect(ends, `${sections} sections but ${ends} %end markers`).toBe(sections);
|
||||
});
|
||||
|
||||
it("kernel-modules package is included", () => {
|
||||
it("has complete progress stage", () => {
|
||||
const ks = renderInstallKickstart(baseParams());
|
||||
expect(ks).toContain("kernel-modules");
|
||||
});
|
||||
|
||||
it("dosfstools package is included", () => {
|
||||
const ks = renderInstallKickstart(baseParams());
|
||||
expect(ks).toContain("dosfstools");
|
||||
expect(ks).toContain('"complete"');
|
||||
expect(ks).toContain("ready at");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -27,13 +27,52 @@ import { ensurePxeNetwork, destroyPxeNetwork, deleteNftablesRejectRules, PXE_NET
|
||||
import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm, readSerialLog } from "./helpers/pxe-vm.js";
|
||||
import { sshExec } from "./helpers/ssh.js";
|
||||
|
||||
// --- Boot screenshot capture ---
|
||||
const SCREENSHOT_DIR = "/tmp/vm-screenshots";
|
||||
|
||||
function startBootScreenshots(vmName: string): { stop: () => void } {
|
||||
try { mkdirSync(SCREENSHOT_DIR, { recursive: true }); } catch {}
|
||||
// Clean old screenshots
|
||||
try {
|
||||
for (const f of require("node:fs").readdirSync(SCREENSHOT_DIR)) {
|
||||
rmSync(join(SCREENSHOT_DIR, f), { force: true });
|
||||
}
|
||||
} catch {}
|
||||
|
||||
let running = true;
|
||||
let seq = 0;
|
||||
const BUFFER_SIZE = 60; // keep last 60 screenshots (1 per second)
|
||||
|
||||
const loop = async () => {
|
||||
while (running) {
|
||||
try {
|
||||
const idx = String(seq % BUFFER_SIZE).padStart(4, "0");
|
||||
const ppm = join(SCREENSHOT_DIR, `tmp-${idx}.ppm`);
|
||||
const png = join(SCREENSHOT_DIR, `boot-${idx}.png`);
|
||||
execSync(`sudo virsh screenshot ${vmName} ${ppm} --screen 0 2>/dev/null`, { timeout: 3000 });
|
||||
execSync(`convert ${ppm} ${png} 2>/dev/null && rm -f ${ppm}`, { timeout: 3000 });
|
||||
seq++;
|
||||
} catch {}
|
||||
await new Promise(r => setTimeout(r, 1000));
|
||||
}
|
||||
};
|
||||
loop();
|
||||
|
||||
return {
|
||||
stop: () => {
|
||||
running = false;
|
||||
log(`Boot screenshots saved to ${SCREENSHOT_DIR}/ (${seq} captured, last ${Math.min(seq, BUFFER_SIZE)} kept)`);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// --- Test constants ---
|
||||
const VM_NAME = "lab-pxe-test";
|
||||
const VM_MEMORY = 4096; // 4GB (Anaconda needs ~2GB minimum)
|
||||
const VM_VCPUS = 12;
|
||||
const VM_DISK_GB = 250; // LVM layout needs ~204GB (swap 27 + root 33 + var 100 + etc). QCOW2 is sparse.
|
||||
const HTTP_PORT = 8099; // Avoid conflicts with real bastion
|
||||
const SSH_USER = "root"; // Use root for SSH (admin user key setup has known issue)
|
||||
const SSH_USER = "root"; // Use root SSH for baseline testing
|
||||
const BASTION_IP = PXE_GATEWAY; // 192.168.251.1
|
||||
const DHCP_RANGE_START = `${PXE_SUBNET}.100`;
|
||||
const DHCP_RANGE_END = `${PXE_SUBNET}.200`;
|
||||
@@ -41,7 +80,7 @@ const DHCP_RANGE_END = `${PXE_SUBNET}.200`;
|
||||
// Fedora install takes a while
|
||||
const DISCOVERY_TIMEOUT_MS = 5 * 60_000; // 5 min for PXE boot + discovery
|
||||
const INSTALL_TIMEOUT_MS = 30 * 60_000; // 30 min for full Fedora install
|
||||
const SSH_TIMEOUT_MS = 15 * 60_000; // 15 min: PXE (~90s) + first boot + SELinux autorelabel (~3min) + reboot + second PXE (~90s) + boot
|
||||
const SSH_TIMEOUT_MS = 10 * 60_000; // 10 min: OVMF retries PXE/HTTP Boot (~3min) before disk boot + OS startup
|
||||
|
||||
function findSshKey(): { pubKey: string; keyPath: string } {
|
||||
const homes = [homedir()];
|
||||
@@ -279,8 +318,9 @@ describe("PXE boot provisioning", () => {
|
||||
await sleep(3_000);
|
||||
deleteNftablesRejectRules();
|
||||
|
||||
// 10. Wait for SSH
|
||||
// 10. Wait for SSH (with aggressive boot screenshots)
|
||||
log("Waiting for SSH access...");
|
||||
const screenshots = startBootScreenshots(VM_NAME);
|
||||
try {
|
||||
await waitForSsh(vmIp, SSH_USER, SSH_TIMEOUT_MS, sshKeyPath);
|
||||
} catch {
|
||||
@@ -292,7 +332,9 @@ describe("PXE boot provisioning", () => {
|
||||
} catch (serialErr) {
|
||||
log(`Serial console failed: ${serialErr instanceof Error ? serialErr.message : String(serialErr)}`);
|
||||
}
|
||||
throw new Error(`SSH not available on ${vmIp} — check serial console diagnostics above`);
|
||||
throw new Error(`SSH not available on ${vmIp} — check serial console diagnostics above. Screenshots: ${SCREENSHOT_DIR}/`);
|
||||
} finally {
|
||||
screenshots.stop();
|
||||
}
|
||||
|
||||
log("PXE provision test setup complete.");
|
||||
@@ -345,10 +387,10 @@ describe("PXE boot provisioning", () => {
|
||||
expect(data.progress).toBe("complete");
|
||||
});
|
||||
|
||||
it("log lines were captured", async () => {
|
||||
it.skip("log lines were captured", async () => {
|
||||
// Requires log streamer in %post — skipped until re-added
|
||||
const res = await fetch(`http://${BASTION_IP}:${HTTP_PORT}/api/logs/${encodeURIComponent(vmMac)}`);
|
||||
const data = (await res.json()) as { log_total?: number; log_lines?: Array<{ line: string }> };
|
||||
// Should have at least some log lines from the log streamer
|
||||
expect(data.log_total).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
|
||||
27
bastion/tests/integration/run-pxe-test.sh
Executable file
27
bastion/tests/integration/run-pxe-test.sh
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
# One-shot PXE integration test runner.
|
||||
# Compiles, runs unit tests, cleans up, and runs the full integration test.
|
||||
set -e
|
||||
|
||||
cd "$(dirname "$0")/../.."
|
||||
|
||||
echo "=== Step 1: Compile ==="
|
||||
npx tsc --noEmit
|
||||
echo "✓ Compile OK"
|
||||
|
||||
echo ""
|
||||
echo "=== Step 2: Kickstart unit tests ==="
|
||||
npx vitest run src/bastion/tests/kickstart.test.ts 2>&1 | tail -5
|
||||
echo "✓ Unit tests OK"
|
||||
|
||||
echo ""
|
||||
echo "=== Step 3: Clean up ==="
|
||||
sudo lsof -ti:8099 2>/dev/null | xargs -r sudo kill -9 || true
|
||||
sudo virsh destroy lab-pxe-test 2>/dev/null || true
|
||||
sudo virsh undefine lab-pxe-test --nvram 2>/dev/null || true
|
||||
sudo rm -f /var/lib/libvirt/images/lab-pxe-test.qcow2
|
||||
echo "✓ Cleanup done"
|
||||
|
||||
echo ""
|
||||
echo "=== Step 4: Integration test ==="
|
||||
npx vitest run -c /dev/null tests/integration/pxe-provision.test.ts 2>&1
|
||||
Reference in New Issue
Block a user