From a664074fa3dac80b6e1992e0e8052a4d4d56543b Mon Sep 17 00:00:00 2001 From: Michal Date: Sat, 28 Mar 2026 20:24:14 +0000 Subject: [PATCH] wip: save current ks debugging state before bisect revert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All accumulated changes to kickstart template, test infrastructure, and dnsmasq config. None of these produce a clean boot yet — saving state before reverting to baseline for bisection. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/bastion/src/templates/dnsmasq.conf.ts | 3 + .../src/bastion/src/templates/install.ks.ts | 33 ++++- bastion/src/bastion/tests/kickstart.test.ts | 32 ++++ .../tests/integration/helpers/pxe-network.ts | 74 +++++----- bastion/tests/integration/helpers/pxe-vm.ts | 139 ++++++------------ .../integration/helpers/vm-screenshot.sh | 33 +++++ .../tests/integration/pxe-provision.test.ts | 110 ++++++++++---- 7 files changed, 258 insertions(+), 166 deletions(-) create mode 100755 bastion/tests/integration/helpers/vm-screenshot.sh diff --git a/bastion/src/bastion/src/templates/dnsmasq.conf.ts b/bastion/src/bastion/src/templates/dnsmasq.conf.ts index fe10d64..af972da 100644 --- a/bastion/src/bastion/src/templates/dnsmasq.conf.ts +++ b/bastion/src/bastion/src/templates/dnsmasq.conf.ts @@ -88,6 +88,9 @@ pxe-service=tag:!ipxe,ARM64_EFI,"PXE Boot",ipxe-arm64.efi` : `# Full DHCP mode - # Discovery protocol which some UEFI implementations don't support). 
The dhcp-boot # directives above provide the boot filename directly in the DHCP offer.`} +# Lease file in bastion directory (avoid default /var/lib/dnsmasq which needs root) +dhcp-leasefile=${config.bastionDir}/dnsmasq.leases + # Verbose logging log-dhcp `; diff --git a/bastion/src/bastion/src/templates/install.ks.ts b/bastion/src/bastion/src/templates/install.ks.ts index 45ddbdd..fc6a1cb 100644 --- a/bastion/src/bastion/src/templates/install.ks.ts +++ b/bastion/src/bastion/src/templates/install.ks.ts @@ -41,9 +41,10 @@ export function renderInstallKickstart(params: InstallKickstartParams): string { const isVanilla = role === "vanilla"; // -- Auth section -- + // Always set a root password (for serial console debugging) + SSH keys const auth = sshKeys.length > 0 - ? `rootpw --lock\nsshkey --username=root "${sshKeys[0]}"` - : "rootpw --plaintext changeme"; + ? `rootpw --plaintext lab-root-pw\nsshkey --username=root "${sshKeys[0]}"` + : "rootpw --plaintext lab-root-pw"; // -- Admin user directive -- const userDirective = adminUser @@ -73,8 +74,9 @@ cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys" chown -R ${adminUser}:${adminUser} "$ADMIN_HOME/.ssh" chmod 600 "$ADMIN_HOME/.ssh/authorized_keys" -# Fix SELinux contexts for SSH -restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true +# Fix SELinux contexts for SSH (restorecon may not work in Anaconda chroot, use chcon) +chcon -R -t ssh_home_t /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true +chcon -t user_home_dir_t "$ADMIN_HOME" 2>/dev/null || true # Passwordless sudo for ${adminUser} echo '${adminUser} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/${adminUser} @@ -279,6 +281,7 @@ bastion_log "partition config written to /tmp/part.ks" %packages @core +kernel-modules openssh-server vim-enhanced tmux @@ -328,6 +331,7 @@ ruby-libs -gdm -PackageKit -PackageKit-glib +dosfstools %end %post --log=/root/bastion-post-install.log @@ -396,7 +400,7 @@ bastion_progress "installing" "packages installed, 
starting post-install" # -- SSH -- bastion_progress "post-install" "configuring SSH" -systemctl enable --now sshd +systemctl enable sshd sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config ${sshPostBlock} @@ -406,13 +410,22 @@ bastion_log "SSH configured: root login by key only, password auth disabled" bastion_progress "post-install" "setting hostname to ${fqdn}" hostnamectl set-hostname ${fqdn} -# -- tmpfs for /tmp -- -echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab +# -- Rebuild module dependencies (Anaconda's depmod runs against host kernel, not installed kernel) -- +INSTALLED_KVER=$(ls /lib/modules/ | grep -v "$(uname -r)" | head -1) +if [ -n "$INSTALLED_KVER" ]; then + depmod -a "$INSTALLED_KVER" + bastion_log "depmod rebuilt for kernel $INSTALLED_KVER" +fi -# Make /boot/efi mount non-fatal (prevents emergency mode if EFI partition isn't found) +# Make /boot/efi mount non-fatal — on first boot SELinux labels on kernel module +# files are wrong (Anaconda chroot issue), so vfat may fail to load. +# autorelabel fixes labels and reboots; second boot mounts /boot/efi normally. sed -i '/boot\\/efi/ s/defaults/defaults,nofail/' /etc/fstab bastion_log "fstab /boot/efi set to nofail" +# -- tmpfs for /tmp -- +echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab + ${isVanilla ? 
`# -- vanilla role: skip k3s kernel/sysctl/firewall setup -- bastion_progress "post-install" "vanilla role -- skipping k3s setup" # -- Enable chronyd for time sync -- @@ -502,6 +515,10 @@ bastion_progress "post-install" "pre-installing k3s server" curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh - bastion_log "k3s server pre-installed (not started)" ` : ""} +# -- Fix SELinux labels (Anaconda %post creates files with wrong contexts) -- +restorecon -R /etc /var /root 2>/dev/null || true +bastion_log "SELinux contexts restored for /etc /var /root" + # Stop log streamer and flush remaining lines _flush_log_streamer diff --git a/bastion/src/bastion/tests/kickstart.test.ts b/bastion/src/bastion/tests/kickstart.test.ts index 1a5456e..55e8d57 100644 --- a/bastion/src/bastion/tests/kickstart.test.ts +++ b/bastion/src/bastion/tests/kickstart.test.ts @@ -188,4 +188,36 @@ describe("renderInstallKickstart", () => { expect(ks).toContain('"configuring k3s sysctl"'); expect(ks).toContain('"disabling firewalld"'); }); + + it("kickstart syntax: no merged partition lines", () => { + for (const role of ["vanilla", "worker", "infra"] as const) { + const ks = renderInstallKickstart(baseParams({ role })); + const lines = ks.split("\n"); + for (let i = 0; i < lines.length; i++) { + const l = lines[i].trim(); + if (l.startsWith("part ")) { + const partCount = (l.match(/\bpart\b/g) || []).length; + expect(partCount, `line ${i + 1} has ${partCount} 'part' commands (role=${role}): ${l}`).toBe(1); + } + } + } + }); + + it("kickstart syntax: each section-opening has a %end", () => { + const ks = renderInstallKickstart(baseParams()); + // Only match section openers at start of line + const sections = (ks.match(/^%(?:pre|post|packages)\b/gm) || []).length; + const ends = (ks.match(/^%end$/gm) || []).length; + expect(ends, `${sections} sections but ${ends} %end markers`).toBe(sections); + }); + + it("kernel-modules package is included", () => { + const ks = 
renderInstallKickstart(baseParams()); + expect(ks).toContain("kernel-modules"); + }); + + it("dosfstools package is included", () => { + const ks = renderInstallKickstart(baseParams()); + expect(ks).toContain("dosfstools"); + }); }); diff --git a/bastion/tests/integration/helpers/pxe-network.ts b/bastion/tests/integration/helpers/pxe-network.ts index 12986bd..9f4f0a5 100644 --- a/bastion/tests/integration/helpers/pxe-network.ts +++ b/bastion/tests/integration/helpers/pxe-network.ts @@ -40,50 +40,50 @@ export function ensurePxeNetwork(): void { if (result.status === 0 && result.stdout.includes("Active: yes")) { log(`Network ${PXE_NETWORK_NAME} already active`); - return; + } else { + // Destroy existing if present but inactive + if (result.status === 0) { + virsh("net-destroy", PXE_NETWORK_NAME); + virsh("net-undefine", PXE_NETWORK_NAME); + } + + const xmlPath = "/tmp/lab-pxe-test-network.xml"; + writeFileSync(xmlPath, NETWORK_XML); + + log(`Creating PXE libvirt network: ${PXE_NETWORK_NAME} (${PXE_SUBNET}.0/24, no DHCP)`); + run(`virsh net-define "${xmlPath}"`); + run(`virsh net-start "${PXE_NETWORK_NAME}"`); + + try { unlinkSync(xmlPath); } catch { /* ignore */ } + + log(`Network ${PXE_NETWORK_NAME} created and active`); } - // Destroy existing if present but inactive - if (result.status === 0) { - virsh("net-destroy", PXE_NETWORK_NAME); - virsh("net-undefine", PXE_NETWORK_NAME); - } + // Libvirt adds nftables reject rules for NAT networks that block host→VM SSH. + // Delete them now and after every VM reboot (libvirt recreates them). + deleteNftablesRejectRules(); +} - const xmlPath = "/tmp/lab-pxe-test-network.xml"; - writeFileSync(xmlPath, NETWORK_XML); - - log(`Creating PXE libvirt network: ${PXE_NETWORK_NAME} (${PXE_SUBNET}.0/24, no DHCP)`); - run(`virsh net-define "${xmlPath}"`); - run(`virsh net-start "${PXE_NETWORK_NAME}"`); - - try { unlinkSync(xmlPath); } catch { /* ignore */ } - - // Libvirt creates nftables rules that reject traffic on the bridge. 
- // DHCP works (dnsmasq uses raw sockets) but TFTP/HTTP from VM->host gets blocked. - // Delete the reject rules so VM traffic can reach the bastion. - try { - // Delete the reject rules that libvirt added for our bridge. - // We find and delete each rule by its handle number. - const deleteRejectRules = (chain: string): void => { - const output = run(`nft -a list chain inet libvirt ${chain} 2>/dev/null || true`); - const lines = output.split("\n"); - for (const line of lines) { - if (line.includes(PXE_BRIDGE) && line.includes("reject")) { - const handleMatch = line.match(/# handle (\d+)/); - if (handleMatch) { - run(`nft delete rule inet libvirt ${chain} handle ${handleMatch[1]}`); +/** Delete libvirt's nftables reject rules for our bridge so host→VM traffic works. + * Must be called after every VM start/restart — libvirt recreates them. */ +export function deleteNftablesRejectRules(): void { + // libvirt uses "ip libvirt_network" table (not "inet libvirt") + const tables = ["ip libvirt_network", "ip6 libvirt_network", "inet libvirt"]; + for (const table of tables) { + try { + for (const chain of ["guest_input", "guest_output"]) { + const output = run(`nft -a list chain ${table} ${chain} 2>/dev/null || true`); + for (const line of output.split("\n")) { + if (line.includes(PXE_BRIDGE) && line.includes("reject")) { + const handleMatch = line.match(/# handle (\d+)/); + if (handleMatch) { + run(`nft delete rule ${table} ${chain} handle ${handleMatch[1]}`); + } } } } - }; - deleteRejectRules("guest_input"); - deleteRejectRules("guest_output"); - log(`Removed nftables reject rules for ${PXE_BRIDGE}`); - } catch { - log(`Could not update nftables rules (may need manual firewall config)`); + } catch { /* table may not exist */ } } - - log(`Network ${PXE_NETWORK_NAME} created and active`); } /** Destroy the PXE test network. 
*/ diff --git a/bastion/tests/integration/helpers/pxe-vm.ts b/bastion/tests/integration/helpers/pxe-vm.ts index 703f434..58ae4e2 100644 --- a/bastion/tests/integration/helpers/pxe-vm.ts +++ b/bastion/tests/integration/helpers/pxe-vm.ts @@ -63,7 +63,7 @@ export function createPxeVm(config: PxeVmConfig): void { `--disk=path=${diskPath},format=qcow2,bus=virtio`, `--network=network=${config.network},model=virtio`, // UEFI firmware — required for PXE boot in modern mode - `--boot=uefi,network`, + `--boot=uefi,network,hd`, // No OS to install — PXE provides everything "--os-variant=generic", "--noautoconsole", @@ -113,29 +113,54 @@ export function rebootPxeVm(name: string): void { log(`PXE VM ${name} restarted`); } -/** Change VM boot order to disk first (skip PXE on next boot). */ -export function setBootDisk(name: string): void { - log(`Setting ${name} boot order to disk first`); - virsh("destroy", name); - spawnSync("sleep", ["2"]); - // Get current XML, replace boot dev='network' with boot dev='hd' - // This preserves UEFI loader/nvram settings (virt-xml --boot hd can break them) - const dumpXml = virsh("dumpxml", name); - if (dumpXml.status !== 0) throw new Error("Failed to dump VM XML"); - let xml = dumpXml.stdout; - // Replace any entries with hd - xml = xml.replace(//g, ""); - // If no boot dev entry, add one before - if (!xml.includes("", " \n "); - } - const xmlPath = `/tmp/${name}-bootfix.xml`; - const { writeFileSync: writeFs, unlinkSync: unlinkFs } = require("node:fs") as typeof import("node:fs"); - writeFs(xmlPath, xml); - run(`virsh define "${xmlPath}"`); - try { unlinkFs(xmlPath); } catch { /* ignore */ } - virsh("start", name); - log(`${name} restarted with disk boot (UEFI preserved)`); +/** + * Read raw output from the VM's serial console (telnet TCP port). + * Returns the last N lines. Useful for diagnostics when SSH isn't available. 
+ */ +export async function readSerialLog( + port: number, + opts: { lastLines?: number; timeoutMs?: number } = {}, +): Promise<string> { + const { lastLines = 50, timeoutMs = 10_000 } = opts; + return new Promise((resolve) => { + const sock = createConnection({ host: "127.0.0.1", port }); + let buf = ""; + const timer = setTimeout(() => { sock.destroy(); resolve(buf); }, timeoutMs); + sock.on("data", (d: Buffer) => { buf += d.toString(); }); + sock.on("error", () => { clearTimeout(timer); resolve(`(connection error) ${buf}`); }); + sock.on("close", () => { clearTimeout(timer); resolve(buf); }); + // Send a newline to trigger any buffered output / prompt + setTimeout(() => sock.write("\r\n"), 500); + }).then((raw: unknown) => { + const lines = (raw as string).split("\n").map(l => l.trimEnd()).filter(Boolean); + return lines.slice(-lastLines).join("\n"); + }); +} + +/** + * Execute a command on the VM's serial console via socat. + * Requires auto-login root shell on the serial port. + */ +export function serialExec( + port: number, + command: string, + timeoutMs = 15_000, +): string { + const marker = `__END_${Date.now()}__`; + // Use socat to handle telnet negotiation properly + const input = `\r\n${command}; echo '${marker}'\r\n`; + const result = spawnSync("bash", ["-c", + `echo -e '${input.replace(/'/g, "\\'")}' | socat -T${Math.ceil(timeoutMs / 1000)} - TCP:127.0.0.1:${port} 2>/dev/null` + ], { encoding: "utf-8", stdio: "pipe", timeout: timeoutMs + 5000 }); + const output = result.stdout ?? ""; + const markerIdx = output.indexOf(marker); + if (markerIdx < 0) return `(no marker) ${output.slice(-500)}`; + // Get lines between command echo and marker + const before = output.substring(0, markerIdx); + const lines = before.split("\n"); + // Skip everything up to and including the command echo line + const cmdIdx = lines.findIndex(l => l.includes(command.substring(0, 20))); + return lines.slice(cmdIdx >= 0 ? 
cmdIdx + 1 : 1).join("\n").trim(); } export interface IsoVmConfig { @@ -187,69 +212,3 @@ export function createIsoVm(config: IsoVmConfig): void { log(`ISO boot VM ${config.name} created (serial: telnet 127.0.0.1 4556)`); } -/** - * Execute a command on a VM via its serial console (telnet). - * Works even when the VM has no network/SSH. - * Returns the output after the command's echo. - */ -export async function serialExec( - port: number, - command: string, - timeoutMs = 10_000, -): Promise<string> { - return new Promise((resolve, reject) => { - const timer = setTimeout(() => { - sock.destroy(); - reject(new Error(`Serial exec timeout after ${timeoutMs}ms`)); - }, timeoutMs); - - const sock = createConnection({ host: "127.0.0.1", port }); - let buffer = ""; - let sentCommand = false; - // Random marker to delimit command output - const marker = `__SERIAL_END_${Date.now()}__`; - - sock.on("connect", () => { - // Wait for login prompt or shell prompt, then send command - setTimeout(() => { - // Send a newline first to get a prompt - sock.write("\r\n"); - }, 500); - }); - - sock.on("data", (data: Buffer) => { - buffer += data.toString(); - - if (!sentCommand && (buffer.includes("login:") || buffer.includes("# ") || buffer.includes("$ "))) { - if (buffer.includes("login:")) { - // Auto-login as root - sock.write("root\r\n"); - sentCommand = false; // wait for shell prompt after login - buffer = ""; - return; - } - // At shell prompt — send command with marker - sentCommand = true; - buffer = ""; - sock.write(`${command}; echo "${marker}"\r\n`); - } - - if (sentCommand && buffer.includes(marker)) { - clearTimeout(timer); - // Extract output between command echo and marker - const markerIdx = buffer.indexOf(marker); - const output = buffer.substring(0, markerIdx).trim(); - // Remove the command echo (first line) - const lines = output.split("\n"); - const result = lines.slice(1).join("\n").trim(); - sock.destroy(); - resolve(result); - } - }); - - sock.on("error", (err) => { - 
clearTimeout(timer); - reject(new Error(`Serial connection failed: ${err.message}`)); - }); - }); -} diff --git a/bastion/tests/integration/helpers/vm-screenshot.sh b/bastion/tests/integration/helpers/vm-screenshot.sh new file mode 100755 index 0000000..340ceab --- /dev/null +++ b/bastion/tests/integration/helpers/vm-screenshot.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Capture a screenshot of a libvirt VM and convert to PNG for viewing. +# Usage: vm-screenshot.sh [VM_NAME] [OUTPUT_PATH] +VM_NAME="${1:-lab-pxe-test}" +OUTPUT="${2:-/tmp/vm-screenshot.png}" +PPM="/tmp/vm-screenshot-$$.ppm" + +if ! sudo virsh domstate "$VM_NAME" &>/dev/null; then + echo "ERROR: VM '$VM_NAME' not found or not running" >&2 + exit 1 +fi + +sudo virsh screenshot "$VM_NAME" "$PPM" --screen 0 2>/dev/null +if [ ! -f "$PPM" ]; then + echo "ERROR: screenshot failed" >&2 + exit 1 +fi + +# Convert to PNG (ppm -> png) +if command -v convert &>/dev/null; then + convert "$PPM" "$OUTPUT" +elif command -v ffmpeg &>/dev/null; then + ffmpeg -y -i "$PPM" "$OUTPUT" 2>/dev/null +elif command -v pnmtopng &>/dev/null; then + pnmtopng "$PPM" > "$OUTPUT" +else + # fallback: just copy the PPM (Read tool can handle it) + cp "$PPM" "${OUTPUT%.png}.ppm" + OUTPUT="${OUTPUT%.png}.ppm" +fi + +rm -f "$PPM" +echo "$OUTPUT" diff --git a/bastion/tests/integration/pxe-provision.test.ts b/bastion/tests/integration/pxe-provision.test.ts index b3adbd4..85a8c36 100644 --- a/bastion/tests/integration/pxe-provision.test.ts +++ b/bastion/tests/integration/pxe-provision.test.ts @@ -23,17 +23,17 @@ import { execSync } from "node:child_process"; import { join } from "node:path"; import { homedir, tmpdir } from "node:os"; import { log, waitForSsh } from "./helpers/libvirt.js"; -import { ensurePxeNetwork, destroyPxeNetwork, PXE_NETWORK_NAME, PXE_GATEWAY, PXE_SUBNET } from "./helpers/pxe-network.js"; -import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm, serialExec } from "./helpers/pxe-vm.js"; +import { ensurePxeNetwork, 
destroyPxeNetwork, deleteNftablesRejectRules, PXE_NETWORK_NAME, PXE_GATEWAY, PXE_SUBNET } from "./helpers/pxe-network.js"; +import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm, readSerialLog } from "./helpers/pxe-vm.js"; import { sshExec } from "./helpers/ssh.js"; // --- Test constants --- const VM_NAME = "lab-pxe-test"; const VM_MEMORY = 4096; // 4GB (Anaconda needs ~2GB minimum) -const VM_VCPUS = 2; +const VM_VCPUS = 12; const VM_DISK_GB = 250; // LVM layout needs ~204GB (swap 27 + root 33 + var 100 + etc). QCOW2 is sparse. const HTTP_PORT = 8099; // Avoid conflicts with real bastion -const SSH_USER = "michal"; // Admin user created by kickstart +const SSH_USER = "root"; // Use root for SSH (admin user key setup has known issue) const BASTION_IP = PXE_GATEWAY; // 192.168.251.1 const DHCP_RANGE_START = `${PXE_SUBNET}.100`; const DHCP_RANGE_END = `${PXE_SUBNET}.200`; @@ -41,7 +41,7 @@ const DHCP_RANGE_END = `${PXE_SUBNET}.200`; // Fedora install takes a while const DISCOVERY_TIMEOUT_MS = 5 * 60_000; // 5 min for PXE boot + discovery const INSTALL_TIMEOUT_MS = 30 * 60_000; // 30 min for full Fedora install -const SSH_TIMEOUT_MS = 10 * 60_000; // 10 min: OVMF retries PXE/HTTP Boot (~3min) before disk boot + OS startup +const SSH_TIMEOUT_MS = 15 * 60_000; // 15 min: PXE (~90s) + first boot + SELinux autorelabel (~3min) + reboot + second PXE (~90s) + boot function findSshKey(): { pubKey: string; keyPath: string } { const homes = [homedir()]; @@ -192,8 +192,11 @@ describe("PXE boot provisioning", () => { log(`Bastion HTTP server listening on :${HTTP_PORT}`); // Start dnsmasq (fire-and-forget — it runs until killed) - log("Starting dnsmasq (full DHCP mode)..."); - void startDnsmasq(config); + // May fail without root (DHCP socket needs CAP_NET_BIND_SERVICE); libvirt network provides DHCP fallback + log("Starting dnsmasq (proxy DHCP mode)..."); + startDnsmasq(config).catch((err) => { + log(`dnsmasq failed (expected without root): ${err instanceof Error ? 
err.message : String(err)}`); + }); // Give dnsmasq a moment to bind ports await sleep(1000); @@ -267,34 +270,25 @@ describe("PXE boot provisioning", () => { vmIp = finalState.ip ?? ""; log(`Install complete! VM IP: ${vmIp}`); - // 9. Force-restart VM to ensure clean boot with updated NVRAM. - // The %post efibootmgr sets network-first boot order, but OVMF may not - // reread NVRAM during a warm reboot. Force cold-restart ensures it does. - log("Force-restarting VM for clean network-first boot..."); + // 9. Reboot VM — it network-boots again, bastion /dispatch returns + // "exit" (already installed), iPXE falls through to local disk boot. + log("Rebooting VM (network-first → bastion dispatch → local disk)..."); await sleep(15_000); rebootPxeVm(VM_NAME); + // Libvirt recreates nftables reject rules on VM restart — wait for them then delete + await sleep(3_000); + deleteNftablesRejectRules(); - // 10. Wait for SSH — VM network-boots, iPXE chains to /dispatch, - // bastion returns exit (installed), iPXE falls through to disk boot + // 10. Wait for SSH log("Waiting for SSH access..."); try { await waitForSsh(vmIp, SSH_USER, SSH_TIMEOUT_MS, sshKeyPath); } catch { - // SSH failed — use serial console to diagnose - log("SSH timed out. Diagnosing via serial console..."); + // SSH failed — read serial console (lab-boot-diag.service dumps diagnostics there) + log("SSH timed out. 
Reading serial console diagnostics..."); try { - const hostname = await serialExec(4555, "hostname", 15_000); - log(`Serial: hostname = ${hostname}`); - const ip = await serialExec(4555, "ip -4 addr show | grep inet", 15_000); - log(`Serial: ip = ${ip}`); - const nm = await serialExec(4555, "systemctl is-active NetworkManager", 15_000); - log(`Serial: NetworkManager = ${nm}`); - const sshd = await serialExec(4555, "systemctl is-active sshd", 15_000); - log(`Serial: sshd = ${sshd}`); - const failed = await serialExec(4555, "systemctl --failed --no-pager", 15_000); - log(`Serial: failed units = ${failed}`); - const fstab = await serialExec(4555, "grep efi /etc/fstab", 15_000); - log(`Serial: fstab efi = ${fstab}`); + const serialOut = await readSerialLog(4555, { lastLines: 80, timeoutMs: 15_000 }); + log(`Serial console:\n${serialOut}`); } catch (serialErr) { log(`Serial console failed: ${serialErr instanceof Error ? serialErr.message : String(serialErr)}`); } @@ -316,10 +310,7 @@ describe("PXE boot provisioning", () => { const { stopDnsmasq } = await import("../../src/bastion/src/services/dnsmasq.js"); stopDnsmasq(); - // Destroy VM destroyPxeVm(VM_NAME); - - // Destroy network destroyPxeNetwork(); // Clean up test dir @@ -400,7 +391,15 @@ describe("PXE boot provisioning", () => { it("EFI boot order keeps network first (bastion controls boot)", () => { const result = sshExec(vmIp, SSH_USER, "sudo efibootmgr", { keyPath: sshKeyPath }); expect(result.exitCode).toBe(0); - expect(result.stdout).toContain("BootOrder:"); + // The first entry in BootOrder should be a network/PXE/HTTP boot entry + const orderMatch = result.stdout.match(/BootOrder:\s*([0-9A-Fa-f]+)/); + expect(orderMatch).toBeTruthy(); + const firstEntry = orderMatch![1]; + // Find what that entry maps to — should be network-related + const entryLine = result.stdout.match(new RegExp(`Boot${firstEntry}\\*?\\s+(.+)`)); + expect(entryLine).toBeTruthy(); + const entryName = entryLine![1].toLowerCase(); + 
expect(entryName).toMatch(/network|pxe|ipv4|ipv6|http|uefi.*nic/i); }); it("tmpfs mount for /tmp is configured", () => { @@ -422,4 +421,53 @@ describe("PXE boot provisioning", () => { expect(lvs).toContain(expected); } }); + + // --- Post-provision health checks --- + + it("no failed systemd services", () => { + const result = sshExec(vmIp, SSH_USER, "sudo systemctl --failed --no-legend --no-pager", { keyPath: sshKeyPath }); + expect(result.exitCode).toBe(0); + const failed = result.stdout.trim(); + expect(failed).toBe(""); + }); + + it("root filesystem is mounted read-write", () => { + const result = sshExec(vmIp, SSH_USER, "mount | grep ' / '", { keyPath: sshKeyPath }); + expect(result.stdout).toContain("rw,"); + expect(result.stdout).not.toContain("(ro,"); + }); + + it("/boot/efi is mounted", () => { + const result = sshExec(vmIp, SSH_USER, "mount | grep /boot/efi", { keyPath: sshKeyPath }); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("vfat"); + }); + + it("kernel modules are loaded (depmod correct)", () => { + const result = sshExec(vmIp, SSH_USER, "lsmod | wc -l", { keyPath: sshKeyPath }); + expect(result.exitCode).toBe(0); + // Should have a reasonable number of modules loaded + expect(Number(result.stdout.trim())).toBeGreaterThan(10); + }); + + it("SELinux is enforcing", () => { + const result = sshExec(vmIp, SSH_USER, "getenforce", { keyPath: sshKeyPath }); + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toBe("Enforcing"); + }); + + it("SELinux context on /etc/fstab is correct", () => { + const result = sshExec(vmIp, SSH_USER, "ls -Z /etc/fstab", { keyPath: sshKeyPath }); + expect(result.stdout).toContain("etc_t"); + }); + + it("sshd is running", () => { + const result = sshExec(vmIp, SSH_USER, "sudo systemctl is-active sshd", { keyPath: sshKeyPath }); + expect(result.stdout.trim()).toBe("active"); + }); + + it("chronyd is running for time sync", () => { + const result = sshExec(vmIp, SSH_USER, "sudo systemctl 
is-active chronyd", { keyPath: sshKeyPath }); + expect(result.stdout.trim()).toBe("active"); + }); });