fix: PXE boot debugging — bisect root cause, syslog logging, serial console #3

Merged
michal merged 31 commits from wip/ks-debugging into main 2026-03-29 00:50:05 +00:00
5 changed files with 50 additions and 31 deletions
Showing only changes of commit ea7e437241 - Show all commits

View File

@@ -410,7 +410,8 @@ hostnamectl set-hostname ${fqdn}
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
# Make /boot/efi mount non-fatal (prevents emergency mode if EFI partition isn't found)
sed -i '/\\/boot\\/efi/ s/defaults/defaults,nofail/' /etc/fstab
sed -i '/boot\\/efi/ s/defaults/defaults,nofail/' /etc/fstab
bastion_log "fstab /boot/efi set to nofail"
${isVanilla ? `# -- vanilla role: skip k3s kernel/sysctl/firewall setup --
bastion_progress "post-install" "vanilla role -- skipping k3s setup"
@@ -446,20 +447,25 @@ systemctl mask firewalld || true
# -- Enable chronyd for time sync --
systemctl enable chronyd || true`}
# -- Set boot order: local disk first, PXE after --
bastion_progress "post-install" "configuring EFI boot order"
# -- Boot order: restore network first (Anaconda sets disk first, we undo it) --
# Network boot must stay first so the bastion intercepts every reboot. It returns
# exit (local disk) for installed machines, or install for reinstalls.
bastion_progress "post-install" "restoring network-first boot order"
if command -v efibootmgr >/dev/null 2>&1; then
FEDORA_ENTRY=$(efibootmgr | grep -i fedora | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
if [ -n "$FEDORA_ENTRY" ]; then
# Find network/PXE/HTTP boot entries (OVMF uses HTTPv4, real hardware uses PXE/Network)
PXE_ENTRY=$(efibootmgr | grep -iE 'network|pxe|ipv4|ipv6|http' | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
if [ -n "$PXE_ENTRY" ]; then
CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
NEW_ORDER="$FEDORA_ENTRY,$(echo "$CURRENT_ORDER" | sed "s/$FEDORA_ENTRY,\\\\?//;s/,$//")"
# Move PXE entry to front
REST=$(echo "$CURRENT_ORDER" | sed "s/$PXE_ENTRY,\\\\?//;s/,$//" | sed 's/^,//')
NEW_ORDER="$PXE_ENTRY,$REST"
efibootmgr -o "$NEW_ORDER" || true
bastion_log "boot order set: Fedora first ($NEW_ORDER)"
bastion_log "boot order set: network first ($NEW_ORDER)"
else
bastion_log "no Fedora EFI entry found, boot order unchanged"
bastion_log "no PXE boot entry found, boot order unchanged"
fi
else
bastion_log "efibootmgr not available, skipping boot order config"
bastion_log "efibootmgr not available"
fi
# -- Provisioning metadata --

View File

@@ -79,10 +79,11 @@ describe("renderInstallKickstart", () => {
expect(ks).toContain("/etc/sudoers.d/admin");
});
it("efibootmgr section present", () => {
it("boot order restores network first (bastion controls boot)", () => {
const ks = renderInstallKickstart(baseParams());
expect(ks).toContain("efibootmgr");
expect(ks).toContain("FEDORA_ENTRY");
expect(ks).toContain("restore network first");
expect(ks).toContain("PXE_ENTRY");
expect(ks).toContain("efibootmgr -o");
});
it("progress callback URLs use correct serverIp and httpPort", () => {
@@ -157,7 +158,7 @@ describe("renderInstallKickstart", () => {
const ks = renderInstallKickstart(baseParams());
expect(ks).toContain('"configuring SSH"');
expect(ks).toContain('"setting hostname');
expect(ks).toContain('"configuring EFI boot order"');
expect(ks).toContain('"writing provisioning metadata"');
expect(ks).toContain('"writing provisioning metadata"');
});

View File

@@ -21,7 +21,7 @@ import { join } from "node:path";
import { homedir, tmpdir } from "node:os";
import { log, waitForSsh } from "./helpers/libvirt.js";
import { ensurePxeNetwork, destroyPxeNetwork, PXE_NETWORK_NAME, PXE_GATEWAY, PXE_SUBNET } from "./helpers/pxe-network.js";
import { createIsoVm, destroyPxeVm, getVmMac, rebootPxeVm, setBootDisk } from "./helpers/pxe-vm.js";
import { createIsoVm, destroyPxeVm, getVmMac, rebootPxeVm } from "./helpers/pxe-vm.js";
import { sshExec } from "./helpers/ssh.js";
const VM_NAME = "lab-arm-iso-test";
@@ -269,10 +269,15 @@ describe("ARM ISO boot provisioning", () => {
vmIp = finalState.ip ?? "";
log(`ARM install complete! VM IP: ${vmIp}`);
// 9. Switch boot to disk
log("Switching ARM VM boot order to disk...");
await sleep(15_000);
setBootDisk(VM_NAME);
// 9. Ensure VM is running after kickstart reboot/poweroff
log("Waiting for kickstart reboot/poweroff...");
await sleep(30_000); // ARM is slow
const { spawnSync: spSync } = await import("node:child_process");
const stateResult = spSync("sudo", ["virsh", "domstate", VM_NAME], { encoding: "utf-8", stdio: "pipe" });
if (stateResult.stdout?.trim() === "shut off") {
log("ARM VM shut off after install. Restarting...");
rebootPxeVm(VM_NAME);
}
// 10. Wait for SSH (ARM reboot is slow)
log("Waiting for SSH on ARM VM...");

View File

@@ -18,7 +18,7 @@ import { homedir, tmpdir } from "node:os";
import { mkdirSync, rmSync } from "node:fs";
import { log, waitForSsh } from "./helpers/libvirt.js";
import { ensurePxeNetwork, destroyPxeNetwork, PXE_NETWORK_NAME, PXE_GATEWAY, PXE_SUBNET } from "./helpers/pxe-network.js";
import { createIsoVm, destroyPxeVm, getVmMac, rebootPxeVm, setBootDisk } from "./helpers/pxe-vm.js";
import { createIsoVm, destroyPxeVm, getVmMac, rebootPxeVm } from "./helpers/pxe-vm.js";
import { sshExec } from "./helpers/ssh.js";
const VM_NAME = "lab-iso-test";
@@ -245,10 +245,15 @@ describe("ISO boot provisioning", () => {
vmIp = finalState.ip ?? "";
log(`Install complete! VM IP: ${vmIp}`);
// 9. Switch boot to disk
log("Switching VM boot order to disk...");
await sleep(10_000);
setBootDisk(VM_NAME);
// 9. Ensure VM is running after kickstart reboot/poweroff
log("Waiting for kickstart reboot/poweroff...");
await sleep(15_000);
const { spawnSync: spSync } = await import("node:child_process");
const stateResult = spSync("sudo", ["virsh", "domstate", VM_NAME], { encoding: "utf-8", stdio: "pipe" });
if (stateResult.stdout?.trim() === "shut off") {
log("VM shut off after install. Restarting...");
rebootPxeVm(VM_NAME);
}
// 10. Wait for SSH
log("Waiting for SSH...");

View File

@@ -24,7 +24,7 @@ import { join } from "node:path";
import { homedir, tmpdir } from "node:os";
import { log, waitForSsh } from "./helpers/libvirt.js";
import { ensurePxeNetwork, destroyPxeNetwork, PXE_NETWORK_NAME, PXE_GATEWAY, PXE_SUBNET } from "./helpers/pxe-network.js";
import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm, setBootDisk } from "./helpers/pxe-vm.js";
import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm } from "./helpers/pxe-vm.js";
import { sshExec } from "./helpers/ssh.js";
// --- Test constants ---
@@ -267,12 +267,15 @@ describe("PXE boot provisioning", () => {
vmIp = finalState.ip ?? "";
log(`Install complete! VM IP: ${vmIp}`);
// 9. Switch VM boot to disk (OVMF PXE/HTTP Boot loop prevents reaching installed OS)
log("Switching VM boot order to disk...");
await sleep(10_000); // Let kickstart reboot settle
setBootDisk(VM_NAME);
// 9. Force-restart VM to ensure clean boot with updated NVRAM.
// The %post efibootmgr call sets a network-first boot order, but OVMF may not
// re-read NVRAM during a warm reboot. A forced cold restart ensures it does.
log("Force-restarting VM for clean network-first boot...");
await sleep(15_000);
rebootPxeVm(VM_NAME);
// 10. Wait for SSH
// 10. Wait for SSH — VM network-boots, iPXE chains to /dispatch,
// bastion returns exit (installed), iPXE falls through to disk boot
log("Waiting for SSH access...");
await waitForSsh(vmIp, SSH_USER, SSH_TIMEOUT_MS, sshKeyPath);
@@ -372,10 +375,9 @@ describe("PXE boot provisioning", () => {
expect(result.stdout).toContain("no");
});
it("EFI boot order has Fedora first (local disk before PXE)", () => {
it("EFI boot order keeps network first (bastion controls boot)", () => {
const result = sshExec(vmIp, SSH_USER, "sudo efibootmgr", { keyPath: sshKeyPath });
expect(result.exitCode).toBe(0);
// Boot order should start with the Fedora entry
expect(result.stdout).toContain("BootOrder:");
});