feat: install logging, error trapping, PXE/ISO integration tests
Some checks failed
CI/CD / lint (pull_request) Failing after 13s
CI/CD / test (pull_request) Failing after 10s
CI/CD / typecheck (pull_request) Failing after 36s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped

Kickstart installs on real hardware failed silently — no error reporting,
only 3 progress callbacks, zero log streaming. This overhaul makes every
install fully observable.

Kickstart improvements:
- Error trapping in %pre and %post (trap ERR sends failure details to bastion)
- 12+ granular progress stages (was 3): SSH, hostname, k3s prep, EFI boot, metadata
- Background log streamer: tails %post output and batch-sends to /api/log
- bastion_log() function for explicit log lines from kickstart scripts

Bastion API:
- POST /api/log — receives raw log lines from kickstart (single or batch)
- InstallLogBuffer — per-MAC ring buffer (2000 lines) + file persistence
- GET /api/logs/:mac — now returns log_lines + log_total alongside stages
- SSE /api/logs/:mac/follow — uses named events (event: stage vs event: log)
- Progress events forwarded to labd via bastion-progress WebSocket message
- Post-provision k3s logs routed through progressBus (was console-only)

dnsmasq fixes found during VM testing:
- HTTP Boot filename: ipxe-real.efi → ipxe.efi (leftover from old 2-stage approach)
- pxe-service directives: only in proxy mode (breaks OVMF PXE in full mode)
- PXEClient vendor class echo for UEFI firmware compatibility

Integration tests:
- PXE boot test: blank UEFI VM → dnsmasq → HTTP Boot → iPXE → bastion → install
- ISO boot test: blank VM boots from bastion-generated ISO → same flow
- Shared helpers: pxe-network (no DHCP, nftables fix), pxe-vm (UEFI + ISO boot)
- test-provision.sh: runs both PXE + ISO tests with prerequisite checks
- 250GB sparse QCOW2 disk (LVM layout needs ~204GB)

201 unit tests passing (11 new).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-03-26 22:26:33 +00:00
parent ffc4a782d2
commit 46b017d77e
189 changed files with 16241 additions and 432 deletions

View File

@@ -0,0 +1,95 @@
// Libvirt network for PXE boot testing.
// Unlike the regular test network, this one has NO DHCP —
// the bastion provides full DHCP + PXE on this network.
import { execSync, spawnSync } from "node:child_process";
import { writeFileSync, unlinkSync } from "node:fs";
import { log } from "./libvirt.js";
/** Name of the libvirt network used for PXE boot testing. */
export const PXE_NETWORK_NAME = "lab-pxe-test";
/** Name of the host bridge device backing the PXE network. */
export const PXE_BRIDGE = "virbr-pxe";
/** First three octets of the PXE network's /24 subnet. */
export const PXE_SUBNET = "192.168.251";
/** Address assigned to the bridge itself: host `.1` of the subnet. */
export const PXE_GATEWAY = PXE_SUBNET + ".1";
// True only when the platform exposes getuid() and it reports uid 0.
const IS_ROOT = typeof process.getuid === "function" && process.getuid() === 0;
/**
 * Run a command line synchronously through execSync and return its stdout
 * as UTF-8 text. When the process is not running as root, the command is
 * prefixed with `sudo`.
 *
 * @param cmd - Command line to execute.
 * @returns Captured standard output of the command.
 * @throws Whatever execSync throws (e.g. when the command exits non-zero).
 */
function run(cmd: string): string {
  const prefix = IS_ROOT ? "" : "sudo ";
  const commandLine = prefix + cmd;
  return execSync(commandLine, { stdio: "pipe", encoding: "utf-8" });
}
/**
 * Invoke `virsh` with the given arguments via spawnSync (argument vector,
 * no command-line string). When not root, the call is made as
 * `sudo virsh <args...>`.
 *
 * The result is returned rather than thrown, so callers decide how to
 * treat a failure.
 *
 * @param args - Arguments passed to virsh.
 * @returns Exit status (1 when spawnSync reports no status) and captured
 *          stdout (empty string when none is available).
 */
function virsh(...args: string[]): { status: number; stdout: string } {
  let program = "virsh";
  let argv = args;
  if (!IS_ROOT) {
    program = "sudo";
    argv = ["virsh", ...args];
  }
  const { status, stdout } = spawnSync(program, argv, { encoding: "utf-8", stdio: "pipe" });
  return { status: status ?? 1, stdout: stdout ?? "" };
}
// libvirt network definition for the PXE test network. It deliberately has
// no <dhcp> element inside <ip>: the bastion's dnsmasq provides full
// DHCP + PXE on this network.
const NETWORK_XML = [
  "<network>",
  `<name>${PXE_NETWORK_NAME}</name>`,
  "<forward mode='nat'/>",
  `<bridge name='${PXE_BRIDGE}' stp='on' delay='0'/>`,
  `<ip address='${PXE_GATEWAY}' netmask='255.255.255.0'>`,
  "</ip>",
  "</network>",
].join("\n");
/**
 * Delete the reject rules mentioning the PXE bridge from one chain of the
 * `inet libvirt` nftables table.
 *
 * Rules are located by the `# handle N` annotation that `nft -a` appends to
 * each rule, and deleted one by one by handle.
 *
 * @param chain - Chain name inside the `inet libvirt` table.
 * @returns Number of rules that were deleted.
 * @throws If a `nft delete rule` command fails.
 */
function deletePxeRejectRules(chain: string): number {
  // `|| true` keeps a failed listing from throwing; the output is then
  // empty and nothing is deleted.
  const listing = run(`nft -a list chain inet libvirt ${chain} 2>/dev/null || true`);
  let removed = 0;
  for (const line of listing.split("\n")) {
    if (!line.includes(PXE_BRIDGE) || !line.includes("reject")) continue;
    const handle = /# handle (\d+)/.exec(line)?.[1];
    if (handle === undefined) continue;
    run(`nft delete rule inet libvirt ${chain} handle ${handle}`);
    removed += 1;
  }
  return removed;
}

/**
 * Ensure the PXE test network exists and is active (no DHCP).
 *
 * - If `virsh net-info` reports the network as active, nothing is changed.
 * - If it is defined but not active, it is destroyed/undefined and rebuilt.
 * - Otherwise it is defined from NETWORK_XML and started.
 *
 * After creation, nftables reject rules for the bridge are removed on a
 * best-effort basis; a failure there is logged, not thrown.
 *
 * @throws If `virsh net-define` or `virsh net-start` fails.
 */
export function ensurePxeNetwork(): void {
  const info = virsh("net-info", PXE_NETWORK_NAME);
  const isDefined = info.status === 0;

  // virsh pads the value column ("Active:         yes"), so match any amount
  // of whitespace after the label instead of a single space.
  if (isDefined && /^Active:\s+yes\s*$/m.test(info.stdout)) {
    log(`Network ${PXE_NETWORK_NAME} already active`);
    return;
  }

  // Destroy existing if present but inactive
  if (isDefined) {
    virsh("net-destroy", PXE_NETWORK_NAME);
    virsh("net-undefine", PXE_NETWORK_NAME);
  }

  const xmlPath = "/tmp/lab-pxe-test-network.xml";
  writeFileSync(xmlPath, NETWORK_XML);
  try {
    log(`Creating PXE libvirt network: ${PXE_NETWORK_NAME} (${PXE_SUBNET}.0/24, no DHCP)`);
    run(`virsh net-define "${xmlPath}"`);
    run(`virsh net-start "${PXE_NETWORK_NAME}"`);
  } finally {
    // Always remove the temp file, even when define/start throws.
    try {
      unlinkSync(xmlPath);
    } catch {
      /* ignore */
    }
  }

  // Libvirt creates nftables rules that reject traffic on the bridge.
  // DHCP works (dnsmasq uses raw sockets) but TFTP/HTTP from VM->host gets blocked.
  // Delete the reject rules so VM traffic can reach the bastion.
  try {
    const removed =
      deletePxeRejectRules("guest_input") + deletePxeRejectRules("guest_output");
    if (removed > 0) {
      log(`Removed nftables reject rules for ${PXE_BRIDGE}`);
    } else {
      // Do not claim a removal that did not happen.
      log(`No nftables reject rules found for ${PXE_BRIDGE}`);
    }
  } catch {
    log(`Could not update nftables rules (may need manual firewall config)`);
  }

  log(`Network ${PXE_NETWORK_NAME} created and active`);
}
/**
 * Tear down the PXE test network: stop it, then remove its definition.
 * The virsh results are not inspected, so a failure of either step
 * (e.g. the network does not exist) is ignored.
 */
export function destroyPxeNetwork(): void {
  log(`Destroying PXE network: ${PXE_NETWORK_NAME}`);
  // No explicit nftables cleanup here: libvirt is expected to remove its own
  // rules when the network is destroyed.
  for (const subcommand of ["net-destroy", "net-undefine"]) {
    virsh(subcommand, PXE_NETWORK_NAME);
  }
}