feat: install logging, error trapping, PXE/ISO integration tests
Some checks failed
CI/CD / lint (pull_request) Failing after 13s
CI/CD / test (pull_request) Failing after 10s
CI/CD / typecheck (pull_request) Failing after 36s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped
Some checks failed
CI/CD / lint (pull_request) Failing after 13s
CI/CD / test (pull_request) Failing after 10s
CI/CD / typecheck (pull_request) Failing after 36s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped
Kickstart installs on real hardware failed silently — no error reporting, only 3 progress callbacks, zero log streaming. This overhaul makes every install fully observable.

Kickstart improvements:
- Error trapping in %pre and %post (trap ERR sends failure details to bastion)
- 12+ granular progress stages (was 3): SSH, hostname, k3s prep, EFI boot, metadata
- Background log streamer: tails %post output and batch-sends to /api/log
- bastion_log() function for explicit log lines from kickstart scripts

Bastion API:
- POST /api/log — receives raw log lines from kickstart (single or batch)
- InstallLogBuffer — per-MAC ring buffer (2000 lines) + file persistence
- GET /api/logs/:mac — now returns log_lines + log_total alongside stages
- SSE /api/logs/:mac/follow — uses named events (event: stage vs event: log)
- Progress events forwarded to labd via bastion-progress WebSocket message
- Post-provision k3s logs routed through progressBus (was console-only)

dnsmasq fixes found during VM testing:
- HTTP Boot filename: ipxe-real.efi → ipxe.efi (leftover from old 2-stage approach)
- pxe-service directives: only in proxy mode (breaks OVMF PXE in full mode)
- PXEClient vendor class echo for UEFI firmware compatibility

Integration tests:
- PXE boot test: blank UEFI VM → dnsmasq → HTTP Boot → iPXE → bastion → install
- ISO boot test: blank VM boots from bastion-generated ISO → same flow
- Shared helpers: pxe-network (no DHCP, nftables fix), pxe-vm (UEFI + ISO boot)
- test-provision.sh: runs both PXE + ISO tests with prerequisite checks
- 250GB sparse QCOW2 disk (LVM layout needs ~204GB)

201 unit tests passing (11 new).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
396
bastion/tests/integration/pxe-provision.test.ts
Normal file
396
bastion/tests/integration/pxe-provision.test.ts
Normal file
@@ -0,0 +1,396 @@
|
||||
// Integration test: full PXE boot provisioning flow.
|
||||
//
|
||||
// This test validates the ENTIRE bastion flow end-to-end:
|
||||
// 1. Starts the bastion (HTTP + dnsmasq) on an isolated libvirt network
|
||||
// 2. Creates a blank UEFI VM that PXE boots
|
||||
// 3. VM discovers itself via PXE -> bastion
|
||||
// 4. We queue the machine for install
|
||||
// 5. VM reboots, PXE boots again, installs Fedora via kickstart
|
||||
// 6. Verifies: discovery, progress events, SSH access, installed state
|
||||
//
|
||||
// Prerequisites:
|
||||
// - libvirtd running
|
||||
// - OVMF firmware installed (sudo dnf install edk2-ovmf)
|
||||
// - iPXE packages installed (sudo dnf install ipxe-bootimgs-x86 ipxe-bootimgs-aarch64)
|
||||
// - sudo access
|
||||
// - Internet access (downloads Fedora kernel+initrd on first run)
|
||||
//
|
||||
// Run: sudo pnpm run test:integration:pxe
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
||||
import { readFileSync, existsSync, mkdirSync, rmSync, copyFileSync, symlinkSync, writeFileSync } from "node:fs";
|
||||
import { execSync } from "node:child_process";
|
||||
import { join } from "node:path";
|
||||
import { homedir, tmpdir } from "node:os";
|
||||
import { log, waitForSsh } from "./helpers/libvirt.js";
|
||||
import { ensurePxeNetwork, destroyPxeNetwork, PXE_NETWORK_NAME, PXE_GATEWAY, PXE_SUBNET } from "./helpers/pxe-network.js";
|
||||
import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm } from "./helpers/pxe-vm.js";
|
||||
import { sshExec } from "./helpers/ssh.js";
|
||||
|
||||
// --- Test constants ---

// Throwaway VM: identity and sizing.
const VM_NAME = "lab-pxe-test";
const VM_MEMORY = 4 * 1024; // 4GB (Anaconda needs ~2GB minimum)
const VM_VCPUS = 2;
// LVM layout needs ~204GB (swap 27 + root 33 + var 100 + etc). QCOW2 is sparse.
const VM_DISK_GB = 250;

// Bastion endpoint under test.
const HTTP_PORT = 8099; // Avoid conflicts with real bastion
const SSH_USER = "michal"; // Admin user created by kickstart
const BASTION_IP = PXE_GATEWAY; // 192.168.251.1

// DHCP pool handed to dnsmasq: hosts .100 through .200 of the PXE subnet.
const DHCP_RANGE_START = PXE_SUBNET + ".100";
const DHCP_RANGE_END = PXE_SUBNET + ".200";

// Time budgets, in milliseconds. Fedora install takes a while.
const DISCOVERY_TIMEOUT_MS = 5 * 60 * 1000; // 5 min for PXE boot + discovery
const INSTALL_TIMEOUT_MS = 30 * 60 * 1000; // 30 min for full Fedora install
// 10 min: OVMF retries PXE/HTTP Boot (~3min) before disk boot + OS startup
const SSH_TIMEOUT_MS = 10 * 60 * 1000;
|
||||
|
||||
/**
 * Locate an SSH key pair (private key plus matching `.pub`) for logging in to
 * the provisioned VM.
 *
 * Candidates are tried in order; the first one whose private AND public file
 * both exist wins:
 *   1. `$SSH_KEY_PATH`, when set
 *   2. `id_ed25519`, `id_ecdsa`, `id_rsa` under `~/.ssh` of the current user
 *   3. the same three names under `/home/$SUDO_USER/.ssh`, when `SUDO_USER` is set
 *
 * @returns the trimmed public key text and the private key path.
 * @throws Error when no candidate pair exists.
 */
function findSshKey(): { pubKey: string; keyPath: string } {
  const candidates: string[] = [];

  const override = process.env["SSH_KEY_PATH"];
  if (override) candidates.push(override);

  const homeDirs = [homedir()];
  const invokingUser = process.env["SUDO_USER"];
  if (invokingUser) homeDirs.push(join("/home", invokingUser));

  for (const dir of homeDirs) {
    for (const base of ["id_ed25519", "id_ecdsa", "id_rsa"]) {
      candidates.push(join(dir, ".ssh", base));
    }
  }

  const keyPath = candidates.find((path) => existsSync(path) && existsSync(`${path}.pub`));
  if (keyPath === undefined) {
    throw new Error("No SSH key found — set SSH_KEY_PATH or ensure keys exist in ~/.ssh/");
  }
  return { pubKey: readFileSync(`${keyPath}.pub`, "utf-8").trim(), keyPath };
}
|
||||
|
||||
/** Resolve (with no value) once `ms` milliseconds have elapsed. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
|
||||
/**
 * Poll a bastion API endpoint until `check` accepts the decoded JSON body.
 *
 * Each attempt GETs `url`. Non-2xx responses, network errors, JSON decode
 * errors and exceptions thrown by `check` all count as "not ready yet" and are
 * retried, but the most recent one is remembered so the timeout error explains
 * what the poller last saw.
 *
 * @param url        Endpoint to GET.
 * @param check      Predicate over the decoded body; `true` ends the poll.
 * @param timeoutMs  Overall budget. No new attempt starts after it elapses.
 * @param intervalMs Pause between attempts (default 5000), clamped so the
 *                   pause itself never runs past the deadline.
 * @returns The first body accepted by `check`.
 * @throws Error when the deadline passes without an accepted body.
 */
async function pollApi<T>(
  url: string,
  check: (data: T) => boolean,
  timeoutMs: number,
  intervalMs = 5000,
): Promise<T> {
  const deadline = Date.now() + timeoutMs;
  let lastResult = "no attempt completed";

  while (Date.now() < deadline) {
    try {
      const res = await fetch(url);
      if (res.ok) {
        const data = (await res.json()) as T;
        if (check(data)) return data;
        lastResult = "condition not met";
      } else {
        lastResult = `HTTP ${res.status}`;
      }
    } catch (err: unknown) {
      // Bastion not ready yet or network hiccup — keep polling, but keep the reason.
      lastResult = err instanceof Error ? err.message : String(err);
    }

    const remaining = deadline - Date.now();
    if (remaining <= 0) break;
    await sleep(Math.min(intervalMs, remaining));
  }

  throw new Error(`Timeout after ${timeoutMs}ms polling ${url} (last result: ${lastResult})`);
}
|
||||
|
||||
describe("PXE boot provisioning", () => {
|
||||
// Shared suite state: assigned in beforeAll, read by the tests and afterAll.

// Running bastion HTTP server; only close() is needed for teardown.
let bastionApp: { close: () => Promise<void> };
// Scratch directory passed to loadConfig as bastionDir; removed in afterAll.
let testDir: string;
// MAC and IP of the VM under test.
let vmMac: string, vmIp: string;
// Key pair returned by findSshKey(): private key path and public key text.
let sshKeyPath: string, sshPubKey: string;
|
||||
|
||||
// Suite setup: bring up an isolated network, a bastion (HTTP + dnsmasq) and a
// blank UEFI VM, then drive the VM through discovery and a full install. The
// tests below only inspect the resulting state, so any failure here fails the
// whole suite. Failures are raised as early as possible rather than being left
// to surface as a long poll timeout.
beforeAll(async () => {
  const { pubKey, keyPath } = findSshKey();
  sshKeyPath = keyPath;
  sshPubKey = pubKey;

  // 1. Create isolated network (no DHCP — bastion provides it)
  log("Setting up PXE test network...");
  ensurePxeNetwork();

  // 2. Set up bastion directories and config
  testDir = join(tmpdir(), `lab-pxe-test-${Date.now()}`);
  mkdirSync(testDir, { recursive: true });
  mkdirSync(join(testDir, "tftp"), { recursive: true });
  mkdirSync(join(testDir, "http"), { recursive: true });
  mkdirSync(join(testDir, "logs"), { recursive: true });

  // 3. Start the bastion (HTTP server + dnsmasq)
  log("Starting bastion...");

  const { createApp } = await import("../../src/bastion/src/server.js");
  const { loadConfig } = await import("../../src/bastion/src/config.js");
  const { generateDnsmasqConf, startDnsmasq } = await import("../../src/bastion/src/services/dnsmasq.js");
  const { generateDiscoverKickstart } = await import("../../src/bastion/src/services/kickstart-generator.js");
  const { renderBootIpxe } = await import("../../src/bastion/src/templates/boot.ipxe.js");

  const config = loadConfig({
    bastionDir: testDir,
    httpPort: HTTP_PORT,
    iface: "virbr-pxe",
    serverIp: BASTION_IP,
    network: `${PXE_SUBNET}.0`,
    gateway: BASTION_IP,
    dhcpMode: "full",
    dhcpRangeStart: DHCP_RANGE_START,
    dhcpRangeEnd: DHCP_RANGE_END,
    domain: "pxe-test.local",
    sshKeys: [sshPubKey],
    adminUser: SSH_USER,
  });

  // Prepare boot artifacts
  log("Preparing boot artifacts (iPXE, kernel, initrd)...");

  // iPXE UEFI binary
  const ipxeSrc = "/usr/share/ipxe/ipxe-snponly-x86_64.efi";
  const ipxeDest = join(config.tftpDir, "ipxe.efi");
  if (!existsSync(ipxeSrc)) {
    throw new Error(`iPXE not found: ${ipxeSrc}. Install: sudo dnf install ipxe-bootimgs-x86`);
  }
  copyFileSync(ipxeSrc, ipxeDest);

  // Fedora kernel + initrd (cached across runs)
  const cacheDir = "/var/lib/libvirt/images/lab-pxe-cache";
  mkdirSync(cacheDir, { recursive: true });

  const kernel = join(cacheDir, `vmlinuz-${config.fedoraVersion}`);
  const initrd = join(cacheDir, `initrd-${config.fedoraVersion}.img`);

  // Download to "<dest>.part" and rename only after curl succeeds, so a failed
  // or interrupted transfer never leaves a truncated file that a later run
  // would mistake for a valid cache entry.
  const ensureCached = (label: string, dest: string, remoteName: string): void => {
    if (existsSync(dest)) {
      log(`Fedora ${label} cached`);
      return;
    }
    log(`Downloading Fedora ${config.fedoraVersion} ${label}...`);
    execSync(
      `curl -# -L -f -o "${dest}.part" "${config.fedoraMirror}/images/pxeboot/${remoteName}" && mv -f "${dest}.part" "${dest}"`,
      { stdio: "inherit", timeout: 300_000 },
    );
  };
  ensureCached("kernel", kernel, "vmlinuz");
  ensureCached("initrd", initrd, "initrd.img");

  copyFileSync(kernel, join(config.httpDir, "vmlinuz"));
  copyFileSync(initrd, join(config.httpDir, "initrd.img"));

  // Symlink iPXE into HTTP dir for UEFI HTTP Boot fallback
  try { symlinkSync(ipxeDest, join(config.httpDir, "ipxe.efi")); } catch { /* exists */ }

  // Generate boot scripts
  const discoverKs = generateDiscoverKickstart(config);
  writeFileSync(join(config.httpDir, "discover.ks"), discoverKs);
  const bootIpxe = renderBootIpxe({ serverIp: config.serverIp, httpPort: config.httpPort });
  writeFileSync(join(config.httpDir, "boot.ipxe"), bootIpxe);

  // Generate dnsmasq config
  generateDnsmasqConf(config);

  // Start HTTP server
  const { app } = createApp(config);
  bastionApp = app;
  await app.listen({ port: config.httpPort, host: "0.0.0.0" });
  log(`Bastion HTTP server listening on :${HTTP_PORT}`);

  // Start dnsmasq (fire-and-forget — it runs until killed)
  log("Starting dnsmasq (full DHCP mode)...");
  void startDnsmasq(config);
  // Give dnsmasq a moment to bind ports
  await sleep(1000);

  // 4. Create blank PXE-bootable VM
  log("Creating PXE VM (blank disk, UEFI boot)...");
  createPxeVm({
    name: VM_NAME,
    memory: VM_MEMORY,
    vcpus: VM_VCPUS,
    diskSize: VM_DISK_GB,
    network: PXE_NETWORK_NAME,
  });

  // Get the VM's MAC address (assigned by libvirt)
  const mac = getVmMac(VM_NAME);
  if (!mac) throw new Error("Could not determine VM MAC address");
  vmMac = mac;
  log(`VM MAC: ${vmMac}`);

  // 5. Wait for discovery — the VM PXE boots and calls /api/discover
  log("Waiting for VM to PXE boot and discover...");
  type MachinesResponse = { discovered: Record<string, unknown> };
  await pollApi<MachinesResponse>(
    `http://${BASTION_IP}:${HTTP_PORT}/api/machines`,
    (data) => vmMac in data.discovered,
    DISCOVERY_TIMEOUT_MS,
  );
  log("VM discovered!");

  // 6. Queue the machine for install
  log("Queueing machine for install...");
  const installRes = await fetch(`http://${BASTION_IP}:${HTTP_PORT}/api/install`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      mac: vmMac,
      hostname: VM_NAME,
      disk: "", // auto-detect
      role: "vanilla", // fastest — skip k3s
    }),
  });
  if (!installRes.ok) {
    // Fail now: without a queued install the poll in step 8 can only time out.
    throw new Error(`Queueing install failed: HTTP ${installRes.status} ${await installRes.text()}`);
  }
  const installResult: unknown = await installRes.json();
  log(`Install queued: ${JSON.stringify(installResult)}`);

  // 7. After discovery, the VM powers off (discovery kickstart does 'poweroff').
  //    Wait a bit and then start it again for the install boot.
  log("Waiting for discovery reboot cycle...");
  await sleep(15_000);

  // Force restart the VM (it should have shut down after discovery)
  rebootPxeVm(VM_NAME);

  // 8. Wait for install to complete
  log("Waiting for install to complete (this takes 10-20 minutes)...");
  type LogsResponse = { status: string; progress: string; ip?: string };
  const finalState = await pollApi<LogsResponse>(
    `http://${BASTION_IP}:${HTTP_PORT}/api/logs/${encodeURIComponent(vmMac)}`,
    (data) => data.status === "installed" || data.progress === "error",
    INSTALL_TIMEOUT_MS,
    10_000,
  );

  if (finalState.progress === "error") {
    // finalState is the full body of /api/logs/:mac, so dump it for diagnostics.
    log(`INSTALL FAILED. Last state: ${JSON.stringify(finalState, null, 2)}`);
    throw new Error("Install failed — check logs above");
  }

  if (!finalState.ip) {
    // Without an address every SSH-based test is meaningless; don't wait out SSH_TIMEOUT_MS.
    throw new Error(`Install finished but bastion reported no IP for ${vmMac}`);
  }
  vmIp = finalState.ip;
  log(`Install complete! VM IP: ${vmIp}`);

  // 9. Wait for SSH
  log("Waiting for SSH access...");
  await waitForSsh(vmIp, SSH_USER, SSH_TIMEOUT_MS, sshKeyPath);

  log("PXE provision test setup complete.");
}, DISCOVERY_TIMEOUT_MS + INSTALL_TIMEOUT_MS + SSH_TIMEOUT_MS + 120_000); // total timeout
|
||||
|
||||
// Suite teardown. Every step runs even if an earlier one throws, so a failure
// stopping dnsmasq (for example) cannot leak the VM, the libvirt network or
// the scratch directory. Failures are collected and reported together at the end.
afterAll(async () => {
  log("Cleaning up...");

  const failures: string[] = [];
  const step = async (label: string, fn: () => unknown): Promise<void> => {
    try {
      await fn();
    } catch (err: unknown) {
      failures.push(`${label}: ${err instanceof Error ? err.message : String(err)}`);
    }
  };

  // Stop bastion (best-effort: close errors are deliberately ignored)
  if (bastionApp) {
    await bastionApp.close().catch(() => {});
  }

  // Stop dnsmasq
  await step("stop dnsmasq", async () => {
    const { stopDnsmasq } = await import("../../src/bastion/src/services/dnsmasq.js");
    await stopDnsmasq();
  });

  // Destroy VM
  await step("destroy VM", () => destroyPxeVm(VM_NAME));

  // Destroy network
  await step("destroy network", () => destroyPxeNetwork());

  // Clean up test dir
  await step("remove test dir", () => {
    if (testDir) {
      rmSync(testDir, { recursive: true, force: true });
    }
  });

  if (failures.length > 0) {
    throw new Error(`Cleanup failed — ${failures.join("; ")}`);
  }
});
|
||||
|
||||
// NOTE(review): despite the title, this can only assert on the "installed"
// map — by the time the tests run, beforeAll has already completed the
// install. Hardware info (cpu_cores / memory_gb) is not checked here.
it("machine was discovered with hardware info", async () => {
  // After install, machine moves from discovered to installed — check installed
  const res = await fetch(`http://${BASTION_IP}:${HTTP_PORT}/api/machines`);
  const all = (await res.json()) as { installed: Record<string, { hostname: string }> };
  expect(all.installed[vmMac]).toBeDefined();
  expect(all.installed[vmMac]?.hostname).toBe(VM_NAME);
});
|
||||
|
||||
// The bastion's installed map must hold this VM with a dotted-quad IP and the
// role that was requested when the install was queued.
it("machine is in installed state with IP", async () => {
  const response = await fetch(`http://${BASTION_IP}:${HTTP_PORT}/api/machines`);
  const { installed } = (await response.json()) as {
    installed: Record<string, { ip: string; role: string }>;
  };
  const entry = installed[vmMac];
  expect(entry).toBeDefined();
  expect(entry.ip).toMatch(/^\d+\.\d+\.\d+\.\d+$/);
  expect(entry.role).toBe("vanilla");
});
|
||||
|
||||
// The per-MAC log endpoint must report the terminal status/progress pair.
it("progress stages were recorded", async () => {
  const logsUrl = `http://${BASTION_IP}:${HTTP_PORT}/api/logs/${encodeURIComponent(vmMac)}`;
  const response = await fetch(logsUrl);
  const { status, progress } = (await response.json()) as { status: string; progress: string };
  expect(status).toBe("installed");
  expect(progress).toBe("complete");
});
|
||||
|
||||
// Should have at least some log lines from the log streamer.
it("log lines were captured", async () => {
  const logsUrl = `http://${BASTION_IP}:${HTTP_PORT}/api/logs/${encodeURIComponent(vmMac)}`;
  const response = await fetch(logsUrl);
  const payload = (await response.json()) as {
    log_total?: number;
    log_lines?: Array<{ line: string }>;
  };
  expect(payload.log_total).toBeGreaterThan(0);
});
|
||||
|
||||
// Key-based login as the kickstart-created admin user must succeed.
it("SSH works with admin user", () => {
  const { exitCode, stdout } = sshExec(vmIp, SSH_USER, "whoami", { keyPath: sshKeyPath });
  expect(exitCode).toBe(0);
  expect(stdout.trim()).toBe(SSH_USER);
});
|
||||
|
||||
// `sudo whoami` must succeed and report root.
it("admin user has sudo access", () => {
  const { exitCode, stdout } = sshExec(vmIp, SSH_USER, "sudo whoami", { keyPath: sshKeyPath });
  expect(exitCode).toBe(0);
  expect(stdout.trim()).toBe("root");
});
|
||||
|
||||
// The FQDN reported by the guest must include the hostname queued for install.
it("hostname is set correctly", () => {
  const { exitCode, stdout } = sshExec(vmIp, SSH_USER, "hostname -f", { keyPath: sshKeyPath });
  expect(exitCode).toBe(0);
  expect(stdout.trim()).toContain(VM_NAME);
});
|
||||
|
||||
// /etc/lab-provisioned must exist and record hostname, role and bastion address.
it("provisioning metadata file exists", () => {
  const { exitCode, stdout } = sshExec(vmIp, SSH_USER, "cat /etc/lab-provisioned", { keyPath: sshKeyPath });
  expect(exitCode).toBe(0);

  const expectedEntries = [`hostname: ${VM_NAME}`, "role: vanilla", `bastion: ${BASTION_IP}`];
  for (const entry of expectedEntries) {
    expect(stdout).toContain(entry);
  }
});
|
||||
|
||||
// sshd_config must carry a PermitRootLogin line set to prohibit-password.
it("SSH root login is key-only", () => {
  const { stdout } = sshExec(
    vmIp,
    SSH_USER,
    "sudo grep '^PermitRootLogin' /etc/ssh/sshd_config",
    { keyPath: sshKeyPath },
  );
  expect(stdout).toContain("prohibit-password");
});
|
||||
|
||||
// sshd_config must explicitly set "PasswordAuthentication no". The directive
// is matched as a whole line start rather than by substring, and grep's exit
// status is checked so a missing directive fails with a clear cause.
it("password auth is disabled", () => {
  const result = sshExec(vmIp, SSH_USER, "sudo grep '^PasswordAuthentication' /etc/ssh/sshd_config", { keyPath: sshKeyPath });
  expect(result.exitCode).toBe(0);
  expect(result.stdout).toMatch(/^PasswordAuthentication\s+no\b/im);
});
|
||||
|
||||
// Verifies what the title claims: the FIRST entry of BootOrder is the Fedora
// entry, so the installed system boots from disk before trying PXE again.
// Relies on efibootmgr printing "BootOrder: XXXX,YYYY,..." and one
// "BootXXXX* <label> ..." line per entry.
it("EFI boot order has Fedora first (local disk before PXE)", () => {
  const result = sshExec(vmIp, SSH_USER, "sudo efibootmgr", { keyPath: sshKeyPath });
  expect(result.exitCode).toBe(0);

  // Boot order should start with the Fedora entry
  const firstId = /^BootOrder:\s*([0-9A-Fa-f]{4})/m.exec(result.stdout)?.[1];
  expect(firstId).toBeDefined();

  const firstEntry = new RegExp(`^Boot${firstId}\\*?\\s+(.*)$`, "m").exec(result.stdout)?.[1] ?? "";
  expect(firstEntry).toMatch(/fedora/i);
});
|
||||
|
||||
// /etc/fstab must contain a tmpfs entry for /tmp.
it("tmpfs mount for /tmp is configured", () => {
  const { stdout } = sshExec(vmIp, SSH_USER, "grep tmpfs /etc/fstab", { keyPath: sshKeyPath });
  expect(stdout).toContain("tmpfs /tmp");
});
|
||||
|
||||
// `vgs labvg` must succeed and list the volume group.
it("LVM volume group exists", () => {
  const { exitCode, stdout } = sshExec(vmIp, SSH_USER, "sudo vgs labvg", { keyPath: sshKeyPath });
  expect(exitCode).toBe(0);
  expect(stdout).toContain("labvg");
});
|
||||
|
||||
// Every logical volume of the expected layout must be present in labvg.
it("all expected LVM logical volumes exist", () => {
  const { exitCode, stdout } = sshExec(
    vmIp,
    SSH_USER,
    "sudo lvs labvg --noheadings -o lv_name",
    { keyPath: sshKeyPath },
  );
  expect(exitCode).toBe(0);

  // lvs pads each name with whitespace; normalise to bare names.
  const present: string[] = [];
  for (const row of stdout.trim().split("\n")) {
    present.push(row.trim());
  }

  const required = ["root", "var", "varlog", "swap", "home", "srv"];
  required.forEach((name) => {
    expect(present).toContain(name);
  });
});
|
||||
});
|
||||
Reference in New Issue
Block a user