Files
lab/bastion/tests/integration/iso-provision.test.ts

324 lines
12 KiB
TypeScript
Raw Normal View History

feat: install logging, error trapping, PXE/ISO integration tests Kickstart installs on real hardware failed silently — no error reporting, only 3 progress callbacks, zero log streaming. This overhaul makes every install fully observable. Kickstart improvements: - Error trapping in %pre and %post (trap ERR sends failure details to bastion) - 12+ granular progress stages (was 3): SSH, hostname, k3s prep, EFI boot, metadata - Background log streamer: tails %post output and batch-sends to /api/log - bastion_log() function for explicit log lines from kickstart scripts Bastion API: - POST /api/log — receives raw log lines from kickstart (single or batch) - InstallLogBuffer — per-MAC ring buffer (2000 lines) + file persistence - GET /api/logs/:mac — now returns log_lines + log_total alongside stages - SSE /api/logs/:mac/follow — uses named events (event: stage vs event: log) - Progress events forwarded to labd via bastion-progress WebSocket message - Post-provision k3s logs routed through progressBus (was console-only) dnsmasq fixes found during VM testing: - HTTP Boot filename: ipxe-real.efi → ipxe.efi (leftover from old 2-stage approach) - pxe-service directives: only in proxy mode (breaks OVMF PXE in full mode) - PXEClient vendor class echo for UEFI firmware compatibility Integration tests: - PXE boot test: blank UEFI VM → dnsmasq → HTTP Boot → iPXE → bastion → install - ISO boot test: blank VM boots from bastion-generated ISO → same flow - Shared helpers: pxe-network (no DHCP, nftables fix), pxe-vm (UEFI + ISO boot) - test-provision.sh: runs both PXE + ISO tests with prerequisite checks - 250GB sparse QCOW2 disk (LVM layout needs ~204GB) 201 unit tests passing (11 new). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 22:26:33 +00:00
// Integration test: boot ISO provisioning flow (for machines without PXE support).
//
// This test validates the ISO boot chain:
// 1. Bastion generates a boot ISO containing iPXE + embedded kernel/initrd
// 2. VM boots from the ISO (CD-ROM, not PXE)
// 3. iPXE loads from ISO, does DHCP, chains to bastion
// 4. Normal discover -> install flow follows
//
// This simulates machines like the MinisForum R1 that have no UEFI PXE ROM.
//
// Prerequisites: same as PXE test + xorriso, mtools
// Run: sudo pnpm run test:integration:iso
import { describe, it, expect, beforeAll, afterAll } from "vitest";
import { readFileSync, existsSync } from "node:fs";
import { join } from "node:path";
import { homedir, tmpdir } from "node:os";
import { mkdirSync, rmSync } from "node:fs";
import { log, waitForSsh } from "./helpers/libvirt.js";
import { ensurePxeNetwork, destroyPxeNetwork, PXE_NETWORK_NAME, PXE_GATEWAY, PXE_SUBNET } from "./helpers/pxe-network.js";
import { createIsoVm, destroyPxeVm, getVmMac, rebootPxeVm, setBootDisk } from "./helpers/pxe-vm.js";
feat: install logging, error trapping, PXE/ISO integration tests Kickstart installs on real hardware failed silently — no error reporting, only 3 progress callbacks, zero log streaming. This overhaul makes every install fully observable. Kickstart improvements: - Error trapping in %pre and %post (trap ERR sends failure details to bastion) - 12+ granular progress stages (was 3): SSH, hostname, k3s prep, EFI boot, metadata - Background log streamer: tails %post output and batch-sends to /api/log - bastion_log() function for explicit log lines from kickstart scripts Bastion API: - POST /api/log — receives raw log lines from kickstart (single or batch) - InstallLogBuffer — per-MAC ring buffer (2000 lines) + file persistence - GET /api/logs/:mac — now returns log_lines + log_total alongside stages - SSE /api/logs/:mac/follow — uses named events (event: stage vs event: log) - Progress events forwarded to labd via bastion-progress WebSocket message - Post-provision k3s logs routed through progressBus (was console-only) dnsmasq fixes found during VM testing: - HTTP Boot filename: ipxe-real.efi → ipxe.efi (leftover from old 2-stage approach) - pxe-service directives: only in proxy mode (breaks OVMF PXE in full mode) - PXEClient vendor class echo for UEFI firmware compatibility Integration tests: - PXE boot test: blank UEFI VM → dnsmasq → HTTP Boot → iPXE → bastion → install - ISO boot test: blank VM boots from bastion-generated ISO → same flow - Shared helpers: pxe-network (no DHCP, nftables fix), pxe-vm (UEFI + ISO boot) - test-provision.sh: runs both PXE + ISO tests with prerequisite checks - 250GB sparse QCOW2 disk (LVM layout needs ~204GB) 201 unit tests passing (11 new). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 22:26:33 +00:00
import { sshExec } from "./helpers/ssh.js";
// Name given to the test VM; also sent as the hostname in the install request.
const VM_NAME = "lab-iso-test";
// VM sizing handed to createIsoVm (memory unit is whatever that helper expects — presumably MiB; verify).
const VM_MEMORY = 4096;
const VM_VCPUS = 2;
const VM_DISK_GB = 250; // LVM layout needs ~204GB. QCOW2 is sparse.
// Port the in-process bastion HTTP server listens on for this test.
const HTTP_PORT = 8098; // different from PXE test
// Configured as the bastion's admin user and used for every SSH check below.
const SSH_USER = "michal";
// The bastion API is addressed at the PXE test network's gateway address.
const BASTION_IP = PXE_GATEWAY;
// DHCP pool bounds passed to the bastion config.
const DHCP_RANGE_START = `${PXE_SUBNET}.100`;
const DHCP_RANGE_END = `${PXE_SUBNET}.200`;
// Per-phase upper bounds; their sum plus slack is used as the beforeAll hook timeout.
const DISCOVERY_TIMEOUT_MS = 5 * 60_000;
const INSTALL_TIMEOUT_MS = 30 * 60_000;
const SSH_TIMEOUT_MS = 10 * 60_000; // 10 min: OVMF retries PXE/HTTP Boot before disk boot + OS startup
/**
 * Locates an SSH key pair to authorise on (and later log in to) the test VM.
 *
 * Candidates are probed in order: the path in `SSH_KEY_PATH` (if set), then
 * `id_ed25519`, `id_ecdsa`, `id_rsa` under `.ssh` of the current home directory
 * and, when `SUDO_USER` is set, of `/home/<SUDO_USER>`. A candidate only counts
 * when both the private key and its `.pub` sibling exist.
 *
 * @returns the trimmed public key text and the private key path.
 * @throws Error when no candidate has both files present.
 */
function findSshKey(): { pubKey: string; keyPath: string } {
  const searchRoots = [homedir()];
  const invokingUser = process.env["SUDO_USER"];
  if (invokingUser) searchRoots.push(join("/home", invokingUser));

  // Explicit override first, then the conventional key names per home directory.
  const candidates: string[] = [];
  const override = process.env["SSH_KEY_PATH"];
  if (override) candidates.push(override);
  candidates.push(
    ...searchRoots.flatMap((root) =>
      ["id_ed25519", "id_ecdsa", "id_rsa"].map((fileName) => join(root, ".ssh", fileName)),
    ),
  );

  const keyPath = candidates.find(
    (candidate) => existsSync(candidate) && existsSync(`${candidate}.pub`),
  );
  if (keyPath === undefined) {
    throw new Error("No SSH key found — set SSH_KEY_PATH or ensure keys exist in ~/.ssh/");
  }
  return { pubKey: readFileSync(`${keyPath}.pub`, "utf-8").trim(), keyPath };
}
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Repeatedly GETs `url` until the decoded JSON body satisfies `check`.
 *
 * Network errors, non-2xx responses, JSON decode errors and exceptions thrown
 * by `check` are all treated as "not ready yet" and retried. The most recent
 * outcome is remembered so that a timeout explains what was last observed.
 *
 * @param url        endpoint to poll.
 * @param check      predicate over the decoded body; polling stops when it returns true.
 * @param timeoutMs  overall budget; no new attempt starts once it has elapsed.
 * @param intervalMs pause between attempts (default 5000). The pause is capped
 *                   at the time remaining so the call does not overshoot the budget.
 * @returns the first body accepted by `check`.
 * @throws Error once `timeoutMs` elapses without an accepted body.
 */
async function pollApi<T>(
  url: string,
  check: (data: T) => boolean,
  timeoutMs: number,
  intervalMs = 5000,
): Promise<T> {
  const deadline = Date.now() + timeoutMs;
  let lastOutcome = "no attempt completed";
  while (Date.now() < deadline) {
    try {
      const res = await fetch(url);
      if (res.ok) {
        const data = (await res.json()) as T;
        if (check(data)) return data;
        lastOutcome = "response did not satisfy check";
      } else {
        lastOutcome = `HTTP ${res.status}`;
      }
    } catch (err: unknown) {
      // Expected while the bastion is still starting; keep the reason for the timeout message.
      lastOutcome = err instanceof Error ? err.message : String(err);
    }
    const remaining = deadline - Date.now();
    if (remaining <= 0) break;
    await sleep(Math.min(intervalMs, remaining));
  }
  throw new Error(`Timeout after ${timeoutMs}ms polling ${url} (last outcome: ${lastOutcome})`);
}
describe("ISO boot provisioning", () => {
// State shared between the hooks and the test cases; all of it is assigned in beforeAll.
// Bastion HTTP server instance; closed again in afterAll.
let bastionApp: ReturnType<typeof import("fastify").default>;
// Per-run temporary bastion directory; removed in afterAll.
let testDir: string;
// MAC address of the test VM, used as the key in the bastion API.
let vmMac: string;
// IP address the bastion reports for the installed VM; target of the SSH checks.
let vmIp: string;
// Private key path returned by findSshKey, passed to every SSH call.
let sshKeyPath: string;
// One-time setup for the suite. Brings up the PXE test network, starts an
// in-process bastion (HTTP API + dnsmasq) with a freshly generated boot ISO,
// creates a blank VM that boots from that ISO, then drives it through
// discovery -> queued install -> reboot -> install -> disk boot -> SSH.
// The test cases below only inspect the state this hook leaves behind.
beforeAll(async () => {
  const { pubKey, keyPath } = findSshKey();
  sshKeyPath = keyPath;

  // 1. Network
  log("Setting up PXE test network (for ISO boot test)...");
  ensurePxeNetwork();

  // 2. Bastion dirs
  testDir = join(tmpdir(), `lab-iso-test-${Date.now()}`);
  mkdirSync(testDir, { recursive: true });
  for (const sub of ["tftp", "http", "logs"]) {
    mkdirSync(join(testDir, sub), { recursive: true });
  }

  // 3. Start bastion with boot ISO generation
  log("Starting bastion with boot ISO generation...");
  const { createApp } = await import("../../src/bastion/src/server.js");
  const { loadConfig } = await import("../../src/bastion/src/config.js");
  const { generateDnsmasqConf, startDnsmasq } = await import("../../src/bastion/src/services/dnsmasq.js");
  const { generateDiscoverKickstart } = await import("../../src/bastion/src/services/kickstart-generator.js");
  const { renderBootIpxe } = await import("../../src/bastion/src/templates/boot.ipxe.js");
  const { ensureBootIso } = await import("../../src/bastion/src/routes/boot-iso.js");
  const fs = await import("node:fs");
  const { execSync } = await import("node:child_process");

  const config = loadConfig({
    bastionDir: testDir,
    httpPort: HTTP_PORT,
    iface: "virbr-pxe",
    serverIp: BASTION_IP,
    network: `${PXE_SUBNET}.0`,
    gateway: BASTION_IP,
    dhcpMode: "full",
    dhcpRangeStart: DHCP_RANGE_START,
    dhcpRangeEnd: DHCP_RANGE_END,
    domain: "iso-test.local",
    sshKeys: [pubKey],
    adminUser: SSH_USER,
  });

  // iPXE for TFTP (still needed — dnsmasq points PXE clients here)
  const ipxeSrc = "/usr/share/ipxe/ipxe-snponly-x86_64.efi";
  if (fs.existsSync(ipxeSrc)) {
    fs.copyFileSync(ipxeSrc, join(config.tftpDir, "ipxe.efi"));
  }

  // Fedora kernel + initrd (cached across runs outside the per-run temp dir)
  const cacheDir = "/var/lib/libvirt/images/lab-pxe-cache";
  mkdirSync(cacheDir, { recursive: true });
  const kernel = join(cacheDir, `vmlinuz-${config.fedoraVersion}`);
  const initrd = join(cacheDir, `initrd-${config.fedoraVersion}.img`);

  // Downloads into a ".part" file and renames on success, so an interrupted
  // curl can never leave a truncated artifact that later runs treat as cached.
  const downloadIfMissing = (target: string, remoteName: string, label: string): void => {
    if (fs.existsSync(target)) return;
    log(`Downloading Fedora ${config.fedoraVersion} ${label}...`);
    const partial = `${target}.part`;
    execSync(
      `curl -# -L -f -o "${partial}" "${config.fedoraMirror}/images/pxeboot/${remoteName}"`,
      { stdio: "inherit", timeout: 300_000 },
    );
    fs.renameSync(partial, target);
  };
  downloadIfMissing(kernel, "vmlinuz", "kernel");
  downloadIfMissing(initrd, "initrd.img", "initrd");

  fs.copyFileSync(kernel, join(config.httpDir, "vmlinuz"));
  fs.copyFileSync(initrd, join(config.httpDir, "initrd.img"));
  // Best-effort: expose the TFTP iPXE binary over HTTP as well.
  try { fs.symlinkSync(join(config.tftpDir, "ipxe.efi"), join(config.httpDir, "ipxe.efi")); } catch { /* exists */ }

  // Generate boot scripts
  const discoverKs = generateDiscoverKickstart(config);
  fs.writeFileSync(join(config.httpDir, "discover.ks"), discoverKs);
  const bootIpxe = renderBootIpxe({ serverIp: config.serverIp, httpPort: config.httpPort });
  fs.writeFileSync(join(config.httpDir, "boot.ipxe"), bootIpxe);

  // Generate the boot ISO — this is the key artifact for this test
  log("Generating boot ISO...");
  ensureBootIso(config);
  const isoPath = join(config.httpDir, "boot.iso");
  if (!fs.existsSync(isoPath)) {
    throw new Error("Boot ISO was not generated");
  }
  const isoSize = fs.statSync(isoPath).size;
  log(`Boot ISO generated: ${isoPath} (${(isoSize / 1024 / 1024).toFixed(1)}MB)`);

  // dnsmasq config + start
  generateDnsmasqConf(config);
  const { app } = createApp(config);
  bastionApp = app;
  await app.listen({ port: config.httpPort, host: "0.0.0.0" });
  log(`Bastion HTTP listening on :${HTTP_PORT}`);
  log("Starting dnsmasq (full DHCP)...");
  // Not awaited: only a short pause is given before the VM is created.
  void startDnsmasq(config);
  await sleep(1000);

  // 4. Create VM that boots from the ISO (not PXE)
  log("Creating ISO boot VM (blank disk, UEFI, CD-ROM boot)...");
  createIsoVm({
    name: VM_NAME,
    memory: VM_MEMORY,
    vcpus: VM_VCPUS,
    diskSize: VM_DISK_GB,
    network: PXE_NETWORK_NAME,
    isoPath,
  });
  const mac = getVmMac(VM_NAME);
  if (!mac) throw new Error("Could not determine VM MAC address");
  vmMac = mac;
  log(`VM MAC: ${vmMac}`);

  // 5. Wait for discovery
  log("Waiting for VM to boot ISO -> iPXE -> DHCP -> bastion -> discover...");
  type MachinesResponse = { discovered: Record<string, unknown> };
  await pollApi<MachinesResponse>(
    `http://${BASTION_IP}:${HTTP_PORT}/api/machines`,
    (data) => vmMac in data.discovered,
    DISCOVERY_TIMEOUT_MS,
  );
  log("VM discovered via ISO boot!");

  // 6. Queue install — fail fast if the bastion rejects the request instead of
  // waiting out the whole install timeout for an install that was never queued.
  log("Queueing machine for install...");
  const installRes = await fetch(`http://${BASTION_IP}:${HTTP_PORT}/api/install`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ mac: vmMac, hostname: VM_NAME, disk: "", role: "vanilla" }),
  });
  if (!installRes.ok) {
    throw new Error(`Queueing install failed: HTTP ${installRes.status} ${await installRes.text()}`);
  }

  // 7. Reboot for install
  log("Waiting for discovery reboot...");
  await sleep(15_000);
  rebootPxeVm(VM_NAME);

  // 8. Wait for install
  log("Waiting for install to complete (10-20 minutes)...");
  type LogsResponse = { status: string; progress: string; ip?: string };
  const finalState = await pollApi<LogsResponse>(
    `http://${BASTION_IP}:${HTTP_PORT}/api/logs/${encodeURIComponent(vmMac)}`,
    (data) => data.status === "installed" || data.progress === "error",
    INSTALL_TIMEOUT_MS,
    10_000,
  );
  if (finalState.progress === "error") {
    // finalState is the full body of the logs endpoint, so it can be dumped directly.
    log(`INSTALL FAILED. State: ${JSON.stringify(finalState, null, 2)}`);
    throw new Error("Install failed — check logs above");
  }
  if (!finalState.ip) {
    // Without an address the SSH wait below could only time out.
    throw new Error("Install reported as complete but the bastion returned no IP for the VM");
  }
  vmIp = finalState.ip;
  log(`Install complete! VM IP: ${vmIp}`);

  // 9. Switch boot to disk
  log("Switching VM boot order to disk...");
  await sleep(10_000);
  setBootDisk(VM_NAME);

  // 10. Wait for SSH
  log("Waiting for SSH...");
  await waitForSsh(vmIp, SSH_USER, SSH_TIMEOUT_MS, sshKeyPath);
  log("ISO boot provision test setup complete.");
}, DISCOVERY_TIMEOUT_MS + INSTALL_TIMEOUT_MS + SSH_TIMEOUT_MS + 120_000);
// Suite teardown. Every step is attempted even if an earlier one throws, so a
// failure stopping dnsmasq cannot leave the VM, the libvirt network or the temp
// directory behind. The first error encountered is rethrown after all steps ran.
afterAll(async () => {
  log("Cleaning up ISO test...");
  const steps: Array<[label: string, run: () => void | Promise<void>]> = [
    ["close bastion", async () => {
      if (bastionApp) await bastionApp.close().catch(() => {});
    }],
    ["stop dnsmasq", async () => {
      const { stopDnsmasq } = await import("../../src/bastion/src/services/dnsmasq.js");
      stopDnsmasq();
    }],
    ["destroy VM", () => { destroyPxeVm(VM_NAME); }],
    ["destroy network", () => { destroyPxeNetwork(); }],
    ["remove temp dir", () => {
      if (testDir) rmSync(testDir, { recursive: true, force: true });
    }],
  ];

  const failures: unknown[] = [];
  for (const [label, run] of steps) {
    try {
      await run();
    } catch (err: unknown) {
      log(`Cleanup step "${label}" failed: ${err instanceof Error ? err.message : String(err)}`);
      failures.push(err);
    }
  }
  if (failures.length > 0) throw failures[0];
});
// The bastion's machine list must contain the VM under "installed" with the requested hostname.
it("machine was discovered and installed", async () => {
  type InstalledMachines = { installed: Record<string, { hostname: string }> };
  const response = await fetch(`http://${BASTION_IP}:${HTTP_PORT}/api/machines`);
  const { installed } = (await response.json()) as InstalledMachines;
  expect(installed[vmMac]).toBeDefined();
  expect(installed[vmMac].hostname).toBe(VM_NAME);
});
// The per-MAC logs endpoint must report the final status and progress values.
it("progress stages were recorded", async () => {
  const logsUrl = `http://${BASTION_IP}:${HTTP_PORT}/api/logs/${encodeURIComponent(vmMac)}`;
  const response = await fetch(logsUrl);
  const { status, progress } = (await response.json()) as { status: string; progress: string };
  expect(status).toBe("installed");
  expect(progress).toBe("complete");
});
// At least one raw log line must have been received for this MAC.
it("log lines were captured from kickstart", async () => {
  const logsUrl = `http://${BASTION_IP}:${HTTP_PORT}/api/logs/${encodeURIComponent(vmMac)}`;
  const response = await fetch(logsUrl);
  const { log_total: lineCount } = (await response.json()) as { log_total?: number };
  expect(lineCount).toBeGreaterThan(0);
});
// Logging in with the discovered key must succeed and land in the admin account.
it("SSH works", () => {
  const { exitCode, stdout } = sshExec(vmIp, SSH_USER, "whoami", { keyPath: sshKeyPath });
  expect(exitCode).toBe(0);
  expect(stdout.trim()).toBe(SSH_USER);
});
// The admin account must be able to escalate to root non-interactively.
it("sudo works", () => {
  const { exitCode, stdout } = sshExec(vmIp, SSH_USER, "sudo whoami", { keyPath: sshKeyPath });
  expect(exitCode).toBe(0);
  expect(stdout.trim()).toBe("root");
});
// The fully qualified hostname must include the name requested at install time.
it("hostname is correct", () => {
  const { exitCode, stdout } = sshExec(vmIp, SSH_USER, "hostname -f", { keyPath: sshKeyPath });
  expect(exitCode).toBe(0);
  expect(stdout.trim()).toContain(VM_NAME);
});
// /etc/lab-provisioned must exist and record the hostname and role that were requested.
it("provisioning metadata exists", () => {
  const { exitCode, stdout: metadata } = sshExec(vmIp, SSH_USER, "cat /etc/lab-provisioned", { keyPath: sshKeyPath });
  expect(exitCode).toBe(0);
  expect(metadata).toContain(`hostname: ${VM_NAME}`);
  expect(metadata).toContain("role: vanilla");
});
// The labvg volume group must contain every expected logical volume.
it("LVM layout is correct", () => {
  const { exitCode, stdout } = sshExec(vmIp, SSH_USER, "sudo lvs labvg --noheadings -o lv_name", { keyPath: sshKeyPath });
  expect(exitCode).toBe(0);
  // lvs pads its output, so trim each line before comparing names.
  const volumeNames = stdout.trim().split("\n").map((line: string) => line.trim());
  ["root", "var", "varlog", "swap", "home", "srv"].forEach((volume) => {
    expect(volumeNames).toContain(volume);
  });
});
});