feat: install logging, error trapping, PXE/ISO integration tests
Some checks failed
CI/CD / lint (pull_request) Failing after 13s
CI/CD / test (pull_request) Failing after 10s
CI/CD / typecheck (pull_request) Failing after 36s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped
Some checks failed
CI/CD / lint (pull_request) Failing after 13s
CI/CD / test (pull_request) Failing after 10s
CI/CD / typecheck (pull_request) Failing after 36s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped
Kickstart installs on real hardware failed silently — no error reporting, only 3 progress callbacks, zero log streaming. This overhaul makes every install fully observable.

Kickstart improvements:
- Error trapping in %pre and %post (trap ERR sends failure details to bastion)
- 12+ granular progress stages (was 3): SSH, hostname, k3s prep, EFI boot, metadata
- Background log streamer: tails %post output and batch-sends to /api/log
- bastion_log() function for explicit log lines from kickstart scripts

Bastion API:
- POST /api/log — receives raw log lines from kickstart (single or batch)
- InstallLogBuffer — per-MAC ring buffer (2000 lines) + file persistence
- GET /api/logs/:mac — now returns log_lines + log_total alongside stages
- SSE /api/logs/:mac/follow — uses named events (event: stage vs event: log)
- Progress events forwarded to labd via bastion-progress WebSocket message
- Post-provision k3s logs routed through progressBus (was console-only)

dnsmasq fixes found during VM testing:
- HTTP Boot filename: ipxe-real.efi → ipxe.efi (leftover from old 2-stage approach)
- pxe-service directives: only in proxy mode (breaks OVMF PXE in full mode)
- PXEClient vendor class echo for UEFI firmware compatibility

Integration tests:
- PXE boot test: blank UEFI VM → dnsmasq → HTTP Boot → iPXE → bastion → install
- ISO boot test: blank VM boots from bastion-generated ISO → same flow
- Shared helpers: pxe-network (no DHCP, nftables fix), pxe-vm (UEFI + ISO boot)
- test-provision.sh: runs both PXE + ISO tests with prerequisite checks
- 250GB sparse QCOW2 disk (LVM layout needs ~204GB)

201 unit tests passing (11 new).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
219
bastion/tests/integration/helpers/libvirt.ts
Normal file
219
bastion/tests/integration/helpers/libvirt.ts
Normal file
@@ -0,0 +1,219 @@
|
||||
// Libvirt VM lifecycle management for integration tests.
|
||||
|
||||
import { execSync, spawnSync, type SpawnSyncReturns } from "node:child_process";
|
||||
import { existsSync, mkdirSync, writeFileSync, unlinkSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
/** Directory holding libvirt disk images; VM disks and cached cloud images are stored here. */
const IMAGE_DIR = "/var/lib/libvirt/images";

// The host's libvirtd must be able to read the cloud-init ISOs, so they are kept
// under the shared images directory instead of /tmp (which may be container-only).
const CLOUD_INIT_DIR = `${IMAGE_DIR}/lab-cloud-init`;

// True when the process already runs as root (inside a container or via sudo);
// in that case commands are not prefixed with sudo.
const IS_ROOT = typeof process.getuid === "function" && process.getuid() === 0;
|
||||
|
||||
/**
 * Execute a shell command synchronously and return its captured stdout.
 *
 * The command line is prefixed with `sudo` unless the process is already root.
 *
 * @param cmd - Shell command line to execute.
 * @param opts - Optional settings; `timeout` is in milliseconds (defaults to 60 000).
 * @returns The command's standard output decoded as UTF-8.
 * @throws Whatever `execSync` throws, e.g. on a non-zero exit status or a timeout.
 */
function run(cmd: string, opts?: { timeout?: number }): string {
  const timeout = opts?.timeout ?? 60_000;
  const commandLine = IS_ROOT ? cmd : `sudo ${cmd}`;
  return execSync(commandLine, { encoding: "utf-8", stdio: "pipe", timeout });
}
|
||||
|
||||
/**
 * Invoke `virsh` synchronously with the given arguments, going through `sudo`
 * when the process is not root.
 *
 * Arguments are passed as an argv array (no shell involved). The call has a
 * 30-second timeout.
 *
 * @param args - Arguments handed to `virsh`.
 * @returns The raw `spawnSync` result; callers inspect `status` / `stdout` themselves.
 */
function virsh(...args: string[]): SpawnSyncReturns<string> {
  const options = { encoding: "utf-8", stdio: "pipe", timeout: 30_000 } as const;
  if (IS_ROOT) {
    return spawnSync("virsh", args, options);
  }
  return spawnSync("sudo", ["virsh", ...args], options);
}
|
||||
|
||||
/** Settings describing a test VM that is created from a cloud image. */
export interface VmConfig {
  /** VM name; also used to derive the disk and cloud-init ISO file names. */
  name: string;
  /** Memory size in MB. */
  memory: number;
  /** Number of virtual CPUs. */
  vcpus: number;
  /** Disk size in GB. */
  diskSize: number;
  /** Name of the libvirt network the VM is attached to. */
  network: string;
  /** URL the base cloud image is downloaded from. */
  cloudImageUrl: string;
  /** Content of the authorized public key (not a path to it). */
  sshPubKey: string;
  /** Custom cloud-init user-data; when set it is used instead of the built-in default. */
  userData?: string;
}
|
||||
|
||||
/**
 * Print a message to stdout prefixed with the current time.
 *
 * The timestamp is the HH:MM:SS portion of the ISO-8601 (UTC) representation
 * of "now".
 *
 * @param msg - Text to print.
 */
export function log(msg: string): void {
  const isoNow = new Date().toISOString();
  const clock = isoNow.substring(11, 19);
  console.log(` [${clock}] ${msg}`);
}
|
||||
|
||||
/**
 * Return the local path of a cloud image, downloading it first if it is not
 * already present in the image directory.
 *
 * The download goes to a temporary `<dest>.part` file that is moved into place
 * only after curl succeeds. Writing directly to the final path would leave a
 * truncated file behind when the transfer is interrupted or times out, and the
 * next call would then treat that broken file as a valid cached image.
 *
 * @param url - URL to download the image from.
 * @param name - Base name of the cached file (`<name>.qcow2`).
 * @returns Absolute path of the cached image.
 * @throws If the download or the final move fails (error raised by `run`).
 */
export function ensureCloudImage(url: string, name: string): string {
  const dest = join(IMAGE_DIR, `${name}.qcow2`);
  if (existsSync(dest)) {
    log(`Cloud image cached: ${dest}`);
    return dest;
  }

  log(`Downloading cloud image: ${url}`);
  const partial = `${dest}.part`;
  try {
    run(`curl -L -f -o "${partial}" "${url}"`, { timeout: 300_000 });
    run(`mv -f "${partial}" "${dest}"`);
  } catch (err: unknown) {
    // Best-effort removal of the incomplete download; the original error is what matters.
    try {
      run(`rm -f "${partial}"`);
    } catch {
      /* ignore cleanup failure */
    }
    throw err;
  }
  return dest;
}
|
||||
|
||||
/**
 * Build the default `#cloud-config` user-data document for a test VM.
 *
 * The document is assembled from explicit lines so the YAML nesting (user
 * attributes, package list, the multi-line sysctl heredoc under `runcmd`)
 * is unambiguous.
 */
function buildDefaultUserData(vmName: string, sshPubKey: string): string {
  return [
    "#cloud-config",
    `hostname: ${vmName}`,
    "manage_etc_hosts: true",
    "users:",
    "  - default",
    "  - name: fedora",
    "    sudo: ALL=(ALL) NOPASSWD:ALL",
    "    shell: /bin/bash",
    "    ssh_authorized_keys:",
    // Trim so a trailing newline in the key text does not leak into the YAML.
    `      - ${sshPubKey.trim()}`,
    "ssh_pwauth: false",
    "package_update: false",
    "packages:",
    "  - curl",
    "  - socat",
    "  - conntrack-tools",
    "  - ethtool",
    "  - iptables-nft",
    "runcmd:",
    "  - modprobe br_netfilter",
    "  - modprobe overlay",
    "  - |",
    "    cat > /etc/sysctl.d/90-k3s.conf << 'EOF'",
    "    net.bridge.bridge-nf-call-iptables = 1",
    "    net.bridge.bridge-nf-call-ip6tables = 1",
    "    net.ipv4.ip_forward = 1",
    "    vm.panic_on_oom = 0",
    "    vm.overcommit_memory = 1",
    "    kernel.panic = 10",
    "    kernel.panic_on_oops = 1",
    "    EOF",
    "  - sysctl --system",
    "  - swapoff -a",
    "  - systemctl disable --now firewalld 2>/dev/null || true",
    "  - systemctl disable --now ufw 2>/dev/null || true",
    "",
  ].join("\n");
}

/**
 * Create a cloud-init seed ISO for a VM.
 *
 * Writes `user-data` and `meta-data` into `<CLOUD_INIT_DIR>/<vmName>/` and packs
 * them into `<CLOUD_INIT_DIR>/<vmName>-cloud-init.iso` with volume label `cidata`.
 * `genisoimage`, `mkisofs` and `xorrisofs` are tried in that order; the first
 * one that succeeds wins.
 *
 * NOTE(review): unlike `run()`, nothing here is elevated with sudo — this
 * assumes the cloud-init directory is writable by the current user; confirm.
 *
 * @param vmName - VM name; used for the hostname, instance-id and file names.
 * @param config - VM settings; `userData` replaces the default document when set,
 *   otherwise `sshPubKey` is embedded in the default one.
 * @returns Path of the generated ISO.
 * @throws If the files cannot be written or none of the ISO tools succeeds.
 */
export function createCloudInitIso(vmName: string, config: VmConfig): string {
  const seedDir = join(CLOUD_INIT_DIR, vmName);
  // `recursive` also creates CLOUD_INIT_DIR itself when it is missing.
  mkdirSync(seedDir, { recursive: true });

  const userDataPath = join(seedDir, "user-data");
  const metaDataPath = join(seedDir, "meta-data");
  writeFileSync(userDataPath, config.userData ?? buildDefaultUserData(vmName, config.sshPubKey));
  writeFileSync(metaDataPath, `instance-id: ${vmName}\nlocal-hostname: ${vmName}\n`);

  const isoPath = join(CLOUD_INIT_DIR, `${vmName}-cloud-init.iso`);
  // Argument arrays instead of a shell string: paths need no quoting or escaping.
  const isoArgs = [
    "-output", isoPath,
    "-volid", "cidata",
    "-joliet",
    "-rock",
    userDataPath,
    metaDataPath,
  ];

  const failures: string[] = [];
  for (const tool of ["genisoimage", "mkisofs", "xorrisofs"]) {
    const result = spawnSync(tool, isoArgs, { encoding: "utf-8", stdio: "pipe" });
    if (result.status === 0) {
      return isoPath;
    }
    // `error` is set when the tool could not be started at all (e.g. not installed).
    const reason = result.error?.message ?? (result.stderr?.trim() || `exit status ${result.status}`);
    failures.push(`${tool}: ${reason}`);
  }

  throw new Error(`Failed to create cloud-init ISO ${isoPath}:\n${failures.join("\n")}`);
}
|
||||
|
||||
/**
 * Create and start a VM from a cloud image with a cloud-init seed ISO attached.
 *
 * Steps: destroy any existing VM of the same name, fetch (or reuse) the base
 * image, copy it to the VM's own disk and resize it, generate the cloud-init
 * ISO, then hand everything to `virt-install` in import mode without waiting
 * for the guest.
 *
 * @param config - VM settings (name, resources, network, image URL, SSH key).
 * @throws If any of the underlying commands fails.
 */
export function createVm(config: VmConfig): void {
  const { name, memory, vcpus, diskSize, network, cloudImageUrl } = config;

  // Remove a leftover VM with the same name before creating a new one.
  destroyVm(name);

  log(`Creating VM: ${name} (${memory}MB RAM, ${vcpus} vCPU, ${diskSize}GB disk)`);

  // The VM gets its own copy of the base image, grown to the requested size.
  const baseImage = ensureCloudImage(cloudImageUrl, `${name}-base`);
  const diskPath = join(IMAGE_DIR, `${name}.qcow2`);
  run(`cp "${baseImage}" "${diskPath}"`);
  run(`qemu-img resize "${diskPath}" ${diskSize}G`);

  const seedIso = createCloudInitIso(name, config);

  const command = [
    "virt-install",
    `--name=${name}`,
    `--memory=${memory}`,
    `--vcpus=${vcpus}`,
    `--disk=path=${diskPath},format=qcow2`,
    `--disk=path=${seedIso},device=cdrom`,
    `--network=network=${network},model=virtio`,
    "--os-variant=generic",
    "--import",
    "--noautoconsole",
    "--wait=0",
  ].join(" ");

  log(`Running: virt-install --name=${name} ...`);
  run(command);
  log(`VM ${name} created and starting`);
}
|
||||
|
||||
/**
 * Destroy a VM, remove its storage, and delete its cloud-init ISO.
 *
 * Safe to call for a VM that does not exist: the virsh steps are skipped, but
 * a stale cloud-init ISO left over from an earlier run is still removed.
 *
 * @param name - libvirt domain name.
 */
export function destroyVm(name: string): void {
  const domainExists = virsh("dominfo", name).status === 0;

  if (domainExists) {
    log(`Destroying VM: ${name}`);

    // Result intentionally ignored: `destroy` fails when the domain is already shut off.
    virsh("destroy", name);

    // `--nvram` lets undefine succeed for domains that have an NVRAM file (UEFI guests);
    // without it virsh refuses to undefine such a domain.
    const undefine = virsh("undefine", name, "--remove-all-storage", "--nvram");
    if (undefine.status !== 0) {
      const reason = undefine.error?.message ?? (undefine.stderr?.trim() || `exit status ${undefine.status}`);
      log(`Warning: failed to undefine VM ${name}: ${reason}`);
    }
  }

  const isoPath = join(CLOUD_INIT_DIR, `${name}-cloud-init.iso`);
  try {
    unlinkSync(isoPath);
  } catch {
    /* ignore: the ISO may not exist */
  }
}
|
||||
|
||||
/**
 * Get the IPv4 address of a running VM, or `null` when none is known yet.
 *
 * `virsh domifaddr` is queried through the guest agent first and then with the
 * default source. For each answer the first non-loopback IPv4 address is used:
 * output that includes a loopback entry (127.x.x.x) ahead of the real
 * interface must not yield the loopback address as the VM's IP.
 *
 * @param name - libvirt domain name.
 * @returns Dotted-quad IPv4 address, or `null` if no address could be determined.
 */
export function getVmIp(name: string): string | null {
  const ipv4Pattern = /\d+\.\d+\.\d+\.\d+/g;
  const queries: string[][] = [["--source", "agent"], []];

  try {
    for (const extraArgs of queries) {
      // stdout is null when virsh could not be spawned at all.
      const stdout = virsh("domifaddr", name, ...extraArgs).stdout ?? "";
      const addresses = stdout.match(ipv4Pattern) ?? [];
      const usable = addresses.find((address) => !address.startsWith("127."));
      if (usable !== undefined) {
        return usable;
      }
    }
    return null;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Poll until a VM reports an IP address.
 *
 * The address is checked every 2 seconds until one is found or the time
 * budget is used up.
 *
 * @param name - libvirt domain name.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds.
 * @returns The VM's IP address.
 * @throws Error if no address shows up within `timeoutMs`.
 */
export async function waitForVmIp(name: string, timeoutMs: number): Promise<string> {
  const startedAt = Date.now();
  const withinBudget = (): boolean => Date.now() - startedAt < timeoutMs;

  while (withinBudget()) {
    const ip = getVmIp(name);
    if (!ip) {
      await sleep(2000);
      continue;
    }
    log(`VM ${name} got IP: ${ip}`);
    return ip;
  }

  throw new Error(`VM ${name} did not get an IP within ${timeoutMs}ms`);
}
|
||||
|
||||
/**
 * Poll until an SSH login on a host succeeds.
 *
 * Each attempt runs `ssh ... "echo ok"` non-interactively (batch mode, host key
 * checking disabled, 3 s connect timeout, 10 s overall timeout) and counts as
 * success when the command exits 0 and prints "ok". Failed attempts are
 * retried after 3 seconds.
 *
 * @param ip - Address of the host to connect to.
 * @param user - Login user.
 * @param timeoutMs - Maximum time to keep trying, in milliseconds.
 * @param keyPath - Optional path to a private key passed to `ssh -i`.
 * @throws Error if SSH does not become available within `timeoutMs`.
 */
export async function waitForSsh(
  ip: string,
  user: string,
  timeoutMs: number,
  keyPath?: string,
): Promise<void> {
  // The argument list is the same for every attempt, so build it once.
  const sshArgs: string[] = [
    "-o", "StrictHostKeyChecking=no",
    "-o", "ConnectTimeout=3",
    "-o", "BatchMode=yes",
  ];
  if (keyPath) {
    sshArgs.push("-i", keyPath);
  }
  sshArgs.push(`${user}@${ip}`, "echo ok");

  const startedAt = Date.now();
  const withinBudget = (): boolean => Date.now() - startedAt < timeoutMs;

  while (withinBudget()) {
    const attempt = spawnSync("ssh", sshArgs, { encoding: "utf-8", stdio: "pipe", timeout: 10_000 });
    const succeeded = attempt.status === 0 && attempt.stdout.includes("ok");
    if (succeeded) {
      log(`SSH ready on ${ip}`);
      return;
    }
    await sleep(3000);
  }

  throw new Error(`SSH not available on ${ip} within ${timeoutMs}ms`);
}
|
||||
|
||||
/**
 * Return a promise that resolves (with no value) after the given delay.
 *
 * @param ms - Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
Reference in New Issue
Block a user