feat: serial console on test VMs for debugging without SSH
Some checks failed
CI/CD / typecheck (pull_request) Failing after 9s
CI/CD / test (pull_request) Failing after 9s
CI/CD / lint (pull_request) Failing after 21s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped

- VMs get serial console on TCP (PXE: port 4555, ISO: port 4556)
- serialExec() helper: runs commands via telnet when SSH/network is down
- PXE test: on SSH failure, dumps hostname, IP, NetworkManager, sshd,
  failed units, and fstab via serial console before failing
- Kickstart enables serial-getty@ttyS0 for auto-login on serial

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-03-27 15:22:43 +00:00
parent ea7e437241
commit cc289c0f94
3 changed files with 103 additions and 4 deletions

View File

@@ -1,9 +1,11 @@
// Create a blank UEFI VM for PXE boot testing.
// Unlike cloud image VMs, these have an empty disk and boot from network.
// Each VM gets a serial console on a TCP port for debugging without network/SSH.
import { execSync, spawnSync, type SpawnSyncReturns } from "node:child_process";
import { existsSync } from "node:fs";
import { join } from "node:path";
import { createConnection } from "node:net";
import { log } from "./libvirt.js";
const IMAGE_DIR = "/var/lib/libvirt/images";
@@ -68,6 +70,9 @@ export function createPxeVm(config: PxeVmConfig): void {
"--wait=0",
// Graphics for debugging (VNC, connect with virt-viewer if needed)
"--graphics=vnc,listen=127.0.0.1",
// Serial console via TCP — allows exec without network/SSH
// Connect: socat - TCP:127.0.0.1:4555
"--serial=tcp,host=127.0.0.1:4555,mode=bind,protocol=telnet",
];
if (arch === "aarch64") {
@@ -76,7 +81,7 @@ export function createPxeVm(config: PxeVmConfig): void {
log(`Running: virt-install --name=${config.name} --boot=uefi,network ...`);
run(virtInstallArgs.join(" "), { timeout: 30_000 });
log(`PXE VM ${config.name} created and booting from network`);
log(`PXE VM ${config.name} created (serial: telnet 127.0.0.1 4555)`);
}
/** Destroy a PXE VM and clean up its disk. */
@@ -169,6 +174,8 @@ export function createIsoVm(config: IsoVmConfig): void {
"--noautoconsole",
"--wait=0",
"--graphics=vnc,listen=127.0.0.1",
// Serial console via TCP (port 4556 to avoid conflict with PXE VM)
"--serial=tcp,host=127.0.0.1:4556,mode=bind,protocol=telnet",
];
if (arch === "aarch64") {
@@ -177,5 +184,72 @@ export function createIsoVm(config: IsoVmConfig): void {
log(`Running: virt-install --name=${config.name} --boot=uefi,cdrom ...`);
run(virtInstallArgs.join(" "), { timeout: 60_000 });
log(`ISO boot VM ${config.name} created and booting from ISO`);
log(`ISO boot VM ${config.name} created (serial: telnet 127.0.0.1 4556)`);
}
/**
 * Execute a command on a VM via its serial console (telnet-over-TCP on localhost).
 * Works even when the VM has no network/SSH.
 *
 * Flow: connect, wait for a `login:` prompt (answered with `root`, no password is
 * sent) or a shell prompt (`# ` / `$ `), send the command followed by an `echo` of
 * a unique end marker, then collect everything up to that marker.
 *
 * @param port      TCP port on 127.0.0.1 where the VM's serial console listens.
 * @param command   Shell command line to run on the VM.
 * @param timeoutMs Overall deadline for connect + login + command (default 10s).
 * @returns The command's output with the echoed command line removed, CRLF
 *          normalised to LF, and surrounding whitespace trimmed.
 * @throws Error `Serial exec timeout after …ms` when the marker is not seen in
 *         time, or `Serial connection failed: …` on a socket error.
 */
export async function serialExec(
  port: number,
  command: string,
  timeoutMs = 10_000,
): Promise<string> {
  return new Promise((resolve, reject) => {
    const sock = createConnection({ host: "127.0.0.1", port });
    let buffer = "";
    let sentCommand = false;
    let settled = false;
    let nudgeTimer: ReturnType<typeof setTimeout> | undefined;

    // The marker is sent to the shell as two adjacent quoted strings. The shell
    // concatenates them when it runs `echo`, but the tty's echo of the typed
    // command line contains the quotes in between — so the full marker only
    // ever appears in real output, never in the command echo.
    const markerHead = "__SERIAL_END_";
    const markerTail = `${Date.now()}_${Math.random().toString(36).slice(2)}__`;
    const marker = markerHead + markerTail;

    // A login prompt is a line that ENDS with "login:". The post-login banner
    // "Last login: <date>" has text after the colon and must not match.
    const loginPrompt = /login:[ \t]*\r?$/m;

    // Settle exactly once and always release the socket and both timers.
    const finish = (settle: () => void): void => {
      if (settled) return;
      settled = true;
      clearTimeout(deadline);
      if (nudgeTimer !== undefined) clearTimeout(nudgeTimer);
      sock.destroy();
      settle();
    };

    const deadline = setTimeout(() => {
      finish(() => reject(new Error(`Serial exec timeout after ${timeoutMs}ms`)));
    }, timeoutMs);

    sock.on("connect", () => {
      // If the console stays silent, send a newline to make it print a prompt.
      // Skipped once the command is out so it cannot inject a stray empty line.
      nudgeTimer = setTimeout(() => {
        if (!sentCommand && !settled) {
          sock.write("\r\n");
        }
      }, 500);
    });

    sock.on("data", (data: Buffer) => {
      if (settled) return;
      buffer += data.toString();

      if (!sentCommand) {
        if (loginPrompt.test(buffer)) {
          // Log in as root, then wait for the shell prompt.
          buffer = "";
          sock.write("root\r\n");
          return;
        }
        if (buffer.includes("# ") || buffer.includes("$ ")) {
          // At shell prompt — send command followed by the split marker.
          sentCommand = true;
          buffer = "";
          sock.write(`${command}; echo "${markerHead}""${markerTail}"\r\n`);
        }
        return;
      }

      const markerIdx = buffer.indexOf(marker);
      if (markerIdx === -1) return;

      // Everything before the marker is: echoed command line + command output.
      const captured = buffer.substring(0, markerIdx).replace(/\r\n/g, "\n").trim();
      // Drop the command echo (first line).
      const result = captured.split("\n").slice(1).join("\n").trim();
      finish(() => resolve(result));
    });

    sock.on("error", (err) => {
      finish(() => reject(new Error(`Serial connection failed: ${err.message}`)));
    });
  });
}

View File

@@ -24,7 +24,7 @@ import { join } from "node:path";
import { homedir, tmpdir } from "node:os";
import { log, waitForSsh } from "./helpers/libvirt.js";
import { ensurePxeNetwork, destroyPxeNetwork, PXE_NETWORK_NAME, PXE_GATEWAY, PXE_SUBNET } from "./helpers/pxe-network.js";
import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm } from "./helpers/pxe-vm.js";
import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm, serialExec } from "./helpers/pxe-vm.js";
import { sshExec } from "./helpers/ssh.js";
// --- Test constants ---
@@ -277,7 +277,29 @@ describe("PXE boot provisioning", () => {
// 10. Wait for SSH — VM network-boots, iPXE chains to /dispatch,
// bastion returns exit (installed), iPXE falls through to disk boot
log("Waiting for SSH access...");
await waitForSsh(vmIp, SSH_USER, SSH_TIMEOUT_MS, sshKeyPath);
try {
await waitForSsh(vmIp, SSH_USER, SSH_TIMEOUT_MS, sshKeyPath);
} catch {
// SSH failed — use serial console to diagnose
log("SSH timed out. Diagnosing via serial console...");
try {
const hostname = await serialExec(4555, "hostname", 15_000);
log(`Serial: hostname = ${hostname}`);
const ip = await serialExec(4555, "ip -4 addr show | grep inet", 15_000);
log(`Serial: ip = ${ip}`);
const nm = await serialExec(4555, "systemctl is-active NetworkManager", 15_000);
log(`Serial: NetworkManager = ${nm}`);
const sshd = await serialExec(4555, "systemctl is-active sshd", 15_000);
log(`Serial: sshd = ${sshd}`);
const failed = await serialExec(4555, "systemctl --failed --no-pager", 15_000);
log(`Serial: failed units = ${failed}`);
const fstab = await serialExec(4555, "grep efi /etc/fstab", 15_000);
log(`Serial: fstab efi = ${fstab}`);
} catch (serialErr) {
log(`Serial console failed: ${serialErr instanceof Error ? serialErr.message : String(serialErr)}`);
}
throw new Error(`SSH not available on ${vmIp} — check serial console diagnostics above`);
}
log("PXE provision test setup complete.");
}, DISCOVERY_TIMEOUT_MS + INSTALL_TIMEOUT_MS + SSH_TIMEOUT_MS + 120_000); // total timeout