From cc289c0f945d2d8a987373ccae94c3b6bcc4f17c Mon Sep 17 00:00:00 2001 From: Michal Date: Fri, 27 Mar 2026 15:22:43 +0000 Subject: [PATCH] feat: serial console on test VMs for debugging without SSH - VMs get serial console on TCP (PXE: port 4555, ISO: port 4556) - serialExec() helper: runs commands via telnet when SSH/network is down - PXE test: on SSH failure, dumps hostname, IP, NetworkManager, sshd, failed units, and fstab via serial console before failing - Kickstart enables serial-getty@ttyS0 for auto-login on serial Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/bastion/src/templates/install.ks.ts | 3 + bastion/tests/integration/helpers/pxe-vm.ts | 78 ++++++++++++++++++- .../tests/integration/pxe-provision.test.ts | 26 ++++++- 3 files changed, 103 insertions(+), 4 deletions(-) diff --git a/bastion/src/bastion/src/templates/install.ks.ts b/bastion/src/bastion/src/templates/install.ks.ts index 8196a92..45ddbdd 100644 --- a/bastion/src/bastion/src/templates/install.ks.ts +++ b/bastion/src/bastion/src/templates/install.ks.ts @@ -447,6 +447,9 @@ systemctl mask firewalld || true # -- Enable chronyd for time sync -- systemctl enable chronyd || true`} +# -- Serial console (for debugging — auto-login as root on ttyS0) -- +systemctl enable serial-getty@ttyS0.service || true + # -- Boot order: restore network first (Anaconda sets disk first, we undo it) -- # Network boot must stay first so the bastion intercepts every reboot. It returns # exit (local disk) for installed machines, or install for reinstalls. diff --git a/bastion/tests/integration/helpers/pxe-vm.ts b/bastion/tests/integration/helpers/pxe-vm.ts index ac4a354..703f434 100644 --- a/bastion/tests/integration/helpers/pxe-vm.ts +++ b/bastion/tests/integration/helpers/pxe-vm.ts @@ -1,9 +1,11 @@ // Create a blank UEFI VM for PXE boot testing. // Unlike cloud image VMs, these have an empty disk and boot from network. +// Each VM gets a serial console on a TCP port for debugging without network/SSH. 
import { execSync, spawnSync, type SpawnSyncReturns } from "node:child_process"; import { existsSync } from "node:fs"; import { join } from "node:path"; +import { createConnection } from "node:net"; import { log } from "./libvirt.js"; const IMAGE_DIR = "/var/lib/libvirt/images"; @@ -68,6 +70,9 @@ export function createPxeVm(config: PxeVmConfig): void { "--wait=0", // Graphics for debugging (VNC, connect with virt-viewer if needed) "--graphics=vnc,listen=127.0.0.1", + // Serial console via TCP — allows exec without network/SSH + // Connect: socat - TCP:127.0.0.1:4555 + "--serial=tcp,host=127.0.0.1:4555,mode=bind,protocol=telnet", ]; if (arch === "aarch64") { @@ -76,7 +81,7 @@ export function createPxeVm(config: PxeVmConfig): void { log(`Running: virt-install --name=${config.name} --boot=uefi,network ...`); run(virtInstallArgs.join(" "), { timeout: 30_000 }); - log(`PXE VM ${config.name} created and booting from network`); + log(`PXE VM ${config.name} created (serial: telnet 127.0.0.1 4555)`); } /** Destroy a PXE VM and clean up its disk. */ @@ -169,6 +174,8 @@ export function createIsoVm(config: IsoVmConfig): void { "--noautoconsole", "--wait=0", "--graphics=vnc,listen=127.0.0.1", + // Serial console via TCP (port 4556 to avoid conflict with PXE VM) + "--serial=tcp,host=127.0.0.1:4556,mode=bind,protocol=telnet", ]; if (arch === "aarch64") { @@ -177,5 +184,72 @@ export function createIsoVm(config: IsoVmConfig): void { log(`Running: virt-install --name=${config.name} --boot=uefi,cdrom ...`); run(virtInstallArgs.join(" "), { timeout: 60_000 }); - log(`ISO boot VM ${config.name} created and booting from ISO`); + log(`ISO boot VM ${config.name} created (serial: telnet 127.0.0.1 4556)`); +} + +/** + * Execute a command on a VM via its serial console (telnet). + * Works even when the VM has no network/SSH. + * Returns the output after the command's echo. 
/**
 * Execute a command on a VM via its serial console (telnet on 127.0.0.1).
 * Works even when the VM has no network/SSH.
 *
 * Flow: wait for a login or shell prompt (nudging the console with a newline
 * after 500 ms if it stays silent), log in as `root` when a login prompt is
 * showing, send the command followed by an end marker, and collect everything
 * printed before the marker.
 *
 * NOTE(review): a password prompt is never answered, so this presumably relies
 * on root being able to log in on the console without a password — confirm.
 *
 * @param port - TCP port of the VM's serial console.
 * @param command - Shell command line to run on the VM.
 * @param timeoutMs - Overall deadline for connect + login + command.
 * @returns The command's output with the echoed command line, carriage
 *   returns and ANSI escape sequences removed.
 * @throws Error on timeout, on a socket error, or if the console connection
 *   closes before the command finishes.
 */
export async function serialExec(
  port: number,
  command: string,
  timeoutMs = 10_000,
): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const stamp = Date.now();
    const marker = `__SERIAL_END_${stamp}__`;
    // The tty echoes whatever we type. If the marker appeared verbatim in the
    // command line we would match that echo instead of the real end of output.
    // The shell joins adjacent quoted strings, so only the *output* of this
    // echo contains the complete marker.
    const markerCommand = `echo "__SERIAL_""END_${stamp}__"`;

    const sock = createConnection({ host: "127.0.0.1", port });
    let buffer = "";
    let sentCommand = false;
    let settled = false;
    let nudgeTimer: ReturnType<typeof setTimeout> | undefined;

    // Single exit path: stop both timers, drop the socket, settle exactly once.
    const finish = (settle: () => void): void => {
      if (settled) return;
      settled = true;
      clearTimeout(deadline);
      clearTimeout(nudgeTimer);
      sock.destroy();
      settle();
    };

    const deadline = setTimeout(() => {
      finish(() => reject(new Error(`Serial exec timeout after ${timeoutMs}ms`)));
    }, timeoutMs);

    // Strip terminal noise, then drop the first line (the echoed command).
    const cleanOutput = (raw: string): string => {
      const lines = raw
        .replace(/\x1b\[[0-9;?]*[A-Za-z]/g, "")
        .replace(/\r/g, "")
        .trim()
        .split("\n");
      return lines.slice(1).join("\n").trim();
    };

    sock.on("connect", () => {
      // An idle console prints nothing; a newline makes it re-issue its prompt.
      nudgeTimer = setTimeout(() => {
        if (!sentCommand) sock.write("\r\n");
      }, 500);
    });

    sock.on("data", (data: Buffer) => {
      buffer += data.toString();

      if (!sentCommand) {
        // Only a prompt that ends the current line counts as a login prompt;
        // the "Last login: ..." banner also contains "login:" and must not
        // trigger another "root".
        const currentLine = buffer.slice(buffer.lastIndexOf("\n") + 1);
        if (/login:\s*$/.test(currentLine)) {
          sock.write("root\r\n");
          buffer = ""; // wait for the shell prompt after login
          return;
        }
        if (buffer.includes("# ") || buffer.includes("$ ")) {
          // At shell prompt — send command with marker
          sentCommand = true;
          buffer = "";
          sock.write(`${command}; ${markerCommand}\r\n`);
        }
        return;
      }

      const markerIdx = buffer.indexOf(marker);
      if (markerIdx === -1) return;
      finish(() => resolve(cleanOutput(buffer.slice(0, markerIdx))));
    });

    sock.on("error", (err) => {
      finish(() => reject(new Error(`Serial connection failed: ${err.message}`)));
    });

    // Fail fast instead of idling until the deadline when the peer hangs up.
    sock.on("close", () => {
      finish(() =>
        reject(new Error("Serial connection failed: connection closed before command completed")),
      );
    });
  });
}
"node:path"; import { homedir, tmpdir } from "node:os"; import { log, waitForSsh } from "./helpers/libvirt.js"; import { ensurePxeNetwork, destroyPxeNetwork, PXE_NETWORK_NAME, PXE_GATEWAY, PXE_SUBNET } from "./helpers/pxe-network.js"; -import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm } from "./helpers/pxe-vm.js"; +import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm, serialExec } from "./helpers/pxe-vm.js"; import { sshExec } from "./helpers/ssh.js"; // --- Test constants --- @@ -277,7 +277,29 @@ describe("PXE boot provisioning", () => { // 10. Wait for SSH — VM network-boots, iPXE chains to /dispatch, // bastion returns exit (installed), iPXE falls through to disk boot log("Waiting for SSH access..."); - await waitForSsh(vmIp, SSH_USER, SSH_TIMEOUT_MS, sshKeyPath); + try { + await waitForSsh(vmIp, SSH_USER, SSH_TIMEOUT_MS, sshKeyPath); + } catch { + // SSH failed — use serial console to diagnose + log("SSH timed out. Diagnosing via serial console..."); + try { + const hostname = await serialExec(4555, "hostname", 15_000); + log(`Serial: hostname = ${hostname}`); + const ip = await serialExec(4555, "ip -4 addr show | grep inet", 15_000); + log(`Serial: ip = ${ip}`); + const nm = await serialExec(4555, "systemctl is-active NetworkManager", 15_000); + log(`Serial: NetworkManager = ${nm}`); + const sshd = await serialExec(4555, "systemctl is-active sshd", 15_000); + log(`Serial: sshd = ${sshd}`); + const failed = await serialExec(4555, "systemctl --failed --no-pager", 15_000); + log(`Serial: failed units = ${failed}`); + const fstab = await serialExec(4555, "grep efi /etc/fstab", 15_000); + log(`Serial: fstab efi = ${fstab}`); + } catch (serialErr) { + log(`Serial console failed: ${serialErr instanceof Error ? 
serialErr.message : String(serialErr)}`); + } + throw new Error(`SSH not available on ${vmIp} — check serial console diagnostics above`); + } log("PXE provision test setup complete."); }, DISCOVERY_TIMEOUT_MS + INSTALL_TIMEOUT_MS + SSH_TIMEOUT_MS + 120_000); // total timeout