fix: PXE boot debugging — bisect root cause, syslog logging, serial console #3

Merged
michal merged 31 commits from wip/ks-debugging into main 2026-03-29 00:50:05 +00:00
2 changed files with 71 additions and 10 deletions
Showing only changes of commit d01b675cca - Show all commits

View File

@@ -45,6 +45,47 @@ function symlinkSafe(target: string, linkPath: string): void {
} }
} }
/**
 * Runs a command line through the shell and reports whether it succeeded.
 *
 * The command and its arguments are joined with single spaces and handed to
 * `execSync`, so no quoting is applied to the individual arguments.
 *
 * @param cmd - Executable (or shell builtin) to invoke.
 * @param args - Arguments appended to `cmd`, space-separated.
 * @returns `true` when `execSync` completes without throwing, `false` otherwise.
 */
function runCmd(cmd: string, args: string[]): boolean {
  let succeeded = false;
  try {
    const commandLine = [cmd, args.join(" ")].join(" ");
    // Output is piped (captured) rather than inherited by the parent's terminal.
    execSync(commandLine, { stdio: "pipe" });
    succeeded = true;
  } catch {
    // Any failure (non-zero exit, spawn error) is reported as `false`.
  }
  return succeeded;
}
// Module-level firewall state shared by openFirewall() and closeFirewall().

// `--zone=<name>` argument for firewall-cmd; stays empty (default zone) unless
// openFirewall() detects a zone for the configured interface.
let fwZoneFlag = "";
// Set to true at the end of openFirewall(); closeFirewall() does nothing while false.
let fwOpened = false;
/**
 * Adds the firewalld rules the bastion relies on: the `dhcp` and `tftp`
 * services, the configured HTTP port over TCP, and UDP port 4011.
 *
 * Returns without doing anything when `firewall-cmd --state` fails. When the
 * zone of `config.iface` can be determined it is remembered in `fwZoneFlag`
 * and applied to every rule; otherwise the default zone is used. Individual
 * rule failures are ignored. On completion `fwOpened` is set so that
 * `closeFirewall` knows there is something to undo.
 *
 * @param config - Bastion configuration; `iface` and `httpPort` are read.
 */
function openFirewall(config: BastionConfig): void {
  // Check if firewalld is running
  const firewalldActive = runCmd("firewall-cmd", ["--state"]);
  if (!firewalldActive) {
    return;
  }

  // Detect zone for our interface
  try {
    const detectedZone = execSync(
      `firewall-cmd --get-zone-of-interface=${config.iface} 2>/dev/null`,
      { encoding: "utf-8" },
    ).trim();
    if (detectedZone) {
      fwZoneFlag = `--zone=${detectedZone}`;
    }
  } catch {
    /* use default zone */
  }

  const zoneArgs = fwZoneFlag ? [fwZoneFlag] : [];
  logger.info(`Opening firewall ports (DHCP, TFTP, HTTP:${config.httpPort})...`);

  // Applied in this order, one firewall-cmd invocation per rule.
  const rulesToAdd = [
    "--add-service=dhcp",
    "--add-service=tftp",
    `--add-port=${config.httpPort}/tcp`,
    "--add-port=4011/udp",
  ];
  for (const rule of rulesToAdd) {
    runCmd("firewall-cmd", ["--quiet", ...zoneArgs, rule]);
  }

  fwOpened = true;
}
/**
 * Removes the firewalld rules that `openFirewall` added: the `dhcp` and `tftp`
 * services, the configured HTTP port over TCP, and UDP port 4011.
 *
 * Does nothing unless `openFirewall` previously completed (`fwOpened`). The
 * flag is cleared before the rules are removed, so calling this more than
 * once (e.g. a shutdown handler triggered by several signals) only removes
 * the rules a single time. Individual removal failures are ignored.
 *
 * NOTE(review): rules are removed unconditionally, even if they already
 * existed in the zone before `openFirewall` ran — confirm this is acceptable.
 *
 * @param config - Bastion configuration; `httpPort` is read.
 */
function closeFirewall(config: BastionConfig): void {
  if (!fwOpened) return;
  // Clear first so a repeated or re-entrant call becomes a no-op.
  fwOpened = false;

  // Reuse the zone chosen by openFirewall so the same rules are targeted.
  const zf = fwZoneFlag ? [fwZoneFlag] : [];
  logger.info("Removing firewall rules...");
  runCmd("firewall-cmd", ["--quiet", ...zf, "--remove-service=dhcp"]);
  runCmd("firewall-cmd", ["--quiet", ...zf, "--remove-service=tftp"]);
  runCmd("firewall-cmd", ["--quiet", ...zf, `--remove-port=${config.httpPort}/tcp`]);
  runCmd("firewall-cmd", ["--quiet", ...zf, "--remove-port=4011/udp"]);
}
export async function startBastion(overrides: Partial<BastionConfig> = {}): Promise<void> { export async function startBastion(overrides: Partial<BastionConfig> = {}): Promise<void> {
// Load and populate config // Load and populate config
let config = loadConfig(overrides); let config = loadConfig(overrides);
@@ -135,6 +176,11 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
// Generate dnsmasq config // Generate dnsmasq config
generateDnsmasqConf(config); generateDnsmasqConf(config);
// Open firewall ports
if (!config.skipDnsmasq) {
openFirewall(config);
}
// Start HTTP server // Start HTTP server
const { app } = createApp(config); const { app } = createApp(config);
await app.listen({ port: config.httpPort, host: "0.0.0.0" }); await app.listen({ port: config.httpPort, host: "0.0.0.0" });
@@ -167,6 +213,7 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
const shutdown = async () => { const shutdown = async () => {
logger.info("Shutting down..."); logger.info("Shutting down...");
if (!config.skipDnsmasq) stopDnsmasq(); if (!config.skipDnsmasq) stopDnsmasq();
closeFirewall(config);
await app.close(); await app.close();
try { unlinkSync(pidFile); } catch { /* ignore */ } try { unlinkSync(pidFile); } catch { /* ignore */ }
logger.info(`State preserved in ${config.stateFile}`); logger.info(`State preserved in ${config.stateFile}`);

View File

@@ -1,7 +1,10 @@
// CLI command: provision reprovision // CLI command: provision reprovision
// Queue a machine for reinstall and attempt SSH reboot into PXE. // Queue a machine for reinstall and attempt SSH reboot into PXE.
import { execSync } from "node:child_process"; import { execFileSync } from "node:child_process";
import { existsSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import type { Command } from "commander"; import type { Command } from "commander";
import type { BastionState } from "@lab/shared"; import type { BastionState } from "@lab/shared";
@@ -62,16 +65,27 @@ export function registerReprovisionCommand(parent: Command): void {
console.log(""); console.log("");
console.log(`Attempting SSH reboot into PXE (${effectiveUser}@${ip})...`); console.log(`Attempting SSH reboot into PXE (${effectiveUser}@${ip})...`);
try { // Find SSH key
const sshCmd = [ const realHome = process.env["SUDO_USER"]
"ssh", ? join("/home", process.env["SUDO_USER"])
"-o", "StrictHostKeyChecking=no", : homedir();
"-o", "ConnectTimeout=5", const keyPaths = [
`${effectiveUser}@${ip}`, join(realHome, ".ssh", "id_ed25519"),
'sudo efibootmgr 2>/dev/null; PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi', join(realHome, ".ssh", "id_rsa"),
].join(" "); join(realHome, ".ssh", "id_ecdsa"),
];
const sshKey = keyPaths.find(k => existsSync(k));
execSync(sshCmd, { stdio: "inherit" }); const sshArgs = [
"-o", "StrictHostKeyChecking=no",
"-o", "ConnectTimeout=10",
...(sshKey ? ["-i", sshKey] : []),
`${effectiveUser}@${ip}`,
'PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi',
];
try {
execFileSync("ssh", sshArgs, { stdio: "inherit" });
console.log(""); console.log("");
console.log("Machine is rebooting into PXE. Install will start automatically."); console.log("Machine is rebooting into PXE. Install will start automatically.");
} catch { } catch {