fix: PXE boot debugging — bisect root cause, syslog logging, serial console #3

Merged
michal merged 31 commits from wip/ks-debugging into main 2026-03-29 00:50:05 +00:00
2 changed files with 76 additions and 13 deletions
Showing only changes of commit 7cfd8fe1b8 - Show all commits

View File

@@ -1,6 +1,8 @@
// CLI command: init bastion standalone start // CLI command: init bastion standalone start
// Start the bastion server (HTTP + dnsmasq). // Start the bastion server (HTTP + dnsmasq), daemonized by default.
import { spawn, type ChildProcess } from "node:child_process";
import { existsSync, readFileSync } from "node:fs";
import type { Command } from "commander"; import type { Command } from "commander";
import { startBastion } from "@lab/bastion"; import { startBastion } from "@lab/bastion";
@@ -18,6 +20,7 @@ export function registerStartCommand(parent: Command): void {
.option("--locale <locale>", "Locale", "en_GB.UTF-8") .option("--locale <locale>", "Locale", "en_GB.UTF-8")
.option("--skip-dnsmasq", "Skip starting dnsmasq (for testing)") .option("--skip-dnsmasq", "Skip starting dnsmasq (for testing)")
.option("--skip-artifacts", "Skip downloading boot artifacts (for testing)") .option("--skip-artifacts", "Skip downloading boot artifacts (for testing)")
.option("--foreground", "Run in foreground (default: daemonize)")
.action(async (opts: { .action(async (opts: {
port: string; port: string;
dir: string; dir: string;
@@ -29,18 +32,74 @@ export function registerStartCommand(parent: Command): void {
locale: string; locale: string;
skipDnsmasq?: boolean; skipDnsmasq?: boolean;
skipArtifacts?: boolean; skipArtifacts?: boolean;
foreground?: boolean;
}) => { }) => {
await startBastion({ if (opts.foreground === true) {
httpPort: parseInt(opts.port, 10), // Run in foreground
bastionDir: opts.dir, await startBastion({
domain: opts.domain, httpPort: parseInt(opts.port, 10),
dhcpMode: opts.dhcpMode as "proxy" | "full", bastionDir: opts.dir,
fedoraVersion: opts.fedora, domain: opts.domain,
arch: opts.arch, dhcpMode: opts.dhcpMode as "proxy" | "full",
timezone: opts.timezone, fedoraVersion: opts.fedora,
locale: opts.locale, arch: opts.arch,
skipDnsmasq: opts.skipDnsmasq, timezone: opts.timezone,
skipArtifacts: opts.skipArtifacts, locale: opts.locale,
skipDnsmasq: opts.skipDnsmasq,
skipArtifacts: opts.skipArtifacts,
});
return;
}
// Daemonize: spawn ourselves with --foreground and detach
const logFile = `${opts.dir}/bastion.log`;
const args = process.argv.slice(1);
// Add --foreground flag
args.push("--foreground");
const child: ChildProcess = spawn(process.argv[0] ?? "lab", args, {
detached: true,
stdio: ["ignore", "pipe", "pipe"],
});
// Collect initial output to confirm startup
let output = "";
const timeout = setTimeout(() => {
child.stdout?.removeAllListeners();
child.stderr?.removeAllListeners();
child.unref();
console.log(`Bastion starting in background (PID ${child.pid})`);
console.log(`Log: ${logFile}`);
process.exit(0);
}, 3000);
child.stdout?.on("data", (data: Buffer) => {
output += data.toString();
process.stdout.write(data);
if (output.includes("Waiting for PXE boot requests")) {
clearTimeout(timeout);
child.stdout?.removeAllListeners();
child.stderr?.removeAllListeners();
child.unref();
// Check PID file
const pidFile = `${opts.dir}/bastion.pid`;
const pid = existsSync(pidFile) ? readFileSync(pidFile, "utf-8").trim() : String(child.pid);
console.log("");
console.log(`Bastion running in background (PID ${pid})`);
console.log(`Log: ${logFile}`);
process.exit(0);
}
});
child.stderr?.on("data", (data: Buffer) => {
process.stderr.write(data);
});
child.on("exit", (code) => {
clearTimeout(timeout);
console.error(`Bastion exited with code ${code}`);
process.exit(code ?? 1);
}); });
}); });
} }

View File

@@ -5,9 +5,13 @@ import { readFileSync, existsSync, statSync } from "node:fs";
import type { Command } from "commander"; import type { Command } from "commander";
import type { BastionState } from "@lab/shared"; import type { BastionState } from "@lab/shared";
import { execSync } from "node:child_process";
function isProcessAlive(pid: number): boolean { function isProcessAlive(pid: number): boolean {
try { try {
process.kill(pid, 0); // process.kill(pid, 0) fails for root-owned processes when run as non-root
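// The shell's kill -0 is subject to the same permission check (EPERM), so the /proc/<pid> test is the fallback that detects live processes owned by other users (Linux only)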
execSync(`kill -0 ${pid} 2>/dev/null || test -d /proc/${pid}`, { stdio: "pipe" });
return true; return true;
} catch { } catch {
return false; return false;