fix: PXE boot debugging — bisect root cause, syslog logging, serial console #3
@@ -14,6 +14,8 @@ export function loadConfig(overrides: Partial<BastionConfig> = {}): BastionConfi
|
||||
const dhcpRangeStart = overrides.dhcpRangeStart ?? process.env["DHCP_RANGE_START"] ?? "";
|
||||
const dhcpRangeEnd = overrides.dhcpRangeEnd ?? process.env["DHCP_RANGE_END"] ?? "";
|
||||
|
||||
const syslogPort = overrides.syslogPort ?? parseInt(process.env["SYSLOG_PORT"] ?? "5514", 10);
|
||||
|
||||
const ubuntuVersion = overrides.ubuntuVersion ?? process.env["UBUNTU_VERSION"] ?? "26.04";
|
||||
const ubuntuMirror = overrides.ubuntuMirror ?? process.env["UBUNTU_MIRROR"]
|
||||
?? `https://releases.ubuntu.com/${ubuntuVersion}`;
|
||||
@@ -43,6 +45,7 @@ export function loadConfig(overrides: Partial<BastionConfig> = {}): BastionConfi
|
||||
gateway: overrides.gateway ?? "",
|
||||
sshKeys: overrides.sshKeys ?? [],
|
||||
adminUser: overrides.adminUser ?? "",
|
||||
syslogPort,
|
||||
skipDnsmasq: overrides.skipDnsmasq,
|
||||
skipArtifacts: overrides.skipArtifacts,
|
||||
labdUrl: overrides.labdUrl ?? process.env["LABD_URL"],
|
||||
|
||||
@@ -220,10 +220,11 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
openFirewall(config);
|
||||
}
|
||||
|
||||
// Start HTTP server
|
||||
const { app, state } = createApp(config);
|
||||
// Start HTTP server + syslog listener
|
||||
const { app, state, syslog } = createApp(config);
|
||||
await app.listen({ port: config.httpPort, host: "0.0.0.0" });
|
||||
logger.info(`HTTP server listening on :${config.httpPort}`);
|
||||
syslog.start();
|
||||
|
||||
// Start dnsmasq (unless skipped)
|
||||
if (config.skipDnsmasq !== true) {
|
||||
@@ -310,6 +311,7 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
// Graceful shutdown
|
||||
const shutdown = async (): Promise<void> => {
|
||||
logger.info("Shutting down...");
|
||||
syslog.stop();
|
||||
if (labdConn) labdConn.close();
|
||||
if (config.skipDnsmasq !== true) stopDnsmasq();
|
||||
closeFirewall(config);
|
||||
|
||||
@@ -6,13 +6,14 @@ import { mkdirSync, existsSync } from "node:fs";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import { StateManager } from "./services/state.js";
|
||||
import { InstallLogBuffer } from "./services/install-log.js";
|
||||
import { SyslogListener } from "./services/syslog-listener.js";
|
||||
import { logger } from "./services/logger.js";
|
||||
import { registerDispatchRoutes } from "./routes/dispatch.js";
|
||||
import { registerKickstartRoutes } from "./routes/kickstart.js";
|
||||
import { registerApiRoutes } from "./routes/api.js";
|
||||
|
||||
|
||||
export function createApp(config: BastionConfig): { app: ReturnType<typeof Fastify>; state: StateManager; installLog: InstallLogBuffer } {
|
||||
export function createApp(config: BastionConfig): { app: ReturnType<typeof Fastify>; state: StateManager; installLog: InstallLogBuffer; syslog: SyslogListener } {
|
||||
const app = Fastify({
|
||||
logger: false, // We use winston instead
|
||||
});
|
||||
@@ -21,6 +22,7 @@ export function createApp(config: BastionConfig): { app: ReturnType<typeof Fasti
|
||||
state.init();
|
||||
|
||||
const installLog = new InstallLogBuffer(config.bastionDir);
|
||||
const syslog = new SyslogListener(config.syslogPort, installLog, state);
|
||||
|
||||
// Serve static files (vmlinuz, initrd.img, iPXE binaries) from the HTTP directory
|
||||
mkdirSync(config.httpDir, { recursive: true });
|
||||
@@ -51,7 +53,7 @@ export function createApp(config: BastionConfig): { app: ReturnType<typeof Fasti
|
||||
logger.info(`HTTP: ${request.ip} ${request.method} ${request.url}`);
|
||||
});
|
||||
|
||||
return { app, state, installLog };
|
||||
return { app, state, installLog, syslog };
|
||||
}
|
||||
|
||||
export async function startServer(config: BastionConfig): Promise<void> {
|
||||
|
||||
@@ -36,6 +36,7 @@ export function generateInstallKickstart(
|
||||
locale: config.locale,
|
||||
serverIp: config.serverIp,
|
||||
httpPort: config.httpPort,
|
||||
syslogPort: config.syslogPort,
|
||||
sshKeys: config.sshKeys,
|
||||
adminUser: config.adminUser,
|
||||
};
|
||||
|
||||
99
bastion/src/bastion/src/services/syslog-listener.ts
Normal file
99
bastion/src/bastion/src/services/syslog-listener.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
// UDP syslog listener for receiving Anaconda install logs.
|
||||
// Anaconda's `logging --host` sends RFC 3164 syslog over UDP.
|
||||
// We parse the messages and route them to InstallLogBuffer.
|
||||
|
||||
import { createSocket, type Socket } from "node:dgram";
|
||||
import type { InstallLogBuffer } from "./install-log.js";
|
||||
import type { StateManager } from "./state.js";
|
||||
import { logger } from "./logger.js";
|
||||
|
||||
/**
 * Parse a BSD syslog (RFC 3164) datagram into its program tag and message text.
 *
 * Expected layout: `<PRI>Mon DD HH:MM:SS [HOSTNAME] PROGRAM[PID]: MESSAGE`.
 * The HOSTNAME field is treated as optional because some senders omit it,
 * e.g. `<13>Mar 28 19:32:01 anaconda[1234]: some message`.
 *
 * @param raw - One syslog datagram decoded as text.
 * @returns The program name (without the `[PID]` suffix) and the message body.
 *   When the header is not recognised, `program` is `"unknown"` and `message`
 *   is the whole line with only the priority prefix stripped and whitespace trimmed.
 */
function parseSyslogLine(raw: string): { program: string; message: string } {
  // Strip the leading priority marker: <NN>
  const noPri = raw.replace(/^<\d+>/, "");

  // Timestamp, then an optional hostname, then "PROGRAM[PID]:" and the message.
  // The hostname token must not end in ":" — otherwise, for a line without a
  // hostname, the "PROGRAM[PID]:" tag would be consumed as the hostname and the
  // first "word:" inside the message would be reported as the program.
  const match = noPri.match(
    /^\w+\s+\d+\s+[\d:]+\s+(?:\S*[^\s:]\s+)?(\S+?)(?:\[\d+\])?:\s*(.*)/,
  );
  if (match) {
    return { program: match[1], message: match[2] };
  }

  // Fallback: hand back the whole line so nothing is lost
  return { program: "unknown", message: noPri.trim() };
}
|
||||
|
||||
/**
 * UDP syslog receiver that files incoming log lines under the machine that sent them.
 *
 * Each datagram is parsed with `parseSyslogLine`, the sender's IP is mapped to a
 * MAC address via the state returned by `StateManager.load()`, and the resulting
 * line is appended to the `InstallLogBuffer` for that MAC.
 */
export class SyslogListener {
  private socket: Socket | null = null;
  private readonly port: number;
  private readonly installLog: InstallLogBuffer;
  private readonly state: StateManager;

  /**
   * @param port - UDP port to bind when `start()` is called.
   * @param installLog - Buffer that receives the parsed lines.
   * @param state - Source of the install queue / installed-machine state used
   *   to map sender IPs to MAC addresses and hostnames.
   */
  constructor(port: number, installLog: InstallLogBuffer, state: StateManager) {
    this.port = port;
    this.installLog = installLog;
    this.state = state;
  }

  /**
   * Report whether `text` mentions `ip` as a complete dotted address.
   *
   * A plain substring test would let "192.168.1.1" match text that mentions
   * "192.168.1.10" (or "2192.168.1.1"), so the characters around each hit are
   * checked: the hit must not be preceded by a digit or dot, and must not be
   * followed by a digit or by a dot-then-digit. A trailing sentence period
   * ("ready at 10.0.0.5.") is still accepted.
   */
  private static mentionsIp(text: string, ip: string): boolean {
    if (ip === "") return false;
    let at = text.indexOf(ip);
    while (at !== -1) {
      const end = at + ip.length;
      // charAt() yields "" when the index is out of range, which passes both checks.
      const before = text.charAt(at - 1);
      const after = text.charAt(end);
      const leftOk = !/[\d.]/.test(before);
      const rightOk = !/\d/.test(after) && !(after === "." && /\d/.test(text.charAt(end + 1)));
      if (leftOk && rightOk) return true;
      at = text.indexOf(ip, at + 1);
    }
    return false;
  }

  /**
   * Resolve a source IP to a MAC address.
   *
   * The install queue is consulted first (an entry matches when its
   * `progress_detail` text mentions the IP), then the installed machines
   * (an entry matches when its `ip` equals the address).
   *
   * @returns The matching MAC, or `null` when the IP is not known.
   */
  private resolveIpToMac(ip: string): string | null {
    const currentState = this.state.load();

    // Queue entries carry no dedicated IP field that is used here; the only
    // place the address can show up is the free-text progress detail.
    for (const [mac, entry] of Object.entries(currentState.install_queue)) {
      const detail = entry.progress_detail;
      if (detail && SyslogListener.mentionsIp(detail, ip)) return mac;
    }

    // Installed machines record their IP explicitly.
    for (const [mac, info] of Object.entries(currentState.installed)) {
      if (info.ip === ip) return mac;
    }

    return null;
  }

  /**
   * Resolve a MAC to a hostname from the install queue, then the installed
   * machines; falls back to the MAC itself when neither has a hostname.
   */
  private resolveHostname(mac: string): string {
    const s = this.state.load();
    return s.install_queue[mac]?.hostname ?? s.installed[mac]?.hostname ?? mac;
  }

  /**
   * Bind the UDP socket on all IPv4 interfaces and begin routing datagrams.
   *
   * Calling `start()` while a socket is already held is a no-op; creating a
   * second socket would overwrite the reference to the first one and make it
   * impossible for `stop()` to close it.
   */
  start(): void {
    if (this.socket) return;

    const socket = createSocket("udp4");
    this.socket = socket;

    socket.on("message", (msg, rinfo) => {
      const raw = msg.toString("utf-8").trim();
      if (!raw) return;

      const { program, message } = parseSyslogLine(raw);
      const mac = this.resolveIpToMac(rinfo.address);

      // Datagrams from senders that cannot be mapped to a MAC are dropped:
      // they are not stored in the install log buffer.
      if (mac) {
        const hostname = this.resolveHostname(mac);
        const line = program !== "unknown" ? `[${program}] ${message}` : message;
        this.installLog.append(mac, [line], hostname);
      }
    });

    socket.on("error", (err) => {
      logger.error(`Syslog listener error: ${err.message}`);
    });

    socket.bind(this.port, "0.0.0.0", () => {
      logger.info(`Syslog listener on UDP :${this.port}`);
    });
  }

  /** Close the socket if one is open; safe to call when not started. */
  stop(): void {
    if (this.socket) {
      this.socket.close();
      this.socket = null;
    }
  }
}
|
||||
@@ -14,6 +14,7 @@ export interface InstallKickstartParams {
|
||||
locale: string;
|
||||
serverIp: string;
|
||||
httpPort: number;
|
||||
syslogPort: number;
|
||||
sshKeys: string[];
|
||||
adminUser: string;
|
||||
}
|
||||
@@ -29,6 +30,7 @@ export function renderInstallKickstart(params: InstallKickstartParams): string {
|
||||
locale,
|
||||
serverIp,
|
||||
httpPort,
|
||||
syslogPort,
|
||||
sshKeys,
|
||||
adminUser,
|
||||
} = params;
|
||||
@@ -119,6 +121,8 @@ ${userDirective}
|
||||
|
||||
bootloader --append="console=tty0 console=ttyS0,115200n8"
|
||||
|
||||
logging --host=${serverIp} --port=${syslogPort} --level=info
|
||||
|
||||
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch
|
||||
|
||||
# Partitioning is generated dynamically by %pre (supports reprovision preservation)
|
||||
@@ -344,7 +348,14 @@ ${isVanilla ? `# -- vanilla role: skip k3s kernel/sysctl/firewall setup --
|
||||
systemctl enable chronyd || true
|
||||
|
||||
# -- Serial console (for debugging — auto-login as root on ttyS0) --
|
||||
systemctl enable serial-getty@ttyS0.service || true` : `# -- Kernel modules for k3s --
|
||||
# AWS EC2 compatible: ttyS0 @ 115200n8
|
||||
systemctl enable serial-getty@ttyS0.service || true
|
||||
|
||||
# -- Forward all system logs to serial console --
|
||||
cat > /etc/rsyslog.d/serial-console.conf << 'RSYSLOG'
|
||||
*.* /dev/ttyS0
|
||||
RSYSLOG
|
||||
systemctl enable rsyslog || true` : `# -- Kernel modules for k3s --
|
||||
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
|
||||
br_netfilter
|
||||
overlay
|
||||
|
||||
@@ -12,6 +12,7 @@ function baseParams(overrides: Partial<InstallKickstartParams> = {}): InstallKic
|
||||
locale: "en_GB.UTF-8",
|
||||
serverIp: "192.168.1.100",
|
||||
httpPort: 8080,
|
||||
syslogPort: 5514,
|
||||
sshKeys: [
|
||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST1 user1@host",
|
||||
"ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQTEST2 user2@host",
|
||||
@@ -181,4 +182,16 @@ describe("renderInstallKickstart", () => {
|
||||
expect(ks).toContain('"complete"');
|
||||
expect(ks).toContain("ready at");
|
||||
});
|
||||
|
||||
it("sends install logs to bastion via syslog", () => {
|
||||
const ks = renderInstallKickstart(baseParams({ syslogPort: 5514 }));
|
||||
expect(ks).toContain("logging --host=192.168.1.100 --port=5514 --level=info");
|
||||
});
|
||||
|
||||
it("forwards system logs to serial console", () => {
|
||||
const ks = renderInstallKickstart(baseParams({ role: "vanilla" }));
|
||||
expect(ks).toContain("serial-console.conf");
|
||||
expect(ks).toContain("/dev/ttyS0");
|
||||
expect(ks).toContain("rsyslog");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -100,6 +100,7 @@ export type BastionMessage =
|
||||
| { type: "bastion-heartbeat"; bastionId: string; uptime: number; machineCount: number }
|
||||
| { type: "bastion-state-sync"; bastionId: string; state: import("../types/state.js").BastionState }
|
||||
| { type: "bastion-progress"; bastionId: string; mac: string; stage: string; detail: string; timestamp: string }
|
||||
| { type: "bastion-install-log"; bastionId: string; mac: string; hostname: string; provisionerType: import("../types/state.js").ProvisionStackType; sessionId: string; lines: string[]; timestamp: string }
|
||||
| { type: "command-response"; requestId: string; status: "ok" | "error"; data?: unknown; error?: string };
|
||||
|
||||
// --- labd -> Bastion messages ---
|
||||
@@ -119,7 +120,7 @@ export type LabdBastionMessageType = LabdBastionMessage["type"];
|
||||
|
||||
const BASTION_MESSAGE_TYPES = new Set<string>([
|
||||
"bastion-enroll", "bastion-heartbeat", "bastion-state-sync",
|
||||
"bastion-progress", "command-response",
|
||||
"bastion-progress", "bastion-install-log", "command-response",
|
||||
]);
|
||||
|
||||
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
|
||||
|
||||
@@ -14,6 +14,8 @@ export interface BastionConfig {
|
||||
// Ubuntu support
|
||||
ubuntuVersion: string;
|
||||
ubuntuMirror: string;
|
||||
// Syslog listener for install logs (Anaconda logging --host)
|
||||
syslogPort: number;
|
||||
// Flags
|
||||
skipDnsmasq?: boolean | undefined;
|
||||
skipArtifacts?: boolean | undefined;
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
// State types for discovered machines, install queue, and installed machines.
|
||||
|
||||
export type ProvisionStackType = "dhcpproxy" | "iso" | "cloud-init";
|
||||
|
||||
export type OsId = "fedora-43" | "ubuntu-26.04";
|
||||
export type Arch = "x86_64" | "aarch64";
|
||||
|
||||
|
||||
Reference in New Issue
Block a user