feat: install logging, error trapping, PXE/ISO integration tests
Kickstart installs on real hardware failed silently — no error reporting,
only 3 progress callbacks, zero log streaming. This overhaul makes every
install fully observable.
Kickstart improvements:
- Error trapping in %pre and %post (trap ERR sends failure details to bastion)
- 12+ granular progress stages (was 3): SSH, hostname, k3s prep, EFI boot, metadata
- Background log streamer: tails %post output and batch-sends to /api/log
- bastion_log() function for explicit log lines from kickstart scripts
Bastion API:
- POST /api/log — receives raw log lines from kickstart (single or batch)
- InstallLogBuffer — per-MAC ring buffer (2000 lines) + file persistence
- GET /api/logs/:mac — now returns log_lines + log_total alongside stages
- SSE /api/logs/:mac/follow — uses named events (event: stage vs event: log)
- Progress events forwarded to labd via bastion-progress WebSocket message
- Post-provision k3s logs routed through progressBus (was console-only)
dnsmasq fixes found during VM testing:
- HTTP Boot filename: ipxe-real.efi → ipxe.efi (leftover from old 2-stage approach)
- pxe-service directives: only in proxy mode (breaks OVMF PXE in full mode)
- PXEClient vendor class echo for UEFI firmware compatibility
Integration tests:
- PXE boot test: blank UEFI VM → dnsmasq → HTTP Boot → iPXE → bastion → install
- ISO boot test: blank VM boots from bastion-generated ISO → same flow
- Shared helpers: pxe-network (no DHCP, nftables fix), pxe-vm (UEFI + ISO boot)
- test-provision.sh: runs both PXE + ISO tests with prerequisite checks
- 250GB sparse QCOW2 disk (LVM layout needs ~204GB)
201 unit tests passing (11 new).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 22:26:33 +00:00
|
|
|
// Protocol types for agent-labd WebSocket communication.
|
|
|
|
|
|
|
|
|
|
import { randomUUID } from "node:crypto";
|
|
|
|
|
|
|
|
|
|
// --- Agent -> labd messages ---
|
|
|
|
|
|
|
|
|
|
/**
 * Messages sent by an agent to labd, discriminated by the `type` field.
 *
 * The `exec-*` and `log-*` variants carry a `requestId`; the heartbeat,
 * enrollment and rotation variants do not.
 */
export type AgentMessage =
  | { type: "heartbeat"; hostname: string; uptime: number; version: string; memUsage: number; cpuUsage: number }
  | { type: "exec-stdout"; requestId: string; data: string }
  | { type: "exec-stderr"; requestId: string; data: string }
  | { type: "exec-exit"; requestId: string; exitCode: number }
  | { type: "log-line"; requestId: string; line: string }
  | { type: "log-end"; requestId: string }
  | { type: "enrollment-request"; joinToken: string; hostname: string; csr: string }
  | { type: "rotation-request"; currentFingerprint: string; newCsr: string };
|
|
|
|
|
|
|
|
|
|
// --- labd -> Agent messages ---
|
|
|
|
|
|
|
|
|
|
/**
 * Messages sent by labd to an agent, discriminated by the `type` field.
 *
 * The `exec*` and `log-*` variants carry a `requestId`; the enrollment
 * response, heartbeat acknowledgement and shutdown notice do not.
 */
export type ServerMessage =
  | { type: "exec"; requestId: string; command: string; args: string[]; timeout: number; tty: boolean }
  | { type: "exec-stdin"; requestId: string; data: string }
  | { type: "exec-signal"; requestId: string; signal: "SIGTERM" | "SIGKILL" | "SIGINT" }
  | { type: "log-subscribe"; requestId: string; options: JournalOptions }
  | { type: "log-unsubscribe"; requestId: string }
  | { type: "enrollment-response"; status: "success" | "error"; certificatePem?: string; error?: string }
  | { type: "heartbeat-ack"; serverTime: string }
  | { type: "server-shutdown"; reconnectAfter: number };
|
|
|
|
|
|
|
|
|
|
// --- Supporting types ---
|
|
|
|
|
|
|
|
|
|
/**
 * Options carried by the `log-subscribe` server message.
 *
 * Every field is optional. How each one is interpreted is decided by the
 * consumer of the message, which is not part of this file.
 */
export interface JournalOptions {
  follow?: boolean;
  lines?: number;
  unit?: string;
  since?: string;
  priority?: string;
  kernel?: boolean;
  file?: string;
}
|
|
|
|
|
|
|
|
|
|
// --- Message types for discriminated union access ---
|
|
|
|
|
|
|
|
|
|
/** Union of every `type` discriminant that appears in {@link AgentMessage}. */
export type AgentMessageType = AgentMessage["type"];

/** Union of every `type` discriminant that appears in {@link ServerMessage}. */
export type ServerMessageType = ServerMessage["type"];
|
|
|
|
|
|
|
|
|
|
// --- Type guards ---
|
|
|
|
|
|
|
|
|
|
/**
 * Runtime lookup table of the `type` strings accepted by `isAgentMessage`.
 *
 * Must be kept in sync by hand with the variants of `AgentMessage`. Typed as
 * `ReadonlySet` so the table cannot be mutated after module load.
 */
const AGENT_MESSAGE_TYPES: ReadonlySet<string> = new Set<string>([
  "heartbeat",
  "exec-stdout",
  "exec-stderr",
  "exec-exit",
  "log-line",
  "log-end",
  "enrollment-request",
  "rotation-request",
]);
|
|
|
|
|
|
|
|
|
|
/**
 * Runtime lookup table of the `type` strings accepted by `isServerMessage`.
 *
 * Must be kept in sync by hand with the variants of `ServerMessage`. Typed as
 * `ReadonlySet` so the table cannot be mutated after module load.
 */
const SERVER_MESSAGE_TYPES: ReadonlySet<string> = new Set<string>([
  "exec",
  "exec-stdin",
  "exec-signal",
  "log-subscribe",
  "log-unsubscribe",
  "enrollment-response",
  "heartbeat-ack",
  "server-shutdown",
]);
|
|
|
|
|
|
|
|
|
|
/**
 * Runtime type guard for {@link AgentMessage}.
 *
 * Only the `type` discriminant is inspected: the value must be a non-null
 * object whose `type` property is a string listed in `AGENT_MESSAGE_TYPES`.
 * The remaining fields of the matching variant are NOT validated here.
 *
 * @param msg - Any value, typically the result of `JSON.parse`.
 * @returns `true` when `msg.type` is a known agent message type.
 */
export function isAgentMessage(msg: unknown): msg is AgentMessage {
  if (typeof msg !== "object" || msg === null || !("type" in msg)) {
    return false;
  }
  const { type } = msg as { type: unknown };
  return typeof type === "string" && AGENT_MESSAGE_TYPES.has(type);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Runtime type guard for {@link ServerMessage}.
 *
 * Only the `type` discriminant is inspected: the value must be a non-null
 * object whose `type` property is a string listed in `SERVER_MESSAGE_TYPES`.
 * The remaining fields of the matching variant are NOT validated here.
 *
 * @param msg - Any value, typically the result of `JSON.parse`.
 * @returns `true` when `msg.type` is a known server message type.
 */
export function isServerMessage(msg: unknown): msg is ServerMessage {
  if (typeof msg !== "object" || msg === null || !("type" in msg)) {
    return false;
  }
  const { type } = msg as { type: unknown };
  return typeof type === "string" && SERVER_MESSAGE_TYPES.has(type);
}
|
|
|
|
|
|
|
|
|
|
// --- Parsing utilities ---
|
|
|
|
|
|
|
|
|
|
/**
 * Parses a JSON string and checks that it is an {@link AgentMessage}.
 *
 * Validation is delegated to `isAgentMessage`, so only the `type`
 * discriminant is checked.
 *
 * @param data - Raw JSON text.
 * @returns The parsed value, narrowed to `AgentMessage`.
 * @throws SyntaxError if `data` is not valid JSON (raised by `JSON.parse`).
 * @throws Error with message `Invalid agent message: <type>` when the parsed
 *   value is not a recognised agent message; `<type>` is `unknown` when the
 *   value has no `type`.
 */
export function parseAgentMessage(data: string): AgentMessage {
  const msg: unknown = JSON.parse(data);
  if (!isAgentMessage(msg)) {
    // JSON.parse("null") yields null; reading `.type` from it would raise a
    // TypeError and mask the intended error, so only look at real objects.
    const type =
      typeof msg === "object" && msg !== null ? (msg as { type?: unknown }).type : undefined;
    throw new Error(`Invalid agent message: ${String(type ?? "unknown")}`);
  }
  return msg;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Parses a JSON string and checks that it is a {@link ServerMessage}.
 *
 * Validation is delegated to `isServerMessage`, so only the `type`
 * discriminant is checked.
 *
 * @param data - Raw JSON text.
 * @returns The parsed value, narrowed to `ServerMessage`.
 * @throws SyntaxError if `data` is not valid JSON (raised by `JSON.parse`).
 * @throws Error with message `Invalid server message: <type>` when the parsed
 *   value is not a recognised server message; `<type>` is `unknown` when the
 *   value has no `type`.
 */
export function parseServerMessage(data: string): ServerMessage {
  const msg: unknown = JSON.parse(data);
  if (!isServerMessage(msg)) {
    // JSON.parse("null") yields null; reading `.type` from it would raise a
    // TypeError and mask the intended error, so only look at real objects.
    const type =
      typeof msg === "object" && msg !== null ? (msg as { type?: unknown }).type : undefined;
    throw new Error(`Invalid server message: ${String(type ?? "unknown")}`);
  }
  return msg;
}
|
|
|
|
|
|
|
|
|
|
// --- Bastion -> labd messages ---
|
|
|
|
|
|
|
|
|
|
/**
 * Messages sent by a bastion to labd, discriminated by the `type` field.
 *
 * `bastion-progress` and `bastion-install-log` are keyed by the `mac` of the
 * machine they describe; `command-response` answers a command identified by
 * `requestId`.
 */
export type BastionMessage =
  | { type: "bastion-enroll"; token: string; hostname: string; network: string; serverIp: string }
  | { type: "bastion-heartbeat"; bastionId: string; uptime: number; machineCount: number }
  | { type: "bastion-state-sync"; bastionId: string; state: import("../types/state.js").BastionState }
  | { type: "bastion-progress"; bastionId: string; mac: string; stage: string; detail: string; timestamp: string }
  | { type: "bastion-install-log"; bastionId: string; mac: string; hostname: string; provisionerType: import("../types/state.js").ProvisionStackType; sessionId: string; lines: string[]; timestamp: string }
  | { type: "command-response"; requestId: string; status: "ok" | "error"; data?: unknown; error?: string };
|
|
|
|
|
|
|
|
|
|
// --- labd -> Bastion messages ---
|
|
|
|
|
|
|
|
|
|
/**
 * Messages sent by labd to a bastion, discriminated by the `type` field.
 *
 * Every `command-*` variant carries a `requestId` and the target machine's
 * `mac`.
 */
export type LabdBastionMessage =
  | { type: "bastion-enrolled"; bastionId: string }
  | { type: "bastion-heartbeat-ack"; serverTime: string }
  | { type: "command-install"; requestId: string; mac: string; hostname: string; disk?: string; role: string; os: string }
  | { type: "command-forget"; requestId: string; mac: string }
  | { type: "command-role-update"; requestId: string; mac: string; role: string }
  | { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
  | { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
  | { type: "command-discover"; requestId: string; mac: string; product?: string; board?: string; serial?: string; manufacturer?: string; cpu_model?: string; cpu_cores?: number; memory_gb?: number; arch?: string; disks?: Array<{ name: string; size_gb: number; model: string }>; nics?: Array<{ name: string; mac: string; state: string }> }
  | { type: "server-shutdown"; reconnectAfter: number };
|
|
|
|
|
|
|
|
|
|
/** Union of every `type` discriminant that appears in {@link BastionMessage}. */
export type BastionMessageType = BastionMessage["type"];

/** Union of every `type` discriminant that appears in {@link LabdBastionMessage}. */
export type LabdBastionMessageType = LabdBastionMessage["type"];
|
|
|
|
|
|
|
|
|
|
// --- Bastion type guards ---
|
|
|
|
|
|
|
|
|
|
/**
 * Runtime lookup table of the `type` strings accepted by `isBastionMessage`.
 *
 * Must be kept in sync by hand with the variants of `BastionMessage`. Typed
 * as `ReadonlySet` so the table cannot be mutated after module load.
 */
const BASTION_MESSAGE_TYPES: ReadonlySet<string> = new Set<string>([
  "bastion-enroll",
  "bastion-heartbeat",
  "bastion-state-sync",
  "bastion-progress",
  "bastion-install-log",
  "command-response",
]);
|
|
|
|
|
|
|
|
|
|
/**
 * Runtime lookup table of the `type` strings accepted by
 * `isLabdBastionMessage`.
 *
 * Must be kept in sync by hand with the variants of `LabdBastionMessage`.
 * Typed as `ReadonlySet` so the table cannot be mutated after module load.
 */
const LABD_BASTION_MESSAGE_TYPES: ReadonlySet<string> = new Set<string>([
  "bastion-enrolled",
  "bastion-heartbeat-ack",
  "command-install",
  "command-forget",
  "command-role-update",
  "command-debug",
  "command-register",
  "command-discover",
  "server-shutdown",
]);
|
|
|
|
|
|
|
|
|
|
/**
 * Runtime type guard for {@link BastionMessage}.
 *
 * Only the `type` discriminant is inspected: the value must be a non-null
 * object whose `type` property is a string listed in `BASTION_MESSAGE_TYPES`.
 * The remaining fields of the matching variant are NOT validated here.
 *
 * @param msg - Any value, typically the result of `JSON.parse`.
 * @returns `true` when `msg.type` is a known bastion message type.
 */
export function isBastionMessage(msg: unknown): msg is BastionMessage {
  if (typeof msg !== "object" || msg === null || !("type" in msg)) {
    return false;
  }
  const { type } = msg as { type: unknown };
  return typeof type === "string" && BASTION_MESSAGE_TYPES.has(type);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Runtime type guard for {@link LabdBastionMessage}.
 *
 * Only the `type` discriminant is inspected: the value must be a non-null
 * object whose `type` property is a string listed in
 * `LABD_BASTION_MESSAGE_TYPES`. The remaining fields of the matching variant
 * are NOT validated here.
 *
 * @param msg - Any value, typically the result of `JSON.parse`.
 * @returns `true` when `msg.type` is a known labd-to-bastion message type.
 */
export function isLabdBastionMessage(msg: unknown): msg is LabdBastionMessage {
  if (typeof msg !== "object" || msg === null || !("type" in msg)) {
    return false;
  }
  const { type } = msg as { type: unknown };
  return typeof type === "string" && LABD_BASTION_MESSAGE_TYPES.has(type);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Parses a JSON string and checks that it is a {@link BastionMessage}.
 *
 * Validation is delegated to `isBastionMessage`, so only the `type`
 * discriminant is checked.
 *
 * @param data - Raw JSON text.
 * @returns The parsed value, narrowed to `BastionMessage`.
 * @throws SyntaxError if `data` is not valid JSON (raised by `JSON.parse`).
 * @throws Error with message `Invalid bastion message: <type>` when the
 *   parsed value is not a recognised bastion message; `<type>` is `unknown`
 *   when the value has no `type`.
 */
export function parseBastionMessage(data: string): BastionMessage {
  const msg: unknown = JSON.parse(data);
  if (!isBastionMessage(msg)) {
    // JSON.parse("null") yields null; reading `.type` from it would raise a
    // TypeError and mask the intended error, so only look at real objects.
    const type =
      typeof msg === "object" && msg !== null ? (msg as { type?: unknown }).type : undefined;
    throw new Error(`Invalid bastion message: ${String(type ?? "unknown")}`);
  }
  return msg;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Parses a JSON string and checks that it is a {@link LabdBastionMessage}.
 *
 * Validation is delegated to `isLabdBastionMessage`, so only the `type`
 * discriminant is checked.
 *
 * @param data - Raw JSON text.
 * @returns The parsed value, narrowed to `LabdBastionMessage`.
 * @throws SyntaxError if `data` is not valid JSON (raised by `JSON.parse`).
 * @throws Error with message `Invalid labd-bastion message: <type>` when the
 *   parsed value is not a recognised labd-to-bastion message; `<type>` is
 *   `unknown` when the value has no `type`.
 */
export function parseLabdBastionMessage(data: string): LabdBastionMessage {
  const msg: unknown = JSON.parse(data);
  if (!isLabdBastionMessage(msg)) {
    // JSON.parse("null") yields null; reading `.type` from it would raise a
    // TypeError and mask the intended error, so only look at real objects.
    const type =
      typeof msg === "object" && msg !== null ? (msg as { type?: unknown }).type : undefined;
    throw new Error(`Invalid labd-bastion message: ${String(type ?? "unknown")}`);
  }
  return msg;
}
|
|
|
|
|
|
|
|
|
|
// --- Request ID utility ---
|
|
|
|
|
|
|
|
|
|
/**
 * Creates a fresh identifier for use as a message `requestId`.
 *
 * @returns The string produced by `randomUUID()` from `node:crypto`.
 */
export function generateRequestId(): string {
  return randomUUID();
}
|