feat: install logging, error trapping, PXE/ISO integration tests
Some checks failed
CI/CD / lint (pull_request) Failing after 13s
CI/CD / test (pull_request) Failing after 10s
CI/CD / typecheck (pull_request) Failing after 36s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped

Kickstart installs on real hardware failed silently — no error reporting,
only 3 progress callbacks, zero log streaming. This overhaul makes every
install fully observable.

Kickstart improvements:
- Error trapping in %pre and %post (trap ERR sends failure details to bastion)
- 12+ granular progress stages (was 3): SSH, hostname, k3s prep, EFI boot, metadata
- Background log streamer: tails %post output and batch-sends to /api/log
- bastion_log() function for explicit log lines from kickstart scripts

Bastion API:
- POST /api/log — receives raw log lines from kickstart (single or batch)
- InstallLogBuffer — per-MAC ring buffer (2000 lines) + file persistence
- GET /api/logs/:mac — now returns log_lines + log_total alongside stages
- SSE /api/logs/:mac/follow — uses named events (event: stage vs event: log)
- Progress events forwarded to labd via bastion-progress WebSocket message
- Post-provision k3s logs routed through progressBus (was console-only)

dnsmasq fixes found during VM testing:
- HTTP Boot filename: ipxe-real.efi → ipxe.efi (leftover from old 2-stage approach)
- pxe-service directives: only in proxy mode (breaks OVMF PXE in full mode)
- PXEClient vendor class echo for UEFI firmware compatibility

Integration tests:
- PXE boot test: blank UEFI VM → dnsmasq → HTTP Boot → iPXE → bastion → install
- ISO boot test: blank VM boots from bastion-generated ISO → same flow
- Shared helpers: pxe-network (no DHCP, nftables fix), pxe-vm (UEFI + ISO boot)
- test-provision.sh: runs both PXE + ISO tests with prerequisite checks
- 250GB sparse QCOW2 disk (LVM layout needs ~204GB)

201 unit tests passing (11 new).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-03-26 22:26:33 +00:00
parent ffc4a782d2
commit 46b017d77e
189 changed files with 16241 additions and 432 deletions

View File

@@ -9,6 +9,10 @@
".": {
"import": "./dist/main.js",
"types": "./dist/main.d.ts"
},
"./iso-builder": {
"import": "./dist/services/iso-builder.js",
"types": "./dist/services/iso-builder.d.ts"
}
},
"scripts": {
@@ -20,12 +24,15 @@
},
"dependencies": {
"@fastify/static": "^8.0.0",
"@lab/modules": "workspace:*",
"@lab/shared": "workspace:*",
"execa": "^9.5.0",
"fastify": "^5.0.0",
"winston": "^3.17.0"
"winston": "^3.17.0",
"ws": "^8.19.0"
},
"devDependencies": {
"@types/node": "^22.10.0"
"@types/node": "^22.10.0",
"@types/ws": "^8.18.0"
}
}

View File

@@ -14,6 +14,10 @@ export function loadConfig(overrides: Partial<BastionConfig> = {}): BastionConfi
const dhcpRangeStart = overrides.dhcpRangeStart ?? process.env["DHCP_RANGE_START"] ?? "";
const dhcpRangeEnd = overrides.dhcpRangeEnd ?? process.env["DHCP_RANGE_END"] ?? "";
const ubuntuVersion = overrides.ubuntuVersion ?? process.env["UBUNTU_VERSION"] ?? "26.04";
const ubuntuMirror = overrides.ubuntuMirror ?? process.env["UBUNTU_MIRROR"]
?? `https://releases.ubuntu.com/${ubuntuVersion}`;
const fedoraMirror = `https://download.fedoraproject.org/pub/fedora/linux/releases/${fedoraVersion}/Everything/${arch}/os`;
const tftpDir = `${bastionDir}/tftp`;
const httpDir = `${bastionDir}/http`;
@@ -30,6 +34,8 @@ export function loadConfig(overrides: Partial<BastionConfig> = {}): BastionConfi
dhcpMode,
dhcpRangeStart,
dhcpRangeEnd,
ubuntuVersion,
ubuntuMirror,
// These are populated at runtime by the network service
iface: overrides.iface ?? "",
serverIp: overrides.serverIp ?? "",
@@ -39,6 +45,8 @@ export function loadConfig(overrides: Partial<BastionConfig> = {}): BastionConfi
adminUser: overrides.adminUser ?? "",
skipDnsmasq: overrides.skipDnsmasq,
skipArtifacts: overrides.skipArtifacts,
labdUrl: overrides.labdUrl ?? process.env["LABD_URL"],
bastionJoinToken: overrides.bastionJoinToken ?? process.env["BASTION_JOIN_TOKEN"],
fedoraMirror,
tftpDir,
httpDir,

View File

@@ -11,6 +11,9 @@ import { startDnsmasq, stopDnsmasq, generateDnsmasqConf } from "./services/dnsma
import { generateDiscoverKickstart } from "./services/kickstart-generator.js";
import { renderBootIpxe } from "./templates/boot.ipxe.js";
import { logger } from "./services/logger.js";
import { BastionConnection } from "./services/labd-connection.js";
import { progressBus } from "./services/progress-events.js";
import { ensureBootIso } from "./routes/boot-iso.js";
function copyIfMissing(src: string, dest: string, label: string): void {
if (existsSync(dest)) {
@@ -91,11 +94,9 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
let config = loadConfig(overrides);
config = populateNetworkConfig(config);
// PID file management: kill old instance if running
// Bastion needs root for dnsmasq (DHCP port 67)
if (!config.skipDnsmasq && process.getuid?.() !== 0) {
logger.error("Must run as root (dnsmasq needs DHCP/TFTP ports). Use: sudo labctl init bastion standalone start");
process.exit(1);
throw new Error("Must run as root (dnsmasq needs DHCP/TFTP ports). Use: sudo labctl init bastion standalone start");
}
mkdirSync(config.bastionDir, { recursive: true, mode: 0o755 });
@@ -164,6 +165,23 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
"Fedora initrd",
);
// Ubuntu netboot artifacts (non-fatal — Ubuntu version may not be released yet).
// A failure here only disables Ubuntu provisioning; Fedora provisioning is unaffected.
try {
  logger.info(`Preparing Ubuntu ${config.ubuntuVersion} netboot artifacts...`);
  download(
    `${config.ubuntuMirror}/casper/vmlinuz`,
    `${config.httpDir}/ubuntu-vmlinuz`,
    "Ubuntu kernel",
  );
  download(
    `${config.ubuntuMirror}/casper/initrd`,
    `${config.httpDir}/ubuntu-initrd`,
    "Ubuntu initrd",
  );
} catch (err) {
  // Keep the underlying reason in the log so an unreleased version (HTTP 404)
  // can be told apart from a local problem such as a full disk or no network.
  const reason = err instanceof Error ? err.message : String(err);
  logger.warn(`Ubuntu ${config.ubuntuVersion} artifacts not available -- Ubuntu provisioning disabled (${reason})`);
}
// Symlink iPXE binaries into HTTP dir for UEFI HTTP Boot
for (const name of ["ipxe.efi", "ipxe-arm64.efi"]) {
const src = `${config.tftpDir}/${name}`;
@@ -172,6 +190,13 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
symlinkSafe(src, dest);
}
}
// Generate boot ISO (served as static file for Range request support)
try {
ensureBootIso(config);
} catch (err) {
logger.warn(`Boot ISO generation failed: ${err instanceof Error ? err.message : String(err)}`);
}
} else {
logger.info("Skipping boot artifacts (--skip-artifacts)");
}
@@ -196,7 +221,7 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
}
// Start HTTP server
const { app } = createApp(config);
const { app, state } = createApp(config);
await app.listen({ port: config.httpPort, host: "0.0.0.0" });
logger.info(`HTTP server listening on :${config.httpPort}`);
@@ -220,12 +245,72 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
logger.info("Skipping dnsmasq (--skip-dnsmasq)");
}
// Connect to labd if configured (otherwise run standalone)
let labdConn: BastionConnection | null = null;
if (config.labdUrl) {
  // Canonical MAC form used as the key of every state map: lowercase and
  // colon-separated, i.e. the same normalization the HTTP API applies in
  // /api/install and /api/role. Without it a command carrying "AA-BB-..."
  // would create or miss an entry under a different key than the HTTP routes use.
  const normalizeMac = (raw: string): string => raw.toLowerCase().replace(/-/g, ":");

  labdConn = new BastionConnection(config, () => state.load());

  // Wire up command handlers so labd can send install/forget/role commands

  // Queue an install for a machine.
  labdConn.onCommand("command-install", async (msg) => {
    if (msg.type !== "command-install") throw new Error("unexpected");
    const mac = normalizeMac(msg.mac);
    state.update((s) => {
      s.install_queue[mac] = {
        hostname: msg.hostname,
        disk: msg.disk ?? "/dev/sda",
        role: msg.role as import("@lab/shared").Role,
        os: msg.os as import("@lab/shared").OsId,
        queued_at: new Date().toISOString(),
      };
    });
    return { status: "ok", data: { mac, hostname: msg.hostname } };
  });

  // Remove a machine from every state map.
  labdConn.onCommand("command-forget", async (msg) => {
    if (msg.type !== "command-forget") throw new Error("unexpected");
    const mac = normalizeMac(msg.mac);
    state.update((s) => {
      delete s.discovered[mac];
      delete s.install_queue[mac];
      delete s.installed[mac];
    });
    return { status: "ok", data: { mac } };
  });

  // Change the role of an already-installed machine.
  labdConn.onCommand("command-role-update", async (msg) => {
    if (msg.type !== "command-role-update") throw new Error("unexpected");
    const mac = normalizeMac(msg.mac);
    const current = state.load();
    if (!current.installed[mac]) {
      return { status: "error", error: `MAC ${mac} not found in installed machines` };
    }
    state.update((s) => {
      const inst = s.installed[mac];
      if (inst) inst.role = msg.role;
    });
    return { status: "ok", data: { mac, role: msg.role } };
  });

  // Push state to labd on every local state change
  state.onChange(() => labdConn?.syncState());

  // Forward progress events (stages only, not raw log lines) to labd
  progressBus.on((event) => {
    if (event.stage !== "log") {
      labdConn?.sendProgress(event.mac, event.stage, event.detail);
    }
  });

  labdConn.connect();
  logger.info(`Registering with labd at ${config.labdUrl}`);
}
// Print banner
printBanner(config);
// Graceful shutdown
const shutdown = async (): Promise<void> => {
logger.info("Shutting down...");
if (labdConn) labdConn.close();
if (config.skipDnsmasq !== true) stopDnsmasq();
closeFirewall(config);
await app.close();

View File

@@ -5,13 +5,19 @@
// /api/discover - receive hardware discovery reports from PXE-booted machines
import type { FastifyInstance } from "fastify";
import type { HardwareInfo, InstalledInfo } from "@lab/shared";
import type { HardwareInfo, InstalledInfo, Role } from "@lab/shared";
import { isValidOsId, SUPPORTED_ROLES } from "@lab/shared";
import type { StateManager } from "../services/state.js";
import { logger } from "../services/logger.js";
import { triggerPostProvisionK3s } from "../services/post-provision.js";
import { progressBus } from "../services/progress-events.js";
import type { ProgressEvent } from "../services/progress-events.js";
import type { InstallLogBuffer } from "../services/install-log.js";
export function registerApiRoutes(
app: FastifyInstance,
state: StateManager,
installLog: InstallLogBuffer,
): void {
// List all machines
app.get("/api/machines", async (_request, reply) => {
@@ -25,9 +31,10 @@ export function registerApiRoutes(
hostname?: string;
disk?: string;
role?: string;
os?: string;
};
}>("/api/install", async (request, reply) => {
const { mac: rawMac, hostname, disk, role } = request.body ?? {};
const { mac: rawMac, hostname, disk, role, os } = request.body ?? {};
const mac = (rawMac ?? "").toLowerCase().replace(/-/g, ":");
if (mac === "") {
@@ -35,27 +42,34 @@ export function registerApiRoutes(
}
const validRole = role ?? "worker";
if (validRole !== "worker" && validRole !== "infra") {
return reply.status(400).send({ error: "role must be 'worker' or 'infra'" });
if (!(SUPPORTED_ROLES as readonly string[]).includes(validRole)) {
return reply.status(400).send({ error: `invalid role: '${validRole}'. Supported: ${SUPPORTED_ROLES.join(", ")}` });
}
const osId = os ?? "fedora-43";
if (!isValidOsId(osId)) {
return reply.status(400).send({ error: `invalid os: '${osId}'. Supported: fedora-43, ubuntu-26.04` });
}
state.update((s) => {
s.install_queue[mac] = {
hostname: hostname ?? "lab-node",
disk: disk ?? "",
role: validRole as "worker" | "infra",
role: validRole as Role,
os: osId,
queued_at: new Date().toISOString(),
};
});
logger.info(`INSTALL QUEUED: ${mac} -> hostname=${hostname ?? "lab-node"} role=${validRole}`);
logger.info(`INSTALL QUEUED: ${mac} -> hostname=${hostname ?? "lab-node"} role=${validRole} os=${osId}`);
return reply.send({
status: "queued",
mac,
hostname: hostname ?? "lab-node",
role: validRole,
message: `PXE boot the machine to start installation (role=${validRole})`,
os: osId,
message: `PXE boot the machine to start installation (role=${validRole}, os=${osId})`,
});
});
@@ -85,6 +99,13 @@ export function registerApiRoutes(
const color = stageName === "complete" ? GREEN : stageName === "error" ? RED : YELLOW;
console.log(` ${color}${icon}${RESET} ${mac} ${BOLD}${stageName}${RESET}${detailStr ? ` -- ${detailStr}` : ""}`);
// Emit progress event for SSE clients
const hostname = state.load().install_queue[mac]?.hostname ?? mac;
progressBus.emit({
mac, hostname, stage: stageName, detail: detailStr,
timestamp: new Date().toISOString(),
});
state.update((s) => {
const queueEntry = s.install_queue[mac];
if (queueEntry) {
@@ -94,6 +115,14 @@ export function registerApiRoutes(
queueEntry.progress_detail = detailStr;
}
// Append to progress log history
if (!queueEntry.log) queueEntry.log = [];
queueEntry.log.push({
stage: stageName,
detail: detailStr,
timestamp: new Date().toISOString(),
});
// Move to installed on completion
if (stageName === "complete") {
const cfg = s.install_queue[mac];
@@ -106,14 +135,19 @@ export function registerApiRoutes(
const installedInfo: InstalledInfo = {
hostname: cfg?.hostname ?? "?",
role: cfg?.role ?? "?",
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
ip,
installed_at: new Date().toISOString(),
};
s.installed[mac] = installedInfo;
const installedRole = state.load().installed[mac]?.role;
const admin = installedRole !== undefined && installedRole !== "" ? "michal" : "root";
const admin = installedInfo.role !== "vanilla" && installedInfo.role !== "" ? "michal" : "root";
console.log(`\n \x1b[0;32m\x1b[1m ssh ${admin}@${ip}\x1b[0m\n`); // eslint-disable-line no-console
// Auto-install k3s for non-vanilla roles
if (installedInfo.role !== "vanilla" && ip !== "") {
void triggerPostProvisionK3s(installedInfo.hostname, ip, installedInfo.role, admin, mac);
}
}
}
});
@@ -121,6 +155,40 @@ export function registerApiRoutes(
return reply.send({ status: "ok" });
});
// Receive raw log lines from kickstart scripts.
// Accepts any combination of `line` (single line), `lines` (batch) and `tail`
// (one string whose lines are separated by a literal backslash-n sequence).
// Always answers 200 with the number of lines accepted, so a logging hiccup
// never aborts the install script that is calling us.
app.post<{
  Body: {
    mac?: string;
    line?: string;
    lines?: string[];
    tail?: string;
  };
}>("/api/log", async (request, reply) => {
  const { mac: rawMac, line, lines: rawLines, tail } = request.body ?? {};

  // The body is untrusted JSON: only trust `mac` if it really is a non-empty
  // string, and normalize it like every other route (lowercase, ':' separated)
  // so the lines land under the same key that /api/logs/:mac reads from.
  const mac = (typeof rawMac === "string" && rawMac !== "" ? rawMac : "unknown")
    .toLowerCase()
    .replace(/-/g, ":");

  // Collect all lines from the various input formats, ignoring fields of the wrong type
  const allLines: string[] = [];
  if (typeof line === "string" && line !== "") allLines.push(line);
  if (Array.isArray(rawLines)) {
    for (const entry of rawLines) {
      if (typeof entry === "string") allLines.push(entry);
    }
  }
  if (typeof tail === "string") {
    // tail is a string with escaped \n — split it into lines, dropping empty ones
    for (const part of tail.split("\\n")) {
      if (part !== "") allLines.push(part);
    }
  }

  if (allLines.length === 0) {
    return reply.send({ status: "ok", lines: 0 });
  }

  // Look up hostname from install queue for enriching events
  const hostname = state.load().install_queue[mac]?.hostname ?? mac;

  // Append to the install log buffer (this also emits to progressBus)
  installLog.append(mac, allLines, hostname);
  return reply.send({ status: "ok", lines: allLines.length });
});
// Delete a machine from all state
app.delete<{
Params: { mac: string };
@@ -209,4 +277,125 @@ export function registerApiRoutes(
return reply.send({ status: "ok", mac, new: isNew });
});
// Update a machine's role (e.g. promote infra -> labcontroller).
// Responds 400 when mac or role is missing, 404 when the MAC is not in the
// installed map, and { status: "updated" } once the role has been stored.
app.post<{
  Body: {
    mac?: string;
    role?: string;
  };
}>("/api/role", async (request, reply) => {
  const { mac: macInput, role } = request.body ?? {};
  const mac = (macInput ?? "").toLowerCase().replace(/-/g, ":");

  if (mac === "") {
    return reply.status(400).send({ error: "mac is required" });
  }
  if (!role) {
    return reply.status(400).send({ error: "role is required" });
  }

  // Flipped inside the update callback once the machine has been found and changed.
  let found = false;
  state.update((s) => {
    const machine = s.installed[mac];
    if (!machine) return;

    const previousRole = machine.role;
    machine.role = role;
    found = true;
    logger.info(`ROLE UPDATED: ${mac} (${machine.hostname}) ${previousRole} -> ${role}`);
  });

  if (!found) {
    return reply.status(404).send({ error: "machine not found in installed state", mac });
  }
  return reply.send({ status: "updated", mac, role });
});
// Get provision logs for a machine (current state snapshot + raw log lines).
// Query parameters:
//   lines  - maximum number of raw log lines to return (default 200)
//   offset - index of the first buffered log line to return (default 0)
// Missing, non-numeric or negative values fall back to the defaults instead of
// silently producing an empty or wrapped-around slice.
app.get<{
  Params: { mac: string };
  Querystring: { lines?: string; offset?: string };
}>("/api/logs/:mac", async (request, reply) => {
  const mac = request.params.mac.toLowerCase().replace(/-/g, ":");

  const toCount = (raw: string | undefined, fallback: number): number => {
    const parsed = Number.parseInt(raw ?? "", 10);
    return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
  };
  const logLimit = toCount(request.query.lines, 200);
  const logOffset = toCount(request.query.offset, 0);

  const currentState = state.load();
  const queueEntry = currentState.install_queue[mac];
  const installedEntry = currentState.installed[mac];

  // A queued entry wins over an installed one: a re-install in progress is
  // reported as "installing".
  if (queueEntry) {
    return reply.send({
      mac,
      hostname: queueEntry.hostname,
      status: "installing",
      progress: queueEntry.progress ?? "queued",
      progress_detail: queueEntry.progress_detail ?? "",
      progress_at: queueEntry.progress_at ?? queueEntry.queued_at,
      role: queueEntry.role,
      os: queueEntry.os,
      stages: queueEntry.log ?? [],
      log_lines: installLog.getLines(mac, logOffset, logLimit),
      log_total: installLog.lineCount(mac),
    });
  }

  if (installedEntry) {
    return reply.send({
      mac,
      hostname: installedEntry.hostname,
      status: "installed",
      progress: "complete",
      progress_detail: `ready at ${installedEntry.ip}`,
      progress_at: installedEntry.installed_at,
      role: installedEntry.role,
      // Report the OS when it was recorded at install time, mirroring the "installing" shape
      ...(installedEntry.os !== undefined ? { os: installedEntry.os } : {}),
      ip: installedEntry.ip,
      log_lines: installLog.getLines(mac, logOffset, logLimit),
      log_total: installLog.lineCount(mac),
    });
  }

  return reply.status(404).send({ error: "machine not found", mac });
});
// SSE stream: follow provision progress for a machine (or all machines when
// the :mac segment is the literal "all"). The connection stays open until the
// client disconnects.
app.get<{
  Params: { mac: string };
}>("/api/logs/:mac/follow", async (request, reply) => {
  const filterMac = request.params.mac === "all"
    ? null
    : request.params.mac.toLowerCase().replace(/-/g, ":");

  // The stream is written straight to the underlying response, so take the
  // reply out of Fastify's lifecycle: it must not try to send anything itself
  // when this async handler resolves.
  reply.hijack();

  void reply.raw.writeHead(200, {
    "Content-Type": "text/event-stream",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
  });

  // Send current state as first event (only when following a single queued machine)
  const currentState = state.load();
  const queueEntry = filterMac ? currentState.install_queue[filterMac] : undefined;
  if (queueEntry) {
    const initData = JSON.stringify({
      mac: filterMac, hostname: queueEntry.hostname,
      stage: queueEntry.progress ?? "queued",
      detail: queueEntry.progress_detail ?? "",
      timestamp: queueEntry.progress_at ?? queueEntry.queued_at,
    });
    reply.raw.write(`data: ${initData}\n\n`);
  }

  const onProgress = (event: ProgressEvent): void => {
    if (filterMac && event.mac !== filterMac) return;
    // Use SSE event types so clients can filter: "stage" for progress, "log" for raw lines
    const eventType = event.stage === "log" ? "log" : "stage";
    reply.raw.write(`event: ${eventType}\ndata: ${JSON.stringify(event)}\n\n`);
  };

  progressBus.on(onProgress);

  // Unsubscribe when the response is closed. The response's "close" event
  // fires both on normal completion and when the client drops the connection,
  // so the listener cannot outlive the stream it writes to.
  reply.raw.on("close", () => {
    progressBus.off(onProgress);
  });
});
}

View File

@@ -0,0 +1,249 @@
// Boot ISO generation.
// Generates a UEFI-bootable iPXE ISO using xorriso+mtools.
// The ISO is placed in httpDir so @fastify/static serves it with Range request
// support (required by JetKVM, which streams via HTTP Range + NBD).
//
// The ISO embeds kernel + initrd so machines without UEFI NIC support
// (no SNP protocol) can still boot. iPXE loads them from file:/ and the
// Linux kernel handles networking with its own drivers.
import { execFileSync, execSync } from "node:child_process";
import { createHash } from "node:crypto";
import { existsSync, mkdirSync, readFileSync, renameSync, rmSync, statSync, unlinkSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import type { BastionConfig } from "@lab/shared";
import { logger } from "../services/logger.js";
// iPXE SNP variant (scans all UEFI SNP handles, works from CD-ROM/USB boot).
// Keyed by architecture name. For each architecture:
//   src     - candidate locations of the iPXE EFI binary on this host; callers
//             use the first path that exists, so the order is the preference order
//   efiName - file name the binary is given under EFI/BOOT inside the ISO
const IPXE_ISO_PATHS: Record<string, { src: string[]; efiName: string }> = {
x86_64: {
src: [
"/usr/share/ipxe/ipxe-snp-x86_64.efi",
"/usr/share/ipxe/ipxe-x86_64.efi",
],
efiName: "BOOTX64.EFI",
},
aarch64: {
src: [
"/usr/share/ipxe/arm64-efi/ipxe-snp.efi",
"/usr/share/ipxe/arm64-efi/ipxe.efi",
],
efiName: "BOOTAA64.EFI",
},
};
// Base URL of the Fedora release tree. Callers append
// `/<fedoraVersion>/Everything/<arch>/os` to reach the per-architecture tree
// that holds the PXE kernel and initrd.
const FEDORA_MIRROR_BASE = "https://download.fedoraproject.org/pub/fedora/linux/releases";
// Kernel + initrd pair staged for embedding into the ISO.
interface BootPayload {
// Architecture name as used in the Fedora mirror path ("x86_64" or "aarch64")
arch: string;
// Local path of the cached kernel image
vmlinuz: string;
// Local path of the cached initrd image
initrd: string;
}
/**
 * Download `url` to `dest` with curl unless `dest` already exists.
 *
 * The transfer is written to `<dest>.part` and renamed into place only after
 * curl succeeds. An interrupted or failed download therefore never leaves a
 * truncated file at `dest` that later runs would mistake for a valid cache hit.
 *
 * @param url   Source URL (redirects are followed; HTTP errors fail the call).
 * @param dest  Final path of the downloaded file.
 * @param label Human-readable name used in log messages.
 * @throws When curl exits non-zero or the rename fails.
 */
function downloadIfMissing(url: string, dest: string, label: string): void {
  if (existsSync(dest)) {
    logger.info(` ${label} -- cached`);
    return;
  }
  logger.info(` ${label} -- downloading...`);

  const partial = `${dest}.part`;
  try {
    // Argument vector instead of a shell string: no quoting problems if the
    // URL or path contains shell metacharacters.
    execFileSync("curl", ["-#", "-L", "-f", "-o", partial, url], { stdio: "inherit" });
    renameSync(partial, dest);
  } catch (err) {
    // Best-effort cleanup of the partial file, then report the original failure.
    rmSync(partial, { force: true });
    throw err;
  }
}
/**
 * Render the iPXE `autoexec.ipxe` script embedded in the ISO.
 *
 * Strategy: try DHCP up to three times (for machines with UEFI NIC support)
 * and chain to the bastion on success; otherwise fall back to booting the
 * kernel/initrd embedded in the ISO filesystem.
 * iPXE's ${buildarch} resolves to "x86_64" or "arm64".
 *
 * @param config     Bastion configuration (only `fedoraVersion` is read).
 * @param bastionUrl Base URL of the bastion HTTP server, without trailing slash.
 */
function renderAutoexecScript(config: BastionConfig, bastionUrl: string): string {
  return [
    "#!ipxe",
    "",
    "echo",
    "echo =============================================",
    "echo Lab PXE Bastion -- ISO Boot",
    "echo =============================================",
    "echo",
    "",
    "# Try DHCP (works if UEFI has NIC driver / SNP support)",
    "set attempts:int32 0",
    ":retry",
    "dhcp && goto netboot ||",
    "inc attempts",
    "iseq ${attempts} 3 || goto retry_wait",
    "goto localboot",
    ":retry_wait",
    "echo DHCP failed (attempt ${attempts}/3), retrying...",
    "sleep 2",
    "goto retry",
    "",
    "# Network available -- chain to bastion for dynamic dispatch",
    ":netboot",
    "echo Network OK. Chaining to bastion...",
    `chain ${bastionUrl}/boot.ipxe || shell`,
    "",
    "# No network -- boot embedded kernel (Linux has its own NIC drivers)",
    ":localboot",
    "echo No UEFI network support. Booting embedded installer...",
    "echo Linux will configure networking with its own drivers.",
    "echo",
    "# Map iPXE arch names to Fedora mirror paths (arm64 -> aarch64)",
    "set fedarch ${buildarch}",
    "iseq ${buildarch} arm64 && set fedarch aarch64 ||",
    `kernel file:/vmlinuz-\${buildarch} inst.ks=${bastionUrl}/discover.ks inst.repo=${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/\${fedarch}/os inst.text || goto no_kernel`,
    `initrd file:/initrd-\${buildarch} || goto no_kernel`,
    "boot || shell",
    "",
    ":no_kernel",
    "echo ERROR: kernel not found for this architecture. Dropping to shell.",
    "shell",
  ].join("\n");
}

/**
 * Build the UEFI-bootable iPXE ISO at `outputPath`.
 *
 * Steps: stage the available iPXE EFI binaries, download (or reuse cached)
 * Fedora kernel/initrd per architecture, write the autoexec script, pack
 * everything into a FAT EFI system partition image with mtools, and wrap that
 * into a hybrid ISO with xorriso.
 *
 * All intermediate files live in a per-process temp directory that is removed
 * when the function returns, whether it succeeded or threw.
 *
 * @param config     Bastion configuration (server address, Fedora version, bastionDir).
 * @param outputPath Path of the ISO file to create.
 * @throws When no iPXE EFI binary is installed or any external tool fails.
 */
function generateIso(config: BastionConfig, outputPath: string): void {
  const work = join(tmpdir(), `bastion-iso-${process.pid}`);
  // The whole work directory is handed to xorriso as ISO content, so leftovers
  // from a crashed earlier run that happened to have the same PID must go first.
  rmSync(work, { recursive: true, force: true });

  try {
    mkdirSync(join(work, "EFI", "BOOT"), { recursive: true });

    const bastionUrl = `http://${config.serverIp}:${config.httpPort}`;

    // Copy available iPXE EFI binaries (first existing candidate per architecture)
    const archs: string[] = [];
    const stagedBinaries: { efiName: string; path: string }[] = [];
    for (const [arch, paths] of Object.entries(IPXE_ISO_PATHS)) {
      const srcFile = paths.src.find((s) => existsSync(s));
      if (srcFile) {
        const staged = join(work, "EFI", "BOOT", paths.efiName);
        execSync(`cp "${srcFile}" "${staged}"`, { stdio: "pipe" });
        archs.push(arch);
        stagedBinaries.push({ efiName: paths.efiName, path: staged });
        logger.info(` iPXE ISO ${arch}: ${srcFile}`);
      }
    }
    if (archs.length === 0) throw new Error("No iPXE EFI binaries found");

    // Download and stage kernel/initrd for each architecture.
    // These are embedded in the ISO so machines without UEFI NIC support
    // can boot the Linux installer (which has its own NIC drivers).
    const cacheDir = join(config.bastionDir, "iso-cache");
    mkdirSync(cacheDir, { recursive: true });
    const payloads: BootPayload[] = [];
    for (const arch of ["x86_64", "aarch64"]) {
      const mirror = `${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/${arch}/os`;
      const vmlinuzCache = join(cacheDir, `vmlinuz-${arch}`);
      const initrdCache = join(cacheDir, `initrd-${arch}`);
      try {
        downloadIfMissing(
          `${mirror}/images/pxeboot/vmlinuz`,
          vmlinuzCache,
          `Fedora ${arch} kernel`,
        );
        downloadIfMissing(
          `${mirror}/images/pxeboot/initrd.img`,
          initrdCache,
          `Fedora ${arch} initrd`,
        );
        payloads.push({ arch, vmlinuz: vmlinuzCache, initrd: initrdCache });
      } catch {
        // Non-fatal: the ISO is still useful for this architecture via network boot
        logger.warn(` Fedora ${arch} kernel/initrd not available -- skipping`);
      }
    }

    // Write iPXE autoexec script
    const autoexecPath = join(work, "autoexec.ipxe");
    writeFileSync(autoexecPath, renderAutoexecScript(config, bastionUrl));

    // Calculate EFI partition size: iPXE binaries + autoexec + kernel/initrd + margin
    let payloadSize = 2 * 1024 * 1024; // 2MB base for iPXE + autoexec + FAT overhead
    for (const p of payloads) {
      payloadSize += statSync(p.vmlinuz).size;
      payloadSize += statSync(p.initrd).size;
    }
    const efiSizeMB = Math.ceil(payloadSize / (1024 * 1024)) + 4; // +4MB margin
    logger.info(` EFI partition: ${efiSizeMB}MB (${payloads.length} arch payloads)`);

    // Create FAT EFI system partition
    const efiImg = join(work, "efi.img");
    execSync(`dd if=/dev/zero of="${efiImg}" bs=1M count=${efiSizeMB} 2>/dev/null`, { stdio: "pipe" });
    execSync(`mformat -i "${efiImg}" -v LABBOOT ::`, { stdio: "pipe" });
    execSync(`mmd -i "${efiImg}" ::/EFI`, { stdio: "pipe" });
    execSync(`mmd -i "${efiImg}" ::/EFI/BOOT`, { stdio: "pipe" });
    for (const bin of stagedBinaries) {
      execSync(`mcopy -i "${efiImg}" "${bin.path}" ::/EFI/BOOT/${bin.efiName}`, { stdio: "pipe" });
    }
    execSync(`mcopy -i "${efiImg}" "${autoexecPath}" ::/autoexec.ipxe`, { stdio: "pipe" });

    // Copy kernel/initrd onto EFI partition with arch-specific names
    for (const p of payloads) {
      // iPXE ${buildarch} returns "x86_64" or "arm64", so the files are named
      // after the iPXE spelling that the autoexec script looks up.
      const archLabel = p.arch === "aarch64" ? "arm64" : p.arch;
      execSync(`mcopy -i "${efiImg}" "${p.vmlinuz}" ::/vmlinuz-${archLabel}`, { stdio: "pipe" });
      execSync(`mcopy -i "${efiImg}" "${p.initrd}" ::/initrd-${archLabel}`, { stdio: "pipe" });
      logger.info(` Embedded ${archLabel}: vmlinuz + initrd`);
    }

    // Build hybrid ISO: El Torito EFI boot + GPT EFI partition
    execSync([
      `xorriso -as mkisofs`,
      `-o "${outputPath}"`,
      `-R`,
      `-V LAB_BOOT`,
      `-e efi.img`,
      `-no-emul-boot`,
      `-partition_offset 16`,
      `-append_partition 2 0xEF "${efiImg}"`,
      `-appended_part_as_gpt`,
      `"${work}"`,
    ].join(" "), { stdio: "pipe" });

    logger.info(`Generated boot ISO (${archs.join(", ")}): ${outputPath}`);
  } finally {
    // Always drop the staging directory, including on failure; it can hold
    // a full-size EFI partition image.
    rmSync(work, { recursive: true, force: true });
  }
}
/**
 * Compute a short hash of all inputs that affect ISO content.
 *
 * Inputs covered: bastion address and port (baked into the iPXE script), the
 * Fedora version, the selected iPXE EFI binary per architecture, and every
 * cached kernel and initrd. Files are fingerprinted by path, size and
 * modification time rather than by content, which keeps startup cheap.
 *
 * @returns The first 16 hex characters of a SHA-256 digest.
 */
function computeIsoHash(config: BastionConfig): string {
  const h = createHash("sha256");
  h.update(`${config.serverIp}:${config.httpPort}`);
  h.update(config.fedoraVersion);

  // iPXE binary actually picked for each architecture (first existing candidate)
  for (const paths of Object.values(IPXE_ISO_PATHS)) {
    const srcFile = paths.src.find((s) => existsSync(s));
    if (srcFile) {
      const st = statSync(srcFile);
      h.update(`${srcFile}:${st.size}:${st.mtimeMs}`);
    }
  }

  // Include kernel AND initrd cache state: both are embedded in the ISO, so a
  // refreshed initrd must invalidate the ISO just like a refreshed kernel.
  const cacheDir = join(config.bastionDir, "iso-cache");
  for (const arch of ["x86_64", "aarch64"]) {
    for (const kind of ["vmlinuz", "initrd"]) {
      const cached = join(cacheDir, `${kind}-${arch}`);
      if (existsSync(cached)) {
        const st = statSync(cached);
        h.update(`${cached}:${st.size}:${st.mtimeMs}`);
      }
    }
  }
  return h.digest("hex").slice(0, 16);
}
/**
 * Ensure boot.iso exists and is up-to-date in httpDir.
 * Called during startup so @fastify/static can serve it with Range support.
 *
 * The ISO is rebuilt when it is missing or when the input hash differs from
 * the one stored next to it in `boot.iso.hash`.
 *
 * @throws When ISO generation fails (the caller decides whether that is fatal).
 */
export function ensureBootIso(config: BastionConfig): void {
  const isoPath = join(config.httpDir, "boot.iso");
  const hashPath = join(config.httpDir, "boot.iso.hash");
  const currentHash = computeIsoHash(config);
  const cachedHash = existsSync(hashPath) ? readFileSync(hashPath, "utf-8").trim() : "";

  if (existsSync(isoPath) && currentHash === cachedHash) {
    logger.info(" Boot ISO -- cached (up to date)");
    return;
  }

  if (existsSync(isoPath)) {
    logger.info(" Boot ISO -- inputs changed, regenerating...");
    try { unlinkSync(isoPath); } catch { /* ignore */ }
  } else {
    logger.info(" Boot ISO -- generating...");
  }

  generateIso(config, isoPath);

  // Generation may have downloaded kernel/initrd files into the cache, and the
  // cache state is part of the hash. Storing the hash computed before the
  // build would make the next startup see a mismatch and rebuild the ISO once
  // more for no reason, so record the hash of the inputs as they are now.
  writeFileSync(hashPath, computeIsoHash(config));
}

View File

@@ -12,6 +12,7 @@ import {
renderInstallIpxe,
renderLocalBootIpxe,
} from "../templates/boot.ipxe.js";
import { renderUbuntuInstallIpxe } from "../templates/ubuntu-boot.ipxe.js";
import { logger } from "../services/logger.js";
export function registerDispatchRoutes(
@@ -26,16 +27,28 @@ export function registerDispatchRoutes(
const queueEntry = currentState.install_queue[mac];
if (queueEntry) {
const hostname = queueEntry.hostname ?? "lab-node";
logger.info(`INSTALL STARTED: ${mac} -> ${hostname}`);
const os = queueEntry.os ?? "fedora-43";
logger.info(`INSTALL STARTED: ${mac} -> ${hostname} (${os})`);
const script = renderInstallIpxe({
mac,
hostname,
serverIp: config.serverIp,
httpPort: config.httpPort,
fedoraVersion: config.fedoraVersion,
fedoraMirror: config.fedoraMirror,
});
let script: string;
if (os.startsWith("ubuntu")) {
script = renderUbuntuInstallIpxe({
mac,
hostname,
serverIp: config.serverIp,
httpPort: config.httpPort,
ubuntuVersion: config.ubuntuVersion,
});
} else {
script = renderInstallIpxe({
mac,
hostname,
serverIp: config.serverIp,
httpPort: config.httpPort,
fedoraVersion: config.fedoraVersion,
fedoraMirror: config.fedoraMirror,
});
}
return reply.type("text/plain").send(script);
}

View File

@@ -1,10 +1,12 @@
// Kickstart generation routes.
// Serves per-MAC install kickstart and the static discovery kickstart.
// Serves per-MAC install kickstart, static discovery kickstart,
// and Ubuntu autoinstall cloud-init endpoints.
import type { FastifyInstance } from "fastify";
import type { BastionConfig } from "@lab/shared";
import type { StateManager } from "../services/state.js";
import { generateInstallKickstart, generateDiscoverKickstart } from "../services/kickstart-generator.js";
import { renderUbuntuAutoinstall, renderUbuntuMetaData, type UbuntuAutoinstallParams } from "../templates/ubuntu-autoinstall.js";
export function registerKickstartRoutes(
app: FastifyInstance,
@@ -31,4 +33,39 @@ export function registerKickstartRoutes(
const ks = generateDiscoverKickstart(config);
return reply.type("text/plain").send(ks);
});
// Ubuntu autoinstall user-data (cloud-init).
// Per-machine values come from the install queue entry for the MAC; when the
// MAC is not queued the defaults below are used.
app.get<{ Params: { mac: string } }>("/autoinstall/:mac/user-data", async (request, reply) => {
  const normalizedMac = request.params.mac.toLowerCase().replace(/-/g, ":");
  const queued = state.load().install_queue[normalizedMac];

  const params: UbuntuAutoinstallParams = {
    hostname: queued?.hostname ?? "lab-node",
    disk: queued?.disk ?? "",
    role: queued?.role ?? "worker",
    domain: config.domain,
    ubuntuVersion: config.ubuntuVersion,
    timezone: config.timezone,
    locale: config.locale,
    serverIp: config.serverIp,
    httpPort: config.httpPort,
    sshKeys: config.sshKeys,
    adminUser: config.adminUser,
  };

  return reply.type("text/plain").send(renderUbuntuAutoinstall(params));
});
// Ubuntu autoinstall meta-data (cloud-init).
// Rendered from the queued hostname for the MAC, or "lab-node" when the MAC is not queued.
app.get<{ Params: { mac: string } }>("/autoinstall/:mac/meta-data", async (request, reply) => {
  const normalizedMac = request.params.mac.toLowerCase().replace(/-/g, ":");
  const queued = state.load().install_queue[normalizedMac];
  const body = renderUbuntuMetaData(queued?.hostname ?? "lab-node");
  return reply.type("text/plain").send(body);
});
}

View File

@@ -5,12 +5,14 @@ import fastifyStatic from "@fastify/static";
import { mkdirSync, existsSync } from "node:fs";
import type { BastionConfig } from "@lab/shared";
import { StateManager } from "./services/state.js";
import { InstallLogBuffer } from "./services/install-log.js";
import { logger } from "./services/logger.js";
import { registerDispatchRoutes } from "./routes/dispatch.js";
import { registerKickstartRoutes } from "./routes/kickstart.js";
import { registerApiRoutes } from "./routes/api.js";
export function createApp(config: BastionConfig): { app: ReturnType<typeof Fastify>; state: StateManager } {
export function createApp(config: BastionConfig): { app: ReturnType<typeof Fastify>; state: StateManager; installLog: InstallLogBuffer } {
const app = Fastify({
logger: false, // We use winston instead
});
@@ -18,6 +20,8 @@ export function createApp(config: BastionConfig): { app: ReturnType<typeof Fasti
const state = new StateManager(config.stateFile);
state.init();
const installLog = new InstallLogBuffer(config.bastionDir);
// Serve static files (vmlinuz, initrd.img, iPXE binaries) from the HTTP directory
mkdirSync(config.httpDir, { recursive: true });
app.register(fastifyStatic, {
@@ -38,14 +42,16 @@ export function createApp(config: BastionConfig): { app: ReturnType<typeof Fasti
// Register route handlers
registerDispatchRoutes(app, config, state);
registerKickstartRoutes(app, config, state);
registerApiRoutes(app, state);
registerApiRoutes(app, state, installLog);
// boot.iso is generated at startup and served as a static file from httpDir
// (static serving supports HTTP Range requests, required by JetKVM streaming)
// Log all requests
app.addHook("onRequest", async (request) => {
logger.info(`HTTP: ${request.ip} ${request.method} ${request.url}`);
});
return { app, state };
return { app, state, installLog };
}
export async function startServer(config: BastionConfig): Promise<void> {

View File

@@ -0,0 +1,86 @@
// Per-machine install log buffer.
// Stores raw log lines in memory (ring buffer) and persists to disk.
// Used by /api/log for ingestion and /api/logs/:mac/follow for SSE streaming.
import { mkdirSync, appendFileSync, readFileSync, existsSync } from "node:fs";
import { join } from "node:path";
import { progressBus } from "./progress-events.js";
/** Maximum number of log lines kept in memory per machine; older lines are dropped first. */
const MAX_LINES_IN_MEMORY = 2000;

/** One raw log line together with the time bastion received it (ISO 8601). */
export interface LogLine {
  line: string;
  timestamp: string;
}

/**
 * Per-machine install log store.
 *
 * Every appended batch is kept in a bounded in-memory buffer, appended to
 * `<bastionDir>/logs/<mac>.log`, and re-emitted line by line on `progressBus`
 * with stage "log".
 */
export class InstallLogBuffer {
  /** In-memory ring buffer per MAC */
  private buffers = new Map<string, LogLine[]>();
  private logDir: string;

  /** Creates `<bastionDir>/logs` if it does not exist yet. */
  constructor(bastionDir: string) {
    this.logDir = join(bastionDir, "logs");
    mkdirSync(this.logDir, { recursive: true });
  }

  /**
   * Append log lines for a machine. Stores in memory + appends to file.
   *
   * @param mac - Machine identifier used as buffer key and (sanitized) file name.
   * @param lines - Raw log lines; an empty batch is a no-op.
   * @param hostname - Reported on emitted events; falls back to `mac`.
   */
  append(mac: string, lines: string[], hostname?: string): void {
    // An empty batch would otherwise append a lone "\n" to the log file.
    if (lines.length === 0) return;

    const now = new Date().toISOString();
    const buffer = this.buffers.get(mac) ?? [];
    // Push one by one: spreading a very large batch into push() can exceed the call-stack limit.
    for (const line of lines) {
      buffer.push({ line, timestamp: now });
    }
    // Trim to ring buffer size
    if (buffer.length > MAX_LINES_IN_MEMORY) {
      buffer.splice(0, buffer.length - MAX_LINES_IN_MEMORY);
    }
    this.buffers.set(mac, buffer);

    // Persist to file
    const fileContent = lines.map((l) => `${now} ${l}`).join("\n") + "\n";
    appendFileSync(this.logFilePath(mac), fileContent);

    // Emit to SSE via progressBus (use "log" stage for log lines)
    const host = hostname ?? mac;
    for (const line of lines) {
      progressBus.emit({
        mac,
        hostname: host,
        stage: "log",
        detail: line,
        timestamp: now,
      });
    }
  }

  /** Get buffered log lines for a machine (a window of the in-memory buffer). */
  getLines(mac: string, offset = 0, limit = 500): LogLine[] {
    const buffer = this.buffers.get(mac) ?? [];
    return buffer.slice(offset, offset + limit);
  }

  /** Get the number of lines currently buffered in memory for a machine. */
  lineCount(mac: string): number {
    return this.buffers.get(mac)?.length ?? 0;
  }

  /** Read full log from disk (for machines no longer in memory); null when no file exists. */
  readFromDisk(mac: string): string | null {
    const filePath = this.logFilePath(mac);
    if (!existsSync(filePath)) return null;
    return readFileSync(filePath, "utf-8");
  }

  /** Clear the in-memory log for a machine (the file on disk is kept). */
  clear(mac: string): void {
    this.buffers.delete(mac);
  }

  /**
   * Map a MAC to its log file path.
   *
   * The MAC arrives from the network, so everything except letters, digits,
   * "_" and "-" is neutralised; "../" sequences or absolute paths can therefore
   * never escape the logs directory. Well-formed MACs map to the same file
   * name as before (colons become dashes).
   */
  private logFilePath(mac: string): string {
    const safeName = mac.replace(/:/g, "-").replace(/[^0-9A-Za-z_-]/g, "_") || "unknown";
    return join(this.logDir, `${safeName}.log`);
  }
}

View File

@@ -0,0 +1,437 @@
// Pure TypeScript UEFI-bootable ISO builder.
// Creates an ISO 9660 image with an embedded FAT EFI system partition
// containing iPXE EFI binaries and an autoexec script.
// No external tools required (no xorriso, mtools).
import { readFileSync } from "node:fs";
// Bytes per ISO 9660 logical sector; every ISO sector number in this file is scaled by it.
const SECTOR_SIZE = 2048; // ISO 9660 logical sector
// Bytes per sector inside the embedded FAT image; also the unit used for the El Torito sector count.
const FAT_SECTOR_SIZE = 512;
// --- Utility helpers ---
/**
 * Return a `len`-byte buffer holding `s` (ASCII, truncated to `len`)
 * followed by repetitions of the first character of `pad`.
 */
function asciiPad(s: string, len: number, pad = " "): Buffer {
  const fillByte = pad.charCodeAt(0);
  const field = Buffer.alloc(len, fillByte);
  const writable = Math.min(s.length, len);
  field.write(s, 0, writable, "ascii");
  return field;
}
/** Encode `n` as an unsigned 16-bit little-endian value (2 bytes). */
function u16le(n: number): Buffer {
  const bytes = Buffer.alloc(2);
  bytes.writeUInt16LE(n, 0);
  return bytes;
}
/** Encode `n` as an unsigned 32-bit little-endian value (4 bytes). */
function u32le(n: number): Buffer {
  const bytes = Buffer.alloc(4);
  bytes.writeUInt32LE(n, 0);
  return bytes;
}
/** Encode `n` as an unsigned 16-bit big-endian value (2 bytes). */
function u16be(n: number): Buffer {
  const bytes = Buffer.alloc(2);
  bytes.writeUInt16BE(n, 0);
  return bytes;
}
/** Encode `n` as an unsigned 32-bit big-endian value (4 bytes). */
function u32be(n: number): Buffer {
  const bytes = Buffer.alloc(4);
  bytes.writeUInt32BE(n, 0);
  return bytes;
}
/**
 * ISO 9660 "both-byte" 16-bit field: the value in little-endian order
 * immediately followed by the same value in big-endian order (4 bytes).
 */
function u16both(n: number): Buffer {
  const field = Buffer.alloc(4);
  field.writeUInt16LE(n, 0);
  field.writeUInt16BE(n, 2);
  return field;
}
/**
 * ISO 9660 "both-byte" 32-bit field: the value in little-endian order
 * immediately followed by the same value in big-endian order (8 bytes).
 */
function u32both(n: number): Buffer {
  const field = Buffer.alloc(8);
  field.writeUInt32LE(n, 0);
  field.writeUInt32BE(n, 4);
  return field;
}
/**
 * Encode `d` (in UTC) as a 17-byte ISO 9660 volume-descriptor timestamp:
 * 16 ASCII digits "YYYYMMDDHHMMSSCC" (hundredths always "00") followed by
 * one offset byte, which is left at 0.
 */
function isoDate(d: Date): Buffer {
  // [value, zero-padded width] for each component, in output order
  const components: ReadonlyArray<readonly [number, number]> = [
    [d.getUTCFullYear(), 4],
    [d.getUTCMonth() + 1, 2],
    [d.getUTCDate(), 2],
    [d.getUTCHours(), 2],
    [d.getUTCMinutes(), 2],
    [d.getUTCSeconds(), 2],
  ];
  const digits =
    components.map(([value, width]) => value.toString().padStart(width, "0")).join("") + "00"; // hundredths
  const stamp = Buffer.alloc(17, 0);
  stamp.write(digits, 0, 16, "ascii");
  stamp[16] = 0; // UTC offset (0 = UTC)
  return stamp;
}
/**
 * Encode `d` (in UTC) as the 7-byte recording date of an ISO 9660 directory
 * record: years since 1900, month, day, hour, minute, second, offset byte (0).
 */
function dirRecordDate(d: Date): Buffer {
  const fields = [
    d.getUTCFullYear() - 1900,
    d.getUTCMonth() + 1,
    d.getUTCDate(),
    d.getUTCHours(),
    d.getUTCMinutes(),
    d.getUTCSeconds(),
    0, // UTC
  ];
  const stamp = Buffer.alloc(7, 0);
  fields.forEach((value, index) => {
    stamp[index] = value;
  });
  return stamp;
}
// --- FAT12 filesystem builder ---
/**
 * Build a minimal FAT12 filesystem image in memory.
 *
 * Layout: BPB | FAT | FAT copy | Root dir | Data clusters.
 * The image always contains the directories EFI/ and EFI/BOOT/. Each input
 * file must live either in the root directory ("name.ext") or in
 * "EFI/BOOT/name.ext" (directory names are matched case-insensitively).
 *
 * Short (8.3) names are stored upper-cased and truncated to 8+3 characters.
 * When that short form differs from the real name (e.g. "autoexec.ipxe",
 * whose extension has four characters) a VFAT long-file-name entry is
 * written in front of the short entry so the file can still be opened by its
 * real name.
 *
 * @param files - Files to store; `path` uses "/" separators.
 * @returns The complete filesystem image.
 * @throws Error if a path is outside the supported directories, a directory
 *   runs out of entries, or the content does not fit into FAT12.
 */
function buildFatImage(files: Array<{ path: string; data: Buffer }>): Buffer {
  const bytesPerSector = FAT_SECTOR_SIZE;
  const sectorsPerCluster = 4; // 2KB clusters
  const clusterSize = bytesPerSector * sectorsPerCluster;
  const reservedSectors = 1;
  const numFats = 2;
  const dirEntrySize = 32;
  const rootEntryCount = 64; // 64 * 32 = 2048 bytes = 4 sectors
  const rootDirSectors = Math.ceil((rootEntryCount * dirEntrySize) / bytesPerSector);
  // A volume is only interpreted as FAT12 while it has fewer than 4085 clusters.
  const maxFat12Clusters = 4084;
  const lfnCharsPerEntry = 13;
  // Byte offsets of the 13 UCS-2 code units inside one long-name entry.
  const lfnCharOffsets = [1, 3, 5, 7, 9, 14, 16, 18, 20, 22, 24, 28, 30];

  // Every file occupies at least one cluster, even when it is empty.
  const clustersFor = (size: number): number => Math.max(1, Math.ceil(size / clusterSize));

  // Calculate data size needed: files + EFI and EFI/BOOT directory clusters + 2 spare.
  let fileClusters = 0;
  for (const f of files) fileClusters += clustersFor(f.data.length);
  const dataClusters = fileClusters + 2 + 2;
  if (dataClusters > maxFat12Clusters) {
    throw new Error(
      `FAT12 image too large: ${dataClusters} clusters needed, at most ${maxFat12Clusters} supported`,
    );
  }

  const fatEntries = dataClusters + 2; // clusters start at 2
  const fatBytes = Math.ceil((fatEntries * 3) / 2); // FAT12: 1.5 bytes per entry
  const sectorsPerFat = Math.ceil(fatBytes / bytesPerSector);
  const totalSectors =
    reservedSectors + numFats * sectorsPerFat + rootDirSectors + dataClusters * sectorsPerCluster;
  const image = Buffer.alloc(totalSectors * bytesPerSector, 0);

  // --- BPB (BIOS Parameter Block) ---
  // Fixed-width text fields are space-padded to their full width explicitly.
  image[0] = 0xEB; image[1] = 0x3C; image[2] = 0x90; // Jump + NOP
  image.write("LABCTL".padEnd(8, " "), 3, 8, "ascii"); // OEM
  image.writeUInt16LE(bytesPerSector, 11);
  image[13] = sectorsPerCluster;
  image.writeUInt16LE(reservedSectors, 14);
  image[16] = numFats;
  image.writeUInt16LE(rootEntryCount, 17);
  image.writeUInt16LE(totalSectors < 0x10000 ? totalSectors : 0, 19);
  image[21] = 0xF0; // media descriptor (removable)
  image.writeUInt16LE(sectorsPerFat, 22);
  image.writeUInt16LE(1, 24); // sectors per track
  image.writeUInt16LE(1, 26); // heads
  image[38] = 0x29; // Extended boot sig
  image.writeUInt32LE(0x12345678, 39); // volume serial
  image.write("IPXE BOOT".padEnd(11, " "), 43, 11, "ascii"); // volume label
  image.write("FAT12".padEnd(8, " "), 54, 8, "ascii"); // filesystem type
  image[510] = 0x55; image[511] = 0xAA; // Boot signature

  // --- FAT table ---
  const fatOffset = reservedSectors * bytesPerSector;
  const rootDirOffset = fatOffset + numFats * sectorsPerFat * bytesPerSector;
  const dataOffset = rootDirOffset + rootDirSectors * bytesPerSector;

  // FAT12 helper: write a 12-bit entry (two entries share three bytes)
  function fatSet(fat: number, cluster: number, value: number): void {
    const off = fatOffset + fat * sectorsPerFat * bytesPerSector;
    const byteIdx = Math.floor((cluster * 3) / 2);
    if (cluster % 2 === 0) {
      image[off + byteIdx] = value & 0xFF;
      image[off + byteIdx + 1] = (image[off + byteIdx + 1]! & 0xF0) | ((value >> 8) & 0x0F);
    } else {
      image[off + byteIdx] = (image[off + byteIdx]! & 0x0F) | ((value & 0x0F) << 4);
      image[off + byteIdx + 1] = (value >> 4) & 0xFF;
    }
  }

  // Media descriptor in FAT
  for (let f = 0; f < numFats; f++) {
    fatSet(f, 0, 0xFF0);
    fatSet(f, 1, 0xFFF);
  }

  // Clusters are handed out sequentially, so every chain is contiguous.
  let nextCluster = 2;
  function allocClusters(size: number): number {
    const needed = clustersFor(size);
    const startCluster = nextCluster;
    for (let i = 0; i < needed; i++) {
      const c = nextCluster++;
      const next = i === needed - 1 ? 0xFFF : c + 1; // 0xFFF = end of chain
      for (let f = 0; f < numFats; f++) fatSet(f, c, next);
    }
    return startCluster;
  }

  function clusterOffset(cluster: number): number {
    return dataOffset + (cluster - 2) * clusterSize;
  }

  // Write one 32-byte short (8.3) directory entry.
  function writeDirEntry(dirBuf: Buffer, entryIdx: number, name: string, ext: string, cluster: number, size: number, isDir: boolean): void {
    const off = entryIdx * dirEntrySize;
    dirBuf.write(name.toUpperCase().padEnd(8, " "), off, 8, "ascii");
    dirBuf.write(ext.toUpperCase().padEnd(3, " "), off + 8, 3, "ascii");
    dirBuf[off + 11] = isDir ? 0x10 : 0x20; // attributes
    dirBuf.writeUInt16LE(cluster & 0xFFFF, off + 26); // first cluster low
    dirBuf.writeUInt32LE(isDir ? 0 : size, off + 28); // file size
  }

  // Write the VFAT long-name entries for `longName`, starting at `entryIdx`.
  // They are stored last-part-first, directly in front of the short entry,
  // and each carries the checksum of that short entry's 11-byte name field.
  function writeLongNameEntries(dirBuf: Buffer, entryIdx: number, longName: string, checksum: number): void {
    const units: number[] = [];
    for (let i = 0; i < longName.length; i++) units.push(longName.charCodeAt(i));
    if (units.length % lfnCharsPerEntry !== 0) {
      units.push(0x0000); // terminator, only when the last entry is not full
      while (units.length % lfnCharsPerEntry !== 0) units.push(0xFFFF); // padding
    }
    const count = units.length / lfnCharsPerEntry;
    for (let seq = count; seq >= 1; seq--) {
      const off = (entryIdx + (count - seq)) * dirEntrySize;
      dirBuf[off] = seq | (seq === count ? 0x40 : 0); // 0x40 marks the last logical part
      dirBuf[off + 11] = 0x0F; // long-name attribute
      dirBuf[off + 12] = 0;
      dirBuf[off + 13] = checksum;
      dirBuf.writeUInt16LE(0, off + 26); // first cluster is always 0 here
      for (let c = 0; c < lfnCharsPerEntry; c++) {
        dirBuf.writeUInt16LE(units[(seq - 1) * lfnCharsPerEntry + c]!, off + lfnCharOffsets[c]!);
      }
    }
  }

  // Tracks the next free entry of a directory and refuses to overflow it.
  interface DirCursor { buf: Buffer; next: number; capacity: number; label: string }
  function reserveEntries(dir: DirCursor, count: number): number {
    if (dir.next + count > dir.capacity) {
      throw new Error(`FAT ${dir.label} is full (${dir.capacity} entries)`);
    }
    const first = dir.next;
    dir.next += count;
    return first;
  }

  // Add a file to `dir`: optional long-name entries followed by the short entry.
  function addFileEntry(dir: DirCursor, leaf: string, name: string, ext: string, cluster: number, size: number): void {
    const shortForm = ext ? `${name}.${ext}` : name;
    const needsLongName = shortForm !== leaf.toUpperCase();
    const lfnCount = needsLongName ? Math.ceil(leaf.length / lfnCharsPerEntry) : 0;
    const first = reserveEntries(dir, lfnCount + 1);
    const shortIdx = first + lfnCount;
    writeDirEntry(dir.buf, shortIdx, name, ext, cluster, size, false);
    if (needsLongName) {
      let checksum = 0;
      for (let i = 0; i < 11; i++) {
        checksum = (((checksum & 1) << 7) + (checksum >> 1) + dir.buf[shortIdx * dirEntrySize + i]!) & 0xFF;
      }
      writeLongNameEntries(dir.buf, first, leaf, checksum);
    }
  }

  // --- Create directory structure ---
  // Root: EFI dir + root-level files
  // EFI: BOOT dir
  // BOOT: EFI/BOOT/ files (e.g. BOOTX64.EFI, BOOTAA64.EFI)
  const efiDirCluster = allocClusters(clusterSize);
  const efiDirBuf = Buffer.alloc(clusterSize, 0);
  const bootDirCluster = allocClusters(clusterSize);
  const bootDirBuf = Buffer.alloc(clusterSize, 0);

  // . and .. entries for EFI (".." of a first-level directory points at cluster 0 = root)
  writeDirEntry(efiDirBuf, 0, ".", "", efiDirCluster, 0, true);
  writeDirEntry(efiDirBuf, 1, "..", "", 0, 0, true);
  // BOOT subdir in EFI
  writeDirEntry(efiDirBuf, 2, "BOOT", "", bootDirCluster, 0, true);
  // . and .. entries for BOOT
  writeDirEntry(bootDirBuf, 0, ".", "", bootDirCluster, 0, true);
  writeDirEntry(bootDirBuf, 1, "..", "", efiDirCluster, 0, true);

  // The root directory lives in its own fixed region, so write into the image directly.
  const rootBuf = image.subarray(rootDirOffset, rootDirOffset + rootDirSectors * bytesPerSector);
  const rootDir: DirCursor = { buf: rootBuf, next: 0, capacity: rootEntryCount, label: "root directory" };
  const bootDir: DirCursor = { buf: bootDirBuf, next: 2, capacity: clusterSize / dirEntrySize, label: "EFI/BOOT directory" };

  // Volume label
  const labelIdx = reserveEntries(rootDir, 1);
  rootBuf.write("IPXE BOOT".padEnd(11, " "), labelIdx * dirEntrySize, 11, "ascii");
  rootBuf[labelIdx * dirEntrySize + 11] = 0x08; // volume label attribute
  // EFI directory in root
  writeDirEntry(rootBuf, reserveEntries(rootDir, 1), "EFI", "", efiDirCluster, 0, true);

  // Write files
  for (const file of files) {
    const parts = file.path.split("/").filter(Boolean);
    const leaf = parts[parts.length - 1];
    const inRoot = parts.length === 1;
    const inEfiBoot =
      parts.length === 3 && parts[0]!.toUpperCase() === "EFI" && parts[1]!.toUpperCase() === "BOOT";
    if (leaf === undefined || (!inRoot && !inEfiBoot)) {
      // Previously such files consumed clusters but got no directory entry.
      throw new Error(
        `Unsupported path in FAT image: "${file.path}" (only root-level and EFI/BOOT/ files are supported)`,
      );
    }
    const nameParts = leaf.toUpperCase().split(".");
    const name = nameParts[0]!.substring(0, 8);
    const ext = (nameParts[1] ?? "").substring(0, 3);
    const fileCluster = allocClusters(file.data.length);
    file.data.copy(image, clusterOffset(fileCluster));
    addFileEntry(inRoot ? rootDir : bootDir, leaf, name, ext, fileCluster, file.data.length);
  }

  // Write directory clusters to image
  efiDirBuf.copy(image, clusterOffset(efiDirCluster));
  bootDirBuf.copy(image, clusterOffset(bootDirCluster));
  return image;
}
// --- ISO 9660 builder ---
/**
 * Build a UEFI-bootable ISO 9660 image around an embedded FAT EFI image.
 *
 * ISO layout (2048-byte sectors):
 *   0-15  System area (unused)
 *   16    Primary Volume Descriptor
 *   17    Boot Record Volume Descriptor (El Torito)
 *   18    Volume Descriptor Set Terminator
 *   19    Root directory
 *   20    El Torito boot catalog
 *   21+   El Torito boot image (the FAT image), also listed as EFI.IMG
 *
 * @param efiFiles - Files placed into the FAT image (see buildFatImage for the accepted paths).
 * @param scriptContent - Optional iPXE script stored as "autoexec.ipxe" in the FAT root;
 *   an empty string adds no script.
 * @returns The complete ISO image.
 */
export function buildBootIso(efiFiles: Array<{ path: string; data: Buffer }>, scriptContent?: string): Buffer {
  const now = new Date();

  // Build FAT image with all files
  const allFiles = [...efiFiles];
  if (scriptContent) {
    allFiles.push({ path: "autoexec.ipxe", data: Buffer.from(scriptContent, "utf-8") });
  }
  const fatImage = buildFatImage(allFiles);

  const fatSectors = Math.ceil(fatImage.length / SECTOR_SIZE);
  const rootDirSector = 19;
  const bootCatalogSector = 20;
  const efiImageSector = 21;
  const totalSectors = efiImageSector + fatSectors + 1;
  const iso = Buffer.alloc(totalSectors * SECTOR_SIZE, 0);
  const sector = (n: number): Buffer => iso.subarray(n * SECTOR_SIZE, (n + 1) * SECTOR_SIZE);

  // Build one ISO 9660 directory record. Field offsets: 2 extent, 10 data
  // length, 18 date, 25 flags, 28 volume sequence number, 32 identifier
  // length, 33 identifier (+1 pad byte when the identifier length is even).
  const makeDirRecord = (identifier: Buffer, extentSector: number, dataLength: number, flags: number): Buffer => {
    const pad = identifier.length % 2 === 0 ? 1 : 0;
    const rec = Buffer.alloc(33 + identifier.length + pad, 0);
    rec[0] = rec.length;
    u32both(extentSector).copy(rec, 2);
    u32both(dataLength).copy(rec, 10);
    dirRecordDate(now).copy(rec, 18);
    rec[25] = flags;
    u16both(1).copy(rec, 28);
    rec[32] = identifier.length;
    identifier.copy(rec, 33);
    return rec;
  };
  const selfId = Buffer.from([0]); // identifier of "." and of the root record
  const parentId = Buffer.from([1]); // identifier of ".."

  // --- Primary Volume Descriptor (sector 16) ---
  const pvd = sector(16);
  pvd[0] = 1; // type: Primary
  pvd.write("CD001", 1, 5, "ascii"); // standard identifier
  pvd[6] = 1; // version
  asciiPad("LABCTL", 32).copy(pvd, 8); // system identifier
  asciiPad("IPXE_BOOT", 32).copy(pvd, 40); // volume identifier
  u32both(totalSectors).copy(pvd, 80); // volume space size
  u16both(1).copy(pvd, 120); // volume set size
  u16both(1).copy(pvd, 124); // volume sequence number
  u16both(SECTOR_SIZE).copy(pvd, 128); // logical block size
  // Root directory record (34 bytes). Built by the same helper as "." so the
  // volume sequence number sits at offset 28 (it used to be written at 30,
  // leaving a malformed both-endian value).
  makeDirRecord(selfId, rootDirSector, SECTOR_SIZE, 0x02).copy(pvd, 156);
  // Volume dates
  isoDate(now).copy(pvd, 813); // creation
  isoDate(now).copy(pvd, 830); // modification
  Buffer.alloc(17, 0x30).copy(pvd, 847); // expiration (none)
  isoDate(now).copy(pvd, 864); // effective
  pvd[881] = 1; // file structure version

  // --- Boot Record Volume Descriptor (El Torito, sector 17) ---
  const brvd = sector(17);
  brvd[0] = 0; // type: Boot Record
  brvd.write("CD001", 1, 5, "ascii");
  brvd[6] = 1; // version
  brvd.write("EL TORITO SPECIFICATION", 7, 32, "ascii");
  u32le(bootCatalogSector).copy(brvd, 0x47); // boot catalog pointer

  // --- Volume Descriptor Set Terminator (sector 18) ---
  const vdst = sector(18);
  vdst[0] = 255; // type: terminator
  vdst.write("CD001", 1, 5, "ascii");
  vdst[6] = 1;

  // --- Root Directory (sector 19) ---
  const rootDir = sector(rootDirSector);
  const records = [
    makeDirRecord(selfId, rootDirSector, SECTOR_SIZE, 0x02), // "."
    makeDirRecord(parentId, rootDirSector, SECTOR_SIZE, 0x02), // ".." (root is its own parent)
    makeDirRecord(Buffer.from("EFI.IMG;1", "ascii"), efiImageSector, fatImage.length, 0x00), // FAT image as a file
    makeDirRecord(Buffer.from("BOOT.CAT;1", "ascii"), bootCatalogSector, SECTOR_SIZE, 0x01), // hidden boot catalog
  ];
  let offset = 0;
  for (const rec of records) {
    rec.copy(rootDir, offset);
    offset += rec.length;
  }

  // --- El Torito Boot Catalog (sector 20) ---
  const catalog = sector(bootCatalogSector);
  // Validation entry (32 bytes)
  catalog[0] = 1; // header ID
  catalog[1] = 0xEF; // platform: EFI
  catalog.write("LABCTL", 4, 24, "ascii"); // ID string
  catalog[30] = 0x55; // key bytes
  catalog[31] = 0xAA;
  // The checksum makes all sixteen 16-bit words of the entry, key bytes
  // included, sum to zero, so it must be computed after the key bytes are in
  // place (the checksum field itself is still zero at this point).
  let cksum = 0;
  for (let i = 0; i < 32; i += 2) {
    cksum += catalog.readUInt16LE(i);
  }
  catalog.writeUInt16LE((0x10000 - (cksum & 0xFFFF)) & 0xFFFF, 28); // checksum

  // Default/Initial entry (32 bytes, offset 32)
  catalog[32] = 0x88; // bootable
  catalog[33] = 0x00; // boot media type: no emulation (the EFI platform ID belongs in the validation entry)
  catalog.writeUInt16LE(0, 34); // load segment
  catalog[36] = 0; // system type
  const efiImageSectors512 = Math.ceil(fatImage.length / FAT_SECTOR_SIZE);
  catalog.writeUInt16LE(Math.min(efiImageSectors512, 0xFFFF), 38); // sector count (512-byte units, 16-bit field)
  catalog.writeUInt32LE(efiImageSector, 40); // load LBA

  // --- EFI boot image (FAT filesystem, starting at sector 21) ---
  fatImage.copy(iso, efiImageSector * SECTOR_SIZE);
  return iso;
}
/**
 * Build a ready-to-serve iPXE boot ISO from system iPXE binaries.
 *
 * Every iPXE EFI binary found at the known system locations is embedded as
 * the matching removable-media boot file, together with an autoexec script
 * that runs DHCP (retrying once) and chains to `<bastionUrl>/boot.ipxe`.
 *
 * @param bastionUrl - Base URL of the bastion HTTP server; trailing slashes are ignored.
 * @throws Error if none of the iPXE binaries can be read.
 */
export function buildBastionBootIso(bastionUrl: string): Buffer {
  const PATHS: Record<string, { src: string; dest: string }> = {
    x86_64: { src: "/usr/share/ipxe/ipxe-snponly-x86_64.efi", dest: "EFI/BOOT/BOOTX64.EFI" },
    aarch64: { src: "/usr/share/ipxe/arm64-efi/snponly.efi", dest: "EFI/BOOT/BOOTAA64.EFI" },
  };

  const efiFiles: Array<{ path: string; data: Buffer }> = [];
  for (const { src, dest } of Object.values(PATHS)) {
    try {
      efiFiles.push({ path: dest, data: readFileSync(src) });
    } catch {
      // Architecture not available, skip
    }
  }
  if (efiFiles.length === 0) {
    throw new Error("No iPXE EFI binaries found on system");
  }

  // Avoid "//boot.ipxe" when the configured URL already ends in a slash.
  const baseUrl = bastionUrl.replace(/\/+$/, "");

  // iPXE scripts have no "( ... )" command grouping, so the single DHCP retry
  // is expressed with labels and goto; a parenthesised group would be looked
  // up as a command named "(" and abort the script before the chain line.
  const script = [
    "#!ipxe",
    "",
    "echo Booting from iPXE ISO -- connecting to bastion...",
    "dhcp || goto dhcp_retry",
    "goto chainload",
    ":dhcp_retry",
    "echo DHCP failed, retrying...",
    "sleep 3",
    "dhcp || shell",
    ":chainload",
    `chain ${baseUrl}/boot.ipxe || shell`,
  ].join("\n");

  return buildBootIso(efiFiles, script);
}

View File

@@ -1,7 +1,7 @@
// Generate kickstart content for discovery and install modes.
// Uses template literal functions -- no external template engine.
import type { BastionConfig } from "@lab/shared";
import type { BastionConfig, Role } from "@lab/shared";
import { renderDiscoverKickstart } from "../templates/discover.ks.js";
import { renderInstallKickstart, type InstallKickstartParams } from "../templates/install.ks.js";
@@ -23,7 +23,7 @@ export function generateInstallKickstart(
params: {
hostname: string;
disk: string;
role: "worker" | "infra";
role: Role;
},
): string {
const ksParams: InstallKickstartParams = {

View File

@@ -0,0 +1,252 @@
// WebSocket connection from bastion to labd for registration and state sync.
// If LABD_URL is configured, bastion registers with labd on startup and pushes
// state changes. If not configured, bastion runs standalone (backward compatible).
import WebSocket from "ws";
import { readFileSync, writeFileSync, existsSync } from "node:fs";
import { hostname as osHostname } from "node:os";
import type { BastionState, BastionConfig } from "@lab/shared";
import {
type BastionMessage,
type LabdBastionMessage,
isLabdBastionMessage,
} from "@lab/shared";
import { logger } from "./logger.js";
// Period of the bastion-heartbeat messages sent while a connection is up.
const HEARTBEAT_INTERVAL_MS = 10_000;
// Delay before the first reconnect attempt; doubled for every further attempt.
const RECONNECT_BASE_DELAY_MS = 1_000;
// Upper bound for the exponential reconnect delay.
const RECONNECT_MAX_DELAY_MS = 30_000;
// Handler for a command received from labd; its result is sent back as a command-response.
type CommandHandler = (msg: LabdBastionMessage) => Promise<{ status: "ok" | "error"; data?: unknown; error?: string }>;
/**
 * WebSocket client that registers this bastion with labd and keeps it in sync.
 *
 * Lifecycle: `connect()` opens the socket; on open the bastion either enrolls
 * (first run) or pushes its state (id already persisted in
 * `<bastionDir>/bastion-id`). While connected a heartbeat is sent
 * periodically. When the socket closes, a reconnect is scheduled with
 * exponential backoff until `close()` is called.
 */
export class BastionConnection {
  private ws: WebSocket | null = null;
  private bastionId: string | null = null;
  private heartbeatTimer: NodeJS.Timeout | null = null;
  private reconnectTimer: NodeJS.Timeout | null = null;
  private retryCount = 0;
  private closed = false;
  private startTime = Date.now();
  private commandHandlers = new Map<string, CommandHandler>();

  /**
   * @param config - Bastion configuration (labd URL, join token, bastion dir, network info).
   * @param getState - Returns the current bastion state for syncs and heartbeats.
   */
  constructor(
    private readonly config: BastionConfig,
    private readonly getState: () => BastionState,
  ) {
    // Load persisted bastionId if we've enrolled before
    const idFile = `${config.bastionDir}/bastion-id`;
    if (existsSync(idFile)) {
      this.bastionId = readFileSync(idFile, "utf-8").trim();
    }
  }

  /** Register a handler for incoming commands from labd. */
  onCommand(type: string, handler: CommandHandler): void {
    this.commandHandlers.set(type, handler);
  }

  /** Open the connection to labd. No-op when closed or when no labd URL is configured. */
  connect(): void {
    if (this.closed) return;
    if (!this.config.labdUrl) return;

    const wsUrl = this.config.labdUrl
      .replace(/^https:/, "wss:")
      .replace(/^http:/, "ws:");
    const token = this.config.bastionJoinToken ?? "";
    const url = `${wsUrl}/ws/bastion?token=${encodeURIComponent(token)}`;
    logger.info(`Connecting to labd at ${this.config.labdUrl}...`);

    const socket = new WebSocket(url);
    this.ws = socket;

    socket.on("open", () => {
      logger.info("Connected to labd");
      this.retryCount = 0;
      // Send enrollment or re-registration
      if (this.bastionId) {
        // Already enrolled — send state sync immediately
        this.sendStateSync();
      } else {
        // First time — enroll
        this.send({
          type: "bastion-enroll",
          token,
          hostname: osHostname(),
          network: this.config.network,
          serverIp: this.config.serverIp,
        });
      }
      this.startHeartbeat();
    });

    socket.on("message", (data: WebSocket.Data) => {
      // Parse failures and handler failures are reported separately so the
      // log points at the real cause.
      let msg: unknown;
      try {
        msg = JSON.parse(data.toString());
      } catch (err) {
        logger.error(`Failed to parse labd message: ${err instanceof Error ? err.message : String(err)}`);
        return;
      }
      if (!isLabdBastionMessage(msg)) {
        // The payload may be any JSON value (including null), so read `type` defensively.
        const type = typeof msg === "object" && msg !== null ? (msg as { type?: unknown }).type : undefined;
        logger.warn(`Unknown message from labd: ${String(type)}`);
        return;
      }
      try {
        this.handleMessage(msg);
      } catch (err) {
        logger.error(`Failed to handle labd message ${msg.type}: ${err instanceof Error ? err.message : String(err)}`);
      }
    });

    socket.on("close", () => {
      logger.warn("Disconnected from labd");
      this.stopHeartbeat();
      this.scheduleReconnect();
    });

    socket.on("error", (err) => {
      logger.error(`WebSocket error: ${err.message}`);
      // close event will fire after this, triggering reconnect
    });
  }

  /** Push current state to labd. Call this after any state change. */
  syncState(): void {
    if (!this.bastionId || !this.ws || this.ws.readyState !== WebSocket.OPEN) return;
    this.sendStateSync();
  }

  /** Forward a progress event to labd. Dropped when not enrolled or not connected. */
  sendProgress(mac: string, stage: string, detail: string): void {
    if (!this.bastionId || !this.ws || this.ws.readyState !== WebSocket.OPEN) return;
    this.send({
      type: "bastion-progress",
      bastionId: this.bastionId,
      mac,
      stage,
      detail,
      timestamp: new Date().toISOString(),
    });
  }

  /** Shut the connection down permanently: stops timers and prevents further reconnects. */
  close(): void {
    this.closed = true;
    this.stopHeartbeat();
    if (this.reconnectTimer) {
      clearTimeout(this.reconnectTimer);
      this.reconnectTimer = null;
    }
    if (this.ws) {
      this.ws.close();
      this.ws = null;
    }
  }

  /** Dispatch one validated message from labd. */
  private handleMessage(msg: LabdBastionMessage): void {
    switch (msg.type) {
      case "bastion-enrolled":
        this.bastionId = msg.bastionId;
        // Persist for reconnects. A failed write must not abort enrollment:
        // the id is already held in memory for this process.
        try {
          writeFileSync(`${this.config.bastionDir}/bastion-id`, msg.bastionId);
        } catch (err) {
          logger.error(`Failed to persist bastion id: ${err instanceof Error ? err.message : String(err)}`);
        }
        logger.info(`Enrolled with labd as bastion ${msg.bastionId}`);
        // Send initial state
        this.sendStateSync();
        break;
      case "bastion-heartbeat-ack":
        // No-op, confirms labd is alive
        break;
      case "server-shutdown":
        logger.info(`labd shutting down, will reconnect in ${msg.reconnectAfter}ms`);
        break;
      case "command-install":
      case "command-forget":
      case "command-role-update":
        void this.handleCommand(msg);
        break;
    }
  }

  /** Run the registered handler for a command and always answer with a command-response. */
  private async handleCommand(msg: LabdBastionMessage & { requestId: string }): Promise<void> {
    const handler = this.commandHandlers.get(msg.type);
    if (!handler) {
      this.send({
        type: "command-response",
        requestId: msg.requestId,
        status: "error",
        error: `No handler for command: ${msg.type}`,
      });
      return;
    }
    try {
      const result = await handler(msg);
      this.send({
        type: "command-response",
        requestId: msg.requestId,
        ...result,
      });
    } catch (err) {
      this.send({
        type: "command-response",
        requestId: msg.requestId,
        status: "error",
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }

  /** Send the full current state; requires an enrolled bastion id. */
  private sendStateSync(): void {
    if (!this.bastionId) return;
    this.send({
      type: "bastion-state-sync",
      bastionId: this.bastionId,
      state: this.getState(),
    });
  }

  /** (Re)start the periodic heartbeat carrying uptime and machine count. */
  private startHeartbeat(): void {
    this.stopHeartbeat();
    this.heartbeatTimer = setInterval(() => {
      if (!this.bastionId) return;
      const state = this.getState();
      const machineCount =
        Object.keys(state.discovered).length +
        Object.keys(state.install_queue).length +
        Object.keys(state.installed).length;
      this.send({
        type: "bastion-heartbeat",
        bastionId: this.bastionId,
        uptime: Math.floor((Date.now() - this.startTime) / 1000),
        machineCount,
      });
    }, HEARTBEAT_INTERVAL_MS);
  }

  private stopHeartbeat(): void {
    if (this.heartbeatTimer) {
      clearInterval(this.heartbeatTimer);
      this.heartbeatTimer = null;
    }
  }

  /** Schedule the next connect() with exponential backoff, capped at RECONNECT_MAX_DELAY_MS. */
  private scheduleReconnect(): void {
    if (this.closed) return;
    // Never keep two reconnects pending: that would open two sockets.
    if (this.reconnectTimer) {
      clearTimeout(this.reconnectTimer);
      this.reconnectTimer = null;
    }
    const delay = Math.min(
      RECONNECT_BASE_DELAY_MS * Math.pow(2, this.retryCount),
      RECONNECT_MAX_DELAY_MS,
    );
    this.retryCount++;
    logger.info(`Reconnecting to labd in ${delay}ms (attempt ${this.retryCount})...`);
    this.reconnectTimer = setTimeout(() => {
      this.reconnectTimer = null;
      this.connect();
    }, delay);
  }

  /** Serialize and send a message; silently dropped while the socket is not open. */
  private send(msg: BastionMessage): void {
    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify(msg));
    }
  }
}

View File

@@ -0,0 +1,233 @@
// Post-provision automation: installs k3s after OS provisioning completes.
// Runs asynchronously — does not block the progress callback.
import { spawn } from "node:child_process";
import { existsSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { logger } from "./logger.js";
import { progressBus } from "./progress-events.js";
/**
 * Locate a private SSH key to use for post-provision logins.
 *
 * Looks in `/home/$SUDO_USER/.ssh` when SUDO_USER is set, otherwise in the
 * current user's home, and returns the first existing key in the order
 * ed25519, ecdsa, rsa, or undefined when none exists.
 */
function findSshKey(): string | undefined {
  const sudoUser = process.env["SUDO_USER"];
  const homeDir = sudoUser ? join("/home", sudoUser) : homedir();
  const candidates = ["id_ed25519", "id_ecdsa", "id_rsa"].map((keyName) => join(homeDir, ".ssh", keyName));
  return candidates.find((candidate) => existsSync(candidate));
}
/**
 * Poll the machine over SSH until `echo ok` succeeds or `timeoutMs` has
 * elapsed. Failed attempts are retried after a 5 second pause.
 *
 * @returns true once SSH answered, false if the timeout elapsed first.
 */
async function waitForSsh(ip: string, user: string, keyPath: string | undefined, timeoutMs: number): Promise<boolean> {
  const startedAt = Date.now();
  const pause = (ms: number): Promise<void> => new Promise((wake) => setTimeout(wake, ms));

  for (;;) {
    if (!(Date.now() - startedAt < timeoutMs)) {
      return false;
    }
    try {
      const output = await sshExec(ip, user, "echo ok", keyPath);
      if (output.includes("ok")) {
        return true;
      }
    } catch {
      /* retry */
    }
    await pause(5000);
  }
}
/**
 * Run one command over SSH (batch mode, host-key checking disabled) and
 * collect its standard output.
 *
 * @returns Resolves with stdout when ssh exits with code 0.
 * @throws Rejects with an Error carrying the exit code (plus stderr, when
 *   ssh printed any), or with the spawn error when ssh cannot be started.
 */
function sshExec(ip: string, user: string, command: string, keyPath: string | undefined): Promise<string> {
  return new Promise((resolve, reject) => {
    const args = [
      "-o", "StrictHostKeyChecking=no",
      "-o", "ConnectTimeout=10",
      "-o", "BatchMode=yes",
      ...(keyPath ? ["-i", keyPath] : []),
      `${user}@${ip}`,
      command,
    ];
    const proc = spawn("ssh", args, { stdio: ["ignore", "pipe", "pipe"] });

    // Decode as UTF-8 streams so a multi-byte character split across chunks is not corrupted.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");
    let stdout = "";
    let stderr = "";
    proc.stdout.on("data", (d: string) => { stdout += d; });
    // stderr is a pipe, so it has to be drained: an unread pipe can fill up
    // and block ssh. Its content also explains failures.
    proc.stderr.on("data", (d: string) => { stderr += d; });

    // "error" and "close" can both fire; settle the promise only once.
    let settled = false;
    proc.on("close", (code) => {
      if (settled) return;
      settled = true;
      if (code === 0) {
        resolve(stdout);
        return;
      }
      const reason = stderr.trim();
      reject(new Error(reason ? `SSH exit ${code}: ${reason}` : `SSH exit ${code}`));
    });
    proc.on("error", (err) => {
      if (settled) return;
      settled = true;
      reject(err);
    });
  });
}
/**
 * Run a command over SSH and stream its output line by line.
 *
 * Every non-empty line of stdout and stderr is logged as `[k3s:<label>] ...`
 * and, when `mac` is given, emitted on progressBus as a "log" event.
 *
 * @returns Resolves (never rejects) with the exit code; 1 when ssh could not
 *   be started or exited without a code.
 */
function sshRunStreaming(ip: string, user: string, command: string, keyPath: string | undefined, label: string, mac?: string): Promise<number> {
  return new Promise((resolve) => {
    const args = [
      "-o", "StrictHostKeyChecking=no",
      "-o", "ConnectTimeout=10",
      "-o", "BatchMode=yes",
      ...(keyPath ? ["-i", keyPath] : []),
      `${user}@${ip}`,
      command,
    ];
    const proc = spawn("ssh", args, { stdio: ["ignore", "pipe", "pipe"] });

    const report = (line: string): void => {
      if (!line) return;
      logger.info(`[k3s:${label}] ${line}`);
      if (mac) {
        progressBus.emit({ mac, hostname: label, stage: "log", detail: `[k3s] ${line}`, timestamp: new Date().toISOString() });
      }
    };

    // Data arrives in arbitrary chunks, not lines. Keep the trailing partial
    // line until its newline shows up so one line is never reported as two
    // fragments. Returns a function that flushes whatever is left.
    const streamLines = (stream: NodeJS.ReadableStream): (() => void) => {
      let pending = "";
      stream.setEncoding("utf8");
      stream.on("data", (chunk: string) => {
        pending += chunk;
        const lines = pending.split("\n");
        pending = lines.pop() ?? "";
        for (const line of lines) report(line);
      });
      return () => {
        report(pending);
        pending = "";
      };
    };
    const flushStdout = streamLines(proc.stdout);
    const flushStderr = streamLines(proc.stderr);

    // "error" and "close" can both fire; settle the promise only once.
    let settled = false;
    const finish = (code: number): void => {
      if (settled) return;
      settled = true;
      flushStdout();
      flushStderr();
      resolve(code);
    };
    proc.on("close", (code) => finish(code ?? 1));
    proc.on("error", (err) => {
      logger.error(`[k3s:${label}] failed to run ssh: ${err.message}`);
      finish(1);
    });
  });
}
/**
 * Trigger k3s installation on a freshly provisioned machine.
 * Runs in the background — logs progress to bastion console and progressBus.
 *
 * Steps: wait for SSH, load kernel modules / disable swap, write sysctl
 * settings, disable firewalld and remove stale CNI state, install k3s
 * (server for roles "infra" and "labcontroller", agent otherwise), wait for
 * the node to report Ready, then deploy the labcontroller stack when the
 * role's app list calls for it.
 *
 * @param hostname - Machine name, used in messages and as log label.
 * @param ip - Address used for SSH.
 * @param role - Role name, looked up in ROLE_REGISTRY.
 * @param sshUser - User for SSH logins.
 * @param mac - When given, progress is also emitted on progressBus for this MAC.
 */
export async function triggerPostProvisionK3s(
  hostname: string,
  ip: string,
  role: string,
  sshUser: string,
  mac?: string,
): Promise<void> {
  const keyPath = findSshKey();

  const emitStage = (stage: string, detail: string): void => {
    logger.info(`[k3s] ${detail}`);
    if (mac) {
      progressBus.emit({ mac, hostname, stage, detail, timestamp: new Date().toISOString() });
    }
  };
  // Run a command on the target machine, streaming its output.
  const run = (command: string): Promise<number> =>
    sshRunStreaming(ip, sshUser, command, keyPath, hostname, mac);
  // Shell command that feeds a manifest (as JSON) to kubectl; single quotes are escaped for the shell.
  const applyManifestCommand = (manifest: unknown): string =>
    `echo '${JSON.stringify(manifest).replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f -`;

  emitStage("post-provision", `auto-installing k3s on ${hostname} (${ip}) role=${role}`);
  emitStage("post-provision", "waiting for SSH (machine may still be rebooting)");

  // Wait up to 5 minutes for SSH (machine just finished kickstart and is rebooting)
  const sshReady = await waitForSsh(ip, sshUser, keyPath, 300_000);
  if (!sshReady) {
    emitStage("error", `SSH not available on ${hostname} (${ip}) after 5 minutes`);
    logger.error(`[k3s] Run manually: labctl app k3s install ${hostname}`);
    return;
  }
  emitStage("post-provision", "SSH ready, installing k3s prerequisites");

  // Step 1: Prerequisites
  await run("sudo modprobe br_netfilter overlay 2>/dev/null; sudo swapoff -a");

  // Step 2: Sysctl
  emitStage("post-provision", "configuring sysctl for k3s");
  const sysctlCommand = [
    "sudo bash -c 'cat > /etc/sysctl.d/90-k3s.conf << EOF",
    "net.bridge.bridge-nf-call-iptables=1",
    "net.bridge.bridge-nf-call-ip6tables=1",
    "net.ipv4.ip_forward=1",
    "vm.panic_on_oom=0",
    "vm.overcommit_memory=1",
    "kernel.panic=10",
    "kernel.panic_on_oops=1",
    "EOF",
    "sysctl --system > /dev/null'",
  ].join("\n");
  await run(sysctlCommand);

  // Step 3: SELinux + firewalld + stale CNI cleanup
  emitStage("post-provision", "disabling firewalld and cleaning stale CNI");
  await run([
    "sudo setenforce 0 2>/dev/null || true",
    "sudo systemctl disable --now firewalld 2>/dev/null || true",
    "sudo systemctl mask firewalld 2>/dev/null || true",
    // Clean stale CNI interfaces that conflict with Cilium (flannel.1 uses same vxlan port 8472)
    "sudo systemctl stop k3s 2>/dev/null || true",
    "sudo ip link delete flannel.1 2>/dev/null || true",
    "sudo ip link delete cilium_vxlan 2>/dev/null || true",
    "sudo ip link delete cilium_host 2>/dev/null || true",
    "sudo ip link delete cilium_net 2>/dev/null || true",
    "sudo rm -rf /etc/cni/net.d/* /var/lib/cni/ 2>/dev/null || true",
  ].join("; "));

  // Step 4: Install k3s
  // labcontroller extends infra — both are k3s servers
  const k3sRole = (role === "infra" || role === "labcontroller") ? "server" : "agent";
  emitStage("post-provision", `installing k3s ${k3sRole}`);
  const code = await run(
    `curl -sfL https://get.k3s.io | sudo INSTALL_K3S_EXEC="${k3sRole}" INSTALL_K3S_SKIP_SELINUX_RPM=true sh -`,
  );
  if (code !== 0) {
    emitStage("error", `k3s install failed on ${hostname} (exit ${code})`);
    logger.error(`[k3s] Run manually: labctl app k3s install ${hostname}`);
    return;
  }

  // Step 5: Wait for ready
  // "grep -w" is required: a plain "grep Ready" also matches "NotReady" and
  // would end the wait immediately. The loop exits non-zero when the node
  // never reports Ready so the outcome can be surfaced.
  emitStage("post-provision", "waiting for k3s node to become Ready");
  const readyCode = await run(
    "for i in $(seq 1 60); do sudo k3s kubectl get nodes 2>/dev/null | grep -qw Ready && exit 0; sleep 2; done; exit 1",
  );
  if (readyCode !== 0) {
    // Not fatal: keep going as before, but leave a trace of it.
    logger.warn(`[k3s] node on ${hostname} did not report Ready within the wait window`);
    emitStage("post-provision", `k3s node on ${hostname} not Ready yet, continuing`);
  }
  emitStage("post-provision", `k3s ${k3sRole} installed on ${hostname} (${ip})`);

  // Step 6: Deploy role-specific apps from ROLE_REGISTRY chain
  const { ROLE_REGISTRY } = await import("@lab/shared");
  const roleInfo = ROLE_REGISTRY.find((r: { name: string }) => r.name === role);
  if (roleInfo && roleInfo.apps.length > 0) {
    emitStage("post-provision", `deploying apps: ${roleInfo.apps.join(", ")}`);
    if (roleInfo.apps.includes("cockroachdb") || roleInfo.apps.includes("labd") || roleInfo.apps.includes("bastion")) {
      // This is a labcontroller — deploy the full stack
      emitStage("post-provision", `deploying labcontroller stack on ${hostname}`);
      try {
        const { cockroachDbManifests } = await import("@lab/modules/dist/modules/labcontroller/src/cockroachdb.js");
        const { labdManifests } = await import("@lab/modules/dist/modules/labcontroller/src/labd.js");
        const { bastionManifests } = await import("@lab/modules/dist/modules/labcontroller/src/bastion.js");
        const crdb = cockroachDbManifests();
        const labd = labdManifests({ databaseUrl: crdb.connectionString });
        const bastion = bastionManifests();
        const manifests = [
          crdb.namespace, crdb.headlessService, crdb.clientService, crdb.statefulSet,
          labd.service, labd.deployment,
          bastion.daemonSet,
        ];
        for (const manifest of manifests) {
          const kind = (manifest as { kind?: string }).kind ?? "?";
          const name = ((manifest as { metadata?: { name?: string } }).metadata)?.name ?? "?";
          const result = await run(applyManifestCommand(manifest));
          if (result === 0) {
            emitStage("post-provision", `applied ${kind}/${name}`);
          } else {
            emitStage("error", `failed to apply ${kind}/${name}`);
          }
        }
        // Init CockroachDB
        await run(
          `${applyManifestCommand(crdb.initJob)} 2>/dev/null; sleep 30; sudo k3s kubectl exec cockroachdb-0 -n lab-system -- /cockroach/cockroach sql --insecure -e 'CREATE DATABASE IF NOT EXISTS lab' 2>/dev/null || true`,
        );
        emitStage("post-provision", `labcontroller stack deployed on ${hostname}`);
      } catch (err) {
        const errMsg = err instanceof Error ? err.message : String(err);
        emitStage("error", `failed to deploy labcontroller stack: ${errMsg}`);
        logger.error(`[post-provision] Run manually: labctl app labcontroller deploy ${hostname}`);
      }
    }
  }
  emitStage("post-provision", `${hostname} (${ip}) provisioning complete (role: ${role})`);
}

View File

@@ -0,0 +1,28 @@
// In-memory event bus for provision progress updates.
// Allows SSE clients to subscribe to real-time progress and log lines.
import { EventEmitter } from "node:events";
/** One provisioning update broadcast to every subscriber of {@link progressBus}. */
export interface ProgressEvent {
  mac: string;
  hostname: string;
  /** "log" for raw log lines, anything else is a progress stage name */
  stage: string;
  detail: string;
  /** NOTE(review): presumably an ISO-8601 string -- confirm against the emitters. */
  timestamp: string;
}

// Single process-wide emitter; every event travels on the "progress" channel.
const _bus = new EventEmitter();

// Each subscriber registers its own listener, so the default cap of 10 would
// make Node print MaxListenersExceededWarning as soon as an eleventh one
// subscribes. Zero means "unlimited"; subscribers are expected to call off().
_bus.setMaxListeners(0);

/** Typed facade over the emitter so callers cannot publish or subscribe to the wrong shape. */
export const progressBus = {
  /** Synchronously delivers `event` to all currently registered listeners. */
  emit(event: ProgressEvent): void {
    _bus.emit("progress", event);
  },
  /** Registers `listener` for every subsequent event. */
  on(listener: (event: ProgressEvent) => void): void {
    _bus.on("progress", listener);
  },
  /** Removes a listener previously passed to `on` (must be the same function reference). */
  off(listener: (event: ProgressEvent) => void): void {
    _bus.off("progress", listener);
  },
};

View File

@@ -13,9 +13,18 @@ const EMPTY_STATE: BastionState = {
installed: {},
};
export type StateChangeListener = (state: BastionState) => void;
export class StateManager {
private changeListeners: StateChangeListener[] = [];
constructor(private readonly stateFile: string) {}
/** Register a listener that fires after every state update. */
onChange(listener: StateChangeListener): void {
this.changeListeners.push(listener);
}
load(): BastionState {
try {
const raw = readFileSync(this.stateFile, "utf-8");
@@ -52,6 +61,9 @@ export class StateManager {
const state = this.load();
fn(state);
this.save(state);
for (const listener of this.changeListeners) {
try { listener(state); } catch { /* don't let listener errors break state updates */ }
}
return state;
}
}

View File

@@ -62,7 +62,7 @@ dhcp-match=set:httpboot-arm64,option:client-arch,20
dhcp-userclass=set:ipxe,iPXE
# UEFI HTTP Boot -> serve full iPXE EFI via HTTP (no TFTP size limit)
dhcp-boot=tag:httpboot-x86_64,http://${serverIp}:${httpPort}/ipxe-real.efi
dhcp-boot=tag:httpboot-x86_64,http://${serverIp}:${httpPort}/ipxe.efi
dhcp-boot=tag:httpboot-arm64,http://${serverIp}:${httpPort}/ipxe-arm64.efi
# Echo vendor class back to HTTP Boot clients (required by UEFI HTTP Boot spec)
dhcp-option-force=tag:httpboot-x86_64,60,HTTPClient
@@ -72,15 +72,21 @@ dhcp-option-force=tag:httpboot-arm64,60,HTTPClient
dhcp-boot=tag:bios,tag:!ipxe,undionly.kpxe
dhcp-boot=tag:efi-x86_64,tag:!ipxe,ipxe.efi
dhcp-boot=tag:efi-arm64,tag:!ipxe,ipxe-arm64.efi
# Echo vendor class back to PXE clients (OVMF requires this, real hardware usually doesn't)
dhcp-option-force=tag:efi-x86_64,60,PXEClient
dhcp-option-force=tag:efi-arm64,60,PXEClient
dhcp-option-force=tag:bios,60,PXEClient
# iPXE clients -> chain to boot script via HTTP
dhcp-boot=tag:ipxe,http://${serverIp}:${httpPort}/boot.ipxe
# PXE service directives (needed for proxy DHCP to respond properly)
${dhcpMode === "proxy" ? `# PXE service directives (proxy DHCP needs these to respond on port 4011)
pxe-service=tag:!ipxe,x86PC,"PXE Boot",undionly.kpxe
pxe-service=tag:!ipxe,X86-64_EFI,"PXE Boot",ipxe.efi
pxe-service=tag:!ipxe,BC_EFI,"PXE Boot",ipxe.efi
pxe-service=tag:!ipxe,ARM64_EFI,"PXE Boot",ipxe-arm64.efi
pxe-service=tag:!ipxe,ARM64_EFI,"PXE Boot",ipxe-arm64.efi` : `# Full DHCP mode -- pxe-service directives omitted (they trigger PXE Boot Server
# Discovery protocol which some UEFI implementations don't support). The dhcp-boot
# directives above provide the boot filename directly in the DHCP offer.`}
# Verbose logging
log-dhcp

View File

@@ -2,10 +2,12 @@
// Full Fedora server install with LVM partitioning, %pre for reprovision detection,
// packages, and %post with SSH keys, user creation, k3s prereqs, progress callbacks.
import type { Role } from "@lab/shared";
export interface InstallKickstartParams {
hostname: string;
disk: string;
role: "worker" | "infra";
role: Role;
domain: string;
fedoraVersion: string;
timezone: string;
@@ -36,6 +38,7 @@ export function renderInstallKickstart(params: InstallKickstartParams): string {
const now = new Date().toISOString();
const hasLonghorn = role === "worker";
const hasRancher = role === "infra";
const isVanilla = role === "vanilla";
// -- Auth section --
const auth = sshKeys.length > 0
@@ -97,6 +100,48 @@ done
? `logvol /var/lib/rancher --vgname=${vg} --name=rancher --fstype=xfs --size=20480`
: "";
// Helper: the bastion callback functions used in both %pre and %post.
// Defined as a template so each section gets its own copy (they run in different shells).
const bastionHelpers = `
# Detect MAC address (first real ethernet MAC, skip loopback/veth)
_BASTION_MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
_BASTION_URL="http://${serverIp}:${httpPort}"
# Send a structured progress stage to bastion
bastion_progress() {
local stage="$1" detail="\${2:-}"
curl -sf -X POST "\${_BASTION_URL}/api/progress" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$_BASTION_MAC\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" \\
--connect-timeout 5 --max-time 10 2>/dev/null || true
}
# Send a single log line to bastion (one POST per call)
bastion_log() {
local line="$1"
curl -sf -X POST "\${_BASTION_URL}/api/log" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$_BASTION_MAC\\",\\"line\\":\\"$(echo "$line" | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g')\\"}\" \\
--connect-timeout 5 --max-time 10 2>/dev/null || true
}
# Send an error stage to bastion with context
bastion_error() {
local detail="$1"
bastion_progress "error" "$detail"
# Also send the last 50 lines of any log file as context
for logfile in /root/bastion-post-install.log /tmp/pre-partition.log; do
if [ -f "$logfile" ]; then
local tail_content
tail_content=$(tail -50 "$logfile" 2>/dev/null | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g; s/$/\\\\n/' | tr -d '\\n')
curl -sf -X POST "\${_BASTION_URL}/api/log" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$_BASTION_MAC\\",\\"lines\\":[\\"--- $logfile (last 50 lines) ---\\"],\\"tail\\":\\"$tail_content\\"}" \\
--connect-timeout 5 --max-time 10 2>/dev/null || true
fi
done
}`;
return `# Lab Bastion -- Fedora ${fedoraVersion} server install
# Generated: ${now}
# Target: ${fqdn} (role=${role})
@@ -123,27 +168,25 @@ url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$relea
%pre --log=/tmp/pre-partition.log
#!/bin/bash
set -x
${bastionHelpers}
# Progress callback helper
bastion_progress() {
local stage="$1" detail="\${2:-}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
}
# Error trap: report failures back to bastion
trap 'bastion_error "%pre failed at line $LINENO: $(tail -1 /tmp/pre-partition.log 2>/dev/null)"' ERR
bastion_progress "partitioning" "preparing disk layout"
bastion_progress "partitioning" "detecting disk"
VG="${vg}"
${diskLine}
bastion_log "disk detected: $DISK"
REPROVISION=no
# Check if VG exists (reprovision scenario)
if vgs $VG &>/dev/null; then
echo "=== Existing VG found - reprovision mode ==="
REPROVISION=yes
bastion_progress "partitioning" "reprovision mode -- preserving data volumes"
# Detect which data LVs to preserve
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no; PRESERVE_RANCHER=no
@@ -153,11 +196,14 @@ if vgs $VG &>/dev/null; then
lvs $VG/rancher &>/dev/null && PRESERVE_RANCHER=yes
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME rancher=$PRESERVE_RANCHER"
bastion_log "preserving LVs: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME rancher=$PRESERVE_RANCHER"
# Remove only OS logical volumes (keep data LVs)
for lv in root var varlog swap; do
lvremove -f $VG/$lv 2>/dev/null || true
done
else
bastion_progress "partitioning" "fresh install on $DISK"
fi
if [ "$REPROVISION" = "yes" ]; then
@@ -226,7 +272,8 @@ echo "=== Generated partition config ==="
cat /tmp/part.ks
echo "==================================="
bastion_progress "partitioning" "layout ready, starting install"
bastion_progress "partitioning" "disk layout ready"
bastion_log "partition config written to /tmp/part.ks"
%end
@@ -256,7 +303,7 @@ iotop
strace
jq
# k3s prerequisites
${isVanilla ? "# vanilla role -- skipping k3s prerequisites" : `# k3s prerequisites
container-selinux
iptables-nft
nftables
@@ -265,7 +312,7 @@ chrony
tar
socat
conntrack-tools
ethtool
ethtool`}
# Boot management
efibootmgr
@@ -286,31 +333,87 @@ ruby-libs
%post --log=/root/bastion-post-install.log
#!/bin/bash
set -x
${bastionHelpers}
# Progress callback helper
bastion_progress() {
local stage="$1" detail="\${2:-}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
# --- Error trap: catch any failure and report to bastion ---
_post_error_handler() {
local exit_code=$? lineno=$1
bastion_error "%post failed at line $lineno (exit $exit_code)"
}
trap '_post_error_handler $LINENO' ERR
# --- Background log streamer: sends %post output to bastion in real-time ---
_LOG_FILE=/root/bastion-post-install.log
_LOG_STREAMER_PID=""
(
# Wait for the log file to exist
while [ ! -f "$_LOG_FILE" ]; do sleep 1; done
# Tail the log and send lines in batches (flushed once 10 lines have accumulated)
_batch=""
_count=0
tail -f "$_LOG_FILE" 2>/dev/null | while IFS= read -r _line; do
# Escape for JSON
_escaped=$(echo "$_line" | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g; s/\\t/\\\\t/g')
if [ -z "$_batch" ]; then
_batch="\\"$_escaped\\""
else
_batch="$_batch,\\"$_escaped\\""
fi
_count=$((_count + 1))
# Send batch every 10 lines
if [ "$_count" -ge 10 ]; then
curl -sf -X POST "\${_BASTION_URL}/api/log" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$_BASTION_MAC\\",\\"lines\\":[$_batch]}" \\
--connect-timeout 5 --max-time 10 2>/dev/null || true
_batch=""
_count=0
fi
done
) &
_LOG_STREAMER_PID=$!
# Flush remaining log lines helper
_flush_log_streamer() {
if [ -n "$_LOG_STREAMER_PID" ]; then
kill "$_LOG_STREAMER_PID" 2>/dev/null || true
wait "$_LOG_STREAMER_PID" 2>/dev/null || true
fi
# Send any remaining lines from the log
if [ -f "$_LOG_FILE" ]; then
local remaining
remaining=$(tail -20 "$_LOG_FILE" 2>/dev/null | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g; s/\\t/\\\\t/g; s/^/"/; s/$/"/' | paste -sd, -)
if [ -n "$remaining" ]; then
curl -sf -X POST "\${_BASTION_URL}/api/log" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$_BASTION_MAC\\",\\"lines\\":[$remaining]}" \\
--connect-timeout 5 --max-time 10 2>/dev/null || true
fi
fi
}
bastion_progress "post-install" "configuring system"
bastion_progress "installing" "packages installed, starting post-install"
# -- SSH --
bastion_progress "post-install" "configuring SSH"
systemctl enable --now sshd
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
${sshPostBlock}
bastion_log "SSH configured: root login by key only, password auth disabled"
# -- Hostname and domain --
bastion_progress "post-install" "setting hostname to ${fqdn}"
hostnamectl set-hostname ${fqdn}
# -- tmpfs for /tmp --
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
# -- Kernel modules for k3s --
${isVanilla ? `# -- vanilla role: skip k3s kernel/sysctl/firewall setup --
bastion_progress "post-install" "vanilla role -- skipping k3s setup"
# -- Enable chronyd for time sync --
systemctl enable chronyd || true` : `# -- Kernel modules for k3s --
bastion_progress "post-install" "loading k3s kernel modules"
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
br_netfilter
overlay
@@ -320,6 +423,7 @@ modprobe br_netfilter || true
modprobe overlay || true
# -- Sysctl for k3s networking --
bastion_progress "post-install" "configuring k3s sysctl"
cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
@@ -330,29 +434,41 @@ fs.inotify.max_user_watches = 1048576
SYSCTL
sysctl --system || true
# -- Disable firewalld (k3s manages its own iptables rules) --
# -- Disable firewalld permanently (k3s/Cilium manage iptables directly) --
bastion_progress "post-install" "disabling firewalld"
# Must be masked to prevent re-enable on updates
systemctl disable --now firewalld || true
systemctl mask firewalld || true
# -- Enable chronyd for time sync --
systemctl enable --now chronyd
systemctl enable chronyd || true`}
# -- Set boot order: local disk first, PXE after --
bastion_progress "post-install" "configuring EFI boot order"
if command -v efibootmgr >/dev/null 2>&1; then
FEDORA_ENTRY=$(efibootmgr | grep -i fedora | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
if [ -n "$FEDORA_ENTRY" ]; then
CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
NEW_ORDER="$FEDORA_ENTRY,$(echo "$CURRENT_ORDER" | sed "s/$FEDORA_ENTRY,\\\\?//;s/,$//")"
efibootmgr -o "$NEW_ORDER" || true
echo "Boot order set: Fedora first ($NEW_ORDER)"
bastion_log "boot order set: Fedora first ($NEW_ORDER)"
else
bastion_log "no Fedora EFI entry found, boot order unchanged"
fi
else
bastion_log "efibootmgr not available, skipping boot order config"
fi
# -- Provisioning metadata --
bastion_progress "post-install" "writing provisioning metadata"
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
cat > /etc/lab-provisioned << PROVEOF
hostname: ${fqdn}
role: ${role}
provisioned: $(date -Iseconds)
bastion: ${serverIp}
ip: $IP_ADDR
PROVEOF
cat > /root/README << 'README'
@@ -370,8 +486,13 @@ cat > /root/README << 'README'
README
${hasRancher ? `# Install k3s server (skip start - will be configured manually)
bastion_progress "post-install" "pre-installing k3s server"
curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -
` : ""}IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
bastion_log "k3s server pre-installed (not started)"
` : ""}
# Stop log streamer and flush remaining lines
_flush_log_streamer
bastion_progress "complete" "ready at $IP_ADDR"
%end

View File

@@ -0,0 +1,299 @@
// Ubuntu autoinstall template (cloud-init).
// Equivalent of the Fedora kickstart: LVM partitioning, packages,
// SSH keys, k3s prereqs, progress callbacks.
/** Inputs for {@link renderUbuntuAutoinstall}. */
export interface UbuntuAutoinstallParams {
  hostname: string;
  /** Target disk device path; an empty string falls back to /dev/sda. */
  disk: string;
  role: string; // "vanilla" | "worker" | "infra"
  /** Appended to the hostname to form the FQDN; empty means "no domain". */
  domain: string;
  /** Currently not read by the renderer. */
  ubuntuVersion: string;
  timezone: string;
  locale: string;
  /** Bastion address and port used for the final progress callback. */
  serverIp: string;
  httpPort: number;
  sshKeys: string[];
  /** When non-empty, late-commands create this user with sudo and the SSH keys. */
  adminUser: string;
}

/**
 * Encodes a string as a YAML double-quoted scalar.
 * JSON string syntax (escaped quotes, backslashes, \n) is valid YAML, so embedded
 * quotes and newlines survive the round trip through the YAML parser.
 */
function yamlQuote(value: string): string {
  return JSON.stringify(value);
}

/** Renders the lvm_partition / format / mount triplet for one logical volume in volume group `vg0`. */
function lvmVolumeYaml(name: string, size: string, fstype: string, path: string): string {
  return [
    `      - id: lv-${name}`,
    `        name: ${name}`,
    `        type: lvm_partition`,
    `        volgroup: vg0`,
    `        size: ${size}`,
    `      - id: fs-${name}`,
    `        type: format`,
    `        volume: lv-${name}`,
    `        fstype: ${fstype}`,
    `      - id: mount-${name}`,
    `        type: mount`,
    `        device: fs-${name}`,
    `        path: ${path}`,
  ].join("\n");
}

/**
 * Renders the cloud-init `#cloud-config` autoinstall document for one machine.
 *
 * The document contains identity, SSH, a GPT + LVM storage layout, a package
 * list and late-commands (k3s prerequisites, optional admin user, provisioning
 * metadata and a final "complete" progress callback to the bastion).
 *
 * @param params Machine- and site-specific values; see {@link UbuntuAutoinstallParams}.
 * @returns The YAML text, terminated by a newline.
 */
export function renderUbuntuAutoinstall(params: UbuntuAutoinstallParams): string {
  const {
    hostname,
    disk,
    role,
    domain,
    timezone,
    locale,
    serverIp,
    httpPort,
    sshKeys,
    adminUser,
  } = params;

  const fqdn = domain ? `${hostname}.${domain}` : hostname;
  const vg = "labvg";
  const hasLonghorn = role === "worker";
  const hasRancher = role === "infra";

  // No disk auto-detection here: fall back to /dev/sda when none is given.
  const diskDevice = disk || "/dev/sda";

  // Logical volumes, in allocation order. Role-specific volumes come last so the
  // "size: -1" longhorn volume is declared after every fixed-size volume.
  const volumes = [
    lvmVolumeYaml("swap", "27G", "swap", "none"),
    lvmVolumeYaml("root", "33G", "xfs", "/"),
    lvmVolumeYaml("var", "100G", "xfs", "/var"),
    lvmVolumeYaml("varlog", "10G", "xfs", "/var/log"),
    lvmVolumeYaml("home", "10G", "xfs", "/home"),
    lvmVolumeYaml("srv", "20G", "xfs", "/srv"),
  ];
  if (hasLonghorn) {
    volumes.push(lvmVolumeYaml("longhorn", "-1", "xfs", "/var/lib/longhorn"));
  }
  if (hasRancher) {
    volumes.push(lvmVolumeYaml("rancher", "20G", "xfs", "/var/lib/rancher"));
  }

  // An empty key list is written as an explicit empty sequence rather than a
  // dangling "authorized-keys:" (which YAML would read as null).
  const authorizedKeysYaml = sshKeys.length > 0
    ? "\n" + sshKeys.map((k) => `      - ${yamlQuote(k)}`).join("\n")
    : " []";

  // late-commands for k3s prereqs, firewall, chrony, admin user, progress callback.
  // Each entry is the literal shell command; YAML quoting is applied once, below.
  const lateCommands: string[] = [
    // Kernel modules for k3s
    `curtin in-target -- bash -c 'cat > /etc/modules-load.d/k3s.conf << EOF\nbr_netfilter\noverlay\nip_conntrack\nEOF'`,
    // Sysctl for k3s networking
    `curtin in-target -- bash -c 'cat > /etc/sysctl.d/90-k3s.conf << EOF\nnet.bridge.bridge-nf-call-iptables = 1\nnet.bridge.bridge-nf-call-ip6tables = 1\nnet.ipv4.ip_forward = 1\nnet.ipv6.conf.all.forwarding = 1\nfs.inotify.max_user_instances = 524288\nfs.inotify.max_user_watches = 1048576\nEOF'`,
    // Disable ufw firewall
    `curtin in-target -- systemctl disable ufw || true`,
    // Enable chrony/ntp
    `curtin in-target -- systemctl enable chrony || true`,
    // tmpfs for /tmp
    `curtin in-target -- bash -c 'echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab'`,
  ];

  // Admin user creation + SSH keys + sudoers
  // NOTE(review): identity.username below is also set to adminUser; confirm the
  // installer has not already created the account before useradd runs.
  if (adminUser) {
    lateCommands.push(
      `curtin in-target -- useradd -m -G sudo -s /bin/bash ${adminUser}`,
      `curtin in-target -- usermod -L ${adminUser}`,
      `curtin in-target -- mkdir -p /home/${adminUser}/.ssh`,
      `curtin in-target -- bash -c 'cat > /home/${adminUser}/.ssh/authorized_keys << EOF\n${sshKeys.join("\n")}\nEOF'`,
      `curtin in-target -- chmod 700 /home/${adminUser}/.ssh`,
      `curtin in-target -- chmod 600 /home/${adminUser}/.ssh/authorized_keys`,
      `curtin in-target -- chown -R ${adminUser}:${adminUser} /home/${adminUser}/.ssh`,
      `curtin in-target -- bash -c 'echo "${adminUser} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/${adminUser}'`,
      `curtin in-target -- chmod 440 /etc/sudoers.d/${adminUser}`,
    );
  }

  // Provisioning metadata
  lateCommands.push(
    `curtin in-target -- bash -c 'cat > /etc/lab-provisioned << EOF\nhostname: ${fqdn}\nrole: ${role}\nprovisioned: $(date -Iseconds)\nbastion: ${serverIp}\nEOF'`,
  );

  // k3s install for infra role
  if (hasRancher) {
    lateCommands.push(
      `curtin in-target -- bash -c 'curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -'`,
    );
  }

  // Progress callback (complete)
  lateCommands.push(
    `curtin in-target -- bash -c 'IP_ADDR=$(ip -4 addr show | awk "/inet / && !/127.0.0/ {split(\\$2,a,\\"/\\"); print a[1]; exit}"); curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" -H "Content-Type: application/json" -d "{\\"mac\\":\\"$(ip link show | awk "/ether/ && !/00:00:00:00/ {print \\$2; exit}")\\",\\"stage\\":\\"complete\\",\\"detail\\":\\"ready at $IP_ADDR\\"}" || true'`,
  );

  // The commands contain double quotes and (for heredocs) real newlines, so each
  // one is escaped into a single-line YAML scalar instead of being wrapped in
  // bare quotes, which would end the scalar early or fold the newlines away.
  const lateCommandsYaml = lateCommands.map((c) => `    - ${yamlQuote(c)}`).join("\n");

  return `#cloud-config
autoinstall:
  version: 1
  locale: ${locale}
  keyboard:
    layout: gb
  timezone: ${timezone}
  identity:
    hostname: ${fqdn}
    username: ${adminUser || "root"}
    password: "!"
  ssh:
    install-server: true
    allow-pw: false
    authorized-keys:${authorizedKeysYaml}
  storage:
    config:
      - id: disk0
        type: disk
        ptable: gpt
        path: ${diskDevice}
        wipe: superblock-recursive
        grub_device: true
      - id: part-efi
        type: partition
        device: disk0
        size: 600M
        flag: boot
        grub_device: true
      - id: fs-efi
        type: format
        volume: part-efi
        fstype: fat32
      - id: mount-efi
        type: mount
        device: fs-efi
        path: /boot/efi
      - id: part-boot
        type: partition
        device: disk0
        size: 3G
      - id: fs-boot
        type: format
        volume: part-boot
        fstype: ext4
      - id: mount-boot
        type: mount
        device: fs-boot
        path: /boot
      - id: part-pv
        type: partition
        device: disk0
        size: -1
      - id: vg0
        type: lvm_volgroup
        name: ${vg}
        devices:
          - part-pv
${volumes.join("\n")}
  packages:
    - openssh-server
    - curl
    - wget
    - git
    - jq
    - htop
    - vim
    - tmux
    - python3
    - lshw
    - dmidecode
    - net-tools
    - iproute2
    - iputils-ping
    - traceroute
    - tcpdump
    - iotop
    - strace
    - tar
    - containerd
    - socat
    - conntrack
    - ethtool
    - iptables
    - chrony
    - efibootmgr
  late-commands:
${lateCommandsYaml}
`;
}
/**
 * Builds the meta-data document that accompanies the autoinstall user-data.
 * Both the instance id and the local hostname are set to `hostname`.
 */
export function renderUbuntuMetaData(hostname: string): string {
  const entries = [
    ["instance-id", hostname],
    ["local-hostname", hostname],
  ];
  // One "key: value" pair per line, each line newline-terminated.
  return entries.map(([key, value]) => `${key}: ${value}\n`).join("");
}

View File

@@ -0,0 +1,24 @@
// iPXE boot script template for Ubuntu autoinstall.
/**
 * Produces the iPXE script that boots the Ubuntu installer for one machine.
 *
 * The script prints a banner, loads the kernel and initrd from the bastion's
 * HTTP server, and points the installer's nocloud-net seed at
 * `/autoinstall/<mac>/` on the same server.
 */
export function renderUbuntuInstallIpxe(params: {
  mac: string;
  hostname: string;
  serverIp: string;
  httpPort: number;
  ubuntuVersion: string;
}): string {
  const { mac, hostname, serverIp, httpPort, ubuntuVersion } = params;
  const rule = "echo =============================================";

  const scriptLines = [
    "#!ipxe",
    "echo",
    rule,
    `echo Lab PXE Bastion - INSTALLING Ubuntu ${ubuntuVersion}`,
    `echo Target: ${hostname}`,
    `echo MAC: ${mac}`,
    rule,
    "echo",
    `kernel http://${serverIp}:${httpPort}/ubuntu-vmlinuz autoinstall ds=nocloud-net;seedfrom=http://${serverIp}:${httpPort}/autoinstall/${mac}/ ---`,
    `initrd http://${serverIp}:${httpPort}/ubuntu-initrd`,
    "boot",
  ];

  // Every line, including the final "boot", ends with a newline.
  return scriptLines.map((line) => line + "\n").join("");
}

View File

@@ -6,6 +6,7 @@ import type { BastionConfig } from "@lab/shared";
import { createApp } from "../src/server.js";
import type { FastifyInstance } from "fastify";
import type { StateManager } from "../src/services/state.js";
import type { InstallLogBuffer } from "../src/services/install-log.js";
function createTestConfig(testDir: string): BastionConfig {
return {
@@ -19,6 +20,8 @@ function createTestConfig(testDir: string): BastionConfig {
dhcpMode: "proxy",
dhcpRangeStart: "",
dhcpRangeEnd: "",
ubuntuVersion: "26.04",
ubuntuMirror: "https://releases.ubuntu.com/26.04",
iface: "eth0",
serverIp: "10.0.0.1",
network: "10.0.0.0",
@@ -38,6 +41,7 @@ describe("dispatch routes", () => {
let testDir: string;
let app: FastifyInstance;
let state: StateManager;
let installLog: InstallLogBuffer;
beforeEach(() => {
testDir = join(tmpdir(), `bastion-dispatch-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
@@ -49,6 +53,7 @@ describe("dispatch routes", () => {
const result = createApp(config);
app = result.app;
state = result.state;
installLog = result.installLog;
});
afterEach(async () => {
@@ -224,4 +229,100 @@ describe("dispatch routes", () => {
const result = JSON.parse(response.body);
expect(result.error).toBe("machine not found");
});
it("POST /api/log accepts a single line", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
const response = await app.inject({
method: "POST",
url: "/api/log",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ mac, line: "hello from kickstart" }),
});
expect(response.statusCode).toBe(200);
const result = JSON.parse(response.body);
expect(result.status).toBe("ok");
expect(result.lines).toBe(1);
// Verify line is stored
const lines = installLog.getLines(mac);
expect(lines).toHaveLength(1);
expect(lines[0]!.line).toBe("hello from kickstart");
});
it("POST /api/log accepts multiple lines", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
const response = await app.inject({
method: "POST",
url: "/api/log",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ mac, lines: ["line 1", "line 2", "line 3"] }),
});
expect(response.statusCode).toBe(200);
const result = JSON.parse(response.body);
expect(result.lines).toBe(3);
const lines = installLog.getLines(mac);
expect(lines).toHaveLength(3);
});
it("GET /api/logs/:mac includes log lines for installing machine", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
state.update((s) => {
s.install_queue[mac] = {
hostname: "test-node",
disk: "/dev/sda",
role: "worker",
queued_at: new Date().toISOString(),
};
});
// Add some log lines
installLog.append(mac, ["log line 1", "log line 2"], "test-node");
const response = await app.inject({
method: "GET",
url: `/api/logs/${encodeURIComponent(mac)}`,
});
expect(response.statusCode).toBe(200);
const result = JSON.parse(response.body);
expect(result.status).toBe("installing");
expect(result.log_lines).toHaveLength(2);
expect(result.log_total).toBe(2);
expect(result.log_lines[0].line).toBe("log line 1");
});
it("progress endpoint with 'error' stage keeps machine in install_queue", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
state.update((s) => {
s.install_queue[mac] = {
hostname: "failing-node",
disk: "/dev/sda",
role: "worker",
queued_at: new Date().toISOString(),
};
});
const response = await app.inject({
method: "POST",
url: "/api/progress",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
mac,
stage: "error",
detail: "%post failed at line 42",
}),
});
expect(response.statusCode).toBe(200);
// Machine should still be in install_queue (not moved to installed)
const currentState = state.load();
expect(currentState.install_queue[mac]).toBeDefined();
expect(currentState.install_queue[mac]?.progress).toBe("error");
expect(currentState.install_queue[mac]?.progress_detail).toBe("%post failed at line 42");
expect(currentState.installed[mac]).toBeUndefined();
});
});

View File

@@ -90,7 +90,9 @@ describe("renderInstallKickstart", () => {
serverIp: "10.0.0.5",
httpPort: 9090,
}));
expect(ks).toContain("http://10.0.0.5:9090/api/progress");
expect(ks).toContain('_BASTION_URL="http://10.0.0.5:9090"');
expect(ks).toContain("/api/progress");
expect(ks).toContain("/api/log");
});
it("infra role has /var/lib/rancher partition", () => {
@@ -137,4 +139,52 @@ describe("renderInstallKickstart", () => {
// swap = 27648
expect(ks).toContain("--name=swap --fstype=swap --size=27648");
});
it("%pre has error trap", () => {
const ks = renderInstallKickstart(baseParams());
expect(ks).toContain("trap");
expect(ks).toContain("bastion_error");
expect(ks).toContain("%pre failed");
});
it("%post has error trap", () => {
const ks = renderInstallKickstart(baseParams());
expect(ks).toContain("_post_error_handler");
expect(ks).toContain("%post failed");
});
it("has granular progress stages in %post", () => {
const ks = renderInstallKickstart(baseParams());
expect(ks).toContain('"configuring SSH"');
expect(ks).toContain('"setting hostname');
expect(ks).toContain('"configuring EFI boot order"');
expect(ks).toContain('"writing provisioning metadata"');
});
it("has background log streamer in %post", () => {
const ks = renderInstallKickstart(baseParams());
expect(ks).toContain("_LOG_STREAMER_PID");
expect(ks).toContain("_flush_log_streamer");
expect(ks).toContain("tail -f");
});
it("has bastion_log function for sending log lines", () => {
const ks = renderInstallKickstart(baseParams());
expect(ks).toContain("bastion_log()");
expect(ks).toContain("/api/log");
});
it("vanilla role skips k3s progress stages", () => {
const ks = renderInstallKickstart(baseParams({ role: "vanilla" }));
expect(ks).toContain("vanilla role");
expect(ks).not.toContain('"loading k3s kernel modules"');
expect(ks).not.toContain('"disabling firewalld"');
});
it("worker role has k3s-related progress stages", () => {
const ks = renderInstallKickstart(baseParams({ role: "worker" }));
expect(ks).toContain('"loading k3s kernel modules"');
expect(ks).toContain('"configuring k3s sysctl"');
expect(ks).toContain('"disabling firewalld"');
});
});

View File

@@ -7,6 +7,7 @@
},
"include": ["src/**/*.ts"],
"references": [
{ "path": "../shared" }
{ "path": "../shared" },
{ "path": "../modules" }
]
}

View File

@@ -17,10 +17,13 @@
},
"dependencies": {
"@lab/bastion": "workspace:*",
"@lab/modules": "workspace:*",
"@lab/shared": "workspace:*",
"commander": "^13.0.0"
"commander": "^13.0.0",
"ws": "^8.19.0"
},
"devDependencies": {
"@types/node": "^22.10.0"
"@types/node": "^22.10.0",
"@types/ws": "^8.18.1"
}
}

View File

@@ -0,0 +1,161 @@
// Typed API client for communicating with labd.
import https from "node:https";
import { readFileSync } from "node:fs";
import { LabdApiError } from "./errors.js";
import type {
Server,
ServerFilters,
JoinToken,
CreateTokenOpts,
EnrollmentRequest,
EnrollmentResponse,
HealthStatus,
RequestOpts,
} from "./types.js";
/** Connection settings consumed by the LabdClient constructor and its request helper. */
export interface LabdClientConfig {
  /** Base URL that every request path is resolved against. */
  baseUrl: string;
  /** Client certificate file; a TLS agent is only created when certPath and keyPath are both set. */
  certPath?: string;
  /** Private key file matching certPath. */
  keyPath?: string;
  /** Optional CA file, read into the TLS agent when present. */
  caPath?: string;
  /** Per-request timeout in milliseconds; 30 000 is used when omitted. */
  timeoutMs?: number;
}
/** Transport-independent view of an HTTP response, with the body already read as text. */
interface RawResponse {
  ok: boolean;
  status: number;
  statusText: string;
  text: string;
}

/** Socket-level error codes that are reported to callers as "not connected". */
const CONNECTION_ERROR_CODES = new Set([
  "ECONNREFUSED",
  "ECONNRESET",
  "ENOTFOUND",
  "EHOSTUNREACH",
  "ENETUNREACH",
  "ETIMEDOUT",
  "EAI_AGAIN",
]);

/** True when `err` indicates that the server could not be reached at all. */
function isConnectionFailure(err: unknown): boolean {
  if (err instanceof TypeError) {
    // fetch() reports network failures as TypeError ("fetch failed").
    return err.message.includes("fetch") || err.message.includes("ECONNREFUSED");
  }
  if (err instanceof Error) {
    // https.request() reports them as system errors carrying a `code`.
    const code = (err as { code?: unknown }).code;
    return typeof code === "string" && CONNECTION_ERROR_CODES.has(code);
  }
  return false;
}

/**
 * Typed HTTP client for the labd API.
 *
 * When both a client certificate and key are configured, HTTPS requests are
 * sent through a dedicated `https.Agent` so the certificate (and optional CA)
 * are presented to the server. All other requests use the global `fetch`.
 * Non-2xx responses, connection failures and timeouts are surfaced as
 * `LabdApiError`; anything else is rethrown unchanged.
 */
export class LabdClient {
  private config: LabdClientConfig;
  private agent: https.Agent | undefined;
  private sessionId: string | undefined;

  /**
   * @param config Connection settings. The certificate, key and CA files are
   *   read synchronously here, so a missing file throws from the constructor.
   */
  constructor(config: LabdClientConfig) {
    this.config = config;
    if (config.certPath && config.keyPath) {
      this.agent = new https.Agent({
        cert: readFileSync(config.certPath),
        key: readFileSync(config.keyPath),
        ca: config.caPath ? readFileSync(config.caPath) : undefined,
        rejectUnauthorized: true,
      });
    }
  }

  /** Stores a session id that is sent as `X-Session-ID` on every later request. */
  setSessionId(id: string): void {
    this.sessionId = id;
  }

  // --- Server endpoints ---

  async getServers(filters?: ServerFilters): Promise<Server[]> {
    return this.request("GET", "/api/servers", { query: filters as Record<string, string | undefined> });
  }

  async getServer(id: string): Promise<Server> {
    return this.request("GET", `/api/servers/${encodeURIComponent(id)}`);
  }

  // --- Token endpoints ---

  async createJoinToken(opts: CreateTokenOpts): Promise<JoinToken> {
    return this.request("POST", "/api/tokens", { body: opts });
  }

  async listTokens(): Promise<JoinToken[]> {
    return this.request("GET", "/api/tokens");
  }

  async revokeToken(id: string): Promise<{ status: string; id: string }> {
    return this.request("DELETE", `/api/tokens/${encodeURIComponent(id)}`);
  }

  // --- Auth endpoints ---

  async enroll(req: EnrollmentRequest): Promise<EnrollmentResponse> {
    return this.request("POST", "/api/auth/enroll", { body: req });
  }

  // --- Bastion endpoints ---

  async getBastions(): Promise<Array<{
    id: string; hostname: string; network: string; serverIp: string;
    status: string; machineCount: number; lastHeartbeat?: string; connectedAt?: string;
  }>> {
    return this.request("GET", "/api/bastions");
  }

  // --- Machine endpoints (aggregated through labd from bastions) ---

  async getMachines(): Promise<import("@lab/shared").BastionState> {
    return this.request("GET", "/api/machines");
  }

  async installMachine(opts: {
    mac: string; hostname: string; disk?: string; role?: string; os?: string;
  }): Promise<{ status: string; data?: unknown; error?: string }> {
    return this.request("POST", "/api/machines/install", { body: opts });
  }

  async forgetMachine(mac: string): Promise<{ status: string }> {
    return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
  }

  async updateRole(mac: string, role: string): Promise<{ status: string }> {
    return this.request("POST", "/api/machines/role", { body: { mac, role } });
  }

  async getMachineLogs(mac: string): Promise<Record<string, unknown>> {
    return this.request("GET", `/api/machines/${encodeURIComponent(mac)}/logs`);
  }

  // --- Health endpoints ---

  async getHealth(): Promise<HealthStatus> {
    return this.request("GET", "/healthz");
  }

  // --- Internal ---

  /**
   * Sends one request and decodes the JSON response.
   *
   * @param method HTTP method.
   * @param path Path resolved against `config.baseUrl`.
   * @param opts Optional query parameters (undefined values are skipped) and JSON body.
   * @returns The parsed response body.
   * @throws LabdApiError for non-2xx responses, unreachable servers and timeouts.
   */
  private async request<T>(method: string, path: string, opts?: RequestOpts): Promise<T> {
    const url = new URL(path, this.config.baseUrl);
    if (opts?.query) {
      for (const [k, v] of Object.entries(opts.query)) {
        if (v !== undefined) url.searchParams.set(k, String(v));
      }
    }

    const payload = opts?.body != null ? JSON.stringify(opts.body) : undefined;

    // Only announce a JSON body when one is actually sent: some servers reject
    // body-less requests (e.g. DELETE) that carry Content-Type: application/json.
    const headers: Record<string, string> = {};
    if (payload !== undefined) {
      headers["Content-Type"] = "application/json";
    }
    if (this.sessionId) {
      headers["X-Session-ID"] = this.sessionId;
    }

    const timeoutMs = this.config.timeoutMs ?? 30_000;
    try {
      // Node's built-in fetch ignores an `agent` option, so the client
      // certificate would never be presented; mTLS requests go through
      // https.request instead.
      const resp = this.agent && url.protocol === "https:"
        ? await this.sendWithAgent(this.agent, url, method, headers, payload, timeoutMs)
        : await this.sendWithFetch(url, method, headers, payload, timeoutMs);

      if (!resp.ok) {
        let errorBody = { error: resp.statusText };
        try {
          errorBody = JSON.parse(resp.text);
        } catch {
          // Body was not JSON: keep the status-text fallback.
        }
        throw LabdApiError.fromResponse(resp.status, errorBody);
      }
      return JSON.parse(resp.text) as T;
    } catch (err) {
      if (err instanceof LabdApiError) throw err;
      if (isConnectionFailure(err)) {
        throw LabdApiError.notConnected(this.config.baseUrl);
      }
      if ((err instanceof DOMException || err instanceof Error) && err.name === "TimeoutError") {
        throw LabdApiError.timeout(timeoutMs);
      }
      throw err;
    }
  }

  /** Plain transport: global fetch with an overall timeout. */
  private async sendWithFetch(
    url: URL,
    method: string,
    headers: Record<string, string>,
    payload: string | undefined,
    timeoutMs: number,
  ): Promise<RawResponse> {
    const resp = await fetch(url.toString(), {
      method,
      headers,
      body: payload,
      signal: AbortSignal.timeout(timeoutMs),
    });
    return {
      ok: resp.ok,
      status: resp.status,
      statusText: resp.statusText,
      text: await resp.text(),
    };
  }

  /** mTLS transport: node:https with the configured agent and an overall timeout. */
  private sendWithAgent(
    agent: https.Agent,
    url: URL,
    method: string,
    headers: Record<string, string>,
    payload: string | undefined,
    timeoutMs: number,
  ): Promise<RawResponse> {
    return new Promise<RawResponse>((resolve, reject) => {
      const requestHeaders = payload === undefined
        ? headers
        : { ...headers, "Content-Length": String(Buffer.byteLength(payload)) };

      const req = https.request(url, { method, headers: requestHeaders, agent }, (res) => {
        const chunks: Buffer[] = [];
        res.on("data", (chunk: Buffer) => chunks.push(chunk));
        res.on("error", reject);
        res.on("end", () => {
          const status = res.statusCode ?? 0;
          resolve({
            ok: status >= 200 && status < 300,
            status,
            statusText: res.statusMessage ?? "",
            text: Buffer.concat(chunks).toString("utf-8"),
          });
        });
      });

      // Covers connect, send and body download, like AbortSignal.timeout does for fetch.
      const timer = setTimeout(() => {
        const timeoutError = new Error(`request timed out after ${timeoutMs}ms`);
        timeoutError.name = "TimeoutError";
        // Reject first so the timeout wins over the abort error raised by destroy().
        reject(timeoutError);
        req.destroy(timeoutError);
      }, timeoutMs);

      req.on("error", reject);
      req.on("close", () => clearTimeout(timer));

      if (payload !== undefined) req.write(payload);
      req.end();
    });
  }
}

View File

@@ -0,0 +1,47 @@
// CLI configuration loading for labd client.
// Bridges the CLI config module into LabdClient configuration.
import { loadConfig, CONFIG_DIR, CONFIG_FILE, CERT_DIR } from "../config/index.js";
import { LabdClient, type LabdClientConfig } from "./client.js";
export { CONFIG_DIR, CONFIG_FILE, CERT_DIR };
/**
 * Build the `LabdClient` configuration.
 *
 * Sources are applied lowest to highest precedence: the CLI config file,
 * then the `LABCTL_CERT_PATH` / `LABCTL_KEY_PATH` / `LABCTL_CA_PATH`
 * environment variables, then the caller-supplied `overrides`.
 *
 * @param overrides Optional values that win over every other source.
 */
export function loadClientConfig(
  overrides?: Partial<LabdClientConfig>,
): LabdClientConfig {
  const fileConfig = loadConfig();
  const resolved: LabdClientConfig = { baseUrl: fileConfig.labdUrl };

  // Only copy paths that are actually set, so absent keys stay absent.
  if (fileConfig.certPath) resolved.certPath = fileConfig.certPath;
  if (fileConfig.keyPath) resolved.keyPath = fileConfig.keyPath;
  if (fileConfig.caPath) resolved.caPath = fileConfig.caPath;

  // Environment variable overrides (cert paths)
  const envCert = process.env["LABCTL_CERT_PATH"];
  const envKey = process.env["LABCTL_KEY_PATH"];
  const envCa = process.env["LABCTL_CA_PATH"];
  if (envCert) resolved.certPath = envCert;
  if (envKey) resolved.keyPath = envKey;
  if (envCa) resolved.caPath = envCa;

  return overrides ? { ...resolved, ...overrides } : resolved;
}
/**
 * Create a fresh `LabdClient` from the resolved CLI configuration.
 *
 * @param overrides Optional configuration values passed to `loadClientConfig`.
 */
export function createLabdClient(
  overrides?: Partial<LabdClientConfig>,
): LabdClient {
  return new LabdClient(loadClientConfig(overrides));
}
// Module-wide client instance, created on first use.
let _singleton: LabdClient | undefined;

/**
 * Return the shared `LabdClient`, creating it with the default
 * configuration the first time it is requested.
 */
export function getLabdClient(): LabdClient {
  return (_singleton ??= createLabdClient());
}

View File

@@ -0,0 +1,59 @@
// Structured API error class for labd communication.
/**
 * Error raised for any failed interaction with labd.
 *
 * `statusCode` is the HTTP status, or `0` when no response was received
 * (connection failure or timeout). `errorCode` is the symbolic name derived
 * from `statusCode`, and `detail` carries optional extra context.
 */
export class LabdApiError extends Error {
  readonly statusCode: number;
  readonly errorCode: string;
  readonly detail: string | undefined;

  constructor(statusCode: number, message: string, detail?: string) {
    super(message);
    this.name = "LabdApiError";
    this.statusCode = statusCode;
    this.errorCode = statusCodeToErrorCode(statusCode);
    this.detail = detail;
  }

  /**
   * Build an error from an HTTP error response.
   *
   * Uses the body's string `error` and `detail` fields when present;
   * otherwise the message falls back to `HTTP <status>`.
   */
  static fromResponse(statusCode: number, body: unknown): LabdApiError {
    const fallbackMessage = `HTTP ${statusCode}`;
    if (body === null || typeof body !== "object") {
      return new LabdApiError(statusCode, fallbackMessage);
    }
    const { error, detail } = body as Record<string, unknown>;
    return new LabdApiError(
      statusCode,
      typeof error === "string" ? error : fallbackMessage,
      typeof detail === "string" ? detail : undefined,
    );
  }

  /** Error for a failed connection attempt to `url` (status code 0). */
  static notConnected(url: string): LabdApiError {
    const message = `Cannot connect to labd at ${url}`;
    const hint = "Check that labd is running and the URL is correct.";
    return new LabdApiError(0, message, hint);
  }

  /** Error for a request that exceeded `timeoutMs` (status code 0). */
  static timeout(timeoutMs: number): LabdApiError {
    const message = `Request timed out after ${timeoutMs}ms`;
    const hint = "The server may be overloaded. Try again later.";
    return new LabdApiError(0, message, hint);
  }
}
/**
 * Type guard: true when `err` is a `LabdApiError` instance, narrowing it
 * for the caller.
 */
export function isLabdApiError(err: unknown): err is LabdApiError {
return err instanceof LabdApiError;
}
/**
 * Map an HTTP status code to a symbolic error code.
 *
 * Status `0` (no HTTP response) maps to `CONNECTION_ERROR`; any status
 * without a dedicated entry maps to `UNKNOWN`.
 */
function statusCodeToErrorCode(code: number): string {
  const knownCodes = new Map<number, string>([
    [400, "BAD_REQUEST"],
    [401, "UNAUTHORIZED"],
    [403, "FORBIDDEN"],
    [404, "NOT_FOUND"],
    [409, "CONFLICT"],
    [429, "RATE_LIMITED"],
    [500, "INTERNAL_ERROR"],
    [503, "UNAVAILABLE"],
    [0, "CONNECTION_ERROR"],
  ]);
  return knownCodes.get(code) ?? "UNKNOWN";
}

View File

@@ -0,0 +1,18 @@
// Public API for labd client.
export { LabdClient, type LabdClientConfig } from "./client.js";
export { LabdApiError, isLabdApiError } from "./errors.js";
export { loadClientConfig, createLabdClient, getLabdClient, CONFIG_DIR, CONFIG_FILE, CERT_DIR } from "./config.js";
export type {
Server,
ServerFilters,
Agent,
JoinToken,
CreateTokenOpts,
EnrollmentRequest,
EnrollmentResponse,
HealthStatus,
ApiErrorBody,
RequestOpts,
} from "./types.js";
export { createLabdWebSocket, streamExec, streamLogs, type StreamOptions } from "./websocket.js";

View File

@@ -0,0 +1,96 @@
// Typed interfaces for labd API requests and responses.
// Matches Prisma schema models and labd route contracts.
// --- Server ---
/**
 * Server record as exchanged with the labd API.
 *
 * Nullable fields are always present in the payload but may be `null`.
 * NOTE(review): timestamps are typed as strings, presumably ISO-8601 — confirm
 * against the labd routes.
 */
export interface Server {
id: string;
hostname: string;
mac: string | null;
cloud: string;
environment: string;
role: string;
labels: Record<string, string>;
ip: string | null;
agentVersion: string | null;
status: string;
lastHeartbeat: string | null;
createdAt: string;
updatedAt: string;
// Optional: the key may be missing entirely, or explicitly null.
agent?: Agent | null;
}
/**
 * Agent record, linked to a server through `serverId` and optionally
 * embedded in `Server.agent`.
 */
export interface Agent {
id: string;
serverId: string;
certificatePem: string | null;
enrolledAt: string;
lastSeen: string | null;
}
/**
 * Optional filters over servers; every field may be omitted.
 * NOTE(review): presumably sent as query parameters when listing servers —
 * the consumer is not visible in this file.
 */
export interface ServerFilters {
cloud?: string;
environment?: string;
status?: string;
}
// --- Join Tokens ---
/**
 * Join token record. The secret `token` value is only included in the
 * response that creates the token.
 */
export interface JoinToken {
id: string;
token?: string; // Only present on creation
type: string;
label: string | null;
usedBy: string | null;
usedAt: string | null;
revokedAt: string | null;
createdAt: string;
expiresAt: string | null;
}
/**
 * Options for creating a join token; every field is optional.
 * NOTE(review): defaults for omitted fields are decided server-side and are
 * not visible here.
 */
export interface CreateTokenOpts {
type?: "one-time" | "reusable";
label?: string;
expiresInHours?: number;
}
// --- Auth / Enrollment ---
/**
 * Payload for an enrollment request.
 * NOTE(review): `csr` is presumably a PEM-encoded certificate signing
 * request — confirm against the enroll route.
 */
export interface EnrollmentRequest {
token: string;
hostname: string;
csr?: string;
}
/**
 * Result of an enrollment request. `certificatePem` is always present in
 * the payload but may be `null`.
 */
export interface EnrollmentResponse {
status: string;
hostname: string;
message: string;
certificatePem: string | null;
}
// --- Health ---
/**
 * Health report: an overall status plus per-dependency check results.
 * NOTE(review): the unit of `uptime` is not visible here (presumably
 * seconds) — confirm against the health route.
 */
export interface HealthStatus {
status: "healthy" | "degraded";
uptime: number;
timestamp: string;
checks: {
database: "ok" | "error";
};
}
// --- API Error ---
/**
 * Shape of a JSON error body: a required `error` message with optional
 * `detail` and `code` fields.
 */
export interface ApiErrorBody {
error: string;
detail?: string;
code?: string;
}
// --- Request helpers ---
/**
 * Per-request options: optional query-string parameters and an optional
 * request body.
 */
export interface RequestOpts {
// Values may be undefined so callers can pass optional filters through unchanged.
query?: Record<string, string | number | boolean | undefined>;
body?: unknown;
}

View File

@@ -0,0 +1,160 @@
// WebSocket client for real-time streaming operations (exec, logs).
import { WebSocket } from "ws";
import { loadConfig } from "../config/index.js";
import { readFileSync } from "node:fs";
import { LabdApiError } from "./errors.js";
/**
 * Callbacks used by the streaming helpers in this file
 * (`streamExec`, `streamLogs`).
 */
export interface StreamOptions {
// Receives each chunk of streamed text (command output or a log line).
onData: (data: string) => void;
// Receives socket errors, server-reported errors and message-parsing failures.
onError: (error: Error) => void;
// Invoked when the underlying WebSocket closes.
onClose: () => void;
}
/**
 * Open a WebSocket to labd and resolve once the connection is established.
 *
 * The configured `labdUrl` is converted from http(s) to ws(s) and `path` is
 * resolved against it. When both a client certificate and key are
 * configured they are loaded for mutual TLS.
 *
 * @param path WebSocket endpoint path, e.g. `/ws/exec`.
 * @returns The open socket.
 * @throws LabdApiError when the connection fails or does not open within 10 s.
 */
export async function createLabdWebSocket(path: string): Promise<WebSocket> {
  const config = loadConfig();
  const baseUrl = config.labdUrl.replace("https:", "wss:").replace("http:", "ws:");
  const url = new URL(path, baseUrl);
  const wsOptions: WebSocket.ClientOptions = {};
  if (config.certPath && config.keyPath) {
    wsOptions.cert = readFileSync(config.certPath);
    wsOptions.key = readFileSync(config.keyPath);
    // NOTE(review): the CA bundle is only applied together with a client
    // cert/key pair — confirm that a CA-only configuration should be ignored.
    if (config.caPath) wsOptions.ca = readFileSync(config.caPath);
  }
  const connectTimeoutMs = 10_000;
  return new Promise((resolve, reject) => {
    // Construct the socket BEFORE arming the timer. The constructor can throw
    // synchronously (e.g. unsupported URL protocol); with the timer armed
    // first, it would later fire and touch `ws` while it is still
    // uninitialized, raising an uncaught ReferenceError.
    const ws = new WebSocket(url.toString(), wsOptions);
    const timeout = setTimeout(() => {
      ws.terminate();
      reject(LabdApiError.timeout(connectTimeoutMs));
    }, connectTimeoutMs);
    ws.on("open", () => {
      clearTimeout(timeout);
      resolve(ws);
    });
    ws.on("error", (err: Error) => {
      clearTimeout(timeout);
      reject(
        LabdApiError.notConnected(config.labdUrl + " — " + err.message),
      );
    });
  });
}
/**
 * Run a command on a server through labd and stream its output.
 *
 * Sends an `exec` request over `/ws/exec`, forwards `exec-stdout` and
 * `exec-stderr` chunks to `options.onData`, and settles when the server
 * reports the outcome.
 *
 * @param serverName Target server name.
 * @param command    Command and arguments.
 * @param options    Stream callbacks plus optional `tty` (default false) and
 *                   `timeout` (default 30000) forwarded in the request.
 * @returns The remote exit code (1 when the server omits it).
 * @throws Error when the server reports an error, or when the connection
 *         closes before an exit status arrives.
 */
export async function streamExec(
  serverName: string,
  command: string[],
  options: StreamOptions & { tty?: boolean; timeout?: number },
): Promise<number> {
  const ws = await createLabdWebSocket("/ws/exec");
  const requestId = crypto.randomUUID();
  return new Promise<number>((resolve, reject) => {
    // Tracks whether the promise already has an outcome, so the close
    // handler can tell an expected shutdown from a dropped connection.
    let settled = false;
    ws.on("message", (raw: Buffer) => {
      try {
        const msg = JSON.parse(raw.toString()) as {
          type: string;
          data?: string;
          exitCode?: number;
          message?: string;
        };
        switch (msg.type) {
          case "exec-stdout":
          case "exec-stderr":
            if (msg.data) options.onData(msg.data);
            break;
          case "exec-exit":
            settled = true;
            ws.close();
            resolve(msg.exitCode ?? 1);
            break;
          case "error":
            settled = true;
            ws.close();
            reject(new Error(msg.message ?? "Remote execution error"));
            break;
        }
      } catch (err) {
        // Malformed frames (or a throwing callback) are reported, not fatal.
        options.onError(err instanceof Error ? err : new Error(String(err)));
      }
    });
    ws.on("close", () => {
      options.onClose();
      // Without this the promise would stay pending forever when the socket
      // drops before the server sends "exec-exit" or "error".
      if (!settled) {
        settled = true;
        reject(new Error("Connection to labd closed before the command reported an exit status"));
      }
    });
    ws.on("error", (err: Error) => {
      options.onError(err);
    });
    ws.send(
      JSON.stringify({
        type: "exec",
        requestId,
        server: serverName,
        command,
        tty: options.tty ?? false,
        timeout: options.timeout ?? 30_000,
      }),
    );
  });
}
/**
 * Subscribe to a server's logs through labd.
 *
 * Opens `/ws/logs`, wires the callbacks and sends a `log-subscribe`
 * request. The returned promise resolves as soon as the subscription has
 * been sent; completion is signalled through `options.onClose`.
 *
 * @param serverName Target server name.
 * @param logOptions Log selection options, forwarded verbatim to the server.
 * @param options    Stream callbacks.
 */
export async function streamLogs(
  serverName: string,
  logOptions: {
    follow?: boolean;
    lines?: number;
    unit?: string;
    since?: string;
    priority?: string;
    kernel?: boolean;
  },
  options: StreamOptions,
): Promise<void> {
  const socket = await createLabdWebSocket("/ws/logs");
  const requestId = crypto.randomUUID();

  // Dispatch one incoming frame to the matching callback.
  const handleMessage = (raw: Buffer): void => {
    try {
      const frame = JSON.parse(raw.toString()) as {
        type: string;
        line?: string;
        message?: string;
      };
      if (frame.type === "log-line") {
        if (frame.line) options.onData(frame.line);
      } else if (frame.type === "log-end") {
        socket.close();
      } else if (frame.type === "error") {
        socket.close();
        options.onError(new Error(frame.message ?? "Log streaming error"));
      }
    } catch (err) {
      // Malformed frames (or a throwing callback) are reported via onError.
      options.onError(err instanceof Error ? err : new Error(String(err)));
    }
  };

  socket.on("message", handleMessage);
  socket.on("close", () => {
    options.onClose();
  });
  socket.on("error", (err) => {
    options.onError(err);
  });

  const subscription = {
    type: "log-subscribe",
    requestId,
    server: serverName,
    options: logOptions,
  };
  socket.send(JSON.stringify(subscription));
}

View File

@@ -0,0 +1,403 @@
// CLI command: labctl app k3s install/health <target>
// Install or check k3s on a target machine via SSH.
import { existsSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import type { Command } from "commander";
import type { BastionState } from "@lab/shared";
import { K3sModule, sshExec } from "@lab/modules";
import { getLabdClient } from "../api/config.js";
/**
 * Resolve a user-supplied target to a reachable machine.
 *
 * Resolution order:
 *  1. A dotted-quad IPv4 string is used directly (role defaults to "infra").
 *  2. A MAC address (case-insensitive, `-` or `:` separated) is looked up in
 *     `state.installed`.
 *  3. A hostname matches either exactly or as the leading label of a fully
 *     qualified name (`node1` matches `node1.lab.local`).
 *
 * Entries without an IP address are never returned: callers SSH to the
 * result, so a match with an empty `ip` would only fail later.
 *
 * @param target IP address, MAC address or hostname.
 * @param state  Machine state from labd, or null when it is unavailable.
 * @returns The resolved machine, or null when nothing usable matches.
 */
function resolveTarget(
  target: string,
  state: BastionState | null,
): { ip: string; hostname: string; role: string } | null {
  // Direct IP
  if (/^\d+\.\d+\.\d+\.\d+$/.test(target)) {
    return { ip: target, hostname: target, role: "infra" };
  }
  if (!state) return null;
  // Check by MAC
  const mac = target.toLowerCase().replace(/-/g, ":");
  const byMac = state.installed[mac];
  if (byMac?.ip) {
    return { ip: byMac.ip, hostname: byMac.hostname, role: byMac.role };
  }
  // Check by hostname
  for (const info of Object.values(state.installed)) {
    const nameMatches = info.hostname === target || info.hostname.startsWith(target + ".");
    // Same rule as the MAC branch: only machines with a known IP qualify.
    if (nameMatches && info.ip) {
      return { ip: info.ip, hostname: info.hostname, role: info.role };
    }
  }
  return null;
}
/**
 * Locate a private SSH key for the invoking user.
 *
 * When `SUDO_USER` is set, the search runs in `/home/<SUDO_USER>/.ssh`;
 * otherwise in the current user's home directory. Candidates are tried in
 * the order `id_ed25519`, `id_ecdsa`, `id_rsa`.
 *
 * @returns Path of the first existing key, or undefined when none exists.
 */
function findSshKey(): string | undefined {
  const sudoUser = process.env["SUDO_USER"];
  const sshDir = join(sudoUser ? join("/home", sudoUser) : homedir(), ".ssh");
  return ["id_ed25519", "id_ecdsa", "id_rsa"]
    .map((name) => join(sshDir, name))
    .find((candidate) => existsSync(candidate));
}
/**
 * Fetch the machine state from labd.
 *
 * @returns The state, or null when labd cannot be reached or the request
 *          fails for any reason.
 */
async function fetchState(): Promise<BastionState | null> {
  let state: BastionState | null = null;
  try {
    state = await getLabdClient().getMachines();
  } catch {
    // Any failure is reported to the caller as "no state available".
  }
  return state;
}
import { registerLabcontrollerCommands } from "./labcontroller.js";
/**
 * Decide whether the output of
 * `systemctl is-active k3s || systemctl is-active k3s-agent` reports a
 * running k3s service.
 *
 * `systemctl is-active` prints the unit state on stdout. When the first unit
 * is not active the shell runs the second command too, so on an agent node
 * stdout holds two lines ("inactive" then "active"). Comparing the whole
 * output with "active" therefore misses running agents; check line by line.
 */
function isK3sServiceActive(stdout: string): boolean {
  return stdout.split("\n").some((line) => line.trim() === "active");
}

/**
 * Register the `app` command tree on the CLI:
 *
 *  - `app labcontroller ...` (delegated to `registerLabcontrollerCommands`)
 *  - `app k3s install <target>`: install and configure k3s over SSH, then
 *    best-effort deploy the apps required by the machine's role
 *  - `app k3s health [target]`: summary table for all installed machines,
 *    or a detailed check for one target
 *  - `app k3s list`: installed machines with their k3s status
 *
 * @param program Root commander program to attach the commands to.
 */
export function registerAppCommand(program: Command): void {
  const appCmd = program.command("app").description("Application management");
  // labcontroller subcommands
  registerLabcontrollerCommands(appCmd);
  const k3sCmd = appCmd.command("k3s").description("k3s cluster management");
  k3sCmd
    .command("install <target>")
    .description("Install k3s on a target machine (hostname, IP, or MAC)")
    .option("--role <role>", "k3s role: infra (server) or worker (agent)", "infra")
    .option("--user <user>", "SSH user", "michal")
    .option("--k3s-server <url>", "k3s server URL (required for worker role)")
    .option("--k3s-token <token>", "k3s join token (required for worker role)")
    .action(async (target: string, opts: {
      role: string;
      user: string;
      k3sServer?: string;
      k3sToken?: string;
    }) => {
      const state = await fetchState();
      const resolved = resolveTarget(target, state);
      if (!resolved) {
        console.error(`Cannot resolve target: ${target}`);
        console.error("Provide an IP address, hostname, or MAC of an installed machine.");
        process.exit(1);
      }
      // Anything other than "worker" is treated as "infra".
      const role = opts.role === "worker" ? "worker" : "infra";
      const sshKey = findSshKey();
      console.log(`Installing k3s on ${resolved.hostname} (${resolved.ip}) as ${role}...`);
      console.log("");
      const k3s = new K3sModule();
      const moduleCtx = {
        hostname: resolved.hostname,
        ip: resolved.ip,
        role,
        os: "fedora-43" as const,
        arch: "x86_64" as const,
        sshUser: opts.user,
        ...(sshKey ? { sshKeyPath: sshKey } : {}),
        config: {
          ...(opts.k3sServer ? { k3sServerUrl: opts.k3sServer } : {}),
          ...(opts.k3sToken ? { k3sToken: opts.k3sToken } : {}),
        },
      };
      const installResult = await k3s.install(moduleCtx);
      for (const line of installResult.output) {
        console.log(` ${line}`);
      }
      if (!installResult.success) {
        console.error(`\nk3s install failed: ${installResult.errors.join(", ")}`);
        process.exit(1);
      }
      console.log("\nRunning post-install configuration...\n");
      const configResult = await k3s.configure(moduleCtx);
      for (const line of configResult.output) {
        console.log(` ${line}`);
      }
      if (!configResult.success) {
        console.error(`\nk3s configure failed: ${configResult.errors.join(", ")}`);
        process.exit(1);
      }
      console.log("\nk3s installed successfully.");
      // Check if the machine's role requires additional app deployments
      try {
        const { ROLE_REGISTRY } = await import("@lab/shared");
        const freshState = await fetchState();
        if (freshState) {
          for (const [, info] of Object.entries(freshState.installed)) {
            if (info.ip === resolved.ip || info.hostname === resolved.hostname) {
              const roleInfo = ROLE_REGISTRY.find((r: { name: string }) => r.name === info.role);
              if (roleInfo && roleInfo.apps.length > 0) {
                console.log(`\nRole ${info.role} requires: ${roleInfo.apps.join(", ")}`);
                console.log(`Deploying automatically...`);
                const { execFileSync } = await import("node:child_process");
                try {
                  // Re-invoke this CLI for the deploy step, sharing stdio.
                  execFileSync("node", [
                    process.argv[1] ?? "",
                    "app", "labcontroller", "deploy", resolved.hostname,
                    "--user", opts.user,
                  ], { stdio: "inherit" });
                } catch {
                  console.error(`\nAuto-deploy failed. Run manually: labctl app labcontroller deploy ${resolved.hostname}`);
                }
              }
              // Only the first matching machine is considered.
              break;
            }
          }
        }
      } catch { /* best-effort chain */ }
      console.log(`\nTo get kubeconfig: ssh ${opts.user}@${resolved.ip} sudo cat /etc/rancher/k3s/k3s.yaml`);
    });
  k3sCmd
    .command("health [target]")
    .description("Check k3s health (all hosts if no target given)")
    .option("--user <user>", "SSH user", "michal")
    .action(async (target: string | undefined, opts: { user: string }) => {
      const sshKey = findSshKey();
      if (!target) {
        let state: BastionState;
        try {
          state = await getLabdClient().getMachines();
        } catch (err) {
          console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
          process.exit(1);
        }
        const entries = Object.entries(state.installed);
        if (entries.length === 0) {
          console.log("No installed machines.");
          return;
        }
        const BOLD = "\x1b[1m";
        const GREEN = "\x1b[32m";
        const RED = "\x1b[31m";
        const DIM = "\x1b[2m";
        const RESET = "\x1b[0m";
        const pad = (s: string, w: number) => s.padEnd(w);
        console.log(
          `${BOLD}${pad("HOST", 22)}${pad("IP", 16)}${pad("ROLE", 8)}${pad("K3S", 14)}${pad("NODE", 10)}${pad("ENCRYPT", 10)}${pad("CNI", 14)}${pad("PODS", 6)}${RESET}`,
        );
        interface HealthRow {
          host: string; ip: string; role: string;
          k3s: string; node: string; encrypt: string; cni: string; pods: string;
          k3sC: string; nodeC: string; encC: string; cniC: string;
        }
        // Probe every machine concurrently; each probe produces one table row.
        const probes = entries.map(async ([_mac, info]): Promise<HealthRow> => {
          const r: HealthRow = {
            host: info.hostname, ip: info.ip, role: info.role,
            k3s: "—", node: "—", encrypt: "—", cni: "—", pods: "—",
            k3sC: DIM, nodeC: DIM, encC: DIM, cniC: DIM,
          };
          if (!info.ip || info.role === "vanilla") {
            r.k3s = info.role === "vanilla" ? "n/a" : "no ip";
            return r;
          }
          try {
            const svc = await sshExec(info.ip, opts.user, "systemctl is-active k3s 2>/dev/null || systemctl is-active k3s-agent 2>/dev/null", {
              ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000,
            });
            // Line-wise check so running agent nodes ("inactive\nactive") count.
            if (!isK3sServiceActive(svc.stdout)) {
              r.k3s = svc.stdout.trim() === "inactive" ? "stopped" : "not installed";
              r.k3sC = svc.stdout.trim() === "inactive" ? RED : DIM;
              return r;
            }
            r.k3s = "running"; r.k3sC = GREEN;
            const [nodeRes, encRes, cniRes, podRes] = await Promise.all([
              sshExec(info.ip, opts.user,
                "sudo k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' 2>/dev/null",
                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
              sshExec(info.ip, opts.user,
                "sudo k3s secrets-encrypt status 2>/dev/null | head -1",
                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
              sshExec(info.ip, opts.user,
                "sudo k3s kubectl get pods -n kube-system -l k8s-app=cilium --no-headers 2>/dev/null | head -1",
                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
              sshExec(info.ip, opts.user,
                "sudo k3s kubectl get pods -A --no-headers 2>/dev/null | wc -l",
                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
            ]);
            r.node = nodeRes.stdout.includes("True") ? "Ready" : "NotReady";
            r.nodeC = nodeRes.stdout.includes("True") ? GREEN : RED;
            r.encrypt = encRes.stdout.includes("Enabled") ? "yes" : "no";
            r.encC = encRes.stdout.includes("Enabled") ? GREEN : RED;
            // No running cilium pod is reported as "flannel".
            r.cni = cniRes.stdout.includes("Running") ? "cilium" : "flannel";
            r.cniC = cniRes.stdout.includes("Running") ? GREEN : DIM;
            r.pods = podRes.stdout.trim() || "?";
          } catch {
            r.k3s = "unreachable"; r.k3sC = RED;
          }
          return r;
        });
        const results = await Promise.all(probes);
        for (const r of results) {
          console.log(
            `${pad(r.host, 22)}${pad(r.ip, 16)}${pad(r.role, 8)}${r.k3sC}${pad(r.k3s, 14)}${RESET}${r.nodeC}${pad(r.node, 10)}${RESET}${r.encC}${pad(r.encrypt, 10)}${RESET}${r.cniC}${pad(r.cni, 14)}${RESET}${pad(r.pods, 6)}`,
          );
        }
        return;
      }
      // Single target: detailed health check
      const state = await fetchState();
      const resolved = resolveTarget(target, state);
      if (!resolved) {
        console.error(`Cannot resolve target: ${target}`);
        process.exit(1);
      }
      console.log(`Checking k3s health on ${resolved.hostname} (${resolved.ip})...\n`);
      const k3s = new K3sModule();
      const healthResult = await k3s.health({
        hostname: resolved.hostname,
        ip: resolved.ip,
        role: resolved.role,
        os: "fedora-43" as const,
        arch: "x86_64" as const,
        sshUser: opts.user,
        ...(sshKey ? { sshKeyPath: sshKey } : {}),
        config: {},
      });
      for (const line of healthResult.output) {
        console.log(` ${line}`);
      }
      if (healthResult.errors.length > 0) {
        for (const err of healthResult.errors) {
          console.error(` ERROR: ${err}`);
        }
      }
      process.exit(healthResult.success ? 0 : 1);
    });
  k3sCmd
    .command("list")
    .description("List installed machines and their k3s status")
    .option("--user <user>", "SSH user", "michal")
    .action(async (opts: { user: string }) => {
      let state: BastionState;
      try {
        state = await getLabdClient().getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }
      const entries = Object.entries(state.installed);
      if (entries.length === 0) {
        console.log("No installed machines.");
        return;
      }
      const sshKey = findSshKey();
      const BOLD = "\x1b[1m";
      const GREEN = "\x1b[32m";
      const RED = "\x1b[31m";
      const DIM = "\x1b[2m";
      const RESET = "\x1b[0m";
      const hdr = (s: string, w: number) => s.padEnd(w);
      console.log(
        `${BOLD}${hdr("HOSTNAME", 28)}${hdr("IP", 18)}${hdr("ROLE", 10)}${hdr("K3S", 16)}${hdr("NODE", 12)}${hdr("PODS", 6)}${RESET}`,
      );
      // Probe every machine concurrently; each probe produces one table row.
      const probes = entries.map(async ([_mac, info]) => {
        const row = {
          hostname: info.hostname,
          ip: info.ip,
          role: info.role,
          k3s: "—",
          node: "—",
          pods: "—",
          k3sColor: DIM,
          nodeColor: DIM,
        };
        if (!info.ip || info.role === "vanilla") {
          row.k3s = info.role === "vanilla" ? "n/a" : "no ip";
          return row;
        }
        try {
          const svcResult = await sshExec(info.ip, opts.user, "systemctl is-active k3s 2>/dev/null || systemctl is-active k3s-agent 2>/dev/null", {
            ...(sshKey ? { keyPath: sshKey } : {}),
            timeoutMs: 8_000,
          });
          const svcStatus = svcResult.stdout.trim();
          // Line-wise check so running agent nodes ("inactive\nactive") count.
          if (isK3sServiceActive(svcResult.stdout)) {
            row.k3s = "running";
            row.k3sColor = GREEN;
            const nodeResult = await sshExec(info.ip, opts.user,
              "sudo k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' 2>/dev/null || echo unknown",
              { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 },
            );
            const nodeReady = nodeResult.stdout.trim();
            if (nodeReady.includes("True")) {
              row.node = "Ready";
              row.nodeColor = GREEN;
            } else {
              row.node = "NotReady";
              row.nodeColor = RED;
            }
            const podResult = await sshExec(info.ip, opts.user,
              "sudo k3s kubectl get pods -A --no-headers 2>/dev/null | wc -l",
              { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 },
            );
            row.pods = podResult.stdout.trim() || "?";
          } else if (svcStatus === "inactive" || svcStatus === "dead") {
            row.k3s = "stopped";
            row.k3sColor = RED;
          } else {
            row.k3s = "not installed";
            row.k3sColor = DIM;
          }
        } catch {
          row.k3s = "unreachable";
          row.k3sColor = RED;
        }
        return row;
      });
      const results = await Promise.all(probes);
      for (const r of results) {
        console.log(
          `${hdr(r.hostname, 28)}${hdr(r.ip, 18)}${hdr(r.role, 10)}${r.k3sColor}${hdr(r.k3s, 16)}${RESET}${r.nodeColor}${hdr(r.node, 12)}${RESET}${hdr(r.pods, 6)}`,
        );
      }
    });
}

View File

@@ -0,0 +1,76 @@
// labctl config — view and modify CLI configuration.
import type { Command } from "commander";
import {
loadConfig,
saveConfig,
getConfigValue,
setConfigValue,
isValidConfigKey,
CONFIG_FILE,
} from "../config/index.js";
/**
 * Register `config` and its subcommands (`list`, `get`, `set`, `path`)
 * for viewing and editing the CLI configuration file.
 *
 * @param parent Command to attach the `config` command to.
 */
export function registerConfigCommand(parent: Command): void {
  // Shared complaint printed by `get` and `set` for an unrecognized key.
  const reportUnknownKey = (key: string): void => {
    console.error(`Unknown config key: ${key}`);
    console.error(`Valid keys: labdUrl, certPath, keyPath, caPath, defaultEnvironment, defaultCloud, outputFormat`);
  };

  const configCmd = parent
    .command("config")
    .description("View and modify CLI configuration");

  // config list
  configCmd
    .command("list")
    .description("Show all configuration values")
    .action(() => {
      const current = loadConfig();
      console.log(`# Configuration (${CONFIG_FILE})\n`);
      Object.entries(current)
        .filter(([, value]) => value !== undefined)
        .forEach(([name, value]) => {
          console.log(`${name}: ${value}`);
        });
    });

  // config get <key>
  configCmd
    .command("get <key>")
    .description("Get a configuration value")
    .action((key: string) => {
      if (!isValidConfigKey(key)) {
        reportUnknownKey(key);
        process.exit(1);
      }
      const value = getConfigValue(loadConfig(), key);
      // Unset (falsy) values print nothing.
      if (value) {
        console.log(value);
      }
    });

  // config set <key> <value>
  configCmd
    .command("set <key> <value>")
    .description("Set a configuration value")
    .action((key: string, value: string) => {
      if (!isValidConfigKey(key)) {
        reportUnknownKey(key);
        process.exit(1);
      }
      const allowedFormats = ["table", "json", "yaml"];
      if (key === "outputFormat" && !allowedFormats.includes(value)) {
        console.error(`Invalid output format: ${value}. Must be table, json, or yaml.`);
        process.exit(1);
      }
      const updated = setConfigValue(loadConfig(), key, value);
      saveConfig(updated);
      console.log(`Set ${key} = ${value}`);
    });

  // config path
  configCmd
    .command("path")
    .description("Show configuration file path")
    .action(() => {
      console.log(CONFIG_FILE);
    });
}

View File

@@ -0,0 +1,126 @@
// labctl doctor — diagnose configuration and connectivity issues.
import { existsSync, readFileSync } from "node:fs";
import { X509Certificate } from "node:crypto";
import type { Command } from "commander";
import { loadConfig, CONFIG_FILE, CERT_DIR } from "../config/index.js";
/** Outcome of one diagnostic check: a name, a severity and a message. */
interface DiagnosticResult {
name: string;
status: "ok" | "warn" | "error";
message: string;
}
// ANSI escape sequences used to colour terminal output; RESET restores defaults.
const GREEN = "\x1b[32m";
const YELLOW = "\x1b[33m";
const RED = "\x1b[31m";
const RESET = "\x1b[0m";
/**
 * Register `doctor`, which runs a fixed series of checks (config file,
 * labd URL, client certificate, certificate directory, labd connectivity)
 * and prints the results as a coloured list or, with `--json`, as JSON.
 *
 * In list mode the process exit code is set to 1 when any check reports
 * an error.
 *
 * @param program Root commander program to attach the command to.
 */
export function registerDoctorCommand(program: Command): void {
  program
    .command("doctor")
    .description("Diagnose configuration and connectivity issues")
    .option("--json", "Output results as JSON")
    .action(async (opts: { json?: boolean }) => {
      const results: DiagnosticResult[] = [];
      const config = loadConfig();
      // Check config file
      results.push({
        name: "Configuration file",
        status: existsSync(CONFIG_FILE) ? "ok" : "warn",
        message: existsSync(CONFIG_FILE) ? CONFIG_FILE : "Using defaults — run 'labctl config set labdUrl <url>'",
      });
      // Check labd URL
      results.push({
        name: "labd URL",
        status: config.labdUrl ? "ok" : "error",
        message: config.labdUrl || "Not configured",
      });
      // Check client certificate
      if (config.certPath && existsSync(config.certPath)) {
        try {
          const certPem = readFileSync(config.certPath, "utf-8");
          const cert = new X509Certificate(certPem);
          const expiresIn = new Date(cert.validTo).getTime() - Date.now();
          const daysLeft = Math.floor(expiresIn / (1000 * 60 * 60 * 24));
          if (!(expiresIn > 0)) {
            // Past expiry (or an unparseable date, which yields NaN).
            results.push({
              name: "Client certificate",
              status: "error",
              message: "Expired!",
            });
          } else if (daysLeft < 1) {
            // Still valid, but flooring to whole days gives 0; this must not
            // be reported as expired.
            results.push({
              name: "Client certificate",
              status: "warn",
              message: "Expires in less than a day",
            });
          } else {
            results.push({
              name: "Client certificate",
              status: daysLeft > 7 ? "ok" : "warn",
              message: `Valid for ${daysLeft} days`,
            });
          }
        } catch {
          results.push({
            name: "Client certificate",
            status: "error",
            message: "Failed to parse certificate",
          });
        }
      } else {
        results.push({
          name: "Client certificate",
          status: "warn",
          message: `Not configured — run 'labctl login'`,
        });
      }
      // Check cert directory
      results.push({
        name: "Certificate directory",
        status: existsSync(CERT_DIR) ? "ok" : "warn",
        message: existsSync(CERT_DIR) ? CERT_DIR : "Not created yet",
      });
      // Test labd connectivity
      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), 5000);
      try {
        const resp = await fetch(`${config.labdUrl}/healthz`, {
          signal: controller.signal,
        });
        const body = (await resp.json()) as { status?: string };
        results.push({
          name: "labd connectivity",
          status: resp.ok ? "ok" : "warn",
          message: resp.ok
            ? `Connected — ${body.status ?? "ok"}`
            : `HTTP ${resp.status}: ${body.status ?? "unknown"}`,
        });
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        // Node's fetch reports network failures as "fetch failed" and keeps
        // the real reason (e.g. ECONNREFUSED) on `cause`; include it.
        const cause = err instanceof Error ? (err as Error & { cause?: unknown }).cause : undefined;
        const causeMsg = cause instanceof Error ? cause.message : "";
        const fullMsg = causeMsg ? `${msg}: ${causeMsg}` : msg;
        results.push({
          name: "labd connectivity",
          status: "error",
          message: msg.includes("abort")
            ? "Connection timed out (5s)"
            : fullMsg.includes("ECONNREFUSED")
              ? "Connection refused"
              : fullMsg,
        });
      } finally {
        // Always disarm the timer; otherwise a fast failure keeps the process
        // alive until the 5 s timer fires.
        clearTimeout(timeout);
      }
      // Output
      if (opts.json) {
        // NOTE(review): the exit code is only set in list mode below — confirm
        // whether --json consumers should also get a non-zero exit on errors.
        console.log(JSON.stringify(results, null, 2));
      } else {
        console.log("Running diagnostics...\n");
        for (const r of results) {
          const icon = r.status === "ok" ? "\u2713" : r.status === "warn" ? "!" : "\u2717";
          const color = r.status === "ok" ? GREEN : r.status === "warn" ? YELLOW : RED;
          console.log(`${color}${icon}${RESET} ${r.name}: ${r.message}`);
        }
        const errors = results.filter((r) => r.status === "error").length;
        const warns = results.filter((r) => r.status === "warn").length;
        const oks = results.filter((r) => r.status === "ok").length;
        console.log(`\n${oks} passed, ${warns} warnings, ${errors} errors`);
        if (errors > 0) process.exitCode = 1;
      }
    });
}

View File

@@ -1,35 +1,21 @@
// CLI command: provision forget
// Remove a machine from all bastion state.
// Remove a machine from all bastion state via labd.
import type { Command } from "commander";
import { getLabdClient } from "../api/config.js";
export function registerForgetCommand(parent: Command): void {
parent
.command("forget <mac>")
.description("Remove a machine from bastion state")
.option("--port <port>", "Bastion HTTP port", "8080")
.action(async (mac: string, opts: { port: string }) => {
const port = parseInt(opts.port, 10);
.action(async (mac: string) => {
const normalizedMac = mac.toLowerCase().replace(/-/g, ":");
try {
const response = await fetch(
`http://localhost:${port}/api/machines/${encodeURIComponent(normalizedMac)}`,
{ method: "DELETE" },
);
const result = await response.json() as Record<string, unknown>;
if (!response.ok) {
console.error(
`Error: ${result["error"] ?? `HTTP ${response.status}`}`,
);
process.exit(1);
}
const result = await getLabdClient().forgetMachine(normalizedMac);
console.log(JSON.stringify(result, null, 2));
} catch {
console.error(`Cannot reach bastion at localhost:${port}. Is it running?`);
} catch (err) {
console.error(`Failed: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
}
});

View File

@@ -1,43 +1,68 @@
// CLI command: provision install
// Queue a discovered machine for Fedora installation.
// Queue a discovered machine for OS installation via labd.
import type { Command } from "commander";
import { Command, Option } from "commander";
import { isValidOsId, SUPPORTED_OS, SUPPORTED_ROLES, ROLE_REGISTRY } from "@lab/shared";
import { getLabdClient } from "../api/config.js";
/**
 * Render the role registry as help text: a blank line, an
 * "Available roles:" heading, then one line per role with its name
 * (padded to 16 columns), description, optional parent and the apps it
 * deploys automatically.
 */
function roleTable(): string {
  const rows = ROLE_REGISTRY.map((role) => {
    const inherits = role.parent ? ` (extends ${role.parent})` : "";
    const autoApps = role.apps.length > 0 ? ` [auto: ${role.apps.join(", ")}]` : "";
    return ` ${role.name.padEnd(16)} ${role.description}${inherits}${autoApps}`;
  });
  return ["", "Available roles:", ...rows].join("\n");
}
export function registerInstallCommand(parent: Command): void {
parent
.command("install <mac> <hostname>")
.description("Queue a discovered machine for Fedora installation")
.option("--role <role>", "Machine role: worker or infra", "worker")
.description("Queue a discovered machine for OS installation")
.showHelpAfterError(true)
.addHelpText("after", roleTable())
.addOption(new Option("--role <role>", "Machine role (see below)").choices([...SUPPORTED_ROLES]).default("worker"))
.addOption(new Option("--os <os>", "Operating system").choices([...SUPPORTED_OS]).default("fedora-43"))
.option("--disk <device>", "Target disk device (auto-detect if omitted)")
.option("--port <port>", "Bastion HTTP port", "8080")
.action(async (mac: string, hostname: string, opts: {
role: string;
os: string;
disk?: string;
port: string;
}) => {
const port = parseInt(opts.port, 10);
const payload: Record<string, string> = {
mac,
hostname,
role: opts.role,
};
if (opts.disk !== undefined) {
payload["disk"] = opts.disk;
if (!isValidOsId(opts.os)) {
console.error(`Unknown OS: ${opts.os}. Supported: ${SUPPORTED_OS.join(", ")}`);
process.exit(1);
}
if (!(SUPPORTED_ROLES as readonly string[]).includes(opts.role)) {
console.error(`Unknown role: ${opts.role}`);
console.error(roleTable());
process.exit(1);
}
try {
const response = await fetch(`http://localhost:${port}/api/install`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(payload),
const result = await getLabdClient().installMachine({
mac,
hostname,
role: opts.role,
os: opts.os,
...(opts.disk ? { disk: opts.disk } : {}),
});
const result = await response.json() as Record<string, unknown>;
console.log(JSON.stringify(result, null, 2));
console.log("");
console.log("Power on the machine to start Fedora installation.");
} catch {
console.error(`Cannot reach bastion at localhost:${port}. Is it running?`);
const osLabel = opts.os.startsWith("ubuntu") ? "Ubuntu" : "Fedora";
console.log(`Power on the machine to start ${osLabel} installation.`);
const roleInfo = ROLE_REGISTRY.find(r => r.name === opts.role);
if (roleInfo?.k3s) {
console.log(`After install completes, k3s will be installed automatically (role=${opts.role}).`);
if (roleInfo.apps.length > 0) {
console.log(`Then: ${roleInfo.apps.join(", ")} will be deployed.`);
}
console.log(`To install k3s manually later: labctl app k3s install ${hostname}`);
}
} catch (err) {
console.error(`Failed: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
}
});

View File

@@ -0,0 +1,298 @@
// CLI command: labctl app labcontroller deploy/status
// Deploy bastion + labd + CockroachDB to a k3s labcontroller node.
import { existsSync, writeFileSync, mkdirSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import type { Command } from "commander";
import type { BastionState } from "@lab/shared";
import { sshExec } from "@lab/modules";
import { getLabdClient } from "../api/config.js";
/**
 * Locate a private SSH key belonging to the human invoking the CLI.
 *
 * When the process runs under sudo (SUDO_USER is set) the key is looked up in
 * `/home/<SUDO_USER>/.ssh`; otherwise in the current user's home directory.
 * Candidates are tried in the order ed25519, ecdsa, rsa.
 *
 * @returns Absolute path of the first key file that exists, or `undefined`.
 */
function findSshKey(): string | undefined {
  const invokingUser = process.env["SUDO_USER"];
  const homeDir = invokingUser ? join("/home", invokingUser) : homedir();
  return ["id_ed25519", "id_ecdsa", "id_rsa"]
    .map((keyName) => join(homeDir, ".ssh", keyName))
    .find((candidate) => existsSync(candidate));
}
/**
 * Turn a user-supplied target into an address usable for SSH.
 *
 * A dotted-quad string is returned unchanged. Anything else is looked up in
 * labd's installed-machine list, matching either the full hostname or its
 * first label(s) (`target` followed by a dot). If labd cannot be queried or
 * nothing matches, the target is returned as given.
 *
 * @param target Hostname or IPv4 address typed by the user.
 * @returns The machine's IP when known, otherwise `target` itself.
 */
async function resolveIp(target: string): Promise<string> {
  const looksLikeIpv4 = /^\d+\.\d+\.\d+\.\d+$/.test(target);
  if (looksLikeIpv4) {
    return target;
  }

  try {
    const { installed } = await getLabdClient().getMachines();
    const match = Object.values(installed).find(
      (machine) => machine.hostname === target || machine.hostname.startsWith(target + "."),
    );
    if (match !== undefined) {
      return match.ip;
    }
  } catch {
    // Lookup is best-effort: fall through and use the target as-is.
  }
  return target;
}
/**
 * Register the `labcontroller` sub-command group on the given `app` command.
 *
 * Two commands are added:
 *  - `deploy <target>`: fetches the target's k3s kubeconfig over SSH, merges it
 *    into the local ~/.kube/config, applies the CockroachDB, labd and bastion
 *    manifests on the target via `sudo k3s kubectl apply`, then asks labd to
 *    set the machine's role to "labcontroller".
 *  - `status [target]`: with no target, prints one table row per installed
 *    machine known to labd; with a target, prints `kubectl get pods -o wide`
 *    for the lab-system namespace on that machine.
 *
 * All remote work goes through `sshExec`, using the key from `findSshKey()`
 * when one is found.
 */
export function registerLabcontrollerCommands(appCmd: Command): void {
const lcCmd = appCmd.command("labcontroller").description("Labcontroller deployment (bastion + labd + CockroachDB)");
lcCmd
.command("deploy <target>")
.description("Deploy labcontroller stack to a k3s node")
.option("--user <user>", "SSH user", "michal")
.option("--crdb-replicas <n>", "CockroachDB replicas", "1")
.action(async (target: string, opts: {
user: string;
crdbReplicas: string;
}) => {
const ip = await resolveIp(target);
const sshKey = findSshKey();
// Only pass keyPath when a key was found; otherwise sshExec gets no key option.
const sshOpts = sshKey ? { keyPath: sshKey } : {};
console.log(`Deploying labcontroller stack to ${target} (${ip})...\n`);
// 1. Fetch kubeconfig from target
// NOTE: progress labels below read "[n/4]" although a fifth step (role
// promotion) follows the bastion deployment.
console.log("[1/4] Fetching kubeconfig...");
const kcResult = await sshExec(ip, opts.user, "sudo cat /etc/rancher/k3s/k3s.yaml", { ...sshOpts, timeoutMs: 10_000 });
if (kcResult.exitCode !== 0) {
console.error(" Failed to fetch kubeconfig. Is k3s running?");
process.exit(1);
}
const kubeconfigDir = join(homedir(), ".kube");
mkdirSync(kubeconfigDir, { recursive: true });
const contextName = `lab-${target}`;
// Point the config at the target's IP instead of loopback and rename the
// "default" cluster/user/context entries to a per-target name.
const kubeconfig = kcResult.stdout
.replace(/server:\s*https:\/\/127\.0\.0\.1:6443/, `server: https://${ip}:6443`)
.replace(/name:\s*default/g, `name: ${contextName}`)
.replace(/cluster:\s*default/g, `cluster: ${contextName}`)
.replace(/user:\s*default/g, `user: ${contextName}`);
// Write the rewritten config to a temp file so kubectl can flatten it
// together with the existing main config via the KUBECONFIG path list.
const tmpPath = join(kubeconfigDir, `.lab-${target}-tmp`);
writeFileSync(tmpPath, kubeconfig, { mode: 0o600 });
const mainConfig = join(kubeconfigDir, "config");
const { spawnSync } = await import("node:child_process");
const mergeResult = spawnSync("kubectl", ["config", "view", "--flatten"], {
encoding: "utf-8",
stdio: ["pipe", "pipe", "pipe"],
env: { ...process.env, KUBECONFIG: `${mainConfig}:${tmpPath}` },
});
if (mergeResult.status === 0 && mergeResult.stdout) {
// Merge succeeded: overwrite the main config and switch to the new context.
writeFileSync(mainConfig, mergeResult.stdout, { mode: 0o600 });
spawnSync("kubectl", ["config", "use-context", contextName], {
stdio: "pipe",
env: { ...process.env, KUBECONFIG: mainConfig },
});
console.log(` Merged into ~/.kube/config as context "${contextName}"`);
console.log(` Active context set to "${contextName}"`);
} else {
// Merge failed (non-zero status or empty output): keep a standalone file.
writeFileSync(join(kubeconfigDir, `lab-${target}`), kubeconfig, { mode: 0o600 });
console.log(` Saved to ~/.kube/lab-${target} (merge failed, use KUBECONFIG=~/.kube/lab-${target})`);
}
// Best-effort removal of the temp file; errors are ignored.
try { const { unlinkSync } = await import("node:fs"); unlinkSync(tmpPath); } catch { /* ignore */ }
console.log("");
// 2. Apply CockroachDB manifests
console.log("[2/4] Deploying CockroachDB...");
const { cockroachDbManifests } = await import("@lab/modules/dist/modules/labcontroller/src/cockroachdb.js");
// NOTE(review): --crdb-replicas is parsed without validation; a non-numeric
// value yields NaN here — confirm how cockroachDbManifests treats that.
const crdb = cockroachDbManifests({ replicas: parseInt(opts.crdbReplicas, 10) });
const manifests = [crdb.namespace, crdb.headlessService, crdb.clientService, crdb.statefulSet];
for (const manifest of manifests) {
const json = JSON.stringify(manifest);
const kind = (manifest as { kind?: string }).kind ?? "?";
const name = ((manifest as { metadata?: { name?: string } }).metadata)?.name ?? "?";
// The JSON is embedded in a single-quoted shell string; each single quote is
// rewritten as '\'' so it survives the remote shell.
const result = await sshExec(ip, opts.user,
`echo '${json.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f -`,
{ ...sshOpts, timeoutMs: 15_000 },
);
// A failed apply is reported but does not stop the deployment.
if (result.exitCode === 0) {
console.log(` applied ${kind}/${name}`);
} else {
console.error(` FAILED ${kind}/${name}: ${result.stderr.trim()}`);
}
}
console.log(" Waiting for CockroachDB pod...");
// The `|| echo` fallback prints a placeholder when the wait fails or times
// out; whatever the command printed on stdout is echoed below.
const waitResult = await sshExec(ip, opts.user,
"sudo k3s kubectl wait --for=condition=Ready pod -l app=cockroachdb -n lab-system --timeout=120s 2>/dev/null || echo 'still starting'",
{ ...sshOpts, timeoutMs: 130_000 },
);
console.log(` ${waitResult.stdout.trim()}`);
console.log(" Initializing CockroachDB cluster...");
const initJson = JSON.stringify(crdb.initJob);
// Apply the init job and wait for it; the result of this call is not inspected.
await sshExec(ip, opts.user,
`echo '${initJson.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f - 2>/dev/null; sudo k3s kubectl wait --for=condition=Complete job/cockroachdb-init -n lab-system --timeout=60s 2>/dev/null || echo 'init may already be done'`,
{ ...sshOpts, timeoutMs: 70_000 },
);
// Create the `lab` database if missing; the result of this call is not inspected.
await sshExec(ip, opts.user,
"sudo k3s kubectl exec cockroachdb-0 -n lab-system -- /cockroach/cockroach sql --insecure -e 'CREATE DATABASE IF NOT EXISTS lab' 2>/dev/null || echo 'db may already exist'",
{ ...sshOpts, timeoutMs: 15_000 },
);
// Printed unconditionally — none of the preceding results gate this message.
console.log(" CockroachDB ready\n");
// 3. Deploy labd
console.log("[3/4] Deploying labd...");
const { labdManifests } = await import("@lab/modules/dist/modules/labcontroller/src/labd.js");
const labd = labdManifests({ databaseUrl: crdb.connectionString });
for (const manifest of [labd.service, labd.deployment]) {
const json = JSON.stringify(manifest);
const kind = (manifest as { kind?: string }).kind ?? "?";
const name = ((manifest as { metadata?: { name?: string } }).metadata)?.name ?? "?";
const result = await sshExec(ip, opts.user,
`echo '${json.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f -`,
{ ...sshOpts, timeoutMs: 15_000 },
);
console.log(` ${result.exitCode === 0 ? "applied" : "FAILED"} ${kind}/${name}`);
}
console.log("");
// 4. Deploy bastion
console.log("[4/4] Deploying bastion (hostNetwork)...");
const { bastionManifests } = await import("@lab/modules/dist/modules/labcontroller/src/bastion.js");
const bastion = bastionManifests();
const bJson = JSON.stringify(bastion.daemonSet);
const bResult = await sshExec(ip, opts.user,
`echo '${bJson.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f -`,
{ ...sshOpts, timeoutMs: 15_000 },
);
console.log(` ${bResult.exitCode === 0 ? "applied" : "FAILED"} DaemonSet/bastion`);
// 5. Promote host role to labcontroller via labd
console.log("Promoting host role to labcontroller...");
try {
const state = await getLabdClient().getMachines();
// Update only the first installed machine whose IP or hostname matches.
for (const [mac, info] of Object.entries(state.installed)) {
if (info.ip === ip || info.hostname === target) {
await getLabdClient().updateRole(mac, "labcontroller");
// The "infra" in this message is a fixed string, not the machine's previous role.
console.log(` ${info.hostname}: infra -> labcontroller`);
break;
}
}
} catch {
// Any failure here (lookup or update) is reported as a soft warning.
console.log(" Could not update role (labd may not be running yet)");
}
// Summary is printed regardless of whether individual apply steps failed.
console.log("\n=== Labcontroller deployed ===");
console.log(` CockroachDB: cockroachdb-client.lab-system:26257`);
console.log(` labd: ${ip}:30100`);
console.log(` bastion: ${ip}:8080 (hostNetwork)`);
console.log(` context: lab-${target}`);
console.log(`\n Switch context: kubectl ctx lab-${target}`);
console.log(` View pods: kubectl get pods -n lab-system`);
});
lcCmd
.command("status [target]")
.description("Check labcontroller deployment status (all hosts if no target)")
.option("--user <user>", "SSH user", "michal")
.action(async (target: string | undefined, opts: { user: string }) => {
const sshKey = findSshKey();
const sshOpts = sshKey ? { keyPath: sshKey } : {};
// No target: summarize every installed machine known to labd in one table.
if (!target) {
let state: BastionState;
try {
state = await getLabdClient().getMachines();
} catch (err) {
console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
}
const entries = Object.entries(state.installed);
if (entries.length === 0) {
console.log("No installed machines.");
return;
}
// ANSI escape sequences used to style the table.
const BOLD = "\x1b[1m";
const GREEN = "\x1b[32m";
const RED = "\x1b[31m";
const DIM = "\x1b[2m";
const RESET = "\x1b[0m";
const pad = (s: string, w: number) => s.padEnd(w);
console.log(
`${BOLD}${pad("HOST", 22)}${pad("IP", 16)}${pad("ROLE", 14)}${pad("CRDB", 12)}${pad("LABD", 12)}${pad("BASTION", 12)}${pad("NS", 8)}${RESET}`,
);
// One table row: display text per column plus the color prefix (…C fields)
// applied to the crdb/labd/bastion cells.
interface StatusRow {
host: string; ip: string; role: string;
crdb: string; labd: string; bastion: string; ns: string;
crdbC: string; labdC: string; bastionC: string;
}
// Start one probe per machine; they run concurrently and are awaited together below.
const probes = entries.map(async ([_mac, info]): Promise<StatusRow> => {
const r: StatusRow = {
host: info.hostname, ip: info.ip, role: info.role ?? "?",
crdb: "—", labd: "—", bastion: "—", ns: "—",
crdbC: DIM, labdC: DIM, bastionC: DIM,
};
// Without an IP there is nothing to SSH into; keep the placeholder row.
if (!info.ip) return r;
try {
const result = await sshExec(info.ip, opts.user,
"sudo k3s kubectl get pods -n lab-system --no-headers -o custom-columns='NAME:.metadata.name,STATUS:.status.phase' 2>/dev/null || echo 'NO_NS'",
{ ...sshOpts, timeoutMs: 10_000 },
);
// The remote command prints NO_NS when kubectl fails.
if (result.stdout.includes("NO_NS") || result.exitCode !== 0) {
r.ns = "none";
return r;
}
r.ns = "ok";
const lines = result.stdout.trim().split("\n").filter(Boolean);
for (const line of lines) {
const [name, status] = line.trim().split(/\s+/);
if (!name) continue;
// Both "Running" and "Succeeded" phases are displayed as "running" in green.
const running = status === "Running" || status === "Succeeded";
const color = running ? GREEN : RED;
const label = running ? "running" : (status ?? "?").toLowerCase();
// Classify pods by name prefix; cockroachdb pods whose name contains
// "init" are skipped. A later matching pod overwrites an earlier one.
if (name.startsWith("cockroachdb-") && !name.includes("init")) {
r.crdb = label; r.crdbC = color;
} else if (name.startsWith("labd-")) {
r.labd = label; r.labdC = color;
} else if (name.startsWith("bastion-")) {
r.bastion = label; r.bastionC = color;
}
}
} catch {
// An exception from sshExec is surfaced in the CRDB column only.
r.crdb = "ssh err"; r.crdbC = RED;
}
return r;
});
const results = await Promise.all(probes);
for (const r of results) {
console.log(
`${pad(r.host, 22)}${pad(r.ip, 16)}${pad(r.role, 14)}${r.crdbC}${pad(r.crdb, 12)}${RESET}${r.labdC}${pad(r.labd, 12)}${RESET}${r.bastionC}${pad(r.bastion, 12)}${RESET}${pad(r.ns, 8)}`,
);
}
return;
}
// Specific target: show detailed pod list
const ip = await resolveIp(target);
console.log(`Labcontroller status on ${target} (${ip}):\n`);
const result = await sshExec(ip, opts.user,
"sudo k3s kubectl get pods -n lab-system -o wide 2>/dev/null || echo 'lab-system namespace not found'",
{ ...sshOpts, timeoutMs: 10_000 },
);
// stdout is printed as-is; the exit code is not checked here.
console.log(result.stdout);
});
}

View File

@@ -3,6 +3,7 @@
import type { Command } from "commander";
import type { BastionState } from "@lab/shared";
import { getLabdClient } from "../api/config.js";
const BOLD = "\x1b[1m";
const GREEN = "\x1b[0;32m";
@@ -24,16 +25,12 @@ export function registerListCommand(parent: Command): void {
parent
.command("list")
.description("List all known machines")
.option("--port <port>", "Bastion HTTP port", "8080")
.action(async (opts: { port: string }) => {
const port = parseInt(opts.port, 10);
.action(async () => {
let state: BastionState;
try {
const response = await fetch(`http://localhost:${port}/api/machines`);
state = (await response.json()) as BastionState;
} catch {
console.error(`Cannot reach bastion at localhost:${port}. Is it running?`);
state = await getLabdClient().getMachines();
} catch (err) {
console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
}

View File

@@ -0,0 +1,120 @@
// labctl login — authenticate with labd and obtain client certificate.
import { generateKeyPairSync } from "node:crypto";
import { writeFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
import { createInterface } from "node:readline";
import type { Command } from "commander";
import { loadConfig, saveConfig, CERT_DIR } from "../config/index.js";
import { join } from "node:path";
/**
 * Register the `labctl login` command.
 *
 * The command:
 *  1. ensures CERT_DIR exists and contains an EC P-256 private key
 *     (`client.key`), generating one on first use;
 *  2. derives the matching public key and prompts for a join token;
 *  3. POSTs `{ token, hostname, csr }` to `<server>/api/auth/user-enroll`;
 *  4. stores the returned client and CA certificates next to the key;
 *  5. records the server URL and file paths through `saveConfig()`.
 *
 * Only the PUBLIC key (SPKI PEM) is placed in the `csr` field. The private
 * key never leaves the local machine.
 *
 * The process exits with status 1 when the key cannot be read, the token is
 * empty, the server rejects the request, or the server cannot be reached.
 *
 * @param program Root commander program to attach the command to.
 */
export function registerLoginCommand(program: Command): void {
  program
    .command("login")
    .description("Authenticate with labd and obtain client certificate")
    .option("--server <url>", "labd server URL")
    .action(async (options: { server?: string }) => {
      if (!existsSync(CERT_DIR)) {
        mkdirSync(CERT_DIR, { recursive: true, mode: 0o700 });
      }
      const config = loadConfig();
      const serverUrl = options.server ?? config.labdUrl;
      const keyPath = join(CERT_DIR, "client.key");
      const certPath = join(CERT_DIR, "client.crt");
      const caPath = join(CERT_DIR, "ca.crt");

      // 1. Generate keypair if not exists
      if (!existsSync(keyPath)) {
        console.log("Generating client keypair...");
        const { privateKey } = generateKeyPairSync("ec", {
          namedCurve: "P-256",
          privateKeyEncoding: { type: "pkcs8", format: "pem" },
          publicKeyEncoding: { type: "spki", format: "pem" },
        });
        writeFileSync(keyPath, privateKey, { mode: 0o600 });
        console.log(`Private key saved to ${keyPath}`);
      } else {
        console.log(`Using existing keypair at ${keyPath}`);
      }

      // 2. Derive the public key for the enrollment request. The key file holds
      //    the PRIVATE key, so it must never be sent as-is; createPublicKey()
      //    accepts a private-key PEM and yields the corresponding public key.
      let publicKey: string;
      try {
        const { createPublicKey } = await import("node:crypto");
        publicKey = createPublicKey(readFileSync(keyPath, "utf-8"))
          .export({ type: "spki", format: "pem" })
          .toString();
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`Cannot read client key at ${keyPath}: ${reason}`);
        process.exit(1);
      }

      // 3. Prompt for token
      const token = await promptPassword("Enter join token: ");
      if (!token) {
        console.error("Token is required.");
        process.exit(1);
      }

      // 4. Submit enrollment request
      console.log(`Authenticating with ${serverUrl}...`);
      try {
        const resp = await fetch(`${serverUrl}/api/auth/user-enroll`, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            token,
            hostname: `cli-${process.env["USER"] ?? "unknown"}`,
            csr: publicKey,
          }),
        });
        if (!resp.ok) {
          // Error bodies may not be JSON; fall back to the HTTP status text.
          const body = (await resp.json().catch(() => ({}))) as Record<string, string>;
          console.error(`Login failed: ${body["error"] ?? resp.statusText}`);
          process.exit(1);
        }
        const result = (await resp.json()) as {
          certificatePem?: string | null;
          caPem?: string | null;
          status: string;
        };
        if (result.certificatePem) {
          writeFileSync(certPath, result.certificatePem, { mode: 0o600 });
          console.log(`Client certificate saved to ${certPath}`);
        }
        if (result.caPem) {
          writeFileSync(caPath, result.caPem, { mode: 0o644 });
          console.log(`CA certificate saved to ${caPath}`);
        }

        // 5. Update config (caPath is recorded only when a CA file is present)
        saveConfig({
          ...config,
          labdUrl: serverUrl,
          certPath,
          keyPath,
          ...(existsSync(caPath) ? { caPath } : {}),
        });
        console.log(`\nLogin successful! Configuration updated.`);
        console.log(`Server: ${serverUrl}`);
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        // Network-level failures get a friendlier hint than raw error text.
        if (message.includes("ECONNREFUSED") || message.includes("fetch")) {
          console.error(`Cannot connect to labd at ${serverUrl}`);
          console.error("Check that labd is running and the URL is correct.");
        } else {
          console.error(`Login failed: ${message}`);
        }
        process.exit(1);
      }
    });
}
/**
 * Ask a single question on the terminal and return the trimmed reply.
 *
 * A readline interface bound to stdin/stdout is opened for the question and
 * closed as soon as the answer arrives. Input is echoed as typed.
 *
 * @param message Prompt text shown to the user.
 * @returns The answer with surrounding whitespace removed.
 */
async function promptPassword(message: string): Promise<string> {
  const reply = await new Promise<string>((done) => {
    const reader = createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    reader.question(message, (line) => {
      reader.close();
      done(line);
    });
  });
  return reply.trim();
}

View File

@@ -0,0 +1,85 @@
// CLI command: provision logs
// Show provisioning logs for a machine via labd.
import type { Command } from "commander";
import { getLabdClient } from "../api/config.js";
/**
 * Resolve a user-supplied target to a MAC address.
 *
 * The target is lower-cased and dashes become colons; if that already has the
 * shape of a MAC it is returned immediately. Otherwise labd's state is
 * searched in order: installed machines (by hostname, hostname prefix, or IP),
 * the install queue (by hostname or hostname prefix), then discovered MACs.
 * When labd is unreachable or nothing matches, the normalized target is
 * returned unchanged.
 *
 * @param target Hostname, MAC (colon or dash separated) or IP address.
 * @returns The matching MAC, or the normalized target as a fallback.
 */
async function resolveToMac(target: string): Promise<string> {
  const candidate = target.toLowerCase().replace(/-/g, ":");
  const macShape = /^([0-9a-f]{2}:){5}[0-9a-f]{2}$/;
  if (macShape.test(candidate)) {
    return candidate;
  }

  // Hostnames match exactly or as a leading label ("node1" ~ "node1.lab").
  const sameHost = (hostname: string): boolean =>
    hostname === target || hostname.startsWith(target + ".");

  try {
    const state = await getLabdClient().getMachines();

    const installedHit = Object.entries(state.installed).find(
      ([, info]) => sameHost(info.hostname) || info.ip === target,
    );
    if (installedHit !== undefined) {
      return installedHit[0];
    }

    const queuedHit = Object.entries(state.install_queue).find(([, info]) => sameHost(info.hostname));
    if (queuedHit !== undefined) {
      return queuedHit[0];
    }

    const discoveredHit = Object.keys(state.discovered).find((mac) => mac === candidate);
    if (discoveredHit !== undefined) {
      return discoveredHit;
    }
  } catch {
    // labd not reachable — fall back to the normalized input.
  }
  return candidate;
}
/**
 * Register `logs <target>`: print the provisioning record labd holds for a
 * machine (hostname, status, role, optional OS/IP) followed by its stage log.
 *
 * The target is first resolved to a MAC via `resolveToMac`. Errors whose
 * message contains "404" or "not found" are reported as an unknown machine;
 * any other error is reported as labd being unreachable. Both exit with 1.
 *
 * @param parent Command under which `logs` is registered.
 */
export function registerLogsCommand(parent: Command): void {
  const BOLD = "\x1b[1m";
  const GREEN = "\x1b[32m";
  const YELLOW = "\x1b[33m";
  const RED = "\x1b[31m";
  const DIM = "\x1b[2m";
  const RESET = "\x1b[0m";

  // Green for the final stage, red for errors, yellow for everything else.
  const stageColor = (stage: string): string => {
    if (stage === "complete") return GREEN;
    if (stage === "error") return RED;
    return YELLOW;
  };

  const showLogs = async (target: string): Promise<void> => {
    const mac = await resolveToMac(target);
    try {
      const data = await getLabdClient().getMachineLogs(mac);

      const statusColor = data["status"] === "installed" ? GREEN : YELLOW;
      console.log(`${BOLD}${data["hostname"]}${RESET} (${mac})`);
      console.log(` Status: ${statusColor}${data["status"]}${RESET}`);
      console.log(` Role: ${data["role"]}`);
      if (data["os"]) console.log(` OS: ${data["os"]}`);
      if (data["ip"]) console.log(` IP: ${data["ip"]}`);
      console.log("");

      const events =
        (data["log"] as Array<{ stage: string; detail: string; timestamp: string }> | undefined) ?? [];
      if (events.length > 0) {
        console.log(`${BOLD} Log:${RESET}`);
        for (const event of events) {
          // Characters 11..18 of the timestamp are shown as the time column.
          const clock = event.timestamp.slice(11, 19);
          const suffix = event.detail ? ` ${DIM}-- ${event.detail}${RESET}` : "";
          console.log(` ${DIM}${clock}${RESET} ${stageColor(event.stage)}${event.stage}${RESET}${suffix}`);
        }
      } else {
        console.log(` ${DIM}No progress events yet (queued, waiting for PXE boot)${RESET}`);
      }
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      const unknownMachine = msg.includes("404") || msg.includes("not found");
      if (unknownMachine) {
        console.error(`Machine not found: ${target}`);
        console.error("Run 'labctl provision list' to see available machines.");
      } else {
        console.error(`Cannot reach labd: ${msg}`);
      }
      process.exit(1);
    }
  };

  parent
    .command("logs <target>")
    .description("Show provisioning logs for a machine (hostname, MAC, or IP)")
    .action(showLogs);
}

View File

@@ -0,0 +1,114 @@
// CLI command: provision makeiso
// Generate/serve a UEFI-bootable iPXE ISO for machines that don't support PXE boot.
// Queries labd for connected bastions and provides the download URL.
import { readFileSync, writeFileSync, existsSync } from "node:fs";
import { createInterface } from "node:readline";
import { Command, Option } from "commander";
import { getLabdClient } from "../api/config.js";
import { buildBootIso } from "@lab/bastion/iso-builder";
/**
 * Print a question on stdout and resolve with the user's trimmed answer.
 *
 * @param question Text displayed before waiting for input.
 * @returns The line entered on stdin, without surrounding whitespace.
 */
function prompt(question: string): Promise<string> {
  const reader = createInterface({
    input: process.stdin,
    output: process.stdout,
  });
  return new Promise<string>((done) => {
    const onAnswer = (line: string): void => {
      // Release stdin before handing the answer back.
      reader.close();
      done(line.trim());
    };
    reader.question(question, onAnswer);
  });
}
/**
 * Per-architecture iPXE binary locations, keyed by the `--arch` value.
 * `src` is the path of the iPXE EFI binary on the local filesystem and
 * `dest` is the path recorded for that binary when it is handed to the ISO
 * builder.
 */
const IPXE_PATHS: Record<string, { src: string; dest: string }> = {
x86_64: { src: "/usr/share/ipxe/ipxe-snponly-x86_64.efi", dest: "EFI/BOOT/BOOTX64.EFI" },
aarch64: { src: "/usr/share/ipxe/arm64-efi/snponly.efi", dest: "EFI/BOOT/BOOTAA64.EFI" },
};
/**
 * Choose the bastion whose address will be embedded in / shown for the ISO.
 *
 * Bastions are fetched from labd and filtered to those with status "online".
 * With none online the process exits with status 1; with exactly one it is
 * used directly; otherwise the user picks from a numbered list (an invalid
 * selection also exits with status 1).
 *
 * @returns Hostname and server IP of the chosen bastion, with HTTP port 8080.
 */
async function selectBastion(): Promise<{ hostname: string; serverIp: string; httpPort: number }> {
  const allBastions = await getLabdClient().getBastions();
  const candidates = allBastions.filter((bastion) => bastion.status === "online");

  if (candidates.length === 0) {
    console.error("No bastions online. Start a bastion first.");
    process.exit(1);
  }

  if (candidates.length === 1) {
    const only = candidates[0]!;
    console.log(`Using bastion: ${only.hostname} (${only.serverIp})`);
    return { hostname: only.hostname, serverIp: only.serverIp, httpPort: 8080 };
  }

  console.log("Available bastions:\n");
  candidates.forEach((bastion, position) => {
    console.log(` ${position + 1}) ${bastion.hostname} ${bastion.serverIp} (${bastion.network})`);
  });
  console.log("");

  const reply = await prompt(`Select bastion [1-${candidates.length}]: `);
  // The menu is 1-based; convert to an array index.
  const choice = parseInt(reply, 10) - 1;
  const outOfRange = choice < 0 || choice >= candidates.length;
  if (isNaN(choice) || outOfRange) {
    console.error("Invalid selection.");
    process.exit(1);
  }

  const picked = candidates[choice]!;
  return { hostname: picked.hostname, serverIp: picked.serverIp, httpPort: 8080 };
}
/**
 * Build the iPXE script embedded in a locally generated boot ISO.
 *
 * The script runs DHCP, retries once after a 3 second pause if it fails, and
 * then chainloads `<bastionUrl>/boot.ipxe`. A second DHCP failure or a failed
 * chainload drops to the iPXE shell.
 *
 * iPXE scripting has no `( … )` command grouping, so the retry is expressed
 * with labels and `goto` rather than a parenthesized fallback.
 *
 * @param bastionUrl Base HTTP URL of the bastion, without a trailing slash.
 * @returns The script text, lines joined with "\n".
 */
function ipxeIsoBootScript(bastionUrl: string): string {
  return [
    "#!ipxe",
    "",
    "echo Booting from iPXE ISO -- connecting to bastion...",
    "dhcp || goto dhcp_retry",
    "goto boot",
    ":dhcp_retry",
    "echo DHCP failed, retrying...",
    "sleep 3",
    "dhcp || shell",
    ":boot",
    `chain ${bastionUrl}/boot.ipxe || shell`,
  ].join("\n");
}

/**
 * Register `makeiso`: produce (or point to) a UEFI-bootable iPXE ISO that
 * chainloads from a bastion.
 *
 * A bastion is selected via `selectBastion()`. Without `--local` the command
 * only prints the URL of the ISO served by that bastion. With `--local` it
 * reads the iPXE EFI binaries for the requested architectures from
 * `IPXE_PATHS`, builds the ISO with `buildBootIso`, and writes it to `--out`.
 * A missing binary or unknown architecture exits with status 1.
 *
 * @param parent Command under which `makeiso` is registered.
 */
export function registerMakeIsoCommand(parent: Command): void {
  parent
    .command("makeiso")
    .description("Generate a UEFI-bootable iPXE ISO for network provisioning")
    .addOption(
      new Option("--arch <arch...>", "Target architecture(s)")
        .choices(["x86_64", "aarch64"])
        .default(["x86_64", "aarch64"]),
    )
    .option("--local", "Build ISO locally instead of using bastion-hosted URL")
    .option("--out <path>", "Output path for local ISO build", "ipxe-bastion.iso")
    .action(async (opts: { arch: string[]; local?: boolean; out: string }) => {
      const bastion = await selectBastion();
      const bastionUrl = `http://${bastion.serverIp}:${bastion.httpPort}`;

      if (opts.local) {
        console.log(`\nGenerating iPXE boot ISO...`);
        console.log(` Architectures: ${opts.arch.join(", ")}`);
        console.log(` Bastion: ${bastionUrl}`);

        // Collect one EFI boot binary per requested architecture.
        const efiFiles: Array<{ path: string; data: Buffer }> = [];
        for (const arch of opts.arch) {
          const paths = IPXE_PATHS[arch];
          if (!paths) {
            console.error(`Unknown architecture: ${arch}`);
            process.exit(1);
          }
          if (!existsSync(paths.src)) {
            console.error(`iPXE binary not found: ${paths.src}`);
            console.error(`Install: sudo dnf install ipxe-bootimgs-${arch === "aarch64" ? "aarch64" : "x86"}`);
            process.exit(1);
          }
          efiFiles.push({ path: paths.dest, data: readFileSync(paths.src) });
          console.log(` ${arch}: ${paths.dest.split("/").pop()}`);
        }

        const iso = buildBootIso(efiFiles, ipxeIsoBootScript(bastionUrl));
        writeFileSync(opts.out, iso);
        console.log(`\nISO written to: ${opts.out} (${(iso.length / 1024 / 1024).toFixed(1)}MB)`);
      } else {
        console.log(`\nThe bastion serves a boot ISO with the correct URL embedded.`);
        console.log(`Use this URL in JetKVM or any BMC virtual media:\n`);
        console.log(` ${bastionUrl}/boot.iso`);
      }
      console.log(`\nMount as virtual CD, boot from it. iPXE will chainload from bastion.`);
    });
}

View File

@@ -1,100 +1,161 @@
// CLI command: provision reprovision
// Queue a machine for reinstall and attempt SSH reboot into PXE.
// Queue a machine for reinstall and attempt SSH reboot into PXE via labd.
import { execFileSync } from "node:child_process";
import { existsSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import type { Command } from "commander";
import { Command, Option } from "commander";
import type { BastionState } from "@lab/shared";
import { isValidOsId, SUPPORTED_OS, SUPPORTED_ROLES, ROLE_REGISTRY } from "@lab/shared";
import { getLabdClient } from "../api/config.js";
/**
 * Render the role registry as help text: a blank line, an "Available roles:"
 * heading, then one row per role with its name (padded to 16 columns),
 * description, optional parent role and optional auto-deployed apps.
 *
 * @returns The rows joined with newlines.
 */
function roleTable(): string {
  const rows = ROLE_REGISTRY.map((role) => {
    const inherits = role.parent ? ` (extends ${role.parent})` : "";
    const autoApps = role.apps.length > 0 ? ` [auto: ${role.apps.join(", ")}]` : "";
    return ` ${role.name.padEnd(16)} ${role.description}${inherits}${autoApps}`;
  });
  return ["", "Available roles:", ...rows].join("\n");
}
/**
 * Resolve a target (hostname, MAC, or IP) to `{ mac, hostname, ip }` using
 * the given state snapshot.
 *
 * Lookup order:
 *  1. the target, lower-cased with dashes turned into colons, as a key of
 *     `state.installed`;
 *  2. the same key in `state.discovered` (hostname is the MAC, ip is empty);
 *  3. an installed machine whose hostname equals the target or starts with
 *     the target followed by a dot;
 *  4. an installed machine whose IP equals the target.
 *
 * @returns The first match, or `null` when the target is unknown.
 */
function resolveTarget(
  target: string,
  state: BastionState,
): { mac: string; hostname: string; ip: string } | null {
  const macKey = target.toLowerCase().replace(/-/g, ":");

  const byMac = state.installed[macKey];
  if (byMac) {
    return { mac: macKey, hostname: byMac.hostname, ip: byMac.ip };
  }
  if (state.discovered[macKey]) {
    return { mac: macKey, hostname: macKey, ip: "" };
  }

  // Hostname matches take precedence over IP matches across all machines.
  const installed = Object.entries(state.installed);
  const hit =
    installed.find(([, info]) => info.hostname === target || info.hostname.startsWith(target + ".")) ??
    installed.find(([, info]) => info.ip === target);
  if (hit === undefined) {
    return null;
  }

  const [mac, info] = hit;
  return { mac, hostname: info.hostname, ip: info.ip };
}
export function registerReprovisionCommand(parent: Command): void {
parent
.command("reprovision <mac> <hostname>")
.description("Queue install + SSH reboot into PXE for reprovision")
.option("--role <role>", "Machine role: worker or infra", "worker")
.command("reprovision <target> [hostname]")
.description("Queue install + SSH reboot into PXE (target: hostname, MAC, or IP)")
.showHelpAfterError(true)
.addHelpText("after", roleTable())
.addOption(new Option("--role <role>", "Machine role (see below)").choices([...SUPPORTED_ROLES]).default("worker"))
.addOption(new Option("--os <os>", "Operating system").choices([...SUPPORTED_OS]).default("fedora-43"))
.option("--disk <device>", "Target disk device (auto-detect if omitted)")
.option("--port <port>", "Bastion HTTP port", "8080")
.action(async (mac: string, hostname: string, opts: {
.action(async (target: string, hostnameOverride: string | undefined, opts: {
role: string;
os: string;
disk?: string;
port: string;
}) => {
const port = parseInt(opts.port, 10);
// Queue the install
const payload: Record<string, string> = {
mac,
hostname,
role: opts.role,
};
if (opts.disk !== undefined) {
payload["disk"] = opts.disk;
if (!isValidOsId(opts.os)) {
console.error(`Unknown OS: ${opts.os}. Supported: ${SUPPORTED_OS.join(", ")}`);
process.exit(1);
}
let state: BastionState;
try {
const installResponse = await fetch(`http://localhost:${port}/api/install`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(payload),
});
const result = await installResponse.json() as Record<string, unknown>;
console.log(JSON.stringify(result, null, 2));
} catch {
console.error(`Cannot reach bastion at localhost:${port}. Is it running?`);
if (!(SUPPORTED_ROLES as readonly string[]).includes(opts.role)) {
console.error(`Unknown role: ${opts.role}`);
console.error(roleTable());
process.exit(1);
}
// Try to find IP from installed state and SSH in to trigger PXE reboot
const client = getLabdClient();
// Resolve target from labd aggregated state
let state: BastionState;
try {
const machinesResponse = await fetch(`http://localhost:${port}/api/machines`);
state = (await machinesResponse.json()) as BastionState;
} catch {
console.log("");
console.log("Could not fetch machine state. Reboot the machine manually into PXE.");
state = await client.getMachines();
} catch (err) {
console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
}
const resolved = resolveTarget(target, state);
if (!resolved) {
console.error(`Cannot find machine: ${target}`);
console.error("Provide a hostname, MAC, or IP of a known machine.");
console.error("Run 'labctl provision list' to see available machines.");
process.exit(1);
}
const mac = resolved.mac;
const hostname = hostnameOverride ?? resolved.hostname;
const ip = resolved.ip;
console.log(`Reprovisioning ${hostname} (${mac})${ip ? ` at ${ip}` : ""}...`);
console.log(` Role: ${opts.role} OS: ${opts.os}`);
console.log("");
// Queue the install via labd
try {
const result = await client.installMachine({
mac,
hostname,
role: opts.role,
os: opts.os,
...(opts.disk ? { disk: opts.disk } : {}),
});
console.log(JSON.stringify(result, null, 2));
} catch (err) {
console.error(`Failed to queue install: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
}
// Try SSH reboot into PXE
if (ip === "") {
console.log("\nNo IP known. Reboot the machine manually into PXE.");
return;
}
const installedEntry = state.installed[mac.toLowerCase().replace(/-/g, ":")];
const ip = installedEntry?.ip ?? "";
const adminUser = process.env["SUDO_USER"] ?? process.env["USER"] ?? "";
const effectiveUser = adminUser === "root" ? "" : adminUser;
if (ip !== "" && effectiveUser !== "") {
console.log("");
console.log(`Attempting SSH reboot into PXE (${effectiveUser}@${ip})...`);
// Find SSH key
const sudoUser = process.env["SUDO_USER"];
const realHome = sudoUser !== undefined
? join("/home", sudoUser)
: homedir();
const keyPaths = [
join(realHome, ".ssh", "id_ed25519"),
join(realHome, ".ssh", "id_rsa"),
join(realHome, ".ssh", "id_ecdsa"),
];
const sshKey = keyPaths.find(k => existsSync(k));
const sshArgs = [
"-o", "StrictHostKeyChecking=no",
"-o", "ConnectTimeout=10",
...(sshKey !== undefined ? ["-i", sshKey] : []),
`${effectiveUser}@${ip}`,
'PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi',
];
try {
execFileSync("ssh", sshArgs, { stdio: "inherit" });
} catch {
// SSH connection closing during reboot is expected
}
console.log("");
console.log("Machine is rebooting into PXE. Install will start automatically.");
} else {
console.log("");
console.log("No IP known for this machine. Reboot it manually into PXE.");
if (effectiveUser === "") {
console.log("\nReboot the machine manually into PXE.");
return;
}
console.log(`\nAttempting SSH reboot into PXE (${effectiveUser}@${ip})...`);
const sudoUser = process.env["SUDO_USER"];
const realHome = sudoUser !== undefined ? join("/home", sudoUser) : homedir();
const keyPaths = [
join(realHome, ".ssh", "id_ed25519"),
join(realHome, ".ssh", "id_rsa"),
join(realHome, ".ssh", "id_ecdsa"),
];
const sshKey = keyPaths.find(k => existsSync(k));
const sshArgs = [
"-o", "StrictHostKeyChecking=no",
"-o", "ConnectTimeout=10",
...(sshKey !== undefined ? ["-i", sshKey] : []),
`${effectiveUser}@${ip}`,
'PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi',
];
try {
execFileSync("ssh", sshArgs, { stdio: "inherit" });
} catch {
// SSH connection closing during reboot is expected
}
console.log("");
console.log("Machine is rebooting into PXE. Install will start automatically.");
});
}

View File

@@ -2,7 +2,7 @@
// Start the bastion server (HTTP + dnsmasq), daemonized by default.
import { spawn, type ChildProcess } from "node:child_process";
import { existsSync, readFileSync } from "node:fs";
import { existsSync, readFileSync, openSync, mkdirSync } from "node:fs";
import type { Command } from "commander";
import { startBastion } from "@lab/bastion";
@@ -34,6 +34,13 @@ export function registerStartCommand(parent: Command): void {
skipArtifacts?: boolean;
foreground?: boolean;
}) => {
// Check root early (before daemonize) so the error is visible
if (!opts.skipDnsmasq && process.getuid?.() !== 0) {
console.error("Must run as root (dnsmasq needs DHCP/TFTP ports).");
console.error("Usage: sudo labctl init bastion standalone start");
process.exit(1);
}
if (opts.foreground === true) {
// Run in foreground
await startBastion({
@@ -51,55 +58,88 @@ export function registerStartCommand(parent: Command): void {
return;
}
// Daemonize: spawn ourselves with --foreground and detach
// Daemonize: re-run with --foreground, redirect output to log file
mkdirSync(opts.dir, { recursive: true });
const logFile = `${opts.dir}/bastion.log`;
const args = process.argv.slice(1);
// Add --foreground flag
args.push("--foreground");
const child: ChildProcess = spawn(process.argv[0] ?? "labctl", args, {
// Build explicit argument list instead of re-using process.argv
// (which breaks with bun-compiled binaries)
const fgArgs = [
"init", "bastion", "standalone", "start", "--foreground",
"--port", opts.port,
"--dir", opts.dir,
"--domain", opts.domain,
"--dhcp-mode", opts.dhcpMode,
"--fedora", opts.fedora,
"--arch", opts.arch,
"--timezone", opts.timezone,
"--locale", opts.locale,
];
if (opts.skipDnsmasq) fgArgs.push("--skip-dnsmasq");
if (opts.skipArtifacts) fgArgs.push("--skip-artifacts");
// Determine how to re-invoke ourselves
const execPath = process.argv[0] ?? "labctl";
let spawnCmd: string;
let spawnArgs: string[];
if (execPath.includes("node") || execPath.includes("tsx")) {
const scriptPath = process.argv[1];
spawnCmd = execPath;
spawnArgs = scriptPath ? [scriptPath, ...fgArgs] : fgArgs;
} else {
spawnCmd = execPath;
spawnArgs = fgArgs;
}
// Open log file for the child's stdout/stderr so it survives parent exit
const logFd = openSync(logFile, "a");
const child: ChildProcess = spawn(spawnCmd, spawnArgs, {
detached: true,
stdio: ["ignore", "pipe", "pipe"],
stdio: ["ignore", logFd, logFd],
});
// Collect initial output to confirm startup
let output = "";
const timeout = setTimeout(() => {
child.stdout?.removeAllListeners();
child.stderr?.removeAllListeners();
child.unref();
console.log(`Bastion starting in background (PID ${child.pid})`);
console.log(`Log: ${logFile}`);
process.exit(0);
}, 3000);
// Wait briefly for the child to start, then check it's alive
await new Promise((resolve) => setTimeout(resolve, 3000));
child.stdout?.on("data", (data: Buffer) => {
output += data.toString();
process.stdout.write(data);
if (output.includes("Waiting for PXE boot requests")) {
clearTimeout(timeout);
child.stdout?.removeAllListeners();
child.stderr?.removeAllListeners();
child.unref();
// Check PID file
const pidFile = `${opts.dir}/bastion.pid`;
const pid = existsSync(pidFile) ? readFileSync(pidFile, "utf-8").trim() : String(child.pid);
console.log("");
console.log(`Bastion running in background (PID ${pid})`);
console.log(`Log: ${logFile}`);
process.exit(0);
// Check if child is still running
try {
process.kill(child.pid!, 0); // signal 0 = check existence
} catch {
// Child already died — show the log
console.error("Bastion failed to start. Log output:");
console.error("");
try {
const log = readFileSync(logFile, "utf-8");
const lines = log.trim().split("\n").slice(-20);
for (const line of lines) {
console.error(" " + line);
}
} catch {
console.error(" (no log output)");
}
});
process.exit(1);
}
child.stderr?.on("data", (data: Buffer) => {
process.stderr.write(data);
});
child.unref();
child.on("exit", (code) => {
clearTimeout(timeout);
console.error(`Bastion exited with code ${code}`);
process.exit(code ?? 1);
});
// Print startup info from the log
try {
const log = readFileSync(logFile, "utf-8");
process.stdout.write(log);
} catch {
// No log yet
}
const pidFile = `${opts.dir}/bastion.pid`;
const pid = existsSync(pidFile)
? readFileSync(pidFile, "utf-8").trim()
: String(child.pid);
console.log("");
console.log(`Bastion running in background (PID ${pid})`);
console.log(`Log: ${logFile}`);
process.exit(0);
});
}

View File

@@ -1,67 +1,42 @@
// CLI command: init bastion standalone status
// Check if bastion is running, show port/uptime/machine count.
// Show connected bastions and their machine counts via labd.
import { readFileSync, existsSync, statSync } from "node:fs";
import type { Command } from "commander";
import type { BastionState } from "@lab/shared";
import { getLabdClient } from "../api/config.js";
import { execSync } from "node:child_process";
/**
 * Report whether a process with the given PID currently exists.
 *
 * The check shells out rather than calling process.kill(pid, 0): per the
 * original note, signal 0 fails for root-owned processes when the caller is
 * not root, so a /proc/<pid> directory lookup is used as a fallback.
 *
 * @param pid Process id to probe.
 * @returns true when either probe succeeds, false otherwise.
 */
function isProcessAlive(pid: number): boolean {
  const probe = `kill -0 ${pid} 2>/dev/null || test -d /proc/${pid}`;
  let alive = true;
  try {
    // execSync throws when the shell pipeline exits with a non-zero status.
    execSync(probe, { stdio: "pipe" });
  } catch {
    alive = false;
  }
  return alive;
}
const BOLD = "\x1b[1m";
const GREEN = "\x1b[32m";
const RED = "\x1b[31m";
const DIM = "\x1b[2m";
const RESET = "\x1b[0m";
export function registerStatusCommand(parent: Command): void {
parent
.command("status")
.description("Show bastion server status")
.option("--dir <dir>", "Bastion data directory", "/tmp/lab-bastion")
.option("--port <port>", "Bastion HTTP port", "8080")
.action(async (opts: { dir: string; port: string }) => {
const pidFile = `${opts.dir}/bastion.pid`;
const port = parseInt(opts.port, 10);
if (!existsSync(pidFile)) {
console.log("Bastion is not running (no PID file).");
return;
}
const pid = parseInt(readFileSync(pidFile, "utf-8").trim(), 10);
if (isNaN(pid) || !isProcessAlive(pid)) {
console.log("Bastion is not running (stale PID file).");
return;
}
// Calculate uptime from PID file mtime
const pidStat = statSync(pidFile);
const uptimeMs = Date.now() - pidStat.mtimeMs;
const uptimeMin = Math.floor(uptimeMs / 60_000);
const uptimeHr = Math.floor(uptimeMin / 60);
const uptimeStr = uptimeHr > 0
? `${uptimeHr}h ${uptimeMin % 60}m`
: `${uptimeMin}m`;
console.log(`Bastion is running (PID ${pid})`);
console.log(` Port: ${port}`);
console.log(` Uptime: ${uptimeStr}`);
// Try to fetch machine count
.action(async () => {
try {
const response = await fetch(`http://localhost:${port}/api/machines`);
const state = (await response.json()) as BastionState;
const discovered = Object.keys(state.discovered).length;
const queued = Object.keys(state.install_queue).length;
const installed = Object.keys(state.installed).length;
console.log(` Machines: ${discovered} discovered, ${queued} queued, ${installed} installed`);
} catch {
console.log(" Machines: (could not reach API)");
const bastions = await getLabdClient().getBastions();
if (bastions.length === 0) {
console.log("No bastions registered.");
return;
}
const pad = (s: string, w: number) => s.padEnd(w);
console.log(
`${BOLD}${pad("HOSTNAME", 24)}${pad("NETWORK", 18)}${pad("IP", 18)}${pad("STATUS", 10)}${pad("MACHINES", 10)}${RESET}`,
);
for (const b of bastions) {
const statusColor = b.status === "online" ? GREEN : RED;
console.log(
`${pad(b.hostname, 24)}${DIM}${pad(b.network, 18)}${RESET}${pad(b.serverIp, 18)}${statusColor}${pad(b.status, 10)}${RESET}${pad(String(b.machineCount), 10)}`,
);
}
} catch (err) {
console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
}
});
}

View File

@@ -0,0 +1,111 @@
// CLI configuration management.
// Loads from: defaults -> ~/.labctl/config.yaml -> env vars -> CLI flags.
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
/** Settings resolved by the CLI at startup; only `labdUrl` is mandatory. */
export interface CliConfig {
  labdUrl: string;
  certPath?: string;
  keyPath?: string;
  caPath?: string;
  defaultEnvironment?: string;
  defaultCloud?: string;
  outputFormat?: "table" | "json" | "yaml";
}

/** Per-user directory holding the CLI configuration (~/.labctl). */
export const CONFIG_DIR = join(homedir(), ".labctl");
/** Flat key/value configuration file inside CONFIG_DIR. */
export const CONFIG_FILE = join(CONFIG_DIR, "config.yaml");
/** Certificate directory inside CONFIG_DIR. */
export const CERT_DIR = join(CONFIG_DIR, "certs");

// Typed as a set of plain strings so membership can be tested without a cast;
// the generic argument still forces every entry to be a real CliConfig key.
const VALID_KEYS: ReadonlySet<string> = new Set<keyof CliConfig>([
  "labdUrl",
  "certPath",
  "keyPath",
  "caPath",
  "defaultEnvironment",
  "defaultCloud",
  "outputFormat",
]);

/**
 * Type guard: true when `key` names a property of CliConfig.
 *
 * @param key Arbitrary string, e.g. a key read from the config file.
 */
export function isValidConfigKey(key: string): key is keyof CliConfig {
  return VALID_KEYS.has(key);
}
/**
 * Build the effective CLI configuration.
 *
 * Layers are applied in order, later ones winning:
 *   1. built-in defaults,
 *   2. the flat key/value file at CONFIG_FILE (unknown keys and empty values
 *      are ignored; an unreadable file is skipped),
 *   3. the LABD_URL, LABCTL_ENV, LABCTL_CLOUD and LABCTL_OUTPUT environment
 *      variables.
 *
 * `outputFormat` is only accepted when it is one of "table", "json" or
 * "yaml", regardless of whether it comes from the file or the environment,
 * so the returned object always satisfies the CliConfig type.
 *
 * @returns A freshly allocated configuration object.
 */
export function loadConfig(): CliConfig {
  const isOutputFormat = (
    value: string,
  ): value is NonNullable<CliConfig["outputFormat"]> =>
    value === "table" || value === "json" || value === "yaml";

  // 1. Defaults
  const config: CliConfig = {
    labdUrl: "http://localhost:3100",
  };

  // 2. Config file overrides
  if (existsSync(CONFIG_FILE)) {
    try {
      const parsed = parseSimpleYaml(readFileSync(CONFIG_FILE, "utf-8"));
      for (const [key, value] of Object.entries(parsed)) {
        if (!isValidConfigKey(key) || value === "") continue;
        if (key === "outputFormat") {
          // Reject values outside the union instead of casting them through.
          if (isOutputFormat(value)) config.outputFormat = value;
          continue;
        }
        config[key] = value;
      }
    } catch {
      // Deliberately best-effort: an unreadable config file must not stop
      // the CLI from starting with defaults and environment overrides.
    }
  }

  // 3. Environment variable overrides (empty values count as unset)
  const envUrl = process.env["LABD_URL"];
  if (envUrl) config.labdUrl = envUrl;
  const envEnvironment = process.env["LABCTL_ENV"];
  if (envEnvironment) config.defaultEnvironment = envEnvironment;
  const envCloud = process.env["LABCTL_CLOUD"];
  if (envCloud) config.defaultCloud = envCloud;
  const envFormat = process.env["LABCTL_OUTPUT"];
  if (envFormat && isOutputFormat(envFormat)) config.outputFormat = envFormat;

  return config;
}
/**
 * Persist a configuration to CONFIG_FILE as flat "key: value" lines.
 *
 * CONFIG_DIR is created (mode 0o700) when it does not exist yet; the file is
 * written with mode 0o600. Properties whose value is undefined are omitted.
 *
 * @param config Configuration to write.
 */
export function saveConfig(config: CliConfig): void {
  if (!existsSync(CONFIG_DIR)) {
    mkdirSync(CONFIG_DIR, { recursive: true, mode: 0o700 });
  }
  const body = Object.entries(config)
    .filter(([, value]) => value !== undefined)
    .map(([key, value]) => `${key}: ${value}`)
    .join("\n");
  writeFileSync(CONFIG_FILE, body + "\n", { mode: 0o600 });
}
/**
 * Read one configuration value as a string.
 *
 * @returns The stringified value, or "" when the key is unset.
 */
export function getConfigValue(config: CliConfig, key: keyof CliConfig): string {
  const current = config[key];
  if (current === undefined || current === null) {
    return "";
  }
  return String(current);
}

/**
 * Return a copy of `config` with `key` set to `value`.
 * The input object is not modified.
 */
export function setConfigValue(config: CliConfig, key: keyof CliConfig, value: string): CliConfig {
  return Object.assign({}, config, { [key]: value });
}
/**
 * Tiny parser for flat "key: value" files (no nesting, lists or multi-line
 * values).
 *
 * Blank lines, lines starting with "#" and lines without a colon are skipped.
 * The key is everything before the first colon; the value is the remainder,
 * trimmed, with one pair of matching surrounding quotes removed.
 */
function parseSimpleYaml(raw: string): Record<string, string> {
  const entries: Record<string, string> = {};
  const wrappedIn = (text: string, quote: string): boolean =>
    text.startsWith(quote) && text.endsWith(quote);

  raw.split("\n").forEach((rawLine) => {
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#")) return;

    const colon = line.indexOf(":");
    if (colon < 0) return;

    const key = line.slice(0, colon).trim();
    const rest = line.slice(colon + 1).trim();
    const quoted = wrappedIn(rest, '"') || wrappedIn(rest, "'");
    entries[key] = quoted ? rest.slice(1, -1) : rest;
  });

  return entries;
}

View File

@@ -5,8 +5,9 @@
// provision list/install/reprovision/forget
import { fileURLToPath } from "node:url";
import { Command } from "commander";
import { Command, Option } from "commander";
import { APP_VERSION } from "@lab/shared";
import { loadConfig } from "./config/index.js";
import { registerStartCommand } from "./commands/serve.js";
import { registerStopCommand } from "./commands/stop.js";
import { registerStatusCommand } from "./commands/status.js";
@@ -14,15 +15,65 @@ import { registerInstallCommand } from "./commands/install.js";
import { registerListCommand } from "./commands/list.js";
import { registerReprovisionCommand } from "./commands/reprovision.js";
import { registerForgetCommand } from "./commands/forget.js";
import { registerLogsCommand } from "./commands/logs.js";
import { registerMakeIsoCommand } from "./commands/makeiso.js";
import { registerConfigCommand } from "./commands/config.js";
import { registerLoginCommand } from "./commands/login.js";
import { registerDoctorCommand } from "./commands/doctor.js";
import { registerAppCommand } from "./commands/app.js";
import { ROLE_REGISTRY } from "@lab/shared";
export function createProgram(): Command {
const program = new Command();
program
.name("labctl")
.description("Lab PXE Bastion -- discover-first bare-metal provisioning")
.description("Lab infrastructure management CLI")
.version(APP_VERSION);
// Global options
program
.addOption(
new Option("-o, --output <format>", "output format")
.choices(["table", "json", "yaml"])
.default("table"),
)
.option("--server <url>", "override labd server URL")
.option("--env <name>", "override default environment")
.option("--cloud <name>", "override default cloud")
.option("--debug", "enable debug output")
.option("--no-color", "disable colored output");
// preAction hook: load config, apply CLI overrides, store merged config
program.hook("preAction", (thisCommand) => {
const config = loadConfig();
const opts = thisCommand.opts();
if (opts.output) config.outputFormat = opts.output;
if (opts.server) config.labdUrl = opts.server;
if (opts.env) config.defaultEnvironment = opts.env;
if (opts.cloud) config.defaultCloud = opts.cloud;
if (opts.debug) {
process.env["DEBUG"] = "1";
}
if (opts.color === false) {
process.env["NO_COLOR"] = "1";
}
thisCommand.setOptionValue("_config", config);
});
// version subcommand
program
.command("version")
.description("Show version information")
.action(() => {
console.log(`labctl ${APP_VERSION}`);
console.log(`node ${process.version}`);
console.log(`platform ${process.platform} ${process.arch}`);
});
// init bastion standalone start/stop/status
const initCmd = program.command("init");
initCmd.description("Initialise infrastructure components");
@@ -45,6 +96,39 @@ export function createProgram(): Command {
registerInstallCommand(provisionCmd);
registerReprovisionCommand(provisionCmd);
registerForgetCommand(provisionCmd);
registerLogsCommand(provisionCmd);
registerMakeIsoCommand(provisionCmd);
// config list/get/set/path
registerConfigCommand(program);
// login
registerLoginCommand(program);
// doctor
registerDoctorCommand(program);
// app k3s install/health + labcontroller
registerAppCommand(program);
// roles — quick reference
program
.command("roles")
.description("List available machine roles")
.action(() => {
const BOLD = "\x1b[1m";
const DIM = "\x1b[2m";
const RESET = "\x1b[0m";
const pad = (s: string, w: number) => s.padEnd(w);
console.log(`${BOLD}${pad("ROLE", 18)}${pad("EXTENDS", 12)}${pad("K3S", 6)}${pad("AUTO-DEPLOY", 30)}DESCRIPTION${RESET}`);
for (const r of ROLE_REGISTRY) {
const k3s = r.k3s ? "yes" : "no";
const apps = r.apps.length > 0 ? r.apps.join(", ") : "—";
const parent = r.parent ?? "—";
console.log(`${pad(r.name, 18)}${DIM}${pad(parent, 12)}${RESET}${pad(k3s, 6)}${pad(apps, 30)}${r.description}`);
}
});
return program;
}

View File

@@ -0,0 +1,27 @@
// Public API for CLI utility functions.
export {
parseResource,
formatResource,
validateServerName,
type ResourceType,
type ResourceIdentifier,
} from "./resource.js";
export {
printTable,
formatStatus,
formatRelativeTime,
formatOutput,
serverColumns,
roleColumns,
type TableColumn,
type Role,
} from "./table.js";
export {
isInteractive,
confirmAction,
promptInput,
promptPassword,
} from "./prompts.js";

View File

@@ -0,0 +1,48 @@
// Interactive CLI prompts with non-interactive mode support.
import { createInterface } from "node:readline";
import { Writable } from "node:stream";
/**
 * Decide whether the CLI may ask the user questions.
 *
 * Prompting is disabled when LABCTL_YES or CI is exactly "true"; otherwise it
 * is enabled only when stdin is a TTY.
 */
export function isInteractive(): boolean {
  const forcedOff =
    process.env["LABCTL_YES"] === "true" || process.env["CI"] === "true";
  return forcedOff ? false : Boolean(process.stdin.isTTY);
}
/**
 * Ask a yes/no question.
 *
 * In non-interactive mode the action is confirmed without asking. Otherwise
 * an empty reply selects `defaultValue`, and any reply starting with "y"
 * (case-insensitive) counts as yes.
 *
 * @param message Question text shown before the [Y/n] / [y/N] hint.
 * @param defaultValue Result for an empty reply; also picks the hint shown.
 */
export async function confirmAction(
  message: string,
  defaultValue = false,
): Promise<boolean> {
  if (!isInteractive()) {
    return true;
  }
  const reply = await prompt(`${message} ${defaultValue ? "[Y/n]" : "[y/N]"} `);
  return reply === "" ? defaultValue : reply.toLowerCase().startsWith("y");
}
/**
 * Ask for a line of free-form input.
 *
 * When a default is supplied and the session is non-interactive, the default
 * is returned without prompting. Otherwise the user is asked; an empty reply
 * falls back to the default, or to "" when there is none.
 *
 * @param message Prompt text (a ": " suffix is appended).
 * @param defaultValue Optional fallback, shown in parentheses when non-empty.
 */
export async function promptInput(
  message: string,
  defaultValue?: string,
): Promise<string> {
  if (defaultValue !== undefined && !isInteractive()) {
    return defaultValue;
  }
  const hint = defaultValue ? ` (${defaultValue})` : "";
  const reply = await prompt(`${message}${hint}: `);
  return reply || defaultValue || "";
}
/**
 * Ask for a secret without echoing the typed characters.
 *
 * The prompt text is written to stdout, after which everything readline
 * would echo is discarded. When stdin is a TTY, readline is forced into
 * terminal mode so that it, rather than the terminal driver, owns the echo.
 * If the input closes before a line is entered (EOF), the promise resolves
 * to "" instead of never settling.
 *
 * @param message Prompt text, written verbatim.
 * @returns The entered line, trimmed.
 */
export async function promptPassword(message: string): Promise<string> {
  return new Promise((resolve) => {
    let muted = false;
    let answered = false;
    const maskedOutput = new Writable({
      write(chunk, encoding, callback) {
        if (!muted) process.stdout.write(chunk, encoding);
        callback();
      },
    });
    const rl = createInterface({
      input: process.stdin,
      output: maskedOutput,
      terminal: process.stdin.isTTY === true,
    });
    rl.once("close", () => {
      if (!answered) resolve("");
    });
    rl.question(message, (answer) => {
      answered = true;
      rl.close();
      // The newline typed by the user was swallowed along with the echo.
      if (process.stdin.isTTY) process.stdout.write("\n");
      resolve(answer.trim());
    });
    // question() has written the prompt synchronously; hide what follows.
    muted = true;
  });
}
/**
 * Print `message` and read one line from stdin.
 *
 * @returns The trimmed answer, or "" when the input is closed (EOF, Ctrl+D)
 *   before a line was entered. Without that fallback the promise would never
 *   settle and callers awaiting it would hang or exit silently.
 */
function prompt(message: string): Promise<string> {
  return new Promise((resolve) => {
    const rl = createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    let answered = false;
    rl.once("close", () => {
      // Also fires after a normal answer (rl.close() below); only act on an
      // early close.
      if (!answered) resolve("");
    });
    rl.question(message, (answer) => {
      answered = true;
      rl.close();
      resolve(answer.trim());
    });
  });
}

View File

@@ -0,0 +1,129 @@
// Resource name parsing and validation utilities.
// Handles "type/name" and "type/namespace/name" resource identifiers
// used throughout the CLI for addressing lab platform objects.
/** Every resource type that can appear as the first segment of an identifier. */
export type ResourceType =
  | "server"
  | "app"
  | "cluster"
  | "role"
  | "user"
  | "pulumi"
  | "bastion"
  | "agent"
  | "audit";

// Runtime mirror of ResourceType; insertion order is the order used in the
// "Valid types" error message.
const VALID_RESOURCE_TYPES: ReadonlySet<string> = new Set<ResourceType>([
  "server",
  "app",
  "cluster",
  "role",
  "user",
  "pulumi",
  "bastion",
  "agent",
  "audit",
]);

/** Structured form of a resource string: a type, a name, and maybe a namespace. */
export interface ResourceIdentifier {
  type: ResourceType;
  name: string;
  namespace?: string;
}

/**
 * Turn a resource string into a ResourceIdentifier.
 *
 * Examples:
 *   "server/myhost"           -> { type: "server", name: "myhost" }
 *   "app/production/frontend" -> { type: "app", name: "frontend", namespace: "production" }
 *
 * Only the first slash after the type separates namespace from name; any
 * further slashes stay part of the name.
 *
 * @throws Error when the string has no "type/..." shape, the type is not a
 *   known ResourceType, or a namespace/name segment is empty.
 */
export function parseResource(input: string): ResourceIdentifier {
  const parts = /^([a-z-]+)\/(.+)$/.exec(input);
  if (parts === null) {
    throw new Error(
      `Invalid resource format: "${input}". Expected "type/name" or "type/namespace/name".`,
    );
  }

  // Both groups are mandatory in the pattern, so they are always present.
  const rawType = parts[1] ?? "";
  const rest = parts[2] ?? "";

  if (!VALID_RESOURCE_TYPES.has(rawType)) {
    const valid = Array.from(VALID_RESOURCE_TYPES).join(", ");
    throw new Error(
      `Unknown resource type "${rawType}". Valid types: ${valid}.`,
    );
  }
  const type = rawType as ResourceType;

  const [head = "", ...tail] = rest.split("/");
  if (tail.length === 0) {
    return { type, name: rest };
  }

  const namespace = head;
  const name = tail.join("/");
  if (namespace === "" || name === "") {
    throw new Error(
      `Invalid resource format: "${input}". Namespace and name must not be empty.`,
    );
  }
  return { type, name, namespace };
}
/**
 * Render a ResourceIdentifier as a string.
 *
 * Produces "type/namespace/name" when a namespace is set and "type/name"
 * otherwise.
 */
export function formatResource(resource: ResourceIdentifier): string {
  const { type, namespace, name } = resource;
  const scope = namespace !== undefined ? `${namespace}/` : "";
  return `${type}/${scope}${name}`;
}
/** Overall hostname shape: lowercase alphanumerics, dots and hyphens, alphanumeric at both ends. */
const HOSTNAME_PATTERN = /^[a-z0-9][a-z0-9.-]*[a-z0-9]$/;
/** One dot-separated label: non-empty, alphanumeric at both ends, hyphens only inside. */
const HOSTNAME_LABEL_PATTERN = /^[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$/;
/** Length limits for hostnames (RFC 1123 / RFC 1035). */
const MAX_HOSTNAME_LENGTH = 253;
const MAX_HOSTNAME_LABEL_LENGTH = 63;

/**
 * Validate that a server name is a legal hostname.
 *
 * Rules:
 * - Must not be empty and must not exceed 253 characters
 * - May contain only lowercase letters, digits, dots, and hyphens
 * - Must start and end with a lowercase letter or digit
 * - Every dot-separated label must be 1-63 characters long and must itself
 *   start and end with a letter or digit (so "a..b" and "a.-b" are rejected)
 * - Single-character names (one lowercase letter or digit) are allowed
 *
 * @throws Error if the name is not a valid hostname.
 */
export function validateServerName(name: string): void {
  if (name.length === 0) {
    throw new Error("Server name must not be empty.");
  }
  if (name.length > MAX_HOSTNAME_LENGTH) {
    throw new Error(
      `Invalid server name "${name}". Must not be longer than ${MAX_HOSTNAME_LENGTH} characters.`,
    );
  }

  // HOSTNAME_PATTERN needs at least two characters, hence the special case.
  const shapeOk =
    name.length === 1 ? /^[a-z0-9]$/.test(name) : HOSTNAME_PATTERN.test(name);
  if (!shapeOk) {
    throw new Error(
      `Invalid server name "${name}". Must contain only lowercase letters, digits, dots, and hyphens, ` +
        "and must start and end with a letter or digit.",
    );
  }

  for (const label of name.split(".")) {
    if (label.length > MAX_HOSTNAME_LABEL_LENGTH) {
      throw new Error(
        `Invalid server name "${name}". Each dot-separated label must be at most ${MAX_HOSTNAME_LABEL_LENGTH} characters long.`,
      );
    }
    if (!HOSTNAME_LABEL_PATTERN.test(label)) {
      throw new Error(
        `Invalid server name "${name}". Each dot-separated label must be non-empty ` +
          "and must start and end with a letter or digit.",
      );
    }
  }
}

View File

@@ -0,0 +1,267 @@
// Table formatting utilities for CLI output.
// Uses plain ANSI escape codes and string padding — no external dependencies.
import type { Server } from "../api/types.js";
// ---------------------------------------------------------------------------
// ANSI escape codes
// ---------------------------------------------------------------------------
const BOLD = "\x1b[1m";
const RESET = "\x1b[0m";
const GREEN = "\x1b[32m";
const RED = "\x1b[31m";
const YELLOW = "\x1b[33m";
const CYAN = "\x1b[36m";
/**
 * Splits text into plain runs (even indexes) and ANSI SGR escape sequences
 * such as "\x1b[32m" (odd indexes).
 */
// eslint-disable-next-line no-control-regex
const ANSI_SGR_SPLIT = /(\x1b\[[0-9;]*m)/;
// ---------------------------------------------------------------------------
// TableColumn interface
// ---------------------------------------------------------------------------
/** Describes a single column in a formatted table. */
export interface TableColumn<T> {
  /** Column header label. */
  header: string;
  /** Property key on T, or a function that extracts the cell value from a row. */
  accessor: keyof T | ((row: T) => string);
  /** Fixed column width (defaults to max of header width and widest cell). */
  width?: number;
  /** Text alignment within the cell. Defaults to 'left'. */
  align?: "left" | "center" | "right";
}
// ---------------------------------------------------------------------------
// printTable
// ---------------------------------------------------------------------------
/** Resolve a cell value from a row using a column's accessor ("-" for null/undefined). */
function cellValue<T>(row: T, accessor: TableColumn<T>["accessor"]): string {
  if (typeof accessor === "function") {
    return accessor(row);
  }
  const raw = row[accessor];
  if (raw === null || raw === undefined) return "-";
  return String(raw);
}

/** Number of characters that occupy terminal columns, i.e. ignoring ANSI SGR escapes. */
function visibleLength(text: string): number {
  return text
    .split(ANSI_SGR_SPLIT)
    .reduce((sum, part, i) => (i % 2 === 0 ? sum + part.length : sum), 0);
}

/**
 * Cut text to at most `width` visible characters without splitting an escape
 * sequence. A RESET is appended when styling was kept, so a truncated cell
 * cannot bleed its colour into the rest of the line.
 */
function truncateVisible(text: string, width: number): string {
  const parts = text.split(ANSI_SGR_SPLIT);
  let remaining = width;
  let styled = false;
  let out = "";
  for (let i = 0; i < parts.length && remaining > 0; i++) {
    const part = parts[i] ?? "";
    if (i % 2 === 1) {
      out += part;
      styled = true;
      continue;
    }
    const kept = part.slice(0, remaining);
    out += kept;
    remaining -= kept.length;
  }
  return styled ? out + RESET : out;
}

/**
 * Pad (or truncate) a string to a given visible width respecting the
 * requested alignment. ANSI colour codes do not count towards the width.
 */
function padCell(text: string, width: number, align: "left" | "center" | "right"): string {
  const visible = visibleLength(text);
  if (visible === width) return text;
  if (visible > width) return truncateVisible(text, width);
  const total = width - visible;
  switch (align) {
    case "right":
      return " ".repeat(total) + text;
    case "center": {
      const left = Math.floor(total / 2);
      return " ".repeat(left) + text + " ".repeat(total - left);
    }
    case "left":
    default:
      return text + " ".repeat(total);
  }
}

/**
 * Format and print a table to stdout.
 *
 * Columns are separated by two spaces. The header row is bold. Column widths
 * are auto-calculated from the visible width of the data unless explicitly
 * set via `width` (an explicit width is never narrower than the header).
 * Each accessor is evaluated exactly once per cell, so time-dependent
 * accessors cannot yield one value for sizing and another for printing.
 *
 * @param data Rows to print; an empty array prints only the header.
 * @param columns Column definitions, left to right.
 */
export function printTable<T>(data: T[], columns: TableColumn<T>[]): void {
  const cells = data.map((row) => columns.map((col) => cellValue(row, col.accessor)));

  const widths = columns.map((col, colIndex) => {
    const headerLen = col.header.length;
    if (col.width !== undefined) return Math.max(col.width, headerLen);
    return cells.reduce(
      (max, rowCells) => Math.max(max, visibleLength(rowCells[colIndex] ?? "")),
      headerLen,
    );
  });

  const gap = "  ";
  const renderRow = (values: readonly string[]): string =>
    columns
      .map((col, i) => padCell(values[i] ?? "", widths[i] ?? 0, col.align ?? "left"))
      .join(gap);

  // Header
  console.log(`${BOLD}${renderRow(columns.map((col) => col.header))}${RESET}`);
  // Rows
  for (const rowCells of cells) {
    console.log(renderRow(rowCells));
  }
}
// ---------------------------------------------------------------------------
// formatStatus
// ---------------------------------------------------------------------------
/**
 * Wrap a status string in the ANSI colour associated with it.
 *
 * Matching is case-insensitive; the original casing is preserved in the
 * output. Statuses without an assigned colour are returned unchanged.
 *
 * - green:  online, installed
 * - red:    offline, error
 * - yellow: queued, installing, provisioning
 * - cyan:   discovered
 */
export function formatStatus(status: string): string {
  const palette: Array<[string, string[]]> = [
    [GREEN, ["online", "installed"]],
    [RED, ["offline", "error"]],
    [YELLOW, ["queued", "installing", "provisioning"]],
    [CYAN, ["discovered"]],
  ];
  const normalised = status.toLowerCase();
  const hit = palette.find(([, names]) => names.includes(normalised));
  if (hit === undefined) {
    return status;
  }
  return `${hit[0]}${status}${RESET}`;
}
// ---------------------------------------------------------------------------
// formatRelativeTime
// ---------------------------------------------------------------------------
/**
 * Convert a timestamp into a human-friendly relative string such as
 * "2m ago", "3h ago", or "5d ago".
 *
 * @param timestamp A Date, a string accepted by the Date constructor, or
 *   null / undefined.
 * @returns "-" for null, undefined, or a timestamp that cannot be parsed
 *   (instead of "NaNd ago"); "just now" for timestamps in the future;
 *   otherwise the age in the largest fitting unit (s, m, h, d).
 */
export function formatRelativeTime(timestamp: Date | string | null | undefined): string {
  if (timestamp === null || timestamp === undefined) return "-";
  const date = typeof timestamp === "string" ? new Date(timestamp) : timestamp;
  const diffMs = Date.now() - date.getTime();
  // An invalid Date yields NaN, which would fall through every comparison.
  if (Number.isNaN(diffMs)) return "-";
  if (diffMs < 0) return "just now";
  const seconds = Math.floor(diffMs / 1000);
  if (seconds < 60) return `${seconds}s ago`;
  const minutes = Math.floor(seconds / 60);
  if (minutes < 60) return `${minutes}m ago`;
  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours}h ago`;
  const days = Math.floor(hours / 24);
  return `${days}d ago`;
}
// ---------------------------------------------------------------------------
// Predefined column sets
// ---------------------------------------------------------------------------
/** Minimal role shape rendered by roleColumns. */
export interface Role {
  name: string;
  description: string;
  permissions: string[];
}

/**
 * Column layout for Server listings. STATUS is colourised via formatStatus
 * and LAST SEEN is the heartbeat rendered by formatRelativeTime.
 */
export const serverColumns: TableColumn<Server>[] = [
  { header: "NAME", accessor: "hostname" },
  { header: "CLOUD", accessor: "cloud" },
  { header: "ENV", accessor: "environment" },
  { header: "ROLE", accessor: "role" },
  { header: "STATUS", accessor: (server) => formatStatus(server.status) },
  { header: "LAST SEEN", accessor: (server) => formatRelativeTime(server.lastHeartbeat) },
];

/** Column layout for Role listings; PERMISSIONS shows the permission count. */
export const roleColumns: TableColumn<Role>[] = [
  { header: "NAME", accessor: "name" },
  { header: "DESCRIPTION", accessor: "description" },
  { header: "PERMISSIONS", accessor: (role) => `${role.permissions.length}` },
];
// ---------------------------------------------------------------------------
// formatOutput — multi-format output dispatcher
// ---------------------------------------------------------------------------
/**
 * Print an array of objects to stdout in the requested format.
 *
 * - `json`:  pretty-printed JSON.
 * - `yaml`:  each item serialised by the built-in minimal YAML writer, with
 *            "---" between items.
 * - `table`: rendered through {@link printTable}; when no columns are given
 *            the data is dumped as JSON instead.
 *
 * @param data Items to render.
 * @param format Output format selector.
 * @param columns Column definitions, only used for `table`.
 */
export function formatOutput<T>(
  data: T[],
  format: "table" | "json" | "yaml",
  columns?: TableColumn<T>[],
): void {
  if (format === "json") {
    console.log(JSON.stringify(data, null, 2));
    return;
  }

  if (format === "yaml") {
    data.forEach((item, idx) => {
      if (idx > 0) console.log("---");
      serializeYaml(item as Record<string, unknown>, 0);
    });
    return;
  }

  if (format === "table") {
    const haveColumns = Boolean(columns) && (columns as TableColumn<T>[]).length > 0;
    if (!haveColumns) {
      // Fallback: dump as JSON when no columns provided.
      console.log(JSON.stringify(data, null, 2));
      return;
    }
    printTable(data, columns as TableColumn<T>[]);
  }
}
// ---------------------------------------------------------------------------
// Minimal YAML serialiser (no dependency)
// ---------------------------------------------------------------------------
/** True when a YAML parser would not read `text` back as the same plain string. */
function yamlNeedsQuotes(text: string): boolean {
  if (text === "" || text !== text.trim()) return true;
  // Control characters (newline, tab, ...) cannot appear in a plain scalar.
  for (let i = 0; i < text.length; i++) {
    if (text.charCodeAt(i) < 0x20) return true;
  }
  return (
    /: |:$| #/.test(text) || // would start a mapping value or a comment
    /^[\[\]{}#&*!|>'"%@`,]/.test(text) || // leading indicator character
    /^[-?:](\s|$)/.test(text) || // block sequence / mapping indicators
    /^(null|~|true|false|yes|no|on|off)$/i.test(text) || // null / boolean look-alikes
    /^[-+]?(\.\d+|\d+(\.\d*)?)([eE][-+]?\d+)?$/.test(text) || // number look-alikes
    /^0[xo][0-9a-f]+$/i.test(text) ||
    /^[-+]?\.(inf|nan)$/i.test(text)
  );
}

/**
 * Single-line YAML form of `value`, or undefined when the value is a
 * non-empty array/object that has to be written as an indented block.
 */
function yamlInline(value: unknown): string | undefined {
  if (value === null || value === undefined) return "null";
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? "null" : value.toISOString();
  }
  if (Array.isArray(value)) return value.length === 0 ? "[]" : undefined;
  if (typeof value === "object") {
    return Object.keys(value).length === 0 ? "{}" : undefined;
  }
  if (typeof value === "string") {
    // A JSON string literal is also a valid YAML double-quoted scalar.
    return yamlNeedsQuotes(value) ? JSON.stringify(value) : value;
  }
  return String(value);
}

/**
 * Write `obj` to stdout as block-style YAML, one console.log call per line.
 *
 * Compared with a naive String() dump this keeps the output parseable:
 * empty arrays/objects are written as `[]` / `{}` (a bare `key:` would read
 * back as null), Dates are written as ISO 8601 strings, and strings (and
 * keys) that would be misread as another type or break the syntax are
 * double-quoted.
 *
 * @param obj Value to serialise.
 * @param indent Nesting depth; each level indents by the width of the
 *   prefix unit below.
 */
function serializeYaml(obj: unknown, indent: number): void {
  const prefix = " ".repeat(indent);

  const inline = yamlInline(obj);
  if (inline !== undefined) {
    console.log(`${prefix}${inline}`);
    return;
  }

  if (Array.isArray(obj)) {
    for (const item of obj) {
      const itemInline = yamlInline(item);
      if (itemInline !== undefined) {
        console.log(`${prefix}- ${itemInline}`);
      } else {
        console.log(`${prefix}-`);
        serializeYaml(item, indent + 1);
      }
    }
    return;
  }

  for (const [key, value] of Object.entries(obj as Record<string, unknown>)) {
    const safeKey = yamlNeedsQuotes(key) ? JSON.stringify(key) : key;
    const valueInline = yamlInline(value);
    if (valueInline !== undefined) {
      console.log(`${prefix}${safeKey}: ${valueInline}`);
    } else {
      console.log(`${prefix}${safeKey}:`);
      serializeYaml(value, indent + 1);
    }
  }
}

View File

@@ -0,0 +1,56 @@
// Tests for LabdApiError.
import { describe, it, expect } from "vitest";
import { LabdApiError, isLabdApiError } from "../src/api/errors.js";
// Unit tests for LabdApiError: direct construction plus the static factories
// fromResponse, notConnected and timeout.
describe("LabdApiError", () => {
// Direct construction with a 404: the test expects errorCode "NOT_FOUND"
// even though only a status and a message are passed in.
it("constructs with status code and message", () => {
const err = new LabdApiError(404, "Not found");
expect(err.statusCode).toBe(404);
expect(err.message).toBe("Not found");
expect(err.errorCode).toBe("NOT_FOUND");
});
// Object body: `error` becomes the message and `detail` is carried over.
it("fromResponse parses error body", () => {
const err = LabdApiError.fromResponse(400, {
error: "Invalid input",
detail: "hostname required",
});
expect(err.statusCode).toBe(400);
expect(err.message).toBe("Invalid input");
expect(err.detail).toBe("hostname required");
});
// Non-object body: the message falls back to "HTTP <status>".
it("fromResponse handles non-object body", () => {
const err = LabdApiError.fromResponse(500, "plain text");
expect(err.statusCode).toBe(500);
expect(err.message).toBe("HTTP 500");
});
// Connection failures are expected to use status 0 and mention the target URL.
it("notConnected creates connection error", () => {
const err = LabdApiError.notConnected("https://localhost:8443");
expect(err.statusCode).toBe(0);
expect(err.errorCode).toBe("CONNECTION_ERROR");
expect(err.message).toContain("localhost:8443");
});
// The timeout message is expected to include the duration in milliseconds.
it("timeout creates timeout error", () => {
const err = LabdApiError.timeout(30000);
expect(err.message).toContain("30000ms");
});
});
// Unit tests for the isLabdApiError type guard: it must accept only
// LabdApiError instances and reject plain Errors and non-error values.
describe("isLabdApiError", () => {
it("returns true for LabdApiError", () => {
expect(isLabdApiError(new LabdApiError(500, "err"))).toBe(true);
});
it("returns false for regular Error", () => {
expect(isLabdApiError(new Error("nope"))).toBe(false);
});
// null and a bare string stand in for arbitrary non-error values.
it("returns false for non-errors", () => {
expect(isLabdApiError(null)).toBe(false);
expect(isLabdApiError("string")).toBe(false);
});
});

View File

@@ -0,0 +1,53 @@
// Tests for CLI configuration management.
import { describe, it, expect, beforeEach, afterEach } from "vitest";
import { mkdirSync, writeFileSync, rmSync, existsSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
// We can't easily test loadConfig() because it uses homedir() for paths.
// Instead, test the parsing and validation logic directly.
import { isValidConfigKey } from "../src/config/index.js";
// Unit tests for isValidConfigKey: the seven accepted keys, plus a few
// strings that must be rejected.
describe("isValidConfigKey", () => {
it("accepts valid keys", () => {
expect(isValidConfigKey("labdUrl")).toBe(true);
expect(isValidConfigKey("certPath")).toBe(true);
expect(isValidConfigKey("keyPath")).toBe(true);
expect(isValidConfigKey("caPath")).toBe(true);
expect(isValidConfigKey("defaultEnvironment")).toBe(true);
expect(isValidConfigKey("defaultCloud")).toBe(true);
expect(isValidConfigKey("outputFormat")).toBe(true);
});
// "LABD_URL" is rejected: it is passed here as a config key and is not one of the accepted keys.
it("rejects invalid keys", () => {
expect(isValidConfigKey("password")).toBe(false);
expect(isValidConfigKey("")).toBe(false);
expect(isValidConfigKey("LABD_URL")).toBe(false);
});
});
// Structural tests for createProgram(): the program is built but never
// parsed or executed, so no command action runs.
describe("CLI program creation", () => {
// The module is imported lazily inside each test rather than at file level.
it("creates program with all commands", async () => {
const { createProgram } = await import("../src/index.js");
const program = createProgram();
// Check top-level commands exist
const commandNames = program.commands.map((c) => c.name());
expect(commandNames).toContain("version");
expect(commandNames).toContain("init");
expect(commandNames).toContain("provision");
expect(commandNames).toContain("config");
expect(commandNames).toContain("login");
expect(commandNames).toContain("doctor");
});
// Only a subset of the global flags is asserted here (--output, --server, --debug).
it("has global options", async () => {
const { createProgram } = await import("../src/index.js");
const program = createProgram();
const optionNames = program.options.map((o) => o.long);
expect(optionNames).toContain("--output");
expect(optionNames).toContain("--server");
expect(optionNames).toContain("--debug");
});
});

View File

@@ -0,0 +1,71 @@
// Tests for resource name parsing utilities.
import { describe, it, expect } from "vitest";
import {
parseResource,
formatResource,
validateServerName,
} from "../src/utils/resource.js";
// Unit tests for parseResource: both accepted formats, every known resource
// type, and the two failure modes (bad format, unknown type).
describe("parseResource", () => {
// Two-segment form: no namespace is set.
it("parses server/name", () => {
const r = parseResource("server/labmaster");
expect(r.type).toBe("server");
expect(r.name).toBe("labmaster");
expect(r.namespace).toBeUndefined();
});
// Three-segment form: the middle segment is the namespace.
it("parses app/namespace/name", () => {
const r = parseResource("app/kube-system/nginx");
expect(r.type).toBe("app");
expect(r.namespace).toBe("kube-system");
expect(r.name).toBe("nginx");
});
it("parses all valid types", () => {
const types = ["server", "app", "cluster", "role", "user", "pulumi", "bastion", "agent", "audit"];
for (const t of types) {
expect(parseResource(`${t}/name`).type).toBe(t);
}
});
// Input without any slash does not match the "type/name" shape.
it("throws on invalid format", () => {
expect(() => parseResource("noslash")).toThrow("Invalid resource format");
});
it("throws on unknown type", () => {
expect(() => parseResource("unknown/name")).toThrow("Unknown resource type");
});
});
// Unit tests for formatResource: plain and namespaced output, plus a
// parse -> format round trip.
describe("formatResource", () => {
it("formats simple resource", () => {
expect(formatResource({ type: "server", name: "w1" })).toBe("server/w1");
});
it("formats namespaced resource", () => {
expect(
formatResource({ type: "app", namespace: "default", name: "nginx" }),
).toBe("app/default/nginx");
});
// Round trip is only exercised for the non-namespaced form.
it("roundtrips with parseResource", () => {
const input = "server/labmaster";
expect(formatResource(parseResource(input))).toBe(input);
});
});
// Unit tests for validateServerName, which signals invalid names by throwing.
describe("validateServerName", () => {
// Covers a hyphenated name, a dotted name and the single-character case.
it("accepts valid hostnames", () => {
expect(() => validateServerName("worker-1")).not.toThrow();
expect(() => validateServerName("web.cluster.local")).not.toThrow();
expect(() => validateServerName("a")).not.toThrow();
});
// Covers a leading hyphen, a trailing hyphen, the empty string and whitespace.
it("rejects invalid hostnames", () => {
expect(() => validateServerName("-start")).toThrow();
expect(() => validateServerName("end-")).toThrow();
expect(() => validateServerName("")).toThrow();
expect(() => validateServerName("has space")).toThrow();
});
});

View File

@@ -0,0 +1,197 @@
// Smoke tests for bastion CLI commands.
// These tests spawn real processes and verify they work end-to-end.
import { describe, it, expect, afterEach } from "vitest";
import { spawn, execSync, type ChildProcess } from "node:child_process";
import { existsSync, readFileSync, mkdirSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
const CLI_PATH = join(import.meta.dirname, "..", "src", "index.ts");
const TEST_DIR = join(tmpdir(), `lab-bastion-smoke-${process.pid}`);
const PID_FILE = join(TEST_DIR, "bastion.pid");
const LOG_FILE = join(TEST_DIR, "bastion.log");
const TEST_PORT = 18932; // Unlikely to conflict
/**
 * Run the CLI entry point in a child `node --import tsx` process and collect
 * its result.
 *
 * @param args Arguments passed to the CLI after the script path.
 * @param timeoutMs Passed to spawn() as its `timeout` option.
 * @returns Exit code (1 when the child reports none) with captured stdout and
 *   stderr. Rejects if the child emits an "error" event.
 */
function runCli(args: string[], timeoutMs = 10_000): Promise<{ code: number; stdout: string; stderr: string }> {
  const argv = ["--import", "tsx", CLI_PATH, ...args];
  const env = { ...process.env, NODE_NO_WARNINGS: "1" };

  return new Promise((resolve, reject) => {
    const child = spawn("node", argv, { timeout: timeoutMs, env });
    const captured = { stdout: "", stderr: "" };

    child.stdout.on("data", (chunk: Buffer) => {
      captured.stdout += chunk.toString();
    });
    child.stderr.on("data", (chunk: Buffer) => {
      captured.stderr += chunk.toString();
    });
    child.on("close", (exitCode) => {
      resolve({ code: exitCode ?? 1, stdout: captured.stdout, stderr: captured.stderr });
    });
    child.on("error", reject);
  });
}
/** Resolve after roughly `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/** Send SIGTERM to `pid`, ignoring any error (e.g. the process is already gone). */
function killPid(pid: number): void {
  try {
    process.kill(pid, "SIGTERM");
  } catch {
    /* already dead */
  }
}
// End-to-end smoke suite: every case spawns the real CLI (or the foreground
// daemon) and checks its exit code, output, PID file or HTTP endpoint.
describe("bastion smoke tests", () => {
// PID of a foreground daemon started by the current test, if any.
let daemonPid: number | undefined;
// Best-effort teardown after each test; every step swallows its own errors.
afterEach(() => {
// Kill any daemon we started
if (daemonPid) {
killPid(daemonPid);
daemonPid = undefined;
}
// Also try PID file
try {
const pid = parseInt(readFileSync(PID_FILE, "utf-8").trim(), 10);
if (!isNaN(pid)) killPid(pid);
} catch { /* no pid file */ }
// Clean up test directory
try { rmSync(TEST_DIR, { recursive: true, force: true }); } catch { /* ignore */ }
});
it("--help prints usage without error", async () => {
const result = await runCli(["--help"]);
expect(result.code).toBe(0);
expect(result.stdout).toContain("labctl");
expect(result.stdout).toContain("Commands:");
});
it("--version prints version", async () => {
const result = await runCli(["--version"]);
expect(result.code).toBe(0);
// Expect a bare three-part numeric version and nothing else.
expect(result.stdout.trim()).toMatch(/^\d+\.\d+\.\d+$/);
});
it("version subcommand prints detailed info", async () => {
const result = await runCli(["version"]);
expect(result.code).toBe(0);
expect(result.stdout).toContain("labctl");
expect(result.stdout).toContain("node");
expect(result.stdout).toContain("platform");
});
it("config list works without config file", async () => {
const result = await runCli(["config", "list"]);
expect(result.code).toBe(0);
expect(result.stdout).toContain("labdUrl");
});
it("config path prints a path", async () => {
const result = await runCli(["config", "path"]);
expect(result.code).toBe(0);
expect(result.stdout.trim()).toContain(".labctl");
});
it("start without root prints helpful error", async () => {
// Only run if we're NOT root (CI may run as root)
// NOTE: when running as root the test returns early and is reported as passed, not skipped.
if (process.getuid?.() === 0) return;
const result = await runCli([
"init", "bastion", "standalone", "start",
"--dir", TEST_DIR,
"--port", String(TEST_PORT),
]);
expect(result.code).toBe(1);
expect(result.stderr).toContain("root");
expect(result.stderr).toContain("sudo");
});
it("foreground start with --skip-dnsmasq --skip-artifacts works and stays alive", async () => {
mkdirSync(TEST_DIR, { recursive: true });
// Start in foreground as a child process
const child = spawn(
"node",
[
"--import", "tsx",
CLI_PATH,
"init", "bastion", "standalone", "start", "--foreground",
"--skip-dnsmasq", "--skip-artifacts",
"--dir", TEST_DIR,
"--port", String(TEST_PORT),
],
{
env: { ...process.env, NODE_NO_WARNINGS: "1" },
stdio: ["ignore", "pipe", "pipe"],
},
);
// Record the PID so afterEach can kill the daemon if an assertion below fails.
daemonPid = child.pid;
// Collect output
let stdout = "";
child.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
let stderr = "";
child.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
// Wait for the server to start (look for the banner)
// Polls the captured stdout every 200 ms for at most 10 s.
const startedAt = Date.now();
const maxWait = 10_000;
while (Date.now() - startedAt < maxWait) {
if (stdout.includes("Waiting for PXE boot requests")) break;
await sleep(200);
}
expect(stdout).toContain("Waiting for PXE boot requests");
expect(stdout).toContain("HTTP server listening");
// Verify the process is still alive after startup
await sleep(1000);
let alive = false;
try {
// Signal 0 performs an existence check without delivering a signal.
process.kill(child.pid!, 0);
alive = true;
} catch { /* dead */ }
expect(alive).toBe(true);
// Verify PID file was created
expect(existsSync(PID_FILE)).toBe(true);
const pidFromFile = parseInt(readFileSync(PID_FILE, "utf-8").trim(), 10);
expect(pidFromFile).toBe(child.pid);
// Verify HTTP server responds
// NOTE(review): the expect() sits inside the try, so a failed resp.ok assertion
// is also re-thrown with the "HTTP server not responding" prefix.
try {
const resp = await fetch(`http://127.0.0.1:${TEST_PORT}/api/machines`);
expect(resp.ok).toBe(true);
} catch (err) {
// If fetch fails, that's a real problem
throw new Error(`HTTP server not responding: ${err}`);
}
// Clean shutdown
child.kill("SIGTERM");
// Continue once the child closes, or after 3 s if it has not closed by then.
await new Promise<void>((resolve) => {
child.on("close", () => resolve());
setTimeout(resolve, 3000);
});
// Shutdown was requested above, so afterEach no longer needs this PID.
daemonPid = undefined;
}, 20_000);
it("status shows bastion info or reports labd unreachable", async () => {
const result = await runCli([
"init", "bastion", "standalone", "status",
]);
// Status queries labd — may show bastions (if labd running) or error (if not)
// stdout and stderr are combined because either outcome is acceptable.
const output = result.stdout + result.stderr;
expect(output).toMatch(/HOSTNAME|Cannot reach labd|No bastions/i);
});
it("doctor runs without crashing", async () => {
const result = await runCli(["doctor"]);
// Doctor may report errors (no labd running) but should not crash
expect(result.code).toBeLessThanOrEqual(1); // 0 = all ok, 1 = errors found
expect(result.stdout).toContain("diagnostics");
});
});

View File

@@ -8,6 +8,7 @@
"include": ["src/**/*.ts"],
"references": [
{ "path": "../shared" },
{ "path": "../bastion" }
{ "path": "../bastion" },
{ "path": "../modules" }
]
}

View File

@@ -0,0 +1,24 @@
{
"name": "@lab/agent",
"version": "0.1.0",
"private": true,
"type": "module",
"main": "./dist/main.js",
"types": "./dist/main.d.ts",
"scripts": {
"build": "tsc --build",
"clean": "rimraf dist"
},
"dependencies": {
"@lab/shared": "workspace:*",
"winston": "^3.17.0",
"winston-daily-rotate-file": "^5.0.0",
"ws": "^8.19.0"
},
"devDependencies": {
"@types/node": "^22.14.1",
"@types/ws": "^8.18.1",
"rimraf": "^6.1.3",
"typescript": "^5.9.3"
}
}

View File

@@ -0,0 +1,10 @@
/**
* @lab/agent — Lab agent daemon entry point.
*
* For now this module re-exports the command executor and the agent
* connection so they can be consumed by other packages in the monorepo.
*/
export { CommandExecutor } from "./services/executor.js";
export type { ExecOptions, ExecResult } from "./services/executor.js";
export { AgentConnection, type ConnectionConfig, type ConnectionState, DEFAULT_CONNECTION_CONFIG } from "./services/connection.js";

View File

@@ -0,0 +1,157 @@
// Agent WebSocket connection to labd with heartbeat and reconnection.
import { EventEmitter } from "node:events";
import { hostname } from "node:os";
import { readFileSync } from "node:fs";
import WebSocket from "ws";
import type { AgentMessage, ServerMessage } from "@lab/shared";
import { parseServerMessage } from "@lab/shared";
export type ConnectionState = "disconnected" | "connecting" | "connected" | "reconnecting";
/** Settings consumed by AgentConnection. */
export interface ConnectionConfig {
/** Base URL of labd; an http(s) scheme is rewritten to ws(s) and "/ws/agent" is appended. */
labdUrl: string;
/** Path to the client certificate file, read on every connection attempt. */
certPath: string;
/** Path to the client private key file, read on every connection attempt. */
keyPath: string;
/** Optional path to a CA certificate file passed to the WebSocket as `ca`. */
caPath?: string;
/** Interval between heartbeat messages while connected, in milliseconds. */
heartbeatIntervalMs: number;
/** Reconnect delay for the first retry; doubled on each further attempt. */
reconnectBaseDelayMs: number;
/** Upper bound for the reconnect delay, in milliseconds. */
reconnectMaxDelayMs: number;
}
/**
* Default timing values for ConnectionConfig.
* Partial because labdUrl and the certificate paths have no default here.
*/
export const DEFAULT_CONNECTION_CONFIG: Partial<ConnectionConfig> = {
heartbeatIntervalMs: 10_000,
reconnectBaseDelayMs: 1_000,
reconnectMaxDelayMs: 30_000,
};
/**
 * Client-side WebSocket connection from an agent to labd.
 *
 * Connects with a client certificate, sends a heartbeat on a fixed interval
 * while connected, and reconnects with exponential backoff after the socket
 * closes.
 *
 * Events emitted:
 * - `stateChange` (state) — whenever the connection state changes
 * - `connected` — the socket opened
 * - `disconnected` — the socket closed
 * - `message` (message) — a server message was parsed successfully
 * - `shutdown` (reconnectAfter) — labd sent a `server-shutdown` message
 */
export class AgentConnection extends EventEmitter {
  private ws: WebSocket | null = null;
  private heartbeatTimer: NodeJS.Timeout | null = null;
  /** Pending reconnect timer; tracked so that close() can cancel it. */
  private reconnectTimer: NodeJS.Timeout | null = null;
  private reconnectAttempts = 0;
  /** Set by close() or a server-shutdown message; suppresses all reconnects. */
  private isClosing = false;
  private _state: ConnectionState = "disconnected";

  constructor(private config: ConnectionConfig) {
    super();
  }

  /** Current connection state. */
  get state(): ConnectionState {
    return this._state;
  }

  /** True while the socket is open. */
  isConnected(): boolean {
    return this._state === "connected";
  }

  /**
   * Open the WebSocket to labd. Does nothing once the connection is closing.
   *
   * Never rejects: a synchronous failure (for example an unreadable
   * certificate file) schedules a reconnect attempt instead.
   */
  async connect(): Promise<void> {
    if (this.isClosing) return;
    this.setState(this.reconnectAttempts > 0 ? "reconnecting" : "connecting");

    const wsUrl = this.config.labdUrl.replace("https:", "wss:").replace("http:", "ws:") + "/ws/agent";
    try {
      this.ws = new WebSocket(wsUrl, {
        cert: readFileSync(this.config.certPath),
        key: readFileSync(this.config.keyPath),
        ca: this.config.caPath ? readFileSync(this.config.caPath) : undefined,
        rejectUnauthorized: true,
      });

      this.ws.on("open", () => {
        this.reconnectAttempts = 0;
        this.setState("connected");
        this.startHeartbeat();
        this.emit("connected");
      });

      this.ws.on("message", (data: Buffer) => {
        try {
          const message = parseServerMessage(data.toString());
          this.handleMessage(message);
          this.emit("message", message);
        } catch {
          // Ignore unparseable messages
        }
      });

      this.ws.on("close", (_code: number, _reason: Buffer) => {
        this.stopHeartbeat();
        this.setState("disconnected");
        this.emit("disconnected");
        this.scheduleReconnect();
      });

      this.ws.on("error", (_error: Error) => {
        // Error is followed by close event, so reconnect happens there
      });
    } catch {
      this.scheduleReconnect();
    }
  }

  /** Send a message to labd; silently dropped when the socket is not open. */
  send(message: AgentMessage): void {
    if (this.ws?.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify(message));
    }
  }

  /** Close the connection permanently and cancel any pending reconnect. */
  close(): void {
    this.isClosing = true;
    this.stopHeartbeat();
    // Without this, a pending reconnect timer would keep the process alive
    // for up to reconnectMaxDelayMs after close().
    this.cancelReconnect();
    this.ws?.close();
    this.setState("disconnected");
  }

  /** React to protocol-level server messages before they are re-emitted. */
  private handleMessage(message: ServerMessage): void {
    if (message.type === "server-shutdown") {
      this.isClosing = true; // Don't reconnect
      this.emit("shutdown", message.reconnectAfter);
    }
  }

  /** (Re)start the periodic heartbeat. */
  private startHeartbeat(): void {
    this.stopHeartbeat();
    this.heartbeatTimer = setInterval(() => {
      this.send({
        type: "heartbeat",
        hostname: hostname(),
        uptime: process.uptime(),
        version: process.env["npm_package_version"] ?? "0.0.0",
        memUsage: process.memoryUsage().heapUsed,
        cpuUsage: 0, // Simplified — os.loadavg() not available everywhere
      });
    }, this.config.heartbeatIntervalMs);
  }

  private stopHeartbeat(): void {
    if (this.heartbeatTimer) {
      clearInterval(this.heartbeatTimer);
      this.heartbeatTimer = null;
    }
  }

  /**
   * Schedule the next connection attempt. The delay is
   * reconnectBaseDelayMs * 2^attempts, capped at reconnectMaxDelayMs.
   */
  private scheduleReconnect(): void {
    if (this.isClosing) return;

    const delay = Math.min(
      this.config.reconnectBaseDelayMs * Math.pow(2, this.reconnectAttempts),
      this.config.reconnectMaxDelayMs,
    );
    this.reconnectAttempts++;
    this.setState("reconnecting");

    // Never keep more than one reconnect timer alive.
    this.cancelReconnect();
    this.reconnectTimer = setTimeout(() => {
      this.reconnectTimer = null;
      void this.connect();
    }, delay);
  }

  private cancelReconnect(): void {
    if (this.reconnectTimer) {
      clearTimeout(this.reconnectTimer);
      this.reconnectTimer = null;
    }
  }

  /** Update the state and emit `stateChange` only on an actual change. */
  private setState(state: ConnectionState): void {
    if (this._state !== state) {
      this._state = state;
      this.emit("stateChange", state);
    }
  }
}

View File

@@ -0,0 +1,161 @@
import { EventEmitter } from "node:events";
import { spawn, type ChildProcess } from "node:child_process";
/** Options for executing a command with CommandExecutor.execute(). */
export interface ExecOptions {
/** The command and its arguments, e.g. ["ls", "-la"]. An empty array yields an "Empty command" result. */
command: string[];
/** Maximum execution time in milliseconds; on expiry the process is sent SIGTERM, then SIGKILL after a grace period. */
timeout: number;
/** Whether to allocate a pseudo-TTY. Currently has no effect: the executor always uses pipe-based stdio. */
tty: boolean;
/** Optional environment variables (merged with process.env). */
env?: Record<string, string>;
/** Optional working directory. */
cwd?: string;
}
/** Result returned after a command finishes. */
export interface ExecResult {
/** Exit code of the process; 1 when it exited without one or could not be run. */
exitCode: number;
/** Everything the process wrote to stdout. */
stdout: string;
/** Everything the process wrote to stderr, or the error message when the process emitted an error. */
stderr: string;
/** True when the timeout fired before the process exited. */
timedOut: boolean;
/** Signal that terminated the process, if any. */
signal?: string | undefined;
}
/** Event map for CommandExecutor: raw output chunks tagged with the request they belong to. */
export interface CommandExecutorEvents {
/** Emitted for every chunk the child writes to stdout. */
stdout: [requestId: string, chunk: Buffer];
/** Emitted for every chunk the child writes to stderr. */
stderr: [requestId: string, chunk: Buffer];
}
/**
 * Executes commands in child processes with timeout handling and streaming
 * output via events.
 *
 * Running processes are tracked by request id so that callers can signal
 * them or write to their stdin while they run.
 */
export class CommandExecutor extends EventEmitter<CommandExecutorEvents> {
  private readonly processes = new Map<string, ChildProcess>();

  /** Grace period between SIGTERM and SIGKILL when a timeout fires (ms). */
  private static readonly KILL_GRACE_MS = 5_000;

  /**
   * Execute a command and return its result once it exits.
   *
   * While the process is running, `stdout` and `stderr` events are emitted
   * with `(requestId, chunk)` so callers can stream output in real time.
   *
   * @param requestId - Key under which the process is tracked for
   *   {@link sendSignal} and {@link writeStdin}.
   * @param options - Command, timeout, environment and working directory.
   * @returns The collected result. The promise never rejects: a process that
   *   cannot be run is reported as exit code 1 with the error message in
   *   `stderr`.
   */
  execute(requestId: string, options: ExecOptions): Promise<ExecResult> {
    // `options.tty` is accepted but not acted on: when TTY support is needed
    // the caller should use node-pty or similar; for now stdio is always
    // pipe-based.
    const { command, timeout, env, cwd } = options;
    const [cmd, ...args] = command;

    if (cmd === undefined) {
      return Promise.resolve({
        exitCode: 1,
        stdout: "",
        stderr: "Empty command",
        timedOut: false,
      });
    }

    return new Promise<ExecResult>((resolve) => {
      const child = spawn(cmd, args, {
        cwd,
        env: env ? { ...process.env, ...env } : undefined,
        stdio: ["pipe", "pipe", "pipe"],
      });
      this.processes.set(requestId, child);

      // Keep raw chunks and decode once at the end: decoding chunk by chunk
      // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
      const stdoutChunks: Buffer[] = [];
      const stderrChunks: Buffer[] = [];
      let timedOut = false;
      let killTimer: ReturnType<typeof setTimeout> | undefined;

      // -- Streaming output ------------------------------------------------
      child.stdout?.on("data", (chunk: Buffer) => {
        stdoutChunks.push(chunk);
        this.emit("stdout", requestId, chunk);
      });
      child.stderr?.on("data", (chunk: Buffer) => {
        stderrChunks.push(chunk);
        this.emit("stderr", requestId, chunk);
      });

      // -- Timeout handling -------------------------------------------------
      const timeoutTimer = setTimeout(() => {
        timedOut = true;
        // Graceful shutdown first.
        child.kill("SIGTERM");
        // If the process does not exit within the grace period, force-kill.
        killTimer = setTimeout(() => {
          child.kill("SIGKILL");
        }, CommandExecutor.KILL_GRACE_MS);
      }, timeout);

      // -- Completion -------------------------------------------------------
      const release = (): void => {
        clearTimeout(timeoutTimer);
        if (killTimer !== undefined) {
          clearTimeout(killTimer);
        }
        // Only drop the entry if it still refers to this process; a newer
        // execution may have reused the same request id in the meantime.
        if (this.processes.get(requestId) === child) {
          this.processes.delete(requestId);
        }
      };

      child.on("close", (code, signal) => {
        release();
        resolve({
          exitCode: code ?? 1,
          stdout: Buffer.concat(stdoutChunks).toString(),
          stderr: Buffer.concat(stderrChunks).toString(),
          timedOut,
          signal: signal ?? undefined,
        });
      });

      child.on("error", (err) => {
        release();
        resolve({
          exitCode: 1,
          stdout: Buffer.concat(stdoutChunks).toString(),
          stderr: err.message,
          timedOut: false,
        });
      });
    });
  }

  /**
   * Send a signal to a running process.
   *
   * @returns `true` if the process was found and the signal was sent.
   */
  sendSignal(requestId: string, signal: NodeJS.Signals): boolean {
    const child = this.processes.get(requestId);
    if (!child) {
      return false;
    }
    return child.kill(signal);
  }

  /**
   * Write data to the stdin of a running process.
   *
   * @returns `false` if the process is unknown or its stdin is gone;
   *   otherwise the return value of the stream's `write()`.
   */
  writeStdin(requestId: string, data: string): boolean {
    const child = this.processes.get(requestId);
    if (!child?.stdin || child.stdin.destroyed) {
      return false;
    }
    return child.stdin.write(data);
  }
}

View File

@@ -0,0 +1,38 @@
import winston from "winston";
import DailyRotateFile from "winston-daily-rotate-file";
// Directory for rotated log files; override with the LOG_DIR environment variable.
const LOG_DIR = process.env["LOG_DIR"] ?? "/var/log/lab-agent";
// Root logger: timestamped JSON by default, level taken from LOG_LEVEL (default "info").
const logger = winston.createLogger({
level: process.env["LOG_LEVEL"] ?? "info",
format: winston.format.combine(
winston.format.timestamp(),
winston.format.json(),
),
transports: [
// Console output overrides the JSON format with a colorized, simple format.
new winston.transports.Console({
format: winston.format.combine(
winston.format.colorize(),
winston.format.simple(),
),
}),
// File output under LOG_DIR, rotated by date (%DATE% in the filename);
// "20m" / "14d" are the transport's size and retention settings.
new DailyRotateFile({
dirname: LOG_DIR,
filename: "agent-%DATE%.log",
maxSize: "20m",
maxFiles: "14d",
}),
],
});
/**
 * Build a logger scoped to one component.
 *
 * The result is a child of the root logger, so it shares the root's
 * transports and configuration, and every entry it writes carries a
 * `component` metadata field.
 *
 * @param component - Name recorded in the `component` field.
 */
export function createChildLogger(component: string): winston.Logger {
  const scoped = logger.child({ component });
  return scoped;
}
export { logger };

View File

@@ -0,0 +1,111 @@
// Tests for CommandExecutor.
import { describe, it, expect } from "vitest";
import { CommandExecutor } from "../src/services/executor.js";
// Behavioural tests for CommandExecutor; every case spawns a real process.
describe("CommandExecutor", () => {
  it("executes a simple command", async () => {
    const exec = new CommandExecutor();
    const result = await exec.execute("req-1", {
      command: ["echo", "hello"],
      timeout: 5000,
      tty: false,
    });
    expect(result.exitCode).toBe(0);
    expect(result.stdout.trim()).toBe("hello");
    expect(result.timedOut).toBe(false);
  });

  it("captures stderr", async () => {
    const exec = new CommandExecutor();
    const result = await exec.execute("req-2", {
      command: ["sh", "-c", "echo err >&2"],
      timeout: 5000,
      tty: false,
    });
    expect(result.exitCode).toBe(0);
    expect(result.stderr.trim()).toBe("err");
  });

  it("returns non-zero exit code", async () => {
    const exec = new CommandExecutor();
    const result = await exec.execute("req-3", {
      command: ["sh", "-c", "exit 42"],
      timeout: 5000,
      tty: false,
    });
    expect(result.exitCode).toBe(42);
  });

  it("times out long-running commands", async () => {
    const exec = new CommandExecutor();
    const result = await exec.execute("req-4", {
      command: ["sleep", "60"],
      timeout: 200,
      tty: false,
    });
    expect(result.timedOut).toBe(true);
  }, 10_000);

  it("emits stdout events for streaming", async () => {
    const exec = new CommandExecutor();
    const chunks: string[] = [];
    // The executor emits raw Buffers (see CommandExecutorEvents), so the
    // listener must accept a Buffer and decode it explicitly.
    exec.on("stdout", (_reqId: string, chunk: Buffer) => {
      chunks.push(chunk.toString());
    });
    await exec.execute("req-5", {
      command: ["echo", "streamed"],
      timeout: 5000,
      tty: false,
    });
    expect(chunks.join("").trim()).toBe("streamed");
  });

  it("sends signal to running process", async () => {
    const exec = new CommandExecutor();
    // Start a long process
    const promise = exec.execute("req-6", {
      command: ["sleep", "60"],
      timeout: 30000,
      tty: false,
    });
    // Give it time to start
    await new Promise((r) => setTimeout(r, 100));
    const sent = exec.sendSignal("req-6", "SIGTERM");
    expect(sent).toBe(true);
    const result = await promise;
    expect(result.exitCode).not.toBe(0);
  }, 10_000);

  it("sendSignal returns false for unknown request", () => {
    const exec = new CommandExecutor();
    expect(exec.sendSignal("nonexistent", "SIGTERM")).toBe(false);
  });

  it("uses custom cwd", async () => {
    const exec = new CommandExecutor();
    const result = await exec.execute("req-7", {
      command: ["pwd"],
      timeout: 5000,
      tty: false,
      cwd: "/tmp",
    });
    expect(result.stdout.trim()).toBe("/tmp");
  });

  it("uses custom env", async () => {
    const exec = new CommandExecutor();
    const result = await exec.execute("req-8", {
      command: ["sh", "-c", "echo $MY_VAR"],
      timeout: 5000,
      tty: false,
      env: { MY_VAR: "test_value" },
    });
    expect(result.stdout.trim()).toBe("test_value");
  });
});

View File

@@ -0,0 +1,12 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": "src",
"outDir": "dist",
"composite": true
},
"include": ["src/**/*.ts"],
"references": [
{ "path": "../shared" }
]
}

View File

@@ -16,18 +16,29 @@
"clean": "rimraf dist",
"dev": "tsx src/main.ts",
"db:push": "prisma db push",
"db:migrate": "prisma migrate dev",
"db:generate": "prisma generate"
"db:generate": "prisma generate",
"db:migrate:dev": "prisma migrate dev",
"db:migrate:deploy": "prisma migrate deploy",
"db:migrate:reset": "prisma migrate reset",
"db:seed": "tsx prisma/seed.ts",
"db:studio": "prisma studio"
},
"dependencies": {
"@fastify/rate-limit": "^10.3.0",
"@fastify/websocket": "^11.0.2",
"@lab/shared": "workspace:*",
"@prisma/client": "^6.9.0",
"fastify": "^5.3.3",
"@fastify/websocket": "^11.0.2",
"winston": "^3.17.0"
"winston": "^3.17.0",
"ws": "^8.19.0",
"zod": "^4.3.6"
},
"prisma": {
"seed": "tsx prisma/seed.ts"
},
"devDependencies": {
"@types/node": "^22.14.1",
"@types/ws": "^8.18.1",
"prisma": "^6.9.0",
"rimraf": "^6.1.3",
"tsx": "^4.21.0",

View File

@@ -133,6 +133,17 @@ model PulumiRun {
@@index([stackName])
}
// Persistent record of a bastion known to labd; hostname is unique per bastion.
// NOTE(review): status and lastHeartbeat are presumably updated by the bastion
// connection handler — confirm against the code that writes this model.
model Bastion {
id String @id @default(uuid())
hostname String @unique
network String
serverIp String
// Stored as a free-form string rather than an enum; new rows start "offline".
status String @default("offline") // online, offline
lastHeartbeat DateTime?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
}
model Cluster {
id String @id @default(uuid())
name String @unique

View File

@@ -0,0 +1,113 @@
import { PrismaClient } from "@prisma/client";
const db = new PrismaClient();
/**
 * Upsert the default RBAC roles: admin, viewer and operator.
 *
 * For a role that already exists only the description is refreshed; the
 * permission rows are created only together with a new role.
 */
async function main() {
  console.log("Seeding database with default RBAC roles and permissions...");

  // Every seeded permission applies to all clouds, environments and servers.
  const everywhere = { cloud: "*", environment: "*", server: "*" };

  const adminDescription = "Full administrative access to all resources";
  const viewerDescription = "Read-only access to all resources";
  const operatorDescription =
    "Operational access: read, exec, and kubectl — destroy denied";

  // --- Admin role: full wildcard access ---
  const admin = await db.role.upsert({
    where: { name: "admin" },
    update: { description: adminDescription },
    create: {
      name: "admin",
      description: adminDescription,
      permissions: {
        create: [{ type: "allow", action: "*", ...everywhere }],
      },
    },
  });
  console.log(` Upserted role: ${admin.name} (${admin.id})`);

  // --- Viewer role: read-only access ---
  const viewer = await db.role.upsert({
    where: { name: "viewer" },
    update: { description: viewerDescription },
    create: {
      name: "viewer",
      description: viewerDescription,
      permissions: {
        create: [{ type: "allow", action: "read", ...everywhere }],
      },
    },
  });
  console.log(` Upserted role: ${viewer.name} (${viewer.id})`);

  // --- Operator role: read/exec/kubectl allowed, destroy denied ---
  const operator = await db.role.upsert({
    where: { name: "operator" },
    update: { description: operatorDescription },
    create: {
      name: "operator",
      description: operatorDescription,
      permissions: {
        create: [
          { type: "allow", action: "read", ...everywhere },
          { type: "allow", action: "exec", ...everywhere },
          { type: "allow", action: "kubectl", ...everywhere },
          { type: "deny", action: "destroy", ...everywhere },
        ],
      },
    },
  });
  console.log(` Upserted role: ${operator.name} (${operator.id})`);

  console.log("Seed complete.");
}
// Run the seed. On failure, record a non-zero exit code instead of calling
// process.exit(): exiting inside catch() would end the process before the
// finally() below gets a chance to close the database connection.
main()
  .catch((error) => {
    console.error("Seed failed:", error);
    process.exitCode = 1;
  })
  .finally(async () => {
    await db.$disconnect();
  });

View File

@@ -4,59 +4,54 @@
import { loadConfig } from "./config.js";
import { createApp } from "./server.js";
import { logger } from "./services/logger.js";
import { setupGracefulShutdown } from "./services/shutdown.js";
async function main(): Promise<void> {
const config = loadConfig();
// Initialize Prisma client (wrapped in try/catch for when DB isn't available)
let db;
let db: import("./server.js").DbClient;
try {
const { PrismaClient } = await import("@prisma/client");
const prisma = new PrismaClient({
datasources: config.databaseUrl
? { db: { url: config.databaseUrl } }
: undefined,
});
const prisma = config.databaseUrl
? new PrismaClient({ datasources: { db: { url: config.databaseUrl } } })
: new PrismaClient();
await prisma.$connect();
logger.info("Database connected");
db = prisma;
db = prisma as unknown as import("./server.js").DbClient;
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
logger.warn(`Database not available: ${message}`);
logger.warn("Running without database -- some features will be unavailable");
// Create a stub db client that returns errors for all operations
const dbError = (): never => {
throw new Error("Database not connected");
};
db = {
$queryRaw: async () => {
throw new Error("Database not connected");
},
$queryRaw: () => dbError(),
$disconnect: async () => {},
server: {
findMany: async () => {
throw new Error("Database not connected");
},
findUnique: async () => {
throw new Error("Database not connected");
},
findMany: () => dbError(),
findUnique: () => dbError(),
},
joinToken: {
findUnique: async () => {
throw new Error("Database not connected");
},
findMany: async () => {
throw new Error("Database not connected");
},
create: async () => {
throw new Error("Database not connected");
},
update: async () => {
throw new Error("Database not connected");
},
findUnique: () => dbError(),
findMany: () => dbError(),
create: () => dbError(),
update: () => dbError(),
},
bastion: {
upsert: () => dbError(),
findMany: () => dbError(),
findUnique: () => dbError(),
update: () => dbError(),
},
};
}
// Create Fastify app
const { app } = createApp(config, db);
const { app } = await createApp(config, db);
// Start server
try {
@@ -68,18 +63,7 @@ async function main(): Promise<void> {
}
// Graceful shutdown
const shutdown = async (): Promise<void> => {
logger.info("Shutting down...");
await app.close();
if (db !== null && "$disconnect" in db) {
await (db as { $disconnect: () => Promise<void> }).$disconnect();
}
logger.info("Goodbye");
process.exit(0);
};
process.on("SIGINT", () => void shutdown());
process.on("SIGTERM", () => void shutdown());
setupGracefulShutdown({ app, db });
// Keep process alive
await new Promise(() => {});

View File

@@ -0,0 +1,50 @@
// Rate limiting middleware for labd API.
// Applies global rate limits and stricter limits for sensitive routes.
import type { FastifyInstance } from "fastify";
import rateLimit from "@fastify/rate-limit";
import { logger } from "../services/logger.js";
/**
* Routes that require stricter rate limiting.
* Keys are URL prefixes (matched with startsWith); values are the maximum
* number of requests per one-minute window.
*/
const SENSITIVE_ROUTE_LIMITS: Record<string, number> = {
"/api/auth/enroll": 10,
"/api/tokens": 20,
};
/**
 * Register the @fastify/rate-limit plugin with global defaults
 * and apply stricter limits to sensitive routes.
 *
 * Must be called before the routes it should cover are declared, because
 * both the plugin and the per-route override work through `onRoute` hooks.
 *
 * @param app - Fastify instance to protect.
 */
export async function setupRateLimiting(
  app: FastifyInstance,
): Promise<void> {
  // Apply stricter per-route limits for sensitive endpoints.
  //
  // This hook has to be installed BEFORE the plugin is registered: the plugin
  // reads `config.rateLimit` from its own onRoute hook, and onRoute hooks run
  // in registration order. Added afterwards, the override would be written
  // only after the plugin had already built the limiter for the route.
  app.addHook("onRoute", (routeOptions) => {
    const url = routeOptions.url;
    for (const [prefix, max] of Object.entries(SENSITIVE_ROUTE_LIMITS)) {
      if (url.startsWith(prefix)) {
        routeOptions.config = {
          ...routeOptions.config,
          rateLimit: {
            max,
            timeWindow: "1 minute",
          },
        };
        logger.info(`Rate limit: ${url} -> ${max} req/min`);
        break;
      }
    }
  });

  // Global default: 100 requests per minute per client IP.
  await app.register(rateLimit, {
    max: 100,
    timeWindow: "1 minute",
    keyGenerator: (request) => request.ip,
    errorResponseBuilder: (_request, context) => ({
      error: "Too many requests",
      code: "RATE_LIMITED",
      retryAfter: context.after,
    }),
  });
}

View File

@@ -0,0 +1,20 @@
// Agent connection routes.
// GET /api/agents — list currently connected agents (excludes raw socket)
import type { FastifyInstance } from "fastify";
import { agentRegistry } from "../services/agent-registry.js";
/**
 * Register `GET /api/agents`, which lists the currently connected agents.
 *
 * Only the summary fields below are copied into the response, so nothing
 * else held by the registry entry is serialized.
 */
export function registerAgentRoutes(app: FastifyInstance): void {
  app.get("/api/agents", async (_request, reply) => {
    const connected = agentRegistry.getAllConnected();
    const summaries = connected.map(
      ({ serverId, hostname, connectedAt, lastHeartbeat, version, certFingerprint }) => ({
        serverId,
        hostname,
        connectedAt,
        lastHeartbeat,
        version,
        certFingerprint,
      }),
    );
    return reply.send(summaries);
  });
}

View File

@@ -0,0 +1,207 @@
// Bastion management routes.
// GET /api/bastions — list connected bastions
// GET /api/machines — aggregated machines from all bastions
// POST /api/machines/install — queue install on correct bastion
// DELETE /api/machines/:mac — forget machine on correct bastion
// POST /api/machines/role — update role on correct bastion
// GET /api/machines/:mac/logs — get provision logs from correct bastion
import type { FastifyInstance } from "fastify";
import type { DbClient } from "../server.js";
import { bastionRegistry } from "../services/bastion-registry.js";
import { generateRequestId } from "@lab/shared";
const COMMAND_TIMEOUT_MS = 15_000;
/**
 * Send a command to a bastion and wait for the response.
 *
 * A fresh request id is attached to the message, and the promise resolves
 * with the first `command-response` from that bastion carrying the same id.
 *
 * @param bastionId - Id of a currently connected bastion.
 * @param msg - Command payload; `requestId` is added automatically.
 * @returns The response's status (defaulting to "ok"), data and error.
 * @throws Rejects if the bastion is not connected, if sending fails, or if
 *   no matching response arrives within COMMAND_TIMEOUT_MS.
 */
function sendCommand(
  bastionId: string,
  msg: Record<string, unknown>,
): Promise<{ status: string; data?: unknown; error?: string | undefined }> {
  const bastion = bastionRegistry.getById(bastionId);
  if (!bastion) {
    return Promise.reject(new Error(`Bastion ${bastionId} not connected`));
  }

  const requestId = generateRequestId();
  const fullMsg = { ...msg, requestId };

  return new Promise((resolve, reject) => {
    const timeout = setTimeout(() => {
      cleanup();
      reject(new Error("Command timed out"));
    }, COMMAND_TIMEOUT_MS);

    const handler = (data: Buffer) => {
      try {
        const parsed = JSON.parse(data.toString()) as { type: string; requestId?: string; status?: string; data?: unknown; error?: string };
        if (parsed.type === "command-response" && parsed.requestId === requestId) {
          cleanup();
          resolve({ status: parsed.status ?? "ok", data: parsed.data, error: parsed.error });
        }
      } catch { /* not our message */ }
    };

    const cleanup = () => {
      clearTimeout(timeout);
      bastion.socket.off("message", handler);
    };

    bastion.socket.on("message", handler);
    try {
      bastion.socket.send(JSON.stringify(fullMsg));
    } catch (err) {
      // Release the listener and timer right away instead of leaving them
      // registered until the timeout fires.
      cleanup();
      reject(err);
    }
  });
}
/**
 * Register the bastion and machine management routes.
 *
 * Machine routes look up the bastion that knows the given MAC address and
 * forward the command to it. MAC addresses are normalized to lower case with
 * ":" separators before any lookup, on every route.
 *
 * @param app - Fastify instance to register the routes on.
 * @param db - Database client; used to list persisted bastions.
 */
export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void {
  /** Canonical MAC form used for lookups: lower case, ":" separators. */
  const normalizeMac = (raw: string): string => raw.toLowerCase().replace(/-/g, ":");

  /** Turn a thrown value into the `{ error }` body used by these routes. */
  const errorBody = (err: unknown): { error: string } => ({
    error: err instanceof Error ? err.message : String(err),
  });

  // List all bastions (DB records enriched with online status from registry)
  app.get("/api/bastions", async () => {
    const dbBastions = await db.bastion.findMany() as Array<{
      id: string; hostname: string; network: string; serverIp: string;
      status: string; lastHeartbeat: Date | null; createdAt: Date;
    }>;
    return dbBastions.map((b) => {
      const connected = bastionRegistry.getById(b.id);
      return {
        id: b.id,
        hostname: b.hostname,
        network: b.network,
        serverIp: b.serverIp,
        // The live registry, not the stored column, decides the status.
        status: connected ? "online" : "offline",
        lastHeartbeat: connected?.lastHeartbeat ?? b.lastHeartbeat,
        connectedAt: connected?.connectedAt,
        machineCount: connected
          ? Object.keys(connected.state.discovered).length +
            Object.keys(connected.state.install_queue).length +
            Object.keys(connected.state.installed).length
          : 0,
        createdAt: b.createdAt,
      };
    });
  });

  // Aggregated machines from all connected bastions
  app.get("/api/machines", async () => {
    return bastionRegistry.getAggregatedState();
  });

  // Queue install — route to correct bastion by MAC
  app.post<{
    Body: { mac?: string; hostname?: string; disk?: string; role?: string; os?: string };
  }>("/api/machines/install", async (request, reply) => {
    const { mac, hostname, disk, role, os } = request.body ?? {};
    if (!mac || !hostname) {
      return reply.code(400).send({ error: "mac and hostname are required" });
    }
    // Normalize like every other route so "AA-BB-…" and "aa:bb:…" resolve to
    // the same machine.
    const normalized = normalizeMac(mac);

    // Prefer the bastion that already knows this MAC; otherwise fall back to
    // the only connected bastion, if there is exactly one.
    let bastionId: string;
    const known = bastionRegistry.findBastionByMac(normalized);
    if (known) {
      bastionId = known.bastionId;
    } else {
      const all = bastionRegistry.getAll();
      if (all.length === 0) {
        return reply.code(503).send({ error: "No bastions connected" });
      }
      const only = all[0];
      if (all.length !== 1 || !only) {
        return reply.code(404).send({ error: `MAC ${normalized} not found on any bastion` });
      }
      bastionId = only.bastionId;
    }

    try {
      const result = await sendCommand(bastionId, {
        type: "command-install",
        mac: normalized,
        hostname,
        disk: disk ?? "/dev/sda",
        role: role ?? "infra",
        os: os ?? "fedora-43",
      });
      return reply.code(result.status === "ok" ? 200 : 500).send(result);
    } catch (err) {
      return reply.code(500).send(errorBody(err));
    }
  });

  // Forget machine
  app.delete<{ Params: { mac: string } }>("/api/machines/:mac", async (request, reply) => {
    const mac = normalizeMac(request.params.mac);
    const bastion = bastionRegistry.findBastionByMac(mac);
    if (!bastion) {
      return reply.code(404).send({ error: `MAC ${mac} not found on any bastion` });
    }
    try {
      const result = await sendCommand(bastion.bastionId, { type: "command-forget", mac });
      return reply.send(result);
    } catch (err) {
      return reply.code(500).send(errorBody(err));
    }
  });

  // Update role
  app.post<{
    Body: { mac?: string; role?: string };
  }>("/api/machines/role", async (request, reply) => {
    const { mac, role } = request.body ?? {};
    if (!mac || !role) {
      return reply.code(400).send({ error: "mac and role are required" });
    }
    const normalized = normalizeMac(mac);
    const bastion = bastionRegistry.findBastionByMac(normalized);
    if (!bastion) {
      return reply.code(404).send({ error: `MAC ${normalized} not found on any bastion` });
    }
    try {
      const result = await sendCommand(bastion.bastionId, { type: "command-role-update", mac: normalized, role });
      return reply.send(result);
    } catch (err) {
      return reply.code(500).send(errorBody(err));
    }
  });

  // Machine logs (snapshot from bastion's state)
  app.get<{ Params: { mac: string } }>("/api/machines/:mac/logs", async (request, reply) => {
    const mac = normalizeMac(request.params.mac);
    const bastion = bastionRegistry.findBastionByMac(mac);
    if (!bastion) {
      return reply.code(404).send({ error: `MAC ${mac} not found` });
    }
    const queued = bastion.state.install_queue[mac];
    const installed = bastion.state.installed[mac];
    // An installed record wins over a queue entry for the same MAC.
    if (installed) {
      return {
        mac,
        hostname: installed.hostname,
        status: "installed",
        role: installed.role,
        ip: installed.ip,
        installed_at: installed.installed_at,
      };
    }
    if (queued) {
      return {
        mac,
        hostname: queued.hostname,
        // Any reported progress means the install has started.
        status: queued.progress ? "installing" : "queued",
        progress: queued.progress,
        progress_detail: queued.progress_detail,
        progress_at: queued.progress_at,
        role: queued.role,
        os: queued.os,
        log: queued.log,
      };
    }
    return reply.code(404).send({ error: `MAC ${mac} not found in install queue or installed` });
  });
}

View File

@@ -1,10 +1,85 @@
// Health check routes.
import type { FastifyInstance } from "fastify";
import { performance } from "node:perf_hooks";
import type { DbClient } from "../server.js";
import { agentRegistry } from "../services/agent-registry.js";
import { isShuttingDownNow } from "../services/shutdown.js";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/** Health of one dependency as reported by the health routes. */
export interface ComponentStatus {
// checkDatabase reports "up"/"down"; checkAgents reports "up"/"degraded".
status: "up" | "down" | "degraded";
// Set by checkDatabase only: elapsed time of the probe query from
// performance.now(), rounded to two decimals.
latency?: number;
// Error text of a failed DB probe, or the "N agent(s) connected" summary.
message?: string;
}
/** Response body of GET /health/detailed. */
export interface HealthStatus {
// Roll-up of `components` computed by aggregateStatus.
status: "healthy" | "degraded" | "unhealthy";
// Taken from the LABD_VERSION environment variable, "0.1.0" when unset.
version: string;
// Value of process.uptime() at response time.
uptime: number;
// ISO-8601 timestamp (Date#toISOString) of the response.
timestamp: string;
components: {
database: ComponentStatus;
agents: ComponentStatus;
};
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Probes the database with `SELECT 1` and times the round trip.
 *
 * @param db - client whose `$queryRaw` tagged template runs the probe
 * @returns `up` with the latency on success; `down` with the latency and the
 *   error message (or "Unknown error" for non-Error throwables) on failure.
 *   Never rejects.
 */
async function checkDatabase(db: DbClient): Promise<ComponentStatus> {
  const startedAt = performance.now();
  // Elapsed time since the probe started, rounded to two decimals.
  const elapsed = (): number => Math.round((performance.now() - startedAt) * 100) / 100;

  try {
    await db.$queryRaw`SELECT 1`;
  } catch (err) {
    return {
      status: "down",
      latency: elapsed(),
      message: err instanceof Error ? err.message : "Unknown error",
    };
  }
  return { status: "up", latency: elapsed() };
}
/**
 * Summarises agent connectivity from the in-memory agent registry.
 *
 * @returns `up` when at least one agent is connected, otherwise `degraded`;
 *   the message always carries the connected count.
 */
function checkAgents(): ComponentStatus {
  const connected = agentRegistry.getConnectedCount();
  const status: ComponentStatus["status"] = connected > 0 ? "up" : "degraded";
  return { status, message: `${connected} agent(s) connected` };
}
/**
 * Rolls the per-component statuses up into one overall status and the HTTP
 * status code to answer with.
 *
 * Any `down` component makes the service `unhealthy` (503); otherwise any
 * `degraded` component makes it `degraded` (200); otherwise `healthy` (200).
 */
function aggregateStatus(
  components: HealthStatus["components"],
): { status: HealthStatus["status"]; statusCode: number } {
  let sawDegraded = false;
  for (const component of Object.values(components)) {
    if (component.status === "down") {
      // "down" outranks everything else, so we can stop looking.
      return { status: "unhealthy", statusCode: 503 };
    }
    if (component.status === "degraded") {
      sawDegraded = true;
    }
  }
  return sawDegraded
    ? { status: "degraded", statusCode: 200 }
    : { status: "healthy", statusCode: 200 };
}
// ---------------------------------------------------------------------------
// Route registration
// ---------------------------------------------------------------------------
export function registerHealthRoutes(app: FastifyInstance, db: DbClient): void {
// ---- existing /healthz (preserved for backward compat) ------------------
app.get("/healthz", async (_request, reply) => {
if (isShuttingDownNow()) {
return reply.code(503).send({
status: "shutting_down",
uptime: process.uptime(),
timestamp: new Date().toISOString(),
});
}
let dbOk = false;
try {
await db.$queryRaw`SELECT 1`;
@@ -25,4 +100,45 @@ export function registerHealthRoutes(app: FastifyInstance, db: DbClient): void {
},
});
});
// ---- GET /health — simple probe for k8s --------------------------------
// Always answers 200 {"status":"ok"}; it checks no dependency.
app.get("/health", async (_request, reply) => reply.code(200).send({ status: "ok" }));
// ---- GET /health/detailed — full component check -----------------------
// Probes the database and the agent registry, then answers with the
// aggregated status; the HTTP code is 503 only when a component is down.
app.get("/health/detailed", async (_request, reply) => {
  const components: HealthStatus["components"] = {
    database: await checkDatabase(db),
    agents: checkAgents(),
  };
  const overall = aggregateStatus(components);

  const body: HealthStatus = {
    status: overall.status,
    version: process.env["LABD_VERSION"] ?? "0.1.0",
    uptime: process.uptime(),
    timestamp: new Date().toISOString(),
    components,
  };
  return reply.code(overall.statusCode).send(body);
});
// ---- GET /health/live — liveness probe ---------------------------------
// Always answers 200 {"status":"alive"}; it checks no dependency.
app.get("/health/live", async (_request, reply) => reply.code(200).send({ status: "alive" }));
// ---- GET /health/ready — readiness probe (needs DB) --------------------
// 503 {"status":"not_ready"} while the process is shutting down or when the
// database probe fails; 200 {"status":"ready"} otherwise.
app.get("/health/ready", async (_request, reply) => {
  // Mirror /healthz: an instance that is draining must stop advertising
  // itself as ready, otherwise it keeps being handed new traffic.
  if (isShuttingDownNow()) {
    return reply.code(503).send({
      status: "not_ready",
      reason: "shutting down",
    });
  }
  const database = await checkDatabase(db);
  if (database.status === "down") {
    return reply.code(503).send({
      status: "not_ready",
      reason: database.message,
    });
  }
  return reply.code(200).send({ status: "ready" });
});
}

View File

@@ -7,28 +7,43 @@ import { logger } from "./services/logger.js";
import { registerHealthRoutes } from "./routes/health.js";
import { registerServerRoutes } from "./routes/servers.js";
import { registerAuthRoutes } from "./routes/auth.js";
import { registerAgentRoutes } from "./routes/agents.js";
import { registerBastionRoutes } from "./routes/bastions.js";
import { setupRateLimiting } from "./middleware/rate-limit.js";
import { bastionRegistry } from "./services/bastion-registry.js";
import { isBastionMessage } from "@lab/shared";
export interface DbClient {
$queryRaw: (query: TemplateStringsArray) => Promise<unknown>;
$queryRaw: (...args: unknown[]) => Promise<unknown>;
$disconnect?: () => Promise<void>;
server: {
findMany: (args?: unknown) => Promise<unknown[]>;
findUnique: (args: unknown) => Promise<unknown>;
findMany: (...args: unknown[]) => Promise<unknown[]>;
findUnique: (...args: unknown[]) => Promise<unknown>;
};
joinToken: {
findUnique: (args: unknown) => Promise<unknown>;
findMany: (args?: unknown) => Promise<unknown[]>;
create: (args: unknown) => Promise<unknown>;
update: (args: unknown) => Promise<unknown>;
findUnique: (...args: unknown[]) => Promise<unknown>;
findMany: (...args: unknown[]) => Promise<unknown[]>;
create: (...args: unknown[]) => Promise<unknown>;
update: (...args: unknown[]) => Promise<unknown>;
};
bastion: {
upsert: (...args: unknown[]) => Promise<unknown>;
findMany: (...args: unknown[]) => Promise<unknown[]>;
findUnique: (...args: unknown[]) => Promise<unknown>;
update: (...args: unknown[]) => Promise<unknown>;
};
}
export function createApp(_config: LabdConfig, db: DbClient): {
export async function createApp(_config: LabdConfig, db: DbClient): Promise<{
app: ReturnType<typeof Fastify>;
} {
}> {
const app = Fastify({
logger: false, // We use winston instead
});
// Register rate limiting before routes
await setupRateLimiting(app);
// Register WebSocket support
void app.register(websocket);
@@ -36,6 +51,8 @@ export function createApp(_config: LabdConfig, db: DbClient): {
registerHealthRoutes(app, db);
registerServerRoutes(app, db);
registerAuthRoutes(app, db);
registerAgentRoutes(app);
registerBastionRoutes(app, db);
// WebSocket handler for agent connections
app.register(async (fastify) => {
@@ -54,6 +71,148 @@ export function createApp(_config: LabdConfig, db: DbClient): {
});
});
// WebSocket handler for bastion connections
//
// Every frame is JSON and must pass isBastionMessage. Per message type:
//   bastion-enroll      validate the join token, upsert the bastion row,
//                       register the socket, reply "bastion-enrolled"
//   bastion-state-sync  cache the bastion's state; a socket that has not
//                       enrolled but presents a bastionId is re-registered
//   bastion-heartbeat   refresh lastHeartbeat and reply with an ack
//   bastion-progress    logged only
//   command-response    ignored here
// On close the bastion is unregistered and marked offline in the DB.
app.register(async (fastify) => {
  fastify.get("/ws/bastion", { websocket: true }, (socket, _request) => {
    // Set once this socket has enrolled or re-attached through a state sync.
    let bastionId: string | null = null;
    logger.info("Bastion WebSocket connection established");

    socket.on("message", (data: Buffer) => {
      try {
        const raw = data.toString();
        const msg: unknown = JSON.parse(raw);
        if (!isBastionMessage(msg)) {
          // JSON.parse may yield null or a primitive; optional chaining keeps
          // this log line from throwing on such payloads.
          logger.warn(`Unknown bastion message: ${(msg as { type?: string } | null)?.type}`);
          return;
        }
        switch (msg.type) {
          case "bastion-enroll": {
            // Validate the join token
            // The socket callback is synchronous, so the async work runs in a
            // detached IIFE with its own try/catch.
            void (async () => {
              try {
                const joinToken = await db.joinToken.findUnique({ where: { token: msg.token } }) as {
                  id: string; type: string; usedBy: string | null; revokedAt: Date | null; expiresAt: Date | null;
                } | null;
                if (!joinToken || joinToken.revokedAt !== null) {
                  logger.warn(`Bastion enrollment rejected: invalid/revoked token from ${msg.hostname}`);
                  socket.send(JSON.stringify({ type: "error", error: "Invalid or revoked token" }));
                  socket.close();
                  return;
                }
                if (joinToken.expiresAt !== null && joinToken.expiresAt < new Date()) {
                  logger.warn(`Bastion enrollment rejected: expired token from ${msg.hostname}`);
                  socket.send(JSON.stringify({ type: "error", error: "Token expired" }));
                  socket.close();
                  return;
                }
                if (joinToken.type === "one-time" && joinToken.usedBy !== null) {
                  logger.warn(`Bastion enrollment rejected: already-used token from ${msg.hostname}`);
                  socket.send(JSON.stringify({ type: "error", error: "Token already used" }));
                  socket.close();
                  return;
                }
                // Mark token as used
                await db.joinToken.update({
                  where: { id: joinToken.id },
                  data: { usedBy: `bastion:${msg.hostname}`, usedAt: new Date() },
                });
                // Upsert bastion record
                const record = await db.bastion.upsert({
                  where: { hostname: msg.hostname },
                  create: { hostname: msg.hostname, network: msg.network, serverIp: msg.serverIp, status: "online" },
                  update: { network: msg.network, serverIp: msg.serverIp, status: "online", lastHeartbeat: new Date() },
                }) as { id: string };
                bastionId = record.id;
                bastionRegistry.register({
                  bastionId: record.id,
                  hostname: msg.hostname,
                  network: msg.network,
                  serverIp: msg.serverIp,
                  socket,
                  connectedAt: new Date(),
                  lastHeartbeat: new Date(),
                  state: { discovered: {}, install_queue: {}, installed: {} },
                });
                socket.send(JSON.stringify({ type: "bastion-enrolled", bastionId: record.id }));
                logger.info(`BASTION ENROLLED: ${msg.hostname} (${msg.network}) as ${record.id.slice(0, 8)}...`);
              } catch (err) {
                logger.error(`Bastion enrollment error: ${err instanceof Error ? err.message : String(err)}`);
                socket.close();
              }
            })();
            break;
          }
          case "bastion-state-sync": {
            if (!bastionId && msg.bastionId) {
              // Reconnection with known bastionId — re-register
              // NOTE(review): the presented bastionId is accepted without any
              // token or DB check — confirm this is the intended trust model.
              const reconnectId = msg.bastionId;
              bastionId = reconnectId;
              const existing = bastionRegistry.getById(reconnectId);
              if (!existing) {
                bastionRegistry.register({
                  bastionId: reconnectId,
                  hostname: "reconnecting",
                  network: "",
                  serverIp: "",
                  socket,
                  connectedAt: new Date(),
                  lastHeartbeat: new Date(),
                  state: msg.state,
                });
                // Update DB status
                // Fire-and-forget, but never without a rejection handler: the
                // update rejects when the id does not exist in the DB, and an
                // unhandled rejection would otherwise take the process down.
                void db.bastion
                  .update({ where: { id: reconnectId }, data: { status: "online", lastHeartbeat: new Date() } })
                  .catch((err: unknown) => {
                    logger.warn(
                      `Failed to mark bastion ${reconnectId.slice(0, 8)} online: ${err instanceof Error ? err.message : String(err)}`,
                    );
                  });
              }
            }
            if (bastionId) {
              bastionRegistry.updateState(bastionId, msg.state);
              logger.info(`Bastion ${bastionId.slice(0, 8)} state sync: ${Object.keys(msg.state.discovered).length} discovered, ${Object.keys(msg.state.installed).length} installed`);
            }
            break;
          }
          case "bastion-heartbeat": {
            // Heartbeats from sockets that never enrolled are ignored.
            if (bastionId) {
              bastionRegistry.updateHeartbeat(bastionId);
              socket.send(JSON.stringify({ type: "bastion-heartbeat-ack", serverTime: new Date().toISOString() }));
            }
            break;
          }
          case "bastion-progress": {
            // Forward to any SSE subscribers (future)
            logger.info(`Bastion progress: ${msg.mac} -> ${msg.stage}: ${msg.detail}`);
            break;
          }
          case "command-response": {
            // Handled by the pending command listener in bastions.ts routes
            break;
          }
        }
      } catch (err) {
        logger.error(`Failed to parse bastion message: ${err instanceof Error ? err.message : String(err)}`);
      }
    });

    socket.on("close", () => {
      if (bastionId) {
        const closedId = bastionId;
        logger.info(`Bastion ${closedId.slice(0, 8)} disconnected`);
        bastionRegistry.unregister(closedId);
        // Best effort: a failed status write must not crash the handler, but
        // it is logged rather than silently dropped.
        void db.bastion.update({ where: { id: closedId }, data: { status: "offline" } }).catch((err: unknown) => {
          logger.warn(
            `Failed to mark bastion ${closedId.slice(0, 8)} offline: ${err instanceof Error ? err.message : String(err)}`,
          );
        });
      }
    });
  });
});
// Log all requests
app.addHook("onRequest", async (request) => {
logger.info(`HTTP: ${request.ip} ${request.method} ${request.url}`);

View File

@@ -0,0 +1,65 @@
// In-memory registry of connected lab-agent WebSocket connections.
// Tracks agents by serverId and hostname, emits lifecycle events.
import { EventEmitter } from "node:events";
import type { WebSocket } from "ws";
/** One live lab-agent connection as tracked by AgentRegistry. */
export interface ConnectedAgent {
// Primary key of the registry.
serverId: string;
// Secondary lookup key (see AgentRegistry.getByHostname).
hostname: string;
// The agent's open WebSocket; messages to the agent are sent through it.
socket: WebSocket;
connectedAt: Date;
// Overwritten with the current time by AgentRegistry.updateHeartbeat.
lastHeartbeat: Date;
version: string;
certFingerprint: string;
}
/**
 * In-memory index of connected agents, keyed by serverId with a secondary
 * hostname → serverId index.
 *
 * Events (payload is always the affected ConnectedAgent):
 * `agent:connected`, `agent:disconnected`, `agent:heartbeat`.
 */
export class AgentRegistry extends EventEmitter {
  private agents: Map<string, ConnectedAgent> = new Map();
  private byHostname: Map<string, string> = new Map();

  /**
   * Adds the agent, replacing any previous entry for the same serverId.
   * The hostname index always points at the most recently registered agent
   * for that hostname.
   */
  register(agent: ConnectedAgent): void {
    // Re-registration under a different hostname must not leave the old
    // hostname resolving to this serverId.
    const previous = this.agents.get(agent.serverId);
    if (previous !== undefined && previous.hostname !== agent.hostname) {
      this.releaseHostname(previous.hostname, agent.serverId);
    }
    this.agents.set(agent.serverId, agent);
    this.byHostname.set(agent.hostname, agent.serverId);
    this.emit("agent:connected", agent);
  }

  /** Removes the agent; a no-op (no event) for an unknown serverId. */
  unregister(serverId: string): void {
    const agent = this.agents.get(serverId);
    if (agent === undefined) {
      return;
    }
    // Only drop the hostname entry if it still belongs to this agent — another
    // serverId may have claimed the same hostname in the meantime.
    this.releaseHostname(agent.hostname, serverId);
    this.agents.delete(serverId);
    this.emit("agent:disconnected", agent);
  }

  getByServerId(serverId: string): ConnectedAgent | undefined {
    return this.agents.get(serverId);
  }

  getByHostname(hostname: string): ConnectedAgent | undefined {
    const serverId = this.byHostname.get(hostname);
    if (serverId === undefined) {
      return undefined;
    }
    return this.agents.get(serverId);
  }

  /** Stamps the agent's lastHeartbeat with the current time; no-op if unknown. */
  updateHeartbeat(serverId: string): void {
    const agent = this.agents.get(serverId);
    if (agent !== undefined) {
      agent.lastHeartbeat = new Date();
      this.emit("agent:heartbeat", agent);
    }
  }

  getConnectedCount(): number {
    return this.agents.size;
  }

  /** Snapshot array; mutating it does not affect the registry. */
  getAllConnected(): ConnectedAgent[] {
    return [...this.agents.values()];
  }

  /** Deletes the hostname index entry only when it maps to `serverId`. */
  private releaseHostname(hostname: string, serverId: string): void {
    if (this.byHostname.get(hostname) === serverId) {
      this.byHostname.delete(hostname);
    }
  }
}

/** Process-wide registry instance shared by routes and services. */
export const agentRegistry = new AgentRegistry();

View File

@@ -0,0 +1,107 @@
// In-memory registry of connected bastion WebSocket connections.
// Tracks bastions by ID, caches their BastionState, and provides aggregation.
import { EventEmitter } from "node:events";
import type { WebSocket } from "ws";
import type { BastionState, HardwareInfo, InstallConfig, InstalledInfo } from "@lab/shared";
/** One live bastion connection as tracked by BastionRegistry. */
export interface ConnectedBastion {
// Registry key.
bastionId: string;
hostname: string;
network: string;
serverIp: string;
// The bastion's open WebSocket.
socket: WebSocket;
connectedAt: Date;
// Overwritten with the current time by BastionRegistry.updateHeartbeat.
lastHeartbeat: Date;
// Last state received from the bastion; replaced wholesale by updateState.
state: BastionState;
}
/**
 * Union of every connected bastion's state, each section keyed by MAC
 * address (the result shape of BastionRegistry.getAggregatedState).
 */
export interface AggregatedState {
discovered: Record<string, HardwareInfo>;
install_queue: Record<string, InstallConfig>;
installed: Record<string, InstalledInfo>;
}
/**
 * In-memory index of connected bastions keyed by bastionId, holding the last
 * BastionState each one reported.
 *
 * Events (payload is the affected ConnectedBastion):
 * `bastion:connected`, `bastion:disconnected`, `bastion:state-updated`.
 */
export class BastionRegistry extends EventEmitter {
  private bastions = new Map<string, ConnectedBastion>();

  /** Adds the bastion, replacing any previous entry with the same id. */
  register(bastion: ConnectedBastion): void {
    this.bastions.set(bastion.bastionId, bastion);
    this.emit("bastion:connected", bastion);
  }

  /** Removes the bastion; a no-op (no event) for an unknown id. */
  unregister(bastionId: string): void {
    const bastion = this.bastions.get(bastionId);
    if (bastion) {
      this.bastions.delete(bastionId);
      this.emit("bastion:disconnected", bastion);
    }
  }

  getById(bastionId: string): ConnectedBastion | undefined {
    return this.bastions.get(bastionId);
  }

  /** Snapshot array; mutating it does not affect the registry. */
  getAll(): ConnectedBastion[] {
    return [...this.bastions.values()];
  }

  getConnectedCount(): number {
    return this.bastions.size;
  }

  /** Replaces the cached state of a known bastion; no-op if unknown. */
  updateState(bastionId: string, state: BastionState): void {
    const bastion = this.bastions.get(bastionId);
    if (bastion) {
      bastion.state = state;
      this.emit("bastion:state-updated", bastion);
    }
  }

  /** Stamps lastHeartbeat with the current time; no-op if unknown. */
  updateHeartbeat(bastionId: string): void {
    const bastion = this.bastions.get(bastionId);
    if (bastion) {
      bastion.lastHeartbeat = new Date();
    }
  }

  /**
   * Find which bastion owns a given MAC address.
   *
   * The MAC is lower-cased before lookup; the first bastion (in registration
   * order) that lists it as discovered, queued or installed is returned.
   */
  findBastionByMac(mac: string): ConnectedBastion | undefined {
    const normalized = mac.toLowerCase();
    // Own-property test on purpose: `in` also matches inherited keys such as
    // "constructor" or "__proto__", which would turn arbitrary request input
    // into a bogus match.
    const lists = (table: object): boolean =>
      Object.prototype.hasOwnProperty.call(table, normalized);

    for (const bastion of this.bastions.values()) {
      const { discovered, install_queue, installed } = bastion.state;
      if (lists(discovered) || lists(install_queue) || lists(installed)) {
        return bastion;
      }
    }
    return undefined;
  }

  /**
   * Merge all bastion states into one, tagging each entry with bastionId.
   * When two bastions report the same MAC, the later-registered one wins.
   */
  getAggregatedState(): AggregatedState {
    const result: AggregatedState = {
      discovered: {},
      install_queue: {},
      installed: {},
    };
    for (const bastion of this.bastions.values()) {
      for (const [mac, hw] of Object.entries(bastion.state.discovered)) {
        result.discovered[mac] = { ...hw, bastionId: bastion.bastionId };
      }
      for (const [mac, cfg] of Object.entries(bastion.state.install_queue)) {
        result.install_queue[mac] = { ...cfg, bastionId: bastion.bastionId };
      }
      for (const [mac, info] of Object.entries(bastion.state.installed)) {
        result.installed[mac] = { ...info, bastionId: bastion.bastionId };
      }
    }
    return result;
  }
}

/** Process-wide registry instance shared by routes and the WebSocket handler. */
export const bastionRegistry = new BastionRegistry();

View File

@@ -0,0 +1,73 @@
// Encryption service for sensitive data (CA keys, kubeconfigs).
// Uses AES-256-GCM with scrypt-derived keys.
import {
createCipheriv,
createDecipheriv,
randomBytes,
scryptSync,
} from "node:crypto";
const ALGORITHM = "aes-256-gcm";
const KEY_LENGTH = 32;
const IV_LENGTH = 16;
// Full-length GCM tag. Enforced on decrypt so a shortened tag is rejected.
const AUTH_TAG_LENGTH = 16;
// NOTE(review): the scrypt salt is a fixed string, so a given master key
// always derives the same AES key. Changing it would make existing
// ciphertexts undecryptable, so it is left as-is here.
const SALT = "labctl-salt";

/**
 * AES-256-GCM encryption of UTF-8 strings with a key derived from a master
 * secret via scrypt.
 *
 * Wire format: `iv:authTag:ciphertext`, each part base64-encoded.
 */
export class EncryptionService {
  private key: Buffer;

  /** @param masterKey - secret from which the 32-byte AES key is derived */
  constructor(masterKey: string) {
    this.key = scryptSync(masterKey, SALT, KEY_LENGTH);
  }

  /**
   * Encrypts `plaintext` under a fresh random IV.
   *
   * @returns `iv:authTag:ciphertext` (all base64)
   */
  encrypt(plaintext: string): string {
    const iv = randomBytes(IV_LENGTH);
    const cipher = createCipheriv(ALGORITHM, this.key, iv, {
      authTagLength: AUTH_TAG_LENGTH,
    });
    let encrypted = cipher.update(plaintext, "utf8", "base64");
    encrypted += cipher.final("base64");
    const authTag = cipher.getAuthTag();
    // Format: iv:authTag:encrypted (all base64)
    return [
      iv.toString("base64"),
      authTag.toString("base64"),
      encrypted,
    ].join(":");
  }

  /**
   * Reverses {@link encrypt}.
   *
   * @throws Error when the input is not three `:`-separated parts, when the
   *   authentication tag is not exactly 16 bytes, or when authentication
   *   fails (wrong key or tampered data).
   */
  decrypt(ciphertext: string): string {
    const parts = ciphertext.split(":");
    if (parts.length !== 3) {
      throw new Error("Invalid ciphertext format: expected iv:authTag:encrypted");
    }
    const [ivB64, tagB64, encrypted] = parts as [string, string, string];
    const iv = Buffer.from(ivB64, "base64");
    const authTag = Buffer.from(tagB64, "base64");
    // Without a pinned tag length, GCM decryption can accept shorter tags,
    // which makes forgeries drastically cheaper. Only full tags are valid.
    if (authTag.length !== AUTH_TAG_LENGTH) {
      throw new Error(`Invalid ciphertext: authentication tag must be ${AUTH_TAG_LENGTH} bytes`);
    }
    const decipher = createDecipheriv(ALGORITHM, this.key, iv, {
      authTagLength: AUTH_TAG_LENGTH,
    });
    decipher.setAuthTag(authTag);
    let decrypted = decipher.update(encrypted, "base64", "utf8");
    // final() throws if the tag does not authenticate the data.
    decrypted += decipher.final("utf8");
    return decrypted;
  }
}
// --- Singleton ---
let _instance: EncryptionService | undefined;

/**
 * Returns the shared EncryptionService, creating it on first use from the
 * CA_ENCRYPTION_KEY environment variable.
 *
 * @throws Error when CA_ENCRYPTION_KEY is unset or shorter than 32 characters
 */
export function getEncryptionService(): EncryptionService {
  if (_instance) {
    return _instance;
  }
  const masterKey = process.env["CA_ENCRYPTION_KEY"];
  if (masterKey === undefined || masterKey.length < 32) {
    throw new Error("CA_ENCRYPTION_KEY must be set and at least 32 characters");
  }
  _instance = new EncryptionService(masterKey);
  return _instance;
}

View File

@@ -0,0 +1,192 @@
// WebSocket message routing between labd and connected agents.
// Dispatches incoming agent messages to handlers, manages pending requests.
import type { AgentMessage, ServerMessage, JournalOptions } from "@lab/shared";
import { generateRequestId } from "@lab/shared";
import type { ConnectedAgent } from "./agent-registry.js";
/**
 * Callback registered per message type with MessageRouter.registerHandler;
 * awaited by handleMessage before pending-request bookkeeping runs.
 */
export type MessageHandler = (
agent: ConnectedAgent,
message: AgentMessage,
) => Promise<void>;
/** Bookkeeping for one in-flight request or log subscription. */
export interface PendingRequest {
// Key in MessageRouter's pending map; matched against incoming messages.
requestId: string;
// Owning agent; used by cleanupAgent to fail everything for that agent.
serverId: string;
// "log" for log-subscribe messages and subscriptions, "exec" otherwise.
type: "exec" | "log";
// Settles the promise returned by sendRequest / sendRequestWithStreaming.
resolve: (result: unknown) => void;
reject: (error: Error) => void;
// Timer that expires the request; cleared when it completes or is cancelled.
timeout: NodeJS.Timeout;
// Receives exec-stdout / exec-stderr data and log-line lines, when set.
onData?: (chunk: string) => void;
}
/**
 * Routes agent messages to registered handlers and correlates responses with
 * pending requests by `requestId`.
 */
export class MessageRouter {
  private pendingRequests: Map<string, PendingRequest> = new Map();
  private handlers: Map<string, MessageHandler> = new Map();

  /** Registers (or replaces) the handler for one message type. */
  registerHandler(type: string, handler: MessageHandler): void {
    this.handlers.set(type, handler);
  }

  /**
   * Processes one incoming agent message: runs the type handler (if any),
   * then, for messages carrying a known `requestId`, streams data to the
   * request's `onData` or completes the request on `exec-exit` / `log-end`.
   */
  async handleMessage(
    agent: ConnectedAgent,
    message: AgentMessage,
  ): Promise<void> {
    // Dispatch to registered handler
    const handler = this.handlers.get(message.type);
    if (handler) {
      await handler(agent, message);
    }

    // Handle responses to pending requests
    if ("requestId" in message) {
      const pending = this.pendingRequests.get(message.requestId);
      if (!pending) return;
      switch (message.type) {
        case "exec-exit":
          clearTimeout(pending.timeout);
          pending.resolve({ exitCode: message.exitCode });
          this.pendingRequests.delete(message.requestId);
          break;
        case "exec-stdout":
        case "exec-stderr":
          pending.onData?.(message.data);
          break;
        case "log-line":
          pending.onData?.(message.line);
          break;
        case "log-end":
          clearTimeout(pending.timeout);
          pending.resolve({ completed: true });
          this.pendingRequests.delete(message.requestId);
          break;
      }
    }
  }

  /**
   * Sends `message` to the agent and resolves when the matching `exec-exit`
   * (`{ exitCode }`) or `log-end` (`{ completed: true }`) arrives.
   *
   * @param timeoutMs - rejects with a timeout error after this long (default 30s)
   * @throws (rejects) on timeout, cancellation, agent disconnect, or when the
   *   message cannot be sent
   */
  async sendRequest(
    agent: ConnectedAgent,
    message: ServerMessage,
    timeoutMs: number = 30_000,
  ): Promise<unknown> {
    return this.dispatch(agent, message, timeoutMs);
  }

  /**
   * Same as {@link sendRequest}, additionally forwarding every streamed
   * stdout/stderr chunk or log line for the request to `onData`.
   */
  async sendRequestWithStreaming(
    agent: ConnectedAgent,
    message: ServerMessage,
    onData: (chunk: string) => void,
    timeoutMs: number = 30_000,
  ): Promise<unknown> {
    return this.dispatch(agent, message, timeoutMs, onData);
  }

  /**
   * Starts a log subscription on the agent and delivers each line to `onLine`.
   *
   * @returns the subscription's requestId and an `unsubscribe` function that
   *   drops the subscription locally and sends `log-unsubscribe` to the agent
   */
  subscribeToLogs(
    agent: ConnectedAgent,
    options: JournalOptions,
    onLine: (line: string) => void,
  ): { requestId: string; unsubscribe: () => void } {
    const requestId = generateRequestId();
    // NOTE(review): this timer's callback is empty, so nothing actually ends
    // the subscription after 24h — confirm whether a hard cap is intended.
    const timeoutHandle = setTimeout(() => {
      // Log subscriptions don't time out by default, but we set a long one
    }, 24 * 60 * 60 * 1000); // 24h max

    this.pendingRequests.set(requestId, {
      requestId,
      serverId: agent.serverId,
      type: "log",
      resolve: () => {},
      reject: () => {},
      timeout: timeoutHandle,
      onData: onLine,
    });

    agent.socket.send(
      JSON.stringify({
        type: "log-subscribe",
        requestId,
        options,
      } satisfies ServerMessage),
    );

    return {
      requestId,
      unsubscribe: () => {
        clearTimeout(timeoutHandle);
        this.pendingRequests.delete(requestId);
        agent.socket.send(
          JSON.stringify({
            type: "log-unsubscribe",
            requestId,
          } satisfies ServerMessage),
        );
      },
    };
  }

  /**
   * Rejects a pending request with "Request cancelled".
   *
   * @returns false when no such request is pending
   */
  cancelRequest(requestId: string): boolean {
    const pending = this.pendingRequests.get(requestId);
    if (!pending) return false;
    clearTimeout(pending.timeout);
    pending.reject(new Error("Request cancelled"));
    this.pendingRequests.delete(requestId);
    return true;
  }

  /** Rejects every pending request of one agent with "Agent disconnected". */
  cleanupAgent(serverId: string): void {
    for (const [id, pending] of this.pendingRequests) {
      if (pending.serverId === serverId) {
        clearTimeout(pending.timeout);
        pending.reject(new Error("Agent disconnected"));
        this.pendingRequests.delete(id);
      }
    }
  }

  getPendingCount(): number {
    return this.pendingRequests.size;
  }

  /**
   * Shared implementation of sendRequest / sendRequestWithStreaming:
   * registers the pending entry with its timeout, then sends the message.
   */
  private dispatch(
    agent: ConnectedAgent,
    message: ServerMessage,
    timeoutMs: number,
    onData?: (chunk: string) => void,
  ): Promise<unknown> {
    return new Promise((resolve, reject) => {
      const requestId =
        "requestId" in message ? message.requestId : generateRequestId();

      const timeoutHandle = setTimeout(() => {
        this.pendingRequests.delete(requestId);
        reject(new Error(`Request ${requestId} timed out after ${timeoutMs}ms`));
      }, timeoutMs);

      const entry: PendingRequest = {
        requestId,
        serverId: agent.serverId,
        type: message.type === "log-subscribe" ? "log" : "exec",
        resolve,
        reject,
        timeout: timeoutHandle,
      };
      if (onData !== undefined) {
        entry.onData = onData;
      }
      // Register before sending so a fast response cannot race the bookkeeping.
      this.pendingRequests.set(requestId, entry);

      try {
        agent.socket.send(JSON.stringify(message));
      } catch (err) {
        // The request never left: drop the entry and its timer immediately
        // instead of leaving both around until the timeout fires.
        clearTimeout(timeoutHandle);
        this.pendingRequests.delete(requestId);
        reject(err instanceof Error ? err : new Error(String(err)));
      }
    });
  }
}

/** Process-wide router instance shared by the WebSocket layer and routes. */
export const messageRouter = new MessageRouter();

View File

@@ -0,0 +1,98 @@
// Graceful shutdown handling for labd.
// Registers SIGTERM/SIGINT handlers, drains connections, and exits cleanly.
import type { FastifyInstance } from "fastify";
import { logger } from "./logger.js";
import { agentRegistry } from "./agent-registry.js";
import { messageRouter } from "./message-router.js";
// Upper bound (ms) on a graceful shutdown; once it elapses the shutdown
// routine logs an error and calls process.exit(1).
const FORCE_EXIT_TIMEOUT_MS = 15_000;
// Module-level flag, set to true by the signal handler installed in
// setupGracefulShutdown when the first SIGTERM/SIGINT arrives.
let isShuttingDown = false;
/**
* Returns true if the server is in the process of shutting down.
* Read by the health routes to answer 503 while draining.
*/
export function isShuttingDownNow(): boolean {
return isShuttingDown;
}
/** Handles that setupGracefulShutdown closes during shutdown. */
export interface ShutdownResources {
// The Fastify instance; closed via app.close().
app: FastifyInstance;
// Optional database client; $disconnect is awaited when present.
db?: { $disconnect?: () => Promise<void> };
}
/**
 * Registers SIGTERM and SIGINT handlers for graceful shutdown.
 * Idempotent: a second signal while shutdown is in progress is ignored.
 *
 * Shutdown sequence: notify and close agent sockets, close the HTTP server,
 * fail pending message-router requests, disconnect the database, then
 * `process.exit(0)`. Any error exits with code 1, as does exceeding
 * FORCE_EXIT_TIMEOUT_MS.
 *
 * @param resources - the Fastify app and (optionally) the database client
 */
export function setupGracefulShutdown(resources: ShutdownResources): void {
  const { app, db } = resources;

  const shutdown = async (signal: string): Promise<void> => {
    if (isShuttingDown) {
      logger.info(`Received ${signal} again — shutdown already in progress, ignoring`);
      return;
    }
    isShuttingDown = true;
    logger.info(`Received ${signal}, starting graceful shutdown...`);

    // Safety net: force exit after timeout
    const forceExitTimer = setTimeout(() => {
      logger.error("Graceful shutdown timed out — forcing exit");
      process.exit(1);
    }, FORCE_EXIT_TIMEOUT_MS);
    // The timer alone must not keep the process alive.
    forceExitTimer.unref();

    try {
      // 1. Notify connected agents and close their sockets.
      //    This has to happen BEFORE app.close(): closing the server also
      //    tears down its WebSocket connections, after which the
      //    "server-shutdown" notice can no longer be delivered.
      const agents = agentRegistry.getAllConnected();
      if (agents.length > 0) {
        logger.info(`Notifying ${agents.length} connected agent(s) of shutdown`);
        for (const agent of agents) {
          try {
            agent.socket.send(
              JSON.stringify({
                type: "server-shutdown",
                reconnectAfter: 5000,
              }),
            );
            agent.socket.close();
          } catch {
            // Agent may already be disconnected
          }
        }
      }

      // 2. Stop accepting new connections
      await app.close();
      logger.info("HTTP server closed");

      // 3. Clean up pending message-router requests for each agent
      const pendingCount = messageRouter.getPendingCount();
      if (pendingCount > 0) {
        logger.info(`Cleaning up ${pendingCount} pending request(s)`);
        for (const agent of agents) {
          messageRouter.cleanupAgent(agent.serverId);
        }
      }

      // 4. Disconnect database
      if (db?.$disconnect) {
        await db.$disconnect();
        logger.info("Database disconnected");
      }

      logger.info("Graceful shutdown complete — goodbye");
      process.exit(0);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      logger.error(`Error during shutdown: ${message}`);
      process.exit(1);
    }
  };

  process.on("SIGTERM", () => void shutdown("SIGTERM"));
  process.on("SIGINT", () => void shutdown("SIGINT"));
}

View File

@@ -0,0 +1,13 @@
export {
createTokenSchema,
enrollmentSchema,
serverFiltersSchema,
permissionPatternSchema,
createRoleSchema,
type CreateTokenInput,
type EnrollmentInput,
type ServerFiltersInput,
type CreateRoleInput,
} from "./schemas.js";
export { validateBody, validateQuery } from "./middleware.js";

View File

@@ -0,0 +1,32 @@
// Fastify validation middleware using Zod schemas.
import type { FastifyRequest, FastifyReply } from "fastify";
import type { ZodSchema } from "zod";
/**
 * Builds a Fastify hook that validates `request.body` against `schema`.
 *
 * On success the body is replaced with the parsed value (defaults applied);
 * on failure a 400 `{ error: "Validation failed", details }` reply is sent,
 * where `details` is the flattened Zod error.
 */
export function validateBody<T>(schema: ZodSchema<T>) {
  return async (request: FastifyRequest, reply: FastifyReply): Promise<void> => {
    const parsed = schema.safeParse(request.body);
    if (parsed.success) {
      (request as unknown as { body: T }).body = parsed.data;
      return;
    }
    void reply.status(400).send({
      error: "Validation failed",
      details: parsed.error.flatten(),
    });
  };
}
/**
 * Builds a Fastify hook that validates `request.query` against `schema`.
 *
 * On success the query is replaced with the parsed value (defaults applied);
 * on failure a 400 `{ error: "Validation failed", details }` reply is sent,
 * where `details` is the flattened Zod error.
 */
export function validateQuery<T>(schema: ZodSchema<T>) {
  return async (request: FastifyRequest, reply: FastifyReply): Promise<void> => {
    const parsed = schema.safeParse(request.query);
    if (parsed.success) {
      (request as unknown as { query: T }).query = parsed.data;
      return;
    }
    void reply.status(400).send({
      error: "Validation failed",
      details: parsed.error.flatten(),
    });
  };
}

View File

@@ -0,0 +1,37 @@
// Zod validation schemas for labd API requests.
import { z } from "zod";
/**
 * Body of a create-token request. `type` defaults to "one-time";
 * `expiresInHours`, when given, must be positive and at most 8760 (365 days).
 */
export const createTokenSchema = z.object({
type: z.enum(["one-time", "reusable"]).default("one-time"),
label: z.string().max(255).optional(),
expiresInHours: z.number().positive().max(8760).optional(), // Max 1 year
});
export type CreateTokenInput = z.infer<typeof createTokenSchema>;
/**
 * Body of an enrollment request: a non-empty token, a hostname of at most
 * 253 characters, and an optional CSR string.
 */
export const enrollmentSchema = z.object({
token: z.string().min(1, "token is required"),
// Must start with a letter or digit; letters, digits, "." and "-" after that
// (case-insensitive).
// NOTE(review): the pattern also admits consecutive dots and a trailing
// "-" or "." — confirm whether stricter hostname rules are wanted.
hostname: z.string().regex(/^[a-z0-9][a-z0-9.-]*$/i, "Invalid hostname format").max(253),
csr: z.string().optional(),
});
export type EnrollmentInput = z.infer<typeof enrollmentSchema>;
/** Optional filters for listing servers; every field may be omitted. */
export const serverFiltersSchema = z.object({
cloud: z.string().optional(),
environment: z.string().optional(),
status: z.enum(["online", "offline", "provisioning", "unknown"]).optional(),
});
export type ServerFiltersInput = z.infer<typeof serverFiltersSchema>;
/**
 * One permission pattern: four colon-separated segments
 * (action:cloud:environment:server). The first three allow lowercase letters
 * and "*"; the server segment additionally allows digits, "." and "-".
 */
export const permissionPatternSchema = z.string().regex(
/^[a-z*]+:[a-z*]+:[a-z*]+:[a-z0-9.*-]+$/,
"Permission must match pattern action:cloud:environment:server",
);
/**
 * Body of a create-role request. `name` is 1–64 characters, starts with a
 * lowercase letter and continues with lowercase letters, digits or hyphens;
 * `allow` / `deny` are optional lists of permission patterns.
 */
export const createRoleSchema = z.object({
name: z.string().min(1).max(64).regex(/^[a-z][a-z0-9-]*$/, "Role name must be lowercase alphanumeric with hyphens"),
description: z.string().max(500).optional(),
allow: z.array(permissionPatternSchema).optional(),
deny: z.array(permissionPatternSchema).optional(),
});
export type CreateRoleInput = z.infer<typeof createRoleSchema>;

View File

@@ -0,0 +1,112 @@
// Tests for AgentRegistry.
import { describe, it, expect, vi, beforeEach } from "vitest";
import { AgentRegistry, type ConnectedAgent } from "../src/services/agent-registry.js";
/**
 * Test fixture: a ConnectedAgent ("srv-1" / "worker-1") with a stubbed
 * socket; any field can be replaced through `overrides`.
 */
function makeAgent(overrides: Partial<ConnectedAgent> = {}): ConnectedAgent {
  // Only the members the code under test may touch are stubbed.
  const fakeSocket = {
    send: vi.fn(),
    close: vi.fn(),
    readyState: 1,
  } as unknown as ConnectedAgent["socket"];

  const defaults: ConnectedAgent = {
    serverId: "srv-1",
    hostname: "worker-1",
    socket: fakeSocket,
    connectedAt: new Date(),
    lastHeartbeat: new Date(),
    version: "0.1.0",
    certFingerprint: "abc123",
  };
  return { ...defaults, ...overrides };
}
// Unit tests for AgentRegistry: lookups by serverId and hostname,
// unregistration, heartbeat updates and the three lifecycle events.
describe("AgentRegistry", () => {
let registry: AgentRegistry;
// A fresh registry per test keeps the cases independent of each other.
beforeEach(() => {
registry = new AgentRegistry();
});
it("starts empty", () => {
expect(registry.getConnectedCount()).toBe(0);
expect(registry.getAllConnected()).toEqual([]);
});
it("registers and retrieves by serverId", () => {
const agent = makeAgent();
registry.register(agent);
expect(registry.getByServerId("srv-1")).toBe(agent);
expect(registry.getConnectedCount()).toBe(1);
});
it("retrieves by hostname", () => {
const agent = makeAgent({ hostname: "web-1" });
registry.register(agent);
expect(registry.getByHostname("web-1")).toBe(agent);
});
it("returns undefined for unknown serverId", () => {
expect(registry.getByServerId("nope")).toBeUndefined();
});
it("returns undefined for unknown hostname", () => {
expect(registry.getByHostname("nope")).toBeUndefined();
});
// Unregistering must clear both the serverId entry and the hostname index.
it("unregisters agent", () => {
const agent = makeAgent();
registry.register(agent);
registry.unregister("srv-1");
expect(registry.getByServerId("srv-1")).toBeUndefined();
expect(registry.getByHostname("worker-1")).toBeUndefined();
expect(registry.getConnectedCount()).toBe(0);
});
it("unregister is no-op for unknown serverId", () => {
registry.unregister("nonexistent"); // should not throw
expect(registry.getConnectedCount()).toBe(0);
});
// Starts from a heartbeat far in the past so any refresh is strictly newer.
it("updates heartbeat", () => {
const agent = makeAgent();
const oldTime = new Date(2020, 0, 1);
agent.lastHeartbeat = oldTime;
registry.register(agent);
registry.updateHeartbeat("srv-1");
expect(agent.lastHeartbeat.getTime()).toBeGreaterThan(oldTime.getTime());
});
it("emits agent:connected on register", () => {
const handler = vi.fn();
registry.on("agent:connected", handler);
const agent = makeAgent();
registry.register(agent);
expect(handler).toHaveBeenCalledWith(agent);
});
it("emits agent:disconnected on unregister", () => {
const handler = vi.fn();
registry.on("agent:disconnected", handler);
const agent = makeAgent();
registry.register(agent);
registry.unregister("srv-1");
expect(handler).toHaveBeenCalledWith(agent);
});
it("emits agent:heartbeat on updateHeartbeat", () => {
const handler = vi.fn();
registry.on("agent:heartbeat", handler);
const agent = makeAgent();
registry.register(agent);
registry.updateHeartbeat("srv-1");
expect(handler).toHaveBeenCalledWith(agent);
});
// Removing one agent must leave the other reachable by hostname.
it("handles multiple agents", () => {
const a1 = makeAgent({ serverId: "s1", hostname: "h1" });
const a2 = makeAgent({ serverId: "s2", hostname: "h2" });
registry.register(a1);
registry.register(a2);
expect(registry.getConnectedCount()).toBe(2);
expect(registry.getAllConnected()).toHaveLength(2);
registry.unregister("s1");
expect(registry.getConnectedCount()).toBe(1);
expect(registry.getByHostname("h2")).toBe(a2);
});
});

View File

@@ -0,0 +1,208 @@
// Tests for auth and token management routes.
import { describe, it, expect, vi, beforeEach } from "vitest";
import Fastify from "fastify";
import { registerAuthRoutes } from "../src/routes/auth.js";
/**
 * In-memory stand-in for the database client used by the auth route tests.
 *
 * Join tokens live in one Map in which every record is stored under BOTH its
 * token value and its id, so `findUnique` can be served by either key. The
 * Map is exposed as `_tokens` so tests can tweak records directly.
 */
function createMockDb() {
  const tokens: Map<string, Record<string, unknown>> = new Map();
  return {
    $queryRaw: vi.fn(),
    server: { findMany: vi.fn(), findUnique: vi.fn() },
    joinToken: {
      findUnique: vi.fn(async (args: { where: { token?: string; id?: string } }) => {
        const key = args.where.token ?? args.where.id;
        return key ? tokens.get(key) ?? null : null;
      }),
      // Every record sits in the Map twice (token key + id key); dedupe by
      // identity so each token is listed exactly once.
      findMany: vi.fn(async () => [...new Set(tokens.values())]),
      create: vi.fn(async (args: { data: Record<string, unknown> }) => {
        const id = `tok-${tokens.size + 1}`;
        const record = {
          id,
          ...args.data,
          usedBy: null,
          usedAt: null,
          revokedAt: null,
          createdAt: new Date(),
        };
        tokens.set(record.token as string, record);
        tokens.set(id, record);
        return record;
      }),
      update: vi.fn(async (args: { where: { id: string }; data: Record<string, unknown> }) => {
        const existing = tokens.get(args.where.id);
        if (existing) Object.assign(existing, args.data);
        return existing;
      }),
    },
    // Plain stubs so the mock carries the bastion delegate that the DbClient
    // interface declares alongside server and joinToken.
    bastion: {
      upsert: vi.fn(),
      findMany: vi.fn(),
      findUnique: vi.fn(),
      update: vi.fn(),
    },
    _tokens: tokens,
  };
}
// Route-level tests for token creation, listing and enrollment, backed by the in-memory mock db.
describe("auth routes", () => {
  let app: ReturnType<typeof Fastify>;
  let db: ReturnType<typeof createMockDb>;

  // Small request helpers so each case only spells out the payload it cares about.
  const post = (url: string, payload: Record<string, unknown>) =>
    app.inject({ method: "POST", url, payload });
  const createToken = (payload: Record<string, unknown>) => post("/api/tokens", payload);
  const enroll = (payload: Record<string, unknown>) => post("/api/auth/enroll", payload);

  beforeEach(async () => {
    app = Fastify({ logger: false });
    db = createMockDb();
    registerAuthRoutes(app, db);
    await app.ready();
  });

  describe("POST /api/tokens", () => {
    it("creates a one-time token", async () => {
      const resp = await createToken({ label: "test-token" });
      expect(resp.statusCode).toBe(201);
      const { token, type, label } = resp.json();
      expect(token).toBeDefined();
      expect(type).toBe("one-time");
      expect(label).toBe("test-token");
    });

    it("creates a reusable token", async () => {
      const resp = await createToken({ type: "reusable", label: "asg-token" });
      expect(resp.statusCode).toBe(201);
      expect(resp.json().type).toBe("reusable");
    });

    it("rejects invalid type", async () => {
      const resp = await createToken({ type: "invalid" });
      expect(resp.statusCode).toBe(400);
    });

    it("creates token with expiry", async () => {
      const resp = await createToken({ expiresInHours: 24 });
      expect(resp.statusCode).toBe(201);
      expect(resp.json().expiresAt).toBeDefined();
    });
  });

  describe("GET /api/tokens", () => {
    it("lists tokens", async () => {
      // Seed one token so the listing has something to return.
      await createToken({ label: "t1" });
      const resp = await app.inject({ method: "GET", url: "/api/tokens" });
      expect(resp.statusCode).toBe(200);
      expect(Array.isArray(resp.json())).toBe(true);
    });
  });

  describe("POST /api/auth/enroll", () => {
    it("rejects missing token", async () => {
      const resp = await enroll({ hostname: "w1" });
      expect(resp.statusCode).toBe(400);
      expect(resp.json().error).toContain("token");
    });

    it("rejects missing hostname", async () => {
      const resp = await enroll({ token: "some-token" });
      expect(resp.statusCode).toBe(400);
      expect(resp.json().error).toContain("hostname");
    });

    it("rejects invalid token", async () => {
      const resp = await enroll({ token: "nonexistent", hostname: "w1" });
      expect(resp.statusCode).toBe(401);
    });

    it("accepts valid token", async () => {
      const created = await createToken({ label: "test" });
      const tokenValue = created.json().token;
      const resp = await enroll({ token: tokenValue, hostname: "worker-1" });
      expect(resp.statusCode).toBe(200);
      const body = resp.json();
      expect(body.status).toBe("enrolled");
      expect(body.hostname).toBe("worker-1");
    });

    it("rejects revoked token", async () => {
      const created = (await createToken({ label: "revoked" })).json();
      // Flip the stored record to revoked directly in the mock's backing Map.
      const stored = db._tokens.get(created.token);
      if (stored) stored.revokedAt = new Date();
      const resp = await enroll({ token: created.token, hostname: "w1" });
      expect(resp.statusCode).toBe(401);
      expect(resp.json().error).toContain("revoked");
    });

    it("rejects expired token", async () => {
      const created = (await createToken({ label: "expired" })).json();
      // Back-date the expiry; revokedAt stays null so the expiry branch is the one exercised.
      const stored = db._tokens.get(created.token);
      if (stored) {
        stored.expiresAt = new Date(Date.now() - 60000);
        stored.revokedAt = null;
      }
      const resp = await enroll({ token: created.token, hostname: "w1" });
      expect(resp.statusCode).toBe(401);
      expect(resp.json().error).toContain("expired");
    });
  });
});

View File

@@ -0,0 +1,63 @@
// Tests for EncryptionService.
import { describe, it, expect } from "vitest";
import { EncryptionService } from "../src/services/encryption.js";
// 32-character key shared by every case that does not need a second, different key.
const MASTER_KEY = "a".repeat(32);

describe("EncryptionService", () => {
  // Fresh service per call, mirroring the one-instance-per-test setup.
  const makeService = () => new EncryptionService(MASTER_KEY);

  it("encrypt/decrypt roundtrip", () => {
    const service = makeService();
    const message = "hello world secret data";
    const sealed = service.encrypt(message);
    expect(sealed).not.toBe(message);
    expect(service.decrypt(sealed)).toBe(message);
  });

  it("encrypts empty string", () => {
    const service = makeService();
    const sealed = service.encrypt("");
    expect(service.decrypt(sealed)).toBe("");
  });

  it("encrypts unicode", () => {
    const service = makeService();
    const message = "Héllo 世界 🌍";
    const sealed = service.encrypt(message);
    expect(service.decrypt(sealed)).toBe(message);
  });

  it("encrypts large data", () => {
    const service = makeService();
    const message = "x".repeat(100_000);
    const sealed = service.encrypt(message);
    expect(service.decrypt(sealed)).toBe(message);
  });

  it("different IVs produce different ciphertext", () => {
    const service = makeService();
    const first = service.encrypt("same");
    const second = service.encrypt("same");
    // Two encryptions of the same plaintext must not collide.
    expect(first).not.toBe(second);
    expect(service.decrypt(first)).toBe("same");
    expect(service.decrypt(second)).toBe("same");
  });

  it("different keys produce different ciphertext", () => {
    const owner = new EncryptionService("a".repeat(32));
    const stranger = new EncryptionService("b".repeat(32));
    const sealed = owner.encrypt("secret");
    // Decrypting with the wrong key is expected to throw.
    expect(() => stranger.decrypt(sealed)).toThrow();
  });

  it("rejects tampered ciphertext", () => {
    const service = makeService();
    const segments = service.encrypt("data").split(":");
    // Overwrite the first four characters of the third segment.
    segments[2] = "AAAA" + segments[2]!.slice(4);
    const forged = segments.join(":");
    expect(() => service.decrypt(forged)).toThrow();
  });

  it("rejects invalid format", () => {
    const service = makeService();
    for (const malformed of ["not:enough", ""]) {
      expect(() => service.decrypt(malformed)).toThrow("Invalid ciphertext format");
    }
  });
});

View File

@@ -0,0 +1,123 @@
// Tests for Zod validation schemas.
import { describe, it, expect } from "vitest";
import {
createTokenSchema,
enrollmentSchema,
serverFiltersSchema,
createRoleSchema,
permissionPatternSchema,
} from "../src/validation/schemas.js";
// createTokenSchema: default type, full input, and rejection of bad type / expiry values.
describe("createTokenSchema", () => {
  it("accepts minimal input", () => {
    const parsed = createTokenSchema.safeParse({});
    expect(parsed.success).toBe(true);
    if (parsed.success) {
      // An omitted type falls back to the schema default.
      expect(parsed.data.type).toBe("one-time");
    }
  });

  it("accepts full input", () => {
    const input = { type: "reusable", label: "asg-token", expiresInHours: 24 };
    expect(createTokenSchema.safeParse(input).success).toBe(true);
  });

  it("rejects invalid type", () => {
    expect(createTokenSchema.safeParse({ type: "invalid" }).success).toBe(false);
  });

  it("rejects negative expiry", () => {
    expect(createTokenSchema.safeParse({ expiresInHours: -1 }).success).toBe(false);
  });

  it("rejects expiry over 1 year", () => {
    expect(createTokenSchema.safeParse({ expiresInHours: 9000 }).success).toBe(false);
  });
});
// enrollmentSchema: a token plus a well-formed hostname is accepted; empty or malformed values are not.
describe("enrollmentSchema", () => {
  it("accepts valid enrollment", () => {
    const input = { token: "abc123", hostname: "worker-1.ad.itaz.eu" };
    expect(enrollmentSchema.safeParse(input).success).toBe(true);
  });

  it("rejects empty token", () => {
    const input = { token: "", hostname: "w1" };
    expect(enrollmentSchema.safeParse(input).success).toBe(false);
  });

  it("rejects invalid hostname", () => {
    const input = { token: "abc", hostname: "-invalid" };
    expect(enrollmentSchema.safeParse(input).success).toBe(false);
  });
});
// serverFiltersSchema: filters are optional, and status must be a recognised value.
describe("serverFiltersSchema", () => {
  it("accepts empty filters", () => {
    expect(serverFiltersSchema.safeParse({}).success).toBe(true);
  });

  it("accepts valid status", () => {
    expect(serverFiltersSchema.safeParse({ status: "online" }).success).toBe(true);
  });

  it("rejects invalid status", () => {
    expect(serverFiltersSchema.safeParse({ status: "banana" }).success).toBe(false);
  });
});
// permissionPatternSchema: four colon-separated segments are accepted; short or uppercase ones are not.
describe("permissionPatternSchema", () => {
  it("accepts valid patterns", () => {
    for (const pattern of ["read:*:*:*", "exec:baremetal:lab:*", "*:*:*:worker-1"]) {
      expect(permissionPatternSchema.safeParse(pattern).success).toBe(true);
    }
  });

  it("rejects invalid patterns", () => {
    for (const pattern of ["read", "read:*", "READ:*:*:*"]) {
      expect(permissionPatternSchema.safeParse(pattern).success).toBe(false);
    }
  });
});
// createRoleSchema: a full role definition is accepted; uppercase or empty names are rejected.
describe("createRoleSchema", () => {
  it("accepts valid role", () => {
    const role = {
      name: "deployer",
      description: "Can deploy apps",
      allow: ["exec:*:*:*", "read:*:*:*"],
      deny: ["destroy:*:*:*"],
    };
    expect(createRoleSchema.safeParse(role).success).toBe(true);
  });

  it("rejects uppercase role name", () => {
    expect(createRoleSchema.safeParse({ name: "Admin" }).success).toBe(false);
  });

  it("rejects empty role name", () => {
    expect(createRoleSchema.safeParse({ name: "" }).success).toBe(false);
  });
});

View File

@@ -0,0 +1,6 @@
# Manifest for the k3s module.
# NOTE(review): K3sModule.metadata (k3s-module.ts) declares version 1.0.0 and additionally
# targets the "labcontroller" role — confirm which of the two is authoritative.
name: k3s
version: 0.1.0
description: Install and configure k3s with CIS security hardening and Cilium CNI
targets:
  # Must be nested under `targets`; at column 0 it would parse as a separate top-level key.
  roles: [infra, worker]
dependencies: []

View File

@@ -0,0 +1,117 @@
// k3s module — configure phase.
// Post-install configuration: log rotation, network policies, cert rotation.
/**
 * Generates the bash script for the k3s post-install "configure" phase.
 *
 * The script runs under `set -euo pipefail` and performs, in order: the CoreDNS upstream
 * resolver fix (restarting k3s when it is active), logrotate setup for
 * /var/log/kubernetes/*.log, a certificate rotation/expiry report, default NetworkPolicies in
 * the `default` namespace, and a final node/pod listing.
 *
 * @param hostname Host name shown in the script banner. Characters that are special inside
 *   a double-quoted shell string (backslash, double quote, dollar sign, backtick) are
 *   backslash-escaped so the value is printed literally and never evaluated.
 * @returns The complete script text, starting with the `#!/bin/bash` shebang.
 */
export function generateConfigureScript(hostname: string): string {
  // The banner is a double-quoted shell string; neutralise anything the shell would expand.
  const shellSafeHostname = hostname.replace(/[\\"$`]/g, "\\$&");
  return `#!/bin/bash
set -euo pipefail
echo "=== k3s configure: ${shellSafeHostname} ==="
# ── 0. Fix CoreDNS upstream resolver ──
# systemd-resolved listens on 127.0.0.53, but that address is unreachable from
# inside CoreDNS's pod network namespace. CoreDNS forwards to /etc/resolv.conf
# which contains 127.0.0.53 on systemd-resolved hosts, causing all external DNS
# lookups to time out. Fix: write a resolv.conf with the real upstream DNS server
# that k3s will use instead of /etc/resolv.conf.
echo "[0/4] Fixing CoreDNS upstream DNS..."
UPSTREAM_DNS=$(resolvectl status 2>/dev/null | grep -A2 "Link.*$(ip -4 route show default | awk '{print $5}' | head -1)" | grep "Current DNS" | awk '{print $NF}' || echo "")
if [ -z "$UPSTREAM_DNS" ]; then
# Fallback: parse resolv.conf from systemd-resolved's real config
UPSTREAM_DNS=$(cat /run/systemd/resolve/resolv.conf 2>/dev/null | grep "^nameserver" | head -1 | awk '{print $2}' || echo "")
fi
if [ -n "$UPSTREAM_DNS" ] && [ "$UPSTREAM_DNS" != "127.0.0.53" ]; then
mkdir -p /etc/rancher/k3s
echo "nameserver $UPSTREAM_DNS" > /etc/rancher/k3s/resolv.conf
echo " Wrote /etc/rancher/k3s/resolv.conf with upstream DNS: $UPSTREAM_DNS"
# k3s reads this file automatically on next restart; restart now to apply
if systemctl is-active k3s >/dev/null 2>&1; then
systemctl restart k3s
echo " Restarted k3s to pick up DNS fix"
# Wait for API to come back
for i in $(seq 1 30); do
if k3s kubectl get nodes >/dev/null 2>&1; then
break
fi
sleep 2
done
fi
else
echo " Upstream DNS already correct or could not detect — skipping"
fi
# ── 1. Log rotation for k3s ──
echo "[1/4] Setting up log rotation..."
cat > /etc/logrotate.d/k3s << 'LOGROTATE'
/var/log/kubernetes/*.log {
daily
rotate 14
compress
delaycompress
missingok
notifempty
copytruncate
maxsize 100M
}
LOGROTATE
# ── 2. Verify certificate rotation ──
echo "[2/4] Checking certificate rotation..."
if k3s certificate rotate --help > /dev/null 2>&1; then
echo " Certificate rotation available"
else
echo " Warning: certificate rotation not available in this k3s version"
fi
# Check cert expiry
CERT_DIR="/var/lib/rancher/k3s/server/tls"
if [ -d "$CERT_DIR" ]; then
for cert in "$CERT_DIR"/*.crt; do
[ -f "$cert" ] || continue
EXPIRY=$(openssl x509 -in "$cert" -enddate -noout 2>/dev/null | cut -d= -f2)
echo " $(basename "$cert"): expires $EXPIRY"
done
fi
# ── 3. Default network policy (deny all ingress by default) ──
echo "[3/4] Applying default network policies..."
k3s kubectl apply -f - << 'NETPOL'
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny-ingress
  namespace: default
spec:
  podSelector: {}
  policyTypes:
    - Ingress
NETPOL
# Allow DNS
k3s kubectl apply -f - << 'DNSPOL'
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-dns
  namespace: default
spec:
  podSelector: {}
  policyTypes:
    - Egress
  egress:
    - to: []
      ports:
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP
DNSPOL
# ── 4. Verify cluster state ──
echo "[4/4] Verifying cluster state..."
k3s kubectl get nodes
k3s kubectl get pods -A
echo "=== k3s configure complete ==="
`;
}

View File

@@ -0,0 +1,22 @@
// Hardening: Pod Security Standards, certificate check, log rotation.
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
import { runSequential } from "../utils.js";
import { applyPodSecurityStandards } from "../operations/pod-security.js";
import { checkCertExpiry } from "../operations/cert-check.js";
import { configureLogRotation } from "../operations/log-rotation.js";
/** Hardening steps, listed in the order they are handed to `runSequential`. */
const hardeningOperations: OperationGroup["operations"] = [
  { name: "Apply Pod Security Standards", fn: applyPodSecurityStandards },
  { name: "Check certificate expiry", fn: checkCertExpiry },
  { name: "Configure log rotation", fn: configureLogRotation },
];

/** Operation group covering pod security, certificate check and log rotation. */
export const hardeningGroup: OperationGroup = {
  name: "hardening",
  description: "Pod security, certificate check, log rotation",
  operations: hardeningOperations,
};

/**
 * Logs a banner through `ctx.log`, then passes the group's operations to `runSequential`.
 *
 * @param ctx Operation context forwarded to `runSequential`.
 * @returns Whatever `runSequential` resolves to for this group's operations.
 */
export async function runHardening(ctx: OperationContext): Promise<OperationResult[]> {
  ctx.log("Cluster hardening...");
  const { operations } = hardeningGroup;
  return runSequential(ctx, operations);
}

View File

@@ -0,0 +1,26 @@
// Host preparation: kernel modules, sysctl, swap, firewall, SELinux.
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
import { runSequential } from "../utils.js";
import { loadKernelModules } from "../operations/kernel-modules.js";
import { applyCisHardening } from "../operations/sysctl.js";
import { disableSwap } from "../operations/swap.js";
import { disableFirewall } from "../operations/firewall.js";
import { setSelinuxPermissive } from "../operations/selinux.js";
/** Host preparation steps, listed in the order they are handed to `runSequential`. */
const hostPrepOperations: OperationGroup["operations"] = [
  { name: "Load kernel modules", fn: loadKernelModules },
  { name: "Apply CIS sysctl", fn: applyCisHardening },
  { name: "Disable swap", fn: disableSwap },
  { name: "Disable firewall", fn: disableFirewall },
  { name: "Set SELinux permissive", fn: setSelinuxPermissive },
];

/** Operation group that prepares a host before k3s is installed. */
export const hostPrepGroup: OperationGroup = {
  name: "host-prep",
  description: "Prepare host for k3s: kernel modules, sysctl, swap, firewall, SELinux",
  operations: hostPrepOperations,
};

/**
 * Logs a banner through `ctx.log`, then passes the group's operations to `runSequential`.
 *
 * @param ctx Operation context forwarded to `runSequential`.
 * @returns Whatever `runSequential` resolves to for this group's operations.
 */
export async function runHostPrep(ctx: OperationContext): Promise<OperationResult[]> {
  ctx.log("Host preparation...");
  const { operations } = hostPrepGroup;
  return runSequential(ctx, operations);
}

View File

@@ -0,0 +1,5 @@
// Barrel for the k3s operation groups: re-exports each group definition together with its runner.
export { hostPrepGroup, runHostPrep } from "./host-prep.js";
export { k3sServerGroup, runK3sServer } from "./k3s-server.js";
export { k3sAgentGroup, runK3sAgent } from "./k3s-agent.js";
export { networkingGroup, runNetworking } from "./networking.js";
export { hardeningGroup, runHardening } from "./hardening.js";

View File

@@ -0,0 +1,20 @@
// K3s agent installation: config + binary in agent mode.
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
import { runSequential } from "../utils.js";
import { writeK3sConfig } from "../operations/k3s-config.js";
import { installK3sBinary } from "../operations/k3s-install.js";
/** Agent installation steps, listed in the order they are handed to `runSequential`. */
const k3sAgentOperations: OperationGroup["operations"] = [
  { name: "Write k3s config", fn: writeK3sConfig },
  { name: "Install k3s agent", fn: installK3sBinary },
];

/** Operation group that writes the k3s config and installs the binary for the agent role. */
export const k3sAgentGroup: OperationGroup = {
  name: "k3s-agent",
  description: "Install k3s agent and join cluster",
  operations: k3sAgentOperations,
};

/**
 * Logs a banner through `ctx.log`, then passes the group's operations to `runSequential`.
 *
 * @param ctx Operation context forwarded to `runSequential`.
 * @returns Whatever `runSequential` resolves to for this group's operations.
 */
export async function runK3sAgent(ctx: OperationContext): Promise<OperationResult[]> {
  ctx.log("K3s agent installation...");
  const { operations } = k3sAgentGroup;
  return runSequential(ctx, operations);
}

View File

@@ -0,0 +1,24 @@
// K3s server installation: config, audit policy, CNI cleanup, binary install.
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
import { runSequential } from "../utils.js";
import { writeK3sConfig } from "../operations/k3s-config.js";
import { writeAuditPolicy } from "../operations/audit-policy.js";
import { cleanupStaleCni } from "../operations/cni-cleanup.js";
import { installK3sBinary } from "../operations/k3s-install.js";
/** Server installation steps, listed in the order they are handed to `runSequential`. */
const k3sServerOperations: OperationGroup["operations"] = [
  { name: "Write k3s config", fn: writeK3sConfig },
  { name: "Write audit policy", fn: writeAuditPolicy },
  { name: "Clean stale CNI", fn: cleanupStaleCni },
  { name: "Install k3s binary", fn: installK3sBinary },
];

/** Operation group that configures and installs the k3s server. */
export const k3sServerGroup: OperationGroup = {
  name: "k3s-server",
  description: "Install k3s server with CIS-hardened config",
  operations: k3sServerOperations,
};

/**
 * Logs a banner through `ctx.log`, then passes the group's operations to `runSequential`.
 *
 * @param ctx Operation context forwarded to `runSequential`.
 * @returns Whatever `runSequential` resolves to for this group's operations.
 */
export async function runK3sServer(ctx: OperationContext): Promise<OperationResult[]> {
  ctx.log("K3s server installation...");
  const { operations } = k3sServerGroup;
  return runSequential(ctx, operations);
}

View File

@@ -0,0 +1,22 @@
// Networking: Cilium CNI, CoreDNS fix, network policies.
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
import { runSequential } from "../utils.js";
import { installCilium } from "../operations/cilium.js";
import { fixCoreDnsUpstream } from "../operations/dns-fix.js";
import { applyDefaultNetworkPolicies } from "../operations/network-policy.js";
/** Networking steps, listed in the order they are handed to `runSequential`. */
const networkingOperations: OperationGroup["operations"] = [
  { name: "Install Cilium CNI", fn: installCilium },
  { name: "Fix CoreDNS upstream", fn: fixCoreDnsUpstream },
  { name: "Apply network policies", fn: applyDefaultNetworkPolicies },
];

/** Operation group covering the Cilium install, the DNS fix and the default network policies. */
export const networkingGroup: OperationGroup = {
  name: "networking",
  description: "Install Cilium CNI, fix DNS, apply network policies",
  operations: networkingOperations,
};

/**
 * Logs a banner through `ctx.log`, then passes the group's operations to `runSequential`.
 *
 * @param ctx Operation context forwarded to `runSequential`.
 * @returns Whatever `runSequential` resolves to for this group's operations.
 */
export async function runNetworking(ctx: OperationContext): Promise<OperationResult[]> {
  ctx.log("Networking setup...");
  const { operations } = networkingGroup;
  return runSequential(ctx, operations);
}

View File

@@ -0,0 +1,56 @@
// k3s module — health check phase.
// Verifies k3s is running, nodes ready, API accessible, Cilium healthy, encryption active.
/** A single shell-based health probe together with the predicate that judges its output. */
export interface HealthCheck {
  /** Human-readable label for the probe. */
  name: string;
  /** Shell command to run on the target host. */
  command: string;
  /** Function to check if the command output indicates success */
  check: (stdout: string, exitCode: number) => boolean;
}

/**
 * Health probes for a k3s host: service state, node readiness, API health, secrets
 * encryption, Cilium, and kube-system pod status.
 */
export const K3S_HEALTH_CHECKS: HealthCheck[] = [
  {
    name: "k3s service active",
    command: "systemctl is-active k3s",
    check: (stdout, code) => code === 0 && stdout.trim() === "active",
  },
  {
    name: "node Ready",
    command: "k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}'",
    check: (stdout) => stdout.includes("True"),
  },
  {
    name: "API server healthy",
    command: "k3s kubectl get --raw /healthz",
    check: (stdout, code) => code === 0 && stdout.trim() === "ok",
  },
  {
    name: "secrets encryption enabled",
    command: "k3s secrets-encrypt status 2>/dev/null || echo 'not available'",
    check: (stdout) => stdout.includes("Enabled") || stdout.includes("enabled"),
  },
  {
    name: "Cilium status",
    // Point the cilium CLI at the k3s kubeconfig, as the install script and checkCiliumStatus
    // do; without it the CLI has no cluster to talk to and the probe always takes the
    // "not installed" fallback.
    command:
      "KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium status --brief 2>/dev/null || echo 'cilium not installed'",
    check: (stdout, code) => code === 0 && !stdout.includes("not installed"),
  },
  {
    name: "kube-system pods running",
    // Counts pods whose line mentions neither Running nor Completed.
    command: "k3s kubectl get pods -n kube-system --no-headers | grep -v Running | grep -v Completed | wc -l",
    check: (stdout) => parseInt(stdout.trim(), 10) === 0,
  },
];
/**
 * Renders every entry of `K3S_HEALTH_CHECKS` into one bash script.
 *
 * Each probe prints a numbered banner, captures the command's combined output into `OUTPUT`
 * (a failing command is tolerated via `|| true`) and echoes it back.
 *
 * @returns The script text, starting with the `#!/bin/bash` shebang.
 */
export function generateHealthScript(): string {
  const total = K3S_HEALTH_CHECKS.length;
  const sections: string[] = [];
  K3S_HEALTH_CHECKS.forEach((probe, index) => {
    // The leading and trailing empty entries reproduce the blank line around each section.
    const sectionLines = [
      "",
      `echo "[${index + 1}/${total}] ${probe.name}..."`,
      `OUTPUT=$(${probe.command} 2>&1) || true`,
      'echo " result: $OUTPUT"',
      "",
    ];
    sections.push(sectionLines.join("\n"));
  });
  const scriptLines = [
    "#!/bin/bash",
    'echo "=== k3s health check ==="',
    sections.join("\n"),
    'echo "=== health check complete ==="',
    "",
  ];
  return scriptLines.join("\n");
}

View File

@@ -0,0 +1,8 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Queries the API server's `/healthz` endpoint through `k3s kubectl` over SSH.
 * Healthy means exit code 0 and a trimmed stdout of exactly "ok". Never reports a change.
 */
export const checkApiHealth: Operation = async (ctx): Promise<OperationResult> => {
  const { exitCode, stdout } = await ctx.ssh.exec(
    "k3s kubectl get --raw /healthz 2>/dev/null",
    sshOpts(ctx),
  );
  if (exitCode === 0 && stdout.trim() === "ok") {
    return { success: true, changed: false, message: "API server healthy" };
  }
  return { success: false, changed: false, message: "API server unhealthy" };
};

View File

@@ -0,0 +1,16 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Runs `cilium status --brief` against the k3s kubeconfig over SSH.
 *
 * Success is decided by the exit code alone. On success `details` carries the trimmed stdout.
 * On failure it carries stderr when present and otherwise stdout: the command redirects the
 * remote stderr to /dev/null, so stderr alone would normally leave the failure unexplained.
 * Never reports a change.
 */
export const checkCiliumStatus: Operation = async (ctx): Promise<OperationResult> => {
  const result = await ctx.ssh.exec(
    "KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium status --brief 2>/dev/null",
    sshOpts(ctx),
  );
  if (result.exitCode === 0) {
    return {
      success: true,
      changed: false,
      message: "Cilium OK",
      details: [result.stdout.trim()],
    };
  }
  // Prefer stderr, but fall back to stdout because `2>/dev/null` suppresses the former.
  const diagnostics = result.stderr.trim() || result.stdout.trim();
  return {
    success: false,
    changed: false,
    message: "Cilium unhealthy",
    details: [diagnostics],
  };
};

View File

@@ -0,0 +1,6 @@
// Barrel for the individual k3s health-check operations.
export { checkK3sService } from "./k3s-service.js";
export { checkNodeReady } from "./node-ready.js";
export { checkApiHealth } from "./api-health.js";
export { checkSecretsEncryption } from "./secrets-encryption.js";
export { checkCiliumStatus } from "./cilium-status.js";
export { checkPodStatus } from "./pod-status.js";

View File

@@ -0,0 +1,9 @@
import type { Operation, OperationResult } from "../types.js";
import { isServiceActive } from "../utils.js";
/**
 * Checks that the k3s systemd unit matching the host's role is active.
 * The "infra" and "labcontroller" roles map to the `k3s` unit; every other role maps to
 * `k3s-agent`. Never reports a change.
 */
export const checkK3sService: Operation = async (ctx): Promise<OperationResult> => {
  const { role } = ctx.config;
  const unit = role === "infra" || role === "labcontroller" ? "k3s" : "k3s-agent";
  if (await isServiceActive(ctx, unit)) {
    return { success: true, changed: false, message: `${unit} is active` };
  }
  return { success: false, changed: false, message: `${unit} is not active` };
};

View File

@@ -0,0 +1,11 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Reads the Ready condition of the first node returned by `k3s kubectl get nodes` over SSH.
 * The node counts as ready when stdout contains "True"; the exit code is not consulted.
 * Never reports a change.
 */
export const checkNodeReady: Operation = async (ctx): Promise<OperationResult> => {
  const query =
    "k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' 2>/dev/null";
  const { stdout } = await ctx.ssh.exec(query, sshOpts(ctx));
  if (stdout.includes("True")) {
    return { success: true, changed: false, message: "Node is Ready" };
  }
  return { success: false, changed: false, message: "Node is NotReady" };
};

View File

@@ -0,0 +1,20 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Lists kube-system pods over SSH and flags every line that mentions neither "Running" nor
 * "Completed".
 *
 * A non-zero exit code from the listing command is reported as a failure. Previously a
 * failed `kubectl` produced empty output and was reported as "All 0 kube-system pods healthy".
 * Never reports a change.
 */
export const checkPodStatus: Operation = async (ctx): Promise<OperationResult> => {
  const result = await ctx.ssh.exec(
    "k3s kubectl get pods -n kube-system --no-headers 2>/dev/null",
    sshOpts(ctx),
  );
  if (result.exitCode !== 0) {
    // stderr is discarded by the command itself, so the exit code is the only diagnostic left.
    return {
      success: false,
      changed: false,
      message: "Could not list kube-system pods",
      details: [`kubectl exited with code ${result.exitCode}`],
    };
  }
  const lines = result.stdout.trim().split("\n").filter(Boolean);
  const notReady = lines.filter((l) => !l.includes("Running") && !l.includes("Completed"));
  if (notReady.length === 0) {
    return {
      success: true,
      changed: false,
      message: `All ${lines.length} kube-system pods healthy`,
    };
  }
  return {
    success: false,
    changed: false,
    message: `${notReady.length} unhealthy pod(s)`,
    details: notReady,
  };
};

View File

@@ -0,0 +1,8 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Runs `k3s secrets-encrypt status` over SSH and looks for "Enabled" in stdout.
 * The exit code is not consulted. Never reports a change.
 */
export const checkSecretsEncryption: Operation = async (ctx): Promise<OperationResult> => {
  const { stdout } = await ctx.ssh.exec("k3s secrets-encrypt status 2>/dev/null", sshOpts(ctx));
  if (stdout.includes("Enabled")) {
    return { success: true, changed: false, message: "Secrets encryption enabled" };
  }
  return { success: false, changed: false, message: "Secrets encryption not enabled" };
};

View File

@@ -0,0 +1,32 @@
// k3s module entry point: the public surface of the module, re-exported from its submodules.
// Operation-based module class.
export { K3sModule } from "./k3s-module.js";
// Types shared by operations, groups and health checks (type-only exports).
export type {
K3sConfig,
OperationContext,
OperationResult,
Operation,
NamedOperation,
OperationGroup,
SshClient,
} from "./types.js";
// Utilities
export { runSequential, aggregateResults, writeRemoteFile, isServiceActive, checkCommand } from "./utils.js";
// Individual operations
export * from "./operations/index.js";
// Operation groups (each group definition plus its runner)
export * from "./groups/index.js";
// Health checks (typed operations)
export * from "./health/index.js";
// DEPRECATED: Legacy bash script generators — remove after CLI migration
export { generateInstallScript, type K3sInstallContext } from "./install.js";
export { generateConfigureScript } from "./configure.js";
export { generateHealthScript, K3S_HEALTH_CHECKS, type HealthCheck } from "./health.js";

View File

@@ -0,0 +1,275 @@
// k3s module — install phase.
// Installs k3s with CIS-hardened configuration and Cilium CNI.
/** Inputs needed to render the k3s install script for one machine. */
export interface K3sInstallContext {
  /** Host name; shown in the banner and written as a TLS SAN in the server config. */
  hostname: string;
  /** IP address; written as a TLS SAN in the server config. */
  ip: string;
  /** "infra" = server, "worker" = agent */
  role: string;
  /** Required for agent role */
  k3sServerUrl?: string;
  /** Required for agent role */
  k3sToken?: string;
}
/**
 * Generate the shell script that installs k3s on a target machine.
 *
 * The common prelude (steps 1-4: kernel modules, sysctl, swap, firewall) is followed by the
 * server section when `ctx.role` is "infra" and by the agent section for any other role.
 *
 * @param ctx Install context. `hostname` and `role` appear in the banner; characters that are
 *   special inside a double-quoted shell string (backslash, double quote, dollar sign,
 *   backtick) are backslash-escaped there so they are printed literally and never evaluated.
 * @returns The complete script text, starting with the `#!/bin/bash` shebang.
 */
export function generateInstallScript(ctx: K3sInstallContext): string {
  // Escapes a value for safe inclusion inside a double-quoted shell string.
  const quoteSafe = (value: string): string => value.replace(/[\\"$`]/g, "\\$&");
  const isServer = ctx.role === "infra";
  return `#!/bin/bash
set -euo pipefail
echo "=== k3s install: ${quoteSafe(ctx.hostname)} (${quoteSafe(ctx.role)}) ==="
# ── 1. Verify kernel prerequisites ──
echo "[1/10] Checking kernel modules..."
modprobe br_netfilter
modprobe overlay
modprobe ip_conntrack 2>/dev/null || true
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
br_netfilter
overlay
ip_conntrack
MODULES
# ── 2. CIS-compliant sysctl ──
echo "[2/10] Setting kernel parameters..."
cat > /etc/sysctl.d/90-k3s-cis.conf << 'SYSCTL'
# k3s CIS hardening
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
vm.panic_on_oom = 0
vm.overcommit_memory = 1
kernel.panic = 10
kernel.panic_on_oops = 1
# inotify limits for large clusters
fs.inotify.max_user_instances = 524288
fs.inotify.max_user_watches = 524288
SYSCTL
sysctl --system > /dev/null
# ── 3. Disable swap (CIS requirement) ──
echo "[3/10] Disabling swap..."
swapoff -a || true
sed -i '/\\sswap\\s/d' /etc/fstab
# ── 4. Disable firewall permanently (k3s/Cilium manage iptables directly) ──
# CRITICAL: firewalld's nftables rules block pod-to-gateway traffic.
# Must survive reboot — use both disable and mask.
echo "[4/10] Disabling firewall..."
systemctl disable --now firewalld 2>/dev/null || true
systemctl mask firewalld 2>/dev/null || true
systemctl disable --now ufw 2>/dev/null || true
systemctl mask ufw 2>/dev/null || true
${isServer ? generateServerInstall(ctx) : generateAgentInstall(ctx)}
echo "=== k3s install complete ==="
`;
}
/**
 * Renders the server-only part of the install script (steps 5-10): SELinux permissive mode,
 * the k3s server config, the audit policy, cleanup of stale CNI interfaces and state, the
 * k3s server install and restart, the wait for the API, the Cilium install, and Pod Security
 * labels on the `default` namespace.
 *
 * The audit policy is emitted with explicit YAML nesting: each rule's `resources`, `verbs`,
 * `users` and `omitStages` keys must sit under their `- level:` entry for the document to
 * parse as a Policy.
 *
 * @param ctx Install context; `hostname` and `ip` are written as `tls-san` entries.
 * @returns Script fragment, meant to be embedded after the common prelude.
 */
function generateServerInstall(ctx: K3sInstallContext): string {
  return `# ── 5. Set SELinux permissive (Fedora: k3s-selinux RPM has GPG issues with dnf5) ──
echo "[5/10] Configuring SELinux..."
setenforce 0 2>/dev/null || true
sed -i 's/^SELINUX=enforcing/SELINUX=permissive/' /etc/selinux/config 2>/dev/null || true
# ── 5b. Create k3s config directory ──
echo "[5/10] Writing k3s server configuration..."
mkdir -p /etc/rancher/k3s
mkdir -p /var/log/kubernetes
cat > /etc/rancher/k3s/config.yaml << 'K3S_CONFIG'
# k3s server configuration — CIS hardened
protect-kernel-defaults: true
secrets-encryption: true
write-kubeconfig-mode: "0640"
# Disable default components (we use Cilium)
flannel-backend: none
disable-network-policy: true
disable:
- servicelb
- traefik
# API server hardening
kube-apiserver-arg:
- "anonymous-auth=false"
- "audit-log-path=/var/log/kubernetes/audit.log"
- "audit-log-maxage=30"
- "audit-log-maxbackup=10"
- "audit-log-maxsize=100"
- "audit-policy-file=/etc/rancher/k3s/audit-policy.yaml"
- "enable-admission-plugins=NodeRestriction,PodSecurity"
- "request-timeout=300s"
# Kubelet hardening
kubelet-arg:
- "protect-kernel-defaults=true"
- "streaming-connection-idle-timeout=5m"
- "make-iptables-util-chains=true"
# TLS SANs for remote access
tls-san:
- "${ctx.hostname}"
- "${ctx.ip}"
K3S_CONFIG
# ── 6. Write audit policy ──
echo "[6/10] Writing audit policy..."
cat > /etc/rancher/k3s/audit-policy.yaml << 'AUDIT_POLICY'
apiVersion: audit.k8s.io/v1
kind: Policy
rules:
  # Log secret/configmap access at metadata level
  - level: Metadata
    resources:
      - group: ""
        resources: ["secrets", "configmaps"]
  # Log pod/service mutations at request level
  - level: RequestResponse
    verbs: ["create", "update", "patch", "delete"]
    resources:
      - group: ""
        resources: ["pods", "services", "deployments"]
  # Skip noisy endpoints
  - level: None
    resources:
      - group: ""
        resources: ["endpoints", "events"]
    users: ["system:kube-proxy", "system:apiserver"]
  # Default: log everything else at metadata level
  - level: Metadata
    omitStages:
      - "RequestReceived"
AUDIT_POLICY
# ── 6b. Pre-install cleanup: stop existing k3s and remove stale CNI state ──
# CRITICAL: flannel.1 vxlan uses port 8472 which conflicts with Cilium's vxlan.
# If we don't clean this up BEFORE starting k3s with flannel-backend=none + Cilium,
# Cilium will fail with "address already in use" and ALL pod creation will hang.
echo "[6b/10] Cleaning up previous CNI state..."
if systemctl is-active k3s >/dev/null 2>&1; then
echo " Stopping k3s before reconfiguration..."
systemctl stop k3s
sleep 3
fi
# Remove stale flannel interface (uses same vxlan port 8472 as Cilium)
if ip link show flannel.1 >/dev/null 2>&1; then
echo " Removing stale flannel.1 vxlan interface..."
ip link delete flannel.1 2>/dev/null || true
fi
# Remove stale Cilium interfaces from any previous install
for iface in cilium_vxlan cilium_host cilium_net; do
if ip link show "\$iface" >/dev/null 2>&1; then
echo " Removing stale \$iface interface..."
ip link delete "\$iface" 2>/dev/null || true
fi
done
# Remove any other vxlan on port 8472 (Cilium's port)
for iface in \$(ip -o link show type vxlan 2>/dev/null | awk -F': ' '{print \$2}'); do
if ip -d link show "\$iface" 2>/dev/null | grep -q 'dstport 8472'; then
echo " Removing conflicting vxlan interface: \$iface"
ip link delete "\$iface" 2>/dev/null || true
fi
done
# Clean old CNI config and state
rm -rf /etc/cni/net.d/* 2>/dev/null || true
rm -rf /var/lib/cni/ 2>/dev/null || true
echo " CNI state cleaned"
# ── 7. Install k3s server ──
echo "[7/10] Installing k3s server..."
curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server" INSTALL_K3S_SKIP_SELINUX_RPM=true sh -
# Force restart to pick up new config (installer may skip if binary unchanged)
echo " Restarting k3s to apply configuration..."
systemctl restart k3s
# ── 8. Wait for k3s API to be available (node will be NotReady until Cilium is installed) ──
echo "[8/10] Waiting for k3s API..."
for i in $(seq 1 60); do
if k3s kubectl get nodes 2>/dev/null; then
echo " API available after \${i}s"
break
fi
sleep 2
done
# ── 9. Install Cilium CNI (node becomes Ready after Cilium provides networking) ──
echo "[9/10] Installing Cilium CNI..."
CILIUM_CLI_VERSION=\$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
ARCH=\$(uname -m)
case "\$ARCH" in
x86_64) CLI_ARCH="amd64" ;;
aarch64) CLI_ARCH="arm64" ;;
*) CLI_ARCH="\$ARCH" ;;
esac
curl -L --fail --silent \\
"https://github.com/cilium/cilium-cli/releases/download/\${CILIUM_CLI_VERSION}/cilium-linux-\${CLI_ARCH}.tar.gz" \\
| tar xz -C /usr/local/bin
# Detect the default route device (avoid picking up tailscale/wireguard interfaces)
DEFAULT_DEV=\$(ip -4 route show default | awk '{print \$5}' | head -1)
echo " Using network device: \$DEFAULT_DEV"
KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium install \\
--set kubeProxyReplacement=true \\
--set ipam.mode=kubernetes \\
--set devices="\$DEFAULT_DEV" \\
--set nodePort.directRoutingDevice="\$DEFAULT_DEV"
echo " Waiting for Cilium to become ready..."
KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium status --wait --wait-duration 300s || echo " Cilium wait timed out (may still be pulling images)"
# Wait for node to become Ready (now that Cilium provides CNI)
echo " Waiting for node Ready..."
k3s kubectl wait --for=condition=Ready node --all --timeout=120s || echo " Node not ready yet (Cilium may still be initializing)"
# ── 10. Apply Pod Security Standards ──
echo "[10/10] Applying Pod Security Standards..."
k3s kubectl label namespace default pod-security.kubernetes.io/enforce=restricted --overwrite
k3s kubectl label namespace default pod-security.kubernetes.io/warn=restricted --overwrite
k3s kubectl label namespace default pod-security.kubernetes.io/audit=restricted --overwrite
`;
}
/**
 * Renders the agent-only part of the install script: writes the agent config, runs the k3s
 * installer in agent mode against the given server URL and token, waits ten seconds, then
 * checks the `k3s-agent` unit.
 *
 * @param ctx Install context; `k3sServerUrl` and `k3sToken` must both be set.
 * @returns Script fragment. When either value is missing, the fragment only prints an error
 *   and exits with status 1.
 */
function generateAgentInstall(ctx: K3sInstallContext): string {
  const { k3sServerUrl: serverUrl, k3sToken: joinToken } = ctx;
  if (!serverUrl || !joinToken) {
    return ['echo "ERROR: k3s agent requires --k3s-server-url and --k3s-token"', "exit 1"].join("\n");
  }
  const scriptLines = [
    "# ── 5-10. Install k3s agent ──",
    'echo "[5/10] Installing k3s agent..."',
    "mkdir -p /etc/rancher/k3s",
    "cat > /etc/rancher/k3s/config.yaml << 'K3S_CONFIG'",
    "protect-kernel-defaults: true",
    "kubelet-arg:",
    '- "protect-kernel-defaults=true"',
    '- "streaming-connection-idle-timeout=5m"',
    '- "make-iptables-util-chains=true"',
    "K3S_CONFIG",
    `echo "[6/10] Joining cluster at ${serverUrl}..."`,
    "curl -sfL https://get.k3s.io | \\",
    'INSTALL_K3S_EXEC="agent" \\',
    `K3S_URL="${serverUrl}" \\`,
    `K3S_TOKEN="${joinToken}" \\`,
    "sh -",
    'echo "[7/10] Waiting for agent to connect..."',
    "sleep 10",
    'echo "[8/10] Verifying agent service..."',
    "systemctl is-active k3s-agent",
    'echo "[9/10] Agent joined successfully"',
    'echo "[10/10] Done"',
    // Trailing empty entry yields the script's final newline.
    "",
  ];
  return scriptLines.join("\n");
}

View File

@@ -0,0 +1,112 @@
// K3sModule: implements the Module interface using typed operations.
// Orchestrates install/configure/health phases via operation groups.
import type { Module, ModuleMetadata, ModuleContext, ModuleResult } from "../../../src/types.js";
import type { OperationContext, K3sConfig, OperationResult } from "./types.js";
import { sshExec } from "../../../src/ssh.js";
import { aggregateResults } from "./utils.js";
import { runHostPrep } from "./groups/host-prep.js";
import { runK3sServer } from "./groups/k3s-server.js";
import { runK3sAgent } from "./groups/k3s-agent.js";
import { runNetworking } from "./groups/networking.js";
import { runHardening } from "./groups/hardening.js";
import { runSequential } from "./utils.js";
import * as health from "./health/index.js";
/**
 * Adapts a generic ModuleContext into the OperationContext consumed by the
 * k3s operation groups.
 *
 * The k3s-specific settings (`k3sServerUrl`, `k3sToken`, `tlsSans`) are read
 * from `ctx.config` and cast without validation. The returned `ssh.exec`
 * forwards to `sshExec` using the context's IP and user; when the context
 * carries an SSH key path it overrides any `keyPath` supplied by the caller.
 *
 * @param ctx - Module context supplied by the module runner.
 * @returns Operation context with typed config, SSH helper, OS/arch and a
 *          no-op logger.
 */
function toOpContext(ctx: ModuleContext): OperationContext {
  const settings = ctx.config;

  const config: K3sConfig = {
    hostname: ctx.hostname,
    ip: ctx.ip,
    role: ctx.role as K3sConfig["role"],
    k3sServerUrl: settings["k3sServerUrl"] as string | undefined,
    k3sToken: settings["k3sToken"] as string | undefined,
    tlsSans: settings["tlsSans"] as string[] | undefined,
  };

  const ssh: OperationContext["ssh"] = {
    // Context values are read at call time, not captured up front.
    exec: (cmd, opts) => {
      const merged = ctx.sshKeyPath
        ? { ...opts, keyPath: ctx.sshKeyPath }
        : { ...opts };
      return sshExec(ctx.ip, ctx.sshUser, cmd, merged);
    },
    user: ctx.sshUser,
    ip: ctx.ip,
    keyPath: ctx.sshKeyPath,
  };

  return {
    config,
    ssh,
    os: ctx.os,
    arch: ctx.arch,
    // Intentionally a no-op: messages are collected via operation results.
    log: (_msg) => {},
  };
}
/**
 * Collapses a list of operation results into the single ModuleResult
 * reported for a module phase.
 *
 * @param phase - Phase label copied onto the result.
 * @param results - Operation results, summarised by `aggregateResults`.
 * @param startMs - `performance.now()` reading taken when the phase began.
 * @returns Result whose `output` is the aggregate's details (or its message
 *          when no details exist) and whose `errors` holds the aggregate
 *          error, if any. `duration` is the rounded elapsed time.
 */
function toModuleResult(phase: ModuleResult["phase"], results: OperationResult[], startMs: number): ModuleResult {
  const summary = aggregateResults(results);
  const elapsed = Math.round(performance.now() - startMs);
  const output = summary.details ?? [summary.message];
  const errors = summary.error ? [summary.error] : [];

  return {
    success: summary.success,
    phase,
    duration: elapsed,
    output,
    errors,
  };
}
/**
 * Module implementation for k3s, driving the install, configure and health
 * phases through the typed operation groups.
 */
export class K3sModule implements Module {
  /** Static descriptor: module identity, applicable roles and dependencies. */
  readonly metadata: ModuleMetadata = {
    name: "k3s",
    version: "1.0.0",
    description: "CIS-hardened k3s with Cilium CNI",
    targets: { roles: ["infra", "worker", "labcontroller"] },
    dependencies: [],
  };

  /**
   * Runs host preparation, then the k3s server or agent install, then (for
   * server roles only) networking. Stops after the first of the first two
   * stages that reports a failed operation and returns what ran so far.
   *
   * @param ctx - Module context for the target host.
   * @returns Aggregated result for the "install" phase.
   */
  async install(ctx: ModuleContext): Promise<ModuleResult> {
    const startedAt = performance.now();
    const opCtx = toOpContext(ctx);
    // "infra" and "labcontroller" run the k3s server; every other role joins as an agent.
    const joinsAsAgent = ctx.role !== "infra" && ctx.role !== "labcontroller";
    const hasFailure = (batch: OperationResult[]): boolean => !batch.every((r) => r.success);

    // Stage 1: host preparation.
    const collected: OperationResult[] = [...(await runHostPrep(opCtx))];
    if (hasFailure(collected)) {
      return toModuleResult("install", collected, startedAt);
    }

    // Stage 2: k3s itself, as agent or server depending on role.
    const k3sBatch = joinsAsAgent ? await runK3sAgent(opCtx) : await runK3sServer(opCtx);
    collected.push(...k3sBatch);
    if (hasFailure(k3sBatch)) {
      return toModuleResult("install", collected, startedAt);
    }

    // Stage 3: networking is server-only; agents don't install Cilium.
    if (!joinsAsAgent) {
      collected.push(...(await runNetworking(opCtx)));
    }

    return toModuleResult("install", collected, startedAt);
  }

  /**
   * Applies the hardening operation group.
   *
   * @param ctx - Module context for the target host.
   * @returns Aggregated result for the "configure" phase.
   */
  async configure(ctx: ModuleContext): Promise<ModuleResult> {
    const startedAt = performance.now();
    const hardening = await runHardening(toOpContext(ctx));
    return toModuleResult("configure", hardening, startedAt);
  }

  /**
   * Runs the named health checks one after another via `runSequential`.
   *
   * @param ctx - Module context for the target host.
   * @returns Aggregated result for the "health" phase.
   */
  async health(ctx: ModuleContext): Promise<ModuleResult> {
    const startedAt = performance.now();
    const outcomes = await runSequential(toOpContext(ctx), [
      { name: "K3s service", fn: health.checkK3sService },
      { name: "Node ready", fn: health.checkNodeReady },
      { name: "API health", fn: health.checkApiHealth },
      { name: "Secrets encryption", fn: health.checkSecretsEncryption },
      { name: "Cilium status", fn: health.checkCiliumStatus },
      { name: "Pod status", fn: health.checkPodStatus },
    ]);
    return toModuleResult("health", outcomes, startedAt);
  }
}

Some files were not shown because too many files have changed in this diff Show More