feat: PXE debug boot mode for rescue/diagnostics #4

Merged
michal merged 16 commits from wip/ks-debugging into main 2026-03-30 02:59:35 +00:00
18 changed files with 368 additions and 59 deletions
Showing only changes of commit e87edfcfbd - Show all commits

View File

@@ -29,43 +29,46 @@ _labctl() {
COMPREPLY=($(compgen -W "--dir -h --help" -- "$cur"))
return ;;
"init bastion standalone status")
COMPREPLY=($(compgen -W "--dir --port -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
return ;;
"init bastion standalone")
COMPREPLY=($(compgen -W "start stop status -h --help" -- "$cur"))
return ;;
"app labcontroller deploy")
COMPREPLY=($(compgen -W "--user --port --crdb-replicas -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "--user --crdb-replicas -h --help" -- "$cur"))
return ;;
"app labcontroller status")
COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "--user -h --help" -- "$cur"))
return ;;
"app k3s install")
COMPREPLY=($(compgen -W "--role --user --port --k3s-server --k3s-token -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "--role --user --k3s-server --k3s-token -h --help" -- "$cur"))
return ;;
"app k3s health")
COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "--user -h --help" -- "$cur"))
return ;;
"app k3s list")
COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "--user -h --help" -- "$cur"))
return ;;
"init bastion")
COMPREPLY=($(compgen -W "standalone -h --help" -- "$cur"))
return ;;
"provision list")
COMPREPLY=($(compgen -W "--port -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
return ;;
"provision install")
COMPREPLY=($(compgen -W "--role --os --disk --port -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "--role --os --disk -h --help" -- "$cur"))
return ;;
"provision reprovision")
COMPREPLY=($(compgen -W "--role --os --disk --port -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "--role --os --disk -h --help" -- "$cur"))
return ;;
"provision forget")
COMPREPLY=($(compgen -W "--port -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
return ;;
"provision logs")
COMPREPLY=($(compgen -W "-f --follow --port -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
return ;;
"provision makeiso")
COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur"))
return ;;
"config list")
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
@@ -92,7 +95,7 @@ _labctl() {
COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
return ;;
"provision")
COMPREPLY=($(compgen -W "list install reprovision forget logs -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "list install reprovision forget logs makeiso -h --help" -- "$cur"))
return ;;
"config")
COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))

View File

@@ -118,38 +118,28 @@ complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l foregro
# init bastion standalone stop options
complete -c labctl -n "__labctl_in_cmd init bastion standalone stop" -l dir -d 'Bastion data directory' -x
# init bastion standalone status options
complete -c labctl -n "__labctl_in_cmd init bastion standalone status" -l dir -d 'Bastion data directory' -x
complete -c labctl -n "__labctl_in_cmd init bastion standalone status" -l port -d 'Bastion HTTP port' -x
# provision subcommands
complete -c labctl -n "__labctl_using_cmd provision" -a list -d 'List all known machines'
complete -c labctl -n "__labctl_using_cmd provision" -a install -d 'Queue a discovered machine for OS installation'
complete -c labctl -n "__labctl_using_cmd provision" -a reprovision -d 'Queue install + SSH reboot into PXE (target: hostname, MAC, or IP)'
complete -c labctl -n "__labctl_using_cmd provision" -a forget -d 'Remove a machine from bastion state'
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
# provision list options
complete -c labctl -n "__labctl_in_cmd provision list" -l port -d 'Bastion HTTP port' -x
complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning'
# provision install options
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
complete -c labctl -n "__labctl_in_cmd provision install" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
complete -c labctl -n "__labctl_in_cmd provision install" -l disk -d 'Target disk device (auto-detect if omitted)' -x
complete -c labctl -n "__labctl_in_cmd provision install" -l port -d 'Bastion HTTP port' -x
# provision reprovision options
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l disk -d 'Target disk device (auto-detect if omitted)' -x
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l port -d 'Bastion HTTP port' -x
# provision forget options
complete -c labctl -n "__labctl_in_cmd provision forget" -l port -d 'Bastion HTTP port' -x
# provision logs options
complete -c labctl -n "__labctl_in_cmd provision logs" -s f -l follow -d 'Follow logs in real-time (SSE stream)'
complete -c labctl -n "__labctl_in_cmd provision logs" -l port -d 'Bastion HTTP port' -x
# provision makeiso options
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l arch -d 'Target architecture(s)' -xa 'x86_64 aarch64'
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL'
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x
# config subcommands
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
@@ -173,12 +163,10 @@ complete -c labctl -n "__labctl_using_cmd app labcontroller" -a status -d 'Check
# app labcontroller deploy options
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l user -d 'SSH user' -x
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l port -d 'Bastion HTTP port' -x
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l crdb-replicas -d 'CockroachDB replicas' -x
# app labcontroller status options
complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l user -d 'SSH user' -x
complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l port -d 'Bastion HTTP port' -x
# app k3s subcommands
complete -c labctl -n "__labctl_using_cmd app k3s" -a install -d 'Install k3s on a target machine (hostname, IP, or MAC)'
@@ -188,15 +176,12 @@ complete -c labctl -n "__labctl_using_cmd app k3s" -a list -d 'List installed ma
# app k3s install options
complete -c labctl -n "__labctl_in_cmd app k3s install" -l role -d 'k3s role: infra (server) or worker (agent)' -x
complete -c labctl -n "__labctl_in_cmd app k3s install" -l user -d 'SSH user' -x
complete -c labctl -n "__labctl_in_cmd app k3s install" -l port -d 'Bastion HTTP port (for resolving target)' -x
complete -c labctl -n "__labctl_in_cmd app k3s install" -l k3s-server -d 'k3s server URL (required for worker role)' -x
complete -c labctl -n "__labctl_in_cmd app k3s install" -l k3s-token -d 'k3s join token (required for worker role)' -x
# app k3s health options
complete -c labctl -n "__labctl_in_cmd app k3s health" -l user -d 'SSH user' -x
complete -c labctl -n "__labctl_in_cmd app k3s health" -l port -d 'Bastion HTTP port' -x
# app k3s list options
complete -c labctl -n "__labctl_in_cmd app k3s list" -l user -d 'SSH user' -x
complete -c labctl -n "__labctl_in_cmd app k3s list" -l port -d 'Bastion HTTP port' -x

View File

@@ -266,6 +266,21 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
return { status: "ok", data: { mac: msg.mac, hostname: msg.hostname } };
});
// Queue a one-shot debug/rescue PXE boot for a machine on behalf of labd.
// Replies with the normalised MAC and the display name chosen for it.
labdConn.onCommand("command-debug", async (msg) => {
  if (msg.type !== "command-debug") throw new Error("unexpected");
  // Normalise to lowercase, colon-separated form. /dispatch looks entries up in
  // state.debug using exactly that form, so a dash-separated MAC stored verbatim
  // would never be served.
  const mac = msg.mac.toLowerCase().replace(/-/g, ":");
  const currentState = state.load();
  // Best available display name: installed hostname, then queued hostname,
  // then the discovered product string, and finally the MAC itself.
  const hostname =
    currentState.installed[mac]?.hostname ??
    currentState.install_queue[mac]?.hostname ??
    currentState.discovered[mac]?.product ??
    mac;
  state.update((s) => {
    s.debug[mac] = { hostname, queued_at: new Date().toISOString() };
  });
  return { status: "ok", data: { mac, hostname } };
});
labdConn.onCommand("command-forget", async (msg) => {
if (msg.type !== "command-forget") throw new Error("unexpected");
const mac = msg.mac.toLowerCase();
@@ -273,6 +288,7 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
delete s.discovered[mac];
delete s.install_queue[mac];
delete s.installed[mac];
delete s.debug[mac];
});
return { status: "ok", data: { mac } };
});

View File

@@ -189,6 +189,31 @@ export function registerApiRoutes(
return reply.send({ status: "ok", lines: allLines.length });
});
// Queue debug/rescue mode for a machine.
// Body: { mac }. Responds 400 when mac is missing, empty, or not a string;
// otherwise stores a debug entry for the MAC and responds { status, mac, hostname }.
app.post<{
  Body: { mac?: string };
}>("/api/debug", async (request, reply) => {
  // The Body generic is a compile-time hint only; the JSON payload may carry
  // any value for `mac`, so check the runtime type before calling string methods.
  const rawMac: unknown = request.body?.mac;
  if (typeof rawMac !== "string" || rawMac === "") {
    return reply.status(400).send({ error: "mac is required" });
  }
  const mac = rawMac.toLowerCase().replace(/-/g, ":");

  // Pick a display name: installed hostname, then queued hostname, then the
  // discovered product string, and finally the MAC itself.
  const currentState = state.load();
  const hostname =
    currentState.installed[mac]?.hostname ??
    currentState.install_queue[mac]?.hostname ??
    currentState.discovered[mac]?.product ??
    mac;

  state.update((s) => {
    s.debug[mac] = { hostname, queued_at: new Date().toISOString() };
  });
  logger.info(`DEBUG QUEUED: ${mac} -> ${hostname}`);
  return reply.send({ status: "ok", mac, hostname });
});
// Delete a machine from all state
app.delete<{
Params: { mac: string };
@@ -213,6 +238,10 @@ export function registerApiRoutes(
delete s.installed[mac];
found = true;
}
if (s.debug[mac] !== undefined) {
delete s.debug[mac];
found = true;
}
});
if (!found) {

View File

@@ -10,9 +10,11 @@ import type { StateManager } from "../services/state.js";
import {
renderDiscoverIpxe,
renderInstallIpxe,
renderDebugIpxe,
renderLocalBootIpxe,
} from "../templates/boot.ipxe.js";
import { renderUbuntuInstallIpxe } from "../templates/ubuntu-boot.ipxe.js";
import { renderDebugKickstart } from "../templates/debug.ks.js";
import { logger } from "../services/logger.js";
export function registerDispatchRoutes(
@@ -20,10 +22,34 @@ export function registerDispatchRoutes(
config: BastionConfig,
state: StateManager,
): void {
// Debug/rescue kickstart endpoint. The rendered file depends only on the
// configured SSH keys; the `mac` query parameter is accepted but not read.
app.get<{ Querystring: { mac?: string } }>("/debug.ks", async (_request, reply) => {
  const authorizedKeys = config.sshKeys ?? [];
  const kickstart = renderDebugKickstart({ sshKeys: authorizedKeys });
  reply.type("text/plain");
  return reply.send(kickstart);
});
app.get<{ Querystring: { mac?: string } }>("/dispatch", async (request, reply) => {
const mac = (request.query.mac ?? "").toLowerCase().replace(/-/g, ":");
const currentState = state.load();
// Debug mode takes highest priority — auto-clear after serving once
const debugEntry = currentState.debug[mac];
if (debugEntry) {
const hostname = debugEntry.hostname ?? "debug";
logger.info(`DEBUG BOOT: ${mac} -> ${hostname} (rescue mode)`);
state.update((s) => { delete s.debug[mac]; });
const script = renderDebugIpxe({
mac,
hostname,
serverIp: config.serverIp,
httpPort: config.httpPort,
fedoraMirror: config.fedoraMirror,
});
return reply.type("text/plain").send(script);
}
const queueEntry = currentState.install_queue[mac];
if (queueEntry) {
const hostname = queueEntry.hostname ?? "lab-node";

View File

@@ -11,6 +11,7 @@ const EMPTY_STATE: BastionState = {
discovered: {},
install_queue: {},
installed: {},
debug: {},
};
export type StateChangeListener = (state: BastionState) => void;
@@ -33,6 +34,7 @@ export class StateManager {
discovered: parsed.discovered ?? {},
install_queue: parsed.install_queue ?? {},
installed: parsed.installed ?? {},
debug: parsed.debug ?? {},
};
} catch {
return { ...EMPTY_STATE };

View File

@@ -75,6 +75,33 @@ boot
`;
}
/**
 * Render the iPXE script for a debug/rescue boot.
 *
 * The script prints a banner naming the target, then loads the kernel and
 * initrd from the bastion HTTP server with the installer's rescue, text and
 * sshd options, a kickstart URL pointing at `/debug.ks` on the same server,
 * and `inst.stage2` set to the given mirror.
 *
 * @param params.mac MAC address shown in the banner and passed to `/debug.ks`.
 * @param params.hostname Name shown in the banner.
 * @param params.serverIp Address of the bastion HTTP server.
 * @param params.httpPort Port of the bastion HTTP server.
 * @param params.fedoraMirror Value used verbatim for `inst.stage2`.
 * @returns The complete script, terminated by a newline.
 */
export function renderDebugIpxe(params: {
  mac: string;
  hostname: string;
  serverIp: string;
  httpPort: number;
  fedoraMirror: string;
}): string {
  const { mac, hostname, serverIp, httpPort, fedoraMirror } = params;
  const baseUrl = `http://${serverIp}:${httpPort}`;
  const rule = "echo =============================================";

  const kernelArgs = [
    "inst.rescue",
    "inst.text",
    "inst.sshd",
    `inst.ks=${baseUrl}/debug.ks?mac=${mac}`,
    `inst.stage2=${fedoraMirror}`,
  ].join(" ");

  const scriptLines = [
    "#!ipxe",
    "echo",
    rule,
    "echo Lab PXE Bastion - DEBUG/RESCUE MODE",
    `echo Target: ${hostname}`,
    `echo MAC: ${mac}`,
    rule,
    "echo",
    `kernel ${baseUrl}/vmlinuz ${kernelArgs}`,
    `initrd ${baseUrl}/initrd.img`,
    "boot",
  ];

  return scriptLines.join("\n") + "\n";
}
/**
* iPXE script for already-installed machines -- exits to boot from local disk.
*/

View File

@@ -0,0 +1,25 @@
// Debug/rescue kickstart template.
// Minimal: sets SSH access and network for Anaconda rescue mode.
// No disk operations, no packages, no %post.
export interface DebugKickstartParams {
  /** Public SSH keys to authorise for root; each non-empty key gets its own `sshkey` line. */
  sshKeys: string[];
}

/**
 * Render the minimal kickstart used for debug/rescue boots: locale, keyboard,
 * DHCP networking, a root installer password, and one `sshkey` line per
 * configured key.
 *
 * Keys are trimmed and blank entries are skipped, so a key read from a file
 * with a trailing newline cannot break the quoted `sshkey` argument.
 *
 * @param params.sshKeys Public keys to authorise for root.
 * @returns The kickstart text, terminated by a newline. With no usable keys
 *   the line after `sshpw` is left empty.
 */
export function renderDebugKickstart(params: DebugKickstartParams): string {
  // NOTE(review): this is a fixed plaintext password served over HTTP; confirm
  // that is acceptable for the networks this bastion runs on.
  const sshpw = "sshpw --username=root --plaintext lab-root-pw";
  // NOTE(review): kickstart `sshkey` is documented as populating authorized_keys
  // for a user on the installed system; confirm the rescue environment's sshd
  // honours it, otherwise `sshpw --sshkey` may be what is needed here.
  const sshkeyLines = params.sshKeys
    .map((key) => key.trim())
    .filter((key) => key !== "")
    .map((key) => `sshkey --username=root "${key}"`)
    .join("\n");
  return `# Lab Bastion -- Debug/Rescue Kickstart
# Minimal: only SSH + network for Anaconda rescue mode
lang en_US.UTF-8
keyboard uk
network --bootproto=dhcp --activate
${sshpw}
${sshkeyLines}
`;
}

View File

@@ -322,39 +322,20 @@ bastion_progress() {
-d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
}
# Send log lines to bastion
bastion_log() {
local line="$1"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
curl -sf -X POST "http://${serverIp}:${httpPort}/api/log" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$mac\\",\\"line\\":\\"$(echo "$line" | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g')\\"}\" \\
--connect-timeout 5 --max-time 10 2>/dev/null || true
}
# Send an error stage to bastion
bastion_error() {
local detail="$1"
bastion_progress "error" "$detail"
}
# --- Error trap: catch any failure and report to bastion ---
_post_error_handler() {
local exit_code=$? lineno=$1
bastion_error "%post failed at line $lineno (exit $exit_code)"
}
trap '_post_error_handler $LINENO' ERR
bastion_progress "post-install" "configuring system"
# -- SSH --
systemctl enable --now sshd
# Note: only 'enable', not '--now' — systemd is not running in the Anaconda chroot
systemctl enable sshd || true
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
${sshPostBlock}
# -- Hostname and domain --
hostnamectl set-hostname ${fqdn}
bastion_progress "post-install" "1-ssh done"
# -- Hostname and domain (write directly, hostnamectl needs D-Bus) --
echo "${fqdn}" > /etc/hostname
# -- tmpfs for /tmp --
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
@@ -392,12 +373,15 @@ SYSCTL
sysctl --system || true
# -- Disable firewalld permanently (k3s/Cilium manage iptables directly) --
systemctl disable --now firewalld || true
# Note: no '--now' — systemd is not running in the Anaconda chroot
systemctl disable firewalld || true
systemctl mask firewalld || true
# -- Enable chronyd for time sync --
systemctl enable chronyd || true`}
bastion_progress "post-install" "2-system done"
# -- Boot order: restore network first (Anaconda sets disk first, we undo it) --
# Network boot must stay first so the bastion intercepts every reboot.
if command -v efibootmgr >/dev/null 2>&1; then
@@ -410,6 +394,8 @@ if command -v efibootmgr >/dev/null 2>&1; then
fi
fi
bastion_progress "post-install" "3-bootorder done"
# -- Provisioning metadata --
cat > /etc/lab-provisioned << PROVEOF
hostname: ${fqdn}
@@ -435,6 +421,8 @@ README
${hasRancher ? `# Install k3s server (skip start - will be configured manually)
curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -
` : ""}
bastion_progress "post-install" "4-metadata done"
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
bastion_progress "complete" "ready at $IP_ADDR"

View File

@@ -26,6 +26,7 @@ describe("StateManager", () => {
discovered: {},
install_queue: {},
installed: {},
debug: {},
});
});
@@ -39,6 +40,7 @@ describe("StateManager", () => {
discovered: {},
install_queue: {},
installed: {},
debug: {},
});
});

View File

@@ -94,6 +94,10 @@ export class LabdClient {
return this.request("POST", "/api/machines/install", { body: opts });
}
/**
 * Ask labd to queue a debug/rescue PXE boot for a machine.
 *
 * Sends `POST /api/machines/debug` with `{ mac }` as the request body.
 *
 * @param mac MAC address of the target machine.
 * @returns The response produced by `this.request` for that call.
 */
async debugMachine(mac: string): Promise<{ status: string; data?: { mac: string; hostname: string }; error?: string }> {
  const payload = { mac };
  return this.request("POST", "/api/machines/debug", { body: payload });
}
async forgetMachine(mac: string): Promise<{ status: string }> {
return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
}

View File

@@ -0,0 +1,153 @@
// CLI command: provision debug
// Queue a machine for debug/rescue PXE boot and optionally SSH reboot into PXE.
import { execFileSync } from "node:child_process";
import { existsSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { Command } from "commander";
import type { BastionState } from "@lab/shared";
import { getLabdClient } from "../api/config.js";
/**
 * Resolve a user-supplied target (hostname, MAC, or IP) to a known machine.
 *
 * Lookup order:
 *  1. MAC (case-insensitive, `-` or `:` separated) in installed, discovered,
 *     then install_queue;
 *  2. installed hostname, case-insensitive, matching either the full name or
 *     its first label(s) (`node1` matches `node1.lab.local`);
 *  3. installed IP address, exact match.
 *
 * @param target Text typed by the user; surrounding whitespace is ignored.
 * @param state Aggregated machine state to search.
 * @returns The machine's MAC, display hostname and IP (`""` when unknown), or
 *   `null` when the target is blank or matches nothing.
 */
function resolveTarget(
  target: string,
  state: BastionState,
): { mac: string; hostname: string; ip: string } | null {
  const wanted = target.trim();
  // A blank target must never match: without this guard it would equal the
  // empty `ip` of an installed machine whose address is not recorded.
  if (wanted === "") return null;

  const normalized = wanted.toLowerCase().replace(/-/g, ":");

  const installedByMac = state.installed[normalized];
  if (installedByMac) {
    return { mac: normalized, hostname: installedByMac.hostname, ip: installedByMac.ip };
  }
  if (state.discovered[normalized]) {
    return { mac: normalized, hostname: normalized, ip: "" };
  }
  const queued = state.install_queue[normalized];
  if (queued) {
    // Fall back to the MAC when the queue entry carries no hostname.
    return { mac: normalized, hostname: queued.hostname ?? normalized, ip: "" };
  }

  // Host names are compared case-insensitively.
  const wantedHost = wanted.toLowerCase();
  for (const [mac, info] of Object.entries(state.installed)) {
    const host = info.hostname.toLowerCase();
    if (host === wantedHost || host.startsWith(wantedHost + ".")) {
      return { mac, hostname: info.hostname, ip: info.ip };
    }
  }

  for (const [mac, info] of Object.entries(state.installed)) {
    if (info.ip === wanted) {
      return { mac, hostname: info.hostname, ip: info.ip };
    }
  }
  return null;
}
/**
 * Shell snippet run on the target over SSH: if `efibootmgr` lists an entry
 * matching pxe/network/ipv4, select it for the next boot; reboot either way.
 */
const PXE_REBOOT_SCRIPT =
  'PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi';

/**
 * Best-effort: SSH into the machine and reboot it, preferring PXE for the next boot.
 *
 * A non-zero exit from ssh is ignored because the connection is expected to
 * drop while the host reboots. A failure to launch ssh at all is reported,
 * since in that case no reboot was requested.
 */
function rebootIntoPxe(user: string, ip: string): void {
  // Under sudo, look for keys in the invoking user's home rather than root's.
  // NOTE(review): assumes homes live under /home; confirm for other layouts.
  const sudoUser = process.env["SUDO_USER"];
  const realHome = sudoUser !== undefined ? join("/home", sudoUser) : homedir();
  const sshKey = ["id_ed25519", "id_rsa", "id_ecdsa"]
    .map((name) => join(realHome, ".ssh", name))
    .find((candidate) => existsSync(candidate));

  const sshArgs = [
    // NOTE(review): host key checking is disabled, so the peer is not
    // authenticated; confirm this is acceptable on the provisioning network.
    "-o", "StrictHostKeyChecking=no",
    "-o", "ConnectTimeout=10",
    ...(sshKey !== undefined ? ["-i", sshKey] : []),
    `${user}@${ip}`,
    PXE_REBOOT_SCRIPT,
  ];

  try {
    execFileSync("ssh", sshArgs, { stdio: "inherit" });
  } catch (err) {
    // execFileSync sets `code` (e.g. ENOENT) only when the process could not be
    // spawned; a plain non-zero exit carries `status` instead and is expected.
    const code = err instanceof Error ? (err as NodeJS.ErrnoException).code : undefined;
    if (code !== undefined) {
      console.error(`Could not run ssh (${code}); reboot ${ip} manually.`);
    }
  }
}

/**
 * Register `debug <target>` on the given parent command.
 *
 * The action resolves the target against labd's machine state, asks labd to
 * queue debug mode for its MAC, attempts an SSH reboot into PXE when an IP and
 * a non-root user are known, and finally prints rescue-shell instructions.
 * It exits the process with status 1 when labd is unreachable, the target is
 * unknown, or queuing fails.
 */
export function registerDebugCommand(parent: Command): void {
  parent
    .command("debug <target>")
    .description("PXE boot into Fedora rescue mode for debugging (target: hostname, MAC, or IP)")
    .showHelpAfterError(true)
    .action(async (target: string) => {
      const client = getLabdClient();

      // Resolve target from labd aggregated state
      let state: BastionState;
      try {
        state = await client.getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      const resolved = resolveTarget(target, state);
      if (!resolved) {
        console.error(`Cannot find machine: ${target}`);
        console.error("Provide a hostname, MAC, or IP of a known machine.");
        console.error("Run 'labctl provision list' to see available machines.");
        process.exit(1);
      }

      const { mac, hostname, ip } = resolved;
      console.log(`Queuing debug mode for ${hostname} (${mac})...`);
      try {
        const result = await client.debugMachine(mac);
        if (result.error) {
          console.error(`Failed: ${result.error}`);
          process.exit(1);
        }
      } catch (err) {
        console.error(`Failed to queue debug: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // Try SSH reboot into PXE. Skipped when no IP is known or when the only
      // identifiable user is root.
      if (ip !== "") {
        const adminUser = process.env["SUDO_USER"] ?? process.env["USER"] ?? "";
        const effectiveUser = adminUser === "root" ? "" : adminUser;
        if (effectiveUser !== "") {
          console.log(`\nAttempting SSH reboot into PXE (${effectiveUser}@${ip})...`);
          rebootIntoPxe(effectiveUser, ip);
        }
      }

      console.log(`
Debug mode queued for ${hostname} (${mac}).
Reboot the machine to enter Fedora rescue mode.
Once in rescue shell:
# Activate LVM
vgchange -ay labvg
# Mount root + other volumes
mkdir -p /mnt/sysroot
mount /dev/labvg/root /mnt/sysroot
cat /mnt/sysroot/etc/fstab # check what else to mount
mount /dev/labvg/var /mnt/sysroot/var
mount /dev/labvg/home /mnt/sysroot/home
# Boot the installed system in a container
/mnt/sysroot/usr/bin/systemd-nspawn -D /mnt/sysroot --boot
# Or just chroot for quick fixes
mount --bind /dev /mnt/sysroot/dev
mount --bind /proc /mnt/sysroot/proc
mount --bind /sys /mnt/sysroot/sys
chroot /mnt/sysroot
# Check initramfs size
ls -lh /mnt/sysroot/boot/initramfs-*.img
# Rebuild initramfs without amdgpu
chroot /mnt/sysroot
echo 'omit_drivers+=" amdgpu "' > /etc/dracut.conf.d/omit-amdgpu.conf
dracut -f --regenerate-all
`);
    });
}

View File

@@ -14,6 +14,7 @@ import { registerStatusCommand } from "./commands/status.js";
import { registerInstallCommand } from "./commands/install.js";
import { registerListCommand } from "./commands/list.js";
import { registerReprovisionCommand } from "./commands/reprovision.js";
import { registerDebugCommand } from "./commands/debug.js";
import { registerForgetCommand } from "./commands/forget.js";
import { registerLogsCommand } from "./commands/logs.js";
import { registerMakeIsoCommand } from "./commands/makeiso.js";
@@ -95,6 +96,7 @@ export function createProgram(): Command {
registerListCommand(provisionCmd);
registerInstallCommand(provisionCmd);
registerReprovisionCommand(provisionCmd);
registerDebugCommand(provisionCmd);
registerForgetCommand(provisionCmd);
registerLogsCommand(provisionCmd);
registerMakeIsoCommand(provisionCmd);

View File

@@ -172,6 +172,40 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
}
});
// Queue debug/rescue mode — route to correct bastion by MAC.
// Responds 400 for a missing/invalid mac, 503 when no bastion is connected,
// 404 when several bastions are connected and none knows the MAC, and
// otherwise relays the bastion's command result (200 on "ok", 500 otherwise).
app.post<{
  Body: { mac?: string };
}>("/api/machines/debug", async (request, reply) => {
  // The Body generic is compile-time only; guard the runtime type so a
  // non-string `mac` yields 400 instead of an exception.
  const rawMac: unknown = request.body?.mac;
  if (typeof rawMac !== "string" || rawMac === "") {
    return reply.code(400).send({ error: "mac is required" });
  }
  const mac = rawMac.toLowerCase().replace(/-/g, ":");

  let bastionId: string;
  const owner = bastionRegistry.findBastionByMac(mac);
  if (owner) {
    bastionId = owner.bastionId;
  } else {
    const all = bastionRegistry.getAll();
    const [only] = all;
    if (only === undefined) {
      return reply.code(503).send({ error: "No bastions connected" });
    }
    if (all.length > 1) {
      return reply.code(404).send({ error: `MAC ${mac} not found on any bastion` });
    }
    // Exactly one bastion is connected: send it there even though it has not
    // reported this MAC.
    bastionId = only.bastionId;
  }

  try {
    const result = await sendCommand(bastionId, { type: "command-debug", mac });
    return reply.code(result.status === "ok" ? 200 : 500).send(result);
  } catch (err) {
    return reply.code(500).send({ error: err instanceof Error ? err.message : String(err) });
  }
});
// Forget machine
app.delete<{ Params: { mac: string } }>("/api/machines/:mac", async (request, reply) => {
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");

View File

@@ -3,7 +3,7 @@
import { EventEmitter } from "node:events";
import type { WebSocket } from "ws";
import type { BastionState, HardwareInfo, InstallConfig, InstalledInfo } from "@lab/shared";
import type { BastionState, HardwareInfo, InstallConfig, InstalledInfo, DebugConfig } from "@lab/shared";
export interface ConnectedBastion {
bastionId: string;
@@ -20,6 +20,7 @@ export interface AggregatedState {
discovered: Record<string, HardwareInfo>;
install_queue: Record<string, InstallConfig>;
installed: Record<string, InstalledInfo>;
debug: Record<string, DebugConfig>;
}
export class BastionRegistry extends EventEmitter {
@@ -86,6 +87,7 @@ export class BastionRegistry extends EventEmitter {
discovered: {},
install_queue: {},
installed: {},
debug: {},
};
for (const bastion of this.bastions.values()) {
@@ -98,6 +100,9 @@ export class BastionRegistry extends EventEmitter {
for (const [mac, info] of Object.entries(bastion.state.installed)) {
result.installed[mac] = { ...info, bastionId: bastion.bastionId };
}
for (const [mac, dbg] of Object.entries(bastion.state.debug ?? {})) {
result.debug[mac] = { ...dbg };
}
}
return result;

View File

@@ -5,6 +5,7 @@ export type {
HardwareInfo,
InstallConfig,
InstalledInfo,
DebugConfig,
BastionState,
BastionConfig,
} from "./types/index.js";

View File

@@ -5,6 +5,7 @@ export type {
HardwareInfo,
InstallConfig,
InstalledInfo,
DebugConfig,
BastionState,
} from "./state.js";

View File

@@ -98,8 +98,14 @@ export interface InstalledInfo {
bastionId?: string; // set when aggregated through labd
}
/**
 * A pending debug/rescue boot request for one machine.
 * Stored in `BastionState.debug` under the machine's MAC address.
 */
export interface DebugConfig {
/** Display name for the machine, used in log lines and the boot banner. */
hostname: string;
/** When the request was queued, as an ISO-8601 timestamp string. */
queued_at: string;
}
/**
 * Machine state tracked by a bastion. Each map is indexed by MAC address.
 */
export interface BastionState {
/** Hardware information for machines that have been discovered. */
discovered: Record<string, HardwareInfo>;
/** Install configuration for machines queued for OS installation. */
install_queue: Record<string, InstallConfig>;
/** Information about machines that have been installed. */
installed: Record<string, InstalledInfo>;
/** Pending debug/rescue boot requests. */
debug: Record<string, DebugConfig>;
}