feat: provision recheck, hardware info preservation, ISO boot fixes
Some checks failed
CI/CD / lint (pull_request) Failing after 1m26s
CI/CD / typecheck (pull_request) Failing after 11s
CI/CD / test (pull_request) Failing after 11s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped

- Add `labctl provision recheck` to refresh hardware info via SSH
- Preserve hardware info in InstalledInfo when install completes
- Fix /ks-auto: run nested %pre scripts from included kickstarts
- Add command-discover WebSocket routing for hw info updates
- Fix k3s join: clean stale TLS/cred when joining existing cluster
- Add --tls-verify=false for internal HTTP registry pushes
- Add fix-ssh-root.sh script for root SSH access on all nodes

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-04-01 17:59:39 +01:00
parent ae91f2895e
commit 9ddab24931
17 changed files with 368 additions and 12 deletions

View File

@@ -309,6 +309,32 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
return { status: "ok", data: { mac, hostname: msg.hostname } };
});
// Handle "command-discover": store the hardware info carried by the message under
// state.discovered[mac] (MAC lower-cased).
// Fields missing from the message keep their previously stored value and only fall back
// to the "unknown" / 0 / [] placeholders when nothing was stored before, so a partial
// update (for example one without disks/nics) does not erase earlier discovery data.
// first_seen is carried over from the existing record; last_seen is always refreshed.
labdConn.onCommand("command-discover", async (msg) => {
  if (msg.type !== "command-discover") throw new Error("unexpected");
  const mac = msg.mac.toLowerCase();
  const now = new Date().toISOString();
  const existing = state.load().discovered[mac];
  state.update((s) => {
    s.discovered[mac] = {
      mac,
      product: msg.product ?? existing?.product ?? "unknown",
      board: msg.board ?? existing?.board ?? "unknown",
      serial: msg.serial ?? existing?.serial ?? "unknown",
      manufacturer: msg.manufacturer ?? existing?.manufacturer ?? "unknown",
      cpu_model: msg.cpu_model ?? existing?.cpu_model ?? "unknown",
      cpu_cores: msg.cpu_cores ?? existing?.cpu_cores ?? 0,
      memory_gb: msg.memory_gb ?? existing?.memory_gb ?? 0,
      arch: msg.arch ?? existing?.arch ?? "unknown",
      disks: msg.disks ?? existing?.disks ?? [],
      nics: msg.nics ?? existing?.nics ?? [],
      first_seen: existing?.first_seen ?? now,
      last_seen: now,
    };
  });
  logger.info(`HARDWARE UPDATED: ${mac} -- ${msg.manufacturer ?? "?"} ${msg.product ?? "?"} (${msg.cpu_model ?? "?"}, ${msg.cpu_cores ?? "?"} cores, ${msg.memory_gb ?? "?"}GB RAM)`);
  return { status: "ok", data: { mac } };
});
labdConn.onCommand("command-role-update", async (msg) => {
if (msg.type !== "command-role-update") throw new Error("unexpected");
const mac = msg.mac.toLowerCase();

View File

@@ -139,12 +139,22 @@ export function registerApiRoutes(
? detailStr.replace("ready at ", "").trim()
: "";
const hw = s.discovered[mac];
const installedInfo: InstalledInfo = {
hostname: cfg?.hostname ?? "?",
role: cfg?.role ?? "?",
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
ip,
installed_at: new Date().toISOString(),
// Preserve hardware info from discovery
...(hw ? {
product: hw.product,
manufacturer: hw.manufacturer,
cpu_model: hw.cpu_model,
cpu_cores: hw.cpu_cores,
memory_gb: hw.memory_gb,
arch: hw.arch,
} : {}),
};
s.installed[mac] = installedInfo;

View File

@@ -83,6 +83,20 @@ case "$STATE" in
echo "ERROR: Failed to download install kickstart"
exit 1
fi
# Run any %pre scripts from the downloaded kickstart.
# Anaconda only runs %pre from the top-level file, not from %include'd files.
# A section is picked up only when its header starts a line and is exactly "%pre"
# (optionally followed by options); "%pre-install" sections and a "%pre" that merely
# appears in the middle of a line are not treated as %pre scripts.
# Each script is written to /tmp/inner-pre-<n>.sh and run with bash; a failing script
# does not abort the remaining ones (check=False).
python3 -c "
import re, subprocess
content = open('/tmp/dynamic.ks').read()
blocks = re.findall(r'^%pre(?![-\\w])[^\\n]*\\n(.*?)^%end', content, re.DOTALL | re.MULTILINE)
for i, script in enumerate(blocks):
    path = f'/tmp/inner-pre-{i}.sh'
    with open(path, 'w') as f:
        f.write(script)
    print(f'Running inner %pre script {i} ({len(script.splitlines())} lines)')
    subprocess.run(['bash', path], check=False)
"
;;
debug)

View File

@@ -166,6 +166,7 @@ export class BastionConnection {
case "command-role-update":
case "command-debug":
case "command-register":
case "command-discover":
void this.handleCommand(msg);
break;
}

View File

@@ -104,6 +104,16 @@ export class LabdClient {
return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } });
}
/**
 * Sends hardware information for one machine to labd.
 *
 * Issues `POST /api/machines/discover` with `data` as the request body.
 *
 * @param data - MAC address of the machine plus any hardware fields to report.
 * @returns Whatever `this.request` resolves with for that call.
 */
async discoverMachine(data: {
  mac: string;
  product?: string;
  board?: string;
  serial?: string;
  manufacturer?: string;
  cpu_model?: string;
  cpu_cores?: number;
  memory_gb?: number;
  arch?: string;
  disks?: Array<{ name: string; size_gb: number; model: string }>;
  nics?: Array<{ name: string; mac: string; state: string }>;
}): Promise<{ status: string; error?: string }> {
  const payload = { body: data };
  return this.request("POST", "/api/machines/discover", payload);
}
/**
 * Issues `DELETE /api/machines/<mac>` for the given machine.
 *
 * @param mac - MAC address of the machine; URL-encoded before being placed in the path.
 * @returns Whatever `this.request` resolves with for that call.
 */
async forgetMachine(mac: string): Promise<{ status: string }> {
  const machinePath = `/api/machines/${encodeURIComponent(mac)}`;
  return this.request("DELETE", machinePath);
}

View File

@@ -69,10 +69,10 @@ export function registerListCommand(parent: Command): void {
const hostname = inst?.hostname ?? queued?.hostname ?? "-";
const role = inst?.role ?? queued?.role ?? "-";
const ip = inst?.ip ?? "-";
const cpu = hw?.cpu_model ?? "-";
const cores = hw?.cpu_cores != null ? String(hw.cpu_cores) : "-";
const ram = hw?.memory_gb != null ? `${hw.memory_gb}GB` : "-";
const product = hw?.product ?? "-";
const cpu = hw?.cpu_model ?? inst?.cpu_model ?? "-";
const cores = (hw?.cpu_cores ?? inst?.cpu_cores) != null ? String(hw?.cpu_cores ?? inst?.cpu_cores) : "-";
const ram = (hw?.memory_gb ?? inst?.memory_gb) != null ? `${hw?.memory_gb ?? inst?.memory_gb}GB` : "-";
const product = hw?.product ?? inst?.product ?? "-";
const color = statusColor(status);

View File

@@ -0,0 +1,94 @@
// CLI command: provision recheck
// SSH into all installed machines, collect hardware info, update bastion state.
import type { Command } from "commander";
import { sshExec } from "@lab/modules";
import { getLabdClient } from "../api/config.js";
// ANSI escape sequences used to style terminal output.
const BOLD = "\u001b[1m";
const GREEN = "\u001b[0;32m";
const RED = "\u001b[0;31m";
const DIM = "\u001b[2m";
const RESET = "\u001b[0m";
// Options handed to sshExec: a 30 second timeout, expressed in milliseconds.
const SSH_OPTS = { timeoutMs: 30 * 1000 };
// Shell script that collects hardware info and prints it as a single line of JSON.
// Kept simple — no Python, pure shell + awk. The commands are joined with "; " into
// one command line.
// Values are substituted into the JSON template as-is (no escaping), so a value that
// contains a double quote or a backslash would yield invalid JSON.
const HW_COLLECT_SCRIPT = [
  'P=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo unknown)',
  'B=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo unknown)',
  'S=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo unknown)',
  'M=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo unknown)',
  // The exit status of a pipeline is that of its last command (sed), so an "||"
  // fallback placed after the pipeline never fires. Check for an empty result instead.
  'C=$(grep -m1 "model name" /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=$(grep -m1 Model /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  'C=${C:-unknown}',
  // grep -c prints "0" and also exits non-zero when nothing matches, so "|| echo 0"
  // would add a second "0" line and break the JSON. Default only when output is empty.
  'N=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null)',
  'N=${N:-0}',
  // awk prints nothing when no MemTotal line is found; default that case to 0 as well.
  'R=$(awk "/MemTotal/ {printf \\"%d\\", \\$2/1024/1024}" /proc/meminfo 2>/dev/null)',
  'R=${R:-0}',
  'A=$(uname -m)',
  'printf \'{"product":"%s","board":"%s","serial":"%s","manufacturer":"%s","cpu_model":"%s","cpu_cores":%s,"memory_gb":%s,"arch":"%s"}\\n\' "$P" "$B" "$S" "$M" "$C" "$N" "$R" "$A"',
].join("; ");
/**
 * Registers the `recheck` subcommand on `parent`.
 *
 * The action:
 *  1. fetches the machine list from labd (exits with code 1 if that fails),
 *  2. selects installed machines that have an IP, optionally filtered by `--target`
 *     (compared against the hostname and the MAC key),
 *  3. runs HW_COLLECT_SCRIPT on each one over SSH, sequentially,
 *  4. parses the script's stdout as JSON and sends it to labd via `discoverMachine`,
 *  5. prints one status line per machine and a final updated/failed summary.
 *
 * A failure on one machine is reported and counted; it does not stop the run.
 *
 * @param parent - Command to attach the `recheck` subcommand to.
 */
export function registerRecheckCommand(parent: Command): void {
  parent
    .command("recheck")
    .description("Refresh hardware info for all installed machines via SSH")
    .option("--user <user>", "SSH user", "root")
    .option("--target <hostname>", "Only recheck a specific machine (by hostname or MAC)")
    .action(async (opts: { user: string; target?: string }) => {
      const client = getLabdClient();
      let state;
      try {
        state = await client.getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // Build list of machines to check
      const targets: Array<{ mac: string; hostname: string; ip: string }> = [];
      for (const [mac, info] of Object.entries(state.installed)) {
        if (!info.ip) continue;
        if (opts.target && info.hostname !== opts.target && mac !== opts.target) continue;
        targets.push({ mac, hostname: info.hostname, ip: info.ip });
      }
      if (targets.length === 0) {
        console.log("No installed machines with IPs to check.");
        return;
      }

      console.log(`\n${BOLD}Rechecking ${targets.length} machine(s)...${RESET}\n`);
      let updated = 0;
      let failed = 0;
      for (const { mac, hostname, ip } of targets) {
        // No trailing newline: the result is appended to the same line below.
        process.stdout.write(` ${hostname.padEnd(24)} ${DIM}(${ip})${RESET} `);
        try {
          const t0 = Date.now();
          const result = await sshExec(ip, opts.user, HW_COLLECT_SCRIPT, SSH_OPTS);
          const elapsed = Date.now() - t0;
          if (result.exitCode !== 0) {
            // Report the failure once (previously the "SSH failed" line was printed twice).
            console.log(`${RED}SSH failed (exit ${result.exitCode}, ${elapsed}ms)${RESET}`);
            if (result.stderr) console.log(` ${DIM}${result.stderr.substring(0, 200)}${RESET}`);
            failed++;
            continue;
          }

          const hwData = JSON.parse(result.stdout.trim());
          // Valid JSON that is not an object (number, string, array, null) carries no
          // hardware fields; treat it as a failure instead of sending an empty update.
          if (hwData === null || typeof hwData !== "object" || Array.isArray(hwData)) {
            throw new Error("hardware info output is not a JSON object");
          }
          // `mac` goes last so nothing in the remote output can replace the target MAC.
          await client.discoverMachine({ ...hwData, mac });

          const cpu = hwData.cpu_model || "?";
          const cores = hwData.cpu_cores || "?";
          const mem = hwData.memory_gb || "?";
          console.log(`${GREEN}OK${RESET} ${DIM}${cpu}, ${cores} cores, ${mem}GB${RESET}`);
          updated++;
        } catch (err) {
          // Covers SSH errors, unparsable output and a failing discoverMachine call.
          console.log(`${RED}FAIL${RESET} ${DIM}${err instanceof Error ? err.message : String(err)}${RESET}`);
          failed++;
        }
      }
      console.log(`\n${BOLD}Done:${RESET} ${updated} updated, ${failed} failed\n`);
    });
}

View File

@@ -20,6 +20,7 @@ import { registerRegisterCommand } from "./commands/register.js";
import { registerAsahiCommand } from "./commands/asahi.js";
import { registerLogsCommand } from "./commands/logs.js";
import { registerMakeIsoCommand } from "./commands/makeiso.js";
import { registerRecheckCommand } from "./commands/recheck.js";
import { registerConfigCommand } from "./commands/config.js";
import { registerLoginCommand } from "./commands/login.js";
import { registerDoctorCommand } from "./commands/doctor.js";
@@ -104,6 +105,7 @@ export function createProgram(): Command {
registerAsahiCommand(provisionCmd);
registerLogsCommand(provisionCmd);
registerMakeIsoCommand(provisionCmd);
registerRecheckCommand(provisionCmd);
// config list/get/set/path
registerConfigCommand(program);

View File

@@ -260,6 +260,37 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
}
});
// Update hardware info (discovery data) for a machine.
// The MAC from the body is lower-cased and "-" separators are turned into ":".
// The command goes to the bastion that bastionRegistry.findBastionByMac returns for the
// MAC; if there is none and exactly one bastion is registered, that one is used.
// Responses: 400 when mac is missing, 503 when no bastion can be chosen, 200 when the
// command result has status "ok", 500 otherwise or when sending the command throws.
app.post<{
  Body: {
    mac?: string; product?: string; board?: string; serial?: string;
    manufacturer?: string; cpu_model?: string; cpu_cores?: number;
    memory_gb?: number; arch?: string;
    disks?: Array<{ name: string; size_gb: number; model: string }>;
    nics?: Array<{ name: string; mac: string; state: string }>;
  };
}>("/api/machines/discover", async (request, reply) => {
  const data = request.body ?? {};
  // The body is external input: a non-string mac is treated like a missing one so it
  // yields a 400 instead of throwing on .toLowerCase().
  const rawMac = typeof data.mac === "string" ? data.mac : "";
  const mac = rawMac.toLowerCase().replace(/-/g, ":");
  if (!mac) {
    return reply.code(400).send({ error: "mac is required" });
  }
  const bastion = bastionRegistry.findBastionByMac(mac);
  const target = bastion ?? (bastionRegistry.getAll().length === 1 ? bastionRegistry.getAll()[0] : null);
  if (!target) {
    return reply.code(503).send({ error: "No bastion found for this MAC" });
  }
  try {
    // `type` and `mac` come after the spread so a body field named "type" (or "mac")
    // cannot change which command is sent or which machine it refers to.
    const result = await sendCommand(target.bastionId, { ...data, type: "command-discover", mac });
    return reply.code(result.status === "ok" ? 200 : 500).send(result);
  } catch (err) {
    return reply.code(500).send({ error: err instanceof Error ? err.message : String(err) });
  }
});
// Update role
app.post<{
Body: { mac?: string; role?: string };

View File

@@ -15,6 +15,15 @@ export const installK3sBinary: Operation = async (ctx): Promise<OperationResult>
const alreadyInstalled = version.exitCode === 0;
if (isServer) {
// Clean stale server state when joining an existing cluster
// (TLS certs from a previous run cause "newer than datastore" fatal error)
// Runs whenever both k3sServerUrl and k3sToken are configured.
// NOTE(review): besides tls/ and cred/ this also removes server/db, and the visible
// condition does not look at `alreadyInstalled` — confirm this cannot wipe the state of
// a server that has already joined and is healthy when the operation is re-run.
// NOTE(review): the result of the exec call is not inspected here — confirm that a
// failed cleanup is acceptable or is surfaced by ctx.ssh.exec itself.
if (ctx.config.k3sServerUrl && ctx.config.k3sToken) {
await ctx.ssh.exec(
"rm -rf /var/lib/rancher/k3s/server/tls /var/lib/rancher/k3s/server/cred /var/lib/rancher/k3s/server/db",
sshOpts(ctx),
);
}
// If joining an existing cluster, pass K3S_URL and K3S_TOKEN
const joinEnv = ctx.config.k3sServerUrl && ctx.config.k3sToken
? `K3S_URL="${ctx.config.k3sServerUrl}" K3S_TOKEN="${ctx.config.k3sToken}"`

View File

@@ -113,6 +113,7 @@ export type LabdBastionMessage =
| { type: "command-role-update"; requestId: string; mac: string; role: string }
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
| { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
| { type: "command-discover"; requestId: string; mac: string; product?: string; board?: string; serial?: string; manufacturer?: string; cpu_model?: string; cpu_cores?: number; memory_gb?: number; arch?: string; disks?: Array<{ name: string; size_gb: number; model: string }>; nics?: Array<{ name: string; mac: string; state: string }> }
| { type: "server-shutdown"; reconnectAfter: number };
export type BastionMessageType = BastionMessage["type"];
@@ -127,7 +128,7 @@ const BASTION_MESSAGE_TYPES = new Set<string>([
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
"bastion-enrolled", "bastion-heartbeat-ack", "command-install",
"command-forget", "command-role-update", "command-debug", "command-register", "server-shutdown",
"command-forget", "command-role-update", "command-debug", "command-register", "command-discover", "server-shutdown",
]);
export function isBastionMessage(msg: unknown): msg is BastionMessage {

View File

@@ -96,6 +96,13 @@ export interface InstalledInfo {
ip: string;
installed_at: string;
bastionId?: string; // set when aggregated through labd
// Hardware info (copied from discovered on install completion)
product?: string;
manufacturer?: string;
cpu_model?: string;
cpu_cores?: number;
memory_gb?: number;
arch?: string;
}
export interface DebugConfig {