feat: provision recheck, hardware info preservation, ISO boot fixes
Some checks failed
CI/CD / lint (pull_request) Failing after 1m26s
CI/CD / typecheck (pull_request) Failing after 11s
CI/CD / test (pull_request) Failing after 11s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped
Some checks failed
CI/CD / lint (pull_request) Failing after 1m26s
CI/CD / typecheck (pull_request) Failing after 11s
CI/CD / test (pull_request) Failing after 11s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped
- Add `labctl provision recheck` to refresh hardware info via SSH
- Preserve hardware info in InstalledInfo when install completes
- Fix /ks-auto: run nested %pre scripts from included kickstarts
- Add command-discover WebSocket routing for hw info updates
- Fix k3s join: clean stale TLS/cred when joining existing cluster
- Add --tls-verify=false for internal HTTP registry pushes
- Add fix-ssh-root.sh script for root SSH access on all nodes

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -309,6 +309,32 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
return { status: "ok", data: { mac, hostname: msg.hostname } };
|
||||
});
|
||||
|
||||
// Handle "command-discover": create or refresh the hardware record for a MAC.
//
// Fields missing from the message keep their previously stored value, so a
// partial report (for example one without disks/nics) does not erase earlier
// discovery data. "unknown" / 0 / [] are used only when nothing was stored.
// `first_seen` is kept from the existing record; `last_seen` is always "now".
labdConn.onCommand("command-discover", async (msg) => {
  if (msg.type !== "command-discover") throw new Error("unexpected");
  const mac = msg.mac.toLowerCase();
  const now = new Date().toISOString();

  state.update((s) => {
    // Read the previous record from the state object being updated so the
    // merge and the write see the same data.
    const prev = s.discovered[mac];
    s.discovered[mac] = {
      mac,
      product: msg.product ?? prev?.product ?? "unknown",
      board: msg.board ?? prev?.board ?? "unknown",
      serial: msg.serial ?? prev?.serial ?? "unknown",
      manufacturer: msg.manufacturer ?? prev?.manufacturer ?? "unknown",
      cpu_model: msg.cpu_model ?? prev?.cpu_model ?? "unknown",
      cpu_cores: msg.cpu_cores ?? prev?.cpu_cores ?? 0,
      memory_gb: msg.memory_gb ?? prev?.memory_gb ?? 0,
      arch: msg.arch ?? prev?.arch ?? "unknown",
      disks: msg.disks ?? prev?.disks ?? [],
      nics: msg.nics ?? prev?.nics ?? [],
      first_seen: prev?.first_seen ?? now,
      last_seen: now,
    };
  });

  logger.info(`HARDWARE UPDATED: ${mac} -- ${msg.manufacturer ?? "?"} ${msg.product ?? "?"} (${msg.cpu_model ?? "?"}, ${msg.cpu_cores ?? "?"} cores, ${msg.memory_gb ?? "?"}GB RAM)`);
  return { status: "ok", data: { mac } };
});
|
||||
|
||||
labdConn.onCommand("command-role-update", async (msg) => {
|
||||
if (msg.type !== "command-role-update") throw new Error("unexpected");
|
||||
const mac = msg.mac.toLowerCase();
|
||||
|
||||
@@ -139,12 +139,22 @@ export function registerApiRoutes(
|
||||
? detailStr.replace("ready at ", "").trim()
|
||||
: "";
|
||||
|
||||
const hw = s.discovered[mac];
|
||||
const installedInfo: InstalledInfo = {
|
||||
hostname: cfg?.hostname ?? "?",
|
||||
role: cfg?.role ?? "?",
|
||||
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
|
||||
ip,
|
||||
installed_at: new Date().toISOString(),
|
||||
// Preserve hardware info from discovery
|
||||
...(hw ? {
|
||||
product: hw.product,
|
||||
manufacturer: hw.manufacturer,
|
||||
cpu_model: hw.cpu_model,
|
||||
cpu_cores: hw.cpu_cores,
|
||||
memory_gb: hw.memory_gb,
|
||||
arch: hw.arch,
|
||||
} : {}),
|
||||
};
|
||||
s.installed[mac] = installedInfo;
|
||||
|
||||
|
||||
@@ -83,6 +83,20 @@ case "$STATE" in
|
||||
echo "ERROR: Failed to download install kickstart"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Run any %pre scripts from the downloaded kickstart.
# Anaconda only runs %pre from the top-level file, not from %include'd files.
# Only genuine %pre sections are executed: the header must start a line and
# must not be %pre-install, which is a different section sharing the prefix.
# Execution stays best-effort (a failing script does not abort), but a
# non-zero exit status is reported.
python3 -c "
import re, subprocess
content = open('/tmp/dynamic.ks').read()
# Header: a line starting with %pre that is not followed by '-' or a letter.
# Body: everything up to the next line that starts with %end.
blocks = re.findall(r'^%pre(?![-A-Za-z])[^\\n]*\\n(.*?)^%end', content, re.DOTALL | re.MULTILINE)
for i, script in enumerate(blocks):
    path = f'/tmp/inner-pre-{i}.sh'
    with open(path, 'w') as f:
        f.write(script)
    print(f'Running inner %pre script {i} ({len(script.splitlines())} lines)')
    rc = subprocess.run(['bash', path], check=False).returncode
    if rc != 0:
        print(f'WARNING: inner %pre script {i} exited with status {rc}')
"
|
||||
;;
|
||||
|
||||
debug)
|
||||
|
||||
@@ -166,6 +166,7 @@ export class BastionConnection {
|
||||
case "command-role-update":
|
||||
case "command-debug":
|
||||
case "command-register":
|
||||
case "command-discover":
|
||||
void this.handleCommand(msg);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -104,6 +104,16 @@ export class LabdClient {
|
||||
return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } });
|
||||
}
|
||||
|
||||
/**
 * Send a hardware report for one machine to labd.
 *
 * Issues `POST /api/machines/discover` with `data` as the request body.
 *
 * @param data - Hardware fields for the machine; only `mac` is required.
 * @returns Whatever the request resolves to, typed as a status plus an
 *   optional error message.
 */
async discoverMachine(data: {
  mac: string;
  product?: string;
  board?: string;
  serial?: string;
  manufacturer?: string;
  cpu_model?: string;
  cpu_cores?: number;
  memory_gb?: number;
  arch?: string;
  disks?: Array<{ name: string; size_gb: number; model: string }>;
  nics?: Array<{ name: string; mac: string; state: string }>;
}): Promise<{ status: string; error?: string }> {
  const options = { body: data };
  return this.request("POST", "/api/machines/discover", options);
}
|
||||
|
||||
/**
 * Ask labd to forget a machine.
 *
 * Issues `DELETE /api/machines/<mac>`; the MAC is URI-encoded before it is
 * placed in the path.
 *
 * @param mac - MAC address identifying the machine.
 * @returns Whatever the request resolves to, typed as a status object.
 */
async forgetMachine(mac: string): Promise<{ status: string }> {
  const path = `/api/machines/${encodeURIComponent(mac)}`;
  return this.request("DELETE", path);
}
|
||||
|
||||
@@ -69,10 +69,10 @@ export function registerListCommand(parent: Command): void {
|
||||
const hostname = inst?.hostname ?? queued?.hostname ?? "-";
|
||||
const role = inst?.role ?? queued?.role ?? "-";
|
||||
const ip = inst?.ip ?? "-";
|
||||
const cpu = hw?.cpu_model ?? "-";
|
||||
const cores = hw?.cpu_cores != null ? String(hw.cpu_cores) : "-";
|
||||
const ram = hw?.memory_gb != null ? `${hw.memory_gb}GB` : "-";
|
||||
const product = hw?.product ?? "-";
|
||||
const cpu = hw?.cpu_model ?? inst?.cpu_model ?? "-";
|
||||
const cores = (hw?.cpu_cores ?? inst?.cpu_cores) != null ? String(hw?.cpu_cores ?? inst?.cpu_cores) : "-";
|
||||
const ram = (hw?.memory_gb ?? inst?.memory_gb) != null ? `${hw?.memory_gb ?? inst?.memory_gb}GB` : "-";
|
||||
const product = hw?.product ?? inst?.product ?? "-";
|
||||
|
||||
const color = statusColor(status);
|
||||
|
||||
|
||||
94
bastion/src/cli/src/commands/recheck.ts
Normal file
94
bastion/src/cli/src/commands/recheck.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
// CLI command: provision recheck
|
||||
// SSH into all installed machines, collect hardware info, update bastion state.
|
||||
|
||||
import type { Command } from "commander";
|
||||
import { sshExec } from "@lab/modules";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
const BOLD = "\x1b[1m";
|
||||
const GREEN = "\x1b[0;32m";
|
||||
const RED = "\x1b[0;31m";
|
||||
const DIM = "\x1b[2m";
|
||||
const RESET = "\x1b[0m";
|
||||
|
||||
const SSH_OPTS = { timeoutMs: 30_000 };
|
||||
|
||||
// Shell script that collects hardware info and prints it as one JSON object.
// Kept simple — no Python, pure shell + awk. The statements are joined with "; ".
// NOTE: values are interpolated into the JSON verbatim (no escaping), so a
// DMI/CPU string containing a double quote or backslash yields invalid JSON.
const HW_COLLECT_SCRIPT = [
  'P=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo unknown)',
  'B=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo unknown)',
  'S=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo unknown)',
  'M=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo unknown)',
  // A pipeline's exit status is that of its last command (sed), so "||" after
  // the pipeline can never detect a missing "model name" line. Test for an
  // empty result instead, then try the "Model" line, then give up.
  'C=$(grep -m1 "model name" /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=$(grep -m1 Model /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=unknown',
  // grep -c prints "0" AND exits non-zero when nothing matches, so a trailing
  // "|| echo 0" would emit a second "0". Default only an empty result.
  'N=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null)',
  'N=${N:-0}',
  // awk exits 0 and prints nothing when there is no MemTotal line, so the
  // default must also be keyed on an empty result rather than exit status.
  'R=$(awk "/MemTotal/ {printf \\"%d\\", \\$2/1024/1024}" /proc/meminfo 2>/dev/null)',
  'R=${R:-0}',
  'A=$(uname -m)',
  'printf \'{"product":"%s","board":"%s","serial":"%s","manufacturer":"%s","cpu_model":"%s","cpu_cores":%s,"memory_gb":%s,"arch":"%s"}\\n\' "$P" "$B" "$S" "$M" "$C" "$N" "$R" "$A"',
].join("; ");
|
||||
|
||||
/** Hardware fields printed by HW_COLLECT_SCRIPT. All optional: the report is remote output and is not trusted to be complete. */
interface RecheckHardwareReport {
  product?: string;
  board?: string;
  serial?: string;
  manufacturer?: string;
  cpu_model?: string;
  cpu_cores?: number;
  memory_gb?: number;
  arch?: string;
}

/** Parse the collection script's stdout; throws with context unless it is a JSON object. */
function parseHardwareReport(stdout: string): RecheckHardwareReport {
  let parsed: unknown;
  try {
    parsed = JSON.parse(stdout.trim());
  } catch (err) {
    throw new Error(`invalid hardware JSON: ${err instanceof Error ? err.message : String(err)}`);
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error("invalid hardware JSON: expected an object");
  }
  return parsed as RecheckHardwareReport;
}

/**
 * Register the `recheck` subcommand on `parent`.
 *
 * The command fetches the machine list from labd, runs HW_COLLECT_SCRIPT over
 * SSH on every installed machine that has an IP (optionally restricted with
 * `--target`), and posts each parsed report back through
 * `client.discoverMachine`. Machines are processed one after another; a
 * failure on one machine is reported and counted, then the loop continues.
 * If the machine list cannot be fetched the process exits with status 1.
 *
 * @param parent - Commander command to attach `recheck` to.
 */
export function registerRecheckCommand(parent: Command): void {
  parent
    .command("recheck")
    .description("Refresh hardware info for all installed machines via SSH")
    .option("--user <user>", "SSH user", "root")
    .option("--target <hostname>", "Only recheck a specific machine (by hostname or MAC)")
    .action(async (opts: { user: string; target?: string }) => {
      const client = getLabdClient();
      let state;
      try {
        state = await client.getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // Accept a MAC target in any case and with "-" separators.
      const wantedMac = opts.target?.toLowerCase().replace(/-/g, ":");

      // Build list of machines to check
      const targets: Array<{ mac: string; hostname: string; ip: string }> = [];
      for (const [mac, info] of Object.entries(state.installed)) {
        if (!info.ip) continue;
        if (opts.target && info.hostname !== opts.target && mac.toLowerCase() !== wantedMac) continue;
        targets.push({ mac, hostname: info.hostname, ip: info.ip });
      }

      if (targets.length === 0) {
        console.log("No installed machines with IPs to check.");
        return;
      }

      console.log(`\n${BOLD}Rechecking ${targets.length} machine(s)...${RESET}\n`);

      let updated = 0;
      let failed = 0;

      for (const { mac, hostname, ip } of targets) {
        process.stdout.write(`  ${hostname.padEnd(24)} ${DIM}(${ip})${RESET} `);

        try {
          const t0 = Date.now();
          const result = await sshExec(ip, opts.user, HW_COLLECT_SCRIPT, SSH_OPTS);
          const elapsed = Date.now() - t0;
          if (result.exitCode !== 0) {
            // Report the failure once, followed by a truncated stderr excerpt.
            console.log(`${RED}SSH failed (exit ${result.exitCode}, ${elapsed}ms)${RESET}`);
            if (result.stderr) console.log(`    ${DIM}${result.stderr.substring(0, 200)}${RESET}`);
            failed++;
            continue;
          }

          const hwData = parseHardwareReport(result.stdout);
          // `mac` goes last so the key from labd's state always wins.
          const response = await client.discoverMachine({ ...hwData, mac });
          if (response.status !== "ok") {
            throw new Error(response.error ?? `labd returned status "${String(response.status)}"`);
          }

          // `||` is deliberate here: 0 cores / 0 GB / "" mean "not detected".
          const cpu = hwData.cpu_model || "?";
          const cores = hwData.cpu_cores || "?";
          const mem = hwData.memory_gb || "?";
          console.log(`${GREEN}OK${RESET} ${DIM}${cpu}, ${cores} cores, ${mem}GB${RESET}`);
          updated++;
        } catch (err) {
          console.log(`${RED}FAIL${RESET} ${DIM}${err instanceof Error ? err.message : String(err)}${RESET}`);
          failed++;
        }
      }

      console.log(`\n${BOLD}Done:${RESET} ${updated} updated, ${failed} failed\n`);
    });
}
|
||||
@@ -20,6 +20,7 @@ import { registerRegisterCommand } from "./commands/register.js";
|
||||
import { registerAsahiCommand } from "./commands/asahi.js";
|
||||
import { registerLogsCommand } from "./commands/logs.js";
|
||||
import { registerMakeIsoCommand } from "./commands/makeiso.js";
|
||||
import { registerRecheckCommand } from "./commands/recheck.js";
|
||||
import { registerConfigCommand } from "./commands/config.js";
|
||||
import { registerLoginCommand } from "./commands/login.js";
|
||||
import { registerDoctorCommand } from "./commands/doctor.js";
|
||||
@@ -104,6 +105,7 @@ export function createProgram(): Command {
|
||||
registerAsahiCommand(provisionCmd);
|
||||
registerLogsCommand(provisionCmd);
|
||||
registerMakeIsoCommand(provisionCmd);
|
||||
registerRecheckCommand(provisionCmd);
|
||||
|
||||
// config list/get/set/path
|
||||
registerConfigCommand(program);
|
||||
|
||||
@@ -260,6 +260,37 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
}
|
||||
});
|
||||
|
||||
// Hardware discovery update: forward a machine's hardware report to a bastion
// as a "command-discover" command and relay the command result to the caller.
app.post<{
  Body: {
    mac?: string;
    product?: string;
    board?: string;
    serial?: string;
    manufacturer?: string;
    cpu_model?: string;
    cpu_cores?: number;
    memory_gb?: number;
    arch?: string;
    disks?: Array<{ name: string; size_gb: number; model: string }>;
    nics?: Array<{ name: string; mac: string; state: string }>;
  };
}>("/api/machines/discover", async (request, reply) => {
  const payload = request.body ?? {};

  // Canonical MAC form used for the lookup and the command: lowercase, ":"-separated.
  const mac = (payload.mac ?? "").toLowerCase().replace(/-/g, ":");
  if (mac === "") {
    return reply.code(400).send({ error: "mac is required" });
  }

  // Use the bastion returned by findBastionByMac; if there is none, fall back
  // to the registry's only bastion when exactly one is registered.
  const soleBastion = () =>
    bastionRegistry.getAll().length === 1 ? bastionRegistry.getAll()[0] : null;
  const target = bastionRegistry.findBastionByMac(mac) ?? soleBastion();
  if (!target) {
    return reply.code(503).send({ error: "No bastion found for this MAC" });
  }

  try {
    const result = await sendCommand(target.bastionId, { type: "command-discover", ...payload, mac });
    const httpCode = result.status === "ok" ? 200 : 500;
    return reply.code(httpCode).send(result);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return reply.code(500).send({ error: message });
  }
});
|
||||
|
||||
// Update role
|
||||
app.post<{
|
||||
Body: { mac?: string; role?: string };
|
||||
|
||||
@@ -15,6 +15,15 @@ export const installK3sBinary: Operation = async (ctx): Promise<OperationResult>
|
||||
const alreadyInstalled = version.exitCode === 0;
|
||||
|
||||
if (isServer) {
|
||||
// Clean stale server state when joining an existing cluster
|
||||
// (TLS certs from a previous run cause "newer than datastore" fatal error)
|
||||
if (ctx.config.k3sServerUrl && ctx.config.k3sToken) {
|
||||
await ctx.ssh.exec(
|
||||
"rm -rf /var/lib/rancher/k3s/server/tls /var/lib/rancher/k3s/server/cred /var/lib/rancher/k3s/server/db",
|
||||
sshOpts(ctx),
|
||||
);
|
||||
}
|
||||
|
||||
// If joining an existing cluster, pass K3S_URL and K3S_TOKEN
|
||||
const joinEnv = ctx.config.k3sServerUrl && ctx.config.k3sToken
|
||||
? `K3S_URL="${ctx.config.k3sServerUrl}" K3S_TOKEN="${ctx.config.k3sToken}"`
|
||||
|
||||
@@ -113,6 +113,7 @@ export type LabdBastionMessage =
|
||||
| { type: "command-role-update"; requestId: string; mac: string; role: string }
|
||||
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
|
||||
| { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
|
||||
| { type: "command-discover"; requestId: string; mac: string; product?: string; board?: string; serial?: string; manufacturer?: string; cpu_model?: string; cpu_cores?: number; memory_gb?: number; arch?: string; disks?: Array<{ name: string; size_gb: number; model: string }>; nics?: Array<{ name: string; mac: string; state: string }> }
|
||||
| { type: "server-shutdown"; reconnectAfter: number };
|
||||
|
||||
export type BastionMessageType = BastionMessage["type"];
|
||||
@@ -127,7 +128,7 @@ const BASTION_MESSAGE_TYPES = new Set<string>([
|
||||
|
||||
// Recognised `type` strings for this message family, each listed exactly once.
// The superseded entry line (without "command-discover") is dropped in favour
// of its replacement.
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
  "bastion-enrolled",
  "bastion-heartbeat-ack",
  "command-install",
  "command-forget",
  "command-role-update",
  "command-debug",
  "command-register",
  "command-discover",
  "server-shutdown",
]);
|
||||
|
||||
export function isBastionMessage(msg: unknown): msg is BastionMessage {
|
||||
|
||||
@@ -96,6 +96,13 @@ export interface InstalledInfo {
|
||||
ip: string;
|
||||
installed_at: string;
|
||||
bastionId?: string; // set when aggregated through labd
|
||||
// Hardware info (copied from discovered on install completion)
|
||||
product?: string;
|
||||
manufacturer?: string;
|
||||
cpu_model?: string;
|
||||
cpu_cores?: number;
|
||||
memory_gb?: number;
|
||||
arch?: string;
|
||||
}
|
||||
|
||||
export interface DebugConfig {
|
||||
|
||||
Reference in New Issue
Block a user