From 9ddab2493147d09a5f22507212d6acf4adfd7686 Mon Sep 17 00:00:00 2001 From: Michal Date: Wed, 1 Apr 2026 17:59:39 +0100 Subject: [PATCH] feat: provision recheck, hardware info preservation, ISO boot fixes - Add `labctl provision recheck` to refresh hardware info via SSH - Preserve hardware info in InstalledInfo when install completes - Fix /ks-auto: run nested %pre scripts from included kickstarts - Add command-discover WebSocket routing for hw info updates - Fix k3s join: clean stale TLS/cred when joining existing cluster - Add --tls-verify=false for internal HTTP registry pushes - Add fix-ssh-root.sh script for root SSH access on all nodes Co-Authored-By: Claude Opus 4.6 (1M context) --- bastion/completions/labctl.bash | 5 +- bastion/completions/labctl.fish | 5 + bastion/scripts/build-bastion.sh | 12 +- bastion/scripts/build-labd.sh | 12 +- bastion/scripts/fix-ssh-root.sh | 131 ++++++++++++++++++ bastion/src/bastion/src/main.ts | 26 ++++ bastion/src/bastion/src/routes/api.ts | 10 ++ bastion/src/bastion/src/routes/kickstart.ts | 14 ++ .../bastion/src/services/labd-connection.ts | 1 + bastion/src/cli/src/api/client.ts | 10 ++ bastion/src/cli/src/commands/list.ts | 8 +- bastion/src/cli/src/commands/recheck.ts | 94 +++++++++++++ bastion/src/cli/src/index.ts | 2 + bastion/src/labd/src/routes/bastions.ts | 31 +++++ .../modules/k3s/src/operations/k3s-install.ts | 9 ++ bastion/src/shared/src/protocol/index.ts | 3 +- bastion/src/shared/src/types/state.ts | 7 + 17 files changed, 368 insertions(+), 12 deletions(-) create mode 100644 bastion/scripts/fix-ssh-root.sh create mode 100644 bastion/src/cli/src/commands/recheck.ts diff --git a/bastion/completions/labctl.bash b/bastion/completions/labctl.bash index a51e9fd..628e688 100644 --- a/bastion/completions/labctl.bash +++ b/bastion/completions/labctl.bash @@ -82,6 +82,9 @@ _labctl() { "provision makeiso") COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur")) return ;; + "provision recheck") + COMPREPLY=($(compgen -W "--user --target -h --help" -- "$cur")) + return ;; "config list") COMPREPLY=($(compgen -W "-h --help" -- "$cur")) return ;; @@ -107,7 +110,7 @@ _labctl() { COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur")) return ;; "provision") - COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso -h --help" -- "$cur")) + COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso recheck -h --help" -- "$cur")) return ;; "config") COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur")) diff --git a/bastion/completions/labctl.fish b/bastion/completions/labctl.fish index 50480d1..26c1204 100644 --- a/bastion/completions/labctl.fish +++ b/bastion/completions/labctl.fish @@ -128,6 +128,7 @@ complete -c labctl -n "__labctl_using_cmd provision" -a register -d 'Register an complete -c labctl -n "__labctl_using_cmd provision" -a asahi -d 'Show instructions to provision an Apple Silicon Mac with Asahi Linux' complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)' complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning' +complete -c labctl -n "__labctl_using_cmd provision" -a recheck -d 'Refresh hardware info for all installed machines via SSH' # provision install options complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller' @@ -154,6 +155,10 @@ complete -c labctl -n "__labctl_in_cmd provision makeiso" -l arch -d 'Target arc complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL' complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x +# provision recheck options +complete -c labctl -n "__labctl_in_cmd provision recheck" -l user -d 'SSH user' -x +complete -c labctl -n "__labctl_in_cmd provision recheck" -l target -d 'Only recheck a specific machine (by hostname or MAC)' -x + # config subcommands complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values' complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value' diff --git a/bastion/scripts/build-bastion.sh b/bastion/scripts/build-bastion.sh index 4b4b67a..8f79fb3 100755 --- a/bastion/scripts/build-bastion.sh +++ b/bastion/scripts/build-bastion.sh @@ -99,16 +99,22 @@ if [ "$PUSH" = true ]; then fi fi + # Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012) + TLS_FLAG="" + if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then + TLS_FLAG="--tls-verify=false" + fi + echo "==> Logging in to $REGISTRY..." - podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY" + podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY" echo "==> Pushing $FULL_IMAGE:$TAG..." - podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG" + podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG" # Also tag as :latest if not already if [ "$TAG" != "latest" ]; then echo "==> Also pushing as :latest..." - podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest" + podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest" fi # Link package to repository if script exists diff --git a/bastion/scripts/build-labd.sh b/bastion/scripts/build-labd.sh index 0084ea5..bae8e68 100755 --- a/bastion/scripts/build-labd.sh +++ b/bastion/scripts/build-labd.sh @@ -92,15 +92,21 @@ if [ "$PUSH" = true ]; then fi fi + # Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012) + TLS_FLAG="" + if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then + TLS_FLAG="--tls-verify=false" + fi + echo "==> Logging in to $REGISTRY..." - podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY" + podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY" echo "==> Pushing $FULL_IMAGE:$TAG..." - podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG" + podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG" if [ "$TAG" != "latest" ]; then echo "==> Also pushing as :latest..." - podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest" + podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest" fi if [ -f "$SCRIPT_DIR/link-package.sh" ]; then diff --git a/bastion/scripts/fix-ssh-root.sh b/bastion/scripts/fix-ssh-root.sh new file mode 100644 index 0000000..ad0f75f --- /dev/null +++ b/bastion/scripts/fix-ssh-root.sh @@ -0,0 +1,131 @@ +#!/bin/bash +# Fix root SSH access on all provisioned machines. +# Tries root, lab, michal users to find one that works, +# then ensures root has the SSH key and PermitRootLogin is enabled. +set -euo pipefail + +SSH_KEY="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDMJ3FkUGbG174eoO5RjZd2eNV680FM5pgp0AgpW/QwlJExK3qxMk0DJSr4ICmzGUx4yujAXcrqU1otcOMPzzFzwc5heWpSmlNHU3TIW6NHEt0sF9ZTAbGLw2zSw3si5UouqFkCcENA40mePFJqY+Q9R8N1uvLgu4m/do+Zrn/mk5Ewc1V7OCRE5Acrnaec4T7LTB0BuVXcjPUfAmZ0q5fI+bKPR1q2Kc3+IeGhVkBuZ9OJVeXXhnpedm0uEbLeriK/jUYKYw/1QhsNDM8Tyty+UIGr9QVnWwzCMHB+wuQcDYC9mPGTqg0fYwX8Mp8xMi1PPxdsh1G7bj/cpWMAF43KswWORF2ul8ICGbaE1zEgIYXO790SuBjpBHhaC6Iegqi58hmCuP+a9893q/EU9HyrWTJHCZXC5E4kP1MsM57KrhEpszM6I3sW9f9zMTPd5QsCXFi4si4OMwX4kYNVu3fQGQPpseDPlTTSrT6uUdqj4Irm0c1m9cYTmK0vYgsM3ss= michal@fedora" + +SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ConnectTimeout=5" +USERS_TO_TRY=(root lab michal) + +# Machines: hostname ip +MACHINES=( + "labmaster 192.168.8.11" + "worker0-k8s0 192.168.8.23" + "worker1-k8s0 192.168.8.13" + "worker2-k8s0 192.168.8.25" + "spark-2935 192.168.8.12" +) + +BOLD="\033[1m" +GREEN="\033[0;32m" +RED="\033[0;31m" +DIM="\033[2m" +RESET="\033[0m" + +# Script to run on each machine (via sudo if needed) +read -r -d '' FIX_SCRIPT << 'FIXEOF' || true +#!/bin/bash +set -e +KEY="$1" + +# 1. Ensure root .ssh dir exists +mkdir -p /root/.ssh +chmod 700 /root/.ssh +touch /root/.ssh/authorized_keys +chmod 600 /root/.ssh/authorized_keys + +# 2. Add key if not present +if ! grep -qF "$KEY" /root/.ssh/authorized_keys 2>/dev/null; then + echo "$KEY" >> /root/.ssh/authorized_keys + echo "KEY_ADDED" +else + echo "KEY_EXISTS" +fi + +# 3. Fix sshd_config for root login with keys +SSHD_CONF="/etc/ssh/sshd_config" +CHANGED=0 + +# Ensure PermitRootLogin allows key auth +CURRENT=$(grep -E "^PermitRootLogin" "$SSHD_CONF" 2>/dev/null | tail -1 || true) +if [ "$CURRENT" = "PermitRootLogin prohibit-password" ] || [ "$CURRENT" = "PermitRootLogin without-password" ]; then + echo "SSHD_OK" +elif [ "$CURRENT" = "PermitRootLogin yes" ]; then + echo "SSHD_OK" +else + # Remove any existing PermitRootLogin lines + sed -i '/^#*PermitRootLogin/d' "$SSHD_CONF" + echo "PermitRootLogin prohibit-password" >> "$SSHD_CONF" + CHANGED=1 + echo "SSHD_FIXED" +fi + +# Ensure PubkeyAuthentication is enabled +if grep -qE "^PubkeyAuthentication no" "$SSHD_CONF" 2>/dev/null; then + sed -i 's/^PubkeyAuthentication no/PubkeyAuthentication yes/' "$SSHD_CONF" + CHANGED=1 + echo "PUBKEY_FIXED" +else + echo "PUBKEY_OK" +fi + +# Restart sshd if changed +if [ "$CHANGED" -eq 1 ]; then + systemctl restart sshd 2>/dev/null || systemctl restart ssh 2>/dev/null || true + echo "SSHD_RESTARTED" +fi + +# 4. Verify root can be reached +echo "DONE" +FIXEOF + +echo "" +echo -e "${BOLD}Fixing root SSH access on all machines...${RESET}" +echo "" + +for entry in "${MACHINES[@]}"; do + read -r hostname ip <<< "$entry" + printf " %-24s ${DIM}(%s)${RESET} " "$hostname" "$ip" + + # Try each user until one works + WORKING_USER="" + for user in "${USERS_TO_TRY[@]}"; do + if ssh $SSH_OPTS "$user@$ip" "true" 2>/dev/null; then + WORKING_USER="$user" + break + fi + done + + if [ -z "$WORKING_USER" ]; then + echo -e "${RED}UNREACHABLE${RESET} (tried: ${USERS_TO_TRY[*]})" + continue + fi + + # Run fix script (with sudo if not root) + if [ "$WORKING_USER" = "root" ]; then + RESULT=$(ssh $SSH_OPTS "root@$ip" "bash -s -- '$SSH_KEY'" <<< "$FIX_SCRIPT" 2>&1) + else + RESULT=$(ssh $SSH_OPTS "$WORKING_USER@$ip" "sudo bash -s -- '$SSH_KEY'" <<< "$FIX_SCRIPT" 2>&1) + fi + + # Parse result + DETAILS="" + if echo "$RESULT" | grep -q "KEY_ADDED"; then DETAILS="key added"; fi + if echo "$RESULT" | grep -q "KEY_EXISTS"; then DETAILS="key ok"; fi + if echo "$RESULT" | grep -q "SSHD_FIXED"; then DETAILS="$DETAILS, sshd fixed"; fi + if echo "$RESULT" | grep -q "SSHD_OK"; then DETAILS="$DETAILS, sshd ok"; fi + if echo "$RESULT" | grep -q "SSHD_RESTARTED"; then DETAILS="$DETAILS, restarted"; fi + + # Verify root works now + if ssh $SSH_OPTS "root@$ip" "true" 2>/dev/null; then + echo -e "${GREEN}OK${RESET} ${DIM}(via $WORKING_USER: $DETAILS)${RESET}" + else + echo -e "${RED}PARTIAL${RESET} ${DIM}(via $WORKING_USER: $DETAILS -- root still blocked)${RESET}" + fi +done + +echo "" +echo -e "${BOLD}Done.${RESET} Verify: labctl provision recheck --user root" +echo "" diff --git a/bastion/src/bastion/src/main.ts b/bastion/src/bastion/src/main.ts index 4c9d4ee..7468f1f 100644 --- a/bastion/src/bastion/src/main.ts +++ b/bastion/src/bastion/src/main.ts @@ -309,6 +309,32 @@ export async function startBastion(overrides: Partial = {}): Prom return { status: "ok", data: { mac, hostname: msg.hostname } }; }); + labdConn.onCommand("command-discover", async (msg) => { + if (msg.type !== "command-discover") throw new Error("unexpected"); + const mac = (msg.mac as string).toLowerCase(); + const now = new Date().toISOString(); + const existing = state.load().discovered[mac]; + state.update((s) => { + s.discovered[mac] = { + mac, + product: (msg.product as string) ?? "unknown", + board: (msg.board as string) ?? "unknown", + serial: (msg.serial as string) ?? "unknown", + manufacturer: (msg.manufacturer as string) ?? "unknown", + cpu_model: (msg.cpu_model as string) ?? "unknown", + cpu_cores: (msg.cpu_cores as number) ?? 0, + memory_gb: (msg.memory_gb as number) ?? 0, + arch: (msg.arch as string) ?? "unknown", + disks: (msg.disks as Array<{ name: string; size_gb: number; model: string }>) ?? [], + nics: (msg.nics as Array<{ name: string; mac: string; state: string }>) ?? [], + first_seen: existing?.first_seen ?? now, + last_seen: now, + }; + }); + logger.info(`HARDWARE UPDATED: ${mac} -- ${msg.manufacturer ?? "?"} ${msg.product ?? "?"} (${msg.cpu_model ?? "?"}, ${msg.cpu_cores ?? "?"} cores, ${msg.memory_gb ?? "?"}GB RAM)`); + return { status: "ok", data: { mac } }; + }); + labdConn.onCommand("command-role-update", async (msg) => { if (msg.type !== "command-role-update") throw new Error("unexpected"); const mac = msg.mac.toLowerCase(); diff --git a/bastion/src/bastion/src/routes/api.ts b/bastion/src/bastion/src/routes/api.ts index 7aab036..8a75c65 100644 --- a/bastion/src/bastion/src/routes/api.ts +++ b/bastion/src/bastion/src/routes/api.ts @@ -139,12 +139,22 @@ export function registerApiRoutes( ? detailStr.replace("ready at ", "").trim() : ""; + const hw = s.discovered[mac]; const installedInfo: InstalledInfo = { hostname: cfg?.hostname ?? "?", role: cfg?.role ?? "?", ...(cfg?.os !== undefined ? { os: cfg.os } : {}), ip, installed_at: new Date().toISOString(), + // Preserve hardware info from discovery + ...(hw ? { + product: hw.product, + manufacturer: hw.manufacturer, + cpu_model: hw.cpu_model, + cpu_cores: hw.cpu_cores, + memory_gb: hw.memory_gb, + arch: hw.arch, + } : {}), }; s.installed[mac] = installedInfo; diff --git a/bastion/src/bastion/src/routes/kickstart.ts b/bastion/src/bastion/src/routes/kickstart.ts index db4dab6..aa5c1f3 100644 --- a/bastion/src/bastion/src/routes/kickstart.ts +++ b/bastion/src/bastion/src/routes/kickstart.ts @@ -83,6 +83,20 @@ case "$STATE" in echo "ERROR: Failed to download install kickstart" exit 1 fi + + # Run any %pre scripts from the downloaded kickstart. + # Anaconda only runs %pre from the top-level file, not from %include'd files. + python3 -c " +import re, subprocess +content = open('/tmp/dynamic.ks').read() +blocks = re.findall(r'%pre[^\\n]*\\n(.*?)%end', content, re.DOTALL) +for i, script in enumerate(blocks): + path = f'/tmp/inner-pre-{i}.sh' + with open(path, 'w') as f: + f.write(script) + print(f'Running inner %pre script {i} ({len(script.splitlines())} lines)') + subprocess.run(['bash', path], check=False) +" ;; debug) diff --git a/bastion/src/bastion/src/services/labd-connection.ts b/bastion/src/bastion/src/services/labd-connection.ts index 4fa89cb..d1c4cb1 100644 --- a/bastion/src/bastion/src/services/labd-connection.ts +++ b/bastion/src/bastion/src/services/labd-connection.ts @@ -166,6 +166,7 @@ export class BastionConnection { case "command-role-update": case "command-debug": case "command-register": + case "command-discover": void this.handleCommand(msg); break; } diff --git a/bastion/src/cli/src/api/client.ts b/bastion/src/cli/src/api/client.ts index 55ad790..52add1e 100644 --- a/bastion/src/cli/src/api/client.ts +++ b/bastion/src/cli/src/api/client.ts @@ -104,6 +104,16 @@ export class LabdClient { return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } }); } + async discoverMachine(data: { + mac: string; product?: string; board?: string; serial?: string; + manufacturer?: string; cpu_model?: string; cpu_cores?: number; + memory_gb?: number; arch?: string; + disks?: Array<{ name: string; size_gb: number; model: string }>; + nics?: Array<{ name: string; mac: string; state: string }>; + }): Promise<{ status: string; error?: string }> { + return this.request("POST", "/api/machines/discover", { body: data }); + } + async forgetMachine(mac: string): Promise<{ status: string }> { return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`); } diff --git a/bastion/src/cli/src/commands/list.ts b/bastion/src/cli/src/commands/list.ts index 029ca9b..b95a48d 100644 --- a/bastion/src/cli/src/commands/list.ts +++ b/bastion/src/cli/src/commands/list.ts @@ -69,10 +69,10 @@ export function registerListCommand(parent: Command): void { const hostname = inst?.hostname ?? queued?.hostname ?? "-"; const role = inst?.role ?? queued?.role ?? "-"; const ip = inst?.ip ?? "-"; - const cpu = hw?.cpu_model ?? "-"; - const cores = hw?.cpu_cores != null ? String(hw.cpu_cores) : "-"; - const ram = hw?.memory_gb != null ? `${hw.memory_gb}GB` : "-"; - const product = hw?.product ?? "-"; + const cpu = hw?.cpu_model ?? inst?.cpu_model ?? "-"; + const cores = (hw?.cpu_cores ?? inst?.cpu_cores) != null ? String(hw?.cpu_cores ?? inst?.cpu_cores) : "-"; + const ram = (hw?.memory_gb ?? inst?.memory_gb) != null ? `${hw?.memory_gb ?? inst?.memory_gb}GB` : "-"; + const product = hw?.product ?? inst?.product ?? "-"; const color = statusColor(status); diff --git a/bastion/src/cli/src/commands/recheck.ts b/bastion/src/cli/src/commands/recheck.ts new file mode 100644 index 0000000..2bfaba3 --- /dev/null +++ b/bastion/src/cli/src/commands/recheck.ts @@ -0,0 +1,94 @@ +// CLI command: provision recheck +// SSH into all installed machines, collect hardware info, update bastion state. + +import type { Command } from "commander"; +import { sshExec } from "@lab/modules"; +import { getLabdClient } from "../api/config.js"; + +const BOLD = "\x1b[1m"; +const GREEN = "\x1b[0;32m"; +const RED = "\x1b[0;31m"; +const DIM = "\x1b[2m"; +const RESET = "\x1b[0m"; + +const SSH_OPTS = { timeoutMs: 30_000 }; + +// Shell script that collects hardware info as JSON. +// Kept simple — no Python, pure shell + awk. +const HW_COLLECT_SCRIPT = [ + 'P=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo unknown)', + 'B=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo unknown)', + 'S=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo unknown)', + 'M=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo unknown)', + 'C=$(grep -m1 "model name" /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //" || grep -m1 Model /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //" || echo unknown)', + 'N=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null || echo 0)', + 'R=$(awk "/MemTotal/ {printf \\"%d\\", \\$2/1024/1024}" /proc/meminfo 2>/dev/null || echo 0)', + 'A=$(uname -m)', + 'printf \'{"product":"%s","board":"%s","serial":"%s","manufacturer":"%s","cpu_model":"%s","cpu_cores":%s,"memory_gb":%s,"arch":"%s"}\\n\' "$P" "$B" "$S" "$M" "$C" "$N" "$R" "$A"', +].join("; "); + +export function registerRecheckCommand(parent: Command): void { + parent + .command("recheck") + .description("Refresh hardware info for all installed machines via SSH") + .option("--user ", "SSH user", "root") + .option("--target ", "Only recheck a specific machine (by hostname or MAC)") + .action(async (opts: { user: string; target?: string }) => { + const client = getLabdClient(); + let state; + try { + state = await client.getMachines(); + } catch (err) { + console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`); + process.exit(1); + } + + // Build list of machines to check + const targets: Array<{ mac: string; hostname: string; ip: string }> = []; + for (const [mac, info] of Object.entries(state.installed)) { + if (!info.ip) continue; + if (opts.target && info.hostname !== opts.target && mac !== opts.target) continue; + targets.push({ mac, hostname: info.hostname, ip: info.ip }); + } + + if (targets.length === 0) { + console.log("No installed machines with IPs to check."); + return; + } + + console.log(`\n${BOLD}Rechecking ${targets.length} machine(s)...${RESET}\n`); + + let updated = 0; + let failed = 0; + + for (const { mac, hostname, ip } of targets) { + process.stdout.write(` ${hostname.padEnd(24)} ${DIM}(${ip})${RESET} `); + + try { + const t0 = Date.now(); + const result = await sshExec(ip, opts.user, HW_COLLECT_SCRIPT, SSH_OPTS); + const elapsed = Date.now() - t0; + if (result.exitCode !== 0) { + console.log(`${RED}SSH failed (exit ${result.exitCode}, ${elapsed}ms)${RESET}`); + if (result.stderr) console.log(` ${DIM}${result.stderr.substring(0, 200)}${RESET}`); + console.log(`${RED}SSH failed (exit ${result.exitCode})${RESET}`); + failed++; + continue; + } + + const hwData = JSON.parse(result.stdout.trim()); + await client.discoverMachine({ mac, ...hwData }); + const cpu = hwData.cpu_model || "?"; + const cores = hwData.cpu_cores || "?"; + const mem = hwData.memory_gb || "?"; + console.log(`${GREEN}OK${RESET} ${DIM}${cpu}, ${cores} cores, ${mem}GB${RESET}`); + updated++; + } catch (err) { + console.log(`${RED}FAIL${RESET} ${DIM}${err instanceof Error ? err.message : String(err)}${RESET}`); + failed++; + } + } + + console.log(`\n${BOLD}Done:${RESET} ${updated} updated, ${failed} failed\n`); + }); +} diff --git a/bastion/src/cli/src/index.ts b/bastion/src/cli/src/index.ts index 28e7db4..34ea728 100644 --- a/bastion/src/cli/src/index.ts +++ b/bastion/src/cli/src/index.ts @@ -20,6 +20,7 @@ import { registerRegisterCommand } from "./commands/register.js"; import { registerAsahiCommand } from "./commands/asahi.js"; import { registerLogsCommand } from "./commands/logs.js"; import { registerMakeIsoCommand } from "./commands/makeiso.js"; +import { registerRecheckCommand } from "./commands/recheck.js"; import { registerConfigCommand } from "./commands/config.js"; import { registerLoginCommand } from "./commands/login.js"; import { registerDoctorCommand } from "./commands/doctor.js"; @@ -104,6 +105,7 @@ export function createProgram(): Command { registerAsahiCommand(provisionCmd); registerLogsCommand(provisionCmd); registerMakeIsoCommand(provisionCmd); + registerRecheckCommand(provisionCmd); // config list/get/set/path registerConfigCommand(program); diff --git a/bastion/src/labd/src/routes/bastions.ts b/bastion/src/labd/src/routes/bastions.ts index f8cc7e9..1903534 100644 --- a/bastion/src/labd/src/routes/bastions.ts +++ b/bastion/src/labd/src/routes/bastions.ts @@ -260,6 +260,37 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void } }); + // Update hardware info (discovery data) for a machine + app.post<{ + Body: { + mac?: string; product?: string; board?: string; serial?: string; + manufacturer?: string; cpu_model?: string; cpu_cores?: number; + memory_gb?: number; arch?: string; + disks?: Array<{ name: string; size_gb: number; model: string }>; + nics?: Array<{ name: string; mac: string; state: string }>; + }; + }>("/api/machines/discover", async (request, reply) => { + const data = request.body ?? {}; + const mac = (data.mac ?? "").toLowerCase().replace(/-/g, ":"); + if (!mac) { + return reply.code(400).send({ error: "mac is required" }); + } + + const bastion = bastionRegistry.findBastionByMac(mac); + const target = bastion ?? (bastionRegistry.getAll().length === 1 ? bastionRegistry.getAll()[0] : null); + + if (!target) { + return reply.code(503).send({ error: "No bastion found for this MAC" }); + } + + try { + const result = await sendCommand(target.bastionId, { type: "command-discover", ...data, mac }); + return reply.code(result.status === "ok" ? 200 : 500).send(result); + } catch (err) { + return reply.code(500).send({ error: err instanceof Error ? err.message : String(err) }); + } + }); + // Update role app.post<{ Body: { mac?: string; role?: string }; diff --git a/bastion/src/modules/modules/k3s/src/operations/k3s-install.ts b/bastion/src/modules/modules/k3s/src/operations/k3s-install.ts index c2bb6e9..2c274c7 100644 --- a/bastion/src/modules/modules/k3s/src/operations/k3s-install.ts +++ b/bastion/src/modules/modules/k3s/src/operations/k3s-install.ts @@ -15,6 +15,15 @@ export const installK3sBinary: Operation = async (ctx): Promise const alreadyInstalled = version.exitCode === 0; if (isServer) { + // Clean stale server state when joining an existing cluster + // (TLS certs from a previous run cause "newer than datastore" fatal error) + if (ctx.config.k3sServerUrl && ctx.config.k3sToken) { + await ctx.ssh.exec( + "rm -rf /var/lib/rancher/k3s/server/tls /var/lib/rancher/k3s/server/cred /var/lib/rancher/k3s/server/db", + sshOpts(ctx), + ); + } + // If joining an existing cluster, pass K3S_URL and K3S_TOKEN const joinEnv = ctx.config.k3sServerUrl && ctx.config.k3sToken ? `K3S_URL="${ctx.config.k3sServerUrl}" K3S_TOKEN="${ctx.config.k3sToken}"` diff --git a/bastion/src/shared/src/protocol/index.ts b/bastion/src/shared/src/protocol/index.ts index 37b4a3f..d7e3018 100644 --- a/bastion/src/shared/src/protocol/index.ts +++ b/bastion/src/shared/src/protocol/index.ts @@ -113,6 +113,7 @@ export type LabdBastionMessage = | { type: "command-role-update"; requestId: string; mac: string; role: string } | { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean } | { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string } + | { type: "command-discover"; requestId: string; mac: string; product?: string; board?: string; serial?: string; manufacturer?: string; cpu_model?: string; cpu_cores?: number; memory_gb?: number; arch?: string; disks?: Array<{ name: string; size_gb: number; model: string }>; nics?: Array<{ name: string; mac: string; state: string }> } | { type: "server-shutdown"; reconnectAfter: number }; export type BastionMessageType = BastionMessage["type"]; @@ -127,7 +128,7 @@ const BASTION_MESSAGE_TYPES = new Set([ const LABD_BASTION_MESSAGE_TYPES = new Set([ "bastion-enrolled", "bastion-heartbeat-ack", "command-install", - "command-forget", "command-role-update", "command-debug", "command-register", "server-shutdown", + "command-forget", "command-role-update", "command-debug", "command-register", "command-discover", "server-shutdown", ]); export function isBastionMessage(msg: unknown): msg is BastionMessage { diff --git a/bastion/src/shared/src/types/state.ts b/bastion/src/shared/src/types/state.ts index 689f09a..b8599fd 100644 --- a/bastion/src/shared/src/types/state.ts +++ b/bastion/src/shared/src/types/state.ts @@ -96,6 +96,13 @@ export interface InstalledInfo { ip: string; installed_at: string; bastionId?: string; // set when aggregated through labd + // Hardware info (copied from discovered on install completion) + product?: string; + manufacturer?: string; + cpu_model?: string; + cpu_cores?: number; + memory_gb?: number; + arch?: string; } export interface DebugConfig { -- 2.49.1