Merge pull request 'feat: provision recheck, hardware info preservation, ISO boot fixes' (#11) from feat/recheck-and-fixes into main
Some checks failed
Some checks failed
Reviewed-on: #11
This commit was merged in pull request #11.
This commit is contained in:
@@ -82,6 +82,9 @@ _labctl() {
|
||||
"provision makeiso")
|
||||
COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision recheck")
|
||||
COMPREPLY=($(compgen -W "--user --target -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"config list")
|
||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||
return ;;
|
||||
@@ -107,7 +110,7 @@ _labctl() {
|
||||
COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision")
|
||||
COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso recheck -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"config")
|
||||
COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
|
||||
|
||||
@@ -128,6 +128,7 @@ complete -c labctl -n "__labctl_using_cmd provision" -a register -d 'Register an
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a asahi -d 'Show instructions to provision an Apple Silicon Mac with Asahi Linux'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a recheck -d 'Refresh hardware info for all installed machines via SSH'
|
||||
|
||||
# provision install options
|
||||
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
||||
@@ -154,6 +155,10 @@ complete -c labctl -n "__labctl_in_cmd provision makeiso" -l arch -d 'Target arc
|
||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL'
|
||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x
|
||||
|
||||
# provision recheck options
|
||||
complete -c labctl -n "__labctl_in_cmd provision recheck" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd provision recheck" -l target -d 'Only recheck a specific machine (by hostname or MAC)' -x
|
||||
|
||||
# config subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
|
||||
complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
|
||||
|
||||
@@ -99,16 +99,22 @@ if [ "$PUSH" = true ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012)
|
||||
TLS_FLAG=""
|
||||
if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then
|
||||
TLS_FLAG="--tls-verify=false"
|
||||
fi
|
||||
|
||||
echo "==> Logging in to $REGISTRY..."
|
||||
podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||
podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||
|
||||
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||
|
||||
# Also tag as :latest if not already
|
||||
if [ "$TAG" != "latest" ]; then
|
||||
echo "==> Also pushing as :latest..."
|
||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||
fi
|
||||
|
||||
# Link package to repository if script exists
|
||||
|
||||
@@ -92,15 +92,21 @@ if [ "$PUSH" = true ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012)
|
||||
TLS_FLAG=""
|
||||
if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then
|
||||
TLS_FLAG="--tls-verify=false"
|
||||
fi
|
||||
|
||||
echo "==> Logging in to $REGISTRY..."
|
||||
podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||
podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||
|
||||
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||
|
||||
if [ "$TAG" != "latest" ]; then
|
||||
echo "==> Also pushing as :latest..."
|
||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||
fi
|
||||
|
||||
if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
|
||||
|
||||
131
bastion/scripts/fix-ssh-root.sh
Normal file
131
bastion/scripts/fix-ssh-root.sh
Normal file
@@ -0,0 +1,131 @@
|
||||
#!/bin/bash
|
||||
# Fix root SSH access on all provisioned machines.
|
||||
# Tries root, lab, michal users to find one that works,
|
||||
# then ensures root has the SSH key and PermitRootLogin is enabled.
|
||||
set -euo pipefail
|
||||
|
||||
SSH_KEY="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDMJ3FkUGbG174eoO5RjZd2eNV680FM5pgp0AgpW/QwlJExK3qxMk0DJSr4ICmzGUx4yujAXcrqU1otcOMPzzFzwc5heWpSmlNHU3TIW6NHEt0sF9ZTAbGLw2zSw3si5UouqFkCcENA40mePFJqY+Q9R8N1uvLgu4m/do+Zrn/mk5Ewc1V7OCRE5Acrnaec4T7LTB0BuVXcjPUfAmZ0q5fI+bKPR1q2Kc3+IeGhVkBuZ9OJVeXXhnpedm0uEbLeriK/jUYKYw/1QhsNDM8Tyty+UIGr9QVnWwzCMHB+wuQcDYC9mPGTqg0fYwX8Mp8xMi1PPxdsh1G7bj/cpWMAF43KswWORF2ul8ICGbaE1zEgIYXO790SuBjpBHhaC6Iegqi58hmCuP+a9893q/EU9HyrWTJHCZXC5E4kP1MsM57KrhEpszM6I3sW9f9zMTPd5QsCXFi4si4OMwX4kYNVu3fQGQPpseDPlTTSrT6uUdqj4Irm0c1m9cYTmK0vYgsM3ss= michal@fedora"
|
||||
|
||||
SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ConnectTimeout=5"
|
||||
USERS_TO_TRY=(root lab michal)
|
||||
|
||||
# Machines: hostname ip
|
||||
MACHINES=(
|
||||
"labmaster 192.168.8.11"
|
||||
"worker0-k8s0 192.168.8.23"
|
||||
"worker1-k8s0 192.168.8.13"
|
||||
"worker2-k8s0 192.168.8.25"
|
||||
"spark-2935 192.168.8.12"
|
||||
)
|
||||
|
||||
BOLD="\033[1m"
|
||||
GREEN="\033[0;32m"
|
||||
RED="\033[0;31m"
|
||||
DIM="\033[2m"
|
||||
RESET="\033[0m"
|
||||
|
||||
# Script to run on each machine (via sudo if needed).
# It receives the public key as $1 and prints one marker line per step
# (KEY_*, SSHD_*, PUBKEY_*, DONE); the caller greps the output for them.
# `read -d ''` returns non-zero when it hits the end of the heredoc, hence `|| true`.
read -r -d '' FIX_SCRIPT << 'FIXEOF' || true
#!/bin/bash
set -e
KEY="$1"
AUTH_KEYS="/root/.ssh/authorized_keys"

# 1. Ensure root .ssh dir and authorized_keys exist with strict permissions
mkdir -p /root/.ssh
chmod 700 /root/.ssh
touch "$AUTH_KEYS"
chmod 600 "$AUTH_KEYS"

# 2. Add key if not present
if ! grep -qF "$KEY" "$AUTH_KEYS" 2>/dev/null; then
    # If the file does not end with a newline, a plain append would glue our
    # key onto the previous entry and corrupt both; terminate that line first.
    if [ -n "$(tail -c1 "$AUTH_KEYS")" ]; then
        echo >> "$AUTH_KEYS"
    fi
    echo "$KEY" >> "$AUTH_KEYS"
    echo "KEY_ADDED"
else
    echo "KEY_EXISTS"
fi

# 3. Fix sshd_config for root login with keys
SSHD_CONF="/etc/ssh/sshd_config"
SSHD_BACKUP="$SSHD_CONF.fix-ssh-root.bak"
SSHD_BIN=$(command -v sshd || echo /usr/sbin/sshd)
CHANGED=0

# Snapshot the config once, right before the first modification, so a broken
# edit can be rolled back instead of locking everyone out.
begin_change() {
    if [ "$CHANGED" -eq 0 ]; then
        cp -p "$SSHD_CONF" "$SSHD_BACKUP"
    fi
    CHANGED=1
}

# Print the effective global value of an sshd keyword (lower-case name), as
# reported by `sshd -T`; prints nothing when sshd cannot dump its config.
effective() {
    "$SSHD_BIN" -T 2>/dev/null | awk -v key="$1" '$1 == key { print $2; exit }'
}

# Ensure PermitRootLogin allows key auth.
# sshd uses the FIRST value it obtains for a keyword, so prefer the effective
# config; fall back to the first global (pre-Match) line of the main file.
CURRENT=$(effective permitrootlogin)
if [ -z "$CURRENT" ]; then
    CURRENT=$(awk 'tolower($1) == "match" { exit } tolower($1) == "permitrootlogin" { print $2; exit }' "$SSHD_CONF" 2>/dev/null || true)
fi

case "$CURRENT" in
    yes|prohibit-password|without-password)
        echo "SSHD_OK"
        ;;
    *)
        begin_change
        # Drop existing (and commented-out) PermitRootLogin lines and put ours
        # on the first line: a directive appended at the end could fall inside
        # a trailing Match block or lose to an earlier Include'd drop-in.
        TMP_CONF=$(mktemp)
        {
            echo "PermitRootLogin prohibit-password"
            grep -vE '^#*PermitRootLogin' "$SSHD_CONF" || true
        } > "$TMP_CONF"
        # Write through the existing file to keep its owner, mode and context.
        cat "$TMP_CONF" > "$SSHD_CONF"
        rm -f "$TMP_CONF"
        echo "SSHD_FIXED"
        ;;
esac

# Ensure PubkeyAuthentication is enabled
if grep -qE "^PubkeyAuthentication[[:space:]]+no" "$SSHD_CONF" 2>/dev/null; then
    begin_change
    sed -i -E 's/^PubkeyAuthentication[[:space:]]+no.*/PubkeyAuthentication yes/' "$SSHD_CONF"
    echo "PUBKEY_FIXED"
else
    echo "PUBKEY_OK"
fi

# Restart sshd if changed -- but only with a config that validates
if [ "$CHANGED" -eq 1 ]; then
    if "$SSHD_BIN" -t 2>/dev/null; then
        # The unit is called sshd on some distros and ssh on others.
        systemctl restart sshd 2>/dev/null || systemctl restart ssh 2>/dev/null || true
        echo "SSHD_RESTARTED"
    else
        # Roll back rather than leave a config the daemon would refuse to load.
        cat "$SSHD_BACKUP" > "$SSHD_CONF"
        echo "SSHD_INVALID"
    fi
fi

# 4. Signal completion (the caller verifies root login separately)
echo "DONE"
FIXEOF
|
||||
|
||||
echo ""
|
||||
echo -e "${BOLD}Fixing root SSH access on all machines...${RESET}"
|
||||
echo ""
|
||||
|
||||
# For every machine: find a login that works, run the remote fix script through
# it (via sudo when the login is not root), then confirm root can log in.
# One status line is printed per machine; a failure on one machine never stops
# the run for the others.
for entry in "${MACHINES[@]}"; do
    read -r hostname ip <<< "$entry"
    printf " %-24s ${DIM}(%s)${RESET} " "$hostname" "$ip"

    # Try each user until one works
    WORKING_USER=""
    for user in "${USERS_TO_TRY[@]}"; do
        # shellcheck disable=SC2086 # SSH_OPTS is an option string, split on purpose
        if ssh $SSH_OPTS "$user@$ip" "true" 2>/dev/null; then
            WORKING_USER="$user"
            break
        fi
    done

    if [ -z "$WORKING_USER" ]; then
        echo -e "${RED}UNREACHABLE${RESET} (tried: ${USERS_TO_TRY[*]})"
        continue
    fi

    # Run fix script (with sudo if not root). `sudo -n` fails immediately when
    # a password would be needed: stdin carries the script, so sudo could never
    # be answered anyway.
    if [ "$WORKING_USER" = "root" ]; then
        REMOTE_CMD="bash -s -- '$SSH_KEY'"
    else
        REMOTE_CMD="sudo -n bash -s -- '$SSH_KEY'"
    fi

    # Capture the exit status explicitly: under `set -e` a bare
    # RESULT=$(ssh ...) that fails would abort the whole script mid-loop.
    FIX_RC=0
    # shellcheck disable=SC2086
    RESULT=$(ssh $SSH_OPTS "$WORKING_USER@$ip" "$REMOTE_CMD" <<< "$FIX_SCRIPT" 2>&1) || FIX_RC=$?

    # Parse result: translate the markers printed by the fix script into a
    # comma-separated summary (no leading comma when a marker is missing).
    DETAILS=""
    for pair in \
        "KEY_ADDED:key added" \
        "KEY_EXISTS:key ok" \
        "SSHD_FIXED:sshd fixed" \
        "SSHD_OK:sshd ok" \
        "SSHD_RESTARTED:restarted"; do
        if [[ "$RESULT" == *"${pair%%:*}"* ]]; then
            DETAILS="${DETAILS:+$DETAILS, }${pair#*:}"
        fi
    done
    if [ "$FIX_RC" -ne 0 ]; then
        DETAILS="${DETAILS:+$DETAILS, }fix script exit $FIX_RC"
    fi

    # Verify root works now
    # shellcheck disable=SC2086
    if ssh $SSH_OPTS "root@$ip" "true" 2>/dev/null; then
        echo -e "${GREEN}OK${RESET} ${DIM}(via $WORKING_USER: $DETAILS)${RESET}"
    else
        echo -e "${RED}PARTIAL${RESET} ${DIM}(via $WORKING_USER: $DETAILS -- root still blocked)${RESET}"
    fi
done
|
||||
|
||||
echo ""
|
||||
echo -e "${BOLD}Done.${RESET} Verify: labctl provision recheck --user root"
|
||||
echo ""
|
||||
@@ -309,6 +309,32 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
return { status: "ok", data: { mac, hostname: msg.hostname } };
|
||||
});
|
||||
|
||||
labdConn.onCommand("command-discover", async (msg) => {
|
||||
if (msg.type !== "command-discover") throw new Error("unexpected");
|
||||
const mac = (msg.mac as string).toLowerCase();
|
||||
const now = new Date().toISOString();
|
||||
const existing = state.load().discovered[mac];
|
||||
state.update((s) => {
|
||||
s.discovered[mac] = {
|
||||
mac,
|
||||
product: (msg.product as string) ?? "unknown",
|
||||
board: (msg.board as string) ?? "unknown",
|
||||
serial: (msg.serial as string) ?? "unknown",
|
||||
manufacturer: (msg.manufacturer as string) ?? "unknown",
|
||||
cpu_model: (msg.cpu_model as string) ?? "unknown",
|
||||
cpu_cores: (msg.cpu_cores as number) ?? 0,
|
||||
memory_gb: (msg.memory_gb as number) ?? 0,
|
||||
arch: (msg.arch as string) ?? "unknown",
|
||||
disks: (msg.disks as Array<{ name: string; size_gb: number; model: string }>) ?? [],
|
||||
nics: (msg.nics as Array<{ name: string; mac: string; state: string }>) ?? [],
|
||||
first_seen: existing?.first_seen ?? now,
|
||||
last_seen: now,
|
||||
};
|
||||
});
|
||||
logger.info(`HARDWARE UPDATED: ${mac} -- ${msg.manufacturer ?? "?"} ${msg.product ?? "?"} (${msg.cpu_model ?? "?"}, ${msg.cpu_cores ?? "?"} cores, ${msg.memory_gb ?? "?"}GB RAM)`);
|
||||
return { status: "ok", data: { mac } };
|
||||
});
|
||||
|
||||
labdConn.onCommand("command-role-update", async (msg) => {
|
||||
if (msg.type !== "command-role-update") throw new Error("unexpected");
|
||||
const mac = msg.mac.toLowerCase();
|
||||
|
||||
@@ -139,12 +139,22 @@ export function registerApiRoutes(
|
||||
? detailStr.replace("ready at ", "").trim()
|
||||
: "";
|
||||
|
||||
const hw = s.discovered[mac];
|
||||
const installedInfo: InstalledInfo = {
|
||||
hostname: cfg?.hostname ?? "?",
|
||||
role: cfg?.role ?? "?",
|
||||
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
|
||||
ip,
|
||||
installed_at: new Date().toISOString(),
|
||||
// Preserve hardware info from discovery
|
||||
...(hw ? {
|
||||
product: hw.product,
|
||||
manufacturer: hw.manufacturer,
|
||||
cpu_model: hw.cpu_model,
|
||||
cpu_cores: hw.cpu_cores,
|
||||
memory_gb: hw.memory_gb,
|
||||
arch: hw.arch,
|
||||
} : {}),
|
||||
};
|
||||
s.installed[mac] = installedInfo;
|
||||
|
||||
|
||||
@@ -83,6 +83,20 @@ case "$STATE" in
|
||||
echo "ERROR: Failed to download install kickstart"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Run any %pre scripts from the downloaded kickstart.
|
||||
# Anaconda only runs %pre from the top-level file, not from %include'd files.
|
||||
python3 -c "
|
||||
import re, subprocess
|
||||
content = open('/tmp/dynamic.ks').read()
|
||||
blocks = re.findall(r'%pre[^\\n]*\\n(.*?)%end', content, re.DOTALL)
|
||||
for i, script in enumerate(blocks):
|
||||
path = f'/tmp/inner-pre-{i}.sh'
|
||||
with open(path, 'w') as f:
|
||||
f.write(script)
|
||||
print(f'Running inner %pre script {i} ({len(script.splitlines())} lines)')
|
||||
subprocess.run(['bash', path], check=False)
|
||||
"
|
||||
;;
|
||||
|
||||
debug)
|
||||
|
||||
@@ -166,6 +166,7 @@ export class BastionConnection {
|
||||
case "command-role-update":
|
||||
case "command-debug":
|
||||
case "command-register":
|
||||
case "command-discover":
|
||||
void this.handleCommand(msg);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -104,6 +104,16 @@ export class LabdClient {
|
||||
return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } });
|
||||
}
|
||||
|
||||
/**
 * Push hardware details for one machine to labd's discovery endpoint.
 *
 * @param data - The machine's MAC plus any hardware fields that were collected.
 * @returns Whatever labd answers for `POST /api/machines/discover`.
 */
async discoverMachine(data: {
  mac: string;
  product?: string;
  board?: string;
  serial?: string;
  manufacturer?: string;
  cpu_model?: string;
  cpu_cores?: number;
  memory_gb?: number;
  arch?: string;
  disks?: Array<{ name: string; size_gb: number; model: string }>;
  nics?: Array<{ name: string; mac: string; state: string }>;
}): Promise<{ status: string; error?: string }> {
  const endpoint = "/api/machines/discover";
  const payload = { body: data };
  return this.request("POST", endpoint, payload);
}
|
||||
|
||||
/**
 * Ask labd to forget a machine.
 *
 * @param mac - MAC address of the machine; URL-encoded before it is put in the path.
 * @returns Whatever labd answers for `DELETE /api/machines/<mac>`.
 */
async forgetMachine(mac: string): Promise<{ status: string }> {
  const encodedMac = encodeURIComponent(mac);
  const path = `/api/machines/${encodedMac}`;
  return this.request("DELETE", path);
}
|
||||
|
||||
@@ -69,10 +69,10 @@ export function registerListCommand(parent: Command): void {
|
||||
const hostname = inst?.hostname ?? queued?.hostname ?? "-";
|
||||
const role = inst?.role ?? queued?.role ?? "-";
|
||||
const ip = inst?.ip ?? "-";
|
||||
const cpu = hw?.cpu_model ?? "-";
|
||||
const cores = hw?.cpu_cores != null ? String(hw.cpu_cores) : "-";
|
||||
const ram = hw?.memory_gb != null ? `${hw.memory_gb}GB` : "-";
|
||||
const product = hw?.product ?? "-";
|
||||
const cpu = hw?.cpu_model ?? inst?.cpu_model ?? "-";
|
||||
const cores = (hw?.cpu_cores ?? inst?.cpu_cores) != null ? String(hw?.cpu_cores ?? inst?.cpu_cores) : "-";
|
||||
const ram = (hw?.memory_gb ?? inst?.memory_gb) != null ? `${hw?.memory_gb ?? inst?.memory_gb}GB` : "-";
|
||||
const product = hw?.product ?? inst?.product ?? "-";
|
||||
|
||||
const color = statusColor(status);
|
||||
|
||||
|
||||
94
bastion/src/cli/src/commands/recheck.ts
Normal file
94
bastion/src/cli/src/commands/recheck.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
// CLI command: provision recheck
|
||||
// SSH into all installed machines, collect hardware info, update bastion state.
|
||||
|
||||
import type { Command } from "commander";
|
||||
import { sshExec } from "@lab/modules";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
const BOLD = "\x1b[1m";
|
||||
const GREEN = "\x1b[0;32m";
|
||||
const RED = "\x1b[0;31m";
|
||||
const DIM = "\x1b[2m";
|
||||
const RESET = "\x1b[0m";
|
||||
|
||||
const SSH_OPTS = { timeoutMs: 30_000 };
|
||||
|
||||
// Shell script that collects hardware info and prints it as one JSON line.
// Kept simple — no Python, pure shell + awk + sed.
//
// Each array element is one shell statement; they are joined with "; " so the
// whole probe can be handed to ssh as a single command line.
const HW_COLLECT_SCRIPT = [
  // JSON-escape backslashes and double quotes so a DMI/CPU string cannot break the output.
  String.raw`esc() { printf "%s" "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'; }`,
  'P=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo unknown)',
  'B=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo unknown)',
  'S=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo unknown)',
  'M=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo unknown)',
  // A pipeline's exit status is that of its last command (sed, which succeeds
  // on empty input), so a `||` fallback after it never runs. Test for an empty
  // result instead; the "Model" line covers CPUs without a "model name" line.
  'C=$(grep -m1 "model name" /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=$(grep -m1 Model /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=unknown',
  // `grep -c` prints "0" AND exits non-zero when nothing matches, so the old
  // `|| echo 0` produced two lines. Normalise anything non-numeric to 0.
  'N=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null || true)',
  'case "$N" in ""|*[!0-9]*) N=0;; esac',
  // awk exits 0 without printing when MemTotal is missing; guard the empty case too.
  'R=$(awk "/MemTotal/ {printf \\"%d\\", \\$2/1024/1024}" /proc/meminfo 2>/dev/null || true)',
  'case "$R" in ""|*[!0-9]*) R=0;; esac',
  'A=$(uname -m)',
  'printf \'{"product":"%s","board":"%s","serial":"%s","manufacturer":"%s","cpu_model":"%s","cpu_cores":%s,"memory_gb":%s,"arch":"%s"}\\n\' "$(esc "$P")" "$(esc "$B")" "$(esc "$S")" "$(esc "$M")" "$(esc "$C")" "$N" "$R" "$(esc "$A")"',
].join("; ");
|
||||
|
||||
/** Hardware fields the remote probe may report; mirrors the discover API payload. */
interface HardwareProbe {
  product?: string;
  board?: string;
  serial?: string;
  manufacturer?: string;
  cpu_model?: string;
  cpu_cores?: number;
  memory_gb?: number;
  arch?: string;
}

const PROBE_STRING_FIELDS = ["product", "board", "serial", "manufacturer", "cpu_model", "arch"] as const;
const PROBE_NUMBER_FIELDS = ["cpu_cores", "memory_gb"] as const;

/** Canonical MAC form for comparisons: lower-case with ":" separators. */
function normalizeMac(value: string): string {
  return value.toLowerCase().replace(/-/g, ":");
}

/**
 * Parse and validate the JSON line printed by the hardware probe.
 * Only known fields of the expected type are kept.
 *
 * @throws Error when the output is not valid JSON or not a JSON object.
 */
function parseHardwareJson(stdout: string): HardwareProbe {
  let parsed: unknown;
  try {
    parsed = JSON.parse(stdout.trim());
  } catch (err) {
    throw new Error(`unparseable hardware info: ${err instanceof Error ? err.message : String(err)}`);
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error("hardware probe did not return a JSON object");
  }

  const raw = parsed as Record<string, unknown>;
  const probe: HardwareProbe = {};
  for (const key of PROBE_STRING_FIELDS) {
    const value = raw[key];
    if (typeof value === "string") probe[key] = value;
  }
  for (const key of PROBE_NUMBER_FIELDS) {
    const value = raw[key];
    if (typeof value === "number" && Number.isFinite(value)) probe[key] = value;
  }
  return probe;
}

/**
 * Register `provision recheck`: SSH into every installed machine that has an
 * IP, collect hardware info with HW_COLLECT_SCRIPT and send it to labd.
 *
 * Options:
 *   --user <user>        SSH user (default "root")
 *   --target <hostname>  restrict the run to one machine, by hostname or MAC
 *
 * Exits with status 1 when labd cannot be reached. Per-machine failures are
 * printed and counted but do not stop the run.
 */
export function registerRecheckCommand(parent: Command): void {
  parent
    .command("recheck")
    .description("Refresh hardware info for all installed machines via SSH")
    .option("--user <user>", "SSH user", "root")
    .option("--target <hostname>", "Only recheck a specific machine (by hostname or MAC)")
    .action(async (opts: { user: string; target?: string }) => {
      const client = getLabdClient();
      let state;
      try {
        state = await client.getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // Build list of machines to check. MACs are compared in canonical form
      // so "AA-BB-…" and "aa:bb:…" select the same machine.
      const wantedMac = opts.target ? normalizeMac(opts.target) : undefined;
      const targets: Array<{ mac: string; hostname: string; ip: string }> = [];
      for (const [mac, info] of Object.entries(state.installed)) {
        if (!info.ip) continue;
        if (opts.target && info.hostname !== opts.target && normalizeMac(mac) !== wantedMac) continue;
        targets.push({ mac, hostname: info.hostname, ip: info.ip });
      }

      if (targets.length === 0) {
        console.log(
          opts.target
            ? `No installed machine with an IP matches "${opts.target}".`
            : "No installed machines with IPs to check.",
        );
        return;
      }

      console.log(`\n${BOLD}Rechecking ${targets.length} machine(s)...${RESET}\n`);

      let updated = 0;
      let failed = 0;

      // Sequential on purpose: each machine's status is appended to the line
      // that was started for it, so runs must not interleave.
      for (const { mac, hostname, ip } of targets) {
        process.stdout.write(`  ${hostname.padEnd(24)} ${DIM}(${ip})${RESET} `);

        try {
          const t0 = Date.now();
          const result = await sshExec(ip, opts.user, HW_COLLECT_SCRIPT, SSH_OPTS);
          const elapsed = Date.now() - t0;
          if (result.exitCode !== 0) {
            console.log(`${RED}SSH failed (exit ${result.exitCode}, ${elapsed}ms)${RESET}`);
            if (result.stderr) console.log(`    ${DIM}${result.stderr.substring(0, 200)}${RESET}`);
            failed++;
            continue;
          }

          const hwData = parseHardwareJson(result.stdout);
          const response = await client.discoverMachine({ mac, ...hwData });
          if (response.status !== "ok") {
            throw new Error(response.error ?? `labd returned status "${response.status}"`);
          }

          // `||` rather than `??`: the probe reports 0 / "" for "unknown",
          // which should be shown as "?".
          const cpu = hwData.cpu_model || "?";
          const cores = hwData.cpu_cores || "?";
          const mem = hwData.memory_gb || "?";
          console.log(`${GREEN}OK${RESET} ${DIM}${cpu}, ${cores} cores, ${mem}GB${RESET}`);
          updated++;
        } catch (err) {
          console.log(`${RED}FAIL${RESET} ${DIM}${err instanceof Error ? err.message : String(err)}${RESET}`);
          failed++;
        }
      }

      console.log(`\n${BOLD}Done:${RESET} ${updated} updated, ${failed} failed\n`);
    });
}
|
||||
@@ -20,6 +20,7 @@ import { registerRegisterCommand } from "./commands/register.js";
|
||||
import { registerAsahiCommand } from "./commands/asahi.js";
|
||||
import { registerLogsCommand } from "./commands/logs.js";
|
||||
import { registerMakeIsoCommand } from "./commands/makeiso.js";
|
||||
import { registerRecheckCommand } from "./commands/recheck.js";
|
||||
import { registerConfigCommand } from "./commands/config.js";
|
||||
import { registerLoginCommand } from "./commands/login.js";
|
||||
import { registerDoctorCommand } from "./commands/doctor.js";
|
||||
@@ -104,6 +105,7 @@ export function createProgram(): Command {
|
||||
registerAsahiCommand(provisionCmd);
|
||||
registerLogsCommand(provisionCmd);
|
||||
registerMakeIsoCommand(provisionCmd);
|
||||
registerRecheckCommand(provisionCmd);
|
||||
|
||||
// config list/get/set/path
|
||||
registerConfigCommand(program);
|
||||
|
||||
@@ -260,6 +260,37 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
}
|
||||
});
|
||||
|
||||
// Update hardware info (discovery data) for a machine
|
||||
app.post<{
|
||||
Body: {
|
||||
mac?: string; product?: string; board?: string; serial?: string;
|
||||
manufacturer?: string; cpu_model?: string; cpu_cores?: number;
|
||||
memory_gb?: number; arch?: string;
|
||||
disks?: Array<{ name: string; size_gb: number; model: string }>;
|
||||
nics?: Array<{ name: string; mac: string; state: string }>;
|
||||
};
|
||||
}>("/api/machines/discover", async (request, reply) => {
|
||||
const data = request.body ?? {};
|
||||
const mac = (data.mac ?? "").toLowerCase().replace(/-/g, ":");
|
||||
if (!mac) {
|
||||
return reply.code(400).send({ error: "mac is required" });
|
||||
}
|
||||
|
||||
const bastion = bastionRegistry.findBastionByMac(mac);
|
||||
const target = bastion ?? (bastionRegistry.getAll().length === 1 ? bastionRegistry.getAll()[0] : null);
|
||||
|
||||
if (!target) {
|
||||
return reply.code(503).send({ error: "No bastion found for this MAC" });
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await sendCommand(target.bastionId, { type: "command-discover", ...data, mac });
|
||||
return reply.code(result.status === "ok" ? 200 : 500).send(result);
|
||||
} catch (err) {
|
||||
return reply.code(500).send({ error: err instanceof Error ? err.message : String(err) });
|
||||
}
|
||||
});
|
||||
|
||||
// Update role
|
||||
app.post<{
|
||||
Body: { mac?: string; role?: string };
|
||||
|
||||
@@ -15,6 +15,15 @@ export const installK3sBinary: Operation = async (ctx): Promise<OperationResult>
|
||||
const alreadyInstalled = version.exitCode === 0;
|
||||
|
||||
if (isServer) {
|
||||
// Clean stale server state when joining an existing cluster
|
||||
// (TLS certs from a previous run cause "newer than datastore" fatal error)
|
||||
if (ctx.config.k3sServerUrl && ctx.config.k3sToken) {
|
||||
await ctx.ssh.exec(
|
||||
"rm -rf /var/lib/rancher/k3s/server/tls /var/lib/rancher/k3s/server/cred /var/lib/rancher/k3s/server/db",
|
||||
sshOpts(ctx),
|
||||
);
|
||||
}
|
||||
|
||||
// If joining an existing cluster, pass K3S_URL and K3S_TOKEN
|
||||
const joinEnv = ctx.config.k3sServerUrl && ctx.config.k3sToken
|
||||
? `K3S_URL="${ctx.config.k3sServerUrl}" K3S_TOKEN="${ctx.config.k3sToken}"`
|
||||
|
||||
@@ -113,6 +113,7 @@ export type LabdBastionMessage =
|
||||
| { type: "command-role-update"; requestId: string; mac: string; role: string }
|
||||
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
|
||||
| { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
|
||||
| { type: "command-discover"; requestId: string; mac: string; product?: string; board?: string; serial?: string; manufacturer?: string; cpu_model?: string; cpu_cores?: number; memory_gb?: number; arch?: string; disks?: Array<{ name: string; size_gb: number; model: string }>; nics?: Array<{ name: string; mac: string; state: string }> }
|
||||
| { type: "server-shutdown"; reconnectAfter: number };
|
||||
|
||||
export type BastionMessageType = BastionMessage["type"];
|
||||
@@ -127,7 +128,7 @@ const BASTION_MESSAGE_TYPES = new Set<string>([
|
||||
|
||||
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
|
||||
"bastion-enrolled", "bastion-heartbeat-ack", "command-install",
|
||||
"command-forget", "command-role-update", "command-debug", "command-register", "server-shutdown",
|
||||
"command-forget", "command-role-update", "command-debug", "command-register", "command-discover", "server-shutdown",
|
||||
]);
|
||||
|
||||
export function isBastionMessage(msg: unknown): msg is BastionMessage {
|
||||
|
||||
@@ -96,6 +96,13 @@ export interface InstalledInfo {
|
||||
ip: string;
|
||||
installed_at: string;
|
||||
bastionId?: string; // set when aggregated through labd
|
||||
// Hardware info (copied from discovered on install completion)
|
||||
product?: string;
|
||||
manufacturer?: string;
|
||||
cpu_model?: string;
|
||||
cpu_cores?: number;
|
||||
memory_gb?: number;
|
||||
arch?: string;
|
||||
}
|
||||
|
||||
export interface DebugConfig {
|
||||
|
||||
Reference in New Issue
Block a user