feat: provision recheck, hardware info preservation, ISO boot fixes #11
@@ -82,6 +82,9 @@ _labctl() {
|
|||||||
"provision makeiso")
|
"provision makeiso")
|
||||||
COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
|
"provision recheck")
|
||||||
|
COMPREPLY=($(compgen -W "--user --target -h --help" -- "$cur"))
|
||||||
|
return ;;
|
||||||
"config list")
|
"config list")
|
||||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
@@ -107,7 +110,7 @@ _labctl() {
|
|||||||
COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
"provision")
|
"provision")
|
||||||
COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso recheck -h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
"config")
|
"config")
|
||||||
COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
|
||||||
|
|||||||
@@ -128,6 +128,7 @@ complete -c labctl -n "__labctl_using_cmd provision" -a register -d 'Register an
|
|||||||
complete -c labctl -n "__labctl_using_cmd provision" -a asahi -d 'Show instructions to provision an Apple Silicon Mac with Asahi Linux'
|
complete -c labctl -n "__labctl_using_cmd provision" -a asahi -d 'Show instructions to provision an Apple Silicon Mac with Asahi Linux'
|
||||||
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
|
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
|
||||||
complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning'
|
complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning'
|
||||||
|
complete -c labctl -n "__labctl_using_cmd provision" -a recheck -d 'Refresh hardware info for all installed machines via SSH'
|
||||||
|
|
||||||
# provision install options
|
# provision install options
|
||||||
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
||||||
@@ -154,6 +155,10 @@ complete -c labctl -n "__labctl_in_cmd provision makeiso" -l arch -d 'Target arc
|
|||||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL'
|
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL'
|
||||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x
|
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x
|
||||||
|
|
||||||
|
# provision recheck options
|
||||||
|
complete -c labctl -n "__labctl_in_cmd provision recheck" -l user -d 'SSH user' -x
|
||||||
|
complete -c labctl -n "__labctl_in_cmd provision recheck" -l target -d 'Only recheck a specific machine (by hostname or MAC)' -x
|
||||||
|
|
||||||
# config subcommands
|
# config subcommands
|
||||||
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
|
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
|
||||||
complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
|
complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
|
||||||
|
|||||||
@@ -99,16 +99,22 @@ if [ "$PUSH" = true ]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012)
|
||||||
|
TLS_FLAG=""
|
||||||
|
if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then
|
||||||
|
TLS_FLAG="--tls-verify=false"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "==> Logging in to $REGISTRY..."
|
echo "==> Logging in to $REGISTRY..."
|
||||||
podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||||
|
|
||||||
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||||
|
|
||||||
# Also tag as :latest if not already
|
# Also tag as :latest if not already
|
||||||
if [ "$TAG" != "latest" ]; then
|
if [ "$TAG" != "latest" ]; then
|
||||||
echo "==> Also pushing as :latest..."
|
echo "==> Also pushing as :latest..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Link package to repository if script exists
|
# Link package to repository if script exists
|
||||||
|
|||||||
@@ -92,15 +92,21 @@ if [ "$PUSH" = true ]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012)
|
||||||
|
TLS_FLAG=""
|
||||||
|
if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then
|
||||||
|
TLS_FLAG="--tls-verify=false"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "==> Logging in to $REGISTRY..."
|
echo "==> Logging in to $REGISTRY..."
|
||||||
podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||||
|
|
||||||
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||||
|
|
||||||
if [ "$TAG" != "latest" ]; then
|
if [ "$TAG" != "latest" ]; then
|
||||||
echo "==> Also pushing as :latest..."
|
echo "==> Also pushing as :latest..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
|
if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
|
||||||
|
|||||||
131
bastion/scripts/fix-ssh-root.sh
Normal file
131
bastion/scripts/fix-ssh-root.sh
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
#!/bin/bash
# Fix root SSH access on all provisioned machines.
#
# For every entry in MACHINES this script:
#   1. tries the users in USERS_TO_TRY (in order) until one can log in,
#   2. pipes FIX_SCRIPT to a remote bash (through sudo when the login user is
#      not root) to add SSH_KEY to root's authorized_keys and make sshd_config
#      permit key-based root login,
#   3. re-tests a direct root login and prints OK or PARTIAL.
# A machine that is unreachable, or whose remote fix fails, is reported and
# skipped; it does not stop the run.
set -euo pipefail

SSH_KEY="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDMJ3FkUGbG174eoO5RjZd2eNV680FM5pgp0AgpW/QwlJExK3qxMk0DJSr4ICmzGUx4yujAXcrqU1otcOMPzzFzwc5heWpSmlNHU3TIW6NHEt0sF9ZTAbGLw2zSw3si5UouqFkCcENA40mePFJqY+Q9R8N1uvLgu4m/do+Zrn/mk5Ewc1V7OCRE5Acrnaec4T7LTB0BuVXcjPUfAmZ0q5fI+bKPR1q2Kc3+IeGhVkBuZ9OJVeXXhnpedm0uEbLeriK/jUYKYw/1QhsNDM8Tyty+UIGr9QVnWwzCMHB+wuQcDYC9mPGTqg0fYwX8Mp8xMi1PPxdsh1G7bj/cpWMAF43KswWORF2ul8ICGbaE1zEgIYXO790SuBjpBHhaC6Iegqi58hmCuP+a9893q/EU9HyrWTJHCZXC5E4kP1MsM57KrhEpszM6I3sW9f9zMTPd5QsCXFi4si4OMwX4kYNVu3fQGQPpseDPlTTSrT6uUdqj4Irm0c1m9cYTmK0vYgsM3ss= michal@fedora"

# Kept as an array so each option stays a separate word without relying on
# unquoted word splitting.
SSH_OPTS=(
    -o StrictHostKeyChecking=no
    -o UserKnownHostsFile=/dev/null
    -o LogLevel=ERROR
    -o ConnectTimeout=5
)
USERS_TO_TRY=(root lab michal)

# Machines: hostname ip
MACHINES=(
    "labmaster 192.168.8.11"
    "worker0-k8s0 192.168.8.23"
    "worker1-k8s0 192.168.8.13"
    "worker2-k8s0 192.168.8.25"
    "spark-2935 192.168.8.12"
)

BOLD="\033[1m"
GREEN="\033[0;32m"
RED="\033[0;31m"
DIM="\033[2m"
RESET="\033[0m"

# Script to run on each machine (via sudo if needed).
# It receives the public key as $1 and reports what it did through marker
# lines (KEY_ADDED, SSHD_FIXED, ...) that the loop below turns into a summary.
# `read -d ''` returns non-zero at end of input, hence the `|| true`.
read -r -d '' FIX_SCRIPT << 'FIXEOF' || true
#!/bin/bash
set -e
KEY="$1"

# 1. Ensure root .ssh dir exists
mkdir -p /root/.ssh
chmod 700 /root/.ssh
touch /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys

# 2. Add key if not present
if ! grep -qF "$KEY" /root/.ssh/authorized_keys 2>/dev/null; then
    echo "$KEY" >> /root/.ssh/authorized_keys
    echo "KEY_ADDED"
else
    echo "KEY_EXISTS"
fi

# 3. Fix sshd_config for root login with keys
SSHD_CONF="/etc/ssh/sshd_config"
CHANGED=0

# Ensure PermitRootLogin allows key auth
CURRENT=$(grep -E "^PermitRootLogin" "$SSHD_CONF" 2>/dev/null | tail -1 || true)
if [ "$CURRENT" = "PermitRootLogin prohibit-password" ] || [ "$CURRENT" = "PermitRootLogin without-password" ]; then
    echo "SSHD_OK"
elif [ "$CURRENT" = "PermitRootLogin yes" ]; then
    echo "SSHD_OK"
else
    # Remove any existing PermitRootLogin lines
    sed -i '/^#*PermitRootLogin/d' "$SSHD_CONF"
    echo "PermitRootLogin prohibit-password" >> "$SSHD_CONF"
    CHANGED=1
    echo "SSHD_FIXED"
fi

# Ensure PubkeyAuthentication is enabled
if grep -qE "^PubkeyAuthentication no" "$SSHD_CONF" 2>/dev/null; then
    sed -i 's/^PubkeyAuthentication no/PubkeyAuthentication yes/' "$SSHD_CONF"
    CHANGED=1
    echo "PUBKEY_FIXED"
else
    echo "PUBKEY_OK"
fi

# Restart sshd if changed. The unit is tried under both names (sshd, ssh);
# only a restart that actually succeeded is reported as such.
if [ "$CHANGED" -eq 1 ]; then
    if systemctl restart sshd 2>/dev/null || systemctl restart ssh 2>/dev/null; then
        echo "SSHD_RESTARTED"
    else
        echo "SSHD_RESTART_FAILED"
    fi
fi

# 4. Signal completion; the caller verifies root login separately
echo "DONE"
FIXEOF

echo ""
echo -e "${BOLD}Fixing root SSH access on all machines...${RESET}"
echo ""

for entry in "${MACHINES[@]}"; do
    read -r hostname ip <<< "$entry"
    printf " %-24s ${DIM}(%s)${RESET} " "$hostname" "$ip"

    # Try each user until one works
    WORKING_USER=""
    for user in "${USERS_TO_TRY[@]}"; do
        if ssh "${SSH_OPTS[@]}" "$user@$ip" "true" 2>/dev/null; then
            WORKING_USER="$user"
            break
        fi
    done

    if [ -z "$WORKING_USER" ]; then
        echo -e "${RED}UNREACHABLE${RESET} (tried: ${USERS_TO_TRY[*]})"
        continue
    fi

    # Run fix script (with sudo if not root)
    REMOTE_CMD="bash -s -- '$SSH_KEY'"
    if [ "$WORKING_USER" != "root" ]; then
        REMOTE_CMD="sudo $REMOTE_CMD"
    fi

    # The assignment is the `if` condition so that, under `set -e`, a failing
    # remote run is reported for this machine instead of aborting the script.
    if ! RESULT=$(ssh "${SSH_OPTS[@]}" "$WORKING_USER@$ip" "$REMOTE_CMD" <<< "$FIX_SCRIPT" 2>&1); then
        # Show only the last output line, which is normally the error message.
        echo -e "${RED}FAILED${RESET} ${DIM}(via $WORKING_USER: ${RESULT##*$'\n'})${RESET}"
        continue
    fi

    # Parse result markers into a short human-readable summary
    DETAILS=""
    if [[ "$RESULT" == *KEY_ADDED* ]]; then DETAILS="key added"; fi
    if [[ "$RESULT" == *KEY_EXISTS* ]]; then DETAILS="key ok"; fi
    if [[ "$RESULT" == *SSHD_FIXED* ]]; then DETAILS="$DETAILS, sshd fixed"; fi
    if [[ "$RESULT" == *SSHD_OK* ]]; then DETAILS="$DETAILS, sshd ok"; fi
    if [[ "$RESULT" == *SSHD_RESTARTED* ]]; then DETAILS="$DETAILS, restarted"; fi
    if [[ "$RESULT" == *SSHD_RESTART_FAILED* ]]; then DETAILS="$DETAILS, restart failed"; fi

    # Verify root works now
    if ssh "${SSH_OPTS[@]}" "root@$ip" "true" 2>/dev/null; then
        echo -e "${GREEN}OK${RESET} ${DIM}(via $WORKING_USER: $DETAILS)${RESET}"
    else
        echo -e "${RED}PARTIAL${RESET} ${DIM}(via $WORKING_USER: $DETAILS -- root still blocked)${RESET}"
    fi
done

echo ""
echo -e "${BOLD}Done.${RESET} Verify: labctl provision recheck --user root"
echo ""
|
||||||
@@ -309,6 +309,32 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
|||||||
return { status: "ok", data: { mac, hostname: msg.hostname } };
|
return { status: "ok", data: { mac, hostname: msg.hostname } };
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Handle "command-discover": store refreshed hardware details for one MAC in
// `state.discovered`. Any field the message omits keeps the value already
// stored for that MAC (falling back to "unknown" / 0 / [] for a new record),
// so a partial refresh that carries no disks or NICs does not erase them.
labdConn.onCommand("command-discover", async (msg) => {
  if (msg.type !== "command-discover") throw new Error("unexpected");
  const mac = (msg.mac as string).toLowerCase();
  const now = new Date().toISOString();
  state.update((s) => {
    // Read the previous record from the state being updated so the merge and
    // the write see the same snapshot.
    const existing = s.discovered[mac];
    s.discovered[mac] = {
      mac,
      product: (msg.product as string) ?? existing?.product ?? "unknown",
      board: (msg.board as string) ?? existing?.board ?? "unknown",
      serial: (msg.serial as string) ?? existing?.serial ?? "unknown",
      manufacturer: (msg.manufacturer as string) ?? existing?.manufacturer ?? "unknown",
      cpu_model: (msg.cpu_model as string) ?? existing?.cpu_model ?? "unknown",
      cpu_cores: (msg.cpu_cores as number) ?? existing?.cpu_cores ?? 0,
      memory_gb: (msg.memory_gb as number) ?? existing?.memory_gb ?? 0,
      arch: (msg.arch as string) ?? existing?.arch ?? "unknown",
      disks: (msg.disks as Array<{ name: string; size_gb: number; model: string }>) ?? existing?.disks ?? [],
      nics: (msg.nics as Array<{ name: string; mac: string; state: string }>) ?? existing?.nics ?? [],
      // first_seen is set once; only last_seen moves on every refresh
      first_seen: existing?.first_seen ?? now,
      last_seen: now,
    };
  });
  logger.info(`HARDWARE UPDATED: ${mac} -- ${msg.manufacturer ?? "?"} ${msg.product ?? "?"} (${msg.cpu_model ?? "?"}, ${msg.cpu_cores ?? "?"} cores, ${msg.memory_gb ?? "?"}GB RAM)`);
  return { status: "ok", data: { mac } };
});
|
||||||
|
|
||||||
labdConn.onCommand("command-role-update", async (msg) => {
|
labdConn.onCommand("command-role-update", async (msg) => {
|
||||||
if (msg.type !== "command-role-update") throw new Error("unexpected");
|
if (msg.type !== "command-role-update") throw new Error("unexpected");
|
||||||
const mac = msg.mac.toLowerCase();
|
const mac = msg.mac.toLowerCase();
|
||||||
|
|||||||
@@ -139,12 +139,22 @@ export function registerApiRoutes(
|
|||||||
? detailStr.replace("ready at ", "").trim()
|
? detailStr.replace("ready at ", "").trim()
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
|
const hw = s.discovered[mac];
|
||||||
const installedInfo: InstalledInfo = {
|
const installedInfo: InstalledInfo = {
|
||||||
hostname: cfg?.hostname ?? "?",
|
hostname: cfg?.hostname ?? "?",
|
||||||
role: cfg?.role ?? "?",
|
role: cfg?.role ?? "?",
|
||||||
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
|
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
|
||||||
ip,
|
ip,
|
||||||
installed_at: new Date().toISOString(),
|
installed_at: new Date().toISOString(),
|
||||||
|
// Preserve hardware info from discovery
|
||||||
|
...(hw ? {
|
||||||
|
product: hw.product,
|
||||||
|
manufacturer: hw.manufacturer,
|
||||||
|
cpu_model: hw.cpu_model,
|
||||||
|
cpu_cores: hw.cpu_cores,
|
||||||
|
memory_gb: hw.memory_gb,
|
||||||
|
arch: hw.arch,
|
||||||
|
} : {}),
|
||||||
};
|
};
|
||||||
s.installed[mac] = installedInfo;
|
s.installed[mac] = installedInfo;
|
||||||
|
|
||||||
|
|||||||
@@ -83,6 +83,20 @@ case "$STATE" in
|
|||||||
echo "ERROR: Failed to download install kickstart"
|
echo "ERROR: Failed to download install kickstart"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Run any %pre scripts from the downloaded kickstart.
# Anaconda only runs %pre from the top-level file, not from %include'd files.
#
# Each section body is written to /tmp/inner-pre-<n>.sh and run with bash; a
# failing script does not stop the others (check=False).
# The section header and its closing %end are matched at the start of a line
# (optional leading blanks), and "%pre-install" headers are excluded so only
# real %pre sections are executed here.
# NOTE(review): header options such as --interpreter are ignored; every body
# is run with bash.
python3 -c "
import re, subprocess
content = open('/tmp/dynamic.ks').read()
blocks = re.findall(r'^[ \\t]*%pre(?!-)[^\\n]*\\n(.*?)^[ \\t]*%end', content, re.DOTALL | re.MULTILINE)
for i, script in enumerate(blocks):
    path = f'/tmp/inner-pre-{i}.sh'
    with open(path, 'w') as f:
        f.write(script)
    print(f'Running inner %pre script {i} ({len(script.splitlines())} lines)')
    subprocess.run(['bash', path], check=False)
"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
debug)
|
debug)
|
||||||
|
|||||||
@@ -166,6 +166,7 @@ export class BastionConnection {
|
|||||||
case "command-role-update":
|
case "command-role-update":
|
||||||
case "command-debug":
|
case "command-debug":
|
||||||
case "command-register":
|
case "command-register":
|
||||||
|
case "command-discover":
|
||||||
void this.handleCommand(msg);
|
void this.handleCommand(msg);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -104,6 +104,16 @@ export class LabdClient {
|
|||||||
return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } });
|
return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Submit hardware details for one machine to labd.
 *
 * POSTs `data` unchanged as the request body to `/api/machines/discover`.
 *
 * @param data - MAC address (required) plus any hardware fields to report.
 * @returns The response body as produced by `this.request`.
 */
async discoverMachine(data: {
  mac: string;
  product?: string;
  board?: string;
  serial?: string;
  manufacturer?: string;
  cpu_model?: string;
  cpu_cores?: number;
  memory_gb?: number;
  arch?: string;
  disks?: Array<{ name: string; size_gb: number; model: string }>;
  nics?: Array<{ name: string; mac: string; state: string }>;
}): Promise<{ status: string; error?: string }> {
  const requestOptions = { body: data };
  return this.request("POST", "/api/machines/discover", requestOptions);
}
|
||||||
|
|
||||||
async forgetMachine(mac: string): Promise<{ status: string }> {
|
async forgetMachine(mac: string): Promise<{ status: string }> {
|
||||||
return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
|
return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -69,10 +69,10 @@ export function registerListCommand(parent: Command): void {
|
|||||||
const hostname = inst?.hostname ?? queued?.hostname ?? "-";
|
const hostname = inst?.hostname ?? queued?.hostname ?? "-";
|
||||||
const role = inst?.role ?? queued?.role ?? "-";
|
const role = inst?.role ?? queued?.role ?? "-";
|
||||||
const ip = inst?.ip ?? "-";
|
const ip = inst?.ip ?? "-";
|
||||||
const cpu = hw?.cpu_model ?? "-";
|
const cpu = hw?.cpu_model ?? inst?.cpu_model ?? "-";
|
||||||
const cores = hw?.cpu_cores != null ? String(hw.cpu_cores) : "-";
|
const cores = (hw?.cpu_cores ?? inst?.cpu_cores) != null ? String(hw?.cpu_cores ?? inst?.cpu_cores) : "-";
|
||||||
const ram = hw?.memory_gb != null ? `${hw.memory_gb}GB` : "-";
|
const ram = (hw?.memory_gb ?? inst?.memory_gb) != null ? `${hw?.memory_gb ?? inst?.memory_gb}GB` : "-";
|
||||||
const product = hw?.product ?? "-";
|
const product = hw?.product ?? inst?.product ?? "-";
|
||||||
|
|
||||||
const color = statusColor(status);
|
const color = statusColor(status);
|
||||||
|
|
||||||
|
|||||||
94
bastion/src/cli/src/commands/recheck.ts
Normal file
94
bastion/src/cli/src/commands/recheck.ts
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
// CLI command: provision recheck
|
||||||
|
// SSH into all installed machines, collect hardware info, update bastion state.
|
||||||
|
|
||||||
|
import type { Command } from "commander";
|
||||||
|
import { sshExec } from "@lab/modules";
|
||||||
|
import { getLabdClient } from "../api/config.js";
|
||||||
|
|
||||||
|
const BOLD = "\x1b[1m";
|
||||||
|
const GREEN = "\x1b[0;32m";
|
||||||
|
const RED = "\x1b[0;31m";
|
||||||
|
const DIM = "\x1b[2m";
|
||||||
|
const RESET = "\x1b[0m";
|
||||||
|
|
||||||
|
const SSH_OPTS = { timeoutMs: 30_000 };
|
||||||
|
|
||||||
|
// Shell script that collects hardware info and prints it as one JSON line.
// Kept simple — no Python, pure shell + awk. The statements are joined with
// "; " so the whole script is a single command line.
//
// Fallbacks use explicit emptiness tests instead of `pipeline || fallback`:
//  - a pipeline's exit status is that of its last command, so
//    `grep ... | cut ... | sed ... || other` never runs `other` when grep
//    finds nothing (sed still exits 0);
//  - `grep -c` prints "0" and also exits non-zero when nothing matches, so
//    `grep -c ... || echo 0` would emit the count twice.
// cpu_cores and memory_gb are printed unquoted, so they must never be empty.
// NOTE(review): string values are not JSON-escaped; a value containing a
// double quote or backslash would make the output unparseable.
const HW_COLLECT_SCRIPT = [
  'P=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo unknown)',
  'B=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo unknown)',
  'S=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo unknown)',
  'M=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo unknown)',
  // CPU model: try "model name" first, then a "Model" line, then give up
  'C=$(grep -m1 "model name" /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=$(grep -m1 Model /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=unknown',
  // Core count: grep -c already prints 0 on no match; empty only if unreadable
  'N=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null)',
  '[ -n "$N" ] || N=0',
  // Memory: MemTotal is divided by 1024 twice and truncated to an integer
  'R=$(awk "/MemTotal/ {printf \\"%d\\", \\$2/1024/1024}" /proc/meminfo 2>/dev/null)',
  '[ -n "$R" ] || R=0',
  'A=$(uname -m)',
  'printf \'{"product":"%s","board":"%s","serial":"%s","manufacturer":"%s","cpu_model":"%s","cpu_cores":%s,"memory_gb":%s,"arch":"%s"}\\n\' "$P" "$B" "$S" "$M" "$C" "$N" "$R" "$A"',
].join("; ");
|
||||||
|
|
||||||
|
/**
 * Register the `recheck` subcommand on `parent`.
 *
 * The action fetches machine state from labd, then for each installed machine
 * that has an IP (optionally narrowed by `--target`) runs HW_COLLECT_SCRIPT
 * over SSH, parses the JSON it prints, and forwards it to labd through
 * `discoverMachine`. One status line is printed per machine, followed by a
 * summary of updated and failed counts. The process exits with code 1 when
 * labd cannot be reached.
 *
 * @param parent - Command to attach `recheck` to.
 */
export function registerRecheckCommand(parent: Command): void {
  parent
    .command("recheck")
    .description("Refresh hardware info for all installed machines via SSH")
    .option("--user <user>", "SSH user", "root")
    .option("--target <hostname>", "Only recheck a specific machine (by hostname or MAC)")
    .action(async (opts: { user: string; target?: string }) => {
      const client = getLabdClient();
      let state;
      try {
        state = await client.getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // Accept a MAC typed in upper case or with dashes by comparing it in
      // lower-case, colon-separated form.
      const targetMac = opts.target?.toLowerCase().replace(/-/g, ":");

      // Build list of machines to check
      const targets: Array<{ mac: string; hostname: string; ip: string }> = [];
      for (const [mac, info] of Object.entries(state.installed)) {
        if (!info.ip) continue;
        if (opts.target && info.hostname !== opts.target && mac.toLowerCase() !== targetMac) continue;
        targets.push({ mac, hostname: info.hostname, ip: info.ip });
      }

      if (targets.length === 0) {
        console.log("No installed machines with IPs to check.");
        return;
      }

      console.log(`\n${BOLD}Rechecking ${targets.length} machine(s)...${RESET}\n`);

      let updated = 0;
      let failed = 0;

      // Machines are checked one at a time so each status line is printed
      // right after its own prefix.
      for (const { mac, hostname, ip } of targets) {
        process.stdout.write(` ${hostname.padEnd(24)} ${DIM}(${ip})${RESET} `);

        try {
          const t0 = Date.now();
          const result = await sshExec(ip, opts.user, HW_COLLECT_SCRIPT, SSH_OPTS);
          const elapsed = Date.now() - t0;
          if (result.exitCode !== 0) {
            // Report the failure once, with at most 200 characters of stderr.
            console.log(`${RED}SSH failed (exit ${result.exitCode}, ${elapsed}ms)${RESET}`);
            if (result.stderr) console.log(` ${DIM}${result.stderr.substring(0, 200)}${RESET}`);
            failed++;
            continue;
          }

          const hwData = JSON.parse(result.stdout.trim());
          if (typeof hwData !== "object" || hwData === null || Array.isArray(hwData)) {
            throw new Error("hardware script did not print a JSON object");
          }

          // A response that is not "ok" means nothing was stored; count it
          // as a failure rather than an update.
          const response = await client.discoverMachine({ mac, ...hwData });
          if (response.status !== "ok") {
            throw new Error(response.error ?? `labd returned status "${response.status}"`);
          }

          // `||` is deliberate: the script reports 0 / "" for values it could
          // not read, and those are shown as "?".
          const cpu = hwData.cpu_model || "?";
          const cores = hwData.cpu_cores || "?";
          const mem = hwData.memory_gb || "?";
          console.log(`${GREEN}OK${RESET} ${DIM}${cpu}, ${cores} cores, ${mem}GB${RESET}`);
          updated++;
        } catch (err) {
          console.log(`${RED}FAIL${RESET} ${DIM}${err instanceof Error ? err.message : String(err)}${RESET}`);
          failed++;
        }
      }

      console.log(`\n${BOLD}Done:${RESET} ${updated} updated, ${failed} failed\n`);
    });
}
|
||||||
@@ -20,6 +20,7 @@ import { registerRegisterCommand } from "./commands/register.js";
|
|||||||
import { registerAsahiCommand } from "./commands/asahi.js";
|
import { registerAsahiCommand } from "./commands/asahi.js";
|
||||||
import { registerLogsCommand } from "./commands/logs.js";
|
import { registerLogsCommand } from "./commands/logs.js";
|
||||||
import { registerMakeIsoCommand } from "./commands/makeiso.js";
|
import { registerMakeIsoCommand } from "./commands/makeiso.js";
|
||||||
|
import { registerRecheckCommand } from "./commands/recheck.js";
|
||||||
import { registerConfigCommand } from "./commands/config.js";
|
import { registerConfigCommand } from "./commands/config.js";
|
||||||
import { registerLoginCommand } from "./commands/login.js";
|
import { registerLoginCommand } from "./commands/login.js";
|
||||||
import { registerDoctorCommand } from "./commands/doctor.js";
|
import { registerDoctorCommand } from "./commands/doctor.js";
|
||||||
@@ -104,6 +105,7 @@ export function createProgram(): Command {
|
|||||||
registerAsahiCommand(provisionCmd);
|
registerAsahiCommand(provisionCmd);
|
||||||
registerLogsCommand(provisionCmd);
|
registerLogsCommand(provisionCmd);
|
||||||
registerMakeIsoCommand(provisionCmd);
|
registerMakeIsoCommand(provisionCmd);
|
||||||
|
registerRecheckCommand(provisionCmd);
|
||||||
|
|
||||||
// config list/get/set/path
|
// config list/get/set/path
|
||||||
registerConfigCommand(program);
|
registerConfigCommand(program);
|
||||||
|
|||||||
@@ -260,6 +260,37 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Update hardware info (discovery data) for a machine.
// The MAC is lower-cased and dashes become colons before lookup. The command
// goes to the bastion that knows the MAC, or to the only registered bastion
// when there is exactly one. Replies: 400 without a MAC, 503 without a
// target bastion, 200/500 depending on the command result.
app.post<{
  Body: {
    mac?: string;
    product?: string;
    board?: string;
    serial?: string;
    manufacturer?: string;
    cpu_model?: string;
    cpu_cores?: number;
    memory_gb?: number;
    arch?: string;
    disks?: Array<{ name: string; size_gb: number; model: string }>;
    nics?: Array<{ name: string; mac: string; state: string }>;
  };
}>("/api/machines/discover", async (request, reply) => {
  const data = request.body ?? {};
  const rawMac = data.mac ?? "";
  const mac = rawMac.toLowerCase().replace(/-/g, ":");
  if (mac === "") {
    return reply.code(400).send({ error: "mac is required" });
  }

  // Used only when no bastion claims the MAC: pick the sole registered one.
  const soleBastion = () => {
    const registered = bastionRegistry.getAll();
    return registered.length === 1 ? registered[0] : null;
  };
  const target = bastionRegistry.findBastionByMac(mac) ?? soleBastion();
  if (!target) {
    return reply.code(503).send({ error: "No bastion found for this MAC" });
  }

  try {
    // `mac` is spread last so the normalized value wins over the raw one
    const result = await sendCommand(target.bastionId, { type: "command-discover", ...data, mac });
    const httpStatus = result.status === "ok" ? 200 : 500;
    return reply.code(httpStatus).send(result);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return reply.code(500).send({ error: message });
  }
});
|
||||||
|
|
||||||
// Update role
|
// Update role
|
||||||
app.post<{
|
app.post<{
|
||||||
Body: { mac?: string; role?: string };
|
Body: { mac?: string; role?: string };
|
||||||
|
|||||||
@@ -15,6 +15,15 @@ export const installK3sBinary: Operation = async (ctx): Promise<OperationResult>
|
|||||||
const alreadyInstalled = version.exitCode === 0;
|
const alreadyInstalled = version.exitCode === 0;
|
||||||
|
|
||||||
if (isServer) {
|
if (isServer) {
|
||||||
|
// Clean stale server state when joining an existing cluster
|
||||||
|
// (TLS certs from a previous run cause "newer than datastore" fatal error)
|
||||||
|
if (ctx.config.k3sServerUrl && ctx.config.k3sToken) {
|
||||||
|
await ctx.ssh.exec(
|
||||||
|
"rm -rf /var/lib/rancher/k3s/server/tls /var/lib/rancher/k3s/server/cred /var/lib/rancher/k3s/server/db",
|
||||||
|
sshOpts(ctx),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// If joining an existing cluster, pass K3S_URL and K3S_TOKEN
|
// If joining an existing cluster, pass K3S_URL and K3S_TOKEN
|
||||||
const joinEnv = ctx.config.k3sServerUrl && ctx.config.k3sToken
|
const joinEnv = ctx.config.k3sServerUrl && ctx.config.k3sToken
|
||||||
? `K3S_URL="${ctx.config.k3sServerUrl}" K3S_TOKEN="${ctx.config.k3sToken}"`
|
? `K3S_URL="${ctx.config.k3sServerUrl}" K3S_TOKEN="${ctx.config.k3sToken}"`
|
||||||
|
|||||||
@@ -113,6 +113,7 @@ export type LabdBastionMessage =
|
|||||||
| { type: "command-role-update"; requestId: string; mac: string; role: string }
|
| { type: "command-role-update"; requestId: string; mac: string; role: string }
|
||||||
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
|
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
|
||||||
| { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
|
| { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
|
||||||
|
| { type: "command-discover"; requestId: string; mac: string; product?: string; board?: string; serial?: string; manufacturer?: string; cpu_model?: string; cpu_cores?: number; memory_gb?: number; arch?: string; disks?: Array<{ name: string; size_gb: number; model: string }>; nics?: Array<{ name: string; mac: string; state: string }> }
|
||||||
| { type: "server-shutdown"; reconnectAfter: number };
|
| { type: "server-shutdown"; reconnectAfter: number };
|
||||||
|
|
||||||
export type BastionMessageType = BastionMessage["type"];
|
export type BastionMessageType = BastionMessage["type"];
|
||||||
@@ -127,7 +128,7 @@ const BASTION_MESSAGE_TYPES = new Set<string>([
|
|||||||
|
|
||||||
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
|
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
|
||||||
"bastion-enrolled", "bastion-heartbeat-ack", "command-install",
|
"bastion-enrolled", "bastion-heartbeat-ack", "command-install",
|
||||||
"command-forget", "command-role-update", "command-debug", "command-register", "server-shutdown",
|
"command-forget", "command-role-update", "command-debug", "command-register", "command-discover", "server-shutdown",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
export function isBastionMessage(msg: unknown): msg is BastionMessage {
|
export function isBastionMessage(msg: unknown): msg is BastionMessage {
|
||||||
|
|||||||
@@ -96,6 +96,13 @@ export interface InstalledInfo {
|
|||||||
ip: string;
|
ip: string;
|
||||||
installed_at: string;
|
installed_at: string;
|
||||||
bastionId?: string; // set when aggregated through labd
|
bastionId?: string; // set when aggregated through labd
|
||||||
|
// Hardware info (copied from discovered on install completion)
|
||||||
|
product?: string;
|
||||||
|
manufacturer?: string;
|
||||||
|
cpu_model?: string;
|
||||||
|
cpu_cores?: number;
|
||||||
|
memory_gb?: number;
|
||||||
|
arch?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface DebugConfig {
|
export interface DebugConfig {
|
||||||
|
|||||||
Reference in New Issue
Block a user