Compare commits
13 Commits
feat/asahi
...
feat/reche
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2eda926d4c | ||
|
|
70258a0cc3 | ||
|
|
e9944c5413 | ||
| 22e2946e95 | |||
|
|
9ddab24931 | ||
|
|
ae91f2895e | ||
|
|
06fc40a857 | ||
|
|
a68d6d617e | ||
|
|
c49a650888 | ||
|
|
87e09af941 | ||
|
|
6f13e284fd | ||
|
|
6c963a15bd | ||
| 8c737d163d |
19
CLAUDE.md
Normal file
19
CLAUDE.md
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
## Skill routing
|
||||||
|
|
||||||
|
When the user's request matches an available skill, ALWAYS invoke it using the Skill
|
||||||
|
tool as your FIRST action. Do NOT answer directly, do NOT use other tools first.
|
||||||
|
The skill has specialized workflows that produce better results than ad-hoc answers.
|
||||||
|
|
||||||
|
Key routing rules:
|
||||||
|
- Product ideas, "is this worth building", brainstorming → invoke gstack-office-hours
|
||||||
|
- Bugs, errors, "why is this broken", 500 errors → invoke gstack-investigate
|
||||||
|
- Ship, deploy, push, create PR → invoke gstack-ship
|
||||||
|
- QA, test the site, find bugs → invoke gstack-qa
|
||||||
|
- Code review, check my diff → invoke gstack-review
|
||||||
|
- Update docs after shipping → invoke gstack-document-release
|
||||||
|
- Weekly retro → invoke gstack-retro
|
||||||
|
- Design system, brand → invoke gstack-design-consultation
|
||||||
|
- Visual audit, design polish → invoke gstack-design-review
|
||||||
|
- Architecture review → invoke gstack-plan-eng-review
|
||||||
|
- Save progress, checkpoint, resume → invoke gstack-checkpoint
|
||||||
|
- Code quality, health check → invoke gstack-health
|
||||||
47
TODOS.md
Normal file
47
TODOS.md
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# TODOS
|
||||||
|
|
||||||
|
## P1 — Ship with Phase 1
|
||||||
|
|
||||||
|
### v2.0 Architecture Document Update
|
||||||
|
Update `bastion/docs/ARCHITECTURE.md` to cover v2.0: driver model, fleet system,
|
||||||
|
Pulumi integration, Vault secrets, Deno evaluator, new CLI grammar. The existing
|
||||||
|
doc covers v1.0 comprehensively (432 lines). v2.0 adds 5+ major subsystems.
|
||||||
|
**Effort:** M (human: 1 week / CC: 1-2 days)
|
||||||
|
**Depends on:** Phase 1 complete
|
||||||
|
**Source:** CEO review 2026-04-01
|
||||||
|
|
||||||
|
## P2 — Post-v2.0 Core
|
||||||
|
|
||||||
|
### SSH Emergency Mode (scoped)
|
||||||
|
SSH-based operations are limited to: (1) initial box provisioning, before the agent
|
||||||
|
is installed, and (2) emergency debugging/fixing operations that can't be done via the agent.
|
||||||
|
NOT a general-purpose DeploymentTarget alternative. The v1.0 `recheck` and `fix-ssh-root.sh`
|
||||||
|
patterns are the model. Agent stays the primary management path.
|
||||||
|
**Effort:** S (human: 1 week / CC: 1 day)
|
||||||
|
**Depends on:** Phase 2 complete (DeploymentTarget interface exists)
|
||||||
|
**Source:** CEO review 2026-04-01
|
||||||
|
|
||||||
|
### Prometheus Metrics Endpoint
|
||||||
|
Add `/metrics` endpoint to labd: resource counts by status, apply duration histograms,
|
||||||
|
driver operation latency, fleet pipeline completion rates. Standard Prometheus scraping
|
||||||
|
for Grafana dashboards and alerting.
|
||||||
|
**Effort:** S (human: 2-3 days / CC: 2-3 hours)
|
||||||
|
**Depends on:** Phase 1 (labd exists with resource store)
|
||||||
|
**Source:** CEO review 2026-04-01 (observability gap)
|
||||||
|
|
||||||
|
## P3 — Future Enhancements
|
||||||
|
|
||||||
|
### Infrastructure Graph Visualization
|
||||||
|
Visual representation of resource dependencies, environment topology, fleet status.
|
||||||
|
Could be a web UI or terminal-based (like `kubectl tree`).
|
||||||
|
**Source:** CEO review 2026-04-01
|
||||||
|
|
||||||
|
### `labctl import` for Existing Cloud Resources
|
||||||
|
Discover and import existing AWS/GCP resources into the state store.
|
||||||
|
Pulumi's import functionality could be leveraged.
|
||||||
|
**Source:** CEO review 2026-04-01
|
||||||
|
|
||||||
|
### Built-in Secrets Rotation
|
||||||
|
Automatic rotation of managed secrets (database passwords, API keys).
|
||||||
|
Vault handles rotation, but a labctl-native workflow could simplify it.
|
||||||
|
**Source:** CEO review 2026-04-01
|
||||||
4
bastion/bastion/.gitignore
vendored
Normal file
4
bastion/bastion/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
# Asahi build artifacts (large)
|
||||||
|
.asahi-cache/
|
||||||
|
asahi-repo/*.zip
|
||||||
@@ -82,6 +82,9 @@ _labctl() {
|
|||||||
"provision makeiso")
|
"provision makeiso")
|
||||||
COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
|
"provision recheck")
|
||||||
|
COMPREPLY=($(compgen -W "--user --target -h --help" -- "$cur"))
|
||||||
|
return ;;
|
||||||
"config list")
|
"config list")
|
||||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
@@ -107,7 +110,7 @@ _labctl() {
|
|||||||
COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
"provision")
|
"provision")
|
||||||
COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso recheck -h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
"config")
|
"config")
|
||||||
COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
|
||||||
|
|||||||
@@ -128,6 +128,7 @@ complete -c labctl -n "__labctl_using_cmd provision" -a register -d 'Register an
|
|||||||
complete -c labctl -n "__labctl_using_cmd provision" -a asahi -d 'Show instructions to provision an Apple Silicon Mac with Asahi Linux'
|
complete -c labctl -n "__labctl_using_cmd provision" -a asahi -d 'Show instructions to provision an Apple Silicon Mac with Asahi Linux'
|
||||||
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
|
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
|
||||||
complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning'
|
complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning'
|
||||||
|
complete -c labctl -n "__labctl_using_cmd provision" -a recheck -d 'Refresh hardware info for all installed machines via SSH'
|
||||||
|
|
||||||
# provision install options
|
# provision install options
|
||||||
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
||||||
@@ -154,6 +155,10 @@ complete -c labctl -n "__labctl_in_cmd provision makeiso" -l arch -d 'Target arc
|
|||||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL'
|
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL'
|
||||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x
|
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x
|
||||||
|
|
||||||
|
# provision recheck options
|
||||||
|
complete -c labctl -n "__labctl_in_cmd provision recheck" -l user -d 'SSH user' -x
|
||||||
|
complete -c labctl -n "__labctl_in_cmd provision recheck" -l target -d 'Only recheck a specific machine (by hostname or MAC)' -x
|
||||||
|
|
||||||
# config subcommands
|
# config subcommands
|
||||||
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
|
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
|
||||||
complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
|
complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
|
||||||
|
|||||||
@@ -99,16 +99,22 @@ if [ "$PUSH" = true ]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012)
|
||||||
|
TLS_FLAG=""
|
||||||
|
if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then
|
||||||
|
TLS_FLAG="--tls-verify=false"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "==> Logging in to $REGISTRY..."
|
echo "==> Logging in to $REGISTRY..."
|
||||||
podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||||
|
|
||||||
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||||
|
|
||||||
# Also tag as :latest if not already
|
# Also tag as :latest if not already
|
||||||
if [ "$TAG" != "latest" ]; then
|
if [ "$TAG" != "latest" ]; then
|
||||||
echo "==> Also pushing as :latest..."
|
echo "==> Also pushing as :latest..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Link package to repository if script exists
|
# Link package to repository if script exists
|
||||||
|
|||||||
@@ -92,15 +92,21 @@ if [ "$PUSH" = true ]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012)
|
||||||
|
TLS_FLAG=""
|
||||||
|
if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then
|
||||||
|
TLS_FLAG="--tls-verify=false"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "==> Logging in to $REGISTRY..."
|
echo "==> Logging in to $REGISTRY..."
|
||||||
podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||||
|
|
||||||
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||||
|
|
||||||
if [ "$TAG" != "latest" ]; then
|
if [ "$TAG" != "latest" ]; then
|
||||||
echo "==> Also pushing as :latest..."
|
echo "==> Also pushing as :latest..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
|
if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
|
||||||
|
|||||||
131
bastion/scripts/fix-ssh-root.sh
Normal file
131
bastion/scripts/fix-ssh-root.sh
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Fix root SSH access on all provisioned machines.
|
||||||
|
# Tries root, lab, michal users to find one that works,
|
||||||
|
# then ensures root has the SSH key and PermitRootLogin is enabled.
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SSH_KEY="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDMJ3FkUGbG174eoO5RjZd2eNV680FM5pgp0AgpW/QwlJExK3qxMk0DJSr4ICmzGUx4yujAXcrqU1otcOMPzzFzwc5heWpSmlNHU3TIW6NHEt0sF9ZTAbGLw2zSw3si5UouqFkCcENA40mePFJqY+Q9R8N1uvLgu4m/do+Zrn/mk5Ewc1V7OCRE5Acrnaec4T7LTB0BuVXcjPUfAmZ0q5fI+bKPR1q2Kc3+IeGhVkBuZ9OJVeXXhnpedm0uEbLeriK/jUYKYw/1QhsNDM8Tyty+UIGr9QVnWwzCMHB+wuQcDYC9mPGTqg0fYwX8Mp8xMi1PPxdsh1G7bj/cpWMAF43KswWORF2ul8ICGbaE1zEgIYXO790SuBjpBHhaC6Iegqi58hmCuP+a9893q/EU9HyrWTJHCZXC5E4kP1MsM57KrhEpszM6I3sW9f9zMTPd5QsCXFi4si4OMwX4kYNVu3fQGQPpseDPlTTSrT6uUdqj4Irm0c1m9cYTmK0vYgsM3ss= michal@fedora"
|
||||||
|
|
||||||
|
SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ConnectTimeout=5"
|
||||||
|
USERS_TO_TRY=(root lab michal)
|
||||||
|
|
||||||
|
# Machines: hostname ip
|
||||||
|
MACHINES=(
|
||||||
|
"labmaster 192.168.8.11"
|
||||||
|
"worker0-k8s0 192.168.8.23"
|
||||||
|
"worker1-k8s0 192.168.8.13"
|
||||||
|
"worker2-k8s0 192.168.8.25"
|
||||||
|
"spark-2935 192.168.8.12"
|
||||||
|
)
|
||||||
|
|
||||||
|
BOLD="\033[1m"
|
||||||
|
GREEN="\033[0;32m"
|
||||||
|
RED="\033[0;31m"
|
||||||
|
DIM="\033[2m"
|
||||||
|
RESET="\033[0m"
|
||||||
|
|
||||||
|
# Script to run on each machine (via sudo if needed)
|
||||||
|
read -r -d '' FIX_SCRIPT << 'FIXEOF' || true
|
||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
KEY="$1"
|
||||||
|
|
||||||
|
# 1. Ensure root .ssh dir exists
|
||||||
|
mkdir -p /root/.ssh
|
||||||
|
chmod 700 /root/.ssh
|
||||||
|
touch /root/.ssh/authorized_keys
|
||||||
|
chmod 600 /root/.ssh/authorized_keys
|
||||||
|
|
||||||
|
# 2. Add key if not present
|
||||||
|
if ! grep -qF "$KEY" /root/.ssh/authorized_keys 2>/dev/null; then
|
||||||
|
echo "$KEY" >> /root/.ssh/authorized_keys
|
||||||
|
echo "KEY_ADDED"
|
||||||
|
else
|
||||||
|
echo "KEY_EXISTS"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 3. Fix sshd_config for root login with keys
|
||||||
|
SSHD_CONF="/etc/ssh/sshd_config"
|
||||||
|
CHANGED=0
|
||||||
|
|
||||||
|
# Ensure PermitRootLogin allows key auth
|
||||||
|
CURRENT=$(grep -E "^PermitRootLogin" "$SSHD_CONF" 2>/dev/null | tail -1 || true)
|
||||||
|
if [ "$CURRENT" = "PermitRootLogin prohibit-password" ] || [ "$CURRENT" = "PermitRootLogin without-password" ]; then
|
||||||
|
echo "SSHD_OK"
|
||||||
|
elif [ "$CURRENT" = "PermitRootLogin yes" ]; then
|
||||||
|
echo "SSHD_OK"
|
||||||
|
else
|
||||||
|
# Remove any existing PermitRootLogin lines
|
||||||
|
sed -i '/^#*PermitRootLogin/d' "$SSHD_CONF"
|
||||||
|
echo "PermitRootLogin prohibit-password" >> "$SSHD_CONF"
|
||||||
|
CHANGED=1
|
||||||
|
echo "SSHD_FIXED"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Ensure PubkeyAuthentication is enabled
|
||||||
|
if grep -qE "^PubkeyAuthentication no" "$SSHD_CONF" 2>/dev/null; then
|
||||||
|
sed -i 's/^PubkeyAuthentication no/PubkeyAuthentication yes/' "$SSHD_CONF"
|
||||||
|
CHANGED=1
|
||||||
|
echo "PUBKEY_FIXED"
|
||||||
|
else
|
||||||
|
echo "PUBKEY_OK"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Restart sshd if changed
|
||||||
|
if [ "$CHANGED" -eq 1 ]; then
|
||||||
|
systemctl restart sshd 2>/dev/null || systemctl restart ssh 2>/dev/null || true
|
||||||
|
echo "SSHD_RESTARTED"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 4. Verify root can be reached
|
||||||
|
echo "DONE"
|
||||||
|
FIXEOF
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -e "${BOLD}Fixing root SSH access on all machines...${RESET}"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# For every "hostname ip" entry in MACHINES:
#   1. find the first user in USERS_TO_TRY that can log in over SSH,
#   2. run FIX_SCRIPT on the machine (through sudo unless that user is root),
#   3. summarise the markers the script printed,
#   4. check whether root can log in afterwards.
# A failure on one machine is reported on its line and never stops the loop.
for entry in "${MACHINES[@]}"; do
  read -r hostname ip <<< "$entry"
  printf " %-24s ${DIM}(%s)${RESET} " "$hostname" "$ip"

  # Try each user until one works
  WORKING_USER=""
  for user in "${USERS_TO_TRY[@]}"; do
    if ssh $SSH_OPTS "$user@$ip" "true" 2>/dev/null; then
      WORKING_USER="$user"
      break
    fi
  done

  if [ -z "$WORKING_USER" ]; then
    echo -e "${RED}UNREACHABLE${RESET} (tried: ${USERS_TO_TRY[*]})"
    continue
  fi

  # Run fix script (with sudo if not root)
  REMOTE_CMD="bash -s -- '$SSH_KEY'"
  if [ "$WORKING_USER" != "root" ]; then
    REMOTE_CMD="sudo $REMOTE_CMD"
  fi

  # This script runs with `set -e`: a bare RESULT=$(ssh ...) that exits non-zero
  # (sudo refused, remote script failed, connection dropped) would terminate the
  # whole run and skip the remaining machines. Record the status instead.
  FIX_STATUS=0
  RESULT=$(ssh $SSH_OPTS "$WORKING_USER@$ip" "$REMOTE_CMD" <<< "$FIX_SCRIPT" 2>&1) || FIX_STATUS=$?

  # Parse result. Pattern matching is used rather than `echo | grep -q` so the
  # checks cannot be affected by `pipefail`.
  DETAILS=""
  if [[ "$RESULT" == *KEY_ADDED* ]]; then DETAILS="key added"; fi
  if [[ "$RESULT" == *KEY_EXISTS* ]]; then DETAILS="key ok"; fi
  if [[ "$RESULT" == *SSHD_FIXED* ]]; then DETAILS="$DETAILS, sshd fixed"; fi
  if [[ "$RESULT" == *SSHD_OK* ]]; then DETAILS="$DETAILS, sshd ok"; fi
  if [[ "$RESULT" == *SSHD_RESTARTED* ]]; then DETAILS="$DETAILS, restarted"; fi
  if [ "$FIX_STATUS" -ne 0 ]; then
    DETAILS="${DETAILS:+$DETAILS, }fix script failed (exit $FIX_STATUS)"
  fi

  # Verify root works now
  if ssh $SSH_OPTS "root@$ip" "true" 2>/dev/null; then
    echo -e "${GREEN}OK${RESET} ${DIM}(via $WORKING_USER: $DETAILS)${RESET}"
  else
    echo -e "${RED}PARTIAL${RESET} ${DIM}(via $WORKING_USER: $DETAILS -- root still blocked)${RESET}"
  fi
done
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -e "${BOLD}Done.${RESET} Verify: labctl provision recheck --user root"
|
||||||
|
echo ""
|
||||||
@@ -309,6 +309,32 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
|||||||
return { status: "ok", data: { mac, hostname: msg.hostname } };
|
return { status: "ok", data: { mac, hostname: msg.hostname } };
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Handle "command-discover": store the hardware details carried by the message
// under the machine's lowercased MAC. `first_seen` is kept from a record that
// already existed before this update; `last_seen` is always the current time.
labdConn.onCommand("command-discover", async (msg) => {
  if (msg.type !== "command-discover") throw new Error("unexpected");

  // Per-field fallbacks: missing text becomes "unknown", missing counts become 0.
  const textOr = (value: unknown): string => (value as string) ?? "unknown";
  const countOr = (value: unknown): number => (value as number) ?? 0;

  const mac = (msg.mac as string).toLowerCase();
  const seenAt = new Date().toISOString();
  const previous = state.load().discovered[mac];

  state.update((draft) => {
    draft.discovered[mac] = {
      mac,
      product: textOr(msg.product),
      board: textOr(msg.board),
      serial: textOr(msg.serial),
      manufacturer: textOr(msg.manufacturer),
      cpu_model: textOr(msg.cpu_model),
      cpu_cores: countOr(msg.cpu_cores),
      memory_gb: countOr(msg.memory_gb),
      arch: textOr(msg.arch),
      disks: (msg.disks as Array<{ name: string; size_gb: number; model: string }>) ?? [],
      nics: (msg.nics as Array<{ name: string; mac: string; state: string }>) ?? [],
      first_seen: previous?.first_seen ?? seenAt,
      last_seen: seenAt,
    };
  });

  const summary = `HARDWARE UPDATED: ${mac} -- ${msg.manufacturer ?? "?"} ${msg.product ?? "?"} (${msg.cpu_model ?? "?"}, ${msg.cpu_cores ?? "?"} cores, ${msg.memory_gb ?? "?"}GB RAM)`;
  logger.info(summary);

  return { status: "ok", data: { mac } };
});
|
||||||
|
|
||||||
labdConn.onCommand("command-role-update", async (msg) => {
|
labdConn.onCommand("command-role-update", async (msg) => {
|
||||||
if (msg.type !== "command-role-update") throw new Error("unexpected");
|
if (msg.type !== "command-role-update") throw new Error("unexpected");
|
||||||
const mac = msg.mac.toLowerCase();
|
const mac = msg.mac.toLowerCase();
|
||||||
|
|||||||
@@ -139,12 +139,22 @@ export function registerApiRoutes(
|
|||||||
? detailStr.replace("ready at ", "").trim()
|
? detailStr.replace("ready at ", "").trim()
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
|
const hw = s.discovered[mac];
|
||||||
const installedInfo: InstalledInfo = {
|
const installedInfo: InstalledInfo = {
|
||||||
hostname: cfg?.hostname ?? "?",
|
hostname: cfg?.hostname ?? "?",
|
||||||
role: cfg?.role ?? "?",
|
role: cfg?.role ?? "?",
|
||||||
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
|
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
|
||||||
ip,
|
ip,
|
||||||
installed_at: new Date().toISOString(),
|
installed_at: new Date().toISOString(),
|
||||||
|
// Preserve hardware info from discovery
|
||||||
|
...(hw ? {
|
||||||
|
product: hw.product,
|
||||||
|
manufacturer: hw.manufacturer,
|
||||||
|
cpu_model: hw.cpu_model,
|
||||||
|
cpu_cores: hw.cpu_cores,
|
||||||
|
memory_gb: hw.memory_gb,
|
||||||
|
arch: hw.arch,
|
||||||
|
} : {}),
|
||||||
};
|
};
|
||||||
s.installed[mac] = installedInfo;
|
s.installed[mac] = installedInfo;
|
||||||
|
|
||||||
@@ -359,6 +369,23 @@ export function registerApiRoutes(
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Simple machine state query (used by ks-auto for ISO boot dispatch).
// Replies with a single word: "debug", "installing", "queued", "installed",
// "discovered" or "unknown". The MAC from the URL is lowercased and any "-"
// separators are turned into ":" before lookup.
app.get<{
  Params: { mac: string };
}>("/api/machine-state/:mac", async (request, reply) => {
  const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
  const snapshot = state.load();

  // Precedence: debug > install queue > installed > discovered > unknown.
  let label = "unknown";
  if (snapshot.debug[mac]) {
    label = "debug";
  } else if (snapshot.install_queue[mac]) {
    // A queue entry that already carries progress counts as an install in flight.
    label = snapshot.install_queue[mac].progress ? "installing" : "queued";
  } else if (snapshot.installed[mac]) {
    label = "installed";
  } else if (snapshot.discovered[mac]) {
    label = "discovered";
  }

  return reply.send(label);
});
|
||||||
|
|
||||||
// Update a machine's role (e.g. promote infra -> labcontroller)
|
// Update a machine's role (e.g. promote infra -> labcontroller)
|
||||||
app.post<{
|
app.post<{
|
||||||
Body: {
|
Body: {
|
||||||
|
|||||||
@@ -102,7 +102,8 @@ echo " - Standard Asahi boot infrastructure (m1n1 + U-Boot)"
|
|||||||
echo " - Fedora Asahi Remix root partition"
|
echo " - Fedora Asahi Remix root partition"
|
||||||
echo " - LVM data partition (remaining space)"
|
echo " - LVM data partition (remaining space)"
|
||||||
echo ""
|
echo ""
|
||||||
echo " On first boot, LVM volumes are created automatically."
|
echo " After first boot, SSH in and set up LVM:"
|
||||||
|
echo " ssh lab@<ip> 'curl -sf \${BASTION}/asahi/firstboot.sh | sudo bash'"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Run the installer
|
# Run the installer
|
||||||
@@ -150,10 +151,10 @@ fi
|
|||||||
app.get<{
|
app.get<{
|
||||||
Querystring: { hostname?: string; role?: string; mac?: string; user?: string };
|
Querystring: { hostname?: string; role?: string; mac?: string; user?: string };
|
||||||
}>("/asahi/firstboot.sh", async (request, reply) => {
|
}>("/asahi/firstboot.sh", async (request, reply) => {
|
||||||
const hostname = request.query.hostname ?? "mac-studio";
|
const hostname = request.query.hostname ?? "unknown";
|
||||||
const role = (request.query.role ?? "infra") as Role;
|
const role = (request.query.role ?? "infra") as Role;
|
||||||
const mac = request.query.mac ?? "unknown";
|
const mac = request.query.mac ?? "unknown";
|
||||||
const user = request.query.user ?? config.adminUser;
|
const user = request.query.user ?? "lab";
|
||||||
|
|
||||||
const script = renderFirstbootScript({
|
const script = renderFirstbootScript({
|
||||||
hostname,
|
hostname,
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ function generateIso(config: BastionConfig, outputPath: string): void {
|
|||||||
"# Map iPXE arch names to Fedora mirror paths (arm64 -> aarch64)",
|
"# Map iPXE arch names to Fedora mirror paths (arm64 -> aarch64)",
|
||||||
"set fedarch ${buildarch}",
|
"set fedarch ${buildarch}",
|
||||||
"iseq ${buildarch} arm64 && set fedarch aarch64 ||",
|
"iseq ${buildarch} arm64 && set fedarch aarch64 ||",
|
||||||
`kernel file:/vmlinuz-\${buildarch} inst.ks=${bastionUrl}/discover.ks inst.repo=${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/\${fedarch}/os inst.text || goto no_kernel`,
|
`kernel file:/vmlinuz-\${buildarch} inst.ks=${bastionUrl}/ks-auto inst.repo=${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/\${fedarch}/os inst.text || goto no_kernel`,
|
||||||
`initrd file:/initrd-\${buildarch} || goto no_kernel`,
|
`initrd file:/initrd-\${buildarch} || goto no_kernel`,
|
||||||
"boot || shell",
|
"boot || shell",
|
||||||
"",
|
"",
|
||||||
|
|||||||
@@ -41,6 +41,150 @@ export function registerKickstartRoutes(
|
|||||||
return reply.type("text/plain").send(ks);
|
return reply.type("text/plain").send(ks);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Auto-detecting kickstart for ISO boot (no-network machines like R1 ARM).
|
||||||
|
// %pre detects MAC, queries bastion state, writes dynamic kickstart to /tmp.
|
||||||
|
// Main body %include's it — so Anaconda gets either discover or install content.
|
||||||
|
app.get("/ks-auto", async (_request, reply) => {
|
||||||
|
const bastionUrl = `http://${config.serverIp}:${config.httpPort}`;
|
||||||
|
|
||||||
|
const ks = `# Lab Bastion -- Auto-detect kickstart (ISO boot)
|
||||||
|
# %pre detects MAC, queries bastion state, writes /tmp/dynamic.ks.
|
||||||
|
# Main body %include's it to get either discovery reboot or full install.
|
||||||
|
|
||||||
|
%pre --erroronfail --log=/tmp/ks-auto.log
|
||||||
|
#!/bin/bash
|
||||||
|
set -x
|
||||||
|
|
||||||
|
# -- Detect MAC address --
|
||||||
|
MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||||
|
echo "Detected MAC: $MAC"
|
||||||
|
|
||||||
|
# -- Wait for network (Linux drivers may take a moment) --
|
||||||
|
for i in $(seq 1 30); do
|
||||||
|
if curl -sf "${bastionUrl}/healthz" >/dev/null 2>&1; then
|
||||||
|
echo "Bastion reachable at ${bastionUrl}"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
echo "Waiting for network... ($i/30)"
|
||||||
|
sleep 2
|
||||||
|
done
|
||||||
|
|
||||||
|
# -- Query bastion for machine state --
|
||||||
|
STATE=$(curl -sf "${bastionUrl}/api/machine-state/$MAC" 2>/dev/null || echo "unknown")
|
||||||
|
echo "Machine state: $STATE"
|
||||||
|
|
||||||
|
case "$STATE" in
|
||||||
|
queued|installing)
|
||||||
|
echo "=== Machine queued for install. Fetching install kickstart... ==="
|
||||||
|
curl -sf "${bastionUrl}/ks?mac=$MAC" > /tmp/dynamic.ks
|
||||||
|
if [ -s /tmp/dynamic.ks ]; then
|
||||||
|
echo "Install kickstart downloaded ($(wc -l < /tmp/dynamic.ks) lines)"
|
||||||
|
else
|
||||||
|
echo "ERROR: Failed to download install kickstart"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Run any %pre scripts from the downloaded kickstart.
|
||||||
|
# Anaconda only runs %pre from the top-level file, not from %include'd files.
|
||||||
|
python3 -c "
|
||||||
|
import re, subprocess
|
||||||
|
content = open('/tmp/dynamic.ks').read()
|
||||||
|
blocks = re.findall(r'%pre[^\\n]*\\n(.*?)%end', content, re.DOTALL)
|
||||||
|
for i, script in enumerate(blocks):
|
||||||
|
path = f'/tmp/inner-pre-{i}.sh'
|
||||||
|
with open(path, 'w') as f:
|
||||||
|
f.write(script)
|
||||||
|
print(f'Running inner %pre script {i} ({len(script.splitlines())} lines)')
|
||||||
|
subprocess.run(['bash', path], check=False)
|
||||||
|
"
|
||||||
|
;;
|
||||||
|
|
||||||
|
debug)
|
||||||
|
echo "=== Debug mode ==="
|
||||||
|
curl -sf "${bastionUrl}/debug.ks?mac=$MAC" > /tmp/dynamic.ks 2>/dev/null
|
||||||
|
if [ ! -s /tmp/dynamic.ks ]; then
|
||||||
|
echo "rescue" > /tmp/dynamic.ks
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
echo "=== Running hardware discovery ==="
|
||||||
|
# Collect hardware info
|
||||||
|
PRODUCT=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo "unknown")
|
||||||
|
BOARD=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo "unknown")
|
||||||
|
SERIAL=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo "unknown")
|
||||||
|
MANUFACTURER=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo "unknown")
|
||||||
|
CPUMODEL=$(grep -m1 'model name' /proc/cpuinfo | cut -d: -f2 | sed 's/^ //')
|
||||||
|
CPUCORES=$(grep -c '^processor' /proc/cpuinfo)
|
||||||
|
MEMGB=$(awk '/MemTotal/ {printf "%d", $2/1024/1024}' /proc/meminfo)
|
||||||
|
ARCHTYPE=$(uname -m)
|
||||||
|
|
||||||
|
DISKS_JSON=$(lsblk -Jb -o NAME,SIZE,TYPE,MODEL 2>/dev/null | python3 -c "
|
||||||
|
import sys, json
|
||||||
|
data = json.load(sys.stdin)
|
||||||
|
disks = [d for d in data.get('blockdevices', []) if d.get('type') == 'disk']
|
||||||
|
result = []
|
||||||
|
for d in disks:
|
||||||
|
size_gb = round(int(d.get('size', 0)) / 1073741824, 1)
|
||||||
|
result.append({'name': d.get('name', '?'), 'size_gb': size_gb, 'model': (d.get('model') or 'unknown').strip()})
|
||||||
|
print(json.dumps(result))
|
||||||
|
" 2>/dev/null || echo '[]')
|
||||||
|
|
||||||
|
NICS_JSON=$(ip -j link show 2>/dev/null | python3 -c "
|
||||||
|
import sys, json
|
||||||
|
nics = json.load(sys.stdin)
|
||||||
|
result = []
|
||||||
|
for n in nics:
|
||||||
|
if n.get('link_type') == 'loopback': continue
|
||||||
|
result.append({'name': n.get('ifname', '?'), 'mac': n.get('address', '?'), 'state': n.get('operstate', '?')})
|
||||||
|
print(json.dumps(result))
|
||||||
|
" 2>/dev/null || echo '[]')
|
||||||
|
|
||||||
|
PAYLOAD=$(python3 -c "
|
||||||
|
import json
|
||||||
|
print(json.dumps({
|
||||||
|
'mac': '$MAC', 'product': '$PRODUCT', 'board': '$BOARD', 'serial': '$SERIAL',
|
||||||
|
'manufacturer': '$MANUFACTURER', 'cpu_model': '$CPUMODEL',
|
||||||
|
'cpu_cores': int('$CPUCORES' or 0), 'memory_gb': int('$MEMGB' or 0),
|
||||||
|
'arch': '$ARCHTYPE', 'disks': $DISKS_JSON, 'nics': $NICS_JSON
|
||||||
|
}))
|
||||||
|
")
|
||||||
|
|
||||||
|
curl -sf -X POST "${bastionUrl}/api/discover" \\
|
||||||
|
-H "Content-Type: application/json" \\
|
||||||
|
-d "$PAYLOAD" || true
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "=== Discovery complete ==="
|
||||||
|
echo "Machine MAC: $MAC"
|
||||||
|
echo "Queue for install: labctl provision install $MAC <hostname> --role infra"
|
||||||
|
echo "Then reboot to start installation."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Write a minimal kickstart that just reboots
|
||||||
|
cat > /tmp/dynamic.ks << 'DISCOVER_KS'
|
||||||
|
# Discovery mode -- reboot to allow install queue
|
||||||
|
reboot
|
||||||
|
DISCOVER_KS
|
||||||
|
|
||||||
|
# Force reboot now (don't wait for Anaconda)
|
||||||
|
sleep 3
|
||||||
|
echo 1 > /proc/sys/kernel/sysrq
|
||||||
|
echo b > /proc/sysrq-trigger
|
||||||
|
sleep 5
|
||||||
|
reboot -f
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
%end
|
||||||
|
|
||||||
|
# Include the dynamically chosen kickstart
|
||||||
|
%include /tmp/dynamic.ks
|
||||||
|
`;
|
||||||
|
|
||||||
|
return reply.type("text/plain").send(ks);
|
||||||
|
});
|
||||||
|
|
||||||
// Ubuntu autoinstall user-data (cloud-init)
|
// Ubuntu autoinstall user-data (cloud-init)
|
||||||
app.get<{ Params: { mac: string } }>("/autoinstall/:mac/user-data", async (request, reply) => {
|
app.get<{ Params: { mac: string } }>("/autoinstall/:mac/user-data", async (request, reply) => {
|
||||||
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
|
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
|
||||||
|
|||||||
@@ -166,6 +166,7 @@ export class BastionConnection {
|
|||||||
case "command-role-update":
|
case "command-role-update":
|
||||||
case "command-debug":
|
case "command-debug":
|
||||||
case "command-register":
|
case "command-register":
|
||||||
|
case "command-discover":
|
||||||
void this.handleCommand(msg);
|
void this.handleCommand(msg);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -111,6 +111,29 @@ mount_lv() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ── Write fstab function (idempotent) ────────────────────────────
|
||||||
|
write_lab_fstab() {
|
||||||
|
# Remove any previous lab LVM entries (clean slate)
|
||||||
|
sed -i '/# lab-lvm:/d' /etc/fstab
|
||||||
|
sed -i '/# Lab LVM volumes/d' /etc/fstab
|
||||||
|
grep -v "/dev/labvg/" /etc/fstab > /etc/fstab.tmp && mv /etc/fstab.tmp /etc/fstab
|
||||||
|
# Comment out non-LVM entries for mount points we manage
|
||||||
|
for mp in "/var " "/var/log " "/home " "/srv "; do
|
||||||
|
if grep -q "$mp" /etc/fstab; then
|
||||||
|
awk -v m="$mp" '{if($0 !~ /^#/ && index($0,m)) print "# lab-lvm: " $0; else print}' /etc/fstab > /etc/fstab.tmp
|
||||||
|
mv /etc/fstab.tmp /etc/fstab
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
# Add fresh LVM entries
|
||||||
|
echo "# Lab LVM volumes" >> /etc/fstab
|
||||||
|
echo "/dev/labvg/swap none swap defaults 0 0" >> /etc/fstab
|
||||||
|
echo "/dev/labvg/var /var xfs defaults 0 0" >> /etc/fstab
|
||||||
|
echo "/dev/labvg/varlog /var/log xfs defaults 0 0" >> /etc/fstab
|
||||||
|
echo "/dev/labvg/home /home xfs defaults 0 0" >> /etc/fstab
|
||||||
|
echo "/dev/labvg/srv /srv xfs defaults 0 0" >> /etc/fstab
|
||||||
|
${roleFstabLines.join('\n ')}
|
||||||
|
}
|
||||||
|
|
||||||
# ── Check for existing VG ────────────────────────────────────────
|
# ── Check for existing VG ────────────────────────────────────────
|
||||||
if vgs labvg &>/dev/null; then
|
if vgs labvg &>/dev/null; then
|
||||||
echo "Volume group 'labvg' already exists — reprovision detected."
|
echo "Volume group 'labvg' already exists — reprovision detected."
|
||||||
@@ -129,22 +152,11 @@ ${roleMountLines.map(l => ` ${l}`).join('\n')}
|
|||||||
echo " Enabled swap"
|
echo " Enabled swap"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Ensure fstab entries exist
|
# Ensure fstab entries exist — comment out conflicting btrfs subvol entries
|
||||||
grep -q "labvg" /etc/fstab || {
|
write_lab_fstab
|
||||||
echo "# Lab LVM volumes (re-added after reprovision)" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/swap none swap defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/var /var xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/varlog /var/log xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/home /home xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/srv /srv xfs defaults 0 0" >> /etc/fstab
|
|
||||||
${roleFstabLines.map(l => ` ${l}`).join('\n')}
|
|
||||||
}
|
|
||||||
|
|
||||||
echo "Existing LVM volumes re-mounted."
|
echo "Existing LVM volumes re-mounted."
|
||||||
touch "$MARKER"
|
else
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ── Fresh install: create LVM ────────────────────────────────────
|
# ── Fresh install: create LVM ────────────────────────────────────
|
||||||
echo "Creating LVM on $DATA_PART..."
|
echo "Creating LVM on $DATA_PART..."
|
||||||
|
|
||||||
@@ -210,34 +222,39 @@ echo "NOTE: /var and /var/log will switch to LVM on next reboot."
|
|||||||
# Enable swap
|
# Enable swap
|
||||||
swapon /dev/labvg/swap 2>/dev/null || true
|
swapon /dev/labvg/swap 2>/dev/null || true
|
||||||
|
|
||||||
# Write fstab entries
|
write_lab_fstab
|
||||||
echo "" >> /etc/fstab
|
|
||||||
echo "# Lab LVM volumes" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/swap none swap defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/var /var xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/varlog /var/log xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/home /home xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/srv /srv xfs defaults 0 0" >> /etc/fstab
|
|
||||||
${roleFstabLines.join('\n')}
|
|
||||||
|
|
||||||
echo "LVM setup complete."
|
echo "LVM setup complete."
|
||||||
lvs labvg
|
lvs labvg
|
||||||
|
|
||||||
# ── Set hostname ─────────────────────────────────────────────────
|
fi # end if/else for reprovision vs fresh install
|
||||||
hostnamectl set-hostname "${hostname}"
|
|
||||||
|
# ── Set hostname (use configured value, or keep existing) ────────
|
||||||
|
CONF_HOSTNAME="${hostname}"
|
||||||
|
if [ "$CONF_HOSTNAME" != "unknown" ] && [ -n "$CONF_HOSTNAME" ]; then
|
||||||
|
hostnamectl set-hostname "$CONF_HOSTNAME"
|
||||||
|
fi
|
||||||
|
ACTUAL_HOSTNAME=$(hostname)
|
||||||
|
|
||||||
|
# ── Detect MAC address ───────────────────────────────────────────
|
||||||
|
CONF_MAC="${mac}"
|
||||||
|
if [ "$CONF_MAC" = "unknown" ] || [ -z "$CONF_MAC" ]; then
|
||||||
|
CONF_MAC=$(ip -o link show | grep -v "lo:" | grep "state UP" | head -1 | grep -oP 'link/ether \\K[^ ]+' || echo "unknown")
|
||||||
|
fi
|
||||||
|
|
||||||
# ── Configure admin user ─────────────────────────────────────────
|
# ── Configure admin user ─────────────────────────────────────────
|
||||||
if ! id "${adminUser}" &>/dev/null; then
|
ADMIN="${adminUser}"
|
||||||
useradd -m -G wheel "${adminUser}"
|
if ! id "$ADMIN" &>/dev/null; then
|
||||||
echo "${adminUser} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/${adminUser}
|
useradd -m -G wheel "$ADMIN"
|
||||||
chmod 440 /etc/sudoers.d/${adminUser}
|
echo "$ADMIN ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$ADMIN
|
||||||
|
chmod 440 /etc/sudoers.d/$ADMIN
|
||||||
fi
|
fi
|
||||||
ADMIN_SSH="/home/${adminUser}/.ssh"
|
ADMIN_SSH="/home/$ADMIN/.ssh"
|
||||||
mkdir -p "$ADMIN_SSH"
|
mkdir -p "$ADMIN_SSH"
|
||||||
chmod 700 "$ADMIN_SSH"
|
chmod 700 "$ADMIN_SSH"
|
||||||
${sshKeyBlock}
|
${sshKeyBlock}
|
||||||
chmod 600 "$ADMIN_SSH/authorized_keys"
|
chmod 600 "$ADMIN_SSH/authorized_keys"
|
||||||
chown -R ${adminUser}:${adminUser} "$ADMIN_SSH"
|
chown -R $ADMIN:$ADMIN "$ADMIN_SSH"
|
||||||
|
|
||||||
# Also authorize root
|
# Also authorize root
|
||||||
mkdir -p /root/.ssh
|
mkdir -p /root/.ssh
|
||||||
@@ -246,14 +263,14 @@ ${rootSshKeyBlock}
|
|||||||
chmod 600 /root/.ssh/authorized_keys
|
chmod 600 /root/.ssh/authorized_keys
|
||||||
|
|
||||||
# ── Harden SSH (takes effect on next sshd restart/reboot) ────────
|
# ── Harden SSH (takes effect on next sshd restart/reboot) ────────
|
||||||
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
||||||
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||||
|
|
||||||
# ── Write provisioning metadata ──────────────────────────────────
|
# ── Write provisioning metadata ──────────────────────────────────
|
||||||
cat > /etc/lab-provisioned << LABMETA
|
cat > /etc/lab-provisioned << LABMETA
|
||||||
hostname=${hostname}
|
hostname=$ACTUAL_HOSTNAME
|
||||||
role=${role}
|
role=${role}
|
||||||
mac=${mac}
|
mac=$CONF_MAC
|
||||||
provisioned_at=$(date -Iseconds)
|
provisioned_at=$(date -Iseconds)
|
||||||
method=asahi-firstboot
|
method=asahi-firstboot
|
||||||
LABMETA
|
LABMETA
|
||||||
@@ -263,9 +280,9 @@ IP=$(hostname -I | awk '{print $1}')
|
|||||||
echo "Registering with bastion at ${serverIp}:${httpPort}..."
|
echo "Registering with bastion at ${serverIp}:${httpPort}..."
|
||||||
curl -sf -X POST "http://${serverIp}:${httpPort}/api/register" \\
|
curl -sf -X POST "http://${serverIp}:${httpPort}/api/register" \\
|
||||||
-H "Content-Type: application/json" \\
|
-H "Content-Type: application/json" \\
|
||||||
-d "{\\"mac\\":\\"${mac}\\",\\"hostname\\":\\"${hostname}\\",\\"role\\":\\"${role}\\",\\"ip\\":\\"$IP\\"}" \\
|
-d "{\\"mac\\":\\"$CONF_MAC\\",\\"hostname\\":\\"$ACTUAL_HOSTNAME\\",\\"role\\":\\"${role}\\",\\"ip\\":\\"$IP\\"}" \\
|
||||||
2>/dev/null && echo " Registered as ${hostname} ($IP)" \\
|
2>/dev/null && echo " Registered as $ACTUAL_HOSTNAME ($IP)" \\
|
||||||
|| echo " WARNING: Could not reach bastion — register manually with: labctl provision register ${mac} ${hostname} --role ${role} --ip $IP"
|
|| echo " WARNING: Could not reach bastion — register manually with: labctl provision register $CONF_MAC $ACTUAL_HOSTNAME --role ${role} --ip $IP"
|
||||||
|
|
||||||
# ── Mark done ────────────────────────────────────────────────────
|
# ── Mark done ────────────────────────────────────────────────────
|
||||||
touch "$MARKER"
|
touch "$MARKER"
|
||||||
|
|||||||
@@ -184,7 +184,8 @@ describe("renderFirstbootScript", () => {
|
|||||||
|
|
||||||
it("sets hostname", () => {
|
it("sets hostname", () => {
|
||||||
const script = renderFirstbootScript({ ...baseParams, role: "worker" });
|
const script = renderFirstbootScript({ ...baseParams, role: "worker" });
|
||||||
expect(script).toContain('hostnamectl set-hostname "test-node"');
|
expect(script).toContain('CONF_HOSTNAME="test-node"');
|
||||||
|
expect(script).toContain("hostnamectl set-hostname");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("includes bastion self-registration", () => {
|
it("includes bastion self-registration", () => {
|
||||||
|
|||||||
@@ -104,6 +104,16 @@ export class LabdClient {
|
|||||||
return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } });
|
return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async discoverMachine(data: {
|
||||||
|
mac: string; product?: string; board?: string; serial?: string;
|
||||||
|
manufacturer?: string; cpu_model?: string; cpu_cores?: number;
|
||||||
|
memory_gb?: number; arch?: string;
|
||||||
|
disks?: Array<{ name: string; size_gb: number; model: string }>;
|
||||||
|
nics?: Array<{ name: string; mac: string; state: string }>;
|
||||||
|
}): Promise<{ status: string; error?: string }> {
|
||||||
|
return this.request("POST", "/api/machines/discover", { body: data });
|
||||||
|
}
|
||||||
|
|
||||||
async forgetMachine(mac: string): Promise<{ status: string }> {
|
async forgetMachine(mac: string): Promise<{ status: string }> {
|
||||||
return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
|
return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ export function registerAppCommand(program: Command): void {
|
|||||||
.command("install <target>")
|
.command("install <target>")
|
||||||
.description("Install k3s on a target machine (hostname, IP, or MAC)")
|
.description("Install k3s on a target machine (hostname, IP, or MAC)")
|
||||||
.option("--role <role>", "k3s role: infra (server) or worker (agent)", "infra")
|
.option("--role <role>", "k3s role: infra (server) or worker (agent)", "infra")
|
||||||
.option("--user <user>", "SSH user", "lab")
|
.option("--user <user>", "SSH user", "root")
|
||||||
.option("--k3s-server <url>", "k3s server URL (required for worker role)")
|
.option("--k3s-server <url>", "k3s server URL (required for worker role)")
|
||||||
.option("--k3s-token <token>", "k3s join token (required for worker role)")
|
.option("--k3s-token <token>", "k3s join token (required for worker role)")
|
||||||
.action(async (target: string, opts: {
|
.action(async (target: string, opts: {
|
||||||
@@ -164,7 +164,7 @@ export function registerAppCommand(program: Command): void {
|
|||||||
k3sCmd
|
k3sCmd
|
||||||
.command("health [target]")
|
.command("health [target]")
|
||||||
.description("Check k3s health (all hosts if no target given)")
|
.description("Check k3s health (all hosts if no target given)")
|
||||||
.option("--user <user>", "SSH user", "lab")
|
.option("--user <user>", "SSH user", "root")
|
||||||
.action(async (target: string | undefined, opts: { user: string }) => {
|
.action(async (target: string | undefined, opts: { user: string }) => {
|
||||||
const sshKey = findSshKey();
|
const sshKey = findSshKey();
|
||||||
|
|
||||||
@@ -304,7 +304,7 @@ export function registerAppCommand(program: Command): void {
|
|||||||
k3sCmd
|
k3sCmd
|
||||||
.command("list")
|
.command("list")
|
||||||
.description("List installed machines and their k3s status")
|
.description("List installed machines and their k3s status")
|
||||||
.option("--user <user>", "SSH user", "lab")
|
.option("--user <user>", "SSH user", "root")
|
||||||
.action(async (opts: { user: string }) => {
|
.action(async (opts: { user: string }) => {
|
||||||
let state: BastionState;
|
let state: BastionState;
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -59,9 +59,9 @@ export function registerAsahiCommand(parent: Command): void {
|
|||||||
console.log(` labvg/longhorn (remaining space)${RESET}`);
|
console.log(` labvg/longhorn (remaining space)${RESET}`);
|
||||||
console.log("");
|
console.log("");
|
||||||
console.log(` After first boot, SSH in and run the firstboot script:`);
|
console.log(` After first boot, SSH in and run the firstboot script:`);
|
||||||
console.log(` ${BOLD}ssh root@<ip> 'curl -sf ${bastionUrl}/asahi/firstboot.sh?hostname=<name>\\&role=infra | bash'${RESET}`);
|
console.log(` ${BOLD}ssh root@<ip> 'curl -sf ${bastionUrl}/asahi/firstboot.sh | bash'${RESET}`);
|
||||||
console.log("");
|
console.log("");
|
||||||
console.log(` This sets up LVM and self-registers with the bastion.`);
|
console.log(` This sets up LVM, detects hostname/MAC, and self-registers.`);
|
||||||
console.log(` Then install k3s:`);
|
console.log(` Then install k3s:`);
|
||||||
console.log(` ${BOLD}labctl app k3s install <hostname> --role infra${RESET}`);
|
console.log(` ${BOLD}labctl app k3s install <hostname> --role infra${RESET}`);
|
||||||
console.log("");
|
console.log("");
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ export function registerLabcontrollerCommands(appCmd: Command): void {
|
|||||||
lcCmd
|
lcCmd
|
||||||
.command("deploy <target>")
|
.command("deploy <target>")
|
||||||
.description("Deploy labcontroller stack to a k3s node")
|
.description("Deploy labcontroller stack to a k3s node")
|
||||||
.option("--user <user>", "SSH user", "lab")
|
.option("--user <user>", "SSH user", "root")
|
||||||
.option("--crdb-replicas <n>", "CockroachDB replicas", "1")
|
.option("--crdb-replicas <n>", "CockroachDB replicas", "1")
|
||||||
.action(async (target: string, opts: {
|
.action(async (target: string, opts: {
|
||||||
user: string;
|
user: string;
|
||||||
@@ -193,7 +193,7 @@ export function registerLabcontrollerCommands(appCmd: Command): void {
|
|||||||
lcCmd
|
lcCmd
|
||||||
.command("status [target]")
|
.command("status [target]")
|
||||||
.description("Check labcontroller deployment status (all hosts if no target)")
|
.description("Check labcontroller deployment status (all hosts if no target)")
|
||||||
.option("--user <user>", "SSH user", "lab")
|
.option("--user <user>", "SSH user", "root")
|
||||||
.action(async (target: string | undefined, opts: { user: string }) => {
|
.action(async (target: string | undefined, opts: { user: string }) => {
|
||||||
const sshKey = findSshKey();
|
const sshKey = findSshKey();
|
||||||
const sshOpts = sshKey ? { keyPath: sshKey } : {};
|
const sshOpts = sshKey ? { keyPath: sshKey } : {};
|
||||||
|
|||||||
@@ -69,10 +69,10 @@ export function registerListCommand(parent: Command): void {
|
|||||||
const hostname = inst?.hostname ?? queued?.hostname ?? "-";
|
const hostname = inst?.hostname ?? queued?.hostname ?? "-";
|
||||||
const role = inst?.role ?? queued?.role ?? "-";
|
const role = inst?.role ?? queued?.role ?? "-";
|
||||||
const ip = inst?.ip ?? "-";
|
const ip = inst?.ip ?? "-";
|
||||||
const cpu = hw?.cpu_model ?? "-";
|
const cpu = hw?.cpu_model ?? inst?.cpu_model ?? "-";
|
||||||
const cores = hw?.cpu_cores != null ? String(hw.cpu_cores) : "-";
|
const cores = (hw?.cpu_cores ?? inst?.cpu_cores) != null ? String(hw?.cpu_cores ?? inst?.cpu_cores) : "-";
|
||||||
const ram = hw?.memory_gb != null ? `${hw.memory_gb}GB` : "-";
|
const ram = (hw?.memory_gb ?? inst?.memory_gb) != null ? `${hw?.memory_gb ?? inst?.memory_gb}GB` : "-";
|
||||||
const product = hw?.product ?? "-";
|
const product = hw?.product ?? inst?.product ?? "-";
|
||||||
|
|
||||||
const color = statusColor(status);
|
const color = statusColor(status);
|
||||||
|
|
||||||
|
|||||||
94
bastion/src/cli/src/commands/recheck.ts
Normal file
94
bastion/src/cli/src/commands/recheck.ts
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
// CLI command: provision recheck
|
||||||
|
// SSH into all installed machines, collect hardware info, update bastion state.
|
||||||
|
|
||||||
|
import type { Command } from "commander";
|
||||||
|
import { sshExec } from "@lab/modules";
|
||||||
|
import { getLabdClient } from "../api/config.js";
|
||||||
|
|
||||||
|
const BOLD = "\x1b[1m";
|
||||||
|
const GREEN = "\x1b[0;32m";
|
||||||
|
const RED = "\x1b[0;31m";
|
||||||
|
const DIM = "\x1b[2m";
|
||||||
|
const RESET = "\x1b[0m";
|
||||||
|
|
||||||
|
const SSH_OPTS = { timeoutMs: 30_000 };
|
||||||
|
|
||||||
|
// Shell script that collects hardware info as JSON.
|
||||||
|
// Kept simple — no Python, pure shell + awk.
|
||||||
|
const HW_COLLECT_SCRIPT = [
|
||||||
|
'P=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo unknown)',
|
||||||
|
'B=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo unknown)',
|
||||||
|
'S=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo unknown)',
|
||||||
|
'M=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo unknown)',
|
||||||
|
'C=$(grep -m1 "model name" /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //" || grep -m1 Model /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //" || echo unknown)',
|
||||||
|
'N=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null || echo 0)',
|
||||||
|
'R=$(awk "/MemTotal/ {printf \\"%d\\", \\$2/1024/1024}" /proc/meminfo 2>/dev/null || echo 0)',
|
||||||
|
'A=$(uname -m)',
|
||||||
|
'printf \'{"product":"%s","board":"%s","serial":"%s","manufacturer":"%s","cpu_model":"%s","cpu_cores":%s,"memory_gb":%s,"arch":"%s"}\\n\' "$P" "$B" "$S" "$M" "$C" "$N" "$R" "$A"',
|
||||||
|
].join("; ");
|
||||||
|
|
||||||
|
export function registerRecheckCommand(parent: Command): void {
|
||||||
|
parent
|
||||||
|
.command("recheck")
|
||||||
|
.description("Refresh hardware info for all installed machines via SSH")
|
||||||
|
.option("--user <user>", "SSH user", "root")
|
||||||
|
.option("--target <hostname>", "Only recheck a specific machine (by hostname or MAC)")
|
||||||
|
.action(async (opts: { user: string; target?: string }) => {
|
||||||
|
const client = getLabdClient();
|
||||||
|
let state;
|
||||||
|
try {
|
||||||
|
state = await client.getMachines();
|
||||||
|
} catch (err) {
|
||||||
|
console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build list of machines to check
|
||||||
|
const targets: Array<{ mac: string; hostname: string; ip: string }> = [];
|
||||||
|
for (const [mac, info] of Object.entries(state.installed)) {
|
||||||
|
if (!info.ip) continue;
|
||||||
|
if (opts.target && info.hostname !== opts.target && mac !== opts.target) continue;
|
||||||
|
targets.push({ mac, hostname: info.hostname, ip: info.ip });
|
||||||
|
}
|
||||||
|
|
||||||
|
if (targets.length === 0) {
|
||||||
|
console.log("No installed machines with IPs to check.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`\n${BOLD}Rechecking ${targets.length} machine(s)...${RESET}\n`);
|
||||||
|
|
||||||
|
let updated = 0;
|
||||||
|
let failed = 0;
|
||||||
|
|
||||||
|
for (const { mac, hostname, ip } of targets) {
|
||||||
|
process.stdout.write(` ${hostname.padEnd(24)} ${DIM}(${ip})${RESET} `);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const t0 = Date.now();
|
||||||
|
const result = await sshExec(ip, opts.user, HW_COLLECT_SCRIPT, SSH_OPTS);
|
||||||
|
const elapsed = Date.now() - t0;
|
||||||
|
if (result.exitCode !== 0) {
|
||||||
|
console.log(`${RED}SSH failed (exit ${result.exitCode}, ${elapsed}ms)${RESET}`);
|
||||||
|
if (result.stderr) console.log(` ${DIM}${result.stderr.substring(0, 200)}${RESET}`);
|
||||||
|
console.log(`${RED}SSH failed (exit ${result.exitCode})${RESET}`);
|
||||||
|
failed++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const hwData = JSON.parse(result.stdout.trim());
|
||||||
|
await client.discoverMachine({ mac, ...hwData });
|
||||||
|
const cpu = hwData.cpu_model || "?";
|
||||||
|
const cores = hwData.cpu_cores || "?";
|
||||||
|
const mem = hwData.memory_gb || "?";
|
||||||
|
console.log(`${GREEN}OK${RESET} ${DIM}${cpu}, ${cores} cores, ${mem}GB${RESET}`);
|
||||||
|
updated++;
|
||||||
|
} catch (err) {
|
||||||
|
console.log(`${RED}FAIL${RESET} ${DIM}${err instanceof Error ? err.message : String(err)}${RESET}`);
|
||||||
|
failed++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`\n${BOLD}Done:${RESET} ${updated} updated, ${failed} failed\n`);
|
||||||
|
});
|
||||||
|
}
|
||||||
@@ -20,6 +20,7 @@ import { registerRegisterCommand } from "./commands/register.js";
|
|||||||
import { registerAsahiCommand } from "./commands/asahi.js";
|
import { registerAsahiCommand } from "./commands/asahi.js";
|
||||||
import { registerLogsCommand } from "./commands/logs.js";
|
import { registerLogsCommand } from "./commands/logs.js";
|
||||||
import { registerMakeIsoCommand } from "./commands/makeiso.js";
|
import { registerMakeIsoCommand } from "./commands/makeiso.js";
|
||||||
|
import { registerRecheckCommand } from "./commands/recheck.js";
|
||||||
import { registerConfigCommand } from "./commands/config.js";
|
import { registerConfigCommand } from "./commands/config.js";
|
||||||
import { registerLoginCommand } from "./commands/login.js";
|
import { registerLoginCommand } from "./commands/login.js";
|
||||||
import { registerDoctorCommand } from "./commands/doctor.js";
|
import { registerDoctorCommand } from "./commands/doctor.js";
|
||||||
@@ -104,6 +105,7 @@ export function createProgram(): Command {
|
|||||||
registerAsahiCommand(provisionCmd);
|
registerAsahiCommand(provisionCmd);
|
||||||
registerLogsCommand(provisionCmd);
|
registerLogsCommand(provisionCmd);
|
||||||
registerMakeIsoCommand(provisionCmd);
|
registerMakeIsoCommand(provisionCmd);
|
||||||
|
registerRecheckCommand(provisionCmd);
|
||||||
|
|
||||||
// config list/get/set/path
|
// config list/get/set/path
|
||||||
registerConfigCommand(program);
|
registerConfigCommand(program);
|
||||||
|
|||||||
@@ -260,6 +260,37 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Update hardware info (discovery data) for a machine
|
||||||
|
app.post<{
|
||||||
|
Body: {
|
||||||
|
mac?: string; product?: string; board?: string; serial?: string;
|
||||||
|
manufacturer?: string; cpu_model?: string; cpu_cores?: number;
|
||||||
|
memory_gb?: number; arch?: string;
|
||||||
|
disks?: Array<{ name: string; size_gb: number; model: string }>;
|
||||||
|
nics?: Array<{ name: string; mac: string; state: string }>;
|
||||||
|
};
|
||||||
|
}>("/api/machines/discover", async (request, reply) => {
|
||||||
|
const data = request.body ?? {};
|
||||||
|
const mac = (data.mac ?? "").toLowerCase().replace(/-/g, ":");
|
||||||
|
if (!mac) {
|
||||||
|
return reply.code(400).send({ error: "mac is required" });
|
||||||
|
}
|
||||||
|
|
||||||
|
const bastion = bastionRegistry.findBastionByMac(mac);
|
||||||
|
const target = bastion ?? (bastionRegistry.getAll().length === 1 ? bastionRegistry.getAll()[0] : null);
|
||||||
|
|
||||||
|
if (!target) {
|
||||||
|
return reply.code(503).send({ error: "No bastion found for this MAC" });
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await sendCommand(target.bastionId, { type: "command-discover", ...data, mac });
|
||||||
|
return reply.code(result.status === "ok" ? 200 : 500).send(result);
|
||||||
|
} catch (err) {
|
||||||
|
return reply.code(500).send({ error: err instanceof Error ? err.message : String(err) });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Update role
|
// Update role
|
||||||
app.post<{
|
app.post<{
|
||||||
Body: { mac?: string; role?: string };
|
Body: { mac?: string; role?: string };
|
||||||
|
|||||||
@@ -78,9 +78,10 @@ export class K3sModule implements Module {
|
|||||||
return toModuleResult("install", [...prepResults, ...k3sResults], start);
|
return toModuleResult("install", [...prepResults, ...k3sResults], start);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Phase 3: Networking (server only — agents don't install Cilium)
|
// Phase 3: Networking (initial server only — joining servers get Cilium via daemonset)
|
||||||
let netResults: OperationResult[] = [];
|
let netResults: OperationResult[] = [];
|
||||||
if (isServer) {
|
const isJoiningServer = isServer && !!opCtx.config.k3sServerUrl;
|
||||||
|
if (isServer && !isJoiningServer) {
|
||||||
netResults = await runNetworking(opCtx);
|
netResults = await runNetworking(opCtx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,10 +13,11 @@ export const enableIscsi: Operation = async (ctx): Promise<OperationResult> => {
|
|||||||
|
|
||||||
// Install the package (detect distro)
|
// Install the package (detect distro)
|
||||||
const osRelease = await ctx.ssh.exec("cat /etc/os-release", sshOpts(ctx));
|
const osRelease = await ctx.ssh.exec("cat /etc/os-release", sshOpts(ctx));
|
||||||
const isFedora = osRelease.stdout.includes("fedora") || osRelease.stdout.includes("rhel") || osRelease.stdout.includes("centos");
|
const osLower = osRelease.stdout.toLowerCase();
|
||||||
|
const isFedora = osLower.includes("fedora") || osLower.includes("rhel") || osLower.includes("centos");
|
||||||
|
|
||||||
const pkg = isFedora ? "iscsi-initiator-utils" : "open-iscsi";
|
const pkg = isFedora ? "iscsi-initiator-utils" : "open-iscsi";
|
||||||
const installCmd = isFedora ? `dnf install -y ${pkg}` : `apt-get install -y ${pkg}`;
|
const installCmd = isFedora ? `sudo dnf install -y ${pkg}` : `sudo apt-get install -y ${pkg}`;
|
||||||
|
|
||||||
const install = await ctx.ssh.exec(installCmd, { timeoutMs: 120_000 });
|
const install = await ctx.ssh.exec(installCmd, { timeoutMs: 120_000 });
|
||||||
if (install.exitCode !== 0) {
|
if (install.exitCode !== 0) {
|
||||||
@@ -24,7 +25,7 @@ export const enableIscsi: Operation = async (ctx): Promise<OperationResult> => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Enable and start
|
// Enable and start
|
||||||
await ctx.ssh.exec("systemctl enable --now iscsid", sshOpts(ctx));
|
await ctx.ssh.exec("sudo systemctl enable --now iscsid", sshOpts(ctx));
|
||||||
|
|
||||||
return { success: true, changed: true, message: `Installed ${pkg} and enabled iscsid` };
|
return { success: true, changed: true, message: `Installed ${pkg} and enabled iscsid` };
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -9,7 +9,12 @@ function isServerRole(role: string): boolean {
|
|||||||
|
|
||||||
function generateServerConfig(config: K3sConfig): string {
|
function generateServerConfig(config: K3sConfig): string {
|
||||||
const tlsSans = [config.hostname, config.ip, ...(config.tlsSans ?? [])];
|
const tlsSans = [config.hostname, config.ip, ...(config.tlsSans ?? [])];
|
||||||
return `# k3s server configuration — CIS hardened
|
const isJoining = !!config.k3sServerUrl;
|
||||||
|
const clusterLines = isJoining
|
||||||
|
? `server: "${config.k3sServerUrl}"\ntoken: "${config.k3sToken}"`
|
||||||
|
: "cluster-init: true";
|
||||||
|
return `# k3s server configuration — CIS hardened, etcd HA
|
||||||
|
${clusterLines}
|
||||||
protect-kernel-defaults: true
|
protect-kernel-defaults: true
|
||||||
secrets-encryption: true
|
secrets-encryption: true
|
||||||
write-kubeconfig-mode: "0640"
|
write-kubeconfig-mode: "0640"
|
||||||
|
|||||||
@@ -15,8 +15,21 @@ export const installK3sBinary: Operation = async (ctx): Promise<OperationResult>
|
|||||||
const alreadyInstalled = version.exitCode === 0;
|
const alreadyInstalled = version.exitCode === 0;
|
||||||
|
|
||||||
if (isServer) {
|
if (isServer) {
|
||||||
|
// Clean stale server state when joining an existing cluster
|
||||||
|
// (TLS certs from a previous run cause "newer than datastore" fatal error)
|
||||||
|
if (ctx.config.k3sServerUrl && ctx.config.k3sToken) {
|
||||||
|
await ctx.ssh.exec(
|
||||||
|
"rm -rf /var/lib/rancher/k3s/server/tls /var/lib/rancher/k3s/server/cred /var/lib/rancher/k3s/server/db",
|
||||||
|
sshOpts(ctx),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If joining an existing cluster, pass K3S_URL and K3S_TOKEN
|
||||||
|
const joinEnv = ctx.config.k3sServerUrl && ctx.config.k3sToken
|
||||||
|
? `K3S_URL="${ctx.config.k3sServerUrl}" K3S_TOKEN="${ctx.config.k3sToken}"`
|
||||||
|
: "";
|
||||||
const result = await ctx.ssh.exec(
|
const result = await ctx.ssh.exec(
|
||||||
'curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server" INSTALL_K3S_SKIP_SELINUX_RPM=true sh -',
|
`curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server" INSTALL_K3S_SKIP_SELINUX_RPM=true ${joinEnv} sh -`,
|
||||||
{ timeoutMs: 300_000 },
|
{ timeoutMs: 300_000 },
|
||||||
);
|
);
|
||||||
if (result.exitCode !== 0) {
|
if (result.exitCode !== 0) {
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
import type { Operation, OperationResult } from "../types.js";
|
import type { Operation, OperationResult } from "../types.js";
|
||||||
import { sshOpts } from "../utils.js";
|
import { sshOpts } from "../utils.js";
|
||||||
|
import { sshExec as remoteSshExec } from "../../../../src/ssh.js";
|
||||||
|
|
||||||
export const configureLonghornDisk: Operation = async (ctx): Promise<OperationResult> => {
|
export const configureLonghornDisk: Operation = async (ctx): Promise<OperationResult> => {
|
||||||
// Check if /var/lib/longhorn exists on this node
|
// Check if /var/lib/longhorn exists on this node
|
||||||
@@ -15,12 +16,11 @@ export const configureLonghornDisk: Operation = async (ctx): Promise<OperationRe
|
|||||||
const nodeNameResult = await ctx.ssh.exec("hostname -f 2>/dev/null || hostname", sshOpts(ctx));
|
const nodeNameResult = await ctx.ssh.exec("hostname -f 2>/dev/null || hostname", sshOpts(ctx));
|
||||||
const nodeName = nodeNameResult.stdout.trim();
|
const nodeName = nodeNameResult.stdout.trim();
|
||||||
|
|
||||||
// Apply the annotation via kubectl (works on server nodes, or via KUBECONFIG on agents)
|
|
||||||
const kubectlPrefix = "k3s kubectl";
|
|
||||||
const annotation = JSON.stringify([{ path: "/var/lib/longhorn", allowScheduling: true }]);
|
const annotation = JSON.stringify([{ path: "/var/lib/longhorn", allowScheduling: true }]);
|
||||||
|
|
||||||
|
// Try kubectl locally first (works on server nodes)
|
||||||
const result = await ctx.ssh.exec(
|
const result = await ctx.ssh.exec(
|
||||||
`${kubectlPrefix} annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite 2>&1 || true`,
|
`k3s kubectl annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite 2>&1 || true`,
|
||||||
sshOpts(ctx),
|
sshOpts(ctx),
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -28,7 +28,23 @@ export const configureLonghornDisk: Operation = async (ctx): Promise<OperationRe
|
|||||||
return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName}` };
|
return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName}` };
|
||||||
}
|
}
|
||||||
|
|
||||||
// If kubectl isn't available (agent node without server access), that's OK —
|
// For worker/agent nodes without local kubectl: apply via the server
|
||||||
// the label is set, annotation can be applied from the server later
|
if (ctx.config.k3sServerUrl) {
|
||||||
|
// The CLI has SSH access to the server — use sshExec from there
|
||||||
|
const serverHost = new URL(ctx.config.k3sServerUrl).hostname;
|
||||||
|
try {
|
||||||
|
const remoteResult = await remoteSshExec(
|
||||||
|
serverHost, "root",
|
||||||
|
`k3s kubectl annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite`,
|
||||||
|
{ ...(ctx.ssh.keyPath ? { keyPath: ctx.ssh.keyPath } : {}), timeoutMs: 15_000 },
|
||||||
|
);
|
||||||
|
if (remoteResult.stdout.includes("annotated") || remoteResult.stdout.includes("unchanged")) {
|
||||||
|
return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName} (via server)` };
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Fall through to manual instruction
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return { success: true, changed: false, message: "Longhorn disk label set (annotation requires server kubectl)" };
|
return { success: true, changed: false, message: "Longhorn disk label set (annotation requires server kubectl)" };
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -113,6 +113,7 @@ export type LabdBastionMessage =
|
|||||||
| { type: "command-role-update"; requestId: string; mac: string; role: string }
|
| { type: "command-role-update"; requestId: string; mac: string; role: string }
|
||||||
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
|
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
|
||||||
| { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
|
| { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
|
||||||
|
| { type: "command-discover"; requestId: string; mac: string; product?: string; board?: string; serial?: string; manufacturer?: string; cpu_model?: string; cpu_cores?: number; memory_gb?: number; arch?: string; disks?: Array<{ name: string; size_gb: number; model: string }>; nics?: Array<{ name: string; mac: string; state: string }> }
|
||||||
| { type: "server-shutdown"; reconnectAfter: number };
|
| { type: "server-shutdown"; reconnectAfter: number };
|
||||||
|
|
||||||
export type BastionMessageType = BastionMessage["type"];
|
export type BastionMessageType = BastionMessage["type"];
|
||||||
@@ -127,7 +128,7 @@ const BASTION_MESSAGE_TYPES = new Set<string>([
|
|||||||
|
|
||||||
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
|
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
|
||||||
"bastion-enrolled", "bastion-heartbeat-ack", "command-install",
|
"bastion-enrolled", "bastion-heartbeat-ack", "command-install",
|
||||||
"command-forget", "command-role-update", "command-debug", "command-register", "server-shutdown",
|
"command-forget", "command-role-update", "command-debug", "command-register", "command-discover", "server-shutdown",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
export function isBastionMessage(msg: unknown): msg is BastionMessage {
|
export function isBastionMessage(msg: unknown): msg is BastionMessage {
|
||||||
|
|||||||
@@ -96,6 +96,13 @@ export interface InstalledInfo {
|
|||||||
ip: string;
|
ip: string;
|
||||||
installed_at: string;
|
installed_at: string;
|
||||||
bastionId?: string; // set when aggregated through labd
|
bastionId?: string; // set when aggregated through labd
|
||||||
|
// Hardware info (copied from discovered on install completion)
|
||||||
|
product?: string;
|
||||||
|
manufacturer?: string;
|
||||||
|
cpu_model?: string;
|
||||||
|
cpu_cores?: number;
|
||||||
|
memory_gb?: number;
|
||||||
|
arch?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface DebugConfig {
|
export interface DebugConfig {
|
||||||
|
|||||||
Reference in New Issue
Block a user