Compare commits
23 Commits
feat/asahi
...
fix/v2-wir
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cdf3b5c045 | ||
| f3c50f71ef | |||
|
|
98b0ccc6c9 | ||
|
|
37a3b51e57 | ||
|
|
d6e1f3c74d | ||
|
|
52e831b8c1 | ||
| f5af24699a | |||
|
|
dd92147341 | ||
|
|
04faa079e2 | ||
| 95c99cb4d5 | |||
|
|
2eda926d4c | ||
|
|
70258a0cc3 | ||
|
|
e9944c5413 | ||
| 22e2946e95 | |||
|
|
9ddab24931 | ||
|
|
ae91f2895e | ||
|
|
06fc40a857 | ||
|
|
a68d6d617e | ||
|
|
c49a650888 | ||
|
|
87e09af941 | ||
|
|
6f13e284fd | ||
|
|
6c963a15bd | ||
| 8c737d163d |
19
CLAUDE.md
Normal file
19
CLAUDE.md
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
## Skill routing
|
||||||
|
|
||||||
|
When the user's request matches an available skill, ALWAYS invoke it using the Skill
|
||||||
|
tool as your FIRST action. Do NOT answer directly, do NOT use other tools first.
|
||||||
|
The skill has specialized workflows that produce better results than ad-hoc answers.
|
||||||
|
|
||||||
|
Key routing rules:
|
||||||
|
- Product ideas, "is this worth building", brainstorming → invoke gstack-office-hours
|
||||||
|
- Bugs, errors, "why is this broken", 500 errors → invoke gstack-investigate
|
||||||
|
- Ship, deploy, push, create PR → invoke gstack-ship
|
||||||
|
- QA, test the site, find bugs → invoke gstack-qa
|
||||||
|
- Code review, check my diff → invoke gstack-review
|
||||||
|
- Update docs after shipping → invoke gstack-document-release
|
||||||
|
- Weekly retro → invoke gstack-retro
|
||||||
|
- Design system, brand → invoke gstack-design-consultation
|
||||||
|
- Visual audit, design polish → invoke gstack-design-review
|
||||||
|
- Architecture review → invoke gstack-plan-eng-review
|
||||||
|
- Save progress, checkpoint, resume → invoke gstack-checkpoint
|
||||||
|
- Code quality, health check → invoke gstack-health
|
||||||
47
TODOS.md
Normal file
47
TODOS.md
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# TODOS
|
||||||
|
|
||||||
|
## P1 — Ship with Phase 1
|
||||||
|
|
||||||
|
### v2.0 Architecture Document Update
|
||||||
|
Update `bastion/docs/ARCHITECTURE.md` to cover v2.0: driver model, fleet system,
|
||||||
|
Pulumi integration, Vault secrets, Deno evaluator, new CLI grammar. The existing
|
||||||
|
doc covers v1.0 comprehensively (432 lines). v2.0 adds 5+ major subsystems.
|
||||||
|
**Effort:** M (human: 1 week / CC: 1-2 days)
|
||||||
|
**Depends on:** Phase 1 complete
|
||||||
|
**Source:** CEO review 2026-04-01
|
||||||
|
|
||||||
|
## P2 — Post-v2.0 Core
|
||||||
|
|
||||||
|
### SSH Emergency Mode (scoped)
|
||||||
|
SSH-based operations limited to: (1) earliest necessary box provisioning before agent
|
||||||
|
is installed, and (2) emergency debugging/fixing operations that can't be done via agent.
|
||||||
|
NOT a general-purpose DeploymentTarget alternative. The v1.0 `recheck` and `fix-ssh-root.sh`
|
||||||
|
patterns are the model. Agent stays the primary management path.
|
||||||
|
**Effort:** S (human: 1 week / CC: 1 day)
|
||||||
|
**Depends on:** Phase 2 complete (DeploymentTarget interface exists)
|
||||||
|
**Source:** CEO review 2026-04-01
|
||||||
|
|
||||||
|
### Prometheus Metrics Endpoint
|
||||||
|
Add `/metrics` endpoint to labd: resource counts by status, apply duration histograms,
|
||||||
|
driver operation latency, fleet pipeline completion rates. Standard Prometheus scraping
|
||||||
|
for Grafana dashboards and alerting.
|
||||||
|
**Effort:** S (human: 2-3 days / CC: 2-3 hours)
|
||||||
|
**Depends on:** Phase 1 (labd exists with resource store)
|
||||||
|
**Source:** CEO review 2026-04-01 (observability gap)
|
||||||
|
|
||||||
|
## P3 — Future Enhancements
|
||||||
|
|
||||||
|
### Infrastructure Graph Visualization
|
||||||
|
Visual representation of resource dependencies, environment topology, fleet status.
|
||||||
|
Could be a web UI or terminal-based (like `kubectl tree`).
|
||||||
|
**Source:** CEO review 2026-04-01
|
||||||
|
|
||||||
|
### `labctl import` for Existing Cloud Resources
|
||||||
|
Discover and import existing AWS/GCP resources into the state store.
|
||||||
|
Pulumi's import functionality could be leveraged.
|
||||||
|
**Source:** CEO review 2026-04-01
|
||||||
|
|
||||||
|
### Built-in Secrets Rotation
|
||||||
|
Automatic rotation of managed secrets (database passwords, API keys).
|
||||||
|
Vault handles rotation but a labctl-native workflow could simplify.
|
||||||
|
**Source:** CEO review 2026-04-01
|
||||||
@@ -11,6 +11,7 @@ WORKDIR /app
|
|||||||
# Copy workspace config and package manifests first (layer cache)
|
# Copy workspace config and package manifests first (layer cache)
|
||||||
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json tsconfig.base.json tsconfig.json ./
|
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json tsconfig.base.json tsconfig.json ./
|
||||||
COPY src/shared/package.json src/shared/tsconfig.json src/shared/
|
COPY src/shared/package.json src/shared/tsconfig.json src/shared/
|
||||||
|
COPY src/core/package.json src/core/tsconfig.json src/core/
|
||||||
COPY src/labd/package.json src/labd/tsconfig.json src/labd/
|
COPY src/labd/package.json src/labd/tsconfig.json src/labd/
|
||||||
|
|
||||||
# Install all dependencies (dev included -- needed for build)
|
# Install all dependencies (dev included -- needed for build)
|
||||||
@@ -22,10 +23,13 @@ RUN pnpm --filter @lab/labd exec prisma generate
|
|||||||
|
|
||||||
# Copy source code
|
# Copy source code
|
||||||
COPY src/shared/src/ src/shared/src/
|
COPY src/shared/src/ src/shared/src/
|
||||||
|
COPY src/core/src/ src/core/src/
|
||||||
COPY src/labd/src/ src/labd/src/
|
COPY src/labd/src/ src/labd/src/
|
||||||
|
|
||||||
# Build TypeScript (shared first via project references)
|
# Build TypeScript (shared + core before labd via project references)
|
||||||
RUN pnpm --filter @lab/shared build && pnpm --filter @lab/labd build
|
RUN pnpm --filter @lab/shared build \
|
||||||
|
&& pnpm --filter @lab/core build \
|
||||||
|
&& pnpm --filter @lab/labd build
|
||||||
|
|
||||||
# Hoist the generated Prisma client so stage 2 can COPY it from a stable path
|
# Hoist the generated Prisma client so stage 2 can COPY it from a stable path
|
||||||
RUN mkdir -p /app/_prisma && \
|
RUN mkdir -p /app/_prisma && \
|
||||||
@@ -41,6 +45,7 @@ WORKDIR /app
|
|||||||
# Copy workspace config and package manifests
|
# Copy workspace config and package manifests
|
||||||
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
|
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
|
||||||
COPY src/shared/package.json src/shared/
|
COPY src/shared/package.json src/shared/
|
||||||
|
COPY src/core/package.json src/core/
|
||||||
COPY src/labd/package.json src/labd/
|
COPY src/labd/package.json src/labd/
|
||||||
|
|
||||||
# Install production dependencies only
|
# Install production dependencies only
|
||||||
@@ -48,6 +53,7 @@ RUN pnpm install --frozen-lockfile --prod 2>/dev/null || pnpm install --prod
|
|||||||
|
|
||||||
# Copy built output from builder
|
# Copy built output from builder
|
||||||
COPY --from=builder /app/src/shared/dist/ src/shared/dist/
|
COPY --from=builder /app/src/shared/dist/ src/shared/dist/
|
||||||
|
COPY --from=builder /app/src/core/dist/ src/core/dist/
|
||||||
COPY --from=builder /app/src/labd/dist/ src/labd/dist/
|
COPY --from=builder /app/src/labd/dist/ src/labd/dist/
|
||||||
|
|
||||||
# Copy Prisma schema + generated client into pnpm store location
|
# Copy Prisma schema + generated client into pnpm store location
|
||||||
|
|||||||
4
bastion/bastion/.gitignore
vendored
Normal file
4
bastion/bastion/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
# Asahi build artifacts (large)
|
||||||
|
.asahi-cache/
|
||||||
|
asahi-repo/*.zip
|
||||||
@@ -82,6 +82,9 @@ _labctl() {
|
|||||||
"provision makeiso")
|
"provision makeiso")
|
||||||
COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
|
"provision recheck")
|
||||||
|
COMPREPLY=($(compgen -W "--user --target -h --help" -- "$cur"))
|
||||||
|
return ;;
|
||||||
"config list")
|
"config list")
|
||||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
@@ -107,7 +110,7 @@ _labctl() {
|
|||||||
COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
"provision")
|
"provision")
|
||||||
COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso recheck -h --help" -- "$cur"))
|
||||||
return ;;
|
return ;;
|
||||||
"config")
|
"config")
|
||||||
COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
|
COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
|
||||||
|
|||||||
@@ -128,6 +128,7 @@ complete -c labctl -n "__labctl_using_cmd provision" -a register -d 'Register an
|
|||||||
complete -c labctl -n "__labctl_using_cmd provision" -a asahi -d 'Show instructions to provision an Apple Silicon Mac with Asahi Linux'
|
complete -c labctl -n "__labctl_using_cmd provision" -a asahi -d 'Show instructions to provision an Apple Silicon Mac with Asahi Linux'
|
||||||
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
|
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
|
||||||
complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning'
|
complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning'
|
||||||
|
complete -c labctl -n "__labctl_using_cmd provision" -a recheck -d 'Refresh hardware info for all installed machines via SSH'
|
||||||
|
|
||||||
# provision install options
|
# provision install options
|
||||||
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
||||||
@@ -154,6 +155,10 @@ complete -c labctl -n "__labctl_in_cmd provision makeiso" -l arch -d 'Target arc
|
|||||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL'
|
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL'
|
||||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x
|
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x
|
||||||
|
|
||||||
|
# provision recheck options
|
||||||
|
complete -c labctl -n "__labctl_in_cmd provision recheck" -l user -d 'SSH user' -x
|
||||||
|
complete -c labctl -n "__labctl_in_cmd provision recheck" -l target -d 'Only recheck a specific machine (by hostname or MAC)' -x
|
||||||
|
|
||||||
# config subcommands
|
# config subcommands
|
||||||
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
|
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
|
||||||
complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
|
complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
|
||||||
|
|||||||
1847
bastion/pnpm-lock.yaml
generated
1847
bastion/pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -99,16 +99,22 @@ if [ "$PUSH" = true ]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012)
|
||||||
|
TLS_FLAG=""
|
||||||
|
if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then
|
||||||
|
TLS_FLAG="--tls-verify=false"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "==> Logging in to $REGISTRY..."
|
echo "==> Logging in to $REGISTRY..."
|
||||||
podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||||
|
|
||||||
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||||
|
|
||||||
# Also tag as :latest if not already
|
# Also tag as :latest if not already
|
||||||
if [ "$TAG" != "latest" ]; then
|
if [ "$TAG" != "latest" ]; then
|
||||||
echo "==> Also pushing as :latest..."
|
echo "==> Also pushing as :latest..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Link package to repository if script exists
|
# Link package to repository if script exists
|
||||||
|
|||||||
@@ -92,15 +92,21 @@ if [ "$PUSH" = true ]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012)
|
||||||
|
TLS_FLAG=""
|
||||||
|
if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then
|
||||||
|
TLS_FLAG="--tls-verify=false"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "==> Logging in to $REGISTRY..."
|
echo "==> Logging in to $REGISTRY..."
|
||||||
podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||||
|
|
||||||
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||||
|
|
||||||
if [ "$TAG" != "latest" ]; then
|
if [ "$TAG" != "latest" ]; then
|
||||||
echo "==> Also pushing as :latest..."
|
echo "==> Also pushing as :latest..."
|
||||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
|
if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
|
||||||
|
|||||||
131
bastion/scripts/fix-ssh-root.sh
Normal file
131
bastion/scripts/fix-ssh-root.sh
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Fix root SSH access on all provisioned machines.
|
||||||
|
# Tries root, lab, michal users to find one that works,
|
||||||
|
# then ensures root has the SSH key and PermitRootLogin is enabled.
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SSH_KEY="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDMJ3FkUGbG174eoO5RjZd2eNV680FM5pgp0AgpW/QwlJExK3qxMk0DJSr4ICmzGUx4yujAXcrqU1otcOMPzzFzwc5heWpSmlNHU3TIW6NHEt0sF9ZTAbGLw2zSw3si5UouqFkCcENA40mePFJqY+Q9R8N1uvLgu4m/do+Zrn/mk5Ewc1V7OCRE5Acrnaec4T7LTB0BuVXcjPUfAmZ0q5fI+bKPR1q2Kc3+IeGhVkBuZ9OJVeXXhnpedm0uEbLeriK/jUYKYw/1QhsNDM8Tyty+UIGr9QVnWwzCMHB+wuQcDYC9mPGTqg0fYwX8Mp8xMi1PPxdsh1G7bj/cpWMAF43KswWORF2ul8ICGbaE1zEgIYXO790SuBjpBHhaC6Iegqi58hmCuP+a9893q/EU9HyrWTJHCZXC5E4kP1MsM57KrhEpszM6I3sW9f9zMTPd5QsCXFi4si4OMwX4kYNVu3fQGQPpseDPlTTSrT6uUdqj4Irm0c1m9cYTmK0vYgsM3ss= michal@fedora"
|
||||||
|
|
||||||
|
SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ConnectTimeout=5"
|
||||||
|
USERS_TO_TRY=(root lab michal)
|
||||||
|
|
||||||
|
# Machines: hostname ip
|
||||||
|
MACHINES=(
|
||||||
|
"labmaster 192.168.8.11"
|
||||||
|
"worker0-k8s0 192.168.8.23"
|
||||||
|
"worker1-k8s0 192.168.8.13"
|
||||||
|
"worker2-k8s0 192.168.8.25"
|
||||||
|
"spark-2935 192.168.8.12"
|
||||||
|
)
|
||||||
|
|
||||||
|
BOLD="\033[1m"
|
||||||
|
GREEN="\033[0;32m"
|
||||||
|
RED="\033[0;31m"
|
||||||
|
DIM="\033[2m"
|
||||||
|
RESET="\033[0m"
|
||||||
|
|
||||||
|
# Script to run on each machine (via sudo if needed)
|
||||||
|
read -r -d '' FIX_SCRIPT << 'FIXEOF' || true
|
||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
KEY="$1"
|
||||||
|
|
||||||
|
# 1. Ensure root .ssh dir exists
|
||||||
|
mkdir -p /root/.ssh
|
||||||
|
chmod 700 /root/.ssh
|
||||||
|
touch /root/.ssh/authorized_keys
|
||||||
|
chmod 600 /root/.ssh/authorized_keys
|
||||||
|
|
||||||
|
# 2. Add key if not present
|
||||||
|
if ! grep -qF "$KEY" /root/.ssh/authorized_keys 2>/dev/null; then
|
||||||
|
echo "$KEY" >> /root/.ssh/authorized_keys
|
||||||
|
echo "KEY_ADDED"
|
||||||
|
else
|
||||||
|
echo "KEY_EXISTS"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 3. Fix sshd_config for root login with keys
|
||||||
|
SSHD_CONF="/etc/ssh/sshd_config"
|
||||||
|
CHANGED=0
|
||||||
|
|
||||||
|
# Ensure PermitRootLogin allows key auth
|
||||||
|
CURRENT=$(grep -E "^PermitRootLogin" "$SSHD_CONF" 2>/dev/null | tail -1 || true)
|
||||||
|
if [ "$CURRENT" = "PermitRootLogin prohibit-password" ] || [ "$CURRENT" = "PermitRootLogin without-password" ]; then
|
||||||
|
echo "SSHD_OK"
|
||||||
|
elif [ "$CURRENT" = "PermitRootLogin yes" ]; then
|
||||||
|
echo "SSHD_OK"
|
||||||
|
else
|
||||||
|
# Remove any existing PermitRootLogin lines
|
||||||
|
sed -i '/^#*PermitRootLogin/d' "$SSHD_CONF"
|
||||||
|
echo "PermitRootLogin prohibit-password" >> "$SSHD_CONF"
|
||||||
|
CHANGED=1
|
||||||
|
echo "SSHD_FIXED"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Ensure PubkeyAuthentication is enabled
|
||||||
|
if grep -qE "^PubkeyAuthentication no" "$SSHD_CONF" 2>/dev/null; then
|
||||||
|
sed -i 's/^PubkeyAuthentication no/PubkeyAuthentication yes/' "$SSHD_CONF"
|
||||||
|
CHANGED=1
|
||||||
|
echo "PUBKEY_FIXED"
|
||||||
|
else
|
||||||
|
echo "PUBKEY_OK"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Restart sshd if changed
|
||||||
|
if [ "$CHANGED" -eq 1 ]; then
|
||||||
|
systemctl restart sshd 2>/dev/null || systemctl restart ssh 2>/dev/null || true
|
||||||
|
echo "SSHD_RESTARTED"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 4. Verify root can be reached
|
||||||
|
echo "DONE"
|
||||||
|
FIXEOF
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -e "${BOLD}Fixing root SSH access on all machines...${RESET}"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
for entry in "${MACHINES[@]}"; do
|
||||||
|
read -r hostname ip <<< "$entry"
|
||||||
|
printf " %-24s ${DIM}(%s)${RESET} " "$hostname" "$ip"
|
||||||
|
|
||||||
|
# Try each user until one works
|
||||||
|
WORKING_USER=""
|
||||||
|
for user in "${USERS_TO_TRY[@]}"; do
|
||||||
|
if ssh $SSH_OPTS "$user@$ip" "true" 2>/dev/null; then
|
||||||
|
WORKING_USER="$user"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -z "$WORKING_USER" ]; then
|
||||||
|
echo -e "${RED}UNREACHABLE${RESET} (tried: ${USERS_TO_TRY[*]})"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Run fix script (with sudo if not root)
|
||||||
|
if [ "$WORKING_USER" = "root" ]; then
|
||||||
|
RESULT=$(ssh $SSH_OPTS "root@$ip" "bash -s -- '$SSH_KEY'" <<< "$FIX_SCRIPT" 2>&1)
|
||||||
|
else
|
||||||
|
RESULT=$(ssh $SSH_OPTS "$WORKING_USER@$ip" "sudo bash -s -- '$SSH_KEY'" <<< "$FIX_SCRIPT" 2>&1)
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Parse result
|
||||||
|
DETAILS=""
|
||||||
|
if echo "$RESULT" | grep -q "KEY_ADDED"; then DETAILS="key added"; fi
|
||||||
|
if echo "$RESULT" | grep -q "KEY_EXISTS"; then DETAILS="key ok"; fi
|
||||||
|
if echo "$RESULT" | grep -q "SSHD_FIXED"; then DETAILS="$DETAILS, sshd fixed"; fi
|
||||||
|
if echo "$RESULT" | grep -q "SSHD_OK"; then DETAILS="$DETAILS, sshd ok"; fi
|
||||||
|
if echo "$RESULT" | grep -q "SSHD_RESTARTED"; then DETAILS="$DETAILS, restarted"; fi
|
||||||
|
|
||||||
|
# Verify root works now
|
||||||
|
if ssh $SSH_OPTS "root@$ip" "true" 2>/dev/null; then
|
||||||
|
echo -e "${GREEN}OK${RESET} ${DIM}(via $WORKING_USER: $DETAILS)${RESET}"
|
||||||
|
else
|
||||||
|
echo -e "${RED}PARTIAL${RESET} ${DIM}(via $WORKING_USER: $DETAILS -- root still blocked)${RESET}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo -e "${BOLD}Done.${RESET} Verify: labctl provision recheck --user root"
|
||||||
|
echo ""
|
||||||
@@ -309,6 +309,32 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
|||||||
return { status: "ok", data: { mac, hostname: msg.hostname } };
|
return { status: "ok", data: { mac, hostname: msg.hostname } };
|
||||||
});
|
});
|
||||||
|
|
||||||
|
labdConn.onCommand("command-discover", async (msg) => {
|
||||||
|
if (msg.type !== "command-discover") throw new Error("unexpected");
|
||||||
|
const mac = (msg.mac as string).toLowerCase();
|
||||||
|
const now = new Date().toISOString();
|
||||||
|
const existing = state.load().discovered[mac];
|
||||||
|
state.update((s) => {
|
||||||
|
s.discovered[mac] = {
|
||||||
|
mac,
|
||||||
|
product: (msg.product as string) ?? "unknown",
|
||||||
|
board: (msg.board as string) ?? "unknown",
|
||||||
|
serial: (msg.serial as string) ?? "unknown",
|
||||||
|
manufacturer: (msg.manufacturer as string) ?? "unknown",
|
||||||
|
cpu_model: (msg.cpu_model as string) ?? "unknown",
|
||||||
|
cpu_cores: (msg.cpu_cores as number) ?? 0,
|
||||||
|
memory_gb: (msg.memory_gb as number) ?? 0,
|
||||||
|
arch: (msg.arch as string) ?? "unknown",
|
||||||
|
disks: (msg.disks as Array<{ name: string; size_gb: number; model: string }>) ?? [],
|
||||||
|
nics: (msg.nics as Array<{ name: string; mac: string; state: string }>) ?? [],
|
||||||
|
first_seen: existing?.first_seen ?? now,
|
||||||
|
last_seen: now,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
logger.info(`HARDWARE UPDATED: ${mac} -- ${msg.manufacturer ?? "?"} ${msg.product ?? "?"} (${msg.cpu_model ?? "?"}, ${msg.cpu_cores ?? "?"} cores, ${msg.memory_gb ?? "?"}GB RAM)`);
|
||||||
|
return { status: "ok", data: { mac } };
|
||||||
|
});
|
||||||
|
|
||||||
labdConn.onCommand("command-role-update", async (msg) => {
|
labdConn.onCommand("command-role-update", async (msg) => {
|
||||||
if (msg.type !== "command-role-update") throw new Error("unexpected");
|
if (msg.type !== "command-role-update") throw new Error("unexpected");
|
||||||
const mac = msg.mac.toLowerCase();
|
const mac = msg.mac.toLowerCase();
|
||||||
|
|||||||
@@ -139,12 +139,22 @@ export function registerApiRoutes(
|
|||||||
? detailStr.replace("ready at ", "").trim()
|
? detailStr.replace("ready at ", "").trim()
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
|
const hw = s.discovered[mac];
|
||||||
const installedInfo: InstalledInfo = {
|
const installedInfo: InstalledInfo = {
|
||||||
hostname: cfg?.hostname ?? "?",
|
hostname: cfg?.hostname ?? "?",
|
||||||
role: cfg?.role ?? "?",
|
role: cfg?.role ?? "?",
|
||||||
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
|
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
|
||||||
ip,
|
ip,
|
||||||
installed_at: new Date().toISOString(),
|
installed_at: new Date().toISOString(),
|
||||||
|
// Preserve hardware info from discovery
|
||||||
|
...(hw ? {
|
||||||
|
product: hw.product,
|
||||||
|
manufacturer: hw.manufacturer,
|
||||||
|
cpu_model: hw.cpu_model,
|
||||||
|
cpu_cores: hw.cpu_cores,
|
||||||
|
memory_gb: hw.memory_gb,
|
||||||
|
arch: hw.arch,
|
||||||
|
} : {}),
|
||||||
};
|
};
|
||||||
s.installed[mac] = installedInfo;
|
s.installed[mac] = installedInfo;
|
||||||
|
|
||||||
@@ -359,6 +369,23 @@ export function registerApiRoutes(
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Simple machine state query (used by ks-auto for ISO boot dispatch)
|
||||||
|
app.get<{
|
||||||
|
Params: { mac: string };
|
||||||
|
}>("/api/machine-state/:mac", async (request, reply) => {
|
||||||
|
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
|
||||||
|
const currentState = state.load();
|
||||||
|
|
||||||
|
if (currentState.debug[mac]) return reply.send("debug");
|
||||||
|
if (currentState.install_queue[mac]) {
|
||||||
|
const progress = currentState.install_queue[mac].progress;
|
||||||
|
return reply.send(progress ? "installing" : "queued");
|
||||||
|
}
|
||||||
|
if (currentState.installed[mac]) return reply.send("installed");
|
||||||
|
if (currentState.discovered[mac]) return reply.send("discovered");
|
||||||
|
return reply.send("unknown");
|
||||||
|
});
|
||||||
|
|
||||||
// Update a machine's role (e.g. promote infra -> labcontroller)
|
// Update a machine's role (e.g. promote infra -> labcontroller)
|
||||||
app.post<{
|
app.post<{
|
||||||
Body: {
|
Body: {
|
||||||
|
|||||||
@@ -102,7 +102,8 @@ echo " - Standard Asahi boot infrastructure (m1n1 + U-Boot)"
|
|||||||
echo " - Fedora Asahi Remix root partition"
|
echo " - Fedora Asahi Remix root partition"
|
||||||
echo " - LVM data partition (remaining space)"
|
echo " - LVM data partition (remaining space)"
|
||||||
echo ""
|
echo ""
|
||||||
echo " On first boot, LVM volumes are created automatically."
|
echo " After first boot, SSH in and set up LVM:"
|
||||||
|
echo " ssh lab@<ip> 'curl -sf \${BASTION}/asahi/firstboot.sh | sudo bash'"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Run the installer
|
# Run the installer
|
||||||
@@ -150,10 +151,10 @@ fi
|
|||||||
app.get<{
|
app.get<{
|
||||||
Querystring: { hostname?: string; role?: string; mac?: string; user?: string };
|
Querystring: { hostname?: string; role?: string; mac?: string; user?: string };
|
||||||
}>("/asahi/firstboot.sh", async (request, reply) => {
|
}>("/asahi/firstboot.sh", async (request, reply) => {
|
||||||
const hostname = request.query.hostname ?? "mac-studio";
|
const hostname = request.query.hostname ?? "unknown";
|
||||||
const role = (request.query.role ?? "infra") as Role;
|
const role = (request.query.role ?? "infra") as Role;
|
||||||
const mac = request.query.mac ?? "unknown";
|
const mac = request.query.mac ?? "unknown";
|
||||||
const user = request.query.user ?? config.adminUser;
|
const user = request.query.user ?? "lab";
|
||||||
|
|
||||||
const script = renderFirstbootScript({
|
const script = renderFirstbootScript({
|
||||||
hostname,
|
hostname,
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ function generateIso(config: BastionConfig, outputPath: string): void {
|
|||||||
"# Map iPXE arch names to Fedora mirror paths (arm64 -> aarch64)",
|
"# Map iPXE arch names to Fedora mirror paths (arm64 -> aarch64)",
|
||||||
"set fedarch ${buildarch}",
|
"set fedarch ${buildarch}",
|
||||||
"iseq ${buildarch} arm64 && set fedarch aarch64 ||",
|
"iseq ${buildarch} arm64 && set fedarch aarch64 ||",
|
||||||
`kernel file:/vmlinuz-\${buildarch} inst.ks=${bastionUrl}/discover.ks inst.repo=${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/\${fedarch}/os inst.text || goto no_kernel`,
|
`kernel file:/vmlinuz-\${buildarch} inst.ks=${bastionUrl}/ks-auto inst.repo=${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/\${fedarch}/os inst.text || goto no_kernel`,
|
||||||
`initrd file:/initrd-\${buildarch} || goto no_kernel`,
|
`initrd file:/initrd-\${buildarch} || goto no_kernel`,
|
||||||
"boot || shell",
|
"boot || shell",
|
||||||
"",
|
"",
|
||||||
|
|||||||
@@ -41,6 +41,150 @@ export function registerKickstartRoutes(
|
|||||||
return reply.type("text/plain").send(ks);
|
return reply.type("text/plain").send(ks);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Auto-detecting kickstart for ISO boot (no-network machines like R1 ARM).
|
||||||
|
// %pre detects MAC, queries bastion state, writes dynamic kickstart to /tmp.
|
||||||
|
// Main body %include's it — so Anaconda gets either discover or install content.
|
||||||
|
app.get("/ks-auto", async (_request, reply) => {
|
||||||
|
const bastionUrl = `http://${config.serverIp}:${config.httpPort}`;
|
||||||
|
|
||||||
|
const ks = `# Lab Bastion -- Auto-detect kickstart (ISO boot)
|
||||||
|
# %pre detects MAC, queries bastion state, writes /tmp/dynamic.ks.
|
||||||
|
# Main body %include's it to get either discovery reboot or full install.
|
||||||
|
|
||||||
|
%pre --erroronfail --log=/tmp/ks-auto.log
|
||||||
|
#!/bin/bash
|
||||||
|
set -x
|
||||||
|
|
||||||
|
# -- Detect MAC address --
|
||||||
|
MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||||
|
echo "Detected MAC: $MAC"
|
||||||
|
|
||||||
|
# -- Wait for network (Linux drivers may take a moment) --
|
||||||
|
for i in $(seq 1 30); do
|
||||||
|
if curl -sf "${bastionUrl}/healthz" >/dev/null 2>&1; then
|
||||||
|
echo "Bastion reachable at ${bastionUrl}"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
echo "Waiting for network... ($i/30)"
|
||||||
|
sleep 2
|
||||||
|
done
|
||||||
|
|
||||||
|
# -- Query bastion for machine state --
|
||||||
|
STATE=$(curl -sf "${bastionUrl}/api/machine-state/$MAC" 2>/dev/null || echo "unknown")
|
||||||
|
echo "Machine state: $STATE"
|
||||||
|
|
||||||
|
case "$STATE" in
|
||||||
|
queued|installing)
|
||||||
|
echo "=== Machine queued for install. Fetching install kickstart... ==="
|
||||||
|
curl -sf "${bastionUrl}/ks?mac=$MAC" > /tmp/dynamic.ks
|
||||||
|
if [ -s /tmp/dynamic.ks ]; then
|
||||||
|
echo "Install kickstart downloaded ($(wc -l < /tmp/dynamic.ks) lines)"
|
||||||
|
else
|
||||||
|
echo "ERROR: Failed to download install kickstart"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Run any %pre scripts from the downloaded kickstart.
|
||||||
|
# Anaconda only runs %pre from the top-level file, not from %include'd files.
|
||||||
|
python3 -c "
|
||||||
|
import re, subprocess
|
||||||
|
content = open('/tmp/dynamic.ks').read()
|
||||||
|
blocks = re.findall(r'%pre[^\\n]*\\n(.*?)%end', content, re.DOTALL)
|
||||||
|
for i, script in enumerate(blocks):
|
||||||
|
path = f'/tmp/inner-pre-{i}.sh'
|
||||||
|
with open(path, 'w') as f:
|
||||||
|
f.write(script)
|
||||||
|
print(f'Running inner %pre script {i} ({len(script.splitlines())} lines)')
|
||||||
|
subprocess.run(['bash', path], check=False)
|
||||||
|
"
|
||||||
|
;;
|
||||||
|
|
||||||
|
debug)
|
||||||
|
echo "=== Debug mode ==="
|
||||||
|
curl -sf "${bastionUrl}/debug.ks?mac=$MAC" > /tmp/dynamic.ks 2>/dev/null
|
||||||
|
if [ ! -s /tmp/dynamic.ks ]; then
|
||||||
|
echo "rescue" > /tmp/dynamic.ks
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
echo "=== Running hardware discovery ==="
|
||||||
|
# Collect hardware info
|
||||||
|
PRODUCT=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo "unknown")
|
||||||
|
BOARD=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo "unknown")
|
||||||
|
SERIAL=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo "unknown")
|
||||||
|
MANUFACTURER=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo "unknown")
|
||||||
|
CPUMODEL=$(grep -m1 'model name' /proc/cpuinfo | cut -d: -f2 | sed 's/^ //')
|
||||||
|
CPUCORES=$(grep -c '^processor' /proc/cpuinfo)
|
||||||
|
MEMGB=$(awk '/MemTotal/ {printf "%d", $2/1024/1024}' /proc/meminfo)
|
||||||
|
ARCHTYPE=$(uname -m)
|
||||||
|
|
||||||
|
DISKS_JSON=$(lsblk -Jb -o NAME,SIZE,TYPE,MODEL 2>/dev/null | python3 -c "
|
||||||
|
import sys, json
|
||||||
|
data = json.load(sys.stdin)
|
||||||
|
disks = [d for d in data.get('blockdevices', []) if d.get('type') == 'disk']
|
||||||
|
result = []
|
||||||
|
for d in disks:
|
||||||
|
size_gb = round(int(d.get('size', 0)) / 1073741824, 1)
|
||||||
|
result.append({'name': d.get('name', '?'), 'size_gb': size_gb, 'model': (d.get('model') or 'unknown').strip()})
|
||||||
|
print(json.dumps(result))
|
||||||
|
" 2>/dev/null || echo '[]')
|
||||||
|
|
||||||
|
NICS_JSON=$(ip -j link show 2>/dev/null | python3 -c "
|
||||||
|
import sys, json
|
||||||
|
nics = json.load(sys.stdin)
|
||||||
|
result = []
|
||||||
|
for n in nics:
|
||||||
|
if n.get('link_type') == 'loopback': continue
|
||||||
|
result.append({'name': n.get('ifname', '?'), 'mac': n.get('address', '?'), 'state': n.get('operstate', '?')})
|
||||||
|
print(json.dumps(result))
|
||||||
|
" 2>/dev/null || echo '[]')
|
||||||
|
|
||||||
|
PAYLOAD=$(python3 -c "
|
||||||
|
import json
|
||||||
|
print(json.dumps({
|
||||||
|
'mac': '$MAC', 'product': '$PRODUCT', 'board': '$BOARD', 'serial': '$SERIAL',
|
||||||
|
'manufacturer': '$MANUFACTURER', 'cpu_model': '$CPUMODEL',
|
||||||
|
'cpu_cores': int('$CPUCORES' or 0), 'memory_gb': int('$MEMGB' or 0),
|
||||||
|
'arch': '$ARCHTYPE', 'disks': $DISKS_JSON, 'nics': $NICS_JSON
|
||||||
|
}))
|
||||||
|
")
|
||||||
|
|
||||||
|
curl -sf -X POST "${bastionUrl}/api/discover" \\
|
||||||
|
-H "Content-Type: application/json" \\
|
||||||
|
-d "$PAYLOAD" || true
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "=== Discovery complete ==="
|
||||||
|
echo "Machine MAC: $MAC"
|
||||||
|
echo "Queue for install: labctl provision install $MAC <hostname> --role infra"
|
||||||
|
echo "Then reboot to start installation."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Write a minimal kickstart that just reboots
|
||||||
|
cat > /tmp/dynamic.ks << 'DISCOVER_KS'
|
||||||
|
# Discovery mode -- reboot to allow install queue
|
||||||
|
reboot
|
||||||
|
DISCOVER_KS
|
||||||
|
|
||||||
|
# Force reboot now (don't wait for Anaconda)
|
||||||
|
sleep 3
|
||||||
|
echo 1 > /proc/sys/kernel/sysrq
|
||||||
|
echo b > /proc/sysrq-trigger
|
||||||
|
sleep 5
|
||||||
|
reboot -f
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
%end
|
||||||
|
|
||||||
|
# Include the dynamically chosen kickstart
|
||||||
|
%include /tmp/dynamic.ks
|
||||||
|
`;
|
||||||
|
|
||||||
|
return reply.type("text/plain").send(ks);
|
||||||
|
});
|
||||||
|
|
||||||
// Ubuntu autoinstall user-data (cloud-init)
|
// Ubuntu autoinstall user-data (cloud-init)
|
||||||
app.get<{ Params: { mac: string } }>("/autoinstall/:mac/user-data", async (request, reply) => {
|
app.get<{ Params: { mac: string } }>("/autoinstall/:mac/user-data", async (request, reply) => {
|
||||||
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
|
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
|
||||||
|
|||||||
@@ -166,6 +166,7 @@ export class BastionConnection {
|
|||||||
case "command-role-update":
|
case "command-role-update":
|
||||||
case "command-debug":
|
case "command-debug":
|
||||||
case "command-register":
|
case "command-register":
|
||||||
|
case "command-discover":
|
||||||
void this.handleCommand(msg);
|
void this.handleCommand(msg);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -111,6 +111,29 @@ mount_lv() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ── Write fstab function (idempotent) ────────────────────────────
|
||||||
|
write_lab_fstab() {
  # Rewrites /etc/fstab so the labvg logical volumes own swap, /var, /var/log,
  # /home and /srv (plus any role-specific lines rendered below).
  # Re-runnable: everything a previous run added is removed first, so entries
  # never accumulate.

  # Clean slate. Note that rows this function commented out on an earlier run
  # (prefixed "# lab-lvm:") are deleted here, not restored.
  sed -i '/# lab-lvm:/d' /etc/fstab
  sed -i '/# Lab LVM volumes/d' /etc/fstab
  grep -v "/dev/labvg/" /etc/fstab > /etc/fstab.tmp && mv /etc/fstab.tmp /etc/fstab

  # Comment out non-LVM entries for mount points we manage.
  # The mount point is compared as an exact field (column 2) instead of a
  # substring followed by a space: fstab columns may be separated by tabs or
  # runs of spaces, and a substring test would also hit rows that merely
  # mention one of these paths in another column.
  awk '
    /^[[:space:]]*#/ { print; next }
    $2 == "/var" || $2 == "/var/log" || $2 == "/home" || $2 == "/srv" { print "# lab-lvm: " $0; next }
    { print }
  ' /etc/fstab > /etc/fstab.tmp && mv /etc/fstab.tmp /etc/fstab

  # Add fresh LVM entries
  echo "# Lab LVM volumes" >> /etc/fstab
  echo "/dev/labvg/swap none swap defaults 0 0" >> /etc/fstab
  echo "/dev/labvg/var /var xfs defaults 0 0" >> /etc/fstab
  echo "/dev/labvg/varlog /var/log xfs defaults 0 0" >> /etc/fstab
  echo "/dev/labvg/home /home xfs defaults 0 0" >> /etc/fstab
  echo "/dev/labvg/srv /srv xfs defaults 0 0" >> /etc/fstab
  ${roleFstabLines.join('\n ')}
}
|
||||||
|
|
||||||
# ── Check for existing VG ────────────────────────────────────────
|
# ── Check for existing VG ────────────────────────────────────────
|
||||||
if vgs labvg &>/dev/null; then
|
if vgs labvg &>/dev/null; then
|
||||||
echo "Volume group 'labvg' already exists — reprovision detected."
|
echo "Volume group 'labvg' already exists — reprovision detected."
|
||||||
@@ -129,22 +152,11 @@ ${roleMountLines.map(l => ` ${l}`).join('\n')}
|
|||||||
echo " Enabled swap"
|
echo " Enabled swap"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Ensure fstab entries exist
|
# Ensure fstab entries exist — comment out conflicting btrfs subvol entries
|
||||||
grep -q "labvg" /etc/fstab || {
|
write_lab_fstab
|
||||||
echo "# Lab LVM volumes (re-added after reprovision)" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/swap none swap defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/var /var xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/varlog /var/log xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/home /home xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/srv /srv xfs defaults 0 0" >> /etc/fstab
|
|
||||||
${roleFstabLines.map(l => ` ${l}`).join('\n')}
|
|
||||||
}
|
|
||||||
|
|
||||||
echo "Existing LVM volumes re-mounted."
|
echo "Existing LVM volumes re-mounted."
|
||||||
touch "$MARKER"
|
else
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ── Fresh install: create LVM ────────────────────────────────────
|
# ── Fresh install: create LVM ────────────────────────────────────
|
||||||
echo "Creating LVM on $DATA_PART..."
|
echo "Creating LVM on $DATA_PART..."
|
||||||
|
|
||||||
@@ -210,34 +222,39 @@ echo "NOTE: /var and /var/log will switch to LVM on next reboot."
|
|||||||
# Enable swap
|
# Enable swap
|
||||||
swapon /dev/labvg/swap 2>/dev/null || true
|
swapon /dev/labvg/swap 2>/dev/null || true
|
||||||
|
|
||||||
# Write fstab entries
|
write_lab_fstab
|
||||||
echo "" >> /etc/fstab
|
|
||||||
echo "# Lab LVM volumes" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/swap none swap defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/var /var xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/varlog /var/log xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/home /home xfs defaults 0 0" >> /etc/fstab
|
|
||||||
echo "/dev/labvg/srv /srv xfs defaults 0 0" >> /etc/fstab
|
|
||||||
${roleFstabLines.join('\n')}
|
|
||||||
|
|
||||||
echo "LVM setup complete."
|
echo "LVM setup complete."
|
||||||
lvs labvg
|
lvs labvg
|
||||||
|
|
||||||
# ── Set hostname ─────────────────────────────────────────────────
|
fi # end if/else for reprovision vs fresh install
|
||||||
hostnamectl set-hostname "${hostname}"
|
|
||||||
|
# ── Set hostname (use configured value, or keep existing) ────────
|
||||||
|
CONF_HOSTNAME="${hostname}"
|
||||||
|
if [ "$CONF_HOSTNAME" != "unknown" ] && [ -n "$CONF_HOSTNAME" ]; then
|
||||||
|
hostnamectl set-hostname "$CONF_HOSTNAME"
|
||||||
|
fi
|
||||||
|
ACTUAL_HOSTNAME=$(hostname)
|
||||||
|
|
||||||
|
# ── Detect MAC address ───────────────────────────────────────────
|
||||||
|
CONF_MAC="${mac}"
|
||||||
|
if [ "$CONF_MAC" = "unknown" ] || [ -z "$CONF_MAC" ]; then
|
||||||
|
CONF_MAC=$(ip -o link show | grep -v "lo:" | grep "state UP" | head -1 | grep -oP 'link/ether \\K[^ ]+' || echo "unknown")
|
||||||
|
fi
|
||||||
|
|
||||||
# ── Configure admin user ─────────────────────────────────────────
|
# ── Configure admin user ─────────────────────────────────────────
|
||||||
if ! id "${adminUser}" &>/dev/null; then
|
ADMIN="${adminUser}"
|
||||||
useradd -m -G wheel "${adminUser}"
|
if ! id "$ADMIN" &>/dev/null; then
|
||||||
echo "${adminUser} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/${adminUser}
|
useradd -m -G wheel "$ADMIN"
|
||||||
chmod 440 /etc/sudoers.d/${adminUser}
|
echo "$ADMIN ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$ADMIN
|
||||||
|
chmod 440 /etc/sudoers.d/$ADMIN
|
||||||
fi
|
fi
|
||||||
ADMIN_SSH="/home/${adminUser}/.ssh"
|
ADMIN_SSH="/home/$ADMIN/.ssh"
|
||||||
mkdir -p "$ADMIN_SSH"
|
mkdir -p "$ADMIN_SSH"
|
||||||
chmod 700 "$ADMIN_SSH"
|
chmod 700 "$ADMIN_SSH"
|
||||||
${sshKeyBlock}
|
${sshKeyBlock}
|
||||||
chmod 600 "$ADMIN_SSH/authorized_keys"
|
chmod 600 "$ADMIN_SSH/authorized_keys"
|
||||||
chown -R ${adminUser}:${adminUser} "$ADMIN_SSH"
|
chown -R $ADMIN:$ADMIN "$ADMIN_SSH"
|
||||||
|
|
||||||
# Also authorize root
|
# Also authorize root
|
||||||
mkdir -p /root/.ssh
|
mkdir -p /root/.ssh
|
||||||
@@ -246,14 +263,14 @@ ${rootSshKeyBlock}
|
|||||||
chmod 600 /root/.ssh/authorized_keys
|
chmod 600 /root/.ssh/authorized_keys
|
||||||
|
|
||||||
# ── Harden SSH (takes effect on next sshd restart/reboot) ────────
|
# ── Harden SSH (takes effect on next sshd restart/reboot) ────────
|
||||||
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
||||||
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||||
|
|
||||||
# ── Write provisioning metadata ──────────────────────────────────
|
# ── Write provisioning metadata ──────────────────────────────────
|
||||||
cat > /etc/lab-provisioned << LABMETA
|
cat > /etc/lab-provisioned << LABMETA
|
||||||
hostname=${hostname}
|
hostname=$ACTUAL_HOSTNAME
|
||||||
role=${role}
|
role=${role}
|
||||||
mac=${mac}
|
mac=$CONF_MAC
|
||||||
provisioned_at=$(date -Iseconds)
|
provisioned_at=$(date -Iseconds)
|
||||||
method=asahi-firstboot
|
method=asahi-firstboot
|
||||||
LABMETA
|
LABMETA
|
||||||
@@ -263,9 +280,9 @@ IP=$(hostname -I | awk '{print $1}')
|
|||||||
echo "Registering with bastion at ${serverIp}:${httpPort}..."
|
echo "Registering with bastion at ${serverIp}:${httpPort}..."
|
||||||
curl -sf -X POST "http://${serverIp}:${httpPort}/api/register" \\
|
curl -sf -X POST "http://${serverIp}:${httpPort}/api/register" \\
|
||||||
-H "Content-Type: application/json" \\
|
-H "Content-Type: application/json" \\
|
||||||
-d "{\\"mac\\":\\"${mac}\\",\\"hostname\\":\\"${hostname}\\",\\"role\\":\\"${role}\\",\\"ip\\":\\"$IP\\"}" \\
|
-d "{\\"mac\\":\\"$CONF_MAC\\",\\"hostname\\":\\"$ACTUAL_HOSTNAME\\",\\"role\\":\\"${role}\\",\\"ip\\":\\"$IP\\"}" \\
|
||||||
2>/dev/null && echo " Registered as ${hostname} ($IP)" \\
|
2>/dev/null && echo " Registered as $ACTUAL_HOSTNAME ($IP)" \\
|
||||||
|| echo " WARNING: Could not reach bastion — register manually with: labctl provision register ${mac} ${hostname} --role ${role} --ip $IP"
|
|| echo " WARNING: Could not reach bastion — register manually with: labctl provision register $CONF_MAC $ACTUAL_HOSTNAME --role ${role} --ip $IP"
|
||||||
|
|
||||||
# ── Mark done ────────────────────────────────────────────────────
|
# ── Mark done ────────────────────────────────────────────────────
|
||||||
touch "$MARKER"
|
touch "$MARKER"
|
||||||
|
|||||||
@@ -184,7 +184,8 @@ describe("renderFirstbootScript", () => {
|
|||||||
|
|
||||||
it("sets hostname", () => {
|
it("sets hostname", () => {
|
||||||
const script = renderFirstbootScript({ ...baseParams, role: "worker" });
|
const script = renderFirstbootScript({ ...baseParams, role: "worker" });
|
||||||
expect(script).toContain('hostnamectl set-hostname "test-node"');
|
expect(script).toContain('CONF_HOSTNAME="test-node"');
|
||||||
|
expect(script).toContain("hostnamectl set-hostname");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("includes bastion self-registration", () => {
|
it("includes bastion self-registration", () => {
|
||||||
|
|||||||
@@ -104,6 +104,16 @@ export class LabdClient {
|
|||||||
return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } });
|
return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Submit discovered hardware details for one machine.
 *
 * The object is forwarded unchanged as the request body of
 * `POST /api/machines/discover`. Only `mac` is mandatory; every other field
 * may be omitted.
 *
 * @param data - Machine identity (`mac`) plus optional DMI, CPU, memory,
 *   architecture, disk and NIC information.
 * @returns Whatever `this.request` resolves to, typed as a status string with
 *   an optional error message.
 */
async discoverMachine(data: {
  mac: string;
  product?: string;
  board?: string;
  serial?: string;
  manufacturer?: string;
  cpu_model?: string;
  cpu_cores?: number;
  memory_gb?: number;
  arch?: string;
  disks?: Array<{ name: string; size_gb: number; model: string }>;
  nics?: Array<{ name: string; mac: string; state: string }>;
}): Promise<{ status: string; error?: string }> {
  const requestOptions = { body: data };
  return this.request("POST", "/api/machines/discover", requestOptions);
}
|
||||||
|
|
||||||
async forgetMachine(mac: string): Promise<{ status: string }> {
|
async forgetMachine(mac: string): Promise<{ status: string }> {
|
||||||
return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
|
return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ export function registerAppCommand(program: Command): void {
|
|||||||
.command("install <target>")
|
.command("install <target>")
|
||||||
.description("Install k3s on a target machine (hostname, IP, or MAC)")
|
.description("Install k3s on a target machine (hostname, IP, or MAC)")
|
||||||
.option("--role <role>", "k3s role: infra (server) or worker (agent)", "infra")
|
.option("--role <role>", "k3s role: infra (server) or worker (agent)", "infra")
|
||||||
.option("--user <user>", "SSH user", "lab")
|
.option("--user <user>", "SSH user", "root")
|
||||||
.option("--k3s-server <url>", "k3s server URL (required for worker role)")
|
.option("--k3s-server <url>", "k3s server URL (required for worker role)")
|
||||||
.option("--k3s-token <token>", "k3s join token (required for worker role)")
|
.option("--k3s-token <token>", "k3s join token (required for worker role)")
|
||||||
.action(async (target: string, opts: {
|
.action(async (target: string, opts: {
|
||||||
@@ -164,7 +164,7 @@ export function registerAppCommand(program: Command): void {
|
|||||||
k3sCmd
|
k3sCmd
|
||||||
.command("health [target]")
|
.command("health [target]")
|
||||||
.description("Check k3s health (all hosts if no target given)")
|
.description("Check k3s health (all hosts if no target given)")
|
||||||
.option("--user <user>", "SSH user", "lab")
|
.option("--user <user>", "SSH user", "root")
|
||||||
.action(async (target: string | undefined, opts: { user: string }) => {
|
.action(async (target: string | undefined, opts: { user: string }) => {
|
||||||
const sshKey = findSshKey();
|
const sshKey = findSshKey();
|
||||||
|
|
||||||
@@ -304,7 +304,7 @@ export function registerAppCommand(program: Command): void {
|
|||||||
k3sCmd
|
k3sCmd
|
||||||
.command("list")
|
.command("list")
|
||||||
.description("List installed machines and their k3s status")
|
.description("List installed machines and their k3s status")
|
||||||
.option("--user <user>", "SSH user", "lab")
|
.option("--user <user>", "SSH user", "root")
|
||||||
.action(async (opts: { user: string }) => {
|
.action(async (opts: { user: string }) => {
|
||||||
let state: BastionState;
|
let state: BastionState;
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -59,9 +59,9 @@ export function registerAsahiCommand(parent: Command): void {
|
|||||||
console.log(` labvg/longhorn (remaining space)${RESET}`);
|
console.log(` labvg/longhorn (remaining space)${RESET}`);
|
||||||
console.log("");
|
console.log("");
|
||||||
console.log(` After first boot, SSH in and run the firstboot script:`);
|
console.log(` After first boot, SSH in and run the firstboot script:`);
|
||||||
console.log(` ${BOLD}ssh root@<ip> 'curl -sf ${bastionUrl}/asahi/firstboot.sh?hostname=<name>\\&role=infra | bash'${RESET}`);
|
console.log(` ${BOLD}ssh root@<ip> 'curl -sf ${bastionUrl}/asahi/firstboot.sh | bash'${RESET}`);
|
||||||
console.log("");
|
console.log("");
|
||||||
console.log(` This sets up LVM and self-registers with the bastion.`);
|
console.log(` This sets up LVM, detects hostname/MAC, and self-registers.`);
|
||||||
console.log(` Then install k3s:`);
|
console.log(` Then install k3s:`);
|
||||||
console.log(` ${BOLD}labctl app k3s install <hostname> --role infra${RESET}`);
|
console.log(` ${BOLD}labctl app k3s install <hostname> --role infra${RESET}`);
|
||||||
console.log("");
|
console.log("");
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ export function registerLabcontrollerCommands(appCmd: Command): void {
|
|||||||
lcCmd
|
lcCmd
|
||||||
.command("deploy <target>")
|
.command("deploy <target>")
|
||||||
.description("Deploy labcontroller stack to a k3s node")
|
.description("Deploy labcontroller stack to a k3s node")
|
||||||
.option("--user <user>", "SSH user", "lab")
|
.option("--user <user>", "SSH user", "root")
|
||||||
.option("--crdb-replicas <n>", "CockroachDB replicas", "1")
|
.option("--crdb-replicas <n>", "CockroachDB replicas", "1")
|
||||||
.action(async (target: string, opts: {
|
.action(async (target: string, opts: {
|
||||||
user: string;
|
user: string;
|
||||||
@@ -193,7 +193,7 @@ export function registerLabcontrollerCommands(appCmd: Command): void {
|
|||||||
lcCmd
|
lcCmd
|
||||||
.command("status [target]")
|
.command("status [target]")
|
||||||
.description("Check labcontroller deployment status (all hosts if no target)")
|
.description("Check labcontroller deployment status (all hosts if no target)")
|
||||||
.option("--user <user>", "SSH user", "lab")
|
.option("--user <user>", "SSH user", "root")
|
||||||
.action(async (target: string | undefined, opts: { user: string }) => {
|
.action(async (target: string | undefined, opts: { user: string }) => {
|
||||||
const sshKey = findSshKey();
|
const sshKey = findSshKey();
|
||||||
const sshOpts = sshKey ? { keyPath: sshKey } : {};
|
const sshOpts = sshKey ? { keyPath: sshKey } : {};
|
||||||
|
|||||||
@@ -69,10 +69,10 @@ export function registerListCommand(parent: Command): void {
|
|||||||
const hostname = inst?.hostname ?? queued?.hostname ?? "-";
|
const hostname = inst?.hostname ?? queued?.hostname ?? "-";
|
||||||
const role = inst?.role ?? queued?.role ?? "-";
|
const role = inst?.role ?? queued?.role ?? "-";
|
||||||
const ip = inst?.ip ?? "-";
|
const ip = inst?.ip ?? "-";
|
||||||
const cpu = hw?.cpu_model ?? "-";
|
const cpu = hw?.cpu_model ?? inst?.cpu_model ?? "-";
|
||||||
const cores = hw?.cpu_cores != null ? String(hw.cpu_cores) : "-";
|
const cores = (hw?.cpu_cores ?? inst?.cpu_cores) != null ? String(hw?.cpu_cores ?? inst?.cpu_cores) : "-";
|
||||||
const ram = hw?.memory_gb != null ? `${hw.memory_gb}GB` : "-";
|
const ram = (hw?.memory_gb ?? inst?.memory_gb) != null ? `${hw?.memory_gb ?? inst?.memory_gb}GB` : "-";
|
||||||
const product = hw?.product ?? "-";
|
const product = hw?.product ?? inst?.product ?? "-";
|
||||||
|
|
||||||
const color = statusColor(status);
|
const color = statusColor(status);
|
||||||
|
|
||||||
|
|||||||
94
bastion/src/cli/src/commands/recheck.ts
Normal file
94
bastion/src/cli/src/commands/recheck.ts
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
// CLI command: provision recheck
|
||||||
|
// SSH into all installed machines, collect hardware info, update bastion state.
|
||||||
|
|
||||||
|
import type { Command } from "commander";
|
||||||
|
import { sshExec } from "@lab/modules";
|
||||||
|
import { getLabdClient } from "../api/config.js";
|
||||||
|
|
||||||
|
// ANSI escape sequences used to colour terminal output.
const BOLD = "\x1b[1m";
const GREEN = "\x1b[0;32m";
const RED = "\x1b[0;31m";
const DIM = "\x1b[2m";
const RESET = "\x1b[0m";

/** Options handed to every `sshExec` call made by this command. */
const SSH_OPTS = { timeoutMs: 30_000 };

/**
 * Shell helper `esc VALUE`: prints VALUE with backslashes and double quotes
 * backslash-escaped so it can be placed inside a JSON string literal.
 * Written with String.raw so the sed program reads exactly as the shell sees it.
 */
const SH_JSON_ESCAPE_FN = String.raw`esc() { printf "%s" "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'; }`;

/**
 * Shell script that collects hardware info and prints it as one JSON object.
 * Kept simple — no Python, pure shell + awk. The fragments are joined with
 * "; ", so each one must be a complete simple command.
 *
 * Fallbacks test the captured value rather than the exit status:
 * - A pipeline's status is that of its last command, so `grep … | sed … || …`
 *   never reaches the fallback when grep finds nothing (this is the case on
 *   CPUs whose /proc/cpuinfo has no "model name" line).
 * - `grep -c` prints `0` AND exits non-zero on zero matches, so `|| echo 0`
 *   would emit the digit twice.
 * - The numeric fields must never be empty, or the JSON is invalid.
 */
const HW_COLLECT_SCRIPT = [
  SH_JSON_ESCAPE_FN,
  'P=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo unknown)',
  'B=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo unknown)',
  'S=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo unknown)',
  'M=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo unknown)',
  'C=$(grep -m1 "model name" /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=$(grep -m1 Model /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=unknown',
  'N=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null)',
  '[ -n "$N" ] || N=0',
  'R=$(awk "/MemTotal/ {printf \\"%d\\", \\$2/1024/1024}" /proc/meminfo 2>/dev/null)',
  '[ -n "$R" ] || R=0',
  'A=$(uname -m)',
  'printf \'{"product":"%s","board":"%s","serial":"%s","manufacturer":"%s","cpu_model":"%s","cpu_cores":%s,"memory_gb":%s,"arch":"%s"}\\n\' "$(esc "$P")" "$(esc "$B")" "$(esc "$S")" "$(esc "$M")" "$(esc "$C")" "$N" "$R" "$(esc "$A")"',
].join("; ");
|
||||||
|
|
||||||
|
/**
 * Register the `recheck` subcommand on `parent`.
 *
 * The command fetches the machine state from labd and selects every installed
 * machine that has an IP, optionally narrowed by `--target` (matched against
 * hostname or MAC). It runs HW_COLLECT_SCRIPT on each one over SSH and posts
 * the parsed JSON back through `client.discoverMachine`. Machines are handled
 * one at a time so each result lands on the output line opened for it.
 *
 * Exits the process with status 1 when the machine list cannot be fetched.
 * Per-machine failures are counted and reported in the summary instead.
 *
 * @param parent - Commander command that `recheck` is attached to.
 */
export function registerRecheckCommand(parent: Command): void {
  parent
    .command("recheck")
    .description("Refresh hardware info for all installed machines via SSH")
    .option("--user <user>", "SSH user", "root")
    .option("--target <hostname>", "Only recheck a specific machine (by hostname or MAC)")
    .action(async (opts: { user: string; target?: string }) => {
      const client = getLabdClient();

      let state;
      try {
        state = await client.getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // Build list of machines to check
      const targets: Array<{ mac: string; hostname: string; ip: string }> = [];
      for (const [mac, info] of Object.entries(state.installed)) {
        if (!info.ip) continue;
        if (opts.target && info.hostname !== opts.target && mac !== opts.target) continue;
        targets.push({ mac, hostname: info.hostname, ip: info.ip });
      }

      if (targets.length === 0) {
        console.log("No installed machines with IPs to check.");
        return;
      }

      console.log(`\n${BOLD}Rechecking ${targets.length} machine(s)...${RESET}\n`);

      let updated = 0;
      let failed = 0;

      for (const { mac, hostname, ip } of targets) {
        // Open the status line; the outcome is appended by console.log below.
        process.stdout.write(` ${hostname.padEnd(24)} ${DIM}(${ip})${RESET} `);

        try {
          const t0 = Date.now();
          const result = await sshExec(ip, opts.user, HW_COLLECT_SCRIPT, SSH_OPTS);
          const elapsed = Date.now() - t0;

          if (result.exitCode !== 0) {
            // Report the failure once, followed by a short stderr excerpt.
            console.log(`${RED}SSH failed (exit ${result.exitCode}, ${elapsed}ms)${RESET}`);
            if (result.stderr) console.log(` ${DIM}${result.stderr.substring(0, 200)}${RESET}`);
            failed++;
            continue;
          }

          const hwData = JSON.parse(result.stdout.trim());
          const response = await client.discoverMachine({ mac, ...hwData });
          // discoverMachine reports rejections through an optional `error`
          // field; route those to the FAIL path instead of counting them as
          // a successful update.
          if (response.error) throw new Error(response.error);

          const cpu = hwData.cpu_model || "?";
          const cores = hwData.cpu_cores || "?";
          const mem = hwData.memory_gb || "?";
          console.log(`${GREEN}OK${RESET} ${DIM}${cpu}, ${cores} cores, ${mem}GB${RESET}`);
          updated++;
        } catch (err) {
          // Covers SSH transport errors, unparsable output and rejected updates.
          console.log(`${RED}FAIL${RESET} ${DIM}${err instanceof Error ? err.message : String(err)}${RESET}`);
          failed++;
        }
      }

      console.log(`\n${BOLD}Done:${RESET} ${updated} updated, ${failed} failed\n`);
    });
}
|
||||||
@@ -11,7 +11,7 @@ export function registerStartCommand(parent: Command): void {
|
|||||||
.command("start")
|
.command("start")
|
||||||
.description("Start the bastion server (HTTP + dnsmasq PXE)")
|
.description("Start the bastion server (HTTP + dnsmasq PXE)")
|
||||||
.option("--port <port>", "HTTP port", "8080")
|
.option("--port <port>", "HTTP port", "8080")
|
||||||
.option("--dir <dir>", "Bastion data directory", "/tmp/lab-bastion")
|
.option("--dir <dir>", "Bastion data directory", process.env["BASTION_DIR"] ?? "/tmp/lab-bastion")
|
||||||
.option("--domain <domain>", "Internal domain for hostnames", "ad.itaz.eu")
|
.option("--domain <domain>", "Internal domain for hostnames", "ad.itaz.eu")
|
||||||
.option("--dhcp-mode <mode>", "DHCP mode: proxy or full", "proxy")
|
.option("--dhcp-mode <mode>", "DHCP mode: proxy or full", "proxy")
|
||||||
.option("--fedora <version>", "Fedora version", "43")
|
.option("--fedora <version>", "Fedora version", "43")
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ export function registerStopCommand(parent: Command): void {
|
|||||||
parent
|
parent
|
||||||
.command("stop")
|
.command("stop")
|
||||||
.description("Stop a running bastion server")
|
.description("Stop a running bastion server")
|
||||||
.option("--dir <dir>", "Bastion data directory", "/tmp/lab-bastion")
|
.option("--dir <dir>", "Bastion data directory", process.env["BASTION_DIR"] ?? "/tmp/lab-bastion")
|
||||||
.action((opts: { dir: string }) => {
|
.action((opts: { dir: string }) => {
|
||||||
const pidFile = `${opts.dir}/bastion.pid`;
|
const pidFile = `${opts.dir}/bastion.pid`;
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import { registerRegisterCommand } from "./commands/register.js";
|
|||||||
import { registerAsahiCommand } from "./commands/asahi.js";
|
import { registerAsahiCommand } from "./commands/asahi.js";
|
||||||
import { registerLogsCommand } from "./commands/logs.js";
|
import { registerLogsCommand } from "./commands/logs.js";
|
||||||
import { registerMakeIsoCommand } from "./commands/makeiso.js";
|
import { registerMakeIsoCommand } from "./commands/makeiso.js";
|
||||||
|
import { registerRecheckCommand } from "./commands/recheck.js";
|
||||||
import { registerConfigCommand } from "./commands/config.js";
|
import { registerConfigCommand } from "./commands/config.js";
|
||||||
import { registerLoginCommand } from "./commands/login.js";
|
import { registerLoginCommand } from "./commands/login.js";
|
||||||
import { registerDoctorCommand } from "./commands/doctor.js";
|
import { registerDoctorCommand } from "./commands/doctor.js";
|
||||||
@@ -104,6 +105,7 @@ export function createProgram(): Command {
|
|||||||
registerAsahiCommand(provisionCmd);
|
registerAsahiCommand(provisionCmd);
|
||||||
registerLogsCommand(provisionCmd);
|
registerLogsCommand(provisionCmd);
|
||||||
registerMakeIsoCommand(provisionCmd);
|
registerMakeIsoCommand(provisionCmd);
|
||||||
|
registerRecheckCommand(provisionCmd);
|
||||||
|
|
||||||
// config list/get/set/path
|
// config list/get/set/path
|
||||||
registerConfigCommand(program);
|
registerConfigCommand(program);
|
||||||
|
|||||||
23
bastion/src/core/package.json
Normal file
23
bastion/src/core/package.json
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"name": "@lab/core",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"type": "module",
|
||||||
|
"main": "./dist/index.js",
|
||||||
|
"types": "./dist/index.d.ts",
|
||||||
|
"exports": {
|
||||||
|
".": {
|
||||||
|
"import": "./dist/index.js",
|
||||||
|
"types": "./dist/index.d.ts"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsc --build",
|
||||||
|
"clean": "rimraf dist",
|
||||||
|
"test": "vitest",
|
||||||
|
"test:run": "vitest run"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@pulumi/pulumi": "^3.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
75
bastion/src/core/src/audit.ts
Normal file
75
bastion/src/core/src/audit.ts
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
// Audit event types for the labctl platform.
|
||||||
|
// Every mutation is tracked with correlation IDs for causal chains.
|
||||||
|
|
||||||
|
/** Every kind of event that can be written to the audit trail, grouped by name prefix. */
export type AuditEventKind =
  // resource_*
  | "resource_created" | "resource_updated" | "resource_deleted" | "resource_state_change"
  // plan / apply
  | "plan_generated" | "apply_started" | "apply_step" | "apply_completed"
  // driver_*
  | "driver_translate" | "driver_execute" | "driver_error"
  // fleet_*
  | "fleet_discovery" | "fleet_classification" | "fleet_approval" | "fleet_auto_approve"
  // pipeline_*
  | "pipeline_started" | "pipeline_step_started" | "pipeline_step_completed" | "pipeline_completed"
  // deploy_*
  | "deploy_started" | "deploy_completed" | "deploy_failed"
  // drift_* / sync_*
  | "drift_detected" | "drift_corrected" | "sync_triggered" | "sync_completed"
  // auth / access control
  | "auth_login" | "auth_logout" | "auth_bootstrap" | "rbac_decision" | "impersonation"
  // process and connection lifecycle
  | "server_started" | "controller_started" | "agent_connected" | "agent_disconnected"
  | "bastion_registered";

/** Component that emitted an audit event. */
export type AuditSource = "cli" | "labd" | "agent" | "driver" | "fleet-controller" | "sync-controller";

/** Outcome recorded on an audit event. */
export type AuditResult = "success" | "failure" | "denied" | "skipped";

/** A single audit record. */
export interface AuditEvent {
  // Identity and classification of the record.
  id: string;
  timestamp: Date;
  eventKind: AuditEventKind;
  source: AuditSource;
  verified: boolean;

  // Acting user and the scope the event happened in (all optional).
  userId?: string;
  userName?: string;
  sessionId?: string;
  environmentName?: string;
  accountName?: string;

  // Resource the event refers to, when there is one.
  resourceKind?: string;
  resourceName?: string;

  // Links events that belong to the same causal chain.
  correlationId: string;
  parentEventId?: string;

  // Free-form payload and outcome.
  details: { [key: string]: unknown };
  result: AuditResult;
  error?: string;
  durationMs?: number;
}
|
||||||
50
bastion/src/core/src/auth.ts
Normal file
50
bastion/src/core/src/auth.ts
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
// Auth types for the labctl platform.
|
||||||
|
// Bearer token auth for CLI/SDK. mTLS stays for agent/bastion.
|
||||||
|
|
||||||
|
/** Platform-wide role attached to a user account. */
export type UserRole = "USER" | "ADMIN";

/** A user account. */
export interface User {
  id: string;
  email: string;
  name?: string;
  role: UserRole;
  createdAt: Date;
}

/** A login session: a token tied to a user, with an expiry time. */
export interface Session {
  id: string;
  userId: string;
  token: string;
  expiresAt: Date;
  createdAt: Date;
}

/** A named group. */
export interface Group {
  id: string;
  name: string;
  description?: string;
}

/** Kinds of subject an RBAC definition can name. */
export type SubjectKind = "User" | "Group" | "ServiceAccount";

/** One grant: a role on a resource, optionally narrowed by name, environment or action. */
export interface RoleBinding {
  role:
    | "view"
    | "edit"
    | "create"
    | "delete"
    | "run"
    | "admin";
  resource: string;
  name?: string;
  environment?: string;
  action?: string;
}

/** Reference to a subject by kind and name. */
export interface RbacSubject {
  kind: SubjectKind;
  name: string;
}

/** A named set of role bindings applied to a list of subjects. */
export interface RbacDefinition {
  id: string;
  name: string;
  subjects: Array<RbacSubject>;
  roleBindings: Array<RoleBinding>;
  createdAt: Date;
  updatedAt: Date;
}
|
||||||
24
bastion/src/core/src/environment.ts
Normal file
24
bastion/src/core/src/environment.ts
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
// Environment and Account types.
|
||||||
|
// An Environment is a logical boundary (production, staging, dev).
|
||||||
|
// An Account is a configured driver instance with credentials.
|
||||||
|
|
||||||
|
/** A logical boundary such as production, staging or dev. */
export interface Environment {
  id: string;
  name: string;
  status:
    | "active"
    | "archived";
  createdAt: Date;
}

/** A configured driver instance; `config` holds its driver-specific settings. */
export interface Account {
  id: string;
  name: string;
  driver: string;
  config: { [key: string]: unknown };
  createdAt: Date;
}

/** Associates one environment with one account by id. */
export interface Binding {
  id: string;
  environmentId: string;
  accountId: string;
}
|
||||||
9
bastion/src/core/src/index.ts
Normal file
9
bastion/src/core/src/index.ts
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
// @lab/core — foundation types for the labctl platform.
|
||||||
|
// Phase 1 stub: resource types, auth types, audit types, Output<T>.
|
||||||
|
// Phase 5 adds: CompositeResource, evaluator integration, full SDK.
|
||||||
|
|
||||||
|
export * from "./resource.js";
|
||||||
|
export * from "./environment.js";
|
||||||
|
export * from "./audit.js";
|
||||||
|
export * from "./auth.js";
|
||||||
|
export { Output, output, all, interpolate, secret } from "./output.js";
|
||||||
5
bastion/src/core/src/output.ts
Normal file
5
bastion/src/core/src/output.ts
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
// Re-export Pulumi's Output<T> type for use across the platform.
|
||||||
|
// Cloud drivers use this for future values (endpoints, IPs, kubeconfigs).
|
||||||
|
// Phase 1: type re-export only. Phase 5 adds full evaluator integration.
|
||||||
|
|
||||||
|
export { Output, output, all, interpolate, secret } from "@pulumi/pulumi";
|
||||||
83
bastion/src/core/src/resource.ts
Normal file
83
bastion/src/core/src/resource.ts
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
// Core resource types for the labctl platform.
|
||||||
|
// Every managed thing (Server, Database, App, Cluster) is a Resource.
|
||||||
|
|
||||||
|
/** Where a resource record originally came from. */
export type ResourceOrigin =
  | "file"
  | "cli"
  | "fleet"
  | "imported";
|
||||||
|
/** Which mechanism is responsible for managing a resource. */
export type ResourceManagedBy =
  | "gitops"
  | "manual"
  | "auto";
|
||||||
|
|
||||||
|
/** Lifecycle states a resource can report. */
export type ResourceStatus = "pending" | "creating" | "ready" | "updating" | "deleting" | "error" | "unknown";
|
||||||
|
|
||||||
|
/**
 * Identity and provenance of a resource: what it is, where it lives and
 * how it came to be managed.
 */
export interface ResourceMetadata {
  /** Resource kind (for example one of the well-known kinds, or a driver-registered one). */
  kind: string;
  /** Resource name. */
  name: string;
  /** Identifier of the environment the resource belongs to. */
  environmentId: string;
  /** Identifier of the account the resource belongs to. */
  accountId: string;
  /** Where the resource record came from. */
  origin: ResourceOrigin;
  /** Which mechanism manages the resource. */
  managedBy: ResourceManagedBy;
  /** Optional reference to the source of the resource — format not specified here. */
  sourceRef?: string;
}
|
||||||
|
|
||||||
|
/**
 * Observed state of a resource.
 */
export interface ResourceState {
  /** Current lifecycle status. */
  status: ResourceStatus;
  /** Optional human-readable message accompanying the status. */
  message?: string;
  /** When the resource was last reconciled, if ever. */
  lastReconciled?: Date;
  /** Optional reference to the resource on its platform — format not specified here. */
  platformRef?: string;
}
|
||||||
|
|
||||||
|
/**
 * A managed resource, pairing the spec that is wanted with the spec that
 * was last observed.
 *
 * @typeParam TSpec - Shape of the resource's spec; defaults to an untyped record.
 */
export interface Resource<TSpec = Record<string, unknown>> {
  /** Unique identifier of the resource. */
  id: string;
  /** Identity and provenance of the resource. */
  metadata: ResourceMetadata;
  /** Spec the resource is supposed to have. */
  desiredSpec: TSpec;
  /** Spec the resource actually has, when known. */
  actualSpec?: TSpec;
  /** Observed state of the resource. */
  state: ResourceState;
  /** Instant at which the resource record was created. */
  createdAt: Date;
  /** Instant at which the resource record was last updated. */
  updatedAt: Date;
}
|
||||||
|
|
||||||
|
// Built-in resource kinds shipped with the platform. Drivers may register more,
// which is why kind fields elsewhere are typed as plain strings.
export const RESOURCE_KINDS = {
  SERVER: "server",
  DATABASE: "database",
  CACHE: "cache",
  CLUSTER: "cluster",
  APP: "app",
  SERVICE: "service",
  CRONJOB: "cronjob",
  NETWORK: "network",
  LOADBALANCER: "loadbalancer",
  DNSZONE: "dnszone",
  CERTIFICATE: "certificate",
  OBJECTSTORE: "objectstore",
  QUEUE: "queue",
  SECRET: "secret",
  FLEET: "fleet",
} as const;

// Union of the string values of RESOURCE_KINDS ("server" | "database" | ...).
type WellKnownKindTable = typeof RESOURCE_KINDS;
export type ResourceKind = WellKnownKindTable[keyof WellKnownKindTable];
|
||||||
|
|
||||||
|
// Resource aliases for CLI (kubectl-style shortnames)
export const RESOURCE_ALIASES: Record<string, string> = {
  srv: "server",
  db: "database",
  cl: "cluster",
  svc: "service",
  cj: "cronjob",
  lb: "loadbalancer",
  dns: "dnszone",
  cert: "certificate",
  os: "objectstore",
  mq: "queue",
  sec: "secret",
  fl: "fleet",
};

/**
 * Expands a CLI shortname to its full resource kind.
 *
 * The input is lower-cased first. If it matches a key of RESOURCE_ALIASES the
 * aliased kind is returned; otherwise the lower-cased input is returned
 * unchanged, so unknown or driver-registered kinds pass through.
 *
 * @param input - Kind or alias as typed by the user.
 * @returns The resolved kind, always a string.
 */
export function resolveResourceKind(input: string): string {
  const lower = input.toLowerCase();
  // RESOURCE_ALIASES is a plain object, so a bare index lookup would also find
  // inherited members ("constructor", "__proto__", ...) and return a
  // non-string. Only own keys count as aliases.
  if (!Object.prototype.hasOwnProperty.call(RESOURCE_ALIASES, lower)) {
    return lower;
  }
  return RESOURCE_ALIASES[lower] ?? lower;
}
|
||||||
8
bastion/src/core/tsconfig.json
Normal file
8
bastion/src/core/tsconfig.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"extends": "../../tsconfig.base.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"rootDir": "src",
|
||||||
|
"outDir": "dist"
|
||||||
|
},
|
||||||
|
"include": ["src/**/*.ts"]
|
||||||
|
}
|
||||||
@@ -26,8 +26,10 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@fastify/rate-limit": "^10.3.0",
|
"@fastify/rate-limit": "^10.3.0",
|
||||||
"@fastify/websocket": "^11.0.2",
|
"@fastify/websocket": "^11.0.2",
|
||||||
|
"@lab/core": "workspace:^",
|
||||||
"@lab/shared": "workspace:*",
|
"@lab/shared": "workspace:*",
|
||||||
"@prisma/client": "^6.9.0",
|
"@prisma/client": "^6.9.0",
|
||||||
|
"bcryptjs": "^3.0.3",
|
||||||
"fastify": "^5.3.3",
|
"fastify": "^5.3.3",
|
||||||
"winston": "^3.17.0",
|
"winston": "^3.17.0",
|
||||||
"ws": "^8.19.0",
|
"ws": "^8.19.0",
|
||||||
@@ -37,6 +39,7 @@
|
|||||||
"seed": "tsx prisma/seed.ts"
|
"seed": "tsx prisma/seed.ts"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@types/bcryptjs": "^3.0.0",
|
||||||
"@types/node": "^22.14.1",
|
"@types/node": "^22.14.1",
|
||||||
"@types/ws": "^8.18.1",
|
"@types/ws": "^8.18.1",
|
||||||
"prisma": "^6.9.0",
|
"prisma": "^6.9.0",
|
||||||
|
|||||||
@@ -7,6 +7,225 @@ datasource db {
|
|||||||
url = env("DATABASE_URL")
|
url = env("DATABASE_URL")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Auth (mcpctl pattern: email/password + bearer token sessions) ──
|
||||||
|
|
||||||
|
// A login account identified by a unique email address.
model User {
  id        String   @id @default(cuid())
  email     String   @unique
  password  String   // bcrypt
  name      String?
  role      UserRole @default(USER)
  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt

  // Back-relations. Session and GroupMember rows declare onDelete: Cascade
  // on their user relation, so they are removed together with the user.
  sessions  Session[]
  auditLogs AuditEvent[]
  groups    GroupMember[]
}
|
||||||
|
|
||||||
|
// Coarse per-user role; new users default to USER (see User.role).
enum UserRole {
  USER
  ADMIN
}
|
||||||
|
|
||||||
|
// A token session owned by one user. Deleting the user deletes its sessions.
model Session {
  id        String   @id @default(cuid())
  userId    String
  user      User     @relation(fields: [userId], references: [id], onDelete: Cascade)
  // @unique already backs token lookups with a unique index, so no separate
  // @@index([token]) is declared (it would only duplicate that index).
  token     String   @unique
  expiresAt DateTime
  createdAt DateTime @default(now())

  @@index([userId])
}
|
||||||
|
|
||||||
|
// A uniquely named group of users; membership is stored in GroupMember.
model Group {
  id          String        @id @default(cuid())
  name        String        @unique
  description String?
  createdAt   DateTime      @default(now())
  members     GroupMember[]
}
|
||||||
|
|
||||||
|
// Join row between Group and User. Removed when either side is deleted.
model GroupMember {
  id      String @id @default(cuid())
  groupId String
  group   Group  @relation(fields: [groupId], references: [id], onDelete: Cascade)
  userId  String
  user    User   @relation(fields: [userId], references: [id], onDelete: Cascade)

  // A user can be a member of a given group at most once.
  @@unique([groupId, userId])
}
|
||||||
|
|
||||||
|
// A non-user principal with a unique name and a unique token.
model ServiceAccount {
  id        String   @id @default(cuid())
  name      String   @unique
  token     String   @unique
  createdAt DateTime @default(now())
}
|
||||||
|
|
||||||
|
// ── RBAC (mcpctl pattern: named definitions with JSON subjects/bindings) ──
|
||||||
|
|
||||||
|
// A uniquely named RBAC definition. Subjects and bindings are stored as JSON
// arrays rather than as relational rows.
model RbacDefinition {
  id           String   @id @default(cuid())
  name         String   @unique
  subjects     Json     // [{kind: "User"|"Group"|"ServiceAccount", name: string}]
  roleBindings Json     // [{role, resource, name?, environment?, action?}]
  createdAt    DateTime @default(now())
  updatedAt    DateTime @updatedAt
}
|
||||||
|
|
||||||
|
// ── Audit (mcpctl pattern: fire-and-forget with correlation IDs) ──
|
||||||
|
|
||||||
|
// One audit record. Events that belong together share a correlationId.
model AuditEvent {
  id        String   @id @default(cuid())
  timestamp DateTime @default(now())
  eventKind String
  source    String   // cli | labd | agent | driver | fleet-controller | sync-controller
  verified  Boolean  @default(false)

  // Who and where. All optional; names are stored as plain strings next to
  // the optional user relation.
  userId          String?
  user            User?   @relation(fields: [userId], references: [id])
  userName        String?
  sessionId       String?
  environmentName String?
  accountName     String?

  // What the event was about.
  resourceKind String?
  resourceName String?

  // Correlation between events.
  correlationId String
  parentEventId String?

  // Outcome.
  details    Json    @default("{}")
  result     String  // success | failure | denied | skipped
  error      String?
  durationMs Int?

  @@index([correlationId])
  @@index([eventKind, timestamp])
  @@index([environmentName, timestamp])
  @@index([resourceKind, resourceName])
  @@index([userId, timestamp])
}
|
||||||
|
|
||||||
|
// ── Core infrastructure ──
|
||||||
|
|
||||||
|
// A uniquely named environment that accounts are bound to and resources live in.
model Environment {
  id        String   @id @default(cuid())
  name      String   @unique
  status    String   @default("active") // active | archived
  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt

  bindings  Binding[]
  resources Resource[]
}
|
||||||
|
|
||||||
|
// A uniquely named driver configuration.
model Account {
  id         String   @id @default(cuid())
  name       String   @unique
  driver     String   // baremetal-pxe | aws | gcp | kubernetes | ovh
  config     Json     @default("{}")
  // Credentials are kept in Infisical; this column only holds the path to them.
  secretPath String?
  createdAt  DateTime @default(now())
  updatedAt  DateTime @updatedAt

  bindings  Binding[]
  resources Resource[]
}
|
||||||
|
|
||||||
|
// Binds one account to one environment. Removed when either side is deleted.
model Binding {
  id            String      @id @default(cuid())
  environmentId String
  environment   Environment @relation(fields: [environmentId], references: [id], onDelete: Cascade)
  accountId     String
  account       Account     @relation(fields: [accountId], references: [id], onDelete: Cascade)

  // The same account can be bound to a given environment only once.
  @@unique([environmentId, accountId])
}
|
||||||
|
|
||||||
|
// A managed resource inside an environment, provisioned through an account.
// The relations below declare no onDelete action (unlike Binding).
model Resource {
  id             String      @id @default(cuid())
  kind           String
  name           String
  environmentId  String
  environment    Environment @relation(fields: [environmentId], references: [id])
  accountId      String
  account        Account     @relation(fields: [accountId], references: [id])
  origin         String      @default("cli") // file | cli | fleet | imported
  managedBy      String      @default("manual") // gitops | manual | auto
  sourceRef      String?
  desiredSpec    Json        @default("{}")
  actualSpec     Json?
  platformRef    String?
  status         String      @default("pending") // pending | creating | ready | updating | deleting | error
  statusMessage  String?
  lastReconciled DateTime?
  createdAt      DateTime    @default(now())
  updatedAt      DateTime    @updatedAt

  // A (kind, name) pair is unique within one environment.
  @@unique([kind, name, environmentId])
  @@index([environmentId])
  @@index([accountId])
  @@index([kind, status])
}
|
||||||
|
|
||||||
|
// A uniquely named secret with a version counter starting at 1.
model Secret {
  id        String   @id @default(cuid())
  name      String   @unique
  // Holds encrypted data: application-layer encryption used as the fallback
  // when Infisical is unavailable.
  data      Json     @default("{}")
  version   Int      @default(1)
  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt
}
|
||||||
|
|
||||||
|
// ── Fleet ──
|
||||||
|
|
||||||
|
// A fleet: a selector plus the pipelines applied to its members.
// NOTE(review): environmentId and accountId are plain strings here with no
// relation to Environment/Account, and name is not unique — confirm intended.
model Fleet {
  id               String   @id @default(cuid())
  name             String
  environmentId    String
  accountId        String
  selector         Json     // fact-matching rules
  onboardPipeline  Json     // step definitions
  offboardPipeline Json?
  approvalConfig   Json?
  status           String   @default("active")
  createdAt        DateTime @default(now())
  updatedAt        DateTime @updatedAt

  members FleetMember[]
}
|
||||||
|
|
||||||
|
// Membership of a server in a fleet. Removed when the fleet is deleted.
// serverId is a plain string with no declared relation.
model FleetMember {
  id       String   @id @default(cuid())
  fleetId  String
  fleet    Fleet    @relation(fields: [fleetId], references: [id], onDelete: Cascade)
  serverId String
  status   String   // discovered | pending | onboarding | active | offboarding | removed
  joinedAt DateTime @default(now())

  @@index([fleetId])
}
|
||||||
|
|
||||||
|
// ── Git sources (for sync controller) ──
|
||||||
|
|
||||||
|
// A uniquely named git repository location. branch and path have defaults;
// lastSync stays null until it is set.
model GitSource {
  id        String    @id @default(cuid())
  name      String    @unique
  repo      String
  branch    String    @default("main")
  path      String    @default("environments/")
  lastSync  DateTime?
  createdAt DateTime  @default(now())
}
|
||||||
|
|
||||||
|
// ── Existing v1.0 models (kept for bastion/agent compatibility) ──
|
||||||
|
|
||||||
model Server {
|
model Server {
|
||||||
id String @id @default(uuid())
|
id String @id @default(uuid())
|
||||||
hostname String @unique
|
hostname String @unique
|
||||||
@@ -17,13 +236,12 @@ model Server {
|
|||||||
labels Json @default("{}")
|
labels Json @default("{}")
|
||||||
ip String?
|
ip String?
|
||||||
agentVersion String?
|
agentVersion String?
|
||||||
status String @default("unknown") // unknown, online, offline, provisioning
|
status String @default("unknown")
|
||||||
lastHeartbeat DateTime?
|
lastHeartbeat DateTime?
|
||||||
createdAt DateTime @default(now())
|
createdAt DateTime @default(now())
|
||||||
updatedAt DateTime @updatedAt
|
updatedAt DateTime @updatedAt
|
||||||
|
|
||||||
agent Agent?
|
agent Agent?
|
||||||
auditLogs AuditLog[]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
model Agent {
|
model Agent {
|
||||||
@@ -33,112 +251,29 @@ model Agent {
|
|||||||
certificatePem String?
|
certificatePem String?
|
||||||
enrolledAt DateTime @default(now())
|
enrolledAt DateTime @default(now())
|
||||||
lastSeen DateTime?
|
lastSeen DateTime?
|
||||||
|
facts Json? // hardware facts reported by agent
|
||||||
|
|
||||||
@@index([serverId])
|
@@index([serverId])
|
||||||
}
|
}
|
||||||
|
|
||||||
model User {
|
|
||||||
id String @id @default(uuid())
|
|
||||||
username String @unique
|
|
||||||
displayName String?
|
|
||||||
certFingerprint String? @unique
|
|
||||||
createdAt DateTime @default(now())
|
|
||||||
updatedAt DateTime @updatedAt
|
|
||||||
|
|
||||||
roleBindings UserRole[]
|
|
||||||
auditLogs AuditLog[]
|
|
||||||
}
|
|
||||||
|
|
||||||
model Role {
|
|
||||||
id String @id @default(uuid())
|
|
||||||
name String @unique
|
|
||||||
description String?
|
|
||||||
createdAt DateTime @default(now())
|
|
||||||
|
|
||||||
permissions Permission[]
|
|
||||||
userBindings UserRole[]
|
|
||||||
}
|
|
||||||
|
|
||||||
model Permission {
|
|
||||||
id String @id @default(uuid())
|
|
||||||
roleId String
|
|
||||||
role Role @relation(fields: [roleId], references: [id], onDelete: Cascade)
|
|
||||||
type String @default("allow") // allow or deny
|
|
||||||
action String // read, exec, apply, destroy, manage, admin, kubectl, *
|
|
||||||
cloud String @default("*")
|
|
||||||
environment String @default("*")
|
|
||||||
server String @default("*")
|
|
||||||
|
|
||||||
@@index([roleId])
|
|
||||||
}
|
|
||||||
|
|
||||||
model UserRole {
|
|
||||||
id String @id @default(uuid())
|
|
||||||
userId String
|
|
||||||
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
|
|
||||||
roleId String
|
|
||||||
role Role @relation(fields: [roleId], references: [id], onDelete: Cascade)
|
|
||||||
|
|
||||||
@@unique([userId, roleId])
|
|
||||||
@@index([userId])
|
|
||||||
@@index([roleId])
|
|
||||||
}
|
|
||||||
|
|
||||||
model JoinToken {
|
model JoinToken {
|
||||||
id String @id @default(uuid())
|
id String @id @default(uuid())
|
||||||
token String @unique
|
token String @unique
|
||||||
type String @default("one-time") // one-time or reusable
|
type String @default("one-time")
|
||||||
label String?
|
label String?
|
||||||
usedBy String? // server hostname that used it
|
usedBy String?
|
||||||
usedAt DateTime?
|
usedAt DateTime?
|
||||||
revokedAt DateTime?
|
revokedAt DateTime?
|
||||||
createdAt DateTime @default(now())
|
createdAt DateTime @default(now())
|
||||||
expiresAt DateTime?
|
expiresAt DateTime?
|
||||||
}
|
}
|
||||||
|
|
||||||
model AuditLog {
|
|
||||||
id String @id @default(uuid())
|
|
||||||
userId String?
|
|
||||||
user User? @relation(fields: [userId], references: [id])
|
|
||||||
serverId String?
|
|
||||||
server Server? @relation(fields: [serverId], references: [id])
|
|
||||||
sessionId String?
|
|
||||||
action String // exec, kubectl, apply, login, rbac-denied, etc.
|
|
||||||
resourceType String? // server, cluster, role, app, etc.
|
|
||||||
resourceName String?
|
|
||||||
args String? // sanitized command args
|
|
||||||
result String @default("success") // success, denied, error
|
|
||||||
durationMs Int?
|
|
||||||
sourceIp String?
|
|
||||||
timestamp DateTime @default(now())
|
|
||||||
|
|
||||||
@@index([userId])
|
|
||||||
@@index([serverId])
|
|
||||||
@@index([sessionId])
|
|
||||||
@@index([timestamp])
|
|
||||||
@@index([action])
|
|
||||||
}
|
|
||||||
|
|
||||||
model PulumiRun {
|
|
||||||
id String @id @default(uuid())
|
|
||||||
userId String
|
|
||||||
stackName String
|
|
||||||
action String // up, preview, destroy
|
|
||||||
status String @default("pending") // pending, running, succeeded, failed
|
|
||||||
output String?
|
|
||||||
startedAt DateTime @default(now())
|
|
||||||
completedAt DateTime?
|
|
||||||
|
|
||||||
@@index([userId])
|
|
||||||
@@index([stackName])
|
|
||||||
}
|
|
||||||
|
|
||||||
model Bastion {
|
model Bastion {
|
||||||
id String @id @default(uuid())
|
id String @id @default(uuid())
|
||||||
hostname String @unique
|
hostname String @unique
|
||||||
network String
|
network String
|
||||||
serverIp String
|
serverIp String
|
||||||
status String @default("offline") // online, offline
|
status String @default("offline")
|
||||||
lastHeartbeat DateTime?
|
lastHeartbeat DateTime?
|
||||||
createdAt DateTime @default(now())
|
createdAt DateTime @default(now())
|
||||||
updatedAt DateTime @updatedAt
|
updatedAt DateTime @updatedAt
|
||||||
@@ -149,7 +284,7 @@ model Cluster {
|
|||||||
name String @unique
|
name String @unique
|
||||||
cloud String @default("baremetal")
|
cloud String @default("baremetal")
|
||||||
environment String @default("default")
|
environment String @default("default")
|
||||||
kubeconfigEnc String? // encrypted kubeconfig
|
kubeconfigEnc String?
|
||||||
labels Json @default("{}")
|
labels Json @default("{}")
|
||||||
createdAt DateTime @default(now())
|
createdAt DateTime @default(now())
|
||||||
updatedAt DateTime @updatedAt
|
updatedAt DateTime @updatedAt
|
||||||
|
|||||||
65
bastion/src/labd/src/middleware/bearer-auth.ts
Normal file
65
bastion/src/labd/src/middleware/bearer-auth.ts
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
// Bearer token auth middleware for Fastify.
|
||||||
|
// Validates Authorization header, resolves user identity, attaches to request.
|
||||||
|
|
||||||
|
import type { FastifyRequest, FastifyReply } from "fastify";
|
||||||
|
import type { AuthService } from "../services/auth.js";
|
||||||
|
|
||||||
|
// Augments Fastify's request type with the identity that the bearer-auth
// middleware attaches. Every field is optional because requests that skip
// authentication never have them set.
declare module "fastify" {
  interface FastifyRequest {
    /** Id of the authenticated user. */
    userId?: string;
    /** Email of the authenticated user. */
    userEmail?: string;
    /** Role of the authenticated user. */
    userRole?: string;
  }
}
|
||||||
|
|
||||||
|
// Exact request paths (query string excluded) that are served without a bearer token.
const PUBLIC_PATHS = new Set<string>([
  "/health",
  "/api/auth/login",
  "/ws/bastion",
  "/ws/agent",
  "/api/auth/enroll",
]);
|
||||||
|
|
||||||
|
/**
 * Builds the bearer-token authentication hook.
 *
 * The returned hook lets public paths and WebSocket upgrades through, and
 * otherwise requires an `Authorization: Bearer <token>` header. A valid token
 * is resolved through `authService.validateToken` and the resulting identity
 * is attached to the request as `userId`, `userEmail` and `userRole`. Every
 * failure is answered with a 401 and a JSON `{ error }` body.
 *
 * @param authService - Service used to validate tokens; a rejected promise is
 *   treated as "invalid or expired token".
 * @returns An async Fastify hook taking `(request, reply)`.
 */
export function createBearerAuthMiddleware(authService: AuthService) {
  // NOTE(review): assumes every WebSocket route lives under /ws/ (as the
  // /ws/bastion and /ws/agent entries in PUBLIC_PATHS do) — confirm.
  const websocketPathPrefix = "/ws/";

  return async function bearerAuth(
    request: FastifyRequest,
    reply: FastifyReply,
  ): Promise<void> {
    // Match on the path only so a query string cannot affect the checks below.
    const path = request.url.split("?")[0] ?? "";

    // Skip auth for public paths
    if (PUBLIC_PATHS.has(path)) {
      return;
    }

    // Skip auth for WebSocket upgrade requests (handled by their own auth).
    // The Upgrade header is supplied by the client, so honouring it on every
    // route would let any caller bypass authentication on ordinary HTTP
    // endpoints simply by adding "Upgrade: websocket". It is therefore only
    // trusted on the WebSocket path prefix. The value is compared
    // case-insensitively.
    const upgrade = request.headers.upgrade;
    const isWebSocketUpgrade =
      typeof upgrade === "string" && upgrade.toLowerCase() === "websocket";
    if (isWebSocketUpgrade && path.startsWith(websocketPathPrefix)) {
      return;
    }

    const authHeader = request.headers.authorization;
    if (!authHeader) {
      void reply.code(401).send({ error: "Authorization header required" });
      return;
    }

    // HTTP authentication scheme names are case-insensitive, so "bearer" and
    // "BEARER" are accepted as well as "Bearer".
    const schemePrefix = "bearer ";
    if (authHeader.slice(0, schemePrefix.length).toLowerCase() !== schemePrefix) {
      void reply.code(401).send({ error: "Invalid authorization format, expected: Bearer <token>" });
      return;
    }

    // Tolerate extra whitespace around the token.
    const token = authHeader.slice(schemePrefix.length).trim();
    if (token.length === 0) {
      void reply.code(401).send({ error: "Empty bearer token" });
      return;
    }

    try {
      const identity = await authService.validateToken(token);
      request.userId = identity.userId;
      request.userEmail = identity.email;
      request.userRole = identity.role;
    } catch {
      // Any validation failure is reported the same way, without detail.
      void reply.code(401).send({ error: "Invalid or expired token. Run: labctl login" });
    }
  };
}
|
||||||
@@ -84,7 +84,6 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
|||||||
app.get("/api/machines", async () => {
|
app.get("/api/machines", async () => {
|
||||||
const live = bastionRegistry.getAggregatedState();
|
const live = bastionRegistry.getAggregatedState();
|
||||||
|
|
||||||
// Merge DB records for machines not currently in any bastion's live state
|
|
||||||
try {
|
try {
|
||||||
const dbServers = (await db.server.findMany({})) as Array<{
|
const dbServers = (await db.server.findMany({})) as Array<{
|
||||||
mac: string | null; hostname: string; role: string; ip: string | null;
|
mac: string | null; hostname: string; role: string; ip: string | null;
|
||||||
@@ -93,9 +92,49 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
|||||||
for (const s of dbServers) {
|
for (const s of dbServers) {
|
||||||
if (!s.mac) continue;
|
if (!s.mac) continue;
|
||||||
const mac = s.mac.toLowerCase();
|
const mac = s.mac.toLowerCase();
|
||||||
// Only add from DB if not already in live state
|
|
||||||
|
// DB knows this machine has been installed at some point if it has a real
|
||||||
|
// hostname+role (not just product-name-as-hostname and role="unknown").
|
||||||
|
// Status alone is unreliable: a rediscovery can re-set it without erasing the
|
||||||
|
// install identity. If the bastion restarted and lost its installed map, the
|
||||||
|
// machine will only show up in live.discovered — promote it here so the CLI
|
||||||
|
// still sees hostname/role/IP.
|
||||||
|
const dbKnowsInstalled =
|
||||||
|
s.role !== "unknown" && s.role !== "" &&
|
||||||
|
s.hostname !== "" && s.hostname !== s.mac;
|
||||||
|
|
||||||
|
if (dbKnowsInstalled && !(mac in live.installed) && !(mac in live.install_queue)) {
|
||||||
|
const hw = live.discovered[mac];
|
||||||
|
live.installed[mac] = {
|
||||||
|
hostname: s.hostname,
|
||||||
|
role: s.role,
|
||||||
|
ip: s.ip ?? "",
|
||||||
|
installed_at: "",
|
||||||
|
bastionId: hw?.bastionId ?? "db",
|
||||||
|
...(hw ? {
|
||||||
|
product: hw.product,
|
||||||
|
manufacturer: hw.manufacturer,
|
||||||
|
cpu_model: hw.cpu_model,
|
||||||
|
cpu_cores: hw.cpu_cores,
|
||||||
|
memory_gb: hw.memory_gb,
|
||||||
|
arch: hw.arch,
|
||||||
|
} : {}),
|
||||||
|
};
|
||||||
|
delete live.discovered[mac];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unknown-to-live MAC: fall back to whatever the DB says.
|
||||||
if (!(mac in live.discovered) && !(mac in live.install_queue) && !(mac in live.installed)) {
|
if (!(mac in live.discovered) && !(mac in live.install_queue) && !(mac in live.installed)) {
|
||||||
if (s.status === "discovered") {
|
if (s.status === "online" || s.status === "offline") {
|
||||||
|
live.installed[mac] = {
|
||||||
|
hostname: s.hostname,
|
||||||
|
role: s.role,
|
||||||
|
ip: s.ip ?? "",
|
||||||
|
installed_at: "",
|
||||||
|
bastionId: "db",
|
||||||
|
};
|
||||||
|
} else {
|
||||||
live.discovered[mac] = {
|
live.discovered[mac] = {
|
||||||
mac,
|
mac,
|
||||||
product: String(s.labels?.product ?? "unknown"),
|
product: String(s.labels?.product ?? "unknown"),
|
||||||
@@ -112,14 +151,6 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
|||||||
last_seen: "",
|
last_seen: "",
|
||||||
bastionId: "db",
|
bastionId: "db",
|
||||||
};
|
};
|
||||||
} else if (s.status === "online" || s.status === "offline") {
|
|
||||||
live.installed[mac] = {
|
|
||||||
hostname: s.hostname,
|
|
||||||
role: s.role,
|
|
||||||
ip: s.ip ?? "",
|
|
||||||
installed_at: "",
|
|
||||||
bastionId: "db",
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -260,6 +291,37 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Update hardware info (discovery data) for a machine
|
||||||
|
app.post<{
|
||||||
|
Body: {
|
||||||
|
mac?: string; product?: string; board?: string; serial?: string;
|
||||||
|
manufacturer?: string; cpu_model?: string; cpu_cores?: number;
|
||||||
|
memory_gb?: number; arch?: string;
|
||||||
|
disks?: Array<{ name: string; size_gb: number; model: string }>;
|
||||||
|
nics?: Array<{ name: string; mac: string; state: string }>;
|
||||||
|
};
|
||||||
|
}>("/api/machines/discover", async (request, reply) => {
|
||||||
|
const data = request.body ?? {};
|
||||||
|
const mac = (data.mac ?? "").toLowerCase().replace(/-/g, ":");
|
||||||
|
if (!mac) {
|
||||||
|
return reply.code(400).send({ error: "mac is required" });
|
||||||
|
}
|
||||||
|
|
||||||
|
const bastion = bastionRegistry.findBastionByMac(mac);
|
||||||
|
const target = bastion ?? (bastionRegistry.getAll().length === 1 ? bastionRegistry.getAll()[0] : null);
|
||||||
|
|
||||||
|
if (!target) {
|
||||||
|
return reply.code(503).send({ error: "No bastion found for this MAC" });
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await sendCommand(target.bastionId, { type: "command-discover", ...data, mac });
|
||||||
|
return reply.code(result.status === "ok" ? 200 : 500).send(result);
|
||||||
|
} catch (err) {
|
||||||
|
return reply.code(500).send({ error: err instanceof Error ? err.message : String(err) });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Update role
|
// Update role
|
||||||
app.post<{
|
app.post<{
|
||||||
Body: { mac?: string; role?: string };
|
Body: { mac?: string; role?: string };
|
||||||
|
|||||||
191
bastion/src/labd/src/routes/environments.ts
Normal file
191
bastion/src/labd/src/routes/environments.ts
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
// Environment and Account management routes.
|
||||||
|
// GET/POST /api/environments — list/create environments
|
||||||
|
// GET/POST /api/accounts — list/create accounts
|
||||||
|
// POST /api/accounts/bind — bind account to environment
|
||||||
|
// GET /api/bindings — list bindings
|
||||||
|
|
||||||
|
import type { FastifyInstance } from "fastify";
|
||||||
|
import type { PrismaClient, Prisma } from "@prisma/client";
|
||||||
|
import type { RbacService } from "../services/rbac.js";
|
||||||
|
import type { AuditService } from "../services/audit.js";
|
||||||
|
|
||||||
|
export function registerEnvironmentRoutes(
|
||||||
|
app: FastifyInstance,
|
||||||
|
db: PrismaClient,
|
||||||
|
rbacService: RbacService,
|
||||||
|
auditService: AuditService,
|
||||||
|
): void {
|
||||||
|
// List environments
|
||||||
|
app.get("/api/environments", async (_request, reply) => {
|
||||||
|
const envs = await db.environment.findMany({ orderBy: { name: "asc" } });
|
||||||
|
return reply.send(envs);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create environment
|
||||||
|
app.post<{
|
||||||
|
Body: { name?: string };
|
||||||
|
}>("/api/environments", async (request, reply) => {
|
||||||
|
const { name } = request.body ?? {};
|
||||||
|
if (!name) {
|
||||||
|
return reply.code(400).send({ error: "name is required" });
|
||||||
|
}
|
||||||
|
|
||||||
|
const rbac = await rbacService.check({
|
||||||
|
userId: request.userId!,
|
||||||
|
userEmail: request.userEmail!,
|
||||||
|
userRole: request.userRole!,
|
||||||
|
action: "admin",
|
||||||
|
resource: "environments",
|
||||||
|
});
|
||||||
|
if (!rbac.allowed) {
|
||||||
|
return reply.code(403).send({ error: rbac.reason });
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const env = await db.environment.create({ data: { name } });
|
||||||
|
auditService.emit({
|
||||||
|
eventKind: "resource_created",
|
||||||
|
source: "labd",
|
||||||
|
verified: true,
|
||||||
|
userId: request.userId ?? null,
|
||||||
|
resourceKind: "environment",
|
||||||
|
resourceName: name,
|
||||||
|
result: "success",
|
||||||
|
});
|
||||||
|
return reply.code(201).send(env);
|
||||||
|
} catch (err) {
|
||||||
|
if (err instanceof Error && err.message.includes("Unique constraint")) {
|
||||||
|
return reply.code(409).send({ error: `Environment '${name}' already exists` });
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// List accounts
|
||||||
|
app.get("/api/accounts", async (_request, reply) => {
|
||||||
|
const accounts = await db.account.findMany({
|
||||||
|
orderBy: { name: "asc" },
|
||||||
|
select: { id: true, name: true, driver: true, config: true, createdAt: true, updatedAt: true },
|
||||||
|
});
|
||||||
|
return reply.send(accounts);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create account
|
||||||
|
app.post<{
|
||||||
|
Body: { name?: string; driver?: string; config?: Record<string, unknown> };
|
||||||
|
}>("/api/accounts", async (request, reply) => {
|
||||||
|
const { name, driver, config } = request.body ?? {};
|
||||||
|
if (!name || !driver) {
|
||||||
|
return reply.code(400).send({ error: "name and driver are required" });
|
||||||
|
}
|
||||||
|
|
||||||
|
const rbac = await rbacService.check({
|
||||||
|
userId: request.userId!,
|
||||||
|
userEmail: request.userEmail!,
|
||||||
|
userRole: request.userRole!,
|
||||||
|
action: "admin",
|
||||||
|
resource: "accounts",
|
||||||
|
});
|
||||||
|
if (!rbac.allowed) {
|
||||||
|
return reply.code(403).send({ error: rbac.reason });
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const account = await db.account.create({
|
||||||
|
data: { name, driver, config: (config ?? {}) as Prisma.InputJsonValue },
|
||||||
|
});
|
||||||
|
auditService.emit({
|
||||||
|
eventKind: "resource_created",
|
||||||
|
source: "labd",
|
||||||
|
verified: true,
|
||||||
|
userId: request.userId ?? null,
|
||||||
|
resourceKind: "account",
|
||||||
|
resourceName: name,
|
||||||
|
result: "success",
|
||||||
|
details: { driver },
|
||||||
|
});
|
||||||
|
return reply.code(201).send(account);
|
||||||
|
} catch (err) {
|
||||||
|
if (err instanceof Error && err.message.includes("Unique constraint")) {
|
||||||
|
return reply.code(409).send({ error: `Account '${name}' already exists` });
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Bind account to environment.
// Requires the "admin" action on "accounts". Responds with:
//   201 — the created binding
//   400 — environmentId/accountId missing or not strings
//   403 — RBAC denied (body carries the RBAC reason)
//   404 — the referenced environment or account does not exist
//   409 — this account is already bound to this environment
app.post<{
  Body: { environmentId?: string; accountId?: string };
}>("/api/accounts/bind", async (request, reply) => {
  const { environmentId, accountId } = request.body ?? {};
  // The Body generic is compile-time only, so reject non-string ids here
  // instead of letting them reach the database layer.
  if (
    typeof environmentId !== "string" ||
    typeof accountId !== "string" ||
    !environmentId ||
    !accountId
  ) {
    return reply.code(400).send({ error: "environmentId and accountId are required" });
  }

  const rbac = await rbacService.check({
    userId: request.userId!,
    userEmail: request.userEmail!,
    userRole: request.userRole!,
    action: "admin",
    resource: "accounts",
  });
  if (!rbac.allowed) {
    return reply.code(403).send({ error: rbac.reason });
  }

  try {
    const binding = await db.binding.create({
      data: { environmentId, accountId },
    });
    return reply.code(201).send(binding);
  } catch (err) {
    // Prisma's known request errors carry a stable `code`; prefer it over the
    // human-readable message, which is kept only as a fallback.
    const code = err instanceof Error ? (err as Error & { code?: unknown }).code : undefined;
    const isUniqueViolation =
      code === "P2002" || (err instanceof Error && err.message.includes("Unique constraint"));
    if (isUniqueViolation) {
      return reply.code(409).send({ error: "This account is already bound to this environment" });
    }
    // P2003 = foreign key constraint failed: one of the ids points at nothing.
    if (code === "P2003") {
      return reply.code(404).send({ error: "Environment or account not found" });
    }
    throw err;
  }
});
|
||||||
|
|
||||||
|
// List bindings, each expanded with its related environment and account rows.
// NOTE(review): unlike the neighbouring account routes, this handler performs
// no RBAC check and returns the full account records — confirm that is intended.
app.get("/api/bindings", async (_request, reply) =>
  reply.send(
    await db.binding.findMany({
      include: { environment: true, account: true },
    }),
  ),
);
|
||||||
|
|
||||||
|
// Audit event query. All query parameters are optional:
//   last        — look-back window written as <digits><m|h|d>, e.g. "15m", "24h", "7d";
//                 values that do not match that shape are ignored
//   kind        — exact match on eventKind
//   env         — exact match on environmentName
//   correlation — exact match on correlationId
//   limit       — maximum number of rows; defaults to 100 and is capped at 500
// Events are returned newest first.
app.get<{
  Querystring: {
    last?: string;
    kind?: string;
    env?: string;
    correlation?: string;
    limit?: string;
  };
}>("/api/events", async (request, reply) => {
  const { last, kind, env, correlation, limit } = request.query;

  const where: Record<string, unknown> = {};

  if (last) {
    const match = /^(\d+)(h|d|m)$/.exec(last);
    if (match) {
      const unitMs: Record<string, number> = { h: 3_600_000, d: 86_400_000, m: 60_000 };
      const since = new Date(Date.now() - Number.parseInt(match[1]!, 10) * unitMs[match[2]!]!);
      // An absurdly large window falls outside the Date range and yields an
      // Invalid Date; treat that as "no lower bound" rather than passing it on.
      if (!Number.isNaN(since.getTime())) {
        where.timestamp = { gte: since };
      }
    }
  }
  if (kind) where.eventKind = kind;
  if (env) where.environmentName = env;
  if (correlation) where.correlationId = correlation;

  // A non-numeric limit used to become `take: NaN`, and a negative one slipped
  // under the Math.min cap. Fall back to the default for both.
  const requested = Number.parseInt(limit ?? "", 10);
  const take = Number.isFinite(requested) && requested >= 0 ? Math.min(requested, 500) : 100;

  const events = await db.auditEvent.findMany({
    where,
    orderBy: { timestamp: "desc" },
    take,
  });

  return reply.send(events);
});
|
||||||
|
}
|
||||||
196
bastion/src/labd/src/routes/resources.ts
Normal file
196
bastion/src/labd/src/routes/resources.ts
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
// Resource CRUD routes with RBAC enforcement.
|
||||||
|
// GET /api/resources — list (filtered by RBAC scope)
|
||||||
|
// GET /api/resources/:id — get
|
||||||
|
// POST /api/resources — create
|
||||||
|
// PUT /api/resources/:id — update
|
||||||
|
// DELETE /api/resources/:id — delete (marks as deleting)
|
||||||
|
|
||||||
|
import type { FastifyInstance } from "fastify";
|
||||||
|
import type { ResourceStore, CreateResourceInput } from "../services/resource-store.js";
|
||||||
|
import type { RbacService } from "../services/rbac.js";
|
||||||
|
import type { AuditService } from "../services/audit.js";
|
||||||
|
import { resolveResourceKind } from "@lab/core";
|
||||||
|
|
||||||
|
/**
 * Registers the resource CRUD routes on `app`. Every route asks `rbacService`
 * for permission before touching the store and answers 403 (with the RBAC
 * reason) when denied. Successful mutations emit an audit event.
 *
 *   GET    /api/resources      — list, optionally filtered by kind/environment/status
 *   GET    /api/resources/:id  — fetch one (404 when the store returns nothing)
 *   POST   /api/resources      — create (400 on missing fields, 409 on duplicates)
 *   PUT    /api/resources/:id  — update `desiredSpec` and/or `status` only
 *   DELETE /api/resources/:id  — delete; replies `{ status: "deleting", id }`
 *
 * @param app           Fastify instance (or scope) to attach the routes to.
 * @param resourceStore Persistence layer for resources.
 * @param rbacService   Permission checker consulted by every route.
 * @param auditService  Sink for audit events emitted after successful mutations.
 */
export function registerResourceRoutes(
  app: FastifyInstance,
  resourceStore: ResourceStore,
  rbacService: RbacService,
  auditService: AuditService,
): void {
  // List resources (filtered by kind, environment, status)
  app.get<{
    Querystring: { kind?: string; environment?: string; status?: string };
  }>("/api/resources", async (request, reply) => {
    // Resolve the kind once so the RBAC check and the store query use the same value.
    const kind = request.query.kind ? resolveResourceKind(request.query.kind) : undefined;

    const rbac = await rbacService.check({
      userId: request.userId!,
      userEmail: request.userEmail!,
      userRole: request.userRole!,
      action: "view",
      resource: kind,
    });

    if (!rbac.allowed) {
      return reply.code(403).send({ error: rbac.reason });
    }

    const resources = await resourceStore.list({
      kind,
      environmentId: request.query.environment,
      status: request.query.status,
    });

    return reply.send(resources);
  });

  // Get single resource
  app.get<{
    Params: { id: string };
  }>("/api/resources/:id", async (request, reply) => {
    const resource = await resourceStore.get(request.params.id);
    if (!resource) {
      return reply.code(404).send({ error: "Resource not found" });
    }

    const rbac = await rbacService.check({
      userId: request.userId!,
      userEmail: request.userEmail!,
      userRole: request.userRole!,
      action: "view",
      resource: resource.kind,
      name: resource.name,
    });

    if (!rbac.allowed) {
      return reply.code(403).send({ error: rbac.reason });
    }

    return reply.send(resource);
  });

  // Create resource
  app.post<{
    Body: CreateResourceInput;
  }>("/api/resources", async (request, reply) => {
    const input = request.body;
    if (!input?.kind || !input?.name || !input?.environmentId || !input?.accountId) {
      return reply.code(400).send({ error: "kind, name, environmentId, and accountId are required" });
    }

    const kind = resolveResourceKind(input.kind);

    const rbac = await rbacService.check({
      userId: request.userId!,
      userEmail: request.userEmail!,
      userRole: request.userRole!,
      action: "create",
      resource: kind,
    });

    if (!rbac.allowed) {
      return reply.code(403).send({ error: rbac.reason });
    }

    const correlationId = auditService.createCorrelation();

    try {
      const resource = await resourceStore.create({ ...input, kind });

      auditService.emit({
        eventKind: "resource_created",
        source: "labd",
        verified: true,
        userId: request.userId ?? null,
        userName: request.userEmail ?? null,
        resourceKind: kind,
        resourceName: input.name,
        correlationId,
        result: "success",
      });

      return reply.code(201).send(resource);
    } catch (err) {
      // Prisma unique constraint violation
      if (err instanceof Error && err.message.includes("Unique constraint")) {
        return reply.code(409).send({ error: `Resource ${kind}/${input.name} already exists in this environment` });
      }
      throw err;
    }
  });

  // Update resource
  app.put<{
    Params: { id: string };
    Body: { desiredSpec?: Record<string, unknown>; status?: string };
  }>("/api/resources/:id", async (request, reply) => {
    const resource = await resourceStore.get(request.params.id);
    if (!resource) {
      return reply.code(404).send({ error: "Resource not found" });
    }

    const rbac = await rbacService.check({
      userId: request.userId!,
      userEmail: request.userEmail!,
      userRole: request.userRole!,
      action: "edit",
      resource: resource.kind,
      name: resource.name,
    });

    if (!rbac.allowed) {
      return reply.code(403).send({ error: rbac.reason });
    }

    // The Body type is not enforced at runtime. Forward only the two fields
    // this route declares so a client cannot set other store-level fields
    // by adding extra keys to the JSON payload.
    const { desiredSpec, status } = request.body ?? {};
    const patch: { desiredSpec?: Record<string, unknown>; status?: string } = {};
    if (desiredSpec !== undefined) patch.desiredSpec = desiredSpec;
    if (status !== undefined) patch.status = status;

    const updated = await resourceStore.update(request.params.id, patch);

    auditService.emit({
      eventKind: "resource_updated",
      source: "labd",
      verified: true,
      userId: request.userId ?? null,
      userName: request.userEmail ?? null,
      resourceKind: resource.kind,
      resourceName: resource.name,
      result: "success",
    });

    return reply.send(updated);
  });

  // Delete resource (marks as deleting)
  app.delete<{
    Params: { id: string };
  }>("/api/resources/:id", async (request, reply) => {
    const resource = await resourceStore.get(request.params.id);
    if (!resource) {
      return reply.code(404).send({ error: "Resource not found" });
    }

    const rbac = await rbacService.check({
      userId: request.userId!,
      userEmail: request.userEmail!,
      userRole: request.userRole!,
      action: "delete",
      resource: resource.kind,
      name: resource.name,
    });

    if (!rbac.allowed) {
      return reply.code(403).send({ error: rbac.reason });
    }

    await resourceStore.delete(request.params.id);

    auditService.emit({
      eventKind: "resource_deleted",
      source: "labd",
      verified: true,
      userId: request.userId ?? null,
      userName: request.userEmail ?? null,
      resourceKind: resource.kind,
      resourceName: resource.name,
      result: "success",
    });

    return reply.send({ status: "deleting", id: request.params.id });
  });
}
|
||||||
81
bastion/src/labd/src/routes/v2-auth.ts
Normal file
81
bastion/src/labd/src/routes/v2-auth.ts
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
// v2 Auth routes: bearer token login/logout.
|
||||||
|
// POST /api/auth/login — email + password → session token
|
||||||
|
// POST /api/auth/logout — revoke session
|
||||||
|
|
||||||
|
import type { FastifyInstance } from "fastify";
|
||||||
|
import type { AuthService } from "../services/auth.js";
|
||||||
|
import type { AuditService } from "../services/audit.js";
|
||||||
|
import { AuthError } from "../services/auth.js";
|
||||||
|
|
||||||
|
/**
 * Registers the v2 bearer-token auth routes.
 *
 *   POST /api/auth/login  — body `{ email, password }`; replies with
 *                           `{ token, expiresAt, isBootstrap }`, 400 when either
 *                           field is missing or not a string, 401 on AuthError,
 *                           500 on any other failure.
 *   POST /api/auth/logout — revokes the session named by the
 *                           `Authorization: Bearer <token>` header; 400 when the
 *                           header is absent, uses another scheme, or the
 *                           service raises AuthError; 500 otherwise.
 *
 * Login attempts (success and AuthError failure) and successful logouts are
 * reported to `auditService`.
 */
export function registerV2AuthRoutes(
  app: FastifyInstance,
  authService: AuthService,
  auditService: AuditService,
): void {
  app.post<{
    Body: { email?: string; password?: string };
  }>("/api/auth/login", async (request, reply) => {
    const { email, password } = request.body ?? {};

    // The Body generic is not enforced at runtime; a JSON object or number in
    // either field must be rejected here rather than reach the auth service.
    if (typeof email !== "string" || typeof password !== "string" || !email || !password) {
      return reply.code(400).send({ error: "email and password are required" });
    }

    try {
      const result = await authService.login(email, password);

      auditService.emit({
        eventKind: result.isBootstrap ? "auth_bootstrap" : "auth_login",
        source: "labd",
        verified: true,
        userId: result.userId,
        userName: email,
        result: "success",
        details: { isBootstrap: result.isBootstrap },
      });

      return reply.send({
        token: result.token,
        expiresAt: result.expiresAt.toISOString(),
        isBootstrap: result.isBootstrap,
      });
    } catch (err) {
      if (err instanceof AuthError) {
        auditService.emit({
          eventKind: "auth_login",
          source: "labd",
          verified: true,
          userName: email,
          result: "failure",
          error: err.message,
        });
        return reply.code(401).send({ error: err.message });
      }
      return reply.code(500).send({ error: "Login failed" });
    }
  });

  app.post("/api/auth/logout", async (request, reply) => {
    // Accept only the Bearer scheme. Blindly dropping the first seven
    // characters would turn e.g. a Basic header into a garbage token lookup.
    const token = /^Bearer\s+(\S+)$/i.exec(request.headers.authorization ?? "")?.[1];
    if (!token) {
      return reply.code(400).send({ error: "Authorization header required" });
    }

    try {
      await authService.logout(token);
      auditService.emit({
        eventKind: "auth_logout",
        source: "labd",
        verified: true,
        userId: request.userId ?? null,
        result: "success",
      });
      return reply.send({ status: "logged_out" });
    } catch (err) {
      if (err instanceof AuthError) {
        return reply.code(400).send({ error: err.message });
      }
      return reply.code(500).send({ error: "Logout failed" });
    }
  });
}
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import Fastify from "fastify";
|
import Fastify from "fastify";
|
||||||
import websocket from "@fastify/websocket";
|
import websocket from "@fastify/websocket";
|
||||||
|
import type { PrismaClient } from "@prisma/client";
|
||||||
import type { LabdConfig } from "./config.js";
|
import type { LabdConfig } from "./config.js";
|
||||||
import { logger } from "./services/logger.js";
|
import { logger } from "./services/logger.js";
|
||||||
import { registerHealthRoutes } from "./routes/health.js";
|
import { registerHealthRoutes } from "./routes/health.js";
|
||||||
@@ -9,8 +10,16 @@ import { registerServerRoutes } from "./routes/servers.js";
|
|||||||
import { registerAuthRoutes } from "./routes/auth.js";
|
import { registerAuthRoutes } from "./routes/auth.js";
|
||||||
import { registerAgentRoutes } from "./routes/agents.js";
|
import { registerAgentRoutes } from "./routes/agents.js";
|
||||||
import { registerBastionRoutes } from "./routes/bastions.js";
|
import { registerBastionRoutes } from "./routes/bastions.js";
|
||||||
|
import { registerV2AuthRoutes } from "./routes/v2-auth.js";
|
||||||
|
import { registerEnvironmentRoutes } from "./routes/environments.js";
|
||||||
|
import { registerResourceRoutes } from "./routes/resources.js";
|
||||||
import { setupRateLimiting } from "./middleware/rate-limit.js";
|
import { setupRateLimiting } from "./middleware/rate-limit.js";
|
||||||
|
import { createBearerAuthMiddleware } from "./middleware/bearer-auth.js";
|
||||||
import { bastionRegistry } from "./services/bastion-registry.js";
|
import { bastionRegistry } from "./services/bastion-registry.js";
|
||||||
|
import { AuthService } from "./services/auth.js";
|
||||||
|
import { RbacService } from "./services/rbac.js";
|
||||||
|
import { ResourceStore } from "./services/resource-store.js";
|
||||||
|
import { AuditService } from "./services/audit.js";
|
||||||
import { isBastionMessage } from "@lab/shared";
|
import { isBastionMessage } from "@lab/shared";
|
||||||
|
|
||||||
export interface DbClient {
|
export interface DbClient {
|
||||||
@@ -37,6 +46,7 @@ export interface DbClient {
|
|||||||
|
|
||||||
export async function createApp(_config: LabdConfig, db: DbClient): Promise<{
|
export async function createApp(_config: LabdConfig, db: DbClient): Promise<{
|
||||||
app: ReturnType<typeof Fastify>;
|
app: ReturnType<typeof Fastify>;
|
||||||
|
auditService: AuditService;
|
||||||
}> {
|
}> {
|
||||||
const app = Fastify({
|
const app = Fastify({
|
||||||
logger: false, // We use winston instead
|
logger: false, // We use winston instead
|
||||||
@@ -48,13 +58,39 @@ export async function createApp(_config: LabdConfig, db: DbClient): Promise<{
|
|||||||
// Register WebSocket support
|
// Register WebSocket support
|
||||||
void app.register(websocket);
|
void app.register(websocket);
|
||||||
|
|
||||||
// Register route handlers
|
// v2 services. The structural DbClient is a subset of the real PrismaClient;
|
||||||
|
// at runtime db IS the PrismaClient instance, so the cast is safe. Tests that
|
||||||
|
// exercise v2 routes provide a PrismaClient-shaped mock (see auth-bootstrap,
|
||||||
|
// rbac-deny, audit-correlation tests).
|
||||||
|
const prisma = db as unknown as PrismaClient;
|
||||||
|
const authService = new AuthService(prisma);
|
||||||
|
const rbacService = new RbacService(prisma);
|
||||||
|
const resourceStore = new ResourceStore(prisma);
|
||||||
|
const auditService = new AuditService(prisma);
|
||||||
|
auditService.start();
|
||||||
|
|
||||||
|
// Register v1 (legacy) route handlers
|
||||||
registerHealthRoutes(app, db);
|
registerHealthRoutes(app, db);
|
||||||
registerServerRoutes(app, db);
|
registerServerRoutes(app, db);
|
||||||
registerAuthRoutes(app, db);
|
registerAuthRoutes(app, db);
|
||||||
registerAgentRoutes(app);
|
registerAgentRoutes(app);
|
||||||
registerBastionRoutes(app, db);
|
registerBastionRoutes(app, db);
|
||||||
|
|
||||||
|
// v2 routes live in a scope with bearer-auth as preHandler. Public paths
|
||||||
|
// (login, /health, websockets) are skipped inside the middleware itself.
|
||||||
|
// v1 routes above are unaffected — they're registered on the root scope.
|
||||||
|
await app.register(async (scope) => {
|
||||||
|
scope.addHook("preHandler", createBearerAuthMiddleware(authService));
|
||||||
|
registerV2AuthRoutes(scope, authService, auditService);
|
||||||
|
registerEnvironmentRoutes(scope, prisma, rbacService, auditService);
|
||||||
|
registerResourceRoutes(scope, resourceStore, rbacService, auditService);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Flush pending audit events on shutdown so we never lose the last batch.
|
||||||
|
app.addHook("onClose", async () => {
|
||||||
|
auditService.stop();
|
||||||
|
});
|
||||||
|
|
||||||
// WebSocket handler for agent connections
|
// WebSocket handler for agent connections
|
||||||
app.register(async (fastify) => {
|
app.register(async (fastify) => {
|
||||||
fastify.get("/ws/agent", { websocket: true }, (socket, _request) => {
|
fastify.get("/ws/agent", { websocket: true }, (socket, _request) => {
|
||||||
@@ -192,7 +228,9 @@ export async function createApp(_config: LabdConfig, db: DbClient): Promise<{
|
|||||||
labels: { cpu: hw.cpu_model, cores: hw.cpu_cores, memory_gb: hw.memory_gb, arch: hw.arch, product: hw.product, manufacturer: hw.manufacturer },
|
labels: { cpu: hw.cpu_model, cores: hw.cpu_cores, memory_gb: hw.memory_gb, arch: hw.arch, product: hw.product, manufacturer: hw.manufacturer },
|
||||||
},
|
},
|
||||||
update: {
|
update: {
|
||||||
status: "discovered",
|
// Leave status alone — a previously "online"/"offline" record
|
||||||
|
// must not be downgraded to "discovered" just because the bastion
|
||||||
|
// restarted and re-discovered the MAC via DHCP/PXE.
|
||||||
lastHeartbeat: new Date(),
|
lastHeartbeat: new Date(),
|
||||||
labels: { cpu: hw.cpu_model, cores: hw.cpu_cores, memory_gb: hw.memory_gb, arch: hw.arch, product: hw.product, manufacturer: hw.manufacturer },
|
labels: { cpu: hw.cpu_model, cores: hw.cpu_cores, memory_gb: hw.memory_gb, arch: hw.arch, product: hw.product, manufacturer: hw.manufacturer },
|
||||||
},
|
},
|
||||||
@@ -265,5 +303,5 @@ export async function createApp(_config: LabdConfig, db: DbClient): Promise<{
|
|||||||
logger.info(`HTTP: ${request.ip} ${request.method} ${request.url}`);
|
logger.info(`HTTP: ${request.ip} ${request.method} ${request.url}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
return { app };
|
return { app, auditService };
|
||||||
}
|
}
|
||||||
|
|||||||
106
bastion/src/labd/src/services/audit.ts
Normal file
106
bastion/src/labd/src/services/audit.ts
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
// Audit service: fire-and-forget event collection with batching.
|
||||||
|
// Batches 50 events or flushes every 5 seconds, whichever comes first.
|
||||||
|
// Failures never block the operation being audited.
|
||||||
|
|
||||||
|
import { randomBytes } from "node:crypto";
|
||||||
|
import type { PrismaClient, Prisma } from "@prisma/client";
|
||||||
|
import { logger } from "./logger.js";
|
||||||
|
|
||||||
|
const BATCH_SIZE = 50;
|
||||||
|
const FLUSH_INTERVAL_MS = 5_000;
|
||||||
|
|
||||||
|
/**
 * One audit event as handed to the audit service. Only `eventKind`, `source`
 * and `result` are required; every nullable field is written as `null` when
 * omitted.
 */
export interface AuditEventInput {
  // --- what happened -------------------------------------------------------
  /** Event type identifier, e.g. "resource_created" or "auth_login". */
  eventKind: string;
  /** Component that produced the event, e.g. "labd". */
  source: string;
  /** Outcome label, e.g. "success" or "failure". */
  result: string;
  /** Error text accompanying a failed outcome. */
  error?: string | null;
  /** Duration of the audited operation in milliseconds. */
  durationMs?: number | null;
  /** Stored as `false` when omitted. */
  verified?: boolean;

  // --- who -----------------------------------------------------------------
  userId?: string | null;
  userName?: string | null;
  sessionId?: string | null;

  // --- where / on what -----------------------------------------------------
  environmentName?: string | null;
  accountName?: string | null;
  resourceKind?: string | null;
  resourceName?: string | null;

  // --- event chaining ------------------------------------------------------
  /** Shared id linking related events; one is generated on emit when absent. */
  correlationId?: string | null;
  parentEventId?: string | null;

  /** Free-form structured payload; stored as `{}` when omitted. */
  details?: Record<string, unknown>;
}
|
||||||
|
|
||||||
|
/**
 * Buffers audit events in memory and writes them to the `auditEvent` table in
 * batches. A flush happens when the buffer reaches BATCH_SIZE, on every tick
 * of the interval started by `start()`, on `stop()`, and on `flushPending()`.
 *
 * Persistence is best-effort: a failed write is logged as a warning and the
 * affected events are dropped, so auditing never fails the audited operation.
 */
export class AuditService {
  private batch: AuditEventInput[] = [];
  private timer: ReturnType<typeof setInterval> | null = null;

  constructor(private readonly db: PrismaClient) {}

  /** Starts the periodic flush. Calling it again while running is a no-op. */
  start(): void {
    // Guard against a second start() orphaning the first interval handle.
    if (this.timer) return;
    this.timer = setInterval(() => {
      void this.flush();
    }, FLUSH_INTERVAL_MS);
  }

  /**
   * Stops the periodic flush and kicks off one final flush. The flush is not
   * awaited here; callers that need the write to finish should await
   * `flushPending()`.
   */
  stop(): void {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
    void this.flush();
  }

  /**
   * Queues an event for the next flush. The event is shallow-copied, so the
   * caller's object is never modified and later changes to it do not affect
   * what gets stored. A correlation id is generated when none is supplied.
   */
  emit(event: AuditEventInput): void {
    this.batch.push({
      ...event,
      correlationId: event.correlationId || `corr_${randomBytes(8).toString("hex")}`,
    });

    if (this.batch.length >= BATCH_SIZE) {
      void this.flush();
    }
  }

  /** Create a correlation context for a chain of related events. */
  createCorrelation(): string {
    return `corr_${randomBytes(8).toString("hex")}`;
  }

  /**
   * Flushes everything queued so far and resolves once the write attempt has
   * completed (successfully or not). Tests await this; production relies on
   * the interval timer or stop() during shutdown.
   */
  async flushPending(): Promise<void> {
    await this.flush();
  }

  /** Drains the buffer and writes it with a single createMany call. */
  private async flush(): Promise<void> {
    if (this.batch.length === 0) return;

    // Take ownership of the queued events before awaiting, so events emitted
    // during the write land in the next batch.
    const events = this.batch.splice(0);
    try {
      await this.db.auditEvent.createMany({
        data: events.map((e) => ({
          eventKind: e.eventKind,
          source: e.source,
          verified: e.verified ?? false,
          userId: e.userId ?? null,
          userName: e.userName ?? null,
          sessionId: e.sessionId ?? null,
          environmentName: e.environmentName ?? null,
          accountName: e.accountName ?? null,
          resourceKind: e.resourceKind ?? null,
          resourceName: e.resourceName ?? null,
          correlationId: e.correlationId ?? `corr_${randomBytes(8).toString("hex")}`,
          parentEventId: e.parentEventId ?? null,
          details: (e.details ?? {}) as Prisma.InputJsonValue,
          result: e.result,
          error: e.error ?? null,
          durationMs: e.durationMs ?? null,
        })),
      });
      logger.info(`AUDIT: flushed ${events.length} events`);
    } catch (err) {
      // Fire-and-forget: audit failures never block operations
      logger.warn(`AUDIT: failed to flush ${events.length} events: ${err instanceof Error ? err.message : String(err)}`);
    }
  }
}
|
||||||
119
bastion/src/labd/src/services/auth.ts
Normal file
119
bastion/src/labd/src/services/auth.ts
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
// Auth service: bearer token authentication with bootstrap flow.
|
||||||
|
// First login creates the admin user. Subsequent logins return session tokens.
|
||||||
|
|
||||||
|
import { randomBytes } from "node:crypto";
|
||||||
|
import bcrypt from "bcryptjs";
|
||||||
|
import type { PrismaClient } from "@prisma/client";
|
||||||
|
import { logger } from "./logger.js";
|
||||||
|
|
||||||
|
const SESSION_EXPIRY_DAYS = 30;
|
||||||
|
const BCRYPT_ROUNDS = 12;
|
||||||
|
|
||||||
|
/** Outcome of a successful login. */
export interface LoginResult {
  /** Id of the authenticated user. */
  userId: string;
  /** Session token issued for this login. */
  token: string;
  /** Moment at which the issued session expires. */
  expiresAt: Date;
  /** `true` when this login created the first (admin) user. */
  isBootstrap: boolean;
}
|
||||||
|
|
||||||
|
/**
 * Email/password authentication backed by session tokens.
 *
 * While the user table is empty, the first login creates that user with the
 * ADMIN role (bootstrap). Afterwards, login verifies the bcrypt hash and issues
 * a session token valid for SESSION_EXPIRY_DAYS days.
 */
export class AuthService {
  /**
   * Throwaway bcrypt hash, created on first use, that unknown-email logins are
   * compared against so they cost the same as a wrong-password login.
   */
  private decoyHash: Promise<string> | null = null;

  constructor(private readonly db: PrismaClient) {}

  /**
   * Authenticates a user and opens a session.
   *
   * @throws AuthError "Invalid email or password" for an unknown email or a
   *         wrong password (deliberately indistinguishable).
   */
  async login(email: string, password: string): Promise<LoginResult> {
    const userCount = await this.db.user.count();

    // Bootstrap: first login creates admin user
    if (userCount === 0) {
      return this.bootstrap(email, password);
    }

    const user = await this.db.user.findUnique({ where: { email } });

    // Always perform exactly one bcrypt comparison. Returning early for an
    // unknown email would answer measurably faster than a wrong password and
    // leak which addresses exist, defeating the shared error message below.
    const valid = await bcrypt.compare(password, user ? user.password : await this.getDecoyHash());
    if (!user || !valid) {
      // Same error for unknown user and wrong password (no enumeration)
      throw new AuthError("Invalid email or password");
    }

    const session = await this.createSession(user.id);
    logger.info(`AUTH LOGIN: ${email} (${user.id.slice(0, 8)}...)`);

    return {
      token: session.token,
      expiresAt: session.expiresAt,
      userId: user.id,
      isBootstrap: false,
    };
  }

  /**
   * Revokes the session identified by `token`.
   *
   * @throws AuthError "Invalid session" when no session has that token.
   */
  async logout(token: string): Promise<void> {
    const session = await this.db.session.findUnique({ where: { token } });
    if (!session) {
      throw new AuthError("Invalid session");
    }
    await this.deleteSession(session.id);
    logger.info(`AUTH LOGOUT: session ${session.id.slice(0, 8)}...`);
  }

  /**
   * Resolves a bearer token to its user. An expired session is deleted as a
   * side effect.
   *
   * @throws AuthError "Invalid token" when no session has that token.
   * @throws AuthError "Token expired" when the session's expiry has passed.
   */
  async validateToken(token: string): Promise<{ userId: string; email: string; role: string }> {
    const session = await this.db.session.findUnique({
      where: { token },
      include: { user: true },
    });

    if (!session) {
      throw new AuthError("Invalid token");
    }
    if (session.expiresAt < new Date()) {
      await this.deleteSession(session.id);
      throw new AuthError("Token expired");
    }

    return {
      userId: session.user.id,
      email: session.user.email,
      role: session.user.role,
    };
  }

  /**
   * Creates the first user as ADMIN and logs them in.
   * NOTE(review): the emptiness check in login() and the create here are not
   * atomic, so two concurrent first logins with different emails could both
   * end up as admins — confirm whether that needs guarding.
   */
  private async bootstrap(email: string, password: string): Promise<LoginResult> {
    const hashed = await bcrypt.hash(password, BCRYPT_ROUNDS);
    const user = await this.db.user.create({
      data: {
        email,
        password: hashed,
        role: "ADMIN",
        name: email.split("@")[0] ?? null,
      },
    });

    const session = await this.createSession(user.id);
    logger.info(`AUTH BOOTSTRAP: created admin user ${email} (${user.id.slice(0, 8)}...)`);

    return {
      token: session.token,
      expiresAt: session.expiresAt,
      userId: user.id,
      isBootstrap: true,
    };
  }

  /** Persists a new session with a random 32-byte hex token. */
  private async createSession(userId: string) {
    const token = randomBytes(32).toString("hex");
    const expiresAt = new Date(Date.now() + SESSION_EXPIRY_DAYS * 24 * 60 * 60 * 1000);

    return this.db.session.create({
      data: { userId, token, expiresAt },
    });
  }

  /**
   * Deletes a session row, treating "already gone" as success. A concurrent
   * logout or expiry sweep can remove the row between our read and this
   * delete; Prisma reports that as P2025, which must not surface as an
   * unexpected server error.
   */
  private async deleteSession(id: string): Promise<void> {
    try {
      await this.db.session.delete({ where: { id } });
    } catch (err) {
      if ((err as { code?: unknown } | null)?.code !== "P2025") throw err;
    }
  }

  /** Returns the decoy hash, computing it once on first use. */
  private getDecoyHash(): Promise<string> {
    if (!this.decoyHash) {
      this.decoyHash = bcrypt.hash(randomBytes(16).toString("hex"), BCRYPT_ROUNDS);
    }
    return this.decoyHash;
  }
}
|
||||||
|
|
||||||
|
/**
 * Raised for authentication failures (bad credentials, unknown or expired
 * session). Carries `name === "AuthError"` so it is recognisable in logs.
 */
export class AuthError extends Error {
  constructor(message: string) {
    super(message);
    // Replace the default "Error" label inherited from the base class.
    this.name = "AuthError";
  }
}
|
||||||
123
bastion/src/labd/src/services/rbac.ts
Normal file
123
bastion/src/labd/src/services/rbac.ts
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
// RBAC service: environment-scoped permission checks.
|
||||||
|
// Uses named RbacDefinition records with JSON subjects and roleBindings.
|
||||||
|
//
|
||||||
|
// Resolution flow:
|
||||||
|
// 1. Find all RbacDefinitions where subjects match the current user/groups
|
||||||
|
// 2. Collect all roleBindings from matching definitions
|
||||||
|
// 3. Check if any binding grants the requested action on the requested resource
|
||||||
|
|
||||||
|
import type { PrismaClient } from "@prisma/client";
|
||||||
|
import { logger } from "./logger.js";
|
||||||
|
|
||||||
|
/** A single permission question: may this user perform `action` on this target? */
export interface RbacCheck {
  /** Id of the requesting user; used to look up group memberships. */
  userId: string;
  /** Email of the requesting user; matched against "User" subjects. */
  userEmail: string;
  /** Global role of the requesting user; "ADMIN" bypasses all checks. */
  userRole: string;
  /** "view" | "edit" | "create" | "delete" | "run" | "admin" */
  action: string;
  /** Resource kind, e.g. "servers" | "databases" | "clusters" | "*". */
  resource?: string | undefined;
  /** Specific resource name. */
  name?: string | undefined;
  /** Specific environment name. */
  environment?: string | undefined;
}
|
||||||
|
|
||||||
|
/** Verdict of a permission check. */
export interface RbacResult {
  /** Whether the action is permitted. */
  allowed: boolean;
  /** Human-readable explanation of the verdict. */
  reason: string;
  /** Name of the RBAC definition that granted access, when one did. */
  matchedDefinition?: string;
}
|
||||||
|
|
||||||
|
/** One entry of a definition's JSON `subjects` list. */
interface StoredSubject {
  /** "User" (matched by email) or "Group" (matched by group name). */
  kind: string;
  /** The email or group name this subject refers to. */
  name: string;
}
|
||||||
|
|
||||||
|
/**
 * One entry of a definition's JSON `roleBindings` list. Every scope field is
 * optional; an omitted field places no restriction on the request.
 */
interface StoredBinding {
  /** Role being granted; determines which actions are covered. */
  role: string;
  /** Resource kind the grant is limited to; "*" means any. */
  resource?: string;
  /** Exact resource name the grant is limited to. */
  name?: string;
  /** Exact environment name the grant is limited to. */
  environment?: string;
  /** Single action the grant is limited to; "*" means any. */
  action?: string;
}
|
||||||
|
|
||||||
|
export class RbacService {
|
||||||
|
constructor(private readonly db: PrismaClient) {}
|
||||||
|
|
||||||
|
async check(req: RbacCheck): Promise<RbacResult> {
|
||||||
|
// Admin users bypass RBAC
|
||||||
|
if (req.userRole === "ADMIN") {
|
||||||
|
return { allowed: true, reason: "admin role" };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect user's group memberships
|
||||||
|
const memberships = await this.db.groupMember.findMany({
|
||||||
|
where: { userId: req.userId },
|
||||||
|
include: { group: true },
|
||||||
|
});
|
||||||
|
const groupNames = memberships.map((m) => m.group.name);
|
||||||
|
|
||||||
|
// Find all RBAC definitions
|
||||||
|
const definitions = await this.db.rbacDefinition.findMany();
|
||||||
|
|
||||||
|
for (const def of definitions) {
|
||||||
|
const subjects = def.subjects as unknown as StoredSubject[];
|
||||||
|
const bindings = def.roleBindings as unknown as StoredBinding[];
|
||||||
|
|
||||||
|
// Check if this definition's subjects match the user
|
||||||
|
const subjectMatch = subjects.some((s) => {
|
||||||
|
if (s.kind === "User" && s.name === req.userEmail) return true;
|
||||||
|
if (s.kind === "Group" && groupNames.includes(s.name)) return true;
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!subjectMatch) continue;
|
||||||
|
|
||||||
|
// Check if any binding grants the requested permission
|
||||||
|
for (const binding of bindings) {
|
||||||
|
if (this.bindingMatches(binding, req)) {
|
||||||
|
logger.info(`RBAC ALLOW: ${req.userEmail} ${req.action} ${req.resource ?? "*"}${req.name ? `/${req.name}` : ""} via ${def.name}`);
|
||||||
|
return {
|
||||||
|
allowed: true,
|
||||||
|
reason: `granted by ${def.name}`,
|
||||||
|
matchedDefinition: def.name,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(`RBAC DENY: ${req.userEmail} ${req.action} ${req.resource ?? "*"}${req.name ? `/${req.name}` : ""}`);
|
||||||
|
return {
|
||||||
|
allowed: false,
|
||||||
|
reason: `no matching role binding for ${req.action} on ${req.resource ?? "*"}`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Decides whether a single stored role binding authorizes the request.
 *
 * The binding's role must grant the requested action. After that, every
 * scope field set on the binding has to agree with the request; a scope
 * field left unset (falsy) places no restriction. `resource` and `action`
 * additionally accept the "*" wildcard, while `name` and `environment`
 * only match exactly.
 *
 * @param binding - one role binding taken from an RBAC definition
 * @param req - the access check being evaluated
 * @returns true when the binding covers the request
 */
private bindingMatches(binding: StoredBinding, req: RbacCheck): boolean {
  // The role is the gate: without it no scope can grant anything.
  if (!this.roleGrantsAction(binding.role, req.action)) {
    return false;
  }

  const resourceInScope =
    !binding.resource || binding.resource === "*" || binding.resource === req.resource;
  const nameInScope = !binding.name || binding.name === req.name;
  const environmentInScope = !binding.environment || binding.environment === req.environment;
  // Operation scope narrows a role (e.g. "run") down to specific actions.
  const actionInScope =
    !binding.action || binding.action === "*" || binding.action === req.action;

  return resourceInScope && nameInScope && environmentInScope && actionInScope;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `role` includes permission to perform `action`.
 *
 * Role names come from stored role bindings, so they are looked up in a
 * `Map` rather than a plain object: a plain-object lookup with a name such
 * as "constructor", "toString" or "__proto__" resolves to an inherited
 * `Object.prototype` member, and calling `.includes` on it throws instead
 * of denying. With a `Map`, any unknown role simply grants nothing.
 *
 * @param role - role name from a binding (admin, edit, create, delete, view, run)
 * @param action - the action being requested
 * @returns true only when the role is known and lists the action
 */
private roleGrantsAction(role: string, action: string): boolean {
  const grants = new Map<string, readonly string[]>([
    ["admin", ["view", "edit", "create", "delete", "run", "admin"]],
    ["edit", ["view", "edit", "create", "delete"]],
    ["create", ["create"]],
    ["delete", ["delete"]],
    ["view", ["view"]],
    ["run", ["run"]],
  ]);

  return grants.get(role)?.includes(action) ?? false;
}
|
||||||
|
}
|
||||||
108
bastion/src/labd/src/services/resource-store.ts
Normal file
108
bastion/src/labd/src/services/resource-store.ts
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
// Resource store: CRUD for generic resources with origin/managedBy tracking.
|
||||||
|
// All mutations go through this service so RBAC and audit are applied consistently.
|
||||||
|
|
||||||
|
import type { PrismaClient, Resource as PrismaResource, Prisma } from "@prisma/client";
|
||||||
|
import { logger } from "./logger.js";
|
||||||
|
|
||||||
|
/** Fields accepted by `ResourceStore.create`. */
export interface CreateResourceInput {
  /** Resource kind; stored as given. */
  kind: string;
  /** Resource name; stored as given. */
  name: string;
  /** Id of the environment the resource belongs to. */
  environmentId: string;
  /** Id of the owning account. */
  accountId: string;
  /** Where the resource came from; `create` stores "cli" when omitted. */
  origin?: string;
  /** Who manages the resource; `create` stores "manual" when omitted. */
  managedBy?: string;
  /** Optional reference to the source; `create` stores null when omitted. */
  sourceRef?: string;
  /** Desired specification, persisted as a JSON value. */
  desiredSpec: Record<string, unknown>;
}
|
||||||
|
|
||||||
|
/**
 * Partial update accepted by `ResourceStore.update`.
 * Only fields that are not `undefined` are written.
 */
export interface UpdateResourceInput {
  /** Replacement desired specification (JSON). */
  desiredSpec?: Record<string, unknown>;
  /** New status; the value "ready" also stamps `lastReconciled`. */
  status?: string;
  /** Message accompanying the status. */
  statusMessage?: string;
  /** Actual specification (JSON). */
  actualSpec?: Record<string, unknown>;
  /** Platform-side reference for the resource. */
  platformRef?: string;
}
|
||||||
|
|
||||||
|
/**
 * Optional equality filters for `ResourceStore.list`.
 * A field that is unset (or otherwise falsy) is not applied.
 */
export interface ListResourcesFilter {
  /** Keep only resources of this kind. */
  kind?: string | undefined;
  /** Keep only resources in this environment. */
  environmentId?: string | undefined;
  /** Keep only resources owned by this account. */
  accountId?: string | undefined;
  /** Keep only resources in this status. */
  status?: string | undefined;
}
|
||||||
|
|
||||||
|
/**
 * CRUD access to the Prisma `resource` model.
 *
 * Every mutation except `hardDelete` writes an info log line. `delete` is a
 * soft delete that only flips the status to "deleting"; `hardDelete`
 * removes the row.
 */
export class ResourceStore {
  constructor(private readonly db: PrismaClient) {}

  /**
   * Inserts a new resource in the "pending" status.
   * Omitted `origin` / `managedBy` / `sourceRef` are stored as "cli" /
   * "manual" / null respectively.
   */
  async create(input: CreateResourceInput): Promise<PrismaResource> {
    const data = {
      kind: input.kind,
      name: input.name,
      environmentId: input.environmentId,
      accountId: input.accountId,
      origin: input.origin ?? "cli",
      managedBy: input.managedBy ?? "manual",
      sourceRef: input.sourceRef ?? null,
      desiredSpec: input.desiredSpec as Prisma.InputJsonValue,
      status: "pending",
    };
    const resource = await this.db.resource.create({ data });

    // Only the first 8 characters of the environment id are logged.
    logger.info(`RESOURCE CREATED: ${input.kind}/${input.name} in env ${input.environmentId.slice(0, 8)}...`);
    return resource;
  }

  /** Looks a resource up by primary key; resolves to null when absent. */
  async get(id: string): Promise<PrismaResource | null> {
    const where = { id };
    return this.db.resource.findUnique({ where });
  }

  /** Looks a resource up by its (kind, name, environmentId) unique key. */
  async getByKindNameEnv(kind: string, name: string, environmentId: string): Promise<PrismaResource | null> {
    const uniqueKey = { kind, name, environmentId };
    return this.db.resource.findUnique({
      where: { kind_name_environmentId: uniqueKey },
    });
  }

  /** Lists resources matching the truthy filter fields, newest first. */
  async list(filter: ListResourcesFilter = {}): Promise<PrismaResource[]> {
    const where: Prisma.ResourceWhereInput = {};
    if (filter.kind) where.kind = filter.kind;
    if (filter.environmentId) where.environmentId = filter.environmentId;
    if (filter.accountId) where.accountId = filter.accountId;
    if (filter.status) where.status = filter.status;

    return this.db.resource.findMany({ where, orderBy: { createdAt: "desc" } });
  }

  /**
   * Applies the defined fields of `input` to the resource with the given id.
   * A transition to status "ready" also records `lastReconciled`.
   */
  async update(id: string, input: UpdateResourceInput): Promise<PrismaResource> {
    const data: Prisma.ResourceUpdateInput = {
      ...(input.desiredSpec !== undefined
        ? { desiredSpec: input.desiredSpec as Prisma.InputJsonValue }
        : {}),
      ...(input.status !== undefined ? { status: input.status } : {}),
      ...(input.statusMessage !== undefined ? { statusMessage: input.statusMessage } : {}),
      ...(input.actualSpec !== undefined
        ? { actualSpec: input.actualSpec as Prisma.InputJsonValue }
        : {}),
      ...(input.platformRef !== undefined ? { platformRef: input.platformRef } : {}),
      ...(input.status === "ready" ? { lastReconciled: new Date() } : {}),
    };

    const resource = await this.db.resource.update({ where: { id }, data });

    logger.info(`RESOURCE UPDATED: ${resource.kind}/${resource.name} -> ${input.status ?? "spec change"}`);
    return resource;
  }

  /**
   * Soft delete: marks the resource as "deleting" and logs it.
   * Does nothing when no resource with that id exists.
   */
  async delete(id: string): Promise<void> {
    const resource = await this.db.resource.findUnique({ where: { id } });
    if (resource == null) {
      return;
    }

    // Mark as deleting first (driver handles actual deletion)
    await this.db.resource.update({ where: { id }, data: { status: "deleting" } });

    logger.info(`RESOURCE DELETING: ${resource.kind}/${resource.name}`);
  }

  /** Removes the row outright. */
  async hardDelete(id: string): Promise<void> {
    const where = { id };
    await this.db.resource.delete({ where });
  }
}
|
||||||
144
bastion/src/labd/tests/bastions-machines.test.ts
Normal file
144
bastion/src/labd/tests/bastions-machines.test.ts
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||||
|
import Fastify from "fastify";
|
||||||
|
import { registerBastionRoutes } from "../src/routes/bastions.js";
|
||||||
|
import { bastionRegistry } from "../src/services/bastion-registry.js";
|
||||||
|
import type { DbClient } from "../src/server.js";
|
||||||
|
import type { BastionState } from "@lab/shared";
|
||||||
|
|
||||||
|
/**
 * Builds a DbClient stand-in backed by vitest mocks.
 *
 * `server.findMany` resolves to the supplied `servers`; every other method
 * resolves to a fixed empty-ish value (null, [], {} or a stub token).
 */
function createMockDb(servers: unknown[] = []): DbClient {
  // Shorthand for "a mock that resolves to this value".
  const resolving = (value: unknown) => vi.fn().mockResolvedValue(value);

  const server = {
    findMany: resolving(servers),
    findUnique: resolving(null),
    upsert: resolving({}),
  };
  const joinToken = {
    findUnique: resolving(null),
    findMany: resolving([]),
    create: resolving({ id: "t" }),
    update: resolving({}),
  };
  const bastion = {
    upsert: resolving({}),
    findMany: resolving([]),
    findUnique: resolving(null),
    update: resolving({}),
  };

  return {
    $queryRaw: resolving([{ "?column?": 1 }]),
    server,
    joinToken,
    bastion,
  };
}
|
||||||
|
|
||||||
|
/**
 * Registers a bastion with the shared registry using canned connection
 * details and the given live `state`.
 */
function registerFakeBastion(bastionId: string, state: BastionState): void {
  const noop = () => undefined;
  // socket is referenced only on commands, not during aggregation
  const socket = { on: noop, off: noop, send: noop, close: noop } as never;

  bastionRegistry.register({
    bastionId,
    hostname: "fake",
    network: "192.168.8.0/24",
    serverIp: "192.168.8.11",
    socket,
    connectedAt: new Date(),
    lastHeartbeat: new Date(),
    state,
  });
}
|
||||||
|
|
||||||
|
describe("GET /api/machines aggregation", () => {
  /**
   * Bastion state containing exactly one discovered machine and nothing in
   * the other buckets. Only the hardware numbers vary between tests.
   */
  function discoveredOnlyState(
    mac: string,
    hw: { cpu_model: string; cpu_cores: number; memory_gb: number },
  ): BastionState {
    return {
      discovered: {
        [mac]: {
          mac, product: "SER", board: "SER", serial: "x", manufacturer: "AZW",
          cpu_model: hw.cpu_model, cpu_cores: hw.cpu_cores, memory_gb: hw.memory_gb, arch: "x86_64",
          disks: [], nics: [], first_seen: "", last_seen: "",
        },
      },
      install_queue: {},
      installed: {},
      debug: {},
    };
  }

  /**
   * Serves GET /api/machines from a throwaway Fastify instance. The
   * instance is closed in `finally`, so it is released even if the request
   * itself throws, and tests can assert on the returned response without
   * having to remember to close anything.
   */
  async function fetchMachines(db: DbClient) {
    const app = Fastify({ logger: false });
    try {
      registerBastionRoutes(app, db);
      return await app.inject({ method: "GET", url: "/api/machines" });
    } finally {
      await app.close();
    }
  }

  beforeEach(() => {
    // The registry is shared across tests; start each one with no bastions.
    for (const b of bastionRegistry.getAll()) bastionRegistry.unregister(b.bastionId);
  });

  it("promotes a live-discovered MAC to installed when the DB has a real hostname+role for it", async () => {
    // Simulates the worker0-k8s0 bug: bastion restarted, lost its installed map,
    // rediscovered the machine via DHCP/PXE. DB still has hostname=worker0-k8s0,
    // role=infra, ip=192.168.8.23. Without the fix, the CLI sees a "discovered"
    // row with no hostname/role/IP. With the fix, the row is promoted to
    // "installed" with full identity preserved.
    const mac = "78:55:36:08:28:fb";
    registerFakeBastion(
      "b1",
      discoveredOnlyState(mac, { cpu_model: "AMD Ryzen 7 255", cpu_cores: 16, memory_gb: 58 }),
    );

    const res = await fetchMachines(
      createMockDb([
        { mac, hostname: "worker0-k8s0", role: "infra", ip: "192.168.8.23", status: "discovered", labels: {} },
      ]),
    );
    expect(res.statusCode).toBe(200);
    const body = JSON.parse(res.body);

    expect(body.discovered[mac]).toBeUndefined();
    expect(body.installed[mac]).toMatchObject({
      hostname: "worker0-k8s0",
      role: "infra",
      ip: "192.168.8.23",
      cpu_model: "AMD Ryzen 7 255",
      cpu_cores: 16,
      memory_gb: 58,
    });
  });

  it("leaves a fresh-discovery MAC in discovered when DB only has a discovery-shaped record", async () => {
    const mac = "aa:bb:cc:dd:ee:ff";
    registerFakeBastion(
      "b1",
      discoveredOnlyState(mac, { cpu_model: "AMD Ryzen 7", cpu_cores: 8, memory_gb: 32 }),
    );

    // Matches what labd writes on first discovery: hostname=product, role="unknown"
    const res = await fetchMachines(
      createMockDb([
        { mac, hostname: "SER", role: "unknown", ip: null, status: "discovered", labels: {} },
      ]),
    );
    expect(res.statusCode).toBe(200);
    const body = JSON.parse(res.body);

    expect(body.discovered[mac]).toBeDefined();
    expect(body.installed[mac]).toBeUndefined();
  });

  it("falls back to DB for MACs not in any live bucket", async () => {
    const mac = "11:22:33:44:55:66";
    // No bastions connected
    const res = await fetchMachines(
      createMockDb([
        { mac, hostname: "worker1-k8s0", role: "infra", ip: "192.168.8.13", status: "online", labels: {} },
      ]),
    );
    expect(res.statusCode).toBe(200);
    const body = JSON.parse(res.body);

    expect(body.installed[mac]).toMatchObject({
      hostname: "worker1-k8s0",
      role: "infra",
      ip: "192.168.8.13",
    });
  });
});
|
||||||
425
bastion/src/labd/tests/v2-smoke.test.ts
Normal file
425
bastion/src/labd/tests/v2-smoke.test.ts
Normal file
@@ -0,0 +1,425 @@
|
|||||||
|
// End-to-end smoke tests for the v2.0 Phase 1 surface (auth bootstrap, RBAC,
|
||||||
|
// audit correlation). These exercise the wiring in createApp(): the bearer
|
||||||
|
// auth middleware, the v2 routes scope, and the AuditService lifecycle.
|
||||||
|
//
|
||||||
|
// We don't spin up CockroachDB. Instead we provide a PrismaClient-shaped
|
||||||
|
// in-memory mock that matches the surface the v2 services actually touch.
|
||||||
|
// Tests follow the project convention of using mock DBs + Fastify.inject().
|
||||||
|
|
||||||
|
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
||||||
|
import bcrypt from "bcryptjs";
|
||||||
|
import { createApp } from "../src/server.js";
|
||||||
|
import type { DbClient } from "../src/server.js";
|
||||||
|
import type { AuditService } from "../src/services/audit.js";
|
||||||
|
|
||||||
|
const TEST_CONFIG = { port: 0, host: "127.0.0.1", databaseUrl: "", caDir: "/tmp", logLevel: "silent" };
|
||||||
|
|
||||||
|
/** In-memory user record held in `Stores.users`. */
interface UserRow {
  id: string;
  email: string;
  /** Stored password value; the bootstrap test expects a bcrypt hash here. */
  password: string;
  role: string;
  name: string | null;
}
|
||||||
|
/** In-memory session record held in `Stores.sessions`. */
interface SessionRow {
  id: string;
  userId: string;
  /** Bearer token the session is looked up by. */
  token: string;
  expiresAt: Date;
  /** Filled in by the mock only when a lookup asks for `include.user`. */
  user?: UserRow;
}
|
||||||
|
/** In-memory RBAC definition; `subjects` and `roleBindings` are left untyped. */
interface RbacDefRow {
  id: string;
  name: string;
  subjects: unknown;
  roleBindings: unknown;
}
|
||||||
|
/**
 * In-memory audit event as stored by the mocked `auditEvent.createMany`,
 * which supplies `id` and `timestamp` itself.
 */
interface AuditEventRow {
  id: string;
  eventKind: string;
  source: string;
  verified: boolean;
  userId: string | null;
  userName: string | null;
  environmentName: string | null;
  resourceKind: string | null;
  /** Shared by all events that belong to the same correlation chain. */
  correlationId: string | null;
  /** Id of the parent event, when the emitter supplied one. */
  parentEventId: string | null;
  details: unknown;
  result: string;
  error: string | null;
  durationMs: number | null;
  timestamp: Date;
}
|
||||||
|
|
||||||
|
/** Mutable in-memory tables that back the mock DbClient. */
interface Stores {
  /** Users keyed by id. */
  users: Map<string, UserRow>;
  /** Sessions keyed by id. */
  sessions: Map<string, SessionRow>;
  /** Group memberships, filtered by `userId` in the mock. */
  groupMembers: Array<{ userId: string; group: { name: string } }>;
  rbacDefs: RbacDefRow[];
  auditEvents: AuditEventRow[];
  resources: Array<Record<string, unknown>>;
}
|
||||||
|
|
||||||
|
/** Returns a fresh set of empty stores. */
function makeStores(): Stores {
  const empty: Stores = {
    users: new Map(),
    sessions: new Map(),
    groupMembers: [],
    rbacDefs: [],
    auditEvents: [],
    resources: [],
  };
  return empty;
}
|
||||||
|
|
||||||
|
/**
 * Builds a PrismaClient-shaped mock whose user, session, groupMember,
 * rbacDefinition, auditEvent and resource.findMany methods read and write
 * the given stores. Everything else is a bare `vi.fn()` stub or resolves to
 * an empty list.
 */
function makeMockDb(s: Stores): DbClient {
  // One counter shared by all generated ids: "user-1", "sess-2", "evt-3", ...
  let idCounter = 0;
  const newId = (prefix: string): string => {
    idCounter += 1;
    return `${prefix}-${idCounter}`;
  };

  const user = {
    count: vi.fn(async () => s.users.size),
    findUnique: vi.fn(async (args: { where: { email?: string; id?: string } }) => {
      const { email, id } = args.where;
      if (email) {
        const byEmail = [...s.users.values()].find((u) => u.email === email);
        if (byEmail) return byEmail;
      }
      // An email miss falls through to the id lookup.
      return id ? s.users.get(id) ?? null : null;
    }),
    create: vi.fn(async (args: { data: Omit<UserRow, "id"> }) => {
      const id = newId("user");
      const row: UserRow = { id, ...args.data };
      s.users.set(id, row);
      return row;
    }),
  };

  const session = {
    findUnique: vi.fn(async (args: { where: { token?: string; id?: string }; include?: { user?: boolean } }) => {
      const { token, id } = args.where;
      let found: SessionRow | undefined;
      if (token) {
        found = [...s.sessions.values()].find((sess) => sess.token === token);
      } else if (id) {
        found = s.sessions.get(id);
      }
      if (!found) return null;

      return args.include?.user ? { ...found, user: s.users.get(found.userId)! } : found;
    }),
    create: vi.fn(async (args: { data: { userId: string; token: string; expiresAt: Date } }) => {
      const id = newId("sess");
      const row: SessionRow = { id, ...args.data };
      s.sessions.set(id, row);
      return row;
    }),
    delete: vi.fn(async (args: { where: { id: string } }) => {
      s.sessions.delete(args.where.id);
      return null;
    }),
  };

  const groupMember = {
    findMany: vi.fn(async (args: { where: { userId: string } }) => {
      const { userId } = args.where;
      return s.groupMembers.filter((m) => m.userId === userId);
    }),
  };

  const auditEvent = {
    createMany: vi.fn(async (args: { data: Array<Omit<AuditEventRow, "id" | "timestamp">> }) => {
      // Every event of one batch shares the same timestamp.
      const ts = new Date();
      s.auditEvents.push(...args.data.map((e) => ({ id: newId("evt"), timestamp: ts, ...e })));
      return { count: args.data.length };
    }),
    findMany: vi.fn(async (args: { where?: Record<string, unknown>; orderBy?: unknown; take?: number }) => {
      const where = args.where ?? {};
      // Only these three equality filters are honoured; orderBy is ignored.
      const filterKeys = ["eventKind", "correlationId", "environmentName"] as const;
      const matching = s.auditEvents.filter((e) =>
        filterKeys.every((key) => !where[key] || e[key] === where[key]),
      );
      return matching.slice(0, args.take ?? 100);
    }),
  };

  return {
    $queryRaw: vi.fn(async () => [{ "?column?": 1 }]),
    server: { findMany: vi.fn(async () => []), findUnique: vi.fn(), upsert: vi.fn() },
    joinToken: { findUnique: vi.fn(), findMany: vi.fn(), create: vi.fn(), update: vi.fn() },
    bastion: { upsert: vi.fn(), findMany: vi.fn(), findUnique: vi.fn(), update: vi.fn() },

    user,
    session,
    groupMember,
    rbacDefinition: {
      findMany: vi.fn(async () => s.rbacDefs),
    },
    auditEvent,
    resource: {
      findMany: vi.fn(async () => s.resources),
      findUnique: vi.fn(),
      create: vi.fn(),
      update: vi.fn(),
      delete: vi.fn(),
    },
    environment: { findMany: vi.fn(async () => []), findUnique: vi.fn(), create: vi.fn() },
    account: { findMany: vi.fn(async () => []), findUnique: vi.fn(), create: vi.fn() },
    binding: { findMany: vi.fn(async () => []), create: vi.fn() },
  } as unknown as DbClient;
}
|
||||||
|
|
||||||
|
/**
 * Creates the app on top of a mock DB bound to `s`, waits for it to be
 * ready, and returns whatever `createApp` returned.
 */
async function buildApp(s: Stores) {
  const built = await createApp(TEST_CONFIG, makeMockDb(s));
  await built.app.ready();
  return built;
}
|
||||||
|
|
||||||
|
describe("v2 auth: bootstrap flow", () => {
  const ADMIN_EMAIL = "admin@itaz.eu";
  const ADMIN_PASSWORD = "s3cret-pw";

  let stores: Stores;
  let app: Awaited<ReturnType<typeof buildApp>>["app"];
  let auditService: AuditService;

  // POSTs the given body to the login endpoint.
  const postLogin = (payload: Record<string, string>) =>
    app.inject({ method: "POST", url: "/api/auth/login", payload });

  // Logs in with the admin credentials used throughout this suite.
  const loginAsAdmin = () => postLogin({ email: ADMIN_EMAIL, password: ADMIN_PASSWORD });

  beforeEach(async () => {
    stores = makeStores();
    ({ app, auditService } = await buildApp(stores));
  });

  afterEach(async () => {
    await app.close(); // triggers auditService.stop()
  });

  it("first login with no users seeds the admin and returns a session token", async () => {
    expect(stores.users.size).toBe(0);

    const resp = await loginAsAdmin();

    expect(resp.statusCode).toBe(200);
    const body = resp.json();
    expect(body.isBootstrap).toBe(true);
    expect(body.token).toMatch(/^[a-f0-9]{64}$/);
    expect(typeof body.expiresAt).toBe("string");

    expect(stores.users.size).toBe(1);
    const created = [...stores.users.values()][0]!;
    expect(created.email).toBe(ADMIN_EMAIL);
    expect(created.role).toBe("ADMIN");
    // Password is hashed, not stored plaintext.
    expect(created.password).not.toBe(ADMIN_PASSWORD);
    expect(await bcrypt.compare(ADMIN_PASSWORD, created.password)).toBe(true);

    // Bootstrap emits an audit event.
    await auditService.flushPending();
    const bootstrapEvents = stores.auditEvents.filter((e) => e.eventKind === "auth_bootstrap");
    expect(bootstrapEvents).toHaveLength(1);
    expect(bootstrapEvents[0]!.result).toBe("success");
    expect(bootstrapEvents[0]!.userName).toBe(ADMIN_EMAIL);
  });

  it("returns 400 for missing credentials", async () => {
    const resp = await postLogin({});
    expect(resp.statusCode).toBe(400);
  });

  it("second login uses normal flow (no isBootstrap)", async () => {
    // Bootstrap once
    await loginAsAdmin();
    expect(stores.users.size).toBe(1);

    // Login again
    const resp = await loginAsAdmin();

    expect(resp.statusCode).toBe(200);
    expect(resp.json().isBootstrap).toBe(false);
    expect(stores.users.size).toBe(1); // no new user
  });

  it("rejects wrong password with 401", async () => {
    // Seed admin
    await loginAsAdmin();

    const resp = await postLogin({ email: ADMIN_EMAIL, password: "wrong" });
    expect(resp.statusCode).toBe(401);

    // Failed login is also audited.
    await auditService.flushPending();
    const fails = stores.auditEvents.filter((e) => e.eventKind === "auth_login" && e.result === "failure");
    expect(fails).toHaveLength(1);
  });
});
|
||||||
|
|
||||||
|
describe("v2 RBAC: env-scoped denial", () => {
  let stores: Stores;
  let app: Awaited<ReturnType<typeof buildApp>>["app"];

  // Seeds user "u-1" with the given role plus a one-day session, and returns
  // the session's bearer token.
  async function seedSession(role: string): Promise<string> {
    const token = "test-token-" + role;
    stores.users.set("u-1", {
      id: "u-1",
      email: `${role.toLowerCase()}@itaz.eu`,
      password: "x",
      role,
      name: null,
    });
    stores.sessions.set("s-1", {
      id: "s-1",
      userId: "u-1",
      token,
      expiresAt: new Date(Date.now() + 86_400_000),
    });
    return token;
  }

  // GET /api/resources, optionally with an Authorization header value.
  const listResources = (authorization?: string) =>
    app.inject({
      method: "GET",
      url: "/api/resources",
      ...(authorization === undefined ? {} : { headers: { authorization } }),
    });

  beforeEach(async () => {
    stores = makeStores();
    const built = await buildApp(stores);
    app = built.app;
  });

  afterEach(async () => {
    await app.close();
  });

  it("non-admin user with no role bindings gets 403 on /api/resources", async () => {
    const token = await seedSession("EDITOR"); // not admin, no bindings

    const resp = await listResources(`Bearer ${token}`);

    expect(resp.statusCode).toBe(403);
    expect(resp.json().error).toMatch(/no matching role binding/);
  });

  it("missing/empty bearer token gets 401 (auth, not RBAC)", async () => {
    const r1 = await listResources();
    expect(r1.statusCode).toBe(401);

    const r2 = await listResources("Bearer ");
    expect(r2.statusCode).toBe(401);
  });

  it("invalid bearer token gets 401", async () => {
    const resp = await listResources("Bearer not-a-real-token");
    expect(resp.statusCode).toBe(401);
  });

  it("admin role bypasses RBAC", async () => {
    const token = await seedSession("ADMIN");

    const resp = await listResources(`Bearer ${token}`);

    expect(resp.statusCode).toBe(200);
    expect(resp.json()).toEqual([]);
  });

  it("user with binding for env A is denied for resources in env B", async () => {
    const token = await seedSession("EDITOR");
    stores.groupMembers.push({ userId: "u-1", group: { name: "team-a" } });
    stores.rbacDefs.push({
      id: "rbac-1",
      name: "team-a-edit-on-env-a",
      subjects: [{ kind: "Group", name: "team-a" }],
      roleBindings: [{ role: "edit", environment: "env-a" }],
    });

    // The binding is scoped to env-a, so it only applies to requests that
    // target env-a. This smoke test covers the targeted denial only:
    // creating a resource in env-b must be rejected.
    const respB = await app.inject({
      method: "POST",
      url: "/api/resources",
      headers: { authorization: `Bearer ${token}` },
      payload: { kind: "database", name: "x", environmentId: "env-b", accountId: "acc-1" },
    });

    expect(respB.statusCode).toBe(403);
    expect(respB.json().error).toMatch(/no matching role binding/);
  });
});
|
||||||
|
|
||||||
|
describe("v2 audit: correlation chain visible via /api/events", () => {
  let stores: Stores;
  let app: Awaited<ReturnType<typeof buildApp>>["app"];
  let auditService: AuditService;

  beforeEach(async () => {
    stores = makeStores();
    ({ app, auditService } = await buildApp(stores));
  });

  afterEach(async () => {
    await app.close();
  });

  it("emitted audit events are queryable by correlation id", async () => {
    // Seed admin so /api/events is accessible (it sits behind bearer auth)
    const loginResp = await app.inject({
      method: "POST",
      url: "/api/auth/login",
      payload: { email: "admin@itaz.eu", password: "pw" },
    });
    const { token } = loginResp.json();

    // Force flush so the bootstrap event is in the DB
    await auditService.flushPending();

    expect(stores.auditEvents.length).toBeGreaterThan(0);
    const bootstrap = stores.auditEvents.find((e) => e.eventKind === "auth_bootstrap")!;
    const { correlationId } = bootstrap;
    expect(correlationId).toMatch(/^corr_[a-f0-9]{16}$/);

    // Query /api/events filtered by correlation id
    const queryResp = await app.inject({
      method: "GET",
      url: `/api/events?correlation=${correlationId}`,
      headers: { authorization: `Bearer ${token}` },
    });

    expect(queryResp.statusCode).toBe(200);
    const events = queryResp.json() as Array<{ correlationId: string; eventKind: string }>;
    expect(events.length).toBe(1);
    expect(events[0]!.eventKind).toBe("auth_bootstrap");
    expect(events[0]!.correlationId).toBe(correlationId);
  });

  it("explicit parent/child correlation chain is preserved across emits", async () => {
    const correlationId = auditService.createCorrelation();
    // Fields common to both emitted events.
    const shared = { source: "test", result: "success", correlationId };

    auditService.emit({ eventKind: "test_parent", ...shared });
    auditService.emit({ eventKind: "test_child", ...shared, parentEventId: "evt-1" });

    await auditService.flushPending();

    const chain = stores.auditEvents.filter((e) => e.correlationId === correlationId);
    expect(chain).toHaveLength(2);

    const kinds = chain.map((e) => e.eventKind).sort();
    expect(kinds).toEqual(["test_child", "test_parent"]);

    const child = chain.find((e) => e.eventKind === "test_child")!;
    expect(child.parentEventId).toBe("evt-1");
  });
});
|
||||||
@@ -1,19 +1,21 @@
|
|||||||
// Hardening: Pod Security Standards, certificate check, log rotation.
|
// Hardening: Pod Security Standards, certificate check, journald cap, storage.
|
||||||
|
|
||||||
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
|
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
|
||||||
import { runSequential } from "../utils.js";
|
import { runSequential } from "../utils.js";
|
||||||
import { applyPodSecurityStandards } from "../operations/pod-security.js";
|
import { applyPodSecurityStandards } from "../operations/pod-security.js";
|
||||||
import { checkCertExpiry } from "../operations/cert-check.js";
|
import { checkCertExpiry } from "../operations/cert-check.js";
|
||||||
import { configureLogRotation } from "../operations/log-rotation.js";
|
import { configureLogRotation } from "../operations/log-rotation.js";
|
||||||
|
import { configureJournaldLimits } from "../operations/journald-limits.js";
|
||||||
import { configureLonghornDisk } from "../operations/longhorn-disk.js";
|
import { configureLonghornDisk } from "../operations/longhorn-disk.js";
|
||||||
|
|
||||||
export const hardeningGroup: OperationGroup = {
|
export const hardeningGroup: OperationGroup = {
|
||||||
name: "hardening",
|
name: "hardening",
|
||||||
description: "Pod security, certificate check, log rotation, storage",
|
description: "Pod security, certificate check, journald cap, storage",
|
||||||
operations: [
|
operations: [
|
||||||
{ name: "Apply Pod Security Standards", fn: applyPodSecurityStandards },
|
{ name: "Apply Pod Security Standards", fn: applyPodSecurityStandards },
|
||||||
{ name: "Check certificate expiry", fn: checkCertExpiry },
|
{ name: "Check certificate expiry", fn: checkCertExpiry },
|
||||||
{ name: "Configure log rotation", fn: configureLogRotation },
|
{ name: "Decommission file-based audit logs", fn: configureLogRotation },
|
||||||
|
{ name: "Configure journald disk cap", fn: configureJournaldLimits },
|
||||||
{ name: "Configure Longhorn disk", fn: configureLonghornDisk },
|
{ name: "Configure Longhorn disk", fn: configureLonghornDisk },
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -76,7 +76,6 @@ sed -i 's/^SELINUX=enforcing/SELINUX=permissive/' /etc/selinux/config 2>/dev/nul
|
|||||||
# ── 5b. Create k3s config directory ──
|
# ── 5b. Create k3s config directory ──
|
||||||
echo "[5/10] Writing k3s server configuration..."
|
echo "[5/10] Writing k3s server configuration..."
|
||||||
mkdir -p /etc/rancher/k3s
|
mkdir -p /etc/rancher/k3s
|
||||||
mkdir -p /var/log/kubernetes
|
|
||||||
|
|
||||||
cat > /etc/rancher/k3s/config.yaml << 'K3S_CONFIG'
|
cat > /etc/rancher/k3s/config.yaml << 'K3S_CONFIG'
|
||||||
# k3s server configuration — CIS hardened
|
# k3s server configuration — CIS hardened
|
||||||
@@ -91,13 +90,10 @@ disable:
|
|||||||
- servicelb
|
- servicelb
|
||||||
- traefik
|
- traefik
|
||||||
|
|
||||||
# API server hardening
|
# API server hardening (audit-log-path=- routes audit to journald via stdout)
|
||||||
kube-apiserver-arg:
|
kube-apiserver-arg:
|
||||||
- "anonymous-auth=false"
|
- "anonymous-auth=false"
|
||||||
- "audit-log-path=/var/log/kubernetes/audit.log"
|
- "audit-log-path=-"
|
||||||
- "audit-log-maxage=30"
|
|
||||||
- "audit-log-maxbackup=10"
|
|
||||||
- "audit-log-maxsize=100"
|
|
||||||
- "audit-policy-file=/etc/rancher/k3s/audit-policy.yaml"
|
- "audit-policy-file=/etc/rancher/k3s/audit-policy.yaml"
|
||||||
- "enable-admission-plugins=NodeRestriction,PodSecurity"
|
- "enable-admission-plugins=NodeRestriction,PodSecurity"
|
||||||
- "request-timeout=300s"
|
- "request-timeout=300s"
|
||||||
|
|||||||
@@ -78,9 +78,10 @@ export class K3sModule implements Module {
|
|||||||
return toModuleResult("install", [...prepResults, ...k3sResults], start);
|
return toModuleResult("install", [...prepResults, ...k3sResults], start);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Phase 3: Networking (server only — agents don't install Cilium)
|
// Phase 3: Networking (initial server only — joining servers get Cilium via daemonset)
|
||||||
let netResults: OperationResult[] = [];
|
let netResults: OperationResult[] = [];
|
||||||
if (isServer) {
|
const isJoiningServer = isServer && !!opCtx.config.k3sServerUrl;
|
||||||
|
if (isServer && !isJoiningServer) {
|
||||||
netResults = await runNetworking(opCtx);
|
netResults = await runNetworking(opCtx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
194
bastion/src/modules/modules/k3s/src/operations/etcd-recover.ts
Normal file
194
bastion/src/modules/modules/k3s/src/operations/etcd-recover.ts
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
// Recover a broken etcd member by removing it from the cluster, wiping its
|
||||||
|
// local state, and restarting k3s so it rejoins as a fresh member.
|
||||||
|
//
|
||||||
|
// Use case: a node panics on startup with
|
||||||
|
// "tocommit(N+1) is out of range [lastIndex(N)]. Was the raft log corrupted,
|
||||||
|
// truncated, or lost?"
|
||||||
|
// This means the local raft WAL is missing the last entry the leader thinks
|
||||||
|
// the follower acknowledged (lost write, unclean shutdown, etc). The fix is
|
||||||
|
// always the same and well-documented; this codifies it so we don't fumble
|
||||||
|
// the procedure under pressure.
|
||||||
|
//
|
||||||
|
// Preconditions:
|
||||||
|
// - At least one healthy peer is reachable so the cluster has quorum after
|
||||||
|
// we remove the broken member. (For a 3-node cluster: 2 healthy. For a
|
||||||
|
// 5-node: 3 healthy.) If quorum would be lost, this function refuses.
|
||||||
|
// - SSH access to both the broken node and a healthy peer.
|
||||||
|
// - etcdctl available on the healthy peer (k3s does not bundle it; the
|
||||||
|
// procedure installs it on demand on Fedora).
|
||||||
|
|
||||||
|
import type { SshClient } from "../types.js";
|
||||||
|
|
||||||
|
/**
 * Client TLS material passed to etcdctl. All three files live under the k3s
 * server's etcd TLS directory on the node where etcdctl runs.
 */
const ETCD_TLS = {
  ca: "/var/lib/rancher/k3s/server/tls/etcd/server-ca.crt",
  cert: "/var/lib/rancher/k3s/server/tls/etcd/server-client.crt",
  key: "/var/lib/rancher/k3s/server/tls/etcd/server-client.key",
} as const;

/** Default timeout (milliseconds) applied to the SSH commands in this file. */
const SSH_TIMEOUT = 60 * 1000;
|
||||||
|
|
||||||
|
/** Inputs for {@link recoverEtcdMember}. */
export interface RecoverEtcdMemberOptions {
  /**
   * SSH connection to the node whose etcd member is broken (the one that
   * panics on startup). k3s is stopped, wiped and restarted through it.
   */
  broken: SshClient;
  /**
   * SSH connection to a healthy server in the same cluster. All etcdctl
   * commands (member list / member remove) are executed through it.
   */
  peer: SshClient;
  /**
   * Kubernetes node name of the broken node. It is compared against etcd
   * member names to locate the member id to remove.
   */
  brokenHostname: string;
  /** Optional sink for progress messages; when omitted nothing is logged. */
  log?: (msg: string) => void;
}
|
||||||
|
|
||||||
|
/** Outcome reported by {@link recoverEtcdMember}. */
export interface RecoverEtcdMemberResult {
  /** True only when the node was removed and observed rejoining the cluster. */
  success: boolean;
  /** Whether the procedure modified cluster or node state. */
  changed: boolean;
  /** Human-readable summary of what happened. */
  message: string;
  /** Id of the etcd member created by the rejoin, when it could be determined. */
  newMemberId?: string;
  /** Id of the previous etcd member that was removed. */
  removedMemberId?: string;
  /** Underlying error detail for failed runs. */
  error?: string;
}
|
||||||
|
|
||||||
|
/**
 * Build the shell command line for one etcdctl (API v3) invocation.
 *
 * The command targets the local etcd endpoint on 127.0.0.1:2379, authenticates
 * with the certificates in ETCD_TLS, and limits each command to 10 seconds.
 *
 * @param subcmd - etcdctl sub-command and arguments, appended verbatim.
 * @returns The full command string, ready to hand to an SSH exec call.
 */
function etcdctl(subcmd: string): string {
  const { ca, cert, key } = ETCD_TLS;
  const tlsFlags = `--cacert=${ca} --cert=${cert} --key=${key}`;
  const connectionFlags = "--endpoints=https://127.0.0.1:2379 --command-timeout=10s";
  return `ETCDCTL_API=3 etcdctl ${tlsFlags} ${connectionFlags} ${subcmd}`;
}
|
||||||
|
|
||||||
|
/**
 * Make sure an `etcdctl` binary is available on the peer.
 *
 * Probes with `command -v`; when the probe fails or prints nothing, attempts
 * a `dnf install -y etcd`. The install result is intentionally not inspected:
 * on hosts without dnf the failure shows up on the next etcdctl invocation.
 *
 * @param peer - SSH client for the healthy peer that will run etcdctl.
 */
async function ensureEtcdctl(peer: SshClient): Promise<void> {
  const { exitCode, stdout } = await peer.exec("command -v etcdctl 2>/dev/null", {
    timeoutMs: 5_000,
  });
  const alreadyInstalled = exitCode === 0 && stdout.trim() !== "";
  if (!alreadyInstalled) {
    // Best effort only — see the function comment for why errors are not checked.
    await peer.exec("dnf install -y etcd 2>&1", { timeoutMs: 120_000 });
  }
}
|
||||||
|
|
||||||
|
/**
 * Run `etcdctl member list` on the peer and parse its simple (CSV-like) output.
 *
 * Each output row has the form
 *   <hex-id>, <status>, <name>, <peer-urls>, <client-urls>, <isLearner>
 * of which only the first (id) and third (name) columns are kept. Rows with
 * an empty id are discarded; a missing name becomes the empty string.
 *
 * @param peer - SSH client for a healthy peer with etcdctl installed.
 * @returns One `{ id, name }` entry per member row.
 * @throws Error when etcdctl exits non-zero (message carries stderr or stdout).
 */
async function getMemberList(peer: SshClient): Promise<Array<{ id: string; name: string }>> {
  const listing = await peer.exec(etcdctl("member list"), { timeoutMs: SSH_TIMEOUT });
  if (listing.exitCode !== 0) {
    throw new Error(`etcdctl member list failed: ${listing.stderr || listing.stdout}`);
  }

  const members: Array<{ id: string; name: string }> = [];
  for (const rawLine of listing.stdout.split("\n")) {
    const row = rawLine.trim();
    if (!row) continue;

    const columns = row.split(",").map((column) => column.trim());
    const id = columns[0] ?? "";
    if (!id) continue;

    members.push({ id, name: columns[2] ?? "" });
  }
  return members;
}
|
||||||
|
|
||||||
|
/**
 * Decide whether an etcd member name belongs to the given node.
 *
 * Member names are `<hostname>-<random-suffix>`. A plain prefix test is not
 * enough: "host-1" is a prefix of "host-10-abc", and the empty string is a
 * prefix of everything. We therefore require either an exact match or the
 * hostname followed by exactly one dash-free suffix.
 */
function memberBelongsToHost(memberName: string, hostname: string): boolean {
  if (!hostname) return false;
  if (memberName === hostname) return true;
  const prefix = `${hostname}-`;
  if (!memberName.startsWith(prefix)) return false;
  const suffix = memberName.slice(prefix.length);
  return suffix.length > 0 && !suffix.includes("-");
}

/**
 * Recover a broken etcd member: remove it from the cluster via a healthy peer,
 * archive/wipe its local server state, restart k3s and wait for it to rejoin.
 *
 * Steps:
 *   1. stop k3s on the broken node
 *   2. `etcdctl member remove <id>` on the peer
 *   3. move `server/db` aside and delete `server/tls` + `server/cred`
 *   4. start k3s, then poll the member list (60 tries, 5 s apart) until a
 *      member with the node's name and a different id shows up
 *
 * Safety checks performed before anything is modified:
 *   - `brokenHostname` must be non-empty
 *   - the cluster must currently list at least 3 members
 *   - exactly one member must match the hostname (ambiguous matches refuse)
 *
 * This function never throws; unexpected errors are reported in the result.
 * `changed` reflects whether any mutating step had already run when the
 * procedure stopped, so callers can tell a clean refusal from a half-finished
 * recovery that needs manual follow-up.
 *
 * @param opts - SSH clients for the broken node and a healthy peer, the node
 *   name to recover, and an optional progress logger.
 * @returns Result describing success, whether state changed, and member ids.
 */
export async function recoverEtcdMember(
  opts: RecoverEtcdMemberOptions,
): Promise<RecoverEtcdMemberResult> {
  const log = opts.log ?? (() => {});
  const hostname = opts.brokenHostname.trim();

  // Tracked outside the try block so the catch-all can report accurately.
  let changed = false;
  let removedMemberId: string | undefined;

  try {
    if (!hostname) {
      return {
        success: false,
        changed: false,
        message: "Refusing to recover: brokenHostname is empty",
        error: "brokenHostname must be the node name of the broken server",
      };
    }

    log(`Looking up etcd member id for ${hostname} via peer...`);
    await ensureEtcdctl(opts.peer);

    const members = await getMemberList(opts.peer);
    if (members.length < 3) {
      return {
        success: false,
        changed: false,
        message: "Refusing to remove a member from a cluster with <3 members (quorum would be lost)",
        error: `member count = ${members.length}`,
      };
    }

    const candidates = members.filter((m) => memberBelongsToHost(m.name, hostname));
    if (candidates.length > 1) {
      return {
        success: false,
        changed: false,
        message: `Refusing to recover: multiple etcd members match hostname ${hostname}`,
        error: `matches: ${candidates.map((m) => `${m.id} (${m.name})`).join(", ")}`,
      };
    }
    const broken = candidates[0];
    if (!broken) {
      return {
        success: false,
        changed: false,
        message: `No etcd member found matching hostname ${hostname}`,
        error: `members: ${members.map((m) => m.name).join(", ")}`,
      };
    }
    log(`Broken member: ${broken.id} (${broken.name})`);

    log("Step 1/4: stopping k3s on broken node");
    await opts.broken.exec("systemctl stop k3s 2>&1", { timeoutMs: SSH_TIMEOUT });
    // From here on the node's state has been touched.
    changed = true;

    log("Step 2/4: removing broken etcd member from cluster");
    const remove = await opts.peer.exec(
      etcdctl(`member remove ${broken.id}`),
      { timeoutMs: SSH_TIMEOUT },
    );
    if (remove.exitCode !== 0) {
      return {
        success: false,
        changed,
        message: "etcdctl member remove failed",
        error: remove.stderr || remove.stdout,
        // Kept for compatibility: this is the id whose removal was attempted.
        removedMemberId: broken.id,
      };
    }
    removedMemberId = broken.id;

    log("Step 3/4: archiving corrupt etcd state and stale TLS/cred dirs on broken node");
    const ts = Math.floor(Date.now() / 1000);
    await opts.broken.exec(
      [
        // The db directory is kept (renamed) for post-mortem; a missing db is fine.
        `mv /var/lib/rancher/k3s/server/db /var/lib/rancher/k3s/server/db.corrupt-${ts} 2>/dev/null || true`,
        "rm -rf /var/lib/rancher/k3s/server/tls /var/lib/rancher/k3s/server/cred",
      ].join(" && "),
      { timeoutMs: SSH_TIMEOUT },
    );

    log("Step 4/4: starting k3s on broken node — it will rejoin");
    await opts.broken.exec("systemctl start k3s 2>&1", { timeoutMs: SSH_TIMEOUT });

    // Poll for rejoin. The new member-id is what the cluster assigns on join.
    let newMemberId: string | undefined;
    for (let attempt = 0; attempt < 60; attempt++) {
      await new Promise((resolve) => setTimeout(resolve, 5_000));
      try {
        const after = await getMemberList(opts.peer);
        const rejoined = after.find(
          (m) => memberBelongsToHost(m.name, hostname) && m.id !== broken.id,
        );
        if (rejoined) {
          newMemberId = rejoined.id;
          break;
        }
      } catch {
        // peer may briefly be unreachable mid-rejoin — keep polling
      }
    }

    if (!newMemberId) {
      return {
        success: false,
        changed,
        message: "k3s started but new member did not appear in cluster within 5 minutes",
        removedMemberId: broken.id,
      };
    }

    log(`Rejoined as ${newMemberId}`);
    return {
      success: true,
      changed,
      message: `Recovered: removed ${broken.id}, rejoined as ${newMemberId}`,
      removedMemberId: broken.id,
      newMemberId,
    };
  } catch (err) {
    return {
      success: false,
      // Report real progress: a failure after stop/remove is NOT a no-op.
      changed,
      message: "Recovery failed",
      error: err instanceof Error ? err.message : String(err),
      ...(removedMemberId !== undefined ? { removedMemberId } : {}),
    };
  }
}
|
||||||
@@ -11,7 +11,13 @@ export { installK3sBinary } from "./k3s-install.js";
|
|||||||
export { installCilium } from "./cilium.js";
|
export { installCilium } from "./cilium.js";
|
||||||
export { fixCoreDnsUpstream } from "./dns-fix.js";
|
export { fixCoreDnsUpstream } from "./dns-fix.js";
|
||||||
export { configureLogRotation } from "./log-rotation.js";
|
export { configureLogRotation } from "./log-rotation.js";
|
||||||
|
export { configureJournaldLimits } from "./journald-limits.js";
|
||||||
export { applyDefaultNetworkPolicies } from "./network-policy.js";
|
export { applyDefaultNetworkPolicies } from "./network-policy.js";
|
||||||
export { applyPodSecurityStandards } from "./pod-security.js";
|
export { applyPodSecurityStandards } from "./pod-security.js";
|
||||||
export { checkCertExpiry } from "./cert-check.js";
|
export { checkCertExpiry } from "./cert-check.js";
|
||||||
export { configureLonghornDisk } from "./longhorn-disk.js";
|
export { configureLonghornDisk } from "./longhorn-disk.js";
|
||||||
|
export { recoverEtcdMember } from "./etcd-recover.js";
|
||||||
|
export type {
|
||||||
|
RecoverEtcdMemberOptions,
|
||||||
|
RecoverEtcdMemberResult,
|
||||||
|
} from "./etcd-recover.js";
|
||||||
|
|||||||
@@ -13,10 +13,11 @@ export const enableIscsi: Operation = async (ctx): Promise<OperationResult> => {
|
|||||||
|
|
||||||
// Install the package (detect distro)
|
// Install the package (detect distro)
|
||||||
const osRelease = await ctx.ssh.exec("cat /etc/os-release", sshOpts(ctx));
|
const osRelease = await ctx.ssh.exec("cat /etc/os-release", sshOpts(ctx));
|
||||||
const isFedora = osRelease.stdout.includes("fedora") || osRelease.stdout.includes("rhel") || osRelease.stdout.includes("centos");
|
const osLower = osRelease.stdout.toLowerCase();
|
||||||
|
const isFedora = osLower.includes("fedora") || osLower.includes("rhel") || osLower.includes("centos");
|
||||||
|
|
||||||
const pkg = isFedora ? "iscsi-initiator-utils" : "open-iscsi";
|
const pkg = isFedora ? "iscsi-initiator-utils" : "open-iscsi";
|
||||||
const installCmd = isFedora ? `dnf install -y ${pkg}` : `apt-get install -y ${pkg}`;
|
const installCmd = isFedora ? `sudo dnf install -y ${pkg}` : `sudo apt-get install -y ${pkg}`;
|
||||||
|
|
||||||
const install = await ctx.ssh.exec(installCmd, { timeoutMs: 120_000 });
|
const install = await ctx.ssh.exec(installCmd, { timeoutMs: 120_000 });
|
||||||
if (install.exitCode !== 0) {
|
if (install.exitCode !== 0) {
|
||||||
@@ -24,7 +25,7 @@ export const enableIscsi: Operation = async (ctx): Promise<OperationResult> => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Enable and start
|
// Enable and start
|
||||||
await ctx.ssh.exec("systemctl enable --now iscsid", sshOpts(ctx));
|
await ctx.ssh.exec("sudo systemctl enable --now iscsid", sshOpts(ctx));
|
||||||
|
|
||||||
return { success: true, changed: true, message: `Installed ${pkg} and enabled iscsid` };
|
return { success: true, changed: true, message: `Installed ${pkg} and enabled iscsid` };
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -0,0 +1,33 @@
|
|||||||
|
// Cap journald disk usage so audit logs (which now flow through journald via
|
||||||
|
// kube-apiserver's stdout) cannot fill /var/log. Default journald uses up to
|
||||||
|
// 10% of the filesystem, capped at 4 GB. In a /var/log of ~10 GB shared with
|
||||||
|
// other services, that's still room for audit volume to evict useful logs.
|
||||||
|
// 2 GB / 200 MB-per-file is a comfortable middle.
|
||||||
|
|
||||||
|
import type { Operation, OperationResult } from "../types.js";
|
||||||
|
import { sshOpts, writeRemoteFile } from "../utils.js";
|
||||||
|
|
||||||
|
/**
 * journald drop-in body: 2 GB total cap, keep 1 GB free on the filesystem,
 * and at most 200 MB per journal file. Ends with a trailing newline.
 */
const DROPIN_CONTENT = [
  "[Journal]",
  "SystemMaxUse=2G",
  "SystemKeepFree=1G",
  "SystemMaxFileSize=200M",
  "",
].join("\n");

/** Location of the drop-in on the managed node. */
const DROPIN_PATH = "/etc/systemd/journald.conf.d/10-k3s-audit-cap.conf";
|
||||||
|
|
||||||
|
/**
 * Install the journald size-cap drop-in and, only when the file content
 * actually changed, restart journald so the new limits take effect without a
 * reboot.
 *
 * The restart command is best-effort (`|| true`) and its result is not
 * inspected, so this operation always reports success once the file has been
 * written.
 *
 * @param ctx - Operation context providing the SSH client and config.
 * @returns `changed: true` when the drop-in was written, `false` when it was
 *   already up to date.
 */
export const configureJournaldLimits: Operation = async (ctx): Promise<OperationResult> => {
  const changed = await writeRemoteFile(ctx, DROPIN_PATH, DROPIN_CONTENT);

  if (!changed) {
    return {
      success: true,
      changed,
      message: "journald limits already configured",
    };
  }

  // Signal journald first, then restart it so the drop-in is re-read.
  const reloadCommand = [
    "systemctl kill --signal=SIGUSR2 systemd-journald 2>/dev/null",
    "systemctl restart systemd-journald 2>&1 || true",
  ].join("; ");
  await ctx.ssh.exec(reloadCommand, sshOpts(ctx));

  return {
    success: true,
    changed,
    message: "journald limits configured (2 GB cap)",
  };
};
|
||||||
@@ -9,7 +9,18 @@ function isServerRole(role: string): boolean {
|
|||||||
|
|
||||||
function generateServerConfig(config: K3sConfig): string {
|
function generateServerConfig(config: K3sConfig): string {
|
||||||
const tlsSans = [config.hostname, config.ip, ...(config.tlsSans ?? [])];
|
const tlsSans = [config.hostname, config.ip, ...(config.tlsSans ?? [])];
|
||||||
return `# k3s server configuration — CIS hardened
|
const isJoining = !!config.k3sServerUrl;
|
||||||
|
const clusterLines = isJoining
|
||||||
|
? `server: "${config.k3sServerUrl}"\ntoken: "${config.k3sToken}"`
|
||||||
|
: "cluster-init: true";
|
||||||
|
// audit-log-path=- routes audit events to k3s.service's stdout, which systemd
|
||||||
|
// forwards to journald. journald enforces its own size caps (see
|
||||||
|
// configureJournaldLimits) so audit volume cannot fill the disk. File-based
|
||||||
|
// audit logs led to /var/log/kubernetes growing to 7+ GB because apiserver's
|
||||||
|
// own rotation produced files that any logrotate glob would double-rotate
|
||||||
|
// and never expire.
|
||||||
|
return `# k3s server configuration — CIS hardened, etcd HA
|
||||||
|
${clusterLines}
|
||||||
protect-kernel-defaults: true
|
protect-kernel-defaults: true
|
||||||
secrets-encryption: true
|
secrets-encryption: true
|
||||||
write-kubeconfig-mode: "0640"
|
write-kubeconfig-mode: "0640"
|
||||||
@@ -25,10 +36,7 @@ node-label:
|
|||||||
|
|
||||||
kube-apiserver-arg:
|
kube-apiserver-arg:
|
||||||
- "anonymous-auth=false"
|
- "anonymous-auth=false"
|
||||||
- "audit-log-path=/var/log/kubernetes/audit.log"
|
- "audit-log-path=-"
|
||||||
- "audit-log-maxage=30"
|
|
||||||
- "audit-log-maxbackup=10"
|
|
||||||
- "audit-log-maxsize=100"
|
|
||||||
- "audit-policy-file=/etc/rancher/k3s/audit-policy.yaml"
|
- "audit-policy-file=/etc/rancher/k3s/audit-policy.yaml"
|
||||||
- "enable-admission-plugins=NodeRestriction,PodSecurity"
|
- "enable-admission-plugins=NodeRestriction,PodSecurity"
|
||||||
- "request-timeout=300s"
|
- "request-timeout=300s"
|
||||||
@@ -56,7 +64,7 @@ kubelet-arg:
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const writeK3sConfig: Operation = async (ctx): Promise<OperationResult> => {
|
export const writeK3sConfig: Operation = async (ctx): Promise<OperationResult> => {
|
||||||
await ctx.ssh.exec("mkdir -p /etc/rancher/k3s /var/log/kubernetes", sshOpts(ctx));
|
await ctx.ssh.exec("mkdir -p /etc/rancher/k3s", sshOpts(ctx));
|
||||||
|
|
||||||
const content = isServerRole(ctx.config.role)
|
const content = isServerRole(ctx.config.role)
|
||||||
? generateServerConfig(ctx.config)
|
? generateServerConfig(ctx.config)
|
||||||
|
|||||||
@@ -15,8 +15,21 @@ export const installK3sBinary: Operation = async (ctx): Promise<OperationResult>
|
|||||||
const alreadyInstalled = version.exitCode === 0;
|
const alreadyInstalled = version.exitCode === 0;
|
||||||
|
|
||||||
if (isServer) {
|
if (isServer) {
|
||||||
|
// Clean stale server state when joining an existing cluster
|
||||||
|
// (TLS certs from a previous run cause "newer than datastore" fatal error)
|
||||||
|
if (ctx.config.k3sServerUrl && ctx.config.k3sToken) {
|
||||||
|
await ctx.ssh.exec(
|
||||||
|
"rm -rf /var/lib/rancher/k3s/server/tls /var/lib/rancher/k3s/server/cred /var/lib/rancher/k3s/server/db",
|
||||||
|
sshOpts(ctx),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If joining an existing cluster, pass K3S_URL and K3S_TOKEN
|
||||||
|
const joinEnv = ctx.config.k3sServerUrl && ctx.config.k3sToken
|
||||||
|
? `K3S_URL="${ctx.config.k3sServerUrl}" K3S_TOKEN="${ctx.config.k3sToken}"`
|
||||||
|
: "";
|
||||||
const result = await ctx.ssh.exec(
|
const result = await ctx.ssh.exec(
|
||||||
'curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server" INSTALL_K3S_SKIP_SELINUX_RPM=true sh -',
|
`curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server" INSTALL_K3S_SKIP_SELINUX_RPM=true ${joinEnv} sh -`,
|
||||||
{ timeoutMs: 300_000 },
|
{ timeoutMs: 300_000 },
|
||||||
);
|
);
|
||||||
if (result.exitCode !== 0) {
|
if (result.exitCode !== 0) {
|
||||||
|
|||||||
@@ -1,25 +1,44 @@
|
|||||||
// Configure log rotation for k3s.
|
// Decommission file-based k8s audit logging in favor of journald.
|
||||||
|
//
|
||||||
|
// Earlier versions wrote audit events to /var/log/kubernetes/audit.log and
|
||||||
|
// rotated them with a logrotate rule. Two failure modes followed: kube-apiserver
|
||||||
|
// rotated internally (audit-{ts}.log), the *.log glob in logrotate
|
||||||
|
// double-rotated those (-{date}), and the resulting filename matched no
|
||||||
|
// retention policy, so the directory grew unbounded (we observed 7+ GB).
|
||||||
|
//
|
||||||
|
// k3s now sets audit-log-path=- so audit goes to stdout → journald, which
|
||||||
|
// enforces SystemMaxUse caps. This operation removes the obsolete logrotate
|
||||||
|
// rule and reaps any audit files left behind by the old setup. Idempotent: on
|
||||||
|
// fresh installs everything is already absent and the operation is a no-op.
|
||||||
|
|
||||||
import type { Operation, OperationResult } from "../types.js";
|
import type { Operation, OperationResult } from "../types.js";
|
||||||
import { writeRemoteFile } from "../utils.js";
|
import { sshOpts } from "../utils.js";
|
||||||
|
|
||||||
const LOGROTATE_CONFIG = `/var/log/kubernetes/*.log {
|
const REMOVE_LOGROTATE = "rm -f /etc/logrotate.d/k3s";
|
||||||
daily
|
|
||||||
rotate 14
|
// Bounded by a max-depth and explicit name pattern so we never reach outside
|
||||||
compress
|
// the deprecated audit-log directory.
|
||||||
delaycompress
|
const REAP_OLD_AUDIT_FILES =
|
||||||
missingok
|
"find /var/log/kubernetes -maxdepth 1 -type f " +
|
||||||
notifempty
|
"\\( -name 'audit*.log*' -o -name 'audit-*.log' \\) " +
|
||||||
copytruncate
|
"-delete 2>/dev/null; " +
|
||||||
maxsize 100M
|
"rmdir /var/log/kubernetes 2>/dev/null; true";
|
||||||
}`;
|
|
||||||
|
|
||||||
export const configureLogRotation: Operation = async (ctx): Promise<OperationResult> => {
|
export const configureLogRotation: Operation = async (ctx): Promise<OperationResult> => {
|
||||||
const changed = await writeRemoteFile(ctx, "/etc/logrotate.d/k3s", LOGROTATE_CONFIG);
|
const before = await ctx.ssh.exec(
|
||||||
|
"test -e /etc/logrotate.d/k3s -o -d /var/log/kubernetes && echo present || echo absent",
|
||||||
|
sshOpts(ctx),
|
||||||
|
);
|
||||||
|
const wasPresent = before.stdout.trim() === "present";
|
||||||
|
|
||||||
|
await ctx.ssh.exec(REMOVE_LOGROTATE, sshOpts(ctx));
|
||||||
|
await ctx.ssh.exec(REAP_OLD_AUDIT_FILES, sshOpts(ctx));
|
||||||
|
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
changed,
|
changed: wasPresent,
|
||||||
message: changed ? "Log rotation configured" : "Log rotation already configured",
|
message: wasPresent
|
||||||
|
? "Removed legacy file-based audit logging (now via journald)"
|
||||||
|
: "No legacy audit log artifacts present",
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
import type { Operation, OperationResult } from "../types.js";
|
import type { Operation, OperationResult } from "../types.js";
|
||||||
import { sshOpts } from "../utils.js";
|
import { sshOpts } from "../utils.js";
|
||||||
|
import { sshExec as remoteSshExec } from "../../../../src/ssh.js";
|
||||||
|
|
||||||
export const configureLonghornDisk: Operation = async (ctx): Promise<OperationResult> => {
|
export const configureLonghornDisk: Operation = async (ctx): Promise<OperationResult> => {
|
||||||
// Check if /var/lib/longhorn exists on this node
|
// Check if /var/lib/longhorn exists on this node
|
||||||
@@ -15,12 +16,11 @@ export const configureLonghornDisk: Operation = async (ctx): Promise<OperationRe
|
|||||||
const nodeNameResult = await ctx.ssh.exec("hostname -f 2>/dev/null || hostname", sshOpts(ctx));
|
const nodeNameResult = await ctx.ssh.exec("hostname -f 2>/dev/null || hostname", sshOpts(ctx));
|
||||||
const nodeName = nodeNameResult.stdout.trim();
|
const nodeName = nodeNameResult.stdout.trim();
|
||||||
|
|
||||||
// Apply the annotation via kubectl (works on server nodes, or via KUBECONFIG on agents)
|
|
||||||
const kubectlPrefix = "k3s kubectl";
|
|
||||||
const annotation = JSON.stringify([{ path: "/var/lib/longhorn", allowScheduling: true }]);
|
const annotation = JSON.stringify([{ path: "/var/lib/longhorn", allowScheduling: true }]);
|
||||||
|
|
||||||
|
// Try kubectl locally first (works on server nodes)
|
||||||
const result = await ctx.ssh.exec(
|
const result = await ctx.ssh.exec(
|
||||||
`${kubectlPrefix} annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite 2>&1 || true`,
|
`k3s kubectl annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite 2>&1 || true`,
|
||||||
sshOpts(ctx),
|
sshOpts(ctx),
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -28,7 +28,23 @@ export const configureLonghornDisk: Operation = async (ctx): Promise<OperationRe
|
|||||||
return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName}` };
|
return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName}` };
|
||||||
}
|
}
|
||||||
|
|
||||||
// If kubectl isn't available (agent node without server access), that's OK —
|
// For worker/agent nodes without local kubectl: apply via the server
|
||||||
// the label is set, annotation can be applied from the server later
|
if (ctx.config.k3sServerUrl) {
|
||||||
|
// The CLI has SSH access to the server — use sshExec from there
|
||||||
|
const serverHost = new URL(ctx.config.k3sServerUrl).hostname;
|
||||||
|
try {
|
||||||
|
const remoteResult = await remoteSshExec(
|
||||||
|
serverHost, "root",
|
||||||
|
`k3s kubectl annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite`,
|
||||||
|
{ ...(ctx.ssh.keyPath ? { keyPath: ctx.ssh.keyPath } : {}), timeoutMs: 15_000 },
|
||||||
|
);
|
||||||
|
if (remoteResult.stdout.includes("annotated") || remoteResult.stdout.includes("unchanged")) {
|
||||||
|
return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName} (via server)` };
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Fall through to manual instruction
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return { success: true, changed: false, message: "Longhorn disk label set (annotation requires server kubectl)" };
|
return { success: true, changed: false, message: "Longhorn disk label set (annotation requires server kubectl)" };
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -71,9 +71,14 @@ describe("k3s install script — server role", () => {
|
|||||||
expect(script).toContain("enable-admission-plugins=NodeRestriction,PodSecurity");
|
expect(script).toContain("enable-admission-plugins=NodeRestriction,PodSecurity");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("configures audit logging", () => {
|
it("configures audit logging via journald (stdout)", () => {
|
||||||
expect(script).toContain("audit-log-path=/var/log/kubernetes/audit.log");
|
expect(script).toContain("audit-log-path=-");
|
||||||
expect(script).toContain("audit-log-maxage=30");
|
// file-based fields and the now-obsolete log directory must be gone
|
||||||
|
expect(script).not.toContain("/var/log/kubernetes/audit.log");
|
||||||
|
expect(script).not.toContain("audit-log-maxage");
|
||||||
|
expect(script).not.toContain("audit-log-maxbackup");
|
||||||
|
expect(script).not.toContain("audit-log-maxsize");
|
||||||
|
expect(script).not.toContain("mkdir -p /var/log/kubernetes");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("cleans stale flannel vxlan before Cilium install", () => {
|
it("cleans stale flannel vxlan before Cilium install", () => {
|
||||||
|
|||||||
@@ -348,3 +348,143 @@ describe("applyPodSecurityStandards", () => {
|
|||||||
expectCommand(ctx.ssh, "pod-security.kubernetes.io/audit=restricted");
|
expectCommand(ctx.ssh, "pod-security.kubernetes.io/audit=restricted");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// --- Audit Logging Decommission (file-based → journald) ---
|
||||||
|
|
||||||
|
import { configureLogRotation } from "../src/operations/log-rotation.js";
|
||||||
|
import { configureJournaldLimits } from "../src/operations/journald-limits.js";
|
||||||
|
|
||||||
|
describe("configureLogRotation (decommission file-based audit logs)", () => {
|
||||||
|
it("removes the legacy logrotate rule and reaps obsolete audit files", async () => {
|
||||||
|
const ctx = mockCtx();
|
||||||
|
ctx.ssh.exec.mockResolvedValueOnce(stdout("present")); // probe: legacy artifacts exist
|
||||||
|
ctx.ssh.exec.mockResolvedValue(OK);
|
||||||
|
|
||||||
|
const result = await configureLogRotation(ctx);
|
||||||
|
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
expect(result.changed).toBe(true);
|
||||||
|
expectCommand(ctx.ssh, "rm -f /etc/logrotate.d/k3s");
|
||||||
|
expectCommand(ctx.ssh, /find \/var\/log\/kubernetes.*audit.*-delete/);
|
||||||
|
expectCommand(ctx.ssh, "rmdir /var/log/kubernetes");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("is a no-op when nothing legacy is present", async () => {
|
||||||
|
const ctx = mockCtx();
|
||||||
|
ctx.ssh.exec.mockResolvedValueOnce(stdout("absent"));
|
||||||
|
ctx.ssh.exec.mockResolvedValue(OK);
|
||||||
|
|
||||||
|
const result = await configureLogRotation(ctx);
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
expect(result.changed).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("configureJournaldLimits", () => {
|
||||||
|
it("writes a 2 GB SystemMaxUse drop-in and reloads journald when changed", async () => {
|
||||||
|
const ctx = mockCtx();
|
||||||
|
ctx.ssh.exec.mockResolvedValueOnce(stdout("__LABCTL_NOT_FOUND__")); // no existing drop-in
|
||||||
|
ctx.ssh.exec.mockResolvedValue(OK);
|
||||||
|
|
||||||
|
const result = await configureJournaldLimits(ctx);
|
||||||
|
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
expect(result.changed).toBe(true);
|
||||||
|
const writeCall = ctx.ssh.exec.mock.calls.find((c) => {
|
||||||
|
const cmd = c[0] as string;
|
||||||
|
return cmd.includes("10-k3s-audit-cap.conf") && cmd.includes("LABCTL_EOF");
|
||||||
|
});
|
||||||
|
expect(writeCall).toBeTruthy();
|
||||||
|
const written = writeCall?.[0] as string;
|
||||||
|
expect(written).toContain("SystemMaxUse=2G");
|
||||||
|
expect(written).toContain("SystemKeepFree=1G");
|
||||||
|
expectCommand(ctx.ssh, "systemctl restart systemd-journald");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not restart journald when the drop-in is already correct", async () => {
|
||||||
|
const ctx = mockCtx();
|
||||||
|
const existing =
|
||||||
|
"[Journal]\nSystemMaxUse=2G\nSystemKeepFree=1G\nSystemMaxFileSize=200M\n";
|
||||||
|
ctx.ssh.exec.mockResolvedValueOnce(stdout(existing));
|
||||||
|
ctx.ssh.exec.mockResolvedValue(OK);
|
||||||
|
|
||||||
|
const result = await configureJournaldLimits(ctx);
|
||||||
|
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
expect(result.changed).toBe(false);
|
||||||
|
expectNoCommand(ctx.ssh, "systemctl restart systemd-journald");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// --- Etcd Recovery ---
|
||||||
|
|
||||||
|
import { recoverEtcdMember } from "../src/operations/etcd-recover.js";
|
||||||
|
import { mockSsh } from "./helpers.js";
|
||||||
|
|
||||||
|
describe("recoverEtcdMember", () => {
|
||||||
|
it("refuses to operate when cluster is below 3 members (quorum risk)", async () => {
|
||||||
|
const broken = mockSsh();
|
||||||
|
const peer = mockSsh();
|
||||||
|
peer.exec.mockResolvedValueOnce(stdout("/usr/bin/etcdctl")); // etcdctl present
|
||||||
|
peer.exec.mockResolvedValueOnce(stdout(
|
||||||
|
"111, started, host-a-aaa, https://10.0.0.1:2380, https://10.0.0.1:2379, false\n" +
|
||||||
|
"222, started, host-b-bbb, https://10.0.0.2:2380, https://10.0.0.2:2379, false",
|
||||||
|
));
|
||||||
|
|
||||||
|
const result = await recoverEtcdMember({ broken, peer, brokenHostname: "host-b" });
|
||||||
|
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
expect(result.message).toMatch(/quorum/i);
|
||||||
|
// Critically: must NOT have stopped k3s or removed anything
|
||||||
|
expect(broken.exec).not.toHaveBeenCalledWith(expect.stringContaining("systemctl stop k3s"), expect.anything());
|
||||||
|
});
|
||||||
|
|
||||||
|
it("performs full procedure when quorum is preserved", async () => {
  // Member listing before recovery: three members, host-b (id 222) is the target.
  const membersBefore =
    "111, started, host-a-aaa, https://10.0.0.1:2380, https://10.0.0.1:2379, false\n" +
    "222, started, host-b-bbb, https://10.0.0.2:2380, https://10.0.0.2:2379, false\n" +
    "333, started, host-c-ccc, https://10.0.0.3:2380, https://10.0.0.3:2379, false";
  // Member listing after the rejoin: host-b is back under a new id (444).
  const membersAfter =
    "111, started, host-a-aaa, https://10.0.0.1:2380, https://10.0.0.1:2379, false\n" +
    "333, started, host-c-ccc, https://10.0.0.3:2380, https://10.0.0.3:2379, false\n" +
    "444, started, host-b-zzz, https://10.0.0.2:2380, https://10.0.0.2:2379, false";

  const peer = mockSsh();
  const broken = mockSsh();

  // Peer responses are consumed in call order.
  peer.exec
    .mockResolvedValueOnce(stdout("/usr/bin/etcdctl")) // ensureEtcdctl: present
    .mockResolvedValueOnce(stdout(membersBefore)) // member list
    .mockResolvedValueOnce(stdout("Member 222 removed")) // member remove
    .mockResolvedValueOnce(stdout(membersAfter)); // post-rejoin member list

  const { success, removedMemberId, newMemberId } = await recoverEtcdMember({
    broken,
    peer,
    brokenHostname: "host-b",
  });

  // Outcome reported to the caller.
  expect(success).toBe(true);
  expect(removedMemberId).toBe("222");
  expect(newMemberId).toBe("444");

  // Commands that must have been issued on each host.
  expectCommand(broken, "systemctl stop k3s");
  expectCommand(peer, "member remove 222");
  expectCommand(broken, /db\.corrupt-/);
  expectCommand(broken, /rm -rf .*\/server\/tls/);
  expectCommand(broken, "systemctl start k3s");
});
|
||||||
|
|
||||||
|
it("fails clearly when no member matches the broken hostname", async () => {
  // Scenario: the peer lists three members (host-a, host-b, host-c) but the
  // caller asks to recover "host-d", which matches none of them.
  const broken = mockSsh();
  const peer = mockSsh();
  peer.exec.mockResolvedValueOnce(stdout("/usr/bin/etcdctl"));
  peer.exec.mockResolvedValueOnce(stdout(
    "111, started, host-a-aaa, https://10.0.0.1:2380, https://10.0.0.1:2379, false\n" +
    "222, started, host-b-bbb, https://10.0.0.2:2380, https://10.0.0.2:2379, false\n" +
    "333, started, host-c-ccc, https://10.0.0.3:2380, https://10.0.0.3:2379, false",
  ));

  const result = await recoverEtcdMember({ broken, peer, brokenHostname: "host-d" });

  expect(result.success).toBe(false);
  expect(result.message).toMatch(/No etcd member found/);

  // k3s must not have been stopped on the broken host. Check the command
  // (first argument) of every recorded call: toHaveBeenCalledWith compares the
  // whole argument list and expect.anything() never matches undefined, so an
  // arity-dependent negative assertion could pass vacuously.
  const brokenCommands = broken.exec.mock.calls.map((call) => String(call[0]));
  expect(brokenCommands).not.toContainEqual(expect.stringContaining("systemctl stop k3s"));
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -113,6 +113,7 @@ export type LabdBastionMessage =
|
|||||||
| { type: "command-role-update"; requestId: string; mac: string; role: string }
|
| { type: "command-role-update"; requestId: string; mac: string; role: string }
|
||||||
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
|
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
|
||||||
| { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
|
| { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
|
||||||
|
| { type: "command-discover"; requestId: string; mac: string; product?: string; board?: string; serial?: string; manufacturer?: string; cpu_model?: string; cpu_cores?: number; memory_gb?: number; arch?: string; disks?: Array<{ name: string; size_gb: number; model: string }>; nics?: Array<{ name: string; mac: string; state: string }> }
|
||||||
| { type: "server-shutdown"; reconnectAfter: number };
|
| { type: "server-shutdown"; reconnectAfter: number };
|
||||||
|
|
||||||
export type BastionMessageType = BastionMessage["type"];
|
export type BastionMessageType = BastionMessage["type"];
|
||||||
@@ -127,7 +128,7 @@ const BASTION_MESSAGE_TYPES = new Set<string>([
|
|||||||
|
|
||||||
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
|
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
|
||||||
"bastion-enrolled", "bastion-heartbeat-ack", "command-install",
|
"bastion-enrolled", "bastion-heartbeat-ack", "command-install",
|
||||||
"command-forget", "command-role-update", "command-debug", "command-register", "server-shutdown",
|
"command-forget", "command-role-update", "command-debug", "command-register", "command-discover", "server-shutdown",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
export function isBastionMessage(msg: unknown): msg is BastionMessage {
|
export function isBastionMessage(msg: unknown): msg is BastionMessage {
|
||||||
|
|||||||
@@ -96,6 +96,13 @@ export interface InstalledInfo {
|
|||||||
ip: string;
|
ip: string;
|
||||||
installed_at: string;
|
installed_at: string;
|
||||||
bastionId?: string; // set when aggregated through labd
|
bastionId?: string; // set when aggregated through labd
|
||||||
|
// Hardware info (copied from discovered on install completion)
|
||||||
|
product?: string;
|
||||||
|
manufacturer?: string;
|
||||||
|
cpu_model?: string;
|
||||||
|
cpu_cores?: number;
|
||||||
|
memory_gb?: number;
|
||||||
|
arch?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface DebugConfig {
|
export interface DebugConfig {
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
{
|
{
|
||||||
"files": [],
|
"files": [],
|
||||||
"references": [
|
"references": [
|
||||||
|
{ "path": "src/core" },
|
||||||
{ "path": "src/shared" },
|
{ "path": "src/shared" },
|
||||||
{ "path": "src/bastion" },
|
{ "path": "src/bastion" },
|
||||||
{ "path": "src/cli" },
|
{ "path": "src/cli" },
|
||||||
|
|||||||
Reference in New Issue
Block a user