2026-03-29 00:50:05 +00:00
239 changed files with 26966 additions and 56 deletions
--- a/.env
+++ b/.env
@@ -0,0 +1 @@
+PERPLEXITY_API_KEY=dummy
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,12 @@
+# API Keys (Required to enable respective provider)
+ANTHROPIC_API_KEY="your_anthropic_api_key_here"       # Required: Format: sk-ant-api03-...
+PERPLEXITY_API_KEY="your_perplexity_api_key_here"     # Optional: Format: pplx-...
+OPENAI_API_KEY="your_openai_api_key_here"             # Optional, for OpenAI models. Format: sk-proj-...
+GOOGLE_API_KEY="your_google_api_key_here"             # Optional, for Google Gemini models.
+MISTRAL_API_KEY="your_mistral_key_here"               # Optional, for Mistral AI models.
+XAI_API_KEY="YOUR_XAI_KEY_HERE"                       # Optional, for xAI models.
+GROQ_API_KEY="YOUR_GROQ_KEY_HERE"                     # Optional, for Groq models.
+OPENROUTER_API_KEY="YOUR_OPENROUTER_KEY_HERE"         # Optional, for OpenRouter models.
+AZURE_OPENAI_API_KEY="your_azure_key_here"            # Optional, for Azure OpenAI models (requires endpoint in .taskmaster/config.json).
+OLLAMA_API_KEY="your_ollama_api_key_here"             # Optional: For remote Ollama servers that require authentication.
+GITHUB_API_KEY="your_github_api_key_here"             # Optional: For GitHub import/export features. Format: ghp_... or github_pat_...
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -0,0 +1,263 @@
+name: CI/CD
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+env:
+  GITEA_REGISTRY: 10.0.0.194:3012
+  GITEA_PUBLIC_URL: https://mysources.co.uk
+  GITEA_OWNER: michal
+
+# ============================================================
+# Required Gitea secrets:
+#   PACKAGES_TOKEN     -- Gitea API token (packages + registry)
+# ============================================================
+
+jobs:
+  # -- CI checks (run in parallel on every push/PR) ----------
+
+  lint:  # Runs `pnpm lint`; advisory only for now (see the Lint step).
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: bastion  # every `run:` step in this job executes from bastion/
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - run: pnpm install --frozen-lockfile
+
+      - name: Lint  # `|| echo` absorbs a non-zero exit: lint errors surface as a ::warning:: and never fail the job
+        run: pnpm lint || echo "::warning::Lint has errors -- not blocking CI yet"
+
+  typecheck:  # Runs `pnpm typecheck`; unlike lint there is no fallback, so a non-zero exit fails the job.
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: bastion  # every `run:` step in this job executes from bastion/
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - run: pnpm install --frozen-lockfile
+
+      - name: Typecheck
+        run: pnpm typecheck
+
+  test:  # Builds the packages, then runs the test suite via `pnpm test:run`.
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: bastion  # every `run:` step in this job executes from bastion/
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - run: pnpm install --frozen-lockfile
+
+      - name: Build (needed by completions check)  # must stay ahead of the test step
+        run: pnpm build
+
+      - name: Run tests
+        run: pnpm test:run
+
+  # -- Build & package (both architectures) -------------------
+
+  build:  # Compiles x86_64 and arm64 binaries, packages each as RPM and DEB, and uploads the packages as artifacts.
+    runs-on: ubuntu-latest
+    needs: [lint, typecheck, test]  # starts only after all three CI jobs complete successfully
+    defaults:
+      run:
+        working-directory: bastion  # every `run:` step in this job executes from bastion/
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Generate shell completions
+      - uses: oven-sh/setup-bun@v2
+
+      - name: Install nfpm  # pinned to the v2.45.0 Linux x86_64 release tarball; only the `nfpm` binary is extracted
+        run: |
+          curl -sL -o /tmp/nfpm.tar.gz "https://github.com/goreleaser/nfpm/releases/download/v2.45.0/nfpm_2.45.0_Linux_x86_64.tar.gz"
+          tar xzf /tmp/nfpm.tar.gz -C /usr/local/bin nfpm
+
+      - name: Bundle x86_64 binary  # `bun build --compile` emits a standalone executable for the given --target
+        run: |
+          mkdir -p dist
+          bun build src/cli/src/index.ts --compile --target=bun-linux-x64 --outfile dist/lab-x86_64
+
+      - name: Bundle arm64 binary  # relies on dist/ having been created by the x86_64 step above
+        run: |
+          bun build src/cli/src/index.ts --compile --target=bun-linux-arm64 --outfile dist/lab-arm64
+
+      - name: Package x86_64 RPM + DEB  # sed writes a per-arch copy of nfpm.yaml (arch + binary src), then nfpm builds both formats
+        run: |
+          sed -e 's|^arch:.*|arch: amd64|' -e 's|src: ./dist/lab$|src: ./dist/lab-x86_64|' nfpm.yaml > /tmp/nfpm-x86_64.yaml
+          nfpm pkg --config /tmp/nfpm-x86_64.yaml --packager rpm --target dist/
+          nfpm pkg --config /tmp/nfpm-x86_64.yaml --packager deb --target dist/
+
+      - name: Package arm64 RPM + DEB  # same procedure as the x86_64 step, with arch: arm64 and the arm64 binary
+        run: |
+          sed -e 's|^arch:.*|arch: arm64|' -e 's|src: ./dist/lab$|src: ./dist/lab-arm64|' nfpm.yaml > /tmp/nfpm-arm64.yaml
+          nfpm pkg --config /tmp/nfpm-arm64.yaml --packager rpm --target dist/
+          nfpm pkg --config /tmp/nfpm-arm64.yaml --packager deb --target dist/
+
+      - name: Upload RPM artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: rpm-packages  # consumed by the publish-rpm job
+          path: bastion/dist/lab-*.rpm  # repo-relative: defaults.run.working-directory does not apply to `uses:` steps
+          retention-days: 7
+
+      - name: Upload DEB artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: deb-packages  # consumed by the publish-deb job
+          path: bastion/dist/lab*.deb  # repo-relative, as above; NOTE(review): glob is `lab*` rather than `lab-*`, presumably for nfpm's `lab_<version>_<arch>.deb` naming -- confirm
+          retention-days: 7
+
+  # -- Release pipeline (main branch push only) --------------
+
+  publish-rpm:  # Publishes the x86_64 and arm64 RPMs to the Gitea package registry; runs only for pushes to main.
+    runs-on: ubuntu-latest
+    needs: [build]
+    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
+    defaults:
+      run:
+        working-directory: bastion
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download RPM artifacts
+        uses: actions/download-artifact@v3
+        with:
+          name: rpm-packages
+          path: bastion/dist/
+
+      - name: Install rpm tools  # provides `rpm -qp`, used below to read version and arch from each package
+        run: sudo apt-get update && sudo apt-get install -y rpm
+
+      - name: Publish RPMs to Gitea
+        env:
+          GITEA_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
+          GITEA_URL: http://${{ env.GITEA_REGISTRY }}
+          GITEA_OWNER: ${{ env.GITEA_OWNER }}
+          GITEA_REPO: lab
+        run: |
+          # Pass 1: purge existing versions first; DELETE drops every arch of a version, so it must not run between uploads.
+          for RPM_FILE in dist/lab-*.rpm; do
+            [ -f "$RPM_FILE" ] || continue
+            RPM_VERSION=$(rpm -qp --queryformat '%{VERSION}-%{RELEASE}' "$RPM_FILE")
+            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
+              -H "Authorization: token ${GITEA_TOKEN}" \
+              "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/lab/${RPM_VERSION}")
+            if [ "$HTTP_CODE" = "200" ]; then
+              echo "Version exists, replacing..."
+              curl -s -o /dev/null -X DELETE -H "Authorization: token ${GITEA_TOKEN}" \
+                "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/lab/${RPM_VERSION}"
+            fi
+          done
+
+          # Pass 2: upload each architecture's package; --fail turns an HTTP error into a failed step.
+          for RPM_FILE in dist/lab-*.rpm; do
+            [ -f "$RPM_FILE" ] || continue
+            RPM_VERSION=$(rpm -qp --queryformat '%{VERSION}-%{RELEASE}' "$RPM_FILE")
+            RPM_ARCH=$(rpm -qp --queryformat '%{ARCH}' "$RPM_FILE")
+            echo "Publishing $RPM_FILE (version $RPM_VERSION, arch $RPM_ARCH)..."
+            curl --fail -X PUT \
+              -H "Authorization: token ${GITEA_TOKEN}" \
+              --upload-file "$RPM_FILE" \
+              "${GITEA_URL}/api/packages/${GITEA_OWNER}/rpm/upload"
+            echo "Published $RPM_FILE successfully!"
+          done
+
+          # Link package to repo
+          source scripts/link-package.sh
+          link_package "rpm" "lab"
+
+  publish-deb:  # Publishes the DEBs to every listed distribution in the Gitea Debian registry; runs only for pushes to main.
+    runs-on: ubuntu-latest
+    needs: [build]
+    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
+    defaults:
+      run:
+        working-directory: bastion
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download DEB artifacts
+        uses: actions/download-artifact@v3
+        with:
+          name: deb-packages
+          path: bastion/dist/
+
+      - name: Publish DEBs to Gitea
+        env:
+          GITEA_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
+          GITEA_URL: http://${{ env.GITEA_REGISTRY }}
+          GITEA_OWNER: ${{ env.GITEA_OWNER }}
+          GITEA_REPO: lab
+        run: |
+          # Publish to each supported distribution; FAILED records any rejected upload so the job cannot pass silently
+          DISTRIBUTIONS="trixie forky noble plucky"
+          FAILED=0
+          for DEB_FILE in dist/lab*.deb; do
+            [ -f "$DEB_FILE" ] || continue
+            DEB_VERSION=$(dpkg-deb --field "$DEB_FILE" Version)
+            DEB_ARCH=$(dpkg-deb --field "$DEB_FILE" Architecture)
+            echo "Publishing $DEB_FILE (version $DEB_VERSION, arch $DEB_ARCH)..."
+            for DIST in $DISTRIBUTIONS; do
+              echo "  -> $DIST..."
+              HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
+                -X PUT \
+                -H "Authorization: token ${GITEA_TOKEN}" \
+                --upload-file "$DEB_FILE" \
+                "${GITEA_URL}/api/packages/${GITEA_OWNER}/debian/pool/${DIST}/main/upload")
+
+              if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
+                echo "     Published to $DIST"
+              elif [ "$HTTP_CODE" = "409" ]; then
+                echo "     Already exists in $DIST (skipping)"
+              else
+                echo "::error::Upload of $DEB_FILE to $DIST returned HTTP $HTTP_CODE"
+                FAILED=1  # keep going so the remaining distributions are still attempted
+              fi
+            done
+          done
+
+          [ "$FAILED" -eq 0 ] || exit 1  # any rejected upload fails the job before success is reported
+          echo "Published successfully!"
+          # Link package to repo
+          source scripts/link-package.sh
+          link_package "debian" "lab"
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,25 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+dev-debug.log
+
+# Dependency directories
+node_modules/
+
+# Environment variables
+.env
+
+# Editor directories and files
+.idea
+.vscode
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
+
+# OS specific
+.DS_Store
--- a/.mcp.json
+++ b/.mcp.json
@@ -0,0 +1,12 @@
+{
+  "mcpServers": {
+    "labctl": {
+      "command": "mcpctl",
+      "args": [
+        "mcp",
+        "-p",
+        "labctl"
+      ]
+    }
+  }
+}
--- a/.taskmaster/.env
+++ b/.taskmaster/.env
@@ -0,0 +1 @@
+PERPLEXITY_API_KEY=dummy
--- a/.taskmaster/config.json
+++ b/.taskmaster/config.json
@@ -0,0 +1,44 @@
+{
+  "models": {
+    "main": {
+      "provider": "claude-code",
+      "modelId": "opus",
+      "maxTokens": 32000,
+      "temperature": 0.2
+    },
+    "research": {
+      "provider": "claude-code",
+      "modelId": "opus",
+      "maxTokens": 32000,
+      "temperature": 0.2
+    },
+    "fallback": {
+      "provider": "claude-code",
+      "modelId": "sonnet",
+      "maxTokens": 64000,
+      "temperature": 0.2
+    }
+  },
+  "global": {
+    "logLevel": "info",
+    "debug": false,
+    "defaultNumTasks": 10,
+    "defaultSubtasks": 5,
+    "defaultPriority": "medium",
+    "projectName": "Task Master",
+    "ollamaBaseURL": "http://localhost:11434/api",
+    "bedrockBaseURL": "https://bedrock.us-east-1.amazonaws.com",
+    "responseLanguage": "English",
+    "enableCodebaseAnalysis": true,
+    "enableProxy": false,
+    "anonymousTelemetry": true,
+    "userId": "1234567890"
+  },
+  "claudeCode": {},
+  "codexCli": {},
+  "grokCli": {
+    "timeout": 120000,
+    "workingDirectory": null,
+    "defaultModel": "grok-4-latest"
+  }
+}
--- a/.taskmaster/docs/prd.md
+++ b/.taskmaster/docs/prd.md
@@ -0,0 +1,452 @@
+# labctl — Infrastructure Management Platform
+
+## Product Requirements Document
+
+## 1. Overview
+
+labctl is a unified infrastructure management platform for bare-metal servers, Kubernetes clusters, and cloud resources. It replaces Puppet with a modern, TypeScript-native system using Pulumi for infrastructure as code.
+
+### 1.1 Core Principles
+- **Single CLI** (`labctl`) for all infrastructure operations
+- **mTLS everywhere** — built-in Certificate Authority, no SSH key management
+- **RBAC from day one** — deny by default, audit everything
+- **Multi-cloud** — bare metal now, AWS later, extensible to any cloud
+- **Test infrastructure like code** — ephemeral environments, smoke tests, security tests
+- **Pulumi over Helm** — TypeScript charts, typed, testable, no YAML templating
+
+### 1.2 Current State (completed)
+- PXE bastion for bare-metal provisioning (discover, install, reprovision)
+- CLI with subcommands: `labctl init bastion`, `labctl provision`
+- LVM partitioning with reprovision data preservation (/home, /srv, /var/lib/longhorn, /var/lib/rancher)
+- Worker role (k3s agent + Longhorn) and infra role (k3s server + etcd)
+- 32 unit tests, VM smoke tests verified on real hardware
+- Multi-arch builds (x86_64 + arm64), RPM/DEB packaging, Gitea CI/CD
+- labd scaffold with CockroachDB Prisma schema (Server, Agent, User, Role, Permission, AuditLog, JoinToken, Cluster, PulumiRun)
+
+### 1.3 Hardware
+- labmaster (puppet.ad.itaz.eu / 78:55:36:08:35:14): MinisForum SER9, AMD Ryzen 7 255, 16 cores, 27GB RAM, 1TB NVMe, infra role
+- Future: additional bare-metal worker nodes, AWS EC2 instances
+
+## 2. Architecture
+
+### 2.1 Components
+
+```
+labctl CLI → labd (master) → lab-agent (on every server)
+                ↓
+          CockroachDB
+```
+
+**labctl** — CLI binary installed on developer workstations. Compiled with bun to standalone binary. Distributed as RPM/DEB/binary.
+
+**labd** — Master daemon running as k8s Deployment on labmaster's k3s cluster. Stateless (all state in CockroachDB). Multiple instances behind k8s Service for HA. Manages: CA, RBAC, agent registry, Pulumi executor, kubectl proxy, app deployments, log relay.
+
+**lab-agent** — Lightweight daemon on every managed machine. Connects to labd via mTLS WebSocket. Handles: heartbeat, command execution, log streaming, module application. Compiled to standalone binary with bun. Installed via systemd service.
+
+**CockroachDB** — Distributed SQL database. PostgreSQL wire-compatible (Prisma works unchanged). Single node to start, multi-node for HA. Stores: server state, RBAC, audit logs, certificates, kubeconfigs (encrypted), Pulumi state.
+
+**Bastion** — PXE provisioning server. Runs as k8s pod with hostNetwork (needs DHCP/TFTP). Managed by labd as an "app". Multiple bastions for multiple sites.
+
+### 2.2 Network Architecture
+
+**Cilium** as k8s CNI (replacing default flannel):
+- eBPF-based pod networking
+- Built-in WireGuard encryption between nodes
+- Network policies (ties into RBAC)
+- Hubble for observability
+- Future: Cluster Mesh for multi-site transparent networking
+
+No Tailscale dependency — Cilium handles node-to-node encryption. Agents connect to labd over standard TCP/TLS.
+
+### 2.3 Authentication
+
+**mTLS with built-in Certificate Authority:**
+1. labd generates root CA on first start (stored encrypted in CockroachDB)
+2. Agents enroll with join token → receive signed certificate
+3. CLI users authenticate with client certificates (or SSH key-based initial auth)
+4. All communication authenticated via mutual TLS
+5. Certificate rotation and revocation supported
+
+**Join tokens:**
+- One-time tokens: for individual bare-metal servers (generated during PXE provision, embedded in kickstart)
+- Reusable tokens: for autoscaling groups (AWS ASG instances share a token)
+- Tokens can be revoked, have optional expiry
+
+### 2.4 RBAC Model
+
+Inspired by mcpctl's RBAC (src/mcpd/src/services/, middleware/auth). Hierarchical permissions:
+
+```
+action:cloud:environment:server
+
+Examples:
+  read:*:*:*                    — read everything
+  exec:baremetal:lab:*          — exec on any lab bare-metal server
+  kubectl:*:*:*                 — kubectl proxy on any cluster
+  *:baremetal:lab:puppet        — full access to puppet server only
+  manage:*:*:*                  — manage apps, clusters, tokens
+  admin:*:*:*                   — full admin (create users, roles)
+```
+
+- **Resources:** servers, environments, clouds, modules, roles, users, clusters, apps, pulumi-stacks
+- **Actions:** read, exec, apply, destroy, manage, admin, kubectl
+- **Deny rules:** an explicit deny overrides any allow (like AWS IAM)
+
+Prisma models: Role, Permission (allow/deny), UserRole binding.
+
+### 2.5 Database
+
+**CockroachDB** chosen over PostgreSQL and Cassandra:
+- PostgreSQL wire-compatible — Prisma works, mcpctl patterns reusable
+- Multi-master replication — any node accepts reads AND writes
+- Strong consistency (not eventual like Cassandra)
+- Survives node failures (a 3-node cluster tolerates 1 failed node, a 5-node cluster tolerates 2)
+- Auto-rebalancing when adding nodes
+- Start single-node, scale to multi-node with zero code changes (just add nodes)
+
+**Schema (already scaffolded in Prisma):**
+- Server — managed machines (hostname, mac, cloud, env, role, labels, status)
+- Agent — connected agents (cert, enrollment, last seen)
+- User — platform users (username, cert fingerprint)
+- Role — RBAC roles with permissions
+- Permission — allow/deny rules (action:cloud:env:server)
+- UserRole — user-to-role bindings
+- JoinToken — enrollment tokens (one-time, reusable, revocable)
+- AuditLog — every action logged (user, session, action, resource, result, duration)
+- PulumiRun — infrastructure-as-code execution records
+- Cluster — managed k8s clusters (kubeconfig encrypted)
+
+## 3. CLI Command Reference
+
+### 3.1 Bastion (PXE Provisioning) — IMPLEMENTED
+```bash
+sudo labctl init bastion standalone start [--foreground] [--port 8080]
+sudo labctl init bastion standalone stop
+labctl init bastion standalone status
+```
+
+### 3.2 Provisioning — IMPLEMENTED
+```bash
+labctl provision list
+labctl provision install <mac> <hostname> --role worker|infra
+labctl provision reprovision <mac> <hostname> --role worker|infra
+labctl provision forget <mac>
+```
+
+### 3.3 Server Management — TO BUILD
+```bash
+labctl get servers [--env NAME] [--cloud NAME] [--label KEY=VALUE]
+labctl describe server/<name>
+```
+
+### 3.4 Remote Execution — TO BUILD
+```bash
+labctl exec server/<name> -- <command>
+labctl exec server/<name> -it -- bash          # interactive TTY
+labctl exec server/<name> --timeout 30s -- cmd
+```
+
+### 3.5 Kubernetes Proxy — TO BUILD
+```bash
+labctl kubectl --cluster <name> <kubectl-args>
+labctl clusters add <name> --kubeconfig <path>
+labctl clusters list
+labctl clusters remove <name>
+```
+
+### 3.6 Logs — TO BUILD
+```bash
+# Server logs (journalctl passthrough, no DB in hot path)
+labctl logs server/<name>                     # all journal
+labctl logs server/<name> -f                  # follow (live WebSocket relay)
+labctl logs server/<name> -n 100              # last 100 lines
+labctl logs server/<name> -u k3s              # specific unit
+labctl logs server/<name> -u sshd --since "1h ago"
+labctl logs server/<name> -k                  # kernel
+labctl logs server/<name> -p err              # errors only
+labctl logs server/<name> --file /var/log/nginx/error.log
+
+# App logs (k8s pod logs)
+labctl logs app/<name> [-f] [--container NAME]
+
+# Pulumi execution logs
+labctl logs pulumi/<run-id> [-f]
+
+# Bastion logs
+labctl logs bastion/<env> [--mac MAC]
+
+# Agent daemon logs
+labctl logs agent/<server>
+
+# Audit logs (from CockroachDB)
+labctl logs audit [--user NAME] [--action ACTION] [--since TIME]
+labctl logs audit/<user-date-sessionid>       # specific session
+```
+
+Log architecture: agent runs journalctl/tail with user-provided flags, streams stdout over WebSocket to labd, labd relays to CLI. No database in the hot path. Future: Grafana Loki integration for cold storage.
+
+### 3.7 Apps (Pulumi Charts, replacing Helm) — TO BUILD
+```bash
+labctl apps list
+labctl apps install <name> [--set key=value] [-f values.yaml]
+labctl apps status <name>
+labctl apps upgrade <name>
+labctl apps history <name>
+labctl apps rollback <name> <version>
+labctl apps uninstall <name>
+```
+
+### 3.8 Infrastructure as Code — TO BUILD
+```bash
+labctl apply -f <file.ts> --env <env>
+labctl plan -f <file.ts> --env <env>
+labctl destroy -f <file.ts> --env <env>
+```
+
+### 3.9 RBAC — TO BUILD
+```bash
+labctl get roles
+labctl get users
+labctl create role <name> --allow "action:cloud:env:server"
+labctl create role <name> --deny "destroy:*:*:*"
+labctl bind role <role> --user <user>
+labctl unbind role <role> --user <user>
+labctl get permissions
+```
+
+### 3.10 Environments and Clouds — TO BUILD
+```bash
+labctl get environments
+labctl get clouds
+labctl create environment <name> --cloud <cloud>
+```
+
+## 4. Partition Layout
+
+### Worker Role
+```
+/boot/efi       600MB  EFI
+/boot           3GB    ext4
+── LVM VG: labvg ──
+  swap          27GB
+  /             33GB   xfs
+  /var          100GB  xfs
+  /var/log      10GB   xfs
+  /home         10GB   xfs         ← preserved on reprovision
+  /srv          20GB   xfs         ← preserved on reprovision
+  /var/lib/longhorn  rest  xfs     ← preserved (Longhorn PVC storage)
+  /tmp          tmpfs 4GB
+```
+
+### Infra Role
+```
+/boot/efi       600MB  EFI
+/boot           3GB    ext4
+── LVM VG: labvg ──
+  swap          27GB
+  /             33GB   xfs
+  /var          100GB  xfs
+  /var/log      10GB   xfs
+  /home         10GB   xfs         ← preserved on reprovision
+  /srv          20GB   xfs         ← preserved on reprovision
+  /var/lib/rancher  20GB  xfs      ← preserved (k3s etcd data)
+  /tmp          tmpfs 4GB
+```
+
+## 5. Module System
+
+Configuration modules define desired state. Three tiers:
+1. **Core modules** (this repo, `modules/`): k3s-server, k3s-agent, labd, lab-agent, bastion
+2. **Official modules** (separate repos): monitoring, cilium, DNS
+3. **Custom modules** (user repos): pulled by git URL
+
+Module structure:
+```
+module.yaml          # name, version, targets (roles/labels), deps
+src/index.ts         # entry point
+src/install.ts       # installation logic
+src/configure.ts     # configuration logic
+src/health.ts        # health check
+tests/               # vitest tests (mandatory)
+```
+
+## 6. Testing Strategy
+
+### 6.1 Testing Pyramid
+```
+Unit Tests        → pure logic, milliseconds, every commit
+Smoke Tests       → containers (podman-compose), minutes, every commit
+Integration Tests → VMs (libvirt), 10-15 min, PRs
+E2E Tests         → real hardware/cloud, 20-30 min, pre-release
+```
+
+### 6.2 Smoke Test Stack (podman-compose)
+```yaml
+services:
+  cockroachdb:
+    image: cockroachdb/cockroach:latest-v24.3
+  labd:
+    build: .
+    depends_on: [cockroachdb]
+  agent-1:
+    build: ./agent
+    depends_on: [labd]
+  agent-2:
+    build: ./agent
+    depends_on: [labd]
+```
+Tests: agent enrollment, certificate issuance, heartbeat, exec, logs, RBAC deny/allow.
+
+### 6.3 Security Tests (RBAC)
+- Deny exec without permission
+- Deny cross-environment access
+- Deny rules override allow rules
+- Cannot escalate own permissions
+- Audit logs all denied attempts
+- Certificate-based auth cannot be spoofed
+- Join tokens cannot be reused (one-time)
+- Expired tokens rejected
+
+### 6.4 Ephemeral Test Environments
+```bash
+labctl test smoke                                    # podman-compose
+labctl test integration                              # libvirt VMs
+labctl env create pr-123 --cloud containers          # CI ephemeral
+labctl env create pr-123 --cloud aws                 # cloud ephemeral (future)
+```
+
+### 6.5 Health Gates for Deployment
+Before promoting to production, ALL must pass:
+- labd API responds
+- Expected number of agents connected
+- k3s nodes Ready
+- Certificates valid (more than 30 days until expiry)
+- RBAC smoke test passes
+- No error logs in last 5 minutes
+
+## 7. Cloud/Environment Model
+
+```
+Cloud: baremetal
+  └── Environment: lab
+       ├── Server: labmaster.ad.itaz.eu (infra, labels={k3s=server})
+       └── Server: ser9.ad.itaz.eu (worker, labels={k3s=agent})
+
+Cloud: aws (future)
+  └── Environment: production
+       ├── Server: i-abc123 (from ASG web-servers)
+       └── Server: i-def456 (from ASG web-servers)
+```
+
+Each bastion creates an environment under baremetal cloud. AWS autoscaling groups create environments under aws cloud.
+
+## 8. App Model (Pulumi Charts)
+
+Each app is a Pulumi TypeScript program:
+```
+app.yaml             # name, version, inputs schema, required permissions
+src/index.ts         # Pulumi program
+values.yaml          # defaults
+tests/               # vitest tests
+```
+
+First apps to build:
+- bastion — PXE provisioning (wrap existing code)
+- labd — master daemon (self-deployment)
+- cockroachdb — database
+- cilium — CNI
+
+## 9. Implementation Phases
+
+### Phase 1: Foundation (PARTIALLY DONE)
+- [x] PXE bastion (discover, install, reprovision)
+- [x] CLI structure (labctl init/provision)
+- [x] labd scaffold (Fastify + CockroachDB/Prisma schema)
+- [x] Multi-arch builds, packaging, CI/CD
+- [ ] Certificate Authority in labd
+- [ ] lab-agent skeleton (connect, heartbeat, enrollment)
+- [ ] Agent enrollment via join tokens
+- [ ] RBAC engine
+- [ ] labctl exec (remote execution)
+- [ ] labctl logs (resource-scoped streaming)
+- [ ] labctl get servers (with filters)
+- [ ] Smoke test stack (podman-compose)
+
+### Phase 2: Deployment
+- [ ] Reprovision labmaster as labmaster.ad.itaz.eu
+- [ ] Deploy k3s with Cilium CNI
+- [ ] Deploy CockroachDB on k3s
+- [ ] Deploy labd on k3s
+- [ ] Deploy bastion as managed app
+- [ ] Auto-enroll agents during PXE provision
+
+### Phase 3: Infrastructure as Code
+- [ ] Module system
+- [ ] Pulumi charts (replacing Helm)
+- [ ] labctl apps install/upgrade/rollback
+- [ ] labctl apply -f (Pulumi execution)
+- [ ] kubectl proxy (audited)
+- [ ] Kubeconfig store (encrypted)
+
+### Phase 4: Multi-Cloud
+- [ ] AWS provider (Pulumi)
+- [ ] Reusable join tokens for ASGs
+- [ ] Cilium Cluster Mesh
+- [ ] Ephemeral test environments
+- [ ] Grafana Loki for cold logs
+
+## 10. Technology Stack
+
+| Component | Technology | Notes |
+|-----------|-----------|-------|
+| Language | TypeScript (ESM) | Same for CLI, daemon, agents, IaC |
+| CLI | Commander.js | Matches mcpctl patterns |
+| HTTP Server | Fastify + WebSocket | labd and bastion |
+| Database | CockroachDB | PostgreSQL compatible, Prisma ORM |
+| ORM | Prisma | Reuse mcpctl patterns |
+| IaC | Pulumi (TypeScript) | Replaces Helm and Puppet |
+| k8s CNI | Cilium | eBPF, WireGuard, network policies |
+| Auth | mTLS (built-in CA) | Certificate-based, no SSH keys |
+| Packaging | nfpm (RPM/DEB) | bun compile for standalone binary |
+| Containers | Podman + podman-compose | No Docker dependency |
+| CI/CD | Gitea Actions | Self-hosted on mysources.co.uk |
+| Testing | Vitest | Unit + smoke + integration |
+| Registry | Gitea packages | RPM, DEB, container images |
+
+## 11. Lessons from mcpctl
+
+The mcpctl project (../mcpctl/) established patterns reused here:
+
+**Project structure:** pnpm monorepo with workspace packages (shared, cli, daemon). Each package has own package.json, tsconfig.json, vitest.config.ts.
+
+**CLI patterns:** Commander.js with factory functions (createXxxCommand). Global options (--project → --env/--cloud). Resource CRUD (get, describe, delete, create, apply).
+
+**Server patterns:** Fastify with route registration functions. Services layer with repository pattern. Middleware for auth. Health endpoints.
+
+**Database:** Prisma ORM with PostgreSQL (now CockroachDB, wire-compatible). Migration-first schema. Seed data for initial setup.
+
+**RBAC:** Role-based with permission strings. Middleware checks on every request. Audit logging in middleware.
+
+**Testing:** Vitest with separate configs for unit vs smoke. Smoke tests with real database and services. Security tests for RBAC.
+
+**CI/CD:** Gitea Actions with lint→typecheck→test→build→publish pipeline. nfpm for RPM/DEB. Bun compile for standalone binaries. Podman for container images.
+
+**Deployment:** Docker/Podman compose for dev stack. Portainer API for production deploy (we'll use k3s instead). systemd for local daemons.
+
+**Completions:** Generated from Commander tree. Bash + Fish. --write and --check modes. Included in packages.
+
+**Key learnings applied:**
+- Start with proper monorepo structure (not flat scripts)
+- Type safety across packages via workspace references
+- Test-driven (unit tests before features)
+- CI from the start (not retrofitted)
+- RBAC and audit from the start (not bolted on)
+- Database-first design (schema defines the domain)
+
+## 12. Gitea Registry
+
+- **Registry:** mysources.co.uk (self-hosted Gitea at 10.0.0.194)
+- **Token:** stored at ~/.gitea-token and exposed to CI as the PACKAGES_TOKEN secret
+- **Packages:** RPM and DEB published to the Gitea packages API
+- **Container images:** pushed to the Gitea container registry
+- **API pattern:** same as the mcpctl publish scripts (check existing, delete, re-upload, link to repo)
--- a/.taskmaster/state.json
+++ b/.taskmaster/state.json
@@ -0,0 +1,6 @@
+{
+  "currentTag": "master",
+  "lastSwitched": "2026-03-18T00:17:54.213Z",
+  "branchTagMapping": {},
+  "migrationNoticeShown": true
+}
--- a/.taskmaster/tasks/tasks.json
+++ b/.taskmaster/tasks/tasks.json
@@ -0,0 +1,180 @@
+{
+  "master": {
+    "tasks": [
+      {
+        "id": 72,
+        "title": "Expand Prisma Schema with Resource Relationships",
+        "description": "Add Network, ServerNic, ServerDisk, and ClusterMember models to the Prisma schema. Add bastionId foreign key to Server model to track which bastion owns each server.",
+        "details": "Edit `bastion/src/labd/prisma/schema.prisma` to add:\n\n1. **Server model changes**:\n   - Add `bastionId String?` with relation to Bastion\n   - Add `hardwareInfo Json?` for storing raw HardwareInfo\n   - Add `os String?` for installed OS\n\n2. **Network model**:\n```prisma\nmodel Network {\n  id          String   @id @default(uuid())\n  name        String   @unique\n  cidr        String\n  vlan        Int?\n  gateway     String?\n  domain      String?\n  dhcpEnabled Boolean  @default(false)\n  createdAt   DateTime @default(now())\n  updatedAt   DateTime @updatedAt\n  \n  nics ServerNic[]\n}\n```\n\n3. **ServerNic model**:\n```prisma\nmodel ServerNic {\n  id        String  @id @default(uuid())\n  serverId  String\n  server    Server  @relation(fields: [serverId], references: [id], onDelete: Cascade)\n  networkId String?\n  network   Network? @relation(fields: [networkId], references: [id])\n  mac       String\n  ip        String?\n  name      String\n  state     String  @default(\"DOWN\")\n  \n  @@unique([serverId, mac])\n  @@index([networkId])\n}\n```\n\n4. **ServerDisk model**:\n```prisma\nmodel ServerDisk {\n  id       String @id @default(uuid())\n  serverId String\n  server   Server @relation(fields: [serverId], references: [id], onDelete: Cascade)\n  name     String\n  sizeGb   Float\n  model    String?\n  \n  @@unique([serverId, name])\n}\n```\n\n5. **ClusterMember model**:\n```prisma\nmodel ClusterMember {\n  id        String @id @default(uuid())\n  clusterId String\n  cluster   Cluster @relation(fields: [clusterId], references: [id], onDelete: Cascade)\n  serverId  String\n  server    Server  @relation(fields: [serverId], references: [id], onDelete: Cascade)\n  role      String  @default(\"worker\") // control-plane, worker\n  joinedAt  DateTime @default(now())\n  \n  @@unique([clusterId, serverId])\n  @@index([clusterId])\n  @@index([serverId])\n}\n```\n\n6. 
Update Server model with relations to nics, disks, clusterMemberships, and bastion.\n\nRun `pnpm prisma generate` and `pnpm prisma migrate dev --name add-resource-models`.",
+        "testStrategy": "1. Run `pnpm prisma validate` to verify schema syntax\n2. Run `pnpm prisma generate` to confirm client generation\n3. Create migration and verify it applies cleanly to local CockroachDB\n4. Write unit tests that create/read/delete each new model\n5. Verify cascade deletes work (deleting Server removes its NICs and Disks)",
+        "priority": "high",
+        "dependencies": [],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 73,
+        "title": "Implement State Persistence Service in labd",
+        "description": "Create a new service in labd that persists bastion state syncs to the Server table in CockroachDB. When bastion-state-sync messages arrive, upsert machines into Server with their hardware info, status, and ownership.",
+        "details": "Create `bastion/src/labd/src/services/state-persistence.ts`:\n\n```typescript\nimport type { PrismaClient } from \"@prisma/client\";\nimport type { BastionState, HardwareInfo, InstallConfig, InstalledInfo } from \"@lab/shared\";\nimport { logger } from \"./logger.js\";\n\nexport class StatePersistence {\n  constructor(private readonly db: PrismaClient) {}\n\n  async syncBastionState(bastionId: string, state: BastionState): Promise<void> {\n    // Process discovered machines\n    for (const [mac, hw] of Object.entries(state.discovered)) {\n      await this.upsertDiscoveredServer(bastionId, mac, hw);\n    }\n    \n    // Process queued machines (update status to provisioning)\n    for (const [mac, cfg] of Object.entries(state.install_queue)) {\n      await this.upsertQueuedServer(bastionId, mac, cfg);\n    }\n    \n    // Process installed machines\n    for (const [mac, info] of Object.entries(state.installed)) {\n      await this.upsertInstalledServer(bastionId, mac, info);\n    }\n  }\n\n  private async upsertDiscoveredServer(bastionId: string, mac: string, hw: HardwareInfo): Promise<void> {\n    const normalized = mac.toLowerCase();\n    \n    await this.db.server.upsert({\n      where: { mac: normalized },\n      create: {\n        hostname: `unknown-${normalized.replace(/:/g, \"\").slice(-6)}`,\n        mac: normalized,\n        bastionId,\n        status: \"discovered\",\n        hardwareInfo: hw as any,\n        labels: {\n          arch: hw.arch,\n          cpu_model: hw.cpu_model,\n          cpu_cores: hw.cpu_cores,\n          memory_gb: hw.memory_gb,\n        },\n      },\n      update: {\n        bastionId,\n        status: \"discovered\", // only if not already provisioning/installed\n        hardwareInfo: hw as any,\n      },\n    });\n    \n    // Sync NICs and Disks\n    await this.syncServerHardware(normalized, hw);\n  }\n  \n  private async syncServerHardware(mac: string, hw: HardwareInfo): Promise<void> {\n    const server = 
await this.db.server.findUnique({ where: { mac } });\n    if (!server) return;\n    \n    // Upsert NICs\n    for (const nic of hw.nics) {\n      await this.db.serverNic.upsert({\n        where: { serverId_mac: { serverId: server.id, mac: nic.mac.toLowerCase() } },\n        create: { serverId: server.id, mac: nic.mac.toLowerCase(), name: nic.name, state: nic.state },\n        update: { name: nic.name, state: nic.state },\n      });\n    }\n    \n    // Upsert Disks\n    for (const disk of hw.disks) {\n      await this.db.serverDisk.upsert({\n        where: { serverId_name: { serverId: server.id, name: disk.name } },\n        create: { serverId: server.id, name: disk.name, sizeGb: disk.size_gb, model: disk.model },\n        update: { sizeGb: disk.size_gb, model: disk.model },\n      });\n    }\n  }\n  \n  // Similar methods for upsertQueuedServer and upsertInstalledServer...\n}\n```\n\nIntegrate into `server.ts` WebSocket handler by calling `statePersistence.syncBastionState()` when `bastion-state-sync` messages arrive.",
+        "testStrategy": "1. Unit test StatePersistence with mocked PrismaClient\n2. Integration test: simulate bastion-state-sync message, verify Server rows created\n3. Test idempotency: send same state twice, verify no duplicates\n4. Test status transitions: discovered -> provisioning -> installed\n5. Verify hardware info (NICs, Disks) is correctly persisted",
+        "priority": "high",
+        "dependencies": [
+          72
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 74,
+        "title": "Add State Loading from labd on Bastion Startup",
+        "description": "Modify bastion startup to request its persisted state from labd before using the local JSON cache. This ensures bastions restore their state after pod restarts.",
+        "details": "1. Add new labd API endpoint `GET /api/bastions/:id/state` that returns the aggregated state for a specific bastion from the Server table:\n\n```typescript\n// bastion/src/labd/src/routes/bastions.ts\napp.get<{ Params: { id: string } }>(\"/api/bastions/:id/state\", async (request, reply) => {\n  const { id } = request.params;\n  \n  const servers = await db.server.findMany({\n    where: { bastionId: id },\n    include: { nics: true, disks: true },\n  });\n  \n  // Transform back to BastionState format\n  const state: BastionState = { discovered: {}, install_queue: {}, installed: {} };\n  for (const server of servers) {\n    const mac = server.mac;\n    if (!mac) continue;\n    \n    switch (server.status) {\n      case \"discovered\":\n        state.discovered[mac] = transformToHardwareInfo(server);\n        break;\n      case \"provisioning\":\n        state.install_queue[mac] = transformToInstallConfig(server);\n        break;\n      case \"installed\":\n        state.installed[mac] = transformToInstalledInfo(server);\n        break;\n    }\n  }\n  \n  return reply.send(state);\n});\n```\n\n2. Modify `BastionConnection.connect()` in `labd-connection.ts` to fetch state after enrollment:\n\n```typescript\nprivate async loadRemoteState(): Promise<BastionState | null> {\n  if (!this.bastionId || !this.config.labdUrl) return null;\n  try {\n    const resp = await fetch(`${this.config.labdUrl}/api/bastions/${this.bastionId}/state`);\n    if (resp.ok) return await resp.json();\n  } catch { /* fall back to local */ }\n  return null;\n}\n```\n\n3. In bastion `main.ts`, after establishing labd connection, merge remote state with local state (remote takes precedence for installed machines, local wins for in-progress installs).",
+        "testStrategy": "1. Integration test: start bastion, let it persist state, restart bastion, verify state restored\n2. Test merge logic: local has an in-progress install for a machine that remote still lists as discovered - verify the local in-progress install is preserved\n3. Test offline mode: labd unavailable, bastion falls back to local JSON\n4. Test fresh start: no local state, no remote state - bastion starts with empty state",
+        "priority": "high",
+        "dependencies": [
+          73
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 75,
+        "title": "Fix Bastion --dir Environment Variable Default",
+        "description": "Fix the bug where CLI's --dir default overrides the BASTION_DIR environment variable. The CLI option should use the env var as its default.",
+        "details": "Edit `bastion/src/cli/src/commands/serve.ts`:\n\n```typescript\n// Before (line 14):\n.option(\"--dir <dir>\", \"Bastion data directory\", \"/tmp/lab-bastion\")\n\n// After:\n.option(\n  \"--dir <dir>\",\n  \"Bastion data directory\",\n  process.env[\"BASTION_DIR\"] ?? \"/tmp/lab-bastion\"\n)\n```\n\nThis ensures:\n1. If `BASTION_DIR` env var is set (e.g., in k8s deployment), it's used as default\n2. Explicit `--dir` flag still overrides both\n3. Falls back to `/tmp/lab-bastion` if neither is set\n\nAlso update the k8s deployment manifest `bastion/deploy/k3s/deployment.yaml` to ensure `BASTION_DIR=/data` is properly set.",
+        "testStrategy": "1. Unit test: verify option default reads from process.env\n2. Integration test: set BASTION_DIR, run labctl without --dir, verify correct dir used\n3. Integration test: set BASTION_DIR, run labctl with --dir /custom, verify /custom used\n4. Test no env var: verify default /tmp/lab-bastion used",
+        "priority": "high",
+        "dependencies": [],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 76,
+        "title": "Create Resource Type Registry with Aliases",
+        "description": "Create a centralized resource type registry that maps resource names, plurals, and short aliases to canonical types. This enables kubectl-style resource resolution.",
+        "details": "Create `bastion/src/cli/src/utils/resources.ts`:\n\n```typescript\nexport interface ResourceDefinition {\n  kind: string;           // Canonical type: \"Server\", \"Cluster\", etc.\n  singular: string;       // \"server\"\n  plural: string;         // \"servers\"\n  aliases: string[];      // [\"srv\"]\n  apiPath: string;        // \"/api/servers\"\n  columns: TableColumn[]; // Default columns for 'get' output\n  wideColumns?: TableColumn[]; // Extra columns for -o wide\n}\n\nconst RESOURCE_DEFINITIONS: ResourceDefinition[] = [\n  {\n    kind: \"Server\",\n    singular: \"server\",\n    plural: \"servers\",\n    aliases: [\"srv\"],\n    apiPath: \"/api/servers\",\n    columns: serverColumns,\n    wideColumns: serverWideColumns,\n  },\n  {\n    kind: \"Cluster\",\n    singular: \"cluster\",\n    plural: \"clusters\",\n    aliases: [],\n    apiPath: \"/api/clusters\",\n    columns: clusterColumns,\n  },\n  {\n    kind: \"Network\",\n    singular: \"network\",\n    plural: \"networks\",\n    aliases: [\"net\"],\n    apiPath: \"/api/networks\",\n    columns: networkColumns,\n  },\n  // ... bastion, role, user, token, audit\n];\n\nconst aliasMap = new Map<string, ResourceDefinition>();\nfor (const def of RESOURCE_DEFINITIONS) {\n  aliasMap.set(def.singular, def);\n  aliasMap.set(def.plural, def);\n  for (const alias of def.aliases) {\n    aliasMap.set(alias, def);\n  }\n}\n\nexport function resolveResourceType(input: string): ResourceDefinition {\n  const normalized = input.toLowerCase();\n  const def = aliasMap.get(normalized);\n  if (!def) {\n    const valid = RESOURCE_DEFINITIONS.map(d => d.plural).join(\", \");\n    throw new Error(`Unknown resource type \"${input}\". 
Valid types: ${valid}`);\n  }\n  return def;\n}\n\nexport function resolveResourceIdentifier(input: string): {\n  type: ResourceDefinition;\n  name?: string;\n} {\n  // Handle \"server/labmaster\" or just \"servers\"\n  const parts = input.split(\"/\");\n  const type = resolveResourceType(parts[0]);\n  const name = parts.length > 1 ? parts.slice(1).join(\"/\") : undefined;\n  return { type, name };\n}\n```\n\nUpdate `bastion/src/cli/src/utils/resource.ts` to use the new registry.",
+        "testStrategy": "1. Unit test resolveResourceType with all aliases: server, servers, srv -> Server\n2. Test unknown resource type throws descriptive error\n3. Test case insensitivity: SERVER, Server, server all resolve correctly\n4. Test resolveResourceIdentifier parses \"server/labmaster\" correctly",
+        "priority": "high",
+        "dependencies": [],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 77,
+        "title": "Implement 'labctl get' Command",
+        "description": "Create the core 'labctl get <resource> [name]' command that lists resources with filtering and output format support. This is the foundation of the kubectl-style CLI.",
+        "details": "Create `bastion/src/cli/src/commands/get.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { resolveResourceType, type ResourceDefinition } from \"../utils/resources.js\";\nimport { getLabdClient } from \"../api/config.js\";\nimport { formatOutput, type TableColumn } from \"../utils/table.js\";\n\nexport function registerGetCommand(program: Command): void {\n  program\n    .command(\"get <resource> [name]\")\n    .description(\"List resources or get a specific resource by name\")\n    .option(\"--status <status>\", \"Filter by status\")\n    .option(\"--role <role>\", \"Filter by role (servers only)\")\n    .option(\"--cloud <cloud>\", \"Filter by cloud\")\n    .option(\"--env <environment>\", \"Filter by environment\")\n    .option(\"-l, --label <label>\", \"Filter by label (key=value)\")\n    .option(\"-A, --all-namespaces\", \"List across all clouds/environments\")\n    .action(async (resource: string, name: string | undefined, opts) => {\n      const config = program.opts()[\"_config\"];\n      const resourceDef = resolveResourceType(resource);\n      const client = getLabdClient();\n      \n      try {\n        let data: unknown[];\n        \n        if (name) {\n          // Get specific resource - could be name, ID, or MAC\n          const item = await client.getResource(resourceDef, name);\n          data = item ? [item] : [];\n        } else {\n          // List with filters\n          data = await client.listResources(resourceDef, {\n            status: opts.status,\n            role: opts.role,\n            cloud: opts.allNamespaces ? undefined : (opts.cloud ?? config.defaultCloud),\n            environment: opts.allNamespaces ? undefined : (opts.env ?? 
config.defaultEnvironment),\n            label: opts.label,\n          });\n        }\n        \n        if (data.length === 0) {\n          console.log(`No ${resourceDef.plural} found.`);\n          return;\n        }\n        \n        const columns = config.outputFormat === \"wide\" && resourceDef.wideColumns\n          ? [...resourceDef.columns, ...resourceDef.wideColumns]\n          : resourceDef.columns;\n        \n        formatOutput(data, config.outputFormat, columns);\n      } catch (err) {\n        console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);\n        process.exit(1);\n      }\n    });\n}\n```\n\nAdd to `index.ts`: `registerGetCommand(program);`\n\nExtend LabdClient with generic resource methods.",
+        "testStrategy": "1. Integration test: `labctl get servers` returns list from labd\n2. Test filtering: `labctl get servers --status discovered` only shows discovered\n3. Test name lookup: `labctl get server labmaster` returns single server\n4. Test MAC lookup: `labctl get server 38:05:25:33:e2:e4` resolves by MAC\n5. Test output formats: -o json, -o yaml, -o wide produce correct output\n6. Test unknown resource: `labctl get foo` shows helpful error",
+        "priority": "high",
+        "dependencies": [
+          76
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 78,
+        "title": "Implement 'labctl describe' Command",
+        "description": "Create the 'labctl describe <resource> <name>' command that shows detailed information about a resource including relationships, hardware info, and history.",
+        "details": "Create `bastion/src/cli/src/commands/describe.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { resolveResourceType } from \"../utils/resources.js\";\nimport { getLabdClient } from \"../api/config.js\";\n\nconst BOLD = \"\\x1b[1m\";\nconst DIM = \"\\x1b[2m\";\nconst RESET = \"\\x1b[0m\";\n\ninterface DescribeSection {\n  title: string;\n  fields: Array<[string, string | undefined]>;\n}\n\nfunction printDescribe(name: string, sections: DescribeSection[]): void {\n  console.log(`${BOLD}Name:${RESET} ${name}`);\n  for (const section of sections) {\n    console.log(`\\n${BOLD}${section.title}:${RESET}`);\n    for (const [key, value] of section.fields) {\n      if (value !== undefined) {\n        console.log(`  ${DIM}${key}:${RESET} ${value}`);\n      }\n    }\n  }\n}\n\nexport function registerDescribeCommand(program: Command): void {\n  program\n    .command(\"describe <resource> <name>\")\n    .description(\"Show detailed information about a resource\")\n    .action(async (resource: string, name: string) => {\n      const resourceDef = resolveResourceType(resource);\n      const client = getLabdClient();\n      \n      try {\n        const item = await client.describeResource(resourceDef, name);\n        if (!item) {\n          console.error(`${resourceDef.singular} \"${name}\" not found.`);\n          process.exit(1);\n        }\n        \n        // Resource-specific formatting\n        switch (resourceDef.kind) {\n          case \"Server\":\n            printServerDescription(item);\n            break;\n          case \"Cluster\":\n            printClusterDescription(item);\n            break;\n          default:\n            console.log(JSON.stringify(item, null, 2));\n        }\n      } catch (err) {\n        console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`);\n        process.exit(1);\n      }\n    });\n}\n\nfunction printServerDescription(server: any): void {\n  const sections: DescribeSection[] = [\n    {\n      title: \"Metadata\",\n      fields: [\n        [\"ID\", server.id],\n        [\"Cloud\", server.cloud],\n        [\"Environment\", server.environment],\n        [\"Role\", server.role],\n        [\"Status\", server.status],\n        [\"Created\", server.createdAt],\n        [\"Last Seen\", server.lastHeartbeat],\n      ],\n    },\n    {\n      title: \"Hardware\",\n      fields: [\n        [\"MAC\", server.mac],\n        [\"IP\", server.ip],\n        [\"Architecture\", server.hardwareInfo?.arch],\n        [\"CPU\", server.hardwareInfo?.cpu_model],\n        [\"Cores\", String(server.hardwareInfo?.cpu_cores)],\n        [\"Memory\", `${server.hardwareInfo?.memory_gb}GB`],\n        [\"Product\", server.hardwareInfo?.product],\n      ],\n    },\n  ];\n  \n  if (server.nics?.length > 0) {\n    sections.push({\n      title: \"Network Interfaces\",\n      fields: server.nics.map((n: any) => [n.name, `${n.mac} ${n.ip ?? \"\"} (${n.state})`]),\n    });\n  }\n  \n  if (server.disks?.length > 0) {\n    sections.push({\n      title: \"Disks\",\n      fields: server.disks.map((d: any) => [d.name, `${d.sizeGb}GB ${d.model ?? \"\"}`]),\n    });\n  }\n  \n  if (server.clusterMemberships?.length > 0) {\n    sections.push({\n      title: \"Cluster Membership\",\n      fields: server.clusterMemberships.map((m: any) => [m.cluster.name, m.role]),\n    });\n  }\n  \n  printDescribe(server.hostname, sections);\n}\n```",
+        "testStrategy": "1. Integration test: `labctl describe server labmaster` shows full details\n2. Test hardware info display: CPU, memory, disks, NICs all shown\n3. Test cluster membership: server in cluster shows membership section\n4. Test not found: `labctl describe server nonexistent` shows helpful error\n5. Test different resource types: describe cluster, network, bastion",
+        "priority": "medium",
+        "dependencies": [
+          77
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 79,
+        "title": "Implement 'labctl create/delete' Commands",
+        "description": "Create the 'labctl create <resource>' and 'labctl delete <resource> <name>' commands for creating and removing resources like networks, clusters, and tokens.",
+        "details": "Create `bastion/src/cli/src/commands/create.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { resolveResourceType } from \"../utils/resources.js\";\nimport { getLabdClient } from \"../api/config.js\";\n\nexport function registerCreateCommand(program: Command): void {\n  const create = program\n    .command(\"create <resource>\")\n    .description(\"Create a resource\");\n  \n  // labctl create network --name lab --cidr 192.168.8.0/24\n  create\n    .command(\"network\")\n    .description(\"Create a network\")\n    .requiredOption(\"--name <name>\", \"Network name\")\n    .requiredOption(\"--cidr <cidr>\", \"Network CIDR (e.g., 192.168.8.0/24)\")\n    .option(\"--gateway <gateway>\", \"Gateway IP\")\n    .option(\"--vlan <vlan>\", \"VLAN ID\", parseInt)\n    .option(\"--domain <domain>\", \"DNS domain\")\n    .option(\"--dhcp\", \"Enable DHCP\")\n    .action(async (opts) => {\n      const client = getLabdClient();\n      try {\n        const network = await client.createNetwork({\n          name: opts.name,\n          cidr: opts.cidr,\n          gateway: opts.gateway,\n          vlan: opts.vlan,\n          domain: opts.domain,\n          dhcpEnabled: opts.dhcp ?? false,\n        });\n        console.log(`network/${network.name} created`);\n      } catch (err) {\n        console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`);\n        process.exit(1);\n      }\n    });\n  \n  // labctl create token --label \"worker enrollment\" --type reusable\n  create\n    .command(\"token\")\n    .description(\"Create a join token\")\n    .option(\"--label <label>\", \"Token label/description\")\n    .option(\"--type <type>\", \"Token type: one-time or reusable\", \"one-time\")\n    .option(\"--expires <duration>\", \"Expiration (e.g., 24h, 7d)\")\n    .action(async (opts) => {\n      const client = getLabdClient();\n      try {\n        const token = await client.createToken(opts);\n        console.log(`Token created: ${token.token}`);\n        if (opts.label) console.log(`Label: ${opts.label}`);\n        if (token.expiresAt) console.log(`Expires: ${token.expiresAt}`);\n      } catch (err) {\n        console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);\n        process.exit(1);\n      }\n    });\n}\n```\n\nCreate `bastion/src/cli/src/commands/delete.ts`:\n\n```typescript\nexport function registerDeleteCommand(program: Command): void {\n  program\n    .command(\"delete <resource> <name>\")\n    .description(\"Delete a resource\")\n    .option(\"--force\", \"Skip confirmation\")\n    .action(async (resource: string, name: string, opts) => {\n      const resourceDef = resolveResourceType(resource);\n      const client = getLabdClient();\n      \n      if (!opts.force) {\n        const { confirm } = await import(\"../utils/prompts.js\");\n        const yes = await confirm(`Delete ${resourceDef.singular} \"${name}\"?`);\n        if (!yes) {\n          console.log(\"Cancelled.\");\n          return;\n        }\n      }\n      \n      try {\n        await client.deleteResource(resourceDef, name);\n        console.log(`${resourceDef.singular}/${name} deleted`);\n      } catch (err) {\n        console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);\n        process.exit(1);\n      }\n    });\n}\n```",
+        "testStrategy": "1. Integration test: `labctl create network` creates network in DB\n2. Test validation: missing required flags shows helpful error\n3. Test token creation: token returned is valid UUID, stored in DB\n4. Test delete with confirmation: prompts user, respects --force\n5. Test delete cascade: deleting server removes NICs, disks\n6. Test delete protection: cannot delete bastion with connected servers",
+        "priority": "medium",
+        "dependencies": [
+          77
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 80,
+        "title": "Refactor Provision Commands to kubectl-style",
+        "description": "Refactor existing provision commands to use kubectl-style syntax: 'labctl provision <server>' instead of 'labctl provision install <mac>'.",
+        "details": "The new command structure should be:\n- `labctl provision <server> --os fedora-43 --role worker` (queue install)\n- `labctl reprovision <server>` (reinstall)\n- `labctl forget <server>` (remove from tracking)\n\nModify `bastion/src/cli/src/commands/install.ts` → rename to `provision.ts`:\n\n```typescript\nexport function registerProvisionCommand(program: Command): void {\n  program\n    .command(\"provision <server>\")\n    .description(\"Queue a server for OS installation\")\n    .requiredOption(\"--os <os>\", \"Operating system\", \"fedora-43\")\n    .requiredOption(\"--role <role>\", \"Server role\", \"worker\")\n    .option(\"--disk <disk>\", \"Target disk (auto-detected if not specified)\")\n    .option(\"--hostname <hostname>\", \"Override hostname\")\n    .action(async (server: string, opts) => {\n      const client = getLabdClient();\n      \n      // Resolve server: could be hostname, MAC, or ID\n      const resolved = await client.resolveServer(server);\n      if (!resolved) {\n        console.error(`Server \"${server}\" not found.`);\n        console.error(\"Tip: Use 'labctl get servers' to see available servers.\");\n        process.exit(1);\n      }\n      \n      if (resolved.status === \"installed\") {\n        console.error(`Server \"${resolved.hostname}\" is already installed.`);\n        console.error(\"Tip: Use 'labctl reprovision' to reinstall.\");\n        process.exit(1);\n      }\n      \n      try {\n        await client.provisionServer(resolved.mac, {\n          hostname: opts.hostname ?? resolved.hostname,\n          os: opts.os,\n          role: opts.role,\n          disk: opts.disk,\n        });\n        console.log(`Server ${resolved.hostname} queued for ${opts.os} installation as ${opts.role}.`);\n      } catch (err) {\n        console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`);\n        process.exit(1);\n      }\n    });\n}\n```\n\nSimilarly update reprovision.ts and forget.ts to accept server name/MAC/ID.\n\nUpdate index.ts to register commands at top level instead of under 'provision' subcommand.",
+        "testStrategy": "1. Test server resolution: provision by hostname, MAC, or UUID all work\n2. Test already installed: provisioning installed server shows reprovision hint\n3. Test unknown server: helpful error message with tip\n4. Test reprovision: reinstalls installed server\n5. Test forget: removes server from all state categories\n6. Backward compat: verify 'labctl provision list' still works and prints a deprecation warning",
+        "priority": "medium",
+        "dependencies": [
+          77
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 81,
+        "title": "Implement Server and Resource API Endpoints in labd",
+        "description": "Add REST API endpoints in labd for full resource CRUD operations: networks, clusters, tokens. Extend servers endpoint with filters and relationship includes.",
+        "details": "Create/extend labd route files:\n\n1. **Extend servers.ts**:\n```typescript\n// GET /api/servers - with extended filters and includes\napp.get(\"/api/servers\", async (request, reply) => {\n  const { status, role, cloud, environment, label, include } = request.query;\n  \n  const where = {};\n  if (status) where.status = status;\n  if (role) where.role = role;\n  if (cloud) where.cloud = cloud;\n  if (environment) where.environment = environment;\n  if (label) where.labels = { path: [labelKey], equals: labelValue };\n  \n  const servers = await db.server.findMany({\n    where,\n    include: {\n      nics: include?.includes(\"nics\"),\n      disks: include?.includes(\"disks\"),\n      clusterMemberships: include?.includes(\"clusters\") ? { include: { cluster: true } } : false,\n      bastion: include?.includes(\"bastion\"),\n    },\n  });\n  return servers;\n});\n\n// GET /api/servers/:id - by ID, hostname, or MAC\napp.get(\"/api/servers/:identifier\", async (request, reply) => {\n  const { identifier } = request.params;\n  \n  // Try UUID first\n  let server = await db.server.findUnique({ where: { id: identifier }, include: fullInclude });\n  // Try hostname\n  if (!server) server = await db.server.findUnique({ where: { hostname: identifier }, include: fullInclude });\n  // Try MAC\n  if (!server) server = await db.server.findUnique({ where: { mac: identifier.toLowerCase() }, include: fullInclude });\n  \n  if (!server) return reply.code(404).send({ error: \"Server not found\" });\n  return server;\n});\n```\n\n2. 
**Create networks.ts**:\n```typescript\n// GET /api/networks, POST /api/networks, DELETE /api/networks/:id\nexport function registerNetworkRoutes(app: FastifyInstance, db: DbClient): void {\n  app.get(\"/api/networks\", async () => db.network.findMany());\n  \n  app.post(\"/api/networks\", async (request, reply) => {\n    const { name, cidr, gateway, vlan, domain, dhcpEnabled } = request.body;\n    // Validate CIDR format\n    const network = await db.network.create({ data: { name, cidr, gateway, vlan, domain, dhcpEnabled } });\n    return reply.code(201).send(network);\n  });\n  \n  app.delete(\"/api/networks/:id\", async (request, reply) => {\n    await db.network.delete({ where: { id: request.params.id } });\n    return reply.code(204).send();\n  });\n}\n```\n\n3. **Create clusters.ts**:\n```typescript\n// Similar CRUD for clusters with member management\napp.get(\"/api/clusters/:id/members\", ...);\napp.post(\"/api/clusters/:id/members\", ...);\napp.delete(\"/api/clusters/:id/members/:serverId\", ...);\n```",
+        "testStrategy": "1. Integration test all CRUD endpoints with HTTP client\n2. Test server resolution: by id, hostname, and MAC all return same server\n3. Test include parameter: nics, disks, clusters included when requested\n4. Test validation: invalid CIDR rejected, duplicate names rejected\n5. Test network deletion with attached NICs: the NICs must survive with networkId set to null (ServerNic.network is an optional relation with no onDelete, so Prisma defaults to SetNull)",
+        "priority": "medium",
+        "dependencies": [
+          72,
+          73
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 82,
+        "title": "Implement RBAC Permission Checks in CLI",
+        "description": "Wire RBAC permission checks into CLI commands. Check user permissions before executing operations using the existing Permission model.",
+        "details": "1. Create `bastion/src/cli/src/middleware/rbac.ts`:\n\n```typescript\nimport { getLabdClient } from \"../api/config.js\";\n\nexport interface PermissionContext {\n  action: string;      // read, exec, apply, destroy, manage, admin\n  cloud?: string;\n  environment?: string;\n  server?: string;\n}\n\nexport async function checkPermission(ctx: PermissionContext): Promise<boolean> {\n  const client = getLabdClient();\n  try {\n    const result = await client.checkPermission(ctx);\n    return result.allowed;\n  } catch {\n    // If can't reach labd, fail open for local operations\n    return true;\n  }\n}\n\nexport async function requirePermission(ctx: PermissionContext): Promise<void> {\n  const allowed = await checkPermission(ctx);\n  if (!allowed) {\n    throw new Error(\n      `Permission denied: ${ctx.action} on ${ctx.server ?? \"*\"}@${ctx.cloud ?? \"*\"}/${ctx.environment ?? \"*\"}`\n    );\n  }\n}\n```\n\n2. Add labd endpoint `POST /api/auth/check-permission`:\n```typescript\napp.post(\"/api/auth/check-permission\", async (request, reply) => {\n  const user = await authenticateRequest(request); // from cert or token\n  const { action, cloud, environment, server } = request.body;\n  \n  const permissions = await db.permission.findMany({\n    where: {\n      role: { userBindings: { some: { userId: user.id } } },\n    },\n  });\n  \n  const allowed = permissions.some(p => \n    matchesPattern(p.action, action) &&\n    matchesPattern(p.cloud, cloud ?? \"*\") &&\n    matchesPattern(p.environment, environment ?? \"*\") &&\n    matchesPattern(p.server, server ?? \"*\")\n  );\n  \n  return { allowed };\n});\n```\n\n3. 
Integrate into commands:\n```typescript\n// In provision command\nawait requirePermission({ action: \"apply\", cloud, environment, server: resolved.hostname });\n\n// In delete command\nawait requirePermission({ action: \"destroy\", cloud, environment, server: name });\n\n// In get command (filter results)\nconst servers = await client.listServers(filters);\nconst visible = await filterByPermission(servers, \"read\");\n```",
+        "testStrategy": "1. Unit test permission matching logic with wildcards\n2. Test admin role: has access to all resources\n3. Test operator role: can read/exec but not destroy\n4. Test viewer role: can only read, provision denied\n5. Test scope matching: permission for cloud=aws doesn't grant access to cloud=baremetal\n6. Test denied action is audit-logged",
+        "priority": "medium",
+        "dependencies": [
+          77,
+          81
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 83,
+        "title": "Implement Audit Logging for Resource Operations",
+        "description": "Log all resource mutations to the AuditLog table. Include user, action, resource type/name, result, and source IP.",
+        "details": "1. Create `bastion/src/labd/src/services/audit.ts`:\n\n```typescript\nimport type { PrismaClient } from \"@prisma/client\";\n\nexport interface AuditEntry {\n  userId?: string;\n  serverId?: string;\n  sessionId?: string;\n  action: string;         // create, update, delete, provision, exec, rbac-denied\n  resourceType: string;   // server, cluster, network, token, etc.\n  resourceName: string;\n  args?: string;          // sanitized args (no secrets)\n  result: \"success\" | \"denied\" | \"error\";\n  durationMs?: number;\n  sourceIp?: string;\n}\n\nexport class AuditService {\n  constructor(private readonly db: PrismaClient) {}\n  \n  async log(entry: AuditEntry): Promise<void> {\n    await this.db.auditLog.create({\n      data: {\n        userId: entry.userId,\n        serverId: entry.serverId,\n        sessionId: entry.sessionId,\n        action: entry.action,\n        resourceType: entry.resourceType,\n        resourceName: entry.resourceName,\n        args: entry.args,\n        result: entry.result,\n        durationMs: entry.durationMs,\n        sourceIp: entry.sourceIp,\n      },\n    });\n  }\n  \n  async query(filters: {\n    userId?: string;\n    action?: string;\n    resourceType?: string;\n    since?: Date;\n    limit?: number;\n  }): Promise<AuditEntry[]> {\n    return this.db.auditLog.findMany({\n      where: {\n        userId: filters.userId,\n        action: filters.action,\n        resourceType: filters.resourceType,\n        timestamp: filters.since ? { gte: filters.since } : undefined,\n      },\n      orderBy: { timestamp: \"desc\" },\n      take: filters.limit ?? 100,\n    });\n  }\n}\n```\n\n2. 
Add Fastify hook to wrap route handlers:\n```typescript\napp.addHook(\"onResponse\", async (request, reply) => {\n  // Log mutations (POST, PUT, DELETE)\n  if ([\"POST\", \"PUT\", \"DELETE\"].includes(request.method)) {\n    const path = request.url;\n    const resourceMatch = path.match(/\\/api\\/(\\w+)(?:\\/([^/]+))?/);\n    if (resourceMatch) {\n      await auditService.log({\n        action: methodToAction(request.method),\n        resourceType: resourceMatch[1],\n        resourceName: resourceMatch[2] ?? \"\",\n        result: reply.statusCode < 400 ? \"success\" : \"error\",\n        sourceIp: request.ip,\n      });\n    }\n  }\n});\n```\n\n3. Add `labctl get audit` command to view audit logs.",
+        "testStrategy": "1. Integration test: create network, verify audit log entry created\n2. Test RBAC denial is logged with result=denied\n3. Test sensitive data sanitization: tokens/passwords not in args\n4. Test query filters: by user, action, resourceType, time range\n5. Test `labctl get audit` displays recent entries correctly",
+        "priority": "medium",
+        "dependencies": [
+          81,
+          82
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 84,
+        "title": "Update CLI Entry Point and Help Text",
+        "description": "Update the CLI entry point to register all new commands and update help text to reflect the kubectl-style interface. Add deprecation warnings for old command structure.",
+        "details": "Update `bastion/src/cli/src/index.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { APP_VERSION } from \"@lab/shared\";\nimport { loadConfig } from \"./config/index.js\";\n\n// New kubectl-style commands\nimport { registerGetCommand } from \"./commands/get.js\";\nimport { registerDescribeCommand } from \"./commands/describe.js\";\nimport { registerCreateCommand } from \"./commands/create.js\";\nimport { registerDeleteCommand } from \"./commands/delete.js\";\nimport { registerApplyCommand } from \"./commands/apply.js\";\nimport { registerEditCommand } from \"./commands/edit.js\";\n\n// Action commands\nimport { registerProvisionCommand } from \"./commands/provision.js\";\nimport { registerReprovisionCommand } from \"./commands/reprovision.js\";\nimport { registerForgetCommand } from \"./commands/forget.js\";\n\n// Bastion management\nimport { registerBastionCommand } from \"./commands/bastion.js\"; // start/stop/status\n\n// App management (unchanged)\nimport { registerAppCommand } from \"./commands/app.js\";\n\n// Utility\nimport { registerConfigCommand } from \"./commands/config.js\";\nimport { registerLoginCommand } from \"./commands/login.js\";\nimport { registerDoctorCommand } from \"./commands/doctor.js\";\n\nexport function createProgram(): Command {\n  const program = new Command();\n  \n  program\n    .name(\"labctl\")\n    .description(\"Lab infrastructure management CLI\")\n    .version(APP_VERSION);\n  \n  // Global options\n  program\n    .option(\"-o, --output <format>\", \"output format (table, json, yaml, wide)\", \"table\")\n    .option(\"--server <url>\", \"override labd server URL\")\n    .option(\"--env <name>\", \"override default environment\")\n    .option(\"--cloud <name>\", \"override default cloud\")\n    .option(\"--debug\", \"enable debug output\")\n    .option(\"--no-color\", \"disable colored output\");\n  \n  // Core CRUD commands\n  registerGetCommand(program);        // labctl get <resource> 
[name]\n  registerDescribeCommand(program);   // labctl describe <resource> <name>\n  registerCreateCommand(program);     // labctl create <resource>\n  registerDeleteCommand(program);     // labctl delete <resource> <name>\n  registerApplyCommand(program);      // labctl apply -f <file>\n  registerEditCommand(program);       // labctl edit <resource> <name>\n  \n  // Provisioning actions\n  registerProvisionCommand(program);  // labctl provision <server>\n  registerReprovisionCommand(program);// labctl reprovision <server>\n  registerForgetCommand(program);     // labctl forget <server>\n  \n  // Bastion management\n  registerBastionCommand(program);    // labctl bastion start|stop|status\n  \n  // App management\n  registerAppCommand(program);        // labctl app install|health k3s\n  \n  // Utility\n  registerConfigCommand(program);\n  registerLoginCommand(program);\n  registerDoctorCommand(program);\n  \n  // Legacy compatibility with deprecation warnings\n  registerLegacyCommands(program);\n  \n  return program;\n}\n\nfunction registerLegacyCommands(program: Command): void {\n  // labctl provision list -> labctl get servers (with warning)\n  program\n    .command(\"provision\")\n    .command(\"list\")\n    .action(() => {\n      console.warn(\"DEPRECATED: Use 'labctl get servers' instead.\");\n      // Delegate to get servers\n    });\n}\n```\n\nUpdate shell completions in `scripts/generate-completions.ts` for new command structure.",
+        "testStrategy": "1. Test --help shows all new commands with descriptions\n2. Test resource type help: `labctl get --help` lists valid resources\n3. Test deprecated commands show warning but still work\n4. Test shell completions generated for new commands\n5. Test global options: -o, --server, --env, --cloud all work",
+        "priority": "low",
+        "dependencies": [
+          77,
+          78,
+          79,
+          80
+        ],
+        "status": "pending",
+        "subtasks": []
+      }
+    ],
+    "metadata": {
+      "created": "2026-03-26T04:26:49.813Z",
+      "updated": "2026-03-26T04:26:49.813Z",
+      "description": "Tasks for master context"
+    }
+  }
+}
--- a/.taskmaster/templates/example_prd.txt
+++ b/.taskmaster/templates/example_prd.txt
@@ -0,0 +1,47 @@
+<context>
+# Overview  
+[Provide a high-level overview of your product here. Explain what problem it solves, who it's for, and why it's valuable.]
+
+# Core Features  
+[List and describe the main features of your product. For each feature, include:
+- What it does
+- Why it's important
+- How it works at a high level]
+
+# User Experience  
+[Describe the user journey and experience. Include:
+- User personas
+- Key user flows
+- UI/UX considerations]
+</context>
+<PRD>
+# Technical Architecture  
+[Outline the technical implementation details:
+- System components
+- Data models
+- APIs and integrations
+- Infrastructure requirements]
+
+# Development Roadmap  
+[Break down the development process into phases:
+- MVP requirements
+- Future enhancements
+- Do not think about timelines whatsoever -- all that matters is scope and detailing exactly what needs to be built in each phase so it can later be cut up into tasks]
+
+# Logical Dependency Chain
+[Define the logical order of development:
+- Which features need to be built first (foundation)
+- Getting as quickly as possible to a usable/visible front end that works
+- Properly pacing and scoping each feature so it is atomic but can also be built upon and improved as development progresses]
+
+# Risks and Mitigations  
+[Identify potential risks and how they'll be addressed:
+- Technical challenges
+- Figuring out the MVP that we can build upon
+- Resource constraints]
+
+# Appendix  
+[Include any additional information:
+- Research findings
+- Technical specifications]
+</PRD>
--- a/.taskmaster/templates/example_prd_rpg.txt
+++ b/.taskmaster/templates/example_prd_rpg.txt
@@ -0,0 +1,511 @@
+<rpg-method>
+# Repository Planning Graph (RPG) Method - PRD Template
+
+This template teaches you (AI or human) how to create structured, dependency-aware PRDs using the RPG methodology from Microsoft Research. The key insight: separate WHAT (functional) from HOW (structural), then connect them with explicit dependencies.
+
+## Core Principles
+
+1. **Dual-Semantics**: Think functional (capabilities) AND structural (code organization) separately, then map them
+2. **Explicit Dependencies**: Never assume - always state what depends on what
+3. **Topological Order**: Build foundation first, then layers on top
+4. **Progressive Refinement**: Start broad, refine iteratively
+
+## How to Use This Template
+
+- Follow the instructions in each `<instruction>` block
+- Look at `<example>` blocks to see good vs bad patterns
+- Fill in the content sections with your project details
+- The AI reading this will learn the RPG method by following along
+- Task Master will parse the resulting PRD into dependency-aware tasks
+
+## Recommended Tools for Creating PRDs
+
+When using this template to **create** a PRD (not parse it), use **code-context-aware AI assistants** for best results:
+
+**Why?** The AI needs to understand your existing codebase to make good architectural decisions about modules, dependencies, and integration points.
+
+**Recommended tools:**
+- **Claude Code** (claude-code CLI) - Best for structured reasoning and large contexts
+- **Cursor/Windsurf** - IDE integration with full codebase context
+- **Gemini CLI** (gemini-cli) - Massive context window for large codebases
+- **Codex/Grok CLI** - Strong code generation with context awareness
+
+**Note:** Once your PRD is created, `task-master parse-prd` works with any configured AI model - it just needs to read the PRD text itself, not your codebase.
+</rpg-method>
+
+---
+
+<overview>
+<instruction>
+Start with the problem, not the solution. Be specific about:
+- What pain point exists?
+- Who experiences it?
+- Why don't existing solutions work?
+- What does success look like (measurable outcomes)?
+
+Keep this section focused - don't jump into implementation details yet.
+</instruction>
+
+## Problem Statement
+[Describe the core problem. Be concrete about user pain points.]
+
+## Target Users
+[Define personas, their workflows, and what they're trying to achieve.]
+
+## Success Metrics
+[Quantifiable outcomes. Examples: "80% task completion via autopilot", "< 5% manual intervention rate"]
+
+</overview>
+
+---
+
+<functional-decomposition>
+<instruction>
+Now think about CAPABILITIES (what the system DOES), not code structure yet.
+
+Step 1: Identify high-level capability domains
+- Think: "What major things does this system do?"
+- Examples: Data Management, Core Processing, Presentation Layer
+
+Step 2: For each capability, enumerate specific features
+- Use explore-exploit strategy:
+  * Exploit: What features are REQUIRED for core value?
+  * Explore: What features make this domain COMPLETE?
+
+Step 3: For each feature, define:
+- Description: What it does in one sentence
+- Inputs: What data/context it needs
+- Outputs: What it produces/returns
+- Behavior: Key logic or transformations
+
+<example type="good">
+Capability: Data Validation
+  Feature: Schema validation
+    - Description: Validate JSON payloads against defined schemas
+    - Inputs: JSON object, schema definition
+    - Outputs: Validation result (pass/fail) + error details
+    - Behavior: Iterate fields, check types, enforce constraints
+
+  Feature: Business rule validation
+    - Description: Apply domain-specific validation rules
+    - Inputs: Validated data object, rule set
+    - Outputs: Boolean + list of violated rules
+    - Behavior: Execute rules sequentially, short-circuit on failure
+</example>
+
+<example type="bad">
+Capability: validation.js
+  (Problem: This is a FILE, not a CAPABILITY. Mixing structure into functional thinking.)
+
+Capability: Validation
+  Feature: Make sure data is good
+  (Problem: Too vague. No inputs/outputs. Not actionable.)
+</example>
+</instruction>
+
+## Capability Tree
+
+### Capability: [Name]
+[Brief description of what this capability domain covers]
+
+#### Feature: [Name]
+- **Description**: [One sentence]
+- **Inputs**: [What it needs]
+- **Outputs**: [What it produces]
+- **Behavior**: [Key logic]
+
+#### Feature: [Name]
+- **Description**:
+- **Inputs**:
+- **Outputs**:
+- **Behavior**:
+
+### Capability: [Name]
+...
+
+</functional-decomposition>
+
+---
+
+<structural-decomposition>
+<instruction>
+NOW think about code organization. Map capabilities to actual file/folder structure.
+
+Rules:
+1. Each capability maps to a module (folder or file)
+2. Features within a capability map to functions/classes
+3. Use clear module boundaries - each module has ONE responsibility
+4. Define what each module exports (public interface)
+
+The goal: Create a clear mapping between "what it does" (functional) and "where it lives" (structural).
+
+<example type="good">
+Capability: Data Validation
+  → Maps to: src/validation/
+    ├── schema-validator.js      (Schema validation feature)
+    ├── rule-validator.js         (Business rule validation feature)
+    └── index.js                  (Public exports)
+
+Exports:
+  - validateSchema(data, schema)
+  - validateRules(data, rules)
+</example>
+
+<example type="bad">
+Capability: Data Validation
+  → Maps to: src/utils.js
+  (Problem: "utils" is not a clear module boundary. Where do I find validation logic?)
+
+Capability: Data Validation
+  → Maps to: src/validation/everything.js
+  (Problem: One giant file. Features should map to separate files for maintainability.)
+</example>
+</instruction>
+
+## Repository Structure
+
+```
+project-root/
+├── src/
+│   ├── [module-name]/       # Maps to: [Capability Name]
+│   │   ├── [file].js        # Maps to: [Feature Name]
+│   │   └── index.js         # Public exports
+│   └── [module-name]/
+├── tests/
+└── docs/
+```
+
+## Module Definitions
+
+### Module: [Name]
+- **Maps to capability**: [Capability from functional decomposition]
+- **Responsibility**: [Single clear purpose]
+- **File structure**:
+  ```
+  module-name/
+  ├── feature1.js
+  ├── feature2.js
+  └── index.js
+  ```
+- **Exports**:
+  - `functionName()` - [what it does]
+  - `ClassName` - [what it does]
+
+</structural-decomposition>
+
+---
+
+<dependency-graph>
+<instruction>
+This is THE CRITICAL SECTION for Task Master parsing.
+
+Define explicit dependencies between modules. This creates the topological order for task execution.
+
+Rules:
+1. List modules in dependency order (foundation first)
+2. For each module, state what it depends on
+3. Foundation modules should have NO dependencies
+4. Every non-foundation module should depend on at least one other module
+5. Think: "What must EXIST before I can build this module?"
+
+<example type="good">
+Foundation Layer (no dependencies):
+  - error-handling: No dependencies
+  - config-manager: No dependencies
+  - base-types: No dependencies
+
+Data Layer:
+  - schema-validator: Depends on [base-types, error-handling]
+  - data-ingestion: Depends on [schema-validator, config-manager]
+
+Core Layer:
+  - algorithm-engine: Depends on [base-types, error-handling]
+  - pipeline-orchestrator: Depends on [algorithm-engine, data-ingestion]
+</example>
+
+<example type="bad">
+- validation: Depends on API
+- API: Depends on validation
+(Problem: Circular dependency. This will cause build/runtime issues.)
+
+- user-auth: Depends on everything
+(Problem: Too many dependencies. Should be more focused.)
+</example>
+</instruction>
+
+## Dependency Chain
+
+### Foundation Layer (Phase 0)
+No dependencies - these are built first.
+
+- **[Module Name]**: [What it provides]
+- **[Module Name]**: [What it provides]
+
+### [Layer Name] (Phase 1)
+- **[Module Name]**: Depends on [[module-from-phase-0], [module-from-phase-0]]
+- **[Module Name]**: Depends on [[module-from-phase-0]]
+
+### [Layer Name] (Phase 2)
+- **[Module Name]**: Depends on [[module-from-phase-1], [module-from-foundation]]
+
+[Continue building up layers...]
+
+</dependency-graph>
+
+---
+
+<implementation-roadmap>
+<instruction>
+Turn the dependency graph into concrete development phases.
+
+Each phase should:
+1. Have clear entry criteria (what must exist before starting)
+2. Contain tasks that can be parallelized (no inter-dependencies within phase)
+3. Have clear exit criteria (how do we know phase is complete?)
+4. Build toward something USABLE (not just infrastructure)
+
+Phase ordering follows topological sort of dependency graph.
+
+<example type="good">
+Phase 0: Foundation
+  Entry: Clean repository
+  Tasks:
+    - Implement error handling utilities
+    - Create base type definitions
+    - Setup configuration system
+  Exit: Other modules can import foundation without errors
+
+Phase 1: Data Layer
+  Entry: Phase 0 complete
+  Tasks:
+    - Implement schema validator (uses: base types, error handling)
+    - Build data ingestion pipeline (uses: validator, config)
+  Exit: End-to-end data flow from input to validated output
+</example>
+
+<example type="bad">
+Phase 1: Build Everything
+  Tasks:
+    - API
+    - Database
+    - UI
+    - Tests
+  (Problem: No clear focus. Too broad. Dependencies not considered.)
+</example>
+</instruction>
+
+## Development Phases
+
+### Phase 0: [Foundation Name]
+**Goal**: [What foundational capability this establishes]
+
+**Entry Criteria**: [What must be true before starting]
+
+**Tasks**:
+- [ ] [Task name] (depends on: [none or list])
+  - Acceptance criteria: [How we know it's done]
+  - Test strategy: [What tests prove it works]
+
+- [ ] [Task name] (depends on: [none or list])
+
+**Exit Criteria**: [Observable outcome that proves phase complete]
+
+**Delivers**: [What can users/developers do after this phase?]
+
+---
+
+### Phase 1: [Layer Name]
+**Goal**:
+
+**Entry Criteria**: Phase 0 complete
+
+**Tasks**:
+- [ ] [Task name] (depends on: [[tasks-from-phase-0]])
+- [ ] [Task name] (depends on: [[tasks-from-phase-0]])
+
+**Exit Criteria**:
+
+**Delivers**:
+
+---
+
+[Continue with more phases...]
+
+</implementation-roadmap>
+
+---
+
+<test-strategy>
+<instruction>
+Define how testing will be integrated throughout development (TDD approach).
+
+Specify:
+1. Test pyramid ratios (unit vs integration vs e2e)
+2. Coverage requirements
+3. Critical test scenarios
+4. Test generation guidelines for Surgical Test Generator
+
+This section guides the AI when generating tests during the RED phase of TDD.
+
+<example type="good">
+Critical Test Scenarios for Data Validation module:
+  - Happy path: Valid data passes all checks
+  - Edge cases: Empty strings, null values, boundary numbers
+  - Error cases: Invalid types, missing required fields
+  - Integration: Validator works with ingestion pipeline
+</example>
+</instruction>
+
+## Test Pyramid
+
+```
+        /\
+       /E2E\       ← [X]% (End-to-end, slow, comprehensive)
+      /------\
+     /Integration\ ← [Y]% (Module interactions)
+    /------------\
+   /  Unit Tests  \ ← [Z]% (Fast, isolated, deterministic)
+  /----------------\
+```
+
+## Coverage Requirements
+- Line coverage: [X]% minimum
+- Branch coverage: [X]% minimum
+- Function coverage: [X]% minimum
+- Statement coverage: [X]% minimum
+
+## Critical Test Scenarios
+
+### [Module/Feature Name]
+**Happy path**:
+- [Scenario description]
+- Expected: [What should happen]
+
+**Edge cases**:
+- [Scenario description]
+- Expected: [What should happen]
+
+**Error cases**:
+- [Scenario description]
+- Expected: [How system handles failure]
+
+**Integration points**:
+- [What interactions to test]
+- Expected: [End-to-end behavior]
+
+## Test Generation Guidelines
+[Specific instructions for Surgical Test Generator about what to focus on, what patterns to follow, project-specific test conventions]
+
+</test-strategy>
+
+---
+
+<architecture>
+<instruction>
+Describe technical architecture, data models, and key design decisions.
+
+Keep this section AFTER functional/structural decomposition - implementation details come after understanding structure.
+</instruction>
+
+## System Components
+[Major architectural pieces and their responsibilities]
+
+## Data Models
+[Core data structures, schemas, database design]
+
+## Technology Stack
+[Languages, frameworks, key libraries]
+
+**Decision: [Technology/Pattern]**
+- **Rationale**: [Why chosen]
+- **Trade-offs**: [What we're giving up]
+- **Alternatives considered**: [What else we looked at]
+
+</architecture>
+
+---
+
+<risks>
+<instruction>
+Identify risks that could derail development and how to mitigate them.
+
+Categories:
+- Technical risks (complexity, unknowns)
+- Dependency risks (blocking issues)
+- Scope risks (creep, underestimation)
+</instruction>
+
+## Technical Risks
+**Risk**: [Description]
+- **Impact**: [High/Medium/Low - effect on project]
+- **Likelihood**: [High/Medium/Low]
+- **Mitigation**: [How to address]
+- **Fallback**: [Plan B if mitigation fails]
+
+## Dependency Risks
+[External dependencies, blocking issues]
+
+## Scope Risks
+[Scope creep, underestimation, unclear requirements]
+
+</risks>
+
+---
+
+<appendix>
+## References
+[Papers, documentation, similar systems]
+
+## Glossary
+[Domain-specific terms]
+
+## Open Questions
+[Things to resolve during development]
+</appendix>
+
+---
+
+<task-master-integration>
+# How Task Master Uses This PRD
+
+When you run `task-master parse-prd <file>.txt`, the parser:
+
+1. **Extracts capabilities** → Main tasks
+   - Each `### Capability:` becomes a top-level task
+
+2. **Extracts features** → Subtasks
+   - Each `#### Feature:` becomes a subtask under its capability
+
+3. **Parses dependencies** → Task dependencies
+   - `Depends on: [X, Y]` sets task.dependencies = ["X", "Y"]
+
+4. **Orders by phases** → Task priorities
+   - Phase 0 tasks = highest priority
+   - Phase N tasks = lower priority, properly sequenced
+
+5. **Uses test strategy** → Test generation context
+   - Feeds test scenarios to Surgical Test Generator during implementation
+
+**Result**: A dependency-aware task graph that can be executed in topological order.
+
+## Why RPG Structure Matters
+
+Traditional flat PRDs lead to:
+- ❌ Unclear task dependencies
+- ❌ Arbitrary task ordering
+- ❌ Circular dependencies discovered late
+- ❌ Poorly scoped tasks
+
+RPG-structured PRDs provide:
+- ✅ Explicit dependency chains
+- ✅ Topological execution order
+- ✅ Clear module boundaries
+- ✅ Validated task graph before implementation
+
+## Tips for Best Results
+
+1. **Spend time on the dependency graph** - This is the most valuable section for Task Master
+2. **Keep features atomic** - Each feature should be independently testable
+3. **Progressive refinement** - Start broad, use `task-master expand` to break down complex tasks
+4. **Use research mode** - `task-master parse-prd --research` leverages AI for better task generation
+</task-master-integration>
--- a/STATUS.md
+++ b/STATUS.md
@@ -0,0 +1,244 @@
+# labctl Platform — Implementation Status
+
+## What This Document Is
+
+An honest assessment of what code exists, what works, what is stubbed, and what
+hasn't been started — measured against the PRD phases.
+
+---
+
+## Architecture Overview (as built)
+
+```
+labctl CLI ──HTTP──▶ bastion (PXE server)     ← WORKING
+labctl CLI ──HTTP──▶ labd (master daemon)     ← PARTIALLY WORKING
+                       │
+                       ├── CockroachDB/Prisma  ← SCHEMA DEFINED, NOT DEPLOYED
+                       ├── /ws/agent WebSocket  ← ACCEPTS CONNECTIONS, DOES NOT ROUTE
+                       └── mTLS CA              ← NOT IMPLEMENTED
+
+lab-agent ──WS──▶ labd                        ← LIBRARY CODE, NO DAEMON BINARY
+```
+
+---
+
+## Package Inventory
+
+| Package | Lines of Source | Tests | Status |
+|---------|---------------|-------|--------|
+| @lab/shared | ~200 | 0 | Complete — types, protocol, errors |
+| @lab/bastion | ~800 | 32 | **Production-ready** — PXE discovery, install, reprovision |
+| @lab/cli | ~600 | 0 (uses bastion tests) | Complete — all commands implemented |
+| @lab/labd | ~500 | 2 | Partial — routes exist, core features stubbed |
+| @lab/agent | ~300 | 0 | Library only — no daemon binary |
+
+All 5 packages compile. 32 tests pass.
+
+---
+
+## Phase 1: Foundation
+
+### DONE — Working in production
+
+| Feature | Code | How It Works |
+|---------|------|-------------|
+| PXE bastion server | `src/bastion/` | Fastify HTTP + dnsmasq DHCP/TFTP. Machines PXE boot, get iPXE script from `/dispatch?mac=XX`, chain to discovery or install kickstart. State persisted to JSON file. |
+| Machine discovery | `routes/dispatch.ts`, `templates/discover.ks.ts` | Unknown MACs get a mini-kickstart that boots a RAM-only Fedora, scrapes hardware via `/proc`, `/sys`, `dmidecode`, POSTs to `/api/discover`, then reboots. No disk touch. |
+| Machine installation | `routes/api.ts`, `templates/install.ks.ts` | Queue a MAC via `POST /api/install`. Next PXE boot gets a full Kickstart with LVM partitioning (worker: longhorn LV, infra: rancher LV), SSH keys, k3s kernel prereqs, progress callbacks. |
+| Reprovision with data preservation | `commands/reprovision.ts`, `install.ks.ts` | `%pre` script detects existing LVM. Reformats `/`, `/var`, `/boot` but preserves `/home`, `/srv`, `/var/lib/longhorn`, `/var/lib/rancher`. |
+| CLI: init/provision commands | `src/cli/src/commands/` | `labctl init bastion standalone start/stop/status`, `labctl provision list/install/reprovision/forget`. All talk to bastion HTTP API. |
+| CLI: config management | `config/index.ts`, `commands/config.ts` | `labctl config list/get/set/path`. YAML config at `~/.labctl/config.yaml` with env var overrides. |
+| labd scaffold | `src/labd/` | Fastify server with health, server listing, token management routes. Prisma schema for all models. Starts with or without database. |
+| Prisma schema | `prisma/schema.prisma` | 10 models: Server, Agent, User, Role, Permission, UserRole, JoinToken, AuditLog, PulumiRun, Cluster. CockroachDB provider. |
+| Database seeding | `prisma/seed.ts` | Creates admin/viewer/operator roles with proper allow/deny permissions. Idempotent via upsert. |
+| Multi-arch builds + packaging | `nfpm.yaml`, `scripts/` | nfpm config for RPM/DEB. Bun compile for standalone binary (102MB labctl in `dist/`). |
+| Gitea CI/CD | `.gitea/` (on remote) | Lint → typecheck → test → build → publish pipeline on mysources.co.uk. |
+
+### DONE — Code exists, not yet connected end-to-end
+
+| Feature | Code | What's Real | What's Missing |
+|---------|------|------------|----------------|
+| lab-agent connection library | `lab-agent/src/services/connection.ts` | `AgentConnection` class: WebSocket to labd, heartbeat (10s), exponential backoff reconnect (1-30s), state machine (disconnected/connecting/connected/reconnecting), handles server-shutdown messages. | **No daemon binary.** This is a library — nothing starts it. No systemd unit. No enrollment flow. |
+| lab-agent command executor | `lab-agent/src/services/executor.ts` | `CommandExecutor` class: `spawn()` with timeout handling (SIGTERM then SIGKILL after 5s), stdout/stderr streaming via EventEmitter, stdin writing, signal forwarding. | **Not wired to WebSocket.** The executor and connection don't talk to each other. No message dispatch. |
+| Agent registry (labd) | `labd/src/services/agent-registry.ts` | `AgentRegistry`: in-memory Map tracking by serverId and hostname, lifecycle events, heartbeat updates. Singleton exported. | **Not used by /ws/agent handler.** The WebSocket handler in `server.ts` just logs messages — it doesn't call `agentRegistry.register()`. |
+| Message router (labd) | `labd/src/services/message-router.ts` | `MessageRouter`: handler registration, pending request tracking with timeouts, streaming support, log subscription, agent cleanup on disconnect. | **Not used.** `server.ts` doesn't call `messageRouter.handleMessage()`. The router exists but is dead code. |
+| Token management | `labd/src/routes/auth.ts` | Create, list, revoke join tokens. Validates one-time vs reusable, expiry, revocation. Marks tokens as used. | Token validation works. **But enrollment returns `certificatePem: null`** — no actual certificate is issued. |
+| CLI API client | `cli/src/api/client.ts` | `LabdClient` with mTLS support, typed methods for servers/tokens/health/enrollment. | Works for REST endpoints. **No CLI commands use it yet** — existing commands still talk directly to bastion HTTP. |
+| CLI WebSocket streaming | `cli/src/api/websocket.ts` | `streamExec()` and `streamLogs()` functions. | **No `labctl exec` or `labctl logs` commands exist.** The streaming code has no consumer. |
+| Zod validation | `labd/src/validation/` | Schemas for createToken, enrollment, serverFilters, createRole, permission patterns. Middleware for body/query validation. | **Not applied to routes.** The schemas and middleware exist but no route uses `preHandler: [validateBody(schema)]`. |
+| Encryption service | `labd/src/services/encryption.ts` | AES-256-GCM with scrypt key derivation. Encrypt/decrypt roundtrip. Singleton from `CA_ENCRYPTION_KEY` env var. | **Not used anywhere.** No CA key is encrypted, no kubeconfig is stored. |
+| Graceful shutdown | `labd/src/services/shutdown.ts` | SIGTERM/SIGINT handlers, agent notification, message router cleanup, DB disconnect, force exit timer. | Works but agent notification is a no-op since no agents are registered (see above). |
+| Rate limiting | `labd/src/middleware/rate-limit.ts` | `@fastify/rate-limit`: 100/min global, 10/min for enrollment, 20/min for tokens. | **Wired up in `server.ts`.** This actually works. |
+| Health checks | `labd/src/routes/health.ts` | `/healthz`, `/health`, `/health/detailed`, `/health/live`, `/health/ready`. Checks DB latency and agent count. | Works. Returns `agents: { connected: 0 }` since no agents ever register. |
+| Error hierarchy | `shared/src/errors/` | `LabError`, `NotFoundError`, `PermissionDeniedError`, `ValidationError`, `AgentNotConnectedError`. | **Not used in routes.** Routes still use inline `reply.code(404).send({error: ...})`. |
+| Table formatting | `cli/src/utils/table.ts` | `printTable`, `formatStatus`, `formatRelativeTime`, predefined column sets. | **Not used by existing commands.** `provision list` has its own inline formatting. |
+| Resource parsing | `cli/src/utils/resource.ts` | Parse `server/labmaster`, `app/kube-system/nginx` format. | **Not used.** No commands accept `type/name` arguments yet. |
+| Doctor command | `cli/src/commands/doctor.ts` | Config, cert, connectivity diagnostics. | Works standalone. |
+| Login command | `cli/src/commands/login.ts` | Generates EC keypair, prompts for token, POSTs to `/api/auth/user-enroll`. | **labd has no `/api/auth/user-enroll` endpoint.** Only `/api/auth/enroll` exists (for agents). Login will 404. |
+
+### NOT DONE — Phase 1 items from PRD with no code
+
+| Feature | PRD Description | Status |
+|---------|----------------|--------|
+| Certificate Authority | Built-in CA in labd. Generate root CA, sign CSRs, revoke certs, rotate. | **Nothing.** No CA code. No X.509 operations. No `@peculiar/x509` dependency. `EncryptionService` exists but it's for data-at-rest, not PKI. |
+| RBAC engine | Middleware that checks permissions on every request. Deny overrides allow. | **Nothing.** `auth.ts` middleware is a placeholder. No route checks permissions. Anyone can call any endpoint. |
+| Audit logging | Log every action with user, session, action, resource, result, duration. | **Nothing.** `AuditLog` Prisma model exists but nothing writes to it. No audit middleware. |
+| `labctl exec` | Remote command execution via labd → agent WebSocket relay. | **Nothing.** No `exec` CLI command. The executor library exists in lab-agent but isn't connected. |
+| `labctl logs` | Resource-scoped log streaming (server, app, bastion, audit). | **Nothing.** No `logs` CLI command. |
+| `labctl get servers` | List servers from labd with filters. | **Nothing.** No `get` CLI command. The API client has `getServers()` but no command calls it. |
+| Smoke test stack | `podman-compose` with CockroachDB + labd + 2 agents, testing enrollment/heartbeat/exec/RBAC. | **Nothing.** `stack/docker-compose.yml` exists but only runs bastion + CockroachDB, not labd or agents. |
+| Agent enrollment during PXE | Embed join token in kickstart, agent auto-enrolls on first boot. | **Nothing.** Kickstart installs k3s prereqs but doesn't install or start lab-agent. |
+
+---
+
+## Phase 2: Deployment
+
+**Nothing from Phase 2 has been deployed.** The only Phase 2 artifact that exists is the set of labd K8s manifests noted below.
+
+| Feature | Status |
+|---------|--------|
+| Reprovision labmaster as labmaster.ad.itaz.eu | Not done — manual operation |
+| Deploy k3s with Cilium CNI | Not done — kickstart only sets up kernel prereqs, leaves a comment "run `curl -sfL https://get.k3s.io`" |
+| Deploy CockroachDB on k3s | Not done — `docker-compose.yml` runs it in-memory for dev, no k8s manifests for CRDB |
+| Deploy labd on k3s | **K8s manifests exist** (`deploy/k8s/labd/base/`) — Deployment, Service, ConfigMap, HPA, PDB. But no CockroachDB to connect to and no TLS configured. |
+| Deploy bastion as managed app | Not done — bastion runs standalone, no Pulumi chart |
+| Auto-enroll agents during PXE | Not done — no agent install in kickstart, no token embedding |
+
+---
+
+## Phase 3: Infrastructure as Code
+
+**Nothing from Phase 3 has been built.**
+
+| Feature | Status |
+|---------|--------|
+| Module system | Not done — no `module.yaml`, no module loader |
+| Pulumi charts | Not done — no Pulumi dependency, no chart structure |
+| `labctl apps install/upgrade/rollback` | Not done — no `apps` command |
+| `labctl apply -f` | Not done — no `apply` command |
+| `kubectl proxy` (audited) | Not done — no kubectl proxy |
+| Kubeconfig store (encrypted) | `EncryptionService` exists but nothing uses it. `Cluster.kubeconfigEnc` field exists in Prisma but nothing reads/writes it. |
+
+---
+
+## Phase 4: Multi-Cloud
+
+**Nothing from Phase 4 has been built.**
+
+| Feature | Status |
+|---------|--------|
+| AWS provider | Not done |
+| Reusable join tokens for ASGs | Token model supports `reusable` type, but no AWS integration |
+| Cilium Cluster Mesh | Not done |
+| Ephemeral test environments | Not done |
+| Grafana Loki | Not done |
+
+---
+
+## Infrastructure Files
+
+| File | Status |
+|------|--------|
+| `Dockerfile.labd` | Exists. Multi-stage Alpine build. Would work if you `docker build` it. |
+| `Dockerfile.bastion` | Exists. Multi-stage Fedora build. Would work. |
+| `.dockerignore` | Exists. |
+| `deploy/k8s/labd/base/` | Kustomize manifests for labd (Deployment, Service, ConfigMap, HPA, PDB). Points at a non-existent CockroachDB and has no TLS. |
+| `stack/docker-compose.yml` | Runs bastion + CockroachDB for local dev. Works. |
+| `nfpm.yaml` | RPM/DEB packaging config. Works with `nfpm pkg`. |
+
+---
+
+## The Disconnection Problem
+
+The core issue is that many services were built in isolation but never wired together:
+
+```
+┌─────────────────────────────────────────────────────────┐
+│  BUILT BUT NOT CONNECTED                                │
+│                                                         │
+│  AgentConnection ──✗──▶ /ws/agent handler               │
+│  CommandExecutor ──✗──▶ MessageRouter                   │
+│  MessageRouter   ──✗──▶ /ws/agent handler               │
+│  AgentRegistry   ──✗──▶ /ws/agent handler               │
+│  Zod schemas     ──✗──▶ Route preHandlers               │
+│  Error classes   ──✗──▶ Route error handling            │
+│  LabdClient      ──✗──▶ CLI commands (get/exec/logs)    │
+│  Table formatting──✗──▶ CLI commands                    │
+│  Resource parsing──✗──▶ CLI commands                    │
+│  EncryptionService──✗──▶ CA / kubeconfig storage        │
+│  Login command   ──✗──▶ /api/auth/user-enroll (missing) │
+│  Audit logging   ──✗──▶ Any middleware                  │
+│  RBAC engine     ──✗──▶ Any middleware                  │
+└─────────────────────────────────────────────────────────┘
+```
+
+---
+
+## What Actually Works End-to-End Today
+
+1. **PXE boot a bare-metal machine:**
+   ```
+   labctl init bastion standalone start
+   # Machine PXE boots → discovered automatically
+   labctl provision list
+   labctl provision install AA:BB:CC:DD:EE:FF worker-1 --role worker
+   # Machine reboots → installs Fedora → reports complete
+   ```
+
+2. **Manage bastion lifecycle:**
+   ```
+   labctl init bastion standalone status
+   labctl init bastion standalone stop
+   ```
+
+3. **Start labd (without database):**
+   ```
+   LABD_PORT=3100 tsx src/labd/src/main.ts
+   # Starts with stub DB, health endpoint works, token/server routes return errors
+   ```
+
+4. **Start labd (with CockroachDB):**
+   ```
+   docker-compose -f stack/docker-compose.yml up cockroachdb
+   DATABASE_URL=postgresql://root@localhost:26257/lab tsx src/labd/src/main.ts
+   # Token creation/listing/revocation works
+   # Server listing works (empty until agents register)
+   ```
+
+5. **CLI diagnostics:**
+   ```
+   labctl doctor
+   labctl config list
+   labctl version
+   ```
+
+That's it. No agent communication, no remote exec, no log streaming, no RBAC, no certificates.
+
+---
+
+## Recommended Next Steps (to make Phase 1 actually work)
+
+### Priority 1: Wire up the agent connection
+1. Update `/ws/agent` handler to use `agentRegistry.register()` and `messageRouter.handleMessage()`
+2. Create lab-agent daemon binary that uses `AgentConnection` + `CommandExecutor`
+3. Create systemd unit for lab-agent
+
+### Priority 2: Certificate Authority
+1. Add `@peculiar/x509` dependency
+2. Implement CA service: generate root CA, sign CSRs
+3. Wire enrollment route to actually sign and return certificates
+4. Store CA key encrypted using `EncryptionService`
+
+### Priority 3: RBAC + Audit
+1. Create RBAC middleware that checks `Permission` table
+2. Create audit middleware that writes to `AuditLog`
+3. Apply both to all routes
+
+### Priority 4: CLI commands for labd
+1. `labctl get servers` using `LabdClient.getServers()`
+2. `labctl exec server/<name>` using `streamExec()`
+3. `labctl logs server/<name>` using `streamLogs()`
+
+### Priority 5: Smoke test stack
+1. Update `docker-compose.yml` to include labd + 2 agents
+2. Write integration tests for enrollment → heartbeat → exec → logs
--- a/bastion.sh
+++ b/bastion.sh
@@ -27,6 +27,7 @@ HTTP_PORT="${HTTP_PORT:-8080}"
 TIMEZONE="${TIMEZONE:-Europe/London}"
 LOCALE="${LOCALE:-en_GB.UTF-8}"
 BASTION_DIR="${BASTION_DIR:-/tmp/lab-bastion}"
+DOMAIN="${DOMAIN:-ad.itaz.eu}"           # internal domain for hostnames
 DHCP_MODE="${DHCP_MODE:-proxy}"        # proxy (alongside existing DHCP) or full (bastion IS the DHCP server)
 DHCP_RANGE_START="${DHCP_RANGE_START:-}"  # only for full mode, auto-derived if empty
 DHCP_RANGE_END="${DHCP_RANGE_END:-}"
@@ -45,13 +46,19 @@ CMD="${1:-serve}"

 case "$CMD" in
    install)
-        [[ $# -ge 3 ]] || { echo "Usage: bastion.sh install <mac> <hostname> [--disk <dev>]"; exit 1; }
+        [[ $# -ge 3 ]] || { echo "Usage: bastion.sh install <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
        MAC="$2"
        HOSTNAME="$3"
-        DISK="${5:-}"  # --disk <dev>
-        PAYLOAD="{\"mac\":\"$MAC\",\"hostname\":\"$HOSTNAME\""
-        [[ -n "$DISK" ]] && PAYLOAD="$PAYLOAD,\"disk\":\"$DISK\""
-        PAYLOAD="$PAYLOAD}"
+        shift 3
+        DISK="" ROLE="worker"
+        while [[ $# -gt 0 ]]; do
+            case "$1" in
+                --disk) DISK="$2"; shift 2 ;;
+                --role) ROLE="$2"; shift 2 ;;
+                *) echo "Unknown option: $1"; exit 1 ;;
+            esac
+        done
+        PAYLOAD=$(python3 -c "import json; print(json.dumps({k:v for k,v in {'mac':'$MAC','hostname':'$HOSTNAME','disk':'$DISK','role':'$ROLE'}.items() if v}))")
        RESULT=$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
            -H "Content-Type: application/json" \
            -d "$PAYLOAD" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
@@ -93,16 +100,62 @@ print()
 print('\033[1mINSTALLED\033[0m')
 if installed:
    for mac, info in installed.items():
-        print(f'  {mac:<20} → {info.get(\"hostname\",\"?\")}  ({info.get(\"installed_at\",\"?\")})')
+        ip = info.get('ip', '')
+        ip_str = f'  ip={ip}' if ip else ''
+        print(f'  {mac:<20} → {info.get(\"hostname\",\"?\")}  role={info.get(\"role\",\"?\")}{ip_str}  ({info.get(\"installed_at\",\"?\")})')
 else:
    print('  (none)')
 print()
 " 2>/dev/null || echo "$RESULT"
        exit 0
        ;;
+    reprovision)  # Queue a reinstall for <mac>, then try an SSH-triggered reboot into PXE.
+        [[ $# -ge 3 ]] || { echo "Usage: bastion.sh reprovision <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
+        MAC="$2"
+        HOSTNAME="$3"
+        shift 3
+        DISK="" ROLE="worker"
+        while [[ $# -gt 0 ]]; do
+            case "$1" in
+                --disk) DISK="$2"; shift 2 ;;
+                --role) ROLE="$2"; shift 2 ;;
+                *) echo "Unknown option: $1"; exit 1 ;;
+            esac
+        done
+
+        # Queue the install. Values go to Python as argv (never spliced into its source) so quotes in them cannot break or inject code.
+        PAYLOAD=$(python3 -c "import json,sys; print(json.dumps({k:v for k,v in zip(('mac','hostname','disk','role'),sys.argv[1:]) if v}))" "$MAC" "$HOSTNAME" "$DISK" "$ROLE")
+        RESULT=$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
+            -H "Content-Type: application/json" \
+            -d "$PAYLOAD" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
+        echo "$RESULT" | python3 -m json.tool 2>/dev/null || echo "$RESULT"
+
+        # Look up the machine's last known IP in the installed state (best effort; empty on any failure)
+        IP=$(curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>/dev/null | \
+            python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('installed',{}).get(sys.argv[1],{}).get('ip',''))" "$MAC" 2>/dev/null || echo "")
+        ADMIN_USER="${SUDO_USER:-$USER}"
+        [[ "$ADMIN_USER" == "root" ]] && ADMIN_USER=""
+
+        if [[ -n "$IP" && -n "$ADMIN_USER" ]]; then
+            echo ""
+            echo "Attempting SSH reboot into PXE ($ADMIN_USER@$IP)..."
+            ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "$ADMIN_USER@$IP" \
+                'sudo efibootmgr 2>/dev/null; PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi' 2>&1 && {
+                echo ""
+                echo "Machine is rebooting into PXE. Install will start automatically."
+            } || {
+                echo ""
+                echo "SSH failed. Reboot the machine manually into PXE (e.g. via IPMI/KVM)."
+            }
+        else
+            echo ""
+            echo "No IP or non-root admin user known for this machine. Reboot it manually into PXE."
+        fi
+        exit 0
+        ;;
    serve) ;;  # continue below
    *)
-        echo "Usage: bastion.sh [serve|install <mac> <hostname>|list]"
+        echo "Usage: bastion.sh [serve|install|reprovision|list]"
        exit 1
        ;;
 esac
@@ -111,6 +164,17 @@ esac
 # SERVE MODE — start the bastion
 # ══════════════════════════════════════════════════════════════════

+# ──── Kill old instances ──────────────────────────────────────────
+# Find and kill any previous bastion dnsmasq and HTTP server
+OLD_DNSMASQ=$(pgrep -f 'dnsmasq --no-daemon --conf-file=/tmp/lab-bastion' 2>/dev/null || true)
+OLD_HTTP=$(pgrep -f 'python3 /tmp/lab-bastion/server.py' 2>/dev/null || true)
+if [[ -n "$OLD_DNSMASQ" || -n "$OLD_HTTP" ]]; then
+    warn "Killing old bastion processes..."
+    [[ -n "$OLD_DNSMASQ" ]] && kill $OLD_DNSMASQ 2>/dev/null && log "  Stopped old dnsmasq (PID $OLD_DNSMASQ)"
+    [[ -n "$OLD_HTTP" ]]    && kill $OLD_HTTP    2>/dev/null && log "  Stopped old HTTP server (PID $OLD_HTTP)"
+    sleep 1
+fi
+
 # ──── Preflight ───────────────────────────────────────────────────
 [[ $EUID -eq 0 ]] || die "Must run as root (need DHCP/TFTP ports). Use: sudo bash bastion.sh"

@@ -143,23 +207,59 @@ GATEWAY="$(ip route | awk '/default/ {print $3; exit}')"
 [[ -n "$SERVER_IP" ]] || die "Cannot detect IP on interface $IFACE"
 log "Interface: ${BOLD}$IFACE${NC}  IP: ${BOLD}$SERVER_IP${NC}  Network: ${BOLD}$NETWORK${NC}"

-# ──── Auto-detect SSH pubkey ──────────────────────────────────────
-SSH_PUBKEY="${SSH_PUBKEY:-}"
-if [[ -z "$SSH_PUBKEY" ]]; then
+# ──── Auto-detect SSH keys ───────────────────────────────────────
 REAL_HOME="${HOME}"
 [[ -n "${SUDO_USER:-}" ]] && REAL_HOME="$(getent passwd "$SUDO_USER" | cut -d: -f6)"
-    for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do
-        [[ -f "$keyfile" ]] && { SSH_PUBKEY="$keyfile"; break; }
-    done
+
+SSH_KEYS_CONTENT=""
+SSH_KEY_SOURCE=""
+
+# Collect SSH keys from authorized_keys + local pubkeys (deduplicated)
+SSH_KEY_SOURCE=""
+if [[ -f "$REAL_HOME/.ssh/authorized_keys" ]]; then
+    SSH_KEYS_CONTENT="$(grep -v '^#' "$REAL_HOME/.ssh/authorized_keys" | grep -v '^$')"
+    SSH_KEY_SOURCE="$REAL_HOME/.ssh/authorized_keys"
 fi

-SSH_KEY_CONTENT=""
-if [[ -n "$SSH_PUBKEY" && -f "$SSH_PUBKEY" ]]; then
-    SSH_KEY_CONTENT="$(cat "$SSH_PUBKEY")"
-    log "SSH key: ${BOLD}$SSH_PUBKEY${NC}"
+# Also include local pubkey files (they may not be in authorized_keys)
+for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do
+    if [[ -f "$keyfile" ]]; then
+        KEY_DATA="$(cat "$keyfile")"
+        KEY_FP="$(awk '{print $2}' "$keyfile")"
+        if [[ -n "$SSH_KEYS_CONTENT" ]]; then
+            # Add only if not already present
+            if ! echo "$SSH_KEYS_CONTENT" | grep -qF "$KEY_FP"; then
+                SSH_KEYS_CONTENT="$SSH_KEYS_CONTENT"$'\n'"$KEY_DATA"
+                SSH_KEY_SOURCE="${SSH_KEY_SOURCE} + $keyfile"
+            fi
        else
-    warn "No SSH public key found. Set SSH_PUBKEY=/path/to/key.pub"
-    warn "Install mode will use root password 'changeme' as fallback."
+            SSH_KEYS_CONTENT="$KEY_DATA"
+            SSH_KEY_SOURCE="$keyfile"
+        fi
+    fi
+done
+
+# Fallback: no keys found in authorized_keys or local pubkey files — generate a keypair
+if [[ -z "$SSH_KEYS_CONTENT" ]]; then
+    GENERATED_KEY="$BASTION_DIR/bastion_ed25519"
+    if [[ ! -f "$GENERATED_KEY" ]]; then
+        log "No SSH keys found — generating ed25519 keypair..."
+        ssh-keygen -t ed25519 -f "$GENERATED_KEY" -N "" -C "bastion-generated@$(hostname)" >/dev/null 2>&1
+    fi
+    SSH_KEYS_CONTENT="$(cat "${GENERATED_KEY}.pub")"
+    SSH_KEY_SOURCE="$GENERATED_KEY (generated)"
+    warn "Using generated keypair: ${BOLD}$GENERATED_KEY${NC}"
+    warn "Save this private key — it's the only way to access installed machines."
+fi
+
+SSH_KEY_COUNT="$(echo "$SSH_KEYS_CONTENT" | wc -l)"
+log "SSH keys: ${BOLD}${SSH_KEY_COUNT} key(s)${NC} from ${BOLD}${SSH_KEY_SOURCE}${NC}"
+
+# ──── Detect admin username ──────────────────────────────────────
+ADMIN_USER="${SUDO_USER:-$USER}"
+[[ "$ADMIN_USER" == "root" ]] && ADMIN_USER=""
+if [[ -n "$ADMIN_USER" ]]; then
+    log "Admin user: ${BOLD}${ADMIN_USER}${NC} (will be created on installed machines)"
 fi

 # ──── Prepare directories ────────────────────────────────────────
@@ -264,13 +364,8 @@ FEDORA_MIRROR="https://download.fedoraproject.org/pub/fedora/linux/releases/${FE
 log "Preparing boot artifacts (Fedora ${FEDORA_VERSION} ${ARCH})..."
 copy_if_missing "/usr/share/ipxe/undionly.kpxe"  "$TFTPDIR/undionly.kpxe"    "iPXE BIOS"

-# UEFI x86_64: two-stage PXE boot
-# Stage 1: tiny PXE loader stub (<20KB) fits in constrained TFTP buffers
-# Stage 2: full iPXE binary downloaded via UEFI PXE protocol (no size limit)
-PXELOADER_SRC="$(cd "$(dirname "$0")" && pwd)/pxeloader.c"
-[[ -f "$PXELOADER_SRC" ]] || PXELOADER_SRC="$(dirname "${BASH_SOURCE[0]}")/pxeloader.c"
-build_pxeloader "$PXELOADER_SRC" "$TFTPDIR/ipxe.efi" "PXE loader stub (stage 1)"
-copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe-real.efi" "iPXE UEFI x86_64 (stage 2)"
+# UEFI x86_64: serve iPXE directly via TFTP (UEFI has no TFTP size limit)
+copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe.efi"      "iPXE UEFI x86_64"

 copy_if_missing "/usr/share/ipxe/arm64-efi/snponly.efi"  "$TFTPDIR/ipxe-arm64.efi"  "iPXE UEFI arm64"
 download "${FEDORA_MIRROR}/images/pxeboot/vmlinuz"        "$HTTPDIR/vmlinuz"          "Fedora kernel"
@@ -375,25 +470,29 @@ except Exception as e:
 "
 fi

-# ── Power off — do NOT let Anaconda proceed ──
+# ── Reboot — do NOT let Anaconda proceed ──
 echo ""
-echo "=== Discovery complete, powering off ==="
+echo "=== Discovery complete, rebooting ==="
 echo ""
 sleep 3
 echo 1 > /proc/sys/kernel/sysrq
-echo o > /proc/sysrq-trigger
+echo b > /proc/sysrq-trigger
 sleep 5
-poweroff -f
+reboot -f

 %end

 # Anaconda should never get here, but just in case:
-poweroff
+reboot
 DISCOVER_KS

 # Patch in the bastion URL
 sed -i "s|__BASTION_URL__|http://${SERVER_IP}:${HTTP_PORT}|g" "$HTTPDIR/discover.ks"

+# Save SSH keys and admin user for the HTTP server to use
+echo "$SSH_KEYS_CONTENT" > "$BASTION_DIR/ssh_keys"
+echo "$ADMIN_USER" > "$BASTION_DIR/admin_user"
+
 # ──── Generate iPXE boot script ───────────────────────────────────
 # Initial iPXE script chains to /dispatch with the MAC, so the server
 # can route to discover or install mode per machine.
@@ -431,9 +530,17 @@ SERVER_IP   = sys.argv[3]
 HTTP_PORT   = int(sys.argv[4])
 FEDORA_VER  = sys.argv[5]
 FEDORA_MIRROR = sys.argv[6]
-SSH_KEY     = sys.argv[7] if len(sys.argv) > 7 else ""
+SSH_KEYS_FILE = sys.argv[7] if len(sys.argv) > 7 else ""
 TIMEZONE    = sys.argv[8] if len(sys.argv) > 8 else "Europe/London"
 LOCALE      = sys.argv[9] if len(sys.argv) > 9 else "en_GB.UTF-8"
+DOMAIN      = sys.argv[10] if len(sys.argv) > 10 else "ad.itaz.eu"
+ADMIN_USER  = sys.argv[11] if len(sys.argv) > 11 else ""
+
+# Load SSH keys from file
+SSH_KEYS = []
+if SSH_KEYS_FILE and os.path.isfile(SSH_KEYS_FILE):
+    with open(SSH_KEYS_FILE) as f:
+        SSH_KEYS = [l.strip() for l in f if l.strip() and not l.startswith('#')]

 # ── State management (file-backed, lock-protected) ───────────────

@@ -452,19 +559,66 @@ def save_state(state):

 # ── Kickstart generation ─────────────────────────────────────────

-def generate_kickstart(hostname, disk="", ssh_key=""):
-    disk_cmds = "clearpart --all --initlabel\nautopart --type=plain"
-    if disk:
-        disk_cmds = f"ignoredisk --only-use={disk}\nclearpart --all --initlabel --drives={disk}\nautopart --type=plain"
+def generate_kickstart(hostname, disk="", ssh_keys=None, domain="", role="worker", admin_user=""):
+    ssh_keys = ssh_keys or []
+    fqdn = f"{hostname}.{domain}" if domain else hostname
+    vg = "labvg"

-    if ssh_key:
-        auth = f'rootpw --lock\nsshkey --username=root "{ssh_key}"'
+    # ── Auth ──
+    if ssh_keys:
+        auth = f'rootpw --lock\nsshkey --username=root "{ssh_keys[0]}"'
    else:
        auth = 'rootpw --plaintext changeme'

-    return f"""# Lab Bastion — Fedora {FEDORA_VER} install
+    # ── Admin user (kickstart directive) ──
+    user_directive = ""
+    if admin_user:
+        user_directive = f'user --name={admin_user} --groups=wheel --lock'
+
+    # ── SSH keys for %post (root + admin user) ──
+    all_keys = "\n".join(ssh_keys)
+    ssh_post_block = ""
+    if ssh_keys:
+        ssh_post_block = f"""
+# Set up SSH keys for root
+mkdir -p /root/.ssh && chmod 700 /root/.ssh
+cat > /root/.ssh/authorized_keys << 'SSHKEYS'
+{all_keys}
+SSHKEYS
+chmod 600 /root/.ssh/authorized_keys"""
+
+    if admin_user and ssh_keys:
+        ssh_post_block += f"""
+
+# Set up SSH keys for {admin_user}
+ADMIN_HOME=$(getent passwd {admin_user} | cut -d: -f6)
+mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh"
+cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
+chown -R {admin_user}:{admin_user} "$ADMIN_HOME/.ssh"
+chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"
+
+# Fix SELinux contexts for SSH
+restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
+
+# Passwordless sudo for {admin_user}
+echo '{admin_user} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/{admin_user}
+chmod 440 /etc/sudoers.d/{admin_user}"""
+
+    # ── Determine disk (auto-detect first NVMe/SDA if not specified) ──
+    disk_line = f'DISK="{disk}"' if disk else '''
+DISK=""
+for d in /dev/nvme0n1 /dev/sda /dev/vda; do
+    [ -b "$d" ] && { DISK="$(basename $d)"; break; }
+done
+[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }
+'''
+
+    # ── LVM layout sizes (MB) ──
+    has_longhorn = (role == "worker")
+
+    return f"""# Lab Bastion -- Fedora {FEDORA_VER} server install
 # Generated: {datetime.now().isoformat()}
-# Target: {hostname}
+# Target: {fqdn} (role={role})

 text
 reboot
@@ -473,39 +627,266 @@ lang {LOCALE}
 keyboard uk
 timezone {TIMEZONE} --utc

-network --bootproto=dhcp --activate --hostname={hostname}
+network --bootproto=dhcp --activate --hostname={fqdn}

 {auth}
-
-{disk_cmds}
+{user_directive}

 bootloader --append="console=tty0 console=ttyS0,115200n8"

 url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch

+# Partitioning is generated dynamically by %pre (supports longhorn preservation)
+%include /tmp/part.ks
+
+%pre --log=/tmp/pre-partition.log
+#!/bin/bash
+set -x
+
+# Progress callback helper
+bastion_progress() {{
+    local stage="$1" detail="${{2:-}}"
+    local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
+    curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
+        -H "Content-Type: application/json" \
+        -d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
+}}
+
+bastion_progress "partitioning" "preparing disk layout"
+
+VG="{vg}"
+{disk_line}
+
+REPROVISION=no
+
+# Check if VG exists (reprovision scenario)
+if vgs $VG &>/dev/null; then
+    echo "=== Existing VG found - reprovision mode ==="
+    REPROVISION=yes
+
+    # Detect which data LVs to preserve
+    PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no
+    lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
+    lvs $VG/srv      &>/dev/null && PRESERVE_SRV=yes
+    lvs $VG/home     &>/dev/null && PRESERVE_HOME=yes
+
+    echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME"
+
+    # Remove only OS logical volumes (keep data LVs)
+    for lv in root var varlog swap; do
+        lvremove -f $VG/$lv 2>/dev/null || true
+    done
+fi
+
+if [ "$REPROVISION" = "yes" ]; then
+    # Find existing boot partitions by type
+    EFI_PART=$(blkid -t TYPE=vfat -o device /dev/${{DISK}}* 2>/dev/null | head -1)
+    BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/${{DISK}}* 2>/dev/null | head -1)
+    EFI_PART=${{EFI_PART:-/dev/${{DISK}}1}}
+    BOOT_PART=${{BOOT_PART:-/dev/${{DISK}}2}}
+    echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"
+
+    # Build partition config reusing existing PV/VG
+    cat > /tmp/part.ks << PARTEOF
+ignoredisk --only-use=$DISK
+clearpart --none
+part /boot/efi --onpart=$EFI_PART --fstype=efi
+part /boot --onpart=$BOOT_PART --fstype=ext4
+volgroup {vg} --useexisting --noformat
+logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
+logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
+logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
+logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
+PARTEOF
+
+    # Preserve or recreate data LVs
+    if [ "$PRESERVE_HOME" = "yes" ]; then
+        echo "logvol /home --vgname={vg} --name=home --useexisting --noformat" >> /tmp/part.ks
+    else
+        echo "logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks
+    fi
+
+    if [ "$PRESERVE_SRV" = "yes" ]; then
+        echo "logvol /srv --vgname={vg} --name=srv --useexisting --noformat" >> /tmp/part.ks
+    else
+        echo "logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks
+    fi
+
+    if [ "$PRESERVE_LONGHORN" = "yes" ]; then
+        echo "logvol /var/lib/longhorn --vgname={vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
+    fi
+
+else
+    # Fresh install
+    cat > /tmp/part.ks << PARTEOF
+ignoredisk --only-use=$DISK
+clearpart --all --initlabel --drives=$DISK
+part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
+part /boot --fstype=ext4 --size=3072 --ondisk=$DISK
+part pv.01 --size=1 --grow --ondisk=$DISK
+volgroup {vg} pv.01
+logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
+logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
+logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
+logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
+logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240
+logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480
+{"logvol /var/lib/longhorn --vgname=" + vg + " --name=longhorn --fstype=xfs --grow --size=1" if has_longhorn else ""}
+PARTEOF
+fi
+
+echo "=== Generated partition config ==="
+cat /tmp/part.ks
+echo "==================================="
+
+bastion_progress "partitioning" "layout ready, starting install"
+
+%end
+
 %packages
@core
-@server-product
 openssh-server
 vim-enhanced
 tmux
 git
 curl
+wget
 python3
 lshw
 dmidecode
 dnf-plugins-core
+
+# Networking and diagnostics
+NetworkManager
+bind-utils
+net-tools
+iproute
+iputils
+traceroute
+tcpdump
+htop
+iotop
+strace
+jq
+
+# k3s prerequisites
+container-selinux
+iptables-nft
+nftables
+policycoreutils-python-utils
+chrony
+tar
+socat
+conntrack-tools
+ethtool
+
+# Boot management
+efibootmgr
+
+# Puppet prerequisites
+ruby
+ruby-libs
+
+# Exclude desktop
+-@workstation-product
+-@gnome-desktop
+-gnome-shell
+-gdm
+-PackageKit
+-PackageKit-glib
 %end

 %post --log=/root/bastion-post-install.log
 #!/bin/bash
 set -x
+
+# Progress callback helper
+bastion_progress() {{
+    local stage="$1" detail="${{2:-}}"
+    local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
+    curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
+        -H "Content-Type: application/json" \
+        -d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
+}}
+
+bastion_progress "post-install" "configuring system"
+
+# ── SSH ──
 systemctl enable --now sshd
 sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
 sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
-hostnamectl set-hostname {hostname}
-echo "Provisioned by lab-bastion on $(date -Iseconds)" > /etc/lab-provisioned
-echo "# Lab node — puppet enrollment pending" > /root/README
+{ssh_post_block}
+
+# ── Hostname and domain ──
+hostnamectl set-hostname {fqdn}
+
+# ── tmpfs for /tmp ──
+echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
+
+# ── Kernel modules for k3s ──
+cat > /etc/modules-load.d/k3s.conf << 'MODULES'
+br_netfilter
+overlay
+ip_conntrack
+MODULES
+modprobe br_netfilter || true
+modprobe overlay || true
+
+# ── Sysctl for k3s networking ──
+cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
+net.bridge.bridge-nf-call-iptables  = 1
+net.bridge.bridge-nf-call-ip6tables = 1
+net.ipv4.ip_forward                 = 1
+net.ipv6.conf.all.forwarding        = 1
+fs.inotify.max_user_instances       = 524288
+fs.inotify.max_user_watches         = 1048576
+SYSCTL
+sysctl --system || true
+
+# ── Disable firewalld (k3s manages its own iptables rules) ──
+systemctl disable --now firewalld || true
+
+# ── Enable chronyd for time sync ──
+systemctl enable --now chronyd
+
+# ── Set boot order: local disk first, PXE after ──
+if command -v efibootmgr >/dev/null 2>&1; then
+    # Find the Fedora boot entry and move it first
+    FEDORA_ENTRY=$(efibootmgr | grep -i fedora | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
+    if [ -n "$FEDORA_ENTRY" ]; then
+        CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
+        # Put Fedora first, keep rest
+        NEW_ORDER="$FEDORA_ENTRY,$(echo "$CURRENT_ORDER" | sed "s/$FEDORA_ENTRY,\\?//;s/,$//")"
+        efibootmgr -o "$NEW_ORDER" || true
+        echo "Boot order set: Fedora first ($NEW_ORDER)"
+    fi
+fi
+
+# ── Provisioning metadata ──
+cat > /etc/lab-provisioned << PROVEOF
+hostname: {fqdn}
+role: {role}
+provisioned: $(date -Iseconds)
+bastion: {SERVER_IP}
+PROVEOF
+
+cat > /root/README << 'README'
+# Lab Node -- {fqdn} (role: {role})
+#
+# Next steps:
+#   1. Install puppet agent:
+#      dnf install -y puppet-agent
+#
+#   2. Install k3s:
+#      curl -sfL https://get.k3s.io | sh -
+#
+#   3. Or join existing cluster:
+#      curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
+README
+
+IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {{split($2,a,"/"); print a[1]; exit}}')
+bastion_progress "complete" "ready at $IP_ADDR"
+
 %end
 """

@@ -562,6 +943,25 @@ def print_install_started(mac, hostname):
    print(f"  Serving Fedora {FEDORA_VER} installer + kickstart...")
    print(f"\n{'─' * 60}\n", flush=True)

+PROGRESS_ICONS = {  # stage name -> marker shown by print_progress()
+    "partitioning": "◆",
+    "installing":   "◆◆",
+    "post-install": "◆◆◆",
+    "complete":     "✔",
+    "error":        "✘",
+}
+
+def print_progress(mac, stage, detail=""):  # print one colour-coded console line for an install progress report
+    icon = PROGRESS_ICONS.get(stage, "·")  # stages missing from PROGRESS_ICONS fall back to a dot
+    color = GREEN if stage == "complete" else (RED if stage == "error" else YELLOW)  # green = complete, red = error, yellow = anything else
+    detail_str = f" -- {detail}" if detail else ""  # suffix is omitted entirely when no detail was given
+    print(f"  {color}{icon}{RESET} {mac}  {BOLD}{stage}{RESET}{detail_str}", flush=True)
+    if stage == "complete" and detail:  # on completion, also print a ready-to-copy ssh command
+        ip = detail.replace("ready at ", "").strip()  # the kickstart's final callback sends "ready at <ip>"; keep only what follows
+        if ip:
+            admin = ADMIN_USER or "root"  # use root when ADMIN_USER is empty/falsy
+            print(f"\n  {GREEN}{BOLD}  ssh {admin}@{ip}{RESET}\n", flush=True)
+
 # ── HTTP Handler ──────────────────────────────────────────────────

 class BastionHandler(SimpleHTTPRequestHandler):
@@ -603,7 +1003,7 @@ class BastionHandler(SimpleHTTPRequestHandler):

 echo
 echo =============================================
-echo   Lab PXE Bastion — INSTALLING Fedora {FEDORA_VER}
+echo   Lab PXE Bastion - INSTALLING Fedora {FEDORA_VER}
 echo   Target: {hostname}
 echo   MAC:    {mac}
 echo =============================================
@@ -614,13 +1014,31 @@ initrd http://{SERVER_IP}:{HTTP_PORT}/initrd.img
 boot
 """
                self.send_text(200, script)
+
+            elif mac in state.get("installed", {}):
+                info = state["installed"][mac]
+                hostname = info.get("hostname", "?")
+                print(f"  {GREEN}PXE request from {mac} ({hostname}) - already installed, booting local disk{RESET}", flush=True)
+                script = f"""#!ipxe
+
+echo
+echo =============================================
+echo   Lab PXE Bastion - {hostname}
+echo   Already installed, booting from local disk
+echo =============================================
+echo
+sleep 3
+exit
+"""
+                self.send_text(200, script)
+
            else:
                print(f"  {YELLOW}PXE request from {mac} → discovery mode{RESET}", flush=True)
                script = f"""#!ipxe

 echo
 echo =============================================
-echo   Lab PXE Bastion — DISCOVERY MODE
+echo   Lab PXE Bastion - DISCOVERY MODE
 echo   MAC: {mac}
 echo   Collecting hardware info...
 echo =============================================
@@ -642,7 +1060,10 @@ boot
            ks = generate_kickstart(
                hostname=cfg.get("hostname", "lab-node"),
                disk=cfg.get("disk", ""),
-                ssh_key=SSH_KEY,
+                ssh_keys=SSH_KEYS,
+                domain=DOMAIN,
+                role=cfg.get("role", "worker"),
+                admin_user=ADMIN_USER,
            )
            self.send_text(200, ks)
            return
@@ -710,15 +1131,21 @@ boot
            mac = data.get("mac", "").lower().replace("-", ":")
            hostname = data.get("hostname", "lab-node")
            disk = data.get("disk", "")
+            role = data.get("role", "worker")

            if not mac:
                self.send_json(400, {"error": "mac is required"})
                return

+            if role not in ("worker", "infra"):
+                self.send_json(400, {"error": "role must be 'worker' or 'infra'"})
+                return
+
            state = load_state()
            state.setdefault("install_queue", {})[mac] = {
                "hostname": hostname,
                "disk": disk,
+                "role": role,
                "queued_at": datetime.now().isoformat(),
            }
            save_state(state)
@@ -729,10 +1156,49 @@ boot
                "status": "queued",
                "mac": mac,
                "hostname": hostname,
-                "message": "PXE boot the machine to start installation",
+                "role": role,
+                "message": f"PXE boot the machine to start installation (role={role})",
            })
            return

+        # ── Install progress callback from kickstart ──
+        if parsed.path == "/api/progress":
+            try:
+                data = json.loads(body)
+            except json.JSONDecodeError:
+                self.send_json(400, {"error": "invalid JSON"})
+                return
+
+            mac = data.get("mac", "unknown").lower()
+            stage = data.get("stage", "unknown")
+            detail = data.get("detail", "")
+
+            print_progress(mac, stage, detail)
+
+            # Update state with progress
+            state = load_state()
+            if mac in state.get("install_queue", {}):
+                state["install_queue"][mac]["progress"] = stage
+                state["install_queue"][mac]["progress_at"] = datetime.now().isoformat()
+                if detail:
+                    state["install_queue"][mac]["progress_detail"] = detail
+
+                # Move to installed on completion
+                if stage == "complete":
+                    cfg = state["install_queue"].pop(mac)
+                    ip = detail.replace("ready at ", "").strip() if detail else ""
+                    state.setdefault("installed", {})[mac] = {
+                        "hostname": cfg.get("hostname", "?"),
+                        "role": cfg.get("role", "?"),
+                        "ip": ip,
+                        "installed_at": datetime.now().isoformat(),
+                    }
+
+                save_state(state)
+
+            self.send_json(200, {"status": "ok"})
+            return
+
        self.send_json(404, {"error": "not found"})


@@ -850,9 +1316,11 @@ python3 "$BASTION_DIR/server.py" \
    "$HTTP_PORT" \
    "$FEDORA_VERSION" \
    "$FEDORA_MIRROR" \
-    "$SSH_KEY_CONTENT" \
+    "$BASTION_DIR/ssh_keys" \
    "$TIMEZONE" \
-    "$LOCALE" &
+    "$LOCALE" \
+    "$DOMAIN" \
+    "$ADMIN_USER" &
 HTTP_PID=$!
 sleep 1

@@ -871,6 +1339,7 @@ echo -e "  Network:   ${BOLD}${NETWORK}/24${NC} via ${BOLD}${IFACE}${NC}"
 echo -e "  DHCP:      ${BOLD}${DHCP_MODE}${NC}$(if [[ "$DHCP_MODE" == "full" ]]; then echo " (${DHCP_RANGE_START}–${DHCP_RANGE_END})"; else echo " (alongside existing DHCP)"; fi)"
 echo -e "  HTTP:      ${BOLD}http://${SERVER_IP}:${HTTP_PORT}/${NC}"
 echo -e "  OS:        ${BOLD}Fedora ${FEDORA_VERSION} (${ARCH})${NC}"
+echo -e "  Domain:    ${BOLD}${DOMAIN}${NC}"
 echo -e "  State:     ${BOLD}${STATEFILE}${NC}"
 echo ""
 echo -e "  ${YELLOW}PXE boot any machine on this network.${NC}"
--- a/bastion/.dockerignore
+++ b/bastion/.dockerignore
@@ -0,0 +1,8 @@
+node_modules
+dist
+.git
+*.log
+.env
+.env.*
+*.tsbuildinfo
+.taskmaster
--- a/bastion/.gitignore
+++ b/bastion/.gitignore
@@ -0,0 +1,3 @@
+node_modules/
+dist/
+*.tsbuildinfo
--- a/bastion/.taskmaster/docs/pulumi-k3s-refactor.md
+++ b/bastion/.taskmaster/docs/pulumi-k3s-refactor.md
@@ -0,0 +1,132 @@
+# PRD: Refactor K3s Module from Bash Heredocs to Pulumi TypeScript
+
+## Problem
+
+The k3s install/configure/health module currently generates ~300 lines of bash heredoc strings embedded in TypeScript files (`install.ts`, `configure.ts`, `health.ts`). These are unmaintainable, untestable, and impossible to compose. This is the same bash-in-code problem that drove the bastion TypeScript rewrite.
+
+## Vision
+
+The lab platform uses Pulumi as its IaC engine:
+- **Central execution**: labd runs Pulumi programs in labcontroller k8s for cloud/remote resources with RBAC, global state, and audit trail (PulumiRun table already exists in CockroachDB)
+- **Local execution**: lab-agents run Pulumi programs directly on bare-metal nodes
+- **Multi-environment**: supports multiple datacenters, clouds (baremetal, AWS, GCP), production/dev/ephemeral environments
+
+## Current State
+
+### Files to replace
+- `src/modules/modules/k3s/src/install.ts` — 275 lines, generates bash for 10 install phases
+- `src/modules/modules/k3s/src/configure.ts` — 118 lines, generates bash for 5 configure phases
+- `src/modules/modules/k3s/src/health.ts` — 57 lines, generates bash for 6 health checks
+
+### Existing infrastructure
+- `sshExec(ip, user, command, opts)` and `sshExecStreaming()` — SSH execution primitives in `src/modules/src/ssh.ts`
+- Module system: `ModuleRunner`, `ModuleRegistry`, `Module` interface with install/configure/health phases
+- `@lab/shared` types: `BastionConfig`, `K3sInstallContext`, roles, OS types
+- PulumiRun model in Prisma schema (labd) — tracks Pulumi execution state
+- labcontroller module generates k8s manifests (cockroachdb.ts, labd.ts, bastion.ts) — these also need Pulumi migration eventually
+
+### 32 distinct operations currently in bash
+**Install phase (10 steps):**
+1. Load kernel modules (br_netfilter, overlay, ip_conntrack)
+2. Apply CIS sysctl hardening (9 params)
+3. Disable swap
+4. Disable firewall (firewalld/ufw — mask to survive reboot)
+5. Set SELinux permissive
+6. Write k3s server config (flannel=none, secrets-encryption, audit, CIS hardened)
+7. Write audit policy YAML
+8. Clean up stale CNI (flannel.1 vxlan, cilium interfaces, port 8472 conflicts)
+9. Install k3s binary (curl | sh)
+10. Install Cilium CNI (detect arch, detect interface, kubeProxyReplacement)
+
+**Configure phase (5 steps):**
+1. Fix CoreDNS upstream DNS (systemd-resolved 127.0.0.53 unreachable from pod netns)
+2. Configure log rotation
+3. Check certificate expiry
+4. Apply default network policies (deny-ingress, allow-dns-egress)
+5. Apply Pod Security Standards (restricted)
+
+**Health checks (6 checks):**
+1. k3s service active
+2. Node Ready condition
+3. API server /healthz
+4. Secrets encryption enabled
+5. Cilium status
+6. kube-system pod status
+
+## Requirements
+
+### Architecture decisions needed (discuss with user via task-master)
+1. **Pulumi structure**: micro-stacks vs monorepo-by-env vs component-library vs GitOps operator
+2. **Multi-cloud support**: how stacks are organized across baremetal/AWS/GCP
+3. **Environment model**: how prod/dev/ephemeral environments are represented
+4. **State backend**: Pulumi Cloud vs self-hosted (S3/CockroachDB)
+5. **Execution model**: who runs `pulumi up` — labd central, lab-agent local, or both?
+
+### Operation design
+- Each operation is a typed TypeScript async function using `sshExec()`
+- Standard interface: `OperationContext` in, `OperationResult` out
+- **Idempotent**: check before act, report `changed: boolean`
+- **Composable**: operations grouped into logical units (host-prep, networking, hardening)
+- **Testable**: mock sshExec for unit tests
+- **Future Pulumi-ready**: each function maps 1:1 to a `command.remote.Command` resource
+
+### Groups (logical composition)
+- `host-prep`: kernel-modules + sysctl + swap + firewall + selinux
+- `k3s-server`: k3s-config + audit-policy + cni-cleanup + k3s-install
+- `k3s-agent`: k3s-config (agent) + k3s-install (agent mode)
+- `networking`: cilium + dns-fix + network-policy
+- `hardening`: pod-security + cert-check + log-rotation
+
+### Pulumi integration (when added)
+- Add `@pulumi/pulumi` and `@pulumi/command` as dependencies
+- Each operation becomes a `command.remote.Command` resource
+- Groups become `pulumi.ComponentResource` classes
+- K3sCluster becomes a top-level ComponentResource that composes groups
+- Stacks per environment: `lab-baremetal`, `aws-prod`, `dev`, `ephemeral-pr-123`
+
+## File structure
+
+```
+src/modules/modules/k3s/src/
+├── types.ts              # K3sConfig, OperationContext, OperationResult
+├── utils.ts              # sshOpts(), runSequential(), file helpers
+├── operations/           # ~15 atomic operations
+│   ├── kernel-modules.ts
+│   ├── sysctl.ts
+│   ├── swap.ts
+│   ├── firewall.ts
+│   ├── selinux.ts
+│   ├── k3s-config.ts
+│   ├── audit-policy.ts
+│   ├── cni-cleanup.ts
+│   ├── k3s-install.ts
+│   ├── cilium.ts
+│   ├── dns-fix.ts
+│   ├── log-rotation.ts
+│   ├── network-policy.ts
+│   ├── pod-security.ts
+│   └── cert-check.ts
+├── groups/               # Logical groupings
+│   ├── host-prep.ts
+│   ├── k3s-server.ts
+│   ├── k3s-agent.ts
+│   ├── networking.ts
+│   └── hardening.ts
+├── health/               # Health checks
+│   ├── k3s-service.ts
+│   ├── node-ready.ts
+│   ├── api-health.ts
+│   ├── secrets-encryption.ts
+│   ├── cilium-status.ts
+│   └── pod-status.ts
+├── k3s-module.ts         # Module implementation
+└── index.ts              # Public exports
+```
+
+## Success criteria
+- Zero bash heredoc strings in the k3s module
+- Every operation independently testable with mocked sshExec
+- `labctl app k3s install <target>` works end-to-end
+- `labctl app k3s health` works end-to-end
+- Existing test suite passes (updated for new API)
+- Clear path to wrapping operations as Pulumi resources
--- a/bastion/.taskmaster/docs/resource-tracking.md
+++ b/bastion/.taskmaster/docs/resource-tracking.md
@@ -0,0 +1,172 @@
+# PRD: Resource Tracking & kubectl-style CLI
+
+## Problem
+
+The lab platform currently has fragmented state management:
+- Bastion keeps machine state in an ephemeral JSON file (`/tmp/lab-bastion/state.json`) that is lost on pod restart
+- labd receives state syncs from bastions but only stores them in memory — the `Server` table in CockroachDB is never written to
+- There is no system to track relationships between resources (servers belong to clusters, clusters run on servers, networks connect servers)
+- The CLI (`labctl`) uses an inconsistent verb-noun structure (`labctl provision list`, `labctl app k3s install`) instead of a uniform resource-oriented pattern
+- RBAC permissions reference resources (server, cloud, environment) but there is no resource registry to validate against
+
+## Vision
+
+A unified resource tracking system where all infrastructure objects (servers, clusters, networks, bastions, VMs) are persisted in CockroachDB via labd, with relationships between them, and managed through a kubectl-style CLI. This replaces the ephemeral JSON state and becomes the single source of truth for the platform.
+
+## Current State
+
+### Database (CockroachDB via Prisma)
+Existing models that are scaffolded but mostly unused:
+- `Server` — hostname, mac, cloud, environment, role, labels, ip, status (0 rows)
+- `Agent` — mTLS certificate enrollment per server (0 rows)
+- `Bastion` — PXE server registration (1 row, labmaster)
+- `Cluster` — k8s cluster metadata (0 rows)
+- `User`, `Role`, `Permission`, `UserRole` — RBAC framework (seeded with 3 roles, 6 permissions)
+- `JoinToken` — agent/bastion enrollment tokens
+- `AuditLog` — action audit trail
+
+### Bastion State (ephemeral JSON)
+Three categories tracked per-bastion:
+- `discovered` — machines found via PXE with hardware info (CPU, RAM, disks, NICs, arch)
+- `install_queue` — machines queued for OS install with progress tracking
+- `installed` — machines with OS installed (hostname, role, IP, OS)
+
+### CLI Structure (current)
+```
+labctl init bastion standalone [start|stop|status]
+labctl provision [list|install|reprovision|forget|logs]
+labctl app [k3s|labcontroller]
+labctl config [list|get|set]
+labctl roles
+labctl doctor
+labctl login
+labctl logs
+```
+
+## Requirements
+
+### 1. Persist Bastion State to Database
+
+When labd receives `bastion-state-sync` messages, it must upsert machines into the `Server` table:
+- Discovered machines → create/update Server with status "discovered", store HardwareInfo as JSON labels
+- Queued machines → update Server status to "provisioning"
+- Installed machines → update Server with hostname, IP, role, OS, status "installed"
+- Track which bastion owns which server (add `bastionId` to Server model)
+- Track hardware info: arch, cpu_model, cpu_cores, memory_gb, disks, nics
+
+The bastion's local JSON state becomes a cache; labd's database is the source of truth. On startup, the bastion should load its state from labd if available.
+
+### 2. Resource Model Expansion
+
+Add new models to the Prisma schema for tracking infrastructure:
+
+**Network** — L2/L3 network segments
+- name, cidr, vlan, gateway, domain, dhcpEnabled
+- Servers have NICs on networks
+
+**ServerNic** — NIC-to-network mapping
+- serverId, networkId, mac, ip, name, state (UP/DOWN)
+- Derived from HardwareInfo during discovery
+
+**ServerDisk** — Disk inventory per server
+- serverId, name, sizeGb, model
+- Derived from HardwareInfo during discovery
+
+**ClusterMember** — Server-to-cluster membership
+- clusterId, serverId, role (control-plane, worker)
+
+### 3. kubectl-style CLI Redesign
+
+Restructure labctl to follow the `mcpctl` / `kubectl` pattern:
+
+```
+# Core CRUD verbs that work on any resource
+labctl get <resource> [name]          # List or get specific resource
+labctl describe <resource> <name>     # Detailed view with relationships
+labctl create <resource> [flags]      # Create a resource
+labctl delete <resource> <name>       # Delete a resource
+labctl edit <resource> <name>         # Edit in $EDITOR
+labctl apply -f <file>                # Declarative apply from YAML
+
+# Resource types (with aliases)
+servers (server, srv)
+clusters (cluster)
+networks (network, net)
+bastions (bastion)
+roles (role)
+users (user)
+tokens (token)
+audit (audit)
+
+# Output formats
+-o table (default), -o json, -o yaml, -o wide
+
+# Examples
+labctl get servers                     # List all servers
+labctl get servers -o wide             # With extra columns (disks, NICs)
+labctl get server labmaster            # Get specific server
+labctl describe server labmaster       # Full details + relationships
+labctl get servers --role worker       # Filter by role
+labctl get servers --status discovered # Filter by status
+labctl get clusters                    # List clusters
+labctl describe cluster lab-k3s        # Cluster members, health
+labctl get networks                    # List networks
+labctl create network --name lab --cidr 192.168.8.0/24 --gateway 192.168.8.1
+
+# Provisioning becomes actions on server resources
+labctl provision <server> --os fedora-43 --role worker   # Queue install
+labctl reprovision <server>                              # Reinstall
+labctl forget <server>                                   # Remove from tracking
+
+# App management stays under "labctl app", but the verb now comes before the app name
+labctl app install k3s <server>
+labctl app health k3s [server]
+
+# Admin
+labctl bastion start [--foreground]    # Start local bastion
+labctl bastion status                  # Bastion health
+labctl login                           # Auth
+labctl doctor                          # Diagnostics
+```
+
+### 4. Resource Aliases & Resolution
+
+Follow mcpctl's pattern from `shared.ts`:
+- Accept singular, plural, and short aliases: `server`, `servers`, `srv` all resolve to the same resource
+- Accept name or ID: `labctl get server labmaster` or `labctl get server <uuid>`
+- Accept MAC address for servers: `labctl get server 38:05:25:33:e2:e4`
+
+### 5. RBAC Integration
+
+The existing Permission model uses `action:cloud:environment:server` patterns. Wire this into the resource system:
+- CLI commands check permissions before executing
+- `labctl get` respects read permissions (only show resources the user can see)
+- `labctl provision` requires `apply` permission on the target server
+- `labctl delete` requires `destroy` permission
+- Audit all resource operations to the AuditLog table
+
+### 6. Bastion State Directory Fix
+
+Fix the bug where the CLI's `--dir` default (`/tmp/lab-bastion`) overrides the `BASTION_DIR=/data` environment variable. The CLI option should use the env var as its default:
+```typescript
+.option("--dir <dir>", "Bastion data directory", process.env["BASTION_DIR"] ?? "/tmp/lab-bastion")
+```
+
+## Technical Constraints
+
+- Database: CockroachDB with Prisma ORM (already deployed)
+- API: Fastify + WebSocket (labd)
+- CLI: Commander.js (labctl)
+- Auth: mTLS certificates (planned), join tokens (implemented)
+- Monorepo: pnpm workspace with @lab/shared, @lab/bastion, @lab/cli, @lab/labd
+- The bastion-to-labd WebSocket protocol is defined in @lab/shared/protocol
+
+## Success Criteria
+
+1. `labctl get servers` shows all machines (discovered, provisioning, installed) from the database
+2. Server state survives bastion and labd pod restarts
+3. `labctl describe server <name>` shows hardware info, network, cluster membership
+4. Resources have tracked relationships (server→cluster, server→network, bastion→server)
+5. RBAC permissions are enforced on CLI operations
+6. All resource mutations are audit-logged
+7. CLI follows consistent kubectl-style `verb resource [name] [flags]` pattern
--- a/bastion/DESIGN-LAB-PLATFORM.md
+++ b/bastion/DESIGN-LAB-PLATFORM.md
@@ -0,0 +1,355 @@
+# Lab Platform — Design Document
+
+## Vision
+
+A unified infrastructure management platform that replaces Puppet with a modern, Pulumi-based system. Manages bare-metal servers, cloud VMs, and k3s clusters through a single CLI and API.
+
+## Architecture Overview
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│  Developer Workstation (thebeast)                               │
+│                                                                 │
+│  lab CLI                                                        │
+│  ├── lab init bastion standalone start     (PXE provisioning)   │
+│  ├── lab provision install/reprovision     (bare-metal)         │
+│  ├── lab get servers --env production      (query)              │
+│  ├── lab exec <server> -- <command>        (remote execution)   │
+│  ├── lab logs <server>                     (log streaming)      │
+│  ├── lab apply -f infra.ts                 (pulumi via labd)    │
+│  └── lab get roles/users/permissions       (RBAC management)    │
+│                                                                 │
+│  Connects to: labd via mTLS                                    │
+└─────────────────────┬───────────────────────────────────────────┘
+                      │ mTLS (client cert)
+                      ▼
+┌─────────────────────────────────────────────────────────────────┐
+│  labmaster.ad.itaz.eu (infra node, k3s single-node)            │
+│                                                                 │
+│  ┌──────────────────────────────────────────────────────┐      │
+│  │  labd (master daemon)                                 │      │
+│  │  ├── Certificate Authority (issues agent certs)       │      │
+│  │  ├── RBAC Engine (roles, permissions, ACLs)           │      │
+│  │  ├── Agent Registry (connected agents, heartbeats)    │      │
+│  │  ├── Pulumi Executor (runs IaC on behalf of users)    │      │
+│  │  ├── Log Aggregator (receives agent logs)             │      │
+│  │  ├── Module Registry (configuration modules)          │      │
+│  │  └── REST API + WebSocket (agent connections)         │      │
+│  └──────────────────────────────────────────────────────┘      │
+│                                                                 │
+│  ┌──────────────────────────────────────────────────────┐      │
+│  │  bastion (PXE provisioning)                           │      │
+│  │  Running as k3s pod with hostNetwork                  │      │
+│  └──────────────────────────────────────────────────────┘      │
+└──────────┬──────────────────────────────────────────────────────┘
+           │ mTLS (agent certs)
+           ▼
+┌──────────────────────┐  ┌──────────────────────┐  ┌────────────┐
+│  ser9.ad.itaz.eu     │  │  worker-2.ad.itaz.eu │  │  AWS EC2   │
+│  (bare-metal worker) │  │  (bare-metal worker) │  │  instances │
+│                      │  │                      │  │            │
+│  lab-agent           │  │  lab-agent           │  │  lab-agent │
+│  ├── heartbeat       │  │  ├── heartbeat       │  │  ├── ...   │
+│  ├── log shipping    │  │  ├── log shipping    │  │  └── ...   │
+│  ├── exec handler    │  │  ├── exec handler    │  │            │
+│  └── module runner   │  │  └── module runner   │  │            │
+└──────────────────────┘  └──────────────────────┘  └────────────┘
+```
+
+## Components
+
+### 1. labd (Master Daemon)
+
+The central control plane. Runs on labmaster.ad.itaz.eu as a k3s pod.
+
+**Responsibilities:**
+- Certificate Authority — signs agent certificates, manages trust chain
+- Agent Registry — tracks connected agents, heartbeats, status
+- RBAC — roles, permissions, ACLs per user/group/environment/cloud
+- Pulumi Executor — runs Pulumi TypeScript code submitted by users
+- Log Aggregator — receives and stores logs from agents
+- Module Registry — stores and distributes configuration modules
+- REST API — for CLI and external integrations
+- WebSocket — persistent agent connections for real-time commands
+
+**Tech:** Fastify, CockroachDB (PostgreSQL-compatible; via Prisma, reuse mcpctl patterns), WebSocket
+
+### 2. lab-agent
+
+Lightweight daemon running on every managed machine.
+
+**Responsibilities:**
+- Connect to labd via mTLS (agent certificate)
+- Send heartbeats (status, load, disk, memory)
+- Ship logs (journald → labd)
+- Execute commands on demand (like `kubectl exec`)
+- Run configuration modules (like `puppet agent -tv`)
+- Report module run results
+
+**Tech:** Standalone TypeScript binary (bun compiled), systemd service
+
+### 3. lab CLI (extended)
+
+Extends the existing `lab` CLI with platform management commands.
+
+**New commands:**
+```
+# Server management
+lab get servers                           # List all servers
+lab get servers --env production          # Filter by environment
+lab get servers --cloud baremetal         # Filter by cloud
+lab get servers --label role=k3s-worker   # Filter by label
+lab describe server <name>               # Detailed server info
+lab exec <server> -- <command>           # Remote command execution
+lab logs <server> [-f]                   # Stream server logs
+
+# Infrastructure as Code
+lab apply -f <file.ts>                   # Execute Pulumi code via labd
+lab plan -f <file.ts>                    # Dry-run Pulumi code
+lab destroy -f <file.ts>                 # Tear down resources
+
+# RBAC
+lab get roles                            # List roles
+lab get users                            # List users
+lab create role <name>                   # Create role
+lab bind role <role> --user <user>       # Bind role to user
+lab get permissions                      # List permissions
+
+# Environment/Cloud management
+lab get environments                     # List environments
+lab get clouds                           # List clouds
+lab create environment <name> --cloud <cloud>
+
+# Module management
+lab get modules                          # List available modules
+lab apply module <name> --target <server>  # Apply module to server
+```
+
+### 4. Certificate Authority
+
+Built into labd. Issues and manages certificates for agents and users.
+
+**Flow:**
+```
+1. Agent starts with a join token (one-time or reusable)
+2. Agent generates CSR, sends to labd with token
+3. labd validates token, signs certificate
+4. Agent receives signed cert + CA cert
+5. All future communication uses mTLS
+
+For CLI users:
+1. User runs `lab login` or `lab init`
+2. labd issues a client certificate (or uses existing SSH keys)
+3. CLI uses client cert for all API calls
+```
+
+**Token types:**
+- **One-time token** — for individual bare-metal servers (generated during PXE provision)
+- **Reusable token** — for autoscaling groups (AWS ASG instances use the same token)
+
+### 5. RBAC Model
+
+Reuse mcpctl's RBAC patterns. Hierarchical permissions:
+
+```
+Cloud → Environment → Server → Action
+
+Examples (written as action:cloud:environment:server):
+- exec:baremetal:lab:*           — can exec on any lab server
+- *:baremetal:lab:puppet         — full access to puppet server
+- read:aws:production:*          — read-only on all AWS prod servers
+- *:*:*:*                        — superadmin
+```
+
+**Resources:**
+- servers, environments, clouds, modules, roles, users, pulumi-stacks
+
+**Actions:**
+- read, exec, apply, destroy, manage, admin
+
+**Whitelist/Blacklist:**
+- Roles can have `allow` and `deny` rules
+- Deny takes precedence (like AWS IAM)
+
+### 6. Module System
+
+Configuration modules define the desired state of a server.
+
+**Module structure:**
+```
+modules/
+  k3s-server/
+    module.yaml          # Metadata: name, version, targets, deps
+    src/
+      index.ts           # Module entry point
+      install.ts         # Installation logic
+      configure.ts       # Configuration logic
+      health.ts          # Health check
+    tests/
+      install.test.ts
+  k3s-agent/
+    module.yaml
+    src/
+      index.ts
+  labd/
+    module.yaml
+    src/
+      index.ts           # Deploy labd to k3s
+```
+
+**module.yaml:**
+```yaml
+name: k3s-server
+version: 0.1.0
+description: Install and configure k3s server
+targets:
+  roles: [infra]
+  labels:
+    k3s: server
+dependencies:
+  - base-server
+```
+
+**Module sources:**
+- Built-in modules (in this repo, e.g., k3s-server, labd)
+- External modules (separate git repos, pulled by URL)
+- Module registry (future — like Puppet Forge)
+
+### 7. Cloud/Environment Model
+
+```
+Cloud: baremetal
+  └── Environment: lab
+       ├── Server: puppet.ad.itaz.eu (role=infra, labels={k3s=server})
+       ├── Server: ser9.ad.itaz.eu (role=worker, labels={k3s=agent})
+       └── ...
+
+Cloud: aws
+  └── Environment: production
+       ├── Server: i-abc123 (from ASG web-servers)
+       ├── Server: i-def456 (from ASG web-servers)
+       └── ...
+  └── Environment: staging
+       └── ...
+```
+
+Each bastion creates an environment under the `baremetal` cloud. AWS autoscaling groups create environments under the `aws` cloud.
+
+### 8. Pulumi Integration
+
+Users submit Pulumi TypeScript code to labd for execution.
+
+```bash
+# Apply infrastructure code
+lab apply -f infra/k3s-cluster.ts --env lab
+
+# The file is sent to labd, which:
+# 1. Checks RBAC (does user have apply permission for this env?)
+# 2. Creates a Pulumi stack
+# 3. Executes `pulumi up` in a sandboxed environment
+# 4. Streams output back to CLI
+# 5. Stores state in Pulumi backend (local or S3)
+```
+
+**Future AWS extension:**
+```typescript
+// infra/aws-web-servers.ts
+import * as aws from "@pulumi/aws";
+
+const asg = new aws.autoscaling.Group("web-servers", {
+  maxSize: 10,
+  minSize: 2,
+  launchTemplate: { /* ... */ },
+  // User data installs lab-agent with reusable join token
+});
+```
+
+## Project Structure
+
+```
+lab/
+  bastion/                    # Existing — PXE provisioning
+
+  src/
+    shared/                   # @lab/shared — types, constants, RBAC
+    labd/                     # @lab/labd — master daemon
+      src/
+        main.ts
+        server.ts
+        ca/                   # Certificate Authority
+        rbac/                 # RBAC engine (reuse mcpctl patterns)
+        agents/               # Agent registry + WebSocket
+        pulumi/               # Pulumi executor
+        logs/                 # Log aggregation
+        modules/              # Module registry
+        routes/               # REST API
+    agent/                    # @lab/agent — agent daemon
+      src/
+        main.ts
+        connection.ts         # mTLS WebSocket to labd
+        heartbeat.ts
+        executor.ts           # Command execution
+        logs.ts               # Log shipping
+        modules.ts            # Module runner
+    cli/                      # @lab/cli — extends existing CLI
+      src/
+        commands/
+          init/bastion/       # Existing bastion commands
+          provision/          # Existing provision commands
+          get/                # New: get servers/roles/users/etc
+          exec/               # New: remote execution
+          logs/               # New: log streaming
+          apply/              # New: pulumi apply
+          rbac/               # New: role management
+
+  modules/                    # Built-in modules
+    k3s-server/               # Deploy k3s server
+    k3s-agent/                # Deploy k3s agent
+    labd/                     # Deploy labd to k3s
+    lab-agent/                # Deploy lab-agent to servers
+
+  deploy/
+    k3s/                      # Existing k3s manifests for bastion
+    labd/                     # k3s manifests for labd
+```
+
+## Implementation Phases
+
+### Phase 1: Foundation (current + next)
+- [x] Bastion (PXE provisioning) — DONE
+- [x] CLI structure (`lab init/provision`) — DONE
+- [ ] Rename puppet to labmaster, reprovision
+- [ ] Deploy k3s on labmaster
+- [ ] Build labd skeleton (Fastify + Prisma)
+- [ ] Certificate Authority (issue/sign certs)
+- [ ] Agent skeleton (connect, heartbeat)
+
+### Phase 2: Core Platform
+- [ ] RBAC engine (roles, permissions, ACLs)
+- [ ] `lab get servers` with environment/cloud/label filters
+- [ ] `lab exec` remote command execution
+- [ ] `lab logs` streaming
+- [ ] Agent auto-enrollment via PXE provision (join token in kickstart)
+
+### Phase 3: Infrastructure as Code
+- [ ] Module system (define, apply, health check)
+- [ ] k3s-server module (deploy k3s)
+- [ ] labd module (deploy labd to k3s)
+- [ ] Pulumi executor in labd
+- [ ] `lab apply -f` command
+
+### Phase 4: Multi-Cloud
+- [ ] AWS provider (Pulumi-based)
+- [ ] Reusable join tokens for autoscaling groups
+- [ ] Cloud/environment model
+- [ ] Auto-discovery of cloud instances
+
+## Key Design Decisions
+
+1. **Pulumi over Puppet** — TypeScript-native, same language for IaC and platform code
+2. **mTLS over SSH** — proper PKI, scalable, no key management per-server
+3. **Agents connect to master** (not master pushing to agents) — works through NATs, firewalls
+4. **RBAC from day one** — security-first, deny by default
+5. **Module system inspired by Puppet** — declarative, testable, versionable
+6. **Multi-cloud extensible** — cloud is just a label, provider is pluggable
+7. **Reuse mcpctl patterns** — Prisma DB, Fastify routes, CLI structure, RBAC model
--- a/bastion/Dockerfile.bastion
+++ b/bastion/Dockerfile.bastion
@@ -0,0 +1,93 @@
+# Dockerfile.bastion -- PXE boot server (dnsmasq DHCP/TFTP + HTTP)
+# Requires host networking and NET_ADMIN/NET_RAW capabilities.
+
+# ── Stage 1: Build (compiles the TypeScript workspace to dist/) ──
+FROM node:22-alpine AS builder
+
+RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
+
+WORKDIR /app
+
+# Copy workspace config and package manifests first so the install layer stays cached until a manifest changes
+COPY pnpm-workspace.yaml pnpm-lock.yaml package.json tsconfig.base.json tsconfig.json ./
+COPY src/shared/package.json   src/shared/tsconfig.json   src/shared/
+COPY src/bastion/package.json  src/bastion/tsconfig.json  src/bastion/
+COPY src/cli/package.json      src/cli/tsconfig.json      src/cli/
+COPY src/modules/package.json  src/modules/tsconfig.json  src/modules/
+
+# Install all dependencies (dev included -- needed for build)
+RUN pnpm install --frozen-lockfile
+
+# Copy source code (after the install, so source edits do not invalidate the dependency layer)
+COPY src/shared/src/  src/shared/src/
+COPY src/bastion/src/ src/bastion/src/
+COPY src/cli/src/     src/cli/src/
+COPY src/modules/src/ src/modules/src/
+COPY src/modules/modules/ src/modules/modules/
+
+# Build TypeScript -- produces the src/*/dist/ directories copied into the runtime stage
+RUN pnpm build
+
+# ── Stage 1b: Build iPXE snp.efi from upstream HEAD, x86_64 + arm64 (uses UEFI SNP protocol for ISO boot) ──
+FROM fedora:43 AS ipxe-builder
+
+RUN dnf install -y git gcc make perl-interpreter xz-devel gcc-aarch64-linux-gnu && dnf clean all
+RUN git clone --depth=1 https://github.com/ipxe/ipxe.git /tmp/ipxe
+RUN cd /tmp/ipxe/src && make bin-x86_64-efi/snp.efi && \
+    make CROSS_COMPILE=aarch64-linux-gnu- bin-arm64-efi/snp.efi
+
+# ── Stage 2: Production runtime (Fedora -- needs dnsmasq) ───────
+FROM fedora:43
+
+RUN dnf install -y \
+    dnsmasq \
+    ipxe-bootimgs-x86 \
+    ipxe-bootimgs-aarch64 \
+    iproute \
+    curl \
+    openssh-clients \
+    nodejs \
+    npm \
+    xorriso \
+    mtools \
+    && dnf clean all
+
+# iPXE snp.efi built from source (Fedora only ships snponly, which can't
+# boot from CD-ROM/USB -- it requires PXE chainloading)
+COPY --from=ipxe-builder /tmp/ipxe/src/bin-x86_64-efi/snp.efi /usr/share/ipxe/ipxe-snp-x86_64.efi
+COPY --from=ipxe-builder /tmp/ipxe/src/bin-arm64-efi/snp.efi /usr/share/ipxe/arm64-efi/ipxe-snp.efi
+
+# Install pnpm -- pinned to the builder stage's version so both stages read pnpm-lock.yaml identically
+RUN npm install -g pnpm@9.15.0
+
+WORKDIR /app
+
+# Copy workspace config and package manifests
+COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
+COPY src/shared/package.json  src/shared/
+COPY src/bastion/package.json src/bastion/
+COPY src/cli/package.json     src/cli/
+COPY src/modules/package.json src/modules/
+
+# Install production dependencies: try the frozen lockfile first; on failure the error stays in the build log and a plain --prod install runs
+RUN pnpm install --frozen-lockfile --prod || pnpm install --prod
+
+# Copy built output from builder
+COPY --from=builder /app/src/shared/dist/  src/shared/dist/
+COPY --from=builder /app/src/bastion/dist/ src/bastion/dist/
+COPY --from=builder /app/src/cli/dist/     src/cli/dist/
+COPY --from=builder /app/src/modules/dist/ src/modules/dist/
+
+# Create data directories (under /data, which BASTION_DIR points at below)
+RUN mkdir -p /data/state /data/tftp /data/http
+
+ENV NODE_ENV=production
+ENV BASTION_DIR=/data
+ENV HTTP_PORT=8080
+
+EXPOSE 8080/tcp
+EXPOSE 67/udp
+EXPOSE 69/udp
+EXPOSE 4011/udp
+
+ENTRYPOINT ["node", "src/cli/dist/index.js", "init", "bastion", "standalone", "start", "--foreground"]
--- a/bastion/Dockerfile.labd
+++ b/bastion/Dockerfile.labd
@@ -0,0 +1,73 @@
+# Dockerfile.labd -- multi-stage build for the labd master daemon
+# Runs the Fastify API server with Prisma/CockroachDB backend.
+
+# ── Stage 1: Build (compiles @lab/shared + @lab/labd, generates the Prisma client) ──
+FROM node:22-alpine AS builder
+
+RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
+
+WORKDIR /app
+
+# Copy workspace config and package manifests first so the install layer stays cached until a manifest changes
+COPY pnpm-workspace.yaml pnpm-lock.yaml package.json tsconfig.base.json tsconfig.json ./
+COPY src/shared/package.json src/shared/tsconfig.json src/shared/
+COPY src/labd/package.json   src/labd/tsconfig.json   src/labd/
+
+# Install all dependencies (dev included -- needed for build)
+RUN pnpm install --frozen-lockfile
+
+# Copy Prisma schema and generate client (before the sources, so code edits do not re-run generation)
+COPY src/labd/prisma/ src/labd/prisma/
+RUN pnpm --filter @lab/labd exec prisma generate
+
+# Copy source code
+COPY src/shared/src/ src/shared/src/
+COPY src/labd/src/   src/labd/src/
+
+# Build TypeScript (shared first via project references)
+RUN pnpm --filter @lab/shared build && pnpm --filter @lab/labd build
+
+# Hoist the generated Prisma client so stage 2 can COPY it from a stable path
+RUN mkdir -p /app/_prisma && \
+    cp -r $(find /app/node_modules/.pnpm -path '*/.prisma/client' -type d | head -1) /app/_prisma/client
+
+# ── Stage 2: Production runtime ─────────────────────────────────
+FROM node:22-alpine
+
+RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
+
+WORKDIR /app
+
+# Copy workspace config and package manifests
+COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
+COPY src/shared/package.json src/shared/
+COPY src/labd/package.json   src/labd/
+
+# Install production dependencies only: try the frozen lockfile first; on failure the error stays in the build log and a plain --prod install runs
+RUN pnpm install --frozen-lockfile --prod || pnpm install --prod
+
+# Copy built output from builder
+COPY --from=builder /app/src/shared/dist/ src/shared/dist/
+COPY --from=builder /app/src/labd/dist/   src/labd/dist/
+
+# Copy Prisma schema + generated client into pnpm store location
+# Prisma expects .prisma/client as a sibling of @prisma/ in the same node_modules; abort if @prisma/client is absent instead of copying to a wrong path
+COPY --from=builder /app/src/labd/prisma/ src/labd/prisma/
+COPY --from=builder /app/_prisma/client/ /tmp/_prisma_client/
+RUN PRISMA_CLIENT_DIR=$(find /app/node_modules/.pnpm -path '*/@prisma/client' -type d | head -1) && \
+    { [ -n "$PRISMA_CLIENT_DIR" ] || { echo "ERROR: @prisma/client not found under /app/node_modules/.pnpm" >&2; exit 1; }; } && \
+    NM_DIR="$(dirname "$(dirname "$PRISMA_CLIENT_DIR")")" && \
+    mkdir -p "$NM_DIR/.prisma/client" && cp -r /tmp/_prisma_client/* "$NM_DIR/.prisma/client/" && \
+    echo "Installed Prisma generated client at: $NM_DIR/.prisma/client/" && \
+    rm -rf /tmp/_prisma_client
+
+ENV NODE_ENV=production
+ENV DATABASE_URL=postgresql://root@cockroachdb:26257/labctl?sslmode=disable
+ENV LABD_PORT=3100
+ENV LABD_HOST=0.0.0.0
+
+EXPOSE 3100
+
+USER node
+
+ENTRYPOINT ["node", "src/labd/dist/main.js"]
--- a/bastion/README.md
+++ b/bastion/README.md
@@ -0,0 +1,358 @@
+# labctl
+
+Infrastructure management platform for bare-metal servers, Kubernetes clusters, and cloud resources.
+
+## Install
+
+```bash
+# From Gitea packages (Fedora/RHEL)
+sudo dnf config-manager --add-repo https://mysources.co.uk/api/packages/michal/rpm.repo
+sudo dnf install labctl
+
+# From source
+cd bastion && pnpm install && pnpm build
+bun build src/cli/src/index.ts --compile --outfile dist/labctl
+sudo cp dist/labctl /usr/bin/labctl
+```
+
+## Quick Start
+
+```bash
+# Start the bastion (PXE provisioning server)
+sudo labctl init bastion standalone start
+
+# PXE boot a machine — it gets discovered automatically
+labctl provision list
+
+# Install Fedora on a discovered machine
+labctl provision install 78:55:36:08:35:14 labmaster --role infra
+
+# Reprovision (SSH reboot into PXE, preserves /home /srv /var/lib/rancher)
+labctl provision reprovision 78:55:36:08:35:14 labmaster --role infra
+```
+
+## Commands
+
+### Bastion (PXE Provisioning)
+
+```bash
+# Lifecycle
+sudo labctl init bastion standalone start              # Start bastion (daemonized)
+sudo labctl init bastion standalone start --foreground  # Start in foreground
+sudo labctl init bastion standalone stop                # Stop bastion
+labctl init bastion standalone status                   # Show status, PID, machine count
+
+# Options
+sudo labctl init bastion standalone start \
+  --port 8080 \
+  --dir /tmp/lab-bastion \
+  --domain ad.itaz.eu \
+  --dhcp-mode proxy \
+  --fedora 43 \
+  --timezone Europe/London
+```
+
+### Provisioning
+
+```bash
+# List all machines (discovered, queued, installing, installed)
+labctl provision list
+
+# Queue a machine for Fedora install
+labctl provision install <mac> <hostname> --role worker   # k3s worker (gets longhorn)
+labctl provision install <mac> <hostname> --role infra    # infra node (gets k3s server + /var/lib/rancher)
+
+# Reprovision — queues install, SSHes in, sets PXE boot, reboots
+labctl provision reprovision <mac> <hostname> --role infra
+
+# Remove a machine from state
+labctl provision forget <mac>
+
+# Options
+labctl provision install <mac> <hostname> \
+  --role worker \
+  --disk nvme0n1 \
+  --port 8080
+```
+
+### Server Management (planned)
+
+```bash
+# List servers with filters
+labctl get servers
+labctl get servers --env production
+labctl get servers --cloud baremetal
+labctl get servers --cloud aws
+labctl get servers --label role=k3s-worker
+labctl get servers --label asg=web-servers
+
+# Detailed server info
+labctl describe server/puppet
+labctl describe server/ser9
+```
+
+### Remote Execution (planned)
+
+```bash
+# Execute commands on servers (audited, RBAC-checked)
+labctl exec server/puppet -- whoami
+labctl exec server/puppet -- systemctl status k3s
+labctl exec server/puppet -it -- bash              # interactive TTY
+labctl exec server/puppet --timeout 30s -- long-running-task
+```
+
+### Kubernetes (planned)
+
+```bash
+# Proxied kubectl — audited, RBAC-checked, no kubeconfig needed
+labctl kubectl --cluster lab get pods
+labctl kubectl --cluster lab get nodes
+labctl kubectl --cluster lab logs pod/nginx -f
+labctl kubectl --cluster lab exec pod/nginx -- bash
+labctl kubectl --cluster lab apply -f deployment.yaml
+labctl kubectl --cluster aws-prod get pods --namespace app
+
+# Cluster management
+labctl clusters add lab --kubeconfig ~/.kube/config
+labctl clusters list
+labctl clusters remove staging
+```
+
+### Logs (planned)
+
+```bash
+# Server logs (journalctl passthrough via agent)
+labctl logs server/puppet                                    # all journal
+labctl logs server/puppet -f                                 # follow (live stream)
+labctl logs server/puppet -n 100                             # last 100 lines
+labctl logs server/puppet -u k3s                             # specific unit
+labctl logs server/puppet -u sshd --since "1h ago"           # time range
+labctl logs server/puppet --since "2026-03-17" --until "2026-03-18"
+labctl logs server/puppet -k                                 # kernel only
+labctl logs server/puppet -p err                             # errors only
+labctl logs server/puppet --file /var/log/nginx/error.log    # tail a file
+labctl logs server/puppet --file /var/log/nginx/error.log -n 50
+
+# App logs (k8s pod logs)
+labctl logs app/bastion
+labctl logs app/bastion -f
+labctl logs app/labd --container postgres
+
+# Pulumi execution logs
+labctl logs pulumi/run-abc123
+labctl logs pulumi/run-abc123 -f                             # follow active run
+
+# Bastion logs
+labctl logs bastion/lab
+labctl logs bastion/lab --mac 78:55:36:08:35:14              # specific machine's install
+
+# Agent daemon logs
+labctl logs agent/puppet
+
+# Audit logs
+labctl logs audit
+labctl logs audit --user michal
+labctl logs audit --user michal --since "1h ago"
+labctl logs audit/michal-20260317-abc123                     # specific session
+labctl logs audit --action kubectl --cluster lab
+labctl logs audit --action exec --server puppet
+```
+
+### Apps (planned, replaces Helm)
+
+```bash
+# Install Pulumi-based apps to Kubernetes
+labctl apps list                                  # available apps
+labctl apps install bastion                       # deploy bastion
+labctl apps install bastion --set port=8080       # with overrides
+labctl apps install bastion -f values.yaml        # from values file
+labctl apps install monitoring                    # Prometheus + Grafana
+
+# Manage deployed apps
+labctl apps status bastion                        # health, version, config
+labctl apps upgrade bastion                       # rolling upgrade
+labctl apps history bastion                       # version history
+labctl apps rollback bastion 2                    # rollback to version 2
+labctl apps uninstall bastion
+```
+
+### Infrastructure as Code (planned)
+
+```bash
+# Execute Pulumi programs via labd (RBAC-checked)
+labctl apply -f infra/k3s-cluster.ts --env lab
+labctl plan -f infra/k3s-cluster.ts --env lab     # dry run
+labctl destroy -f infra/k3s-cluster.ts --env lab
+```
+
+### RBAC (planned)
+
+```bash
+# Roles and permissions
+labctl get roles
+labctl get users
+labctl create role viewer --allow "read:*:*:*"
+labctl create role lab-admin --allow "*:baremetal:lab:*" --deny "destroy:*:*:*"
+labctl bind role lab-admin --user michal
+labctl unbind role lab-admin --user michal
+
+# Permission model: action:cloud:environment:server
+#   read:*:*:*                   — read everything
+#   exec:baremetal:lab:*         — exec on any lab server
+#   kubectl:*:*:*                — kubectl on any cluster
+#   *:baremetal:lab:puppet       — full access to puppet only
+#   manage:*:*:*                 — manage apps, clusters, tokens
+```
+
+### Environments and Clouds (planned)
+
+```bash
+labctl get environments
+labctl get clouds
+labctl create environment staging --cloud aws
+labctl create environment lab --cloud baremetal
+```
+
+## Partition Layout
+
+Machines installed by the bastion get this LVM layout:
+
+### Worker role (k3s worker with Longhorn)
+```
+/boot/efi       600MB     EFI
+/boot           3GB       ext4
+                ── LVM VG: labvg ──
+  swap          27GB      (matches RAM)
+  /             33GB      xfs
+  /var          100GB     xfs
+  /var/log      10GB      xfs
+  /home         10GB      xfs         ← preserved on reprovision
+  /srv          20GB      xfs         ← preserved on reprovision
+  /tmp          tmpfs 4GB
+  /var/lib/longhorn  rest  xfs        ← preserved on reprovision (Longhorn PVC storage)
+```
+
+### Infra role (k3s server, labmaster)
+```
+/boot/efi       600MB     EFI
+/boot           3GB       ext4
+                ── LVM VG: labvg ──
+  swap          27GB      (matches RAM)
+  /             33GB      xfs
+  /var          100GB     xfs
+  /var/log      10GB      xfs
+  /home         10GB      xfs         ← preserved on reprovision
+  /srv          20GB      xfs         ← preserved on reprovision
+  /var/lib/rancher  20GB  xfs         ← preserved on reprovision (k3s etcd data)
+  /tmp          tmpfs 4GB
+```
+
+On reprovision, OS partitions (`/`, `/var`, `/var/log`, `swap`) are wiped. Data partitions (`/home`, `/srv`, `/var/lib/longhorn`, `/var/lib/rancher`) are preserved.
+
+## Architecture
+
+```
+┌──────────────────────────────────────────────────────────────┐
+│  labctl CLI                                                   │
+│  init | provision | get | exec | logs | apply | apps | kubectl│
+└───────────────────────────┬──────────────────────────────────┘
+                            │ mTLS
+                            ▼
+┌──────────────────────────────────────────────────────────────┐
+│  labd (master daemon — stateless, on k3s)                    │
+│  ┌─────┐ ┌──────┐ ┌──────┐ ┌────────┐ ┌──────┐ ┌────────┐ │
+│  │ CA  │ │ RBAC │ │ Logs │ │ Pulumi │ │ Apps │ │kubectl │ │
+│  │     │ │      │ │relay │ │executor│ │      │ │ proxy  │ │
+│  └─────┘ └──────┘ └──────┘ └────────┘ └──────┘ └────────┘ │
+│                        CockroachDB                           │
+└──────────────┬─────────────────────────┬─────────────────────┘
+               │ mTLS                    │ mTLS
+    ┌──────────▼───────────┐  ┌──────────▼───────────┐
+    │  lab-agent            │  │  lab-agent            │
+    │  bare-metal server   │  │  AWS EC2 / cloud VM   │
+    │  ┌────────────────┐  │  │  ┌────────────────┐  │
+    │  │ heartbeat      │  │  │  │ heartbeat      │  │
+    │  │ exec handler   │  │  │  │ exec handler   │  │
+    │  │ log streamer   │  │  │  │ log streamer   │  │
+    │  │ module runner  │  │  │  │ module runner  │  │
+    │  └────────────────┘  │  │  └────────────────┘  │
+    └──────────────────────┘  └──────────────────────┘
+```
+
+## Technology Stack
+
+| Component | Technology |
+|-----------|-----------|
+| Language | TypeScript (ESM) |
+| CLI | Commander.js |
+| HTTP Server | Fastify + WebSocket |
+| Database | CockroachDB (PostgreSQL compatible) |
+| ORM | Prisma |
+| IaC | Pulumi (TypeScript) |
+| k8s CNI | Cilium |
+| Auth | mTLS (built-in CA) |
+| Packaging | nfpm (RPM/DEB), bun compile |
+| Containers | Podman + podman-compose |
+| CI/CD | Gitea Actions |
+| Testing | Vitest |
+
+## Development
+
+```bash
+cd bastion
+
+# Install dependencies
+pnpm install
+
+# Build all packages
+pnpm build
+
+# Run tests (30 tests)
+pnpm test:run
+
+# Type check
+pnpm typecheck
+
+# Lint
+pnpm lint
+
+# Generate shell completions
+pnpm completions:generate
+
+# Build standalone binary
+bun build src/cli/src/index.ts --compile --outfile dist/labctl
+
+# Build RPM/DEB packages (both architectures)
+bash scripts/build-rpm.sh --all
+
+# Build Docker image
+bash scripts/build-bastion.sh
+
+# Full release (build + publish + install)
+bash scripts/release.sh
+```
+
+## Project Structure
+
+```
+bastion/
+├── src/
+│   ├── shared/          # @lab/shared — types, constants
+│   ├── bastion/         # @lab/bastion — PXE provisioning server
+│   ├── cli/             # @lab/cli — CLI binary (labctl)
+│   ├── labd/            # @lab/labd — master daemon (Fastify API, Prisma/CockroachDB)
+│   └── agent/           # @lab/agent — server agent (planned)
+├── modules/             # Built-in configuration modules (planned)
+├── deploy/
+│   └── k3s/             # Kubernetes manifests
+├── stack/
+│   ├── Dockerfile
+│   └── docker-compose.yml
+├── scripts/             # Build, publish, release scripts
+├── completions/         # Generated shell completions
+└── ARCHITECTURE.md
+```
+
+## License
+
+MIT
--- a/bastion/completions/labctl.bash
+++ b/bastion/completions/labctl.bash
@@ -0,0 +1,121 @@
+# labctl bash completions -- auto-generated by scripts/generate-completions.ts
+# DO NOT EDIT MANUALLY -- run: pnpm completions:generate
+
+# Completion function for labctl.
+#
+# Collects the non-option words typed before the cursor into a subcommand
+# chain (e.g. "provision install") and offers the subcommands and options
+# registered for that chain. Relies on _init_completion (bash-completion
+# package) to populate cur, prev, words and cword.
+_labctl() {
+  local cur prev words cword
+  _init_completion || return
+
+  local top_commands="version init provision config login doctor app roles"
+
+  # Extract the subcommand chain (skip options and their values)
+  local -a subcmd_chain=()
+  local i
+  for ((i=1; i < cword; i++)); do
+    case "${words[i]}" in
+      # Options that take a value: the value word must not be mistaken for a
+      # subcommand, otherwise "provision list --port 8080" stops matching.
+      --port|--dir|--domain|--dhcp-mode|--fedora|--arch|--timezone|--locale|\
+      --role|--os|--disk|--server|--user|--crdb-replicas|--k3s-server|--k3s-token)
+        # "=" is normally a completion word break, so "--opt=value" may arrive
+        # as three words: "--opt", "=", "value".
+        [[ "${words[i+1]}" == "=" ]] && ((i++))
+        ((i++))
+        ;;
+      -*) ;;  # boolean flags carry no value word
+      *) subcmd_chain+=("${words[i]}") ;;
+    esac
+  done
+
+  local chain_str="${subcmd_chain[*]}"
+
+  case "$chain_str" in
+    "init bastion standalone start")
+      COMPREPLY=($(compgen -W "--port --dir --domain --dhcp-mode --fedora --arch --timezone --locale --skip-dnsmasq --skip-artifacts --foreground -h --help" -- "$cur"))
+      return ;;
+    "init bastion standalone stop")
+      COMPREPLY=($(compgen -W "--dir -h --help" -- "$cur"))
+      return ;;
+    "init bastion standalone status")
+      COMPREPLY=($(compgen -W "--dir --port -h --help" -- "$cur"))
+      return ;;
+    "init bastion standalone")
+      COMPREPLY=($(compgen -W "start stop status -h --help" -- "$cur"))
+      return ;;
+    "app labcontroller deploy")
+      COMPREPLY=($(compgen -W "--user --port --crdb-replicas -h --help" -- "$cur"))
+      return ;;
+    "app labcontroller status")
+      COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
+      return ;;
+    "app k3s install")
+      COMPREPLY=($(compgen -W "--role --user --port --k3s-server --k3s-token -h --help" -- "$cur"))
+      return ;;
+    "app k3s health")
+      COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
+      return ;;
+    "app k3s list")
+      COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
+      return ;;
+    "init bastion")
+      COMPREPLY=($(compgen -W "standalone -h --help" -- "$cur"))
+      return ;;
+    "provision list")
+      COMPREPLY=($(compgen -W "--port -h --help" -- "$cur"))
+      return ;;
+    "provision install")
+      COMPREPLY=($(compgen -W "--role --os --disk --port -h --help" -- "$cur"))
+      return ;;
+    "provision reprovision")
+      COMPREPLY=($(compgen -W "--role --os --disk --port -h --help" -- "$cur"))
+      return ;;
+    "provision forget")
+      COMPREPLY=($(compgen -W "--port -h --help" -- "$cur"))
+      return ;;
+    "provision logs")
+      COMPREPLY=($(compgen -W "-f --follow --port -h --help" -- "$cur"))
+      return ;;
+    "config list")
+      COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+      return ;;
+    "config get")
+      COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+      return ;;
+    "config set")
+      COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+      return ;;
+    "config path")
+      COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+      return ;;
+    "app labcontroller")
+      COMPREPLY=($(compgen -W "deploy status -h --help" -- "$cur"))
+      return ;;
+    "app k3s")
+      COMPREPLY=($(compgen -W "install health list -h --help" -- "$cur"))
+      return ;;
+    "version")
+      COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+      return ;;
+    "init")
+      COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
+      return ;;
+    "provision")
+      COMPREPLY=($(compgen -W "list install reprovision forget logs -h --help" -- "$cur"))
+      return ;;
+    "config")
+      COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
+      return ;;
+    "login")
+      COMPREPLY=($(compgen -W "--server -h --help" -- "$cur"))
+      return ;;
+    "doctor")
+      COMPREPLY=($(compgen -W "--json -h --help" -- "$cur"))
+      return ;;
+    "app")
+      COMPREPLY=($(compgen -W "labcontroller k3s -h --help" -- "$cur"))
+      return ;;
+    "roles")
+      COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+      return ;;
+    "")
+      # Nothing typed yet: offer top-level commands and global flags.
+      COMPREPLY=($(compgen -W "$top_commands -h --help -v --version" -- "$cur"))
+      return ;;
+    *)
+      # Unrecognised chain (e.g. a positional target was typed): help only.
+      COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+      return ;;
+  esac
+}
+
+complete -F _labctl labctl
--- a/bastion/completions/labctl.fish
+++ b/bastion/completions/labctl.fish
@@ -0,0 +1,202 @@
+# labctl fish completions -- auto-generated by scripts/generate-completions.ts
+# DO NOT EDIT MANUALLY -- run: pnpm completions:generate
+
+complete -c labctl -e
+complete -c labctl -f
+
+# Global options
+complete -c labctl -s v -l version -d 'Show version'
+complete -c labctl -s h -l help -d 'Show help'
+
+# Helper: succeed only when the positional words typed so far are exactly the
+# given subcommand chain -- same words, same order, nothing after them.
+# Words starting with "-" are ignored; a separate option value word
+# (the 8080 in "--port 8080") is not, and counts as a positional.
+function __labctl_using_cmd
+    set -l want $argv
+    set -l want_n (count $want)
+    set -l cmdline (commandline -opc)
+    set -l pos 0
+    for word in $cmdline[2..]
+        string match -q -- "-*" $word; and continue
+        set pos (math $pos + 1)
+        # A positional beyond the chain, or one that differs, rules it out.
+        test $pos -gt $want_n; and return 1
+        test "$word" = "$want[$pos]"; or return 1
+    end
+    # Every positional matched; the chain must also be complete.
+    test $pos -eq $want_n
+end
+
+# Helper: succeed when the positional words typed so far begin with the given
+# subcommand chain. Extra positionals after the chain are allowed, so option
+# completions keep working once a target argument has been entered.
+function __labctl_in_cmd
+    set -l want $argv
+    set -l want_n (count $want)
+    set -l cmdline (commandline -opc)
+    set -l pos 0
+    for word in $cmdline[2..]
+        if not string match -q -- "-*" $word
+            set pos (math $pos + 1)
+            # Only the first want_n positionals have to match the chain.
+            if test $pos -le $want_n; and test "$word" != "$want[$pos]"
+                return 1
+            end
+        end
+    end
+    test $pos -ge $want_n
+end
+
+# The three helpers below query the bastion API at the fixed address
+# http://localhost:8080 and reshape the JSON reply with an inline python3
+# snippet. curl is capped at 2 seconds (--max-time) so an unresponsive
+# endpoint cannot stall the interactive shell during completion; all errors
+# are discarded, which simply yields no candidates.
+
+# Dynamic: fetch machine hostnames from bastion (installed + queued)
+function __labctl_installed_hosts
+    curl -s --max-time 2 http://localhost:8080/api/machines 2>/dev/null |
+        python3 -c 'import sys,json; d=json.load(sys.stdin); hosts=[v.get("hostname","") for v in {**d.get("install_queue",{}), **d.get("installed",{})}.values() if v.get("hostname")]; [print(h) for h in set(hosts)]' 2>/dev/null
+end
+
+# Dynamic: fetch all known MAC addresses (discovered + queue + installed)
+function __labctl_known_macs
+    curl -s --max-time 2 http://localhost:8080/api/machines 2>/dev/null |
+        python3 -c 'import sys,json; d=json.load(sys.stdin); [print(k) for k in {**d.get("discovered",{}), **d.get("install_queue",{}), **d.get("installed",{})}]' 2>/dev/null
+end
+
+# Dynamic: fetch hostnames and MACs from all states
+function __labctl_hosts_and_macs
+    curl -s --max-time 2 http://localhost:8080/api/machines 2>/dev/null |
+        python3 -c 'import sys,json; d=json.load(sys.stdin); a={**d.get("discovered",{}), **d.get("install_queue",{}), **d.get("installed",{})}; macs=list(a.keys()); hosts=[v.get("hostname","") for v in {**d.get("install_queue",{}), **d.get("installed",{})}.values() if v.get("hostname")]; [print(x) for x in set(macs+hosts)]' 2>/dev/null
+end
+
+# Target argument completions
+complete -c labctl -n "__labctl_using_cmd app k3s install" -a "(__labctl_installed_hosts)" -d 'installed host'
+complete -c labctl -n "__labctl_using_cmd app k3s health" -a "(__labctl_installed_hosts)" -d 'installed host'
+complete -c labctl -n "__labctl_using_cmd app labcontroller deploy" -a "(__labctl_installed_hosts)" -d 'installed host'
+complete -c labctl -n "__labctl_using_cmd app labcontroller status" -a "(__labctl_installed_hosts)" -d 'installed host'
+complete -c labctl -n "__labctl_using_cmd provision install" -a "(__labctl_known_macs)" -d 'MAC address'
+complete -c labctl -n "__labctl_using_cmd provision reprovision" -a "(__labctl_hosts_and_macs)" -d 'host or MAC'
+complete -c labctl -n "__labctl_using_cmd provision forget" -a "(__labctl_hosts_and_macs)" -d 'host or MAC'
+complete -c labctl -n "__labctl_using_cmd provision logs" -a "(__labctl_hosts_and_macs)" -d 'host or MAC'
+
+# Top-level commands
+complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a version -d 'Show version information'
+complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a init -d 'Initialise infrastructure components'
+complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a provision -d 'Machine provisioning operations'
+complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a config -d 'View and modify CLI configuration'
+complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a login -d 'Authenticate with labd and obtain client certificate'
+complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a doctor -d 'Diagnose configuration and connectivity issues'
+complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a app -d 'Application management'
+complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a roles -d 'List available machine roles'
+
+# init subcommands
+complete -c labctl -n "__labctl_using_cmd init" -a bastion -d 'Bastion PXE server management'
+
+# init bastion subcommands
+complete -c labctl -n "__labctl_using_cmd init bastion" -a standalone -d 'Standalone bastion server lifecycle'
+
+# init bastion standalone subcommands
+complete -c labctl -n "__labctl_using_cmd init bastion standalone" -a start -d 'Start the bastion server (HTTP + dnsmasq PXE)'
+complete -c labctl -n "__labctl_using_cmd init bastion standalone" -a stop -d 'Stop a running bastion server'
+complete -c labctl -n "__labctl_using_cmd init bastion standalone" -a status -d 'Show bastion server status'
+
+# init bastion standalone start options
+# -x marks an option as taking a value and suppresses plain file completion;
+# -a supplies the candidates. --dir therefore asks fish for directories
+# explicitly, and --dhcp-mode offers the two modes named in its description.
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l port -d 'HTTP port' -x
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l dir -d 'Bastion data directory' -xa '(__fish_complete_directories)'
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l domain -d 'Internal domain for hostnames' -x
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l dhcp-mode -d 'DHCP mode: proxy or full' -xa 'proxy full'
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l fedora -d 'Fedora version' -x
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l arch -d 'Architecture' -x
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l timezone -d 'Timezone' -x
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l locale -d 'Locale' -x
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l skip-dnsmasq -d 'Skip starting dnsmasq (for testing)'
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l skip-artifacts -d 'Skip downloading boot artifacts (for testing)'
+complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l foreground -d 'Run in foreground (default: daemonize)'
+
+# init bastion standalone stop options
+complete -c labctl -n "__labctl_in_cmd init bastion standalone stop" -l dir -d 'Bastion data directory' -xa '(__fish_complete_directories)'
+
+# init bastion standalone status options
+complete -c labctl -n "__labctl_in_cmd init bastion standalone status" -l dir -d 'Bastion data directory' -xa '(__fish_complete_directories)'
+complete -c labctl -n "__labctl_in_cmd init bastion standalone status" -l port -d 'Bastion HTTP port' -x
+
+# provision subcommands
+complete -c labctl -n "__labctl_using_cmd provision" -a list -d 'List all known machines'
+complete -c labctl -n "__labctl_using_cmd provision" -a install -d 'Queue a discovered machine for OS installation'
+complete -c labctl -n "__labctl_using_cmd provision" -a reprovision -d 'Queue install + SSH reboot into PXE (target: hostname, MAC, or IP)'
+complete -c labctl -n "__labctl_using_cmd provision" -a forget -d 'Remove a machine from bastion state'
+complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
+
+# provision list options
+complete -c labctl -n "__labctl_in_cmd provision list" -l port -d 'Bastion HTTP port' -x
+
+# provision install options
+complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
+complete -c labctl -n "__labctl_in_cmd provision install" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
+complete -c labctl -n "__labctl_in_cmd provision install" -l disk -d 'Target disk device (auto-detect if omitted)' -x
+complete -c labctl -n "__labctl_in_cmd provision install" -l port -d 'Bastion HTTP port' -x
+
+# provision reprovision options
+complete -c labctl -n "__labctl_in_cmd provision reprovision" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
+complete -c labctl -n "__labctl_in_cmd provision reprovision" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
+complete -c labctl -n "__labctl_in_cmd provision reprovision" -l disk -d 'Target disk device (auto-detect if omitted)' -x
+complete -c labctl -n "__labctl_in_cmd provision reprovision" -l port -d 'Bastion HTTP port' -x
+
+# provision forget options
+complete -c labctl -n "__labctl_in_cmd provision forget" -l port -d 'Bastion HTTP port' -x
+
+# provision logs options
+complete -c labctl -n "__labctl_in_cmd provision logs" -s f -l follow -d 'Follow logs in real-time (SSE stream)'
+complete -c labctl -n "__labctl_in_cmd provision logs" -l port -d 'Bastion HTTP port' -x
+
+# config subcommands
+complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
+complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
+complete -c labctl -n "__labctl_using_cmd config" -a set -d 'Set a configuration value'
+complete -c labctl -n "__labctl_using_cmd config" -a path -d 'Show configuration file path'
+
+# login options
+complete -c labctl -n "__labctl_in_cmd login" -l server -d 'labd server URL' -x
+
+# doctor options
+complete -c labctl -n "__labctl_in_cmd doctor" -l json -d 'Output results as JSON'
+
+# app subcommands
+complete -c labctl -n "__labctl_using_cmd app" -a labcontroller -d 'Labcontroller deployment (bastion + labd + CockroachDB)'
+complete -c labctl -n "__labctl_using_cmd app" -a k3s -d 'k3s cluster management'
+
+# app labcontroller subcommands
+complete -c labctl -n "__labctl_using_cmd app labcontroller" -a deploy -d 'Deploy labcontroller stack to a k3s node'
+complete -c labctl -n "__labctl_using_cmd app labcontroller" -a status -d 'Check labcontroller deployment status (all hosts if no target)'
+
+# app labcontroller deploy options
+complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l user -d 'SSH user' -x
+complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l port -d 'Bastion HTTP port' -x
+complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l crdb-replicas -d 'CockroachDB replicas' -x
+
+# app labcontroller status options
+complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l user -d 'SSH user' -x
+complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l port -d 'Bastion HTTP port' -x
+
+# app k3s subcommands
+complete -c labctl -n "__labctl_using_cmd app k3s" -a install -d 'Install k3s on a target machine (hostname, IP, or MAC)'
+complete -c labctl -n "__labctl_using_cmd app k3s" -a health -d 'Check k3s health (all hosts if no target given)'
+complete -c labctl -n "__labctl_using_cmd app k3s" -a list -d 'List installed machines and their k3s status'
+
+# app k3s install options
+# --role offers the two roles named in its description; the remaining
+# options take free-form values (-x: value required, no file completion).
+complete -c labctl -n "__labctl_in_cmd app k3s install" -l role -d 'k3s role: infra (server) or worker (agent)' -xa 'infra worker'
+complete -c labctl -n "__labctl_in_cmd app k3s install" -l user -d 'SSH user' -x
+complete -c labctl -n "__labctl_in_cmd app k3s install" -l port -d 'Bastion HTTP port (for resolving target)' -x
+complete -c labctl -n "__labctl_in_cmd app k3s install" -l k3s-server -d 'k3s server URL (required for worker role)' -x
+complete -c labctl -n "__labctl_in_cmd app k3s install" -l k3s-token -d 'k3s join token (required for worker role)' -x
+
+# app k3s health options
+complete -c labctl -n "__labctl_in_cmd app k3s health" -l user -d 'SSH user' -x
+complete -c labctl -n "__labctl_in_cmd app k3s health" -l port -d 'Bastion HTTP port' -x
+
+# app k3s list options
+complete -c labctl -n "__labctl_in_cmd app k3s list" -l user -d 'SSH user' -x
+complete -c labctl -n "__labctl_in_cmd app k3s list" -l port -d 'Bastion HTTP port' -x
+
--- a/bastion/deploy/k3s/configmap.yaml
+++ b/bastion/deploy/k3s/configmap.yaml
@@ -0,0 +1,13 @@
+# Environment for the bastion container; the bastion Deployment loads every
+# key below through envFrom/configMapRef.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: bastion-config
+  namespace: lab-infra
+data:
+  # Same port the Deployment declares as containerPort and probes.
+  HTTP_PORT: "8080"
+  DOMAIN: "ad.itaz.eu"
+  FEDORA_VERSION: "43"
+  DHCP_MODE: "proxy"
+  TIMEZONE: "Europe/London"
+  LOCALE: "en_GB.UTF-8"
+  # NOTE(review): this URL targets namespace "lab-system", while
+  # deploy/k8s/labd/base/kustomization.yaml sets namespace "lab-infra".
+  # Confirm an overlay deploys labd into lab-system, or adjust the host.
+  LABD_URL: "http://labd.lab-system.svc.cluster.local:3100"
--- a/bastion/deploy/k3s/deployment.yaml
+++ b/bastion/deploy/k3s/deployment.yaml
@@ -0,0 +1,86 @@
+# Bastion PXE provisioning server, run as a single pod in lab-infra.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: bastion
+  namespace: lab-infra
+  labels:
+    app: bastion
+spec:
+  replicas: 1
+  # Recreate terminates the old pod before starting its replacement, so two
+  # bastion pods never run side by side during a rollout.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: bastion
+  template:
+    metadata:
+      labels:
+        app: bastion
+    spec:
+      # Referenced pull secret is not created by this kustomization; it has to
+      # exist in the namespace beforehand.
+      imagePullSecrets:
+        - name: gitea-registry
+      # The pod shares the node's network namespace.
+      # NOTE(review): presumably needed so dnsmasq can answer DHCP/PXE on the
+      # LAN -- confirm.
+      hostNetwork: true
+      # Keeps cluster DNS in use even though the pod is on the host network.
+      dnsPolicy: ClusterFirstWithHostNet
+      dnsConfig:
+        options:
+          - name: ndots
+            value: "1"
+      containers:
+        - name: bastion
+          image: mysources.co.uk/michal/lab/bastion:latest
+          imagePullPolicy: Always
+          # Equivalent to: labctl init bastion standalone start --foreground
+          command:
+            - node
+            - src/cli/dist/index.js
+            - init
+            - bastion
+            - standalone
+            - start
+            - --foreground
+          envFrom:
+            - configMapRef:
+                name: bastion-config
+          env:
+            # Secret "bastion-join-token" is not among this kustomization's
+            # resources; it has to be created separately.
+            - name: BASTION_JOIN_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: bastion-join-token
+                  key: token
+          ports:
+            - containerPort: 8080
+              name: http
+          volumeMounts:
+            - name: state
+              mountPath: /data
+            - name: ssh-keys
+              mountPath: /root/.ssh
+              readOnly: true
+          securityContext:
+            capabilities:
+              add:
+                - NET_ADMIN
+                - NET_RAW
+          # All three probes hit the machine-list endpoint. Startup tolerates
+          # 60 failures x 10 s = up to 10 minutes before the pod is restarted.
+          startupProbe:
+            httpGet:
+              path: /api/machines
+              port: 8080
+            failureThreshold: 60
+            periodSeconds: 10
+          livenessProbe:
+            httpGet:
+              path: /api/machines
+              port: 8080
+            periodSeconds: 30
+          readinessProbe:
+            httpGet:
+              path: /api/machines
+              port: 8080
+            periodSeconds: 10
+      volumes:
+        - name: state
+          persistentVolumeClaim:
+            claimName: bastion-state
+        # Exposes the node's own /root/.ssh directory to the container
+        # (mounted read-only above); type Directory requires it to exist.
+        - name: ssh-keys
+          hostPath:
+            path: /root/.ssh
+            type: Directory
--- a/bastion/deploy/k3s/kustomization.yaml
+++ b/bastion/deploy/k3s/kustomization.yaml
@@ -0,0 +1,7 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - namespace.yaml
+  - configmap.yaml
+  - pvc.yaml
+  - deployment.yaml
--- a/bastion/deploy/k3s/namespace.yaml
+++ b/bastion/deploy/k3s/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: lab-infra
--- a/bastion/deploy/k3s/pvc.yaml
+++ b/bastion/deploy/k3s/pvc.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: bastion-state
+  namespace: lab-infra
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: local-path
+  resources:
+    requests:
+      storage: 10Gi
--- a/bastion/deploy/k8s/labd/base/configmap.yaml
+++ b/bastion/deploy/k8s/labd/base/configmap.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: labd-config
+data:
+  LABD_PORT: "3100"
+  LABD_HOST: "0.0.0.0"
+  LABD_LOG_LEVEL: "info"
--- a/bastion/deploy/k8s/labd/base/deployment.yaml
+++ b/bastion/deploy/k8s/labd/base/deployment.yaml
@@ -0,0 +1,44 @@
+# labd daemon Deployment. Namespace and common labels are applied by the
+# kustomization in this directory.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: labd
+spec:
+  # spec.replicas is intentionally omitted: hpa.yaml scales this Deployment
+  # (minReplicas 2, maxReplicas 10). A fixed count here would reset the
+  # replica number on every apply and fight the autoscaler.
+  selector:
+    matchLabels:
+      app: labd
+  template:
+    metadata:
+      labels:
+        app: labd
+    spec:
+      containers:
+        - name: labd
+          image: mysources.co.uk/michal/lab/labd:latest
+          imagePullPolicy: Always
+          ports:
+            - containerPort: 3100
+          # Non-secret settings come from labd-config; labd-secrets is not
+          # part of this base and has to be provided separately.
+          envFrom:
+            - configMapRef:
+                name: labd-config
+            - secretRef:
+                name: labd-secrets
+          livenessProbe:
+            httpGet:
+              path: /health/live
+              port: 3100
+            initialDelaySeconds: 10
+            periodSeconds: 15
+          readinessProbe:
+            httpGet:
+              path: /health/ready
+              port: 3100
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          # The CPU request is the baseline for the HPA's 70% utilization target.
+          resources:
+            requests:
+              cpu: 100m
+              memory: 128Mi
+            limits:
+              cpu: 500m
+              memory: 512Mi
--- a/bastion/deploy/k8s/labd/base/hpa.yaml
+++ b/bastion/deploy/k8s/labd/base/hpa.yaml
@@ -0,0 +1,18 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: labd
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: labd
+  minReplicas: 2
+  maxReplicas: 10
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: 70
--- a/bastion/deploy/k8s/labd/base/kustomization.yaml
+++ b/bastion/deploy/k8s/labd/base/kustomization.yaml
@@ -0,0 +1,14 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+namespace: lab-infra
+
+commonLabels:
+  app: labd
+
+resources:
+  - deployment.yaml
+  - service.yaml
+  - configmap.yaml
+  - hpa.yaml
+  - pdb.yaml
--- a/bastion/deploy/k8s/labd/base/pdb.yaml
+++ b/bastion/deploy/k8s/labd/base/pdb.yaml
@@ -0,0 +1,9 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: labd
+spec:
+  maxUnavailable: 1
+  selector:
+    matchLabels:
+      app: labd
--- a/bastion/deploy/k8s/labd/base/service.yaml
+++ b/bastion/deploy/k8s/labd/base/service.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: labd
+spec:
+  type: ClusterIP
+  selector:
+    app: labd
+  ports:
+    - port: 3100
+      targetPort: 3100
+      protocol: TCP
--- a/bastion/eslint.config.js
+++ b/bastion/eslint.config.js
@@ -0,0 +1,26 @@
+import tseslint from '@typescript-eslint/eslint-plugin';
+import tsparser from '@typescript-eslint/parser';
+
+// ESLint flat config: one block of rules for workspace TypeScript sources,
+// followed by an ignores-only block.
+export default [
+  {
+    // Sources of every workspace package under src/<package>/src.
+    files: ['src/*/src/**/*.ts'],
+    languageOptions: {
+      parser: tsparser,
+      parserOptions: {
+        // Type information from each package's tsconfig; type-aware rules
+        // such as strict-boolean-expressions depend on it.
+        project: ['./src/*/tsconfig.json'],
+        // NOTE(review): import.meta.dirname needs Node 20.11+, while
+        // package.json "engines" allows >=20.0.0 -- confirm.
+        tsconfigRootDir: import.meta.dirname,
+      },
+    },
+    plugins: { '@typescript-eslint': tseslint },
+    rules: {
+      '@typescript-eslint/explicit-function-return-type': 'error',
+      '@typescript-eslint/no-explicit-any': 'error',
+      '@typescript-eslint/no-unused-vars': 'error',
+      '@typescript-eslint/strict-boolean-expressions': 'error',
+      // console.warn / console.error are allowed; other console calls warn.
+      'no-console': ['warn', { allow: ['warn', 'error'] }],
+    },
+  },
+  {
+    // An object containing only "ignores" applies to the whole config.
+    ignores: ['**/dist/**', '**/node_modules/**', '**/*.config.*'],
+  },
+];
--- a/bastion/nfpm.yaml
+++ b/bastion/nfpm.yaml
@@ -0,0 +1,20 @@
+name: labctl
+arch: amd64
+version: 0.1.0
+release: "1"
+maintainer: michal
+description: Lab infrastructure CLI for bare-metal provisioning
+license: MIT
+contents:
+  - src: ./dist/labctl
+    dst: /usr/bin/labctl
+    file_info:
+      mode: 0755
+  - src: ./completions/labctl.bash
+    dst: /usr/share/bash-completion/completions/labctl
+    file_info:
+      mode: 0644
+  - src: ./completions/labctl.fish
+    dst: /usr/share/fish/vendor_completions.d/labctl.fish
+    file_info:
+      mode: 0644
--- a/bastion/package.json
+++ b/bastion/package.json
@@ -0,0 +1,43 @@
+{
+  "name": "lab",
+  "version": "0.1.0",
+  "private": true,
+  "description": "PXE bastion server for discover-first bare-metal provisioning",
+  "type": "module",
+  "scripts": {
+    "build": "pnpm -r run build",
+    "test": "vitest",
+    "test:run": "vitest run",
+    "typecheck": "tsc --build",
+    "clean": "pnpm -r run clean && rimraf node_modules",
+    "lint": "eslint 'src/*/src/**/*.ts'",
+    "lint:fix": "eslint 'src/*/src/**/*.ts' --fix",
+    "completions:generate": "tsx scripts/generate-completions.ts --write",
+    "completions:check": "tsx scripts/generate-completions.ts --check",
+    "test:integration": "vitest run -c tests/integration/vitest.config.ts",
+    "test:integration:k3s": "vitest run -c tests/integration/vitest.config.ts -t k3s",
+    "test:integration:k3s:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t k3s",
+    "test:integration:pxe": "vitest run -c tests/integration/vitest.config.ts -t 'PXE boot'",
+    "test:integration:pxe:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'PXE boot'",
+    "test:integration:iso": "vitest run -c tests/integration/vitest.config.ts -t 'ISO boot'",
+    "test:integration:iso:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'ISO boot'",
+    "test:integration:arm-iso": "vitest run -c tests/integration/vitest.config.ts -t 'ARM ISO'",
+    "test:integration:arm-iso:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'ARM ISO'"
+  },
+  "engines": {
+    "node": ">=20.0.0",
+    "pnpm": ">=9.0.0"
+  },
+  "packageManager": "pnpm@9.15.0",
+  "devDependencies": {
+    "@types/node": "^22.10.0",
+    "@typescript-eslint/eslint-plugin": "^8.57.1",
+    "@typescript-eslint/parser": "^8.57.1",
+    "eslint": "^10.0.3",
+    "eslint-config-prettier": "^10.1.8",
+    "rimraf": "^6.0.0",
+    "tsx": "^4.21.0",
+    "typescript": "^5.7.0",
+    "vitest": "^3.0.0"
+  }
+}
--- a/bastion/pnpm-lock.yaml
+++ b/bastion/pnpm-lock.yaml
--- a/bastion/pnpm-workspace.yaml
+++ b/bastion/pnpm-workspace.yaml
@@ -0,0 +1,2 @@
+packages:
+  - "src/*"
--- a/bastion/scripts/build-bastion.sh
+++ b/bastion/scripts/build-bastion.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# Build bastion container image (multi-arch) and push to Gitea container registry
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+cd "$PROJECT_ROOT"
+
+# Load .env for GITEA_TOKEN
+if [ -f .env ]; then
+  set -a; source .env; set +a
+fi
+
+# ── Argument parsing ───────────────────────────────────────────────
+PUSH=false
+PLATFORMS="linux/amd64,linux/arm64"
+
+usage() {
+  cat <<EOF
+Usage: $(basename "$0") [OPTIONS] [TAG]
+
+Build bastion container image (multi-arch) and optionally push to registry.
+
+Options:
+  --push             Push to registry after building
+  --platforms LIST   Comma-separated platforms (default: linux/amd64,linux/arm64)
+  -h, --help         Show this help message
+
+Arguments:
+  TAG                Image tag (default: version from package.json)
+
+Examples:
+  $(basename "$0")                          # build multi-arch, no push
+  $(basename "$0") --push                   # build + push with version tag
+  $(basename "$0") --push latest            # build + push as :latest
+  $(basename "$0") --platforms linux/amd64   # build amd64 only
+EOF
+  exit 0
+}
+
+POSITIONAL_ARGS=()
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --push)
+      PUSH=true
+      shift
+      ;;
+    --platforms)
+      PLATFORMS="$2"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      ;;
+    *)
+      POSITIONAL_ARGS+=("$1")
+      shift
+      ;;
+  esac
+done
+
+REGISTRY="${GITEA_REGISTRY:-mysources.co.uk}"
+REPO="michal/lab/bastion"
+FULL_IMAGE="$REGISTRY/$REPO"
+VERSION=$(node -p "require('./package.json').version")
+TAG="${POSITIONAL_ARGS[0]:-$VERSION}"
+
+echo "==> Building bastion image"
+echo "    Tag:       $TAG"
+echo "    Platforms: $PLATFORMS"
+echo "    Registry:  $FULL_IMAGE"
+
+# ── Build multi-arch manifest ────────────────────────────────────
+MANIFEST="lab-bastion:$TAG"
+
+# Remove existing manifest/image with the same tag
+podman manifest rm "$MANIFEST" 2>/dev/null || true
+podman rmi "$MANIFEST" 2>/dev/null || true
+
+echo "==> Building for platforms: $PLATFORMS..."
+podman build \
+  --platform "$PLATFORMS" \
+  --manifest "$MANIFEST" \
+  -f Dockerfile.bastion \
+  .
+
+echo "==> Build complete. Manifest:"
+podman manifest inspect "$MANIFEST" | grep -E '"(architecture|os)"'
+
+# ── Push ─────────────────────────────────────────────────────────
+if [ "$PUSH" = true ]; then
+  if [ -z "$GITEA_TOKEN" ]; then
+    # Try reading from ~/.gitea-token
+    if [ -f "$HOME/.gitea-token" ]; then
+      GITEA_TOKEN="$(cat "$HOME/.gitea-token")"
+    else
+      echo "ERROR: GITEA_TOKEN not set and ~/.gitea-token not found"
+      exit 1
+    fi
+  fi
+
+  echo "==> Logging in to $REGISTRY..."
+  podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
+
+  echo "==> Pushing $FULL_IMAGE:$TAG..."
+  podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
+
+  # Also tag as :latest if not already
+  if [ "$TAG" != "latest" ]; then
+    echo "==> Also pushing as :latest..."
+    podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
+  fi
+
+  # Link package to repository if script exists
+  if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
+    source "$SCRIPT_DIR/link-package.sh"
+    link_package "container" "bastion"
+  fi
+
+  echo "==> Pushed successfully!"
+else
+  echo "==> Skipping push (use --push to push to registry)"
+fi
+
+echo "==> Done!"
+echo "    Image: $FULL_IMAGE:$TAG"
+echo "    Platforms: $PLATFORMS"
--- a/bastion/scripts/build-labd.sh
+++ b/bastion/scripts/build-labd.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# Build labd container image (multi-arch) and push to Gitea container registry
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+cd "$PROJECT_ROOT"
+
+# Load .env for GITEA_TOKEN
+if [ -f .env ]; then
+  set -a; source .env; set +a
+fi
+
+# ── Argument parsing ───────────────────────────────────────────────
+PUSH=false
+PLATFORMS="linux/amd64,linux/arm64"
+
+usage() {
+  cat <<EOF
+Usage: $(basename "$0") [OPTIONS] [TAG]
+
+Build labd container image (multi-arch) and optionally push to registry.
+
+Options:
+  --push             Push to registry after building
+  --platforms LIST   Comma-separated platforms (default: linux/amd64,linux/arm64)
+  -h, --help         Show this help message
+
+Arguments:
+  TAG                Image tag (default: version from package.json)
+EOF
+  exit 0
+}
+
+POSITIONAL_ARGS=()
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --push)
+      PUSH=true
+      shift
+      ;;
+    --platforms)
+      PLATFORMS="$2"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      ;;
+    *)
+      POSITIONAL_ARGS+=("$1")
+      shift
+      ;;
+  esac
+done
+
+REGISTRY="${GITEA_REGISTRY:-mysources.co.uk}"
+REPO="michal/lab/labd"
+FULL_IMAGE="$REGISTRY/$REPO"
+VERSION=$(node -p "require('./package.json').version")
+TAG="${POSITIONAL_ARGS[0]:-$VERSION}"
+
+echo "==> Building labd image"
+echo "    Tag:       $TAG"
+echo "    Platforms: $PLATFORMS"
+echo "    Registry:  $FULL_IMAGE"
+
+# ── Build multi-arch manifest ────────────────────────────────────
+MANIFEST="lab-labd:$TAG"
+
+# Remove existing manifest/image with the same tag
+podman manifest rm "$MANIFEST" 2>/dev/null || true
+podman rmi "$MANIFEST" 2>/dev/null || true
+
+echo "==> Building for platforms: $PLATFORMS..."
+podman build \
+  --platform "$PLATFORMS" \
+  --manifest "$MANIFEST" \
+  -f Dockerfile.labd \
+  .
+
+echo "==> Build complete. Manifest:"
+podman manifest inspect "$MANIFEST" | grep -E '"(architecture|os)"'
+
+# ── Push ─────────────────────────────────────────────────────────
+if [ "$PUSH" = true ]; then
+  if [ -z "$GITEA_TOKEN" ]; then
+    if [ -f "$HOME/.gitea-token" ]; then
+      GITEA_TOKEN="$(cat "$HOME/.gitea-token")"
+    else
+      echo "ERROR: GITEA_TOKEN not set and ~/.gitea-token not found"
+      exit 1
+    fi
+  fi
+
+  echo "==> Logging in to $REGISTRY..."
+  podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
+
+  echo "==> Pushing $FULL_IMAGE:$TAG..."
+  podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
+
+  if [ "$TAG" != "latest" ]; then
+    echo "==> Also pushing as :latest..."
+    podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
+  fi
+
+  if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
+    source "$SCRIPT_DIR/link-package.sh"
+    link_package "container" "labd"
+  fi
+
+  echo "==> Pushed successfully!"
+else
+  echo "==> Skipping push (use --push to push to registry)"
+fi
+
+echo "==> Done!"
+echo "    Image: $FULL_IMAGE:$TAG"
+echo "    Platforms: $PLATFORMS"
--- a/bastion/scripts/build-rpm.sh
+++ b/bastion/scripts/build-rpm.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+cd "$PROJECT_ROOT"
+
+# Load .env if present
+if [ -f .env ]; then
+  set -a; source .env; set +a
+fi
+
+# Ensure tools are on PATH
+export PATH="$HOME/.npm-global/bin:$HOME/.bun/bin:$HOME/.local/bin:$PATH"
+
+# ── Argument parsing ───────────────────────────────────────────────
+BUILD_ALL=false
+TARGET_ARCH=""
+SKIP_TESTS=false
+
+# Print help and exit; the optional first argument is the exit status (default 0).
+usage() {
+  cat <<EOF
+Usage: $(basename "$0") [OPTIONS]
+
+Build labctl binary and produce RPM/DEB packages.
+
+Options:
+  --arch ARCH    Target architecture: x86_64 or arm64 (default: host arch)
+  --all          Build for both x86_64 and arm64
+  --skip-tests   Skip unit tests (useful in CI where tests ran separately)
+  -h, --help     Show this help message
+EOF
+  exit "${1:-0}"
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --arch)
+      # A missing value aborts with a message instead of a silent 'shift 2' failure.
+      TARGET_ARCH="${2:?--arch requires a value (x86_64 or arm64)}"
+      shift 2
+      ;;
+    --all)
+      BUILD_ALL=true
+      shift
+      ;;
+    --skip-tests)
+      SKIP_TESTS=true
+      shift
+      ;;
+    -h|--help) usage ;;
+    *)
+      echo "Unknown option: $1" >&2
+      usage 1 >&2
+      ;;
+  esac
+done
+
+# ── Resolve host architecture ─────────────────────────────────────
+# Print the canonical name of the host architecture: aarch64 is reported as
+# arm64; every other value of uname -m is passed through unchanged.
+detect_host_arch() {
+  local host_arch
+  host_arch="$(uname -m)"
+  case "$host_arch" in
+    aarch64|arm64) host_arch="arm64" ;;
+  esac
+  echo "$host_arch"
+}
+
+# ── Architecture mapping helpers ──────────────────────────────────
+# Maps our canonical arch names to the values each tool expects.
+bun_target_for() {
+  case "$1" in
+    x86_64) echo "bun-linux-x64" ;;
+    arm64)  echo "bun-linux-arm64" ;;
+    *)      echo "Unsupported architecture: '$1' (expected x86_64 or arm64)" >&2; return 1 ;;
+  esac
+}
+
+nfpm_arch_for() {
+  case "$1" in
+    x86_64) echo "amd64" ;;
+    arm64)  echo "arm64" ;;
+    *)      echo "Unsupported architecture: '$1' (expected x86_64 or arm64)" >&2; return 1 ;;
+  esac
+}
+
+rpm_arch_for() {
+  case "$1" in
+    x86_64) echo "x86_64" ;;
+    arm64)  echo "aarch64" ;;
+    *)      echo "Unsupported architecture: '$1' (expected x86_64 or arm64)" >&2; return 1 ;;
+  esac
+}
+
+# DEB architecture names are the same as the nfpm ones (amd64 / arm64),
+# so this delegates instead of repeating the table.
+deb_arch_for() { nfpm_arch_for "$1"; }
+
+# ── Build one architecture ────────────────────────────────────────
+# Build the standalone labctl binary for one architecture and package it as RPM
+# and DEB into dist/. $1 is the canonical arch name (x86_64 or arm64). Sets the
+# globals RPM_FILE and DEB_FILE to the produced package paths.
+build_arch() {
+  local arch="$1"
+  local bun_target nfpm_arch rpm_arch deb_arch binary_name tmpconfig
+
+  # Reject unknown names up front: otherwise the mapping lookups below could
+  # hand an empty --target to bun and an empty arch to nfpm.
+  case "$arch" in
+    x86_64|arm64) ;;
+    *) echo "ERROR: unsupported architecture '$arch' (expected x86_64 or arm64)" >&2; exit 1 ;;
+  esac
+
+  bun_target="$(bun_target_for "$arch")"
+  nfpm_arch="$(nfpm_arch_for "$arch")"
+  rpm_arch="$(rpm_arch_for "$arch")"
+  deb_arch="$(deb_arch_for "$arch")"
+  binary_name="dist/labctl-${arch}"
+
+  echo ""
+  echo "==> Bundling standalone binary for ${arch}..."
+  bun build src/cli/src/index.ts --compile --target="${bun_target}" --outfile "${binary_name}"
+
+  # One temporary nfpm config (correct arch + binary path) serves both packagers.
+  tmpconfig="$(mktemp /tmp/nfpm-XXXXXX.yaml)"
+  sed -e "s|^arch:.*|arch: ${nfpm_arch}|" \
+      -e "s|src: ./dist/labctl$|src: ./${binary_name}|" \
+      nfpm.yaml > "$tmpconfig"
+
+  echo "==> Packaging RPM (${arch})..."
+  nfpm pkg --config "$tmpconfig" --packager rpm --target dist/ || { rm -f "$tmpconfig"; exit 1; }
+  RPM_FILE=$(ls dist/labctl-*.${rpm_arch}.rpm 2>/dev/null | head -1)
+  [ -n "$RPM_FILE" ] || { echo "ERROR: no ${rpm_arch} RPM found in dist/ after packaging" >&2; rm -f "$tmpconfig"; exit 1; }
+  echo "==> Built: $RPM_FILE"
+  echo "    Size: $(du -h "$RPM_FILE" | cut -f1)"
+
+  echo ""
+  echo "==> Packaging DEB (${arch})..."
+  nfpm pkg --config "$tmpconfig" --packager deb --target dist/ || { rm -f "$tmpconfig"; exit 1; }
+  rm -f "$tmpconfig"
+  DEB_FILE=$(ls dist/labctl_*_${deb_arch}.deb 2>/dev/null | head -1)
+  [ -n "$DEB_FILE" ] || { echo "ERROR: no ${deb_arch} DEB found in dist/ after packaging" >&2; exit 1; }
+  echo "==> Built: $DEB_FILE"
+  echo "    Size: $(du -h "$DEB_FILE" | cut -f1)"
+}
+
+# ── Main ──────────────────────────────────────────────────────────
+
+if [ "$SKIP_TESTS" = false ]; then  # SKIP_TESTS only becomes true via --skip-tests
+  echo "==> Running unit tests..."
+  pnpm test:run
+  echo ""
+fi
+
+echo "==> Building TypeScript..."
+pnpm build
+
+echo "==> Generating shell completions..."
+pnpm completions:generate
+
+mkdir -p dist
+rm -f dist/labctl dist/labctl-x86_64 dist/labctl-arm64 dist/labctl-*.rpm dist/labctl*.deb  # clear earlier artifacts; build_arch finds its packages by glob
+
+if [ "$BUILD_ALL" = true ]; then  # --all takes precedence over --arch
+  build_arch "x86_64"
+  build_arch "arm64"
+elif [ -n "$TARGET_ARCH" ]; then
+  build_arch "$TARGET_ARCH"
+else
+  # Neither --all nor --arch given: build for the machine running this script
+  HOST_ARCH="$(detect_host_arch)"
+  build_arch "$HOST_ARCH"
+fi
+
+echo ""
+echo "==> Build complete. Artifacts in dist/:"
+ls -lh dist/labctl* 2>/dev/null || echo "  (none)"
--- a/bastion/scripts/generate-completions.ts
+++ b/bastion/scripts/generate-completions.ts
@@ -0,0 +1,444 @@
+#!/usr/bin/env tsx
+/**
+ * generate-completions.ts -- auto-generates shell completions from the commander.js command tree.
+ *
+ * Usage:
+ *   tsx scripts/generate-completions.ts           # print generated files to stdout
+ *   tsx scripts/generate-completions.ts --write   # write completions/ files
+ *   tsx scripts/generate-completions.ts --check   # exit 0 if files match, 1 if stale
+ *
+ * Requires `pnpm build` to have run first (workspace packages must be compiled).
+ */
+
+import { Command, type Option, type Argument } from 'commander';
+import { readFileSync, writeFileSync, mkdirSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const ROOT = join(__dirname, '..');
+
+// ============================================================
+// Command tree extraction
+// ============================================================
+
+interface CmdInfo { // plain-data snapshot of one commander command
+  name: string; // cmd.name()
+  description: string; // cmd.description()
+  hidden: boolean; // commander's private _hidden flag; false when it is unset
+  options: OptInfo[]; // the command's own options, without --help / --version
+  args: ArgInfo[]; // one entry per element of the command's registeredArguments
+  subcommands: CmdInfo[]; // children; a registered help subcommand appears as a fixed stub
+}
+
+interface OptInfo { // plain-data snapshot of one commander option
+  short?: string; // short flag as commander stores it; generateFish strips one '-' before use
+  long: string; // long flag including the leading '--'
+  description: string;
+  takesValue: boolean; // true when the option's argument is required or optional
+  choices?: string[]; // allowed values (commander's argChoices), when declared
+  negate: boolean; // commander's negate flag; presumably set for --no-* options (confirm)
+}
+
+interface ArgInfo { // plain-data snapshot of one declared command argument
+  name: string;
+  description: string;
+  required: boolean;
+  variadic: boolean;
+  choices?: string[]; // NOTE(review): neither generator in this file reads ArgInfo.choices
+}
+
+/** Flatten a commander Option into the plain OptInfo record the generators consume. */
+function extractOption(opt: Option): OptInfo {
+  type RawOption = { short?: string; long: string; required?: boolean; optional?: boolean; negate?: boolean; argChoices?: string[] };
+  const { short, long, required, optional, negate, argChoices } = opt as unknown as RawOption;
+  const takesValue = required || optional || false;
+  return {
+    short: short || undefined, long, description: opt.description,
+    takesValue, choices: argChoices || undefined, negate: negate || false,
+  };
+}
+
+/** Flatten a commander Argument into an ArgInfo record. */
+function extractArgument(arg: Argument): ArgInfo {
+  const raw = arg as unknown as { required: boolean; variadic: boolean; argChoices?: string[] };
+  return {
+    name: arg.name(), description: arg.description, required: raw.required, variadic: raw.variadic,
+    // commander stores Argument.choices() values in `argChoices`; it has no `_choices` field.
+    choices: raw.argChoices || undefined,
+  };
+}
+
+/**
+ * Recursively convert a commander Command into a CmdInfo tree. The --help and
+ * --version options are left out; a registered `help` subcommand is not
+ * descended into but replaced by a fixed stub appended after its siblings.
+ */
+function extractCommand(cmd: Command): CmdInfo {
+  const SKIPPED_FLAGS = ['--help', '--version'];
+  const children = cmd.commands as Command[];
+
+  const options: OptInfo[] = [];
+  for (const option of cmd.options as Option[]) {
+    const flag = (option as unknown as Record<string, string>).long;
+    if (!SKIPPED_FLAGS.includes(flag)) options.push(extractOption(option));
+  }
+
+  const declaredArgs = (cmd as unknown as Record<string, Argument[]>).registeredArguments ?? [];
+  const args = declaredArgs.map(extractArgument);
+
+  const subcommands: CmdInfo[] = [];
+  let sawHelp = false;
+  for (const child of children) {
+    if (child.name() === 'help') sawHelp = true;
+    else subcommands.push(extractCommand(child));
+  }
+  if (sawHelp) {
+    subcommands.push({ name: 'help', description: 'display help for command', hidden: false, options: [], args: [], subcommands: [] });
+  }
+
+  return {
+    name: cmd.name(),
+    description: cmd.description(),
+    hidden: (cmd as unknown as Record<string, boolean>)._hidden ?? false,
+    options, args, subcommands,
+  };
+}
+
+/** Load the CLI module, build its commander program, and return the extracted tree. */
+async function extractTree(): Promise<CmdInfo> {
+  const { createProgram: buildProgram } = await import('../src/cli/src/index.js') as { createProgram: () => Command };
+  return extractCommand(buildProgram());
+}
+
+// ============================================================
+// Utilities
+// ============================================================
+
+function esc(s: string): string { // escape for a fish single-quoted string, where only \\ and \' are escapes
+  return s.replace(/\\/g, '\\\\').replace(/'/g, "\\'"); // backslashes first, so the ones added for quotes are not doubled
+}
+
+/**
+ * List every descendant of `cmd` in pre-order (a command precedes its own
+ * subcommands), each paired with the chain of names leading to it from `cmd`.
+ */
+function collectCommands(cmd: CmdInfo, prefix: string[] = []): { path: string[]; cmd: CmdInfo }[] {
+  return cmd.subcommands.flatMap((child) => {
+    const path = [...prefix, child.name];
+    return [{ path, cmd: child }, ...collectCommands(child, path)];
+  });
+}
+
+// ============================================================
+// Fish completion generator
+// ============================================================
+
+function generateFish(root: CmdInfo): string { // returns the full text of the fish completion script for the tree under root
+  const lines: string[] = [];
+  const emit = (s: string): void => { lines.push(s); };
+  const BIN = root.name;
+
+  emit(`# ${BIN} fish completions -- auto-generated by scripts/generate-completions.ts`);
+  emit('# DO NOT EDIT MANUALLY -- run: pnpm completions:generate');
+  emit('');
+  emit(`complete -c ${BIN} -e`);
+  emit(`complete -c ${BIN} -f`);
+  emit('');
+
+  // Global options: written out by hand because extractCommand drops --help/--version from the tree
+  emit('# Global options');
+  emit(`complete -c ${BIN} -s v -l version -d 'Show version'`);
+  emit(`complete -c ${BIN} -s h -l help -d 'Show help'`);
+  emit('');
+
+  const allCmds = collectCommands(root);
+
+  // Helper: succeeds only when the non-option tokens are exactly the given chain. NOTE(review): only tokens starting with '-' are skipped, so a separate option value counts as a positional.
+  emit('# Helper: test if exactly a subcommand chain is active (no extra positional args)');
+  emit(`function __${BIN}_using_cmd`);
+  emit('    set -l tokens (commandline -opc)');
+  emit('    set -l expected $argv');
+  emit('    set -l depth (count $expected)');
+  emit('    set -l found 0');
+  emit('    set -l i 1');
+  emit('    for tok in $tokens[2..]');
+  emit('        if string match -q -- "-*" $tok');
+  emit('            continue');
+  emit('        end');
+  emit('        set i (math $i + 1)');
+  emit('        set -l idx (math $i - 1)');
+  emit('        if test $idx -le $depth');
+  emit('            if test "$tok" != "$expected[$idx]"');
+  emit('                return 1');
+  emit('            end');
+  emit('            set found (math $found + 1)');
+  emit('        else');
+  emit('            return 1');
+  emit('        end');
+  emit('    end');
+  emit('    test $found -eq $depth');
+  emit('end');
+  emit('');
+
+  // Helper: succeeds when the non-option tokens START WITH the given chain; extra positionals after it are allowed
+  emit('# Helper: test if command starts with a subcommand chain (options still apply after args)');
+  emit(`function __${BIN}_in_cmd`);
+  emit('    set -l tokens (commandline -opc)');
+  emit('    set -l expected $argv');
+  emit('    set -l depth (count $expected)');
+  emit('    set -l found 0');
+  emit('    for tok in $tokens[2..]');
+  emit('        if string match -q -- "-*" $tok');
+  emit('            continue');
+  emit('        end');
+  emit('        set found (math $found + 1)');
+  emit('        if test $found -le $depth');
+  emit('            if test "$tok" != "$expected[$found]"');
+  emit('                return 1');
+  emit('            end');
+  emit('        end');
+  emit('    end');
+  emit('    test $found -ge $depth');
+  emit('end');
+  emit('');
+
+  // Dynamic completions: the emitted functions pipe curl into python3. NOTE(review): the URL http://localhost:8080/api/machines is hard-coded here.
+  emit('# Dynamic: fetch machine hostnames from bastion (installed + queued)');
+  emit(`function __${BIN}_installed_hosts`);
+  emit('    curl -s http://localhost:8080/api/machines 2>/dev/null | ');
+  emit("        python3 -c 'import sys,json; d=json.load(sys.stdin); hosts=[v.get(\"hostname\",\"\") for v in {**d.get(\"install_queue\",{}), **d.get(\"installed\",{})}.values() if v.get(\"hostname\")]; [print(h) for h in set(hosts)]' 2>/dev/null");
+  emit('end');
+  emit('');
+
+  emit('# Dynamic: fetch all known MAC addresses (discovered + queue + installed)');
+  emit(`function __${BIN}_known_macs`);
+  emit('    curl -s http://localhost:8080/api/machines 2>/dev/null | ');
+  emit("        python3 -c 'import sys,json; d=json.load(sys.stdin); [print(k) for k in {**d.get(\"discovered\",{}), **d.get(\"install_queue\",{}), **d.get(\"installed\",{})}]' 2>/dev/null");
+  emit('end');
+  emit('');
+
+  emit('# Dynamic: fetch hostnames and MACs from all states');
+  emit(`function __${BIN}_hosts_and_macs`);
+  emit('    curl -s http://localhost:8080/api/machines 2>/dev/null | ');
+  emit("        python3 -c 'import sys,json; d=json.load(sys.stdin); a={**d.get(\"discovered\",{}), **d.get(\"install_queue\",{}), **d.get(\"installed\",{})}; macs=list(a.keys()); hosts=[v.get(\"hostname\",\"\") for v in {**d.get(\"install_queue\",{}), **d.get(\"installed\",{})}.values() if v.get(\"hostname\")]; [print(x) for x in set(macs+hosts)]' 2>/dev/null");
+  emit('end');
+  emit('');
+
+  // Target completions: these command paths are listed by hand, not derived from the extracted tree
+  emit('# Target argument completions');
+  // app k3s — takes hostname/IP
+  emit(`complete -c ${BIN} -n "__${BIN}_using_cmd app k3s install" -a "(__${BIN}_installed_hosts)" -d 'installed host'`);
+  emit(`complete -c ${BIN} -n "__${BIN}_using_cmd app k3s health" -a "(__${BIN}_installed_hosts)" -d 'installed host'`);
+  emit(`complete -c ${BIN} -n "__${BIN}_using_cmd app labcontroller deploy" -a "(__${BIN}_installed_hosts)" -d 'installed host'`);
+  emit(`complete -c ${BIN} -n "__${BIN}_using_cmd app labcontroller status" -a "(__${BIN}_installed_hosts)" -d 'installed host'`);
+  // provision install — takes MAC then hostname
+  emit(`complete -c ${BIN} -n "__${BIN}_using_cmd provision install" -a "(__${BIN}_known_macs)" -d 'MAC address'`);
+  // provision reprovision/forget/logs — takes MAC or hostname
+  emit(`complete -c ${BIN} -n "__${BIN}_using_cmd provision reprovision" -a "(__${BIN}_hosts_and_macs)" -d 'host or MAC'`);
+  emit(`complete -c ${BIN} -n "__${BIN}_using_cmd provision forget" -a "(__${BIN}_hosts_and_macs)" -d 'host or MAC'`);
+  emit(`complete -c ${BIN} -n "__${BIN}_using_cmd provision logs" -a "(__${BIN}_hosts_and_macs)" -d 'host or MAC'`);
+  emit('');
+
+  // Top-level commands: offered only while none of the visible top-level names has been typed yet
+  const topCmds = root.subcommands.filter((c) => !c.hidden);
+  emit('# Top-level commands');
+  for (const cmd of topCmds) {
+    emit(`complete -c ${BIN} -n "not __fish_seen_subcommand_from ${topCmds.map((c) => c.name).join(' ')}" -a ${cmd.name} -d '${esc(cmd.description)}'`);
+  }
+  emit('');
+
+  // Subcommands and options at each level (hidden commands are skipped; their descendants are still visited)
+  for (const { path, cmd } of allCmds) {
+    if (cmd.hidden) continue;
+
+    // If this command has subcommands, offer them
+    const visibleSubs = cmd.subcommands.filter((s) => !s.hidden);
+    if (visibleSubs.length > 0) {
+      const parentCondition = `__${BIN}_using_cmd ${path.join(' ')}`;
+      emit(`# ${path.join(' ')} subcommands`);
+      for (const sub of visibleSubs) {
+        emit(`complete -c ${BIN} -n "${parentCondition}" -a ${sub.name} -d '${esc(sub.description)}'`);
+      }
+      emit('');
+    }
+
+    // Options for this command (use __in_cmd so options complete even after positional args)
+    if (cmd.options.length > 0) {
+      const condition = `__${BIN}_in_cmd ${path.join(' ')}`;
+      emit(`# ${path.join(' ')} options`);
+      for (const opt of cmd.options) {
+        const parts = [`complete -c ${BIN} -n "${condition}"`];
+        if (opt.short) parts.push(`-s ${opt.short.replace('-', '')}`);
+        parts.push(`-l ${opt.long.replace(/^--/, '')}`);
+        parts.push(`-d '${esc(opt.description)}'`);
+        if (opt.takesValue) {
+          if (opt.choices) {
+            parts.push(`-xa '${opt.choices.join(' ')}'`); // value required, completed from the declared choices
+          } else {
+            parts.push('-x'); // value required, no candidates offered
+          }
+        }
+        emit(parts.join(' '));
+      }
+      emit('');
+    }
+  }
+
+  return lines.join('\n') + '\n';
+}
+
+// ============================================================
+// Bash completion generator
+// ============================================================
+
+function generateBash(root: CmdInfo): string { // returns the full text of the bash completion script for the tree under root
+  const lines: string[] = [];
+  const emit = (s: string): void => { lines.push(s); };
+  const BIN = root.name;
+
+  emit(`# ${BIN} bash completions -- auto-generated by scripts/generate-completions.ts`);
+  emit('# DO NOT EDIT MANUALLY -- run: pnpm completions:generate');
+  emit('');
+
+  const allCmds = collectCommands(root);
+  const topCmds = root.subcommands.filter((c) => !c.hidden).map((c) => c.name);
+
+  emit(`_${BIN}() {`);
+  emit('  local cur prev words cword');
+  emit('  _init_completion || return'); // the generated script calls _init_completion, which it does not define itself
+  emit('');
+  emit(`  local top_commands="${topCmds.join(' ')}"`);
+  emit('');
+
+  // Emitted bash: collect the words before the cursor that do not start with '-' into subcmd_chain
+  emit('  # Extract the subcommand chain (skip options and their values)');
+  emit('  local -a subcmd_chain=()');
+  emit('  local i skip_next=false'); // NOTE(review): nothing emitted below ever sets skip_next to true, so option values are not skipped
+  emit('  for ((i=1; i < cword; i++)); do');
+  emit('    if $skip_next; then skip_next=false; continue; fi');
+  emit('    case "${words[i]}" in');
+  emit('      -*) ;;  # skip options');
+  emit('      *) subcmd_chain+=("${words[i]}") ;;');
+  emit('    esac');
+  emit('  done');
+  emit('');
+  emit('  local chain_len=${#subcmd_chain[@]}');
+  emit('  local chain_str="${subcmd_chain[*]}"');
+  emit('');
+
+  // One case arm per visible command path, matched against the space-joined chain as an exact string
+  emit('  case "$chain_str" in');
+
+  // Deepest paths are emitted first (stable sort by descending path length)
+  const sortedCmds = [...allCmds].sort((a, b) => b.path.length - a.path.length);
+
+  for (const { path, cmd } of sortedCmds) {
+    if (cmd.hidden) continue;
+    const pathStr = path.join(' ');
+    const visibleSubs = cmd.subcommands.filter((s) => !s.hidden).map((s) => s.name);
+    const optFlags: string[] = [];
+    for (const opt of cmd.options) {
+      if (opt.short) optFlags.push(opt.short);
+      optFlags.push(opt.long);
+    }
+    optFlags.push('-h', '--help'); // help flags are appended to every command's word list
+
+    const completions = [...visibleSubs, ...optFlags].join(' ');
+    emit(`    "${pathStr}")`);
+    emit(`      COMPREPLY=($(compgen -W "${completions}" -- "$cur"))`);
+    emit('      return ;;');
+  }
+
+  // Empty chain (no non-option word typed yet): top-level commands plus the global flags
+  emit('    "")');
+  emit(`      COMPREPLY=($(compgen -W "$top_commands -h --help -v --version" -- "$cur"))`);
+  emit('      return ;;');
+
+  // Any chain that matches no known path (for example one followed by a positional argument): help flags only
+  emit('    *)');
+  emit('      COMPREPLY=($(compgen -W "-h --help" -- "$cur"))');
+  emit('      return ;;');
+
+  emit('  esac');
+  emit('}');
+  emit('');
+  emit(`complete -F _${BIN} ${BIN}`);
+
+  return lines.join('\n') + '\n';
+}
+
+// ============================================================
+// Main
+// ============================================================
+
+/**
+ * Entry point. Builds both completion scripts from the CLI's command tree and
+ * then acts on the first command-line argument:
+ *   --check  compare with the files under completions/; exit 1 if any differs or cannot be read
+ *   --write  create completions/ if needed and write both files
+ *   other    print both scripts to stdout
+ */
+async function main(): Promise<void> {
+  const mode = process.argv[2] ?? '';
+
+  let tree: CmdInfo;
+  try {
+    tree = await extractTree();
+  } catch (err) {
+    console.error('Failed to extract command tree from createProgram().');
+    console.error('Make sure workspace packages are built: pnpm build');
+    console.error(err);
+    process.exit(1);
+  }
+
+  const completionsDir = join(ROOT, 'completions');
+  // Fish comes before bash in every step: generation, check, write and print.
+  const outputs = [
+    { file: 'labctl.fish', content: generateFish(tree) },
+    { file: 'labctl.bash', content: generateBash(tree) },
+  ].map((o) => ({ ...o, path: join(completionsDir, o.file) }));
+
+  if (mode === '--check') {
+    let stale = false;
+    for (const { file, path, content } of outputs) {
+      let onDisk: string;
+      try {
+        onDisk = readFileSync(path, 'utf-8');
+      } catch {
+        // Any read failure is reported as a missing file.
+        console.error(`completions/${file} does not exist`);
+        stale = true;
+        continue;
+      }
+      if (onDisk !== content) {
+        console.error(`completions/${file} is stale`);
+        stale = true;
+      }
+    }
+    if (stale) {
+      console.error('Run: pnpm completions:generate');
+      process.exit(1);
+    }
+    console.log('Completions are up to date.');
+    process.exit(0);
+  }
+
+  if (mode === '--write') {
+    mkdirSync(completionsDir, { recursive: true });
+    for (const { path, content } of outputs) writeFileSync(path, content);
+    for (const { path } of outputs) console.log(`Wrote ${path}`);
+    process.exit(0);
+  }
+
+  // Default: print to stdout
+  for (const { file, content } of outputs) {
+    console.log(`=== completions/${file} ===`);
+    console.log(content);
+  }
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});
--- a/bastion/scripts/link-package.sh
+++ b/bastion/scripts/link-package.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Link a Gitea package to a repository.
+# Works automatically on Gitea 1.24+ (uses API), warns on older versions.
+#
+# Usage: source scripts/link-package.sh
+#        link_package <type> <name>
+#
+# Requires: GITEA_URL, GITEA_TOKEN, GITEA_OWNER, GITEA_REPO
+
+link_package() {
+  local PKG_TYPE="$1"  # e.g. "rpm", "container"
+  local PKG_NAME="$2"  # e.g. "lab", "lab-bastion"
+
+  if [ -z "$PKG_TYPE" ] || [ -z "$PKG_NAME" ]; then
+    echo "Usage: link_package <type> <name>"
+    return 1
+  fi
+
+  local GITEA_URL="${GITEA_URL:-http://10.0.0.194:3012}"
+  local GITEA_OWNER="${GITEA_OWNER:-michal}"
+  local GITEA_REPO="${GITEA_REPO:-lab}"
+
+  if [ -z "$GITEA_TOKEN" ]; then
+    echo "WARNING: GITEA_TOKEN not set, skipping package-repo linking."
+    return 0
+  fi
+
+  # Already linked? Names go in via argv, not into the Python source; '|| true' keeps 'set -e' callers alive.
+  local REPO_LINK
+  REPO_LINK=$(curl -s -H "Authorization: token ${GITEA_TOKEN}" \
+    "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}" \
+    | python3 -c "
+import json,sys
+for p in json.load(sys.stdin):
+    if p['type']==sys.argv[1] and p['name']==sys.argv[2]:
+        r=p.get('repository')
+        if r: print(r['full_name'])
+        break
+" "$PKG_TYPE" "$PKG_NAME" 2>/dev/null) || true
+
+  if [ -n "$REPO_LINK" ]; then
+    echo "==> Package ${PKG_TYPE}/${PKG_NAME} already linked to ${REPO_LINK}"
+    return 0
+  fi
+
+  # Try Gitea 1.24+ link API; a transport failure must not abort the caller either.
+  local HTTP_CODE
+  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
+    -H "Authorization: token ${GITEA_TOKEN}" \
+    "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/${PKG_TYPE}/${PKG_NAME}/-/link/${GITEA_REPO}") || true
+
+  if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
+    echo "==> Linked ${PKG_TYPE}/${PKG_NAME} to ${GITEA_OWNER}/${GITEA_REPO}"
+    return 0
+  fi
+
+  # Linking is best-effort: report the status and fall back to manual instructions.
+  local PUBLIC_URL="${GITEA_PUBLIC_URL:-${GITEA_URL}}"
+  echo ""
+  echo "WARNING: Could not auto-link ${PKG_TYPE}/${PKG_NAME} to repository (HTTP ${HTTP_CODE}; the link API needs Gitea 1.24+)."
+  echo "Link it manually in the Gitea UI:"
+  echo "  ${PUBLIC_URL}/${GITEA_OWNER}/-/packages/${PKG_TYPE}/${PKG_NAME}/settings"
+  echo "  -> Link to repository: ${GITEA_OWNER}/${GITEA_REPO}"
+  return 0
+}
--- a/bastion/scripts/publish-deb.sh
+++ b/bastion/scripts/publish-deb.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+cd "$PROJECT_ROOT"
+
+# Load .env if present
+if [ -f .env ]; then
+  set -a; source .env; set +a
+fi
+
+GITEA_URL="${GITEA_URL:-http://10.0.0.194:3012}"
+GITEA_PUBLIC_URL="${GITEA_PUBLIC_URL:-https://mysources.co.uk}"
+GITEA_OWNER="${GITEA_OWNER:-michal}"
+GITEA_REPO="${GITEA_REPO:-lab}"
+
+GITEA_TOKEN="${GITEA_TOKEN:-$PACKAGES_TOKEN}"
+if [ -z "$GITEA_TOKEN" ]; then
+  echo "Error: GITEA_TOKEN (or PACKAGES_TOKEN) not set. Add it to .env or export it."
+  exit 1
+fi
+
+# Publish every DEB in dist/ (build-rpm.sh --all leaves one per architecture).
+shopt -s nullglob; DEB_FILES=(dist/labctl*.deb); shopt -u nullglob
+if [ "${#DEB_FILES[@]}" -eq 0 ]; then
+  echo "Error: No DEB found in dist/. Run scripts/build-rpm.sh first."
+  exit 1
+fi
+
+# Gitea Debian registry: PUT /api/packages/{owner}/debian/pool/{distribution}/{component}/upload
+# Debian: trixie (13/stable), forky (14/testing); Ubuntu: noble (24.04 LTS), plucky (25.04)
+DISTRIBUTIONS="trixie forky noble plucky"
+UPLOAD_OUT="$(mktemp)"
+FAILED=0
+for DEB_FILE in "${DEB_FILES[@]}"; do
+  DEB_VERSION=$(dpkg-deb --field "$DEB_FILE" Version 2>/dev/null || echo "unknown")  # from the package's control data
+  echo "==> Publishing $DEB_FILE (version $DEB_VERSION) to ${GITEA_URL}..."
+  for DIST in $DISTRIBUTIONS; do
+    echo "  -> $DIST..."
+    HTTP_CODE=$(curl -s -o "$UPLOAD_OUT" -w "%{http_code}" -X PUT \
+      -H "Authorization: token ${GITEA_TOKEN}" --upload-file "$DEB_FILE" \
+      "${GITEA_URL}/api/packages/${GITEA_OWNER}/debian/pool/${DIST}/main/upload") || true
+    case "$HTTP_CODE" in
+      200|201) echo "     Published to $DIST" ;;
+      409)     echo "     Already exists in $DIST (skipping)" ;;
+      *)
+        echo "     WARNING: Upload to $DIST returned HTTP $HTTP_CODE"
+        cat "$UPLOAD_OUT" 2>/dev/null || true
+        echo ""
+        FAILED=1
+        ;;
+    esac
+  done
+done
+rm -f "$UPLOAD_OUT"
+echo ""
+if [ "$FAILED" -eq 0 ]; then
+  echo "==> Published successfully!"
+else
+  echo "==> WARNING: Finished, but one or more uploads failed (see above)."
+fi
+
+# Ensure package is linked to the repository
+source "$SCRIPT_DIR/link-package.sh"
+link_package "debian" "labctl"
+
+echo ""
+echo "Install with:"
+echo "  echo \"deb ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian trixie main\" | sudo tee /etc/apt/sources.list.d/labctl.list"
+echo "  curl -fsSL ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian/repository.key | sudo gpg --dearmor -o /etc/apt/keyrings/labctl.gpg"
+echo "  sudo apt update && sudo apt install labctl"
--- a/bastion/scripts/publish-rpm.sh
+++ b/bastion/scripts/publish-rpm.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+cd "$PROJECT_ROOT"
+
+# Load .env if present
+if [ -f .env ]; then
+  set -a; source .env; set +a
+fi
+
+GITEA_URL="${GITEA_URL:-http://10.0.0.194:3012}"
+GITEA_PUBLIC_URL="${GITEA_PUBLIC_URL:-https://mysources.co.uk}"
+GITEA_OWNER="${GITEA_OWNER:-michal}"
+GITEA_REPO="${GITEA_REPO:-lab}"
+
+GITEA_TOKEN="${GITEA_TOKEN:-$PACKAGES_TOKEN}"
+if [ -z "$GITEA_TOKEN" ]; then
+  echo "Error: GITEA_TOKEN (or PACKAGES_TOKEN) not set. Add it to .env or export it."
+  exit 1
+fi
+
+# Publish every RPM in dist/ (build-rpm.sh --all leaves one per architecture).
+shopt -s nullglob; RPM_FILES=(dist/labctl-*.rpm); shopt -u nullglob
+if [ "${#RPM_FILES[@]}" -eq 0 ]; then
+  echo "Error: No RPM found in dist/. Run scripts/build-rpm.sh first."
+  exit 1
+fi
+
+# Get version string as it appears in Gitea (e.g. "0.1.0-1"); assumed identical for every RPM of one build
+RPM_VERSION=$(rpm -qp --queryformat '%{VERSION}-%{RELEASE}' "${RPM_FILES[0]}")
+
+# Check if version already exists and delete it first. Done once, before any upload:
+# the DELETE targets the whole version, so repeating it per file would drop earlier uploads.
+EXISTING=$(curl -s -o /dev/null -w "%{http_code}" \
+  -H "Authorization: token ${GITEA_TOKEN}" \
+  "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/labctl/${RPM_VERSION}")
+
+if [ "$EXISTING" = "200" ]; then
+  echo "==> Version $RPM_VERSION already exists, replacing..."
+  curl -s -o /dev/null -X DELETE \
+    -H "Authorization: token ${GITEA_TOKEN}" \
+    "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/labctl/${RPM_VERSION}"
+fi
+
+for RPM_FILE in "${RPM_FILES[@]}"; do
+  echo "==> Publishing $RPM_FILE (version $RPM_VERSION) to ${GITEA_URL}..."
+  curl --fail -s -X PUT -H "Authorization: token ${GITEA_TOKEN}" \
+    --upload-file "$RPM_FILE" "${GITEA_URL}/api/packages/${GITEA_OWNER}/rpm/upload"
+done
+
+echo ""
+echo "==> Published successfully!"
+
+# Ensure package is linked to the repository
+source "$SCRIPT_DIR/link-package.sh"
+link_package "rpm" "labctl"
+
+echo ""
+echo "Install with:"
+echo "  sudo dnf install labctl  # if repo already configured"
--- a/bastion/scripts/release.sh
+++ b/bastion/scripts/release.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+cd "$PROJECT_ROOT"
+
+# Load .env if present
+if [ -f .env ]; then
+  set -a; source .env; set +a
+fi
+
+echo "=== lab-bastion release ==="
+echo ""
+
+# 1. Build binaries & packages (both architectures)
+bash scripts/build-rpm.sh --all
+
+echo ""
+
+# 2. Publish RPM
+bash scripts/publish-rpm.sh
+
+echo ""
+
+# 3. Publish DEB
+bash scripts/publish-deb.sh
+
+echo ""
+
+# 4. Build & push Docker image
+bash scripts/build-bastion.sh
+
+echo ""
+
+# 5. Install locally (Fedora/RHEL only). Step 1 built both architectures, so pick the RPM matching this host.
+if [ -f /etc/fedora-release ] || [ -f /etc/redhat-release ]; then
+  echo "==> Installing locally..."
+  RPM_FILE=$(ls dist/labctl-*."$(uname -m)".rpm 2>/dev/null | head -1)
+  if [ -n "$RPM_FILE" ]; then
+    sudo rpm -U --force "$RPM_FILE"
+    echo ""
+    echo "==> Installed:"
+    labctl --version || echo "(labctl binary installed)"
+  else
+    echo "==> WARNING: No $(uname -m) RPM found in dist/, skipping local install."
+  fi
+else
+  echo "==> Not Fedora/RHEL — skipping local RPM install."
+fi
+
+echo ""
+
+# 6. Summary
+GITEA_PUBLIC_URL="${GITEA_PUBLIC_URL:-https://mysources.co.uk}"
+GITEA_OWNER="${GITEA_OWNER:-michal}"
+REGISTRY="${GITEA_REGISTRY:-mysources.co.uk}"
+VERSION=$(node -p "require('./package.json').version")
+
+echo "=== Done! ==="
+echo ""
+echo "RPM install:"
+echo "  sudo dnf config-manager --add-repo ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/rpm.repo"
+echo "  sudo dnf install labctl"
+echo ""
+echo "DEB install (Debian/Ubuntu):"
+echo "  echo \"deb ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian trixie main\" | sudo tee /etc/apt/sources.list.d/labctl.list"
+echo "  curl -fsSL ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian/repository.key | sudo gpg --dearmor -o /etc/apt/keyrings/labctl.gpg"
+echo "  sudo apt update && sudo apt install labctl"
+echo ""
+echo "Docker image:"
+echo "  podman pull ${REGISTRY}/michal/lab-bastion:${VERSION}"
+echo ""
+echo "k3s deployment:"
+echo "  kubectl apply -k deploy/k3s/"
--- a/bastion/scripts/test-integration.sh
+++ b/bastion/scripts/test-integration.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Run integration tests inside a Node container with access to host libvirt.
+#
+# Usage: sudo ./scripts/test-integration.sh [vitest args...]
+# Example: sudo ./scripts/test-integration.sh -t k3s
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+
+# Detect real user (even when running via sudo); home comes from the passwd database, /home/<user> is only the fallback
+REAL_USER="${SUDO_USER:-$(whoami)}"
+REAL_HOME="$(getent passwd "$REAL_USER" | cut -d: -f6)"; REAL_HOME="${REAL_HOME:-/home/${REAL_USER}}"
+
+echo "==> Running integration tests in container"
+echo "    Project: ${PROJECT_ROOT}"
+echo "    User: ${REAL_USER}"
+echo "    SSH key: ${REAL_HOME}/.ssh/"
+echo ""
+
+# Check prerequisites
+if ! command -v podman &>/dev/null && ! command -v docker &>/dev/null; then
+  echo "ERROR: podman or docker required"
+  exit 1
+fi
+
+RUNTIME="podman"
+if ! command -v podman &>/dev/null; then
+  RUNTIME="docker"
+fi
+
+# Check libvirt socket
+if [ ! -S /var/run/libvirt/libvirt-sock ]; then
+  echo "ERROR: libvirt socket not found at /var/run/libvirt/libvirt-sock"
+  echo "       Is libvirtd running? Try: sudo systemctl start libvirtd"
+  exit 1
+fi
+
+# Create a temp dir for cloud-init artifacts (avoids SELinux /tmp relabel)
+WORK_TMP="/var/tmp/lab-integration-$$"
+mkdir -p "${WORK_TMP}"
+trap 'rm -rf "${WORK_TMP}"' EXIT
+# Deliberately not exec'd: exec replaces this shell, so the EXIT trap above would never clean up.
+$RUNTIME run --rm \
+  --name lab-integration-test \
+  --privileged \
+  --security-opt label=disable \
+  --network=host \
+  -v "${PROJECT_ROOT}:${PROJECT_ROOT}" \
+  -v "${REAL_HOME}/.ssh:${REAL_HOME}/.ssh:ro" \
+  -v "/var/run/libvirt/libvirt-sock:/var/run/libvirt/libvirt-sock" \
+  -v "/var/lib/libvirt/images:/var/lib/libvirt/images" \
+  -v "${WORK_TMP}:/tmp/lab-integration-tests" \
+  -w "${PROJECT_ROOT}" \
+  -e "SSH_KEY_PATH=${REAL_HOME}/.ssh/id_rsa" \
+  -e "HOME=${REAL_HOME}" \
+  node:22-bookworm \
+  bash -c "
+    # Install system deps for libvirt client + cloud-init ISO creation
+    apt-get update -qq && apt-get install -y -qq libvirt-clients virtinst genisoimage openssh-client qemu-utils sudo >/dev/null 2>&1
+
+    # Install pnpm
+    corepack enable && corepack prepare pnpm@9 --activate >/dev/null 2>&1
+
+    echo '==> Installing project dependencies...'
+    pnpm install --frozen-lockfile 2>/dev/null
+
+    echo '==> Running integration tests...'
+    echo ''
+    pnpm run test:integration \"\$@\"
+  " lab-integration-test "$@"
--- a/bastion/scripts/test-provision.sh
+++ b/bastion/scripts/test-provision.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+# Run PXE and/or ISO boot integration tests.
+#
+# Usage:
+#   sudo ./scripts/test-provision.sh          # run PXE + ISO (x86_64)
+#   sudo ./scripts/test-provision.sh pxe      # PXE only
+#   sudo ./scripts/test-provision.sh iso      # ISO only (x86_64)
+#   sudo ./scripts/test-provision.sh arm      # ARM ISO boot (emulated, SLOW ~60min)
+#   sudo ./scripts/test-provision.sh all      # all tests including ARM
+#
+# Prerequisites:
+#   libvirtd, OVMF (edk2-ovmf), iPXE (ipxe-bootimgs-x86),
+#   dnsmasq, xorriso, mtools, virt-install, qemu-img
+#   ARM: qemu-system-aarch64, edk2-aarch64
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+
+cd "$PROJECT_ROOT"
+
+# Detect real user for SSH keys
+REAL_USER="${SUDO_USER:-$(whoami)}"
+REAL_HOME=$(getent passwd "$REAL_USER" | cut -d: -f6)
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BOLD='\033[1m'
+RESET='\033[0m'
+
+echo ""
+echo -e "${BOLD}Lab Bastion -- Provision Integration Tests${RESET}"
+echo "==========================================="
+echo ""
+
+# --- Prerequisite checks ---
+MISSING=""
+for cmd in virsh virt-install qemu-img dnsmasq xorriso mformat mcopy curl; do
+    if ! command -v "$cmd" &>/dev/null; then
+        MISSING="$MISSING $cmd"
+    fi
+done
+
+if [ -n "$MISSING" ]; then
+    echo -e "${RED}Missing tools:${RESET}$MISSING"
+    echo "Install: sudo dnf install libvirt virt-install qemu-img dnsmasq xorriso mtools curl"
+    exit 1
+fi
+
+if ! systemctl is-active libvirtd &>/dev/null; then
+    echo -e "${RED}libvirtd not running.${RESET} Start with: sudo systemctl start libvirtd"
+    exit 1
+fi
+
+if [ ! -f /usr/share/edk2/ovmf/OVMF_CODE.fd ]; then
+    echo -e "${RED}OVMF firmware not found.${RESET} Install: sudo dnf install edk2-ovmf"
+    exit 1
+fi
+
+IPXE_EFI=""
+for f in /usr/share/ipxe/ipxe-snponly-x86_64.efi /usr/share/ipxe/ipxe-snp-x86_64.efi /usr/share/ipxe/ipxe-x86_64.efi; do
+    [ -f "$f" ] && IPXE_EFI="$f" && break
+done
+if [ -z "$IPXE_EFI" ]; then
+    echo -e "${RED}iPXE EFI binary not found.${RESET} Install: sudo dnf install ipxe-bootimgs-x86"
+    exit 1
+fi
+
+# Find SSH key
+SSH_KEY=""
+for name in id_ed25519 id_ecdsa id_rsa; do
+    if [ -f "$REAL_HOME/.ssh/$name" ] && [ -f "$REAL_HOME/.ssh/$name.pub" ]; then
+        SSH_KEY="$REAL_HOME/.ssh/$name"
+        break
+    fi
+done
+if [ -z "$SSH_KEY" ]; then
+    echo -e "${RED}No SSH key found in $REAL_HOME/.ssh/${RESET}"
+    exit 1
+fi
+
+echo -e "  User:    ${BOLD}$REAL_USER${RESET}"
+echo -e "  SSH key: ${BOLD}$SSH_KEY${RESET}"
+echo -e "  iPXE:    ${BOLD}$IPXE_EFI${RESET}"
+echo ""
+
+# --- Determine which tests to run ---
+MODE="${1:-both}"
+
+run_test() {
+    local name="$1" pattern="$2"
+    echo ""
+    echo -e "${YELLOW}━━━ Running $name test ━━━${RESET}"
+    echo ""
+
+    if SSH_KEY_PATH="$SSH_KEY" HOME="$REAL_HOME" \
+       npx vitest run -c tests/integration/vitest.config.ts -t "$pattern" 2>&1; then
+        echo ""
+        echo -e "${GREEN}✔ $name test passed${RESET}"
+        return 0
+    else
+        echo ""
+        echo -e "${RED}✘ $name test failed${RESET}"
+        return 1
+    fi
+}
+
+# FAILED becomes 1 as soon as any selected test fails. Each run_test call is
+# guarded with `||` so that `set -e` does not abort the script and the
+# remaining tests still run.
+FAILED=0
+
+# Dispatch on the requested mode (first CLI argument, default "both").
+case "$MODE" in
+    pxe)
+        run_test "PXE boot" "PXE boot" || FAILED=1
+        ;;
+    iso)
+        run_test "ISO boot" "ISO boot" || FAILED=1
+        ;;
+    arm|arm-iso)
+        # ARM was requested explicitly, so a missing emulator is a hard error.
+        if ! command -v qemu-system-aarch64 &>/dev/null; then
+            echo -e "${RED}qemu-system-aarch64 not found.${RESET} Install: sudo dnf install qemu-system-aarch64 edk2-aarch64"
+            exit 1
+        fi
+        echo -e "${YELLOW}ARM emulation is ~10x slower than native. Expect 30-60 minutes.${RESET}"
+        run_test "ARM ISO boot" "ARM ISO" || FAILED=1
+        ;;
+    both)
+        run_test "PXE boot" "PXE boot" || FAILED=1
+        run_test "ISO boot" "ISO boot" || FAILED=1
+        ;;
+    all)
+        run_test "PXE boot" "PXE boot" || FAILED=1
+        run_test "ISO boot" "ISO boot" || FAILED=1
+        # In "all" mode the ARM test is optional: skip it with a notice when
+        # the emulator is not installed instead of failing.
+        if command -v qemu-system-aarch64 &>/dev/null; then
+            echo -e "${YELLOW}ARM emulation is ~10x slower than native.${RESET}"
+            run_test "ARM ISO boot" "ARM ISO" || FAILED=1
+        else
+            echo -e "${YELLOW}Skipping ARM test (qemu-system-aarch64 not installed)${RESET}"
+        fi
+        ;;
+    *)
+        # Unknown mode: print usage and fail.
+        echo "Usage: $0 [pxe|iso|arm|both|all]"
+        exit 1
+        ;;
+esac
+
+echo ""
+if [ "$FAILED" -eq 0 ]; then
+    echo -e "${GREEN}${BOLD}All provision tests passed.${RESET}"
+else
+    echo -e "${RED}${BOLD}Some tests failed.${RESET}"
+    exit 1
+fi
--- a/bastion/src/bastion/package.json
+++ b/bastion/src/bastion/package.json
@@ -0,0 +1,38 @@
+{
+  "name": "@lab/bastion",
+  "version": "0.1.0",
+  "private": true,
+  "type": "module",
+  "main": "./dist/main.js",
+  "types": "./dist/main.d.ts",
+  "exports": {
+    ".": {
+      "import": "./dist/main.js",
+      "types": "./dist/main.d.ts"
+    },
+    "./iso-builder": {
+      "import": "./dist/services/iso-builder.js",
+      "types": "./dist/services/iso-builder.d.ts"
+    }
+  },
+  "scripts": {
+    "build": "tsc --build",
+    "clean": "rimraf dist",
+    "dev": "tsx src/main.ts",
+    "test": "vitest",
+    "test:run": "vitest run"
+  },
+  "dependencies": {
+    "@fastify/static": "^8.0.0",
+    "@lab/modules": "workspace:*",
+    "@lab/shared": "workspace:*",
+    "execa": "^9.5.0",
+    "fastify": "^5.0.0",
+    "winston": "^3.17.0",
+    "ws": "^8.19.0"
+  },
+  "devDependencies": {
+    "@types/node": "^22.10.0",
+    "@types/ws": "^8.18.0"
+  }
+}
--- a/bastion/src/bastion/src/config.ts
+++ b/bastion/src/bastion/src/config.ts
@@ -0,0 +1,58 @@
+// Configuration from environment variables with sensible defaults.
+
+import type { BastionConfig } from "@lab/shared";
+
+/**
+ * Parses a port number taken from an environment variable.
+ *
+ * @param raw - Raw environment value; `undefined` or blank selects `fallback`.
+ * @param fallback - Port used when the variable is not set.
+ * @param name - Variable name, used only in the error message.
+ * @returns The parsed port.
+ * @throws Error when the value is set but is not an integer in 1..65535.
+ */
+function parsePort(raw: string | undefined, fallback: number, name: string): number {
+  if (raw === undefined || raw.trim() === "") return fallback;
+  const port = Number(raw);
+  if (!Number.isInteger(port) || port < 1 || port > 65535) {
+    throw new Error(`Invalid ${name}: '${raw}' (expected an integer between 1 and 65535)`);
+  }
+  return port;
+}
+
+/** Type guard for the two DHCP modes the config accepts. */
+function isDhcpMode(value: string): value is "proxy" | "full" {
+  return value === "proxy" || value === "full";
+}
+
+/**
+ * Builds the bastion configuration.
+ *
+ * Each setting is resolved in this order: explicit `overrides`, then the
+ * matching environment variable, then a built-in default. The network fields
+ * (`iface`, `serverIp`, `network`, `gateway`) and `sshKeys`/`adminUser` come
+ * only from `overrides` and default to empty values. `fedoraMirror`,
+ * `tftpDir`, `httpDir` and `stateFile` are always derived from the resolved
+ * settings.
+ *
+ * @param overrides - Values that take precedence over environment and defaults.
+ * @returns The fully resolved configuration.
+ * @throws Error when `HTTP_PORT` / `SYSLOG_PORT` is not a valid port, or when
+ *   the DHCP mode is neither `"proxy"` nor `"full"`.
+ */
+export function loadConfig(overrides: Partial<BastionConfig> = {}): BastionConfig {
+  const fedoraVersion = overrides.fedoraVersion ?? process.env["FEDORA_VERSION"] ?? "43";
+  const arch = overrides.arch ?? process.env["ARCH"] ?? "x86_64";
+  // Validate instead of letting parseInt produce NaN for a malformed value.
+  const httpPort = overrides.httpPort ?? parsePort(process.env["HTTP_PORT"], 8080, "HTTP_PORT");
+  const timezone = overrides.timezone ?? process.env["TIMEZONE"] ?? "Europe/London";
+  const locale = overrides.locale ?? process.env["LOCALE"] ?? "en_GB.UTF-8";
+  const bastionDir = overrides.bastionDir ?? process.env["BASTION_DIR"] ?? "/tmp/lab-bastion";
+  const domain = overrides.domain ?? process.env["DOMAIN"] ?? "ad.itaz.eu";
+
+  // Check the mode at runtime rather than casting an arbitrary string.
+  const rawDhcpMode: string = overrides.dhcpMode ?? process.env["DHCP_MODE"] ?? "proxy";
+  if (!isDhcpMode(rawDhcpMode)) {
+    throw new Error(`Invalid DHCP_MODE: '${rawDhcpMode}' (expected 'proxy' or 'full')`);
+  }
+  const dhcpMode = rawDhcpMode;
+  const dhcpRangeStart = overrides.dhcpRangeStart ?? process.env["DHCP_RANGE_START"] ?? "";
+  const dhcpRangeEnd = overrides.dhcpRangeEnd ?? process.env["DHCP_RANGE_END"] ?? "";
+
+  const syslogPort = overrides.syslogPort ?? parsePort(process.env["SYSLOG_PORT"], 5514, "SYSLOG_PORT");
+
+  const ubuntuVersion = overrides.ubuntuVersion ?? process.env["UBUNTU_VERSION"] ?? "26.04";
+  const ubuntuMirror = overrides.ubuntuMirror ?? process.env["UBUNTU_MIRROR"]
+    ?? `https://releases.ubuntu.com/${ubuntuVersion}`;
+
+  // Derived values: always computed from the settings resolved above.
+  const fedoraMirror = `https://download.fedoraproject.org/pub/fedora/linux/releases/${fedoraVersion}/Everything/${arch}/os`;
+  const tftpDir = `${bastionDir}/tftp`;
+  const httpDir = `${bastionDir}/http`;
+  const stateFile = `${bastionDir}/state.json`;
+
+  return {
+    fedoraVersion,
+    arch,
+    httpPort,
+    timezone,
+    locale,
+    bastionDir,
+    domain,
+    dhcpMode,
+    dhcpRangeStart,
+    dhcpRangeEnd,
+    ubuntuVersion,
+    ubuntuMirror,
+    // These are populated at runtime by the network service
+    iface: overrides.iface ?? "",
+    serverIp: overrides.serverIp ?? "",
+    network: overrides.network ?? "",
+    gateway: overrides.gateway ?? "",
+    sshKeys: overrides.sshKeys ?? [],
+    adminUser: overrides.adminUser ?? "",
+    syslogPort,
+    skipDnsmasq: overrides.skipDnsmasq,
+    skipArtifacts: overrides.skipArtifacts,
+    labdUrl: overrides.labdUrl ?? process.env["LABD_URL"],
+    bastionJoinToken: overrides.bastionJoinToken ?? process.env["BASTION_JOIN_TOKEN"],
+    fedoraMirror,
+    tftpDir,
+    httpDir,
+    stateFile,
+  };
+}
--- a/bastion/src/bastion/src/main.ts
+++ b/bastion/src/bastion/src/main.ts
@@ -0,0 +1,359 @@
+// Entry point for the bastion server.
+// Starts the Fastify HTTP server, dnsmasq, and handles graceful shutdown.
+
+import { mkdirSync, writeFileSync, readFileSync, existsSync, copyFileSync, symlinkSync, unlinkSync, renameSync } from "node:fs";
+import { execSync, execFileSync } from "node:child_process";
+import type { BastionConfig } from "@lab/shared";
+import { loadConfig } from "./config.js";
+import { populateNetworkConfig } from "./services/network.js";
+import { createApp } from "./server.js";
+import { startDnsmasq, stopDnsmasq, generateDnsmasqConf } from "./services/dnsmasq.js";
+import { generateDiscoverKickstart } from "./services/kickstart-generator.js";
+import { renderBootIpxe } from "./templates/boot.ipxe.js";
+import { logger } from "./services/logger.js";
+import { BastionConnection } from "./services/labd-connection.js";
+import { progressBus } from "./services/progress-events.js";
+import { ensureBootIso } from "./routes/boot-iso.js";
+
+/**
+ * Copies `src` to `dest` unless `dest` already exists.
+ *
+ * @param src - File to copy from.
+ * @param dest - Destination path; an existing file is left untouched.
+ * @param label - Human-readable artifact name used in log and error messages.
+ * @throws Error when `dest` is absent and `src` does not exist either.
+ */
+function copyIfMissing(src: string, dest: string, label: string): void {
+  const alreadyPresent = existsSync(dest);
+
+  if (!alreadyPresent) {
+    const sourceAvailable = existsSync(src);
+    if (!sourceAvailable) {
+      throw new Error(`${label}: source not found at ${src}`);
+    }
+    copyFileSync(src, dest);
+    logger.info(`  ${label} -- copied from ${src}`);
+    return;
+  }
+
+  logger.info(`  ${label} -- cached`);
+}
+
+/**
+ * Downloads `url` to `dest` with curl unless `dest` already exists.
+ *
+ * The data is written to `<dest>.part` first and renamed into place only
+ * after curl succeeds, so an interrupted or failed download never leaves a
+ * truncated file that a later run would treat as cached. curl is invoked
+ * without a shell, so characters in `url` or `dest` are never interpreted as
+ * shell syntax.
+ *
+ * @param url - Source URL.
+ * @param dest - Destination path; an existing file short-circuits the download.
+ * @param label - Human-readable artifact name used in log and error messages.
+ * @throws Error when curl fails or the finished file cannot be moved into place.
+ */
+function download(url: string, dest: string, label: string): void {
+  if (existsSync(dest)) {
+    logger.info(`  ${label} -- cached`);
+    return;
+  }
+  logger.info(`  ${label} -- downloading...`);
+  const partial = `${dest}.part`;
+  try {
+    execFileSync("curl", ["-#", "-L", "-f", "-o", partial, url], { stdio: "inherit" });
+    renameSync(partial, dest);
+  } catch {
+    // Remove whatever curl managed to write before failing.
+    try { unlinkSync(partial); } catch { /* nothing to clean up */ }
+    throw new Error(`Failed to download ${label} from ${url}`);
+  }
+}
+
+/**
+ * Creates a symlink at `linkPath` pointing to `target`, tolerating a link
+ * (or any other entry) that already exists at `linkPath`.
+ *
+ * Any other failure (for example a permission error) is logged as a warning
+ * instead of being silently discarded; the function still does not throw, so
+ * the call stays best-effort for its callers.
+ *
+ * @param target - Path the link should point to.
+ * @param linkPath - Location of the link to create.
+ */
+function symlinkSafe(target: string, linkPath: string): void {
+  try {
+    symlinkSync(target, linkPath);
+  } catch (err: unknown) {
+    const code = err instanceof Error ? (err as NodeJS.ErrnoException).code : undefined;
+    // An existing entry is the expected case on every start after the first.
+    if (code === "EEXIST") return;
+    const reason = err instanceof Error ? err.message : String(err);
+    logger.warn(`Could not symlink ${linkPath} -> ${target}: ${reason}`);
+  }
+}
+
+/**
+ * Runs `cmd` with `args` and reports whether it exited successfully.
+ *
+ * The command is executed directly (no shell), so each element of `args` is
+ * passed as exactly one argument even when it contains spaces or shell
+ * metacharacters. Output is captured and discarded.
+ *
+ * @param cmd - Executable name or path.
+ * @param args - Arguments passed verbatim to the executable.
+ * @returns `true` on exit status 0; `false` on a non-zero status or when the
+ *   command cannot be started.
+ */
+function runCmd(cmd: string, args: string[]): boolean {
+  try {
+    execFileSync(cmd, args, { stdio: "pipe" });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+let fwZoneFlag = "";
+let fwOpened = false;
+
+/**
+ * Opens the firewalld services/ports the bastion needs (dhcp, tftp, the HTTP
+ * port and 4011/udp) in the zone of `config.iface`.
+ *
+ * Does nothing when `firewall-cmd --state` fails. The detected zone flag is
+ * stored in `fwZoneFlag` and `fwOpened` is set so `closeFirewall` can undo
+ * the changes later.
+ *
+ * @param config - Supplies `iface` for zone detection and `httpPort`.
+ */
+function openFirewall(config: BastionConfig): void {
+  const firewalldRunning = runCmd("firewall-cmd", ["--state"]);
+  if (!firewalldRunning) {
+    return;
+  }
+
+  // Look up the zone bound to our interface; on failure the default zone is used.
+  try {
+    const detectedZone = execSync(
+      `firewall-cmd --get-zone-of-interface=${config.iface} 2>/dev/null`,
+      { encoding: "utf-8" },
+    ).trim();
+    if (detectedZone) {
+      fwZoneFlag = `--zone=${detectedZone}`;
+    }
+  } catch { /* use default zone */ }
+
+  const zoneArgs = fwZoneFlag ? [fwZoneFlag] : [];
+  logger.info(`Opening firewall ports (DHCP, TFTP, HTTP:${config.httpPort})...`);
+
+  const rules = [
+    "--add-service=dhcp",
+    "--add-service=tftp",
+    `--add-port=${config.httpPort}/tcp`,
+    "--add-port=4011/udp",
+  ];
+  for (const rule of rules) {
+    runCmd("firewall-cmd", ["--quiet", ...zoneArgs, rule]);
+  }
+
+  fwOpened = true;
+}
+
+/**
+ * Removes the firewalld services/ports added by `openFirewall`.
+ *
+ * A no-op unless `fwOpened` is set. Uses the zone flag recorded in
+ * `fwZoneFlag`, if any.
+ *
+ * @param config - Supplies `httpPort` for the port rule to remove.
+ */
+function closeFirewall(config: BastionConfig): void {
+  if (!fwOpened) {
+    return;
+  }
+
+  const zoneArgs = fwZoneFlag ? [fwZoneFlag] : [];
+  logger.info("Removing firewall rules...");
+
+  const rules = [
+    "--remove-service=dhcp",
+    "--remove-service=tftp",
+    `--remove-port=${config.httpPort}/tcp`,
+    "--remove-port=4011/udp",
+  ];
+  for (const rule of rules) {
+    runCmd("firewall-cmd", ["--quiet", ...zoneArgs, rule]);
+  }
+}
+
+/**
+ * Starts the bastion and keeps the process alive until it is signalled.
+ *
+ * Steps, in order: load and populate the config; replace any previous
+ * instance recorded in the PID file; prepare TFTP/HTTP directories and boot
+ * artifacts (unless `skipArtifacts`); write the discovery kickstart, iPXE
+ * boot script and dnsmasq config; open firewall ports and start dnsmasq
+ * (unless `skipDnsmasq`); start the HTTP server and syslog listener; connect
+ * to labd when `labdUrl` is set; install SIGINT/SIGTERM handlers.
+ *
+ * @param overrides - Configuration values that take precedence over the
+ *   environment and defaults.
+ * @returns A promise that never resolves; the process ends via `process.exit`.
+ * @throws Error when dnsmasq is required but the process is not running as
+ *   root, or when a mandatory boot artifact cannot be prepared.
+ */
+export async function startBastion(overrides: Partial<BastionConfig> = {}): Promise<void> {
+  // Load and populate config
+  let config = loadConfig(overrides);
+  config = populateNetworkConfig(config);
+
+  // Bastion needs root for dnsmasq (DHCP port 67)
+  if (!config.skipDnsmasq && process.getuid?.() !== 0) {
+    throw new Error("Must run as root (dnsmasq needs DHCP/TFTP ports). Use: sudo labctl init bastion standalone start");
+  }
+
+  mkdirSync(config.bastionDir, { recursive: true, mode: 0o755 });
+  const pidFile = `${config.bastionDir}/bastion.pid`;
+
+  // Kill old instance if running
+  try {
+    if (existsSync(pidFile)) {
+      const oldPid = parseInt(readFileSync(pidFile, "utf-8").trim(), 10);
+      if (!isNaN(oldPid)) {
+        try {
+          process.kill(oldPid, "SIGTERM");
+          logger.info(`Killed old bastion process (PID ${oldPid})`);
+          // Give the old instance a moment to shut down.
+          await new Promise((r) => setTimeout(r, 1000));
+        } catch {
+          // Process already dead
+        }
+      }
+      // Remove stale PID file (may be owned by different user)
+      try { unlinkSync(pidFile); } catch { /* ignore */ }
+    }
+  } catch {
+    // Can't read PID file — try to remove it
+    try { unlinkSync(pidFile); } catch { /* ignore */ }
+  }
+
+  // Write current PID
+  writeFileSync(pidFile, String(process.pid), { mode: 0o644 });
+
+  // Prepare directories
+  mkdirSync(config.tftpDir, { recursive: true });
+  mkdirSync(config.httpDir, { recursive: true });
+
+  // Prepare boot artifacts
+  if (config.skipArtifacts !== true) {
+    logger.info(`Preparing boot artifacts (Fedora ${config.fedoraVersion} ${config.arch})...`);
+
+    copyIfMissing(
+      "/usr/share/ipxe/undionly.kpxe",
+      `${config.tftpDir}/undionly.kpxe`,
+      "iPXE BIOS",
+    );
+    copyIfMissing(
+      "/usr/share/ipxe/ipxe-snponly-x86_64.efi",
+      `${config.tftpDir}/ipxe.efi`,
+      "iPXE UEFI x86_64",
+    );
+    // The arm64 binary is optional; its absence is only a warning.
+    try {
+      copyIfMissing(
+        "/usr/share/ipxe/arm64-efi/snponly.efi",
+        `${config.tftpDir}/ipxe-arm64.efi`,
+        "iPXE UEFI arm64",
+      );
+    } catch {
+      logger.warn("arm64 iPXE not available -- skipping");
+    }
+
+    download(
+      `${config.fedoraMirror}/images/pxeboot/vmlinuz`,
+      `${config.httpDir}/vmlinuz`,
+      "Fedora kernel",
+    );
+    download(
+      `${config.fedoraMirror}/images/pxeboot/initrd.img`,
+      `${config.httpDir}/initrd.img`,
+      "Fedora initrd",
+    );
+
+    // Ubuntu netboot artifacts (non-fatal — Ubuntu version may not be released yet)
+    try {
+      logger.info(`Preparing Ubuntu ${config.ubuntuVersion} netboot artifacts...`);
+      download(
+        `${config.ubuntuMirror}/casper/vmlinuz`,
+        `${config.httpDir}/ubuntu-vmlinuz`,
+        "Ubuntu kernel",
+      );
+      download(
+        `${config.ubuntuMirror}/casper/initrd`,
+        `${config.httpDir}/ubuntu-initrd`,
+        "Ubuntu initrd",
+      );
+    } catch {
+      logger.warn(`Ubuntu ${config.ubuntuVersion} artifacts not available -- Ubuntu provisioning disabled`);
+    }
+
+    // Symlink iPXE binaries into HTTP dir for UEFI HTTP Boot
+    for (const name of ["ipxe.efi", "ipxe-arm64.efi"]) {
+      const src = `${config.tftpDir}/${name}`;
+      const dest = `${config.httpDir}/${name}`;
+      if (existsSync(src)) {
+        symlinkSafe(src, dest);
+      }
+    }
+
+    // Generate boot ISO (served as static file for Range request support)
+    try {
+      ensureBootIso(config);
+    } catch (err) {
+      logger.warn(`Boot ISO generation failed: ${err instanceof Error ? err.message : String(err)}`);
+    }
+  } else {
+    logger.info("Skipping boot artifacts (--skip-artifacts)");
+  }
+
+  // Write discovery kickstart
+  const discoverKs = generateDiscoverKickstart(config);
+  writeFileSync(`${config.httpDir}/discover.ks`, discoverKs);
+
+  // Write iPXE boot script
+  const bootIpxe = renderBootIpxe({
+    serverIp: config.serverIp,
+    httpPort: config.httpPort,
+  });
+  writeFileSync(`${config.httpDir}/boot.ipxe`, bootIpxe);
+
+  // Generate dnsmasq config
+  generateDnsmasqConf(config);
+
+  // Open firewall ports
+  if (config.skipDnsmasq !== true) {
+    openFirewall(config);
+  }
+
+  // Start HTTP server + syslog listener
+  const { app, state, syslog } = createApp(config);
+  await app.listen({ port: config.httpPort, host: "0.0.0.0" });
+  logger.info(`HTTP server listening on :${config.httpPort}`);
+  syslog.start();
+
+  // Start dnsmasq (unless skipped)
+  if (config.skipDnsmasq !== true) {
+    const dnsmasqProc = startDnsmasq(config);
+
+    // Monitor dnsmasq: any exit that is not our own kill is fatal.
+    void dnsmasqProc.then(() => {
+      logger.error("dnsmasq exited unexpectedly");
+      logger.error("Check if another DHCP/TFTP service is running.");
+      process.exit(1);
+    }).catch((err: unknown) => {
+      const message = err instanceof Error ? err.message : String(err);
+      if (!message.includes("was killed")) {
+        logger.error(`dnsmasq error: ${message}`);
+        process.exit(1);
+      }
+    });
+  } else {
+    logger.info("Skipping dnsmasq (--skip-dnsmasq)");
+  }
+
+  // Connect to labd if configured (otherwise run standalone)
+  let labdConn: BastionConnection | null = null;
+  if (config.labdUrl) {
+    labdConn = new BastionConnection(config, () => state.load());
+
+    // Wire up command handlers so labd can send install/forget/role commands
+    labdConn.onCommand("command-install", async (msg) => {
+      if (msg.type !== "command-install") throw new Error("unexpected");
+      // Key the queue by the lowercased MAC, like the forget/role handlers
+      // below; otherwise a later lookup by lowercased MAC misses this entry.
+      const mac = msg.mac.toLowerCase();
+      state.update((s) => {
+        s.install_queue[mac] = {
+          hostname: msg.hostname,
+          disk: msg.disk ?? "/dev/sda",
+          role: msg.role as import("@lab/shared").Role,
+          os: msg.os as import("@lab/shared").OsId,
+          queued_at: new Date().toISOString(),
+        };
+      });
+      return { status: "ok", data: { mac, hostname: msg.hostname } };
+    });
+
+    labdConn.onCommand("command-forget", async (msg) => {
+      if (msg.type !== "command-forget") throw new Error("unexpected");
+      const mac = msg.mac.toLowerCase();
+      state.update((s) => {
+        delete s.discovered[mac];
+        delete s.install_queue[mac];
+        delete s.installed[mac];
+      });
+      return { status: "ok", data: { mac } };
+    });
+
+    labdConn.onCommand("command-role-update", async (msg) => {
+      if (msg.type !== "command-role-update") throw new Error("unexpected");
+      const mac = msg.mac.toLowerCase();
+      const current = state.load();
+      if (!current.installed[mac]) {
+        return { status: "error", error: `MAC ${mac} not found in installed machines` };
+      }
+      state.update((s) => {
+        const inst = s.installed[mac];
+        if (inst) inst.role = msg.role;
+      });
+      return { status: "ok", data: { mac, role: msg.role } };
+    });
+
+    // Push state to labd on every local state change
+    state.onChange(() => labdConn?.syncState());
+
+    // Forward progress events (stages only, not raw log lines) to labd
+    progressBus.on((event) => {
+      if (event.stage !== "log") {
+        labdConn?.sendProgress(event.mac, event.stage, event.detail);
+      }
+    });
+
+    labdConn.connect();
+    logger.info(`Registering with labd at ${config.labdUrl}`);
+  }
+
+  // Print banner
+  printBanner(config);
+
+  // Graceful shutdown
+  let shuttingDown = false;
+  const shutdown = async (): Promise<void> => {
+    // SIGINT and SIGTERM may both arrive; tear everything down only once.
+    if (shuttingDown) return;
+    shuttingDown = true;
+
+    logger.info("Shutting down...");
+    syslog.stop();
+    if (labdConn) labdConn.close();
+    if (config.skipDnsmasq !== true) stopDnsmasq();
+    closeFirewall(config);
+    // A failing close must not prevent PID-file cleanup and exit.
+    try {
+      await app.close();
+    } catch (err: unknown) {
+      logger.error(`HTTP server close failed: ${err instanceof Error ? err.message : String(err)}`);
+    }
+    try { unlinkSync(pidFile); } catch { /* ignore */ }
+    logger.info(`State preserved in ${config.stateFile}`);
+    process.exit(0);
+  };
+
+  process.on("SIGINT", () => void shutdown());
+  process.on("SIGTERM", () => void shutdown());
+
+  // Keep process alive
+  await new Promise(() => {});
+}
+
+/**
+ * Prints the start-up banner to stdout: network, DHCP mode, HTTP URL, OS,
+ * domain and state-file location, followed by usage hints.
+ *
+ * @param config - Resolved bastion configuration to display.
+ */
+function printBanner(config: BastionConfig): void {
+  let dhcpInfo: string;
+  if (config.dhcpMode === "full") {
+    dhcpInfo = `full (${config.dhcpRangeStart}-${config.dhcpRangeEnd})`;
+  } else {
+    dhcpInfo = "proxy (alongside existing DHCP)";
+  }
+
+  const rule = "\x1b[36m\x1b[1m" + "=".repeat(60) + "\x1b[0m";
+
+  // One entry per output line; each is emitted with its own console.log call.
+  const bannerLines: string[] = [
+    "",
+    rule,
+    "\x1b[36m\x1b[1m  Lab PXE Bastion -- Discovery Mode\x1b[0m",
+    rule,
+    "",
+    `  Network:   \x1b[1m${config.network}/24\x1b[0m via \x1b[1m${config.iface}\x1b[0m`,
+    `  DHCP:      \x1b[1m${dhcpInfo}\x1b[0m`,
+    `  HTTP:      \x1b[1mhttp://${config.serverIp}:${config.httpPort}/\x1b[0m`,
+    `  OS:        \x1b[1mFedora ${config.fedoraVersion} (${config.arch})\x1b[0m`,
+    `  Domain:    \x1b[1m${config.domain}\x1b[0m`,
+    `  State:     \x1b[1m${config.stateFile}\x1b[0m`,
+    "",
+    "  \x1b[33mPXE boot any machine on this network.\x1b[0m",
+    "  \x1b[33mIt will be inventoried and rebooted automatically.\x1b[0m",
+    "",
+    "  Commands (from another terminal):",
+    "    \x1b[1mlabctl provision list\x1b[0m               -- show machines",
+    "    \x1b[1mlabctl provision install <mac> <hostname>\x1b[0m -- queue install",
+    "",
+    "  Press \x1b[1mCtrl-C\x1b[0m to stop.",
+    "",
+    "\x1b[36m---- Waiting for PXE boot requests... ----\x1b[0m",
+    "",
+  ];
+
+  for (const line of bannerLines) {
+    console.log(line);
+  }
+}
--- a/bastion/src/bastion/src/routes/api.ts
+++ b/bastion/src/bastion/src/routes/api.ts
@@ -0,0 +1,401 @@
+// REST API routes for machine management.
+// /api/machines  - list all machines by state
+// /api/install   - queue a machine for install
+// /api/progress  - receive install progress callbacks from kickstart
+// /api/discover  - receive hardware discovery reports from PXE-booted machines
+
+import type { FastifyInstance } from "fastify";
+import type { HardwareInfo, InstalledInfo, Role } from "@lab/shared";
+import { isValidOsId, SUPPORTED_ROLES } from "@lab/shared";
+import type { StateManager } from "../services/state.js";
+import { logger } from "../services/logger.js";
+import { triggerPostProvisionK3s } from "../services/post-provision.js";
+import { progressBus } from "../services/progress-events.js";
+import type { ProgressEvent } from "../services/progress-events.js";
+import type { InstallLogBuffer } from "../services/install-log.js";
+
+/**
+ * Parses a base-10 integer query value, returning `fallback` when the value
+ * is missing or not numeric (so `NaN` never reaches the caller).
+ */
+function parseIntOr(raw: string | undefined, fallback: number): number {
+  if (raw === undefined) return fallback;
+  const parsed = Number.parseInt(raw, 10);
+  return Number.isNaN(parsed) ? fallback : parsed;
+}
+
+/**
+ * Registers the machine-management REST routes on `app`:
+ *
+ * - `GET    /api/machines`         full state snapshot
+ * - `POST   /api/install`          queue a machine for install
+ * - `POST   /api/progress`         install progress callback
+ * - `POST   /api/log`              raw install log lines
+ * - `DELETE /api/machines/:mac`    forget a machine
+ * - `POST   /api/discover`         hardware discovery report
+ * - `POST   /api/role`             change an installed machine's role
+ * - `GET    /api/logs/:mac`        progress snapshot plus buffered log lines
+ * - `GET    /api/logs/:mac/follow` SSE stream of progress/log events
+ *
+ * @param app - Fastify instance to attach the routes to.
+ * @param state - State manager holding discovered/queued/installed machines.
+ * @param installLog - Buffer of raw install log lines per MAC.
+ */
+export function registerApiRoutes(
+  app: FastifyInstance,
+  state: StateManager,
+  installLog: InstallLogBuffer,
+): void {
+  // List all machines
+  app.get("/api/machines", async (_request, reply) => {
+    return reply.send(state.load());
+  });
+
+  // Queue a machine for install
+  app.post<{
+    Body: {
+      mac?: string;
+      hostname?: string;
+      disk?: string;
+      role?: string;
+      os?: string;
+    };
+  }>("/api/install", async (request, reply) => {
+    const { mac: rawMac, hostname, disk, role, os } = request.body ?? {};
+    // Normalise to lowercase, colon-separated form.
+    const mac = (rawMac ?? "").toLowerCase().replace(/-/g, ":");
+
+    if (mac === "") {
+      return reply.status(400).send({ error: "mac is required" });
+    }
+
+    const validRole = role ?? "worker";
+    if (!(SUPPORTED_ROLES as readonly string[]).includes(validRole)) {
+      return reply.status(400).send({ error: `invalid role: '${validRole}'. Supported: ${SUPPORTED_ROLES.join(", ")}` });
+    }
+
+    const osId = os ?? "fedora-43";
+    if (!isValidOsId(osId)) {
+      return reply.status(400).send({ error: `invalid os: '${osId}'. Supported: fedora-43, ubuntu-26.04` });
+    }
+
+    state.update((s) => {
+      s.install_queue[mac] = {
+        hostname: hostname ?? "lab-node",
+        disk: disk ?? "",
+        role: validRole as Role,
+        os: osId,
+        queued_at: new Date().toISOString(),
+      };
+    });
+
+    logger.info(`INSTALL QUEUED: ${mac} -> hostname=${hostname ?? "lab-node"} role=${validRole} os=${osId}`);
+
+    return reply.send({
+      status: "queued",
+      mac,
+      hostname: hostname ?? "lab-node",
+      role: validRole,
+      os: osId,
+      message: `PXE boot the machine to start installation (role=${validRole}, os=${osId})`,
+    });
+  });
+
+  // Receive install progress callbacks
+  app.post<{
+    Body: {
+      mac?: string;
+      stage?: string;
+      detail?: string;
+    };
+  }>("/api/progress", async (request, reply) => {
+    const { mac: rawMac, stage, detail } = request.body ?? {};
+    const mac = (rawMac ?? "unknown").toLowerCase();
+    const stageName = stage ?? "unknown";
+    const detailStr = detail ?? "";
+
+    // Coloured one-line console summary of the stage.
+    const GREEN = "\x1b[0;32m";
+    const YELLOW = "\x1b[1;33m";
+    const RED = "\x1b[0;31m";
+    const BOLD = "\x1b[1m";
+    const RESET = "\x1b[0m";
+    const icons: Record<string, string> = {
+      partitioning: "◆", installing: "◆◆", "post-install": "◆◆◆",
+      complete: "✔", error: "✘",
+    };
+    const icon = icons[stageName] ?? "·";
+    const color = stageName === "complete" ? GREEN : stageName === "error" ? RED : YELLOW;
+    console.log(`  ${color}${icon}${RESET} ${mac}  ${BOLD}${stageName}${RESET}${detailStr ? ` -- ${detailStr}` : ""}`);
+
+    // Emit progress event for SSE clients
+    const hostname = state.load().install_queue[mac]?.hostname ?? mac;
+    progressBus.emit({
+      mac, hostname, stage: stageName, detail: detailStr,
+      timestamp: new Date().toISOString(),
+    });
+
+    state.update((s) => {
+      const queueEntry = s.install_queue[mac];
+      if (queueEntry) {
+        queueEntry.progress = stageName;
+        queueEntry.progress_at = new Date().toISOString();
+        if (detailStr !== "") {
+          queueEntry.progress_detail = detailStr;
+        }
+
+        // Append to progress log history
+        if (!queueEntry.log) queueEntry.log = [];
+        queueEntry.log.push({
+          stage: stageName,
+          detail: detailStr,
+          timestamp: new Date().toISOString(),
+        });
+
+        // Move to installed on completion
+        if (stageName === "complete") {
+          const cfg = s.install_queue[mac];
+          delete s.install_queue[mac];
+
+          // The completion detail carries the address as "ready at <ip>".
+          const ip = detailStr.startsWith("ready at ")
+            ? detailStr.replace("ready at ", "").trim()
+            : "";
+
+          const installedInfo: InstalledInfo = {
+            hostname: cfg?.hostname ?? "?",
+            role: cfg?.role ?? "?",
+            ...(cfg?.os !== undefined ? { os: cfg.os } : {}),
+            ip,
+            installed_at: new Date().toISOString(),
+          };
+          s.installed[mac] = installedInfo;
+
+          const admin = installedInfo.role !== "vanilla" && installedInfo.role !== "" ? "michal" : "root";
+          console.log(`\n  \x1b[0;32m\x1b[1m  ssh ${admin}@${ip}\x1b[0m\n`);  // eslint-disable-line no-console
+
+          // Auto-install k3s for non-vanilla roles
+          if (installedInfo.role !== "vanilla" && ip !== "") {
+            void triggerPostProvisionK3s(installedInfo.hostname, ip, installedInfo.role, admin, mac);
+          }
+        }
+      }
+    });
+
+    return reply.send({ status: "ok" });
+  });
+
+  // Receive raw log lines from kickstart scripts
+  app.post<{
+    Body: {
+      mac?: string;
+      line?: string;
+      lines?: string[];
+      tail?: string;
+    };
+  }>("/api/log", async (request, reply) => {
+    const { mac: rawMac, line, lines: rawLines, tail } = request.body ?? {};
+    const mac = (rawMac ?? "unknown").toLowerCase();
+
+    // Collect all lines from the various input formats
+    const allLines: string[] = [];
+    if (line) allLines.push(line);
+    if (rawLines) allLines.push(...rawLines);
+    if (tail) {
+      // tail is a string with escaped \n — split it into lines
+      allLines.push(...tail.split("\\n").filter(Boolean));
+    }
+
+    if (allLines.length === 0) {
+      return reply.send({ status: "ok", lines: 0 });
+    }
+
+    // Look up hostname from install queue for enriching events
+    const hostname = state.load().install_queue[mac]?.hostname ?? mac;
+
+    // Append to the install log buffer (this also emits to progressBus)
+    installLog.append(mac, allLines, hostname);
+
+    return reply.send({ status: "ok", lines: allLines.length });
+  });
+
+  // Delete a machine from all state
+  app.delete<{
+    Params: { mac: string };
+  }>("/api/machines/:mac", async (request, reply) => {
+    const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
+
+    if (mac === "") {
+      return reply.status(400).send({ error: "mac is required" });
+    }
+
+    let found = false;
+    state.update((s) => {
+      if (s.discovered[mac] !== undefined) {
+        delete s.discovered[mac];
+        found = true;
+      }
+      if (s.install_queue[mac] !== undefined) {
+        delete s.install_queue[mac];
+        found = true;
+      }
+      if (s.installed[mac] !== undefined) {
+        delete s.installed[mac];
+        found = true;
+      }
+    });
+
+    if (!found) {
+      return reply.status(404).send({ error: "machine not found", mac });
+    }
+
+    logger.info(`MACHINE FORGOTTEN: ${mac}`);
+    return reply.send({ status: "forgotten", mac });
+  });
+
+  // Receive discovery reports
+  app.post<{
+    Body: {
+      mac?: string;
+      product?: string;
+      board?: string;
+      serial?: string;
+      manufacturer?: string;
+      cpu_model?: string;
+      cpu_cores?: number;
+      memory_gb?: number;
+      arch?: string;
+      disks?: Array<{ name: string; size_gb: number; model: string }>;
+      nics?: Array<{ name: string; mac: string; state: string }>;
+    };
+  }>("/api/discover", async (request, reply) => {
+    const data = request.body;
+    if (data === null || data === undefined) {
+      return reply.status(400).send({ error: "invalid JSON" });
+    }
+
+    const mac = (data.mac ?? "unknown").toLowerCase();
+    const now = new Date().toISOString();
+
+    const isNew = state.load().discovered[mac] === undefined;
+
+    state.update((s) => {
+      const existing = s.discovered[mac];
+      const hwInfo: HardwareInfo = {
+        mac,
+        product: data.product ?? "unknown",
+        board: data.board ?? "unknown",
+        serial: data.serial ?? "unknown",
+        manufacturer: data.manufacturer ?? "unknown",
+        cpu_model: data.cpu_model ?? "unknown",
+        cpu_cores: data.cpu_cores ?? 0,
+        memory_gb: data.memory_gb ?? 0,
+        arch: data.arch ?? "unknown",
+        disks: data.disks ?? [],
+        nics: data.nics ?? [],
+        // Keep the original first_seen when the machine reports again.
+        first_seen: existing?.first_seen ?? now,
+        last_seen: now,
+      };
+      s.discovered[mac] = hwInfo;
+    });
+
+    const label = isNew ? "NEW MACHINE DISCOVERED" : "MACHINE RE-DISCOVERED";
+    const cpu = data.cpu_model ?? "?";
+    const cores = data.cpu_cores ?? "?";
+    const mem = data.memory_gb ?? "?";
+    logger.info(`${label}: ${mac} -- ${data.manufacturer ?? "?"} ${data.product ?? "?"} (${cpu}, ${cores} cores, ${mem}GB RAM)`);
+
+    return reply.send({ status: "ok", mac, new: isNew });
+  });
+
+  // Update a machine's role (e.g. promote infra -> labcontroller)
+  app.post<{
+    Body: {
+      mac?: string;
+      role?: string;
+    };
+  }>("/api/role", async (request, reply) => {
+    const { mac: rawMac, role } = request.body ?? {};
+    const mac = (rawMac ?? "").toLowerCase().replace(/-/g, ":");
+
+    if (mac === "") {
+      return reply.status(400).send({ error: "mac is required" });
+    }
+    if (!role) {
+      return reply.status(400).send({ error: "role is required" });
+    }
+
+    let found = false;
+    state.update((s) => {
+      if (s.installed[mac]) {
+        const oldRole = s.installed[mac].role;
+        s.installed[mac].role = role;
+        found = true;
+        logger.info(`ROLE UPDATED: ${mac} (${s.installed[mac].hostname}) ${oldRole} -> ${role}`);
+      }
+    });
+
+    if (!found) {
+      return reply.status(404).send({ error: "machine not found in installed state", mac });
+    }
+
+    return reply.send({ status: "updated", mac, role });
+  });
+
+  // Get provision logs for a machine (current state snapshot + raw log lines)
+  app.get<{
+    Params: { mac: string };
+    Querystring: { lines?: string; offset?: string };
+  }>("/api/logs/:mac", async (request, reply) => {
+    const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
+    // Non-numeric query values fall back to the defaults instead of NaN.
+    const logLimit = parseIntOr(request.query.lines, 200);
+    const logOffset = parseIntOr(request.query.offset, 0);
+    const currentState = state.load();
+
+    const queueEntry = currentState.install_queue[mac];
+    const installedEntry = currentState.installed[mac];
+
+    if (queueEntry) {
+      return reply.send({
+        mac,
+        hostname: queueEntry.hostname,
+        status: "installing",
+        progress: queueEntry.progress ?? "queued",
+        progress_detail: queueEntry.progress_detail ?? "",
+        progress_at: queueEntry.progress_at ?? queueEntry.queued_at,
+        role: queueEntry.role,
+        os: queueEntry.os,
+        stages: queueEntry.log ?? [],
+        log_lines: installLog.getLines(mac, logOffset, logLimit),
+        log_total: installLog.lineCount(mac),
+      });
+    }
+    if (installedEntry) {
+      return reply.send({
+        mac,
+        hostname: installedEntry.hostname,
+        status: "installed",
+        progress: "complete",
+        progress_detail: `ready at ${installedEntry.ip}`,
+        progress_at: installedEntry.installed_at,
+        role: installedEntry.role,
+        ip: installedEntry.ip,
+        log_lines: installLog.getLines(mac, logOffset, logLimit),
+        log_total: installLog.lineCount(mac),
+      });
+    }
+
+    return reply.status(404).send({ error: "machine not found", mac });
+  });
+
+  // SSE stream: follow provision progress for a machine (or all machines)
+  app.get<{
+    Params: { mac: string };
+  }>("/api/logs/:mac/follow", async (request, reply) => {
+    // "all" disables the per-machine filter.
+    const filterMac = request.params.mac === "all"
+      ? null
+      : request.params.mac.toLowerCase().replace(/-/g, ":");
+
+    void reply.raw.writeHead(200, {
+      "Content-Type": "text/event-stream",
+      "Cache-Control": "no-cache",
+      "Connection": "keep-alive",
+    });
+
+    // Send current state as first event
+    const currentState = state.load();
+    const queueEntry = filterMac ? currentState.install_queue[filterMac] : undefined;
+    if (queueEntry) {
+      const initData = JSON.stringify({
+        mac: filterMac, hostname: queueEntry.hostname,
+        stage: queueEntry.progress ?? "queued",
+        detail: queueEntry.progress_detail ?? "",
+        timestamp: queueEntry.progress_at ?? queueEntry.queued_at,
+      });
+      reply.raw.write(`data: ${initData}\n\n`);
+    }
+
+    const onProgress = (event: ProgressEvent): void => {
+      if (filterMac && event.mac !== filterMac) return;
+      // Use SSE event types so clients can filter: "stage" for progress, "log" for raw lines
+      const eventType = event.stage === "log" ? "log" : "stage";
+      reply.raw.write(`event: ${eventType}\ndata: ${JSON.stringify(event)}\n\n`);
+    };
+
+    progressBus.on(onProgress);
+
+    // Unsubscribe when the client disconnects.
+    request.raw.on("close", () => {
+      progressBus.off(onProgress);
+    });
+  });
+}
--- a/bastion/src/bastion/src/routes/boot-iso.ts
+++ b/bastion/src/bastion/src/routes/boot-iso.ts
@@ -0,0 +1,249 @@
+// Boot ISO generation.
+// Generates a UEFI-bootable iPXE ISO using xorriso+mtools.
+// The ISO is placed in httpDir so @fastify/static serves it with Range request
+// support (required by JetKVM, which streams via HTTP Range + NBD).
+//
+// The ISO embeds kernel + initrd so machines without UEFI NIC support
+// (no SNP protocol) can still boot. iPXE loads them from file:/ and the
+// Linux kernel handles networking with its own drivers.
+
+import { createHash } from "node:crypto";
+import { execSync } from "node:child_process";
+import { existsSync, readFileSync, statSync, writeFileSync, mkdirSync, rmSync, unlinkSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import type { BastionConfig } from "@lab/shared";
+import { logger } from "../services/logger.js";
+
+// iPXE EFI binaries per architecture (x86_64, aarch64). `src` lists candidate paths in preference order -- the first one that exists is used -- with the SNP variant (scans all UEFI SNP handles, works from CD-ROM/USB boot) first. `efiName` is the file name the binary is given under EFI/BOOT.
+const IPXE_ISO_PATHS: Record<string, { src: string[]; efiName: string }> = {
+  x86_64: {
+    src: [
+      "/usr/share/ipxe/ipxe-snp-x86_64.efi",
+      "/usr/share/ipxe/ipxe-x86_64.efi",
+    ],
+    efiName: "BOOTX64.EFI",
+  },
+  aarch64: {
+    src: [
+      "/usr/share/ipxe/arm64-efi/ipxe-snp.efi",
+      "/usr/share/ipxe/arm64-efi/ipxe.efi",
+    ],
+    efiName: "BOOTAA64.EFI",
+  },
+};
+
+// Root of the Fedora release tree; `<version>/Everything/<arch>/os` is appended to it to reach the PXE kernel/initrd and the install repo.
+const FEDORA_MIRROR_BASE = "https://download.fedoraproject.org/pub/fedora/linux/releases";
+
+interface BootPayload { // one architecture's cached kernel + initrd to embed in the ISO
+  arch: string; // Fedora architecture name ("x86_64" or "aarch64")
+  vmlinuz: string; // path of the cached kernel file
+  initrd: string; // path of the cached initrd file
+}
+
+// Fetch `url` into `dest` unless it is already cached. Throws (via execSync) when the download fails.
+function downloadIfMissing(url: string, dest: string, label: string): void {
+  const cached = existsSync(dest);
+  logger.info(`  ${label} -- ${cached ? "cached" : "downloading..."}`);
+  if (cached) return;
+  // Download to a ".part" sibling and rename only on success, so an interrupted transfer is never mistaken for a cached file.
+  execSync(`curl -# -L -f -o "${dest}.part" "${url}" && mv "${dest}.part" "${dest}"`, { stdio: "inherit" });
+}
+
+/**
+ * Build the UEFI boot ISO at `outputPath`: stage iPXE binaries, an autoexec script and the cached Fedora
+ * kernel/initrd in a temporary directory, pack them into a FAT image with mtools and wrap it with xorriso.
+ * @throws if no iPXE EFI binary is installed or one of the external tools (cp, dd, mtools, xorriso) fails.
+ */
+function generateIso(config: BastionConfig, outputPath: string): void {
+  const work = join(tmpdir(), `bastion-iso-${process.pid}`);
+  // Start from an empty staging dir: leftovers of a crashed run with a recycled PID must not end up in the ISO.
+  rmSync(work, { recursive: true, force: true });
+  try {
+    mkdirSync(join(work, "EFI", "BOOT"), { recursive: true });
+
+    const bastionUrl = `http://${config.serverIp}:${config.httpPort}`;
+
+    // Copy available iPXE EFI binaries
+    const archs: string[] = [];
+    for (const [arch, paths] of Object.entries(IPXE_ISO_PATHS)) {
+      const srcFile = paths.src.find((s) => existsSync(s));
+      if (srcFile) {
+        execSync(`cp "${srcFile}" "${join(work, "EFI", "BOOT", paths.efiName)}"`, { stdio: "pipe" });
+        archs.push(arch);
+        logger.info(`  iPXE ISO ${arch}: ${srcFile}`);
+      }
+    }
+
+    if (archs.length === 0) throw new Error("No iPXE EFI binaries found");
+
+    // Download and stage kernel/initrd for each architecture.
+    // These are embedded in the ISO so machines without UEFI NIC support
+    // can boot the Linux installer (which has its own NIC drivers).
+    const cacheDir = join(config.bastionDir, "iso-cache");
+    mkdirSync(cacheDir, { recursive: true });
+
+    const payloads: BootPayload[] = [];
+    for (const arch of ["x86_64", "aarch64"]) {
+      const mirror = `${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/${arch}/os`;
+      const vmlinuzCache = join(cacheDir, `vmlinuz-${arch}`);
+      const initrdCache = join(cacheDir, `initrd-${arch}`);
+      try {
+        downloadIfMissing(`${mirror}/images/pxeboot/vmlinuz`, vmlinuzCache, `Fedora ${arch} kernel`);
+        downloadIfMissing(`${mirror}/images/pxeboot/initrd.img`, initrdCache, `Fedora ${arch} initrd`);
+        payloads.push({ arch, vmlinuz: vmlinuzCache, initrd: initrdCache });
+      } catch { // best effort: an architecture whose payload cannot be fetched is left out of the ISO
+        logger.warn(`  Fedora ${arch} kernel/initrd not available -- skipping`);
+      }
+    }
+
+    // Write iPXE autoexec script.
+    // Strategy: try DHCP (for machines with UEFI NIC support), then fall back
+    // to booting the embedded kernel/initrd from the ISO filesystem.
+    // iPXE's ${buildarch} resolves to "x86_64" or "arm64".
+    const ipxeScript = [
+      "#!ipxe",
+      "",
+      "echo",
+      "echo =============================================",
+      "echo   Lab PXE Bastion -- ISO Boot",
+      "echo =============================================",
+      "echo",
+      "",
+      "# Try DHCP (works if UEFI has NIC driver / SNP support)",
+      "set attempts:int32 0",
+      ":retry",
+      "dhcp && goto netboot ||",
+      "inc attempts",
+      "iseq ${attempts} 3 || goto retry_wait",
+      "goto localboot",
+      ":retry_wait",
+      "echo DHCP failed (attempt ${attempts}/3), retrying...",
+      "sleep 2",
+      "goto retry",
+      "",
+      "# Network available -- chain to bastion for dynamic dispatch",
+      ":netboot",
+      "echo Network OK. Chaining to bastion...",
+      `chain ${bastionUrl}/boot.ipxe || shell`,
+      "",
+      "# No network -- boot embedded kernel (Linux has its own NIC drivers)",
+      ":localboot",
+      "echo No UEFI network support. Booting embedded installer...",
+      "echo Linux will configure networking with its own drivers.",
+      "echo",
+      "# Map iPXE arch names to Fedora mirror paths (arm64 -> aarch64)",
+      "set fedarch ${buildarch}",
+      "iseq ${buildarch} arm64 && set fedarch aarch64 ||",
+      `kernel file:/vmlinuz-\${buildarch} inst.ks=${bastionUrl}/discover.ks inst.repo=${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/\${fedarch}/os inst.text || goto no_kernel`,
+      `initrd file:/initrd-\${buildarch} || goto no_kernel`,
+      "boot || shell",
+      "",
+      ":no_kernel",
+      "echo ERROR: kernel not found for this architecture. Dropping to shell.",
+      "shell",
+    ].join("\n");
+
+    writeFileSync(join(work, "autoexec.ipxe"), ipxeScript);
+
+    // Calculate EFI partition size: iPXE binaries + autoexec + kernel/initrd + margin
+    let payloadSize = 2 * 1024 * 1024; // 2MB base for iPXE + autoexec + FAT overhead
+    for (const p of payloads) {
+      payloadSize += statSync(p.vmlinuz).size;
+      payloadSize += statSync(p.initrd).size;
+    }
+    const efiSizeMB = Math.ceil(payloadSize / (1024 * 1024)) + 4; // +4MB margin
+    logger.info(`  EFI partition: ${efiSizeMB}MB (${payloads.length} arch payloads)`);
+
+    // Create FAT EFI system partition
+    const efiImg = join(work, "efi.img");
+    execSync(`dd if=/dev/zero of="${efiImg}" bs=1M count=${efiSizeMB} 2>/dev/null`, { stdio: "pipe" });
+    execSync(`mformat -i "${efiImg}" -v LABBOOT ::`, { stdio: "pipe" });
+    execSync(`mmd -i "${efiImg}" ::/EFI`, { stdio: "pipe" });
+    execSync(`mmd -i "${efiImg}" ::/EFI/BOOT`, { stdio: "pipe" });
+
+    for (const arch of archs) {
+      const paths = IPXE_ISO_PATHS[arch]!;
+      execSync(`mcopy -i "${efiImg}" "${join(work, "EFI", "BOOT", paths.efiName)}" ::/EFI/BOOT/${paths.efiName}`, { stdio: "pipe" });
+    }
+    execSync(`mcopy -i "${efiImg}" "${join(work, "autoexec.ipxe")}" ::/autoexec.ipxe`, { stdio: "pipe" });
+
+    // Copy kernel/initrd onto EFI partition with arch-specific names
+    for (const p of payloads) {
+      // iPXE ${buildarch} returns "x86_64" or "arm64"
+      const archLabel = p.arch === "aarch64" ? "arm64" : p.arch;
+      execSync(`mcopy -i "${efiImg}" "${p.vmlinuz}" ::/vmlinuz-${archLabel}`, { stdio: "pipe" });
+      execSync(`mcopy -i "${efiImg}" "${p.initrd}" ::/initrd-${archLabel}`, { stdio: "pipe" });
+      logger.info(`  Embedded ${archLabel}: vmlinuz + initrd`);
+    }
+
+    // Build hybrid ISO: El Torito EFI boot + GPT EFI partition
+    execSync([
+      `xorriso -as mkisofs`,
+      `-o "${outputPath}"`,
+      `-R`,
+      `-V LAB_BOOT`,
+      `-e efi.img`,
+      `-no-emul-boot`,
+      `-partition_offset 16`,
+      `-append_partition 2 0xEF "${efiImg}"`,
+      `-appended_part_as_gpt`,
+      `"${work}"`,
+    ].join(" "), { stdio: "pipe" });
+    logger.info(`Generated boot ISO (${archs.join(", ")}): ${outputPath}`);
+  } finally { // always remove the staging dir, including when a step above throws
+    rmSync(work, { recursive: true, force: true });
+  }
+}
+
+/** Compute a short hash of all inputs that affect ISO content: bastion URL, Fedora version, iPXE binaries and the cached kernel + initrd. */
+function computeIsoHash(config: BastionConfig): string {
+  const h = createHash("sha256");
+  h.update(`${config.serverIp}:${config.httpPort}`);
+  h.update(config.fedoraVersion);
+  // iPXE binaries: fingerprint the first existing candidate per architecture (path, size, mtime).
+  for (const paths of Object.values(IPXE_ISO_PATHS)) {
+    const srcFile = paths.src.find((s) => existsSync(s));
+    if (srcFile) {
+      const st = statSync(srcFile);
+      h.update(`${srcFile}:${st.size}:${st.mtimeMs}`);
+    }
+  }
+  // Include kernel/initrd cache state: both files are embedded, so a change to either one must invalidate the ISO.
+  const cacheDir = join(config.bastionDir, "iso-cache");
+  for (const arch of ["x86_64", "aarch64"]) {
+    for (const kind of ["vmlinuz", "initrd"]) {
+      const cached = join(cacheDir, `${kind}-${arch}`);
+      if (existsSync(cached)) h.update(`${cached}:${statSync(cached).size}`);
+    }
+  }
+  return h.digest("hex").slice(0, 16);
+}
+
+/**
+ * Ensure boot.iso exists and is up-to-date in httpDir.
+ * Called during startup so @fastify/static can serve it with Range support.
+ * The stamp is hashed again after the build because generating the ISO can populate the kernel/initrd cache that the hash covers.
+ */
+export function ensureBootIso(config: BastionConfig): void {
+  const isoPath = join(config.httpDir, "boot.iso");
+  const hashPath = join(config.httpDir, "boot.iso.hash");
+  const cachedHash = existsSync(hashPath) ? readFileSync(hashPath, "utf-8").trim() : "";
+
+  if (existsSync(isoPath) && computeIsoHash(config) === cachedHash) {
+    logger.info("  Boot ISO -- cached (up to date)");
+    return;
+  }
+
+  if (existsSync(isoPath)) {
+    logger.info("  Boot ISO -- inputs changed, regenerating...");
+    try { unlinkSync(isoPath); } catch { /* ignore */ }
+  } else {
+    logger.info("  Boot ISO -- generating...");
+  }
+
+  generateIso(config, isoPath);
+  // Stamp with the post-build hash; a pre-build hash would never match on the next start once the cache has been filled.
+  writeFileSync(hashPath, computeIsoHash(config));
+}
--- a/bastion/src/bastion/src/routes/dispatch.ts
+++ b/bastion/src/bastion/src/routes/dispatch.ts
@@ -0,0 +1,77 @@
+// iPXE dispatch route.
+// Routes PXE boot requests based on machine state:
+//   - install_queue -> install mode (serve Fedora installer + per-MAC kickstart)
+//   - installed     -> exit (boot from local disk)
+//   - unknown       -> discovery mode (collect hardware, POST to bastion)
+
+import type { FastifyInstance } from "fastify";
+import type { BastionConfig } from "@lab/shared";
+import type { StateManager } from "../services/state.js";
+import {
+  renderDiscoverIpxe,
+  renderInstallIpxe,
+  renderLocalBootIpxe,
+} from "../templates/boot.ipxe.js";
+import { renderUbuntuInstallIpxe } from "../templates/ubuntu-boot.ipxe.js";
+import { logger } from "../services/logger.js";
+
+/**
+ * Register GET /dispatch, the iPXE entry point that picks a boot script from the machine's state:
+ * queued for install -> installer script, already installed -> local-disk boot, anything else -> discovery.
+ */
+export function registerDispatchRoutes(
+  app: FastifyInstance,
+  config: BastionConfig,
+  state: StateManager,
+): void {
+  app.get<{ Querystring: { mac?: string } }>("/dispatch", async (request, reply) => {
+    // Normalise to lower case with ":" separators before using the MAC as a state key.
+    const mac = (request.query.mac ?? "").toLowerCase().replace(/-/g, ":");
+    const snapshot = state.load();
+    const { serverIp, httpPort } = config;
+
+    // Case 1: queued -> installer script for the requested OS (Fedora unless the name starts with "ubuntu").
+    const pending = snapshot.install_queue[mac];
+    if (pending) {
+      const hostname = pending.hostname ?? "lab-node";
+      const os = pending.os ?? "fedora-43";
+      logger.info(`INSTALL STARTED: ${mac} -> ${hostname} (${os})`);
+      const installScript = os.startsWith("ubuntu")
+        ? renderUbuntuInstallIpxe({
+            mac,
+            hostname,
+            serverIp,
+            httpPort,
+            ubuntuVersion: config.ubuntuVersion,
+          })
+        : renderInstallIpxe({
+            mac,
+            hostname,
+            serverIp,
+            httpPort,
+            fedoraVersion: config.fedoraVersion,
+            fedoraMirror: config.fedoraMirror,
+          });
+      return reply.type("text/plain").send(installScript);
+    }
+
+    // Case 2: already installed -> local-boot script.
+    const known = snapshot.installed[mac];
+    if (known) {
+      const hostname = known.hostname ?? "?";
+      logger.info(`PXE request from ${mac} (${hostname}) - already installed, booting local disk`);
+      const localScript = renderLocalBootIpxe(hostname);
+      return reply.type("text/plain").send(localScript);
+    }
+
+    // Case 3: unknown MAC -> discovery mode
+    logger.info(`PXE request from ${mac} -> discovery mode`);
+    const discoverScript = renderDiscoverIpxe({
+      mac,
+      serverIp,
+      httpPort,
+      fedoraMirror: config.fedoraMirror,
+    });
+    return reply.type("text/plain").send(discoverScript);
+  });
+}
--- a/bastion/src/bastion/src/routes/kickstart.ts
+++ b/bastion/src/bastion/src/routes/kickstart.ts
@@ -0,0 +1,71 @@
+// Kickstart generation routes.
+// Serves per-MAC install kickstart, static discovery kickstart,
+// and Ubuntu autoinstall cloud-init endpoints.
+
+import type { FastifyInstance } from "fastify";
+import type { BastionConfig } from "@lab/shared";
+import type { StateManager } from "../services/state.js";
+import { generateInstallKickstart, generateDiscoverKickstart } from "../services/kickstart-generator.js";
+import { renderUbuntuAutoinstall, renderUbuntuMetaData, type UbuntuAutoinstallParams } from "../templates/ubuntu-autoinstall.js";
+
+/**
+ * Register the installer-configuration endpoints: /ks (per-MAC kickstart), /discover.ks and the Ubuntu
+ * autoinstall user-data / meta-data documents. A MAC that is not in the install queue gets default values.
+ */
+export function registerKickstartRoutes(
+  app: FastifyInstance,
+  config: BastionConfig,
+  state: StateManager,
+): void {
+  // Lower-case, ":"-separated form used as the key of the install queue.
+  const canonicalMac = (raw: string): string => raw.toLowerCase().replace(/-/g, ":");
+  // State is re-read on every request; the result may be undefined when the MAC is not queued.
+  const queuedEntry = (mac: string) => state.load().install_queue[mac];
+
+  // Per-MAC install kickstart
+  app.get<{ Querystring: { mac?: string } }>("/ks", async (request, reply) => {
+    const entry = queuedEntry(canonicalMac(request.query.mac ?? ""));
+    const kickstart = generateInstallKickstart(config, {
+      hostname: entry?.hostname ?? "lab-node",
+      disk: entry?.disk ?? "",
+      role: entry?.role ?? "worker",
+    });
+    return reply.type("text/plain").send(kickstart);
+  });
+
+  // Static discovery kickstart
+  app.get("/discover.ks", async (_request, reply) => {
+    const kickstart = generateDiscoverKickstart(config);
+    return reply.type("text/plain").send(kickstart);
+  });
+
+  // Ubuntu autoinstall user-data (cloud-init)
+  app.get<{ Params: { mac: string } }>("/autoinstall/:mac/user-data", async (request, reply) => {
+    const entry = queuedEntry(canonicalMac(request.params.mac));
+    const { domain, ubuntuVersion, timezone, locale, serverIp, httpPort, sshKeys, adminUser } = config;
+
+    const aiParams: UbuntuAutoinstallParams = {
+      hostname: entry?.hostname ?? "lab-node",
+      disk: entry?.disk ?? "",
+      role: entry?.role ?? "worker",
+      domain,
+      ubuntuVersion,
+      timezone,
+      locale,
+      serverIp,
+      httpPort,
+      sshKeys,
+      adminUser,
+    };
+    const userData = renderUbuntuAutoinstall(aiParams);
+    return reply.type("text/plain").send(userData);
+  });
+
+  // Ubuntu autoinstall meta-data (cloud-init)
+  app.get<{ Params: { mac: string } }>("/autoinstall/:mac/meta-data", async (request, reply) => {
+    const entry = queuedEntry(canonicalMac(request.params.mac));
+    const hostname = entry?.hostname ?? "lab-node";
+    const metaData = renderUbuntuMetaData(hostname);
+    return reply.type("text/plain").send(metaData);
+  });
+}
--- a/bastion/src/bastion/src/server.ts
+++ b/bastion/src/bastion/src/server.ts
@@ -0,0 +1,69 @@
+// Fastify application setup with all routes registered.
+
+import Fastify from "fastify";
+import fastifyStatic from "@fastify/static";
+import { mkdirSync, existsSync } from "node:fs";
+import type { BastionConfig } from "@lab/shared";
+import { StateManager } from "./services/state.js";
+import { InstallLogBuffer } from "./services/install-log.js";
+import { SyslogListener } from "./services/syslog-listener.js";
+import { logger } from "./services/logger.js";
+import { registerDispatchRoutes } from "./routes/dispatch.js";
+import { registerKickstartRoutes } from "./routes/kickstart.js";
+import { registerApiRoutes } from "./routes/api.js";
+
+
+/**
+ * Assemble the Fastify application: shared services, static file serving, route handlers and request logging.
+ * This function does not call listen(); it returns the app together with the service instances it created.
+ */
+export function createApp(config: BastionConfig): { app: ReturnType<typeof Fastify>; state: StateManager; installLog: InstallLogBuffer; syslog: SyslogListener } {
+  // Winston handles logging, so Fastify's own logger is disabled.
+  const server = Fastify({ logger: false });
+
+  const stateManager = new StateManager(config.stateFile);
+  stateManager.init();
+  const logBuffer = new InstallLogBuffer(config.bastionDir);
+  const syslogListener = new SyslogListener(config.syslogPort, logBuffer, stateManager);
+
+  // Serve static files (vmlinuz, initrd.img, iPXE binaries) from the HTTP directory
+  const { httpDir, tftpDir } = config;
+  mkdirSync(httpDir, { recursive: true });
+  server.register(fastifyStatic, { root: httpDir, prefix: "/", decorateReply: false });
+
+  // Also serve TFTP files (iPXE EFI binaries) over HTTP for UEFI HTTP Boot
+  if (existsSync(tftpDir)) {
+    server.register(fastifyStatic, { root: tftpDir, prefix: "/tftp/", decorateReply: false });
+  }
+
+  // Register route handlers
+  registerDispatchRoutes(server, config, stateManager);
+  registerKickstartRoutes(server, config, stateManager);
+  registerApiRoutes(server, stateManager, logBuffer);
+  // boot.iso is generated at startup and served as a static file from httpDir
+  // (static serving supports HTTP Range requests, required by JetKVM streaming)
+
+  // Log all requests
+  server.addHook("onRequest", async (incoming) => {
+    logger.info(`HTTP: ${incoming.ip} ${incoming.method} ${incoming.url}`);
+  });
+
+  return {
+    app: server,
+    state: stateManager,
+    installLog: logBuffer,
+    syslog: syslogListener,
+  };
+}
+
+export async function startServer(config: BastionConfig): Promise<void> {
+  const { app } = createApp(config);
+  const port = config.httpPort;
+  try {
+    await app.listen({ port, host: "0.0.0.0" }); // bind on every interface
+    logger.info(`HTTP server listening on :${port}`);
+  } catch (err) {
+    logger.error(`Failed to start HTTP server: ${err instanceof Error ? err.message : String(err)}`);
+    throw err; // after logging, the failure still propagates to the caller
+  }
+}
--- a/bastion/src/bastion/src/services/dnsmasq.ts
+++ b/bastion/src/bastion/src/services/dnsmasq.ts
@@ -0,0 +1,70 @@
+// Generate dnsmasq configuration and manage the dnsmasq process lifecycle.
+
+import { writeFileSync, mkdirSync } from "node:fs";
+import { dirname } from "node:path";
+import type { ResultPromise } from "execa";
+import { execa } from "execa";
+import type { BastionConfig } from "@lab/shared";
+import { renderDnsmasqConf } from "../templates/dnsmasq.conf.js";
+import { logger } from "./logger.js";
+
+type DnsmasqProcess = ResultPromise<{ stdout: "pipe"; stderr: "pipe" }>; // execa handle for a child whose stdout/stderr are piped
+let dnsmasqProcess: DnsmasqProcess | null = null; // child started by startDnsmasq(); null when none is tracked
+
+/**
+ * Render dnsmasq.conf from `config`, write it under the bastion directory and return the file's path.
+ */
+export function generateDnsmasqConf(config: BastionConfig): string {
+  const target = `${config.bastionDir}/dnsmasq.conf`;
+  const rendered = renderDnsmasqConf(config);
+  mkdirSync(dirname(target), { recursive: true }); // the bastion directory may not exist yet
+  writeFileSync(target, rendered);
+  logger.info(`Generated dnsmasq config: ${target}`);
+  return target;
+}
+
+/**
+ * Start dnsmasq in the foreground as a child process, forward its output to the log and track the handle for stopDnsmasq().
+ * NOTE(review): the execa handle is promise-like, so awaiting this async function's result appears to wait for
+ * dnsmasq itself to exit rather than just for the spawn -- confirm that callers expect this.
+ */
+export async function startDnsmasq(config: BastionConfig): Promise<DnsmasqProcess> {
+  const confPath = generateDnsmasqConf(config);
+
+  logger.info(`Starting PXE server (${config.dhcpMode}DHCP on ${config.iface})...`);
+
+  const proc = execa("dnsmasq", ["--no-daemon", `--conf-file=${confPath}`], {
+    stdout: "pipe",
+    stderr: "pipe",
+  });
+  dnsmasqProcess = proc;
+
+  // Both output streams are forwarded to the application log, one entry per non-empty chunk.
+  const forward = (data: Buffer): void => {
+    const line = data.toString().trim();
+    if (line) logger.info(`dnsmasq: ${line}`);
+  };
+  proc.stdout?.on("data", forward);
+  proc.stderr?.on("data", forward);
+
+  proc.on("exit", (code) => {
+    if (code !== null && code !== 0) {
+      logger.error(`dnsmasq exited with code ${code}. Check if another DHCP/TFTP service is running.`);
+    }
+    // Only drop the handle if it is still ours: a later startDnsmasq() call may already have replaced it.
+    if (dnsmasqProcess === proc) dnsmasqProcess = null;
+  });
+
+  return proc;
+}
+
+/**
+ * Send SIGTERM to the tracked dnsmasq child, if there is one, and forget the handle.
+ */
+export function stopDnsmasq(): void {
+  const running = dnsmasqProcess;
+  if (running === null) return; // nothing tracked, nothing to stop
+  logger.info("Stopping dnsmasq...");
+  running.kill("SIGTERM");
+  dnsmasqProcess = null;
+}
--- a/bastion/src/bastion/src/services/install-log.ts
+++ b/bastion/src/bastion/src/services/install-log.ts
@@ -0,0 +1,86 @@
+// Per-machine install log buffer.
+// Stores raw log lines in memory (ring buffer) and persists to disk.
+// Used by /api/log for ingestion and /api/logs/:mac/follow for SSE streaming.
+
+import { mkdirSync, appendFileSync, readFileSync, existsSync } from "node:fs";
+import { join } from "node:path";
+import { progressBus } from "./progress-events.js";
+
+const MAX_LINES_IN_MEMORY = 2000; // per-MAC cap on buffered lines; the oldest lines are dropped from memory first
+
+export interface LogLine {
+  line: string; // log text exactly as it was passed to append()
+  timestamp: string; // ISO-8601 time at which the line was appended
+}
+
+export class InstallLogBuffer {
+  /** In-memory ring buffer per MAC */
+  private buffers = new Map<string, LogLine[]>();
+  private logDir: string;
+
+  constructor(bastionDir: string) {
+    this.logDir = join(bastionDir, "logs");
+    mkdirSync(this.logDir, { recursive: true });
+  }
+
+  /** Append log lines for a machine: buffer them in memory, append them to the log file and emit one "log" event per line. */
+  append(mac: string, lines: string[], hostname?: string): void {
+    // Nothing to record: avoid writing a stray blank line to the log file.
+    if (lines.length === 0) return;
+
+    const now = new Date().toISOString();
+    const buffer = this.buffers.get(mac) ?? [];
+    // Push one by one: spreading a very large batch into push() can exceed the engine's argument limit.
+    for (const line of lines) buffer.push({ line, timestamp: now });
+
+    // Trim to ring buffer size
+    if (buffer.length > MAX_LINES_IN_MEMORY) {
+      buffer.splice(0, buffer.length - MAX_LINES_IN_MEMORY);
+    }
+    this.buffers.set(mac, buffer);
+
+    // Persist to file
+    const fileContent = lines.map((l) => `${now} ${l}`).join("\n") + "\n";
+    appendFileSync(this.logFilePath(mac), fileContent);
+
+    // Emit to SSE via progressBus (use "log" stage for log lines)
+    const host = hostname ?? mac;
+    for (const line of lines) {
+      progressBus.emit({
+        mac,
+        hostname: host,
+        stage: "log",
+        detail: line,
+        timestamp: now,
+      });
+    }
+  }
+
+  /** Get up to `limit` buffered lines for a machine, starting at index `offset` of the in-memory buffer. */
+  getLines(mac: string, offset = 0, limit = 500): LogLine[] {
+    const buffer = this.buffers.get(mac) ?? [];
+    return buffer.slice(offset, offset + limit);
+  }
+
+  /** Number of lines currently buffered in memory for a machine (never more than MAX_LINES_IN_MEMORY). */
+  lineCount(mac: string): number {
+    return this.buffers.get(mac)?.length ?? 0;
+  }
+
+  /** Read full log from disk (for machines no longer in memory); null when no log file exists. */
+  readFromDisk(mac: string): string | null {
+    const filePath = this.logFilePath(mac);
+    if (!existsSync(filePath)) return null;
+    return readFileSync(filePath, "utf-8");
+  }
+
+  /** Drop the in-memory buffer for a machine (after install complete or forget); the file on disk is kept. */
+  clear(mac: string): void {
+    this.buffers.delete(mac);
+  }
+
+  /** Map a MAC to its log file; every character outside [0-9A-Za-z_-] becomes "-" so a crafted MAC cannot escape logDir. */
+  private logFilePath(mac: string): string {
+    return join(this.logDir, `${mac.replace(/[^0-9A-Za-z_-]/g, "-")}.log`);
+  }
+}
--- a/bastion/src/bastion/src/services/iso-builder.ts
+++ b/bastion/src/bastion/src/services/iso-builder.ts
@@ -0,0 +1,437 @@
+// Pure TypeScript UEFI-bootable ISO builder.
+// Creates an ISO 9660 image with an embedded FAT EFI system partition
+// containing iPXE EFI binaries and an autoexec script.
+// No external tools required (no xorriso, mtools).
+
+import { readFileSync } from "node:fs";
+
+const SECTOR_SIZE = 2048; // ISO 9660 logical sector size in bytes
+const FAT_SECTOR_SIZE = 512; // sector size in bytes used for the embedded FAT image
+
+// --- Utility helpers ---
+
+function asciiPad(s: string, len: number, pad = " "): Buffer { // fixed-width ASCII field: `s` cut or padded to `len` bytes
+  const field = Buffer.alloc(len, pad.charCodeAt(0));
+  Buffer.from(s, "ascii").copy(field, 0, 0, len);
+  return field;
+}
+
+function u16le(n: number): Buffer { // unsigned 16-bit, little-endian
+  const bytes = Buffer.alloc(2);
+  bytes.writeUInt16LE(n, 0);
+  return bytes;
+}
+
+function u32le(n: number): Buffer { // unsigned 32-bit, little-endian
+  const bytes = Buffer.alloc(4);
+  bytes.writeUInt32LE(n, 0);
+  return bytes;
+}
+
+function u16be(n: number): Buffer { // unsigned 16-bit, big-endian
+  const bytes = Buffer.alloc(2);
+  bytes.writeUInt16BE(n, 0);
+  return bytes;
+}
+
+function u32be(n: number): Buffer { // unsigned 32-bit, big-endian
+  const bytes = Buffer.alloc(4);
+  bytes.writeUInt32BE(n, 0);
+  return bytes;
+}
+
+/** Encode `n` in the ISO 9660 both-endian 16-bit layout: little-endian bytes, then big-endian bytes (4 bytes in total). */
+function u16both(n: number): Buffer {
+  return Buffer.concat([u16le, u16be].map((encode) => encode(n)));
+}
+
+/** Encode `n` in the ISO 9660 both-endian 32-bit layout: little-endian bytes, then big-endian bytes (8 bytes in total). */
+function u32both(n: number): Buffer {
+  return Buffer.concat([u32le, u32be].map((encode) => encode(n)));
+}
+
+function isoDate(d: Date): Buffer { // ISO 9660 date: 16 ASCII digits "YYYYMMDDHHMMSSCC" + one timezone-offset byte
+  const two = (v: number): string => v.toString().padStart(2, "0");
+  const digits = [
+    d.getUTCFullYear().toString().padStart(4, "0"),
+    two(d.getUTCMonth() + 1),
+    two(d.getUTCDate()),
+    two(d.getUTCHours()),
+    two(d.getUTCMinutes()),
+    two(d.getUTCSeconds()),
+    "00", // hundredths
+  ].join("");
+  const stamp = Buffer.alloc(17, 0); // last byte stays 0: UTC offset (0 = UTC)
+  stamp.write(digits, 0, 16, "ascii");
+  return stamp;
+}
+
+function dirRecordDate(d: Date): Buffer {
+  // 7-byte recording date: years since 1900, month, day, hour, minute, second, UTC offset
+  return Buffer.from([
+    d.getUTCFullYear() - 1900,
+    d.getUTCMonth() + 1,
+    d.getUTCDate(),
+    d.getUTCHours(),
+    d.getUTCMinutes(),
+    d.getUTCSeconds(),
+    0, // UTC
+  ]);
+}
+
+// --- FAT12 filesystem builder ---
+
+function buildFatImage(files: Array<{ path: string; data: Buffer }>): Buffer {
+  // Build a minimal FAT12 filesystem image in memory and return it as one Buffer. Each entry of `files` is a "/"-separated path plus its contents; only root-level files and files under EFI/BOOT/ receive directory entries.
+  // Layout: BPB | FAT | FAT copy | Root dir | Data clusters
+
+  const bytesPerSector = FAT_SECTOR_SIZE;
+  const sectorsPerCluster = 4; // 2KB clusters
+  const clusterSize = bytesPerSector * sectorsPerCluster;
+  const reservedSectors = 1; // the boot sector that holds the BPB
+  const numFats = 2;
+  const rootEntryCount = 64; // 64 * 32 = 2048 bytes = 4 sectors
+  const rootDirSectors = Math.ceil((rootEntryCount * 32) / bytesPerSector);
+
+  // Calculate data size needed (each file rounded up to whole clusters)
+  let totalDataBytes = 0;
+  for (const f of files) totalDataBytes += Math.ceil(f.data.length / clusterSize) * clusterSize;
+  // Add directory clusters for EFI and EFI/BOOT
+  totalDataBytes += clusterSize * 2;
+
+  const dataClusters = Math.ceil(totalDataBytes / clusterSize) + 2; // +2 safety. NOTE(review): the count is never checked against what 12-bit FAT entries can address -- confirm inputs stay small
+  const fatEntries = dataClusters + 2; // clusters start at 2
+  const fatBytes = Math.ceil((fatEntries * 3) / 2); // FAT12: 1.5 bytes per entry
+  const sectorsPerFat = Math.ceil(fatBytes / bytesPerSector);
+
+  const totalSectors = reservedSectors + (numFats * sectorsPerFat) + rootDirSectors + (dataClusters * sectorsPerCluster);
+  const image = Buffer.alloc(totalSectors * bytesPerSector, 0);
+
+  // --- BPB (BIOS Parameter Block) ---
+  image[0] = 0xEB; image[1] = 0x3C; image[2] = 0x90; // Jump + NOP
+  image.write("LABCTL  ", 3, 8, "ascii"); // OEM
+  image.writeUInt16LE(bytesPerSector, 11);
+  image[13] = sectorsPerCluster;
+  image.writeUInt16LE(reservedSectors, 14);
+  image[16] = numFats;
+  image.writeUInt16LE(rootEntryCount, 17);
+  image.writeUInt16LE(totalSectors < 0x10000 ? totalSectors : 0, 19); // 16-bit total sector count; 0 when it does not fit (no 32-bit count is written in this function)
+  image[21] = 0xF0; // media descriptor (removable)
+  image.writeUInt16LE(sectorsPerFat, 22);
+  image.writeUInt16LE(1, 24); // sectors per track
+  image.writeUInt16LE(1, 26); // heads
+  image[38] = 0x29; // Extended boot sig
+  image.writeUInt32LE(0x12345678, 39); // volume serial
+  image.write("IPXE BOOT  ", 43, 11, "ascii"); // volume label
+  image.write("FAT12   ", 54, 8, "ascii"); // filesystem type
+  image[510] = 0x55; image[511] = 0xAA; // Boot signature
+
+  // --- FAT table ---
+  const fatOffset = reservedSectors * bytesPerSector;
+  const rootDirOffset = fatOffset + (numFats * sectorsPerFat * bytesPerSector);
+  const dataOffset = rootDirOffset + (rootDirSectors * bytesPerSector);
+
+  // FAT12 helper: write a 12-bit entry for `cluster` into FAT copy number `fat`
+  function fatSet(fat: number, cluster: number, value: number): void {
+    const off = fatOffset + (fat * sectorsPerFat * bytesPerSector);
+    const byteIdx = Math.floor(cluster * 3 / 2);
+    if (cluster % 2 === 0) { // even cluster: whole first byte + low nibble of the second
+      image[off + byteIdx] = value & 0xFF;
+      image[off + byteIdx + 1] = (image[off + byteIdx + 1]! & 0xF0) | ((value >> 8) & 0x0F);
+    } else { // odd cluster: high nibble of the first byte + whole second byte
+      image[off + byteIdx] = (image[off + byteIdx]! & 0x0F) | ((value & 0x0F) << 4);
+      image[off + byteIdx + 1] = (value >> 4) & 0xFF;
+    }
+  }
+
+  // Media descriptor in FAT (entries 0 and 1 are reserved in every FAT copy)
+  for (let f = 0; f < numFats; f++) {
+    fatSet(f, 0, 0xFF0);
+    fatSet(f, 1, 0xFFF);
+  }
+
+  let nextCluster = 2;
+
+  function allocClusters(size: number): number { // reserves a contiguous chain for `size` bytes and returns its first cluster
+    const needed = Math.max(1, Math.ceil(size / clusterSize)); // even an empty file takes one cluster
+    const startCluster = nextCluster;
+    for (let i = 0; i < needed; i++) {
+      const c = nextCluster++;
+      const next = (i === needed - 1) ? 0xFFF : c + 1; // 0xFFF terminates the chain
+      for (let f = 0; f < numFats; f++) fatSet(f, c, next);
+    }
+    return startCluster;
+  }
+
+  function clusterOffset(cluster: number): number { // byte offset of a data cluster within the image
+    return dataOffset + (cluster - 2) * clusterSize;
+  }
+
+  function writeDirEntry(dirBuf: Buffer, entryIdx: number, name: string, ext: string, cluster: number, size: number, isDir: boolean): void { // writes one 32-byte short-name entry at slot `entryIdx` of `dirBuf`
+    const off = entryIdx * 32;
+    dirBuf.write(name.toUpperCase().padEnd(8, " "), off, 8, "ascii");
+    dirBuf.write(ext.toUpperCase().padEnd(3, " "), off + 8, 3, "ascii");
+    dirBuf[off + 11] = isDir ? 0x10 : 0x20; // attributes
+    dirBuf.writeUInt16LE(cluster & 0xFFFF, off + 26); // first cluster low
+    dirBuf.writeUInt32LE(isDir ? 0 : size, off + 28); // file size
+  }
+
+  // --- Create directory structure ---
+  // Root: EFI dir + autoexec.ipxe
+  // EFI: BOOT dir
+  // BOOT: BOOTX64.EFI, BOOTAA64.EFI
+
+  // EFI directory cluster
+  const efiDirCluster = allocClusters(clusterSize);
+  const efiDirBuf = Buffer.alloc(clusterSize, 0);
+
+  // BOOT directory cluster
+  const bootDirCluster = allocClusters(clusterSize);
+  const bootDirBuf = Buffer.alloc(clusterSize, 0);
+
+  // Write . and .. entries for EFI
+  writeDirEntry(efiDirBuf, 0, ".", "", efiDirCluster, 0, true);
+  writeDirEntry(efiDirBuf, 1, "..", "", 0, 0, true); // cluster 0: the parent is the root directory
+  // BOOT subdir in EFI
+  writeDirEntry(efiDirBuf, 2, "BOOT", "", bootDirCluster, 0, true);
+
+  // Write . and .. entries for BOOT
+  writeDirEntry(bootDirBuf, 0, ".", "", bootDirCluster, 0, true);
+  writeDirEntry(bootDirBuf, 1, "..", "", efiDirCluster, 0, true);
+
+  let bootEntryIdx = 2;
+
+  // Root directory entries
+  let rootEntryIdx = 0;
+  // Volume label (rootBuf is a view into `image`, so writes land directly in the root directory region)
+  const rootBuf = image.subarray(rootDirOffset, rootDirOffset + rootDirSectors * bytesPerSector);
+  rootBuf.write("IPXE BOOT  ", rootEntryIdx * 32, 11, "ascii");
+  rootBuf[rootEntryIdx * 32 + 11] = 0x08; // volume label attribute
+  rootEntryIdx++;
+
+  // EFI directory in root
+  writeDirEntry(rootBuf, rootEntryIdx++, "EFI", "", efiDirCluster, 0, true);
+
+  // Write files
+  for (const file of files) {
+    const parts = file.path.toUpperCase().split("/").filter(Boolean);
+    const fileName = parts[parts.length - 1]!;
+    const nameParts = fileName.split(".");
+    const name = nameParts[0]!.substring(0, 8);
+    const ext = (nameParts[1] ?? "").substring(0, 3); // NOTE(review): 8.3 names only -- "autoexec.ipxe" is stored as AUTOEXEC.IPX; confirm the firmware/iPXE lookup tolerates that
+
+    const fileCluster = allocClusters(file.data.length);
+    file.data.copy(image, clusterOffset(fileCluster));
+
+    if (parts.length === 1) {
+      // Root level file
+      writeDirEntry(rootBuf, rootEntryIdx++, name, ext, fileCluster, file.data.length, false);
+    } else if (parts.length === 3 && parts[0] === "EFI" && parts[1] === "BOOT") {
+      // EFI/BOOT/ file (a file at any other path keeps its clusters but gets no directory entry)
+      writeDirEntry(bootDirBuf, bootEntryIdx++, name, ext, fileCluster, file.data.length, false);
+    }
+  }
+
+  // Write directory clusters to image (done last, once every EFI/BOOT entry has been added to bootDirBuf)
+  efiDirBuf.copy(image, clusterOffset(efiDirCluster));
+  bootDirBuf.copy(image, clusterOffset(bootDirCluster));
+
+  return image;
+}
+
+// --- ISO 9660 builder ---
+/** Build an El Torito (UEFI) bootable ISO 9660 image: the files (plus an optional autoexec.ipxe) go into a FAT boot image stored from sector 21. */
+export function buildBootIso(efiFiles: Array<{ path: string; data: Buffer }>, scriptContent?: string): Buffer {
+  const now = new Date();
+
+  // Build FAT image with all files
+  const allFiles = [...efiFiles];
+  if (scriptContent) { // undefined or empty script: no autoexec.ipxe is added
+    allFiles.push({ path: "autoexec.ipxe", data: Buffer.from(scriptContent, "utf-8") });
+  }
+  const fatImage = buildFatImage(allFiles);
+
+  // ISO layout:
+  // Sector 0-15: System area (unused)
+  // Sector 16: Primary Volume Descriptor
+  // Sector 17: Boot Record Volume Descriptor (El Torito)
+  // Sector 18: Volume Descriptor Set Terminator
+  // Sector 19: Root directory record
+  // Sector 20: El Torito boot catalog
+  // Sector 21: El Torito boot image (the FAT image, this gets large)
+  // After FAT: one zero-filled trailing sector
+
+  const fatSectors = Math.ceil(fatImage.length / SECTOR_SIZE);
+  const rootDirSector = 19;
+  const bootCatalogSector = 20;
+  const efiImageSector = 21;
+  const totalSectors = efiImageSector + fatSectors + 1;
+
+  const iso = Buffer.alloc(totalSectors * SECTOR_SIZE, 0);
+
+  // --- Primary Volume Descriptor (sector 16) ---
+  const pvd = iso.subarray(16 * SECTOR_SIZE, 17 * SECTOR_SIZE);
+  pvd[0] = 1; // type: Primary
+  pvd.write("CD001", 1, 5, "ascii"); // standard identifier
+  pvd[6] = 1; // version
+  asciiPad("LABCTL", 32).copy(pvd, 8); // system identifier
+  asciiPad("IPXE_BOOT", 32).copy(pvd, 40); // volume identifier
+  u32both(totalSectors).copy(pvd, 80); // volume space size
+  u16both(1).copy(pvd, 120); // volume set size
+  u16both(1).copy(pvd, 124); // volume sequence number
+  u16both(SECTOR_SIZE).copy(pvd, 128); // logical block size
+
+  // Root directory record (34 bytes), embedded in the PVD at offset 156
+  const rootRec = Buffer.alloc(34, 0);
+  rootRec[0] = 34; // length
+  rootRec[1] = 0; // extended attribute length
+  u32both(rootDirSector).copy(rootRec, 2); // extent location
+  u32both(SECTOR_SIZE).copy(rootRec, 10); // data length
+  dirRecordDate(now).copy(rootRec, 18);
+  rootRec[25] = 0x02; // flags: directory
+  rootRec[26] = 0; // file unit size (0: not interleaved)
+  u16both(1).copy(rootRec, 28); // volume sequence number: bytes 28-31, same offset as the "." / ".." records below
+  rootRec[32] = 1; // name length
+  rootRec[33] = 0; // name: root
+  rootRec.copy(pvd, 156); // copy to PVD
+
+  // Volume dates
+  isoDate(now).copy(pvd, 813); // creation
+  isoDate(now).copy(pvd, 830); // modification
+  Buffer.alloc(16, 0x30).copy(pvd, 847); // expiration (none): 16 ASCII '0' digits; the 17th (GMT offset) byte stays 0
+  isoDate(now).copy(pvd, 864); // effective
+  pvd[881] = 1; // file structure version
+
+  // --- Boot Record Volume Descriptor (El Torito, sector 17) ---
+  const brvd = iso.subarray(17 * SECTOR_SIZE, 18 * SECTOR_SIZE);
+  brvd[0] = 0; // type: Boot Record
+  brvd.write("CD001", 1, 5, "ascii");
+  brvd[6] = 1; // version
+  brvd.write("EL TORITO SPECIFICATION", 7, 32, "ascii");
+  u32le(bootCatalogSector).copy(brvd, 0x47); // boot catalog pointer
+
+  // --- Volume Descriptor Set Terminator (sector 18) ---
+  const vdst = iso.subarray(18 * SECTOR_SIZE, 19 * SECTOR_SIZE);
+  vdst[0] = 255; // type: terminator
+  vdst.write("CD001", 1, 5, "ascii");
+  vdst[6] = 1;
+
+  // --- Root Directory (sector 19) ---
+  const rootDir = iso.subarray(rootDirSector * SECTOR_SIZE, (rootDirSector + 1) * SECTOR_SIZE);
+  let offset = 0;
+
+  // "." entry
+  const dotRec = Buffer.alloc(34, 0);
+  dotRec[0] = 34;
+  u32both(rootDirSector).copy(dotRec, 2);
+  u32both(SECTOR_SIZE).copy(dotRec, 10);
+  dirRecordDate(now).copy(dotRec, 18);
+  dotRec[25] = 0x02;
+  u16both(1).copy(dotRec, 28);
+  dotRec[32] = 1;
+  dotRec[33] = 0;
+  dotRec.copy(rootDir, offset);
+  offset += 34;
+
+  // ".." entry
+  const dotdotRec = Buffer.alloc(34, 0);
+  dotdotRec[0] = 34;
+  u32both(rootDirSector).copy(dotdotRec, 2);
+  u32both(SECTOR_SIZE).copy(dotdotRec, 10);
+  dirRecordDate(now).copy(dotdotRec, 18);
+  dotdotRec[25] = 0x02;
+  u16both(1).copy(dotdotRec, 28);
+  dotdotRec[32] = 1;
+  dotdotRec[33] = 1;
+  dotdotRec.copy(rootDir, offset);
+  offset += 34;
+
+  // EFI boot image file entry (the FAT image visible as a file)
+  const efiFileName = "EFI.IMG;1";
+  const efiRec = Buffer.alloc(33 + efiFileName.length + ((efiFileName.length % 2 === 0) ? 1 : 0), 0);
+  efiRec[0] = efiRec.length;
+  u32both(efiImageSector).copy(efiRec, 2);
+  u32both(fatImage.length).copy(efiRec, 10);
+  dirRecordDate(now).copy(efiRec, 18);
+  efiRec[25] = 0x00; // flags: file
+  u16both(1).copy(efiRec, 28);
+  efiRec[32] = efiFileName.length;
+  efiRec.write(efiFileName, 33, efiFileName.length, "ascii");
+  efiRec.copy(rootDir, offset);
+  offset += efiRec.length;
+
+  // Boot catalog file entry
+  const catFileName = "BOOT.CAT;1";
+  const catRec = Buffer.alloc(33 + catFileName.length + ((catFileName.length % 2 === 0) ? 1 : 0), 0);
+  catRec[0] = catRec.length;
+  u32both(bootCatalogSector).copy(catRec, 2);
+  u32both(SECTOR_SIZE).copy(catRec, 10);
+  dirRecordDate(now).copy(catRec, 18);
+  catRec[25] = 0x01; // flags: hidden
+  u16both(1).copy(catRec, 28);
+  catRec[32] = catFileName.length;
+  catRec.write(catFileName, 33, catFileName.length, "ascii");
+  catRec.copy(rootDir, offset);
+
+  // --- El Torito Boot Catalog (sector 20) ---
+  const catalog = iso.subarray(bootCatalogSector * SECTOR_SIZE, (bootCatalogSector + 1) * SECTOR_SIZE);
+
+  // Validation entry (32 bytes)
+  catalog[0] = 1; // header ID
+  catalog[1] = 0xEF; // platform: EFI
+  catalog.write("LABCTL", 4, 24, "ascii"); // ID string
+  catalog[30] = 0x55; // key bytes: must be in place before summing, they are part of the checksum
+  catalog[31] = 0xAA;
+  // Checksum: the sixteen 16-bit LE words of the entry must sum to 0 (mod 0x10000)
+  let cksum = 0;
+  for (let i = 0; i < 32; i += 2) {
+    cksum += catalog[i]! + (catalog[i + 1]! << 8);
+  }
+  catalog.writeUInt16LE((0x10000 - (cksum & 0xFFFF)) & 0xFFFF, 28); // checksum word (still zero while summing)
+
+  // Default/Initial entry (32 bytes, offset 32)
+  catalog[32] = 0x88; // bootable
+  catalog[33] = 0x00; // boot media type: 0 = no emulation (0xEF is a platform ID and belongs in the validation entry)
+  catalog.writeUInt16LE(0, 34); // load segment
+  catalog[36] = 0; // system type
+  const efiImageSectors512 = Math.ceil(fatImage.length / FAT_SECTOR_SIZE);
+  catalog.writeUInt16LE(Math.min(efiImageSectors512, 0xFFFF), 38); // sector count, clamped to the 16-bit field instead of wrapping
+  catalog.writeUInt32LE(efiImageSector, 40); // load LBA
+
+  // --- EFI boot image (FAT filesystem, starting at sector 21) ---
+  fatImage.copy(iso, efiImageSector * SECTOR_SIZE);
+
+  return iso;
+}
+
+/** Build a ready-to-serve iPXE boot ISO from system iPXE binaries; throws if none can be read. */
+export function buildBastionBootIso(bastionUrl: string): Buffer {
+  const efiFiles: Array<{ path: string; data: Buffer }> = [];
+  const PATHS: Record<string, { src: string; dest: string }> = {
+    x86_64: { src: "/usr/share/ipxe/ipxe-snponly-x86_64.efi", dest: "EFI/BOOT/BOOTX64.EFI" },
+    aarch64: { src: "/usr/share/ipxe/arm64-efi/snponly.efi", dest: "EFI/BOOT/BOOTAA64.EFI" },
+  };
+  for (const { src, dest } of Object.values(PATHS)) {
+    try {
+      efiFiles.push({ path: dest, data: readFileSync(src) });
+    } catch {
+      // Architecture not available, skip
+    }
+  }
+  if (efiFiles.length === 0) {
+    throw new Error("No iPXE EFI binaries found on system");
+  }
+
+  // iPXE has no "( ... )" grouping and evaluates && / || strictly left to right,
+  // so the single DHCP retry is spelled out with a label instead of a subshell.
+  const script = [
+    "#!ipxe",
+    "",
+    "echo Booting from iPXE ISO -- connecting to bastion...",
+    "dhcp && goto chainload || echo DHCP failed, retrying...",
+    "sleep 3 && dhcp || echo DHCP retry failed, continuing anyway",
+    ":chainload",
+    `chain ${bastionUrl.replace(/\/+$/, "")}/boot.ipxe || shell`, // tolerate a trailing "/" on the URL
+  ].join("\n");
+  return buildBootIso(efiFiles, script);
+}
--- a/bastion/src/bastion/src/services/kickstart-generator.ts
+++ b/bastion/src/bastion/src/services/kickstart-generator.ts
@@ -0,0 +1,45 @@
+// Generate kickstart content for discovery and install modes.
+// Uses template literal functions -- no external template engine.
+
+import type { BastionConfig, Role } from "@lab/shared";
+import { renderDiscoverKickstart } from "../templates/discover.ks.js";
+import { renderInstallKickstart, type InstallKickstartParams } from "../templates/install.ks.js";
+
+/**
+ * Render the discovery kickstart (it collects hardware info and POSTs to bastion) from the server IP and HTTP port.
+ */
+export function generateDiscoverKickstart(config: BastionConfig): string {
+  const { serverIp, httpPort } = config;
+  const discoverParams = { serverIp, httpPort };
+  const kickstart = renderDiscoverKickstart(discoverParams);
+  return kickstart;
+}
+
+/**
+ * Render the install kickstart for one machine: its hostname, disk and role merged with the bastion-wide settings.
+ */
+export function generateInstallKickstart(
+  config: BastionConfig,
+  params: {
+    hostname: string;
+    disk: string;
+    role: Role;
+  },
+): string {
+  const { hostname, disk, role } = params;
+  const { domain, fedoraVersion, timezone, locale } = config;
+  const { serverIp, httpPort, syslogPort, sshKeys, adminUser } = config;
+  // Per-machine fields first, then the settings shared by every install.
+  const ksParams: InstallKickstartParams = {
+    hostname,
+    disk,
+    role,
+    domain,
+    fedoraVersion,
+    timezone,
+    locale,
+    serverIp, httpPort, syslogPort,
+    sshKeys, adminUser,
+  };
+  return renderInstallKickstart(ksParams);
+}
--- a/bastion/src/bastion/src/services/labd-connection.ts
+++ b/bastion/src/bastion/src/services/labd-connection.ts
@@ -0,0 +1,252 @@
+// WebSocket connection from bastion to labd for registration and state sync.
+// If LABD_URL is configured, bastion registers with labd on startup and pushes
+// state changes. If not configured, bastion runs standalone (backward compatible).
+
+import WebSocket from "ws";
+import { readFileSync, writeFileSync, existsSync } from "node:fs";
+import { hostname as osHostname } from "node:os";
+import type { BastionState, BastionConfig } from "@lab/shared";
+import {
+  type BastionMessage,
+  type LabdBastionMessage,
+  isLabdBastionMessage,
+} from "@lab/shared";
+import { logger } from "./logger.js";
+
+const HEARTBEAT_INTERVAL_MS = 10_000; // period of the heartbeat timer
+const RECONNECT_BASE_DELAY_MS = 1_000; // first reconnect delay; doubled on every further retry
+const RECONNECT_MAX_DELAY_MS = 30_000; // upper bound for the reconnect delay
+
+type CommandHandler = (msg: LabdBastionMessage) => Promise<{ status: "ok" | "error"; data?: unknown; error?: string }>; // result is spread into the "command-response" reply
+/** WebSocket client that enrolls this bastion with labd, pushes state and heartbeats, answers commands, and reconnects with capped exponential backoff. */
+export class BastionConnection {
+  private ws: WebSocket | null = null;
+  private bastionId: string | null = null;
+  private heartbeatTimer: NodeJS.Timeout | null = null;
+  private reconnectTimer: NodeJS.Timeout | null = null;
+  private retryCount = 0;
+  private closed = false;
+  private startTime = Date.now();
+  private commandHandlers = new Map<string, CommandHandler>();
+
+  constructor(
+    private readonly config: BastionConfig,
+    private readonly getState: () => BastionState,
+  ) {
+    // Load persisted bastionId if we've enrolled before
+    const idFile = `${config.bastionDir}/bastion-id`;
+    if (existsSync(idFile)) {
+      this.bastionId = readFileSync(idFile, "utf-8").trim();
+    }
+  }
+
+  /** Register a handler for incoming commands from labd. */
+  onCommand(type: string, handler: CommandHandler): void {
+    this.commandHandlers.set(type, handler);
+  }
+
+  connect(): void {
+    if (this.closed) return;
+    if (!this.config.labdUrl) return;
+
+    const wsUrl = this.config.labdUrl
+      .replace(/^https:/, "wss:")
+      .replace(/^http:/, "ws:");
+
+    const token = this.config.bastionJoinToken ?? "";
+    const url = `${wsUrl}/ws/bastion?token=${encodeURIComponent(token)}`;
+
+    logger.info(`Connecting to labd at ${this.config.labdUrl}...`);
+
+    this.ws = new WebSocket(url);
+
+    this.ws.on("open", () => {
+      logger.info("Connected to labd");
+      this.retryCount = 0;
+
+      // Send enrollment or re-registration
+      if (this.bastionId) {
+        // Already enrolled — send state sync immediately
+        this.sendStateSync();
+      } else {
+        // First time — enroll
+        this.send({
+          type: "bastion-enroll",
+          token,
+          hostname: osHostname(),
+          network: this.config.network,
+          serverIp: this.config.serverIp,
+        });
+      }
+
+      this.startHeartbeat();
+    });
+
+    this.ws.on("message", (data: WebSocket.Data) => {
+      try {
+        const raw = data.toString();
+        const msg: unknown = JSON.parse(raw);
+
+        if (!isLabdBastionMessage(msg)) {
+          logger.warn(`Unknown message from labd: ${String((msg as { type?: unknown } | null)?.type)}`); // null-safe: a JSON `null` payload must not throw here
+          return;
+        }
+
+        this.handleMessage(msg);
+      } catch (err) {
+        logger.error(`Failed to handle labd message: ${err instanceof Error ? err.message : String(err)}`); // covers JSON.parse and handleMessage failures alike
+      }
+    });
+
+    this.ws.on("close", () => {
+      logger.warn("Disconnected from labd");
+      this.stopHeartbeat();
+      this.scheduleReconnect();
+    });
+
+    this.ws.on("error", (err) => {
+      logger.error(`WebSocket error: ${err.message}`);
+      // close event will fire after this, triggering reconnect
+    });
+  }
+
+  /** Push current state to labd. Call this after any state change. */
+  syncState(): void {
+    if (!this.bastionId || !this.ws || this.ws.readyState !== WebSocket.OPEN) return;
+    this.sendStateSync();
+  }
+
+  /** Forward a progress event to labd. */
+  sendProgress(mac: string, stage: string, detail: string): void {
+    if (!this.bastionId || !this.ws || this.ws.readyState !== WebSocket.OPEN) return;
+    this.send({
+      type: "bastion-progress",
+      bastionId: this.bastionId,
+      mac,
+      stage,
+      detail,
+      timestamp: new Date().toISOString(),
+    });
+  }
+
+  close(): void {
+    this.closed = true;
+    this.stopHeartbeat();
+    if (this.reconnectTimer) {
+      clearTimeout(this.reconnectTimer);
+      this.reconnectTimer = null;
+    }
+    if (this.ws) {
+      this.ws.close();
+      this.ws = null;
+    }
+  }
+
+  private handleMessage(msg: LabdBastionMessage): void {
+    switch (msg.type) {
+      case "bastion-enrolled":
+        this.bastionId = msg.bastionId;
+        // Persist for reconnects
+        writeFileSync(`${this.config.bastionDir}/bastion-id`, msg.bastionId);
+        logger.info(`Enrolled with labd as bastion ${msg.bastionId}`);
+        // Send initial state
+        this.sendStateSync();
+        break;
+
+      case "bastion-heartbeat-ack":
+        // No-op, confirms labd is alive
+        break;
+
+      case "server-shutdown":
+        logger.info(`labd shutting down, will reconnect in ${msg.reconnectAfter}ms`);
+        break;
+
+      case "command-install":
+      case "command-forget":
+      case "command-role-update":
+        void this.handleCommand(msg);
+        break;
+    }
+  }
+
+  private async handleCommand(msg: LabdBastionMessage & { requestId: string }): Promise<void> {
+    const handler = this.commandHandlers.get(msg.type);
+    if (!handler) {
+      this.send({
+        type: "command-response",
+        requestId: msg.requestId,
+        status: "error",
+        error: `No handler for command: ${msg.type}`,
+      });
+      return;
+    }
+
+    try {
+      const result = await handler(msg);
+      this.send({
+        type: "command-response",
+        requestId: msg.requestId,
+        ...result,
+      });
+    } catch (err) {
+      this.send({
+        type: "command-response",
+        requestId: msg.requestId,
+        status: "error",
+        error: err instanceof Error ? err.message : String(err),
+      });
+    }
+  }
+
+  private sendStateSync(): void {
+    if (!this.bastionId) return;
+    this.send({
+      type: "bastion-state-sync",
+      bastionId: this.bastionId,
+      state: this.getState(),
+    });
+  }
+
+  private startHeartbeat(): void {
+    this.stopHeartbeat();
+    this.heartbeatTimer = setInterval(() => {
+      if (!this.bastionId) return;
+      const state = this.getState();
+      const machineCount =
+        Object.keys(state.discovered).length +
+        Object.keys(state.install_queue).length +
+        Object.keys(state.installed).length;
+
+      this.send({
+        type: "bastion-heartbeat",
+        bastionId: this.bastionId,
+        uptime: Math.floor((Date.now() - this.startTime) / 1000),
+        machineCount,
+      });
+    }, HEARTBEAT_INTERVAL_MS);
+  }
+
+  private stopHeartbeat(): void {
+    if (this.heartbeatTimer) {
+      clearInterval(this.heartbeatTimer);
+      this.heartbeatTimer = null;
+    }
+  }
+
+  private scheduleReconnect(): void {
+    if (this.closed) return;
+    const delay = Math.min(
+      RECONNECT_BASE_DELAY_MS * Math.pow(2, this.retryCount),
+      RECONNECT_MAX_DELAY_MS,
+    );
+    this.retryCount++;
+    logger.info(`Reconnecting to labd in ${delay}ms (attempt ${this.retryCount})...`);
+    this.reconnectTimer = setTimeout(() => this.connect(), delay);
+  }
+
+  private send(msg: BastionMessage): void {
+    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
+      this.ws.send(JSON.stringify(msg));
+    }
+  }
+}
--- a/bastion/src/bastion/src/services/logger.ts
+++ b/bastion/src/bastion/src/services/logger.ts
@@ -0,0 +1,17 @@
+// Winston logger instance shared across the bastion application.
+
+import winston from "winston";
+
+export const logger = winston.createLogger({
+  level: "info",
+  format: winston.format.combine(
+    winston.format.timestamp({ format: "HH:mm:ss" }),
+    winston.format.printf((info) => {
+      let prefix = "\x1b[32m[bastion]\x1b[0m"; // green tag unless the level says otherwise
+      if (info.level === "error") prefix = "\x1b[31m[bastion]\x1b[0m"; // red
+      else if (info.level === "warn") prefix = "\x1b[33m[bastion]\x1b[0m"; // yellow
+      return `${prefix} ${info.timestamp as string} ${info.message as string}`;
+    }),
+  ),
+  transports: [new winston.transports.Console()],
+});
--- a/bastion/src/bastion/src/services/network.ts
+++ b/bastion/src/bastion/src/services/network.ts
@@ -0,0 +1,166 @@
+// Auto-detect network interface, IP, gateway, SSH keys, and admin user.
+
+import { execFileSync, execSync } from "node:child_process";
+import { readFileSync, existsSync, mkdirSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import type { BastionConfig } from "@lab/shared";
+import { logger } from "./logger.js";
+
+/**
+ * Detect the default network interface from the routing table (first `default` line of `ip route`).
+ */
+export function detectInterface(): string {
+  const output = execSync("ip route", { encoding: "utf-8" });
+  // Accept both "default via <gw> dev <if>" and gateway-less "default dev <if>" routes.
+  const ifaceMatch = /^default[ \t]+(?:.*[ \t])?dev[ \t]+(\S+)/m.exec(output)?.[1];
+  if (ifaceMatch === undefined) {
+    throw new Error("Cannot detect default network interface");
+  }
+  return ifaceMatch;
+}
+
+/**
+ * Detect the first IPv4 address reported for a given interface; throws when none is reported.
+ */
+export function detectIp(iface: string): string {
+  // execFileSync hands iface to `ip` as one argv entry, so shell metacharacters in it are inert.
+  const output = execFileSync("ip", ["-4", "addr", "show", iface], { encoding: "utf-8" });
+  const ipMatch = /inet\s+(\d+\.\d+\.\d+\.\d+)/.exec(output)?.[1];
+  if (ipMatch === undefined) {
+    throw new Error(`Cannot detect IP on interface ${iface}`);
+  }
+  return ipMatch;
+}
+
+/**
+ * Turn a dotted IPv4 address into its /24 network address by replacing the last octet with 0.
+ */
+export function deriveNetwork(ip: string): string {
+  const [first, second, third] = ip.split(".");
+  return `${first}.${second}.${third}.0`;
+}
+
+/**
+ * Read the default gateway (the address after `default via`) from `ip route`.
+ */
+export function detectGateway(): string {
+  const routes = execSync("ip route", { encoding: "utf-8" });
+  const gateway = /default\s+via\s+(\S+)/.exec(routes)?.[1];
+  if (gateway !== undefined) {
+    return gateway;
+  }
+  // No "default via ..." entry in the routing table.
+  throw new Error("Cannot detect default gateway");
+}
+
+/**
+ * Collect SSH public keys from the invoking user's ~/.ssh: authorized_keys first, then id_ed25519.pub,
+ * id_rsa.pub, id_ecdsa.pub, deduplicated by key blob. With none found, reuse or generate <bastionDir>/bastion_ed25519.
+ * @returns the key lines and a description of the file(s) they came from.
+ */
+export function collectSshKeys(bastionDir: string): { keys: string[]; source: string } {
+  const sudoUser = process.env["SUDO_USER"];
+  let realHome = homedir();
+  if (sudoUser !== undefined) {
+    try {
+      // argv-based call, so SUDO_USER never passes through a shell; passwd field 6 is the home directory.
+      const passwdHome = execFileSync("getent", ["passwd", sudoUser], { encoding: "utf-8" })
+        .split(":")[5]
+        ?.trim();
+      if (passwdHome !== undefined && passwdHome !== "") realHome = passwdHome;
+    } catch {
+      // getent unavailable or user unknown (non-zero exit): keep the current user's home.
+    }
+  }
+
+  const keys: string[] = [];
+  const fingerprints = new Set<string>();
+  const sources: string[] = [];
+
+  // Dedupe on the base64 key blob. authorized_keys lines may begin with options
+  // (e.g. `no-pty ssh-ed25519 AAAA...`), so find the blob after the key-type token
+  // rather than assuming it is the second field; otherwise fall back to the second field.
+  const keyBlob = (line: string): string | undefined =>
+    /(?:^|\s)(?:sk-)?(?:ssh|ecdsa)-\S+\s+(AAAA\S+)/.exec(line)?.[1] ?? line.split(/\s+/)[1];
+
+  // Add each not-yet-seen key from `path`; list the file as a source only if it contributed one.
+  const addKeysFrom = (path: string): void => {
+    if (!existsSync(path)) return;
+    let contributed = false;
+    for (const line of readFileSync(path, "utf-8").split("\n")) {
+      const trimmed = line.trim();
+      if (trimmed === "" || trimmed.startsWith("#")) continue;
+      const fp = keyBlob(trimmed);
+      if (fp === undefined || fp === "" || fingerprints.has(fp)) continue;
+      keys.push(trimmed);
+      fingerprints.add(fp);
+      contributed = true;
+    }
+    if (contributed) sources.push(path);
+  };
+
+  const sshDir = join(realHome, ".ssh");
+  for (const name of ["authorized_keys", "id_ed25519.pub", "id_rsa.pub", "id_ecdsa.pub"]) {
+    addKeysFrom(join(sshDir, name));
+  }
+  let source = sources.join(" + ");
+
+  // Generate a keypair if no keys found
+  if (keys.length === 0) {
+    const generatedKey = join(bastionDir, "bastion_ed25519");
+    if (!existsSync(generatedKey)) {
+      mkdirSync(bastionDir, { recursive: true });
+      logger.warn("No SSH keys found -- generating ed25519 keypair...");
+      execSync(`ssh-keygen -t ed25519 -f "${generatedKey}" -N "" -C "bastion-generated@$(hostname)"`, {
+        encoding: "utf-8",
+        stdio: "pipe",
+      });
+    }
+    const pubKey = readFileSync(`${generatedKey}.pub`, "utf-8").trim();
+    keys.push(pubKey);
+    source = `${generatedKey} (generated)`;
+    logger.warn(`Using generated keypair: ${generatedKey}`);
+    logger.warn("Save this private key -- it is the only way to access installed machines.");
+  }
+
+  return { keys, source };
+}
+
+/**
+ * Name of the admin account: SUDO_USER if set, otherwise USER; "" when neither is set or the name is root.
+ */
+export function detectAdminUser(): string {
+  const candidate = process.env["SUDO_USER"] ?? process.env["USER"];
+  return candidate === undefined || candidate === "root" ? "" : candidate;
+}
+
+/**
+ * Fill in the network fields the config leaves empty by probing the host; non-empty config values are kept.
+ */
+export function populateNetworkConfig(config: BastionConfig): BastionConfig {
+  // Probes run in dependency order: the IP needs the interface, the network needs the IP.
+  const iface = config.iface === "" ? detectInterface() : config.iface;
+  const serverIp = config.serverIp === "" ? detectIp(iface) : config.serverIp;
+  const network = config.network === "" ? deriveNetwork(serverIp) : config.network;
+  const gateway = config.gateway === "" ? detectGateway() : config.gateway;
+
+  let sshKeys = config.sshKeys;
+  let sshSource = "config";
+  if (sshKeys.length === 0) {
+    ({ keys: sshKeys, source: sshSource } = collectSshKeys(config.bastionDir));
+  }
+  const adminUser = config.adminUser === "" ? detectAdminUser() : config.adminUser;
+
+  logger.info(`Interface: ${iface}  IP: ${serverIp}  Network: ${network}`);
+  logger.info(`SSH keys: ${sshKeys.length} key(s) from ${sshSource}`);
+  if (adminUser !== "") {
+    logger.info(`Admin user: ${adminUser} (will be created on installed machines)`);
+  }
+
+  return {
+    ...config,
+    iface, serverIp, network, gateway,
+    sshKeys, adminUser,
+  };
+}
--- a/bastion/src/bastion/src/services/post-provision.ts
+++ b/bastion/src/bastion/src/services/post-provision.ts
@@ -0,0 +1,233 @@
+// Post-provision automation: installs k3s after OS provisioning completes.
+// Runs asynchronously — does not block the progress callback.
+
+import { spawn } from "node:child_process";
+import { existsSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import { logger } from "./logger.js";
+import { progressBus } from "./progress-events.js";
+/** Return the first existing private key (id_ed25519, id_ecdsa, id_rsa in that order) in the operator's ~/.ssh, if any. */
+function findSshKey(): string | undefined {
+  const sudoUser = process.env["SUDO_USER"];
+  // Under sudo, look in /home/<SUDO_USER> instead of the current user's home directory.
+  const ownerHome = sudoUser ? join("/home", sudoUser) : homedir();
+  const sshDir = join(ownerHome, ".ssh");
+  const keyNames = ["id_ed25519", "id_ecdsa", "id_rsa"];
+  const candidates = keyNames.map((name) => join(sshDir, name));
+  return candidates.find((candidate) => existsSync(candidate));
+}
+
+/** Poll the host over SSH (`echo ok`, 5 s between attempts) until it answers or timeoutMs has elapsed. */
+async function waitForSsh(ip: string, user: string, keyPath: string | undefined, timeoutMs: number): Promise<boolean> {
+  const deadline = Date.now() + timeoutMs;
+  const pause = (): Promise<void> => new Promise((wake) => setTimeout(wake, 5000));
+  for (; Date.now() < deadline; await pause()) {
+    try {
+      const reply = await sshExec(ip, user, "echo ok", keyPath);
+      if (reply.includes("ok")) return true;
+    } catch { /* retry */ }
+  }
+  return false;
+}
+/** Run one command over SSH (batch mode, 10 s connect timeout); resolves with stdout, rejects on spawn error or non-zero exit. */
+function sshExec(ip: string, user: string, command: string, keyPath: string | undefined): Promise<string> {
+  return new Promise((resolve, reject) => {
+    const args = [
+      "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=10", "-o", "BatchMode=yes",
+      ...(keyPath ? ["-i", keyPath] : []),
+      `${user}@${ip}`, command,
+    ];
+    const proc = spawn("ssh", args, { stdio: ["ignore", "pipe", "pipe"] });
+    let stdout = "";
+    let stderr = "";
+    proc.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
+    // Drain stderr as well: an unread pipe can fill up and stall ssh, and its text explains the failure.
+    proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
+    proc.on("close", (code) => {
+      if (code === 0) resolve(stdout);
+      else reject(new Error(`SSH exit ${code}${stderr.trim() === "" ? "" : `: ${stderr.trim()}`}`));
+    });
+    proc.on("error", reject);
+  });
+}
+/** Run a command over SSH, forwarding every output line to the log (and to progressBus when mac is set); resolves with the exit code, 1 if unknown. */
+function sshRunStreaming(ip: string, user: string, command: string, keyPath: string | undefined, label: string, mac?: string): Promise<number> {
+  return new Promise((resolve) => {
+    const args = [
+      "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=10", "-o", "BatchMode=yes",
+      ...(keyPath ? ["-i", keyPath] : []),
+      `${user}@${ip}`, command,
+    ];
+    const proc = spawn("ssh", args, { stdio: ["ignore", "pipe", "pipe"] });
+    const report = (line: string): void => {
+      if (line === "") return;
+      logger.info(`[k3s:${label}] ${line}`);
+      if (mac) {
+        progressBus.emit({ mac, hostname: label, stage: "log", detail: `[k3s] ${line}`, timestamp: new Date().toISOString() });
+      }
+    };
+    // "data" chunks can end mid-line: keep each stream's unterminated tail until its newline arrives.
+    const pending = { stdout: "", stderr: "" };
+    for (const stream of ["stdout", "stderr"] as const) {
+      proc[stream].on("data", (d: Buffer) => {
+        const lines = (pending[stream] + d.toString()).split("\n");
+        pending[stream] = lines.pop() ?? "";
+        lines.forEach((line) => report(line));
+      });
+    }
+    proc.on("close", (code) => {
+      [pending.stdout, pending.stderr].forEach((tail) => report(tail)); // flush unterminated last lines
+      resolve(code ?? 1);
+    });
+    proc.on("error", () => resolve(1));
+  });
+}
+
+/**
+ * Install k3s over SSH on a freshly provisioned machine, then deploy the labcontroller stack when the role's apps call for it.
+ * Progress goes to the bastion log and, when mac is given, to progressBus; SSH step failures are reported as "error" stages.
+ */
+export async function triggerPostProvisionK3s(
+  hostname: string,
+  ip: string,
+  role: string,
+  sshUser: string,
+  mac?: string,
+): Promise<void> {
+  const keyPath = findSshKey();
+
+  const emitStage = (stage: string, detail: string): void => {
+    logger.info(`[k3s] ${detail}`);
+    if (mac) {
+      progressBus.emit({ mac, hostname, stage, detail, timestamp: new Date().toISOString() });
+    }
+  };
+
+  emitStage("post-provision", `auto-installing k3s on ${hostname} (${ip}) role=${role}`);
+  emitStage("post-provision", "waiting for SSH (machine may still be rebooting)");
+
+  // Wait up to 5 minutes for SSH (machine just finished kickstart and is rebooting)
+  const sshReady = await waitForSsh(ip, sshUser, keyPath, 300_000);
+  if (!sshReady) {
+    emitStage("error", `SSH not available on ${hostname} (${ip}) after 5 minutes`);
+    logger.error(`[k3s] Run manually: labctl app k3s install ${hostname}`);
+    return;
+  }
+
+  emitStage("post-provision", "SSH ready, installing k3s prerequisites");
+
+  // Step 1: Prerequisites (-a: load every listed module; without it "overlay" is parsed as a br_netfilter parameter)
+  await sshRunStreaming(ip, sshUser, "sudo modprobe -a br_netfilter overlay 2>/dev/null; sudo swapoff -a", keyPath, hostname, mac);
+
+  // Step 2: Sysctl
+  emitStage("post-provision", "configuring sysctl for k3s");
+  await sshRunStreaming(ip, sshUser, `sudo bash -c 'cat > /etc/sysctl.d/90-k3s.conf << EOF
+net.bridge.bridge-nf-call-iptables=1
+net.bridge.bridge-nf-call-ip6tables=1
+net.ipv4.ip_forward=1
+vm.panic_on_oom=0
+vm.overcommit_memory=1
+kernel.panic=10
+kernel.panic_on_oops=1
+EOF
+sysctl --system > /dev/null'`, keyPath, hostname, mac);
+
+  // Step 3: SELinux + firewalld + stale CNI cleanup
+  emitStage("post-provision", "disabling firewalld and cleaning stale CNI");
+  await sshRunStreaming(ip, sshUser, [
+    "sudo setenforce 0 2>/dev/null || true",
+    "sudo systemctl disable --now firewalld 2>/dev/null || true",
+    "sudo systemctl mask firewalld 2>/dev/null || true",
+    // Clean stale CNI interfaces that conflict with Cilium (flannel.1 uses same vxlan port 8472)
+    "sudo systemctl stop k3s 2>/dev/null || true",
+    "sudo ip link delete flannel.1 2>/dev/null || true",
+    "sudo ip link delete cilium_vxlan 2>/dev/null || true",
+    "sudo ip link delete cilium_host 2>/dev/null || true",
+    "sudo ip link delete cilium_net 2>/dev/null || true",
+    "sudo rm -rf /etc/cni/net.d/* /var/lib/cni/ 2>/dev/null || true",
+  ].join("; "), keyPath, hostname, mac);
+
+  // Step 4: Install k3s
+  // labcontroller extends infra — both are k3s servers
+  const k3sRole = (role === "infra" || role === "labcontroller") ? "server" : "agent";
+  emitStage("post-provision", `installing k3s ${k3sRole}`);
+  const code = await sshRunStreaming(ip, sshUser,
+    `curl -sfL https://get.k3s.io | sudo INSTALL_K3S_EXEC="${k3sRole}" INSTALL_K3S_SKIP_SELINUX_RPM=true sh -`,
+    keyPath, hostname, mac,
+  );
+
+  if (code !== 0) {
+    emitStage("error", `k3s install failed on ${hostname} (exit ${code})`);
+    logger.error(`[k3s] Run manually: labctl app k3s install ${hostname}`);
+    return;
+  }
+
+  // Step 5: Wait for ready (grep -w: whole-word match, so a "NotReady" node does not end the wait early)
+  emitStage("post-provision", "waiting for k3s node to become Ready");
+  await sshRunStreaming(ip, sshUser,
+    "for i in $(seq 1 60); do sudo k3s kubectl get nodes 2>/dev/null | grep -qw Ready && break; sleep 2; done",
+    keyPath, hostname, mac,
+  );
+
+  emitStage("post-provision", `k3s ${k3sRole} installed on ${hostname} (${ip})`);
+
+  // Step 6: Deploy role-specific apps from ROLE_REGISTRY chain
+  const { ROLE_REGISTRY } = await import("@lab/shared");
+  const roleInfo = ROLE_REGISTRY.find((r: { name: string }) => r.name === role);
+
+  if (roleInfo && roleInfo.apps.length > 0) {
+    emitStage("post-provision", `deploying apps: ${roleInfo.apps.join(", ")}`);
+
+    if (roleInfo.apps.includes("cockroachdb") || roleInfo.apps.includes("labd") || roleInfo.apps.includes("bastion")) {
+      // This is a labcontroller — deploy the full stack
+      emitStage("post-provision", `deploying labcontroller stack on ${hostname}`);
+
+      try {
+        const { cockroachDbManifests } = await import("@lab/modules/dist/modules/labcontroller/src/cockroachdb.js");
+        const { labdManifests } = await import("@lab/modules/dist/modules/labcontroller/src/labd.js");
+        const { bastionManifests } = await import("@lab/modules/dist/modules/labcontroller/src/bastion.js");
+
+        const crdb = cockroachDbManifests();
+        const labd = labdManifests({ databaseUrl: crdb.connectionString });
+        const bastion = bastionManifests();
+
+        const manifests = [
+          crdb.namespace, crdb.headlessService, crdb.clientService, crdb.statefulSet,
+          labd.service, labd.deployment,
+          bastion.daemonSet,
+        ];
+
+        for (const manifest of manifests) {
+          const json = JSON.stringify(manifest);
+          const kind = (manifest as { kind?: string }).kind ?? "?";
+          const name = ((manifest as { metadata?: { name?: string } }).metadata)?.name ?? "?";
+          const result = await sshRunStreaming(ip, sshUser,
+            `echo '${json.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f -`,
+            keyPath, hostname, mac,
+          );
+          if (result === 0) {
+            emitStage("post-provision", `applied ${kind}/${name}`);
+          } else {
+            emitStage("error", `failed to apply ${kind}/${name}`);
+          }
+        }
+
+        // Init CockroachDB
+        const initJson = JSON.stringify(crdb.initJob);
+        await sshRunStreaming(ip, sshUser,
+          `echo '${initJson.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f - 2>/dev/null; sleep 30; sudo k3s kubectl exec cockroachdb-0 -n lab-system -- /cockroach/cockroach sql --insecure -e 'CREATE DATABASE IF NOT EXISTS lab' 2>/dev/null || true`,
+          keyPath, hostname, mac,
+        );
+
+        emitStage("post-provision", `labcontroller stack deployed on ${hostname}`);
+      } catch (err) {
+        const errMsg = err instanceof Error ? err.message : String(err);
+        emitStage("error", `failed to deploy labcontroller stack: ${errMsg}`);
+        logger.error(`[post-provision] Run manually: labctl app labcontroller deploy ${hostname}`);
+      }
+    }
+  }
+
+  emitStage("post-provision", `${hostname} (${ip}) provisioning complete (role: ${role})`);
+}
--- a/bastion/src/bastion/src/services/progress-events.ts
+++ b/bastion/src/bastion/src/services/progress-events.ts
@@ -0,0 +1,28 @@
+// In-memory event bus for provision progress updates.
+// Allows SSE clients to subscribe to real-time progress and log lines.
+
+import { EventEmitter } from "node:events";
+
+export interface ProgressEvent {
+  mac: string;
+  hostname: string;
+  /** Progress stage name; the special value "log" marks a raw log line. */
+  stage: string;
+  detail: string;
+  timestamp: string;
+}
+
+// Module-private emitter; progressBus funnels everything through its "progress" channel.
+const _bus = new EventEmitter();
+
+export const progressBus = {
+  emit: (event: ProgressEvent): void => {
+    _bus.emit("progress", event);
+  },
+  on: (listener: (event: ProgressEvent) => void): void => {
+    _bus.addListener("progress", listener);
+  },
+  off: (listener: (event: ProgressEvent) => void): void => {
+    _bus.removeListener("progress", listener);
+  },
+};
--- a/bastion/src/bastion/src/services/state.ts
+++ b/bastion/src/bastion/src/services/state.ts
@@ -0,0 +1,69 @@
+// JSON file-backed state management for discovered machines, install queue, and installed machines.
+
+import { readFileSync, writeFileSync, renameSync, mkdirSync } from "node:fs";
+import { dirname } from "node:path";
+import type { BastionState } from "@lab/shared";
+
+// Re-export types for consumers that import from this module
+export type { HardwareInfo, InstallConfig, InstalledInfo, BastionState } from "@lab/shared";
+
+// Factory rather than a shared constant: callers mutate the nested maps, so each gets its own.
+const emptyState = (): BastionState => ({
+  discovered: {}, install_queue: {}, installed: {},
+});
+
+/** Callback invoked with the just-saved state after every update(). */
+export type StateChangeListener = (state: BastionState) => void;
+
+export class StateManager {
+  private changeListeners: StateChangeListener[] = [];
+
+  constructor(private readonly stateFile: string) {}
+
+  /** Register a listener that fires after every state update. */
+  onChange(listener: StateChangeListener): void {
+    this.changeListeners.push(listener);
+  }
+
+  /** Read state from disk; a missing or unparseable file yields a fresh empty state. */
+  load(): BastionState {
+    try {
+      const parsed = JSON.parse(readFileSync(this.stateFile, "utf-8")) as Partial<BastionState>;
+      return {
+        discovered: parsed.discovered ?? {},
+        install_queue: parsed.install_queue ?? {},
+        installed: parsed.installed ?? {},
+      };
+    } catch {
+      return emptyState();
+    }
+  }
+
+  /** Write state to `<stateFile>.tmp`, then rename it over the real file. */
+  save(state: BastionState): void {
+    mkdirSync(dirname(this.stateFile), { recursive: true });
+    const tmp = `${this.stateFile}.tmp`;
+    writeFileSync(tmp, JSON.stringify(state, null, 2));
+    renameSync(tmp, this.stateFile);
+  }
+
+  /** Create the state file with empty contents if it cannot be read. */
+  init(): void {
+    try {
+      readFileSync(this.stateFile, "utf-8");
+    } catch {
+      this.save(emptyState());
+    }
+  }
+
+  /** Load, let `fn` mutate, save, then notify listeners; returns the saved state. */
+  update(fn: (state: BastionState) => void): BastionState {
+    const state = this.load();
+    fn(state);
+    this.save(state);
+    for (const listener of this.changeListeners) {
+      try { listener(state); } catch { /* don't let listener errors break state updates */ }
+    }
+    return state;
+  }
+}
--- a/bastion/src/bastion/src/services/syslog-listener.ts
+++ b/bastion/src/bastion/src/services/syslog-listener.ts
@@ -0,0 +1,99 @@
+// UDP syslog listener for receiving Anaconda install logs.
+// Anaconda's `logging --host` sends RFC 3164 syslog over UDP.
+// We parse the messages and route them to InstallLogBuffer.
+
+import { createSocket, type Socket } from "node:dgram";
+import type { InstallLogBuffer } from "./install-log.js";
+import type { StateManager } from "./state.js";
+import { logger } from "./logger.js";
+
+/**
+ * Parse a BSD syslog (RFC 3164) message into its program name and message text.
+ * Format: <PRI>TIMESTAMP [HOSTNAME] APP[PID]: MESSAGE (HOSTNAME is optional here).
+ * Anaconda messages look like: <13>Mar 28 19:32:01 anaconda[1234]: some message
+ * @returns program "unknown" plus the priority-stripped line when the header does not parse.
+ */
+function parseSyslogLine(raw: string): { program: string; message: string } {
+  const noPri = raw.replace(/^<\d+>/, ""); // strip priority: <NN>
+  // Header is "Mon DD HH:MM:SS [HOSTNAME] PROGRAM[PID]: MESSAGE". A hostname token never ends
+  // in ":", so a header without one keeps its real tag instead of losing it to the hostname slot.
+  const match = noPri.match(/^\w+\s+\d+\s+[\d:]+\s+(?:\S*[^\s:]\s+)?(\S+?)(?:\[\d+\])?:\s*(.*)/);
+  if (match) {
+    return { program: match[1], message: match[2] };
+  }
+  // Fallback: just return the whole line
+  return { program: "unknown", message: noPri.trim() };
+}
+
+/** UDP syslog receiver that files each datagram under the sending machine's install log. */
+export class SyslogListener {
+  private socket: Socket | null = null;
+
+  constructor(
+    private readonly port: number,
+    private readonly installLog: InstallLogBuffer,
+    private readonly state: StateManager,
+  ) {}
+
+  /** True when `text` contains `ip` as a whole address (10.0.0.1 must not match 10.0.0.10). */
+  private mentionsIp(text: string, ip: string): boolean {
+    const escapedIp = ip.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+    return new RegExp(`(?<!\\d\\.?)${escapedIp}(?!\\.?\\d)`).test(text);
+  }
+
+  /** Resolve a source IP to a MAC address; returns null when no queued or installed machine matches. */
+  private resolveIpToMac(ip: string): string | null {
+    const currentState = this.state.load();
+
+    // Queue entries are matched through an IP mentioned in their progress detail (e.g. "complete").
+    for (const [mac, entry] of Object.entries(currentState.install_queue)) {
+      if (entry.progress_detail && this.mentionsIp(entry.progress_detail, ip)) return mac;
+    }
+
+    for (const [mac, info] of Object.entries(currentState.installed)) {
+      if (info.ip === ip) return mac;
+    }
+
+    return null;
+  }
+
+  /** Resolve a MAC to the hostname from install queue or installed state (falls back to the MAC). */
+  private resolveHostname(mac: string): string {
+    const s = this.state.load();
+    return s.install_queue[mac]?.hostname ?? s.installed[mac]?.hostname ?? mac;
+  }
+
+  /** Open the UDP socket on `port` (all IPv4 interfaces) and start routing messages. */
+  start(): void {
+    this.socket = createSocket("udp4");
+
+    this.socket.on("message", (msg, rinfo) => {
+      const raw = msg.toString("utf-8").trim();
+      if (!raw) return;
+
+      const { program, message } = parseSyslogLine(raw);
+      const mac = this.resolveIpToMac(rinfo.address);
+
+      // Datagrams from senders we cannot map to a machine are dropped (not stored, not logged).
+      if (!mac) return;
+
+      const hostname = this.resolveHostname(mac);
+      const line = program !== "unknown" ? `[${program}] ${message}` : message;
+      this.installLog.append(mac, [line], hostname);
+    });
+
+    this.socket.on("error", (err) => {
+      logger.error(`Syslog listener error: ${err.message}`);
+    });
+
+    this.socket.bind(this.port, "0.0.0.0", () => {
+      logger.info(`Syslog listener on UDP :${this.port}`);
+    });
+  }
+
+  /** Close the socket if it is open; safe to call repeatedly. */
+  stop(): void {
+    this.socket?.close();
+    this.socket = null;
+  }
+}
--- a/bastion/src/bastion/src/templates/boot.ipxe.ts
+++ b/bastion/src/bastion/src/templates/boot.ipxe.ts
@@ -0,0 +1,93 @@
+// iPXE boot script templates for dispatch routing.
+
+export interface BootIpxeParams {
+  serverIp: string;
+  httpPort: number;
+}
+
+/**
+ * Builds the first script an iPXE client receives (served over HTTP via dnsmasq).
+ * It prints a banner and chains to the bastion's /dispatch endpoint, passing net0's MAC.
+ */
+export function renderBootIpxe(params: BootIpxeParams): string {
+  const { serverIp, httpPort } = params;
+  const rule = "echo ============================================";
+  const script = [
+    "#!ipxe",
+    "",
+    "echo",
+    rule,
+    "echo   Lab PXE Bastion",
+    "echo   Contacting server for instructions...",
+    rule,
+    "echo",
+    "",
+    `chain http://${serverIp}:${httpPort}/dispatch?mac=\${net0/mac}`,
+    "",
+  ];
+  return script.join("\n");
+}
+
+/**
+ * Builds the iPXE script used in discovery mode: a banner, then the installer
+ * kernel/initrd fetched from the bastion with the discovery kickstart as inst.ks.
+ */
+export function renderDiscoverIpxe(params: {
+  mac: string;
+  serverIp: string;
+  httpPort: number;
+  fedoraMirror: string;
+}): string {
+  const { mac, serverIp, httpPort, fedoraMirror } = params;
+  const bastion = `http://${serverIp}:${httpPort}`;
+  const rule = "echo =============================================";
+  const kernelArgs = [
+    `inst.ks=${bastion}/discover.ks`,
+    `inst.stage2=${fedoraMirror}`,
+    "inst.text",
+    "console=ttyS0,115200n8",
+    "console=tty0",
+  ].join(" ");
+
+  return [
+    "#!ipxe",
+    "",
+    "echo",
+    rule,
+    "echo   Lab PXE Bastion - DISCOVERY MODE",
+    `echo   MAC: ${mac}`,
+    "echo   Collecting hardware info...",
+    rule,
+    "echo",
+    "",
+    `kernel ${bastion}/vmlinuz ${kernelArgs}`,
+    `initrd ${bastion}/initrd.img`,
+    "boot",
+    "",
+  ].join("\n");
+}
+
+/**
+ * Builds the iPXE script used in install mode: a banner naming the target, then the
+ * installer kernel/initrd from the bastion with the per-MAC kickstart URL as inst.ks.
+ */
+export function renderInstallIpxe(params: {
+  mac: string;
+  hostname: string;
+  serverIp: string;
+  httpPort: number;
+  fedoraVersion: string;
+  fedoraMirror: string;
+}): string {
+  const { mac, hostname, serverIp, httpPort, fedoraVersion, fedoraMirror } = params;
+  const bastion = `http://${serverIp}:${httpPort}`;
+  const rule = "echo =============================================";
+  const kernelArgs = [
+    `inst.ks=${bastion}/ks?mac=${mac}`,
+    `inst.repo=${fedoraMirror}`,
+    "inst.text",
+    "console=ttyS0,115200n8",
+    "console=tty0",
+  ].join(" ");
+
+  return [
+    "#!ipxe",
+    "",
+    "echo",
+    rule,
+    `echo   Lab PXE Bastion - INSTALLING Fedora ${fedoraVersion}`,
+    `echo   Target: ${hostname}`,
+    `echo   MAC:    ${mac}`,
+    rule,
+    "echo",
+    "",
+    `kernel ${bastion}/vmlinuz ${kernelArgs}`,
+    `initrd ${bastion}/initrd.img`,
+    "boot",
+    "",
+  ].join("\n");
+}
+
+/**
+ * Builds the iPXE script for a machine that is already installed: it shows a banner
+ * with the hostname, waits three seconds, and leaves iPXE with `exit 1`.
+ */
+export function renderLocalBootIpxe(hostname: string): string {
+  const rule = "echo =============================================";
+  const lines = [
+    "#!ipxe",
+    "",
+    "echo",
+    rule,
+    `echo   Lab PXE Bastion - ${hostname}`,
+    "echo   Already installed, booting from local disk",
+    rule,
+    "echo",
+    "sleep 3",
+    "exit 1",
+    "",
+  ];
+  return lines.join("\n");
+}
--- a/bastion/src/bastion/src/templates/discover.ks.ts
+++ b/bastion/src/bastion/src/templates/discover.ks.ts
@@ -0,0 +1,118 @@
+// Discovery kickstart template.
+// Boots Fedora installer, collects hardware info, POSTs to bastion, reboots.
+// Never touches the disk.
+
+export interface DiscoverKickstartParams {
+  serverIp: string; // bastion address the %pre script POSTs the inventory to
+  httpPort: number; // bastion HTTP port; the script targets /api/discover on it
+}
+/** Render the discovery kickstart: a %pre script that collects hardware info, POSTs it as JSON, then force-reboots. */
+export function renderDiscoverKickstart(params: DiscoverKickstartParams): string {
+  const bastionUrl = `http://${params.serverIp}:${params.httpPort}`;
+  // NOTE(review): shell values are spliced into single-quoted Python literals below; a DMI string containing ' would break the payload.
+  return `# Lab Bastion -- Discovery Mode
+# Collects hardware inventory and reboots. Does NOT install anything.
+
+%pre --erroronfail --log=/tmp/discover.log
+#!/bin/bash
+set -x
+
+# -- Collect hardware info from /proc, /sys, and available tools --
+
+MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
+PRODUCT=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo "unknown")
+BOARD=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo "unknown")
+SERIAL=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo "unknown")
+MANUFACTURER=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo "unknown")
+CPUMODEL=$(grep -m1 'model name' /proc/cpuinfo | cut -d: -f2 | sed 's/^ //')
+CPUCORES=$(grep -c '^processor' /proc/cpuinfo)
+MEMGB=$(awk '/MemTotal/ {printf "%d", $2/1024/1024}' /proc/meminfo)
+ARCHTYPE=$(uname -m)
+
+# Disk info
+DISKS_JSON=$(lsblk -Jb -o NAME,SIZE,TYPE,MODEL 2>/dev/null | python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+disks = [d for d in data.get('blockdevices', []) if d.get('type') == 'disk']
+result = []
+for d in disks:
+    size_gb = round(int(d.get('size', 0)) / 1073741824, 1)
+    result.append({
+        'name': d.get('name', '?'),
+        'size_gb': size_gb,
+        'model': (d.get('model') or 'unknown').strip()
+    })
+print(json.dumps(result))
+" 2>/dev/null || echo '[]')
+
+# Network interfaces
+NICS_JSON=$(ip -j link show 2>/dev/null | python3 -c "
+import sys, json
+nics = json.load(sys.stdin)
+result = []
+for n in nics:
+    if n.get('link_type') == 'loopback':
+        continue
+    result.append({
+        'name': n.get('ifname', '?'),
+        'mac': n.get('address', '?'),
+        'state': n.get('operstate', '?')
+    })
+print(json.dumps(result))
+" 2>/dev/null || echo '[]')
+
+# -- Build and POST discovery payload --
+
+PAYLOAD=$(python3 -c "
+import json
+print(json.dumps({
+    'mac': '$MAC',
+    'product': '$PRODUCT',
+    'board': '$BOARD',
+    'serial': '$SERIAL',
+    'manufacturer': '$MANUFACTURER',
+    'cpu_model': '$CPUMODEL',
+    'cpu_cores': int('$CPUCORES' or 0),
+    'memory_gb': int('$MEMGB' or 0),
+    'arch': '$ARCHTYPE',
+    'disks': $DISKS_JSON,
+    'nics': $NICS_JSON
+}))
+")
+
+# POST to bastion
+BASTION_URL="${bastionUrl}/api/discover"
+
+if command -v curl >/dev/null 2>&1; then
+    curl -sf -X POST "$BASTION_URL" \\
+        -H "Content-Type: application/json" \\
+        -d "$PAYLOAD" || true
+else
+    python3 -c "
+import urllib.request
+req = urllib.request.Request('$BASTION_URL',
+    data=b'''$PAYLOAD''',
+    headers={'Content-Type': 'application/json'})
+try:
+    urllib.request.urlopen(req, timeout=10)
+except Exception as e:
+    print(f'POST failed: {e}')
+"
+fi
+
+# -- Reboot -- do NOT let Anaconda proceed --
+echo ""
+echo "=== Discovery complete, rebooting ==="
+echo ""
+sleep 3
+echo 1 > /proc/sys/kernel/sysrq
+echo b > /proc/sysrq-trigger
+sleep 5
+reboot -f
+
+%end
+
+# Anaconda should never get here, but just in case:
+reboot
+`;
+}
--- a/bastion/src/bastion/src/templates/dnsmasq.conf.ts
+++ b/bastion/src/bastion/src/templates/dnsmasq.conf.ts
@@ -0,0 +1,97 @@
+// dnsmasq configuration template.
+// Supports proxy DHCP mode (alongside existing DHCP) and full DHCP mode.
+// Handles UEFI HTTP Boot, iPXE chainloading, and PXE service directives.
+
+import type { BastionConfig } from "@lab/shared";
+
+/**
+ * Renders the dnsmasq configuration for the bastion.
+ * "full" mode emits a real DHCP range (defaulting to .100-.200 of the network);
+ * any other mode emits a proxy-DHCP range. pxe-service lines are only emitted
+ * when the mode is exactly "proxy".
+ */
+export function renderDnsmasqConf(config: BastionConfig): string {
+  const { iface, serverIp, httpPort, network, gateway, dhcpMode, tftpDir, bastionDir } = config;
+  const httpBase = `http://${serverIp}:${httpPort}`;
+
+  let dhcpSection: string;
+  if (dhcpMode === "full") {
+    // Fall back to <network prefix>.100-.200 when no explicit range is configured.
+    const prefix = network.replace(/\.0$/, "");
+    const rangeStart = config.dhcpRangeStart || `${prefix}.100`;
+    const rangeEnd = config.dhcpRangeEnd || `${prefix}.200`;
+    dhcpSection = [
+      "# Full DHCP mode -- bastion is the only DHCP server on this network",
+      `dhcp-range=${rangeStart},${rangeEnd},255.255.255.0,12h`,
+      `dhcp-option=3,${gateway}`,
+      `dhcp-option=6,${gateway}`,
+    ].join("\n");
+  } else {
+    dhcpSection = [
+      "# ProxyDHCP -- works alongside existing DHCP (UniFi etc)",
+      `dhcp-range=${network},proxy`,
+    ].join("\n");
+  }
+
+  const pxeServiceSection = dhcpMode === "proxy"
+    ? `# PXE service directives (proxy DHCP needs these to respond on port 4011)
+pxe-service=tag:!ipxe,x86PC,"PXE Boot",undionly.kpxe
+pxe-service=tag:!ipxe,X86-64_EFI,"PXE Boot",ipxe.efi
+pxe-service=tag:!ipxe,BC_EFI,"PXE Boot",ipxe.efi
+pxe-service=tag:!ipxe,ARM64_EFI,"PXE Boot",ipxe-arm64.efi`
+    : `# Full DHCP mode -- pxe-service directives omitted (they trigger PXE Boot Server
+# Discovery protocol which some UEFI implementations don't support). The dhcp-boot
+# directives above provide the boot filename directly in the DHCP offer.`;
+
+  return `# Lab PXE Bastion -- dnsmasq config
+
+# Disable DNS (we only want DHCP/TFTP)
+port=0
+
+# Listen on the right interface
+interface=${iface}
+bind-dynamic
+
+${dhcpSection}
+
+# TFTP for initial PXE boot
+enable-tftp
+tftp-root=${tftpDir}
+tftp-no-blocksize
+
+# Detect client architecture -- PXE (TFTP) clients
+dhcp-match=set:bios,option:client-arch,0
+dhcp-match=set:efi-x86_64,option:client-arch,7
+dhcp-match=set:efi-x86_64,option:client-arch,9
+dhcp-match=set:efi-arm64,option:client-arch,11
+
+# Detect client architecture -- UEFI HTTP Boot clients (no TFTP size limit)
+dhcp-match=set:httpboot-x86_64,option:client-arch,16
+dhcp-match=set:httpboot-arm64,option:client-arch,20
+
+# Detect iPXE clients (already chainloaded)
+dhcp-userclass=set:ipxe,iPXE
+
+# UEFI HTTP Boot -> serve full iPXE EFI via HTTP (no TFTP size limit)
+dhcp-boot=tag:httpboot-x86_64,${httpBase}/ipxe.efi
+dhcp-boot=tag:httpboot-arm64,${httpBase}/ipxe-arm64.efi
+# Echo vendor class back to HTTP Boot clients (required by UEFI HTTP Boot spec)
+dhcp-option-force=tag:httpboot-x86_64,60,HTTPClient
+dhcp-option-force=tag:httpboot-arm64,60,HTTPClient
+
+# First PXE boot -> serve iPXE binary via TFTP (BIOS and UEFI fallback)
+dhcp-boot=tag:bios,tag:!ipxe,undionly.kpxe
+dhcp-boot=tag:efi-x86_64,tag:!ipxe,ipxe.efi
+dhcp-boot=tag:efi-arm64,tag:!ipxe,ipxe-arm64.efi
+# Echo vendor class back to PXE clients (OVMF requires this, real hardware usually doesn't)
+dhcp-option-force=tag:efi-x86_64,60,PXEClient
+dhcp-option-force=tag:efi-arm64,60,PXEClient
+dhcp-option-force=tag:bios,60,PXEClient
+
+# iPXE clients -> chain to boot script via HTTP
+dhcp-boot=tag:ipxe,${httpBase}/boot.ipxe
+
+${pxeServiceSection}
+
+# Lease file in bastion directory (avoid default /var/lib/dnsmasq which needs root)
+dhcp-leasefile=${bastionDir}/dnsmasq.leases
+
+# Verbose logging
+log-dhcp
+`;
+}
--- a/bastion/src/bastion/src/templates/install.ks.ts
+++ b/bastion/src/bastion/src/templates/install.ks.ts
@@ -0,0 +1,427 @@
+// Install kickstart template.
+// Full Fedora server install with LVM partitioning, %pre for reprovision detection,
+// packages, and %post with SSH keys, user creation, k3s prereqs, progress callbacks.
+
+import type { Role } from "@lab/shared";
+
+export interface InstallKickstartParams {
+  hostname: string; // short host name; joined with `domain` to form the FQDN
+  disk: string; // target disk; an empty string makes %pre probe nvme0n1, sda, vda in that order
+  role: Role; // "worker" adds a longhorn LV, "infra" adds a rancher LV and installs k3s, "vanilla" skips k3s prep
+  domain: string; // an empty string means the bare hostname is used as the FQDN
+  fedoraVersion: string; // only rendered into the header comment of the kickstart
+  timezone: string;
+  locale: string;
+  serverIp: string; // bastion address used for progress/log callbacks and remote syslog
+  httpPort: number; // bastion HTTP port for /api/progress and /api/log
+  syslogPort: number; // port given to the kickstart `logging --host` directive
+  sshKeys: string[]; // first key goes to the `sshkey` directive; all keys go to authorized_keys in %post
+  adminUser: string; // an empty string means no admin user is created
+}
+/** Render the per-host Fedora install kickstart: %pre writes /tmp/part.ks, then %packages and %post follow. */
+export function renderInstallKickstart(params: InstallKickstartParams): string {
+  const {
+    hostname,
+    disk,
+    role,
+    domain,
+    fedoraVersion,
+    timezone,
+    locale,
+    serverIp,
+    httpPort,
+    syslogPort,
+    sshKeys,
+    adminUser,
+  } = params;
+
+  const fqdn = domain ? `${hostname}.${domain}` : hostname;
+  const vg = "labvg";
+  const now = new Date().toISOString();
+  const hasLonghorn = role === "worker";
+  const hasRancher = role === "infra";
+  const isVanilla = role === "vanilla";
+
+  // -- Auth section --
+  // Always set a root password (for serial console debugging); the first SSH key, if any, is added via `sshkey`
+  const auth = sshKeys.length > 0
+    ? `rootpw --plaintext lab-root-pw\nsshkey --username=root "${sshKeys[0]}"`
+    : "rootpw --plaintext lab-root-pw";
+
+  // -- Admin user directive (wheel group, locked password); empty when no adminUser is given --
+  const userDirective = adminUser
+    ? `user --name=${adminUser} --groups=wheel --lock`
+    : "";
+
+  // -- SSH keys for %post: every key is written to root's authorized_keys --
+  const allKeys = sshKeys.join("\n");
+  let sshPostBlock = "";
+  if (sshKeys.length > 0) {
+    sshPostBlock = `
+# Set up SSH keys for root
+mkdir -p /root/.ssh && chmod 700 /root/.ssh
+cat > /root/.ssh/authorized_keys << 'SSHKEYS'
+${allKeys}
+SSHKEYS
+chmod 600 /root/.ssh/authorized_keys`;
+  }
+
+  if (adminUser && sshKeys.length > 0) {
+    sshPostBlock += `
+
+# Set up SSH keys for ${adminUser}
+ADMIN_HOME=$(getent passwd ${adminUser} | cut -d: -f6)
+mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh"
+cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
+chown -R ${adminUser}:${adminUser} "$ADMIN_HOME/.ssh"
+chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"
+
+# Fix SELinux contexts for SSH
+restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
+
+# Passwordless sudo for ${adminUser}
+echo '${adminUser} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/${adminUser}
+chmod 440 /etc/sudoers.d/${adminUser}`;
+  }
+
+  // -- Disk detection: an explicit disk wins, otherwise %pre probes nvme0n1, sda, vda --
+  const diskLine = disk
+    ? `DISK="${disk}"`
+    : `DISK=""
+for d in /dev/nvme0n1 /dev/sda /dev/vda; do
+    [ -b "$d" ] && { DISK="$(basename $d)"; break; }
+done
+[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }`;
+
+  // -- Longhorn LV for fresh install (worker role); grows to fill the volume group --
+  const longhornFreshLine = hasLonghorn
+    ? `logvol /var/lib/longhorn --vgname=${vg} --name=longhorn --fstype=xfs --grow --size=1`
+    : "";
+
+  // -- Rancher LV for fresh install (infra role) --
+  const rancherFreshLine = hasRancher
+    ? `logvol /var/lib/rancher --vgname=${vg} --name=rancher --fstype=xfs --size=20480`
+    : "";
+
+  return `# Lab Bastion -- Fedora ${fedoraVersion} server install
+# Generated: ${now}
+# Target: ${fqdn} (role=${role})
+
+text
+reboot
+
+lang ${locale}
+keyboard uk
+timezone ${timezone} --utc
+
+network --bootproto=dhcp --activate --hostname=${fqdn}
+
+${auth}
+${userDirective}
+
+bootloader --append="console=tty0 console=ttyS0,115200n8"
+
+logging --host=${serverIp} --port=${syslogPort}
+
+url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch
+
+# Partitioning is generated dynamically by %pre (supports reprovision preservation)
+%include /tmp/part.ks
+
+%pre --log=/tmp/pre-partition.log
+#!/bin/bash
+set -x
+
+# Progress callback helper
+bastion_progress() {
+    local stage="$1" detail="\${2:-}"
+    local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
+    curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" \\
+        -H "Content-Type: application/json" \\
+        -d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
+}
+
+bastion_progress "partitioning" "detecting disk"
+
+VG="${vg}"
+${diskLine}
+
+REPROVISION=no
+
+# Check if VG exists (reprovision scenario)
+if vgs $VG &>/dev/null; then
+    echo "=== Existing VG found - reprovision mode ==="
+    REPROVISION=yes
+
+    # Detect which data LVs to preserve
+    PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no; PRESERVE_RANCHER=no
+    lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
+    lvs $VG/srv      &>/dev/null && PRESERVE_SRV=yes
+    lvs $VG/home     &>/dev/null && PRESERVE_HOME=yes
+    lvs $VG/rancher  &>/dev/null && PRESERVE_RANCHER=yes
+
+    echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME rancher=$PRESERVE_RANCHER"
+
+    # Remove only OS logical volumes (keep data LVs)
+    for lv in root var varlog swap; do
+        lvremove -f $VG/$lv 2>/dev/null || true
+    done
+else
+    bastion_progress "partitioning" "fresh install on $DISK"
+fi
+
+if [ "$REPROVISION" = "yes" ]; then
+    # Find existing boot partitions by type
+    EFI_PART=$(blkid -t TYPE=vfat -o device /dev/\${DISK}* 2>/dev/null | head -1)
+    BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/\${DISK}* 2>/dev/null | head -1)
+    EFI_PART=\${EFI_PART:-/dev/\${DISK}1}
+    BOOT_PART=\${BOOT_PART:-/dev/\${DISK}2}
+    echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"
+
+    # Build partition config reusing existing PV/VG
+    cat > /tmp/part.ks << PARTEOF
+ignoredisk --only-use=$DISK
+clearpart --none
+part /boot/efi --onpart=$EFI_PART --fstype=efi
+part /boot --onpart=$BOOT_PART --fstype=ext4
+volgroup ${vg} --useexisting --noformat
+logvol swap --vgname=${vg} --name=swap --fstype=swap --size=27648
+logvol / --vgname=${vg} --name=root --fstype=xfs --size=33792
+logvol /var --vgname=${vg} --name=var --fstype=xfs --size=102400
+logvol /var/log --vgname=${vg} --name=varlog --fstype=xfs --size=10240
+PARTEOF
+
+    # Preserve or recreate data LVs
+    if [ "$PRESERVE_HOME" = "yes" ]; then
+        echo "logvol /home --vgname=${vg} --name=home --useexisting --noformat" >> /tmp/part.ks
+    else
+        echo "logvol /home --vgname=${vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks
+    fi
+
+    if [ "$PRESERVE_SRV" = "yes" ]; then
+        echo "logvol /srv --vgname=${vg} --name=srv --useexisting --noformat" >> /tmp/part.ks
+    else
+        echo "logvol /srv --vgname=${vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks
+    fi
+
+    if [ "$PRESERVE_LONGHORN" = "yes" ]; then
+        echo "logvol /var/lib/longhorn --vgname=${vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
+    fi
+
+    if [ "$PRESERVE_RANCHER" = "yes" ]; then
+        echo "logvol /var/lib/rancher --vgname=${vg} --name=rancher --useexisting --noformat" >> /tmp/part.ks
+    fi
+
+else
+    # Fresh install
+    cat > /tmp/part.ks << PARTEOF
+ignoredisk --only-use=$DISK
+clearpart --all --initlabel --drives=$DISK
+part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
+part /boot --fstype=ext4 --size=3072 --ondisk=$DISK
+part pv.01 --size=1 --grow --ondisk=$DISK
+volgroup ${vg} pv.01
+logvol swap --vgname=${vg} --name=swap --fstype=swap --size=27648
+logvol / --vgname=${vg} --name=root --fstype=xfs --size=33792
+logvol /var --vgname=${vg} --name=var --fstype=xfs --size=102400
+logvol /var/log --vgname=${vg} --name=varlog --fstype=xfs --size=10240
+logvol /home --vgname=${vg} --name=home --fstype=xfs --size=10240
+logvol /srv --vgname=${vg} --name=srv --fstype=xfs --size=20480
+${longhornFreshLine}
+${rancherFreshLine}
+PARTEOF
+fi
+
+echo "=== Generated partition config ==="
+cat /tmp/part.ks
+echo "==================================="
+
+bastion_progress "partitioning" "disk layout ready"
+
+%end
+
+%packages
+@core
+openssh-server
+vim-enhanced
+tmux
+git
+curl
+wget
+python3
+lshw
+dmidecode
+dnf-plugins-core
+
+# Networking and diagnostics
+NetworkManager
+bind-utils
+net-tools
+iproute
+iputils
+traceroute
+tcpdump
+htop
+iotop
+strace
+jq
+
+${isVanilla ? "# vanilla role -- skipping k3s prerequisites" : `# k3s prerequisites
+container-selinux
+iptables-nft
+nftables
+policycoreutils-python-utils
+chrony
+tar
+socat
+conntrack-tools
+ethtool`}
+
+# Boot management
+efibootmgr
+
+# Puppet prerequisites
+ruby
+ruby-libs
+
+# Exclude desktop
+-@workstation-product
+-@gnome-desktop
+-gnome-shell
+-gdm
+-PackageKit
+-PackageKit-glib
+%end
+
+%post --log=/root/bastion-post-install.log
+#!/bin/bash
+set -x
+
+# Progress callback helper
+bastion_progress() {
+    local stage="$1" detail="\${2:-}"
+    local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
+    curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" \\
+        -H "Content-Type: application/json" \\
+        -d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
+}
+
+# Send log lines to bastion
+bastion_log() {
+    local line="$1"
+    local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
+    curl -sf -X POST "http://${serverIp}:${httpPort}/api/log" \\
+        -H "Content-Type: application/json" \\
+        -d "{\\"mac\\":\\"$mac\\",\\"line\\":\\"$(echo "$line" | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g')\\"}\" \\
+        --connect-timeout 5 --max-time 10 2>/dev/null || true
+}
+
+# Send an error stage to bastion
+bastion_error() {
+    local detail="$1"
+    bastion_progress "error" "$detail"
+}
+
+# --- Error trap: catch any failure and report to bastion ---
+_post_error_handler() {
+    local exit_code=$? lineno=$1
+    bastion_error "%post failed at line $lineno (exit $exit_code)"
+}
+trap '_post_error_handler $LINENO' ERR
+
+bastion_progress "post-install" "configuring system"
+
+# -- SSH --
+systemctl enable --now sshd
+sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
+sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
+${sshPostBlock}
+
+# -- Hostname and domain --
+hostnamectl set-hostname ${fqdn}
+
+# -- tmpfs for /tmp --
+echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
+
+${isVanilla ? `# -- vanilla role: skip k3s kernel/sysctl/firewall setup --
+# -- Enable chronyd for time sync --
+systemctl enable chronyd || true
+
+# -- Serial console (for debugging — auto-login as root on ttyS0) --
+# AWS EC2 compatible: ttyS0 @ 115200n8
+systemctl enable serial-getty@ttyS0.service || true
+
+# -- Forward all system logs to serial console --
+cat > /etc/rsyslog.d/serial-console.conf << 'RSYSLOG'
+*.* /dev/ttyS0
+RSYSLOG
+systemctl enable rsyslog || true` : `# -- Kernel modules for k3s --
+cat > /etc/modules-load.d/k3s.conf << 'MODULES'
+br_netfilter
+overlay
+ip_conntrack
+MODULES
+modprobe br_netfilter || true
+modprobe overlay || true
+
+# -- Sysctl for k3s networking --
+cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
+net.bridge.bridge-nf-call-iptables  = 1
+net.bridge.bridge-nf-call-ip6tables = 1
+net.ipv4.ip_forward                 = 1
+net.ipv6.conf.all.forwarding        = 1
+fs.inotify.max_user_instances       = 524288
+fs.inotify.max_user_watches         = 1048576
+SYSCTL
+sysctl --system || true
+
+# -- Disable firewalld permanently (k3s/Cilium manage iptables directly) --
+systemctl disable --now firewalld || true
+systemctl mask firewalld || true
+
+# -- Enable chronyd for time sync --
+systemctl enable chronyd || true`}
+
+# -- Boot order: restore network first (Anaconda sets disk first, we undo it) --
+# Network boot must stay first so the bastion intercepts every reboot.
+if command -v efibootmgr >/dev/null 2>&1; then
+    PXE_ENTRY=$(efibootmgr | grep -iE 'network|pxe|ipv4|ipv6|http' | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
+    if [ -n "$PXE_ENTRY" ]; then
+        CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
+        REST=$(echo "$CURRENT_ORDER" | sed "s/$PXE_ENTRY,\\\\?//;s/,$//" | sed 's/^,//')
+        NEW_ORDER="$PXE_ENTRY,$REST"
+        efibootmgr -o "$NEW_ORDER" || true
+    fi
+fi
+
+# -- Provisioning metadata --
+cat > /etc/lab-provisioned << PROVEOF
+hostname: ${fqdn}
+role: ${role}
+provisioned: $(date -Iseconds)
+bastion: ${serverIp}
+PROVEOF
+
+cat > /root/README << 'README'
+# Lab Node -- ${fqdn} (role: ${role})
+#
+# Next steps:
+#   1. Install puppet agent:
+#      dnf install -y puppet-agent
+#
+#   2. Install k3s:
+#      curl -sfL https://get.k3s.io | sh -
+#
+#   3. Or join existing cluster:
+#      curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
+README
+
+${hasRancher ? `# Install k3s server (skip start - will be configured manually)
+curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -
+` : ""}
+IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
+bastion_progress "complete" "ready at $IP_ADDR"
+
+%end
+`;
+}
--- a/bastion/src/bastion/src/templates/ubuntu-autoinstall.ts
+++ b/bastion/src/bastion/src/templates/ubuntu-autoinstall.ts
@@ -0,0 +1,299 @@
+// Ubuntu autoinstall template (cloud-init).
+// Equivalent of the Fedora kickstart: LVM partitioning, packages,
+// SSH keys, k3s prereqs, progress callbacks.
+
+/** Inputs consumed by renderUbuntuAutoinstall when rendering the autoinstall user-data. */
+export interface UbuntuAutoinstallParams {
+  /** Short host name; joined with `domain` (when non-empty) to form the FQDN. */
+  hostname: string;
+  /** Install target disk path; an empty string makes the renderer fall back to "/dev/sda". */
+  disk: string;
+  /** Node role: "worker" adds a longhorn LV, "infra" adds a rancher LV plus a k3s install step. */
+  role: string;  // "vanilla" | "worker" | "infra"
+  /** DNS domain appended to `hostname`; empty means the bare host name is used. */
+  domain: string;
+  /** Not read by renderUbuntuAutoinstall itself. */
+  ubuntuVersion: string;
+  /** Emitted verbatim as the `timezone:` value. */
+  timezone: string;
+  /** Emitted verbatim as the `locale:` value. */
+  locale: string;
+  /** Bastion address: target of the progress callback and recorded in /etc/lab-provisioned. */
+  serverIp: string;
+  /** Bastion HTTP port used in the progress callback URL. */
+  httpPort: number;
+  /** Public keys listed under `ssh.authorized-keys` and written to the admin user's authorized_keys. */
+  sshKeys: string[];
+  /** Admin account name; empty skips the user-creation commands and uses "root" as identity username. */
+  adminUser: string;
+}
+
+/**
+ * Render the Ubuntu autoinstall (`#cloud-config`) user-data document for one node.
+ *
+ * The document contains locale/keyboard/timezone, an identity block, SSH server
+ * settings, a curtin storage config (EFI + /boot + one LVM volume group holding
+ * swap, /, /var, /var/log, /home, /srv and any role-specific volume), a package
+ * list and a `late-commands` list.
+ *
+ * Role handling:
+ *  - "worker": adds a `longhorn` LV (size -1) mounted at /var/lib/longhorn.
+ *  - "infra":  adds a 20G `rancher` LV mounted at /var/lib/rancher and a
+ *              late-command installing k3s with INSTALL_K3S_SKIP_START=true.
+ *  - any other value: base layout only.
+ *
+ * @param params - Node description (host name, disk, role, bastion address, keys, ...).
+ * @returns The complete user-data document, ending with a newline.
+ */
+export function renderUbuntuAutoinstall(params: UbuntuAutoinstallParams): string {
+  const {
+    hostname,
+    disk,
+    role,
+    domain,
+    timezone,
+    locale,
+    serverIp,
+    httpPort,
+    sshKeys,
+    adminUser,
+  } = params;
+
+  const fqdn = domain ? `${hostname}.${domain}` : hostname;
+  const vg = "labvg";
+  const hasLonghorn = role === "worker";
+  const hasRancher = role === "infra";
+
+  // No disk auto-detection happens here: an empty `disk` simply falls back to /dev/sda.
+  const diskDevice = disk || "/dev/sda";
+
+  // Role-specific logical volumes, appended after the common layout.
+  const extraLvs: string[] = [];
+  if (hasLonghorn) {
+    extraLvs.push(`        - id: lv-longhorn
+          name: longhorn
+          type: lvm_partition
+          volgroup: vg0
+          size: -1
+        - id: fs-longhorn
+          type: format
+          volume: lv-longhorn
+          fstype: xfs
+        - id: mount-longhorn
+          type: mount
+          device: fs-longhorn
+          path: /var/lib/longhorn`);
+  }
+  if (hasRancher) {
+    extraLvs.push(`        - id: lv-rancher
+          name: rancher
+          type: lvm_partition
+          volgroup: vg0
+          size: 20G
+        - id: fs-rancher
+          type: format
+          volume: lv-rancher
+          fstype: xfs
+        - id: mount-rancher
+          type: mount
+          device: fs-rancher
+          path: /var/lib/rancher`);
+  }
+
+  const extraLvsBlock = extraLvs.length > 0 ? "\n" + extraLvs.join("\n") : "";
+
+  // A JSON string literal is also a valid YAML double-quoted scalar, so
+  // JSON.stringify provides the escaping. An empty key list becomes an explicit
+  // empty sequence rather than a dangling `authorized-keys:` (which parses as null).
+  const sshKeysYaml =
+    sshKeys.length > 0
+      ? "\n" + sshKeys.map((k) => `          - ${JSON.stringify(k)}`).join("\n")
+      : " []";
+
+  // late-commands for k3s prereqs, firewall, chrony, admin user, progress callback.
+  // The strings below are the exact shell text to run; YAML quoting is applied later.
+  const lateCommands: string[] = [
+    // Kernel modules for k3s
+    `curtin in-target -- bash -c 'cat > /etc/modules-load.d/k3s.conf << EOF\nbr_netfilter\noverlay\nip_conntrack\nEOF'`,
+    // Sysctl for k3s networking
+    `curtin in-target -- bash -c 'cat > /etc/sysctl.d/90-k3s.conf << EOF\nnet.bridge.bridge-nf-call-iptables  = 1\nnet.bridge.bridge-nf-call-ip6tables = 1\nnet.ipv4.ip_forward                 = 1\nnet.ipv6.conf.all.forwarding        = 1\nfs.inotify.max_user_instances       = 524288\nfs.inotify.max_user_watches         = 1048576\nEOF'`,
+    // Disable ufw firewall
+    `curtin in-target -- systemctl disable ufw || true`,
+    // Enable chrony/ntp
+    `curtin in-target -- systemctl enable chrony || true`,
+    // tmpfs for /tmp
+    `curtin in-target -- bash -c 'echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab'`,
+  ];
+
+  // Admin user creation + SSH keys + sudoers
+  if (adminUser) {
+    lateCommands.push(
+      `curtin in-target -- useradd -m -G sudo -s /bin/bash ${adminUser}`,
+      `curtin in-target -- usermod -L ${adminUser}`,
+      `curtin in-target -- mkdir -p /home/${adminUser}/.ssh`,
+      `curtin in-target -- bash -c 'cat > /home/${adminUser}/.ssh/authorized_keys << EOF\n${sshKeys.join("\n")}\nEOF'`,
+      `curtin in-target -- chmod 700 /home/${adminUser}/.ssh`,
+      `curtin in-target -- chmod 600 /home/${adminUser}/.ssh/authorized_keys`,
+      `curtin in-target -- chown -R ${adminUser}:${adminUser} /home/${adminUser}/.ssh`,
+      `curtin in-target -- bash -c 'echo "${adminUser} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/${adminUser}'`,
+      `curtin in-target -- chmod 440 /etc/sudoers.d/${adminUser}`,
+    );
+  }
+
+  // Provisioning metadata
+  lateCommands.push(
+    `curtin in-target -- bash -c 'cat > /etc/lab-provisioned << EOF\nhostname: ${fqdn}\nrole: ${role}\nprovisioned: $(date -Iseconds)\nbastion: ${serverIp}\nEOF'`,
+  );
+
+  // k3s install for infra role
+  if (hasRancher) {
+    lateCommands.push(
+      `curtin in-target -- bash -c 'curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -'`,
+    );
+  }
+
+  // Progress callback (complete). The backslashes here are shell-level escapes
+  // meant for the inner bash; they must survive YAML parsing untouched.
+  lateCommands.push(
+    `curtin in-target -- bash -c 'IP_ADDR=$(ip -4 addr show | awk "/inet / && !/127.0.0/ {split(\\$2,a,\\"/\\"); print a[1]; exit}"); curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" -H "Content-Type: application/json" -d "{\\"mac\\":\\"$(ip link show | awk "/ether/ && !/00:00:00:00/ {print \\$2; exit}")\\",\\"stage\\":\\"complete\\",\\"detail\\":\\"ready at $IP_ADDR\\"}" || true'`,
+  );
+
+  // Wrapping a command in bare "..." produced malformed YAML: heredoc newlines
+  // ended up raw inside the scalar, embedded double quotes terminated it early,
+  // and `\$` is not a legal YAML escape. JSON.stringify escapes all of these so
+  // the parsed value is exactly the shell text built above, one item per line.
+  const lateCommandsYaml = lateCommands.map((c) => `        - ${JSON.stringify(c)}`).join("\n");
+
+  return `#cloud-config
+autoinstall:
+  version: 1
+  locale: ${locale}
+  keyboard:
+    layout: gb
+  timezone: ${timezone}
+  identity:
+    hostname: ${fqdn}
+    username: ${adminUser || "root"}
+    password: "!"
+  ssh:
+    install-server: true
+    allow-pw: false
+    authorized-keys:${sshKeysYaml}
+  storage:
+    config:
+      - id: disk0
+        type: disk
+        ptable: gpt
+        path: ${diskDevice}
+        wipe: superblock-recursive
+        grub_device: true
+      - id: part-efi
+        type: partition
+        device: disk0
+        size: 600M
+        flag: boot
+        grub_device: true
+      - id: fs-efi
+        type: format
+        volume: part-efi
+        fstype: fat32
+      - id: mount-efi
+        type: mount
+        device: fs-efi
+        path: /boot/efi
+      - id: part-boot
+        type: partition
+        device: disk0
+        size: 3G
+      - id: fs-boot
+        type: format
+        volume: part-boot
+        fstype: ext4
+      - id: mount-boot
+        type: mount
+        device: fs-boot
+        path: /boot
+      - id: part-pv
+        type: partition
+        device: disk0
+        size: -1
+      - id: vg0
+        type: lvm_volgroup
+        name: ${vg}
+        devices:
+          - part-pv
+      - id: lv-swap
+        name: swap
+        type: lvm_partition
+        volgroup: vg0
+        size: 27G
+      - id: fs-swap
+        type: format
+        volume: lv-swap
+        fstype: swap
+      - id: mount-swap
+        type: mount
+        device: fs-swap
+        path: none
+      - id: lv-root
+        name: root
+        type: lvm_partition
+        volgroup: vg0
+        size: 33G
+      - id: fs-root
+        type: format
+        volume: lv-root
+        fstype: xfs
+      - id: mount-root
+        type: mount
+        device: fs-root
+        path: /
+      - id: lv-var
+        name: var
+        type: lvm_partition
+        volgroup: vg0
+        size: 100G
+      - id: fs-var
+        type: format
+        volume: lv-var
+        fstype: xfs
+      - id: mount-var
+        type: mount
+        device: fs-var
+        path: /var
+      - id: lv-varlog
+        name: varlog
+        type: lvm_partition
+        volgroup: vg0
+        size: 10G
+      - id: fs-varlog
+        type: format
+        volume: lv-varlog
+        fstype: xfs
+      - id: mount-varlog
+        type: mount
+        device: fs-varlog
+        path: /var/log
+      - id: lv-home
+        name: home
+        type: lvm_partition
+        volgroup: vg0
+        size: 10G
+      - id: fs-home
+        type: format
+        volume: lv-home
+        fstype: xfs
+      - id: mount-home
+        type: mount
+        device: fs-home
+        path: /home
+      - id: lv-srv
+        name: srv
+        type: lvm_partition
+        volgroup: vg0
+        size: 20G
+      - id: fs-srv
+        type: format
+        volume: lv-srv
+        fstype: xfs
+      - id: mount-srv
+        type: mount
+        device: fs-srv
+        path: /srv${extraLvsBlock}
+  packages:
+    - openssh-server
+    - curl
+    - wget
+    - git
+    - jq
+    - htop
+    - vim
+    - tmux
+    - python3
+    - lshw
+    - dmidecode
+    - net-tools
+    - iproute2
+    - iputils-ping
+    - traceroute
+    - tcpdump
+    - iotop
+    - strace
+    - tar
+    - containerd
+    - socat
+    - conntrack
+    - ethtool
+    - iptables
+    - chrony
+    - efibootmgr
+  late-commands:
+${lateCommandsYaml}
+`;
+}
+
+/**
+ * Render the meta-data document that accompanies the autoinstall user-data.
+ *
+ * @param hostname - Value used for both `instance-id` and `local-hostname`.
+ * @returns Two `key: value` lines followed by a trailing newline.
+ */
+export function renderUbuntuMetaData(hostname: string): string {
+  const entries = ["instance-id: " + hostname, "local-hostname: " + hostname];
+  // The empty last element yields the document's final newline.
+  return [...entries, ""].join("\n");
+}
--- a/bastion/src/bastion/src/templates/ubuntu-boot.ipxe.ts
+++ b/bastion/src/bastion/src/templates/ubuntu-boot.ipxe.ts
@@ -0,0 +1,24 @@
+// iPXE boot script template for Ubuntu autoinstall.
+
+/**
+ * Render the iPXE script that boots a machine into the Ubuntu autoinstaller.
+ *
+ * The script prints a banner (Ubuntu version, target host name, MAC), then
+ * loads kernel and initrd from the bastion HTTP server, pointing the nocloud
+ * seed at `/autoinstall/<mac>/` on that same server.
+ *
+ * @param params - MAC, host name, bastion address/port and Ubuntu version.
+ * @returns The iPXE script text, ending with a newline.
+ */
+export function renderUbuntuInstallIpxe(params: {
+  mac: string;
+  hostname: string;
+  serverIp: string;
+  httpPort: number;
+  ubuntuVersion: string;
+}): string {
+  const { mac, hostname, serverIp, httpPort, ubuntuVersion } = params;
+  const base = `http://${serverIp}:${httpPort}`;
+  const rule = "echo =============================================";
+
+  const script = [
+    "#!ipxe",
+    "",
+    "echo",
+    rule,
+    `echo   Lab PXE Bastion - INSTALLING Ubuntu ${ubuntuVersion}`,
+    `echo   Target: ${hostname}`,
+    `echo   MAC:    ${mac}`,
+    rule,
+    "echo",
+    "",
+    `kernel ${base}/ubuntu-vmlinuz autoinstall ds=nocloud-net;seedfrom=${base}/autoinstall/${mac}/ ---`,
+    `initrd ${base}/ubuntu-initrd`,
+    "boot",
+    "", // trailing newline
+  ];
+  return script.join("\n");
+}
--- a/bastion/src/bastion/tests/dispatch.test.ts
+++ b/bastion/src/bastion/tests/dispatch.test.ts
@@ -0,0 +1,328 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { mkdirSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import type { BastionConfig } from "@lab/shared";
+import { createApp } from "../src/server.js";
+import type { FastifyInstance } from "fastify";
+import type { StateManager } from "../src/services/state.js";
+import type { InstallLogBuffer } from "../src/services/install-log.js";
+
+/**
+ * Build the bastion configuration used by the dispatch route tests.
+ *
+ * dnsmasq and artifact handling are switched off via `skipDnsmasq` /
+ * `skipArtifacts`, and every on-disk path is placed under `testDir`.
+ *
+ * @param testDir - Per-test scratch directory.
+ * @returns A fully populated config object.
+ */
+function createTestConfig(testDir: string): BastionConfig {
+  const under = (leaf: string): string => join(testDir, leaf);
+
+  const config: BastionConfig = {
+    fedoraVersion: "43",
+    arch: "x86_64",
+    httpPort: 0,
+    timezone: "Europe/London",
+    locale: "en_GB.UTF-8",
+    bastionDir: testDir,
+    domain: "test.local",
+    dhcpMode: "proxy",
+    dhcpRangeStart: "",
+    dhcpRangeEnd: "",
+    ubuntuVersion: "26.04",
+    ubuntuMirror: "https://releases.ubuntu.com/26.04",
+    iface: "eth0",
+    serverIp: "10.0.0.1",
+    network: "10.0.0.0",
+    gateway: "10.0.0.1",
+    sshKeys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST test@test"],
+    adminUser: "testadmin",
+    skipDnsmasq: true,
+    skipArtifacts: true,
+    fedoraMirror: "https://download.fedoraproject.org/pub/fedora/linux/releases/43/Everything/x86_64/os",
+    tftpDir: under("tftp"),
+    httpDir: under("http"),
+    stateFile: under("state.json"),
+  };
+  return config;
+}
+
+// Route-level tests for the bastion app: iPXE dispatch, install progress
+// reporting, machine deletion and install-log ingestion. Each test gets a
+// fresh app and scratch directory; requests are made with app.inject().
+describe("dispatch routes", () => {
+  // Every test that needs a known machine uses this MAC.
+  const MAC = "aa:bb:cc:dd:ee:ff";
+
+  let testDir: string;
+  let app: FastifyInstance;
+  let state: StateManager;
+  let installLog: InstallLogBuffer;
+
+  /** Queue MAC for install as a worker on /dev/sda under the given host name. */
+  const queueInstall = (hostname: string): void => {
+    state.update((s) => {
+      s.install_queue[MAC] = {
+        hostname,
+        disk: "/dev/sda",
+        role: "worker",
+        queued_at: new Date().toISOString(),
+      };
+    });
+  };
+
+  /** POST `payload` as JSON to `url` on the app under test. */
+  const postJson = (url: string, payload: unknown) =>
+    app.inject({
+      method: "POST",
+      url,
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+
+  /** Report an install progress stage for MAC. */
+  const postProgress = (stage: string, detail: string) =>
+    postJson("/api/progress", { mac: MAC, stage, detail });
+
+  beforeEach(() => {
+    const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
+    testDir = join(tmpdir(), `bastion-dispatch-test-${suffix}`);
+    for (const dir of [testDir, join(testDir, "http"), join(testDir, "tftp")]) {
+      mkdirSync(dir, { recursive: true });
+    }
+
+    const created = createApp(createTestConfig(testDir));
+    app = created.app;
+    state = created.state;
+    installLog = created.installLog;
+  });
+
+  afterEach(async () => {
+    await app.close();
+    rmSync(testDir, { recursive: true, force: true });
+  });
+
+  it("unknown MAC returns discovery iPXE script", async () => {
+    const response = await app.inject({ method: "GET", url: `/dispatch?mac=${MAC}` });
+
+    expect(response.statusCode).toBe(200);
+    expect(response.headers["content-type"]).toContain("text/plain");
+    for (const fragment of ["#!ipxe", "DISCOVERY MODE", "discover.ks"]) {
+      expect(response.body).toContain(fragment);
+    }
+  });
+
+  it("MAC in install_queue returns install iPXE script", async () => {
+    queueInstall("worker-1");
+
+    const response = await app.inject({ method: "GET", url: `/dispatch?mac=${MAC}` });
+
+    expect(response.statusCode).toBe(200);
+    for (const fragment of ["#!ipxe", "INSTALLING", "worker-1", `ks?mac=${MAC}`]) {
+      expect(response.body).toContain(fragment);
+    }
+  });
+
+  it("MAC in installed returns local boot (exit) script", async () => {
+    state.update((s) => {
+      s.installed[MAC] = {
+        hostname: "installed-node",
+        role: "worker",
+        ip: "10.0.0.50",
+        installed_at: new Date().toISOString(),
+      };
+    });
+
+    const response = await app.inject({ method: "GET", url: `/dispatch?mac=${MAC}` });
+
+    expect(response.statusCode).toBe(200);
+    for (const fragment of ["#!ipxe", "installed-node", "Already installed", "exit"]) {
+      expect(response.body).toContain(fragment);
+    }
+  });
+
+  it("progress endpoint updates state", async () => {
+    queueInstall("worker-1");
+
+    const response = await postProgress("post-install", "configuring system");
+
+    expect(response.statusCode).toBe(200);
+    expect(JSON.parse(response.body).status).toBe("ok");
+
+    // The queue entry should now carry the reported stage and detail.
+    const entry = state.load().install_queue[MAC];
+    expect(entry?.progress).toBe("post-install");
+    expect(entry?.progress_detail).toBe("configuring system");
+  });
+
+  it("progress endpoint with 'complete' stage moves machine to installed", async () => {
+    queueInstall("worker-1");
+
+    const response = await postProgress("complete", "ready at 10.0.0.50");
+
+    expect(response.statusCode).toBe(200);
+
+    const after = state.load();
+    expect(after.install_queue[MAC]).toBeUndefined();
+    expect(after.installed[MAC]).toBeDefined();
+    expect(after.installed[MAC]?.hostname).toBe("worker-1");
+    expect(after.installed[MAC]?.ip).toBe("10.0.0.50");
+  });
+
+  it("DELETE /api/machines/:mac removes machine from state", async () => {
+    const seenAt = () => new Date().toISOString();
+    state.update((s) => {
+      s.discovered[MAC] = {
+        mac: MAC,
+        product: "TestBox",
+        board: "TestBoard",
+        serial: "SN123",
+        manufacturer: "TestCorp",
+        cpu_model: "Test CPU",
+        cpu_cores: 4,
+        memory_gb: 16,
+        arch: "x86_64",
+        disks: [],
+        nics: [],
+        first_seen: seenAt(),
+        last_seen: seenAt(),
+      };
+    });
+
+    const response = await app.inject({
+      method: "DELETE",
+      url: `/api/machines/${encodeURIComponent(MAC)}`,
+    });
+
+    expect(response.statusCode).toBe(200);
+    expect(JSON.parse(response.body).status).toBe("forgotten");
+    expect(state.load().discovered[MAC]).toBeUndefined();
+  });
+
+  it("DELETE /api/machines/:mac returns 404 for unknown machine", async () => {
+    const response = await app.inject({
+      method: "DELETE",
+      url: "/api/machines/ff:ff:ff:ff:ff:ff",
+    });
+
+    expect(response.statusCode).toBe(404);
+    expect(JSON.parse(response.body).error).toBe("machine not found");
+  });
+
+  it("POST /api/log accepts a single line", async () => {
+    const response = await postJson("/api/log", { mac: MAC, line: "hello from kickstart" });
+
+    expect(response.statusCode).toBe(200);
+    const reply = JSON.parse(response.body);
+    expect(reply.status).toBe("ok");
+    expect(reply.lines).toBe(1);
+
+    // The line must be retrievable from the in-memory buffer.
+    const stored = installLog.getLines(MAC);
+    expect(stored).toHaveLength(1);
+    expect(stored[0]!.line).toBe("hello from kickstart");
+  });
+
+  it("POST /api/log accepts multiple lines", async () => {
+    const response = await postJson("/api/log", { mac: MAC, lines: ["line 1", "line 2", "line 3"] });
+
+    expect(response.statusCode).toBe(200);
+    expect(JSON.parse(response.body).lines).toBe(3);
+    expect(installLog.getLines(MAC)).toHaveLength(3);
+  });
+
+  it("GET /api/logs/:mac includes log lines for installing machine", async () => {
+    queueInstall("test-node");
+    installLog.append(MAC, ["log line 1", "log line 2"], "test-node");
+
+    const response = await app.inject({
+      method: "GET",
+      url: `/api/logs/${encodeURIComponent(MAC)}`,
+    });
+
+    expect(response.statusCode).toBe(200);
+    const reply = JSON.parse(response.body);
+    expect(reply.status).toBe("installing");
+    expect(reply.log_lines).toHaveLength(2);
+    expect(reply.log_total).toBe(2);
+    expect(reply.log_lines[0].line).toBe("log line 1");
+  });
+
+  it("progress endpoint with 'error' stage keeps machine in install_queue", async () => {
+    queueInstall("failing-node");
+
+    const response = await postProgress("error", "%post failed at line 42");
+
+    expect(response.statusCode).toBe(200);
+
+    // An error must not promote the machine to `installed`.
+    const after = state.load();
+    expect(after.install_queue[MAC]).toBeDefined();
+    expect(after.install_queue[MAC]?.progress).toBe("error");
+    expect(after.install_queue[MAC]?.progress_detail).toBe("%post failed at line 42");
+    expect(after.installed[MAC]).toBeUndefined();
+  });
+});
--- a/bastion/src/bastion/tests/kickstart.test.ts
+++ b/bastion/src/bastion/tests/kickstart.test.ts
@@ -0,0 +1,215 @@
+import { describe, it, expect } from "vitest";
+import { renderInstallKickstart, type InstallKickstartParams } from "../src/templates/install.ks.js";
+
+/**
+ * Default kickstart parameters for tests (worker node "testnode" in
+ * lab.local, two SSH keys, admin user "admin").
+ *
+ * @param overrides - Fields that replace the defaults.
+ * @returns The defaults with `overrides` applied on top.
+ */
+function baseParams(overrides: Partial<InstallKickstartParams> = {}): InstallKickstartParams {
+  const defaults: InstallKickstartParams = {
+    hostname: "testnode",
+    disk: "",
+    role: "worker",
+    domain: "lab.local",
+    fedoraVersion: "43",
+    timezone: "Europe/London",
+    locale: "en_GB.UTF-8",
+    serverIp: "192.168.1.100",
+    httpPort: 8080,
+    syslogPort: 5514,
+    sshKeys: [
+      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST1 user1@host",
+      "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQTEST2 user2@host",
+    ],
+    adminUser: "admin",
+  };
+  return { ...defaults, ...overrides };
+}
+
+// Content checks on the rendered Fedora kickstart for each role, plus an
+// external syntax check that shells out to `ksvalidator`.
+describe("renderInstallKickstart", () => {
+  const ALL_ROLES = ["vanilla", "worker", "infra"] as const;
+
+  /** Render a kickstart from the default parameters plus `overrides`. */
+  const render = (overrides: Partial<InstallKickstartParams> = {}): string =>
+    renderInstallKickstart(baseParams(overrides));
+
+  /** Assert that every fragment occurs in `text`, in the given order of checking. */
+  const expectAll = (text: string, fragments: string[]): void => {
+    for (const fragment of fragments) {
+      expect(text).toContain(fragment);
+    }
+  };
+
+  it("worker role includes longhorn partition", () => {
+    expectAll(render({ role: "worker" }), ["longhorn", "/var/lib/longhorn"]);
+  });
+
+  it("infra role does NOT include longhorn partition", () => {
+    // Only the fresh-install longhorn line is required to be absent.
+    expect(render({ role: "infra" })).not.toContain(
+      "logvol /var/lib/longhorn --vgname=labvg --name=longhorn --fstype=xfs --grow --size=1",
+    );
+  });
+
+  it("all SSH keys appear between SSHKEYS markers", () => {
+    const keys = [
+      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST1 user1@host",
+      "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQTEST2 user2@host",
+    ];
+    const ks = render({ sshKeys: keys });
+    // Capture the heredoc body written to root's authorized_keys.
+    const sshkeysMatch = ks.match(/cat > \/root\/\.ssh\/authorized_keys << 'SSHKEYS'\n([\s\S]*?)\nSSHKEYS/);
+    expect(sshkeysMatch).not.toBeNull();
+    expectAll(sshkeysMatch![1]!, keys);
+  });
+
+  it("admin user directive appears when adminUser is set", () => {
+    expect(render({ adminUser: "myadmin" })).toContain("user --name=myadmin --groups=wheel --lock");
+  });
+
+  it("no admin user directive when adminUser is empty", () => {
+    expect(render({ adminUser: "" })).not.toContain("user --name=");
+  });
+
+  it("FQDN is hostname.domain", () => {
+    const ks = render({ hostname: "myhost", domain: "example.com" });
+    expectAll(ks, ["myhost.example.com", "--hostname=myhost.example.com"]);
+  });
+
+  it("restorecon is present", () => {
+    expect(render()).toContain("restorecon");
+  });
+
+  it("sudoers line for admin user", () => {
+    expectAll(render({ adminUser: "admin" }), ["admin ALL=(ALL) NOPASSWD: ALL", "/etc/sudoers.d/admin"]);
+  });
+
+  it("boot order restores network first (bastion controls boot)", () => {
+    expectAll(render(), ["restore network first", "PXE_ENTRY", "efibootmgr -o"]);
+  });
+
+  it("progress callback URLs use correct serverIp and httpPort", () => {
+    const ks = render({ serverIp: "10.0.0.5", httpPort: 9090 });
+    expectAll(ks, ["http://10.0.0.5:9090", "/api/progress"]);
+  });
+
+  it("infra role has /var/lib/rancher partition", () => {
+    expect(render({ role: "infra" })).toContain(
+      "logvol /var/lib/rancher --vgname=labvg --name=rancher --fstype=xfs --size=20480",
+    );
+  });
+
+  it("infra role has k3s install", () => {
+    expect(render({ role: "infra" })).toContain(
+      "curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -",
+    );
+  });
+
+  it("worker role does NOT have /var/lib/rancher partition in fresh install", () => {
+    // Only the fresh-install rancher line is required to be absent.
+    expect(render({ role: "worker" })).not.toContain(
+      "logvol /var/lib/rancher --vgname=labvg --name=rancher --fstype=xfs --size=20480",
+    );
+  });
+
+  it("worker role does NOT have k3s install", () => {
+    expect(render({ role: "worker" })).not.toContain("INSTALL_K3S_SKIP_START");
+  });
+
+  it("reprovision preserves rancher partition", () => {
+    expectAll(render({ role: "infra" }), [
+      "PRESERVE_RANCHER=no",
+      'lvs $VG/rancher',
+      "PRESERVE_RANCHER=yes",
+      'logvol /var/lib/rancher --vgname=labvg --name=rancher --useexisting --noformat',
+    ]);
+  });
+
+  it("partition sizes are correct", () => {
+    // Sizes are the MiB values passed to --size.
+    expectAll(render(), [
+      "--name=root --fstype=xfs --size=33792",
+      "--name=var --fstype=xfs --size=102400",
+      "--name=varlog --fstype=xfs --size=10240",
+      "--name=home --fstype=xfs --size=10240",
+      "--name=srv --fstype=xfs --size=20480",
+      "--name=swap --fstype=swap --size=27648",
+    ]);
+  });
+
+  it("vanilla role skips k3s setup", () => {
+    const ks = render({ role: "vanilla" });
+    expect(ks).toContain("vanilla role");
+    expect(ks).not.toContain("modules-load.d/k3s.conf");
+    expect(ks).not.toContain("firewalld");
+  });
+
+  it("worker role has k3s setup", () => {
+    expectAll(render({ role: "worker" }), ["modules-load.d/k3s.conf", "sysctl.d/90-k3s.conf", "firewalld"]);
+  });
+
+  it("kickstart syntax: no merged partition lines", () => {
+    for (const role of ALL_ROLES) {
+      render({ role }).split("\n").forEach((raw, idx) => {
+        const l = raw.trim();
+        if (!l.startsWith("part ")) {
+          return;
+        }
+        // Two `part` words on one line means two directives were joined.
+        const partCount = (l.match(/\bpart\b/g) || []).length;
+        expect(partCount, `line ${idx + 1} has ${partCount} 'part' commands (role=${role}): ${l}`).toBe(1);
+      });
+    }
+  });
+
+  it("kickstart syntax: each section-opening has a %end", () => {
+    const ks = render();
+    const countMatches = (pattern: RegExp): number => (ks.match(pattern) || []).length;
+    // Section openers are only counted at the start of a line.
+    const sections = countMatches(/^%(?:pre|post|packages)\b/gm);
+    const ends = countMatches(/^%end$/gm);
+    expect(ends, `${sections} sections but ${ends} %end markers`).toBe(sections);
+  });
+
+  it("has complete progress stage", () => {
+    expectAll(render(), ['"complete"', "ready at"]);
+  });
+
+  it("sends install logs to bastion via syslog", () => {
+    expect(render({ syslogPort: 5514 })).toContain("logging --host=192.168.1.100 --port=5514");
+  });
+
+  it("passes ksvalidator syntax check", () => {
+    const { execSync } = require("node:child_process");
+    const { writeFileSync, unlinkSync } = require("node:fs");
+    for (const role of ALL_ROLES) {
+      const tmp = `/tmp/ks-test-${role}.ks`;
+      writeFileSync(tmp, render({ role }));
+      try {
+        execSync(`ksvalidator -v F43 ${tmp}`, { encoding: "utf-8" });
+      } catch (err: unknown) {
+        // Prefer the validator's stderr when the thrown error carries one.
+        const msg = err instanceof Error ? (err as { stderr?: string }).stderr ?? err.message : String(err);
+        throw new Error(`ksvalidator failed for role=${role}: ${msg}`);
+      } finally {
+        try { unlinkSync(tmp); } catch {}
+      }
+    }
+  });
+
+  it("forwards system logs to serial console", () => {
+    expectAll(render({ role: "vanilla" }), ["serial-console.conf", "/dev/ttyS0", "rsyslog"]);
+  });
+});
--- a/bastion/src/bastion/tests/state.test.ts
+++ b/bastion/src/bastion/tests/state.test.ts
@@ -0,0 +1,140 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { mkdirSync, rmSync, existsSync, readFileSync, writeFileSync, chmodSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { StateManager } from "../src/services/state.js";
+
+// Persistence tests for StateManager against a state.json file placed in a
+// fresh scratch directory per test.
+describe("StateManager", () => {
+  const MAC = "aa:bb:cc:dd:ee:ff";
+  const QUEUED_MAC = "11:22:33:44:55:66";
+  /** Shape expected before anything has been recorded. */
+  const emptyState = () => ({ discovered: {}, install_queue: {}, installed: {} });
+
+  let testDir: string;
+  let stateFile: string;
+  let state: StateManager;
+
+  beforeEach(() => {
+    const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
+    testDir = join(tmpdir(), `bastion-test-${suffix}`);
+    mkdirSync(testDir, { recursive: true });
+    stateFile = join(testDir, "state.json");
+    state = new StateManager(stateFile);
+  });
+
+  afterEach(() => {
+    rmSync(testDir, { recursive: true, force: true });
+  });
+
+  it("creates empty state on first load", () => {
+    expect(state.load()).toEqual(emptyState());
+  });
+
+  it("init creates the state file", () => {
+    expect(existsSync(stateFile)).toBe(false);
+    state.init();
+    expect(existsSync(stateFile)).toBe(true);
+
+    const onDisk = JSON.parse(readFileSync(stateFile, "utf-8"));
+    expect(onDisk).toEqual(emptyState());
+  });
+
+  it("saves and loads state correctly", () => {
+    state.init();
+
+    state.update((s) => {
+      s.discovered[MAC] = {
+        mac: MAC,
+        product: "TestBox",
+        board: "TestBoard",
+        serial: "SN123",
+        manufacturer: "TestCorp",
+        cpu_model: "Test CPU",
+        cpu_cores: 8,
+        memory_gb: 32,
+        arch: "x86_64",
+        disks: [{ name: "sda", size_gb: 500, model: "TestDisk" }],
+        nics: [{ name: "eth0", mac: MAC, state: "UP" }],
+        first_seen: "2025-01-01T00:00:00Z",
+        last_seen: "2025-01-01T00:00:00Z",
+      };
+
+      s.install_queue[QUEUED_MAC] = {
+        hostname: "worker-1",
+        disk: "/dev/sda",
+        role: "worker",
+        queued_at: "2025-01-01T01:00:00Z",
+      };
+    });
+
+    // A second manager on the same file proves the data reached disk.
+    const reloaded = new StateManager(stateFile).load();
+
+    expect(reloaded.discovered[MAC]?.product).toBe("TestBox");
+    expect(reloaded.discovered[MAC]?.cpu_cores).toBe(8);
+    expect(reloaded.install_queue[QUEUED_MAC]?.hostname).toBe("worker-1");
+    expect(reloaded.installed).toEqual({});
+  });
+
+  it("uses atomic writes (tmp file + rename)", () => {
+    state.init();
+
+    state.update((s) => {
+      s.installed[MAC] = {
+        hostname: "node1",
+        role: "worker",
+        ip: "10.0.0.1",
+        installed_at: "2025-01-01T00:00:00Z",
+      };
+    });
+
+    // No `.tmp` sibling may be left behind once the update has returned.
+    expect(existsSync(`${stateFile}.tmp`)).toBe(false);
+    expect(existsSync(stateFile)).toBe(true);
+
+    // The final file must hold the new record.
+    const parsed = JSON.parse(readFileSync(stateFile, "utf-8"));
+    expect(parsed.installed[MAC].hostname).toBe("node1");
+  });
+});
+
+// NOTE(review): these cases call only node:fs helpers and parseInt; no bastion
+// PID-handling code is invoked from this suite.
+describe("PID file handling", () => {
+  let testDir: string;
+
+  /** Read a PID file and parse its trimmed content as a base-10 integer. */
+  const readPid = (file: string): number => parseInt(readFileSync(file, "utf-8").trim(), 10);
+
+  beforeEach(() => {
+    const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
+    testDir = join(tmpdir(), `bastion-pid-test-${suffix}`);
+    mkdirSync(testDir, { recursive: true });
+  });
+
+  afterEach(() => {
+    rmSync(testDir, { recursive: true, force: true });
+  });
+
+  it("handles stale PID file from previous run", () => {
+    const pidFile = join(testDir, "bastion.pid");
+    // A PID this large stands in for a process that no longer exists.
+    writeFileSync(pidFile, "999999999");
+    expect(readPid(pidFile)).toBe(999999999);
+  });
+
+  it("handles corrupted PID file gracefully", () => {
+    const pidFile = join(testDir, "bastion.pid");
+    writeFileSync(pidFile, "not-a-number\n");
+    expect(isNaN(readPid(pidFile))).toBe(true);
+  });
+
+  it("handles missing bastion directory", () => {
+    const missingDir = join(testDir, "nonexistent", "deep");
+    // Recursive creation builds both missing levels in one call.
+    mkdirSync(missingDir, { recursive: true });
+    expect(existsSync(missingDir)).toBe(true);
+  });
+});
--- a/bastion/src/bastion/tsconfig.json
+++ b/bastion/src/bastion/tsconfig.json
@@ -0,0 +1,13 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "rootDir": "src",
+    "outDir": "dist",
+    "types": ["node"]
+  },
+  "include": ["src/**/*.ts"],
+  "references": [
+    { "path": "../shared" },
+    { "path": "../modules" }
+  ]
+}
--- a/bastion/src/bastion/vitest.config.ts
+++ b/bastion/src/bastion/vitest.config.ts
@@ -0,0 +1,8 @@
+import { defineProject } from 'vitest/config';
+
+// Vitest project definition: registers this project under the name 'bastion'
+// and collects test files matching tests/**/*.test.ts.
+export default defineProject({
+  test: {
+    name: 'bastion',
+    include: ['tests/**/*.test.ts'],
+  },
+});
--- a/bastion/src/cli/package.json
+++ b/bastion/src/cli/package.json
@@ -0,0 +1,29 @@
+{
+  "name": "@lab/cli",
+  "version": "0.1.0",
+  "private": true,
+  "type": "module",
+  "bin": {
+    "labctl": "./dist/index.js"
+  },
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "scripts": {
+    "build": "tsc --build",
+    "clean": "rimraf dist",
+    "dev": "tsx src/index.ts",
+    "test": "vitest",
+    "test:run": "vitest run"
+  },
+  "dependencies": {
+    "@lab/bastion": "workspace:*",
+    "@lab/modules": "workspace:*",
+    "@lab/shared": "workspace:*",
+    "commander": "^13.0.0",
+    "ws": "^8.19.0"
+  },
+  "devDependencies": {
+    "@types/node": "^22.10.0",
+    "@types/ws": "^8.18.1"
+  }
+}
--- a/bastion/src/cli/src/api/client.ts
+++ b/bastion/src/cli/src/api/client.ts
@@ -0,0 +1,161 @@
+// Typed API client for communicating with labd.
+
+import https from "node:https";
+import { readFileSync } from "node:fs";
+import { LabdApiError } from "./errors.js";
+import type {
+  Server,
+  ServerFilters,
+  JoinToken,
+  CreateTokenOpts,
+  EnrollmentRequest,
+  EnrollmentResponse,
+  HealthStatus,
+  RequestOpts,
+} from "./types.js";
+
+/** Connection settings accepted by the `LabdClient` constructor. */
+export interface LabdClientConfig {
+  /** Base URL that every request path is resolved against. */
+  baseUrl: string;
+  /** Path to the client certificate file; only read when `keyPath` is also set. */
+  certPath?: string;
+  /** Path to the client private-key file; only read when `certPath` is also set. */
+  keyPath?: string;
+  /** Path to a CA file; only read when a cert/key pair is configured. */
+  caPath?: string;
+  /** Per-request timeout in milliseconds; the client falls back to 30 000 when omitted. */
+  timeoutMs?: number;
+}
+
+/**
+ * Typed HTTP client for the labd API.
+ *
+ * When both `certPath` and `keyPath` are configured, the files are read
+ * synchronously in the constructor and an `https.Agent` carrying the client
+ * certificate is created. HTTPS requests are then sent with `https.request`
+ * through that agent; every other request uses the global `fetch`.
+ */
+export class LabdClient {
+  /** Node system error codes that are reported as "cannot connect to labd". */
+  private static readonly CONNECTION_ERROR_CODES: readonly string[] = [
+    "ECONNREFUSED",
+    "ECONNRESET",
+    "ENOTFOUND",
+    "EHOSTUNREACH",
+    "ETIMEDOUT",
+  ];
+
+  private config: LabdClientConfig;
+  private agent: https.Agent | undefined;
+  private sessionId: string | undefined;
+
+  /**
+   * @param config - Connection settings. Certificate files are read eagerly,
+   *   so a missing or unreadable file throws from the constructor.
+   */
+  constructor(config: LabdClientConfig) {
+    this.config = config;
+    if (config.certPath && config.keyPath) {
+      this.agent = new https.Agent({
+        cert: readFileSync(config.certPath),
+        key: readFileSync(config.keyPath),
+        ca: config.caPath ? readFileSync(config.caPath) : undefined,
+        rejectUnauthorized: true,
+      });
+    }
+  }
+
+  /** Sets the value sent as the `X-Session-ID` header on subsequent requests. */
+  setSessionId(id: string): void {
+    this.sessionId = id;
+  }
+
+  // --- Server endpoints ---
+
+  /** GET /api/servers, with `filters` passed as query parameters. */
+  async getServers(filters?: ServerFilters): Promise<Server[]> {
+    return this.request("GET", "/api/servers", { query: filters as Record<string, string | undefined> });
+  }
+
+  /** GET /api/servers/:id (the id is URL-encoded). */
+  async getServer(id: string): Promise<Server> {
+    return this.request("GET", `/api/servers/${encodeURIComponent(id)}`);
+  }
+
+  // --- Token endpoints ---
+
+  /** POST /api/tokens with `opts` as the JSON body. */
+  async createJoinToken(opts: CreateTokenOpts): Promise<JoinToken> {
+    return this.request("POST", "/api/tokens", { body: opts });
+  }
+
+  /** GET /api/tokens. */
+  async listTokens(): Promise<JoinToken[]> {
+    return this.request("GET", "/api/tokens");
+  }
+
+  /** DELETE /api/tokens/:id (the id is URL-encoded). */
+  async revokeToken(id: string): Promise<{ status: string; id: string }> {
+    return this.request("DELETE", `/api/tokens/${encodeURIComponent(id)}`);
+  }
+
+  // --- Auth endpoints ---
+
+  /** POST /api/auth/enroll with `req` as the JSON body. */
+  async enroll(req: EnrollmentRequest): Promise<EnrollmentResponse> {
+    return this.request("POST", "/api/auth/enroll", { body: req });
+  }
+
+  // --- Bastion endpoints ---
+
+  /** GET /api/bastions. */
+  async getBastions(): Promise<Array<{
+    id: string; hostname: string; network: string; serverIp: string;
+    status: string; machineCount: number; lastHeartbeat?: string; connectedAt?: string;
+  }>> {
+    return this.request("GET", "/api/bastions");
+  }
+
+  // --- Machine endpoints (aggregated through labd from bastions) ---
+
+  /** GET /api/machines. */
+  async getMachines(): Promise<import("@lab/shared").BastionState> {
+    return this.request("GET", "/api/machines");
+  }
+
+  /** POST /api/machines/install with `opts` as the JSON body. */
+  async installMachine(opts: {
+    mac: string; hostname: string; disk?: string; role?: string; os?: string;
+  }): Promise<{ status: string; data?: unknown; error?: string }> {
+    return this.request("POST", "/api/machines/install", { body: opts });
+  }
+
+  /** DELETE /api/machines/:mac (the MAC is URL-encoded). */
+  async forgetMachine(mac: string): Promise<{ status: string }> {
+    return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
+  }
+
+  /** POST /api/machines/role with `{ mac, role }` as the JSON body. */
+  async updateRole(mac: string, role: string): Promise<{ status: string }> {
+    return this.request("POST", "/api/machines/role", { body: { mac, role } });
+  }
+
+  /** GET /api/machines/:mac/logs (the MAC is URL-encoded). */
+  async getMachineLogs(mac: string): Promise<Record<string, unknown>> {
+    return this.request("GET", `/api/machines/${encodeURIComponent(mac)}/logs`);
+  }
+
+  // --- Health endpoints ---
+
+  /** GET /healthz. */
+  async getHealth(): Promise<HealthStatus> {
+    return this.request("GET", "/healthz");
+  }
+
+  // --- Internal ---
+
+  /**
+   * Sends one JSON request and returns the parsed JSON response body.
+   *
+   * @param method - HTTP method.
+   * @param path - Path resolved against `config.baseUrl`.
+   * @param opts - Optional query parameters (undefined values are skipped)
+   *   and JSON body.
+   * @throws LabdApiError for non-2xx responses, connection failures and
+   *   timeouts; any other error is rethrown unchanged.
+   */
+  private async request<T>(method: string, path: string, opts?: RequestOpts): Promise<T> {
+    const url = new URL(path, this.config.baseUrl);
+    if (opts?.query) {
+      for (const [k, v] of Object.entries(opts.query)) {
+        if (v !== undefined) url.searchParams.set(k, String(v));
+      }
+    }
+
+    const headers: Record<string, string> = {
+      "Content-Type": "application/json",
+    };
+    if (this.sessionId) {
+      headers["X-Session-ID"] = this.sessionId;
+    }
+
+    const timeoutMs = this.config.timeoutMs ?? 30_000;
+    // Explicit null/undefined check rather than truthiness, so falsy JSON
+    // payloads such as 0, false or "" are still sent.
+    const payload =
+      opts?.body !== undefined && opts.body !== null ? JSON.stringify(opts.body) : undefined;
+
+    try {
+      // The global fetch does not accept an `https.Agent`, so the client
+      // certificate would be silently dropped; use https.request for mTLS.
+      if (this.agent && url.protocol === "https:") {
+        return await this.requestViaAgent<T>(this.agent, url, method, headers, payload, timeoutMs);
+      }
+
+      const resp = await fetch(url.toString(), {
+        method,
+        headers,
+        body: payload,
+        signal: AbortSignal.timeout(timeoutMs),
+      });
+
+      if (!resp.ok) {
+        const body = await resp.json().catch(() => ({ error: resp.statusText }));
+        throw LabdApiError.fromResponse(resp.status, body);
+      }
+
+      return (await resp.json()) as T;
+    } catch (err) {
+      if (err instanceof LabdApiError) throw err;
+      if (LabdClient.isConnectionFailure(err)) {
+        throw LabdApiError.notConnected(this.config.baseUrl);
+      }
+      if (err instanceof DOMException && err.name === "TimeoutError") {
+        throw LabdApiError.timeout(timeoutMs);
+      }
+      throw err;
+    }
+  }
+
+  /**
+   * Sends the request with `https.request` through the mTLS agent.
+   * Unlike the fetch path this does not follow redirects: a 3xx response is
+   * reported as a LabdApiError like any other non-2xx status.
+   */
+  private requestViaAgent<T>(
+    agent: https.Agent,
+    url: URL,
+    method: string,
+    headers: Record<string, string>,
+    payload: string | undefined,
+    timeoutMs: number,
+  ): Promise<T> {
+    return new Promise<T>((resolve, reject) => {
+      const req = https.request(url, {
+        method,
+        agent,
+        headers: payload === undefined
+          ? headers
+          : { ...headers, "Content-Length": String(Buffer.byteLength(payload)) },
+      });
+
+      // Overall deadline covering connect, send and the full response body.
+      const timer = setTimeout(() => {
+        req.destroy(LabdApiError.timeout(timeoutMs));
+      }, timeoutMs);
+      const fail = (err: unknown): void => {
+        clearTimeout(timer);
+        reject(err);
+      };
+
+      req.on("error", fail);
+      req.on("response", (res) => {
+        const chunks: Buffer[] = [];
+        res.on("data", (chunk: Buffer) => {
+          chunks.push(chunk);
+        });
+        res.on("error", fail);
+        res.on("end", () => {
+          clearTimeout(timer);
+          const status = res.statusCode ?? 0;
+          const ok = status >= 200 && status < 300;
+          let body: unknown;
+          try {
+            body = JSON.parse(Buffer.concat(chunks).toString("utf-8"));
+          } catch (parseErr) {
+            if (ok) {
+              reject(parseErr);
+              return;
+            }
+            // Same fallback as the fetch path for non-JSON error bodies.
+            body = { error: res.statusMessage ?? "" };
+          }
+          if (ok) {
+            resolve(body as T);
+          } else {
+            reject(LabdApiError.fromResponse(status, body));
+          }
+        });
+      });
+
+      if (payload !== undefined) req.write(payload);
+      req.end();
+    });
+  }
+
+  /** True when `err` looks like a failure to reach the server at all. */
+  private static isConnectionFailure(err: unknown): boolean {
+    if (err instanceof TypeError) {
+      return err.message.includes("fetch") || err.message.includes("ECONNREFUSED");
+    }
+    if (err instanceof Error && "code" in err) {
+      const code: unknown = err.code;
+      return typeof code === "string" && LabdClient.CONNECTION_ERROR_CODES.includes(code);
+    }
+    return false;
+  }
+}
--- a/bastion/src/cli/src/api/config.ts
+++ b/bastion/src/cli/src/api/config.ts
@@ -0,0 +1,47 @@
+// CLI configuration loading for labd client.
+// Bridges the CLI config module into LabdClient configuration.
+
+import { loadConfig, CONFIG_DIR, CONFIG_FILE, CERT_DIR } from "../config/index.js";
+import { LabdClient, type LabdClientConfig } from "./client.js";
+
+export { CONFIG_DIR, CONFIG_FILE, CERT_DIR };
+
+/**
+ * Builds the LabdClient configuration from three layers, later ones winning:
+ * the CLI config file, the LABCTL_* certificate environment variables, and
+ * the caller-supplied `overrides`.
+ */
+export function loadClientConfig(
+  overrides?: Partial<LabdClientConfig>,
+): LabdClientConfig {
+  const fileConfig = loadConfig();
+  const resolved: LabdClientConfig = { baseUrl: fileConfig.labdUrl };
+
+  // Layer 1: certificate paths from the config file (empty values are skipped).
+  if (fileConfig.certPath) resolved.certPath = fileConfig.certPath;
+  if (fileConfig.keyPath) resolved.keyPath = fileConfig.keyPath;
+  if (fileConfig.caPath) resolved.caPath = fileConfig.caPath;
+
+  // Layer 2: environment variable overrides (cert paths).
+  const envSources = [
+    ["certPath", "LABCTL_CERT_PATH"],
+    ["keyPath", "LABCTL_KEY_PATH"],
+    ["caPath", "LABCTL_CA_PATH"],
+  ] as const;
+  for (const [field, variable] of envSources) {
+    const fromEnv = process.env[variable];
+    if (fromEnv) resolved[field] = fromEnv;
+  }
+
+  // Layer 3: explicit overrides from the caller.
+  return overrides ? { ...resolved, ...overrides } : resolved;
+}
+
+/** Creates a fresh LabdClient from the loaded configuration plus `overrides`. */
+export function createLabdClient(
+  overrides?: Partial<LabdClientConfig>,
+): LabdClient {
+  return new LabdClient(loadClientConfig(overrides));
+}
+
+// Lazily created client shared by every caller of getLabdClient().
+let _singleton: LabdClient | undefined;
+
+/** Returns the shared LabdClient, creating it with default config on first use. */
+export function getLabdClient(): LabdClient {
+  return (_singleton ??= createLabdClient());
+}
--- a/bastion/src/cli/src/api/errors.ts
+++ b/bastion/src/cli/src/api/errors.ts
@@ -0,0 +1,59 @@
+// Structured API error class for labd communication.
+
+/**
+ * Error raised for failed labd requests.
+ *
+ * `statusCode` is the HTTP status, or 0 for failures that never produced a
+ * response (connection errors and timeouts). `errorCode` is derived from
+ * `statusCode`; `detail` carries an optional longer explanation.
+ */
+export class LabdApiError extends Error {
+  readonly statusCode: number;
+  readonly errorCode: string;
+  readonly detail: string | undefined;
+
+  constructor(statusCode: number, message: string, detail?: string) {
+    super(message);
+    this.name = "LabdApiError";
+    this.statusCode = statusCode;
+    this.errorCode = statusCodeToErrorCode(statusCode);
+    this.detail = detail;
+  }
+
+  /**
+   * Builds an error from a response status and its parsed body. String
+   * `error` / `detail` fields are used when present; anything else falls
+   * back to the generic `HTTP <status>` message.
+   */
+  static fromResponse(statusCode: number, body: unknown): LabdApiError {
+    const fallback = `HTTP ${statusCode}`;
+    if (typeof body !== "object" || body === null) {
+      return new LabdApiError(statusCode, fallback);
+    }
+
+    const fields = body as Record<string, unknown>;
+    const reported = fields["error"];
+    const extra = fields["detail"];
+    return new LabdApiError(
+      statusCode,
+      typeof reported === "string" ? reported : fallback,
+      typeof extra === "string" ? extra : undefined,
+    );
+  }
+
+  /** Error for a server that could not be reached at `url` (status 0). */
+  static notConnected(url: string): LabdApiError {
+    const message = `Cannot connect to labd at ${url}`;
+    return new LabdApiError(0, message, "Check that labd is running and the URL is correct.");
+  }
+
+  /** Error for a request that exceeded `timeoutMs` milliseconds (status 0). */
+  static timeout(timeoutMs: number): LabdApiError {
+    const message = `Request timed out after ${timeoutMs}ms`;
+    return new LabdApiError(0, message, "The server may be overloaded. Try again later.");
+  }
+}
+
+/** Type guard: narrows an unknown caught value to LabdApiError via `instanceof`. */
+export function isLabdApiError(err: unknown): err is LabdApiError {
+  return err instanceof LabdApiError;
+}
+
+/**
+ * Maps an HTTP status code to a symbolic error code. Status 0 (no response
+ * received) maps to CONNECTION_ERROR; any other unlisted status is UNKNOWN.
+ */
+function statusCodeToErrorCode(code: number): string {
+  const named = new Map<number, string>([
+    [400, "BAD_REQUEST"],
+    [401, "UNAUTHORIZED"],
+    [403, "FORBIDDEN"],
+    [404, "NOT_FOUND"],
+    [409, "CONFLICT"],
+    [429, "RATE_LIMITED"],
+    [500, "INTERNAL_ERROR"],
+    [503, "UNAVAILABLE"],
+  ]);
+
+  const label = named.get(code);
+  if (label !== undefined) return label;
+  return code === 0 ? "CONNECTION_ERROR" : "UNKNOWN";
+}
--- a/bastion/src/cli/src/api/index.ts
+++ b/bastion/src/cli/src/api/index.ts
@@ -0,0 +1,18 @@
+// Public API for labd client.
+
+export { LabdClient, type LabdClientConfig } from "./client.js";
+export { LabdApiError, isLabdApiError } from "./errors.js";
+export { loadClientConfig, createLabdClient, getLabdClient, CONFIG_DIR, CONFIG_FILE, CERT_DIR } from "./config.js";
+export type {
+  Server,
+  ServerFilters,
+  Agent,
+  JoinToken,
+  CreateTokenOpts,
+  EnrollmentRequest,
+  EnrollmentResponse,
+  HealthStatus,
+  ApiErrorBody,
+  RequestOpts,
+} from "./types.js";
+export { createLabdWebSocket, streamExec, streamLogs, type StreamOptions } from "./websocket.js";
--- a/bastion/src/cli/src/api/types.ts
+++ b/bastion/src/cli/src/api/types.ts
@@ -0,0 +1,96 @@
+// Typed interfaces for labd API requests and responses.
+// Matches Prisma schema models and labd route contracts.
+
+// --- Server ---
+
+/** A server record as returned by `GET /api/servers` and `GET /api/servers/:id`. */
+export interface Server {
+  id: string;
+  hostname: string;
+  mac: string | null;
+  cloud: string;
+  environment: string;
+  role: string;
+  labels: Record<string, string>;
+  ip: string | null;
+  agentVersion: string | null;
+  status: string;
+  lastHeartbeat: string | null;
+  createdAt: string;
+  updatedAt: string;
+  /** Agent record for this server; may be absent or null. */
+  agent?: Agent | null;
+}
+
+/** Agent record attached to a `Server` through its optional `agent` field. */
+export interface Agent {
+  id: string;
+  /** Id of the server this agent record belongs to. */
+  serverId: string;
+  certificatePem: string | null;
+  enrolledAt: string;
+  lastSeen: string | null;
+}
+
+/**
+ * Optional filters for listing servers. The client sends each defined field
+ * as a query parameter on `GET /api/servers`; omitted fields are not sent.
+ */
+export interface ServerFilters {
+  cloud?: string;
+  environment?: string;
+  status?: string;
+}
+
+// --- Join Tokens ---
+
+/** A join token as returned by the `/api/tokens` endpoints. */
+export interface JoinToken {
+  id: string;
+  token?: string; // Only present on creation
+  type: string;
+  label: string | null;
+  usedBy: string | null;
+  usedAt: string | null;
+  revokedAt: string | null;
+  createdAt: string;
+  expiresAt: string | null;
+}
+
+/** JSON body of `POST /api/tokens`; every field is optional. */
+export interface CreateTokenOpts {
+  type?: "one-time" | "reusable";
+  label?: string;
+  /** Requested token lifetime, in hours. */
+  expiresInHours?: number;
+}
+
+// --- Auth / Enrollment ---
+
+/** JSON body of `POST /api/auth/enroll`. */
+export interface EnrollmentRequest {
+  token: string;
+  hostname: string;
+  // NOTE(review): presumably a PEM-encoded certificate signing request — confirm against labd.
+  csr?: string;
+}
+
+/** Response body of `POST /api/auth/enroll`. */
+export interface EnrollmentResponse {
+  status: string;
+  hostname: string;
+  message: string;
+  /** Certificate in PEM form; null when the response carries none. */
+  certificatePem: string | null;
+}
+
+// --- Health ---
+
+/** Response body of `GET /healthz`. */
+export interface HealthStatus {
+  status: "healthy" | "degraded";
+  uptime: number;
+  timestamp: string;
+  /** Per-dependency check results; only the database is reported. */
+  checks: {
+    database: "ok" | "error";
+  };
+}
+
+// --- API Error ---
+
+/**
+ * Shape of a labd error response body. `LabdApiError.fromResponse` reads the
+ * `error` and `detail` fields; `code` is declared here but not read there.
+ */
+export interface ApiErrorBody {
+  error: string;
+  detail?: string;
+  code?: string;
+}
+
+// --- Request helpers ---
+
+/** Per-request options accepted by the client's internal `request` helper. */
+export interface RequestOpts {
+  /** Query parameters; entries whose value is `undefined` are skipped, others are stringified. */
+  query?: Record<string, string | number | boolean | undefined>;
+  /** Request payload; serialized with `JSON.stringify` before sending. */
+  body?: unknown;
+}
--- a/bastion/src/cli/src/api/websocket.ts
+++ b/bastion/src/cli/src/api/websocket.ts
@@ -0,0 +1,160 @@
+// WebSocket client for real-time streaming operations (exec, logs).
+
+import { WebSocket } from "ws";
+import { loadConfig } from "../config/index.js";
+import { readFileSync } from "node:fs";
+import { LabdApiError } from "./errors.js";
+
+/** Callbacks used by the streaming helpers to report progress to the caller. */
+export interface StreamOptions {
+  /** Called with each chunk of streamed output (exec stdout/stderr data or a log line). */
+  onData: (data: string) => void;
+  /** Called for socket errors, failures while handling a message, and remote log-stream errors. */
+  onError: (error: Error) => void;
+  /** Called when the WebSocket emits `close`. */
+  onClose: () => void;
+}
+
+/**
+ * Opens a WebSocket to labd at `path` and resolves once it is open.
+ *
+ * The configured labd URL is reused with its scheme switched from
+ * http(s) to ws(s). When both a client certificate and key are configured
+ * they are read from disk (plus the CA file, if set) and passed to the socket.
+ *
+ * @param path - Path resolved against the labd base URL, e.g. "/ws/exec".
+ * @returns The open socket.
+ * @throws LabdApiError when the connection fails or does not open within
+ *   10 seconds. Errors thrown synchronously by the WebSocket constructor
+ *   (for example an unsupported URL scheme) reject the promise as-is.
+ */
+export async function createLabdWebSocket(path: string): Promise<WebSocket> {
+  const CONNECT_TIMEOUT_MS = 10_000;
+
+  const config = loadConfig();
+  const baseUrl = config.labdUrl.replace("https:", "wss:").replace("http:", "ws:");
+  const url = new URL(path, baseUrl);
+
+  const wsOptions: WebSocket.ClientOptions = {};
+  if (config.certPath && config.keyPath) {
+    wsOptions.cert = readFileSync(config.certPath);
+    wsOptions.key = readFileSync(config.keyPath);
+    if (config.caPath) wsOptions.ca = readFileSync(config.caPath);
+  }
+
+  return new Promise((resolve, reject) => {
+    // Create the socket BEFORE arming the timer: if the constructor throws,
+    // no timer is left behind to touch an uninitialised `ws` later.
+    const ws = new WebSocket(url.toString(), wsOptions);
+
+    const timeout = setTimeout(() => {
+      ws.terminate();
+      reject(LabdApiError.timeout(CONNECT_TIMEOUT_MS));
+    }, CONNECT_TIMEOUT_MS);
+
+    ws.on("open", () => {
+      clearTimeout(timeout);
+      resolve(ws);
+    });
+
+    ws.on("error", (err: Error) => {
+      clearTimeout(timeout);
+      reject(
+        LabdApiError.notConnected(config.labdUrl + " — " + err.message),
+      );
+    });
+  });
+}
+
+/**
+ * Runs `command` on `serverName` through labd's `/ws/exec` socket and streams
+ * its output.
+ *
+ * @param serverName - Target server, sent as `server` in the exec request.
+ * @param command - Command and arguments.
+ * @param options - Stream callbacks plus optional `tty` (default false) and
+ *   `timeout` (default 30 000, forwarded in the exec request).
+ * @returns The remote exit code (1 when the exit message carries none).
+ * @throws Error when the server reports an error, or when the socket closes
+ *   before an exit code was received.
+ */
+export async function streamExec(
+  serverName: string,
+  command: string[],
+  options: StreamOptions & { tty?: boolean; timeout?: number },
+): Promise<number> {
+  const ws = await createLabdWebSocket("/ws/exec");
+  const requestId = crypto.randomUUID();
+
+  return new Promise<number>((resolve, reject) => {
+    // Tracks whether the promise already has an outcome, so that the
+    // `close` handler only rejects for sockets that ended prematurely.
+    let settled = false;
+
+    ws.on("message", (raw: Buffer) => {
+      try {
+        const msg = JSON.parse(raw.toString()) as {
+          type: string;
+          data?: string;
+          exitCode?: number;
+          message?: string;
+        };
+        switch (msg.type) {
+          case "exec-stdout":
+          case "exec-stderr":
+            if (msg.data) options.onData(msg.data);
+            break;
+          case "exec-exit":
+            settled = true;
+            ws.close();
+            resolve(msg.exitCode ?? 1);
+            break;
+          case "error":
+            settled = true;
+            ws.close();
+            reject(new Error(msg.message ?? "Remote execution error"));
+            break;
+        }
+      } catch (err) {
+        options.onError(err instanceof Error ? err : new Error(String(err)));
+      }
+    });
+
+    ws.on("close", () => {
+      options.onClose();
+      // Without this the returned promise would stay pending forever when
+      // the connection drops before the command reports its exit code.
+      if (!settled) {
+        settled = true;
+        reject(new Error("Connection closed before the command reported an exit code"));
+      }
+    });
+
+    ws.on("error", (err: Error) => {
+      options.onError(err);
+    });
+
+    ws.send(
+      JSON.stringify({
+        type: "exec",
+        requestId,
+        server: serverName,
+        command,
+        tty: options.tty ?? false,
+        timeout: options.timeout ?? 30_000,
+      }),
+    );
+  });
+}
+
+/**
+ * Subscribes to logs of `serverName` through labd's `/ws/logs` socket.
+ *
+ * The returned promise resolves as soon as the subscription request has been
+ * sent; log lines, errors and the end of the stream are reported through the
+ * `options` callbacks.
+ *
+ * @param serverName - Target server, sent as `server` in the subscription.
+ * @param logOptions - Forwarded unchanged as `options` in the subscription.
+ * @param options - Stream callbacks.
+ */
+export async function streamLogs(
+  serverName: string,
+  logOptions: {
+    follow?: boolean;
+    lines?: number;
+    unit?: string;
+    since?: string;
+    priority?: string;
+    kernel?: boolean;
+  },
+  options: StreamOptions,
+): Promise<void> {
+  const socket = await createLabdWebSocket("/ws/logs");
+  const requestId = crypto.randomUUID();
+
+  const handleMessage = (raw: Buffer): void => {
+    // The try block spans the dispatch too, so an exception thrown by a
+    // callback is routed to onError just like a JSON parse failure.
+    try {
+      const parsed = JSON.parse(raw.toString()) as {
+        type: string;
+        line?: string;
+        message?: string;
+      };
+
+      if (parsed.type === "log-line") {
+        if (parsed.line) options.onData(parsed.line);
+      } else if (parsed.type === "log-end") {
+        socket.close();
+      } else if (parsed.type === "error") {
+        socket.close();
+        options.onError(new Error(parsed.message ?? "Log streaming error"));
+      }
+    } catch (failure) {
+      options.onError(failure instanceof Error ? failure : new Error(String(failure)));
+    }
+  };
+
+  socket.on("message", handleMessage);
+  socket.on("close", () => {
+    options.onClose();
+  });
+  socket.on("error", (failure) => {
+    options.onError(failure);
+  });
+
+  const subscription = {
+    type: "log-subscribe",
+    requestId,
+    server: serverName,
+    options: logOptions,
+  };
+  socket.send(JSON.stringify(subscription));
+}
--- a/bastion/src/cli/src/commands/app.ts
+++ b/bastion/src/cli/src/commands/app.ts
@@ -0,0 +1,403 @@
+// CLI command: labctl app k3s install/health <target>
+// Install or check k3s on a target machine via SSH.
+
+import { existsSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import type { Command } from "commander";
+import type { BastionState } from "@lab/shared";
+import { K3sModule, sshExec } from "@lab/modules";
+import { getLabdClient } from "../api/config.js";
+
+/**
+ * Resolves a user-supplied target to a reachable machine.
+ *
+ * Resolution order:
+ *  1. a dotted-quad IP is used directly (hostname = the IP, role "infra");
+ *  2. a MAC address (case-insensitive, `-` or `:` separated) of an installed
+ *     machine;
+ *  3. an exact hostname match, then a match on the first hostname label(s)
+ *     (`node1` matches `node1.lab`).
+ *
+ * Machines without a recorded IP are never returned, because every caller
+ * needs the address to connect to.
+ *
+ * @param target - IP address, MAC address or hostname.
+ * @param state - Machine state from labd, or null when it is unavailable.
+ * @returns The resolved machine, or null when nothing matches.
+ */
+function resolveTarget(
+  target: string,
+  state: BastionState | null,
+): { ip: string; hostname: string; role: string } | null {
+  // Direct IP
+  if (/^\d+\.\d+\.\d+\.\d+$/.test(target)) {
+    return { ip: target, hostname: target, role: "infra" };
+  }
+
+  if (!state) return null;
+
+  // Check by MAC
+  const mac = target.toLowerCase().replace(/-/g, ":");
+  const installed = state.installed[mac];
+  if (installed?.ip) {
+    return { ip: installed.ip, hostname: installed.hostname, role: installed.role };
+  }
+
+  // Check by hostname. Entries without an IP are skipped (consistent with
+  // the MAC lookup), and an exact match wins over a prefix match so that
+  // iteration order cannot pick `node1.lab` when `node1` was requested.
+  const reachable = Object.values(state.installed).filter((info) => Boolean(info.ip));
+  const match =
+    reachable.find((info) => info.hostname === target) ??
+    reachable.find((info) => info.hostname.startsWith(target + "."));
+
+  return match ? { ip: match.ip, hostname: match.hostname, role: match.role } : null;
+}
+
+/**
+ * Returns the path of the first existing private key among id_ed25519,
+ * id_ecdsa and id_rsa in the invoking user's ~/.ssh, or undefined if none
+ * exists. When SUDO_USER is set, the home directory is taken to be
+ * /home/<SUDO_USER> instead of the current user's home.
+ */
+function findSshKey(): string | undefined {
+  const invokingUser = process.env["SUDO_USER"];
+  const sshDir = join(invokingUser ? join("/home", invokingUser) : homedir(), ".ssh");
+
+  return ["id_ed25519", "id_ecdsa", "id_rsa"]
+    .map((fileName) => join(sshDir, fileName))
+    .find((candidate) => existsSync(candidate));
+}
+
+/**
+ * Fetches the machine state from labd, returning null on any failure.
+ * The try block also covers errors thrown while obtaining the client, so
+ * callers can treat "labd unavailable" and "client misconfigured" alike.
+ */
+async function fetchState(): Promise<BastionState | null> {
+  try {
+    return await getLabdClient().getMachines();
+  } catch {
+    return null;
+  }
+}
+
+import { registerLabcontrollerCommands } from "./labcontroller.js";
+
+/**
+ * Registers the `app` command tree on `program`:
+ *
+ *  - the labcontroller subcommands (delegated to registerLabcontrollerCommands);
+ *  - `app k3s install <target>`: install and configure k3s, then auto-deploy
+ *    any apps required by the machine's role;
+ *  - `app k3s health [target]`: summary table for all installed machines, or
+ *    a detailed check for one target;
+ *  - `app k3s list`: table of installed machines with their k3s status.
+ *
+ * Several actions terminate the process with `process.exit` on failure.
+ */
+export function registerAppCommand(program: Command): void {
+  const appCmd = program.command("app").description("Application management");
+
+  // labcontroller subcommands
+  registerLabcontrollerCommands(appCmd);
+
+  const k3sCmd = appCmd.command("k3s").description("k3s cluster management");
+
+  k3sCmd
+    .command("install <target>")
+    .description("Install k3s on a target machine (hostname, IP, or MAC)")
+    .option("--role <role>", "k3s role: infra (server) or worker (agent)", "infra")
+    .option("--user <user>", "SSH user", "michal")
+    .option("--k3s-server <url>", "k3s server URL (required for worker role)")
+    .option("--k3s-token <token>", "k3s join token (required for worker role)")
+    .action(async (target: string, opts: {
+      role: string;
+      user: string;
+      k3sServer?: string;
+      k3sToken?: string;
+    }) => {
+      const state = await fetchState();
+      const resolved = resolveTarget(target, state);
+
+      if (!resolved) {
+        console.error(`Cannot resolve target: ${target}`);
+        console.error("Provide an IP address, hostname, or MAC of an installed machine.");
+        process.exit(1);
+      }
+
+      // Anything other than "worker" is installed as infra; say so instead
+      // of silently turning a typo into a server install.
+      if (opts.role !== "worker" && opts.role !== "infra") {
+        console.error(`Unknown k3s role "${opts.role}" (expected "infra" or "worker"); using infra.`);
+      }
+      const role = opts.role === "worker" ? "worker" : "infra";
+      const sshKey = findSshKey();
+
+      console.log(`Installing k3s on ${resolved.hostname} (${resolved.ip}) as ${role}...`);
+      console.log("");
+
+      const k3s = new K3sModule();
+      const moduleCtx = {
+        hostname: resolved.hostname,
+        ip: resolved.ip,
+        role,
+        os: "fedora-43" as const,
+        arch: "x86_64" as const,
+        sshUser: opts.user,
+        ...(sshKey ? { sshKeyPath: sshKey } : {}),
+        config: {
+          ...(opts.k3sServer ? { k3sServerUrl: opts.k3sServer } : {}),
+          ...(opts.k3sToken ? { k3sToken: opts.k3sToken } : {}),
+        },
+      };
+
+      const installResult = await k3s.install(moduleCtx);
+      for (const line of installResult.output) {
+        console.log(`  ${line}`);
+      }
+      if (!installResult.success) {
+        console.error(`\nk3s install failed: ${installResult.errors.join(", ")}`);
+        process.exit(1);
+      }
+
+      console.log("\nRunning post-install configuration...\n");
+      const configResult = await k3s.configure(moduleCtx);
+      for (const line of configResult.output) {
+        console.log(`  ${line}`);
+      }
+      if (!configResult.success) {
+        console.error(`\nk3s configure failed: ${configResult.errors.join(", ")}`);
+        process.exit(1);
+      }
+
+      console.log("\nk3s installed successfully.");
+
+      // Check if the machine's role requires additional app deployments
+      try {
+        const { ROLE_REGISTRY } = await import("@lab/shared");
+        const freshState = await fetchState();
+        if (freshState) {
+          for (const [, info] of Object.entries(freshState.installed)) {
+            if (info.ip === resolved.ip || info.hostname === resolved.hostname) {
+              const roleInfo = ROLE_REGISTRY.find((r: { name: string }) => r.name === info.role);
+              if (roleInfo && roleInfo.apps.length > 0) {
+                console.log(`\nRole ${info.role} requires: ${roleInfo.apps.join(", ")}`);
+                console.log(`Deploying automatically...`);
+                const { execFileSync } = await import("node:child_process");
+                try {
+                  // Re-run this CLI with the interpreter that is running it
+                  // now, rather than whichever "node" happens to be on PATH
+                  // (which may be missing or different, e.g. under sudo).
+                  execFileSync(process.execPath, [
+                    process.argv[1] ?? "",
+                    "app", "labcontroller", "deploy", resolved.hostname,
+                    "--user", opts.user,
+                  ], { stdio: "inherit" });
+                } catch {
+                  console.error(`\nAuto-deploy failed. Run manually: labctl app labcontroller deploy ${resolved.hostname}`);
+                }
+              }
+              break;
+            }
+          }
+        }
+      } catch { /* best-effort chain */ }
+
+      console.log(`\nTo get kubeconfig:  ssh ${opts.user}@${resolved.ip} sudo cat /etc/rancher/k3s/k3s.yaml`);
+    });
+
+  k3sCmd
+    .command("health [target]")
+    .description("Check k3s health (all hosts if no target given)")
+    .option("--user <user>", "SSH user", "michal")
+    .action(async (target: string | undefined, opts: { user: string }) => {
+      const sshKey = findSshKey();
+
+      // No target: probe every installed machine in parallel and print a table.
+      if (!target) {
+        let state: BastionState;
+        try {
+          state = await getLabdClient().getMachines();
+        } catch (err) {
+          console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
+          process.exit(1);
+        }
+
+        const entries = Object.entries(state.installed);
+        if (entries.length === 0) {
+          console.log("No installed machines.");
+          return;
+        }
+
+        // ANSI escape sequences used to colour the table cells.
+        const BOLD = "\x1b[1m";
+        const GREEN = "\x1b[32m";
+        const RED = "\x1b[31m";
+        const DIM = "\x1b[2m";
+        const RESET = "\x1b[0m";
+        const pad = (s: string, w: number) => s.padEnd(w);
+
+        console.log(
+          `${BOLD}${pad("HOST", 22)}${pad("IP", 16)}${pad("ROLE", 8)}${pad("K3S", 14)}${pad("NODE", 10)}${pad("ENCRYPT", 10)}${pad("CNI", 14)}${pad("PODS", 6)}${RESET}`,
+        );
+
+        // One table row; the *C fields hold the colour prefix of the matching cell.
+        interface HealthRow {
+          host: string; ip: string; role: string;
+          k3s: string; node: string; encrypt: string; cni: string; pods: string;
+          k3sC: string; nodeC: string; encC: string; cniC: string;
+        }
+
+        const probes = entries.map(async ([_mac, info]): Promise<HealthRow> => {
+          const r: HealthRow = {
+            host: info.hostname, ip: info.ip, role: info.role,
+            k3s: "—", node: "—", encrypt: "—", cni: "—", pods: "—",
+            k3sC: DIM, nodeC: DIM, encC: DIM, cniC: DIM,
+          };
+
+          // Machines without an IP, or with the "vanilla" role, are not probed.
+          if (!info.ip || info.role === "vanilla") {
+            r.k3s = info.role === "vanilla" ? "n/a" : "no ip";
+            return r;
+          }
+
+          try {
+            const svc = await sshExec(info.ip, opts.user, "systemctl is-active k3s 2>/dev/null || systemctl is-active k3s-agent 2>/dev/null", {
+              ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000,
+            });
+
+            if (svc.stdout.trim() !== "active") {
+              r.k3s = svc.stdout.trim() === "inactive" ? "stopped" : "not installed";
+              r.k3sC = svc.stdout.trim() === "inactive" ? RED : DIM;
+              return r;
+            }
+
+            r.k3s = "running"; r.k3sC = GREEN;
+
+            // The four detail probes are independent, so run them concurrently.
+            const [nodeRes, encRes, cniRes, podRes] = await Promise.all([
+              sshExec(info.ip, opts.user,
+                "sudo k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' 2>/dev/null",
+                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
+              sshExec(info.ip, opts.user,
+                "sudo k3s secrets-encrypt status 2>/dev/null | head -1",
+                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
+              sshExec(info.ip, opts.user,
+                "sudo k3s kubectl get pods -n kube-system -l k8s-app=cilium --no-headers 2>/dev/null | head -1",
+                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
+              sshExec(info.ip, opts.user,
+                "sudo k3s kubectl get pods -A --no-headers 2>/dev/null | wc -l",
+                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
+            ]);
+
+            r.node = nodeRes.stdout.includes("True") ? "Ready" : "NotReady";
+            r.nodeC = nodeRes.stdout.includes("True") ? GREEN : RED;
+
+            r.encrypt = encRes.stdout.includes("Enabled") ? "yes" : "no";
+            r.encC = encRes.stdout.includes("Enabled") ? GREEN : RED;
+
+            // No running cilium pod is reported as "flannel".
+            r.cni = cniRes.stdout.includes("Running") ? "cilium" : "flannel";
+            r.cniC = cniRes.stdout.includes("Running") ? GREEN : DIM;
+
+            r.pods = podRes.stdout.trim() || "?";
+          } catch {
+            r.k3s = "unreachable"; r.k3sC = RED;
+          }
+
+          return r;
+        });
+
+        const results = await Promise.all(probes);
+        for (const r of results) {
+          console.log(
+            `${pad(r.host, 22)}${pad(r.ip, 16)}${pad(r.role, 8)}${r.k3sC}${pad(r.k3s, 14)}${RESET}${r.nodeC}${pad(r.node, 10)}${RESET}${r.encC}${pad(r.encrypt, 10)}${RESET}${r.cniC}${pad(r.cni, 14)}${RESET}${pad(r.pods, 6)}`,
+          );
+        }
+        return;
+      }
+
+      // Single target: detailed health check
+      const state = await fetchState();
+      const resolved = resolveTarget(target, state);
+
+      if (!resolved) {
+        console.error(`Cannot resolve target: ${target}`);
+        process.exit(1);
+      }
+
+      console.log(`Checking k3s health on ${resolved.hostname} (${resolved.ip})...\n`);
+
+      const k3s = new K3sModule();
+      const healthResult = await k3s.health({
+        hostname: resolved.hostname,
+        ip: resolved.ip,
+        role: resolved.role,
+        os: "fedora-43" as const,
+        arch: "x86_64" as const,
+        sshUser: opts.user,
+        ...(sshKey ? { sshKeyPath: sshKey } : {}),
+        config: {},
+      });
+
+      for (const line of healthResult.output) {
+        console.log(`  ${line}`);
+      }
+      if (healthResult.errors.length > 0) {
+        for (const err of healthResult.errors) {
+          console.error(`  ERROR: ${err}`);
+        }
+      }
+
+      // The exit status mirrors the health result so scripts can branch on it.
+      process.exit(healthResult.success ? 0 : 1);
+    });
+
+  k3sCmd
+    .command("list")
+    .description("List installed machines and their k3s status")
+    .option("--user <user>", "SSH user", "michal")
+    .action(async (opts: { user: string }) => {
+      let state: BastionState;
+      try {
+        state = await getLabdClient().getMachines();
+      } catch (err) {
+        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
+        process.exit(1);
+      }
+
+      const entries = Object.entries(state.installed);
+      if (entries.length === 0) {
+        console.log("No installed machines.");
+        return;
+      }
+
+      const sshKey = findSshKey();
+      // ANSI escape sequences used to colour the table cells.
+      const BOLD = "\x1b[1m";
+      const GREEN = "\x1b[32m";
+      const RED = "\x1b[31m";
+      const DIM = "\x1b[2m";
+      const RESET = "\x1b[0m";
+
+      const hdr = (s: string, w: number) => s.padEnd(w);
+      console.log(
+        `${BOLD}${hdr("HOSTNAME", 28)}${hdr("IP", 18)}${hdr("ROLE", 10)}${hdr("K3S", 16)}${hdr("NODE", 12)}${hdr("PODS", 6)}${RESET}`,
+      );
+
+      // Probe all machines concurrently; each probe resolves to one table row.
+      const probes = entries.map(async ([_mac, info]) => {
+        const row = {
+          hostname: info.hostname,
+          ip: info.ip,
+          role: info.role,
+          k3s: "—",
+          node: "—",
+          pods: "—",
+          k3sColor: DIM,
+          nodeColor: DIM,
+        };
+
+        // Machines without an IP, or with the "vanilla" role, are not probed.
+        if (!info.ip || info.role === "vanilla") {
+          row.k3s = info.role === "vanilla" ? "n/a" : "no ip";
+          return row;
+        }
+
+        try {
+          const svcResult = await sshExec(info.ip, opts.user, "systemctl is-active k3s 2>/dev/null || systemctl is-active k3s-agent 2>/dev/null", {
+            ...(sshKey ? { keyPath: sshKey } : {}),
+            timeoutMs: 8_000,
+          });
+          const svcStatus = svcResult.stdout.trim();
+
+          if (svcStatus === "active") {
+            row.k3s = "running";
+            row.k3sColor = GREEN;
+
+            const nodeResult = await sshExec(info.ip, opts.user,
+              "sudo k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' 2>/dev/null || echo unknown",
+              { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 },
+            );
+            const nodeReady = nodeResult.stdout.trim();
+            if (nodeReady.includes("True")) {
+              row.node = "Ready";
+              row.nodeColor = GREEN;
+            } else {
+              row.node = "NotReady";
+              row.nodeColor = RED;
+            }
+
+            const podResult = await sshExec(info.ip, opts.user,
+              "sudo k3s kubectl get pods -A --no-headers 2>/dev/null | wc -l",
+              { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 },
+            );
+            row.pods = podResult.stdout.trim() || "?";
+          } else if (svcStatus === "inactive" || svcStatus === "dead") {
+            row.k3s = "stopped";
+            row.k3sColor = RED;
+          } else {
+            row.k3s = "not installed";
+            row.k3sColor = DIM;
+          }
+        } catch {
+          row.k3s = "unreachable";
+          row.k3sColor = RED;
+        }
+
+        return row;
+      });
+
+      const results = await Promise.all(probes);
+
+      for (const r of results) {
+        console.log(
+          `${hdr(r.hostname, 28)}${hdr(r.ip, 18)}${hdr(r.role, 10)}${r.k3sColor}${hdr(r.k3s, 16)}${RESET}${r.nodeColor}${hdr(r.node, 12)}${RESET}${hdr(r.pods, 6)}`,
+        );
+      }
+    });
+}
--- a/bastion/src/cli/src/commands/config.ts
+++ b/bastion/src/cli/src/commands/config.ts
@@ -0,0 +1,76 @@
+// labctl config — view and modify CLI configuration.
+
+import type { Command } from "commander";
+import {
+  loadConfig,
+  saveConfig,
+  getConfigValue,
+  setConfigValue,
+  isValidConfigKey,
+  CONFIG_FILE,
+} from "../config/index.js";
+
+/**
+ * Register the `config` command group on `parent`: `list`, `get <key>`, `set <key> <value>`
+ * and `path`.
+ *
+ * `get` and `set` print the valid keys and exit with status 1 when the key fails
+ * `isValidConfigKey`; `set` also rejects an `outputFormat` other than table, json, or yaml.
+ */
+export function registerConfigCommand(parent: Command): void {
+  const group = parent.command("config").description("View and modify CLI configuration");
+
+  /** Report an unrecognised key on stderr and terminate with status 1. */
+  function failUnknownKey(key: string): never {
+    console.error(`Unknown config key: ${key}`);
+    console.error(`Valid keys: labdUrl, certPath, keyPath, caPath, defaultEnvironment, defaultCloud, outputFormat`);
+    process.exit(1);
+  }
+
+  // config list — print every defined entry as "key: value"
+  group
+    .command("list")
+    .description("Show all configuration values")
+    .action(() => {
+      const current = loadConfig();
+      console.log(`# Configuration (${CONFIG_FILE})\n`);
+      Object.entries(current)
+        .filter(([, val]) => val !== undefined)
+        .forEach(([name, val]) => console.log(`${name}: ${val}`));
+    });
+
+  // config get <key> — print the stored value; nothing is printed when it is falsy
+  group
+    .command("get <key>")
+    .description("Get a configuration value")
+    .action((key: string) => {
+      if (!isValidConfigKey(key)) failUnknownKey(key);
+      const current = loadConfig();
+      const found = getConfigValue(current, key);
+      if (found) console.log(found);
+    });
+
+  // config set <key> <value> — validate, persist, then echo the assignment
+  group
+    .command("set <key> <value>")
+    .description("Set a configuration value")
+    .action((key: string, value: string) => {
+      if (!isValidConfigKey(key)) failUnknownKey(key);
+      const allowedFormats = ["table", "json", "yaml"];
+      if (key === "outputFormat" && !allowedFormats.includes(value)) {
+        console.error(`Invalid output format: ${value}. Must be table, json, or yaml.`);
+        process.exit(1);
+      }
+      const updated = setConfigValue(loadConfig(), key, value);
+      saveConfig(updated);
+      console.log(`Set ${key} = ${value}`);
+    });
+
+  // config path — print where the configuration file lives
+  group
+    .command("path")
+    .description("Show configuration file path")
+    .action(() => {
+      console.log(CONFIG_FILE);
+    });
+}
--- a/bastion/src/cli/src/commands/doctor.ts
+++ b/bastion/src/cli/src/commands/doctor.ts
@@ -0,0 +1,126 @@
+// labctl doctor — diagnose configuration and connectivity issues.
+
+import { existsSync, readFileSync } from "node:fs";
+import { X509Certificate } from "node:crypto";
+import type { Command } from "commander";
+import { loadConfig, CONFIG_FILE, CERT_DIR } from "../config/index.js";
+
+interface DiagnosticResult { // One row of the `labctl doctor` report
+  name: string; // Label of the check, printed before the message
+  status: "ok" | "warn" | "error"; // Selects icon and colour; any "error" sets exit code 1 in text mode
+  message: string; // Detail text printed after the label
+}
+
+const GREEN = "\x1b[32m"; // ANSI escape: green foreground (used for "ok" results)
+const YELLOW = "\x1b[33m"; // ANSI escape: yellow foreground (used for "warn" results)
+const RED = "\x1b[31m"; // ANSI escape: red foreground (used for "error" results)
+const RESET = "\x1b[0m"; // ANSI escape: reset all attributes
+
+/**
+ * Register `labctl doctor`, which runs a fixed series of checks and prints a report.
+ *
+ * Checks, in order: config file, labd URL, client certificate expiry, certificate directory,
+ * and an HTTP GET of `<labdUrl>/healthz` (5 s timeout). `--json` prints the raw results;
+ * otherwise a coloured list and summary are printed and the exit code is 1 on any "error".
+ */
+export function registerDoctorCommand(program: Command): void {
+  program
+    .command("doctor")
+    .description("Diagnose configuration and connectivity issues")
+    .option("--json", "Output results as JSON")
+    .action(async (opts: { json?: boolean }) => {
+      const results: DiagnosticResult[] = [];
+      const config = loadConfig();
+
+      // Check config file
+      const hasConfigFile = existsSync(CONFIG_FILE);
+      results.push({
+        name: "Configuration file",
+        status: hasConfigFile ? "ok" : "warn",
+        message: hasConfigFile ? CONFIG_FILE : "Using defaults — run 'labctl config set labdUrl <url>'",
+      });
+
+      // Check labd URL
+      results.push({ name: "labd URL", status: config.labdUrl ? "ok" : "error", message: config.labdUrl || "Not configured" });
+
+      // Check client certificate
+      if (config.certPath && existsSync(config.certPath)) {
+        try {
+          const cert = new X509Certificate(readFileSync(config.certPath, "utf-8"));
+          const msLeft = new Date(cert.validTo).getTime() - Date.now();
+          const daysLeft = Math.floor(msLeft / (1000 * 60 * 60 * 24));
+          // Judge expiry on the exact remaining time: with the floored day count alone, a
+          // certificate with under 24 h left would be reported as already expired.
+          const expired = !(msLeft > 0);
+          results.push({
+            name: "Client certificate",
+            status: expired ? "error" : daysLeft > 7 ? "ok" : "warn",
+            message: expired ? "Expired!" : daysLeft > 0 ? `Valid for ${daysLeft} days` : "Expires in less than a day",
+          });
+        } catch {
+          results.push({ name: "Client certificate", status: "error", message: "Failed to parse certificate" });
+        }
+      } else {
+        results.push({ name: "Client certificate", status: "warn", message: `Not configured — run 'labctl login'` });
+      }
+
+      // Check cert directory
+      const hasCertDir = existsSync(CERT_DIR);
+      results.push({
+        name: "Certificate directory",
+        status: hasCertDir ? "ok" : "warn",
+        message: hasCertDir ? CERT_DIR : "Not created yet",
+      });
+
+      // Test labd connectivity
+      if (!config.labdUrl) {
+        // Nothing to probe; the "labd URL" check above already carries the error.
+        results.push({ name: "labd connectivity", status: "warn", message: "Skipped — labd URL not configured" });
+      } else {
+        const controller = new AbortController();
+        const timeout = setTimeout(() => controller.abort(), 5000);
+        try {
+          const resp = await fetch(`${config.labdUrl}/healthz`, { signal: controller.signal });
+          // A non-JSON health body is not a connectivity failure; only a timeout while reading it is.
+          const body = (await resp.json().catch((parseErr: unknown) => {
+            if (controller.signal.aborted) throw parseErr;
+            return null;
+          })) as { status?: string } | null;
+          results.push({
+            name: "labd connectivity",
+            status: resp.ok ? "ok" : "warn",
+            message: resp.ok
+              ? `Connected — ${body?.status ?? "ok"}`
+              : `HTTP ${resp.status}: ${body?.status ?? "unknown"}`,
+          });
+        } catch (err) {
+          const msg = err instanceof Error ? err.message : String(err);
+          const message = msg.includes("abort") ? "Connection timed out (5s)" : msg.includes("ECONNREFUSED") ? "Connection refused" : msg;
+          results.push({ name: "labd connectivity", status: "error", message });
+        } finally {
+          // Cancel the timer on every path: left pending after a fast failure it would keep
+          // the process alive until the 5 s elapse.
+          clearTimeout(timeout);
+        }
+      }
+
+      // Output
+      if (opts.json) {
+        console.log(JSON.stringify(results, null, 2));
+      } else {
+        console.log("Running diagnostics...\n");
+        for (const r of results) {
+          const icon = r.status === "ok" ? "\u2713" : r.status === "warn" ? "!" : "\u2717";
+          const color = r.status === "ok" ? GREEN : r.status === "warn" ? YELLOW : RED;
+          console.log(`${color}${icon}${RESET} ${r.name}: ${r.message}`);
+        }
+
+        const errors = results.filter((r) => r.status === "error").length;
+        const warns = results.filter((r) => r.status === "warn").length;
+        const oks = results.filter((r) => r.status === "ok").length;
+        console.log(`\n${oks} passed, ${warns} warnings, ${errors} errors`);
+
+        if (errors > 0) process.exitCode = 1;
+      }
+    });
+}
--- a/bastion/src/cli/src/commands/forget.ts
+++ b/bastion/src/cli/src/commands/forget.ts
@@ -0,0 +1,22 @@
+// CLI command: provision forget
+// Remove a machine from all bastion state via labd.
+
+import type { Command } from "commander";
+import { getLabdClient } from "../api/config.js";
+
+/**
+ * Register `forget <mac>`: ask labd to drop a machine from bastion state. The MAC is lower-cased
+ * with dashes turned into colons; the reply is printed as JSON, and a failure exits with status 1.
+ */
+export function registerForgetCommand(parent: Command): void {
+  const forget = async (mac: string): Promise<void> => {
+    try {
+      const outcome = await getLabdClient().forgetMachine(mac.toLowerCase().replace(/-/g, ":"));
+      console.log(JSON.stringify(outcome, null, 2));
+    } catch (err) {
+      console.error(`Failed: ${err instanceof Error ? err.message : String(err)}`);
+      process.exit(1);
+    }
+  };
+  parent.command("forget <mac>").description("Remove a machine from bastion state").action(forget);
+}
--- a/bastion/src/cli/src/commands/install.ts
+++ b/bastion/src/cli/src/commands/install.ts
@@ -0,0 +1,69 @@
+// CLI command: provision install
+// Queue a discovered machine for OS installation via labd.
+
+import { Command, Option } from "commander";
+import { isValidOsId, SUPPORTED_OS, SUPPORTED_ROLES, ROLE_REGISTRY } from "@lab/shared";
+import { getLabdClient } from "../api/config.js";
+
+/** Build the "Available roles" help text: one line per ROLE_REGISTRY entry, with its parent and auto-installed apps. */
+function roleTable(): string {
+  const rows = ROLE_REGISTRY.map((entry) => {
+    const inherits = entry.parent ? ` (extends ${entry.parent})` : "";
+    const autoApps = entry.apps.length > 0 ? ` [auto: ${entry.apps.join(", ")}]` : "";
+    return `  ${entry.name.padEnd(16)} ${entry.description}${inherits}${autoApps}`;
+  });
+  return ["", "Available roles:", ...rows].join("\n");
+}
+
+/**
+ * Register `install <mac> <hostname>`: queue a discovered machine for OS installation via labd.
+ * Options: `--role` (default "worker"), `--os` (default "fedora-43"), `--disk` (optional).
+ * The MAC is normalised as `forget` and `logs` do (lower-case, dashes to colons) so the queued
+ * entry can be addressed by them. Exits 1 on an unknown OS/role or a failed labd request.
+ */
+export function registerInstallCommand(parent: Command): void {
+  parent
+    .command("install <mac> <hostname>")
+    .description("Queue a discovered machine for OS installation")
+    .showHelpAfterError(true)
+    .addHelpText("after", roleTable())
+    .addOption(new Option("--role <role>", "Machine role (see below)").choices([...SUPPORTED_ROLES]).default("worker"))
+    .addOption(new Option("--os <os>", "Operating system").choices([...SUPPORTED_OS]).default("fedora-43"))
+    .option("--disk <device>", "Target disk device (auto-detect if omitted)")
+    .action(async (mac: string, hostname: string, opts: { role: string; os: string; disk?: string }) => {
+      if (!isValidOsId(opts.os)) {
+        console.error(`Unknown OS: ${opts.os}. Supported: ${SUPPORTED_OS.join(", ")}`);
+        process.exit(1);
+      }
+      if (!(SUPPORTED_ROLES as readonly string[]).includes(opts.role)) {
+        console.error(`Unknown role: ${opts.role}`);
+        console.error(roleTable());
+        process.exit(1);
+      }
+
+      try {
+        const result = await getLabdClient().installMachine({
+          mac: mac.toLowerCase().replace(/-/g, ":"),
+          hostname,
+          role: opts.role,
+          os: opts.os,
+          ...(opts.disk ? { disk: opts.disk } : {}),
+        });
+
+        console.log(JSON.stringify(result, null, 2));
+        console.log("");
+        const osLabel = opts.os.startsWith("ubuntu") ? "Ubuntu" : "Fedora";
+        console.log(`Power on the machine to start ${osLabel} installation.`);
+
+        const roleInfo = ROLE_REGISTRY.find(r => r.name === opts.role);
+        if (roleInfo?.k3s) {
+          console.log(`After install completes, k3s will be installed automatically (role=${opts.role}).`);
+          if (roleInfo.apps.length > 0) console.log(`Then: ${roleInfo.apps.join(", ")} will be deployed.`);
+          console.log(`To install k3s manually later: labctl app k3s install ${hostname}`);
+        }
+      } catch (err) {
+        console.error(`Failed: ${err instanceof Error ? err.message : String(err)}`);
+        process.exit(1);
+      }
+    });
+}
--- a/bastion/src/cli/src/commands/labcontroller.ts
+++ b/bastion/src/cli/src/commands/labcontroller.ts
@@ -0,0 +1,298 @@
+// CLI command: labctl app labcontroller deploy/status
+// Deploy bastion + labd + CockroachDB to a k3s labcontroller node.
+
+import { existsSync, writeFileSync, mkdirSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import type { Command } from "commander";
+import type { BastionState } from "@lab/shared";
+import { sshExec } from "@lab/modules";
+import { getLabdClient } from "../api/config.js";
+
+/**
+ * Return the first of id_ed25519, id_ecdsa, id_rsa that exists in the user's ~/.ssh, else undefined.
+ * When SUDO_USER is set, /home/<SUDO_USER> is searched instead of homedir().
+ */
+function findSshKey(): string | undefined {
+  const invoker = process.env["SUDO_USER"];
+  const sshDir = join(invoker ? join("/home", invoker) : homedir(), ".ssh");
+  return ["id_ed25519", "id_ecdsa", "id_rsa"].map((file) => join(sshDir, file)).find((path) => existsSync(path));
+}
+
+/** Look up the IP labd records for an installed host named `target` (or beginning with `target.`).
+ *  Dotted-quad input, labd errors and misses all return `target` unchanged. */
+async function resolveIp(target: string): Promise<string> {
+  if (/^\d+\.\d+\.\d+\.\d+$/.test(target)) return target;
+  try {
+    const machines = Object.values((await getLabdClient().getMachines()).installed);
+    // The first installed machine whose hostname matches wins.
+    const hit = machines.find((m) => m.hostname === target || m.hostname.startsWith(`${target}.`));
+    if (hit) return hit.ip;
+  } catch { /* use target as-is */ }
+  return target;
+}
+
+/** Render a manifest as `Kind/name` for progress output ("?" stands in for a missing field). */
+function manifestLabel(manifest: unknown): string {
+  const m = manifest as { kind?: string; metadata?: { name?: string } };
+  return `${m.kind ?? "?"}/${m.metadata?.name ?? "?"}`;
+}
+
+/** Apply one manifest on the node: its JSON is piped to `sudo k3s kubectl apply -f -` over SSH (15 s timeout). */
+async function applyManifest(ip: string, user: string, sshOpts: { keyPath?: string }, manifest: unknown) {
+  // Single-quote the JSON for the remote shell; every embedded ' becomes '\'' so the payload survives intact.
+  const quoted = JSON.stringify(manifest).replace(/'/g, "'\\''");
+  return sshExec(ip, user, `echo '${quoted}' | sudo k3s kubectl apply -f -`, { ...sshOpts, timeoutMs: 15_000 });
+}
+
+/**
+ * Register `labcontroller deploy <target>` and `labcontroller status [target]` under `appCmd`.
+ *
+ * `deploy` fetches the node's kubeconfig and merges it into ~/.kube/config, applies the CockroachDB,
+ * labd and bastion manifests over SSH, then asks labd to set the host's role to `labcontroller`.
+ * `status` prints a per-host summary table, or the lab-system pod list of a single target.
+ */
+export function registerLabcontrollerCommands(appCmd: Command): void {
+  const lcCmd = appCmd.command("labcontroller").description("Labcontroller deployment (bastion + labd + CockroachDB)");
+
+  lcCmd
+    .command("deploy <target>")
+    .description("Deploy labcontroller stack to a k3s node")
+    .option("--user <user>", "SSH user", "michal")
+    .option("--crdb-replicas <n>", "CockroachDB replicas", "1")
+    .action(async (target: string, opts: { user: string; crdbReplicas: string }) => {
+      // Validate before touching the node: a non-numeric count would otherwise reach the manifest as NaN.
+      const replicas = parseInt(opts.crdbReplicas, 10);
+      if (!Number.isInteger(replicas) || replicas < 1) {
+        console.error(`Invalid --crdb-replicas value: ${opts.crdbReplicas} (expected a positive integer)`);
+        process.exit(1);
+      }
+
+      const ip = await resolveIp(target);
+      const sshKey = findSshKey();
+      const sshOpts = sshKey ? { keyPath: sshKey } : {};
+
+      console.log(`Deploying labcontroller stack to ${target} (${ip})...\n`);
+
+      // 1. Fetch kubeconfig from target
+      console.log("[1/4] Fetching kubeconfig...");
+      const kcResult = await sshExec(ip, opts.user, "sudo cat /etc/rancher/k3s/k3s.yaml", { ...sshOpts, timeoutMs: 10_000 });
+      if (kcResult.exitCode !== 0) {
+        console.error("  Failed to fetch kubeconfig. Is k3s running?");
+        process.exit(1);
+      }
+
+      const kubeconfigDir = join(homedir(), ".kube");
+      mkdirSync(kubeconfigDir, { recursive: true });
+
+      // Point the config at the node's address and rename its "default" entries to a per-target context.
+      const contextName = `lab-${target}`;
+      const kubeconfig = kcResult.stdout
+        .replace(/server:\s*https:\/\/127\.0\.0\.1:6443/, `server: https://${ip}:6443`)
+        .replace(/name:\s*default/g, `name: ${contextName}`)
+        .replace(/cluster:\s*default/g, `cluster: ${contextName}`)
+        .replace(/user:\s*default/g, `user: ${contextName}`);
+
+      const tmpPath = join(kubeconfigDir, `.lab-${target}-tmp`);
+      const mainConfig = join(kubeconfigDir, "config");
+      const { spawnSync } = await import("node:child_process");
+      const { unlinkSync } = await import("node:fs");
+      writeFileSync(tmpPath, kubeconfig, { mode: 0o600 });
+      try {
+        const mergeResult = spawnSync("kubectl", ["config", "view", "--flatten"], {
+          encoding: "utf-8",
+          stdio: ["pipe", "pipe", "pipe"],
+          env: { ...process.env, KUBECONFIG: `${mainConfig}:${tmpPath}` },
+        });
+        if (mergeResult.status === 0 && mergeResult.stdout) {
+          writeFileSync(mainConfig, mergeResult.stdout, { mode: 0o600 });
+          spawnSync("kubectl", ["config", "use-context", contextName], { stdio: "pipe", env: { ...process.env, KUBECONFIG: mainConfig } });
+          console.log(`  Merged into ~/.kube/config as context "${contextName}"`);
+          console.log(`  Active context set to "${contextName}"`);
+        } else {
+          writeFileSync(join(kubeconfigDir, `lab-${target}`), kubeconfig, { mode: 0o600 });
+          console.log(`  Saved to ~/.kube/lab-${target} (merge failed, use KUBECONFIG=~/.kube/lab-${target})`);
+        }
+      } finally {
+        // Remove the temporary kubeconfig copy even if the merge step throws.
+        try { unlinkSync(tmpPath); } catch { /* ignore */ }
+      }
+      console.log("");
+
+      // 2. Apply CockroachDB manifests
+      console.log("[2/4] Deploying CockroachDB...");
+      const { cockroachDbManifests } = await import("@lab/modules/dist/modules/labcontroller/src/cockroachdb.js");
+      const crdb = cockroachDbManifests({ replicas });
+
+      for (const manifest of [crdb.namespace, crdb.headlessService, crdb.clientService, crdb.statefulSet]) {
+        const result = await applyManifest(ip, opts.user, sshOpts, manifest);
+        if (result.exitCode === 0) console.log(`  applied ${manifestLabel(manifest)}`);
+        else console.error(`  FAILED ${manifestLabel(manifest)}: ${result.stderr.trim()}`);
+      }
+
+      console.log("  Waiting for CockroachDB pod...");
+      const waitResult = await sshExec(ip, opts.user,
+        "sudo k3s kubectl wait --for=condition=Ready pod -l app=cockroachdb -n lab-system --timeout=120s 2>/dev/null || echo 'still starting'",
+        { ...sshOpts, timeoutMs: 130_000 },
+      );
+      console.log(`  ${waitResult.stdout.trim()}`);
+
+      console.log("  Initializing CockroachDB cluster...");
+      const initJson = JSON.stringify(crdb.initJob);
+      await sshExec(ip, opts.user,
+        `echo '${initJson.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f - 2>/dev/null; sudo k3s kubectl wait --for=condition=Complete job/cockroachdb-init -n lab-system --timeout=60s 2>/dev/null || echo 'init may already be done'`,
+        { ...sshOpts, timeoutMs: 70_000 },
+      );
+
+      await sshExec(ip, opts.user,
+        "sudo k3s kubectl exec cockroachdb-0 -n lab-system -- /cockroach/cockroach sql --insecure -e 'CREATE DATABASE IF NOT EXISTS lab' 2>/dev/null || echo 'db may already exist'",
+        { ...sshOpts, timeoutMs: 15_000 },
+      );
+      console.log("  CockroachDB ready\n");
+
+      // 3. Deploy labd
+      console.log("[3/4] Deploying labd...");
+      const { labdManifests } = await import("@lab/modules/dist/modules/labcontroller/src/labd.js");
+      const labd = labdManifests({ databaseUrl: crdb.connectionString });
+
+      for (const manifest of [labd.service, labd.deployment]) {
+        const result = await applyManifest(ip, opts.user, sshOpts, manifest);
+        console.log(`  ${result.exitCode === 0 ? "applied" : "FAILED"} ${manifestLabel(manifest)}`);
+      }
+      console.log("");
+
+      // 4. Deploy bastion
+      console.log("[4/4] Deploying bastion (hostNetwork)...");
+      const { bastionManifests } = await import("@lab/modules/dist/modules/labcontroller/src/bastion.js");
+      const bastion = bastionManifests();
+
+      const bResult = await applyManifest(ip, opts.user, sshOpts, bastion.daemonSet);
+      console.log(`  ${bResult.exitCode === 0 ? "applied" : "FAILED"} DaemonSet/bastion`);
+
+      // 5. Promote host role to labcontroller via labd
+      console.log("Promoting host role to labcontroller...");
+      try {
+        const state = await getLabdClient().getMachines();
+        for (const [mac, info] of Object.entries(state.installed)) {
+          if (info.ip === ip || info.hostname === target) {
+            await getLabdClient().updateRole(mac, "labcontroller");
+            // Show the role labd had on record instead of assuming it was "infra".
+            console.log(`  ${info.hostname}: ${info.role ?? "?"} -> labcontroller`);
+            break;
+          }
+        }
+      } catch {
+        console.log("  Could not update role (labd may not be running yet)");
+      }
+
+      console.log("\n=== Labcontroller deployed ===");
+      console.log(`  CockroachDB: cockroachdb-client.lab-system:26257`);
+      console.log(`  labd:        ${ip}:30100`);
+      console.log(`  bastion:     ${ip}:8080 (hostNetwork)`);
+      console.log(`  context:     lab-${target}`);
+      console.log(`\n  Switch context: kubectl ctx lab-${target}`);
+      console.log(`  View pods:     kubectl get pods -n lab-system`);
+    });
+
+  lcCmd
+    .command("status [target]")
+    .description("Check labcontroller deployment status (all hosts if no target)")
+    .option("--user <user>", "SSH user", "michal")
+    .action(async (target: string | undefined, opts: { user: string }) => {
+      const sshKey = findSshKey();
+      const sshOpts = sshKey ? { keyPath: sshKey } : {};
+
+      if (!target) {
+        let state: BastionState;
+        try {
+          state = await getLabdClient().getMachines();
+        } catch (err) {
+          console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
+          process.exit(1);
+        }
+
+        const entries = Object.entries(state.installed);
+        if (entries.length === 0) {
+          console.log("No installed machines.");
+          return;
+        }
+
+        const BOLD = "\x1b[1m";
+        const GREEN = "\x1b[32m";
+        const RED = "\x1b[31m";
+        const DIM = "\x1b[2m";
+        const RESET = "\x1b[0m";
+        const pad = (s: string, w: number) => s.padEnd(w);
+
+        console.log(`${BOLD}${pad("HOST", 22)}${pad("IP", 16)}${pad("ROLE", 14)}${pad("CRDB", 12)}${pad("LABD", 12)}${pad("BASTION", 12)}${pad("NS", 8)}${RESET}`);
+
+        interface StatusRow {
+          host: string; ip: string; role: string;
+          crdb: string; labd: string; bastion: string; ns: string;
+          crdbC: string; labdC: string; bastionC: string;
+        }
+
+        const probes = entries.map(async ([_mac, info]): Promise<StatusRow> => {
+          const r: StatusRow = {
+            host: info.hostname, ip: info.ip, role: info.role ?? "?",
+            crdb: "—", labd: "—", bastion: "—", ns: "—",
+            crdbC: DIM, labdC: DIM, bastionC: DIM,
+          };
+
+          if (!info.ip) return r;
+
+          try {
+            const result = await sshExec(info.ip, opts.user,
+              "sudo k3s kubectl get pods -n lab-system --no-headers -o custom-columns='NAME:.metadata.name,STATUS:.status.phase' 2>/dev/null || echo 'NO_NS'",
+              { ...sshOpts, timeoutMs: 10_000 },
+            );
+
+            if (result.stdout.includes("NO_NS") || result.exitCode !== 0) {
+              r.ns = "none";
+              return r;
+            }
+
+            r.ns = "ok";
+            const lines = result.stdout.trim().split("\n").filter(Boolean);
+
+            for (const line of lines) {
+              const [name, status] = line.trim().split(/\s+/);
+              if (!name) continue;
+              const running = status === "Running" || status === "Succeeded";
+              const color = running ? GREEN : RED;
+              const label = running ? "running" : (status ?? "?").toLowerCase();
+
+              if (name.startsWith("cockroachdb-") && !name.includes("init")) {
+                r.crdb = label; r.crdbC = color;
+              } else if (name.startsWith("labd-")) {
+                r.labd = label; r.labdC = color;
+              } else if (name.startsWith("bastion-")) {
+                r.bastion = label; r.bastionC = color;
+              }
+            }
+          } catch {
+            r.crdb = "ssh err"; r.crdbC = RED;
+          }
+
+          return r;
+        });
+
+        const results = await Promise.all(probes);
+        for (const r of results) {
+          console.log(`${pad(r.host, 22)}${pad(r.ip, 16)}${pad(r.role, 14)}${r.crdbC}${pad(r.crdb, 12)}${RESET}${r.labdC}${pad(r.labd, 12)}${RESET}${r.bastionC}${pad(r.bastion, 12)}${RESET}${pad(r.ns, 8)}`);
+        }
+        return;
+      }
+
+      // Specific target: show detailed pod list
+      const ip = await resolveIp(target);
+
+      console.log(`Labcontroller status on ${target} (${ip}):\n`);
+
+      const result = await sshExec(ip, opts.user,
+        "sudo k3s kubectl get pods -n lab-system -o wide 2>/dev/null || echo 'lab-system namespace not found'",
+        { ...sshOpts, timeoutMs: 10_000 },
+      );
+      console.log(result.stdout);
+    });
+}
--- a/bastion/src/cli/src/commands/list.ts
+++ b/bastion/src/cli/src/commands/list.ts
@@ -0,0 +1,98 @@
+// CLI command: provision list
+// Merged view of all known machines with hardware + install info.
+
+import type { Command } from "commander";
+import type { BastionState } from "@lab/shared";
+import { getLabdClient } from "../api/config.js";
+
+const BOLD = "\x1b[1m"; // ANSI escape: bold (table header and PENDING heading)
+const GREEN = "\x1b[0;32m"; // ANSI escape: reset, then green ("installed")
+const YELLOW = "\x1b[1;33m"; // ANSI escape: bold yellow ("queued" / "installing")
+const CYAN = "\x1b[0;36m"; // ANSI escape: reset, then cyan ("discovered")
+const RESET = "\x1b[0m"; // ANSI escape: reset all attributes
+
+/**
+ * Choose the ANSI colour for a machine status; unrecognised statuses get RESET.
+ */
+function statusColor(status: string): string {
+  if (status === "installed") return GREEN;
+  if (status === "queued" || status === "installing") return YELLOW;
+  if (status === "discovered") return CYAN;
+  return RESET;
+}
+
+/**
+ * Register `list`: print one table row per MAC known to labd (discovered, queued or installed),
+ * then a PENDING section with the progress of every install-queue entry.
+ * Exits with status 1 when labd cannot be reached.
+ */
+export function registerListCommand(parent: Command): void {
+  // ROLE column width: 14 fits "labcontroller" and matches the labcontroller status table.
+  const ROLE_WIDTH = 14;
+  parent
+    .command("list")
+    .description("List all known machines")
+    .action(async () => {
+      let state: BastionState;
+      try {
+        state = await getLabdClient().getMachines();
+      } catch (err) {
+        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
+        process.exit(1);
+      }
+
+      // Collect all known MACs
+      const allMacs = new Set([...Object.keys(state.discovered), ...Object.keys(state.install_queue), ...Object.keys(state.installed)]);
+
+      console.log("");
+      if (allMacs.size === 0) {
+        console.log("  No machines known. PXE boot a machine to discover it.");
+        console.log("");
+        return;
+      }
+
+      console.log(
+        `${BOLD}  ${"MAC".padEnd(20)} ${"HOSTNAME".padEnd(24)} ${"STATUS".padEnd(12)} ${"ROLE".padEnd(ROLE_WIDTH)} ${"IP".padEnd(16)} ${"CPU".padEnd(24)} ${"CORES".padEnd(6)} ${"RAM".padEnd(6)} PRODUCT${RESET}`,
+      );
+
+      for (const mac of allMacs) {
+        const hw = state.discovered[mac];
+        const queued = state.install_queue[mac];
+        const inst = state.installed[mac];
+
+        // Determine status: installed wins over queued/installing, which win over discovered
+        let status = "discovered";
+        if (queued !== undefined) {
+          const p = queued.progress;
+          status = p !== undefined && p !== "" && p !== "waiting" ? "installing" : "queued";
+        }
+        if (inst !== undefined) status = "installed";
+
+        const hostname = inst?.hostname ?? queued?.hostname ?? "-";
+        const role = inst?.role ?? queued?.role ?? "-";
+        const ip = inst?.ip ?? "-";
+        const cpu = hw?.cpu_model ?? "-";
+        const cores = hw?.cpu_cores != null ? String(hw.cpu_cores) : "-";
+        const ram = hw?.memory_gb != null ? `${hw.memory_gb}GB` : "-";
+        const product = hw?.product ?? "-";
+        const color = statusColor(status);
+        console.log(
+          `  ${mac.padEnd(20)} ${hostname.padEnd(24)} ${color}${status.padEnd(12)}${RESET} ${role.padEnd(ROLE_WIDTH)} ${ip.padEnd(16)} ${cpu.substring(0, 23).padEnd(24)} ${cores.padEnd(6)} ${ram.padEnd(6)} ${product}`,
+        );
+      }
+
+      // Show install queue details if any
+      const queueEntries = Object.entries(state.install_queue);
+      if (queueEntries.length > 0) {
+        console.log("");
+        console.log(`${BOLD}PENDING${RESET}`);
+        for (const [mac, cfg] of queueEntries) {
+          const progress = cfg.progress ?? "waiting";
+          const detail = cfg.progress_detail ?? "";
+          console.log(`  ${mac}  ${progress}${detail ? ` - ${detail}` : ""}`);
+        }
+      }
+
+      console.log("");
+    });
+}
--- a/bastion/src/cli/src/commands/login.ts
+++ b/bastion/src/cli/src/commands/login.ts
@@ -0,0 +1,120 @@
+// labctl login — authenticate with labd and obtain client certificate.
+
+import { generateKeyPairSync } from "node:crypto";
+import { writeFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
+import { createInterface } from "node:readline";
+import type { Command } from "commander";
+import { loadConfig, saveConfig, CERT_DIR } from "../config/index.js";
+import { join } from "node:path";
+
+/**
+ * Register `labctl login`: enrol this CLI with labd using a join token.
+ *
+ * Ensures CERT_DIR exists, creates an EC P-256 private key in `client.key` when absent, prompts
+ * for the token, POSTs `{ token, hostname, csr }` to `<server>/api/auth/user-enroll`, stores any
+ * returned certificate and CA, and saves the server URL and file paths. Exits 1 on failure.
+ */
+export function registerLoginCommand(program: Command): void {
+  program
+    .command("login")
+    .description("Authenticate with labd and obtain client certificate")
+    .option("--server <url>", "labd server URL")
+    .action(async (options: { server?: string }) => {
+      if (!existsSync(CERT_DIR)) mkdirSync(CERT_DIR, { recursive: true, mode: 0o700 });
+
+      const config = loadConfig();
+      const serverUrl = options.server ?? config.labdUrl;
+
+      const keyPath = join(CERT_DIR, "client.key");
+      const certPath = join(CERT_DIR, "client.crt");
+      const caPath = join(CERT_DIR, "ca.crt");
+
+      // 1. Generate keypair if not exists
+      if (!existsSync(keyPath)) {
+        console.log("Generating client keypair...");
+        const { privateKey } = generateKeyPairSync("ec", {
+          namedCurve: "P-256",
+          privateKeyEncoding: { type: "pkcs8", format: "pem" },
+          publicKeyEncoding: { type: "spki", format: "pem" },
+        });
+        writeFileSync(keyPath, privateKey, { mode: 0o600 });
+        console.log(`Private key saved to ${keyPath}`);
+      } else {
+        console.log(`Using existing keypair at ${keyPath}`);
+      }
+
+      // 2. Derive the public key for the request (simplified CSR — labd signs it). client.key holds
+      //    the PRIVATE key, so only the SPKI public half derived from it may leave this machine.
+      const { createPublicKey } = await import("node:crypto");
+      const keyPem = readFileSync(keyPath, "utf-8");
+      const publicKey = createPublicKey(keyPem).export({ type: "spki", format: "pem" }).toString();
+
+      // 3. Prompt for token
+      const token = await promptPassword("Enter join token: ");
+      if (!token) {
+        console.error("Token is required.");
+        process.exit(1);
+      }
+
+      // 4. Submit enrollment request
+      console.log(`Authenticating with ${serverUrl}...`);
+      try {
+        const resp = await fetch(`${serverUrl}/api/auth/user-enroll`, {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({ token, hostname: `cli-${process.env["USER"] ?? "unknown"}`, csr: publicKey }),
+        });
+
+        if (!resp.ok) {
+          const body = (await resp.json().catch(() => ({}))) as Record<string, string>;
+          console.error(`Login failed: ${body["error"] ?? resp.statusText}`);
+          process.exit(1);
+        }
+
+        const result = (await resp.json()) as { certificatePem?: string | null; caPem?: string | null; status: string };
+
+        if (result.certificatePem) {
+          writeFileSync(certPath, result.certificatePem, { mode: 0o600 });
+          console.log(`Client certificate saved to ${certPath}`);
+        }
+        if (result.caPem) {
+          writeFileSync(caPath, result.caPem, { mode: 0o644 });
+          console.log(`CA certificate saved to ${caPath}`);
+        }
+
+        // 5. Update config
+        saveConfig({
+          ...config,
+          labdUrl: serverUrl,
+          certPath,
+          keyPath,
+          ...(existsSync(caPath) ? { caPath } : {}),
+        });
+
+        console.log(`\nLogin successful! Configuration updated.`);
+        console.log(`Server: ${serverUrl}`);
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        if (message.includes("ECONNREFUSED") || message.includes("fetch")) {
+          console.error(`Cannot connect to labd at ${serverUrl}`);
+          console.error("Check that labd is running and the URL is correct.");
+        } else {
+          console.error(`Login failed: ${message}`);
+        }
+        process.exit(1);
+      }
+    });
+}
+
+/** Prompt for a secret: print `message`, read one line from stdin without echoing it on a TTY, resolve with it trimmed. */
+async function promptPassword(message: string): Promise<string> {
+  const { Writable } = await import("node:stream");
+  let muted = false; // set once the prompt has been written, so only the typed input is hidden
+  const output = new Writable({ write(chunk, _enc, done) { if (!muted) process.stdout.write(chunk); done(); } });
+  // In terminal mode readline echoes keystrokes by writing them to `output`, which is what makes muting work.
+  const rl = createInterface({ input: process.stdin, output, terminal: process.stdin.isTTY === true });
+  const answer = await new Promise<string>((resolve) => { rl.question(message, resolve); muted = true; });
+  rl.close();
+  if (process.stdin.isTTY) process.stdout.write("\n"); // the echo of Enter was muted too
+  return answer.trim();
+}
--- a/bastion/src/cli/src/commands/logs.ts
+++ b/bastion/src/cli/src/commands/logs.ts
@@ -0,0 +1,85 @@
+// CLI command: provision logs
+// Show provisioning logs for a machine via labd.
+
+import type { Command } from "commander";
+import { getLabdClient } from "../api/config.js";
+
+/**
+ * Turn a user-supplied target (hostname, MAC, or IP) into a MAC address.
+ *
+ * The target is lower-cased and dashes become colons. If that already has the
+ * shape of a MAC it is returned without contacting labd. Otherwise the labd
+ * machine state is searched: installed machines (hostname, short hostname, or
+ * IP), then the install queue (hostname or short hostname), then discovered
+ * MACs. When nothing matches, or labd cannot be queried, the normalized target
+ * is returned as-is.
+ */
+async function resolveToMac(target: string): Promise<string> {
+  const candidate = target.toLowerCase().replace(/-/g, ":");
+  const macShape = /^([0-9a-f]{2}:){5}[0-9a-f]{2}$/;
+
+  // Already MAC-shaped: nothing to look up.
+  if (macShape.test(candidate)) return candidate;
+
+  // Full hostname, or the short name in front of the first dot.
+  const hostMatches = (hostname: string): boolean =>
+    hostname === target || hostname.startsWith(target + ".");
+
+  try {
+    const machines = await getLabdClient().getMachines();
+
+    const installedHit = Object.entries(machines.installed).find(
+      ([, info]) => hostMatches(info.hostname) || info.ip === target,
+    );
+    if (installedHit !== undefined) return installedHit[0];
+
+    const queuedHit = Object.entries(machines.install_queue).find(
+      ([, info]) => hostMatches(info.hostname),
+    );
+    if (queuedHit !== undefined) return queuedHit[0];
+
+    const discoveredHit = Object.keys(machines.discovered).find((mac) => mac === candidate);
+    if (discoveredHit !== undefined) return discoveredHit;
+  } catch {
+    // Best effort: if labd cannot be queried, fall back to the normalized target.
+  }
+
+  return candidate;
+}
+
+/**
+ * Register the `logs <target>` subcommand on `parent`.
+ *
+ * The action resolves the target to a MAC, fetches that machine's record from
+ * labd, and prints a short summary followed by its progress log. On failure it
+ * prints an error and exits with status 1.
+ */
+export function registerLogsCommand(parent: Command): void {
+  // ANSI escape sequences used to style terminal output.
+  const BOLD = "\x1b[1m";
+  const GREEN = "\x1b[32m";
+  const YELLOW = "\x1b[33m";
+  const RED = "\x1b[31m";
+  const DIM = "\x1b[2m";
+  const RESET = "\x1b[0m";
+
+  type LogEntry = { stage: string; detail: string; timestamp: string };
+
+  // Colour for a log stage: green when complete, red on error, yellow otherwise.
+  const stageColor = (stage: string): string => {
+    if (stage === "complete") return GREEN;
+    if (stage === "error") return RED;
+    return YELLOW;
+  };
+
+  const showLogs = async (target: string): Promise<void> => {
+    const mac = await resolveToMac(target);
+
+    try {
+      const data = await getLabdClient().getMachineLogs(mac);
+      const statusColor = data["status"] === "installed" ? GREEN : YELLOW;
+
+      console.log(`${BOLD}${data["hostname"]}${RESET} (${mac})`);
+      console.log(`  Status:   ${statusColor}${data["status"]}${RESET}`);
+      console.log(`  Role:     ${data["role"]}`);
+      if (data["os"]) console.log(`  OS:       ${data["os"]}`);
+      if (data["ip"]) console.log(`  IP:       ${data["ip"]}`);
+      console.log("");
+
+      const entries = data["log"] as LogEntry[] | undefined;
+      if (!entries || entries.length === 0) {
+        console.log(`  ${DIM}No progress events yet (queued, waiting for PXE boot)${RESET}`);
+        return;
+      }
+
+      console.log(`${BOLD}  Log:${RESET}`);
+      for (const entry of entries) {
+        // Characters 11..18 of the timestamp are printed as the time column.
+        const time = entry.timestamp.slice(11, 19);
+        const detail = entry.detail ? ` ${DIM}-- ${entry.detail}${RESET}` : "";
+        console.log(`  ${DIM}${time}${RESET}  ${stageColor(entry.stage)}${entry.stage}${RESET}${detail}`);
+      }
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      const notFound = msg.includes("404") || msg.includes("not found");
+      if (notFound) {
+        console.error(`Machine not found: ${target}`);
+        console.error("Run 'labctl provision list' to see available machines.");
+      } else {
+        console.error(`Cannot reach labd: ${msg}`);
+      }
+      process.exit(1);
+    }
+  };
+
+  parent
+    .command("logs <target>")
+    .description("Show provisioning logs for a machine (hostname, MAC, or IP)")
+    .action(showLogs);
+}
--- a/bastion/src/cli/src/commands/makeiso.ts
+++ b/bastion/src/cli/src/commands/makeiso.ts
@@ -0,0 +1,114 @@
+// CLI command: provision makeiso
+// Generate/serve a UEFI-bootable iPXE ISO for machines that don't support PXE boot.
+// Queries labd for connected bastions and provides the download URL.
+
+import { readFileSync, writeFileSync, existsSync } from "node:fs";
+import { createInterface } from "node:readline";
+import { Command, Option } from "commander";
+import { getLabdClient } from "../api/config.js";
+import { buildBootIso } from "@lab/bastion/iso-builder";
+
+/**
+ * Ask a question on the terminal and resolve with the trimmed answer.
+ *
+ * @param question Text written to stdout before reading a line from stdin.
+ */
+function prompt(question: string): Promise<string> {
+  const reader = createInterface({ input: process.stdin, output: process.stdout });
+  const ask = (done: (value: string) => void): void => {
+    reader.question(question, (reply) => {
+      // Close the interface so stdin does not keep the process alive.
+      reader.close();
+      done(reply.trim());
+    });
+  };
+  return new Promise<string>(ask);
+}
+
+// Per-architecture iPXE EFI binaries, keyed by the value given to --arch.
+// `src` is the local file that is checked for existence and read from disk;
+// `dest` is the path handed to the ISO builder for that file.
+const IPXE_PATHS: Record<string, { src: string; dest: string }> = {
+  x86_64: { src: "/usr/share/ipxe/ipxe-snponly-x86_64.efi", dest: "EFI/BOOT/BOOTX64.EFI" },
+  aarch64: { src: "/usr/share/ipxe/arm64-efi/snponly.efi", dest: "EFI/BOOT/BOOTAA64.EFI" },
+};
+
+/**
+ * Pick the bastion whose HTTP endpoint should be used for booting.
+ *
+ * Queries labd for bastions and keeps those reporting status "online".
+ * - none online: prints an error and exits with status 1;
+ * - exactly one: it is chosen automatically;
+ * - several: the user is asked to choose by number; anything other than a
+ *   whole number within range prints "Invalid selection." and exits 1.
+ *
+ * @returns Hostname, server IP, and HTTP port of the chosen bastion.
+ *   Rejects if the labd request itself fails.
+ */
+async function selectBastion(): Promise<{ hostname: string; serverIp: string; httpPort: number }> {
+  // The bastion HTTP port is not reported by labd here; this fixed value is used.
+  const httpPort = 8080;
+  const toTarget = (b: { hostname: string; serverIp: string }) => ({
+    hostname: b.hostname,
+    serverIp: b.serverIp,
+    httpPort,
+  });
+
+  const bastions = await getLabdClient().getBastions();
+  const online = bastions.filter(b => b.status === "online");
+
+  const [first] = online;
+  if (first === undefined) {
+    console.error("No bastions online. Start a bastion first.");
+    process.exit(1);
+  }
+
+  if (online.length === 1) {
+    console.log(`Using bastion: ${first.hostname} (${first.serverIp})`);
+    return toTarget(first);
+  }
+
+  console.log("Available bastions:\n");
+  online.forEach((b, i) => {
+    console.log(`  ${i + 1}) ${b.hostname}  ${b.serverIp}  (${b.network})`);
+  });
+  console.log("");
+
+  const answer = await prompt(`Select bastion [1-${online.length}]: `);
+  // Accept digits only: parseInt alone would silently take "2abc" or "1.9".
+  // Out-of-range numbers (including 0) index to undefined and are rejected too.
+  const selected = /^\d+$/.test(answer) ? online[Number(answer) - 1] : undefined;
+  if (selected === undefined) {
+    console.error("Invalid selection.");
+    process.exit(1);
+  }
+
+  return toTarget(selected);
+}
+
+/**
+ * Register the `makeiso` subcommand on `parent`.
+ *
+ * Without --local the command only prints the URL of the boot ISO served by
+ * the selected bastion. With --local it reads the iPXE EFI binaries for the
+ * requested architectures from disk, embeds a script that chainloads
+ * `<bastion>/boot.ipxe`, and writes the resulting ISO to --out.
+ *
+ * Exits with status 1 when labd cannot be queried, an architecture is unknown,
+ * or a required iPXE binary is missing.
+ */
+export function registerMakeIsoCommand(parent: Command): void {
+  parent
+    .command("makeiso")
+    .description("Generate a UEFI-bootable iPXE ISO for network provisioning")
+    .addOption(
+      new Option("--arch <arch...>", "Target architecture(s)")
+        .choices(["x86_64", "aarch64"])
+        .default(["x86_64", "aarch64"]),
+    )
+    .option("--local", "Build ISO locally instead of using bastion-hosted URL")
+    .option("--out <path>", "Output path for local ISO build", "ipxe-bastion.iso")
+    .action(async (opts: { arch: string[]; local?: boolean; out: string }) => {
+      // Report a failed labd query as a one-line error instead of letting the
+      // rejection escape the action handler.
+      let bastion: Awaited<ReturnType<typeof selectBastion>>;
+      try {
+        bastion = await selectBastion();
+      } catch (err) {
+        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
+        process.exit(1);
+      }
+      const bastionUrl = `http://${bastion.serverIp}:${bastion.httpPort}`;
+
+      if (opts.local) {
+        // --arch is variadic; a repeated value must not add the same EFI file twice.
+        const archs = Array.from(new Set(opts.arch));
+
+        console.log(`\nGenerating iPXE boot ISO...`);
+        console.log(`  Architectures: ${archs.join(", ")}`);
+        console.log(`  Bastion: ${bastionUrl}`);
+
+        const efiFiles: Array<{ path: string; data: Buffer }> = [];
+        for (const arch of archs) {
+          const paths = IPXE_PATHS[arch];
+          if (!paths) {
+            console.error(`Unknown architecture: ${arch}`);
+            process.exit(1);
+          }
+          if (!existsSync(paths.src)) {
+            console.error(`iPXE binary not found: ${paths.src}`);
+            console.error(`Install: sudo dnf install ipxe-bootimgs-${arch === "aarch64" ? "aarch64" : "x86"}`);
+            process.exit(1);
+          }
+          efiFiles.push({ path: paths.dest, data: readFileSync(paths.src) });
+          console.log(`  ${arch}: ${paths.dest.split("/").pop()}`);
+        }
+
+        // Embedded iPXE script: obtain a lease (one retry), then chainload the
+        // bastion's boot script; drop to the iPXE shell if that fails.
+        const script = [
+          "#!ipxe",
+          "",
+          "echo Booting from iPXE ISO -- connecting to bastion...",
+          "dhcp || ( echo DHCP failed, retrying... && sleep 3 && dhcp )",
+          `chain ${bastionUrl}/boot.ipxe || shell`,
+        ].join("\n");
+
+        const iso = buildBootIso(efiFiles, script);
+        writeFileSync(opts.out, iso);
+        console.log(`\nISO written to: ${opts.out} (${(iso.length / 1024 / 1024).toFixed(1)}MB)`);
+      } else {
+        console.log(`\nThe bastion serves a boot ISO with the correct URL embedded.`);
+        console.log(`Use this URL in JetKVM or any BMC virtual media:\n`);
+        console.log(`  ${bastionUrl}/boot.iso`);
+      }
+
+      console.log(`\nMount as virtual CD, boot from it. iPXE will chainload from bastion.`);
+    });
+}
--- a/bastion/src/cli/src/commands/reprovision.ts
+++ b/bastion/src/cli/src/commands/reprovision.ts
@@ -0,0 +1,161 @@
+// CLI command: provision reprovision
+// Queue a machine for reinstall and attempt SSH reboot into PXE via labd.
+
+import { execFileSync } from "node:child_process";
+import { existsSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import { Command, Option } from "commander";
+import type { BastionState } from "@lab/shared";
+import { isValidOsId, SUPPORTED_OS, SUPPORTED_ROLES, ROLE_REGISTRY } from "@lab/shared";
+import { getLabdClient } from "../api/config.js";
+
+/**
+ * Build the "Available roles" help text: a blank line, a heading, then one
+ * line per registry entry with its name padded to 16 columns, its description,
+ * the parent role (if any), and its apps (if any).
+ */
+function roleTable(): string {
+  const rows = Array.from(ROLE_REGISTRY, (role) => {
+    const inherits = role.parent ? ` (extends ${role.parent})` : "";
+    const autoApps = role.apps.length > 0 ? ` [auto: ${role.apps.join(", ")}]` : "";
+    return `  ${role.name.padEnd(16)} ${role.description}${inherits}${autoApps}`;
+  });
+  return ["", "Available roles:", ...rows].join("\n");
+}
+
+/**
+ * Look up a target (hostname, MAC, or IP) in the given state.
+ *
+ * Precedence: installed machine keyed by the normalized MAC, discovered MAC,
+ * installed machine by hostname (full, or the short name before the first
+ * dot), installed machine by IP. A discovered-only machine is reported with
+ * its MAC as hostname and an empty IP.
+ *
+ * @returns The matching {mac, hostname, ip}, or null when nothing matches.
+ */
+function resolveTarget(
+  target: string,
+  state: BastionState,
+): { mac: string; hostname: string; ip: string } | null {
+  const key = target.toLowerCase().replace(/-/g, ":");
+
+  const installedByMac = state.installed[key];
+  if (installedByMac) {
+    return { mac: key, hostname: installedByMac.hostname, ip: installedByMac.ip };
+  }
+
+  if (state.discovered[key]) {
+    return { mac: key, hostname: key, ip: "" };
+  }
+
+  const installed = Object.entries(state.installed);
+  const byHostname = installed.find(
+    ([, info]) => info.hostname === target || info.hostname.startsWith(target + "."),
+  );
+  // The IP pass only runs when no hostname matched.
+  const hit = byHostname ?? installed.find(([, info]) => info.ip === target);
+  if (!hit) return null;
+
+  const [mac, info] = hit;
+  return { mac, hostname: info.hostname, ip: info.ip };
+}
+
+/**
+ * Register the `reprovision <target> [hostname]` subcommand on `parent`.
+ *
+ * The action:
+ *  1. validates --os and --role,
+ *  2. fetches machine state from labd and resolves the target to a MAC,
+ *  3. queues an install for that MAC via labd (optional hostname override and
+ *     --disk are forwarded),
+ *  4. if an IP and a non-root user name are known, runs ssh to set the next
+ *     boot entry to PXE and reboot the machine.
+ *
+ * Any failure in steps 1-3 prints an error and exits with status 1.
+ */
+export function registerReprovisionCommand(parent: Command): void {
+  parent
+    .command("reprovision <target> [hostname]")
+    .description("Queue install + SSH reboot into PXE (target: hostname, MAC, or IP)")
+    .showHelpAfterError(true)
+    .addHelpText("after", roleTable())
+    .addOption(new Option("--role <role>", "Machine role (see below)").choices([...SUPPORTED_ROLES]).default("worker"))
+    .addOption(new Option("--os <os>", "Operating system").choices([...SUPPORTED_OS]).default("fedora-43"))
+    .option("--disk <device>", "Target disk device (auto-detect if omitted)")
+    .action(async (target: string, hostnameOverride: string | undefined, opts: {
+      role: string;
+      os: string;
+      disk?: string;
+    }) => {
+      // Explicit checks in addition to the option choices declared above.
+      if (!isValidOsId(opts.os)) {
+        console.error(`Unknown OS: ${opts.os}. Supported: ${SUPPORTED_OS.join(", ")}`);
+        process.exit(1);
+      }
+      if (!(SUPPORTED_ROLES as readonly string[]).includes(opts.role)) {
+        console.error(`Unknown role: ${opts.role}`);
+        console.error(roleTable());
+        process.exit(1);
+      }
+
+      const client = getLabdClient();
+
+      // Resolve target from labd aggregated state
+      let state: BastionState;
+      try {
+        state = await client.getMachines();
+      } catch (err) {
+        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
+        process.exit(1);
+      }
+
+      const resolved = resolveTarget(target, state);
+      if (!resolved) {
+        console.error(`Cannot find machine: ${target}`);
+        console.error("Provide a hostname, MAC, or IP of a known machine.");
+        console.error("Run 'labctl provision list' to see available machines.");
+        process.exit(1);
+      }
+
+      const mac = resolved.mac;
+      // The optional positional argument replaces the hostname found in state.
+      const hostname = hostnameOverride ?? resolved.hostname;
+      const ip = resolved.ip;
+
+      console.log(`Reprovisioning ${hostname} (${mac})${ip ? ` at ${ip}` : ""}...`);
+      console.log(`  Role: ${opts.role}  OS: ${opts.os}`);
+      console.log("");
+
+      // Queue the install via labd
+      try {
+        const result = await client.installMachine({
+          mac,
+          hostname,
+          role: opts.role,
+          os: opts.os,
+          ...(opts.disk ? { disk: opts.disk } : {}),
+        });
+        console.log(JSON.stringify(result, null, 2));
+      } catch (err) {
+        console.error(`Failed to queue install: ${err instanceof Error ? err.message : String(err)}`);
+        process.exit(1);
+      }
+
+      // Try SSH reboot into PXE
+      if (ip === "") {
+        console.log("\nNo IP known. Reboot the machine manually into PXE.");
+        return;
+      }
+
+      // Log in as the invoking user: SUDO_USER when set, otherwise USER.
+      // "root" (or no user at all) is treated as "no usable login".
+      const adminUser = process.env["SUDO_USER"] ?? process.env["USER"] ?? "";
+      const effectiveUser = adminUser === "root" ? "" : adminUser;
+
+      if (effectiveUser === "") {
+        console.log("\nReboot the machine manually into PXE.");
+        return;
+      }
+
+      console.log(`\nAttempting SSH reboot into PXE (${effectiveUser}@${ip})...`);
+
+      // When SUDO_USER is set, keys are looked up under /home/<SUDO_USER>
+      // rather than the current process's home directory.
+      const sudoUser = process.env["SUDO_USER"];
+      const realHome = sudoUser !== undefined ? join("/home", sudoUser) : homedir();
+      const keyPaths = [
+        join(realHome, ".ssh", "id_ed25519"),
+        join(realHome, ".ssh", "id_rsa"),
+        join(realHome, ".ssh", "id_ecdsa"),
+      ];
+      // First key file that exists, in the order listed; none means no -i flag.
+      const sshKey = keyPaths.find(k => existsSync(k));
+
+      // Host key checking is disabled and the connect timeout is 10 seconds.
+      // The remote command takes the first efibootmgr line matching
+      // pxe/network/ipv4, sets that entry as BootNext, and reboots; if no such
+      // entry is found it reboots anyway.
+      const sshArgs = [
+        "-o", "StrictHostKeyChecking=no",
+        "-o", "ConnectTimeout=10",
+        ...(sshKey !== undefined ? ["-i", sshKey] : []),
+        `${effectiveUser}@${ip}`,
+        'PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi',
+      ];
+
+      try {
+        execFileSync("ssh", sshArgs, { stdio: "inherit" });
+      } catch {
+        // SSH connection closing during reboot is expected
+        // NOTE(review): every ssh failure is ignored here, so the message
+        // below is printed even if the connection was never established.
+      }
+      console.log("");
+      console.log("Machine is rebooting into PXE. Install will start automatically.");
+    });
+}
--- a/bastion/src/cli/src/commands/serve.ts
+++ b/bastion/src/cli/src/commands/serve.ts
@@ -0,0 +1,145 @@
+// CLI command: init bastion standalone start
+// Start the bastion server (HTTP + dnsmasq), daemonized by default.
+
+import { spawn, type ChildProcess } from "node:child_process";
+import { existsSync, readFileSync, openSync, mkdirSync } from "node:fs";
+import type { Command } from "commander";
+import { startBastion } from "@lab/bastion";
+
+/**
+ * Register the `start` subcommand on `parent`.
+ *
+ * With --foreground the bastion runs inside this process via startBastion().
+ * Otherwise the command re-invokes itself with --foreground as a detached
+ * child whose stdout/stderr are appended to `<dir>/bastion.log`, waits three
+ * seconds, and then either reports the child's PID or, if the child could not
+ * be spawned or has already exited, prints the tail of its log and exits 1.
+ *
+ * Only log output written by this launch is shown; the log file is opened in
+ * append mode, so earlier runs' output is skipped.
+ */
+export function registerStartCommand(parent: Command): void {
+  parent
+    .command("start")
+    .description("Start the bastion server (HTTP + dnsmasq PXE)")
+    .option("--port <port>", "HTTP port", "8080")
+    .option("--dir <dir>", "Bastion data directory", "/tmp/lab-bastion")
+    .option("--domain <domain>", "Internal domain for hostnames", "ad.itaz.eu")
+    .option("--dhcp-mode <mode>", "DHCP mode: proxy or full", "proxy")
+    .option("--fedora <version>", "Fedora version", "43")
+    .option("--arch <arch>", "Architecture", "x86_64")
+    .option("--timezone <tz>", "Timezone", "Europe/London")
+    .option("--locale <locale>", "Locale", "en_GB.UTF-8")
+    .option("--skip-dnsmasq", "Skip starting dnsmasq (for testing)")
+    .option("--skip-artifacts", "Skip downloading boot artifacts (for testing)")
+    .option("--foreground", "Run in foreground (default: daemonize)")
+    .action(async (opts: {
+      port: string;
+      dir: string;
+      domain: string;
+      dhcpMode: string;
+      fedora: string;
+      arch: string;
+      timezone: string;
+      locale: string;
+      skipDnsmasq?: boolean;
+      skipArtifacts?: boolean;
+      foreground?: boolean;
+    }) => {
+      // Reject an unusable --port before daemonizing so the error is visible
+      // on the terminal instead of NaN reaching the server.
+      const httpPort = parseInt(opts.port, 10);
+      if (Number.isNaN(httpPort) || httpPort < 0 || httpPort > 65535) {
+        console.error(`Invalid --port value: ${opts.port}`);
+        process.exit(1);
+      }
+
+      // Check root early (before daemonize) so the error is visible
+      if (!opts.skipDnsmasq && process.getuid?.() !== 0) {
+        console.error("Must run as root (dnsmasq needs DHCP/TFTP ports).");
+        console.error("Usage: sudo labctl init bastion standalone start");
+        process.exit(1);
+      }
+
+      if (opts.foreground === true) {
+        // Run in foreground
+        await startBastion({
+          httpPort,
+          bastionDir: opts.dir,
+          domain: opts.domain,
+          dhcpMode: opts.dhcpMode as "proxy" | "full",
+          fedoraVersion: opts.fedora,
+          arch: opts.arch,
+          timezone: opts.timezone,
+          locale: opts.locale,
+          skipDnsmasq: opts.skipDnsmasq,
+          skipArtifacts: opts.skipArtifacts,
+        });
+        return;
+      }
+
+      // Daemonize: re-run with --foreground, redirect output to log file
+      mkdirSync(opts.dir, { recursive: true });
+      const logFile = `${opts.dir}/bastion.log`;
+
+      // Build explicit argument list instead of re-using process.argv
+      // (which breaks with bun-compiled binaries)
+      const fgArgs = [
+        "init", "bastion", "standalone", "start", "--foreground",
+        "--port", opts.port,
+        "--dir", opts.dir,
+        "--domain", opts.domain,
+        "--dhcp-mode", opts.dhcpMode,
+        "--fedora", opts.fedora,
+        "--arch", opts.arch,
+        "--timezone", opts.timezone,
+        "--locale", opts.locale,
+      ];
+      if (opts.skipDnsmasq) fgArgs.push("--skip-dnsmasq");
+      if (opts.skipArtifacts) fgArgs.push("--skip-artifacts");
+
+      // Determine how to re-invoke ourselves
+      const execPath = process.argv[0] ?? "labctl";
+      let spawnCmd: string;
+      let spawnArgs: string[];
+
+      if (execPath.includes("node") || execPath.includes("tsx")) {
+        // Running through an interpreter: the script path must come first.
+        const scriptPath = process.argv[1];
+        spawnCmd = execPath;
+        spawnArgs = scriptPath ? [scriptPath, ...fgArgs] : fgArgs;
+      } else {
+        spawnCmd = execPath;
+        spawnArgs = fgArgs;
+      }
+
+      // The log is opened in append mode, so remember how many bytes it
+      // already holds; everything past this offset belongs to this launch.
+      let logOffset = 0;
+      try {
+        logOffset = existsSync(logFile) ? readFileSync(logFile).length : 0;
+      } catch {
+        logOffset = 0;
+      }
+      const readNewLog = (): string =>
+        readFileSync(logFile).subarray(logOffset).toString("utf-8");
+
+      // Open log file for the child's stdout/stderr so it survives parent exit
+      const logFd = openSync(logFile, "a");
+
+      const child: ChildProcess = spawn(spawnCmd, spawnArgs, {
+        detached: true,
+        stdio: ["ignore", logFd, logFd],
+      });
+
+      // A failed spawn (e.g. executable not found) is reported through an
+      // 'error' event; without a listener it would crash this process.
+      const spawnFailure: { error?: Error } = {};
+      child.once("error", (err) => {
+        spawnFailure.error = err;
+      });
+
+      // Wait briefly for the child to start, then check it's alive
+      await new Promise((resolve) => setTimeout(resolve, 3000));
+
+      // Alive means: spawned, no exit recorded, and the PID still exists.
+      const childPid = child.pid;
+      let alive = false;
+      if (
+        spawnFailure.error === undefined &&
+        childPid !== undefined &&
+        child.exitCode === null &&
+        child.signalCode === null
+      ) {
+        try {
+          process.kill(childPid, 0); // signal 0 = check existence
+          alive = true;
+        } catch {
+          // Process is gone.
+        }
+      }
+
+      if (!alive) {
+        // Child already died — show the log
+        console.error("Bastion failed to start. Log output:");
+        console.error("");
+        if (spawnFailure.error !== undefined) {
+          console.error("  " + spawnFailure.error.message);
+        }
+        let tail = "";
+        try {
+          tail = readNewLog().trim();
+        } catch {
+          // Log unreadable; treated as empty below.
+        }
+        if (tail === "") {
+          console.error("  (no log output)");
+        } else {
+          for (const line of tail.split("\n").slice(-20)) {
+            console.error("  " + line);
+          }
+        }
+        process.exit(1);
+      }
+
+      // Let this process exit without waiting for the detached child.
+      child.unref();
+
+      // Print startup info from the log
+      try {
+        process.stdout.write(readNewLog());
+      } catch {
+        // No log yet
+      }
+
+      // Prefer the PID recorded in the pid file, if one exists.
+      const pidFile = `${opts.dir}/bastion.pid`;
+      const pid = existsSync(pidFile)
+        ? readFileSync(pidFile, "utf-8").trim()
+        : String(child.pid);
+
+      console.log("");
+      console.log(`Bastion running in background (PID ${pid})`);
+      console.log(`Log: ${logFile}`);
+      process.exit(0);
+    });
+}
--- a/bastion/src/cli/src/commands/status.ts
+++ b/bastion/src/cli/src/commands/status.ts
@@ -0,0 +1,42 @@
+// CLI command: init bastion standalone status
+// Show connected bastions and their machine counts via labd.
+
+import type { Command } from "commander";
+import { getLabdClient } from "../api/config.js";
+
+// ANSI escape sequences used to style the status table; RESET ends styling.
+const BOLD = "\x1b[1m";
+const GREEN = "\x1b[32m";
+const RED = "\x1b[31m";
+const DIM = "\x1b[2m";
+const RESET = "\x1b[0m";
+
+/**
+ * Register the `status` subcommand on `parent`.
+ *
+ * The action asks labd for its bastions and prints one fixed-width row per
+ * bastion (hostname, network, IP, status, machine count). A status of "online"
+ * is shown in green, anything else in red. If the request fails, an error is
+ * printed and the process exits with status 1.
+ */
+export function registerStatusCommand(parent: Command): void {
+  // Left-align text in a column of the given width.
+  const column = (text: string, width: number): string => text.padEnd(width);
+
+  const showStatus = async (): Promise<void> => {
+    try {
+      const bastions = await getLabdClient().getBastions();
+
+      if (bastions.length === 0) {
+        console.log("No bastions registered.");
+        return;
+      }
+
+      const header = [
+        column("HOSTNAME", 24),
+        column("NETWORK", 18),
+        column("IP", 18),
+        column("STATUS", 10),
+        column("MACHINES", 10),
+      ].join("");
+      console.log(`${BOLD}${header}${RESET}`);
+
+      for (const bastion of bastions) {
+        const statusColor = bastion.status === "online" ? GREEN : RED;
+        const row = [
+          column(bastion.hostname, 24),
+          `${DIM}${column(bastion.network, 18)}${RESET}`,
+          column(bastion.serverIp, 18),
+          `${statusColor}${column(bastion.status, 10)}${RESET}`,
+          column(String(bastion.machineCount), 10),
+        ].join("");
+        console.log(row);
+      }
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      console.error(`Cannot reach labd: ${reason}`);
+      process.exit(1);
+    }
+  };
+
+  parent
+    .command("status")
+    .description("Show bastion server status")
+    .action(showStatus);
+}
--- a/Show More
+++ b/Show More