fix: PXE boot debugging — bisect root cause, syslog logging, serial console #3

Merged
michal merged 31 commits from wip/ks-debugging into main 2026-03-29 00:50:05 +00:00
239 changed files with 26966 additions and 56 deletions

1
.env Normal file
View File

@@ -0,0 +1 @@
PERPLEXITY_API_KEY=dummy

12
.env.example Normal file
View File

@@ -0,0 +1,12 @@
# API Keys (Required to enable respective provider)
ANTHROPIC_API_KEY="your_anthropic_api_key_here" # Required: Format: sk-ant-api03-...
PERPLEXITY_API_KEY="your_perplexity_api_key_here" # Optional: Format: pplx-...
OPENAI_API_KEY="your_openai_api_key_here" # Optional, for OpenAI models. Format: sk-proj-...
GOOGLE_API_KEY="your_google_api_key_here" # Optional, for Google Gemini models.
MISTRAL_API_KEY="your_mistral_key_here" # Optional, for Mistral AI models.
XAI_API_KEY="YOUR_XAI_KEY_HERE" # Optional, for xAI models.
GROQ_API_KEY="YOUR_GROQ_KEY_HERE" # Optional, for Groq models.
OPENROUTER_API_KEY="YOUR_OPENROUTER_KEY_HERE" # Optional, for OpenRouter models.
AZURE_OPENAI_API_KEY="your_azure_key_here" # Optional, for Azure OpenAI models (requires endpoint in .taskmaster/config.json).
OLLAMA_API_KEY="your_ollama_api_key_here" # Optional: For remote Ollama servers that require authentication.
GITHUB_API_KEY="your_github_api_key_here" # Optional: For GitHub import/export features. Format: ghp_... or github_pat_...

263
.gitea/workflows/ci.yml Normal file
View File

@@ -0,0 +1,263 @@
# Workflow triggers: every push to main and every pull request targeting main.
name: CI/CD
on:
push:
branches: [main]
pull_request:
branches: [main]
env:
# host:port of the Gitea instance; the publish jobs prefix it with http:// to form GITEA_URL.
GITEA_REGISTRY: 10.0.0.194:3012
# NOTE(review): not referenced directly by any job in this workflow — presumably consumed by scripts/link-package.sh; confirm.
GITEA_PUBLIC_URL: https://mysources.co.uk
# Package owner; forwarded to the publish steps and used in the /packages/<owner>/... API paths.
GITEA_OWNER: michal
# ============================================================
# Required Gitea secrets:
# PACKAGES_TOKEN -- Gitea API token (packages + registry)
# ============================================================
jobs:
# -- CI checks (run in parallel on every push/PR) ----------
# Runs `pnpm lint` inside bastion/ on Node 22.
# Non-blocking: the `|| echo` fallback converts a lint failure into a ::warning:: annotation,
# so this job succeeds even when lint reports errors.
lint:
runs-on: ubuntu-latest
defaults:
run:
working-directory: bastion
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v4
- uses: actions/setup-node@v4
with:
node-version: 22
- run: pnpm install --frozen-lockfile
- name: Lint
run: pnpm lint || echo "::warning::Lint has errors -- not blocking CI yet"
# Runs `pnpm typecheck` inside bastion/ on Node 22.
# Unlike lint there is no fallback, so a non-zero exit fails the job (and blocks `build`, which needs it).
typecheck:
runs-on: ubuntu-latest
defaults:
run:
working-directory: bastion
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v4
- uses: actions/setup-node@v4
with:
node-version: 22
- run: pnpm install --frozen-lockfile
- name: Typecheck
run: pnpm typecheck
# Builds the workspace and then runs the test suite (`pnpm test:run`) inside bastion/.
# The build step comes first because, per its step name, the completions check needs build output.
test:
runs-on: ubuntu-latest
defaults:
run:
working-directory: bastion
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v4
- uses: actions/setup-node@v4
with:
node-version: 22
- run: pnpm install --frozen-lockfile
- name: Build (needed by completions check)
run: pnpm build
- name: Run tests
run: pnpm test:run
# -- Build & package (both architectures) -------------------
# Builds all packages, compiles standalone x86_64 and arm64 binaries with bun, and packages
# each binary as RPM + DEB with nfpm. Runs only after lint, typecheck and test have succeeded.
# The resulting packages are handed to the publish jobs as the rpm-packages / deb-packages artifacts.
build:
runs-on: ubuntu-latest
needs: [lint, typecheck, test]
defaults:
run:
working-directory: bastion
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v4
- uses: actions/setup-node@v4
with:
node-version: 22
- name: Install dependencies
run: pnpm install --frozen-lockfile
- name: Build all packages
run: pnpm build
- name: Generate shell completions
run: pnpm completions:generate
- uses: oven-sh/setup-bun@v2
# NOTE(review): the nfpm tarball is fetched without checksum verification and unpacked into
# /usr/local/bin without sudo — assumes the runner user can write there; confirm.
- name: Install nfpm
run: |
curl -sL -o /tmp/nfpm.tar.gz "https://github.com/goreleaser/nfpm/releases/download/v2.45.0/nfpm_2.45.0_Linux_x86_64.tar.gz"
tar xzf /tmp/nfpm.tar.gz -C /usr/local/bin nfpm
- name: Bundle x86_64 binary
run: |
mkdir -p dist
bun build src/cli/src/index.ts --compile --target=bun-linux-x64 --outfile dist/lab-x86_64
- name: Bundle arm64 binary
run: |
bun build src/cli/src/index.ts --compile --target=bun-linux-arm64 --outfile dist/lab-arm64
# The sed call rewrites the `arch:` line and the `src: ./dist/lab` binary path of nfpm.yaml,
# so the single template is reused for both architectures.
- name: Package x86_64 RPM + DEB
run: |
sed -e 's|^arch:.*|arch: amd64|' -e 's|src: ./dist/lab$|src: ./dist/lab-x86_64|' nfpm.yaml > /tmp/nfpm-x86_64.yaml
nfpm pkg --config /tmp/nfpm-x86_64.yaml --packager rpm --target dist/
nfpm pkg --config /tmp/nfpm-x86_64.yaml --packager deb --target dist/
- name: Package arm64 RPM + DEB
run: |
sed -e 's|^arch:.*|arch: arm64|' -e 's|src: ./dist/lab$|src: ./dist/lab-arm64|' nfpm.yaml > /tmp/nfpm-arm64.yaml
nfpm pkg --config /tmp/nfpm-arm64.yaml --packager rpm --target dist/
nfpm pkg --config /tmp/nfpm-arm64.yaml --packager deb --target dist/
# Artifact paths carry the bastion/ prefix because `defaults.run.working-directory`
# applies only to `run:` steps, not to `uses:` actions.
# NOTE(review): the RPM glob is `lab-*.rpm` but the DEB glob is `lab*.deb` — presumably because
# nfpm names DEBs lab_<version>_<arch>.deb; confirm against the actual dist/ output.
- name: Upload RPM artifacts
uses: actions/upload-artifact@v3
with:
name: rpm-packages
path: bastion/dist/lab-*.rpm
retention-days: 7
- name: Upload DEB artifacts
uses: actions/upload-artifact@v3
with:
name: deb-packages
path: bastion/dist/lab*.deb
retention-days: 7
# -- Release pipeline (main branch push only) --------------
# Publishes the RPMs produced by `build` to the Gitea package registry.
# Runs only for pushes to main (never for pull requests).
publish-rpm:
  runs-on: ubuntu-latest
  needs: [build]
  if: github.ref == 'refs/heads/main' && github.event_name == 'push'
  defaults:
    run:
      working-directory: bastion
  steps:
    - uses: actions/checkout@v4
    - name: Download RPM artifacts
      uses: actions/download-artifact@v3
      with:
        name: rpm-packages
        path: bastion/dist/
    # `rpm` is needed to read version/arch metadata out of the package files.
    - name: Install rpm tools
      run: sudo apt-get update && sudo apt-get install -y rpm
    - name: Publish RPMs to Gitea
      env:
        GITEA_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
        GITEA_URL: http://${{ env.GITEA_REGISTRY }}
        GITEA_OWNER: ${{ env.GITEA_OWNER }}
        GITEA_REPO: lab
      run: |
        # A Gitea package version holds the files of every architecture, so the
        # version-level DELETE below removes ALL of them. Clear each version at
        # most once per run; otherwise processing the second architecture would
        # delete the RPM that was uploaded for the first one moments earlier.
        CLEARED_VERSIONS=" "
        for RPM_FILE in dist/lab-*.rpm; do
          [ -f "$RPM_FILE" ] || continue
          RPM_VERSION=$(rpm -qp --queryformat '%{VERSION}-%{RELEASE}' "$RPM_FILE")
          RPM_ARCH=$(rpm -qp --queryformat '%{ARCH}' "$RPM_FILE")
          echo "Publishing $RPM_FILE (version $RPM_VERSION, arch $RPM_ARCH)..."
          case "$CLEARED_VERSIONS" in
            *" ${RPM_VERSION} "*)
              # Already cleared during this run -- keep the sibling-arch upload.
              ;;
            *)
              # Delete existing version if present
              HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
                -H "Authorization: token ${GITEA_TOKEN}" \
                "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/lab/${RPM_VERSION}")
              if [ "$HTTP_CODE" = "200" ]; then
                echo "Version exists, replacing..."
                # --fail: surface a failed delete here instead of as a confusing
                # conflict on the upload that follows.
                curl --fail -s -o /dev/null -X DELETE \
                  -H "Authorization: token ${GITEA_TOKEN}" \
                  "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/lab/${RPM_VERSION}"
              fi
              CLEARED_VERSIONS="${CLEARED_VERSIONS}${RPM_VERSION} "
              ;;
          esac
          # Upload
          curl --fail -X PUT \
            -H "Authorization: token ${GITEA_TOKEN}" \
            --upload-file "$RPM_FILE" \
            "${GITEA_URL}/api/packages/${GITEA_OWNER}/rpm/upload"
          echo "Published $RPM_FILE successfully!"
        done
        # Link package to repo
        source scripts/link-package.sh
        link_package "rpm" "lab"
# Publishes the DEBs produced by `build` to the Gitea Debian registry, once per
# supported distribution. Runs only for pushes to main (never for pull requests).
publish-deb:
  runs-on: ubuntu-latest
  needs: [build]
  if: github.ref == 'refs/heads/main' && github.event_name == 'push'
  defaults:
    run:
      working-directory: bastion
  steps:
    - uses: actions/checkout@v4
    - name: Download DEB artifacts
      uses: actions/download-artifact@v3
      with:
        name: deb-packages
        path: bastion/dist/
    - name: Publish DEBs to Gitea
      env:
        GITEA_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
        GITEA_URL: http://${{ env.GITEA_REGISTRY }}
        GITEA_OWNER: ${{ env.GITEA_OWNER }}
        GITEA_REPO: lab
      run: |
        # Publish to each supported distribution
        DISTRIBUTIONS="trixie forky noble plucky"
        # Count failed uploads so the job can attempt every distribution and
        # still end red, instead of reporting success after e.g. a 401 or 500.
        FAILED=0
        for DEB_FILE in dist/lab*.deb; do
          [ -f "$DEB_FILE" ] || continue
          DEB_VERSION=$(dpkg-deb --field "$DEB_FILE" Version)
          DEB_ARCH=$(dpkg-deb --field "$DEB_FILE" Architecture)
          echo "Publishing $DEB_FILE (version $DEB_VERSION, arch $DEB_ARCH)..."
          for DIST in $DISTRIBUTIONS; do
            echo " -> $DIST..."
            # `|| true`: a transport-level curl failure yields HTTP_CODE=000 and is
            # counted below rather than aborting the remaining distributions.
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
              -X PUT \
              -H "Authorization: token ${GITEA_TOKEN}" \
              --upload-file "$DEB_FILE" \
              "${GITEA_URL}/api/packages/${GITEA_OWNER}/debian/pool/${DIST}/main/upload") || true
            if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
              echo " Published to $DIST"
            elif [ "$HTTP_CODE" = "409" ]; then
              # 409 means this exact package is already in the pool; not an error.
              echo " Already exists in $DIST (skipping)"
            else
              echo "::error::Upload of $DEB_FILE to $DIST returned HTTP $HTTP_CODE"
              FAILED=$((FAILED + 1))
            fi
          done
        done
        if [ "$FAILED" -gt 0 ]; then
          echo "$FAILED upload(s) failed"
          exit 1
        fi
        echo "Published successfully!"
        # Link package to repo
        source scripts/link-package.sh
        link_package "debian" "lab"

25
.gitignore vendored Normal file
View File

@@ -0,0 +1,25 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
dev-debug.log
# Dependency directories
node_modules/
# Environment variables
.env
# Editor directories and files
.idea
.vscode
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
# OS specific
.DS_Store

12
.mcp.json Normal file
View File

@@ -0,0 +1,12 @@
{
"mcpServers": {
"labctl": {
"command": "mcpctl",
"args": [
"mcp",
"-p",
"labctl"
]
}
}
}

1
.taskmaster/.env Normal file
View File

@@ -0,0 +1 @@
PERPLEXITY_API_KEY=dummy

44
.taskmaster/config.json Normal file
View File

@@ -0,0 +1,44 @@
{
"models": {
"main": {
"provider": "claude-code",
"modelId": "opus",
"maxTokens": 32000,
"temperature": 0.2
},
"research": {
"provider": "claude-code",
"modelId": "opus",
"maxTokens": 32000,
"temperature": 0.2
},
"fallback": {
"provider": "claude-code",
"modelId": "sonnet",
"maxTokens": 64000,
"temperature": 0.2
}
},
"global": {
"logLevel": "info",
"debug": false,
"defaultNumTasks": 10,
"defaultSubtasks": 5,
"defaultPriority": "medium",
"projectName": "Task Master",
"ollamaBaseURL": "http://localhost:11434/api",
"bedrockBaseURL": "https://bedrock.us-east-1.amazonaws.com",
"responseLanguage": "English",
"enableCodebaseAnalysis": true,
"enableProxy": false,
"anonymousTelemetry": true,
"userId": "1234567890"
},
"claudeCode": {},
"codexCli": {},
"grokCli": {
"timeout": 120000,
"workingDirectory": null,
"defaultModel": "grok-4-latest"
}
}

452
.taskmaster/docs/prd.md Normal file
View File

@@ -0,0 +1,452 @@
# labctl — Infrastructure Management Platform
## Product Requirements Document
## 1. Overview
labctl is a unified infrastructure management platform for bare-metal servers, Kubernetes clusters, and cloud resources. It replaces Puppet with a modern, TypeScript-native system using Pulumi for infrastructure as code.
### 1.1 Core Principles
- **Single CLI** (`labctl`) for all infrastructure operations
- **mTLS everywhere** — built-in Certificate Authority, no SSH key management
- **RBAC from day one** — deny by default, audit everything
- **Multi-cloud** — bare metal now, AWS later, extensible to any cloud
- **Test infrastructure like code** — ephemeral environments, smoke tests, security tests
- **Pulumi over Helm** — TypeScript charts, typed, testable, no YAML templating
### 1.2 Current State (completed)
- PXE bastion for bare-metal provisioning (discover, install, reprovision)
- CLI with subcommands: `labctl init bastion`, `labctl provision`
- LVM partitioning with reprovision data preservation (/home, /srv, /var/lib/longhorn, /var/lib/rancher)
- Worker role (k3s agent + Longhorn) and infra role (k3s server + etcd)
- 32 unit tests, VM smoke tests verified on real hardware
- Multi-arch builds (x86_64 + arm64), RPM/DEB packaging, Gitea CI/CD
- labd scaffold with CockroachDB Prisma schema (Server, Agent, User, Role, Permission, AuditLog, JoinToken, Cluster, PulumiRun)
### 1.3 Hardware
- labmaster (puppet.ad.itaz.eu / 78:55:36:08:35:14): MinisForum SER9, AMD Ryzen 7 255, 16 cores, 27GB RAM, 1TB NVMe, infra role
- Future: additional bare-metal worker nodes, AWS EC2 instances
## 2. Architecture
### 2.1 Components
```
labctl CLI → labd (master) → lab-agent (on every server)
                  ↓
             CockroachDB
```
**labctl** — CLI binary installed on developer workstations. Compiled with bun to standalone binary. Distributed as RPM/DEB/binary.
**labd** — Master daemon running as k8s Deployment on labmaster's k3s cluster. Stateless (all state in CockroachDB). Multiple instances behind k8s Service for HA. Manages: CA, RBAC, agent registry, Pulumi executor, kubectl proxy, app deployments, log relay.
**lab-agent** — Lightweight daemon on every managed machine. Connects to labd via mTLS WebSocket. Handles: heartbeat, command execution, log streaming, module application. Compiled to standalone binary with bun. Installed via systemd service.
**CockroachDB** — Distributed SQL database. PostgreSQL wire-compatible (Prisma works unchanged). Single node to start, multi-node for HA. Stores: server state, RBAC, audit logs, certificates, kubeconfigs (encrypted), Pulumi state.
**Bastion** — PXE provisioning server. Runs as k8s pod with hostNetwork (needs DHCP/TFTP). Managed by labd as an "app". Multiple bastions for multiple sites.
### 2.2 Network Architecture
**Cilium** as k8s CNI (replacing default flannel):
- eBPF-based pod networking
- Built-in WireGuard encryption between nodes
- Network policies (ties into RBAC)
- Hubble for observability
- Future: Cluster Mesh for multi-site transparent networking
No Tailscale dependency — Cilium handles node-to-node encryption. Agents connect to labd over standard TCP/TLS.
### 2.3 Authentication
**mTLS with built-in Certificate Authority:**
1. labd generates root CA on first start (stored encrypted in CockroachDB)
2. Agents enroll with join token → receive signed certificate
3. CLI users authenticate with client certificates (or SSH key-based initial auth)
4. All communication authenticated via mutual TLS
5. Certificate rotation and revocation supported
**Join tokens:**
- One-time tokens: for individual bare-metal servers (generated during PXE provision, embedded in kickstart)
- Reusable tokens: for autoscaling groups (AWS ASG instances share a token)
- Tokens can be revoked, have optional expiry
### 2.4 RBAC Model
Inspired by mcpctl's RBAC (src/mcpd/src/services/, middleware/auth). Hierarchical permissions:
```
action:cloud:environment:server
Examples:
read:*:*:* — read everything
exec:baremetal:lab:* — exec on any lab bare-metal server
kubectl:*:*:* — kubectl proxy on any cluster
*:baremetal:lab:puppet — full access to puppet server only
manage:*:*:* — manage apps, clusters, tokens
admin:*:*:* — full admin (create users, roles)
```
**Resources:** servers, environments, clouds, modules, roles, users, clusters, apps, pulumi-stacks
**Actions:** read, exec, apply, destroy, manage, admin, kubectl
**Deny rules:** explicit deny overrides any allow (like AWS IAM)
Prisma models: Role, Permission (allow/deny), UserRole binding.
### 2.5 Database
**CockroachDB** chosen over PostgreSQL and Cassandra:
- PostgreSQL wire-compatible — Prisma works, mcpctl patterns reusable
- Multi-master replication — any node accepts reads AND writes
- Strong consistency (not eventual like Cassandra)
- Survives node failures (a 3-node cluster tolerates 1 failed node, a 5-node cluster tolerates 2)
- Auto-rebalancing when adding nodes
- Start single-node, scale to multi-node with zero code changes (just add nodes)
**Schema (already scaffolded in Prisma):**
- Server — managed machines (hostname, mac, cloud, env, role, labels, status)
- Agent — connected agents (cert, enrollment, last seen)
- User — platform users (username, cert fingerprint)
- Role — RBAC roles with permissions
- Permission — allow/deny rules (action:cloud:env:server)
- UserRole — user-to-role bindings
- JoinToken — enrollment tokens (one-time, reusable, revocable)
- AuditLog — every action logged (user, session, action, resource, result, duration)
- PulumiRun — infrastructure-as-code execution records
- Cluster — managed k8s clusters (kubeconfig encrypted)
## 3. CLI Command Reference
### 3.1 Bastion (PXE Provisioning) — IMPLEMENTED
```bash
sudo labctl init bastion standalone start [--foreground] [--port 8080]
sudo labctl init bastion standalone stop
labctl init bastion standalone status
```
### 3.2 Provisioning — IMPLEMENTED
```bash
labctl provision list
labctl provision install <mac> <hostname> --role worker|infra
labctl provision reprovision <mac> <hostname> --role worker|infra
labctl provision forget <mac>
```
### 3.3 Server Management — TO BUILD
```bash
labctl get servers [--env NAME] [--cloud NAME] [--label KEY=VALUE]
labctl describe server/<name>
```
### 3.4 Remote Execution — TO BUILD
```bash
labctl exec server/<name> -- <command>
labctl exec server/<name> -it -- bash # interactive TTY
labctl exec server/<name> --timeout 30s -- cmd
```
### 3.5 Kubernetes Proxy — TO BUILD
```bash
labctl kubectl --cluster <name> <kubectl-args>
labctl clusters add <name> --kubeconfig <path>
labctl clusters list
labctl clusters remove <name>
```
### 3.6 Logs — TO BUILD
```bash
# Server logs (journalctl passthrough, no DB in hot path)
labctl logs server/<name> # all journal
labctl logs server/<name> -f # follow (live WebSocket relay)
labctl logs server/<name> -n 100 # last 100 lines
labctl logs server/<name> -u k3s # specific unit
labctl logs server/<name> -u sshd --since "1h ago"
labctl logs server/<name> -k # kernel
labctl logs server/<name> -p err # errors only
labctl logs server/<name> --file /var/log/nginx/error.log
# App logs (k8s pod logs)
labctl logs app/<name> [-f] [--container NAME]
# Pulumi execution logs
labctl logs pulumi/<run-id> [-f]
# Bastion logs
labctl logs bastion/<env> [--mac MAC]
# Agent daemon logs
labctl logs agent/<server>
# Audit logs (from CockroachDB)
labctl logs audit [--user NAME] [--action ACTION] [--since TIME]
labctl logs audit/<user-date-sessionid> # specific session
```
Log architecture: agent runs journalctl/tail with user-provided flags, streams stdout over WebSocket to labd, labd relays to CLI. No database in the hot path. Future: Grafana Loki integration for cold storage.
### 3.7 Apps (Pulumi Charts, replacing Helm) — TO BUILD
```bash
labctl apps list
labctl apps install <name> [--set key=value] [-f values.yaml]
labctl apps status <name>
labctl apps upgrade <name>
labctl apps history <name>
labctl apps rollback <name> <version>
labctl apps uninstall <name>
```
### 3.8 Infrastructure as Code — TO BUILD
```bash
labctl apply -f <file.ts> --env <env>
labctl plan -f <file.ts> --env <env>
labctl destroy -f <file.ts> --env <env>
```
### 3.9 RBAC — TO BUILD
```bash
labctl get roles
labctl get users
labctl create role <name> --allow "action:cloud:env:server"
labctl create role <name> --deny "destroy:*:*:*"
labctl bind role <role> --user <user>
labctl unbind role <role> --user <user>
labctl get permissions
```
### 3.10 Environments and Clouds — TO BUILD
```bash
labctl get environments
labctl get clouds
labctl create environment <name> --cloud <cloud>
```
## 4. Partition Layout
### Worker Role
```
/boot/efi          600MB  EFI
/boot              3GB    ext4
── LVM VG: labvg ──
swap               27GB
/                  33GB   xfs
/var               100GB  xfs
/var/log           10GB   xfs
/home              10GB   xfs    ← preserved on reprovision
/srv               20GB   xfs    ← preserved on reprovision
/var/lib/longhorn  rest   xfs    ← preserved (Longhorn PVC storage)
/tmp               4GB    tmpfs
```
### Infra Role
```
/boot/efi          600MB  EFI
/boot              3GB    ext4
── LVM VG: labvg ──
swap               27GB
/                  33GB   xfs
/var               100GB  xfs
/var/log           10GB   xfs
/home              10GB   xfs    ← preserved on reprovision
/srv               20GB   xfs    ← preserved on reprovision
/var/lib/rancher   20GB   xfs    ← preserved (k3s etcd data)
/tmp               4GB    tmpfs
```
## 5. Module System
Configuration modules define desired state. Three tiers:
1. **Core modules** (this repo, `modules/`): k3s-server, k3s-agent, labd, lab-agent, bastion
2. **Official modules** (separate repos): monitoring, cilium, DNS
3. **Custom modules** (user repos): pulled by git URL
Module structure:
```
module.yaml # name, version, targets (roles/labels), deps
src/index.ts # entry point
src/install.ts # installation logic
src/configure.ts # configuration logic
src/health.ts # health check
tests/ # vitest tests (mandatory)
```
## 6. Testing Strategy
### 6.1 Testing Pyramid
```
Unit Tests → pure logic, milliseconds, every commit
Smoke Tests → containers (podman-compose), minutes, every commit
Integration Tests → VMs (libvirt), 10-15 min, PRs
E2E Tests → real hardware/cloud, 20-30 min, pre-release
```
### 6.2 Smoke Test Stack (podman-compose)
```yaml
services:
cockroachdb:
image: cockroachdb/cockroach:latest-v24.3
labd:
build: .
depends_on: [cockroachdb]
agent-1:
build: ./agent
depends_on: [labd]
agent-2:
build: ./agent
depends_on: [labd]
```
Tests: agent enrollment, certificate issuance, heartbeat, exec, logs, RBAC deny/allow.
### 6.3 Security Tests (RBAC)
- Deny exec without permission
- Deny cross-environment access
- Deny rules override allow rules
- Cannot escalate own permissions
- Audit logs all denied attempts
- Certificate-based auth cannot be spoofed
- Join tokens cannot be reused (one-time)
- Expired tokens rejected
### 6.4 Ephemeral Test Environments
```bash
labctl test smoke # podman-compose
labctl test integration # libvirt VMs
labctl env create pr-123 --cloud containers # CI ephemeral
labctl env create pr-123 --cloud aws # cloud ephemeral (future)
```
### 6.5 Health Gates for Deployment
Before promoting to production, ALL must pass:
- labd API responds
- Expected number of agents connected
- k3s nodes Ready
- Certificates valid (>30 days)
- RBAC smoke test passes
- No error logs in last 5 minutes
## 7. Cloud/Environment Model
```
Cloud: baremetal
└── Environment: lab
    ├── Server: labmaster.ad.itaz.eu (infra, labels={k3s=server})
    └── Server: ser9.ad.itaz.eu (worker, labels={k3s=agent})
Cloud: aws (future)
└── Environment: production
    ├── Server: i-abc123 (from ASG web-servers)
    └── Server: i-def456 (from ASG web-servers)
```
Each bastion creates an environment under baremetal cloud. AWS autoscaling groups create environments under aws cloud.
## 8. App Model (Pulumi Charts)
Each app is a Pulumi TypeScript program:
```
app.yaml # name, version, inputs schema, required permissions
src/index.ts # Pulumi program
values.yaml # defaults
tests/ # vitest tests
```
First apps to build:
- bastion — PXE provisioning (wrap existing code)
- labd — master daemon (self-deployment)
- cockroachdb — database
- cilium — CNI
## 9. Implementation Phases
### Phase 1: Foundation (PARTIALLY DONE)
- [x] PXE bastion (discover, install, reprovision)
- [x] CLI structure (labctl init/provision)
- [x] labd scaffold (Fastify + CockroachDB/Prisma schema)
- [x] Multi-arch builds, packaging, CI/CD
- [ ] Certificate Authority in labd
- [ ] lab-agent skeleton (connect, heartbeat, enrollment)
- [ ] Agent enrollment via join tokens
- [ ] RBAC engine
- [ ] labctl exec (remote execution)
- [ ] labctl logs (resource-scoped streaming)
- [ ] labctl get servers (with filters)
- [ ] Smoke test stack (podman-compose)
### Phase 2: Deployment
- [ ] Reprovision labmaster as labmaster.ad.itaz.eu
- [ ] Deploy k3s with Cilium CNI
- [ ] Deploy CockroachDB on k3s
- [ ] Deploy labd on k3s
- [ ] Deploy bastion as managed app
- [ ] Auto-enroll agents during PXE provision
### Phase 3: Infrastructure as Code
- [ ] Module system
- [ ] Pulumi charts (replacing Helm)
- [ ] labctl apps install/upgrade/rollback
- [ ] labctl apply -f (Pulumi execution)
- [ ] kubectl proxy (audited)
- [ ] Kubeconfig store (encrypted)
### Phase 4: Multi-Cloud
- [ ] AWS provider (Pulumi)
- [ ] Reusable join tokens for ASGs
- [ ] Cilium Cluster Mesh
- [ ] Ephemeral test environments
- [ ] Grafana Loki for cold logs
## 10. Technology Stack
| Component | Technology | Notes |
|-----------|-----------|-------|
| Language | TypeScript (ESM) | Same for CLI, daemon, agents, IaC |
| CLI | Commander.js | Matches mcpctl patterns |
| HTTP Server | Fastify + WebSocket | labd and bastion |
| Database | CockroachDB | PostgreSQL compatible, Prisma ORM |
| ORM | Prisma | Reuse mcpctl patterns |
| IaC | Pulumi (TypeScript) | Replaces Helm and Puppet |
| k8s CNI | Cilium | eBPF, WireGuard, network policies |
| Auth | mTLS (built-in CA) | Certificate-based, no SSH keys |
| Packaging | nfpm (RPM/DEB) | bun compile for standalone binary |
| Containers | Podman + podman-compose | No Docker dependency |
| CI/CD | Gitea Actions | Self-hosted on mysources.co.uk |
| Testing | Vitest | Unit + smoke + integration |
| Registry | Gitea packages | RPM, DEB, container images |
## 11. Lessons from mcpctl
The mcpctl project (../mcpctl/) established patterns reused here:
**Project structure:** pnpm monorepo with workspace packages (shared, cli, daemon). Each package has own package.json, tsconfig.json, vitest.config.ts.
**CLI patterns:** Commander.js with factory functions (createXxxCommand). Global options (mcpctl's --project maps to --env/--cloud here). Resource CRUD (get, describe, delete, create, apply).
**Server patterns:** Fastify with route registration functions. Services layer with repository pattern. Middleware for auth. Health endpoints.
**Database:** Prisma ORM with PostgreSQL (now CockroachDB, wire-compatible). Migration-first schema. Seed data for initial setup.
**RBAC:** Role-based with permission strings. Middleware checks on every request. Audit logging in middleware.
**Testing:** Vitest with separate configs for unit vs smoke. Smoke tests with real database and services. Security tests for RBAC.
**CI/CD:** Gitea Actions with lint→typecheck→test→build→publish pipeline. nfpm for RPM/DEB. Bun compile for standalone binaries. Podman for container images.
**Deployment:** Docker/Podman compose for dev stack. Portainer API for production deploy (we'll use k3s instead). systemd for local daemons.
**Completions:** Generated from Commander tree. Bash + Fish. --write and --check modes. Included in packages.
**Key learnings applied:**
- Start with proper monorepo structure (not flat scripts)
- Type safety across packages via workspace references
- Test-driven (unit tests before features)
- CI from the start (not retrofitted)
- RBAC and audit from the start (not bolted on)
- Database-first design (schema defines the domain)
## 12. Gitea Registry
**Registry:** mysources.co.uk (self-hosted Gitea at 10.0.0.194)
**Token:** stored locally at ~/.gitea-token; supplied to CI as the PACKAGES_TOKEN secret
**Packages:** RPM and DEB published to Gitea packages API
**Container images:** pushed to Gitea container registry
**API pattern:** Same as mcpctl publish scripts (check existing, delete, re-upload, link to repo)

6
.taskmaster/state.json Normal file
View File

@@ -0,0 +1,6 @@
{
"currentTag": "master",
"lastSwitched": "2026-03-18T00:17:54.213Z",
"branchTagMapping": {},
"migrationNoticeShown": true
}

View File

@@ -0,0 +1,180 @@
{
"master": {
"tasks": [
{
"id": 72,
"title": "Expand Prisma Schema with Resource Relationships",
"description": "Add Network, ServerNic, ServerDisk, and ClusterMember models to the Prisma schema. Add bastionId foreign key to Server model to track which bastion owns each server.",
"details": "Edit `bastion/src/labd/prisma/schema.prisma` to add:\n\n1. **Server model changes**:\n - Add `bastionId String?` with relation to Bastion\n - Add `hardwareInfo Json?` for storing raw HardwareInfo\n - Add `os String?` for installed OS\n\n2. **Network model**:\n```prisma\nmodel Network {\n id String @id @default(uuid())\n name String @unique\n cidr String\n vlan Int?\n gateway String?\n domain String?\n dhcpEnabled Boolean @default(false)\n createdAt DateTime @default(now())\n updatedAt DateTime @updatedAt\n \n nics ServerNic[]\n}\n```\n\n3. **ServerNic model**:\n```prisma\nmodel ServerNic {\n id String @id @default(uuid())\n serverId String\n server Server @relation(fields: [serverId], references: [id], onDelete: Cascade)\n networkId String?\n network Network? @relation(fields: [networkId], references: [id])\n mac String\n ip String?\n name String\n state String @default(\"DOWN\")\n \n @@unique([serverId, mac])\n @@index([networkId])\n}\n```\n\n4. **ServerDisk model**:\n```prisma\nmodel ServerDisk {\n id String @id @default(uuid())\n serverId String\n server Server @relation(fields: [serverId], references: [id], onDelete: Cascade)\n name String\n sizeGb Float\n model String?\n \n @@unique([serverId, name])\n}\n```\n\n5. **ClusterMember model**:\n```prisma\nmodel ClusterMember {\n id String @id @default(uuid())\n clusterId String\n cluster Cluster @relation(fields: [clusterId], references: [id], onDelete: Cascade)\n serverId String\n server Server @relation(fields: [serverId], references: [id], onDelete: Cascade)\n role String @default(\"worker\") // control-plane, worker\n joinedAt DateTime @default(now())\n \n @@unique([clusterId, serverId])\n @@index([clusterId])\n @@index([serverId])\n}\n```\n\n6. Update Server model with relations to nics, disks, clusterMemberships, and bastion.\n\nRun `pnpm prisma generate` and `pnpm prisma migrate dev --name add-resource-models`.",
"testStrategy": "1. Run `pnpm prisma validate` to verify schema syntax\n2. Run `pnpm prisma generate` to confirm client generation\n3. Create migration and verify it applies cleanly to local CockroachDB\n4. Write unit tests that create/read/delete each new model\n5. Verify cascade deletes work (deleting Server removes its NICs and Disks)",
"priority": "high",
"dependencies": [],
"status": "pending",
"subtasks": []
},
{
"id": 73,
"title": "Implement State Persistence Service in labd",
"description": "Create a new service in labd that persists bastion state syncs to the Server table in CockroachDB. When bastion-state-sync messages arrive, upsert machines into Server with their hardware info, status, and ownership.",
"details": "Create `bastion/src/labd/src/services/state-persistence.ts`:\n\n```typescript\nimport type { PrismaClient } from \"@prisma/client\";\nimport type { BastionState, HardwareInfo, InstallConfig, InstalledInfo } from \"@lab/shared\";\nimport { logger } from \"./logger.js\";\n\nexport class StatePersistence {\n constructor(private readonly db: PrismaClient) {}\n\n async syncBastionState(bastionId: string, state: BastionState): Promise<void> {\n // Process discovered machines\n for (const [mac, hw] of Object.entries(state.discovered)) {\n await this.upsertDiscoveredServer(bastionId, mac, hw);\n }\n \n // Process queued machines (update status to provisioning)\n for (const [mac, cfg] of Object.entries(state.install_queue)) {\n await this.upsertQueuedServer(bastionId, mac, cfg);\n }\n \n // Process installed machines\n for (const [mac, info] of Object.entries(state.installed)) {\n await this.upsertInstalledServer(bastionId, mac, info);\n }\n }\n\n private async upsertDiscoveredServer(bastionId: string, mac: string, hw: HardwareInfo): Promise<void> {\n const normalized = mac.toLowerCase();\n \n await this.db.server.upsert({\n where: { mac: normalized },\n create: {\n hostname: `unknown-${normalized.replace(/:/g, \"\").slice(-6)}`,\n mac: normalized,\n bastionId,\n status: \"discovered\",\n hardwareInfo: hw as any,\n labels: {\n arch: hw.arch,\n cpu_model: hw.cpu_model,\n cpu_cores: hw.cpu_cores,\n memory_gb: hw.memory_gb,\n },\n },\n update: {\n bastionId,\n status: \"discovered\", // only if not already provisioning/installed\n hardwareInfo: hw as any,\n },\n });\n \n // Sync NICs and Disks\n await this.syncServerHardware(normalized, hw);\n }\n \n private async syncServerHardware(mac: string, hw: HardwareInfo): Promise<void> {\n const server = await this.db.server.findUnique({ where: { mac } });\n if (!server) return;\n \n // Upsert NICs\n for (const nic of hw.nics) {\n await this.db.serverNic.upsert({\n where: { serverId_mac: { serverId: server.id, mac: 
nic.mac.toLowerCase() } },\n create: { serverId: server.id, mac: nic.mac.toLowerCase(), name: nic.name, state: nic.state },\n update: { name: nic.name, state: nic.state },\n });\n }\n \n // Upsert Disks\n for (const disk of hw.disks) {\n await this.db.serverDisk.upsert({\n where: { serverId_name: { serverId: server.id, name: disk.name } },\n create: { serverId: server.id, name: disk.name, sizeGb: disk.size_gb, model: disk.model },\n update: { sizeGb: disk.size_gb, model: disk.model },\n });\n }\n }\n \n // Similar methods for upsertQueuedServer and upsertInstalledServer...\n}\n```\n\nIntegrate into `server.ts` WebSocket handler by calling `statePersistence.syncBastionState()` when `bastion-state-sync` messages arrive.",
"testStrategy": "1. Unit test StatePersistence with mocked PrismaClient\n2. Integration test: simulate bastion-state-sync message, verify Server rows created\n3. Test idempotency: send same state twice, verify no duplicates\n4. Test status transitions: discovered -> provisioning -> installed\n5. Verify hardware info (NICs, Disks) is correctly persisted",
"priority": "high",
"dependencies": [
72
],
"status": "pending",
"subtasks": []
},
{
"id": 74,
"title": "Add State Loading from labd on Bastion Startup",
"description": "Modify bastion startup to request its persisted state from labd before using the local JSON cache. This ensures bastions restore their state after pod restarts.",
"details": "1. Add new labd API endpoint `GET /api/bastions/:id/state` that returns the aggregated state for a specific bastion from the Server table:\n\n```typescript\n// bastion/src/labd/src/routes/bastions.ts\napp.get<{ Params: { id: string } }>(\"/api/bastions/:id/state\", async (request, reply) => {\n const { id } = request.params;\n \n const servers = await db.server.findMany({\n where: { bastionId: id },\n include: { nics: true, disks: true },\n });\n \n // Transform back to BastionState format\n const state: BastionState = { discovered: {}, install_queue: {}, installed: {} };\n for (const server of servers) {\n const mac = server.mac;\n if (!mac) continue;\n \n switch (server.status) {\n case \"discovered\":\n state.discovered[mac] = transformToHardwareInfo(server);\n break;\n case \"provisioning\":\n state.install_queue[mac] = transformToInstallConfig(server);\n break;\n case \"installed\":\n state.installed[mac] = transformToInstalledInfo(server);\n break;\n }\n }\n \n return reply.send(state);\n});\n```\n\n2. Modify `BastionConnection.connect()` in `labd-connection.ts` to fetch state after enrollment:\n\n```typescript\nprivate async loadRemoteState(): Promise<BastionState | null> {\n if (!this.bastionId || !this.config.labdUrl) return null;\n try {\n const resp = await fetch(`${this.config.labdUrl}/api/bastions/${this.bastionId}/state`);\n if (resp.ok) return await resp.json();\n } catch { /* fall back to local */ }\n return null;\n}\n```\n\n3. In bastion `main.ts`, after establishing labd connection, merge remote state with local state (remote takes precedence for installed machines, local wins for in-progress installs).",
"testStrategy": "1. Integration test: start bastion, let it persist state, restart bastion, verify state restored\n2. Test merge logic: local has in-progress install, remote has discovered - verify install preserved\n3. Test offline mode: labd unavailable, bastion falls back to local JSON\n4. Test fresh start: no local state, no remote state - bastion starts with empty state",
"priority": "high",
"dependencies": [
73
],
"status": "pending",
"subtasks": []
},
{
"id": 75,
"title": "Fix Bastion --dir Environment Variable Default",
"description": "Fix the bug where CLI's --dir default overrides the BASTION_DIR environment variable. The CLI option should use the env var as its default.",
"details": "Edit `bastion/src/cli/src/commands/serve.ts`:\n\n```typescript\n// Before (line 14):\n.option(\"--dir <dir>\", \"Bastion data directory\", \"/tmp/lab-bastion\")\n\n// After:\n.option(\n \"--dir <dir>\",\n \"Bastion data directory\",\n process.env[\"BASTION_DIR\"] ?? \"/tmp/lab-bastion\"\n)\n```\n\nThis ensures:\n1. If `BASTION_DIR` env var is set (e.g., in k8s deployment), it's used as default\n2. Explicit `--dir` flag still overrides both\n3. Falls back to `/tmp/lab-bastion` if neither is set\n\nAlso update the k8s deployment manifest `bastion/deploy/k3s/deployment.yaml` to ensure `BASTION_DIR=/data` is properly set.",
"testStrategy": "1. Unit test: verify option default reads from process.env\n2. Integration test: set BASTION_DIR, run labctl without --dir, verify correct dir used\n3. Integration test: set BASTION_DIR, run labctl with --dir /custom, verify /custom used\n4. Test no env var: verify default /tmp/lab-bastion used",
"priority": "high",
"dependencies": [],
"status": "pending",
"subtasks": []
},
{
"id": 76,
"title": "Create Resource Type Registry with Aliases",
"description": "Create a centralized resource type registry that maps resource names, plurals, and short aliases to canonical types. This enables kubectl-style resource resolution.",
"details": "Create `bastion/src/cli/src/utils/resources.ts`:\n\n```typescript\nexport interface ResourceDefinition {\n kind: string; // Canonical type: \"Server\", \"Cluster\", etc.\n singular: string; // \"server\"\n plural: string; // \"servers\"\n aliases: string[]; // [\"srv\"]\n apiPath: string; // \"/api/servers\"\n columns: TableColumn[]; // Default columns for 'get' output\n wideColumns?: TableColumn[]; // Extra columns for -o wide\n}\n\nconst RESOURCE_DEFINITIONS: ResourceDefinition[] = [\n {\n kind: \"Server\",\n singular: \"server\",\n plural: \"servers\",\n aliases: [\"srv\"],\n apiPath: \"/api/servers\",\n columns: serverColumns,\n wideColumns: serverWideColumns,\n },\n {\n kind: \"Cluster\",\n singular: \"cluster\",\n plural: \"clusters\",\n aliases: [],\n apiPath: \"/api/clusters\",\n columns: clusterColumns,\n },\n {\n kind: \"Network\",\n singular: \"network\",\n plural: \"networks\",\n aliases: [\"net\"],\n apiPath: \"/api/networks\",\n columns: networkColumns,\n },\n // ... bastion, role, user, token, audit\n];\n\nconst aliasMap = new Map<string, ResourceDefinition>();\nfor (const def of RESOURCE_DEFINITIONS) {\n aliasMap.set(def.singular, def);\n aliasMap.set(def.plural, def);\n for (const alias of def.aliases) {\n aliasMap.set(alias, def);\n }\n}\n\nexport function resolveResourceType(input: string): ResourceDefinition {\n const normalized = input.toLowerCase();\n const def = aliasMap.get(normalized);\n if (!def) {\n const valid = RESOURCE_DEFINITIONS.map(d => d.plural).join(\", \");\n throw new Error(`Unknown resource type \"${input}\". Valid types: ${valid}`);\n }\n return def;\n}\n\nexport function resolveResourceIdentifier(input: string): {\n type: ResourceDefinition;\n name?: string;\n} {\n // Handle \"server/labmaster\" or just \"servers\"\n const parts = input.split(\"/\");\n const type = resolveResourceType(parts[0]);\n const name = parts.length > 1 ? 
parts.slice(1).join(\"/\") : undefined;\n return { type, name };\n}\n```\n\nUpdate `bastion/src/cli/src/utils/resource.ts` to use the new registry.",
"testStrategy": "1. Unit test resolveResourceType with all aliases: server, servers, srv -> Server\n2. Test unknown resource type throws descriptive error\n3. Test case insensitivity: SERVER, Server, server all resolve correctly\n4. Test resolveResourceIdentifier parses \"server/labmaster\" correctly",
"priority": "high",
"dependencies": [],
"status": "pending",
"subtasks": []
},
{
"id": 77,
"title": "Implement 'labctl get' Command",
"description": "Create the core 'labctl get <resource> [name]' command that lists resources with filtering and output format support. This is the foundation of the kubectl-style CLI.",
"details": "Create `bastion/src/cli/src/commands/get.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { resolveResourceType, type ResourceDefinition } from \"../utils/resources.js\";\nimport { getLabdClient } from \"../api/config.js\";\nimport { formatOutput, type TableColumn } from \"../utils/table.js\";\n\nexport function registerGetCommand(program: Command): void {\n program\n .command(\"get <resource> [name]\")\n .description(\"List resources or get a specific resource by name\")\n .option(\"--status <status>\", \"Filter by status\")\n .option(\"--role <role>\", \"Filter by role (servers only)\")\n .option(\"--cloud <cloud>\", \"Filter by cloud\")\n .option(\"--env <environment>\", \"Filter by environment\")\n .option(\"-l, --label <label>\", \"Filter by label (key=value)\")\n .option(\"-A, --all-namespaces\", \"List across all clouds/environments\")\n .action(async (resource: string, name: string | undefined, opts) => {\n const config = program.opts()[\"_config\"];\n const resourceDef = resolveResourceType(resource);\n const client = getLabdClient();\n \n try {\n let data: unknown[];\n \n if (name) {\n // Get specific resource - could be name, ID, or MAC\n const item = await client.getResource(resourceDef, name);\n data = item ? [item] : [];\n } else {\n // List with filters\n data = await client.listResources(resourceDef, {\n status: opts.status,\n role: opts.role,\n cloud: opts.allNamespaces ? undefined : (opts.cloud ?? config.defaultCloud),\n environment: opts.allNamespaces ? undefined : (opts.env ?? config.defaultEnvironment),\n label: opts.label,\n });\n }\n \n if (data.length === 0) {\n console.log(`No ${resourceDef.plural} found.`);\n return;\n }\n \n const columns = config.outputFormat === \"wide\" && resourceDef.wideColumns\n ? [...resourceDef.columns, ...resourceDef.wideColumns]\n : resourceDef.columns;\n \n formatOutput(data, config.outputFormat, columns);\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`);\n process.exit(1);\n }\n });\n}\n```\n\nAdd to `index.ts`: `registerGetCommand(program);`\n\nExtend LabdClient with generic resource methods.",
"testStrategy": "1. Integration test: `labctl get servers` returns list from labd\n2. Test filtering: `labctl get servers --status discovered` only shows discovered\n3. Test name lookup: `labctl get server labmaster` returns single server\n4. Test MAC lookup: `labctl get server 38:05:25:33:e2:e4` resolves by MAC\n5. Test output formats: -o json, -o yaml, -o wide produce correct output\n6. Test unknown resource: `labctl get foo` shows helpful error",
"priority": "high",
"dependencies": [
76
],
"status": "pending",
"subtasks": []
},
{
"id": 78,
"title": "Implement 'labctl describe' Command",
"description": "Create the 'labctl describe <resource> <name>' command that shows detailed information about a resource including relationships, hardware info, and history.",
"details": "Create `bastion/src/cli/src/commands/describe.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { resolveResourceType } from \"../utils/resources.js\";\nimport { getLabdClient } from \"../api/config.js\";\n\nconst BOLD = \"\\x1b[1m\";\nconst DIM = \"\\x1b[2m\";\nconst RESET = \"\\x1b[0m\";\n\ninterface DescribeSection {\n title: string;\n fields: Array<[string, string | undefined]>;\n}\n\nfunction printDescribe(name: string, sections: DescribeSection[]): void {\n console.log(`${BOLD}Name:${RESET} ${name}`);\n for (const section of sections) {\n console.log(`\\n${BOLD}${section.title}:${RESET}`);\n for (const [key, value] of section.fields) {\n if (value !== undefined) {\n console.log(` ${DIM}${key}:${RESET} ${value}`);\n }\n }\n }\n}\n\nexport function registerDescribeCommand(program: Command): void {\n program\n .command(\"describe <resource> <name>\")\n .description(\"Show detailed information about a resource\")\n .action(async (resource: string, name: string) => {\n const resourceDef = resolveResourceType(resource);\n const client = getLabdClient();\n \n try {\n const item = await client.describeResource(resourceDef, name);\n if (!item) {\n console.error(`${resourceDef.singular} \"${name}\" not found.`);\n process.exit(1);\n }\n \n // Resource-specific formatting\n switch (resourceDef.kind) {\n case \"Server\":\n printServerDescription(item);\n break;\n case \"Cluster\":\n printClusterDescription(item);\n break;\n default:\n console.log(JSON.stringify(item, null, 2));\n }\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`);\n process.exit(1);\n }\n });\n}\n\nfunction printServerDescription(server: any): void {\n const sections: DescribeSection[] = [\n {\n title: \"Metadata\",\n fields: [\n [\"ID\", server.id],\n [\"Cloud\", server.cloud],\n [\"Environment\", server.environment],\n [\"Role\", server.role],\n [\"Status\", server.status],\n [\"Created\", server.createdAt],\n [\"Last Seen\", server.lastHeartbeat],\n ],\n },\n {\n title: \"Hardware\",\n fields: [\n [\"MAC\", server.mac],\n [\"IP\", server.ip],\n [\"Architecture\", server.hardwareInfo?.arch],\n [\"CPU\", server.hardwareInfo?.cpu_model],\n [\"Cores\", String(server.hardwareInfo?.cpu_cores)],\n [\"Memory\", `${server.hardwareInfo?.memory_gb}GB`],\n [\"Product\", server.hardwareInfo?.product],\n ],\n },\n ];\n \n if (server.nics?.length > 0) {\n sections.push({\n title: \"Network Interfaces\",\n fields: server.nics.map((n: any) => [n.name, `${n.mac} ${n.ip ?? \"\"} (${n.state})`]),\n });\n }\n \n if (server.disks?.length > 0) {\n sections.push({\n title: \"Disks\",\n fields: server.disks.map((d: any) => [d.name, `${d.sizeGb}GB ${d.model ?? \"\"}`]),\n });\n }\n \n if (server.clusterMemberships?.length > 0) {\n sections.push({\n title: \"Cluster Membership\",\n fields: server.clusterMemberships.map((m: any) => [m.cluster.name, m.role]),\n });\n }\n \n printDescribe(server.hostname, sections);\n}\n```",
"testStrategy": "1. Integration test: `labctl describe server labmaster` shows full details\n2. Test hardware info display: CPU, memory, disks, NICs all shown\n3. Test cluster membership: server in cluster shows membership section\n4. Test not found: `labctl describe server nonexistent` shows helpful error\n5. Test different resource types: describe cluster, network, bastion",
"priority": "medium",
"dependencies": [
77
],
"status": "pending",
"subtasks": []
},
{
"id": 79,
"title": "Implement 'labctl create/delete' Commands",
"description": "Create the 'labctl create <resource>' and 'labctl delete <resource> <name>' commands for creating and removing resources like networks, clusters, and tokens.",
"details": "Create `bastion/src/cli/src/commands/create.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { resolveResourceType } from \"../utils/resources.js\";\nimport { getLabdClient } from \"../api/config.js\";\n\nexport function registerCreateCommand(program: Command): void {\n const create = program\n .command(\"create <resource>\")\n .description(\"Create a resource\");\n \n // labctl create network --name lab --cidr 192.168.8.0/24\n create\n .command(\"network\")\n .description(\"Create a network\")\n .requiredOption(\"--name <name>\", \"Network name\")\n .requiredOption(\"--cidr <cidr>\", \"Network CIDR (e.g., 192.168.8.0/24)\")\n .option(\"--gateway <gateway>\", \"Gateway IP\")\n .option(\"--vlan <vlan>\", \"VLAN ID\", parseInt)\n .option(\"--domain <domain>\", \"DNS domain\")\n .option(\"--dhcp\", \"Enable DHCP\")\n .action(async (opts) => {\n const client = getLabdClient();\n try {\n const network = await client.createNetwork({\n name: opts.name,\n cidr: opts.cidr,\n gateway: opts.gateway,\n vlan: opts.vlan,\n domain: opts.domain,\n dhcpEnabled: opts.dhcp ?? false,\n });\n console.log(`network/${network.name} created`);\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`);\n process.exit(1);\n }\n });\n \n // labctl create token --label \"worker enrollment\" --type reusable\n create\n .command(\"token\")\n .description(\"Create a join token\")\n .option(\"--label <label>\", \"Token label/description\")\n .option(\"--type <type>\", \"Token type: one-time or reusable\", \"one-time\")\n .option(\"--expires <duration>\", \"Expiration (e.g., 24h, 7d)\")\n .action(async (opts) => {\n const client = getLabdClient();\n try {\n const token = await client.createToken(opts);\n console.log(`Token created: ${token.token}`);\n if (opts.label) console.log(`Label: ${opts.label}`);\n if (token.expiresAt) console.log(`Expires: ${token.expiresAt}`);\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);\n process.exit(1);\n }\n });\n}\n```\n\nCreate `bastion/src/cli/src/commands/delete.ts`:\n\n```typescript\nexport function registerDeleteCommand(program: Command): void {\n program\n .command(\"delete <resource> <name>\")\n .description(\"Delete a resource\")\n .option(\"--force\", \"Skip confirmation\")\n .action(async (resource: string, name: string, opts) => {\n const resourceDef = resolveResourceType(resource);\n const client = getLabdClient();\n \n if (!opts.force) {\n const { confirm } = await import(\"../utils/prompts.js\");\n const yes = await confirm(`Delete ${resourceDef.singular} \"${name}\"?`);\n if (!yes) {\n console.log(\"Cancelled.\");\n return;\n }\n }\n \n try {\n await client.deleteResource(resourceDef, name);\n console.log(`${resourceDef.singular}/${name} deleted`);\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);\n process.exit(1);\n }\n });\n}\n```",
"testStrategy": "1. Integration test: `labctl create network` creates network in DB\n2. Test validation: missing required flags shows helpful error\n3. Test token creation: token returned is valid UUID, stored in DB\n4. Test delete with confirmation: prompts user, respects --force\n5. Test delete cascade: deleting server removes NICs, disks\n6. Test delete protection: cannot delete bastion with connected servers",
"priority": "medium",
"dependencies": [
77
],
"status": "pending",
"subtasks": []
},
{
"id": 80,
"title": "Refactor Provision Commands to kubectl-style",
"description": "Refactor existing provision commands to use kubectl-style syntax: 'labctl provision <server>' instead of 'labctl provision install <mac>'.",
"details": "The new command structure should be:\n- `labctl provision <server> --os fedora-43 --role worker` (queue install)\n- `labctl reprovision <server>` (reinstall)\n- `labctl forget <server>` (remove from tracking)\n\nModify `bastion/src/cli/src/commands/install.ts` → rename to `provision.ts`:\n\n```typescript\nexport function registerProvisionCommand(program: Command): void {\n program\n .command(\"provision <server>\")\n .description(\"Queue a server for OS installation\")\n .requiredOption(\"--os <os>\", \"Operating system\", \"fedora-43\")\n .requiredOption(\"--role <role>\", \"Server role\", \"worker\")\n .option(\"--disk <disk>\", \"Target disk (auto-detected if not specified)\")\n .option(\"--hostname <hostname>\", \"Override hostname\")\n .action(async (server: string, opts) => {\n const client = getLabdClient();\n \n // Resolve server: could be hostname, MAC, or ID\n const resolved = await client.resolveServer(server);\n if (!resolved) {\n console.error(`Server \"${server}\" not found.`);\n console.error(\"Tip: Use 'labctl get servers' to see available servers.\");\n process.exit(1);\n }\n \n if (resolved.status === \"installed\") {\n console.error(`Server \"${resolved.hostname}\" is already installed.`);\n console.error(\"Tip: Use 'labctl reprovision' to reinstall.\");\n process.exit(1);\n }\n \n try {\n await client.provisionServer(resolved.mac, {\n hostname: opts.hostname ?? resolved.hostname,\n os: opts.os,\n role: opts.role,\n disk: opts.disk,\n });\n console.log(`Server ${resolved.hostname} queued for ${opts.os} installation as ${opts.role}.`);\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);\n process.exit(1);\n }\n });\n}\n```\n\nSimilarly update reprovision.ts and forget.ts to accept server name/MAC/ID.\n\nUpdate index.ts to register commands at top level instead of under 'provision' subcommand.",
"testStrategy": "1. Test server resolution: provision by hostname, MAC, or UUID all work\n2. Test already installed: provisioning installed server shows reprovision hint\n3. Test unknown server: helpful error message with tip\n4. Test reprovision: reinstalls installed server\n5. Test forget: removes server from all state categories\n6. Backward compat: verify 'labctl provision list' still works (deprecation warning)",
"priority": "medium",
"dependencies": [
77
],
"status": "pending",
"subtasks": []
},
{
"id": 81,
"title": "Implement Server and Resource API Endpoints in labd",
"description": "Add REST API endpoints in labd for full resource CRUD operations: networks, clusters, tokens. Extend servers endpoint with filters and relationship includes.",
"details": "Create/extend labd route files:\n\n1. **Extend servers.ts**:\n```typescript\n// GET /api/servers - with extended filters and includes\napp.get(\"/api/servers\", async (request, reply) => {\n const { status, role, cloud, environment, label, include } = request.query;\n \n const where = {};\n if (status) where.status = status;\n if (role) where.role = role;\n if (cloud) where.cloud = cloud;\n if (environment) where.environment = environment;\n if (label) where.labels = { path: [labelKey], equals: labelValue };\n \n const servers = await db.server.findMany({\n where,\n include: {\n nics: include?.includes(\"nics\"),\n disks: include?.includes(\"disks\"),\n clusterMemberships: include?.includes(\"clusters\") ? { include: { cluster: true } } : false,\n bastion: include?.includes(\"bastion\"),\n },\n });\n return servers;\n});\n\n// GET /api/servers/:id - by ID, hostname, or MAC\napp.get(\"/api/servers/:identifier\", async (request, reply) => {\n const { identifier } = request.params;\n \n // Try UUID first\n let server = await db.server.findUnique({ where: { id: identifier }, include: fullInclude });\n // Try hostname\n if (!server) server = await db.server.findUnique({ where: { hostname: identifier }, include: fullInclude });\n // Try MAC\n if (!server) server = await db.server.findUnique({ where: { mac: identifier.toLowerCase() }, include: fullInclude });\n \n if (!server) return reply.code(404).send({ error: \"Server not found\" });\n return server;\n});\n```\n\n2. 
**Create networks.ts**:\n```typescript\n// GET /api/networks, POST /api/networks, DELETE /api/networks/:id\nexport function registerNetworkRoutes(app: FastifyInstance, db: DbClient): void {\n app.get(\"/api/networks\", async () => db.network.findMany());\n \n app.post(\"/api/networks\", async (request, reply) => {\n const { name, cidr, gateway, vlan, domain, dhcpEnabled } = request.body;\n // Validate CIDR format\n const network = await db.network.create({ data: { name, cidr, gateway, vlan, domain, dhcpEnabled } });\n return reply.code(201).send(network);\n });\n \n app.delete(\"/api/networks/:id\", async (request, reply) => {\n await db.network.delete({ where: { id: request.params.id } });\n return reply.code(204).send();\n });\n}\n```\n\n3. **Create clusters.ts**:\n```typescript\n// Similar CRUD for clusters with member management\napp.get(\"/api/clusters/:id/members\", ...);\napp.post(\"/api/clusters/:id/members\", ...);\napp.delete(\"/api/clusters/:id/members/:serverId\", ...);\n```",
"testStrategy": "1. Integration test all CRUD endpoints with HTTP client\n2. Test server resolution: by id, hostname, and MAC all return same server\n3. Test include parameter: nics, disks, clusters included when requested\n4. Test validation: invalid CIDR rejected, duplicate names rejected\n5. Test cascade: delete network with NICs fails or cascades appropriately",
"priority": "medium",
"dependencies": [
72,
73
],
"status": "pending",
"subtasks": []
},
{
"id": 82,
"title": "Implement RBAC Permission Checks in CLI",
"description": "Wire RBAC permission checks into CLI commands. Check user permissions before executing operations using the existing Permission model.",
"details": "1. Create `bastion/src/cli/src/middleware/rbac.ts`:\n\n```typescript\nimport { getLabdClient } from \"../api/config.js\";\n\nexport interface PermissionContext {\n action: string; // read, exec, apply, destroy, manage, admin\n cloud?: string;\n environment?: string;\n server?: string;\n}\n\nexport async function checkPermission(ctx: PermissionContext): Promise<boolean> {\n const client = getLabdClient();\n try {\n const result = await client.checkPermission(ctx);\n return result.allowed;\n } catch {\n // If can't reach labd, fail open for local operations\n return true;\n }\n}\n\nexport async function requirePermission(ctx: PermissionContext): Promise<void> {\n const allowed = await checkPermission(ctx);\n if (!allowed) {\n throw new Error(\n `Permission denied: ${ctx.action} on ${ctx.server ?? \"*\"}@${ctx.cloud ?? \"*\"}/${ctx.environment ?? \"*\"}`\n );\n }\n}\n```\n\n2. Add labd endpoint `POST /api/auth/check-permission`:\n```typescript\napp.post(\"/api/auth/check-permission\", async (request, reply) => {\n const user = await authenticateRequest(request); // from cert or token\n const { action, cloud, environment, server } = request.body;\n \n const permissions = await db.permission.findMany({\n where: {\n role: { userBindings: { some: { userId: user.id } } },\n },\n });\n \n const allowed = permissions.some(p => \n matchesPattern(p.action, action) &&\n matchesPattern(p.cloud, cloud ?? \"*\") &&\n matchesPattern(p.environment, environment ?? \"*\") &&\n matchesPattern(p.server, server ?? \"*\")\n );\n \n return { allowed };\n});\n```\n\n3. 
Integrate into commands:\n```typescript\n// In provision command\nawait requirePermission({ action: \"apply\", cloud, environment, server: resolved.hostname });\n\n// In delete command\nawait requirePermission({ action: \"destroy\", cloud, environment, server: name });\n\n// In get command (filter results)\nconst servers = await client.listServers(filters);\nconst visible = await filterByPermission(servers, \"read\");\n```",
"testStrategy": "1. Unit test permission matching logic with wildcards\n2. Test admin role: has access to all resources\n3. Test operator role: can read/exec but not destroy\n4. Test viewer role: can only read, provision denied\n5. Test scope matching: permission for cloud=aws doesn't grant access to cloud=baremetal\n6. Test denied action is audit-logged",
"priority": "medium",
"dependencies": [
77,
81
],
"status": "pending",
"subtasks": []
},
{
"id": 83,
"title": "Implement Audit Logging for Resource Operations",
"description": "Log all resource mutations to the AuditLog table. Include user, action, resource type/name, result, and source IP.",
"details": "1. Create `bastion/src/labd/src/services/audit.ts`:\n\n```typescript\nimport type { PrismaClient } from \"@prisma/client\";\n\nexport interface AuditEntry {\n userId?: string;\n serverId?: string;\n sessionId?: string;\n action: string; // create, update, delete, provision, exec, rbac-denied\n resourceType: string; // server, cluster, network, token, etc.\n resourceName: string;\n args?: string; // sanitized args (no secrets)\n result: \"success\" | \"denied\" | \"error\";\n durationMs?: number;\n sourceIp?: string;\n}\n\nexport class AuditService {\n constructor(private readonly db: PrismaClient) {}\n \n async log(entry: AuditEntry): Promise<void> {\n await this.db.auditLog.create({\n data: {\n userId: entry.userId,\n serverId: entry.serverId,\n sessionId: entry.sessionId,\n action: entry.action,\n resourceType: entry.resourceType,\n resourceName: entry.resourceName,\n args: entry.args,\n result: entry.result,\n durationMs: entry.durationMs,\n sourceIp: entry.sourceIp,\n },\n });\n }\n \n async query(filters: {\n userId?: string;\n action?: string;\n resourceType?: string;\n since?: Date;\n limit?: number;\n }): Promise<AuditEntry[]> {\n return this.db.auditLog.findMany({\n where: {\n userId: filters.userId,\n action: filters.action,\n resourceType: filters.resourceType,\n timestamp: filters.since ? { gte: filters.since } : undefined,\n },\n orderBy: { timestamp: \"desc\" },\n take: filters.limit ?? 100,\n });\n }\n}\n```\n\n2. Add Fastify hook to wrap route handlers:\n```typescript\napp.addHook(\"onResponse\", async (request, reply) => {\n // Log mutations (POST, PUT, DELETE)\n if ([\"POST\", \"PUT\", \"DELETE\"].includes(request.method)) {\n const path = request.url;\n const resourceMatch = path.match(/\\/api\\/(\\w+)(?:\\/([^/]+))?/);\n if (resourceMatch) {\n await auditService.log({\n action: methodToAction(request.method),\n resourceType: resourceMatch[1],\n resourceName: resourceMatch[2] ?? \"\",\n result: reply.statusCode < 400 ? 
\"success\" : \"error\",\n sourceIp: request.ip,\n });\n }\n }\n});\n```\n\n3. Add `labctl get audit` command to view audit logs.",
"testStrategy": "1. Integration test: create network, verify audit log entry created\n2. Test RBAC denial is logged with result=denied\n3. Test sensitive data sanitization: tokens/passwords not in args\n4. Test query filters: by user, action, resourceType, time range\n5. Test `labctl get audit` displays recent entries correctly",
"priority": "medium",
"dependencies": [
81,
82
],
"status": "pending",
"subtasks": []
},
{
"id": 84,
"title": "Update CLI Entry Point and Help Text",
"description": "Update the CLI entry point to register all new commands and update help text to reflect the kubectl-style interface. Add deprecation warnings for old command structure.",
"details": "Update `bastion/src/cli/src/index.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { APP_VERSION } from \"@lab/shared\";\nimport { loadConfig } from \"./config/index.js\";\n\n// New kubectl-style commands\nimport { registerGetCommand } from \"./commands/get.js\";\nimport { registerDescribeCommand } from \"./commands/describe.js\";\nimport { registerCreateCommand } from \"./commands/create.js\";\nimport { registerDeleteCommand } from \"./commands/delete.js\";\nimport { registerApplyCommand } from \"./commands/apply.js\";\nimport { registerEditCommand } from \"./commands/edit.js\";\n\n// Action commands\nimport { registerProvisionCommand } from \"./commands/provision.js\";\nimport { registerReprovisionCommand } from \"./commands/reprovision.js\";\nimport { registerForgetCommand } from \"./commands/forget.js\";\n\n// Bastion management\nimport { registerBastionCommand } from \"./commands/bastion.js\"; // start/stop/status\n\n// App management (unchanged)\nimport { registerAppCommand } from \"./commands/app.js\";\n\n// Utility\nimport { registerConfigCommand } from \"./commands/config.js\";\nimport { registerLoginCommand } from \"./commands/login.js\";\nimport { registerDoctorCommand } from \"./commands/doctor.js\";\n\nexport function createProgram(): Command {\n const program = new Command();\n \n program\n .name(\"labctl\")\n .description(\"Lab infrastructure management CLI\")\n .version(APP_VERSION);\n \n // Global options\n program\n .option(\"-o, --output <format>\", \"output format (table, json, yaml, wide)\", \"table\")\n .option(\"--server <url>\", \"override labd server URL\")\n .option(\"--env <name>\", \"override default environment\")\n .option(\"--cloud <name>\", \"override default cloud\")\n .option(\"--debug\", \"enable debug output\")\n .option(\"--no-color\", \"disable colored output\");\n \n // Core CRUD commands\n registerGetCommand(program); // labctl get <resource> [name]\n registerDescribeCommand(program); // 
labctl describe <resource> <name>\n registerCreateCommand(program); // labctl create <resource>\n registerDeleteCommand(program); // labctl delete <resource> <name>\n registerApplyCommand(program); // labctl apply -f <file>\n registerEditCommand(program); // labctl edit <resource> <name>\n \n // Provisioning actions\n registerProvisionCommand(program); // labctl provision <server>\n registerReprovisionCommand(program);// labctl reprovision <server>\n registerForgetCommand(program); // labctl forget <server>\n \n // Bastion management\n registerBastionCommand(program); // labctl bastion start|stop|status\n \n // App management\n registerAppCommand(program); // labctl app install|health k3s\n \n // Utility\n registerConfigCommand(program);\n registerLoginCommand(program);\n registerDoctorCommand(program);\n \n // Legacy compatibility with deprecation warnings\n registerLegacyCommands(program);\n \n return program;\n}\n\nfunction registerLegacyCommands(program: Command): void {\n // labctl provision list -> labctl get servers (with warning)\n program\n .command(\"provision\")\n .command(\"list\")\n .action(() => {\n console.warn(\"DEPRECATED: Use 'labctl get servers' instead.\");\n // Delegate to get servers\n });\n}\n```\n\nUpdate shell completions in `scripts/generate-completions.ts` for new command structure.",
"testStrategy": "1. Test --help shows all new commands with descriptions\n2. Test resource type help: `labctl get --help` lists valid resources\n3. Test deprecated commands show warning but still work\n4. Test shell completions generated for new commands\n5. Test global options: -o, --server, --env, --cloud all work",
"priority": "low",
"dependencies": [
77,
78,
79,
80
],
"status": "pending",
"subtasks": []
}
],
"metadata": {
"created": "2026-03-26T04:26:49.813Z",
"updated": "2026-03-26T04:26:49.813Z",
"description": "Tasks for master context"
}
}
}

View File

@@ -0,0 +1,47 @@
<context>
# Overview
[Provide a high-level overview of your product here. Explain what problem it solves, who it's for, and why it's valuable.]
# Core Features
[List and describe the main features of your product. For each feature, include:
- What it does
- Why it's important
- How it works at a high level]
# User Experience
[Describe the user journey and experience. Include:
- User personas
- Key user flows
- UI/UX considerations]
</context>
<PRD>
# Technical Architecture
[Outline the technical implementation details:
- System components
- Data models
- APIs and integrations
- Infrastructure requirements]
# Development Roadmap
[Break down the development process into phases:
- MVP requirements
- Future enhancements
- Do not think about timelines whatsoever -- all that matters is scope and detailing exactly what needs to be built in each phase so it can later be cut up into tasks]
# Logical Dependency Chain
[Define the logical order of development:
- Which features need to be built first (foundation)
- Getting as quickly as possible to a usable, visible front end that works
- Properly pacing and scoping each feature so it is atomic but can also be built upon and improved as development progresses]
# Risks and Mitigations
[Identify potential risks and how they'll be addressed:
- Technical challenges
- Figuring out the MVP that we can build upon
- Resource constraints]
# Appendix
[Include any additional information:
- Research findings
- Technical specifications]
</PRD>

View File

@@ -0,0 +1,511 @@
<rpg-method>
# Repository Planning Graph (RPG) Method - PRD Template
This template teaches you (AI or human) how to create structured, dependency-aware PRDs using the RPG methodology from Microsoft Research. The key insight: separate WHAT (functional) from HOW (structural), then connect them with explicit dependencies.
## Core Principles
1. **Dual-Semantics**: Think functional (capabilities) AND structural (code organization) separately, then map them
2. **Explicit Dependencies**: Never assume - always state what depends on what
3. **Topological Order**: Build foundation first, then layers on top
4. **Progressive Refinement**: Start broad, refine iteratively
## How to Use This Template
- Follow the instructions in each `<instruction>` block
- Look at `<example>` blocks to see good vs bad patterns
- Fill in the content sections with your project details
- The AI reading this will learn the RPG method by following along
- Task Master will parse the resulting PRD into dependency-aware tasks
## Recommended Tools for Creating PRDs
When using this template to **create** a PRD (not parse it), use **code-context-aware AI assistants** for best results:
**Why?** The AI needs to understand your existing codebase to make good architectural decisions about modules, dependencies, and integration points.
**Recommended tools:**
- **Claude Code** (claude-code CLI) - Best for structured reasoning and large contexts
- **Cursor/Windsurf** - IDE integration with full codebase context
- **Gemini CLI** (gemini-cli) - Massive context window for large codebases
- **Codex/Grok CLI** - Strong code generation with context awareness
**Note:** Once your PRD is created, `task-master parse-prd` works with any configured AI model - it just needs to read the PRD text itself, not your codebase.
</rpg-method>
---
<overview>
<instruction>
Start with the problem, not the solution. Be specific about:
- What pain point exists?
- Who experiences it?
- Why don't existing solutions work?
- What success looks like (measurable outcomes)?
Keep this section focused - don't jump into implementation details yet.
</instruction>
## Problem Statement
[Describe the core problem. Be concrete about user pain points.]
## Target Users
[Define personas, their workflows, and what they're trying to achieve.]
## Success Metrics
[Quantifiable outcomes. Examples: "80% task completion via autopilot", "< 5% manual intervention rate"]
</overview>
---
<functional-decomposition>
<instruction>
Now think about CAPABILITIES (what the system DOES), not code structure yet.
Step 1: Identify high-level capability domains
- Think: "What major things does this system do?"
- Examples: Data Management, Core Processing, Presentation Layer
Step 2: For each capability, enumerate specific features
- Use explore-exploit strategy:
* Exploit: What features are REQUIRED for core value?
* Explore: What features make this domain COMPLETE?
Step 3: For each feature, define:
- Description: What it does in one sentence
- Inputs: What data/context it needs
- Outputs: What it produces/returns
- Behavior: Key logic or transformations
<example type="good">
Capability: Data Validation
Feature: Schema validation
- Description: Validate JSON payloads against defined schemas
- Inputs: JSON object, schema definition
- Outputs: Validation result (pass/fail) + error details
- Behavior: Iterate fields, check types, enforce constraints
Feature: Business rule validation
- Description: Apply domain-specific validation rules
- Inputs: Validated data object, rule set
- Outputs: Boolean + list of violated rules
- Behavior: Execute rules sequentially, short-circuit on failure
</example>
<example type="bad">
Capability: validation.js
(Problem: This is a FILE, not a CAPABILITY. Mixing structure into functional thinking.)
Capability: Validation
Feature: Make sure data is good
(Problem: Too vague. No inputs/outputs. Not actionable.)
</example>
</instruction>
## Capability Tree
### Capability: [Name]
[Brief description of what this capability domain covers]
#### Feature: [Name]
- **Description**: [One sentence]
- **Inputs**: [What it needs]
- **Outputs**: [What it produces]
- **Behavior**: [Key logic]
#### Feature: [Name]
- **Description**:
- **Inputs**:
- **Outputs**:
- **Behavior**:
### Capability: [Name]
...
</functional-decomposition>
---
<structural-decomposition>
<instruction>
NOW think about code organization. Map capabilities to actual file/folder structure.
Rules:
1. Each capability maps to a module (folder or file)
2. Features within a capability map to functions/classes
3. Use clear module boundaries - each module has ONE responsibility
4. Define what each module exports (public interface)
The goal: Create a clear mapping between "what it does" (functional) and "where it lives" (structural).
<example type="good">
Capability: Data Validation
→ Maps to: src/validation/
├── schema-validator.js (Schema validation feature)
├── rule-validator.js (Business rule validation feature)
└── index.js (Public exports)
Exports:
- validateSchema(data, schema)
- validateRules(data, rules)
</example>
<example type="bad">
Capability: Data Validation
→ Maps to: src/utils.js
(Problem: "utils" is not a clear module boundary. Where do I find validation logic?)
Capability: Data Validation
→ Maps to: src/validation/everything.js
(Problem: One giant file. Features should map to separate files for maintainability.)
</example>
</instruction>
## Repository Structure
```
project-root/
├── src/
│ ├── [module-name]/ # Maps to: [Capability Name]
│ │ ├── [file].js # Maps to: [Feature Name]
│ │ └── index.js # Public exports
│ └── [module-name]/
├── tests/
└── docs/
```
## Module Definitions
### Module: [Name]
- **Maps to capability**: [Capability from functional decomposition]
- **Responsibility**: [Single clear purpose]
- **File structure**:
```
module-name/
├── feature1.js
├── feature2.js
└── index.js
```
- **Exports**:
- `functionName()` - [what it does]
- `ClassName` - [what it does]
</structural-decomposition>
---
<dependency-graph>
<instruction>
This is THE CRITICAL SECTION for Task Master parsing.
Define explicit dependencies between modules. This creates the topological order for task execution.
Rules:
1. List modules in dependency order (foundation first)
2. For each module, state what it depends on
3. Foundation modules should have NO dependencies
4. Every non-foundation module should depend on at least one other module
5. Think: "What must EXIST before I can build this module?"
<example type="good">
Foundation Layer (no dependencies):
- error-handling: No dependencies
- config-manager: No dependencies
- base-types: No dependencies
Data Layer:
- schema-validator: Depends on [base-types, error-handling]
- data-ingestion: Depends on [schema-validator, config-manager]
Core Layer:
- algorithm-engine: Depends on [base-types, error-handling]
- pipeline-orchestrator: Depends on [algorithm-engine, data-ingestion]
</example>
<example type="bad">
- validation: Depends on API
- API: Depends on validation
(Problem: Circular dependency. This will cause build/runtime issues.)
- user-auth: Depends on everything
(Problem: Too many dependencies. Should be more focused.)
</example>
</instruction>
## Dependency Chain
### Foundation Layer (Phase 0)
No dependencies - these are built first.
- **[Module Name]**: [What it provides]
- **[Module Name]**: [What it provides]
### [Layer Name] (Phase 1)
- **[Module Name]**: Depends on [[module-from-phase-0], [module-from-phase-0]]
- **[Module Name]**: Depends on [[module-from-phase-0]]
### [Layer Name] (Phase 2)
- **[Module Name]**: Depends on [[module-from-phase-1], [module-from-foundation]]
[Continue building up layers...]
</dependency-graph>
---
<implementation-roadmap>
<instruction>
Turn the dependency graph into concrete development phases.
Each phase should:
1. Have clear entry criteria (what must exist before starting)
2. Contain tasks that can be parallelized (no inter-dependencies within phase)
3. Have clear exit criteria (how do we know phase is complete?)
4. Build toward something USABLE (not just infrastructure)
Phase ordering follows topological sort of dependency graph.
<example type="good">
Phase 0: Foundation
Entry: Clean repository
Tasks:
- Implement error handling utilities
- Create base type definitions
- Setup configuration system
Exit: Other modules can import foundation without errors
Phase 1: Data Layer
Entry: Phase 0 complete
Tasks:
- Implement schema validator (uses: base types, error handling)
- Build data ingestion pipeline (uses: validator, config)
Exit: End-to-end data flow from input to validated output
</example>
<example type="bad">
Phase 1: Build Everything
Tasks:
- API
- Database
- UI
- Tests
(Problem: No clear focus. Too broad. Dependencies not considered.)
</example>
</instruction>
## Development Phases
### Phase 0: [Foundation Name]
**Goal**: [What foundational capability this establishes]
**Entry Criteria**: [What must be true before starting]
**Tasks**:
- [ ] [Task name] (depends on: [none or list])
- Acceptance criteria: [How we know it's done]
- Test strategy: [What tests prove it works]
- [ ] [Task name] (depends on: [none or list])
**Exit Criteria**: [Observable outcome that proves phase complete]
**Delivers**: [What can users/developers do after this phase?]
---
### Phase 1: [Layer Name]
**Goal**:
**Entry Criteria**: Phase 0 complete
**Tasks**:
- [ ] [Task name] (depends on: [[tasks-from-phase-0]])
- [ ] [Task name] (depends on: [[tasks-from-phase-0]])
**Exit Criteria**:
**Delivers**:
---
[Continue with more phases...]
</implementation-roadmap>
---
<test-strategy>
<instruction>
Define how testing will be integrated throughout development (TDD approach).
Specify:
1. Test pyramid ratios (unit vs integration vs e2e)
2. Coverage requirements
3. Critical test scenarios
4. Test generation guidelines for Surgical Test Generator
This section guides the AI when generating tests during the RED phase of TDD.
<example type="good">
Critical Test Scenarios for Data Validation module:
- Happy path: Valid data passes all checks
- Edge cases: Empty strings, null values, boundary numbers
- Error cases: Invalid types, missing required fields
- Integration: Validator works with ingestion pipeline
</example>
</instruction>
## Test Pyramid
```
/\
/E2E\ ← [X]% (End-to-end, slow, comprehensive)
/------\
/Integration\ ← [Y]% (Module interactions)
/------------\
/ Unit Tests \ ← [Z]% (Fast, isolated, deterministic)
/----------------\
```
## Coverage Requirements
- Line coverage: [X]% minimum
- Branch coverage: [X]% minimum
- Function coverage: [X]% minimum
- Statement coverage: [X]% minimum
## Critical Test Scenarios
### [Module/Feature Name]
**Happy path**:
- [Scenario description]
- Expected: [What should happen]
**Edge cases**:
- [Scenario description]
- Expected: [What should happen]
**Error cases**:
- [Scenario description]
- Expected: [How system handles failure]
**Integration points**:
- [What interactions to test]
- Expected: [End-to-end behavior]
## Test Generation Guidelines
[Specific instructions for Surgical Test Generator about what to focus on, what patterns to follow, project-specific test conventions]
</test-strategy>
---
<architecture>
<instruction>
Describe technical architecture, data models, and key design decisions.
Keep this section AFTER functional/structural decomposition - implementation details come after understanding structure.
</instruction>
## System Components
[Major architectural pieces and their responsibilities]
## Data Models
[Core data structures, schemas, database design]
## Technology Stack
[Languages, frameworks, key libraries]
**Decision: [Technology/Pattern]**
- **Rationale**: [Why chosen]
- **Trade-offs**: [What we're giving up]
- **Alternatives considered**: [What else we looked at]
</architecture>
---
<risks>
<instruction>
Identify risks that could derail development and how to mitigate them.
Categories:
- Technical risks (complexity, unknowns)
- Dependency risks (blocking issues)
- Scope risks (creep, underestimation)
</instruction>
## Technical Risks
**Risk**: [Description]
- **Impact**: [High/Medium/Low - effect on project]
- **Likelihood**: [High/Medium/Low]
- **Mitigation**: [How to address]
- **Fallback**: [Plan B if mitigation fails]
## Dependency Risks
[External dependencies, blocking issues]
## Scope Risks
[Scope creep, underestimation, unclear requirements]
</risks>
---
<appendix>
## References
[Papers, documentation, similar systems]
## Glossary
[Domain-specific terms]
## Open Questions
[Things to resolve during development]
</appendix>
---
<task-master-integration>
# How Task Master Uses This PRD
When you run `task-master parse-prd <file>.txt`, the parser:
1. **Extracts capabilities** → Main tasks
- Each `### Capability:` becomes a top-level task
2. **Extracts features** → Subtasks
- Each `#### Feature:` becomes a subtask under its capability
3. **Parses dependencies** → Task dependencies
- `Depends on: [X, Y]` sets task.dependencies = ["X", "Y"]
4. **Orders by phases** → Task priorities
- Phase 0 tasks = highest priority
- Phase N tasks = lower priority, properly sequenced
5. **Uses test strategy** → Test generation context
- Feeds test scenarios to Surgical Test Generator during implementation
**Result**: A dependency-aware task graph that can be executed in topological order.
## Why RPG Structure Matters
Traditional flat PRDs lead to:
- ❌ Unclear task dependencies
- ❌ Arbitrary task ordering
- ❌ Circular dependencies discovered late
- ❌ Poorly scoped tasks
RPG-structured PRDs provide:
- ✅ Explicit dependency chains
- ✅ Topological execution order
- ✅ Clear module boundaries
- ✅ Validated task graph before implementation
## Tips for Best Results
1. **Spend time on dependency graph** - This is the most valuable section for Task Master
2. **Keep features atomic** - Each feature should be independently testable
3. **Progressive refinement** - Start broad, use `task-master expand` to break down complex tasks
4. **Use research mode** - `task-master parse-prd --research` leverages AI for better task generation
</task-master-integration>

244
STATUS.md Normal file
View File

@@ -0,0 +1,244 @@
# labctl Platform — Implementation Status
## What This Document Is
An honest assessment of what code exists, what works, what is stubbed, and what
hasn't been started — measured against the PRD phases.
---
## Architecture Overview (as built)
```
labctl CLI ──HTTP──▶ bastion (PXE server) ← WORKING
labctl CLI ──HTTP──▶ labd (master daemon) ← PARTIALLY WORKING
├── CockroachDB/Prisma ← SCHEMA DEFINED, NOT DEPLOYED
├── /ws/agent WebSocket ← ACCEPTS CONNECTIONS, DOES NOT ROUTE
└── mTLS CA ← NOT IMPLEMENTED
lab-agent ──WS──▶ labd ← LIBRARY CODE, NO DAEMON BINARY
```
---
## Package Inventory
| Package | Lines of Source | Tests | Status |
|---------|---------------|-------|--------|
| @lab/shared | ~200 | 0 | Complete — types, protocol, errors |
| @lab/bastion | ~800 | 32 | **Production-ready** — PXE discovery, install, reprovision |
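| @lab/cli | ~600 | 0 (uses bastion tests) | Complete for bastion workflows — init/provision/config/doctor commands implemented; labd-facing commands (`get`/`exec`/`logs`) not started |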
| @lab/labd | ~500 | 2 | Partial — routes exist, core features stubbed |
| @lab/agent | ~300 | 0 | Library only — no daemon binary |
All 5 packages compile. 32 tests pass.
---
## Phase 1: Foundation
### DONE — Working in production
| Feature | Code | How It Works |
|---------|------|-------------|
| PXE bastion server | `src/bastion/` | Fastify HTTP + dnsmasq DHCP/TFTP. Machines PXE boot, get iPXE script from `/dispatch?mac=XX`, chain to discovery or install kickstart. State persisted to JSON file. |
| Machine discovery | `routes/dispatch.ts`, `templates/discover.ks.ts` | Unknown MACs get a mini-kickstart that boots a RAM-only Fedora, scrapes hardware via `/proc`, `/sys`, `dmidecode`, POSTs to `/api/discover`, then reboots. No disk touch. |
| Machine installation | `routes/api.ts`, `templates/install.ks.ts` | Queue a MAC via `POST /api/install`. Next PXE boot gets a full Kickstart with LVM partitioning (worker: longhorn LV, infra: rancher LV), SSH keys, k3s kernel prereqs, progress callbacks. |
| Reprovision with data preservation | `commands/reprovision.ts`, `install.ks.ts` | `%pre` script detects existing LVM. Reformats `/`, `/var`, `/boot` but preserves `/home`, `/srv`, `/var/lib/longhorn`, `/var/lib/rancher`. |
| CLI: init/provision commands | `src/cli/src/commands/` | `labctl init bastion standalone start/stop/status`, `labctl provision list/install/reprovision/forget`. All talk to bastion HTTP API. |
| CLI: config management | `config/index.ts`, `commands/config.ts` | `labctl config list/get/set/path`. YAML config at `~/.labctl/config.yaml` with env var overrides. |
| labd scaffold | `src/labd/` | Fastify server with health, server listing, token management routes. Prisma schema for all models. Starts with or without database. |
| Prisma schema | `prisma/schema.prisma` | 10 models: Server, Agent, User, Role, Permission, UserRole, JoinToken, AuditLog, PulumiRun, Cluster. CockroachDB provider. |
| Database seeding | `prisma/seed.ts` | Creates admin/viewer/operator roles with proper allow/deny permissions. Idempotent via upsert. |
| Multi-arch builds + packaging | `nfpm.yaml`, `scripts/` | nfpm config for RPM/DEB. Bun compile for standalone binary (102MB labctl in `dist/`). |
| Gitea CI/CD | `.gitea/` (on remote) | Lint → typecheck → test → build → publish pipeline on mysources.co.uk. |
### DONE — Code exists, not yet connected end-to-end
| Feature | Code | What's Real | What's Missing |
|---------|------|------------|----------------|
| lab-agent connection library | `lab-agent/src/services/connection.ts` | `AgentConnection` class: WebSocket to labd, heartbeat (10s), exponential backoff reconnect (1-30s), state machine (disconnected/connecting/connected/reconnecting), handles server-shutdown messages. | **No daemon binary.** This is a library — nothing starts it. No systemd unit. No enrollment flow. |
| lab-agent command executor | `lab-agent/src/services/executor.ts` | `CommandExecutor` class: `spawn()` with timeout handling (SIGTERM then SIGKILL after 5s), stdout/stderr streaming via EventEmitter, stdin writing, signal forwarding. | **Not wired to WebSocket.** The executor and connection don't talk to each other. No message dispatch. |
| Agent registry (labd) | `labd/src/services/agent-registry.ts` | `AgentRegistry`: in-memory Map tracking by serverId and hostname, lifecycle events, heartbeat updates. Singleton exported. | **Not used by /ws/agent handler.** The WebSocket handler in `server.ts` just logs messages — it doesn't call `agentRegistry.register()`. |
| Message router (labd) | `labd/src/services/message-router.ts` | `MessageRouter`: handler registration, pending request tracking with timeouts, streaming support, log subscription, agent cleanup on disconnect. | **Not used.** `server.ts` doesn't call `messageRouter.handleMessage()`. The router exists but is dead code. |
| Token management | `labd/src/routes/auth.ts` | Create, list, revoke join tokens. Validates one-time vs reusable, expiry, revocation. Marks tokens as used. | Token validation works. **But enrollment returns `certificatePem: null`** — no actual certificate is issued. |
| CLI API client | `cli/src/api/client.ts` | `LabdClient` with mTLS support, typed methods for servers/tokens/health/enrollment. | Works for REST endpoints. **No CLI commands use it yet** — existing commands still talk directly to bastion HTTP. |
| CLI WebSocket streaming | `cli/src/api/websocket.ts` | `streamExec()` and `streamLogs()` functions. | **No `labctl exec` or `labctl logs` commands exist.** The streaming code has no consumer. |
| Zod validation | `labd/src/validation/` | Schemas for createToken, enrollment, serverFilters, createRole, permission patterns. Middleware for body/query validation. | **Not applied to routes.** The schemas and middleware exist but no route uses `preHandler: [validateBody(schema)]`. |
| Encryption service | `labd/src/services/encryption.ts` | AES-256-GCM with scrypt key derivation. Encrypt/decrypt roundtrip. Singleton from `CA_ENCRYPTION_KEY` env var. | **Not used anywhere.** No CA key is encrypted, no kubeconfig is stored. |
| Graceful shutdown | `labd/src/services/shutdown.ts` | SIGTERM/SIGINT handlers, agent notification, message router cleanup, DB disconnect, force exit timer. | Works but agent notification is a no-op since no agents are registered (see above). |
| Rate limiting | `labd/src/middleware/rate-limit.ts` | `@fastify/rate-limit`: 100/min global, 10/min for enrollment, 20/min for tokens. | **Wired up in `server.ts`.** This actually works. |
| Health checks | `labd/src/routes/health.ts` | `/healthz`, `/health`, `/health/detailed`, `/health/live`, `/health/ready`. Checks DB latency and agent count. | Works. Returns `agents: { connected: 0 }` since no agents ever register. |
| Error hierarchy | `shared/src/errors/` | `LabError`, `NotFoundError`, `PermissionDeniedError`, `ValidationError`, `AgentNotConnectedError`. | **Not used in routes.** Routes still use inline `reply.code(404).send({error: ...})`. |
| Table formatting | `cli/src/utils/table.ts` | `printTable`, `formatStatus`, `formatRelativeTime`, predefined column sets. | **Not used by existing commands.** `provision list` has its own inline formatting. |
| Resource parsing | `cli/src/utils/resource.ts` | Parse `server/labmaster`, `app/kube-system/nginx` format. | **Not used.** No commands accept `type/name` arguments yet. |
| Doctor command | `cli/src/commands/doctor.ts` | Config, cert, connectivity diagnostics. | Works standalone. |
| Login command | `cli/src/commands/login.ts` | Generates EC keypair, prompts for token, POSTs to `/api/auth/user-enroll`. | **labd has no `/api/auth/user-enroll` endpoint.** Only `/api/auth/enroll` exists (for agents). Login will 404. |
### NOT DONE — Phase 1 items from PRD with no code
| Feature | PRD Description | Status |
|---------|----------------|--------|
| Certificate Authority | Built-in CA in labd. Generate root CA, sign CSRs, revoke certs, rotate. | **Nothing.** No CA code. No X.509 operations. No `@peculiar/x509` dependency. `EncryptionService` exists but it's for data-at-rest, not PKI. |
| RBAC engine | Middleware that checks permissions on every request. Deny overrides allow. | **Nothing.** `auth.ts` middleware is a placeholder. No route checks permissions. Anyone can call any endpoint. |
| Audit logging | Log every action with user, session, action, resource, result, duration. | **Nothing.** `AuditLog` Prisma model exists but nothing writes to it. No audit middleware. |
| `labctl exec` | Remote command execution via labd → agent WebSocket relay. | **Nothing.** No `exec` CLI command. The executor library exists in lab-agent but isn't connected. |
| `labctl logs` | Resource-scoped log streaming (server, app, bastion, audit). | **Nothing.** No `logs` CLI command. |
| `labctl get servers` | List servers from labd with filters. | **Nothing.** No `get` CLI command. The API client has `getServers()` but no command calls it. |
| Smoke test stack | `podman-compose` with CockroachDB + labd + 2 agents, testing enrollment/heartbeat/exec/RBAC. | **Nothing.** `stack/docker-compose.yml` exists but only runs bastion + CockroachDB, not labd or agents. |
| Agent enrollment during PXE | Embed join token in kickstart, agent auto-enrolls on first boot. | **Nothing.** Kickstart installs k3s prereqs but doesn't install or start lab-agent. |
---
## Phase 2: Deployment
**Nothing from Phase 2 has been built.**
| Feature | Status |
|---------|--------|
| Reprovision labmaster as labmaster.ad.itaz.eu | Not done — manual operation |
| Deploy k3s with Cilium CNI | Not done — kickstart only sets up kernel prereqs, leaves a comment "run `curl -sfL https://get.k3s.io`" |
| Deploy CockroachDB on k3s | Not done — `docker-compose.yml` runs it in-memory for dev, no k8s manifests for CRDB |
| Deploy labd on k3s | **K8s manifests exist** (`deploy/k8s/labd/base/`) — Deployment, Service, ConfigMap, HPA, PDB. But no CockroachDB to connect to and no TLS configured. |
| Deploy bastion as managed app | Not done — bastion runs standalone, no Pulumi chart |
| Auto-enroll agents during PXE | Not done — no agent install in kickstart, no token embedding |
---
## Phase 3: Infrastructure as Code
**Nothing from Phase 3 has been built.**
| Feature | Status |
|---------|--------|
| Module system | Not done — no `module.yaml`, no module loader |
| Pulumi charts | Not done — no Pulumi dependency, no chart structure |
| `labctl apps install/upgrade/rollback` | Not done — no `apps` command |
| `labctl apply -f` | Not done — no `apply` command |
| `kubectl proxy` (audited) | Not done — no kubectl proxy |
| Kubeconfig store (encrypted) | `EncryptionService` exists but nothing uses it. `Cluster.kubeconfigEnc` field exists in Prisma but nothing reads/writes it. |
---
## Phase 4: Multi-Cloud
**Nothing from Phase 4 has been built.**
| Feature | Status |
|---------|--------|
| AWS provider | Not done |
| Reusable join tokens for ASGs | Token model supports `reusable` type, but no AWS integration |
| Cilium Cluster Mesh | Not done |
| Ephemeral test environments | Not done |
| Grafana Loki | Not done |
---
## Infrastructure Files
| File | Status |
|------|--------|
| `Dockerfile.labd` | Exists. Multi-stage Alpine build. Would work if you `docker build` it. |
| `Dockerfile.bastion` | Exists. Multi-stage Fedora build. Would work. |
| `.dockerignore` | Exists. |
| `deploy/k8s/labd/base/` | Kustomize manifests for labd (Deployment, Service, ConfigMap, HPA, PDB). Points at a non-existent CockroachDB and has no TLS. |
| `stack/docker-compose.yml` | Runs bastion + CockroachDB for local dev. Works. |
| `nfpm.yaml` | RPM/DEB packaging config. Works with `nfpm pkg`. |
---
## The Disconnection Problem
The core issue is that many services were built in isolation but never wired together:
```
┌─────────────────────────────────────────────────────────┐
│ BUILT BUT NOT CONNECTED │
│ │
│ AgentConnection ──✗──▶ /ws/agent handler │
│ CommandExecutor ──✗──▶ MessageRouter │
│ MessageRouter ──✗──▶ /ws/agent handler │
│ AgentRegistry ──✗──▶ /ws/agent handler │
│ Zod schemas ──✗──▶ Route preHandlers │
│ Error classes ──✗──▶ Route error handling │
│ LabdClient ──✗──▶ CLI commands (get/exec/logs) │
│ Table formatting──✗──▶ CLI commands │
│ Resource parsing──✗──▶ CLI commands │
│ EncryptionService──✗──▶ CA / kubeconfig storage │
│ Login command ──✗──▶ /api/auth/user-enroll (missing) │
│ Audit logging ──✗──▶ Any middleware │
│ RBAC engine ──✗──▶ Any middleware │
└─────────────────────────────────────────────────────────┘
```
---
## What Actually Works End-to-End Today
1. **PXE boot a bare-metal machine:**
```
labctl init bastion standalone start
# Machine PXE boots → discovered automatically
labctl provision list
labctl provision install AA:BB:CC:DD:EE:FF worker-1 --role worker
# Machine reboots → installs Fedora → reports complete
```
2. **Manage bastion lifecycle:**
```
labctl init bastion standalone status
labctl init bastion standalone stop
```
3. **Start labd (without database):**
```
LABD_PORT=3100 tsx src/labd/src/main.ts
# Starts with stub DB, health endpoint works, token/server routes return errors
```
4. **Start labd (with CockroachDB):**
```
docker-compose -f stack/docker-compose.yml up cockroachdb
DATABASE_URL=postgresql://root@localhost:26257/lab tsx src/labd/src/main.ts
# Token creation/listing/revocation works
# Server listing works (empty until agents register)
```
5. **CLI diagnostics:**
```
labctl doctor
labctl config list
labctl version
```
That's it. No agent communication, no remote exec, no log streaming, no RBAC, no certificates.
---
## Recommended Next Steps (to make Phase 1 actually work)
### Priority 1: Wire up the agent connection
1. Update `/ws/agent` handler to use `agentRegistry.register()` and `messageRouter.handleMessage()`
2. Create lab-agent daemon binary that uses `AgentConnection` + `CommandExecutor`
3. Create systemd unit for lab-agent
### Priority 2: Certificate Authority
1. Add `@peculiar/x509` dependency
2. Implement CA service: generate root CA, sign CSRs
3. Wire enrollment route to actually sign and return certificates
4. Store CA key encrypted using `EncryptionService`
### Priority 3: RBAC + Audit
1. Create RBAC middleware that checks `Permission` table
2. Create audit middleware that writes to `AuditLog`
3. Apply both to all routes
### Priority 4: CLI commands for labd
1. `labctl get servers` using `LabdClient.getServers()`
2. `labctl exec server/<name>` using `streamExec()`
3. `labctl logs server/<name>` using `streamLogs()`
### Priority 5: Smoke test stack
1. Update `docker-compose.yml` to include labd + 2 agents
2. Write integration tests for enrollment → heartbeat → exec → logs

View File

@@ -27,6 +27,7 @@ HTTP_PORT="${HTTP_PORT:-8080}"
TIMEZONE="${TIMEZONE:-Europe/London}"
LOCALE="${LOCALE:-en_GB.UTF-8}"
BASTION_DIR="${BASTION_DIR:-/tmp/lab-bastion}"
DOMAIN="${DOMAIN:-ad.itaz.eu}" # internal domain for hostnames
DHCP_MODE="${DHCP_MODE:-proxy}" # proxy (alongside existing DHCP) or full (bastion IS the DHCP server)
DHCP_RANGE_START="${DHCP_RANGE_START:-}" # only for full mode, auto-derived if empty
DHCP_RANGE_END="${DHCP_RANGE_END:-}"
@@ -45,13 +46,19 @@ CMD="${1:-serve}"
case "$CMD" in
install)
[[ $# -ge 3 ]] || { echo "Usage: bastion.sh install <mac> <hostname> [--disk <dev>]"; exit 1; }
[[ $# -ge 3 ]] || { echo "Usage: bastion.sh install <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
MAC="$2"
HOSTNAME="$3"
DISK="${5:-}" # --disk <dev>
PAYLOAD="{\"mac\":\"$MAC\",\"hostname\":\"$HOSTNAME\""
[[ -n "$DISK" ]] && PAYLOAD="$PAYLOAD,\"disk\":\"$DISK\""
PAYLOAD="$PAYLOAD}"
shift 3
DISK="" ROLE="worker"
while [[ $# -gt 0 ]]; do
case "$1" in
--disk) DISK="$2"; shift 2 ;;
--role) ROLE="$2"; shift 2 ;;
*) echo "Unknown option: $1"; exit 1 ;;
esac
done
PAYLOAD=$(python3 -c "import json; print(json.dumps({k:v for k,v in {'mac':'$MAC','hostname':'$HOSTNAME','disk':'$DISK','role':'$ROLE'}.items() if v}))")
RESULT=$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
-H "Content-Type: application/json" \
-d "$PAYLOAD" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
@@ -93,16 +100,62 @@ print()
print('\033[1mINSTALLED\033[0m')
if installed:
for mac, info in installed.items():
print(f' {mac:<20} → {info.get(\"hostname\",\"?\")} ({info.get(\"installed_at\",\"?\")})')
ip = info.get('ip', '')
ip_str = f' ip={ip}' if ip else ''
print(f' {mac:<20} → {info.get(\"hostname\",\"?\")} role={info.get(\"role\",\"?\")}{ip_str} ({info.get(\"installed_at\",\"?\")})')
else:
print(' (none)')
print()
" 2>/dev/null || echo "$RESULT"
exit 0
;;
reprovision)
  # Re-queue an already-installed machine for installation, then try to reboot
  # it into PXE over SSH so the install starts without manual intervention.
  [[ $# -ge 3 ]] || { echo "Usage: bastion.sh reprovision <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
  MAC="$2"
  HOSTNAME="$3"
  shift 3
  DISK="" ROLE="worker"
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --disk|--role)
        # Report a missing value explicitly instead of failing inside shift.
        [[ $# -ge 2 ]] || { echo "Option $1 requires a value"; exit 1; }
        if [[ "$1" == "--disk" ]]; then DISK="$2"; else ROLE="$2"; fi
        shift 2
        ;;
      *) echo "Unknown option: $1"; exit 1 ;;
    esac
  done

  # Queue the install.
  # Values are handed to Python as argv rather than spliced into the source
  # text, so quotes or backslashes in them cannot break or inject code.
  # Empty fields are dropped from the payload.
  PAYLOAD=$(python3 -c 'import json, sys; fields = dict(zip(("mac", "hostname", "disk", "role"), sys.argv[1:])); print(json.dumps({k: v for k, v in fields.items() if v}))' \
    "$MAC" "$HOSTNAME" "$DISK" "$ROLE")
  RESULT=$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
    -H "Content-Type: application/json" \
    -d "$PAYLOAD" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
  echo "$RESULT" | python3 -m json.tool 2>/dev/null || echo "$RESULT"

  # Try to find IP from installed state and SSH in to trigger PXE reboot.
  # The server keys machines by lower-case, colon-separated MAC, so the
  # user-supplied MAC is normalised the same way before the lookup.
  MAC_KEY="${MAC,,}"
  MAC_KEY="${MAC_KEY//-/:}"
  IP=$(curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>/dev/null | \
    python3 -c 'import sys, json; d = json.load(sys.stdin); print(d.get("installed", {}).get(sys.argv[1], {}).get("ip", ""))' "$MAC_KEY" 2>/dev/null || echo "")

  # SSH as the invoking (non-root) user; plain root has no admin account to use.
  ADMIN_USER="${SUDO_USER:-$USER}"
  [[ "$ADMIN_USER" == "root" ]] && ADMIN_USER=""

  if [[ -n "$IP" && -n "$ADMIN_USER" ]]; then
    echo ""
    echo "Attempting SSH reboot into PXE ($ADMIN_USER@$IP)..."
    # Host key checking is disabled by the original design.
    # NOTE(review): the remote reboot may drop the SSH session with a non-zero
    # status, which would be reported below as a failure — confirm on hardware.
    if ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "$ADMIN_USER@$IP" \
      'sudo efibootmgr 2>/dev/null; PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi' 2>&1; then
      echo ""
      echo "Machine is rebooting into PXE. Install will start automatically."
    else
      echo ""
      echo "SSH failed. Reboot the machine manually into PXE (e.g. via IPMI/KVM)."
    fi
  else
    echo ""
    echo "No IP known for this machine. Reboot it manually into PXE."
  fi
  exit 0
  ;;
serve) ;; # continue below
*)
echo "Usage: bastion.sh [serve|install <mac> <hostname>|list]"
echo "Usage: bastion.sh [serve|install|reprovision|list]"
exit 1
;;
esac
@@ -111,6 +164,17 @@ esac
# SERVE MODE — start the bastion
# ══════════════════════════════════════════════════════════════════
# ──── Kill old instances ──────────────────────────────────────────
# Find and kill any previous bastion dnsmasq and HTTP server.
# The patterns are built from $BASTION_DIR (default /tmp/lab-bastion) so that
# an overridden BASTION_DIR still matches the processes this script started.
# NOTE(review): assumes dnsmasq is launched with --conf-file under $BASTION_DIR,
# as the original hard-coded pattern implied — confirm against the launch line.
OLD_DNSMASQ=$(pgrep -f "dnsmasq --no-daemon --conf-file=${BASTION_DIR}" 2>/dev/null || true)
OLD_HTTP=$(pgrep -f "python3 ${BASTION_DIR}/server.py" 2>/dev/null || true)
if [[ -n "$OLD_DNSMASQ" || -n "$OLD_HTTP" ]]; then
  warn "Killing old bastion processes..."
  # PIDs are intentionally unquoted in kill: pgrep prints one PID per line and
  # every match must be signalled. The log line flattens them onto one line.
  if [[ -n "$OLD_DNSMASQ" ]] && kill $OLD_DNSMASQ 2>/dev/null; then
    log " Stopped old dnsmasq (PID ${OLD_DNSMASQ//$'\n'/ })"
  fi
  if [[ -n "$OLD_HTTP" ]] && kill $OLD_HTTP 2>/dev/null; then
    log " Stopped old HTTP server (PID ${OLD_HTTP//$'\n'/ })"
  fi
  # Give the old processes a moment to release their ports.
  sleep 1
fi
# ──── Preflight ───────────────────────────────────────────────────
[[ $EUID -eq 0 ]] || die "Must run as root (need DHCP/TFTP ports). Use: sudo bash bastion.sh"
@@ -143,23 +207,59 @@ GATEWAY="$(ip route | awk '/default/ {print $3; exit}')"
[[ -n "$SERVER_IP" ]] || die "Cannot detect IP on interface $IFACE"
log "Interface: ${BOLD}$IFACE${NC} IP: ${BOLD}$SERVER_IP${NC} Network: ${BOLD}$NETWORK${NC}"
# ──── Auto-detect SSH pubkey ──────────────────────────────────────
SSH_PUBKEY="${SSH_PUBKEY:-}"
if [[ -z "$SSH_PUBKEY" ]]; then
# ──── Auto-detect SSH keys ───────────────────────────────────────
REAL_HOME="${HOME}"
[[ -n "${SUDO_USER:-}" ]] && REAL_HOME="$(getent passwd "$SUDO_USER" | cut -d: -f6)"
for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do
[[ -f "$keyfile" ]] && { SSH_PUBKEY="$keyfile"; break; }
done
SSH_KEYS_CONTENT=""
SSH_KEY_SOURCE=""
# Collect SSH keys from authorized_keys + local pubkeys (deduplicated)
SSH_KEY_SOURCE=""
if [[ -f "$REAL_HOME/.ssh/authorized_keys" ]]; then
SSH_KEYS_CONTENT="$(grep -v '^#' "$REAL_HOME/.ssh/authorized_keys" | grep -v '^$')"
SSH_KEY_SOURCE="$REAL_HOME/.ssh/authorized_keys"
fi
SSH_KEY_CONTENT=""
if [[ -n "$SSH_PUBKEY" && -f "$SSH_PUBKEY" ]]; then
SSH_KEY_CONTENT="$(cat "$SSH_PUBKEY")"
log "SSH key: ${BOLD}$SSH_PUBKEY${NC}"
# Also include local pubkey files (they may not be in authorized_keys)
for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do
if [[ -f "$keyfile" ]]; then
KEY_DATA="$(cat "$keyfile")"
KEY_FP="$(awk '{print $2}' "$keyfile")"
if [[ -n "$SSH_KEYS_CONTENT" ]]; then
# Add only if not already present
if ! echo "$SSH_KEYS_CONTENT" | grep -qF "$KEY_FP"; then
SSH_KEYS_CONTENT="$SSH_KEYS_CONTENT"$'\n'"$KEY_DATA"
SSH_KEY_SOURCE="${SSH_KEY_SOURCE} + $keyfile"
fi
else
warn "No SSH public key found. Set SSH_PUBKEY=/path/to/key.pub"
warn "Install mode will use root password 'changeme' as fallback."
SSH_KEYS_CONTENT="$KEY_DATA"
SSH_KEY_SOURCE="$keyfile"
fi
fi
done
# Priority 3: generate a keypair
if [[ -z "$SSH_KEYS_CONTENT" ]]; then
GENERATED_KEY="$BASTION_DIR/bastion_ed25519"
if [[ ! -f "$GENERATED_KEY" ]]; then
log "No SSH keys found — generating ed25519 keypair..."
ssh-keygen -t ed25519 -f "$GENERATED_KEY" -N "" -C "bastion-generated@$(hostname)" >/dev/null 2>&1
fi
SSH_KEYS_CONTENT="$(cat "${GENERATED_KEY}.pub")"
SSH_KEY_SOURCE="$GENERATED_KEY (generated)"
warn "Using generated keypair: ${BOLD}$GENERATED_KEY${NC}"
warn "Save this private key — it's the only way to access installed machines."
fi
SSH_KEY_COUNT="$(echo "$SSH_KEYS_CONTENT" | wc -l)"
log "SSH keys: ${BOLD}${SSH_KEY_COUNT} key(s)${NC} from ${BOLD}${SSH_KEY_SOURCE}${NC}"
# ──── Detect admin username ──────────────────────────────────────
ADMIN_USER="${SUDO_USER:-$USER}"
[[ "$ADMIN_USER" == "root" ]] && ADMIN_USER=""
if [[ -n "$ADMIN_USER" ]]; then
log "Admin user: ${BOLD}${ADMIN_USER}${NC} (will be created on installed machines)"
fi
# ──── Prepare directories ────────────────────────────────────────
@@ -264,13 +364,8 @@ FEDORA_MIRROR="https://download.fedoraproject.org/pub/fedora/linux/releases/${FE
log "Preparing boot artifacts (Fedora ${FEDORA_VERSION} ${ARCH})..."
copy_if_missing "/usr/share/ipxe/undionly.kpxe" "$TFTPDIR/undionly.kpxe" "iPXE BIOS"
# UEFI x86_64: two-stage PXE boot
# Stage 1: tiny PXE loader stub (<20KB) fits in constrained TFTP buffers
# Stage 2: full iPXE binary downloaded via UEFI PXE protocol (no size limit)
PXELOADER_SRC="$(cd "$(dirname "$0")" && pwd)/pxeloader.c"
[[ -f "$PXELOADER_SRC" ]] || PXELOADER_SRC="$(dirname "${BASH_SOURCE[0]}")/pxeloader.c"
build_pxeloader "$PXELOADER_SRC" "$TFTPDIR/ipxe.efi" "PXE loader stub (stage 1)"
copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe-real.efi" "iPXE UEFI x86_64 (stage 2)"
# UEFI x86_64: serve iPXE directly via TFTP (UEFI has no TFTP size limit)
copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe.efi" "iPXE UEFI x86_64"
copy_if_missing "/usr/share/ipxe/arm64-efi/snponly.efi" "$TFTPDIR/ipxe-arm64.efi" "iPXE UEFI arm64"
download "${FEDORA_MIRROR}/images/pxeboot/vmlinuz" "$HTTPDIR/vmlinuz" "Fedora kernel"
@@ -375,25 +470,29 @@ except Exception as e:
"
fi
# ── Power off — do NOT let Anaconda proceed ──
# ── Reboot — do NOT let Anaconda proceed ──
echo ""
echo "=== Discovery complete, powering off ==="
echo "=== Discovery complete, rebooting ==="
echo ""
sleep 3
echo 1 > /proc/sys/kernel/sysrq
echo o > /proc/sysrq-trigger
echo b > /proc/sysrq-trigger
sleep 5
poweroff -f
reboot -f
%end
# Anaconda should never get here, but just in case:
poweroff
reboot
DISCOVER_KS
# Patch in the bastion URL
sed -i "s|__BASTION_URL__|http://${SERVER_IP}:${HTTP_PORT}|g" "$HTTPDIR/discover.ks"
# Save SSH keys and admin user for the HTTP server to use
echo "$SSH_KEYS_CONTENT" > "$BASTION_DIR/ssh_keys"
echo "$ADMIN_USER" > "$BASTION_DIR/admin_user"
# ──── Generate iPXE boot script ───────────────────────────────────
# Initial iPXE script chains to /dispatch with the MAC, so the server
# can route to discover or install mode per machine.
@@ -431,9 +530,17 @@ SERVER_IP = sys.argv[3]
HTTP_PORT = int(sys.argv[4])
FEDORA_VER = sys.argv[5]
FEDORA_MIRROR = sys.argv[6]
SSH_KEY = sys.argv[7] if len(sys.argv) > 7 else ""
SSH_KEYS_FILE = sys.argv[7] if len(sys.argv) > 7 else ""
TIMEZONE = sys.argv[8] if len(sys.argv) > 8 else "Europe/London"
LOCALE = sys.argv[9] if len(sys.argv) > 9 else "en_GB.UTF-8"
DOMAIN = sys.argv[10] if len(sys.argv) > 10 else "ad.itaz.eu"
ADMIN_USER = sys.argv[11] if len(sys.argv) > 11 else ""
# Load SSH keys from file: one key per line, blank lines and comments skipped.
SSH_KEYS = []
if SSH_KEYS_FILE and os.path.isfile(SSH_KEYS_FILE):
    with open(SSH_KEYS_FILE) as f:
        # Strip before testing for '#', so indented comment lines are not
        # mistaken for keys.
        SSH_KEYS = [
            key
            for key in (line.strip() for line in f)
            if key and not key.startswith('#')
        ]
# ── State management (file-backed, lock-protected) ───────────────
@@ -452,19 +559,66 @@ def save_state(state):
# ── Kickstart generation ─────────────────────────────────────────
def generate_kickstart(hostname, disk="", ssh_key=""):
disk_cmds = "clearpart --all --initlabel\nautopart --type=plain"
if disk:
disk_cmds = f"ignoredisk --only-use={disk}\nclearpart --all --initlabel --drives={disk}\nautopart --type=plain"
def generate_kickstart(hostname, disk="", ssh_keys=None, domain="", role="worker", admin_user=""):
ssh_keys = ssh_keys or []
fqdn = f"{hostname}.{domain}" if domain else hostname
vg = "labvg"
if ssh_key:
auth = f'rootpw --lock\nsshkey --username=root "{ssh_key}"'
# ── Auth ──
if ssh_keys:
auth = f'rootpw --lock\nsshkey --username=root "{ssh_keys[0]}"'
else:
auth = 'rootpw --plaintext changeme'
return f"""# Lab Bastion — Fedora {FEDORA_VER} install
# ── Admin user (kickstart directive) ──
user_directive = ""
if admin_user:
user_directive = f'user --name={admin_user} --groups=wheel --lock'
# ── SSH keys for %post (root + admin user) ──
all_keys = "\n".join(ssh_keys)
ssh_post_block = ""
if ssh_keys:
ssh_post_block = f"""
# Set up SSH keys for root
mkdir -p /root/.ssh && chmod 700 /root/.ssh
cat > /root/.ssh/authorized_keys << 'SSHKEYS'
{all_keys}
SSHKEYS
chmod 600 /root/.ssh/authorized_keys"""
if admin_user and ssh_keys:
ssh_post_block += f"""
# Set up SSH keys for {admin_user}
ADMIN_HOME=$(getent passwd {admin_user} | cut -d: -f6)
mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh"
cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
chown -R {admin_user}:{admin_user} "$ADMIN_HOME/.ssh"
chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"
# Fix SELinux contexts for SSH
restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
# Passwordless sudo for {admin_user}
echo '{admin_user} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/{admin_user}
chmod 440 /etc/sudoers.d/{admin_user}"""
# ── Determine disk (auto-detect first NVMe/SDA if not specified) ──
disk_line = f'DISK="{disk}"' if disk else '''
DISK=""
for d in /dev/nvme0n1 /dev/sda /dev/vda; do
[ -b "$d" ] && { DISK="$(basename $d)"; break; }
done
[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }
'''
# ── LVM layout sizes (MB) ──
has_longhorn = (role == "worker")
return f"""# Lab Bastion -- Fedora {FEDORA_VER} server install
# Generated: {datetime.now().isoformat()}
# Target: {hostname}
# Target: {fqdn} (role={role})
text
reboot
@@ -473,39 +627,266 @@ lang {LOCALE}
keyboard uk
timezone {TIMEZONE} --utc
network --bootproto=dhcp --activate --hostname={hostname}
network --bootproto=dhcp --activate --hostname={fqdn}
{auth}
{disk_cmds}
{user_directive}
bootloader --append="console=tty0 console=ttyS0,115200n8"
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch
# Partitioning is generated dynamically by %pre (supports longhorn preservation)
%include /tmp/part.ks
%pre --log=/tmp/pre-partition.log
#!/bin/bash
set -x
# Progress callback helper
bastion_progress() {{
local stage="$1" detail="${{2:-}}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
-H "Content-Type: application/json" \
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
}}
bastion_progress "partitioning" "preparing disk layout"
VG="{vg}"
{disk_line}
REPROVISION=no
# Check if VG exists (reprovision scenario)
if vgs $VG &>/dev/null; then
echo "=== Existing VG found - reprovision mode ==="
REPROVISION=yes
# Detect which data LVs to preserve
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no
lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME"
# Remove only OS logical volumes (keep data LVs)
for lv in root var varlog swap; do
lvremove -f $VG/$lv 2>/dev/null || true
done
fi
if [ "$REPROVISION" = "yes" ]; then
# Find existing boot partitions by type
EFI_PART=$(blkid -t TYPE=vfat -o device /dev/${{DISK}}* 2>/dev/null | head -1)
BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/${{DISK}}* 2>/dev/null | head -1)
EFI_PART=${{EFI_PART:-/dev/${{DISK}}1}}
BOOT_PART=${{BOOT_PART:-/dev/${{DISK}}2}}
echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"
# Build partition config reusing existing PV/VG
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --none
part /boot/efi --onpart=$EFI_PART --fstype=efi
part /boot --onpart=$BOOT_PART --fstype=ext4
volgroup {vg} --useexisting --noformat
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
PARTEOF
# Preserve or recreate data LVs
if [ "$PRESERVE_HOME" = "yes" ]; then
echo "logvol /home --vgname={vg} --name=home --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks
fi
if [ "$PRESERVE_SRV" = "yes" ]; then
echo "logvol /srv --vgname={vg} --name=srv --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks
fi
if [ "$PRESERVE_LONGHORN" = "yes" ]; then
echo "logvol /var/lib/longhorn --vgname={vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
fi
else
# Fresh install
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --all --initlabel --drives=$DISK
part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
part /boot --fstype=ext4 --size=3072 --ondisk=$DISK
part pv.01 --size=1 --grow --ondisk=$DISK
volgroup {vg} pv.01
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240
logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480
{"logvol /var/lib/longhorn --vgname=" + vg + " --name=longhorn --fstype=xfs --grow --size=1" if has_longhorn else ""}
PARTEOF
fi
echo "=== Generated partition config ==="
cat /tmp/part.ks
echo "==================================="
bastion_progress "partitioning" "layout ready, starting install"
%end
%packages
@core
@server-product
openssh-server
vim-enhanced
tmux
git
curl
wget
python3
lshw
dmidecode
dnf-plugins-core
# Networking and diagnostics
NetworkManager
bind-utils
net-tools
iproute
iputils
traceroute
tcpdump
htop
iotop
strace
jq
# k3s prerequisites
container-selinux
iptables-nft
nftables
policycoreutils-python-utils
chrony
tar
socat
conntrack-tools
ethtool
# Boot management
efibootmgr
# Puppet prerequisites
ruby
ruby-libs
# Exclude desktop
-@workstation-product
-@gnome-desktop
-gnome-shell
-gdm
-PackageKit
-PackageKit-glib
%end
%post --log=/root/bastion-post-install.log
#!/bin/bash
set -x
# Progress callback helper
bastion_progress() {{
local stage="$1" detail="${{2:-}}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
-H "Content-Type: application/json" \
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
}}
bastion_progress "post-install" "configuring system"
# ── SSH ──
systemctl enable --now sshd
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
hostnamectl set-hostname {hostname}
echo "Provisioned by lab-bastion on $(date -Iseconds)" > /etc/lab-provisioned
echo "# Lab node — puppet enrollment pending" > /root/README
{ssh_post_block}
# ── Hostname and domain ──
hostnamectl set-hostname {fqdn}
# ── tmpfs for /tmp ──
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
# ── Kernel modules for k3s ──
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
br_netfilter
overlay
ip_conntrack
MODULES
modprobe br_netfilter || true
modprobe overlay || true
# ── Sysctl for k3s networking ──
cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
net.ipv6.conf.all.forwarding = 1
fs.inotify.max_user_instances = 524288
fs.inotify.max_user_watches = 1048576
SYSCTL
sysctl --system || true
# ── Disable firewalld (k3s manages its own iptables rules) ──
systemctl disable --now firewalld || true
# ── Enable chronyd for time sync ──
systemctl enable --now chronyd
# ── Set boot order: local disk first, PXE after ──
if command -v efibootmgr >/dev/null 2>&1; then
# Find the Fedora boot entry and move it first
FEDORA_ENTRY=$(efibootmgr | grep -i fedora | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
if [ -n "$FEDORA_ENTRY" ]; then
CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
# Put Fedora first, keep rest
NEW_ORDER="$FEDORA_ENTRY,$(echo "$CURRENT_ORDER" | sed "s/$FEDORA_ENTRY,\\?//;s/,$//")"
efibootmgr -o "$NEW_ORDER" || true
echo "Boot order set: Fedora first ($NEW_ORDER)"
fi
fi
# ── Provisioning metadata ──
cat > /etc/lab-provisioned << PROVEOF
hostname: {fqdn}
role: {role}
provisioned: $(date -Iseconds)
bastion: {SERVER_IP}
PROVEOF
cat > /root/README << 'README'
# Lab Node -- {fqdn} (role: {role})
#
# Next steps:
# 1. Install puppet agent:
# dnf install -y puppet-agent
#
# 2. Install k3s:
# curl -sfL https://get.k3s.io | sh -
#
# 3. Or join existing cluster:
# curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
README
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {{split($2,a,"/"); print a[1]; exit}}')
bastion_progress "complete" "ready at $IP_ADDR"
%end
"""
@@ -562,6 +943,25 @@ def print_install_started(mac, hostname):
print(f" Serving Fedora {FEDORA_VER} installer + kickstart...")
print(f"\n{'─' * 60}\n", flush=True)
# Glyph printed in front of each install-progress line, keyed by stage name.
PROGRESS_ICONS = {
    "partitioning": "◆",
    "installing": "◆◆",
    "post-install": "◆◆◆",
    "complete": "✔",
    "error": "✘",
}


def print_progress(mac, stage, detail=""):
    """Print one coloured progress line for an installing machine.

    mac    -- MAC address reported by the progress callback.
    stage  -- stage name; unknown stages get a neutral "·" glyph.
    detail -- optional free text appended after the stage name.

    When the stage is "complete" and the detail yields a non-empty address
    after removing the "ready at " prefix, a ready-to-copy ssh command is
    printed as well, using ADMIN_USER or "root" when no admin user is set.
    """
    if stage == "complete":
        color = GREEN
    elif stage == "error":
        color = RED
    else:
        color = YELLOW

    icon = PROGRESS_ICONS.get(stage, "·")
    suffix = f" -- {detail}" if detail else ""
    print(f" {color}{icon}{RESET} {mac} {BOLD}{stage}{RESET}{suffix}", flush=True)

    # Only a completed install with a reported address gets the ssh hint.
    if stage != "complete" or not detail:
        return
    ip = detail.replace("ready at ", "").strip()
    if not ip:
        return
    admin = ADMIN_USER or "root"
    print(f"\n {GREEN}{BOLD} ssh {admin}@{ip}{RESET}\n", flush=True)
# ── HTTP Handler ──────────────────────────────────────────────────
class BastionHandler(SimpleHTTPRequestHandler):
@@ -603,7 +1003,7 @@ class BastionHandler(SimpleHTTPRequestHandler):
echo
echo =============================================
echo Lab PXE Bastion INSTALLING Fedora {FEDORA_VER}
echo Lab PXE Bastion - INSTALLING Fedora {FEDORA_VER}
echo Target: {hostname}
echo MAC: {mac}
echo =============================================
@@ -614,13 +1014,31 @@ initrd http://{SERVER_IP}:{HTTP_PORT}/initrd.img
boot
"""
self.send_text(200, script)
elif mac in state.get("installed", {}):
info = state["installed"][mac]
hostname = info.get("hostname", "?")
print(f" {GREEN}PXE request from {mac} ({hostname}) - already installed, booting local disk{RESET}", flush=True)
script = f"""#!ipxe
echo
echo =============================================
echo Lab PXE Bastion - {hostname}
echo Already installed, booting from local disk
echo =============================================
echo
sleep 3
exit
"""
self.send_text(200, script)
else:
print(f" {YELLOW}PXE request from {mac} → discovery mode{RESET}", flush=True)
script = f"""#!ipxe
echo
echo =============================================
echo Lab PXE Bastion DISCOVERY MODE
echo Lab PXE Bastion - DISCOVERY MODE
echo MAC: {mac}
echo Collecting hardware info...
echo =============================================
@@ -642,7 +1060,10 @@ boot
ks = generate_kickstart(
hostname=cfg.get("hostname", "lab-node"),
disk=cfg.get("disk", ""),
ssh_key=SSH_KEY,
ssh_keys=SSH_KEYS,
domain=DOMAIN,
role=cfg.get("role", "worker"),
admin_user=ADMIN_USER,
)
self.send_text(200, ks)
return
@@ -710,15 +1131,21 @@ boot
mac = data.get("mac", "").lower().replace("-", ":")
hostname = data.get("hostname", "lab-node")
disk = data.get("disk", "")
role = data.get("role", "worker")
if not mac:
self.send_json(400, {"error": "mac is required"})
return
if role not in ("worker", "infra"):
self.send_json(400, {"error": "role must be 'worker' or 'infra'"})
return
state = load_state()
state.setdefault("install_queue", {})[mac] = {
"hostname": hostname,
"disk": disk,
"role": role,
"queued_at": datetime.now().isoformat(),
}
save_state(state)
@@ -729,10 +1156,49 @@ boot
"status": "queued",
"mac": mac,
"hostname": hostname,
"message": "PXE boot the machine to start installation",
"role": role,
"message": f"PXE boot the machine to start installation (role={role})",
})
return
# ── Install progress callback from kickstart ──
if parsed.path == "/api/progress":
try:
data = json.loads(body)
except json.JSONDecodeError:
self.send_json(400, {"error": "invalid JSON"})
return
mac = data.get("mac", "unknown").lower()
stage = data.get("stage", "unknown")
detail = data.get("detail", "")
print_progress(mac, stage, detail)
# Update state with progress
state = load_state()
if mac in state.get("install_queue", {}):
state["install_queue"][mac]["progress"] = stage
state["install_queue"][mac]["progress_at"] = datetime.now().isoformat()
if detail:
state["install_queue"][mac]["progress_detail"] = detail
# Move to installed on completion
if stage == "complete":
cfg = state["install_queue"].pop(mac)
ip = detail.replace("ready at ", "").strip() if detail else ""
state.setdefault("installed", {})[mac] = {
"hostname": cfg.get("hostname", "?"),
"role": cfg.get("role", "?"),
"ip": ip,
"installed_at": datetime.now().isoformat(),
}
save_state(state)
self.send_json(200, {"status": "ok"})
return
self.send_json(404, {"error": "not found"})
@@ -850,9 +1316,11 @@ python3 "$BASTION_DIR/server.py" \
"$HTTP_PORT" \
"$FEDORA_VERSION" \
"$FEDORA_MIRROR" \
"$SSH_KEY_CONTENT" \
"$BASTION_DIR/ssh_keys" \
"$TIMEZONE" \
"$LOCALE" &
"$LOCALE" \
"$DOMAIN" \
"$ADMIN_USER" &
HTTP_PID=$!
sleep 1
@@ -871,6 +1339,7 @@ echo -e " Network: ${BOLD}${NETWORK}/24${NC} via ${BOLD}${IFACE}${NC}"
echo -e " DHCP: ${BOLD}${DHCP_MODE}${NC}$(if [[ "$DHCP_MODE" == "full" ]]; then echo " (${DHCP_RANGE_START}${DHCP_RANGE_END})"; else echo " (alongside existing DHCP)"; fi)"
echo -e " HTTP: ${BOLD}http://${SERVER_IP}:${HTTP_PORT}/${NC}"
echo -e " OS: ${BOLD}Fedora ${FEDORA_VERSION} (${ARCH})${NC}"
echo -e " Domain: ${BOLD}${DOMAIN}${NC}"
echo -e " State: ${BOLD}${STATEFILE}${NC}"
echo ""
echo -e " ${YELLOW}PXE boot any machine on this network.${NC}"

8
bastion/.dockerignore Normal file
View File

@@ -0,0 +1,8 @@
node_modules
dist
.git
*.log
.env
.env.*
*.tsbuildinfo
.taskmaster

3
bastion/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
node_modules/
dist/
*.tsbuildinfo

View File

@@ -0,0 +1,132 @@
# PRD: Refactor K3s Module from Bash Heredocs to Pulumi TypeScript
## Problem
The k3s install/configure/health module currently generates ~300 lines of bash heredoc strings embedded in TypeScript files (`install.ts`, `configure.ts`, `health.ts`). These are unmaintainable, untestable, and impossible to compose. This is the same bash-in-code problem that drove the bastion TypeScript rewrite.
## Vision
The lab platform uses Pulumi as its IaC engine:
- **Central execution**: labd runs Pulumi programs in labcontroller k8s for cloud/remote resources with RBAC, global state, and audit trail (PulumiRun table already exists in CockroachDB)
- **Local execution**: lab-agents run Pulumi programs directly on bare-metal nodes
- **Multi-environment**: supports multiple datacenters, clouds (baremetal, AWS, GCP), production/dev/ephemeral environments
## Current State
### Files to replace
- `src/modules/modules/k3s/src/install.ts` — 275 lines, generates bash for 10 install phases
- `src/modules/modules/k3s/src/configure.ts` — 118 lines, generates bash for 5 configure phases
- `src/modules/modules/k3s/src/health.ts` — 57 lines, generates bash for 6 health checks
### Existing infrastructure
- `sshExec(ip, user, command, opts)` and `sshExecStreaming()` — SSH execution primitives in `src/modules/src/ssh.ts`
- Module system: `ModuleRunner`, `ModuleRegistry`, `Module` interface with install/configure/health phases
- `@lab/shared` types: `BastionConfig`, `K3sInstallContext`, roles, OS types
- PulumiRun model in Prisma schema (labd) — tracks Pulumi execution state
- labcontroller module generates k8s manifests (cockroachdb.ts, labd.ts, bastion.ts) — these also need Pulumi migration eventually
### 21 distinct operations currently in bash
**Install phase (10 steps):**
1. Load kernel modules (br_netfilter, overlay, ip_conntrack)
2. Apply CIS sysctl hardening (9 params)
3. Disable swap
4. Disable firewall (firewalld/ufw — mask to survive reboot)
5. Set SELinux permissive
6. Write k3s server config (flannel=none, secrets-encryption, audit, CIS hardened)
7. Write audit policy YAML
8. Clean up stale CNI (flannel.1 vxlan, cilium interfaces, port 8472 conflicts)
9. Install k3s binary (curl | sh)
10. Install Cilium CNI (detect arch, detect interface, kubeProxyReplacement)
**Configure phase (5 steps):**
1. Fix CoreDNS upstream DNS (systemd-resolved 127.0.0.53 unreachable from pod netns)
2. Configure log rotation
3. Check certificate expiry
4. Apply default network policies (deny-ingress, allow-dns-egress)
5. Apply Pod Security Standards (restricted)
**Health checks (6 checks):**
1. k3s service active
2. Node Ready condition
3. API server /healthz
4. Secrets encryption enabled
5. Cilium status
6. kube-system pod status
## Requirements
### Architecture decisions needed (discuss with user via task-master)
1. **Pulumi structure**: micro-stacks vs monorepo-by-env vs component-library vs GitOps operator
2. **Multi-cloud support**: how stacks are organized across baremetal/AWS/GCP
3. **Environment model**: how prod/dev/ephemeral environments are represented
4. **State backend**: Pulumi Cloud vs self-hosted (S3/CockroachDB)
5. **Execution model**: who runs `pulumi up` — labd central, lab-agent local, or both?
### Operation design
- Each operation is a typed TypeScript async function using `sshExec()`
- Standard interface: `OperationContext` in, `OperationResult` out
- **Idempotent**: check before act, report `changed: boolean`
- **Composable**: operations grouped into logical units (host-prep, networking, hardening)
- **Testable**: mock sshExec for unit tests
- **Future Pulumi-ready**: each function maps 1:1 to a `remote.Command` resource
### Groups (logical composition)
- `host-prep`: kernel-modules + sysctl + swap + firewall + selinux
- `k3s-server`: k3s-config + audit-policy + cni-cleanup + k3s-install
- `k3s-agent`: k3s-config (agent) + k3s-install (agent mode)
- `networking`: cilium + dns-fix + network-policy
- `hardening`: pod-security + cert-check + log-rotation
### Pulumi integration (when added)
- Add `@pulumi/pulumi` and `@pulumi/command` as dependencies
- Each operation becomes a `command.remote.Command` resource
- Groups become `pulumi.ComponentResource` classes
- K3sCluster becomes a top-level ComponentResource that composes groups
- Stacks per environment: `lab-baremetal`, `aws-prod`, `dev`, `ephemeral-pr-123`
## File structure
```
src/modules/modules/k3s/src/
├── types.ts # K3sConfig, OperationContext, OperationResult
├── utils.ts # sshOpts(), runSequential(), file helpers
├── operations/ # ~15 atomic operations
│ ├── kernel-modules.ts
│ ├── sysctl.ts
│ ├── swap.ts
│ ├── firewall.ts
│ ├── selinux.ts
│ ├── k3s-config.ts
│ ├── audit-policy.ts
│ ├── cni-cleanup.ts
│ ├── k3s-install.ts
│ ├── cilium.ts
│ ├── dns-fix.ts
│ ├── log-rotation.ts
│ ├── network-policy.ts
│ ├── pod-security.ts
│ └── cert-check.ts
├── groups/ # Logical groupings
│ ├── host-prep.ts
│ ├── k3s-server.ts
│ ├── k3s-agent.ts
│ ├── networking.ts
│ └── hardening.ts
├── health/ # Health checks
│ ├── k3s-service.ts
│ ├── node-ready.ts
│ ├── api-health.ts
│ ├── secrets-encryption.ts
│ ├── cilium-status.ts
│ └── pod-status.ts
├── k3s-module.ts # Module implementation
└── index.ts # Public exports
```
## Success criteria
- Zero bash heredoc strings in the k3s module
- Every operation independently testable with mocked sshExec
- `labctl app k3s install <target>` works end-to-end
- `labctl app k3s health` works end-to-end
- Existing test suite passes (updated for new API)
- Clear path to wrapping operations as Pulumi resources

View File

@@ -0,0 +1,172 @@
# PRD: Resource Tracking & kubectl-style CLI
## Problem
The lab platform currently has fragmented state management:
- Bastion keeps machine state in an ephemeral JSON file (`/tmp/lab-bastion/state.json`) that is lost on pod restart
- labd receives state syncs from bastions but only stores them in memory — the `Server` table in CockroachDB is never written to
- There is no system to track relationships between resources (servers belong to clusters, clusters run on servers, networks connect servers)
- The CLI (`labctl`) uses an inconsistent verb-noun structure (`labctl provision list`, `labctl app k3s install`) instead of a uniform resource-oriented pattern
- RBAC permissions reference resources (server, cloud, environment) but there is no resource registry to validate against
## Vision
A unified resource tracking system where all infrastructure objects (servers, clusters, networks, bastions, VMs) are persisted in CockroachDB via labd, with relationships between them, and managed through a kubectl-style CLI. This replaces the ephemeral JSON state and becomes the single source of truth for the platform.
## Current State
### Database (CockroachDB via Prisma)
Existing models that are scaffolded but mostly unused:
- `Server` — hostname, mac, cloud, environment, role, labels, ip, status (0 rows)
- `Agent` — mTLS certificate enrollment per server (0 rows)
- `Bastion` — PXE server registration (1 row, labmaster)
- `Cluster` — k8s cluster metadata (0 rows)
- `User`, `Role`, `Permission`, `UserRole` — RBAC framework (seeded with 3 roles, 6 permissions)
- `JoinToken` — agent/bastion enrollment tokens
- `AuditLog` — action audit trail
### Bastion State (ephemeral JSON)
Three categories tracked per-bastion:
- `discovered` — machines found via PXE with hardware info (CPU, RAM, disks, NICs, arch)
- `install_queue` — machines queued for OS install with progress tracking
- `installed` — machines with OS installed (hostname, role, IP, OS)
### CLI Structure (current)
```
labctl init bastion standalone [start|stop|status]
labctl provision [list|install|reprovision|forget|logs]
labctl app [k3s|labcontroller]
labctl config [list|get|set]
labctl roles
labctl doctor
labctl login
labctl logs
```
## Requirements
### 1. Persist Bastion State to Database
When labd receives `bastion-state-sync` messages, it must upsert machines into the `Server` table:
- Discovered machines → create/update Server with status "discovered", store HardwareInfo as JSON labels
- Queued machines → update Server status to "provisioning"
- Installed machines → update Server with hostname, IP, role, OS, status "installed"
- Track which bastion owns which server (add `bastionId` to Server model)
- Track hardware info: arch, cpu_model, cpu_cores, memory_gb, disks, nics
The bastion's local JSON state becomes a cache; labd's database is the source of truth. On bastion startup, it should load its state from labd if available.
### 2. Resource Model Expansion
Add new models to the Prisma schema for tracking infrastructure:
**Network** — L2/L3 network segments
- name, cidr, vlan, gateway, domain, dhcpEnabled
- Servers have NICs on networks
**ServerNic** — NIC-to-network mapping
- serverId, networkId, mac, ip, name, state (UP/DOWN)
- Derived from HardwareInfo during discovery
**ServerDisk** — Disk inventory per server
- serverId, name, sizeGb, model
- Derived from HardwareInfo during discovery
**ClusterMember** — Server-to-cluster membership
- clusterId, serverId, role (control-plane, worker)
### 3. kubectl-style CLI Redesign
Restructure labctl to follow the `mcpctl` / `kubectl` pattern:
```
# Core CRUD verbs that work on any resource
labctl get <resource> [name] # List or get specific resource
labctl describe <resource> <name> # Detailed view with relationships
labctl create <resource> [flags] # Create a resource
labctl delete <resource> <name> # Delete a resource
labctl edit <resource> <name> # Edit in $EDITOR
labctl apply -f <file> # Declarative apply from YAML
# Resource types (with aliases)
servers (server, srv)
clusters (cluster)
networks (network, net)
bastions (bastion)
roles (role)
users (user)
tokens (token)
audit (audit)
# Output formats
-o table (default), -o json, -o yaml, -o wide
# Examples
labctl get servers # List all servers
labctl get servers -o wide # With extra columns (disks, NICs)
labctl get server labmaster # Get specific server
labctl describe server labmaster # Full details + relationships
labctl get servers --role worker # Filter by role
labctl get servers --status discovered # Filter by status
labctl get clusters # List clusters
labctl describe cluster lab-k3s # Cluster members, health
labctl get networks # List networks
labctl create network --name lab --cidr 192.168.8.0/24 --gateway 192.168.8.1
# Provisioning becomes actions on server resources
labctl provision <server> --os fedora-43 --role worker # Queue install
labctl reprovision <server> # Reinstall
labctl forget <server> # Remove from tracking
# App management stays as-is but simplified
labctl app install k3s <server>
labctl app health k3s [server]
# Admin
labctl bastion start [--foreground] # Start local bastion
labctl bastion status # Bastion health
labctl login # Auth
labctl doctor # Diagnostics
```
### 4. Resource Aliases & Resolution
Follow mcpctl's pattern from `shared.ts`:
- Accept singular, plural, and short aliases: `server`, `servers`, `srv` all resolve to the same resource
- Accept name or ID: `labctl get server labmaster` or `labctl get server <uuid>`
- Accept MAC address for servers: `labctl get server 38:05:25:33:e2:e4`
### 5. RBAC Integration
The existing Permission model uses `action:cloud:environment:server` patterns. Wire this into the resource system:
- CLI commands check permissions before executing
- `labctl get` respects read permissions (only show resources the user can see)
- `labctl provision` requires `apply` permission on the target server
- `labctl delete` requires `destroy` permission
- Audit all resource operations to the AuditLog table
### 6. Bastion State Directory Fix
Fix the bug where the CLI's `--dir` default (`/tmp/lab-bastion`) overrides the `BASTION_DIR=/data` environment variable. The CLI option should use the env var as its default:
```typescript
.option("--dir <dir>", "Bastion data directory", process.env["BASTION_DIR"] ?? "/tmp/lab-bastion")
```
## Technical Constraints
- Database: CockroachDB with Prisma ORM (already deployed)
- API: Fastify + WebSocket (labd)
- CLI: Commander.js (labctl)
- Auth: mTLS certificates (planned), join tokens (implemented)
- Monorepo: pnpm workspace with @lab/shared, @lab/bastion, @lab/cli, @lab/labd
- The bastion-to-labd WebSocket protocol is defined in @lab/shared/protocol
## Success Criteria
1. `labctl get servers` shows all machines (discovered, provisioning, installed) from the database
2. Server state survives bastion and labd pod restarts
3. `labctl describe server <name>` shows hardware info, network, cluster membership
4. Resources have tracked relationships (server→cluster, server→network, bastion→server)
5. RBAC permissions are enforced on CLI operations
6. All resource mutations are audit-logged
7. CLI follows consistent kubectl-style `verb resource [name] [flags]` pattern

View File

@@ -0,0 +1,355 @@
# Lab Platform — Design Document
## Vision
A unified infrastructure management platform that replaces Puppet with a modern, Pulumi-based system. Manages bare-metal servers, cloud VMs, and k3s clusters through a single CLI and API.
## Architecture Overview
```
┌─────────────────────────────────────────────────────────────────┐
│ Developer Workstation (thebeast) │
│ │
│ lab CLI │
│ ├── lab init bastion standalone start (PXE provisioning) │
│ ├── lab provision install/reprovision (bare-metal) │
│ ├── lab get servers --env production (query) │
│ ├── lab exec <server> -- <command> (remote execution) │
│ ├── lab logs <server> (log streaming) │
│ ├── lab apply -f infra.ts (pulumi via labd) │
│ └── lab get roles/users/permissions (RBAC management) │
│ │
│ Connects to: labd via mTLS │
└─────────────────────┬───────────────────────────────────────────┘
│ mTLS (client cert)
┌─────────────────────────────────────────────────────────────────┐
│ labmaster.ad.itaz.eu (infra node, k3s single-node) │
│ │
│ ┌──────────────────────────────────────────────────────┐ │
│ │ labd (master daemon) │ │
│ │ ├── Certificate Authority (issues agent certs) │ │
│ │ ├── RBAC Engine (roles, permissions, ACLs) │ │
│ │ ├── Agent Registry (connected agents, heartbeats) │ │
│ │ ├── Pulumi Executor (runs IaC on behalf of users) │ │
│ │ ├── Log Aggregator (receives agent logs) │ │
│ │ ├── Module Registry (configuration modules) │ │
│ │ └── REST API + WebSocket (agent connections) │ │
│ └──────────────────────────────────────────────────────┘ │
│ │
│ ┌──────────────────────────────────────────────────────┐ │
│ │ bastion (PXE provisioning) │ │
│ │ Running as k3s pod with hostNetwork │ │
│ └──────────────────────────────────────────────────────┘ │
└──────────┬──────────────────────────────────────────────────────┘
│ mTLS (agent certs)
┌──────────────────────┐ ┌──────────────────────┐ ┌────────────┐
│ ser9.ad.itaz.eu │ │ worker-2.ad.itaz.eu │ │ AWS EC2 │
│ (bare-metal worker) │ │ (bare-metal worker) │ │ instances │
│ │ │ │ │ │
│ lab-agent │ │ lab-agent │ │ lab-agent │
│ ├── heartbeat │ │ ├── heartbeat │ │ ├── ... │
│ ├── log shipping │ │ ├── log shipping │ │ └── ... │
│ ├── exec handler │ │ ├── exec handler │ │ │
│ └── module runner │ │ └── module runner │ │ │
└──────────────────────┘ └──────────────────────┘ └────────────┘
```
## Components
### 1. labd (Master Daemon)
The central control plane. Runs on labmaster.ad.itaz.eu as a k3s pod.
**Responsibilities:**
- Certificate Authority — signs agent certificates, manages trust chain
- Agent Registry — tracks connected agents, heartbeats, status
- RBAC — roles, permissions, ACLs per user/group/environment/cloud
- Pulumi Executor — runs Pulumi TypeScript code submitted by users
- Log Aggregator — receives and stores logs from agents
- Module Registry — stores and distributes configuration modules
- REST API — for CLI and external integrations
- WebSocket — persistent agent connections for real-time commands
**Tech:** Fastify, CockroachDB (PostgreSQL-compatible, via Prisma; reuse mcpctl patterns), WebSocket
### 2. lab-agent
Lightweight daemon running on every managed machine.
**Responsibilities:**
- Connect to labd via mTLS (agent certificate)
- Send heartbeats (status, load, disk, memory)
- Ship logs (journald → labd)
- Execute commands on demand (like `kubectl exec`)
- Run configuration modules (like `puppet agent -tv`)
- Report module run results
**Tech:** Standalone TypeScript binary (bun compiled), systemd service
### 3. lab CLI (extended)
Extends the existing `lab` CLI with platform management commands.
**New commands:**
```
# Server management
lab get servers # List all servers
lab get servers --env production # Filter by environment
lab get servers --cloud baremetal # Filter by cloud
lab get servers --label role=k3s-worker # Filter by label
lab describe server <name> # Detailed server info
lab exec <server> -- <command> # Remote command execution
lab logs <server> [-f] # Stream server logs
# Infrastructure as Code
lab apply -f <file.ts> # Execute Pulumi code via labd
lab plan -f <file.ts> # Dry-run Pulumi code
lab destroy -f <file.ts> # Tear down resources
# RBAC
lab get roles # List roles
lab get users # List users
lab create role <name> # Create role
lab bind role <role> --user <user> # Bind role to user
lab get permissions # List permissions
# Environment/Cloud management
lab get environments # List environments
lab get clouds # List clouds
lab create environment <name> --cloud <cloud>
# Module management
lab get modules # List available modules
lab apply module <name> --target <server> # Apply module to server
```
### 4. Certificate Authority
Built into labd. Issues and manages certificates for agents and users.
**Flow:**
```
1. Agent starts with a join token (one-time or reusable)
2. Agent generates CSR, sends to labd with token
3. labd validates token, signs certificate
4. Agent receives signed cert + CA cert
5. All future communication uses mTLS
For CLI users:
1. User runs `lab login` or `lab init`
2. labd issues a client certificate (or uses existing SSH keys)
3. CLI uses client cert for all API calls
```
**Token types:**
- **One-time token** — for individual bare-metal servers (generated during PXE provision)
- **Reusable token** — for autoscaling groups (AWS ASG instances use the same token)
### 5. RBAC Model
Reuse mcpctl's RBAC patterns. Hierarchical permissions:
```
Cloud → Environment → Server, with the action as the first field of each rule:
action:cloud:environment:server
Examples:
- exec:baremetal:lab:* — can exec on any lab server
- *:baremetal:lab:puppet — full access to puppet server
- read:aws:production:* — read-only on all AWS prod servers
- *:*:*:* — superadmin
```
**Resources:**
- servers, environments, clouds, modules, roles, users, pulumi-stacks
**Actions:**
- read, exec, apply, destroy, manage, admin
**Whitelist/Blacklist:**
- Roles can have `allow` and `deny` rules
- Deny takes precedence (like AWS IAM)
### 6. Module System
Configuration modules define the desired state of a server.
**Module structure:**
```
modules/
k3s-server/
module.yaml # Metadata: name, version, targets, deps
src/
index.ts # Module entry point
install.ts # Installation logic
configure.ts # Configuration logic
health.ts # Health check
tests/
install.test.ts
k3s-agent/
module.yaml
src/
index.ts
labd/
module.yaml
src/
index.ts # Deploy labd to k3s
```
**module.yaml:**
```yaml
name: k3s-server
version: 0.1.0
description: Install and configure k3s server
targets:
roles: [infra]
labels:
k3s: server
dependencies:
- base-server
```
**Module sources:**
- Built-in modules (in this repo, e.g., k3s-server, labd)
- External modules (separate git repos, pulled by URL)
- Module registry (future — like Puppet Forge)
### 7. Cloud/Environment Model
```
Cloud: baremetal
└── Environment: lab
├── Server: puppet.ad.itaz.eu (role=infra, labels={k3s=server})
├── Server: ser9.ad.itaz.eu (role=worker, labels={k3s=agent})
└── ...
Cloud: aws
├── Environment: production
├── Server: i-abc123 (from ASG web-servers)
├── Server: i-def456 (from ASG web-servers)
└── ...
└── Environment: staging
└── ...
```
Each bastion creates an environment under the `baremetal` cloud. AWS autoscaling groups create environments under the `aws` cloud.
### 8. Pulumi Integration
Users submit Pulumi TypeScript code to labd for execution.
```bash
# Apply infrastructure code
lab apply -f infra/k3s-cluster.ts --env lab
# The file is sent to labd, which:
# 1. Checks RBAC (does user have apply permission for this env?)
# 2. Creates a Pulumi stack
# 3. Executes `pulumi up` in a sandboxed environment
# 4. Streams output back to CLI
# 5. Stores state in Pulumi backend (local or S3)
```
**Future AWS extension:**
```typescript
// infra/aws-web-servers.ts
import * as aws from "@pulumi/aws";
const asg = new aws.autoscaling.Group("web-servers", {
maxSize: 10,
minSize: 2,
launchTemplate: { /* ... */ },
// User data installs lab-agent with reusable join token
});
```
## Project Structure
```
lab/
bastion/ # Existing — PXE provisioning
src/
shared/ # @lab/shared — types, constants, RBAC
labd/ # @lab/labd — master daemon
src/
main.ts
server.ts
ca/ # Certificate Authority
rbac/ # RBAC engine (reuse mcpctl patterns)
agents/ # Agent registry + WebSocket
pulumi/ # Pulumi executor
logs/ # Log aggregation
modules/ # Module registry
routes/ # REST API
agent/ # @lab/agent — agent daemon
src/
main.ts
connection.ts # mTLS WebSocket to labd
heartbeat.ts
executor.ts # Command execution
logs.ts # Log shipping
modules.ts # Module runner
cli/ # @lab/cli — extends existing CLI
src/
commands/
init/bastion/ # Existing bastion commands
provision/ # Existing provision commands
get/ # New: get servers/roles/users/etc
exec/ # New: remote execution
logs/ # New: log streaming
apply/ # New: pulumi apply
rbac/ # New: role management
modules/ # Built-in modules
k3s-server/ # Deploy k3s server
k3s-agent/ # Deploy k3s agent
labd/ # Deploy labd to k3s
lab-agent/ # Deploy lab-agent to servers
deploy/
k3s/ # Existing k3s manifests for bastion
labd/ # k3s manifests for labd
```
## Implementation Phases
### Phase 1: Foundation (current + next)
- [x] Bastion (PXE provisioning) — DONE
- [x] CLI structure (`lab init/provision`) — DONE
- [ ] Rename puppet to labmaster, reprovision
- [ ] Deploy k3s on labmaster
- [ ] Build labd skeleton (Fastify + Prisma)
- [ ] Certificate Authority (issue/sign certs)
- [ ] Agent skeleton (connect, heartbeat)
### Phase 2: Core Platform
- [ ] RBAC engine (roles, permissions, ACLs)
- [ ] `lab get servers` with environment/cloud/label filters
- [ ] `lab exec` remote command execution
- [ ] `lab logs` streaming
- [ ] Agent auto-enrollment via PXE provision (join token in kickstart)
### Phase 3: Infrastructure as Code
- [ ] Module system (define, apply, health check)
- [ ] k3s-server module (deploy k3s)
- [ ] labd module (deploy labd to k3s)
- [ ] Pulumi executor in labd
- [ ] `lab apply -f` command
### Phase 4: Multi-Cloud
- [ ] AWS provider (Pulumi-based)
- [ ] Reusable join tokens for autoscaling groups
- [ ] Cloud/environment model
- [ ] Auto-discovery of cloud instances
## Key Design Decisions
1. **Pulumi over Puppet** — TypeScript-native, same language for IaC and platform code
2. **mTLS over SSH** — proper PKI, scalable, no key management per-server
3. **Agents connect to master** (not master pushing to agents) — works through NATs, firewalls
4. **RBAC from day one** — security-first, deny by default
5. **Module system inspired by Puppet** — declarative, testable, versionable
6. **Multi-cloud extensible** — cloud is just a label, provider is pluggable
7. **Reuse mcpctl patterns** — Prisma DB, Fastify routes, CLI structure, RBAC model

View File

@@ -0,0 +1,93 @@
# Dockerfile.bastion -- PXE boot server (dnsmasq DHCP/TFTP + HTTP)
# Requires host networking and NET_ADMIN/NET_RAW capabilities.
# ── Stage 1: Build ───────────────────────────────────────────────
# Installs the full (dev + prod) dependency set and runs the workspace build.
# Only the dist/ directories produced here are copied into the runtime stage.
FROM node:22-alpine AS builder
# pnpm is activated through corepack at a pinned version.
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
WORKDIR /app
# Copy workspace config and package manifests first (layer cache):
# the dependency-install layer below is only invalidated when one of these
# manifest/lockfile/tsconfig files changes, not on every source edit.
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json tsconfig.base.json tsconfig.json ./
COPY src/shared/package.json src/shared/tsconfig.json src/shared/
COPY src/bastion/package.json src/bastion/tsconfig.json src/bastion/
COPY src/cli/package.json src/cli/tsconfig.json src/cli/
COPY src/modules/package.json src/modules/tsconfig.json src/modules/
# Install all dependencies (dev included -- needed for build)
# --frozen-lockfile makes the install fail instead of rewriting pnpm-lock.yaml.
RUN pnpm install --frozen-lockfile
# Copy source code
COPY src/shared/src/ src/shared/src/
COPY src/bastion/src/ src/bastion/src/
COPY src/cli/src/ src/cli/src/
COPY src/modules/src/ src/modules/src/
# The modules package keeps additional sources under modules/ next to src/.
COPY src/modules/modules/ src/modules/modules/
# Build TypeScript
# NOTE(review): presumably the root "build" script builds every copied
# workspace package -- confirm against the root package.json.
RUN pnpm build
# ── Stage 1b: Build iPXE snp.efi (uses UEFI SNP protocol for ISO boot) ──
# Produces two EFI binaries that the runtime stage copies out:
#   /tmp/ipxe/src/bin-x86_64-efi/snp.efi
#   /tmp/ipxe/src/bin-arm64-efi/snp.efi
FROM fedora:43 AS ipxe-builder
# Git ref (branch or tag) of iPXE to build. The default tracks the upstream
# default branch, which matches the previous behaviour; pass
# --build-arg IPXE_REF=<tag> to get a reproducible build.
ARG IPXE_REF=master
# Toolchain: native gcc for the x86_64 target plus the aarch64 cross compiler.
RUN dnf install -y git gcc make perl-interpreter xz-devel gcc-aarch64-linux-gnu && dnf clean all
# Shallow clone -- only the selected ref's tip is needed for the build.
RUN git clone --depth=1 --branch "${IPXE_REF}" https://github.com/ipxe/ipxe.git /tmp/ipxe
# Build the x86_64 image natively, then cross-compile the arm64 image.
RUN cd /tmp/ipxe/src && make bin-x86_64-efi/snp.efi && \
    make CROSS_COMPILE=aarch64-linux-gnu- bin-arm64-efi/snp.efi
# ── Stage 2: Production runtime (Fedora -- needs dnsmasq) ───────
# Final image: dnsmasq (DHCP/TFTP), iPXE boot images, ISO tooling and the
# compiled Node.js CLI that starts the bastion in the foreground.
FROM fedora:43
RUN dnf install -y \
        dnsmasq \
        ipxe-bootimgs-x86 \
        ipxe-bootimgs-aarch64 \
        iproute \
        curl \
        openssh-clients \
        nodejs \
        npm \
        xorriso \
        mtools \
    && dnf clean all
# iPXE snp.efi built from source (Fedora only ships snponly, which can't
# boot from CD-ROM/USB -- it requires PXE chainloading)
COPY --from=ipxe-builder /tmp/ipxe/src/bin-x86_64-efi/snp.efi /usr/share/ipxe/ipxe-snp-x86_64.efi
COPY --from=ipxe-builder /tmp/ipxe/src/bin-arm64-efi/snp.efi /usr/share/ipxe/arm64-efi/ipxe-snp.efi
# Install pnpm -- pinned to the same version the builder stage uses so that
# both stages resolve pnpm-lock.yaml identically.
RUN npm install -g pnpm@9.15.0
WORKDIR /app
# Copy workspace config and package manifests
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
COPY src/shared/package.json src/shared/
COPY src/bastion/package.json src/bastion/
COPY src/cli/package.json src/cli/
COPY src/modules/package.json src/modules/
# Install production dependencies
# Best effort: prefer a lockfile-exact install; if that fails, fall back to a
# regular production install rather than failing the image build.
RUN pnpm install --frozen-lockfile --prod 2>/dev/null || pnpm install --prod
# Copy built output from builder
COPY --from=builder /app/src/shared/dist/ src/shared/dist/
COPY --from=builder /app/src/bastion/dist/ src/bastion/dist/
COPY --from=builder /app/src/cli/dist/ src/cli/dist/
COPY --from=builder /app/src/modules/dist/ src/modules/dist/
# Create data directories
RUN mkdir -p /data/state /data/tftp /data/http
ENV NODE_ENV=production
# BASTION_DIR points at the data root created above.
ENV BASTION_DIR=/data
ENV HTTP_PORT=8080
# HTTP, plus the UDP ports used for DHCP (67), TFTP (69) and proxyDHCP/PXE (4011).
EXPOSE 8080/tcp
EXPOSE 67/udp
EXPOSE 69/udp
EXPOSE 4011/udp
# Run the bastion in the foreground so the container lifecycle follows the process.
ENTRYPOINT ["node", "src/cli/dist/index.js", "init", "bastion", "standalone", "start", "--foreground"]

73
bastion/Dockerfile.labd Normal file
View File

@@ -0,0 +1,73 @@
# Dockerfile.labd -- multi-stage build for the labd master daemon
# Runs the Fastify API server with Prisma/CockroachDB backend.
# ── Stage 1: Build ───────────────────────────────────────────────
# Installs the full dependency set, generates the Prisma client and compiles
# @lab/shared and @lab/labd. The runtime stage copies dist/, the Prisma schema
# and the generated client out of this stage.
FROM node:22-alpine AS builder
# pnpm is activated through corepack at a pinned version.
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
WORKDIR /app
# Copy workspace config and package manifests first (layer cache)
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json tsconfig.base.json tsconfig.json ./
COPY src/shared/package.json src/shared/tsconfig.json src/shared/
COPY src/labd/package.json src/labd/tsconfig.json src/labd/
# Install all dependencies (dev included -- needed for build)
RUN pnpm install --frozen-lockfile
# Copy Prisma schema and generate client
# This happens before the sources are copied, so the generate layer is reused
# when only application code changes.
COPY src/labd/prisma/ src/labd/prisma/
RUN pnpm --filter @lab/labd exec prisma generate
# Copy source code
COPY src/shared/src/ src/shared/src/
COPY src/labd/src/ src/labd/src/
# Build TypeScript (shared first via project references)
RUN pnpm --filter @lab/shared build && pnpm --filter @lab/labd build
# Hoist the generated Prisma client so stage 2 can COPY it from a stable path
# `find ... | head -1` takes the first '.prisma/client' directory found under
# the pnpm store; presumably the real path contains version-specific directory
# names and therefore cannot be hard-coded -- TODO confirm.
# NOTE(review): if find matches nothing, cp is called with a single argument
# and the build fails with a "missing destination" style error.
RUN mkdir -p /app/_prisma && \
cp -r $(find /app/node_modules/.pnpm -path '*/.prisma/client' -type d | head -1) /app/_prisma/client
# ── Stage 2: Production runtime ─────────────────────────────────
# Final image: production dependencies, compiled output and the generated
# Prisma client; runs labd as the unprivileged "node" user.
FROM node:22-alpine
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
WORKDIR /app
# Copy workspace config and package manifests
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
COPY src/shared/package.json src/shared/
COPY src/labd/package.json src/labd/
# Install production dependencies only
# Best effort: prefer a lockfile-exact install; if that fails, fall back to a
# regular production install rather than failing the image build.
RUN pnpm install --frozen-lockfile --prod 2>/dev/null || pnpm install --prod
# Copy built output from builder
COPY --from=builder /app/src/shared/dist/ src/shared/dist/
COPY --from=builder /app/src/labd/dist/ src/labd/dist/
# Copy Prisma schema + generated client into pnpm store location
# Prisma expects .prisma/client as a sibling of @prisma/ in the same node_modules
COPY --from=builder /app/src/labd/prisma/ src/labd/prisma/
COPY --from=builder /app/_prisma/client/ /tmp/_prisma_client/
# Locate the installed @prisma/client package and place the generated client
# next to it. The explicit emptiness check matters: with an empty result,
# `dirname ""` evaluates to "." and the client would be copied to
# /app/.prisma/client without any error, producing an image that only fails
# at runtime.
RUN PRISMA_CLIENT_DIR=$(find /app/node_modules/.pnpm -path '*/@prisma/client' -type d | head -1) && \
    if [ -z "$PRISMA_CLIENT_DIR" ]; then \
        echo "ERROR: @prisma/client not found under /app/node_modules/.pnpm" >&2; \
        exit 1; \
    fi && \
    NM_DIR="$(dirname "$(dirname "$PRISMA_CLIENT_DIR")")" && \
    mkdir -p "$NM_DIR/.prisma/client" && \
    cp -r /tmp/_prisma_client/* "$NM_DIR/.prisma/client/" && \
    echo "Installed Prisma generated client at: $NM_DIR/.prisma/client/" && \
    rm -rf /tmp/_prisma_client
ENV NODE_ENV=production
# Default connection string; override DATABASE_URL at deploy time as needed.
ENV DATABASE_URL=postgresql://root@cockroachdb:26257/labctl?sslmode=disable
ENV LABD_PORT=3100
ENV LABD_HOST=0.0.0.0
EXPOSE 3100
# Drop root privileges for the running daemon.
USER node
ENTRYPOINT ["node", "src/labd/dist/main.js"]

358
bastion/README.md Normal file
View File

@@ -0,0 +1,358 @@
# labctl
Infrastructure management platform for bare-metal servers, Kubernetes clusters, and cloud resources.
## Install
```bash
# From Gitea packages (Fedora/RHEL)
sudo dnf config-manager --add-repo https://mysources.co.uk/api/packages/michal/rpm.repo
sudo dnf install labctl
# From source
cd bastion && pnpm install && pnpm build
bun build src/cli/src/index.ts --compile --outfile dist/labctl
sudo cp dist/labctl /usr/bin/labctl
```
## Quick Start
```bash
# Start the bastion (PXE provisioning server)
sudo labctl init bastion standalone start
# PXE boot a machine — it gets discovered automatically
labctl provision list
# Install Fedora on a discovered machine
labctl provision install 78:55:36:08:35:14 labmaster --role infra
# Reprovision (SSH reboot into PXE, preserves /home /srv /var/lib/rancher)
labctl provision reprovision 78:55:36:08:35:14 labmaster --role infra
```
## Commands
### Bastion (PXE Provisioning)
```bash
# Lifecycle
sudo labctl init bastion standalone start # Start bastion (daemonized)
sudo labctl init bastion standalone start --foreground # Start in foreground
sudo labctl init bastion standalone stop # Stop bastion
labctl init bastion standalone status # Show status, PID, machine count
# Options
sudo labctl init bastion standalone start \
--port 8080 \
--dir /tmp/lab-bastion \
--domain ad.itaz.eu \
--dhcp-mode proxy \
--fedora 43 \
--timezone Europe/London
```
### Provisioning
```bash
# List all machines (discovered, queued, installing, installed)
labctl provision list
# Queue a machine for Fedora install
labctl provision install <mac> <hostname> --role worker # k3s worker (gets longhorn)
labctl provision install <mac> <hostname> --role infra # infra node (gets k3s server + /var/lib/rancher)
# Reprovision — queues install, SSHes in, sets PXE boot, reboots
labctl provision reprovision <mac> <hostname> --role infra
# Remove a machine from state
labctl provision forget <mac>
# Options
labctl provision install <mac> <hostname> \
--role worker \
--disk nvme0n1 \
--port 8080
```
### Server Management (planned)
```bash
# List servers with filters
labctl get servers
labctl get servers --env production
labctl get servers --cloud baremetal
labctl get servers --cloud aws
labctl get servers --label role=k3s-worker
labctl get servers --label asg=web-servers
# Detailed server info
labctl describe server/puppet
labctl describe server/ser9
```
### Remote Execution (planned)
```bash
# Execute commands on servers (audited, RBAC-checked)
labctl exec server/puppet -- whoami
labctl exec server/puppet -- systemctl status k3s
labctl exec server/puppet -it -- bash # interactive TTY
labctl exec server/puppet --timeout 30s -- long-running-task
```
### Kubernetes (planned)
```bash
# Proxied kubectl — audited, RBAC-checked, no kubeconfig needed
labctl kubectl --cluster lab get pods
labctl kubectl --cluster lab get nodes
labctl kubectl --cluster lab logs pod/nginx -f
labctl kubectl --cluster lab exec pod/nginx -- bash
labctl kubectl --cluster lab apply -f deployment.yaml
labctl kubectl --cluster aws-prod get pods --namespace app
# Cluster management
labctl clusters add lab --kubeconfig ~/.kube/config
labctl clusters list
labctl clusters remove staging
```
### Logs (planned)
```bash
# Server logs (journalctl passthrough via agent)
labctl logs server/puppet # all journal
labctl logs server/puppet -f # follow (live stream)
labctl logs server/puppet -n 100 # last 100 lines
labctl logs server/puppet -u k3s # specific unit
labctl logs server/puppet -u sshd --since "1h ago" # time range
labctl logs server/puppet --since "2026-03-17" --until "2026-03-18"
labctl logs server/puppet -k # kernel only
labctl logs server/puppet -p err # errors only
labctl logs server/puppet --file /var/log/nginx/error.log # tail a file
labctl logs server/puppet --file /var/log/nginx/error.log -n 50
# App logs (k8s pod logs)
labctl logs app/bastion
labctl logs app/bastion -f
labctl logs app/labd --container postgres
# Pulumi execution logs
labctl logs pulumi/run-abc123
labctl logs pulumi/run-abc123 -f # follow active run
# Bastion logs
labctl logs bastion/lab
labctl logs bastion/lab --mac 78:55:36:08:35:14 # specific machine's install
# Agent daemon logs
labctl logs agent/puppet
# Audit logs
labctl logs audit
labctl logs audit --user michal
labctl logs audit --user michal --since "1h ago"
labctl logs audit/michal-20260317-abc123 # specific session
labctl logs audit --action kubectl --cluster lab
labctl logs audit --action exec --server puppet
```
### Apps (planned, replaces Helm)
```bash
# Install Pulumi-based apps to Kubernetes
labctl apps list # available apps
labctl apps install bastion # deploy bastion
labctl apps install bastion --set port=8080 # with overrides
labctl apps install bastion -f values.yaml # from values file
labctl apps install monitoring # Prometheus + Grafana
# Manage deployed apps
labctl apps status bastion # health, version, config
labctl apps upgrade bastion # rolling upgrade
labctl apps history bastion # version history
labctl apps rollback bastion 2 # rollback to version 2
labctl apps uninstall bastion
```
### Infrastructure as Code (planned)
```bash
# Execute Pulumi programs via labd (RBAC-checked)
labctl apply -f infra/k3s-cluster.ts --env lab
labctl plan -f infra/k3s-cluster.ts --env lab # dry run
labctl destroy -f infra/k3s-cluster.ts --env lab
```
### RBAC (planned)
```bash
# Roles and permissions
labctl get roles
labctl get users
labctl create role viewer --allow "read:*:*:*"
labctl create role lab-admin --allow "*:baremetal:lab:*" --deny "destroy:*:*:*"
labctl bind role lab-admin --user michal
labctl unbind role lab-admin --user michal
# Permission model: action:cloud:environment:server
# read:*:*:* — read everything
# exec:baremetal:lab:* — exec on any lab server
# kubectl:*:*:* — kubectl on any cluster
# *:baremetal:lab:puppet — full access to puppet only
# manage:*:*:* — manage apps, clusters, tokens
```
### Environments and Clouds (planned)
```bash
labctl get environments
labctl get clouds
labctl create environment staging --cloud aws
labctl create environment lab --cloud baremetal
```
## Partition Layout
Machines installed by the bastion get this LVM layout:
### Worker role (k3s worker with Longhorn)
```
/boot/efi 600MB EFI
/boot 3GB ext4
── LVM VG: labvg ──
swap 27GB (matches RAM)
/ 33GB xfs
/var 100GB xfs
/var/log 10GB xfs
/home 10GB xfs ← preserved on reprovision
/srv 20GB xfs ← preserved on reprovision
/tmp tmpfs 4GB
/var/lib/longhorn rest xfs ← preserved on reprovision (Longhorn PVC storage)
```
### Infra role (k3s server, labmaster)
```
/boot/efi 600MB EFI
/boot 3GB ext4
── LVM VG: labvg ──
swap 27GB (matches RAM)
/ 33GB xfs
/var 100GB xfs
/var/log 10GB xfs
/home 10GB xfs ← preserved on reprovision
/srv 20GB xfs ← preserved on reprovision
/var/lib/rancher 20GB xfs ← preserved on reprovision (k3s etcd data)
/tmp tmpfs 4GB
```
On reprovision, OS partitions (`/`, `/var`, `/var/log`, `swap`) are wiped. Data partitions (`/home`, `/srv`, `/var/lib/longhorn`, `/var/lib/rancher`) are preserved.
## Architecture
```
┌──────────────────────────────────────────────────────────────┐
│ labctl CLI │
│ init | provision | get | exec | logs | apply | apps | kubectl│
└───────────────────────────┬──────────────────────────────────┘
│ mTLS
┌──────────────────────────────────────────────────────────────┐
│ labd (master daemon — stateless, on k3s) │
│ ┌─────┐ ┌──────┐ ┌──────┐ ┌────────┐ ┌──────┐ ┌────────┐ │
│ │ CA │ │ RBAC │ │ Logs │ │ Pulumi │ │ Apps │ │kubectl │ │
│ │ │ │ │ │relay │ │executor│ │ │ │ proxy │ │
│ └─────┘ └──────┘ └──────┘ └────────┘ └──────┘ └────────┘ │
│ CockroachDB │
└──────────────┬─────────────────────────┬─────────────────────┘
│ mTLS │ mTLS
┌──────────▼───────────┐ ┌──────────▼───────────┐
│ lab-agent │ │ lab-agent │
│ bare-metal server │ │ AWS EC2 / cloud VM │
│ ┌────────────────┐ │ │ ┌────────────────┐ │
│ │ heartbeat │ │ │ │ heartbeat │ │
│ │ exec handler │ │ │ │ exec handler │ │
│ │ log streamer │ │ │ │ log streamer │ │
│ │ module runner │ │ │ │ module runner │ │
│ └────────────────┘ │ │ └────────────────┘ │
└──────────────────────┘ └──────────────────────┘
```
## Technology Stack
| Component | Technology |
|-----------|-----------|
| Language | TypeScript (ESM) |
| CLI | Commander.js |
| HTTP Server | Fastify + WebSocket |
| Database | CockroachDB (PostgreSQL compatible) |
| ORM | Prisma |
| IaC | Pulumi (TypeScript) |
| k8s CNI | Cilium |
| Auth | mTLS (built-in CA) |
| Packaging | nfpm (RPM/DEB), bun compile |
| Containers | Podman + podman-compose |
| CI/CD | Gitea Actions |
| Testing | Vitest |
## Development
```bash
cd bastion
# Install dependencies
pnpm install
# Build all packages
pnpm build
# Run tests (30 tests)
pnpm test:run
# Type check
pnpm typecheck
# Lint
pnpm lint
# Generate shell completions
pnpm completions:generate
# Build standalone binary
bun build src/cli/src/index.ts --compile --outfile dist/labctl
# Build RPM/DEB packages (both architectures)
bash scripts/build-rpm.sh --all
# Build Docker image
bash scripts/build-bastion.sh
# Full release (build + publish + install)
bash scripts/release.sh
```
## Project Structure
```
bastion/
├── src/
│ ├── shared/ # @lab/shared — types, constants
│ ├── bastion/ # @lab/bastion — PXE provisioning server
│ ├── cli/ # @lab/cli — CLI binary (labctl)
│ ├── labd/ # @lab/labd — master daemon (planned)
│ └── agent/ # @lab/agent — server agent (planned)
├── modules/ # Built-in configuration modules (planned)
├── deploy/
│ └── k3s/ # Kubernetes manifests
├── stack/
│ ├── Dockerfile
│ └── docker-compose.yml
├── scripts/ # Build, publish, release scripts
├── completions/ # Generated shell completions
└── ARCHITECTURE.md
```
## License
MIT

View File

@@ -0,0 +1,121 @@
# labctl bash completions -- auto-generated by scripts/generate-completions.ts
# DO NOT EDIT MANUALLY -- run: pnpm completions:generate
# Programmable-completion handler for labctl.
#
# Collects the non-option words typed before the cursor and looks that chain
# up in the table below. If the chain is not a known subcommand path, the
# trailing word is assumed to be a positional argument or an option value
# (e.g. `provision logs myhost`, `provision install --role worker`); it is
# dropped and the lookup is retried, so option completion keeps working after
# arguments have been typed. A first word that is not a known command still
# yields only `-h --help`.
#
# NOTE(review): the header says this file is generated by
# scripts/generate-completions.ts; the same change presumably has to be made
# in the generator template so it is not overwritten -- confirm.
_labctl() {
    local cur prev words cword
    _init_completion || return

    local top_commands="version init provision config login doctor app roles"

    # Extract the non-option words before the cursor.
    local -a subcmd_chain=()
    local i
    for ((i=1; i < cword; i++)); do
        case "${words[i]}" in
            -*) ;; # skip options
            *) subcmd_chain+=("${words[i]}") ;;
        esac
    done

    while true; do
        case "${subcmd_chain[*]}" in
            "init bastion standalone start")
                COMPREPLY=($(compgen -W "--port --dir --domain --dhcp-mode --fedora --arch --timezone --locale --skip-dnsmasq --skip-artifacts --foreground -h --help" -- "$cur"))
                return ;;
            "init bastion standalone stop")
                COMPREPLY=($(compgen -W "--dir -h --help" -- "$cur"))
                return ;;
            "init bastion standalone status")
                COMPREPLY=($(compgen -W "--dir --port -h --help" -- "$cur"))
                return ;;
            "init bastion standalone")
                COMPREPLY=($(compgen -W "start stop status -h --help" -- "$cur"))
                return ;;
            "app labcontroller deploy")
                COMPREPLY=($(compgen -W "--user --port --crdb-replicas -h --help" -- "$cur"))
                return ;;
            "app labcontroller status")
                COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
                return ;;
            "app k3s install")
                COMPREPLY=($(compgen -W "--role --user --port --k3s-server --k3s-token -h --help" -- "$cur"))
                return ;;
            "app k3s health")
                COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
                return ;;
            "app k3s list")
                COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
                return ;;
            "init bastion")
                COMPREPLY=($(compgen -W "standalone -h --help" -- "$cur"))
                return ;;
            "provision list")
                COMPREPLY=($(compgen -W "--port -h --help" -- "$cur"))
                return ;;
            "provision install")
                COMPREPLY=($(compgen -W "--role --os --disk --port -h --help" -- "$cur"))
                return ;;
            "provision reprovision")
                COMPREPLY=($(compgen -W "--role --os --disk --port -h --help" -- "$cur"))
                return ;;
            "provision forget")
                COMPREPLY=($(compgen -W "--port -h --help" -- "$cur"))
                return ;;
            "provision logs")
                COMPREPLY=($(compgen -W "-f --follow --port -h --help" -- "$cur"))
                return ;;
            "config list")
                COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                return ;;
            "config get")
                COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                return ;;
            "config set")
                COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                return ;;
            "config path")
                COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                return ;;
            "app labcontroller")
                COMPREPLY=($(compgen -W "deploy status -h --help" -- "$cur"))
                return ;;
            "app k3s")
                COMPREPLY=($(compgen -W "install health list -h --help" -- "$cur"))
                return ;;
            "version")
                COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                return ;;
            "init")
                COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
                return ;;
            "provision")
                COMPREPLY=($(compgen -W "list install reprovision forget logs -h --help" -- "$cur"))
                return ;;
            "config")
                COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
                return ;;
            "login")
                COMPREPLY=($(compgen -W "--server -h --help" -- "$cur"))
                return ;;
            "doctor")
                COMPREPLY=($(compgen -W "--json -h --help" -- "$cur"))
                return ;;
            "app")
                COMPREPLY=($(compgen -W "labcontroller k3s -h --help" -- "$cur"))
                return ;;
            "roles")
                COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                return ;;
            "")
                COMPREPLY=($(compgen -W "$top_commands -h --help -v --version" -- "$cur"))
                return ;;
            *)
                # Unknown chain: drop the trailing word (positional argument
                # or option value) and retry with the shorter chain.
                subcmd_chain=("${subcmd_chain[@]:0:${#subcmd_chain[@]}-1}")
                if ((${#subcmd_chain[@]} == 0)); then
                    # Not even the first word is a known command.
                    COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                    return
                fi
                ;;
        esac
    done
}
complete -F _labctl labctl

View File

@@ -0,0 +1,202 @@
# labctl fish completions -- auto-generated by scripts/generate-completions.ts
# DO NOT EDIT MANUALLY -- run: pnpm completions:generate
# Start from a clean slate: erase any completions already registered for labctl.
complete -c labctl -e
# Disable fish's default file-name completion for labctl.
complete -c labctl -f
# Global options
complete -c labctl -s v -l version -d 'Show version'
complete -c labctl -s h -l help -d 'Show help'
# Helper: succeeds only when the non-option tokens typed so far are exactly the
# subcommand chain passed as arguments -- no fewer and no extra positional words.
function __labctl_using_cmd
    set -l chain $argv
    set -l want (count $chain)
    set -l typed (commandline -opc)
    set -l seen 0
    for word in $typed[2..]
        # Tokens that look like options never count towards the chain.
        string match -q -- "-*" $word; and continue
        set seen (math $seen + 1)
        # An extra positional word, or a word that differs from the chain, disqualifies.
        if test $seen -gt $want; or test "$word" != "$chain[$seen]"
            return 1
        end
    end
    test $seen -eq $want
end
# Helper: succeeds when the non-option tokens typed so far begin with the given
# subcommand chain; further positional words after the chain are allowed, so
# option completions keep applying once arguments have been typed.
function __labctl_in_cmd
    set -l chain $argv
    set -l want (count $chain)
    set -l typed (commandline -opc)
    set -l seen 0
    for word in $typed[2..]
        if not string match -q -- "-*" $word
            set seen (math $seen + 1)
            # Only the first $want positional words have to match the chain.
            if test $seen -le $want; and test "$word" != "$chain[$seen]"
                return 1
            end
        end
    end
    test $seen -ge $want
end
# Dynamic: fetch machine hostnames from bastion (installed + queued).
# Prints one unique hostname per line; prints nothing if the request or the
# JSON parsing fails. The curl timeouts keep an unresponsive bastion from
# freezing the interactive shell while completing.
# NOTE(review): the URL is fixed to localhost:8080 and ignores any --port typed
# on the command line.
function __labctl_installed_hosts
    curl -s --connect-timeout 1 --max-time 2 http://localhost:8080/api/machines 2>/dev/null |
        python3 -c 'import sys,json; d=json.load(sys.stdin); hosts=[v.get("hostname","") for v in {**d.get("install_queue",{}), **d.get("installed",{})}.values() if v.get("hostname")]; [print(h) for h in set(hosts)]' 2>/dev/null
end
# Dynamic: fetch all known MAC addresses (discovered + queue + installed).
# Prints the keys of the three state maps, one per line; prints nothing if the
# request or the JSON parsing fails. The curl timeouts keep an unresponsive
# bastion from freezing the interactive shell while completing.
function __labctl_known_macs
    curl -s --connect-timeout 1 --max-time 2 http://localhost:8080/api/machines 2>/dev/null |
        python3 -c 'import sys,json; d=json.load(sys.stdin); [print(k) for k in {**d.get("discovered",{}), **d.get("install_queue",{}), **d.get("installed",{})}]' 2>/dev/null
end
# Dynamic: fetch hostnames and MACs from all states.
# Prints the union of every MAC key (discovered, queued, installed) and every
# hostname (queued, installed), one per line; prints nothing on failure. The
# curl timeouts keep an unresponsive bastion from freezing the interactive
# shell while completing.
function __labctl_hosts_and_macs
    curl -s --connect-timeout 1 --max-time 2 http://localhost:8080/api/machines 2>/dev/null |
        python3 -c 'import sys,json; d=json.load(sys.stdin); a={**d.get("discovered",{}), **d.get("install_queue",{}), **d.get("installed",{})}; macs=list(a.keys()); hosts=[v.get("hostname","") for v in {**d.get("install_queue",{}), **d.get("installed",{})}.values() if v.get("hostname")]; [print(x) for x in set(macs+hosts)]' 2>/dev/null
end
# Target argument completions
# Each rule offers dynamic candidates (hostnames and/or MACs fetched from the
# bastion API) as the positional argument of the named subcommand.
# NOTE(review): __labctl_using_cmd counts every non-option token, so a value
# typed for an option (e.g. `--role worker`) looks like an extra positional
# word and these candidates stop being offered -- confirm whether intended.
complete -c labctl -n "__labctl_using_cmd app k3s install" -a "(__labctl_installed_hosts)" -d 'installed host'
complete -c labctl -n "__labctl_using_cmd app k3s health" -a "(__labctl_installed_hosts)" -d 'installed host'
complete -c labctl -n "__labctl_using_cmd app labcontroller deploy" -a "(__labctl_installed_hosts)" -d 'installed host'
complete -c labctl -n "__labctl_using_cmd app labcontroller status" -a "(__labctl_installed_hosts)" -d 'installed host'
complete -c labctl -n "__labctl_using_cmd provision install" -a "(__labctl_known_macs)" -d 'MAC address'
complete -c labctl -n "__labctl_using_cmd provision reprovision" -a "(__labctl_hosts_and_macs)" -d 'host or MAC'
complete -c labctl -n "__labctl_using_cmd provision forget" -a "(__labctl_hosts_and_macs)" -d 'host or MAC'
complete -c labctl -n "__labctl_using_cmd provision logs" -a "(__labctl_hosts_and_macs)" -d 'host or MAC'
# Top-level commands
# Offered only while none of the top-level command words has been typed yet.
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a version -d 'Show version information'
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a init -d 'Initialise infrastructure components'
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a provision -d 'Machine provisioning operations'
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a config -d 'View and modify CLI configuration'
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a login -d 'Authenticate with labd and obtain client certificate'
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a doctor -d 'Diagnose configuration and connectivity issues'
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a app -d 'Application management'
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a roles -d 'List available machine roles'
# init subcommands
complete -c labctl -n "__labctl_using_cmd init" -a bastion -d 'Bastion PXE server management'
# init bastion subcommands
complete -c labctl -n "__labctl_using_cmd init bastion" -a standalone -d 'Standalone bastion server lifecycle'
# init bastion standalone subcommands
complete -c labctl -n "__labctl_using_cmd init bastion standalone" -a start -d 'Start the bastion server (HTTP + dnsmasq PXE)'
complete -c labctl -n "__labctl_using_cmd init bastion standalone" -a stop -d 'Stop a running bastion server'
complete -c labctl -n "__labctl_using_cmd init bastion standalone" -a status -d 'Show bastion server status'
# init bastion standalone start options
# `-x` marks an option as requiring a value (and suppresses file completion for it);
# options without `-x` are plain flags.
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l port -d 'HTTP port' -x
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l dir -d 'Bastion data directory' -x
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l domain -d 'Internal domain for hostnames' -x
# NOTE(review): the description names two values (proxy, full) but no `-a` candidates
# are offered, unlike `provision install --role` below -- confirm.
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l dhcp-mode -d 'DHCP mode: proxy or full' -x
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l fedora -d 'Fedora version' -x
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l arch -d 'Architecture' -x
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l timezone -d 'Timezone' -x
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l locale -d 'Locale' -x
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l skip-dnsmasq -d 'Skip starting dnsmasq (for testing)'
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l skip-artifacts -d 'Skip downloading boot artifacts (for testing)'
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l foreground -d 'Run in foreground (default: daemonize)'
# init bastion standalone stop options
complete -c labctl -n "__labctl_in_cmd init bastion standalone stop" -l dir -d 'Bastion data directory' -x
# init bastion standalone status options
complete -c labctl -n "__labctl_in_cmd init bastion standalone status" -l dir -d 'Bastion data directory' -x
complete -c labctl -n "__labctl_in_cmd init bastion standalone status" -l port -d 'Bastion HTTP port' -x
# provision subcommands
complete -c labctl -n "__labctl_using_cmd provision" -a list -d 'List all known machines'
complete -c labctl -n "__labctl_using_cmd provision" -a install -d 'Queue a discovered machine for OS installation'
complete -c labctl -n "__labctl_using_cmd provision" -a reprovision -d 'Queue install + SSH reboot into PXE (target: hostname, MAC, or IP)'
complete -c labctl -n "__labctl_using_cmd provision" -a forget -d 'Remove a machine from bastion state'
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
# provision list options
complete -c labctl -n "__labctl_in_cmd provision list" -l port -d 'Bastion HTTP port' -x
# provision install options
# `-xa '...'` requires a value and offers the listed words as candidates.
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
complete -c labctl -n "__labctl_in_cmd provision install" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
complete -c labctl -n "__labctl_in_cmd provision install" -l disk -d 'Target disk device (auto-detect if omitted)' -x
complete -c labctl -n "__labctl_in_cmd provision install" -l port -d 'Bastion HTTP port' -x
# provision reprovision options
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l disk -d 'Target disk device (auto-detect if omitted)' -x
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l port -d 'Bastion HTTP port' -x
# provision forget options
complete -c labctl -n "__labctl_in_cmd provision forget" -l port -d 'Bastion HTTP port' -x
# provision logs options
complete -c labctl -n "__labctl_in_cmd provision logs" -s f -l follow -d 'Follow logs in real-time (SSE stream)'
complete -c labctl -n "__labctl_in_cmd provision logs" -l port -d 'Bastion HTTP port' -x
# config subcommands
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
complete -c labctl -n "__labctl_using_cmd config" -a set -d 'Set a configuration value'
complete -c labctl -n "__labctl_using_cmd config" -a path -d 'Show configuration file path'
# login options
complete -c labctl -n "__labctl_in_cmd login" -l server -d 'labd server URL' -x
# doctor options
complete -c labctl -n "__labctl_in_cmd doctor" -l json -d 'Output results as JSON'
# app subcommands
complete -c labctl -n "__labctl_using_cmd app" -a labcontroller -d 'Labcontroller deployment (bastion + labd + CockroachDB)'
complete -c labctl -n "__labctl_using_cmd app" -a k3s -d 'k3s cluster management'
# app labcontroller subcommands
complete -c labctl -n "__labctl_using_cmd app labcontroller" -a deploy -d 'Deploy labcontroller stack to a k3s node'
complete -c labctl -n "__labctl_using_cmd app labcontroller" -a status -d 'Check labcontroller deployment status (all hosts if no target)'
# app labcontroller deploy options
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l user -d 'SSH user' -x
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l port -d 'Bastion HTTP port' -x
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l crdb-replicas -d 'CockroachDB replicas' -x
# app labcontroller status options
complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l user -d 'SSH user' -x
complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l port -d 'Bastion HTTP port' -x
# app k3s subcommands
complete -c labctl -n "__labctl_using_cmd app k3s" -a install -d 'Install k3s on a target machine (hostname, IP, or MAC)'
complete -c labctl -n "__labctl_using_cmd app k3s" -a health -d 'Check k3s health (all hosts if no target given)'
complete -c labctl -n "__labctl_using_cmd app k3s" -a list -d 'List installed machines and their k3s status'
# app k3s install options
# NOTE(review): the --role description names two values (infra, worker) but no `-a`
# candidates are offered -- confirm.
complete -c labctl -n "__labctl_in_cmd app k3s install" -l role -d 'k3s role: infra (server) or worker (agent)' -x
complete -c labctl -n "__labctl_in_cmd app k3s install" -l user -d 'SSH user' -x
complete -c labctl -n "__labctl_in_cmd app k3s install" -l port -d 'Bastion HTTP port (for resolving target)' -x
complete -c labctl -n "__labctl_in_cmd app k3s install" -l k3s-server -d 'k3s server URL (required for worker role)' -x
complete -c labctl -n "__labctl_in_cmd app k3s install" -l k3s-token -d 'k3s join token (required for worker role)' -x
# app k3s health options
complete -c labctl -n "__labctl_in_cmd app k3s health" -l user -d 'SSH user' -x
complete -c labctl -n "__labctl_in_cmd app k3s health" -l port -d 'Bastion HTTP port' -x
# app k3s list options
complete -c labctl -n "__labctl_in_cmd app k3s list" -l user -d 'SSH user' -x
complete -c labctl -n "__labctl_in_cmd app k3s list" -l port -d 'Bastion HTTP port' -x

View File

@@ -0,0 +1,13 @@
# Non-secret runtime settings for the bastion pod. The bastion Deployment loads
# every key below as an environment variable via `envFrom`.
apiVersion: v1
kind: ConfigMap
metadata:
name: bastion-config
namespace: lab-infra
data:
HTTP_PORT: "8080"
DOMAIN: "ad.itaz.eu"
FEDORA_VERSION: "43"
DHCP_MODE: "proxy"
TIMEZONE: "Europe/London"
LOCALE: "en_GB.UTF-8"
# NOTE(review): this URL addresses the `lab-system` namespace, while the labd
# kustomization in this change sets `namespace: lab-infra` -- confirm which
# namespace labd is actually deployed to.
LABD_URL: "http://labd.lab-system.svc.cluster.local:3100"

View File

@@ -0,0 +1,86 @@
# Single-replica Deployment that runs the bastion server in the foreground
# (`init bastion standalone start --foreground`) from the bastion image.
apiVersion: apps/v1
kind: Deployment
metadata:
name: bastion
namespace: lab-infra
labels:
app: bastion
spec:
replicas: 1
# Recreate: the running pod is terminated before its replacement is created.
strategy:
type: Recreate
selector:
matchLabels:
app: bastion
template:
metadata:
labels:
app: bastion
spec:
# Pull credentials; the `gitea-registry` secret is not defined in this directory's
# kustomization and must already exist in the namespace.
imagePullSecrets:
- name: gitea-registry
# The pod shares the node's network namespace.
# NOTE(review): presumably required so dnsmasq can serve DHCP/PXE on the node's LAN -- confirm.
hostNetwork: true
# Keeps cluster DNS resolution working for a hostNetwork pod.
dnsPolicy: ClusterFirstWithHostNet
dnsConfig:
options:
- name: ndots
value: "1"
containers:
- name: bastion
image: mysources.co.uk/michal/lab/bastion:latest
imagePullPolicy: Always
command:
- node
- src/cli/dist/index.js
- init
- bastion
- standalone
- start
- --foreground
envFrom:
- configMapRef:
name: bastion-config
env:
# Join token comes from a Secret that is not part of this kustomization.
- name: BASTION_JOIN_TOKEN
valueFrom:
secretKeyRef:
name: bastion-join-token
key: token
ports:
- containerPort: 8080
name: http
volumeMounts:
- name: state
mountPath: /data
- name: ssh-keys
mountPath: /root/.ssh
readOnly: true
securityContext:
capabilities:
add:
- NET_ADMIN
- NET_RAW
# Startup allowance: 60 failures x 10 s = up to 600 s before the container is restarted.
startupProbe:
httpGet:
path: /api/machines
port: 8080
failureThreshold: 60
periodSeconds: 10
livenessProbe:
httpGet:
path: /api/machines
port: 8080
periodSeconds: 30
readinessProbe:
httpGet:
path: /api/machines
port: 8080
periodSeconds: 10
volumes:
- name: state
persistentVolumeClaim:
claimName: bastion-state
# The node's own /root/.ssh directory is mounted (read-only) into the container.
- name: ssh-keys
hostPath:
path: /root/.ssh
type: Directory

View File

@@ -0,0 +1,7 @@
# Kustomize entry point for the bastion manifests.
# The Secrets referenced by deployment.yaml (`gitea-registry`, `bastion-join-token`)
# are not listed here and must be created separately.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- configmap.yaml
- pvc.yaml
- deployment.yaml

View File

@@ -0,0 +1,4 @@
# Namespace that holds the bastion resources (ConfigMap, PVC, Deployment).
apiVersion: v1
kind: Namespace
metadata:
name: lab-infra

View File

@@ -0,0 +1,12 @@
# Persistent storage for the bastion; the Deployment mounts this claim at /data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: bastion-state
namespace: lab-infra
spec:
accessModes:
- ReadWriteOnce
# NOTE(review): `local-path` is presumably the k3s local-path provisioner, i.e.
# storage tied to a single node -- confirm.
storageClassName: local-path
resources:
requests:
storage: 10Gi

View File

@@ -0,0 +1,8 @@
# Non-secret settings for labd; the labd Deployment loads every key as an
# environment variable via `envFrom`. No namespace is set here -- the
# kustomization in this directory assigns one.
apiVersion: v1
kind: ConfigMap
metadata:
name: labd-config
data:
LABD_PORT: "3100"
LABD_HOST: "0.0.0.0"
LABD_LOG_LEVEL: "info"

View File

@@ -0,0 +1,44 @@
# Deployment for the labd daemon, listening on port 3100.
apiVersion: apps/v1
kind: Deployment
metadata:
name: labd
spec:
# NOTE(review): the HPA in this directory targets this Deployment with
# minReplicas: 2, so this value is overridden once the HPA is active and is
# reset on every re-apply -- consider omitting it; confirm.
replicas: 1
selector:
matchLabels:
app: labd
template:
metadata:
labels:
app: labd
spec:
containers:
- name: labd
image: mysources.co.uk/michal/lab/labd:latest
imagePullPolicy: Always
ports:
- containerPort: 3100
envFrom:
- configMapRef:
name: labd-config
# `labd-secrets` is not among this kustomization's resources; it must exist
# in the namespace before the pod can start.
- secretRef:
name: labd-secrets
livenessProbe:
httpGet:
path: /health/live
port: 3100
initialDelaySeconds: 10
periodSeconds: 15
readinessProbe:
httpGet:
path: /health/ready
port: 3100
initialDelaySeconds: 5
periodSeconds: 10
# The CPU request is the baseline for the HPA's utilisation percentage.
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi

View File

@@ -0,0 +1,18 @@
# Scales the labd Deployment between 2 and 10 replicas, targeting 70% average
# CPU utilisation (measured relative to the container's CPU request).
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: labd
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: labd
minReplicas: 2
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70

View File

@@ -0,0 +1,14 @@
# Kustomize entry point for labd: places every listed resource in `lab-infra`
# and stamps `app: labd` onto their labels and selectors.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: lab-infra
# NOTE(review): newer kustomize releases deprecate `commonLabels` in favour of
# `labels` -- confirm against the kustomize version in use.
commonLabels:
app: labd
resources:
- deployment.yaml
- service.yaml
- configmap.yaml
- hpa.yaml
- pdb.yaml

View File

@@ -0,0 +1,9 @@
# Limits voluntary disruptions (e.g. node drains) to one labd pod at a time.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: labd
spec:
maxUnavailable: 1
selector:
matchLabels:
app: labd

View File

@@ -0,0 +1,12 @@
# Cluster-internal Service exposing the labd pods on TCP port 3100.
apiVersion: v1
kind: Service
metadata:
name: labd
spec:
type: ClusterIP
selector:
app: labd
ports:
- port: 3100
targetPort: 3100
protocol: TCP

26
bastion/eslint.config.js Normal file
View File

@@ -0,0 +1,26 @@
import tseslint from '@typescript-eslint/eslint-plugin';
import tsparser from '@typescript-eslint/parser';
// ESLint flat config: type-aware TypeScript linting for every workspace
// package's sources (src/<package>/src/**/*.ts).
export default [
{
files: ['src/*/src/**/*.ts'],
languageOptions: {
parser: tsparser,
parserOptions: {
// Type-aware rules (e.g. strict-boolean-expressions) need each package's tsconfig.
project: ['./src/*/tsconfig.json'],
// NOTE(review): import.meta.dirname exists only in Node >= 20.11, while
// package.json `engines` allows >= 20.0.0 -- confirm the minimum Node version.
tsconfigRootDir: import.meta.dirname,
},
},
plugins: { '@typescript-eslint': tseslint },
rules: {
'@typescript-eslint/explicit-function-return-type': 'error',
'@typescript-eslint/no-explicit-any': 'error',
'@typescript-eslint/no-unused-vars': 'error',
'@typescript-eslint/strict-boolean-expressions': 'error',
// console.warn / console.error are allowed; other console calls only warn.
'no-console': ['warn', { allow: ['warn', 'error'] }],
},
},
{
// An object containing only `ignores` applies globally in flat config.
ignores: ['**/dist/**', '**/node_modules/**', '**/*.config.*'],
},
];

20
bastion/nfpm.yaml Normal file
View File

@@ -0,0 +1,20 @@
# nfpm packaging manifest for the labctl CLI: installs the binary plus the
# bash and fish completion files.
name: labctl
# scripts/build-rpm.sh rewrites this line with sed (pattern `^arch:`) for each
# target architecture; keep the key at the start of the line.
arch: amd64
version: 0.1.0
release: "1"
maintainer: michal
description: Lab infrastructure CLI for bare-metal provisioning
license: MIT
contents:
# scripts/build-rpm.sh also rewrites this `src:` value (pattern
# `src: ./dist/labctl$`) to the per-architecture binary; keep the path exactly
# as written, with nothing after it on the line.
- src: ./dist/labctl
dst: /usr/bin/labctl
file_info:
mode: 0755
- src: ./completions/labctl.bash
dst: /usr/share/bash-completion/completions/labctl
file_info:
mode: 0644
- src: ./completions/labctl.fish
dst: /usr/share/fish/vendor_completions.d/labctl.fish
file_info:
mode: 0644

43
bastion/package.json Normal file
View File

@@ -0,0 +1,43 @@
{
"name": "lab",
"version": "0.1.0",
"private": true,
"description": "PXE bastion server for discover-first bare-metal provisioning",
"type": "module",
"scripts": {
"build": "pnpm -r run build",
"test": "vitest",
"test:run": "vitest run",
"typecheck": "tsc --build",
"clean": "pnpm -r run clean && rimraf node_modules",
"lint": "eslint 'src/*/src/**/*.ts'",
"lint:fix": "eslint 'src/*/src/**/*.ts' --fix",
"completions:generate": "tsx scripts/generate-completions.ts --write",
"completions:check": "tsx scripts/generate-completions.ts --check",
"test:integration": "vitest run -c tests/integration/vitest.config.ts",
"test:integration:k3s": "vitest run -c tests/integration/vitest.config.ts -t k3s",
"test:integration:k3s:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t k3s",
"test:integration:pxe": "vitest run -c tests/integration/vitest.config.ts -t 'PXE boot'",
"test:integration:pxe:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'PXE boot'",
"test:integration:iso": "vitest run -c tests/integration/vitest.config.ts -t 'ISO boot'",
"test:integration:iso:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'ISO boot'",
"test:integration:arm-iso": "vitest run -c tests/integration/vitest.config.ts -t 'ARM ISO'",
"test:integration:arm-iso:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'ARM ISO'"
},
"engines": {
"node": ">=20.0.0",
"pnpm": ">=9.0.0"
},
"packageManager": "pnpm@9.15.0",
"devDependencies": {
"@types/node": "^22.10.0",
"@typescript-eslint/eslint-plugin": "^8.57.1",
"@typescript-eslint/parser": "^8.57.1",
"eslint": "^10.0.3",
"eslint-config-prettier": "^10.1.8",
"rimraf": "^6.0.0",
"tsx": "^4.21.0",
"typescript": "^5.7.0",
"vitest": "^3.0.0"
}
}

3646
bastion/pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
# Every directory directly under src/ is a pnpm workspace package.
packages:
- "src/*"

127
bastion/scripts/build-bastion.sh Executable file
View File

@@ -0,0 +1,127 @@
#!/bin/bash
# Build bastion container image (multi-arch) and push to Gitea container registry
# Abort on the first failing command.
set -o errexit

# Work from the project root (the parent of this script's directory) so that
# relative paths such as .env and package.json resolve.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Export every variable defined in .env (e.g. GITEA_TOKEN) when the file exists.
if [[ -f .env ]]; then
    set -o allexport
    source .env
    set +o allexport
fi

# ── Argument parsing ───────────────────────────────────────────────
# Defaults; the flag parser below may override them.
PUSH=false
PLATFORMS="linux/amd64,linux/arm64"
# Print the help text to stdout and terminate the script with status 0.
usage() {
cat <<EOF
Usage: $(basename "$0") [OPTIONS] [TAG]
Build bastion container image (multi-arch) and optionally push to registry.
Options:
--push Push to registry after building
--platforms LIST Comma-separated platforms (default: linux/amd64,linux/arm64)
-h, --help Show this help message
Arguments:
TAG Image tag (default: version from package.json)
Examples:
$(basename "$0") # build multi-arch, no push
$(basename "$0") --push # build + push with version tag
$(basename "$0") --push latest # build + push as :latest
$(basename "$0") --platforms linux/amd64 # build amd64 only
EOF
exit 0
}
# Parse command-line flags. Words that are not flags are collected in
# POSITIONAL_ARGS; the first one is later used as the image tag.
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
    case "$1" in
        --push)
            PUSH=true
            shift
            ;;
        --platforms)
            # Require a value: with only one argument left, `shift 2` fails and
            # `set -e` would end the script without any message.
            if [[ $# -lt 2 || -z "$2" ]]; then
                echo "ERROR: --platforms requires a value (e.g. linux/amd64)" >&2
                exit 1
            fi
            PLATFORMS="$2"
            shift 2
            ;;
        -h|--help)
            usage
            ;;
        -*)
            # A mistyped flag must not silently become the image tag.
            echo "ERROR: unknown option: $1" >&2
            exit 1
            ;;
        *)
            POSITIONAL_ARGS+=("$1")
            shift
            ;;
    esac
done
# Resolve the image coordinates: registry from $GITEA_REGISTRY (default
# mysources.co.uk), tag from the first positional argument (default: the
# version field of package.json).
REGISTRY="${GITEA_REGISTRY:-mysources.co.uk}"
REPO="michal/lab/bastion"
FULL_IMAGE="${REGISTRY}/${REPO}"
VERSION=$(node -p "require('./package.json').version")
TAG="${POSITIONAL_ARGS[0]:-$VERSION}"

printf '%s\n' \
    "==> Building bastion image" \
    " Tag: $TAG" \
    " Platforms: $PLATFORMS" \
    " Registry: $FULL_IMAGE"

# ── Build multi-arch manifest ────────────────────────────────────
MANIFEST="lab-bastion:$TAG"

# Best-effort removal of a manifest or image left over under the same local name.
podman manifest rm "$MANIFEST" 2>/dev/null || :
podman rmi "$MANIFEST" 2>/dev/null || :

printf '%s\n' "==> Building for platforms: $PLATFORMS..."
podman build --platform "$PLATFORMS" --manifest "$MANIFEST" -f Dockerfile.bastion .

# Show which architecture/os entries ended up in the manifest.
printf '%s\n' "==> Build complete. Manifest:"
podman manifest inspect "$MANIFEST" | grep -E '"(architecture|os)"'
# ── Push ─────────────────────────────────────────────────────────
# With --push: log in to the registry, push the manifest under $TAG and also
# under :latest, then link the package to the repository when the helper
# script is present.
if [ "$PUSH" = true ]; then
    # Token source: environment/.env first, then ~/.gitea-token.
    if [ -z "$GITEA_TOKEN" ]; then
        if [ -f "$HOME/.gitea-token" ]; then
            GITEA_TOKEN="$(cat "$HOME/.gitea-token")"
        else
            echo "ERROR: GITEA_TOKEN not set and ~/.gitea-token not found" >&2
            exit 1
        fi
    fi

    echo "==> Logging in to $REGISTRY..."
    # Pass the token on stdin: a `-p` argument is visible to other users in the
    # process list while podman runs.
    printf '%s' "$GITEA_TOKEN" | podman login -u michal --password-stdin "$REGISTRY"

    echo "==> Pushing $FULL_IMAGE:$TAG..."
    podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"

    # Also tag as :latest if not already
    if [ "$TAG" != "latest" ]; then
        echo "==> Also pushing as :latest..."
        podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
    fi

    # Link package to repository if script exists
    if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
        source "$SCRIPT_DIR/link-package.sh"
        link_package "container" "bastion"
    fi
    echo "==> Pushed successfully!"
else
    echo "==> Skipping push (use --push to push to registry)"
fi
echo "==> Done!"
echo " Image: $FULL_IMAGE:$TAG"
echo " Platforms: $PLATFORMS"

118
bastion/scripts/build-labd.sh Executable file
View File

@@ -0,0 +1,118 @@
#!/bin/bash
# Build labd container image (multi-arch) and push to Gitea container registry
# Abort on the first failing command.
set -o errexit

# Work from the project root (the parent of this script's directory) so that
# relative paths such as .env and package.json resolve.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Export every variable defined in .env (e.g. GITEA_TOKEN) when the file exists.
if [[ -f .env ]]; then
    set -o allexport
    source .env
    set +o allexport
fi

# ── Argument parsing ───────────────────────────────────────────────
# Defaults; the flag parser below may override them.
PUSH=false
PLATFORMS="linux/amd64,linux/arm64"
# Print the help text to stdout and terminate the script with status 0.
usage() {
cat <<EOF
Usage: $(basename "$0") [OPTIONS] [TAG]
Build labd container image (multi-arch) and optionally push to registry.
Options:
--push Push to registry after building
--platforms LIST Comma-separated platforms (default: linux/amd64,linux/arm64)
-h, --help Show this help message
Arguments:
TAG Image tag (default: version from package.json)
EOF
exit 0
}
# Parse command-line flags. Words that are not flags are collected in
# POSITIONAL_ARGS; the first one is later used as the image tag.
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
    case "$1" in
        --push)
            PUSH=true
            shift
            ;;
        --platforms)
            # Require a value: with only one argument left, `shift 2` fails and
            # `set -e` would end the script without any message.
            if [[ $# -lt 2 || -z "$2" ]]; then
                echo "ERROR: --platforms requires a value (e.g. linux/amd64)" >&2
                exit 1
            fi
            PLATFORMS="$2"
            shift 2
            ;;
        -h|--help)
            usage
            ;;
        -*)
            # A mistyped flag must not silently become the image tag.
            echo "ERROR: unknown option: $1" >&2
            exit 1
            ;;
        *)
            POSITIONAL_ARGS+=("$1")
            shift
            ;;
    esac
done
# Resolve the image coordinates: registry from $GITEA_REGISTRY (default
# mysources.co.uk), tag from the first positional argument (default: the
# version field of package.json).
REGISTRY="${GITEA_REGISTRY:-mysources.co.uk}"
REPO="michal/lab/labd"
FULL_IMAGE="${REGISTRY}/${REPO}"
VERSION=$(node -p "require('./package.json').version")
TAG="${POSITIONAL_ARGS[0]:-$VERSION}"

printf '%s\n' \
    "==> Building labd image" \
    " Tag: $TAG" \
    " Platforms: $PLATFORMS" \
    " Registry: $FULL_IMAGE"

# ── Build multi-arch manifest ────────────────────────────────────
MANIFEST="lab-labd:$TAG"

# Best-effort removal of a manifest or image left over under the same local name.
podman manifest rm "$MANIFEST" 2>/dev/null || :
podman rmi "$MANIFEST" 2>/dev/null || :

printf '%s\n' "==> Building for platforms: $PLATFORMS..."
podman build --platform "$PLATFORMS" --manifest "$MANIFEST" -f Dockerfile.labd .

# Show which architecture/os entries ended up in the manifest.
printf '%s\n' "==> Build complete. Manifest:"
podman manifest inspect "$MANIFEST" | grep -E '"(architecture|os)"'
# ── Push ─────────────────────────────────────────────────────────
# With --push: log in to the registry, push the manifest under $TAG and also
# under :latest, then link the package to the repository when the helper
# script is present.
if [ "$PUSH" = true ]; then
    # Token source: environment/.env first, then ~/.gitea-token.
    if [ -z "$GITEA_TOKEN" ]; then
        if [ -f "$HOME/.gitea-token" ]; then
            GITEA_TOKEN="$(cat "$HOME/.gitea-token")"
        else
            echo "ERROR: GITEA_TOKEN not set and ~/.gitea-token not found" >&2
            exit 1
        fi
    fi

    echo "==> Logging in to $REGISTRY..."
    # Pass the token on stdin: a `-p` argument is visible to other users in the
    # process list while podman runs.
    printf '%s' "$GITEA_TOKEN" | podman login -u michal --password-stdin "$REGISTRY"

    echo "==> Pushing $FULL_IMAGE:$TAG..."
    podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"

    if [ "$TAG" != "latest" ]; then
        echo "==> Also pushing as :latest..."
        podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
    fi

    if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
        source "$SCRIPT_DIR/link-package.sh"
        link_package "container" "labd"
    fi
    echo "==> Pushed successfully!"
else
    echo "==> Skipping push (use --push to push to registry)"
fi
echo "==> Done!"
echo " Image: $FULL_IMAGE:$TAG"
echo " Platforms: $PLATFORMS"

180
bastion/scripts/build-rpm.sh Executable file
View File

@@ -0,0 +1,180 @@
#!/bin/bash
# Abort on the first failing command.
set -o errexit

# Work from the project root (the parent of this script's directory).
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Export every variable defined in .env when the file exists.
if [[ -f .env ]]; then
    set -o allexport
    source .env
    set +o allexport
fi

# Put the per-user tool directories (npm globals, bun, ~/.local) ahead of the
# existing PATH.
export PATH="$HOME/.npm-global/bin:$HOME/.bun/bin:$HOME/.local/bin:$PATH"

# ── Argument parsing ───────────────────────────────────────────────
# Defaults; the flag parser below may override them.
BUILD_ALL=false
TARGET_ARCH=""
SKIP_TESTS=false
# Print the help text to stdout and terminate the script with status 0.
usage() {
cat <<EOF
Usage: $(basename "$0") [OPTIONS]
Build labctl binary and produce RPM/DEB packages.
Options:
--arch ARCH Target architecture: x86_64 or arm64 (default: host arch)
--all Build for both x86_64 and arm64
--skip-tests Skip unit tests (useful in CI where tests ran separately)
-h, --help Show this help message
EOF
exit 0
}
# Parse command-line flags into BUILD_ALL, TARGET_ARCH and SKIP_TESTS.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --arch)
            # Require a value: with only one argument left, `shift 2` fails and
            # `set -e` would end the script without any message.
            if [[ $# -lt 2 || -z "$2" ]]; then
                echo "ERROR: --arch requires a value (x86_64 or arm64)" >&2
                exit 1
            fi
            TARGET_ARCH="$2"
            shift 2
            ;;
        --all)
            BUILD_ALL=true
            shift
            ;;
        --skip-tests)
            SKIP_TESTS=true
            shift
            ;;
        -h|--help)
            usage
            ;;
        *)
            echo "Unknown option: $1" >&2
            # usage() ends with `exit 0`; run it in a subshell so the help text is
            # still shown but this script reports failure for a bad invocation.
            ( usage ) >&2
            exit 1
            ;;
    esac
done

# Validate the requested architecture up front. Common aliases are folded into
# the canonical names the mapping helpers understand; anything else is rejected
# here instead of reaching bun/nfpm with an empty target.
case "$TARGET_ARCH" in
    ""|x86_64|arm64) ;;
    amd64) TARGET_ARCH="x86_64" ;;
    aarch64) TARGET_ARCH="arm64" ;;
    *)
        echo "ERROR: unsupported --arch '$TARGET_ARCH' (expected x86_64 or arm64)" >&2
        exit 1
        ;;
esac
# ── Resolve host architecture ─────────────────────────────────────
# Print the host architecture as reported by `uname -m`, with aarch64 and
# arm64 both reported as "arm64"; every other value is printed unchanged.
detect_host_arch() {
    local kernel_arch
    kernel_arch="$(uname -m)"
    if [[ "$kernel_arch" == "aarch64" || "$kernel_arch" == "arm64" ]]; then
        echo "arm64"
    else
        echo "$kernel_arch"
    fi
}
# ── Architecture mapping helpers ──────────────────────────────────
# Each helper translates a canonical arch name (x86_64 / arm64) into the
# spelling one tool expects, and prints nothing for any other input.

# Print the `bun build --target` value for the given architecture.
bun_target_for() {
    if [[ "$1" == "x86_64" ]]; then
        echo "bun-linux-x64"
    elif [[ "$1" == "arm64" ]]; then
        echo "bun-linux-arm64"
    fi
}
# Print the nfpm `arch:` value for the given canonical architecture
# (x86_64 or arm64); prints nothing for any other input.
nfpm_arch_for() {
    if [[ "$1" == "x86_64" ]]; then
        echo "amd64"
    elif [[ "$1" == "arm64" ]]; then
        echo "arm64"
    fi
}
# Print the architecture string used in RPM file names for the given canonical
# architecture (x86_64 or arm64); prints nothing for any other input.
rpm_arch_for() {
    if [[ "$1" == "x86_64" ]]; then
        echo "x86_64"
    elif [[ "$1" == "arm64" ]]; then
        echo "aarch64"
    fi
}
# Print the architecture string used in DEB file names for the given canonical
# architecture (x86_64 or arm64); prints nothing for any other input.
deb_arch_for() {
    if [[ "$1" == "x86_64" ]]; then
        echo "amd64"
    elif [[ "$1" == "arm64" ]]; then
        echo "arm64"
    fi
}
# ── Build one architecture ────────────────────────────────────────
# Compile the standalone labctl binary for one architecture and wrap it in an
# RPM and a DEB package under dist/.
#
# Arguments:
#   $1  canonical architecture name: x86_64 or arm64
# Globals written:
#   RPM_FILE, DEB_FILE  paths of the packages found in dist/ after packaging
# Returns:
#   non-zero, after printing an error, for an unsupported architecture or when
#   an expected package file is missing after packaging
build_arch() {
    local arch="$1"
    local bun_target nfpm_arch rpm_arch deb_arch binary_name tmpconfig
    bun_target="$(bun_target_for "$arch")"
    nfpm_arch="$(nfpm_arch_for "$arch")"
    rpm_arch="$(rpm_arch_for "$arch")"
    deb_arch="$(deb_arch_for "$arch")"
    binary_name="dist/labctl-${arch}"

    # The mapping helpers print nothing for unknown names; stop here instead of
    # invoking bun/nfpm with empty arguments.
    if [ -z "$bun_target" ] || [ -z "$nfpm_arch" ] || [ -z "$rpm_arch" ] || [ -z "$deb_arch" ]; then
        echo "ERROR: unsupported architecture: '${arch}' (expected x86_64 or arm64)" >&2
        return 1
    fi

    echo ""
    echo "==> Bundling standalone binary for ${arch}..."
    bun build src/cli/src/index.ts --compile --target="${bun_target}" --outfile "${binary_name}"

    # nfpm.yaml hard-codes the arch and the binary path, so write one patched
    # copy and use it for both packagers.
    tmpconfig="$(mktemp /tmp/nfpm-XXXXXX.yaml)"
    # Remove the copy even if a later command aborts the script under `set -e`.
    # The path is expanded now because the local variable is out of scope by
    # the time an EXIT trap runs.
    trap "rm -f '$tmpconfig'" EXIT
    sed -e "s|^arch:.*|arch: ${nfpm_arch}|" \
        -e "s|src: ./dist/labctl$|src: ./${binary_name}|" \
        nfpm.yaml > "$tmpconfig"

    echo "==> Packaging RPM (${arch})..."
    nfpm pkg --config "$tmpconfig" --packager rpm --target dist/
    RPM_FILE=$(ls dist/labctl-*."${rpm_arch}".rpm 2>/dev/null | head -1)
    if [ -z "$RPM_FILE" ]; then
        echo "ERROR: no RPM for ${rpm_arch} found in dist/ after packaging" >&2
        return 1
    fi
    echo "==> Built: $RPM_FILE"
    echo " Size: $(du -h "$RPM_FILE" | cut -f1)"

    echo ""
    echo "==> Packaging DEB (${arch})..."
    nfpm pkg --config "$tmpconfig" --packager deb --target dist/
    DEB_FILE=$(ls dist/labctl_*_"${deb_arch}".deb 2>/dev/null | head -1)
    if [ -z "$DEB_FILE" ]; then
        echo "ERROR: no DEB for ${deb_arch} found in dist/ after packaging" >&2
        return 1
    fi
    echo "==> Built: $DEB_FILE"
    echo " Size: $(du -h "$DEB_FILE" | cut -f1)"

    # Normal path: clean up now and drop the safety-net trap.
    rm -f "$tmpconfig"
    trap - EXIT
}
# ── Main ──────────────────────────────────────────────────────────
# Order: unit tests (unless skipped) -> TypeScript build -> completions ->
# clean dist/ -> per-architecture binary and packages -> artifact listing.
if [[ "$SKIP_TESTS" == false ]]; then
    echo "==> Running unit tests..."
    pnpm test:run
    echo ""
fi

echo "==> Building TypeScript..."
pnpm build

echo "==> Generating shell completions..."
pnpm completions:generate

# Start from a dist/ without binaries or packages from earlier runs.
mkdir -p dist
rm -f dist/labctl dist/labctl-x86_64 dist/labctl-arm64 dist/labctl-*.rpm dist/labctl*.deb

if [[ "$BUILD_ALL" == true ]]; then
    # --all takes precedence over --arch.
    for each_arch in "x86_64" "arm64"; do
        build_arch "$each_arch"
    done
elif [[ -n "$TARGET_ARCH" ]]; then
    build_arch "$TARGET_ARCH"
else
    # Neither flag given: build for the machine running the script.
    HOST_ARCH="$(detect_host_arch)"
    build_arch "$HOST_ARCH"
fi

echo ""
echo "==> Build complete. Artifacts in dist/:"
ls -lh dist/labctl* 2>/dev/null || echo " (none)"

View File

@@ -0,0 +1,444 @@
#!/usr/bin/env tsx
/**
* generate-completions.ts -- auto-generates shell completions from the commander.js command tree.
*
* Usage:
* tsx scripts/generate-completions.ts # print generated files to stdout
* tsx scripts/generate-completions.ts --write # write completions/ files
* tsx scripts/generate-completions.ts --check # exit 0 if files match, 1 if stale
*
* Requires `pnpm build` to have run first (workspace packages must be compiled).
*/
import { Command, type Option, type Argument } from 'commander';
import { readFileSync, writeFileSync, mkdirSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = join(__dirname, '..');
// ============================================================
// Command tree extraction
// ============================================================
/**
 * Plain-data snapshot of one commander command, as produced by `extractCommand`.
 * The completion generators work on this tree instead of live `Command` objects.
 */
interface CmdInfo {
// Command name as returned by `cmd.name()`.
name: string;
// Command description as returned by `cmd.description()`.
description: string;
// True when commander's `_hidden` flag is set; generators skip hidden commands.
hidden: boolean;
// Options declared on this command, excluding `--help` and `--version`.
options: OptInfo[];
// Positional arguments taken from `registeredArguments`.
args: ArgInfo[];
// Child commands; a `help` child is replaced by a synthetic entry.
subcommands: CmdInfo[];
}
/** Plain-data snapshot of one commander option, as produced by `extractOption`. */
interface OptInfo {
// Short flag including its dash (the fish generator strips the dash); absent when the option has none.
short?: string;
// Long flag including the leading `--`.
long: string;
// Help text shown next to the completion.
description: string;
// True when the option declares a required or an optional value.
takesValue: boolean;
// Allowed values copied from the option's `argChoices`, when declared.
choices?: string[];
// Copied from the option's `negate` field.
negate: boolean;
}
/** Plain-data snapshot of one positional argument, as produced by `extractArgument`. */
interface ArgInfo {
// Argument name.
name: string;
// Help text for the argument.
description: string;
// Copied from the argument's `required` field.
required: boolean;
// Copied from the argument's `variadic` field.
variadic: boolean;
// Allowed values, when the argument declares choices.
choices?: string[];
}
/**
 * Flattens a commander `Option` into the plain `OptInfo` record used by the generators.
 *
 * @param opt - Option taken from a command's `options` list.
 * @returns The flags, description, value/choice metadata and negate flag of `opt`.
 */
function extractOption(opt: Option): OptInfo {
  // Single structural view of the fields we read, instead of one cast per field.
  const raw = opt as unknown as {
    short?: string;
    long: string;
    required?: boolean;
    optional?: boolean;
    argChoices?: string[];
    negate?: boolean;
  };
  // An option takes a value when it declares either a required or an optional one.
  const takesValue = raw.required || raw.optional || false;
  return {
    short: raw.short || undefined,
    long: raw.long,
    description: opt.description,
    takesValue,
    choices: raw.argChoices || undefined,
    negate: raw.negate || false,
  };
}
/**
 * Flattens a commander `Argument` into the plain `ArgInfo` record used by the generators.
 *
 * commander stores the values passed to `Argument.choices()` in `argChoices`
 * (the same field name `Option` uses). The previous implementation read
 * `_choices`, so `choices` was always undefined. `_choices` is still consulted
 * as a fallback in case some version or wrapper sets it.
 *
 * @param arg - Argument taken from a command's `registeredArguments`.
 * @returns Name, description, required/variadic flags and declared choices of `arg`.
 */
function extractArgument(arg: Argument): ArgInfo {
  const raw = arg as unknown as {
    _name?: string;
    required: boolean;
    variadic: boolean;
    argChoices?: string[];
    _choices?: string[];
  };
  return {
    name: raw._name ?? arg.name(),
    description: arg.description,
    required: raw.required,
    variadic: raw.variadic,
    choices: raw.argChoices || raw._choices || undefined,
  };
}
/**
 * Recursively converts a commander `Command` into a `CmdInfo` tree.
 *
 * `--help` / `--version` options are dropped (the fish and bash generators emit
 * their own help and version flags), and a `help` subcommand, when present, is
 * replaced by a fixed synthetic entry appended after the real subcommands.
 *
 * @param cmd - Command to snapshot.
 * @returns The snapshot of `cmd` including all descendants.
 */
function extractCommand(cmd: Command): CmdInfo {
  const children = cmd.commands as Command[];
  const hasHelpCommand = children.some((sub) => sub.name() === 'help');

  // Options, minus the built-in help/version flags.
  const options: OptInfo[] = [];
  for (const o of cmd.options as Option[]) {
    const long = (o as unknown as Record<string, string>).long;
    if (long === '--help' || long === '--version') continue;
    options.push(extractOption(o));
  }

  // Positional arguments; commands without any yield an empty list.
  const registered = (cmd as unknown as Record<string, Argument[]>).registeredArguments ?? [];
  const args: ArgInfo[] = [];
  for (const a of registered) {
    args.push(extractArgument(a));
  }

  // Real subcommands first, then the synthetic help entry (if any) at the end.
  const subcommands: CmdInfo[] = [];
  for (const sub of children) {
    if (sub.name() !== 'help') subcommands.push(extractCommand(sub));
  }
  if (hasHelpCommand) {
    subcommands.push({
      name: 'help',
      description: 'display help for command',
      hidden: false,
      options: [],
      args: [],
      subcommands: [],
    });
  }

  return {
    name: cmd.name(),
    description: cmd.description(),
    hidden: (cmd as unknown as Record<string, boolean>)._hidden ?? false,
    options,
    args,
    subcommands,
  };
}
/**
 * Loads the CLI entry module, builds the commander program and snapshots it.
 *
 * @returns The `CmdInfo` tree rooted at the program returned by `createProgram()`.
 */
async function extractTree(): Promise<CmdInfo> {
  const cliModule = (await import('../src/cli/src/index.js')) as { createProgram: () => Command };
  return extractCommand(cliModule.createProgram());
}
// ============================================================
// Utilities
// ============================================================
/**
 * Escapes text for use inside a single-quoted fish string.
 *
 * Inside fish single quotes the only escape sequences are `\'` and `\\`, so both
 * characters must be escaped. Escaping only the quote (as before) let a
 * description ending in a backslash swallow the closing quote of the generated
 * `-d '...'` argument.
 *
 * @param s - Raw text (typically a command or option description).
 * @returns `s` with every backslash and single quote prefixed by a backslash.
 */
function esc(s: string): string {
  return s.replace(/[\\']/g, (ch) => `\\${ch}`);
}
/**
 * Lists every descendant of `cmd` together with its full command path.
 *
 * The walk is pre-order: each subcommand is followed immediately by its own
 * descendants. `cmd` itself is not part of the result.
 *
 * @param cmd - Root of the (sub)tree to walk.
 * @param prefix - Path segments leading to `cmd`; prepended to every returned path.
 * @returns One `{ path, cmd }` entry per descendant command.
 */
function collectCommands(cmd: CmdInfo, prefix: string[] = []): { path: string[]; cmd: CmdInfo }[] {
  return cmd.subcommands.flatMap((child) => {
    const childPath = [...prefix, child.name];
    return [{ path: childPath, cmd: child }, ...collectCommands(child, childPath)];
  });
}
// ============================================================
// Fish completion generator
// ============================================================
/**
 * Renders the fish completion script for the command tree rooted at `root`.
 *
 * The script is built line by line through `emit` and consists of, in order:
 * a reset of existing completions, the global -v/-h flags, two fish helper
 * functions that inspect the current command line, three helper functions that
 * query the bastion HTTP API for hostnames/MACs, hard-coded target completions
 * for selected subcommands, the top-level commands, and finally the
 * subcommands and options of every non-hidden command.
 *
 * @param root - Command tree; `root.name` is used as the binary name.
 * @returns The complete script text, terminated by a newline.
 */
function generateFish(root: CmdInfo): string {
const lines: string[] = [];
const emit = (s: string): void => { lines.push(s); };
const BIN = root.name;
emit(`# ${BIN} fish completions -- auto-generated by scripts/generate-completions.ts`);
emit('# DO NOT EDIT MANUALLY -- run: pnpm completions:generate');
emit('');
// `-e` erases any completions already registered for the binary; `-f` disables file completion.
emit(`complete -c ${BIN} -e`);
emit(`complete -c ${BIN} -f`);
emit('');
// Global options
emit('# Global options');
emit(`complete -c ${BIN} -s v -l version -d 'Show version'`);
emit(`complete -c ${BIN} -s h -l help -d 'Show help'`);
emit('');
const allCmds = collectCommands(root);
// Helper: test if EXACTLY the given subcommand chain is present (for subcommand suggestions)
// The emitted function ignores tokens starting with "-" and fails as soon as a
// non-option token differs from, or goes beyond, the expected chain.
emit('# Helper: test if exactly a subcommand chain is active (no extra positional args)');
emit(`function __${BIN}_using_cmd`);
emit(' set -l tokens (commandline -opc)');
emit(' set -l expected $argv');
emit(' set -l depth (count $expected)');
emit(' set -l found 0');
emit(' set -l i 1');
emit(' for tok in $tokens[2..]');
emit(' if string match -q -- "-*" $tok');
emit(' continue');
emit(' end');
emit(' set i (math $i + 1)');
emit(' set -l idx (math $i - 1)');
emit(' if test $idx -le $depth');
emit(' if test "$tok" != "$expected[$idx]"');
emit(' return 1');
emit(' end');
emit(' set found (math $found + 1)');
emit(' else');
emit(' return 1');
emit(' end');
emit(' end');
emit(' test $found -eq $depth');
emit('end');
emit('');
// Helper: test if command chain STARTS WITH the given prefix (for options that apply after args)
// Unlike the helper above, extra non-option tokens after the chain are tolerated.
emit('# Helper: test if command starts with a subcommand chain (options still apply after args)');
emit(`function __${BIN}_in_cmd`);
emit(' set -l tokens (commandline -opc)');
emit(' set -l expected $argv');
emit(' set -l depth (count $expected)');
emit(' set -l found 0');
emit(' for tok in $tokens[2..]');
emit(' if string match -q -- "-*" $tok');
emit(' continue');
emit(' end');
emit(' set found (math $found + 1)');
emit(' if test $found -le $depth');
emit(' if test "$tok" != "$expected[$found]"');
emit(' return 1');
emit(' end');
emit(' end');
emit(' end');
emit(' test $found -ge $depth');
emit('end');
emit('');
// Dynamic completions: fetch machine data from bastion API
// Each helper pipes the JSON from http://localhost:8080/api/machines into a
// python3 one-liner; stderr is discarded on both sides of the pipe.
emit('# Dynamic: fetch machine hostnames from bastion (installed + queued)');
emit(`function __${BIN}_installed_hosts`);
emit(' curl -s http://localhost:8080/api/machines 2>/dev/null | ');
emit(" python3 -c 'import sys,json; d=json.load(sys.stdin); hosts=[v.get(\"hostname\",\"\") for v in {**d.get(\"install_queue\",{}), **d.get(\"installed\",{})}.values() if v.get(\"hostname\")]; [print(h) for h in set(hosts)]' 2>/dev/null");
emit('end');
emit('');
emit('# Dynamic: fetch all known MAC addresses (discovered + queue + installed)');
emit(`function __${BIN}_known_macs`);
emit(' curl -s http://localhost:8080/api/machines 2>/dev/null | ');
emit(" python3 -c 'import sys,json; d=json.load(sys.stdin); [print(k) for k in {**d.get(\"discovered\",{}), **d.get(\"install_queue\",{}), **d.get(\"installed\",{})}]' 2>/dev/null");
emit('end');
emit('');
emit('# Dynamic: fetch hostnames and MACs from all states');
emit(`function __${BIN}_hosts_and_macs`);
emit(' curl -s http://localhost:8080/api/machines 2>/dev/null | ');
emit(" python3 -c 'import sys,json; d=json.load(sys.stdin); a={**d.get(\"discovered\",{}), **d.get(\"install_queue\",{}), **d.get(\"installed\",{})}; macs=list(a.keys()); hosts=[v.get(\"hostname\",\"\") for v in {**d.get(\"install_queue\",{}), **d.get(\"installed\",{})}.values() if v.get(\"hostname\")]; [print(x) for x in set(macs+hosts)]' 2>/dev/null");
emit('end');
emit('');
// Target completions for commands that accept hostname/IP/MAC
// NOTE(review): these command paths are hard-coded here rather than derived from
// the command tree, so they need manual updates when the CLI layout changes.
emit('# Target argument completions');
// app k3s — takes hostname/IP
emit(`complete -c ${BIN} -n "__${BIN}_using_cmd app k3s install" -a "(__${BIN}_installed_hosts)" -d 'installed host'`);
emit(`complete -c ${BIN} -n "__${BIN}_using_cmd app k3s health" -a "(__${BIN}_installed_hosts)" -d 'installed host'`);
emit(`complete -c ${BIN} -n "__${BIN}_using_cmd app labcontroller deploy" -a "(__${BIN}_installed_hosts)" -d 'installed host'`);
emit(`complete -c ${BIN} -n "__${BIN}_using_cmd app labcontroller status" -a "(__${BIN}_installed_hosts)" -d 'installed host'`);
// provision install — takes MAC then hostname
emit(`complete -c ${BIN} -n "__${BIN}_using_cmd provision install" -a "(__${BIN}_known_macs)" -d 'MAC address'`);
// provision reprovision/forget/logs — takes MAC or hostname
emit(`complete -c ${BIN} -n "__${BIN}_using_cmd provision reprovision" -a "(__${BIN}_hosts_and_macs)" -d 'host or MAC'`);
emit(`complete -c ${BIN} -n "__${BIN}_using_cmd provision forget" -a "(__${BIN}_hosts_and_macs)" -d 'host or MAC'`);
emit(`complete -c ${BIN} -n "__${BIN}_using_cmd provision logs" -a "(__${BIN}_hosts_and_macs)" -d 'host or MAC'`);
emit('');
// Top-level commands
const topCmds = root.subcommands.filter((c) => !c.hidden);
emit('# Top-level commands');
for (const cmd of topCmds) {
// Offered only while none of the top-level command names has been typed yet.
emit(`complete -c ${BIN} -n "not __fish_seen_subcommand_from ${topCmds.map((c) => c.name).join(' ')}" -a ${cmd.name} -d '${esc(cmd.description)}'`);
}
emit('');
// Subcommands and options at each level
for (const { path, cmd } of allCmds) {
// Only the hidden command itself is skipped here; its descendants are separate entries in allCmds.
if (cmd.hidden) continue;
// If this command has subcommands, offer them
const visibleSubs = cmd.subcommands.filter((s) => !s.hidden);
if (visibleSubs.length > 0) {
const parentCondition = `__${BIN}_using_cmd ${path.join(' ')}`;
emit(`# ${path.join(' ')} subcommands`);
for (const sub of visibleSubs) {
emit(`complete -c ${BIN} -n "${parentCondition}" -a ${sub.name} -d '${esc(sub.description)}'`);
}
emit('');
}
// Options for this command (use __in_cmd so options complete even after positional args)
if (cmd.options.length > 0) {
const condition = `__${BIN}_in_cmd ${path.join(' ')}`;
emit(`# ${path.join(' ')} options`);
for (const opt of cmd.options) {
const parts = [`complete -c ${BIN} -n "${condition}"`];
// fish's -s / -l take the flag name without its leading dashes.
if (opt.short) parts.push(`-s ${opt.short.replace('-', '')}`);
parts.push(`-l ${opt.long.replace(/^--/, '')}`);
parts.push(`-d '${esc(opt.description)}'`);
if (opt.takesValue) {
// -x marks the option as requiring an argument (no file completion); -a supplies the choice list.
if (opt.choices) {
parts.push(`-xa '${opt.choices.join(' ')}'`);
} else {
parts.push('-x');
}
}
emit(parts.join(' '));
}
emit('');
}
}
return lines.join('\n') + '\n';
}
// ============================================================
// Bash completion generator
// ============================================================
/**
 * Renders the bash completion script for the command tree rooted at `root`.
 *
 * The generated `_<bin>` function reconstructs the chain of subcommands typed so
 * far and looks it up in a `case` statement with one arm per non-hidden command
 * path, offering that command's visible subcommands and option flags.
 *
 * Fix: the generated loop declared and checked `skip_next` but nothing ever set
 * it, so the value of an option (`--iface eth0`) was appended to the subcommand
 * chain and no `case` arm matched any more. Every flag whose option takes a
 * value now gets a pattern that sets `skip_next=true`. Flags are collected across
 * the whole tree, so a flag name that takes a value in one command and not in
 * another is treated as value-taking everywhere.
 *
 * @param root - Command tree; `root.name` is used as the binary name.
 * @returns The complete script text, terminated by a newline.
 */
function generateBash(root: CmdInfo): string {
  const lines: string[] = [];
  const emit = (s: string): void => { lines.push(s); };
  const BIN = root.name;

  emit(`# ${BIN} bash completions -- auto-generated by scripts/generate-completions.ts`);
  emit('# DO NOT EDIT MANUALLY -- run: pnpm completions:generate');
  emit('');

  const allCmds = collectCommands(root);
  const topCmds = root.subcommands.filter((c) => !c.hidden).map((c) => c.name);

  // Flags (short and long) of every option that consumes the following word.
  const valueFlags = new Set<string>();
  for (const cmd of [root, ...allCmds.map((entry) => entry.cmd)]) {
    for (const opt of cmd.options) {
      if (!opt.takesValue) continue;
      if (opt.short) valueFlags.add(opt.short);
      if (opt.long) valueFlags.add(opt.long);
    }
  }

  emit(`_${BIN}() {`);
  emit(' local cur prev words cword');
  emit(' _init_completion || return');
  emit('');
  emit(` local top_commands="${topCmds.join(' ')}"`);
  emit('');

  // Build chain of subcommands from command line
  emit(' # Extract the subcommand chain (skip options and their values)');
  emit(' local -a subcmd_chain=()');
  emit(' local i skip_next=false');
  emit(' for ((i=1; i < cword; i++)); do');
  emit(' if $skip_next; then skip_next=false; continue; fi');
  emit(' case "${words[i]}" in');
  if (valueFlags.size > 0) {
    // Must precede the generic `-*` arm, which would otherwise match first.
    emit(` ${Array.from(valueFlags).join('|')}) skip_next=true ;; # option consumes the next word`);
  }
  emit(' -*) ;; # skip options');
  emit(' *) subcmd_chain+=("${words[i]}") ;;');
  emit(' esac');
  emit(' done');
  emit('');
  emit(' local chain_len=${#subcmd_chain[@]}');
  emit(' local chain_str="${subcmd_chain[*]}"');
  emit('');

  // Build case statement for each command path
  emit(' case "$chain_str" in');
  // Start with the deepest paths first to match longest
  const sortedCmds = [...allCmds].sort((a, b) => b.path.length - a.path.length);
  for (const { path, cmd } of sortedCmds) {
    if (cmd.hidden) continue;
    const pathStr = path.join(' ');
    const visibleSubs = cmd.subcommands.filter((s) => !s.hidden).map((s) => s.name);
    const optFlags: string[] = [];
    for (const opt of cmd.options) {
      if (opt.short) optFlags.push(opt.short);
      optFlags.push(opt.long);
    }
    optFlags.push('-h', '--help');
    const completions = [...visibleSubs, ...optFlags].join(' ');
    emit(` "${pathStr}")`);
    emit(` COMPREPLY=($(compgen -W "${completions}" -- "$cur"))`);
    emit(' return ;;');
  }
  // Top-level (no subcommand yet)
  emit(' "")');
  emit(` COMPREPLY=($(compgen -W "$top_commands -h --help -v --version" -- "$cur"))`);
  emit(' return ;;');
  // Default
  emit(' *)');
  emit(' COMPREPLY=($(compgen -W "-h --help" -- "$cur"))');
  emit(' return ;;');
  emit(' esac');
  emit('}');
  emit('');
  emit(`complete -F _${BIN} ${BIN}`);
  return lines.join('\n') + '\n';
}
// ============================================================
// Main
// ============================================================
/**
 * CLI entry point. The first argument selects the mode:
 *   --check  compare generated output with the files on disk; exit 1 when any is stale or missing
 *   --write  (re)create completions/labctl.fish and completions/labctl.bash
 *   other    print both generated scripts to stdout
 */
async function main(): Promise<void> {
  const mode = process.argv[2] ?? '';

  let tree: CmdInfo;
  try {
    tree = await extractTree();
  } catch (err) {
    console.error('Failed to extract command tree from createProgram().');
    console.error('Make sure workspace packages are built: pnpm build');
    console.error(err);
    process.exit(1);
  }

  const fishContent = generateFish(tree);
  const bashContent = generateBash(tree);
  const completionsDir = join(ROOT, 'completions');
  const fishPath = join(completionsDir, 'labctl.fish');
  const bashPath = join(completionsDir, 'labctl.bash');

  if (mode === '--check') {
    // One row per generated file; fish is checked before bash.
    const targets = [
      {
        path: fishPath,
        expected: fishContent,
        staleMsg: 'completions/labctl.fish is stale',
        missingMsg: 'completions/labctl.fish does not exist',
      },
      {
        path: bashPath,
        expected: bashContent,
        staleMsg: 'completions/labctl.bash is stale',
        missingMsg: 'completions/labctl.bash does not exist',
      },
    ];
    let stale = false;
    for (const target of targets) {
      let onDisk: string;
      try {
        onDisk = readFileSync(target.path, 'utf-8');
      } catch {
        // Any read failure is reported as a missing file.
        console.error(target.missingMsg);
        stale = true;
        continue;
      }
      if (onDisk !== target.expected) {
        console.error(target.staleMsg);
        stale = true;
      }
    }
    if (stale) {
      console.error('Run: pnpm completions:generate');
      process.exit(1);
    }
    console.log('Completions are up to date.');
    process.exit(0);
  }

  if (mode === '--write') {
    mkdirSync(completionsDir, { recursive: true });
    writeFileSync(fishPath, fishContent);
    writeFileSync(bashPath, bashContent);
    for (const written of [fishPath, bashPath]) {
      console.log(`Wrote ${written}`);
    }
    process.exit(0);
  }

  // Default: print to stdout
  console.log('=== completions/labctl.fish ===');
  console.log(fishContent);
  console.log('=== completions/labctl.bash ===');
  console.log(bashContent);
}

// Any rejection escaping main() is logged and reported as exit status 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});

65
bastion/scripts/link-package.sh Executable file
View File

@@ -0,0 +1,65 @@
#!/bin/bash
# Link a Gitea package to a repository.
# Works automatically on Gitea 1.24+ (uses API), warns on older versions.
#
# Usage: source scripts/link-package.sh
# link_package <type> <name>
#
# Requires: GITEA_URL, GITEA_TOKEN, GITEA_OWNER, GITEA_REPO
# link_package <type> <name>
#
# Best-effort: links the Gitea package <type>/<name> to ${GITEA_OWNER}/${GITEA_REPO}.
# Returns 1 only on missing arguments; every other outcome (no token, already
# linked, linked, link API unavailable) returns 0 so callers are never aborted.
link_package() {
    local PKG_TYPE="$1"  # e.g. "rpm", "container"
    local PKG_NAME="$2"  # e.g. "lab", "lab-bastion"

    if [ -z "$PKG_TYPE" ] || [ -z "$PKG_NAME" ]; then
        echo "Usage: link_package <type> <name>"
        return 1
    fi

    local GITEA_URL="${GITEA_URL:-http://10.0.0.194:3012}"
    local GITEA_OWNER="${GITEA_OWNER:-michal}"
    local GITEA_REPO="${GITEA_REPO:-lab}"

    if [ -z "$GITEA_TOKEN" ]; then
        echo "WARNING: GITEA_TOKEN not set, skipping package-repo linking."
        return 0
    fi

    # Check if already linked (search all packages, filter by type+name client-side).
    # Type and name are passed to Python as argv instead of being spliced into the
    # source text, so unusual characters cannot break or alter the script.
    # `|| true`: callers run under `set -e`; a failed lookup (network error, or an
    # error object instead of a list) must fall through to the link attempt
    # rather than silently terminating the calling script.
    local REPO_LINK
    REPO_LINK=$(curl -s -H "Authorization: token ${GITEA_TOKEN}" \
        "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}" \
        | python3 -c '
import json, sys
pkg_type, pkg_name = sys.argv[1], sys.argv[2]
for p in json.load(sys.stdin):
    if p["type"] == pkg_type and p["name"] == pkg_name:
        r = p.get("repository")
        if r:
            print(r["full_name"])
        break
' "$PKG_TYPE" "$PKG_NAME" 2>/dev/null) || true

    if [ -n "$REPO_LINK" ]; then
        echo "==> Package ${PKG_TYPE}/${PKG_NAME} already linked to ${REPO_LINK}"
        return 0
    fi

    # Try Gitea 1.24+ link API. A transport failure leaves "000" in HTTP_CODE;
    # `|| true` keeps that from aborting a `set -e` caller.
    local HTTP_CODE
    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
        -H "Authorization: token ${GITEA_TOKEN}" \
        "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/${PKG_TYPE}/${PKG_NAME}/-/link/${GITEA_REPO}") || true

    if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
        echo "==> Linked ${PKG_TYPE}/${PKG_NAME} to ${GITEA_OWNER}/${GITEA_REPO}"
        return 0
    fi

    # Link failed. Report the status code: an old Gitea is only one possible
    # cause (bad token and unknown package produce the same fall-through).
    local PUBLIC_URL="${GITEA_PUBLIC_URL:-${GITEA_URL}}"
    echo ""
    echo "WARNING: Could not auto-link ${PKG_TYPE}/${PKG_NAME} to repository (HTTP ${HTTP_CODE}; the link API requires Gitea 1.24+)."
    echo "Link it manually in the Gitea UI:"
    echo " ${PUBLIC_URL}/${GITEA_OWNER}/-/packages/${PKG_TYPE}/${PKG_NAME}/settings"
    echo " -> Link to repository: ${GITEA_OWNER}/${GITEA_REPO}"
    return 0
}

72
bastion/scripts/publish-deb.sh Executable file
View File

@@ -0,0 +1,72 @@
#!/bin/bash
# Publish every labctl .deb in dist/ to the Gitea Debian registry, once per
# supported distribution, then link the package to the repository.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Load .env if present
if [ -f .env ]; then
    set -a; source .env; set +a
fi

GITEA_URL="${GITEA_URL:-http://10.0.0.194:3012}"
GITEA_PUBLIC_URL="${GITEA_PUBLIC_URL:-https://mysources.co.uk}"
GITEA_OWNER="${GITEA_OWNER:-michal}"
GITEA_REPO="${GITEA_REPO:-lab}"
GITEA_TOKEN="${GITEA_TOKEN:-$PACKAGES_TOKEN}"

if [ -z "$GITEA_TOKEN" ]; then
    echo "Error: GITEA_TOKEN (or PACKAGES_TOKEN) not set. Add it to .env or export it."
    exit 1
fi

# Collect ALL built packages. A multi-arch build (build-rpm.sh --all) leaves one
# .deb per architecture in dist/; taking only the first match would silently
# skip the others.
shopt -s nullglob
DEB_FILES=(dist/labctl*.deb)
shopt -u nullglob

if [ "${#DEB_FILES[@]}" -eq 0 ]; then
    echo "Error: No DEB found in dist/. Run scripts/build-rpm.sh first."
    exit 1
fi

# Gitea Debian registry: PUT /api/packages/{owner}/debian/pool/{distribution}/{component}/upload
# Publish to each supported distribution.
# Debian: trixie (13/stable), forky (14/testing)
# Ubuntu: noble (24.04 LTS), plucky (25.04)
DISTRIBUTIONS="trixie forky noble plucky"

# Response bodies go to a private temp file instead of a predictable /tmp name.
UPLOAD_OUT="$(mktemp)"
trap 'rm -f "$UPLOAD_OUT"' EXIT

FAILED_UPLOADS=0

for DEB_FILE in "${DEB_FILES[@]}"; do
    # Read the version from the package's control metadata.
    DEB_VERSION=$(dpkg-deb --field "$DEB_FILE" Version 2>/dev/null || echo "unknown")
    echo "==> Publishing $DEB_FILE (version $DEB_VERSION) to ${GITEA_URL}..."

    for DIST in $DISTRIBUTIONS; do
        echo " -> $DIST..."
        HTTP_CODE=$(curl -s -o "$UPLOAD_OUT" -w "%{http_code}" \
            -X PUT \
            -H "Authorization: token ${GITEA_TOKEN}" \
            --upload-file "$DEB_FILE" \
            "${GITEA_URL}/api/packages/${GITEA_OWNER}/debian/pool/${DIST}/main/upload")

        if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
            echo " Published to $DIST"
        elif [ "$HTTP_CODE" = "409" ]; then
            echo " Already exists in $DIST (skipping)"
        else
            # Per-distribution failures stay non-fatal, but are counted so the
            # final summary does not claim success.
            echo " WARNING: Upload to $DIST returned HTTP $HTTP_CODE"
            cat "$UPLOAD_OUT" 2>/dev/null || true
            echo ""
            FAILED_UPLOADS=$((FAILED_UPLOADS + 1))
        fi
    done
done

echo ""
if [ "$FAILED_UPLOADS" -eq 0 ]; then
    echo "==> Published successfully!"
else
    echo "==> Finished with ${FAILED_UPLOADS} failed upload(s); see warnings above."
fi

# Ensure package is linked to the repository
source "$SCRIPT_DIR/link-package.sh"
link_package "debian" "labctl"

echo ""
echo "Install with:"
echo " echo \"deb ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian trixie main\" | sudo tee /etc/apt/sources.list.d/labctl.list"
echo " curl -fsSL ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian/repository.key | sudo gpg --dearmor -o /etc/apt/keyrings/labctl.gpg"
echo " sudo apt update && sudo apt install labctl"

62
bastion/scripts/publish-rpm.sh Executable file
View File

@@ -0,0 +1,62 @@
#!/bin/bash
# Publish every labctl RPM in dist/ to the Gitea RPM registry, replacing an
# already-published version of the same number, then link the package to the
# repository.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Load .env if present
if [ -f .env ]; then
    set -a; source .env; set +a
fi

GITEA_URL="${GITEA_URL:-http://10.0.0.194:3012}"
GITEA_PUBLIC_URL="${GITEA_PUBLIC_URL:-https://mysources.co.uk}"
GITEA_OWNER="${GITEA_OWNER:-michal}"
GITEA_REPO="${GITEA_REPO:-lab}"
GITEA_TOKEN="${GITEA_TOKEN:-$PACKAGES_TOKEN}"

if [ -z "$GITEA_TOKEN" ]; then
    echo "Error: GITEA_TOKEN (or PACKAGES_TOKEN) not set. Add it to .env or export it."
    exit 1
fi

# Collect ALL built packages. A multi-arch build (build-rpm.sh --all) leaves one
# RPM per architecture in dist/; taking only the first match would silently
# skip the others.
shopt -s nullglob
RPM_FILES=(dist/labctl-*.rpm)
shopt -u nullglob

if [ "${#RPM_FILES[@]}" -eq 0 ]; then
    echo "Error: No RPM found in dist/. Run scripts/build-rpm.sh first."
    exit 1
fi

# Versions whose published copy has already been removed during this run.
# Deleting removes the whole package version, so it must happen at most once
# per version -- otherwise uploading the second architecture would wipe the first.
REPLACED_VERSIONS=" "

for RPM_FILE in "${RPM_FILES[@]}"; do
    # Get version string as it appears in Gitea (e.g. "0.1.0-1")
    RPM_VERSION=$(rpm -qp --queryformat '%{VERSION}-%{RELEASE}' "$RPM_FILE")
    echo "==> Publishing $RPM_FILE (version $RPM_VERSION) to ${GITEA_URL}..."

    case "$REPLACED_VERSIONS" in
        *" ${RPM_VERSION} "*)
            ;;  # already handled for an earlier file of the same version
        *)
            # Check if version already exists and delete it first
            EXISTING=$(curl -s -o /dev/null -w "%{http_code}" \
                -H "Authorization: token ${GITEA_TOKEN}" \
                "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/labctl/${RPM_VERSION}")
            if [ "$EXISTING" = "200" ]; then
                echo "==> Version $RPM_VERSION already exists, replacing..."
                curl -s -o /dev/null -X DELETE \
                    -H "Authorization: token ${GITEA_TOKEN}" \
                    "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/labctl/${RPM_VERSION}"
            fi
            REPLACED_VERSIONS="${REPLACED_VERSIONS}${RPM_VERSION} "
            ;;
    esac

    # Upload (--fail + set -e: any HTTP error aborts the script)
    curl --fail -s -X PUT \
        -H "Authorization: token ${GITEA_TOKEN}" \
        --upload-file "$RPM_FILE" \
        "${GITEA_URL}/api/packages/${GITEA_OWNER}/rpm/upload"
    echo ""
done

echo "==> Published successfully!"

# Ensure package is linked to the repository
source "$SCRIPT_DIR/link-package.sh"
link_package "rpm" "labctl"

echo ""
echo "Install with:"
echo " sudo dnf install labctl # if repo already configured"

75
bastion/scripts/release.sh Executable file
View File

@@ -0,0 +1,75 @@
#!/bin/bash
# Full release: build packages for both architectures, publish RPM and DEB,
# build & push the container image, install the RPM locally on Fedora/RHEL,
# and print install instructions.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Load .env if present
if [ -f .env ]; then
    set -a; source .env; set +a
fi

echo "=== lab-bastion release ==="
echo ""

# 1. Build binaries & packages (both architectures)
bash scripts/build-rpm.sh --all
echo ""

# 2. Publish RPM
bash scripts/publish-rpm.sh
echo ""

# 3. Publish DEB
bash scripts/publish-deb.sh
echo ""

# 4. Build & push Docker image
bash scripts/build-bastion.sh
echo ""

# 5. Install locally (Fedora/RHEL only)
if [ -f /etc/fedora-release ] || [ -f /etc/redhat-release ]; then
    echo "==> Installing locally..."
    # Step 1 built RPMs for BOTH architectures. Pick the one matching this host:
    # the first match alphabetically is the aarch64 package, which rpm refuses
    # to install on x86_64 (and `set -e` would then abort the release).
    HOST_RPM_ARCH="$(uname -m)"
    RPM_FILE=$(ls dist/labctl-*."${HOST_RPM_ARCH}".rpm 2>/dev/null | head -1)
    if [ -n "$RPM_FILE" ]; then
        sudo rpm -U --force "$RPM_FILE"
        echo ""
        echo "==> Installed:"
        labctl --version || echo "(labctl binary installed)"
    else
        echo "==> WARNING: No RPM for ${HOST_RPM_ARCH} found in dist/, skipping local install."
    fi
else
    echo "==> Not Fedora/RHEL — skipping local RPM install."
fi
echo ""

# 6. Summary
GITEA_PUBLIC_URL="${GITEA_PUBLIC_URL:-https://mysources.co.uk}"
GITEA_OWNER="${GITEA_OWNER:-michal}"
REGISTRY="${GITEA_REGISTRY:-mysources.co.uk}"
VERSION=$(node -p "require('./package.json').version")

echo "=== Done! ==="
echo ""
echo "RPM install:"
echo " sudo dnf config-manager --add-repo ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/rpm.repo"
echo " sudo dnf install labctl"
echo ""
echo "DEB install (Debian/Ubuntu):"
echo " echo \"deb ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian trixie main\" | sudo tee /etc/apt/sources.list.d/labctl.list"
echo " curl -fsSL ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian/repository.key | sudo gpg --dearmor -o /etc/apt/keyrings/labctl.gpg"
echo " sudo apt update && sudo apt install labctl"
echo ""
echo "Docker image:"
# Use the configured owner rather than a hard-coded one, like the URLs above.
echo " podman pull ${REGISTRY}/${GITEA_OWNER}/lab-bastion:${VERSION}"
echo ""
echo "k3s deployment:"
echo " kubectl apply -k deploy/k3s/"

View File

@@ -0,0 +1,71 @@
#!/bin/bash
# Run integration tests inside a Node container with access to host libvirt.
#
# Usage: sudo ./scripts/test-integration.sh [vitest args...]
# Example: sudo ./scripts/test-integration.sh -t k3s
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"

# Detect real user (even when running via sudo). The home directory is looked
# up in the passwd database (as test-provision.sh does) because it is not
# always /home/<user> (root, custom homes); the old guess remains the fallback.
REAL_USER="${SUDO_USER:-$(whoami)}"
REAL_HOME=$(getent passwd "$REAL_USER" | cut -d: -f6)
REAL_HOME="${REAL_HOME:-/home/${REAL_USER}}"

echo "==> Running integration tests in container"
echo " Project: ${PROJECT_ROOT}"
echo " User: ${REAL_USER}"
echo " SSH key: ${REAL_HOME}/.ssh/"
echo ""

# Check prerequisites
if ! command -v podman &>/dev/null && ! command -v docker &>/dev/null; then
    echo "ERROR: podman or docker required"
    exit 1
fi

RUNTIME="podman"
if ! command -v podman &>/dev/null; then
    RUNTIME="docker"
fi

# Check libvirt socket
if [ ! -S /var/run/libvirt/libvirt-sock ]; then
    echo "ERROR: libvirt socket not found at /var/run/libvirt/libvirt-sock"
    echo " Is libvirtd running? Try: sudo systemctl start libvirtd"
    exit 1
fi

# Create a temp dir for cloud-init artifacts (avoids SELinux /tmp relabel)
WORK_TMP="/var/tmp/lab-integration-$$"
mkdir -p "${WORK_TMP}"
trap 'rm -rf "${WORK_TMP}"' EXIT

# NOTE: deliberately NOT `exec`: exec replaces this shell, so the EXIT trap
# above would never run and the temp dir would be left behind. As the last
# command, the container's exit status is still this script's exit status.
#
# Test arguments are forwarded as positional parameters ("$@") to the inner
# shell instead of being spliced into the script text, so arguments containing
# spaces or shell metacharacters reach vitest unchanged.
$RUNTIME run --rm \
    --name lab-integration-test \
    --privileged \
    --security-opt label=disable \
    --network=host \
    -v "${PROJECT_ROOT}:${PROJECT_ROOT}" \
    -v "${REAL_HOME}/.ssh:${REAL_HOME}/.ssh:ro" \
    -v "/var/run/libvirt/libvirt-sock:/var/run/libvirt/libvirt-sock" \
    -v "/var/lib/libvirt/images:/var/lib/libvirt/images" \
    -v "${WORK_TMP}:/tmp/lab-integration-tests" \
    -w "${PROJECT_ROOT}" \
    -e "SSH_KEY_PATH=${REAL_HOME}/.ssh/id_rsa" \
    -e "HOME=${REAL_HOME}" \
    node:22-bookworm \
    bash -c '
# Install system deps for libvirt client + cloud-init ISO creation
apt-get update -qq && apt-get install -y -qq libvirt-clients virtinst genisoimage openssh-client qemu-utils sudo >/dev/null 2>&1
# Install pnpm
corepack enable && corepack prepare pnpm@9 --activate >/dev/null 2>&1
echo "==> Installing project dependencies..."
pnpm install --frozen-lockfile 2>/dev/null
echo "==> Running integration tests..."
echo ""
pnpm run test:integration "$@"
' bash "$@"

152
bastion/scripts/test-provision.sh Executable file
View File

@@ -0,0 +1,152 @@
#!/bin/bash
# Run PXE and/or ISO boot integration tests.
#
# Usage:
# sudo ./scripts/test-provision.sh # run PXE + ISO (x86_64)
# sudo ./scripts/test-provision.sh pxe # PXE only
# sudo ./scripts/test-provision.sh iso # ISO only (x86_64)
# sudo ./scripts/test-provision.sh arm # ARM ISO boot (emulated, SLOW ~60min)
# sudo ./scripts/test-provision.sh all # all tests including ARM
#
# Prerequisites:
# libvirtd, OVMF (edk2-ovmf), iPXE (ipxe-bootimgs-x86),
# dnsmasq, xorriso, mtools, virt-install, qemu-img
# ARM: qemu-system-aarch64, edk2-aarch64
set -e
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"
# Detect real user for SSH keys
# Under sudo, SUDO_USER names the invoking user; the home directory is read from the passwd database.
REAL_USER="${SUDO_USER:-$(whoami)}"
REAL_HOME=$(getent passwd "$REAL_USER" | cut -d: -f6)
# ANSI escape sequences used by the `echo -e` calls below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BOLD='\033[1m'
RESET='\033[0m'
echo ""
echo -e "${BOLD}Lab Bastion -- Provision Integration Tests${RESET}"
echo "==========================================="
echo ""
# --- Prerequisite checks ---
# Collect every missing tool first so they are all reported in one message.
MISSING=""
for cmd in virsh virt-install qemu-img dnsmasq xorriso mformat mcopy curl; do
if ! command -v "$cmd" &>/dev/null; then
MISSING="$MISSING $cmd"
fi
done
if [ -n "$MISSING" ]; then
echo -e "${RED}Missing tools:${RESET}$MISSING"
echo "Install: sudo dnf install libvirt virt-install qemu-img dnsmasq xorriso mtools curl"
exit 1
fi
if ! systemctl is-active libvirtd &>/dev/null; then
echo -e "${RED}libvirtd not running.${RESET} Start with: sudo systemctl start libvirtd"
exit 1
fi
# Only this one firmware path is checked.
if [ ! -f /usr/share/edk2/ovmf/OVMF_CODE.fd ]; then
echo -e "${RED}OVMF firmware not found.${RESET} Install: sudo dnf install edk2-ovmf"
exit 1
fi
# The first existing candidate wins; the list order is the preference order.
IPXE_EFI=""
for f in /usr/share/ipxe/ipxe-snponly-x86_64.efi /usr/share/ipxe/ipxe-snp-x86_64.efi /usr/share/ipxe/ipxe-x86_64.efi; do
[ -f "$f" ] && IPXE_EFI="$f" && break
done
if [ -z "$IPXE_EFI" ]; then
echo -e "${RED}iPXE EFI binary not found.${RESET} Install: sudo dnf install ipxe-bootimgs-x86"
exit 1
fi
# Find SSH key
# A key type is accepted only when both the private file and its .pub exist; ed25519 is tried first.
SSH_KEY=""
for name in id_ed25519 id_ecdsa id_rsa; do
if [ -f "$REAL_HOME/.ssh/$name" ] && [ -f "$REAL_HOME/.ssh/$name.pub" ]; then
SSH_KEY="$REAL_HOME/.ssh/$name"
break
fi
done
if [ -z "$SSH_KEY" ]; then
echo -e "${RED}No SSH key found in $REAL_HOME/.ssh/${RESET}"
exit 1
fi
echo -e " User: ${BOLD}$REAL_USER${RESET}"
echo -e " SSH key: ${BOLD}$SSH_KEY${RESET}"
echo -e " iPXE: ${BOLD}$IPXE_EFI${RESET}"
echo ""
# --- Determine which tests to run ---
# With no argument the mode is "both" (PXE + ISO on x86_64).
MODE="${1:-both}"
# run_test <display name> <vitest -t pattern>
#
# Runs the integration tests whose names match <pattern>, with SSH_KEY_PATH and
# HOME pointing at the real user's key and home directory. Prints a coloured
# verdict and returns 0 on success, 1 on failure.
run_test() {
    local name="$1" pattern="$2"
    local status=0

    echo ""
    echo -e "${YELLOW}━━━ Running $name test ━━━${RESET}"
    echo ""

    # `|| status=$?` records the failure without tripping `set -e`.
    SSH_KEY_PATH="$SSH_KEY" HOME="$REAL_HOME" \
        npx vitest run -c tests/integration/vitest.config.ts -t "$pattern" 2>&1 || status=$?

    echo ""
    if [ "$status" -ne 0 ]; then
        echo -e "${RED}$name test failed${RESET}"
        return 1
    fi
    echo -e "${GREEN}$name test passed${RESET}"
    return 0
}
# Set to 1 as soon as any selected test fails; all selected tests still run.
FAILED=0

# x86_64 suite shared by the "both" and "all" modes: PXE first, then ISO.
run_x86_suite() {
    run_test "PXE boot" "PXE boot" || FAILED=1
    run_test "ISO boot" "ISO boot" || FAILED=1
}

case "$MODE" in
    pxe)
        run_test "PXE boot" "PXE boot" || FAILED=1
        ;;
    iso)
        run_test "ISO boot" "ISO boot" || FAILED=1
        ;;
    arm|arm-iso)
        # Explicit ARM request: a missing emulator is a hard error.
        if ! command -v qemu-system-aarch64 &>/dev/null; then
            echo -e "${RED}qemu-system-aarch64 not found.${RESET} Install: sudo dnf install qemu-system-aarch64 edk2-aarch64"
            exit 1
        fi
        echo -e "${YELLOW}ARM emulation is ~10x slower than native. Expect 30-60 minutes.${RESET}"
        run_test "ARM ISO boot" "ARM ISO" || FAILED=1
        ;;
    both)
        run_x86_suite
        ;;
    all)
        run_x86_suite
        # In "all" mode a missing emulator only skips the ARM test.
        if ! command -v qemu-system-aarch64 &>/dev/null; then
            echo -e "${YELLOW}Skipping ARM test (qemu-system-aarch64 not installed)${RESET}"
        else
            echo -e "${YELLOW}ARM emulation is ~10x slower than native.${RESET}"
            run_test "ARM ISO boot" "ARM ISO" || FAILED=1
        fi
        ;;
    *)
        echo "Usage: $0 [pxe|iso|arm|both|all]"
        exit 1
        ;;
esac

echo ""
if [ "$FAILED" -ne 0 ]; then
    echo -e "${RED}${BOLD}Some tests failed.${RESET}"
    exit 1
fi
echo -e "${GREEN}${BOLD}All provision tests passed.${RESET}"

View File

@@ -0,0 +1,38 @@
{
"name": "@lab/bastion",
"version": "0.1.0",
"private": true,
"type": "module",
"main": "./dist/main.js",
"types": "./dist/main.d.ts",
"exports": {
".": {
"import": "./dist/main.js",
"types": "./dist/main.d.ts"
},
"./iso-builder": {
"import": "./dist/services/iso-builder.js",
"types": "./dist/services/iso-builder.d.ts"
}
},
"scripts": {
"build": "tsc --build",
"clean": "rimraf dist",
"dev": "tsx src/main.ts",
"test": "vitest",
"test:run": "vitest run"
},
"dependencies": {
"@fastify/static": "^8.0.0",
"@lab/modules": "workspace:*",
"@lab/shared": "workspace:*",
"execa": "^9.5.0",
"fastify": "^5.0.0",
"winston": "^3.17.0",
"ws": "^8.19.0"
},
"devDependencies": {
"@types/node": "^22.10.0",
"@types/ws": "^8.18.0"
}
}

View File

@@ -0,0 +1,58 @@
// Configuration from environment variables with sensible defaults.
import type { BastionConfig } from "@lab/shared";
/**
 * Reads a port number from the environment.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable is unset or blank.
 * @returns The parsed port.
 * @throws Error when the variable is set but is not an integer in 0..65535.
 */
function parsePortEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") return fallback;
  const port = Number(raw);
  if (!Number.isInteger(port) || port < 0 || port > 65535) {
    throw new Error(`Invalid ${name}: expected an integer port between 0 and 65535, got "${raw}"`);
  }
  return port;
}

/**
 * Builds the bastion configuration.
 *
 * Precedence for every setting is: explicit `overrides`, then the environment
 * variable, then the built-in default. Network-related fields (`iface`,
 * `serverIp`, `network`, `gateway`, `sshKeys`, `adminUser`) are taken from
 * `overrides` only and otherwise start empty. Mirror URL and directory paths
 * are derived from the resolved values.
 *
 * Compared with the previous implementation, malformed input is rejected up
 * front: a non-numeric or blank HTTP_PORT / SYSLOG_PORT used to become `NaN`,
 * and any DHCP_MODE string was cast to the `"proxy" | "full"` union unchecked.
 *
 * @param overrides - Values that take priority over environment and defaults.
 * @returns The resolved configuration.
 * @throws Error when HTTP_PORT or SYSLOG_PORT is not a valid port, or when the
 *   DHCP mode is neither "proxy" nor "full".
 */
export function loadConfig(overrides: Partial<BastionConfig> = {}): BastionConfig {
  const fedoraVersion = overrides.fedoraVersion ?? process.env["FEDORA_VERSION"] ?? "43";
  const arch = overrides.arch ?? process.env["ARCH"] ?? "x86_64";
  const httpPort = overrides.httpPort ?? parsePortEnv("HTTP_PORT", 8080);
  const timezone = overrides.timezone ?? process.env["TIMEZONE"] ?? "Europe/London";
  const locale = overrides.locale ?? process.env["LOCALE"] ?? "en_GB.UTF-8";
  const bastionDir = overrides.bastionDir ?? process.env["BASTION_DIR"] ?? "/tmp/lab-bastion";
  const domain = overrides.domain ?? process.env["DOMAIN"] ?? "ad.itaz.eu";

  // `||` on the env value: a blank DHCP_MODE is treated as unset, since "" is never a valid mode.
  const dhcpMode = overrides.dhcpMode ?? (process.env["DHCP_MODE"] || "proxy");
  if (dhcpMode !== "proxy" && dhcpMode !== "full") {
    throw new Error(`Invalid DHCP_MODE: expected "proxy" or "full", got "${dhcpMode}"`);
  }

  const dhcpRangeStart = overrides.dhcpRangeStart ?? process.env["DHCP_RANGE_START"] ?? "";
  const dhcpRangeEnd = overrides.dhcpRangeEnd ?? process.env["DHCP_RANGE_END"] ?? "";
  const syslogPort = overrides.syslogPort ?? parsePortEnv("SYSLOG_PORT", 5514);
  const ubuntuVersion = overrides.ubuntuVersion ?? process.env["UBUNTU_VERSION"] ?? "26.04";
  const ubuntuMirror = overrides.ubuntuMirror ?? process.env["UBUNTU_MIRROR"]
    ?? `https://releases.ubuntu.com/${ubuntuVersion}`;

  // Derived values: always computed, never overridable.
  const fedoraMirror = `https://download.fedoraproject.org/pub/fedora/linux/releases/${fedoraVersion}/Everything/${arch}/os`;
  const tftpDir = `${bastionDir}/tftp`;
  const httpDir = `${bastionDir}/http`;
  const stateFile = `${bastionDir}/state.json`;

  return {
    fedoraVersion,
    arch,
    httpPort,
    timezone,
    locale,
    bastionDir,
    domain,
    dhcpMode,
    dhcpRangeStart,
    dhcpRangeEnd,
    ubuntuVersion,
    ubuntuMirror,
    // These are populated at runtime by the network service
    iface: overrides.iface ?? "",
    serverIp: overrides.serverIp ?? "",
    network: overrides.network ?? "",
    gateway: overrides.gateway ?? "",
    sshKeys: overrides.sshKeys ?? [],
    adminUser: overrides.adminUser ?? "",
    syslogPort,
    skipDnsmasq: overrides.skipDnsmasq,
    skipArtifacts: overrides.skipArtifacts,
    labdUrl: overrides.labdUrl ?? process.env["LABD_URL"],
    bastionJoinToken: overrides.bastionJoinToken ?? process.env["BASTION_JOIN_TOKEN"],
    fedoraMirror,
    tftpDir,
    httpDir,
    stateFile,
  };
}

View File

@@ -0,0 +1,359 @@
// Entry point for the bastion server.
// Starts the Fastify HTTP server, dnsmasq, and handles graceful shutdown.
import { mkdirSync, writeFileSync, readFileSync, existsSync, copyFileSync, symlinkSync, unlinkSync } from "node:fs";
import { execSync } from "node:child_process";
import type { BastionConfig } from "@lab/shared";
import { loadConfig } from "./config.js";
import { populateNetworkConfig } from "./services/network.js";
import { createApp } from "./server.js";
import { startDnsmasq, stopDnsmasq, generateDnsmasqConf } from "./services/dnsmasq.js";
import { generateDiscoverKickstart } from "./services/kickstart-generator.js";
import { renderBootIpxe } from "./templates/boot.ipxe.js";
import { logger } from "./services/logger.js";
import { BastionConnection } from "./services/labd-connection.js";
import { progressBus } from "./services/progress-events.js";
import { ensureBootIso } from "./routes/boot-iso.js";
/**
 * Copy a locally installed boot artifact into place unless it is already there.
 *
 * @param src   Path of the file to copy from.
 * @param dest  Path the artifact should end up at; an existing file is kept as-is.
 * @param label Human-readable artifact name used in log and error messages.
 * @throws Error when `dest` is missing and `src` does not exist either.
 */
function copyIfMissing(src: string, dest: string, label: string): void {
  const alreadyPresent = existsSync(dest);
  if (!alreadyPresent) {
    if (!existsSync(src)) {
      throw new Error(`${label}: source not found at ${src}`);
    }
    copyFileSync(src, dest);
    logger.info(` ${label} -- copied from ${src}`);
  } else {
    logger.info(` ${label} -- cached`);
  }
}
/**
 * Download `url` to `dest` with curl unless `dest` already exists.
 *
 * @param url   Source URL (redirects are followed, HTTP errors are fatal).
 * @param dest  Target path; an existing file is treated as a valid cache hit.
 * @param label Human-readable artifact name used in log and error messages.
 * @throws Error when curl exits non-zero.
 */
function download(url: string, dest: string, label: string): void {
  if (existsSync(dest)) {
    logger.info(` ${label} -- cached`);
    return;
  }
  logger.info(` ${label} -- downloading...`);
  try {
    execSync(`curl -# -L -f -o "${dest}" "${url}"`, { stdio: "inherit" });
  } catch {
    // An aborted transfer can leave a truncated file behind. Because the cache
    // check above is a plain existence test, remove it so the next start
    // retries instead of serving a broken artifact.
    try { unlinkSync(dest); } catch { /* nothing was written */ }
    throw new Error(`Failed to download ${label} from ${url}`);
  }
}
/**
 * Best-effort symlink creation: create `linkPath` pointing at `target` and
 * silently ignore any failure (typically because the link already exists).
 */
function symlinkSafe(target: string, linkPath: string): void {
  try {
    symlinkSync(target, linkPath);
  } catch (_ignored: unknown) {
    // Link may already exist -- callers treat this as success.
    return;
  }
}
/**
 * Run a command through the shell and report whether it exited successfully.
 *
 * @param cmd  Executable name.
 * @param args Arguments, joined with single spaces (no quoting is applied).
 * @returns `true` on exit code 0, `false` on any failure.
 */
function runCmd(cmd: string, args: string[]): boolean {
  const commandLine = `${cmd} ${args.join(" ")}`;
  let succeeded = true;
  try {
    execSync(commandLine, { stdio: "pipe" });
  } catch {
    succeeded = false;
  }
  return succeeded;
}
// Zone argument ("--zone=<name>") passed to firewall-cmd. openFirewall sets it
// when a zone is reported for the interface; the empty string means no zone
// argument is passed.
let fwZoneFlag = "";
// Set to true once openFirewall has issued its --add-* commands; closeFirewall
// does nothing while this is false.
let fwOpened = false;
/**
 * Add runtime firewalld rules for DHCP, TFTP, the HTTP port and UDP 4011.
 * Does nothing when `firewall-cmd --state` fails. Records the detected zone
 * in `fwZoneFlag` and marks `fwOpened` so closeFirewall can undo the rules.
 */
function openFirewall(config: BastionConfig): void {
  // Check if firewalld is running
  const firewalldRunning = runCmd("firewall-cmd", ["--state"]);
  if (!firewalldRunning) return;

  // Detect zone for our interface
  try {
    const detectedZone = execSync(
      `firewall-cmd --get-zone-of-interface=${config.iface} 2>/dev/null`,
      { encoding: "utf-8" },
    ).trim();
    if (detectedZone) fwZoneFlag = `--zone=${detectedZone}`;
  } catch { /* use default zone */ }

  const zoneArgs = fwZoneFlag ? [fwZoneFlag] : [];
  logger.info(`Opening firewall ports (DHCP, TFTP, HTTP:${config.httpPort})...`);
  const rules = [
    "--add-service=dhcp",
    "--add-service=tftp",
    `--add-port=${config.httpPort}/tcp`,
    "--add-port=4011/udp",
  ];
  for (const rule of rules) {
    runCmd("firewall-cmd", ["--quiet", ...zoneArgs, rule]);
  }
  fwOpened = true;
}
/**
 * Remove the firewalld rules added by openFirewall, using the same zone.
 * No-op when openFirewall never got as far as adding rules.
 */
function closeFirewall(config: BastionConfig): void {
  if (!fwOpened) return;

  const zoneArgs = fwZoneFlag ? [fwZoneFlag] : [];
  logger.info("Removing firewall rules...");
  const rules = [
    "--remove-service=dhcp",
    "--remove-service=tftp",
    `--remove-port=${config.httpPort}/tcp`,
    "--remove-port=4011/udp",
  ];
  for (const rule of rules) {
    runCmd("firewall-cmd", ["--quiet", ...zoneArgs, rule]);
  }
}
/**
 * Terminate a previously started bastion recorded in `pidFile` (if any) and
 * remove the file. Never throws: an unreadable or stale PID file is deleted
 * on a best-effort basis.
 */
async function stopPreviousInstance(pidFile: string): Promise<void> {
  try {
    if (existsSync(pidFile)) {
      const oldPid = parseInt(readFileSync(pidFile, "utf-8").trim(), 10);
      // Only signal a real, foreign process. PID 0 and negative values address
      // whole process groups, and our own PID (e.g. after a container restart
      // with a persisted PID file) would make us terminate ourselves.
      if (Number.isInteger(oldPid) && oldPid > 0 && oldPid !== process.pid) {
        try {
          process.kill(oldPid, "SIGTERM");
          logger.info(`Killed old bastion process (PID ${oldPid})`);
          // Give the old instance a moment to release its ports
          await new Promise((r) => setTimeout(r, 1000));
        } catch {
          // Process already dead
        }
      }
      // Remove stale PID file (may be owned by different user)
      try { unlinkSync(pidFile); } catch { /* ignore */ }
    }
  } catch {
    // Can't read PID file — try to remove it
    try { unlinkSync(pidFile); } catch { /* ignore */ }
  }
}

/**
 * Start the bastion: prepare boot artifacts, write the discovery kickstart and
 * iPXE script, open the firewall, start the HTTP server, syslog listener and
 * dnsmasq, optionally connect to labd, then block until SIGINT/SIGTERM.
 *
 * @param overrides Config values that take precedence over environment defaults.
 * @throws Error when dnsmasq is required but the process is not running as root,
 *         or when a mandatory boot artifact cannot be copied or downloaded.
 * @returns A promise that never resolves; the process exits from the signal handler.
 */
export async function startBastion(overrides: Partial<BastionConfig> = {}): Promise<void> {
  // Load and populate config
  let config = loadConfig(overrides);
  config = populateNetworkConfig(config);

  // Bastion needs root for dnsmasq (DHCP port 67)
  if (!config.skipDnsmasq && process.getuid?.() !== 0) {
    throw new Error("Must run as root (dnsmasq needs DHCP/TFTP ports). Use: sudo labctl init bastion standalone start");
  }

  mkdirSync(config.bastionDir, { recursive: true, mode: 0o755 });
  const pidFile = `${config.bastionDir}/bastion.pid`;

  // Kill old instance if running
  await stopPreviousInstance(pidFile);

  // Write current PID
  writeFileSync(pidFile, String(process.pid), { mode: 0o644 });

  // Prepare directories
  mkdirSync(config.tftpDir, { recursive: true });
  mkdirSync(config.httpDir, { recursive: true });

  // Prepare boot artifacts
  if (config.skipArtifacts !== true) {
    logger.info(`Preparing boot artifacts (Fedora ${config.fedoraVersion} ${config.arch})...`);
    copyIfMissing(
      "/usr/share/ipxe/undionly.kpxe",
      `${config.tftpDir}/undionly.kpxe`,
      "iPXE BIOS",
    );
    copyIfMissing(
      "/usr/share/ipxe/ipxe-snponly-x86_64.efi",
      `${config.tftpDir}/ipxe.efi`,
      "iPXE UEFI x86_64",
    );
    try {
      copyIfMissing(
        "/usr/share/ipxe/arm64-efi/snponly.efi",
        `${config.tftpDir}/ipxe-arm64.efi`,
        "iPXE UEFI arm64",
      );
    } catch {
      logger.warn("arm64 iPXE not available -- skipping");
    }
    download(
      `${config.fedoraMirror}/images/pxeboot/vmlinuz`,
      `${config.httpDir}/vmlinuz`,
      "Fedora kernel",
    );
    download(
      `${config.fedoraMirror}/images/pxeboot/initrd.img`,
      `${config.httpDir}/initrd.img`,
      "Fedora initrd",
    );

    // Ubuntu netboot artifacts (non-fatal — Ubuntu version may not be released yet)
    try {
      logger.info(`Preparing Ubuntu ${config.ubuntuVersion} netboot artifacts...`);
      download(
        `${config.ubuntuMirror}/casper/vmlinuz`,
        `${config.httpDir}/ubuntu-vmlinuz`,
        "Ubuntu kernel",
      );
      download(
        `${config.ubuntuMirror}/casper/initrd`,
        `${config.httpDir}/ubuntu-initrd`,
        "Ubuntu initrd",
      );
    } catch {
      logger.warn(`Ubuntu ${config.ubuntuVersion} artifacts not available -- Ubuntu provisioning disabled`);
    }

    // Symlink iPXE binaries into HTTP dir for UEFI HTTP Boot
    for (const name of ["ipxe.efi", "ipxe-arm64.efi"]) {
      const src = `${config.tftpDir}/${name}`;
      const dest = `${config.httpDir}/${name}`;
      if (existsSync(src)) {
        symlinkSafe(src, dest);
      }
    }

    // Generate boot ISO (served as static file for Range request support)
    try {
      ensureBootIso(config);
    } catch (err) {
      logger.warn(`Boot ISO generation failed: ${err instanceof Error ? err.message : String(err)}`);
    }
  } else {
    logger.info("Skipping boot artifacts (--skip-artifacts)");
  }

  // Write discovery kickstart
  const discoverKs = generateDiscoverKickstart(config);
  writeFileSync(`${config.httpDir}/discover.ks`, discoverKs);

  // Write iPXE boot script
  const bootIpxe = renderBootIpxe({
    serverIp: config.serverIp,
    httpPort: config.httpPort,
  });
  writeFileSync(`${config.httpDir}/boot.ipxe`, bootIpxe);

  // Generate dnsmasq config
  generateDnsmasqConf(config);

  // Open firewall ports
  if (config.skipDnsmasq !== true) {
    openFirewall(config);
  }

  // Start HTTP server + syslog listener
  const { app, state, syslog } = createApp(config);
  await app.listen({ port: config.httpPort, host: "0.0.0.0" });
  logger.info(`HTTP server listening on :${config.httpPort}`);
  syslog.start();

  // Start dnsmasq (unless skipped)
  if (config.skipDnsmasq !== true) {
    const dnsmasqProc = startDnsmasq(config);
    // Monitor dnsmasq: any exit that was not caused by our own kill is fatal
    void dnsmasqProc.then(() => {
      logger.error("dnsmasq exited unexpectedly");
      logger.error("Check if another DHCP/TFTP service is running.");
      process.exit(1);
    }).catch((err: unknown) => {
      const message = err instanceof Error ? err.message : String(err);
      if (!message.includes("was killed")) {
        logger.error(`dnsmasq error: ${message}`);
        process.exit(1);
      }
    });
  } else {
    logger.info("Skipping dnsmasq (--skip-dnsmasq)");
  }

  // Connect to labd if configured (otherwise run standalone)
  let labdConn: BastionConnection | null = null;
  if (config.labdUrl) {
    labdConn = new BastionConnection(config, () => state.load());

    // State keys are lower-case, colon-separated MACs (the form /dispatch looks
    // up), so every command normalizes the MAC it receives the same way.
    const canonicalMac = (raw: string): string => raw.toLowerCase().replace(/-/g, ":");

    // Wire up command handlers so labd can send install/forget/role commands
    labdConn.onCommand("command-install", async (msg) => {
      if (msg.type !== "command-install") throw new Error("unexpected");
      const mac = canonicalMac(msg.mac);
      state.update((s) => {
        s.install_queue[mac] = {
          hostname: msg.hostname,
          disk: msg.disk ?? "/dev/sda",
          role: msg.role as import("@lab/shared").Role,
          os: msg.os as import("@lab/shared").OsId,
          queued_at: new Date().toISOString(),
        };
      });
      return { status: "ok", data: { mac, hostname: msg.hostname } };
    });
    labdConn.onCommand("command-forget", async (msg) => {
      if (msg.type !== "command-forget") throw new Error("unexpected");
      const mac = canonicalMac(msg.mac);
      state.update((s) => {
        delete s.discovered[mac];
        delete s.install_queue[mac];
        delete s.installed[mac];
      });
      return { status: "ok", data: { mac } };
    });
    labdConn.onCommand("command-role-update", async (msg) => {
      if (msg.type !== "command-role-update") throw new Error("unexpected");
      const mac = canonicalMac(msg.mac);
      const current = state.load();
      if (!current.installed[mac]) {
        return { status: "error", error: `MAC ${mac} not found in installed machines` };
      }
      state.update((s) => {
        const inst = s.installed[mac];
        if (inst) inst.role = msg.role;
      });
      return { status: "ok", data: { mac, role: msg.role } };
    });

    // Push state to labd on every local state change
    state.onChange(() => labdConn?.syncState());

    // Forward progress events (stages only, not raw log lines) to labd
    progressBus.on((event) => {
      if (event.stage !== "log") {
        labdConn?.sendProgress(event.mac, event.stage, event.detail);
      }
    });

    labdConn.connect();
    logger.info(`Registering with labd at ${config.labdUrl}`);
  }

  // Print banner
  printBanner(config);

  // Graceful shutdown
  let shuttingDown = false;
  const shutdown = async (): Promise<void> => {
    // A second Ctrl-C (or SIGINT followed by SIGTERM) must not run the
    // teardown twice.
    if (shuttingDown) return;
    shuttingDown = true;
    logger.info("Shutting down...");
    syslog.stop();
    if (labdConn) labdConn.close();
    if (config.skipDnsmasq !== true) stopDnsmasq();
    closeFirewall(config);
    try {
      await app.close();
    } catch (err) {
      // Still remove the PID file and exit; otherwise the process would hang
      // on the never-resolving promise below.
      logger.warn(`HTTP server close failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    try { unlinkSync(pidFile); } catch { /* ignore */ }
    logger.info(`State preserved in ${config.stateFile}`);
    process.exit(0);
  };
  process.on("SIGINT", () => void shutdown());
  process.on("SIGTERM", () => void shutdown());

  // Keep process alive
  await new Promise(() => {});
}
/**
 * Print the coloured startup banner (network, DHCP mode, HTTP URL, OS, domain,
 * state file and usage hints) to stdout.
 */
function printBanner(config: BastionConfig): void {
  let dhcpInfo: string;
  if (config.dhcpMode === "full") {
    dhcpInfo = `full (${config.dhcpRangeStart}-${config.dhcpRangeEnd})`;
  } else {
    dhcpInfo = "proxy (alongside existing DHCP)";
  }

  const rule = "\x1b[36m\x1b[1m" + "=".repeat(60) + "\x1b[0m";
  const bannerLines: string[] = [
    "",
    rule,
    "\x1b[36m\x1b[1m Lab PXE Bastion -- Discovery Mode\x1b[0m",
    rule,
    "",
    ` Network: \x1b[1m${config.network}/24\x1b[0m via \x1b[1m${config.iface}\x1b[0m`,
    ` DHCP: \x1b[1m${dhcpInfo}\x1b[0m`,
    ` HTTP: \x1b[1mhttp://${config.serverIp}:${config.httpPort}/\x1b[0m`,
    ` OS: \x1b[1mFedora ${config.fedoraVersion} (${config.arch})\x1b[0m`,
    ` Domain: \x1b[1m${config.domain}\x1b[0m`,
    ` State: \x1b[1m${config.stateFile}\x1b[0m`,
    "",
    " \x1b[33mPXE boot any machine on this network.\x1b[0m",
    " \x1b[33mIt will be inventoried and rebooted automatically.\x1b[0m",
    "",
    " Commands (from another terminal):",
    " \x1b[1mlabctl provision list\x1b[0m -- show machines",
    " \x1b[1mlabctl provision install <mac> <hostname>\x1b[0m -- queue install",
    "",
    " Press \x1b[1mCtrl-C\x1b[0m to stop.",
    "",
    "\x1b[36m---- Waiting for PXE boot requests... ----\x1b[0m",
    "",
  ];
  for (const line of bannerLines) {
    console.log(line);
  }
}

View File

@@ -0,0 +1,401 @@
// REST API routes for machine management.
// /api/machines - list all machines by state
// /api/install - queue a machine for install
// /api/progress - receive install progress callbacks from kickstart
// /api/discover - receive hardware discovery reports from PXE-booted machines
import type { FastifyInstance } from "fastify";
import type { HardwareInfo, InstalledInfo, Role } from "@lab/shared";
import { isValidOsId, SUPPORTED_ROLES } from "@lab/shared";
import type { StateManager } from "../services/state.js";
import { logger } from "../services/logger.js";
import { triggerPostProvisionK3s } from "../services/post-provision.js";
import { progressBus } from "../services/progress-events.js";
import type { ProgressEvent } from "../services/progress-events.js";
import type { InstallLogBuffer } from "../services/install-log.js";
/**
 * Canonical MAC form used as the key in all state maps: lower-case and
 * colon-separated (hyphenated input such as "AA-BB-…" is converted).
 */
function normalizeMacAddress(raw: string): string {
  return raw.toLowerCase().replace(/-/g, ":");
}

/**
 * Parse a non-negative integer query parameter, returning `fallback` when the
 * value is missing, not a number, or negative.
 */
function parseNonNegativeInt(raw: string | undefined, fallback: number): number {
  const parsed = parseInt(raw ?? "", 10);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * Register the machine-management REST API on `app`.
 *
 * Routes:
 *   GET    /api/machines          full state snapshot
 *   POST   /api/install           queue a machine for install
 *   POST   /api/progress          install progress callback
 *   POST   /api/log               raw install log lines
 *   DELETE /api/machines/:mac     forget a machine
 *   POST   /api/discover          hardware discovery report
 *   POST   /api/role              change the role of an installed machine
 *   GET    /api/logs/:mac         progress snapshot plus log lines
 *   GET    /api/logs/:mac/follow  SSE stream of progress/log events ("all" = every machine)
 *
 * Every route normalizes the MAC with normalizeMacAddress so that a machine
 * reported with a hyphenated MAC ends up under the same key that install,
 * delete and dispatch look up.
 *
 * @param app        Fastify instance to attach the routes to.
 * @param state      Persistent machine state (discovered / install_queue / installed).
 * @param installLog Per-MAC buffer of raw install log lines.
 */
export function registerApiRoutes(
  app: FastifyInstance,
  state: StateManager,
  installLog: InstallLogBuffer,
): void {
  // List all machines
  app.get("/api/machines", async (_request, reply) => {
    return reply.send(state.load());
  });

  // Queue a machine for install
  app.post<{
    Body: {
      mac?: string;
      hostname?: string;
      disk?: string;
      role?: string;
      os?: string;
    };
  }>("/api/install", async (request, reply) => {
    const { mac: rawMac, hostname, disk, role, os } = request.body ?? {};
    const mac = normalizeMacAddress(rawMac ?? "");
    if (mac === "") {
      return reply.status(400).send({ error: "mac is required" });
    }
    const validRole = role ?? "worker";
    if (!(SUPPORTED_ROLES as readonly string[]).includes(validRole)) {
      return reply.status(400).send({ error: `invalid role: '${validRole}'. Supported: ${SUPPORTED_ROLES.join(", ")}` });
    }
    const osId = os ?? "fedora-43";
    if (!isValidOsId(osId)) {
      return reply.status(400).send({ error: `invalid os: '${osId}'. Supported: fedora-43, ubuntu-26.04` });
    }
    state.update((s) => {
      s.install_queue[mac] = {
        hostname: hostname ?? "lab-node",
        disk: disk ?? "",
        role: validRole as Role,
        os: osId,
        queued_at: new Date().toISOString(),
      };
    });
    logger.info(`INSTALL QUEUED: ${mac} -> hostname=${hostname ?? "lab-node"} role=${validRole} os=${osId}`);
    return reply.send({
      status: "queued",
      mac,
      hostname: hostname ?? "lab-node",
      role: validRole,
      os: osId,
      message: `PXE boot the machine to start installation (role=${validRole}, os=${osId})`,
    });
  });

  // Receive install progress callbacks
  app.post<{
    Body: {
      mac?: string;
      stage?: string;
      detail?: string;
    };
  }>("/api/progress", async (request, reply) => {
    const { mac: rawMac, stage, detail } = request.body ?? {};
    const mac = normalizeMacAddress(rawMac ?? "unknown");
    const stageName = stage ?? "unknown";
    const detailStr = detail ?? "";

    const GREEN = "\x1b[0;32m";
    const YELLOW = "\x1b[1;33m";
    const RED = "\x1b[0;31m";
    const BOLD = "\x1b[1m";
    const RESET = "\x1b[0m";
    const icons: Record<string, string> = {
      partitioning: "◆", installing: "◆◆", "post-install": "◆◆◆",
      complete: "✔", error: "✘",
    };
    const icon = icons[stageName] ?? "·";
    const color = stageName === "complete" ? GREEN : stageName === "error" ? RED : YELLOW;
    console.log(` ${color}${icon}${RESET} ${mac} ${BOLD}${stageName}${RESET}${detailStr ? ` -- ${detailStr}` : ""}`);

    // Emit progress event for SSE clients
    const hostname = state.load().install_queue[mac]?.hostname ?? mac;
    progressBus.emit({
      mac, hostname, stage: stageName, detail: detailStr,
      timestamp: new Date().toISOString(),
    });

    state.update((s) => {
      const queueEntry = s.install_queue[mac];
      if (queueEntry) {
        queueEntry.progress = stageName;
        queueEntry.progress_at = new Date().toISOString();
        if (detailStr !== "") {
          queueEntry.progress_detail = detailStr;
        }
        // Append to progress log history
        if (!queueEntry.log) queueEntry.log = [];
        queueEntry.log.push({
          stage: stageName,
          detail: detailStr,
          timestamp: new Date().toISOString(),
        });
        // Move to installed on completion
        if (stageName === "complete") {
          const cfg = s.install_queue[mac];
          delete s.install_queue[mac];
          // The completion detail carries the machine's address as "ready at <ip>"
          const ip = detailStr.startsWith("ready at ")
            ? detailStr.replace("ready at ", "").trim()
            : "";
          const installedInfo: InstalledInfo = {
            hostname: cfg?.hostname ?? "?",
            role: cfg?.role ?? "?",
            ...(cfg?.os !== undefined ? { os: cfg.os } : {}),
            ip,
            installed_at: new Date().toISOString(),
          };
          s.installed[mac] = installedInfo;
          const admin = installedInfo.role !== "vanilla" && installedInfo.role !== "" ? "michal" : "root";
          console.log(`\n \x1b[0;32m\x1b[1m ssh ${admin}@${ip}\x1b[0m\n`); // eslint-disable-line no-console
          // Auto-install k3s for non-vanilla roles
          if (installedInfo.role !== "vanilla" && ip !== "") {
            void triggerPostProvisionK3s(installedInfo.hostname, ip, installedInfo.role, admin, mac);
          }
        }
      }
    });
    return reply.send({ status: "ok" });
  });

  // Receive raw log lines from kickstart scripts
  app.post<{
    Body: {
      mac?: string;
      line?: string;
      lines?: string[];
      tail?: string;
    };
  }>("/api/log", async (request, reply) => {
    const { mac: rawMac, line, lines: rawLines, tail } = request.body ?? {};
    const mac = normalizeMacAddress(rawMac ?? "unknown");

    // Collect all lines from the various input formats
    const allLines: string[] = [];
    if (line) allLines.push(line);
    if (rawLines) allLines.push(...rawLines);
    if (tail) {
      // tail is a string with escaped \n — split it into lines
      allLines.push(...tail.split("\\n").filter(Boolean));
    }
    if (allLines.length === 0) {
      return reply.send({ status: "ok", lines: 0 });
    }

    // Look up hostname from install queue for enriching events
    const hostname = state.load().install_queue[mac]?.hostname ?? mac;
    // Append to the install log buffer (this also emits to progressBus)
    installLog.append(mac, allLines, hostname);
    return reply.send({ status: "ok", lines: allLines.length });
  });

  // Delete a machine from all state
  app.delete<{
    Params: { mac: string };
  }>("/api/machines/:mac", async (request, reply) => {
    const mac = normalizeMacAddress(request.params.mac);
    if (mac === "") {
      return reply.status(400).send({ error: "mac is required" });
    }
    let found = false;
    state.update((s) => {
      if (s.discovered[mac] !== undefined) {
        delete s.discovered[mac];
        found = true;
      }
      if (s.install_queue[mac] !== undefined) {
        delete s.install_queue[mac];
        found = true;
      }
      if (s.installed[mac] !== undefined) {
        delete s.installed[mac];
        found = true;
      }
    });
    if (!found) {
      return reply.status(404).send({ error: "machine not found", mac });
    }
    logger.info(`MACHINE FORGOTTEN: ${mac}`);
    return reply.send({ status: "forgotten", mac });
  });

  // Receive discovery reports
  app.post<{
    Body: {
      mac?: string;
      product?: string;
      board?: string;
      serial?: string;
      manufacturer?: string;
      cpu_model?: string;
      cpu_cores?: number;
      memory_gb?: number;
      arch?: string;
      disks?: Array<{ name: string; size_gb: number; model: string }>;
      nics?: Array<{ name: string; mac: string; state: string }>;
    };
  }>("/api/discover", async (request, reply) => {
    const data = request.body;
    if (data === null || data === undefined) {
      return reply.status(400).send({ error: "invalid JSON" });
    }
    const mac = normalizeMacAddress(data.mac ?? "unknown");
    const now = new Date().toISOString();
    const isNew = state.load().discovered[mac] === undefined;
    state.update((s) => {
      const existing = s.discovered[mac];
      const hwInfo: HardwareInfo = {
        mac,
        product: data.product ?? "unknown",
        board: data.board ?? "unknown",
        serial: data.serial ?? "unknown",
        manufacturer: data.manufacturer ?? "unknown",
        cpu_model: data.cpu_model ?? "unknown",
        cpu_cores: data.cpu_cores ?? 0,
        memory_gb: data.memory_gb ?? 0,
        arch: data.arch ?? "unknown",
        disks: data.disks ?? [],
        nics: data.nics ?? [],
        // Keep the original first_seen when the machine reports again
        first_seen: existing?.first_seen ?? now,
        last_seen: now,
      };
      s.discovered[mac] = hwInfo;
    });
    const label = isNew ? "NEW MACHINE DISCOVERED" : "MACHINE RE-DISCOVERED";
    const cpu = data.cpu_model ?? "?";
    const cores = data.cpu_cores ?? "?";
    const mem = data.memory_gb ?? "?";
    logger.info(`${label}: ${mac} -- ${data.manufacturer ?? "?"} ${data.product ?? "?"} (${cpu}, ${cores} cores, ${mem}GB RAM)`);
    return reply.send({ status: "ok", mac, new: isNew });
  });

  // Update a machine's role (e.g. promote infra -> labcontroller)
  app.post<{
    Body: {
      mac?: string;
      role?: string;
    };
  }>("/api/role", async (request, reply) => {
    const { mac: rawMac, role } = request.body ?? {};
    const mac = normalizeMacAddress(rawMac ?? "");
    if (mac === "") {
      return reply.status(400).send({ error: "mac is required" });
    }
    if (!role) {
      return reply.status(400).send({ error: "role is required" });
    }
    let found = false;
    state.update((s) => {
      if (s.installed[mac]) {
        const oldRole = s.installed[mac].role;
        s.installed[mac].role = role;
        found = true;
        logger.info(`ROLE UPDATED: ${mac} (${s.installed[mac].hostname}) ${oldRole} -> ${role}`);
      }
    });
    if (!found) {
      return reply.status(404).send({ error: "machine not found in installed state", mac });
    }
    return reply.send({ status: "updated", mac, role });
  });

  // Get provision logs for a machine (current state snapshot + raw log lines)
  app.get<{
    Params: { mac: string };
    Querystring: { lines?: string; offset?: string };
  }>("/api/logs/:mac", async (request, reply) => {
    const mac = normalizeMacAddress(request.params.mac);
    // Malformed or negative query values fall back to the defaults instead of
    // reaching the log buffer as NaN.
    const logLimit = parseNonNegativeInt(request.query.lines, 200);
    const logOffset = parseNonNegativeInt(request.query.offset, 0);
    const currentState = state.load();
    const queueEntry = currentState.install_queue[mac];
    const installedEntry = currentState.installed[mac];
    if (queueEntry) {
      return reply.send({
        mac,
        hostname: queueEntry.hostname,
        status: "installing",
        progress: queueEntry.progress ?? "queued",
        progress_detail: queueEntry.progress_detail ?? "",
        progress_at: queueEntry.progress_at ?? queueEntry.queued_at,
        role: queueEntry.role,
        os: queueEntry.os,
        stages: queueEntry.log ?? [],
        log_lines: installLog.getLines(mac, logOffset, logLimit),
        log_total: installLog.lineCount(mac),
      });
    }
    if (installedEntry) {
      return reply.send({
        mac,
        hostname: installedEntry.hostname,
        status: "installed",
        progress: "complete",
        progress_detail: `ready at ${installedEntry.ip}`,
        progress_at: installedEntry.installed_at,
        role: installedEntry.role,
        ip: installedEntry.ip,
        log_lines: installLog.getLines(mac, logOffset, logLimit),
        log_total: installLog.lineCount(mac),
      });
    }
    return reply.status(404).send({ error: "machine not found", mac });
  });

  // SSE stream: follow provision progress for a machine (or all machines)
  app.get<{
    Params: { mac: string };
  }>("/api/logs/:mac/follow", async (request, reply) => {
    const filterMac = request.params.mac === "all"
      ? null
      : normalizeMacAddress(request.params.mac);
    void reply.raw.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      "Connection": "keep-alive",
    });

    // Send current state as first event
    const currentState = state.load();
    const queueEntry = filterMac ? currentState.install_queue[filterMac] : undefined;
    if (queueEntry) {
      const initData = JSON.stringify({
        mac: filterMac, hostname: queueEntry.hostname,
        stage: queueEntry.progress ?? "queued",
        detail: queueEntry.progress_detail ?? "",
        timestamp: queueEntry.progress_at ?? queueEntry.queued_at,
      });
      reply.raw.write(`data: ${initData}\n\n`);
    }

    const onProgress = (event: ProgressEvent): void => {
      if (filterMac && event.mac !== filterMac) return;
      // Use SSE event types so clients can filter: "stage" for progress, "log" for raw lines
      const eventType = event.stage === "log" ? "log" : "stage";
      reply.raw.write(`event: ${eventType}\ndata: ${JSON.stringify(event)}\n\n`);
    };
    progressBus.on(onProgress);
    // Unsubscribe when the client disconnects
    request.raw.on("close", () => {
      progressBus.off(onProgress);
    });
  });
}

View File

@@ -0,0 +1,249 @@
// Boot ISO generation.
// Generates a UEFI-bootable iPXE ISO using xorriso+mtools.
// The ISO is placed in httpDir so @fastify/static serves it with Range request
// support (required by JetKVM, which streams via HTTP Range + NBD).
//
// The ISO embeds kernel + initrd so machines without UEFI NIC support
// (no SNP protocol) can still boot. iPXE loads them from file:/ and the
// Linux kernel handles networking with its own drivers.
import { createHash } from "node:crypto";
import { execSync } from "node:child_process";
import { existsSync, readFileSync, statSync, writeFileSync, mkdirSync, rmSync, unlinkSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import type { BastionConfig } from "@lab/shared";
import { logger } from "../services/logger.js";
// Candidate iPXE EFI binaries per architecture, in order of preference: the
// first `src` path that exists on disk is copied into the ISO as
// EFI/BOOT/<efiName>. The SNP variant is listed first (scans all UEFI SNP
// handles, works from CD-ROM/USB boot).
const IPXE_ISO_PATHS: Record<string, { src: string[]; efiName: string }> = {
x86_64: {
src: [
"/usr/share/ipxe/ipxe-snp-x86_64.efi",
"/usr/share/ipxe/ipxe-x86_64.efi",
],
efiName: "BOOTX64.EFI",
},
aarch64: {
src: [
"/usr/share/ipxe/arm64-efi/ipxe-snp.efi",
"/usr/share/ipxe/arm64-efi/ipxe.efi",
],
efiName: "BOOTAA64.EFI",
},
};
// Base URL of the Fedora release mirror. Callers append
// "/<fedoraVersion>/Everything/<arch>/os" to reach the PXE kernel/initrd tree.
const FEDORA_MIRROR_BASE = "https://download.fedoraproject.org/pub/fedora/linux/releases";
// Locally cached kernel/initrd pair for one architecture, to be embedded in the ISO.
interface BootPayload {
// Fedora architecture name ("x86_64" or "aarch64")
arch: string;
// Path of the cached kernel file
vmlinuz: string;
// Path of the cached initrd file
initrd: string;
}
/**
 * Download `url` to `dest` with curl unless `dest` already exists.
 *
 * @param url   Source URL (redirects are followed, HTTP errors are fatal).
 * @param dest  Cache path; an existing file is treated as a valid cache hit.
 * @param label Human-readable artifact name used in log messages.
 * @throws The execSync error when curl exits non-zero.
 */
function downloadIfMissing(url: string, dest: string, label: string): void {
  if (existsSync(dest)) {
    logger.info(` ${label} -- cached`);
    return;
  }
  logger.info(` ${label} -- downloading...`);
  try {
    execSync(`curl -# -L -f -o "${dest}" "${url}"`, { stdio: "inherit" });
  } catch (err) {
    // An aborted transfer can leave a truncated file. Since the cache check is
    // a plain existence test, that file would be embedded in every later ISO,
    // so drop it before reporting the failure.
    try { rmSync(dest, { force: true }); } catch { /* keep the original error */ }
    throw err;
  }
}
/**
 * Build the UEFI-bootable iPXE ISO at `outputPath`.
 *
 * Steps: stage the available iPXE EFI binaries, download (or reuse cached)
 * Fedora kernel/initrd per architecture, write the iPXE autoexec script,
 * create a FAT EFI image with mtools and wrap it into a hybrid ISO with
 * xorriso. Requires curl, cp, dd, mformat/mmd/mcopy and xorriso on PATH.
 *
 * The temporary work directory is removed whether or not the build succeeds.
 *
 * @param config     Bastion config (server IP/port, Fedora version, bastionDir).
 * @param outputPath Where the finished ISO is written.
 * @throws Error when no iPXE EFI binary is installed or any external tool fails.
 */
function generateIso(config: BastionConfig, outputPath: string): void {
  const work = join(tmpdir(), `bastion-iso-${process.pid}`);
  const archs: string[] = [];
  // Start from a clean directory: a crashed earlier run with the same PID may
  // have left files behind.
  rmSync(work, { recursive: true, force: true });
  try {
    mkdirSync(join(work, "EFI", "BOOT"), { recursive: true });
    const bastionUrl = `http://${config.serverIp}:${config.httpPort}`;

    // Copy available iPXE EFI binaries
    for (const [arch, paths] of Object.entries(IPXE_ISO_PATHS)) {
      const srcFile = paths.src.find((s) => existsSync(s));
      if (srcFile) {
        execSync(`cp "${srcFile}" "${join(work, "EFI", "BOOT", paths.efiName)}"`, { stdio: "pipe" });
        archs.push(arch);
        logger.info(` iPXE ISO ${arch}: ${srcFile}`);
      }
    }
    if (archs.length === 0) throw new Error("No iPXE EFI binaries found");

    // Download and stage kernel/initrd for each architecture.
    // These are embedded in the ISO so machines without UEFI NIC support
    // can boot the Linux installer (which has its own NIC drivers).
    const cacheDir = join(config.bastionDir, "iso-cache");
    mkdirSync(cacheDir, { recursive: true });
    const payloads: BootPayload[] = [];
    for (const arch of ["x86_64", "aarch64"]) {
      const mirror = `${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/${arch}/os`;
      const vmlinuzCache = join(cacheDir, `vmlinuz-${arch}`);
      const initrdCache = join(cacheDir, `initrd-${arch}`);
      try {
        downloadIfMissing(
          `${mirror}/images/pxeboot/vmlinuz`,
          vmlinuzCache,
          `Fedora ${arch} kernel`,
        );
        downloadIfMissing(
          `${mirror}/images/pxeboot/initrd.img`,
          initrdCache,
          `Fedora ${arch} initrd`,
        );
        payloads.push({ arch, vmlinuz: vmlinuzCache, initrd: initrdCache });
      } catch {
        logger.warn(` Fedora ${arch} kernel/initrd not available -- skipping`);
      }
    }

    // Write iPXE autoexec script.
    // Strategy: try DHCP (for machines with UEFI NIC support), then fall back
    // to booting the embedded kernel/initrd from the ISO filesystem.
    // iPXE's ${buildarch} resolves to "x86_64" or "arm64".
    const ipxeScript = [
      "#!ipxe",
      "",
      "echo",
      "echo =============================================",
      "echo Lab PXE Bastion -- ISO Boot",
      "echo =============================================",
      "echo",
      "",
      "# Try DHCP (works if UEFI has NIC driver / SNP support)",
      "set attempts:int32 0",
      ":retry",
      "dhcp && goto netboot ||",
      "inc attempts",
      "iseq ${attempts} 3 || goto retry_wait",
      "goto localboot",
      ":retry_wait",
      "echo DHCP failed (attempt ${attempts}/3), retrying...",
      "sleep 2",
      "goto retry",
      "",
      "# Network available -- chain to bastion for dynamic dispatch",
      ":netboot",
      "echo Network OK. Chaining to bastion...",
      `chain ${bastionUrl}/boot.ipxe || shell`,
      "",
      "# No network -- boot embedded kernel (Linux has its own NIC drivers)",
      ":localboot",
      "echo No UEFI network support. Booting embedded installer...",
      "echo Linux will configure networking with its own drivers.",
      "echo",
      "# Map iPXE arch names to Fedora mirror paths (arm64 -> aarch64)",
      "set fedarch ${buildarch}",
      "iseq ${buildarch} arm64 && set fedarch aarch64 ||",
      `kernel file:/vmlinuz-\${buildarch} inst.ks=${bastionUrl}/discover.ks inst.repo=${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/\${fedarch}/os inst.text || goto no_kernel`,
      `initrd file:/initrd-\${buildarch} || goto no_kernel`,
      "boot || shell",
      "",
      ":no_kernel",
      "echo ERROR: kernel not found for this architecture. Dropping to shell.",
      "shell",
    ].join("\n");
    writeFileSync(join(work, "autoexec.ipxe"), ipxeScript);

    // Calculate EFI partition size: iPXE binaries + autoexec + kernel/initrd + margin
    let payloadSize = 2 * 1024 * 1024; // 2MB base for iPXE + autoexec + FAT overhead
    for (const p of payloads) {
      payloadSize += statSync(p.vmlinuz).size;
      payloadSize += statSync(p.initrd).size;
    }
    const efiSizeMB = Math.ceil(payloadSize / (1024 * 1024)) + 4; // +4MB margin
    logger.info(` EFI partition: ${efiSizeMB}MB (${payloads.length} arch payloads)`);

    // Create FAT EFI system partition
    const efiImg = join(work, "efi.img");
    execSync(`dd if=/dev/zero of="${efiImg}" bs=1M count=${efiSizeMB} 2>/dev/null`, { stdio: "pipe" });
    execSync(`mformat -i "${efiImg}" -v LABBOOT ::`, { stdio: "pipe" });
    execSync(`mmd -i "${efiImg}" ::/EFI`, { stdio: "pipe" });
    execSync(`mmd -i "${efiImg}" ::/EFI/BOOT`, { stdio: "pipe" });
    for (const arch of archs) {
      const paths = IPXE_ISO_PATHS[arch]!;
      execSync(`mcopy -i "${efiImg}" "${join(work, "EFI", "BOOT", paths.efiName)}" ::/EFI/BOOT/${paths.efiName}`, { stdio: "pipe" });
    }
    execSync(`mcopy -i "${efiImg}" "${join(work, "autoexec.ipxe")}" ::/autoexec.ipxe`, { stdio: "pipe" });

    // Copy kernel/initrd onto EFI partition with arch-specific names
    for (const p of payloads) {
      // iPXE ${buildarch} returns "x86_64" or "arm64"
      const archLabel = p.arch === "aarch64" ? "arm64" : p.arch;
      execSync(`mcopy -i "${efiImg}" "${p.vmlinuz}" ::/vmlinuz-${archLabel}`, { stdio: "pipe" });
      execSync(`mcopy -i "${efiImg}" "${p.initrd}" ::/initrd-${archLabel}`, { stdio: "pipe" });
      logger.info(` Embedded ${archLabel}: vmlinuz + initrd`);
    }

    // Build hybrid ISO: El Torito EFI boot + GPT EFI partition
    execSync([
      `xorriso -as mkisofs`,
      `-o "${outputPath}"`,
      `-R`,
      `-V LAB_BOOT`,
      `-e efi.img`,
      `-no-emul-boot`,
      `-partition_offset 16`,
      `-append_partition 2 0xEF "${efiImg}"`,
      `-appended_part_as_gpt`,
      `"${work}"`,
    ].join(" "), { stdio: "pipe" });
  } finally {
    // The work dir holds a full copy of the EFI image; never leave it behind
    // in the temp directory, even when a tool above failed.
    rmSync(work, { recursive: true, force: true });
  }
  logger.info(`Generated boot ISO (${archs.join(", ")}): ${outputPath}`);
}
/**
 * Compute a short hash of all inputs that affect ISO content: bastion
 * address, Fedora version, the selected iPXE binaries (path, size, mtime) and
 * the cached kernel and initrd files (path, size).
 *
 * @returns The first 16 hex characters of a SHA-256 digest.
 */
function computeIsoHash(config: BastionConfig): string {
  const h = createHash("sha256");
  h.update(`${config.serverIp}:${config.httpPort}`);
  h.update(config.fedoraVersion);
  for (const paths of Object.values(IPXE_ISO_PATHS)) {
    const srcFile = paths.src.find((s) => existsSync(s));
    if (srcFile) {
      const st = statSync(srcFile);
      h.update(`${srcFile}:${st.size}:${st.mtimeMs}`);
    }
  }
  // Include kernel/initrd cache state. Both files are embedded in the ISO, so
  // a change to either one must invalidate it.
  const cacheDir = join(config.bastionDir, "iso-cache");
  for (const arch of ["x86_64", "aarch64"]) {
    for (const kind of ["vmlinuz", "initrd"]) {
      const cached = join(cacheDir, `${kind}-${arch}`);
      if (existsSync(cached)) {
        h.update(`${cached}:${statSync(cached).size}`);
      }
    }
  }
  return h.digest("hex").slice(0, 16);
}
/**
 * Ensure boot.iso exists and is up-to-date in httpDir.
 * Called during startup so @fastify/static can serve it with Range support.
 *
 * The ISO is regenerated when it is missing or when the hash stored next to
 * it (boot.iso.hash) no longer matches the current inputs.
 *
 * @throws Whatever generateIso throws; in that case no hash file is left
 *         behind, so the next call regenerates.
 */
export function ensureBootIso(config: BastionConfig): void {
  const isoPath = join(config.httpDir, "boot.iso");
  const hashPath = join(config.httpDir, "boot.iso.hash");
  const cachedHash = existsSync(hashPath) ? readFileSync(hashPath, "utf-8").trim() : "";
  if (existsSync(isoPath) && computeIsoHash(config) === cachedHash) {
    logger.info(" Boot ISO -- cached (up to date)");
    return;
  }
  if (existsSync(isoPath)) {
    logger.info(" Boot ISO -- inputs changed, regenerating...");
    try { unlinkSync(isoPath); } catch { /* ignore */ }
  } else {
    logger.info(" Boot ISO -- generating...");
  }
  // Drop the old hash first so a failed build can never leave a partial ISO
  // next to a hash that claims it is valid.
  try { unlinkSync(hashPath); } catch { /* ignore */ }
  generateIso(config, isoPath);
  // generateIso fills the kernel/initrd cache, which is itself a hash input.
  // Hash after the build so the stored value matches what the next start
  // computes; hashing before would force one needless rebuild.
  writeFileSync(hashPath, computeIsoHash(config));
}

View File

@@ -0,0 +1,77 @@
// iPXE dispatch route.
// Routes PXE boot requests based on machine state:
// - install_queue -> install mode (serve Fedora installer + per-MAC kickstart)
// - installed -> exit (boot from local disk)
// - unknown -> discovery mode (collect hardware, POST to bastion)
import type { FastifyInstance } from "fastify";
import type { BastionConfig } from "@lab/shared";
import type { StateManager } from "../services/state.js";
import {
renderDiscoverIpxe,
renderInstallIpxe,
renderLocalBootIpxe,
} from "../templates/boot.ipxe.js";
import { renderUbuntuInstallIpxe } from "../templates/ubuntu-boot.ipxe.js";
import { logger } from "../services/logger.js";
/**
 * Register the iPXE `/dispatch` endpoint.
 *
 * The MAC from the query string is normalized (lower-case, colon-separated)
 * and looked up in the current state:
 *   - present in install_queue -> install script (Ubuntu or Fedora, by `os`)
 *   - present in installed     -> local-disk boot script
 *   - otherwise                -> discovery script
 *
 * @param app    Fastify instance to attach the route to.
 * @param config Bastion configuration passed through to the script templates.
 * @param state  State manager; state is re-loaded on every request.
 */
export function registerDispatchRoutes(
  app: FastifyInstance,
  config: BastionConfig,
  state: StateManager,
): void {
  // The MAC comes straight from the request, so only accept keys the state
  // table actually owns. A plain `table[mac]` would also match inherited
  // properties such as "constructor" or "__proto__" and wrongly select the
  // install branch.
  const ownEntry = <T>(table: Record<string, T>, key: string): T | undefined =>
    Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;

  app.get<{ Querystring: { mac?: string } }>("/dispatch", async (request, reply) => {
    const mac = (request.query.mac ?? "").toLowerCase().replace(/-/g, ":");
    const currentState = state.load();

    const queueEntry = ownEntry(currentState.install_queue, mac);
    if (queueEntry) {
      const hostname = queueEntry.hostname ?? "lab-node";
      const os = queueEntry.os ?? "fedora-43";
      logger.info(`INSTALL STARTED: ${mac} -> ${hostname} (${os})`);

      let script: string;
      if (os.startsWith("ubuntu")) {
        script = renderUbuntuInstallIpxe({
          mac,
          hostname,
          serverIp: config.serverIp,
          httpPort: config.httpPort,
          ubuntuVersion: config.ubuntuVersion,
        });
      } else {
        script = renderInstallIpxe({
          mac,
          hostname,
          serverIp: config.serverIp,
          httpPort: config.httpPort,
          fedoraVersion: config.fedoraVersion,
          fedoraMirror: config.fedoraMirror,
        });
      }
      return reply.type("text/plain").send(script);
    }

    const installedEntry = ownEntry(currentState.installed, mac);
    if (installedEntry) {
      const hostname = installedEntry.hostname ?? "?";
      logger.info(`PXE request from ${mac} (${hostname}) - already installed, booting local disk`);
      const script = renderLocalBootIpxe(hostname);
      return reply.type("text/plain").send(script);
    }

    // Unknown MAC -> discovery mode
    logger.info(`PXE request from ${mac} -> discovery mode`);
    const script = renderDiscoverIpxe({
      mac,
      serverIp: config.serverIp,
      httpPort: config.httpPort,
      fedoraMirror: config.fedoraMirror,
    });
    return reply.type("text/plain").send(script);
  });
}

View File

@@ -0,0 +1,71 @@
// Kickstart generation routes.
// Serves per-MAC install kickstart, static discovery kickstart,
// and Ubuntu autoinstall cloud-init endpoints.
import type { FastifyInstance } from "fastify";
import type { BastionConfig } from "@lab/shared";
import type { StateManager } from "../services/state.js";
import { generateInstallKickstart, generateDiscoverKickstart } from "../services/kickstart-generator.js";
import { renderUbuntuAutoinstall, renderUbuntuMetaData, type UbuntuAutoinstallParams } from "../templates/ubuntu-autoinstall.js";
/**
 * Register the kickstart / autoinstall endpoints:
 *   GET /ks?mac=...                  per-MAC Fedora install kickstart
 *   GET /discover.ks                 static discovery kickstart
 *   GET /autoinstall/:mac/user-data  Ubuntu autoinstall user-data
 *   GET /autoinstall/:mac/meta-data  Ubuntu autoinstall meta-data
 *
 * Machines that are not in the install queue get the defaults
 * ("lab-node", empty disk, "worker").
 */
export function registerKickstartRoutes(
  app: FastifyInstance,
  config: BastionConfig,
  state: StateManager,
): void {
  // State keys are lower-case, colon-separated MACs.
  const canonicalMac = (raw: string): string => raw.toLowerCase().replace(/-/g, ":");
  // State is re-read for every request.
  const queuedInstall = (mac: string) => state.load().install_queue[mac];

  // Per-MAC install kickstart
  app.get<{ Querystring: { mac?: string } }>("/ks", async (request, reply) => {
    const entry = queuedInstall(canonicalMac(request.query.mac ?? ""));
    const body = generateInstallKickstart(config, {
      hostname: entry?.hostname ?? "lab-node",
      disk: entry?.disk ?? "",
      role: entry?.role ?? "worker",
    });
    return reply.type("text/plain").send(body);
  });

  // Static discovery kickstart
  app.get("/discover.ks", async (_request, reply) => {
    return reply.type("text/plain").send(generateDiscoverKickstart(config));
  });

  // Ubuntu autoinstall user-data (cloud-init)
  app.get<{ Params: { mac: string } }>("/autoinstall/:mac/user-data", async (request, reply) => {
    const entry = queuedInstall(canonicalMac(request.params.mac));
    const aiParams: UbuntuAutoinstallParams = {
      hostname: entry?.hostname ?? "lab-node",
      disk: entry?.disk ?? "",
      role: entry?.role ?? "worker",
      domain: config.domain,
      ubuntuVersion: config.ubuntuVersion,
      timezone: config.timezone,
      locale: config.locale,
      serverIp: config.serverIp,
      httpPort: config.httpPort,
      sshKeys: config.sshKeys,
      adminUser: config.adminUser,
    };
    return reply.type("text/plain").send(renderUbuntuAutoinstall(aiParams));
  });

  // Ubuntu autoinstall meta-data (cloud-init)
  app.get<{ Params: { mac: string } }>("/autoinstall/:mac/meta-data", async (request, reply) => {
    const entry = queuedInstall(canonicalMac(request.params.mac));
    const nodeName = entry?.hostname ?? "lab-node";
    return reply.type("text/plain").send(renderUbuntuMetaData(nodeName));
  });
}

View File

@@ -0,0 +1,69 @@
// Fastify application setup with all routes registered.
import Fastify from "fastify";
import fastifyStatic from "@fastify/static";
import { mkdirSync, existsSync } from "node:fs";
import type { BastionConfig } from "@lab/shared";
import { StateManager } from "./services/state.js";
import { InstallLogBuffer } from "./services/install-log.js";
import { SyslogListener } from "./services/syslog-listener.js";
import { logger } from "./services/logger.js";
import { registerDispatchRoutes } from "./routes/dispatch.js";
import { registerKickstartRoutes } from "./routes/kickstart.js";
import { registerApiRoutes } from "./routes/api.js";
/**
 * Build the Fastify application together with the services it shares with
 * the rest of the bastion (state manager, install log buffer, syslog listener).
 * The server is configured but not started.
 */
export function createApp(config: BastionConfig): { app: ReturnType<typeof Fastify>; state: StateManager; installLog: InstallLogBuffer; syslog: SyslogListener } {
  // Fastify's own logger stays off; winston is used instead.
  const server = Fastify({ logger: false });

  const stateManager = new StateManager(config.stateFile);
  stateManager.init();
  const logBuffer = new InstallLogBuffer(config.bastionDir);
  const syslogListener = new SyslogListener(config.syslogPort, logBuffer, stateManager);

  // Static mounts: the HTTP directory (vmlinuz, initrd.img, iPXE binaries,
  // boot.iso) always, and the TFTP directory under /tftp/ when it exists so
  // the iPXE EFI binaries are reachable for UEFI HTTP Boot.
  mkdirSync(config.httpDir, { recursive: true });
  const staticMounts = [{ root: config.httpDir, prefix: "/" }];
  if (existsSync(config.tftpDir)) {
    staticMounts.push({ root: config.tftpDir, prefix: "/tftp/" });
  }
  for (const mount of staticMounts) {
    server.register(fastifyStatic, { ...mount, decorateReply: false });
  }

  // Route handlers
  registerDispatchRoutes(server, config, stateManager);
  registerKickstartRoutes(server, config, stateManager);
  registerApiRoutes(server, stateManager, logBuffer);

  // boot.iso is generated at startup and served as a static file from httpDir
  // (static serving supports HTTP Range requests, required by JetKVM streaming)

  // Log all requests
  server.addHook("onRequest", async (request) => {
    logger.info(`HTTP: ${request.ip} ${request.method} ${request.url}`);
  });

  return { app: server, state: stateManager, installLog: logBuffer, syslog: syslogListener };
}
/**
 * Create the app and bind it to all interfaces on the configured HTTP port.
 * A listen failure is logged and then re-thrown to the caller.
 */
export async function startServer(config: BastionConfig): Promise<void> {
  const server = createApp(config).app;
  try {
    await server.listen({ port: config.httpPort, host: "0.0.0.0" });
    logger.info(`HTTP server listening on :${config.httpPort}`);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    logger.error(`Failed to start HTTP server: ${reason}`);
    throw err;
  }
}

View File

@@ -0,0 +1,70 @@
// Generate dnsmasq configuration and manage the dnsmasq process lifecycle.
import { writeFileSync, mkdirSync } from "node:fs";
import { dirname } from "node:path";
import type { ResultPromise } from "execa";
import { execa } from "execa";
import type { BastionConfig } from "@lab/shared";
import { renderDnsmasqConf } from "../templates/dnsmasq.conf.js";
import { logger } from "./logger.js";
// Handle returned by execa for a dnsmasq child spawned with piped stdout/stderr.
type DnsmasqProcess = ResultPromise<{ stdout: "pipe"; stderr: "pipe" }>;
// The dnsmasq child currently tracked by this module, or null when none is.
// Set by startDnsmasq(); reset to null when the child exits or stopDnsmasq() runs.
let dnsmasqProcess: DnsmasqProcess | null = null;
/**
 * Render dnsmasq.conf from the current configuration and write it to
 * `<bastionDir>/dnsmasq.conf`, creating the directory if needed.
 *
 * @returns Path of the written configuration file.
 */
export function generateDnsmasqConf(config: BastionConfig): string {
  const target = `${config.bastionDir}/dnsmasq.conf`;
  const rendered = renderDnsmasqConf(config);

  mkdirSync(dirname(target), { recursive: true });
  writeFileSync(target, rendered);

  logger.info(`Generated dnsmasq config: ${target}`);
  return target;
}
/**
 * Start dnsmasq in the foreground (`--no-daemon`) as a child process.
 *
 * Regenerates the config file, spawns dnsmasq, forwards its stdout/stderr to
 * the logger line by line, and records the child in the module-level handle
 * so stopDnsmasq() can terminate it.
 *
 * NOTE(review): the execa handle is itself promise-like and this function is
 * `async`, so `await startDnsmasq()` presumably also waits for that promise
 * (i.e. until dnsmasq exits) -- confirm callers expect this.
 *
 * @returns The execa child-process handle.
 */
export async function startDnsmasq(config: BastionConfig): Promise<DnsmasqProcess> {
  const confPath = generateDnsmasqConf(config);
  logger.info(`Starting PXE server (${config.dhcpMode}DHCP on ${config.iface})...`);

  const proc = execa("dnsmasq", ["--no-daemon", `--conf-file=${confPath}`], {
    stdout: "pipe",
    stderr: "pipe",
  });
  dnsmasqProcess = proc;

  // A single chunk can carry several lines; log each as its own message so
  // every line gets the logger prefix.
  const forwardOutput = (data: Buffer): void => {
    for (const rawLine of data.toString().split("\n")) {
      const line = rawLine.trim();
      if (line) logger.info(`dnsmasq: ${line}`);
    }
  };
  proc.stdout?.on("data", forwardOutput);
  proc.stderr?.on("data", forwardOutput);

  proc.on("exit", (code) => {
    if (code !== null && code !== 0) {
      logger.error(`dnsmasq exited with code ${code}. Check if another DHCP/TFTP service is running.`);
    }
    // Only clear the handle if it still refers to this child. After a
    // stop/start cycle the late exit of the old process must not drop the
    // handle of the newly started one.
    if (dnsmasqProcess === proc) {
      dnsmasqProcess = null;
    }
  });

  return proc;
}
/**
 * Send SIGTERM to the tracked dnsmasq process, if any, and forget the handle.
 * Does nothing when no process is tracked.
 */
export function stopDnsmasq(): void {
  const running = dnsmasqProcess;
  if (!running) return;

  logger.info("Stopping dnsmasq...");
  running.kill("SIGTERM");
  dnsmasqProcess = null;
}

View File

@@ -0,0 +1,86 @@
// Per-machine install log buffer.
// Stores raw log lines in memory (ring buffer) and persists to disk.
// Used by /api/log for ingestion and /api/logs/:mac/follow for SSE streaming.
import { mkdirSync, appendFileSync, readFileSync, existsSync } from "node:fs";
import { join } from "node:path";
import { progressBus } from "./progress-events.js";
/** Maximum number of log lines kept in memory per machine. */
const MAX_LINES_IN_MEMORY = 2000;

/** One ingested log line plus the ISO timestamp at which it was received. */
export interface LogLine {
  line: string;
  timestamp: string;
}

/**
 * Per-machine install log store.
 *
 * Keeps the most recent MAX_LINES_IN_MEMORY lines per MAC in memory, appends
 * every line to `<bastionDir>/logs/<mac>.log`, and emits each line on the
 * progress bus with stage "log".
 */
export class InstallLogBuffer {
  /** In-memory ring buffer per MAC */
  private buffers = new Map<string, LogLine[]>();
  private logDir: string;

  /** @param bastionDir Base directory; logs go to its `logs` subdirectory, which is created here. */
  constructor(bastionDir: string) {
    this.logDir = join(bastionDir, "logs");
    mkdirSync(this.logDir, { recursive: true });
  }

  /**
   * Append log lines for a machine. Stores in memory + appends to file.
   *
   * @param mac      Machine identifier used as buffer key and log file name.
   * @param lines    Raw log lines; an empty array is a no-op.
   * @param hostname Optional hostname for emitted events (defaults to the MAC).
   */
  append(mac: string, lines: string[], hostname?: string): void {
    // Nothing to record; avoids appending a lone "\n" to the log file.
    if (lines.length === 0) return;

    const now = new Date().toISOString();
    const buffer = this.buffers.get(mac) ?? [];
    // Push one by one: spreading a very large batch into push() can exceed
    // the engine's argument limit.
    for (const line of lines) {
      buffer.push({ line, timestamp: now });
    }
    // Trim to ring buffer size
    if (buffer.length > MAX_LINES_IN_MEMORY) {
      buffer.splice(0, buffer.length - MAX_LINES_IN_MEMORY);
    }
    this.buffers.set(mac, buffer);

    // Persist to file
    const fileContent = lines.map((l) => `${now} ${l}`).join("\n") + "\n";
    appendFileSync(this.logFilePath(mac), fileContent);

    // Emit to SSE via progressBus (use "log" stage for log lines)
    const host = hostname ?? mac;
    for (const line of lines) {
      progressBus.emit({
        mac,
        hostname: host,
        stage: "log",
        detail: line,
        timestamp: now,
      });
    }
  }

  /** Get buffered log lines for a machine, starting at `offset`, at most `limit` lines. */
  getLines(mac: string, offset = 0, limit = 500): LogLine[] {
    const buffer = this.buffers.get(mac) ?? [];
    return buffer.slice(offset, offset + limit);
  }

  /** Get the number of lines currently buffered in memory for a machine. */
  lineCount(mac: string): number {
    return this.buffers.get(mac)?.length ?? 0;
  }

  /** Read full log from disk (for machines no longer in memory); null if no file exists. */
  readFromDisk(mac: string): string | null {
    const filePath = this.logFilePath(mac);
    if (!existsSync(filePath)) return null;
    return readFileSync(filePath, "utf-8");
  }

  /** Drop the in-memory buffer for a machine. The file on disk is left untouched. */
  clear(mac: string): void {
    this.buffers.delete(mac);
  }

  /**
   * Map a MAC to its log file path inside logDir.
   * Every character outside [A-Za-z0-9_-] becomes "-": colons as before, and
   * also "/", "\" and "." so a crafted identifier such as "../x" cannot
   * escape the log directory. Well-formed MACs map to the same file as before.
   */
  private logFilePath(mac: string): string {
    const safeName = mac.replace(/[^A-Za-z0-9_-]/g, "-");
    return join(this.logDir, `${safeName}.log`);
  }
}

View File

@@ -0,0 +1,437 @@
// Pure TypeScript UEFI-bootable ISO builder.
// Creates an ISO 9660 image with an embedded FAT EFI system partition
// containing iPXE EFI binaries and an autoexec script.
// No external tools required (no xorriso, mtools).
import { readFileSync } from "node:fs";
const SECTOR_SIZE = 2048; // ISO 9660 logical sector
// Sector size of the embedded FAT image; also the unit used for the
// El Torito boot-image sector count.
const FAT_SECTOR_SIZE = 512;
// --- Utility helpers ---
/**
 * Return a `len`-byte buffer holding `s` (cut to `len` characters) and
 * filled with the first character of `pad` to the right.
 */
function asciiPad(s: string, len: number, pad = " "): Buffer {
  const padded = Buffer.alloc(len, pad.charCodeAt(0));
  const copyLen = s.length < len ? s.length : len;
  padded.write(s, 0, copyLen, "ascii");
  return padded;
}
/** Encode `n` as an unsigned 16-bit little-endian value (2 bytes). */
function u16le(n: number): Buffer {
  const out = Buffer.alloc(2);
  out.writeUInt16LE(n, 0);
  return out;
}
/** Encode `n` as an unsigned 32-bit little-endian value (4 bytes). */
function u32le(n: number): Buffer {
  const out = Buffer.alloc(4);
  out.writeUInt32LE(n, 0);
  return out;
}
/** Encode `n` as an unsigned 16-bit big-endian value (2 bytes). */
function u16be(n: number): Buffer {
  const out = Buffer.alloc(2);
  out.writeUInt16BE(n, 0);
  return out;
}
/** Encode `n` as an unsigned 32-bit big-endian value (4 bytes). */
function u32be(n: number): Buffer {
  const out = Buffer.alloc(4);
  out.writeUInt32BE(n, 0);
  return out;
}
/**
 * Both-endian 16-bit (ISO 9660 "both-byte" format): the value as
 * little-endian followed by the same value as big-endian, 4 bytes in total.
 */
function u16both(n: number): Buffer {
  const out = Buffer.alloc(4);
  out.writeUInt16LE(n, 0);
  out.writeUInt16BE(n, 2);
  return out;
}
/**
 * Both-endian 32-bit: the value as little-endian followed by the same value
 * as big-endian, 8 bytes in total.
 */
function u32both(n: number): Buffer {
  const out = Buffer.alloc(8);
  out.writeUInt32LE(n, 0);
  out.writeUInt32BE(n, 4);
  return out;
}
/**
 * Encode a date as a 17-byte ISO 9660 volume timestamp: 16 ASCII digits
 * "YYYYMMDDHHMMSSCC" in UTC (hundredths always "00") followed by one
 * offset byte, which is 0 here.
 */
function isoDate(d: Date): Buffer {
  const twoDigits = (value: number): string => value.toString().padStart(2, "0");
  const digits = [
    d.getUTCFullYear().toString().padStart(4, "0"),
    twoDigits(d.getUTCMonth() + 1),
    twoDigits(d.getUTCDate()),
    twoDigits(d.getUTCHours()),
    twoDigits(d.getUTCMinutes()),
    twoDigits(d.getUTCSeconds()),
    "00", // hundredths
  ].join("");

  const stamp = Buffer.alloc(17, 0);
  stamp.write(digits, 0, 16, "ascii");
  stamp[16] = 0; // UTC offset (0 = UTC)
  return stamp;
}
/**
 * Encode a date as the 7-byte recording timestamp of a directory record:
 * years since 1900, month, day, hour, minute, second (all UTC) and a
 * trailing offset byte of 0.
 */
function dirRecordDate(d: Date): Buffer {
  return Buffer.from([
    d.getUTCFullYear() - 1900,
    d.getUTCMonth() + 1,
    d.getUTCDate(),
    d.getUTCHours(),
    d.getUTCMinutes(),
    d.getUTCSeconds(),
    0, // UTC
  ]);
}
// --- FAT12 filesystem builder ---
/**
 * Build a minimal FAT12 filesystem image in memory.
 *
 * Layout: BPB | FAT | FAT copy | root directory | data clusters.
 * The image always contains the directories EFI and EFI/BOOT. Files may be
 * placed either in the root directory ("name.ext") or in EFI/BOOT
 * ("EFI/BOOT/name.ext"). Names that do not fit the 8.3 format (for example
 * "autoexec.ipxe", whose extension has four characters) are stored with VFAT
 * long-name entries plus a generated "~N" short alias, so they can still be
 * opened by their full name.
 *
 * @param files Files to store; `path` uses "/" separators.
 * @returns The complete filesystem image.
 * @throws Error if a path is outside the supported locations, a directory
 *   runs out of entries, a name is longer than 255 characters, or the
 *   content does not fit into a FAT12 volume.
 */
function buildFatImage(files: Array<{ path: string; data: Buffer }>): Buffer {
  const bytesPerSector = FAT_SECTOR_SIZE;
  const sectorsPerCluster = 4; // 2KB clusters
  const clusterSize = bytesPerSector * sectorsPerCluster;
  const reservedSectors = 1;
  const numFats = 2;
  const dirEntrySize = 32;
  const rootEntryCount = 64; // 64 * 32 = 2048 bytes = 4 sectors
  const rootDirSectors = Math.ceil((rootEntryCount * dirEntrySize) / bytesPerSector);
  // A volume is only interpreted as FAT12 while it has fewer than 4085 clusters.
  const maxFat12Clusters = 4084;

  // Every file occupies at least one cluster (allocClusters below does the same).
  const clustersFor = (size: number): number => Math.max(1, Math.ceil(size / clusterSize));

  let dataClusters = 2 /* EFI and EFI/BOOT directories */ + 2 /* safety margin */;
  for (const f of files) dataClusters += clustersFor(f.data.length);
  if (dataClusters > maxFat12Clusters) {
    throw new Error(`FAT12 image too large: ${dataClusters} clusters needed, at most ${maxFat12Clusters} supported`);
  }

  const fatEntries = dataClusters + 2; // clusters start at 2
  const fatBytes = Math.ceil((fatEntries * 3) / 2); // FAT12: 1.5 bytes per entry
  const sectorsPerFat = Math.ceil(fatBytes / bytesPerSector);
  const totalSectors = reservedSectors + (numFats * sectorsPerFat) + rootDirSectors + (dataClusters * sectorsPerCluster);
  const image = Buffer.alloc(totalSectors * bytesPerSector, 0);

  // --- BPB (BIOS Parameter Block) ---
  image[0] = 0xEB; image[1] = 0x3C; image[2] = 0x90; // Jump + NOP
  image.write("LABCTL".padEnd(8, " "), 3, 8, "ascii"); // OEM name, space padded
  image.writeUInt16LE(bytesPerSector, 11);
  image[13] = sectorsPerCluster;
  image.writeUInt16LE(reservedSectors, 14);
  image[16] = numFats;
  image.writeUInt16LE(rootEntryCount, 17);
  image.writeUInt16LE(totalSectors, 19); // always fits 16 bits given the cluster limit above
  image[21] = 0xF0; // media descriptor (removable)
  image.writeUInt16LE(sectorsPerFat, 22);
  image.writeUInt16LE(1, 24); // sectors per track
  image.writeUInt16LE(1, 26); // heads
  image[38] = 0x29; // Extended boot sig
  image.writeUInt32LE(0x12345678, 39); // volume serial
  image.write("IPXE BOOT".padEnd(11, " "), 43, 11, "ascii"); // volume label
  image.write("FAT12".padEnd(8, " "), 54, 8, "ascii"); // filesystem type
  image[510] = 0x55; image[511] = 0xAA; // Boot signature

  // --- FAT table ---
  const fatOffset = reservedSectors * bytesPerSector;
  const rootDirOffset = fatOffset + (numFats * sectorsPerFat * bytesPerSector);
  const dataOffset = rootDirOffset + (rootDirSectors * bytesPerSector);

  // FAT12 helper: write a 12-bit entry. Two entries share three bytes, so the
  // neighbouring nibble has to be preserved.
  function fatSet(fat: number, cluster: number, value: number): void {
    const off = fatOffset + (fat * sectorsPerFat * bytesPerSector);
    const byteIdx = Math.floor(cluster * 3 / 2);
    if (cluster % 2 === 0) {
      image[off + byteIdx] = value & 0xFF;
      image[off + byteIdx + 1] = (image[off + byteIdx + 1]! & 0xF0) | ((value >> 8) & 0x0F);
    } else {
      image[off + byteIdx] = (image[off + byteIdx]! & 0x0F) | ((value & 0x0F) << 4);
      image[off + byteIdx + 1] = (value >> 4) & 0xFF;
    }
  }

  // Reserved entries 0 and 1 (media descriptor / end-of-chain marker)
  for (let f = 0; f < numFats; f++) {
    fatSet(f, 0, 0xFF0);
    fatSet(f, 1, 0xFFF);
  }

  // Allocate a contiguous cluster chain and link it in every FAT copy.
  let nextCluster = 2;
  function allocClusters(size: number): number {
    const needed = clustersFor(size);
    const startCluster = nextCluster;
    for (let i = 0; i < needed; i++) {
      const c = nextCluster++;
      const next = (i === needed - 1) ? 0xFFF : c + 1;
      for (let f = 0; f < numFats; f++) fatSet(f, c, next);
    }
    return startCluster;
  }

  function clusterOffset(cluster: number): number {
    return dataOffset + (cluster - 2) * clusterSize;
  }

  // --- Directory writers ---
  type Directory = { buf: Buffer; next: number; capacity: number; used: Set<string>; label: string };

  // Reserve the next free 32-byte slot of a directory and return its byte offset.
  function claimSlot(dir: Directory): number {
    if (dir.next >= dir.capacity) {
      throw new Error(`FAT directory ${dir.label} is full (${dir.capacity} entries)`);
    }
    return dir.next++ * dirEntrySize;
  }

  // Write a short (8.3) directory entry.
  function writeDirEntry(dir: Directory, name: string, ext: string, cluster: number, size: number, isDir: boolean): void {
    const off = claimSlot(dir);
    const name8 = name.toUpperCase();
    const ext3 = ext.toUpperCase();
    dir.buf.write(name8.padEnd(8, " "), off, 8, "ascii");
    dir.buf.write(ext3.padEnd(3, " "), off + 8, 3, "ascii");
    dir.buf[off + 11] = isDir ? 0x10 : 0x20; // attributes
    dir.buf.writeUInt16LE(cluster & 0xFFFF, off + 26); // first cluster low
    dir.buf.writeUInt32LE(isDir ? 0 : size, off + 28); // file size
    dir.used.add(`${name8}.${ext3}`);
  }

  // Byte offsets of the 13 UTF-16 code units inside a long-name entry.
  const lfnCharOffsets = [1, 3, 5, 7, 9, 14, 16, 18, 20, 22, 24, 28, 30];

  // Write the VFAT long-name entries that must directly precede the short entry.
  function writeLongNameEntries(dir: Directory, longName: string, name8: string, ext3: string): void {
    if (longName.length > 255) {
      throw new Error(`File name too long for FAT: ${longName}`);
    }
    // Checksum over the 11-byte short name ties the long entries to their alias.
    const short11 = Buffer.from(name8.padEnd(8, " ") + ext3.padEnd(3, " "), "ascii");
    let checksum = 0;
    for (const byte of short11) {
      checksum = (((checksum & 1) << 7) + (checksum >> 1) + byte) & 0xFF;
    }

    const units: number[] = [];
    for (let i = 0; i < longName.length; i++) units.push(longName.charCodeAt(i));
    // A name that does not fill its last entry is NUL-terminated, then padded with 0xFFFF.
    if (units.length % 13 !== 0) {
      units.push(0x0000);
      while (units.length % 13 !== 0) units.push(0xFFFF);
    }

    const entryCount = units.length / 13;
    // Entries are stored last-part-first; the first one on disk carries the 0x40 flag.
    for (let seq = entryCount; seq >= 1; seq--) {
      const off = claimSlot(dir);
      dir.buf[off] = seq === entryCount ? (seq | 0x40) : seq;
      dir.buf[off + 11] = 0x0F; // long-name attribute
      dir.buf[off + 12] = 0;
      dir.buf[off + 13] = checksum;
      dir.buf.writeUInt16LE(0, off + 26); // first cluster is always 0 in long-name entries
      for (let i = 0; i < 13; i++) {
        dir.buf.writeUInt16LE(units[(seq - 1) * 13 + i]!, off + lfnCharOffsets[i]!);
      }
    }
  }

  const shortNameChars = /^[A-Z0-9!#$%&'()\-@^_`{}~]+$/;

  // Add a file entry, using a plain 8.3 entry when the name fits and a
  // long-name sequence plus generated alias otherwise.
  function addFile(dir: Directory, fileName: string, cluster: number, size: number): void {
    const dot = fileName.lastIndexOf(".");
    const base = dot > 0 ? fileName.slice(0, dot) : fileName;
    const ext = dot > 0 ? fileName.slice(dot + 1) : "";
    const upperBase = base.toUpperCase();
    const upperExt = ext.toUpperCase();

    const fits83 =
      base.length >= 1 && base.length <= 8 && ext.length <= 3 &&
      shortNameChars.test(upperBase) && (ext === "" || shortNameChars.test(upperExt));
    if (fits83) {
      writeDirEntry(dir, upperBase, upperExt, cluster, size, false);
      return;
    }

    const sanitize = (s: string): string =>
      s.toUpperCase().replace(/[. ]/g, "").replace(/[^A-Z0-9!#$%&'()\-@^_`{}~]/g, "_");
    const stem = sanitize(base) || "_";
    const ext3 = sanitize(ext).slice(0, 3);

    // Find a "~N" alias that is not yet used in this directory.
    let alias = "";
    for (let n = 1; n <= 999 && alias === ""; n++) {
      const tail = `~${n}`;
      const candidate = stem.slice(0, 8 - tail.length) + tail;
      if (!dir.used.has(`${candidate}.${ext3}`)) alias = candidate;
    }
    if (alias === "") {
      throw new Error(`Cannot derive a unique short name for ${fileName}`);
    }

    writeLongNameEntries(dir, fileName, alias, ext3);
    writeDirEntry(dir, alias, ext3, cluster, size, false);
  }

  // --- Create directory structure ---
  // Root: EFI dir + root-level files (e.g. autoexec.ipxe)
  // EFI: BOOT dir
  // BOOT: BOOTX64.EFI, BOOTAA64.EFI
  const efiDirCluster = allocClusters(clusterSize);
  const bootDirCluster = allocClusters(clusterSize);

  const rootDir: Directory = {
    buf: image.subarray(rootDirOffset, rootDirOffset + rootDirSectors * bytesPerSector),
    next: 0,
    capacity: rootEntryCount,
    used: new Set<string>(),
    label: "/",
  };
  const efiDir: Directory = {
    buf: Buffer.alloc(clusterSize, 0),
    next: 0,
    capacity: clusterSize / dirEntrySize,
    used: new Set<string>(),
    label: "/EFI",
  };
  const bootDir: Directory = {
    buf: Buffer.alloc(clusterSize, 0),
    next: 0,
    capacity: clusterSize / dirEntrySize,
    used: new Set<string>(),
    label: "/EFI/BOOT",
  };

  // Volume label is the first root entry.
  const labelOff = claimSlot(rootDir);
  rootDir.buf.write("IPXE BOOT".padEnd(11, " "), labelOff, 11, "ascii");
  rootDir.buf[labelOff + 11] = 0x08; // volume label attribute

  writeDirEntry(rootDir, "EFI", "", efiDirCluster, 0, true);

  // "." / ".." plus the BOOT subdirectory inside EFI (".." of EFI is the root = cluster 0)
  writeDirEntry(efiDir, ".", "", efiDirCluster, 0, true);
  writeDirEntry(efiDir, "..", "", 0, 0, true);
  writeDirEntry(efiDir, "BOOT", "", bootDirCluster, 0, true);

  writeDirEntry(bootDir, ".", "", bootDirCluster, 0, true);
  writeDirEntry(bootDir, "..", "", efiDirCluster, 0, true);

  // --- Write files ---
  for (const file of files) {
    const segments = file.path.split("/").filter(Boolean);
    const fileName = segments[segments.length - 1];

    let target: Directory | null = null;
    if (segments.length === 1) {
      target = rootDir;
    } else if (
      segments.length === 3 &&
      segments[0]!.toUpperCase() === "EFI" &&
      segments[1]!.toUpperCase() === "BOOT"
    ) {
      target = bootDir;
    }
    // Refuse paths this builder cannot place instead of writing data that no
    // directory entry points to.
    if (fileName === undefined || target === null) {
      throw new Error(`Unsupported path in FAT image: ${file.path}`);
    }

    const fileCluster = allocClusters(file.data.length);
    file.data.copy(image, clusterOffset(fileCluster));
    addFile(target, fileName, fileCluster, file.data.length);
  }

  // Subdirectories were assembled in scratch buffers; copy them into their clusters.
  efiDir.buf.copy(image, clusterOffset(efiDirCluster));
  bootDir.buf.copy(image, clusterOffset(bootDirCluster));
  return image;
}
// --- ISO 9660 builder ---
/**
 * Build a UEFI-bootable ISO 9660 image whose El Torito boot image is a FAT
 * filesystem containing the given files (plus `autoexec.ipxe` when
 * `scriptContent` is provided).
 *
 * ISO layout (2048-byte sectors):
 *   0-15  system area (unused)
 *   16    Primary Volume Descriptor
 *   17    Boot Record Volume Descriptor (El Torito)
 *   18    Volume Descriptor Set Terminator
 *   19    root directory
 *   20    El Torito boot catalog
 *   21+   FAT boot image (also visible as EFI.IMG in the root directory)
 *
 * @param efiFiles      Files to place in the FAT image (paths as accepted by buildFatImage).
 * @param scriptContent Optional iPXE script stored as autoexec.ipxe in the FAT root.
 * @returns The complete ISO image.
 */
export function buildBootIso(efiFiles: Array<{ path: string; data: Buffer }>, scriptContent?: string): Buffer {
  const now = new Date();

  // Build FAT image with all files
  const allFiles = [...efiFiles];
  if (scriptContent) {
    allFiles.push({ path: "autoexec.ipxe", data: Buffer.from(scriptContent, "utf-8") });
  }
  const fatImage = buildFatImage(allFiles);

  const fatSectors = Math.ceil(fatImage.length / SECTOR_SIZE);
  const rootDirSector = 19;
  const bootCatalogSector = 20;
  const efiImageSector = 21;
  const totalSectors = efiImageSector + fatSectors + 1;
  const iso = Buffer.alloc(totalSectors * SECTOR_SIZE, 0);

  // Build one ISO 9660 directory record. Using a single helper keeps the field
  // offsets identical for every record (the volume sequence number lives at
  // bytes 28-31).
  function dirRecord(id: Buffer, extentSector: number, dataLength: number, flags: number): Buffer {
    // 33 fixed bytes + identifier, padded to an even total length.
    const length = 33 + id.length + ((id.length % 2 === 0) ? 1 : 0);
    const rec = Buffer.alloc(length, 0);
    rec[0] = length;
    u32both(extentSector).copy(rec, 2); // extent location
    u32both(dataLength).copy(rec, 10); // data length
    dirRecordDate(now).copy(rec, 18);
    rec[25] = flags;
    u16both(1).copy(rec, 28); // volume sequence number
    rec[32] = id.length;
    id.copy(rec, 33);
    return rec;
  }

  // --- Primary Volume Descriptor (sector 16) ---
  const pvd = iso.subarray(16 * SECTOR_SIZE, 17 * SECTOR_SIZE);
  pvd[0] = 1; // type: Primary
  pvd.write("CD001", 1, 5, "ascii"); // standard identifier
  pvd[6] = 1; // version
  asciiPad("LABCTL", 32).copy(pvd, 8); // system identifier
  asciiPad("IPXE_BOOT", 32).copy(pvd, 40); // volume identifier
  u32both(totalSectors).copy(pvd, 80); // volume space size
  u16both(1).copy(pvd, 120); // volume set size
  u16both(1).copy(pvd, 124); // volume sequence number
  u16both(SECTOR_SIZE).copy(pvd, 128); // logical block size
  // Root directory record (34 bytes, identifier is the single byte 0x00)
  dirRecord(Buffer.from([0]), rootDirSector, SECTOR_SIZE, 0x02).copy(pvd, 156);
  // Volume dates
  isoDate(now).copy(pvd, 813); // creation
  isoDate(now).copy(pvd, 830); // modification
  // "Not specified": sixteen ASCII zeros followed by a zero offset byte.
  const unspecifiedDate = Buffer.alloc(17, 0x30);
  unspecifiedDate[16] = 0;
  unspecifiedDate.copy(pvd, 847); // expiration (none)
  isoDate(now).copy(pvd, 864); // effective
  pvd[881] = 1; // file structure version

  // --- Boot Record Volume Descriptor (El Torito, sector 17) ---
  const brvd = iso.subarray(17 * SECTOR_SIZE, 18 * SECTOR_SIZE);
  brvd[0] = 0; // type: Boot Record
  brvd.write("CD001", 1, 5, "ascii");
  brvd[6] = 1; // version
  brvd.write("EL TORITO SPECIFICATION", 7, 32, "ascii");
  u32le(bootCatalogSector).copy(brvd, 0x47); // boot catalog pointer

  // --- Volume Descriptor Set Terminator (sector 18) ---
  const vdst = iso.subarray(18 * SECTOR_SIZE, 19 * SECTOR_SIZE);
  vdst[0] = 255; // type: terminator
  vdst.write("CD001", 1, 5, "ascii");
  vdst[6] = 1;

  // --- Root Directory (sector 19) ---
  const rootDir = iso.subarray(rootDirSector * SECTOR_SIZE, (rootDirSector + 1) * SECTOR_SIZE);
  const rootRecords = [
    dirRecord(Buffer.from([0]), rootDirSector, SECTOR_SIZE, 0x02), // "."
    dirRecord(Buffer.from([1]), rootDirSector, SECTOR_SIZE, 0x02), // ".."
    // EFI boot image file entry (the FAT image visible as a file)
    dirRecord(Buffer.from("EFI.IMG;1", "ascii"), efiImageSector, fatImage.length, 0x00),
    // Boot catalog file entry (hidden)
    dirRecord(Buffer.from("BOOT.CAT;1", "ascii"), bootCatalogSector, SECTOR_SIZE, 0x01),
  ];
  let offset = 0;
  for (const rec of rootRecords) {
    rec.copy(rootDir, offset);
    offset += rec.length;
  }

  // --- El Torito Boot Catalog (sector 20) ---
  const catalog = iso.subarray(bootCatalogSector * SECTOR_SIZE, (bootCatalogSector + 1) * SECTOR_SIZE);
  // Validation entry (32 bytes)
  catalog[0] = 1; // header ID
  catalog[1] = 0xEF; // platform: EFI
  catalog.write("LABCTL", 4, 24, "ascii"); // ID string
  // The key bytes are part of the checksummed data, so they must be in place
  // before the checksum is computed: all sixteen 16-bit words of the entry
  // (checksum and key bytes included) have to sum to zero.
  catalog[30] = 0x55;
  catalog[31] = 0xAA;
  let wordSum = 0;
  for (let i = 0; i < 32; i += 2) {
    wordSum += catalog.readUInt16LE(i);
  }
  catalog.writeUInt16LE((0x10000 - (wordSum & 0xFFFF)) & 0xFFFF, 28); // checksum

  // Default/Initial entry (32 bytes, offset 32)
  catalog[32] = 0x88; // bootable
  catalog[33] = 0x00; // boot media type: no emulation (the EFI platform ID belongs in the validation entry)
  catalog.writeUInt16LE(0, 34); // load segment
  catalog[36] = 0; // system type
  const efiImageSectors512 = Math.ceil(fatImage.length / FAT_SECTOR_SIZE);
  // 16-bit field: saturate instead of wrapping around for oversized images.
  catalog.writeUInt16LE(Math.min(efiImageSectors512, 0xFFFF), 38); // sector count
  catalog.writeUInt32LE(efiImageSector, 40); // load LBA

  // --- EFI boot image (FAT filesystem, starting at sector 21) ---
  fatImage.copy(iso, efiImageSector * SECTOR_SIZE);
  return iso;
}
/**
 * Build a ready-to-serve iPXE boot ISO from system iPXE binaries.
 *
 * Every iPXE EFI binary found at the well-known system paths is included;
 * architectures whose binary cannot be read are skipped. The embedded
 * autoexec script brings the network up via DHCP (one retry after 3 seconds)
 * and chains to `<bastionUrl>/boot.ipxe`, dropping to the iPXE shell if the
 * chain fails.
 *
 * @param bastionUrl Base URL of the bastion HTTP server (no trailing slash).
 * @throws Error if no iPXE EFI binary could be read.
 */
export function buildBastionBootIso(bastionUrl: string): Buffer {
  const efiFiles: Array<{ path: string; data: Buffer }> = [];
  const PATHS: Record<string, { src: string; dest: string }> = {
    x86_64: { src: "/usr/share/ipxe/ipxe-snponly-x86_64.efi", dest: "EFI/BOOT/BOOTX64.EFI" },
    aarch64: { src: "/usr/share/ipxe/arm64-efi/snponly.efi", dest: "EFI/BOOT/BOOTAA64.EFI" },
  };
  for (const paths of Object.values(PATHS)) {
    try {
      efiFiles.push({ path: paths.dest, data: readFileSync(paths.src) });
    } catch {
      // Architecture not available, skip
    }
  }
  if (efiFiles.length === 0) {
    throw new Error("No iPXE EFI binaries found on system");
  }

  // iPXE scripts have no "( ... )" grouping: commands can only be chained
  // with && / ||, so the retry is expressed with a label and goto. A trailing
  // "||" ignores the failure of the first attempt; the second "dhcp" stands on
  // its own line, so a second failure still stops the script.
  const script = [
    "#!ipxe",
    "",
    "echo Booting from iPXE ISO -- connecting to bastion...",
    "dhcp && goto bastion ||",
    "echo DHCP failed, retrying...",
    "sleep 3",
    "dhcp",
    ":bastion",
    `chain ${bastionUrl}/boot.ipxe || shell`,
  ].join("\n");
  return buildBootIso(efiFiles, script);
}

View File

@@ -0,0 +1,45 @@
// Generate kickstart content for discovery and install modes.
// Uses template literal functions -- no external template engine.
import type { BastionConfig, Role } from "@lab/shared";
import { renderDiscoverKickstart } from "../templates/discover.ks.js";
import { renderInstallKickstart, type InstallKickstartParams } from "../templates/install.ks.js";
/**
 * Render the discovery kickstart (collects hardware info and POSTs it to the
 * bastion), parameterized with the bastion's address and HTTP port.
 */
export function generateDiscoverKickstart(config: BastionConfig): string {
  const { serverIp, httpPort } = config;
  return renderDiscoverKickstart({ serverIp, httpPort });
}
/**
 * Render the install kickstart (LVM partitioning, packages, post-install
 * configuration) for one machine.
 *
 * @param config Bastion-wide settings copied into the template parameters.
 * @param params Per-machine values: hostname, target disk and role.
 */
export function generateInstallKickstart(
  config: BastionConfig,
  params: {
    hostname: string;
    disk: string;
    role: Role;
  },
): string {
  const { hostname, disk, role } = params;
  const {
    domain,
    fedoraVersion,
    timezone,
    locale,
    serverIp,
    httpPort,
    syslogPort,
    sshKeys,
    adminUser,
  } = config;

  const ksParams: InstallKickstartParams = {
    hostname,
    disk,
    role,
    domain,
    fedoraVersion,
    timezone,
    locale,
    serverIp,
    httpPort,
    syslogPort,
    sshKeys,
    adminUser,
  };
  return renderInstallKickstart(ksParams);
}

View File

@@ -0,0 +1,252 @@
// WebSocket connection from bastion to labd for registration and state sync.
// If LABD_URL is configured, bastion registers with labd on startup and pushes
// state changes. If not configured, bastion runs standalone (backward compatible).
import WebSocket from "ws";
import { readFileSync, writeFileSync, existsSync } from "node:fs";
import { hostname as osHostname } from "node:os";
import type { BastionState, BastionConfig } from "@lab/shared";
import {
type BastionMessage,
type LabdBastionMessage,
isLabdBastionMessage,
} from "@lab/shared";
import { logger } from "./logger.js";
const HEARTBEAT_INTERVAL_MS = 10_000;
const RECONNECT_BASE_DELAY_MS = 1_000;
const RECONNECT_MAX_DELAY_MS = 30_000;
type CommandHandler = (msg: LabdBastionMessage) => Promise<{ status: "ok" | "error"; data?: unknown; error?: string }>;
export class BastionConnection {
private ws: WebSocket | null = null;
private bastionId: string | null = null;
private heartbeatTimer: NodeJS.Timeout | null = null;
private reconnectTimer: NodeJS.Timeout | null = null;
private retryCount = 0;
private closed = false;
private startTime = Date.now();
private commandHandlers = new Map<string, CommandHandler>();
constructor(
private readonly config: BastionConfig,
private readonly getState: () => BastionState,
) {
// Load persisted bastionId if we've enrolled before
const idFile = `${config.bastionDir}/bastion-id`;
if (existsSync(idFile)) {
this.bastionId = readFileSync(idFile, "utf-8").trim();
}
}
/** Register a handler for incoming commands from labd. */
onCommand(type: string, handler: CommandHandler): void {
this.commandHandlers.set(type, handler);
}
connect(): void {
if (this.closed) return;
if (!this.config.labdUrl) return;
const wsUrl = this.config.labdUrl
.replace(/^https:/, "wss:")
.replace(/^http:/, "ws:");
const token = this.config.bastionJoinToken ?? "";
const url = `${wsUrl}/ws/bastion?token=${encodeURIComponent(token)}`;
logger.info(`Connecting to labd at ${this.config.labdUrl}...`);
this.ws = new WebSocket(url);
this.ws.on("open", () => {
logger.info("Connected to labd");
this.retryCount = 0;
// Send enrollment or re-registration
if (this.bastionId) {
// Already enrolled — send state sync immediately
this.sendStateSync();
} else {
// First time — enroll
this.send({
type: "bastion-enroll",
token,
hostname: osHostname(),
network: this.config.network,
serverIp: this.config.serverIp,
});
}
this.startHeartbeat();
});
this.ws.on("message", (data: WebSocket.Data) => {
try {
const raw = data.toString();
const msg: unknown = JSON.parse(raw);
if (!isLabdBastionMessage(msg)) {
logger.warn(`Unknown message from labd: ${(msg as { type?: string }).type}`);
return;
}
this.handleMessage(msg);
} catch (err) {
logger.error(`Failed to parse labd message: ${err instanceof Error ? err.message : String(err)}`);
}
});
this.ws.on("close", () => {
logger.warn("Disconnected from labd");
this.stopHeartbeat();
this.scheduleReconnect();
});
this.ws.on("error", (err) => {
logger.error(`WebSocket error: ${err.message}`);
// close event will fire after this, triggering reconnect
});
}
/** Push current state to labd. Call this after any state change. */
syncState(): void {
if (!this.bastionId || !this.ws || this.ws.readyState !== WebSocket.OPEN) return;
this.sendStateSync();
}
/** Forward a progress event to labd. */
sendProgress(mac: string, stage: string, detail: string): void {
if (!this.bastionId || !this.ws || this.ws.readyState !== WebSocket.OPEN) return;
this.send({
type: "bastion-progress",
bastionId: this.bastionId,
mac,
stage,
detail,
timestamp: new Date().toISOString(),
});
}
close(): void {
this.closed = true;
this.stopHeartbeat();
if (this.reconnectTimer) {
clearTimeout(this.reconnectTimer);
this.reconnectTimer = null;
}
if (this.ws) {
this.ws.close();
this.ws = null;
}
}
private handleMessage(msg: LabdBastionMessage): void {
switch (msg.type) {
case "bastion-enrolled":
this.bastionId = msg.bastionId;
// Persist for reconnects
writeFileSync(`${this.config.bastionDir}/bastion-id`, msg.bastionId);
logger.info(`Enrolled with labd as bastion ${msg.bastionId}`);
// Send initial state
this.sendStateSync();
break;
case "bastion-heartbeat-ack":
// No-op, confirms labd is alive
break;
case "server-shutdown":
logger.info(`labd shutting down, will reconnect in ${msg.reconnectAfter}ms`);
break;
case "command-install":
case "command-forget":
case "command-role-update":
void this.handleCommand(msg);
break;
}
}
private async handleCommand(msg: LabdBastionMessage & { requestId: string }): Promise<void> {
const handler = this.commandHandlers.get(msg.type);
if (!handler) {
this.send({
type: "command-response",
requestId: msg.requestId,
status: "error",
error: `No handler for command: ${msg.type}`,
});
return;
}
try {
const result = await handler(msg);
this.send({
type: "command-response",
requestId: msg.requestId,
...result,
});
} catch (err) {
this.send({
type: "command-response",
requestId: msg.requestId,
status: "error",
error: err instanceof Error ? err.message : String(err),
});
}
}
/** Send a full state snapshot to labd; skipped while no bastion id is set. */
private sendStateSync(): void {
  const bastionId = this.bastionId;
  if (bastionId) {
    this.send({
      type: "bastion-state-sync",
      bastionId,
      state: this.getState(),
    });
  }
}
/**
 * (Re)start the periodic heartbeat. Each tick reports uptime in seconds and
 * the total number of machines across the discovered, queued and installed
 * maps. Ticks are skipped while no bastion id is set.
 */
private startHeartbeat(): void {
  this.stopHeartbeat();

  const beat = (): void => {
    if (!this.bastionId) return;
    const { discovered, install_queue, installed } = this.getState();
    const machineCount = [discovered, install_queue, installed].reduce(
      (total, group) => total + Object.keys(group).length,
      0,
    );
    this.send({
      type: "bastion-heartbeat",
      bastionId: this.bastionId,
      uptime: Math.floor((Date.now() - this.startTime) / 1000),
      machineCount,
    });
  };

  this.heartbeatTimer = setInterval(beat, HEARTBEAT_INTERVAL_MS);
}
/** Cancel the heartbeat interval, if one is running. */
private stopHeartbeat(): void {
  const timer = this.heartbeatTimer;
  if (!timer) return;
  clearInterval(timer);
  this.heartbeatTimer = null;
}
/**
 * Arm a reconnect timer using exponential backoff: the base delay doubles
 * per attempt and is capped at RECONNECT_MAX_DELAY_MS.
 * Does nothing once the client has been closed.
 */
private scheduleReconnect(): void {
  if (this.closed) return;

  const attempt = this.retryCount;
  const backoff = RECONNECT_BASE_DELAY_MS * 2 ** attempt;
  const delay = Math.min(backoff, RECONNECT_MAX_DELAY_MS);
  this.retryCount = attempt + 1;

  logger.info(`Reconnecting to labd in ${delay}ms (attempt ${this.retryCount})...`);
  this.reconnectTimer = setTimeout(() => this.connect(), delay);
}
/** Serialize a message as JSON and send it; dropped when the socket is not open. */
private send(msg: BastionMessage): void {
  const socket = this.ws;
  if (!socket || socket.readyState !== WebSocket.OPEN) return;
  socket.send(JSON.stringify(msg));
}
}

View File

@@ -0,0 +1,17 @@
// Winston logger instance shared across the bastion application.
import winston from "winston";
/**
 * Console logger at level "info". Every line is prefixed with a coloured
 * "[bastion]" tag (red for errors, yellow for warnings, green otherwise)
 * followed by an HH:mm:ss timestamp and the message.
 */
export const logger = winston.createLogger({
  level: "info",
  format: winston.format.combine(
    winston.format.timestamp({ format: "HH:mm:ss" }),
    winston.format.printf((info) => {
      let color = "\x1b[32m";
      if (info.level === "error") {
        color = "\x1b[31m";
      } else if (info.level === "warn") {
        color = "\x1b[33m";
      }
      return `${color}[bastion]\x1b[0m ${info.timestamp as string} ${info.message as string}`;
    }),
  ),
  transports: [new winston.transports.Console()],
});

View File

@@ -0,0 +1,166 @@
// Auto-detect network interface, IP, gateway, SSH keys, and admin user.
import { execSync } from "node:child_process";
import { readFileSync, existsSync, mkdirSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import type { BastionConfig } from "@lab/shared";
import { logger } from "./logger.js";
/**
 * Find the network interface of the default route by parsing `ip route`.
 *
 * @returns The device name that follows `dev` on the `default` route line.
 * @throws Error when the output contains no default route with a device.
 */
export function detectInterface(): string {
  const routes = execSync("ip route", { encoding: "utf-8" });
  const iface = /default\s+.*\s+dev\s+(\S+)/.exec(routes)?.[1];
  if (iface === undefined) {
    throw new Error("Cannot detect default network interface");
  }
  return iface;
}
/**
 * Read the first IPv4 address reported by `ip -4 addr show <iface>`.
 *
 * @param iface Interface name to query.
 * @returns The dotted-quad address following the first `inet` keyword.
 * @throws Error when no `inet` address is present in the output.
 */
export function detectIp(iface: string): string {
  const addrInfo = execSync(`ip -4 addr show ${iface}`, { encoding: "utf-8" });
  const address = /inet\s+(\d+\.\d+\.\d+\.\d+)/.exec(addrInfo)?.[1];
  if (address === undefined) {
    throw new Error(`Cannot detect IP on interface ${iface}`);
  }
  return address;
}
/**
 * Build the /24 network address for an IPv4 address by keeping the first
 * three octets and replacing the last one with 0.
 * The input is not validated.
 */
export function deriveNetwork(ip: string): string {
  const [first, second, third] = ip.split(".");
  return `${first}.${second}.${third}.0`;
}
/**
 * Find the default gateway by parsing `ip route`.
 *
 * @returns The address that follows `default via`.
 * @throws Error when the output has no `default via` entry.
 */
export function detectGateway(): string {
  const routes = execSync("ip route", { encoding: "utf-8" });
  const gateway = /default\s+via\s+(\S+)/.exec(routes)?.[1];
  if (gateway === undefined) {
    throw new Error("Cannot detect default gateway");
  }
  return gateway;
}
/** Matches the key-type token of an OpenSSH public key line. */
const SSH_KEY_TYPE =
  /^(?:ssh-(?:rsa|dss|ed25519)|ecdsa-sha2-nistp(?:256|384|521)|sk-ssh-ed25519@openssh\.com|sk-ecdsa-sha2-nistp256@openssh\.com)$/;

/**
 * Return the base64 key blob of a public-key line, used as the dedupe key.
 * authorized_keys lines may start with an options field, so the blob is the
 * token after the key type, not always the second token. Falls back to the
 * second token when no known key type is found.
 */
function sshKeyBlob(line: string): string | undefined {
  const fields = line.split(/\s+/);
  const typeIndex = fields.findIndex((field) => SSH_KEY_TYPE.test(field));
  return typeIndex >= 0 ? fields[typeIndex + 1] : fields[1];
}

/**
 * Collect SSH public keys from the invoking user's SSH directory.
 * Sources: authorized_keys, then id_ed25519.pub, id_rsa.pub, id_ecdsa.pub,
 * deduplicated by key blob. When run under sudo, the home directory of
 * SUDO_USER is used; if it cannot be resolved, the current user's home is.
 * When no key is found at all, an ed25519 keypair is generated in bastionDir
 * (or reused if already present) and its public key is returned.
 *
 * @param bastionDir Directory that holds the generated fallback keypair.
 * @returns The key lines and a human-readable description of their origin.
 */
export function collectSshKeys(bastionDir: string): { keys: string[]; source: string } {
  const sudoUser = process.env["SUDO_USER"];
  let realHome = homedir();
  if (sudoUser !== undefined) {
    try {
      const passwdHome = execSync(`getent passwd ${sudoUser}`, { encoding: "utf-8" })
        .split(":")[5]
        ?.trim();
      if (passwdHome !== undefined && passwdHome !== "") {
        realHome = passwdHome;
      }
    } catch {
      // getent failed (unknown user or tool missing): keep the homedir() fallback.
    }
  }

  const keys: string[] = [];
  const seenBlobs = new Set<string>();
  let source = "";

  /** Record a key line unless its blob is missing or already known. */
  const addKey = (line: string): boolean => {
    const blob = sshKeyBlob(line);
    if (blob === undefined || blob === "" || seenBlobs.has(blob)) return false;
    keys.push(line);
    seenBlobs.add(blob);
    return true;
  };

  // Read authorized_keys
  const authKeysPath = join(realHome, ".ssh", "authorized_keys");
  if (existsSync(authKeysPath)) {
    const content = readFileSync(authKeysPath, "utf-8");
    for (const line of content.split("\n")) {
      const trimmed = line.trim();
      if (trimmed && !trimmed.startsWith("#")) {
        addKey(trimmed);
      }
    }
    source = authKeysPath;
  }

  // Also include local pubkey files
  for (const keyFile of ["id_ed25519.pub", "id_rsa.pub", "id_ecdsa.pub"]) {
    const keyPath = join(realHome, ".ssh", keyFile);
    if (existsSync(keyPath) && addKey(readFileSync(keyPath, "utf-8").trim())) {
      source = source ? `${source} + ${keyPath}` : keyPath;
    }
  }

  // Generate a keypair if no keys found
  if (keys.length === 0) {
    const generatedKey = join(bastionDir, "bastion_ed25519");
    if (!existsSync(generatedKey)) {
      mkdirSync(bastionDir, { recursive: true });
      logger.warn("No SSH keys found -- generating ed25519 keypair...");
      execSync(`ssh-keygen -t ed25519 -f "${generatedKey}" -N "" -C "bastion-generated@$(hostname)"`, {
        encoding: "utf-8",
        stdio: "pipe",
      });
    }
    const pubKey = readFileSync(`${generatedKey}.pub`, "utf-8").trim();
    keys.push(pubKey);
    source = `${generatedKey} (generated)`;
    logger.warn(`Using generated keypair: ${generatedKey}`);
    logger.warn("Save this private key -- it is the only way to access installed machines.");
  }

  return { keys, source };
}
/**
 * Pick the admin username from the environment: SUDO_USER when set,
 * otherwise USER. Returns an empty string when neither is set or when the
 * chosen name is "root".
 */
export function detectAdminUser(): string {
  const { SUDO_USER: sudoUser, USER: loginUser } = process.env;
  const candidate = sudoUser ?? loginUser ?? "";
  if (candidate === "root") {
    return "";
  }
  return candidate;
}
/**
 * Return a copy of the config with every empty network field filled in by
 * auto-detection: interface, server IP, /24 network, gateway, SSH keys and
 * admin user. Values already present in the config are kept as they are.
 * The resolved values are logged; the input object is not modified.
 */
export function populateNetworkConfig(config: BastionConfig): BastionConfig {
  const iface = config.iface === "" ? detectInterface() : config.iface;
  const serverIp = config.serverIp === "" ? detectIp(iface) : config.serverIp;
  const network = config.network === "" ? deriveNetwork(serverIp) : config.network;
  const gateway = config.gateway === "" ? detectGateway() : config.gateway;

  let sshKeys = config.sshKeys;
  let sshSource = "config";
  if (sshKeys.length === 0) {
    const collected = collectSshKeys(config.bastionDir);
    sshKeys = collected.keys;
    sshSource = collected.source;
  }

  const adminUser = config.adminUser === "" ? detectAdminUser() : config.adminUser;

  logger.info(`Interface: ${iface} IP: ${serverIp} Network: ${network}`);
  logger.info(`SSH keys: ${sshKeys.length} key(s) from ${sshSource}`);
  if (adminUser !== "") {
    logger.info(`Admin user: ${adminUser} (will be created on installed machines)`);
  }

  const resolved = { iface, serverIp, network, gateway, sshKeys, adminUser };
  return { ...config, ...resolved };
}

View File

@@ -0,0 +1,233 @@
// Post-provision automation: installs k3s after OS provisioning completes.
// Runs asynchronously — does not block the progress callback.
import { spawn } from "node:child_process";
import { existsSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { logger } from "./logger.js";
import { progressBus } from "./progress-events.js";
/**
 * Locate a private SSH key to use for post-provision connections.
 * Looks in /home/<SUDO_USER>/.ssh when SUDO_USER is set, otherwise in the
 * current user's ~/.ssh, and returns the first existing file among
 * id_ed25519, id_ecdsa and id_rsa (in that order), or undefined.
 */
function findSshKey(): string | undefined {
  const sudoUser = process.env["SUDO_USER"];
  const home = sudoUser ? join("/home", sudoUser) : homedir();
  const sshDir = join(home, ".ssh");
  return ["id_ed25519", "id_ecdsa", "id_rsa"]
    .map((name) => join(sshDir, name))
    .find((candidate) => existsSync(candidate));
}
/**
 * Poll a host over SSH until `echo ok` succeeds or the timeout elapses.
 * Failed attempts are retried after a 5 second pause.
 *
 * @returns true once a probe succeeded, false when timeoutMs ran out.
 */
async function waitForSsh(ip: string, user: string, keyPath: string | undefined, timeoutMs: number): Promise<boolean> {
  const startedAt = Date.now();

  const probe = async (): Promise<boolean> => {
    try {
      const output = await sshExec(ip, user, "echo ok", keyPath);
      return output.includes("ok");
    } catch {
      return false; // not reachable yet, retry
    }
  };

  while (Date.now() - startedAt < timeoutMs) {
    if (await probe()) {
      return true;
    }
    await new Promise((wake) => setTimeout(wake, 5000));
  }
  return false;
}
/**
 * Run one command on a remote host over SSH and capture its stdout.
 * Host key checking is disabled and BatchMode prevents password prompts.
 *
 * @param keyPath Optional private key passed with `-i`.
 * @returns Resolves with stdout when ssh exits 0.
 * @throws Rejects with "SSH exit <code>" (plus stderr text when there is
 *         any) on a non-zero exit, or with the spawn error.
 */
function sshExec(ip: string, user: string, command: string, keyPath: string | undefined): Promise<string> {
  return new Promise((resolve, reject) => {
    const args = [
      "-o", "StrictHostKeyChecking=no",
      "-o", "ConnectTimeout=10",
      "-o", "BatchMode=yes",
      ...(keyPath ? ["-i", keyPath] : []),
      `${user}@${ip}`,
      command,
    ];
    const proc = spawn("ssh", args, { stdio: ["ignore", "pipe", "pipe"] });

    let stdout = "";
    let stderr = "";
    proc.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
    // stderr is piped, so it must be consumed: an unread pipe can block the
    // child once the OS buffer fills. Its text also explains failures.
    proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });

    proc.on("close", (code) => {
      if (code === 0) {
        resolve(stdout);
        return;
      }
      const reason = stderr.trim();
      reject(new Error(reason ? `SSH exit ${code}: ${reason}` : `SSH exit ${code}`));
    });
    proc.on("error", reject);
  });
}
/**
 * Run a command on a remote host over SSH, forwarding every non-empty output
 * line (stdout and stderr alike) to the logger and, when a MAC is given, to
 * the progress bus as a "log" event.
 *
 * Output is buffered per stream so a line split across two data chunks is
 * reported once, whole; any unterminated remainder is reported at close.
 *
 * @param label Hostname used in the log prefix and in the progress events.
 * @param mac   When set, lines are also published on the progress bus.
 * @returns The ssh exit code; 1 when the process could not be spawned or
 *          exited without a code. Never rejects.
 */
function sshRunStreaming(ip: string, user: string, command: string, keyPath: string | undefined, label: string, mac?: string): Promise<number> {
  return new Promise((resolve) => {
    const args = [
      "-o", "StrictHostKeyChecking=no",
      "-o", "ConnectTimeout=10",
      "-o", "BatchMode=yes",
      ...(keyPath ? ["-i", keyPath] : []),
      `${user}@${ip}`,
      command,
    ];
    const proc = spawn("ssh", args, { stdio: ["ignore", "pipe", "pipe"] });

    /** Log one output line and mirror it to the progress bus. */
    const report = (line: string): void => {
      if (line === "") return;
      logger.info(`[k3s:${label}] ${line}`);
      if (mac) {
        progressBus.emit({ mac, hostname: label, stage: "log", detail: `[k3s] ${line}`, timestamp: new Date().toISOString() });
      }
    };

    /** Report complete lines from a stream; returns a flush for the remainder. */
    const forwardLines = (stream: typeof proc.stdout): (() => void) => {
      let pending = "";
      // Decode as UTF-8 in the stream so multi-byte characters are not cut
      // at chunk boundaries.
      stream.setEncoding("utf8");
      stream.on("data", (chunk: string) => {
        const pieces = (pending + chunk).split("\n");
        pending = pieces.pop() ?? "";
        pieces.forEach(report);
      });
      return () => {
        report(pending);
        pending = "";
      };
    };

    const flushStdout = forwardLines(proc.stdout);
    const flushStderr = forwardLines(proc.stderr);

    proc.on("close", (code) => {
      flushStdout();
      flushStderr();
      resolve(code ?? 1);
    });
    proc.on("error", () => resolve(1));
  });
}
/**
 * Install k3s on a freshly provisioned machine and, for roles whose apps
 * include cockroachdb, labd or bastion, deploy the labcontroller stack.
 *
 * Progress is written to the logger and, when a MAC is given, published on
 * the progress bus. Failures are reported as "error" stages; the function
 * returns early when SSH never comes up or the k3s installer fails, and does
 * not throw for those cases.
 *
 * @param hostname Target hostname (used in messages and as the log label).
 * @param ip       Address to SSH to.
 * @param role     Role name; "infra" and "labcontroller" become k3s servers,
 *                 everything else a k3s agent.
 * @param sshUser  Remote user; commands are run through sudo.
 * @param mac      Optional MAC used to tag progress events.
 */
export async function triggerPostProvisionK3s(
  hostname: string,
  ip: string,
  role: string,
  sshUser: string,
  mac?: string,
): Promise<void> {
  const keyPath = findSshKey();

  const emitStage = (stage: string, detail: string): void => {
    logger.info(`[k3s] ${detail}`);
    if (mac) {
      progressBus.emit({ mac, hostname, stage, detail, timestamp: new Date().toISOString() });
    }
  };

  emitStage("post-provision", `auto-installing k3s on ${hostname} (${ip}) role=${role}`);
  emitStage("post-provision", "waiting for SSH (machine may still be rebooting)");

  // Wait up to 5 minutes for SSH (machine just finished kickstart and is rebooting)
  const sshReady = await waitForSsh(ip, sshUser, keyPath, 300_000);
  if (!sshReady) {
    emitStage("error", `SSH not available on ${hostname} (${ip}) after 5 minutes`);
    logger.error(`[k3s] Run manually: labctl app k3s install ${hostname}`);
    return;
  }

  emitStage("post-provision", "SSH ready, installing k3s prerequisites");

  // Step 1: Prerequisites (best-effort; exit code is not checked)
  await sshRunStreaming(ip, sshUser, "sudo modprobe br_netfilter overlay 2>/dev/null; sudo swapoff -a", keyPath, hostname, mac);

  // Step 2: Sysctl (the heredoc body must stay at column 0 so EOF terminates it)
  emitStage("post-provision", "configuring sysctl for k3s");
  await sshRunStreaming(ip, sshUser, `sudo bash -c 'cat > /etc/sysctl.d/90-k3s.conf << EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
vm.panic_on_oom=0
vm.overcommit_memory=1
kernel.panic=10
kernel.panic_on_oops=1
EOF
sysctl --system > /dev/null'`, keyPath, hostname, mac);

  // Step 3: SELinux + firewalld + stale CNI cleanup
  emitStage("post-provision", "disabling firewalld and cleaning stale CNI");
  await sshRunStreaming(ip, sshUser, [
    "sudo setenforce 0 2>/dev/null || true",
    "sudo systemctl disable --now firewalld 2>/dev/null || true",
    "sudo systemctl mask firewalld 2>/dev/null || true",
    // Clean stale CNI interfaces that conflict with Cilium (flannel.1 uses same vxlan port 8472)
    "sudo systemctl stop k3s 2>/dev/null || true",
    "sudo ip link delete flannel.1 2>/dev/null || true",
    "sudo ip link delete cilium_vxlan 2>/dev/null || true",
    "sudo ip link delete cilium_host 2>/dev/null || true",
    "sudo ip link delete cilium_net 2>/dev/null || true",
    "sudo rm -rf /etc/cni/net.d/* /var/lib/cni/ 2>/dev/null || true",
  ].join("; "), keyPath, hostname, mac);

  // Step 4: Install k3s
  // labcontroller extends infra — both are k3s servers
  const k3sRole = (role === "infra" || role === "labcontroller") ? "server" : "agent";
  emitStage("post-provision", `installing k3s ${k3sRole}`);
  const code = await sshRunStreaming(ip, sshUser,
    `curl -sfL https://get.k3s.io | sudo INSTALL_K3S_EXEC="${k3sRole}" INSTALL_K3S_SKIP_SELINUX_RPM=true sh -`,
    keyPath, hostname, mac,
  );
  if (code !== 0) {
    emitStage("error", `k3s install failed on ${hostname} (exit ${code})`);
    logger.error(`[k3s] Run manually: labctl app k3s install ${hostname}`);
    return;
  }

  // Step 5: Wait for ready (up to 60 polls, 2 s apart).
  // `grep -w` is required: a plain `grep Ready` also matches "NotReady" and
  // would end the wait as soon as the node is merely listed.
  emitStage("post-provision", "waiting for k3s node to become Ready");
  await sshRunStreaming(ip, sshUser,
    "for i in $(seq 1 60); do sudo k3s kubectl get nodes 2>/dev/null | grep -qw Ready && break; sleep 2; done",
    keyPath, hostname, mac,
  );
  emitStage("post-provision", `k3s ${k3sRole} installed on ${hostname} (${ip})`);

  // Step 6: Deploy role-specific apps from ROLE_REGISTRY chain
  const { ROLE_REGISTRY } = await import("@lab/shared");
  const roleInfo = ROLE_REGISTRY.find((r: { name: string }) => r.name === role);
  if (roleInfo && roleInfo.apps.length > 0) {
    emitStage("post-provision", `deploying apps: ${roleInfo.apps.join(", ")}`);
    if (roleInfo.apps.includes("cockroachdb") || roleInfo.apps.includes("labd") || roleInfo.apps.includes("bastion")) {
      // This is a labcontroller — deploy the full stack
      emitStage("post-provision", `deploying labcontroller stack on ${hostname}`);
      try {
        const { cockroachDbManifests } = await import("@lab/modules/dist/modules/labcontroller/src/cockroachdb.js");
        const { labdManifests } = await import("@lab/modules/dist/modules/labcontroller/src/labd.js");
        const { bastionManifests } = await import("@lab/modules/dist/modules/labcontroller/src/bastion.js");
        const crdb = cockroachDbManifests();
        const labd = labdManifests({ databaseUrl: crdb.connectionString });
        const bastion = bastionManifests();
        const manifests = [
          crdb.namespace, crdb.headlessService, crdb.clientService, crdb.statefulSet,
          labd.service, labd.deployment,
          bastion.daemonSet,
        ];
        // Each manifest is piped as JSON into kubectl on the target; single
        // quotes are escaped for the surrounding shell quoting. A failed
        // apply is reported but does not stop the remaining manifests.
        for (const manifest of manifests) {
          const json = JSON.stringify(manifest);
          const kind = (manifest as { kind?: string }).kind ?? "?";
          const name = ((manifest as { metadata?: { name?: string } }).metadata)?.name ?? "?";
          const result = await sshRunStreaming(ip, sshUser,
            `echo '${json.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f -`,
            keyPath, hostname, mac,
          );
          if (result === 0) {
            emitStage("post-provision", `applied ${kind}/${name}`);
          } else {
            emitStage("error", `failed to apply ${kind}/${name}`);
          }
        }
        // Init CockroachDB
        const initJson = JSON.stringify(crdb.initJob);
        await sshRunStreaming(ip, sshUser,
          `echo '${initJson.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f - 2>/dev/null; sleep 30; sudo k3s kubectl exec cockroachdb-0 -n lab-system -- /cockroach/cockroach sql --insecure -e 'CREATE DATABASE IF NOT EXISTS lab' 2>/dev/null || true`,
          keyPath, hostname, mac,
        );
        emitStage("post-provision", `labcontroller stack deployed on ${hostname}`);
      } catch (err) {
        const errMsg = err instanceof Error ? err.message : String(err);
        emitStage("error", `failed to deploy labcontroller stack: ${errMsg}`);
        logger.error(`[post-provision] Run manually: labctl app labcontroller deploy ${hostname}`);
      }
    }
  }

  emitStage("post-provision", `${hostname} (${ip}) provisioning complete (role: ${role})`);
}

View File

@@ -0,0 +1,28 @@
// In-memory event bus for provision progress updates.
// Allows SSE clients to subscribe to real-time progress and log lines.
import { EventEmitter } from "node:events";
/** One provisioning update (a stage change or a raw log line) carried on the progress bus. */
export interface ProgressEvent {
/** MAC of the machine the event belongs to. */
mac: string;
/** Hostname (or log label) of that machine. */
hostname: string;
/** "log" for raw log lines, anything else is a progress stage name */
stage: string;
/** Human-readable text: the log line itself or a description of the stage. */
detail: string;
/** Time the event was produced; presumably always an ISO-8601 string, as the producers visible here use Date#toISOString() — confirm for other emitters. */
timestamp: string;
}
// Typed facade over a private EventEmitter. Every event travels on the single
// "progress" channel, so subscribers receive stage updates and log lines alike.
const progressEmitter = new EventEmitter();

export const progressBus = {
  /** Publish an event to all current subscribers. */
  emit: (event: ProgressEvent): void => {
    progressEmitter.emit("progress", event);
  },
  /** Subscribe a listener to every future event. */
  on: (listener: (event: ProgressEvent) => void): void => {
    progressEmitter.on("progress", listener);
  },
  /** Remove a previously subscribed listener. */
  off: (listener: (event: ProgressEvent) => void): void => {
    progressEmitter.off("progress", listener);
  },
};

View File

@@ -0,0 +1,69 @@
// JSON file-backed state management for discovered machines, install queue, and installed machines.
import { readFileSync, writeFileSync, renameSync, mkdirSync } from "node:fs";
import { dirname } from "node:path";
import type { BastionState } from "@lab/shared";
// Re-export types for consumers that import from this module
export type { HardwareInfo, InstallConfig, InstalledInfo, BastionState } from "@lab/shared";
/**
 * Build a brand-new empty state. A factory is used instead of a shared
 * constant because callers mutate the nested maps in place; a shallow copy
 * of one shared object would leak those mutations into every later
 * "empty" state.
 */
function emptyState(): BastionState {
  return {
    discovered: {},
    install_queue: {},
    installed: {},
  };
}

export type StateChangeListener = (state: BastionState) => void;

/** JSON-file-backed store for the bastion's machine state. */
export class StateManager {
  private changeListeners: StateChangeListener[] = [];

  /** @param stateFile Path of the JSON file holding the state. */
  constructor(private readonly stateFile: string) {}

  /** Register a listener that fires after every state update. */
  onChange(listener: StateChangeListener): void {
    this.changeListeners.push(listener);
  }

  /**
   * Read the state from disk. Missing top-level sections default to empty
   * maps; an unreadable or unparsable file yields a fresh empty state.
   */
  load(): BastionState {
    try {
      const raw = readFileSync(this.stateFile, "utf-8");
      const parsed = JSON.parse(raw) as Partial<BastionState>;
      return {
        discovered: parsed.discovered ?? {},
        install_queue: parsed.install_queue ?? {},
        installed: parsed.installed ?? {},
      };
    } catch {
      return emptyState();
    }
  }

  /**
   * Write the state to disk, creating the parent directory if needed.
   * The data goes to a temp file first and is then renamed over the target,
   * so readers never observe a half-written file.
   */
  save(state: BastionState): void {
    mkdirSync(dirname(this.stateFile), { recursive: true });
    const tmp = `${this.stateFile}.tmp`;
    writeFileSync(tmp, JSON.stringify(state, null, 2));
    renameSync(tmp, this.stateFile);
  }

  /** Create the state file with an empty state when it cannot be read. */
  init(): void {
    try {
      readFileSync(this.stateFile, "utf-8");
    } catch {
      this.save(emptyState());
    }
  }

  /**
   * Load the state, let `fn` mutate it in place, save it and notify the
   * change listeners. Listener exceptions are swallowed so they cannot
   * break a state update.
   *
   * @returns The state that was written.
   */
  update(fn: (state: BastionState) => void): BastionState {
    const state = this.load();
    fn(state);
    this.save(state);
    for (const listener of this.changeListeners) {
      try { listener(state); } catch { /* don't let listener errors break state updates */ }
    }
    return state;
  }
}

View File

@@ -0,0 +1,99 @@
// UDP syslog listener for receiving Anaconda install logs.
// Anaconda's `logging --host` sends RFC 3164 syslog over UDP.
// We parse the messages and route them to InstallLogBuffer.
import { createSocket, type Socket } from "node:dgram";
import type { InstallLogBuffer } from "./install-log.js";
import type { StateManager } from "./state.js";
import { logger } from "./logger.js";
/**
 * Parse a BSD syslog (RFC 3164) message into program name and message text.
 *
 * Accepted shapes (the <PRI> prefix is optional and stripped):
 *   <PRI>TIMESTAMP HOSTNAME APP[PID]: MESSAGE
 *   <PRI>TIMESTAMP APP[PID]: MESSAGE          (hostname omitted)
 * e.g. `<13>Mar 28 19:32:01 anaconda[1234]: some message`.
 *
 * A token ending in ":" is never taken as the hostname, so a hostname-less
 * line is not mis-read as "HOSTNAME PROGRAM:" when the message itself
 * starts with a "word:".
 *
 * @returns The program (without PID) and the message; when the line does
 *          not match, program is "unknown" and message is the whole line.
 */
function parseSyslogLine(raw: string): { program: string; message: string } {
  // Strip priority: <NN>
  const noPri = raw.replace(/^<\d+>/, "");

  // "Mon DD HH:MM:SS [HOSTNAME] PROGRAM[PID]: MESSAGE"
  const match = /^\w+\s+\d+\s+[\d:]+\s+(?:\S*[^\s:]\s+)?(\S+?)(?:\[\d+\])?:\s*(.*)/.exec(noPri);
  if (match) {
    const [, program = "unknown", message = ""] = match;
    return { program, message };
  }

  // Fallback: just return the whole line
  return { program: "unknown", message: noPri.trim() };
}
/**
 * UDP syslog receiver. Each datagram is parsed, attributed to a machine by
 * its source IP and appended to that machine's install log buffer.
 */
export class SyslogListener {
  private socket: Socket | null = null;
  private port: number;
  private installLog: InstallLogBuffer;
  private state: StateManager;

  /**
   * @param port       UDP port to bind.
   * @param installLog Buffer that receives the parsed lines per MAC.
   * @param state      State store used to map source IPs to machines.
   */
  constructor(port: number, installLog: InstallLogBuffer, state: StateManager) {
    this.port = port;
    this.installLog = installLog;
    this.state = state;
  }

  /**
   * Resolve a source IP to a MAC address.
   * Queue entries match when their progress detail mentions the IP as a whole
   * address (so 10.0.0.1 does not match 10.0.0.10); installed machines match
   * on their recorded IP.
   *
   * @param currentState State snapshot to search; loaded from disk when omitted.
   */
  private resolveIpToMac(
    ip: string,
    currentState: ReturnType<StateManager["load"]> = this.state.load(),
  ): string | null {
    const escapedIp = ip.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // Reject matches that are part of a longer dotted number on either side.
    const wholeIp = new RegExp(`(?<!\\d|\\d\\.)${escapedIp}(?!\\d|\\.\\d)`);

    // Check install queue — machines being installed have an IP from DHCP
    for (const [mac, entry] of Object.entries(currentState.install_queue)) {
      // The progress callback sends IP in "complete" detail, but during install
      // we need to match by what we know. Check if any progress mentions this IP.
      const detail = entry.progress_detail;
      if (typeof detail === "string" && wholeIp.test(detail)) return mac;
    }
    // Check installed machines
    for (const [mac, info] of Object.entries(currentState.installed)) {
      if (info.ip === ip) return mac;
    }
    return null;
  }

  /**
   * Resolve a MAC to the hostname from install queue or installed state,
   * falling back to the MAC itself.
   *
   * @param currentState State snapshot to search; loaded from disk when omitted.
   */
  private resolveHostname(
    mac: string,
    currentState: ReturnType<StateManager["load"]> = this.state.load(),
  ): string {
    return currentState.install_queue[mac]?.hostname ?? currentState.installed[mac]?.hostname ?? mac;
  }

  /** Bind the UDP socket on all IPv4 interfaces and start routing messages. */
  start(): void {
    const socket = createSocket("udp4");
    this.socket = socket;

    socket.on("message", (msg, rinfo) => {
      const raw = msg.toString("utf-8").trim();
      if (!raw) return;
      const { program, message } = parseSyslogLine(raw);

      // One state read per datagram, shared by both lookups.
      const snapshot = this.state.load();
      const mac = this.resolveIpToMac(rinfo.address, snapshot);
      if (!mac) {
        // Unknown sender: keep it out of the install log buffer, but leave a
        // trace at debug level for troubleshooting.
        logger.debug(`Syslog from unknown host ${rinfo.address}: ${message}`);
        return;
      }

      const hostname = this.resolveHostname(mac, snapshot);
      const line = program !== "unknown" ? `[${program}] ${message}` : message;
      this.installLog.append(mac, [line], hostname);
    });

    socket.on("error", (err) => {
      logger.error(`Syslog listener error: ${err.message}`);
    });

    socket.bind(this.port, "0.0.0.0", () => {
      logger.info(`Syslog listener on UDP :${this.port}`);
    });
  }

  /** Close the socket if the listener is running. */
  stop(): void {
    if (this.socket) {
      this.socket.close();
      this.socket = null;
    }
  }
}

View File

@@ -0,0 +1,93 @@
// iPXE boot script templates for dispatch routing.
/** Inputs for the initial iPXE boot script. */
export interface BootIpxeParams {
/** Host part of the bastion's HTTP URLs embedded in the script. */
serverIp: string;
/** Port part of the bastion's HTTP URLs embedded in the script. */
httpPort: number;
}
/**
 * Render the first-stage iPXE script. It prints a banner and chains to the
 * bastion's /dispatch endpoint, passing the client's MAC through the iPXE
 * variable `${net0/mac}` (expanded by iPXE, not here).
 * This is what dnsmasq serves to iPXE clients via HTTP.
 */
export function renderBootIpxe(params: BootIpxeParams): string {
  const { serverIp, httpPort } = params;
  const rule = "echo ============================================";
  const script = [
    "#!ipxe",
    "echo",
    rule,
    "echo Lab PXE Bastion",
    "echo Contacting server for instructions...",
    rule,
    "echo",
    `chain http://${serverIp}:${httpPort}/dispatch?mac=\${net0/mac}`,
    "",
  ];
  return script.join("\n");
}
/**
 * Render the iPXE script for discovery mode: boot the Fedora installer
 * kernel and initrd from the bastion with the discovery kickstart, in text
 * mode with serial and VGA consoles.
 */
export function renderDiscoverIpxe(params: {
  mac: string;
  serverIp: string;
  httpPort: number;
  fedoraMirror: string;
}): string {
  const { mac, serverIp, httpPort, fedoraMirror } = params;
  const rule = "echo =============================================";
  const kernelLine = [
    `kernel http://${serverIp}:${httpPort}/vmlinuz`,
    `inst.ks=http://${serverIp}:${httpPort}/discover.ks`,
    `inst.stage2=${fedoraMirror}`,
    "inst.text",
    "console=ttyS0,115200n8",
    "console=tty0",
  ].join(" ");
  const script = [
    "#!ipxe",
    "echo",
    rule,
    "echo Lab PXE Bastion - DISCOVERY MODE",
    `echo MAC: ${mac}`,
    "echo Collecting hardware info...",
    rule,
    "echo",
    kernelLine,
    `initrd http://${serverIp}:${httpPort}/initrd.img`,
    "boot",
    "",
  ];
  return script.join("\n");
}
/**
 * Render the iPXE script for install mode: boot the Fedora installer from
 * the bastion with the kickstart generated for this MAC (`/ks?mac=...`),
 * in text mode with serial and VGA consoles.
 */
export function renderInstallIpxe(params: {
  mac: string;
  hostname: string;
  serverIp: string;
  httpPort: number;
  fedoraVersion: string;
  fedoraMirror: string;
}): string {
  const { mac, hostname, serverIp, httpPort, fedoraVersion, fedoraMirror } = params;
  const rule = "echo =============================================";
  const kernelLine = [
    `kernel http://${serverIp}:${httpPort}/vmlinuz`,
    `inst.ks=http://${serverIp}:${httpPort}/ks?mac=${mac}`,
    `inst.repo=${fedoraMirror}`,
    "inst.text",
    "console=ttyS0,115200n8",
    "console=tty0",
  ].join(" ");
  const script = [
    "#!ipxe",
    "echo",
    rule,
    `echo Lab PXE Bastion - INSTALLING Fedora ${fedoraVersion}`,
    `echo Target: ${hostname}`,
    `echo MAC: ${mac}`,
    rule,
    "echo",
    kernelLine,
    `initrd http://${serverIp}:${httpPort}/initrd.img`,
    "boot",
    "",
  ];
  return script.join("\n");
}
/**
 * Render the iPXE script for a machine that is already installed: show a
 * banner, pause 3 seconds, then `exit 1` to leave iPXE and boot from local
 * disk.
 */
export function renderLocalBootIpxe(hostname: string): string {
  const rule = "echo =============================================";
  const script = [
    "#!ipxe",
    "echo",
    rule,
    `echo Lab PXE Bastion - ${hostname}`,
    "echo Already installed, booting from local disk",
    rule,
    "echo",
    "sleep 3",
    "exit 1",
    "",
  ];
  return script.join("\n");
}

View File

@@ -0,0 +1,118 @@
// Discovery kickstart template.
// Boots Fedora installer, collects hardware info, POSTs to bastion, reboots.
// Never touches the disk.
/** Inputs for the discovery kickstart. */
export interface DiscoverKickstartParams {
/** Host part of the bastion URL the discovery payload is POSTed to. */
serverIp: string;
/** Port part of the bastion URL the discovery payload is POSTed to. */
httpPort: number;
}
/**
 * Render the discovery kickstart. Its %pre script collects hardware facts
 * (DMI strings, CPU, memory, disks, NICs), POSTs them as JSON to
 * `<bastion>/api/discover` and force-reboots so Anaconda never installs.
 *
 * Hardware strings reach the embedded Python through environment variables
 * rather than being spliced into Python source, so quotes or backslashes in
 * vendor strings cannot break the payload. The Python snippets are whitespace
 * sensitive: their indentation inside the template is significant.
 *
 * @param params Bastion address used to build the POST URL.
 * @returns The kickstart file contents.
 */
export function renderDiscoverKickstart(params: DiscoverKickstartParams): string {
  const bastionUrl = `http://${params.serverIp}:${params.httpPort}`;
  return `# Lab Bastion -- Discovery Mode
# Collects hardware inventory and reboots. Does NOT install anything.
%pre --erroronfail --log=/tmp/discover.log
#!/bin/bash
set -x
# -- Collect hardware info from /proc, /sys, and available tools --
MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
PRODUCT=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo "unknown")
BOARD=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo "unknown")
SERIAL=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo "unknown")
MANUFACTURER=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo "unknown")
CPUMODEL=$(grep -m1 'model name' /proc/cpuinfo | cut -d: -f2 | sed 's/^ //')
CPUCORES=$(grep -c '^processor' /proc/cpuinfo)
MEMGB=$(awk '/MemTotal/ {printf "%d", $2/1024/1024}' /proc/meminfo)
ARCHTYPE=$(uname -m)
# Disk info
DISKS_JSON=$(lsblk -Jb -o NAME,SIZE,TYPE,MODEL 2>/dev/null | python3 -c "
import sys, json
data = json.load(sys.stdin)
disks = [d for d in data.get('blockdevices', []) if d.get('type') == 'disk']
result = []
for d in disks:
    size_gb = round(int(d.get('size') or 0) / 1073741824, 1)
    result.append({
        'name': d.get('name', '?'),
        'size_gb': size_gb,
        'model': (d.get('model') or 'unknown').strip()
    })
print(json.dumps(result))
" 2>/dev/null || echo '[]')
# Network interfaces
NICS_JSON=$(ip -j link show 2>/dev/null | python3 -c "
import sys, json
nics = json.load(sys.stdin)
result = []
for n in nics:
    if n.get('link_type') == 'loopback':
        continue
    result.append({
        'name': n.get('ifname', '?'),
        'mac': n.get('address', '?'),
        'state': n.get('operstate', '?')
    })
print(json.dumps(result))
" 2>/dev/null || echo '[]')
# -- Build and POST discovery payload --
# Values are handed to Python via the environment, never via source text.
export MAC PRODUCT BOARD SERIAL MANUFACTURER CPUMODEL CPUCORES MEMGB ARCHTYPE DISKS_JSON NICS_JSON
PAYLOAD=$(python3 -c "
import json, os

def text(name):
    return os.environ.get(name, '')

def number(name):
    try:
        return int(text(name))
    except ValueError:
        return 0

def items(name):
    try:
        return json.loads(text(name))
    except ValueError:
        return []

print(json.dumps({
    'mac': text('MAC'),
    'product': text('PRODUCT'),
    'board': text('BOARD'),
    'serial': text('SERIAL'),
    'manufacturer': text('MANUFACTURER'),
    'cpu_model': text('CPUMODEL'),
    'cpu_cores': number('CPUCORES'),
    'memory_gb': number('MEMGB'),
    'arch': text('ARCHTYPE'),
    'disks': items('DISKS_JSON'),
    'nics': items('NICS_JSON')
}))
")
# POST to bastion
BASTION_URL="${bastionUrl}/api/discover"
export BASTION_URL PAYLOAD
if command -v curl >/dev/null 2>&1; then
curl -sf -X POST "$BASTION_URL" \\
-H "Content-Type: application/json" \\
-d "$PAYLOAD" || true
else
python3 -c "
import os, urllib.request
req = urllib.request.Request(os.environ['BASTION_URL'],
    data=os.environ.get('PAYLOAD', '').encode(),
    headers={'Content-Type': 'application/json'})
try:
    urllib.request.urlopen(req, timeout=10)
except Exception as e:
    print(f'POST failed: {e}')
"
fi
# -- Reboot -- do NOT let Anaconda proceed --
echo ""
echo "=== Discovery complete, rebooting ==="
echo ""
sleep 3
echo 1 > /proc/sys/kernel/sysrq
echo b > /proc/sysrq-trigger
sleep 5
reboot -f
%end
# Anaconda should never get here, but just in case:
reboot
`;
}

View File

@@ -0,0 +1,97 @@
// dnsmasq configuration template.
// Supports proxy DHCP mode (alongside existing DHCP) and full DHCP mode.
// Handles UEFI HTTP Boot, iPXE chainloading, and PXE service directives.
import type { BastionConfig } from "@lab/shared";
/**
 * Render the dnsmasq configuration for the PXE bastion.
 *
 * DNS is disabled (port=0); dnsmasq only does DHCP/TFTP on the configured
 * interface. In "full" mode it hands out leases from a /24 range (defaulting
 * to .100-.200 of the network when no range is configured) and advertises the
 * gateway as router and DNS server. In any other mode it runs as proxy DHCP;
 * the pxe-service directives are emitted only when the mode is exactly
 * "proxy".
 */
export function renderDnsmasqConf(config: BastionConfig): string {
  const { iface, serverIp, httpPort, network, gateway, dhcpMode, tftpDir } = config;

  // DHCP section: full server vs. proxy alongside an existing DHCP server.
  const dhcpLines: string[] = [];
  if (dhcpMode === "full") {
    const networkBase = network.replace(/\.0$/, "");
    const rangeStart = config.dhcpRangeStart || `${networkBase}.100`;
    const rangeEnd = config.dhcpRangeEnd || `${networkBase}.200`;
    dhcpLines.push(
      "# Full DHCP mode -- bastion is the only DHCP server on this network",
      `dhcp-range=${rangeStart},${rangeEnd},255.255.255.0,12h`,
      `dhcp-option=3,${gateway}`,
      `dhcp-option=6,${gateway}`,
    );
  } else {
    dhcpLines.push(
      "# ProxyDHCP -- works alongside existing DHCP (UniFi etc)",
      `dhcp-range=${network},proxy`,
    );
  }

  // PXE service section: only proxy DHCP needs explicit pxe-service entries.
  const pxeLines = dhcpMode === "proxy"
    ? [
      "# PXE service directives (proxy DHCP needs these to respond on port 4011)",
      `pxe-service=tag:!ipxe,x86PC,"PXE Boot",undionly.kpxe`,
      `pxe-service=tag:!ipxe,X86-64_EFI,"PXE Boot",ipxe.efi`,
      `pxe-service=tag:!ipxe,BC_EFI,"PXE Boot",ipxe.efi`,
      `pxe-service=tag:!ipxe,ARM64_EFI,"PXE Boot",ipxe-arm64.efi`,
    ]
    : [
      "# Full DHCP mode -- pxe-service directives omitted (they trigger PXE Boot Server",
      "# Discovery protocol which some UEFI implementations don't support). The dhcp-boot",
      "# directives above provide the boot filename directly in the DHCP offer.",
    ];

  const lines = [
    "# Lab PXE Bastion -- dnsmasq config",
    "# Disable DNS (we only want DHCP/TFTP)",
    "port=0",
    "# Listen on the right interface",
    `interface=${iface}`,
    "bind-dynamic",
    ...dhcpLines,
    "# TFTP for initial PXE boot",
    "enable-tftp",
    `tftp-root=${tftpDir}`,
    "tftp-no-blocksize",
    "# Detect client architecture -- PXE (TFTP) clients",
    "dhcp-match=set:bios,option:client-arch,0",
    "dhcp-match=set:efi-x86_64,option:client-arch,7",
    "dhcp-match=set:efi-x86_64,option:client-arch,9",
    "dhcp-match=set:efi-arm64,option:client-arch,11",
    "# Detect client architecture -- UEFI HTTP Boot clients (no TFTP size limit)",
    "dhcp-match=set:httpboot-x86_64,option:client-arch,16",
    "dhcp-match=set:httpboot-arm64,option:client-arch,20",
    "# Detect iPXE clients (already chainloaded)",
    "dhcp-userclass=set:ipxe,iPXE",
    "# UEFI HTTP Boot -> serve full iPXE EFI via HTTP (no TFTP size limit)",
    `dhcp-boot=tag:httpboot-x86_64,http://${serverIp}:${httpPort}/ipxe.efi`,
    `dhcp-boot=tag:httpboot-arm64,http://${serverIp}:${httpPort}/ipxe-arm64.efi`,
    "# Echo vendor class back to HTTP Boot clients (required by UEFI HTTP Boot spec)",
    "dhcp-option-force=tag:httpboot-x86_64,60,HTTPClient",
    "dhcp-option-force=tag:httpboot-arm64,60,HTTPClient",
    "# First PXE boot -> serve iPXE binary via TFTP (BIOS and UEFI fallback)",
    "dhcp-boot=tag:bios,tag:!ipxe,undionly.kpxe",
    "dhcp-boot=tag:efi-x86_64,tag:!ipxe,ipxe.efi",
    "dhcp-boot=tag:efi-arm64,tag:!ipxe,ipxe-arm64.efi",
    "# Echo vendor class back to PXE clients (OVMF requires this, real hardware usually doesn't)",
    "dhcp-option-force=tag:efi-x86_64,60,PXEClient",
    "dhcp-option-force=tag:efi-arm64,60,PXEClient",
    "dhcp-option-force=tag:bios,60,PXEClient",
    "# iPXE clients -> chain to boot script via HTTP",
    `dhcp-boot=tag:ipxe,http://${serverIp}:${httpPort}/boot.ipxe`,
    ...pxeLines,
    "# Lease file in bastion directory (avoid default /var/lib/dnsmasq which needs root)",
    `dhcp-leasefile=${config.bastionDir}/dnsmasq.leases`,
    "# Verbose logging",
    "log-dhcp",
    "",
  ];
  return lines.join("\n");
}

View File

@@ -0,0 +1,427 @@
// Install kickstart template.
// Full Fedora server install with LVM partitioning, %pre for reprovision detection,
// packages, and %post with SSH keys, user creation, k3s prereqs, progress callbacks.
import type { Role } from "@lab/shared";
/** Inputs for the per-machine install kickstart. */
export interface InstallKickstartParams {
/** Short hostname; joined with `domain` to form the FQDN when a domain is set. */
hostname: string;
/** Target disk name; when empty the kickstart probes /dev/nvme0n1, /dev/sda and /dev/vda. */
disk: string;
/** Machine role; "worker" adds a Longhorn volume and "infra" a Rancher volume. */
role: Role;
/** DNS domain appended to the hostname; may be empty. */
domain: string;
/** Fedora release shown in the generated header. */
fedoraVersion: string;
/** Value for the kickstart `timezone` directive (written with --utc). */
timezone: string;
/** Value for the kickstart `lang` directive. */
locale: string;
/** Bastion address; used as the remote syslog host. */
serverIp: string;
/** Bastion HTTP port. NOTE(review): its use is outside the visible part of the template — confirm. */
httpPort: number;
/** UDP port of the bastion's syslog listener, passed to `logging --port`. */
syslogPort: number;
/** Public keys installed for root (and for the admin user when one is set). */
sshKeys: string[];
/** Admin account to create in the wheel group; empty string creates none. */
adminUser: string;
}
/**
 * Render the Fedora kickstart used for a full server install.
 *
 * The generated file contains:
 *  - the command section (language, network, auth, bootloader, remote logging),
 *  - a %pre script that writes /tmp/part.ks, either a fresh LVM layout or a
 *    reprovision layout that reuses the existing volume group and keeps the
 *    home/srv/longhorn/rancher logical volumes when they already exist,
 *  - the %packages list (k3s prerequisites are skipped for the "vanilla" role),
 *  - a %post script that configures SSH, the hostname, role-specific system
 *    settings and the EFI boot order, and reports progress to the bastion.
 *
 * @param params - Host identity, disk, role and bastion endpoints.
 * @returns The complete kickstart file as a single string.
 */
export function renderInstallKickstart(params: InstallKickstartParams): string {
  const {
    hostname,
    disk,
    role,
    domain,
    fedoraVersion,
    timezone,
    locale,
    serverIp,
    httpPort,
    syslogPort,
    sshKeys,
    adminUser,
  } = params;
  const fqdn = domain ? `${hostname}.${domain}` : hostname;
  const vg = "labvg";
  const now = new Date().toISOString();
  const hasLonghorn = role === "worker";
  const hasRancher = role === "infra";
  const isVanilla = role === "vanilla";
  // -- Auth section --
  // Always set a root password (for serial console debugging) + SSH keys.
  // NOTE: the password is a hard-coded plaintext value; only the first SSH key
  // goes into the `sshkey` directive, the full list is installed in %post.
  const auth = sshKeys.length > 0
    ? `rootpw --plaintext lab-root-pw\nsshkey --username=root "${sshKeys[0]}"`
    : "rootpw --plaintext lab-root-pw";
  // -- Admin user directive --
  const userDirective = adminUser
    ? `user --name=${adminUser} --groups=wheel --lock`
    : "";
  // -- SSH keys for %post --
  const allKeys = sshKeys.join("\n");
  let sshPostBlock = "";
  if (sshKeys.length > 0) {
    sshPostBlock = `
# Set up SSH keys for root
mkdir -p /root/.ssh && chmod 700 /root/.ssh
cat > /root/.ssh/authorized_keys << 'SSHKEYS'
${allKeys}
SSHKEYS
chmod 600 /root/.ssh/authorized_keys`;
  }
  if (adminUser && sshKeys.length > 0) {
    sshPostBlock += `
# Set up SSH keys for ${adminUser}
ADMIN_HOME=$(getent passwd ${adminUser} | cut -d: -f6)
mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh"
cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
chown -R ${adminUser}:${adminUser} "$ADMIN_HOME/.ssh"
chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"
# Fix SELinux contexts for SSH
restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
# Passwordless sudo for ${adminUser}
echo '${adminUser} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/${adminUser}
chmod 440 /etc/sudoers.d/${adminUser}`;
  }
  // -- Disk detection --
  // The %pre script treats $DISK as a bare device name: auto-detection stores
  // `basename`, and the reprovision path builds /dev/${DISK}* and /dev/${DISK}1.
  // Callers may pass a full path such as "/dev/sda", so strip a leading /dev/
  // to keep both code paths consistent (otherwise we would probe /dev//dev/sda*).
  const diskName = disk ? disk.replace(/^\/dev\//, "") : "";
  const diskLine = diskName
    ? `DISK="${diskName}"`
    : `DISK=""
for d in /dev/nvme0n1 /dev/sda /dev/vda; do
[ -b "$d" ] && { DISK="$(basename $d)"; break; }
done
[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }`;
  // -- Longhorn LV for fresh install --
  const longhornFreshLine = hasLonghorn
    ? `logvol /var/lib/longhorn --vgname=${vg} --name=longhorn --fstype=xfs --grow --size=1`
    : "";
  // -- Rancher LV for fresh install (infra role) --
  const rancherFreshLine = hasRancher
    ? `logvol /var/lib/rancher --vgname=${vg} --name=rancher --fstype=xfs --size=20480`
    : "";
  return `# Lab Bastion -- Fedora ${fedoraVersion} server install
# Generated: ${now}
# Target: ${fqdn} (role=${role})
text
reboot
lang ${locale}
keyboard uk
timezone ${timezone} --utc
network --bootproto=dhcp --activate --hostname=${fqdn}
${auth}
${userDirective}
bootloader --append="console=tty0 console=ttyS0,115200n8"
logging --host=${serverIp} --port=${syslogPort}
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch
# Partitioning is generated dynamically by %pre (supports reprovision preservation)
%include /tmp/part.ks
%pre --log=/tmp/pre-partition.log
#!/bin/bash
set -x
# Progress callback helper
bastion_progress() {
local stage="$1" detail="\${2:-}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
}
bastion_progress "partitioning" "detecting disk"
VG="${vg}"
${diskLine}
REPROVISION=no
# Check if VG exists (reprovision scenario)
if vgs $VG &>/dev/null; then
echo "=== Existing VG found - reprovision mode ==="
REPROVISION=yes
# Detect which data LVs to preserve
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no; PRESERVE_RANCHER=no
lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
lvs $VG/rancher &>/dev/null && PRESERVE_RANCHER=yes
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME rancher=$PRESERVE_RANCHER"
# Remove only OS logical volumes (keep data LVs)
for lv in root var varlog swap; do
lvremove -f $VG/$lv 2>/dev/null || true
done
else
bastion_progress "partitioning" "fresh install on $DISK"
fi
if [ "$REPROVISION" = "yes" ]; then
# Find existing boot partitions by type
EFI_PART=$(blkid -t TYPE=vfat -o device /dev/\${DISK}* 2>/dev/null | head -1)
BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/\${DISK}* 2>/dev/null | head -1)
EFI_PART=\${EFI_PART:-/dev/\${DISK}1}
BOOT_PART=\${BOOT_PART:-/dev/\${DISK}2}
echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"
# Build partition config reusing existing PV/VG
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --none
part /boot/efi --onpart=$EFI_PART --fstype=efi
part /boot --onpart=$BOOT_PART --fstype=ext4
volgroup ${vg} --useexisting --noformat
logvol swap --vgname=${vg} --name=swap --fstype=swap --size=27648
logvol / --vgname=${vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname=${vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname=${vg} --name=varlog --fstype=xfs --size=10240
PARTEOF
# Preserve or recreate data LVs
if [ "$PRESERVE_HOME" = "yes" ]; then
echo "logvol /home --vgname=${vg} --name=home --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /home --vgname=${vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks
fi
if [ "$PRESERVE_SRV" = "yes" ]; then
echo "logvol /srv --vgname=${vg} --name=srv --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /srv --vgname=${vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks
fi
if [ "$PRESERVE_LONGHORN" = "yes" ]; then
echo "logvol /var/lib/longhorn --vgname=${vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
fi
if [ "$PRESERVE_RANCHER" = "yes" ]; then
echo "logvol /var/lib/rancher --vgname=${vg} --name=rancher --useexisting --noformat" >> /tmp/part.ks
fi
else
# Fresh install
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --all --initlabel --drives=$DISK
part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
part /boot --fstype=ext4 --size=3072 --ondisk=$DISK
part pv.01 --size=1 --grow --ondisk=$DISK
volgroup ${vg} pv.01
logvol swap --vgname=${vg} --name=swap --fstype=swap --size=27648
logvol / --vgname=${vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname=${vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname=${vg} --name=varlog --fstype=xfs --size=10240
logvol /home --vgname=${vg} --name=home --fstype=xfs --size=10240
logvol /srv --vgname=${vg} --name=srv --fstype=xfs --size=20480
${longhornFreshLine}
${rancherFreshLine}
PARTEOF
fi
echo "=== Generated partition config ==="
cat /tmp/part.ks
echo "==================================="
bastion_progress "partitioning" "disk layout ready"
%end
%packages
@core
openssh-server
vim-enhanced
tmux
git
curl
wget
python3
lshw
dmidecode
dnf-plugins-core
# Networking and diagnostics
NetworkManager
bind-utils
net-tools
iproute
iputils
traceroute
tcpdump
htop
iotop
strace
jq
${isVanilla ? "# vanilla role -- skipping k3s prerequisites" : `# k3s prerequisites
container-selinux
iptables-nft
nftables
policycoreutils-python-utils
chrony
tar
socat
conntrack-tools
ethtool`}
# Boot management
efibootmgr
# Puppet prerequisites
ruby
ruby-libs
# Exclude desktop
-@workstation-product
-@gnome-desktop
-gnome-shell
-gdm
-PackageKit
-PackageKit-glib
%end
%post --log=/root/bastion-post-install.log
#!/bin/bash
set -x
# Progress callback helper
bastion_progress() {
local stage="$1" detail="\${2:-}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
}
# Send log lines to bastion
bastion_log() {
local line="$1"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
curl -sf -X POST "http://${serverIp}:${httpPort}/api/log" \\
-H "Content-Type: application/json" \\
-d "{\\"mac\\":\\"$mac\\",\\"line\\":\\"$(echo "$line" | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g')\\"}\" \\
--connect-timeout 5 --max-time 10 2>/dev/null || true
}
# Send an error stage to bastion
bastion_error() {
local detail="$1"
bastion_progress "error" "$detail"
}
# --- Error trap: catch any failure and report to bastion ---
_post_error_handler() {
local exit_code=$? lineno=$1
bastion_error "%post failed at line $lineno (exit $exit_code)"
}
trap '_post_error_handler $LINENO' ERR
bastion_progress "post-install" "configuring system"
# -- SSH --
systemctl enable --now sshd
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
${sshPostBlock}
# -- Hostname and domain --
hostnamectl set-hostname ${fqdn}
# -- tmpfs for /tmp --
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
${isVanilla ? `# -- vanilla role: skip k3s kernel/sysctl/firewall setup --
# -- Enable chronyd for time sync --
systemctl enable chronyd || true
# -- Serial console (for debugging — auto-login as root on ttyS0) --
# AWS EC2 compatible: ttyS0 @ 115200n8
systemctl enable serial-getty@ttyS0.service || true
# -- Forward all system logs to serial console --
cat > /etc/rsyslog.d/serial-console.conf << 'RSYSLOG'
*.* /dev/ttyS0
RSYSLOG
systemctl enable rsyslog || true` : `# -- Kernel modules for k3s --
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
br_netfilter
overlay
ip_conntrack
MODULES
modprobe br_netfilter || true
modprobe overlay || true
# -- Sysctl for k3s networking --
cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
net.ipv6.conf.all.forwarding = 1
fs.inotify.max_user_instances = 524288
fs.inotify.max_user_watches = 1048576
SYSCTL
sysctl --system || true
# -- Disable firewalld permanently (k3s/Cilium manage iptables directly) --
systemctl disable --now firewalld || true
systemctl mask firewalld || true
# -- Enable chronyd for time sync --
systemctl enable chronyd || true`}
# -- Boot order: restore network first (Anaconda sets disk first, we undo it) --
# Network boot must stay first so the bastion intercepts every reboot.
if command -v efibootmgr >/dev/null 2>&1; then
PXE_ENTRY=$(efibootmgr | grep -iE 'network|pxe|ipv4|ipv6|http' | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
if [ -n "$PXE_ENTRY" ]; then
CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
REST=$(echo "$CURRENT_ORDER" | sed "s/$PXE_ENTRY,\\\\?//;s/,$//" | sed 's/^,//')
NEW_ORDER="$PXE_ENTRY,$REST"
efibootmgr -o "$NEW_ORDER" || true
fi
fi
# -- Provisioning metadata --
cat > /etc/lab-provisioned << PROVEOF
hostname: ${fqdn}
role: ${role}
provisioned: $(date -Iseconds)
bastion: ${serverIp}
PROVEOF
cat > /root/README << 'README'
# Lab Node -- ${fqdn} (role: ${role})
#
# Next steps:
# 1. Install puppet agent:
# dnf install -y puppet-agent
#
# 2. Install k3s:
# curl -sfL https://get.k3s.io | sh -
#
# 3. Or join existing cluster:
# curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
README
${hasRancher ? `# Install k3s server (skip start - will be configured manually)
curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -
` : ""}
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
bastion_progress "complete" "ready at $IP_ADDR"
%end
`;
}

View File

@@ -0,0 +1,299 @@
// Ubuntu autoinstall template (cloud-init).
// Equivalent of the Fedora kickstart: LVM partitioning, packages,
// SSH keys, k3s prereqs, progress callbacks.
/** Inputs consumed by renderUbuntuAutoinstall to produce cloud-init user-data. */
export interface UbuntuAutoinstallParams {
// Short host name; joined with `domain` (when non-empty) to form the FQDN.
hostname: string;
// Install target device path; the renderer falls back to /dev/sda when empty.
disk: string;
// "worker" adds a longhorn LV; "infra" adds a rancher LV and a k3s install step.
role: string; // "vanilla" | "worker" | "infra"
// DNS domain appended to `hostname`; an empty string leaves the bare hostname.
domain: string;
// Declared for callers; not read by renderUbuntuAutoinstall itself.
ubuntuVersion: string;
// Emitted as the autoinstall `timezone` value.
timezone: string;
// Emitted as the autoinstall `locale` value.
locale: string;
// Bastion address used by the final progress callback and provisioning metadata.
serverIp: string;
// Bastion HTTP port used by the final progress callback.
httpPort: number;
// Public keys listed under ssh.authorized-keys and copied to the admin user.
sshKeys: string[];
// Identity username (falls back to "root" when empty); when non-empty the
// late-commands also create the account, install keys and add a sudoers entry.
adminUser: string;
}
/**
 * Render the Ubuntu autoinstall (cloud-init user-data) document.
 *
 * The storage layout uses the same LVM sizes as the Fedora kickstart, with an
 * extra longhorn LV for the "worker" role and a rancher LV for the "infra"
 * role. Late-commands configure k3s kernel modules and sysctls, disable ufw,
 * enable chrony, create the admin user, write provisioning metadata and send a
 * final "complete" progress callback to the bastion.
 *
 * SSH keys and late-commands are emitted as YAML double-quoted scalars via
 * JSON.stringify. The commands contain double quotes, backslashes and real
 * newlines (heredocs), which would otherwise terminate or fold the scalar and
 * produce an invalid or altered document.
 *
 * @param params - Host identity, disk, role and bastion endpoints.
 * @returns The complete `#cloud-config` document as a single string.
 */
export function renderUbuntuAutoinstall(params: UbuntuAutoinstallParams): string {
  const {
    hostname,
    disk,
    role,
    domain,
    timezone,
    serverIp,
    httpPort,
    sshKeys,
    adminUser,
  } = params;
  const fqdn = domain ? `${hostname}.${domain}` : hostname;
  const vg = "labvg";
  const hasLonghorn = role === "worker";
  const hasRancher = role === "infra";
  // A JSON string literal is also a valid YAML double-quoted scalar, so this
  // escapes quotes, backslashes and newlines in one step.
  const yamlQuote = (value: string): string => JSON.stringify(value);
  // Install target -- falls back to /dev/sda when no disk was specified
  const diskDevice = disk || "/dev/sda";
  // Build the LVM layout to match Fedora kickstart sizes
  const extraLvs: string[] = [];
  if (hasLonghorn) {
    extraLvs.push(` - id: lv-longhorn
name: longhorn
type: lvm_partition
volgroup: vg0
size: -1
- id: fs-longhorn
type: format
volume: lv-longhorn
fstype: xfs
- id: mount-longhorn
type: mount
device: fs-longhorn
path: /var/lib/longhorn`);
  }
  if (hasRancher) {
    extraLvs.push(` - id: lv-rancher
name: rancher
type: lvm_partition
volgroup: vg0
size: 20G
- id: fs-rancher
type: format
volume: lv-rancher
fstype: xfs
- id: mount-rancher
type: mount
device: fs-rancher
path: /var/lib/rancher`);
  }
  const extraLvsBlock = extraLvs.length > 0 ? "\n" + extraLvs.join("\n") : "";
  // SSH keys YAML list
  const sshKeysYaml = sshKeys.map((k) => ` - ${yamlQuote(k)}`).join("\n");
  // late-commands for k3s prereqs, firewall, chrony, admin user, progress callback
  const lateCommands: string[] = [
    // Kernel modules for k3s
    `curtin in-target -- bash -c 'cat > /etc/modules-load.d/k3s.conf << EOF\nbr_netfilter\noverlay\nip_conntrack\nEOF'`,
    // Sysctl for k3s networking
    `curtin in-target -- bash -c 'cat > /etc/sysctl.d/90-k3s.conf << EOF\nnet.bridge.bridge-nf-call-iptables = 1\nnet.bridge.bridge-nf-call-ip6tables = 1\nnet.ipv4.ip_forward = 1\nnet.ipv6.conf.all.forwarding = 1\nfs.inotify.max_user_instances = 524288\nfs.inotify.max_user_watches = 1048576\nEOF'`,
    // Disable ufw firewall
    `curtin in-target -- systemctl disable ufw || true`,
    // Enable chrony/ntp
    `curtin in-target -- systemctl enable chrony || true`,
    // tmpfs for /tmp
    `curtin in-target -- bash -c 'echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab'`,
  ];
  // Admin user creation + SSH keys + sudoers
  if (adminUser) {
    lateCommands.push(
      `curtin in-target -- useradd -m -G sudo -s /bin/bash ${adminUser}`,
      `curtin in-target -- usermod -L ${adminUser}`,
      `curtin in-target -- mkdir -p /home/${adminUser}/.ssh`,
      `curtin in-target -- bash -c 'cat > /home/${adminUser}/.ssh/authorized_keys << EOF\n${sshKeys.join("\n")}\nEOF'`,
      `curtin in-target -- chmod 700 /home/${adminUser}/.ssh`,
      `curtin in-target -- chmod 600 /home/${adminUser}/.ssh/authorized_keys`,
      `curtin in-target -- chown -R ${adminUser}:${adminUser} /home/${adminUser}/.ssh`,
      `curtin in-target -- bash -c 'echo "${adminUser} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/${adminUser}'`,
      `curtin in-target -- chmod 440 /etc/sudoers.d/${adminUser}`,
    );
  }
  // Provisioning metadata
  lateCommands.push(
    `curtin in-target -- bash -c 'cat > /etc/lab-provisioned << EOF\nhostname: ${fqdn}\nrole: ${role}\nprovisioned: $(date -Iseconds)\nbastion: ${serverIp}\nEOF'`,
  );
  // k3s install for infra role
  if (hasRancher) {
    lateCommands.push(
      `curtin in-target -- bash -c 'curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -'`,
    );
  }
  // Progress callback (complete)
  lateCommands.push(
    `curtin in-target -- bash -c 'IP_ADDR=$(ip -4 addr show | awk "/inet / && !/127.0.0/ {split(\\$2,a,\\"/\\"); print a[1]; exit}"); curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" -H "Content-Type: application/json" -d "{\\"mac\\":\\"$(ip link show | awk "/ether/ && !/00:00:00:00/ {print \\$2; exit}")\\",\\"stage\\":\\"complete\\",\\"detail\\":\\"ready at $IP_ADDR\\"}" || true'`,
  );
  // One YAML list item per command, each on a single escaped line
  const lateCommandsYaml = lateCommands.map((c) => ` - ${yamlQuote(c)}`).join("\n");
  return `#cloud-config
autoinstall:
version: 1
locale: ${params.locale}
keyboard:
layout: gb
timezone: ${timezone}
identity:
hostname: ${fqdn}
username: ${adminUser || "root"}
password: "!"
ssh:
install-server: true
allow-pw: false
authorized-keys:
${sshKeysYaml}
storage:
config:
- id: disk0
type: disk
ptable: gpt
path: ${diskDevice}
wipe: superblock-recursive
grub_device: true
- id: part-efi
type: partition
device: disk0
size: 600M
flag: boot
grub_device: true
- id: fs-efi
type: format
volume: part-efi
fstype: fat32
- id: mount-efi
type: mount
device: fs-efi
path: /boot/efi
- id: part-boot
type: partition
device: disk0
size: 3G
- id: fs-boot
type: format
volume: part-boot
fstype: ext4
- id: mount-boot
type: mount
device: fs-boot
path: /boot
- id: part-pv
type: partition
device: disk0
size: -1
- id: vg0
type: lvm_volgroup
name: ${vg}
devices:
- part-pv
- id: lv-swap
name: swap
type: lvm_partition
volgroup: vg0
size: 27G
- id: fs-swap
type: format
volume: lv-swap
fstype: swap
- id: mount-swap
type: mount
device: fs-swap
path: none
- id: lv-root
name: root
type: lvm_partition
volgroup: vg0
size: 33G
- id: fs-root
type: format
volume: lv-root
fstype: xfs
- id: mount-root
type: mount
device: fs-root
path: /
- id: lv-var
name: var
type: lvm_partition
volgroup: vg0
size: 100G
- id: fs-var
type: format
volume: lv-var
fstype: xfs
- id: mount-var
type: mount
device: fs-var
path: /var
- id: lv-varlog
name: varlog
type: lvm_partition
volgroup: vg0
size: 10G
- id: fs-varlog
type: format
volume: lv-varlog
fstype: xfs
- id: mount-varlog
type: mount
device: fs-varlog
path: /var/log
- id: lv-home
name: home
type: lvm_partition
volgroup: vg0
size: 10G
- id: fs-home
type: format
volume: lv-home
fstype: xfs
- id: mount-home
type: mount
device: fs-home
path: /home
- id: lv-srv
name: srv
type: lvm_partition
volgroup: vg0
size: 20G
- id: fs-srv
type: format
volume: lv-srv
fstype: xfs
- id: mount-srv
type: mount
device: fs-srv
path: /srv${extraLvsBlock}
packages:
- openssh-server
- curl
- wget
- git
- jq
- htop
- vim
- tmux
- python3
- lshw
- dmidecode
- net-tools
- iproute2
- iputils-ping
- traceroute
- tcpdump
- iotop
- strace
- tar
- containerd
- socat
- conntrack
- ethtool
- iptables
- chrony
- efibootmgr
late-commands:
${lateCommandsYaml}
`;
}
/**
 * Render the cloud-init meta-data document for a host.
 *
 * @param hostname - Used for both `instance-id` and `local-hostname`.
 * @returns Two `key: value` lines, each terminated by a newline.
 */
export function renderUbuntuMetaData(hostname: string): string {
  const entries = [
    `instance-id: ${hostname}`,
    `local-hostname: ${hostname}`,
  ];
  // Trailing empty element yields the final newline after the last entry.
  return [...entries, ""].join("\n");
}

View File

@@ -0,0 +1,24 @@
// iPXE boot script template for Ubuntu autoinstall.
/**
 * Render the iPXE script that boots the Ubuntu installer in autoinstall mode.
 *
 * The script prints a banner identifying the target, then loads the kernel and
 * initrd from the bastion HTTP server. The kernel command line points the
 * nocloud-net datasource at `/autoinstall/<mac>/` on the same server.
 *
 * @param params - Target MAC/hostname, bastion address and port, Ubuntu version.
 * @returns The iPXE script text, terminated by a newline.
 */
export function renderUbuntuInstallIpxe(params: {
  mac: string;
  hostname: string;
  serverIp: string;
  httpPort: number;
  ubuntuVersion: string;
}): string {
  const { mac, hostname, serverIp, httpPort, ubuntuVersion } = params;
  const baseUrl = `http://${serverIp}:${httpPort}`;
  const rule = "echo =============================================";
  const scriptLines = [
    "#!ipxe",
    "echo",
    rule,
    `echo Lab PXE Bastion - INSTALLING Ubuntu ${ubuntuVersion}`,
    `echo Target: ${hostname}`,
    `echo MAC: ${mac}`,
    rule,
    "echo",
    `kernel ${baseUrl}/ubuntu-vmlinuz autoinstall ds=nocloud-net;seedfrom=${baseUrl}/autoinstall/${mac}/ ---`,
    `initrd ${baseUrl}/ubuntu-initrd`,
    "boot",
    "", // final newline
  ];
  return scriptLines.join("\n");
}

View File

@@ -0,0 +1,328 @@
import { describe, it, expect, beforeEach, afterEach } from "vitest";
import { mkdirSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import type { BastionConfig } from "@lab/shared";
import { createApp } from "../src/server.js";
import type { FastifyInstance } from "fastify";
import type { StateManager } from "../src/services/state.js";
import type { InstallLogBuffer } from "../src/services/install-log.js";
/**
 * Build the BastionConfig used by the dispatch route tests.
 *
 * All filesystem locations (bastion dir, tftp, http, state file) live under
 * `testDir`; dnsmasq and artifact handling are switched off via the skip flags.
 *
 * @param testDir - Scratch directory owned by the current test.
 * @returns A fully populated configuration object.
 */
function createTestConfig(testDir: string): BastionConfig {
  const tftpDir = join(testDir, "tftp");
  const httpDir = join(testDir, "http");
  const stateFile = join(testDir, "state.json");
  const config: BastionConfig = {
    fedoraVersion: "43",
    arch: "x86_64",
    httpPort: 0,
    timezone: "Europe/London",
    locale: "en_GB.UTF-8",
    bastionDir: testDir,
    domain: "test.local",
    dhcpMode: "proxy",
    dhcpRangeStart: "",
    dhcpRangeEnd: "",
    ubuntuVersion: "26.04",
    ubuntuMirror: "https://releases.ubuntu.com/26.04",
    iface: "eth0",
    serverIp: "10.0.0.1",
    network: "10.0.0.0",
    gateway: "10.0.0.1",
    sshKeys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST test@test"],
    adminUser: "testadmin",
    skipDnsmasq: true,
    skipArtifacts: true,
    fedoraMirror: "https://download.fedoraproject.org/pub/fedora/linux/releases/43/Everything/x86_64/os",
    tftpDir,
    httpDir,
    stateFile,
  };
  return config;
}
// Route-level tests for the bastion HTTP app: the iPXE dispatch endpoint, the
// progress and log callbacks, and the machine/log management API. Each test
// drives the app through app.inject() and checks the response and the state.
describe("dispatch routes", () => {
let testDir: string;
let app: FastifyInstance;
let state: StateManager;
let installLog: InstallLogBuffer;
// Every test gets its own scratch directory and a freshly created app.
beforeEach(() => {
testDir = join(tmpdir(), `bastion-dispatch-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
mkdirSync(testDir, { recursive: true });
mkdirSync(join(testDir, "http"), { recursive: true });
mkdirSync(join(testDir, "tftp"), { recursive: true });
const config = createTestConfig(testDir);
const result = createApp(config);
app = result.app;
state = result.state;
installLog = result.installLog;
});
// Close the app before deleting the scratch directory it was configured with.
afterEach(async () => {
await app.close();
rmSync(testDir, { recursive: true, force: true });
});
// A MAC that appears nowhere in state is sent the discovery script.
it("unknown MAC returns discovery iPXE script", async () => {
const response = await app.inject({
method: "GET",
url: "/dispatch?mac=aa:bb:cc:dd:ee:ff",
});
expect(response.statusCode).toBe(200);
expect(response.headers["content-type"]).toContain("text/plain");
const body = response.body;
expect(body).toContain("#!ipxe");
expect(body).toContain("DISCOVERY MODE");
expect(body).toContain("discover.ks");
});
// A queued MAC is sent the install script pointing at its kickstart URL.
it("MAC in install_queue returns install iPXE script", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
state.update((s) => {
s.install_queue[mac] = {
hostname: "worker-1",
disk: "/dev/sda",
role: "worker",
queued_at: new Date().toISOString(),
};
});
const response = await app.inject({
method: "GET",
url: `/dispatch?mac=${mac}`,
});
expect(response.statusCode).toBe(200);
const body = response.body;
expect(body).toContain("#!ipxe");
expect(body).toContain("INSTALLING");
expect(body).toContain("worker-1");
expect(body).toContain(`ks?mac=${mac}`);
});
// An already installed MAC is sent a script that exits iPXE.
it("MAC in installed returns local boot (exit) script", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
state.update((s) => {
s.installed[mac] = {
hostname: "installed-node",
role: "worker",
ip: "10.0.0.50",
installed_at: new Date().toISOString(),
};
});
const response = await app.inject({
method: "GET",
url: `/dispatch?mac=${mac}`,
});
expect(response.statusCode).toBe(200);
const body = response.body;
expect(body).toContain("#!ipxe");
expect(body).toContain("installed-node");
expect(body).toContain("Already installed");
expect(body).toContain("exit");
});
// A non-terminal stage is recorded on the queue entry.
it("progress endpoint updates state", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
state.update((s) => {
s.install_queue[mac] = {
hostname: "worker-1",
disk: "/dev/sda",
role: "worker",
queued_at: new Date().toISOString(),
};
});
const response = await app.inject({
method: "POST",
url: "/api/progress",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
mac,
stage: "post-install",
detail: "configuring system",
}),
});
expect(response.statusCode).toBe(200);
const result = JSON.parse(response.body);
expect(result.status).toBe("ok");
// Verify state was updated
const currentState = state.load();
expect(currentState.install_queue[mac]?.progress).toBe("post-install");
expect(currentState.install_queue[mac]?.progress_detail).toBe("configuring system");
});
// "complete" moves the entry from install_queue to installed; the expected ip
// matches the address in the "ready at ..." detail string.
it("progress endpoint with 'complete' stage moves machine to installed", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
state.update((s) => {
s.install_queue[mac] = {
hostname: "worker-1",
disk: "/dev/sda",
role: "worker",
queued_at: new Date().toISOString(),
};
});
const response = await app.inject({
method: "POST",
url: "/api/progress",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
mac,
stage: "complete",
detail: "ready at 10.0.0.50",
}),
});
expect(response.statusCode).toBe(200);
const currentState = state.load();
expect(currentState.install_queue[mac]).toBeUndefined();
expect(currentState.installed[mac]).toBeDefined();
expect(currentState.installed[mac]?.hostname).toBe("worker-1");
expect(currentState.installed[mac]?.ip).toBe("10.0.0.50");
});
// Deleting a discovered machine removes it and reports "forgotten".
it("DELETE /api/machines/:mac removes machine from state", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
state.update((s) => {
s.discovered[mac] = {
mac,
product: "TestBox",
board: "TestBoard",
serial: "SN123",
manufacturer: "TestCorp",
cpu_model: "Test CPU",
cpu_cores: 4,
memory_gb: 16,
arch: "x86_64",
disks: [],
nics: [],
first_seen: new Date().toISOString(),
last_seen: new Date().toISOString(),
};
});
const response = await app.inject({
method: "DELETE",
url: `/api/machines/${encodeURIComponent(mac)}`,
});
expect(response.statusCode).toBe(200);
const result = JSON.parse(response.body);
expect(result.status).toBe("forgotten");
const currentState = state.load();
expect(currentState.discovered[mac]).toBeUndefined();
});
// Deleting a MAC that is not in state yields 404.
it("DELETE /api/machines/:mac returns 404 for unknown machine", async () => {
const response = await app.inject({
method: "DELETE",
url: "/api/machines/ff:ff:ff:ff:ff:ff",
});
expect(response.statusCode).toBe(404);
const result = JSON.parse(response.body);
expect(result.error).toBe("machine not found");
});
// The log endpoint accepts a single `line` ...
it("POST /api/log accepts a single line", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
const response = await app.inject({
method: "POST",
url: "/api/log",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ mac, line: "hello from kickstart" }),
});
expect(response.statusCode).toBe(200);
const result = JSON.parse(response.body);
expect(result.status).toBe("ok");
expect(result.lines).toBe(1);
// Verify line is stored
const lines = installLog.getLines(mac);
expect(lines).toHaveLength(1);
expect(lines[0]!.line).toBe("hello from kickstart");
});
// ... or a batch under `lines`.
it("POST /api/log accepts multiple lines", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
const response = await app.inject({
method: "POST",
url: "/api/log",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ mac, lines: ["line 1", "line 2", "line 3"] }),
});
expect(response.statusCode).toBe(200);
const result = JSON.parse(response.body);
expect(result.lines).toBe(3);
const lines = installLog.getLines(mac);
expect(lines).toHaveLength(3);
});
// Buffered log lines are returned together with the machine's status.
it("GET /api/logs/:mac includes log lines for installing machine", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
state.update((s) => {
s.install_queue[mac] = {
hostname: "test-node",
disk: "/dev/sda",
role: "worker",
queued_at: new Date().toISOString(),
};
});
// Add some log lines
installLog.append(mac, ["log line 1", "log line 2"], "test-node");
const response = await app.inject({
method: "GET",
url: `/api/logs/${encodeURIComponent(mac)}`,
});
expect(response.statusCode).toBe(200);
const result = JSON.parse(response.body);
expect(result.status).toBe("installing");
expect(result.log_lines).toHaveLength(2);
expect(result.log_total).toBe(2);
expect(result.log_lines[0].line).toBe("log line 1");
});
// An "error" stage is recorded but must not promote the machine to installed.
it("progress endpoint with 'error' stage keeps machine in install_queue", async () => {
const mac = "aa:bb:cc:dd:ee:ff";
state.update((s) => {
s.install_queue[mac] = {
hostname: "failing-node",
disk: "/dev/sda",
role: "worker",
queued_at: new Date().toISOString(),
};
});
const response = await app.inject({
method: "POST",
url: "/api/progress",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
mac,
stage: "error",
detail: "%post failed at line 42",
}),
});
expect(response.statusCode).toBe(200);
// Machine should still be in install_queue (not moved to installed)
const currentState = state.load();
expect(currentState.install_queue[mac]).toBeDefined();
expect(currentState.install_queue[mac]?.progress).toBe("error");
expect(currentState.install_queue[mac]?.progress_detail).toBe("%post failed at line 42");
expect(currentState.installed[mac]).toBeUndefined();
});
});

View File

@@ -0,0 +1,215 @@
import { describe, it, expect } from "vitest";
import { renderInstallKickstart, type InstallKickstartParams } from "../src/templates/install.ks.js";
/**
 * Build kickstart parameters for a test case.
 *
 * Starts from a fixed set of defaults (worker role, auto-detected disk, two
 * SSH keys, admin user "admin") and lets the caller replace any field.
 *
 * @param overrides - Fields that should differ from the defaults.
 * @returns A complete parameter object; override values win over defaults.
 */
function baseParams(overrides: Partial<InstallKickstartParams> = {}): InstallKickstartParams {
  const defaults: InstallKickstartParams = {
    hostname: "testnode",
    disk: "",
    role: "worker",
    domain: "lab.local",
    fedoraVersion: "43",
    timezone: "Europe/London",
    locale: "en_GB.UTF-8",
    serverIp: "192.168.1.100",
    httpPort: 8080,
    syslogPort: 5514,
    sshKeys: [
      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST1 user1@host",
      "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQTEST2 user2@host",
    ],
    adminUser: "admin",
  };
  return { ...defaults, ...overrides };
}
describe("renderInstallKickstart", () => {
it("worker role includes longhorn partition", () => {
const ks = renderInstallKickstart(baseParams({ role: "worker" }));
expect(ks).toContain("longhorn");
expect(ks).toContain("/var/lib/longhorn");
});
it("infra role does NOT include longhorn partition", () => {
const ks = renderInstallKickstart(baseParams({ role: "infra" }));
// The fresh install longhorn line should not be present
expect(ks).not.toContain("logvol /var/lib/longhorn --vgname=labvg --name=longhorn --fstype=xfs --grow --size=1");
});
it("all SSH keys appear between SSHKEYS markers", () => {
const keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST1 user1@host",
"ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQTEST2 user2@host",
];
const ks = renderInstallKickstart(baseParams({ sshKeys: keys }));
// Both keys should appear between the SSHKEYS markers
const sshkeysMatch = ks.match(/cat > \/root\/\.ssh\/authorized_keys << 'SSHKEYS'\n([\s\S]*?)\nSSHKEYS/);
expect(sshkeysMatch).not.toBeNull();
const keysBlock = sshkeysMatch![1]!;
for (const key of keys) {
expect(keysBlock).toContain(key);
}
});
it("admin user directive appears when adminUser is set", () => {
const ks = renderInstallKickstart(baseParams({ adminUser: "myadmin" }));
expect(ks).toContain("user --name=myadmin --groups=wheel --lock");
});
it("no admin user directive when adminUser is empty", () => {
const ks = renderInstallKickstart(baseParams({ adminUser: "" }));
expect(ks).not.toContain("user --name=");
});
it("FQDN is hostname.domain", () => {
const ks = renderInstallKickstart(baseParams({
hostname: "myhost",
domain: "example.com",
}));
expect(ks).toContain("myhost.example.com");
expect(ks).toContain("--hostname=myhost.example.com");
});
it("restorecon is present", () => {
const ks = renderInstallKickstart(baseParams());
expect(ks).toContain("restorecon");
});
it("sudoers line for admin user", () => {
const ks = renderInstallKickstart(baseParams({ adminUser: "admin" }));
expect(ks).toContain("admin ALL=(ALL) NOPASSWD: ALL");
expect(ks).toContain("/etc/sudoers.d/admin");
});
it("boot order restores network first (bastion controls boot)", () => {
const ks = renderInstallKickstart(baseParams());
expect(ks).toContain("restore network first");
expect(ks).toContain("PXE_ENTRY");
expect(ks).toContain("efibootmgr -o");
});
it("progress callback URLs use correct serverIp and httpPort", () => {
const ks = renderInstallKickstart(baseParams({
serverIp: "10.0.0.5",
httpPort: 9090,
}));
expect(ks).toContain("http://10.0.0.5:9090");
expect(ks).toContain("/api/progress");
});
it("infra role has /var/lib/rancher partition", () => {
const ks = renderInstallKickstart(baseParams({ role: "infra" }));
expect(ks).toContain("logvol /var/lib/rancher --vgname=labvg --name=rancher --fstype=xfs --size=20480");
});
it("infra role has k3s install", () => {
const ks = renderInstallKickstart(baseParams({ role: "infra" }));
expect(ks).toContain("curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -");
});
it("worker role does NOT have /var/lib/rancher partition in fresh install", () => {
const ks = renderInstallKickstart(baseParams({ role: "worker" }));
// Worker should not have the fresh-install rancher partition line
expect(ks).not.toContain("logvol /var/lib/rancher --vgname=labvg --name=rancher --fstype=xfs --size=20480");
});
it("worker role does NOT have k3s install", () => {
const ks = renderInstallKickstart(baseParams({ role: "worker" }));
expect(ks).not.toContain("INSTALL_K3S_SKIP_START");
});
it("reprovision preserves rancher partition", () => {
const ks = renderInstallKickstart(baseParams({ role: "infra" }));
expect(ks).toContain("PRESERVE_RANCHER=no");
expect(ks).toContain('lvs $VG/rancher');
expect(ks).toContain("PRESERVE_RANCHER=yes");
expect(ks).toContain('logvol /var/lib/rancher --vgname=labvg --name=rancher --useexisting --noformat');
});
it("partition sizes are correct", () => {
const ks = renderInstallKickstart(baseParams());
// root = 33792
expect(ks).toContain("--name=root --fstype=xfs --size=33792");
// var = 102400
expect(ks).toContain("--name=var --fstype=xfs --size=102400");
// varlog = 10240
expect(ks).toContain("--name=varlog --fstype=xfs --size=10240");
// home = 10240
expect(ks).toContain("--name=home --fstype=xfs --size=10240");
// srv = 20480
expect(ks).toContain("--name=srv --fstype=xfs --size=20480");
// swap = 27648
expect(ks).toContain("--name=swap --fstype=swap --size=27648");
});
it("vanilla role skips k3s setup", () => {
const ks = renderInstallKickstart(baseParams({ role: "vanilla" }));
expect(ks).toContain("vanilla role");
expect(ks).not.toContain("modules-load.d/k3s.conf");
expect(ks).not.toContain("firewalld");
});
it("worker role has k3s setup", () => {
const ks = renderInstallKickstart(baseParams({ role: "worker" }));
expect(ks).toContain("modules-load.d/k3s.conf");
expect(ks).toContain("sysctl.d/90-k3s.conf");
expect(ks).toContain("firewalld");
});
it("kickstart syntax: no merged partition lines", () => {
for (const role of ["vanilla", "worker", "infra"] as const) {
const ks = renderInstallKickstart(baseParams({ role }));
const lines = ks.split("\n");
for (let i = 0; i < lines.length; i++) {
const l = lines[i].trim();
if (l.startsWith("part ")) {
const partCount = (l.match(/\bpart\b/g) || []).length;
expect(partCount, `line ${i + 1} has ${partCount} 'part' commands (role=${role}): ${l}`).toBe(1);
}
}
}
});
it("kickstart syntax: each section-opening has a %end", () => {
const ks = renderInstallKickstart(baseParams());
// Only match section openers at start of line
const sections = (ks.match(/^%(?:pre|post|packages)\b/gm) || []).length;
const ends = (ks.match(/^%end$/gm) || []).length;
expect(ends, `${sections} sections but ${ends} %end markers`).toBe(sections);
});
it("has complete progress stage", () => {
const ks = renderInstallKickstart(baseParams());
expect(ks).toContain('"complete"');
expect(ks).toContain("ready at");
});
it("sends install logs to bastion via syslog", () => {
const ks = renderInstallKickstart(baseParams({ syslogPort: 5514 }));
expect(ks).toContain("logging --host=192.168.1.100 --port=5514");
});
it("passes ksvalidator syntax check", () => {
for (const role of ["vanilla", "worker", "infra"] as const) {
const ks = renderInstallKickstart(baseParams({ role }));
const { execSync } = require("node:child_process");
const { writeFileSync, unlinkSync } = require("node:fs");
const tmp = `/tmp/ks-test-${role}.ks`;
writeFileSync(tmp, ks);
try {
execSync(`ksvalidator -v F43 ${tmp}`, { encoding: "utf-8" });
} catch (err: unknown) {
const msg = err instanceof Error ? (err as { stderr?: string }).stderr ?? err.message : String(err);
throw new Error(`ksvalidator failed for role=${role}: ${msg}`);
} finally {
try { unlinkSync(tmp); } catch {}
}
}
});
it("forwards system logs to serial console", () => {
const ks = renderInstallKickstart(baseParams({ role: "vanilla" }));
expect(ks).toContain("serial-console.conf");
expect(ks).toContain("/dev/ttyS0");
expect(ks).toContain("rsyslog");
});
});

View File

@@ -0,0 +1,140 @@
import { describe, it, expect, beforeEach, afterEach } from "vitest";
import { mkdirSync, rmSync, existsSync, readFileSync, writeFileSync, chmodSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { StateManager } from "../src/services/state.js";
// Persistence tests for StateManager; every test runs against its own throwaway directory.
describe("StateManager", () => {
  const EMPTY_STATE = {
    discovered: {},
    install_queue: {},
    installed: {},
  };
  const MAC_A = "aa:bb:cc:dd:ee:ff";
  const MAC_B = "11:22:33:44:55:66";

  let sandbox: string;
  let statePath: string;
  let manager: StateManager;

  beforeEach(() => {
    const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    sandbox = join(tmpdir(), `bastion-test-${suffix}`);
    mkdirSync(sandbox, { recursive: true });
    statePath = join(sandbox, "state.json");
    manager = new StateManager(statePath);
  });

  afterEach(() => {
    rmSync(sandbox, { recursive: true, force: true });
  });

  it("creates empty state on first load", () => {
    expect(manager.load()).toEqual(EMPTY_STATE);
  });

  it("init creates the state file", () => {
    expect(existsSync(statePath)).toBe(false);
    manager.init();
    expect(existsSync(statePath)).toBe(true);

    const onDisk = JSON.parse(readFileSync(statePath, "utf-8"));
    expect(onDisk).toEqual(EMPTY_STATE);
  });

  it("saves and loads state correctly", () => {
    manager.init();
    manager.update((draft) => {
      draft.discovered[MAC_A] = {
        mac: MAC_A,
        product: "TestBox",
        board: "TestBoard",
        serial: "SN123",
        manufacturer: "TestCorp",
        cpu_model: "Test CPU",
        cpu_cores: 8,
        memory_gb: 32,
        arch: "x86_64",
        disks: [{ name: "sda", size_gb: 500, model: "TestDisk" }],
        nics: [{ name: "eth0", mac: MAC_A, state: "UP" }],
        first_seen: "2025-01-01T00:00:00Z",
        last_seen: "2025-01-01T00:00:00Z",
      };
      draft.install_queue[MAC_B] = {
        hostname: "worker-1",
        disk: "/dev/sda",
        role: "worker",
        queued_at: "2025-01-01T01:00:00Z",
      };
    });

    // A second manager on the same file proves the data went through the disk.
    const reloaded = new StateManager(statePath).load();
    const box = reloaded.discovered[MAC_A];
    expect(box?.product).toBe("TestBox");
    expect(box?.cpu_cores).toBe(8);
    expect(reloaded.install_queue[MAC_B]?.hostname).toBe("worker-1");
    expect(reloaded.installed).toEqual({});
  });

  it("uses atomic writes (tmp file + rename)", () => {
    manager.init();
    manager.update((draft) => {
      draft.installed[MAC_A] = {
        hostname: "node1",
        role: "worker",
        ip: "10.0.0.1",
        installed_at: "2025-01-01T00:00:00Z",
      };
    });

    // Once update() returns, the staging file must be gone and the real file present.
    expect(existsSync(`${statePath}.tmp`)).toBe(false);
    expect(existsSync(statePath)).toBe(true);

    // The persisted JSON carries the entry written above.
    const persisted = JSON.parse(readFileSync(statePath, "utf-8"));
    expect(persisted.installed[MAC_A].hostname).toBe("node1");
  });
});
// Filesystem-level checks around the bastion PID file, isolated in a temp directory.
describe("PID file handling", () => {
  let sandbox: string;

  /** Reads a PID file the way a consumer would: trimmed, base-10. */
  const readPid = (file: string): number =>
    Number.parseInt(readFileSync(file, "utf-8").trim(), 10);

  beforeEach(() => {
    const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    sandbox = join(tmpdir(), `bastion-pid-test-${suffix}`);
    mkdirSync(sandbox, { recursive: true });
  });

  afterEach(() => {
    rmSync(sandbox, { recursive: true, force: true });
  });

  it("handles stale PID file from previous run", () => {
    const pidPath = join(sandbox, "bastion.pid");
    // A PID this large stands in for a process that no longer exists.
    writeFileSync(pidPath, "999999999");
    expect(readPid(pidPath)).toBe(999999999);
  });

  it("handles corrupted PID file gracefully", () => {
    const pidPath = join(sandbox, "bastion.pid");
    writeFileSync(pidPath, "not-a-number\n");
    expect(Number.isNaN(readPid(pidPath))).toBe(true);
  });

  it("handles missing bastion directory", () => {
    const nested = join(sandbox, "nonexistent", "deep");
    mkdirSync(nested, { recursive: true });
    expect(existsSync(nested)).toBe(true);
  });
});

View File

@@ -0,0 +1,13 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": "src",
"outDir": "dist",
"types": ["node"]
},
"include": ["src/**/*.ts"],
"references": [
{ "path": "../shared" },
{ "path": "../modules" }
]
}

View File

@@ -0,0 +1,8 @@
import { defineProject } from 'vitest/config';
// Vitest project definition: names the project "bastion" and collects
// every *.test.ts file found under tests/.
export default defineProject({
test: {
name: 'bastion',
include: ['tests/**/*.test.ts'],
},
});

View File

@@ -0,0 +1,29 @@
{
"name": "@lab/cli",
"version": "0.1.0",
"private": true,
"type": "module",
"bin": {
"labctl": "./dist/index.js"
},
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"scripts": {
"build": "tsc --build",
"clean": "rimraf dist",
"dev": "tsx src/index.ts",
"test": "vitest",
"test:run": "vitest run"
},
"dependencies": {
"@lab/bastion": "workspace:*",
"@lab/modules": "workspace:*",
"@lab/shared": "workspace:*",
"commander": "^13.0.0",
"ws": "^8.19.0"
},
"devDependencies": {
"@types/node": "^22.10.0",
"@types/ws": "^8.18.1"
}
}

View File

@@ -0,0 +1,161 @@
// Typed API client for communicating with labd.
import https from "node:https";
import { readFileSync } from "node:fs";
import { LabdApiError } from "./errors.js";
import type {
Server,
ServerFilters,
JoinToken,
CreateTokenOpts,
EnrollmentRequest,
EnrollmentResponse,
HealthStatus,
RequestOpts,
} from "./types.js";
/** Connection settings consumed by LabdClient. */
export interface LabdClientConfig {
/** Base URL that every request path is resolved against. */
baseUrl: string;
/** Path to the client certificate file; only read when `keyPath` is also set. */
certPath?: string;
/** Path to the client private key file; only read when `certPath` is also set. */
keyPath?: string;
/** Path to a CA certificate file; only read when a cert/key pair is configured. */
caPath?: string;
/** Per-request timeout in milliseconds; requests use 30 000 ms when this is unset. */
timeoutMs?: number;
}
/** Transport-independent view of an HTTP response: status line plus raw body text. */
interface RawResponse {
  status: number;
  statusText: string;
  text: string;
}

/**
 * Typed HTTP client for the labd API.
 *
 * When both `certPath` and `keyPath` are configured, an `https.Agent` carrying the
 * client certificate (and optional CA) is created and every HTTPS request is sent
 * through it. Node's built-in `fetch` ignores an `agent` option, so those requests
 * go through `https.request`; all other requests use `fetch`.
 */
export class LabdClient {
  private config: LabdClientConfig;
  private agent: https.Agent | undefined;
  private sessionId: string | undefined;

  /**
   * @param config Connection settings. Certificate files are read synchronously
   *   here, so a missing file throws from the constructor.
   */
  constructor(config: LabdClientConfig) {
    this.config = config;
    if (config.certPath && config.keyPath) {
      this.agent = new https.Agent({
        cert: readFileSync(config.certPath),
        key: readFileSync(config.keyPath),
        ca: config.caPath ? readFileSync(config.caPath) : undefined,
        rejectUnauthorized: true,
      });
    }
  }

  /** Sets the value sent as the `X-Session-ID` header on subsequent requests. */
  setSessionId(id: string): void {
    this.sessionId = id;
  }

  // --- Server endpoints ---

  /** Lists servers; defined filter fields are sent as query parameters. */
  async getServers(filters?: ServerFilters): Promise<Server[]> {
    return this.request("GET", "/api/servers", { query: filters as Record<string, string | undefined> });
  }

  /** Fetches a single server by id. */
  async getServer(id: string): Promise<Server> {
    return this.request("GET", `/api/servers/${encodeURIComponent(id)}`);
  }

  // --- Token endpoints ---

  /** Creates a join token. */
  async createJoinToken(opts: CreateTokenOpts): Promise<JoinToken> {
    return this.request("POST", "/api/tokens", { body: opts });
  }

  /** Lists join tokens. */
  async listTokens(): Promise<JoinToken[]> {
    return this.request("GET", "/api/tokens");
  }

  /** Revokes the join token with the given id. */
  async revokeToken(id: string): Promise<{ status: string; id: string }> {
    return this.request("DELETE", `/api/tokens/${encodeURIComponent(id)}`);
  }

  // --- Auth endpoints ---

  /** Submits an enrollment request. */
  async enroll(req: EnrollmentRequest): Promise<EnrollmentResponse> {
    return this.request("POST", "/api/auth/enroll", { body: req });
  }

  // --- Bastion endpoints ---

  /** Lists bastions. */
  async getBastions(): Promise<Array<{
    id: string; hostname: string; network: string; serverIp: string;
    status: string; machineCount: number; lastHeartbeat?: string; connectedAt?: string;
  }>> {
    return this.request("GET", "/api/bastions");
  }

  // --- Machine endpoints (aggregated through labd from bastions) ---

  /** Fetches the machine state from labd. */
  async getMachines(): Promise<import("@lab/shared").BastionState> {
    return this.request("GET", "/api/machines");
  }

  /** Requests installation of a machine identified by MAC. */
  async installMachine(opts: {
    mac: string; hostname: string; disk?: string; role?: string; os?: string;
  }): Promise<{ status: string; data?: unknown; error?: string }> {
    return this.request("POST", "/api/machines/install", { body: opts });
  }

  /** Removes a machine record by MAC. */
  async forgetMachine(mac: string): Promise<{ status: string }> {
    return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
  }

  /** Changes the role recorded for a machine. */
  async updateRole(mac: string, role: string): Promise<{ status: string }> {
    return this.request("POST", "/api/machines/role", { body: { mac, role } });
  }

  /** Fetches logs for a machine by MAC. */
  async getMachineLogs(mac: string): Promise<Record<string, unknown>> {
    return this.request("GET", `/api/machines/${encodeURIComponent(mac)}/logs`);
  }

  // --- Health endpoints ---

  /** Fetches the labd health report. */
  async getHealth(): Promise<HealthStatus> {
    return this.request("GET", "/healthz");
  }

  // --- Internal ---

  /**
   * Sends one JSON request and parses the JSON response.
   *
   * @throws LabdApiError for non-2xx responses, connection failures and timeouts.
   *   Other errors (for example a 2xx response whose body is not JSON) propagate
   *   unchanged.
   */
  private async request<T>(method: string, path: string, opts?: RequestOpts): Promise<T> {
    const url = new URL(path, this.config.baseUrl);
    if (opts?.query) {
      for (const [k, v] of Object.entries(opts.query)) {
        if (v !== undefined) url.searchParams.set(k, String(v));
      }
    }

    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };
    if (this.sessionId) {
      headers["X-Session-ID"] = this.sessionId;
    }

    // Only a missing body is omitted; defined-but-falsy payloads are still sent.
    const payload = opts?.body == null ? undefined : JSON.stringify(opts.body);
    const timeoutMs = this.config.timeoutMs ?? 30_000;
    const agent = this.agent;

    let resp: RawResponse;
    try {
      resp =
        agent !== undefined && url.protocol === "https:"
          ? await this.sendViaAgent(agent, url, method, headers, payload, timeoutMs)
          : await this.sendViaFetch(url, method, headers, payload, timeoutMs);
    } catch (err) {
      if (err instanceof LabdApiError) throw err;
      if (err instanceof TypeError && (err.message.includes("fetch") || err.message.includes("ECONNREFUSED"))) {
        throw LabdApiError.notConnected(this.config.baseUrl);
      }
      if (err instanceof DOMException && err.name === "TimeoutError") {
        throw LabdApiError.timeout(timeoutMs);
      }
      throw err;
    }

    if (resp.status < 200 || resp.status > 299) {
      let errorBody: unknown;
      try {
        errorBody = JSON.parse(resp.text);
      } catch {
        // Non-JSON error body: fall back to the HTTP status text.
        errorBody = { error: resp.statusText };
      }
      throw LabdApiError.fromResponse(resp.status, errorBody);
    }
    return JSON.parse(resp.text) as T;
  }

  /** Sends the request with the built-in fetch (no client certificate). */
  private async sendViaFetch(
    url: URL,
    method: string,
    headers: Record<string, string>,
    payload: string | undefined,
    timeoutMs: number,
  ): Promise<RawResponse> {
    const resp = await fetch(url.toString(), {
      method,
      headers,
      body: payload,
      signal: AbortSignal.timeout(timeoutMs),
    });
    return { status: resp.status, statusText: resp.statusText, text: await resp.text() };
  }

  /** Sends the request through `https.request` so the agent's client certificate is used. */
  private sendViaAgent(
    agent: https.Agent,
    url: URL,
    method: string,
    headers: Record<string, string>,
    payload: string | undefined,
    timeoutMs: number,
  ): Promise<RawResponse> {
    return new Promise<RawResponse>((resolve, reject) => {
      const req = https.request(url, { method, headers, agent });

      // Overall deadline, mirroring AbortSignal.timeout on the fetch path.
      // Rejecting before destroy() makes the timeout win over the resulting socket error.
      const timer = setTimeout(() => {
        reject(LabdApiError.timeout(timeoutMs));
        req.destroy();
      }, timeoutMs);

      const fail = (err: Error): void => {
        clearTimeout(timer);
        reject(LabdApiError.notConnected(`${this.config.baseUrl} — ${err.message}`));
      };

      req.on("response", (res) => {
        const chunks: Buffer[] = [];
        res.on("data", (chunk: Buffer) => chunks.push(chunk));
        res.on("error", fail);
        res.on("end", () => {
          clearTimeout(timer);
          resolve({
            status: res.statusCode ?? 0,
            statusText: res.statusMessage ?? "",
            text: Buffer.concat(chunks).toString("utf-8"),
          });
        });
      });
      req.on("error", fail);
      req.end(payload);
    });
  }
}

View File

@@ -0,0 +1,47 @@
// CLI configuration loading for labd client.
// Bridges the CLI config module into LabdClient configuration.
import { loadConfig, CONFIG_DIR, CONFIG_FILE, CERT_DIR } from "../config/index.js";
import { LabdClient, type LabdClientConfig } from "./client.js";
export { CONFIG_DIR, CONFIG_FILE, CERT_DIR };
/**
 * Builds the LabdClient configuration from the CLI config file.
 *
 * Precedence, lowest to highest: CLI config file, `LABCTL_*_PATH` environment
 * variables (certificate paths only), then the explicit `overrides`.
 */
export function loadClientConfig(
  overrides?: Partial<LabdClientConfig>,
): LabdClientConfig {
  const { labdUrl, certPath, keyPath, caPath } = loadConfig();

  const resolved: LabdClientConfig = { baseUrl: labdUrl };
  if (certPath) resolved.certPath = certPath;
  if (keyPath) resolved.keyPath = keyPath;
  if (caPath) resolved.caPath = caPath;

  // Environment variable overrides (cert paths)
  const envCert = process.env["LABCTL_CERT_PATH"];
  const envKey = process.env["LABCTL_KEY_PATH"];
  const envCa = process.env["LABCTL_CA_PATH"];
  if (envCert) resolved.certPath = envCert;
  if (envKey) resolved.keyPath = envKey;
  if (envCa) resolved.caPath = envCa;

  return overrides ? { ...resolved, ...overrides } : resolved;
}
/** Creates a fresh LabdClient from the loaded configuration plus optional overrides. */
export function createLabdClient(
  overrides?: Partial<LabdClientConfig>,
): LabdClient {
  return new LabdClient(loadClientConfig(overrides));
}
// Lazily created client shared by every caller in this process.
let _singleton: LabdClient | undefined;

/** Returns the shared LabdClient, creating it with default configuration on first use. */
export function getLabdClient(): LabdClient {
  _singleton ??= createLabdClient();
  return _singleton;
}

View File

@@ -0,0 +1,59 @@
// Structured API error class for labd communication.
/**
 * Error raised for any failed interaction with labd.
 *
 * `statusCode` is the HTTP status, or 0 when no HTTP response was obtained
 * (connection failure or timeout). `errorCode` is derived from `statusCode`.
 */
export class LabdApiError extends Error {
  readonly statusCode: number;
  readonly errorCode: string;
  readonly detail: string | undefined;

  constructor(statusCode: number, message: string, detail?: string) {
    super(message);
    this.name = "LabdApiError";
    this.statusCode = statusCode;
    this.errorCode = statusCodeToErrorCode(statusCode);
    this.detail = detail;
  }

  /**
   * Builds an error from a parsed response body. String `error` and `detail`
   * fields are used when present; otherwise the message is `HTTP <status>`.
   */
  static fromResponse(statusCode: number, body: unknown): LabdApiError {
    const fallbackMessage = `HTTP ${statusCode}`;
    if (typeof body !== "object" || body === null) {
      return new LabdApiError(statusCode, fallbackMessage);
    }

    const { error, detail } = body as Record<string, unknown>;
    return new LabdApiError(
      statusCode,
      typeof error === "string" ? error : fallbackMessage,
      typeof detail === "string" ? detail : undefined,
    );
  }

  /** Error for a labd instance that could not be reached at `url`. */
  static notConnected(url: string): LabdApiError {
    const hint = "Check that labd is running and the URL is correct.";
    return new LabdApiError(0, `Cannot connect to labd at ${url}`, hint);
  }

  /** Error for a request that exceeded `timeoutMs` milliseconds. */
  static timeout(timeoutMs: number): LabdApiError {
    const hint = "The server may be overloaded. Try again later.";
    return new LabdApiError(0, `Request timed out after ${timeoutMs}ms`, hint);
  }
}
/** Type guard: true when `err` is a LabdApiError instance (an `instanceof` check). */
export function isLabdApiError(err: unknown): err is LabdApiError {
return err instanceof LabdApiError;
}
/**
 * Maps an HTTP status code to a symbolic error code.
 *
 * Unlisted statuses yield "UNKNOWN", except 0 (no HTTP response), which yields
 * "CONNECTION_ERROR".
 */
function statusCodeToErrorCode(code: number): string {
  const knownCodes = new Map<number, string>([
    [400, "BAD_REQUEST"],
    [401, "UNAUTHORIZED"],
    [403, "FORBIDDEN"],
    [404, "NOT_FOUND"],
    [409, "CONFLICT"],
    [429, "RATE_LIMITED"],
    [500, "INTERNAL_ERROR"],
    [503, "UNAVAILABLE"],
  ]);

  const symbolic = knownCodes.get(code);
  if (symbolic !== undefined) {
    return symbolic;
  }
  if (code === 0) {
    return "CONNECTION_ERROR";
  }
  return "UNKNOWN";
}

View File

@@ -0,0 +1,18 @@
// Public API for labd client.
export { LabdClient, type LabdClientConfig } from "./client.js";
export { LabdApiError, isLabdApiError } from "./errors.js";
export { loadClientConfig, createLabdClient, getLabdClient, CONFIG_DIR, CONFIG_FILE, CERT_DIR } from "./config.js";
export type {
Server,
ServerFilters,
Agent,
JoinToken,
CreateTokenOpts,
EnrollmentRequest,
EnrollmentResponse,
HealthStatus,
ApiErrorBody,
RequestOpts,
} from "./types.js";
export { createLabdWebSocket, streamExec, streamLogs, type StreamOptions } from "./websocket.js";

View File

@@ -0,0 +1,96 @@
// Typed interfaces for labd API requests and responses.
// Matches Prisma schema models and labd route contracts.
// --- Server ---
/** Server record as returned by the labd server endpoints. */
export interface Server {
id: string;
hostname: string;
mac: string | null;
cloud: string;
environment: string;
role: string;
labels: Record<string, string>;
ip: string | null;
agentVersion: string | null;
status: string;
lastHeartbeat: string | null;
createdAt: string;
updatedAt: string;
/** Agent record attached to this server; may be absent or null. */
agent?: Agent | null;
}
/** Agent record that can be embedded in a Server. */
export interface Agent {
id: string;
serverId: string;
certificatePem: string | null;
enrolledAt: string;
lastSeen: string | null;
}
/** Optional filters for listing servers; each defined field becomes a query parameter. */
export interface ServerFilters {
cloud?: string;
environment?: string;
status?: string;
}
// --- Join Tokens ---
/** Join token record as returned by the token endpoints. */
export interface JoinToken {
id: string;
token?: string; // Only present on creation
type: string;
label: string | null;
usedBy: string | null;
usedAt: string | null;
revokedAt: string | null;
createdAt: string;
expiresAt: string | null;
}
/** Request body for creating a join token; every field is optional. */
export interface CreateTokenOpts {
type?: "one-time" | "reusable";
label?: string;
expiresInHours?: number;
}
// --- Auth / Enrollment ---
/** Request body for the enrollment endpoint. */
export interface EnrollmentRequest {
token: string;
hostname: string;
csr?: string;
}
/** Response body of the enrollment endpoint. */
export interface EnrollmentResponse {
status: string;
hostname: string;
message: string;
certificatePem: string | null;
}
// --- Health ---
/** Response body of the health endpoint. */
export interface HealthStatus {
status: "healthy" | "degraded";
uptime: number;
timestamp: string;
checks: {
database: "ok" | "error";
};
}
// --- API Error ---
/** JSON error payload shape; presumably what labd sends on failed requests — confirm against the server. */
export interface ApiErrorBody {
error: string;
detail?: string;
code?: string;
}
// --- Request helpers ---
/** Per-request options: query parameters (undefined values are skipped) and a JSON-serialisable body. */
export interface RequestOpts {
query?: Record<string, string | number | boolean | undefined>;
body?: unknown;
}

View File

@@ -0,0 +1,160 @@
// WebSocket client for real-time streaming operations (exec, logs).
import { WebSocket } from "ws";
import { loadConfig } from "../config/index.js";
import { readFileSync } from "node:fs";
import { LabdApiError } from "./errors.js";
/** Callbacks through which the streaming helpers report data, errors and socket closure. */
export interface StreamOptions {
/** Receives each piece of streamed text (exec output data or a log line). */
onData: (data: string) => void;
/** Receives socket errors and message-parsing failures; log streams also report server-side errors here. */
onError: (error: Error) => void;
/** Invoked when the underlying WebSocket closes. */
onClose: () => void;
}
/**
 * Opens a WebSocket to labd at `path`, using the CLI configuration for the
 * base URL and, when a cert/key pair is configured, client-certificate TLS.
 *
 * @param path Path resolved against the labd URL with its scheme switched to ws/wss.
 * @returns The socket once its "open" event has fired.
 * @throws LabdApiError when the connection errors or does not open within 10 seconds.
 */
export async function createLabdWebSocket(path: string): Promise<WebSocket> {
  const CONNECT_TIMEOUT_MS = 10_000;

  const config = loadConfig();
  const baseUrl = config.labdUrl.replace("https:", "wss:").replace("http:", "ws:");
  const url = new URL(path, baseUrl);

  const wsOptions: WebSocket.ClientOptions = {};
  if (config.certPath && config.keyPath) {
    wsOptions.cert = readFileSync(config.certPath);
    wsOptions.key = readFileSync(config.keyPath);
    if (config.caPath) wsOptions.ca = readFileSync(config.caPath);
  }

  return new Promise<WebSocket>((resolve, reject) => {
    // Create the socket BEFORE arming the timer. If the constructor throws
    // synchronously, no timer is left behind to touch an uninitialised `ws`,
    // which would otherwise surface later as an uncaught ReferenceError.
    const ws = new WebSocket(url.toString(), wsOptions);

    const timeout = setTimeout(() => {
      ws.terminate();
      reject(LabdApiError.timeout(CONNECT_TIMEOUT_MS));
    }, CONNECT_TIMEOUT_MS);

    ws.on("open", () => {
      clearTimeout(timeout);
      resolve(ws);
    });
    ws.on("error", (err: Error) => {
      clearTimeout(timeout);
      reject(
        LabdApiError.notConnected(config.labdUrl + " — " + err.message),
      );
    });
  });
}
/**
 * Runs `command` on `serverName` through labd's exec WebSocket and streams its output.
 *
 * stdout and stderr chunks are both delivered to `options.onData`.
 *
 * @param serverName Target server, sent as `server` in the exec request.
 * @param command Command and arguments.
 * @param options Stream callbacks plus `tty` (default false) and `timeout`
 *   (default 30 000, forwarded to labd unchanged).
 * @returns The remote exit code; 1 when the exit message carries none.
 * @throws Error when labd reports an error, or when the connection closes
 *   before an exit code was received.
 */
export async function streamExec(
  serverName: string,
  command: string[],
  options: StreamOptions & { tty?: boolean; timeout?: number },
): Promise<number> {
  const ws = await createLabdWebSocket("/ws/exec");
  const requestId = crypto.randomUUID();

  return new Promise<number>((resolve, reject) => {
    ws.on("message", (raw: Buffer) => {
      try {
        const msg = JSON.parse(raw.toString()) as {
          type: string;
          data?: string;
          exitCode?: number;
          message?: string;
        };
        switch (msg.type) {
          case "exec-stdout":
          case "exec-stderr":
            if (msg.data) options.onData(msg.data);
            break;
          case "exec-exit":
            ws.close();
            resolve(msg.exitCode ?? 1);
            break;
          case "error":
            ws.close();
            reject(new Error(msg.message ?? "Remote execution error"));
            break;
        }
      } catch (err) {
        options.onError(err instanceof Error ? err : new Error(String(err)));
      }
    });

    ws.on("close", () => {
      options.onClose();
      // If the socket closed before "exec-exit" or "error" arrived, nothing has
      // settled the promise and the caller would wait forever. Rejecting an
      // already-settled promise is a no-op, so the normal paths are unaffected.
      reject(new Error("Connection to labd closed before the command finished"));
    });

    ws.on("error", (err: Error) => {
      options.onError(err);
    });

    ws.send(
      JSON.stringify({
        type: "exec",
        requestId,
        server: serverName,
        command,
        tty: options.tty ?? false,
        timeout: options.timeout ?? 30_000,
      }),
    );
  });
}
/**
 * Subscribes to log lines for `serverName` over labd's logs WebSocket.
 *
 * The returned promise resolves once the subscription request has been sent;
 * lines, errors and closure are reported through the `options` callbacks.
 *
 * @param serverName Target server, sent as `server` in the subscription.
 * @param logOptions Forwarded to labd unchanged as the subscription's `options`.
 * @param options Stream callbacks.
 */
export async function streamLogs(
  serverName: string,
  logOptions: {
    follow?: boolean;
    lines?: number;
    unit?: string;
    since?: string;
    priority?: string;
    kernel?: boolean;
  },
  options: StreamOptions,
): Promise<void> {
  const socket = await createLabdWebSocket("/ws/logs");
  const requestId = crypto.randomUUID();

  const handleMessage = (raw: Buffer): void => {
    try {
      const msg = JSON.parse(raw.toString()) as {
        type: string;
        line?: string;
        message?: string;
      };
      if (msg.type === "log-line") {
        if (msg.line) options.onData(msg.line);
      } else if (msg.type === "log-end") {
        socket.close();
      } else if (msg.type === "error") {
        socket.close();
        options.onError(new Error(msg.message ?? "Log streaming error"));
      }
    } catch (err) {
      options.onError(err instanceof Error ? err : new Error(String(err)));
    }
  };

  socket.on("message", handleMessage);
  socket.on("close", () => {
    options.onClose();
  });
  socket.on("error", (err) => {
    options.onError(err);
  });

  const subscription = {
    type: "log-subscribe",
    requestId,
    server: serverName,
    options: logOptions,
  };
  socket.send(JSON.stringify(subscription));
}

View File

@@ -0,0 +1,403 @@
// CLI command: labctl app k3s install/health <target>
// Install or check k3s on a target machine via SSH.
import { existsSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import type { Command } from "commander";
import type { BastionState } from "@lab/shared";
import { K3sModule, sshExec } from "@lab/modules";
import { getLabdClient } from "../api/config.js";
/**
 * Resolves a user-supplied target to the connection details of a machine.
 *
 * Resolution order:
 *  1. A dotted-quad IP is used as-is (hostname = the IP, role "infra").
 *  2. A MAC address (":" or "-" separated, any case) looked up in `state.installed`.
 *  3. A hostname: an exact match, or a prefix followed by "." (short name of an FQDN).
 *
 * Only machines with a known IP are returned, so a match without an IP counts
 * as unresolved rather than producing a target that cannot be reached.
 *
 * @returns The resolved target, or null when nothing usable matches.
 */
function resolveTarget(
  target: string,
  state: BastionState | null,
): { ip: string; hostname: string; role: string } | null {
  // Direct IP
  if (/^\d+\.\d+\.\d+\.\d+$/.test(target)) {
    return { ip: target, hostname: target, role: "infra" };
  }
  if (!state) return null;

  // Check by MAC
  const mac = target.toLowerCase().replace(/-/g, ":");
  const byMac = state.installed[mac];
  if (byMac?.ip) {
    return { ip: byMac.ip, hostname: byMac.hostname, role: byMac.role };
  }

  // Check by hostname (entries without an IP are skipped, as in the MAC branch)
  for (const info of Object.values(state.installed)) {
    const nameMatches = info.hostname === target || info.hostname.startsWith(target + ".");
    if (nameMatches && info.ip) {
      return { ip: info.ip, hostname: info.hostname, role: info.role };
    }
  }
  return null;
}
/**
 * Finds the first existing private key among id_ed25519, id_ecdsa and id_rsa
 * (in that order) in the invoking user's ~/.ssh.
 *
 * When SUDO_USER is set, /home/<SUDO_USER> is used as the home directory
 * instead of the current process's home.
 *
 * @returns The key path, or undefined when none of the candidates exists.
 */
function findSshKey(): string | undefined {
  const sudoUser = process.env["SUDO_USER"];
  const sshDir = join(sudoUser ? join("/home", sudoUser) : homedir(), ".ssh");

  return ["id_ed25519", "id_ecdsa", "id_rsa"]
    .map((fileName) => join(sshDir, fileName))
    .find((candidate) => existsSync(candidate));
}
/** Fetches the machine state from labd; any failure is reported as null. */
async function fetchState(): Promise<BastionState | null> {
  let machines: BastionState | null = null;
  try {
    machines = await getLabdClient().getMachines();
  } catch {
    // Best effort: callers treat null as "state unavailable".
  }
  return machines;
}
import { registerLabcontrollerCommands } from "./labcontroller.js";
/**
 * True when any line of `systemctl is-active` output reports "active".
 *
 * The probe runs `is-active k3s || is-active k3s-agent`. On an agent node the
 * first command prints "inactive" and the second "active", so stdout is
 * "inactive\nactive"; comparing the whole string to "active" would miss it.
 */
function hasActiveUnit(stdout: string): boolean {
  return stdout.split("\n").some((line) => line.trim() === "active");
}

/**
 * Registers the `app` command group on `program`:
 *  - `app labcontroller ...` (delegated to registerLabcontrollerCommands)
 *  - `app k3s install <target>`: install and configure k3s over SSH
 *  - `app k3s health [target]`: summary table for all hosts, or a detailed check for one
 *  - `app k3s list`: table of installed machines with their k3s status
 */
export function registerAppCommand(program: Command): void {
  const appCmd = program.command("app").description("Application management");

  // labcontroller subcommands
  registerLabcontrollerCommands(appCmd);

  const k3sCmd = appCmd.command("k3s").description("k3s cluster management");

  k3sCmd
    .command("install <target>")
    .description("Install k3s on a target machine (hostname, IP, or MAC)")
    .option("--role <role>", "k3s role: infra (server) or worker (agent)", "infra")
    .option("--user <user>", "SSH user", "michal")
    .option("--k3s-server <url>", "k3s server URL (required for worker role)")
    .option("--k3s-token <token>", "k3s join token (required for worker role)")
    .action(async (target: string, opts: {
      role: string;
      user: string;
      k3sServer?: string;
      k3sToken?: string;
    }) => {
      const state = await fetchState();
      const resolved = resolveTarget(target, state);
      if (!resolved) {
        console.error(`Cannot resolve target: ${target}`);
        console.error("Provide an IP address, hostname, or MAC of an installed machine.");
        process.exit(1);
      }

      // Any value other than "worker" is treated as "infra".
      const role = opts.role === "worker" ? "worker" : "infra";
      const sshKey = findSshKey();
      console.log(`Installing k3s on ${resolved.hostname} (${resolved.ip}) as ${role}...`);
      console.log("");

      const k3s = new K3sModule();
      const moduleCtx = {
        hostname: resolved.hostname,
        ip: resolved.ip,
        role,
        os: "fedora-43" as const,
        arch: "x86_64" as const,
        sshUser: opts.user,
        ...(sshKey ? { sshKeyPath: sshKey } : {}),
        config: {
          ...(opts.k3sServer ? { k3sServerUrl: opts.k3sServer } : {}),
          ...(opts.k3sToken ? { k3sToken: opts.k3sToken } : {}),
        },
      };

      const installResult = await k3s.install(moduleCtx);
      for (const line of installResult.output) {
        console.log(` ${line}`);
      }
      if (!installResult.success) {
        console.error(`\nk3s install failed: ${installResult.errors.join(", ")}`);
        process.exit(1);
      }

      console.log("\nRunning post-install configuration...\n");
      const configResult = await k3s.configure(moduleCtx);
      for (const line of configResult.output) {
        console.log(` ${line}`);
      }
      if (!configResult.success) {
        console.error(`\nk3s configure failed: ${configResult.errors.join(", ")}`);
        process.exit(1);
      }
      console.log("\nk3s installed successfully.");

      // Check if the machine's role requires additional app deployments
      try {
        const { ROLE_REGISTRY } = await import("@lab/shared");
        const freshState = await fetchState();
        if (freshState) {
          for (const [, info] of Object.entries(freshState.installed)) {
            if (info.ip === resolved.ip || info.hostname === resolved.hostname) {
              const roleInfo = ROLE_REGISTRY.find((r: { name: string }) => r.name === info.role);
              if (roleInfo && roleInfo.apps.length > 0) {
                console.log(`\nRole ${info.role} requires: ${roleInfo.apps.join(", ")}`);
                console.log(`Deploying automatically...`);
                const { execFileSync } = await import("node:child_process");
                try {
                  // Re-invoke this CLI to run the labcontroller deploy subcommand.
                  execFileSync("node", [
                    process.argv[1] ?? "",
                    "app", "labcontroller", "deploy", resolved.hostname,
                    "--user", opts.user,
                  ], { stdio: "inherit" });
                } catch {
                  console.error(`\nAuto-deploy failed. Run manually: labctl app labcontroller deploy ${resolved.hostname}`);
                }
              }
              break;
            }
          }
        }
      } catch { /* best-effort chain */ }

      console.log(`\nTo get kubeconfig: ssh ${opts.user}@${resolved.ip} sudo cat /etc/rancher/k3s/k3s.yaml`);
    });

  k3sCmd
    .command("health [target]")
    .description("Check k3s health (all hosts if no target given)")
    .option("--user <user>", "SSH user", "michal")
    .action(async (target: string | undefined, opts: { user: string }) => {
      const sshKey = findSshKey();

      if (!target) {
        let state: BastionState;
        try {
          state = await getLabdClient().getMachines();
        } catch (err) {
          console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
          process.exit(1);
        }
        const entries = Object.entries(state.installed);
        if (entries.length === 0) {
          console.log("No installed machines.");
          return;
        }

        const BOLD = "\x1b[1m";
        const GREEN = "\x1b[32m";
        const RED = "\x1b[31m";
        const DIM = "\x1b[2m";
        const RESET = "\x1b[0m";
        const pad = (s: string, w: number) => s.padEnd(w);

        console.log(
          `${BOLD}${pad("HOST", 22)}${pad("IP", 16)}${pad("ROLE", 8)}${pad("K3S", 14)}${pad("NODE", 10)}${pad("ENCRYPT", 10)}${pad("CNI", 14)}${pad("PODS", 6)}${RESET}`,
        );

        interface HealthRow {
          host: string; ip: string; role: string;
          k3s: string; node: string; encrypt: string; cni: string; pods: string;
          k3sC: string; nodeC: string; encC: string; cniC: string;
        }

        // Probe every host concurrently; each probe yields one table row.
        const probes = entries.map(async ([_mac, info]): Promise<HealthRow> => {
          const r: HealthRow = {
            host: info.hostname, ip: info.ip, role: info.role,
            k3s: "—", node: "—", encrypt: "—", cni: "—", pods: "—",
            k3sC: DIM, nodeC: DIM, encC: DIM, cniC: DIM,
          };
          if (!info.ip || info.role === "vanilla") {
            r.k3s = info.role === "vanilla" ? "n/a" : "no ip";
            return r;
          }
          try {
            const svc = await sshExec(info.ip, opts.user, "systemctl is-active k3s 2>/dev/null || systemctl is-active k3s-agent 2>/dev/null", {
              ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000,
            });
            if (!hasActiveUnit(svc.stdout)) {
              const stopped = svc.stdout.trim() === "inactive";
              r.k3s = stopped ? "stopped" : "not installed";
              r.k3sC = stopped ? RED : DIM;
              return r;
            }
            r.k3s = "running"; r.k3sC = GREEN;

            const [nodeRes, encRes, cniRes, podRes] = await Promise.all([
              sshExec(info.ip, opts.user,
                "sudo k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' 2>/dev/null",
                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
              sshExec(info.ip, opts.user,
                "sudo k3s secrets-encrypt status 2>/dev/null | head -1",
                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
              sshExec(info.ip, opts.user,
                "sudo k3s kubectl get pods -n kube-system -l k8s-app=cilium --no-headers 2>/dev/null | head -1",
                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
              sshExec(info.ip, opts.user,
                "sudo k3s kubectl get pods -A --no-headers 2>/dev/null | wc -l",
                { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 }),
            ]);
            r.node = nodeRes.stdout.includes("True") ? "Ready" : "NotReady";
            r.nodeC = nodeRes.stdout.includes("True") ? GREEN : RED;
            r.encrypt = encRes.stdout.includes("Enabled") ? "yes" : "no";
            r.encC = encRes.stdout.includes("Enabled") ? GREEN : RED;
            r.cni = cniRes.stdout.includes("Running") ? "cilium" : "flannel";
            r.cniC = cniRes.stdout.includes("Running") ? GREEN : DIM;
            r.pods = podRes.stdout.trim() || "?";
          } catch {
            r.k3s = "unreachable"; r.k3sC = RED;
          }
          return r;
        });

        const results = await Promise.all(probes);
        for (const r of results) {
          console.log(
            `${pad(r.host, 22)}${pad(r.ip, 16)}${pad(r.role, 8)}${r.k3sC}${pad(r.k3s, 14)}${RESET}${r.nodeC}${pad(r.node, 10)}${RESET}${r.encC}${pad(r.encrypt, 10)}${RESET}${r.cniC}${pad(r.cni, 14)}${RESET}${pad(r.pods, 6)}`,
          );
        }
        return;
      }

      // Single target: detailed health check
      const state = await fetchState();
      const resolved = resolveTarget(target, state);
      if (!resolved) {
        console.error(`Cannot resolve target: ${target}`);
        process.exit(1);
      }
      console.log(`Checking k3s health on ${resolved.hostname} (${resolved.ip})...\n`);

      const k3s = new K3sModule();
      const healthResult = await k3s.health({
        hostname: resolved.hostname,
        ip: resolved.ip,
        role: resolved.role,
        os: "fedora-43" as const,
        arch: "x86_64" as const,
        sshUser: opts.user,
        ...(sshKey ? { sshKeyPath: sshKey } : {}),
        config: {},
      });
      for (const line of healthResult.output) {
        console.log(` ${line}`);
      }
      if (healthResult.errors.length > 0) {
        for (const err of healthResult.errors) {
          console.error(` ERROR: ${err}`);
        }
      }
      process.exit(healthResult.success ? 0 : 1);
    });

  k3sCmd
    .command("list")
    .description("List installed machines and their k3s status")
    .option("--user <user>", "SSH user", "michal")
    .action(async (opts: { user: string }) => {
      let state: BastionState;
      try {
        state = await getLabdClient().getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }
      const entries = Object.entries(state.installed);
      if (entries.length === 0) {
        console.log("No installed machines.");
        return;
      }

      const sshKey = findSshKey();
      const BOLD = "\x1b[1m";
      const GREEN = "\x1b[32m";
      const RED = "\x1b[31m";
      const DIM = "\x1b[2m";
      const RESET = "\x1b[0m";
      const hdr = (s: string, w: number) => s.padEnd(w);

      console.log(
        `${BOLD}${hdr("HOSTNAME", 28)}${hdr("IP", 18)}${hdr("ROLE", 10)}${hdr("K3S", 16)}${hdr("NODE", 12)}${hdr("PODS", 6)}${RESET}`,
      );

      // Probe every host concurrently; each probe yields one table row.
      const probes = entries.map(async ([_mac, info]) => {
        const row = {
          hostname: info.hostname,
          ip: info.ip,
          role: info.role,
          k3s: "—",
          node: "—",
          pods: "—",
          k3sColor: DIM,
          nodeColor: DIM,
        };
        if (!info.ip || info.role === "vanilla") {
          row.k3s = info.role === "vanilla" ? "n/a" : "no ip";
          return row;
        }
        try {
          const svcResult = await sshExec(info.ip, opts.user, "systemctl is-active k3s 2>/dev/null || systemctl is-active k3s-agent 2>/dev/null", {
            ...(sshKey ? { keyPath: sshKey } : {}),
            timeoutMs: 8_000,
          });
          const svcStatus = svcResult.stdout.trim();
          if (hasActiveUnit(svcResult.stdout)) {
            row.k3s = "running";
            row.k3sColor = GREEN;

            const nodeResult = await sshExec(info.ip, opts.user,
              "sudo k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' 2>/dev/null || echo unknown",
              { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 },
            );
            const nodeReady = nodeResult.stdout.trim();
            if (nodeReady.includes("True")) {
              row.node = "Ready";
              row.nodeColor = GREEN;
            } else {
              row.node = "NotReady";
              row.nodeColor = RED;
            }

            const podResult = await sshExec(info.ip, opts.user,
              "sudo k3s kubectl get pods -A --no-headers 2>/dev/null | wc -l",
              { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 },
            );
            row.pods = podResult.stdout.trim() || "?";
          } else if (svcStatus === "inactive" || svcStatus === "dead") {
            row.k3s = "stopped";
            row.k3sColor = RED;
          } else {
            row.k3s = "not installed";
            row.k3sColor = DIM;
          }
        } catch {
          row.k3s = "unreachable";
          row.k3sColor = RED;
        }
        return row;
      });

      const results = await Promise.all(probes);
      for (const r of results) {
        console.log(
          `${hdr(r.hostname, 28)}${hdr(r.ip, 18)}${hdr(r.role, 10)}${r.k3sColor}${hdr(r.k3s, 16)}${RESET}${r.nodeColor}${hdr(r.node, 12)}${RESET}${hdr(r.pods, 6)}`,
        );
      }
    });
}

View File

@@ -0,0 +1,76 @@
// labctl config — view and modify CLI configuration.
import type { Command } from "commander";
import {
loadConfig,
saveConfig,
getConfigValue,
setConfigValue,
isValidConfigKey,
CONFIG_FILE,
} from "../config/index.js";
/**
 * Wire the `config` command group onto `parent`.
 *
 * Subcommands:
 *  - `list`              print every configuration value that is defined
 *  - `get <key>`         print one value (nothing is printed when it is falsy)
 *  - `set <key> <value>` validate, persist and echo a new value
 *  - `path`              print the location of the configuration file
 *
 * `get` and `set` exit with status 1 on an unknown key; `set` additionally
 * rejects an `outputFormat` other than table, json or yaml.
 */
export function registerConfigCommand(parent: Command): void {
  const group = parent.command("config");
  group.description("View and modify CLI configuration");

  // config list
  const listCmd = group.command("list");
  listCmd.description("Show all configuration values");
  listCmd.action(() => {
    const current = loadConfig();
    console.log(`# Configuration (${CONFIG_FILE})\n`);
    Object.entries(current)
      .filter(([, setting]) => setting !== undefined)
      .forEach(([name, setting]) => {
        console.log(`${name}: ${setting}`);
      });
  });

  // config get <key>
  const getCmd = group.command("get <key>");
  getCmd.description("Get a configuration value");
  getCmd.action((key: string) => {
    if (!isValidConfigKey(key)) {
      console.error(`Unknown config key: ${key}`);
      console.error(`Valid keys: labdUrl, certPath, keyPath, caPath, defaultEnvironment, defaultCloud, outputFormat`);
      process.exit(1);
    }
    const found = getConfigValue(loadConfig(), key);
    if (found) {
      console.log(found);
    }
  });

  // config set <key> <value>
  const setCmd = group.command("set <key> <value>");
  setCmd.description("Set a configuration value");
  setCmd.action((key: string, value: string) => {
    if (!isValidConfigKey(key)) {
      console.error(`Unknown config key: ${key}`);
      console.error(`Valid keys: labdUrl, certPath, keyPath, caPath, defaultEnvironment, defaultCloud, outputFormat`);
      process.exit(1);
    }
    const formatRejected = key === "outputFormat" && !["table", "json", "yaml"].includes(value);
    if (formatRejected) {
      console.error(`Invalid output format: ${value}. Must be table, json, or yaml.`);
      process.exit(1);
    }
    // Load, apply the change, and write back in one pass.
    saveConfig(setConfigValue(loadConfig(), key, value));
    console.log(`Set ${key} = ${value}`);
  });

  // config path
  const pathCmd = group.command("path");
  pathCmd.description("Show configuration file path");
  pathCmd.action(() => {
    console.log(CONFIG_FILE);
  });
}

View File

@@ -0,0 +1,126 @@
// labctl doctor — diagnose configuration and connectivity issues.
import { existsSync, readFileSync } from "node:fs";
import { X509Certificate } from "node:crypto";
import type { Command } from "commander";
import { loadConfig, CONFIG_FILE, CERT_DIR } from "../config/index.js";
/** Outcome of a single `labctl doctor` check. */
interface DiagnosticResult {
// Short label of the check; printed before the message.
name: string;
// Severity of the outcome; selects the icon and colour in text output.
status: "ok" | "warn" | "error";
// Detail text printed after the name.
message: string;
}
// ANSI escape sequences used to colour the per-check icon in text output;
// RESET is emitted after the icon to restore the default style.
const GREEN = "\x1b[32m";
const YELLOW = "\x1b[33m";
const RED = "\x1b[31m";
const RESET = "\x1b[0m";
/**
 * Register the `doctor` command, which runs a fixed series of checks and
 * reports each one as ok / warn / error.
 *
 * Checks, in order: configuration file presence, labd URL, client
 * certificate expiry, certificate directory, and an HTTP GET of
 * `<labdUrl>/healthz` with a 5 second timeout.
 *
 * Output is a JSON array with `--json`, otherwise one coloured line per
 * check plus a summary. In both modes the process exit code is set to 1
 * when at least one check reports "error".
 *
 * @param program Root command the `doctor` subcommand is attached to.
 */
export function registerDoctorCommand(program: Command): void {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const HEALTH_TIMEOUT_MS = 5000;

  program
    .command("doctor")
    .description("Diagnose configuration and connectivity issues")
    .option("--json", "Output results as JSON")
    .action(async (opts: { json?: boolean }) => {
      const results: DiagnosticResult[] = [];
      const config = loadConfig();

      // Check config file
      const configFileExists = existsSync(CONFIG_FILE);
      results.push({
        name: "Configuration file",
        status: configFileExists ? "ok" : "warn",
        message: configFileExists ? CONFIG_FILE : "Using defaults — run 'labctl config set labdUrl <url>'",
      });

      // Check labd URL
      results.push({
        name: "labd URL",
        status: config.labdUrl ? "ok" : "error",
        message: config.labdUrl || "Not configured",
      });

      // Check client certificate
      if (config.certPath && existsSync(config.certPath)) {
        try {
          const cert = new X509Certificate(readFileSync(config.certPath, "utf-8"));
          const expiresInMs = new Date(cert.validTo).getTime() - Date.now();
          const daysLeft = Math.floor(expiresInMs / MS_PER_DAY);
          if (!(expiresInMs > 0)) {
            // Also taken when validTo cannot be parsed (NaN).
            results.push({ name: "Client certificate", status: "error", message: "Expired!" });
          } else if (daysLeft < 1) {
            // Still valid, but for less than 24 hours: warn rather than
            // claiming the certificate has already expired.
            results.push({ name: "Client certificate", status: "warn", message: "Expires in less than a day" });
          } else {
            results.push({
              name: "Client certificate",
              status: daysLeft > 7 ? "ok" : "warn",
              message: `Valid for ${daysLeft} days`,
            });
          }
        } catch {
          results.push({
            name: "Client certificate",
            status: "error",
            message: "Failed to parse certificate",
          });
        }
      } else {
        results.push({
          name: "Client certificate",
          status: "warn",
          message: `Not configured — run 'labctl login'`,
        });
      }

      // Check cert directory
      const certDirExists = existsSync(CERT_DIR);
      results.push({
        name: "Certificate directory",
        status: certDirExists ? "ok" : "warn",
        message: certDirExists ? CERT_DIR : "Not created yet",
      });

      // Test labd connectivity
      if (!config.labdUrl) {
        // Without a URL the request would go to "undefined/healthz".
        results.push({
          name: "labd connectivity",
          status: "error",
          message: "Skipped — labd URL not configured",
        });
      } else {
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), HEALTH_TIMEOUT_MS);
        try {
          const resp = await fetch(`${config.labdUrl}/healthz`, {
            signal: controller.signal,
          });
          // Read as text first: a health endpoint that answers with a
          // non-JSON body is still reachable and must not count as a failure.
          const text = await resp.text();
          let body: { status?: string } = {};
          try {
            const parsed: unknown = JSON.parse(text);
            if (typeof parsed === "object" && parsed !== null) {
              body = parsed as { status?: string };
            }
          } catch {
            /* non-JSON health body — keep the empty default */
          }
          results.push({
            name: "labd connectivity",
            status: resp.ok ? "ok" : "warn",
            message: resp.ok
              ? `Connected — ${body.status ?? "ok"}`
              : `HTTP ${resp.status}: ${body.status ?? "unknown"}`,
          });
        } catch (err) {
          const msg = err instanceof Error ? err.message : String(err);
          // Node's fetch reports network failures as "fetch failed" and
          // carries the underlying error (e.g. ECONNREFUSED) in `cause`.
          const cause = err instanceof Error ? (err as { cause?: unknown }).cause : undefined;
          const causeMsg = cause instanceof Error ? cause.message : "";
          const detail = causeMsg && !msg.includes(causeMsg) ? `${msg}: ${causeMsg}` : msg;
          results.push({
            name: "labd connectivity",
            status: "error",
            message: controller.signal.aborted || detail.includes("abort")
              ? "Connection timed out (5s)"
              : detail.includes("ECONNREFUSED")
                ? "Connection refused"
                : detail,
          });
        } finally {
          // Always release the timer so a failed request does not leave it pending.
          clearTimeout(timer);
        }
      }

      // Output
      const errors = results.filter((r) => r.status === "error").length;
      if (opts.json) {
        console.log(JSON.stringify(results, null, 2));
      } else {
        console.log("Running diagnostics...\n");
        for (const r of results) {
          const icon = r.status === "ok" ? "\u2713" : r.status === "warn" ? "!" : "\u2717";
          const color = r.status === "ok" ? GREEN : r.status === "warn" ? YELLOW : RED;
          console.log(`${color}${icon}${RESET} ${r.name}: ${r.message}`);
        }
        const warns = results.filter((r) => r.status === "warn").length;
        const oks = results.filter((r) => r.status === "ok").length;
        console.log(`\n${oks} passed, ${warns} warnings, ${errors} errors`);
      }
      // Signal failure to scripts in both output modes.
      if (errors > 0) process.exitCode = 1;
    });
}

View File

@@ -0,0 +1,22 @@
// CLI command: provision forget
// Remove a machine from all bastion state via labd.
import type { Command } from "commander";
import { getLabdClient } from "../api/config.js";
/**
 * Attach `forget <mac>` to `parent`.
 *
 * The MAC is lower-cased and dash separators are turned into colons before
 * it is handed to labd; the reply is printed as pretty JSON. Any failure is
 * reported on stderr and ends the process with status 1.
 */
export function registerForgetCommand(parent: Command): void {
  const forget = async (mac: string): Promise<void> => {
    const canonicalMac = mac.toLowerCase().replace(/-/g, ":");
    try {
      const reply = await getLabdClient().forgetMachine(canonicalMac);
      console.log(JSON.stringify(reply, null, 2));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`Failed: ${reason}`);
      process.exit(1);
    }
  };

  parent
    .command("forget <mac>")
    .description("Remove a machine from bastion state")
    .action(forget);
}

View File

@@ -0,0 +1,69 @@
// CLI command: provision install
// Queue a discovered machine for OS installation via labd.
import { Command, Option } from "commander";
import { isValidOsId, SUPPORTED_OS, SUPPORTED_ROLES, ROLE_REGISTRY } from "@lab/shared";
import { getLabdClient } from "../api/config.js";
/**
 * Render the role registry as help text: a blank line, an
 * "Available roles:" heading, then one line per role with its name padded to
 * 16 columns, its description, and optional parent / auto-app annotations.
 */
function roleTable(): string {
  const rows = ROLE_REGISTRY.map((role) => {
    const inherits = role.parent ? ` (extends ${role.parent})` : "";
    const autoApps = role.apps.length > 0 ? ` [auto: ${role.apps.join(", ")}]` : "";
    return ` ${role.name.padEnd(16)} ${role.description}${inherits}${autoApps}`;
  });
  return ["", "Available roles:", ...rows].join("\n");
}
/**
 * Attach `install <mac> <hostname>` to `parent`.
 *
 * Validates `--os` and `--role`, asks labd to queue the machine for
 * installation, prints labd's reply as JSON, and then prints follow-up hints
 * (including the automatic k3s/app steps when the chosen role has k3s).
 *
 * The MAC is canonicalised (lower case, dashes replaced by colons) before it
 * is sent, matching what the `forget` and `logs` commands do, so the same
 * machine is addressed however the user typed the address.
 *
 * Exits with status 1 on an unknown OS or role, or when the labd call fails.
 *
 * @param parent Command group the subcommand is attached to.
 */
export function registerInstallCommand(parent: Command): void {
  parent
    .command("install <mac> <hostname>")
    .description("Queue a discovered machine for OS installation")
    .showHelpAfterError(true)
    .addHelpText("after", roleTable())
    .addOption(new Option("--role <role>", "Machine role (see below)").choices([...SUPPORTED_ROLES]).default("worker"))
    .addOption(new Option("--os <os>", "Operating system").choices([...SUPPORTED_OS]).default("fedora-43"))
    .option("--disk <device>", "Target disk device (auto-detect if omitted)")
    .action(async (mac: string, hostname: string, opts: {
      role: string;
      os: string;
      disk?: string;
    }) => {
      if (!isValidOsId(opts.os)) {
        console.error(`Unknown OS: ${opts.os}. Supported: ${SUPPORTED_OS.join(", ")}`);
        process.exit(1);
      }
      if (!(SUPPORTED_ROLES as readonly string[]).includes(opts.role)) {
        console.error(`Unknown role: ${opts.role}`);
        console.error(roleTable());
        process.exit(1);
      }

      // Same canonical form the sibling commands use for MAC addresses.
      const normalizedMac = mac.toLowerCase().replace(/-/g, ":");

      try {
        const result = await getLabdClient().installMachine({
          mac: normalizedMac,
          hostname,
          role: opts.role,
          os: opts.os,
          // Only send `disk` when the user chose one.
          ...(opts.disk ? { disk: opts.disk } : {}),
        });
        console.log(JSON.stringify(result, null, 2));
        console.log("");
        const osLabel = opts.os.startsWith("ubuntu") ? "Ubuntu" : "Fedora";
        console.log(`Power on the machine to start ${osLabel} installation.`);
        const roleInfo = ROLE_REGISTRY.find(r => r.name === opts.role);
        if (roleInfo?.k3s) {
          console.log(`After install completes, k3s will be installed automatically (role=${opts.role}).`);
          if (roleInfo.apps.length > 0) {
            console.log(`Then: ${roleInfo.apps.join(", ")} will be deployed.`);
          }
          console.log(`To install k3s manually later: labctl app k3s install ${hostname}`);
        }
      } catch (err) {
        console.error(`Failed: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }
    });
}

View File

@@ -0,0 +1,298 @@
// CLI command: labctl app labcontroller deploy/status
// Deploy bastion + labd + CockroachDB to a k3s labcontroller node.
import { existsSync, writeFileSync, mkdirSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import type { Command } from "commander";
import type { BastionState } from "@lab/shared";
import { sshExec } from "@lab/modules";
import { getLabdClient } from "../api/config.js";
/**
 * Locate a private SSH key to use for remote commands.
 *
 * Looks in `<home>/.ssh` for `id_ed25519`, `id_ecdsa`, then `id_rsa` and
 * returns the first path that exists, or `undefined` when none does. When
 * `SUDO_USER` is set, `<home>` is `/home/<SUDO_USER>` instead of the current
 * user's home directory.
 */
function findSshKey(): string | undefined {
  const invokingUser = process.env["SUDO_USER"];
  const sshDir = join(invokingUser ? join("/home", invokingUser) : homedir(), ".ssh");
  return ["id_ed25519", "id_ecdsa", "id_rsa"]
    .map((keyName) => join(sshDir, keyName))
    .find((candidate) => existsSync(candidate));
}
/**
 * Turn a user-supplied target into an address to connect to.
 *
 * A dotted-quad string is returned unchanged. Otherwise the installed
 * machines known to labd are searched for a hostname equal to `target`, or
 * starting with `target` followed by a dot, and that machine's IP is returned.
 *
 * Entries without a recorded IP are skipped, so the function never returns an
 * empty string; when nothing usable is found, or labd cannot be reached,
 * `target` itself is returned so the caller can still try it as a hostname.
 *
 * @param target IPv4 address, short hostname, or fully qualified hostname.
 * @returns The resolved IP, or `target` when it could not be resolved.
 */
async function resolveIp(target: string): Promise<string> {
  if (/^\d+\.\d+\.\d+\.\d+$/.test(target)) return target;
  try {
    const state = await getLabdClient().getMachines();
    for (const info of Object.values(state.installed)) {
      const nameMatches = info.hostname === target || info.hostname.startsWith(target + ".");
      if (nameMatches && info.ip) {
        return info.ip;
      }
    }
  } catch {
    /* labd unreachable — fall back to the target as typed */
  }
  return target;
}
/**
 * Register the `labcontroller` command group under `appCmd`.
 *
 * - `deploy <target>`: over SSH, fetches the node's k3s kubeconfig and merges
 *   it into ~/.kube/config, then applies the CockroachDB, labd and bastion
 *   manifests with `k3s kubectl apply`, and finally asks labd to set the
 *   host's role to "labcontroller".
 * - `status [target]`: without a target, probes every installed machine in
 *   parallel and prints one summary row per host; with a target, prints the
 *   `lab-system` pod list of that host.
 *
 * Both subcommands use the SSH key found by findSshKey() (if any) and resolve
 * the target through resolveIp().
 */
export function registerLabcontrollerCommands(appCmd: Command): void {
const lcCmd = appCmd.command("labcontroller").description("Labcontroller deployment (bastion + labd + CockroachDB)");
lcCmd
.command("deploy <target>")
.description("Deploy labcontroller stack to a k3s node")
.option("--user <user>", "SSH user", "michal")
.option("--crdb-replicas <n>", "CockroachDB replicas", "1")
.action(async (target: string, opts: {
user: string;
crdbReplicas: string;
}) => {
const ip = await resolveIp(target);
const sshKey = findSshKey();
const sshOpts = sshKey ? { keyPath: sshKey } : {};
console.log(`Deploying labcontroller stack to ${target} (${ip})...\n`);
// 1. Fetch kubeconfig from target
console.log("[1/4] Fetching kubeconfig...");
const kcResult = await sshExec(ip, opts.user, "sudo cat /etc/rancher/k3s/k3s.yaml", { ...sshOpts, timeoutMs: 10_000 });
if (kcResult.exitCode !== 0) {
console.error(" Failed to fetch kubeconfig. Is k3s running?");
process.exit(1);
}
const kubeconfigDir = join(homedir(), ".kube");
mkdirSync(kubeconfigDir, { recursive: true });
const contextName = `lab-${target}`;
// Point the config at the node's address instead of 127.0.0.1 and rename
// every "default" name/cluster/user entry to the per-target context name.
const kubeconfig = kcResult.stdout
.replace(/server:\s*https:\/\/127\.0\.0\.1:6443/, `server: https://${ip}:6443`)
.replace(/name:\s*default/g, `name: ${contextName}`)
.replace(/cluster:\s*default/g, `cluster: ${contextName}`)
.replace(/user:\s*default/g, `user: ${contextName}`);
// Merge by letting kubectl flatten the existing config together with a
// temporary file that holds the new context.
// NOTE(review): `target` is embedded in the file name as typed — confirm it
// can never contain path separators.
const tmpPath = join(kubeconfigDir, `.lab-${target}-tmp`);
writeFileSync(tmpPath, kubeconfig, { mode: 0o600 });
const mainConfig = join(kubeconfigDir, "config");
const { spawnSync } = await import("node:child_process");
const mergeResult = spawnSync("kubectl", ["config", "view", "--flatten"], {
encoding: "utf-8",
stdio: ["pipe", "pipe", "pipe"],
env: { ...process.env, KUBECONFIG: `${mainConfig}:${tmpPath}` },
});
if (mergeResult.status === 0 && mergeResult.stdout) {
writeFileSync(mainConfig, mergeResult.stdout, { mode: 0o600 });
spawnSync("kubectl", ["config", "use-context", contextName], {
stdio: "pipe",
env: { ...process.env, KUBECONFIG: mainConfig },
});
console.log(` Merged into ~/.kube/config as context "${contextName}"`);
console.log(` Active context set to "${contextName}"`);
} else {
// Merge failed: keep the new config as a standalone file instead.
writeFileSync(join(kubeconfigDir, `lab-${target}`), kubeconfig, { mode: 0o600 });
console.log(` Saved to ~/.kube/lab-${target} (merge failed, use KUBECONFIG=~/.kube/lab-${target})`);
}
// Best-effort removal of the temporary merge input.
try { const { unlinkSync } = await import("node:fs"); unlinkSync(tmpPath); } catch { /* ignore */ }
console.log("");
// 2. Apply CockroachDB manifests
console.log("[2/4] Deploying CockroachDB...");
const { cockroachDbManifests } = await import("@lab/modules/dist/modules/labcontroller/src/cockroachdb.js");
// NOTE(review): the replica count is parsed without validation; a
// non-numeric --crdb-replicas yields NaN here.
const crdb = cockroachDbManifests({ replicas: parseInt(opts.crdbReplicas, 10) });
const manifests = [crdb.namespace, crdb.headlessService, crdb.clientService, crdb.statefulSet];
for (const manifest of manifests) {
const json = JSON.stringify(manifest);
const kind = (manifest as { kind?: string }).kind ?? "?";
const name = ((manifest as { metadata?: { name?: string } }).metadata)?.name ?? "?";
// The manifest JSON is piped to kubectl on the remote side; embedded single
// quotes are escaped so it survives the remote shell's quoting.
const result = await sshExec(ip, opts.user,
`echo '${json.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f -`,
{ ...sshOpts, timeoutMs: 15_000 },
);
if (result.exitCode === 0) {
console.log(` applied ${kind}/${name}`);
} else {
// A failed apply is only reported; the deployment carries on.
console.error(` FAILED ${kind}/${name}: ${result.stderr.trim()}`);
}
}
console.log(" Waiting for CockroachDB pod...");
// The SSH timeout (130s) is slightly longer than kubectl's own 120s wait.
const waitResult = await sshExec(ip, opts.user,
"sudo k3s kubectl wait --for=condition=Ready pod -l app=cockroachdb -n lab-system --timeout=120s 2>/dev/null || echo 'still starting'",
{ ...sshOpts, timeoutMs: 130_000 },
);
console.log(` ${waitResult.stdout.trim()}`);
console.log(" Initializing CockroachDB cluster...");
const initJson = JSON.stringify(crdb.initJob);
// The results of the init job and of the CREATE DATABASE call are not
// inspected; the remote commands fall back to an echo on failure.
await sshExec(ip, opts.user,
`echo '${initJson.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f - 2>/dev/null; sudo k3s kubectl wait --for=condition=Complete job/cockroachdb-init -n lab-system --timeout=60s 2>/dev/null || echo 'init may already be done'`,
{ ...sshOpts, timeoutMs: 70_000 },
);
await sshExec(ip, opts.user,
"sudo k3s kubectl exec cockroachdb-0 -n lab-system -- /cockroach/cockroach sql --insecure -e 'CREATE DATABASE IF NOT EXISTS lab' 2>/dev/null || echo 'db may already exist'",
{ ...sshOpts, timeoutMs: 15_000 },
);
console.log(" CockroachDB ready\n");
// 3. Deploy labd
console.log("[3/4] Deploying labd...");
const { labdManifests } = await import("@lab/modules/dist/modules/labcontroller/src/labd.js");
const labd = labdManifests({ databaseUrl: crdb.connectionString });
for (const manifest of [labd.service, labd.deployment]) {
const json = JSON.stringify(manifest);
const kind = (manifest as { kind?: string }).kind ?? "?";
const name = ((manifest as { metadata?: { name?: string } }).metadata)?.name ?? "?";
const result = await sshExec(ip, opts.user,
`echo '${json.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f -`,
{ ...sshOpts, timeoutMs: 15_000 },
);
console.log(` ${result.exitCode === 0 ? "applied" : "FAILED"} ${kind}/${name}`);
}
console.log("");
// 4. Deploy bastion
console.log("[4/4] Deploying bastion (hostNetwork)...");
const { bastionManifests } = await import("@lab/modules/dist/modules/labcontroller/src/bastion.js");
const bastion = bastionManifests();
const bJson = JSON.stringify(bastion.daemonSet);
const bResult = await sshExec(ip, opts.user,
`echo '${bJson.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f -`,
{ ...sshOpts, timeoutMs: 15_000 },
);
console.log(` ${bResult.exitCode === 0 ? "applied" : "FAILED"} DaemonSet/bastion`);
// 5. Promote host role to labcontroller via labd
console.log("Promoting host role to labcontroller...");
try {
const state = await getLabdClient().getMachines();
// Only the first installed machine matching the IP or the target name is updated.
for (const [mac, info] of Object.entries(state.installed)) {
if (info.ip === ip || info.hostname === target) {
await getLabdClient().updateRole(mac, "labcontroller");
console.log(` ${info.hostname}: infra -> labcontroller`);
break;
}
}
} catch {
console.log(" Could not update role (labd may not be running yet)");
}
// NOTE(review): this summary is printed even when an apply step above
// reported FAILED — confirm that is intended.
console.log("\n=== Labcontroller deployed ===");
console.log(` CockroachDB: cockroachdb-client.lab-system:26257`);
console.log(` labd: ${ip}:30100`);
console.log(` bastion: ${ip}:8080 (hostNetwork)`);
console.log(` context: lab-${target}`);
console.log(`\n Switch context: kubectl ctx lab-${target}`);
console.log(` View pods: kubectl get pods -n lab-system`);
});
lcCmd
.command("status [target]")
.description("Check labcontroller deployment status (all hosts if no target)")
.option("--user <user>", "SSH user", "michal")
.action(async (target: string | undefined, opts: { user: string }) => {
const sshKey = findSshKey();
const sshOpts = sshKey ? { keyPath: sshKey } : {};
// No target: summarise every installed machine known to labd.
if (!target) {
let state: BastionState;
try {
state = await getLabdClient().getMachines();
} catch (err) {
console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
}
const entries = Object.entries(state.installed);
if (entries.length === 0) {
console.log("No installed machines.");
return;
}
const BOLD = "\x1b[1m";
const GREEN = "\x1b[32m";
const RED = "\x1b[31m";
const DIM = "\x1b[2m";
const RESET = "\x1b[0m";
const pad = (s: string, w: number) => s.padEnd(w);
console.log(
`${BOLD}${pad("HOST", 22)}${pad("IP", 16)}${pad("ROLE", 14)}${pad("CRDB", 12)}${pad("LABD", 12)}${pad("BASTION", 12)}${pad("NS", 8)}${RESET}`,
);
// One table row; the *C fields hold the colour escape for the matching column.
interface StatusRow {
host: string; ip: string; role: string;
crdb: string; labd: string; bastion: string; ns: string;
crdbC: string; labdC: string; bastionC: string;
}
// Each host is probed concurrently; an SSH failure is recorded in the row
// rather than rejecting the whole Promise.all below.
const probes = entries.map(async ([_mac, info]): Promise<StatusRow> => {
const r: StatusRow = {
host: info.hostname, ip: info.ip, role: info.role ?? "?",
crdb: "—", labd: "—", bastion: "—", ns: "—",
crdbC: DIM, labdC: DIM, bastionC: DIM,
};
if (!info.ip) return r;
try {
const result = await sshExec(info.ip, opts.user,
"sudo k3s kubectl get pods -n lab-system --no-headers -o custom-columns='NAME:.metadata.name,STATUS:.status.phase' 2>/dev/null || echo 'NO_NS'",
{ ...sshOpts, timeoutMs: 10_000 },
);
if (result.stdout.includes("NO_NS") || result.exitCode !== 0) {
r.ns = "none";
return r;
}
r.ns = "ok";
const lines = result.stdout.trim().split("\n").filter(Boolean);
for (const line of lines) {
const [name, status] = line.trim().split(/\s+/);
if (!name) continue;
const running = status === "Running" || status === "Succeeded";
const color = running ? GREEN : RED;
const label = running ? "running" : (status ?? "?").toLowerCase();
// Classify pods by name prefix; the CockroachDB init pod is ignored.
if (name.startsWith("cockroachdb-") && !name.includes("init")) {
r.crdb = label; r.crdbC = color;
} else if (name.startsWith("labd-")) {
r.labd = label; r.labdC = color;
} else if (name.startsWith("bastion-")) {
r.bastion = label; r.bastionC = color;
}
}
} catch {
r.crdb = "ssh err"; r.crdbC = RED;
}
return r;
});
const results = await Promise.all(probes);
for (const r of results) {
console.log(
`${pad(r.host, 22)}${pad(r.ip, 16)}${pad(r.role, 14)}${r.crdbC}${pad(r.crdb, 12)}${RESET}${r.labdC}${pad(r.labd, 12)}${RESET}${r.bastionC}${pad(r.bastion, 12)}${RESET}${pad(r.ns, 8)}`,
);
}
return;
}
// Specific target: show detailed pod list
const ip = await resolveIp(target);
console.log(`Labcontroller status on ${target} (${ip}):\n`);
const result = await sshExec(ip, opts.user,
"sudo k3s kubectl get pods -n lab-system -o wide 2>/dev/null || echo 'lab-system namespace not found'",
{ ...sshOpts, timeoutMs: 10_000 },
);
console.log(result.stdout);
});
}

View File

@@ -0,0 +1,98 @@
// CLI command: provision list
// Merged view of all known machines with hardware + install info.
import type { Command } from "commander";
import type { BastionState } from "@lab/shared";
import { getLabdClient } from "../api/config.js";
// ANSI escape sequences for the machine table: BOLD styles the headings,
// GREEN/YELLOW/CYAN colour the status column (see statusColor), and RESET
// restores the default style.
const BOLD = "\x1b[1m";
const GREEN = "\x1b[0;32m";
const YELLOW = "\x1b[1;33m";
const CYAN = "\x1b[0;36m";
const RESET = "\x1b[0m";
/**
 * Pick the colour escape for a machine status: green for "installed", yellow
 * for "queued" or "installing", cyan for "discovered", and the reset sequence
 * for anything else.
 */
function statusColor(status: string): string {
  if (status === "installed") return GREEN;
  if (status === "queued" || status === "installing") return YELLOW;
  if (status === "discovered") return CYAN;
  return RESET;
}
/**
 * Attach `list` to `parent`.
 *
 * Fetches the aggregated machine state from labd and prints one table row per
 * known MAC (the union of discovered, queued and installed machines),
 * followed by a PENDING section with the progress of every queued install.
 * Exits with status 1 when labd cannot be reached.
 */
export function registerListCommand(parent: Command): void {
  const showMachines = async (): Promise<void> => {
    let state: BastionState;
    try {
      state = await getLabdClient().getMachines();
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`Cannot reach labd: ${reason}`);
      process.exit(1);
    }

    // Collect all known MACs
    const knownMacs = new Set([
      ...Object.keys(state.discovered),
      ...Object.keys(state.install_queue),
      ...Object.keys(state.installed),
    ]);

    console.log("");
    if (knownMacs.size === 0) {
      console.log(" No machines known. PXE boot a machine to discover it.");
      console.log("");
      return;
    }

    // Heading: every column except the last is padded to a fixed width.
    const columns: Array<[string, number]> = [
      ["MAC", 20],
      ["HOSTNAME", 24],
      ["STATUS", 12],
      ["ROLE", 8],
      ["IP", 16],
      ["CPU", 24],
      ["CORES", 6],
      ["RAM", 6],
    ];
    const heading = columns.map(([label, width]) => label.padEnd(width)).join(" ");
    console.log(`${BOLD} ${heading} PRODUCT${RESET}`);

    for (const mac of knownMacs) {
      const hardware = state.discovered[mac];
      const pending = state.install_queue[mac];
      const done = state.installed[mac];

      // Determine status: installed wins over queued, queued over discovered.
      let status: string;
      if (done !== undefined) {
        status = "installed";
      } else if (pending === undefined) {
        status = "discovered";
      } else if (pending.progress !== undefined && pending.progress !== "" && pending.progress !== "waiting") {
        status = "installing";
      } else {
        status = "queued";
      }

      const hostname = done?.hostname ?? pending?.hostname ?? "-";
      const role = done?.role ?? pending?.role ?? "-";
      const ip = done?.ip ?? "-";
      const cpu = hardware?.cpu_model ?? "-";
      const cores = hardware?.cpu_cores != null ? String(hardware.cpu_cores) : "-";
      const ram = hardware?.memory_gb != null ? `${hardware.memory_gb}GB` : "-";
      const product = hardware?.product ?? "-";
      const tint = statusColor(status);
      console.log(
        ` ${mac.padEnd(20)} ${hostname.padEnd(24)} ${tint}${status.padEnd(12)}${RESET} ${role.padEnd(8)} ${ip.padEnd(16)} ${cpu.substring(0, 23).padEnd(24)} ${cores.padEnd(6)} ${ram.padEnd(6)} ${product}`,
      );
    }

    // Show install queue details if any
    const pendingInstalls = Object.entries(state.install_queue);
    if (pendingInstalls.length > 0) {
      console.log("");
      console.log(`${BOLD}PENDING${RESET}`);
      pendingInstalls.forEach(([mac, cfg]) => {
        const progress = cfg.progress ?? "waiting";
        const detail = cfg.progress_detail ?? "";
        console.log(` ${mac} ${progress}${detail ? ` - ${detail}` : ""}`);
      });
    }
    console.log("");
  };

  parent
    .command("list")
    .description("List all known machines")
    .action(showMachines);
}

View File

@@ -0,0 +1,120 @@
// labctl login — authenticate with labd and obtain client certificate.
import { createPublicKey, generateKeyPairSync } from "node:crypto";
import { writeFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
import { join } from "node:path";
import { createInterface } from "node:readline";
import { Writable } from "node:stream";
import type { Command } from "commander";
import { loadConfig, saveConfig, CERT_DIR } from "../config/index.js";
/**
 * Register the `login` command.
 *
 * Flow:
 *  1. Ensure the certificate directory exists and a P-256 client key is on
 *     disk (an existing key is reused).
 *  2. Derive the PUBLIC key from that private key. Only the public key is
 *     sent to the server; the private key never leaves this machine.
 *  3. Prompt for a join token.
 *  4. POST token, a `cli-<USER>` hostname and the public key to
 *     `<server>/api/auth/user-enroll`.
 *  5. Store the returned client / CA certificates and update the CLI config.
 *
 * Exits with status 1 when no server URL is known, the key cannot be read,
 * the token is empty, the server rejects the request, or the request fails.
 *
 * @param program Root command the `login` subcommand is attached to.
 */
export function registerLoginCommand(program: Command): void {
  program
    .command("login")
    .description("Authenticate with labd and obtain client certificate")
    .option("--server <url>", "labd server URL")
    .action(async (options: { server?: string }) => {
      if (!existsSync(CERT_DIR)) {
        mkdirSync(CERT_DIR, { recursive: true, mode: 0o700 });
      }
      const config = loadConfig();
      const serverUrl = options.server ?? config.labdUrl;
      if (!serverUrl) {
        // Without this the request would go to "undefined/api/...".
        console.error("No labd server URL. Pass --server <url> or run 'labctl config set labdUrl <url>'.");
        process.exit(1);
      }
      const keyPath = join(CERT_DIR, "client.key");
      const certPath = join(CERT_DIR, "client.crt");
      const caPath = join(CERT_DIR, "ca.crt");

      // 1. Generate keypair if not exists
      if (!existsSync(keyPath)) {
        console.log("Generating client keypair...");
        const { privateKey } = generateKeyPairSync("ec", {
          namedCurve: "P-256",
          privateKeyEncoding: { type: "pkcs8", format: "pem" },
          publicKeyEncoding: { type: "spki", format: "pem" },
        });
        writeFileSync(keyPath, privateKey, { mode: 0o600 });
        console.log(`Private key saved to ${keyPath}`);
      } else {
        console.log(`Using existing keypair at ${keyPath}`);
      }

      // 2. Derive the public key for the enrollment request (simplified CSR —
      //    send public key, labd signs). The key file holds the PRIVATE key,
      //    so its contents must never be sent as-is.
      let publicKey: string;
      try {
        publicKey = createPublicKey(readFileSync(keyPath, "utf-8"))
          .export({ type: "spki", format: "pem" })
          .toString();
      } catch (err) {
        console.error(`Cannot read client key at ${keyPath}: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // 3. Prompt for token
      const token = await promptPassword("Enter join token: ");
      if (!token) {
        console.error("Token is required.");
        process.exit(1);
      }

      // 4. Submit enrollment request
      console.log(`Authenticating with ${serverUrl}...`);
      try {
        const resp = await fetch(`${serverUrl}/api/auth/user-enroll`, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            token,
            hostname: `cli-${process.env["USER"] ?? "unknown"}`,
            csr: publicKey,
          }),
        });
        if (!resp.ok) {
          // The error body is optional; fall back to the HTTP status text.
          const body = (await resp.json().catch(() => ({}))) as Record<string, string>;
          console.error(`Login failed: ${body["error"] ?? resp.statusText}`);
          process.exit(1);
        }
        const result = (await resp.json()) as {
          certificatePem?: string | null;
          caPem?: string | null;
          status: string;
        };
        if (result.certificatePem) {
          writeFileSync(certPath, result.certificatePem, { mode: 0o600 });
          console.log(`Client certificate saved to ${certPath}`);
        }
        if (result.caPem) {
          writeFileSync(caPath, result.caPem, { mode: 0o644 });
          console.log(`CA certificate saved to ${caPath}`);
        }

        // 5. Update config — only reference certificate files that exist.
        saveConfig({
          ...config,
          labdUrl: serverUrl,
          keyPath,
          ...(existsSync(certPath) ? { certPath } : {}),
          ...(existsSync(caPath) ? { caPath } : {}),
        });
        if (result.certificatePem) {
          console.log(`\nLogin successful! Configuration updated.`);
        } else {
          // The server accepted the request but issued no certificate.
          console.log(`\nEnrollment submitted (status: ${result.status}); no certificate was issued yet.`);
        }
        console.log(`Server: ${serverUrl}`);
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        if (message.includes("ECONNREFUSED") || message.includes("fetch")) {
          console.error(`Cannot connect to labd at ${serverUrl}`);
          console.error("Check that labd is running and the URL is correct.");
        } else {
          console.error(`Login failed: ${message}`);
        }
        process.exit(1);
      }
    });
}
/**
 * Ask for a secret on stdin without echoing what is typed.
 *
 * The prompt text is written normally; after that, everything readline would
 * echo is discarded while stdin is an interactive terminal. When stdin is not
 * a TTY (piped input) nothing is echoed by readline anyway, so output is left
 * untouched.
 *
 * @param message Prompt shown to the user.
 * @returns The entered line with surrounding whitespace removed.
 */
function promptPassword(message: string): Promise<string> {
  return new Promise((resolve) => {
    const interactive = process.stdin.isTTY === true;
    let muted = false;
    // Pass-through sink that can be switched off once the prompt is shown.
    const output = new Writable({
      write(chunk, encoding, callback) {
        if (!muted) process.stdout.write(chunk, encoding);
        callback();
      },
    });
    const rl = createInterface({
      input: process.stdin,
      output,
      terminal: interactive,
    });
    rl.question(message, (answer) => {
      rl.close();
      // The newline typed by the user was swallowed together with the echo.
      if (interactive) process.stdout.write("\n");
      resolve(answer.trim());
    });
    // question() has already written the prompt; hide everything after it.
    muted = interactive;
  });
}

View File

@@ -0,0 +1,85 @@
// CLI command: provision logs
// Show provisioning logs for a machine via labd.
import type { Command } from "commander";
import { getLabdClient } from "../api/config.js";
/**
 * Resolve a target (hostname, MAC, or IP) to a MAC address.
 *
 * A target that is already a MAC (colon or dash separated, any case) is
 * returned in lower-case colon form without contacting labd. Otherwise the
 * installed machines are searched by hostname or IP, then the install queue
 * by hostname; a hostname matches when it equals `target` or starts with
 * `target` followed by a dot.
 *
 * When nothing matches, or labd cannot be reached, the lower-cased target is
 * returned and the caller's lookup reports it as unknown.
 *
 * @param target Hostname, MAC address, or IP as typed by the user.
 * @returns The matching MAC, or the normalized target when unresolved.
 */
async function resolveToMac(target: string): Promise<string> {
  const normalized = target.toLowerCase().replace(/-/g, ":");
  // Looks like a MAC already
  if (/^([0-9a-f]{2}:){5}[0-9a-f]{2}$/.test(normalized)) {
    return normalized;
  }

  const matchesHost = (hostname: string): boolean =>
    hostname === target || hostname.startsWith(target + ".");

  // Resolve from labd aggregated state
  try {
    const state = await getLabdClient().getMachines();
    for (const [mac, info] of Object.entries(state.installed)) {
      if (matchesHost(info.hostname) || info.ip === target) {
        return mac;
      }
    }
    for (const [mac, info] of Object.entries(state.install_queue)) {
      if (matchesHost(info.hostname)) {
        return mac;
      }
    }
    // Discovered machines are keyed by MAC only, and MAC-shaped targets
    // returned above, so there is nothing further to look up here.
  } catch {
    /* can't reach labd */
  }
  return normalized;
}
/**
 * Attach `logs <target>` to `parent`.
 *
 * Resolves the target to a MAC, fetches that machine's record from labd and
 * prints a short header (hostname, status, role, optional OS and IP) followed
 * by the progress log, one line per event. A failure whose message mentions
 * "404" or "not found" is reported as an unknown machine; any other failure
 * as labd being unreachable. Both exit with status 1.
 */
export function registerLogsCommand(parent: Command): void {
  const BOLD = "\x1b[1m";
  const GREEN = "\x1b[32m";
  const YELLOW = "\x1b[33m";
  const RED = "\x1b[31m";
  const DIM = "\x1b[2m";
  const RESET = "\x1b[0m";

  // Colour for a log stage: green when complete, red on error, else yellow.
  const stageTint = (stage: string): string => {
    if (stage === "complete") return GREEN;
    if (stage === "error") return RED;
    return YELLOW;
  };

  const showLogs = async (target: string): Promise<void> => {
    const mac = await resolveToMac(target);
    try {
      const data = await getLabdClient().getMachineLogs(mac);
      const statusTint = data["status"] === "installed" ? GREEN : YELLOW;

      console.log(`${BOLD}${data["hostname"]}${RESET} (${mac})`);
      console.log(` Status: ${statusTint}${data["status"]}${RESET}`);
      console.log(` Role: ${data["role"]}`);
      if (data["os"]) console.log(` OS: ${data["os"]}`);
      if (data["ip"]) console.log(` IP: ${data["ip"]}`);
      console.log("");

      const events = data["log"] as Array<{ stage: string; detail: string; timestamp: string }> | undefined;
      if (!events || events.length === 0) {
        console.log(` ${DIM}No progress events yet (queued, waiting for PXE boot)${RESET}`);
        return;
      }

      console.log(`${BOLD} Log:${RESET}`);
      events.forEach((event) => {
        // Characters 11..18 of the timestamp are shown as the time of day.
        const clock = event.timestamp.slice(11, 19);
        const suffix = event.detail ? ` ${DIM}-- ${event.detail}${RESET}` : "";
        console.log(` ${DIM}${clock}${RESET} ${stageTint(event.stage)}${event.stage}${RESET}${suffix}`);
      });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      const unknownMachine = reason.includes("404") || reason.includes("not found");
      if (unknownMachine) {
        console.error(`Machine not found: ${target}`);
        console.error("Run 'labctl provision list' to see available machines.");
      } else {
        console.error(`Cannot reach labd: ${reason}`);
      }
      process.exit(1);
    }
  };

  parent
    .command("logs <target>")
    .description("Show provisioning logs for a machine (hostname, MAC, or IP)")
    .action(showLogs);
}

View File

@@ -0,0 +1,114 @@
// CLI command: provision makeiso
// Generate/serve a UEFI-bootable iPXE ISO for machines that don't support PXE boot.
// Queries labd for connected bastions and provides the download URL.
import { readFileSync, writeFileSync, existsSync } from "node:fs";
import { createInterface } from "node:readline";
import { Command, Option } from "commander";
import { getLabdClient } from "../api/config.js";
import { buildBootIso } from "@lab/bastion/iso-builder";
/**
 * Ask a single question on the terminal and resolve with the trimmed answer.
 * The readline interface is closed as soon as the answer arrives.
 */
function prompt(question: string): Promise<string> {
  const reader = createInterface({ input: process.stdin, output: process.stdout });
  return new Promise<string>((resolve) => {
    const onAnswer = (reply: string): void => {
      reader.close();
      resolve(reply.trim());
    };
    reader.question(question, onAnswer);
  });
}
// Per-architecture iPXE EFI binaries for local ISO builds:
//   src  - file read from the local filesystem (its absence triggers the
//          "install ipxe-bootimgs" hint in the makeiso command)
//   dest - path handed to buildBootIso for that binary
const IPXE_PATHS: Record<string, { src: string; dest: string }> = {
x86_64: { src: "/usr/share/ipxe/ipxe-snponly-x86_64.efi", dest: "EFI/BOOT/BOOTX64.EFI" },
aarch64: { src: "/usr/share/ipxe/arm64-efi/snponly.efi", dest: "EFI/BOOT/BOOTAA64.EFI" },
};
/**
 * Choose the bastion to boot from.
 *
 * Only bastions whose status is "online" are considered. With none, the
 * process exits with status 1; with exactly one, it is used directly; with
 * several, the user picks one from a numbered menu (an invalid pick exits
 * with status 1). The returned HTTP port is always 8080.
 */
async function selectBastion(): Promise<{ hostname: string; serverIp: string; httpPort: number }> {
  const online = (await getLabdClient().getBastions()).filter((candidate) => candidate.status === "online");

  if (online.length === 0) {
    console.error("No bastions online. Start a bastion first.");
    process.exit(1);
  }

  if (online.length === 1) {
    const only = online[0]!;
    console.log(`Using bastion: ${only.hostname} (${only.serverIp})`);
    return { hostname: only.hostname, serverIp: only.serverIp, httpPort: 8080 };
  }

  console.log("Available bastions:\n");
  online.forEach((candidate, position) => {
    console.log(` ${position + 1}) ${candidate.hostname} ${candidate.serverIp} (${candidate.network})`);
  });
  console.log("");

  const reply = await prompt(`Select bastion [1-${online.length}]: `);
  // Menu entries are 1-based; convert to an array index.
  const choice = parseInt(reply, 10) - 1;
  const inRange = !isNaN(choice) && choice >= 0 && choice < online.length;
  if (!inRange) {
    console.error("Invalid selection.");
    process.exit(1);
  }

  const picked = online[choice]!;
  return { hostname: picked.hostname, serverIp: picked.serverIp, httpPort: 8080 };
}
/**
 * Attach `makeiso` to `parent`.
 *
 * After a bastion has been selected, the command either prints the URL of the
 * ISO served by that bastion (default) or, with `--local`, builds an ISO
 * itself from the locally installed iPXE binaries plus a small script that
 * chainloads `<bastion>/boot.ipxe`, and writes it to `--out`.
 */
export function registerMakeIsoCommand(parent: Command): void {
  const archOption = new Option("--arch <arch...>", "Target architecture(s)");
  archOption.choices(["x86_64", "aarch64"]);
  archOption.default(["x86_64", "aarch64"]);

  const makeIso = async (opts: { arch: string[]; local?: boolean; out: string }): Promise<void> => {
    const bastion = await selectBastion();
    const bastionUrl = `http://${bastion.serverIp}:${bastion.httpPort}`;

    if (!opts.local) {
      console.log(`\nThe bastion serves a boot ISO with the correct URL embedded.`);
      console.log(`Use this URL in JetKVM or any BMC virtual media:\n`);
      console.log(` ${bastionUrl}/boot.iso`);
    } else {
      console.log(`\nGenerating iPXE boot ISO...`);
      console.log(` Architectures: ${opts.arch.join(", ")}`);
      console.log(` Bastion: ${bastionUrl}`);

      // Collect one EFI binary per requested architecture.
      const efiFiles: Array<{ path: string; data: Buffer }> = [];
      for (const arch of opts.arch) {
        const location = IPXE_PATHS[arch];
        if (!location) {
          console.error(`Unknown architecture: ${arch}`);
          process.exit(1);
        }
        if (!existsSync(location.src)) {
          console.error(`iPXE binary not found: ${location.src}`);
          console.error(`Install: sudo dnf install ipxe-bootimgs-${arch === "aarch64" ? "aarch64" : "x86"}`);
          process.exit(1);
        }
        efiFiles.push({ path: location.dest, data: readFileSync(location.src) });
        console.log(` ${arch}: ${location.dest.split("/").pop()}`);
      }

      // Embedded iPXE script: get an address, then chainload from the bastion.
      const scriptLines = [
        "#!ipxe",
        "",
        "echo Booting from iPXE ISO -- connecting to bastion...",
        "dhcp || ( echo DHCP failed, retrying... && sleep 3 && dhcp )",
        `chain ${bastionUrl}/boot.ipxe || shell`,
      ];
      const iso = buildBootIso(efiFiles, scriptLines.join("\n"));
      writeFileSync(opts.out, iso);
      const sizeMb = (iso.length / 1024 / 1024).toFixed(1);
      console.log(`\nISO written to: ${opts.out} (${sizeMb}MB)`);
    }

    console.log(`\nMount as virtual CD, boot from it. iPXE will chainload from bastion.`);
  };

  parent
    .command("makeiso")
    .description("Generate a UEFI-bootable iPXE ISO for network provisioning")
    .addOption(archOption)
    .option("--local", "Build ISO locally instead of using bastion-hosted URL")
    .option("--out <path>", "Output path for local ISO build", "ipxe-bastion.iso")
    .action(makeIso);
}

View File

@@ -0,0 +1,161 @@
// CLI command: provision reprovision
// Queue a machine for reinstall and attempt SSH reboot into PXE via labd.
import { execFileSync } from "node:child_process";
import { existsSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { Command, Option } from "commander";
import type { BastionState } from "@lab/shared";
import { isValidOsId, SUPPORTED_OS, SUPPORTED_ROLES, ROLE_REGISTRY } from "@lab/shared";
import { getLabdClient } from "../api/config.js";
/**
 * Render the "Available roles" listing from `ROLE_REGISTRY`.
 *
 * One line per role: the name padded to 16 columns, the description, an
 * optional " (extends <parent>)" note and an optional " [auto: a, b]" list of
 * the role's apps. The text starts with an empty line.
 */
function roleTable(): string {
  const rows = Array.from(ROLE_REGISTRY, (role) => {
    const inherits = role.parent ? ` (extends ${role.parent})` : "";
    const autoApps = role.apps.length > 0 ? ` [auto: ${role.apps.join(", ")}]` : "";
    return ` ${role.name.padEnd(16)} ${role.description}${inherits}${autoApps}`;
  });
  return ["", "Available roles:", ...rows].join("\n");
}
/**
 * Resolve a target (hostname, MAC, or IP) to {mac, hostname, ip} from state.
 *
 * Lookup order:
 *  1. MAC of an installed machine (case-insensitive, `-` accepted for `:`).
 *  2. MAC of a discovered machine -- the MAC doubles as hostname, `ip` is "".
 *  3. Hostname of an installed machine: exact match, or `target` followed by
 *     a dot (short name of a fully-qualified hostname).
 *  4. IP of an installed machine.
 *
 * The MAC lookups only consider the tables' own keys, so inputs such as
 * "constructor" or "__proto__" cannot match members inherited from
 * `Object.prototype`.
 *
 * @param target - hostname, MAC address or IP address typed by the user.
 * @param state - machine state holding the `installed` and `discovered` tables.
 * @returns the resolved machine, or `null` when nothing matches.
 */
function resolveTarget(
  target: string,
  state: BastionState,
): { mac: string; hostname: string; ip: string } | null {
  const ownsKey = (table: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(table, key);

  const normalized = target.toLowerCase().replace(/-/g, ":");

  const installedInfo = ownsKey(state.installed, normalized)
    ? state.installed[normalized]
    : undefined;
  if (installedInfo) {
    return { mac: normalized, hostname: installedInfo.hostname, ip: installedInfo.ip };
  }

  if (ownsKey(state.discovered, normalized) && state.discovered[normalized]) {
    return { mac: normalized, hostname: normalized, ip: "" };
  }

  // Hostname matches take priority over IP matches across ALL installed
  // machines, hence two separate passes.
  for (const [mac, info] of Object.entries(state.installed)) {
    if (info.hostname === target || info.hostname.startsWith(target + ".")) {
      return { mac, hostname: info.hostname, ip: info.ip };
    }
  }
  for (const [mac, info] of Object.entries(state.installed)) {
    if (info.ip === target) {
      return { mac, hostname: info.hostname, ip: info.ip };
    }
  }
  return null;
}
/**
 * Register `reprovision <target> [hostname]` on `parent`.
 *
 * Flow of the action:
 *  1. Validate `--os` and `--role`.
 *  2. Fetch the machine state from labd and resolve `target` via `resolveTarget`.
 *  3. Queue the install through `client.installMachine` and print the response.
 *  4. If an IP and a non-root admin user are known, SSH into the machine, set
 *     the first PXE/network EFI boot entry as next boot (when one is found)
 *     and reboot. Otherwise ask the operator to reboot manually.
 *
 * Any failure in steps 1-3 prints a message and exits with status 1.
 *
 * @param parent - commander command the subcommand is attached to.
 */
export function registerReprovisionCommand(parent: Command): void {
  parent
    .command("reprovision <target> [hostname]")
    .description("Queue install + SSH reboot into PXE (target: hostname, MAC, or IP)")
    .showHelpAfterError(true)
    .addHelpText("after", roleTable())
    .addOption(new Option("--role <role>", "Machine role (see below)").choices([...SUPPORTED_ROLES]).default("worker"))
    .addOption(new Option("--os <os>", "Operating system").choices([...SUPPORTED_OS]).default("fedora-43"))
    .option("--disk <device>", "Target disk device (auto-detect if omitted)")
    .action(async (target: string, hostnameOverride: string | undefined, opts: {
      role: string;
      os: string;
      disk?: string;
    }) => {
      if (!isValidOsId(opts.os)) {
        console.error(`Unknown OS: ${opts.os}. Supported: ${SUPPORTED_OS.join(", ")}`);
        process.exit(1);
      }
      if (!(SUPPORTED_ROLES as readonly string[]).includes(opts.role)) {
        console.error(`Unknown role: ${opts.role}`);
        console.error(roleTable());
        process.exit(1);
      }

      const client = getLabdClient();

      // Resolve target from labd aggregated state
      let state: BastionState;
      try {
        state = await client.getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      const resolved = resolveTarget(target, state);
      if (!resolved) {
        console.error(`Cannot find machine: ${target}`);
        console.error("Provide a hostname, MAC, or IP of a known machine.");
        console.error("Run 'labctl provision list' to see available machines.");
        process.exit(1);
      }

      const mac = resolved.mac;
      const hostname = hostnameOverride ?? resolved.hostname;
      const ip = resolved.ip;

      console.log(`Reprovisioning ${hostname} (${mac})${ip ? ` at ${ip}` : ""}...`);
      console.log(` Role: ${opts.role} OS: ${opts.os}`);
      console.log("");

      // Queue the install via labd
      try {
        const result = await client.installMachine({
          mac,
          hostname,
          role: opts.role,
          os: opts.os,
          ...(opts.disk ? { disk: opts.disk } : {}),
        });
        console.log(JSON.stringify(result, null, 2));
      } catch (err) {
        console.error(`Failed to queue install: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // Try SSH reboot into PXE
      if (ip === "") {
        console.log("\nNo IP known. Reboot the machine manually into PXE.");
        return;
      }

      // Under sudo, log in as the invoking user rather than as root.
      const adminUser = process.env["SUDO_USER"] ?? process.env["USER"] ?? "";
      const effectiveUser = adminUser === "root" ? "" : adminUser;
      if (effectiveUser === "") {
        console.log("\nReboot the machine manually into PXE.");
        return;
      }

      console.log(`\nAttempting SSH reboot into PXE (${effectiveUser}@${ip})...`);

      // Pick the first private key that exists in the invoking user's ~/.ssh.
      const sudoUser = process.env["SUDO_USER"];
      const realHome = sudoUser !== undefined ? join("/home", sudoUser) : homedir();
      const keyPaths = [
        join(realHome, ".ssh", "id_ed25519"),
        join(realHome, ".ssh", "id_rsa"),
        join(realHome, ".ssh", "id_ecdsa"),
      ];
      const sshKey = keyPaths.find(k => existsSync(k));

      const sshArgs = [
        "-o", "StrictHostKeyChecking=no",
        "-o", "ConnectTimeout=10",
        ...(sshKey !== undefined ? ["-i", sshKey] : []),
        `${effectiveUser}@${ip}`,
        'PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi',
      ];

      try {
        execFileSync("ssh", sshArgs, { stdio: "inherit" });
      } catch (err) {
        // execFileSync throws in two different situations:
        //  - ssh could not be started at all (e.g. ENOENT): the error carries an
        //    errno-style string `code`. Nothing was rebooted, so say so.
        //  - ssh ran and exited non-zero: no `code`. The connection dropping
        //    while the machine reboots is expected, so this stays best-effort.
        const spawnCode = (err as { code?: unknown } | null)?.code;
        if (typeof spawnCode === "string") {
          console.error(`\nCould not run ssh (${spawnCode}). Reboot the machine manually into PXE.`);
          return;
        }
      }

      console.log("");
      console.log("Machine is rebooting into PXE. Install will start automatically.");
    });
}

View File

@@ -0,0 +1,145 @@
// CLI command: init bastion standalone start
// Start the bastion server (HTTP + dnsmasq), daemonized by default.
import { spawn, type ChildProcess } from "node:child_process";
import { existsSync, fstatSync, mkdirSync, openSync, readFileSync } from "node:fs";
import type { Command } from "commander";
import { startBastion } from "@lab/bastion";
/**
 * Register the `start` subcommand on `parent`.
 *
 * With `--foreground` the bastion is started in this process via
 * `startBastion`. Otherwise the command re-invokes itself with `--foreground`
 * as a detached child whose stdout/stderr are appended to
 * `<dir>/bastion.log`, waits three seconds, and then either
 *  - reports failure (with the tail of what this run wrote to the log) and
 *    exits 1 when the child could not be spawned or is no longer running, or
 *  - echoes what this run wrote to the log, prints the PID and exits 0.
 *
 * Only log output produced by the current run is shown: the log is opened in
 * append mode, so its size before spawning marks where this run begins.
 *
 * @param parent - commander command the subcommand is attached to.
 */
export function registerStartCommand(parent: Command): void {
  parent
    .command("start")
    .description("Start the bastion server (HTTP + dnsmasq PXE)")
    .option("--port <port>", "HTTP port", "8080")
    .option("--dir <dir>", "Bastion data directory", "/tmp/lab-bastion")
    .option("--domain <domain>", "Internal domain for hostnames", "ad.itaz.eu")
    .option("--dhcp-mode <mode>", "DHCP mode: proxy or full", "proxy")
    .option("--fedora <version>", "Fedora version", "43")
    .option("--arch <arch>", "Architecture", "x86_64")
    .option("--timezone <tz>", "Timezone", "Europe/London")
    .option("--locale <locale>", "Locale", "en_GB.UTF-8")
    .option("--skip-dnsmasq", "Skip starting dnsmasq (for testing)")
    .option("--skip-artifacts", "Skip downloading boot artifacts (for testing)")
    .option("--foreground", "Run in foreground (default: daemonize)")
    .action(async (opts: {
      port: string;
      dir: string;
      domain: string;
      dhcpMode: string;
      fedora: string;
      arch: string;
      timezone: string;
      locale: string;
      skipDnsmasq?: boolean;
      skipArtifacts?: boolean;
      foreground?: boolean;
    }) => {
      // Check root early (before daemonize) so the error is visible
      if (!opts.skipDnsmasq && process.getuid?.() !== 0) {
        console.error("Must run as root (dnsmasq needs DHCP/TFTP ports).");
        console.error("Usage: sudo labctl init bastion standalone start");
        process.exit(1);
      }

      if (opts.foreground === true) {
        // Run in foreground
        await startBastion({
          httpPort: parseInt(opts.port, 10),
          bastionDir: opts.dir,
          domain: opts.domain,
          dhcpMode: opts.dhcpMode as "proxy" | "full",
          fedoraVersion: opts.fedora,
          arch: opts.arch,
          timezone: opts.timezone,
          locale: opts.locale,
          skipDnsmasq: opts.skipDnsmasq,
          skipArtifacts: opts.skipArtifacts,
        });
        return;
      }

      // Daemonize: re-run with --foreground, redirect output to log file
      mkdirSync(opts.dir, { recursive: true });
      const logFile = `${opts.dir}/bastion.log`;

      // Build explicit argument list instead of re-using process.argv
      // (which breaks with bun-compiled binaries)
      const fgArgs = [
        "init", "bastion", "standalone", "start", "--foreground",
        "--port", opts.port,
        "--dir", opts.dir,
        "--domain", opts.domain,
        "--dhcp-mode", opts.dhcpMode,
        "--fedora", opts.fedora,
        "--arch", opts.arch,
        "--timezone", opts.timezone,
        "--locale", opts.locale,
      ];
      if (opts.skipDnsmasq) fgArgs.push("--skip-dnsmasq");
      if (opts.skipArtifacts) fgArgs.push("--skip-artifacts");

      // Determine how to re-invoke ourselves: under node/tsx the script path
      // must be passed as the first argument, otherwise the executable is
      // invoked directly.
      const execPath = process.argv[0] ?? "labctl";
      let spawnCmd: string;
      let spawnArgs: string[];
      if (execPath.includes("node") || execPath.includes("tsx")) {
        const scriptPath = process.argv[1];
        spawnCmd = execPath;
        spawnArgs = scriptPath ? [scriptPath, ...fgArgs] : fgArgs;
      } else {
        spawnCmd = execPath;
        spawnArgs = fgArgs;
      }

      // Open log file for the child's stdout/stderr so it survives parent exit
      const logFd = openSync(logFile, "a");
      // Everything past this byte offset is written by the run we start now.
      const logStart = fstatSync(logFd).size;
      const readNewLog = (): string =>
        readFileSync(logFile).subarray(logStart).toString("utf-8");

      const child: ChildProcess = spawn(spawnCmd, spawnArgs, {
        detached: true,
        stdio: ["ignore", logFd, logFd],
      });
      // A failed spawn is reported through an 'error' event; without a
      // listener it would surface as an uncaught exception.
      const spawnFailure: { error?: Error } = {};
      child.once("error", (err: Error) => {
        spawnFailure.error = err;
      });

      // Wait briefly for the child to start, then check it's alive
      await new Promise((resolve) => setTimeout(resolve, 3000));

      const childPid = child.pid;
      let alive = false;
      if (spawnFailure.error === undefined && childPid !== undefined) {
        try {
          process.kill(childPid, 0); // signal 0 = check existence
          alive = true;
        } catch {
          // Child already died
        }
      }

      if (!alive) {
        console.error("Bastion failed to start. Log output:");
        console.error("");
        if (spawnFailure.error !== undefined) {
          console.error(" " + spawnFailure.error.message);
        }
        let recent = "";
        try {
          recent = readNewLog().trim();
        } catch {
          // Unreadable log is treated like an empty one
        }
        if (recent === "") {
          console.error(" (no log output)");
        } else {
          for (const line of recent.split("\n").slice(-20)) {
            console.error(" " + line);
          }
        }
        process.exit(1);
      }

      child.unref();

      // Print startup info from the log
      try {
        process.stdout.write(readNewLog());
      } catch {
        // No log yet
      }

      // Prefer the PID recorded in the pid file when present; fall back to
      // the PID of the process we spawned.
      const pidFile = `${opts.dir}/bastion.pid`;
      const pid = existsSync(pidFile)
        ? readFileSync(pidFile, "utf-8").trim()
        : String(child.pid);
      console.log("");
      console.log(`Bastion running in background (PID ${pid})`);
      console.log(`Log: ${logFile}`);
      process.exit(0);
    });
}

View File

@@ -0,0 +1,42 @@
// CLI command: init bastion standalone status
// Show connected bastions and their machine counts via labd.
import type { Command } from "commander";
import { getLabdClient } from "../api/config.js";
// ANSI escape sequences used to style the status table output.
const BOLD = "\x1b[1m"; // bold -- wraps the header row
const GREEN = "\x1b[32m"; // green foreground -- status "online"
const RED = "\x1b[31m"; // red foreground -- any other status
const DIM = "\x1b[2m"; // dim -- the network column
const RESET = "\x1b[0m"; // clears all styling set above
/**
 * Register the `status` subcommand on `parent`.
 *
 * Fetches the bastion list from labd and prints a fixed-width table with
 * hostname, network, IP, status and machine count. The status cell is green
 * for "online" and red otherwise. Any error raised while fetching or printing
 * is reported as "Cannot reach labd" and the process exits with status 1.
 *
 * @param parent - commander command the subcommand is attached to.
 */
export function registerStatusCommand(parent: Command): void {
  const cell = (text: string, width: number): string => text.padEnd(width);

  parent
    .command("status")
    .description("Show bastion server status")
    .action(async () => {
      try {
        const bastions = await getLabdClient().getBastions();
        if (bastions.length === 0) {
          console.log("No bastions registered.");
          return;
        }

        const header = [
          cell("HOSTNAME", 24),
          cell("NETWORK", 18),
          cell("IP", 18),
          cell("STATUS", 10),
          cell("MACHINES", 10),
        ].join("");
        console.log(`${BOLD}${header}${RESET}`);

        for (const bastion of bastions) {
          const statusColor = bastion.status === "online" ? GREEN : RED;
          const row = [
            cell(bastion.hostname, 24),
            DIM,
            cell(bastion.network, 18),
            RESET,
            cell(bastion.serverIp, 18),
            statusColor,
            cell(bastion.status, 10),
            RESET,
            cell(String(bastion.machineCount), 10),
          ].join("");
          console.log(row);
        }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`Cannot reach labd: ${reason}`);
        process.exit(1);
      }
    });
}

Some files were not shown because too many files have changed in this diff Show More