fix: PXE boot debugging — bisect root cause, syslog logging, serial console #3
12
.env.example
Normal file
12
.env.example
Normal file
@@ -0,0 +1,12 @@
|
||||
# API Keys (Required to enable respective provider)
|
||||
ANTHROPIC_API_KEY="your_anthropic_api_key_here" # Required. Format: sk-ant-api03-...
|
||||
PERPLEXITY_API_KEY="your_perplexity_api_key_here" # Optional. Format: pplx-...
|
||||
OPENAI_API_KEY="your_openai_api_key_here" # Optional, for OpenAI models. Format: sk-proj-...
|
||||
GOOGLE_API_KEY="your_google_api_key_here" # Optional, for Google Gemini models.
|
||||
MISTRAL_API_KEY="your_mistral_key_here" # Optional, for Mistral AI models.
|
||||
XAI_API_KEY="YOUR_XAI_KEY_HERE" # Optional, for xAI models.
|
||||
GROQ_API_KEY="YOUR_GROQ_KEY_HERE" # Optional, for Groq models.
|
||||
OPENROUTER_API_KEY="YOUR_OPENROUTER_KEY_HERE" # Optional, for OpenRouter models.
|
||||
AZURE_OPENAI_API_KEY="your_azure_key_here" # Optional, for Azure OpenAI models (requires endpoint in .taskmaster/config.json).
|
||||
OLLAMA_API_KEY="your_ollama_api_key_here" # Optional: For remote Ollama servers that require authentication.
|
||||
GITHUB_API_KEY="your_github_api_key_here" # Optional: For GitHub import/export features. Format: ghp_... or github_pat_...
|
||||
263
.gitea/workflows/ci.yml
Normal file
263
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,263 @@
|
||||
name: CI/CD
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
env:
|
||||
GITEA_REGISTRY: 10.0.0.194:3012
|
||||
GITEA_PUBLIC_URL: https://mysources.co.uk
|
||||
GITEA_OWNER: michal
|
||||
|
||||
# ============================================================
|
||||
# Required Gitea secrets:
|
||||
# PACKAGES_TOKEN -- Gitea API token (packages + registry)
|
||||
# ============================================================
|
||||
|
||||
jobs:
|
||||
# -- CI checks (run in parallel on every push/PR) ----------
|
||||
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: bastion
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Lint
|
||||
run: pnpm lint || echo "::warning::Lint has errors -- not blocking CI yet"
|
||||
|
||||
typecheck:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: bastion
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Typecheck
|
||||
run: pnpm typecheck
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: bastion
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build (needed by completions check)
|
||||
run: pnpm build
|
||||
|
||||
- name: Run tests
|
||||
run: pnpm test:run
|
||||
|
||||
# -- Build & package (both architectures) -------------------
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, typecheck, test]
|
||||
defaults:
|
||||
run:
|
||||
working-directory: bastion
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build all packages
|
||||
run: pnpm build
|
||||
|
||||
- name: Generate shell completions
|
||||
run: pnpm completions:generate
|
||||
|
||||
- uses: oven-sh/setup-bun@v2
|
||||
|
||||
- name: Install nfpm
|
||||
run: |
|
||||
curl -sL -o /tmp/nfpm.tar.gz "https://github.com/goreleaser/nfpm/releases/download/v2.45.0/nfpm_2.45.0_Linux_x86_64.tar.gz"
|
||||
tar xzf /tmp/nfpm.tar.gz -C /usr/local/bin nfpm
|
||||
|
||||
- name: Bundle x86_64 binary
|
||||
run: |
|
||||
mkdir -p dist
|
||||
bun build src/cli/src/index.ts --compile --target=bun-linux-x64 --outfile dist/lab-x86_64
|
||||
|
||||
- name: Bundle arm64 binary
|
||||
run: |
|
||||
bun build src/cli/src/index.ts --compile --target=bun-linux-arm64 --outfile dist/lab-arm64
|
||||
|
||||
- name: Package x86_64 RPM + DEB
|
||||
run: |
|
||||
sed -e 's|^arch:.*|arch: amd64|' -e 's|src: ./dist/lab$|src: ./dist/lab-x86_64|' nfpm.yaml > /tmp/nfpm-x86_64.yaml
|
||||
nfpm pkg --config /tmp/nfpm-x86_64.yaml --packager rpm --target dist/
|
||||
nfpm pkg --config /tmp/nfpm-x86_64.yaml --packager deb --target dist/
|
||||
|
||||
- name: Package arm64 RPM + DEB
|
||||
run: |
|
||||
sed -e 's|^arch:.*|arch: arm64|' -e 's|src: ./dist/lab$|src: ./dist/lab-arm64|' nfpm.yaml > /tmp/nfpm-arm64.yaml
|
||||
nfpm pkg --config /tmp/nfpm-arm64.yaml --packager rpm --target dist/
|
||||
nfpm pkg --config /tmp/nfpm-arm64.yaml --packager deb --target dist/
|
||||
|
||||
- name: Upload RPM artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: rpm-packages
|
||||
path: bastion/dist/lab-*.rpm
|
||||
retention-days: 7
|
||||
|
||||
- name: Upload DEB artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: deb-packages
|
||||
path: bastion/dist/lab*.deb
|
||||
retention-days: 7
|
||||
|
||||
# -- Release pipeline (main branch push only) --------------
|
||||
|
||||
publish-rpm:
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build]
|
||||
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
||||
defaults:
|
||||
run:
|
||||
working-directory: bastion
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download RPM artifacts
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: rpm-packages
|
||||
path: bastion/dist/
|
||||
|
||||
- name: Install rpm tools
|
||||
run: sudo apt-get update && sudo apt-get install -y rpm
|
||||
|
||||
- name: Publish RPMs to Gitea
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
|
||||
GITEA_URL: http://${{ env.GITEA_REGISTRY }}
|
||||
GITEA_OWNER: ${{ env.GITEA_OWNER }}
|
||||
GITEA_REPO: lab
|
||||
run: |
|
||||
for RPM_FILE in dist/lab-*.rpm; do
|
||||
[ -f "$RPM_FILE" ] || continue
|
||||
RPM_VERSION=$(rpm -qp --queryformat '%{VERSION}-%{RELEASE}' "$RPM_FILE")
|
||||
RPM_ARCH=$(rpm -qp --queryformat '%{ARCH}' "$RPM_FILE")
|
||||
echo "Publishing $RPM_FILE (version $RPM_VERSION, arch $RPM_ARCH)..."
|
||||
|
||||
# Delete existing version if present
|
||||
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
"${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/lab/${RPM_VERSION}")
|
||||
|
||||
if [ "$HTTP_CODE" = "200" ]; then
|
||||
echo "Version exists, replacing..."
|
||||
curl -s -o /dev/null -X DELETE \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
"${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/lab/${RPM_VERSION}"
|
||||
fi
|
||||
|
||||
# Upload
|
||||
curl --fail -X PUT \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
--upload-file "$RPM_FILE" \
|
||||
"${GITEA_URL}/api/packages/${GITEA_OWNER}/rpm/upload"
|
||||
|
||||
echo "Published $RPM_FILE successfully!"
|
||||
done
|
||||
|
||||
# Link package to repo
|
||||
source scripts/link-package.sh
|
||||
link_package "rpm" "lab"
|
||||
|
||||
publish-deb:
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build]
|
||||
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
||||
defaults:
|
||||
run:
|
||||
working-directory: bastion
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download DEB artifacts
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: deb-packages
|
||||
path: bastion/dist/
|
||||
|
||||
- name: Publish DEBs to Gitea
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
|
||||
GITEA_URL: http://${{ env.GITEA_REGISTRY }}
|
||||
GITEA_OWNER: ${{ env.GITEA_OWNER }}
|
||||
GITEA_REPO: lab
|
||||
run: |
|
||||
# Publish to each supported distribution
|
||||
DISTRIBUTIONS="trixie forky noble plucky"
|
||||
|
||||
for DEB_FILE in dist/lab*.deb; do
|
||||
[ -f "$DEB_FILE" ] || continue
|
||||
DEB_VERSION=$(dpkg-deb --field "$DEB_FILE" Version)
|
||||
DEB_ARCH=$(dpkg-deb --field "$DEB_FILE" Architecture)
|
||||
echo "Publishing $DEB_FILE (version $DEB_VERSION, arch $DEB_ARCH)..."
|
||||
|
||||
for DIST in $DISTRIBUTIONS; do
|
||||
echo " -> $DIST..."
|
||||
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-X PUT \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
--upload-file "$DEB_FILE" \
|
||||
"${GITEA_URL}/api/packages/${GITEA_OWNER}/debian/pool/${DIST}/main/upload")
|
||||
|
||||
if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
|
||||
echo " Published to $DIST"
|
||||
elif [ "$HTTP_CODE" = "409" ]; then
|
||||
echo " Already exists in $DIST (skipping)"
|
||||
else
|
||||
echo " WARNING: Upload to $DIST returned HTTP $HTTP_CODE"
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
echo "Published successfully!"
|
||||
|
||||
# Link package to repo
|
||||
source scripts/link-package.sh
|
||||
link_package "debian" "lab"
|
||||
25
.gitignore
vendored
Normal file
25
.gitignore
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
dev-debug.log
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
|
||||
# Environment variables
|
||||
.env
|
||||
|
||||
# Editor directories and files
|
||||
.idea
|
||||
.vscode
|
||||
*.suo
|
||||
*.ntvs*
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
|
||||
# OS specific
|
||||
.DS_Store
|
||||
12
.mcp.json
Normal file
12
.mcp.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"labctl": {
|
||||
"command": "mcpctl",
|
||||
"args": [
|
||||
"mcp",
|
||||
"-p",
|
||||
"labctl"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
1
.taskmaster/.env
Normal file
1
.taskmaster/.env
Normal file
@@ -0,0 +1 @@
|
||||
PERPLEXITY_API_KEY=dummy
|
||||
44
.taskmaster/config.json
Normal file
44
.taskmaster/config.json
Normal file
@@ -0,0 +1,44 @@
|
||||
{
|
||||
"models": {
|
||||
"main": {
|
||||
"provider": "claude-code",
|
||||
"modelId": "opus",
|
||||
"maxTokens": 32000,
|
||||
"temperature": 0.2
|
||||
},
|
||||
"research": {
|
||||
"provider": "claude-code",
|
||||
"modelId": "opus",
|
||||
"maxTokens": 32000,
|
||||
"temperature": 0.2
|
||||
},
|
||||
"fallback": {
|
||||
"provider": "claude-code",
|
||||
"modelId": "sonnet",
|
||||
"maxTokens": 64000,
|
||||
"temperature": 0.2
|
||||
}
|
||||
},
|
||||
"global": {
|
||||
"logLevel": "info",
|
||||
"debug": false,
|
||||
"defaultNumTasks": 10,
|
||||
"defaultSubtasks": 5,
|
||||
"defaultPriority": "medium",
|
||||
"projectName": "Task Master",
|
||||
"ollamaBaseURL": "http://localhost:11434/api",
|
||||
"bedrockBaseURL": "https://bedrock.us-east-1.amazonaws.com",
|
||||
"responseLanguage": "English",
|
||||
"enableCodebaseAnalysis": true,
|
||||
"enableProxy": false,
|
||||
"anonymousTelemetry": true,
|
||||
"userId": "1234567890"
|
||||
},
|
||||
"claudeCode": {},
|
||||
"codexCli": {},
|
||||
"grokCli": {
|
||||
"timeout": 120000,
|
||||
"workingDirectory": null,
|
||||
"defaultModel": "grok-4-latest"
|
||||
}
|
||||
}
|
||||
452
.taskmaster/docs/prd.md
Normal file
452
.taskmaster/docs/prd.md
Normal file
@@ -0,0 +1,452 @@
|
||||
# labctl — Infrastructure Management Platform
|
||||
|
||||
## Product Requirements Document
|
||||
|
||||
## 1. Overview
|
||||
|
||||
labctl is a unified infrastructure management platform for bare-metal servers, Kubernetes clusters, and cloud resources. It replaces Puppet with a modern, TypeScript-native system using Pulumi for infrastructure as code.
|
||||
|
||||
### 1.1 Core Principles
|
||||
- **Single CLI** (`labctl`) for all infrastructure operations
|
||||
- **mTLS everywhere** — built-in Certificate Authority, no SSH key management
|
||||
- **RBAC from day one** — deny by default, audit everything
|
||||
- **Multi-cloud** — bare metal now, AWS later, extensible to any cloud
|
||||
- **Test infrastructure like code** — ephemeral environments, smoke tests, security tests
|
||||
- **Pulumi over Helm** — TypeScript charts, typed, testable, no YAML templating
|
||||
|
||||
### 1.2 Current State (completed)
|
||||
- PXE bastion for bare-metal provisioning (discover, install, reprovision)
|
||||
- CLI with subcommands: `labctl init bastion`, `labctl provision`
|
||||
- LVM partitioning with reprovision data preservation (/home, /srv, /var/lib/longhorn, /var/lib/rancher)
|
||||
- Worker role (k3s agent + Longhorn) and infra role (k3s server + etcd)
|
||||
- 32 unit tests, VM smoke tests verified on real hardware
|
||||
- Multi-arch builds (x86_64 + arm64), RPM/DEB packaging, Gitea CI/CD
|
||||
- labd scaffold with CockroachDB Prisma schema (Server, Agent, User, Role, Permission, AuditLog, JoinToken, Cluster, PulumiRun)
|
||||
|
||||
### 1.3 Hardware
|
||||
- labmaster (puppet.ad.itaz.eu / 78:55:36:08:35:14): MinisForum SER9, AMD Ryzen 7 255, 16 cores, 27GB RAM, 1TB NVMe, infra role
|
||||
- Future: additional bare-metal worker nodes, AWS EC2 instances
|
||||
|
||||
## 2. Architecture
|
||||
|
||||
### 2.1 Components
|
||||
|
||||
```
|
||||
labctl CLI → labd (master) → lab-agent (on every server)
|
||||
↓
|
||||
CockroachDB
|
||||
```
|
||||
|
||||
**labctl** — CLI binary installed on developer workstations. Compiled with bun to a standalone binary. Distributed as an RPM, a DEB, or a plain binary.
|
||||
|
||||
**labd** — Master daemon running as k8s Deployment on labmaster's k3s cluster. Stateless (all state in CockroachDB). Multiple instances behind k8s Service for HA. Manages: CA, RBAC, agent registry, Pulumi executor, kubectl proxy, app deployments, log relay.
|
||||
|
||||
**lab-agent** — Lightweight daemon on every managed machine. Connects to labd via mTLS WebSocket. Handles: heartbeat, command execution, log streaming, module application. Compiled to a standalone binary with bun. Installed as a systemd service.
|
||||
|
||||
**CockroachDB** — Distributed SQL database. PostgreSQL wire-compatible (Prisma works unchanged). Single node to start, multi-node for HA. Stores: server state, RBAC, audit logs, certificates, kubeconfigs (encrypted), Pulumi state.
|
||||
|
||||
**Bastion** — PXE provisioning server. Runs as k8s pod with hostNetwork (needs DHCP/TFTP). Managed by labd as an "app". Multiple bastions for multiple sites.
|
||||
|
||||
### 2.2 Network Architecture
|
||||
|
||||
**Cilium** as k8s CNI (replacing default flannel):
|
||||
- eBPF-based pod networking
|
||||
- Built-in WireGuard encryption between nodes
|
||||
- Network policies (ties into RBAC)
|
||||
- Hubble for observability
|
||||
- Future: Cluster Mesh for multi-site transparent networking
|
||||
|
||||
No Tailscale dependency — Cilium handles node-to-node encryption. Agents connect to labd over standard TCP/TLS.
|
||||
|
||||
### 2.3 Authentication
|
||||
|
||||
**mTLS with built-in Certificate Authority:**
|
||||
1. labd generates root CA on first start (stored encrypted in CockroachDB)
|
||||
2. Agents enroll with join token → receive signed certificate
|
||||
3. CLI users authenticate with client certificates (or SSH key-based initial auth)
|
||||
4. All communication authenticated via mutual TLS
|
||||
5. Certificate rotation and revocation supported
|
||||
|
||||
**Join tokens:**
|
||||
- One-time tokens: for individual bare-metal servers (generated during PXE provision, embedded in kickstart)
|
||||
- Reusable tokens: for autoscaling groups (AWS ASG instances share a token)
|
||||
- Tokens can be revoked, have optional expiry
|
||||
|
||||
### 2.4 RBAC Model
|
||||
|
||||
Inspired by mcpctl's RBAC (src/mcpd/src/services/, middleware/auth). Hierarchical permissions:
|
||||
|
||||
```
|
||||
action:cloud:environment:server
|
||||
|
||||
Examples:
|
||||
read:*:*:* — read everything
|
||||
exec:baremetal:lab:* — exec on any lab bare-metal server
|
||||
kubectl:*:*:* — kubectl proxy on any cluster
|
||||
*:baremetal:lab:puppet — full access to puppet server only
|
||||
manage:*:*:* — manage apps, clusters, tokens
|
||||
admin:*:*:* — full admin (create users, roles)
|
||||
```
|
||||
|
||||
**Resources:** servers, environments, clouds, modules, roles, users, clusters, apps, pulumi-stacks
|
||||
**Actions:** read, exec, apply, destroy, manage, admin, kubectl
|
||||
**Deny rules:** explicit deny overrides any allow (like AWS IAM)
|
||||
|
||||
Prisma models: Role, Permission (allow/deny), UserRole binding.
|
||||
|
||||
### 2.5 Database
|
||||
|
||||
**CockroachDB** chosen over PostgreSQL and Cassandra:
|
||||
- PostgreSQL wire-compatible — Prisma works, mcpctl patterns reusable
|
||||
- Multi-master replication — any node accepts reads AND writes
|
||||
- Strong consistency (not eventual like Cassandra)
|
||||
- Survives node failures (a 3-node cluster tolerates 1 failure; a 5-node cluster tolerates 2)
|
||||
- Auto-rebalancing when adding nodes
|
||||
- Start single-node, scale to multi-node with zero code changes (just add nodes)
|
||||
|
||||
**Schema (already scaffolded in Prisma):**
|
||||
- Server — managed machines (hostname, mac, cloud, env, role, labels, status)
|
||||
- Agent — connected agents (cert, enrollment, last seen)
|
||||
- User — platform users (username, cert fingerprint)
|
||||
- Role — RBAC roles with permissions
|
||||
- Permission — allow/deny rules (action:cloud:env:server)
|
||||
- UserRole — user-to-role bindings
|
||||
- JoinToken — enrollment tokens (one-time, reusable, revocable)
|
||||
- AuditLog — every action logged (user, session, action, resource, result, duration)
|
||||
- PulumiRun — infrastructure-as-code execution records
|
||||
- Cluster — managed k8s clusters (kubeconfig encrypted)
|
||||
|
||||
## 3. CLI Command Reference
|
||||
|
||||
### 3.1 Bastion (PXE Provisioning) — IMPLEMENTED
|
||||
```bash
|
||||
sudo labctl init bastion standalone start [--foreground] [--port 8080]
|
||||
sudo labctl init bastion standalone stop
|
||||
labctl init bastion standalone status
|
||||
```
|
||||
|
||||
### 3.2 Provisioning — IMPLEMENTED
|
||||
```bash
|
||||
labctl provision list
|
||||
labctl provision install <mac> <hostname> --role worker|infra
|
||||
labctl provision reprovision <mac> <hostname> --role worker|infra
|
||||
labctl provision forget <mac>
|
||||
```
|
||||
|
||||
### 3.3 Server Management — TO BUILD
|
||||
```bash
|
||||
labctl get servers [--env NAME] [--cloud NAME] [--label KEY=VALUE]
|
||||
labctl describe server/<name>
|
||||
```
|
||||
|
||||
### 3.4 Remote Execution — TO BUILD
|
||||
```bash
|
||||
labctl exec server/<name> -- <command>
|
||||
labctl exec server/<name> -it -- bash # interactive TTY
|
||||
labctl exec server/<name> --timeout 30s -- cmd
|
||||
```
|
||||
|
||||
### 3.5 Kubernetes Proxy — TO BUILD
|
||||
```bash
|
||||
labctl kubectl --cluster <name> <kubectl-args>
|
||||
labctl clusters add <name> --kubeconfig <path>
|
||||
labctl clusters list
|
||||
labctl clusters remove <name>
|
||||
```
|
||||
|
||||
### 3.6 Logs — TO BUILD
|
||||
```bash
|
||||
# Server logs (journalctl passthrough, no DB in hot path)
|
||||
labctl logs server/<name> # all journal
|
||||
labctl logs server/<name> -f # follow (live WebSocket relay)
|
||||
labctl logs server/<name> -n 100 # last 100 lines
|
||||
labctl logs server/<name> -u k3s # specific unit
|
||||
labctl logs server/<name> -u sshd --since "1h ago"
|
||||
labctl logs server/<name> -k # kernel
|
||||
labctl logs server/<name> -p err # errors only
|
||||
labctl logs server/<name> --file /var/log/nginx/error.log
|
||||
|
||||
# App logs (k8s pod logs)
|
||||
labctl logs app/<name> [-f] [--container NAME]
|
||||
|
||||
# Pulumi execution logs
|
||||
labctl logs pulumi/<run-id> [-f]
|
||||
|
||||
# Bastion logs
|
||||
labctl logs bastion/<env> [--mac MAC]
|
||||
|
||||
# Agent daemon logs
|
||||
labctl logs agent/<server>
|
||||
|
||||
# Audit logs (from CockroachDB)
|
||||
labctl logs audit [--user NAME] [--action ACTION] [--since TIME]
|
||||
labctl logs audit/<user-date-sessionid> # specific session
|
||||
```
|
||||
|
||||
Log architecture: agent runs journalctl/tail with user-provided flags, streams stdout over WebSocket to labd, labd relays to CLI. No database in the hot path. Future: Grafana Loki integration for cold storage.
|
||||
|
||||
### 3.7 Apps (Pulumi Charts, replacing Helm) — TO BUILD
|
||||
```bash
|
||||
labctl apps list
|
||||
labctl apps install <name> [--set key=value] [-f values.yaml]
|
||||
labctl apps status <name>
|
||||
labctl apps upgrade <name>
|
||||
labctl apps history <name>
|
||||
labctl apps rollback <name> <version>
|
||||
labctl apps uninstall <name>
|
||||
```
|
||||
|
||||
### 3.8 Infrastructure as Code — TO BUILD
|
||||
```bash
|
||||
labctl apply -f <file.ts> --env <env>
|
||||
labctl plan -f <file.ts> --env <env>
|
||||
labctl destroy -f <file.ts> --env <env>
|
||||
```
|
||||
|
||||
### 3.9 RBAC — TO BUILD
|
||||
```bash
|
||||
labctl get roles
|
||||
labctl get users
|
||||
labctl create role <name> --allow "action:cloud:env:server"
|
||||
labctl create role <name> --deny "destroy:*:*:*"
|
||||
labctl bind role <role> --user <user>
|
||||
labctl unbind role <role> --user <user>
|
||||
labctl get permissions
|
||||
```
|
||||
|
||||
### 3.10 Environments and Clouds — TO BUILD
|
||||
```bash
|
||||
labctl get environments
|
||||
labctl get clouds
|
||||
labctl create environment <name> --cloud <cloud>
|
||||
```
|
||||
|
||||
## 4. Partition Layout
|
||||
|
||||
### Worker Role
|
||||
```
|
||||
/boot/efi 600MB EFI
|
||||
/boot 3GB ext4
|
||||
── LVM VG: labvg ──
|
||||
swap 27GB
|
||||
/ 33GB xfs
|
||||
/var 100GB xfs
|
||||
/var/log 10GB xfs
|
||||
/home 10GB xfs ← preserved on reprovision
|
||||
/srv 20GB xfs ← preserved on reprovision
|
||||
/var/lib/longhorn rest xfs ← preserved (Longhorn PVC storage)
|
||||
/tmp tmpfs 4GB
|
||||
```
|
||||
|
||||
### Infra Role
|
||||
```
|
||||
/boot/efi 600MB EFI
|
||||
/boot 3GB ext4
|
||||
── LVM VG: labvg ──
|
||||
swap 27GB
|
||||
/ 33GB xfs
|
||||
/var 100GB xfs
|
||||
/var/log 10GB xfs
|
||||
/home 10GB xfs ← preserved on reprovision
|
||||
/srv 20GB xfs ← preserved on reprovision
|
||||
/var/lib/rancher 20GB xfs ← preserved (k3s etcd data)
|
||||
/tmp tmpfs 4GB
|
||||
```
|
||||
|
||||
## 5. Module System
|
||||
|
||||
Configuration modules define desired state. Three tiers:
|
||||
1. **Core modules** (this repo, `modules/`): k3s-server, k3s-agent, labd, lab-agent, bastion
|
||||
2. **Official modules** (separate repos): monitoring, cilium, DNS
|
||||
3. **Custom modules** (user repos): pulled by git URL
|
||||
|
||||
Module structure:
|
||||
```
|
||||
module.yaml # name, version, targets (roles/labels), deps
|
||||
src/index.ts # entry point
|
||||
src/install.ts # installation logic
|
||||
src/configure.ts # configuration logic
|
||||
src/health.ts # health check
|
||||
tests/ # vitest tests (mandatory)
|
||||
```
|
||||
|
||||
## 6. Testing Strategy
|
||||
|
||||
### 6.1 Testing Pyramid
|
||||
```
|
||||
Unit Tests → pure logic, milliseconds, every commit
|
||||
Smoke Tests → containers (podman-compose), minutes, every commit
|
||||
Integration Tests → VMs (libvirt), 10-15 min, PRs
|
||||
E2E Tests → real hardware/cloud, 20-30 min, pre-release
|
||||
```
|
||||
|
||||
### 6.2 Smoke Test Stack (podman-compose)
|
||||
```yaml
|
||||
services:
|
||||
cockroachdb:
|
||||
image: cockroachdb/cockroach:latest-v24.3
|
||||
labd:
|
||||
build: .
|
||||
depends_on: [cockroachdb]
|
||||
agent-1:
|
||||
build: ./agent
|
||||
depends_on: [labd]
|
||||
agent-2:
|
||||
build: ./agent
|
||||
depends_on: [labd]
|
||||
```
|
||||
Tests: agent enrollment, certificate issuance, heartbeat, exec, logs, RBAC deny/allow.
|
||||
|
||||
### 6.3 Security Tests (RBAC)
|
||||
- Deny exec without permission
|
||||
- Deny cross-environment access
|
||||
- Deny rules override allow rules
|
||||
- Cannot escalate own permissions
|
||||
- Audit log records every denied attempt
|
||||
- Certificate-based auth cannot be spoofed
|
||||
- One-time join tokens cannot be reused
|
||||
- Expired tokens rejected
|
||||
|
||||
### 6.4 Ephemeral Test Environments
|
||||
```bash
|
||||
labctl test smoke # podman-compose
|
||||
labctl test integration # libvirt VMs
|
||||
labctl env create pr-123 --cloud containers # CI ephemeral
|
||||
labctl env create pr-123 --cloud aws # cloud ephemeral (future)
|
||||
```
|
||||
|
||||
### 6.5 Health Gates for Deployment
|
||||
Before promoting to production, ALL must pass:
|
||||
- labd API responds
|
||||
- Expected number of agents connected
|
||||
- k3s nodes Ready
|
||||
- Certificates valid (>30 days)
|
||||
- RBAC smoke test passes
|
||||
- No error logs in last 5 minutes
|
||||
|
||||
## 7. Cloud/Environment Model
|
||||
|
||||
```
|
||||
Cloud: baremetal
|
||||
└── Environment: lab
|
||||
├── Server: labmaster.ad.itaz.eu (infra, labels={k3s=server})
|
||||
└── Server: ser9.ad.itaz.eu (worker, labels={k3s=agent})
|
||||
|
||||
Cloud: aws (future)
|
||||
└── Environment: production
|
||||
├── Server: i-abc123 (from ASG web-servers)
|
||||
└── Server: i-def456 (from ASG web-servers)
|
||||
```
|
||||
|
||||
Each bastion creates an environment under the baremetal cloud. AWS autoscaling groups create environments under the aws cloud.
|
||||
|
||||
## 8. App Model (Pulumi Charts)
|
||||
|
||||
Each app is a Pulumi TypeScript program:
|
||||
```
|
||||
app.yaml # name, version, inputs schema, required permissions
|
||||
src/index.ts # Pulumi program
|
||||
values.yaml # defaults
|
||||
tests/ # vitest tests
|
||||
```
|
||||
|
||||
First apps to build:
|
||||
- bastion — PXE provisioning (wrap existing code)
|
||||
- labd — master daemon (self-deployment)
|
||||
- cockroachdb — database
|
||||
- cilium — CNI
|
||||
|
||||
## 9. Implementation Phases
|
||||
|
||||
### Phase 1: Foundation (PARTIALLY DONE)
|
||||
- [x] PXE bastion (discover, install, reprovision)
|
||||
- [x] CLI structure (labctl init/provision)
|
||||
- [x] labd scaffold (Fastify + CockroachDB/Prisma schema)
|
||||
- [x] Multi-arch builds, packaging, CI/CD
|
||||
- [ ] Certificate Authority in labd
|
||||
- [ ] lab-agent skeleton (connect, heartbeat, enrollment)
|
||||
- [ ] Agent enrollment via join tokens
|
||||
- [ ] RBAC engine
|
||||
- [ ] labctl exec (remote execution)
|
||||
- [ ] labctl logs (resource-scoped streaming)
|
||||
- [ ] labctl get servers (with filters)
|
||||
- [ ] Smoke test stack (podman-compose)
|
||||
|
||||
### Phase 2: Deployment
|
||||
- [ ] Reprovision labmaster as labmaster.ad.itaz.eu
|
||||
- [ ] Deploy k3s with Cilium CNI
|
||||
- [ ] Deploy CockroachDB on k3s
|
||||
- [ ] Deploy labd on k3s
|
||||
- [ ] Deploy bastion as managed app
|
||||
- [ ] Auto-enroll agents during PXE provision
|
||||
|
||||
### Phase 3: Infrastructure as Code
|
||||
- [ ] Module system
|
||||
- [ ] Pulumi charts (replacing Helm)
|
||||
- [ ] labctl apps install/upgrade/rollback
|
||||
- [ ] labctl apply -f (Pulumi execution)
|
||||
- [ ] kubectl proxy (audited)
|
||||
- [ ] Kubeconfig store (encrypted)
|
||||
|
||||
### Phase 4: Multi-Cloud
|
||||
- [ ] AWS provider (Pulumi)
|
||||
- [ ] Reusable join tokens for ASGs
|
||||
- [ ] Cilium Cluster Mesh
|
||||
- [ ] Ephemeral test environments
|
||||
- [ ] Grafana Loki for cold logs
|
||||
|
||||
## 10. Technology Stack
|
||||
|
||||
| Component | Technology | Notes |
|
||||
|-----------|-----------|-------|
|
||||
| Language | TypeScript (ESM) | Same for CLI, daemon, agents, IaC |
|
||||
| CLI | Commander.js | Matches mcpctl patterns |
|
||||
| HTTP Server | Fastify + WebSocket | labd and bastion |
|
||||
| Database | CockroachDB | PostgreSQL compatible, Prisma ORM |
|
||||
| ORM | Prisma | Reuse mcpctl patterns |
|
||||
| IaC | Pulumi (TypeScript) | Replaces Helm and Puppet |
|
||||
| k8s CNI | Cilium | eBPF, WireGuard, network policies |
|
||||
| Auth | mTLS (built-in CA) | Certificate-based, no SSH keys |
|
||||
| Packaging | nfpm (RPM/DEB) | bun compile for standalone binary |
|
||||
| Containers | Podman + podman-compose | No Docker dependency |
|
||||
| CI/CD | Gitea Actions | Self-hosted on mysources.co.uk |
|
||||
| Testing | Vitest | Unit + smoke + integration |
|
||||
| Registry | Gitea packages | RPM, DEB, container images |
|
||||
|
||||
## 11. Lessons from mcpctl
|
||||
|
||||
The mcpctl project (../mcpctl/) established patterns reused here:
|
||||
|
||||
**Project structure:** pnpm monorepo with workspace packages (shared, cli, daemon). Each package has own package.json, tsconfig.json, vitest.config.ts.
|
||||
|
||||
**CLI patterns:** Commander.js with factory functions (createXxxCommand). Global options (mcpctl's --project maps to --env/--cloud here). Resource CRUD (get, describe, delete, create, apply).
|
||||
|
||||
**Server patterns:** Fastify with route registration functions. Services layer with repository pattern. Middleware for auth. Health endpoints.
|
||||
|
||||
**Database:** Prisma ORM with PostgreSQL (now CockroachDB, wire-compatible). Migration-first schema. Seed data for initial setup.
|
||||
|
||||
**RBAC:** Role-based with permission strings. Middleware checks on every request. Audit logging in middleware.
|
||||
|
||||
**Testing:** Vitest with separate configs for unit vs smoke. Smoke tests with real database and services. Security tests for RBAC.
|
||||
|
||||
**CI/CD:** Gitea Actions with lint→typecheck→test→build→publish pipeline. nfpm for RPM/DEB. Bun compile for standalone binaries. Podman for container images.
|
||||
|
||||
**Deployment:** Docker/Podman compose for dev stack. Portainer API for production deploy (we'll use k3s instead). systemd for local daemons.
|
||||
|
||||
**Completions:** Generated from Commander tree. Bash + Fish. --write and --check modes. Included in packages.
|
||||
|
||||
**Key learnings applied:**
|
||||
- Start with proper monorepo structure (not flat scripts)
|
||||
- Type safety across packages via workspace references
|
||||
- Test-driven (unit tests before features)
|
||||
- CI from the start (not retrofitted)
|
||||
- RBAC and audit from the start (not bolted on)
|
||||
- Database-first design (schema defines the domain)
|
||||
|
||||
## 12. Gitea Registry
|
||||
|
||||
**Registry:** mysources.co.uk (self-hosted Gitea at 10.0.0.194)
|
||||
**Token:** stored at ~/.gitea-token, env var PACKAGES_TOKEN
|
||||
**Packages:** RPM and DEB published to Gitea packages API
|
||||
**Container images:** pushed to Gitea container registry
|
||||
**API pattern:** Same as mcpctl publish scripts (check existing, delete, re-upload, link to repo)
|
||||
6
.taskmaster/state.json
Normal file
6
.taskmaster/state.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"currentTag": "master",
|
||||
"lastSwitched": "2026-03-18T00:17:54.213Z",
|
||||
"branchTagMapping": {},
|
||||
"migrationNoticeShown": true
|
||||
}
|
||||
180
.taskmaster/tasks/tasks.json
Normal file
180
.taskmaster/tasks/tasks.json
Normal file
@@ -0,0 +1,180 @@
|
||||
{
|
||||
"master": {
|
||||
"tasks": [
|
||||
{
|
||||
"id": 72,
|
||||
"title": "Expand Prisma Schema with Resource Relationships",
|
||||
"description": "Add Network, ServerNic, ServerDisk, and ClusterMember models to the Prisma schema. Add bastionId foreign key to Server model to track which bastion owns each server.",
|
||||
"details": "Edit `bastion/src/labd/prisma/schema.prisma` to add:\n\n1. **Server model changes**:\n - Add `bastionId String?` with relation to Bastion\n - Add `hardwareInfo Json?` for storing raw HardwareInfo\n - Add `os String?` for installed OS\n\n2. **Network model**:\n```prisma\nmodel Network {\n id String @id @default(uuid())\n name String @unique\n cidr String\n vlan Int?\n gateway String?\n domain String?\n dhcpEnabled Boolean @default(false)\n createdAt DateTime @default(now())\n updatedAt DateTime @updatedAt\n \n nics ServerNic[]\n}\n```\n\n3. **ServerNic model**:\n```prisma\nmodel ServerNic {\n id String @id @default(uuid())\n serverId String\n server Server @relation(fields: [serverId], references: [id], onDelete: Cascade)\n networkId String?\n network Network? @relation(fields: [networkId], references: [id])\n mac String\n ip String?\n name String\n state String @default(\"DOWN\")\n \n @@unique([serverId, mac])\n @@index([networkId])\n}\n```\n\n4. **ServerDisk model**:\n```prisma\nmodel ServerDisk {\n id String @id @default(uuid())\n serverId String\n server Server @relation(fields: [serverId], references: [id], onDelete: Cascade)\n name String\n sizeGb Float\n model String?\n \n @@unique([serverId, name])\n}\n```\n\n5. **ClusterMember model**:\n```prisma\nmodel ClusterMember {\n id String @id @default(uuid())\n clusterId String\n cluster Cluster @relation(fields: [clusterId], references: [id], onDelete: Cascade)\n serverId String\n server Server @relation(fields: [serverId], references: [id], onDelete: Cascade)\n role String @default(\"worker\") // control-plane, worker\n joinedAt DateTime @default(now())\n \n @@unique([clusterId, serverId])\n @@index([clusterId])\n @@index([serverId])\n}\n```\n\n6. Update Server model with relations to nics, disks, clusterMemberships, and bastion.\n\nRun `pnpm prisma generate` and `pnpm prisma migrate dev --name add-resource-models`.",
|
||||
"testStrategy": "1. Run `pnpm prisma validate` to verify schema syntax\n2. Run `pnpm prisma generate` to confirm client generation\n3. Create migration and verify it applies cleanly to local CockroachDB\n4. Write unit tests that create/read/delete each new model\n5. Verify cascade deletes work (deleting Server removes its NICs and Disks)",
|
||||
"priority": "high",
|
||||
"dependencies": [],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 73,
|
||||
"title": "Implement State Persistence Service in labd",
|
||||
"description": "Create a new service in labd that persists bastion state syncs to the Server table in CockroachDB. When bastion-state-sync messages arrive, upsert machines into Server with their hardware info, status, and ownership.",
|
||||
"details": "Create `bastion/src/labd/src/services/state-persistence.ts`:\n\n```typescript\nimport type { PrismaClient } from \"@prisma/client\";\nimport type { BastionState, HardwareInfo, InstallConfig, InstalledInfo } from \"@lab/shared\";\nimport { logger } from \"./logger.js\";\n\nexport class StatePersistence {\n constructor(private readonly db: PrismaClient) {}\n\n async syncBastionState(bastionId: string, state: BastionState): Promise<void> {\n // Process discovered machines\n for (const [mac, hw] of Object.entries(state.discovered)) {\n await this.upsertDiscoveredServer(bastionId, mac, hw);\n }\n \n // Process queued machines (update status to provisioning)\n for (const [mac, cfg] of Object.entries(state.install_queue)) {\n await this.upsertQueuedServer(bastionId, mac, cfg);\n }\n \n // Process installed machines\n for (const [mac, info] of Object.entries(state.installed)) {\n await this.upsertInstalledServer(bastionId, mac, info);\n }\n }\n\n private async upsertDiscoveredServer(bastionId: string, mac: string, hw: HardwareInfo): Promise<void> {\n const normalized = mac.toLowerCase();\n \n await this.db.server.upsert({\n where: { mac: normalized },\n create: {\n hostname: `unknown-${normalized.replace(/:/g, \"\").slice(-6)}`,\n mac: normalized,\n bastionId,\n status: \"discovered\",\n hardwareInfo: hw as any,\n labels: {\n arch: hw.arch,\n cpu_model: hw.cpu_model,\n cpu_cores: hw.cpu_cores,\n memory_gb: hw.memory_gb,\n },\n },\n update: {\n bastionId,\n status: \"discovered\", // only if not already provisioning/installed\n hardwareInfo: hw as any,\n },\n });\n \n // Sync NICs and Disks\n await this.syncServerHardware(normalized, hw);\n }\n \n private async syncServerHardware(mac: string, hw: HardwareInfo): Promise<void> {\n const server = await this.db.server.findUnique({ where: { mac } });\n if (!server) return;\n \n // Upsert NICs\n for (const nic of hw.nics) {\n await this.db.serverNic.upsert({\n where: { serverId_mac: { serverId: server.id, mac: 
nic.mac.toLowerCase() } },\n create: { serverId: server.id, mac: nic.mac.toLowerCase(), name: nic.name, state: nic.state },\n update: { name: nic.name, state: nic.state },\n });\n }\n \n // Upsert Disks\n for (const disk of hw.disks) {\n await this.db.serverDisk.upsert({\n where: { serverId_name: { serverId: server.id, name: disk.name } },\n create: { serverId: server.id, name: disk.name, sizeGb: disk.size_gb, model: disk.model },\n update: { sizeGb: disk.size_gb, model: disk.model },\n });\n }\n }\n \n // Similar methods for upsertQueuedServer and upsertInstalledServer...\n}\n```\n\nIntegrate into `server.ts` WebSocket handler by calling `statePersistence.syncBastionState()` when `bastion-state-sync` messages arrive.",
|
||||
"testStrategy": "1. Unit test StatePersistence with mocked PrismaClient\n2. Integration test: simulate bastion-state-sync message, verify Server rows created\n3. Test idempotency: send same state twice, verify no duplicates\n4. Test status transitions: discovered -> provisioning -> installed, and verify that a later discovered sync does not reset a provisioning/installed server back to discovered\n5. Verify hardware info (NICs, Disks) is correctly persisted",
|
||||
"priority": "high",
|
||||
"dependencies": [
|
||||
72
|
||||
],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 74,
|
||||
"title": "Add State Loading from labd on Bastion Startup",
|
||||
"description": "Modify bastion startup to request its persisted state from labd before using the local JSON cache. This ensures bastions restore their state after pod restarts.",
|
||||
"details": "1. Add new labd API endpoint `GET /api/bastions/:id/state` that returns the aggregated state for a specific bastion from the Server table:\n\n```typescript\n// bastion/src/labd/src/routes/bastions.ts\napp.get<{ Params: { id: string } }>(\"/api/bastions/:id/state\", async (request, reply) => {\n const { id } = request.params;\n \n const servers = await db.server.findMany({\n where: { bastionId: id },\n include: { nics: true, disks: true },\n });\n \n // Transform back to BastionState format\n const state: BastionState = { discovered: {}, install_queue: {}, installed: {} };\n for (const server of servers) {\n const mac = server.mac;\n if (!mac) continue;\n \n switch (server.status) {\n case \"discovered\":\n state.discovered[mac] = transformToHardwareInfo(server);\n break;\n case \"provisioning\":\n state.install_queue[mac] = transformToInstallConfig(server);\n break;\n case \"installed\":\n state.installed[mac] = transformToInstalledInfo(server);\n break;\n }\n }\n \n return reply.send(state);\n});\n```\n\n2. Modify `BastionConnection.connect()` in `labd-connection.ts` to fetch state after enrollment:\n\n```typescript\nprivate async loadRemoteState(): Promise<BastionState | null> {\n if (!this.bastionId || !this.config.labdUrl) return null;\n try {\n const resp = await fetch(`${this.config.labdUrl}/api/bastions/${this.bastionId}/state`);\n if (resp.ok) return await resp.json();\n } catch { /* fall back to local */ }\n return null;\n}\n```\n\n3. In bastion `main.ts`, after establishing labd connection, merge remote state with local state (remote takes precedence for installed machines, local wins for in-progress installs).",
|
||||
"testStrategy": "1. Integration test: start bastion, let it persist state, restart bastion, verify state restored\n2. Test merge logic: local has in-progress install, remote has discovered - verify install preserved\n3. Test offline mode: labd unavailable, bastion falls back to local JSON\n4. Test fresh start: no local state, no remote state - bastion starts with empty state",
|
||||
"priority": "high",
|
||||
"dependencies": [
|
||||
73
|
||||
],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 75,
|
||||
"title": "Fix Bastion --dir Environment Variable Default",
|
||||
"description": "Fix the bug where CLI's --dir default overrides the BASTION_DIR environment variable. The CLI option should use the env var as its default.",
|
||||
"details": "Edit `bastion/src/cli/src/commands/serve.ts`:\n\n```typescript\n// Before (line 14):\n.option(\"--dir <dir>\", \"Bastion data directory\", \"/tmp/lab-bastion\")\n\n// After:\n.option(\n \"--dir <dir>\",\n \"Bastion data directory\",\n process.env[\"BASTION_DIR\"] ?? \"/tmp/lab-bastion\"\n)\n```\n\nThis ensures:\n1. If `BASTION_DIR` env var is set (e.g., in k8s deployment), it's used as default\n2. Explicit `--dir` flag still overrides both\n3. Falls back to `/tmp/lab-bastion` if neither is set\n\nAlso update the k8s deployment manifest `bastion/deploy/k3s/deployment.yaml` to ensure `BASTION_DIR=/data` is properly set.",
|
||||
"testStrategy": "1. Unit test: verify option default reads from process.env\n2. Integration test: set BASTION_DIR, run labctl without --dir, verify correct dir used\n3. Integration test: set BASTION_DIR, run labctl with --dir /custom, verify /custom used\n4. Test no env var: verify default /tmp/lab-bastion used",
|
||||
"priority": "high",
|
||||
"dependencies": [],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 76,
|
||||
"title": "Create Resource Type Registry with Aliases",
|
||||
"description": "Create a centralized resource type registry that maps resource names, plurals, and short aliases to canonical types. This enables kubectl-style resource resolution.",
|
||||
"details": "Create `bastion/src/cli/src/utils/resources.ts`:\n\n```typescript\nexport interface ResourceDefinition {\n kind: string; // Canonical type: \"Server\", \"Cluster\", etc.\n singular: string; // \"server\"\n plural: string; // \"servers\"\n aliases: string[]; // [\"srv\"]\n apiPath: string; // \"/api/servers\"\n columns: TableColumn[]; // Default columns for 'get' output\n wideColumns?: TableColumn[]; // Extra columns for -o wide\n}\n\nconst RESOURCE_DEFINITIONS: ResourceDefinition[] = [\n {\n kind: \"Server\",\n singular: \"server\",\n plural: \"servers\",\n aliases: [\"srv\"],\n apiPath: \"/api/servers\",\n columns: serverColumns,\n wideColumns: serverWideColumns,\n },\n {\n kind: \"Cluster\",\n singular: \"cluster\",\n plural: \"clusters\",\n aliases: [],\n apiPath: \"/api/clusters\",\n columns: clusterColumns,\n },\n {\n kind: \"Network\",\n singular: \"network\",\n plural: \"networks\",\n aliases: [\"net\"],\n apiPath: \"/api/networks\",\n columns: networkColumns,\n },\n // ... bastion, role, user, token, audit\n];\n\nconst aliasMap = new Map<string, ResourceDefinition>();\nfor (const def of RESOURCE_DEFINITIONS) {\n aliasMap.set(def.singular, def);\n aliasMap.set(def.plural, def);\n for (const alias of def.aliases) {\n aliasMap.set(alias, def);\n }\n}\n\nexport function resolveResourceType(input: string): ResourceDefinition {\n const normalized = input.toLowerCase();\n const def = aliasMap.get(normalized);\n if (!def) {\n const valid = RESOURCE_DEFINITIONS.map(d => d.plural).join(\", \");\n throw new Error(`Unknown resource type \"${input}\". Valid types: ${valid}`);\n }\n return def;\n}\n\nexport function resolveResourceIdentifier(input: string): {\n type: ResourceDefinition;\n name?: string;\n} {\n // Handle \"server/labmaster\" or just \"servers\"\n const parts = input.split(\"/\");\n const type = resolveResourceType(parts[0]);\n const name = parts.length > 1 ? 
parts.slice(1).join(\"/\") : undefined;\n return { type, name };\n}\n```\n\nUpdate `bastion/src/cli/src/utils/resource.ts` to use the new registry.",
|
||||
"testStrategy": "1. Unit test resolveResourceType with all aliases: server, servers, srv -> Server\n2. Test unknown resource type throws descriptive error\n3. Test case insensitivity: SERVER, Server, server all resolve correctly\n4. Test resolveResourceIdentifier parses \"server/labmaster\" correctly",
|
||||
"priority": "high",
|
||||
"dependencies": [],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 77,
|
||||
"title": "Implement 'labctl get' Command",
|
||||
"description": "Create the core 'labctl get <resource> [name]' command that lists resources with filtering and output format support. This is the foundation of the kubectl-style CLI.",
|
||||
"details": "Create `bastion/src/cli/src/commands/get.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { resolveResourceType, type ResourceDefinition } from \"../utils/resources.js\";\nimport { getLabdClient } from \"../api/config.js\";\nimport { formatOutput, type TableColumn } from \"../utils/table.js\";\n\nexport function registerGetCommand(program: Command): void {\n program\n .command(\"get <resource> [name]\")\n .description(\"List resources or get a specific resource by name\")\n .option(\"--status <status>\", \"Filter by status\")\n .option(\"--role <role>\", \"Filter by role (servers only)\")\n .option(\"--cloud <cloud>\", \"Filter by cloud\")\n .option(\"--env <environment>\", \"Filter by environment\")\n .option(\"-l, --label <label>\", \"Filter by label (key=value)\")\n .option(\"-A, --all-namespaces\", \"List across all clouds/environments\")\n .action(async (resource: string, name: string | undefined, opts) => {\n const config = program.opts()[\"_config\"];\n const resourceDef = resolveResourceType(resource);\n const client = getLabdClient();\n \n try {\n let data: unknown[];\n \n if (name) {\n // Get specific resource - could be name, ID, or MAC\n const item = await client.getResource(resourceDef, name);\n data = item ? [item] : [];\n } else {\n // List with filters\n data = await client.listResources(resourceDef, {\n status: opts.status,\n role: opts.role,\n cloud: opts.allNamespaces ? undefined : (opts.cloud ?? config.defaultCloud),\n environment: opts.allNamespaces ? undefined : (opts.env ?? config.defaultEnvironment),\n label: opts.label,\n });\n }\n \n if (data.length === 0) {\n console.log(`No ${resourceDef.plural} found.`);\n return;\n }\n \n const columns = config.outputFormat === \"wide\" && resourceDef.wideColumns\n ? [...resourceDef.columns, ...resourceDef.wideColumns]\n : resourceDef.columns;\n \n formatOutput(data, config.outputFormat, columns);\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`);\n process.exit(1);\n }\n });\n}\n```\n\nAdd to `index.ts`: `registerGetCommand(program);`\n\nExtend LabdClient with the generic resource methods used above: `getResource(resourceDef, name)` and `listResources(resourceDef, filters)`.",
|
||||
"testStrategy": "1. Integration test: `labctl get servers` returns list from labd\n2. Test filtering: `labctl get servers --status discovered` only shows discovered\n3. Test name lookup: `labctl get server labmaster` returns single server\n4. Test MAC lookup: `labctl get server 38:05:25:33:e2:e4` resolves by MAC\n5. Test output formats: -o json, -o yaml, -o wide produce correct output\n6. Test unknown resource: `labctl get foo` shows helpful error",
|
||||
"priority": "high",
|
||||
"dependencies": [
|
||||
76
|
||||
],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 78,
|
||||
"title": "Implement 'labctl describe' Command",
|
||||
"description": "Create the 'labctl describe <resource> <name>' command that shows detailed information about a resource including relationships, hardware info, and history.",
|
||||
"details": "Create `bastion/src/cli/src/commands/describe.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { resolveResourceType } from \"../utils/resources.js\";\nimport { getLabdClient } from \"../api/config.js\";\n\nconst BOLD = \"\\x1b[1m\";\nconst DIM = \"\\x1b[2m\";\nconst RESET = \"\\x1b[0m\";\n\ninterface DescribeSection {\n title: string;\n fields: Array<[string, string | undefined]>;\n}\n\nfunction printDescribe(name: string, sections: DescribeSection[]): void {\n console.log(`${BOLD}Name:${RESET} ${name}`);\n for (const section of sections) {\n console.log(`\\n${BOLD}${section.title}:${RESET}`);\n for (const [key, value] of section.fields) {\n if (value !== undefined) {\n console.log(` ${DIM}${key}:${RESET} ${value}`);\n }\n }\n }\n}\n\nexport function registerDescribeCommand(program: Command): void {\n program\n .command(\"describe <resource> <name>\")\n .description(\"Show detailed information about a resource\")\n .action(async (resource: string, name: string) => {\n const resourceDef = resolveResourceType(resource);\n const client = getLabdClient();\n \n try {\n const item = await client.describeResource(resourceDef, name);\n if (!item) {\n console.error(`${resourceDef.singular} \"${name}\" not found.`);\n process.exit(1);\n }\n \n // Resource-specific formatting\n switch (resourceDef.kind) {\n case \"Server\":\n printServerDescription(item);\n break;\n case \"Cluster\":\n printClusterDescription(item);\n break;\n default:\n console.log(JSON.stringify(item, null, 2));\n }\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`);\n process.exit(1);\n }\n });\n}\n\nfunction printServerDescription(server: any): void {\n const sections: DescribeSection[] = [\n {\n title: \"Metadata\",\n fields: [\n [\"ID\", server.id],\n [\"Cloud\", server.cloud],\n [\"Environment\", server.environment],\n [\"Role\", server.role],\n [\"Status\", server.status],\n [\"Created\", server.createdAt],\n [\"Last Seen\", server.lastHeartbeat],\n ],\n },\n {\n title: \"Hardware\",\n fields: [\n [\"MAC\", server.mac],\n [\"IP\", server.ip],\n [\"Architecture\", server.hardwareInfo?.arch],\n [\"CPU\", server.hardwareInfo?.cpu_model],\n [\"Cores\", String(server.hardwareInfo?.cpu_cores)],\n [\"Memory\", `${server.hardwareInfo?.memory_gb}GB`],\n [\"Product\", server.hardwareInfo?.product],\n ],\n },\n ];\n \n if (server.nics?.length > 0) {\n sections.push({\n title: \"Network Interfaces\",\n fields: server.nics.map((n: any) => [n.name, `${n.mac} ${n.ip ?? \"\"} (${n.state})`]),\n });\n }\n \n if (server.disks?.length > 0) {\n sections.push({\n title: \"Disks\",\n fields: server.disks.map((d: any) => [d.name, `${d.sizeGb}GB ${d.model ?? \"\"}`]),\n });\n }\n \n if (server.clusterMemberships?.length > 0) {\n sections.push({\n title: \"Cluster Membership\",\n fields: server.clusterMemberships.map((m: any) => [m.cluster.name, m.role]),\n });\n }\n \n printDescribe(server.hostname, sections);\n}\n```",
|
||||
"testStrategy": "1. Integration test: `labctl describe server labmaster` shows full details\n2. Test hardware info display: CPU, memory, disks, NICs all shown\n3. Test cluster membership: server in cluster shows membership section\n4. Test not found: `labctl describe server nonexistent` shows helpful error\n5. Test different resource types: describe cluster, network, bastion",
|
||||
"priority": "medium",
|
||||
"dependencies": [
|
||||
77
|
||||
],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 79,
|
||||
"title": "Implement 'labctl create/delete' Commands",
|
||||
"description": "Create the 'labctl create <resource>' and 'labctl delete <resource> <name>' commands for creating and removing resources like networks, clusters, and tokens.",
|
||||
"details": "Create `bastion/src/cli/src/commands/create.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { resolveResourceType } from \"../utils/resources.js\";\nimport { getLabdClient } from \"../api/config.js\";\n\nexport function registerCreateCommand(program: Command): void {\n const create = program\n .command(\"create <resource>\")\n .description(\"Create a resource\");\n \n // labctl create network --name lab --cidr 192.168.8.0/24\n create\n .command(\"network\")\n .description(\"Create a network\")\n .requiredOption(\"--name <name>\", \"Network name\")\n .requiredOption(\"--cidr <cidr>\", \"Network CIDR (e.g., 192.168.8.0/24)\")\n .option(\"--gateway <gateway>\", \"Gateway IP\")\n .option(\"--vlan <vlan>\", \"VLAN ID\", parseInt)\n .option(\"--domain <domain>\", \"DNS domain\")\n .option(\"--dhcp\", \"Enable DHCP\")\n .action(async (opts) => {\n const client = getLabdClient();\n try {\n const network = await client.createNetwork({\n name: opts.name,\n cidr: opts.cidr,\n gateway: opts.gateway,\n vlan: opts.vlan,\n domain: opts.domain,\n dhcpEnabled: opts.dhcp ?? false,\n });\n console.log(`network/${network.name} created`);\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? 
err.message : String(err)}`);\n process.exit(1);\n }\n });\n \n // labctl create token --label \"worker enrollment\" --type reusable\n create\n .command(\"token\")\n .description(\"Create a join token\")\n .option(\"--label <label>\", \"Token label/description\")\n .option(\"--type <type>\", \"Token type: one-time or reusable\", \"one-time\")\n .option(\"--expires <duration>\", \"Expiration (e.g., 24h, 7d)\")\n .action(async (opts) => {\n const client = getLabdClient();\n try {\n const token = await client.createToken(opts);\n console.log(`Token created: ${token.token}`);\n if (opts.label) console.log(`Label: ${opts.label}`);\n if (token.expiresAt) console.log(`Expires: ${token.expiresAt}`);\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);\n process.exit(1);\n }\n });\n}\n```\n\nCreate `bastion/src/cli/src/commands/delete.ts`:\n\n```typescript\nexport function registerDeleteCommand(program: Command): void {\n program\n .command(\"delete <resource> <name>\")\n .description(\"Delete a resource\")\n .option(\"--force\", \"Skip confirmation\")\n .action(async (resource: string, name: string, opts) => {\n const resourceDef = resolveResourceType(resource);\n const client = getLabdClient();\n \n if (!opts.force) {\n const { confirm } = await import(\"../utils/prompts.js\");\n const yes = await confirm(`Delete ${resourceDef.singular} \"${name}\"?`);\n if (!yes) {\n console.log(\"Cancelled.\");\n return;\n }\n }\n \n try {\n await client.deleteResource(resourceDef, name);\n console.log(`${resourceDef.singular}/${name} deleted`);\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);\n process.exit(1);\n }\n });\n}\n```",
|
||||
"testStrategy": "1. Integration test: `labctl create network` creates network in DB\n2. Test validation: missing required flags shows helpful error\n3. Test token creation: token returned is valid UUID, stored in DB\n4. Test delete with confirmation: prompts user, respects --force\n5. Test delete cascade: deleting server removes NICs, disks\n6. Test delete protection: cannot delete bastion with connected servers",
|
||||
"priority": "medium",
|
||||
"dependencies": [
|
||||
77
|
||||
],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 80,
|
||||
"title": "Refactor Provision Commands to kubectl-style",
|
||||
"description": "Refactor existing provision commands to use kubectl-style syntax: 'labctl provision <server>' instead of 'labctl provision install <mac>'.",
|
||||
"details": "The new command structure should be:\n- `labctl provision <server> --os fedora-43 --role worker` (queue install)\n- `labctl reprovision <server>` (reinstall)\n- `labctl forget <server>` (remove from tracking)\n\nModify `bastion/src/cli/src/commands/install.ts` → rename to `provision.ts`:\n\n```typescript\nexport function registerProvisionCommand(program: Command): void {\n program\n .command(\"provision <server>\")\n .description(\"Queue a server for OS installation\")\n .requiredOption(\"--os <os>\", \"Operating system\", \"fedora-43\")\n .requiredOption(\"--role <role>\", \"Server role\", \"worker\")\n .option(\"--disk <disk>\", \"Target disk (auto-detected if not specified)\")\n .option(\"--hostname <hostname>\", \"Override hostname\")\n .action(async (server: string, opts) => {\n const client = getLabdClient();\n \n // Resolve server: could be hostname, MAC, or ID\n const resolved = await client.resolveServer(server);\n if (!resolved) {\n console.error(`Server \"${server}\" not found.`);\n console.error(\"Tip: Use 'labctl get servers' to see available servers.\");\n process.exit(1);\n }\n \n if (resolved.status === \"installed\") {\n console.error(`Server \"${resolved.hostname}\" is already installed.`);\n console.error(\"Tip: Use 'labctl reprovision' to reinstall.\");\n process.exit(1);\n }\n \n try {\n await client.provisionServer(resolved.mac, {\n hostname: opts.hostname ?? resolved.hostname,\n os: opts.os,\n role: opts.role,\n disk: opts.disk,\n });\n console.log(`Server ${resolved.hostname} queued for ${opts.os} installation as ${opts.role}.`);\n } catch (err) {\n console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);\n process.exit(1);\n }\n });\n}\n```\n\nSimilarly update reprovision.ts and forget.ts to accept server name/MAC/ID.\n\nUpdate index.ts to register commands at top level instead of under 'provision' subcommand.",
|
||||
"testStrategy": "1. Test server resolution: provision by hostname, MAC, or UUID all work\n2. Test already installed: provisioning installed server shows reprovision hint\n3. Test unknown server: helpful error message with tip\n4. Test reprovision: reinstalls installed server\n5. Test forget: removes server from all state categories\n6. Backward compat: verify 'labctl provision list' still works (deprecation warning)",
|
||||
"priority": "medium",
|
||||
"dependencies": [
|
||||
77
|
||||
],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 81,
|
||||
"title": "Implement Server and Resource API Endpoints in labd",
|
||||
"description": "Add REST API endpoints in labd for full resource CRUD operations: networks, clusters, tokens. Extend servers endpoint with filters and relationship includes.",
|
||||
"details": "Create/extend labd route files:\n\n1. **Extend servers.ts**:\n```typescript\n// GET /api/servers - with extended filters and includes\napp.get(\"/api/servers\", async (request, reply) => {\n const { status, role, cloud, environment, label, include } = request.query;\n \n const where = {};\n if (status) where.status = status;\n if (role) where.role = role;\n if (cloud) where.cloud = cloud;\n if (environment) where.environment = environment;\n if (label) where.labels = { path: [labelKey], equals: labelValue };\n \n const servers = await db.server.findMany({\n where,\n include: {\n nics: include?.includes(\"nics\"),\n disks: include?.includes(\"disks\"),\n clusterMemberships: include?.includes(\"clusters\") ? { include: { cluster: true } } : false,\n bastion: include?.includes(\"bastion\"),\n },\n });\n return servers;\n});\n\n// GET /api/servers/:id - by ID, hostname, or MAC\napp.get(\"/api/servers/:identifier\", async (request, reply) => {\n const { identifier } = request.params;\n \n // Try UUID first\n let server = await db.server.findUnique({ where: { id: identifier }, include: fullInclude });\n // Try hostname\n if (!server) server = await db.server.findUnique({ where: { hostname: identifier }, include: fullInclude });\n // Try MAC\n if (!server) server = await db.server.findUnique({ where: { mac: identifier.toLowerCase() }, include: fullInclude });\n \n if (!server) return reply.code(404).send({ error: \"Server not found\" });\n return server;\n});\n```\n\n2. 
**Create networks.ts**:\n```typescript\n// GET /api/networks, POST /api/networks, DELETE /api/networks/:id\nexport function registerNetworkRoutes(app: FastifyInstance, db: DbClient): void {\n app.get(\"/api/networks\", async () => db.network.findMany());\n \n app.post(\"/api/networks\", async (request, reply) => {\n const { name, cidr, gateway, vlan, domain, dhcpEnabled } = request.body;\n // Validate CIDR format\n const network = await db.network.create({ data: { name, cidr, gateway, vlan, domain, dhcpEnabled } });\n return reply.code(201).send(network);\n });\n \n app.delete(\"/api/networks/:id\", async (request, reply) => {\n await db.network.delete({ where: { id: request.params.id } });\n return reply.code(204).send();\n });\n}\n```\n\n3. **Create clusters.ts**:\n```typescript\n// Similar CRUD for clusters with member management\napp.get(\"/api/clusters/:id/members\", ...);\napp.post(\"/api/clusters/:id/members\", ...);\napp.delete(\"/api/clusters/:id/members/:serverId\", ...);\n```",
|
||||
"testStrategy": "1. Integration test all CRUD endpoints with HTTP client\n2. Test server resolution: by id, hostname, and MAC all return same server\n3. Test include parameter: nics, disks, clusters, bastion included when requested\n4. Test validation: invalid CIDR rejected, duplicate names rejected\n5. Test network deletion with attached NICs: ServerNic.networkId is optional, so choose one behavior (reject the delete, cascade, or null out networkId) and assert it",
|
||||
"priority": "medium",
|
||||
"dependencies": [
|
||||
72,
|
||||
73
|
||||
],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 82,
|
||||
"title": "Implement RBAC Permission Checks in CLI",
|
||||
"description": "Wire RBAC permission checks into CLI commands. Check user permissions before executing operations using the existing Permission model.",
|
||||
"details": "1. Create `bastion/src/cli/src/middleware/rbac.ts`:\n\n```typescript\nimport { getLabdClient } from \"../api/config.js\";\n\nexport interface PermissionContext {\n action: string; // read, exec, apply, destroy, manage, admin\n cloud?: string;\n environment?: string;\n server?: string;\n}\n\nexport async function checkPermission(ctx: PermissionContext): Promise<boolean> {\n const client = getLabdClient();\n try {\n const result = await client.checkPermission(ctx);\n return result.allowed;\n } catch {\n // If can't reach labd, fail open for local operations\n return true;\n }\n}\n\nexport async function requirePermission(ctx: PermissionContext): Promise<void> {\n const allowed = await checkPermission(ctx);\n if (!allowed) {\n throw new Error(\n `Permission denied: ${ctx.action} on ${ctx.server ?? \"*\"}@${ctx.cloud ?? \"*\"}/${ctx.environment ?? \"*\"}`\n );\n }\n}\n```\n\n2. Add labd endpoint `POST /api/auth/check-permission`:\n```typescript\napp.post(\"/api/auth/check-permission\", async (request, reply) => {\n const user = await authenticateRequest(request); // from cert or token\n const { action, cloud, environment, server } = request.body;\n \n const permissions = await db.permission.findMany({\n where: {\n role: { userBindings: { some: { userId: user.id } } },\n },\n });\n \n const allowed = permissions.some(p => \n matchesPattern(p.action, action) &&\n matchesPattern(p.cloud, cloud ?? \"*\") &&\n matchesPattern(p.environment, environment ?? \"*\") &&\n matchesPattern(p.server, server ?? \"*\")\n );\n \n return { allowed };\n});\n```\n\n3. 
Integrate into commands:\n```typescript\n// In provision command\nawait requirePermission({ action: \"apply\", cloud, environment, server: resolved.hostname });\n\n// In delete command\nawait requirePermission({ action: \"destroy\", cloud, environment, server: name });\n\n// In get command (filter results)\nconst servers = await client.listServers(filters);\nconst visible = await filterByPermission(servers, \"read\");\n```",
|
||||
"testStrategy": "1. Unit test permission matching logic with wildcards\n2. Test admin role: has access to all resources\n3. Test operator role: can read/exec but not destroy\n4. Test viewer role: can only read, provision denied\n5. Test scope matching: permission for cloud=aws doesn't grant access to cloud=baremetal\n6. Test denied action is audit-logged\n7. Test labd unreachable: checkPermission fails open (returns true) when the request throws; confirm this path is only reachable for local operations",
|
||||
"priority": "medium",
|
||||
"dependencies": [
|
||||
77,
|
||||
81
|
||||
],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 83,
|
||||
"title": "Implement Audit Logging for Resource Operations",
|
||||
"description": "Log all resource mutations to the AuditLog table. Include user, action, resource type/name, result, and source IP.",
|
||||
"details": "1. Create `bastion/src/labd/src/services/audit.ts`:\n\n```typescript\nimport type { PrismaClient } from \"@prisma/client\";\n\nexport interface AuditEntry {\n userId?: string;\n serverId?: string;\n sessionId?: string;\n action: string; // create, update, delete, provision, exec, rbac-denied\n resourceType: string; // server, cluster, network, token, etc.\n resourceName: string;\n args?: string; // sanitized args (no secrets)\n result: \"success\" | \"denied\" | \"error\";\n durationMs?: number;\n sourceIp?: string;\n}\n\nexport class AuditService {\n constructor(private readonly db: PrismaClient) {}\n \n async log(entry: AuditEntry): Promise<void> {\n await this.db.auditLog.create({\n data: {\n userId: entry.userId,\n serverId: entry.serverId,\n sessionId: entry.sessionId,\n action: entry.action,\n resourceType: entry.resourceType,\n resourceName: entry.resourceName,\n args: entry.args,\n result: entry.result,\n durationMs: entry.durationMs,\n sourceIp: entry.sourceIp,\n },\n });\n }\n \n async query(filters: {\n userId?: string;\n action?: string;\n resourceType?: string;\n since?: Date;\n limit?: number;\n }): Promise<AuditEntry[]> {\n return this.db.auditLog.findMany({\n where: {\n userId: filters.userId,\n action: filters.action,\n resourceType: filters.resourceType,\n timestamp: filters.since ? { gte: filters.since } : undefined,\n },\n orderBy: { timestamp: \"desc\" },\n take: filters.limit ?? 100,\n });\n }\n}\n```\n\n2. Add Fastify hook to wrap route handlers:\n```typescript\napp.addHook(\"onResponse\", async (request, reply) => {\n // Log mutations (POST, PUT, DELETE)\n if ([\"POST\", \"PUT\", \"DELETE\"].includes(request.method)) {\n const path = request.url;\n const resourceMatch = path.match(/\\/api\\/(\\w+)(?:\\/([^/]+))?/);\n if (resourceMatch) {\n await auditService.log({\n action: methodToAction(request.method),\n resourceType: resourceMatch[1],\n resourceName: resourceMatch[2] ?? \"\",\n result: reply.statusCode < 400 ? 
\"success\" : \"error\",\n sourceIp: request.ip,\n });\n }\n }\n});\n```\n\n3. Add `labctl get audit` command to view audit logs.",
|
||||
"testStrategy": "1. Integration test: create network, verify audit log entry created\n2. Test RBAC denial is logged with result=denied\n3. Test sensitive data sanitization: tokens/passwords not in args\n4. Test query filters: by user, action, resourceType, time range\n5. Test `labctl get audit` displays recent entries correctly",
|
||||
"priority": "medium",
|
||||
"dependencies": [
|
||||
81,
|
||||
82
|
||||
],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
},
|
||||
{
|
||||
"id": 84,
|
||||
"title": "Update CLI Entry Point and Help Text",
|
||||
"description": "Update the CLI entry point to register all new commands and update help text to reflect the kubectl-style interface. Add deprecation warnings for old command structure.",
|
||||
"details": "Update `bastion/src/cli/src/index.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { APP_VERSION } from \"@lab/shared\";\nimport { loadConfig } from \"./config/index.js\";\n\n// New kubectl-style commands\nimport { registerGetCommand } from \"./commands/get.js\";\nimport { registerDescribeCommand } from \"./commands/describe.js\";\nimport { registerCreateCommand } from \"./commands/create.js\";\nimport { registerDeleteCommand } from \"./commands/delete.js\";\nimport { registerApplyCommand } from \"./commands/apply.js\";\nimport { registerEditCommand } from \"./commands/edit.js\";\n\n// Action commands\nimport { registerProvisionCommand } from \"./commands/provision.js\";\nimport { registerReprovisionCommand } from \"./commands/reprovision.js\";\nimport { registerForgetCommand } from \"./commands/forget.js\";\n\n// Bastion management\nimport { registerBastionCommand } from \"./commands/bastion.js\"; // start/stop/status\n\n// App management (unchanged)\nimport { registerAppCommand } from \"./commands/app.js\";\n\n// Utility\nimport { registerConfigCommand } from \"./commands/config.js\";\nimport { registerLoginCommand } from \"./commands/login.js\";\nimport { registerDoctorCommand } from \"./commands/doctor.js\";\n\nexport function createProgram(): Command {\n const program = new Command();\n \n program\n .name(\"labctl\")\n .description(\"Lab infrastructure management CLI\")\n .version(APP_VERSION);\n \n // Global options\n program\n .option(\"-o, --output <format>\", \"output format (table, json, yaml, wide)\", \"table\")\n .option(\"--server <url>\", \"override labd server URL\")\n .option(\"--env <name>\", \"override default environment\")\n .option(\"--cloud <name>\", \"override default cloud\")\n .option(\"--debug\", \"enable debug output\")\n .option(\"--no-color\", \"disable colored output\");\n \n // Core CRUD commands\n registerGetCommand(program); // labctl get <resource> [name]\n registerDescribeCommand(program); // 
labctl describe <resource> <name>\n registerCreateCommand(program); // labctl create <resource>\n registerDeleteCommand(program); // labctl delete <resource> <name>\n registerApplyCommand(program); // labctl apply -f <file>\n registerEditCommand(program); // labctl edit <resource> <name>\n \n // Provisioning actions\n registerProvisionCommand(program); // labctl provision <server>\n registerReprovisionCommand(program);// labctl reprovision <server>\n registerForgetCommand(program); // labctl forget <server>\n \n // Bastion management\n registerBastionCommand(program); // labctl bastion start|stop|status\n \n // App management\n registerAppCommand(program); // labctl app install|health k3s\n \n // Utility\n registerConfigCommand(program);\n registerLoginCommand(program);\n registerDoctorCommand(program);\n \n // Legacy compatibility with deprecation warnings\n registerLegacyCommands(program);\n \n return program;\n}\n\nfunction registerLegacyCommands(program: Command): void {\n // labctl provision list -> labctl get servers (with warning)\n program\n .command(\"provision\")\n .command(\"list\")\n .action(() => {\n console.warn(\"DEPRECATED: Use 'labctl get servers' instead.\");\n // Delegate to get servers\n });\n}\n```\n\nUpdate shell completions in `scripts/generate-completions.ts` for new command structure.",
|
||||
"testStrategy": "1. Test --help shows all new commands with descriptions\n2. Test resource type help: `labctl get --help` lists valid resources\n3. Test deprecated commands show warning but still work\n4. Test shell completions generated for new commands\n5. Test global options: -o, --server, --env, --cloud all work",
|
||||
"priority": "low",
|
||||
"dependencies": [
|
||||
77,
|
||||
78,
|
||||
79,
|
||||
80
|
||||
],
|
||||
"status": "pending",
|
||||
"subtasks": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"created": "2026-03-26T04:26:49.813Z",
|
||||
"updated": "2026-03-26T04:26:49.813Z",
|
||||
"description": "Tasks for master context"
|
||||
}
|
||||
}
|
||||
}
|
||||
47
.taskmaster/templates/example_prd.txt
Normal file
47
.taskmaster/templates/example_prd.txt
Normal file
@@ -0,0 +1,47 @@
|
||||
<context>
|
||||
# Overview
|
||||
[Provide a high-level overview of your product here. Explain what problem it solves, who it's for, and why it's valuable.]
|
||||
|
||||
# Core Features
|
||||
[List and describe the main features of your product. For each feature, include:
|
||||
- What it does
|
||||
- Why it's important
|
||||
- How it works at a high level]
|
||||
|
||||
# User Experience
|
||||
[Describe the user journey and experience. Include:
|
||||
- User personas
|
||||
- Key user flows
|
||||
- UI/UX considerations]
|
||||
</context>
|
||||
<PRD>
|
||||
# Technical Architecture
|
||||
[Outline the technical implementation details:
|
||||
- System components
|
||||
- Data models
|
||||
- APIs and integrations
|
||||
- Infrastructure requirements]
|
||||
|
||||
# Development Roadmap
|
||||
[Break down the development process into phases:
|
||||
- MVP requirements
|
||||
- Future enhancements
|
||||
- Do not think about timelines whatsoever -- all that matters is scope and detailing exactly what needs to be built in each phase so it can later be cut up into tasks]
|
||||
|
||||
# Logical Dependency Chain
|
||||
[Define the logical order of development:
|
||||
- Which features need to be built first (foundation)
|
||||
- Getting as quickly as possible to a usable, visible front end that works
|
||||
- Properly pacing and scoping each feature so it is atomic but can also be built upon and improved as development progresses]
|
||||
|
||||
# Risks and Mitigations
|
||||
[Identify potential risks and how they'll be addressed:
|
||||
- Technical challenges
|
||||
- Figuring out the MVP that we can build upon
|
||||
- Resource constraints]
|
||||
|
||||
# Appendix
|
||||
[Include any additional information:
|
||||
- Research findings
|
||||
- Technical specifications]
|
||||
</PRD>
|
||||
511
.taskmaster/templates/example_prd_rpg.txt
Normal file
511
.taskmaster/templates/example_prd_rpg.txt
Normal file
@@ -0,0 +1,511 @@
|
||||
<rpg-method>
|
||||
# Repository Planning Graph (RPG) Method - PRD Template
|
||||
|
||||
This template teaches you (AI or human) how to create structured, dependency-aware PRDs using the RPG methodology from Microsoft Research. The key insight: separate WHAT (functional) from HOW (structural), then connect them with explicit dependencies.
|
||||
|
||||
## Core Principles
|
||||
|
||||
1. **Dual-Semantics**: Think functional (capabilities) AND structural (code organization) separately, then map them
|
||||
2. **Explicit Dependencies**: Never assume - always state what depends on what
|
||||
3. **Topological Order**: Build foundation first, then layers on top
|
||||
4. **Progressive Refinement**: Start broad, refine iteratively
|
||||
|
||||
## How to Use This Template
|
||||
|
||||
- Follow the instructions in each `<instruction>` block
|
||||
- Look at `<example>` blocks to see good vs bad patterns
|
||||
- Fill in the content sections with your project details
|
||||
- The AI reading this will learn the RPG method by following along
|
||||
- Task Master will parse the resulting PRD into dependency-aware tasks
|
||||
|
||||
## Recommended Tools for Creating PRDs
|
||||
|
||||
When using this template to **create** a PRD (not parse it), use **code-context-aware AI assistants** for best results:
|
||||
|
||||
**Why?** The AI needs to understand your existing codebase to make good architectural decisions about modules, dependencies, and integration points.
|
||||
|
||||
**Recommended tools:**
|
||||
- **Claude Code** (claude-code CLI) - Best for structured reasoning and large contexts
|
||||
- **Cursor/Windsurf** - IDE integration with full codebase context
|
||||
- **Gemini CLI** (gemini-cli) - Massive context window for large codebases
|
||||
- **Codex/Grok CLI** - Strong code generation with context awareness
|
||||
|
||||
**Note:** Once your PRD is created, `task-master parse-prd` works with any configured AI model - it just needs to read the PRD text itself, not your codebase.
|
||||
</rpg-method>
|
||||
|
||||
---
|
||||
|
||||
<overview>
|
||||
<instruction>
|
||||
Start with the problem, not the solution. Be specific about:
|
||||
- What pain point exists?
|
||||
- Who experiences it?
|
||||
- Why don't existing solutions work?
|
||||
- What does success look like (measurable outcomes)?
|
||||
|
||||
Keep this section focused - don't jump into implementation details yet.
|
||||
</instruction>
|
||||
|
||||
## Problem Statement
|
||||
[Describe the core problem. Be concrete about user pain points.]
|
||||
|
||||
## Target Users
|
||||
[Define personas, their workflows, and what they're trying to achieve.]
|
||||
|
||||
## Success Metrics
|
||||
[Quantifiable outcomes. Examples: "80% task completion via autopilot", "< 5% manual intervention rate"]
|
||||
|
||||
</overview>
|
||||
|
||||
---
|
||||
|
||||
<functional-decomposition>
|
||||
<instruction>
|
||||
Now think about CAPABILITIES (what the system DOES), not code structure yet.
|
||||
|
||||
Step 1: Identify high-level capability domains
|
||||
- Think: "What major things does this system do?"
|
||||
- Examples: Data Management, Core Processing, Presentation Layer
|
||||
|
||||
Step 2: For each capability, enumerate specific features
|
||||
- Use explore-exploit strategy:
|
||||
* Exploit: What features are REQUIRED for core value?
|
||||
* Explore: What features make this domain COMPLETE?
|
||||
|
||||
Step 3: For each feature, define:
|
||||
- Description: What it does in one sentence
|
||||
- Inputs: What data/context it needs
|
||||
- Outputs: What it produces/returns
|
||||
- Behavior: Key logic or transformations
|
||||
|
||||
<example type="good">
|
||||
Capability: Data Validation
|
||||
Feature: Schema validation
|
||||
- Description: Validate JSON payloads against defined schemas
|
||||
- Inputs: JSON object, schema definition
|
||||
- Outputs: Validation result (pass/fail) + error details
|
||||
- Behavior: Iterate fields, check types, enforce constraints
|
||||
|
||||
Feature: Business rule validation
|
||||
- Description: Apply domain-specific validation rules
|
||||
- Inputs: Validated data object, rule set
|
||||
- Outputs: Boolean + list of violated rules
|
||||
- Behavior: Execute rules sequentially, short-circuit on failure
|
||||
</example>
|
||||
|
||||
<example type="bad">
|
||||
Capability: validation.js
|
||||
(Problem: This is a FILE, not a CAPABILITY. Mixing structure into functional thinking.)
|
||||
|
||||
Capability: Validation
|
||||
Feature: Make sure data is good
|
||||
(Problem: Too vague. No inputs/outputs. Not actionable.)
|
||||
</example>
|
||||
</instruction>
|
||||
|
||||
## Capability Tree
|
||||
|
||||
### Capability: [Name]
|
||||
[Brief description of what this capability domain covers]
|
||||
|
||||
#### Feature: [Name]
|
||||
- **Description**: [One sentence]
|
||||
- **Inputs**: [What it needs]
|
||||
- **Outputs**: [What it produces]
|
||||
- **Behavior**: [Key logic]
|
||||
|
||||
#### Feature: [Name]
|
||||
- **Description**:
|
||||
- **Inputs**:
|
||||
- **Outputs**:
|
||||
- **Behavior**:
|
||||
|
||||
### Capability: [Name]
|
||||
...
|
||||
|
||||
</functional-decomposition>
|
||||
|
||||
---
|
||||
|
||||
<structural-decomposition>
|
||||
<instruction>
|
||||
NOW think about code organization. Map capabilities to actual file/folder structure.
|
||||
|
||||
Rules:
|
||||
1. Each capability maps to a module (folder or file)
|
||||
2. Features within a capability map to functions/classes
|
||||
3. Use clear module boundaries - each module has ONE responsibility
|
||||
4. Define what each module exports (public interface)
|
||||
|
||||
The goal: Create a clear mapping between "what it does" (functional) and "where it lives" (structural).
|
||||
|
||||
<example type="good">
|
||||
Capability: Data Validation
|
||||
→ Maps to: src/validation/
|
||||
├── schema-validator.js (Schema validation feature)
|
||||
├── rule-validator.js (Business rule validation feature)
|
||||
└── index.js (Public exports)
|
||||
|
||||
Exports:
|
||||
- validateSchema(data, schema)
|
||||
- validateRules(data, rules)
|
||||
</example>
|
||||
|
||||
<example type="bad">
|
||||
Capability: Data Validation
|
||||
→ Maps to: src/utils.js
|
||||
(Problem: "utils" is not a clear module boundary. Where do I find validation logic?)
|
||||
|
||||
Capability: Data Validation
|
||||
→ Maps to: src/validation/everything.js
|
||||
(Problem: One giant file. Features should map to separate files for maintainability.)
|
||||
</example>
|
||||
</instruction>
|
||||
|
||||
## Repository Structure
|
||||
|
||||
```
|
||||
project-root/
|
||||
├── src/
|
||||
│ ├── [module-name]/ # Maps to: [Capability Name]
|
||||
│ │ ├── [file].js # Maps to: [Feature Name]
|
||||
│ │ └── index.js # Public exports
|
||||
│ └── [module-name]/
|
||||
├── tests/
|
||||
└── docs/
|
||||
```
|
||||
|
||||
## Module Definitions
|
||||
|
||||
### Module: [Name]
|
||||
- **Maps to capability**: [Capability from functional decomposition]
|
||||
- **Responsibility**: [Single clear purpose]
|
||||
- **File structure**:
|
||||
```
|
||||
module-name/
|
||||
├── feature1.js
|
||||
├── feature2.js
|
||||
└── index.js
|
||||
```
|
||||
- **Exports**:
|
||||
- `functionName()` - [what it does]
|
||||
- `ClassName` - [what it does]
|
||||
|
||||
</structural-decomposition>
|
||||
|
||||
---
|
||||
|
||||
<dependency-graph>
|
||||
<instruction>
|
||||
This is THE CRITICAL SECTION for Task Master parsing.
|
||||
|
||||
Define explicit dependencies between modules. This creates the topological order for task execution.
|
||||
|
||||
Rules:
|
||||
1. List modules in dependency order (foundation first)
|
||||
2. For each module, state what it depends on
|
||||
3. Foundation modules should have NO dependencies
|
||||
4. Every non-foundation module should depend on at least one other module
|
||||
5. Think: "What must EXIST before I can build this module?"
|
||||
|
||||
<example type="good">
|
||||
Foundation Layer (no dependencies):
|
||||
- error-handling: No dependencies
|
||||
- config-manager: No dependencies
|
||||
- base-types: No dependencies
|
||||
|
||||
Data Layer:
|
||||
- schema-validator: Depends on [base-types, error-handling]
|
||||
- data-ingestion: Depends on [schema-validator, config-manager]
|
||||
|
||||
Core Layer:
|
||||
- algorithm-engine: Depends on [base-types, error-handling]
|
||||
- pipeline-orchestrator: Depends on [algorithm-engine, data-ingestion]
|
||||
</example>
|
||||
|
||||
<example type="bad">
|
||||
- validation: Depends on API
|
||||
- API: Depends on validation
|
||||
(Problem: Circular dependency. This will cause build/runtime issues.)
|
||||
|
||||
- user-auth: Depends on everything
|
||||
(Problem: Too many dependencies. Should be more focused.)
|
||||
</example>
|
||||
</instruction>
|
||||
|
||||
## Dependency Chain
|
||||
|
||||
### Foundation Layer (Phase 0)
|
||||
No dependencies - these are built first.
|
||||
|
||||
- **[Module Name]**: [What it provides]
|
||||
- **[Module Name]**: [What it provides]
|
||||
|
||||
### [Layer Name] (Phase 1)
|
||||
- **[Module Name]**: Depends on [[module-from-phase-0], [module-from-phase-0]]
|
||||
- **[Module Name]**: Depends on [[module-from-phase-0]]
|
||||
|
||||
### [Layer Name] (Phase 2)
|
||||
- **[Module Name]**: Depends on [[module-from-phase-1], [module-from-foundation]]
|
||||
|
||||
[Continue building up layers...]
|
||||
|
||||
</dependency-graph>
|
||||
|
||||
---
|
||||
|
||||
<implementation-roadmap>
|
||||
<instruction>
|
||||
Turn the dependency graph into concrete development phases.
|
||||
|
||||
Each phase should:
|
||||
1. Have clear entry criteria (what must exist before starting)
|
||||
2. Contain tasks that can be parallelized (no inter-dependencies within phase)
|
||||
3. Have clear exit criteria (how do we know phase is complete?)
|
||||
4. Build toward something USABLE (not just infrastructure)
|
||||
|
||||
Phase ordering follows topological sort of dependency graph.
|
||||
|
||||
<example type="good">
|
||||
Phase 0: Foundation
|
||||
Entry: Clean repository
|
||||
Tasks:
|
||||
- Implement error handling utilities
|
||||
- Create base type definitions
|
||||
- Setup configuration system
|
||||
Exit: Other modules can import foundation without errors
|
||||
|
||||
Phase 1: Data Layer
|
||||
Entry: Phase 0 complete
|
||||
Tasks:
|
||||
- Implement schema validator (uses: base types, error handling)
|
||||
- Build data ingestion pipeline (uses: validator, config)
|
||||
Exit: End-to-end data flow from input to validated output
|
||||
</example>
|
||||
|
||||
<example type="bad">
|
||||
Phase 1: Build Everything
|
||||
Tasks:
|
||||
- API
|
||||
- Database
|
||||
- UI
|
||||
- Tests
|
||||
(Problem: No clear focus. Too broad. Dependencies not considered.)
|
||||
</example>
|
||||
</instruction>
|
||||
|
||||
## Development Phases
|
||||
|
||||
### Phase 0: [Foundation Name]
|
||||
**Goal**: [What foundational capability this establishes]
|
||||
|
||||
**Entry Criteria**: [What must be true before starting]
|
||||
|
||||
**Tasks**:
|
||||
- [ ] [Task name] (depends on: [none or list])
|
||||
- Acceptance criteria: [How we know it's done]
|
||||
- Test strategy: [What tests prove it works]
|
||||
|
||||
- [ ] [Task name] (depends on: [none or list])
|
||||
|
||||
**Exit Criteria**: [Observable outcome that proves phase complete]
|
||||
|
||||
**Delivers**: [What can users/developers do after this phase?]
|
||||
|
||||
---
|
||||
|
||||
### Phase 1: [Layer Name]
|
||||
**Goal**:
|
||||
|
||||
**Entry Criteria**: Phase 0 complete
|
||||
|
||||
**Tasks**:
|
||||
- [ ] [Task name] (depends on: [[tasks-from-phase-0]])
|
||||
- [ ] [Task name] (depends on: [[tasks-from-phase-0]])
|
||||
|
||||
**Exit Criteria**:
|
||||
|
||||
**Delivers**:
|
||||
|
||||
---
|
||||
|
||||
[Continue with more phases...]
|
||||
|
||||
</implementation-roadmap>
|
||||
|
||||
---
|
||||
|
||||
<test-strategy>
|
||||
<instruction>
|
||||
Define how testing will be integrated throughout development (TDD approach).
|
||||
|
||||
Specify:
|
||||
1. Test pyramid ratios (unit vs integration vs e2e)
|
||||
2. Coverage requirements
|
||||
3. Critical test scenarios
|
||||
4. Test generation guidelines for Surgical Test Generator
|
||||
|
||||
This section guides the AI when generating tests during the RED phase of TDD.
|
||||
|
||||
<example type="good">
|
||||
Critical Test Scenarios for Data Validation module:
|
||||
- Happy path: Valid data passes all checks
|
||||
- Edge cases: Empty strings, null values, boundary numbers
|
||||
- Error cases: Invalid types, missing required fields
|
||||
- Integration: Validator works with ingestion pipeline
|
||||
</example>
|
||||
</instruction>
|
||||
|
||||
## Test Pyramid
|
||||
|
||||
```
|
||||
/\
|
||||
/E2E\ ← [X]% (End-to-end, slow, comprehensive)
|
||||
/------\
|
||||
/Integration\ ← [Y]% (Module interactions)
|
||||
/------------\
|
||||
/ Unit Tests \ ← [Z]% (Fast, isolated, deterministic)
|
||||
/----------------\
|
||||
```
|
||||
|
||||
## Coverage Requirements
|
||||
- Line coverage: [X]% minimum
|
||||
- Branch coverage: [X]% minimum
|
||||
- Function coverage: [X]% minimum
|
||||
- Statement coverage: [X]% minimum
|
||||
|
||||
## Critical Test Scenarios
|
||||
|
||||
### [Module/Feature Name]
|
||||
**Happy path**:
|
||||
- [Scenario description]
|
||||
- Expected: [What should happen]
|
||||
|
||||
**Edge cases**:
|
||||
- [Scenario description]
|
||||
- Expected: [What should happen]
|
||||
|
||||
**Error cases**:
|
||||
- [Scenario description]
|
||||
- Expected: [How system handles failure]
|
||||
|
||||
**Integration points**:
|
||||
- [What interactions to test]
|
||||
- Expected: [End-to-end behavior]
|
||||
|
||||
## Test Generation Guidelines
|
||||
[Specific instructions for Surgical Test Generator about what to focus on, what patterns to follow, project-specific test conventions]
|
||||
|
||||
</test-strategy>
|
||||
|
||||
---
|
||||
|
||||
<architecture>
|
||||
<instruction>
|
||||
Describe technical architecture, data models, and key design decisions.
|
||||
|
||||
Keep this section AFTER functional/structural decomposition - implementation details come after understanding structure.
|
||||
</instruction>
|
||||
|
||||
## System Components
|
||||
[Major architectural pieces and their responsibilities]
|
||||
|
||||
## Data Models
|
||||
[Core data structures, schemas, database design]
|
||||
|
||||
## Technology Stack
|
||||
[Languages, frameworks, key libraries]
|
||||
|
||||
**Decision: [Technology/Pattern]**
|
||||
- **Rationale**: [Why chosen]
|
||||
- **Trade-offs**: [What we're giving up]
|
||||
- **Alternatives considered**: [What else we looked at]
|
||||
|
||||
</architecture>
|
||||
|
||||
---
|
||||
|
||||
<risks>
|
||||
<instruction>
|
||||
Identify risks that could derail development and how to mitigate them.
|
||||
|
||||
Categories:
|
||||
- Technical risks (complexity, unknowns)
|
||||
- Dependency risks (blocking issues)
|
||||
- Scope risks (creep, underestimation)
|
||||
</instruction>
|
||||
|
||||
## Technical Risks
|
||||
**Risk**: [Description]
|
||||
- **Impact**: [High/Medium/Low - effect on project]
|
||||
- **Likelihood**: [High/Medium/Low]
|
||||
- **Mitigation**: [How to address]
|
||||
- **Fallback**: [Plan B if mitigation fails]
|
||||
|
||||
## Dependency Risks
|
||||
[External dependencies, blocking issues]
|
||||
|
||||
## Scope Risks
|
||||
[Scope creep, underestimation, unclear requirements]
|
||||
|
||||
</risks>
|
||||
|
||||
---
|
||||
|
||||
<appendix>
|
||||
## References
|
||||
[Papers, documentation, similar systems]
|
||||
|
||||
## Glossary
|
||||
[Domain-specific terms]
|
||||
|
||||
## Open Questions
|
||||
[Things to resolve during development]
|
||||
</appendix>
|
||||
|
||||
---
|
||||
|
||||
<task-master-integration>
|
||||
# How Task Master Uses This PRD
|
||||
|
||||
When you run `task-master parse-prd <file>.txt`, the parser:
|
||||
|
||||
1. **Extracts capabilities** → Main tasks
|
||||
- Each `### Capability:` becomes a top-level task
|
||||
|
||||
2. **Extracts features** → Subtasks
|
||||
- Each `#### Feature:` becomes a subtask under its capability
|
||||
|
||||
3. **Parses dependencies** → Task dependencies
|
||||
- `Depends on: [X, Y]` sets task.dependencies = ["X", "Y"]
|
||||
|
||||
4. **Orders by phases** → Task priorities
|
||||
- Phase 0 tasks = highest priority
|
||||
- Phase N tasks = lower priority, properly sequenced
|
||||
|
||||
5. **Uses test strategy** → Test generation context
|
||||
- Feeds test scenarios to Surgical Test Generator during implementation
|
||||
|
||||
**Result**: A dependency-aware task graph that can be executed in topological order.
|
||||
|
||||
## Why RPG Structure Matters
|
||||
|
||||
Traditional flat PRDs lead to:
|
||||
- ❌ Unclear task dependencies
|
||||
- ❌ Arbitrary task ordering
|
||||
- ❌ Circular dependencies discovered late
|
||||
- ❌ Poorly scoped tasks
|
||||
|
||||
RPG-structured PRDs provide:
|
||||
- ✅ Explicit dependency chains
|
||||
- ✅ Topological execution order
|
||||
- ✅ Clear module boundaries
|
||||
- ✅ Validated task graph before implementation
|
||||
|
||||
## Tips for Best Results
|
||||
|
||||
1. **Spend time on dependency graph** - This is the most valuable section for Task Master
|
||||
2. **Keep features atomic** - Each feature should be independently testable
|
||||
3. **Progressive refinement** - Start broad, use `task-master expand` to break down complex tasks
|
||||
4. **Use research mode** - `task-master parse-prd --research` leverages AI for better task generation
|
||||
</task-master-integration>
|
||||
244
STATUS.md
Normal file
244
STATUS.md
Normal file
@@ -0,0 +1,244 @@
|
||||
# labctl Platform — Implementation Status
|
||||
|
||||
## What This Document Is
|
||||
|
||||
An honest assessment of what code exists, what works, what is stubbed, and what
|
||||
hasn't been started — measured against the PRD phases.
|
||||
|
||||
---
|
||||
|
||||
## Architecture Overview (as built)
|
||||
|
||||
```
|
||||
labctl CLI ──HTTP──▶ bastion (PXE server) ← WORKING
|
||||
labctl CLI ──HTTP──▶ labd (master daemon) ← PARTIALLY WORKING
|
||||
│
|
||||
├── CockroachDB/Prisma ← SCHEMA DEFINED, NOT DEPLOYED
|
||||
├── /ws/agent WebSocket ← ACCEPTS CONNECTIONS, DOES NOT ROUTE
|
||||
└── mTLS CA ← NOT IMPLEMENTED
|
||||
|
||||
lab-agent ──WS──▶ labd ← LIBRARY CODE, NO DAEMON BINARY
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Package Inventory
|
||||
|
||||
| Package | Lines of Source | Tests | Status |
|
||||
|---------|---------------|-------|--------|
|
||||
| @lab/shared | ~200 | 0 | Complete — types, protocol, errors |
|
||||
| @lab/bastion | ~800 | 32 | **Production-ready** — PXE discovery, install, reprovision |
|
||||
| @lab/cli | ~600 | 0 (uses bastion tests) | Complete — all commands implemented |
|
||||
| @lab/labd | ~500 | 2 | Partial — routes exist, core features stubbed |
|
||||
| @lab/agent | ~300 | 0 | Library only — no daemon binary |
|
||||
|
||||
All 5 packages compile. 32 tests pass.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Foundation
|
||||
|
||||
### DONE — Working in production
|
||||
|
||||
| Feature | Code | How It Works |
|
||||
|---------|------|-------------|
|
||||
| PXE bastion server | `src/bastion/` | Fastify HTTP + dnsmasq DHCP/TFTP. Machines PXE boot, get iPXE script from `/dispatch?mac=XX`, chain to discovery or install kickstart. State persisted to JSON file. |
|
||||
| Machine discovery | `routes/dispatch.ts`, `templates/discover.ks.ts` | Unknown MACs get a mini-kickstart that boots a RAM-only Fedora, scrapes hardware via `/proc`, `/sys`, `dmidecode`, POSTs to `/api/discover`, then reboots. Disks are never touched. |
|
||||
| Machine installation | `routes/api.ts`, `templates/install.ks.ts` | Queue a MAC via `POST /api/install`. Next PXE boot gets a full Kickstart with LVM partitioning (worker: longhorn LV, infra: rancher LV), SSH keys, k3s kernel prereqs, progress callbacks. |
|
||||
| Reprovision with data preservation | `commands/reprovision.ts`, `install.ks.ts` | `%pre` script detects existing LVM. Reformats `/`, `/var`, `/boot` but preserves `/home`, `/srv`, `/var/lib/longhorn`, `/var/lib/rancher`. |
|
||||
| CLI: init/provision commands | `src/cli/src/commands/` | `labctl init bastion standalone start/stop/status`, `labctl provision list/install/reprovision/forget`. All talk to bastion HTTP API. |
|
||||
| CLI: config management | `config/index.ts`, `commands/config.ts` | `labctl config list/get/set/path`. YAML config at `~/.labctl/config.yaml` with env var overrides. |
|
||||
| labd scaffold | `src/labd/` | Fastify server with health, server listing, token management routes. Prisma schema for all models. Starts with or without database. |
|
||||
| Prisma schema | `prisma/schema.prisma` | 10 models: Server, Agent, User, Role, Permission, UserRole, JoinToken, AuditLog, PulumiRun, Cluster. CockroachDB provider. |
|
||||
| Database seeding | `prisma/seed.ts` | Creates admin/viewer/operator roles with proper allow/deny permissions. Idempotent via upsert. |
|
||||
| Multi-arch builds + packaging | `nfpm.yaml`, `scripts/` | nfpm config for RPM/DEB. Bun compile for standalone binary (102MB labctl in `dist/`). |
|
||||
| Gitea CI/CD | `.gitea/` (on remote) | Lint → typecheck → test → build → publish pipeline on mysources.co.uk. |
|
||||
|
||||
### DONE — Code exists, not yet connected end-to-end
|
||||
|
||||
| Feature | Code | What's Real | What's Missing |
|
||||
|---------|------|------------|----------------|
|
||||
| lab-agent connection library | `lab-agent/src/services/connection.ts` | `AgentConnection` class: WebSocket to labd, heartbeat (10s), exponential backoff reconnect (1-30s), state machine (disconnected/connecting/connected/reconnecting), handles server-shutdown messages. | **No daemon binary.** This is a library — nothing starts it. No systemd unit. No enrollment flow. |
|
||||
| lab-agent command executor | `lab-agent/src/services/executor.ts` | `CommandExecutor` class: `spawn()` with timeout handling (SIGTERM then SIGKILL after 5s), stdout/stderr streaming via EventEmitter, stdin writing, signal forwarding. | **Not wired to WebSocket.** The executor and connection don't talk to each other. No message dispatch. |
|
||||
| Agent registry (labd) | `labd/src/services/agent-registry.ts` | `AgentRegistry`: in-memory Map tracking by serverId and hostname, lifecycle events, heartbeat updates. Singleton exported. | **Not used by /ws/agent handler.** The WebSocket handler in `server.ts` just logs messages — it doesn't call `agentRegistry.register()`. |
|
||||
| Message router (labd) | `labd/src/services/message-router.ts` | `MessageRouter`: handler registration, pending request tracking with timeouts, streaming support, log subscription, agent cleanup on disconnect. | **Not used.** `server.ts` doesn't call `messageRouter.handleMessage()`. The router exists but is dead code. |
|
||||
| Token management | `labd/src/routes/auth.ts` | Create, list, revoke join tokens. Validates one-time vs reusable, expiry, revocation. Marks tokens as used. | Token validation works. **But enrollment returns `certificatePem: null`** — no actual certificate is issued. |
|
||||
| CLI API client | `cli/src/api/client.ts` | `LabdClient` with mTLS support, typed methods for servers/tokens/health/enrollment. | Works for REST endpoints. **No CLI commands use it yet** — existing commands still talk directly to bastion HTTP. |
|
||||
| CLI WebSocket streaming | `cli/src/api/websocket.ts` | `streamExec()` and `streamLogs()` functions. | **No `labctl exec` or `labctl logs` commands exist.** The streaming code has no consumer. |
|
||||
| Zod validation | `labd/src/validation/` | Schemas for createToken, enrollment, serverFilters, createRole, permission patterns. Middleware for body/query validation. | **Not applied to routes.** The schemas and middleware exist but no route uses `preHandler: [validateBody(schema)]`. |
|
||||
| Encryption service | `labd/src/services/encryption.ts` | AES-256-GCM with scrypt key derivation. Encrypt/decrypt roundtrip. Singleton from `CA_ENCRYPTION_KEY` env var. | **Not used anywhere.** No CA key is encrypted, no kubeconfig is stored. |
|
||||
| Graceful shutdown | `labd/src/services/shutdown.ts` | SIGTERM/SIGINT handlers, agent notification, message router cleanup, DB disconnect, force exit timer. | Works but agent notification is a no-op since no agents are registered (see above). |
|
||||
| Rate limiting | `labd/src/middleware/rate-limit.ts` | `@fastify/rate-limit`: 100/min global, 10/min for enrollment, 20/min for tokens. | **Wired up in `server.ts`.** This actually works. |
|
||||
| Health checks | `labd/src/routes/health.ts` | `/healthz`, `/health`, `/health/detailed`, `/health/live`, `/health/ready`. Checks DB latency and agent count. | Works. Returns `agents: { connected: 0 }` since no agents ever register. |
|
||||
| Error hierarchy | `shared/src/errors/` | `LabError`, `NotFoundError`, `PermissionDeniedError`, `ValidationError`, `AgentNotConnectedError`. | **Not used in routes.** Routes still use inline `reply.code(404).send({error: ...})`. |
|
||||
| Table formatting | `cli/src/utils/table.ts` | `printTable`, `formatStatus`, `formatRelativeTime`, predefined column sets. | **Not used by existing commands.** `provision list` has its own inline formatting. |
|
||||
| Resource parsing | `cli/src/utils/resource.ts` | Parse `server/labmaster`, `app/kube-system/nginx` format. | **Not used.** No commands accept `type/name` arguments yet. |
|
||||
| Doctor command | `cli/src/commands/doctor.ts` | Config, cert, connectivity diagnostics. | Works standalone. |
|
||||
| Login command | `cli/src/commands/login.ts` | Generates EC keypair, prompts for token, POSTs to `/api/auth/user-enroll`. | **labd has no `/api/auth/user-enroll` endpoint.** Only `/api/auth/enroll` exists (for agents). Login will 404. |
|
||||
|
||||
### NOT DONE — Phase 1 items from PRD with no code
|
||||
|
||||
| Feature | PRD Description | Status |
|
||||
|---------|----------------|--------|
|
||||
| Certificate Authority | Built-in CA in labd. Generate root CA, sign CSRs, revoke certs, rotate. | **Nothing.** No CA code. No X.509 operations. No `@peculiar/x509` dependency. `EncryptionService` exists but it's for data-at-rest, not PKI. |
|
||||
| RBAC engine | Middleware that checks permissions on every request. Deny overrides allow. | **Nothing.** `auth.ts` middleware is a placeholder. No route checks permissions. Anyone can call any endpoint. |
|
||||
| Audit logging | Log every action with user, session, action, resource, result, duration. | **Nothing.** `AuditLog` Prisma model exists but nothing writes to it. No audit middleware. |
|
||||
| `labctl exec` | Remote command execution via labd → agent WebSocket relay. | **Nothing.** No `exec` CLI command. The executor library exists in lab-agent but isn't connected. |
|
||||
| `labctl logs` | Resource-scoped log streaming (server, app, bastion, audit). | **Nothing.** No `logs` CLI command. |
|
||||
| `labctl get servers` | List servers from labd with filters. | **Nothing.** No `get` CLI command. The API client has `getServers()` but no command calls it. |
|
||||
| Smoke test stack | `podman-compose` with CockroachDB + labd + 2 agents, testing enrollment/heartbeat/exec/RBAC. | **Nothing.** `stack/docker-compose.yml` exists but only runs bastion + CockroachDB, not labd or agents. |
|
||||
| Agent enrollment during PXE | Embed join token in kickstart, agent auto-enrolls on first boot. | **Nothing.** Kickstart installs k3s prereqs but doesn't install or start lab-agent. |
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Deployment
|
||||
|
||||
**Nothing from Phase 2 has been built.**
|
||||
|
||||
| Feature | Status |
|
||||
|---------|--------|
|
||||
| Reprovision labmaster as labmaster.ad.itaz.eu | Not done — manual operation |
|
||||
| Deploy k3s with Cilium CNI | Not done — kickstart only sets up kernel prereqs, leaves a comment "run `curl -sfL https://get.k3s.io`" |
|
||||
| Deploy CockroachDB on k3s | Not done — `docker-compose.yml` runs it in-memory for dev, no k8s manifests for CRDB |
|
||||
| Deploy labd on k3s | **K8s manifests exist** (`deploy/k8s/labd/base/`) — Deployment, Service, ConfigMap, HPA, PDB. But no CockroachDB to connect to and no TLS configured. |
|
||||
| Deploy bastion as managed app | Not done — bastion runs standalone, no Pulumi chart |
|
||||
| Auto-enroll agents during PXE | Not done — no agent install in kickstart, no token embedding |
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Infrastructure as Code
|
||||
|
||||
**Nothing from Phase 3 has been built.**
|
||||
|
||||
| Feature | Status |
|
||||
|---------|--------|
|
||||
| Module system | Not done — no `module.yaml`, no module loader |
|
||||
| Pulumi charts | Not done — no Pulumi dependency, no chart structure |
|
||||
| `labctl apps install/upgrade/rollback` | Not done — no `apps` command |
|
||||
| `labctl apply -f` | Not done — no `apply` command |
|
||||
| `kubectl proxy` (audited) | Not done — no kubectl proxy |
|
||||
| Kubeconfig store (encrypted) | `EncryptionService` exists but nothing uses it. `Cluster.kubeconfigEnc` field exists in Prisma but nothing reads/writes it. |
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: Multi-Cloud
|
||||
|
||||
**Nothing from Phase 4 has been built.**
|
||||
|
||||
| Feature | Status |
|
||||
|---------|--------|
|
||||
| AWS provider | Not done |
|
||||
| Reusable join tokens for ASGs | Token model supports `reusable` type, but no AWS integration |
|
||||
| Cilium Cluster Mesh | Not done |
|
||||
| Ephemeral test environments | Not done |
|
||||
| Grafana Loki | Not done |
|
||||
|
||||
---
|
||||
|
||||
## Infrastructure Files
|
||||
|
||||
| File | Status |
|
||||
|------|--------|
|
||||
| `Dockerfile.labd` | Exists. Multi-stage Alpine build. Would work if you `docker build` it. |
|
||||
| `Dockerfile.bastion` | Exists. Multi-stage Fedora build. Would work. |
|
||||
| `.dockerignore` | Exists. |
|
||||
| `deploy/k8s/labd/base/` | Kustomize manifests for labd (Deployment, Service, ConfigMap, HPA, PDB). Points at a non-existent CockroachDB and has no TLS. |
|
||||
| `stack/docker-compose.yml` | Runs bastion + CockroachDB for local dev. Works. |
|
||||
| `nfpm.yaml` | RPM/DEB packaging config. Works with `nfpm pkg`. |
|
||||
|
||||
---
|
||||
|
||||
## The Disconnection Problem
|
||||
|
||||
The core issue is that many services were built in isolation but never wired together:
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ BUILT BUT NOT CONNECTED │
|
||||
│ │
|
||||
│ AgentConnection ──✗──▶ /ws/agent handler │
|
||||
│ CommandExecutor ──✗──▶ MessageRouter │
|
||||
│ MessageRouter ──✗──▶ /ws/agent handler │
|
||||
│ AgentRegistry ──✗──▶ /ws/agent handler │
|
||||
│ Zod schemas ──✗──▶ Route preHandlers │
|
||||
│ Error classes ──✗──▶ Route error handling │
|
||||
│ LabdClient ──✗──▶ CLI commands (get/exec/logs) │
|
||||
│ Table formatting──✗──▶ CLI commands │
|
||||
│ Resource parsing──✗──▶ CLI commands │
|
||||
│ EncryptionService──✗──▶ CA / kubeconfig storage │
|
||||
│ Login command ──✗──▶ /api/auth/user-enroll (missing) │
|
||||
│ Audit logging ──✗──▶ Any middleware │
|
||||
│ RBAC engine ──✗──▶ Any middleware │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## What Actually Works End-to-End Today
|
||||
|
||||
1. **PXE boot a bare-metal machine:**
|
||||
```
|
||||
labctl init bastion standalone start
|
||||
# Machine PXE boots → discovered automatically
|
||||
labctl provision list
|
||||
labctl provision install AA:BB:CC:DD:EE:FF worker-1 --role worker
|
||||
# Machine reboots → installs Fedora → reports complete
|
||||
```
|
||||
|
||||
2. **Manage bastion lifecycle:**
|
||||
```
|
||||
labctl init bastion standalone status
|
||||
labctl init bastion standalone stop
|
||||
```
|
||||
|
||||
3. **Start labd (without database):**
|
||||
```
|
||||
LABD_PORT=3100 tsx src/labd/src/main.ts
|
||||
# Starts with stub DB, health endpoint works, token/server routes return errors
|
||||
```
|
||||
|
||||
4. **Start labd (with CockroachDB):**
|
||||
```
|
||||
docker-compose -f stack/docker-compose.yml up cockroachdb
|
||||
DATABASE_URL=postgresql://root@localhost:26257/lab tsx src/labd/src/main.ts
|
||||
# Token creation/listing/revocation works
|
||||
# Server listing works (empty until agents register)
|
||||
```
|
||||
|
||||
5. **CLI diagnostics:**
|
||||
```
|
||||
labctl doctor
|
||||
labctl config list
|
||||
labctl version
|
||||
```
|
||||
|
||||
That's it. No agent communication, no remote exec, no log streaming, no RBAC, no certificates.
|
||||
|
||||
---
|
||||
|
||||
## Recommended Next Steps (to make Phase 1 actually work)
|
||||
|
||||
### Priority 1: Wire up the agent connection
|
||||
1. Update `/ws/agent` handler to use `agentRegistry.register()` and `messageRouter.handleMessage()`
|
||||
2. Create lab-agent daemon binary that uses `AgentConnection` + `CommandExecutor`
|
||||
3. Create systemd unit for lab-agent
|
||||
|
||||
### Priority 2: Certificate Authority
|
||||
1. Add `@peculiar/x509` dependency
|
||||
2. Implement CA service: generate root CA, sign CSRs
|
||||
3. Wire enrollment route to actually sign and return certificates
|
||||
4. Store CA key encrypted using `EncryptionService`
|
||||
|
||||
### Priority 3: RBAC + Audit
|
||||
1. Create RBAC middleware that checks `Permission` table
|
||||
2. Create audit middleware that writes to `AuditLog`
|
||||
3. Apply both to all routes
|
||||
|
||||
### Priority 4: CLI commands for labd
|
||||
1. `labctl get servers` using `LabdClient.getServers()`
|
||||
2. `labctl exec server/<name>` using `streamExec()`
|
||||
3. `labctl logs server/<name>` using `streamLogs()`
|
||||
|
||||
### Priority 5: Smoke test stack
|
||||
1. Update `docker-compose.yml` to include labd + 2 agents
|
||||
2. Write integration tests for enrollment → heartbeat → exec → logs
|
||||
575
bastion.sh
575
bastion.sh
@@ -27,6 +27,7 @@ HTTP_PORT="${HTTP_PORT:-8080}"
|
||||
TIMEZONE="${TIMEZONE:-Europe/London}"
|
||||
LOCALE="${LOCALE:-en_GB.UTF-8}"
|
||||
BASTION_DIR="${BASTION_DIR:-/tmp/lab-bastion}"
|
||||
DOMAIN="${DOMAIN:-ad.itaz.eu}" # internal domain for hostnames
|
||||
DHCP_MODE="${DHCP_MODE:-proxy}" # proxy (alongside existing DHCP) or full (bastion IS the DHCP server)
|
||||
DHCP_RANGE_START="${DHCP_RANGE_START:-}" # only for full mode, auto-derived if empty
|
||||
DHCP_RANGE_END="${DHCP_RANGE_END:-}"
|
||||
@@ -45,13 +46,19 @@ CMD="${1:-serve}"
|
||||
|
||||
case "$CMD" in
|
||||
install)
|
||||
[[ $# -ge 3 ]] || { echo "Usage: bastion.sh install <mac> <hostname> [--disk <dev>]"; exit 1; }
|
||||
[[ $# -ge 3 ]] || { echo "Usage: bastion.sh install <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
|
||||
MAC="$2"
|
||||
HOSTNAME="$3"
|
||||
DISK="${5:-}" # --disk <dev>
|
||||
PAYLOAD="{\"mac\":\"$MAC\",\"hostname\":\"$HOSTNAME\""
|
||||
[[ -n "$DISK" ]] && PAYLOAD="$PAYLOAD,\"disk\":\"$DISK\""
|
||||
PAYLOAD="$PAYLOAD}"
|
||||
shift 3
|
||||
DISK="" ROLE="worker"
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--disk) DISK="$2"; shift 2 ;;
|
||||
--role) ROLE="$2"; shift 2 ;;
|
||||
*) echo "Unknown option: $1"; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
PAYLOAD=$(python3 -c "import json; print(json.dumps({k:v for k,v in {'mac':'$MAC','hostname':'$HOSTNAME','disk':'$DISK','role':'$ROLE'}.items() if v}))")
|
||||
RESULT=$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$PAYLOAD" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
|
||||
@@ -93,16 +100,62 @@ print()
|
||||
print('\033[1mINSTALLED\033[0m')
|
||||
if installed:
|
||||
for mac, info in installed.items():
|
||||
print(f' {mac:<20} → {info.get(\"hostname\",\"?\")} ({info.get(\"installed_at\",\"?\")})')
|
||||
ip = info.get('ip', '')
|
||||
ip_str = f' ip={ip}' if ip else ''
|
||||
print(f' {mac:<20} → {info.get(\"hostname\",\"?\")} role={info.get(\"role\",\"?\")}{ip_str} ({info.get(\"installed_at\",\"?\")})')
|
||||
else:
|
||||
print(' (none)')
|
||||
print()
|
||||
" 2>/dev/null || echo "$RESULT"
|
||||
exit 0
|
||||
;;
|
||||
reprovision)
|
||||
[[ $# -ge 3 ]] || { echo "Usage: bastion.sh reprovision <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
|
||||
MAC="$2"
|
||||
HOSTNAME="$3"
|
||||
shift 3
|
||||
DISK="" ROLE="worker"
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--disk) DISK="$2"; shift 2 ;;
|
||||
--role) ROLE="$2"; shift 2 ;;
|
||||
*) echo "Unknown option: $1"; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Queue the install
|
||||
PAYLOAD=$(python3 -c "import json; print(json.dumps({k:v for k,v in {'mac':'$MAC','hostname':'$HOSTNAME','disk':'$DISK','role':'$ROLE'}.items() if v}))")
|
||||
RESULT=$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$PAYLOAD" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
|
||||
echo "$RESULT" | python3 -m json.tool 2>/dev/null || echo "$RESULT"
|
||||
|
||||
# Try to find IP from installed state and SSH in to trigger PXE reboot
|
||||
IP=$(curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>/dev/null | \
|
||||
python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('installed',{}).get('${MAC}',{}).get('ip',''))" 2>/dev/null || echo "")
|
||||
ADMIN_USER="${SUDO_USER:-$USER}"
|
||||
[[ "$ADMIN_USER" == "root" ]] && ADMIN_USER=""
|
||||
|
||||
if [[ -n "$IP" && -n "$ADMIN_USER" ]]; then
|
||||
echo ""
|
||||
echo "Attempting SSH reboot into PXE ($ADMIN_USER@$IP)..."
|
||||
ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "$ADMIN_USER@$IP" \
|
||||
'sudo efibootmgr 2>/dev/null; PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi' 2>&1 && {
|
||||
echo ""
|
||||
echo "Machine is rebooting into PXE. Install will start automatically."
|
||||
} || {
|
||||
echo ""
|
||||
echo "SSH failed. Reboot the machine manually into PXE (e.g. via IPMI/KVM)."
|
||||
}
|
||||
else
|
||||
echo ""
|
||||
echo "No IP known for this machine. Reboot it manually into PXE."
|
||||
fi
|
||||
exit 0
|
||||
;;
|
||||
serve) ;; # continue below
|
||||
*)
|
||||
echo "Usage: bastion.sh [serve|install <mac> <hostname>|list]"
|
||||
echo "Usage: bastion.sh [serve|install|reprovision|list]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@@ -111,6 +164,17 @@ esac
|
||||
# SERVE MODE — start the bastion
|
||||
# ══════════════════════════════════════════════════════════════════
|
||||
|
||||
# ──── Kill old instances ──────────────────────────────────────────
|
||||
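# Find and kill any previous bastion dnsmasq and HTTP server. NOTE(review): the pgrep patterns below hardcode /tmp/lab-bastion (the default BASTION_DIR); instances started with a custom BASTION_DIR are presumably not matched — confirm.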
|
||||
OLD_DNSMASQ=$(pgrep -f 'dnsmasq --no-daemon --conf-file=/tmp/lab-bastion' 2>/dev/null || true)
|
||||
OLD_HTTP=$(pgrep -f 'python3 /tmp/lab-bastion/server.py' 2>/dev/null || true)
|
||||
if [[ -n "$OLD_DNSMASQ" || -n "$OLD_HTTP" ]]; then
|
||||
warn "Killing old bastion processes..."
|
||||
[[ -n "$OLD_DNSMASQ" ]] && kill $OLD_DNSMASQ 2>/dev/null && log " Stopped old dnsmasq (PID $OLD_DNSMASQ)"
|
||||
[[ -n "$OLD_HTTP" ]] && kill $OLD_HTTP 2>/dev/null && log " Stopped old HTTP server (PID $OLD_HTTP)"
|
||||
sleep 1
|
||||
fi
|
||||
|
||||
# ──── Preflight ───────────────────────────────────────────────────
|
||||
[[ $EUID -eq 0 ]] || die "Must run as root (need DHCP/TFTP ports). Use: sudo bash bastion.sh"
|
||||
|
||||
@@ -143,23 +207,59 @@ GATEWAY="$(ip route | awk '/default/ {print $3; exit}')"
|
||||
[[ -n "$SERVER_IP" ]] || die "Cannot detect IP on interface $IFACE"
|
||||
log "Interface: ${BOLD}$IFACE${NC} IP: ${BOLD}$SERVER_IP${NC} Network: ${BOLD}$NETWORK${NC}"
|
||||
|
||||
# ──── Auto-detect SSH pubkey ──────────────────────────────────────
|
||||
SSH_PUBKEY="${SSH_PUBKEY:-}"
|
||||
if [[ -z "$SSH_PUBKEY" ]]; then
|
||||
# ──── Auto-detect SSH keys ───────────────────────────────────────
|
||||
REAL_HOME="${HOME}"
|
||||
[[ -n "${SUDO_USER:-}" ]] && REAL_HOME="$(getent passwd "$SUDO_USER" | cut -d: -f6)"
|
||||
for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do
|
||||
[[ -f "$keyfile" ]] && { SSH_PUBKEY="$keyfile"; break; }
|
||||
done
|
||||
|
||||
SSH_KEYS_CONTENT=""
|
||||
SSH_KEY_SOURCE=""
|
||||
|
||||
# Collect SSH keys from authorized_keys + local pubkeys (deduplicated)
|
||||
SSH_KEY_SOURCE=""
|
||||
if [[ -f "$REAL_HOME/.ssh/authorized_keys" ]]; then
|
||||
SSH_KEYS_CONTENT="$(grep -v '^#' "$REAL_HOME/.ssh/authorized_keys" | grep -v '^$')"
|
||||
SSH_KEY_SOURCE="$REAL_HOME/.ssh/authorized_keys"
|
||||
fi
|
||||
|
||||
SSH_KEY_CONTENT=""
|
||||
if [[ -n "$SSH_PUBKEY" && -f "$SSH_PUBKEY" ]]; then
|
||||
SSH_KEY_CONTENT="$(cat "$SSH_PUBKEY")"
|
||||
log "SSH key: ${BOLD}$SSH_PUBKEY${NC}"
|
||||
# Also include local pubkey files (they may not be in authorized_keys)
|
||||
for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do
|
||||
if [[ -f "$keyfile" ]]; then
|
||||
KEY_DATA="$(cat "$keyfile")"
|
||||
KEY_FP="$(awk '{print $2}' "$keyfile")"
|
||||
if [[ -n "$SSH_KEYS_CONTENT" ]]; then
|
||||
# Add only if not already present
|
||||
if ! echo "$SSH_KEYS_CONTENT" | grep -qF "$KEY_FP"; then
|
||||
SSH_KEYS_CONTENT="$SSH_KEYS_CONTENT"$'\n'"$KEY_DATA"
|
||||
SSH_KEY_SOURCE="${SSH_KEY_SOURCE} + $keyfile"
|
||||
fi
|
||||
else
|
||||
warn "No SSH public key found. Set SSH_PUBKEY=/path/to/key.pub"
|
||||
warn "Install mode will use root password 'changeme' as fallback."
|
||||
SSH_KEYS_CONTENT="$KEY_DATA"
|
||||
SSH_KEY_SOURCE="$keyfile"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# Fallback: no keys were collected from authorized_keys or local pubkey files — generate a keypair
|
||||
if [[ -z "$SSH_KEYS_CONTENT" ]]; then
|
||||
GENERATED_KEY="$BASTION_DIR/bastion_ed25519"
|
||||
if [[ ! -f "$GENERATED_KEY" ]]; then
|
||||
log "No SSH keys found — generating ed25519 keypair..."
|
||||
ssh-keygen -t ed25519 -f "$GENERATED_KEY" -N "" -C "bastion-generated@$(hostname)" >/dev/null 2>&1
|
||||
fi
|
||||
SSH_KEYS_CONTENT="$(cat "${GENERATED_KEY}.pub")"
|
||||
SSH_KEY_SOURCE="$GENERATED_KEY (generated)"
|
||||
warn "Using generated keypair: ${BOLD}$GENERATED_KEY${NC}"
|
||||
warn "Save this private key — it's the only way to access installed machines."
|
||||
fi
|
||||
|
||||
SSH_KEY_COUNT="$(echo "$SSH_KEYS_CONTENT" | wc -l)"
|
||||
log "SSH keys: ${BOLD}${SSH_KEY_COUNT} key(s)${NC} from ${BOLD}${SSH_KEY_SOURCE}${NC}"
|
||||
|
||||
# ──── Detect admin username ──────────────────────────────────────
|
||||
ADMIN_USER="${SUDO_USER:-$USER}"
|
||||
[[ "$ADMIN_USER" == "root" ]] && ADMIN_USER=""
|
||||
if [[ -n "$ADMIN_USER" ]]; then
|
||||
log "Admin user: ${BOLD}${ADMIN_USER}${NC} (will be created on installed machines)"
|
||||
fi
|
||||
|
||||
# ──── Prepare directories ────────────────────────────────────────
|
||||
@@ -264,13 +364,8 @@ FEDORA_MIRROR="https://download.fedoraproject.org/pub/fedora/linux/releases/${FE
|
||||
log "Preparing boot artifacts (Fedora ${FEDORA_VERSION} ${ARCH})..."
|
||||
copy_if_missing "/usr/share/ipxe/undionly.kpxe" "$TFTPDIR/undionly.kpxe" "iPXE BIOS"
|
||||
|
||||
# UEFI x86_64: two-stage PXE boot
|
||||
# Stage 1: tiny PXE loader stub (<20KB) fits in constrained TFTP buffers
|
||||
# Stage 2: full iPXE binary downloaded via UEFI PXE protocol (no size limit)
|
||||
PXELOADER_SRC="$(cd "$(dirname "$0")" && pwd)/pxeloader.c"
|
||||
[[ -f "$PXELOADER_SRC" ]] || PXELOADER_SRC="$(dirname "${BASH_SOURCE[0]}")/pxeloader.c"
|
||||
build_pxeloader "$PXELOADER_SRC" "$TFTPDIR/ipxe.efi" "PXE loader stub (stage 1)"
|
||||
copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe-real.efi" "iPXE UEFI x86_64 (stage 2)"
|
||||
# UEFI x86_64: serve iPXE directly via TFTP (UEFI has no TFTP size limit)
|
||||
copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe.efi" "iPXE UEFI x86_64"
|
||||
|
||||
copy_if_missing "/usr/share/ipxe/arm64-efi/snponly.efi" "$TFTPDIR/ipxe-arm64.efi" "iPXE UEFI arm64"
|
||||
download "${FEDORA_MIRROR}/images/pxeboot/vmlinuz" "$HTTPDIR/vmlinuz" "Fedora kernel"
|
||||
@@ -375,25 +470,29 @@ except Exception as e:
|
||||
"
|
||||
fi
|
||||
|
||||
# ── Power off — do NOT let Anaconda proceed ──
|
||||
# ── Reboot — do NOT let Anaconda proceed ──
|
||||
echo ""
|
||||
echo "=== Discovery complete, powering off ==="
|
||||
echo "=== Discovery complete, rebooting ==="
|
||||
echo ""
|
||||
sleep 3
|
||||
echo 1 > /proc/sys/kernel/sysrq
|
||||
echo o > /proc/sysrq-trigger
|
||||
echo b > /proc/sysrq-trigger
|
||||
sleep 5
|
||||
poweroff -f
|
||||
reboot -f
|
||||
|
||||
%end
|
||||
|
||||
# Anaconda should never get here, but just in case:
|
||||
poweroff
|
||||
reboot
|
||||
DISCOVER_KS
|
||||
|
||||
# Patch in the bastion URL
|
||||
sed -i "s|__BASTION_URL__|http://${SERVER_IP}:${HTTP_PORT}|g" "$HTTPDIR/discover.ks"
|
||||
|
||||
# Save SSH keys and admin user for the HTTP server to use
|
||||
echo "$SSH_KEYS_CONTENT" > "$BASTION_DIR/ssh_keys"
|
||||
echo "$ADMIN_USER" > "$BASTION_DIR/admin_user"
|
||||
|
||||
# ──── Generate iPXE boot script ───────────────────────────────────
|
||||
# Initial iPXE script chains to /dispatch with the MAC, so the server
|
||||
# can route to discover or install mode per machine.
|
||||
@@ -431,9 +530,17 @@ SERVER_IP = sys.argv[3]
|
||||
HTTP_PORT = int(sys.argv[4])
|
||||
FEDORA_VER = sys.argv[5]
|
||||
FEDORA_MIRROR = sys.argv[6]
|
||||
SSH_KEY = sys.argv[7] if len(sys.argv) > 7 else ""
|
||||
SSH_KEYS_FILE = sys.argv[7] if len(sys.argv) > 7 else ""
|
||||
TIMEZONE = sys.argv[8] if len(sys.argv) > 8 else "Europe/London"
|
||||
LOCALE = sys.argv[9] if len(sys.argv) > 9 else "en_GB.UTF-8"
|
||||
DOMAIN = sys.argv[10] if len(sys.argv) > 10 else "ad.itaz.eu"
|
||||
ADMIN_USER = sys.argv[11] if len(sys.argv) > 11 else ""
|
||||
|
||||
# Load SSH keys from file
|
||||
SSH_KEYS = []
|
||||
if SSH_KEYS_FILE and os.path.isfile(SSH_KEYS_FILE):
|
||||
with open(SSH_KEYS_FILE) as f:
|
||||
SSH_KEYS = [l.strip() for l in f if l.strip() and not l.startswith('#')]
|
||||
|
||||
# ── State management (file-backed, lock-protected) ───────────────
|
||||
|
||||
@@ -452,19 +559,66 @@ def save_state(state):
|
||||
|
||||
# ── Kickstart generation ─────────────────────────────────────────
|
||||
|
||||
def generate_kickstart(hostname, disk="", ssh_key=""):
|
||||
disk_cmds = "clearpart --all --initlabel\nautopart --type=plain"
|
||||
if disk:
|
||||
disk_cmds = f"ignoredisk --only-use={disk}\nclearpart --all --initlabel --drives={disk}\nautopart --type=plain"
|
||||
def generate_kickstart(hostname, disk="", ssh_keys=None, domain="", role="worker", admin_user=""):
|
||||
ssh_keys = ssh_keys or []
|
||||
fqdn = f"{hostname}.{domain}" if domain else hostname
|
||||
vg = "labvg"
|
||||
|
||||
if ssh_key:
|
||||
auth = f'rootpw --lock\nsshkey --username=root "{ssh_key}"'
|
||||
# ── Auth ──
|
||||
if ssh_keys:
|
||||
auth = f'rootpw --lock\nsshkey --username=root "{ssh_keys[0]}"'
|
||||
else:
|
||||
auth = 'rootpw --plaintext changeme'
|
||||
|
||||
return f"""# Lab Bastion — Fedora {FEDORA_VER} install
|
||||
# ── Admin user (kickstart directive) ──
|
||||
user_directive = ""
|
||||
if admin_user:
|
||||
user_directive = f'user --name={admin_user} --groups=wheel --lock'
|
||||
|
||||
# ── SSH keys for %post (root + admin user) ──
|
||||
all_keys = "\n".join(ssh_keys)
|
||||
ssh_post_block = ""
|
||||
if ssh_keys:
|
||||
ssh_post_block = f"""
|
||||
# Set up SSH keys for root
|
||||
mkdir -p /root/.ssh && chmod 700 /root/.ssh
|
||||
cat > /root/.ssh/authorized_keys << 'SSHKEYS'
|
||||
{all_keys}
|
||||
SSHKEYS
|
||||
chmod 600 /root/.ssh/authorized_keys"""
|
||||
|
||||
if admin_user and ssh_keys:
|
||||
ssh_post_block += f"""
|
||||
|
||||
# Set up SSH keys for {admin_user}
|
||||
ADMIN_HOME=$(getent passwd {admin_user} | cut -d: -f6)
|
||||
mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh"
|
||||
cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
|
||||
chown -R {admin_user}:{admin_user} "$ADMIN_HOME/.ssh"
|
||||
chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"
|
||||
|
||||
# Fix SELinux contexts for SSH
|
||||
restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
|
||||
|
||||
# Passwordless sudo for {admin_user}
|
||||
echo '{admin_user} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/{admin_user}
|
||||
chmod 440 /etc/sudoers.d/{admin_user}"""
|
||||
|
||||
# ── Determine disk (auto-detect first of /dev/nvme0n1, /dev/sda, /dev/vda if not specified) ──
|
||||
disk_line = f'DISK="{disk}"' if disk else '''
|
||||
DISK=""
|
||||
for d in /dev/nvme0n1 /dev/sda /dev/vda; do
|
||||
[ -b "$d" ] && { DISK="$(basename $d)"; break; }
|
||||
done
|
||||
[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }
|
||||
'''
|
||||
|
||||
# ── LVM layout sizes (MB) ──
|
||||
has_longhorn = (role == "worker")
|
||||
|
||||
return f"""# Lab Bastion -- Fedora {FEDORA_VER} server install
|
||||
# Generated: {datetime.now().isoformat()}
|
||||
# Target: {hostname}
|
||||
# Target: {fqdn} (role={role})
|
||||
|
||||
text
|
||||
reboot
|
||||
@@ -473,39 +627,266 @@ lang {LOCALE}
|
||||
keyboard uk
|
||||
timezone {TIMEZONE} --utc
|
||||
|
||||
network --bootproto=dhcp --activate --hostname={hostname}
|
||||
network --bootproto=dhcp --activate --hostname={fqdn}
|
||||
|
||||
{auth}
|
||||
|
||||
{disk_cmds}
|
||||
{user_directive}
|
||||
|
||||
bootloader --append="console=tty0 console=ttyS0,115200n8"
|
||||
|
||||
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch
|
||||
|
||||
# Partitioning is generated dynamically by %pre (supports longhorn preservation)
|
||||
%include /tmp/part.ks
|
||||
|
||||
%pre --log=/tmp/pre-partition.log
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
# Progress callback helper
|
||||
bastion_progress() {{
|
||||
local stage="$1" detail="${{2:-}}"
|
||||
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
|
||||
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
|
||||
}}
|
||||
|
||||
bastion_progress "partitioning" "preparing disk layout"
|
||||
|
||||
VG="{vg}"
|
||||
{disk_line}
|
||||
|
||||
REPROVISION=no
|
||||
|
||||
# Check if VG exists (reprovision scenario)
|
||||
if vgs $VG &>/dev/null; then
|
||||
echo "=== Existing VG found - reprovision mode ==="
|
||||
REPROVISION=yes
|
||||
|
||||
# Detect which data LVs to preserve
|
||||
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no
|
||||
lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
|
||||
lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
|
||||
lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
|
||||
|
||||
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME"
|
||||
|
||||
# Remove only OS logical volumes (keep data LVs)
|
||||
for lv in root var varlog swap; do
|
||||
lvremove -f $VG/$lv 2>/dev/null || true
|
||||
done
|
||||
fi
|
||||
|
||||
if [ "$REPROVISION" = "yes" ]; then
|
||||
# Find existing boot partitions by type
|
||||
EFI_PART=$(blkid -t TYPE=vfat -o device /dev/${{DISK}}* 2>/dev/null | head -1)
|
||||
BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/${{DISK}}* 2>/dev/null | head -1)
|
||||
EFI_PART=${{EFI_PART:-/dev/${{DISK}}1}}
|
||||
BOOT_PART=${{BOOT_PART:-/dev/${{DISK}}2}}
|
||||
echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"
|
||||
|
||||
# Build partition config reusing existing PV/VG
|
||||
cat > /tmp/part.ks << PARTEOF
|
||||
ignoredisk --only-use=$DISK
|
||||
clearpart --none
|
||||
part /boot/efi --onpart=$EFI_PART --fstype=efi
|
||||
part /boot --onpart=$BOOT_PART --fstype=ext4
|
||||
volgroup {vg} --useexisting --noformat
|
||||
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
|
||||
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
|
||||
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
|
||||
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
|
||||
PARTEOF
|
||||
|
||||
# Preserve or recreate data LVs
|
||||
if [ "$PRESERVE_HOME" = "yes" ]; then
|
||||
echo "logvol /home --vgname={vg} --name=home --useexisting --noformat" >> /tmp/part.ks
|
||||
else
|
||||
echo "logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks
|
||||
fi
|
||||
|
||||
if [ "$PRESERVE_SRV" = "yes" ]; then
|
||||
echo "logvol /srv --vgname={vg} --name=srv --useexisting --noformat" >> /tmp/part.ks
|
||||
else
|
||||
echo "logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks
|
||||
fi
|
||||
|
||||
if [ "$PRESERVE_LONGHORN" = "yes" ]; then
|
||||
echo "logvol /var/lib/longhorn --vgname={vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
|
||||
fi
|
||||
|
||||
else
|
||||
# Fresh install
|
||||
cat > /tmp/part.ks << PARTEOF
|
||||
ignoredisk --only-use=$DISK
|
||||
clearpart --all --initlabel --drives=$DISK
|
||||
part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
|
||||
part /boot --fstype=ext4 --size=3072 --ondisk=$DISK
|
||||
part pv.01 --size=1 --grow --ondisk=$DISK
|
||||
volgroup {vg} pv.01
|
||||
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
|
||||
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
|
||||
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
|
||||
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
|
||||
logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240
|
||||
logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480
|
||||
{"logvol /var/lib/longhorn --vgname=" + vg + " --name=longhorn --fstype=xfs --grow --size=1" if has_longhorn else ""}
|
||||
PARTEOF
|
||||
fi
|
||||
|
||||
echo "=== Generated partition config ==="
|
||||
cat /tmp/part.ks
|
||||
echo "==================================="
|
||||
|
||||
bastion_progress "partitioning" "layout ready, starting install"
|
||||
|
||||
%end
|
||||
|
||||
%packages
|
||||
@core
|
||||
@server-product
|
||||
openssh-server
|
||||
vim-enhanced
|
||||
tmux
|
||||
git
|
||||
curl
|
||||
wget
|
||||
python3
|
||||
lshw
|
||||
dmidecode
|
||||
dnf-plugins-core
|
||||
|
||||
# Networking and diagnostics
|
||||
NetworkManager
|
||||
bind-utils
|
||||
net-tools
|
||||
iproute
|
||||
iputils
|
||||
traceroute
|
||||
tcpdump
|
||||
htop
|
||||
iotop
|
||||
strace
|
||||
jq
|
||||
|
||||
# k3s prerequisites
|
||||
container-selinux
|
||||
iptables-nft
|
||||
nftables
|
||||
policycoreutils-python-utils
|
||||
chrony
|
||||
tar
|
||||
socat
|
||||
conntrack-tools
|
||||
ethtool
|
||||
|
||||
# Boot management
|
||||
efibootmgr
|
||||
|
||||
# Puppet prerequisites
|
||||
ruby
|
||||
ruby-libs
|
||||
|
||||
# Exclude desktop
|
||||
-@workstation-product
|
||||
-@gnome-desktop
|
||||
-gnome-shell
|
||||
-gdm
|
||||
-PackageKit
|
||||
-PackageKit-glib
|
||||
%end
|
||||
|
||||
%post --log=/root/bastion-post-install.log
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
# Progress callback helper
|
||||
bastion_progress() {{
|
||||
local stage="$1" detail="${{2:-}}"
|
||||
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
|
||||
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
|
||||
}}
|
||||
|
||||
bastion_progress "post-install" "configuring system"
|
||||
|
||||
# ── SSH ──
|
||||
systemctl enable --now sshd
|
||||
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
||||
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||
hostnamectl set-hostname {hostname}
|
||||
echo "Provisioned by lab-bastion on $(date -Iseconds)" > /etc/lab-provisioned
|
||||
echo "# Lab node — puppet enrollment pending" > /root/README
|
||||
{ssh_post_block}
|
||||
|
||||
# ── Hostname and domain ──
|
||||
hostnamectl set-hostname {fqdn}
|
||||
|
||||
# ── tmpfs for /tmp ──
|
||||
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
|
||||
|
||||
# ── Kernel modules for k3s ──
|
||||
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
|
||||
br_netfilter
|
||||
overlay
|
||||
ip_conntrack
|
||||
MODULES
|
||||
modprobe br_netfilter || true
|
||||
modprobe overlay || true
|
||||
|
||||
# ── Sysctl for k3s networking ──
|
||||
cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
|
||||
net.bridge.bridge-nf-call-iptables = 1
|
||||
net.bridge.bridge-nf-call-ip6tables = 1
|
||||
net.ipv4.ip_forward = 1
|
||||
net.ipv6.conf.all.forwarding = 1
|
||||
fs.inotify.max_user_instances = 524288
|
||||
fs.inotify.max_user_watches = 1048576
|
||||
SYSCTL
|
||||
sysctl --system || true
|
||||
|
||||
# ── Disable firewalld (k3s manages its own iptables rules) ──
|
||||
systemctl disable --now firewalld || true
|
||||
|
||||
# ── Enable chronyd for time sync ──
|
||||
systemctl enable --now chronyd
|
||||
|
||||
# ── Set boot order: local disk first, PXE after ──
|
||||
if command -v efibootmgr >/dev/null 2>&1; then
|
||||
# Find the Fedora boot entry and move it first
|
||||
FEDORA_ENTRY=$(efibootmgr | grep -i fedora | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
|
||||
if [ -n "$FEDORA_ENTRY" ]; then
|
||||
CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
|
||||
# Put Fedora first, keep rest
|
||||
NEW_ORDER="$FEDORA_ENTRY,$(echo "$CURRENT_ORDER" | sed "s/$FEDORA_ENTRY,\\?//;s/,$//")"
|
||||
efibootmgr -o "$NEW_ORDER" || true
|
||||
echo "Boot order set: Fedora first ($NEW_ORDER)"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── Provisioning metadata ──
|
||||
cat > /etc/lab-provisioned << PROVEOF
|
||||
hostname: {fqdn}
|
||||
role: {role}
|
||||
provisioned: $(date -Iseconds)
|
||||
bastion: {SERVER_IP}
|
||||
PROVEOF
|
||||
|
||||
cat > /root/README << 'README'
|
||||
# Lab Node -- {fqdn} (role: {role})
|
||||
#
|
||||
# Next steps:
|
||||
# 1. Install puppet agent:
|
||||
# dnf install -y puppet-agent
|
||||
#
|
||||
# 2. Install k3s:
|
||||
# curl -sfL https://get.k3s.io | sh -
|
||||
#
|
||||
# 3. Or join existing cluster:
|
||||
# curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
|
||||
README
|
||||
|
||||
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {{split($2,a,"/"); print a[1]; exit}}')
|
||||
bastion_progress "complete" "ready at $IP_ADDR"
|
||||
|
||||
%end
|
||||
"""
|
||||
|
||||
@@ -562,6 +943,25 @@ def print_install_started(mac, hostname):
|
||||
print(f" Serving Fedora {FEDORA_VER} installer + kickstart...")
|
||||
print(f"\n{'─' * 60}\n", flush=True)
|
||||
|
||||
# Stage name -> glyph printed at the start of each progress line.
# Stages missing from this table fall back to "·" in print_progress().
PROGRESS_ICONS = {
    "partitioning": "◆",
    "installing": "◆◆",
    "post-install": "◆◆◆",
    "complete": "✔",
    "error": "✘",
}


def print_progress(mac, stage, detail=""):
    """Print one colourised progress line for a machine that is installing.

    Args:
        mac: Identifier of the reporting machine, printed verbatim.
        stage: Progress stage name; selects the glyph from PROGRESS_ICONS
            and the colour (green for "complete", red for "error",
            yellow for everything else).
        detail: Optional free-text detail appended after " -- ".

    When the stage is "complete" and a detail string is present, the text
    "ready at " is stripped from it and, if anything remains, it is
    treated as the node's address and an `ssh <user>@<ip>` hint is
    printed, using ADMIN_USER or "root" when no admin user is set.
    """
    marker = PROGRESS_ICONS.get(stage, "·")

    if stage == "complete":
        tint = GREEN
    elif stage == "error":
        tint = RED
    else:
        tint = YELLOW

    suffix = f" -- {detail}" if detail else ""
    print(f" {tint}{marker}{RESET} {mac} {BOLD}{stage}{RESET}{suffix}", flush=True)

    # Only a completed install that reported a detail string gets the ssh hint.
    if stage != "complete" or not detail:
        return

    ip = detail.replace("ready at ", "").strip()
    if not ip:
        return

    admin = ADMIN_USER or "root"
    print(f"\n {GREEN}{BOLD} ssh {admin}@{ip}{RESET}\n", flush=True)
|
||||
|
||||
# ── HTTP Handler ──────────────────────────────────────────────────
|
||||
|
||||
class BastionHandler(SimpleHTTPRequestHandler):
|
||||
@@ -603,7 +1003,7 @@ class BastionHandler(SimpleHTTPRequestHandler):
|
||||
|
||||
echo
|
||||
echo =============================================
|
||||
echo Lab PXE Bastion — INSTALLING Fedora {FEDORA_VER}
|
||||
echo Lab PXE Bastion - INSTALLING Fedora {FEDORA_VER}
|
||||
echo Target: {hostname}
|
||||
echo MAC: {mac}
|
||||
echo =============================================
|
||||
@@ -614,13 +1014,31 @@ initrd http://{SERVER_IP}:{HTTP_PORT}/initrd.img
|
||||
boot
|
||||
"""
|
||||
self.send_text(200, script)
|
||||
|
||||
elif mac in state.get("installed", {}):
|
||||
info = state["installed"][mac]
|
||||
hostname = info.get("hostname", "?")
|
||||
print(f" {GREEN}PXE request from {mac} ({hostname}) - already installed, booting local disk{RESET}", flush=True)
|
||||
script = f"""#!ipxe
|
||||
|
||||
echo
|
||||
echo =============================================
|
||||
echo Lab PXE Bastion - {hostname}
|
||||
echo Already installed, booting from local disk
|
||||
echo =============================================
|
||||
echo
|
||||
sleep 3
|
||||
exit
|
||||
"""
|
||||
self.send_text(200, script)
|
||||
|
||||
else:
|
||||
print(f" {YELLOW}PXE request from {mac} → discovery mode{RESET}", flush=True)
|
||||
script = f"""#!ipxe
|
||||
|
||||
echo
|
||||
echo =============================================
|
||||
echo Lab PXE Bastion — DISCOVERY MODE
|
||||
echo Lab PXE Bastion - DISCOVERY MODE
|
||||
echo MAC: {mac}
|
||||
echo Collecting hardware info...
|
||||
echo =============================================
|
||||
@@ -642,7 +1060,10 @@ boot
|
||||
ks = generate_kickstart(
|
||||
hostname=cfg.get("hostname", "lab-node"),
|
||||
disk=cfg.get("disk", ""),
|
||||
ssh_key=SSH_KEY,
|
||||
ssh_keys=SSH_KEYS,
|
||||
domain=DOMAIN,
|
||||
role=cfg.get("role", "worker"),
|
||||
admin_user=ADMIN_USER,
|
||||
)
|
||||
self.send_text(200, ks)
|
||||
return
|
||||
@@ -710,15 +1131,21 @@ boot
|
||||
mac = data.get("mac", "").lower().replace("-", ":")
|
||||
hostname = data.get("hostname", "lab-node")
|
||||
disk = data.get("disk", "")
|
||||
role = data.get("role", "worker")
|
||||
|
||||
if not mac:
|
||||
self.send_json(400, {"error": "mac is required"})
|
||||
return
|
||||
|
||||
if role not in ("worker", "infra"):
|
||||
self.send_json(400, {"error": "role must be 'worker' or 'infra'"})
|
||||
return
|
||||
|
||||
state = load_state()
|
||||
state.setdefault("install_queue", {})[mac] = {
|
||||
"hostname": hostname,
|
||||
"disk": disk,
|
||||
"role": role,
|
||||
"queued_at": datetime.now().isoformat(),
|
||||
}
|
||||
save_state(state)
|
||||
@@ -729,10 +1156,49 @@ boot
|
||||
"status": "queued",
|
||||
"mac": mac,
|
||||
"hostname": hostname,
|
||||
"message": "PXE boot the machine to start installation",
|
||||
"role": role,
|
||||
"message": f"PXE boot the machine to start installation (role={role})",
|
||||
})
|
||||
return
|
||||
|
||||
# ── Install progress callback from kickstart ──
|
||||
if parsed.path == "/api/progress":
|
||||
try:
|
||||
data = json.loads(body)
|
||||
except json.JSONDecodeError:
|
||||
self.send_json(400, {"error": "invalid JSON"})
|
||||
return
|
||||
|
||||
mac = data.get("mac", "unknown").lower()
|
||||
stage = data.get("stage", "unknown")
|
||||
detail = data.get("detail", "")
|
||||
|
||||
print_progress(mac, stage, detail)
|
||||
|
||||
# Update state with progress
|
||||
state = load_state()
|
||||
if mac in state.get("install_queue", {}):
|
||||
state["install_queue"][mac]["progress"] = stage
|
||||
state["install_queue"][mac]["progress_at"] = datetime.now().isoformat()
|
||||
if detail:
|
||||
state["install_queue"][mac]["progress_detail"] = detail
|
||||
|
||||
# Move to installed on completion
|
||||
if stage == "complete":
|
||||
cfg = state["install_queue"].pop(mac)
|
||||
ip = detail.replace("ready at ", "").strip() if detail else ""
|
||||
state.setdefault("installed", {})[mac] = {
|
||||
"hostname": cfg.get("hostname", "?"),
|
||||
"role": cfg.get("role", "?"),
|
||||
"ip": ip,
|
||||
"installed_at": datetime.now().isoformat(),
|
||||
}
|
||||
|
||||
save_state(state)
|
||||
|
||||
self.send_json(200, {"status": "ok"})
|
||||
return
|
||||
|
||||
self.send_json(404, {"error": "not found"})
|
||||
|
||||
|
||||
@@ -850,9 +1316,11 @@ python3 "$BASTION_DIR/server.py" \
|
||||
"$HTTP_PORT" \
|
||||
"$FEDORA_VERSION" \
|
||||
"$FEDORA_MIRROR" \
|
||||
"$SSH_KEY_CONTENT" \
|
||||
"$BASTION_DIR/ssh_keys" \
|
||||
"$TIMEZONE" \
|
||||
"$LOCALE" &
|
||||
"$LOCALE" \
|
||||
"$DOMAIN" \
|
||||
"$ADMIN_USER" &
|
||||
HTTP_PID=$!
|
||||
sleep 1
|
||||
|
||||
@@ -871,6 +1339,7 @@ echo -e " Network: ${BOLD}${NETWORK}/24${NC} via ${BOLD}${IFACE}${NC}"
|
||||
echo -e " DHCP: ${BOLD}${DHCP_MODE}${NC}$(if [[ "$DHCP_MODE" == "full" ]]; then echo " (${DHCP_RANGE_START}–${DHCP_RANGE_END})"; else echo " (alongside existing DHCP)"; fi)"
|
||||
echo -e " HTTP: ${BOLD}http://${SERVER_IP}:${HTTP_PORT}/${NC}"
|
||||
echo -e " OS: ${BOLD}Fedora ${FEDORA_VERSION} (${ARCH})${NC}"
|
||||
echo -e " Domain: ${BOLD}${DOMAIN}${NC}"
|
||||
echo -e " State: ${BOLD}${STATEFILE}${NC}"
|
||||
echo ""
|
||||
echo -e " ${YELLOW}PXE boot any machine on this network.${NC}"
|
||||
|
||||
8
bastion/.dockerignore
Normal file
8
bastion/.dockerignore
Normal file
@@ -0,0 +1,8 @@
|
||||
node_modules
|
||||
dist
|
||||
.git
|
||||
*.log
|
||||
.env
|
||||
.env.*
|
||||
*.tsbuildinfo
|
||||
.taskmaster
|
||||
3
bastion/.gitignore
vendored
Normal file
3
bastion/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
node_modules/
|
||||
dist/
|
||||
*.tsbuildinfo
|
||||
132
bastion/.taskmaster/docs/pulumi-k3s-refactor.md
Normal file
132
bastion/.taskmaster/docs/pulumi-k3s-refactor.md
Normal file
@@ -0,0 +1,132 @@
|
||||
# PRD: Refactor K3s Module from Bash Heredocs to Pulumi TypeScript
|
||||
|
||||
## Problem
|
||||
|
||||
The k3s install/configure/health module currently generates ~300 lines of bash heredoc strings embedded in TypeScript files (`install.ts`, `configure.ts`, `health.ts`). These are unmaintainable, untestable, and impossible to compose. This is the same bash-in-code problem that drove the bastion TypeScript rewrite.
|
||||
|
||||
## Vision
|
||||
|
||||
The lab platform uses Pulumi as its IaC engine:
|
||||
- **Central execution**: labd runs Pulumi programs in labcontroller k8s for cloud/remote resources with RBAC, global state, and audit trail (PulumiRun table already exists in CockroachDB)
|
||||
- **Local execution**: lab-agents run Pulumi programs directly on bare-metal nodes
|
||||
- **Multi-environment**: supports multiple datacenters, clouds (baremetal, AWS, GCP), production/dev/ephemeral environments
|
||||
|
||||
## Current State
|
||||
|
||||
### Files to replace
|
||||
- `src/modules/modules/k3s/src/install.ts` — 275 lines, generates bash for 10 install phases
|
||||
- `src/modules/modules/k3s/src/configure.ts` — 118 lines, generates bash for 5 configure phases
|
||||
- `src/modules/modules/k3s/src/health.ts` — 57 lines, generates bash for 6 health checks
|
||||
|
||||
### Existing infrastructure
|
||||
- `sshExec(ip, user, command, opts)` and `sshExecStreaming()` — SSH execution primitives in `src/modules/src/ssh.ts`
|
||||
- Module system: `ModuleRunner`, `ModuleRegistry`, `Module` interface with install/configure/health phases
|
||||
- `@lab/shared` types: `BastionConfig`, `K3sInstallContext`, roles, OS types
|
||||
- PulumiRun model in Prisma schema (labd) — tracks Pulumi execution state
|
||||
- labcontroller module generates k8s manifests (cockroachdb.ts, labd.ts, bastion.ts) — these also need Pulumi migration eventually
|
||||
|
||||
### 32 distinct operations currently in bash (21 top-level steps listed below)
|
||||
**Install phase (10 steps):**
|
||||
1. Load kernel modules (br_netfilter, overlay, ip_conntrack)
|
||||
2. Apply CIS sysctl hardening (9 params)
|
||||
3. Disable swap
|
||||
4. Disable firewall (firewalld/ufw — mask to survive reboot)
|
||||
5. Set SELinux permissive
|
||||
6. Write k3s server config (flannel=none, secrets-encryption, audit, CIS hardened)
|
||||
7. Write audit policy YAML
|
||||
8. Clean up stale CNI (flannel.1 vxlan, cilium interfaces, port 8472 conflicts)
|
||||
9. Install k3s binary (curl | sh)
|
||||
10. Install Cilium CNI (detect arch, detect interface, kubeProxyReplacement)
|
||||
|
||||
**Configure phase (5 steps):**
|
||||
1. Fix CoreDNS upstream DNS (systemd-resolved 127.0.0.53 unreachable from pod netns)
|
||||
2. Configure log rotation
|
||||
3. Check certificate expiry
|
||||
4. Apply default network policies (deny-ingress, allow-dns-egress)
|
||||
5. Apply Pod Security Standards (restricted)
|
||||
|
||||
**Health checks (6 checks):**
|
||||
1. k3s service active
|
||||
2. Node Ready condition
|
||||
3. API server /healthz
|
||||
4. Secrets encryption enabled
|
||||
5. Cilium status
|
||||
6. kube-system pod status
|
||||
|
||||
## Requirements
|
||||
|
||||
### Architecture decisions needed (discuss with user via task-master)
|
||||
1. **Pulumi structure**: micro-stacks vs monorepo-by-env vs component-library vs GitOps operator
|
||||
2. **Multi-cloud support**: how stacks are organized across baremetal/AWS/GCP
|
||||
3. **Environment model**: how prod/dev/ephemeral environments are represented
|
||||
4. **State backend**: Pulumi Cloud vs self-hosted (S3/CockroachDB)
|
||||
5. **Execution model**: who runs `pulumi up` — labd central, lab-agent local, or both?
|
||||
|
||||
### Operation design
|
||||
- Each operation is a typed TypeScript async function using `sshExec()`
|
||||
- Standard interface: `OperationContext` in, `OperationResult` out
|
||||
- **Idempotent**: check before act, report `changed: boolean`
|
||||
- **Composable**: operations grouped into logical units (host-prep, networking, hardening)
|
||||
- **Testable**: mock sshExec for unit tests
|
||||
- **Future Pulumi-ready**: each function maps 1:1 to a `command.remote.Command` resource (from `@pulumi/command`)
|
||||
|
||||
### Groups (logical composition)
|
||||
- `host-prep`: kernel-modules + sysctl + swap + firewall + selinux
|
||||
- `k3s-server`: k3s-config + audit-policy + cni-cleanup + k3s-install
|
||||
- `k3s-agent`: k3s-config (agent) + k3s-install (agent mode)
|
||||
- `networking`: cilium + dns-fix + network-policy
|
||||
- `hardening`: pod-security + cert-check + log-rotation
|
||||
|
||||
### Pulumi integration (when added)
|
||||
- Add `@pulumi/pulumi` and `@pulumi/command` as dependencies
|
||||
- Each operation becomes a `command.remote.Command` resource
|
||||
- Groups become `pulumi.ComponentResource` classes
|
||||
- K3sCluster becomes a top-level ComponentResource that composes groups
|
||||
- Stacks per environment: `lab-baremetal`, `aws-prod`, `dev`, `ephemeral-pr-123`
|
||||
|
||||
## File structure
|
||||
|
||||
```
|
||||
src/modules/modules/k3s/src/
|
||||
├── types.ts # K3sConfig, OperationContext, OperationResult
|
||||
├── utils.ts # sshOpts(), runSequential(), file helpers
|
||||
├── operations/ # ~15 atomic operations
|
||||
│ ├── kernel-modules.ts
|
||||
│ ├── sysctl.ts
|
||||
│ ├── swap.ts
|
||||
│ ├── firewall.ts
|
||||
│ ├── selinux.ts
|
||||
│ ├── k3s-config.ts
|
||||
│ ├── audit-policy.ts
|
||||
│ ├── cni-cleanup.ts
|
||||
│ ├── k3s-install.ts
|
||||
│ ├── cilium.ts
|
||||
│ ├── dns-fix.ts
|
||||
│ ├── log-rotation.ts
|
||||
│ ├── network-policy.ts
|
||||
│ ├── pod-security.ts
|
||||
│ └── cert-check.ts
|
||||
├── groups/ # Logical groupings
|
||||
│ ├── host-prep.ts
|
||||
│ ├── k3s-server.ts
|
||||
│ ├── k3s-agent.ts
|
||||
│ ├── networking.ts
|
||||
│ └── hardening.ts
|
||||
├── health/ # Health checks
|
||||
│ ├── k3s-service.ts
|
||||
│ ├── node-ready.ts
|
||||
│ ├── api-health.ts
|
||||
│ ├── secrets-encryption.ts
|
||||
│ ├── cilium-status.ts
|
||||
│ └── pod-status.ts
|
||||
├── k3s-module.ts # Module implementation
|
||||
└── index.ts # Public exports
|
||||
```
|
||||
|
||||
## Success criteria
|
||||
- Zero bash heredoc strings in the k3s module
|
||||
- Every operation independently testable with mocked sshExec
|
||||
- `labctl app k3s install <target>` works end-to-end
|
||||
- `labctl app k3s health` works end-to-end
|
||||
- Existing test suite passes (updated for new API)
|
||||
- Clear path to wrapping operations as Pulumi resources
|
||||
172
bastion/.taskmaster/docs/resource-tracking.md
Normal file
172
bastion/.taskmaster/docs/resource-tracking.md
Normal file
@@ -0,0 +1,172 @@
|
||||
# PRD: Resource Tracking & kubectl-style CLI
|
||||
|
||||
## Problem
|
||||
|
||||
The lab platform currently has fragmented state management:
|
||||
- Bastion keeps machine state in an ephemeral JSON file (`/tmp/lab-bastion/state.json`) that is lost on pod restart
|
||||
- labd receives state syncs from bastions but only stores them in memory — the `Server` table in CockroachDB is never written to
|
||||
- There is no system to track relationships between resources (servers belong to clusters, clusters run on servers, networks connect servers)
|
||||
- The CLI (`labctl`) uses an inconsistent verb-noun structure (`labctl provision list`, `labctl app k3s install`) instead of a uniform resource-oriented pattern
|
||||
- RBAC permissions reference resources (server, cloud, environment) but there is no resource registry to validate against
|
||||
|
||||
## Vision
|
||||
|
||||
A unified resource tracking system where all infrastructure objects (servers, clusters, networks, bastions, VMs) are persisted in CockroachDB via labd, with relationships between them, and managed through a kubectl-style CLI. This replaces the ephemeral JSON state and becomes the single source of truth for the platform.
|
||||
|
||||
## Current State
|
||||
|
||||
### Database (CockroachDB via Prisma)
|
||||
Existing models that are scaffolded but mostly unused:
|
||||
- `Server` — hostname, mac, cloud, environment, role, labels, ip, status (0 rows)
|
||||
- `Agent` — mTLS certificate enrollment per server (0 rows)
|
||||
- `Bastion` — PXE server registration (1 row, labmaster)
|
||||
- `Cluster` — k8s cluster metadata (0 rows)
|
||||
- `User`, `Role`, `Permission`, `UserRole` — RBAC framework (seeded with 3 roles, 6 permissions)
|
||||
- `JoinToken` — agent/bastion enrollment tokens
|
||||
- `AuditLog` — action audit trail
|
||||
|
||||
### Bastion State (ephemeral JSON)
|
||||
Three categories tracked per-bastion:
|
||||
- `discovered` — machines found via PXE with hardware info (CPU, RAM, disks, NICs, arch)
|
||||
- `install_queue` — machines queued for OS install with progress tracking
|
||||
- `installed` — machines with OS installed (hostname, role, IP, OS)
|
||||
|
||||
### CLI Structure (current)
|
||||
```
|
||||
labctl init bastion standalone [start|stop|status]
|
||||
labctl provision [list|install|reprovision|forget|logs]
|
||||
labctl app [k3s|labcontroller]
|
||||
labctl config [list|get|set]
|
||||
labctl roles
|
||||
labctl doctor
|
||||
labctl login
|
||||
labctl logs
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
### 1. Persist Bastion State to Database
|
||||
|
||||
When labd receives `bastion-state-sync` messages, it must upsert machines into the `Server` table:
|
||||
- Discovered machines → create/update Server with status "discovered", store HardwareInfo as JSON labels
|
||||
- Queued machines → update Server status to "provisioning"
|
||||
- Installed machines → update Server with hostname, IP, role, OS, status "installed"
|
||||
- Track which bastion owns which server (add `bastionId` to Server model)
|
||||
- Track hardware info: arch, cpu_model, cpu_cores, memory_gb, disks, nics
|
||||
|
||||
The bastion's local JSON state becomes a cache; labd's database is the source of truth. On startup, the bastion should load its state from labd if available.
|
||||
|
||||
### 2. Resource Model Expansion
|
||||
|
||||
Add new models to the Prisma schema for tracking infrastructure:
|
||||
|
||||
**Network** — L2/L3 network segments
|
||||
- name, cidr, vlan, gateway, domain, dhcpEnabled
|
||||
- Servers have NICs on networks
|
||||
|
||||
**ServerNic** — NIC-to-network mapping
|
||||
- serverId, networkId, mac, ip, name, state (UP/DOWN)
|
||||
- Derived from HardwareInfo during discovery
|
||||
|
||||
**ServerDisk** — Disk inventory per server
|
||||
- serverId, name, sizeGb, model
|
||||
- Derived from HardwareInfo during discovery
|
||||
|
||||
**ClusterMember** — Server-to-cluster membership
|
||||
- clusterId, serverId, role (control-plane, worker)
|
||||
|
||||
### 3. kubectl-style CLI Redesign
|
||||
|
||||
Restructure labctl to follow the `mcpctl` / `kubectl` pattern:
|
||||
|
||||
```
|
||||
# Core CRUD verbs that work on any resource
|
||||
labctl get <resource> [name] # List or get specific resource
|
||||
labctl describe <resource> <name> # Detailed view with relationships
|
||||
labctl create <resource> [flags] # Create a resource
|
||||
labctl delete <resource> <name> # Delete a resource
|
||||
labctl edit <resource> <name> # Edit in $EDITOR
|
||||
labctl apply -f <file> # Declarative apply from YAML
|
||||
|
||||
# Resource types (with aliases)
|
||||
servers (server, srv)
|
||||
clusters (cluster)
|
||||
networks (network, net)
|
||||
bastions (bastion)
|
||||
roles (role)
|
||||
users (user)
|
||||
tokens (token)
|
||||
audit (audit)
|
||||
|
||||
# Output formats
|
||||
-o table (default), -o json, -o yaml, -o wide
|
||||
|
||||
# Examples
|
||||
labctl get servers # List all servers
|
||||
labctl get servers -o wide # With extra columns (disks, NICs)
|
||||
labctl get server labmaster # Get specific server
|
||||
labctl describe server labmaster # Full details + relationships
|
||||
labctl get servers --role worker # Filter by role
|
||||
labctl get servers --status discovered # Filter by status
|
||||
labctl get clusters # List clusters
|
||||
labctl describe cluster lab-k3s # Cluster members, health
|
||||
labctl get networks # List networks
|
||||
labctl create network --name lab --cidr 192.168.8.0/24 --gateway 192.168.8.1
|
||||
|
||||
# Provisioning becomes actions on server resources
|
||||
labctl provision <server> --os fedora-43 --role worker # Queue install
|
||||
labctl reprovision <server> # Reinstall
|
||||
labctl forget <server> # Remove from tracking
|
||||
|
||||
# App management keeps the app subcommand, now verb-first (was: labctl app k3s install <target>)
|
||||
labctl app install k3s <server>
|
||||
labctl app health k3s [server]
|
||||
|
||||
# Admin
|
||||
labctl bastion start [--foreground] # Start local bastion
|
||||
labctl bastion status # Bastion health
|
||||
labctl login # Auth
|
||||
labctl doctor # Diagnostics
|
||||
```
|
||||
|
||||
### 4. Resource Aliases & Resolution
|
||||
|
||||
Follow mcpctl's pattern from `shared.ts`:
|
||||
- Accept singular, plural, and short aliases: `server`, `servers`, `srv` all resolve to the same resource
|
||||
- Accept name or ID: `labctl get server labmaster` or `labctl get server <uuid>`
|
||||
- Accept MAC address for servers: `labctl get server 38:05:25:33:e2:e4`
|
||||
|
||||
### 5. RBAC Integration
|
||||
|
||||
The existing Permission model uses `action:cloud:environment:server` patterns. Wire this into the resource system:
|
||||
- CLI commands check permissions before executing
|
||||
- `labctl get` respects read permissions (only show resources the user can see)
|
||||
- `labctl provision` requires `apply` permission on the target server
|
||||
- `labctl delete` requires `destroy` permission
|
||||
- Audit all resource operations to the AuditLog table
|
||||
|
||||
### 6. Bastion State Directory Fix
|
||||
|
||||
Fix the bug where the CLI's `--dir` default (`/tmp/lab-bastion`) overrides the `BASTION_DIR=/data` environment variable. The CLI option should use the env var as its default:
|
||||
```typescript
|
||||
.option("--dir <dir>", "Bastion data directory", process.env["BASTION_DIR"] ?? "/tmp/lab-bastion")
|
||||
```
|
||||
|
||||
## Technical Constraints
|
||||
|
||||
- Database: CockroachDB with Prisma ORM (already deployed)
|
||||
- API: Fastify + WebSocket (labd)
|
||||
- CLI: Commander.js (labctl)
|
||||
- Auth: mTLS certificates (planned), join tokens (implemented)
|
||||
- Monorepo: pnpm workspace with @lab/shared, @lab/bastion, @lab/cli, @lab/labd
|
||||
- The bastion-to-labd WebSocket protocol is defined in @lab/shared/protocol
|
||||
|
||||
## Success Criteria
|
||||
|
||||
1. `labctl get servers` shows all machines (discovered, provisioning, installed) from the database
|
||||
2. Server state survives bastion and labd pod restarts
|
||||
3. `labctl describe server <name>` shows hardware info, network, cluster membership
|
||||
4. Resources have tracked relationships (server→cluster, server→network, bastion→server)
|
||||
5. RBAC permissions are enforced on CLI operations
|
||||
6. All resource mutations are audit-logged
|
||||
7. CLI follows consistent kubectl-style `verb resource [name] [flags]` pattern
|
||||
355
bastion/DESIGN-LAB-PLATFORM.md
Normal file
355
bastion/DESIGN-LAB-PLATFORM.md
Normal file
@@ -0,0 +1,355 @@
|
||||
# Lab Platform — Design Document
|
||||
|
||||
## Vision
|
||||
|
||||
A unified infrastructure management platform that replaces Puppet with a modern, Pulumi-based system. Manages bare-metal servers, cloud VMs, and k3s clusters through a single CLI and API.
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Developer Workstation (thebeast) │
|
||||
│ │
|
||||
│ lab CLI │
|
||||
│ ├── lab init bastion standalone start (PXE provisioning) │
|
||||
│ ├── lab provision install/reprovision (bare-metal) │
|
||||
│ ├── lab get servers --env production (query) │
|
||||
│ ├── lab exec <server> -- <command> (remote execution) │
|
||||
│ ├── lab logs <server> (log streaming) │
|
||||
│ ├── lab apply -f infra.ts (pulumi via labd) │
|
||||
│ └── lab get roles/users/permissions (RBAC management) │
|
||||
│ │
|
||||
│ Connects to: labd via mTLS │
|
||||
└─────────────────────┬───────────────────────────────────────────┘
|
||||
│ mTLS (client cert)
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ labmaster.ad.itaz.eu (infra node, k3s single-node) │
|
||||
│ │
|
||||
│ ┌──────────────────────────────────────────────────────┐ │
|
||||
│ │ labd (master daemon) │ │
|
||||
│ │ ├── Certificate Authority (issues agent certs) │ │
|
||||
│ │ ├── RBAC Engine (roles, permissions, ACLs) │ │
|
||||
│ │ ├── Agent Registry (connected agents, heartbeats) │ │
|
||||
│ │ ├── Pulumi Executor (runs IaC on behalf of users) │ │
|
||||
│ │ ├── Log Aggregator (receives agent logs) │ │
|
||||
│ │ ├── Module Registry (configuration modules) │ │
|
||||
│ │ └── REST API + WebSocket (agent connections) │ │
|
||||
│ └──────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
│ ┌──────────────────────────────────────────────────────┐ │
|
||||
│ │ bastion (PXE provisioning) │ │
|
||||
│ │ Running as k3s pod with hostNetwork │ │
|
||||
│ └──────────────────────────────────────────────────────┘ │
|
||||
└──────────┬──────────────────────────────────────────────────────┘
|
||||
│ mTLS (agent certs)
|
||||
▼
|
||||
┌──────────────────────┐ ┌──────────────────────┐ ┌────────────┐
|
||||
│ ser9.ad.itaz.eu │ │ worker-2.ad.itaz.eu │ │ AWS EC2 │
|
||||
│ (bare-metal worker) │ │ (bare-metal worker) │ │ instances │
|
||||
│ │ │ │ │ │
|
||||
│ lab-agent │ │ lab-agent │ │ lab-agent │
|
||||
│ ├── heartbeat │ │ ├── heartbeat │ │ ├── ... │
|
||||
│ ├── log shipping │ │ ├── log shipping │ │ └── ... │
|
||||
│ ├── exec handler │ │ ├── exec handler │ │ │
|
||||
│ └── module runner │ │ └── module runner │ │ │
|
||||
└──────────────────────┘ └──────────────────────┘ └────────────┘
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### 1. labd (Master Daemon)
|
||||
|
||||
The central control plane. Runs on labmaster.ad.itaz.eu as a k3s pod.
|
||||
|
||||
**Responsibilities:**
|
||||
- Certificate Authority — signs agent certificates, manages trust chain
|
||||
- Agent Registry — tracks connected agents, heartbeats, status
|
||||
- RBAC — roles, permissions, ACLs per user/group/environment/cloud
|
||||
- Pulumi Executor — runs Pulumi TypeScript code submitted by users
|
||||
- Log Aggregator — receives and stores logs from agents
|
||||
- Module Registry — stores and distributes configuration modules
|
||||
- REST API — for CLI and external integrations
|
||||
- WebSocket — persistent agent connections for real-time commands
|
||||
|
||||
**Tech:** Fastify, CockroachDB (PostgreSQL-compatible, via Prisma; reuse mcpctl patterns), WebSocket
|
||||
|
||||
### 2. lab-agent
|
||||
|
||||
Lightweight daemon running on every managed machine.
|
||||
|
||||
**Responsibilities:**
|
||||
- Connect to labd via mTLS (agent certificate)
|
||||
- Send heartbeats (status, load, disk, memory)
|
||||
- Ship logs (journald → labd)
|
||||
- Execute commands on demand (like `kubectl exec`)
|
||||
- Run configuration modules (like `puppet agent -tv`)
|
||||
- Report module run results
|
||||
|
||||
**Tech:** Standalone TypeScript binary (bun compiled), systemd service
|
||||
|
||||
### 3. lab CLI (extended)
|
||||
|
||||
Extends the existing `lab` CLI with platform management commands.
|
||||
|
||||
**New commands:**
|
||||
```
|
||||
# Server management
|
||||
lab get servers # List all servers
|
||||
lab get servers --env production # Filter by environment
|
||||
lab get servers --cloud baremetal # Filter by cloud
|
||||
lab get servers --label role=k3s-worker # Filter by label
|
||||
lab describe server <name> # Detailed server info
|
||||
lab exec <server> -- <command> # Remote command execution
|
||||
lab logs <server> [-f] # Stream server logs
|
||||
|
||||
# Infrastructure as Code
|
||||
lab apply -f <file.ts> # Execute Pulumi code via labd
|
||||
lab plan -f <file.ts> # Dry-run Pulumi code
|
||||
lab destroy -f <file.ts> # Tear down resources
|
||||
|
||||
# RBAC
|
||||
lab get roles # List roles
|
||||
lab get users # List users
|
||||
lab create role <name> # Create role
|
||||
lab bind role <role> --user <user> # Bind role to user
|
||||
lab get permissions # List permissions
|
||||
|
||||
# Environment/Cloud management
|
||||
lab get environments # List environments
|
||||
lab get clouds # List clouds
|
||||
lab create environment <name> --cloud <cloud>
|
||||
|
||||
# Module management
|
||||
lab get modules # List available modules
|
||||
lab apply module <name> --target <server> # Apply module to server
|
||||
```
|
||||
|
||||
### 4. Certificate Authority
|
||||
|
||||
Built into labd. Issues and manages certificates for agents and users.
|
||||
|
||||
**Flow:**
|
||||
```
|
||||
1. Agent starts with a join token (one-time or reusable)
|
||||
2. Agent generates CSR, sends to labd with token
|
||||
3. labd validates token, signs certificate
|
||||
4. Agent receives signed cert + CA cert
|
||||
5. All future communication uses mTLS
|
||||
|
||||
For CLI users:
|
||||
1. User runs `lab login` or `lab init`
|
||||
2. labd issues a client certificate (or uses existing SSH keys)
|
||||
3. CLI uses client cert for all API calls
|
||||
```
|
||||
|
||||
**Token types:**
|
||||
- **One-time token** — for individual bare-metal servers (generated during PXE provision)
|
||||
- **Reusable token** — for autoscaling groups (AWS ASG instances use the same token)
|
||||
|
||||
### 5. RBAC Model
|
||||
|
||||
Reuse mcpctl's RBAC patterns. Hierarchical permissions:
|
||||
|
||||
```
|
||||
action:cloud:environment:server (scope hierarchy: Cloud → Environment → Server)
|
||||
|
||||
Examples:
|
||||
- exec:baremetal:lab:* — can exec on any lab server
|
||||
- *:baremetal:lab:puppet — full access to puppet server
|
||||
- read:aws:production:* — read-only on all AWS prod servers
|
||||
- *:*:*:* — superadmin
|
||||
```
|
||||
|
||||
**Resources:**
|
||||
- servers, environments, clouds, modules, roles, users, pulumi-stacks
|
||||
|
||||
**Actions:**
|
||||
- read, exec, apply, destroy, manage, admin
|
||||
|
||||
**Whitelist/Blacklist:**
|
||||
- Roles can have `allow` and `deny` rules
|
||||
- Deny takes precedence (like AWS IAM)
|
||||
|
||||
### 6. Module System
|
||||
|
||||
Configuration modules define the desired state of a server.
|
||||
|
||||
**Module structure:**
|
||||
```
|
||||
modules/
|
||||
k3s-server/
|
||||
module.yaml # Metadata: name, version, targets, deps
|
||||
src/
|
||||
index.ts # Module entry point
|
||||
install.ts # Installation logic
|
||||
configure.ts # Configuration logic
|
||||
health.ts # Health check
|
||||
tests/
|
||||
install.test.ts
|
||||
k3s-agent/
|
||||
module.yaml
|
||||
src/
|
||||
index.ts
|
||||
labd/
|
||||
module.yaml
|
||||
src/
|
||||
index.ts # Deploy labd to k3s
|
||||
```
|
||||
|
||||
**module.yaml:**
|
||||
```yaml
|
||||
name: k3s-server
|
||||
version: 0.1.0
|
||||
description: Install and configure k3s server
|
||||
targets:
|
||||
roles: [infra]
|
||||
labels:
|
||||
k3s: server
|
||||
dependencies:
|
||||
- base-server
|
||||
```
|
||||
|
||||
**Module sources:**
|
||||
- Built-in modules (in this repo, e.g., k3s-server, labd)
|
||||
- External modules (separate git repos, pulled by URL)
|
||||
- Module registry (future — like Puppet Forge)
|
||||
|
||||
### 7. Cloud/Environment Model
|
||||
|
||||
```
|
||||
Cloud: baremetal
|
||||
└── Environment: lab
|
||||
├── Server: puppet.ad.itaz.eu (role=infra, labels={k3s=server})
|
||||
├── Server: ser9.ad.itaz.eu (role=worker, labels={k3s=agent})
|
||||
└── ...
|
||||
|
||||
Cloud: aws
|
||||
└── Environment: production
|
||||
├── Server: i-abc123 (from ASG web-servers)
|
||||
├── Server: i-def456 (from ASG web-servers)
|
||||
└── ...
|
||||
└── Environment: staging
|
||||
└── ...
|
||||
```
|
||||
|
||||
Each bastion creates an environment under the `baremetal` cloud. Instances launched by AWS autoscaling groups register as servers in an environment under the `aws` cloud.
|
||||
|
||||
### 8. Pulumi Integration
|
||||
|
||||
Users submit Pulumi TypeScript code to labd for execution.
|
||||
|
||||
```bash
|
||||
# Apply infrastructure code
|
||||
lab apply -f infra/k3s-cluster.ts --env lab
|
||||
|
||||
# The file is sent to labd, which:
|
||||
# 1. Checks RBAC (does user have apply permission for this env?)
|
||||
# 2. Creates a Pulumi stack
|
||||
# 3. Executes `pulumi up` in a sandboxed environment
|
||||
# 4. Streams output back to CLI
|
||||
# 5. Stores state in Pulumi backend (local or S3)
|
||||
```
|
||||
|
||||
**Future AWS extension:**
|
||||
```typescript
|
||||
// infra/aws-web-servers.ts
|
||||
import * as aws from "@pulumi/aws";
|
||||
|
||||
const asg = new aws.autoscaling.Group("web-servers", {
|
||||
maxSize: 10,
|
||||
minSize: 2,
|
||||
launchTemplate: { /* ... */ },
|
||||
// User data installs lab-agent with reusable join token
|
||||
});
|
||||
```
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
lab/
|
||||
bastion/ # Existing — PXE provisioning
|
||||
|
||||
src/
|
||||
shared/ # @lab/shared — types, constants, RBAC
|
||||
labd/ # @lab/labd — master daemon
|
||||
src/
|
||||
main.ts
|
||||
server.ts
|
||||
ca/ # Certificate Authority
|
||||
rbac/ # RBAC engine (reuse mcpctl patterns)
|
||||
agents/ # Agent registry + WebSocket
|
||||
pulumi/ # Pulumi executor
|
||||
logs/ # Log aggregation
|
||||
modules/ # Module registry
|
||||
routes/ # REST API
|
||||
agent/ # @lab/agent — agent daemon
|
||||
src/
|
||||
main.ts
|
||||
connection.ts # mTLS WebSocket to labd
|
||||
heartbeat.ts
|
||||
executor.ts # Command execution
|
||||
logs.ts # Log shipping
|
||||
modules.ts # Module runner
|
||||
cli/ # @lab/cli — extends existing CLI
|
||||
src/
|
||||
commands/
|
||||
init/bastion/ # Existing bastion commands
|
||||
provision/ # Existing provision commands
|
||||
get/ # New: get servers/roles/users/etc
|
||||
exec/ # New: remote execution
|
||||
logs/ # New: log streaming
|
||||
apply/ # New: pulumi apply
|
||||
rbac/ # New: role management
|
||||
|
||||
modules/ # Built-in modules
|
||||
k3s-server/ # Deploy k3s server
|
||||
k3s-agent/ # Deploy k3s agent
|
||||
labd/ # Deploy labd to k3s
|
||||
lab-agent/ # Deploy lab-agent to servers
|
||||
|
||||
deploy/
|
||||
k3s/ # Existing k3s manifests for bastion
|
||||
labd/ # k3s manifests for labd
|
||||
```
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1: Foundation (current + next)
|
||||
- [x] Bastion (PXE provisioning) — DONE
|
||||
- [x] CLI structure (`lab init/provision`) — DONE
|
||||
- [ ] Rename puppet to labmaster, reprovision
|
||||
- [ ] Deploy k3s on labmaster
|
||||
- [ ] Build labd skeleton (Fastify + Prisma)
|
||||
- [ ] Certificate Authority (issue/sign certs)
|
||||
- [ ] Agent skeleton (connect, heartbeat)
|
||||
|
||||
### Phase 2: Core Platform
|
||||
- [ ] RBAC engine (roles, permissions, ACLs)
|
||||
- [ ] `lab get servers` with environment/cloud/label filters
|
||||
- [ ] `lab exec` remote command execution
|
||||
- [ ] `lab logs` streaming
|
||||
- [ ] Agent auto-enrollment via PXE provision (join token in kickstart)
|
||||
|
||||
### Phase 3: Infrastructure as Code
|
||||
- [ ] Module system (define, apply, health check)
|
||||
- [ ] k3s-server module (deploy k3s)
|
||||
- [ ] labd module (deploy labd to k3s)
|
||||
- [ ] Pulumi executor in labd
|
||||
- [ ] `lab apply -f` command
|
||||
|
||||
### Phase 4: Multi-Cloud
|
||||
- [ ] AWS provider (Pulumi-based)
|
||||
- [ ] Reusable join tokens for autoscaling groups
|
||||
- [ ] Cloud/environment model
|
||||
- [ ] Auto-discovery of cloud instances
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
1. **Pulumi over Puppet** — TypeScript-native, same language for IaC and platform code
|
||||
2. **mTLS over SSH** — proper PKI, scalable, no key management per-server
|
||||
3. **Agents connect to master** (not master pushing to agents) — works through NATs, firewalls
|
||||
4. **RBAC from day one** — security-first, deny by default
|
||||
5. **Module system inspired by Puppet** — declarative, testable, versionable
|
||||
6. **Multi-cloud extensible** — cloud is just a label, provider is pluggable
|
||||
7. **Reuse mcpctl patterns** — Prisma DB, Fastify routes, CLI structure, RBAC model
|
||||
93
bastion/Dockerfile.bastion
Normal file
93
bastion/Dockerfile.bastion
Normal file
@@ -0,0 +1,93 @@
|
||||
# Dockerfile.bastion -- PXE boot server (dnsmasq DHCP/TFTP + HTTP)
|
||||
# Requires host networking and NET_ADMIN/NET_RAW capabilities.
|
||||
|
||||
# ── Stage 1: Build ───────────────────────────────────────────────
|
||||
FROM node:22-alpine AS builder
|
||||
|
||||
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy workspace config and package manifests first (layer cache)
|
||||
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json tsconfig.base.json tsconfig.json ./
|
||||
COPY src/shared/package.json src/shared/tsconfig.json src/shared/
|
||||
COPY src/bastion/package.json src/bastion/tsconfig.json src/bastion/
|
||||
COPY src/cli/package.json src/cli/tsconfig.json src/cli/
|
||||
COPY src/modules/package.json src/modules/tsconfig.json src/modules/
|
||||
|
||||
# Install all dependencies (dev included -- needed for build)
|
||||
RUN pnpm install --frozen-lockfile
|
||||
|
||||
# Copy source code
|
||||
COPY src/shared/src/ src/shared/src/
|
||||
COPY src/bastion/src/ src/bastion/src/
|
||||
COPY src/cli/src/ src/cli/src/
|
||||
COPY src/modules/src/ src/modules/src/
|
||||
COPY src/modules/modules/ src/modules/modules/
|
||||
|
||||
# Build TypeScript
|
||||
RUN pnpm build
|
||||
|
||||
# ── Stage 1b: Build iPXE snp.efi (uses UEFI SNP protocol for ISO boot) ──
|
||||
FROM fedora:43 AS ipxe-builder
|
||||
|
||||
RUN dnf install -y git gcc make perl-interpreter xz-devel gcc-aarch64-linux-gnu && dnf clean all
|
||||
RUN git clone --depth=1 https://github.com/ipxe/ipxe.git /tmp/ipxe
|
||||
RUN cd /tmp/ipxe/src && make bin-x86_64-efi/snp.efi && \
|
||||
make CROSS_COMPILE=aarch64-linux-gnu- bin-arm64-efi/snp.efi
|
||||
|
||||
# ── Stage 2: Production runtime (Fedora -- needs dnsmasq) ───────
|
||||
FROM fedora:43
|
||||
|
||||
RUN dnf install -y \
|
||||
dnsmasq \
|
||||
ipxe-bootimgs-x86 \
|
||||
ipxe-bootimgs-aarch64 \
|
||||
iproute \
|
||||
curl \
|
||||
openssh-clients \
|
||||
nodejs \
|
||||
npm \
|
||||
xorriso \
|
||||
mtools \
|
||||
&& dnf clean all
|
||||
|
||||
# iPXE snp.efi built from source (Fedora only ships snponly, which can't
|
||||
# boot from CD-ROM/USB -- it requires PXE chainloading)
|
||||
COPY --from=ipxe-builder /tmp/ipxe/src/bin-x86_64-efi/snp.efi /usr/share/ipxe/ipxe-snp-x86_64.efi
|
||||
COPY --from=ipxe-builder /tmp/ipxe/src/bin-arm64-efi/snp.efi /usr/share/ipxe/arm64-efi/ipxe-snp.efi
|
||||
|
||||
# Install pnpm
|
||||
RUN npm install -g pnpm@9
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy workspace config and package manifests
|
||||
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
|
||||
COPY src/shared/package.json src/shared/
|
||||
COPY src/bastion/package.json src/bastion/
|
||||
COPY src/cli/package.json src/cli/
|
||||
COPY src/modules/package.json src/modules/
|
||||
|
||||
# Install production dependencies
|
||||
RUN pnpm install --frozen-lockfile --prod 2>/dev/null || pnpm install --prod
|
||||
|
||||
# Copy built output from builder
|
||||
COPY --from=builder /app/src/shared/dist/ src/shared/dist/
|
||||
COPY --from=builder /app/src/bastion/dist/ src/bastion/dist/
|
||||
COPY --from=builder /app/src/cli/dist/ src/cli/dist/
|
||||
COPY --from=builder /app/src/modules/dist/ src/modules/dist/
|
||||
|
||||
# Create data directories
|
||||
RUN mkdir -p /data/state /data/tftp /data/http
|
||||
|
||||
ENV NODE_ENV=production
|
||||
ENV BASTION_DIR=/data
|
||||
ENV HTTP_PORT=8080
|
||||
|
||||
EXPOSE 8080/tcp
|
||||
EXPOSE 67/udp
|
||||
EXPOSE 69/udp
|
||||
EXPOSE 4011/udp
|
||||
|
||||
ENTRYPOINT ["node", "src/cli/dist/index.js", "init", "bastion", "standalone", "start", "--foreground"]
|
||||
73
bastion/Dockerfile.labd
Normal file
73
bastion/Dockerfile.labd
Normal file
@@ -0,0 +1,73 @@
|
||||
# Dockerfile.labd -- multi-stage build for the labd master daemon
|
||||
# Runs the Fastify API server with Prisma/CockroachDB backend.
|
||||
|
||||
# ── Stage 1: Build ───────────────────────────────────────────────
|
||||
FROM node:22-alpine AS builder
|
||||
|
||||
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy workspace config and package manifests first (layer cache)
|
||||
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json tsconfig.base.json tsconfig.json ./
|
||||
COPY src/shared/package.json src/shared/tsconfig.json src/shared/
|
||||
COPY src/labd/package.json src/labd/tsconfig.json src/labd/
|
||||
|
||||
# Install all dependencies (dev included -- needed for build)
|
||||
RUN pnpm install --frozen-lockfile
|
||||
|
||||
# Copy Prisma schema and generate client
|
||||
COPY src/labd/prisma/ src/labd/prisma/
|
||||
RUN pnpm --filter @lab/labd exec prisma generate
|
||||
|
||||
# Copy source code
|
||||
COPY src/shared/src/ src/shared/src/
|
||||
COPY src/labd/src/ src/labd/src/
|
||||
|
||||
# Build TypeScript (shared first via project references)
|
||||
RUN pnpm --filter @lab/shared build && pnpm --filter @lab/labd build
|
||||
|
||||
# Hoist the generated Prisma client so stage 2 can COPY it from a stable path
|
||||
RUN mkdir -p /app/_prisma && \
|
||||
cp -r $(find /app/node_modules/.pnpm -path '*/.prisma/client' -type d | head -1) /app/_prisma/client
|
||||
|
||||
# ── Stage 2: Production runtime ─────────────────────────────────
|
||||
FROM node:22-alpine
|
||||
|
||||
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy workspace config and package manifests
|
||||
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
|
||||
COPY src/shared/package.json src/shared/
|
||||
COPY src/labd/package.json src/labd/
|
||||
|
||||
# Install production dependencies only
|
||||
RUN pnpm install --frozen-lockfile --prod 2>/dev/null || pnpm install --prod
|
||||
|
||||
# Copy built output from builder
|
||||
COPY --from=builder /app/src/shared/dist/ src/shared/dist/
|
||||
COPY --from=builder /app/src/labd/dist/ src/labd/dist/
|
||||
|
||||
# Copy Prisma schema + generated client into pnpm store location
|
||||
# Prisma expects .prisma/client as a sibling of @prisma/ in the same node_modules
|
||||
COPY --from=builder /app/src/labd/prisma/ src/labd/prisma/
|
||||
COPY --from=builder /app/_prisma/client/ /tmp/_prisma_client/
|
||||
RUN PRISMA_CLIENT_DIR=$(find /app/node_modules/.pnpm -path '*/@prisma/client' -type d | head -1) && \
|
||||
NM_DIR="$(dirname "$(dirname "$PRISMA_CLIENT_DIR")")" && \
|
||||
mkdir -p "$NM_DIR/.prisma/client" && \
|
||||
cp -r /tmp/_prisma_client/* "$NM_DIR/.prisma/client/" && \
|
||||
echo "Installed Prisma generated client at: $NM_DIR/.prisma/client/" && \
|
||||
rm -rf /tmp/_prisma_client
|
||||
|
||||
ENV NODE_ENV=production
|
||||
ENV DATABASE_URL=postgresql://root@cockroachdb:26257/labctl?sslmode=disable
|
||||
ENV LABD_PORT=3100
|
||||
ENV LABD_HOST=0.0.0.0
|
||||
|
||||
EXPOSE 3100
|
||||
|
||||
USER node
|
||||
|
||||
ENTRYPOINT ["node", "src/labd/dist/main.js"]
|
||||
358
bastion/README.md
Normal file
358
bastion/README.md
Normal file
@@ -0,0 +1,358 @@
|
||||
# labctl
|
||||
|
||||
Infrastructure management platform for bare-metal servers, Kubernetes clusters, and cloud resources.
|
||||
|
||||
## Install
|
||||
|
||||
```bash
|
||||
# From Gitea packages (Fedora/RHEL)
|
||||
sudo dnf config-manager --add-repo https://mysources.co.uk/michal/-/packages/rpm/
|
||||
sudo dnf install labctl
|
||||
|
||||
# From source
|
||||
cd bastion && pnpm install && pnpm build
|
||||
bun build src/cli/src/index.ts --compile --outfile dist/labctl
|
||||
sudo cp dist/labctl /usr/bin/labctl
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Start the bastion (PXE provisioning server)
|
||||
sudo labctl init bastion standalone start
|
||||
|
||||
# PXE boot a machine — it gets discovered automatically
|
||||
labctl provision list
|
||||
|
||||
# Install Fedora on a discovered machine
|
||||
labctl provision install 78:55:36:08:35:14 labmaster --role infra
|
||||
|
||||
# Reprovision (SSH reboot into PXE, preserves /home /srv /var/lib/rancher)
|
||||
labctl provision reprovision 78:55:36:08:35:14 labmaster --role infra
|
||||
```
|
||||
|
||||
## Commands
|
||||
|
||||
### Bastion (PXE Provisioning)
|
||||
|
||||
```bash
|
||||
# Lifecycle
|
||||
sudo labctl init bastion standalone start # Start bastion (daemonized)
|
||||
sudo labctl init bastion standalone start --foreground # Start in foreground
|
||||
sudo labctl init bastion standalone stop # Stop bastion
|
||||
labctl init bastion standalone status # Show status, PID, machine count
|
||||
|
||||
# Options
|
||||
sudo labctl init bastion standalone start \
|
||||
--port 8080 \
|
||||
--dir /tmp/lab-bastion \
|
||||
--domain ad.itaz.eu \
|
||||
--dhcp-mode proxy \
|
||||
--fedora 43 \
|
||||
--timezone Europe/London
|
||||
```
|
||||
|
||||
### Provisioning
|
||||
|
||||
```bash
|
||||
# List all machines (discovered, queued, installing, installed)
|
||||
labctl provision list
|
||||
|
||||
# Queue a machine for Fedora install
|
||||
labctl provision install <mac> <hostname> --role worker # k3s worker (gets longhorn)
|
||||
labctl provision install <mac> <hostname> --role infra # infra node (gets k3s server + /var/lib/rancher)
|
||||
|
||||
# Reprovision — queues install, SSHes in, sets PXE boot, reboots
|
||||
labctl provision reprovision <mac> <hostname> --role infra
|
||||
|
||||
# Remove a machine from state
|
||||
labctl provision forget <mac>
|
||||
|
||||
# Options
|
||||
labctl provision install <mac> <hostname> \
|
||||
--role worker \
|
||||
--disk nvme0n1 \
|
||||
--port 8080
|
||||
```
|
||||
|
||||
### Server Management (planned)
|
||||
|
||||
```bash
|
||||
# List servers with filters
|
||||
labctl get servers
|
||||
labctl get servers --env production
|
||||
labctl get servers --cloud baremetal
|
||||
labctl get servers --cloud aws
|
||||
labctl get servers --label role=k3s-worker
|
||||
labctl get servers --label asg=web-servers
|
||||
|
||||
# Detailed server info
|
||||
labctl describe server/puppet
|
||||
labctl describe server/ser9
|
||||
```
|
||||
|
||||
### Remote Execution (planned)
|
||||
|
||||
```bash
|
||||
# Execute commands on servers (audited, RBAC-checked)
|
||||
labctl exec server/puppet -- whoami
|
||||
labctl exec server/puppet -- systemctl status k3s
|
||||
labctl exec server/puppet -it -- bash # interactive TTY
|
||||
labctl exec server/puppet --timeout 30s -- long-running-task
|
||||
```
|
||||
|
||||
### Kubernetes (planned)
|
||||
|
||||
```bash
|
||||
# Proxied kubectl — audited, RBAC-checked, no kubeconfig needed
|
||||
labctl kubectl --cluster lab get pods
|
||||
labctl kubectl --cluster lab get nodes
|
||||
labctl kubectl --cluster lab logs pod/nginx -f
|
||||
labctl kubectl --cluster lab exec pod/nginx -- bash
|
||||
labctl kubectl --cluster lab apply -f deployment.yaml
|
||||
labctl kubectl --cluster aws-prod get pods --namespace app
|
||||
|
||||
# Cluster management
|
||||
labctl clusters add lab --kubeconfig ~/.kube/config
|
||||
labctl clusters list
|
||||
labctl clusters remove staging
|
||||
```
|
||||
|
||||
### Logs (planned)
|
||||
|
||||
```bash
|
||||
# Server logs (journalctl passthrough via agent)
|
||||
labctl logs server/puppet # all journal
|
||||
labctl logs server/puppet -f # follow (live stream)
|
||||
labctl logs server/puppet -n 100 # last 100 lines
|
||||
labctl logs server/puppet -u k3s # specific unit
|
||||
labctl logs server/puppet -u sshd --since "1h ago" # time range
|
||||
labctl logs server/puppet --since "2026-03-17" --until "2026-03-18"
|
||||
labctl logs server/puppet -k # kernel only
|
||||
labctl logs server/puppet -p err # errors only
|
||||
labctl logs server/puppet --file /var/log/nginx/error.log # tail a file
|
||||
labctl logs server/puppet --file /var/log/nginx/error.log -n 50
|
||||
|
||||
# App logs (k8s pod logs)
|
||||
labctl logs app/bastion
|
||||
labctl logs app/bastion -f
|
||||
labctl logs app/labd --container postgres
|
||||
|
||||
# Pulumi execution logs
|
||||
labctl logs pulumi/run-abc123
|
||||
labctl logs pulumi/run-abc123 -f # follow active run
|
||||
|
||||
# Bastion logs
|
||||
labctl logs bastion/lab
|
||||
labctl logs bastion/lab --mac 78:55:36:08:35:14 # specific machine's install
|
||||
|
||||
# Agent daemon logs
|
||||
labctl logs agent/puppet
|
||||
|
||||
# Audit logs
|
||||
labctl logs audit
|
||||
labctl logs audit --user michal
|
||||
labctl logs audit --user michal --since "1h ago"
|
||||
labctl logs audit/michal-20260317-abc123 # specific session
|
||||
labctl logs audit --action kubectl --cluster lab
|
||||
labctl logs audit --action exec --server puppet
|
||||
```
|
||||
|
||||
### Apps (planned, replaces Helm)
|
||||
|
||||
```bash
|
||||
# Install Pulumi-based apps to Kubernetes
|
||||
labctl apps list # available apps
|
||||
labctl apps install bastion # deploy bastion
|
||||
labctl apps install bastion --set port=8080 # with overrides
|
||||
labctl apps install bastion -f values.yaml # from values file
|
||||
labctl apps install monitoring # Prometheus + Grafana
|
||||
|
||||
# Manage deployed apps
|
||||
labctl apps status bastion # health, version, config
|
||||
labctl apps upgrade bastion # rolling upgrade
|
||||
labctl apps history bastion # version history
|
||||
labctl apps rollback bastion 2 # rollback to version 2
|
||||
labctl apps uninstall bastion
|
||||
```
|
||||
|
||||
### Infrastructure as Code (planned)
|
||||
|
||||
```bash
|
||||
# Execute Pulumi programs via labd (RBAC-checked)
|
||||
labctl apply -f infra/k3s-cluster.ts --env lab
|
||||
labctl plan -f infra/k3s-cluster.ts --env lab # dry run
|
||||
labctl destroy -f infra/k3s-cluster.ts --env lab
|
||||
```
|
||||
|
||||
### RBAC (planned)
|
||||
|
||||
```bash
|
||||
# Roles and permissions
|
||||
labctl get roles
|
||||
labctl get users
|
||||
labctl create role viewer --allow "read:*:*:*"
|
||||
labctl create role lab-admin --allow "*:baremetal:lab:*" --deny "destroy:*:*:*"
|
||||
labctl bind role lab-admin --user michal
|
||||
labctl unbind role lab-admin --user michal
|
||||
|
||||
# Permission model: action:cloud:environment:server
|
||||
# read:*:*:* — read everything
|
||||
# exec:baremetal:lab:* — exec on any lab server
|
||||
# kubectl:*:*:* — kubectl on any cluster
|
||||
# *:baremetal:lab:puppet — full access to puppet only
|
||||
# manage:*:*:* — manage apps, clusters, tokens
|
||||
```
|
||||
|
||||
### Environments and Clouds (planned)
|
||||
|
||||
```bash
|
||||
labctl get environments
|
||||
labctl get clouds
|
||||
labctl create environment staging --cloud aws
|
||||
labctl create environment lab --cloud baremetal
|
||||
```
|
||||
|
||||
## Partition Layout
|
||||
|
||||
Machines installed by the bastion get this LVM layout:
|
||||
|
||||
### Worker role (k3s worker with Longhorn)
|
||||
```
|
||||
/boot/efi 600MB EFI
|
||||
/boot 3GB ext4
|
||||
── LVM VG: labvg ──
|
||||
swap 27GB (matches RAM)
|
||||
/ 33GB xfs
|
||||
/var 100GB xfs
|
||||
/var/log 10GB xfs
|
||||
/home 10GB xfs ← preserved on reprovision
|
||||
/srv 20GB xfs ← preserved on reprovision
|
||||
/tmp tmpfs 4GB
|
||||
/var/lib/longhorn rest xfs ← preserved on reprovision (Longhorn PVC storage)
|
||||
```
|
||||
|
||||
### Infra role (k3s server, labmaster)
|
||||
```
|
||||
/boot/efi 600MB EFI
|
||||
/boot 3GB ext4
|
||||
── LVM VG: labvg ──
|
||||
swap 27GB (matches RAM)
|
||||
/ 33GB xfs
|
||||
/var 100GB xfs
|
||||
/var/log 10GB xfs
|
||||
/home 10GB xfs ← preserved on reprovision
|
||||
/srv 20GB xfs ← preserved on reprovision
|
||||
/var/lib/rancher 20GB xfs ← preserved on reprovision (k3s etcd data)
|
||||
/tmp tmpfs 4GB
|
||||
```
|
||||
|
||||
On reprovision, OS partitions (`/`, `/var`, `/var/log`, `swap`) are wiped. Data partitions (`/home`, `/srv`, `/var/lib/longhorn`, `/var/lib/rancher`) are preserved.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌──────────────────────────────────────────────────────────────┐
|
||||
│ labctl CLI │
|
||||
│ init | provision | get | exec | logs | apply | apps | kubectl│
|
||||
└───────────────────────────┬──────────────────────────────────┘
|
||||
│ mTLS
|
||||
▼
|
||||
┌──────────────────────────────────────────────────────────────┐
|
||||
│ labd (master daemon — stateless, on k3s) │
|
||||
│ ┌─────┐ ┌──────┐ ┌──────┐ ┌────────┐ ┌──────┐ ┌────────┐ │
|
||||
│ │ CA │ │ RBAC │ │ Logs │ │ Pulumi │ │ Apps │ │kubectl │ │
|
||||
│ │ │ │ │ │relay │ │executor│ │ │ │ proxy │ │
|
||||
│ └─────┘ └──────┘ └──────┘ └────────┘ └──────┘ └────────┘ │
|
||||
│ CockroachDB │
|
||||
└──────────────┬─────────────────────────┬─────────────────────┘
|
||||
│ mTLS │ mTLS
|
||||
┌──────────▼───────────┐ ┌──────────▼───────────┐
|
||||
│ lab-agent │ │ lab-agent │
|
||||
│ bare-metal server │ │ AWS EC2 / cloud VM │
|
||||
│ ┌────────────────┐ │ │ ┌────────────────┐ │
|
||||
│ │ heartbeat │ │ │ │ heartbeat │ │
|
||||
│ │ exec handler │ │ │ │ exec handler │ │
|
||||
│ │ log streamer │ │ │ │ log streamer │ │
|
||||
│ │ module runner │ │ │ │ module runner │ │
|
||||
│ └────────────────┘ │ │ └────────────────┘ │
|
||||
└──────────────────────┘ └──────────────────────┘
|
||||
```
|
||||
|
||||
## Technology Stack
|
||||
|
||||
| Component | Technology |
|
||||
|-----------|-----------|
|
||||
| Language | TypeScript (ESM) |
|
||||
| CLI | Commander.js |
|
||||
| HTTP Server | Fastify + WebSocket |
|
||||
| Database | CockroachDB (PostgreSQL compatible) |
|
||||
| ORM | Prisma |
|
||||
| IaC | Pulumi (TypeScript) |
|
||||
| k8s CNI | Cilium |
|
||||
| Auth | mTLS (built-in CA) |
|
||||
| Packaging | nfpm (RPM/DEB), bun compile |
|
||||
| Containers | Podman + podman-compose |
|
||||
| CI/CD | Gitea Actions |
|
||||
| Testing | Vitest |
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
cd bastion
|
||||
|
||||
# Install dependencies
|
||||
pnpm install
|
||||
|
||||
# Build all packages
|
||||
pnpm build
|
||||
|
||||
# Run tests (30 tests)
|
||||
pnpm test:run
|
||||
|
||||
# Type check
|
||||
pnpm typecheck
|
||||
|
||||
# Lint
|
||||
pnpm lint
|
||||
|
||||
# Generate shell completions
|
||||
pnpm completions:generate
|
||||
|
||||
# Build standalone binary
|
||||
bun build src/cli/src/index.ts --compile --outfile dist/labctl
|
||||
|
||||
# Build RPM/DEB packages (both architectures)
|
||||
bash scripts/build-rpm.sh --all
|
||||
|
||||
# Build Docker image
|
||||
bash scripts/build-bastion.sh
|
||||
|
||||
# Full release (build + publish + install)
|
||||
bash scripts/release.sh
|
||||
```
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
bastion/
|
||||
├── src/
|
||||
│ ├── shared/ # @lab/shared — types, constants
|
||||
│ ├── bastion/ # @lab/bastion — PXE provisioning server
|
||||
│ ├── cli/ # @lab/cli — CLI binary (labctl)
|
||||
│ ├── labd/ # @lab/labd — master daemon (planned)
|
||||
│ └── agent/ # @lab/agent — server agent (planned)
|
||||
├── modules/ # Built-in configuration modules (planned)
|
||||
├── deploy/
|
||||
│ └── k3s/ # Kubernetes manifests
|
||||
├── stack/
|
||||
│ ├── Dockerfile
|
||||
│ └── docker-compose.yml
|
||||
├── scripts/ # Build, publish, release scripts
|
||||
├── completions/ # Generated shell completions
|
||||
└── ARCHITECTURE.md
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
121
bastion/completions/labctl.bash
Normal file
121
bastion/completions/labctl.bash
Normal file
@@ -0,0 +1,121 @@
|
||||
# labctl bash completions -- auto-generated by scripts/generate-completions.ts
|
||||
# DO NOT EDIT MANUALLY -- run: pnpm completions:generate
|
||||
|
||||
# Bash completion entry point for labctl.
#
# Walks the words typed before the cursor to find the active subcommand chain
# (e.g. "provision install"), then offers that chain's subcommands and options.
#
# NOTE(review): this file is generated by scripts/generate-completions.ts; the
# same change must be made in the generator or it will be overwritten.
_labctl() {
    local cur prev words cword
    _init_completion || return
    COMPREPLY=()

    local top_commands="version init provision config login doctor app roles"

    # Candidates offered at the top level (no subcommand typed yet).
    local root_completions="$top_commands -h --help -v --version"

    # Candidates (subcommands and options) for every known subcommand chain.
    local -A completions=(
        ["init bastion standalone start"]="--port --dir --domain --dhcp-mode --fedora --arch --timezone --locale --skip-dnsmasq --skip-artifacts --foreground -h --help"
        ["init bastion standalone stop"]="--dir -h --help"
        ["init bastion standalone status"]="--dir --port -h --help"
        ["init bastion standalone"]="start stop status -h --help"
        ["app labcontroller deploy"]="--user --port --crdb-replicas -h --help"
        ["app labcontroller status"]="--user --port -h --help"
        ["app k3s install"]="--role --user --port --k3s-server --k3s-token -h --help"
        ["app k3s health"]="--user --port -h --help"
        ["app k3s list"]="--user --port -h --help"
        ["init bastion"]="standalone -h --help"
        ["provision list"]="--port -h --help"
        ["provision install"]="--role --os --disk --port -h --help"
        ["provision reprovision"]="--role --os --disk --port -h --help"
        ["provision forget"]="--port -h --help"
        ["provision logs"]="-f --follow --port -h --help"
        ["config list"]="-h --help"
        ["config get"]="-h --help"
        ["config set"]="-h --help"
        ["config path"]="-h --help"
        ["app labcontroller"]="deploy status -h --help"
        ["app k3s"]="install health list -h --help"
        ["version"]="-h --help"
        ["init"]="bastion -h --help"
        ["provision"]="list install reprovision forget logs -h --help"
        ["config"]="list get set path -h --help"
        ["login"]="--server -h --help"
        ["doctor"]="--json -h --help"
        ["app"]="labcontroller k3s -h --help"
        ["roles"]="-h --help"
    )

    # Extract the subcommand chain. Options are skipped, the value following a
    # value-taking option is skipped, and any other word only extends the chain
    # when it is a listed subcommand of the chain so far -- so positional
    # targets (hostnames, MACs) no longer break the lookup.
    local chain="" word known
    local i skip_next=false
    for ((i = 1; i < cword; i++)); do
        word="${words[i]}"
        if $skip_next; then skip_next=false; continue; fi
        case "$word" in
            --port|--dir|--domain|--dhcp-mode|--fedora|--arch|--timezone|--locale|--role|--os|--disk|--server|--user|--crdb-replicas|--k3s-server|--k3s-token)
                skip_next=true ;; # next word is this option's value
            -*) ;; # flag without a value
            *)
                if [[ -z "$chain" ]]; then
                    known="$root_completions"
                else
                    known="${completions[$chain]}"
                fi
                if [[ " $known " == *" $word "* ]]; then
                    chain="${chain:+$chain }$word"
                fi
                ;;
        esac
    done

    # The cursor sits on the value of an option: offer its known values where
    # they exist, otherwise offer nothing rather than a list of unrelated flags.
    if $skip_next; then
        case "$chain:$prev" in
            "provision install:--role"|"provision reprovision:--role")
                COMPREPLY=($(compgen -W "vanilla worker infra labcontroller" -- "$cur")) ;;
            "provision install:--os"|"provision reprovision:--os")
                COMPREPLY=($(compgen -W "fedora-43 ubuntu-26.04" -- "$cur")) ;;
        esac
        return
    fi

    local candidates
    if [[ -z "$chain" ]]; then
        candidates="$root_completions"
    else
        candidates="${completions[$chain]}"
    fi
    COMPREPLY=($(compgen -W "$candidates" -- "$cur"))
}

complete -F _labctl labctl
|
||||
202
bastion/completions/labctl.fish
Normal file
202
bastion/completions/labctl.fish
Normal file
@@ -0,0 +1,202 @@
|
||||
# labctl fish completions -- auto-generated by scripts/generate-completions.ts
|
||||
# DO NOT EDIT MANUALLY -- run: pnpm completions:generate
|
||||
|
||||
complete -c labctl -e
|
||||
complete -c labctl -f
|
||||
|
||||
# Global options
|
||||
complete -c labctl -s v -l version -d 'Show version'
|
||||
complete -c labctl -s h -l help -d 'Show help'
|
||||
|
||||
# Helper: test if exactly a subcommand chain is active (no extra positional args)
|
||||
function __labctl_using_cmd
    # Succeeds only when the positional words typed so far are exactly the
    # subcommand chain given in $argv -- no more, no fewer. Used to gate
    # subcommand and target-argument completions.
    set -l tokens (commandline -opc)
    set -l expected $argv
    set -l depth (count $expected)
    # Options that consume the following word as their value; that word must
    # not be mistaken for a positional argument.
    set -l value_opts --port --dir --domain --dhcp-mode --fedora --arch --timezone --locale --role --os --disk --server --user --crdb-replicas --k3s-server --k3s-token
    set -l found 0
    set -l skip_value 0
    for tok in $tokens[2..]
        if test $skip_value -eq 1
            # This word is the value of the preceding option.
            set skip_value 0
            continue
        end
        if string match -q -- "-*" $tok
            if contains -- $tok $value_opts
                set skip_value 1
            end
            continue
        end
        set found (math $found + 1)
        # An extra positional beyond the chain means the target was already given.
        if test $found -gt $depth
            return 1
        end
        if test "$tok" != "$expected[$found]"
            return 1
        end
    end
    test $found -eq $depth
end
|
||||
|
||||
# Helper: test if command starts with a subcommand chain (options still apply after args)
|
||||
function __labctl_in_cmd
    # Succeeds when the positional words typed so far begin with the subcommand
    # chain given in $argv; further positional words after the chain are
    # allowed, so option completions stay available once a target is typed.
    set -l cmdline (commandline -opc)
    set -l want $argv
    set -l need (count $want)
    set -l positional 0
    for word in $cmdline[2..]
        # Words starting with a dash are options and never count.
        string match -q -- "-*" $word; and continue
        set positional (math $positional + 1)
        # Past the end of the chain: nothing left to compare.
        test $positional -gt $need; and continue
        test "$word" = "$want[$positional]"; or return 1
    end
    test $positional -ge $need
end
|
||||
|
||||
# Dynamic: fetch machine hostnames from bastion (installed + queued)
|
||||
function __labctl_installed_hosts
    # Prints the unique hostnames found in the install_queue and installed maps.
    __labctl_fetch_machines |
        python3 -c 'import sys,json; d=json.load(sys.stdin); hosts=[v.get("hostname","") for v in {**d.get("install_queue",{}), **d.get("installed",{})}.values() if v.get("hostname")]; [print(h) for h in set(hosts)]' 2>/dev/null
end

# Dynamic: fetch all known MAC addresses (discovered + queue + installed)
function __labctl_known_macs
    # Prints the keys of the discovered, install_queue and installed maps.
    __labctl_fetch_machines |
        python3 -c 'import sys,json; d=json.load(sys.stdin); [print(k) for k in {**d.get("discovered",{}), **d.get("install_queue",{}), **d.get("installed",{})}]' 2>/dev/null
end

# Dynamic: fetch hostnames and MACs from all states
function __labctl_hosts_and_macs
    # Prints the union of every known MAC (map keys) and every queued or
    # installed hostname.
    __labctl_fetch_machines |
        python3 -c 'import sys,json; d=json.load(sys.stdin); a={**d.get("discovered",{}), **d.get("install_queue",{}), **d.get("installed",{})}; macs=list(a.keys()); hosts=[v.get("hostname","") for v in {**d.get("install_queue",{}), **d.get("installed",{})}.values() if v.get("hostname")]; [print(x) for x in set(macs+hosts)]' 2>/dev/null
end

# Helper: fetch the machine inventory JSON from the local bastion API.
# --max-time bounds the request so an unresponsive bastion cannot freeze
# tab-completion in the interactive shell; on failure nothing is printed and
# the callers simply offer no candidates.
function __labctl_fetch_machines
    curl -s --max-time 2 http://localhost:8080/api/machines 2>/dev/null
end
|
||||
|
||||
# Target argument completions
|
||||
complete -c labctl -n "__labctl_using_cmd app k3s install" -a "(__labctl_installed_hosts)" -d 'installed host'
|
||||
complete -c labctl -n "__labctl_using_cmd app k3s health" -a "(__labctl_installed_hosts)" -d 'installed host'
|
||||
complete -c labctl -n "__labctl_using_cmd app labcontroller deploy" -a "(__labctl_installed_hosts)" -d 'installed host'
|
||||
complete -c labctl -n "__labctl_using_cmd app labcontroller status" -a "(__labctl_installed_hosts)" -d 'installed host'
|
||||
complete -c labctl -n "__labctl_using_cmd provision install" -a "(__labctl_known_macs)" -d 'MAC address'
|
||||
complete -c labctl -n "__labctl_using_cmd provision reprovision" -a "(__labctl_hosts_and_macs)" -d 'host or MAC'
|
||||
complete -c labctl -n "__labctl_using_cmd provision forget" -a "(__labctl_hosts_and_macs)" -d 'host or MAC'
|
||||
complete -c labctl -n "__labctl_using_cmd provision logs" -a "(__labctl_hosts_and_macs)" -d 'host or MAC'
|
||||
|
||||
# Top-level commands
|
||||
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a version -d 'Show version information'
|
||||
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a init -d 'Initialise infrastructure components'
|
||||
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a provision -d 'Machine provisioning operations'
|
||||
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a config -d 'View and modify CLI configuration'
|
||||
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a login -d 'Authenticate with labd and obtain client certificate'
|
||||
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a doctor -d 'Diagnose configuration and connectivity issues'
|
||||
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a app -d 'Application management'
|
||||
complete -c labctl -n "not __fish_seen_subcommand_from version init provision config login doctor app roles" -a roles -d 'List available machine roles'
|
||||
|
||||
# init subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd init" -a bastion -d 'Bastion PXE server management'
|
||||
|
||||
# init bastion subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd init bastion" -a standalone -d 'Standalone bastion server lifecycle'
|
||||
|
||||
# init bastion standalone subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd init bastion standalone" -a start -d 'Start the bastion server (HTTP + dnsmasq PXE)'
|
||||
complete -c labctl -n "__labctl_using_cmd init bastion standalone" -a stop -d 'Stop a running bastion server'
|
||||
complete -c labctl -n "__labctl_using_cmd init bastion standalone" -a status -d 'Show bastion server status'
|
||||
|
||||
# init bastion standalone start options
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l port -d 'HTTP port' -x
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l dir -d 'Bastion data directory' -x
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l domain -d 'Internal domain for hostnames' -x
|
||||
# Offer the two modes named in the description instead of a bare value prompt.
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l dhcp-mode -d 'DHCP mode: proxy or full' -xa 'proxy full'
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l fedora -d 'Fedora version' -x
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l arch -d 'Architecture' -x
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l timezone -d 'Timezone' -x
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l locale -d 'Locale' -x
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l skip-dnsmasq -d 'Skip starting dnsmasq (for testing)'
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l skip-artifacts -d 'Skip downloading boot artifacts (for testing)'
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l foreground -d 'Run in foreground (default: daemonize)'
|
||||
|
||||
# init bastion standalone stop options
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone stop" -l dir -d 'Bastion data directory' -x
|
||||
|
||||
# init bastion standalone status options
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone status" -l dir -d 'Bastion data directory' -x
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone status" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# provision subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a list -d 'List all known machines'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a install -d 'Queue a discovered machine for OS installation'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a reprovision -d 'Queue install + SSH reboot into PXE (target: hostname, MAC, or IP)'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a forget -d 'Remove a machine from bastion state'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
|
||||
|
||||
# provision list options
|
||||
complete -c labctl -n "__labctl_in_cmd provision list" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# provision install options
|
||||
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
||||
complete -c labctl -n "__labctl_in_cmd provision install" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
|
||||
complete -c labctl -n "__labctl_in_cmd provision install" -l disk -d 'Target disk device (auto-detect if omitted)' -x
|
||||
complete -c labctl -n "__labctl_in_cmd provision install" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# provision reprovision options
|
||||
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
||||
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
|
||||
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l disk -d 'Target disk device (auto-detect if omitted)' -x
|
||||
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# provision forget options
|
||||
complete -c labctl -n "__labctl_in_cmd provision forget" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# provision logs options
|
||||
complete -c labctl -n "__labctl_in_cmd provision logs" -s f -l follow -d 'Follow logs in real-time (SSE stream)'
|
||||
complete -c labctl -n "__labctl_in_cmd provision logs" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# config subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
|
||||
complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
|
||||
complete -c labctl -n "__labctl_using_cmd config" -a set -d 'Set a configuration value'
|
||||
complete -c labctl -n "__labctl_using_cmd config" -a path -d 'Show configuration file path'
|
||||
|
||||
# login options
|
||||
complete -c labctl -n "__labctl_in_cmd login" -l server -d 'labd server URL' -x
|
||||
|
||||
# doctor options
|
||||
complete -c labctl -n "__labctl_in_cmd doctor" -l json -d 'Output results as JSON'
|
||||
|
||||
# app subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd app" -a labcontroller -d 'Labcontroller deployment (bastion + labd + CockroachDB)'
|
||||
complete -c labctl -n "__labctl_using_cmd app" -a k3s -d 'k3s cluster management'
|
||||
|
||||
# app labcontroller subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd app labcontroller" -a deploy -d 'Deploy labcontroller stack to a k3s node'
|
||||
complete -c labctl -n "__labctl_using_cmd app labcontroller" -a status -d 'Check labcontroller deployment status (all hosts if no target)'
|
||||
|
||||
# app labcontroller deploy options
|
||||
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l port -d 'Bastion HTTP port' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l crdb-replicas -d 'CockroachDB replicas' -x
|
||||
|
||||
# app labcontroller status options
|
||||
complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# app k3s subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd app k3s" -a install -d 'Install k3s on a target machine (hostname, IP, or MAC)'
|
||||
complete -c labctl -n "__labctl_using_cmd app k3s" -a health -d 'Check k3s health (all hosts if no target given)'
|
||||
complete -c labctl -n "__labctl_using_cmd app k3s" -a list -d 'List installed machines and their k3s status'
|
||||
|
||||
# app k3s install options
|
||||
# Offer the two k3s roles named in the description instead of a bare value prompt.
complete -c labctl -n "__labctl_in_cmd app k3s install" -l role -d 'k3s role: infra (server) or worker (agent)' -xa 'infra worker'
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s install" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s install" -l port -d 'Bastion HTTP port (for resolving target)' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s install" -l k3s-server -d 'k3s server URL (required for worker role)' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s install" -l k3s-token -d 'k3s join token (required for worker role)' -x
|
||||
|
||||
# app k3s health options
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s health" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s health" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# app k3s list options
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s list" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s list" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
13
bastion/deploy/k3s/configmap.yaml
Normal file
13
bastion/deploy/k3s/configmap.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: bastion-config
|
||||
namespace: lab-infra
|
||||
data:
|
||||
HTTP_PORT: "8080"
|
||||
DOMAIN: "ad.itaz.eu"
|
||||
FEDORA_VERSION: "43"
|
||||
DHCP_MODE: "proxy"
|
||||
TIMEZONE: "Europe/London"
|
||||
LOCALE: "en_GB.UTF-8"
|
||||
LABD_URL: "http://labd.lab-system.svc.cluster.local:3100"
|
||||
86
bastion/deploy/k3s/deployment.yaml
Normal file
86
bastion/deploy/k3s/deployment.yaml
Normal file
@@ -0,0 +1,86 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: bastion
|
||||
namespace: lab-infra
|
||||
labels:
|
||||
app: bastion
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate
|
||||
selector:
|
||||
matchLabels:
|
||||
app: bastion
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: bastion
|
||||
spec:
|
||||
imagePullSecrets:
|
||||
- name: gitea-registry
|
||||
hostNetwork: true
|
||||
dnsPolicy: ClusterFirstWithHostNet
|
||||
dnsConfig:
|
||||
options:
|
||||
- name: ndots
|
||||
value: "1"
|
||||
containers:
|
||||
- name: bastion
|
||||
image: mysources.co.uk/michal/lab/bastion:latest
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- node
|
||||
- src/cli/dist/index.js
|
||||
- init
|
||||
- bastion
|
||||
- standalone
|
||||
- start
|
||||
- --foreground
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: bastion-config
|
||||
env:
|
||||
- name: BASTION_JOIN_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: bastion-join-token
|
||||
key: token
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
volumeMounts:
|
||||
- name: state
|
||||
mountPath: /data
|
||||
- name: ssh-keys
|
||||
mountPath: /root/.ssh
|
||||
readOnly: true
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- NET_ADMIN
|
||||
- NET_RAW
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /api/machines
|
||||
port: 8080
|
||||
failureThreshold: 60
|
||||
periodSeconds: 10
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /api/machines
|
||||
port: 8080
|
||||
periodSeconds: 30
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /api/machines
|
||||
port: 8080
|
||||
periodSeconds: 10
|
||||
volumes:
|
||||
- name: state
|
||||
persistentVolumeClaim:
|
||||
claimName: bastion-state
|
||||
- name: ssh-keys
|
||||
hostPath:
|
||||
path: /root/.ssh
|
||||
type: Directory
|
||||
7
bastion/deploy/k3s/kustomization.yaml
Normal file
7
bastion/deploy/k3s/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- configmap.yaml
|
||||
- pvc.yaml
|
||||
- deployment.yaml
|
||||
4
bastion/deploy/k3s/namespace.yaml
Normal file
4
bastion/deploy/k3s/namespace.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: lab-infra
|
||||
12
bastion/deploy/k3s/pvc.yaml
Normal file
12
bastion/deploy/k3s/pvc.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: bastion-state
|
||||
namespace: lab-infra
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
storageClassName: local-path
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
8
bastion/deploy/k8s/labd/base/configmap.yaml
Normal file
8
bastion/deploy/k8s/labd/base/configmap.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: labd-config
|
||||
data:
|
||||
LABD_PORT: "3100"
|
||||
LABD_HOST: "0.0.0.0"
|
||||
LABD_LOG_LEVEL: "info"
|
||||
44
bastion/deploy/k8s/labd/base/deployment.yaml
Normal file
44
bastion/deploy/k8s/labd/base/deployment.yaml
Normal file
@@ -0,0 +1,44 @@
|
||||
# labd Deployment (base). Serves HTTP on port 3100 and takes its settings from
# the labd-config ConfigMap and the labd-secrets Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: labd
spec:
  # spec.replicas is intentionally omitted: the labd HorizontalPodAutoscaler
  # (minReplicas: 2) owns the replica count. A hard-coded value here would
  # reset the Deployment on every apply and fight the autoscaler.
  selector:
    matchLabels:
      app: labd
  template:
    metadata:
      labels:
        app: labd
    spec:
      containers:
        - name: labd
          image: mysources.co.uk/michal/lab/labd:latest
          imagePullPolicy: Always
          ports:
            - containerPort: 3100
          envFrom:
            - configMapRef:
                name: labd-config
            - secretRef:
                name: labd-secrets
          livenessProbe:
            httpGet:
              path: /health/live
              port: 3100
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health/ready
              port: 3100
            initialDelaySeconds: 5
            periodSeconds: 10
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi
|
||||
18
bastion/deploy/k8s/labd/base/hpa.yaml
Normal file
18
bastion/deploy/k8s/labd/base/hpa.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: labd
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: labd
|
||||
minReplicas: 2
|
||||
maxReplicas: 10
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 70
|
||||
14
bastion/deploy/k8s/labd/base/kustomization.yaml
Normal file
14
bastion/deploy/k8s/labd/base/kustomization.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# Kustomize base for labd: Deployment, Service, ConfigMap, HPA and PDB.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# NOTE(review): the bastion ConfigMap sets LABD_URL to
# labd.lab-system.svc.cluster.local, which is a different namespace from the
# one set here -- confirm an overlay overrides this, or align the two.
namespace: lab-infra

# `commonLabels` is deprecated; `labels` with includeSelectors: true applies
# the same label to metadata, selectors and pod templates.
labels:
  - pairs:
      app: labd
    includeSelectors: true

resources:
  - deployment.yaml
  - service.yaml
  - configmap.yaml
  - hpa.yaml
  - pdb.yaml
|
||||
9
bastion/deploy/k8s/labd/base/pdb.yaml
Normal file
9
bastion/deploy/k8s/labd/base/pdb.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
apiVersion: policy/v1
|
||||
kind: PodDisruptionBudget
|
||||
metadata:
|
||||
name: labd
|
||||
spec:
|
||||
maxUnavailable: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: labd
|
||||
12
bastion/deploy/k8s/labd/base/service.yaml
Normal file
12
bastion/deploy/k8s/labd/base/service.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: labd
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: labd
|
||||
ports:
|
||||
- port: 3100
|
||||
targetPort: 3100
|
||||
protocol: TCP
|
||||
26
bastion/eslint.config.js
Normal file
26
bastion/eslint.config.js
Normal file
@@ -0,0 +1,26 @@
|
||||
import tseslint from '@typescript-eslint/eslint-plugin';
import tsparser from '@typescript-eslint/parser';

/**
 * Type-aware linting for the TypeScript sources of every workspace package
 * (src/<package>/src/**). Each package's own tsconfig.json supplies the type
 * information, resolved relative to this file's directory.
 */
const workspaceSources = {
  files: ['src/*/src/**/*.ts'],
  languageOptions: {
    parser: tsparser,
    parserOptions: {
      project: ['./src/*/tsconfig.json'],
      tsconfigRootDir: import.meta.dirname,
    },
  },
  plugins: { '@typescript-eslint': tseslint },
  rules: {
    '@typescript-eslint/explicit-function-return-type': 'error',
    '@typescript-eslint/no-explicit-any': 'error',
    '@typescript-eslint/no-unused-vars': 'error',
    '@typescript-eslint/strict-boolean-expressions': 'error',
    // console.warn / console.error are allowed; other console calls only warn.
    'no-console': ['warn', { allow: ['warn', 'error'] }],
  },
};

/** Build output, dependencies and tool config files are never linted. */
const globalIgnores = {
  ignores: ['**/dist/**', '**/node_modules/**', '**/*.config.*'],
};

export default [workspaceSources, globalIgnores];
|
||||
20
bastion/nfpm.yaml
Normal file
20
bastion/nfpm.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
name: labctl
|
||||
arch: amd64
|
||||
version: 0.1.0
|
||||
release: "1"
|
||||
maintainer: michal
|
||||
description: Lab infrastructure CLI for bare-metal provisioning
|
||||
license: MIT
|
||||
contents:
|
||||
- src: ./dist/labctl
|
||||
dst: /usr/bin/labctl
|
||||
file_info:
|
||||
mode: 0755
|
||||
- src: ./completions/labctl.bash
|
||||
dst: /usr/share/bash-completion/completions/labctl
|
||||
file_info:
|
||||
mode: 0644
|
||||
- src: ./completions/labctl.fish
|
||||
dst: /usr/share/fish/vendor_completions.d/labctl.fish
|
||||
file_info:
|
||||
mode: 0644
|
||||
43
bastion/package.json
Normal file
43
bastion/package.json
Normal file
@@ -0,0 +1,43 @@
|
||||
{
|
||||
"name": "lab",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"description": "PXE bastion server for discover-first bare-metal provisioning",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"build": "pnpm -r run build",
|
||||
"test": "vitest",
|
||||
"test:run": "vitest run",
|
||||
"typecheck": "tsc --build",
|
||||
"clean": "pnpm -r run clean && rimraf node_modules",
|
||||
"lint": "eslint 'src/*/src/**/*.ts'",
|
||||
"lint:fix": "eslint 'src/*/src/**/*.ts' --fix",
|
||||
"completions:generate": "tsx scripts/generate-completions.ts --write",
|
||||
"completions:check": "tsx scripts/generate-completions.ts --check",
|
||||
"test:integration": "vitest run -c tests/integration/vitest.config.ts",
|
||||
"test:integration:k3s": "vitest run -c tests/integration/vitest.config.ts -t k3s",
|
||||
"test:integration:k3s:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t k3s",
|
||||
"test:integration:pxe": "vitest run -c tests/integration/vitest.config.ts -t 'PXE boot'",
|
||||
"test:integration:pxe:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'PXE boot'",
|
||||
"test:integration:iso": "vitest run -c tests/integration/vitest.config.ts -t 'ISO boot'",
|
||||
"test:integration:iso:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'ISO boot'",
|
||||
"test:integration:arm-iso": "vitest run -c tests/integration/vitest.config.ts -t 'ARM ISO'",
|
||||
"test:integration:arm-iso:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'ARM ISO'"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20.0.0",
|
||||
"pnpm": ">=9.0.0"
|
||||
},
|
||||
"packageManager": "pnpm@9.15.0",
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.10.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.57.1",
|
||||
"@typescript-eslint/parser": "^8.57.1",
|
||||
"eslint": "^10.0.3",
|
||||
"eslint-config-prettier": "^10.1.8",
|
||||
"rimraf": "^6.0.0",
|
||||
"tsx": "^4.21.0",
|
||||
"typescript": "^5.7.0",
|
||||
"vitest": "^3.0.0"
|
||||
}
|
||||
}
|
||||
3646
bastion/pnpm-lock.yaml
generated
Normal file
3646
bastion/pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
2
bastion/pnpm-workspace.yaml
Normal file
2
bastion/pnpm-workspace.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
packages:
|
||||
- "src/*"
|
||||
127
bastion/scripts/build-bastion.sh
Executable file
127
bastion/scripts/build-bastion.sh
Executable file
@@ -0,0 +1,127 @@
|
||||
#!/bin/bash
# Build bastion container image (multi-arch) and push to Gitea container registry
#
# Flow: parse arguments -> build a multi-arch podman manifest from
# Dockerfile.bastion -> with --push, log in to the registry and push the
# manifest under the requested tag and, unless the tag is already "latest",
# under :latest as well.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Load .env for GITEA_TOKEN (set -a exports everything the file defines)
if [ -f .env ]; then
    set -a; source .env; set +a
fi

# ── Argument parsing ───────────────────────────────────────────────
PUSH=false
PLATFORMS="linux/amd64,linux/arm64"

usage() {
    cat <<EOF
Usage: $(basename "$0") [OPTIONS] [TAG]

Build bastion container image (multi-arch) and optionally push to registry.

Options:
  --push             Push to registry after building
  --platforms LIST   Comma-separated platforms (default: linux/amd64,linux/arm64)
  -h, --help         Show this help message

Arguments:
  TAG                Image tag (default: version from package.json)

Examples:
  $(basename "$0")                          # build multi-arch, no push
  $(basename "$0") --push                   # build + push with version tag
  $(basename "$0") --push latest            # build + push as :latest
  $(basename "$0") --platforms linux/amd64  # build amd64 only
EOF
    exit 0
}

POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
    case "$1" in
        --push)
            PUSH=true
            shift
            ;;
        --platforms)
            # Fail with a clear message instead of letting `shift 2` abort silently.
            if [[ $# -lt 2 ]]; then
                echo "ERROR: --platforms requires a value" >&2
                exit 1
            fi
            PLATFORMS="$2"
            shift 2
            ;;
        -h|--help)
            usage
            ;;
        -*)
            # An image tag cannot start with '-', so this is a mistyped option,
            # not a TAG.
            echo "ERROR: unknown option: $1" >&2
            exit 1
            ;;
        *)
            POSITIONAL_ARGS+=("$1")
            shift
            ;;
    esac
done

REGISTRY="${GITEA_REGISTRY:-mysources.co.uk}"
REPO="michal/lab/bastion"
FULL_IMAGE="$REGISTRY/$REPO"
VERSION=$(node -p "require('./package.json').version")
TAG="${POSITIONAL_ARGS[0]:-$VERSION}"

echo "==> Building bastion image"
echo " Tag: $TAG"
echo " Platforms: $PLATFORMS"
echo " Registry: $FULL_IMAGE"

# ── Build multi-arch manifest ────────────────────────────────────
MANIFEST="lab-bastion:$TAG"

# Remove existing manifest/image with the same tag (best effort)
podman manifest rm "$MANIFEST" 2>/dev/null || true
podman rmi "$MANIFEST" 2>/dev/null || true

echo "==> Building for platforms: $PLATFORMS..."
podman build \
    --platform "$PLATFORMS" \
    --manifest "$MANIFEST" \
    -f Dockerfile.bastion \
    .

echo "==> Build complete. Manifest:"
podman manifest inspect "$MANIFEST" | grep -E '"(architecture|os)"'

# ── Push ─────────────────────────────────────────────────────────
if [ "$PUSH" = true ]; then
    if [ -z "$GITEA_TOKEN" ]; then
        # Try reading from ~/.gitea-token
        if [ -f "$HOME/.gitea-token" ]; then
            GITEA_TOKEN="$(cat "$HOME/.gitea-token")"
        else
            echo "ERROR: GITEA_TOKEN not set and ~/.gitea-token not found" >&2
            exit 1
        fi
    fi

    echo "==> Logging in to $REGISTRY..."
    # Feed the token on stdin so it never appears in the process list or in
    # shell tracing output.
    printf '%s' "$GITEA_TOKEN" | podman login -u michal --password-stdin "$REGISTRY"

    echo "==> Pushing $FULL_IMAGE:$TAG..."
    podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"

    # Also tag as :latest if not already
    if [ "$TAG" != "latest" ]; then
        echo "==> Also pushing as :latest..."
        podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
    fi

    # Link package to repository if script exists
    if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
        source "$SCRIPT_DIR/link-package.sh"
        link_package "container" "bastion"
    fi

    echo "==> Pushed successfully!"
else
    echo "==> Skipping push (use --push to push to registry)"
fi

echo "==> Done!"
echo " Image: $FULL_IMAGE:$TAG"
echo " Platforms: $PLATFORMS"
|
||||
118
bastion/scripts/build-labd.sh
Executable file
118
bastion/scripts/build-labd.sh
Executable file
@@ -0,0 +1,118 @@
|
||||
#!/bin/bash
#
# build-labd.sh -- build the labd container image (multi-arch) and, on request,
# push it to the Gitea container registry.
set -o errexit

# Work from the parent of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Export every variable defined in .env (GITEA_TOKEN is expected there) when the file exists.
if [ -f .env ]; then
  set -o allexport
  source .env
  set +o allexport
fi

# ── Option defaults (the argument parser may override them) ────────
PLATFORMS="linux/amd64,linux/arm64"
PUSH=false
|
||||
|
||||
# Print the command-line help on stdout, then terminate the script with status 0.
usage() {
  cat <<USAGE_TEXT
Usage: $(basename "$0") [OPTIONS] [TAG]

Build labd container image (multi-arch) and optionally push to registry.

Options:
--push Push to registry after building
--platforms LIST Comma-separated platforms (default: linux/amd64,linux/arm64)
-h, --help Show this help message

Arguments:
TAG Image tag (default: version from package.json)
USAGE_TEXT
  exit 0
}
|
||||
|
||||
# ── Parse command-line arguments ───────────────────────────────────
# Flags update PUSH / PLATFORMS; the first bare word becomes the image tag.
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    --push)
      PUSH=true
      shift
      ;;
    --platforms)
      # Without this check `shift 2` fails when the value is missing and
      # `set -e` ends the script without any diagnostic.
      if [[ $# -lt 2 || -z "$2" ]]; then
        echo "ERROR: --platforms requires a value" >&2
        exit 1
      fi
      PLATFORMS="$2"
      shift 2
      ;;
    -h|--help)
      usage
      ;;
    -*)
      # A mistyped flag must not silently become the image tag.
      echo "ERROR: unknown option: $1" >&2
      exit 1
      ;;
    *)
      POSITIONAL_ARGS+=("$1")
      shift
      ;;
  esac
done

REGISTRY="${GITEA_REGISTRY:-mysources.co.uk}"
REPO="michal/lab/labd"
FULL_IMAGE="$REGISTRY/$REPO"
VERSION=$(node -p "require('./package.json').version")
# Explicit TAG argument wins; otherwise fall back to the package.json version.
TAG="${POSITIONAL_ARGS[0]:-$VERSION}"

echo "==> Building labd image"
echo " Tag: $TAG"
echo " Platforms: $PLATFORMS"
echo " Registry: $FULL_IMAGE"

# ── Build multi-arch manifest ────────────────────────────────────
MANIFEST="lab-labd:$TAG"

# Remove an existing manifest/image with the same tag; absence is not an error.
podman manifest rm "$MANIFEST" 2>/dev/null || true
podman rmi "$MANIFEST" 2>/dev/null || true

echo "==> Building for platforms: $PLATFORMS..."
podman build \
  --platform "$PLATFORMS" \
  --manifest "$MANIFEST" \
  -f Dockerfile.labd \
  .

echo "==> Build complete. Manifest:"
# Purely informational: a grep that matches nothing must not abort the build.
podman manifest inspect "$MANIFEST" | grep -E '"(architecture|os)"' || true

# ── Push ─────────────────────────────────────────────────────────
if [ "$PUSH" = true ]; then
  if [ -z "$GITEA_TOKEN" ]; then
    # Fall back to a token stored in ~/.gitea-token
    if [ -f "$HOME/.gitea-token" ]; then
      GITEA_TOKEN="$(cat "$HOME/.gitea-token")"
    else
      echo "ERROR: GITEA_TOKEN not set and ~/.gitea-token not found" >&2
      exit 1
    fi
  fi

  echo "==> Logging in to $REGISTRY..."
  # Feed the token on stdin so it never appears in the process argument list.
  printf '%s' "$GITEA_TOKEN" | podman login -u michal --password-stdin "$REGISTRY"

  echo "==> Pushing $FULL_IMAGE:$TAG..."
  podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"

  # Also publish the same manifest as :latest unless that is already the tag.
  if [ "$TAG" != "latest" ]; then
    echo "==> Also pushing as :latest..."
    podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
  fi

  # Link the package to the repository when the helper script is available.
  if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
    source "$SCRIPT_DIR/link-package.sh"
    link_package "container" "labd"
  fi

  echo "==> Pushed successfully!"
else
  echo "==> Skipping push (use --push to push to registry)"
fi

echo "==> Done!"
echo " Image: $FULL_IMAGE:$TAG"
echo " Platforms: $PLATFORMS"
|
||||
180
bastion/scripts/build-rpm.sh
Executable file
180
bastion/scripts/build-rpm.sh
Executable file
@@ -0,0 +1,180 @@
|
||||
#!/bin/bash
#
# build-rpm.sh -- build the labctl binary and produce RPM/DEB packages.
set -o errexit

# Work from the parent of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Export every variable defined in .env when that file exists.
if [ -f .env ]; then
  set -o allexport
  source .env
  set +o allexport
fi

# Put the per-user npm, bun and ~/.local bin directories ahead of the existing PATH.
PATH="$HOME/.npm-global/bin:$HOME/.bun/bin:$HOME/.local/bin:$PATH"
export PATH

# ── Option defaults (the argument parser may override them) ────────
SKIP_TESTS=false
TARGET_ARCH=""
BUILD_ALL=false
|
||||
|
||||
# Print the command-line help on stdout, then terminate the script with status 0.
usage() {
  cat <<USAGE_TEXT
Usage: $(basename "$0") [OPTIONS]

Build labctl binary and produce RPM/DEB packages.

Options:
--arch ARCH Target architecture: x86_64 or arm64 (default: host arch)
--all Build for both x86_64 and arm64
--skip-tests Skip unit tests (useful in CI where tests ran separately)
-h, --help Show this help message
USAGE_TEXT
  exit 0
}
|
||||
|
||||
# Parse command-line flags into BUILD_ALL / TARGET_ARCH / SKIP_TESTS.
# Invalid invocations are reported on stderr and exit with status 1.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --arch)
      # A missing value would otherwise make `shift 2` fail and, under
      # `set -e`, end the script without any message.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: --arch requires a value (x86_64 or arm64)" >&2
        exit 1
      fi
      # Only the two canonical names have tool mappings further down.
      case "$2" in
        x86_64|arm64)
          TARGET_ARCH="$2"
          ;;
        *)
          echo "ERROR: unsupported architecture '$2' (expected x86_64 or arm64)" >&2
          exit 1
          ;;
      esac
      shift 2
      ;;
    --all)
      BUILD_ALL=true
      shift
      ;;
    --skip-tests)
      SKIP_TESTS=true
      shift
      ;;
    -h|--help)
      usage
      ;;
    *)
      echo "Unknown option: $1" >&2
      # usage() always exits 0, so show it from a subshell and then fail
      # explicitly; a bad invocation must not look like success.
      (usage) >&2
      exit 1
      ;;
  esac
done
|
||||
|
||||
# ── Resolve host architecture ─────────────────────────────────────
|
||||
# Print the host architecture as reported by `uname -m`, with both ARM 64-bit
# spellings (aarch64, arm64) folded into "arm64". Any other value, x86_64
# included, is printed unchanged.
detect_host_arch() {
  local hw
  hw="$(uname -m)"
  if [ "$hw" = "aarch64" ] || [ "$hw" = "arm64" ]; then
    echo "arm64"
  else
    echo "$hw"
  fi
}
|
||||
|
||||
# ── Architecture mapping helpers ──────────────────────────────────
|
||||
# Maps our canonical arch names to the values each tool expects.
|
||||
# Print the `bun build --target` value for a canonical arch name.
#   $1  x86_64 | arm64
# Returns 1 (with a message on stderr) for any other value instead of
# silently printing nothing, so callers cannot proceed with an empty target.
bun_target_for() {
  case "$1" in
    x86_64) echo "bun-linux-x64" ;;
    arm64)  echo "bun-linux-arm64" ;;
    *)
      echo "ERROR: unsupported architecture '${1}' (expected x86_64 or arm64)" >&2
      return 1
      ;;
  esac
}
|
||||
|
||||
# Print the value written into the `arch:` field of the nfpm config for a
# canonical arch name.
#   $1  x86_64 | arm64
# Returns 1 (with a message on stderr) for any other value instead of
# silently printing nothing.
nfpm_arch_for() {
  case "$1" in
    x86_64) echo "amd64" ;;
    arm64)  echo "arm64" ;;
    *)
      echo "ERROR: unsupported architecture '${1}' (expected x86_64 or arm64)" >&2
      return 1
      ;;
  esac
}
|
||||
|
||||
# Print the architecture component used in RPM file names for a canonical
# arch name.
#   $1  x86_64 | arm64
# Returns 1 (with a message on stderr) for any other value instead of
# silently printing nothing.
rpm_arch_for() {
  case "$1" in
    x86_64) echo "x86_64" ;;
    arm64)  echo "aarch64" ;;
    *)
      echo "ERROR: unsupported architecture '${1}' (expected x86_64 or arm64)" >&2
      return 1
      ;;
  esac
}
|
||||
|
||||
# Print the architecture component used in DEB file names for a canonical
# arch name.
#   $1  x86_64 | arm64
# Returns 1 (with a message on stderr) for any other value instead of
# silently printing nothing.
deb_arch_for() {
  case "$1" in
    x86_64) echo "amd64" ;;
    arm64)  echo "arm64" ;;
    *)
      echo "ERROR: unsupported architecture '${1}' (expected x86_64 or arm64)" >&2
      return 1
      ;;
  esac
}
|
||||
|
||||
# ── Build one architecture ────────────────────────────────────────
|
||||
# Build the standalone labctl binary for one architecture and package it as
# both an RPM and a DEB under dist/.
#   $1  canonical arch name: x86_64 | arm64
# Sets the globals RPM_FILE and DEB_FILE to the produced package paths.
# Returns 1 for an unsupported arch, a failed packaging step, or a package
# that cannot be found afterwards; the temporary nfpm config is removed on
# every path.
build_arch() {
  local arch="$1"
  local bun_target nfpm_arch rpm_arch deb_arch binary_name tmpconfig

  bun_target="$(bun_target_for "$arch")"
  nfpm_arch="$(nfpm_arch_for "$arch")"
  rpm_arch="$(rpm_arch_for "$arch")"
  deb_arch="$(deb_arch_for "$arch")"
  # The mapping helpers may print nothing for an unknown arch; stop here
  # rather than invoking bun/nfpm with empty values.
  if [ -z "$bun_target" ] || [ -z "$nfpm_arch" ] || [ -z "$rpm_arch" ] || [ -z "$deb_arch" ]; then
    echo "ERROR: unsupported architecture '${arch}' (expected x86_64 or arm64)" >&2
    return 1
  fi
  binary_name="dist/labctl-${arch}"

  echo ""
  echo "==> Bundling standalone binary for ${arch}..."
  bun build src/cli/src/index.ts --compile --target="${bun_target}" --outfile "${binary_name}"

  # One temporary nfpm config (correct arch and binary path) serves both packagers.
  tmpconfig="$(mktemp /tmp/nfpm-XXXXXX.yaml)"
  if ! sed -e "s|^arch:.*|arch: ${nfpm_arch}|" \
           -e "s|src: ./dist/labctl$|src: ./${binary_name}|" \
           nfpm.yaml > "$tmpconfig"; then
    rm -f "$tmpconfig"
    return 1
  fi

  echo "==> Packaging RPM (${arch})..."
  if ! nfpm pkg --config "$tmpconfig" --packager rpm --target dist/; then
    rm -f "$tmpconfig"
    return 1
  fi

  RPM_FILE=$(ls dist/labctl-*.${rpm_arch}.rpm 2>/dev/null | head -1)
  if [ -z "$RPM_FILE" ]; then
    rm -f "$tmpconfig"
    echo "ERROR: no RPM for ${rpm_arch} found in dist/ after packaging" >&2
    return 1
  fi
  echo "==> Built: $RPM_FILE"
  echo " Size: $(du -h "$RPM_FILE" | cut -f1)"

  echo ""
  echo "==> Packaging DEB (${arch})..."
  if ! nfpm pkg --config "$tmpconfig" --packager deb --target dist/; then
    rm -f "$tmpconfig"
    return 1
  fi
  rm -f "$tmpconfig"

  DEB_FILE=$(ls dist/labctl_*_${deb_arch}.deb 2>/dev/null | head -1)
  if [ -z "$DEB_FILE" ]; then
    echo "ERROR: no DEB for ${deb_arch} found in dist/ after packaging" >&2
    return 1
  fi
  echo "==> Built: $DEB_FILE"
  echo " Size: $(du -h "$DEB_FILE" | cut -f1)"
}
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────
|
||||
|
||||
# Unit tests run unless --skip-tests was given.
case "$SKIP_TESTS" in
  false)
    echo "==> Running unit tests..."
    pnpm test:run
    echo ""
    ;;
esac

echo "==> Building TypeScript..."
pnpm build

echo "==> Generating shell completions..."
pnpm completions:generate

# Start from a dist/ that holds no binaries or packages from an earlier run.
mkdir -p dist
rm -f dist/labctl dist/labctl-x86_64 dist/labctl-arm64 dist/labctl-*.rpm dist/labctl*.deb

# --all takes precedence over --arch; with neither, build for the host.
if [ "$BUILD_ALL" = true ]; then
  build_arch "x86_64"
  build_arch "arm64"
elif [ -z "$TARGET_ARCH" ]; then
  HOST_ARCH="$(detect_host_arch)"
  build_arch "$HOST_ARCH"
else
  build_arch "$TARGET_ARCH"
fi

echo ""
echo "==> Build complete. Artifacts in dist/:"
ls -lh dist/labctl* 2>/dev/null || echo " (none)"
|
||||
444
bastion/scripts/generate-completions.ts
Normal file
444
bastion/scripts/generate-completions.ts
Normal file
@@ -0,0 +1,444 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
|
||||
* generate-completions.ts -- auto-generates shell completions from the commander.js command tree.
|
||||
*
|
||||
* Usage:
|
||||
* tsx scripts/generate-completions.ts # print generated files to stdout
|
||||
* tsx scripts/generate-completions.ts --write # write completions/ files
|
||||
* tsx scripts/generate-completions.ts --check # exit 0 if files match, 1 if stale
|
||||
*
|
||||
* Requires `pnpm build` to have run first (workspace packages must be compiled).
|
||||
*/
|
||||
|
||||
import { Command, type Option, type Argument } from 'commander';
|
||||
import { readFileSync, writeFileSync, mkdirSync } from 'node:fs';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const ROOT = join(__dirname, '..');
|
||||
|
||||
// ============================================================
|
||||
// Command tree extraction
|
||||
// ============================================================
|
||||
|
||||
/** Plain-data snapshot of one command in the CLI tree, including its whole subtree. */
interface CmdInfo {
  /** Command name as returned by `Command.name()`. */
  name: string;
  /** Help text as returned by `Command.description()`. */
  description: string;
  /** Whether the command is flagged hidden; hidden commands get no completions of their own. */
  hidden: boolean;
  /** Nested commands, extracted recursively. */
  subcommands: CmdInfo[];
  /** Declared options, without the built-in `--help` / `--version`. */
  options: OptInfo[];
  /** Declared positional arguments. */
  args: ArgInfo[];
}
|
||||
|
||||
/** Plain-data description of one command option. */
interface OptInfo {
  /** Long flag including the leading dashes, e.g. `--format`. */
  long: string;
  /** Short flag including the leading dash, when the option has one. */
  short?: string;
  /** Help text of the option. */
  description: string;
  /** True when the option accepts a value (required or optional). */
  takesValue: boolean;
  /** True when the option is marked as a negating option. */
  negate: boolean;
  /** Allowed values, when the option restricts them. */
  choices?: string[];
}
|
||||
|
||||
/** Plain-data description of one positional argument of a command. */
interface ArgInfo {
  /** Argument name. */
  name: string;
  /** Help text of the argument. */
  description: string;
  /** True when the argument must be supplied. */
  required: boolean;
  /** True when the argument accepts multiple values. */
  variadic: boolean;
  /** Allowed values, when the argument restricts them. */
  choices?: string[];
}
|
||||
|
||||
/**
 * Copy the fields the completion generators need out of a commander option.
 *
 * @param opt - Option object taken from a command's `options` list.
 * @returns A plain {@link OptInfo}; `short` and `choices` are `undefined` when
 *   the option has none, `takesValue` and `negate` default to `false`.
 */
function extractOption(opt: Option): OptInfo {
  // Read the option through one loosely typed view instead of casting per field.
  const raw = opt as unknown as {
    short?: string;
    long: string;
    required?: boolean;
    optional?: boolean;
    argChoices?: string[];
    negate?: boolean;
  };
  // A value is accepted whether it is mandatory (`required`) or optional.
  const takesValue = raw.required || raw.optional || false;

  return {
    short: raw.short || undefined,
    long: raw.long,
    description: opt.description,
    takesValue,
    choices: raw.argChoices || undefined,
    negate: raw.negate || false,
  };
}
|
||||
|
||||
/**
 * Copy the fields the completion generators need out of a commander argument.
 *
 * @param arg - Positional argument registered on a command.
 * @returns A plain {@link ArgInfo}; the name comes from the `_name` field when
 *   present and from `arg.name()` otherwise.
 */
function extractArgument(arg: Argument): ArgInfo {
  // Read the argument through one loosely typed view instead of casting per field.
  const raw = arg as unknown as {
    _name?: string;
    required: boolean;
    variadic: boolean;
    _choices?: string[];
  };
  const name = raw._name ?? arg.name();

  return {
    name,
    description: arg.description,
    required: raw.required,
    variadic: raw.variadic,
    choices: raw._choices || undefined,
  };
}
|
||||
|
||||
/**
 * Recursively convert a commander command into a plain {@link CmdInfo} tree.
 *
 * The `--help` and `--version` options are left out. A subcommand named
 * `help` is not recursed into; when one exists it is replaced by a fixed
 * placeholder entry appended after all other subcommands.
 *
 * @param cmd - Command to snapshot.
 * @returns The command with its options, arguments and subcommands.
 */
function extractCommand(cmd: Command): CmdInfo {
  const raw = cmd as unknown as { registeredArguments?: Argument[]; _hidden?: boolean };
  const builtinFlags = new Set(['--help', '--version']);

  const options: OptInfo[] = [];
  for (const opt of cmd.options as Option[]) {
    const long = (opt as unknown as { long: string }).long;
    if (!builtinFlags.has(long)) {
      options.push(extractOption(opt));
    }
  }

  const args: ArgInfo[] = [];
  for (const arg of raw.registeredArguments ?? []) {
    args.push(extractArgument(arg));
  }

  const subcommands: CmdInfo[] = [];
  let hasHelp = false;
  for (const child of cmd.commands as Command[]) {
    if (child.name() === 'help') {
      hasHelp = true;
      continue;
    }
    subcommands.push(extractCommand(child));
  }
  if (hasHelp) {
    // Placeholder instead of the real help command: no options, args or children.
    subcommands.push({
      name: 'help',
      description: 'display help for command',
      hidden: false,
      options: [],
      args: [],
      subcommands: [],
    });
  }

  return {
    name: cmd.name(),
    description: cmd.description(),
    hidden: raw._hidden ?? false,
    options,
    args,
    subcommands,
  };
}
|
||||
|
||||
/**
 * Load the CLI entry module, build its program and snapshot the command tree.
 *
 * @returns The root {@link CmdInfo} produced from `createProgram()`.
 */
async function extractTree(): Promise<CmdInfo> {
  const cliModule = (await import('../src/cli/src/index.js')) as { createProgram: () => Command };
  return extractCommand(cliModule.createProgram());
}
|
||||
|
||||
// ============================================================
|
||||
// Utilities
|
||||
// ============================================================
|
||||
|
||||
/**
 * Escape text for use inside a single-quoted fish string.
 *
 * Fish recognises exactly two escapes inside single quotes: `\'` and `\\`.
 * Both characters are escaped here; escaping only the quote would let a
 * backslash in the input combine with the following character (for example a
 * trailing `\` would swallow the closing quote).
 *
 * @param s - Raw text such as a command or option description.
 * @returns The text with every `\` and `'` prefixed by a backslash.
 */
function esc(s: string): string {
  return s.replace(/[\\']/g, '\\$&');
}
|
||||
|
||||
/**
 * Flatten the command tree below `cmd` into a list, parents before their children.
 *
 * @param cmd - Node whose descendants are collected; `cmd` itself is not included.
 * @param prefix - Names of the commands leading to `cmd`.
 * @returns One entry per descendant with its full name path.
 */
function collectCommands(cmd: CmdInfo, prefix: string[] = []): { path: string[]; cmd: CmdInfo }[] {
  return cmd.subcommands.flatMap((child) => {
    const path = [...prefix, child.name];
    return [{ path, cmd: child }, ...collectCommands(child, path)];
  });
}
|
||||
|
||||
// ============================================================
|
||||
// Fish completion generator
|
||||
// ============================================================
|
||||
|
||||
/**
 * Render the fish completion script for the command tree.
 *
 * The script contains, in order: a reset of existing completions, the global
 * `--version` / `--help` options, two helper functions that test the typed
 * subcommand chain, three helper functions that fetch hostnames / MAC
 * addresses from the bastion API, a fixed set of target-argument completions,
 * the top-level commands, and for every visible command its subcommands and
 * options.
 *
 * Descriptions and option choices are both passed through `esc`, since both
 * end up inside single-quoted fish strings.
 *
 * @param root - Root of the extracted command tree; its name is the binary name.
 * @returns The script text, terminated by a newline.
 */
function generateFish(root: CmdInfo): string {
  const BIN = root.name;
  const lines: string[] = [];
  const emit = (...chunk: string[]): void => { lines.push(...chunk); };

  emit(
    `# ${BIN} fish completions -- auto-generated by scripts/generate-completions.ts`,
    '# DO NOT EDIT MANUALLY -- run: pnpm completions:generate',
    '',
    `complete -c ${BIN} -e`,
    `complete -c ${BIN} -f`,
    '',
  );

  // Global options
  emit(
    '# Global options',
    `complete -c ${BIN} -s v -l version -d 'Show version'`,
    `complete -c ${BIN} -s h -l help -d 'Show help'`,
    '',
  );

  const allCmds = collectCommands(root);

  // Helper: true only when EXACTLY the given subcommand chain was typed (used for subcommand suggestions)
  emit(
    '# Helper: test if exactly a subcommand chain is active (no extra positional args)',
    `function __${BIN}_using_cmd`,
    ' set -l tokens (commandline -opc)',
    ' set -l expected $argv',
    ' set -l depth (count $expected)',
    ' set -l found 0',
    ' set -l i 1',
    ' for tok in $tokens[2..]',
    ' if string match -q -- "-*" $tok',
    ' continue',
    ' end',
    ' set i (math $i + 1)',
    ' set -l idx (math $i - 1)',
    ' if test $idx -le $depth',
    ' if test "$tok" != "$expected[$idx]"',
    ' return 1',
    ' end',
    ' set found (math $found + 1)',
    ' else',
    ' return 1',
    ' end',
    ' end',
    ' test $found -eq $depth',
    'end',
    '',
  );

  // Helper: true when the typed chain STARTS WITH the given prefix (used for options, which apply after args too)
  emit(
    '# Helper: test if command starts with a subcommand chain (options still apply after args)',
    `function __${BIN}_in_cmd`,
    ' set -l tokens (commandline -opc)',
    ' set -l expected $argv',
    ' set -l depth (count $expected)',
    ' set -l found 0',
    ' for tok in $tokens[2..]',
    ' if string match -q -- "-*" $tok',
    ' continue',
    ' end',
    ' set found (math $found + 1)',
    ' if test $found -le $depth',
    ' if test "$tok" != "$expected[$found]"',
    ' return 1',
    ' end',
    ' end',
    ' end',
    ' test $found -ge $depth',
    'end',
    '',
  );

  // Dynamic completions: each helper pipes the bastion machine list through a python one-liner.
  const fetchMachines = ' curl -s http://localhost:8080/api/machines 2>/dev/null | ';
  const emitDynamicHelper = (comment: string, fnName: string, pythonLine: string): void => {
    emit(comment, `function __${BIN}_${fnName}`, fetchMachines, pythonLine, 'end', '');
  };

  emitDynamicHelper(
    '# Dynamic: fetch machine hostnames from bastion (installed + queued)',
    'installed_hosts',
    " python3 -c 'import sys,json; d=json.load(sys.stdin); hosts=[v.get(\"hostname\",\"\") for v in {**d.get(\"install_queue\",{}), **d.get(\"installed\",{})}.values() if v.get(\"hostname\")]; [print(h) for h in set(hosts)]' 2>/dev/null",
  );
  emitDynamicHelper(
    '# Dynamic: fetch all known MAC addresses (discovered + queue + installed)',
    'known_macs',
    " python3 -c 'import sys,json; d=json.load(sys.stdin); [print(k) for k in {**d.get(\"discovered\",{}), **d.get(\"install_queue\",{}), **d.get(\"installed\",{})}]' 2>/dev/null",
  );
  emitDynamicHelper(
    '# Dynamic: fetch hostnames and MACs from all states',
    'hosts_and_macs',
    " python3 -c 'import sys,json; d=json.load(sys.stdin); a={**d.get(\"discovered\",{}), **d.get(\"install_queue\",{}), **d.get(\"installed\",{})}; macs=list(a.keys()); hosts=[v.get(\"hostname\",\"\") for v in {**d.get(\"install_queue\",{}), **d.get(\"installed\",{})}.values() if v.get(\"hostname\")]; [print(x) for x in set(macs+hosts)]' 2>/dev/null",
  );

  // Target completions for commands that accept hostname/IP/MAC:
  // [subcommand chain, dynamic helper providing the candidates, description]
  const targetCompletions: [string, string, string][] = [
    // app k3s / labcontroller -- take a hostname/IP
    ['app k3s install', 'installed_hosts', 'installed host'],
    ['app k3s health', 'installed_hosts', 'installed host'],
    ['app labcontroller deploy', 'installed_hosts', 'installed host'],
    ['app labcontroller status', 'installed_hosts', 'installed host'],
    // provision install -- takes MAC then hostname
    ['provision install', 'known_macs', 'MAC address'],
    // provision reprovision/forget/logs -- take MAC or hostname
    ['provision reprovision', 'hosts_and_macs', 'host or MAC'],
    ['provision forget', 'hosts_and_macs', 'host or MAC'],
    ['provision logs', 'hosts_and_macs', 'host or MAC'],
  ];
  emit('# Target argument completions');
  for (const [chain, helper, label] of targetCompletions) {
    emit(`complete -c ${BIN} -n "__${BIN}_using_cmd ${chain}" -a "(__${BIN}_${helper})" -d '${label}'`);
  }
  emit('');

  // Top-level commands
  const topCmds = root.subcommands.filter((c) => !c.hidden);
  const topNames = topCmds.map((c) => c.name).join(' ');
  emit('# Top-level commands');
  for (const cmd of topCmds) {
    emit(`complete -c ${BIN} -n "not __fish_seen_subcommand_from ${topNames}" -a ${cmd.name} -d '${esc(cmd.description)}'`);
  }
  emit('');

  // Subcommands and options at each level
  for (const { path, cmd } of allCmds) {
    if (cmd.hidden) continue;
    const chain = path.join(' ');

    // If this command has subcommands, offer them
    const visibleSubs = cmd.subcommands.filter((s) => !s.hidden);
    if (visibleSubs.length > 0) {
      const parentCondition = `__${BIN}_using_cmd ${chain}`;
      emit(`# ${chain} subcommands`);
      for (const sub of visibleSubs) {
        emit(`complete -c ${BIN} -n "${parentCondition}" -a ${sub.name} -d '${esc(sub.description)}'`);
      }
      emit('');
    }

    // Options for this command (use __in_cmd so options complete even after positional args)
    if (cmd.options.length > 0) {
      const condition = `__${BIN}_in_cmd ${chain}`;
      emit(`# ${chain} options`);
      for (const opt of cmd.options) {
        const parts = [`complete -c ${BIN} -n "${condition}"`];
        if (opt.short) parts.push(`-s ${opt.short.replace('-', '')}`);
        parts.push(`-l ${opt.long.replace(/^--/, '')}`);
        parts.push(`-d '${esc(opt.description)}'`);
        if (opt.takesValue) {
          // Choices sit inside a single-quoted string just like descriptions, so escape them too.
          parts.push(opt.choices ? `-xa '${opt.choices.map((c) => esc(c)).join(' ')}'` : '-x');
        }
        emit(parts.join(' '));
      }
      emit('');
    }
  }

  return lines.join('\n') + '\n';
}
|
||||
|
||||
// ============================================================
|
||||
// Bash completion generator
|
||||
// ============================================================
|
||||
|
||||
/**
 * Render the bash completion script for the command tree.
 *
 * The generated `_<bin>` function collects the non-option words typed so far
 * into a chain string and dispatches on it with a `case` statement. Each
 * visible command gets two patterns:
 *  - the exact chain, offering its visible subcommands and its options;
 *  - the chain followed by further words (positional arguments), offering
 *    only its options, so flags still complete after an argument was typed.
 * Commands are listed deepest first so the longest matching chain wins.
 *
 * @param root - Root of the extracted command tree; its name is the binary name.
 * @returns The script text, terminated by a newline.
 */
function generateBash(root: CmdInfo): string {
  const BIN = root.name;
  const lines: string[] = [];
  const emit = (...chunk: string[]): void => { lines.push(...chunk); };

  emit(
    `# ${BIN} bash completions -- auto-generated by scripts/generate-completions.ts`,
    '# DO NOT EDIT MANUALLY -- run: pnpm completions:generate',
    '',
  );

  const allCmds = collectCommands(root);
  const topCmds = root.subcommands.filter((c) => !c.hidden).map((c) => c.name);

  emit(
    `_${BIN}() {`,
    ' local cur prev words cword',
    ' _init_completion || return',
    '',
    ` local top_commands="${topCmds.join(' ')}"`,
    '',
  );

  // Build the chain of non-option words from the command line.
  // (The former `skip_next` variable was never set and has been dropped.)
  emit(
    ' # Extract the subcommand chain (option flags are skipped)',
    ' local -a subcmd_chain=()',
    ' local i',
    ' for ((i=1; i < cword; i++)); do',
    ' case "${words[i]}" in',
    ' -*) ;; # skip options',
    ' *) subcmd_chain+=("${words[i]}") ;;',
    ' esac',
    ' done',
    '',
    ' local chain_len=${#subcmd_chain[@]}',
    ' local chain_str="${subcmd_chain[*]}"',
    '',
  );

  // One pair of case arms per command path
  emit(' case "$chain_str" in');

  // Deepest paths first so the longest chain is matched before its prefixes
  const sortedCmds = [...allCmds].sort((a, b) => b.path.length - a.path.length);

  for (const { path, cmd } of sortedCmds) {
    if (cmd.hidden) continue;
    const pathStr = path.join(' ');
    const visibleSubs = cmd.subcommands.filter((s) => !s.hidden).map((s) => s.name);
    const optFlags = cmd.options.flatMap((opt) => (opt.short ? [opt.short, opt.long] : [opt.long]));
    optFlags.push('-h', '--help');

    // Exact chain: subcommands and options.
    emit(
      ` "${pathStr}")`,
      ` COMPREPLY=($(compgen -W "${[...visibleSubs, ...optFlags].join(' ')}" -- "$cur"))`,
      ' return ;;',
    );
    // Chain followed by positional arguments: options only.
    emit(
      ` "${pathStr} "*)`,
      ` COMPREPLY=($(compgen -W "${optFlags.join(' ')}" -- "$cur"))`,
      ' return ;;',
    );
  }

  // Top-level (no subcommand yet)
  emit(
    ' "")',
    ' COMPREPLY=($(compgen -W "$top_commands -h --help -v --version" -- "$cur"))',
    ' return ;;',
  );

  // Default
  emit(
    ' *)',
    ' COMPREPLY=($(compgen -W "-h --help" -- "$cur"))',
    ' return ;;',
  );

  emit(' esac', '}', '', `complete -F _${BIN} ${BIN}`);

  return lines.join('\n') + '\n';
}
|
||||
|
||||
// ============================================================
|
||||
// Main
|
||||
// ============================================================
|
||||
|
||||
/**
 * Compare one generated completion file with the copy on disk.
 *
 * @param path - Location of the file on disk.
 * @param expected - Freshly generated content.
 * @param label - Name used in the messages written to stderr.
 * @returns `true` when the file cannot be read or differs from `expected`.
 */
function completionIsStale(path: string, expected: string, label: string): boolean {
  let current: string;
  try {
    current = readFileSync(path, 'utf-8');
  } catch {
    console.error(`${label} does not exist`);
    return true;
  }
  if (current !== expected) {
    console.error(`${label} is stale`);
    return true;
  }
  return false;
}

/**
 * Entry point: generate both completion scripts and act on the mode argument.
 *
 * - no argument: print both scripts to stdout;
 * - `--write`: write them into `completions/` and exit 0;
 * - `--check`: exit 0 when the files on disk match, 1 when either is stale or missing.
 *
 * Any other argument is rejected with exit status 2, so a mistyped `--check`
 * cannot pass silently by falling through to the print mode.
 * Exits with status 1 when the command tree cannot be loaded.
 */
async function main(): Promise<void> {
  const mode = process.argv[2] ?? '';
  if (mode !== '' && mode !== '--check' && mode !== '--write') {
    console.error(`Unknown option: ${mode}`);
    console.error('Usage: tsx scripts/generate-completions.ts [--write | --check]');
    process.exit(2);
  }

  let tree: CmdInfo;
  try {
    tree = await extractTree();
  } catch (err) {
    console.error('Failed to extract command tree from createProgram().');
    console.error('Make sure workspace packages are built: pnpm build');
    console.error(err);
    process.exit(1);
  }

  const fishContent = generateFish(tree);
  const bashContent = generateBash(tree);

  const completionsDir = join(ROOT, 'completions');
  const fishPath = join(completionsDir, 'labctl.fish');
  const bashPath = join(completionsDir, 'labctl.bash');

  if (mode === '--check') {
    // Evaluate both files before deciding, so every stale file is reported.
    const fishStale = completionIsStale(fishPath, fishContent, 'completions/labctl.fish');
    const bashStale = completionIsStale(bashPath, bashContent, 'completions/labctl.bash');
    if (fishStale || bashStale) {
      console.error('Run: pnpm completions:generate');
      process.exit(1);
    }
    console.log('Completions are up to date.');
    process.exit(0);
  }

  if (mode === '--write') {
    mkdirSync(completionsDir, { recursive: true });
    writeFileSync(fishPath, fishContent);
    writeFileSync(bashPath, bashContent);
    console.log(`Wrote ${fishPath}`);
    console.log(`Wrote ${bashPath}`);
    process.exit(0);
  }

  // Default: print to stdout
  console.log('=== completions/labctl.fish ===');
  console.log(fishContent);
  console.log('=== completions/labctl.bash ===');
  console.log(bashContent);
}

// Run the generator; any unexpected rejection is logged and ends the process with status 1.
main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});
|
||||
65
bastion/scripts/link-package.sh
Executable file
65
bastion/scripts/link-package.sh
Executable file
@@ -0,0 +1,65 @@
|
||||
#!/bin/bash
|
||||
# Link a Gitea package to a repository.
|
||||
# Works automatically on Gitea 1.24+ (uses API), warns on older versions.
|
||||
#
|
||||
# Usage: source scripts/link-package.sh
|
||||
# link_package <type> <name>
|
||||
#
|
||||
# Requires: GITEA_URL, GITEA_TOKEN, GITEA_OWNER, GITEA_REPO
|
||||
|
||||
# Link a Gitea package to a repository (best effort).
#   $1  package type, e.g. "rpm", "container"
#   $2  package name, e.g. "lab", "lab-bastion"
# Reads GITEA_TOKEN and, with defaults, GITEA_URL, GITEA_OWNER, GITEA_REPO and
# GITEA_PUBLIC_URL from the environment.
# Returns 1 only when an argument is missing. Every network or API problem
# ends in a warning and status 0, so a caller running under `set -e` is never
# aborted by the linking step.
link_package() {
  local PKG_TYPE="$1"   # e.g. "rpm", "container"
  local PKG_NAME="$2"   # e.g. "lab", "lab-bastion"

  if [ -z "$PKG_TYPE" ] || [ -z "$PKG_NAME" ]; then
    echo "Usage: link_package <type> <name>"
    return 1
  fi

  local GITEA_URL="${GITEA_URL:-http://10.0.0.194:3012}"
  local GITEA_OWNER="${GITEA_OWNER:-michal}"
  local GITEA_REPO="${GITEA_REPO:-lab}"

  if [ -z "$GITEA_TOKEN" ]; then
    echo "WARNING: GITEA_TOKEN not set, skipping package-repo linking."
    return 0
  fi

  # Check if already linked (list all packages, filter by type+name client-side).
  # Type and name are passed as arguments instead of being pasted into the
  # Python source. `|| REPO_LINK=""` keeps a failed request or an unexpected
  # response (e.g. a JSON error object) from terminating a `set -e` caller.
  local REPO_LINK
  REPO_LINK=$(curl -s -H "Authorization: token ${GITEA_TOKEN}" \
    "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}" \
    | python3 -c '
import json, sys
pkg_type, pkg_name = sys.argv[1], sys.argv[2]
for p in json.load(sys.stdin):
    if p["type"] == pkg_type and p["name"] == pkg_name:
        r = p.get("repository")
        if r:
            print(r["full_name"])
        break
' "$PKG_TYPE" "$PKG_NAME" 2>/dev/null) || REPO_LINK=""

  if [ -n "$REPO_LINK" ]; then
    echo "==> Package ${PKG_TYPE}/${PKG_NAME} already linked to ${REPO_LINK}"
    return 0
  fi

  # Try the Gitea 1.24+ link API. On a transport failure curl still prints
  # "000" as the status code; `|| true` keeps that from aborting the caller.
  local HTTP_CODE
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
    -H "Authorization: token ${GITEA_TOKEN}" \
    "${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/${PKG_TYPE}/${PKG_NAME}/-/link/${GITEA_REPO}") || true

  if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
    echo "==> Linked ${PKG_TYPE}/${PKG_NAME} to ${GITEA_OWNER}/${GITEA_REPO}"
    return 0
  fi

  # Linking failed -- report the status and give manual instructions. An old
  # Gitea is one possible cause, but so is a bad token or an unreachable host.
  local PUBLIC_URL="${GITEA_PUBLIC_URL:-${GITEA_URL}}"
  echo ""
  echo "WARNING: Could not auto-link ${PKG_TYPE}/${PKG_NAME} to repository (HTTP ${HTTP_CODE}; the link API requires Gitea 1.24+)."
  echo "Link it manually in the Gitea UI:"
  echo " ${PUBLIC_URL}/${GITEA_OWNER}/-/packages/${PKG_TYPE}/${PKG_NAME}/settings"
  echo " -> Link to repository: ${GITEA_OWNER}/${GITEA_REPO}"
  return 0
}
|
||||
72
bastion/scripts/publish-deb.sh
Executable file
72
bastion/scripts/publish-deb.sh
Executable file
@@ -0,0 +1,72 @@
|
||||
#!/bin/bash
#
# publish-deb.sh -- upload the labctl .deb package(s) found in dist/ to the
# Gitea Debian registry, once per supported distribution, then link the
# package to the repository.
#
# Exit status: 1 when the token or the packages are missing, or when not a
# single upload succeeded; 0 otherwise (individual failures are reported as
# warnings).
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Load .env if present
if [ -f .env ]; then
  set -a; source .env; set +a
fi

GITEA_URL="${GITEA_URL:-http://10.0.0.194:3012}"
GITEA_PUBLIC_URL="${GITEA_PUBLIC_URL:-https://mysources.co.uk}"
GITEA_OWNER="${GITEA_OWNER:-michal}"
GITEA_REPO="${GITEA_REPO:-lab}"

GITEA_TOKEN="${GITEA_TOKEN:-$PACKAGES_TOKEN}"
if [ -z "$GITEA_TOKEN" ]; then
  echo "Error: GITEA_TOKEN (or PACKAGES_TOKEN) not set. Add it to .env or export it."
  exit 1
fi

# Collect every built package: build-rpm.sh --all leaves one .deb per
# architecture in dist/, and all of them need publishing.
shopt -s nullglob
DEB_FILES=(dist/labctl*.deb)
shopt -u nullglob
if [ "${#DEB_FILES[@]}" -eq 0 ]; then
  echo "Error: No DEB found in dist/. Run scripts/build-rpm.sh first."
  exit 1
fi

# Gitea Debian registry: PUT /api/packages/{owner}/debian/pool/{distribution}/{component}/upload
# Publish to each supported distribution.
# Debian: trixie (13/stable), forky (14/testing)
# Ubuntu: noble (24.04 LTS), plucky (25.04)
DISTRIBUTIONS="trixie forky noble plucky"

# Response bodies go to a private temp file that is removed on any exit.
UPLOAD_OUT="$(mktemp)"
trap 'rm -f "$UPLOAD_OUT"' EXIT

UPLOADS_OK=0
UPLOADS_FAILED=0

for DEB_FILE in "${DEB_FILES[@]}"; do
  # Read the version from the package's control data
  DEB_VERSION=$(dpkg-deb --field "$DEB_FILE" Version 2>/dev/null || echo "unknown")

  echo "==> Publishing $DEB_FILE (version $DEB_VERSION) to ${GITEA_URL}..."

  for DIST in $DISTRIBUTIONS; do
    echo " -> $DIST..."
    # `|| true`: on a transport failure curl reports status 000 and exits
    # non-zero; handle it below as a failed upload instead of dying silently.
    HTTP_CODE=$(curl -s -o "$UPLOAD_OUT" -w "%{http_code}" \
      -X PUT \
      -H "Authorization: token ${GITEA_TOKEN}" \
      --upload-file "$DEB_FILE" \
      "${GITEA_URL}/api/packages/${GITEA_OWNER}/debian/pool/${DIST}/main/upload") || true

    if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
      echo " Published to $DIST"
      UPLOADS_OK=$((UPLOADS_OK + 1))
    elif [ "$HTTP_CODE" = "409" ]; then
      echo " Already exists in $DIST (skipping)"
      UPLOADS_OK=$((UPLOADS_OK + 1))
    else
      echo " WARNING: Upload to $DIST returned HTTP $HTTP_CODE"
      cat "$UPLOAD_OUT" 2>/dev/null || true
      echo ""
      UPLOADS_FAILED=$((UPLOADS_FAILED + 1))
    fi
  done
done

echo ""
if [ "$UPLOADS_OK" -eq 0 ]; then
  # Nothing reached the registry: do not claim success.
  echo "Error: all ${UPLOADS_FAILED} upload(s) failed." >&2
  exit 1
elif [ "$UPLOADS_FAILED" -gt 0 ]; then
  echo "==> Published with ${UPLOADS_FAILED} failed upload(s), see warnings above."
else
  echo "==> Published successfully!"
fi

# Ensure package is linked to the repository
source "$SCRIPT_DIR/link-package.sh"
link_package "debian" "labctl"

echo ""
echo "Install with:"
echo " echo \"deb ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian trixie main\" | sudo tee /etc/apt/sources.list.d/labctl.list"
echo " curl -fsSL ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian/repository.key | sudo gpg --dearmor -o /etc/apt/keyrings/labctl.gpg"
echo " sudo apt update && sudo apt install labctl"
|
||||
62
bastion/scripts/publish-rpm.sh
Executable file
62
bastion/scripts/publish-rpm.sh
Executable file
@@ -0,0 +1,62 @@
|
||||
#!/bin/bash
# Publish the locally built labctl RPM to the Gitea RPM package registry.
#
# Settings are read from the environment (or from .env in the project root):
#   GITEA_URL     registry base URL used for API calls and the upload
#   GITEA_OWNER   owner the package is published under
#   GITEA_TOKEN   API token; falls back to PACKAGES_TOKEN
# GITEA_PUBLIC_URL and GITEA_REPO are only defaulted here and not used by this
# script itself -- presumably they are consumed by the sourced link-package.sh.
#
# If the same version already exists in the registry it is deleted first so the
# upload replaces it.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Load .env if present (set -a exports everything it defines)
if [ -f .env ]; then
    set -a; source .env; set +a
fi

GITEA_URL="${GITEA_URL:-http://10.0.0.194:3012}"
GITEA_PUBLIC_URL="${GITEA_PUBLIC_URL:-https://mysources.co.uk}"
GITEA_OWNER="${GITEA_OWNER:-michal}"
GITEA_REPO="${GITEA_REPO:-lab}"

GITEA_TOKEN="${GITEA_TOKEN:-$PACKAGES_TOKEN}"
if [ -z "$GITEA_TOKEN" ]; then
    echo "Error: GITEA_TOKEN (or PACKAGES_TOKEN) not set. Add it to .env or export it."
    exit 1
fi

RPM_FILE=$(ls dist/labctl-*.rpm 2>/dev/null | head -1)
if [ -z "$RPM_FILE" ]; then
    echo "Error: No RPM found in dist/. Run scripts/build-rpm.sh first."
    exit 1
fi

# Get version string as it appears in Gitea (e.g. "0.1.0-1")
RPM_VERSION=$(rpm -qp --queryformat '%{VERSION}-%{RELEASE}' "$RPM_FILE")

echo "==> Publishing $RPM_FILE (version $RPM_VERSION) to ${GITEA_URL}..."

PACKAGE_API="${GITEA_URL}/api/v1/packages/${GITEA_OWNER}/rpm/labctl/${RPM_VERSION}"

# Check if version already exists and delete it first.
# -sS keeps curl quiet but still prints the reason when the request itself fails.
EXISTING=$(curl -sS -o /dev/null -w "%{http_code}" \
    -H "Authorization: token ${GITEA_TOKEN}" \
    "$PACKAGE_API")

if [ "$EXISTING" = "200" ]; then
    echo "==> Version $RPM_VERSION already exists, replacing..."
    # A failed delete would otherwise only surface as an unexplained upload failure.
    if ! curl --fail -sS -o /dev/null -X DELETE \
        -H "Authorization: token ${GITEA_TOKEN}" \
        "$PACKAGE_API"; then
        echo "Error: could not delete existing version $RPM_VERSION from the registry." >&2
        exit 1
    fi
fi

# Upload
if ! curl --fail -sS -X PUT \
    -H "Authorization: token ${GITEA_TOKEN}" \
    --upload-file "$RPM_FILE" \
    "${GITEA_URL}/api/packages/${GITEA_OWNER}/rpm/upload"; then
    echo "Error: upload of $RPM_FILE failed." >&2
    exit 1
fi

echo ""
echo "==> Published successfully!"

# Ensure package is linked to the repository
source "$SCRIPT_DIR/link-package.sh"
link_package "rpm" "labctl"

echo ""
echo "Install with:"
echo " sudo dnf install labctl # if repo already configured"
|
||||
75
bastion/scripts/release.sh
Executable file
75
bastion/scripts/release.sh
Executable file
@@ -0,0 +1,75 @@
|
||||
#!/bin/bash
# Full release pipeline for lab-bastion: build packages, publish RPM and DEB,
# build and push the container image, optionally install the RPM locally, and
# print install instructions.
#
# Settings come from the environment or from .env in the project root
# (GITEA_PUBLIC_URL, GITEA_OWNER, GITEA_REGISTRY; the called scripts read more).
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Load .env if present (set -a exports everything it defines)
if [ -f .env ]; then
    set -a; source .env; set +a
fi

echo "=== lab-bastion release ==="
echo ""

# 1. Build binaries & packages (both architectures)
bash scripts/build-rpm.sh --all

echo ""

# 2. Publish RPM
bash scripts/publish-rpm.sh

echo ""

# 3. Publish DEB
bash scripts/publish-deb.sh

echo ""

# 4. Build & push Docker image
bash scripts/build-bastion.sh

echo ""

# 5. Install locally (Fedora/RHEL only)
if [ -f /etc/fedora-release ] || [ -f /etc/redhat-release ]; then
    echo "==> Installing locally..."
    # Step 1 builds with --all, so dist/ can hold packages for several
    # architectures. Prefer the one matching this host and only fall back to
    # "first match" when no architecture-specific file is found.
    RPM_FILE=$(ls dist/labctl-*."$(uname -m)".rpm 2>/dev/null | head -1)
    if [ -z "$RPM_FILE" ]; then
        RPM_FILE=$(ls dist/labctl-*.rpm 2>/dev/null | head -1)
    fi
    if [ -n "$RPM_FILE" ]; then
        sudo rpm -U --force "$RPM_FILE"
        echo ""
        echo "==> Installed:"
        labctl --version || echo "(labctl binary installed)"
    else
        echo "==> WARNING: No RPM found in dist/, skipping local install."
    fi
else
    echo "==> Not Fedora/RHEL — skipping local RPM install."
fi

echo ""

# 6. Summary
GITEA_PUBLIC_URL="${GITEA_PUBLIC_URL:-https://mysources.co.uk}"
GITEA_OWNER="${GITEA_OWNER:-michal}"
REGISTRY="${GITEA_REGISTRY:-mysources.co.uk}"
VERSION=$(node -p "require('./package.json').version")

echo "=== Done! ==="
echo ""
echo "RPM install:"
echo " sudo dnf config-manager --add-repo ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/rpm.repo"
echo " sudo dnf install labctl"
echo ""
echo "DEB install (Debian/Ubuntu):"
echo " echo \"deb ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian trixie main\" | sudo tee /etc/apt/sources.list.d/labctl.list"
echo " curl -fsSL ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian/repository.key | sudo gpg --dearmor -o /etc/apt/keyrings/labctl.gpg"
echo " sudo apt update && sudo apt install labctl"
echo ""
echo "Docker image:"
# Use the configured owner rather than a hard-coded one so the hint stays
# correct when GITEA_OWNER is overridden.
echo " podman pull ${REGISTRY}/${GITEA_OWNER}/lab-bastion:${VERSION}"
echo ""
echo "k3s deployment:"
echo " kubectl apply -k deploy/k3s/"
|
||||
71
bastion/scripts/test-integration.sh
Executable file
71
bastion/scripts/test-integration.sh
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/bin/bash
# Run integration tests inside a Node container with access to host libvirt.
#
# Usage: sudo ./scripts/test-integration.sh [vitest args...]
# Example: sudo ./scripts/test-integration.sh -t k3s
#
# The container is started privileged on the host network with the project
# directory, the invoking user's ~/.ssh (read-only), the libvirt socket and the
# libvirt image directory mounted in. Exit status is that of the container.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"

# Detect real user (even when running via sudo). The home directory is looked
# up in the passwd database because it is not always /home/<user> (e.g. root);
# the old /home/<user> guess is kept only as a fallback.
REAL_USER="${SUDO_USER:-$(whoami)}"
REAL_HOME="$(getent passwd "$REAL_USER" | cut -d: -f6)"
REAL_HOME="${REAL_HOME:-/home/${REAL_USER}}"

# Pick the SSH key handed to the tests. id_rsa stays first (and is the fallback)
# so existing setups behave exactly as before.
SSH_KEY_PATH_HOST="${REAL_HOME}/.ssh/id_rsa"
for key_name in id_rsa id_ed25519 id_ecdsa; do
    if [ -f "${REAL_HOME}/.ssh/${key_name}" ]; then
        SSH_KEY_PATH_HOST="${REAL_HOME}/.ssh/${key_name}"
        break
    fi
done

echo "==> Running integration tests in container"
echo " Project: ${PROJECT_ROOT}"
echo " User: ${REAL_USER}"
echo " SSH key: ${REAL_HOME}/.ssh/"
echo ""

# Check prerequisites
if ! command -v podman &>/dev/null && ! command -v docker &>/dev/null; then
    echo "ERROR: podman or docker required"
    exit 1
fi

RUNTIME="podman"
if ! command -v podman &>/dev/null; then
    RUNTIME="docker"
fi

# Check libvirt socket
if [ ! -S /var/run/libvirt/libvirt-sock ]; then
    echo "ERROR: libvirt socket not found at /var/run/libvirt/libvirt-sock"
    echo " Is libvirtd running? Try: sudo systemctl start libvirtd"
    exit 1
fi

# Create a temp dir for cloud-init artifacts (avoids SELinux /tmp relabel)
WORK_TMP="/var/tmp/lab-integration-$$"
mkdir -p "${WORK_TMP}"
trap 'rm -rf "${WORK_TMP}"' EXIT

# NOTE: deliberately not `exec` -- exec replaces this shell, so the EXIT trap
# above would never run and the temp dir would be left behind.
#
# The inner script is single-quoted and receives the caller's arguments as
# positional parameters, so arguments containing spaces or shell metacharacters
# reach vitest unchanged instead of being re-parsed by the inner shell.
$RUNTIME run --rm \
    --name lab-integration-test \
    --privileged \
    --security-opt label=disable \
    --network=host \
    -v "${PROJECT_ROOT}:${PROJECT_ROOT}" \
    -v "${REAL_HOME}/.ssh:${REAL_HOME}/.ssh:ro" \
    -v "/var/run/libvirt/libvirt-sock:/var/run/libvirt/libvirt-sock" \
    -v "/var/lib/libvirt/images:/var/lib/libvirt/images" \
    -v "${WORK_TMP}:/tmp/lab-integration-tests" \
    -w "${PROJECT_ROOT}" \
    -e "SSH_KEY_PATH=${SSH_KEY_PATH_HOST}" \
    -e "HOME=${REAL_HOME}" \
    node:22-bookworm \
    bash -c '
        # Install system deps for libvirt client + cloud-init ISO creation
        apt-get update -qq && apt-get install -y -qq libvirt-clients virtinst genisoimage openssh-client qemu-utils sudo >/dev/null 2>&1

        # Install pnpm
        corepack enable && corepack prepare pnpm@9 --activate >/dev/null 2>&1

        echo "==> Installing project dependencies..."
        pnpm install --frozen-lockfile 2>/dev/null

        echo "==> Running integration tests..."
        echo ""
        pnpm run test:integration "$@"
    ' lab-integration-test "$@"
|
||||
152
bastion/scripts/test-provision.sh
Executable file
152
bastion/scripts/test-provision.sh
Executable file
@@ -0,0 +1,152 @@
|
||||
#!/bin/bash
# Driver for the PXE / ISO boot integration tests (vitest, selected by test name).
#
# Usage:
#   sudo ./scripts/test-provision.sh        # run PXE + ISO (x86_64)
#   sudo ./scripts/test-provision.sh pxe    # PXE only
#   sudo ./scripts/test-provision.sh iso    # ISO only (x86_64)
#   sudo ./scripts/test-provision.sh arm    # ARM ISO boot (emulated, SLOW ~60min)
#   sudo ./scripts/test-provision.sh all    # all tests including ARM
#
# Prerequisites:
#   libvirtd, OVMF (edk2-ovmf), iPXE (ipxe-bootimgs-x86),
#   dnsmasq, xorriso, mtools, virt-install, qemu-img
#   ARM: qemu-system-aarch64, edk2-aarch64
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"

cd "$PROJECT_ROOT"

# Resolve the invoking user (not root when run through sudo) so their SSH keys are used
REAL_USER="${SUDO_USER:-$(whoami)}"
REAL_HOME=$(getent passwd "$REAL_USER" | cut -d: -f6)

# ANSI colour codes, expanded by `echo -e`
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BOLD='\033[1m'
RESET='\033[0m'

echo ""
echo -e "${BOLD}Lab Bastion -- Provision Integration Tests${RESET}"
echo "==========================================="
echo ""

# --- Prerequisite checks ---
# Collect every missing tool first so the user sees the full list at once.
MISSING=""
for tool in virsh virt-install qemu-img dnsmasq xorriso mformat mcopy curl; do
    command -v "$tool" &>/dev/null || MISSING="$MISSING $tool"
done

if [ -n "$MISSING" ]; then
    echo -e "${RED}Missing tools:${RESET}$MISSING"
    echo "Install: sudo dnf install libvirt virt-install qemu-img dnsmasq xorriso mtools curl"
    exit 1
fi

if ! systemctl is-active libvirtd &>/dev/null; then
    echo -e "${RED}libvirtd not running.${RESET} Start with: sudo systemctl start libvirtd"
    exit 1
fi

if [ ! -f /usr/share/edk2/ovmf/OVMF_CODE.fd ]; then
    echo -e "${RED}OVMF firmware not found.${RESET} Install: sudo dnf install edk2-ovmf"
    exit 1
fi

# First iPXE EFI binary that exists, in order of preference
IPXE_EFI=""
for candidate in /usr/share/ipxe/ipxe-snponly-x86_64.efi /usr/share/ipxe/ipxe-snp-x86_64.efi /usr/share/ipxe/ipxe-x86_64.efi; do
    if [ -f "$candidate" ]; then
        IPXE_EFI="$candidate"
        break
    fi
done
if [ -z "$IPXE_EFI" ]; then
    echo -e "${RED}iPXE EFI binary not found.${RESET} Install: sudo dnf install ipxe-bootimgs-x86"
    exit 1
fi

# Find SSH key: first key type for which both the private and public half exist
SSH_KEY=""
for key_name in id_ed25519 id_ecdsa id_rsa; do
    key_path="$REAL_HOME/.ssh/$key_name"
    if [ -f "$key_path" ] && [ -f "$key_path.pub" ]; then
        SSH_KEY="$key_path"
        break
    fi
done
if [ -z "$SSH_KEY" ]; then
    echo -e "${RED}No SSH key found in $REAL_HOME/.ssh/${RESET}"
    exit 1
fi

echo -e " User: ${BOLD}$REAL_USER${RESET}"
echo -e " SSH key: ${BOLD}$SSH_KEY${RESET}"
echo -e " iPXE: ${BOLD}$IPXE_EFI${RESET}"
echo ""

# --- Determine which tests to run ---
MODE="${1:-both}"

# run_test LABEL FILTER
# Runs the vitest integration suite restricted to tests whose name matches
# FILTER, prints a pass/fail line for LABEL, and returns 0 on success, 1 on failure.
run_test() {
    local label="$1" filter="$2"
    local rc=0

    echo ""
    echo -e "${YELLOW}━━━ Running $label test ━━━${RESET}"
    echo ""

    SSH_KEY_PATH="$SSH_KEY" HOME="$REAL_HOME" \
        npx vitest run -c tests/integration/vitest.config.ts -t "$filter" 2>&1 || rc=1

    echo ""
    if [ "$rc" -eq 0 ]; then
        echo -e "${GREEN}✔ $label test passed${RESET}"
    else
        echo -e "${RED}✘ $label test failed${RESET}"
    fi
    return "$rc"
}

FAILED=0

case "$MODE" in
    pxe)
        run_test "PXE boot" "PXE boot" || FAILED=1
        ;;
    iso)
        run_test "ISO boot" "ISO boot" || FAILED=1
        ;;
    arm|arm-iso)
        # Explicitly requested ARM run: a missing emulator is a hard error
        if ! command -v qemu-system-aarch64 &>/dev/null; then
            echo -e "${RED}qemu-system-aarch64 not found.${RESET} Install: sudo dnf install qemu-system-aarch64 edk2-aarch64"
            exit 1
        fi
        echo -e "${YELLOW}ARM emulation is ~10x slower than native. Expect 30-60 minutes.${RESET}"
        run_test "ARM ISO boot" "ARM ISO" || FAILED=1
        ;;
    both)
        run_test "PXE boot" "PXE boot" || FAILED=1
        run_test "ISO boot" "ISO boot" || FAILED=1
        ;;
    all)
        run_test "PXE boot" "PXE boot" || FAILED=1
        run_test "ISO boot" "ISO boot" || FAILED=1
        # In "all" mode the ARM test is skipped rather than failed when the emulator is absent
        if ! command -v qemu-system-aarch64 &>/dev/null; then
            echo -e "${YELLOW}Skipping ARM test (qemu-system-aarch64 not installed)${RESET}"
        else
            echo -e "${YELLOW}ARM emulation is ~10x slower than native.${RESET}"
            run_test "ARM ISO boot" "ARM ISO" || FAILED=1
        fi
        ;;
    *)
        echo "Usage: $0 [pxe|iso|arm|both|all]"
        exit 1
        ;;
esac

echo ""
if [ "$FAILED" -ne 0 ]; then
    echo -e "${RED}${BOLD}Some tests failed.${RESET}"
    exit 1
fi
echo -e "${GREEN}${BOLD}All provision tests passed.${RESET}"
|
||||
38
bastion/src/bastion/package.json
Normal file
38
bastion/src/bastion/package.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"name": "@lab/bastion",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"main": "./dist/main.js",
|
||||
"types": "./dist/main.d.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"import": "./dist/main.js",
|
||||
"types": "./dist/main.d.ts"
|
||||
},
|
||||
"./iso-builder": {
|
||||
"import": "./dist/services/iso-builder.js",
|
||||
"types": "./dist/services/iso-builder.d.ts"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc --build",
|
||||
"clean": "rimraf dist",
|
||||
"dev": "tsx src/main.ts",
|
||||
"test": "vitest",
|
||||
"test:run": "vitest run"
|
||||
},
|
||||
"dependencies": {
|
||||
"@fastify/static": "^8.0.0",
|
||||
"@lab/modules": "workspace:*",
|
||||
"@lab/shared": "workspace:*",
|
||||
"execa": "^9.5.0",
|
||||
"fastify": "^5.0.0",
|
||||
"winston": "^3.17.0",
|
||||
"ws": "^8.19.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.10.0",
|
||||
"@types/ws": "^8.18.0"
|
||||
}
|
||||
}
|
||||
58
bastion/src/bastion/src/config.ts
Normal file
58
bastion/src/bastion/src/config.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
// Configuration from environment variables with sensible defaults.
|
||||
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
|
||||
/**
 * Reads a port number from the environment variable `name`.
 *
 * @returns `fallback` when the variable is unset or blank, otherwise the parsed port.
 * @throws Error when the value is not a plain integer in the range 0-65535.
 */
function readPortFromEnv(name: string, fallback: number): number {
  const raw = process.env[name]?.trim();
  if (raw === undefined || raw === "") return fallback;
  // parseInt would silently accept "80abc" and turn "abc" into NaN; be strict instead.
  const port = Number(raw);
  if (!/^\d+$/.test(raw) || port > 65535) {
    throw new Error(`Invalid ${name}: '${raw}' (expected an integer between 0 and 65535)`);
  }
  return port;
}

/**
 * Builds the bastion configuration.
 *
 * Each setting is resolved as: explicit override, then environment variable,
 * then built-in default. Network-related fields (iface, serverIp, network,
 * gateway, sshKeys, adminUser) only take their value from `overrides` here and
 * otherwise stay empty.
 *
 * @param overrides values that take precedence over environment and defaults
 * @returns the resolved configuration, including derived paths and mirror URLs
 * @throws Error when HTTP_PORT / SYSLOG_PORT are not valid ports or the DHCP
 *   mode is neither "proxy" nor "full"
 */
export function loadConfig(overrides: Partial<BastionConfig> = {}): BastionConfig {
  const fedoraVersion = overrides.fedoraVersion ?? process.env["FEDORA_VERSION"] ?? "43";
  const arch = overrides.arch ?? process.env["ARCH"] ?? "x86_64";
  const httpPort = overrides.httpPort ?? readPortFromEnv("HTTP_PORT", 8080);
  const timezone = overrides.timezone ?? process.env["TIMEZONE"] ?? "Europe/London";
  const locale = overrides.locale ?? process.env["LOCALE"] ?? "en_GB.UTF-8";
  const bastionDir = overrides.bastionDir ?? process.env["BASTION_DIR"] ?? "/tmp/lab-bastion";
  const domain = overrides.domain ?? process.env["DOMAIN"] ?? "ad.itaz.eu";

  // Validate instead of casting: a typo such as DHCP_MODE=Full must not slip through as a "valid" mode.
  const dhcpMode = overrides.dhcpMode ?? process.env["DHCP_MODE"] ?? "proxy";
  if (dhcpMode !== "proxy" && dhcpMode !== "full") {
    throw new Error(`Invalid DHCP mode: '${dhcpMode}' (expected "proxy" or "full")`);
  }
  const dhcpRangeStart = overrides.dhcpRangeStart ?? process.env["DHCP_RANGE_START"] ?? "";
  const dhcpRangeEnd = overrides.dhcpRangeEnd ?? process.env["DHCP_RANGE_END"] ?? "";

  const syslogPort = overrides.syslogPort ?? readPortFromEnv("SYSLOG_PORT", 5514);

  const ubuntuVersion = overrides.ubuntuVersion ?? process.env["UBUNTU_VERSION"] ?? "26.04";
  const ubuntuMirror = overrides.ubuntuMirror ?? process.env["UBUNTU_MIRROR"]
    ?? `https://releases.ubuntu.com/${ubuntuVersion}`;

  // Derived values: always computed from the settings above, never overridable directly.
  const fedoraMirror = `https://download.fedoraproject.org/pub/fedora/linux/releases/${fedoraVersion}/Everything/${arch}/os`;
  const tftpDir = `${bastionDir}/tftp`;
  const httpDir = `${bastionDir}/http`;
  const stateFile = `${bastionDir}/state.json`;

  return {
    fedoraVersion,
    arch,
    httpPort,
    timezone,
    locale,
    bastionDir,
    domain,
    dhcpMode,
    dhcpRangeStart,
    dhcpRangeEnd,
    ubuntuVersion,
    ubuntuMirror,
    // These are populated at runtime by the network service
    iface: overrides.iface ?? "",
    serverIp: overrides.serverIp ?? "",
    network: overrides.network ?? "",
    gateway: overrides.gateway ?? "",
    sshKeys: overrides.sshKeys ?? [],
    adminUser: overrides.adminUser ?? "",
    syslogPort,
    skipDnsmasq: overrides.skipDnsmasq,
    skipArtifacts: overrides.skipArtifacts,
    labdUrl: overrides.labdUrl ?? process.env["LABD_URL"],
    bastionJoinToken: overrides.bastionJoinToken ?? process.env["BASTION_JOIN_TOKEN"],
    fedoraMirror,
    tftpDir,
    httpDir,
    stateFile,
  };
}
|
||||
359
bastion/src/bastion/src/main.ts
Normal file
359
bastion/src/bastion/src/main.ts
Normal file
@@ -0,0 +1,359 @@
|
||||
// Entry point for the bastion server.
|
||||
// Starts the Fastify HTTP server, dnsmasq, and handles graceful shutdown.
|
||||
|
||||
import { mkdirSync, writeFileSync, readFileSync, existsSync, copyFileSync, symlinkSync, unlinkSync, renameSync } from "node:fs";
import { execSync, execFileSync } from "node:child_process";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import { loadConfig } from "./config.js";
|
||||
import { populateNetworkConfig } from "./services/network.js";
|
||||
import { createApp } from "./server.js";
|
||||
import { startDnsmasq, stopDnsmasq, generateDnsmasqConf } from "./services/dnsmasq.js";
|
||||
import { generateDiscoverKickstart } from "./services/kickstart-generator.js";
|
||||
import { renderBootIpxe } from "./templates/boot.ipxe.js";
|
||||
import { logger } from "./services/logger.js";
|
||||
import { BastionConnection } from "./services/labd-connection.js";
|
||||
import { progressBus } from "./services/progress-events.js";
|
||||
import { ensureBootIso } from "./routes/boot-iso.js";
|
||||
|
||||
/**
 * Copies `src` to `dest` unless `dest` is already present.
 *
 * @param src   file to copy from
 * @param dest  file to create
 * @param label human-readable name used in log lines and the error message
 * @throws Error when `dest` is absent and `src` does not exist
 */
function copyIfMissing(src: string, dest: string, label: string): void {
  const alreadyPresent = existsSync(dest);

  if (!alreadyPresent) {
    if (!existsSync(src)) throw new Error(`${label}: source not found at ${src}`);
    copyFileSync(src, dest);
    logger.info(` ${label} -- copied from ${src}`);
    return;
  }

  // An existing destination is treated as a cache hit; its content is not compared.
  logger.info(` ${label} -- cached`);
}
|
||||
|
||||
/**
 * Downloads `url` to `dest` with curl unless `dest` already exists.
 *
 * The transfer is written to `<dest>.part` and renamed into place only after
 * curl succeeds, so an interrupted or failed download can never be mistaken
 * for a cached file on the next start.
 *
 * @param url   source URL
 * @param dest  final path of the downloaded file
 * @param label human-readable name used in log lines and the error message
 * @throws Error when curl cannot be run or exits with a failure
 */
function download(url: string, dest: string, label: string): void {
  if (existsSync(dest)) {
    logger.info(` ${label} -- cached`);
    return;
  }
  logger.info(` ${label} -- downloading...`);

  const partial = `${dest}.part`;
  try {
    // Argument array instead of a shell string: URL and path are passed verbatim,
    // whatever characters they contain.
    execFileSync("curl", ["-#", "-L", "-f", "-o", partial, url], { stdio: "inherit" });
    renameSync(partial, dest);
  } catch {
    try { unlinkSync(partial); } catch { /* nothing was written */ }
    throw new Error(`Failed to download ${label} from ${url}`);
  }
}
|
||||
|
||||
/**
 * Creates a symlink at `linkPath` pointing to `target`, best-effort.
 *
 * An already existing `linkPath` is the expected case and is ignored silently.
 * Any other failure is logged as a warning instead of being swallowed, but
 * still does not throw.
 */
function symlinkSafe(target: string, linkPath: string): void {
  try {
    symlinkSync(target, linkPath);
  } catch (err: unknown) {
    const code = err instanceof Error && "code" in err ? err.code : undefined;
    if (code === "EEXIST") return; // link (or a file of that name) is already there
    const reason = err instanceof Error ? err.message : String(err);
    logger.warn(`Could not create symlink ${linkPath} -> ${target}: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Runs `cmd` with `args` and reports whether it exited successfully.
 *
 * The command is executed directly (no shell), so every argument reaches the
 * program exactly as given, even if it contains spaces or shell metacharacters.
 * Output is captured and discarded.
 *
 * @returns true on exit code 0; false on any failure, including a missing binary
 */
function runCmd(cmd: string, args: string[]): boolean {
  try {
    execFileSync(cmd, args, { stdio: "pipe" });
    return true;
  } catch {
    return false;
  }
}
|
||||
|
||||
// Firewall bookkeeping shared by openFirewall() and closeFirewall().
/** `--zone=<name>` for the zone of our interface, or "" to use the default zone. */
let fwZoneFlag = "";
/** True while at least one rule added by this process is still in place. */
let fwOpened = false;
/** Rules (in firewall-cmd "service=x" / "port=n/proto" form) this process added successfully. */
const fwAddedRules = new Set<string>();

/** Lists the firewalld rules the bastion needs, in `--add-<rule>` / `--remove-<rule>` form. */
function firewallRules(config: BastionConfig): string[] {
  // NOTE(review): the syslog port is not opened here -- confirm whether remote
  // installers need to reach it through firewalld.
  return ["service=dhcp", "service=tftp", `port=${config.httpPort}/tcp`, "port=4011/udp"];
}

/**
 * Adds the runtime firewalld rules the bastion needs (DHCP, TFTP, the HTTP port
 * and UDP 4011). Does nothing when firewalld is not running.
 *
 * Rules are added to the zone of `config.iface` when it can be determined,
 * otherwise to the default zone. A rule that cannot be added is logged and not
 * recorded, so closeFirewall() only removes what was actually added.
 */
function openFirewall(config: BastionConfig): void {
  // Check if firewalld is running
  if (!runCmd("firewall-cmd", ["--state"])) return;

  // Detect zone for our interface
  try {
    const zone = execSync(`firewall-cmd --get-zone-of-interface=${config.iface} 2>/dev/null`, { encoding: "utf-8" }).trim();
    if (zone) fwZoneFlag = `--zone=${zone}`;
  } catch { /* use default zone */ }

  const zf = fwZoneFlag ? [fwZoneFlag] : [];
  logger.info(`Opening firewall ports (DHCP, TFTP, HTTP:${config.httpPort})...`);
  for (const rule of firewallRules(config)) {
    if (runCmd("firewall-cmd", ["--quiet", ...zf, `--add-${rule}`])) {
      fwAddedRules.add(rule);
    } else {
      logger.warn(`firewall-cmd could not add ${rule} -- clients may be unable to reach the bastion`);
    }
  }
  fwOpened = fwAddedRules.size > 0;
}

/**
 * Removes the firewalld rules that openFirewall() added successfully.
 * Safe to call when nothing was opened, and safe to call more than once.
 */
function closeFirewall(config: BastionConfig): void {
  if (!fwOpened) return;
  const zf = fwZoneFlag ? [fwZoneFlag] : [];
  logger.info("Removing firewall rules...");
  for (const rule of firewallRules(config)) {
    if (!fwAddedRules.has(rule)) continue;
    runCmd("firewall-cmd", ["--quiet", ...zf, `--remove-${rule}`]);
  }
  fwAddedRules.clear();
  fwOpened = false;
}
|
||||
|
||||
/** Canonical MAC form used as state key: lower-case, colon-separated. */
function normalizeMac(mac: string): string {
  return mac.toLowerCase().replace(/-/g, ":");
}

/**
 * Starts the bastion and keeps the process alive until it is signalled.
 *
 * Steps, in order: resolve configuration, replace any previous instance
 * recorded in the PID file, prepare boot artifacts (unless skipped), write the
 * discovery kickstart and iPXE script, generate the dnsmasq config, open
 * firewall ports, start the HTTP server and syslog listener, start dnsmasq
 * (unless skipped), optionally connect to labd, then wait for SIGINT/SIGTERM.
 *
 * The returned promise never resolves; the process exits from the shutdown
 * handler or when dnsmasq dies.
 *
 * @param overrides configuration values that take precedence over env/defaults
 * @throws Error when dnsmasq is required but the process is not running as root,
 *   or when a mandatory boot artifact cannot be prepared
 */
export async function startBastion(overrides: Partial<BastionConfig> = {}): Promise<void> {
  // Load and populate config
  let config = loadConfig(overrides);
  config = populateNetworkConfig(config);

  // Bastion needs root for dnsmasq (DHCP port 67)
  if (!config.skipDnsmasq && process.getuid?.() !== 0) {
    throw new Error("Must run as root (dnsmasq needs DHCP/TFTP ports). Use: sudo labctl init bastion standalone start");
  }

  mkdirSync(config.bastionDir, { recursive: true, mode: 0o755 });
  const pidFile = `${config.bastionDir}/bastion.pid`;

  // Kill old instance if running
  try {
    if (existsSync(pidFile)) {
      const oldPid = parseInt(readFileSync(pidFile, "utf-8").trim(), 10);
      // Only ever signal a real, foreign PID: 0 or a negative number would
      // address a whole process group, and our own PID would kill this process.
      if (!isNaN(oldPid) && oldPid > 0 && oldPid !== process.pid) {
        try {
          process.kill(oldPid, "SIGTERM");
          logger.info(`Killed old bastion process (PID ${oldPid})`);
          // Give the old instance a moment to release its ports
          await new Promise((r) => setTimeout(r, 1000));
        } catch {
          // Process already dead
        }
      }
      // Remove stale PID file (may be owned by different user)
      try { unlinkSync(pidFile); } catch { /* ignore */ }
    }
  } catch {
    // Can't read PID file — try to remove it
    try { unlinkSync(pidFile); } catch { /* ignore */ }
  }

  // Write current PID
  writeFileSync(pidFile, String(process.pid), { mode: 0o644 });

  // Prepare directories
  mkdirSync(config.tftpDir, { recursive: true });
  mkdirSync(config.httpDir, { recursive: true });

  // Prepare boot artifacts
  if (config.skipArtifacts !== true) {
    logger.info(`Preparing boot artifacts (Fedora ${config.fedoraVersion} ${config.arch})...`);

    copyIfMissing(
      "/usr/share/ipxe/undionly.kpxe",
      `${config.tftpDir}/undionly.kpxe`,
      "iPXE BIOS",
    );
    copyIfMissing(
      "/usr/share/ipxe/ipxe-snponly-x86_64.efi",
      `${config.tftpDir}/ipxe.efi`,
      "iPXE UEFI x86_64",
    );
    // arm64 iPXE is optional
    try {
      copyIfMissing(
        "/usr/share/ipxe/arm64-efi/snponly.efi",
        `${config.tftpDir}/ipxe-arm64.efi`,
        "iPXE UEFI arm64",
      );
    } catch {
      logger.warn("arm64 iPXE not available -- skipping");
    }

    download(
      `${config.fedoraMirror}/images/pxeboot/vmlinuz`,
      `${config.httpDir}/vmlinuz`,
      "Fedora kernel",
    );
    download(
      `${config.fedoraMirror}/images/pxeboot/initrd.img`,
      `${config.httpDir}/initrd.img`,
      "Fedora initrd",
    );

    // Ubuntu netboot artifacts (non-fatal — Ubuntu version may not be released yet)
    try {
      logger.info(`Preparing Ubuntu ${config.ubuntuVersion} netboot artifacts...`);
      download(
        `${config.ubuntuMirror}/casper/vmlinuz`,
        `${config.httpDir}/ubuntu-vmlinuz`,
        "Ubuntu kernel",
      );
      download(
        `${config.ubuntuMirror}/casper/initrd`,
        `${config.httpDir}/ubuntu-initrd`,
        "Ubuntu initrd",
      );
    } catch {
      logger.warn(`Ubuntu ${config.ubuntuVersion} artifacts not available -- Ubuntu provisioning disabled`);
    }

    // Symlink iPXE binaries into HTTP dir for UEFI HTTP Boot
    for (const name of ["ipxe.efi", "ipxe-arm64.efi"]) {
      const src = `${config.tftpDir}/${name}`;
      const dest = `${config.httpDir}/${name}`;
      if (existsSync(src)) {
        symlinkSafe(src, dest);
      }
    }

    // Generate boot ISO (served as static file for Range request support)
    try {
      ensureBootIso(config);
    } catch (err) {
      logger.warn(`Boot ISO generation failed: ${err instanceof Error ? err.message : String(err)}`);
    }
  } else {
    logger.info("Skipping boot artifacts (--skip-artifacts)");
  }

  // Write discovery kickstart
  const discoverKs = generateDiscoverKickstart(config);
  writeFileSync(`${config.httpDir}/discover.ks`, discoverKs);

  // Write iPXE boot script
  const bootIpxe = renderBootIpxe({
    serverIp: config.serverIp,
    httpPort: config.httpPort,
  });
  writeFileSync(`${config.httpDir}/boot.ipxe`, bootIpxe);

  // Generate dnsmasq config
  generateDnsmasqConf(config);

  // Open firewall ports
  if (config.skipDnsmasq !== true) {
    openFirewall(config);
  }

  // Start HTTP server + syslog listener
  const { app, state, syslog } = createApp(config);
  await app.listen({ port: config.httpPort, host: "0.0.0.0" });
  logger.info(`HTTP server listening on :${config.httpPort}`);
  syslog.start();

  // Start dnsmasq (unless skipped)
  if (config.skipDnsmasq !== true) {
    const dnsmasqProc = startDnsmasq(config);

    // Monitor dnsmasq: any exit that is not our own kill takes the bastion down with it
    void dnsmasqProc.then(() => {
      logger.error("dnsmasq exited unexpectedly");
      logger.error("Check if another DHCP/TFTP service is running.");
      process.exit(1);
    }).catch((err: unknown) => {
      const message = err instanceof Error ? err.message : String(err);
      if (!message.includes("was killed")) {
        logger.error(`dnsmasq error: ${message}`);
        process.exit(1);
      }
    });
  } else {
    logger.info("Skipping dnsmasq (--skip-dnsmasq)");
  }

  // Connect to labd if configured (otherwise run standalone)
  let labdConn: BastionConnection | null = null;
  if (config.labdUrl) {
    labdConn = new BastionConnection(config, () => state.load());

    // Wire up command handlers so labd can send install/forget/role commands.
    // Every handler normalises the MAC the same way, so the state is always
    // keyed by the lower-case, colon-separated form (as /api/install does).
    labdConn.onCommand("command-install", async (msg) => {
      if (msg.type !== "command-install") throw new Error("unexpected");
      const mac = normalizeMac(msg.mac);
      state.update((s) => {
        s.install_queue[mac] = {
          hostname: msg.hostname,
          disk: msg.disk ?? "/dev/sda",
          role: msg.role as import("@lab/shared").Role,
          os: msg.os as import("@lab/shared").OsId,
          queued_at: new Date().toISOString(),
        };
      });
      return { status: "ok", data: { mac, hostname: msg.hostname } };
    });

    labdConn.onCommand("command-forget", async (msg) => {
      if (msg.type !== "command-forget") throw new Error("unexpected");
      const mac = normalizeMac(msg.mac);
      state.update((s) => {
        delete s.discovered[mac];
        delete s.install_queue[mac];
        delete s.installed[mac];
      });
      return { status: "ok", data: { mac } };
    });

    labdConn.onCommand("command-role-update", async (msg) => {
      if (msg.type !== "command-role-update") throw new Error("unexpected");
      const mac = normalizeMac(msg.mac);
      const current = state.load();
      if (!current.installed[mac]) {
        return { status: "error", error: `MAC ${mac} not found in installed machines` };
      }
      state.update((s) => {
        const inst = s.installed[mac];
        if (inst) inst.role = msg.role;
      });
      return { status: "ok", data: { mac, role: msg.role } };
    });

    // Push state to labd on every local state change
    state.onChange(() => labdConn?.syncState());

    // Forward progress events (stages only, not raw log lines) to labd
    progressBus.on((event) => {
      if (event.stage !== "log") {
        labdConn?.sendProgress(event.mac, event.stage, event.detail);
      }
    });

    labdConn.connect();
    logger.info(`Registering with labd at ${config.labdUrl}`);
  }

  // Print banner
  printBanner(config);

  // Graceful shutdown. Guarded so a second signal (e.g. an impatient Ctrl-C)
  // does not run the teardown twice.
  let shuttingDown = false;
  const shutdown = async (): Promise<void> => {
    if (shuttingDown) return;
    shuttingDown = true;
    logger.info("Shutting down...");
    syslog.stop();
    if (labdConn) labdConn.close();
    if (config.skipDnsmasq !== true) stopDnsmasq();
    closeFirewall(config);
    await app.close();
    try { unlinkSync(pidFile); } catch { /* ignore */ }
    logger.info(`State preserved in ${config.stateFile}`);
    process.exit(0);
  };

  // A failing teardown is reported and still ends the process instead of
  // surfacing as an unhandled rejection.
  const onSignal = (): void => {
    shutdown().catch((err: unknown) => {
      logger.error(`Shutdown failed: ${err instanceof Error ? err.message : String(err)}`);
      process.exit(1);
    });
  };

  process.on("SIGINT", onSignal);
  process.on("SIGTERM", onSignal);

  // Keep process alive
  await new Promise(() => {});
}
|
||||
|
||||
/**
 * Prints the start-up banner to stdout: a coloured header, the effective
 * network / DHCP / HTTP / OS / domain / state settings, and short usage hints.
 *
 * @param config resolved bastion configuration whose values are displayed
 */
function printBanner(config: BastionConfig): void {
  let dhcpInfo = "proxy (alongside existing DHCP)";
  if (config.dhcpMode === "full") {
    dhcpInfo = `full (${config.dhcpRangeStart}-${config.dhcpRangeEnd})`;
  }

  const rule = "\x1b[36m\x1b[1m" + "=".repeat(60) + "\x1b[0m";

  // One entry per output line; "" is a blank line.
  const bannerLines: string[] = [
    "",
    rule,
    "\x1b[36m\x1b[1m Lab PXE Bastion -- Discovery Mode\x1b[0m",
    rule,
    "",
    ` Network: \x1b[1m${config.network}/24\x1b[0m via \x1b[1m${config.iface}\x1b[0m`,
    ` DHCP: \x1b[1m${dhcpInfo}\x1b[0m`,
    ` HTTP: \x1b[1mhttp://${config.serverIp}:${config.httpPort}/\x1b[0m`,
    ` OS: \x1b[1mFedora ${config.fedoraVersion} (${config.arch})\x1b[0m`,
    ` Domain: \x1b[1m${config.domain}\x1b[0m`,
    ` State: \x1b[1m${config.stateFile}\x1b[0m`,
    "",
    " \x1b[33mPXE boot any machine on this network.\x1b[0m",
    " \x1b[33mIt will be inventoried and rebooted automatically.\x1b[0m",
    "",
    " Commands (from another terminal):",
    " \x1b[1mlabctl provision list\x1b[0m -- show machines",
    " \x1b[1mlabctl provision install <mac> <hostname>\x1b[0m -- queue install",
    "",
    " Press \x1b[1mCtrl-C\x1b[0m to stop.",
    "",
    "\x1b[36m---- Waiting for PXE boot requests... ----\x1b[0m",
    "",
  ];

  for (const line of bannerLines) {
    console.log(line);
  }
}
|
||||
401
bastion/src/bastion/src/routes/api.ts
Normal file
401
bastion/src/bastion/src/routes/api.ts
Normal file
@@ -0,0 +1,401 @@
|
||||
// REST API routes for machine management.
|
||||
// /api/machines - list all machines by state
|
||||
// /api/install - queue a machine for install
|
||||
// /api/progress - receive install progress callbacks from kickstart
|
||||
// /api/discover - receive hardware discovery reports from PXE-booted machines
|
||||
|
||||
import type { FastifyInstance } from "fastify";
|
||||
import type { HardwareInfo, InstalledInfo, Role } from "@lab/shared";
|
||||
import { isValidOsId, SUPPORTED_ROLES } from "@lab/shared";
|
||||
import type { StateManager } from "../services/state.js";
|
||||
import { logger } from "../services/logger.js";
|
||||
import { triggerPostProvisionK3s } from "../services/post-provision.js";
|
||||
import { progressBus } from "../services/progress-events.js";
|
||||
import type { ProgressEvent } from "../services/progress-events.js";
|
||||
import type { InstallLogBuffer } from "../services/install-log.js";
|
||||
|
||||
/**
 * Canonicalise a MAC address: lower-case, with "-" separators turned into ":".
 * Returns `fallback` (canonicalised the same way) when `raw` is not a string.
 */
function normalizeMac(raw: string | undefined, fallback = ""): string {
  const value = typeof raw === "string" ? raw : fallback;
  return value.toLowerCase().replace(/-/g, ":");
}

/** Parse a base-10 integer query parameter, using `fallback` when it is absent or not a number. */
function parseIntOr(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? "", 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Register the REST API routes used by the CLI and by machines being provisioned.
 *
 * Every route canonicalises MAC addresses the same way (lower-case, ":" separated)
 * so that state keys written by one endpoint are found by the others.
 *
 * @param app        Fastify instance the routes are attached to.
 * @param state      Machine state store (read with `load()`, mutated with `update()`).
 * @param installLog Buffer of raw installer log lines, keyed by MAC.
 */
export function registerApiRoutes(
  app: FastifyInstance,
  state: StateManager,
  installLog: InstallLogBuffer,
): void {
  // List all machines
  app.get("/api/machines", async (_request, reply) => {
    return reply.send(state.load());
  });

  // Queue a machine for install
  app.post<{
    Body: {
      mac?: string;
      hostname?: string;
      disk?: string;
      role?: string;
      os?: string;
    };
  }>("/api/install", async (request, reply) => {
    const { mac: rawMac, hostname, disk, role, os } = request.body ?? {};
    const mac = normalizeMac(rawMac);

    if (mac === "") {
      return reply.status(400).send({ error: "mac is required" });
    }

    const validRole = role ?? "worker";
    if (!(SUPPORTED_ROLES as readonly string[]).includes(validRole)) {
      return reply.status(400).send({ error: `invalid role: '${validRole}'. Supported: ${SUPPORTED_ROLES.join(", ")}` });
    }

    const osId = os ?? "fedora-43";
    if (!isValidOsId(osId)) {
      return reply.status(400).send({ error: `invalid os: '${osId}'. Supported: fedora-43, ubuntu-26.04` });
    }

    state.update((s) => {
      s.install_queue[mac] = {
        hostname: hostname ?? "lab-node",
        disk: disk ?? "",
        role: validRole as Role,
        os: osId,
        queued_at: new Date().toISOString(),
      };
    });

    logger.info(`INSTALL QUEUED: ${mac} -> hostname=${hostname ?? "lab-node"} role=${validRole} os=${osId}`);

    return reply.send({
      status: "queued",
      mac,
      hostname: hostname ?? "lab-node",
      role: validRole,
      os: osId,
      message: `PXE boot the machine to start installation (role=${validRole}, os=${osId})`,
    });
  });

  // Receive install progress callbacks
  app.post<{
    Body: {
      mac?: string;
      stage?: string;
      detail?: string;
    };
  }>("/api/progress", async (request, reply) => {
    const { mac: rawMac, stage, detail } = request.body ?? {};
    // Same canonical form as /api/install, otherwise "aa-bb-.." never matches its queue entry
    const mac = normalizeMac(rawMac, "unknown");
    const stageName = stage ?? "unknown";
    const detailStr = detail ?? "";

    const GREEN = "\x1b[0;32m";
    const YELLOW = "\x1b[1;33m";
    const RED = "\x1b[0;31m";
    const BOLD = "\x1b[1m";
    const RESET = "\x1b[0m";
    const icons: Record<string, string> = {
      partitioning: "◆", installing: "◆◆", "post-install": "◆◆◆",
      complete: "✔", error: "✘",
    };
    const icon = icons[stageName] ?? "·";
    const color = stageName === "complete" ? GREEN : stageName === "error" ? RED : YELLOW;
    console.log(` ${color}${icon}${RESET} ${mac} ${BOLD}${stageName}${RESET}${detailStr ? ` -- ${detailStr}` : ""}`);

    // Emit progress event for SSE clients
    const hostname = state.load().install_queue[mac]?.hostname ?? mac;
    progressBus.emit({
      mac, hostname, stage: stageName, detail: detailStr,
      timestamp: new Date().toISOString(),
    });

    state.update((s) => {
      const queueEntry = s.install_queue[mac];
      if (queueEntry) {
        queueEntry.progress = stageName;
        queueEntry.progress_at = new Date().toISOString();
        if (detailStr !== "") {
          queueEntry.progress_detail = detailStr;
        }

        // Append to progress log history
        if (!queueEntry.log) queueEntry.log = [];
        queueEntry.log.push({
          stage: stageName,
          detail: detailStr,
          timestamp: new Date().toISOString(),
        });

        // Move to installed on completion
        if (stageName === "complete") {
          const cfg = s.install_queue[mac];
          delete s.install_queue[mac];

          // The installer reports its address as "ready at <ip>"
          const ip = detailStr.startsWith("ready at ")
            ? detailStr.replace("ready at ", "").trim()
            : "";

          const installedInfo: InstalledInfo = {
            hostname: cfg?.hostname ?? "?",
            role: cfg?.role ?? "?",
            ...(cfg?.os !== undefined ? { os: cfg.os } : {}),
            ip,
            installed_at: new Date().toISOString(),
          };
          s.installed[mac] = installedInfo;

          const admin = installedInfo.role !== "vanilla" && installedInfo.role !== "" ? "michal" : "root";
          console.log(`\n \x1b[0;32m\x1b[1m ssh ${admin}@${ip}\x1b[0m\n`); // eslint-disable-line no-console

          // Auto-install k3s for non-vanilla roles
          if (installedInfo.role !== "vanilla" && ip !== "") {
            void triggerPostProvisionK3s(installedInfo.hostname, ip, installedInfo.role, admin, mac);
          }
        }
      }
    });

    return reply.send({ status: "ok" });
  });

  // Receive raw log lines from kickstart scripts
  app.post<{
    Body: {
      mac?: string;
      line?: string;
      lines?: string[];
      tail?: string;
    };
  }>("/api/log", async (request, reply) => {
    const { mac: rawMac, line, lines: rawLines, tail } = request.body ?? {};
    const mac = normalizeMac(rawMac, "unknown");

    // Collect all lines from the various input formats. The body is untrusted
    // JSON, so check the runtime shape instead of relying on the declared types.
    const allLines: string[] = [];
    if (typeof line === "string" && line !== "") allLines.push(line);
    if (Array.isArray(rawLines)) {
      for (const entry of rawLines) {
        if (typeof entry === "string") allLines.push(entry);
      }
    }
    if (typeof tail === "string" && tail !== "") {
      // tail is a string with escaped \n — split it into lines
      allLines.push(...tail.split("\\n").filter(Boolean));
    }

    if (allLines.length === 0) {
      return reply.send({ status: "ok", lines: 0 });
    }

    // Look up hostname from install queue for enriching events
    const hostname = state.load().install_queue[mac]?.hostname ?? mac;

    // Append to the install log buffer (this also emits to progressBus)
    installLog.append(mac, allLines, hostname);

    return reply.send({ status: "ok", lines: allLines.length });
  });

  // Delete a machine from all state
  app.delete<{
    Params: { mac: string };
  }>("/api/machines/:mac", async (request, reply) => {
    const mac = normalizeMac(request.params.mac);

    if (mac === "") {
      return reply.status(400).send({ error: "mac is required" });
    }

    let found = false;
    state.update((s) => {
      if (s.discovered[mac] !== undefined) {
        delete s.discovered[mac];
        found = true;
      }
      if (s.install_queue[mac] !== undefined) {
        delete s.install_queue[mac];
        found = true;
      }
      if (s.installed[mac] !== undefined) {
        delete s.installed[mac];
        found = true;
      }
    });

    if (!found) {
      return reply.status(404).send({ error: "machine not found", mac });
    }

    logger.info(`MACHINE FORGOTTEN: ${mac}`);
    return reply.send({ status: "forgotten", mac });
  });

  // Receive discovery reports
  app.post<{
    Body: {
      mac?: string;
      product?: string;
      board?: string;
      serial?: string;
      manufacturer?: string;
      cpu_model?: string;
      cpu_cores?: number;
      memory_gb?: number;
      arch?: string;
      disks?: Array<{ name: string; size_gb: number; model: string }>;
      nics?: Array<{ name: string; mac: string; state: string }>;
    };
  }>("/api/discover", async (request, reply) => {
    const data = request.body;
    if (data === null || data === undefined) {
      return reply.status(400).send({ error: "invalid JSON" });
    }

    // Canonical form so DELETE /api/machines/:mac can find the entry again
    const mac = normalizeMac(data.mac, "unknown");
    const now = new Date().toISOString();

    const isNew = state.load().discovered[mac] === undefined;

    state.update((s) => {
      const existing = s.discovered[mac];
      const hwInfo: HardwareInfo = {
        mac,
        product: data.product ?? "unknown",
        board: data.board ?? "unknown",
        serial: data.serial ?? "unknown",
        manufacturer: data.manufacturer ?? "unknown",
        cpu_model: data.cpu_model ?? "unknown",
        cpu_cores: data.cpu_cores ?? 0,
        memory_gb: data.memory_gb ?? 0,
        arch: data.arch ?? "unknown",
        disks: data.disks ?? [],
        nics: data.nics ?? [],
        first_seen: existing?.first_seen ?? now,
        last_seen: now,
      };
      s.discovered[mac] = hwInfo;
    });

    const label = isNew ? "NEW MACHINE DISCOVERED" : "MACHINE RE-DISCOVERED";
    const cpu = data.cpu_model ?? "?";
    const cores = data.cpu_cores ?? "?";
    const mem = data.memory_gb ?? "?";
    logger.info(`${label}: ${mac} -- ${data.manufacturer ?? "?"} ${data.product ?? "?"} (${cpu}, ${cores} cores, ${mem}GB RAM)`);

    return reply.send({ status: "ok", mac, new: isNew });
  });

  // Update a machine's role (e.g. promote infra -> labcontroller)
  app.post<{
    Body: {
      mac?: string;
      role?: string;
    };
  }>("/api/role", async (request, reply) => {
    const { mac: rawMac, role } = request.body ?? {};
    const mac = normalizeMac(rawMac);

    if (mac === "") {
      return reply.status(400).send({ error: "mac is required" });
    }
    if (!role) {
      return reply.status(400).send({ error: "role is required" });
    }

    let found = false;
    state.update((s) => {
      if (s.installed[mac]) {
        const oldRole = s.installed[mac].role;
        s.installed[mac].role = role;
        found = true;
        logger.info(`ROLE UPDATED: ${mac} (${s.installed[mac].hostname}) ${oldRole} -> ${role}`);
      }
    });

    if (!found) {
      return reply.status(404).send({ error: "machine not found in installed state", mac });
    }

    return reply.send({ status: "updated", mac, role });
  });

  // Get provision logs for a machine (current state snapshot + raw log lines)
  app.get<{
    Params: { mac: string };
    Querystring: { lines?: string; offset?: string };
  }>("/api/logs/:mac", async (request, reply) => {
    const mac = normalizeMac(request.params.mac);
    // Non-numeric query values fall back to the defaults instead of becoming NaN
    const logLimit = parseIntOr(request.query.lines, 200);
    const logOffset = parseIntOr(request.query.offset, 0);
    const currentState = state.load();

    const queueEntry = currentState.install_queue[mac];
    const installedEntry = currentState.installed[mac];

    if (queueEntry) {
      return reply.send({
        mac,
        hostname: queueEntry.hostname,
        status: "installing",
        progress: queueEntry.progress ?? "queued",
        progress_detail: queueEntry.progress_detail ?? "",
        progress_at: queueEntry.progress_at ?? queueEntry.queued_at,
        role: queueEntry.role,
        os: queueEntry.os,
        stages: queueEntry.log ?? [],
        log_lines: installLog.getLines(mac, logOffset, logLimit),
        log_total: installLog.lineCount(mac),
      });
    }
    if (installedEntry) {
      return reply.send({
        mac,
        hostname: installedEntry.hostname,
        status: "installed",
        progress: "complete",
        progress_detail: `ready at ${installedEntry.ip}`,
        progress_at: installedEntry.installed_at,
        role: installedEntry.role,
        ip: installedEntry.ip,
        log_lines: installLog.getLines(mac, logOffset, logLimit),
        log_total: installLog.lineCount(mac),
      });
    }

    return reply.status(404).send({ error: "machine not found", mac });
  });

  // SSE stream: follow provision progress for a machine (or all machines)
  app.get<{
    Params: { mac: string };
  }>("/api/logs/:mac/follow", async (request, reply) => {
    // ":mac" of "all" disables filtering
    const filterMac = request.params.mac === "all"
      ? null
      : normalizeMac(request.params.mac);

    void reply.raw.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      "Connection": "keep-alive",
    });

    // Send current state as first event
    const currentState = state.load();
    const queueEntry = filterMac ? currentState.install_queue[filterMac] : undefined;
    if (queueEntry) {
      const initData = JSON.stringify({
        mac: filterMac, hostname: queueEntry.hostname,
        stage: queueEntry.progress ?? "queued",
        detail: queueEntry.progress_detail ?? "",
        timestamp: queueEntry.progress_at ?? queueEntry.queued_at,
      });
      reply.raw.write(`data: ${initData}\n\n`);
    }

    const onProgress = (event: ProgressEvent): void => {
      if (filterMac && event.mac !== filterMac) return;
      // Use SSE event types so clients can filter: "stage" for progress, "log" for raw lines
      const eventType = event.stage === "log" ? "log" : "stage";
      reply.raw.write(`event: ${eventType}\ndata: ${JSON.stringify(event)}\n\n`);
    };

    progressBus.on(onProgress);

    // Stop forwarding events once the client disconnects
    request.raw.on("close", () => {
      progressBus.off(onProgress);
    });
  });
}
||||
|
||||
state.update((s) => {
|
||||
s.install_queue[mac] = {
|
||||
hostname: hostname ?? "lab-node",
|
||||
disk: disk ?? "",
|
||||
role: validRole as Role,
|
||||
os: osId,
|
||||
queued_at: new Date().toISOString(),
|
||||
};
|
||||
});
|
||||
|
||||
logger.info(`INSTALL QUEUED: ${mac} -> hostname=${hostname ?? "lab-node"} role=${validRole} os=${osId}`);
|
||||
|
||||
return reply.send({
|
||||
status: "queued",
|
||||
mac,
|
||||
hostname: hostname ?? "lab-node",
|
||||
role: validRole,
|
||||
os: osId,
|
||||
message: `PXE boot the machine to start installation (role=${validRole}, os=${osId})`,
|
||||
});
|
||||
});
|
||||
|
||||
// Receive install progress callbacks
|
||||
app.post<{
|
||||
Body: {
|
||||
mac?: string;
|
||||
stage?: string;
|
||||
detail?: string;
|
||||
};
|
||||
}>("/api/progress", async (request, reply) => {
|
||||
const { mac: rawMac, stage, detail } = request.body ?? {};
|
||||
const mac = (rawMac ?? "unknown").toLowerCase();
|
||||
const stageName = stage ?? "unknown";
|
||||
const detailStr = detail ?? "";
|
||||
|
||||
const GREEN = "\x1b[0;32m";
|
||||
const YELLOW = "\x1b[1;33m";
|
||||
const RED = "\x1b[0;31m";
|
||||
const BOLD = "\x1b[1m";
|
||||
const RESET = "\x1b[0m";
|
||||
const icons: Record<string, string> = {
|
||||
partitioning: "◆", installing: "◆◆", "post-install": "◆◆◆",
|
||||
complete: "✔", error: "✘",
|
||||
};
|
||||
const icon = icons[stageName] ?? "·";
|
||||
const color = stageName === "complete" ? GREEN : stageName === "error" ? RED : YELLOW;
|
||||
console.log(` ${color}${icon}${RESET} ${mac} ${BOLD}${stageName}${RESET}${detailStr ? ` -- ${detailStr}` : ""}`);
|
||||
|
||||
// Emit progress event for SSE clients
|
||||
const hostname = state.load().install_queue[mac]?.hostname ?? mac;
|
||||
progressBus.emit({
|
||||
mac, hostname, stage: stageName, detail: detailStr,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
|
||||
state.update((s) => {
|
||||
const queueEntry = s.install_queue[mac];
|
||||
if (queueEntry) {
|
||||
queueEntry.progress = stageName;
|
||||
queueEntry.progress_at = new Date().toISOString();
|
||||
if (detailStr !== "") {
|
||||
queueEntry.progress_detail = detailStr;
|
||||
}
|
||||
|
||||
// Append to progress log history
|
||||
if (!queueEntry.log) queueEntry.log = [];
|
||||
queueEntry.log.push({
|
||||
stage: stageName,
|
||||
detail: detailStr,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
|
||||
// Move to installed on completion
|
||||
if (stageName === "complete") {
|
||||
const cfg = s.install_queue[mac];
|
||||
delete s.install_queue[mac];
|
||||
|
||||
const ip = detailStr.startsWith("ready at ")
|
||||
? detailStr.replace("ready at ", "").trim()
|
||||
: "";
|
||||
|
||||
const installedInfo: InstalledInfo = {
|
||||
hostname: cfg?.hostname ?? "?",
|
||||
role: cfg?.role ?? "?",
|
||||
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
|
||||
ip,
|
||||
installed_at: new Date().toISOString(),
|
||||
};
|
||||
s.installed[mac] = installedInfo;
|
||||
|
||||
const admin = installedInfo.role !== "vanilla" && installedInfo.role !== "" ? "michal" : "root";
|
||||
console.log(`\n \x1b[0;32m\x1b[1m ssh ${admin}@${ip}\x1b[0m\n`); // eslint-disable-line no-console
|
||||
|
||||
// Auto-install k3s for non-vanilla roles
|
||||
if (installedInfo.role !== "vanilla" && ip !== "") {
|
||||
void triggerPostProvisionK3s(installedInfo.hostname, ip, installedInfo.role, admin, mac);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return reply.send({ status: "ok" });
|
||||
});
|
||||
|
||||
// Receive raw log lines from kickstart scripts
|
||||
app.post<{
|
||||
Body: {
|
||||
mac?: string;
|
||||
line?: string;
|
||||
lines?: string[];
|
||||
tail?: string;
|
||||
};
|
||||
}>("/api/log", async (request, reply) => {
|
||||
const { mac: rawMac, line, lines: rawLines, tail } = request.body ?? {};
|
||||
const mac = (rawMac ?? "unknown").toLowerCase();
|
||||
|
||||
// Collect all lines from the various input formats
|
||||
const allLines: string[] = [];
|
||||
if (line) allLines.push(line);
|
||||
if (rawLines) allLines.push(...rawLines);
|
||||
if (tail) {
|
||||
// tail is a string with escaped \n — split it into lines
|
||||
allLines.push(...tail.split("\\n").filter(Boolean));
|
||||
}
|
||||
|
||||
if (allLines.length === 0) {
|
||||
return reply.send({ status: "ok", lines: 0 });
|
||||
}
|
||||
|
||||
// Look up hostname from install queue for enriching events
|
||||
const hostname = state.load().install_queue[mac]?.hostname ?? mac;
|
||||
|
||||
// Append to the install log buffer (this also emits to progressBus)
|
||||
installLog.append(mac, allLines, hostname);
|
||||
|
||||
return reply.send({ status: "ok", lines: allLines.length });
|
||||
});
|
||||
|
||||
// Delete a machine from all state
|
||||
app.delete<{
|
||||
Params: { mac: string };
|
||||
}>("/api/machines/:mac", async (request, reply) => {
|
||||
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
|
||||
|
||||
if (mac === "") {
|
||||
return reply.status(400).send({ error: "mac is required" });
|
||||
}
|
||||
|
||||
let found = false;
|
||||
state.update((s) => {
|
||||
if (s.discovered[mac] !== undefined) {
|
||||
delete s.discovered[mac];
|
||||
found = true;
|
||||
}
|
||||
if (s.install_queue[mac] !== undefined) {
|
||||
delete s.install_queue[mac];
|
||||
found = true;
|
||||
}
|
||||
if (s.installed[mac] !== undefined) {
|
||||
delete s.installed[mac];
|
||||
found = true;
|
||||
}
|
||||
});
|
||||
|
||||
if (!found) {
|
||||
return reply.status(404).send({ error: "machine not found", mac });
|
||||
}
|
||||
|
||||
logger.info(`MACHINE FORGOTTEN: ${mac}`);
|
||||
return reply.send({ status: "forgotten", mac });
|
||||
});
|
||||
|
||||
// Receive discovery reports
|
||||
app.post<{
|
||||
Body: {
|
||||
mac?: string;
|
||||
product?: string;
|
||||
board?: string;
|
||||
serial?: string;
|
||||
manufacturer?: string;
|
||||
cpu_model?: string;
|
||||
cpu_cores?: number;
|
||||
memory_gb?: number;
|
||||
arch?: string;
|
||||
disks?: Array<{ name: string; size_gb: number; model: string }>;
|
||||
nics?: Array<{ name: string; mac: string; state: string }>;
|
||||
};
|
||||
}>("/api/discover", async (request, reply) => {
|
||||
const data = request.body;
|
||||
if (data === null || data === undefined) {
|
||||
return reply.status(400).send({ error: "invalid JSON" });
|
||||
}
|
||||
|
||||
const mac = (data.mac ?? "unknown").toLowerCase();
|
||||
const now = new Date().toISOString();
|
||||
|
||||
const isNew = state.load().discovered[mac] === undefined;
|
||||
|
||||
state.update((s) => {
|
||||
const existing = s.discovered[mac];
|
||||
const hwInfo: HardwareInfo = {
|
||||
mac,
|
||||
product: data.product ?? "unknown",
|
||||
board: data.board ?? "unknown",
|
||||
serial: data.serial ?? "unknown",
|
||||
manufacturer: data.manufacturer ?? "unknown",
|
||||
cpu_model: data.cpu_model ?? "unknown",
|
||||
cpu_cores: data.cpu_cores ?? 0,
|
||||
memory_gb: data.memory_gb ?? 0,
|
||||
arch: data.arch ?? "unknown",
|
||||
disks: data.disks ?? [],
|
||||
nics: data.nics ?? [],
|
||||
first_seen: existing?.first_seen ?? now,
|
||||
last_seen: now,
|
||||
};
|
||||
s.discovered[mac] = hwInfo;
|
||||
});
|
||||
|
||||
const label = isNew ? "NEW MACHINE DISCOVERED" : "MACHINE RE-DISCOVERED";
|
||||
const cpu = data.cpu_model ?? "?";
|
||||
const cores = data.cpu_cores ?? "?";
|
||||
const mem = data.memory_gb ?? "?";
|
||||
logger.info(`${label}: ${mac} -- ${data.manufacturer ?? "?"} ${data.product ?? "?"} (${cpu}, ${cores} cores, ${mem}GB RAM)`);
|
||||
|
||||
return reply.send({ status: "ok", mac, new: isNew });
|
||||
});
|
||||
|
||||
// Update a machine's role (e.g. promote infra -> labcontroller)
|
||||
app.post<{
|
||||
Body: {
|
||||
mac?: string;
|
||||
role?: string;
|
||||
};
|
||||
}>("/api/role", async (request, reply) => {
|
||||
const { mac: rawMac, role } = request.body ?? {};
|
||||
const mac = (rawMac ?? "").toLowerCase().replace(/-/g, ":");
|
||||
|
||||
if (mac === "") {
|
||||
return reply.status(400).send({ error: "mac is required" });
|
||||
}
|
||||
if (!role) {
|
||||
return reply.status(400).send({ error: "role is required" });
|
||||
}
|
||||
|
||||
let found = false;
|
||||
state.update((s) => {
|
||||
if (s.installed[mac]) {
|
||||
const oldRole = s.installed[mac].role;
|
||||
s.installed[mac].role = role;
|
||||
found = true;
|
||||
logger.info(`ROLE UPDATED: ${mac} (${s.installed[mac].hostname}) ${oldRole} -> ${role}`);
|
||||
}
|
||||
});
|
||||
|
||||
if (!found) {
|
||||
return reply.status(404).send({ error: "machine not found in installed state", mac });
|
||||
}
|
||||
|
||||
return reply.send({ status: "updated", mac, role });
|
||||
});
|
||||
|
||||
// Get provision logs for a machine (current state snapshot + raw log lines)
|
||||
app.get<{
|
||||
Params: { mac: string };
|
||||
Querystring: { lines?: string; offset?: string };
|
||||
}>("/api/logs/:mac", async (request, reply) => {
|
||||
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
|
||||
const logLimit = parseInt(request.query.lines ?? "200", 10);
|
||||
const logOffset = parseInt(request.query.offset ?? "0", 10);
|
||||
const currentState = state.load();
|
||||
|
||||
const queueEntry = currentState.install_queue[mac];
|
||||
const installedEntry = currentState.installed[mac];
|
||||
|
||||
if (queueEntry) {
|
||||
return reply.send({
|
||||
mac,
|
||||
hostname: queueEntry.hostname,
|
||||
status: "installing",
|
||||
progress: queueEntry.progress ?? "queued",
|
||||
progress_detail: queueEntry.progress_detail ?? "",
|
||||
progress_at: queueEntry.progress_at ?? queueEntry.queued_at,
|
||||
role: queueEntry.role,
|
||||
os: queueEntry.os,
|
||||
stages: queueEntry.log ?? [],
|
||||
log_lines: installLog.getLines(mac, logOffset, logLimit),
|
||||
log_total: installLog.lineCount(mac),
|
||||
});
|
||||
}
|
||||
if (installedEntry) {
|
||||
return reply.send({
|
||||
mac,
|
||||
hostname: installedEntry.hostname,
|
||||
status: "installed",
|
||||
progress: "complete",
|
||||
progress_detail: `ready at ${installedEntry.ip}`,
|
||||
progress_at: installedEntry.installed_at,
|
||||
role: installedEntry.role,
|
||||
ip: installedEntry.ip,
|
||||
log_lines: installLog.getLines(mac, logOffset, logLimit),
|
||||
log_total: installLog.lineCount(mac),
|
||||
});
|
||||
}
|
||||
|
||||
return reply.status(404).send({ error: "machine not found", mac });
|
||||
});
|
||||
|
||||
// SSE stream: follow provision progress for a machine (or all machines)
|
||||
app.get<{
|
||||
Params: { mac: string };
|
||||
}>("/api/logs/:mac/follow", async (request, reply) => {
|
||||
const filterMac = request.params.mac === "all"
|
||||
? null
|
||||
: request.params.mac.toLowerCase().replace(/-/g, ":");
|
||||
|
||||
void reply.raw.writeHead(200, {
|
||||
"Content-Type": "text/event-stream",
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
});
|
||||
|
||||
// Send current state as first event
|
||||
const currentState = state.load();
|
||||
const queueEntry = filterMac ? currentState.install_queue[filterMac] : undefined;
|
||||
if (queueEntry) {
|
||||
const initData = JSON.stringify({
|
||||
mac: filterMac, hostname: queueEntry.hostname,
|
||||
stage: queueEntry.progress ?? "queued",
|
||||
detail: queueEntry.progress_detail ?? "",
|
||||
timestamp: queueEntry.progress_at ?? queueEntry.queued_at,
|
||||
});
|
||||
reply.raw.write(`data: ${initData}\n\n`);
|
||||
}
|
||||
|
||||
const onProgress = (event: ProgressEvent): void => {
|
||||
if (filterMac && event.mac !== filterMac) return;
|
||||
// Use SSE event types so clients can filter: "stage" for progress, "log" for raw lines
|
||||
const eventType = event.stage === "log" ? "log" : "stage";
|
||||
reply.raw.write(`event: ${eventType}\ndata: ${JSON.stringify(event)}\n\n`);
|
||||
};
|
||||
|
||||
progressBus.on(onProgress);
|
||||
|
||||
request.raw.on("close", () => {
|
||||
progressBus.off(onProgress);
|
||||
});
|
||||
});
|
||||
}
|
||||
249
bastion/src/bastion/src/routes/boot-iso.ts
Normal file
249
bastion/src/bastion/src/routes/boot-iso.ts
Normal file
@@ -0,0 +1,249 @@
|
||||
// Boot ISO generation.
|
||||
// Generates a UEFI-bootable iPXE ISO using xorriso+mtools.
|
||||
// The ISO is placed in httpDir so @fastify/static serves it with Range request
|
||||
// support (required by JetKVM, which streams via HTTP Range + NBD).
|
||||
//
|
||||
// The ISO embeds kernel + initrd so machines without UEFI NIC support
|
||||
// (no SNP protocol) can still boot. iPXE loads them from file:/ and the
|
||||
// Linux kernel handles networking with its own drivers.
|
||||
|
||||
import { createHash } from "node:crypto";
|
||||
import { execSync } from "node:child_process";
|
||||
import { existsSync, readFileSync, statSync, writeFileSync, mkdirSync, rmSync, unlinkSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import { logger } from "../services/logger.js";
|
||||
|
||||
// iPXE SNP variant (scans all UEFI SNP handles, works from CD-ROM/USB boot).
// Per architecture: candidate source paths for the iPXE EFI binary, tried in
// order (first existing file wins), and the file name it is given under EFI/BOOT.
const IPXE_ISO_PATHS: Record<string, { src: string[]; efiName: string }> = {
  x86_64: {
    src: [
      "/usr/share/ipxe/ipxe-snp-x86_64.efi",
      "/usr/share/ipxe/ipxe-x86_64.efi",
    ],
    efiName: "BOOTX64.EFI",
  },
  aarch64: {
    src: [
      "/usr/share/ipxe/arm64-efi/ipxe-snp.efi",
      "/usr/share/ipxe/arm64-efi/ipxe.efi",
    ],
    efiName: "BOOTAA64.EFI",
  },
};

// Base URL of the Fedora release tree; "<version>/Everything/<arch>/os" is
// appended to it to locate the PXE kernel/initrd and the install repo.
const FEDORA_MIRROR_BASE = "https://download.fedoraproject.org/pub/fedora/linux/releases";

// Locally cached kernel + initrd pair for one architecture.
interface BootPayload {
  // Fedora architecture name ("x86_64" or "aarch64")
  arch: string;
  // Path of the cached kernel image
  vmlinuz: string;
  // Path of the cached initrd image
  initrd: string;
}
|
||||
|
||||
/**
 * Download `url` to `dest` with curl unless a non-empty file is already there.
 *
 * @param url   Source URL (redirects are followed; HTTP errors fail the call).
 * @param dest  Destination path, also used as the cache location.
 * @param label Human-readable name used in log messages.
 * @throws Whatever `execSync` throws when curl exits non-zero.
 */
function downloadIfMissing(url: string, dest: string, label: string): void {
  // An empty file is the residue of a failed download, not a usable cache entry
  if (existsSync(dest) && statSync(dest).size > 0) {
    logger.info(` ${label} -- cached`);
    return;
  }
  logger.info(` ${label} -- downloading...`);
  try {
    execSync(`curl -# -L -f -o "${dest}" "${url}"`, { stdio: "inherit" });
  } catch (err) {
    // An interrupted transfer can leave a truncated file behind; remove it so
    // the next call retries instead of treating the fragment as cached.
    rmSync(dest, { force: true });
    throw err;
  }
}
|
||||
|
||||
/**
 * Build the UEFI-bootable iPXE ISO at `outputPath`.
 *
 * Steps: stage the available iPXE EFI binaries, download (or reuse cached)
 * Fedora kernel/initrd per architecture, write the iPXE autoexec script, pack
 * everything into a FAT EFI image with mtools, then wrap it with xorriso.
 * The temporary work directory is removed whether or not the build succeeds.
 *
 * @param config     Bastion configuration (server address, Fedora version, directories).
 * @param outputPath Destination path of the generated ISO.
 * @throws Error when no iPXE EFI binary is installed, or when any external
 *         tool (cp, dd, mformat, mmd, mcopy, xorriso) fails.
 */
function generateIso(config: BastionConfig, outputPath: string): void {
  const work = join(tmpdir(), `bastion-iso-${process.pid}`);
  // A crashed earlier process with the same PID may have left files behind
  rmSync(work, { recursive: true, force: true });
  mkdirSync(join(work, "EFI", "BOOT"), { recursive: true });

  const archs: string[] = [];

  try {
    const bastionUrl = `http://${config.serverIp}:${config.httpPort}`;

    // Copy available iPXE EFI binaries
    for (const [arch, paths] of Object.entries(IPXE_ISO_PATHS)) {
      const srcFile = paths.src.find((s) => existsSync(s));
      if (srcFile) {
        execSync(`cp "${srcFile}" "${join(work, "EFI", "BOOT", paths.efiName)}"`, { stdio: "pipe" });
        archs.push(arch);
        logger.info(` iPXE ISO ${arch}: ${srcFile}`);
      }
    }

    if (archs.length === 0) throw new Error("No iPXE EFI binaries found");

    // Download and stage kernel/initrd for each architecture.
    // These are embedded in the ISO so machines without UEFI NIC support
    // can boot the Linux installer (which has its own NIC drivers).
    const cacheDir = join(config.bastionDir, "iso-cache");
    mkdirSync(cacheDir, { recursive: true });

    const payloads: BootPayload[] = [];
    for (const arch of ["x86_64", "aarch64"]) {
      const mirror = `${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/${arch}/os`;
      const vmlinuzCache = join(cacheDir, `vmlinuz-${arch}`);
      const initrdCache = join(cacheDir, `initrd-${arch}`);

      try {
        downloadIfMissing(
          `${mirror}/images/pxeboot/vmlinuz`,
          vmlinuzCache,
          `Fedora ${arch} kernel`,
        );
        downloadIfMissing(
          `${mirror}/images/pxeboot/initrd.img`,
          initrdCache,
          `Fedora ${arch} initrd`,
        );
        payloads.push({ arch, vmlinuz: vmlinuzCache, initrd: initrdCache });
      } catch {
        // Best effort: an architecture whose payload cannot be fetched is left out
        logger.warn(` Fedora ${arch} kernel/initrd not available -- skipping`);
      }
    }

    // Write iPXE autoexec script.
    // Strategy: try DHCP (for machines with UEFI NIC support), then fall back
    // to booting the embedded kernel/initrd from the ISO filesystem.
    // iPXE's ${buildarch} resolves to "x86_64" or "arm64".
    const ipxeScript = [
      "#!ipxe",
      "",
      "echo",
      "echo =============================================",
      "echo Lab PXE Bastion -- ISO Boot",
      "echo =============================================",
      "echo",
      "",
      "# Try DHCP (works if UEFI has NIC driver / SNP support)",
      "set attempts:int32 0",
      ":retry",
      "dhcp && goto netboot ||",
      "inc attempts",
      "iseq ${attempts} 3 || goto retry_wait",
      "goto localboot",
      ":retry_wait",
      "echo DHCP failed (attempt ${attempts}/3), retrying...",
      "sleep 2",
      "goto retry",
      "",
      "# Network available -- chain to bastion for dynamic dispatch",
      ":netboot",
      "echo Network OK. Chaining to bastion...",
      `chain ${bastionUrl}/boot.ipxe || shell`,
      "",
      "# No network -- boot embedded kernel (Linux has its own NIC drivers)",
      ":localboot",
      "echo No UEFI network support. Booting embedded installer...",
      "echo Linux will configure networking with its own drivers.",
      "echo",
      "# Map iPXE arch names to Fedora mirror paths (arm64 -> aarch64)",
      "set fedarch ${buildarch}",
      "iseq ${buildarch} arm64 && set fedarch aarch64 ||",
      `kernel file:/vmlinuz-\${buildarch} inst.ks=${bastionUrl}/discover.ks inst.repo=${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/\${fedarch}/os inst.text || goto no_kernel`,
      `initrd file:/initrd-\${buildarch} || goto no_kernel`,
      "boot || shell",
      "",
      ":no_kernel",
      "echo ERROR: kernel not found for this architecture. Dropping to shell.",
      "shell",
    ].join("\n");

    writeFileSync(join(work, "autoexec.ipxe"), ipxeScript);

    // Calculate EFI partition size: iPXE binaries + autoexec + kernel/initrd + margin
    let payloadSize = 2 * 1024 * 1024; // 2MB base for iPXE + autoexec + FAT overhead
    for (const p of payloads) {
      payloadSize += statSync(p.vmlinuz).size;
      payloadSize += statSync(p.initrd).size;
    }
    const efiSizeMB = Math.ceil(payloadSize / (1024 * 1024)) + 4; // +4MB margin
    logger.info(` EFI partition: ${efiSizeMB}MB (${payloads.length} arch payloads)`);

    // Create FAT EFI system partition
    const efiImg = join(work, "efi.img");
    execSync(`dd if=/dev/zero of="${efiImg}" bs=1M count=${efiSizeMB} 2>/dev/null`, { stdio: "pipe" });
    execSync(`mformat -i "${efiImg}" -v LABBOOT ::`, { stdio: "pipe" });
    execSync(`mmd -i "${efiImg}" ::/EFI`, { stdio: "pipe" });
    execSync(`mmd -i "${efiImg}" ::/EFI/BOOT`, { stdio: "pipe" });

    for (const arch of archs) {
      const paths = IPXE_ISO_PATHS[arch]!;
      execSync(`mcopy -i "${efiImg}" "${join(work, "EFI", "BOOT", paths.efiName)}" ::/EFI/BOOT/${paths.efiName}`, { stdio: "pipe" });
    }
    execSync(`mcopy -i "${efiImg}" "${join(work, "autoexec.ipxe")}" ::/autoexec.ipxe`, { stdio: "pipe" });

    // Copy kernel/initrd onto EFI partition with arch-specific names
    for (const p of payloads) {
      // iPXE ${buildarch} returns "x86_64" or "arm64"
      const archLabel = p.arch === "aarch64" ? "arm64" : p.arch;
      execSync(`mcopy -i "${efiImg}" "${p.vmlinuz}" ::/vmlinuz-${archLabel}`, { stdio: "pipe" });
      execSync(`mcopy -i "${efiImg}" "${p.initrd}" ::/initrd-${archLabel}`, { stdio: "pipe" });
      logger.info(` Embedded ${archLabel}: vmlinuz + initrd`);
    }

    // Build hybrid ISO: El Torito EFI boot + GPT EFI partition
    execSync([
      `xorriso -as mkisofs`,
      `-o "${outputPath}"`,
      `-R`,
      `-V LAB_BOOT`,
      `-e efi.img`,
      `-no-emul-boot`,
      `-partition_offset 16`,
      `-append_partition 2 0xEF "${efiImg}"`,
      `-appended_part_as_gpt`,
      `"${work}"`,
    ].join(" "), { stdio: "pipe" });
  } finally {
    // Always remove the staging area, including when a tool above failed
    rmSync(work, { recursive: true, force: true });
  }

  logger.info(`Generated boot ISO (${archs.join(", ")}): ${outputPath}`);
}
|
||||
|
||||
/**
 * Compute a short hash of all inputs that affect ISO content.
 *
 * Inputs: the bastion address, the Fedora version, the selected iPXE binary of
 * each architecture (path, size, mtime) and the cached kernel AND initrd files
 * (path, size), since both are embedded in the ISO.
 *
 * @returns The first 16 hex characters of a SHA-256 digest.
 */
function computeIsoHash(config: BastionConfig): string {
  const h = createHash("sha256");
  h.update(`${config.serverIp}:${config.httpPort}`);
  h.update(config.fedoraVersion);
  for (const paths of Object.values(IPXE_ISO_PATHS)) {
    const srcFile = paths.src.find((s) => existsSync(s));
    if (srcFile) {
      const st = statSync(srcFile);
      h.update(`${srcFile}:${st.size}:${st.mtimeMs}`);
    }
  }
  // Include kernel/initrd cache state
  const cacheDir = join(config.bastionDir, "iso-cache");
  for (const arch of ["x86_64", "aarch64"]) {
    for (const kind of ["vmlinuz", "initrd"]) {
      const cached = join(cacheDir, `${kind}-${arch}`);
      if (existsSync(cached)) {
        h.update(`${cached}:${statSync(cached).size}`);
      }
    }
  }
  return h.digest("hex").slice(0, 16);
}
|
||||
|
||||
/**
 * Ensure boot.iso exists and is up-to-date in httpDir.
 * Called during startup so @fastify/static can serve it with Range support.
 *
 * The ISO is rebuilt when it is missing or when the input hash differs from the
 * one stored next to it in boot.iso.hash.
 *
 * @throws Whatever `generateIso` throws when the build fails.
 */
export function ensureBootIso(config: BastionConfig): void {
  const isoPath = join(config.httpDir, "boot.iso");
  const hashPath = join(config.httpDir, "boot.iso.hash");

  const currentHash = computeIsoHash(config);
  const cachedHash = existsSync(hashPath) ? readFileSync(hashPath, "utf-8").trim() : "";

  if (existsSync(isoPath) && currentHash === cachedHash) {
    logger.info(" Boot ISO -- cached (up to date)");
    return;
  }

  if (existsSync(isoPath)) {
    logger.info(" Boot ISO -- inputs changed, regenerating...");
    try { unlinkSync(isoPath); } catch { /* ignore */ }
  } else {
    logger.info(" Boot ISO -- generating...");
  }

  generateIso(config, isoPath);
  // generateIso may have populated the kernel/initrd cache, which is itself a
  // hash input. Store the post-build hash; the pre-build value would never
  // match on the next startup and would force a needless rebuild.
  writeFileSync(hashPath, computeIsoHash(config));
}
|
||||
77
bastion/src/bastion/src/routes/dispatch.ts
Normal file
77
bastion/src/bastion/src/routes/dispatch.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
// iPXE dispatch route.
|
||||
// Routes PXE boot requests based on machine state:
|
||||
// - install_queue -> install mode (serve Fedora installer + per-MAC kickstart)
|
||||
// - installed -> exit (boot from local disk)
|
||||
// - unknown -> discovery mode (collect hardware, POST to bastion)
|
||||
|
||||
import type { FastifyInstance } from "fastify";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import type { StateManager } from "../services/state.js";
|
||||
import {
|
||||
renderDiscoverIpxe,
|
||||
renderInstallIpxe,
|
||||
renderLocalBootIpxe,
|
||||
} from "../templates/boot.ipxe.js";
|
||||
import { renderUbuntuInstallIpxe } from "../templates/ubuntu-boot.ipxe.js";
|
||||
import { logger } from "../services/logger.js";
|
||||
|
||||
/**
 * Register GET /dispatch, which answers a PXE boot request with an iPXE script
 * chosen from the state of the requesting MAC:
 *   queued for install -> installer script (Ubuntu or Fedora, by queued OS)
 *   already installed  -> local-disk boot script
 *   anything else      -> hardware discovery script
 */
export function registerDispatchRoutes(
  app: FastifyInstance,
  config: BastionConfig,
  state: StateManager,
): void {
  app.get<{ Querystring: { mac?: string } }>("/dispatch", async (request, reply) => {
    const requested = request.query.mac ?? "";
    const mac = requested.toLowerCase().replace(/-/g, ":");
    const snapshot = state.load();
    const { serverIp, httpPort } = config;

    // 1. Machine is waiting for an install
    const pending = snapshot.install_queue[mac];
    if (pending) {
      const hostname = pending.hostname ?? "lab-node";
      const os = pending.os ?? "fedora-43";
      logger.info(`INSTALL STARTED: ${mac} -> ${hostname} (${os})`);

      const installScript: string = os.startsWith("ubuntu")
        ? renderUbuntuInstallIpxe({
            mac,
            hostname,
            serverIp,
            httpPort,
            ubuntuVersion: config.ubuntuVersion,
          })
        : renderInstallIpxe({
            mac,
            hostname,
            serverIp,
            httpPort,
            fedoraVersion: config.fedoraVersion,
            fedoraMirror: config.fedoraMirror,
          });

      return reply.type("text/plain").send(installScript);
    }

    // 2. Machine has already been installed
    const done = snapshot.installed[mac];
    if (done) {
      const hostname = done.hostname ?? "?";
      logger.info(`PXE request from ${mac} (${hostname}) - already installed, booting local disk`);
      return reply.type("text/plain").send(renderLocalBootIpxe(hostname));
    }

    // 3. Never seen (or not tracked): run discovery
    logger.info(`PXE request from ${mac} -> discovery mode`);
    const discoverScript = renderDiscoverIpxe({
      mac,
      serverIp,
      httpPort,
      fedoraMirror: config.fedoraMirror,
    });
    return reply.type("text/plain").send(discoverScript);
  });
}
|
||||
71
bastion/src/bastion/src/routes/kickstart.ts
Normal file
71
bastion/src/bastion/src/routes/kickstart.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
// Kickstart generation routes.
|
||||
// Serves per-MAC install kickstart, static discovery kickstart,
|
||||
// and Ubuntu autoinstall cloud-init endpoints.
|
||||
|
||||
import type { FastifyInstance } from "fastify";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import type { StateManager } from "../services/state.js";
|
||||
import { generateInstallKickstart, generateDiscoverKickstart } from "../services/kickstart-generator.js";
|
||||
import { renderUbuntuAutoinstall, renderUbuntuMetaData, type UbuntuAutoinstallParams } from "../templates/ubuntu-autoinstall.js";
|
||||
|
||||
/**
 * Register the routes that hand installer configuration to booting machines:
 *   GET /ks?mac=...                  per-MAC Fedora install kickstart
 *   GET /discover.ks                 static discovery kickstart
 *   GET /autoinstall/:mac/user-data  Ubuntu autoinstall user-data
 *   GET /autoinstall/:mac/meta-data  Ubuntu autoinstall meta-data
 *
 * A MAC with no install-queue entry gets the defaults
 * (hostname "lab-node", empty disk, role "worker").
 */
export function registerKickstartRoutes(
  app: FastifyInstance,
  config: BastionConfig,
  state: StateManager,
): void {
  // Fetch the install-queue entry (if any) for a MAC given in any case / separator style
  const findQueued = (rawMac: string) => {
    const key = rawMac.toLowerCase().replace(/-/g, ":");
    return state.load().install_queue[key];
  };

  // Per-MAC install kickstart
  app.get<{ Querystring: { mac?: string } }>("/ks", async (request, reply) => {
    const entry = findQueued(request.query.mac ?? "");
    const body = generateInstallKickstart(config, {
      hostname: entry?.hostname ?? "lab-node",
      disk: entry?.disk ?? "",
      role: entry?.role ?? "worker",
    });
    return reply.type("text/plain").send(body);
  });

  // Static discovery kickstart
  app.get("/discover.ks", async (_request, reply) => {
    return reply.type("text/plain").send(generateDiscoverKickstart(config));
  });

  // Ubuntu autoinstall user-data (cloud-init)
  app.get<{ Params: { mac: string } }>("/autoinstall/:mac/user-data", async (request, reply) => {
    const entry = findQueued(request.params.mac);
    const { domain, ubuntuVersion, timezone, locale, serverIp, httpPort, sshKeys, adminUser } = config;

    const params: UbuntuAutoinstallParams = {
      hostname: entry?.hostname ?? "lab-node",
      disk: entry?.disk ?? "",
      role: entry?.role ?? "worker",
      domain,
      ubuntuVersion,
      timezone,
      locale,
      serverIp,
      httpPort,
      sshKeys,
      adminUser,
    };

    return reply.type("text/plain").send(renderUbuntuAutoinstall(params));
  });

  // Ubuntu autoinstall meta-data (cloud-init)
  app.get<{ Params: { mac: string } }>("/autoinstall/:mac/meta-data", async (request, reply) => {
    const entry = findQueued(request.params.mac);
    const doc = renderUbuntuMetaData(entry?.hostname ?? "lab-node");
    return reply.type("text/plain").send(doc);
  });
}
|
||||
69
bastion/src/bastion/src/server.ts
Normal file
69
bastion/src/bastion/src/server.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
// Fastify application setup with all routes registered.
|
||||
|
||||
import Fastify from "fastify";
|
||||
import fastifyStatic from "@fastify/static";
|
||||
import { mkdirSync, existsSync } from "node:fs";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import { StateManager } from "./services/state.js";
|
||||
import { InstallLogBuffer } from "./services/install-log.js";
|
||||
import { SyslogListener } from "./services/syslog-listener.js";
|
||||
import { logger } from "./services/logger.js";
|
||||
import { registerDispatchRoutes } from "./routes/dispatch.js";
|
||||
import { registerKickstartRoutes } from "./routes/kickstart.js";
|
||||
import { registerApiRoutes } from "./routes/api.js";
|
||||
|
||||
|
||||
/**
 * Build the Fastify application and the services it shares with its routes.
 *
 * Creates the state store (and initialises it), the install log buffer and the
 * syslog listener, mounts static file serving, registers the dispatch,
 * kickstart and API routes, and adds a hook that logs every request.
 * The syslog listener is only constructed here; nothing in this function starts it.
 *
 * @returns The app together with the state, install log and syslog objects.
 */
export function createApp(config: BastionConfig): { app: ReturnType<typeof Fastify>; state: StateManager; installLog: InstallLogBuffer; syslog: SyslogListener } {
  // Fastify's own logger stays off -- we use winston instead
  const app = Fastify({ logger: false });

  const state = new StateManager(config.stateFile);
  state.init();

  const installLog = new InstallLogBuffer(config.bastionDir);
  const syslog = new SyslogListener(config.syslogPort, installLog, state);

  const { httpDir, tftpDir } = config;

  // Static files (vmlinuz, initrd.img, iPXE binaries) come from the HTTP directory
  mkdirSync(httpDir, { recursive: true });
  app.register(fastifyStatic, { root: httpDir, prefix: "/", decorateReply: false });

  // TFTP files (iPXE EFI binaries) are also exposed over HTTP for UEFI HTTP Boot
  const tftpAvailable = existsSync(tftpDir);
  if (tftpAvailable) {
    app.register(fastifyStatic, { root: tftpDir, prefix: "/tftp/", decorateReply: false });
  }

  // Route handlers.
  // boot.iso is generated at startup and served as a static file from httpDir
  // (static serving supports HTTP Range requests, required by JetKVM streaming)
  registerDispatchRoutes(app, config, state);
  registerKickstartRoutes(app, config, state);
  registerApiRoutes(app, state, installLog);

  // Log all requests
  app.addHook("onRequest", async (req) => {
    logger.info(`HTTP: ${req.ip} ${req.method} ${req.url}`);
  });

  return { app, state, installLog, syslog };
}
|
||||
|
||||
/**
 * Create the application and start listening for HTTP on all interfaces.
 *
 * @param config Bastion configuration; `httpPort` selects the listening port.
 * @throws Re-throws whatever `listen` rejected with, after logging it.
 */
export async function startServer(config: BastionConfig): Promise<void> {
  const server = createApp(config).app;
  const port = config.httpPort;

  try {
    await server.listen({ port, host: "0.0.0.0" });
    logger.info(`HTTP server listening on :${port}`);
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    logger.error(`Failed to start HTTP server: ${reason}`);
    throw failure;
  }
}
|
||||
70
bastion/src/bastion/src/services/dnsmasq.ts
Normal file
70
bastion/src/bastion/src/services/dnsmasq.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
// Generate dnsmasq configuration and manage the dnsmasq process lifecycle.
|
||||
|
||||
import { writeFileSync, mkdirSync } from "node:fs";
|
||||
import { dirname } from "node:path";
|
||||
import type { ResultPromise } from "execa";
|
||||
import { execa } from "execa";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import { renderDnsmasqConf } from "../templates/dnsmasq.conf.js";
|
||||
import { logger } from "./logger.js";
|
||||
|
||||
type DnsmasqProcess = ResultPromise<{ stdout: "pipe"; stderr: "pipe" }>;
|
||||
let dnsmasqProcess: DnsmasqProcess | null = null;
|
||||
|
||||
/**
 * Render the dnsmasq configuration and write it to `<bastionDir>/dnsmasq.conf`.
 *
 * The parent directory is created when missing.
 *
 * @param config Bastion configuration passed to the template renderer.
 * @returns Path of the configuration file that was written.
 */
export function generateDnsmasqConf(config: BastionConfig): string {
  const target = `${config.bastionDir}/dnsmasq.conf`;
  const rendered = renderDnsmasqConf(config);

  const parentDir = dirname(target);
  mkdirSync(parentDir, { recursive: true });
  writeFileSync(target, rendered);

  logger.info(`Generated dnsmasq config: ${target}`);
  return target;
}
|
||||
|
||||
/**
 * Start dnsmasq in the foreground (`--no-daemon`) as a child process.
 *
 * The configuration file is regenerated first. The child's stdout and stderr
 * are forwarded to the logger one line at a time, and the child is remembered
 * in the module-level handle so that `stopDnsmasq` can terminate it.
 *
 * NOTE(review): the returned execa handle is itself promise-like, so a caller
 * that `await`s this async function presumably also waits for dnsmasq to
 * settle — confirm callers expect that.
 *
 * @param config Bastion configuration used for the config file and log text.
 * @returns The execa child-process handle.
 */
export async function startDnsmasq(config: BastionConfig): Promise<DnsmasqProcess> {
  const confPath = generateDnsmasqConf(config);

  logger.info(`Starting PXE server (${config.dhcpMode}DHCP on ${config.iface})...`);

  const proc = execa("dnsmasq", ["--no-daemon", `--conf-file=${confPath}`], {
    stdout: "pipe",
    stderr: "pipe",
  });

  dnsmasqProcess = proc;

  // A single "data" chunk may carry several lines; log each one separately so
  // that every log entry corresponds to one dnsmasq message.
  const forwardOutput = (data: Buffer): void => {
    for (const rawLine of data.toString().split(/\r?\n/)) {
      const line = rawLine.trim();
      if (line) logger.info(`dnsmasq: ${line}`);
    }
  };

  proc.stdout?.on("data", forwardOutput);
  proc.stderr?.on("data", forwardOutput);

  proc.on("exit", (code) => {
    if (code !== null && code !== 0) {
      logger.error(`dnsmasq exited with code ${code}. Check if another DHCP/TFTP service is running.`);
    }
    // Only clear the handle if it still points at this process. After a
    // stop/start sequence the old child's exit event may arrive once a newer
    // child has been registered; that newer handle must not be dropped.
    if (dnsmasqProcess === proc) {
      dnsmasqProcess = null;
    }
  });

  return proc;
}
|
||||
|
||||
/**
 * Terminate the tracked dnsmasq child, if there is one.
 *
 * Sends SIGTERM and forgets the handle immediately; does nothing when no
 * process is being tracked.
 */
export function stopDnsmasq(): void {
  const running = dnsmasqProcess;
  if (!running) return;

  logger.info("Stopping dnsmasq...");
  running.kill("SIGTERM");
  dnsmasqProcess = null;
}
|
||||
86
bastion/src/bastion/src/services/install-log.ts
Normal file
86
bastion/src/bastion/src/services/install-log.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
// Per-machine install log buffer.
|
||||
// Stores raw log lines in memory (ring buffer) and persists to disk.
|
||||
// Used by /api/log for ingestion and /api/logs/:mac/follow for SSE streaming.
|
||||
|
||||
import { mkdirSync, appendFileSync, readFileSync, existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { progressBus } from "./progress-events.js";
|
||||
|
||||
const MAX_LINES_IN_MEMORY = 2000;
|
||||
|
||||
/** One buffered install-log entry. */
export interface LogLine {
  /** Raw log text. */
  line: string;
  /** Timestamp string attached to the line when it was stored. */
  timestamp: string;
}
|
||||
|
||||
/**
 * Per-machine install log storage.
 *
 * Keeps the most recent `MAX_LINES_IN_MEMORY` lines per MAC in memory, appends
 * every line to `<bastionDir>/logs/<mac>.log`, and publishes each line on the
 * progress bus with stage `"log"`.
 */
export class InstallLogBuffer {
  /** In-memory ring buffer per MAC */
  private buffers = new Map<string, LogLine[]>();
  private logDir: string;

  /**
   * @param bastionDir Base directory; a `logs` subdirectory is created in it.
   */
  constructor(bastionDir: string) {
    this.logDir = join(bastionDir, "logs");
    mkdirSync(this.logDir, { recursive: true });
  }

  /**
   * Append log lines for a machine: store in memory, append to the log file,
   * and emit one progress event per line.
   *
   * @param mac Machine identifier used as buffer key and (sanitized) file name.
   * @param lines Lines to record; an empty array is a no-op.
   * @param hostname Host name reported in emitted events; defaults to `mac`.
   */
  append(mac: string, lines: string[], hostname?: string): void {
    // Nothing to record: avoid writing a stray blank line to the log file.
    if (lines.length === 0) return;

    const now = new Date().toISOString();

    let buffer = this.buffers.get(mac);
    if (!buffer) {
      buffer = [];
      this.buffers.set(mac, buffer);
    }

    // Push one by one rather than spreading, so a very large batch cannot
    // exceed the engine's argument-count limit.
    for (const line of lines) {
      buffer.push({ line, timestamp: now });
    }

    // Trim to ring buffer size (oldest entries are dropped first).
    const overflow = buffer.length - MAX_LINES_IN_MEMORY;
    if (overflow > 0) {
      buffer.splice(0, overflow);
    }

    // Persist to file
    const fileContent = lines.map((l) => `${now} ${l}`).join("\n") + "\n";
    appendFileSync(this.logFilePath(mac), fileContent);

    // Emit to SSE via progressBus (use "log" stage for log lines)
    const host = hostname ?? mac;
    for (const line of lines) {
      progressBus.emit({
        mac,
        hostname: host,
        stage: "log",
        detail: line,
        timestamp: now,
      });
    }
  }

  /**
   * Get buffered log lines for a machine.
   *
   * @param offset Index of the first buffered line to return.
   * @param limit Maximum number of lines to return.
   */
  getLines(mac: string, offset = 0, limit = 500): LogLine[] {
    const buffer = this.buffers.get(mac) ?? [];
    return buffer.slice(offset, offset + limit);
  }

  /** Get the number of lines currently buffered in memory for a machine. */
  lineCount(mac: string): number {
    return this.buffers.get(mac)?.length ?? 0;
  }

  /**
   * Read the full log from disk (for machines no longer in memory).
   *
   * @returns File contents, or `null` when no log file exists for `mac`.
   */
  readFromDisk(mac: string): string | null {
    const filePath = this.logFilePath(mac);
    if (!existsSync(filePath)) return null;
    return readFileSync(filePath, "utf-8");
  }

  /** Drop the in-memory buffer for a machine; the file on disk is untouched. */
  clear(mac: string): void {
    this.buffers.delete(mac);
  }

  /**
   * Map a MAC to its log file path.
   *
   * Every character outside `[A-Za-z0-9_-]` becomes a dash. For a regular
   * colon-separated MAC this is the same name as replacing colons only, while
   * identifiers containing `/`, `\` or `.` can no longer point outside the log
   * directory.
   */
  private logFilePath(mac: string): string {
    const safeName = mac.replace(/[^A-Za-z0-9_-]/g, "-");
    return join(this.logDir, `${safeName}.log`);
  }
}
|
||||
437
bastion/src/bastion/src/services/iso-builder.ts
Normal file
437
bastion/src/bastion/src/services/iso-builder.ts
Normal file
@@ -0,0 +1,437 @@
|
||||
// Pure TypeScript UEFI-bootable ISO builder.
|
||||
// Creates an ISO 9660 image with an embedded FAT EFI system partition
|
||||
// containing iPXE EFI binaries and an autoexec script.
|
||||
// No external tools required (no xorriso, mtools).
|
||||
|
||||
import { readFileSync } from "node:fs";
|
||||
|
||||
const SECTOR_SIZE = 2048; // ISO 9660 logical sector
|
||||
const FAT_SECTOR_SIZE = 512;
|
||||
|
||||
// --- Utility helpers ---
|
||||
|
||||
/**
 * Produce a fixed-width ASCII field.
 *
 * @param s Text to place at the start of the field; truncated to `len`.
 * @param len Width of the field in bytes.
 * @param pad Character whose code fills the unused remainder.
 * @returns A new buffer of exactly `len` bytes.
 */
function asciiPad(s: string, len: number, pad = " "): Buffer {
  const fillByte = pad.charCodeAt(0);
  const field = Buffer.alloc(len, fillByte);
  const writable = Math.min(s.length, len);
  field.write(s, 0, writable, "ascii");
  return field;
}
|
||||
|
||||
/** Encode `n` as an unsigned 16-bit little-endian value in a new 2-byte buffer. */
function u16le(n: number): Buffer {
  const bytes = Buffer.alloc(2);
  bytes.writeUInt16LE(n, 0);
  return bytes;
}
|
||||
|
||||
/** Encode `n` as an unsigned 32-bit little-endian value in a new 4-byte buffer. */
function u32le(n: number): Buffer {
  const bytes = Buffer.alloc(4);
  bytes.writeUInt32LE(n, 0);
  return bytes;
}
|
||||
|
||||
/** Encode `n` as an unsigned 16-bit big-endian value in a new 2-byte buffer. */
function u16be(n: number): Buffer {
  const bytes = Buffer.alloc(2);
  bytes.writeUInt16BE(n, 0);
  return bytes;
}
|
||||
|
||||
/** Encode `n` as an unsigned 32-bit big-endian value in a new 4-byte buffer. */
function u32be(n: number): Buffer {
  const bytes = Buffer.alloc(4);
  bytes.writeUInt32BE(n, 0);
  return bytes;
}
|
||||
|
||||
/**
 * Both-endian 16-bit value (ISO 9660 "both-byte" format): the little-endian
 * encoding followed by the big-endian encoding, 4 bytes in total.
 */
function u16both(n: number): Buffer {
  const pair = Buffer.alloc(4);
  pair.writeUInt16LE(n, 0);
  pair.writeUInt16BE(n, 2);
  return pair;
}
|
||||
|
||||
/**
 * Both-endian 32-bit value: the little-endian encoding followed by the
 * big-endian encoding, 8 bytes in total.
 */
function u32both(n: number): Buffer {
  const pair = Buffer.alloc(8);
  pair.writeUInt32LE(n, 0);
  pair.writeUInt32BE(n, 4);
  return pair;
}
|
||||
|
||||
/**
 * Encode a date as a 17-byte ISO 9660 volume-descriptor timestamp: sixteen
 * ASCII digits "YYYYMMDDHHMMSSCC" (UTC, hundredths always "00") followed by
 * a zero offset byte.
 */
function isoDate(d: Date): Buffer {
  const twoDigits = (value: number): string => value.toString().padStart(2, "0");

  const digits = [
    d.getUTCFullYear().toString().padStart(4, "0"),
    twoDigits(d.getUTCMonth() + 1),
    twoDigits(d.getUTCDate()),
    twoDigits(d.getUTCHours()),
    twoDigits(d.getUTCMinutes()),
    twoDigits(d.getUTCSeconds()),
    "00", // hundredths
  ].join("");

  const stamp = Buffer.alloc(17, 0);
  stamp.write(digits, 0, 16, "ascii");
  stamp[16] = 0; // UTC offset (0 = UTC)
  return stamp;
}
|
||||
|
||||
/**
 * Encode a date as the 7-byte recording timestamp of a directory record:
 * years since 1900, month, day, hour, minute, second (all UTC) and a zero
 * offset byte.
 */
function dirRecordDate(d: Date): Buffer {
  return Buffer.from([
    d.getUTCFullYear() - 1900,
    d.getUTCMonth() + 1,
    d.getUTCDate(),
    d.getUTCHours(),
    d.getUTCMinutes(),
    d.getUTCSeconds(),
    0, // UTC
  ]);
}
|
||||
|
||||
// --- FAT12 filesystem builder ---
|
||||
|
||||
/**
 * Build a minimal FAT12 filesystem image in memory.
 *
 * Layout: boot sector (BPB) | FAT | FAT copy | root directory | data clusters.
 * The image always contains the directories `EFI` and `EFI/BOOT`. Files are
 * placed either in the root directory (single-component path) or in
 * `EFI/BOOT` (path `EFI/BOOT/<name>`); files at any other path are ignored.
 * Names are upper-cased and truncated to 8.3 form.
 *
 * @param files Files to store, with `/`-separated paths.
 * @returns The complete filesystem image, a whole number of 512-byte sectors.
 * @throws Error if the content needs more clusters than FAT12 can address,
 *   if a path has no file name, or if a directory runs out of entries.
 */
function buildFatImage(files: Array<{ path: string; data: Buffer }>): Buffer {
  const bytesPerSector = FAT_SECTOR_SIZE;
  const sectorsPerCluster = 4; // 2KB clusters
  const clusterSize = bytesPerSector * sectorsPerCluster;
  const reservedSectors = 1;
  const numFats = 2;
  const rootEntryCount = 64; // 64 * 32 = 2048 bytes = 4 sectors
  const rootDirSectors = Math.ceil((rootEntryCount * 32) / bytesPerSector);
  const maxFat12Clusters = 4084; // above this a volume is no longer FAT12
  const volumeLabel = "IPXE BOOT".padEnd(11, " ");

  // Every file occupies at least one cluster, including empty ones; sizing
  // and allocation share this helper so they cannot disagree.
  const clustersFor = (size: number): number => Math.max(1, Math.ceil(size / clusterSize));

  let fileClusters = 0;
  for (const f of files) fileClusters += clustersFor(f.data.length);

  // + 2 directory clusters (EFI and EFI/BOOT) + 2 clusters of safety margin
  const dataClusters = fileClusters + 2 + 2;
  if (dataClusters > maxFat12Clusters) {
    throw new Error(`FAT12 image too large: ${dataClusters} clusters needed, at most ${maxFat12Clusters} supported`);
  }

  const fatEntries = dataClusters + 2; // clusters start at 2
  const fatBytes = Math.ceil((fatEntries * 3) / 2); // FAT12: 1.5 bytes per entry
  const sectorsPerFat = Math.ceil(fatBytes / bytesPerSector);

  const totalSectors = reservedSectors + (numFats * sectorsPerFat) + rootDirSectors + (dataClusters * sectorsPerCluster);
  const image = Buffer.alloc(totalSectors * bytesPerSector, 0);

  // --- BPB (BIOS Parameter Block) ---
  image[0] = 0xEB; image[1] = 0x3C; image[2] = 0x90; // Jump + NOP
  // Fixed-width text fields are space-padded to their full width.
  image.write("LABCTL".padEnd(8, " "), 3, 8, "ascii"); // OEM
  image.writeUInt16LE(bytesPerSector, 11);
  image[13] = sectorsPerCluster;
  image.writeUInt16LE(reservedSectors, 14);
  image[16] = numFats;
  image.writeUInt16LE(rootEntryCount, 17);
  if (totalSectors < 0x10000) {
    image.writeUInt16LE(totalSectors, 19); // 16-bit total sector count
  } else {
    image.writeUInt32LE(totalSectors, 32); // 32-bit total sector count
  }
  image[21] = 0xF0; // media descriptor (removable)
  image.writeUInt16LE(sectorsPerFat, 22);
  image.writeUInt16LE(1, 24); // sectors per track
  image.writeUInt16LE(1, 26); // heads
  image[38] = 0x29; // Extended boot sig
  image.writeUInt32LE(0x12345678, 39); // volume serial
  image.write(volumeLabel, 43, 11, "ascii"); // volume label
  image.write("FAT12".padEnd(8, " "), 54, 8, "ascii"); // filesystem type
  image[510] = 0x55; image[511] = 0xAA; // Boot signature

  // --- Region offsets ---
  const fatOffset = reservedSectors * bytesPerSector;
  const rootDirOffset = fatOffset + (numFats * sectorsPerFat * bytesPerSector);
  const dataOffset = rootDirOffset + (rootDirSectors * bytesPerSector);

  // FAT12 helper: write a 12-bit entry. Two entries share three bytes, so the
  // neighbouring nibble has to be preserved on each write.
  function fatSet(fat: number, cluster: number, value: number): void {
    const off = fatOffset + (fat * sectorsPerFat * bytesPerSector);
    const byteIdx = Math.floor(cluster * 3 / 2);
    if (cluster % 2 === 0) {
      image[off + byteIdx] = value & 0xFF;
      image[off + byteIdx + 1] = (image[off + byteIdx + 1]! & 0xF0) | ((value >> 8) & 0x0F);
    } else {
      image[off + byteIdx] = (image[off + byteIdx]! & 0x0F) | ((value & 0x0F) << 4);
      image[off + byteIdx + 1] = (value >> 4) & 0xFF;
    }
  }

  // Reserved entries 0 and 1 (media descriptor + end-of-chain marker)
  for (let f = 0; f < numFats; f++) {
    fatSet(f, 0, 0xFF0);
    fatSet(f, 1, 0xFFF);
  }

  let nextCluster = 2;

  // Allocate a contiguous chain big enough for `size` bytes in every FAT copy
  // and return its first cluster.
  function allocClusters(size: number): number {
    const needed = clustersFor(size);
    const startCluster = nextCluster;
    for (let i = 0; i < needed; i++) {
      const c = nextCluster++;
      const next = (i === needed - 1) ? 0xFFF : c + 1;
      for (let f = 0; f < numFats; f++) fatSet(f, c, next);
    }
    return startCluster;
  }

  function clusterOffset(cluster: number): number {
    return dataOffset + (cluster - 2) * clusterSize;
  }

  // Write one 32-byte 8.3 directory entry into slot `entryIdx` of `dirBuf`.
  function writeDirEntry(dirBuf: Buffer, entryIdx: number, name: string, ext: string, cluster: number, size: number, isDir: boolean): void {
    const off = entryIdx * 32;
    if (off + 32 > dirBuf.length) {
      throw new Error(`FAT directory is full: cannot add entry "${name}"`);
    }
    dirBuf.write(name.toUpperCase().padEnd(8, " "), off, 8, "ascii");
    dirBuf.write(ext.toUpperCase().padEnd(3, " "), off + 8, 3, "ascii");
    dirBuf[off + 11] = isDir ? 0x10 : 0x20; // attributes
    dirBuf.writeUInt16LE(cluster & 0xFFFF, off + 26); // first cluster low
    dirBuf.writeUInt32LE(isDir ? 0 : size, off + 28); // file size
  }

  // --- Create directory structure ---
  // Root: EFI dir + root-level files
  // EFI: BOOT dir
  // BOOT: EFI/BOOT/* files

  const efiDirCluster = allocClusters(clusterSize);
  const efiDirBuf = Buffer.alloc(clusterSize, 0);

  const bootDirCluster = allocClusters(clusterSize);
  const bootDirBuf = Buffer.alloc(clusterSize, 0);

  // EFI: ".", ".." (parent is the root, cluster 0) and the BOOT subdirectory
  writeDirEntry(efiDirBuf, 0, ".", "", efiDirCluster, 0, true);
  writeDirEntry(efiDirBuf, 1, "..", "", 0, 0, true);
  writeDirEntry(efiDirBuf, 2, "BOOT", "", bootDirCluster, 0, true);

  // BOOT: "." and ".." (parent is EFI)
  writeDirEntry(bootDirBuf, 0, ".", "", bootDirCluster, 0, true);
  writeDirEntry(bootDirBuf, 1, "..", "", efiDirCluster, 0, true);

  let bootEntryIdx = 2;
  let rootEntryIdx = 0;

  // Root directory lives in its own fixed region of the image.
  const rootBuf = image.subarray(rootDirOffset, rootDirOffset + rootDirSectors * bytesPerSector);

  // Volume label entry
  rootBuf.write(volumeLabel, rootEntryIdx * 32, 11, "ascii");
  rootBuf[rootEntryIdx * 32 + 11] = 0x08; // volume label attribute
  rootEntryIdx++;

  // EFI directory in root
  writeDirEntry(rootBuf, rootEntryIdx++, "EFI", "", efiDirCluster, 0, true);

  // Write files
  for (const file of files) {
    const parts = file.path.toUpperCase().split("/").filter(Boolean);
    const fileName = parts[parts.length - 1];
    if (!fileName) {
      throw new Error(`Invalid file path for FAT image: "${file.path}"`);
    }

    const inRoot = parts.length === 1;
    const inBootDir = parts.length === 3 && parts[0] === "EFI" && parts[1] === "BOOT";
    // Only the root and EFI/BOOT are supported; skip anything else before
    // allocating, so no clusters are spent on data nothing points to.
    if (!inRoot && !inBootDir) continue;

    const nameParts = fileName.split(".");
    const name = nameParts[0]!.substring(0, 8);
    const ext = (nameParts[1] ?? "").substring(0, 3);

    const fileCluster = allocClusters(file.data.length);
    file.data.copy(image, clusterOffset(fileCluster));

    if (inRoot) {
      writeDirEntry(rootBuf, rootEntryIdx++, name, ext, fileCluster, file.data.length, false);
    } else {
      writeDirEntry(bootDirBuf, bootEntryIdx++, name, ext, fileCluster, file.data.length, false);
    }
  }

  // Write directory clusters to image
  efiDirBuf.copy(image, clusterOffset(efiDirCluster));
  bootDirBuf.copy(image, clusterOffset(bootDirCluster));

  return image;
}
|
||||
|
||||
// --- ISO 9660 builder ---
|
||||
|
||||
/**
 * Build a UEFI-bootable ISO 9660 image whose El Torito boot image is a FAT
 * filesystem holding the given EFI files (and, optionally, an
 * `autoexec.ipxe` script).
 *
 * ISO layout (2048-byte sectors):
 *   0-15  system area (unused)
 *   16    Primary Volume Descriptor
 *   17    Boot Record Volume Descriptor (El Torito)
 *   18    Volume Descriptor Set Terminator
 *   19    root directory
 *   20    El Torito boot catalog
 *   21+   FAT image (also listed in the root directory as EFI.IMG)
 *
 * @param efiFiles Files to place in the FAT image.
 * @param scriptContent Optional script stored as `autoexec.ipxe` in the FAT root.
 * @returns The complete ISO image.
 */
export function buildBootIso(efiFiles: Array<{ path: string; data: Buffer }>, scriptContent?: string): Buffer {
  const now = new Date();

  // Build FAT image with all files
  const allFiles = [...efiFiles];
  if (scriptContent) {
    allFiles.push({ path: "autoexec.ipxe", data: Buffer.from(scriptContent, "utf-8") });
  }
  const fatImage = buildFatImage(allFiles);

  const fatSectors = Math.ceil(fatImage.length / SECTOR_SIZE);
  const rootDirSector = 19;
  const bootCatalogSector = 20;
  const efiImageSector = 21;
  const totalSectors = efiImageSector + fatSectors + 1;

  const iso = Buffer.alloc(totalSectors * SECTOR_SIZE, 0);

  // Build one directory record. Field offsets: 0 length, 2 extent (both-endian),
  // 10 data length (both-endian), 18 recording date, 25 flags, 28 volume
  // sequence number (both-endian), 32 identifier length, 33 identifier. A pad
  // byte keeps the record length even.
  const dirRecord = (extentSector: number, dataLength: number, flags: number, identifier: Buffer): Buffer => {
    const idLen = identifier.length;
    const rec = Buffer.alloc(33 + idLen + ((idLen % 2 === 0) ? 1 : 0), 0);
    rec[0] = rec.length;
    u32both(extentSector).copy(rec, 2);
    u32both(dataLength).copy(rec, 10);
    dirRecordDate(now).copy(rec, 18);
    rec[25] = flags;
    u16both(1).copy(rec, 28);
    rec[32] = idLen;
    identifier.copy(rec, 33);
    return rec;
  };

  // --- Primary Volume Descriptor (sector 16) ---
  const pvd = iso.subarray(16 * SECTOR_SIZE, 17 * SECTOR_SIZE);
  pvd[0] = 1; // type: Primary
  pvd.write("CD001", 1, 5, "ascii"); // standard identifier
  pvd[6] = 1; // version
  asciiPad("LABCTL", 32).copy(pvd, 8); // system identifier
  asciiPad("IPXE_BOOT", 32).copy(pvd, 40); // volume identifier
  u32both(totalSectors).copy(pvd, 80); // volume space size
  u16both(1).copy(pvd, 120); // volume set size
  u16both(1).copy(pvd, 124); // volume sequence number
  u16both(SECTOR_SIZE).copy(pvd, 128); // logical block size

  // Root directory record embedded in the PVD (34 bytes, identifier 0x00).
  // Built with the same helper as every other record so the volume sequence
  // number sits at offset 28 here too.
  dirRecord(rootDirSector, SECTOR_SIZE, 0x02, Buffer.from([0])).copy(pvd, 156);

  // Volume dates
  isoDate(now).copy(pvd, 813); // creation
  isoDate(now).copy(pvd, 830); // modification
  // Expiration "not specified": sixteen ASCII zeros followed by a binary zero offset byte.
  pvd.fill(0x30, 847, 863);
  pvd[863] = 0;
  isoDate(now).copy(pvd, 864); // effective
  pvd[881] = 1; // file structure version

  // --- Boot Record Volume Descriptor (El Torito, sector 17) ---
  const brvd = iso.subarray(17 * SECTOR_SIZE, 18 * SECTOR_SIZE);
  brvd[0] = 0; // type: Boot Record
  brvd.write("CD001", 1, 5, "ascii");
  brvd[6] = 1; // version
  brvd.write("EL TORITO SPECIFICATION", 7, 32, "ascii");
  u32le(bootCatalogSector).copy(brvd, 0x47); // boot catalog pointer

  // --- Volume Descriptor Set Terminator (sector 18) ---
  const vdst = iso.subarray(18 * SECTOR_SIZE, 19 * SECTOR_SIZE);
  vdst[0] = 255; // type: terminator
  vdst.write("CD001", 1, 5, "ascii");
  vdst[6] = 1;

  // --- Root Directory (sector 19) ---
  const rootDir = iso.subarray(rootDirSector * SECTOR_SIZE, (rootDirSector + 1) * SECTOR_SIZE);
  const rootEntries = [
    dirRecord(rootDirSector, SECTOR_SIZE, 0x02, Buffer.from([0])), // "."
    dirRecord(rootDirSector, SECTOR_SIZE, 0x02, Buffer.from([1])), // ".."
    // The FAT image, visible as a regular file
    dirRecord(efiImageSector, fatImage.length, 0x00, Buffer.from("EFI.IMG;1", "ascii")),
    // The boot catalog, flagged hidden
    dirRecord(bootCatalogSector, SECTOR_SIZE, 0x01, Buffer.from("BOOT.CAT;1", "ascii")),
  ];
  let offset = 0;
  for (const entry of rootEntries) {
    entry.copy(rootDir, offset);
    offset += entry.length;
  }

  // --- El Torito Boot Catalog (sector 20) ---
  const catalog = iso.subarray(bootCatalogSector * SECTOR_SIZE, (bootCatalogSector + 1) * SECTOR_SIZE);

  // Validation entry (32 bytes)
  catalog[0] = 1; // header ID
  catalog[1] = 0xEF; // platform: EFI
  catalog.write("LABCTL", 4, 24, "ascii"); // ID string
  catalog[30] = 0x55; // key bytes
  catalog[31] = 0xAA;
  // The checksum makes all sixteen 16-bit words of the entry sum to zero, so
  // the key bytes must already be in place when it is computed (the checksum
  // field itself is still zero at this point).
  let cksum = 0;
  for (let i = 0; i < 32; i += 2) {
    cksum += catalog.readUInt16LE(i);
  }
  catalog.writeUInt16LE((0x10000 - (cksum & 0xFFFF)) & 0xFFFF, 28); // checksum

  // Default/Initial entry (32 bytes, offset 32)
  catalog[32] = 0x88; // bootable
  catalog[33] = 0x00; // boot media type: no emulation (the platform is declared in the validation entry)
  catalog.writeUInt16LE(0, 34); // load segment
  catalog[36] = 0; // system type
  const efiImageSectors512 = Math.ceil(fatImage.length / FAT_SECTOR_SIZE);
  catalog.writeUInt16LE(Math.min(efiImageSectors512, 0xFFFF), 38); // sector count (512-byte units)
  catalog.writeUInt32LE(efiImageSector, 40); // load LBA

  // --- EFI boot image (FAT filesystem, starting at sector 21) ---
  fatImage.copy(iso, efiImageSector * SECTOR_SIZE);

  return iso;
}
|
||||
|
||||
/**
 * Build a ready-to-serve iPXE boot ISO from the iPXE binaries installed on
 * this system.
 *
 * Each known architecture's binary is included when its file can be read;
 * unreadable ones are skipped. The ISO also carries an `autoexec.ipxe` that
 * obtains a DHCP lease (retrying once after three seconds) and chains to
 * `<bastionUrl>/boot.ipxe`, dropping to the iPXE shell if chaining fails.
 *
 * @param bastionUrl Base URL of the bastion HTTP server (no trailing slash).
 * @returns The ISO image.
 * @throws Error when none of the iPXE EFI binaries could be read.
 */
export function buildBastionBootIso(bastionUrl: string): Buffer {
  const efiFiles: Array<{ path: string; data: Buffer }> = [];

  const PATHS: Record<string, { src: string; dest: string }> = {
    x86_64: { src: "/usr/share/ipxe/ipxe-snponly-x86_64.efi", dest: "EFI/BOOT/BOOTX64.EFI" },
    aarch64: { src: "/usr/share/ipxe/arm64-efi/snponly.efi", dest: "EFI/BOOT/BOOTAA64.EFI" },
  };

  for (const paths of Object.values(PATHS)) {
    try {
      efiFiles.push({ path: paths.dest, data: readFileSync(paths.src) });
    } catch {
      // Best effort: this architecture's binary is not available, skip it.
    }
  }

  if (efiFiles.length === 0) {
    throw new Error("No iPXE EFI binaries found on system");
  }

  // iPXE scripts have no "( ... )" grouping, so the retry is written with a
  // label: a successful first dhcp jumps straight to :boot, a failed one
  // prints the message and falls through to the single retry.
  const script = [
    "#!ipxe",
    "",
    "echo Booting from iPXE ISO -- connecting to bastion...",
    "dhcp && goto boot || echo DHCP failed, retrying...",
    "sleep 3",
    "dhcp",
    ":boot",
    `chain ${bastionUrl}/boot.ipxe || shell`,
  ].join("\n");

  return buildBootIso(efiFiles, script);
}
|
||||
45
bastion/src/bastion/src/services/kickstart-generator.ts
Normal file
45
bastion/src/bastion/src/services/kickstart-generator.ts
Normal file
@@ -0,0 +1,45 @@
|
||||
// Generate kickstart content for discovery and install modes.
|
||||
// Uses template literal functions -- no external template engine.
|
||||
|
||||
import type { BastionConfig, Role } from "@lab/shared";
|
||||
import { renderDiscoverKickstart } from "../templates/discover.ks.js";
|
||||
import { renderInstallKickstart, type InstallKickstartParams } from "../templates/install.ks.js";
|
||||
|
||||
/**
 * Render the discovery kickstart.
 *
 * Only the bastion's address (`serverIp`) and HTTP port are taken from the
 * configuration and handed to the template.
 */
export function generateDiscoverKickstart(config: BastionConfig): string {
  const { serverIp, httpPort } = config;
  return renderDiscoverKickstart({ serverIp, httpPort });
}
|
||||
|
||||
/**
 * Render the install kickstart for one machine.
 *
 * Per-machine values (`hostname`, `disk`, `role`) come from `params`; the
 * site-wide values are copied from the bastion configuration.
 *
 * @param config Bastion configuration supplying the site-wide settings.
 * @param params Target machine's host name, install disk and role.
 * @returns The rendered kickstart text.
 */
export function generateInstallKickstart(
  config: BastionConfig,
  params: {
    hostname: string;
    disk: string;
    role: Role;
  },
): string {
  const { hostname, disk, role } = params;
  const {
    domain,
    fedoraVersion,
    timezone,
    locale,
    serverIp,
    httpPort,
    syslogPort,
    sshKeys,
    adminUser,
  } = config;

  const templateInput: InstallKickstartParams = {
    hostname,
    disk,
    role,
    domain,
    fedoraVersion,
    timezone,
    locale,
    serverIp,
    httpPort,
    syslogPort,
    sshKeys,
    adminUser,
  };

  return renderInstallKickstart(templateInput);
}
|
||||
252
bastion/src/bastion/src/services/labd-connection.ts
Normal file
252
bastion/src/bastion/src/services/labd-connection.ts
Normal file
@@ -0,0 +1,252 @@
|
||||
// WebSocket connection from bastion to labd for registration and state sync.
|
||||
// If LABD_URL is configured, bastion registers with labd on startup and pushes
|
||||
// state changes. If not configured, bastion runs standalone (backward compatible).
|
||||
|
||||
import WebSocket from "ws";
|
||||
import { readFileSync, writeFileSync, existsSync } from "node:fs";
|
||||
import { hostname as osHostname } from "node:os";
|
||||
import type { BastionState, BastionConfig } from "@lab/shared";
|
||||
import {
|
||||
type BastionMessage,
|
||||
type LabdBastionMessage,
|
||||
isLabdBastionMessage,
|
||||
} from "@lab/shared";
|
||||
import { logger } from "./logger.js";
|
||||
|
||||
const HEARTBEAT_INTERVAL_MS = 10_000;
|
||||
const RECONNECT_BASE_DELAY_MS = 1_000;
|
||||
const RECONNECT_MAX_DELAY_MS = 30_000;
|
||||
|
||||
type CommandHandler = (msg: LabdBastionMessage) => Promise<{ status: "ok" | "error"; data?: unknown; error?: string }>;
|
||||
|
||||
export class BastionConnection {
|
||||
private ws: WebSocket | null = null;
|
||||
private bastionId: string | null = null;
|
||||
private heartbeatTimer: NodeJS.Timeout | null = null;
|
||||
private reconnectTimer: NodeJS.Timeout | null = null;
|
||||
private retryCount = 0;
|
||||
private closed = false;
|
||||
private startTime = Date.now();
|
||||
private commandHandlers = new Map<string, CommandHandler>();
|
||||
|
||||
constructor(
|
||||
private readonly config: BastionConfig,
|
||||
private readonly getState: () => BastionState,
|
||||
) {
|
||||
// Load persisted bastionId if we've enrolled before
|
||||
const idFile = `${config.bastionDir}/bastion-id`;
|
||||
if (existsSync(idFile)) {
|
||||
this.bastionId = readFileSync(idFile, "utf-8").trim();
|
||||
}
|
||||
}
|
||||
|
||||
/** Register a handler for incoming commands from labd. */
|
||||
onCommand(type: string, handler: CommandHandler): void {
|
||||
this.commandHandlers.set(type, handler);
|
||||
}
|
||||
|
||||
connect(): void {
|
||||
if (this.closed) return;
|
||||
if (!this.config.labdUrl) return;
|
||||
|
||||
const wsUrl = this.config.labdUrl
|
||||
.replace(/^https:/, "wss:")
|
||||
.replace(/^http:/, "ws:");
|
||||
|
||||
const token = this.config.bastionJoinToken ?? "";
|
||||
const url = `${wsUrl}/ws/bastion?token=${encodeURIComponent(token)}`;
|
||||
|
||||
logger.info(`Connecting to labd at ${this.config.labdUrl}...`);
|
||||
|
||||
this.ws = new WebSocket(url);
|
||||
|
||||
this.ws.on("open", () => {
|
||||
logger.info("Connected to labd");
|
||||
this.retryCount = 0;
|
||||
|
||||
// Send enrollment or re-registration
|
||||
if (this.bastionId) {
|
||||
// Already enrolled — send state sync immediately
|
||||
this.sendStateSync();
|
||||
} else {
|
||||
// First time — enroll
|
||||
this.send({
|
||||
type: "bastion-enroll",
|
||||
token,
|
||||
hostname: osHostname(),
|
||||
network: this.config.network,
|
||||
serverIp: this.config.serverIp,
|
||||
});
|
||||
}
|
||||
|
||||
this.startHeartbeat();
|
||||
});
|
||||
|
||||
this.ws.on("message", (data: WebSocket.Data) => {
|
||||
try {
|
||||
const raw = data.toString();
|
||||
const msg: unknown = JSON.parse(raw);
|
||||
|
||||
if (!isLabdBastionMessage(msg)) {
|
||||
logger.warn(`Unknown message from labd: ${(msg as { type?: string }).type}`);
|
||||
return;
|
||||
}
|
||||
|
||||
this.handleMessage(msg);
|
||||
} catch (err) {
|
||||
logger.error(`Failed to parse labd message: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
});
|
||||
|
||||
this.ws.on("close", () => {
|
||||
logger.warn("Disconnected from labd");
|
||||
this.stopHeartbeat();
|
||||
this.scheduleReconnect();
|
||||
});
|
||||
|
||||
this.ws.on("error", (err) => {
|
||||
logger.error(`WebSocket error: ${err.message}`);
|
||||
// close event will fire after this, triggering reconnect
|
||||
});
|
||||
}
|
||||
|
||||
/** Push current state to labd. Call this after any state change. */
|
||||
syncState(): void {
|
||||
if (!this.bastionId || !this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
||||
this.sendStateSync();
|
||||
}
|
||||
|
||||
/** Forward a progress event to labd. */
|
||||
sendProgress(mac: string, stage: string, detail: string): void {
|
||||
if (!this.bastionId || !this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
||||
this.send({
|
||||
type: "bastion-progress",
|
||||
bastionId: this.bastionId,
|
||||
mac,
|
||||
stage,
|
||||
detail,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
|
||||
close(): void {
|
||||
this.closed = true;
|
||||
this.stopHeartbeat();
|
||||
if (this.reconnectTimer) {
|
||||
clearTimeout(this.reconnectTimer);
|
||||
this.reconnectTimer = null;
|
||||
}
|
||||
if (this.ws) {
|
||||
this.ws.close();
|
||||
this.ws = null;
|
||||
}
|
||||
}
|
||||
|
||||
private handleMessage(msg: LabdBastionMessage): void {
|
||||
switch (msg.type) {
|
||||
case "bastion-enrolled":
|
||||
this.bastionId = msg.bastionId;
|
||||
// Persist for reconnects
|
||||
writeFileSync(`${this.config.bastionDir}/bastion-id`, msg.bastionId);
|
||||
logger.info(`Enrolled with labd as bastion ${msg.bastionId}`);
|
||||
// Send initial state
|
||||
this.sendStateSync();
|
||||
break;
|
||||
|
||||
case "bastion-heartbeat-ack":
|
||||
// No-op, confirms labd is alive
|
||||
break;
|
||||
|
||||
case "server-shutdown":
|
||||
logger.info(`labd shutting down, will reconnect in ${msg.reconnectAfter}ms`);
|
||||
break;
|
||||
|
||||
case "command-install":
|
||||
case "command-forget":
|
||||
case "command-role-update":
|
||||
void this.handleCommand(msg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Run the registered handler for a labd command and send back a
 * `command-response` carrying the same requestId.
 *
 * A missing handler or a handler that throws produces a response with
 * `status: "error"`; otherwise the handler's result is spread into the response.
 */
private async handleCommand(msg: LabdBastionMessage & { requestId: string }): Promise<void> {
  const replyWithError = (error: string): void => {
    this.send({
      type: "command-response",
      requestId: msg.requestId,
      status: "error",
      error,
    });
  };

  const handler = this.commandHandlers.get(msg.type);
  if (!handler) {
    replyWithError(`No handler for command: ${msg.type}`);
    return;
  }

  try {
    const outcome = await handler(msg);
    this.send({ type: "command-response", requestId: msg.requestId, ...outcome });
  } catch (err) {
    replyWithError(err instanceof Error ? err.message : String(err));
  }
}
|
||||
|
||||
/** Send the full current state to labd; does nothing before enrollment. */
private sendStateSync(): void {
  const bastionId = this.bastionId;
  if (!bastionId) {
    return;
  }
  this.send({ type: "bastion-state-sync", bastionId, state: this.getState() });
}
|
||||
|
||||
/**
 * (Re)start the periodic heartbeat.
 *
 * Every HEARTBEAT_INTERVAL_MS a `bastion-heartbeat` is sent with the uptime in
 * whole seconds and the total number of machines across the discovered,
 * install_queue and installed maps. Ticks before enrollment are skipped.
 */
private startHeartbeat(): void {
  this.stopHeartbeat();

  const beat = (): void => {
    const bastionId = this.bastionId;
    if (!bastionId) {
      return;
    }

    const { discovered, install_queue: queued, installed } = this.getState();
    const machineCount = [discovered, queued, installed].reduce(
      (total, bucket) => total + Object.keys(bucket).length,
      0,
    );
    const uptime = Math.floor((Date.now() - this.startTime) / 1000);

    this.send({ type: "bastion-heartbeat", bastionId, uptime, machineCount });
  };

  this.heartbeatTimer = setInterval(beat, HEARTBEAT_INTERVAL_MS);
}
|
||||
|
||||
/** Cancel the heartbeat interval if one is running. */
private stopHeartbeat(): void {
  const timer = this.heartbeatTimer;
  if (!timer) {
    return;
  }
  clearInterval(timer);
  this.heartbeatTimer = null;
}
|
||||
|
||||
/**
 * Arm a timer that calls connect() again, using exponential backoff.
 *
 * The delay doubles with every attempt, starting at RECONNECT_BASE_DELAY_MS
 * and capped at RECONNECT_MAX_DELAY_MS. Does nothing once the client has been
 * closed.
 */
private scheduleReconnect(): void {
  if (this.closed) {
    return;
  }

  const backoffMs = RECONNECT_BASE_DELAY_MS * 2 ** this.retryCount;
  const delay = Math.min(backoffMs, RECONNECT_MAX_DELAY_MS);
  this.retryCount += 1;

  logger.info(`Reconnecting to labd in ${delay}ms (attempt ${this.retryCount})...`);
  this.reconnectTimer = setTimeout(() => this.connect(), delay);
}
|
||||
|
||||
/** Serialize a message as JSON and send it; dropped silently when the socket is not open. */
private send(msg: BastionMessage): void {
  const socket = this.ws;
  if (!socket || socket.readyState !== WebSocket.OPEN) {
    return;
  }
  socket.send(JSON.stringify(msg));
}
|
||||
}
|
||||
17
bastion/src/bastion/src/services/logger.ts
Normal file
17
bastion/src/bastion/src/services/logger.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
// Winston logger instance shared across the bastion application.
|
||||
|
||||
import winston from "winston";
|
||||
|
||||
/**
 * Shared console logger.
 *
 * Each line is `[bastion] HH:mm:ss message`, where the `[bastion]` tag is
 * coloured red for errors, yellow for warnings and green for everything else.
 */
export const logger = winston.createLogger({
  level: "info",
  format: winston.format.combine(
    winston.format.timestamp({ format: "HH:mm:ss" }),
    winston.format.printf((info) => {
      let colour = "\x1b[32m";
      if (info.level === "error") {
        colour = "\x1b[31m";
      } else if (info.level === "warn") {
        colour = "\x1b[33m";
      }
      return `${colour}[bastion]\x1b[0m ${info.timestamp as string} ${info.message as string}`;
    }),
  ),
  transports: [new winston.transports.Console()],
});
|
||||
166
bastion/src/bastion/src/services/network.ts
Normal file
166
bastion/src/bastion/src/services/network.ts
Normal file
@@ -0,0 +1,166 @@
|
||||
// Auto-detect network interface, IP, gateway, SSH keys, and admin user.
|
||||
|
||||
import { execSync } from "node:child_process";
|
||||
import { readFileSync, existsSync, mkdirSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import { logger } from "./logger.js";
|
||||
|
||||
/**
 * Detect the network interface that carries the default route.
 *
 * Runs `ip route` and returns the device named on the first line that starts
 * with `default`. Both gateway routes (`default via 10.0.0.1 dev eth0 ...`)
 * and on-link routes without a gateway (`default dev ppp0 scope link`) are
 * recognised.
 *
 * @returns The interface name, e.g. `eth0`.
 * @throws Error when no default route with a device is present (or when
 *   `ip route` itself fails).
 */
export function detectInterface(): string {
  const routes = execSync("ip route", { encoding: "utf-8" });
  // Anchored per line (m flag); the part between "default" and "dev" is
  // optional so that routes without "via <gateway>" still match.
  const iface = /^default\s+(?:.*\s)?dev\s+(\S+)/m.exec(routes)?.[1];
  if (iface === undefined) {
    throw new Error("Cannot detect default network interface");
  }
  return iface;
}
|
||||
|
||||
/**
 * Look up the first IPv4 address reported by `ip -4 addr show <iface>`.
 *
 * NOTE(review): `iface` is interpolated into a shell command line, so it must
 * come from a trusted source.
 *
 * @param iface - Interface name to query.
 * @returns The dotted-quad address without prefix length.
 * @throws Error when the output contains no `inet` address.
 */
export function detectIp(iface: string): string {
  const addrOutput = execSync(`ip -4 addr show ${iface}`, { encoding: "utf-8" });
  const [, address] = /inet\s+(\d+\.\d+\.\d+\.\d+)/.exec(addrOutput) ?? [];
  if (address === undefined) {
    throw new Error(`Cannot detect IP on interface ${iface}`);
  }
  return address;
}
|
||||
|
||||
/**
 * Derive the /24 network address from an IPv4 address by replacing the last
 * octet with 0 (e.g. `192.168.1.57` -> `192.168.1.0`).
 *
 * @param ip - Dotted IPv4 address; only the first three octets are used.
 * @returns The network address `a.b.c.0`.
 * @throws Error when fewer than three non-empty octets are present, instead of
 *   silently producing a string that contains "undefined".
 */
export function deriveNetwork(ip: string): string {
  const [first, second, third] = ip.split(".");
  if (!first || !second || !third) {
    throw new Error(`Cannot derive /24 network from IP "${ip}"`);
  }
  return `${first}.${second}.${third}.0`;
}
|
||||
|
||||
/**
 * Read the default gateway from the `default via <address>` entry of `ip route`.
 *
 * @returns The gateway address as printed by `ip route`.
 * @throws Error when no `default via ...` route exists.
 */
export function detectGateway(): string {
  const routes = execSync("ip route", { encoding: "utf-8" });
  const [, gateway] = /default\s+via\s+(\S+)/.exec(routes) ?? [];
  if (gateway === undefined) {
    throw new Error("Cannot detect default gateway");
  }
  return gateway;
}
|
||||
|
||||
/**
 * Collect SSH public keys from the invoking user's `~/.ssh` directory.
 *
 * Sources, in order: `authorized_keys`, then `id_ed25519.pub`, `id_rsa.pub`
 * and `id_ecdsa.pub`. Entries are de-duplicated by their base64 key blob, so
 * the same key with a different comment or options prefix is only kept once,
 * while different keys of the same type are all kept. When running under sudo
 * (SUDO_USER set) the home directory is taken from `getent passwd`.
 *
 * If no key is found at all, an ed25519 keypair is generated at
 * `<bastionDir>/bastion_ed25519` (unless it already exists) and its public
 * key is returned.
 *
 * @param bastionDir - Directory used for the generated fallback keypair.
 * @returns The public key lines and a human-readable description of where
 *   they came from.
 */
export function collectSshKeys(bastionDir: string): { keys: string[]; source: string } {
  const sudoUser = process.env["SUDO_USER"];
  let realHome: string;
  if (sudoUser !== undefined) {
    // Field 6 of the passwd entry is the home directory.
    const passwdEntry = execSync(`getent passwd ${sudoUser}`, { encoding: "utf-8" })
      .split(":")[5]
      ?.trim();
    realHome = passwdEntry !== undefined && passwdEntry !== "" ? passwdEntry : homedir();
  } else {
    realHome = homedir();
  }

  // Identify a key by its base64 blob: the token following the key type.
  // Taking whitespace token [1] is wrong for authorized_keys lines that start
  // with options (`no-pty ssh-ed25519 AAAA...`), where token [1] is the key
  // type and would make distinct keys look like duplicates. OpenSSH blobs
  // always begin with "AAAA". Unrecognised lines fall back to token [1].
  const keyBlobOf = (entry: string): string | undefined =>
    /(?:^|\s)(?:ssh-|ecdsa-sha2-|sk-)\S+\s+(AAAA[A-Za-z0-9+/]*=*)/.exec(entry)?.[1] ??
    entry.split(/\s+/)[1];

  const keys: string[] = [];
  const seenBlobs = new Set<string>();
  const addIfNew = (entry: string): boolean => {
    const blob = keyBlobOf(entry);
    if (blob === undefined || blob === "" || seenBlobs.has(blob)) {
      return false;
    }
    keys.push(entry);
    seenBlobs.add(blob);
    return true;
  };

  let source = "";

  // Read authorized_keys
  const authKeysPath = join(realHome, ".ssh", "authorized_keys");
  if (existsSync(authKeysPath)) {
    for (const line of readFileSync(authKeysPath, "utf-8").split("\n")) {
      const trimmed = line.trim();
      if (trimmed && !trimmed.startsWith("#")) {
        addIfNew(trimmed);
      }
    }
    source = authKeysPath;
  }

  // Also include local pubkey files
  for (const keyFile of ["id_ed25519.pub", "id_rsa.pub", "id_ecdsa.pub"]) {
    const keyPath = join(realHome, ".ssh", keyFile);
    if (existsSync(keyPath) && addIfNew(readFileSync(keyPath, "utf-8").trim())) {
      source = source ? `${source} + ${keyPath}` : keyPath;
    }
  }

  // Generate a keypair if no keys found
  if (keys.length === 0) {
    const generatedKey = join(bastionDir, "bastion_ed25519");
    if (!existsSync(generatedKey)) {
      mkdirSync(bastionDir, { recursive: true });
      logger.warn("No SSH keys found -- generating ed25519 keypair...");
      execSync(`ssh-keygen -t ed25519 -f "${generatedKey}" -N "" -C "bastion-generated@$(hostname)"`, {
        encoding: "utf-8",
        stdio: "pipe",
      });
    }
    const pubKey = readFileSync(`${generatedKey}.pub`, "utf-8").trim();
    keys.push(pubKey);
    source = `${generatedKey} (generated)`;
    logger.warn(`Using generated keypair: ${generatedKey}`);
    logger.warn("Save this private key -- it is the only way to access installed machines.");
  }

  return { keys, source };
}
|
||||
|
||||
/**
 * Determine the admin username: SUDO_USER if set, otherwise USER.
 *
 * @returns The username, or an empty string when neither variable is set or
 *   the resulting user is `root`.
 */
export function detectAdminUser(): string {
  const candidates = [process.env["SUDO_USER"], process.env["USER"]];
  const user = candidates.find((name) => name !== undefined) ?? "";
  if (user === "root") {
    return "";
  }
  return user;
}
|
||||
|
||||
/**
 * Fill in the runtime network fields of a config.
 *
 * Every field that is already non-empty in `config` is kept; empty ones are
 * auto-detected (interface, server IP, /24 network, gateway, SSH keys, admin
 * user). The detected values are logged and a new config object is returned;
 * the input object is not modified.
 *
 * @param config - Configuration with possibly empty network fields.
 * @returns A copy of `config` with the network fields populated.
 */
export function populateNetworkConfig(config: BastionConfig): BastionConfig {
  const orDetect = (configured: string, detect: () => string): string =>
    configured !== "" ? configured : detect();

  const iface = orDetect(config.iface, detectInterface);
  const serverIp = orDetect(config.serverIp, () => detectIp(iface));
  const network = orDetect(config.network, () => deriveNetwork(serverIp));
  const gateway = orDetect(config.gateway, detectGateway);

  let sshKeys = config.sshKeys;
  let sshSource = "config";
  if (sshKeys.length === 0) {
    const collected = collectSshKeys(config.bastionDir);
    sshKeys = collected.keys;
    sshSource = collected.source;
  }

  const adminUser = orDetect(config.adminUser, detectAdminUser);

  logger.info(`Interface: ${iface} IP: ${serverIp} Network: ${network}`);
  logger.info(`SSH keys: ${sshKeys.length} key(s) from ${sshSource}`);
  if (adminUser !== "") {
    logger.info(`Admin user: ${adminUser} (will be created on installed machines)`);
  }

  return { ...config, iface, serverIp, network, gateway, sshKeys, adminUser };
}
|
||||
233
bastion/src/bastion/src/services/post-provision.ts
Normal file
233
bastion/src/bastion/src/services/post-provision.ts
Normal file
@@ -0,0 +1,233 @@
|
||||
// Post-provision automation: installs k3s after OS provisioning completes.
|
||||
// Runs asynchronously — does not block the progress callback.
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { existsSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { logger } from "./logger.js";
|
||||
import { progressBus } from "./progress-events.js";
|
||||
|
||||
/**
 * Locate a private SSH key to use for connecting to provisioned machines.
 *
 * Checks `id_ed25519`, `id_ecdsa` and `id_rsa` (in that order) in the `.ssh`
 * directory of `/home/<SUDO_USER>` when SUDO_USER is set, otherwise of the
 * current user's home directory.
 *
 * NOTE(review): the sudo case assumes the home directory is `/home/<user>`.
 *
 * @returns Path of the first key that exists, or undefined when none does.
 */
function findSshKey(): string | undefined {
  const sudoUser = process.env["SUDO_USER"];
  const sshDir = join(sudoUser ? join("/home", sudoUser) : homedir(), ".ssh");
  return ["id_ed25519", "id_ecdsa", "id_rsa"]
    .map((name) => join(sshDir, name))
    .find((candidate) => existsSync(candidate));
}
|
||||
|
||||
/**
 * Poll a host until an SSH command succeeds or the timeout elapses.
 *
 * Runs `echo ok` over SSH; any failure counts as "not ready yet". Waits five
 * seconds between attempts.
 *
 * @returns true as soon as the probe output contains "ok", false on timeout.
 */
async function waitForSsh(ip: string, user: string, keyPath: string | undefined, timeoutMs: number): Promise<boolean> {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    let probeOutput = "";
    try {
      probeOutput = await sshExec(ip, user, "echo ok", keyPath);
    } catch {
      // Host not reachable yet; try again after the pause below.
    }
    if (probeOutput.includes("ok")) {
      return true;
    }
    await new Promise((wake) => setTimeout(wake, 5000));
  }
  return false;
}
|
||||
|
||||
/**
 * Run a single command on a remote host via the `ssh` binary and capture its
 * standard output.
 *
 * The connection is non-interactive (BatchMode), skips host key checking and
 * uses a 10 second connect timeout.
 *
 * @param ip - Host to connect to.
 * @param user - Remote login user.
 * @param command - Command line executed by the remote shell.
 * @param keyPath - Private key passed with `-i`; omitted when undefined.
 * @returns Resolves with stdout when ssh exits with code 0. Rejects with an
 *   Error (`SSH exit <code>`, followed by ssh's stderr text when there is
 *   any) otherwise, or with the spawn error when ssh cannot be started.
 */
function sshExec(ip: string, user: string, command: string, keyPath: string | undefined): Promise<string> {
  return new Promise((resolve, reject) => {
    const args = [
      "-o", "StrictHostKeyChecking=no",
      "-o", "ConnectTimeout=10",
      "-o", "BatchMode=yes",
      ...(keyPath ? ["-i", keyPath] : []),
      `${user}@${ip}`,
      command,
    ];
    const proc = spawn("ssh", args, { stdio: ["ignore", "pipe", "pipe"] });

    let stdout = "";
    let stderr = "";
    proc.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
    // stderr is a pipe too: it must be drained or the child can block once
    // the pipe fills up. Its content is also the only hint why ssh failed.
    proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });

    proc.on("close", (code) => {
      if (code === 0) {
        resolve(stdout);
        return;
      }
      const reason = stderr.trim();
      reject(new Error(reason === "" ? `SSH exit ${code}` : `SSH exit ${code}: ${reason}`));
    });
    proc.on("error", reject);
  });
}
|
||||
|
||||
/**
 * Run a command on a remote host via `ssh`, forwarding its output line by line.
 *
 * Every non-empty line from stdout and stderr is logged as
 * `[k3s:<label>] <line>` and, when `mac` is given, emitted on the progress
 * bus as a "log" event. Output is buffered per stream so that a line split
 * across two data chunks is reported once, in full; an unterminated last line
 * is reported when the process closes.
 *
 * @param ip - Host to connect to.
 * @param user - Remote login user.
 * @param command - Command line executed by the remote shell.
 * @param keyPath - Private key passed with `-i`; omitted when undefined.
 * @param label - Tag used in log lines and as hostname of emitted events.
 * @param mac - MAC the events are attributed to; no events without it.
 * @returns The ssh exit code; 1 when ssh could not be started or exited
 *   without a code. Never rejects.
 */
function sshRunStreaming(ip: string, user: string, command: string, keyPath: string | undefined, label: string, mac?: string): Promise<number> {
  return new Promise((resolve) => {
    const args = [
      "-o", "StrictHostKeyChecking=no",
      "-o", "ConnectTimeout=10",
      "-o", "BatchMode=yes",
      ...(keyPath ? ["-i", keyPath] : []),
      `${user}@${ip}`,
      command,
    ];
    const proc = spawn("ssh", args, { stdio: ["ignore", "pipe", "pipe"] });

    const report = (line: string): void => {
      logger.info(`[k3s:${label}] ${line}`);
      if (mac) {
        progressBus.emit({ mac, hostname: label, stage: "log", detail: `[k3s] ${line}`, timestamp: new Date().toISOString() });
      }
    };

    // Attach a line-buffering reader to a stream; returns a function that
    // flushes whatever partial line is left.
    const forwardLines = (stream: typeof proc.stdout): (() => void) => {
      let pending = "";
      // Decode in the stream so multi-byte characters are never cut in half.
      stream.setEncoding("utf8");
      stream.on("data", (chunk: string) => {
        const parts = (pending + chunk).split("\n");
        // The last element is an incomplete line (or "" after a newline).
        pending = parts.pop() ?? "";
        for (const line of parts) {
          if (line) report(line);
        }
      });
      return () => {
        if (pending) {
          report(pending);
          pending = "";
        }
      };
    };

    const flushStdout = forwardLines(proc.stdout);
    const flushStderr = forwardLines(proc.stderr);

    proc.on("close", (code) => {
      flushStdout();
      flushStderr();
      resolve(code ?? 1);
    });
    proc.on("error", () => resolve(1));
  });
}
|
||||
|
||||
/**
 * Install k3s on a freshly provisioned machine and, for roles whose app list
 * contains cockroachdb, labd or bastion, deploy the labcontroller stack.
 *
 * Steps, all executed over SSH: wait up to 5 minutes for SSH, load kernel
 * modules and disable swap, write k3s sysctl settings, disable
 * SELinux enforcement/firewalld and remove stale CNI state, run the k3s
 * installer, wait for the node to report Ready, then apply role manifests.
 *
 * Progress is logged and, when `mac` is given, emitted on the progress bus.
 * Failures of SSH availability or of the k3s installer are reported as an
 * "error" stage and end the run; the promise itself resolves either way.
 *
 * @param hostname - Name of the machine (used in messages and events).
 * @param ip - Address to SSH to.
 * @param role - Machine role; "infra" and "labcontroller" become k3s servers,
 *   everything else an agent.
 * @param sshUser - Remote login user.
 * @param mac - MAC address progress events are attributed to.
 */
export async function triggerPostProvisionK3s(
  hostname: string,
  ip: string,
  role: string,
  sshUser: string,
  mac?: string,
): Promise<void> {
  const keyPath = findSshKey();

  const emitStage = (stage: string, detail: string): void => {
    logger.info(`[k3s] ${detail}`);
    if (mac) {
      progressBus.emit({ mac, hostname, stage, detail, timestamp: new Date().toISOString() });
    }
  };

  emitStage("post-provision", `auto-installing k3s on ${hostname} (${ip}) role=${role}`);
  emitStage("post-provision", "waiting for SSH (machine may still be rebooting)");

  // Wait up to 5 minutes for SSH (machine just finished kickstart and is rebooting)
  const sshReady = await waitForSsh(ip, sshUser, keyPath, 300_000);
  if (!sshReady) {
    emitStage("error", `SSH not available on ${hostname} (${ip}) after 5 minutes`);
    logger.error(`[k3s] Run manually: labctl app k3s install ${hostname}`);
    return;
  }

  emitStage("post-provision", "SSH ready, installing k3s prerequisites");

  // Step 1: Prerequisites
  await sshRunStreaming(ip, sshUser, "sudo modprobe br_netfilter overlay 2>/dev/null; sudo swapoff -a", keyPath, hostname, mac);

  // Step 2: Sysctl
  emitStage("post-provision", "configuring sysctl for k3s");
  await sshRunStreaming(ip, sshUser, `sudo bash -c 'cat > /etc/sysctl.d/90-k3s.conf << EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
vm.panic_on_oom=0
vm.overcommit_memory=1
kernel.panic=10
kernel.panic_on_oops=1
EOF
sysctl --system > /dev/null'`, keyPath, hostname, mac);

  // Step 3: SELinux + firewalld + stale CNI cleanup
  emitStage("post-provision", "disabling firewalld and cleaning stale CNI");
  await sshRunStreaming(ip, sshUser, [
    "sudo setenforce 0 2>/dev/null || true",
    "sudo systemctl disable --now firewalld 2>/dev/null || true",
    "sudo systemctl mask firewalld 2>/dev/null || true",
    // Clean stale CNI interfaces that conflict with Cilium (flannel.1 uses same vxlan port 8472)
    "sudo systemctl stop k3s 2>/dev/null || true",
    "sudo ip link delete flannel.1 2>/dev/null || true",
    "sudo ip link delete cilium_vxlan 2>/dev/null || true",
    "sudo ip link delete cilium_host 2>/dev/null || true",
    "sudo ip link delete cilium_net 2>/dev/null || true",
    "sudo rm -rf /etc/cni/net.d/* /var/lib/cni/ 2>/dev/null || true",
  ].join("; "), keyPath, hostname, mac);

  // Step 4: Install k3s
  // labcontroller extends infra — both are k3s servers
  const k3sRole = (role === "infra" || role === "labcontroller") ? "server" : "agent";
  emitStage("post-provision", `installing k3s ${k3sRole}`);
  const code = await sshRunStreaming(ip, sshUser,
    `curl -sfL https://get.k3s.io | sudo INSTALL_K3S_EXEC="${k3sRole}" INSTALL_K3S_SKIP_SELINUX_RPM=true sh -`,
    keyPath, hostname, mac,
  );

  if (code !== 0) {
    emitStage("error", `k3s install failed on ${hostname} (exit ${code})`);
    logger.error(`[k3s] Run manually: labctl app k3s install ${hostname}`);
    return;
  }

  // Step 5: Wait for ready (up to 60 polls, 2 seconds apart)
  // grep -w is required: a plain "Ready" pattern also matches the "NotReady"
  // status and would end the wait immediately.
  emitStage("post-provision", "waiting for k3s node to become Ready");
  await sshRunStreaming(ip, sshUser,
    "for i in $(seq 1 60); do sudo k3s kubectl get nodes 2>/dev/null | grep -qw Ready && break; sleep 2; done",
    keyPath, hostname, mac,
  );

  emitStage("post-provision", `k3s ${k3sRole} installed on ${hostname} (${ip})`);

  // Step 6: Deploy role-specific apps from ROLE_REGISTRY chain
  const { ROLE_REGISTRY } = await import("@lab/shared");
  const roleInfo = ROLE_REGISTRY.find((r: { name: string }) => r.name === role);

  if (roleInfo && roleInfo.apps.length > 0) {
    emitStage("post-provision", `deploying apps: ${roleInfo.apps.join(", ")}`);

    if (roleInfo.apps.includes("cockroachdb") || roleInfo.apps.includes("labd") || roleInfo.apps.includes("bastion")) {
      // This is a labcontroller — deploy the full stack
      emitStage("post-provision", `deploying labcontroller stack on ${hostname}`);

      try {
        const { cockroachDbManifests } = await import("@lab/modules/dist/modules/labcontroller/src/cockroachdb.js");
        const { labdManifests } = await import("@lab/modules/dist/modules/labcontroller/src/labd.js");
        const { bastionManifests } = await import("@lab/modules/dist/modules/labcontroller/src/bastion.js");

        const crdb = cockroachDbManifests();
        const labd = labdManifests({ databaseUrl: crdb.connectionString });
        const bastion = bastionManifests();

        const manifests = [
          crdb.namespace, crdb.headlessService, crdb.clientService, crdb.statefulSet,
          labd.service, labd.deployment,
          bastion.daemonSet,
        ];

        for (const manifest of manifests) {
          const json = JSON.stringify(manifest);
          const kind = (manifest as { kind?: string }).kind ?? "?";
          const name = ((manifest as { metadata?: { name?: string } }).metadata)?.name ?? "?";
          // The JSON is passed inside single quotes; embedded single quotes
          // are escaped as '\'' for the remote shell.
          const result = await sshRunStreaming(ip, sshUser,
            `echo '${json.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f -`,
            keyPath, hostname, mac,
          );
          if (result === 0) {
            emitStage("post-provision", `applied ${kind}/${name}`);
          } else {
            emitStage("error", `failed to apply ${kind}/${name}`);
          }
        }

        // Init CockroachDB
        const initJson = JSON.stringify(crdb.initJob);
        await sshRunStreaming(ip, sshUser,
          `echo '${initJson.replace(/'/g, "'\\''")}' | sudo k3s kubectl apply -f - 2>/dev/null; sleep 30; sudo k3s kubectl exec cockroachdb-0 -n lab-system -- /cockroach/cockroach sql --insecure -e 'CREATE DATABASE IF NOT EXISTS lab' 2>/dev/null || true`,
          keyPath, hostname, mac,
        );

        emitStage("post-provision", `labcontroller stack deployed on ${hostname}`);
      } catch (err) {
        const errMsg = err instanceof Error ? err.message : String(err);
        emitStage("error", `failed to deploy labcontroller stack: ${errMsg}`);
        logger.error(`[post-provision] Run manually: labctl app labcontroller deploy ${hostname}`);
      }
    }
  }

  emitStage("post-provision", `${hostname} (${ip}) provisioning complete (role: ${role})`);
}
|
||||
28
bastion/src/bastion/src/services/progress-events.ts
Normal file
28
bastion/src/bastion/src/services/progress-events.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
// In-memory event bus for provision progress updates.
|
||||
// Allows SSE clients to subscribe to real-time progress and log lines.
|
||||
|
||||
import { EventEmitter } from "node:events";
|
||||
|
||||
/** A single provisioning progress update or raw log line for one machine. */
export interface ProgressEvent {
  /** MAC address identifying the machine the event belongs to. */
  mac: string;
  /** Hostname (or label) of that machine. */
  hostname: string;
  /** "log" for raw log lines, anything else is a progress stage name */
  stage: string;
  /** Human-readable text: the log line or a description of the stage. */
  detail: string;
  /** Time the event was created, as a string (emitters in this codebase use ISO-8601). */
  timestamp: string;
}
|
||||
|
||||
// Single in-process emitter behind the typed facade below; all progress
// events travel on one channel.
const PROGRESS_CHANNEL = "progress";
const bus = new EventEmitter();

/** Typed publish/subscribe facade for progress events. */
export const progressBus = {
  /** Deliver an event synchronously to every registered listener. */
  emit(event: ProgressEvent): void {
    bus.emit(PROGRESS_CHANNEL, event);
  },
  /** Register a listener for all subsequent events. */
  on(listener: (event: ProgressEvent) => void): void {
    bus.addListener(PROGRESS_CHANNEL, listener);
  },
  /** Remove a listener previously passed to `on`. */
  off(listener: (event: ProgressEvent) => void): void {
    bus.removeListener(PROGRESS_CHANNEL, listener);
  },
};
|
||||
69
bastion/src/bastion/src/services/state.ts
Normal file
69
bastion/src/bastion/src/services/state.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
// JSON file-backed state management for discovered machines, install queue, and installed machines.
|
||||
|
||||
import { readFileSync, writeFileSync, renameSync, mkdirSync } from "node:fs";
|
||||
import { dirname } from "node:path";
|
||||
import type { BastionState } from "@lab/shared";
|
||||
|
||||
// Re-export types for consumers that import from this module
|
||||
export type { HardwareInfo, InstallConfig, InstalledInfo, BastionState } from "@lab/shared";
|
||||
|
||||
/**
 * Build a brand-new empty state.
 *
 * A factory is used instead of a shared constant because callers mutate the
 * nested maps of the state they receive; a shallow copy of a shared constant
 * would leak those mutations into every later "empty" state.
 */
function emptyState(): BastionState {
  return {
    discovered: {},
    install_queue: {},
    installed: {},
  };
}

/** Callback invoked with the new state after every successful update. */
export type StateChangeListener = (state: BastionState) => void;

/** JSON file-backed store for the bastion state. */
export class StateManager {
  private changeListeners: StateChangeListener[] = [];

  /** @param stateFile - Path of the JSON file holding the state. */
  constructor(private readonly stateFile: string) {}

  /** Register a listener that fires after every state update. */
  onChange(listener: StateChangeListener): void {
    this.changeListeners.push(listener);
  }

  /**
   * Read the state from disk.
   *
   * Sections missing from the file default to empty maps. If the file cannot
   * be read or parsed, a fresh empty state is returned.
   */
  load(): BastionState {
    try {
      const raw = readFileSync(this.stateFile, "utf-8");
      const parsed = JSON.parse(raw) as Partial<BastionState>;
      return {
        discovered: parsed.discovered ?? {},
        install_queue: parsed.install_queue ?? {},
        installed: parsed.installed ?? {},
      };
    } catch {
      return emptyState();
    }
  }

  /**
   * Write the state to disk, creating the parent directory if needed.
   *
   * The data is written to `<stateFile>.tmp` and then renamed over the real
   * file so readers never see a partially written file.
   */
  save(state: BastionState): void {
    mkdirSync(dirname(this.stateFile), { recursive: true });
    const tmp = `${this.stateFile}.tmp`;
    writeFileSync(tmp, JSON.stringify(state, null, 2));
    renameSync(tmp, this.stateFile);
  }

  /** Create the state file with an empty state if it cannot be read. */
  init(): void {
    try {
      readFileSync(this.stateFile, "utf-8");
    } catch {
      this.save(emptyState());
    }
  }

  /**
   * Read, modify, and write state in one synchronous step.
   *
   * @param fn - Mutates the loaded state in place.
   * @returns The state as saved.
   */
  update(fn: (state: BastionState) => void): BastionState {
    const state = this.load();
    fn(state);
    this.save(state);
    for (const listener of this.changeListeners) {
      try { listener(state); } catch { /* don't let listener errors break state updates */ }
    }
    return state;
  }
}
|
||||
99
bastion/src/bastion/src/services/syslog-listener.ts
Normal file
99
bastion/src/bastion/src/services/syslog-listener.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
// UDP syslog listener for receiving Anaconda install logs.
|
||||
// Anaconda's `logging --host` sends RFC 3164 syslog over UDP.
|
||||
// We parse the messages and route them to InstallLogBuffer.
|
||||
|
||||
import { createSocket, type Socket } from "node:dgram";
|
||||
import type { InstallLogBuffer } from "./install-log.js";
|
||||
import type { StateManager } from "./state.js";
|
||||
import { logger } from "./logger.js";
|
||||
|
||||
/**
 * Parse a BSD syslog (RFC 3164) message.
 *
 * Expected shape: `<PRI>Mon DD HH:MM:SS [HOSTNAME] PROGRAM[PID]: MESSAGE`.
 * The hostname is optional, so both
 * `<13>Mar 28 19:32:01 host anaconda[1234]: some message` and
 * `<13>Mar 28 19:32:01 anaconda[1234]: some message` yield program
 * "anaconda". A token ending in ":" is never taken as the hostname.
 *
 * @param raw - One syslog datagram as text.
 * @returns The program name and message; when the line does not match, the
 *   program is "unknown" and the message is the whole line without priority.
 */
function parseSyslogLine(raw: string): { program: string; message: string } {
  // Strip priority: <NN>
  const noPri = raw.replace(/^<\d+>/, "");

  // timestamp, optional hostname (a token not ending in ":"), then
  // PROGRAM with optional [PID], a colon, and the message.
  const match = /^\w+\s+\d+\s+[\d:]+\s+(?:\S*[^\s:]\s+)?(\S+?)(?:\[\d+\])?:\s*(.*)/.exec(noPri);
  if (match) {
    const [, program = "unknown", message = ""] = match;
    return { program, message };
  }

  // Fallback: just return the whole line
  return { program: "unknown", message: noPri.trim() };
}
|
||||
|
||||
/**
 * UDP syslog receiver that routes messages from machines known to the state
 * store into the install log buffer, keyed by MAC address.
 */
export class SyslogListener {
  private socket: Socket | null = null;
  private port: number;
  private installLog: InstallLogBuffer;
  private state: StateManager;

  /**
   * @param port - UDP port to listen on.
   * @param installLog - Buffer that receives the parsed log lines.
   * @param state - State store used to map source IPs to machines.
   */
  constructor(port: number, installLog: InstallLogBuffer, state: StateManager) {
    this.port = port;
    this.installLog = installLog;
    this.state = state;
  }

  /**
   * Whether `text` contains `ip` as a complete address, i.e. not as a prefix
   * or suffix of a longer one ("10.0.0.1" must not match "10.0.0.12").
   */
  private static mentionsIp(text: string, ip: string): boolean {
    if (ip === "") return false;
    let from = 0;
    for (;;) {
      const at = text.indexOf(ip, from);
      if (at === -1) return false;
      const before = text.slice(Math.max(0, at - 2), at);
      const after = text.slice(at + ip.length, at + ip.length + 2);
      // A neighbouring digit, or "<digit>." / ".<digit>", means the hit is
      // part of a longer address. A plain trailing "." (end of sentence) is fine.
      const extendsLeft = /\d$/.test(before) || /\d\.$/.test(before);
      const extendsRight = /^\d/.test(after) || /^\.\d/.test(after);
      if (!extendsLeft && !extendsRight) return true;
      from = at + 1;
    }
  }

  /**
   * Resolve a source IP to a MAC address.
   *
   * Machines in the install queue match when their progress detail mentions
   * the IP; installed machines match on their recorded IP.
   *
   * @param ip - Source address of the datagram.
   * @param currentState - State snapshot to search; loaded when omitted.
   * @returns The MAC, or null when the IP belongs to no known machine.
   */
  private resolveIpToMac(ip: string, currentState: ReturnType<StateManager["load"]> = this.state.load()): string | null {
    // Check install queue — machines being installed have an IP from DHCP.
    // The only place the IP is recorded during install is the progress detail.
    for (const [mac, entry] of Object.entries(currentState.install_queue)) {
      const detail = entry.progress_detail;
      if (typeof detail === "string" && SyslogListener.mentionsIp(detail, ip)) return mac;
    }

    // Check installed machines
    for (const [mac, info] of Object.entries(currentState.installed)) {
      if (info.ip === ip) return mac;
    }

    return null;
  }

  /**
   * Resolve a MAC to the hostname from install queue or installed state.
   * Falls back to the MAC itself when no hostname is known.
   */
  private resolveHostname(mac: string, s: ReturnType<StateManager["load"]> = this.state.load()): string {
    return s.install_queue[mac]?.hostname ?? s.installed[mac]?.hostname ?? mac;
  }

  /** Open the UDP socket and start routing incoming messages. */
  start(): void {
    this.socket = createSocket("udp4");

    this.socket.on("message", (msg, rinfo) => {
      const raw = msg.toString("utf-8").trim();
      if (!raw) return;

      const { program, message } = parseSyslogLine(raw);
      // One state read per datagram serves both lookups.
      const snapshot = this.state.load();
      const mac = this.resolveIpToMac(rinfo.address, snapshot);

      if (mac) {
        const hostname = this.resolveHostname(mac, snapshot);
        const line = program !== "unknown" ? `[${program}] ${message}` : message;
        this.installLog.append(mac, [line], hostname);
      } else {
        // Unknown source: keep it out of the install log buffer, but leave a
        // trace at debug level.
        logger.debug(`Syslog from unknown source ${rinfo.address}: ${message}`);
      }
    });

    this.socket.on("error", (err) => {
      logger.error(`Syslog listener error: ${err.message}`);
    });

    this.socket.bind(this.port, "0.0.0.0", () => {
      logger.info(`Syslog listener on UDP :${this.port}`);
    });
  }

  /** Close the socket if it is open. */
  stop(): void {
    if (this.socket) {
      this.socket.close();
      this.socket = null;
    }
  }
}
|
||||
93
bastion/src/bastion/src/templates/boot.ipxe.ts
Normal file
93
bastion/src/bastion/src/templates/boot.ipxe.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
// iPXE boot script templates for dispatch routing.
|
||||
|
||||
/** Inputs for the initial iPXE boot script. */
export interface BootIpxeParams {
  /** Address of the bastion HTTP server that iPXE clients chain to. */
  serverIp: string;
  /** TCP port of that HTTP server. */
  httpPort: number;
}
|
||||
|
||||
/**
 * Render the first-stage iPXE script.
 *
 * It prints a banner and chains to the bastion's `/dispatch` endpoint, passing
 * the client's own MAC through iPXE's `${net0/mac}` setting (expanded by iPXE
 * at boot time, not here).
 */
export function renderBootIpxe(params: BootIpxeParams): string {
  const { serverIp, httpPort } = params;
  const rule = "echo ============================================";
  return [
    "#!ipxe",
    "",
    "echo",
    rule,
    "echo Lab PXE Bastion",
    "echo Contacting server for instructions...",
    rule,
    "echo",
    "",
    `chain http://${serverIp}:${httpPort}/dispatch?mac=\${net0/mac}`,
    "",
  ].join("\n");
}
|
||||
|
||||
/**
 * Render the iPXE script for discovery mode.
 *
 * Boots the Fedora installer kernel/initrd served by the bastion with the
 * discovery kickstart (`/discover.ks`), text mode, and both serial and VGA
 * consoles.
 */
export function renderDiscoverIpxe(params: {
  mac: string;
  serverIp: string;
  httpPort: number;
  fedoraMirror: string;
}): string {
  const { mac, serverIp, httpPort, fedoraMirror } = params;
  const base = `http://${serverIp}:${httpPort}`;
  const rule = "echo =============================================";
  const kernelArgs = [
    `inst.ks=${base}/discover.ks`,
    `inst.stage2=${fedoraMirror}`,
    "inst.text",
    "console=ttyS0,115200n8",
    "console=tty0",
  ].join(" ");

  return [
    "#!ipxe",
    "",
    "echo",
    rule,
    "echo Lab PXE Bastion - DISCOVERY MODE",
    `echo MAC: ${mac}`,
    "echo Collecting hardware info...",
    rule,
    "echo",
    "",
    `kernel ${base}/vmlinuz ${kernelArgs}`,
    `initrd ${base}/initrd.img`,
    "boot",
    "",
  ].join("\n");
}
|
||||
|
||||
/**
 * Render the iPXE script for install mode.
 *
 * Boots the Fedora installer kernel/initrd served by the bastion with the
 * per-machine kickstart (`/ks?mac=<mac>`), the given repository, text mode,
 * and both serial and VGA consoles.
 */
export function renderInstallIpxe(params: {
  mac: string;
  hostname: string;
  serverIp: string;
  httpPort: number;
  fedoraVersion: string;
  fedoraMirror: string;
}): string {
  const { mac, hostname, serverIp, httpPort, fedoraVersion, fedoraMirror } = params;
  const base = `http://${serverIp}:${httpPort}`;
  const rule = "echo =============================================";
  const kernelArgs = [
    `inst.ks=${base}/ks?mac=${mac}`,
    `inst.repo=${fedoraMirror}`,
    "inst.text",
    "console=ttyS0,115200n8",
    "console=tty0",
  ].join(" ");

  return [
    "#!ipxe",
    "",
    "echo",
    rule,
    `echo Lab PXE Bastion - INSTALLING Fedora ${fedoraVersion}`,
    `echo Target: ${hostname}`,
    `echo MAC: ${mac}`,
    rule,
    "echo",
    "",
    `kernel ${base}/vmlinuz ${kernelArgs}`,
    `initrd ${base}/initrd.img`,
    "boot",
    "",
  ].join("\n");
}
|
||||
|
||||
/**
 * Render the iPXE script for machines that are already installed.
 *
 * Prints a banner, pauses three seconds and leaves iPXE with `exit 1` so the
 * machine boots from local disk instead.
 */
export function renderLocalBootIpxe(hostname: string): string {
  const rule = "echo =============================================";
  const script = [
    "#!ipxe",
    "",
    "echo",
    rule,
    `echo Lab PXE Bastion - ${hostname}`,
    "echo Already installed, booting from local disk",
    rule,
    "echo",
    "sleep 3",
    "exit 1",
  ];
  return `${script.join("\n")}\n`;
}
|
||||
118
bastion/src/bastion/src/templates/discover.ks.ts
Normal file
118
bastion/src/bastion/src/templates/discover.ks.ts
Normal file
@@ -0,0 +1,118 @@
|
||||
// Discovery kickstart template.
|
||||
// Boots Fedora installer, collects hardware info, POSTs to bastion, reboots.
|
||||
// Never touches the disk.
|
||||
|
||||
/** Inputs for the discovery kickstart template. */
export interface DiscoverKickstartParams {
  /** Address of the bastion HTTP server the discovery payload is posted to. */
  serverIp: string;
  /** TCP port of that HTTP server. */
  httpPort: number;
}
|
||||
|
||||
export function renderDiscoverKickstart(params: DiscoverKickstartParams): string {
|
||||
const bastionUrl = `http://${params.serverIp}:${params.httpPort}`;
|
||||
|
||||
return `# Lab Bastion -- Discovery Mode
|
||||
# Collects hardware inventory and reboots. Does NOT install anything.
|
||||
|
||||
%pre --erroronfail --log=/tmp/discover.log
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
# -- Collect hardware info from /proc, /sys, and available tools --
|
||||
|
||||
MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||
PRODUCT=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo "unknown")
|
||||
BOARD=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo "unknown")
|
||||
SERIAL=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo "unknown")
|
||||
MANUFACTURER=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo "unknown")
|
||||
CPUMODEL=$(grep -m1 'model name' /proc/cpuinfo | cut -d: -f2 | sed 's/^ //')
|
||||
CPUCORES=$(grep -c '^processor' /proc/cpuinfo)
|
||||
MEMGB=$(awk '/MemTotal/ {printf "%d", $2/1024/1024}' /proc/meminfo)
|
||||
ARCHTYPE=$(uname -m)
|
||||
|
||||
# Disk info
|
||||
DISKS_JSON=$(lsblk -Jb -o NAME,SIZE,TYPE,MODEL 2>/dev/null | python3 -c "
|
||||
import sys, json
|
||||
data = json.load(sys.stdin)
|
||||
disks = [d for d in data.get('blockdevices', []) if d.get('type') == 'disk']
|
||||
result = []
|
||||
for d in disks:
|
||||
size_gb = round(int(d.get('size', 0)) / 1073741824, 1)
|
||||
result.append({
|
||||
'name': d.get('name', '?'),
|
||||
'size_gb': size_gb,
|
||||
'model': (d.get('model') or 'unknown').strip()
|
||||
})
|
||||
print(json.dumps(result))
|
||||
" 2>/dev/null || echo '[]')
|
||||
|
||||
# Network interfaces
|
||||
NICS_JSON=$(ip -j link show 2>/dev/null | python3 -c "
|
||||
import sys, json
|
||||
nics = json.load(sys.stdin)
|
||||
result = []
|
||||
for n in nics:
|
||||
if n.get('link_type') == 'loopback':
|
||||
continue
|
||||
result.append({
|
||||
'name': n.get('ifname', '?'),
|
||||
'mac': n.get('address', '?'),
|
||||
'state': n.get('operstate', '?')
|
||||
})
|
||||
print(json.dumps(result))
|
||||
" 2>/dev/null || echo '[]')
|
||||
|
||||
# -- Build and POST discovery payload --
|
||||
|
||||
PAYLOAD=$(python3 -c "
|
||||
import json
|
||||
print(json.dumps({
|
||||
'mac': '$MAC',
|
||||
'product': '$PRODUCT',
|
||||
'board': '$BOARD',
|
||||
'serial': '$SERIAL',
|
||||
'manufacturer': '$MANUFACTURER',
|
||||
'cpu_model': '$CPUMODEL',
|
||||
'cpu_cores': int('$CPUCORES' or 0),
|
||||
'memory_gb': int('$MEMGB' or 0),
|
||||
'arch': '$ARCHTYPE',
|
||||
'disks': $DISKS_JSON,
|
||||
'nics': $NICS_JSON
|
||||
}))
|
||||
")
|
||||
|
||||
# POST to bastion
|
||||
BASTION_URL="${bastionUrl}/api/discover"
|
||||
|
||||
if command -v curl >/dev/null 2>&1; then
|
||||
curl -sf -X POST "$BASTION_URL" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "$PAYLOAD" || true
|
||||
else
|
||||
python3 -c "
|
||||
import urllib.request
|
||||
req = urllib.request.Request('$BASTION_URL',
|
||||
data=b'''$PAYLOAD''',
|
||||
headers={'Content-Type': 'application/json'})
|
||||
try:
|
||||
urllib.request.urlopen(req, timeout=10)
|
||||
except Exception as e:
|
||||
print(f'POST failed: {e}')
|
||||
"
|
||||
fi
|
||||
|
||||
# -- Reboot -- do NOT let Anaconda proceed --
|
||||
echo ""
|
||||
echo "=== Discovery complete, rebooting ==="
|
||||
echo ""
|
||||
sleep 3
|
||||
echo 1 > /proc/sys/kernel/sysrq
|
||||
echo b > /proc/sysrq-trigger
|
||||
sleep 5
|
||||
reboot -f
|
||||
|
||||
%end
|
||||
|
||||
# Anaconda should never get here, but just in case:
|
||||
reboot
|
||||
`;
|
||||
}
|
||||
97
bastion/src/bastion/src/templates/dnsmasq.conf.ts
Normal file
97
bastion/src/bastion/src/templates/dnsmasq.conf.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
// dnsmasq configuration template.
|
||||
// Supports proxy DHCP mode (alongside existing DHCP) and full DHCP mode.
|
||||
// Handles UEFI HTTP Boot, iPXE chainloading, and PXE service directives.
|
||||
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
|
||||
/**
 * Render the dnsmasq configuration for the PXE bastion.
 *
 * In `"full"` DHCP mode a lease range is emitted (defaulting to `.100`-`.200`
 * of the configured network when no explicit range is set) together with
 * router/DNS options pointing at the gateway. In any other mode dnsmasq is
 * configured as a proxy DHCP server, and `pxe-service` directives are added
 * when the mode is `"proxy"`.
 *
 * @param config Bastion configuration; reads `iface`, `serverIp`, `httpPort`,
 *   `network`, `gateway`, `dhcpMode`, `tftpDir`, `bastionDir`,
 *   `dhcpRangeStart` and `dhcpRangeEnd`.
 * @returns The dnsmasq configuration file content.
 */
export function renderDnsmasqConf(config: BastionConfig): string {
  const { iface, serverIp, httpPort, network, gateway, dhcpMode, tftpDir } = config;
  const httpBase = `http://${serverIp}:${httpPort}`;

  // Derive DHCP range for full mode
  let dhcpRangeStart = config.dhcpRangeStart;
  let dhcpRangeEnd = config.dhcpRangeEnd;
  if (dhcpMode === "full") {
    // Keep the first three octets regardless of the last one, so a network
    // such as 10.0.0.128 does not turn into the invalid "10.0.0.128.100".
    const networkBase = network.split(".").slice(0, 3).join(".");
    dhcpRangeStart = dhcpRangeStart || `${networkBase}.100`;
    dhcpRangeEnd = dhcpRangeEnd || `${networkBase}.200`;
  }

  const dhcpSection =
    dhcpMode === "full"
      ? `# Full DHCP mode -- bastion is the only DHCP server on this network
dhcp-range=${dhcpRangeStart},${dhcpRangeEnd},255.255.255.0,12h
dhcp-option=3,${gateway}
dhcp-option=6,${gateway}`
      : `# ProxyDHCP -- works alongside existing DHCP (UniFi etc)
dhcp-range=${network},proxy`;

  const pxeServiceSection =
    dhcpMode === "proxy"
      ? `# PXE service directives (proxy DHCP needs these to respond on port 4011)
pxe-service=tag:!ipxe,x86PC,"PXE Boot",undionly.kpxe
pxe-service=tag:!ipxe,X86-64_EFI,"PXE Boot",ipxe.efi
pxe-service=tag:!ipxe,BC_EFI,"PXE Boot",ipxe.efi
pxe-service=tag:!ipxe,ARM64_EFI,"PXE Boot",ipxe-arm64.efi`
      : `# Full DHCP mode -- pxe-service directives omitted (they trigger PXE Boot Server
# Discovery protocol which some UEFI implementations don't support). The dhcp-boot
# directives above provide the boot filename directly in the DHCP offer.`;

  return `# Lab PXE Bastion -- dnsmasq config

# Disable DNS (we only want DHCP/TFTP)
port=0

# Listen on the right interface
interface=${iface}
bind-dynamic

${dhcpSection}

# TFTP for initial PXE boot
enable-tftp
tftp-root=${tftpDir}
tftp-no-blocksize

# Detect client architecture -- PXE (TFTP) clients
dhcp-match=set:bios,option:client-arch,0
dhcp-match=set:efi-x86_64,option:client-arch,7
dhcp-match=set:efi-x86_64,option:client-arch,9
dhcp-match=set:efi-arm64,option:client-arch,11

# Detect client architecture -- UEFI HTTP Boot clients (no TFTP size limit)
# 16 = x64 UEFI HTTP boot, 19 = ARM64 UEFI HTTP boot (20 is BIOS HTTP boot)
dhcp-match=set:httpboot-x86_64,option:client-arch,16
dhcp-match=set:httpboot-arm64,option:client-arch,19

# Detect iPXE clients (already chainloaded)
dhcp-userclass=set:ipxe,iPXE

# UEFI HTTP Boot -> serve full iPXE EFI via HTTP (no TFTP size limit)
dhcp-boot=tag:httpboot-x86_64,${httpBase}/ipxe.efi
dhcp-boot=tag:httpboot-arm64,${httpBase}/ipxe-arm64.efi
# Echo vendor class back to HTTP Boot clients (required by UEFI HTTP Boot spec)
dhcp-option-force=tag:httpboot-x86_64,60,HTTPClient
dhcp-option-force=tag:httpboot-arm64,60,HTTPClient

# First PXE boot -> serve iPXE binary via TFTP (BIOS and UEFI fallback)
dhcp-boot=tag:bios,tag:!ipxe,undionly.kpxe
dhcp-boot=tag:efi-x86_64,tag:!ipxe,ipxe.efi
dhcp-boot=tag:efi-arm64,tag:!ipxe,ipxe-arm64.efi
# Echo vendor class back to PXE clients (OVMF requires this, real hardware usually doesn't)
dhcp-option-force=tag:efi-x86_64,60,PXEClient
dhcp-option-force=tag:efi-arm64,60,PXEClient
dhcp-option-force=tag:bios,60,PXEClient

# iPXE clients -> chain to boot script via HTTP
dhcp-boot=tag:ipxe,${httpBase}/boot.ipxe

${pxeServiceSection}

# Lease file in bastion directory (avoid default /var/lib/dnsmasq which needs root)
dhcp-leasefile=${config.bastionDir}/dnsmasq.leases

# Verbose logging
log-dhcp
`;
}
|
||||
427
bastion/src/bastion/src/templates/install.ks.ts
Normal file
427
bastion/src/bastion/src/templates/install.ks.ts
Normal file
@@ -0,0 +1,427 @@
|
||||
// Install kickstart template.
|
||||
// Full Fedora server install with LVM partitioning, %pre for reprovision detection,
|
||||
// packages, and %post with SSH keys, user creation, k3s prereqs, progress callbacks.
|
||||
|
||||
import type { Role } from "@lab/shared";
|
||||
|
||||
/** Inputs for the Fedora install kickstart. */
export interface InstallKickstartParams {
  /** Short host name; combined with `domain` to form the FQDN. */
  hostname: string;
  /** Target disk (`sda` or `/dev/sda`); empty string enables auto-detection in %pre. */
  disk: string;
  /** Node role; `worker` adds a Longhorn LV, `infra` a Rancher LV, `vanilla` skips k3s setup. */
  role: Role;
  /** DNS domain; may be empty. */
  domain: string;
  /** Fedora release, used in the header comment only. */
  fedoraVersion: string;
  timezone: string;
  locale: string;
  /** Bastion address used for progress/log callbacks and remote logging. */
  serverIp: string;
  /** Bastion HTTP port for the callback endpoints. */
  httpPort: number;
  /** Port given to the kickstart `logging` command. */
  syslogPort: number;
  /** Public keys installed for root (and for the admin user, if any). */
  sshKeys: string[];
  /** Optional admin account name; empty string to skip. */
  adminUser: string;
}

/**
 * Render the Fedora server install kickstart.
 *
 * The output consists of the command section, a `%pre` script that writes
 * `/tmp/part.ks` (reusing an existing `labvg` volume group and its data LVs
 * when one is found, otherwise wiping the disk), the `%packages` list and a
 * `%post` script that configures SSH, role-specific system settings, EFI boot
 * order and provisioning metadata, reporting progress to the bastion.
 *
 * @param params Host identity, disk, role and bastion connection details.
 * @returns The kickstart file content.
 */
export function renderInstallKickstart(params: InstallKickstartParams): string {
  const {
    hostname,
    disk,
    role,
    domain,
    fedoraVersion,
    timezone,
    locale,
    serverIp,
    httpPort,
    syslogPort,
    sshKeys,
    adminUser,
  } = params;

  const fqdn = domain ? `${hostname}.${domain}` : hostname;
  const vg = "labvg";
  const now = new Date().toISOString();
  const hasLonghorn = role === "worker";
  const hasRancher = role === "infra";
  const isVanilla = role === "vanilla";

  // -- Auth section --
  // Always set a root password (for serial console debugging) + SSH keys
  const auth =
    sshKeys.length > 0
      ? `rootpw --plaintext lab-root-pw\nsshkey --username=root "${sshKeys[0]}"`
      : "rootpw --plaintext lab-root-pw";

  // -- Admin user directive --
  const userDirective = adminUser ? `user --name=${adminUser} --groups=wheel --lock` : "";

  // -- SSH keys for %post --
  const allKeys = sshKeys.join("\n");
  let sshPostBlock = "";
  if (sshKeys.length > 0) {
    sshPostBlock = `
# Set up SSH keys for root
mkdir -p /root/.ssh && chmod 700 /root/.ssh
cat > /root/.ssh/authorized_keys << 'SSHKEYS'
${allKeys}
SSHKEYS
chmod 600 /root/.ssh/authorized_keys`;
  }

  if (adminUser && sshKeys.length > 0) {
    sshPostBlock += `

# Set up SSH keys for ${adminUser}
ADMIN_HOME=$(getent passwd ${adminUser} | cut -d: -f6)
mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh"
cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
chown -R ${adminUser}:${adminUser} "$ADMIN_HOME/.ssh"
chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"

# Fix SELinux contexts for SSH
restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true

# Passwordless sudo for ${adminUser}
echo '${adminUser} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/${adminUser}
chmod 440 /etc/sudoers.d/${adminUser}`;
  }

  // -- Disk detection --
  // The %pre script builds device paths as /dev/$DISK, so a caller-supplied
  // "/dev/sda" is reduced to the bare device name first.
  const diskName = disk.replace(/^\/dev\//, "");
  const diskLine = diskName
    ? `DISK="${diskName}"`
    : `DISK=""
for d in /dev/nvme0n1 /dev/sda /dev/vda; do
  [ -b "$d" ] && { DISK="$(basename $d)"; break; }
done
[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }`;

  // -- Longhorn LV for fresh install --
  const longhornFreshLine = hasLonghorn
    ? `logvol /var/lib/longhorn --vgname=${vg} --name=longhorn --fstype=xfs --grow --size=1`
    : "";

  // -- Rancher LV for fresh install (infra role) --
  const rancherFreshLine = hasRancher
    ? `logvol /var/lib/rancher --vgname=${vg} --name=rancher --fstype=xfs --size=20480`
    : "";

  // Shell helper shared by %pre and %post: POSTs {mac, stage, detail} to the bastion.
  const progressHelper = `# Progress callback helper
bastion_progress() {
  local stage="$1" detail="\${2:-}"
  local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
  curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" \\
    -H "Content-Type: application/json" \\
    -d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
}`;

  const k3sPackages = isVanilla
    ? "# vanilla role -- skipping k3s prerequisites"
    : `# k3s prerequisites
container-selinux
iptables-nft
nftables
policycoreutils-python-utils
chrony
tar
socat
conntrack-tools
ethtool`;

  // Role-specific %post section. The conntrack module is named nf_conntrack
  // on current kernels; the legacy ip_conntrack name no longer resolves.
  const roleSetup = isVanilla
    ? `# -- vanilla role: skip k3s kernel/sysctl/firewall setup --
# -- Enable chronyd for time sync --
systemctl enable chronyd || true

# -- Serial console (for debugging — auto-login as root on ttyS0) --
# AWS EC2 compatible: ttyS0 @ 115200n8
systemctl enable serial-getty@ttyS0.service || true

# -- Forward all system logs to serial console --
cat > /etc/rsyslog.d/serial-console.conf << 'RSYSLOG'
*.* /dev/ttyS0
RSYSLOG
systemctl enable rsyslog || true`
    : `# -- Kernel modules for k3s --
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
br_netfilter
overlay
nf_conntrack
MODULES
modprobe br_netfilter || true
modprobe overlay || true

# -- Sysctl for k3s networking --
cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
net.ipv6.conf.all.forwarding = 1
fs.inotify.max_user_instances = 524288
fs.inotify.max_user_watches = 1048576
SYSCTL
sysctl --system || true

# -- Disable firewalld permanently (k3s/Cilium manage iptables directly) --
systemctl disable --now firewalld || true
systemctl mask firewalld || true

# -- Enable chronyd for time sync --
systemctl enable chronyd || true`;

  const k3sInstall = hasRancher
    ? `# Install k3s server (skip start - will be configured manually)
curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -
`
    : "";

  return `# Lab Bastion -- Fedora ${fedoraVersion} server install
# Generated: ${now}
# Target: ${fqdn} (role=${role})

text
reboot

lang ${locale}
keyboard uk
timezone ${timezone} --utc

network --bootproto=dhcp --activate --hostname=${fqdn}

${auth}
${userDirective}

bootloader --append="console=tty0 console=ttyS0,115200n8"

logging --host=${serverIp} --port=${syslogPort}

url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch

# Partitioning is generated dynamically by %pre (supports reprovision preservation)
%include /tmp/part.ks

%pre --log=/tmp/pre-partition.log
#!/bin/bash
set -x

${progressHelper}

bastion_progress "partitioning" "detecting disk"

VG="${vg}"
${diskLine}

REPROVISION=no

# Check if VG exists (reprovision scenario)
if vgs $VG &>/dev/null; then
  echo "=== Existing VG found - reprovision mode ==="
  REPROVISION=yes

  # Detect which data LVs to preserve
  PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no; PRESERVE_RANCHER=no
  lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
  lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
  lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
  lvs $VG/rancher &>/dev/null && PRESERVE_RANCHER=yes

  echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME rancher=$PRESERVE_RANCHER"

  # Remove only OS logical volumes (keep data LVs)
  for lv in root var varlog swap; do
    lvremove -f $VG/$lv 2>/dev/null || true
  done
else
  bastion_progress "partitioning" "fresh install on $DISK"
fi

if [ "$REPROVISION" = "yes" ]; then
  # Find existing boot partitions by type
  EFI_PART=$(blkid -t TYPE=vfat -o device /dev/\${DISK}* 2>/dev/null | head -1)
  BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/\${DISK}* 2>/dev/null | head -1)
  EFI_PART=\${EFI_PART:-/dev/\${DISK}1}
  BOOT_PART=\${BOOT_PART:-/dev/\${DISK}2}
  echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"

  # Build partition config reusing existing PV/VG
  cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --none
part /boot/efi --onpart=$EFI_PART --fstype=efi
part /boot --onpart=$BOOT_PART --fstype=ext4
volgroup ${vg} --useexisting --noformat
logvol swap --vgname=${vg} --name=swap --fstype=swap --size=27648
logvol / --vgname=${vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname=${vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname=${vg} --name=varlog --fstype=xfs --size=10240
PARTEOF

  # Preserve or recreate data LVs
  if [ "$PRESERVE_HOME" = "yes" ]; then
    echo "logvol /home --vgname=${vg} --name=home --useexisting --noformat" >> /tmp/part.ks
  else
    echo "logvol /home --vgname=${vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks
  fi

  if [ "$PRESERVE_SRV" = "yes" ]; then
    echo "logvol /srv --vgname=${vg} --name=srv --useexisting --noformat" >> /tmp/part.ks
  else
    echo "logvol /srv --vgname=${vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks
  fi

  if [ "$PRESERVE_LONGHORN" = "yes" ]; then
    echo "logvol /var/lib/longhorn --vgname=${vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
  fi

  if [ "$PRESERVE_RANCHER" = "yes" ]; then
    echo "logvol /var/lib/rancher --vgname=${vg} --name=rancher --useexisting --noformat" >> /tmp/part.ks
  fi

else
  # Fresh install
  cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --all --initlabel --drives=$DISK
part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
part /boot --fstype=ext4 --size=3072 --ondisk=$DISK
part pv.01 --size=1 --grow --ondisk=$DISK
volgroup ${vg} pv.01
logvol swap --vgname=${vg} --name=swap --fstype=swap --size=27648
logvol / --vgname=${vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname=${vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname=${vg} --name=varlog --fstype=xfs --size=10240
logvol /home --vgname=${vg} --name=home --fstype=xfs --size=10240
logvol /srv --vgname=${vg} --name=srv --fstype=xfs --size=20480
${longhornFreshLine}
${rancherFreshLine}
PARTEOF
fi

echo "=== Generated partition config ==="
cat /tmp/part.ks
echo "==================================="

bastion_progress "partitioning" "disk layout ready"

%end

%packages
@core
openssh-server
vim-enhanced
tmux
git
curl
wget
python3
lshw
dmidecode
dnf-plugins-core

# Networking and diagnostics
NetworkManager
bind-utils
net-tools
iproute
iputils
traceroute
tcpdump
htop
iotop
strace
jq

${k3sPackages}

# Boot management
efibootmgr

# Puppet prerequisites
ruby
ruby-libs

# Exclude desktop
-@workstation-product
-@gnome-desktop
-gnome-shell
-gdm
-PackageKit
-PackageKit-glib
%end

%post --log=/root/bastion-post-install.log
#!/bin/bash
set -x

${progressHelper}

# Send log lines to bastion
bastion_log() {
  local line="$1"
  local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
  curl -sf -X POST "http://${serverIp}:${httpPort}/api/log" \\
    -H "Content-Type: application/json" \\
    -d "{\\"mac\\":\\"$mac\\",\\"line\\":\\"$(echo "$line" | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g')\\"}" \\
    --connect-timeout 5 --max-time 10 2>/dev/null || true
}

# Send an error stage to bastion
bastion_error() {
  local detail="$1"
  bastion_progress "error" "$detail"
}

# --- Error trap: catch any failure and report to bastion ---
_post_error_handler() {
  local exit_code=$? lineno=$1
  bastion_error "%post failed at line $lineno (exit $exit_code)"
}
trap '_post_error_handler $LINENO' ERR

bastion_progress "post-install" "configuring system"

# -- SSH --
systemctl enable --now sshd
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
${sshPostBlock}

# -- Hostname and domain --
hostnamectl set-hostname ${fqdn}

# -- tmpfs for /tmp --
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab

${roleSetup}

# -- Boot order: restore network first (Anaconda sets disk first, we undo it) --
# Network boot must stay first so the bastion intercepts every reboot.
if command -v efibootmgr >/dev/null 2>&1; then
  # "|| true": a missing PXE entry is not an install failure and must not fire the ERR trap.
  PXE_ENTRY=$(efibootmgr | grep -iE 'network|pxe|ipv4|ipv6|http' | head -1 | grep -oP 'Boot\\K[0-9A-F]+' || true)
  if [ -n "$PXE_ENTRY" ]; then
    CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
    REST=$(echo "$CURRENT_ORDER" | sed "s/$PXE_ENTRY,\\\\?//;s/,$//" | sed 's/^,//')
    # Only add the separator when other entries remain.
    NEW_ORDER="$PXE_ENTRY\${REST:+,$REST}"
    efibootmgr -o "$NEW_ORDER" || true
  fi
fi

# -- Provisioning metadata --
cat > /etc/lab-provisioned << PROVEOF
hostname: ${fqdn}
role: ${role}
provisioned: $(date -Iseconds)
bastion: ${serverIp}
PROVEOF

cat > /root/README << 'README'
# Lab Node -- ${fqdn} (role: ${role})
#
# Next steps:
# 1. Install puppet agent:
#    dnf install -y puppet-agent
#
# 2. Install k3s:
#    curl -sfL https://get.k3s.io | sh -
#
# 3. Or join existing cluster:
#    curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
README

${k3sInstall}
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
bastion_progress "complete" "ready at $IP_ADDR"

%end
`;
}
|
||||
299
bastion/src/bastion/src/templates/ubuntu-autoinstall.ts
Normal file
299
bastion/src/bastion/src/templates/ubuntu-autoinstall.ts
Normal file
@@ -0,0 +1,299 @@
|
||||
// Ubuntu autoinstall template (cloud-init).
|
||||
// Equivalent of the Fedora kickstart: LVM partitioning, packages,
|
||||
// SSH keys, k3s prereqs, progress callbacks.
|
||||
|
||||
/** Inputs for the Ubuntu autoinstall user-data. */
export interface UbuntuAutoinstallParams {
  /** Short host name; combined with `domain` to form the FQDN. */
  hostname: string;
  /** Target disk (`/dev/sda` or bare `sda`); empty string falls back to `/dev/sda`. */
  disk: string;
  role: string; // "vanilla" | "worker" | "infra"
  /** DNS domain; may be empty. */
  domain: string;
  ubuntuVersion: string;
  timezone: string;
  locale: string;
  /** Bastion address used for the completion callback. */
  serverIp: string;
  /** Bastion HTTP port for the completion callback. */
  httpPort: number;
  /** Public keys for the installed system. */
  sshKeys: string[];
  /** Optional admin account name; empty string to skip. */
  adminUser: string;
}

/**
 * Storage-config entries for one logical volume in volume group `vg0`:
 * the LV itself, its filesystem and its mount point.
 */
function lvmVolumeYaml(name: string, size: string, fstype: string, path: string): string {
  return [
    `      - id: lv-${name}`,
    `        name: ${name}`,
    "        type: lvm_partition",
    "        volgroup: vg0",
    `        size: ${size}`,
    `      - id: fs-${name}`,
    "        type: format",
    `        volume: lv-${name}`,
    `        fstype: ${fstype}`,
    `      - id: mount-${name}`,
    "        type: mount",
    `        device: fs-${name}`,
    `        path: ${path}`,
  ].join("\n");
}

/**
 * Render the Ubuntu autoinstall `user-data` document.
 *
 * Produces a `#cloud-config` file with identity, SSH, an LVM storage layout
 * mirroring the Fedora kickstart sizes, a package list and late-commands
 * (k3s prerequisites, optional admin user, provisioning metadata and a
 * completion callback to the bastion).
 *
 * SSH keys and late-commands are written with `JSON.stringify`. A JSON string
 * is a valid YAML double-quoted scalar, so embedded quotes and the newlines
 * of heredoc commands survive YAML parsing unchanged.
 *
 * @param params Host identity, disk, role and bastion connection details.
 * @returns The user-data file content.
 */
export function renderUbuntuAutoinstall(params: UbuntuAutoinstallParams): string {
  const { hostname, disk, role, domain, timezone, serverIp, httpPort, sshKeys, adminUser } = params;

  const fqdn = domain ? `${hostname}.${domain}` : hostname;
  const vg = "labvg";
  const hasLonghorn = role === "worker";
  const hasRancher = role === "infra";

  // The storage config needs a device path: default to /dev/sda and accept a
  // bare device name such as "sda" by prefixing /dev/.
  const diskDevice = !disk ? "/dev/sda" : disk.startsWith("/") ? disk : `/dev/${disk}`;

  // LVM layout matching the Fedora kickstart sizes
  const volumes: string[] = [
    lvmVolumeYaml("swap", "27G", "swap", "none"),
    lvmVolumeYaml("root", "33G", "xfs", "/"),
    lvmVolumeYaml("var", "100G", "xfs", "/var"),
    lvmVolumeYaml("varlog", "10G", "xfs", "/var/log"),
    lvmVolumeYaml("home", "10G", "xfs", "/home"),
    lvmVolumeYaml("srv", "20G", "xfs", "/srv"),
  ];
  if (hasLonghorn) {
    // size -1: take the remaining space in the volume group
    volumes.push(lvmVolumeYaml("longhorn", "-1", "xfs", "/var/lib/longhorn"));
  }
  if (hasRancher) {
    volumes.push(lvmVolumeYaml("rancher", "20G", "xfs", "/var/lib/rancher"));
  }

  // SSH keys as a YAML list; an explicit empty list when there are none
  const authorizedKeysYaml =
    sshKeys.length > 0
      ? "\n" + sshKeys.map((key) => `      - ${JSON.stringify(key)}`).join("\n")
      : " []";

  // late-commands for k3s prereqs, firewall, chrony, admin user, progress callback
  const lateCommands: string[] = [
    // Kernel modules for k3s (nf_conntrack: the legacy ip_conntrack name no longer resolves)
    `curtin in-target -- bash -c 'cat > /etc/modules-load.d/k3s.conf << EOF\nbr_netfilter\noverlay\nnf_conntrack\nEOF'`,
    // Sysctl for k3s networking
    `curtin in-target -- bash -c 'cat > /etc/sysctl.d/90-k3s.conf << EOF\nnet.bridge.bridge-nf-call-iptables = 1\nnet.bridge.bridge-nf-call-ip6tables = 1\nnet.ipv4.ip_forward = 1\nnet.ipv6.conf.all.forwarding = 1\nfs.inotify.max_user_instances = 524288\nfs.inotify.max_user_watches = 1048576\nEOF'`,
    // Disable ufw firewall
    `curtin in-target -- systemctl disable ufw || true`,
    // Enable chrony/ntp
    `curtin in-target -- systemctl enable chrony || true`,
    // tmpfs for /tmp
    `curtin in-target -- bash -c 'echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab'`,
  ];

  // Admin user creation + SSH keys + sudoers
  if (adminUser) {
    lateCommands.push(
      `curtin in-target -- useradd -m -G sudo -s /bin/bash ${adminUser}`,
      `curtin in-target -- usermod -L ${adminUser}`,
      `curtin in-target -- mkdir -p /home/${adminUser}/.ssh`,
      `curtin in-target -- bash -c 'cat > /home/${adminUser}/.ssh/authorized_keys << EOF\n${sshKeys.join("\n")}\nEOF'`,
      `curtin in-target -- chmod 700 /home/${adminUser}/.ssh`,
      `curtin in-target -- chmod 600 /home/${adminUser}/.ssh/authorized_keys`,
      `curtin in-target -- chown -R ${adminUser}:${adminUser} /home/${adminUser}/.ssh`,
      `curtin in-target -- bash -c 'echo "${adminUser} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/${adminUser}'`,
      `curtin in-target -- chmod 440 /etc/sudoers.d/${adminUser}`,
    );
  }

  // Provisioning metadata
  lateCommands.push(
    `curtin in-target -- bash -c 'cat > /etc/lab-provisioned << EOF\nhostname: ${fqdn}\nrole: ${role}\nprovisioned: $(date -Iseconds)\nbastion: ${serverIp}\nEOF'`,
  );

  // k3s install for infra role
  if (hasRancher) {
    lateCommands.push(
      `curtin in-target -- bash -c 'curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -'`,
    );
  }

  // Progress callback (complete). The backslashes are shell escapes consumed
  // by the inner bash inside its double-quoted strings.
  lateCommands.push(
    `curtin in-target -- bash -c 'IP_ADDR=$(ip -4 addr show | awk "/inet / && !/127.0.0/ {split(\\$2,a,\\"/\\"); print a[1]; exit}"); curl -sf -X POST "http://${serverIp}:${httpPort}/api/progress" -H "Content-Type: application/json" -d "{\\"mac\\":\\"$(ip link show | awk "/ether/ && !/00:00:00:00/ {print \\$2; exit}")\\",\\"stage\\":\\"complete\\",\\"detail\\":\\"ready at $IP_ADDR\\"}" || true'`,
  );

  const lateCommandsYaml = lateCommands.map((cmd) => `    - ${JSON.stringify(cmd)}`).join("\n");

  return `#cloud-config
autoinstall:
  version: 1
  locale: ${params.locale}
  keyboard:
    layout: gb
  timezone: ${timezone}
  identity:
    hostname: ${fqdn}
    username: ${adminUser || "root"}
    password: "!"
  ssh:
    install-server: true
    allow-pw: false
    authorized-keys:${authorizedKeysYaml}
  storage:
    config:
      - id: disk0
        type: disk
        ptable: gpt
        path: ${diskDevice}
        wipe: superblock-recursive
        grub_device: true
      - id: part-efi
        type: partition
        device: disk0
        size: 600M
        flag: boot
        grub_device: true
      - id: fs-efi
        type: format
        volume: part-efi
        fstype: fat32
      - id: mount-efi
        type: mount
        device: fs-efi
        path: /boot/efi
      - id: part-boot
        type: partition
        device: disk0
        size: 3G
      - id: fs-boot
        type: format
        volume: part-boot
        fstype: ext4
      - id: mount-boot
        type: mount
        device: fs-boot
        path: /boot
      - id: part-pv
        type: partition
        device: disk0
        size: -1
      - id: vg0
        type: lvm_volgroup
        name: ${vg}
        devices:
          - part-pv
${volumes.join("\n")}
  packages:
    - openssh-server
    - curl
    - wget
    - git
    - jq
    - htop
    - vim
    - tmux
    - python3
    - lshw
    - dmidecode
    - net-tools
    - iproute2
    - iputils-ping
    - traceroute
    - tcpdump
    - iotop
    - strace
    - tar
    - containerd
    - socat
    - conntrack
    - ethtool
    - iptables
    - chrony
    - efibootmgr
  late-commands:
${lateCommandsYaml}
`;
}
|
||||
|
||||
/**
 * Render the cloud-init `meta-data` document for a host.
 *
 * @param hostname Value used for both `instance-id` and `local-hostname`.
 * @returns Two `key: value` lines, each terminated by a newline.
 */
export function renderUbuntuMetaData(hostname: string): string {
  const entries = [`instance-id: ${hostname}`, `local-hostname: ${hostname}`];
  return entries.map((entry) => `${entry}\n`).join("");
}
|
||||
24
bastion/src/bastion/src/templates/ubuntu-boot.ipxe.ts
Normal file
24
bastion/src/bastion/src/templates/ubuntu-boot.ipxe.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
// iPXE boot script template for Ubuntu autoinstall.
|
||||
|
||||
/**
 * Render the iPXE script that boots the Ubuntu installer in autoinstall mode.
 *
 * The script prints a banner (Ubuntu version, target host, MAC), then loads
 * `ubuntu-vmlinuz` and `ubuntu-initrd` from the bastion HTTP server. The
 * kernel command line points the nocloud-net datasource at
 * `/autoinstall/<mac>/` on the same server.
 *
 * @param params Target identity and bastion address/port.
 * @returns The complete iPXE script, terminated by a newline.
 */
export function renderUbuntuInstallIpxe(params: {
  mac: string;
  hostname: string;
  serverIp: string;
  httpPort: number;
  ubuntuVersion: string;
}): string {
  const { mac, hostname, serverIp, httpPort, ubuntuVersion } = params;
  const baseUrl = `http://${serverIp}:${httpPort}`;
  const rule = "echo =============================================";
  const script = [
    "#!ipxe",
    "",
    "echo",
    rule,
    `echo Lab PXE Bastion - INSTALLING Ubuntu ${ubuntuVersion}`,
    `echo Target: ${hostname}`,
    `echo MAC: ${mac}`,
    rule,
    "echo",
    "",
    `kernel ${baseUrl}/ubuntu-vmlinuz autoinstall ds=nocloud-net;seedfrom=${baseUrl}/autoinstall/${mac}/ ---`,
    `initrd ${baseUrl}/ubuntu-initrd`,
    "boot",
  ];
  return `${script.join("\n")}\n`;
}
|
||||
328
bastion/src/bastion/tests/dispatch.test.ts
Normal file
328
bastion/src/bastion/tests/dispatch.test.ts
Normal file
@@ -0,0 +1,328 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { mkdirSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import { createApp } from "../src/server.js";
|
||||
import type { FastifyInstance } from "fastify";
|
||||
import type { StateManager } from "../src/services/state.js";
|
||||
import type { InstallLogBuffer } from "../src/services/install-log.js";
|
||||
|
||||
/**
 * Build a BastionConfig for the dispatch tests.
 *
 * All on-disk locations (bastion dir, tftp dir, http dir, state file) are
 * placed under `testDir`; dnsmasq and artifact handling are switched off via
 * `skipDnsmasq` / `skipArtifacts`.
 *
 * @param testDir - Scratch directory owned by the current test.
 */
function createTestConfig(testDir: string): BastionConfig {
  const under = (name: string): string => join(testDir, name);

  const config: BastionConfig = {
    fedoraVersion: "43",
    arch: "x86_64",
    httpPort: 0,
    timezone: "Europe/London",
    locale: "en_GB.UTF-8",
    bastionDir: testDir,
    domain: "test.local",
    dhcpMode: "proxy",
    dhcpRangeStart: "",
    dhcpRangeEnd: "",
    ubuntuVersion: "26.04",
    ubuntuMirror: "https://releases.ubuntu.com/26.04",
    iface: "eth0",
    serverIp: "10.0.0.1",
    network: "10.0.0.0",
    gateway: "10.0.0.1",
    sshKeys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST test@test"],
    adminUser: "testadmin",
    skipDnsmasq: true,
    skipArtifacts: true,
    fedoraMirror: "https://download.fedoraproject.org/pub/fedora/linux/releases/43/Everything/x86_64/os",
    tftpDir: under("tftp"),
    httpDir: under("http"),
    stateFile: under("state.json"),
  };
  return config;
}
|
||||
|
||||
// Route-level tests for the bastion HTTP app, driven through Fastify's
// in-process `inject()`. Each test gets a fresh app backed by its own
// temporary directory.
describe("dispatch routes", () => {
  // Every test addresses the same machine, except the 404 case below.
  const MAC = "aa:bb:cc:dd:ee:ff";

  let testDir: string;
  let app: FastifyInstance;
  let state: StateManager;
  let installLog: InstallLogBuffer;

  /** Put MAC into the install queue under `hostname` (worker role, /dev/sda). */
  function queueInstall(hostname: string): void {
    state.update((s) => {
      s.install_queue[MAC] = {
        hostname,
        disk: "/dev/sda",
        role: "worker",
        queued_at: new Date().toISOString(),
      };
    });
  }

  /** Issue a GET against the app under test. */
  function get(url: string) {
    return app.inject({ method: "GET", url });
  }

  /** POST `payload` as a JSON body to `url`. */
  function postJson(url: string, payload: unknown) {
    return app.inject({
      method: "POST",
      url,
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });
  }

  beforeEach(() => {
    const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    testDir = join(tmpdir(), `bastion-dispatch-test-${suffix}`);
    for (const dir of [testDir, join(testDir, "http"), join(testDir, "tftp")]) {
      mkdirSync(dir, { recursive: true });
    }

    const created = createApp(createTestConfig(testDir));
    app = created.app;
    state = created.state;
    installLog = created.installLog;
  });

  afterEach(async () => {
    await app.close();
    rmSync(testDir, { recursive: true, force: true });
  });

  it("unknown MAC returns discovery iPXE script", async () => {
    const response = await get("/dispatch?mac=aa:bb:cc:dd:ee:ff");

    expect(response.statusCode).toBe(200);
    expect(response.headers["content-type"]).toContain("text/plain");
    for (const fragment of ["#!ipxe", "DISCOVERY MODE", "discover.ks"]) {
      expect(response.body).toContain(fragment);
    }
  });

  it("MAC in install_queue returns install iPXE script", async () => {
    queueInstall("worker-1");

    const response = await get(`/dispatch?mac=${MAC}`);

    expect(response.statusCode).toBe(200);
    for (const fragment of ["#!ipxe", "INSTALLING", "worker-1", `ks?mac=${MAC}`]) {
      expect(response.body).toContain(fragment);
    }
  });

  it("MAC in installed returns local boot (exit) script", async () => {
    state.update((s) => {
      s.installed[MAC] = {
        hostname: "installed-node",
        role: "worker",
        ip: "10.0.0.50",
        installed_at: new Date().toISOString(),
      };
    });

    const response = await get(`/dispatch?mac=${MAC}`);

    expect(response.statusCode).toBe(200);
    for (const fragment of ["#!ipxe", "installed-node", "Already installed", "exit"]) {
      expect(response.body).toContain(fragment);
    }
  });

  it("progress endpoint updates state", async () => {
    queueInstall("worker-1");

    const response = await postJson("/api/progress", {
      mac: MAC,
      stage: "post-install",
      detail: "configuring system",
    });

    expect(response.statusCode).toBe(200);
    expect(JSON.parse(response.body).status).toBe("ok");

    // Verify state was updated
    const queued = state.load().install_queue[MAC];
    expect(queued?.progress).toBe("post-install");
    expect(queued?.progress_detail).toBe("configuring system");
  });

  it("progress endpoint with 'complete' stage moves machine to installed", async () => {
    queueInstall("worker-1");

    const response = await postJson("/api/progress", {
      mac: MAC,
      stage: "complete",
      detail: "ready at 10.0.0.50",
    });

    expect(response.statusCode).toBe(200);

    const after = state.load();
    expect(after.install_queue[MAC]).toBeUndefined();
    expect(after.installed[MAC]).toBeDefined();
    expect(after.installed[MAC]?.hostname).toBe("worker-1");
    expect(after.installed[MAC]?.ip).toBe("10.0.0.50");
  });

  it("DELETE /api/machines/:mac removes machine from state", async () => {
    state.update((s) => {
      const seenAt = (): string => new Date().toISOString();
      s.discovered[MAC] = {
        mac: MAC,
        product: "TestBox",
        board: "TestBoard",
        serial: "SN123",
        manufacturer: "TestCorp",
        cpu_model: "Test CPU",
        cpu_cores: 4,
        memory_gb: 16,
        arch: "x86_64",
        disks: [],
        nics: [],
        first_seen: seenAt(),
        last_seen: seenAt(),
      };
    });

    const response = await app.inject({
      method: "DELETE",
      url: `/api/machines/${encodeURIComponent(MAC)}`,
    });

    expect(response.statusCode).toBe(200);
    expect(JSON.parse(response.body).status).toBe("forgotten");
    expect(state.load().discovered[MAC]).toBeUndefined();
  });

  it("DELETE /api/machines/:mac returns 404 for unknown machine", async () => {
    const response = await app.inject({
      method: "DELETE",
      url: "/api/machines/ff:ff:ff:ff:ff:ff",
    });

    expect(response.statusCode).toBe(404);
    expect(JSON.parse(response.body).error).toBe("machine not found");
  });

  it("POST /api/log accepts a single line", async () => {
    const response = await postJson("/api/log", { mac: MAC, line: "hello from kickstart" });

    expect(response.statusCode).toBe(200);
    const result = JSON.parse(response.body);
    expect(result.status).toBe("ok");
    expect(result.lines).toBe(1);

    // Verify line is stored
    const stored = installLog.getLines(MAC);
    expect(stored).toHaveLength(1);
    expect(stored[0]!.line).toBe("hello from kickstart");
  });

  it("POST /api/log accepts multiple lines", async () => {
    const response = await postJson("/api/log", {
      mac: MAC,
      lines: ["line 1", "line 2", "line 3"],
    });

    expect(response.statusCode).toBe(200);
    expect(JSON.parse(response.body).lines).toBe(3);
    expect(installLog.getLines(MAC)).toHaveLength(3);
  });

  it("GET /api/logs/:mac includes log lines for installing machine", async () => {
    queueInstall("test-node");

    // Add some log lines
    installLog.append(MAC, ["log line 1", "log line 2"], "test-node");

    const response = await get(`/api/logs/${encodeURIComponent(MAC)}`);

    expect(response.statusCode).toBe(200);
    const result = JSON.parse(response.body);
    expect(result.status).toBe("installing");
    expect(result.log_lines).toHaveLength(2);
    expect(result.log_total).toBe(2);
    expect(result.log_lines[0].line).toBe("log line 1");
  });

  it("progress endpoint with 'error' stage keeps machine in install_queue", async () => {
    queueInstall("failing-node");

    const response = await postJson("/api/progress", {
      mac: MAC,
      stage: "error",
      detail: "%post failed at line 42",
    });

    expect(response.statusCode).toBe(200);

    // Machine should still be in install_queue (not moved to installed)
    const after = state.load();
    expect(after.install_queue[MAC]).toBeDefined();
    expect(after.install_queue[MAC]?.progress).toBe("error");
    expect(after.install_queue[MAC]?.progress_detail).toBe("%post failed at line 42");
    expect(after.installed[MAC]).toBeUndefined();
  });
});
|
||||
215
bastion/src/bastion/tests/kickstart.test.ts
Normal file
215
bastion/src/bastion/tests/kickstart.test.ts
Normal file
@@ -0,0 +1,215 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { renderInstallKickstart, type InstallKickstartParams } from "../src/templates/install.ks.js";
|
||||
|
||||
/**
 * Default kickstart parameters for these tests: a `worker` named `testnode`
 * with two SSH keys and an `admin` user.
 *
 * @param overrides - Fields that replace the defaults (shallow merge).
 */
function baseParams(overrides: Partial<InstallKickstartParams> = {}): InstallKickstartParams {
  const defaults: InstallKickstartParams = {
    hostname: "testnode",
    disk: "",
    role: "worker",
    domain: "lab.local",
    fedoraVersion: "43",
    timezone: "Europe/London",
    locale: "en_GB.UTF-8",
    serverIp: "192.168.1.100",
    httpPort: 8080,
    syslogPort: 5514,
    sshKeys: [
      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST1 user1@host",
      "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQTEST2 user2@host",
    ],
    adminUser: "admin",
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
// Content checks for the rendered Fedora kickstart, plus an external syntax
// check through `ksvalidator` (which must be installed for that test to pass).
describe("renderInstallKickstart", () => {
  // Roles exercised by the tests that loop over every variant.
  const ROLES = ["vanilla", "worker", "infra"] as const;

  it("worker role includes longhorn partition", () => {
    const ks = renderInstallKickstart(baseParams({ role: "worker" }));
    expect(ks).toContain("longhorn");
    expect(ks).toContain("/var/lib/longhorn");
  });

  it("infra role does NOT include longhorn partition", () => {
    const ks = renderInstallKickstart(baseParams({ role: "infra" }));
    // The fresh install longhorn line should not be present
    expect(ks).not.toContain("logvol /var/lib/longhorn --vgname=labvg --name=longhorn --fstype=xfs --grow --size=1");
  });

  it("all SSH keys appear between SSHKEYS markers", () => {
    const keys = [
      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST1 user1@host",
      "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQTEST2 user2@host",
    ];
    const ks = renderInstallKickstart(baseParams({ sshKeys: keys }));
    // Both keys should appear between the SSHKEYS markers
    const sshkeysMatch = ks.match(/cat > \/root\/\.ssh\/authorized_keys << 'SSHKEYS'\n([\s\S]*?)\nSSHKEYS/);
    expect(sshkeysMatch).not.toBeNull();
    const keysBlock = sshkeysMatch![1]!;
    for (const key of keys) {
      expect(keysBlock).toContain(key);
    }
  });

  it("admin user directive appears when adminUser is set", () => {
    const ks = renderInstallKickstart(baseParams({ adminUser: "myadmin" }));
    expect(ks).toContain("user --name=myadmin --groups=wheel --lock");
  });

  it("no admin user directive when adminUser is empty", () => {
    const ks = renderInstallKickstart(baseParams({ adminUser: "" }));
    expect(ks).not.toContain("user --name=");
  });

  it("FQDN is hostname.domain", () => {
    const ks = renderInstallKickstart(baseParams({
      hostname: "myhost",
      domain: "example.com",
    }));
    expect(ks).toContain("myhost.example.com");
    expect(ks).toContain("--hostname=myhost.example.com");
  });

  it("restorecon is present", () => {
    const ks = renderInstallKickstart(baseParams());
    expect(ks).toContain("restorecon");
  });

  it("sudoers line for admin user", () => {
    const ks = renderInstallKickstart(baseParams({ adminUser: "admin" }));
    expect(ks).toContain("admin ALL=(ALL) NOPASSWD: ALL");
    expect(ks).toContain("/etc/sudoers.d/admin");
  });

  it("boot order restores network first (bastion controls boot)", () => {
    const ks = renderInstallKickstart(baseParams());
    expect(ks).toContain("restore network first");
    expect(ks).toContain("PXE_ENTRY");
    expect(ks).toContain("efibootmgr -o");
  });

  it("progress callback URLs use correct serverIp and httpPort", () => {
    const ks = renderInstallKickstart(baseParams({
      serverIp: "10.0.0.5",
      httpPort: 9090,
    }));
    expect(ks).toContain("http://10.0.0.5:9090");
    expect(ks).toContain("/api/progress");
  });

  it("infra role has /var/lib/rancher partition", () => {
    const ks = renderInstallKickstart(baseParams({ role: "infra" }));
    expect(ks).toContain("logvol /var/lib/rancher --vgname=labvg --name=rancher --fstype=xfs --size=20480");
  });

  it("infra role has k3s install", () => {
    const ks = renderInstallKickstart(baseParams({ role: "infra" }));
    expect(ks).toContain("curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -");
  });

  it("worker role does NOT have /var/lib/rancher partition in fresh install", () => {
    const ks = renderInstallKickstart(baseParams({ role: "worker" }));
    // Worker should not have the fresh-install rancher partition line
    expect(ks).not.toContain("logvol /var/lib/rancher --vgname=labvg --name=rancher --fstype=xfs --size=20480");
  });

  it("worker role does NOT have k3s install", () => {
    const ks = renderInstallKickstart(baseParams({ role: "worker" }));
    expect(ks).not.toContain("INSTALL_K3S_SKIP_START");
  });

  it("reprovision preserves rancher partition", () => {
    const ks = renderInstallKickstart(baseParams({ role: "infra" }));
    expect(ks).toContain("PRESERVE_RANCHER=no");
    expect(ks).toContain('lvs $VG/rancher');
    expect(ks).toContain("PRESERVE_RANCHER=yes");
    expect(ks).toContain('logvol /var/lib/rancher --vgname=labvg --name=rancher --useexisting --noformat');
  });

  it("partition sizes are correct", () => {
    const ks = renderInstallKickstart(baseParams());
    // root = 33792
    expect(ks).toContain("--name=root --fstype=xfs --size=33792");
    // var = 102400
    expect(ks).toContain("--name=var --fstype=xfs --size=102400");
    // varlog = 10240
    expect(ks).toContain("--name=varlog --fstype=xfs --size=10240");
    // home = 10240
    expect(ks).toContain("--name=home --fstype=xfs --size=10240");
    // srv = 20480
    expect(ks).toContain("--name=srv --fstype=xfs --size=20480");
    // swap = 27648
    expect(ks).toContain("--name=swap --fstype=swap --size=27648");
  });

  it("vanilla role skips k3s setup", () => {
    const ks = renderInstallKickstart(baseParams({ role: "vanilla" }));
    expect(ks).toContain("vanilla role");
    expect(ks).not.toContain("modules-load.d/k3s.conf");
    expect(ks).not.toContain("firewalld");
  });

  it("worker role has k3s setup", () => {
    const ks = renderInstallKickstart(baseParams({ role: "worker" }));
    expect(ks).toContain("modules-load.d/k3s.conf");
    expect(ks).toContain("sysctl.d/90-k3s.conf");
    expect(ks).toContain("firewalld");
  });

  it("kickstart syntax: no merged partition lines", () => {
    for (const role of ROLES) {
      const ks = renderInstallKickstart(baseParams({ role }));
      // entries() yields a defined string for every index, so no unchecked
      // `lines[i]` access is needed.
      for (const [i, raw] of ks.split("\n").entries()) {
        const l = raw.trim();
        if (!l.startsWith("part ")) continue;
        const partCount = (l.match(/\bpart\b/g) ?? []).length;
        expect(partCount, `line ${i + 1} has ${partCount} 'part' commands (role=${role}): ${l}`).toBe(1);
      }
    }
  });

  it("kickstart syntax: each section-opening has a %end", () => {
    const ks = renderInstallKickstart(baseParams());
    // Only match section openers at start of line
    const sections = (ks.match(/^%(?:pre|post|packages)\b/gm) ?? []).length;
    const ends = (ks.match(/^%end$/gm) ?? []).length;
    expect(ends, `${sections} sections but ${ends} %end markers`).toBe(sections);
  });

  it("has complete progress stage", () => {
    const ks = renderInstallKickstart(baseParams());
    expect(ks).toContain('"complete"');
    expect(ks).toContain("ready at");
  });

  it("sends install logs to bastion via syslog", () => {
    const ks = renderInstallKickstart(baseParams({ syslogPort: 5514 }));
    expect(ks).toContain("logging --host=192.168.1.100 --port=5514");
  });

  it("passes ksvalidator syntax check", async () => {
    // Function-scope ESM imports replace the previous CommonJS `require`
    // calls, which are not part of the ES module environment this file uses.
    const { execFileSync } = await import("node:child_process");
    const { mkdtempSync, rmSync, writeFileSync } = await import("node:fs");
    const { tmpdir } = await import("node:os");
    const { join } = await import("node:path");

    // A private, uniquely named directory avoids the fixed /tmp/ks-test-*.ks
    // paths, which could collide between concurrent runs.
    const workDir = mkdtempSync(join(tmpdir(), "ks-test-"));
    try {
      for (const role of ROLES) {
        const file = join(workDir, `${role}.ks`);
        writeFileSync(file, renderInstallKickstart(baseParams({ role })));
        try {
          // argv array: no shell is involved, so the path needs no quoting.
          execFileSync("ksvalidator", ["-v", "F43", file], { encoding: "utf-8" });
        } catch (err: unknown) {
          const msg = err instanceof Error
            ? (err as { stderr?: string }).stderr ?? err.message
            : String(err);
          throw new Error(`ksvalidator failed for role=${role}: ${msg}`);
        }
      }
    } finally {
      rmSync(workDir, { recursive: true, force: true });
    }
  });

  it("forwards system logs to serial console", () => {
    const ks = renderInstallKickstart(baseParams({ role: "vanilla" }));
    expect(ks).toContain("serial-console.conf");
    expect(ks).toContain("/dev/ttyS0");
    expect(ks).toContain("rsyslog");
  });
});
|
||||
140
bastion/src/bastion/tests/state.test.ts
Normal file
140
bastion/src/bastion/tests/state.test.ts
Normal file
@@ -0,0 +1,140 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { mkdirSync, rmSync, existsSync, readFileSync, writeFileSync, chmodSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { StateManager } from "../src/services/state.js";
|
||||
|
||||
// Persistence tests for StateManager; each test works on a state file inside
// its own temporary directory.
describe("StateManager", () => {
  const DISCOVERED_MAC = "aa:bb:cc:dd:ee:ff";
  const QUEUED_MAC = "11:22:33:44:55:66";

  // Builds a fresh copy of the state expected before anything is recorded.
  const emptyState = () => ({
    discovered: {},
    install_queue: {},
    installed: {},
  });

  let testDir: string;
  let stateFile: string;
  let state: StateManager;

  beforeEach(() => {
    const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    testDir = join(tmpdir(), `bastion-test-${suffix}`);
    mkdirSync(testDir, { recursive: true });
    stateFile = join(testDir, "state.json");
    state = new StateManager(stateFile);
  });

  afterEach(() => {
    rmSync(testDir, { recursive: true, force: true });
  });

  it("creates empty state on first load", () => {
    expect(state.load()).toEqual(emptyState());
  });

  it("init creates the state file", () => {
    expect(existsSync(stateFile)).toBe(false);
    state.init();
    expect(existsSync(stateFile)).toBe(true);

    const onDisk = JSON.parse(readFileSync(stateFile, "utf-8"));
    expect(onDisk).toEqual(emptyState());
  });

  it("saves and loads state correctly", () => {
    state.init();

    state.update((s) => {
      s.discovered[DISCOVERED_MAC] = {
        mac: DISCOVERED_MAC,
        product: "TestBox",
        board: "TestBoard",
        serial: "SN123",
        manufacturer: "TestCorp",
        cpu_model: "Test CPU",
        cpu_cores: 8,
        memory_gb: 32,
        arch: "x86_64",
        disks: [{ name: "sda", size_gb: 500, model: "TestDisk" }],
        nics: [{ name: "eth0", mac: DISCOVERED_MAC, state: "UP" }],
        first_seen: "2025-01-01T00:00:00Z",
        last_seen: "2025-01-01T00:00:00Z",
      };

      s.install_queue[QUEUED_MAC] = {
        hostname: "worker-1",
        disk: "/dev/sda",
        role: "worker",
        queued_at: "2025-01-01T01:00:00Z",
      };
    });

    // Load in a fresh StateManager to verify persistence
    const reloaded = new StateManager(stateFile).load();

    expect(reloaded.discovered[DISCOVERED_MAC]?.product).toBe("TestBox");
    expect(reloaded.discovered[DISCOVERED_MAC]?.cpu_cores).toBe(8);
    expect(reloaded.install_queue[QUEUED_MAC]?.hostname).toBe("worker-1");
    expect(reloaded.installed).toEqual({});
  });

  it("uses atomic writes (tmp file + rename)", () => {
    state.init();

    // After save, there should be no .tmp file left behind
    state.update((s) => {
      s.installed[DISCOVERED_MAC] = {
        hostname: "node1",
        role: "worker",
        ip: "10.0.0.1",
        installed_at: "2025-01-01T00:00:00Z",
      };
    });

    expect(existsSync(`${stateFile}.tmp`)).toBe(false);
    expect(existsSync(stateFile)).toBe(true);

    // Verify data was written correctly
    const parsed = JSON.parse(readFileSync(stateFile, "utf-8"));
    expect(parsed.installed[DISCOVERED_MAC].hostname).toBe("node1");
  });
});
|
||||
|
||||
// NOTE(review): as written, these tests only exercise node:fs and parseInt;
// no bastion PID-handling code is called. Confirm whether they are meant to
// cover a real implementation.
describe("PID file handling", () => {
  let testDir: string;

  /** Read `file` and parse its trimmed contents as a base-10 integer. */
  const readPid = (file: string): number =>
    parseInt(readFileSync(file, "utf-8").trim(), 10);

  beforeEach(() => {
    const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    testDir = join(tmpdir(), `bastion-pid-test-${suffix}`);
    mkdirSync(testDir, { recursive: true });
  });

  afterEach(() => {
    rmSync(testDir, { recursive: true, force: true });
  });

  it("handles stale PID file from previous run", () => {
    const pidFile = join(testDir, "bastion.pid");
    // Simulate a stale PID file with a dead process
    writeFileSync(pidFile, "999999999");
    // Should be readable
    expect(readPid(pidFile)).toBe(999999999);
  });

  it("handles corrupted PID file gracefully", () => {
    const pidFile = join(testDir, "bastion.pid");
    writeFileSync(pidFile, "not-a-number\n");
    expect(isNaN(readPid(pidFile))).toBe(true);
  });

  it("handles missing bastion directory", () => {
    const missingDir = join(testDir, "nonexistent", "deep");
    mkdirSync(missingDir, { recursive: true });
    expect(existsSync(missingDir)).toBe(true);
  });
});
|
||||
13
bastion/src/bastion/tsconfig.json
Normal file
13
bastion/src/bastion/tsconfig.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"extends": "../../tsconfig.base.json",
|
||||
"compilerOptions": {
|
||||
"rootDir": "src",
|
||||
"outDir": "dist",
|
||||
"types": ["node"]
|
||||
},
|
||||
"include": ["src/**/*.ts"],
|
||||
"references": [
|
||||
{ "path": "../shared" },
|
||||
{ "path": "../modules" }
|
||||
]
|
||||
}
|
||||
8
bastion/src/bastion/vitest.config.ts
Normal file
8
bastion/src/bastion/vitest.config.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { defineProject } from 'vitest/config';
|
||||
|
||||
// Vitest project named 'bastion'; picks up every *.test.ts under tests/.
export default defineProject({
  test: { name: 'bastion', include: ['tests/**/*.test.ts'] },
});
|
||||
29
bastion/src/cli/package.json
Normal file
29
bastion/src/cli/package.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"name": "@lab/cli",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"bin": {
|
||||
"labctl": "./dist/index.js"
|
||||
},
|
||||
"main": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"scripts": {
|
||||
"build": "tsc --build",
|
||||
"clean": "rimraf dist",
|
||||
"dev": "tsx src/index.ts",
|
||||
"test": "vitest",
|
||||
"test:run": "vitest run"
|
||||
},
|
||||
"dependencies": {
|
||||
"@lab/bastion": "workspace:*",
|
||||
"@lab/modules": "workspace:*",
|
||||
"@lab/shared": "workspace:*",
|
||||
"commander": "^13.0.0",
|
||||
"ws": "^8.19.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.10.0",
|
||||
"@types/ws": "^8.18.1"
|
||||
}
|
||||
}
|
||||
161
bastion/src/cli/src/api/client.ts
Normal file
161
bastion/src/cli/src/api/client.ts
Normal file
@@ -0,0 +1,161 @@
|
||||
// Typed API client for communicating with labd.
|
||||
|
||||
import https from "node:https";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { LabdApiError } from "./errors.js";
|
||||
import type {
|
||||
Server,
|
||||
ServerFilters,
|
||||
JoinToken,
|
||||
CreateTokenOpts,
|
||||
EnrollmentRequest,
|
||||
EnrollmentResponse,
|
||||
HealthStatus,
|
||||
RequestOpts,
|
||||
} from "./types.js";
|
||||
|
||||
/** Connection settings accepted by `LabdClient`. */
export interface LabdClientConfig {
  /** Base URL that request paths are resolved against. */
  baseUrl: string;
  /** Path to the client certificate file; only used together with `keyPath`. */
  certPath?: string;
  /** Path to the client private key file; only used together with `certPath`. */
  keyPath?: string;
  /** Path to a CA certificate file; read only when a cert/key pair is configured. */
  caPath?: string;
  /** Per-request timeout in milliseconds; the client falls back to 30 000 when unset. */
  timeoutMs?: number;
}
|
||||
|
||||
/**
 * Typed HTTP client for the labd API.
 *
 * Each public method maps to a single endpoint and resolves with the parsed
 * JSON response. Non-2xx responses, connection failures and timeouts are
 * reported as `LabdApiError`; any other error is rethrown unchanged.
 */
export class LabdClient {
  private config: LabdClientConfig;
  private agent: https.Agent | undefined;
  private sessionId: string | undefined;

  /**
   * @param config - Connection settings. A TLS agent is built only when both
   *   `certPath` and `keyPath` are set; `caPath` on its own is not used.
   * @throws If a configured certificate, key or CA file cannot be read.
   */
  constructor(config: LabdClientConfig) {
    this.config = config;
    if (config.certPath && config.keyPath) {
      this.agent = new https.Agent({
        cert: readFileSync(config.certPath),
        key: readFileSync(config.keyPath),
        ca: config.caPath ? readFileSync(config.caPath) : undefined,
        rejectUnauthorized: true,
      });
    }
  }

  /** Attach a session id; it is sent as `X-Session-ID` on later requests. */
  setSessionId(id: string): void {
    this.sessionId = id;
  }

  // --- Server endpoints ---

  /** GET /api/servers, with optional filters sent as query parameters. */
  async getServers(filters?: ServerFilters): Promise<Server[]> {
    return this.request("GET", "/api/servers", { query: filters as Record<string, string | undefined> });
  }

  /** GET /api/servers/:id */
  async getServer(id: string): Promise<Server> {
    return this.request("GET", `/api/servers/${encodeURIComponent(id)}`);
  }

  // --- Token endpoints ---

  /** POST /api/tokens */
  async createJoinToken(opts: CreateTokenOpts): Promise<JoinToken> {
    return this.request("POST", "/api/tokens", { body: opts });
  }

  /** GET /api/tokens */
  async listTokens(): Promise<JoinToken[]> {
    return this.request("GET", "/api/tokens");
  }

  /** DELETE /api/tokens/:id */
  async revokeToken(id: string): Promise<{ status: string; id: string }> {
    return this.request("DELETE", `/api/tokens/${encodeURIComponent(id)}`);
  }

  // --- Auth endpoints ---

  /** POST /api/auth/enroll */
  async enroll(req: EnrollmentRequest): Promise<EnrollmentResponse> {
    return this.request("POST", "/api/auth/enroll", { body: req });
  }

  // --- Bastion endpoints ---

  /** GET /api/bastions */
  async getBastions(): Promise<Array<{
    id: string; hostname: string; network: string; serverIp: string;
    status: string; machineCount: number; lastHeartbeat?: string; connectedAt?: string;
  }>> {
    return this.request("GET", "/api/bastions");
  }

  // --- Machine endpoints (aggregated through labd from bastions) ---

  /** GET /api/machines */
  async getMachines(): Promise<import("@lab/shared").BastionState> {
    return this.request("GET", "/api/machines");
  }

  /** POST /api/machines/install */
  async installMachine(opts: {
    mac: string; hostname: string; disk?: string; role?: string; os?: string;
  }): Promise<{ status: string; data?: unknown; error?: string }> {
    return this.request("POST", "/api/machines/install", { body: opts });
  }

  /** DELETE /api/machines/:mac */
  async forgetMachine(mac: string): Promise<{ status: string }> {
    return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
  }

  /** POST /api/machines/role */
  async updateRole(mac: string, role: string): Promise<{ status: string }> {
    return this.request("POST", "/api/machines/role", { body: { mac, role } });
  }

  /** GET /api/machines/:mac/logs */
  async getMachineLogs(mac: string): Promise<Record<string, unknown>> {
    return this.request("GET", `/api/machines/${encodeURIComponent(mac)}/logs`);
  }

  // --- Health endpoints ---

  /** GET /healthz */
  async getHealth(): Promise<HealthStatus> {
    return this.request("GET", "/healthz");
  }

  // --- Internal ---

  /**
   * Perform one JSON request against labd.
   *
   * @param method - HTTP method.
   * @param path - Path resolved against `config.baseUrl`.
   * @param opts - Optional query parameters (undefined values are skipped)
   *   and a body that is JSON-encoded.
   * @returns The parsed JSON response, or `undefined` for a 204 response.
   * @throws LabdApiError for non-2xx responses, connection failures and timeouts.
   */
  private async request<T>(method: string, path: string, opts?: RequestOpts): Promise<T> {
    const url = new URL(path, this.config.baseUrl);
    if (opts?.query) {
      for (const [k, v] of Object.entries(opts.query)) {
        if (v !== undefined) url.searchParams.set(k, String(v));
      }
    }

    // Test for presence, not truthiness, so valid JSON bodies such as `0`
    // or `false` are not silently dropped.
    const payload = opts?.body != null ? JSON.stringify(opts.body) : undefined;

    // Only declare a JSON content type when a body is actually sent. Servers
    // such as Fastify answer 400 to a body-less DELETE that claims
    // `Content-Type: application/json`.
    const headers: Record<string, string> = {};
    if (payload !== undefined) {
      headers["Content-Type"] = "application/json";
    }
    if (this.sessionId) {
      headers["X-Session-ID"] = this.sessionId;
    }

    const timeoutMs = this.config.timeoutMs ?? 30_000;

    try {
      const resp = await fetch(url.toString(), {
        method,
        headers,
        body: payload,
        signal: AbortSignal.timeout(timeoutMs),
        // NOTE(review): Node's built-in fetch takes an undici `dispatcher`,
        // not an `https.Agent` passed as `agent`, so the client certificate
        // configured in the constructor is probably not applied here. Confirm
        // against a server that requires mTLS.
        // @ts-expect-error -- Node fetch supports dispatcher/agent
        agent: this.agent,
      });

      if (!resp.ok) {
        const body = await resp.json().catch(() => ({ error: resp.statusText }));
        throw LabdApiError.fromResponse(resp.status, body);
      }

      // A 204 carries no body, so resp.json() would throw a SyntaxError.
      if (resp.status === 204) {
        return undefined as T;
      }

      return (await resp.json()) as T;
    } catch (err) {
      if (err instanceof LabdApiError) throw err;
      if (err instanceof TypeError && (err.message.includes("fetch") || err.message.includes("ECONNREFUSED"))) {
        throw LabdApiError.notConnected(this.config.baseUrl);
      }
      if (err instanceof DOMException && err.name === "TimeoutError") {
        throw LabdApiError.timeout(timeoutMs);
      }
      throw err;
    }
  }
}
|
||||
47
bastion/src/cli/src/api/config.ts
Normal file
47
bastion/src/cli/src/api/config.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
// CLI configuration loading for labd client.
|
||||
// Bridges the CLI config module into LabdClient configuration.
|
||||
|
||||
import { loadConfig, CONFIG_DIR, CONFIG_FILE, CERT_DIR } from "../config/index.js";
|
||||
import { LabdClient, type LabdClientConfig } from "./client.js";
|
||||
|
||||
export { CONFIG_DIR, CONFIG_FILE, CERT_DIR };
|
||||
|
||||
/**
 * Builds the `LabdClient` configuration from the CLI configuration.
 *
 * Precedence, lowest to highest: CLI config values, the `LABCTL_CERT_PATH` /
 * `LABCTL_KEY_PATH` / `LABCTL_CA_PATH` environment variables, then `overrides`.
 * Path fields that are unset or empty are left off the result entirely.
 *
 * @param overrides - Fields that replace the computed values.
 * @returns The merged client configuration.
 */
export function loadClientConfig(
  overrides?: Partial<LabdClientConfig>,
): LabdClientConfig {
  const { labdUrl, certPath, keyPath, caPath } = loadConfig();

  const merged: LabdClientConfig = { baseUrl: labdUrl };
  if (certPath) merged.certPath = certPath;
  if (keyPath) merged.keyPath = keyPath;
  if (caPath) merged.caPath = caPath;

  // Environment variable overrides (cert paths)
  const envCert = process.env["LABCTL_CERT_PATH"];
  const envKey = process.env["LABCTL_KEY_PATH"];
  const envCa = process.env["LABCTL_CA_PATH"];
  if (envCert) merged.certPath = envCert;
  if (envKey) merged.keyPath = envKey;
  if (envCa) merged.caPath = envCa;

  return overrides ? { ...merged, ...overrides } : merged;
}
|
||||
|
||||
/**
 * Creates a new `LabdClient` from the loaded configuration.
 *
 * @param overrides - Configuration fields forwarded to `loadClientConfig`.
 * @returns A fresh client instance; nothing is cached.
 */
export function createLabdClient(
  overrides?: Partial<LabdClientConfig>,
): LabdClient {
  return new LabdClient(loadClientConfig(overrides));
}
|
||||
|
||||
// Module-wide client instance, created on first use.
let _singleton: LabdClient | undefined;

/**
 * Returns the shared `LabdClient`, building it with default configuration
 * the first time it is requested.
 */
export function getLabdClient(): LabdClient {
  _singleton ??= createLabdClient();
  return _singleton;
}
|
||||
59
bastion/src/cli/src/api/errors.ts
Normal file
59
bastion/src/cli/src/api/errors.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
// Structured API error class for labd communication.
|
||||
|
||||
/**
 * Error raised for every failed interaction with labd.
 *
 * `statusCode` is the HTTP status, or `0` for failures that never produced a
 * response (connection errors and timeouts). `errorCode` is derived from the
 * status code; `detail` optionally carries a longer explanation.
 */
export class LabdApiError extends Error {
  readonly statusCode: number;
  readonly errorCode: string;
  readonly detail: string | undefined;

  constructor(statusCode: number, message: string, detail?: string) {
    super(message);
    this.name = "LabdApiError";
    this.statusCode = statusCode;
    this.errorCode = statusCodeToErrorCode(statusCode);
    this.detail = detail;
  }

  /**
   * Builds an error from an HTTP status and a decoded response body.
   *
   * String `error` / `detail` members of an object body become the message
   * and detail. Anything else falls back to `HTTP <status>` with no detail.
   */
  static fromResponse(statusCode: number, body: unknown): LabdApiError {
    const fallbackMessage = `HTTP ${statusCode}`;
    if (typeof body !== "object" || body === null) {
      return new LabdApiError(statusCode, fallbackMessage);
    }

    const { error, detail } = body as Record<string, unknown>;
    return new LabdApiError(
      statusCode,
      typeof error === "string" ? error : fallbackMessage,
      typeof detail === "string" ? detail : undefined,
    );
  }

  /** Error for a labd instance that could not be reached at `url`. */
  static notConnected(url: string): LabdApiError {
    const message = `Cannot connect to labd at ${url}`;
    const hint = "Check that labd is running and the URL is correct.";
    return new LabdApiError(0, message, hint);
  }

  /** Error for a request that exceeded `timeoutMs` milliseconds. */
  static timeout(timeoutMs: number): LabdApiError {
    const message = `Request timed out after ${timeoutMs}ms`;
    const hint = "The server may be overloaded. Try again later.";
    return new LabdApiError(0, message, hint);
  }
}
|
||||
|
||||
/** Type guard: narrows an unknown caught value to `LabdApiError`. */
export function isLabdApiError(err: unknown): err is LabdApiError {
  const matches = err instanceof LabdApiError;
  return matches;
}
|
||||
|
||||
/**
 * Maps an HTTP status code to a symbolic error code.
 *
 * Status `0` (no HTTP response at all) maps to `CONNECTION_ERROR`; any status
 * without an explicit entry maps to `UNKNOWN`.
 */
function statusCodeToErrorCode(code: number): string {
  const knownCodes = new Map<number, string>([
    [400, "BAD_REQUEST"],
    [401, "UNAUTHORIZED"],
    [403, "FORBIDDEN"],
    [404, "NOT_FOUND"],
    [409, "CONFLICT"],
    [429, "RATE_LIMITED"],
    [500, "INTERNAL_ERROR"],
    [503, "UNAVAILABLE"],
  ]);

  const known = knownCodes.get(code);
  if (known !== undefined) {
    return known;
  }
  return code === 0 ? "CONNECTION_ERROR" : "UNKNOWN";
}
|
||||
18
bastion/src/cli/src/api/index.ts
Normal file
18
bastion/src/cli/src/api/index.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
// Public API for labd client.
|
||||
|
||||
export { LabdClient, type LabdClientConfig } from "./client.js";
|
||||
export { LabdApiError, isLabdApiError } from "./errors.js";
|
||||
export { loadClientConfig, createLabdClient, getLabdClient, CONFIG_DIR, CONFIG_FILE, CERT_DIR } from "./config.js";
|
||||
export type {
|
||||
Server,
|
||||
ServerFilters,
|
||||
Agent,
|
||||
JoinToken,
|
||||
CreateTokenOpts,
|
||||
EnrollmentRequest,
|
||||
EnrollmentResponse,
|
||||
HealthStatus,
|
||||
ApiErrorBody,
|
||||
RequestOpts,
|
||||
} from "./types.js";
|
||||
export { createLabdWebSocket, streamExec, streamLogs, type StreamOptions } from "./websocket.js";
|
||||
96
bastion/src/cli/src/api/types.ts
Normal file
96
bastion/src/cli/src/api/types.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
// Typed interfaces for labd API requests and responses.
|
||||
// Matches Prisma schema models and labd route contracts.
|
||||
|
||||
// --- Server ---
|
||||
|
||||
/**
 * A server record as exchanged with the labd API.
 *
 * Fields typed `T | null` are always present and may be `null`; `agent` is
 * the only member that may be absent altogether.
 */
export interface Server {
  id: string;
  hostname: string;
  mac: string | null;
  cloud: string;
  environment: string;
  role: string;
  labels: Record<string, string>;
  ip: string | null;
  agentVersion: string | null;
  status: string;
  // Timestamps are typed as plain strings; the wire format is not visible
  // here (presumably ISO-8601 — confirm against labd).
  lastHeartbeat: string | null;
  createdAt: string;
  updatedAt: string;
  agent?: Agent | null;
}
|
||||
|
||||
/**
 * Agent record attached to a server; `serverId` refers back to the owning
 * `Server`.
 */
export interface Agent {
  id: string;
  serverId: string;
  certificatePem: string | null;
  // Timestamps are plain strings (presumably ISO-8601 — confirm against labd).
  enrolledAt: string;
  lastSeen: string | null;
}
|
||||
|
||||
/** Optional criteria for narrowing a server listing; every field may be omitted. */
export interface ServerFilters {
  cloud?: string;
  environment?: string;
  status?: string;
}
|
||||
|
||||
// --- Join Tokens ---
|
||||
|
||||
/** Join token record as exchanged with the labd API. */
export interface JoinToken {
  id: string;
  /** Only present on creation. */
  token?: string;
  type: string;
  label: string | null;
  usedBy: string | null;
  // Timestamps are plain strings (presumably ISO-8601 — confirm against labd).
  usedAt: string | null;
  revokedAt: string | null;
  createdAt: string;
  expiresAt: string | null;
}
|
||||
|
||||
/** Options accepted when creating a join token; every field may be omitted. */
export interface CreateTokenOpts {
  type?: "one-time" | "reusable";
  label?: string;
  expiresInHours?: number;
}
|
||||
|
||||
// --- Auth / Enrollment ---
|
||||
|
||||
/** Payload of an enrollment request; `csr` may be omitted. */
export interface EnrollmentRequest {
  token: string;
  hostname: string;
  csr?: string;
}
|
||||
|
||||
/** Result of an enrollment request; `certificatePem` may be `null`. */
export interface EnrollmentResponse {
  status: string;
  hostname: string;
  message: string;
  certificatePem: string | null;
}
|
||||
|
||||
// --- Health ---
|
||||
|
||||
/** Health report with an overall status and per-dependency check results. */
export interface HealthStatus {
  status: "healthy" | "degraded";
  // Unit of `uptime` is not visible here (presumably seconds — confirm).
  uptime: number;
  timestamp: string;
  checks: {
    database: "ok" | "error";
  };
}
|
||||
|
||||
// --- API Error ---
|
||||
|
||||
/** JSON error payload: a required `error` message plus optional `detail` and `code`. */
export interface ApiErrorBody {
  error: string;
  detail?: string;
  code?: string;
}
|
||||
|
||||
// --- Request helpers ---
|
||||
|
||||
/** Per-request options: optional query-string parameters and an optional body. */
export interface RequestOpts {
  /** Query parameters; values may be `undefined`. */
  query?: Record<string, string | number | boolean | undefined>;
  /** Request payload. */
  body?: unknown;
}
|
||||
160
bastion/src/cli/src/api/websocket.ts
Normal file
160
bastion/src/cli/src/api/websocket.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
// WebSocket client for real-time streaming operations (exec, logs).
|
||||
|
||||
import { WebSocket } from "ws";
|
||||
import { loadConfig } from "../config/index.js";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { LabdApiError } from "./errors.js";
|
||||
|
||||
/** Callbacks through which a streaming operation reports its progress. */
export interface StreamOptions {
  /** Receives each chunk of streamed text. */
  onData: (data: string) => void;
  /** Receives errors raised while streaming. */
  onError: (error: Error) => void;
  /** Called when the stream is closed. */
  onClose: () => void;
}
|
||||
|
||||
/**
 * Opens a WebSocket to labd and resolves once the connection is established.
 *
 * The configured labd URL is switched to the matching WebSocket scheme
 * (`https:` → `wss:`, `http:` → `ws:`). A client certificate is presented
 * when both `certPath` and `keyPath` are configured. A configured `caPath`
 * is loaded whether or not a client certificate is in use.
 *
 * @param path - Path resolved against the labd base URL, e.g. `/ws/exec`.
 * @returns The open socket.
 * @throws LabdApiError `timeout` when the connection is not open within
 *   10 seconds, `notConnected` when the socket reports an error first.
 *   Errors reading the certificate files propagate unchanged.
 */
export async function createLabdWebSocket(path: string): Promise<WebSocket> {
  const config = loadConfig();
  const baseUrl = config.labdUrl.replace("https:", "wss:").replace("http:", "ws:");
  const url = new URL(path, baseUrl);

  const wsOptions: WebSocket.ClientOptions = {};
  if (config.certPath && config.keyPath) {
    wsOptions.cert = readFileSync(config.certPath);
    wsOptions.key = readFileSync(config.keyPath);
  }
  // Server verification must not depend on a client certificate being set.
  if (config.caPath) wsOptions.ca = readFileSync(config.caPath);

  const connectTimeoutMs = 10_000;

  return new Promise((resolve, reject) => {
    // Construct the socket before arming the timer: if the constructor throws,
    // no timer is left behind to touch an uninitialised `ws` later.
    const ws = new WebSocket(url.toString(), wsOptions);

    const timeout = setTimeout(() => {
      ws.terminate();
      reject(LabdApiError.timeout(connectTimeoutMs));
    }, connectTimeoutMs);

    ws.on("open", () => {
      clearTimeout(timeout);
      resolve(ws);
    });

    ws.on("error", (err: Error) => {
      clearTimeout(timeout);
      // After the promise has settled this reject is a no-op.
      reject(
        LabdApiError.notConnected(config.labdUrl + " — " + err.message),
      );
    });
  });
}
|
||||
|
||||
/**
 * Runs a command on a server through labd's `/ws/exec` socket and streams
 * its output.
 *
 * stdout and stderr chunks are both delivered to `options.onData`.
 *
 * @param serverName - Target server, sent as `server` in the request.
 * @param command - Command and arguments.
 * @param options - Stream callbacks plus `tty` (default `false`) and
 *   `timeout` (default `30_000`), both forwarded in the request message.
 * @returns The exit code from the `exec-exit` message, or `1` if it carried none.
 * @throws Error when labd sends an `error` message, or when the socket closes
 *   before any exit code was reported.
 */
export async function streamExec(
  serverName: string,
  command: string[],
  options: StreamOptions & { tty?: boolean; timeout?: number },
): Promise<number> {
  const ws = await createLabdWebSocket("/ws/exec");
  const requestId = crypto.randomUUID();

  return new Promise<number>((resolve, reject) => {
    ws.on("message", (raw: Buffer) => {
      try {
        const msg = JSON.parse(raw.toString()) as {
          type: string;
          data?: string;
          exitCode?: number;
          message?: string;
        };
        switch (msg.type) {
          case "exec-stdout":
          case "exec-stderr":
            if (msg.data) options.onData(msg.data);
            break;
          case "exec-exit":
            ws.close();
            resolve(msg.exitCode ?? 1);
            break;
          case "error":
            ws.close();
            reject(new Error(msg.message ?? "Remote execution error"));
            break;
        }
      } catch (err) {
        // Malformed frames are reported but do not end the stream.
        options.onError(err instanceof Error ? err : new Error(String(err)));
      }
    });

    ws.on("close", () => {
      try {
        options.onClose();
      } finally {
        // Without this, a dropped connection would leave the caller awaiting
        // forever. It is a no-op when the promise has already settled.
        reject(new Error("Connection closed before the command reported an exit code"));
      }
    });

    ws.on("error", (err: Error) => {
      options.onError(err);
    });

    ws.send(
      JSON.stringify({
        type: "exec",
        requestId,
        server: serverName,
        command,
        tty: options.tty ?? false,
        timeout: options.timeout ?? 30_000,
      }),
    );
  });
}
|
||||
|
||||
/**
 * Subscribes to a server's logs over labd's `/ws/logs` socket.
 *
 * The returned promise resolves as soon as the subscription has been sent,
 * not when the stream ends. Lines, errors and closure are reported through
 * the `options` callbacks.
 *
 * @param serverName - Target server, sent as `server` in the request.
 * @param logOptions - Selection options forwarded verbatim as `options`.
 * @param options - Stream callbacks.
 */
export async function streamLogs(
  serverName: string,
  logOptions: {
    follow?: boolean;
    lines?: number;
    unit?: string;
    since?: string;
    priority?: string;
    kernel?: boolean;
  },
  options: StreamOptions,
): Promise<void> {
  const socket = await createLabdWebSocket("/ws/logs");
  const requestId = crypto.randomUUID();

  socket.on("message", (raw: Buffer) => {
    try {
      const frame = JSON.parse(raw.toString()) as {
        type: string;
        line?: string;
        message?: string;
      };

      if (frame.type === "log-line") {
        if (frame.line) options.onData(frame.line);
      } else if (frame.type === "log-end") {
        socket.close();
      } else if (frame.type === "error") {
        socket.close();
        options.onError(new Error(frame.message ?? "Log streaming error"));
      }
    } catch (failure) {
      const asError = failure instanceof Error ? failure : new Error(String(failure));
      options.onError(asError);
    }
  });

  socket.on("close", () => {
    options.onClose();
  });

  socket.on("error", (failure) => {
    options.onError(failure);
  });

  const subscription = {
    type: "log-subscribe",
    requestId,
    server: serverName,
    options: logOptions,
  };
  socket.send(JSON.stringify(subscription));
}
|
||||
403
bastion/src/cli/src/commands/app.ts
Normal file
403
bastion/src/cli/src/commands/app.ts
Normal file
@@ -0,0 +1,403 @@
|
||||
// CLI command: labctl app k3s install/health <target>
|
||||
// Install or check k3s on a target machine via SSH.
|
||||
|
||||
import { existsSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import type { Command } from "commander";
|
||||
import type { BastionState } from "@lab/shared";
|
||||
import { K3sModule, sshExec } from "@lab/modules";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
/**
 * Resolves a user-supplied target to connection details.
 *
 * Tried in order: a dotted-quad IP (used as-is, with role `infra`), then a
 * MAC key in `state.installed` (dashes accepted, case-insensitive, entry
 * must have an IP), then an exact hostname or a hostname starting with
 * `<target>.`.
 *
 * @param target - IP address, MAC address or hostname.
 * @param state - Bastion state, or `null` when unavailable.
 * @returns The resolved machine, or `null` when nothing matches.
 */
function resolveTarget(
  target: string,
  state: BastionState | null,
): { ip: string; hostname: string; role: string } | null {
  // Direct IP
  const looksLikeIpv4 = /^\d+\.\d+\.\d+\.\d+$/.test(target);
  if (looksLikeIpv4) {
    return { ip: target, hostname: target, role: "infra" };
  }

  if (!state) return null;

  // Check by MAC
  const macKey = target.toLowerCase().replace(/-/g, ":");
  const byMac = state.installed[macKey];
  if (byMac?.ip) {
    const { ip, hostname, role } = byMac;
    return { ip, hostname, role };
  }

  // Check by hostname
  const domainPrefix = target + ".";
  const byHostname = Object.values(state.installed).find(
    (info) => info.hostname === target || info.hostname.startsWith(domainPrefix),
  );
  if (!byHostname) return null;

  return { ip: byHostname.ip, hostname: byHostname.hostname, role: byHostname.role };
}
|
||||
|
||||
/**
 * Locates the first existing private key in the invoking user's `~/.ssh`.
 *
 * When `SUDO_USER` is set, `/home/<SUDO_USER>` is searched instead of the
 * current home directory. Candidates are tried in the order `id_ed25519`,
 * `id_ecdsa`, `id_rsa`.
 *
 * @returns Absolute path of the first key found, or `undefined` if none exists.
 */
function findSshKey(): string | undefined {
  const sudoUser = process.env["SUDO_USER"];
  const home = sudoUser ? join("/home", sudoUser) : homedir();
  const sshDir = join(home, ".ssh");

  return ["id_ed25519", "id_ecdsa", "id_rsa"]
    .map((fileName) => join(sshDir, fileName))
    .find((candidate) => existsSync(candidate));
}
|
||||
|
||||
/**
 * Fetches the bastion state from labd on a best-effort basis.
 *
 * @returns The state, or `null` when the client cannot be created or the
 *   request fails for any reason.
 */
async function fetchState(): Promise<BastionState | null> {
  let state: BastionState | null = null;
  try {
    state = await getLabdClient().getMachines();
  } catch {
    // Deliberately ignored: callers treat `null` as "labd unavailable".
  }
  return state;
}
|
||||
|
||||
import { registerLabcontrollerCommands } from "./labcontroller.js";
|
||||
|
||||
/** ANSI SGR escape sequences used to colour the k3s status tables. */
const ANSI = {
  bold: "\x1b[1m",
  green: "\x1b[32m",
  red: "\x1b[31m",
  dim: "\x1b[2m",
  reset: "\x1b[0m",
};

/** Remote command reporting the k3s server unit's state, falling back to the agent unit. */
const K3S_SERVICE_PROBE =
  "systemctl is-active k3s 2>/dev/null || systemctl is-active k3s-agent 2>/dev/null";

/** Remote command printing the Ready condition of the first node. */
const K3S_NODE_READY_PROBE =
  "sudo k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' 2>/dev/null";

/** Remote command counting pods across all namespaces. */
const K3S_POD_COUNT_PROBE = "sudo k3s kubectl get pods -A --no-headers 2>/dev/null | wc -l";

/**
 * Tells whether the output of `K3S_SERVICE_PROBE` reports a running unit.
 *
 * When the first `is-active` fails, the `||` fallback runs as well, so stdout
 * can hold one state line per unit (for example "inactive" then "active" on an
 * agent-only node). Comparing the whole output with "active" misses that case,
 * so each line is checked on its own.
 */
function isK3sServiceActive(stdout: string): boolean {
  return stdout.split("\n").some((line) => line.trim() === "active");
}

/** Builds the SSH options shared by every status probe (8 s timeout, optional key). */
function sshProbeOptions(sshKey: string | undefined) {
  return { ...(sshKey ? { keyPath: sshKey } : {}), timeoutMs: 8_000 };
}

/** One line of the `k3s health` overview table: cell texts plus their colours. */
interface HealthRow {
  host: string; ip: string; role: string;
  k3s: string; node: string; encrypt: string; cni: string; pods: string;
  k3sC: string; nodeC: string; encC: string; cniC: string;
}

/**
 * Registers the `app` command tree: the labcontroller subcommands and
 * `app k3s install | health | list`.
 *
 * @param program - Root commander program the `app` command is attached to.
 */
export function registerAppCommand(program: Command): void {
  const appCmd = program.command("app").description("Application management");

  // labcontroller subcommands
  registerLabcontrollerCommands(appCmd);

  const k3sCmd = appCmd.command("k3s").description("k3s cluster management");
  registerK3sInstall(k3sCmd);
  registerK3sHealth(k3sCmd);
  registerK3sList(k3sCmd);
}

/** Registers `k3s install <target>`: install, configure, then best-effort app deployment. */
function registerK3sInstall(k3sCmd: Command): void {
  k3sCmd
    .command("install <target>")
    .description("Install k3s on a target machine (hostname, IP, or MAC)")
    .option("--role <role>", "k3s role: infra (server) or worker (agent)", "infra")
    .option("--user <user>", "SSH user", "michal")
    .option("--k3s-server <url>", "k3s server URL (required for worker role)")
    .option("--k3s-token <token>", "k3s join token (required for worker role)")
    .action(async (target: string, opts: {
      role: string;
      user: string;
      k3sServer?: string;
      k3sToken?: string;
    }) => {
      const state = await fetchState();
      const resolved = resolveTarget(target, state);

      if (!resolved) {
        console.error(`Cannot resolve target: ${target}`);
        console.error("Provide an IP address, hostname, or MAC of an installed machine.");
        process.exit(1);
      }

      // Any value other than "worker" installs a server. Say so instead of
      // silently turning a typo into an infra install.
      if (opts.role !== "infra" && opts.role !== "worker") {
        console.error(`Warning: unknown k3s role "${opts.role}"; installing as infra.`);
      }
      const role = opts.role === "worker" ? "worker" : "infra";
      const sshKey = findSshKey();

      console.log(`Installing k3s on ${resolved.hostname} (${resolved.ip}) as ${role}...`);
      console.log("");

      const k3s = new K3sModule();
      const moduleCtx = {
        hostname: resolved.hostname,
        ip: resolved.ip,
        role,
        os: "fedora-43" as const,
        arch: "x86_64" as const,
        sshUser: opts.user,
        ...(sshKey ? { sshKeyPath: sshKey } : {}),
        config: {
          ...(opts.k3sServer ? { k3sServerUrl: opts.k3sServer } : {}),
          ...(opts.k3sToken ? { k3sToken: opts.k3sToken } : {}),
        },
      };

      const installResult = await k3s.install(moduleCtx);
      for (const line of installResult.output) {
        console.log(`  ${line}`);
      }
      if (!installResult.success) {
        console.error(`\nk3s install failed: ${installResult.errors.join(", ")}`);
        process.exit(1);
      }

      console.log("\nRunning post-install configuration...\n");
      const configResult = await k3s.configure(moduleCtx);
      for (const line of configResult.output) {
        console.log(`  ${line}`);
      }
      if (!configResult.success) {
        console.error(`\nk3s configure failed: ${configResult.errors.join(", ")}`);
        process.exit(1);
      }

      console.log("\nk3s installed successfully.");

      // Check if the machine's role requires additional app deployments
      try {
        const { ROLE_REGISTRY } = await import("@lab/shared");
        const freshState = await fetchState();
        if (freshState) {
          for (const [, info] of Object.entries(freshState.installed)) {
            if (info.ip === resolved.ip || info.hostname === resolved.hostname) {
              const roleInfo = ROLE_REGISTRY.find((r: { name: string }) => r.name === info.role);
              if (roleInfo && roleInfo.apps.length > 0) {
                console.log(`\nRole ${info.role} requires: ${roleInfo.apps.join(", ")}`);
                console.log(`Deploying automatically...`);
                const { execFileSync } = await import("node:child_process");
                try {
                  // Re-invoke this CLI so the deploy runs exactly as a manual call would.
                  execFileSync("node", [
                    process.argv[1] ?? "",
                    "app", "labcontroller", "deploy", resolved.hostname,
                    "--user", opts.user,
                  ], { stdio: "inherit" });
                } catch {
                  console.error(`\nAuto-deploy failed. Run manually: labctl app labcontroller deploy ${resolved.hostname}`);
                }
              }
              break;
            }
          }
        }
      } catch { /* best-effort chain */ }

      console.log(`\nTo get kubeconfig: ssh ${opts.user}@${resolved.ip} sudo cat /etc/rancher/k3s/k3s.yaml`);
    });
}

/** Registers `k3s health [target]`: an overview table for all hosts, or a detailed check of one. */
function registerK3sHealth(k3sCmd: Command): void {
  k3sCmd
    .command("health [target]")
    .description("Check k3s health (all hosts if no target given)")
    .option("--user <user>", "SSH user", "michal")
    .action(async (target: string | undefined, opts: { user: string }) => {
      const sshKey = findSshKey();

      if (!target) {
        let state: BastionState;
        try {
          state = await getLabdClient().getMachines();
        } catch (err) {
          console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
          process.exit(1);
        }

        const entries = Object.entries(state.installed);
        if (entries.length === 0) {
          console.log("No installed machines.");
          return;
        }

        const { bold: BOLD, green: GREEN, red: RED, dim: DIM, reset: RESET } = ANSI;
        const pad = (s: string, w: number) => s.padEnd(w);

        console.log(
          `${BOLD}${pad("HOST", 22)}${pad("IP", 16)}${pad("ROLE", 8)}${pad("K3S", 14)}${pad("NODE", 10)}${pad("ENCRYPT", 10)}${pad("CNI", 14)}${pad("PODS", 6)}${RESET}`,
        );

        // Hosts are probed concurrently; a failure on one host only marks its own row.
        const probes = entries.map(async ([_mac, info]): Promise<HealthRow> => {
          const r: HealthRow = {
            host: info.hostname, ip: info.ip, role: info.role,
            k3s: "—", node: "—", encrypt: "—", cni: "—", pods: "—",
            k3sC: DIM, nodeC: DIM, encC: DIM, cniC: DIM,
          };

          if (!info.ip || info.role === "vanilla") {
            r.k3s = info.role === "vanilla" ? "n/a" : "no ip";
            return r;
          }

          try {
            const svc = await sshExec(info.ip, opts.user, K3S_SERVICE_PROBE, sshProbeOptions(sshKey));

            if (!isK3sServiceActive(svc.stdout)) {
              const svcStatus = svc.stdout.trim();
              r.k3s = svcStatus === "inactive" ? "stopped" : "not installed";
              r.k3sC = svcStatus === "inactive" ? RED : DIM;
              return r;
            }

            r.k3s = "running"; r.k3sC = GREEN;

            const [nodeRes, encRes, cniRes, podRes] = await Promise.all([
              sshExec(info.ip, opts.user, K3S_NODE_READY_PROBE, sshProbeOptions(sshKey)),
              sshExec(info.ip, opts.user,
                "sudo k3s secrets-encrypt status 2>/dev/null | head -1",
                sshProbeOptions(sshKey)),
              sshExec(info.ip, opts.user,
                "sudo k3s kubectl get pods -n kube-system -l k8s-app=cilium --no-headers 2>/dev/null | head -1",
                sshProbeOptions(sshKey)),
              sshExec(info.ip, opts.user, K3S_POD_COUNT_PROBE, sshProbeOptions(sshKey)),
            ]);

            const nodeReady = nodeRes.stdout.includes("True");
            r.node = nodeReady ? "Ready" : "NotReady";
            r.nodeC = nodeReady ? GREEN : RED;

            const encrypted = encRes.stdout.includes("Enabled");
            r.encrypt = encrypted ? "yes" : "no";
            r.encC = encrypted ? GREEN : RED;

            // No running cilium pod is reported as flannel.
            const ciliumRunning = cniRes.stdout.includes("Running");
            r.cni = ciliumRunning ? "cilium" : "flannel";
            r.cniC = ciliumRunning ? GREEN : DIM;

            r.pods = podRes.stdout.trim() || "?";
          } catch {
            r.k3s = "unreachable"; r.k3sC = RED;
          }

          return r;
        });

        const results = await Promise.all(probes);
        for (const r of results) {
          console.log(
            `${pad(r.host, 22)}${pad(r.ip, 16)}${pad(r.role, 8)}${r.k3sC}${pad(r.k3s, 14)}${RESET}${r.nodeC}${pad(r.node, 10)}${RESET}${r.encC}${pad(r.encrypt, 10)}${RESET}${r.cniC}${pad(r.cni, 14)}${RESET}${pad(r.pods, 6)}`,
          );
        }
        return;
      }

      // Single target: detailed health check
      const state = await fetchState();
      const resolved = resolveTarget(target, state);

      if (!resolved) {
        console.error(`Cannot resolve target: ${target}`);
        process.exit(1);
      }

      console.log(`Checking k3s health on ${resolved.hostname} (${resolved.ip})...\n`);

      const k3s = new K3sModule();
      const healthResult = await k3s.health({
        hostname: resolved.hostname,
        ip: resolved.ip,
        role: resolved.role,
        os: "fedora-43" as const,
        arch: "x86_64" as const,
        sshUser: opts.user,
        ...(sshKey ? { sshKeyPath: sshKey } : {}),
        config: {},
      });

      for (const line of healthResult.output) {
        console.log(`  ${line}`);
      }
      if (healthResult.errors.length > 0) {
        for (const err of healthResult.errors) {
          console.error(`  ERROR: ${err}`);
        }
      }

      process.exit(healthResult.success ? 0 : 1);
    });
}

/** Registers `k3s list`: a table of installed machines with k3s service, node and pod status. */
function registerK3sList(k3sCmd: Command): void {
  k3sCmd
    .command("list")
    .description("List installed machines and their k3s status")
    .option("--user <user>", "SSH user", "michal")
    .action(async (opts: { user: string }) => {
      let state: BastionState;
      try {
        state = await getLabdClient().getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      const entries = Object.entries(state.installed);
      if (entries.length === 0) {
        console.log("No installed machines.");
        return;
      }

      const sshKey = findSshKey();
      const { bold: BOLD, green: GREEN, red: RED, dim: DIM, reset: RESET } = ANSI;

      const hdr = (s: string, w: number) => s.padEnd(w);
      console.log(
        `${BOLD}${hdr("HOSTNAME", 28)}${hdr("IP", 18)}${hdr("ROLE", 10)}${hdr("K3S", 16)}${hdr("NODE", 12)}${hdr("PODS", 6)}${RESET}`,
      );

      const probes = entries.map(async ([_mac, info]) => {
        const row = {
          hostname: info.hostname,
          ip: info.ip,
          role: info.role,
          k3s: "—",
          node: "—",
          pods: "—",
          k3sColor: DIM,
          nodeColor: DIM,
        };

        if (!info.ip || info.role === "vanilla") {
          row.k3s = info.role === "vanilla" ? "n/a" : "no ip";
          return row;
        }

        try {
          const svcResult = await sshExec(info.ip, opts.user, K3S_SERVICE_PROBE, sshProbeOptions(sshKey));
          const svcStatus = svcResult.stdout.trim();

          if (isK3sServiceActive(svcResult.stdout)) {
            row.k3s = "running";
            row.k3sColor = GREEN;

            const nodeResult = await sshExec(info.ip, opts.user,
              `${K3S_NODE_READY_PROBE} || echo unknown`,
              sshProbeOptions(sshKey),
            );
            const nodeReady = nodeResult.stdout.trim();
            if (nodeReady.includes("True")) {
              row.node = "Ready";
              row.nodeColor = GREEN;
            } else {
              row.node = "NotReady";
              row.nodeColor = RED;
            }

            const podResult = await sshExec(info.ip, opts.user, K3S_POD_COUNT_PROBE, sshProbeOptions(sshKey));
            row.pods = podResult.stdout.trim() || "?";
          } else if (svcStatus === "inactive" || svcStatus === "dead") {
            row.k3s = "stopped";
            row.k3sColor = RED;
          } else {
            row.k3s = "not installed";
            row.k3sColor = DIM;
          }
        } catch {
          row.k3s = "unreachable";
          row.k3sColor = RED;
        }

        return row;
      });

      const results = await Promise.all(probes);

      for (const r of results) {
        console.log(
          `${hdr(r.hostname, 28)}${hdr(r.ip, 18)}${hdr(r.role, 10)}${r.k3sColor}${hdr(r.k3s, 16)}${RESET}${r.nodeColor}${hdr(r.node, 12)}${RESET}${hdr(r.pods, 6)}`,
        );
      }
    });
}
|
||||
76
bastion/src/cli/src/commands/config.ts
Normal file
76
bastion/src/cli/src/commands/config.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
// labctl config — view and modify CLI configuration.
|
||||
|
||||
import type { Command } from "commander";
|
||||
import {
|
||||
loadConfig,
|
||||
saveConfig,
|
||||
getConfigValue,
|
||||
setConfigValue,
|
||||
isValidConfigKey,
|
||||
CONFIG_FILE,
|
||||
} from "../config/index.js";
|
||||
|
||||
/**
 * Registers `config` and its subcommands (`list`, `get`, `set`, `path`).
 *
 * @param parent - Command the `config` command is attached to.
 */
export function registerConfigCommand(parent: Command): void {
  const configCmd = parent
    .command("config")
    .description("View and modify CLI configuration");

  /** Reports an unrecognised key on stderr and exits with status 1. */
  function rejectUnknownKey(key: string): never {
    console.error(`Unknown config key: ${key}`);
    console.error(`Valid keys: labdUrl, certPath, keyPath, caPath, defaultEnvironment, defaultCloud, outputFormat`);
    process.exit(1);
  }

  // config list
  configCmd
    .command("list")
    .description("Show all configuration values")
    .action(() => {
      const config = loadConfig();
      console.log(`# Configuration (${CONFIG_FILE})\n`);
      Object.entries(config)
        .filter(([, value]) => value !== undefined)
        .forEach(([name, value]) => {
          console.log(`${name}: ${value}`);
        });
    });

  // config get <key>
  configCmd
    .command("get <key>")
    .description("Get a configuration value")
    .action((key: string) => {
      if (!isValidConfigKey(key)) {
        rejectUnknownKey(key);
      }
      const value = getConfigValue(loadConfig(), key);
      // Unset values print nothing.
      if (value) {
        console.log(value);
      }
    });

  // config set <key> <value>
  configCmd
    .command("set <key> <value>")
    .description("Set a configuration value")
    .action((key: string, value: string) => {
      if (!isValidConfigKey(key)) {
        rejectUnknownKey(key);
      }
      const allowedFormats = ["table", "json", "yaml"];
      if (key === "outputFormat" && !allowedFormats.includes(value)) {
        console.error(`Invalid output format: ${value}. Must be table, json, or yaml.`);
        process.exit(1);
      }
      const updated = setConfigValue(loadConfig(), key, value);
      saveConfig(updated);
      console.log(`Set ${key} = ${value}`);
    });

  // config path
  configCmd
    .command("path")
    .description("Show configuration file path")
    .action(() => {
      console.log(CONFIG_FILE);
    });
}
|
||||
126
bastion/src/cli/src/commands/doctor.ts
Normal file
126
bastion/src/cli/src/commands/doctor.ts
Normal file
@@ -0,0 +1,126 @@
|
||||
// labctl doctor — diagnose configuration and connectivity issues.
|
||||
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { X509Certificate } from "node:crypto";
|
||||
import type { Command } from "commander";
|
||||
import { loadConfig, CONFIG_FILE, CERT_DIR } from "../config/index.js";
|
||||
|
||||
/** Outcome of one diagnostic check: its name, a severity and a message. */
interface DiagnosticResult {
  name: string;
  status: "ok" | "warn" | "error";
  message: string;
}
|
||||
|
||||
// ANSI SGR colour sequences, all built on the same control-sequence introducer.
const CSI = "\x1b[";
const GREEN = `${CSI}32m`;
const YELLOW = `${CSI}33m`;
const RED = `${CSI}31m`;
const RESET = `${CSI}0m`;
|
||||
|
||||
/**
 * Registers `doctor`, which checks the local configuration and labd
 * connectivity and prints one result per check.
 *
 * Checks, in order: configuration file, labd URL, client certificate,
 * certificate directory, and a `GET <labdUrl>/healthz` probe bounded to
 * 5 seconds. With `--json` the results are printed as a JSON array.
 * Otherwise a coloured report with a summary line is printed and the process
 * exit code is set to 1 when any check reports an error.
 *
 * @param program - Command the `doctor` command is attached to.
 */
export function registerDoctorCommand(program: Command): void {
  program
    .command("doctor")
    .description("Diagnose configuration and connectivity issues")
    .option("--json", "Output results as JSON")
    .action(async (opts: { json?: boolean }) => {
      const results: DiagnosticResult[] = [];
      const config = loadConfig();

      // Check config file
      const hasConfigFile = existsSync(CONFIG_FILE);
      results.push({
        name: "Configuration file",
        status: hasConfigFile ? "ok" : "warn",
        message: hasConfigFile ? CONFIG_FILE : "Using defaults — run 'labctl config set labdUrl <url>'",
      });

      // Check labd URL
      results.push({
        name: "labd URL",
        status: config.labdUrl ? "ok" : "error",
        message: config.labdUrl || "Not configured",
      });

      // Check client certificate
      if (config.certPath && existsSync(config.certPath)) {
        try {
          const certPem = readFileSync(config.certPath, "utf-8");
          const cert = new X509Certificate(certPem);
          const expiresIn = new Date(cert.validTo).getTime() - Date.now();
          const daysLeft = Math.floor(expiresIn / (1000 * 60 * 60 * 24));
          // Decide expiry on the raw remaining time: a certificate with a few
          // hours left floors to 0 days but is still valid. The negated
          // comparison also treats an unparseable date (NaN) as expired.
          const expired = !(expiresIn > 0);

          results.push({
            name: "Client certificate",
            status: expired ? "error" : daysLeft > 7 ? "ok" : "warn",
            message: expired
              ? "Expired!"
              : daysLeft >= 1
                ? `Valid for ${daysLeft} days`
                : "Expires in less than a day",
          });
        } catch {
          results.push({
            name: "Client certificate",
            status: "error",
            message: "Failed to parse certificate",
          });
        }
      } else {
        results.push({
          name: "Client certificate",
          status: "warn",
          message: `Not configured — run 'labctl login'`,
        });
      }

      // Check cert directory
      const hasCertDir = existsSync(CERT_DIR);
      results.push({
        name: "Certificate directory",
        status: hasCertDir ? "ok" : "warn",
        message: hasCertDir ? CERT_DIR : "Not created yet",
      });

      // Test labd connectivity
      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), 5000);
      try {
        const resp = await fetch(`${config.labdUrl}/healthz`, {
          signal: controller.signal,
        });

        const body = (await resp.json()) as { status?: string };
        results.push({
          name: "labd connectivity",
          status: resp.ok ? "ok" : "warn",
          message: resp.ok
            ? `Connected — ${body.status ?? "ok"}`
            : `HTTP ${resp.status}: ${body.status ?? "unknown"}`,
        });
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        results.push({
          name: "labd connectivity",
          status: "error",
          message: msg.includes("abort")
            ? "Connection timed out (5s)"
            : msg.includes("ECONNREFUSED")
              ? "Connection refused"
              : msg,
        });
      } finally {
        // Always disarm the timer. Left pending after a failed fetch it would
        // keep the process alive for the rest of the 5 seconds.
        clearTimeout(timeout);
      }

      // Output
      if (opts.json) {
        console.log(JSON.stringify(results, null, 2));
      } else {
        console.log("Running diagnostics...\n");
        for (const r of results) {
          const icon = r.status === "ok" ? "\u2713" : r.status === "warn" ? "!" : "\u2717";
          const color = r.status === "ok" ? GREEN : r.status === "warn" ? YELLOW : RED;
          console.log(`${color}${icon}${RESET} ${r.name}: ${r.message}`);
        }

        const errors = results.filter((r) => r.status === "error").length;
        const warns = results.filter((r) => r.status === "warn").length;
        const oks = results.filter((r) => r.status === "ok").length;
        console.log(`\n${oks} passed, ${warns} warnings, ${errors} errors`);

        if (errors > 0) process.exitCode = 1;
      }
    });
}
|
||||
22
bastion/src/cli/src/commands/forget.ts
Normal file
22
bastion/src/cli/src/commands/forget.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
// CLI command: provision forget
|
||||
// Remove a machine from all bastion state via labd.
|
||||
|
||||
import type { Command } from "commander";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
/**
 * Registers the `forget <mac>` subcommand on `parent`.
 *
 * The action lower-cases the MAC and turns `-` separators into `:`, asks labd
 * to forget that machine, and prints labd's reply as pretty-printed JSON.
 * Any error is reported on stderr and the process exits with status 1.
 */
export function registerForgetCommand(parent: Command): void {
  const forget = parent.command("forget <mac>");
  forget.description("Remove a machine from bastion state");
  forget.action(async (mac: string) => {
    // Accept both AA-BB-... and aa:bb:... spellings.
    const key = mac.toLowerCase().replace(/-/g, ":");

    try {
      const reply = await getLabdClient().forgetMachine(key);
      console.log(JSON.stringify(reply, null, 2));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`Failed: ${reason}`);
      process.exit(1);
    }
  });
}
|
||||
69
bastion/src/cli/src/commands/install.ts
Normal file
69
bastion/src/cli/src/commands/install.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
// CLI command: provision install
|
||||
// Queue a discovered machine for OS installation via labd.
|
||||
|
||||
import { Command, Option } from "commander";
|
||||
import { isValidOsId, SUPPORTED_OS, SUPPORTED_ROLES, ROLE_REGISTRY } from "@lab/shared";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
/**
 * Renders the role registry as help text: a blank line, an
 * "Available roles:" heading, then one row per entry of `ROLE_REGISTRY`
 * showing the padded name, the description, the parent role (if any) and
 * the auto-deployed apps (if any).
 */
function roleTable(): string {
  const rows = ROLE_REGISTRY.map((role) => {
    const inherits = role.parent ? ` (extends ${role.parent})` : "";
    const autoApps = role.apps.length > 0 ? ` [auto: ${role.apps.join(", ")}]` : "";
    return ` ${role.name.padEnd(16)} ${role.description}${inherits}${autoApps}`;
  });
  return ["", "Available roles:", ...rows].join("\n");
}
|
||||
|
||||
/**
 * Registers the `install <mac> <hostname>` subcommand on `parent`.
 *
 * Options:
 *  - `--role <role>`  one of `SUPPORTED_ROLES` (default "worker")
 *  - `--os <os>`      one of `SUPPORTED_OS` (default "fedora-43")
 *  - `--disk <device>` optional target disk; omitted from the request when not given
 *
 * The action validates OS and role, queues the install through labd, prints
 * labd's reply as JSON and then prints follow-up hints. On any failure it
 * writes to stderr and exits with status 1.
 */
export function registerInstallCommand(parent: Command): void {
  parent
    .command("install <mac> <hostname>")
    .description("Queue a discovered machine for OS installation")
    .showHelpAfterError(true)
    .addHelpText("after", roleTable())
    .addOption(new Option("--role <role>", "Machine role (see below)").choices([...SUPPORTED_ROLES]).default("worker"))
    .addOption(new Option("--os <os>", "Operating system").choices([...SUPPORTED_OS]).default("fedora-43"))
    .option("--disk <device>", "Target disk device (auto-detect if omitted)")
    .action(async (mac: string, hostname: string, opts: {
      role: string;
      os: string;
      disk?: string;
    }) => {
      // Defensive re-validation: `.choices()` already rejects unknown values
      // on the command line, but the action may also be invoked programmatically.
      if (!isValidOsId(opts.os)) {
        console.error(`Unknown OS: ${opts.os}. Supported: ${SUPPORTED_OS.join(", ")}`);
        process.exit(1);
      }
      if (!(SUPPORTED_ROLES as readonly string[]).includes(opts.role)) {
        console.error(`Unknown role: ${opts.role}`);
        console.error(roleTable());
        process.exit(1);
      }

      // Normalize the MAC the same way the `forget` and `logs` commands do
      // (lower-case, colon-separated) so "AA-BB-CC-DD-EE-FF" addresses the
      // same machine everywhere.
      const normalizedMac = mac.toLowerCase().replace(/-/g, ":");

      try {
        const result = await getLabdClient().installMachine({
          mac: normalizedMac,
          hostname,
          role: opts.role,
          os: opts.os,
          ...(opts.disk ? { disk: opts.disk } : {}),
        });

        console.log(JSON.stringify(result, null, 2));
        console.log("");
        const osLabel = opts.os.startsWith("ubuntu") ? "Ubuntu" : "Fedora";
        console.log(`Power on the machine to start ${osLabel} installation.`);

        // Roles flagged with `k3s` get extra post-install hints.
        const roleInfo = ROLE_REGISTRY.find((r) => r.name === opts.role);
        if (roleInfo?.k3s) {
          console.log(`After install completes, k3s will be installed automatically (role=${opts.role}).`);
          if (roleInfo.apps.length > 0) {
            console.log(`Then: ${roleInfo.apps.join(", ")} will be deployed.`);
          }
          console.log(`To install k3s manually later: labctl app k3s install ${hostname}`);
        }
      } catch (err) {
        console.error(`Failed: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }
    });
}
|
||||
298
bastion/src/cli/src/commands/labcontroller.ts
Normal file
298
bastion/src/cli/src/commands/labcontroller.ts
Normal file
@@ -0,0 +1,298 @@
|
||||
// CLI command: labctl app labcontroller deploy/status
|
||||
// Deploy bastion + labd + CockroachDB to a k3s labcontroller node.
|
||||
|
||||
import { existsSync, writeFileSync, mkdirSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import type { Command } from "commander";
|
||||
import type { BastionState } from "@lab/shared";
|
||||
import { sshExec } from "@lab/modules";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
/**
 * Locates a private SSH key to use for remote commands.
 *
 * When `SUDO_USER` is set, the invoking user's conventional home
 * (`/home/<SUDO_USER>`) is searched first so that `sudo labctl ...` picks up
 * the real user's key. That path does not exist for every account (e.g. root,
 * or users with a non-standard home), so the current process's home
 * directory is searched as a fallback.
 *
 * Within each home, key types are tried in order: ed25519, ecdsa, rsa.
 *
 * @returns Absolute path of the first key file that exists, or `undefined`
 *          when none is found.
 */
function findSshKey(): string | undefined {
  const sudoUser = process.env["SUDO_USER"];
  const homes = sudoUser ? [join("/home", sudoUser), homedir()] : [homedir()];

  for (const home of homes) {
    for (const name of ["id_ed25519", "id_ecdsa", "id_rsa"]) {
      const candidate = join(home, ".ssh", name);
      if (existsSync(candidate)) return candidate;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Turns a deploy/status target into an address to SSH to.
 *
 * A dotted-quad-looking string is returned unchanged. Otherwise labd's
 * installed-machine list is searched for an entry whose hostname equals
 * `target` or begins with `target + "."`, and that entry's `ip` is returned.
 * If labd cannot be queried or nothing matches, `target` itself is returned.
 */
async function resolveIp(target: string): Promise<string> {
  const looksLikeIpv4 = /^\d+\.\d+\.\d+\.\d+$/.test(target);
  if (looksLikeIpv4) return target;

  try {
    const { installed } = await getLabdClient().getMachines();
    const match = Object.values(installed).find(
      (machine) => machine.hostname === target || machine.hostname.startsWith(target + "."),
    );
    if (match) return match.ip;
  } catch {
    /* use target as-is */
  }

  return target;
}
|
||||
|
||||
/** Wraps `text` in single quotes for a POSIX shell, escaping embedded single quotes. */
function shellSingleQuote(text: string): string {
  return `'${text.replace(/'/g, "'\\''")}'`;
}

/**
 * Pipes one manifest (as JSON) into `sudo k3s kubectl apply -f -` on the
 * remote host and logs the outcome.
 *
 * @returns `true` when the remote command exited with status 0.
 */
async function applyManifestOverSsh(
  ip: string,
  user: string,
  sshOpts: { keyPath?: string },
  manifest: unknown,
): Promise<boolean> {
  const meta = manifest as { kind?: string; metadata?: { name?: string } };
  const kind = meta.kind ?? "?";
  const name = meta.metadata?.name ?? "?";

  // printf '%s' is used instead of echo so backslashes in the JSON are never
  // reinterpreted by shells whose echo processes escape sequences.
  const result = await sshExec(
    ip,
    user,
    `printf '%s\\n' ${shellSingleQuote(JSON.stringify(manifest))} | sudo k3s kubectl apply -f -`,
    { ...sshOpts, timeoutMs: 15_000 },
  );

  if (result.exitCode === 0) {
    console.log(` applied ${kind}/${name}`);
    return true;
  }
  console.error(` FAILED ${kind}/${name}: ${result.stderr.trim()}`);
  return false;
}

/**
 * Registers `labcontroller deploy <target>` and `labcontroller status [target]`
 * under the given `app` command.
 *
 * deploy:
 *  1. fetches the k3s kubeconfig from the target over SSH and merges it into
 *     `~/.kube/config` as context `lab-<target>` (falls back to a standalone
 *     file when the merge fails),
 *  2. applies the CockroachDB manifests, waits for the pod, runs the init job
 *     and creates the `lab` database,
 *  3. applies the labd manifests,
 *  4. applies the bastion DaemonSet,
 *  5. asks labd to set the host's role to "labcontroller".
 *  Any manifest that fails to apply is counted; the command then finishes with
 *  a non-zero exit code instead of reporting success.
 *
 * status:
 *  - without a target, probes every installed machine in parallel and prints
 *    a one-line summary per host;
 *  - with a target, prints `kubectl get pods -n lab-system -o wide` from it.
 */
export function registerLabcontrollerCommands(appCmd: Command): void {
  const lcCmd = appCmd.command("labcontroller").description("Labcontroller deployment (bastion + labd + CockroachDB)");

  lcCmd
    .command("deploy <target>")
    .description("Deploy labcontroller stack to a k3s node")
    .option("--user <user>", "SSH user", "michal")
    .option("--crdb-replicas <n>", "CockroachDB replicas", "1")
    .action(async (target: string, opts: {
      user: string;
      crdbReplicas: string;
    }) => {
      // Reject garbage early rather than passing NaN into the manifest builder.
      const replicas = Number(opts.crdbReplicas);
      if (!Number.isInteger(replicas) || replicas < 1) {
        console.error(`Invalid --crdb-replicas value: ${opts.crdbReplicas} (expected a positive integer)`);
        process.exit(1);
      }

      const ip = await resolveIp(target);
      const sshKey = findSshKey();
      const sshOpts = sshKey ? { keyPath: sshKey } : {};
      let failures = 0;

      console.log(`Deploying labcontroller stack to ${target} (${ip})...\n`);

      // 1. Fetch kubeconfig from target
      console.log("[1/4] Fetching kubeconfig...");
      const kcResult = await sshExec(ip, opts.user, "sudo cat /etc/rancher/k3s/k3s.yaml", { ...sshOpts, timeoutMs: 10_000 });
      if (kcResult.exitCode !== 0) {
        console.error(" Failed to fetch kubeconfig. Is k3s running?");
        process.exit(1);
      }

      const kubeconfigDir = join(homedir(), ".kube");
      mkdirSync(kubeconfigDir, { recursive: true });

      // Point the config at the node's reachable address and rename the
      // generic "default" cluster/user/context so it can coexist with others.
      const contextName = `lab-${target}`;
      const kubeconfig = kcResult.stdout
        .replace(/server:\s*https:\/\/127\.0\.0\.1:6443/, `server: https://${ip}:6443`)
        .replace(/name:\s*default/g, `name: ${contextName}`)
        .replace(/cluster:\s*default/g, `cluster: ${contextName}`)
        .replace(/user:\s*default/g, `user: ${contextName}`);

      const tmpPath = join(kubeconfigDir, `.lab-${target}-tmp`);
      const mainConfig = join(kubeconfigDir, "config");
      const { spawnSync } = await import("node:child_process");
      const { unlinkSync } = await import("node:fs");

      try {
        writeFileSync(tmpPath, kubeconfig, { mode: 0o600 });

        const mergeResult = spawnSync("kubectl", ["config", "view", "--flatten"], {
          encoding: "utf-8",
          stdio: ["pipe", "pipe", "pipe"],
          env: { ...process.env, KUBECONFIG: `${mainConfig}:${tmpPath}` },
        });

        if (mergeResult.status === 0 && mergeResult.stdout) {
          writeFileSync(mainConfig, mergeResult.stdout, { mode: 0o600 });
          spawnSync("kubectl", ["config", "use-context", contextName], {
            stdio: "pipe",
            env: { ...process.env, KUBECONFIG: mainConfig },
          });
          console.log(` Merged into ~/.kube/config as context "${contextName}"`);
          console.log(` Active context set to "${contextName}"`);
        } else {
          writeFileSync(join(kubeconfigDir, `lab-${target}`), kubeconfig, { mode: 0o600 });
          console.log(` Saved to ~/.kube/lab-${target} (merge failed, use KUBECONFIG=~/.kube/lab-${target})`);
        }
      } finally {
        // The temp file holds cluster credentials; remove it even if the merge threw.
        try { unlinkSync(tmpPath); } catch { /* ignore */ }
      }
      console.log("");

      // 2. Apply CockroachDB manifests
      console.log("[2/4] Deploying CockroachDB...");
      const { cockroachDbManifests } = await import("@lab/modules/dist/modules/labcontroller/src/cockroachdb.js");
      const crdb = cockroachDbManifests({ replicas });

      let crdbFailures = 0;
      for (const manifest of [crdb.namespace, crdb.headlessService, crdb.clientService, crdb.statefulSet]) {
        if (!(await applyManifestOverSsh(ip, opts.user, sshOpts, manifest))) crdbFailures++;
      }

      console.log(" Waiting for CockroachDB pod...");
      const waitResult = await sshExec(ip, opts.user,
        "sudo k3s kubectl wait --for=condition=Ready pod -l app=cockroachdb -n lab-system --timeout=120s 2>/dev/null || echo 'still starting'",
        { ...sshOpts, timeoutMs: 130_000 },
      );
      console.log(` ${waitResult.stdout.trim()}`);

      console.log(" Initializing CockroachDB cluster...");
      const initJson = JSON.stringify(crdb.initJob);
      await sshExec(ip, opts.user,
        `printf '%s\\n' ${shellSingleQuote(initJson)} | sudo k3s kubectl apply -f - 2>/dev/null; sudo k3s kubectl wait --for=condition=Complete job/cockroachdb-init -n lab-system --timeout=60s 2>/dev/null || echo 'init may already be done'`,
        { ...sshOpts, timeoutMs: 70_000 },
      );

      await sshExec(ip, opts.user,
        "sudo k3s kubectl exec cockroachdb-0 -n lab-system -- /cockroach/cockroach sql --insecure -e 'CREATE DATABASE IF NOT EXISTS lab' 2>/dev/null || echo 'db may already exist'",
        { ...sshOpts, timeoutMs: 15_000 },
      );
      if (crdbFailures === 0) {
        console.log(" CockroachDB ready\n");
      } else {
        console.error(` CockroachDB: ${crdbFailures} manifest(s) failed to apply\n`);
      }
      failures += crdbFailures;

      // 3. Deploy labd
      console.log("[3/4] Deploying labd...");
      const { labdManifests } = await import("@lab/modules/dist/modules/labcontroller/src/labd.js");
      const labd = labdManifests({ databaseUrl: crdb.connectionString });

      for (const manifest of [labd.service, labd.deployment]) {
        if (!(await applyManifestOverSsh(ip, opts.user, sshOpts, manifest))) failures++;
      }
      console.log("");

      // 4. Deploy bastion
      console.log("[4/4] Deploying bastion (hostNetwork)...");
      const { bastionManifests } = await import("@lab/modules/dist/modules/labcontroller/src/bastion.js");
      const bastion = bastionManifests();
      if (!(await applyManifestOverSsh(ip, opts.user, sshOpts, bastion.daemonSet))) failures++;

      // 5. Promote host role to labcontroller via labd
      console.log("Promoting host role to labcontroller...");
      try {
        const state = await getLabdClient().getMachines();
        for (const [mac, info] of Object.entries(state.installed)) {
          if (info.ip === ip || info.hostname === target) {
            await getLabdClient().updateRole(mac, "labcontroller");
            console.log(` ${info.hostname}: ${info.role ?? "?"} -> labcontroller`);
            break;
          }
        }
      } catch {
        console.log(" Could not update role (labd may not be running yet)");
      }

      if (failures > 0) {
        console.error(`\n=== Labcontroller deployment finished with ${failures} failed manifest(s) ===`);
        process.exitCode = 1;
      } else {
        console.log("\n=== Labcontroller deployed ===");
      }
      console.log(` CockroachDB: cockroachdb-client.lab-system:26257`);
      console.log(` labd: ${ip}:30100`);
      console.log(` bastion: ${ip}:8080 (hostNetwork)`);
      console.log(` context: lab-${target}`);
      console.log(`\n Switch context: kubectl ctx lab-${target}`);
      console.log(` View pods: kubectl get pods -n lab-system`);
    });

  lcCmd
    .command("status [target]")
    .description("Check labcontroller deployment status (all hosts if no target)")
    .option("--user <user>", "SSH user", "michal")
    .action(async (target: string | undefined, opts: { user: string }) => {
      const sshKey = findSshKey();
      const sshOpts = sshKey ? { keyPath: sshKey } : {};

      if (!target) {
        let state: BastionState;
        try {
          state = await getLabdClient().getMachines();
        } catch (err) {
          console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
          process.exit(1);
        }

        const entries = Object.entries(state.installed);
        if (entries.length === 0) {
          console.log("No installed machines.");
          return;
        }

        const BOLD = "\x1b[1m";
        const GREEN = "\x1b[32m";
        const RED = "\x1b[31m";
        const DIM = "\x1b[2m";
        const RESET = "\x1b[0m";
        const pad = (s: string, w: number) => s.padEnd(w);

        console.log(
          `${BOLD}${pad("HOST", 22)}${pad("IP", 16)}${pad("ROLE", 14)}${pad("CRDB", 12)}${pad("LABD", 12)}${pad("BASTION", 12)}${pad("NS", 8)}${RESET}`,
        );

        // One table row; the *C fields carry the ANSI colour for the matching column.
        interface StatusRow {
          host: string; ip: string; role: string;
          crdb: string; labd: string; bastion: string; ns: string;
          crdbC: string; labdC: string; bastionC: string;
        }

        const probes = entries.map(async ([, info]): Promise<StatusRow> => {
          const r: StatusRow = {
            host: info.hostname, ip: info.ip, role: info.role ?? "?",
            crdb: "—", labd: "—", bastion: "—", ns: "—",
            crdbC: DIM, labdC: DIM, bastionC: DIM,
          };

          if (!info.ip) return r;

          try {
            const result = await sshExec(info.ip, opts.user,
              "sudo k3s kubectl get pods -n lab-system --no-headers -o custom-columns='NAME:.metadata.name,STATUS:.status.phase' 2>/dev/null || echo 'NO_NS'",
              { ...sshOpts, timeoutMs: 10_000 },
            );

            if (result.stdout.includes("NO_NS") || result.exitCode !== 0) {
              r.ns = "none";
              return r;
            }

            r.ns = "ok";
            const lines = result.stdout.trim().split("\n").filter(Boolean);

            for (const line of lines) {
              const [name, status] = line.trim().split(/\s+/);
              if (!name) continue;
              const running = status === "Running" || status === "Succeeded";
              const color = running ? GREEN : RED;
              const label = running ? "running" : (status ?? "?").toLowerCase();

              // Pods are attributed to a component by name prefix; the
              // CockroachDB init job pod is deliberately ignored.
              if (name.startsWith("cockroachdb-") && !name.includes("init")) {
                r.crdb = label; r.crdbC = color;
              } else if (name.startsWith("labd-")) {
                r.labd = label; r.labdC = color;
              } else if (name.startsWith("bastion-")) {
                r.bastion = label; r.bastionC = color;
              }
            }
          } catch {
            r.crdb = "ssh err"; r.crdbC = RED;
          }

          return r;
        });

        const results = await Promise.all(probes);
        for (const r of results) {
          console.log(
            `${pad(r.host, 22)}${pad(r.ip, 16)}${pad(r.role, 14)}${r.crdbC}${pad(r.crdb, 12)}${RESET}${r.labdC}${pad(r.labd, 12)}${RESET}${r.bastionC}${pad(r.bastion, 12)}${RESET}${pad(r.ns, 8)}`,
          );
        }
        return;
      }

      // Specific target: show detailed pod list
      const ip = await resolveIp(target);

      console.log(`Labcontroller status on ${target} (${ip}):\n`);

      const result = await sshExec(ip, opts.user,
        "sudo k3s kubectl get pods -n lab-system -o wide 2>/dev/null || echo 'lab-system namespace not found'",
        { ...sshOpts, timeoutMs: 10_000 },
      );
      console.log(result.stdout);
      // The remote command itself always exits 0 (`|| echo`), so a non-zero
      // status here means the SSH session failed.
      if (result.exitCode !== 0) {
        console.error(result.stderr.trim());
        process.exitCode = 1;
      }
    });
}
|
||||
98
bastion/src/cli/src/commands/list.ts
Normal file
98
bastion/src/cli/src/commands/list.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
// CLI command: provision list
|
||||
// Merged view of all known machines with hardware + install info.
|
||||
|
||||
import type { Command } from "commander";
|
||||
import type { BastionState } from "@lab/shared";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
// ANSI escape sequences used by the machine table.
const BOLD = "\x1b[1m";
const GREEN = "\x1b[0;32m";
const YELLOW = "\x1b[1;33m";
const CYAN = "\x1b[0;36m";
const RESET = "\x1b[0m";

/**
 * Picks the ANSI colour for a machine status label.
 *
 * @param status "installed", "queued", "installing" or "discovered";
 *               anything else yields the reset sequence.
 * @returns The escape sequence to print before the status text.
 */
function statusColor(status: string): string {
  if (status === "installed") return GREEN;
  if (status === "queued" || status === "installing") return YELLOW;
  if (status === "discovered") return CYAN;
  return RESET;
}
|
||||
|
||||
/**
 * Registers the `list` subcommand on `parent`.
 *
 * The action fetches the aggregated machine state from labd and prints one
 * table row per MAC found in any of `discovered`, `install_queue` or
 * `installed`, followed by a PENDING section with the progress of every
 * queued install. If labd cannot be reached it reports the error and exits
 * with status 1.
 */
export function registerListCommand(parent: Command): void {
  // Wide enough for the longest role name in use ("labcontroller", 13 chars);
  // a narrower column pushes every following column out of alignment.
  const ROLE_WIDTH = 14;

  parent
    .command("list")
    .description("List all known machines")
    .action(async () => {
      let state: BastionState;
      try {
        state = await getLabdClient().getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // Collect all known MACs
      const allMacs = new Set([
        ...Object.keys(state.discovered),
        ...Object.keys(state.install_queue),
        ...Object.keys(state.installed),
      ]);

      console.log("");
      if (allMacs.size === 0) {
        console.log(" No machines known. PXE boot a machine to discover it.");
        console.log("");
        return;
      }

      console.log(
        `${BOLD} ${"MAC".padEnd(20)} ${"HOSTNAME".padEnd(24)} ${"STATUS".padEnd(12)} ${"ROLE".padEnd(ROLE_WIDTH)} ${"IP".padEnd(16)} ${"CPU".padEnd(24)} ${"CORES".padEnd(6)} ${"RAM".padEnd(6)} PRODUCT${RESET}`,
      );

      for (const mac of allMacs) {
        const hw = state.discovered[mac];
        const queued = state.install_queue[mac];
        const inst = state.installed[mac];

        // Determine status: installed wins over queued/installing, which wins
        // over plain discovered. A queue entry counts as "installing" once it
        // reports any progress other than "waiting".
        let status = "discovered";
        if (queued !== undefined) {
          status = queued.progress !== undefined && queued.progress !== "" && queued.progress !== "waiting"
            ? "installing"
            : "queued";
        }
        if (inst !== undefined) status = "installed";

        const hostname = inst?.hostname ?? queued?.hostname ?? "-";
        const role = inst?.role ?? queued?.role ?? "-";
        const ip = inst?.ip ?? "-";
        const cpu = hw?.cpu_model ?? "-";
        const cores = hw?.cpu_cores != null ? String(hw.cpu_cores) : "-";
        const ram = hw?.memory_gb != null ? `${hw.memory_gb}GB` : "-";
        const product = hw?.product ?? "-";

        const color = statusColor(status);

        console.log(
          ` ${mac.padEnd(20)} ${hostname.padEnd(24)} ${color}${status.padEnd(12)}${RESET} ${role.padEnd(ROLE_WIDTH)} ${ip.padEnd(16)} ${cpu.substring(0, 23).padEnd(24)} ${cores.padEnd(6)} ${ram.padEnd(6)} ${product}`,
        );
      }

      // Show install queue details if any
      const queueEntries = Object.entries(state.install_queue);
      if (queueEntries.length > 0) {
        console.log("");
        console.log(`${BOLD}PENDING${RESET}`);
        for (const [mac, cfg] of queueEntries) {
          const progress = cfg.progress ?? "waiting";
          const detail = cfg.progress_detail ?? "";
          console.log(` ${mac} ${progress}${detail ? ` - ${detail}` : ""}`);
        }
      }

      console.log("");
    });
}
|
||||
120
bastion/src/cli/src/commands/login.ts
Normal file
120
bastion/src/cli/src/commands/login.ts
Normal file
@@ -0,0 +1,120 @@
|
||||
// labctl login — authenticate with labd and obtain client certificate.
|
||||
|
||||
import { generateKeyPairSync } from "node:crypto";
|
||||
import { writeFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
|
||||
import { createInterface } from "node:readline";
|
||||
import type { Command } from "commander";
|
||||
import { loadConfig, saveConfig, CERT_DIR } from "../config/index.js";
|
||||
import { join } from "node:path";
|
||||
|
||||
/**
 * Registers the `login` command on `program`.
 *
 * Flow:
 *  1. ensure the certificate directory exists (mode 0700),
 *  2. generate a P-256 private key on first use, or reuse the existing one,
 *  3. derive the matching public key (the private key never leaves this host),
 *  4. prompt for a join token,
 *  5. POST token + hostname + public key to `<server>/api/auth/user-enroll`,
 *  6. store the returned client certificate / CA certificate and update the
 *     CLI configuration.
 *
 * Every failure is reported on stderr and ends the process with status 1.
 *
 * `--server <url>` overrides the labd URL from the saved configuration.
 */
export function registerLoginCommand(program: Command): void {
  program
    .command("login")
    .description("Authenticate with labd and obtain client certificate")
    .option("--server <url>", "labd server URL")
    .action(async (options: { server?: string }) => {
      if (!existsSync(CERT_DIR)) {
        mkdirSync(CERT_DIR, { recursive: true, mode: 0o700 });
      }

      const config = loadConfig();
      const serverUrl = options.server ?? config.labdUrl;
      if (!serverUrl) {
        // Without this guard the request below would go to "undefined/api/...".
        console.error("No labd server URL configured. Pass --server <url> or run 'labctl config set labdUrl <url>'.");
        process.exit(1);
      }

      const keyPath = join(CERT_DIR, "client.key");
      const certPath = join(CERT_DIR, "client.crt");
      const caPath = join(CERT_DIR, "ca.crt");

      // 1. Generate keypair if not exists
      if (!existsSync(keyPath)) {
        console.log("Generating client keypair...");
        const { privateKey } = generateKeyPairSync("ec", {
          namedCurve: "P-256",
          privateKeyEncoding: { type: "pkcs8", format: "pem" },
          publicKeyEncoding: { type: "spki", format: "pem" },
        });
        writeFileSync(keyPath, privateKey, { mode: 0o600 });
        console.log(`Private key saved to ${keyPath}`);
      } else {
        console.log(`Using existing keypair at ${keyPath}`);
      }

      // 2. Derive the public key for the enrollment request (simplified CSR —
      //    send public key, labd signs). Only the SPKI public half is sent;
      //    the contents of client.key must never be transmitted.
      let publicKey: string;
      try {
        const { createPublicKey } = await import("node:crypto");
        publicKey = createPublicKey(readFileSync(keyPath, "utf-8"))
          .export({ type: "spki", format: "pem" })
          .toString();
      } catch (err) {
        console.error(`Cannot read private key at ${keyPath}: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // 3. Prompt for token
      const token = await promptPassword("Enter join token: ");
      if (!token) {
        console.error("Token is required.");
        process.exit(1);
      }

      // 4. Submit enrollment request
      console.log(`Authenticating with ${serverUrl}...`);
      try {
        const resp = await fetch(`${serverUrl}/api/auth/user-enroll`, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            token,
            hostname: `cli-${process.env["USER"] ?? "unknown"}`,
            csr: publicKey,
          }),
        });

        if (!resp.ok) {
          // Error bodies may not be JSON; fall back to the HTTP status text.
          const body = (await resp.json().catch(() => ({}))) as Record<string, string>;
          console.error(`Login failed: ${body["error"] ?? resp.statusText}`);
          process.exit(1);
        }

        const result = (await resp.json()) as {
          certificatePem?: string | null;
          caPem?: string | null;
          status: string;
        };

        if (result.certificatePem) {
          writeFileSync(certPath, result.certificatePem, { mode: 0o600 });
          console.log(`Client certificate saved to ${certPath}`);
        } else {
          console.warn(`Warning: labd did not return a client certificate (status: ${result.status}).`);
        }
        if (result.caPem) {
          writeFileSync(caPath, result.caPem, { mode: 0o644 });
          console.log(`CA certificate saved to ${caPath}`);
        }

        // 5. Update config
        saveConfig({
          ...config,
          labdUrl: serverUrl,
          certPath,
          keyPath,
          ...(existsSync(caPath) ? { caPath } : {}),
        });

        console.log(`\nLogin successful! Configuration updated.`);
        console.log(`Server: ${serverUrl}`);
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        if (message.includes("ECONNREFUSED") || message.includes("fetch")) {
          console.error(`Cannot connect to labd at ${serverUrl}`);
          console.error("Check that labd is running and the URL is correct.");
        } else {
          console.error(`Login failed: ${message}`);
        }
        process.exit(1);
      }
    });
}
|
||||
|
||||
/**
 * Asks a single question on the terminal and resolves with the trimmed reply.
 *
 * A readline interface bound to stdin/stdout is opened for the question and
 * closed as soon as the answer arrives.
 *
 * NOTE(review): nothing here suppresses terminal echo, so despite the name
 * the typed value appears to be shown as it is entered — confirm whether
 * masking is wanted.
 */
function promptPassword(message: string): Promise<string> {
  const ask = (deliver: (value: string) => void): void => {
    const reader = createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    const onReply = (reply: string): void => {
      reader.close();
      deliver(reply.trim());
    };
    reader.question(message, onReply);
  };
  return new Promise<string>(ask);
}
|
||||
85
bastion/src/cli/src/commands/logs.ts
Normal file
85
bastion/src/cli/src/commands/logs.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
// CLI command: provision logs
|
||||
// Show provisioning logs for a machine via labd.
|
||||
|
||||
import type { Command } from "commander";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
/**
 * Resolve a target (hostname, MAC, IP) to a MAC address.
 *
 * The target is first lower-cased with `-` replaced by `:`. If that already
 * has the shape of a MAC it is returned directly. Otherwise labd's state is
 * consulted, in this order:
 *  1. installed machines — by hostname, hostname prefix (`target.`) or IP,
 *  2. queued installs — by hostname or hostname prefix,
 *  3. discovered machines — by exact (normalized) MAC key.
 * When labd is unreachable or nothing matches, the normalized input is
 * returned as-is.
 */
async function resolveToMac(target: string): Promise<string> {
  const candidate = target.toLowerCase().replace(/-/g, ":");

  // Looks like a MAC already
  const macShape = /^([0-9a-f]{2}:){5}[0-9a-f]{2}$/;
  if (macShape.test(candidate)) return candidate;

  const hostMatches = (hostname: string): boolean =>
    hostname === target || hostname.startsWith(target + ".");

  // Resolve from labd aggregated state
  try {
    const state = await getLabdClient().getMachines();

    const installedHit = Object.entries(state.installed).find(
      ([, machine]) => hostMatches(machine.hostname) || machine.ip === target,
    );
    if (installedHit) return installedHit[0];

    const queuedHit = Object.entries(state.install_queue).find(
      ([, entry]) => hostMatches(entry.hostname),
    );
    if (queuedHit) return queuedHit[0];

    const discoveredHit = Object.keys(state.discovered).find((mac) => mac === candidate);
    if (discoveredHit !== undefined) return discoveredHit;
  } catch {
    /* can't reach labd */
  }

  return candidate;
}
|
||||
|
||||
/**
 * Registers the `logs <target>` subcommand on `parent`.
 *
 * The target is resolved to a MAC, the machine's log record is fetched from
 * labd, and a short header (hostname, status, role, optional OS and IP) is
 * printed followed by one line per progress event. When the record carries
 * no events a placeholder line is printed instead.
 *
 * Errors whose message contains "404" or "not found" are reported as an
 * unknown machine; everything else as labd being unreachable. Both exit
 * with status 1.
 */
export function registerLogsCommand(parent: Command): void {
  const logs = parent.command("logs <target>");
  logs.description("Show provisioning logs for a machine (hostname, MAC, or IP)");
  logs.action(async (target: string) => {
    const mac = await resolveToMac(target);

    try {
      const data = await getLabdClient().getMachineLogs(mac);

      const BOLD = "\x1b[1m";
      const GREEN = "\x1b[32m";
      const YELLOW = "\x1b[33m";
      const RED = "\x1b[31m";
      const DIM = "\x1b[2m";
      const RESET = "\x1b[0m";

      // Header block
      console.log(`${BOLD}${data["hostname"]}${RESET} (${mac})`);
      const statusTint = data["status"] === "installed" ? GREEN : YELLOW;
      console.log(` Status: ${statusTint}${data["status"]}${RESET}`);
      console.log(` Role: ${data["role"]}`);
      if (data["os"]) {
        console.log(` OS: ${data["os"]}`);
      }
      if (data["ip"]) {
        console.log(` IP: ${data["ip"]}`);
      }
      console.log("");

      // Event list
      const events = data["log"] as Array<{ stage: string; detail: string; timestamp: string }> | undefined;
      const hasEvents = events && events.length > 0;
      if (!hasEvents) {
        console.log(` ${DIM}No progress events yet (queued, waiting for PXE boot)${RESET}`);
      } else {
        console.log(`${BOLD} Log:${RESET}`);
        for (const event of events) {
          // Characters 11..18 of the timestamp are shown as the time column.
          const clock = event.timestamp.slice(11, 19);
          let tint = YELLOW;
          if (event.stage === "complete") {
            tint = GREEN;
          } else if (event.stage === "error") {
            tint = RED;
          }
          const suffix = event.detail ? ` ${DIM}-- ${event.detail}${RESET}` : "";
          console.log(` ${DIM}${clock}${RESET} ${tint}${event.stage}${RESET}${suffix}`);
        }
      }
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      const missing = msg.includes("404") || msg.includes("not found");
      if (missing) {
        console.error(`Machine not found: ${target}`);
        console.error("Run 'labctl provision list' to see available machines.");
      } else {
        console.error(`Cannot reach labd: ${msg}`);
      }
      process.exit(1);
    }
  });
}
|
||||
114
bastion/src/cli/src/commands/makeiso.ts
Normal file
114
bastion/src/cli/src/commands/makeiso.ts
Normal file
@@ -0,0 +1,114 @@
|
||||
// CLI command: provision makeiso
|
||||
// Generate/serve a UEFI-bootable iPXE ISO for machines that don't support PXE boot.
|
||||
// Queries labd for connected bastions and provides the download URL.
|
||||
|
||||
import { readFileSync, writeFileSync, existsSync } from "node:fs";
|
||||
import { createInterface } from "node:readline";
|
||||
import { Command, Option } from "commander";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
import { buildBootIso } from "@lab/bastion/iso-builder";
|
||||
|
||||
/**
 * Prints `question` on stdout, reads one line from stdin and resolves with
 * it, trimmed. The readline interface is closed once the answer is in.
 */
function prompt(question: string): Promise<string> {
  const reader = createInterface({ input: process.stdin, output: process.stdout });
  return new Promise<string>((deliver) => {
    const onReply = (reply: string): void => {
      reader.close();
      deliver(reply.trim());
    };
    reader.question(question, onReply);
  });
}
|
||||
|
||||
// iPXE EFI binary for 64-bit x86: where it is read from and where it goes in the ISO.
const X86_64_IPXE = {
  src: "/usr/share/ipxe/ipxe-snponly-x86_64.efi",
  dest: "EFI/BOOT/BOOTX64.EFI",
};

// iPXE EFI binary for 64-bit ARM: where it is read from and where it goes in the ISO.
const AARCH64_IPXE = {
  src: "/usr/share/ipxe/arm64-efi/snponly.efi",
  dest: "EFI/BOOT/BOOTAA64.EFI",
};

/** Per-architecture iPXE binary locations, keyed by the `--arch` value. */
const IPXE_PATHS: Record<string, { src: string; dest: string }> = {
  x86_64: X86_64_IPXE,
  aarch64: AARCH64_IPXE,
};
|
||||
|
||||
/**
 * Chooses which online bastion to use.
 *
 * - No bastion online: prints an error and exits with status 1.
 * - Exactly one online: it is used without asking.
 * - Several online: they are listed with 1-based numbers and the user is
 *   prompted to pick one; an answer that does not parse to a number in range
 *   exits with status 1.
 *
 * @returns The chosen bastion's hostname and server IP; `httpPort` is always 8080.
 */
async function selectBastion(): Promise<{ hostname: string; serverIp: string; httpPort: number }> {
  const everyBastion = await getLabdClient().getBastions();
  const online = everyBastion.filter((candidate) => candidate.status === "online");
  const count = online.length;

  if (count === 0) {
    console.error("No bastions online. Start a bastion first.");
    process.exit(1);
  }

  if (count === 1) {
    const only = online[0]!;
    console.log(`Using bastion: ${only.hostname} (${only.serverIp})`);
    return { hostname: only.hostname, serverIp: only.serverIp, httpPort: 8080 };
  }

  console.log("Available bastions:\n");
  online.forEach((entry, position) => {
    console.log(` ${position + 1}) ${entry.hostname} ${entry.serverIp} (${entry.network})`);
  });
  console.log("");

  const reply = await prompt(`Select bastion [1-${count}]: `);
  // The menu is 1-based; convert to an array index.
  const choice = parseInt(reply, 10) - 1;
  const inRange = !isNaN(choice) && choice >= 0 && choice < count;
  if (!inRange) {
    console.error("Invalid selection.");
    process.exit(1);
  }

  const picked = online[choice]!;
  return { hostname: picked.hostname, serverIp: picked.serverIp, httpPort: 8080 };
}
|
||||
|
||||
/**
 * Register the `makeiso` subcommand on `parent`.
 *
 * The command first selects a bastion. Without `--local` it only prints the
 * URL of the ISO served by that bastion. With `--local` it reads the iPXE EFI
 * binary for every requested architecture from the local filesystem, embeds a
 * small iPXE script that chainloads `boot.ipxe` from the bastion, and writes
 * the resulting ISO to `--out`.
 *
 * The process exits with code 1 when an architecture is unknown or its iPXE
 * binary is not installed.
 *
 * @param parent Commander command that receives the subcommand.
 */
export function registerMakeIsoCommand(parent: Command): void {
  parent
    .command("makeiso")
    .description("Generate a UEFI-bootable iPXE ISO for network provisioning")
    .addOption(
      new Option("--arch <arch...>", "Target architecture(s)")
        .choices(["x86_64", "aarch64"])
        .default(["x86_64", "aarch64"]),
    )
    .option("--local", "Build ISO locally instead of using bastion-hosted URL")
    .option("--out <path>", "Output path for local ISO build", "ipxe-bastion.iso")
    .action(async (opts: { arch: string[]; local?: boolean; out: string }) => {
      const bastion = await selectBastion();
      const bastionUrl = `http://${bastion.serverIp}:${bastion.httpPort}`;

      if (opts.local) {
        console.log(`\nGenerating iPXE boot ISO...`);
        console.log(` Architectures: ${opts.arch.join(", ")}`);
        console.log(` Bastion: ${bastionUrl}`);

        const efiFiles: Array<{ path: string; data: Buffer }> = [];
        for (const arch of opts.arch) {
          const paths = IPXE_PATHS[arch];
          if (!paths) {
            console.error(`Unknown architecture: ${arch}`);
            process.exit(1);
          }
          if (!existsSync(paths.src)) {
            console.error(`iPXE binary not found: ${paths.src}`);
            console.error(`Install: sudo dnf install ipxe-bootimgs-${arch === "aarch64" ? "aarch64" : "x86"}`);
            process.exit(1);
          }
          efiFiles.push({ path: paths.dest, data: readFileSync(paths.src) });
          console.log(` ${arch}: ${paths.dest.split("/").pop()}`);
        }

        // iPXE scripts have no "( ... )" grouping: operators on one line are
        // evaluated strictly left to right. The former one-liner
        //   dhcp || ( echo ... && sleep 3 && dhcp )
        // therefore ran "sleep 3" and "dhcp )" even after a successful DHCP,
        // and the failing "dhcp )" aborted the script before the chain step.
        // The single retry is expressed with labels and goto instead.
        const script = [
          "#!ipxe",
          "",
          "echo Booting from iPXE ISO -- connecting to bastion...",
          "dhcp || goto dhcp_retry",
          "goto chainload",
          ":dhcp_retry",
          "echo DHCP failed, retrying...",
          "sleep 3",
          "dhcp",
          ":chainload",
          `chain ${bastionUrl}/boot.ipxe || shell`,
        ].join("\n");

        const iso = buildBootIso(efiFiles, script);
        writeFileSync(opts.out, iso);
        console.log(`\nISO written to: ${opts.out} (${(iso.length / 1024 / 1024).toFixed(1)}MB)`);
      } else {
        console.log(`\nThe bastion serves a boot ISO with the correct URL embedded.`);
        console.log(`Use this URL in JetKVM or any BMC virtual media:\n`);
        console.log(` ${bastionUrl}/boot.iso`);
      }

      console.log(`\nMount as virtual CD, boot from it. iPXE will chainload from bastion.`);
    });
}
|
||||
161
bastion/src/cli/src/commands/reprovision.ts
Normal file
161
bastion/src/cli/src/commands/reprovision.ts
Normal file
@@ -0,0 +1,161 @@
|
||||
// CLI command: provision reprovision
|
||||
// Queue a machine for reinstall and attempt SSH reboot into PXE via labd.
|
||||
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { existsSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { Command, Option } from "commander";
|
||||
import type { BastionState } from "@lab/shared";
|
||||
import { isValidOsId, SUPPORTED_OS, SUPPORTED_ROLES, ROLE_REGISTRY } from "@lab/shared";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
/**
 * Render the role registry as a help-text block.
 *
 * The block starts with an empty line and an "Available roles:" heading,
 * followed by one line per role: padded name, description, the parent role
 * (when set) and the list of apps (when non-empty).
 *
 * @returns The lines joined with newlines.
 */
function roleTable(): string {
  const output: string[] = [];
  output.push("");
  output.push("Available roles:");

  for (const role of ROLE_REGISTRY) {
    let row = ` ${role.name.padEnd(16)} ${role.description}`;
    if (role.parent) {
      row += ` (extends ${role.parent})`;
    }
    if (role.apps.length > 0) {
      row += ` [auto: ${role.apps.join(", ")}]`;
    }
    output.push(row);
  }

  return output.join("\n");
}
|
||||
|
||||
/**
 * Resolve a target (hostname, MAC, or IP) to {mac, hostname, ip} from state.
 *
 * Lookup order:
 *  1. MAC of an installed machine (lower-cased, "-" treated as ":").
 *  2. MAC of a discovered machine; the MAC doubles as hostname and the IP is "".
 *  3. Hostname of an installed machine, either exact or as the leading label(s)
 *     of a dotted name (`node1` matches `node1.example.org`).
 *  4. IP of an installed machine.
 *
 * @param target User-supplied identifier.
 * @param state Machine state as returned by labd.
 * @returns The matching machine, or `null` when nothing matches.
 */
function resolveTarget(
  target: string,
  state: BastionState,
): { mac: string; hostname: string; ip: string } | null {
  // Own-property lookup: a bare `table[key]` would also find inherited members
  // such as "constructor", turning those words into bogus machine matches.
  const ownEntry = <T>(table: Record<string, T>, key: string): T | undefined =>
    Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;

  const normalized = target.toLowerCase().replace(/-/g, ":");

  const installedByMac = ownEntry(state.installed, normalized);
  if (installedByMac) {
    return { mac: normalized, hostname: installedByMac.hostname, ip: installedByMac.ip };
  }

  if (ownEntry(state.discovered, normalized)) {
    return { mac: normalized, hostname: normalized, ip: "" };
  }

  // Hostname matches across ALL installed machines take priority over IP
  // matches, hence two separate passes.
  const installed = Object.entries(state.installed);
  const match =
    installed.find(([, info]) => info.hostname === target || info.hostname.startsWith(target + ".")) ??
    installed.find(([, info]) => info.ip === target);

  if (match === undefined) {
    return null;
  }

  const [mac, info] = match;
  return { mac, hostname: info.hostname, ip: info.ip };
}
|
||||
|
||||
/**
 * Register the `reprovision <target> [hostname]` subcommand on `parent`.
 *
 * The action:
 *  1. validates `--os` and `--role`,
 *  2. fetches machine state from labd and resolves the target to a MAC,
 *  3. queues the install through labd and prints the response as JSON,
 *  4. if an IP and a non-root user name are known, runs `ssh` to set the PXE
 *     entry as next boot (when one is found) and reboot the machine.
 *
 * Any failure in steps 1-3 prints an error and exits with code 1. A failing
 * `ssh` invocation is ignored because the connection is expected to drop when
 * the machine reboots.
 *
 * @param parent Commander command that receives the subcommand.
 */
export function registerReprovisionCommand(parent: Command): void {
  parent
    .command("reprovision <target> [hostname]")
    .description("Queue install + SSH reboot into PXE (target: hostname, MAC, or IP)")
    .showHelpAfterError(true)
    .addHelpText("after", roleTable())
    .addOption(new Option("--role <role>", "Machine role (see below)").choices([...SUPPORTED_ROLES]).default("worker"))
    .addOption(new Option("--os <os>", "Operating system").choices([...SUPPORTED_OS]).default("fedora-43"))
    .option("--disk <device>", "Target disk device (auto-detect if omitted)")
    .action(async (target: string, hostnameOverride: string | undefined, opts: {
      role: string;
      os: string;
      disk?: string;
    }) => {
      // --- Validate options -------------------------------------------------
      if (!isValidOsId(opts.os)) {
        console.error(`Unknown OS: ${opts.os}. Supported: ${SUPPORTED_OS.join(", ")}`);
        process.exit(1);
      }
      const knownRoles = SUPPORTED_ROLES as readonly string[];
      if (!knownRoles.includes(opts.role)) {
        console.error(`Unknown role: ${opts.role}`);
        console.error(roleTable());
        process.exit(1);
      }

      const client = getLabdClient();

      // --- Resolve the target against labd's aggregated state ---------------
      let state: BastionState;
      try {
        state = await client.getMachines();
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`Cannot reach labd: ${reason}`);
        process.exit(1);
      }

      const resolved = resolveTarget(target, state);
      if (!resolved) {
        console.error(`Cannot find machine: ${target}`);
        console.error("Provide a hostname, MAC, or IP of a known machine.");
        console.error("Run 'labctl provision list' to see available machines.");
        process.exit(1);
      }

      const { mac, ip } = resolved;
      const hostname = hostnameOverride ?? resolved.hostname;
      const location = ip ? ` at ${ip}` : "";

      console.log(`Reprovisioning ${hostname} (${mac})${location}...`);
      console.log(` Role: ${opts.role} OS: ${opts.os}`);
      console.log("");

      // --- Queue the install via labd ----------------------------------------
      try {
        const result = await client.installMachine({
          mac,
          hostname,
          role: opts.role,
          os: opts.os,
          ...(opts.disk ? { disk: opts.disk } : {}),
        });
        console.log(JSON.stringify(result, null, 2));
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`Failed to queue install: ${reason}`);
        process.exit(1);
      }

      // --- Try an SSH reboot into PXE ----------------------------------------
      if (ip === "") {
        console.log("\nNo IP known. Reboot the machine manually into PXE.");
        return;
      }

      // Prefer the invoking user when run through sudo; never log in as root.
      const sudoUser = process.env["SUDO_USER"];
      const adminUser = sudoUser ?? process.env["USER"] ?? "";
      const effectiveUser = adminUser === "root" ? "" : adminUser;
      if (effectiveUser === "") {
        console.log("\nReboot the machine manually into PXE.");
        return;
      }

      console.log(`\nAttempting SSH reboot into PXE (${effectiveUser}@${ip})...`);

      // Under sudo, look for keys in the invoking user's home, not root's.
      const realHome = sudoUser !== undefined ? join("/home", sudoUser) : homedir();
      const sshKey = ["id_ed25519", "id_rsa", "id_ecdsa"]
        .map(name => join(realHome, ".ssh", name))
        .find(candidate => existsSync(candidate));
      const identityArgs = sshKey !== undefined ? ["-i", sshKey] : [];

      // Remote script: pick the first PXE/network boot entry from efibootmgr,
      // set it as BootNext when found, then reboot either way.
      const remoteCommand =
        'PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi';

      const sshArgs = [
        "-o", "StrictHostKeyChecking=no",
        "-o", "ConnectTimeout=10",
        ...identityArgs,
        `${effectiveUser}@${ip}`,
        remoteCommand,
      ];

      try {
        execFileSync("ssh", sshArgs, { stdio: "inherit" });
      } catch {
        // SSH connection closing during reboot is expected
      }

      console.log("");
      console.log("Machine is rebooting into PXE. Install will start automatically.");
    });
}
|
||||
145
bastion/src/cli/src/commands/serve.ts
Normal file
145
bastion/src/cli/src/commands/serve.ts
Normal file
@@ -0,0 +1,145 @@
|
||||
// CLI command: init bastion standalone start
|
||||
// Start the bastion server (HTTP + dnsmasq), daemonized by default.
|
||||
|
||||
import { spawn, type ChildProcess } from "node:child_process";
|
||||
import { existsSync, readFileSync, openSync, mkdirSync } from "node:fs";
|
||||
import type { Command } from "commander";
|
||||
import { startBastion } from "@lab/bastion";
|
||||
|
||||
/**
 * Register the `start` subcommand on `parent`.
 *
 * With `--foreground` the bastion server is started in this process. Without
 * it the command re-invokes itself with `--foreground` as a detached child
 * whose stdout/stderr are appended to `<dir>/bastion.log`, waits three seconds,
 * and then either reports the startup failure (exit code 1) or prints the log
 * written by this launch together with the PID (exit code 0).
 *
 * Unless `--skip-dnsmasq` is given the command refuses to run as non-root.
 *
 * @param parent Commander command that receives the subcommand.
 */
export function registerStartCommand(parent: Command): void {
  parent
    .command("start")
    .description("Start the bastion server (HTTP + dnsmasq PXE)")
    .option("--port <port>", "HTTP port", "8080")
    .option("--dir <dir>", "Bastion data directory", "/tmp/lab-bastion")
    .option("--domain <domain>", "Internal domain for hostnames", "ad.itaz.eu")
    .option("--dhcp-mode <mode>", "DHCP mode: proxy or full", "proxy")
    .option("--fedora <version>", "Fedora version", "43")
    .option("--arch <arch>", "Architecture", "x86_64")
    .option("--timezone <tz>", "Timezone", "Europe/London")
    .option("--locale <locale>", "Locale", "en_GB.UTF-8")
    .option("--skip-dnsmasq", "Skip starting dnsmasq (for testing)")
    .option("--skip-artifacts", "Skip downloading boot artifacts (for testing)")
    .option("--foreground", "Run in foreground (default: daemonize)")
    .action(async (opts: {
      port: string;
      dir: string;
      domain: string;
      dhcpMode: string;
      fedora: string;
      arch: string;
      timezone: string;
      locale: string;
      skipDnsmasq?: boolean;
      skipArtifacts?: boolean;
      foreground?: boolean;
    }) => {
      // Check root early (before daemonize) so the error is visible
      if (!opts.skipDnsmasq && process.getuid?.() !== 0) {
        console.error("Must run as root (dnsmasq needs DHCP/TFTP ports).");
        console.error("Usage: sudo labctl init bastion standalone start");
        process.exit(1);
      }

      if (opts.foreground === true) {
        // Run in foreground
        await startBastion({
          httpPort: parseInt(opts.port, 10),
          bastionDir: opts.dir,
          domain: opts.domain,
          dhcpMode: opts.dhcpMode as "proxy" | "full",
          fedoraVersion: opts.fedora,
          arch: opts.arch,
          timezone: opts.timezone,
          locale: opts.locale,
          skipDnsmasq: opts.skipDnsmasq,
          skipArtifacts: opts.skipArtifacts,
        });
        return;
      }

      // Daemonize: re-run with --foreground, redirect output to log file
      mkdirSync(opts.dir, { recursive: true });
      const logFile = `${opts.dir}/bastion.log`;

      // Build explicit argument list instead of re-using process.argv
      // (which breaks with bun-compiled binaries)
      const fgArgs = [
        "init", "bastion", "standalone", "start", "--foreground",
        "--port", opts.port,
        "--dir", opts.dir,
        "--domain", opts.domain,
        "--dhcp-mode", opts.dhcpMode,
        "--fedora", opts.fedora,
        "--arch", opts.arch,
        "--timezone", opts.timezone,
        "--locale", opts.locale,
      ];
      if (opts.skipDnsmasq) fgArgs.push("--skip-dnsmasq");
      if (opts.skipArtifacts) fgArgs.push("--skip-artifacts");

      // Determine how to re-invoke ourselves
      const execPath = process.argv[0] ?? "labctl";
      let spawnCmd: string;
      let spawnArgs: string[];

      if (execPath.includes("node") || execPath.includes("tsx")) {
        // Running through an interpreter: the script path must come first.
        const scriptPath = process.argv[1];
        spawnCmd = execPath;
        spawnArgs = scriptPath ? [scriptPath, ...fgArgs] : fgArgs;
      } else {
        spawnCmd = execPath;
        spawnArgs = fgArgs;
      }

      // Open log file for the child's stdout/stderr so it survives parent exit
      const logFd = openSync(logFile, "a");

      // The log is opened in append mode, so it may already hold output from
      // earlier runs. Remember its current size and report only what this
      // launch adds.
      let logOffset = 0;
      try {
        logOffset = readFileSync(logFile).length;
      } catch {
        // Unreadable log: fall back to showing everything.
      }
      const readNewLog = (): string =>
        readFileSync(logFile).subarray(logOffset).toString("utf-8");

      const child: ChildProcess = spawn(spawnCmd, spawnArgs, {
        detached: true,
        stdio: ["ignore", logFd, logFd],
      });

      // Track the child's fate through events. Without an 'error' listener a
      // failed spawn (e.g. ENOENT) would surface as an uncaught exception, and
      // probing the PID with signal 0 could be fooled by PID reuse.
      const launch: { exited: boolean; error?: Error } = { exited: false };
      child.once("error", (err: Error) => {
        launch.error = err;
        launch.exited = true;
      });
      child.once("exit", () => {
        launch.exited = true;
      });

      // Wait briefly for the child to start, then check it's alive
      await new Promise((resolve) => setTimeout(resolve, 3000));

      if (launch.exited || child.pid === undefined) {
        // Child already died — show the log
        console.error("Bastion failed to start. Log output:");
        console.error("");
        if (launch.error !== undefined) {
          console.error(` Cannot spawn ${spawnCmd}: ${launch.error.message}`);
        }
        try {
          const lines = readNewLog().trim().split("\n").slice(-20);
          for (const line of lines) {
            console.error(" " + line);
          }
        } catch {
          console.error(" (no log output)");
        }
        process.exit(1);
      }

      // Let the child outlive this process.
      child.unref();

      // Print startup info from the log
      try {
        process.stdout.write(readNewLog());
      } catch {
        // No log yet
      }

      // Prefer the PID recorded in the pid file when one exists.
      const pidFile = `${opts.dir}/bastion.pid`;
      const pid = existsSync(pidFile)
        ? readFileSync(pidFile, "utf-8").trim()
        : String(child.pid);

      console.log("");
      console.log(`Bastion running in background (PID ${pid})`);
      console.log(`Log: ${logFile}`);
      process.exit(0);
    });
}
|
||||
42
bastion/src/cli/src/commands/status.ts
Normal file
42
bastion/src/cli/src/commands/status.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
// CLI command: init bastion standalone status
|
||||
// Show connected bastions and their machine counts via labd.
|
||||
|
||||
import type { Command } from "commander";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
// ANSI escape sequences used to style the status table output.
const BOLD = "\x1b[1m"; // bold text
const GREEN = "\x1b[32m"; // green foreground
const RED = "\x1b[31m"; // red foreground
const DIM = "\x1b[2m"; // dimmed text
const RESET = "\x1b[0m"; // clear all styling
|
||||
|
||||
/**
 * Register the `status` subcommand on `parent`.
 *
 * The action fetches the bastion list from labd and prints one table row per
 * bastion (hostname, network, IP, status, machine count). The status cell is
 * green for "online" and red otherwise. When the list is empty a short notice
 * is printed instead. Any error raised while fetching or printing is reported
 * as "Cannot reach labd" and the process exits with code 1.
 *
 * @param parent Commander command that receives the subcommand.
 */
export function registerStatusCommand(parent: Command): void {
  // Left-align `text` in a column of `width` characters.
  const column = (text: string, width: number): string => text.padEnd(width);

  parent
    .command("status")
    .description("Show bastion server status")
    .action(async () => {
      try {
        const bastions = await getLabdClient().getBastions();
        if (bastions.length === 0) {
          console.log("No bastions registered.");
          return;
        }

        const header = [
          column("HOSTNAME", 24),
          column("NETWORK", 18),
          column("IP", 18),
          column("STATUS", 10),
          column("MACHINES", 10),
        ].join("");
        console.log(`${BOLD}${header}${RESET}`);

        for (const bastion of bastions) {
          const tint = bastion.status === "online" ? GREEN : RED;
          const row = [
            column(bastion.hostname, 24),
            DIM,
            column(bastion.network, 18),
            RESET,
            column(bastion.serverIp, 18),
            tint,
            column(bastion.status, 10),
            RESET,
            column(String(bastion.machineCount), 10),
          ].join("");
          console.log(row);
        }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`Cannot reach labd: ${reason}`);
        process.exit(1);
      }
    });
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user