Compare commits
38 Commits
816736793d
...
feat/reche
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2eda926d4c | ||
|
|
70258a0cc3 | ||
|
|
e9944c5413 | ||
| 22e2946e95 | |||
|
|
9ddab24931 | ||
|
|
ae91f2895e | ||
|
|
06fc40a857 | ||
|
|
a68d6d617e | ||
|
|
c49a650888 | ||
|
|
87e09af941 | ||
|
|
6f13e284fd | ||
|
|
6c963a15bd | ||
| 8c737d163d | |||
|
|
17bae7ddbf | ||
|
|
bb8f37ef7d | ||
|
|
a8dc79bc5a | ||
|
|
ad76c74020 | ||
|
|
6807632d46 | ||
|
|
53265bb18c | ||
|
|
863c7f2b83 | ||
| 906f93f6f2 | |||
|
|
aea28b5a0f | ||
| f3f0ea48e7 | |||
|
|
49d747db98 | ||
| 8635da08a6 | |||
|
|
6a5f23c0f5 | ||
| 63cc033e3e | |||
|
|
d7a25066bd | ||
| a0f6161533 | |||
|
|
87c1a34232 | ||
| 84afe7d5e4 | |||
|
|
0a4916d3c9 | ||
|
|
a4a4840930 | ||
|
|
8da947a1c3 | ||
|
|
92c65b4672 | ||
|
|
3835fefba1 | ||
| d4e9101bb6 | |||
| 014e8a6e72 |
8
.gitignore
vendored
8
.gitignore
vendored
@@ -23,3 +23,11 @@ node_modules/
|
||||
|
||||
# OS specific
|
||||
.DS_Store
|
||||
|
||||
# Task files
|
||||
# tasks.json
|
||||
# tasks/
|
||||
|
||||
# Asahi build artifacts (large)
|
||||
bastion/.asahi-cache/
|
||||
bastion/asahi-repo/*.zip
|
||||
|
||||
19
CLAUDE.md
Normal file
19
CLAUDE.md
Normal file
@@ -0,0 +1,19 @@
|
||||
## Skill routing
|
||||
|
||||
When the user's request matches an available skill, ALWAYS invoke it using the Skill
|
||||
tool as your FIRST action. Do NOT answer directly, do NOT use other tools first.
|
||||
The skill has specialized workflows that produce better results than ad-hoc answers.
|
||||
|
||||
Key routing rules:
|
||||
- Product ideas, "is this worth building", brainstorming → invoke gstack-office-hours
|
||||
- Bugs, errors, "why is this broken", 500 errors → invoke gstack-investigate
|
||||
- Ship, deploy, push, create PR → invoke gstack-ship
|
||||
- QA, test the site, find bugs → invoke gstack-qa
|
||||
- Code review, check my diff → invoke gstack-review
|
||||
- Update docs after shipping → invoke gstack-document-release
|
||||
- Weekly retro → invoke gstack-retro
|
||||
- Design system, brand → invoke gstack-design-consultation
|
||||
- Visual audit, design polish → invoke gstack-design-review
|
||||
- Architecture review → invoke gstack-plan-eng-review
|
||||
- Save progress, checkpoint, resume → invoke gstack-checkpoint
|
||||
- Code quality, health check → invoke gstack-health
|
||||
47
TODOS.md
Normal file
47
TODOS.md
Normal file
@@ -0,0 +1,47 @@
|
||||
# TODOS
|
||||
|
||||
## P1 — Ship with Phase 1
|
||||
|
||||
### v2.0 Architecture Document Update
|
||||
Update `bastion/docs/ARCHITECTURE.md` to cover v2.0: driver model, fleet system,
|
||||
Pulumi integration, Vault secrets, Deno evaluator, new CLI grammar. The existing
|
||||
doc covers v1.0 comprehensively (431 lines). v2.0 adds 5+ major subsystems.
|
||||
**Effort:** M (human: 1 week / CC: 1-2 days)
|
||||
**Depends on:** Phase 1 complete
|
||||
**Source:** CEO review 2026-04-01
|
||||
|
||||
## P2 — Post-v2.0 Core
|
||||
|
||||
### SSH Emergency Mode (scoped)
|
||||
SSH-based operations are limited to: (1) the earliest necessary box provisioning before the agent
|
||||
is installed, and (2) emergency debugging/fixing operations that can't be done via agent.
|
||||
NOT a general-purpose DeploymentTarget alternative. The v1.0 `recheck` and `fix-ssh-root.sh`
|
||||
patterns are the model. Agent stays the primary management path.
|
||||
**Effort:** S (human: 1 week / CC: 1 day)
|
||||
**Depends on:** Phase 2 complete (DeploymentTarget interface exists)
|
||||
**Source:** CEO review 2026-04-01
|
||||
|
||||
### Prometheus Metrics Endpoint
|
||||
Add `/metrics` endpoint to labd: resource counts by status, apply duration histograms,
|
||||
driver operation latency, fleet pipeline completion rates. Standard Prometheus scraping
|
||||
for Grafana dashboards and alerting.
|
||||
**Effort:** S (human: 2-3 days / CC: 2-3 hours)
|
||||
**Depends on:** Phase 1 (labd exists with resource store)
|
||||
**Source:** CEO review 2026-04-01 (observability gap)
|
||||
|
||||
## P3 — Future Enhancements
|
||||
|
||||
### Infrastructure Graph Visualization
|
||||
Visual representation of resource dependencies, environment topology, fleet status.
|
||||
Could be a web UI or terminal-based (like `kubectl tree`).
|
||||
**Source:** CEO review 2026-04-01
|
||||
|
||||
### `labctl import` for Existing Cloud Resources
|
||||
Discover and import existing AWS/GCP resources into the state store.
|
||||
Pulumi's import functionality could be leveraged.
|
||||
**Source:** CEO review 2026-04-01
|
||||
|
||||
### Built-in Secrets Rotation
|
||||
Automatic rotation of managed secrets (database passwords, API keys).
|
||||
Vault handles rotation, but a labctl-native workflow could simplify it.
|
||||
**Source:** CEO review 2026-04-01
|
||||
47
bastion/asahi-repo/installer_data.json
Normal file
47
bastion/asahi-repo/installer_data.json
Normal file
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"os_list": [
|
||||
{
|
||||
"name": "Fedora Asahi Lab (infra)",
|
||||
"default_os_name": "Fedora Linux Lab",
|
||||
"boot_object": "m1n1.bin",
|
||||
"next_object": "m1n1/boot.bin",
|
||||
"package": "fedora-asahi-lab.zip",
|
||||
"supported_fw": [
|
||||
"12.3",
|
||||
"12.3.1",
|
||||
"13.5"
|
||||
],
|
||||
"partitions": [
|
||||
{
|
||||
"name": "EFI",
|
||||
"type": "EFI",
|
||||
"size": "524288000B",
|
||||
"format": "fat",
|
||||
"volume_id": "0x804be8a6",
|
||||
"copy_firmware": true,
|
||||
"copy_installer_data": true,
|
||||
"source": "esp"
|
||||
},
|
||||
{
|
||||
"name": "Boot",
|
||||
"type": "Linux",
|
||||
"size": "1073741824B",
|
||||
"image": "boot.img"
|
||||
},
|
||||
{
|
||||
"name": "Root",
|
||||
"type": "Linux",
|
||||
"size": "4626296832B",
|
||||
"expand": false,
|
||||
"image": "root.img"
|
||||
},
|
||||
{
|
||||
"name": "Data",
|
||||
"type": "Linux",
|
||||
"size": "1073741824B",
|
||||
"expand": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
4
bastion/bastion/.gitignore
vendored
Normal file
4
bastion/bastion/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
|
||||
# Asahi build artifacts (large)
|
||||
.asahi-cache/
|
||||
asahi-repo/*.zip
|
||||
@@ -49,6 +49,9 @@ _labctl() {
|
||||
"app k3s list")
|
||||
COMPREPLY=($(compgen -W "--user -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"app k3s kubeconfig")
|
||||
COMPREPLY=($(compgen -W "--user --context --print -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"init bastion")
|
||||
COMPREPLY=($(compgen -W "standalone -h --help" -- "$cur"))
|
||||
return ;;
|
||||
@@ -62,17 +65,26 @@ _labctl() {
|
||||
COMPREPLY=($(compgen -W "--role --os --disk -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision debug")
|
||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "--pxe-boot -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision forget")
|
||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision logs")
|
||||
"provision register")
|
||||
COMPREPLY=($(compgen -W "--role --ip -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision asahi")
|
||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision logs")
|
||||
COMPREPLY=($(compgen -W "-f --follow -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision makeiso")
|
||||
COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision recheck")
|
||||
COMPREPLY=($(compgen -W "--user --target -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"config list")
|
||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||
return ;;
|
||||
@@ -89,7 +101,7 @@ _labctl() {
|
||||
COMPREPLY=($(compgen -W "deploy status -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"app k3s")
|
||||
COMPREPLY=($(compgen -W "install health list -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "install health list kubeconfig -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"version")
|
||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||
@@ -98,7 +110,7 @@ _labctl() {
|
||||
COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision")
|
||||
COMPREPLY=($(compgen -W "list install reprovision debug forget logs makeiso -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "list install reprovision debug forget register asahi logs makeiso recheck -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"config")
|
||||
COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
|
||||
|
||||
@@ -124,8 +124,11 @@ complete -c labctl -n "__labctl_using_cmd provision" -a install -d 'Queue a disc
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a reprovision -d 'Queue install + SSH reboot into PXE (target: hostname, MAC, or IP)'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a debug -d 'PXE boot into Fedora rescue mode for debugging (target: hostname, MAC, or IP)'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a forget -d 'Remove a machine from bastion state'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a register -d 'Register an already-installed machine (e.g. after state loss)'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a asahi -d 'Show instructions to provision an Apple Silicon Mac with Asahi Linux'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a recheck -d 'Refresh hardware info for all installed machines via SSH'
|
||||
|
||||
# provision install options
|
||||
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
||||
@@ -137,11 +140,25 @@ complete -c labctl -n "__labctl_in_cmd provision reprovision" -l role -d 'Machin
|
||||
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
|
||||
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l disk -d 'Target disk device (auto-detect if omitted)' -x
|
||||
|
||||
# provision debug options
|
||||
complete -c labctl -n "__labctl_in_cmd provision debug" -l pxe-boot -d 'Boot installed system via PXE (kernel+initrd from network, root from NVMe)'
|
||||
|
||||
# provision register options
|
||||
complete -c labctl -n "__labctl_in_cmd provision register" -l role -d 'Machine role' -xa 'vanilla worker infra labcontroller'
|
||||
complete -c labctl -n "__labctl_in_cmd provision register" -l ip -d 'Machine IP address' -x
|
||||
|
||||
# provision logs options
|
||||
complete -c labctl -n "__labctl_in_cmd provision logs" -s f -l follow -d 'Follow log output in real-time'
|
||||
|
||||
# provision makeiso options
|
||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l arch -d 'Target architecture(s)' -xa 'x86_64 aarch64'
|
||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL'
|
||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x
|
||||
|
||||
# provision recheck options
|
||||
complete -c labctl -n "__labctl_in_cmd provision recheck" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd provision recheck" -l target -d 'Only recheck a specific machine (by hostname or MAC)' -x
|
||||
|
||||
# config subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
|
||||
complete -c labctl -n "__labctl_using_cmd config" -a get -d 'Get a configuration value'
|
||||
@@ -173,6 +190,7 @@ complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l user -d 'SSH
|
||||
complete -c labctl -n "__labctl_using_cmd app k3s" -a install -d 'Install k3s on a target machine (hostname, IP, or MAC)'
|
||||
complete -c labctl -n "__labctl_using_cmd app k3s" -a health -d 'Check k3s health (all hosts if no target given)'
|
||||
complete -c labctl -n "__labctl_using_cmd app k3s" -a list -d 'List installed machines and their k3s status'
|
||||
complete -c labctl -n "__labctl_using_cmd app k3s" -a kubeconfig -d 'Fetch kubeconfig from a target and merge into ~/.kube/config'
|
||||
|
||||
# app k3s install options
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s install" -l role -d 'k3s role: infra (server) or worker (agent)' -x
|
||||
@@ -186,3 +204,8 @@ complete -c labctl -n "__labctl_in_cmd app k3s health" -l user -d 'SSH user' -x
|
||||
# app k3s list options
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s list" -l user -d 'SSH user' -x
|
||||
|
||||
# app k3s kubeconfig options
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s kubeconfig" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s kubeconfig" -l context -d 'Context name (defaults to hostname)' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s kubeconfig" -l print -d 'Print kubeconfig to stdout instead of merging'
|
||||
|
||||
|
||||
431
bastion/docs/ARCHITECTURE.md
Normal file
431
bastion/docs/ARCHITECTURE.md
Normal file
@@ -0,0 +1,431 @@
|
||||
# Lab Platform Architecture
|
||||
|
||||
## Overview
|
||||
|
||||
A bare-metal and hybrid cloud infrastructure platform for automated machine provisioning, Kubernetes cluster management, and fleet operations. The platform discovers hardware via PXE boot, installs operating systems unattended, deploys k3s clusters, and provides centralized management through a CLI and API.
|
||||
|
||||
**Components:**
|
||||
- **bastion** -- PXE boot server (DHCP/TFTP/HTTP) for machine discovery and OS installation
|
||||
- **labd** -- Master daemon for multi-bastion aggregation, persistent state, agent management
|
||||
- **labctl** -- CLI tool for operators (kubectl-style interface)
|
||||
- **lab-agent** -- Daemon on provisioned servers for remote execution and monitoring
|
||||
- **modules** -- Declarative configuration system (k3s, labcontroller)
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
labctl (CLI)
|
||||
|
|
||||
labd (master daemon)
|
||||
/ | \
|
||||
bastion1 bastion2 ... (PXE provisioning)
|
||||
/ \ |
|
||||
[machines] [machines] (bare metal)
|
||||
| |
|
||||
lab-agent lab-agent (remote exec)
|
||||
```
|
||||
|
||||
### Communication Patterns
|
||||
|
||||
| Path | Protocol | Auth |
|
||||
|------|----------|------|
|
||||
| labctl -> labd | HTTP/HTTPS | mTLS cert (future: token) |
|
||||
| bastion -> labd | WebSocket | Join token enrollment |
|
||||
| lab-agent -> labd | WebSocket | mTLS certificate |
|
||||
| machine -> bastion | HTTP | None (local network) |
|
||||
| Anaconda -> bastion | HTTP + UDP syslog | None (install-time) |
|
||||
| labctl -> bastion | HTTP | None (standalone mode) |
|
||||
|
||||
### Standalone vs Centralized
|
||||
|
||||
The bastion can operate in two modes:
|
||||
|
||||
1. **Standalone** -- single bastion, state in local JSON file, CLI talks directly to bastion HTTP API
|
||||
2. **Centralized** -- bastion registers with labd via WebSocket, state aggregated in CockroachDB, CLI talks to labd which routes commands to the correct bastion
|
||||
|
||||
---
|
||||
|
||||
## Machine Lifecycle
|
||||
|
||||
```
|
||||
PXE boot
|
||||
|
|
||||
+--------v--------+
|
||||
| DISCOVERED | Hardware inventory collected
|
||||
+---------+-------+
|
||||
|
|
||||
labctl provision install
|
||||
|
|
||||
+---------v-------+
|
||||
| INSTALL_QUEUE | Waiting for next PXE boot
|
||||
+---------+-------+
|
||||
|
|
||||
PXE boot (Anaconda)
|
||||
|
|
||||
+---------v-------+
|
||||
| INSTALLING | Progress: partitioning -> packages -> post-install
|
||||
+---------+-------+
|
||||
|
|
||||
+---------v-------+
|
||||
| INSTALLED | OS ready, SSH accessible
|
||||
+---------+-------+
|
||||
|
|
||||
labctl app k3s install
|
||||
|
|
||||
+---------v-------+
|
||||
| K3S RUNNING | Kubernetes node operational
|
||||
+--------+--------+
|
||||
|
|
||||
labctl provision reprovision
|
||||
|
|
||||
(back to INSTALL_QUEUE)
|
||||
```
|
||||
|
||||
Side paths:
|
||||
- **DEBUG** -- `labctl provision debug` boots Anaconda rescue mode for diagnostics
|
||||
- **FORGET** -- `labctl provision forget` removes machine from all state
|
||||
|
||||
---
|
||||
|
||||
## Packages
|
||||
|
||||
### Monorepo Structure
|
||||
|
||||
TypeScript ESM monorepo with pnpm workspaces. Six packages:
|
||||
|
||||
| Package | Role | Key Tech |
|
||||
|---------|------|----------|
|
||||
| `@lab/shared` | Types, protocol, constants | - |
|
||||
| `@lab/bastion` | PXE server | Fastify, dnsmasq |
|
||||
| `@lab/cli` | CLI binary | Commander.js |
|
||||
| `@lab/labd` | Master daemon | Fastify, Prisma, CockroachDB |
|
||||
| `@lab/agent` | Server agent | WebSocket |
|
||||
| `@lab/modules` | Config modules | SSH, k8s-client |
|
||||
|
||||
### @lab/shared
|
||||
|
||||
Core type system shared by all packages.
|
||||
|
||||
**State Model:**
|
||||
```typescript
|
||||
interface BastionState {
|
||||
discovered: Record<MAC, HardwareInfo>
|
||||
install_queue: Record<MAC, InstallConfig>
|
||||
installed: Record<MAC, InstalledInfo>
|
||||
debug: Record<MAC, DebugConfig>
|
||||
}
|
||||
```
|
||||
|
||||
**Roles:**
|
||||
- `vanilla` -- OS only, no k3s, no cluster services
|
||||
- `worker` -- k3s agent + Longhorn storage (joins existing cluster)
|
||||
- `infra` -- k3s server + etcd (control plane node)
|
||||
- `labcontroller` -- infra + bastion + labd + CockroachDB (self-sufficient)
|
||||
|
||||
**OS Support:**
|
||||
- `fedora-43` -- Anaconda kickstart installer
|
||||
- `ubuntu-26.04` -- cloud-init autoinstall
|
||||
|
||||
**Protocol:** Discriminated union message types for WebSocket communication between agents, bastions, and labd. Type guards and parsers for runtime validation.
|
||||
|
||||
### @lab/bastion
|
||||
|
||||
PXE boot server that handles the physical provisioning lifecycle.
|
||||
|
||||
**Services:**
|
||||
- `StateManager` -- JSON file persistence with immutable update pattern
|
||||
- `SyslogListener` -- UDP syslog receiver (port 5514) for Anaconda install logs
|
||||
- `InstallLogBuffer` -- In-memory ring buffer + disk persistence per machine
|
||||
- `BastionConnection` -- WebSocket client to labd for centralized mode
|
||||
- dnsmasq management (spawn, config generation, proxy/full DHCP)
|
||||
- Network auto-detection (interface, IP, subnet, gateway)
|
||||
- ISO builder (xorriso + mtools for non-PXE machines)
|
||||
|
||||
**HTTP Routes:**
|
||||
|
||||
| Endpoint | Purpose |
|
||||
|----------|---------|
|
||||
| `GET /dispatch?mac=` | Dynamic iPXE script (discover/install/debug/local-boot) |
|
||||
| `GET /ks?mac=` | Per-machine Anaconda kickstart |
|
||||
| `GET /debug.ks` | Rescue mode kickstart |
|
||||
| `GET /debug-setup.sh` | nc listener setup script for rescue shell |
|
||||
| `GET /discover.ks` | Hardware discovery kickstart |
|
||||
| `POST /api/discover` | Hardware inventory report |
|
||||
| `POST /api/install` | Queue machine for install |
|
||||
| `POST /api/progress` | Install progress callback |
|
||||
| `POST /api/log` | Raw log line ingestion |
|
||||
| `POST /api/debug` | Queue debug/rescue mode |
|
||||
| `GET /api/machines` | List all machines |
|
||||
| `GET /api/logs/:mac` | Install logs + progress |
|
||||
| `GET /api/logs/:mac/follow` | SSE stream of progress events |
|
||||
| `DELETE /api/machines/:mac` | Forget machine |
|
||||
|
||||
**Templates:**
|
||||
- `boot.ipxe.ts` -- iPXE scripts for each boot mode (discover, install, debug, pxe-boot-debug, local-boot)
|
||||
- `install.ks.ts` -- Full Fedora kickstart with LVM, SSH, k3s prereqs, progress callbacks, SysRq keys
|
||||
- `debug.ks.ts` -- Minimal rescue kickstart (SSH via inst.sshd)
|
||||
- `ubuntu-autoinstall.ts` -- cloud-init for Ubuntu
|
||||
- `dnsmasq.conf.ts` -- DHCP/TFTP configuration
|
||||
|
||||
**Boot Dispatch Logic:**
|
||||
```
|
||||
1. debug[mac]? -> renderDebugIpxe (auto-clear after serving)
|
||||
2. install_queue[mac]? -> renderInstallIpxe
|
||||
3. installed[mac]? -> renderLocalBootIpxe (exit to disk)
|
||||
4. unknown -> renderDiscoverIpxe
|
||||
```
|
||||
|
||||
### @lab/labd
|
||||
|
||||
Central management daemon. Aggregates multiple bastions, stores persistent state in CockroachDB, relays commands, manages agent fleet.
|
||||
|
||||
**Database (Prisma + CockroachDB):**
|
||||
- `Server` -- hostname, MAC, IP, role, status, cloud, environment, labels
|
||||
- `Bastion` -- hostname, network, serverIp, lastHeartbeat
|
||||
- `Agent` -- certificate, enrollment, heartbeat
|
||||
- `Cluster` -- name, cloud, environment, kubeconfig (encrypted)
|
||||
- `User` / `Role` / `Permission` -- RBAC (action:cloud:env:server matrix)
|
||||
- `JoinToken` -- one-time/reusable enrollment tokens
|
||||
- `AuditLog` -- action, resource, result, timestamp
|
||||
|
||||
**Key Services:**
|
||||
- `BastionRegistry` -- in-memory registry of connected bastions, state aggregation, MAC-to-bastion routing
|
||||
- `AgentRegistry` -- connected agents, heartbeat tracking
|
||||
- `MessageRouter` -- command relay between CLI/agents and bastions
|
||||
|
||||
**Command Routing:**
|
||||
```
|
||||
CLI: labctl provision install <mac> <hostname>
|
||||
-> POST /api/machines/install
|
||||
-> labd finds bastion that knows this MAC
|
||||
-> WebSocket: {type: "command-install", mac, hostname, disk, role}
|
||||
-> bastion updates install_queue
|
||||
-> WebSocket: {type: "command-response", status: "ok"}
|
||||
-> HTTP response to CLI
|
||||
```
|
||||
|
||||
### @lab/cli (labctl)
|
||||
|
||||
Operator CLI. Commander.js binary, distributed as RPM/DEB or standalone bun-compiled executable.
|
||||
|
||||
**Command Groups:**
|
||||
|
||||
```
|
||||
labctl init bastion standalone start|stop|status
|
||||
labctl provision list|install|reprovision|forget|debug|register|asahi|logs|makeiso|recheck
|
||||
labctl app k3s install|health|list|kubeconfig
|
||||
labctl config list|get|set|path
|
||||
labctl login
|
||||
labctl doctor
|
||||
labctl roles
|
||||
```
|
||||
|
||||
**Key Features:**
|
||||
- Target resolution: hostname, MAC, or IP -> machine lookup
|
||||
- SSH reboot into PXE for reprovision/debug (efibootmgr --bootnext)
|
||||
- Follow mode: `labctl provision logs <target> -f` (5s polling)
|
||||
- Shell completions: bash, fish
|
||||
|
||||
### @lab/modules
|
||||
|
||||
Declarative configuration modules with three-phase lifecycle: install -> configure -> health.
|
||||
|
||||
**k3s Module:**
|
||||
- 5 operation groups: host-prep, networking, k3s-server, k3s-agent, hardening
|
||||
- 15+ individual operations: kernel modules, sysctl, firewall, Cilium CNI, SELinux, audit policy, pod security, cert checks
|
||||
- Health checks: service running, node ready, API health, pod status, Cilium status, secrets encryption
|
||||
- SSH execution backend with progress callbacks
|
||||
|
||||
### @lab/agent
|
||||
|
||||
Daemon on provisioned servers. WebSocket to labd for:
|
||||
- Heartbeat (hostname, uptime, CPU/mem usage)
|
||||
- Command execution (with stdout/stderr streaming)
|
||||
- Log streaming (journalctl relay)
|
||||
- mTLS certificate enrollment and rotation
|
||||
|
||||
---
|
||||
|
||||
## Disk Layout
|
||||
|
||||
### LVM Partitioning (labvg)
|
||||
|
||||
All roles share a common LVM layout. The kickstart `%pre` auto-detects the install disk (NVMe preferred, then SATA, skipping USB/removable).
|
||||
|
||||
| Volume | Size | FS | Reprovision |
|
||||
|--------|------|-----|-------------|
|
||||
| `/boot/efi` | 600 MB | vfat | Reused |
|
||||
| `/boot` | 3 GB | ext4 | Reused |
|
||||
| `swap` | 27 GB | swap | Recreated |
|
||||
| `/` (root) | 33 GB | xfs | Recreated |
|
||||
| `/var` | 100 GB | xfs | Recreated |
|
||||
| `/var/log` | 10 GB | xfs | Recreated |
|
||||
| `/home` | 10 GB | xfs | **Preserved** |
|
||||
| `/srv` | 20 GB | xfs | **Preserved** |
|
||||
| `/var/lib/longhorn` | remaining | xfs | **Preserved** (worker) |
|
||||
| `/var/lib/rancher` | 20 GB | xfs | **Preserved** (infra) |
|
||||
| `/tmp` | 4 GB | tmpfs | - |
|
||||
|
||||
Reprovision detection: if `labvg` VG exists, reuse EFI/boot partitions and preserve data volumes.
|
||||
|
||||
---
|
||||
|
||||
## Kickstart Features
|
||||
|
||||
The Fedora kickstart template (`install.ks.ts`) includes:
|
||||
|
||||
- **Dynamic disk detection** -- `%pre` probes NVMe/SATA/virtio, skips USB/removable, supports both fresh install and reprovision
|
||||
- **Progress callbacks** -- `curl -sf POST /api/progress` at each stage (partitioning, post-install substeps, complete)
|
||||
- **Anaconda syslog forwarding** -- `logging --host --port` streams real-time install logs to bastion
|
||||
- **SSH hardening** -- key-only auth, root login via pubkey only, admin user with passwordless sudo
|
||||
- **Network-first boot order** -- `efibootmgr` reorders boot entries so PXE is always first (bastion controls every reboot)
|
||||
- **SysRq magic keys** -- `kernel.sysrq=1` for emergency reboot via KVM keyboard
|
||||
- **Role-specific setup:**
|
||||
- `vanilla`: chronyd only
|
||||
- `worker`/`infra`: kernel modules (br_netfilter, overlay), sysctl (ip_forward, inotify), firewalld disabled, k3s binary installed
|
||||
- `infra`: k3s server binary pre-installed
|
||||
|
||||
**What is NOT in the kickstart:**
|
||||
- `console=ttyS0` -- causes a 30s-per-step boot timeout on hardware without a physical serial UART (discovered 2026-03-30, see docs/pxe-boot-debugging-2026-03-30.md)
|
||||
- Background log streamer (`tail -f`) -- prevents Anaconda from syncing filesystem, causes %post writes to not persist
|
||||
|
||||
---
|
||||
|
||||
## Deployment
|
||||
|
||||
### Container Images
|
||||
|
||||
**bastion** (`Dockerfile.bastion`):
|
||||
- Base: Fedora 43 (needs dnsmasq, iPXE)
|
||||
- Multi-stage: Alpine build -> Fedora runtime
|
||||
- iPXE rebuilt from source (SNP driver for EFI)
|
||||
- hostNetwork in k8s (DHCP needs raw sockets)
|
||||
- Capabilities: NET_ADMIN, NET_RAW
|
||||
|
||||
**labd** (`Dockerfile.labd`):
|
||||
- Base: Alpine (minimal)
|
||||
- Multi-stage build with Prisma client generation
|
||||
- Runs as non-root `node` user
|
||||
|
||||
### Kubernetes (k3s)
|
||||
|
||||
```
|
||||
Namespace: lab-infra
|
||||
Deployment: bastion (hostNetwork, PVC for /data, host SSH keys)
|
||||
ConfigMap: bastion-config (env vars)
|
||||
Secret: bastion-join-token
|
||||
PVC: bastion-state (local-path)
|
||||
|
||||
Namespace: lab-system
|
||||
Deployment: labd
|
||||
Service: labd (NodePort 30100)
|
||||
StatefulSet: cockroachdb-0
|
||||
```
|
||||
|
||||
### CLI Distribution
|
||||
|
||||
Built with `nfpm` as RPM/DEB. Includes:
|
||||
- `/usr/bin/labctl` (bun-compiled standalone binary)
|
||||
- `/usr/share/bash-completion/completions/labctl`
|
||||
- `/usr/share/fish/vendor_completions.d/labctl.fish`
|
||||
|
||||
Config: `~/.labctl/config.yaml` with `labdUrl`, output format, default cloud/environment.
|
||||
|
||||
---
|
||||
|
||||
## Build & Release
|
||||
|
||||
```bash
|
||||
# Development
|
||||
pnpm install && pnpm build # Compile all packages
|
||||
pnpm test:run # Unit tests (vitest)
|
||||
npx tsc --noEmit # Type check
|
||||
|
||||
# Deploy
|
||||
bash scripts/deploy.sh all # Build containers + RPM, push, restart pods
|
||||
bash scripts/deploy.sh bastion # Just bastion
|
||||
bash scripts/deploy.sh labd # Just labd
|
||||
bash scripts/deploy.sh labctl # Just CLI (local RPM install)
|
||||
|
||||
# Container builds
|
||||
bash scripts/build-bastion.sh --platforms linux/amd64 --push latest
|
||||
bash scripts/build-labd.sh --platforms linux/amd64 --push latest
|
||||
bash scripts/build-rpm.sh # RPM + DEB packages
|
||||
|
||||
# Integration tests (require libvirt, sudo)
|
||||
sudo tests/integration/run-pxe-test.sh
|
||||
```
|
||||
|
||||
Registry: `mysources.co.uk` (Gitea at 10.0.0.194:3012)
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
### Unit Tests
|
||||
- Kickstart rendering (ksvalidator syntax check, partition layout, role-specific sections)
|
||||
- State management (load, save, update, debug field)
|
||||
- Dispatch routing (correct iPXE script for each machine state)
|
||||
- Syslog listener (UDP receive, IP->MAC resolution, RFC 3164 parsing)
|
||||
|
||||
### Integration Tests (libvirt VMs)
|
||||
- **pxe-provision.test.ts** -- Full end-to-end: create VM -> PXE discovery -> queue install -> Anaconda install -> SSH verification -> systemd health -> SELinux enforcing -> boot order check
|
||||
- **iso-provision.test.ts** -- ISO boot for non-PXE machines
|
||||
- **k3s-single-node.test.ts** -- Post-provision k3s installation and health
|
||||
- VM screenshot capture during boot for debugging
|
||||
|
||||
---
|
||||
|
||||
## Security
|
||||
|
||||
- **mTLS** for agent-labd communication (certificate enrollment via join tokens)
|
||||
- **SSH key-only auth** on provisioned machines (no password auth)
|
||||
- **SELinux enforcing** verified in integration tests
|
||||
- **RBAC** (planned): action:cloud:environment:server permission matrix
|
||||
- **Audit logging** (planned): every mutation tracked in CockroachDB
|
||||
- **Network-first boot order** prevents machines from booting without bastion approval
|
||||
- **SysRq keys** enabled for emergency reboot without SSH access
|
||||
|
||||
---
|
||||
|
||||
## Known Issues & Lessons Learned
|
||||
|
||||
### Serial Console Boot Delay (2026-03-30)
|
||||
`console=ttyS0,115200n8` in the kernel cmdline causes a 30-second timeout at every systemd boot phase on hardware without a physical serial UART. Root cause: systemd blocks while writing to the non-existent UART. Fix: removed from the kickstart entirely.
|
||||
|
||||
### Anaconda %post Log Streamer
|
||||
Background `tail -f` in kickstart `%post` prevents Anaconda from syncing the filesystem. All file writes in %post appear to succeed but are lost on reboot. Fix: removed background log streamer, replaced with Anaconda's built-in `logging --host --port` syslog forwarding.
|
||||
|
||||
### Disk Auto-Detection
|
||||
Hardcoded `/dev/sda` default broke NVMe-only machines. Fix: default to empty string (auto-detect) which triggers the `%pre` disk probe logic.
|
||||
|
||||
### Anaconda Rescue Mode Limitations
|
||||
`%pre` and `%post` sections do not execute in `inst.rescue` mode. SSH in rescue mode is provided by Anaconda's `inst.sshd` kernel parameter + `sshpw` kickstart directive. The nc listener is set up manually by running `curl bastion:8080/debug-setup.sh | bash`.
|
||||
|
||||
---
|
||||
|
||||
## Planned Work (Taskmaster)
|
||||
|
||||
13 tasks in queue, all pending:
|
||||
|
||||
1. **#72** Expand Prisma schema with resource relationships (Network, ServerNic, ServerDisk, ClusterMember)
|
||||
2. **#73** State persistence service (bastion state -> CockroachDB)
|
||||
3. **#74** State loading from labd on bastion startup
|
||||
4. **#75** Fix bastion --dir env var default
|
||||
5. **#76** Resource type registry with aliases (kubectl-style)
|
||||
6. **#77** `labctl get <resource>` command
|
||||
7. **#78** `labctl describe <resource>` command
|
||||
8. **#79** `labctl create/delete` commands
|
||||
9. **#80** Refactor provision commands to kubectl-style
|
||||
10. **#81** Server and resource API endpoints in labd
|
||||
11. **#82** RBAC permission checks in CLI
|
||||
12. **#83** Audit logging for resource operations
|
||||
13. **#84** Update CLI entry point and help text
|
||||
|
||||
Additional items not in taskmaster:
|
||||
- Ubuntu autoinstall disk auto-detect (still defaults to /dev/sda)
|
||||
- Verify `inst.sshd` works end-to-end in rescue mode
|
||||
- k3s cluster join vs new cluster distinction in `labctl app k3s install`
|
||||
- arm64 container build (iPXE cross-compilation broken)
|
||||
91
bastion/docs/pxe-boot-debugging-2026-03-30.md
Normal file
91
bastion/docs/pxe-boot-debugging-2026-03-30.md
Normal file
@@ -0,0 +1,91 @@
|
||||
# PXE Boot Debugging Session — 2026-03-30
|
||||
|
||||
## Problem
|
||||
Beelink SER Mini Pro (AMD Ryzen 7 255, Radeon 780M, 64GB DDR5, 1TB NVMe) boots Fedora 43 100x slower than normal after PXE kickstart install. Every systemd boot phase takes ~30 seconds. The Anaconda installer/rescue mode boots fast on the same hardware.
|
||||
|
||||
## Root Cause
|
||||
**`console=ttyS0,115200n8` in kernel cmdline** — added via kickstart `bootloader --append` during install.
|
||||
|
||||
This mini PC has **no physical serial UART**. When systemd writes to ttyS0, each log write blocks for ~30 seconds waiting for the non-existent UART hardware. Since systemd logs at every phase transition, the total boot time was 10+ minutes.
|
||||
|
||||
The Anaconda installer was unaffected because it uses a different init flow that doesn't go through the same systemd phase transitions.
|
||||
|
||||
## How We Found It
|
||||
Hours of systematic elimination:
|
||||
|
||||
| What we tried | Result | Ruled out |
|
||||
|---|---|---|
|
||||
| `modprobe.blacklist=amdgpu` | No change | GPU driver |
|
||||
| `amd_iommu=off` | No change | IOMMU |
|
||||
| Rebuild initramfs without plymouth/drm/fips | No change | Initramfs bloat |
|
||||
| systemd-boot instead of GRUB | Still slow | Bootloader |
|
||||
| PXE-boot kernel+initrd (skip local GRUB entirely) | Still slow | Local bootloader/firmware |
|
||||
| Disable TPM in BIOS | No change | TPM |
|
||||
| Remove `resume=` + resume dracut module | No change | Hibernate resume |
|
||||
| Manual LVM activation in rescue shell | **Fast** | NVMe/LVM themselves |
|
||||
| Remove `console=ttyS0,115200n8` from GRUB | **FAST BOOT** | **This was it** |
|
||||
|
||||
The key breakthrough was noticing that the timestamps showed **exactly 30-second gaps** between boot phases — a timeout pattern, not general slowness. The second realisation was that the serial console had been added during install and the system had never been booted without it.
|
||||
|
||||
## What Was Fixed (PR #4, merged)
|
||||
|
||||
### 1. Removed serial console from kickstart
|
||||
- Removed `console=ttyS0,115200n8` from `bootloader --append`
|
||||
- Removed `serial-getty@ttyS0.service` enablement
|
||||
- Removed rsyslog serial forwarding
|
||||
|
||||
### 2. Enabled Anaconda syslog forwarding
|
||||
- Uncommented `logging --host --port` directive in kickstart
|
||||
- Bastion's SyslogListener was already built — just needed IP→MAC resolution improvement
|
||||
- Added `registerIp()` calls from kickstart fetch and progress callbacks
|
||||
- Added syslog listener unit tests
|
||||
|
||||
### 3. Fixed disk auto-detection
|
||||
- Default disk changed from `/dev/sda` to `""` (auto-detect) in labd route and bastion command handler
|
||||
- The kickstart `%pre` auto-detect logic probes nvme0n1, sda, sdb, vda in order
|
||||
- Without this fix, NVMe-only machines (like the SER Mini Pro) fail immediately
|
||||
|
||||
### 4. SysRq magic keys
|
||||
- Added `kernel.sysrq=1` sysctl to kickstart `%post`
|
||||
- Enables Alt+SysRq+REISUB via JetKVM for emergency reboot of stuck machines
|
||||
|
||||
### 5. Simplified debug command
|
||||
- Removed `--sshd` flag (SSH always available via `inst.sshd` + `sshpw` in rescue mode)
|
||||
- Added `/debug-setup.sh` HTTP endpoint for nc listener setup from rescue shell
|
||||
- Cleaned up `sshd` field from DebugConfig, protocol types, all routes
|
||||
|
||||
### 6. Added `labctl provision logs -f`
|
||||
- Follow mode with 5-second polling for real-time install monitoring
|
||||
|
||||
## What Works
|
||||
|
||||
- **PXE discovery → install → boot** — full flow works end-to-end
|
||||
- **Anaconda syslog forwarding** — install logs stream to bastion
|
||||
- **Progress callbacks** — stage-by-stage install tracking via curl
|
||||
- **Auto disk detection** — works for NVMe and SATA
|
||||
- **Debug rescue mode** — `labctl provision debug <target>` boots Anaconda rescue with SSH
|
||||
- **Network-first boot order** — bastion controls every reboot via efibootmgr
|
||||
- **SysRq keys** — emergency reboot via JetKVM keyboard
|
||||
|
||||
## What Doesn't Work / Known Issues
|
||||
|
||||
- **SSH in rescue mode (formerly `--sshd`)** — Anaconda rescue mode skips both `%pre` and `%post` kickstart sections. `inst.sshd` + `sshpw` should provide SSH access, but this hasn't been verified end-to-end yet. The `/debug-setup.sh` curl workaround exists for the nc listener.
|
||||
- **arm64 container build** — iPXE cross-compilation fails on arm64 (GCC flag incompatibility). Workaround: build with `--platforms linux/amd64` only.
|
||||
- **Integration test SSH timeout** — VM boots fine but SSH times out due to libvirt nftables reject rules after VM restart. Test infrastructure issue, not a code bug.
|
||||
|
||||
## What Was Skipped / Left To Do
|
||||
|
||||
1. **Syslog UDP port in k3s** — works because bastion uses `hostNetwork: true`, but should be documented properly
|
||||
2. **Background log streamer** — the old `tail -f` approach broke Anaconda filesystem sync. Replaced with syslog forwarding. If more granular %post logging is needed, a synchronous log push at end of %post would be safe.
|
||||
3. **Per-machine hardware overrides** — turned out not to be needed (serial console was the only "special" setting, and removing it is universal)
|
||||
4. **Ubuntu autoinstall disk default** — `ubuntu-autoinstall.ts` still has `disk || "/dev/sda"` fallback (line 38), should be changed to auto-detect
|
||||
5. **Verify `inst.sshd` works in rescue mode** — test SSH with password "debug" next time debug mode is used
|
||||
6. **Re-enable TPM in BIOS** — it was disabled during debugging and needs to be turned back on (the user plans to do this by resetting the BIOS to factory defaults)
|
||||
|
||||
## Key Learnings
|
||||
|
||||
1. **`console=ttyS0` on hardware without UART = 30s timeout per boot phase.** Never add serial console to kernel cmdline unless the hardware has a verified physical UART.
|
||||
2. **Exactly-N-second gaps in boot logs = timeout, not slowness.** Look for the timeout source, not performance issues.
|
||||
3. **The bisection approach works.** Systematically removing features one at a time found the root cause. But it took hours because the serial console was added early and seemed harmless.
|
||||
4. **Anaconda rescue mode is limited.** It skips `%pre` and `%post`, so you can't automate setup via kickstart. Use `inst.sshd` + `sshpw` for SSH, and serve helper scripts via HTTP for everything else.
|
||||
5. **Default disk paths break NVMe machines.** Always default to auto-detect (empty string) rather than `/dev/sda`.
|
||||
@@ -22,7 +22,11 @@
|
||||
"test:integration:iso": "vitest run -c tests/integration/vitest.config.ts -t 'ISO boot'",
|
||||
"test:integration:iso:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'ISO boot'",
|
||||
"test:integration:arm-iso": "vitest run -c tests/integration/vitest.config.ts -t 'ARM ISO'",
|
||||
"test:integration:arm-iso:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'ARM ISO'"
|
||||
"test:integration:arm-iso:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'ARM ISO'",
|
||||
"test:integration:asahi": "vitest run -c tests/integration/vitest.config.ts -t 'asahi firstboot'",
|
||||
"test:integration:asahi:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'asahi firstboot'",
|
||||
"test:integration:asahi-validate": "vitest run -c tests/integration/vitest.config.ts -t 'asahi.*validation'",
|
||||
"test:integration:asahi-validate:host": "sudo -E $(which npx) vitest run -c tests/integration/vitest.config.ts -t 'asahi.*validation'"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20.0.0",
|
||||
|
||||
302
bastion/scripts/build-asahi-rootfs.sh
Executable file
302
bastion/scripts/build-asahi-rootfs.sh
Executable file
@@ -0,0 +1,302 @@
|
||||
#!/bin/bash
|
||||
# Build a custom Fedora Asahi Remix rootfs with lab firstboot LVM setup.
|
||||
#
|
||||
# Downloads the upstream Fedora Asahi Remix Server package, injects our
|
||||
# firstboot script + systemd service, and repackages it for the bastion.
|
||||
#
|
||||
# Requirements: root, curl, python3, unzip, mount (loop), zip (node only for the local firstboot fallback)
|
||||
# Output: bastion/asahi-repo/ directory with package + installer_data.json
|
||||
#
|
||||
# Usage: sudo ./scripts/build-asahi-rootfs.sh [--bastion-ip IP] [--http-port PORT] [--role ROLE] [--hostname NAME] [--mac MAC] [--admin-user USER]
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
ASAHI_DIR="$PROJECT_DIR/asahi-repo"
|
||||
CACHE_DIR="$PROJECT_DIR/.asahi-cache"
|
||||
WORK_DIR=""
|
||||
|
||||
# Defaults
|
||||
BASTION_IP="${BASTION_IP:-192.168.8.23}"
|
||||
HTTP_PORT="${HTTP_PORT:-8080}"
|
||||
ROLE="${ROLE:-infra}"
|
||||
HOSTNAME="${HOSTNAME:-mac-studio}"
|
||||
MAC="${MAC:-00:00:00:00:00:00}"
|
||||
ADMIN_USER="${ADMIN_USER:-michal}"
|
||||
|
||||
# Parse args
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--bastion-ip) BASTION_IP="$2"; shift 2 ;;
|
||||
--http-port) HTTP_PORT="$2"; shift 2 ;;
|
||||
--role) ROLE="$2"; shift 2 ;;
|
||||
--hostname) HOSTNAME="$2"; shift 2 ;;
|
||||
--mac) MAC="$2"; shift 2 ;;
|
||||
--admin-user) ADMIN_USER="$2"; shift 2 ;;
|
||||
*) echo "Unknown option: $1"; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# ── Resolve upstream package URL ─────────────────────────────────
|
||||
echo "==> Fetching Asahi installer data..."
|
||||
INSTALLER_DATA=$(curl -sfL "https://cdn.asahilinux.org/installer/installer_data.json")
|
||||
|
||||
# Find the Server variant package URL
|
||||
SERVER_URL=$(echo "$INSTALLER_DATA" | python3 -c "
|
||||
import sys, json
|
||||
data = json.load(sys.stdin)
|
||||
for os in data.get('os_list', []):
|
||||
name = os.get('name', '').lower()
|
||||
if 'server' in name and 'uefi' not in name and not os.get('expert'):
|
||||
print(os['package'])
|
||||
break
|
||||
" 2>/dev/null)
|
||||
|
||||
if [ -z "$SERVER_URL" ]; then
|
||||
echo "ERROR: Could not find Fedora Asahi Remix Server in installer data."
|
||||
echo "Available variants:"
|
||||
echo "$INSTALLER_DATA" | python3 -c "
|
||||
import sys, json
|
||||
data = json.load(sys.stdin)
|
||||
for os in data.get('os_list', []):
|
||||
print(f\" - {os.get('name', '?')}\")" 2>/dev/null
|
||||
exit 1
|
||||
fi
|
||||
|
||||
PACKAGE_NAME=$(basename "$SERVER_URL")
|
||||
echo " Variant: Fedora Asahi Remix Server"
|
||||
echo " Package: $PACKAGE_NAME"
|
||||
|
||||
# Also extract the partition layout and supported_fw from upstream
|
||||
UPSTREAM_CONFIG=$(echo "$INSTALLER_DATA" | python3 -c "
|
||||
import sys, json
|
||||
data = json.load(sys.stdin)
|
||||
for os in data.get('os_list', []):
|
||||
name = os.get('name', '').lower()
|
||||
if 'server' in name and 'uefi' not in name and not os.get('expert'):
|
||||
json.dump(os, sys.stdout)
|
||||
break
|
||||
")
|
||||
|
||||
# ── Download upstream package ────────────────────────────────────
|
||||
mkdir -p "$CACHE_DIR" "$ASAHI_DIR"
|
||||
|
||||
CACHED_PKG="$CACHE_DIR/$PACKAGE_NAME"
|
||||
if [ -f "$CACHED_PKG" ]; then
|
||||
echo "==> Using cached package: $CACHED_PKG"
|
||||
else
|
||||
echo "==> Downloading $SERVER_URL..."
|
||||
curl -# -L -o "$CACHED_PKG" "$SERVER_URL"
|
||||
fi
|
||||
|
||||
# ── Extract and modify rootfs ────────────────────────────────────
|
||||
WORK_DIR=$(mktemp -d)
|
||||
trap 'echo "==> Cleaning up..."; umount "$WORK_DIR/rootfs" 2>/dev/null || true; rm -rf "$WORK_DIR"' EXIT
|
||||
|
||||
echo "==> Extracting package..."
|
||||
unzip -q -o "$CACHED_PKG" -d "$WORK_DIR/pkg"
|
||||
|
||||
# List contents
|
||||
echo " Package contents:"
|
||||
ls -lh "$WORK_DIR/pkg/" | grep -v ^total | while read -r line; do echo " $line"; done
|
||||
|
||||
# Find root.img
|
||||
ROOT_IMG=$(find "$WORK_DIR/pkg" -name "root.img" -type f | head -1)
|
||||
if [ -z "$ROOT_IMG" ]; then
|
||||
echo "ERROR: root.img not found in package."
|
||||
echo "Contents: $(ls "$WORK_DIR/pkg/")"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "==> Mounting root.img..."
|
||||
mkdir -p "$WORK_DIR/rootfs"
|
||||
mount -o loop "$ROOT_IMG" "$WORK_DIR/rootfs"
|
||||
|
||||
# ── Read SSH keys from the system ────────────────────────────────
|
||||
SSH_KEYS=""
|
||||
REAL_USER="${SUDO_USER:-$USER}"
|
||||
REAL_HOME=$(eval echo "~$REAL_USER")
|
||||
for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_ecdsa.pub" "$REAL_HOME/.ssh/id_rsa.pub"; do
|
||||
if [ -f "$keyfile" ]; then
|
||||
SSH_KEYS=$(cat "$keyfile")
|
||||
echo " SSH key: $keyfile"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -z "$SSH_KEYS" ]; then
|
||||
echo "WARNING: No SSH public key found. You'll need to add keys manually."
|
||||
fi
|
||||
|
||||
# ── Generate firstboot script from bastion ───────────────────────
|
||||
echo "==> Generating firstboot script..."
|
||||
|
||||
# Try to get the script from a running bastion, fall back to local generation
|
||||
FIRSTBOOT_SCRIPT=""
|
||||
FIRSTBOOT_URL="http://$BASTION_IP:$HTTP_PORT/asahi/firstboot.sh?hostname=$HOSTNAME&role=$ROLE&mac=$MAC&user=$ADMIN_USER"
|
||||
FIRSTBOOT_SCRIPT=$(curl -sf "$FIRSTBOOT_URL" 2>/dev/null || echo "")
|
||||
|
||||
if [ -z "$FIRSTBOOT_SCRIPT" ]; then
|
||||
echo " Bastion not reachable, generating script locally..."
|
||||
# Generate a basic firstboot script inline
|
||||
FIRSTBOOT_SCRIPT=$(cd "$PROJECT_DIR" && node -e "
|
||||
const { renderFirstbootScript } = require('./src/bastion/dist/templates/asahi-firstboot.sh.js');
|
||||
process.stdout.write(renderFirstbootScript({
|
||||
hostname: '$HOSTNAME',
|
||||
role: '$ROLE',
|
||||
serverIp: '$BASTION_IP',
|
||||
httpPort: $HTTP_PORT,
|
||||
sshKeys: $([ -n "$SSH_KEYS" ] && echo "[\"$SSH_KEYS\"]" || echo "[]"),
|
||||
adminUser: '$ADMIN_USER',
|
||||
mac: '$MAC',
|
||||
}));
|
||||
" 2>/dev/null) || {
|
||||
echo " ERROR: Could not generate firstboot script. Build the project first: npm run build"
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
|
||||
# ── Inject files into rootfs ─────────────────────────────────────
|
||||
echo "==> Injecting lab configuration into rootfs..."
|
||||
|
||||
# Firstboot script
|
||||
mkdir -p "$WORK_DIR/rootfs/usr/local/bin"
|
||||
echo "$FIRSTBOOT_SCRIPT" > "$WORK_DIR/rootfs/usr/local/bin/lab-firstboot.sh"
|
||||
chmod 755 "$WORK_DIR/rootfs/usr/local/bin/lab-firstboot.sh"
|
||||
echo " Installed: /usr/local/bin/lab-firstboot.sh"
|
||||
|
||||
# Systemd service
|
||||
mkdir -p "$WORK_DIR/rootfs/etc/systemd/system"
|
||||
cat > "$WORK_DIR/rootfs/etc/systemd/system/lab-firstboot.service" << 'UNIT'
|
||||
[Unit]
|
||||
Description=Lab first-boot LVM setup
|
||||
After=local-fs.target network-online.target
|
||||
Wants=network-online.target
|
||||
ConditionPathExists=!/etc/lab-lvm-setup-done
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/local/bin/lab-firstboot.sh
|
||||
RemainAfterExit=yes
|
||||
StandardOutput=journal+console
|
||||
StandardError=journal+console
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
UNIT
|
||||
echo " Installed: /etc/systemd/system/lab-firstboot.service"
|
||||
|
||||
# Enable the service
|
||||
mkdir -p "$WORK_DIR/rootfs/etc/systemd/system/multi-user.target.wants"
|
||||
ln -sf /etc/systemd/system/lab-firstboot.service \
|
||||
"$WORK_DIR/rootfs/etc/systemd/system/multi-user.target.wants/lab-firstboot.service"
|
||||
echo " Enabled: lab-firstboot.service"
|
||||
|
||||
# SSH authorized keys for root (for initial access before firstboot runs user creation)
|
||||
if [ -n "$SSH_KEYS" ]; then
|
||||
mkdir -p "$WORK_DIR/rootfs/root/.ssh"
|
||||
chmod 700 "$WORK_DIR/rootfs/root/.ssh"
|
||||
echo "$SSH_KEYS" > "$WORK_DIR/rootfs/root/.ssh/authorized_keys"
|
||||
chmod 600 "$WORK_DIR/rootfs/root/.ssh/authorized_keys"
|
||||
echo " Installed: /root/.ssh/authorized_keys"
|
||||
fi
|
||||
|
||||
# Ensure lvm2 and xfsprogs are installed (should be in server image already)
|
||||
echo " Checking required packages..."
|
||||
if [ -f "$WORK_DIR/rootfs/usr/sbin/pvcreate" ] || [ -f "$WORK_DIR/rootfs/usr/bin/pvcreate" ]; then
|
||||
echo " lvm2: present"
|
||||
else
|
||||
echo " WARNING: lvm2 not found in rootfs. LVM setup may fail."
|
||||
fi
|
||||
if [ -f "$WORK_DIR/rootfs/usr/sbin/mkfs.xfs" ] || [ -f "$WORK_DIR/rootfs/usr/bin/mkfs.xfs" ]; then
|
||||
echo " xfsprogs: present"
|
||||
else
|
||||
echo " WARNING: xfsprogs not found in rootfs. LVM setup may fail."
|
||||
fi
|
||||
|
||||
# ── Unmount and repackage ────────────────────────────────────────
|
||||
echo "==> Unmounting rootfs..."
|
||||
umount "$WORK_DIR/rootfs"
|
||||
|
||||
echo "==> Repackaging..."
|
||||
OUTPUT_PKG="$ASAHI_DIR/fedora-asahi-lab.zip"
|
||||
rm -f "$OUTPUT_PKG"
|
||||
(cd "$WORK_DIR/pkg" && zip -q "$OUTPUT_PKG" *)
|
||||
echo " Output: $OUTPUT_PKG ($(du -sh "$OUTPUT_PKG" | cut -f1))"
|
||||
|
||||
# ── Generate installer_data.json ─────────────────────────────────
|
||||
echo "==> Generating installer_data.json..."
|
||||
|
||||
# Parse upstream config to get supported_fw, boot_object, next_object, and partition details
|
||||
python3 << PYEOF > "$ASAHI_DIR/installer_data.json"
|
||||
import json, sys
|
||||
|
||||
upstream = json.loads('''$UPSTREAM_CONFIG''')
|
||||
|
||||
# Build our custom installer data based on upstream
|
||||
# Keep EFI and Boot partitions identical, modify Root to not expand,
|
||||
# add Data partition that expands for LVM.
|
||||
partitions = []
|
||||
for p in upstream.get('partitions', []):
|
||||
if p.get('type') == 'EFI':
|
||||
partitions.append(p)
|
||||
elif p.get('name') == 'Boot':
|
||||
partitions.append(p)
|
||||
elif p.get('name') == 'Root':
|
||||
# Fixed size root, no expand
|
||||
root_p = dict(p)
|
||||
root_p['expand'] = False
|
||||
# Keep the original size (it's the minimum needed for the rootfs)
|
||||
partitions.append(root_p)
|
||||
|
||||
# Add Data partition for LVM
|
||||
partitions.append({
|
||||
"name": "Data",
|
||||
"type": "Linux",
|
||||
"size": "1073741824B", # 1GB minimum, will expand
|
||||
"expand": True
|
||||
})
|
||||
|
||||
data = {
|
||||
"os_list": [{
|
||||
"name": "Fedora Asahi Lab (${ROLE})",
|
||||
"default_os_name": "Fedora Linux Lab",
|
||||
"boot_object": upstream.get("boot_object", "m1n1.bin"),
|
||||
"next_object": upstream.get("next_object", "m1n1/boot.bin"),
|
||||
"package": "fedora-asahi-lab.zip",
|
||||
"supported_fw": upstream.get("supported_fw", ["13.5"]),
|
||||
"partitions": partitions,
|
||||
}]
|
||||
}
|
||||
|
||||
json.dump(data, sys.stdout, indent=2)
|
||||
print()
|
||||
PYEOF
|
||||
|
||||
echo " Generated: $ASAHI_DIR/installer_data.json"
|
||||
|
||||
# Pretty-print the partition layout
|
||||
echo ""
|
||||
echo " Partition layout:"
|
||||
python3 -c "
|
||||
import json
|
||||
with open('$ASAHI_DIR/installer_data.json') as f:
|
||||
data = json.load(f)
|
||||
for p in data['os_list'][0]['partitions']:
|
||||
size = p.get('size', '?')
|
||||
expand = ' (expand)' if p.get('expand') else ''
|
||||
image = f\" [{p['image']}]\" if 'image' in p else ''
|
||||
print(f\" {p['name']:8s} {p['type']:8s} {size:>16s}{expand}{image}\")
|
||||
"
|
||||
|
||||
echo ""
|
||||
echo "==> Build complete!"
|
||||
echo ""
|
||||
echo " Package: $ASAHI_DIR/fedora-asahi-lab.zip"
|
||||
echo " Config: $ASAHI_DIR/installer_data.json"
|
||||
echo ""
|
||||
echo " To serve from bastion, copy to the bastion's HTTP directory"
|
||||
echo " or configure REPO_BASE to point here."
|
||||
echo ""
|
||||
echo " To install on Mac Studio:"
|
||||
echo " curl http://$BASTION_IP:$HTTP_PORT/asahi | sh"
|
||||
@@ -99,16 +99,22 @@ if [ "$PUSH" = true ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012)
|
||||
TLS_FLAG=""
|
||||
if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then
|
||||
TLS_FLAG="--tls-verify=false"
|
||||
fi
|
||||
|
||||
echo "==> Logging in to $REGISTRY..."
|
||||
podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||
podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||
|
||||
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||
|
||||
# Also tag as :latest if not already
|
||||
if [ "$TAG" != "latest" ]; then
|
||||
echo "==> Also pushing as :latest..."
|
||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||
fi
|
||||
|
||||
# Link package to repository if script exists
|
||||
|
||||
@@ -92,15 +92,21 @@ if [ "$PUSH" = true ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Use --tls-verify=false for plain HTTP registries (e.g. 10.0.0.194:3012)
|
||||
TLS_FLAG=""
|
||||
if [[ "$REGISTRY" =~ ^[0-9] ]] || [[ "$REGISTRY" =~ ^localhost ]]; then
|
||||
TLS_FLAG="--tls-verify=false"
|
||||
fi
|
||||
|
||||
echo "==> Logging in to $REGISTRY..."
|
||||
podman login -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||
podman login $TLS_FLAG -u michal -p "$GITEA_TOKEN" "$REGISTRY"
|
||||
|
||||
echo "==> Pushing $FULL_IMAGE:$TAG..."
|
||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:$TAG"
|
||||
|
||||
if [ "$TAG" != "latest" ]; then
|
||||
echo "==> Also pushing as :latest..."
|
||||
podman manifest push --all "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||
podman manifest push --all $TLS_FLAG "$MANIFEST" "docker://$FULL_IMAGE:latest"
|
||||
fi
|
||||
|
||||
if [ -f "$SCRIPT_DIR/link-package.sh" ]; then
|
||||
|
||||
@@ -24,6 +24,21 @@ deploy_bastion() {
|
||||
kubectl rollout restart deployment/bastion -n lab-infra
|
||||
kubectl rollout status deployment/bastion -n lab-infra --timeout=180s
|
||||
echo "✓ Bastion deployed"
|
||||
|
||||
# Sync Asahi rootfs package to bastion pod's persistent volume
|
||||
if [ -d "$PROJECT_DIR/asahi-repo" ] && [ -f "$PROJECT_DIR/asahi-repo/fedora-asahi-lab.zip" ]; then
|
||||
echo ""
|
||||
echo "=== Syncing Asahi rootfs to bastion pod ==="
|
||||
BASTION_POD=$(kubectl get pods -n lab-infra -l app=bastion -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
|
||||
if [ -n "$BASTION_POD" ]; then
|
||||
kubectl exec -n lab-infra "$BASTION_POD" -- mkdir -p /data/asahi-repo
|
||||
kubectl cp "$PROJECT_DIR/asahi-repo/installer_data.json" "lab-infra/$BASTION_POD:/data/asahi-repo/installer_data.json"
|
||||
kubectl cp "$PROJECT_DIR/asahi-repo/fedora-asahi-lab.zip" "lab-infra/$BASTION_POD:/data/asahi-repo/fedora-asahi-lab.zip"
|
||||
echo "✓ Asahi rootfs synced ($(du -sh "$PROJECT_DIR/asahi-repo/fedora-asahi-lab.zip" | cut -f1))"
|
||||
else
|
||||
echo "WARNING: Could not find bastion pod — Asahi rootfs not synced"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
deploy_labd() {
|
||||
|
||||
131
bastion/scripts/fix-ssh-root.sh
Normal file
131
bastion/scripts/fix-ssh-root.sh
Normal file
@@ -0,0 +1,131 @@
|
||||
#!/bin/bash
|
||||
# Fix root SSH access on all provisioned machines.
|
||||
# Tries root, lab, michal users to find one that works,
|
||||
# then ensures root has the SSH key and PermitRootLogin is enabled.
|
||||
set -euo pipefail
|
||||
|
||||
SSH_KEY="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDMJ3FkUGbG174eoO5RjZd2eNV680FM5pgp0AgpW/QwlJExK3qxMk0DJSr4ICmzGUx4yujAXcrqU1otcOMPzzFzwc5heWpSmlNHU3TIW6NHEt0sF9ZTAbGLw2zSw3si5UouqFkCcENA40mePFJqY+Q9R8N1uvLgu4m/do+Zrn/mk5Ewc1V7OCRE5Acrnaec4T7LTB0BuVXcjPUfAmZ0q5fI+bKPR1q2Kc3+IeGhVkBuZ9OJVeXXhnpedm0uEbLeriK/jUYKYw/1QhsNDM8Tyty+UIGr9QVnWwzCMHB+wuQcDYC9mPGTqg0fYwX8Mp8xMi1PPxdsh1G7bj/cpWMAF43KswWORF2ul8ICGbaE1zEgIYXO790SuBjpBHhaC6Iegqi58hmCuP+a9893q/EU9HyrWTJHCZXC5E4kP1MsM57KrhEpszM6I3sW9f9zMTPd5QsCXFi4si4OMwX4kYNVu3fQGQPpseDPlTTSrT6uUdqj4Irm0c1m9cYTmK0vYgsM3ss= michal@fedora"
|
||||
|
||||
SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ConnectTimeout=5"
|
||||
USERS_TO_TRY=(root lab michal)
|
||||
|
||||
# Machines: hostname ip
|
||||
MACHINES=(
|
||||
"labmaster 192.168.8.11"
|
||||
"worker0-k8s0 192.168.8.23"
|
||||
"worker1-k8s0 192.168.8.13"
|
||||
"worker2-k8s0 192.168.8.25"
|
||||
"spark-2935 192.168.8.12"
|
||||
)
|
||||
|
||||
BOLD="\033[1m"
|
||||
GREEN="\033[0;32m"
|
||||
RED="\033[0;31m"
|
||||
DIM="\033[2m"
|
||||
RESET="\033[0m"
|
||||
|
||||
# Script to run on each machine (via sudo if needed)
|
||||
read -r -d '' FIX_SCRIPT << 'FIXEOF' || true
|
||||
#!/bin/bash
|
||||
set -e
|
||||
KEY="$1"
|
||||
|
||||
# 1. Ensure root .ssh dir exists
|
||||
mkdir -p /root/.ssh
|
||||
chmod 700 /root/.ssh
|
||||
touch /root/.ssh/authorized_keys
|
||||
chmod 600 /root/.ssh/authorized_keys
|
||||
|
||||
# 2. Add key if not present
|
||||
if ! grep -qF "$KEY" /root/.ssh/authorized_keys 2>/dev/null; then
|
||||
echo "$KEY" >> /root/.ssh/authorized_keys
|
||||
echo "KEY_ADDED"
|
||||
else
|
||||
echo "KEY_EXISTS"
|
||||
fi
|
||||
|
||||
# 3. Fix sshd_config for root login with keys
|
||||
SSHD_CONF="/etc/ssh/sshd_config"
|
||||
CHANGED=0
|
||||
|
||||
# Ensure PermitRootLogin allows key auth
|
||||
CURRENT=$(grep -E "^PermitRootLogin" "$SSHD_CONF" 2>/dev/null | tail -1 || true)
|
||||
if [ "$CURRENT" = "PermitRootLogin prohibit-password" ] || [ "$CURRENT" = "PermitRootLogin without-password" ]; then
|
||||
echo "SSHD_OK"
|
||||
elif [ "$CURRENT" = "PermitRootLogin yes" ]; then
|
||||
echo "SSHD_OK"
|
||||
else
|
||||
# Remove any existing PermitRootLogin lines
|
||||
sed -i '/^#*PermitRootLogin/d' "$SSHD_CONF"
|
||||
echo "PermitRootLogin prohibit-password" >> "$SSHD_CONF"
|
||||
CHANGED=1
|
||||
echo "SSHD_FIXED"
|
||||
fi
|
||||
|
||||
# Ensure PubkeyAuthentication is enabled
|
||||
if grep -qE "^PubkeyAuthentication no" "$SSHD_CONF" 2>/dev/null; then
|
||||
sed -i 's/^PubkeyAuthentication no/PubkeyAuthentication yes/' "$SSHD_CONF"
|
||||
CHANGED=1
|
||||
echo "PUBKEY_FIXED"
|
||||
else
|
||||
echo "PUBKEY_OK"
|
||||
fi
|
||||
|
||||
# Restart sshd if changed
|
||||
if [ "$CHANGED" -eq 1 ]; then
|
||||
systemctl restart sshd 2>/dev/null || systemctl restart ssh 2>/dev/null || true
|
||||
echo "SSHD_RESTARTED"
|
||||
fi
|
||||
|
||||
# 4. Verify root can be reached
|
||||
echo "DONE"
|
||||
FIXEOF
|
||||
|
||||
echo ""
|
||||
echo -e "${BOLD}Fixing root SSH access on all machines...${RESET}"
|
||||
echo ""
|
||||
|
||||
for entry in "${MACHINES[@]}"; do
|
||||
read -r hostname ip <<< "$entry"
|
||||
printf " %-24s ${DIM}(%s)${RESET} " "$hostname" "$ip"
|
||||
|
||||
# Try each user until one works
|
||||
WORKING_USER=""
|
||||
for user in "${USERS_TO_TRY[@]}"; do
|
||||
if ssh $SSH_OPTS "$user@$ip" "true" 2>/dev/null; then
|
||||
WORKING_USER="$user"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -z "$WORKING_USER" ]; then
|
||||
echo -e "${RED}UNREACHABLE${RESET} (tried: ${USERS_TO_TRY[*]})"
|
||||
continue
|
||||
fi
|
||||
|
||||
# Run fix script (with sudo if not root)
|
||||
if [ "$WORKING_USER" = "root" ]; then
|
||||
RESULT=$(ssh $SSH_OPTS "root@$ip" "bash -s -- '$SSH_KEY'" <<< "$FIX_SCRIPT" 2>&1)
|
||||
else
|
||||
RESULT=$(ssh $SSH_OPTS "$WORKING_USER@$ip" "sudo bash -s -- '$SSH_KEY'" <<< "$FIX_SCRIPT" 2>&1)
|
||||
fi
|
||||
|
||||
# Parse result
|
||||
DETAILS=""
|
||||
if echo "$RESULT" | grep -q "KEY_ADDED"; then DETAILS="key added"; fi
|
||||
if echo "$RESULT" | grep -q "KEY_EXISTS"; then DETAILS="key ok"; fi
|
||||
if echo "$RESULT" | grep -q "SSHD_FIXED"; then DETAILS="$DETAILS, sshd fixed"; fi
|
||||
if echo "$RESULT" | grep -q "SSHD_OK"; then DETAILS="$DETAILS, sshd ok"; fi
|
||||
if echo "$RESULT" | grep -q "SSHD_RESTARTED"; then DETAILS="$DETAILS, restarted"; fi
|
||||
|
||||
# Verify root works now
|
||||
if ssh $SSH_OPTS "root@$ip" "true" 2>/dev/null; then
|
||||
echo -e "${GREEN}OK${RESET} ${DIM}(via $WORKING_USER: $DETAILS)${RESET}"
|
||||
else
|
||||
echo -e "${RED}PARTIAL${RESET} ${DIM}(via $WORKING_USER: $DETAILS -- root still blocked)${RESET}"
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo -e "${BOLD}Done.${RESET} Verify: labctl provision recheck --user root"
|
||||
echo ""
|
||||
@@ -257,7 +257,7 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
state.update((s) => {
|
||||
s.install_queue[msg.mac] = {
|
||||
hostname: msg.hostname,
|
||||
disk: msg.disk ?? "/dev/sda",
|
||||
disk: msg.disk ?? "",
|
||||
role: msg.role as import("@lab/shared").Role,
|
||||
os: msg.os as import("@lab/shared").OsId,
|
||||
queued_at: new Date().toISOString(),
|
||||
@@ -269,7 +269,7 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
labdConn.onCommand("command-debug", async (msg) => {
|
||||
if (msg.type !== "command-debug") throw new Error("unexpected");
|
||||
const mac = msg.mac.toLowerCase();
|
||||
const sshd = msg.sshd ?? false;
|
||||
const pxeBoot = msg.pxeBoot ?? false;
|
||||
const currentState = state.load();
|
||||
const hostname =
|
||||
currentState.installed[mac]?.hostname ??
|
||||
@@ -277,7 +277,7 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
currentState.discovered[mac]?.product ??
|
||||
mac;
|
||||
state.update((s) => {
|
||||
s.debug[mac] = { hostname, queued_at: new Date().toISOString(), sshd };
|
||||
s.debug[mac] = { hostname, queued_at: new Date().toISOString(), pxeBoot };
|
||||
});
|
||||
return { status: "ok", data: { mac, hostname } };
|
||||
});
|
||||
@@ -294,6 +294,47 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
return { status: "ok", data: { mac } };
|
||||
});
|
||||
|
||||
labdConn.onCommand("command-register", async (msg) => {
|
||||
if (msg.type !== "command-register") throw new Error("unexpected");
|
||||
const mac = msg.mac.toLowerCase();
|
||||
state.update((s) => {
|
||||
s.installed[mac] = {
|
||||
hostname: msg.hostname,
|
||||
role: msg.role,
|
||||
ip: msg.ip,
|
||||
installed_at: new Date().toISOString(),
|
||||
};
|
||||
});
|
||||
logger.info(`MACHINE REGISTERED: ${mac} -> ${msg.hostname} (${msg.role}) ip=${msg.ip}`);
|
||||
return { status: "ok", data: { mac, hostname: msg.hostname } };
|
||||
});
|
||||
|
||||
labdConn.onCommand("command-discover", async (msg) => {
|
||||
if (msg.type !== "command-discover") throw new Error("unexpected");
|
||||
const mac = (msg.mac as string).toLowerCase();
|
||||
const now = new Date().toISOString();
|
||||
const existing = state.load().discovered[mac];
|
||||
state.update((s) => {
|
||||
s.discovered[mac] = {
|
||||
mac,
|
||||
product: (msg.product as string) ?? "unknown",
|
||||
board: (msg.board as string) ?? "unknown",
|
||||
serial: (msg.serial as string) ?? "unknown",
|
||||
manufacturer: (msg.manufacturer as string) ?? "unknown",
|
||||
cpu_model: (msg.cpu_model as string) ?? "unknown",
|
||||
cpu_cores: (msg.cpu_cores as number) ?? 0,
|
||||
memory_gb: (msg.memory_gb as number) ?? 0,
|
||||
arch: (msg.arch as string) ?? "unknown",
|
||||
disks: (msg.disks as Array<{ name: string; size_gb: number; model: string }>) ?? [],
|
||||
nics: (msg.nics as Array<{ name: string; mac: string; state: string }>) ?? [],
|
||||
first_seen: existing?.first_seen ?? now,
|
||||
last_seen: now,
|
||||
};
|
||||
});
|
||||
logger.info(`HARDWARE UPDATED: ${mac} -- ${msg.manufacturer ?? "?"} ${msg.product ?? "?"} (${msg.cpu_model ?? "?"}, ${msg.cpu_cores ?? "?"} cores, ${msg.memory_gb ?? "?"}GB RAM)`);
|
||||
return { status: "ok", data: { mac } };
|
||||
});
|
||||
|
||||
labdConn.onCommand("command-role-update", async (msg) => {
|
||||
if (msg.type !== "command-role-update") throw new Error("unexpected");
|
||||
const mac = msg.mac.toLowerCase();
|
||||
|
||||
@@ -13,11 +13,13 @@ import { triggerPostProvisionK3s } from "../services/post-provision.js";
|
||||
import { progressBus } from "../services/progress-events.js";
|
||||
import type { ProgressEvent } from "../services/progress-events.js";
|
||||
import type { InstallLogBuffer } from "../services/install-log.js";
|
||||
import type { SyslogListener } from "../services/syslog-listener.js";
|
||||
|
||||
export function registerApiRoutes(
|
||||
app: FastifyInstance,
|
||||
state: StateManager,
|
||||
installLog: InstallLogBuffer,
|
||||
syslog: SyslogListener,
|
||||
): void {
|
||||
// List all machines
|
||||
app.get("/api/machines", async (_request, reply) => {
|
||||
@@ -84,6 +86,11 @@ export function registerApiRoutes(
|
||||
const { mac: rawMac, stage, detail } = request.body ?? {};
|
||||
const mac = (rawMac ?? "unknown").toLowerCase();
|
||||
const stageName = stage ?? "unknown";
|
||||
|
||||
// Register IP → MAC for syslog routing
|
||||
if (mac !== "unknown") {
|
||||
syslog.registerIp(request.ip, mac);
|
||||
}
|
||||
const detailStr = detail ?? "";
|
||||
|
||||
const GREEN = "\x1b[0;32m";
|
||||
@@ -132,16 +139,26 @@ export function registerApiRoutes(
|
||||
? detailStr.replace("ready at ", "").trim()
|
||||
: "";
|
||||
|
||||
const hw = s.discovered[mac];
|
||||
const installedInfo: InstalledInfo = {
|
||||
hostname: cfg?.hostname ?? "?",
|
||||
role: cfg?.role ?? "?",
|
||||
...(cfg?.os !== undefined ? { os: cfg.os } : {}),
|
||||
ip,
|
||||
installed_at: new Date().toISOString(),
|
||||
// Preserve hardware info from discovery
|
||||
...(hw ? {
|
||||
product: hw.product,
|
||||
manufacturer: hw.manufacturer,
|
||||
cpu_model: hw.cpu_model,
|
||||
cpu_cores: hw.cpu_cores,
|
||||
memory_gb: hw.memory_gb,
|
||||
arch: hw.arch,
|
||||
} : {}),
|
||||
};
|
||||
s.installed[mac] = installedInfo;
|
||||
|
||||
const admin = installedInfo.role !== "vanilla" && installedInfo.role !== "" ? "michal" : "root";
|
||||
const admin = installedInfo.role !== "vanilla" && installedInfo.role !== "" ? "lab" : "root";
|
||||
console.log(`\n \x1b[0;32m\x1b[1m ssh ${admin}@${ip}\x1b[0m\n`); // eslint-disable-line no-console
|
||||
|
||||
// Auto-install k3s for non-vanilla roles
|
||||
@@ -191,10 +208,10 @@ export function registerApiRoutes(
|
||||
|
||||
// Queue debug/rescue mode for a machine
|
||||
app.post<{
|
||||
Body: { mac?: string; sshd?: boolean };
|
||||
Body: { mac?: string; pxeBoot?: boolean };
|
||||
}>("/api/debug", async (request, reply) => {
|
||||
const mac = (request.body?.mac ?? "").toLowerCase().replace(/-/g, ":");
|
||||
const sshd = request.body?.sshd ?? false;
|
||||
const pxeBoot = request.body?.pxeBoot ?? false;
|
||||
if (mac === "") {
|
||||
return reply.status(400).send({ error: "mac is required" });
|
||||
}
|
||||
@@ -208,7 +225,7 @@ export function registerApiRoutes(
|
||||
mac;
|
||||
|
||||
state.update((s) => {
|
||||
s.debug[mac] = { hostname, queued_at: new Date().toISOString(), sshd };
|
||||
s.debug[mac] = { hostname, queued_at: new Date().toISOString(), pxeBoot };
|
||||
});
|
||||
|
||||
logger.info(`DEBUG QUEUED: ${mac} -> ${hostname}`);
|
||||
@@ -308,6 +325,67 @@ export function registerApiRoutes(
|
||||
return reply.send({ status: "ok", mac, new: isNew });
|
||||
});
|
||||
|
||||
// Register an already-installed machine (e.g. re-add after state loss)
|
||||
app.post<{
|
||||
Body: {
|
||||
mac?: string;
|
||||
hostname?: string;
|
||||
role?: string;
|
||||
ip?: string;
|
||||
};
|
||||
}>("/api/register", async (request, reply) => {
|
||||
const { mac: rawMac, hostname, role, ip } = request.body ?? {};
|
||||
const mac = (rawMac ?? "").toLowerCase().replace(/-/g, ":");
|
||||
|
||||
if (mac === "") {
|
||||
return reply.status(400).send({ error: "mac is required" });
|
||||
}
|
||||
if (!hostname) {
|
||||
return reply.status(400).send({ error: "hostname is required" });
|
||||
}
|
||||
|
||||
const validRole = role ?? "worker";
|
||||
if (!(SUPPORTED_ROLES as readonly string[]).includes(validRole)) {
|
||||
return reply.status(400).send({ error: `invalid role: '${validRole}'. Supported: ${SUPPORTED_ROLES.join(", ")}` });
|
||||
}
|
||||
|
||||
state.update((s) => {
|
||||
s.installed[mac] = {
|
||||
hostname,
|
||||
role: validRole,
|
||||
ip: ip ?? "",
|
||||
installed_at: new Date().toISOString(),
|
||||
};
|
||||
});
|
||||
|
||||
logger.info(`MACHINE REGISTERED: ${mac} -> hostname=${hostname} role=${validRole} ip=${ip ?? ""}`);
|
||||
|
||||
return reply.send({
|
||||
status: "registered",
|
||||
mac,
|
||||
hostname,
|
||||
role: validRole,
|
||||
ip: ip ?? "",
|
||||
});
|
||||
});
|
||||
|
||||
// Simple machine state query (used by ks-auto for ISO boot dispatch)
|
||||
app.get<{
|
||||
Params: { mac: string };
|
||||
}>("/api/machine-state/:mac", async (request, reply) => {
|
||||
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
|
||||
const currentState = state.load();
|
||||
|
||||
if (currentState.debug[mac]) return reply.send("debug");
|
||||
if (currentState.install_queue[mac]) {
|
||||
const progress = currentState.install_queue[mac].progress;
|
||||
return reply.send(progress ? "installing" : "queued");
|
||||
}
|
||||
if (currentState.installed[mac]) return reply.send("installed");
|
||||
if (currentState.discovered[mac]) return reply.send("discovered");
|
||||
return reply.send("unknown");
|
||||
});
|
||||
|
||||
// Update a machine's role (e.g. promote infra -> labcontroller)
|
||||
app.post<{
|
||||
Body: {
|
||||
|
||||
176
bastion/src/bastion/src/routes/asahi.ts
Normal file
176
bastion/src/bastion/src/routes/asahi.ts
Normal file
@@ -0,0 +1,176 @@
|
||||
// Routes for Asahi Linux provisioning.
|
||||
// GET /asahi — wrapper script (curl bastion:8080/asahi | sh)
|
||||
// GET /asahi/installer_data.json — custom installer config (built or fallback)
|
||||
// GET /asahi/repo/* — serves built rootfs package (fedora-asahi-lab.zip)
|
||||
// GET /asahi/firstboot.sh — first-boot LVM setup script (for manual use)
|
||||
|
||||
import type { FastifyInstance } from "fastify";
|
||||
import fastifyStatic from "@fastify/static";
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import { renderFirstbootScript, renderFirstbootUnit } from "../templates/asahi-firstboot.sh.js";
|
||||
import type { Role } from "@lab/shared";
|
||||
|
||||
/** Find the asahi-repo directory (built by scripts/build-asahi-rootfs.sh). */
|
||||
function findAsahiRepo(config: BastionConfig): string | null {
|
||||
// Check relative to bastionDir (container deploy)
|
||||
const inBastionDir = join(config.bastionDir, "asahi-repo");
|
||||
if (existsSync(inBastionDir)) return inBastionDir;
|
||||
|
||||
// Check /data/asahi-repo (PVC mount in k3s container)
|
||||
if (existsSync("/data/asahi-repo")) return "/data/asahi-repo";
|
||||
|
||||
// Check relative to project root (dev mode)
|
||||
try {
|
||||
const thisDir = dirname(fileURLToPath(import.meta.url));
|
||||
const projectRoot = join(thisDir, "..", "..", "..", "..");
|
||||
const inProjectRoot = join(projectRoot, "asahi-repo");
|
||||
if (existsSync(inProjectRoot)) return inProjectRoot;
|
||||
} catch { /* import.meta.url not available in tests */ }
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
export function registerAsahiRoutes(app: FastifyInstance, config: BastionConfig): void {
|
||||
const repoDir = findAsahiRepo(config);
|
||||
|
||||
// Serve built rootfs package files (fedora-asahi-lab.zip, etc.)
|
||||
if (repoDir) {
|
||||
app.register(fastifyStatic, {
|
||||
root: repoDir,
|
||||
prefix: "/asahi/repo/",
|
||||
decorateReply: false,
|
||||
});
|
||||
}
|
||||
|
||||
// Wrapper script — user runs: curl http://bastion:8080/asahi | sh
|
||||
app.get("/asahi", async (_request, reply) => {
|
||||
const script = `#!/bin/bash
|
||||
# Lab Asahi provisioner — sets up Apple Silicon machines with lab LVM layout.
|
||||
# This wraps the standard Asahi installer with custom installer_data.json
|
||||
# that creates a separate LVM data partition.
|
||||
set -euo pipefail
|
||||
|
||||
BASTION="http://${config.serverIp}:${config.httpPort}"
|
||||
|
||||
echo ""
|
||||
echo " ╔══════════════════════════════════════════════╗"
|
||||
echo " ║ Lab Asahi Provisioner ║"
|
||||
echo " ║ Bastion: \${BASTION} ║"
|
||||
echo " ╚══════════════════════════════════════════════╝"
|
||||
echo ""
|
||||
|
||||
# Check we're on macOS
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
echo "ERROR: This script must be run from macOS on the target Mac."
|
||||
echo " It uses the Asahi Linux installer to set up Apple Silicon boot."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Download the standard Asahi installer
|
||||
echo "Downloading Asahi Linux installer..."
|
||||
WORKDIR=$(mktemp -d)
|
||||
cd "$WORKDIR"
|
||||
|
||||
INSTALLER_BASE="https://cdn.asahilinux.org/installer"
|
||||
PKG_VER=$(curl -s "\${INSTALLER_BASE}/latest")
|
||||
echo " Version: \${PKG_VER}"
|
||||
|
||||
curl -# -L -o "installer-\${PKG_VER}.tar.gz" "\${INSTALLER_BASE}/installer-\${PKG_VER}.tar.gz"
|
||||
|
||||
echo " Extracting..."
|
||||
tar xf "installer-\${PKG_VER}.tar.gz"
|
||||
|
||||
# Download our custom installer_data.json (installer reads it as a local file)
|
||||
echo " Downloading custom installer data from bastion..."
|
||||
curl -sfL -o installer_data.json "\${BASTION}/asahi/installer_data.json"
|
||||
|
||||
# Pre-download the rootfs package (avoids Python HTTP streaming issues on macOS)
|
||||
echo " Downloading rootfs package from bastion..."
|
||||
mkdir -p os
|
||||
curl -# -L -o os/fedora-asahi-lab.zip "\${BASTION}/asahi/repo/fedora-asahi-lab.zip"
|
||||
|
||||
# Point installer to local directory (REPO_BASE + /os/ + package name)
|
||||
export REPO_BASE="\${PWD}"
|
||||
|
||||
echo ""
|
||||
echo " Using custom partition layout + rootfs from bastion."
|
||||
echo " This will create:"
|
||||
echo " - Standard Asahi boot infrastructure (m1n1 + U-Boot)"
|
||||
echo " - Fedora Asahi Remix root partition"
|
||||
echo " - LVM data partition (remaining space)"
|
||||
echo ""
|
||||
echo " After first boot, SSH in and set up LVM:"
|
||||
echo " ssh lab@<ip> 'curl -sf \${BASTION}/asahi/firstboot.sh | sudo bash'"
|
||||
echo ""
|
||||
|
||||
# Run the installer
|
||||
if [ "$USER" != "root" ]; then
|
||||
echo "The installer needs root. Enter your sudo password if prompted."
|
||||
exec caffeinate -dis sudo -E ./install.sh "$@"
|
||||
else
|
||||
exec caffeinate -dis ./install.sh "$@"
|
||||
fi
|
||||
`;
|
||||
return reply.type("text/x-shellscript").send(script);
|
||||
});
|
||||
|
||||
// Custom installer_data.json — serves built config or fallback
|
||||
app.get("/asahi/installer_data.json", async (_request, reply) => {
|
||||
// Prefer the built installer_data.json (from build-asahi-rootfs.sh)
|
||||
if (repoDir) {
|
||||
const builtConfig = join(repoDir, "installer_data.json");
|
||||
if (existsSync(builtConfig)) {
|
||||
const data = JSON.parse(readFileSync(builtConfig, "utf-8"));
|
||||
return reply.type("application/json").send(data);
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: minimal config (won't have boot.img, for testing only)
|
||||
return reply.type("application/json").send({
|
||||
os_list: [{
|
||||
name: "Fedora Asahi Lab",
|
||||
default_os_name: "Fedora Linux with Lab LVM",
|
||||
boot_object: "m1n1.bin",
|
||||
next_object: "m1n1/boot.bin",
|
||||
package: "fedora-asahi-lab.zip",
|
||||
supported_fw: ["13.5"],
|
||||
partitions: [
|
||||
{ name: "EFI", type: "EFI", size: "524288000B", format: "fat",
|
||||
copy_firmware: true, copy_installer_data: true, source: "esp" },
|
||||
{ name: "Root", type: "Linux", size: "5368709120B", image: "root.img", expand: false },
|
||||
{ name: "Data", type: "Linux", size: "1073741824B", expand: true },
|
||||
],
|
||||
}],
|
||||
});
|
||||
});
|
||||
|
||||
// First-boot script — for manual download or embedding in rootfs
|
||||
app.get<{
|
||||
Querystring: { hostname?: string; role?: string; mac?: string; user?: string };
|
||||
}>("/asahi/firstboot.sh", async (request, reply) => {
|
||||
const hostname = request.query.hostname ?? "unknown";
|
||||
const role = (request.query.role ?? "infra") as Role;
|
||||
const mac = request.query.mac ?? "unknown";
|
||||
const user = request.query.user ?? "lab";
|
||||
|
||||
const script = renderFirstbootScript({
|
||||
hostname,
|
||||
role,
|
||||
serverIp: config.serverIp,
|
||||
httpPort: config.httpPort,
|
||||
sshKeys: config.sshKeys ?? [],
|
||||
adminUser: user,
|
||||
mac,
|
||||
});
|
||||
|
||||
return reply.type("text/x-shellscript").send(script);
|
||||
});
|
||||
|
||||
// Systemd unit file for first-boot service
|
||||
app.get("/asahi/firstboot.service", async (_request, reply) => {
|
||||
return reply.type("text/plain").send(renderFirstbootUnit());
|
||||
});
|
||||
}
|
||||
@@ -137,7 +137,7 @@ function generateIso(config: BastionConfig, outputPath: string): void {
|
||||
"# Map iPXE arch names to Fedora mirror paths (arm64 -> aarch64)",
|
||||
"set fedarch ${buildarch}",
|
||||
"iseq ${buildarch} arm64 && set fedarch aarch64 ||",
|
||||
`kernel file:/vmlinuz-\${buildarch} inst.ks=${bastionUrl}/discover.ks inst.repo=${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/\${fedarch}/os inst.text || goto no_kernel`,
|
||||
`kernel file:/vmlinuz-\${buildarch} inst.ks=${bastionUrl}/ks-auto inst.repo=${FEDORA_MIRROR_BASE}/${config.fedoraVersion}/Everything/\${fedarch}/os inst.text || goto no_kernel`,
|
||||
`initrd file:/initrd-\${buildarch} || goto no_kernel`,
|
||||
"boot || shell",
|
||||
"",
|
||||
|
||||
@@ -11,6 +11,7 @@ import {
|
||||
renderDiscoverIpxe,
|
||||
renderInstallIpxe,
|
||||
renderDebugIpxe,
|
||||
renderPxeBootDebugIpxe,
|
||||
renderLocalBootIpxe,
|
||||
} from "../templates/boot.ipxe.js";
|
||||
import { renderUbuntuInstallIpxe } from "../templates/ubuntu-boot.ipxe.js";
|
||||
@@ -22,21 +23,44 @@ export function registerDispatchRoutes(
|
||||
config: BastionConfig,
|
||||
state: StateManager,
|
||||
): void {
|
||||
// Serve debug/rescue kickstart (minimal: SSH keys + network)
|
||||
app.get<{ Querystring: { mac?: string; sshd?: string } }>("/debug.ks", async (request, reply) => {
|
||||
const mac = (request.query.mac ?? "").toLowerCase().replace(/-/g, ":");
|
||||
const currentState = state.load();
|
||||
const wantSshd = request.query.sshd === "1" || currentState.debug[mac]?.sshd === true;
|
||||
|
||||
// Serve debug/rescue kickstart (minimal: SSH keys + network for inst.sshd)
|
||||
app.get<{ Querystring: { mac?: string } }>("/debug.ks", async (_request, reply) => {
|
||||
const ks = renderDebugKickstart({
|
||||
sshKeys: config.sshKeys ?? [],
|
||||
sshd: wantSshd,
|
||||
serverIp: config.serverIp,
|
||||
httpPort: config.httpPort,
|
||||
});
|
||||
return reply.type("text/plain").send(ks);
|
||||
});
|
||||
|
||||
// Shell script for manual debug setup (nc listener + IP reporting)
|
||||
// Usage from rescue shell: curl http://bastion:port/debug-setup.sh | bash
|
||||
app.get("/debug-setup.sh", async (_request, reply) => {
|
||||
const script = `#!/bin/bash
|
||||
# Lab Bastion debug setup — run from rescue shell
|
||||
set -x
|
||||
|
||||
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
|
||||
MAC_ADDR=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||
|
||||
# Start persistent nc listener for remote shell
|
||||
(while true; do nc -l -p 2323 -e /bin/bash 2>/dev/null; done) &
|
||||
echo "nc shell listener on port 2323"
|
||||
|
||||
# Report IP to bastion
|
||||
curl -sf -X POST "http://${config.serverIp}:${config.httpPort}/api/progress" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"mac\\":\\"$MAC_ADDR\\",\\"stage\\":\\"debug-ready\\",\\"detail\\":\\"nc $IP_ADDR 2323\\"}" 2>/dev/null || true
|
||||
|
||||
echo ""
|
||||
echo "=== Debug environment ready ==="
|
||||
echo " nc $IP_ADDR 2323 (remote shell)"
|
||||
echo " ssh root@$IP_ADDR (password: debug)"
|
||||
echo "==============================="
|
||||
`;
|
||||
return reply.type("text/plain").send(script);
|
||||
});
|
||||
|
||||
app.get<{ Querystring: { mac?: string } }>("/dispatch", async (request, reply) => {
|
||||
const mac = (request.query.mac ?? "").toLowerCase().replace(/-/g, ":");
|
||||
const currentState = state.load();
|
||||
@@ -45,17 +69,27 @@ export function registerDispatchRoutes(
|
||||
const debugEntry = currentState.debug[mac];
|
||||
if (debugEntry) {
|
||||
const hostname = debugEntry.hostname ?? "debug";
|
||||
logger.info(`DEBUG BOOT: ${mac} -> ${hostname} (rescue mode)`);
|
||||
|
||||
state.update((s) => { delete s.debug[mac]; });
|
||||
|
||||
const script = renderDebugIpxe({
|
||||
mac,
|
||||
hostname,
|
||||
serverIp: config.serverIp,
|
||||
httpPort: config.httpPort,
|
||||
fedoraMirror: config.fedoraMirror,
|
||||
});
|
||||
let script: string;
|
||||
if (debugEntry.pxeBoot) {
|
||||
logger.info(`PXE BOOT DEBUG: ${mac} -> ${hostname} (kernel+initrd from PXE, root from NVMe)`);
|
||||
script = renderPxeBootDebugIpxe({
|
||||
mac,
|
||||
hostname,
|
||||
serverIp: config.serverIp,
|
||||
httpPort: config.httpPort,
|
||||
});
|
||||
} else {
|
||||
logger.info(`DEBUG BOOT: ${mac} -> ${hostname} (rescue mode)`);
|
||||
script = renderDebugIpxe({
|
||||
mac,
|
||||
hostname,
|
||||
serverIp: config.serverIp,
|
||||
httpPort: config.httpPort,
|
||||
fedoraMirror: config.fedoraMirror,
|
||||
});
|
||||
}
|
||||
return reply.type("text/plain").send(script);
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
import type { FastifyInstance } from "fastify";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import type { StateManager } from "../services/state.js";
|
||||
import type { SyslogListener } from "../services/syslog-listener.js";
|
||||
import { generateInstallKickstart, generateDiscoverKickstart } from "../services/kickstart-generator.js";
|
||||
import { renderUbuntuAutoinstall, renderUbuntuMetaData, type UbuntuAutoinstallParams } from "../templates/ubuntu-autoinstall.js";
|
||||
|
||||
@@ -12,6 +13,7 @@ export function registerKickstartRoutes(
|
||||
app: FastifyInstance,
|
||||
config: BastionConfig,
|
||||
state: StateManager,
|
||||
syslog: SyslogListener,
|
||||
): void {
|
||||
// Per-MAC install kickstart
|
||||
app.get<{ Querystring: { mac?: string } }>("/ks", async (request, reply) => {
|
||||
@@ -19,6 +21,11 @@ export function registerKickstartRoutes(
|
||||
const currentState = state.load();
|
||||
const queueEntry = currentState.install_queue[mac];
|
||||
|
||||
// Register IP → MAC so syslog listener can route Anaconda logs
|
||||
if (mac) {
|
||||
syslog.registerIp(request.ip, mac);
|
||||
}
|
||||
|
||||
const ks = generateInstallKickstart(config, {
|
||||
hostname: queueEntry?.hostname ?? "lab-node",
|
||||
disk: queueEntry?.disk ?? "",
|
||||
@@ -34,6 +41,150 @@ export function registerKickstartRoutes(
|
||||
return reply.type("text/plain").send(ks);
|
||||
});
|
||||
|
||||
// Auto-detecting kickstart for ISO boot (no-network machines like R1 ARM).
|
||||
// %pre detects MAC, queries bastion state, writes dynamic kickstart to /tmp.
|
||||
// Main body %include's it — so Anaconda gets either discover or install content.
|
||||
app.get("/ks-auto", async (_request, reply) => {
|
||||
const bastionUrl = `http://${config.serverIp}:${config.httpPort}`;
|
||||
|
||||
const ks = `# Lab Bastion -- Auto-detect kickstart (ISO boot)
|
||||
# %pre detects MAC, queries bastion state, writes /tmp/dynamic.ks.
|
||||
# Main body %include's it to get either discovery reboot or full install.
|
||||
|
||||
%pre --erroronfail --log=/tmp/ks-auto.log
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
# -- Detect MAC address --
|
||||
MAC=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||
echo "Detected MAC: $MAC"
|
||||
|
||||
# -- Wait for network (Linux drivers may take a moment) --
|
||||
for i in $(seq 1 30); do
|
||||
if curl -sf "${bastionUrl}/healthz" >/dev/null 2>&1; then
|
||||
echo "Bastion reachable at ${bastionUrl}"
|
||||
break
|
||||
fi
|
||||
echo "Waiting for network... ($i/30)"
|
||||
sleep 2
|
||||
done
|
||||
|
||||
# -- Query bastion for machine state --
|
||||
STATE=$(curl -sf "${bastionUrl}/api/machine-state/$MAC" 2>/dev/null || echo "unknown")
|
||||
echo "Machine state: $STATE"
|
||||
|
||||
case "$STATE" in
|
||||
queued|installing)
|
||||
echo "=== Machine queued for install. Fetching install kickstart... ==="
|
||||
curl -sf "${bastionUrl}/ks?mac=$MAC" > /tmp/dynamic.ks
|
||||
if [ -s /tmp/dynamic.ks ]; then
|
||||
echo "Install kickstart downloaded ($(wc -l < /tmp/dynamic.ks) lines)"
|
||||
else
|
||||
echo "ERROR: Failed to download install kickstart"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Run any %pre scripts from the downloaded kickstart.
|
||||
# Anaconda only runs %pre from the top-level file, not from %include'd files.
|
||||
python3 -c "
|
||||
import re, subprocess
|
||||
content = open('/tmp/dynamic.ks').read()
|
||||
blocks = re.findall(r'%pre[^\\n]*\\n(.*?)%end', content, re.DOTALL)
|
||||
for i, script in enumerate(blocks):
|
||||
path = f'/tmp/inner-pre-{i}.sh'
|
||||
with open(path, 'w') as f:
|
||||
f.write(script)
|
||||
print(f'Running inner %pre script {i} ({len(script.splitlines())} lines)')
|
||||
subprocess.run(['bash', path], check=False)
|
||||
"
|
||||
;;
|
||||
|
||||
debug)
|
||||
echo "=== Debug mode ==="
|
||||
curl -sf "${bastionUrl}/debug.ks?mac=$MAC" > /tmp/dynamic.ks 2>/dev/null
|
||||
if [ ! -s /tmp/dynamic.ks ]; then
|
||||
echo "rescue" > /tmp/dynamic.ks
|
||||
fi
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "=== Running hardware discovery ==="
|
||||
# Collect hardware info
|
||||
PRODUCT=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo "unknown")
|
||||
BOARD=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo "unknown")
|
||||
SERIAL=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo "unknown")
|
||||
MANUFACTURER=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo "unknown")
|
||||
CPUMODEL=$(grep -m1 'model name' /proc/cpuinfo | cut -d: -f2 | sed 's/^ //')
|
||||
CPUCORES=$(grep -c '^processor' /proc/cpuinfo)
|
||||
MEMGB=$(awk '/MemTotal/ {printf "%d", $2/1024/1024}' /proc/meminfo)
|
||||
ARCHTYPE=$(uname -m)
|
||||
|
||||
DISKS_JSON=$(lsblk -Jb -o NAME,SIZE,TYPE,MODEL 2>/dev/null | python3 -c "
|
||||
import sys, json
|
||||
data = json.load(sys.stdin)
|
||||
disks = [d for d in data.get('blockdevices', []) if d.get('type') == 'disk']
|
||||
result = []
|
||||
for d in disks:
|
||||
size_gb = round(int(d.get('size', 0)) / 1073741824, 1)
|
||||
result.append({'name': d.get('name', '?'), 'size_gb': size_gb, 'model': (d.get('model') or 'unknown').strip()})
|
||||
print(json.dumps(result))
|
||||
" 2>/dev/null || echo '[]')
|
||||
|
||||
NICS_JSON=$(ip -j link show 2>/dev/null | python3 -c "
|
||||
import sys, json
|
||||
nics = json.load(sys.stdin)
|
||||
result = []
|
||||
for n in nics:
|
||||
if n.get('link_type') == 'loopback': continue
|
||||
result.append({'name': n.get('ifname', '?'), 'mac': n.get('address', '?'), 'state': n.get('operstate', '?')})
|
||||
print(json.dumps(result))
|
||||
" 2>/dev/null || echo '[]')
|
||||
|
||||
PAYLOAD=$(python3 -c "
|
||||
import json
|
||||
print(json.dumps({
|
||||
'mac': '$MAC', 'product': '$PRODUCT', 'board': '$BOARD', 'serial': '$SERIAL',
|
||||
'manufacturer': '$MANUFACTURER', 'cpu_model': '$CPUMODEL',
|
||||
'cpu_cores': int('$CPUCORES' or 0), 'memory_gb': int('$MEMGB' or 0),
|
||||
'arch': '$ARCHTYPE', 'disks': $DISKS_JSON, 'nics': $NICS_JSON
|
||||
}))
|
||||
")
|
||||
|
||||
curl -sf -X POST "${bastionUrl}/api/discover" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "$PAYLOAD" || true
|
||||
|
||||
echo ""
|
||||
echo "=== Discovery complete ==="
|
||||
echo "Machine MAC: $MAC"
|
||||
echo "Queue for install: labctl provision install $MAC <hostname> --role infra"
|
||||
echo "Then reboot to start installation."
|
||||
echo ""
|
||||
|
||||
# Write a minimal kickstart that just reboots
|
||||
cat > /tmp/dynamic.ks << 'DISCOVER_KS'
|
||||
# Discovery mode -- reboot to allow install queue
|
||||
reboot
|
||||
DISCOVER_KS
|
||||
|
||||
# Force reboot now (don't wait for Anaconda)
|
||||
sleep 3
|
||||
echo 1 > /proc/sys/kernel/sysrq
|
||||
echo b > /proc/sysrq-trigger
|
||||
sleep 5
|
||||
reboot -f
|
||||
;;
|
||||
esac
|
||||
|
||||
%end
|
||||
|
||||
# Include the dynamically chosen kickstart
|
||||
%include /tmp/dynamic.ks
|
||||
`;
|
||||
|
||||
return reply.type("text/plain").send(ks);
|
||||
});
|
||||
|
||||
// Ubuntu autoinstall user-data (cloud-init)
|
||||
app.get<{ Params: { mac: string } }>("/autoinstall/:mac/user-data", async (request, reply) => {
|
||||
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
|
||||
|
||||
@@ -11,6 +11,7 @@ import { logger } from "./services/logger.js";
|
||||
import { registerDispatchRoutes } from "./routes/dispatch.js";
|
||||
import { registerKickstartRoutes } from "./routes/kickstart.js";
|
||||
import { registerApiRoutes } from "./routes/api.js";
|
||||
import { registerAsahiRoutes } from "./routes/asahi.js";
|
||||
|
||||
|
||||
export function createApp(config: BastionConfig): { app: ReturnType<typeof Fastify>; state: StateManager; installLog: InstallLogBuffer; syslog: SyslogListener } {
|
||||
@@ -43,8 +44,9 @@ export function createApp(config: BastionConfig): { app: ReturnType<typeof Fasti
|
||||
|
||||
// Register route handlers
|
||||
registerDispatchRoutes(app, config, state);
|
||||
registerKickstartRoutes(app, config, state);
|
||||
registerApiRoutes(app, state, installLog);
|
||||
registerKickstartRoutes(app, config, state, syslog);
|
||||
registerApiRoutes(app, state, installLog, syslog);
|
||||
registerAsahiRoutes(app, config);
|
||||
// boot.iso is generated at startup and served as a static file from httpDir
|
||||
// (static serving supports HTTP Range requests, required by JetKVM streaming)
|
||||
|
||||
|
||||
@@ -165,6 +165,8 @@ export class BastionConnection {
|
||||
case "command-forget":
|
||||
case "command-role-update":
|
||||
case "command-debug":
|
||||
case "command-register":
|
||||
case "command-discover":
|
||||
void this.handleCommand(msg);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -30,6 +30,8 @@ export class SyslogListener {
|
||||
private port: number;
|
||||
private installLog: InstallLogBuffer;
|
||||
private state: StateManager;
|
||||
/** Explicit IP → MAC mapping registered from kickstart/progress requests. */
|
||||
private ipToMac = new Map<string, string>();
|
||||
|
||||
constructor(port: number, installLog: InstallLogBuffer, state: StateManager) {
|
||||
this.port = port;
|
||||
@@ -37,14 +39,21 @@ export class SyslogListener {
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
/** Resolve a source IP to a MAC address using the install queue. */
|
||||
/** Register an IP → MAC mapping (called when we learn a machine's IP). */
|
||||
registerIp(ip: string, mac: string): void {
|
||||
this.ipToMac.set(ip, mac.toLowerCase());
|
||||
}
|
||||
|
||||
/** Resolve a source IP to a MAC address. */
|
||||
private resolveIpToMac(ip: string): string | null {
|
||||
// Check explicit mapping first (most reliable)
|
||||
const explicit = this.ipToMac.get(ip);
|
||||
if (explicit) return explicit;
|
||||
|
||||
const currentState = this.state.load();
|
||||
|
||||
// Check install queue — machines being installed have an IP from DHCP
|
||||
for (const [mac, entry] of Object.entries(currentState.install_queue)) {
|
||||
// The progress callback sends IP in "complete" detail, but during install
|
||||
// we need to match by what we know. Check if any progress mentions this IP.
|
||||
if (entry.progress_detail?.includes(ip)) return mac;
|
||||
}
|
||||
|
||||
|
||||
311
bastion/src/bastion/src/templates/asahi-firstboot.sh.ts
Normal file
311
bastion/src/bastion/src/templates/asahi-firstboot.sh.ts
Normal file
@@ -0,0 +1,311 @@
|
||||
// First-boot LVM setup script for Asahi-provisioned machines.
|
||||
// Embedded in the custom rootfs as a systemd service that runs once on first boot.
|
||||
// Creates the standard lab LVM layout on the data partition, matching install.ks.ts.
|
||||
|
||||
import type { Role } from "@lab/shared";
|
||||
|
||||
export interface AsahiFirstbootParams {
|
||||
hostname: string;
|
||||
role: Role;
|
||||
serverIp: string;
|
||||
httpPort: number;
|
||||
sshKeys: string[];
|
||||
adminUser: string;
|
||||
mac: string;
|
||||
}
|
||||
|
||||
/** Escape `value` so it is inert inside a double-quoted bash string. */
function shDoubleQuoted(value: string): string {
  // Inside "...", bash only gives special meaning to backslash, double quote, dollar and backtick.
  return value.replace(/[\\"$`]/g, "\\$&");
}

/** Wrap `value` in single quotes for bash, closing and reopening the quote around any embedded `'`. */
function shSingleQuoted(value: string): string {
  return `'${value.replace(/'/g, "'\\''")}'`;
}

/**
 * Render the bash script that the first-boot systemd unit runs on a freshly
 * installed machine.
 *
 * The generated script:
 *  - exits early when the /etc/lab-lvm-setup-done marker exists;
 *  - picks a large (>= 50 GB) unformatted or LVM2_member block device;
 *  - re-activates an existing `labvg` volume group, or creates it together with
 *    the swap/var/varlog/home/srv volumes plus the role-specific ones;
 *  - rewrites the lab entries in /etc/fstab;
 *  - sets the hostname, creates the admin user, installs SSH keys, hardens sshd;
 *  - writes /etc/lab-provisioned, registers with the bastion and touches the marker.
 *
 * Caller-supplied strings (hostname, mac, adminUser, sshKeys) are shell-escaped
 * before being embedded, so quotes, `$` or backticks in them cannot break out of
 * the surrounding bash string.
 *
 * @param params Rendering inputs; see `AsahiFirstbootParams`.
 * @returns The complete script text, starting with the `#!/bin/bash` shebang.
 */
export function renderFirstbootScript(params: AsahiFirstbootParams): string {
  const { hostname, role, serverIp, httpPort, sshKeys, adminUser, mac } = params;

  const isWorker = role === "worker";
  const isInfra = role === "infra" || role === "labcontroller";

  // Role-specific LV creation commands
  const roleLvLines: string[] = [];
  const roleFormatLines: string[] = [];
  const roleMountLines: string[] = [];
  const roleFstabLines: string[] = [];

  if (isInfra) {
    roleLvLines.push('lvcreate -L 20480M -n rancher labvg -y');
    roleFormatLines.push('mkfs.xfs /dev/labvg/rancher');
    roleMountLines.push('mount_lv rancher /var/lib/rancher');
    roleFstabLines.push('echo "/dev/labvg/rancher /var/lib/rancher xfs defaults 0 0" >> /etc/fstab');
  }
  if (isWorker || isInfra) {
    // longhorn takes whatever space is left, so it must be created last.
    roleLvLines.push('lvcreate -l 100%FREE -n longhorn labvg -y');
    roleFormatLines.push('mkfs.xfs /dev/labvg/longhorn');
    roleMountLines.push('mount_lv longhorn /var/lib/longhorn');
    roleFstabLines.push('echo "/dev/labvg/longhorn /var/lib/longhorn xfs defaults 0 0" >> /etc/fstab');
  }

  // SSH key injection blocks (a no-op when no keys are configured).
  // Keys are single-quoted with embedded quotes escaped so a malformed key cannot inject commands.
  const sshKeyBlock = sshKeys.length > 0
    ? sshKeys.map(k => `echo ${shSingleQuoted(k)} >> "$ADMIN_SSH/authorized_keys"`).join('\n')
    : 'true # no SSH keys configured';
  const rootSshKeyBlock = sshKeys.length > 0
    ? sshKeys.map(k => `echo ${shSingleQuoted(k)} >> /root/.ssh/authorized_keys`).join('\n')
    : 'true # no SSH keys configured';

  // NOTE: All bash $ references use $VAR not \${VAR} to avoid TS template conflicts.
  // Where ${} is needed in bash, we use \\${...} to escape.
  //
  // NOTE(review): for NVMe names the two sed passes that compute PART_DISK turn
  // "nvme0n1p2" into "nvme0n", which never equals lsblk's PKNAME ("nvme0n1"), so the
  // "skip root disk" test does not fire for NVMe partitions. Left unchanged here;
  // confirm whether single-disk machines rely on that before touching it.
  return `#!/bin/bash
# Lab first-boot LVM setup — generated by bastion
# This script runs once on first boot via systemd, then disables itself.
set -euo pipefail

MARKER="/etc/lab-lvm-setup-done"
LOG="/var/log/lab-firstboot.log"

exec > >(tee -a "$LOG") 2>&1
echo "=== Lab first-boot LVM setup ==="
date

# Already done?
if [ -f "$MARKER" ]; then
  echo "LVM setup already completed, skipping."
  exit 0
fi

# ── Find the data partition ──────────────────────────────────────
# The data partition/disk is a large block device that is NOT the root filesystem.
# Handles: NVMe partitions, SCSI partitions, whole unpartitioned disks.
ROOT_DEV=$(findmnt -n -o SOURCE / | sed 's/\\[.*\\]//') # strip btrfs subvol
ROOT_DISK=$(lsblk -n -o PKNAME "$ROOT_DEV" 2>/dev/null | head -1)
echo "Root device: $ROOT_DEV (disk: $ROOT_DISK)"

DATA_PART=""
# Scan partitions first, then whole disks
for part in /dev/nvme*n*p* /dev/sd*[0-9] /dev/vd*[0-9] /dev/nvme*n* /dev/sd[b-z] /dev/vd[b-z]; do
  [ -b "$part" ] || continue
  # Skip root device and root disk
  [ "$part" = "$ROOT_DEV" ] && continue
  PART_DISK=$(basename "$part" | sed 's/p[0-9]*$//' | sed 's/[0-9]*$//')
  [ "$PART_DISK" = "$ROOT_DISK" ] && continue
  # Skip small devices (<50GB) — EFI, boot, APFS stubs
  SIZE_BYTES=$(blockdev --getsize64 "$part" 2>/dev/null || echo 0)
  SIZE_GB=$((SIZE_BYTES / 1073741824))
  [ "$SIZE_GB" -lt 50 ] && continue
  # Use if unformatted or already LVM
  FSTYPE=$(blkid -o value -s TYPE "$part" 2>/dev/null || echo "")
  if [ -z "$FSTYPE" ] || [ "$FSTYPE" = "LVM2_member" ]; then
    DATA_PART="$part"
    echo "Found data device: $DATA_PART ($SIZE_GB GB)"
    break
  fi
done

if [ -z "$DATA_PART" ]; then
  echo "ERROR: No suitable data partition found for LVM."
  echo "Expected a large (>50GB) unformatted partition."
  exit 1
fi

# ── Helper function ──────────────────────────────────────────────
mount_lv() {
  local lv="$1" mp="$2"
  if lvs "labvg/$lv" &>/dev/null; then
    mkdir -p "$mp"
    mount "/dev/labvg/$lv" "$mp" 2>/dev/null || true
    echo " Mounted $lv -> $mp"
  fi
}

# ── Write fstab function (idempotent) ────────────────────────────
write_lab_fstab() {
  # Remove any previous lab LVM entries (clean slate)
  sed -i '/# lab-lvm:/d' /etc/fstab
  sed -i '/# Lab LVM volumes/d' /etc/fstab
  grep -v "/dev/labvg/" /etc/fstab > /etc/fstab.tmp && mv /etc/fstab.tmp /etc/fstab
  # Comment out non-LVM entries for mount points we manage
  for mp in "/var " "/var/log " "/home " "/srv "; do
    if grep -q "$mp" /etc/fstab; then
      awk -v m="$mp" '{if($0 !~ /^#/ && index($0,m)) print "# lab-lvm: " $0; else print}' /etc/fstab > /etc/fstab.tmp
      mv /etc/fstab.tmp /etc/fstab
    fi
  done
  # Add fresh LVM entries
  echo "# Lab LVM volumes" >> /etc/fstab
  echo "/dev/labvg/swap none swap defaults 0 0" >> /etc/fstab
  echo "/dev/labvg/var /var xfs defaults 0 0" >> /etc/fstab
  echo "/dev/labvg/varlog /var/log xfs defaults 0 0" >> /etc/fstab
  echo "/dev/labvg/home /home xfs defaults 0 0" >> /etc/fstab
  echo "/dev/labvg/srv /srv xfs defaults 0 0" >> /etc/fstab
  ${roleFstabLines.join('\n  ')}
}

# ── Check for existing VG ────────────────────────────────────────
if vgs labvg &>/dev/null; then
  echo "Volume group 'labvg' already exists — reprovision detected."
  echo "Activating existing volumes..."
  vgchange -ay labvg

  mount_lv var /var
  mount_lv varlog /var/log
  mount_lv home /home
  mount_lv srv /srv
${roleMountLines.map(l => `  ${l}`).join('\n')}

  # Enable swap
  if lvs labvg/swap &>/dev/null; then
    swapon /dev/labvg/swap 2>/dev/null || true
    echo " Enabled swap"
  fi

  # Ensure fstab entries exist — comment out conflicting btrfs subvol entries
  write_lab_fstab

  echo "Existing LVM volumes re-mounted."
else
  # ── Fresh install: create LVM ────────────────────────────────────
  echo "Creating LVM on $DATA_PART..."

  pvcreate "$DATA_PART"
  vgcreate labvg "$DATA_PART"

  # Create LVs — sizes match install.ks.ts (in MiB)
  echo "Creating logical volumes..."
  lvcreate -L 27648M -n swap labvg -y # 27GB swap
  lvcreate -L 102400M -n var labvg -y # 100GB /var
  lvcreate -L 10240M -n varlog labvg -y # 10GB /var/log
  lvcreate -L 10240M -n home labvg -y # 10GB /home
  lvcreate -L 20480M -n srv labvg -y # 20GB /srv
${roleLvLines.join('\n')}

  # Format
  echo "Formatting volumes..."
  mkswap /dev/labvg/swap
  mkfs.xfs /dev/labvg/var
  mkfs.xfs /dev/labvg/varlog
  mkfs.xfs /dev/labvg/home
  mkfs.xfs /dev/labvg/srv
${roleFormatLines.join('\n')}

  # Migrate and mount volumes that can be switched live.
  # Copy existing content first so we don't shadow files (e.g. /home/user/.ssh).
  for LV_MOUNT in "home /home" "srv /srv"; do
    LV_NAME=$(echo "$LV_MOUNT" | awk '{print $1}')
    MOUNT_PT=$(echo "$LV_MOUNT" | awk '{print $2}')
    STAGING="/mnt/labvg-$LV_NAME-staging"
    mkdir -p "$STAGING"
    mount "/dev/labvg/$LV_NAME" "$STAGING"
    cp -a "$MOUNT_PT"/. "$STAGING/" 2>/dev/null || true
    umount "$STAGING"
    rmdir "$STAGING"
    mount_lv "$LV_NAME" "$MOUNT_PT"
  done

  # Mount role-specific volumes (empty, no content to preserve)
  set +e
${roleMountLines.join('\n')}
  set -e

  # Copy existing /var content into the LV for next boot
  echo "Preparing /var LV for next boot..."
  TMPVAR="/mnt/labvg-var-staging"
  mkdir -p "$TMPVAR"
  mount /dev/labvg/var "$TMPVAR"
  cp -a /var/. "$TMPVAR/" 2>/dev/null || true
  umount "$TMPVAR"
  rmdir "$TMPVAR"

  # Same for /var/log
  TMPVARLOG="/mnt/labvg-varlog-staging"
  mkdir -p "$TMPVARLOG"
  mount /dev/labvg/varlog "$TMPVARLOG"
  cp -a /var/log/. "$TMPVARLOG/" 2>/dev/null || true
  umount "$TMPVARLOG"
  rmdir "$TMPVARLOG"

  echo "NOTE: /var and /var/log will switch to LVM on next reboot."

  # Enable swap
  swapon /dev/labvg/swap 2>/dev/null || true

  write_lab_fstab

  echo "LVM setup complete."
  lvs labvg

fi # end if/else for reprovision vs fresh install

# ── Set hostname (use configured value, or keep existing) ────────
CONF_HOSTNAME="${shDoubleQuoted(hostname)}"
if [ "$CONF_HOSTNAME" != "unknown" ] && [ -n "$CONF_HOSTNAME" ]; then
  hostnamectl set-hostname "$CONF_HOSTNAME"
fi
ACTUAL_HOSTNAME=$(hostname)

# ── Detect MAC address ───────────────────────────────────────────
CONF_MAC="${shDoubleQuoted(mac)}"
if [ "$CONF_MAC" = "unknown" ] || [ -z "$CONF_MAC" ]; then
  CONF_MAC=$(ip -o link show | grep -v "lo:" | grep "state UP" | head -1 | grep -oP 'link/ether \\K[^ ]+' || echo "unknown")
fi

# ── Configure admin user ─────────────────────────────────────────
ADMIN="${shDoubleQuoted(adminUser)}"
if ! id "$ADMIN" &>/dev/null; then
  useradd -m -G wheel "$ADMIN"
  echo "$ADMIN ALL=(ALL) NOPASSWD: ALL" > "/etc/sudoers.d/$ADMIN"
  chmod 440 "/etc/sudoers.d/$ADMIN"
fi
ADMIN_SSH="/home/$ADMIN/.ssh"
mkdir -p "$ADMIN_SSH"
chmod 700 "$ADMIN_SSH"
touch "$ADMIN_SSH/authorized_keys"
${sshKeyBlock}
chmod 600 "$ADMIN_SSH/authorized_keys"
chown -R "$ADMIN:$ADMIN" "$ADMIN_SSH"

# Also authorize root
mkdir -p /root/.ssh
chmod 700 /root/.ssh
touch /root/.ssh/authorized_keys
${rootSshKeyBlock}
chmod 600 /root/.ssh/authorized_keys

# ── Harden SSH (takes effect on next sshd restart/reboot) ────────
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config

# ── Write provisioning metadata ──────────────────────────────────
cat > /etc/lab-provisioned << LABMETA
hostname=$ACTUAL_HOSTNAME
role=${role}
mac=$CONF_MAC
provisioned_at=$(date -Iseconds)
method=asahi-firstboot
LABMETA

# ── Register with bastion ─────────────────────────────────────────
IP=$(hostname -I | awk '{print $1}')
echo "Registering with bastion at ${serverIp}:${httpPort}..."
curl -sf -X POST "http://${serverIp}:${httpPort}/api/register" \\
  -H "Content-Type: application/json" \\
  -d "{\\"mac\\":\\"$CONF_MAC\\",\\"hostname\\":\\"$ACTUAL_HOSTNAME\\",\\"role\\":\\"${role}\\",\\"ip\\":\\"$IP\\"}" \\
  2>/dev/null && echo " Registered as $ACTUAL_HOSTNAME ($IP)" \\
  || echo " WARNING: Could not reach bastion — register manually with: labctl provision register $CONF_MAC $ACTUAL_HOSTNAME --role ${role} --ip $IP"

# ── Mark done ────────────────────────────────────────────────────
touch "$MARKER"
echo "=== First-boot setup complete ==="
`;
}
|
||||
|
||||
/**
 * Render the systemd unit that runs /usr/local/bin/lab-firstboot.sh.
 *
 * The unit is a oneshot ordered after local filesystems and network-online,
 * and is skipped whenever /etc/lab-lvm-setup-done already exists.
 *
 * @returns The unit file text, terminated by a newline.
 */
export function renderFirstbootUnit(): string {
  const unitSection = [
    "[Unit]",
    "Description=Lab first-boot LVM setup",
    "After=local-fs.target network-online.target",
    "Wants=network-online.target",
    "ConditionPathExists=!/etc/lab-lvm-setup-done",
  ];
  const serviceSection = [
    "[Service]",
    "Type=oneshot",
    "ExecStart=/usr/local/bin/lab-firstboot.sh",
    "RemainAfterExit=yes",
    "StandardOutput=journal+console",
    "StandardError=journal+console",
  ];
  const installSection = [
    "[Install]",
    "WantedBy=multi-user.target",
  ];

  // Sections are separated by one blank line; the file ends with a newline.
  const sections = [unitSection, serviceSection, installSection];
  return sections.map((lines) => lines.join("\n")).join("\n\n") + "\n";
}
|
||||
@@ -102,6 +102,34 @@ boot
|
||||
`;
|
||||
}
|
||||
|
||||
/**
 * Build the iPXE script used for PXE-boot debug mode.
 *
 * The target loads the kernel and initrd from the bastion over HTTP while the
 * root filesystem stays on the installed system (labvg/root), bypassing local
 * GRUB. Intended as a workaround for UEFI firmware bugs that make local disk
 * boot slow.
 *
 * @param params Target identity (mac, hostname) and the bastion address (serverIp, httpPort).
 * @returns The iPXE script text, terminated by a newline.
 */
export function renderPxeBootDebugIpxe(params: {
  mac: string;
  hostname: string;
  serverIp: string;
  httpPort: number;
}): string {
  const { mac, hostname, serverIp, httpPort } = params;
  const artifactBase = `http://${serverIp}:${httpPort}`;
  const rule = "echo =============================================";
  const kernelArgs = "root=/dev/mapper/labvg-root ro rd.lvm.lv=labvg/root rd.lvm.lv=labvg/swap console=tty0";

  const scriptLines = [
    "#!ipxe",
    "",
    "echo",
    rule,
    "echo Lab PXE Bastion - PXE BOOT (debug)",
    `echo Target: ${hostname}`,
    `echo MAC: ${mac}`,
    "echo Kernel+initrd from PXE, root from NVMe",
    rule,
    "echo",
    "",
    `kernel ${artifactBase}/vmlinuz ${kernelArgs}`,
    `initrd ${artifactBase}/initrd.img`,
    "boot",
  ];
  return scriptLines.join("\n") + "\n";
}
|
||||
|
||||
/**
|
||||
* iPXE script for already-installed machines -- exits to boot from local disk.
|
||||
*/
|
||||
|
||||
@@ -1,76 +1,33 @@
|
||||
// Debug/rescue kickstart template.
|
||||
// Minimal kickstart for Anaconda rescue mode.
|
||||
// When sshd=true: generates host keys, starts sshd, reports IP to bastion.
|
||||
// No dependency on mounted filesystems — fully self-contained.
|
||||
//
|
||||
// SSH access: Anaconda's inst.sshd starts sshd automatically.
|
||||
// The sshpw directive sets the password, sshkey adds authorized keys.
|
||||
// %pre/%post do NOT run in rescue mode — don't put setup code there.
|
||||
|
||||
export interface DebugKickstartParams {
|
||||
sshKeys: string[];
|
||||
sshd?: boolean;
|
||||
serverIp?: string;
|
||||
httpPort?: number;
|
||||
}
|
||||
|
||||
export function renderDebugKickstart(params: DebugKickstartParams): string {
|
||||
const sshpw = "sshpw --username=root --plaintext lab-root-pw";
|
||||
const sshkeyLine = params.sshKeys.length > 0
|
||||
? `sshkey --username=root "${params.sshKeys[0]}"`
|
||||
: "";
|
||||
|
||||
const sshdSetup = params.sshd ? `
|
||||
%post --nochroot --log=/tmp/debug-sshd.log
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
# Generate host keys (self-contained, no mounted FS needed)
|
||||
ssh-keygen -t ed25519 -f /tmp/ssh_host_ed25519_key -N "" -q
|
||||
ssh-keygen -t rsa -f /tmp/ssh_host_rsa_key -N "" -q
|
||||
|
||||
# Write minimal sshd config
|
||||
cat > /tmp/sshd_config << 'SSHCFG'
|
||||
HostKey /tmp/ssh_host_ed25519_key
|
||||
HostKey /tmp/ssh_host_rsa_key
|
||||
PermitRootLogin yes
|
||||
PasswordAuthentication yes
|
||||
PubkeyAuthentication yes
|
||||
AuthorizedKeysFile /root/.ssh/authorized_keys
|
||||
SSHCFG
|
||||
|
||||
# Set root password for SSH access
|
||||
echo "root:debug" | chpasswd
|
||||
|
||||
# Set up SSH authorized keys
|
||||
mkdir -p /root/.ssh && chmod 700 /root/.ssh
|
||||
${params.sshKeys.map(k => `echo '${k}' >> /root/.ssh/authorized_keys`).join("\n")}
|
||||
chmod 600 /root/.ssh/authorized_keys 2>/dev/null || true
|
||||
|
||||
# Start sshd
|
||||
/usr/sbin/sshd -f /tmp/sshd_config -p 22
|
||||
echo "sshd started on port 22"
|
||||
|
||||
# Start persistent nc listener for remote shell
|
||||
(while true; do nc -l -p 2323 -e /bin/bash 2>/dev/null; done) &
|
||||
echo "nc shell listener on port 2323"
|
||||
|
||||
# Report IP to bastion
|
||||
sleep 2
|
||||
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
|
||||
MAC_ADDR=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||
curl -sf -X POST "http://${params.serverIp}:${params.httpPort}/api/progress" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"mac\\":\\"$MAC_ADDR\\",\\"stage\\":\\"debug-ready\\",\\"detail\\":\\"ssh root@$IP_ADDR (pw: debug) | nc $IP_ADDR 2323\\"}" 2>/dev/null || true
|
||||
|
||||
echo "Debug environment ready: ssh root@$IP_ADDR or nc $IP_ADDR 2323"
|
||||
%end
|
||||
` : "";
|
||||
|
||||
return `# Lab Bastion -- Debug/Rescue Kickstart
|
||||
# Minimal: SSH + network for Anaconda rescue mode
|
||||
#
|
||||
# SSH is started by Anaconda (inst.sshd kernel param).
|
||||
# Password: debug | SSH keys from bastion config.
|
||||
# %pre/%post do NOT run in rescue mode.
|
||||
|
||||
lang en_US.UTF-8
|
||||
keyboard uk
|
||||
network --bootproto=dhcp --activate
|
||||
|
||||
${sshpw}
|
||||
sshpw --username=root --plaintext debug
|
||||
${sshkeyLine}
|
||||
${sshdSetup}`;
|
||||
`;
|
||||
}
|
||||
|
||||
@@ -134,10 +134,9 @@ network --bootproto=dhcp --activate --hostname=${fqdn}
|
||||
${auth}
|
||||
${userDirective}
|
||||
|
||||
bootloader --append="console=tty0 console=ttyS0,115200n8"
|
||||
bootloader --append="console=tty0"
|
||||
|
||||
# logging --host=${serverIp} --port=${syslogPort}
|
||||
# Disabled: syslog UDP port needs to be exposed in k3s service/hostPort first
|
||||
logging --host=${serverIp} --port=${syslogPort}
|
||||
|
||||
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch
|
||||
|
||||
@@ -342,17 +341,7 @@ echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
|
||||
|
||||
${isVanilla ? `# -- vanilla role: skip k3s kernel/sysctl/firewall setup --
|
||||
# -- Enable chronyd for time sync --
|
||||
systemctl enable chronyd || true
|
||||
|
||||
# -- Serial console (for debugging — auto-login as root on ttyS0) --
|
||||
# AWS EC2 compatible: ttyS0 @ 115200n8
|
||||
systemctl enable serial-getty@ttyS0.service || true
|
||||
|
||||
# -- Forward all system logs to serial console --
|
||||
cat > /etc/rsyslog.d/serial-console.conf << 'RSYSLOG'
|
||||
*.* /dev/ttyS0
|
||||
RSYSLOG
|
||||
systemctl enable rsyslog || true` : `# -- Kernel modules for k3s --
|
||||
systemctl enable chronyd || true` : `# -- Kernel modules for k3s --
|
||||
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
|
||||
br_netfilter
|
||||
overlay
|
||||
@@ -396,6 +385,9 @@ fi
|
||||
|
||||
bastion_progress "post-install" "3-bootorder done"
|
||||
|
||||
# -- Enable SysRq magic keys (for emergency reboot via Alt+SysRq+REISUB) --
|
||||
echo "kernel.sysrq=1" > /etc/sysctl.d/90-sysrq.conf
|
||||
|
||||
# -- Provisioning metadata --
|
||||
cat > /etc/lab-provisioned << PROVEOF
|
||||
hostname: ${fqdn}
|
||||
|
||||
225
bastion/src/bastion/tests/asahi.test.ts
Normal file
225
bastion/src/bastion/tests/asahi.test.ts
Normal file
@@ -0,0 +1,225 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { mkdirSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import { createApp } from "../src/server.js";
|
||||
import type { FastifyInstance } from "fastify";
|
||||
import { renderFirstbootScript, renderFirstbootUnit } from "../src/templates/asahi-firstboot.sh.js";
|
||||
|
||||
/**
 * Build a BastionConfig for tests, with every on-disk path rooted in `testDir`.
 * dnsmasq and artifact handling are switched off via the skip flags, and
 * httpPort is 0.
 */
function createTestConfig(testDir: string): BastionConfig {
  const inTestDir = (leaf: string): string => join(testDir, leaf);

  const config: BastionConfig = {
    fedoraVersion: "43",
    arch: "x86_64",
    httpPort: 0,
    timezone: "Europe/London",
    locale: "en_GB.UTF-8",
    bastionDir: testDir,
    domain: "test.local",
    dhcpMode: "proxy",
    dhcpRangeStart: "",
    dhcpRangeEnd: "",
    ubuntuVersion: "26.04",
    ubuntuMirror: "https://releases.ubuntu.com/26.04",
    iface: "eth0",
    serverIp: "192.168.8.1",
    network: "192.168.8.0",
    gateway: "192.168.8.1",
    sshKeys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST test@lab"],
    adminUser: "michal",
    syslogPort: 15514,
    skipDnsmasq: true,
    skipArtifacts: true,
    fedoraMirror: "https://download.fedoraproject.org/pub/fedora/linux/releases/43/Everything/x86_64/os",
    tftpDir: inTestDir("tftp"),
    httpDir: inTestDir("http"),
    stateFile: inTestDir("state.json"),
  };
  return config;
}
|
||||
|
||||
// HTTP-level checks for the /asahi endpoints, run against an in-process app
// backed by a throwaway directory tree that is recreated for every test.
describe("asahi routes", () => {
  let workDir: string;
  let server: FastifyInstance;

  /** Issue an in-process GET request against the app under test. */
  const get = (url: string) => server.inject({ method: "GET", url });

  beforeEach(() => {
    // Scratch layout: <tmp>/bastion-asahi-test-<time>-<random>/{http,tftp}
    const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    workDir = join(tmpdir(), `bastion-asahi-test-${uniqueSuffix}`);
    for (const dir of [workDir, join(workDir, "http"), join(workDir, "tftp")]) {
      mkdirSync(dir, { recursive: true });
    }

    server = createApp(createTestConfig(workDir)).app;
  });

  afterEach(async () => {
    await server.close();
    rmSync(workDir, { recursive: true, force: true });
  });

  it("GET /asahi returns wrapper shell script", async () => {
    const resp = await get("/asahi");
    expect(resp.statusCode).toBe(200);
    expect(resp.headers["content-type"]).toContain("text/x-shellscript");
    for (const fragment of ["#!/bin/bash", "installer_data.json", "192.168.8.1", "install.sh"]) {
      expect(resp.body).toContain(fragment);
    }
  });

  it("GET /asahi/installer_data.json returns valid config", async () => {
    const resp = await get("/asahi/installer_data.json");
    expect(resp.statusCode).toBe(200);
    const payload = JSON.parse(resp.body);

    expect(payload.os_list).toHaveLength(1);
    const [osEntry] = payload.os_list;
    expect(osEntry.name).toContain("Fedora Asahi Lab");

    // 3 partitions (fallback) or 4 (built: EFI + Boot + Root + Data)
    const { partitions } = osEntry;
    expect(partitions.length).toBeGreaterThanOrEqual(3);
    expect(partitions[0].type).toBe("EFI");

    // The final entry is the Data partition, which grows to fill the disk.
    const dataPartition = partitions[partitions.length - 1];
    expect(dataPartition.type).toBe("Linux");
    expect(dataPartition.expand).toBe(true);

    // The entry before it is Root: fixed size, flashed from root.img.
    const rootPartition = partitions[partitions.length - 2];
    expect(rootPartition.expand).toBe(false);
    expect(rootPartition.image).toBe("root.img");
  });

  it("GET /asahi/firstboot.sh returns parameterized script", async () => {
    const resp = await get("/asahi/firstboot.sh?hostname=mac-studio&role=infra&mac=00:11:22:33:44:55");
    expect(resp.statusCode).toBe(200);

    const script = resp.body;
    expect(script).toContain("#!/bin/bash");
    expect(script).toContain("mac-studio");
    expect(script).toContain("labvg");
    expect(script).toContain("rancher"); // infra gets rancher LV
    expect(script).toContain("longhorn"); // infra also gets longhorn
    expect(script).toContain("ssh-ed25519"); // SSH key injected
  });

  it("GET /asahi/firstboot.service returns systemd unit", async () => {
    const resp = await get("/asahi/firstboot.service");
    expect(resp.statusCode).toBe(200);
    for (const fragment of ["[Unit]", "lab-firstboot.sh", "ConditionPathExists=!/etc/lab-lvm-setup-done"]) {
      expect(resp.body).toContain(fragment);
    }
  });
});
|
||||
|
||||
// Unit tests for the generated first-boot bash script: each case renders the
// script for one role and checks for the fragments that role must (or must not) contain.
describe("renderFirstbootScript", () => {
  const commonParams = {
    hostname: "test-node",
    serverIp: "10.0.0.1",
    httpPort: 8080,
    sshKeys: ["ssh-ed25519 AAAA... user@host"],
    adminUser: "testadmin",
    mac: "aa:bb:cc:dd:ee:ff",
  };

  type RoleArg = Parameters<typeof renderFirstbootScript>[0]["role"];

  /** Render the script for `role` with the shared parameters above. */
  const renderFor = (role: RoleArg): string => renderFirstbootScript({ ...commonParams, role });

  it("generates valid bash with shebang", () => {
    expect(renderFor("worker").startsWith("#!/bin/bash")).toBe(true);
  });

  it("includes LVM creation commands", () => {
    const rendered = renderFor("infra");
    for (const command of ["pvcreate", "vgcreate labvg", "lvcreate"]) {
      expect(rendered).toContain(command);
    }
  });

  it("uses correct LV sizes from kickstart layout", () => {
    const rendered = renderFor("infra");
    // swap, /var, /var/log and /home, /srv and /rancher
    for (const size of ["27648M", "102400M", "10240M", "20480M"]) {
      expect(rendered).toContain(size);
    }
  });

  it("includes rancher LV for infra role", () => {
    const rendered = renderFor("infra");
    expect(rendered).toContain("rancher");
    expect(rendered).toContain("/var/lib/rancher");
  });

  it("includes longhorn for worker role", () => {
    const rendered = renderFor("worker");
    expect(rendered).toContain("longhorn");
    expect(rendered).toContain("/var/lib/longhorn");
    // Worker should NOT have rancher
    expect(rendered).not.toContain("rancher");
  });

  it("includes longhorn for infra role", () => {
    const rendered = renderFor("infra");
    expect(rendered).toContain("longhorn");
    expect(rendered).toContain("/var/lib/longhorn");
  });

  it("vanilla role gets no role-specific LVs", () => {
    const rendered = renderFor("vanilla");
    for (const volume of ["rancher", "longhorn"]) {
      expect(rendered).not.toContain(volume);
    }
  });

  it("handles reprovision (existing labvg)", () => {
    const rendered = renderFor("infra");
    for (const fragment of ["reprovision detected", "vgchange -ay labvg", "mount_lv var /var"]) {
      expect(rendered).toContain(fragment);
    }
  });

  it("injects SSH keys for admin user and root", () => {
    const rendered = renderFor("worker");
    for (const fragment of ["ssh-ed25519 AAAA...", "testadmin", "/root/.ssh/authorized_keys"]) {
      expect(rendered).toContain(fragment);
    }
  });

  it("sets hostname", () => {
    const rendered = renderFor("worker");
    expect(rendered).toContain('CONF_HOSTNAME="test-node"');
    expect(rendered).toContain("hostnamectl set-hostname");
  });

  it("includes bastion self-registration", () => {
    const rendered = renderFor("worker");
    for (const fragment of ["/api/register", "aa:bb:cc:dd:ee:ff", "test-node"]) {
      expect(rendered).toContain(fragment);
    }
  });

  it("writes provisioning metadata", () => {
    const rendered = renderFor("infra");
    expect(rendered).toContain("/etc/lab-provisioned");
    expect(rendered).toContain("method=asahi-firstboot");
  });

  it("creates marker file to prevent re-run", () => {
    const rendered = renderFor("worker");
    expect(rendered).toContain("/etc/lab-lvm-setup-done");
    expect(rendered).toContain('touch "$MARKER"');
  });
});
|
||||
|
||||
// Unit tests for the systemd unit text that launches the first-boot script.
describe("renderFirstbootUnit", () => {
  it("generates valid systemd unit", () => {
    const rendered = renderFirstbootUnit();
    const requiredFragments = [
      "[Unit]",
      "[Service]",
      "[Install]",
      "Type=oneshot",
      "WantedBy=multi-user.target",
    ];
    for (const fragment of requiredFragments) {
      expect(rendered).toContain(fragment);
    }
  });

  it("only runs when marker is missing", () => {
    // The "!" prefix makes systemd skip the unit once the marker file exists.
    expect(renderFirstbootUnit()).toContain("ConditionPathExists=!/etc/lab-lvm-setup-done");
  });
});
|
||||
@@ -28,6 +28,7 @@ function createTestConfig(testDir: string): BastionConfig {
|
||||
gateway: "10.0.0.1",
|
||||
sshKeys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST test@test"],
|
||||
adminUser: "testadmin",
|
||||
syslogPort: 15514,
|
||||
skipDnsmasq: true,
|
||||
skipArtifacts: true,
|
||||
fedoraMirror: "https://download.fedoraproject.org/pub/fedora/linux/releases/43/Everything/x86_64/os",
|
||||
|
||||
@@ -206,10 +206,8 @@ describe("renderInstallKickstart", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("forwards system logs to serial console", () => {
|
||||
it("does not include serial console (causes 30s boot timeout on hardware without UART)", () => {
|
||||
const ks = renderInstallKickstart(baseParams({ role: "vanilla" }));
|
||||
expect(ks).toContain("serial-console.conf");
|
||||
expect(ks).toContain("/dev/ttyS0");
|
||||
expect(ks).toContain("rsyslog");
|
||||
expect(ks).not.toContain("ttyS0");
|
||||
});
|
||||
});
|
||||
|
||||
121
bastion/src/bastion/tests/syslog-listener.test.ts
Normal file
121
bastion/src/bastion/tests/syslog-listener.test.ts
Normal file
@@ -0,0 +1,121 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { createSocket } from "node:dgram";
|
||||
import { mkdtempSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { SyslogListener } from "../src/services/syslog-listener.js";
|
||||
import { InstallLogBuffer } from "../src/services/install-log.js";
|
||||
import { StateManager } from "../src/services/state.js";
|
||||
|
||||
/**
 * Send `message` as a single UDP datagram to 127.0.0.1:`port`.
 *
 * The socket is closed as soon as the send callback fires; the returned
 * promise then rejects with the send error, or resolves if there was none.
 */
function sendUdpSyslog(port: number, message: string): Promise<void> {
  const payload = Buffer.from(message);
  const socket = createSocket("udp4");

  return new Promise<void>((resolve, reject) => {
    socket.send(payload, port, "127.0.0.1", (sendError) => {
      socket.close();
      if (sendError) {
        reject(sendError);
        return;
      }
      resolve();
    });
  });
}
|
||||
|
||||
// Tests for SyslogListener: each case sends real UDP datagrams over loopback to
// a listener started on PORT and then inspects what reached the InstallLogBuffer.
describe("SyslogListener", () => {
  const PORT = 15514; // use non-privileged port for testing

  let scratchDir: string;
  let stateManager: StateManager;
  let logBuffer: InstallLogBuffer;
  let listener: SyslogListener;

  /** Give the datagram time to arrive and be processed. */
  const waitForDelivery = () => new Promise((r) => setTimeout(r, 200));

  /** Put a "testnode" worker into the install queue under `mac`. */
  const queueTestNode = (mac: string): void => {
    stateManager.update((s) => {
      s.install_queue[mac] = {
        hostname: "testnode",
        disk: "/dev/sda",
        role: "worker",
        os: "fedora-43",
        queued_at: new Date().toISOString(),
      };
    });
  };

  beforeEach(() => {
    scratchDir = mkdtempSync(join(tmpdir(), "syslog-test-"));
    stateManager = new StateManager(join(scratchDir, "state.json"));
    stateManager.init();
    logBuffer = new InstallLogBuffer(scratchDir);
    listener = new SyslogListener(PORT, logBuffer, stateManager);
    listener.start();
  });

  afterEach(() => {
    listener.stop();
    rmSync(scratchDir, { recursive: true, force: true });
  });

  it("receives and stores syslog messages for registered IP", async () => {
    const mac = "aa:bb:cc:dd:ee:ff";
    // Queue a machine so hostname can be resolved
    queueTestNode(mac);

    // Register IP → MAC mapping
    listener.registerIp("127.0.0.1", mac);

    // Send a syslog message (RFC 3164 format)
    await sendUdpSyslog(PORT, "<13>Mar 30 01:30:00 localhost anaconda[1234]: Installing package vim-enhanced");

    // Wait for UDP delivery
    await waitForDelivery();

    const stored = logBuffer.getLines(mac);
    expect(stored.length).toBeGreaterThan(0);
    const firstLine = stored[0]!.line;
    expect(firstLine).toContain("anaconda");
    expect(firstLine).toContain("Installing package vim-enhanced");
  });

  it("ignores messages from unknown IPs", async () => {
    // Don't register any IP mapping
    await sendUdpSyslog(PORT, "<13>Mar 30 01:30:00 localhost anaconda[1234]: test message");
    await waitForDelivery();

    // No MAC to check, but the listener should not crash
    // and no logs should be stored for any MAC
    expect(logBuffer.lineCount("unknown")).toBe(0);
  });

  it("resolves IP from installed machines state", async () => {
    const mac = "11:22:33:44:55:66";
    stateManager.update((s) => {
      s.installed[mac] = {
        hostname: "installed-node",
        role: "worker",
        ip: "127.0.0.1",
        installed_at: new Date().toISOString(),
      };
    });

    await sendUdpSyslog(PORT, "<14>Mar 30 02:00:00 installed-node sshd[5678]: Accepted publickey for root");
    await waitForDelivery();

    const stored = logBuffer.getLines(mac);
    expect(stored.length).toBeGreaterThan(0);
    expect(stored[0]!.line).toContain("sshd");
  });

  it("parses various syslog formats", async () => {
    const mac = "aa:bb:cc:dd:ee:ff";
    listener.registerIp("127.0.0.1", mac);
    queueTestNode(mac);

    // Message without PID
    await sendUdpSyslog(PORT, "<13>Mar 30 01:30:00 localhost kernel: NVMe device ready");
    await waitForDelivery();

    const stored = logBuffer.getLines(mac);
    expect(stored.length).toBeGreaterThan(0);
    expect(stored[0]!.line).toContain("kernel");
  });
});
|
||||
@@ -94,8 +94,24 @@ export class LabdClient {
|
||||
return this.request("POST", "/api/machines/install", { body: opts });
|
||||
}
|
||||
|
||||
async debugMachine(mac: string, opts?: { sshd?: boolean }): Promise<{ status: string; data?: { mac: string; hostname: string }; error?: string }> {
|
||||
return this.request("POST", "/api/machines/debug", { body: { mac, sshd: opts?.sshd } });
|
||||
async registerMachine(opts: {
|
||||
mac: string; hostname: string; role?: string; ip?: string;
|
||||
}): Promise<{ status: string; data?: unknown; error?: string }> {
|
||||
return this.request("POST", "/api/machines/register", { body: opts });
|
||||
}
|
||||
|
||||
async debugMachine(mac: string, opts?: { pxeBoot?: boolean }): Promise<{ status: string; data?: { mac: string; hostname: string }; error?: string }> {
|
||||
return this.request("POST", "/api/machines/debug", { body: { mac, pxeBoot: opts?.pxeBoot } });
|
||||
}
|
||||
|
||||
async discoverMachine(data: {
|
||||
mac: string; product?: string; board?: string; serial?: string;
|
||||
manufacturer?: string; cpu_model?: string; cpu_cores?: number;
|
||||
memory_gb?: number; arch?: string;
|
||||
disks?: Array<{ name: string; size_gb: number; model: string }>;
|
||||
nics?: Array<{ name: string; mac: string; state: string }>;
|
||||
}): Promise<{ status: string; error?: string }> {
|
||||
return this.request("POST", "/api/machines/discover", { body: data });
|
||||
}
|
||||
|
||||
async forgetMachine(mac: string): Promise<{ status: string }> {
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
// CLI command: labctl app k3s install/health <target>
|
||||
// Install or check k3s on a target machine via SSH.
|
||||
|
||||
import { existsSync } from "node:fs";
|
||||
import { existsSync, writeFileSync, mkdirSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { execSync } from "node:child_process";
|
||||
import type { Command } from "commander";
|
||||
import type { BastionState } from "@lab/shared";
|
||||
import { K3sModule, sshExec } from "@lab/modules";
|
||||
@@ -69,7 +70,7 @@ export function registerAppCommand(program: Command): void {
|
||||
.command("install <target>")
|
||||
.description("Install k3s on a target machine (hostname, IP, or MAC)")
|
||||
.option("--role <role>", "k3s role: infra (server) or worker (agent)", "infra")
|
||||
.option("--user <user>", "SSH user", "michal")
|
||||
.option("--user <user>", "SSH user", "root")
|
||||
.option("--k3s-server <url>", "k3s server URL (required for worker role)")
|
||||
.option("--k3s-token <token>", "k3s join token (required for worker role)")
|
||||
.action(async (target: string, opts: {
|
||||
@@ -163,7 +164,7 @@ export function registerAppCommand(program: Command): void {
|
||||
k3sCmd
|
||||
.command("health [target]")
|
||||
.description("Check k3s health (all hosts if no target given)")
|
||||
.option("--user <user>", "SSH user", "michal")
|
||||
.option("--user <user>", "SSH user", "root")
|
||||
.action(async (target: string | undefined, opts: { user: string }) => {
|
||||
const sshKey = findSshKey();
|
||||
|
||||
@@ -303,7 +304,7 @@ export function registerAppCommand(program: Command): void {
|
||||
k3sCmd
|
||||
.command("list")
|
||||
.description("List installed machines and their k3s status")
|
||||
.option("--user <user>", "SSH user", "michal")
|
||||
.option("--user <user>", "SSH user", "root")
|
||||
.action(async (opts: { user: string }) => {
|
||||
let state: BastionState;
|
||||
try {
|
||||
@@ -400,4 +401,88 @@ export function registerAppCommand(program: Command): void {
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
// `k3s kubeconfig <target>`: read /etc/rancher/k3s/k3s.yaml from the target over
// SSH, point it at the node's real IP, rename the "default" cluster/user/context
// entries, then either print the result or merge it into ~/.kube/config.
k3sCmd
  .command("kubeconfig <target>")
  .description("Fetch kubeconfig from a target and merge into ~/.kube/config")
  .option("--user <user>", "SSH user", "root")
  .option("--context <name>", "Context name (defaults to hostname)")
  .option("--print", "Print kubeconfig to stdout instead of merging")
  .action(async (target: string, opts: {
    user: string;
    context?: string;
    print?: boolean;
  }) => {
    const state = await fetchState();
    const resolved = resolveTarget(target, state);

    if (!resolved) {
      console.error(`Cannot resolve target: ${target}`);
      console.error("Provide an IP address, hostname, or MAC of an installed machine.");
      process.exit(1);
    }

    const contextName = opts.context ?? resolved.hostname;

    // The context name ends up in a shell command, in a file name and in the
    // colon-separated KUBECONFIG list, so reject anything that could break those
    // (whitespace, quotes, '/', ':', shell metacharacters).
    if (!/^[A-Za-z0-9][A-Za-z0-9._@+-]*$/.test(contextName)) {
      console.error(`Invalid context name: ${contextName}`);
      console.error("Use --context with a name made of letters, digits, '.', '_', '@', '+' or '-'.");
      process.exit(1);
    }

    const sshKey = findSshKey();

    // Fetch kubeconfig via SSH
    let raw: string;
    try {
      const result = await sshExec(resolved.ip, opts.user, "cat /etc/rancher/k3s/k3s.yaml", {
        ...(sshKey ? { keyPath: sshKey } : {}),
        timeoutMs: 10_000,
      });
      // A non-zero exit (e.g. k3s not installed, permission denied) does not
      // throw, so it has to be checked explicitly before trusting stdout.
      if (result.exitCode !== 0) {
        console.error(`Failed to fetch kubeconfig: remote command exited with code ${result.exitCode}`);
        if (result.stderr) console.error(result.stderr.trim());
        process.exit(1);
      }
      raw = result.stdout;
    } catch (err) {
      console.error(`Failed to fetch kubeconfig: ${err instanceof Error ? err.message : String(err)}`);
      process.exit(1);
    }

    // Never merge or write an empty document over the user's kubeconfig.
    if (raw.trim() === "") {
      console.error(`Failed to fetch kubeconfig: /etc/rancher/k3s/k3s.yaml on ${resolved.ip} is empty`);
      process.exit(1);
    }

    // Rewrite: replace 127.0.0.1 with actual IP, rename cluster/user/context
    const rewritten = raw
      .replace(/server:\s*https:\/\/127\.0\.0\.1:/, `server: https://${resolved.ip}:`)
      .replace(/name:\s*default/g, `name: ${contextName}`)
      .replace(/cluster:\s*default/g, `cluster: ${contextName}`)
      .replace(/user:\s*default/g, `user: ${contextName}`)
      .replace(/current-context:\s*default/, `current-context: ${contextName}`);

    if (opts.print) {
      process.stdout.write(rewritten);
      return;
    }

    // Merge into ~/.kube/config using kubectl
    const kubeDir = join(homedir(), ".kube");
    mkdirSync(kubeDir, { recursive: true });
    const mainConfig = join(kubeDir, "config");
    const tmpFile = join(kubeDir, `.labctl-${contextName}.tmp`);

    writeFileSync(tmpFile, rewritten, { mode: 0o600 });

    try {
      if (existsSync(mainConfig)) {
        // Pass KUBECONFIG through the environment instead of the command line
        // so paths never go through shell parsing.
        const merged = execSync("kubectl config view --flatten", {
          encoding: "utf-8",
          env: { ...process.env, KUBECONFIG: `${mainConfig}:${tmpFile}` },
        });
        writeFileSync(mainConfig, merged, { mode: 0o600 });
      } else {
        writeFileSync(mainConfig, rewritten, { mode: 0o600 });
      }

      // Set current context in the file that was just written, regardless of
      // any KUBECONFIG the user has exported.
      execSync(`kubectl config use-context ${contextName}`, {
        stdio: "pipe",
        env: { ...process.env, KUBECONFIG: mainConfig },
      });

      console.log(`Merged kubeconfig for ${contextName} (${resolved.ip})`);
      console.log(`Context set to: ${contextName}`);
      console.log(`\nSwitch contexts: kubectl config use-context <name>`);
    } catch (err) {
      console.error(`Failed to merge kubeconfig: ${err instanceof Error ? err.message : String(err)}`);
      console.error(`Standalone config saved at: ${tmpFile}`);
      // process.exit() terminates immediately, so the cleanup in `finally`
      // does not run on this path and the standalone file is kept.
      process.exit(1);
    } finally {
      try { const { unlinkSync } = await import("node:fs"); unlinkSync(tmpFile); } catch { /* ignore */ }
    }
  });
|
||||
}
|
||||
|
||||
69
bastion/src/cli/src/commands/asahi.ts
Normal file
69
bastion/src/cli/src/commands/asahi.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
// CLI command: provision asahi
|
||||
// Prints the curl command to run on the Mac Studio (macOS) to install
|
||||
// Fedora Asahi Remix with lab LVM layout.
|
||||
|
||||
import type { Command } from "commander";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
/** Port used for every bastion URL printed by this command. */
const ASAHI_BASTION_PORT = 8080;

/**
 * Derive the bastion base URL from the configured labd URL.
 *
 * Keeps the host, forces the bastion port and drops any path, so values such
 * as "http://host:3000/", "http://host" or "ws://host:3000/ws/bastion" all
 * map to "http://host:8080". Returns a placeholder when no URL is configured.
 *
 * @param labdUrl - labd URL from the CLI config, if any.
 * @returns Base URL (no trailing slash) to print in the instructions.
 */
function asahiBastionUrlFromLabdUrl(labdUrl: string | null | undefined): string {
  if (!labdUrl) {
    return `http://<bastion-ip>:${ASAHI_BASTION_PORT}`;
  }
  try {
    const parsed = new URL(labdUrl);
    // WebSocket schemes cannot be used with curl; map them to plain HTTP.
    const protocol = /^wss?:$/.test(parsed.protocol) ? "http:" : parsed.protocol;
    return `${protocol}//${parsed.hostname}:${ASAHI_BASTION_PORT}`;
  } catch {
    // Not a parseable URL: fall back to swapping a trailing ":<port>".
    return labdUrl.replace(/:\d+$/, `:${ASAHI_BASTION_PORT}`);
  }
}

/**
 * Register `asahi` under the given parent command.
 *
 * The command only prints instructions: the curl one-liner to run on the Mac,
 * the interactive installer questions, and the follow-up firstboot / k3s steps.
 * The bastion URL comes from the first online bastion reported by labd, or is
 * derived from the CLI config when labd cannot be reached.
 *
 * @param parent - Command to attach the `asahi` subcommand to.
 */
export function registerAsahiCommand(parent: Command): void {
  parent
    .command("asahi")
    .description("Show instructions to provision an Apple Silicon Mac with Asahi Linux")
    .action(async () => {
      // Try to get bastion info to determine the correct URL
      let bastionUrl = "";
      try {
        const bastions = await getLabdClient().getBastions();
        const online = bastions.find(b => b.status === "online");
        if (online) {
          bastionUrl = `http://${online.serverIp}:${ASAHI_BASTION_PORT}`;
        }
      } catch { /* labd not reachable */ }

      if (!bastionUrl) {
        // Fall back to config; labd is on a different port/host than the bastion.
        const { loadConfig } = await import("../config/index.js");
        const config = loadConfig();
        bastionUrl = asahiBastionUrlFromLabdUrl(config.labdUrl);
      }

      const BOLD = "\x1b[1m";
      const CYAN = "\x1b[36m";
      const DIM = "\x1b[2m";
      const RESET = "\x1b[0m";

      console.log("");
      console.log(`${BOLD} Asahi Linux Provisioning${RESET}`);
      console.log(`${DIM} For Apple Silicon Macs (Mac Studio, MacBook, etc.)${RESET}`);
      console.log("");
      console.log(` Run this command ${BOLD}on the Mac${RESET} (from macOS Terminal):`);
      console.log("");
      console.log(` ${CYAN}${BOLD}curl ${bastionUrl}/asahi | sh${RESET}`);
      console.log("");
      console.log(` The installer will ask a few interactive questions:`);
      console.log(` ${BOLD}1.${RESET} Action: press ${BOLD}r${RESET} to resize macOS`);
      console.log(` ${BOLD}2.${RESET} How much space for Linux: choose maximum`);
      console.log(` ${BOLD}3.${RESET} Confirm the resize operation`);
      console.log(` ${BOLD}4.${RESET} macOS password for firmware authentication`);
      console.log("");
      console.log(` After that, everything is automatic:`);
      console.log(` - Asahi boot infrastructure (m1n1 + U-Boot)`);
      console.log(` - Fedora Asahi Remix root partition`);
      console.log(` - LVM data partition (remaining space)`);
      console.log("");
      console.log(` On first boot, LVM volumes are created automatically:`);
      console.log(` ${DIM}labvg/swap (27GB), labvg/var (100GB), labvg/varlog (10GB),`);
      console.log(` labvg/home (10GB), labvg/srv (20GB), labvg/rancher (20GB),`);
      console.log(` labvg/longhorn (remaining space)${RESET}`);
      console.log("");
      console.log(` After first boot, SSH in and run the firstboot script:`);
      console.log(` ${BOLD}ssh root@<ip> 'curl -sf ${bastionUrl}/asahi/firstboot.sh | bash'${RESET}`);
      console.log("");
      console.log(` This sets up LVM, detects hostname/MAC, and self-registers.`);
      console.log(` Then install k3s:`);
      console.log(` ${BOLD}labctl app k3s install <hostname> --role infra${RESET}`);
      console.log("");
    });
}
|
||||
@@ -48,9 +48,9 @@ export function registerDebugCommand(parent: Command): void {
|
||||
parent
|
||||
.command("debug <target>")
|
||||
.description("PXE boot into Fedora rescue mode for debugging (target: hostname, MAC, or IP)")
|
||||
.option("--sshd", "Start SSH + nc listener automatically, report IP to bastion")
|
||||
.option("--pxe-boot", "Boot installed system via PXE (kernel+initrd from network, root from NVMe)")
|
||||
.showHelpAfterError(true)
|
||||
.action(async (target: string, opts: { sshd?: boolean }) => {
|
||||
.action(async (target: string, opts: { pxeBoot?: boolean }) => {
|
||||
const client = getLabdClient();
|
||||
|
||||
// Resolve target from labd aggregated state
|
||||
@@ -74,7 +74,7 @@ export function registerDebugCommand(parent: Command): void {
|
||||
console.log(`Queuing debug mode for ${hostname} (${mac})...`);
|
||||
|
||||
try {
|
||||
const result = await client.debugMachine(mac, { sshd: opts.sshd });
|
||||
const result = await client.debugMachine(mac, { pxeBoot: opts.pxeBoot === true });
|
||||
if (result.error) {
|
||||
console.error(`Failed: ${result.error}`);
|
||||
process.exit(1);
|
||||
@@ -103,6 +103,7 @@ export function registerDebugCommand(parent: Command): void {
|
||||
|
||||
const sshArgs = [
|
||||
"-o", "StrictHostKeyChecking=no",
|
||||
"-o", "UserKnownHostsFile=/dev/null",
|
||||
"-o", "ConnectTimeout=10",
|
||||
...(sshKey !== undefined ? ["-i", sshKey] : []),
|
||||
`${effectiveUser}@${ip}`,
|
||||
@@ -117,38 +118,39 @@ export function registerDebugCommand(parent: Command): void {
|
||||
}
|
||||
}
|
||||
|
||||
// Determine bastion URL from labd config for the setup script URL
|
||||
const bastionUrl = process.env["LABD_URL"]
|
||||
? process.env["LABD_URL"].replace(/\/ws\/bastion$/, "").replace(/^wss?:/, "http:")
|
||||
: "http://<bastion-ip>:8080";
|
||||
|
||||
console.log(`
|
||||
Debug mode queued for ${hostname} (${mac}).
|
||||
Reboot the machine to enter Fedora rescue mode.
|
||||
|
||||
SSH access (started by Anaconda):
|
||||
ssh root@<ip> (password: debug)
|
||||
|
||||
For nc remote shell, run from rescue shell:
|
||||
curl ${bastionUrl}/debug-setup.sh | bash
|
||||
|
||||
Once in rescue shell:
|
||||
|
||||
# Activate LVM
|
||||
vgchange -ay labvg
|
||||
|
||||
# Mount root + other volumes
|
||||
# Activate LVM and mount installed system
|
||||
vgchange -ay
|
||||
mkdir -p /mnt/sysroot
|
||||
mount /dev/labvg/root /mnt/sysroot
|
||||
cat /mnt/sysroot/etc/fstab # check what else to mount
|
||||
mount /dev/labvg/var /mnt/sysroot/var
|
||||
mount /dev/labvg/home /mnt/sysroot/home
|
||||
mount /dev/<vg>/root /mnt/sysroot
|
||||
cat /mnt/sysroot/etc/fstab
|
||||
mount /dev/<vg>/var /mnt/sysroot/var
|
||||
mount /dev/<vg>/home /mnt/sysroot/home
|
||||
|
||||
# Boot the installed system in a container
|
||||
# Boot installed system in a container
|
||||
/mnt/sysroot/usr/bin/systemd-nspawn -D /mnt/sysroot --boot
|
||||
|
||||
# Or just chroot for quick fixes
|
||||
# Or chroot for quick fixes
|
||||
mount --bind /dev /mnt/sysroot/dev
|
||||
mount --bind /proc /mnt/sysroot/proc
|
||||
mount --bind /sys /mnt/sysroot/sys
|
||||
chroot /mnt/sysroot
|
||||
|
||||
# Check initramfs size
|
||||
ls -lh /mnt/sysroot/boot/initramfs-*.img
|
||||
|
||||
# Rebuild initramfs without amdgpu
|
||||
chroot /mnt/sysroot
|
||||
echo 'omit_drivers+=" amdgpu "' > /etc/dracut.conf.d/omit-amdgpu.conf
|
||||
dracut -f --regenerate-all
|
||||
`);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ export function registerLabcontrollerCommands(appCmd: Command): void {
|
||||
lcCmd
|
||||
.command("deploy <target>")
|
||||
.description("Deploy labcontroller stack to a k3s node")
|
||||
.option("--user <user>", "SSH user", "michal")
|
||||
.option("--user <user>", "SSH user", "root")
|
||||
.option("--crdb-replicas <n>", "CockroachDB replicas", "1")
|
||||
.action(async (target: string, opts: {
|
||||
user: string;
|
||||
@@ -193,7 +193,7 @@ export function registerLabcontrollerCommands(appCmd: Command): void {
|
||||
lcCmd
|
||||
.command("status [target]")
|
||||
.description("Check labcontroller deployment status (all hosts if no target)")
|
||||
.option("--user <user>", "SSH user", "michal")
|
||||
.option("--user <user>", "SSH user", "root")
|
||||
.action(async (target: string | undefined, opts: { user: string }) => {
|
||||
const sshKey = findSshKey();
|
||||
const sshOpts = sshKey ? { keyPath: sshKey } : {};
|
||||
|
||||
@@ -69,10 +69,10 @@ export function registerListCommand(parent: Command): void {
|
||||
const hostname = inst?.hostname ?? queued?.hostname ?? "-";
|
||||
const role = inst?.role ?? queued?.role ?? "-";
|
||||
const ip = inst?.ip ?? "-";
|
||||
const cpu = hw?.cpu_model ?? "-";
|
||||
const cores = hw?.cpu_cores != null ? String(hw.cpu_cores) : "-";
|
||||
const ram = hw?.memory_gb != null ? `${hw.memory_gb}GB` : "-";
|
||||
const product = hw?.product ?? "-";
|
||||
const cpu = hw?.cpu_model ?? inst?.cpu_model ?? "-";
|
||||
const cores = (hw?.cpu_cores ?? inst?.cpu_cores) != null ? String(hw?.cpu_cores ?? inst?.cpu_cores) : "-";
|
||||
const ram = (hw?.memory_gb ?? inst?.memory_gb) != null ? `${hw?.memory_gb ?? inst?.memory_gb}GB` : "-";
|
||||
const product = hw?.product ?? inst?.product ?? "-";
|
||||
|
||||
const color = statusColor(status);
|
||||
|
||||
|
||||
@@ -39,19 +39,25 @@ export function registerLogsCommand(parent: Command): void {
|
||||
parent
|
||||
.command("logs <target>")
|
||||
.description("Show provisioning logs for a machine (hostname, MAC, or IP)")
|
||||
.action(async (target: string) => {
|
||||
.option("-f, --follow", "Follow log output in real-time")
|
||||
.action(async (target: string, opts: { follow?: boolean }) => {
|
||||
const mac = await resolveToMac(target);
|
||||
|
||||
const BOLD = "\x1b[1m";
|
||||
const GREEN = "\x1b[32m";
|
||||
const YELLOW = "\x1b[33m";
|
||||
const RED = "\x1b[31m";
|
||||
const DIM = "\x1b[2m";
|
||||
const RESET = "\x1b[0m";
|
||||
|
||||
if (opts.follow) {
|
||||
await followLogs(mac, { BOLD, GREEN, YELLOW, RED, DIM, RESET });
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const data = await getLabdClient().getMachineLogs(mac);
|
||||
|
||||
const BOLD = "\x1b[1m";
|
||||
const GREEN = "\x1b[32m";
|
||||
const YELLOW = "\x1b[33m";
|
||||
const RED = "\x1b[31m";
|
||||
const DIM = "\x1b[2m";
|
||||
const RESET = "\x1b[0m";
|
||||
|
||||
console.log(`${BOLD}${data["hostname"]}${RESET} (${mac})`);
|
||||
console.log(` Status: ${data["status"] === "installed" ? GREEN : YELLOW}${data["status"]}${RESET}`);
|
||||
console.log(` Role: ${data["role"]}`);
|
||||
@@ -83,3 +89,64 @@ export function registerLogsCommand(parent: Command): void {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Follow provisioning logs for one machine by polling labd.
 *
 * Prints a header line whenever the reported status changes and one line per
 * new stage entry. Never returns normally: it loops until the machine reports
 * "installed" after having been seen "queued"/"installing" (then exits the
 * process with code 0) or until the user interrupts it.
 *
 * @param mac - MAC address of the machine to follow.
 * @param colors - ANSI escape sequences used to colour the output.
 */
async function followLogs(
  mac: string,
  colors: { BOLD: string; GREEN: string; YELLOW: string; RED: string; DIM: string; RESET: string },
): Promise<void> {
  const { BOLD, GREEN, YELLOW, RED, DIM, RESET } = colors;
  const client = getLabdClient();
  const pollIntervalMs = 5000;

  console.log(`${DIM}Following logs for ${mac} (Ctrl+C to stop)${RESET}`);
  console.log("");

  let lastStageCount = 0;
  let lastStatus = "";
  let sawInstalling = false;

  while (true) {
    try {
      const data = await client.getMachineLogs(mac);
      const status = String(data["status"] ?? "");
      const log = data["log"] as Array<{ stage: string; detail: string; timestamp: string }> | undefined;

      // Print header once or on status change
      if (status !== lastStatus) {
        const hostname = String(data["hostname"] ?? mac);
        const statusColor = status === "installed" ? GREEN : YELLOW;
        console.log(` ${BOLD}${hostname}${RESET} ${statusColor}${status}${RESET}`);
        lastStatus = status;
      }

      if (status === "installing" || status === "queued") {
        sawInstalling = true;
      }

      if (log) {
        // A shorter log than last time means it was restarted (presumably when
        // a reprovision begins); start over so the new stages are not
        // suppressed until the count passes the old one.
        if (log.length < lastStageCount) {
          lastStageCount = 0;
        }

        // Print new stages
        for (const entry of log.slice(lastStageCount)) {
          // Characters 11..18 of an ISO-8601 timestamp are HH:MM:SS.
          const time = entry.timestamp.slice(11, 19);
          const color = entry.stage === "complete" ? GREEN : entry.stage === "error" ? RED : YELLOW;
          const detail = entry.detail ? ` ${DIM}-- ${entry.detail}${RESET}` : "";
          console.log(` ${DIM}${time}${RESET} ${color}${entry.stage}${RESET}${detail}`);
        }
        lastStageCount = log.length;
      }

      // Only exit on "installed" if we actually saw the install happen
      // (avoids exiting immediately when following a reprovision that hasn't started yet)
      if (status === "installed" && sawInstalling) {
        const ip = data["ip"] ?? "";
        console.log("");
        console.log(` ${GREEN}${BOLD}Install complete!${RESET}${ip ? ` ${DIM}ssh lab@${ip}${RESET}` : ""}`);
        process.exit(0);
      }
    } catch {
      // Deliberately best-effort: the machine may not be in logs yet (still
      // queued) or labd may be briefly unreachable; keep polling.
    }

    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }
}
|
||||
|
||||
94
bastion/src/cli/src/commands/recheck.ts
Normal file
94
bastion/src/cli/src/commands/recheck.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
// CLI command: provision recheck
|
||||
// SSH into all installed machines, collect hardware info, update bastion state.
|
||||
|
||||
import type { Command } from "commander";
|
||||
import { sshExec } from "@lab/modules";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
// ANSI escape sequences used for terminal output.
const BOLD = "\x1b[1m";
const GREEN = "\x1b[0;32m";
const RED = "\x1b[0;31m";
const DIM = "\x1b[2m";
const RESET = "\x1b[0m";

// Options passed to every sshExec call made by this command.
const SSH_OPTS = { timeoutMs: 30_000 };

// Shell script that collects hardware info as JSON.
// Kept simple — no Python, pure shell + awk.
//
// Each fallback is a separate statement on purpose: the exit status of a
// `grep | cut | sed` pipeline is sed's, so chaining `|| fallback` after the
// pipeline never fires, and `grep -c` prints "0" *and* exits non-zero when
// nothing matches, so `|| echo 0` would produce two lines of output.
// NOTE(review): values are interpolated into JSON unescaped; a DMI string
// containing a double quote or backslash would make the output unparseable.
const HW_COLLECT_SCRIPT = [
  'P=$(cat /sys/class/dmi/id/product_name 2>/dev/null || echo unknown)',
  'B=$(cat /sys/class/dmi/id/board_name 2>/dev/null || echo unknown)',
  'S=$(cat /sys/class/dmi/id/product_serial 2>/dev/null || echo unknown)',
  'M=$(cat /sys/class/dmi/id/sys_vendor 2>/dev/null || echo unknown)',
  // CPU model: try "model name" first, then "Model", then give up.
  'C=$(grep -m1 "model name" /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=$(grep -m1 Model /proc/cpuinfo 2>/dev/null | cut -d: -f2 | sed "s/^ //")',
  '[ -n "$C" ] || C=unknown',
  // Core count and memory must always be numbers so the JSON stays valid.
  'N=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null)',
  'N=${N:-0}',
  'R=$(awk "/MemTotal/ {printf \\"%d\\", \\$2/1024/1024}" /proc/meminfo 2>/dev/null)',
  'R=${R:-0}',
  'A=$(uname -m)',
  'printf \'{"product":"%s","board":"%s","serial":"%s","manufacturer":"%s","cpu_model":"%s","cpu_cores":%s,"memory_gb":%s,"arch":"%s"}\\n\' "$P" "$B" "$S" "$M" "$C" "$N" "$R" "$A"',
].join("; ");
|
||||
|
||||
/**
 * Register `recheck` under the given parent command.
 *
 * The command lists installed machines from labd, runs HW_COLLECT_SCRIPT on
 * each one over SSH (sequentially), parses the JSON it prints and posts it
 * back through `discoverMachine`. A per-machine result line and a final
 * updated/failed summary are printed.
 *
 * @param parent - Command to attach the `recheck` subcommand to.
 */
export function registerRecheckCommand(parent: Command): void {
  parent
    .command("recheck")
    .description("Refresh hardware info for all installed machines via SSH")
    .option("--user <user>", "SSH user", "root")
    .option("--target <hostname>", "Only recheck a specific machine (by hostname or MAC)")
    .action(async (opts: { user: string; target?: string }) => {
      const client = getLabdClient();
      let state;
      try {
        state = await client.getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // Build list of machines to check
      const targets: Array<{ mac: string; hostname: string; ip: string }> = [];
      for (const [mac, info] of Object.entries(state.installed)) {
        // Without an IP there is nothing to SSH into.
        if (!info.ip) continue;
        if (opts.target && info.hostname !== opts.target && mac !== opts.target) continue;
        targets.push({ mac, hostname: info.hostname, ip: info.ip });
      }

      if (targets.length === 0) {
        console.log(
          opts.target
            ? `No installed machine matching "${opts.target}" with an IP to check.`
            : "No installed machines with IPs to check.",
        );
        return;
      }

      console.log(`\n${BOLD}Rechecking ${targets.length} machine(s)...${RESET}\n`);

      let updated = 0;
      let failed = 0;

      for (const { mac, hostname, ip } of targets) {
        // Result is appended to this line once the check finishes.
        process.stdout.write(` ${hostname.padEnd(24)} ${DIM}(${ip})${RESET} `);

        try {
          const t0 = Date.now();
          const result = await sshExec(ip, opts.user, HW_COLLECT_SCRIPT, SSH_OPTS);
          const elapsed = Date.now() - t0;
          if (result.exitCode !== 0) {
            console.log(`${RED}SSH failed (exit ${result.exitCode}, ${elapsed}ms)${RESET}`);
            if (result.stderr) console.log(` ${DIM}${result.stderr.substring(0, 200)}${RESET}`);
            failed++;
            continue;
          }

          // Throws on malformed output; reported as FAIL by the catch below.
          const hwData = JSON.parse(result.stdout.trim());

          // `mac` goes last so the collected data can never override it.
          const response = await client.discoverMachine({ ...hwData, mac });
          if (response.error) {
            console.log(`${RED}FAIL${RESET} ${DIM}${response.error}${RESET}`);
            failed++;
            continue;
          }

          const cpu = hwData.cpu_model || "?";
          const cores = hwData.cpu_cores || "?";
          const mem = hwData.memory_gb || "?";
          console.log(`${GREEN}OK${RESET} ${DIM}${cpu}, ${cores} cores, ${mem}GB${RESET}`);
          updated++;
        } catch (err) {
          console.log(`${RED}FAIL${RESET} ${DIM}${err instanceof Error ? err.message : String(err)}${RESET}`);
          failed++;
        }
      }

      console.log(`\n${BOLD}Done:${RESET} ${updated} updated, ${failed} failed\n`);
    });
}
|
||||
37
bastion/src/cli/src/commands/register.ts
Normal file
37
bastion/src/cli/src/commands/register.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
// CLI command: provision register
|
||||
// Register an already-installed machine that is missing from bastion state.
|
||||
|
||||
import { Command, Option } from "commander";
|
||||
import { SUPPORTED_ROLES } from "@lab/shared";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
/**
 * Register `register <mac> <hostname>` under the given parent command.
 *
 * Sends the machine's MAC, hostname, role and (optionally) IP to labd. Prints
 * a confirmation on success; on an error response or a thrown error it prints
 * "Failed: …" and exits with code 1.
 *
 * @param parent - Command to attach the `register` subcommand to.
 */
export function registerRegisterCommand(parent: Command): void {
  const roleOption = new Option("--role <role>", "Machine role")
    .choices([...SUPPORTED_ROLES])
    .default("worker");

  parent
    .command("register <mac> <hostname>")
    .description("Register an already-installed machine (e.g. after state loss)")
    .addOption(roleOption)
    .option("--ip <address>", "Machine IP address")
    .action(async (mac: string, hostname: string, opts: {
      role: string;
      ip?: string;
    }) => {
      try {
        // Only send `ip` when one was actually supplied.
        const payload: { mac: string; hostname: string; role: string; ip?: string } = {
          mac,
          hostname,
          role: opts.role,
        };
        if (opts.ip) {
          payload.ip = opts.ip;
        }

        const response = await getLabdClient().registerMachine(payload);
        if (response.error) {
          console.error(`Failed: ${response.error}`);
          process.exit(1);
        }

        const ipSuffix = opts.ip ? `, ip=${opts.ip}` : "";
        console.log(`Registered ${mac} as ${hostname} (role=${opts.role}${ipSuffix})`);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`Failed: ${reason}`);
        process.exit(1);
      }
    });
}
|
||||
@@ -144,6 +144,7 @@ export function registerReprovisionCommand(parent: Command): void {
|
||||
|
||||
const sshArgs = [
|
||||
"-o", "StrictHostKeyChecking=no",
|
||||
"-o", "UserKnownHostsFile=/dev/null",
|
||||
"-o", "ConnectTimeout=10",
|
||||
...(sshKey !== undefined ? ["-i", sshKey] : []),
|
||||
`${effectiveUser}@${ip}`,
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// CLI entry point for lab-bastion.
|
||||
// Commands:
|
||||
// init bastion standalone start/stop/status
|
||||
// provision list/install/reprovision/forget
|
||||
// provision list/install/reprovision/forget/register
|
||||
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { Command, Option } from "commander";
|
||||
@@ -16,8 +16,11 @@ import { registerListCommand } from "./commands/list.js";
|
||||
import { registerReprovisionCommand } from "./commands/reprovision.js";
|
||||
import { registerDebugCommand } from "./commands/debug.js";
|
||||
import { registerForgetCommand } from "./commands/forget.js";
|
||||
import { registerRegisterCommand } from "./commands/register.js";
|
||||
import { registerAsahiCommand } from "./commands/asahi.js";
|
||||
import { registerLogsCommand } from "./commands/logs.js";
|
||||
import { registerMakeIsoCommand } from "./commands/makeiso.js";
|
||||
import { registerRecheckCommand } from "./commands/recheck.js";
|
||||
import { registerConfigCommand } from "./commands/config.js";
|
||||
import { registerLoginCommand } from "./commands/login.js";
|
||||
import { registerDoctorCommand } from "./commands/doctor.js";
|
||||
@@ -98,8 +101,11 @@ export function createProgram(): Command {
|
||||
registerReprovisionCommand(provisionCmd);
|
||||
registerDebugCommand(provisionCmd);
|
||||
registerForgetCommand(provisionCmd);
|
||||
registerRegisterCommand(provisionCmd);
|
||||
registerAsahiCommand(provisionCmd);
|
||||
registerLogsCommand(provisionCmd);
|
||||
registerMakeIsoCommand(provisionCmd);
|
||||
registerRecheckCommand(provisionCmd);
|
||||
|
||||
// config list/get/set/path
|
||||
registerConfigCommand(program);
|
||||
|
||||
@@ -137,7 +137,7 @@ describe("bastion smoke tests", () => {
|
||||
|
||||
// Wait for the server to start (look for the banner)
|
||||
const startedAt = Date.now();
|
||||
const maxWait = 10_000;
|
||||
const maxWait = 15_000;
|
||||
while (Date.now() - startedAt < maxWait) {
|
||||
if (stdout.includes("Waiting for PXE boot requests")) break;
|
||||
await sleep(200);
|
||||
|
||||
@@ -151,7 +151,7 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
try {
|
||||
const result = await sendCommand(all[0]!.bastionId, {
|
||||
type: "command-install",
|
||||
mac, hostname, disk: disk ?? "/dev/sda", role: role ?? "infra", os: os ?? "fedora-43",
|
||||
mac, hostname, disk: disk ?? "", role: role ?? "infra", os: os ?? "fedora-43",
|
||||
});
|
||||
return reply.code(result.status === "ok" ? 200 : 500).send(result);
|
||||
} catch (err) {
|
||||
@@ -164,7 +164,44 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
try {
|
||||
const result = await sendCommand(bastion.bastionId, {
|
||||
type: "command-install",
|
||||
mac, hostname, disk: disk ?? "/dev/sda", role: role ?? "infra", os: os ?? "fedora-43",
|
||||
mac, hostname, disk: disk ?? "", role: role ?? "infra", os: os ?? "fedora-43",
|
||||
});
|
||||
return reply.code(result.status === "ok" ? 200 : 500).send(result);
|
||||
} catch (err) {
|
||||
return reply.code(500).send({ error: err instanceof Error ? err.message : String(err) });
|
||||
}
|
||||
});
|
||||
|
||||
// Register an already-installed machine — route to correct bastion (or single bastion)
|
||||
app.post<{
|
||||
Body: { mac?: string; hostname?: string; role?: string; ip?: string };
|
||||
}>("/api/machines/register", async (request, reply) => {
|
||||
const { mac, hostname, role, ip } = request.body ?? {};
|
||||
if (!mac || !hostname) {
|
||||
return reply.code(400).send({ error: "mac and hostname are required" });
|
||||
}
|
||||
|
||||
const normalized = mac.toLowerCase().replace(/-/g, ":");
|
||||
|
||||
// Find bastion that knows this MAC, or use single connected bastion
|
||||
const bastion = bastionRegistry.findBastionByMac(normalized);
|
||||
const target = bastion ?? (bastionRegistry.getAll().length === 1 ? bastionRegistry.getAll()[0] : null);
|
||||
|
||||
if (!target) {
|
||||
const all = bastionRegistry.getAll();
|
||||
if (all.length === 0) {
|
||||
return reply.code(503).send({ error: "No bastions connected" });
|
||||
}
|
||||
return reply.code(404).send({ error: `MAC ${normalized} not found on any bastion and multiple bastions connected` });
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await sendCommand(target.bastionId, {
|
||||
type: "command-register",
|
||||
mac: normalized,
|
||||
hostname,
|
||||
role: role ?? "worker",
|
||||
ip: ip ?? "",
|
||||
});
|
||||
return reply.code(result.status === "ok" ? 200 : 500).send(result);
|
||||
} catch (err) {
|
||||
@@ -174,10 +211,10 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
|
||||
// Queue debug/rescue mode — route to correct bastion by MAC
|
||||
app.post<{
|
||||
Body: { mac?: string; sshd?: boolean };
|
||||
Body: { mac?: string; pxeBoot?: boolean };
|
||||
}>("/api/machines/debug", async (request, reply) => {
|
||||
const mac = (request.body?.mac ?? "").toLowerCase().replace(/-/g, ":");
|
||||
const sshd = request.body?.sshd ?? false;
|
||||
const pxeBoot = request.body?.pxeBoot ?? false;
|
||||
if (!mac) {
|
||||
return reply.code(400).send({ error: "mac is required" });
|
||||
}
|
||||
@@ -190,7 +227,7 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
}
|
||||
if (all.length === 1) {
|
||||
try {
|
||||
const result = await sendCommand(all[0]!.bastionId, { type: "command-debug", mac, sshd });
|
||||
const result = await sendCommand(all[0]!.bastionId, { type: "command-debug", mac, pxeBoot });
|
||||
return reply.code(result.status === "ok" ? 200 : 500).send(result);
|
||||
} catch (err) {
|
||||
return reply.code(500).send({ error: err instanceof Error ? err.message : String(err) });
|
||||
@@ -200,7 +237,7 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await sendCommand(bastion.bastionId, { type: "command-debug", mac, sshd });
|
||||
const result = await sendCommand(bastion.bastionId, { type: "command-debug", mac, pxeBoot });
|
||||
return reply.code(result.status === "ok" ? 200 : 500).send(result);
|
||||
} catch (err) {
|
||||
return reply.code(500).send({ error: err instanceof Error ? err.message : String(err) });
|
||||
@@ -223,6 +260,37 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
}
|
||||
});
|
||||
|
||||
// Update hardware info (discovery data) for a machine.
//
// The MAC is lower-cased and dash separators are converted to colons before
// lookup. The command goes to the bastion that already knows the MAC, or to
// the only registered bastion when exactly one exists.
//
// Responses: 400 when no MAC is given, 503 when no bastion can be chosen,
// 200/500 depending on the bastion's reported status or a thrown error.
app.post<{
  Body: {
    mac?: string; product?: string; board?: string; serial?: string;
    manufacturer?: string; cpu_model?: string; cpu_cores?: number;
    memory_gb?: number; arch?: string;
    disks?: Array<{ name: string; size_gb: number; model: string }>;
    nics?: Array<{ name: string; mac: string; state: string }>;
  };
}>("/api/machines/discover", async (request, reply) => {
  const data = request.body ?? {};
  const mac = (data.mac ?? "").toLowerCase().replace(/-/g, ":");
  if (!mac) {
    return reply.code(400).send({ error: "mac is required" });
  }

  const bastion = bastionRegistry.findBastionByMac(mac);
  // Single-bastion fallback: an unknown MAC can only belong to the one bastion we have.
  const all = bastionRegistry.getAll();
  const target = bastion ?? (all.length === 1 ? all[0] : null);

  if (!target) {
    return reply.code(503).send({ error: "No bastion found for this MAC" });
  }

  try {
    // Spread the request body FIRST: the body is untrusted, and keys listed
    // after the spread win, so a client-supplied `type` (or raw `mac`) can
    // never replace the command type or the normalized MAC.
    const result = await sendCommand(target.bastionId, { ...data, type: "command-discover", mac });
    return reply.code(result.status === "ok" ? 200 : 500).send(result);
  } catch (err) {
    return reply.code(500).send({ error: err instanceof Error ? err.message : String(err) });
  }
});
|
||||
|
||||
// Update role
|
||||
app.post<{
|
||||
Body: { mac?: string; role?: string };
|
||||
@@ -257,17 +325,7 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
const queued = bastion.state.install_queue[mac];
|
||||
const installed = bastion.state.installed[mac];
|
||||
|
||||
if (installed) {
|
||||
return {
|
||||
mac,
|
||||
hostname: installed.hostname,
|
||||
status: "installed",
|
||||
role: installed.role,
|
||||
ip: installed.ip,
|
||||
installed_at: installed.installed_at,
|
||||
};
|
||||
}
|
||||
|
||||
// Active install takes priority over old installed state (reprovision case)
|
||||
if (queued) {
|
||||
return {
|
||||
mac,
|
||||
@@ -282,6 +340,17 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
};
|
||||
}
|
||||
|
||||
if (installed) {
|
||||
return {
|
||||
mac,
|
||||
hostname: installed.hostname,
|
||||
status: "installed",
|
||||
role: installed.role,
|
||||
ip: installed.ip,
|
||||
installed_at: installed.installed_at,
|
||||
};
|
||||
}
|
||||
|
||||
return reply.code(404).send({ error: `MAC ${mac} not found in install queue or installed` });
|
||||
});
|
||||
}
|
||||
|
||||
@@ -5,14 +5,16 @@ import { runSequential } from "../utils.js";
|
||||
import { applyPodSecurityStandards } from "../operations/pod-security.js";
|
||||
import { checkCertExpiry } from "../operations/cert-check.js";
|
||||
import { configureLogRotation } from "../operations/log-rotation.js";
|
||||
import { configureLonghornDisk } from "../operations/longhorn-disk.js";
|
||||
|
||||
export const hardeningGroup: OperationGroup = {
|
||||
name: "hardening",
|
||||
description: "Pod security, certificate check, log rotation",
|
||||
description: "Pod security, certificate check, log rotation, storage",
|
||||
operations: [
|
||||
{ name: "Apply Pod Security Standards", fn: applyPodSecurityStandards },
|
||||
{ name: "Check certificate expiry", fn: checkCertExpiry },
|
||||
{ name: "Configure log rotation", fn: configureLogRotation },
|
||||
{ name: "Configure Longhorn disk", fn: configureLonghornDisk },
|
||||
],
|
||||
};
|
||||
|
||||
|
||||
@@ -7,16 +7,18 @@ import { applyCisHardening } from "../operations/sysctl.js";
|
||||
import { disableSwap } from "../operations/swap.js";
|
||||
import { disableFirewall } from "../operations/firewall.js";
|
||||
import { setSelinuxPermissive } from "../operations/selinux.js";
|
||||
import { enableIscsi } from "../operations/iscsi.js";
|
||||
|
||||
export const hostPrepGroup: OperationGroup = {
|
||||
name: "host-prep",
|
||||
description: "Prepare host for k3s: kernel modules, sysctl, swap, firewall, SELinux",
|
||||
description: "Prepare host for k3s: kernel modules, sysctl, swap, firewall, SELinux, iSCSI",
|
||||
operations: [
|
||||
{ name: "Load kernel modules", fn: loadKernelModules },
|
||||
{ name: "Apply CIS sysctl", fn: applyCisHardening },
|
||||
{ name: "Disable swap", fn: disableSwap },
|
||||
{ name: "Disable firewall", fn: disableFirewall },
|
||||
{ name: "Set SELinux permissive", fn: setSelinuxPermissive },
|
||||
{ name: "Enable iSCSI", fn: enableIscsi },
|
||||
],
|
||||
};
|
||||
|
||||
|
||||
@@ -78,9 +78,10 @@ export class K3sModule implements Module {
|
||||
return toModuleResult("install", [...prepResults, ...k3sResults], start);
|
||||
}
|
||||
|
||||
// Phase 3: Networking (server only — agents don't install Cilium)
|
||||
// Phase 3: Networking (initial server only — joining servers get Cilium via daemonset)
|
||||
let netResults: OperationResult[] = [];
|
||||
if (isServer) {
|
||||
const isJoiningServer = isServer && !!opCtx.config.k3sServerUrl;
|
||||
if (isServer && !isJoiningServer) {
|
||||
netResults = await runNetworking(opCtx);
|
||||
}
|
||||
|
||||
|
||||
@@ -35,21 +35,15 @@ export const installCilium: Operation = async (ctx): Promise<OperationResult> =>
|
||||
}
|
||||
details.push(`Installed cilium CLI ${version} (${cliArch})`);
|
||||
|
||||
// Detect default network device (avoid tailscale/wireguard)
|
||||
const devResult = await ctx.ssh.exec(
|
||||
"ip -4 route show default | awk '{print $5}' | head -1",
|
||||
sshOpts(ctx),
|
||||
);
|
||||
const defaultDev = devResult.stdout.trim();
|
||||
details.push(`Network device: ${defaultDev}`);
|
||||
|
||||
// Install Cilium
|
||||
// - No hardcoded devices: Cilium auto-detects per node (heterogeneous NICs like eno1 vs enP7s7)
|
||||
// - k8sServiceHost/Port: k3s agents proxy the API on 127.0.0.1:6444 (not 6443)
|
||||
const installResult = await ctx.ssh.exec(
|
||||
`KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium install \
|
||||
--set kubeProxyReplacement=true \
|
||||
--set ipam.mode=kubernetes \
|
||||
--set devices="${defaultDev}" \
|
||||
--set nodePort.directRoutingDevice="${defaultDev}"`,
|
||||
--set k8sServiceHost=127.0.0.1 \
|
||||
--set k8sServicePort=6444`,
|
||||
{ timeoutMs: 300_000 },
|
||||
);
|
||||
if (installResult.exitCode !== 0) {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
export { loadKernelModules } from "./kernel-modules.js";
|
||||
export { applyCisHardening } from "./sysctl.js";
|
||||
export { disableSwap } from "./swap.js";
|
||||
export { enableIscsi } from "./iscsi.js";
|
||||
export { disableFirewall } from "./firewall.js";
|
||||
export { setSelinuxPermissive } from "./selinux.js";
|
||||
export { writeK3sConfig } from "./k3s-config.js";
|
||||
@@ -13,3 +14,4 @@ export { configureLogRotation } from "./log-rotation.js";
|
||||
export { applyDefaultNetworkPolicies } from "./network-policy.js";
|
||||
export { applyPodSecurityStandards } from "./pod-security.js";
|
||||
export { checkCertExpiry } from "./cert-check.js";
|
||||
export { configureLonghornDisk } from "./longhorn-disk.js";
|
||||
|
||||
31
bastion/src/modules/modules/k3s/src/operations/iscsi.ts
Normal file
31
bastion/src/modules/modules/k3s/src/operations/iscsi.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
// Install and enable iSCSI initiator (required by Longhorn storage).
|
||||
// Fedora: iscsi-initiator-utils, Ubuntu: open-iscsi
|
||||
|
||||
import type { Operation, OperationResult } from "../types.js";
|
||||
import { sshOpts } from "../utils.js";
|
||||
|
||||
/**
 * Install the iSCSI initiator package and enable the `iscsid` service.
 *
 * Returns early with `changed: false` when `systemctl is-active iscsid`
 * already reports "active". Otherwise the package is chosen from
 * /etc/os-release (dnf + iscsi-initiator-utils when it mentions fedora, rhel
 * or centos; apt-get + open-iscsi for everything else), installed, and the
 * service is enabled and started.
 *
 * @returns success=false with the command's stderr when either the package
 *   install or the service enable step exits non-zero.
 */
export const enableIscsi: Operation = async (ctx): Promise<OperationResult> => {
  // Check if iscsid is already running
  const check = await ctx.ssh.exec("systemctl is-active iscsid 2>/dev/null", sshOpts(ctx));
  if (check.stdout.trim() === "active") {
    return { success: true, changed: false, message: "iSCSI already active" };
  }

  // Install the package (detect distro)
  const osRelease = await ctx.ssh.exec("cat /etc/os-release", sshOpts(ctx));
  const osLower = osRelease.stdout.toLowerCase();
  const isFedora = osLower.includes("fedora") || osLower.includes("rhel") || osLower.includes("centos");

  const pkg = isFedora ? "iscsi-initiator-utils" : "open-iscsi";
  const installCmd = isFedora ? `sudo dnf install -y ${pkg}` : `sudo apt-get install -y ${pkg}`;

  const install = await ctx.ssh.exec(installCmd, { timeoutMs: 120_000 });
  if (install.exitCode !== 0) {
    return { success: false, changed: false, message: `Failed to install ${pkg}`, error: install.stderr.trim() };
  }

  // Enable and start. The exit code must be checked: a package that installed
  // but whose service failed to start would otherwise be reported as success.
  const enable = await ctx.ssh.exec("sudo systemctl enable --now iscsid", sshOpts(ctx));
  if (enable.exitCode !== 0) {
    return {
      success: false,
      changed: true, // the package was installed even though the service did not start
      message: `Installed ${pkg} but failed to enable iscsid`,
      error: enable.stderr.trim(),
    };
  }

  return { success: true, changed: true, message: `Installed ${pkg} and enabled iscsid` };
};
|
||||
@@ -9,7 +9,12 @@ function isServerRole(role: string): boolean {
|
||||
|
||||
function generateServerConfig(config: K3sConfig): string {
|
||||
const tlsSans = [config.hostname, config.ip, ...(config.tlsSans ?? [])];
|
||||
return `# k3s server configuration — CIS hardened
|
||||
const isJoining = !!config.k3sServerUrl;
|
||||
const clusterLines = isJoining
|
||||
? `server: "${config.k3sServerUrl}"\ntoken: "${config.k3sToken}"`
|
||||
: "cluster-init: true";
|
||||
return `# k3s server configuration — CIS hardened, etcd HA
|
||||
${clusterLines}
|
||||
protect-kernel-defaults: true
|
||||
secrets-encryption: true
|
||||
write-kubeconfig-mode: "0640"
|
||||
@@ -20,6 +25,9 @@ disable:
|
||||
- servicelb
|
||||
- traefik
|
||||
|
||||
node-label:
|
||||
- "node.longhorn.io/create-default-disk=config"
|
||||
|
||||
kube-apiserver-arg:
|
||||
- "anonymous-auth=false"
|
||||
- "audit-log-path=/var/log/kubernetes/audit.log"
|
||||
@@ -42,6 +50,9 @@ ${tlsSans.map((s) => ` - "${s}"`).join("\n")}
|
||||
|
||||
function generateAgentConfig(): string {
|
||||
return `protect-kernel-defaults: true
|
||||
node-label:
|
||||
- "node-role.kubernetes.io/worker=true"
|
||||
- "node.longhorn.io/create-default-disk=config"
|
||||
kubelet-arg:
|
||||
- "protect-kernel-defaults=true"
|
||||
- "streaming-connection-idle-timeout=5m"
|
||||
|
||||
@@ -15,8 +15,21 @@ export const installK3sBinary: Operation = async (ctx): Promise<OperationResult>
|
||||
const alreadyInstalled = version.exitCode === 0;
|
||||
|
||||
if (isServer) {
|
||||
// Clean stale server state when joining an existing cluster
|
||||
// (TLS certs from a previous run cause "newer than datastore" fatal error)
|
||||
if (ctx.config.k3sServerUrl && ctx.config.k3sToken) {
|
||||
await ctx.ssh.exec(
|
||||
"rm -rf /var/lib/rancher/k3s/server/tls /var/lib/rancher/k3s/server/cred /var/lib/rancher/k3s/server/db",
|
||||
sshOpts(ctx),
|
||||
);
|
||||
}
|
||||
|
||||
// If joining an existing cluster, pass K3S_URL and K3S_TOKEN
|
||||
const joinEnv = ctx.config.k3sServerUrl && ctx.config.k3sToken
|
||||
? `K3S_URL="${ctx.config.k3sServerUrl}" K3S_TOKEN="${ctx.config.k3sToken}"`
|
||||
: "";
|
||||
const result = await ctx.ssh.exec(
|
||||
'curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server" INSTALL_K3S_SKIP_SELINUX_RPM=true sh -',
|
||||
`curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server" INSTALL_K3S_SKIP_SELINUX_RPM=true ${joinEnv} sh -`,
|
||||
{ timeoutMs: 300_000 },
|
||||
);
|
||||
if (result.exitCode !== 0) {
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
// Annotate nodes with Longhorn default disk config when /var/lib/longhorn exists.
|
||||
// The label is set in k3s config (node-label), but the annotation must be applied via kubectl.
|
||||
|
||||
import type { Operation, OperationResult } from "../types.js";
|
||||
import { sshOpts } from "../utils.js";
|
||||
import { sshExec as remoteSshExec } from "../../../../src/ssh.js";
|
||||
|
||||
/**
 * Annotate this node with the Longhorn default-disk configuration.
 *
 * Steps:
 *  1. Skip (success, unchanged) when /var/lib/longhorn does not exist.
 *  2. Resolve the node name from `hostname -f`, falling back to `hostname`.
 *  3. Try `k3s kubectl annotate` on the node itself.
 *  4. If that does not report "annotated"/"unchanged" and a k3s server URL is
 *     configured, retry the same command over SSH on that server as root.
 *  5. If neither attempt works, still return success with a message saying
 *     the annotation needs server kubectl (deliberate best-effort).
 */
export const configureLonghornDisk: Operation = async (ctx): Promise<OperationResult> => {
  // Check if /var/lib/longhorn exists on this node
  const check = await ctx.ssh.exec("test -d /var/lib/longhorn && echo yes || echo no", sshOpts(ctx));
  if (check.stdout.trim() !== "yes") {
    return { success: true, changed: false, message: "No /var/lib/longhorn directory — skipping Longhorn disk config" };
  }

  // Find the node name (hostname as registered in k3s)
  const nodeNameResult = await ctx.ssh.exec("hostname -f 2>/dev/null || hostname", sshOpts(ctx));
  const nodeName = nodeNameResult.stdout.trim();

  const annotation = JSON.stringify([{ path: "/var/lib/longhorn", allowScheduling: true }]);

  // The annotation value is JSON and therefore full of double quotes. It must
  // be wrapped in SINGLE quotes for the shell: inside a double-quoted word the
  // inner quotes are consumed by the shell and kubectl receives
  // `[{path:/var/lib/longhorn,...}]`, which is not valid JSON. The value
  // contains no single quotes, so no further escaping is needed.
  const annotateCmd =
    `k3s kubectl annotate node "${nodeName}" 'node.longhorn.io/default-disks-config=${annotation}' --overwrite`;

  // Try kubectl locally first (works on server nodes). `|| true` keeps the
  // exit status clean; success is detected from kubectl's output instead.
  const result = await ctx.ssh.exec(`${annotateCmd} 2>&1 || true`, sshOpts(ctx));

  if (result.stdout.includes("annotated") || result.stdout.includes("unchanged")) {
    return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName}` };
  }

  // For worker/agent nodes without local kubectl: apply via the server
  if (ctx.config.k3sServerUrl) {
    try {
      // Parsed inside the try so a malformed URL falls through to the
      // best-effort result below instead of throwing out of the operation.
      const serverHost = new URL(ctx.config.k3sServerUrl).hostname;
      // NOTE(review): assumes remoteSshExec hands the command string to the
      // remote shell unchanged, as ctx.ssh.exec does — confirm.
      const remoteResult = await remoteSshExec(
        serverHost, "root",
        annotateCmd,
        { ...(ctx.ssh.keyPath ? { keyPath: ctx.ssh.keyPath } : {}), timeoutMs: 15_000 },
      );
      if (remoteResult.stdout.includes("annotated") || remoteResult.stdout.includes("unchanged")) {
        return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName} (via server)` };
      }
    } catch {
      // Fall through to manual instruction
    }
  }

  return { success: true, changed: false, message: "Longhorn disk label set (annotation requires server kubectl)" };
};
|
||||
@@ -111,7 +111,9 @@ export type LabdBastionMessage =
|
||||
| { type: "command-install"; requestId: string; mac: string; hostname: string; disk?: string; role: string; os: string }
|
||||
| { type: "command-forget"; requestId: string; mac: string }
|
||||
| { type: "command-role-update"; requestId: string; mac: string; role: string }
|
||||
| { type: "command-debug"; requestId: string; mac: string; sshd?: boolean }
|
||||
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
|
||||
| { type: "command-register"; requestId: string; mac: string; hostname: string; role: string; ip: string }
|
||||
| { type: "command-discover"; requestId: string; mac: string; product?: string; board?: string; serial?: string; manufacturer?: string; cpu_model?: string; cpu_cores?: number; memory_gb?: number; arch?: string; disks?: Array<{ name: string; size_gb: number; model: string }>; nics?: Array<{ name: string; mac: string; state: string }> }
|
||||
| { type: "server-shutdown"; reconnectAfter: number };
|
||||
|
||||
export type BastionMessageType = BastionMessage["type"];
|
||||
@@ -126,7 +128,7 @@ const BASTION_MESSAGE_TYPES = new Set<string>([
|
||||
|
||||
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
|
||||
"bastion-enrolled", "bastion-heartbeat-ack", "command-install",
|
||||
"command-forget", "command-role-update", "command-debug", "server-shutdown",
|
||||
"command-forget", "command-role-update", "command-debug", "command-register", "command-discover", "server-shutdown",
|
||||
]);
|
||||
|
||||
export function isBastionMessage(msg: unknown): msg is BastionMessage {
|
||||
|
||||
@@ -96,12 +96,19 @@ export interface InstalledInfo {
|
||||
ip: string;
|
||||
installed_at: string;
|
||||
bastionId?: string; // set when aggregated through labd
|
||||
// Hardware info (copied from discovered on install completion)
|
||||
product?: string;
|
||||
manufacturer?: string;
|
||||
cpu_model?: string;
|
||||
cpu_cores?: number;
|
||||
memory_gb?: number;
|
||||
arch?: string;
|
||||
}
|
||||
|
||||
export interface DebugConfig {
|
||||
hostname: string;
|
||||
queued_at: string;
|
||||
sshd?: boolean;
|
||||
pxeBoot?: boolean;
|
||||
}
|
||||
|
||||
export interface BastionState {
|
||||
|
||||
355
bastion/tests/integration/asahi-firstboot.test.ts
Normal file
355
bastion/tests/integration/asahi-firstboot.test.ts
Normal file
@@ -0,0 +1,355 @@
|
||||
// Integration test: Asahi first-boot LVM setup.
|
||||
//
|
||||
// Tests the first-boot script that creates the standard lab LVM layout
|
||||
// on a separate data disk — simulating the Asahi provisioning flow where
|
||||
// the root partition is pre-installed and a data partition is left for LVM.
|
||||
//
|
||||
// Uses a Fedora cloud VM with two disks:
|
||||
// disk0: 20GB root (Fedora cloud image)
|
||||
// disk1: 200GB empty (simulates the Asahi "Data" partition)
|
||||
//
|
||||
// The firstboot script should detect disk1, create labvg + LVs, mount them.
|
||||
// Then we test reprovision: wipe marker, re-run, verify existing VG reused.
|
||||
//
|
||||
// Prerequisites: libvirt, virsh, virt-install, qemu, sudo access, lvm2
|
||||
// Run: sudo pnpm run test:integration:asahi
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
||||
import { readFileSync, existsSync } from "node:fs";
|
||||
import { execSync } from "node:child_process";
|
||||
import { join } from "node:path";
|
||||
import { homedir } from "node:os";
|
||||
import { destroyVm, waitForVmIp, waitForSsh, log, ensureCloudImage, createCloudInitIso } from "./helpers/libvirt.js";
|
||||
import { ensureTestNetwork, TEST_NETWORK_NAME } from "./helpers/network.js";
|
||||
import { sshExec, sshRun } from "./helpers/ssh.js";
|
||||
import { renderFirstbootScript } from "../../src/bastion/src/templates/asahi-firstboot.sh.js";
|
||||
|
||||
const VM_NAME = "lab-asahi-firstboot-test";
|
||||
const VM_MEMORY = 4096;
|
||||
const VM_VCPUS = 2;
|
||||
const VM_ROOT_DISK_GB = 20;
|
||||
const VM_DATA_DISK_GB = 200; // Simulates the Asahi "Data" partition
|
||||
const SSH_USER = "fedora";
|
||||
const IMAGE_DIR = "/var/lib/libvirt/images";
|
||||
const IS_ROOT = process.getuid?.() === 0;
|
||||
|
||||
const FEDORA_CLOUD_IMAGE = "https://download.fedoraproject.org/pub/fedora/linux/releases/43/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-43-1.6.x86_64.qcow2";
|
||||
|
||||
/**
 * Run a shell command synchronously and return its stdout as UTF-8 text.
 * The command is prefixed with `sudo` unless the process is already root.
 * The timeout defaults to 60 seconds.
 */
function run(cmd: string, opts?: { timeout?: number }): string {
  const timeout = opts?.timeout ?? 60_000;
  let command = cmd;
  if (!IS_ROOT) {
    command = `sudo ${cmd}`;
  }
  return execSync(command, { encoding: "utf-8", stdio: "pipe", timeout });
}
|
||||
|
||||
/**
 * Locate an SSH key pair for logging in to the test VM.
 *
 * Candidates are tried in order: the path in $SSH_KEY_PATH (if set), then
 * id_ed25519 / id_ecdsa / id_rsa under ~/.ssh of the current user and, when
 * $SUDO_USER is set, under /home/$SUDO_USER/.ssh. A candidate is accepted
 * only when both the private key and its `.pub` file exist.
 *
 * @returns the trimmed public key text and the private key path
 * @throws Error when no candidate pair exists
 */
function findSshKey(): { pubKey: string; keyPath: string } {
  const candidates: string[] = [];

  const explicitPath = process.env["SSH_KEY_PATH"];
  if (explicitPath) candidates.push(explicitPath);

  const homeDirs = [homedir()];
  const sudoUser = process.env["SUDO_USER"];
  if (sudoUser) homeDirs.push(join("/home", sudoUser));

  for (const dir of homeDirs) {
    for (const name of ["id_ed25519", "id_ecdsa", "id_rsa"]) {
      candidates.push(join(dir, ".ssh", name));
    }
  }

  for (const keyPath of candidates) {
    const pubPath = `${keyPath}.pub`;
    if (!existsSync(keyPath) || !existsSync(pubPath)) continue;
    return { pubKey: readFileSync(pubPath, "utf-8").trim(), keyPath };
  }

  throw new Error("No SSH key found");
}
|
||||
|
||||
/**
 * Create a VM with two disks: a root disk cloned from a cloud image and an
 * empty data disk.
 *
 * Any existing VM with the same name is destroyed first. The root disk is a
 * resized copy of the cloud image, the data disk is a fresh qcow2 image, and
 * a cloud-init ISO (lvm2 + xfsprogs, `fedora` user with the given SSH key) is
 * attached as a CD-ROM. The VM is imported with virt-install without waiting
 * for it to finish booting.
 */
function createTwoDiskVm(config: {
  name: string;
  memory: number;
  vcpus: number;
  rootDiskGb: number;
  dataDiskGb: number;
  network: string;
  cloudImageUrl: string;
  sshPubKey: string;
}): void {
  const { name, memory, vcpus, rootDiskGb, dataDiskGb, network, cloudImageUrl, sshPubKey } = config;

  destroyVm(name);

  log(`Creating two-disk VM: ${name} (root=${rootDiskGb}GB, data=${dataDiskGb}GB)`);

  const baseImage = ensureCloudImage(cloudImageUrl, `${name}-base`);
  const rootDiskPath = join(IMAGE_DIR, `${name}.qcow2`);
  const dataDiskPath = join(IMAGE_DIR, `${name}-data.qcow2`);

  // Root disk: copy of the cloud image, grown to the requested size
  run(`cp "${baseImage}" "${rootDiskPath}"`);
  run(`qemu-img resize "${rootDiskPath}" ${rootDiskGb}G`);

  // Data disk: empty qcow2
  run(`qemu-img create -f qcow2 "${dataDiskPath}" ${dataDiskGb}G`);

  // Cloud-init user data: login user plus the LVM/XFS tooling
  const userData = `#cloud-config
hostname: ${name}
manage_etc_hosts: true
users:
  - default
  - name: fedora
    sudo: ALL=(ALL) NOPASSWD:ALL
    shell: /bin/bash
    ssh_authorized_keys:
      - ${sshPubKey}
ssh_pwauth: false
package_update: false
packages:
  - lvm2
  - xfsprogs
`;

  const cloudInitIso = createCloudInitIso(name, {
    name,
    memory,
    vcpus,
    diskSize: rootDiskGb,
    network,
    cloudImageUrl,
    sshPubKey,
    userData,
  });

  const diskArgs = [
    `--disk=path=${rootDiskPath},format=qcow2`,
    `--disk=path=${dataDiskPath},format=qcow2`, // second disk for LVM
    `--disk=path=${cloudInitIso},device=cdrom`,
  ];
  const virtInstallCmd = [
    "virt-install",
    `--name=${name}`,
    `--memory=${memory}`,
    `--vcpus=${vcpus}`,
    ...diskArgs,
    `--network=network=${network},model=virtio`,
    "--os-variant=generic",
    "--import",
    "--noautoconsole",
    "--wait=0",
  ].join(" ");

  run(virtInstallCmd);
  log(`Two-disk VM ${name} created`);
}
|
||||
|
||||
// End-to-end suite for the Asahi first-boot LVM script.
//
// The tests share one VM and run in order: create VM -> run firstboot ->
// inspect the resulting LVM layout -> simulate a reprovision and re-run.
describe("asahi firstboot LVM integration", () => {
  let vmIp: string;
  let sshKeyPath: string;
  let sshPubKey: string;

  /** Run a command on the test VM as SSH_USER with the discovered key. */
  const remote = (cmd: string) => sshExec(vmIp, SSH_USER, cmd, { keyPath: sshKeyPath });

  beforeAll(async () => {
    const keys = findSshKey();
    sshKeyPath = keys.keyPath;
    sshPubKey = keys.pubKey;

    log("Setting up test network...");
    ensureTestNetwork();

    log("Creating two-disk VM...");
    createTwoDiskVm({
      name: VM_NAME,
      memory: VM_MEMORY,
      vcpus: VM_VCPUS,
      rootDiskGb: VM_ROOT_DISK_GB,
      dataDiskGb: VM_DATA_DISK_GB,
      network: TEST_NETWORK_NAME,
      cloudImageUrl: FEDORA_CLOUD_IMAGE,
      sshPubKey,
    });

    log("Waiting for VM IP...");
    vmIp = await waitForVmIp(VM_NAME, 120_000);

    log("Waiting for SSH...");
    await waitForSsh(vmIp, SSH_USER, 180_000, sshKeyPath);

    log("Waiting for cloud-init to finish...");
    await sshRun(vmIp, SSH_USER, "sudo cloud-init status --wait 2>/dev/null || sleep 30", "cloud-init", { keyPath: sshKeyPath });

    // Log the disk layout for debugging; the first test asserts on it.
    const disks = remote("lsblk -d -n -o NAME,SIZE");
    log(`Disks:\n${disks.stdout}`);
    // The hook budget must exceed the inner wait budgets (120 s + 180 s) plus
    // VM creation and cloud-init; otherwise the hook times out before the
    // waits can report a meaningful error.
  }, 600_000);

  afterAll(async () => {
    log("Cleaning up VM...");
    destroyVm(VM_NAME);
    // Also remove data disk
    try { run(`rm -f "${join(IMAGE_DIR, `${VM_NAME}-data.qcow2`)}"`); } catch { /* ignore */ }
  });

  it("second disk is visible and unformatted", () => {
    const result = remote("lsblk -d -n -o NAME,SIZE,TYPE | grep disk");
    const disks = result.stdout.trim().split("\n");
    expect(disks.length).toBeGreaterThanOrEqual(2);

    // Second disk (vdb) should exist
    const vdb = remote("sudo blkid /dev/vdb 2>/dev/null; echo exit=$?");
    // Should have no filesystem (blkid exits with 2 when nothing is found)
    expect(vdb.stdout).toContain("exit=2");
  });

  it("firstboot script creates LVM on data disk", async () => {
    // Generate the firstboot script
    const script = renderFirstbootScript({
      hostname: "asahi-test",
      role: "infra",
      serverIp: "10.0.0.1",
      httpPort: 8080,
      sshKeys: [sshPubKey],
      adminUser: "testadmin",
      mac: "52:54:00:aa:bb:cc",
    });

    // Upload and run
    log("Uploading firstboot script...");
    await sshRun(vmIp, SSH_USER,
      `cat > /tmp/firstboot.sh << 'SCRIPT_EOF'\n${script}\nSCRIPT_EOF\nchmod +x /tmp/firstboot.sh`,
      "upload script", { keyPath: sshKeyPath });

    log("Running firstboot script...");
    const result = await sshRun(vmIp, SSH_USER,
      "sudo /tmp/firstboot.sh 2>&1",
      "firstboot", { keyPath: sshKeyPath, timeout: 120_000 });

    expect(result).toBe(0);
  }, 180_000);

  it("SSH still works after firstboot script", () => {
    const result = remote("echo hello");
    if (result.stdout.trim() !== "hello") {
      log(`SSH debug: exitCode=${result.exitCode} stdout='${result.stdout}' stderr='${result.stderr}'`);
    }
    expect(result.stdout.trim()).toBe("hello");
  });

  it("volume group labvg exists", () => {
    const result = remote("sudo vgs labvg --noheadings -o vg_name");
    expect(result.stdout.trim()).toBe("labvg");
  });

  it("all expected logical volumes exist", () => {
    const result = remote("sudo lvs labvg --noheadings -o lv_name --sort lv_name");
    const lvs = result.stdout.trim().split("\n").map(l => l.trim()).sort();
    expect(lvs).toContain("home");
    expect(lvs).toContain("longhorn");
    expect(lvs).toContain("rancher"); // infra role
    expect(lvs).toContain("srv");
    expect(lvs).toContain("swap");
    expect(lvs).toContain("var");
    expect(lvs).toContain("varlog");
  });

  it("LV sizes match kickstart layout", () => {
    const result = remote("sudo lvs labvg --noheadings -o lv_name,lv_size --units m --nosuffix");
    // name -> size in MiB, rounded to the nearest integer
    const lvMap = new Map<string, number>();
    for (const line of result.stdout.trim().split("\n")) {
      const [name, size] = line.trim().split(/\s+/);
      if (name && size) lvMap.set(name, Math.round(parseFloat(size)));
    }

    expect(lvMap.get("swap")).toBe(27648);
    expect(lvMap.get("var")).toBe(102400);
    expect(lvMap.get("varlog")).toBe(10240);
    expect(lvMap.get("home")).toBe(10240);
    expect(lvMap.get("srv")).toBe(20480);
    expect(lvMap.get("rancher")).toBe(20480);
    // longhorn gets whatever is left on the 200GB disk after the fixed-size
    // LVs above (191488 MiB in total) — it should be at least ~5GB
    expect(lvMap.get("longhorn")).toBeGreaterThan(5000);
  });

  it("non-var volumes are mounted with XFS", () => {
    const mounts = remote("mount | grep labvg");
    // /var and /var/log deferred to next reboot (can't migrate live)
    expect(mounts.stdout).toContain("/home ");
    expect(mounts.stdout).toContain("/srv ");
    expect(mounts.stdout).toContain("/var/lib/rancher ");
    expect(mounts.stdout).toContain("/var/lib/longhorn ");
    expect(mounts.stdout).toContain("xfs");
  });

  it("swap is active", () => {
    const result = remote("swapon --show --noheadings");
    // swapon may show /dev/dm-X or /dev/labvg/swap
    expect(result.stdout.length).toBeGreaterThan(0);
  });

  it("fstab has LVM entries", () => {
    const result = remote("grep labvg /etc/fstab");
    const lines = result.stdout.trim().split("\n");
    expect(lines.length).toBeGreaterThanOrEqual(7); // swap + var + varlog + home + srv + rancher + longhorn
  });

  it("hostname was set", () => {
    const result = remote("hostname");
    expect(result.stdout.trim()).toBe("asahi-test");
  });

  it("admin user was created with sudo", () => {
    const result = remote("sudo id testadmin");
    expect(result.stdout).toContain("testadmin");
    expect(result.stdout).toContain("wheel");
  });

  it("provisioning metadata file exists", () => {
    const result = remote("cat /etc/lab-provisioned");
    expect(result.stdout).toContain("hostname=asahi-test");
    expect(result.stdout).toContain("role=infra");
    expect(result.stdout).toContain("method=asahi-firstboot");
  });

  it("marker file prevents re-run", () => {
    const result = remote("test -f /etc/lab-lvm-setup-done && echo yes");
    expect(result.stdout.trim()).toBe("yes");
  });

  // ── Reprovision test ──────────────────────────────────────────────

  it("reprovision: detects existing labvg and re-mounts", async () => {
    // Write a test file to a preserved LV
    await sshRun(vmIp, SSH_USER,
      "echo 'precious-data' | sudo tee /var/lib/rancher/test-preserve.txt",
      "write test data", { keyPath: sshKeyPath });

    // Remove marker to simulate fresh boot after reinstall
    await sshRun(vmIp, SSH_USER, "sudo rm /etc/lab-lvm-setup-done", "remove marker", { keyPath: sshKeyPath });

    // Unmount everything (simulate reinstall wiping root)
    await sshRun(vmIp, SSH_USER, `
      sudo umount /var/lib/longhorn 2>/dev/null || true
      sudo umount /var/lib/rancher 2>/dev/null || true
      sudo umount /srv 2>/dev/null || true
      sudo umount /home 2>/dev/null || true
      sudo umount /var/log 2>/dev/null || true
      # Don't unmount /var — it's in use
      sudo swapoff /dev/labvg/swap 2>/dev/null || true
      sudo sed -i '/labvg/d' /etc/fstab
    `, "unmount LVs", { keyPath: sshKeyPath });

    // Re-run firstboot script — should detect existing VG
    log("Re-running firstboot (reprovision)...");
    const result = await sshRun(vmIp, SSH_USER,
      "sudo /tmp/firstboot.sh 2>&1",
      "firstboot reprovision", { keyPath: sshKeyPath });
    expect(result).toBe(0);

    // Verify data was preserved
    const data = remote("cat /var/lib/rancher/test-preserve.txt");
    expect(data.stdout.trim()).toBe("precious-data");

    // Verify marker was re-created
    const marker = remote("test -f /etc/lab-lvm-setup-done && echo yes");
    expect(marker.stdout.trim()).toBe("yes");

    // Verify fstab was re-populated
    const fstab = remote("grep labvg /etc/fstab");
    expect(fstab.stdout).toContain("/var/lib/rancher");
    // Same budget as the initial firstboot run: the script does comparable
    // work here, plus several SSH round-trips before and after it.
  }, 180_000);
});
|
||||
353
bastion/tests/integration/asahi-validate.test.ts
Normal file
353
bastion/tests/integration/asahi-validate.test.ts
Normal file
@@ -0,0 +1,353 @@
|
||||
// Validation tests for Asahi provisioning artifacts.
|
||||
//
|
||||
// Tests that can run WITHOUT Apple Silicon hardware:
|
||||
// 1. Shellcheck the generated firstboot script
|
||||
// 2. Verify the built rootfs ZIP structure
|
||||
// 3. Mount the rootfs and verify injected files
|
||||
// 4. Validate installer_data.json against the Asahi installer's Python parser
|
||||
// 5. Verify partition layout arithmetic
|
||||
//
|
||||
// Prerequisites:
|
||||
// - Run scripts/build-asahi-rootfs.sh first (creates asahi-repo/)
|
||||
// - shellcheck installed (dnf install ShellCheck)
|
||||
// - python3 installed
|
||||
// - root for loop mount (sudo)
|
||||
//
|
||||
// Run: sudo pnpm run test:integration:asahi-validate
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
||||
import { existsSync, lstatSync, readFileSync, writeFileSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { execSync, spawnSync } from "node:child_process";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { renderFirstbootScript } from "../../src/bastion/src/templates/asahi-firstboot.sh.js";
|
||||
|
||||
// Paths are resolved relative to this test file: two directories up is the project root.
const PROJECT_ROOT = join(import.meta.dirname, "../..");
// Directory that holds the built rootfs ZIP and installer_data.json checked below.
const ASAHI_REPO = join(PROJECT_ROOT, "asahi-repo");
// Build cache directory that sits next to asahi-repo.
const ASAHI_CACHE = join(PROJECT_ROOT, ".asahi-cache");
// True only when the process uid is 0; getuid is called optionally because it may be undefined.
const IS_ROOT = (process.getuid?.() ?? -1) === 0;
|
||||
|
||||
/**
 * Run a shell command synchronously and return its captured stdout.
 *
 * When the test process is not already root, the command string is prefixed
 * with `sudo`. A non-zero exit status or a timeout makes `execSync` throw.
 *
 * @param cmd  Shell command line to execute.
 * @param opts Optional settings; `timeout` is in milliseconds (default 60 000).
 * @returns The command's stdout decoded as UTF-8.
 */
function run(cmd: string, opts?: { timeout?: number }): string {
  const timeout = opts?.timeout ?? 60_000;
  const commandLine = IS_ROOT ? cmd : `sudo ${cmd}`;
  return execSync(commandLine, { encoding: "utf-8", stdio: "pipe", timeout });
}
|
||||
|
||||
/**
 * Report whether both build outputs this suite needs are present in ASAHI_REPO.
 *
 * @returns true only if the rootfs ZIP and installer_data.json both exist.
 */
function hasBuiltArtifacts(): boolean {
  const requiredFiles = ["fedora-asahi-lab.zip", "installer_data.json"];
  return requiredFiles.every((fileName) => existsSync(join(ASAHI_REPO, fileName)));
}
|
||||
|
||||
describe("asahi script validation", () => {
  /**
   * Write a rendered firstboot script to a temp file and lint it with shellcheck.
   *
   * Warnings are tolerated (and printed); error-level findings fail the test.
   * The temp file is always removed afterwards.
   *
   * @param script Rendered shell script text.
   * @param label  Short tag used in the temp file name to tell the variants apart.
   */
  function expectShellcheckClean(script: string, label: string): void {
    const tmpFile = join(tmpdir(), `asahi-shellcheck-${label}-${Date.now()}.sh`);
    writeFileSync(tmpFile, script);

    const lint = (extraArgs: string[]) =>
      spawnSync("shellcheck", [
        "-s", "bash",
        "-e", "SC2086,SC2164", // allow unquoted variables (intentional in some LVM commands)
        ...extraArgs,
        tmpFile,
      ], { encoding: "utf-8", stdio: "pipe", timeout: 30_000 });

    try {
      // First pass: every severity, so warnings remain visible in the test output.
      const allFindings = lint([]);
      // `error` is set when the binary is missing or the run timed out.
      if (allFindings.error) throw allFindings.error;
      if (allFindings.status !== 0) {
        console.log("Shellcheck warnings/errors:");
        console.log(allFindings.stdout);
      }
      // Exit 2+ means shellcheck could not process the file or was misinvoked.
      expect(allFindings.status).toBeLessThan(2);

      // Exit 1 is returned for findings of ANY severity, so it cannot tell a
      // warning from an error. Re-run considering only error-level findings.
      const errorsOnly = lint(["--severity=error"]);
      if (errorsOnly.error) throw errorsOnly.error;
      expect(errorsOnly.status).toBe(0);
    } finally {
      try { rmSync(tmpFile); } catch { /* best-effort temp file cleanup */ }
    }
  }

  it("firstboot script passes shellcheck", () => {
    const script = renderFirstbootScript({
      hostname: "test-node",
      role: "infra",
      serverIp: "10.0.0.1",
      httpPort: 8080,
      sshKeys: ["ssh-ed25519 AAAA... user@host"],
      adminUser: "testadmin",
      mac: "aa:bb:cc:dd:ee:ff",
    });
    expectShellcheckClean(script, "infra");
  });

  it("firstboot script for worker role passes shellcheck", () => {
    const script = renderFirstbootScript({
      hostname: "worker-node",
      role: "worker",
      serverIp: "10.0.0.1",
      httpPort: 8080,
      sshKeys: [],
      adminUser: "michal",
      mac: "00:11:22:33:44:55",
    });
    expectShellcheckClean(script, "worker");
  });

  it("firstboot script for vanilla role passes shellcheck", () => {
    const script = renderFirstbootScript({
      hostname: "vanilla-node",
      role: "vanilla",
      serverIp: "10.0.0.1",
      httpPort: 8080,
      sshKeys: ["ssh-rsa AAAA... user@host"],
      adminUser: "admin",
      mac: "ff:ee:dd:cc:bb:aa",
    });
    expectShellcheckClean(script, "vanilla");
  });
});
|
||||
|
||||
describe("asahi installer_data.json validation", () => {
  type Json = Record<string, unknown>;

  const installerDataPath = join(ASAHI_REPO, "installer_data.json");
  let installerData: Json;

  // First entry of os_list; the suite expects exactly one OS entry.
  const firstOs = (): Json => (installerData["os_list"] as Json[])[0]!;
  // Partition table of the first OS entry.
  const partitionsOf = (os: Json): Json[] => os["partitions"] as Json[];

  // Inline structural check executed with python3. It receives the JSON path
  // as argv[1] so the path never has to be quoted into Python source code.
  // The Python text must start at column 0 to be valid for `python3 -c`.
  const STRUCTURE_CHECK = `
import json, sys

with open(sys.argv[1]) as f:
    data = json.load(f)

errors = []
os_list = data.get("os_list", [])
if not os_list:
    errors.append("Empty os_list")

for os_entry in os_list:
    required = ["name", "default_os_name", "boot_object", "next_object", "package", "supported_fw", "partitions"]
    for field in required:
        if field not in os_entry:
            errors.append(f"Missing field: {field}")

    partitions = os_entry.get("partitions", [])
    if not partitions:
        errors.append("No partitions defined")

    has_efi = False
    has_root_image = False
    expand_count = 0
    for p in partitions:
        if "name" not in p or "type" not in p or "size" not in p:
            errors.append(f"Partition missing name/type/size: {p}")
        if p.get("type") == "EFI":
            has_efi = True
            if p.get("format") != "fat":
                errors.append("EFI partition must be FAT format")
        if p.get("image"):
            has_root_image = True
        if p.get("expand"):
            expand_count += 1
        # Validate size format
        size_str = str(p.get("size", ""))
        if not size_str.endswith("B") or not size_str[:-1].isdigit():
            errors.append(f"Invalid size format: {size_str} (expected NB)")

    if not has_efi:
        errors.append("No EFI partition found")
    if not has_root_image:
        errors.append("No partition with root image found")
    if expand_count > 1:
        errors.append(f"Multiple expanding partitions ({expand_count}) — only one should expand")

    # Verify supported_fw is a list of strings
    fw = os_entry.get("supported_fw", [])
    if not isinstance(fw, list) or not all(isinstance(v, str) for v in fw):
        errors.append("supported_fw must be a list of strings")

if errors:
    print("ERRORS:")
    for e in errors:
        print(f" - {e}")
    sys.exit(1)
else:
    print("OK: installer_data.json is valid")
`;

  beforeAll(() => {
    if (!hasBuiltArtifacts()) {
      throw new Error("Run scripts/build-asahi-rootfs.sh first to generate artifacts");
    }
    installerData = JSON.parse(readFileSync(installerDataPath, "utf-8"));
  });

  it("has os_list with one entry", () => {
    const osList = installerData["os_list"] as unknown[];
    expect(osList).toBeInstanceOf(Array);
    expect(osList.length).toBe(1);
  });

  it("has required top-level fields", () => {
    const os = firstOs();
    expect(os["name"]).toBeDefined();
    expect(os["default_os_name"]).toBeDefined();
    expect(os["boot_object"]).toBeDefined();
    expect(os["next_object"]).toBeDefined();
    expect(os["package"]).toBe("fedora-asahi-lab.zip");
    expect(os["supported_fw"]).toBeInstanceOf(Array);
    expect((os["supported_fw"] as string[]).length).toBeGreaterThan(0);
  });

  it("has 4 partitions (EFI + Boot + Root + Data)", () => {
    const partitions = partitionsOf(firstOs());
    expect(partitions).toHaveLength(4);
    expect(partitions[0]!["name"]).toBe("EFI");
    expect(partitions[1]!["name"]).toBe("Boot");
    expect(partitions[2]!["name"]).toBe("Root");
    expect(partitions[3]!["name"]).toBe("Data");
  });

  it("EFI partition has correct format", () => {
    const efi = partitionsOf(firstOs())[0]!;
    expect(efi["type"]).toBe("EFI");
    expect(efi["format"]).toBe("fat");
    expect(efi["copy_firmware"]).toBe(true);
    // Size is a byte count with a trailing "B"; it must be at least 500 MiB.
    const size = Number.parseInt(String(efi["size"]).replace("B", ""), 10);
    expect(size).toBeGreaterThanOrEqual(500 * 1024 * 1024);
  });

  it("Boot partition references boot.img", () => {
    const boot = partitionsOf(firstOs())[1]!;
    expect(boot["type"]).toBe("Linux");
    expect(boot["image"]).toBe("boot.img");
  });

  it("Root partition does NOT expand", () => {
    const root = partitionsOf(firstOs())[2]!;
    expect(root["type"]).toBe("Linux");
    expect(root["image"]).toBe("root.img");
    expect(root["expand"]).toBe(false);
  });

  it("Data partition expands for LVM", () => {
    const data = partitionsOf(firstOs())[3]!;
    expect(data["type"]).toBe("Linux");
    expect(data["expand"]).toBe(true);
    expect(data["image"]).toBeUndefined(); // No image — empty partition for LVM
  });

  it("partition sizes use bytes format (NB suffix)", () => {
    for (const partition of partitionsOf(firstOs())) {
      expect(String(partition["size"])).toMatch(/^\d+B$/);
    }
  });

  it("validates against Asahi installer Python parser", () => {
    // Nothing is downloaded here: this runs the inline STRUCTURE_CHECK script,
    // a local structural check of the JSON, not the real installer's parser.
    const validation = spawnSync("python3", ["-c", STRUCTURE_CHECK, installerDataPath],
      { encoding: "utf-8", stdio: "pipe", timeout: 10_000 });

    // `error` is set when python3 is missing or the run timed out.
    if (validation.error) throw validation.error;

    if (validation.status !== 0) {
      console.log(validation.stdout);
      console.log(validation.stderr);
    }
    expect(validation.stdout).toContain("OK");
    expect(validation.status).toBe(0);
  });
});
|
||||
|
||||
describe("asahi rootfs ZIP validation", () => {
  const zipPath = join(ASAHI_REPO, "fedora-asahi-lab.zip");

  /**
   * Return the textual `unzip -l` listing of the rootfs ZIP.
   * Throws if unzip could not be started or timed out.
   */
  function listZip(): string {
    const result = spawnSync("unzip", ["-l", zipPath],
      { encoding: "utf-8", stdio: "pipe", timeout: 10_000 });
    if (result.error) throw result.error;
    return result.stdout;
  }

  /**
   * Find the listing line whose entry name (last column) is exactly `entryName`
   * and return its first column parsed as a byte count, or undefined if the
   * entry is not listed. Exact matching avoids hitting nested or similarly
   * named entries such as "esp/boot.img" or "root.img.sha256".
   */
  function entrySize(listing: string, entryName: string): number | undefined {
    for (const line of listing.split("\n")) {
      const columns = line.trim().split(/\s+/);
      if (columns.length >= 2 && columns[columns.length - 1] === entryName) {
        return Number.parseInt(columns[0]!, 10);
      }
    }
    return undefined;
  }

  beforeAll(() => {
    if (!hasBuiltArtifacts()) {
      throw new Error("Run scripts/build-asahi-rootfs.sh first to generate artifacts");
    }
  });

  it("ZIP contains required files", () => {
    const listing = listZip();
    expect(listing).toContain("boot.img");
    expect(listing).toContain("root.img");
    expect(listing).toContain("esp/");
  });

  it("boot.img is ~1GB", () => {
    const size = entrySize(listZip(), "boot.img");
    expect(size).toBeDefined();
    expect(size!).toBeGreaterThan(500 * 1024 * 1024); // > 500MB
    expect(size!).toBeLessThan(2 * 1024 * 1024 * 1024); // < 2GB
  });

  it("root.img is > 3GB", () => {
    const size = entrySize(listZip(), "root.img");
    expect(size).toBeDefined();
    expect(size!).toBeGreaterThan(3 * 1024 * 1024 * 1024); // > 3GB
  });

  it("rootfs contains lab-firstboot.sh", () => {
    const mountDir = join(tmpdir(), `asahi-rootfs-check-${Date.now()}`);
    const extractDir = join(tmpdir(), `asahi-rootfs-extract-${Date.now()}`);

    try {
      // Created inside the try so a failure on the second mkdir still cleans up the first.
      mkdirSync(mountDir);
      mkdirSync(extractDir);

      // Extract root.img from ZIP
      run(`unzip -o -j "${zipPath}" root.img -d "${extractDir}"`);

      // Mount read-only via a loop device and inspect the contents
      run(`mount -o loop,ro "${join(extractDir, "root.img")}" "${mountDir}"`);

      // Verify firstboot script
      const firstbootPath = join(mountDir, "usr/local/bin/lab-firstboot.sh");
      expect(existsSync(firstbootPath)).toBe(true);
      const script = readFileSync(firstbootPath, "utf-8");
      expect(script).toContain("#!/bin/bash");
      expect(script).toContain("labvg");
      expect(script).toContain("pvcreate");

      // Verify systemd service
      const servicePath = join(mountDir, "etc/systemd/system/lab-firstboot.service");
      expect(existsSync(servicePath)).toBe(true);
      const service = readFileSync(servicePath, "utf-8");
      expect(service).toContain("lab-firstboot.sh");

      // Verify service is enabled (symlink exists). lstat is used so the link
      // itself is checked without following its target.
      const symlinkPath = join(mountDir, "etc/systemd/system/multi-user.target.wants/lab-firstboot.service");
      let symlinkExists = false;
      try { lstatSync(symlinkPath); symlinkExists = true; } catch { /* not found */ }
      expect(symlinkExists).toBe(true);

      // Verify SSH keys
      expect(existsSync(join(mountDir, "root/.ssh/authorized_keys"))).toBe(true);

      // Verify lvm2 + xfsprogs are in the image (binaries may live in bin or sbin)
      const hasBinary = (name: string): boolean =>
        existsSync(join(mountDir, "usr/bin", name)) || existsSync(join(mountDir, "usr/sbin", name));
      expect(hasBinary("pvcreate")).toBe(true);
      expect(hasBinary("mkfs.xfs")).toBe(true);
    } finally {
      try {
        // Harmless if nothing was mounted: errors are silenced by the shell.
        run(`umount "${mountDir}" 2>/dev/null || true`);
      } finally {
        // Always remove the temp dirs, even if the unmount step itself threw.
        rmSync(mountDir, { recursive: true, force: true });
        rmSync(extractDir, { recursive: true, force: true });
      }
    }
  }, 120_000);
});
|
||||
@@ -224,11 +224,12 @@ describe("PXE boot provisioning", () => {
|
||||
// Generate dnsmasq config
|
||||
generateDnsmasqConf(config);
|
||||
|
||||
// Start HTTP server
|
||||
const { app, state } = createApp(config);
|
||||
// Start HTTP server + syslog listener
|
||||
const { app, state, syslog } = createApp(config);
|
||||
bastionApp = app;
|
||||
await app.listen({ port: config.httpPort, host: "0.0.0.0" });
|
||||
log(`Bastion HTTP server listening on :${HTTP_PORT}`);
|
||||
syslog.start();
|
||||
log(`Bastion HTTP server listening on :${HTTP_PORT}, syslog on UDP :${config.syslogPort}`);
|
||||
|
||||
// Start dnsmasq (fire-and-forget — it runs until killed)
|
||||
// May fail without root (DHCP socket needs CAP_NET_BIND_SERVICE); libvirt network provides DHCP fallback
|
||||
@@ -387,8 +388,8 @@ describe("PXE boot provisioning", () => {
|
||||
expect(data.progress).toBe("complete");
|
||||
});
|
||||
|
||||
it.skip("log lines were captured", async () => {
|
||||
// Requires log streamer in %post — skipped until re-added
|
||||
it("syslog install logs were captured", async () => {
|
||||
// Anaconda forwards logs via syslog (logging --host directive in kickstart)
|
||||
const res = await fetch(`http://${BASTION_IP}:${HTTP_PORT}/api/logs/${encodeURIComponent(vmMac)}`);
|
||||
const data = (await res.json()) as { log_total?: number; log_lines?: Array<{ line: string }> };
|
||||
expect(data.log_total).toBeGreaterThan(0);
|
||||
|
||||
Reference in New Issue
Block a user