feat: PXE debug boot mode for rescue/diagnostics #4
@@ -29,43 +29,49 @@ _labctl() {
|
||||
COMPREPLY=($(compgen -W "--dir -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"init bastion standalone status")
|
||||
COMPREPLY=($(compgen -W "--dir --port -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||
return ;;
|
||||
"init bastion standalone")
|
||||
COMPREPLY=($(compgen -W "start stop status -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"app labcontroller deploy")
|
||||
COMPREPLY=($(compgen -W "--user --port --crdb-replicas -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "--user --crdb-replicas -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"app labcontroller status")
|
||||
COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "--user -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"app k3s install")
|
||||
COMPREPLY=($(compgen -W "--role --user --port --k3s-server --k3s-token -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "--role --user --k3s-server --k3s-token -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"app k3s health")
|
||||
COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "--user -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"app k3s list")
|
||||
COMPREPLY=($(compgen -W "--user --port -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "--user -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"init bastion")
|
||||
COMPREPLY=($(compgen -W "standalone -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision list")
|
||||
COMPREPLY=($(compgen -W "--port -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision install")
|
||||
COMPREPLY=($(compgen -W "--role --os --disk --port -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "--role --os --disk -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision reprovision")
|
||||
COMPREPLY=($(compgen -W "--role --os --disk --port -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "--role --os --disk -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision debug")
|
||||
COMPREPLY=($(compgen -W "--pxe-boot -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision forget")
|
||||
COMPREPLY=($(compgen -W "--port -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision logs")
|
||||
COMPREPLY=($(compgen -W "-f --follow --port -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "-f --follow -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision makeiso")
|
||||
COMPREPLY=($(compgen -W "--arch --local --out -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"config list")
|
||||
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
|
||||
@@ -92,7 +98,7 @@ _labctl() {
|
||||
COMPREPLY=($(compgen -W "bastion -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"provision")
|
||||
COMPREPLY=($(compgen -W "list install reprovision forget logs -h --help" -- "$cur"))
|
||||
COMPREPLY=($(compgen -W "list install reprovision debug forget logs makeiso -h --help" -- "$cur"))
|
||||
return ;;
|
||||
"config")
|
||||
COMPREPLY=($(compgen -W "list get set path -h --help" -- "$cur"))
|
||||
|
||||
@@ -118,38 +118,35 @@ complete -c labctl -n "__labctl_in_cmd init bastion standalone start" -l foregro
|
||||
# init bastion standalone stop options
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone stop" -l dir -d 'Bastion data directory' -x
|
||||
|
||||
# init bastion standalone status options
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone status" -l dir -d 'Bastion data directory' -x
|
||||
complete -c labctl -n "__labctl_in_cmd init bastion standalone status" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# provision subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a list -d 'List all known machines'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a install -d 'Queue a discovered machine for OS installation'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a reprovision -d 'Queue install + SSH reboot into PXE (target: hostname, MAC, or IP)'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a debug -d 'PXE boot into Fedora rescue mode for debugging (target: hostname, MAC, or IP)'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a forget -d 'Remove a machine from bastion state'
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a logs -d 'Show provisioning logs for a machine (hostname, MAC, or IP)'
|
||||
|
||||
# provision list options
|
||||
complete -c labctl -n "__labctl_in_cmd provision list" -l port -d 'Bastion HTTP port' -x
|
||||
complete -c labctl -n "__labctl_using_cmd provision" -a makeiso -d 'Generate a UEFI-bootable iPXE ISO for network provisioning'
|
||||
|
||||
# provision install options
|
||||
complete -c labctl -n "__labctl_in_cmd provision install" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
||||
complete -c labctl -n "__labctl_in_cmd provision install" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
|
||||
complete -c labctl -n "__labctl_in_cmd provision install" -l disk -d 'Target disk device (auto-detect if omitted)' -x
|
||||
complete -c labctl -n "__labctl_in_cmd provision install" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# provision reprovision options
|
||||
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l role -d 'Machine role (see below)' -xa 'vanilla worker infra labcontroller'
|
||||
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l os -d 'Operating system' -xa 'fedora-43 ubuntu-26.04'
|
||||
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l disk -d 'Target disk device (auto-detect if omitted)' -x
|
||||
complete -c labctl -n "__labctl_in_cmd provision reprovision" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# provision forget options
|
||||
complete -c labctl -n "__labctl_in_cmd provision forget" -l port -d 'Bastion HTTP port' -x
|
||||
# provision debug options
|
||||
complete -c labctl -n "__labctl_in_cmd provision debug" -l pxe-boot -d 'Boot installed system via PXE (kernel+initrd from network, root from NVMe)'
|
||||
|
||||
# provision logs options
|
||||
complete -c labctl -n "__labctl_in_cmd provision logs" -s f -l follow -d 'Follow logs in real-time (SSE stream)'
|
||||
complete -c labctl -n "__labctl_in_cmd provision logs" -l port -d 'Bastion HTTP port' -x
|
||||
complete -c labctl -n "__labctl_in_cmd provision logs" -s f -l follow -d 'Follow log output in real-time'
|
||||
|
||||
# provision makeiso options
|
||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l arch -d 'Target architecture(s)' -xa 'x86_64 aarch64'
|
||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l local -d 'Build ISO locally instead of using bastion-hosted URL'
|
||||
complete -c labctl -n "__labctl_in_cmd provision makeiso" -l out -d 'Output path for local ISO build' -x
|
||||
|
||||
# config subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd config" -a list -d 'Show all configuration values'
|
||||
@@ -173,12 +170,10 @@ complete -c labctl -n "__labctl_using_cmd app labcontroller" -a status -d 'Check
|
||||
|
||||
# app labcontroller deploy options
|
||||
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l port -d 'Bastion HTTP port' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app labcontroller deploy" -l crdb-replicas -d 'CockroachDB replicas' -x
|
||||
|
||||
# app labcontroller status options
|
||||
complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app labcontroller status" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# app k3s subcommands
|
||||
complete -c labctl -n "__labctl_using_cmd app k3s" -a install -d 'Install k3s on a target machine (hostname, IP, or MAC)'
|
||||
@@ -188,15 +183,12 @@ complete -c labctl -n "__labctl_using_cmd app k3s" -a list -d 'List installed ma
|
||||
# app k3s install options
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s install" -l role -d 'k3s role: infra (server) or worker (agent)' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s install" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s install" -l port -d 'Bastion HTTP port (for resolving target)' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s install" -l k3s-server -d 'k3s server URL (required for worker role)' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s install" -l k3s-token -d 'k3s join token (required for worker role)' -x
|
||||
|
||||
# app k3s health options
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s health" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s health" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
# app k3s list options
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s list" -l user -d 'SSH user' -x
|
||||
complete -c labctl -n "__labctl_in_cmd app k3s list" -l port -d 'Bastion HTTP port' -x
|
||||
|
||||
|
||||
103
bastion/docs/kickstart-reference.md
Normal file
103
bastion/docs/kickstart-reference.md
Normal file
@@ -0,0 +1,103 @@
|
||||
# Kickstart Reference — Lessons Learned
|
||||
|
||||
This documents pitfalls discovered during PXE boot testing. Read before modifying
|
||||
the kickstart template (`src/bastion/src/templates/install.ks.ts`).
|
||||
|
||||
## Package requirements
|
||||
|
||||
### `kernel-modules` is mandatory
|
||||
|
||||
`@core` only installs `kernel-modules-core`, which lacks common modules like `vfat`,
|
||||
`zram`, and many network/filesystem drivers. Without `kernel-modules`:
|
||||
|
||||
- `/boot/efi` (FAT32) cannot mount → `systemd-remount-fs` fails → **root stays
|
||||
read-only** → sshd-keygen can't write host keys → SSH unreachable
|
||||
- `zram-generator` fails → can trigger emergency mode
|
||||
|
||||
**Always include `kernel-modules` in %packages.** This matches what the real
|
||||
labmaster (192.168.8.11) has installed.
|
||||
|
||||
This regression was introduced in commit `fac14b6`, which removed `@server-product`
|
||||
(that group pulled in `kernel-modules` via `fedora-release-server`).
|
||||
|
||||
### `dosfstools` is needed
|
||||
|
||||
Provides `mkfs.vfat` and ensures FAT filesystem support is available. The real
|
||||
labmaster has it installed.
|
||||
|
||||
### Verify against the real machine
|
||||
|
||||
Before changing the package list, SSH to the labmaster and compare:
|
||||
```bash
|
||||
ssh 192.168.8.11 "rpm -q <package>"
|
||||
```
|
||||
|
||||
## Anaconda %post execution order
|
||||
|
||||
This is critical and not well documented:
|
||||
|
||||
1. `%pre` scripts run
|
||||
2. Disk partitioning and formatting
|
||||
3. Package installation
|
||||
4. **Anaconda writes system config (fstab, hostname, etc.)**
|
||||
5. `%post` scripts run (in chroot of installed system)
|
||||
6. `%post --nochroot` scripts run
|
||||
7. **Anaconda MAY overwrite fstab again after %post scripts**
|
||||
|
||||
**Consequence:** You cannot reliably modify `/etc/fstab` from `%post` or
|
||||
`%post --nochroot`. Anaconda overwrites it. Tested and confirmed — both
|
||||
`sed` in %post and %post --nochroot had no effect on the final fstab.
|
||||
|
||||
What DOES work from %post:
|
||||
- Writing files to `/etc/` (systemd units, config files, SSH keys)
|
||||
- Enabling/disabling systemd services
|
||||
- Installing additional packages
|
||||
- Running `systemctl enable/mask`
|
||||
|
||||
What does NOT work (from %post or via kickstart options):
|
||||
- Modifying `/etc/fstab` (Anaconda overwrites it)
|
||||
- `--fsoptions` on `part /boot/efi` (Anaconda ignores it for EFI partitions)
|
||||
|
||||
## UEFI / EFI partition
|
||||
|
||||
- Anaconda always creates an EFI System Partition for UEFI installs
|
||||
- The EFI partition is FAT32 — requires `vfat` kernel module to mount
|
||||
- If `/boot/efi` fails to mount, `systemd-remount-fs` fails, which leaves
|
||||
root as read-only. This cascades to break ALL services that need to write to disk
|
||||
- The firmware reads the EFI partition directly to load the bootloader — the OS
|
||||
doesn't strictly need it mounted, but Anaconda adds it to fstab
|
||||
|
||||
## VM-specific issues (libvirt/QEMU/OVMF)
|
||||
|
||||
### iPXE exit behavior
|
||||
- `exit` (no args) returns EFI_SUCCESS → OVMF retries PXE, never reaches disk
|
||||
- `exit 1` returns EFI_ABORTED → OVMF moves to next boot device (disk)
|
||||
- VM boot order needs both `network` and `hd`: `--boot=uefi,network,hd`
|
||||
|
||||
### nftables
|
||||
- libvirt creates reject rules for NAT networks in table `ip libvirt_network`
|
||||
(NOT `inet libvirt` — this wrong table name cost hours of debugging)
|
||||
- These rules block new host→VM connections (SSH)
|
||||
- Rules are recreated on every `virsh start` — they must be deleted again after each VM restart
|
||||
- Chains: `guest_input` and `guest_output`
|
||||
|
||||
### Serial console
|
||||
- VM serial port: `--serial=tcp,host=127.0.0.1:4555,mode=bind,protocol=telnet`
|
||||
- Use `virsh console <vm-name>` for interactive access (handles telnet protocol)
|
||||
- Raw `socat` works for reading but pagers/readline break interactive use
|
||||
- Add `console=ttyS0,115200n8` to kernel args for boot output on serial
|
||||
|
||||
### SELinux on labmaster
|
||||
- Set to **permissive** — this is for k3s/kubernetes, NOT because SSH needs it
|
||||
- SSH works fine with SELinux enforcing on a properly installed Fedora system
|
||||
- The `ld.so.cache` AVC denials seen during debugging were caused by the
|
||||
read-only root filesystem, not by SELinux policy
|
||||
|
||||
## Testing checklist
|
||||
|
||||
Before merging kickstart changes:
|
||||
1. Check the real labmaster has the same packages: `ssh 192.168.8.11 "rpm -q <pkg>"`
|
||||
2. Run the PXE integration test: `sudo pnpm run test:integration:pxe`
|
||||
3. Verify via serial console (root / `lab-root-pw`) if SSH fails
|
||||
4. Check `mount | grep " / "` — must show `rw`, not `ro`
|
||||
5. Check `systemctl --failed` — no critical failures
|
||||
74
bastion/scripts/deploy.sh
Normal file
74
bastion/scripts/deploy.sh
Normal file
@@ -0,0 +1,74 @@
|
||||
#!/bin/bash
|
||||
# Deploy bastion + labd to k3s cluster and install labctl locally.
|
||||
# Usage: ./scripts/deploy.sh [bastion|labd|labctl|all]
|
||||
#
|
||||
# Builds container images with existing build scripts, pushes to Gitea
|
||||
# registry, restarts k3s pods, and builds/installs labctl RPM.
|
||||
# Strict mode: abort on command failure, on unset variables, and on a failure
# anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# Work from the project root (the parent of this script's directory) so the
# relative scripts/ and dist/ paths used by the deploy functions resolve.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$PROJECT_DIR"

# If a .env file exists, source it with allexport on so every variable it
# defines is exported to the child processes started below.
if [[ -f .env ]]; then
    set -o allexport
    source .env
    set +o allexport
fi
|
||||
|
||||
# Build and push the bastion image via scripts/build-bastion.sh, then restart
# the bastion deployment in the lab-infra namespace and wait (up to 180s) for
# the rollout to finish.
deploy_bastion() {
    local namespace="lab-infra"
    local workload="deployment/bastion"

    printf '%s\n' "=== Building & pushing bastion image ==="
    bash scripts/build-bastion.sh --push latest

    printf '\n%s\n' "=== Restarting bastion pod ==="
    kubectl rollout restart "$workload" -n "$namespace"
    kubectl rollout status "$workload" -n "$namespace" --timeout=180s

    printf '%s\n' "✓ Bastion deployed"
}
|
||||
|
||||
# Build and push the labd image via scripts/build-labd.sh, then restart the
# labd deployment in the lab-system namespace and wait (up to 180s) for the
# rollout to finish.
deploy_labd() {
    local namespace="lab-system"
    local workload="deployment/labd"

    printf '%s\n' "=== Building & pushing labd image ==="
    bash scripts/build-labd.sh --push latest

    printf '\n%s\n' "=== Restarting labd pod ==="
    kubectl rollout restart "$workload" -n "$namespace"
    kubectl rollout status "$workload" -n "$namespace" --timeout=180s

    printf '%s\n' "✓ Labd deployed"
}
|
||||
|
||||
# Build the labctl RPM and install it on this machine.
#
# If the build leaves no dist/labctl-*.x86_64.rpm behind, fall back to a
# "dev mode" install: build the workspace with pnpm and install a small
# wrapper at /usr/local/bin/labctl that runs the built CLI with node.
#
# Sets the global RPM_FILE to the RPM that was installed (empty in dev mode).
deploy_labctl() {
    echo "=== Building labctl RPM ==="
    bash scripts/build-rpm.sh
    echo ""
    echo "=== Installing labctl ==="

    # Find the RPM with a glob instead of `ls | head -1`: under
    # `set -euo pipefail` a failing `ls` (no match) fails the assignment and
    # aborts the script, so the fallback branch below could never run.
    # Glob results are sorted, so the first match is the same file
    # `ls | head -1` would have picked.
    RPM_FILE=""
    local candidate
    for candidate in dist/labctl-*.x86_64.rpm; do
        # An unmatched glob is left as the literal pattern; skip it.
        if [ -e "$candidate" ]; then
            RPM_FILE="$candidate"
            break
        fi
    done

    if [ -n "$RPM_FILE" ]; then
        sudo rpm -U --force "$RPM_FILE"
        echo "✓ labctl installed: $(labctl --version 2>/dev/null || echo 'installed')"
    else
        echo "WARNING: No RPM found, falling back to direct install"
        pnpm build

        # Stage the wrapper in a regular temp file. By default sudo closes
        # inherited file descriptors, so the /dev/fd/N path produced by a
        # process substitution is not readable by the elevated `install`.
        local wrapper
        wrapper="$(mktemp)"
        # %q shell-quotes the path so a project directory containing spaces
        # or other special characters still yields a valid wrapper script.
        printf '#!/bin/bash\nexec node %q "$@"\n' \
            "$PROJECT_DIR/src/cli/dist/index.js" > "$wrapper"

        if ! sudo install -m 755 "$wrapper" /usr/local/bin/labctl; then
            rm -f "$wrapper"
            return 1
        fi
        rm -f "$wrapper"
        echo "✓ labctl installed (dev mode)"
    fi
}
|
||||
|
||||
# Dispatch on the requested component; with no argument, deploy everything
# in order: bastion, labd, then labctl.
case "${1:-all}" in
    bastion) deploy_bastion ;;
    labd)    deploy_labd ;;
    labctl)  deploy_labctl ;;
    all)
        deploy_bastion
        echo ""
        deploy_labd
        echo ""
        deploy_labctl
        ;;
    *)
        # Usage errors go to stderr so they are not mistaken for normal
        # output when the script's stdout is captured or piped.
        echo "Usage: $0 [bastion|labd|labctl|all]" >&2
        exit 1
        ;;
esac

echo ""
echo "=== Deploy complete ==="
|
||||
@@ -257,7 +257,7 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
state.update((s) => {
|
||||
s.install_queue[msg.mac] = {
|
||||
hostname: msg.hostname,
|
||||
disk: msg.disk ?? "/dev/sda",
|
||||
disk: msg.disk ?? "",
|
||||
role: msg.role as import("@lab/shared").Role,
|
||||
os: msg.os as import("@lab/shared").OsId,
|
||||
queued_at: new Date().toISOString(),
|
||||
@@ -266,6 +266,22 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
return { status: "ok", data: { mac: msg.mac, hostname: msg.hostname } };
|
||||
});
|
||||
|
||||
// Queue a one-shot debug/rescue boot for a machine on request from labd.
// Replies with the normalised MAC and the hostname the entry was stored under.
labdConn.onCommand("command-debug", async (msg) => {
  if (msg.type !== "command-debug") throw new Error("unexpected");

  // Normalise the MAC the same way the HTTP dispatch lookup does (lower-case,
  // colon-separated). With lower-casing alone, a dash-separated MAC would be
  // stored under a key the dispatch route never looks up, and the queued
  // debug boot would be silently ignored.
  const mac = msg.mac.toLowerCase().replace(/-/g, ":");
  const pxeBoot = msg.pxeBoot ?? false;

  // Best available display name: installed hostname, then queued hostname,
  // then the discovered product string, and finally the MAC itself.
  const currentState = state.load();
  const hostname =
    currentState.installed[mac]?.hostname ??
    currentState.install_queue[mac]?.hostname ??
    currentState.discovered[mac]?.product ??
    mac;

  state.update((s) => {
    s.debug[mac] = { hostname, queued_at: new Date().toISOString(), pxeBoot };
  });

  return { status: "ok", data: { mac, hostname } };
});
|
||||
|
||||
labdConn.onCommand("command-forget", async (msg) => {
|
||||
if (msg.type !== "command-forget") throw new Error("unexpected");
|
||||
const mac = msg.mac.toLowerCase();
|
||||
@@ -273,6 +289,7 @@ export async function startBastion(overrides: Partial<BastionConfig> = {}): Prom
|
||||
delete s.discovered[mac];
|
||||
delete s.install_queue[mac];
|
||||
delete s.installed[mac];
|
||||
delete s.debug[mac];
|
||||
});
|
||||
return { status: "ok", data: { mac } };
|
||||
});
|
||||
|
||||
@@ -13,11 +13,13 @@ import { triggerPostProvisionK3s } from "../services/post-provision.js";
|
||||
import { progressBus } from "../services/progress-events.js";
|
||||
import type { ProgressEvent } from "../services/progress-events.js";
|
||||
import type { InstallLogBuffer } from "../services/install-log.js";
|
||||
import type { SyslogListener } from "../services/syslog-listener.js";
|
||||
|
||||
export function registerApiRoutes(
|
||||
app: FastifyInstance,
|
||||
state: StateManager,
|
||||
installLog: InstallLogBuffer,
|
||||
syslog: SyslogListener,
|
||||
): void {
|
||||
// List all machines
|
||||
app.get("/api/machines", async (_request, reply) => {
|
||||
@@ -84,6 +86,11 @@ export function registerApiRoutes(
|
||||
const { mac: rawMac, stage, detail } = request.body ?? {};
|
||||
const mac = (rawMac ?? "unknown").toLowerCase();
|
||||
const stageName = stage ?? "unknown";
|
||||
|
||||
// Register IP → MAC for syslog routing
|
||||
if (mac !== "unknown") {
|
||||
syslog.registerIp(request.ip, mac);
|
||||
}
|
||||
const detailStr = detail ?? "";
|
||||
|
||||
const GREEN = "\x1b[0;32m";
|
||||
@@ -189,6 +196,32 @@ export function registerApiRoutes(
|
||||
return reply.send({ status: "ok", lines: allLines.length });
|
||||
});
|
||||
|
||||
// Queue debug/rescue mode for a machine.
//
// Body: { mac, pxeBoot? }. Responds 400 when `mac` is missing, empty, or not a
// string; otherwise stores a debug entry keyed by the normalised MAC and
// replies with { status, mac, hostname }.
app.post<{
  Body: { mac?: string; pxeBoot?: boolean };
}>("/api/debug", async (request, reply) => {
  // The body is unvalidated JSON: a non-string `mac` would otherwise throw on
  // .toLowerCase() and surface as a 500 instead of a client error.
  const rawMac: unknown = request.body?.mac;
  const mac =
    typeof rawMac === "string" ? rawMac.trim().toLowerCase().replace(/-/g, ":") : "";
  if (mac === "") {
    return reply.status(400).send({ error: "mac is required" });
  }

  // Only a literal JSON `true` enables PXE-boot mode, so a stray string or
  // number is never persisted into state as the flag.
  const pxeBoot = request.body?.pxeBoot === true;

  // Look up hostname from installed, queued, or discovered state; fall back
  // to the MAC itself when the machine is unknown.
  const currentState = state.load();
  const hostname =
    currentState.installed[mac]?.hostname ??
    currentState.install_queue[mac]?.hostname ??
    currentState.discovered[mac]?.product ??
    mac;

  state.update((s) => {
    s.debug[mac] = { hostname, queued_at: new Date().toISOString(), pxeBoot };
  });

  logger.info(`DEBUG QUEUED: ${mac} -> ${hostname}`);
  return reply.send({ status: "ok", mac, hostname });
});
|
||||
|
||||
// Delete a machine from all state
|
||||
app.delete<{
|
||||
Params: { mac: string };
|
||||
@@ -213,6 +246,10 @@ export function registerApiRoutes(
|
||||
delete s.installed[mac];
|
||||
found = true;
|
||||
}
|
||||
if (s.debug[mac] !== undefined) {
|
||||
delete s.debug[mac];
|
||||
found = true;
|
||||
}
|
||||
});
|
||||
|
||||
if (!found) {
|
||||
|
||||
@@ -10,9 +10,12 @@ import type { StateManager } from "../services/state.js";
|
||||
import {
|
||||
renderDiscoverIpxe,
|
||||
renderInstallIpxe,
|
||||
renderDebugIpxe,
|
||||
renderPxeBootDebugIpxe,
|
||||
renderLocalBootIpxe,
|
||||
} from "../templates/boot.ipxe.js";
|
||||
import { renderUbuntuInstallIpxe } from "../templates/ubuntu-boot.ipxe.js";
|
||||
import { renderDebugKickstart } from "../templates/debug.ks.js";
|
||||
import { logger } from "../services/logger.js";
|
||||
|
||||
export function registerDispatchRoutes(
|
||||
@@ -20,10 +23,76 @@ export function registerDispatchRoutes(
|
||||
config: BastionConfig,
|
||||
state: StateManager,
|
||||
): void {
|
||||
// GET /debug.ks — render the debug/rescue kickstart from the bastion config
// and return it as plain text. The request (including any ?mac= query
// parameter) is not consulted.
app.get<{ Querystring: { mac?: string } }>("/debug.ks", async (_request, reply) => {
  const authorizedKeys = config.sshKeys ?? [];
  const kickstart = renderDebugKickstart({
    sshKeys: authorizedKeys,
    serverIp: config.serverIp,
    httpPort: config.httpPort,
  });

  reply.type("text/plain");
  return reply.send(kickstart);
});
|
||||
|
||||
// Shell script for manual debug setup (nc listener + IP reporting)
|
||||
// Usage from rescue shell: curl http://bastion:port/debug-setup.sh | bash
|
||||
app.get("/debug-setup.sh", async (_request, reply) => {
|
||||
const script = `#!/bin/bash
|
||||
# Lab Bastion debug setup — run from rescue shell
|
||||
set -x
|
||||
|
||||
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
|
||||
MAC_ADDR=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||
|
||||
# Start persistent nc listener for remote shell
|
||||
(while true; do nc -l -p 2323 -e /bin/bash 2>/dev/null; done) &
|
||||
echo "nc shell listener on port 2323"
|
||||
|
||||
# Report IP to bastion
|
||||
curl -sf -X POST "http://${config.serverIp}:${config.httpPort}/api/progress" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"mac\\":\\"$MAC_ADDR\\",\\"stage\\":\\"debug-ready\\",\\"detail\\":\\"nc $IP_ADDR 2323\\"}" 2>/dev/null || true
|
||||
|
||||
echo ""
|
||||
echo "=== Debug environment ready ==="
|
||||
echo " nc $IP_ADDR 2323 (remote shell)"
|
||||
echo " ssh root@$IP_ADDR (password: debug)"
|
||||
echo "==============================="
|
||||
`;
|
||||
return reply.type("text/plain").send(script);
|
||||
});
|
||||
|
||||
app.get<{ Querystring: { mac?: string } }>("/dispatch", async (request, reply) => {
|
||||
const mac = (request.query.mac ?? "").toLowerCase().replace(/-/g, ":");
|
||||
const currentState = state.load();
|
||||
|
||||
// Debug mode takes highest priority — auto-clear after serving once
|
||||
const debugEntry = currentState.debug[mac];
|
||||
if (debugEntry) {
|
||||
const hostname = debugEntry.hostname ?? "debug";
|
||||
state.update((s) => { delete s.debug[mac]; });
|
||||
|
||||
let script: string;
|
||||
if (debugEntry.pxeBoot) {
|
||||
logger.info(`PXE BOOT DEBUG: ${mac} -> ${hostname} (kernel+initrd from PXE, root from NVMe)`);
|
||||
script = renderPxeBootDebugIpxe({
|
||||
mac,
|
||||
hostname,
|
||||
serverIp: config.serverIp,
|
||||
httpPort: config.httpPort,
|
||||
});
|
||||
} else {
|
||||
logger.info(`DEBUG BOOT: ${mac} -> ${hostname} (rescue mode)`);
|
||||
script = renderDebugIpxe({
|
||||
mac,
|
||||
hostname,
|
||||
serverIp: config.serverIp,
|
||||
httpPort: config.httpPort,
|
||||
fedoraMirror: config.fedoraMirror,
|
||||
});
|
||||
}
|
||||
return reply.type("text/plain").send(script);
|
||||
}
|
||||
|
||||
const queueEntry = currentState.install_queue[mac];
|
||||
if (queueEntry) {
|
||||
const hostname = queueEntry.hostname ?? "lab-node";
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
import type { FastifyInstance } from "fastify";
|
||||
import type { BastionConfig } from "@lab/shared";
|
||||
import type { StateManager } from "../services/state.js";
|
||||
import type { SyslogListener } from "../services/syslog-listener.js";
|
||||
import { generateInstallKickstart, generateDiscoverKickstart } from "../services/kickstart-generator.js";
|
||||
import { renderUbuntuAutoinstall, renderUbuntuMetaData, type UbuntuAutoinstallParams } from "../templates/ubuntu-autoinstall.js";
|
||||
|
||||
@@ -12,6 +13,7 @@ export function registerKickstartRoutes(
|
||||
app: FastifyInstance,
|
||||
config: BastionConfig,
|
||||
state: StateManager,
|
||||
syslog: SyslogListener,
|
||||
): void {
|
||||
// Per-MAC install kickstart
|
||||
app.get<{ Querystring: { mac?: string } }>("/ks", async (request, reply) => {
|
||||
@@ -19,6 +21,11 @@ export function registerKickstartRoutes(
|
||||
const currentState = state.load();
|
||||
const queueEntry = currentState.install_queue[mac];
|
||||
|
||||
// Register IP → MAC so syslog listener can route Anaconda logs
|
||||
if (mac) {
|
||||
syslog.registerIp(request.ip, mac);
|
||||
}
|
||||
|
||||
const ks = generateInstallKickstart(config, {
|
||||
hostname: queueEntry?.hostname ?? "lab-node",
|
||||
disk: queueEntry?.disk ?? "",
|
||||
|
||||
@@ -43,8 +43,8 @@ export function createApp(config: BastionConfig): { app: ReturnType<typeof Fasti
|
||||
|
||||
// Register route handlers
|
||||
registerDispatchRoutes(app, config, state);
|
||||
registerKickstartRoutes(app, config, state);
|
||||
registerApiRoutes(app, state, installLog);
|
||||
registerKickstartRoutes(app, config, state, syslog);
|
||||
registerApiRoutes(app, state, installLog, syslog);
|
||||
// boot.iso is generated at startup and served as a static file from httpDir
|
||||
// (static serving supports HTTP Range requests, required by JetKVM streaming)
|
||||
|
||||
|
||||
@@ -164,6 +164,7 @@ export class BastionConnection {
|
||||
case "command-install":
|
||||
case "command-forget":
|
||||
case "command-role-update":
|
||||
case "command-debug":
|
||||
void this.handleCommand(msg);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ const EMPTY_STATE: BastionState = {
|
||||
discovered: {},
|
||||
install_queue: {},
|
||||
installed: {},
|
||||
debug: {},
|
||||
};
|
||||
|
||||
export type StateChangeListener = (state: BastionState) => void;
|
||||
@@ -33,6 +34,7 @@ export class StateManager {
|
||||
discovered: parsed.discovered ?? {},
|
||||
install_queue: parsed.install_queue ?? {},
|
||||
installed: parsed.installed ?? {},
|
||||
debug: parsed.debug ?? {},
|
||||
};
|
||||
} catch {
|
||||
return { ...EMPTY_STATE };
|
||||
|
||||
@@ -18,7 +18,7 @@ function parseSyslogLine(raw: string): { program: string; message: string } {
|
||||
// Try to extract program and message after the timestamp + hostname
|
||||
// RFC 3164: "Mon DD HH:MM:SS HOSTNAME PROGRAM[PID]: MESSAGE"
|
||||
const match = noPri.match(/^\w+\s+\d+\s+[\d:]+\s+\S+\s+(\S+?)(?:\[\d+\])?:\s*(.*)/);
|
||||
if (match) {
|
||||
if (match?.[1] && match[2] !== undefined) {
|
||||
return { program: match[1], message: match[2] };
|
||||
}
|
||||
// Fallback: just return the whole line
|
||||
@@ -30,6 +30,8 @@ export class SyslogListener {
|
||||
private port: number;
|
||||
private installLog: InstallLogBuffer;
|
||||
private state: StateManager;
|
||||
/** Explicit IP → MAC mapping registered from kickstart/progress requests. */
|
||||
private ipToMac = new Map<string, string>();
|
||||
|
||||
constructor(port: number, installLog: InstallLogBuffer, state: StateManager) {
|
||||
this.port = port;
|
||||
@@ -37,14 +39,21 @@ export class SyslogListener {
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
/** Resolve a source IP to a MAC address using the install queue. */
|
||||
/**
 * Remember which MAC address a source IP belongs to.
 *
 * The MAC is stored lower-cased; a later registration for the same IP
 * replaces the earlier one.
 */
registerIp(ip: string, mac: string): void {
  const normalizedMac = mac.toLowerCase();
  this.ipToMac.set(ip, normalizedMac);
}
|
||||
|
||||
/** Resolve a source IP to a MAC address. */
|
||||
private resolveIpToMac(ip: string): string | null {
|
||||
// Check explicit mapping first (most reliable)
|
||||
const explicit = this.ipToMac.get(ip);
|
||||
if (explicit) return explicit;
|
||||
|
||||
const currentState = this.state.load();
|
||||
|
||||
// Check install queue — machines being installed have an IP from DHCP
|
||||
for (const [mac, entry] of Object.entries(currentState.install_queue)) {
|
||||
// The progress callback sends IP in "complete" detail, but during install
|
||||
// we need to match by what we know. Check if any progress mentions this IP.
|
||||
if (entry.progress_detail?.includes(ip)) return mac;
|
||||
}
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ echo Collecting hardware info...
|
||||
echo =============================================
|
||||
echo
|
||||
|
||||
kernel http://${params.serverIp}:${params.httpPort}/vmlinuz inst.ks=http://${params.serverIp}:${params.httpPort}/discover.ks inst.stage2=${params.fedoraMirror} inst.text console=ttyS0,115200n8 console=tty0
|
||||
kernel http://${params.serverIp}:${params.httpPort}/vmlinuz inst.ks=http://${params.serverIp}:${params.httpPort}/discover.ks inst.stage2=${params.fedoraMirror} inst.text nomodeset
|
||||
initrd http://${params.serverIp}:${params.httpPort}/initrd.img
|
||||
boot
|
||||
`;
|
||||
@@ -69,7 +69,62 @@ echo MAC: ${params.mac}
|
||||
echo =============================================
|
||||
echo
|
||||
|
||||
kernel http://${params.serverIp}:${params.httpPort}/vmlinuz inst.ks=http://${params.serverIp}:${params.httpPort}/ks?mac=${params.mac} inst.repo=${params.fedoraMirror} inst.text console=ttyS0,115200n8 console=tty0
|
||||
kernel http://${params.serverIp}:${params.httpPort}/vmlinuz inst.ks=http://${params.serverIp}:${params.httpPort}/ks?mac=${params.mac} inst.repo=${params.fedoraMirror} inst.text nomodeset
|
||||
initrd http://${params.serverIp}:${params.httpPort}/initrd.img
|
||||
boot
|
||||
`;
|
||||
}
|
||||
|
||||
/**
|
||||
* iPXE script for debug/rescue mode -- boots Fedora installer in rescue mode.
|
||||
* Provides a shell with LVM tools, network, and SSH for inspecting installed systems.
|
||||
*/
|
||||
export function renderDebugIpxe(params: {
|
||||
mac: string;
|
||||
hostname: string;
|
||||
serverIp: string;
|
||||
httpPort: number;
|
||||
fedoraMirror: string;
|
||||
}): string {
|
||||
return `#!ipxe
|
||||
|
||||
echo
|
||||
echo =============================================
|
||||
echo Lab PXE Bastion - DEBUG/RESCUE MODE
|
||||
echo Target: ${params.hostname}
|
||||
echo MAC: ${params.mac}
|
||||
echo =============================================
|
||||
echo
|
||||
|
||||
kernel http://${params.serverIp}:${params.httpPort}/vmlinuz inst.rescue inst.text inst.sshd inst.ks=http://${params.serverIp}:${params.httpPort}/debug.ks?mac=${params.mac} inst.stage2=${params.fedoraMirror}
|
||||
initrd http://${params.serverIp}:${params.httpPort}/initrd.img
|
||||
boot
|
||||
`;
|
||||
}
|
||||
|
||||
/**
|
||||
* iPXE script for PXE-boot debug mode -- boots the installed system's root
|
||||
* filesystem using the bastion's PXE kernel+initrd instead of local GRUB.
|
||||
* Workaround for UEFI firmware bugs that make local disk boot slow.
|
||||
*/
|
||||
export function renderPxeBootDebugIpxe(params: {
|
||||
mac: string;
|
||||
hostname: string;
|
||||
serverIp: string;
|
||||
httpPort: number;
|
||||
}): string {
|
||||
return `#!ipxe
|
||||
|
||||
echo
|
||||
echo =============================================
|
||||
echo Lab PXE Bastion - PXE BOOT (debug)
|
||||
echo Target: ${params.hostname}
|
||||
echo MAC: ${params.mac}
|
||||
echo Kernel+initrd from PXE, root from NVMe
|
||||
echo =============================================
|
||||
echo
|
||||
|
||||
kernel http://${params.serverIp}:${params.httpPort}/vmlinuz root=/dev/mapper/labvg-root ro rd.lvm.lv=labvg/root rd.lvm.lv=labvg/swap console=tty0
|
||||
initrd http://${params.serverIp}:${params.httpPort}/initrd.img
|
||||
boot
|
||||
`;
|
||||
|
||||
33
bastion/src/bastion/src/templates/debug.ks.ts
Normal file
33
bastion/src/bastion/src/templates/debug.ks.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
// Debug/rescue kickstart template.
|
||||
// Minimal kickstart for Anaconda rescue mode.
|
||||
//
|
||||
// SSH access: Anaconda's inst.sshd starts sshd automatically.
|
||||
// The sshpw directive sets the password, sshkey adds authorized keys.
|
||||
// %pre/%post do NOT run in rescue mode — don't put setup code there.
|
||||
|
||||
/** Inputs for {@link renderDebugKickstart}. */
export interface DebugKickstartParams {
  /** Public keys to authorize for root; one `sshkey` directive is emitted per key. */
  sshKeys: string[];
  /** Accepted for callers' convenience; not referenced by the template below. */
  serverIp?: string;
  /** Accepted for callers' convenience; not referenced by the template below. */
  httpPort?: number;
}

/**
 * Render the minimal kickstart served to machines booting into rescue mode.
 *
 * The output sets language, keyboard and DHCP networking, sets the root
 * password to `debug` via `sshpw`, and appends one `sshkey --username=root`
 * directive for every configured key (previously only the first key was
 * emitted, silently dropping the rest).
 *
 * @param params - See {@link DebugKickstartParams}. Keys are trimmed and
 *   blank entries are skipped so they cannot produce an empty directive.
 * @returns The kickstart text. With no usable keys the `sshkey` section is an
 *   empty line, exactly as before.
 */
export function renderDebugKickstart(params: DebugKickstartParams): string {
  const sshkeyLines = params.sshKeys
    .map((key) => key.trim())
    .filter((key) => key !== "")
    .map((key) => `sshkey --username=root "${key}"`)
    .join("\n");

  return `# Lab Bastion -- Debug/Rescue Kickstart
# Minimal: SSH + network for Anaconda rescue mode
#
# SSH is started by Anaconda (inst.sshd kernel param).
# Password: debug | SSH keys from bastion config.
# %pre/%post do NOT run in rescue mode.

lang en_US.UTF-8
keyboard uk
network --bootproto=dhcp --activate

sshpw --username=root --plaintext debug
${sshkeyLines}
`;
}
|
||||
@@ -88,8 +88,23 @@ chmod 440 /etc/sudoers.d/${adminUser}`;
|
||||
const diskLine = disk
|
||||
? `DISK="${disk}"`
|
||||
: `DISK=""
|
||||
for d in /dev/nvme0n1 /dev/sda /dev/vda; do
|
||||
[ -b "$d" ] && { DISK="$(basename $d)"; break; }
|
||||
# Wait up to 10s for NVMe/SCSI disks to appear (they init async in initrd)
|
||||
for _wait in $(seq 1 10); do
|
||||
for d in /dev/nvme0n1 /dev/nvme1n1 /dev/sda /dev/sdb /dev/vda; do
|
||||
[ -b "$d" ] || continue
|
||||
_bname=$(basename "$d")
|
||||
# Skip removable disks (USB, CD-ROM, JetKVM virtual media)
|
||||
[ -f "/sys/block/$_bname/removable" ] && [ "$(cat /sys/block/$_bname/removable)" = "1" ] && continue
|
||||
# Skip USB-attached disks (JetKVM virtual media shows as SCSI over USB)
|
||||
_transport=$(readlink -f /sys/block/$_bname/device 2>/dev/null || echo "")
|
||||
echo "$_transport" | grep -q "usb" && continue
|
||||
# Skip disks smaller than 20GB (likely USB sticks)
|
||||
_size=$(cat /sys/block/$_bname/size 2>/dev/null || echo 0)
|
||||
[ "$_size" -lt 41943040 ] && continue
|
||||
DISK="$_bname"
|
||||
break 2
|
||||
done
|
||||
sleep 1
|
||||
done
|
||||
[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }`;
|
||||
|
||||
@@ -119,7 +134,7 @@ network --bootproto=dhcp --activate --hostname=${fqdn}
|
||||
${auth}
|
||||
${userDirective}
|
||||
|
||||
bootloader --append="console=tty0 console=ttyS0,115200n8"
|
||||
bootloader --append="console=tty0"
|
||||
|
||||
logging --host=${serverIp} --port=${syslogPort}
|
||||
|
||||
@@ -306,56 +321,27 @@ bastion_progress() {
|
||||
-d "{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}" 2>/dev/null || true
|
||||
}
|
||||
|
||||
# Send log lines to bastion
|
||||
bastion_log() {
|
||||
local line="$1"
|
||||
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {print $2; exit}')
|
||||
curl -sf -X POST "http://${serverIp}:${httpPort}/api/log" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"mac\\":\\"$mac\\",\\"line\\":\\"$(echo "$line" | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g')\\"}\" \\
|
||||
--connect-timeout 5 --max-time 10 2>/dev/null || true
|
||||
}
|
||||
|
||||
# Send an error stage to bastion
|
||||
bastion_error() {
|
||||
local detail="$1"
|
||||
bastion_progress "error" "$detail"
|
||||
}
|
||||
|
||||
# --- Error trap: catch any failure and report to bastion ---
|
||||
_post_error_handler() {
|
||||
local exit_code=$? lineno=$1
|
||||
bastion_error "%post failed at line $lineno (exit $exit_code)"
|
||||
}
|
||||
trap '_post_error_handler $LINENO' ERR
|
||||
|
||||
bastion_progress "post-install" "configuring system"
|
||||
|
||||
# -- SSH --
|
||||
systemctl enable --now sshd
|
||||
# Note: only 'enable', not '--now' — systemd is not running in the Anaconda chroot
|
||||
systemctl enable sshd || true
|
||||
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
||||
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||
${sshPostBlock}
|
||||
|
||||
# -- Hostname and domain --
|
||||
hostnamectl set-hostname ${fqdn}
|
||||
bastion_progress "post-install" "1-ssh done"
|
||||
|
||||
# -- Hostname and domain (write directly, hostnamectl needs D-Bus) --
|
||||
echo "${fqdn}" > /etc/hostname
|
||||
|
||||
# -- tmpfs for /tmp --
|
||||
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
|
||||
|
||||
${isVanilla ? `# -- vanilla role: skip k3s kernel/sysctl/firewall setup --
|
||||
# -- Enable chronyd for time sync --
|
||||
systemctl enable chronyd || true
|
||||
|
||||
# -- Serial console (for debugging — auto-login as root on ttyS0) --
|
||||
# AWS EC2 compatible: ttyS0 @ 115200n8
|
||||
systemctl enable serial-getty@ttyS0.service || true
|
||||
|
||||
# -- Forward all system logs to serial console --
|
||||
cat > /etc/rsyslog.d/serial-console.conf << 'RSYSLOG'
|
||||
*.* /dev/ttyS0
|
||||
RSYSLOG
|
||||
systemctl enable rsyslog || true` : `# -- Kernel modules for k3s --
|
||||
systemctl enable chronyd || true` : `# -- Kernel modules for k3s --
|
||||
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
|
||||
br_netfilter
|
||||
overlay
|
||||
@@ -376,12 +362,15 @@ SYSCTL
|
||||
sysctl --system || true
|
||||
|
||||
# -- Disable firewalld permanently (k3s/Cilium manage iptables directly) --
|
||||
systemctl disable --now firewalld || true
|
||||
# Note: no '--now' — systemd is not running in the Anaconda chroot
|
||||
systemctl disable firewalld || true
|
||||
systemctl mask firewalld || true
|
||||
|
||||
# -- Enable chronyd for time sync --
|
||||
systemctl enable chronyd || true`}
|
||||
|
||||
bastion_progress "post-install" "2-system done"
|
||||
|
||||
# -- Boot order: restore network first (Anaconda sets disk first, we undo it) --
|
||||
# Network boot must stay first so the bastion intercepts every reboot.
|
||||
if command -v efibootmgr >/dev/null 2>&1; then
|
||||
@@ -394,6 +383,11 @@ if command -v efibootmgr >/dev/null 2>&1; then
|
||||
fi
|
||||
fi
|
||||
|
||||
bastion_progress "post-install" "3-bootorder done"
|
||||
|
||||
# -- Enable SysRq magic keys (for emergency reboot via Alt+SysRq+REISUB) --
|
||||
echo "kernel.sysrq=1" > /etc/sysctl.d/90-sysrq.conf
|
||||
|
||||
# -- Provisioning metadata --
|
||||
cat > /etc/lab-provisioned << PROVEOF
|
||||
hostname: ${fqdn}
|
||||
@@ -419,6 +413,8 @@ README
|
||||
${hasRancher ? `# Install k3s server (skip start - will be configured manually)
|
||||
curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -
|
||||
` : ""}
|
||||
bastion_progress "post-install" "4-metadata done"
|
||||
|
||||
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
|
||||
bastion_progress "complete" "ready at $IP_ADDR"
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ function createTestConfig(testDir: string): BastionConfig {
|
||||
gateway: "10.0.0.1",
|
||||
sshKeys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITEST test@test"],
|
||||
adminUser: "testadmin",
|
||||
syslogPort: 15514,
|
||||
skipDnsmasq: true,
|
||||
skipArtifacts: true,
|
||||
fedoraMirror: "https://download.fedoraproject.org/pub/fedora/linux/releases/43/Everything/x86_64/os",
|
||||
|
||||
@@ -206,10 +206,8 @@ describe("renderInstallKickstart", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("forwards system logs to serial console", () => {
|
||||
it("does not include serial console (causes 30s boot timeout on hardware without UART)", () => {
|
||||
const ks = renderInstallKickstart(baseParams({ role: "vanilla" }));
|
||||
expect(ks).toContain("serial-console.conf");
|
||||
expect(ks).toContain("/dev/ttyS0");
|
||||
expect(ks).toContain("rsyslog");
|
||||
expect(ks).not.toContain("ttyS0");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -26,6 +26,7 @@ describe("StateManager", () => {
|
||||
discovered: {},
|
||||
install_queue: {},
|
||||
installed: {},
|
||||
debug: {},
|
||||
});
|
||||
});
|
||||
|
||||
@@ -39,6 +40,7 @@ describe("StateManager", () => {
|
||||
discovered: {},
|
||||
install_queue: {},
|
||||
installed: {},
|
||||
debug: {},
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
121
bastion/src/bastion/tests/syslog-listener.test.ts
Normal file
121
bastion/src/bastion/tests/syslog-listener.test.ts
Normal file
@@ -0,0 +1,121 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { createSocket } from "node:dgram";
|
||||
import { mkdtempSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { SyslogListener } from "../src/services/syslog-listener.js";
|
||||
import { InstallLogBuffer } from "../src/services/install-log.js";
|
||||
import { StateManager } from "../src/services/state.js";
|
||||
|
||||
/**
 * Fire a single UDP datagram containing `message` at 127.0.0.1:`port`.
 *
 * A fresh udp4 socket is opened for the send and closed as soon as the send
 * callback fires, whether or not the send succeeded.
 *
 * @param port - Destination UDP port on the loopback interface.
 * @param message - Raw syslog line to transmit.
 * @returns A promise that resolves once the datagram has been handed off, or
 *   rejects with the error reported by the send callback.
 */
function sendUdpSyslog(port: number, message: string): Promise<void> {
  return new Promise<void>((done, fail) => {
    const socket = createSocket("udp4");
    const payload = Buffer.from(message);

    socket.send(payload, 0, payload.length, port, "127.0.0.1", (sendError) => {
      // Close first so the socket is released on both outcomes.
      socket.close();
      if (sendError) {
        fail(sendError);
        return;
      }
      done();
    });
  });
}
|
||||
|
||||
// End-to-end tests for SyslogListener: real UDP datagrams are sent to a
// listener bound on the loopback interface, then the install-log buffer is
// inspected.
describe("SyslogListener", () => {
  // use non-privileged port for testing
  const PORT = 15514;
  // Pause that gives the datagram time to reach the listener before asserting.
  const DELIVERY_WAIT_MS = 200;

  let tmpDir: string;
  let state: StateManager;
  let installLog: InstallLogBuffer;
  let syslog: SyslogListener;

  /** Sleep long enough for an in-flight UDP message to be processed. */
  const waitForDelivery = (): Promise<unknown> =>
    new Promise((resolve) => setTimeout(resolve, DELIVERY_WAIT_MS));

  /** Add a "testnode" install-queue entry for `mac` so its hostname can be resolved. */
  const queueTestNode = (mac: string): void => {
    state.update((s) => {
      s.install_queue[mac] = {
        hostname: "testnode",
        disk: "/dev/sda",
        role: "worker",
        os: "fedora-43",
        queued_at: new Date().toISOString(),
      };
    });
  };

  beforeEach(() => {
    // Each test gets its own scratch directory, state file and listener.
    tmpDir = mkdtempSync(join(tmpdir(), "syslog-test-"));
    state = new StateManager(join(tmpDir, "state.json"));
    state.init();
    installLog = new InstallLogBuffer(tmpDir);
    syslog = new SyslogListener(PORT, installLog, state);
    syslog.start();
  });

  afterEach(() => {
    syslog.stop();
    rmSync(tmpDir, { recursive: true, force: true });
  });

  it("receives and stores syslog messages for registered IP", async () => {
    const mac = "aa:bb:cc:dd:ee:ff";
    queueTestNode(mac);

    // Explicit IP -> MAC mapping for the loopback sender.
    syslog.registerIp("127.0.0.1", mac);

    // RFC 3164 style message.
    await sendUdpSyslog(PORT, "<13>Mar 30 01:30:00 localhost anaconda[1234]: Installing package vim-enhanced");
    await waitForDelivery();

    const stored = installLog.getLines(mac);
    expect(stored.length).toBeGreaterThan(0);
    expect(stored[0]!.line).toContain("anaconda");
    expect(stored[0]!.line).toContain("Installing package vim-enhanced");
  });

  it("ignores messages from unknown IPs", async () => {
    // No IP mapping is registered here.
    await sendUdpSyslog(PORT, "<13>Mar 30 01:30:00 localhost anaconda[1234]: test message");
    await waitForDelivery();

    // There is no MAC to look up; the listener must survive and store nothing.
    expect(installLog.lineCount("unknown")).toBe(0);
  });

  it("resolves IP from installed machines state", async () => {
    const mac = "11:22:33:44:55:66";
    state.update((s) => {
      s.installed[mac] = {
        hostname: "installed-node",
        role: "worker",
        ip: "127.0.0.1",
        installed_at: new Date().toISOString(),
      };
    });

    await sendUdpSyslog(PORT, "<14>Mar 30 02:00:00 installed-node sshd[5678]: Accepted publickey for root");
    await waitForDelivery();

    const stored = installLog.getLines(mac);
    expect(stored.length).toBeGreaterThan(0);
    expect(stored[0]!.line).toContain("sshd");
  });

  it("parses various syslog formats", async () => {
    const mac = "aa:bb:cc:dd:ee:ff";
    syslog.registerIp("127.0.0.1", mac);
    queueTestNode(mac);

    // Message whose tag carries no PID.
    await sendUdpSyslog(PORT, "<13>Mar 30 01:30:00 localhost kernel: NVMe device ready");
    await waitForDelivery();

    const stored = installLog.getLines(mac);
    expect(stored.length).toBeGreaterThan(0);
    expect(stored[0]!.line).toContain("kernel");
  });
});
|
||||
@@ -94,6 +94,10 @@ export class LabdClient {
|
||||
return this.request("POST", "/api/machines/install", { body: opts });
|
||||
}
|
||||
|
||||
/**
 * POST to `/api/machines/debug` to queue a machine for a debug boot.
 *
 * @param mac - MAC address of the target machine.
 * @param opts - Optional flags; `pxeBoot` is forwarded as-is (may be undefined).
 * @returns The response produced by `request`.
 */
async debugMachine(mac: string, opts?: { pxeBoot?: boolean }): Promise<{ status: string; data?: { mac: string; hostname: string }; error?: string }> {
  const body = { mac, pxeBoot: opts?.pxeBoot };
  return this.request("POST", "/api/machines/debug", { body });
}
|
||||
|
||||
async forgetMachine(mac: string): Promise<{ status: string }> {
|
||||
return this.request("DELETE", `/api/machines/${encodeURIComponent(mac)}`);
|
||||
}
|
||||
|
||||
155
bastion/src/cli/src/commands/debug.ts
Normal file
155
bastion/src/cli/src/commands/debug.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
// CLI command: provision debug
|
||||
// Queue a machine for debug/rescue PXE boot and optionally SSH reboot into PXE.
|
||||
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { existsSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { Command } from "commander";
|
||||
import type { BastionState } from "@lab/shared";
|
||||
import { getLabdClient } from "../api/config.js";
|
||||
|
||||
/**
 * Resolve a target (hostname, MAC, or IP) to `{mac, hostname, ip}` from state.
 *
 * Lookup order:
 *  1. MAC (case-insensitive, `-` or `:` separated) in `installed`,
 *     `discovered`, then `install_queue`;
 *  2. hostname of an installed machine, either exact or as the short name of
 *     an FQDN (`node1` matches `node1.lab.local`), compared case-insensitively;
 *  3. IP address of an installed machine.
 *
 * @param target - What the user typed on the command line.
 * @param state - Machine state to search.
 * @returns The resolved machine, or `null` when nothing matches. Discovered
 *   machines have no hostname yet, so the MAC is used in its place; machines
 *   that are not installed have an empty `ip`.
 */
function resolveTarget(
  target: string,
  state: BastionState,
): { mac: string; hostname: string; ip: string } | null {
  // An empty target would otherwise match any installed machine whose ip is "".
  if (target === "") return null;

  const normalized = target.toLowerCase().replace(/-/g, ":");

  // Own-property check: the state tables are plain objects, so a target such
  // as "constructor" must not resolve through Object.prototype.
  const owns = (table: object): boolean =>
    Object.prototype.hasOwnProperty.call(table, normalized);

  const installedInfo = owns(state.installed) ? state.installed[normalized] : undefined;
  if (installedInfo) {
    return { mac: normalized, hostname: installedInfo.hostname, ip: installedInfo.ip };
  }

  if (owns(state.discovered) && state.discovered[normalized]) {
    return { mac: normalized, hostname: normalized, ip: "" };
  }

  const queuedInfo = owns(state.install_queue) ? state.install_queue[normalized] : undefined;
  if (queuedInfo) {
    return { mac: normalized, hostname: queuedInfo.hostname, ip: "" };
  }

  const installedEntries = Object.entries(state.installed);

  // Hostnames are matched before IPs across the whole table, as before.
  const wantedHost = target.toLowerCase();
  for (const [mac, info] of installedEntries) {
    const host = info.hostname.toLowerCase();
    if (host === wantedHost || host.startsWith(wantedHost + ".")) {
      return { mac, hostname: info.hostname, ip: info.ip };
    }
  }

  for (const [mac, info] of installedEntries) {
    if (info.ip === target) {
      return { mac, hostname: info.hostname, ip: info.ip };
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Register `debug <target>` on the given parent command.
 *
 * The action:
 *  1. fetches machine state from labd and resolves `target` to a machine;
 *  2. asks labd to queue the machine for a debug boot (rescue mode, or a PXE
 *     boot of the installed system when `--pxe-boot` is given);
 *  3. if the machine has a known IP and the invoking user is not root, makes a
 *     best-effort SSH attempt to set PXE as the next boot entry and reboot;
 *  4. prints follow-up instructions matching the selected mode.
 *
 * Any failure in steps 1-2 prints a message and exits the process with code 1.
 *
 * @param parent - Command the `debug` subcommand is attached to.
 */
export function registerDebugCommand(parent: Command): void {
  parent
    .command("debug <target>")
    .description("PXE boot into Fedora rescue mode for debugging (target: hostname, MAC, or IP)")
    .option("--pxe-boot", "Boot installed system via PXE (kernel+initrd from network, root from NVMe)")
    .showHelpAfterError(true)
    .action(async (target: string, opts: { pxeBoot?: boolean }) => {
      const client = getLabdClient();
      const pxeBoot = opts.pxeBoot === true;

      // Resolve target from labd aggregated state
      let state: BastionState;
      try {
        state = await client.getMachines();
      } catch (err) {
        console.error(`Cannot reach labd: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      const resolved = resolveTarget(target, state);
      if (!resolved) {
        console.error(`Cannot find machine: ${target}`);
        console.error("Provide a hostname, MAC, or IP of a known machine.");
        console.error("Run 'labctl provision list' to see available machines.");
        process.exit(1);
      }

      const { mac, hostname, ip } = resolved;
      console.log(`Queuing debug mode for ${hostname} (${mac})...`);

      try {
        const result = await client.debugMachine(mac, { pxeBoot });
        if (result.error) {
          console.error(`Failed: ${result.error}`);
          process.exit(1);
        }
      } catch (err) {
        console.error(`Failed to queue debug: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
      }

      // Try SSH reboot into PXE
      if (ip !== "") {
        const sudoUser = process.env["SUDO_USER"];
        const adminUser = sudoUser ?? process.env["USER"] ?? "";
        // Never attempt the SSH step as root; an empty user disables it.
        const effectiveUser = adminUser === "root" ? "" : adminUser;

        if (effectiveUser !== "") {
          console.log(`\nAttempting SSH reboot into PXE (${effectiveUser}@${ip})...`);

          // Under sudo, homedir() is root's home, so look in the invoking
          // user's home for a private key instead.
          const realHome = sudoUser !== undefined ? join("/home", sudoUser) : homedir();
          const sshKey = ["id_ed25519", "id_rsa", "id_ecdsa"]
            .map((name) => join(realHome, ".ssh", name))
            .find((candidate) => existsSync(candidate));

          const sshArgs = [
            "-o", "StrictHostKeyChecking=no",
            "-o", "ConnectTimeout=10",
            ...(sshKey !== undefined ? ["-i", sshKey] : []),
            `${effectiveUser}@${ip}`,
            // Remote side: pick the first PXE/network EFI entry as BootNext
            // when one exists, then reboot either way.
            'PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi',
          ];

          try {
            execFileSync("ssh", sshArgs, { stdio: "inherit" });
          } catch {
            // SSH connection closing during reboot is expected
          }
        }
      }

      if (pxeBoot) {
        // The rescue-mode instructions below (Anaconda sshd, password login,
        // mounting the installed system) do not apply when the installed
        // system itself is booted, so print a mode-specific summary instead.
        console.log(`
PXE boot (debug) queued for ${hostname} (${mac}).
Reboot the machine to boot the installed system with kernel+initrd from the network.
`);
        return;
      }

      // Determine bastion URL from labd config for the setup script URL
      const bastionUrl = process.env["LABD_URL"]
        ? process.env["LABD_URL"].replace(/\/ws\/bastion$/, "").replace(/^wss?:/, "http:")
        : "http://<bastion-ip>:8080";

      console.log(`
Debug mode queued for ${hostname} (${mac}).
Reboot the machine to enter Fedora rescue mode.

SSH access (started by Anaconda):
ssh root@<ip> (password: debug)

For nc remote shell, run from rescue shell:
curl ${bastionUrl}/debug-setup.sh | bash

Once in rescue shell:

# Activate LVM and mount installed system
vgchange -ay
mkdir -p /mnt/sysroot
mount /dev/<vg>/root /mnt/sysroot
cat /mnt/sysroot/etc/fstab
mount /dev/<vg>/var /mnt/sysroot/var
mount /dev/<vg>/home /mnt/sysroot/home

# Boot installed system in a container
/mnt/sysroot/usr/bin/systemd-nspawn -D /mnt/sysroot --boot

# Or chroot for quick fixes
mount --bind /dev /mnt/sysroot/dev
mount --bind /proc /mnt/sysroot/proc
mount --bind /sys /mnt/sysroot/sys
chroot /mnt/sysroot
`);
    });
}
|
||||
@@ -39,12 +39,10 @@ export function registerLogsCommand(parent: Command): void {
|
||||
parent
|
||||
.command("logs <target>")
|
||||
.description("Show provisioning logs for a machine (hostname, MAC, or IP)")
|
||||
.action(async (target: string) => {
|
||||
.option("-f, --follow", "Follow log output in real-time")
|
||||
.action(async (target: string, opts: { follow?: boolean }) => {
|
||||
const mac = await resolveToMac(target);
|
||||
|
||||
try {
|
||||
const data = await getLabdClient().getMachineLogs(mac);
|
||||
|
||||
const BOLD = "\x1b[1m";
|
||||
const GREEN = "\x1b[32m";
|
||||
const YELLOW = "\x1b[33m";
|
||||
@@ -52,6 +50,14 @@ export function registerLogsCommand(parent: Command): void {
|
||||
const DIM = "\x1b[2m";
|
||||
const RESET = "\x1b[0m";
|
||||
|
||||
if (opts.follow) {
|
||||
await followLogs(mac, { BOLD, GREEN, YELLOW, RED, DIM, RESET });
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const data = await getLabdClient().getMachineLogs(mac);
|
||||
|
||||
console.log(`${BOLD}${data["hostname"]}${RESET} (${mac})`);
|
||||
console.log(` Status: ${data["status"] === "installed" ? GREEN : YELLOW}${data["status"]}${RESET}`);
|
||||
console.log(` Role: ${data["role"]}`);
|
||||
@@ -83,3 +89,58 @@ export function registerLogsCommand(parent: Command): void {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Follow logs by polling labd.
 *
 * Every 5 seconds the machine's log record is fetched; a header line is
 * printed whenever the status changes and every stage entry not yet shown is
 * printed once. When the status becomes `installed` a completion line is
 * printed and the process exits with code 0. Fetch errors are ignored so the
 * loop keeps waiting for a machine that has no log record yet.
 *
 * @param mac - MAC address of the machine to follow.
 * @param colors - ANSI escape sequences used to format the output.
 * @returns Never resolves normally; the loop ends via `process.exit` or Ctrl+C.
 */
async function followLogs(
  mac: string,
  colors: { BOLD: string; GREEN: string; YELLOW: string; RED: string; DIM: string; RESET: string },
): Promise<void> {
  const { BOLD, GREEN, YELLOW, RED, DIM, RESET } = colors;
  const client = getLabdClient();

  console.log(`${DIM}Following logs for ${mac} (Ctrl+C to stop)${RESET}`);
  console.log("");

  // Number of stage entries already printed.
  let lastStageCount = 0;
  let lastStatus = "";

  while (true) {
    try {
      const data = await client.getMachineLogs(mac);
      const status = String(data["status"] ?? "");
      const log = data["log"] as Array<{ stage: string; detail: string; timestamp: string }> | undefined;

      // Print header once or on status change
      if (status !== lastStatus) {
        const hostname = String(data["hostname"] ?? mac);
        const statusColor = status === "installed" ? GREEN : YELLOW;
        console.log(`  ${BOLD}${hostname}${RESET} ${statusColor}${status}${RESET}`);
        lastStatus = status;
      }

      // Print new stages
      if (log) {
        // A shorter log than what was already printed means it was restarted
        // on the server side; start over so the new entries are not skipped.
        if (log.length < lastStageCount) {
          lastStageCount = 0;
        }
        for (const entry of log.slice(lastStageCount)) {
          // Characters 11..19 of the timestamp (HH:MM:SS in an ISO-8601 string).
          const time = entry.timestamp.slice(11, 19);
          const color = entry.stage === "complete" ? GREEN : entry.stage === "error" ? RED : YELLOW;
          const detail = entry.detail ? ` ${DIM}-- ${entry.detail}${RESET}` : "";
          console.log(`  ${DIM}${time}${RESET} ${color}${entry.stage}${RESET}${detail}`);
        }
        lastStageCount = log.length;
      }

      // Done
      if (status === "installed") {
        const ip = data["ip"] ?? "";
        console.log("");
        console.log(`  ${GREEN}${BOLD}Install complete!${RESET}${ip ? ` ${DIM}ssh lab@${ip}${RESET}` : ""}`);
        process.exit(0);
      }
    } catch {
      // Machine may not be in logs yet (still queued)
    }

    await new Promise((r) => setTimeout(r, 5000));
  }
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import { registerStatusCommand } from "./commands/status.js";
|
||||
import { registerInstallCommand } from "./commands/install.js";
|
||||
import { registerListCommand } from "./commands/list.js";
|
||||
import { registerReprovisionCommand } from "./commands/reprovision.js";
|
||||
import { registerDebugCommand } from "./commands/debug.js";
|
||||
import { registerForgetCommand } from "./commands/forget.js";
|
||||
import { registerLogsCommand } from "./commands/logs.js";
|
||||
import { registerMakeIsoCommand } from "./commands/makeiso.js";
|
||||
@@ -95,6 +96,7 @@ export function createProgram(): Command {
|
||||
registerListCommand(provisionCmd);
|
||||
registerInstallCommand(provisionCmd);
|
||||
registerReprovisionCommand(provisionCmd);
|
||||
registerDebugCommand(provisionCmd);
|
||||
registerForgetCommand(provisionCmd);
|
||||
registerLogsCommand(provisionCmd);
|
||||
registerMakeIsoCommand(provisionCmd);
|
||||
|
||||
@@ -34,6 +34,7 @@ async function main(): Promise<void> {
|
||||
server: {
|
||||
findMany: () => dbError(),
|
||||
findUnique: () => dbError(),
|
||||
upsert: () => dbError(),
|
||||
},
|
||||
joinToken: {
|
||||
findUnique: () => dbError(),
|
||||
|
||||
@@ -80,9 +80,54 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
});
|
||||
});
|
||||
|
||||
// Aggregated machines from all connected bastions
|
||||
// Aggregated machines from all connected bastions + DB fallback
|
||||
app.get("/api/machines", async () => {
|
||||
return bastionRegistry.getAggregatedState();
|
||||
const live = bastionRegistry.getAggregatedState();
|
||||
|
||||
// Merge DB records for machines not currently in any bastion's live state
|
||||
try {
|
||||
const dbServers = (await db.server.findMany({})) as Array<{
|
||||
mac: string | null; hostname: string; role: string; ip: string | null;
|
||||
status: string; labels: Record<string, unknown>;
|
||||
}>;
|
||||
for (const s of dbServers) {
|
||||
if (!s.mac) continue;
|
||||
const mac = s.mac.toLowerCase();
|
||||
// Only add from DB if not already in live state
|
||||
if (!(mac in live.discovered) && !(mac in live.install_queue) && !(mac in live.installed)) {
|
||||
if (s.status === "discovered") {
|
||||
live.discovered[mac] = {
|
||||
mac,
|
||||
product: String(s.labels?.product ?? "unknown"),
|
||||
board: "unknown",
|
||||
serial: "unknown",
|
||||
manufacturer: String(s.labels?.manufacturer ?? "unknown"),
|
||||
cpu_model: String(s.labels?.cpu ?? "unknown"),
|
||||
cpu_cores: Number(s.labels?.cores ?? 0),
|
||||
memory_gb: Number(s.labels?.memory_gb ?? 0),
|
||||
arch: String(s.labels?.arch ?? "unknown"),
|
||||
disks: [],
|
||||
nics: [],
|
||||
first_seen: "",
|
||||
last_seen: "",
|
||||
bastionId: "db",
|
||||
};
|
||||
} else if (s.status === "online" || s.status === "offline") {
|
||||
live.installed[mac] = {
|
||||
hostname: s.hostname,
|
||||
role: s.role,
|
||||
ip: s.ip ?? "",
|
||||
installed_at: "",
|
||||
bastionId: "db",
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// DB unavailable — return live state only
|
||||
}
|
||||
|
||||
return live;
|
||||
});
|
||||
|
||||
// Queue install — route to correct bastion by MAC
|
||||
@@ -106,7 +151,7 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
try {
|
||||
const result = await sendCommand(all[0]!.bastionId, {
|
||||
type: "command-install",
|
||||
mac, hostname, disk: disk ?? "/dev/sda", role: role ?? "infra", os: os ?? "fedora-43",
|
||||
mac, hostname, disk: disk ?? "", role: role ?? "infra", os: os ?? "fedora-43",
|
||||
});
|
||||
return reply.code(result.status === "ok" ? 200 : 500).send(result);
|
||||
} catch (err) {
|
||||
@@ -119,7 +164,7 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
try {
|
||||
const result = await sendCommand(bastion.bastionId, {
|
||||
type: "command-install",
|
||||
mac, hostname, disk: disk ?? "/dev/sda", role: role ?? "infra", os: os ?? "fedora-43",
|
||||
mac, hostname, disk: disk ?? "", role: role ?? "infra", os: os ?? "fedora-43",
|
||||
});
|
||||
return reply.code(result.status === "ok" ? 200 : 500).send(result);
|
||||
} catch (err) {
|
||||
@@ -127,6 +172,41 @@ export function registerBastionRoutes(app: FastifyInstance, db: DbClient): void
|
||||
}
|
||||
});
|
||||
|
||||
// Queue debug/rescue mode — route to correct bastion by MAC.
// Responses: 400 when no MAC is supplied; 503 when no bastion is connected;
// 404 when the MAC is unknown and more than one bastion is connected (no safe
// default); otherwise the command result with 200 on "ok" and 500 on anything
// else, including a thrown error.
app.post<{
  Body: { mac?: string; pxeBoot?: boolean };
}>("/api/machines/debug", async (request, reply) => {
  const payload = request.body;
  const mac = (payload?.mac ?? "").toLowerCase().replace(/-/g, ":");
  const pxeBoot = payload?.pxeBoot ?? false;
  if (!mac) {
    return reply.code(400).send({ error: "mac is required" });
  }

  // Pick the bastion that will receive the command.
  let targetBastionId: string;
  const owner = bastionRegistry.findBastionByMac(mac);
  if (owner) {
    targetBastionId = owner.bastionId;
  } else {
    const connected = bastionRegistry.getAll();
    if (connected.length === 0) {
      return reply.code(503).send({ error: "No bastions connected" });
    }
    if (connected.length !== 1) {
      return reply.code(404).send({ error: `MAC ${mac} not found on any bastion` });
    }
    // Exactly one bastion is connected: it is the only possible recipient.
    targetBastionId = connected[0]!.bastionId;
  }

  try {
    const result = await sendCommand(targetBastionId, { type: "command-debug", mac, pxeBoot });
    const httpStatus = result.status === "ok" ? 200 : 500;
    return reply.code(httpStatus).send(result);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return reply.code(500).send({ error: message });
  }
});
|
||||
|
||||
// Forget machine
|
||||
app.delete<{ Params: { mac: string } }>("/api/machines/:mac", async (request, reply) => {
|
||||
const mac = request.params.mac.toLowerCase().replace(/-/g, ":");
|
||||
|
||||
@@ -19,6 +19,7 @@ export interface DbClient {
|
||||
server: {
|
||||
findMany: (...args: unknown[]) => Promise<unknown[]>;
|
||||
findUnique: (...args: unknown[]) => Promise<unknown>;
|
||||
upsert: (...args: unknown[]) => Promise<unknown>;
|
||||
};
|
||||
joinToken: {
|
||||
findUnique: (...args: unknown[]) => Promise<unknown>;
|
||||
@@ -139,7 +140,7 @@ export async function createApp(_config: LabdConfig, db: DbClient): Promise<{
|
||||
socket,
|
||||
connectedAt: new Date(),
|
||||
lastHeartbeat: new Date(),
|
||||
state: { discovered: {}, install_queue: {}, installed: {} },
|
||||
state: { discovered: {}, install_queue: {}, installed: {}, debug: {} },
|
||||
});
|
||||
|
||||
socket.send(JSON.stringify({ type: "bastion-enrolled", bastionId: record.id }));
|
||||
@@ -175,6 +176,52 @@ export async function createApp(_config: LabdConfig, db: DbClient): Promise<{
|
||||
if (bastionId) {
|
||||
bastionRegistry.updateState(bastionId, msg.state);
|
||||
logger.info(`Bastion ${bastionId.slice(0, 8)} state sync: ${Object.keys(msg.state.discovered).length} discovered, ${Object.keys(msg.state.installed).length} installed`);
|
||||
|
||||
// Persist machines to DB
|
||||
void (async () => {
|
||||
try {
|
||||
// Upsert discovered machines
|
||||
for (const [mac, hw] of Object.entries(msg.state.discovered)) {
|
||||
await db.server.upsert({
|
||||
where: { mac },
|
||||
create: {
|
||||
hostname: hw.product ?? mac,
|
||||
mac,
|
||||
role: "unknown",
|
||||
status: "discovered",
|
||||
labels: { cpu: hw.cpu_model, cores: hw.cpu_cores, memory_gb: hw.memory_gb, arch: hw.arch, product: hw.product, manufacturer: hw.manufacturer },
|
||||
},
|
||||
update: {
|
||||
status: "discovered",
|
||||
lastHeartbeat: new Date(),
|
||||
labels: { cpu: hw.cpu_model, cores: hw.cpu_cores, memory_gb: hw.memory_gb, arch: hw.arch, product: hw.product, manufacturer: hw.manufacturer },
|
||||
},
|
||||
});
|
||||
}
|
||||
// Upsert installed machines
|
||||
for (const [mac, info] of Object.entries(msg.state.installed)) {
|
||||
await db.server.upsert({
|
||||
where: { mac },
|
||||
create: {
|
||||
hostname: info.hostname,
|
||||
mac,
|
||||
role: info.role ?? "worker",
|
||||
ip: info.ip,
|
||||
status: "online",
|
||||
},
|
||||
update: {
|
||||
hostname: info.hostname,
|
||||
role: info.role ?? "worker",
|
||||
ip: info.ip,
|
||||
status: "online",
|
||||
lastHeartbeat: new Date(),
|
||||
},
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn(`Failed to persist machines to DB: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
})();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
import { EventEmitter } from "node:events";
|
||||
import type { WebSocket } from "ws";
|
||||
import type { BastionState, HardwareInfo, InstallConfig, InstalledInfo } from "@lab/shared";
|
||||
import type { BastionState, HardwareInfo, InstallConfig, InstalledInfo, DebugConfig } from "@lab/shared";
|
||||
|
||||
export interface ConnectedBastion {
|
||||
bastionId: string;
|
||||
@@ -20,6 +20,7 @@ export interface AggregatedState {
|
||||
discovered: Record<string, HardwareInfo>;
|
||||
install_queue: Record<string, InstallConfig>;
|
||||
installed: Record<string, InstalledInfo>;
|
||||
debug: Record<string, DebugConfig>;
|
||||
}
|
||||
|
||||
export class BastionRegistry extends EventEmitter {
|
||||
@@ -86,6 +87,7 @@ export class BastionRegistry extends EventEmitter {
|
||||
discovered: {},
|
||||
install_queue: {},
|
||||
installed: {},
|
||||
debug: {},
|
||||
};
|
||||
|
||||
for (const bastion of this.bastions.values()) {
|
||||
@@ -98,6 +100,9 @@ export class BastionRegistry extends EventEmitter {
|
||||
for (const [mac, info] of Object.entries(bastion.state.installed)) {
|
||||
result.installed[mac] = { ...info, bastionId: bastion.bastionId };
|
||||
}
|
||||
for (const [mac, dbg] of Object.entries(bastion.state.debug ?? {})) {
|
||||
result.debug[mac] = { ...dbg };
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
@@ -5,6 +5,7 @@ export type {
|
||||
HardwareInfo,
|
||||
InstallConfig,
|
||||
InstalledInfo,
|
||||
DebugConfig,
|
||||
BastionState,
|
||||
BastionConfig,
|
||||
} from "./types/index.js";
|
||||
|
||||
@@ -111,6 +111,7 @@ export type LabdBastionMessage =
|
||||
| { type: "command-install"; requestId: string; mac: string; hostname: string; disk?: string; role: string; os: string }
|
||||
| { type: "command-forget"; requestId: string; mac: string }
|
||||
| { type: "command-role-update"; requestId: string; mac: string; role: string }
|
||||
| { type: "command-debug"; requestId: string; mac: string; pxeBoot?: boolean }
|
||||
| { type: "server-shutdown"; reconnectAfter: number };
|
||||
|
||||
export type BastionMessageType = BastionMessage["type"];
|
||||
@@ -125,7 +126,7 @@ const BASTION_MESSAGE_TYPES = new Set<string>([
|
||||
|
||||
const LABD_BASTION_MESSAGE_TYPES = new Set<string>([
|
||||
"bastion-enrolled", "bastion-heartbeat-ack", "command-install",
|
||||
"command-forget", "command-role-update", "server-shutdown",
|
||||
"command-forget", "command-role-update", "command-debug", "server-shutdown",
|
||||
]);
|
||||
|
||||
export function isBastionMessage(msg: unknown): msg is BastionMessage {
|
||||
|
||||
@@ -5,6 +5,7 @@ export type {
|
||||
HardwareInfo,
|
||||
InstallConfig,
|
||||
InstalledInfo,
|
||||
DebugConfig,
|
||||
BastionState,
|
||||
} from "./state.js";
|
||||
|
||||
|
||||
@@ -98,8 +98,15 @@ export interface InstalledInfo {
|
||||
bastionId?: string; // set when aggregated through labd
|
||||
}
|
||||
|
||||
/**
 * Value type stored per key in the `debug` map of `BastionState`.
 *
 * NOTE(review): presumably describes a machine queued for the PXE
 * debug/rescue boot mode introduced by this change — confirm against the
 * bastion-side implementation, which is not visible here.
 */
export interface DebugConfig {
|
||||
/** Hostname associated with the machine this entry refers to. */
hostname: string;
|
||||
/**
 * When the entry was queued, carried as a string.
 * NOTE(review): presumably an ISO-8601 timestamp — confirm the exact format.
 */
queued_at: string;
|
||||
/** Optional flag; the `command-debug` message carries a field of the same name. */
pxeBoot?: boolean;
|
||||
}
|
||||
|
||||
/**
 * State reported by a bastion, as four string-keyed maps.
 *
 * NOTE(review): consumers in this change iterate the keys as MAC addresses
 * (`for (const [mac, ...] of Object.entries(...))`); confirm whether any
 * normalisation (case, separator) is guaranteed for those keys.
 */
export interface BastionState {
|
||||
/** Hardware details for each discovered machine. */
discovered: Record<string, HardwareInfo>;
|
||||
/** Install configuration for each machine in the install queue. */
install_queue: Record<string, InstallConfig>;
|
||||
/** Details for each installed machine. */
installed: Record<string, InstalledInfo>;
|
||||
/**
 * Debug entries per machine.
 * NOTE(review): declared as required, yet the registry aggregation reads it
 * as `state.debug ?? {}` — presumably older bastions may omit it; confirm
 * whether this should be optional.
 */
debug: Record<string, DebugConfig>;
|
||||
}
|
||||
|
||||
82
bastion/tests/integration/helpers/jetkvm.sh
Executable file
82
bastion/tests/integration/helpers/jetkvm.sh
Executable file
@@ -0,0 +1,82 @@
|
||||
#!/bin/bash
|
||||
# JetKVM helper — authenticate and interact with JetKVM device.
|
||||
# Usage:
|
||||
# jetkvm.sh status — check device status
# jetkvm.sh device — authenticated GET of /device
|
||||
# jetkvm.sh reboot — reboot the target machine via ATX
|
||||
# jetkvm.sh poweron — power on via ATX short press
|
||||
# jetkvm.sh poweroff — power off via ATX long press
|
||||
#
|
||||
# Environment:
|
||||
# JETKVM_HOST — JetKVM IP (default: 192.168.3.10)
|
||||
# JETKVM_PASS — device password
|
||||
|
||||
# Strict mode: stop on command failure, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -euo pipefail

# Device address and credentials are taken from the environment; only the
# address has a built-in default.
HOST=${JETKVM_HOST:-192.168.3.10}
PASS=${JETKVM_PASS:-}

# The password has no sensible default, so refuse to continue without one.
[[ -n $PASS ]] || { echo "ERROR: JETKVM_PASS not set" >&2; exit 1; }

# Root URL that every request path is appended to.
BASE="http://${HOST}"
|
||||
|
||||
# Authenticate against the device's local-login endpoint and print the token
# found in the response on stdout.
#
# Reads the globals BASE and PASS.
#
# On failure (no non-empty "token" field in the response) the raw response,
# including any curl diagnostic, is written to stderr and the shell exits
# with status 1. Callers run this as $(login), so that exit ends the
# command-substitution subshell and the failed assignment is what stops the
# calling script under `set -e`.
login() {
    local escaped resp token=""
    # "token" : "<value>" with optional whitespace around the colon; the
    # capture group is the value between the quotes.
    local token_re='"token"[[:space:]]*:[[:space:]]*"([^"]*)"'

    # Escape backslashes first, then double quotes, so a password containing
    # either cannot terminate the JSON string early or corrupt the payload.
    # (Control characters are not handled here.)
    escaped=${PASS//\\/\\\\}
    escaped=${escaped//\"/\\\"}

    # -S re-enables curl's error message, which -s alone would suppress; with
    # stderr folded into the capture it ends up in the failure report below.
    # `|| true` keeps a transport failure from aborting before that report
    # when errexit is active inside the substitution (e.g. inherit_errexit).
    resp=$(curl -sS -X POST "$BASE/auth/login-local" \
        -H "Content-Type: application/json" \
        -d "{\"password\":\"$escaped\"}" 2>&1) || true

    # Bash's own regex matching replaces the grep -P pipeline, which requires
    # GNU grep built with PCRE support.
    if [[ $resp =~ $token_re ]]; then
        token=${BASH_REMATCH[1]}
    fi

    if [ -z "$token" ]; then
        echo "ERROR: Login failed: $resp" >&2
        exit 1
    fi
    echo "$token"
}
|
||||
|
||||
# Send an authenticated request to the device and let curl print the response
# body on stdout.
#   $1  HTTP method
#   $2  request path, appended to BASE
#   $3  optional request body; when non-empty it is sent with a JSON
#       Content-Type header
# A token is obtained through login() on every call.
api() {
    local verb="$1" route="$2" payload="${3:-}"

    # Declared separately from the assignment so that a failing login is not
    # masked by the exit status of `local`.
    local bearer
    bearer=$(login)

    local -a curl_args=(-s -X "$verb" "$BASE$route" -H "Authorization: Bearer $bearer")
    if [ -n "$payload" ]; then
        curl_args+=(-H "Content-Type: application/json" -d "$payload")
    fi

    curl "${curl_args[@]}"
}
|
||||
|
||||
# Dispatch on the requested sub-command; with no argument, "status" is used.
case "${1:-status}" in
    status)
        # Plain GET without a token; curl's stderr is merged into stdout.
        curl -s "$BASE/device/status" 2>&1
        ;;
    device)
        api GET /device
        ;;
    reboot)
        echo "Sending ATX reset..."
        api POST /device/atx/reset
        ;;
    poweron)
        echo "Sending ATX short power press..."
        api POST /device/atx/power-short
        ;;
    poweroff)
        echo "Sending ATX long power press..."
        api POST /device/atx/power-long
        ;;
    *)
        # Unknown sub-command: this is an error path (exit 1), so the usage
        # text goes to stderr like the script's other error messages.
        echo "Usage: $0 {status|device|reboot|poweron|poweroff}" >&2
        exit 1
        ;;
esac
|
||||
@@ -224,11 +224,12 @@ describe("PXE boot provisioning", () => {
|
||||
// Generate dnsmasq config
|
||||
generateDnsmasqConf(config);
|
||||
|
||||
// Start HTTP server
|
||||
const { app, state } = createApp(config);
|
||||
// Start HTTP server + syslog listener
|
||||
const { app, state, syslog } = createApp(config);
|
||||
bastionApp = app;
|
||||
await app.listen({ port: config.httpPort, host: "0.0.0.0" });
|
||||
log(`Bastion HTTP server listening on :${HTTP_PORT}`);
|
||||
syslog.start();
|
||||
log(`Bastion HTTP server listening on :${HTTP_PORT}, syslog on UDP :${config.syslogPort}`);
|
||||
|
||||
// Start dnsmasq (fire-and-forget — it runs until killed)
|
||||
// May fail without root (DHCP socket needs CAP_NET_BIND_SERVICE); libvirt network provides DHCP fallback
|
||||
@@ -387,8 +388,8 @@ describe("PXE boot provisioning", () => {
|
||||
expect(data.progress).toBe("complete");
|
||||
});
|
||||
|
||||
it.skip("log lines were captured", async () => {
|
||||
// Requires log streamer in %post — skipped until re-added
|
||||
it("syslog install logs were captured", async () => {
|
||||
// Anaconda forwards logs via syslog (logging --host directive in kickstart)
|
||||
const res = await fetch(`http://${BASTION_IP}:${HTTP_PORT}/api/logs/${encodeURIComponent(vmMac)}`);
|
||||
const data = (await res.json()) as { log_total?: number; log_lines?: Array<{ line: string }> };
|
||||
expect(data.log_total).toBeGreaterThan(0);
|
||||
|
||||
Reference in New Issue
Block a user