feat: install logging, error trapping, PXE/ISO integration tests
Some checks failed
CI/CD / lint (pull_request) Failing after 13s
CI/CD / test (pull_request) Failing after 10s
CI/CD / typecheck (pull_request) Failing after 36s
CI/CD / build (pull_request) Has been skipped
CI/CD / publish-rpm (pull_request) Has been skipped
CI/CD / publish-deb (pull_request) Has been skipped

Kickstart installs on real hardware failed silently — no error reporting,
only 3 progress callbacks, zero log streaming. This overhaul makes every
install fully observable.

Kickstart improvements:
- Error trapping in %pre and %post (trap ERR sends failure details to bastion)
- 12+ granular progress stages (was 3): SSH, hostname, k3s prep, EFI boot, metadata
- Background log streamer: tails %post output and batch-sends to /api/log
- bastion_log() function for explicit log lines from kickstart scripts

Bastion API:
- POST /api/log — receives raw log lines from kickstart (single or batch)
- InstallLogBuffer — per-MAC ring buffer (2000 lines) + file persistence
- GET /api/logs/:mac — now returns log_lines + log_total alongside stages
- SSE /api/logs/:mac/follow — uses named events (event: stage vs event: log)
- Progress events forwarded to labd via bastion-progress WebSocket message
- Post-provision k3s logs routed through progressBus (was console-only)

dnsmasq fixes found during VM testing:
- HTTP Boot filename: ipxe-real.efi → ipxe.efi (leftover from old 2-stage approach)
- pxe-service directives: only in proxy mode (breaks OVMF PXE in full mode)
- PXEClient vendor class echo for UEFI firmware compatibility

Integration tests:
- PXE boot test: blank UEFI VM → dnsmasq → HTTP Boot → iPXE → bastion → install
- ISO boot test: blank VM boots from bastion-generated ISO → same flow
- Shared helpers: pxe-network (no DHCP, nftables fix), pxe-vm (UEFI + ISO boot)
- test-provision.sh: runs both PXE + ISO tests with prerequisite checks
- 250GB sparse QCOW2 disk (LVM layout needs ~204GB)

201 unit tests passing (11 new).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-03-26 22:26:33 +00:00
parent ffc4a782d2
commit 46b017d77e
189 changed files with 16241 additions and 432 deletions

View File

@@ -0,0 +1,6 @@
# Manifest for the k3s module: identity, target roles and module dependencies.
# NOTE(review): the K3sModule class in this same change declares version "1.0.0"
# and also targets the "labcontroller" role — confirm which source is authoritative.
name: k3s
version: 0.1.0
description: Install and configure k3s with CIS security hardening and Cilium CNI
targets:
roles: [infra, worker]
dependencies: []

View File

@@ -0,0 +1,117 @@
// k3s module — configure phase.
// Post-install configuration: log rotation, network policies, cert rotation.
/**
 * Build the bash script for the k3s post-install "configure" phase.
 *
 * The returned script runs under `set -euo pipefail` and performs five steps,
 * labelled [0/4] to [4/4] in its output:
 *   0. detects the upstream DNS server and, when one is found that is not
 *      127.0.0.53, writes it to /etc/rancher/k3s/resolv.conf; if k3s is active
 *      it is restarted and `k3s kubectl get nodes` is polled up to 30 times,
 *      2 seconds apart,
 *   1. installs a logrotate rule for the logs under /var/log/kubernetes,
 *   2. reports whether `k3s certificate rotate` exists and prints the expiry
 *      date of every .crt file in the server TLS directory,
 *   3. applies a default-deny-ingress and an allow-dns NetworkPolicy to the
 *      `default` namespace,
 *   4. lists nodes and pods.
 *
 * Only `hostname` is interpolated by TypeScript; every other `$...` sequence in
 * the template is left untouched for bash to expand.
 *
 * NOTE(review): `hostname` is placed inside a double-quoted echo without shell
 * escaping — presumably it always comes from trusted inventory; confirm.
 *
 * @param hostname Host name shown in the script's banner line.
 * @returns The complete script text, starting with the `#!/bin/bash` shebang.
 */
export function generateConfigureScript(hostname: string): string {
return `#!/bin/bash
set -euo pipefail
echo "=== k3s configure: ${hostname} ==="
# ── 0. Fix CoreDNS upstream resolver ──
# systemd-resolved listens on 127.0.0.53, but that address is unreachable from
# inside CoreDNS's pod network namespace. CoreDNS forwards to /etc/resolv.conf
# which contains 127.0.0.53 on systemd-resolved hosts, causing all external DNS
# lookups to time out. Fix: write a resolv.conf with the real upstream DNS server
# that k3s will use instead of /etc/resolv.conf.
echo "[0/4] Fixing CoreDNS upstream DNS..."
UPSTREAM_DNS=$(resolvectl status 2>/dev/null | grep -A2 "Link.*$(ip -4 route show default | awk '{print $5}' | head -1)" | grep "Current DNS" | awk '{print $NF}' || echo "")
if [ -z "$UPSTREAM_DNS" ]; then
# Fallback: parse resolv.conf from systemd-resolved's real config
UPSTREAM_DNS=$(cat /run/systemd/resolve/resolv.conf 2>/dev/null | grep "^nameserver" | head -1 | awk '{print $2}' || echo "")
fi
if [ -n "$UPSTREAM_DNS" ] && [ "$UPSTREAM_DNS" != "127.0.0.53" ]; then
echo "nameserver $UPSTREAM_DNS" > /etc/rancher/k3s/resolv.conf
echo " Wrote /etc/rancher/k3s/resolv.conf with upstream DNS: $UPSTREAM_DNS"
# k3s reads this file automatically on next restart; restart now to apply
if systemctl is-active k3s >/dev/null 2>&1; then
systemctl restart k3s
echo " Restarted k3s to pick up DNS fix"
# Wait for API to come back
for i in $(seq 1 30); do
if k3s kubectl get nodes >/dev/null 2>&1; then
break
fi
sleep 2
done
fi
else
echo " Upstream DNS already correct or could not detect — skipping"
fi
# ── 1. Log rotation for k3s ──
echo "[1/4] Setting up log rotation..."
cat > /etc/logrotate.d/k3s << 'LOGROTATE'
/var/log/kubernetes/*.log {
daily
rotate 14
compress
delaycompress
missingok
notifempty
copytruncate
maxsize 100M
}
LOGROTATE
# ── 2. Verify certificate rotation ──
echo "[2/4] Checking certificate rotation..."
if k3s certificate rotate --help > /dev/null 2>&1; then
echo " Certificate rotation available"
else
echo " Warning: certificate rotation not available in this k3s version"
fi
# Check cert expiry
CERT_DIR="/var/lib/rancher/k3s/server/tls"
if [ -d "$CERT_DIR" ]; then
for cert in "$CERT_DIR"/*.crt; do
[ -f "$cert" ] || continue
EXPIRY=$(openssl x509 -in "$cert" -enddate -noout 2>/dev/null | cut -d= -f2)
echo " $(basename "$cert"): expires $EXPIRY"
done
fi
# ── 3. Default network policy (deny all ingress by default) ──
echo "[3/4] Applying default network policies..."
k3s kubectl apply -f - << 'NETPOL'
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: default-deny-ingress
namespace: default
spec:
podSelector: {}
policyTypes:
- Ingress
NETPOL
# Allow DNS
k3s kubectl apply -f - << 'DNSPOL'
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: allow-dns
namespace: default
spec:
podSelector: {}
policyTypes:
- Egress
egress:
- to: []
ports:
- port: 53
protocol: UDP
- port: 53
protocol: TCP
DNSPOL
# ── 4. Verify cluster state ──
echo "[4/4] Verifying cluster state..."
k3s kubectl get nodes
k3s kubectl get pods -A
echo "=== k3s configure complete ==="
`;
}

View File

@@ -0,0 +1,22 @@
// Hardening: Pod Security Standards, certificate check, log rotation.
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
import { runSequential } from "../utils.js";
import { applyPodSecurityStandards } from "../operations/pod-security.js";
import { checkCertExpiry } from "../operations/cert-check.js";
import { configureLogRotation } from "../operations/log-rotation.js";
/**
 * Operations that make up the "hardening" group, listed in the order they are
 * handed to `runSequential`: Pod Security Standards, certificate expiry check,
 * log rotation.
 */
export const hardeningGroup: OperationGroup = {
  name: "hardening",
  description: "Pod security, certificate check, log rotation",
  operations: [
    { name: "Apply Pod Security Standards", fn: applyPodSecurityStandards },
    { name: "Check certificate expiry", fn: checkCertExpiry },
    { name: "Configure log rotation", fn: configureLogRotation },
  ],
};

/**
 * Emit the group's banner line through `ctx.log`, then run the hardening
 * operations via `runSequential`.
 *
 * @param ctx Operation context passed on to every operation.
 * @returns The results produced by `runSequential` for this group.
 */
export async function runHardening(ctx: OperationContext): Promise<OperationResult[]> {
  ctx.log("Cluster hardening...");
  const { operations } = hardeningGroup;
  return runSequential(ctx, operations);
}

View File

@@ -0,0 +1,26 @@
// Host preparation: kernel modules, sysctl, swap, firewall, SELinux.
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
import { runSequential } from "../utils.js";
import { loadKernelModules } from "../operations/kernel-modules.js";
import { applyCisHardening } from "../operations/sysctl.js";
import { disableSwap } from "../operations/swap.js";
import { disableFirewall } from "../operations/firewall.js";
import { setSelinuxPermissive } from "../operations/selinux.js";
/**
 * Operations that make up the "host-prep" group, listed in the order they are
 * handed to `runSequential`: kernel modules, CIS sysctl, swap, firewall, SELinux.
 */
export const hostPrepGroup: OperationGroup = {
  name: "host-prep",
  description: "Prepare host for k3s: kernel modules, sysctl, swap, firewall, SELinux",
  operations: [
    { name: "Load kernel modules", fn: loadKernelModules },
    { name: "Apply CIS sysctl", fn: applyCisHardening },
    { name: "Disable swap", fn: disableSwap },
    { name: "Disable firewall", fn: disableFirewall },
    { name: "Set SELinux permissive", fn: setSelinuxPermissive },
  ],
};

/**
 * Emit the group's banner line through `ctx.log`, then run the host preparation
 * operations via `runSequential`.
 *
 * @param ctx Operation context passed on to every operation.
 * @returns The results produced by `runSequential` for this group.
 */
export async function runHostPrep(ctx: OperationContext): Promise<OperationResult[]> {
  ctx.log("Host preparation...");
  const { operations } = hostPrepGroup;
  return runSequential(ctx, operations);
}

View File

@@ -0,0 +1,5 @@
export { hostPrepGroup, runHostPrep } from "./host-prep.js";
export { k3sServerGroup, runK3sServer } from "./k3s-server.js";
export { k3sAgentGroup, runK3sAgent } from "./k3s-agent.js";
export { networkingGroup, runNetworking } from "./networking.js";
export { hardeningGroup, runHardening } from "./hardening.js";

View File

@@ -0,0 +1,20 @@
// K3s agent installation: config + binary in agent mode.
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
import { runSequential } from "../utils.js";
import { writeK3sConfig } from "../operations/k3s-config.js";
import { installK3sBinary } from "../operations/k3s-install.js";
/**
 * Operations that make up the "k3s-agent" group, listed in the order they are
 * handed to `runSequential`: write the k3s config, then install the binary.
 */
export const k3sAgentGroup: OperationGroup = {
  name: "k3s-agent",
  description: "Install k3s agent and join cluster",
  operations: [
    { name: "Write k3s config", fn: writeK3sConfig },
    { name: "Install k3s agent", fn: installK3sBinary },
  ],
};

/**
 * Emit the group's banner line through `ctx.log`, then run the agent
 * installation operations via `runSequential`.
 *
 * @param ctx Operation context passed on to every operation.
 * @returns The results produced by `runSequential` for this group.
 */
export async function runK3sAgent(ctx: OperationContext): Promise<OperationResult[]> {
  ctx.log("K3s agent installation...");
  const { operations } = k3sAgentGroup;
  return runSequential(ctx, operations);
}

View File

@@ -0,0 +1,24 @@
// K3s server installation: config, audit policy, CNI cleanup, binary install.
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
import { runSequential } from "../utils.js";
import { writeK3sConfig } from "../operations/k3s-config.js";
import { writeAuditPolicy } from "../operations/audit-policy.js";
import { cleanupStaleCni } from "../operations/cni-cleanup.js";
import { installK3sBinary } from "../operations/k3s-install.js";
/**
 * Operations that make up the "k3s-server" group, listed in the order they are
 * handed to `runSequential`: k3s config, audit policy, stale CNI cleanup,
 * binary install.
 */
export const k3sServerGroup: OperationGroup = {
  name: "k3s-server",
  description: "Install k3s server with CIS-hardened config",
  operations: [
    { name: "Write k3s config", fn: writeK3sConfig },
    { name: "Write audit policy", fn: writeAuditPolicy },
    { name: "Clean stale CNI", fn: cleanupStaleCni },
    { name: "Install k3s binary", fn: installK3sBinary },
  ],
};

/**
 * Emit the group's banner line through `ctx.log`, then run the server
 * installation operations via `runSequential`.
 *
 * @param ctx Operation context passed on to every operation.
 * @returns The results produced by `runSequential` for this group.
 */
export async function runK3sServer(ctx: OperationContext): Promise<OperationResult[]> {
  ctx.log("K3s server installation...");
  const { operations } = k3sServerGroup;
  return runSequential(ctx, operations);
}

View File

@@ -0,0 +1,22 @@
// Networking: Cilium CNI, CoreDNS fix, network policies.
import type { OperationContext, OperationResult, OperationGroup } from "../types.js";
import { runSequential } from "../utils.js";
import { installCilium } from "../operations/cilium.js";
import { fixCoreDnsUpstream } from "../operations/dns-fix.js";
import { applyDefaultNetworkPolicies } from "../operations/network-policy.js";
/**
 * Operations that make up the "networking" group, listed in the order they are
 * handed to `runSequential`: Cilium install, CoreDNS upstream fix, default
 * network policies.
 */
export const networkingGroup: OperationGroup = {
  name: "networking",
  description: "Install Cilium CNI, fix DNS, apply network policies",
  operations: [
    { name: "Install Cilium CNI", fn: installCilium },
    { name: "Fix CoreDNS upstream", fn: fixCoreDnsUpstream },
    { name: "Apply network policies", fn: applyDefaultNetworkPolicies },
  ],
};

/**
 * Emit the group's banner line through `ctx.log`, then run the networking
 * operations via `runSequential`.
 *
 * @param ctx Operation context passed on to every operation.
 * @returns The results produced by `runSequential` for this group.
 */
export async function runNetworking(ctx: OperationContext): Promise<OperationResult[]> {
  ctx.log("Networking setup...");
  const { operations } = networkingGroup;
  return runSequential(ctx, operations);
}

View File

@@ -0,0 +1,56 @@
// k3s module — health check phase.
// Verifies k3s is running, nodes ready, API accessible, Cilium healthy, encryption active.
/** One shell-level health probe together with the predicate that interprets its result. */
export interface HealthCheck {
  /** Human-readable label for the probe. */
  name: string;
  /** Shell command line to execute on the target host. */
  command: string;
  /** Function to check if the command output indicates success */
  check: (stdout: string, exitCode: number) => boolean;
}

/**
 * Legacy health probes for a k3s server, in the order they are reported.
 *
 * Several commands end in `|| echo '...'` or a pipeline, so their exit code is
 * not meaningful on its own; the matching `check` therefore looks at stdout.
 */
export const K3S_HEALTH_CHECKS: HealthCheck[] = [
  {
    name: "k3s service active",
    command: "systemctl is-active k3s",
    check: (stdout, code) => code === 0 && stdout.trim() === "active",
  },
  {
    name: "node Ready",
    command: "k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}'",
    check: (stdout) => stdout.includes("True"),
  },
  {
    name: "API server healthy",
    command: "k3s kubectl get --raw /healthz",
    check: (stdout, code) => code === 0 && stdout.trim() === "ok",
  },
  {
    name: "secrets encryption enabled",
    command: "k3s secrets-encrypt status 2>/dev/null || echo 'not available'",
    check: (stdout) => stdout.includes("Enabled") || stdout.includes("enabled"),
  },
  {
    name: "Cilium status",
    // The cilium CLI needs the k3s kubeconfig to reach the cluster; without it the
    // probe fails and falls through to the "cilium not installed" branch even when
    // Cilium is healthy. Every other Cilium invocation in this module sets it too.
    command:
      "KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium status --brief 2>/dev/null || echo 'cilium not installed'",
    check: (stdout, code) => code === 0 && !stdout.includes("not installed"),
  },
  {
    name: "kube-system pods running",
    // Counts the pods whose line mentions neither Running nor Completed.
    command: "k3s kubectl get pods -n kube-system --no-headers | grep -v Running | grep -v Completed | wc -l",
    check: (stdout) => parseInt(stdout.trim(), 10) === 0,
  },
];
/**
 * Render every entry of `K3S_HEALTH_CHECKS` into one bash script.
 *
 * For each probe the script prints a numbered heading, captures the command's
 * output in `OUTPUT` (a failing command is tolerated via `|| true`) and echoes
 * it. The probes' `check` predicates are not part of the generated script.
 *
 * @returns The script text, starting with the `#!/bin/bash` shebang.
 */
export function generateHealthScript(): string {
  const total = K3S_HEALTH_CHECKS.length;
  const sections: string[] = [];

  K3S_HEALTH_CHECKS.forEach((probe, idx) => {
    // Leading and trailing empty entries reproduce the blank line around each section.
    const sectionLines = [
      "",
      `echo "[${idx + 1}/${total}] ${probe.name}..."`,
      `OUTPUT=$(${probe.command} 2>&1) || true`,
      'echo " result: $OUTPUT"',
      "",
    ];
    sections.push(sectionLines.join("\n"));
  });

  return [
    "#!/bin/bash",
    'echo "=== k3s health check ==="',
    sections.join("\n"),
    'echo "=== health check complete ==="',
    "",
  ].join("\n");
}

View File

@@ -0,0 +1,8 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Health check: query the API server's /healthz endpoint through `k3s kubectl`.
 * Succeeds only when the command exits 0 and prints "ok" (surrounding
 * whitespace ignored). Never reports a change.
 */
export const checkApiHealth: Operation = async (ctx): Promise<OperationResult> => {
  const probe = await ctx.ssh.exec("k3s kubectl get --raw /healthz 2>/dev/null", sshOpts(ctx));
  if (probe.exitCode === 0 && probe.stdout.trim() === "ok") {
    return { success: true, changed: false, message: "API server healthy" };
  }
  return { success: false, changed: false, message: "API server unhealthy" };
};

View File

@@ -0,0 +1,16 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Health check: run `cilium status --brief` against the k3s kubeconfig.
 *
 * Success is decided by the exit code alone. On success the trimmed stdout is
 * returned as the single detail line. On failure the non-empty parts of stderr
 * and stdout are returned so the caller can see why Cilium is unhealthy.
 */
export const checkCiliumStatus: Operation = async (ctx): Promise<OperationResult> => {
  // stderr is intentionally not redirected to /dev/null: it is reported below
  // as the failure reason, and discarding it on the remote side left that empty.
  const result = await ctx.ssh.exec(
    "KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium status --brief",
    sshOpts(ctx),
  );

  if (result.exitCode === 0) {
    return {
      success: true,
      changed: false,
      message: "Cilium OK",
      details: [result.stdout.trim()],
    };
  }

  const reasons = [result.stderr, result.stdout]
    .map((text) => text.trim())
    .filter((text) => text.length > 0);
  return {
    success: false,
    changed: false,
    message: "Cilium unhealthy",
    details: reasons,
  };
};

View File

@@ -0,0 +1,6 @@
export { checkK3sService } from "./k3s-service.js";
export { checkNodeReady } from "./node-ready.js";
export { checkApiHealth } from "./api-health.js";
export { checkSecretsEncryption } from "./secrets-encryption.js";
export { checkCiliumStatus } from "./cilium-status.js";
export { checkPodStatus } from "./pod-status.js";

View File

@@ -0,0 +1,9 @@
import type { Operation, OperationResult } from "../types.js";
import { isServiceActive } from "../utils.js";
/**
 * Health check: verify that the k3s systemd unit for this node's role is active.
 * The "infra" and "labcontroller" roles are checked against the `k3s` unit;
 * every other role is checked against `k3s-agent`. Never reports a change.
 */
export const checkK3sService: Operation = async (ctx): Promise<OperationResult> => {
  let service: string;
  switch (ctx.config.role) {
    case "infra":
    case "labcontroller":
      service = "k3s";
      break;
    default:
      service = "k3s-agent";
  }

  const active = await isServiceActive(ctx, service);
  const message = active ? `${service} is active` : `${service} is not active`;
  return { success: active, changed: false, message };
};

View File

@@ -0,0 +1,11 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Health check: read the Ready condition of the first node returned by
 * `k3s kubectl get nodes` and succeed when the output contains "True".
 * The command's exit code is not inspected. Never reports a change.
 */
export const checkNodeReady: Operation = async (ctx): Promise<OperationResult> => {
  const readyQuery =
    "k3s kubectl get nodes -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' 2>/dev/null";
  const { stdout } = await ctx.ssh.exec(readyQuery, sshOpts(ctx));

  const isReady = stdout.includes("True");
  const message = isReady ? "Node is Ready" : "Node is NotReady";
  return { success: isReady, changed: false, message };
};

View File

@@ -0,0 +1,20 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Health check: list the kube-system pods and fail if any line mentions neither
 * "Running" nor "Completed".
 *
 * A non-zero exit code from the listing command is reported as a failure of its
 * own. Without that guard an unreachable API server (empty stdout) would be
 * reported as "All 0 kube-system pods healthy".
 *
 * On an unhealthy result `details` carries the offending pod lines. Never
 * reports a change.
 */
export const checkPodStatus: Operation = async (ctx): Promise<OperationResult> => {
  const result = await ctx.ssh.exec(
    "k3s kubectl get pods -n kube-system --no-headers 2>/dev/null",
    sshOpts(ctx),
  );

  if (result.exitCode !== 0) {
    return {
      success: false,
      changed: false,
      message: "Failed to list kube-system pods",
      // Remote stderr is discarded by the command, so fall back to the exit code.
      error: result.stderr.trim() || `kubectl exited with code ${result.exitCode}`,
    };
  }

  const lines = result.stdout.trim().split("\n").filter(Boolean);
  const notReady = lines.filter((l) => !l.includes("Running") && !l.includes("Completed"));
  return {
    success: notReady.length === 0,
    changed: false,
    message: notReady.length === 0
      ? `All ${lines.length} kube-system pods healthy`
      : `${notReady.length} unhealthy pod(s)`,
    ...(notReady.length > 0 ? { details: notReady } : {}),
  };
};

View File

@@ -0,0 +1,8 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Health check: run `k3s secrets-encrypt status` and succeed when its output
 * contains "Enabled". The exit code is not inspected. Never reports a change.
 */
export const checkSecretsEncryption: Operation = async (ctx): Promise<OperationResult> => {
  const { stdout } = await ctx.ssh.exec("k3s secrets-encrypt status 2>/dev/null", sshOpts(ctx));
  if (!stdout.includes("Enabled")) {
    return { success: false, changed: false, message: "Secrets encryption not enabled" };
  }
  return { success: true, changed: false, message: "Secrets encryption enabled" };
};

View File

@@ -0,0 +1,32 @@
// k3s module entry point.
// New operation-based module
export { K3sModule } from "./k3s-module.js";
// Types
export type {
K3sConfig,
OperationContext,
OperationResult,
Operation,
NamedOperation,
OperationGroup,
SshClient,
} from "./types.js";
// Utilities
export { runSequential, aggregateResults, writeRemoteFile, isServiceActive, checkCommand } from "./utils.js";
// Individual operations
export * from "./operations/index.js";
// Operation groups
export * from "./groups/index.js";
// Health checks
export * from "./health/index.js";
// DEPRECATED: Legacy bash script generators — remove after CLI migration
export { generateInstallScript, type K3sInstallContext } from "./install.js";
export { generateConfigureScript } from "./configure.js";
export { generateHealthScript, K3S_HEALTH_CHECKS, type HealthCheck } from "./health.js";

View File

@@ -0,0 +1,275 @@
// k3s module — install phase.
// Installs k3s with CIS-hardened configuration and Cilium CNI.
/**
 * Inputs for the legacy bash install-script generator `generateInstallScript`.
 *
 * `hostname` and `ip` are written into the server's `tls-san` list; `role`
 * selects between the server and the agent script. The generator treats only
 * the exact value "infra" as a server — any other role produces the agent script.
 */
export interface K3sInstallContext {
hostname: string;
ip: string;
role: string; // "infra" = server, "worker" = agent
k3sServerUrl?: string; // Required for agent role
k3sToken?: string; // Required for agent role
}
/**
 * Generate the shell script that installs k3s on a target machine.
 *
 * The script runs under `set -euo pipefail`. Steps 1-4 are common to both roles:
 * load and persist kernel modules, write and apply the sysctl file, disable
 * swap (including its /etc/fstab entries), and disable and mask firewalld and
 * ufw. The remainder comes from `generateServerInstall` when `ctx.role` is
 * exactly "infra", otherwise from `generateAgentInstall`.
 *
 * NOTE(review): the K3sModule class treats "labcontroller" as a server role as
 * well; this legacy generator would emit the agent script for it — confirm that
 * is acceptable for the deprecated path.
 *
 * @param ctx Host identity, role and (for agents) the join URL and token.
 * @returns The complete script text, starting with the `#!/bin/bash` shebang.
 */
export function generateInstallScript(ctx: K3sInstallContext): string {
// Only "infra" selects the server script; see the note in the doc comment above.
const isServer = ctx.role === "infra";
return `#!/bin/bash
set -euo pipefail
echo "=== k3s install: ${ctx.hostname} (${ctx.role}) ==="
# ── 1. Verify kernel prerequisites ──
echo "[1/10] Checking kernel modules..."
modprobe br_netfilter
modprobe overlay
modprobe ip_conntrack 2>/dev/null || true
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
br_netfilter
overlay
ip_conntrack
MODULES
# ── 2. CIS-compliant sysctl ──
echo "[2/10] Setting kernel parameters..."
cat > /etc/sysctl.d/90-k3s-cis.conf << 'SYSCTL'
# k3s CIS hardening
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
vm.panic_on_oom = 0
vm.overcommit_memory = 1
kernel.panic = 10
kernel.panic_on_oops = 1
# inotify limits for large clusters
fs.inotify.max_user_instances = 524288
fs.inotify.max_user_watches = 524288
SYSCTL
sysctl --system > /dev/null
# ── 3. Disable swap (CIS requirement) ──
echo "[3/10] Disabling swap..."
swapoff -a || true
sed -i '/\\sswap\\s/d' /etc/fstab
# ── 4. Disable firewall permanently (k3s/Cilium manage iptables directly) ──
# CRITICAL: firewalld's nftables rules block pod-to-gateway traffic.
# Must survive reboot — use both disable and mask.
echo "[4/10] Disabling firewall..."
systemctl disable --now firewalld 2>/dev/null || true
systemctl mask firewalld 2>/dev/null || true
systemctl disable --now ufw 2>/dev/null || true
systemctl mask ufw 2>/dev/null || true
${isServer ? generateServerInstall(ctx) : generateAgentInstall(ctx)}
echo "=== k3s install complete ==="
`;
}
/**
 * Produce the server-only tail (steps 5-10) of the install script.
 *
 * The fragment sets SELinux to permissive, writes /etc/rancher/k3s/config.yaml
 * and the audit policy, stops a running k3s and removes leftover flannel,
 * Cilium and port-8472 vxlan interfaces plus CNI state, installs and restarts
 * k3s, waits for the API (up to 60 polls, 2 seconds apart), installs the Cilium
 * CLI and Cilium itself on the default-route device, and finally labels the
 * `default` namespace with the "restricted" Pod Security Standard.
 *
 * Only `ctx.hostname` and `ctx.ip` are interpolated by TypeScript (into the
 * `tls-san` list). Bash expansions are written as `\$...` so they reach the
 * script unchanged; `$(seq 1 60)` needs no escape because it is not a `${`
 * sequence.
 *
 * NOTE(review): the "API available after ...s" message prints the loop counter,
 * while each iteration sleeps 2 seconds — the figure is polls, not seconds.
 * NOTE(review): the RequestResponse audit rule lists "deployments" under the
 * core group (""); Deployments are served from the "apps" group, so that entry
 * presumably never matches — confirm against the Kubernetes audit policy docs.
 *
 * @param ctx Install context; only `hostname` and `ip` are read here.
 * @returns Script fragment to splice after the common steps.
 */
function generateServerInstall(ctx: K3sInstallContext): string {
return `# ── 5. Set SELinux permissive (Fedora: k3s-selinux RPM has GPG issues with dnf5) ──
echo "[5/10] Configuring SELinux..."
setenforce 0 2>/dev/null || true
sed -i 's/^SELINUX=enforcing/SELINUX=permissive/' /etc/selinux/config 2>/dev/null || true
# ── 5b. Create k3s config directory ──
echo "[5/10] Writing k3s server configuration..."
mkdir -p /etc/rancher/k3s
mkdir -p /var/log/kubernetes
cat > /etc/rancher/k3s/config.yaml << 'K3S_CONFIG'
# k3s server configuration — CIS hardened
protect-kernel-defaults: true
secrets-encryption: true
write-kubeconfig-mode: "0640"
# Disable default components (we use Cilium)
flannel-backend: none
disable-network-policy: true
disable:
- servicelb
- traefik
# API server hardening
kube-apiserver-arg:
- "anonymous-auth=false"
- "audit-log-path=/var/log/kubernetes/audit.log"
- "audit-log-maxage=30"
- "audit-log-maxbackup=10"
- "audit-log-maxsize=100"
- "audit-policy-file=/etc/rancher/k3s/audit-policy.yaml"
- "enable-admission-plugins=NodeRestriction,PodSecurity"
- "request-timeout=300s"
# Kubelet hardening
kubelet-arg:
- "protect-kernel-defaults=true"
- "streaming-connection-idle-timeout=5m"
- "make-iptables-util-chains=true"
# TLS SANs for remote access
tls-san:
- "${ctx.hostname}"
- "${ctx.ip}"
K3S_CONFIG
# ── 6. Write audit policy ──
echo "[6/10] Writing audit policy..."
cat > /etc/rancher/k3s/audit-policy.yaml << 'AUDIT_POLICY'
apiVersion: audit.k8s.io/v1
kind: Policy
rules:
# Log secret/configmap access at metadata level
- level: Metadata
resources:
- group: ""
resources: ["secrets", "configmaps"]
# Log pod/service mutations at request level
- level: RequestResponse
verbs: ["create", "update", "patch", "delete"]
resources:
- group: ""
resources: ["pods", "services", "deployments"]
# Skip noisy endpoints
- level: None
resources:
- group: ""
resources: ["endpoints", "events"]
users: ["system:kube-proxy", "system:apiserver"]
# Default: log everything else at metadata level
- level: Metadata
omitStages:
- "RequestReceived"
AUDIT_POLICY
# ── 6b. Pre-install cleanup: stop existing k3s and remove stale CNI state ──
# CRITICAL: flannel.1 vxlan uses port 8472 which conflicts with Cilium's vxlan.
# If we don't clean this up BEFORE starting k3s with flannel-backend=none + Cilium,
# Cilium will fail with "address already in use" and ALL pod creation will hang.
echo "[6b/10] Cleaning up previous CNI state..."
if systemctl is-active k3s >/dev/null 2>&1; then
echo " Stopping k3s before reconfiguration..."
systemctl stop k3s
sleep 3
fi
# Remove stale flannel interface (uses same vxlan port 8472 as Cilium)
if ip link show flannel.1 >/dev/null 2>&1; then
echo " Removing stale flannel.1 vxlan interface..."
ip link delete flannel.1 2>/dev/null || true
fi
# Remove stale Cilium interfaces from any previous install
for iface in cilium_vxlan cilium_host cilium_net; do
if ip link show "\$iface" >/dev/null 2>&1; then
echo " Removing stale \$iface interface..."
ip link delete "\$iface" 2>/dev/null || true
fi
done
# Remove any other vxlan on port 8472 (Cilium's port)
for iface in \$(ip -o link show type vxlan 2>/dev/null | awk -F': ' '{print \$2}'); do
if ip -d link show "\$iface" 2>/dev/null | grep -q 'dstport 8472'; then
echo " Removing conflicting vxlan interface: \$iface"
ip link delete "\$iface" 2>/dev/null || true
fi
done
# Clean old CNI config and state
rm -rf /etc/cni/net.d/* 2>/dev/null || true
rm -rf /var/lib/cni/ 2>/dev/null || true
echo " CNI state cleaned"
# ── 7. Install k3s server ──
echo "[7/10] Installing k3s server..."
curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server" INSTALL_K3S_SKIP_SELINUX_RPM=true sh -
# Force restart to pick up new config (installer may skip if binary unchanged)
echo " Restarting k3s to apply configuration..."
systemctl restart k3s
# ── 8. Wait for k3s API to be available (node will be NotReady until Cilium is installed) ──
echo "[8/10] Waiting for k3s API..."
for i in $(seq 1 60); do
if k3s kubectl get nodes 2>/dev/null; then
echo " API available after \${i}s"
break
fi
sleep 2
done
# ── 9. Install Cilium CNI (node becomes Ready after Cilium provides networking) ──
echo "[9/10] Installing Cilium CNI..."
CILIUM_CLI_VERSION=\$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
ARCH=\$(uname -m)
case "\$ARCH" in
x86_64) CLI_ARCH="amd64" ;;
aarch64) CLI_ARCH="arm64" ;;
*) CLI_ARCH="\$ARCH" ;;
esac
curl -L --fail --silent \\
"https://github.com/cilium/cilium-cli/releases/download/\${CILIUM_CLI_VERSION}/cilium-linux-\${CLI_ARCH}.tar.gz" \\
| tar xz -C /usr/local/bin
# Detect the default route device (avoid picking up tailscale/wireguard interfaces)
DEFAULT_DEV=\$(ip -4 route show default | awk '{print \$5}' | head -1)
echo " Using network device: \$DEFAULT_DEV"
KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium install \\
--set kubeProxyReplacement=true \\
--set ipam.mode=kubernetes \\
--set devices="\$DEFAULT_DEV" \\
--set nodePort.directRoutingDevice="\$DEFAULT_DEV"
echo " Waiting for Cilium to become ready..."
KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium status --wait --wait-duration 300s || echo " Cilium wait timed out (may still be pulling images)"
# Wait for node to become Ready (now that Cilium provides CNI)
echo " Waiting for node Ready..."
k3s kubectl wait --for=condition=Ready node --all --timeout=120s || echo " Node not ready yet (Cilium may still be initializing)"
# ── 10. Apply Pod Security Standards ──
echo "[10/10] Applying Pod Security Standards..."
k3s kubectl label namespace default pod-security.kubernetes.io/enforce=restricted --overwrite
k3s kubectl label namespace default pod-security.kubernetes.io/warn=restricted --overwrite
k3s kubectl label namespace default pod-security.kubernetes.io/audit=restricted --overwrite
`;
}
/**
 * Produce the agent-only tail of the install script.
 *
 * When either `ctx.k3sServerUrl` or `ctx.k3sToken` is missing the fragment only
 * prints an error and exits with status 1. Otherwise it writes the agent's
 * config.yaml, pipes the upstream installer into `sh` with `K3S_URL` and
 * `K3S_TOKEN` set, sleeps 10 seconds and checks that the `k3s-agent` unit is
 * active. Because the surrounding script uses `set -e`, a failing
 * `systemctl is-active` aborts the script.
 *
 * NOTE(review): the URL and token are placed inside double-quoted shell strings
 * without escaping — presumably neither ever contains `$`, backticks or quotes;
 * confirm.
 *
 * @param ctx Install context; `k3sServerUrl` and `k3sToken` are read here.
 * @returns Script fragment to splice after the common steps.
 */
function generateAgentInstall(ctx: K3sInstallContext): string {
// Guard: without both join parameters the agent cannot be installed.
if (!ctx.k3sServerUrl || !ctx.k3sToken) {
return `echo "ERROR: k3s agent requires --k3s-server-url and --k3s-token"
exit 1`;
}
return `# ── 5-10. Install k3s agent ──
echo "[5/10] Installing k3s agent..."
mkdir -p /etc/rancher/k3s
cat > /etc/rancher/k3s/config.yaml << 'K3S_CONFIG'
protect-kernel-defaults: true
kubelet-arg:
- "protect-kernel-defaults=true"
- "streaming-connection-idle-timeout=5m"
- "make-iptables-util-chains=true"
K3S_CONFIG
echo "[6/10] Joining cluster at ${ctx.k3sServerUrl}..."
curl -sfL https://get.k3s.io | \\
INSTALL_K3S_EXEC="agent" \\
K3S_URL="${ctx.k3sServerUrl}" \\
K3S_TOKEN="${ctx.k3sToken}" \\
sh -
echo "[7/10] Waiting for agent to connect..."
sleep 10
echo "[8/10] Verifying agent service..."
systemctl is-active k3s-agent
echo "[9/10] Agent joined successfully"
echo "[10/10] Done"
`;
}

View File

@@ -0,0 +1,112 @@
// K3sModule: implements the Module interface using typed operations.
// Orchestrates install/configure/health phases via operation groups.
import type { Module, ModuleMetadata, ModuleContext, ModuleResult } from "../../../src/types.js";
import type { OperationContext, K3sConfig, OperationResult } from "./types.js";
import { sshExec } from "../../../src/ssh.js";
import { aggregateResults } from "./utils.js";
import { runHostPrep } from "./groups/host-prep.js";
import { runK3sServer } from "./groups/k3s-server.js";
import { runK3sAgent } from "./groups/k3s-agent.js";
import { runNetworking } from "./groups/networking.js";
import { runHardening } from "./groups/hardening.js";
import { runSequential } from "./utils.js";
import * as health from "./health/index.js";
/**
 * Adapt a generic `ModuleContext` into the `OperationContext` consumed by the
 * k3s operations.
 *
 * The k3s-specific settings (`k3sServerUrl`, `k3sToken`, `tlsSans`) are read
 * from `ctx.config`. The SSH client wraps `sshExec` for `ctx.ip` and
 * `ctx.sshUser`, and adds `keyPath` to the options only when `ctx.sshKeyPath`
 * is set. The `log` callback discards its message.
 *
 * @param ctx Module-level context supplied by the caller of the module.
 * @returns The operation-level context.
 */
function toOpContext(ctx: ModuleContext): OperationContext {
  const settings = ctx.config;
  const config: K3sConfig = {
    hostname: ctx.hostname,
    ip: ctx.ip,
    role: ctx.role as K3sConfig["role"],
    k3sServerUrl: settings["k3sServerUrl"] as string | undefined,
    k3sToken: settings["k3sToken"] as string | undefined,
    tlsSans: settings["tlsSans"] as string[] | undefined,
  };

  const ssh: OperationContext["ssh"] = {
    exec: (cmd, opts) => {
      // The key path is looked up on every call and only added when present.
      const keyOption = ctx.sshKeyPath ? { keyPath: ctx.sshKeyPath } : {};
      return sshExec(ctx.ip, ctx.sshUser, cmd, { ...opts, ...keyOption });
    },
    user: ctx.sshUser,
    ip: ctx.ip,
    keyPath: ctx.sshKeyPath,
  };

  return {
    config,
    ssh,
    os: ctx.os,
    arch: ctx.arch,
    log: (_msg) => { /* collected via results */ },
  };
}
/**
 * Collapse a list of operation results into a single `ModuleResult`.
 *
 * @param phase Phase label copied into the result.
 * @param results Operation results to aggregate via `aggregateResults`.
 * @param startMs `performance.now()` reading taken when the phase started.
 * @returns Aggregated success flag, elapsed whole milliseconds, the aggregate's
 *   details (or its message when there are none) as output, and the
 *   aggregate's error, if any, as a one-element error list.
 */
function toModuleResult(phase: ModuleResult["phase"], results: OperationResult[], startMs: number): ModuleResult {
  const summary = aggregateResults(results);
  const elapsedMs = Math.round(performance.now() - startMs);
  const output = summary.details ?? [summary.message];
  const errors = summary.error ? [summary.error] : [];
  return { success: summary.success, phase, duration: elapsedMs, output, errors };
}
/**
 * k3s module built from typed operations.
 *
 * `install` runs host preparation, then the server or agent install, then (on
 * servers only) networking. `configure` runs the hardening group. `health` runs
 * the health checks that apply to the node's role.
 *
 * The "infra" and "labcontroller" roles are treated as k3s servers; every other
 * role is treated as an agent.
 */
export class K3sModule implements Module {
  readonly metadata: ModuleMetadata = {
    name: "k3s",
    version: "1.0.0",
    description: "CIS-hardened k3s with Cilium CNI",
    targets: { roles: ["infra", "worker", "labcontroller"] },
    dependencies: [],
  };

  /**
   * Install k3s on the target host.
   *
   * Stops at the first phase that contains a failed operation and returns the
   * results gathered so far.
   *
   * @param ctx Module context describing the target host.
   * @returns Aggregated result for the "install" phase.
   */
  async install(ctx: ModuleContext): Promise<ModuleResult> {
    const start = performance.now();
    const opCtx = toOpContext(ctx);
    const isServer = ctx.role === "infra" || ctx.role === "labcontroller";

    // Phase 1: Host preparation
    const prepResults = await runHostPrep(opCtx);
    if (prepResults.some((r) => !r.success)) {
      return toModuleResult("install", prepResults, start);
    }

    // Phase 2: K3s install (server or agent)
    const k3sResults = isServer
      ? await runK3sServer(opCtx)
      : await runK3sAgent(opCtx);
    if (k3sResults.some((r) => !r.success)) {
      return toModuleResult("install", [...prepResults, ...k3sResults], start);
    }

    // Phase 3: Networking (server only — agents don't install Cilium)
    let netResults: OperationResult[] = [];
    if (isServer) {
      netResults = await runNetworking(opCtx);
    }
    return toModuleResult("install", [...prepResults, ...k3sResults, ...netResults], start);
  }

  /**
   * Apply post-install hardening.
   *
   * @param ctx Module context describing the target host.
   * @returns Aggregated result for the "configure" phase.
   */
  async configure(ctx: ModuleContext): Promise<ModuleResult> {
    const start = performance.now();
    const opCtx = toOpContext(ctx);
    const results = await runHardening(opCtx);
    return toModuleResult("configure", results, start);
  }

  /**
   * Run the health checks for the target host.
   *
   * The service check runs for every role. The remaining checks all go through
   * `k3s kubectl`, `k3s secrets-encrypt` or the server kubeconfig at
   * /etc/rancher/k3s/k3s.yaml, so they are only run on server roles; running
   * them on an agent would report a healthy worker as failed.
   *
   * @param ctx Module context describing the target host.
   * @returns Aggregated result for the "health" phase.
   */
  async health(ctx: ModuleContext): Promise<ModuleResult> {
    const start = performance.now();
    const opCtx = toOpContext(ctx);
    const isServer = ctx.role === "infra" || ctx.role === "labcontroller";

    const checks = await runSequential(opCtx, [
      { name: "K3s service", fn: health.checkK3sService },
      ...(isServer
        ? [
            { name: "Node ready", fn: health.checkNodeReady },
            { name: "API health", fn: health.checkApiHealth },
            { name: "Secrets encryption", fn: health.checkSecretsEncryption },
            { name: "Cilium status", fn: health.checkCiliumStatus },
            { name: "Pod status", fn: health.checkPodStatus },
          ]
        : []),
    ]);
    return toModuleResult("health", checks, start);
  }
}

View File

@@ -0,0 +1,43 @@
// Write Kubernetes audit policy for k3s API server.
import type { Operation, OperationResult } from "../types.js";
import { writeRemoteFile } from "../utils.js";
/**
 * Kubernetes audit policy document that `writeAuditPolicy` uploads to
 * /etc/rancher/k3s/audit-policy.yaml.
 *
 * The rules, in order: Metadata level for secrets and configmaps;
 * RequestResponse level for create/update/patch/delete on the listed workload
 * resources; no logging for endpoints and events requested by kube-proxy or
 * the apiserver; Metadata level for everything else, omitting the
 * RequestReceived stage.
 *
 * NOTE(review): "deployments" is listed under the core group (""); Deployments
 * are served from the "apps" group, so that entry presumably never matches —
 * confirm against the Kubernetes audit policy documentation.
 */
const AUDIT_POLICY = `apiVersion: audit.k8s.io/v1
kind: Policy
rules:
# Log secret/configmap access at metadata level
- level: Metadata
resources:
- group: ""
resources: ["secrets", "configmaps"]
# Log pod/service mutations at request level
- level: RequestResponse
verbs: ["create", "update", "patch", "delete"]
resources:
- group: ""
resources: ["pods", "services", "deployments"]
# Skip noisy endpoints
- level: None
resources:
- group: ""
resources: ["endpoints", "events"]
users: ["system:kube-proxy", "system:apiserver"]
# Default: log everything else at metadata level
- level: Metadata
omitStages:
- "RequestReceived"`;
/**
 * Upload the audit policy to /etc/rancher/k3s/audit-policy.yaml via
 * `writeRemoteFile`. Always reports success; `changed` and the message mirror
 * the value `writeRemoteFile` resolves to.
 */
export const writeAuditPolicy: Operation = async (ctx): Promise<OperationResult> => {
  const targetPath = "/etc/rancher/k3s/audit-policy.yaml";
  const wasWritten = await writeRemoteFile(ctx, targetPath, AUDIT_POLICY);
  const message = wasWritten ? "Audit policy written" : "Audit policy unchanged";
  return { success: true, changed: wasWritten, message };
};

View File

@@ -0,0 +1,30 @@
// Check k3s TLS certificate expiry.
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Informational check of the k3s server TLS certificates.
 *
 * The first detail line states whether `k3s certificate rotate --help` exits 0.
 * The remaining lines list "<file>: <notAfter date>" for every .crt file in
 * /var/lib/rancher/k3s/server/tls. Always reports success and never a change.
 */
export const checkCertExpiry: Operation = async (ctx): Promise<OperationResult> => {
  // Is the rotate subcommand known to this k3s build?
  const rotation = await ctx.ssh.exec("k3s certificate rotate --help 2>/dev/null", sshOpts(ctx));
  const rotationNote = rotation.exitCode === 0
    ? "Certificate rotation available"
    : "Certificate rotation not available in this k3s version";

  // One output line per certificate file: "<basename>: <expiry date>".
  const listing = await ctx.ssh.exec(
    'for cert in /var/lib/rancher/k3s/server/tls/*.crt; do [ -f "$cert" ] && echo "$(basename "$cert"): $(openssl x509 -in "$cert" -enddate -noout 2>/dev/null | cut -d= -f2)"; done',
    sshOpts(ctx),
  );
  const expiryText = listing.stdout.trim();
  const expiryLines = expiryText ? expiryText.split("\n").map((entry) => entry.trim()) : [];

  return {
    success: true,
    changed: false,
    message: "Certificate check complete",
    details: [rotationNote, ...expiryLines],
  };
};

View File

@@ -0,0 +1,78 @@
// Install Cilium CNI with kube-proxy replacement.
// Detects architecture and network interface automatically.
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Installs the Cilium CNI with kube-proxy replacement on a k3s server.
 *
 * Steps, in order:
 *  1. Return early when `cilium status --brief` already reports OK.
 *  2. Resolve the current stable cilium-cli version and validate it.
 *  3. Download the CLI tarball for the node architecture into /usr/local/bin.
 *  4. Detect the device carrying the default IPv4 route.
 *  5. Run `cilium install`, then wait (best effort) for Cilium and the nodes.
 *
 * @param ctx - Operation context; `ctx.arch` selects the CLI tarball.
 * @returns `changed: false` when Cilium was already healthy or nothing was
 *   installed yet; `success: false` when the version lookup, the CLI download
 *   or `cilium install` fails.
 */
export const installCilium: Operation = async (ctx): Promise<OperationResult> => {
  const details: string[] = [];

  // Check if Cilium is already installed and running
  const ciliumCheck = await ctx.ssh.exec(
    "KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium status --brief 2>/dev/null",
    sshOpts(ctx),
  );
  if (ciliumCheck.exitCode === 0 && ciliumCheck.stdout.includes("OK")) {
    return { success: true, changed: false, message: "Cilium already installed" };
  }

  // Resolve the stable cilium CLI version
  const cliVersion = await ctx.ssh.exec(
    "curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt",
    sshOpts(ctx),
  );
  const version = cliVersion.stdout.trim();
  // The value comes from the network and is interpolated into a shell command
  // below, so refuse anything that is not a plain version tag (this also
  // catches an empty response or an HTML/"404" error body).
  if (cliVersion.exitCode !== 0 || !/^v?\d[\w.+-]*$/.test(version)) {
    return {
      success: false,
      changed: false,
      message: "Failed to resolve Cilium CLI version",
      error: cliVersion.stderr.trim() || `Unexpected version string: "${version}"`,
    };
  }

  // Install cilium CLI
  const archMap: Record<string, string> = { x86_64: "amd64", aarch64: "arm64" };
  const cliArch = archMap[ctx.arch] ?? ctx.arch;
  const dlResult = await ctx.ssh.exec(
    `curl -L --fail --silent "https://github.com/cilium/cilium-cli/releases/download/${version}/cilium-linux-${cliArch}.tar.gz" | tar xz -C /usr/local/bin`,
    { timeoutMs: 120_000 },
  );
  if (dlResult.exitCode !== 0) {
    return { success: false, changed: false, message: "Failed to download Cilium CLI", error: dlResult.stderr };
  }
  details.push(`Installed cilium CLI ${version} (${cliArch})`);

  // Detect default network device (avoid tailscale/wireguard)
  const devResult = await ctx.ssh.exec(
    "ip -4 route show default | awk '{print $5}' | head -1",
    sshOpts(ctx),
  );
  const defaultDev = devResult.stdout.trim();
  details.push(`Network device: ${defaultDev}`);

  // Install Cilium (single-line command, arguments separated by one space)
  const installCommand = [
    "KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium install",
    "--set kubeProxyReplacement=true",
    "--set ipam.mode=kubernetes",
    `--set devices="${defaultDev}"`,
    `--set nodePort.directRoutingDevice="${defaultDev}"`,
  ].join(" ");
  const installResult = await ctx.ssh.exec(installCommand, { timeoutMs: 300_000 });
  if (installResult.exitCode !== 0) {
    return { success: false, changed: true, message: "Cilium install failed", error: installResult.stderr };
  }
  details.push("Cilium installed");

  // Wait for Cilium ready (best effort: `|| true` keeps a timeout non-fatal)
  await ctx.ssh.exec(
    "KUBECONFIG=/etc/rancher/k3s/k3s.yaml cilium status --wait --wait-duration 300s 2>/dev/null || true",
    { timeoutMs: 310_000 },
  );
  // Wait for node Ready (best effort as well)
  await ctx.ssh.exec(
    "k3s kubectl wait --for=condition=Ready node --all --timeout=120s 2>/dev/null || true",
    { timeoutMs: 130_000 },
  );

  return {
    success: true,
    changed: true,
    message: "Cilium CNI installed",
    details,
  };
};

View File

@@ -0,0 +1,57 @@
// Clean up stale CNI state before k3s install.
// CRITICAL: flannel.1 vxlan uses port 8472 which conflicts with Cilium.
import type { Operation, OperationResult } from "../types.js";
import { sshOpts, isServiceActive } from "../utils.js";
/** Interfaces left behind by a previous flannel or Cilium deployment. */
const STALE_INTERFACES = ["flannel.1", "cilium_vxlan", "cilium_host", "cilium_net"];

/**
 * Removes leftover CNI state so a fresh k3s/Cilium install starts clean.
 *
 * Stops k3s when it is running, deletes the known stale interfaces, deletes
 * any other vxlan device bound to destination port 8472, and finally wipes the
 * CNI config/state directories.
 *
 * @param ctx - Operation context providing the SSH client.
 * @returns Always `success: true`; `changed` is true when the service was
 *   stopped or at least one interface was removed (each such action is listed
 *   in `details`). The directory wipe itself does not count as a change.
 */
export const cleanupStaleCni: Operation = async (ctx): Promise<OperationResult> => {
  const details: string[] = [];

  // k3s has to be down before its interfaces can be removed.
  const k3sRunning = await isServiceActive(ctx, "k3s");
  if (k3sRunning) {
    await ctx.ssh.exec("systemctl stop k3s", sshOpts(ctx));
    details.push("Stopped k3s service");
    // Short pause after the stop before touching the interfaces.
    await new Promise((resolve) => setTimeout(resolve, 3000));
  }

  // Known stale interfaces: delete each one that currently exists.
  for (const name of STALE_INTERFACES) {
    const probe = await ctx.ssh.exec(`ip link show ${name} 2>/dev/null`, sshOpts(ctx));
    if (probe.exitCode !== 0) {
      continue;
    }
    await ctx.ssh.exec(`ip link delete ${name} 2>/dev/null || true`, sshOpts(ctx));
    details.push(`Removed interface: ${name}`);
  }

  // Any remaining vxlan device on port 8472 would collide with Cilium.
  const vxlanListing = await ctx.ssh.exec(
    "ip -o link show type vxlan 2>/dev/null | awk -F': ' '{print $2}'",
    sshOpts(ctx),
  );
  const vxlanNames = vxlanListing.stdout.trim().split("\n").filter(Boolean);
  for (const name of vxlanNames) {
    const usesPort = await ctx.ssh.exec(
      `ip -d link show "${name}" 2>/dev/null | grep -q 'dstport 8472'`,
      sshOpts(ctx),
    );
    if (usesPort.exitCode !== 0) {
      continue;
    }
    await ctx.ssh.exec(`ip link delete "${name}" 2>/dev/null || true`, sshOpts(ctx));
    details.push(`Removed conflicting vxlan: ${name}`);
  }

  // Clean CNI config and state directories (unconditional, best effort).
  await ctx.ssh.exec("rm -rf /etc/cni/net.d/* /var/lib/cni/ 2>/dev/null || true", sshOpts(ctx));

  // Every recorded detail corresponds to exactly one modification.
  const changed = details.length > 0;
  return {
    success: true,
    changed,
    message: changed ? "CNI state cleaned" : "No CNI cleanup needed",
    details,
  };
};

View File

@@ -0,0 +1,50 @@
// Fix CoreDNS upstream DNS resolution.
// systemd-resolved listens on 127.0.0.53 which is unreachable from pod netns.
// Solution: write /etc/rancher/k3s/resolv.conf with the real upstream DNS.
import type { Operation, OperationResult } from "../types.js";
import { sshOpts, writeRemoteFile, isServiceActive } from "../utils.js";
/**
 * Points k3s at the node's real upstream DNS server.
 *
 * The upstream is taken from `resolvectl status` for the default-route link,
 * falling back to the first nameserver in /run/systemd/resolve/resolv.conf.
 * The address is written to /etc/rancher/k3s/resolv.conf; when that file
 * changed and k3s is active, k3s is restarted and the API is polled (up to
 * 30 attempts, 2 s apart) until `kubectl get nodes` succeeds.
 *
 * @param ctx - Operation context providing the SSH client.
 * @returns Always `success: true`. `changed: false` with a "skipping" message
 *   when no usable (non-stub) upstream could be detected.
 */
export const fixCoreDnsUpstream: Operation = async (ctx): Promise<OperationResult> => {
  /** True when the address is empty or the systemd-resolved stub listener. */
  const isUnusable = (address: string): boolean => !address || address === "127.0.0.53";

  // Primary source: per-link "Current DNS Server" reported by resolvectl.
  const resolvectlProbe = await ctx.ssh.exec(
    "resolvectl status 2>/dev/null | grep -A2 \"Link.*$(ip -4 route show default | awk '{print $5}' | head -1)\" | grep 'Current DNS' | awk '{print $NF}'",
    sshOpts(ctx),
  );
  let upstream = resolvectlProbe.stdout.trim();

  // Fallback: the resolv.conf systemd-resolved maintains with real upstreams.
  if (isUnusable(upstream)) {
    const resolvConfProbe = await ctx.ssh.exec(
      "cat /run/systemd/resolve/resolv.conf 2>/dev/null | grep '^nameserver' | head -1 | awk '{print $2}'",
      sshOpts(ctx),
    );
    upstream = resolvConfProbe.stdout.trim();
  }
  if (isUnusable(upstream)) {
    return { success: true, changed: false, message: "Could not detect upstream DNS, skipping" };
  }

  const changed = await writeRemoteFile(
    ctx,
    "/etc/rancher/k3s/resolv.conf",
    `nameserver ${upstream}`,
  );

  // Only a changed file warrants a restart, and only if k3s is running at all.
  if (changed && (await isServiceActive(ctx, "k3s"))) {
    await ctx.ssh.exec("systemctl restart k3s", sshOpts(ctx));
    // Wait for API
    let attemptsLeft = 30;
    while (attemptsLeft > 0) {
      attemptsLeft -= 1;
      const apiProbe = await ctx.ssh.exec("k3s kubectl get nodes 2>/dev/null", sshOpts(ctx));
      if (apiProbe.exitCode === 0) {
        break;
      }
      await new Promise((resolve) => setTimeout(resolve, 2000));
    }
  }

  const message = changed ? `DNS upstream set to ${upstream}` : "DNS already configured";
  return { success: true, changed, message };
};

View File

@@ -0,0 +1,38 @@
// Disable and mask firewall services.
// CRITICAL: firewalld's nftables rules block pod-to-gateway traffic.
// Both disable and mask to survive reboots.
import type { Operation, OperationResult } from "../types.js";
import { sshOpts, isServiceActive } from "../utils.js";
/** Host firewall units that get disabled and masked. */
const FIREWALL_SERVICES = ["firewalld", "ufw"];

/**
 * Disables and masks the host firewall services.
 *
 * An active service is disabled (`--now`) and masked. An inactive service is
 * masked only when `systemctl is-enabled` exits 0 and does not already report
 * "masked".
 *
 * @param ctx - Operation context providing the SSH client.
 * @returns Always `success: true`; `changed` is true when any unit was
 *   disabled or masked, with one `details` entry per affected unit.
 */
export const disableFirewall: Operation = async (ctx): Promise<OperationResult> => {
  const details: string[] = [];

  for (const unit of FIREWALL_SERVICES) {
    if (await isServiceActive(ctx, unit)) {
      await ctx.ssh.exec(`systemctl disable --now ${unit} 2>/dev/null || true`, sshOpts(ctx));
      await ctx.ssh.exec(`systemctl mask ${unit} 2>/dev/null || true`, sshOpts(ctx));
      details.push(`Disabled and masked: ${unit}`);
      continue;
    }

    // Still mask even if not active (might be enabled but stopped)
    const enablement = await ctx.ssh.exec(`systemctl is-enabled ${unit} 2>/dev/null`, sshOpts(ctx));
    const needsMask = enablement.exitCode === 0 && enablement.stdout.trim() !== "masked";
    if (needsMask) {
      await ctx.ssh.exec(`systemctl mask ${unit} 2>/dev/null || true`, sshOpts(ctx));
      details.push(`Masked: ${unit}`);
    }
  }

  // Every recorded detail corresponds to exactly one modification.
  const changed = details.length > 0;
  return {
    success: true,
    changed,
    message: changed ? "Firewall disabled" : "Firewall already disabled",
    details,
  };
};

View File

@@ -0,0 +1,15 @@
// Barrel file: re-exports every k3s operation from its own module so callers
// can import them all from a single path.
export { loadKernelModules } from "./kernel-modules.js";
export { applyCisHardening } from "./sysctl.js";
export { disableSwap } from "./swap.js";
export { disableFirewall } from "./firewall.js";
export { setSelinuxPermissive } from "./selinux.js";
export { writeK3sConfig } from "./k3s-config.js";
export { writeAuditPolicy } from "./audit-policy.js";
export { cleanupStaleCni } from "./cni-cleanup.js";
export { installK3sBinary } from "./k3s-install.js";
export { installCilium } from "./cilium.js";
export { fixCoreDnsUpstream } from "./dns-fix.js";
export { configureLogRotation } from "./log-rotation.js";
export { applyDefaultNetworkPolicies } from "./network-policy.js";
export { applyPodSecurityStandards } from "./pod-security.js";
export { checkCertExpiry } from "./cert-check.js";

View File

@@ -0,0 +1,66 @@
// Write k3s server or agent configuration YAML.
import type { Operation, OperationResult, K3sConfig } from "../types.js";
import { sshOpts, writeRemoteFile } from "../utils.js";
/** Returns true for the roles that run k3s in server mode. */
function isServerRole(role: string): boolean {
  const serverRoles = ["infra", "labcontroller"];
  return serverRoles.includes(role);
}
/**
 * Renders the k3s server `config.yaml`.
 *
 * The static part carries the hardening flags (secrets encryption, audit
 * logging, PodSecurity admission, flannel/servicelb/traefik disabled). The
 * `tls-san` list is built from the hostname, the IP and any extra
 * `config.tlsSans`, in that order.
 *
 * @param config - Node configuration supplying hostname, IP and extra SANs.
 * @returns The YAML document, terminated by a newline.
 */
function generateServerConfig(config: K3sConfig): string {
  const sanEntries = [config.hostname, config.ip, ...(config.tlsSans ?? [])].map(
    (san) => ` - "${san}"`,
  );
  const lines = [
    "# k3s server configuration — CIS hardened",
    "protect-kernel-defaults: true",
    "secrets-encryption: true",
    'write-kubeconfig-mode: "0640"',
    "flannel-backend: none",
    "disable-network-policy: true",
    "disable:",
    "- servicelb",
    "- traefik",
    "kube-apiserver-arg:",
    '- "anonymous-auth=false"',
    '- "audit-log-path=/var/log/kubernetes/audit.log"',
    '- "audit-log-maxage=30"',
    '- "audit-log-maxbackup=10"',
    '- "audit-log-maxsize=100"',
    '- "audit-policy-file=/etc/rancher/k3s/audit-policy.yaml"',
    '- "enable-admission-plugins=NodeRestriction,PodSecurity"',
    '- "request-timeout=300s"',
    "kubelet-arg:",
    '- "protect-kernel-defaults=true"',
    '- "streaming-connection-idle-timeout=5m"',
    '- "make-iptables-util-chains=true"',
    "tls-san:",
    ...sanEntries,
    "", // yields the trailing newline
  ];
  return lines.join("\n");
}
/**
 * Renders the k3s agent `config.yaml`: kernel-default protection plus the
 * same kubelet arguments the server configuration uses.
 *
 * @returns The YAML document, terminated by a newline.
 */
function generateAgentConfig(): string {
  const lines = [
    "protect-kernel-defaults: true",
    "kubelet-arg:",
    '- "protect-kernel-defaults=true"',
    '- "streaming-connection-idle-timeout=5m"',
    '- "make-iptables-util-chains=true"',
    "", // yields the trailing newline
  ];
  return lines.join("\n");
}
/**
 * Writes /etc/rancher/k3s/config.yaml for the node's role.
 *
 * Creates the k3s config directory and the audit-log directory first, then
 * writes the server or agent configuration depending on `ctx.config.role`.
 *
 * @param ctx - Operation context; `ctx.config` drives the generated YAML.
 * @returns Always `success: true`; `changed` mirrors the file-write result.
 */
export const writeK3sConfig: Operation = async (ctx): Promise<OperationResult> => {
  await ctx.ssh.exec("mkdir -p /etc/rancher/k3s /var/log/kubernetes", sshOpts(ctx));

  let content: string;
  if (isServerRole(ctx.config.role)) {
    content = generateServerConfig(ctx.config);
  } else {
    content = generateAgentConfig();
  }

  const changed = await writeRemoteFile(ctx, "/etc/rancher/k3s/config.yaml", content);
  const message = changed ? "K3s config written" : "K3s config unchanged";
  return { success: true, changed, message };
};

View File

@@ -0,0 +1,71 @@
// Install k3s binary (server or agent mode).
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/** Tells whether a role installs k3s in server mode (anything else is an agent). */
function isServerRole(role: string): boolean {
  switch (role) {
    case "infra":
    case "labcontroller":
      return true;
    default:
      return false;
  }
}
/** Escapes a value for embedding inside a double-quoted POSIX shell word. */
function escapeForDoubleQuotes(value: string): string {
  // Inside double quotes only \, ", $ and ` keep a special meaning.
  return value.replace(/[\\"$`]/g, "\\$&");
}

/**
 * Installs k3s through the upstream install script, in server or agent mode.
 *
 * The install script is run even when k3s is already present; afterwards the
 * service is restarted so the written config is picked up. For servers the
 * API is polled (up to 60 attempts, 2 s apart) with `k3s kubectl get nodes`.
 *
 * @param ctx - Operation context; agents need `k3sServerUrl` and `k3sToken`
 *   in `ctx.config`.
 * @returns `success: false` when the agent join data is missing, the install
 *   script fails, the service restart fails, or the server API never becomes
 *   reachable. `changed` is true when k3s was not installed beforehand.
 */
export const installK3sBinary: Operation = async (ctx): Promise<OperationResult> => {
  const isServer = isServerRole(ctx.config.role);
  const modeLabel = isServer ? "server" : "agent";

  // Check if already installed
  const version = await ctx.ssh.exec("k3s --version 2>/dev/null", sshOpts(ctx));
  const alreadyInstalled = version.exitCode === 0;

  let installCommand: string;
  if (isServer) {
    installCommand =
      'curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server" INSTALL_K3S_SKIP_SELINUX_RPM=true sh -';
  } else {
    const { k3sServerUrl, k3sToken } = ctx.config;
    if (!k3sServerUrl || !k3sToken) {
      return {
        success: false,
        changed: false,
        message: "Agent requires k3sServerUrl and k3sToken",
        error: "Missing agent join configuration",
      };
    }
    // URL and token are placed inside double quotes; escape shell
    // metacharacters so a token containing e.g. `$` is passed verbatim.
    const url = escapeForDoubleQuotes(k3sServerUrl);
    const token = escapeForDoubleQuotes(k3sToken);
    installCommand = `curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="agent" K3S_URL="${url}" K3S_TOKEN="${token}" sh -`;
  }

  const installResult = await ctx.ssh.exec(installCommand, { timeoutMs: 300_000 });
  if (installResult.exitCode !== 0) {
    return {
      success: false,
      changed: false,
      message: `K3s ${modeLabel} install failed`,
      error: installResult.stderr.trim(),
    };
  }

  // Restart to ensure config is applied
  const service = isServer ? "k3s" : "k3s-agent";
  const restart = await ctx.ssh.exec(`systemctl restart ${service}`, sshOpts(ctx));
  if (restart.exitCode !== 0) {
    return {
      success: false,
      changed: !alreadyInstalled,
      message: `Failed to restart ${service}`,
      error: restart.stderr.trim(),
    };
  }

  // Wait for API (server only)
  if (isServer) {
    let apiReady = false;
    for (let attempt = 0; attempt < 60; attempt++) {
      const check = await ctx.ssh.exec("k3s kubectl get nodes 2>/dev/null", sshOpts(ctx));
      if (check.exitCode === 0) {
        apiReady = true;
        break;
      }
      await new Promise((resolve) => setTimeout(resolve, 2000));
    }
    if (!apiReady) {
      // Without this the operation would report success although every
      // later kubectl-based step is bound to fail.
      return {
        success: false,
        changed: !alreadyInstalled,
        message: "K3s API did not become ready",
        error: "Timed out waiting for `k3s kubectl get nodes` to succeed",
      };
    }
  }

  return {
    success: true,
    changed: !alreadyInstalled,
    message: alreadyInstalled ? "K3s restarted with updated config" : "K3s installed",
  };
};

View File

@@ -0,0 +1,39 @@
// Load required kernel modules for k3s container networking.
import type { Operation, OperationResult } from "../types.js";
import { sshOpts, writeRemoteFile } from "../utils.js";
/** Modules requested via modprobe and persisted in /etc/modules-load.d/k3s.conf. */
const REQUIRED_MODULES = ["br_netfilter", "overlay", "ip_conntrack"];

/**
 * Name under which a requested module shows up in `lsmod` when it differs
 * from the requested name. `ip_conntrack` is a legacy alias: the module that
 * actually gets loaded is listed as `nf_conntrack`, so probing lsmod for
 * `ip_conntrack` can never succeed.
 */
const LOADED_MODULE_NAME: Record<string, string> = { ip_conntrack: "nf_conntrack" };

/**
 * Loads the kernel modules k3s networking needs and persists them for boot.
 *
 * Each module is probed with `lsmod` (using its real loaded name) and loaded
 * with `modprobe` only when missing. A failing modprobe is recorded in
 * `details` but does not fail the operation.
 *
 * @param ctx - Operation context providing the SSH client.
 * @returns Always `success: true`; `changed` is true when a module was loaded
 *   or the modules-load file had to be (re)written.
 */
export const loadKernelModules: Operation = async (ctx): Promise<OperationResult> => {
  const details: string[] = [];
  let changed = false;

  for (const mod of REQUIRED_MODULES) {
    const loadedName = LOADED_MODULE_NAME[mod] ?? mod;
    const check = await ctx.ssh.exec(`lsmod | grep -q "^${loadedName}"`, sshOpts(ctx));
    if (check.exitCode === 0) {
      details.push(`Already loaded: ${mod}`);
      continue;
    }

    // No `|| true` here: the exit code decides what gets reported.
    const load = await ctx.ssh.exec(`modprobe ${mod} 2>/dev/null`, sshOpts(ctx));
    if (load.exitCode === 0) {
      details.push(`Loaded: ${mod}`);
      changed = true;
    } else {
      // Best effort: keep going, but do not claim the module was loaded.
      details.push(`Failed to load: ${mod}`);
    }
  }

  const fileChanged = await writeRemoteFile(
    ctx,
    "/etc/modules-load.d/k3s.conf",
    REQUIRED_MODULES.join("\n"),
  );
  if (fileChanged) {
    details.push("Wrote /etc/modules-load.d/k3s.conf");
    changed = true;
  }

  return {
    success: true,
    changed,
    message: changed ? "Kernel modules configured" : "Kernel modules already configured",
    details,
  };
};

View File

@@ -0,0 +1,25 @@
// Configure log rotation for k3s.
import type { Operation, OperationResult } from "../types.js";
import { writeRemoteFile } from "../utils.js";
/** logrotate stanza for the Kubernetes logs under /var/log/kubernetes. */
const LOGROTATE_CONFIG = [
  "/var/log/kubernetes/*.log {",
  "daily",
  "rotate 14",
  "compress",
  "delaycompress",
  "missingok",
  "notifempty",
  "copytruncate",
  "maxsize 100M",
  "}",
].join("\n");

/**
 * Installs the logrotate configuration for k3s.
 *
 * @param ctx - Operation context providing the SSH client.
 * @returns Always `success: true`; `changed` mirrors the file-write result.
 */
export const configureLogRotation: Operation = async (ctx): Promise<OperationResult> => {
  const target = "/etc/logrotate.d/k3s";
  const changed = await writeRemoteFile(ctx, target, LOGROTATE_CONFIG);
  const message = changed ? "Log rotation configured" : "Log rotation already configured";
  return { success: true, changed, message };
};

View File

@@ -0,0 +1,50 @@
// Apply default network policies: deny all ingress, allow DNS egress.
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * NetworkPolicy denying all ingress to pods in the `default` namespace.
 * Nested keys are indented: YAML nesting is significant, and without it
 * `name`, `namespace` and `podSelector` would be top-level keys.
 */
const DENY_INGRESS = `apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny-ingress
  namespace: default
spec:
  podSelector: {}
  policyTypes:
    - Ingress`;

/** NetworkPolicy allowing DNS (port 53, UDP and TCP) egress from `default`. */
const ALLOW_DNS = `apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-dns
  namespace: default
spec:
  podSelector: {}
  policyTypes:
    - Egress
  egress:
    - to: []
      ports:
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP`;

/**
 * Applies the default network policies with `kubectl apply`.
 *
 * Policies are applied one after another; the first failure aborts and is
 * returned. `changed` is derived from kubectl's own report: a policy whose
 * output ends in "unchanged" does not count as a modification.
 *
 * @param ctx - Operation context providing the SSH client.
 * @returns `success: false` with kubectl's stderr when an apply fails;
 *   otherwise `success: true` and the list of applied policies in `details`.
 */
export const applyDefaultNetworkPolicies: Operation = async (ctx): Promise<OperationResult> => {
  const policies = [
    { name: "default-deny-ingress", manifest: DENY_INGRESS },
    { name: "allow-dns", manifest: ALLOW_DNS },
  ];
  const details: string[] = [];
  let changed = false;

  for (const { name, manifest } of policies) {
    // The manifest travels inside single quotes; close/reopen them around
    // any embedded single quote.
    const escaped = manifest.replace(/'/g, "'\\''");
    const result = await ctx.ssh.exec(
      `echo '${escaped}' | k3s kubectl apply -f -`,
      sshOpts(ctx),
    );
    if (result.exitCode !== 0) {
      // State is uncertain after a failed apply, so report it as changed.
      return { success: false, changed: true, message: `Failed to apply ${name}`, error: result.stderr };
    }
    details.push(`Applied: ${name}`);
    // kubectl prints "<resource> unchanged" when nothing had to be updated.
    if (!/\bunchanged\s*$/.test(result.stdout.trim())) {
      changed = true;
    }
  }

  return {
    success: true,
    changed,
    message: changed ? "Network policies applied" : "Network policies already applied",
    details,
  };
};

View File

@@ -0,0 +1,21 @@
// Apply Pod Security Standards (restricted) to default namespace.
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/** Pod Security Standards labels set on the `default` namespace. */
const PSS_LABELS = [
  "pod-security.kubernetes.io/enforce=restricted",
  "pod-security.kubernetes.io/warn=restricted",
  "pod-security.kubernetes.io/audit=restricted",
];

/**
 * Labels the `default` namespace with the "restricted" Pod Security Standard
 * for the enforce, warn and audit modes.
 *
 * Labels are applied one at a time; the first kubectl failure aborts and is
 * returned instead of being silently ignored. `changed` is derived from
 * kubectl's output: "not labeled" means the label already had that value.
 *
 * @param ctx - Operation context providing the SSH client.
 * @returns `success: false` with kubectl's stderr when labelling fails;
 *   otherwise `success: true`.
 */
export const applyPodSecurityStandards: Operation = async (ctx): Promise<OperationResult> => {
  let changed = false;

  for (const label of PSS_LABELS) {
    const result = await ctx.ssh.exec(
      `k3s kubectl label namespace default ${label} --overwrite`,
      sshOpts(ctx),
    );
    if (result.exitCode !== 0) {
      return {
        success: false,
        changed,
        message: `Failed to apply label ${label}`,
        error: result.stderr.trim(),
      };
    }
    if (!result.stdout.includes("not labeled")) {
      changed = true;
    }
  }

  return {
    success: true,
    changed,
    message: changed ? "Pod Security Standards applied" : "Pod Security Standards already applied",
  };
};

View File

@@ -0,0 +1,22 @@
// Set SELinux to permissive mode.
// Fedora: k3s-selinux RPM has GPG issues with dnf5, so we use permissive.
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Switches SELinux from enforcing to permissive, now and across reboots.
 *
 * `getenforce` decides whether anything has to be done; a missing `getenforce`
 * is treated as "Disabled". Both the runtime switch and the config edit are
 * best effort (`|| true`).
 *
 * @param ctx - Operation context providing the SSH client.
 * @returns Always `success: true`; `changed: false` when SELinux was already
 *   permissive or disabled.
 */
export const setSelinuxPermissive: Operation = async (ctx): Promise<OperationResult> => {
  const probe = await ctx.ssh.exec("getenforce 2>/dev/null || echo Disabled", sshOpts(ctx));
  const mode = probe.stdout.trim();

  const needsChange = mode !== "Permissive" && mode !== "Disabled";
  if (needsChange) {
    // Runtime switch first, then persist it in the SELinux config file.
    await ctx.ssh.exec("setenforce 0 2>/dev/null || true", sshOpts(ctx));
    await ctx.ssh.exec(
      "sed -i 's/^SELINUX=enforcing/SELINUX=permissive/' /etc/selinux/config 2>/dev/null || true",
      sshOpts(ctx),
    );
    return { success: true, changed: true, message: "SELinux set to permissive" };
  }

  return { success: true, changed: false, message: `SELinux already ${mode.toLowerCase()}` };
};

View File

@@ -0,0 +1,22 @@
// Disable swap (CIS requirement for k3s).
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
/**
 * Turns swap off and removes swap entries from /etc/fstab.
 *
 * `swapoff -a` only runs when `swapon --show` lists an active device; the
 * fstab cleanup runs on every invocation.
 *
 * @param ctx - Operation context providing the SSH client.
 * @returns Always `success: true`; `changed` is true only when swap was
 *   active beforehand.
 */
export const disableSwap: Operation = async (ctx): Promise<OperationResult> => {
  const probe = await ctx.ssh.exec("swapon --show --noheadings", sshOpts(ctx));
  const swapActive = probe.stdout.trim() !== "";

  if (swapActive) {
    await ctx.ssh.exec("swapoff -a", sshOpts(ctx));
  }

  // Remove swap entries from fstab permanently
  await ctx.ssh.exec("sed -i '/\\sswap\\s/d' /etc/fstab", sshOpts(ctx));

  if (swapActive) {
    return { success: true, changed: true, message: "Swap disabled" };
  }
  return { success: true, changed: false, message: "Swap already disabled" };
};

View File

@@ -0,0 +1,30 @@
// Apply CIS-compliant sysctl kernel parameters for k3s.
import type { Operation, OperationResult } from "../types.js";
import { sshOpts, writeRemoteFile } from "../utils.js";
/** Sysctl drop-in content written to /etc/sysctl.d/90-k3s-cis.conf. */
const CIS_SYSCTL = `# k3s CIS hardening
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
vm.panic_on_oom = 0
vm.overcommit_memory = 1
kernel.panic = 10
kernel.panic_on_oops = 1
# inotify limits for large clusters
fs.inotify.max_user_instances = 524288
fs.inotify.max_user_watches = 524288`;

/**
 * Installs the k3s sysctl drop-in and reloads sysctl when it changed.
 *
 * @param ctx - Operation context providing the SSH client.
 * @returns Always `success: true`; `changed` mirrors the file-write result.
 */
export const applyCisHardening: Operation = async (ctx): Promise<OperationResult> => {
  const dropInPath = "/etc/sysctl.d/90-k3s-cis.conf";
  const changed = await writeRemoteFile(ctx, dropInPath, CIS_SYSCTL);

  if (!changed) {
    // Nothing written, so there is nothing to reload.
    return { success: true, changed, message: "Sysctl already configured" };
  }

  await ctx.ssh.exec("sysctl --system > /dev/null", sshOpts(ctx));
  return { success: true, changed, message: "Sysctl hardening applied" };
};

View File

@@ -0,0 +1,61 @@
// Core types for the operation-based k3s module.
// Every operation follows: OperationContext → OperationResult.
import type { SshExecResult } from "../../../src/ssh.js";
import type { OsId, Arch, Role } from "@lab/shared";
/**
 * Typed k3s configuration for a single node.
 *
 * The role decides between server and agent mode; the optional join fields
 * are only meaningful for agents.
 */
export interface K3sConfig {
/** Node hostname. */
hostname: string;
/** Node IP address. */
ip: string;
role: Role; // "infra"/"labcontroller" = server, "worker" = agent
// Agent-only (required when role is "worker")
/** URL of the k3s server the agent joins. */
k3sServerUrl?: string | undefined;
/** Join token presented to the k3s server. */
k3sToken?: string | undefined;
// Additional TLS SANs for API server certificate
tlsSans?: string[] | undefined;
}
/**
 * SSH execution interface injected into operations.
 *
 * Operations only ever call `exec`; the remaining fields describe the
 * connection (presumably login user, target address and private-key path —
 * confirm against the concrete SSH implementation).
 */
export interface SshClient {
/** Runs a shell command remotely; `opts.timeoutMs` is an optional time limit. */
exec: (command: string, opts?: { timeoutMs?: number }) => Promise<SshExecResult>;
user: string;
ip: string;
keyPath?: string | undefined;
}
/** Context passed to every operation. */
export interface OperationContext {
/** Node configuration (role, hostname, join data). */
config: K3sConfig;
/** SSH client used for every remote command. */
ssh: SshClient;
/** Operating-system identifier of the target node. */
os: OsId;
/** CPU architecture of the target node. */
arch: Arch;
/** Sink for human-readable progress lines. */
log: (msg: string) => void;
}
/** Result returned by every operation. */
export interface OperationResult {
/** False when the operation failed. */
success: boolean;
changed: boolean; // idempotency: did this operation modify the target?
/** One-line human-readable summary. */
message: string;
/** Optional per-step notes. */
details?: string[] | undefined;
/** Failure description, set by operations when they report a failure. */
error?: string | undefined;
}
/**
 * An atomic operation function: receives the shared context and resolves to
 * an `OperationResult`.
 */
export type Operation = (ctx: OperationContext) => Promise<OperationResult>;
/** A single operation paired with a display name. */
export interface NamedOperation {
/** Label for this operation. */
name: string;
/** The operation to execute. */
fn: Operation;
}
/** A logical grouping of related operations. */
export interface OperationGroup {
/** Short group identifier. */
name: string;
/** Human-readable explanation of what the group does. */
description: string;
/** Member operations of the group. */
operations: NamedOperation[];
}

View File

@@ -0,0 +1,102 @@
// Utility helpers for k3s operations.
// Common patterns: check-before-act, file writing, sequential execution.
import type { OperationContext, OperationResult, NamedOperation } from "./types.js";
/**
 * Default SSH options: a 30 s command timeout.
 *
 * @param _ctx - Currently unused; accepted so call sites already pass the
 *   context should the options ever depend on it.
 * @returns A fresh options object on every call.
 */
export function sshOpts(_ctx: OperationContext): { timeoutMs: number } {
  const timeoutMs = 30_000;
  return { timeoutMs };
}
/**
 * Runs a remote command and checks its stdout against an expectation.
 *
 * @param ctx - Operation context providing the SSH client.
 * @param command - Shell command to run on the node.
 * @param expected - A string compared for equality with the trimmed stdout,
 *   or a regular expression tested against the raw stdout.
 * @returns True when stdout matches. The exit code is not considered.
 */
export async function checkCommand(
  ctx: OperationContext,
  command: string,
  expected: string | RegExp,
): Promise<boolean> {
  const { stdout: output } = await ctx.ssh.exec(command, sshOpts(ctx));
  return typeof expected === "string"
    ? output.trim() === expected
    : expected.test(output);
}
/**
 * Writes a file on the node via SSH, but only when its content differs.
 *
 * The current content is read with `cat` (a sentinel is echoed when the file
 * is missing) and compared after trimming both sides. The new content is
 * written through a here-document with a QUOTED delimiter, which the shell
 * passes through literally: no variable, command or backslash processing
 * happens, so the content is sent unescaped.
 *
 * @param ctx - Operation context providing the SSH client.
 * @param path - Absolute path of the remote file; parent directories are
 *   created as needed.
 * @param content - Desired file content.
 * @param mode - Optional `chmod` mode applied after a write.
 * @returns True when the file was written, false when it was already up to
 *   date (in which case `mode` is not applied either).
 */
export async function writeRemoteFile(
  ctx: OperationContext,
  path: string,
  content: string,
  mode?: string,
): Promise<boolean> {
  // Check existing content
  const existing = await ctx.ssh.exec(
    `cat ${path} 2>/dev/null || echo '__LABCTL_NOT_FOUND__'`,
    sshOpts(ctx),
  );
  if (existing.stdout.trim() === content.trim()) {
    return false; // no change
  }

  // A content line equal to the delimiter would end the here-document early
  // and truncate the file, so extend the delimiter until it is unique.
  const contentLines = content.split("\n");
  let delimiter = "LABCTL_EOF";
  while (contentLines.includes(delimiter)) {
    delimiter += "_";
  }

  // Write via quoted heredoc. Backslashes must NOT be doubled here: the body
  // is literal, so doubling would corrupt any content containing `\` and make
  // the equality check above fail on every later run.
  await ctx.ssh.exec(
    `mkdir -p "$(dirname "${path}")" && cat > "${path}" << '${delimiter}'\n${content}\n${delimiter}`,
    sshOpts(ctx),
  );
  if (mode) {
    await ctx.ssh.exec(`chmod ${mode} "${path}"`, sshOpts(ctx));
  }
  return true; // changed
}
/**
 * Checks whether a systemd unit is currently active.
 *
 * @param ctx - Operation context providing the SSH client.
 * @param service - Unit name passed to `systemctl is-active`.
 * @returns True only when the command exits 0 and prints exactly "active".
 */
export async function isServiceActive(
  ctx: OperationContext,
  service: string,
): Promise<boolean> {
  const probe = await ctx.ssh.exec(`systemctl is-active ${service} 2>/dev/null`, sshOpts(ctx));
  if (probe.exitCode !== 0) {
    return false;
  }
  return probe.stdout.trim() === "active";
}
/**
 * Runs named operations one after another and stops at the first failure.
 *
 * Progress is reported through `ctx.log`: a line before each operation and a
 * line with its outcome ("changed", "ok" or "FAILED — <reason>").
 *
 * @param ctx - Context handed to every operation; also supplies the logger.
 * @param operations - Operations to execute, in order.
 * @returns The results collected so far, including the failing one if any.
 *   Operations after a failure are not executed.
 */
export async function runSequential(
  ctx: OperationContext,
  operations: NamedOperation[],
): Promise<OperationResult[]> {
  const collected: OperationResult[] = [];

  for (const step of operations) {
    ctx.log(` ${step.name}...`);
    const outcome = await step.fn(ctx);
    collected.push(outcome);

    if (!outcome.success) {
      ctx.log(` ${step.name}: FAILED — ${outcome.error ?? outcome.message}`);
      return collected;
    }
    ctx.log(` ${step.name}: ${outcome.changed ? "changed" : "ok"}`);
  }

  return collected;
}
/**
 * Folds several operation results into one summary.
 *
 * - `success` is true when no result failed (also for an empty list).
 * - `changed` is true when any result changed something.
 * - `details` concatenates each result's details, or its message when it has
 *   none.
 * - `error` joins all failure texts with "; " and is only present when at
 *   least one result failed; `message` quotes the first failure.
 *
 * @param results - Results to summarise, in execution order.
 * @returns The aggregated result.
 */
export function aggregateResults(results: OperationResult[]): OperationResult {
  const failureTexts = results
    .filter((entry) => !entry.success)
    .map((entry) => entry.error ?? entry.message);
  const success = results.every((entry) => entry.success);
  const changed = results.some((entry) => entry.changed);

  const details: string[] = [];
  for (const entry of results) {
    details.push(...(entry.details ?? [entry.message]));
  }

  let message: string;
  if (!success) {
    message = `Failed: ${failureTexts[0]}`;
  } else if (changed) {
    message = "Applied changes";
  } else {
    message = "Already configured";
  }

  const summary: OperationResult = { success, changed, message, details };
  if (failureTexts.length > 0) {
    summary.error = failureTexts.join("; ");
  }
  return summary;
}

View File

@@ -0,0 +1,63 @@
// Test helpers: mock SSH client and operation context factory.
import { vi } from "vitest";
import type { OperationContext, K3sConfig, SshClient } from "../src/types.js";
import type { SshExecResult } from "../../../src/ssh.js";
/** Default mock SSH result (success, empty output). */
export const OK: SshExecResult = { exitCode: 0, stdout: "", stderr: "" };
/** Mock SSH result for a failed command: exit code 1, empty output. */
export const FAIL: SshExecResult = { exitCode: 1, stdout: "", stderr: "" };
/**
 * Builds a successful mock SSH result carrying the given stdout.
 *
 * @param out - Text the fake command "printed".
 * @returns A result with exit code 0 and empty stderr.
 */
export function stdout(out: string): SshExecResult {
  const result: SshExecResult = { stdout: out, stderr: "", exitCode: 0 };
  return result;
}
/**
 * Creates a mock SSH client whose `exec` is a `vi.fn()`.
 *
 * Unless a test queues specific results, every `exec` call resolves to `OK`.
 */
export function mockSsh(): SshClient & { exec: ReturnType<typeof vi.fn> } {
  const exec = vi.fn<[string, { timeoutMs?: number }?], Promise<SshExecResult>>();
  exec.mockResolvedValue(OK);
  return {
    exec,
    user: "root",
    ip: "10.0.0.1",
    keyPath: "/root/.ssh/id_ed25519",
  };
}
/**
 * Creates a complete OperationContext backed by a mock SSH client.
 *
 * Defaults describe an "infra" (server) node named test.local on Fedora 43 /
 * x86_64; `configOverrides` replaces individual config fields.
 *
 * @param configOverrides - Partial config merged over the defaults.
 * @returns The context, with `ssh` typed as the mock for queueing results.
 */
export function mockCtx(configOverrides?: Partial<K3sConfig>): OperationContext & { ssh: ReturnType<typeof mockSsh> } {
  const defaults: K3sConfig = {
    hostname: "test.local",
    ip: "10.0.0.1",
    role: "infra",
  };
  const config: K3sConfig = { ...defaults, ...configOverrides };
  const ssh = mockSsh();
  const log = vi.fn();
  return { config, ssh, os: "fedora-43", arch: "x86_64", log };
}
/**
 * Asserts that at least one recorded `ssh.exec` call matches the pattern.
 *
 * @param ssh - Mock SSH client whose calls are inspected.
 * @param pattern - Substring, or regular expression, to look for in the
 *   command text (first argument of each call).
 * @throws Error listing all recorded commands when nothing matches.
 */
export function expectCommand(ssh: ReturnType<typeof mockSsh>, pattern: string | RegExp): void {
  const issued: string[] = ssh.exec.mock.calls.map((call: [string, unknown?]) => call[0]);
  const matches = (command: string): boolean =>
    typeof pattern === "string" ? command.includes(pattern) : pattern.test(command);

  if (issued.some(matches)) {
    return;
  }
  const listing = issued.map((command: string) => ` - ${command}`).join("\n");
  throw new Error(`Expected SSH command matching ${pattern}, got:\n${listing}`);
}
/**
 * Asserts that no recorded `ssh.exec` call matches the pattern.
 *
 * @param ssh - Mock SSH client whose calls are inspected.
 * @param pattern - Substring, or regular expression, that must not occur in
 *   any command text (first argument of each call).
 * @throws Error when a matching command was issued.
 */
export function expectNoCommand(ssh: ReturnType<typeof mockSsh>, pattern: string | RegExp): void {
  const issued: string[] = ssh.exec.mock.calls.map((call: [string, unknown?]) => call[0]);
  const matches = (command: string): boolean =>
    typeof pattern === "string" ? command.includes(pattern) : pattern.test(command);

  if (!issued.some(matches)) {
    return;
  }
  throw new Error(`Expected NO SSH command matching ${pattern}, but found one`);
}

View File

@@ -0,0 +1,134 @@
// Tests for k3s install script generation.
import { describe, it, expect } from "vitest";
import { generateInstallScript } from "../src/install.js";
// Server-role expectations: the generated shell script must contain every
// hardening, cleanup and install step for a k3s server node.
describe("k3s install script — server role", () => {
  const script = generateInstallScript({
    hostname: "labmaster.ad.itaz.eu",
    ip: "10.0.0.210",
    role: "infra",
  });

  /** Asserts that every snippet occurs somewhere in the generated script. */
  const expectAll = (...snippets: string[]): void => {
    for (const snippet of snippets) {
      expect(script).toContain(snippet);
    }
  };

  it("includes CIS sysctl settings", () => {
    expectAll("vm.panic_on_oom", "vm.overcommit_memory", "kernel.panic", "kernel.panic_on_oops");
  });

  it("loads required kernel modules", () => {
    expectAll("modprobe br_netfilter", "modprobe overlay");
  });

  it("disables swap", () => {
    expectAll("swapoff -a");
  });

  it("writes k3s server config with security flags", () => {
    expectAll(
      "protect-kernel-defaults: true",
      "secrets-encryption: true",
      "anonymous-auth=false",
      "write-kubeconfig-mode",
    );
  });

  it("disables flannel for Cilium", () => {
    expectAll("flannel-backend: none", "disable-network-policy: true");
  });

  it("disables default servicelb and traefik", () => {
    expectAll("servicelb", "traefik");
  });

  it("writes audit policy", () => {
    expectAll("audit-policy.yaml", "apiVersion: audit.k8s.io/v1", "kind: Policy");
  });

  it("includes TLS SANs for hostname and IP", () => {
    expectAll("labmaster.ad.itaz.eu", "10.0.0.210");
  });

  it("installs k3s as server", () => {
    expectAll('INSTALL_K3S_EXEC="server"');
  });

  it("installs Cilium", () => {
    expectAll("cilium install", "kubeProxyReplacement=true");
  });

  it("applies Pod Security Standards", () => {
    expectAll("pod-security.kubernetes.io/enforce=restricted");
  });

  it("includes PodSecurity admission plugin", () => {
    expectAll("enable-admission-plugins=NodeRestriction,PodSecurity");
  });

  it("configures audit logging", () => {
    expectAll("audit-log-path=/var/log/kubernetes/audit.log", "audit-log-maxage=30");
  });

  it("cleans stale flannel vxlan before Cilium install", () => {
    expectAll("flannel.1", "ip link delete flannel.1");
  });

  it("cleans stale Cilium interfaces before install", () => {
    expectAll("ip link delete", "cilium_vxlan", "cilium_host");
  });

  it("cleans old CNI config directory", () => {
    expectAll("/etc/cni/net.d", "/var/lib/cni");
  });

  it("stops k3s before reconfiguration", () => {
    const stopCommand = "systemctl stop k3s";
    const cleanupCommand = "ip link delete flannel.1";
    expectAll(stopCommand);
    // Stop must come before interface cleanup
    expect(script.indexOf(stopCommand)).toBeLessThan(script.indexOf(cleanupCommand));
  });

  it("force restarts k3s after install to apply config", () => {
    expectAll("systemctl restart k3s");
  });
});
// Agent-role expectations: joining needs a server URL and token, and agents
// never install Cilium themselves.
describe("k3s install script — agent role", () => {
  /** Base worker configuration shared by both cases. */
  const worker = { hostname: "worker-1", ip: "10.0.0.50", role: "worker" } as const;

  it("installs as agent with server URL and token", () => {
    const script = generateInstallScript({
      ...worker,
      k3sServerUrl: "https://10.0.0.210:6443",
      k3sToken: "K10abc123::server:xyz",
    });
    for (const snippet of ['INSTALL_K3S_EXEC="agent"', "K3S_URL=", "K3S_TOKEN="]) {
      expect(script).toContain(snippet);
    }
    expect(script).not.toContain("cilium install");
  });

  it("errors without server URL", () => {
    // No join data at all: the script itself must abort.
    const script = generateInstallScript({ ...worker });
    for (const snippet of ["ERROR", "exit 1"]) {
      expect(script).toContain(snippet);
    }
  });
});

View File

@@ -0,0 +1,350 @@
// Unit tests for k3s operations.
// Each operation is tested for: correctness, idempotency, and error handling.
import { describe, it, expect, beforeEach } from "vitest";
import { mockCtx, OK, FAIL, stdout, expectCommand, expectNoCommand } from "./helpers.js";
// --- Kernel Modules ---
import { loadKernelModules } from "../src/operations/kernel-modules.js";
// loadKernelModules: the queued mock results below are consumed strictly in
// the order the operation issues its SSH commands (one lsmod probe per module,
// a modprobe after each failed probe, then the read and write of the
// modules-load file), so their order must track the implementation.
describe("loadKernelModules", () => {
it("loads missing modules and writes config", async () => {
const ctx = mockCtx();
// lsmod checks: br_netfilter missing, overlay loaded, ip_conntrack missing
ctx.ssh.exec
.mockResolvedValueOnce(FAIL) // br_netfilter not loaded
.mockResolvedValueOnce(OK) // modprobe br_netfilter
.mockResolvedValueOnce(OK) // overlay loaded
.mockResolvedValueOnce(FAIL) // ip_conntrack not loaded
.mockResolvedValueOnce(OK) // modprobe ip_conntrack
.mockResolvedValueOnce(stdout("__LABCTL_NOT_FOUND__")) // cat config file
.mockResolvedValueOnce(OK); // write config
const result = await loadKernelModules(ctx);
expect(result.success).toBe(true);
expect(result.changed).toBe(true);
expect(result.details).toContain("Loaded: br_netfilter");
expect(result.details).toContain("Already loaded: overlay");
});
// Idempotency: every probe succeeds and the remote file already holds the
// expected module list, so nothing may be reported as changed.
it("is idempotent when all modules loaded and config exists", async () => {
const ctx = mockCtx();
ctx.ssh.exec
.mockResolvedValueOnce(OK) // br_netfilter loaded
.mockResolvedValueOnce(OK) // overlay loaded
.mockResolvedValueOnce(OK) // ip_conntrack loaded
.mockResolvedValueOnce(stdout("br_netfilter\noverlay\nip_conntrack")); // config exists with correct content
const result = await loadKernelModules(ctx);
expect(result.success).toBe(true);
expect(result.changed).toBe(false);
});
});
// --- Sysctl ---
import { applyCisHardening } from "../src/operations/sysctl.js";
// applyCisHardening: sysctl must only be reloaded when the drop-in file was
// actually rewritten.
describe("applyCisHardening", () => {
it("writes config and applies sysctl", async () => {
const ctx = mockCtx();
ctx.ssh.exec
.mockResolvedValueOnce(stdout("__LABCTL_NOT_FOUND__")) // file not found
.mockResolvedValueOnce(OK) // write file
.mockResolvedValueOnce(OK); // sysctl --system
const result = await applyCisHardening(ctx);
expect(result.success).toBe(true);
expect(result.changed).toBe(true);
expectCommand(ctx.ssh, "sysctl --system");
});
it("skips sysctl when config unchanged", async () => {
const ctx = mockCtx();
// The mocked `cat` output must equal the CIS_SYSCTL constant exactly,
// otherwise the operation treats the file as changed.
ctx.ssh.exec.mockResolvedValueOnce(stdout("# k3s CIS hardening\nnet.bridge.bridge-nf-call-iptables = 1\nnet.bridge.bridge-nf-call-ip6tables = 1\nnet.ipv4.ip_forward = 1\nvm.panic_on_oom = 0\nvm.overcommit_memory = 1\nkernel.panic = 10\nkernel.panic_on_oops = 1\n# inotify limits for large clusters\nfs.inotify.max_user_instances = 524288\nfs.inotify.max_user_watches = 524288"));
const result = await applyCisHardening(ctx);
expect(result.changed).toBe(false);
expectNoCommand(ctx.ssh, "sysctl --system");
});
});
// --- Swap ---
import { disableSwap } from "../src/operations/swap.js";
// disableSwap: `swapoff -a` runs only when swapon lists an active device,
// while the fstab cleanup runs in both cases.
describe("disableSwap", () => {
it("disables active swap", async () => {
const ctx = mockCtx();
ctx.ssh.exec
.mockResolvedValueOnce(stdout("/dev/sda2 partition 2G")) // swap active
.mockResolvedValueOnce(OK) // swapoff
.mockResolvedValueOnce(OK); // sed fstab
const result = await disableSwap(ctx);
expect(result.success).toBe(true);
expect(result.changed).toBe(true);
expectCommand(ctx.ssh, "swapoff -a");
});
it("is idempotent when swap already off", async () => {
const ctx = mockCtx();
ctx.ssh.exec
.mockResolvedValueOnce(stdout("")) // no swap
.mockResolvedValueOnce(OK); // sed fstab (always runs)
const result = await disableSwap(ctx);
expect(result.changed).toBe(false);
expectNoCommand(ctx.ssh, "swapoff");
});
});
// --- Firewall ---
import { disableFirewall } from "../src/operations/firewall.js";
// disableFirewall: reports a change only when a firewall service had to be disabled.
describe("disableFirewall", () => {
  it("disables active firewalld", async () => {
    const ctx = mockCtx();
    const replies = [
      stdout("active"), // firewalld active
      OK, // disable
      OK, // mask
      FAIL, // ufw not active
    ];
    for (const reply of replies) {
      ctx.ssh.exec.mockResolvedValueOnce(reply);
    }

    const outcome = await disableFirewall(ctx);

    expect(outcome.success).toBe(true);
    expect(outcome.changed).toBe(true);
    expect(outcome.details).toContain("Disabled and masked: firewalld");
  });

  it("is idempotent when nothing active", async () => {
    const ctx = mockCtx();
    const replies = [
      FAIL, // firewalld not active
      stdout("masked"), // already masked
      FAIL, // ufw not active
      FAIL, // ufw not enabled
    ];
    for (const reply of replies) {
      ctx.ssh.exec.mockResolvedValueOnce(reply);
    }

    const outcome = await disableFirewall(ctx);

    expect(outcome.changed).toBe(false);
  });
});
// --- SELinux ---
import { setSelinuxPermissive } from "../src/operations/selinux.js";
// setSelinuxPermissive: only an "Enforcing" host is expected to report a change.
describe("setSelinuxPermissive", () => {
  it("sets enforcing to permissive", async () => {
    const ctx = mockCtx();
    const replies = [
      stdout("Enforcing"), // current mode
      OK, // setenforce 0
      OK, // sed config
    ];
    for (const reply of replies) {
      ctx.ssh.exec.mockResolvedValueOnce(reply);
    }

    const outcome = await setSelinuxPermissive(ctx);

    expect(outcome.changed).toBe(true);
  });

  // Both non-enforcing modes are expected to be left untouched.
  it.each([
    ["already permissive", "Permissive"],
    ["disabled", "Disabled"],
  ])("skips when %s", async (_label, currentMode) => {
    const ctx = mockCtx();
    ctx.ssh.exec.mockResolvedValueOnce(stdout(currentMode));

    const outcome = await setSelinuxPermissive(ctx);

    expect(outcome.changed).toBe(false);
  });
});
// --- K3s Config ---
import { writeK3sConfig } from "../src/operations/k3s-config.js";
// writeK3sConfig: inspects the third SSH command (the write) for role-specific settings.
describe("writeK3sConfig", () => {
  it("writes server config with TLS SANs", async () => {
    const ctx = mockCtx({ hostname: "node1.lab", ip: "10.0.1.1", role: "infra" });
    ctx.ssh.exec.mockResolvedValueOnce(OK); // mkdir
    ctx.ssh.exec.mockResolvedValueOnce(stdout("__LABCTL_NOT_FOUND__")); // cat existing
    ctx.ssh.exec.mockResolvedValueOnce(OK); // write

    const outcome = await writeK3sConfig(ctx);

    expect(outcome.changed).toBe(true);
    // Verify the written content includes TLS SANs
    const writeInvocation = ctx.ssh.exec.mock.calls[2]!;
    const writtenCommand = writeInvocation[0] as string;
    const expectedFragments = [
      "node1.lab",
      "10.0.1.1",
      "secrets-encryption: true",
      "flannel-backend: none",
    ];
    for (const fragment of expectedFragments) {
      expect(writtenCommand).toContain(fragment);
    }
  });

  it("writes minimal agent config", async () => {
    const ctx = mockCtx({ role: "worker" });
    ctx.ssh.exec.mockResolvedValueOnce(OK); // mkdir
    ctx.ssh.exec.mockResolvedValueOnce(stdout("__LABCTL_NOT_FOUND__"));
    ctx.ssh.exec.mockResolvedValueOnce(OK);

    const outcome = await writeK3sConfig(ctx);

    expect(outcome.changed).toBe(true);
    const writeInvocation = ctx.ssh.exec.mock.calls[2]!;
    const writtenCommand = writeInvocation[0] as string;
    expect(writtenCommand).toContain("protect-kernel-defaults: true");
    expect(writtenCommand).not.toContain("secrets-encryption");
  });
});
// --- CNI Cleanup ---
import { cleanupStaleCni } from "../src/operations/cni-cleanup.js";
// cleanupStaleCni: scripted SSH replies drive the "something to clean" and "nothing to clean" paths.
describe("cleanupStaleCni", () => {
  it("stops k3s and removes stale interfaces", async () => {
    const ctx = mockCtx();
    const replies = [
      stdout("active"), // k3s is active
      OK, // systemctl stop k3s
      OK, // flannel.1 exists
      OK, // delete flannel.1
      FAIL, // cilium_vxlan not found
      FAIL, // cilium_host not found
      FAIL, // cilium_net not found
      stdout(""), // no vxlans
      OK, // rm -rf cni
    ];
    for (const reply of replies) {
      ctx.ssh.exec.mockResolvedValueOnce(reply);
    }

    const outcome = await cleanupStaleCni(ctx);

    expect(outcome.success).toBe(true);
    expect(outcome.changed).toBe(true);
    expect(outcome.details).toContain("Stopped k3s service");
    expect(outcome.details).toContain("Removed interface: flannel.1");
  });

  it("is idempotent when nothing to clean", async () => {
    const ctx = mockCtx();
    const replies = [
      FAIL, // k3s not active
      FAIL, // flannel.1 not found
      FAIL, // cilium_vxlan
      FAIL, // cilium_host
      FAIL, // cilium_net
      stdout(""), // no vxlans
      OK, // rm -rf (always runs)
    ];
    for (const reply of replies) {
      ctx.ssh.exec.mockResolvedValueOnce(reply);
    }

    const outcome = await cleanupStaleCni(ctx);

    expect(outcome.changed).toBe(false);
  });
});
// --- K3s Install ---
import { installK3sBinary } from "../src/operations/k3s-install.js";
// installK3sBinary: server install, agent install, and the agent-without-join-settings failure.
describe("installK3sBinary", () => {
  it("installs k3s server", async () => {
    const ctx = mockCtx({ role: "infra" });
    const replies = [
      FAIL, // k3s not installed
      OK, // curl install
      OK, // restart
      OK, // kubectl get nodes
    ];
    for (const reply of replies) {
      ctx.ssh.exec.mockResolvedValueOnce(reply);
    }

    const outcome = await installK3sBinary(ctx);

    expect(outcome.success).toBe(true);
    expect(outcome.changed).toBe(true);
  });

  it("fails agent without server URL", async () => {
    const ctx = mockCtx({ role: "worker" });
    ctx.ssh.exec.mockResolvedValueOnce(FAIL); // not installed

    const outcome = await installK3sBinary(ctx);

    expect(outcome.success).toBe(false);
    expect(outcome.error).toContain("Missing agent");
  });

  it("installs k3s agent with URL and token", async () => {
    const ctx = mockCtx({
      role: "worker",
      k3sServerUrl: "https://10.0.0.1:6443",
      k3sToken: "secret",
    });
    const replies = [
      FAIL, // not installed
      OK, // curl install
      OK, // restart
    ];
    for (const reply of replies) {
      ctx.ssh.exec.mockResolvedValueOnce(reply);
    }

    const outcome = await installK3sBinary(ctx);

    expect(outcome.success).toBe(true);
    for (const envAssignment of ["K3S_URL=", "K3S_TOKEN="]) {
      expectCommand(ctx.ssh, envAssignment);
    }
  });
});
// --- DNS Fix ---
import { fixCoreDnsUpstream } from "../src/operations/dns-fix.js";
// fixCoreDnsUpstream: upstream detection via resolvectl, the resolv.conf fallback, and the no-upstream skip.
describe("fixCoreDnsUpstream", () => {
  it("detects upstream DNS and writes resolv.conf", async () => {
    const ctx = mockCtx();
    const replies = [
      stdout("192.168.8.1"), // resolvectl
      stdout("__LABCTL_NOT_FOUND__"), // cat existing
      OK, // write resolv.conf
      stdout("active"), // k3s active
      OK, // restart
      OK, // kubectl get nodes
    ];
    for (const reply of replies) {
      ctx.ssh.exec.mockResolvedValueOnce(reply);
    }

    const outcome = await fixCoreDnsUpstream(ctx);

    expect(outcome.success).toBe(true);
    expect(outcome.changed).toBe(true);
    expect(outcome.message).toContain("192.168.8.1");
  });

  it("falls back to /run/systemd/resolve/resolv.conf", async () => {
    const ctx = mockCtx();
    const replies = [
      stdout(""), // resolvectl empty
      stdout("10.0.0.1"), // fallback resolv.conf
      stdout("__LABCTL_NOT_FOUND__"),
      OK,
      stdout("active"),
      OK,
      OK,
    ];
    for (const reply of replies) {
      ctx.ssh.exec.mockResolvedValueOnce(reply);
    }

    const outcome = await fixCoreDnsUpstream(ctx);

    expect(outcome.changed).toBe(true);
  });

  it("skips when upstream cannot be detected", async () => {
    const ctx = mockCtx();
    ctx.ssh.exec.mockResolvedValueOnce(stdout("")); // resolvectl empty
    ctx.ssh.exec.mockResolvedValueOnce(stdout("127.0.0.53")); // fallback is still stub

    const outcome = await fixCoreDnsUpstream(ctx);

    expect(outcome.changed).toBe(false);
  });
});
// --- Pod Security ---
import { applyPodSecurityStandards } from "../src/operations/pod-security.js";
// applyPodSecurityStandards: expects exactly three SSH commands, one per pod-security label.
describe("applyPodSecurityStandards", () => {
  it("applies all three labels", async () => {
    const ctx = mockCtx();

    const outcome = await applyPodSecurityStandards(ctx);

    expect(outcome.success).toBe(true);
    expect(ctx.ssh.exec).toHaveBeenCalledTimes(3);
    for (const mode of ["enforce", "warn", "audit"]) {
      expectCommand(ctx.ssh, `pod-security.kubernetes.io/${mode}=restricted`);
    }
  });
});

View File

@@ -0,0 +1,125 @@
// Smoke tests: verify the full operation pipeline composes and runs end-to-end
// with mocked SSH. These test the integration between operations, not individual logic.
import { describe, it, expect } from "vitest";
import { mockCtx, OK, stdout } from "./helpers.js";
import * as ops from "../src/operations/index.js";
import { runSequential } from "../src/utils.js";
import type { NamedOperation } from "../src/types.js";
// Smoke test: the install-side operations compose through runSequential when every SSH call succeeds.
describe("smoke: full server install pipeline", () => {
  it("runs all install operations in sequence without errors", async () => {
    const ctx = mockCtx({ hostname: "smoke.local", ip: "10.0.0.99", role: "infra" });
    // Default mock returns OK for everything
    ctx.ssh.exec.mockResolvedValue(OK);

    const steps: NamedOperation[] = [
      { name: "Kernel modules", fn: ops.loadKernelModules },
      { name: "Sysctl hardening", fn: ops.applyCisHardening },
      { name: "Disable swap", fn: ops.disableSwap },
      { name: "Disable firewall", fn: ops.disableFirewall },
      { name: "SELinux permissive", fn: ops.setSelinuxPermissive },
      { name: "Write k3s config", fn: ops.writeK3sConfig },
      { name: "Write audit policy", fn: ops.writeAuditPolicy },
      { name: "CNI cleanup", fn: ops.cleanupStaleCni },
    ];

    const outcomes = await runSequential(ctx, steps);

    expect(outcomes).toHaveLength(steps.length);
    outcomes.forEach((outcome) => {
      expect(outcome.success).toBe(true);
    });
    // Verify log was called for each operation
    const expectedLogCalls = steps.length * 2; // start + end per op
    expect(ctx.log).toHaveBeenCalledTimes(expectedLogCalls);
  });
});
// Smoke test: the configure-side operations compose through runSequential when every SSH call succeeds.
describe("smoke: full configure pipeline", () => {
  it("runs all configure operations", async () => {
    const ctx = mockCtx({ role: "infra" });
    ctx.ssh.exec.mockResolvedValue(OK);

    const steps: NamedOperation[] = [
      { name: "Fix CoreDNS", fn: ops.fixCoreDnsUpstream },
      { name: "Log rotation", fn: ops.configureLogRotation },
      { name: "Cert check", fn: ops.checkCertExpiry },
      { name: "Network policies", fn: ops.applyDefaultNetworkPolicies },
      { name: "Pod security", fn: ops.applyPodSecurityStandards },
    ];

    const outcomes = await runSequential(ctx, steps);

    expect(outcomes).toHaveLength(steps.length);
    outcomes.forEach((outcome) => {
      expect(outcome.success).toBe(true);
    });
  });
});
// Smoke test: runSequential must stop at the first failing operation and must not
// invoke anything queued after it.
describe("smoke: pipeline stops on failure", () => {
  it("stops at first failing operation", async () => {
    const ctx = mockCtx();
    ctx.ssh.exec.mockResolvedValue(OK);

    // Separate counters so the assertions can tell "failing op ran" apart from
    // "the op after the failure ran".
    let failingCalls = 0;
    let skippedCalls = 0;
    const failingOp = async () => {
      failingCalls++;
      return { success: false, changed: false, message: "Boom", error: "test failure" };
    };
    const neverCalled = async () => {
      skippedCalls++;
      return { success: true, changed: false, message: "Should not run" };
    };

    const results = await runSequential(ctx, [
      { name: "OK op", fn: ops.disableSwap },
      { name: "Failing op", fn: failingOp },
      { name: "Never called", fn: neverCalled },
    ]);

    expect(results).toHaveLength(2); // stopped after failure
    expect(results[0]!.success).toBe(true);
    expect(results[1]!.success).toBe(false);
    // The result count alone does not prove the third operation was skipped
    // (results could be truncated after the fact), so check the invocations too.
    expect(failingCalls).toBe(1);
    expect(skippedCalls).toBe(0);
  });
});
// Smoke test: a worker without join settings gets a failed result rather than a thrown error.
describe("smoke: agent install rejects missing config", () => {
  it("fails gracefully without server URL", async () => {
    const ctx = mockCtx({ role: "worker" });
    ctx.ssh.exec.mockResolvedValue(OK);
    // Override the version check to say not installed
    const notInstalled = { exitCode: 1, stdout: "", stderr: "" };
    ctx.ssh.exec.mockResolvedValueOnce(notInstalled);

    const outcome = await ops.installK3sBinary(ctx);

    expect(outcome.success).toBe(false);
    expect(outcome.error).toBeDefined();
  });
});
// Smoke test: every operation listed here must be exported from the operations index as a function.
describe("smoke: all operations are exported", () => {
  it("exports all 15 operations", () => {
    const exported = [
      ops.loadKernelModules,
      ops.applyCisHardening,
      ops.disableSwap,
      ops.disableFirewall,
      ops.setSelinuxPermissive,
      ops.writeK3sConfig,
      ops.writeAuditPolicy,
      ops.cleanupStaleCni,
      ops.installK3sBinary,
      ops.installCilium,
      ops.fixCoreDnsUpstream,
      ops.configureLogRotation,
      ops.applyDefaultNetworkPolicies,
      ops.applyPodSecurityStandards,
      ops.checkCertExpiry,
    ];

    expect(exported).toHaveLength(15);
    exported.forEach((operation) => {
      expect(typeof operation).toBe("function");
    });
  });
});

View File

@@ -0,0 +1,6 @@
name: labcontroller
version: 0.1.0
description: Deploy bastion + labd + CockroachDB on k3s via Pulumi. Multi-node auto-clustering.
targets:
roles: [labcontroller]
dependencies: [k3s]

View File

@@ -0,0 +1,90 @@
// Bastion PXE server k8s deployment manifests.
// Uses hostNetwork for DHCP (UDP 67), TFTP (UDP 69), and HTTP.
/** Settings that shape the bastion DaemonSet manifest. */
export interface BastionK8sConfig {
  /** Namespace the DaemonSet is created in. */
  namespace: string;
  /** Image reference used for the bastion container. */
  image: string;
  /** TCP port used as containerPort and hostPort, and passed to the container as HTTP_PORT. */
  httpPort: number;
  /** Host path for state persistence; mounted into the container at /data. */
  dataPath: string;
}

/** Values used for every setting the caller does not override. */
export const BASTION_DEFAULTS: BastionK8sConfig = {
  namespace: "lab-system",
  image: "gitea.mysources.co.uk/michal/lab-bastion:latest",
  httpPort: 8080,
  dataPath: "/srv/lab-bastion",
};

/**
 * Build the k8s manifests for the bastion PXE server.
 *
 * @param opts Overrides merged on top of BASTION_DEFAULTS.
 * @returns An object whose `daemonSet` property is a host-network DaemonSet
 *          restricted to control-plane nodes via nodeSelector.
 */
export function bastionManifests(opts?: Partial<BastionK8sConfig>) {
  const cfg = { ...BASTION_DEFAULTS, ...opts };
  // One label object is shared by the selector and the pod template.
  const podLabels = { app: "bastion", "app.kubernetes.io/part-of": "lab" };

  const bastionContainer = {
    name: "bastion",
    image: cfg.image,
    env: [
      { name: "HTTP_PORT", value: String(cfg.httpPort) },
      { name: "BASTION_DIR", value: "/data" },
      { name: "DHCP_MODE", value: "proxy" },
    ],
    ports: [
      { containerPort: cfg.httpPort, hostPort: cfg.httpPort, protocol: "TCP" },
      { containerPort: 67, hostPort: 67, protocol: "UDP" },
      { containerPort: 69, hostPort: 69, protocol: "UDP" },
      { containerPort: 4011, hostPort: 4011, protocol: "UDP" },
    ],
    volumeMounts: [
      { name: "data", mountPath: "/data" },
      { name: "tftpboot", mountPath: "/usr/share/ipxe", readOnly: true },
    ],
    securityContext: {
      capabilities: {
        add: ["NET_ADMIN", "NET_RAW", "NET_BIND_SERVICE"],
      },
    },
    resources: {
      requests: { cpu: "50m", memory: "64Mi" },
      limits: { cpu: "500m", memory: "256Mi" },
    },
  };

  const hostVolumes = [
    {
      name: "data",
      hostPath: { path: cfg.dataPath, type: "DirectoryOrCreate" },
    },
    {
      name: "tftpboot",
      hostPath: { path: "/usr/share/ipxe", type: "Directory" },
    },
  ];

  const podSpec = {
    // hostNetwork gives the pod direct access to DHCP port 67, TFTP port 69 and HTTP.
    hostNetwork: true,
    dnsPolicy: "ClusterFirstWithHostNet",
    nodeSelector: {
      "node-role.kubernetes.io/control-plane": "true",
    },
    tolerations: [{
      key: "node-role.kubernetes.io/control-plane",
      operator: "Exists",
      effect: "NoSchedule",
    }],
    containers: [bastionContainer],
    volumes: hostVolumes,
  };

  return {
    daemonSet: {
      apiVersion: "apps/v1",
      kind: "DaemonSet",
      metadata: {
        name: "bastion",
        namespace: cfg.namespace,
      },
      spec: {
        selector: { matchLabels: podLabels },
        template: {
          metadata: { labels: podLabels },
          spec: podSpec,
        },
      },
    },
  };
}

View File

@@ -0,0 +1,172 @@
// CockroachDB deployment for labcontroller.
// Uses @kubernetes/client-node to apply resources directly to k3s.
// StatefulSet with headless Service for multi-node auto-clustering.
// Data stored on /srv/cockroachdb/ (preserved across reprovision).
/** Settings that shape the CockroachDB manifests. */
export interface CockroachDbConfig {
  /** Namespace for every generated resource; also part of the generated DNS names. */
  namespace: string;
  /** StatefulSet replica count; also the number of hosts listed in `--join`. */
  replicas: number;
  /** Host directory mounted as the CockroachDB store. */
  hostDataPath: string;
  /** Tag of the `cockroachdb/cockroach` image. */
  version: string;
}

/** Values used for every setting the caller does not override. */
export const COCKROACHDB_DEFAULTS: CockroachDbConfig = {
  namespace: "lab-system",
  replicas: 1,
  hostDataPath: "/srv/cockroachdb",
  version: "v24.3.5",
};

/**
 * Generate all k8s manifests for a CockroachDB deployment.
 *
 * @param opts Overrides merged on top of COCKROACHDB_DEFAULTS.
 * @returns The Namespace, headless Service, client Service, StatefulSet and init Job
 *          manifests, plus the `connectionString` pointing at the client Service.
 */
export function cockroachDbManifests(opts?: Partial<CockroachDbConfig>) {
  const cfg = { ...COCKROACHDB_DEFAULTS, ...opts };
  // One label object is shared by both Services and the StatefulSet.
  const labels = { app: "cockroachdb", "app.kubernetes.io/part-of": "lab" };
  const image = `cockroachdb/cockroach:${cfg.version}`;
  // DNS name of a pod behind the headless "cockroachdb" Service.
  const podDns = (podName: string) =>
    `${podName}.cockroachdb.${cfg.namespace}.svc.cluster.local`;

  // One join address per StatefulSet ordinal.
  const joinHosts = Array.from({ length: cfg.replicas })
    .map((_, ordinal) => `${podDns(`cockroachdb-${ordinal}`)}:26257`)
    .join(",");

  const namespace = {
    apiVersion: "v1",
    kind: "Namespace",
    metadata: { name: cfg.namespace },
  };

  const headlessService = {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      name: "cockroachdb",
      namespace: cfg.namespace,
      labels,
    },
    spec: {
      clusterIP: "None",
      selector: labels,
      ports: [
        { name: "grpc", port: 26257, targetPort: 26257 },
        { name: "http", port: 8080, targetPort: 8080 },
      ],
    },
  };

  const clientService = {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      name: "cockroachdb-client",
      namespace: cfg.namespace,
      labels,
    },
    spec: {
      selector: labels,
      ports: [
        { name: "sql", port: 26257, targetPort: 26257 },
      ],
    },
  };

  const dbContainer = {
    name: "cockroachdb",
    image,
    ports: [
      { containerPort: 26257, name: "grpc" },
      { containerPort: 8080, name: "http" },
    ],
    command: ["/cockroach/cockroach"],
    args: [
      "start",
      "--logtostderr",
      "--insecure",
      // Refers to the POD_ADVERTISE env entry defined below.
      "$(POD_ADVERTISE)",
      `--join=${joinHosts}`,
      "--store=path=/cockroach/cockroach-data",
      "--cache=.25",
      "--max-sql-memory=.25",
    ],
    env: [
      {
        name: "POD_NAME",
        valueFrom: { fieldRef: { fieldPath: "metadata.name" } },
      },
      {
        name: "POD_ADVERTISE",
        value: `--advertise-host=${podDns("$(POD_NAME)")}`,
      },
    ],
    volumeMounts: [{
      name: "datadir",
      mountPath: "/cockroach/cockroach-data",
    }],
    readinessProbe: {
      httpGet: { path: "/health?ready=1", port: 8080, scheme: "HTTP" },
      initialDelaySeconds: 10,
      periodSeconds: 5,
    },
    livenessProbe: {
      httpGet: { path: "/health", port: 8080, scheme: "HTTP" },
      initialDelaySeconds: 30,
      periodSeconds: 10,
    },
    resources: {
      requests: { cpu: "100m", memory: "256Mi" },
      limits: { cpu: "2", memory: "2Gi" },
    },
  };

  const statefulSet = {
    apiVersion: "apps/v1",
    kind: "StatefulSet",
    metadata: {
      name: "cockroachdb",
      namespace: cfg.namespace,
    },
    spec: {
      serviceName: "cockroachdb",
      replicas: cfg.replicas,
      selector: { matchLabels: labels },
      template: {
        metadata: { labels },
        spec: {
          containers: [dbContainer],
          volumes: [{
            name: "datadir",
            hostPath: {
              path: cfg.hostDataPath,
              type: "DirectoryOrCreate",
            },
          }],
          terminationGracePeriodSeconds: 60,
        },
      },
    },
  };

  // Job that runs `cockroach init` against the first pod of the StatefulSet.
  const initJob = {
    apiVersion: "batch/v1",
    kind: "Job",
    metadata: {
      name: "cockroachdb-init",
      namespace: cfg.namespace,
    },
    spec: {
      template: {
        spec: {
          restartPolicy: "OnFailure",
          containers: [{
            name: "init",
            image,
            command: ["/cockroach/cockroach"],
            args: [
              "init",
              "--insecure",
              `--host=${podDns("cockroachdb-0")}:26257`,
            ],
          }],
        },
      },
    },
  };

  const connectionString =
    `postgresql://root@cockroachdb-client.${cfg.namespace}.svc.cluster.local:26257/lab?sslmode=disable`;

  return {
    namespace,
    headlessService,
    clientService,
    statefulSet,
    initJob,
    connectionString,
  };
}

View File

@@ -0,0 +1,18 @@
// Labcontroller deploy helpers.
// The actual deployment uses kubectl apply via SSH (see labcontroller CLI command).
/**
 * Serialize a manifest to a compact JSON string for piping to kubectl apply.
 *
 * @param manifest The manifest object to serialize.
 * @returns The output of JSON.stringify with no indentation.
 */
export function toKubectlJson(manifest: Record<string, unknown>): string {
  const serialized = JSON.stringify(manifest);
  return serialized;
}
/**
 * Escape single quotes for embedding in a single-quoted bash string.
 *
 * Every `'` becomes `'\''` (close the quote, emit an escaped quote, reopen).
 * The surrounding quotes are not added here.
 *
 * @param s Raw text.
 * @returns The text with each single quote replaced.
 */
export function shellEscape(s: string): string {
  const pieces = s.split("'");
  return pieces.join("'\\''");
}
/**
 * Generate a shell command that applies a manifest with `sudo k3s kubectl apply`.
 *
 * The manifest is serialized to JSON, single quotes are escaped, and the result is
 * echoed into kubectl's stdin (`-f -`).
 *
 * @param manifest The manifest object to apply.
 * @returns The complete command line as a string.
 */
export function kubectlApplyCmd(manifest: Record<string, unknown>): string {
  const quotedPayload = shellEscape(toKubectlJson(manifest));
  return "echo '" + quotedPayload + "' | sudo k3s kubectl apply -f -";
}

View File

@@ -0,0 +1,39 @@
// Labcontroller module — deploys bastion + labd + CockroachDB to k3s.
// Multi-node: CockroachDB auto-clusters via headless Service DNS.
export { cockroachDbManifests, type CockroachDbConfig, COCKROACHDB_DEFAULTS } from "./cockroachdb.js";
export { labdManifests, type LabdConfig, LABD_DEFAULTS } from "./labd.js";
export { bastionManifests, type BastionK8sConfig, BASTION_DEFAULTS } from "./bastion.js";
export { toKubectlJson, shellEscape, kubectlApplyCmd } from "./deploy.js";
import { cockroachDbManifests, type CockroachDbConfig } from "./cockroachdb.js";
import { labdManifests, type LabdConfig } from "./labd.js";
import { bastionManifests, type BastionK8sConfig } from "./bastion.js";
/**
 * Optional overrides for a labcontroller deployment.
 * Every section may be omitted; each one is a partial of the matching component config.
 */
export interface LabcontrollerConfig {
/** Overrides handed to cockroachDbManifests. */
cockroachdb?: Partial<CockroachDbConfig>;
/** Overrides spread into the options handed to labdManifests. */
labd?: Partial<LabdConfig>;
/** Overrides handed to bastionManifests. */
bastion?: Partial<BastionK8sConfig>;
}
/**
 * Generate all k8s manifests for a full labcontroller deployment.
 *
 * labd is wired to the generated CockroachDB client Service unless the caller
 * supplies `config.labd.databaseUrl`, in which case that explicit URL is used.
 *
 * NOTE: only the CockroachDB namespace manifest is emitted. If labd or bastion
 * are configured with a different namespace, that namespace is not created here.
 *
 * @param config Optional per-component overrides.
 * @returns The manifests in apply order: namespace, services, then workloads.
 */
export function labcontrollerManifests(config?: LabcontrollerConfig): Record<string, unknown>[] {
  const crdb = cockroachDbManifests(config?.cockroachdb);
  const labd = labdManifests({
    ...config?.labd,
    // Keep an explicit databaseUrl; fall back to the generated connection
    // string only when none was given.
    databaseUrl: config?.labd?.databaseUrl ?? crdb.connectionString,
  });
  const bastion = bastionManifests(config?.bastion);
  // Order matters: namespace first, then services, then workloads
  return [
    crdb.namespace,
    crdb.headlessService,
    crdb.clientService,
    crdb.statefulSet,
    crdb.initJob,
    labd.service,
    labd.deployment,
    bastion.daemonSet,
  ];
}

View File

@@ -0,0 +1,81 @@
// labd (master daemon) k8s deployment manifests.
/** Settings that shape the labd manifests. */
export interface LabdConfig {
  /** Namespace for the Deployment and the Service. */
  namespace: string;
  /** Image reference used for the labd container. */
  image: string;
  /** Deployment replica count. */
  replicas: number;
  /** Value of the DATABASE_URL environment variable given to labd. */
  databaseUrl: string;
}

/** Values used for every setting the caller does not override. */
export const LABD_DEFAULTS: LabdConfig = {
  namespace: "lab-system",
  image: "gitea.mysources.co.uk/michal/lab-labd:latest",
  replicas: 1,
  databaseUrl: "postgresql://root@cockroachdb-client.lab-system.svc.cluster.local:26257/lab?sslmode=disable",
};

/**
 * Build the k8s manifests for labd.
 *
 * @param opts Overrides merged on top of LABD_DEFAULTS.
 * @returns An object with the `deployment` and the NodePort `service`
 *          (port 3100 exposed on nodePort 30100).
 */
export function labdManifests(opts?: Partial<LabdConfig>) {
  const cfg = { ...LABD_DEFAULTS, ...opts };
  // One label object is shared by the Deployment selector, pod template and Service.
  const labels = { app: "labd", "app.kubernetes.io/part-of": "lab" };
  // Each probe gets its own object; both poll /healthz on the labd port.
  const healthzProbe = (initialDelaySeconds: number, periodSeconds: number) => ({
    httpGet: { path: "/healthz", port: 3100 },
    initialDelaySeconds,
    periodSeconds,
  });

  const labdContainer = {
    name: "labd",
    image: cfg.image,
    ports: [{ containerPort: 3100, name: "http" }],
    env: [
      { name: "DATABASE_URL", value: cfg.databaseUrl },
      { name: "LABD_PORT", value: "3100" },
      { name: "LABD_HOST", value: "0.0.0.0" },
    ],
    readinessProbe: healthzProbe(5, 5),
    livenessProbe: healthzProbe(10, 10),
    resources: {
      requests: { cpu: "50m", memory: "128Mi" },
      limits: { cpu: "500m", memory: "512Mi" },
    },
  };

  const deployment = {
    apiVersion: "apps/v1",
    kind: "Deployment",
    metadata: {
      name: "labd",
      namespace: cfg.namespace,
    },
    spec: {
      replicas: cfg.replicas,
      selector: { matchLabels: labels },
      template: {
        metadata: { labels },
        spec: {
          containers: [labdContainer],
        },
      },
    },
  };

  const service = {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      name: "labd",
      namespace: cfg.namespace,
      labels,
    },
    spec: {
      type: "NodePort",
      selector: labels,
      ports: [
        { name: "http", port: 3100, targetPort: 3100, nodePort: 30100 },
      ],
    },
  };

  return { deployment, service };
}

View File

@@ -0,0 +1,21 @@
{
"name": "@lab/modules",
"version": "0.1.0",
"private": true,
"type": "module",
"main": "./dist/src/index.js",
"types": "./dist/src/index.d.ts",
"scripts": {
"build": "tsc --build",
"clean": "rimraf dist"
},
"dependencies": {
"@kubernetes/client-node": "^1.4.0",
"@lab/shared": "workspace:*"
},
"devDependencies": {
"@types/node": "^22.14.1",
"rimraf": "^6.1.3",
"typescript": "^5.9.3"
}
}

View File

@@ -0,0 +1,23 @@
export type {
ModuleMetadata,
ModuleContext,
ModuleResult,
Module,
} from "./types.js";
export { sshExec, sshExecStreaming } from "./ssh.js";
export type { SshExecOptions, SshExecResult } from "./ssh.js";
export { ModuleRunner } from "./runner.js";
export type { Phase, RunOptions } from "./runner.js";
export { ModuleRegistry } from "./registry.js";
// k3s module — operation-based
export { K3sModule } from "../modules/k3s/src/k3s-module.js";
export type { K3sConfig, OperationContext, OperationResult, Operation } from "../modules/k3s/src/types.js";
// DEPRECATED: legacy bash script generators (still used by labcontroller deploy)
export { generateInstallScript, type K3sInstallContext } from "../modules/k3s/src/install.js";
export { generateConfigureScript } from "../modules/k3s/src/configure.js";
export { generateHealthScript, K3S_HEALTH_CHECKS } from "../modules/k3s/src/health.js";

View File

@@ -0,0 +1,30 @@
import type { Module, ModuleMetadata } from "./types.js";
/** In-memory lookup of modules keyed by their `metadata.name`. */
export class ModuleRegistry {
  private readonly modules = new Map<string, Module>();

  /**
   * Add a module to the registry.
   *
   * @param module The module to store under `module.metadata.name`.
   * @throws Error when a module with the same name is already registered.
   */
  registerModule(module: Module): void {
    const key = module.metadata.name;
    const alreadyKnown = this.modules.has(key);
    if (alreadyKnown) {
      throw new Error(`Module "${key}" is already registered`);
    }
    this.modules.set(key, module);
  }

  /**
   * Look up a module by name.
   *
   * @returns The registered module, or undefined when the name is unknown.
   */
  getModule(name: string): Module | undefined {
    return this.modules.get(name);
  }

  /**
   * Collect the metadata of every registered module, in registration order
   * (the iteration order of the backing Map).
   */
  listModules(): ModuleMetadata[] {
    return Array.from(this.modules.values(), (entry) => entry.metadata);
  }
}

View File

@@ -0,0 +1,61 @@
import type { Module, ModuleContext, ModuleResult } from "./types.js";
/** Name of a module phase; each value is also the name of the Module method that ModuleRunner invokes. */
export type Phase = "install" | "configure" | "health";
/** Options accepted by ModuleRunner.run. */
export interface RunOptions {
/** Phases to run, in the given order. When omitted, DEFAULT_PHASES is used. */
phases?: Phase[];
/** Receives one status line per phase event. When omitted, lines are written with console.log. */
onProgress?: (phase: string, line: string) => void;
}
/** Phase order used when RunOptions.phases is not provided. */
const DEFAULT_PHASES: Phase[] = ["install", "configure", "health"];
/** Executes a module's phases one after another and reports progress. */
export class ModuleRunner {
  /**
   * Run the requested phases in order, stopping after the first failed phase.
   *
   * A phase that throws is converted into a failed ModuleResult whose `errors`
   * holds the thrown message, so this method does not reject because of a
   * phase error.
   *
   * @param module The module whose phase methods are invoked.
   * @param ctx Context passed unchanged to every phase.
   * @param options Optional phase list and progress callback.
   * @returns One result per executed phase, including the failing one.
   */
  async run(
    module: Module,
    ctx: ModuleContext,
    options?: RunOptions,
  ): Promise<ModuleResult[]> {
    const phases = options?.phases ?? DEFAULT_PHASES;
    const report =
      options?.onProgress ??
      ((_phase: string, line: string) => {
        console.log(line);
      });

    // Invoke one phase; a thrown error becomes a failed result with the elapsed time.
    const execute = async (phase: Phase): Promise<ModuleResult> => {
      const startedAt = performance.now();
      try {
        return await module[phase](ctx);
      } catch (err) {
        const duration = Math.round(performance.now() - startedAt);
        const errorMessage = err instanceof Error ? err.message : String(err);
        return {
          success: false,
          phase,
          duration,
          output: [],
          errors: [errorMessage],
        };
      }
    };

    const collected: ModuleResult[] = [];
    for (const phase of phases) {
      report(phase, `[${module.metadata.name}] starting phase: ${phase}`);
      const outcome = await execute(phase);
      collected.push(outcome);
      if (!outcome.success) {
        report(phase, `[${module.metadata.name}] ${phase} failed: ${outcome.errors.join(", ")}`);
        break;
      }
      report(phase, `[${module.metadata.name}] ${phase} completed in ${outcome.duration}ms`);
    }
    return collected;
  }
}

18
bastion/src/modules/src/ssh.d.ts vendored Normal file
View File

@@ -0,0 +1,18 @@
/** Optional settings accepted by sshExec and sshExecStreaming. */
export interface SshExecOptions {
/** Path to a private key file; when set it is passed to ssh with `-i`. */
keyPath?: string;
/** Timeout in milliseconds; when set, the ssh process is sent SIGTERM once it elapses. */
timeoutMs?: number;
}
/** Outcome of a finished ssh invocation. */
export interface SshExecResult {
/** Exit code of the ssh process; 1 when no code was reported, 124 when the timeout fired. */
exitCode: number;
/** Everything the process wrote to stdout, decoded as UTF-8. */
stdout: string;
/** Everything the process wrote to stderr, decoded as UTF-8; a timeout appends "\nSSH command timed out". */
stderr: string;
}
/**
 * Execute a command over SSH and return the result when complete.
 *
 * The promise resolves for any exit code (including the timeout case) and
 * rejects only when the ssh process emits an "error" event.
 *
 * @param ip Host to connect to; combined with `user` as `user@ip`.
 * @param user Remote user name.
 * @param command Command string handed to ssh as the remote command.
 * @param options Optional key path and timeout.
 */
export declare function sshExec(ip: string, user: string, command: string, options?: SshExecOptions): Promise<SshExecResult>;
/**
 * Execute a command over SSH, calling onLine for each line of combined output.
 *
 * Lines from stdout and stderr are both delivered to `onLine`; any trailing text
 * without a final newline is delivered when the process closes. The resolved
 * result still contains the complete stdout and stderr.
 *
 * @param ip Host to connect to; combined with `user` as `user@ip`.
 * @param user Remote user name.
 * @param command Command string handed to ssh as the remote command.
 * @param onLine Callback invoked with each line, without its newline.
 * @param options Optional key path and timeout.
 */
export declare function sshExecStreaming(ip: string, user: string, command: string, onLine: (line: string) => void, options?: SshExecOptions): Promise<SshExecResult>;
//# sourceMappingURL=ssh.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"ssh.d.ts","sourceRoot":"","sources":["ssh.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,cAAc;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;CAChB;AAsBD;;GAEG;AACH,wBAAgB,OAAO,CACrB,EAAE,EAAE,MAAM,EACV,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,CAuCxB;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,EAAE,EAAE,MAAM,EACV,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,EAC9B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,CAgExB"}

View File

@@ -0,0 +1,111 @@
import { spawn } from "node:child_process";
import { StringDecoder } from "node:string_decoder";
/**
 * Assemble the argument vector for the `ssh` binary.
 *
 * Host-key checking is disabled and the known-hosts file is /dev/null; an
 * identity file is added only when `options.keyPath` is set (truthy).
 *
 * @param ip Host to connect to.
 * @param user Remote user name.
 * @param command Remote command, passed as the final argument.
 * @param options Optional settings; only `keyPath` is read here.
 * @returns The argument array, ending with `user@ip` and the command.
 */
function buildSshArgs(ip, user, command, options) {
    const identityArgs = options?.keyPath ? ["-i", options.keyPath] : [];
    return [
        "-o", "StrictHostKeyChecking=no",
        "-o", "UserKnownHostsFile=/dev/null",
        "-o", "LogLevel=ERROR",
        ...identityArgs,
        `${user}@${ip}`,
        command,
    ];
}
/**
 * Run a command over SSH and resolve with its buffered output once ssh exits.
 *
 * The promise resolves for every exit code and rejects only when the child
 * process emits "error". When `options.timeoutMs` elapses the process is sent
 * SIGTERM and the result carries exit code 124 with a note appended to stderr.
 *
 * @param ip Host to connect to.
 * @param user Remote user name.
 * @param command Remote command to execute.
 * @param options Optional `keyPath` and `timeoutMs`.
 * @returns Promise of `{ exitCode, stdout, stderr }`.
 */
export function sshExec(ip, user, command, options) {
    return new Promise((resolve, reject) => {
        const sshArgs = buildSshArgs(ip, user, command, options);
        const child = spawn("ssh", sshArgs, { stdio: ["ignore", "pipe", "pipe"] });
        const outParts = [];
        const errParts = [];
        child.stdout.on("data", (part) => outParts.push(part));
        child.stderr.on("data", (part) => errParts.push(part));

        let timedOut = false;
        const timer = options?.timeoutMs
            ? setTimeout(() => {
                timedOut = true;
                child.kill("SIGTERM");
            }, options.timeoutMs)
            : undefined;
        const stopTimer = () => {
            if (timer) {
                clearTimeout(timer);
            }
        };

        child.on("error", (err) => {
            stopTimer();
            reject(err);
        });
        child.on("close", (code) => {
            stopTimer();
            const stdout = Buffer.concat(outParts).toString("utf-8");
            const stderr = Buffer.concat(errParts).toString("utf-8");
            resolve(timedOut
                ? { exitCode: 124, stdout, stderr: stderr + "\nSSH command timed out" }
                : { exitCode: code ?? 1, stdout, stderr });
        });
    });
}
/**
 * Split a byte stream into lines and hand each complete line to `onLine`.
 *
 * Chunks are decoded through a StringDecoder so a multi-byte UTF-8 character
 * split across two chunks is reassembled rather than turned into replacement
 * characters.
 *
 * @param onLine Callback invoked with each line, without its trailing newline.
 * @returns `push(chunk)` to feed data and `flush()` to emit any unterminated tail.
 */
function createLineEmitter(onLine) {
    const decoder = new StringDecoder("utf8");
    let pending = "";
    return {
        push(chunk) {
            pending += decoder.write(chunk);
            const lines = pending.split("\n");
            // The last element is the (possibly empty) unterminated remainder.
            pending = lines.pop() ?? "";
            for (const line of lines) {
                onLine(line);
            }
        },
        flush() {
            // end() returns whatever the decoder still held back.
            pending += decoder.end();
            if (pending) {
                onLine(pending);
            }
            pending = "";
        },
    };
}
/**
 * Execute a command over SSH, calling onLine for each line of combined output.
 *
 * Lines from stdout and stderr are both delivered to `onLine` as they arrive;
 * text without a final newline is delivered when the process closes (stdout
 * remainder first, then stderr). The promise resolves for every exit code and
 * rejects only when the child process emits "error". When `options.timeoutMs`
 * elapses the process is sent SIGTERM and the result carries exit code 124.
 *
 * @param ip Host to connect to.
 * @param user Remote user name.
 * @param command Remote command to execute.
 * @param onLine Callback invoked once per output line.
 * @param options Optional `keyPath` and `timeoutMs`.
 * @returns Promise of `{ exitCode, stdout, stderr }` with the complete output.
 */
export function sshExecStreaming(ip, user, command, onLine, options) {
    return new Promise((resolve, reject) => {
        const args = buildSshArgs(ip, user, command, options);
        const proc = spawn("ssh", args, { stdio: ["ignore", "pipe", "pipe"] });
        const stdoutChunks = [];
        const stderrChunks = [];
        // Each stream gets its own decoder and line buffer.
        const stdoutLines = createLineEmitter(onLine);
        const stderrLines = createLineEmitter(onLine);
        proc.stdout.on("data", (chunk) => {
            stdoutChunks.push(chunk);
            stdoutLines.push(chunk);
        });
        proc.stderr.on("data", (chunk) => {
            stderrChunks.push(chunk);
            stderrLines.push(chunk);
        });
        let timedOut = false;
        let timer;
        if (options?.timeoutMs) {
            timer = setTimeout(() => {
                timedOut = true;
                proc.kill("SIGTERM");
            }, options.timeoutMs);
        }
        proc.on("error", (err) => {
            if (timer)
                clearTimeout(timer);
            reject(err);
        });
        proc.on("close", (code) => {
            if (timer)
                clearTimeout(timer);
            // Flush remaining buffered content
            stdoutLines.flush();
            stderrLines.flush();
            const stdout = Buffer.concat(stdoutChunks).toString("utf-8");
            const stderr = Buffer.concat(stderrChunks).toString("utf-8");
            if (timedOut) {
                resolve({ exitCode: 124, stdout, stderr: stderr + "\nSSH command timed out" });
                return;
            }
            resolve({ exitCode: code ?? 1, stdout, stderr });
        });
    });
}
//# sourceMappingURL=ssh.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"ssh.js","sourceRoot":"","sources":["ssh.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAa3C,SAAS,YAAY,CACnB,EAAU,EACV,IAAY,EACZ,OAAe,EACf,OAAwB;IAExB,MAAM,IAAI,GAAa;QACrB,IAAI,EAAE,0BAA0B;QAChC,IAAI,EAAE,8BAA8B;QACpC,IAAI,EAAE,gBAAgB;KACvB,CAAC;IAEF,IAAI,OAAO,EAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IACnC,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;IACpC,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,OAAO,CACrB,EAAU,EACV,IAAY,EACZ,OAAe,EACf,OAAwB;IAExB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,IAAI,GAAG,YAAY,CAAC,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QACtD,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;QAEvE,MAAM,YAAY,GAAa,EAAE,CAAC;QAClC,MAAM,YAAY,GAAa,EAAE,CAAC;QAElC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QACpE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAEpE,IAAI,QAAQ,GAAG,KAAK,CAAC;QACrB,IAAI,KAAgD,CAAC;QAErD,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;YACvB,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;gBACtB,QAAQ,GAAG,IAAI,CAAC;gBAChB,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACvB,CAAC,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;QACxB,CAAC;QAED,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,IAAI,KAAK;gBAAE,YAAY,CAAC,KAAK,CAAC,CAAC;YAC/B,MAAM,CAAC,GAAG,CAAC,CAAC;QACd,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACxB,IAAI,KAAK;gBAAE,YAAY,CAAC,KAAK,CAAC,CAAC;YAC/B,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC7D,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAE7D,IAAI,QAAQ,EAAE,CAAC;gBACb,OAAO,CAAC,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,yBAAyB,EAAE,CAAC,CAAC;gBAC/E,OAAO;YACT,CAAC;YAED,OAAO,CAAC,EAAE,QAAQ,EAAE,IAAI,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QACnD,CAAC,CA
AC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAC9B,EAAU,EACV,IAAY,EACZ,OAAe,EACf,MAA8B,EAC9B,OAAwB;IAExB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,IAAI,GAAG,YAAY,CAAC,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QACtD,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;QAEvE,MAAM,YAAY,GAAa,EAAE,CAAC;QAClC,MAAM,YAAY,GAAa,EAAE,CAAC;QAElC,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,YAAY,GAAG,EAAE,CAAC;QAEtB,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YACvC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACzB,YAAY,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YACxC,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACvC,YAAY,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;YACjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,CAAC,IAAI,CAAC,CAAC;YACf,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YACvC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACzB,YAAY,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YACxC,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACvC,YAAY,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;YACjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,CAAC,IAAI,CAAC,CAAC;YACf,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,IAAI,QAAQ,GAAG,KAAK,CAAC;QACrB,IAAI,KAAgD,CAAC;QAErD,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;YACvB,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;gBACtB,QAAQ,GAAG,IAAI,CAAC;gBAChB,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACvB,CAAC,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;QACxB,CAAC;QAED,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,IAAI,KAAK;gBAAE,YAAY,CAAC,KAAK,CAAC,CAAC;YAC/B,MAAM,CAAC,GAAG,CAAC,CAAC;QACd,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACxB,IAAI,KAAK;gBAAE,YAAY,CAAC,KAAK,CAAC,CAAC;YAE/B,mCAAmC;YACnC,IAAI,YAAY;gBAAE,MAAM,CAAC,YAAY,CAAC,CAAC;YACvC,IAAI,YAAY;gBAAE,MAAM,CAAC,YAAY,CAAC,CAAC;YAEvC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC7D,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC
;YAE7D,IAAI,QAAQ,EAAE,CAAC;gBACb,OAAO,CAAC,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,yBAAyB,EAAE,CAAC,CAAC;gBAC/E,OAAO;YACT,CAAC;YAED,OAAO,CAAC,EAAE,QAAQ,EAAE,IAAI,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QACnD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC"}

View File

@@ -0,0 +1,156 @@
import { spawn } from "node:child_process";
/** Optional settings accepted by the SSH helpers in this file. */
export interface SshExecOptions {
/** Private key file; when set it is passed to ssh as `-i <keyPath>`. */
keyPath?: string;
/**
 * Time limit in milliseconds (handed to setTimeout). On expiry the ssh
 * process is sent SIGTERM. Unset or 0 means no time limit is armed.
 */
timeoutMs?: number;
}
/** Outcome of a finished SSH command as reported by the helpers in this file. */
export interface SshExecResult {
/**
 * Exit code of the ssh process. 124 is reported when the timeout fired, and
 * 1 when the process closed without providing a code.
 */
exitCode: number;
/** Everything the process wrote to stdout, decoded as UTF-8. */
stdout: string;
/**
 * Everything the process wrote to stderr, decoded as UTF-8. On timeout,
 * "\nSSH command timed out" is appended.
 */
stderr: string;
}
/**
 * Assemble the argument vector handed to the `ssh` binary for one command.
 *
 * The fixed `-o` options disable host-key checking, discard known hosts
 * (/dev/null) and limit ssh's own logging to errors. An identity file is
 * added only when `options.keyPath` is a non-empty string.
 *
 * @returns Arguments in the order: fixed options, optional `-i <key>`,
 *          `user@ip`, then the remote command as a single argument.
 */
function buildSshArgs(
  ip: string,
  user: string,
  command: string,
  options?: SshExecOptions,
): string[] {
  const identityFlags: string[] = options?.keyPath ? ["-i", options.keyPath] : [];
  const destination = `${user}@${ip}`;

  return [
    "-o",
    "StrictHostKeyChecking=no",
    "-o",
    "UserKnownHostsFile=/dev/null",
    "-o",
    "LogLevel=ERROR",
    ...identityFlags,
    destination,
    command,
  ];
}
/**
 * Run `command` on `user@ip` through the system `ssh` client and collect its
 * output.
 *
 * Resolves when the child process closes, carrying the captured stdout and
 * stderr plus the exit code (1 if the close event reports none). If
 * `options.timeoutMs` elapses first, the child is sent SIGTERM and the result
 * has exit code 124 with "\nSSH command timed out" appended to stderr.
 * Rejects when the child process emits "error".
 */
export function sshExec(
  ip: string,
  user: string,
  command: string,
  options?: SshExecOptions,
): Promise<SshExecResult> {
  return new Promise<SshExecResult>((resolve, reject) => {
    const child = spawn("ssh", buildSshArgs(ip, user, command, options), {
      stdio: ["ignore", "pipe", "pipe"],
    });

    const outParts: Buffer[] = [];
    const errParts: Buffer[] = [];
    child.stdout.on("data", (part: Buffer) => {
      outParts.push(part);
    });
    child.stderr.on("data", (part: Buffer) => {
      errParts.push(part);
    });

    // Arm the deadline only when a non-zero timeout was requested.
    let expired = false;
    const deadline = options?.timeoutMs
      ? setTimeout(() => {
          expired = true;
          child.kill("SIGTERM");
        }, options.timeoutMs)
      : undefined;
    const cancelDeadline = (): void => {
      if (deadline) clearTimeout(deadline);
    };

    child.on("error", (err) => {
      cancelDeadline();
      reject(err);
    });

    child.on("close", (code) => {
      cancelDeadline();
      const stdout = Buffer.concat(outParts).toString("utf-8");
      const stderr = Buffer.concat(errParts).toString("utf-8");
      resolve(
        expired
          ? { exitCode: 124, stdout, stderr: stderr + "\nSSH command timed out" }
          : { exitCode: code ?? 1, stdout, stderr },
      );
    });
  });
}
/**
 * Build a byte-level line splitter for one output stream.
 *
 * Chunks are cut at the newline byte (0x0A) before decoding. In UTF-8 that
 * byte never occurs inside a multi-byte sequence, so a character that
 * straddles two chunks is decoded intact instead of turning into U+FFFD.
 *
 * @param onLine Receives each complete line without its trailing "\n".
 * @returns `push` feeds a chunk; `flush` emits any unterminated remainder.
 */
function createLineEmitter(onLine: (line: string) => void): {
  push: (chunk: Buffer) => void;
  flush: () => void;
} {
  // Byte fragments of the line currently being assembled.
  const pending: Buffer[] = [];

  const emitPending = (): void => {
    const line = Buffer.concat(pending).toString("utf-8");
    // Reset first so a throwing callback cannot make the same bytes be
    // emitted a second time.
    pending.length = 0;
    onLine(line);
  };

  return {
    push(chunk: Buffer): void {
      let start = 0;
      let newline = chunk.indexOf(0x0a, start);
      while (newline !== -1) {
        pending.push(chunk.subarray(start, newline));
        emitPending();
        start = newline + 1;
        newline = chunk.indexOf(0x0a, start);
      }
      if (start < chunk.length) {
        pending.push(chunk.subarray(start));
      }
    },
    flush(): void {
      if (pending.length > 0) {
        emitPending();
      }
    },
  };
}

/**
 * Execute a command over SSH, calling onLine for each line of combined output.
 *
 * stdout and stderr are split into lines independently and delivered to
 * `onLine` in arrival order. Any unterminated trailing content is flushed
 * when the process closes (stdout first, then stderr).
 *
 * @param ip       Address of the remote host.
 * @param user     Remote login name.
 * @param command  Command passed to ssh as a single argument.
 * @param onLine   Called once per output line, without the trailing "\n".
 * @param options  Optional identity file and timeout.
 * @returns The full captured stdout/stderr and the exit code (1 when the
 *          close event carries no code). If `options.timeoutMs` elapses, the
 *          process is sent SIGTERM and the result has exit code 124 with a
 *          timeout note appended to stderr. Rejects when the child process
 *          emits "error".
 */
export function sshExecStreaming(
  ip: string,
  user: string,
  command: string,
  onLine: (line: string) => void,
  options?: SshExecOptions,
): Promise<SshExecResult> {
  return new Promise((resolve, reject) => {
    const args = buildSshArgs(ip, user, command, options);
    const proc = spawn("ssh", args, { stdio: ["ignore", "pipe", "pipe"] });
    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];
    const stdoutLines = createLineEmitter(onLine);
    const stderrLines = createLineEmitter(onLine);

    proc.stdout.on("data", (chunk: Buffer) => {
      stdoutChunks.push(chunk);
      stdoutLines.push(chunk);
    });
    proc.stderr.on("data", (chunk: Buffer) => {
      stderrChunks.push(chunk);
      stderrLines.push(chunk);
    });

    let timedOut = false;
    let timer: ReturnType<typeof setTimeout> | undefined;
    if (options?.timeoutMs) {
      timer = setTimeout(() => {
        timedOut = true;
        proc.kill("SIGTERM");
      }, options.timeoutMs);
    }

    proc.on("error", (err) => {
      if (timer) clearTimeout(timer);
      reject(err);
    });

    proc.on("close", (code) => {
      if (timer) clearTimeout(timer);
      // Flush remaining buffered content
      stdoutLines.flush();
      stderrLines.flush();
      const stdout = Buffer.concat(stdoutChunks).toString("utf-8");
      const stderr = Buffer.concat(stderrChunks).toString("utf-8");
      if (timedOut) {
        resolve({ exitCode: 124, stdout, stderr: stderr + "\nSSH command timed out" });
        return;
      }
      resolve({ exitCode: code ?? 1, stdout, stderr });
    });
  });
}

View File

@@ -0,0 +1,38 @@
import type { OsId, Arch } from "@lab/shared";
/** Static descriptive data a module exposes through `Module.metadata`. */
export interface ModuleMetadata {
name: string;
// NOTE(review): version format (e.g. semver) is not enforced here — confirm with the module loader.
version: string;
description: string;
// Optional role and label selectors.
// NOTE(review): presumably used to decide which hosts the module applies to — confirm against the caller.
targets: {
roles?: string[];
labels?: Record<string, string>;
};
// NOTE(review): presumably names of other modules this one depends on — confirm.
dependencies?: string[];
}
/** Per-host input passed to every `Module` phase method (install, configure, health). */
export interface ModuleContext {
hostname: string;
ip: string;
role: string;
// OS and architecture identifiers, typed by `@lab/shared`.
os: OsId;
arch: Arch;
// NOTE(review): sshUser / sshKeyPath presumably feed the SSH helpers (user and `keyPath`) — confirm in the module implementations.
sshUser: string;
sshKeyPath?: string;
// Free-form settings; values are `unknown` and must be narrowed before use.
config: Record<string, unknown>;
}
/** Result returned by each `Module` phase method. */
export interface ModuleResult {
success: boolean;
// One of the three phase names, matching the `Module` methods install / configure / health.
phase: "install" | "configure" | "health";
// NOTE(review): unit is not visible here (likely milliseconds) — confirm with the producers.
duration: number;
// Collected output and error lines, kept as separate lists.
output: string[];
errors: string[];
}
/**
 * Contract every module implements: read-only metadata plus three async
 * phase methods that each receive the same context type and return a
 * `ModuleResult`.
 */
export interface Module {
readonly metadata: ModuleMetadata;
// NOTE(review): the order in which callers invoke the phases is not visible in this file — confirm with the runner.
install(ctx: ModuleContext): Promise<ModuleResult>;
configure(ctx: ModuleContext): Promise<ModuleResult>;
health(ctx: ModuleContext): Promise<ModuleResult>;
}

View File

@@ -0,0 +1,13 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": ".",
"outDir": "dist",
"composite": true
},
"include": ["src/**/*.ts", "modules/**/*.ts"],
"exclude": ["modules/**/tests/**"],
"references": [
{ "path": "../shared" }
]
}