fix: k3s install automation — skip Cilium on join, Longhorn via server, default root user
Some checks failed
CI/CD / typecheck (push) Failing after 10s
CI/CD / test (push) Failing after 9s
CI/CD / lint (push) Failing after 22s
CI/CD / build (push) Has been skipped
CI/CD / publish-rpm (push) Has been skipped
CI/CD / publish-deb (push) Has been skipped

- Skip Cilium install for joining servers (already in cluster via daemonset)
- Longhorn annotation for workers: the CLI now SSHes to the server node to
  apply the kubectl annotation (workers don't have kubectl access)
- Default SSH user for k3s/app commands changed to 'root' (operations
  need root privileges; using the 'lab' user broke installs)
- k3s server config: cluster-init for initial server, server+token for joins

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-04-01 16:02:19 +01:00
parent a68d6d617e
commit 06fc40a857
5 changed files with 30 additions and 13 deletions

View File

@@ -78,9 +78,10 @@ export class K3sModule implements Module {
return toModuleResult("install", [...prepResults, ...k3sResults], start);
}
// Phase 3: Networking (server only — agents don't install Cilium)
// Phase 3: Networking (initial server only — joining servers get Cilium via daemonset)
let netResults: OperationResult[] = [];
if (isServer) {
const isJoiningServer = isServer && !!opCtx.config.k3sServerUrl;
if (isServer && !isJoiningServer) {
netResults = await runNetworking(opCtx);
}

View File

@@ -3,6 +3,7 @@
import type { Operation, OperationResult } from "../types.js";
import { sshOpts } from "../utils.js";
import { sshExec as remoteSshExec } from "../../../../src/ssh.js";
export const configureLonghornDisk: Operation = async (ctx): Promise<OperationResult> => {
// Check if /var/lib/longhorn exists on this node
@@ -15,12 +16,11 @@ export const configureLonghornDisk: Operation = async (ctx): Promise<OperationRe
const nodeNameResult = await ctx.ssh.exec("hostname -f 2>/dev/null || hostname", sshOpts(ctx));
const nodeName = nodeNameResult.stdout.trim();
// Apply the annotation via kubectl (works on server nodes, or via KUBECONFIG on agents)
const kubectlPrefix = "k3s kubectl";
const annotation = JSON.stringify([{ path: "/var/lib/longhorn", allowScheduling: true }]);
// Try kubectl locally first (works on server nodes)
const result = await ctx.ssh.exec(
`${kubectlPrefix} annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite 2>&1 || true`,
`k3s kubectl annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite 2>&1 || true`,
sshOpts(ctx),
);
@@ -28,7 +28,23 @@ export const configureLonghornDisk: Operation = async (ctx): Promise<OperationRe
return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName}` };
}
// If kubectl isn't available (agent node without server access), that's OK —
// the label is set, annotation can be applied from the server later
// For worker/agent nodes without local kubectl: apply via the server
if (ctx.config.k3sServerUrl) {
// The CLI has SSH access to the server — use sshExec from there
const serverHost = new URL(ctx.config.k3sServerUrl).hostname;
try {
const remoteResult = await remoteSshExec(
serverHost, "root",
`k3s kubectl annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite`,
{ ...(ctx.ssh.keyPath ? { keyPath: ctx.ssh.keyPath } : {}), timeoutMs: 15_000 },
);
if (remoteResult.stdout.includes("annotated") || remoteResult.stdout.includes("unchanged")) {
return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName} (via server)` };
}
} catch {
// Fall through to manual instruction
}
}
return { success: true, changed: false, message: "Longhorn disk label set (annotation requires server kubectl)" };
};