fix: k3s install automation — skip Cilium on join, Longhorn via server, default root user
Some checks failed
Some checks failed
- Skip Cilium install for joining servers (they already get Cilium from the cluster's daemonset)
- Longhorn annotation for workers: the CLI SSHes to the server node to apply the kubectl annotation (workers don't have kubectl access)
- Default SSH user for k3s/app commands changed to 'root' (operations need root privileges; using the 'lab' user broke installs)
- k3s server config: cluster-init for the initial server, server+token for joins

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -78,9 +78,10 @@ export class K3sModule implements Module {
|
||||
return toModuleResult("install", [...prepResults, ...k3sResults], start);
|
||||
}
|
||||
|
||||
// Phase 3: Networking (server only — agents don't install Cilium)
|
||||
// Phase 3: Networking (initial server only — joining servers get Cilium via daemonset)
|
||||
let netResults: OperationResult[] = [];
|
||||
if (isServer) {
|
||||
const isJoiningServer = isServer && !!opCtx.config.k3sServerUrl;
|
||||
if (isServer && !isJoiningServer) {
|
||||
netResults = await runNetworking(opCtx);
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
import type { Operation, OperationResult } from "../types.js";
|
||||
import { sshOpts } from "../utils.js";
|
||||
import { sshExec as remoteSshExec } from "../../../../src/ssh.js";
|
||||
|
||||
export const configureLonghornDisk: Operation = async (ctx): Promise<OperationResult> => {
|
||||
// Check if /var/lib/longhorn exists on this node
|
||||
@@ -15,12 +16,11 @@ export const configureLonghornDisk: Operation = async (ctx): Promise<OperationRe
|
||||
const nodeNameResult = await ctx.ssh.exec("hostname -f 2>/dev/null || hostname", sshOpts(ctx));
|
||||
const nodeName = nodeNameResult.stdout.trim();
|
||||
|
||||
// Apply the annotation via kubectl (works on server nodes, or via KUBECONFIG on agents)
|
||||
const kubectlPrefix = "k3s kubectl";
|
||||
const annotation = JSON.stringify([{ path: "/var/lib/longhorn", allowScheduling: true }]);
|
||||
|
||||
// Try kubectl locally first (works on server nodes)
|
||||
const result = await ctx.ssh.exec(
|
||||
`${kubectlPrefix} annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite 2>&1 || true`,
|
||||
`k3s kubectl annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite 2>&1 || true`,
|
||||
sshOpts(ctx),
|
||||
);
|
||||
|
||||
@@ -28,7 +28,23 @@ export const configureLonghornDisk: Operation = async (ctx): Promise<OperationRe
|
||||
return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName}` };
|
||||
}
|
||||
|
||||
// If kubectl isn't available (agent node without server access), that's OK —
|
||||
// the label is set, annotation can be applied from the server later
|
||||
// For worker/agent nodes without local kubectl: apply via the server
|
||||
if (ctx.config.k3sServerUrl) {
|
||||
// The CLI has SSH access to the server — use sshExec from there
|
||||
const serverHost = new URL(ctx.config.k3sServerUrl).hostname;
|
||||
try {
|
||||
const remoteResult = await remoteSshExec(
|
||||
serverHost, "root",
|
||||
`k3s kubectl annotate node "${nodeName}" "node.longhorn.io/default-disks-config=${annotation}" --overwrite`,
|
||||
{ ...(ctx.ssh.keyPath ? { keyPath: ctx.ssh.keyPath } : {}), timeoutMs: 15_000 },
|
||||
);
|
||||
if (remoteResult.stdout.includes("annotated") || remoteResult.stdout.includes("unchanged")) {
|
||||
return { success: true, changed: true, message: `Longhorn disk annotation applied to ${nodeName} (via server)` };
|
||||
}
|
||||
} catch {
|
||||
// Fall through to manual instruction
|
||||
}
|
||||
}
|
||||
|
||||
return { success: true, changed: false, message: "Longhorn disk label set (annotation requires server kubectl)" };
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user