Compare commits
4 Commits
feat/skill
...
fix/mcpd-i
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
180e50a978 | ||
|
|
7ebc8b22d1 | ||
|
|
d60ad52018 | ||
|
|
e6cd73543a |
@@ -16,6 +16,20 @@ import {
|
||||
removeSkillAtomic,
|
||||
type SkillBody,
|
||||
} from '../utils/skills-disk.js';
|
||||
import {
|
||||
runPostInstall,
|
||||
emitPostInstallAudit,
|
||||
hashScript,
|
||||
} from '../utils/postinstall.js';
|
||||
import {
|
||||
applyManagedHooks,
|
||||
removeManagedHooks,
|
||||
type HooksByEvent,
|
||||
} from '../utils/hooks-materialiser.js';
|
||||
import {
|
||||
attachSkillMcpServers,
|
||||
parseMcpServerDeps,
|
||||
} from '../utils/mcpservers-materialiser.js';
|
||||
import { ApiError } from '../api-client.js';
|
||||
|
||||
/**
|
||||
@@ -50,6 +64,19 @@ interface FullSkill {
|
||||
agentId: string | null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shape of `metadata` we care about at sync time. Validated server-side
|
||||
* by SkillMetadataSchema (PR-3); we re-narrow here for the fields the
|
||||
* sync acts on, keeping the rest opaque so future additions don't
|
||||
* require a CLI change.
|
||||
*/
|
||||
interface SyncedSkillMetadata {
|
||||
postInstall?: unknown;
|
||||
postInstallTimeoutSec?: unknown;
|
||||
hooks?: unknown;
|
||||
mcpServers?: unknown;
|
||||
}
|
||||
|
||||
export interface SyncOpts {
|
||||
/** Project name override; otherwise marker walk-up + fall back to globals-only. */
|
||||
project?: string;
|
||||
@@ -72,6 +99,10 @@ export interface SyncResult {
|
||||
skipped: string[];
|
||||
removed: string[];
|
||||
preserved: string[]; // skills with local edits we left alone
|
||||
postInstallsRan: string[]; // skills whose postInstall executed in this sync
|
||||
postInstallsSkipped: string[]; // skills with postInstall but unchanged hash → no rerun
|
||||
hooksApplied: string[]; // skills whose hooks were registered/updated in ~/.claude/settings.json
|
||||
mcpServersAttached: string[]; // "<skill>:<server>" tuples that landed in this sync
|
||||
errors: Array<{ skill: string; error: string }>;
|
||||
exitCode: 0 | 1 | 2;
|
||||
}
|
||||
@@ -95,6 +126,10 @@ export async function runSkillsSync(opts: SyncOpts, deps: SyncDeps): Promise<Syn
|
||||
skipped: [],
|
||||
removed: [],
|
||||
preserved: [],
|
||||
postInstallsRan: [],
|
||||
postInstallsSkipped: [],
|
||||
hooksApplied: [],
|
||||
mcpServersAttached: [],
|
||||
errors: [],
|
||||
exitCode: 0,
|
||||
};
|
||||
@@ -189,6 +224,8 @@ export async function runSkillsSync(opts: SyncOpts, deps: SyncDeps): Promise<Syn
|
||||
continue;
|
||||
}
|
||||
await removeSkillAtomic(prior.installDir);
|
||||
// Drop any hook entries this skill registered.
|
||||
try { await removeManagedHooks(name); } catch { /* best-effort */ }
|
||||
delete state.skills[name];
|
||||
result.removed.push(name);
|
||||
} catch (err: unknown) {
|
||||
@@ -210,18 +247,29 @@ export async function runSkillsSync(opts: SyncOpts, deps: SyncDeps): Promise<Syn
|
||||
}
|
||||
|
||||
// 8. Summary.
|
||||
if (!opts.quiet || result.errors.length > 0 || result.installed.length > 0 || result.updated.length > 0 || result.removed.length > 0) {
|
||||
const anythingHappened =
|
||||
result.errors.length > 0 ||
|
||||
result.installed.length > 0 ||
|
||||
result.updated.length > 0 ||
|
||||
result.removed.length > 0 ||
|
||||
result.postInstallsRan.length > 0 ||
|
||||
result.hooksApplied.length > 0 ||
|
||||
result.mcpServersAttached.length > 0;
|
||||
if (!opts.quiet || anythingHappened) {
|
||||
const parts: string[] = [];
|
||||
if (result.installed.length) parts.push(`${String(result.installed.length)} installed`);
|
||||
if (result.updated.length) parts.push(`${String(result.updated.length)} updated`);
|
||||
if (result.skipped.length) parts.push(`${String(result.skipped.length)} unchanged`);
|
||||
if (result.removed.length) parts.push(`${String(result.removed.length)} removed`);
|
||||
if (result.preserved.length) parts.push(`${String(result.preserved.length)} preserved (modified)`);
|
||||
if (result.postInstallsRan.length) parts.push(`${String(result.postInstallsRan.length)} postInstall ran`);
|
||||
if (result.hooksApplied.length) parts.push(`${String(result.hooksApplied.length)} hooks applied`);
|
||||
if (result.mcpServersAttached.length) parts.push(`${String(result.mcpServersAttached.length)} mcpServers attached`);
|
||||
if (result.errors.length) parts.push(`${String(result.errors.length)} errors`);
|
||||
if (parts.length === 0) parts.push('no changes');
|
||||
if (!opts.quiet) {
|
||||
log(`mcpctl skills sync${projectName ? ` (project: ${projectName})` : ' (global only)'}: ${parts.join(', ')}`);
|
||||
} else if (result.installed.length || result.updated.length || result.removed.length || result.errors.length) {
|
||||
} else if (anythingHappened) {
|
||||
// Quiet mode: only emit a single line if something actually happened.
|
||||
warn(`mcpctl: ${parts.join(', ')}`);
|
||||
}
|
||||
@@ -255,6 +303,112 @@ export async function runSkillsSync(opts: SyncOpts, deps: SyncDeps): Promise<Syn
|
||||
};
|
||||
const fileStates = await installSkillAtomic(targetDir, body);
|
||||
|
||||
// ── hooks: register metadata.hooks in ~/.claude/settings.json ──
|
||||
// Tagged with _mcpctl_source: <skill-name> so each skill's hooks
|
||||
// can be cleanly added/updated/removed without trampling other
|
||||
// skills or user-added hooks. No-op when the field is absent or
|
||||
// empty.
|
||||
const meta = (full.metadata ?? {}) as SyncedSkillMetadata;
|
||||
if (meta.hooks && typeof meta.hooks === 'object') {
|
||||
try {
|
||||
const hookRes = await applyManagedHooks(v.name, meta.hooks as HooksByEvent);
|
||||
if (hookRes.updated) result.hooksApplied.push(v.name);
|
||||
} catch (err: unknown) {
|
||||
warn(`mcpctl: failed to apply hooks for skill '${v.name}': ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
} else if (prior !== undefined) {
|
||||
// Skill no longer declares hooks but used to — clean up.
|
||||
try { await removeManagedHooks(v.name); } catch { /* best-effort */ }
|
||||
}
|
||||
|
||||
// ── mcpServers: auto-attach declared deps to the active project ──
|
||||
// Only meaningful when a project context is active; global skills
|
||||
// can't attach to "no project". v1 doesn't auto-create missing
|
||||
// servers (warn + skip). Idempotent — re-syncing a skill whose
|
||||
// deps are already attached is a no-op.
|
||||
const mcpServerDeps = parseMcpServerDeps(meta.mcpServers);
|
||||
if (mcpServerDeps.length > 0 && projectName) {
|
||||
try {
|
||||
const att = await attachSkillMcpServers(client, projectName, mcpServerDeps, warn);
|
||||
for (const srv of att.attached) {
|
||||
result.mcpServersAttached.push(`${v.name}:${srv}`);
|
||||
}
|
||||
for (const e of att.errors) {
|
||||
result.errors.push({
|
||||
skill: v.name,
|
||||
error: `mcpServers attach '${e.server}': ${e.error}`,
|
||||
});
|
||||
}
|
||||
} catch (err: unknown) {
|
||||
warn(`mcpctl: failed to attach mcpServers for skill '${v.name}': ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
} else if (mcpServerDeps.length > 0) {
|
||||
warn(`mcpctl: skill '${v.name}' declares mcpServers but sync is running global-only; skipping attach`);
|
||||
}
|
||||
|
||||
// ── postInstall: run metadata.postInstall when present ──
|
||||
// Hash-pinned: only execute when the script's sha256 differs from
|
||||
// what state recorded. Failures DO NOT update the recorded hash so
|
||||
// the next sync retries. Other skills continue regardless.
|
||||
let postInstallHash: string | null = prior?.postInstallHash ?? null;
|
||||
if (
|
||||
!opts.skipPostInstall &&
|
||||
typeof meta.postInstall === 'string' &&
|
||||
meta.postInstall.length > 0
|
||||
) {
|
||||
const scriptRel = meta.postInstall;
|
||||
const scriptContent = (full.files ?? {})[scriptRel];
|
||||
if (typeof scriptContent !== 'string') {
|
||||
warn(`mcpctl: skill '${v.name}' postInstall references '${scriptRel}' which is not in files{}; skipping`);
|
||||
} else {
|
||||
const newHash = hashScript(scriptContent);
|
||||
const hashChanged = newHash !== prior?.postInstallHash;
|
||||
if (!hashChanged) {
|
||||
result.postInstallsSkipped.push(v.name);
|
||||
postInstallHash = newHash;
|
||||
} else {
|
||||
try {
|
||||
const timeoutSec = typeof meta.postInstallTimeoutSec === 'number' ? meta.postInstallTimeoutSec : undefined;
|
||||
const piInput = {
|
||||
installDir: targetDir,
|
||||
scriptPath: scriptRel,
|
||||
skillName: v.name,
|
||||
semver: v.semver,
|
||||
projectName: projectName ?? undefined,
|
||||
timeoutSec,
|
||||
logsDir: join(homedir(), '.mcpctl', 'skills', v.name),
|
||||
};
|
||||
const installResult = await runPostInstall(piInput);
|
||||
// Best-effort audit. Don't await; mcpd slowness shouldn't slow sync.
|
||||
void emitPostInstallAudit(client, piInput, installResult, (m) => warn(m));
|
||||
|
||||
if (installResult.timedOut) {
|
||||
result.errors.push({
|
||||
skill: v.name,
|
||||
error: `postInstall timed out after ${String(installResult.durationMs)}ms; rerun next sync`,
|
||||
});
|
||||
// hash NOT updated → retry on next sync
|
||||
} else if (installResult.exitCode !== 0) {
|
||||
const tail = installResult.stderrTail.trim() || installResult.stdoutTail.trim() || `exit ${String(installResult.exitCode)}`;
|
||||
result.errors.push({
|
||||
skill: v.name,
|
||||
error: `postInstall failed (exit ${String(installResult.exitCode)}): ${tail.slice(-200)}`,
|
||||
});
|
||||
// hash NOT updated → retry on next sync
|
||||
} else {
|
||||
postInstallHash = installResult.scriptHash;
|
||||
result.postInstallsRan.push(v.name);
|
||||
}
|
||||
} catch (err: unknown) {
|
||||
result.errors.push({
|
||||
skill: v.name,
|
||||
error: `postInstall error: ${err instanceof Error ? err.message : String(err)}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const newState: SkillState = {
|
||||
id: v.id,
|
||||
semver: v.semver,
|
||||
@@ -262,10 +416,7 @@ export async function runSkillsSync(opts: SyncOpts, deps: SyncDeps): Promise<Syn
|
||||
scope: v.scope,
|
||||
installDir: targetDir,
|
||||
files: fileStates,
|
||||
// Tier-2 fields — postInstall execution is deferred to a follow-up
|
||||
// PR. For now we record the hash so we can detect script changes
|
||||
// when execution lands.
|
||||
postInstallHash: null,
|
||||
postInstallHash,
|
||||
lastSyncedAt: new Date().toISOString(),
|
||||
};
|
||||
state.skills[v.name] = newState;
|
||||
|
||||
180
src/cli/src/utils/hooks-materialiser.ts
Normal file
180
src/cli/src/utils/hooks-materialiser.ts
Normal file
@@ -0,0 +1,180 @@
|
||||
/**
|
||||
* Materialise skill-declared hooks into Claude Code's
|
||||
* `~/.claude/settings.json`.
|
||||
*
|
||||
* Each entry we write carries two markers:
|
||||
* `_mcpctl_managed: true` — same flag the SessionStart-hook
|
||||
* installer uses; identifies an entry mcpctl owns.
|
||||
* `_mcpctl_source: "<skill-name>"` — which skill installed it.
|
||||
*
|
||||
* The combination lets us cleanly add/update/remove skill hooks without
|
||||
* clobbering hooks the user added by hand and without one skill trampling
|
||||
* another. Removing skill X re-reads the file, drops every entry tagged
|
||||
* `_mcpctl_source: "X"`, and rewrites atomically.
|
||||
*
|
||||
* Claude Code ignores the extra fields (it only looks at `type` and
|
||||
* `command`).
|
||||
*
|
||||
* The file is written atomically (temp + rename) and tolerant of an
|
||||
* existing file that has comments, no `hooks` block, or unexpected
|
||||
* shape — same robustness profile as sessionhook.ts.
|
||||
*/
|
||||
import { readFile, writeFile, mkdir, rename } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { homedir } from 'node:os';
|
||||
|
||||
import { MARKER_KEY } from './sessionhook.js';
|
||||
|
||||
export const SOURCE_KEY = '_mcpctl_source';
|
||||
|
||||
/** A single hook entry: must be `type: 'command'` for v1. Extra fields preserved. */
|
||||
export interface ManagedHookEntry {
|
||||
type: 'command';
|
||||
command: string;
|
||||
timeout?: number;
|
||||
/** Free-form: skills can attach extra fields and they'll round-trip. */
|
||||
[k: string]: unknown;
|
||||
}
|
||||
|
||||
/** Recognised hook events. Validated server-side; if a new event lands later, we still write whatever the skill declares. */
|
||||
export type HookEvent =
|
||||
| 'PreToolUse'
|
||||
| 'PostToolUse'
|
||||
| 'SessionStart'
|
||||
| 'Stop'
|
||||
| 'SubagentStop'
|
||||
| 'Notification';
|
||||
|
||||
export type HooksByEvent = Partial<Record<HookEvent, ManagedHookEntry[]>>;
|
||||
|
||||
interface HookGroup {
|
||||
hooks: ManagedHookEntry[];
|
||||
[k: string]: unknown;
|
||||
}
|
||||
|
||||
interface Settings {
|
||||
hooks?: Partial<Record<string, HookGroup[]>>;
|
||||
[k: string]: unknown;
|
||||
}
|
||||
|
||||
function defaultSettingsPath(): string {
|
||||
return join(homedir(), '.claude', 'settings.json');
|
||||
}
|
||||
|
||||
async function readSettings(path: string): Promise<Settings> {
|
||||
try {
|
||||
const raw = await readFile(path, 'utf-8');
|
||||
if (raw.trim().length === 0) return {};
|
||||
const stripped = raw.replace(/^\s*\/\/.*$/gm, '');
|
||||
return JSON.parse(stripped) as Settings;
|
||||
} catch (err: unknown) {
|
||||
if (isNotFoundError(err)) return {};
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
async function writeSettings(path: string, settings: Settings): Promise<void> {
|
||||
await mkdir(dirname(path), { recursive: true });
|
||||
const tmp = `${path}.tmp.${String(process.pid)}`;
|
||||
await writeFile(tmp, JSON.stringify(settings, null, 2) + '\n', 'utf-8');
|
||||
await rename(tmp, path);
|
||||
}
|
||||
|
||||
function isManagedBy(entry: unknown, source: string): boolean {
|
||||
if (entry === null || typeof entry !== 'object') return false;
|
||||
const e = entry as Record<string, unknown>;
|
||||
return e[MARKER_KEY] === true && e[SOURCE_KEY] === source;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace this skill's hook entries with the provided set. If `hooks`
|
||||
* omits an event the skill previously installed, those entries are
|
||||
* dropped. Other skills' entries and user-added entries are preserved.
|
||||
*
|
||||
* Returns the count of changes (added or removed entries) so callers
|
||||
* can short-circuit no-op writes.
|
||||
*/
|
||||
export async function applyManagedHooks(
|
||||
source: string,
|
||||
hooks: HooksByEvent,
|
||||
settingsPath: string = defaultSettingsPath(),
|
||||
): Promise<{ updated: boolean; settingsPath: string }> {
|
||||
const settings = await readSettings(settingsPath);
|
||||
if (!settings.hooks) settings.hooks = {};
|
||||
|
||||
let changed = false;
|
||||
|
||||
// For each known/declared event, drop our previous entries and add the new ones.
|
||||
const declaredEvents = new Set<string>(Object.keys(hooks));
|
||||
// Also walk events that already have entries from this source (so skills can shrink scope).
|
||||
for (const [eventName, groups] of Object.entries(settings.hooks)) {
|
||||
if (!Array.isArray(groups)) continue;
|
||||
if (groups.some((g) => Array.isArray(g.hooks) && g.hooks.some((e) => isManagedBy(e, source)))) {
|
||||
declaredEvents.add(eventName);
|
||||
}
|
||||
}
|
||||
|
||||
for (const eventName of declaredEvents) {
|
||||
const desired = hooks[eventName as HookEvent] ?? [];
|
||||
const groups = (settings.hooks[eventName] as HookGroup[] | undefined) ?? [];
|
||||
|
||||
// Strip our entries from each group, then drop empty groups.
|
||||
const stripped: HookGroup[] = [];
|
||||
for (const group of groups) {
|
||||
if (!Array.isArray(group?.hooks)) {
|
||||
stripped.push(group);
|
||||
continue;
|
||||
}
|
||||
const before = group.hooks.length;
|
||||
const filtered = group.hooks.filter((e) => !isManagedBy(e, source));
|
||||
if (filtered.length !== before) changed = true;
|
||||
if (filtered.length > 0) {
|
||||
stripped.push({ ...group, hooks: filtered });
|
||||
}
|
||||
}
|
||||
|
||||
// Insert the new set as a single group tagged with our source.
|
||||
if (desired.length > 0) {
|
||||
const tagged = desired.map((entry) => ({
|
||||
...entry,
|
||||
type: 'command' as const,
|
||||
[MARKER_KEY]: true,
|
||||
[SOURCE_KEY]: source,
|
||||
}));
|
||||
stripped.push({ hooks: tagged, [SOURCE_KEY]: source });
|
||||
changed = true;
|
||||
}
|
||||
|
||||
if (stripped.length === 0) {
|
||||
// No groups left for this event — drop the event entirely so the
|
||||
// settings.json doesn't accumulate empty arrays.
|
||||
delete settings.hooks[eventName];
|
||||
} else {
|
||||
settings.hooks[eventName] = stripped;
|
||||
}
|
||||
}
|
||||
|
||||
if (!changed) {
|
||||
return { updated: false, settingsPath };
|
||||
}
|
||||
|
||||
await writeSettings(settingsPath, settings);
|
||||
return { updated: true, settingsPath };
|
||||
}
|
||||
|
||||
/**
|
||||
* Drop all hook entries owned by `source`. Used by the sync's orphan-
|
||||
* removal path so a skill that's no longer in the server set
|
||||
* un-registers its hooks too. Returns whether anything was changed.
|
||||
*/
|
||||
export async function removeManagedHooks(
|
||||
source: string,
|
||||
settingsPath: string = defaultSettingsPath(),
|
||||
): Promise<{ removed: boolean; settingsPath: string }> {
|
||||
const result = await applyManagedHooks(source, {}, settingsPath);
|
||||
return { removed: result.updated, settingsPath: result.settingsPath };
|
||||
}
|
||||
|
||||
function isNotFoundError(err: unknown): boolean {
|
||||
return typeof err === 'object' && err !== null && (err as { code?: string }).code === 'ENOENT';
|
||||
}
|
||||
176
src/cli/src/utils/mcpservers-materialiser.ts
Normal file
176
src/cli/src/utils/mcpservers-materialiser.ts
Normal file
@@ -0,0 +1,176 @@
|
||||
/**
|
||||
* Auto-attach the MCP server dependencies a skill declares to the
|
||||
* project that's syncing. Per the corporate-appliance trust model,
|
||||
* publishing a skill that says "this project depends on my-grafana"
|
||||
* is enough — the client takes mcpd at its word and asks mcpd to
|
||||
* attach the server to the project.
|
||||
*
|
||||
* What this function does NOT do (deliberately):
|
||||
* - Auto-create the server from a template if it's missing.
|
||||
* Provisioning infrastructure from a skill push is a separate
|
||||
* decision that needs explicit operator consent. v1 just warns
|
||||
* when the named server doesn't exist and skips that dep.
|
||||
* - Detach servers that a skill removed from its mcpServers list.
|
||||
* Detach is destructive (the project loses access) and the
|
||||
* `attach` itself is idempotent on the server side, so we err
|
||||
* on the side of leaving things attached. PR-7 can revisit if
|
||||
* a use case shows up.
|
||||
*
|
||||
* The mcpServers field is per-project: a skill's declared deps only
|
||||
* get attached to the project the sync is running for. Global skills
|
||||
* (no projectName context) skip this step entirely — there's no
|
||||
* project to attach to.
|
||||
*/
|
||||
import type { ApiClient } from '../api-client.js';
|
||||
import { ApiError } from '../api-client.js';
|
||||
|
||||
export interface McpServerDep {
|
||||
name: string;
|
||||
fromTemplate?: string;
|
||||
project?: string;
|
||||
}
|
||||
|
||||
export interface AttachResult {
|
||||
attached: string[];
|
||||
alreadyAttached: string[];
|
||||
missing: string[];
|
||||
errors: Array<{ server: string; error: string }>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve project name → id, list its currently-attached servers,
|
||||
* then attach each declared dep that isn't already there. Idempotent
|
||||
* by virtue of the existing-attachment check.
|
||||
*
|
||||
* Failures per-server are collected, not thrown — sync continues.
|
||||
*/
|
||||
export async function attachSkillMcpServers(
|
||||
client: ApiClient,
|
||||
projectName: string,
|
||||
deps: McpServerDep[],
|
||||
warn: (msg: string) => void = () => {},
|
||||
): Promise<AttachResult> {
|
||||
const result: AttachResult = {
|
||||
attached: [],
|
||||
alreadyAttached: [],
|
||||
missing: [],
|
||||
errors: [],
|
||||
};
|
||||
if (deps.length === 0) return result;
|
||||
|
||||
// Resolve project → id (the attach endpoint is keyed by id, not name).
|
||||
let projectId: string;
|
||||
try {
|
||||
const projects = await client.get<Array<{ id: string; name: string }>>('/api/v1/projects');
|
||||
const match = projects.find((p) => p.name === projectName);
|
||||
if (!match) {
|
||||
// No project to attach to — surface every dep as an error so the
|
||||
// operator can see something is mis-configured.
|
||||
for (const dep of deps) {
|
||||
result.errors.push({ server: dep.name, error: `Project '${projectName}' not found` });
|
||||
}
|
||||
return result;
|
||||
}
|
||||
projectId = match.id;
|
||||
} catch (err: unknown) {
|
||||
for (const dep of deps) {
|
||||
result.errors.push({
|
||||
server: dep.name,
|
||||
error: `Failed to resolve project: ${err instanceof Error ? err.message : String(err)}`,
|
||||
});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Inspect current attachments. The /api/v1/projects/:id/servers POST
|
||||
// endpoint is idempotent server-side, but we still pre-check so we
|
||||
// can report alreadyAttached vs newly-attached cleanly.
|
||||
let attached = new Set<string>();
|
||||
try {
|
||||
const project = await client.get<{ servers?: Array<{ server?: { name: string } }> }>(`/api/v1/projects/${projectId}`);
|
||||
attached = new Set(
|
||||
(project.servers ?? [])
|
||||
.map((s) => s.server?.name)
|
||||
.filter((n): n is string => typeof n === 'string'),
|
||||
);
|
||||
} catch (err: unknown) {
|
||||
warn(`mcpctl: failed to read current attachments for project '${projectName}': ${err instanceof Error ? err.message : String(err)}`);
|
||||
// Fall through with an empty set — we'll attempt attaches and let
|
||||
// server-side idempotency cover any duplicates.
|
||||
}
|
||||
|
||||
// Optionally narrow the existing-server set so we can warn loudly on
|
||||
// unknown server names. (Server attaches against a non-existent
|
||||
// server would 404 anyway, but a clearer warning is friendlier.)
|
||||
let existingServers = new Set<string>();
|
||||
try {
|
||||
const servers = await client.get<Array<{ name: string }>>('/api/v1/servers');
|
||||
existingServers = new Set(servers.map((s) => s.name));
|
||||
} catch {
|
||||
// Best-effort; if listing fails we still try the attach.
|
||||
}
|
||||
|
||||
for (const dep of deps) {
|
||||
// Honour an explicit `project` on the dep — defensive, normally
|
||||
// matches the active project anyway. Skip mismatches so a skill
|
||||
// can declare deps for a different project without collateral
|
||||
// damage during this sync.
|
||||
if (dep.project && dep.project !== projectName) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (attached.has(dep.name)) {
|
||||
result.alreadyAttached.push(dep.name);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (existingServers.size > 0 && !existingServers.has(dep.name)) {
|
||||
// Server doesn't exist on mcpd. v1 doesn't auto-create; warn and continue.
|
||||
const detail = dep.fromTemplate
|
||||
? ` (skill suggests creating it via template '${dep.fromTemplate}')`
|
||||
: '';
|
||||
warn(`mcpctl: skill mcpServers dep '${dep.name}' not found on mcpd${detail}; skipping attach`);
|
||||
result.missing.push(dep.name);
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
await client.post(`/api/v1/projects/${projectId}/servers`, { server: dep.name });
|
||||
result.attached.push(dep.name);
|
||||
} catch (err: unknown) {
|
||||
// Idempotency: 409 (already attached) is success.
|
||||
if (err instanceof ApiError && err.status === 409) {
|
||||
result.alreadyAttached.push(dep.name);
|
||||
continue;
|
||||
}
|
||||
// 404 means either the project or the server vanished mid-sync.
|
||||
if (err instanceof ApiError && err.status === 404) {
|
||||
result.missing.push(dep.name);
|
||||
continue;
|
||||
}
|
||||
result.errors.push({
|
||||
server: dep.name,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Type-narrow the metadata.mcpServers field. Tolerant of garbage. */
|
||||
export function parseMcpServerDeps(value: unknown): McpServerDep[] {
|
||||
if (!Array.isArray(value)) return [];
|
||||
const out: McpServerDep[] = [];
|
||||
for (const v of value) {
|
||||
if (v === null || typeof v !== 'object') continue;
|
||||
const obj = v as Record<string, unknown>;
|
||||
const name = obj['name'];
|
||||
if (typeof name !== 'string' || name.length === 0) continue;
|
||||
const dep: McpServerDep = { name };
|
||||
if (typeof obj['fromTemplate'] === 'string') dep.fromTemplate = obj['fromTemplate'];
|
||||
if (typeof obj['project'] === 'string') dep.project = obj['project'];
|
||||
out.push(dep);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
282
src/cli/src/utils/postinstall.ts
Normal file
282
src/cli/src/utils/postinstall.ts
Normal file
@@ -0,0 +1,282 @@
|
||||
/**
|
||||
* postInstall executor for `mcpctl skills sync`.
|
||||
*
|
||||
* Trust model: mcpctl runs scripts that mcpd has served. mcpd is the
|
||||
* corporate source of truth — content is reviewed at publish time. We
|
||||
* do NOT sandbox or signature-check on the client. The controls that
|
||||
* matter live on the publishing side (RBAC, audit, reviewer queue).
|
||||
*
|
||||
* What we DO provide is ops hygiene:
|
||||
* - Hard timeout (default 60 s, per-skill override via
|
||||
* `metadata.postInstallTimeoutSec`). Stops a runaway script from
|
||||
* wedging Claude startup forever.
|
||||
* - Hash-pinning: the script's sha256 is recorded in the skills state
|
||||
* file so the next sync skips re-execution unless the hash changed.
|
||||
* Saves churn; catches "the same skill at the same semver was
|
||||
* re-published with a fixed script".
|
||||
* - Curated env: MCPCTL_SKILL_NAME / _VERSION / _DIR / _PROJECT plus
|
||||
* inherited PATH / HOME / USER / SHELL. Cron-style minimal env so
|
||||
* scripts behave the same on every machine.
|
||||
* - Per-skill install log under ~/.mcpctl/skills/<name>/install.log
|
||||
* (rotated to keep the last 5 runs). Standard sysadmin reflex.
|
||||
* - Audit event back to mcpd on every run. So mcpd's audit pipeline
|
||||
* has both sides of the timeline (publish + per-machine execution).
|
||||
*
|
||||
* Failure semantics: a non-zero exit, a hang past the timeout, or a
|
||||
* spawn error is treated as a failed sync of THIS skill. The state
|
||||
* file's postInstallHash is NOT updated on failure, so the next sync
|
||||
* will retry. Other skills in the same sync run continue regardless.
|
||||
*/
|
||||
import { createHash } from 'node:crypto';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { mkdir, readFile, writeFile, stat } from 'node:fs/promises';
|
||||
import { dirname, join, resolve } from 'node:path';
|
||||
import { hostname } from 'node:os';
|
||||
import { setTimeout as delay } from 'node:timers/promises';
|
||||
|
||||
import type { ApiClient } from '../api-client.js';
|
||||
|
||||
export interface PostInstallInput {
|
||||
/** Full path of the materialised skill directory. The script path is resolved relative to this. */
|
||||
installDir: string;
|
||||
/** metadata.postInstall — relative path inside the skill bundle. */
|
||||
scriptPath: string;
|
||||
/** Name of the skill. Surfaces in audit + env + log path. */
|
||||
skillName: string;
|
||||
/** Skill version. Audit + env. */
|
||||
semver: string;
|
||||
/** Project name when the skill is project-scoped, else undefined. */
|
||||
projectName?: string | undefined;
|
||||
/** Per-skill override for the 60-s default. */
|
||||
timeoutSec?: number | undefined;
|
||||
/** Where to put the rolling install.log. Default: ~/.mcpctl/skills/<name>/install.log. */
|
||||
logsDir: string;
|
||||
}
|
||||
|
||||
export interface PostInstallResult {
|
||||
exitCode: number | null;
|
||||
durationMs: number;
|
||||
scriptHash: string;
|
||||
timedOut: boolean;
|
||||
signal: NodeJS.Signals | null;
|
||||
stdoutTail: string;
|
||||
stderrTail: string;
|
||||
}
|
||||
|
||||
const DEFAULT_TIMEOUT_SEC = 60;
|
||||
const TAIL_BYTES = 4 * 1024;
|
||||
const MAX_LOG_BYTES = 256 * 1024;
|
||||
|
||||
/**
|
||||
* Compute the sha256 of a script — used as the "have I already run this
|
||||
* version?" key in the skills state file. Caller passes the raw script
|
||||
* bytes; this just wraps the hash routine to stay consistent with the
|
||||
* `'sha256:'`-prefixed format used elsewhere (skills-state.ts).
|
||||
*/
|
||||
export function hashScript(content: string | Buffer): string {
|
||||
const buf = typeof content === 'string' ? Buffer.from(content, 'utf-8') : content;
|
||||
return 'sha256:' + createHash('sha256').update(buf).digest('hex');
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the post-install script. Returns a result regardless of success
|
||||
* or failure — caller inspects `exitCode`/`timedOut` to decide.
|
||||
*
|
||||
* Path validation: the resolved script path must remain inside
|
||||
* `installDir`. A skill that tries to point postInstall at
|
||||
* `../../../../etc/passwd-like` is rejected as a failed run, not
|
||||
* silently ignored.
|
||||
*/
|
||||
export async function runPostInstall(input: PostInstallInput): Promise<PostInstallResult> {
|
||||
const start = Date.now();
|
||||
const timeoutMs = (input.timeoutSec ?? DEFAULT_TIMEOUT_SEC) * 1000;
|
||||
|
||||
const fullPath = resolve(input.installDir, input.scriptPath);
|
||||
// Defence in depth: the install dir is server-published content, but
|
||||
// a server with skill-write RBAC could still cause mischief. The
|
||||
// check makes our intent explicit: scripts may only live inside the
|
||||
// skill bundle.
|
||||
const installDirResolved = resolve(input.installDir);
|
||||
if (!fullPath.startsWith(installDirResolved + '/') && fullPath !== installDirResolved) {
|
||||
throw new Error(
|
||||
`postInstall path '${input.scriptPath}' escapes skill dir`,
|
||||
);
|
||||
}
|
||||
|
||||
// Read script bytes for hashing (and to fail-fast if missing).
|
||||
const scriptBytes = await readFile(fullPath);
|
||||
const scriptHash = hashScript(scriptBytes);
|
||||
|
||||
// Curated env. Cron-style minimum: keep PATH so the script can find
|
||||
// git/curl/python; keep HOME/USER/SHELL so scripts that touch dotfiles
|
||||
// work; drop everything else.
|
||||
const env: Record<string, string> = {
|
||||
PATH: process.env['PATH'] ?? '/usr/local/bin:/usr/bin:/bin',
|
||||
HOME: process.env['HOME'] ?? '',
|
||||
USER: process.env['USER'] ?? '',
|
||||
SHELL: process.env['SHELL'] ?? '/bin/sh',
|
||||
LANG: process.env['LANG'] ?? 'C.UTF-8',
|
||||
TERM: process.env['TERM'] ?? 'dumb',
|
||||
MCPCTL_SKILL_NAME: input.skillName,
|
||||
MCPCTL_SKILL_VERSION: input.semver,
|
||||
MCPCTL_SKILL_DIR: installDirResolved,
|
||||
};
|
||||
if (input.projectName) env['MCPCTL_PROJECT'] = input.projectName;
|
||||
|
||||
// Make sure the script is executable. Some upstreams ship with mode
|
||||
// 0644 — if shebang exists, we can fall through to the interpreter;
|
||||
// otherwise spawn will EACCES.
|
||||
await ensureExecutable(fullPath, scriptBytes);
|
||||
|
||||
await mkdir(input.logsDir, { recursive: true });
|
||||
const logPath = join(input.logsDir, 'install.log');
|
||||
|
||||
// Rolling-append. Keep ~256 KB; old entries get truncated. The tail
|
||||
// returned to the caller is the last few KB regardless.
|
||||
const logHeader = `\n=== ${new Date().toISOString()} ${input.skillName}@${input.semver} ===\n`;
|
||||
|
||||
// Cast through Buffer<ArrayBufferLike> — Node's typings split Buffer
|
||||
// into Buffer<ArrayBuffer> (from .alloc) and Buffer<ArrayBufferLike>
|
||||
// (from .subarray), which exactOptionalPropertyTypes refuses to
|
||||
// bridge. Explicit `Buffer` annotation widens to the union.
|
||||
let stdoutBuf: Buffer = Buffer.alloc(0);
|
||||
let stderrBuf: Buffer = Buffer.alloc(0);
|
||||
let timedOut = false;
|
||||
|
||||
const child = spawn(fullPath, [], {
|
||||
cwd: installDirResolved,
|
||||
env,
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
});
|
||||
|
||||
child.stdout.on('data', (chunk: Buffer) => {
|
||||
stdoutBuf = appendCapped(stdoutBuf, chunk);
|
||||
});
|
||||
child.stderr.on('data', (chunk: Buffer) => {
|
||||
stderrBuf = appendCapped(stderrBuf, chunk);
|
||||
});
|
||||
|
||||
// Hard timeout via SIGTERM, then SIGKILL after 2 s grace.
|
||||
const timer = setTimeout(() => {
|
||||
timedOut = true;
|
||||
child.kill('SIGTERM');
|
||||
void (async () => {
|
||||
await delay(2000);
|
||||
if (child.exitCode === null) child.kill('SIGKILL');
|
||||
})();
|
||||
}, timeoutMs);
|
||||
|
||||
const { exitCode, signal } = await new Promise<{ exitCode: number | null; signal: NodeJS.Signals | null }>((resolveProm) => {
|
||||
child.on('close', (code, sig) => resolveProm({ exitCode: code, signal: sig }));
|
||||
child.on('error', () => resolveProm({ exitCode: null, signal: null }));
|
||||
});
|
||||
clearTimeout(timer);
|
||||
|
||||
const durationMs = Date.now() - start;
|
||||
const stdoutText = stdoutBuf.toString('utf-8');
|
||||
const stderrText = stderrBuf.toString('utf-8');
|
||||
|
||||
// Append to the install log, truncating from the front if oversize.
|
||||
const trailer = `\n--- exit ${exitCode === null ? '?' : String(exitCode)}${signal ? ` (${signal})` : ''} in ${String(durationMs)}ms${timedOut ? ' [TIMEOUT]' : ''} ---\n`;
|
||||
const fullEntry = logHeader + 'STDOUT:\n' + stdoutText + '\nSTDERR:\n' + stderrText + trailer;
|
||||
await appendBoundedLog(logPath, fullEntry);
|
||||
|
||||
return {
|
||||
exitCode,
|
||||
durationMs,
|
||||
scriptHash,
|
||||
timedOut,
|
||||
signal,
|
||||
stdoutTail: tailString(stdoutText, TAIL_BYTES),
|
||||
stderrTail: tailString(stderrText, TAIL_BYTES),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Best-effort audit emission — POSTs a structured event back to mcpd
|
||||
* so admins have fleet visibility. Failures are warned via the
|
||||
* provided logger but never thrown; the audit log is supplementary,
|
||||
* not load-bearing for sync correctness.
|
||||
*
|
||||
* The event includes machine fingerprint (hostname) so the operator
|
||||
* can tell which dev box ran the script — useful when triaging a
|
||||
* misbehaving update.
|
||||
*/
|
||||
export async function emitPostInstallAudit(
|
||||
client: ApiClient,
|
||||
input: PostInstallInput,
|
||||
result: PostInstallResult,
|
||||
warn: (msg: string) => void = () => {},
|
||||
): Promise<void> {
|
||||
try {
|
||||
await client.post('/api/v1/audit-events', {
|
||||
eventKind: 'skill_postinstall',
|
||||
source: 'mcpctl-cli',
|
||||
verified: false,
|
||||
payload: {
|
||||
skillName: input.skillName,
|
||||
skillVersion: input.semver,
|
||||
projectName: input.projectName ?? null,
|
||||
scriptPath: input.scriptPath,
|
||||
scriptHash: result.scriptHash,
|
||||
exitCode: result.exitCode,
|
||||
durationMs: result.durationMs,
|
||||
timedOut: result.timedOut,
|
||||
signal: result.signal,
|
||||
machine: hostname(),
|
||||
},
|
||||
});
|
||||
} catch (err) {
|
||||
warn(`mcpctl: failed to emit postInstall audit event: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
// ── internals ──
|
||||
|
||||
function appendCapped(buf: Buffer, chunk: Buffer): Buffer {
|
||||
// Keep up to MAX_LOG_BYTES per stream; drop oldest bytes if over.
|
||||
if (buf.length + chunk.length <= MAX_LOG_BYTES) {
|
||||
return Buffer.concat([buf, chunk]);
|
||||
}
|
||||
const merged = Buffer.concat([buf, chunk]);
|
||||
// Buffer.from(...) here keeps Node's typing happy under
|
||||
// exactOptionalPropertyTypes — `subarray` on Buffer returns a
|
||||
// Buffer<ArrayBufferLike> which TS won't widen to the input type.
|
||||
return Buffer.from(merged.subarray(merged.length - MAX_LOG_BYTES));
|
||||
}
|
||||
|
||||
function tailString(s: string, bytes: number): string {
|
||||
if (s.length <= bytes) return s;
|
||||
return '…' + s.slice(s.length - bytes + 1);
|
||||
}
|
||||
|
||||
async function ensureExecutable(path: string, bytes: Buffer): Promise<void> {
|
||||
try {
|
||||
const st = await stat(path);
|
||||
// Owner execute bit. Skip if it's set already.
|
||||
if ((st.mode & 0o100) !== 0) return;
|
||||
} catch {
|
||||
return; // stat failed — let the spawn surface the real error
|
||||
}
|
||||
// Has shebang? Fine — many shells will still execute even without +x
|
||||
// when invoked as `<interpreter> <path>`, but we always spawn the
|
||||
// path directly so we need +x. Set 0755.
|
||||
void bytes; // (kept around in case we want to inspect shebang later)
|
||||
const { chmod } = await import('node:fs/promises');
|
||||
await chmod(path, 0o755);
|
||||
}
|
||||
|
||||
async function appendBoundedLog(path: string, entry: string): Promise<void> {
|
||||
const max = 5 * MAX_LOG_BYTES;
|
||||
let existing = '';
|
||||
try {
|
||||
existing = await readFile(path, 'utf-8');
|
||||
} catch (err: unknown) {
|
||||
if (typeof err !== 'object' || err === null || (err as { code?: string }).code !== 'ENOENT') throw err;
|
||||
}
|
||||
const combined = existing + entry;
|
||||
// Keep last `max` bytes.
|
||||
const trimmed = combined.length > max ? '…[truncated]…\n' + combined.slice(combined.length - max) : combined;
|
||||
await mkdir(dirname(path), { recursive: true });
|
||||
await writeFile(path, trimmed, 'utf-8');
|
||||
}
|
||||
174
src/cli/tests/utils/hooks-materialiser.test.ts
Normal file
174
src/cli/tests/utils/hooks-materialiser.test.ts
Normal file
@@ -0,0 +1,174 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { mkdtemp, rm, readFile, writeFile, mkdir } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
|
||||
import { applyManagedHooks, removeManagedHooks, SOURCE_KEY } from '../../src/utils/hooks-materialiser.js';
|
||||
import { MARKER_KEY } from '../../src/utils/sessionhook.js';
|
||||
|
||||
// Tests for the managed-hooks materialiser: applyManagedHooks writes
// tagged hook entries into a Claude settings.json, removeManagedHooks
// strips only the entries owned by a given skill. Each test operates on
// a fresh tmpdir; entries we own are identified by MARKER_KEY (managed
// flag) and SOURCE_KEY (owning skill name).
describe('hooks-materialiser', () => {
  let tmp: string;
  let settings: string;

  beforeEach(async () => {
    tmp = await mkdtemp(join(tmpdir(), 'mcpctl-hooks-'));
    settings = join(tmp, 'settings.json');
  });

  afterEach(async () => {
    await rm(tmp, { recursive: true, force: true });
  });

  it('writes a tagged hook from scratch when settings.json is missing', async () => {
    const result = await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo before' }],
    }, settings);

    expect(result.updated).toBe(true);
    const file = JSON.parse(await readFile(settings, 'utf-8'));
    expect(file.hooks.PreToolUse).toHaveLength(1);
    // The written entry must carry both ownership markers so later
    // syncs can find and replace it without touching user hooks.
    const entry = file.hooks.PreToolUse[0].hooks[0];
    expect(entry.command).toBe('echo before');
    expect(entry[MARKER_KEY]).toBe(true);
    expect(entry[SOURCE_KEY]).toBe('skill-a');
  });

  it('coexists with hooks owned by other skills', async () => {
    await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo a' }],
    }, settings);
    await applyManagedHooks('skill-b', {
      PreToolUse: [{ type: 'command', command: 'echo b' }],
    }, settings);

    const file = JSON.parse(await readFile(settings, 'utf-8'));
    const all = file.hooks.PreToolUse.flatMap((g: { hooks: Array<{ command: string; [k: string]: unknown }> }) => g.hooks);
    expect(all.find((e: { command: string }) => e.command === 'echo a')).toBeDefined();
    expect(all.find((e: { command: string }) => e.command === 'echo b')).toBeDefined();
    expect(all).toHaveLength(2);
  });

  it('preserves user-added hooks (no marker)', async () => {
    // Pre-seed a hook with no ownership markers — the materialiser must
    // never remove or rewrite it.
    await mkdir(tmp, { recursive: true });
    await writeFile(settings, JSON.stringify({
      hooks: {
        PreToolUse: [{ hooks: [{ type: 'command', command: 'echo user' }] }],
      },
    }));

    await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo a' }],
    }, settings);

    const file = JSON.parse(await readFile(settings, 'utf-8'));
    const all = file.hooks.PreToolUse.flatMap((g: { hooks: Array<{ command: string; [k: string]: unknown }> }) => g.hooks);
    expect(all.find((e: { command: string }) => e.command === 'echo user')).toBeDefined();
    expect(all.find((e: { command: string; [k: string]: unknown }) => e.command === 'echo a' && e[MARKER_KEY] === true)).toBeDefined();
  });

  it('updating a skill replaces its old entries (does not duplicate)', async () => {
    await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo old' }],
    }, settings);
    const second = await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo new' }],
    }, settings);

    expect(second.updated).toBe(true);
    const file = JSON.parse(await readFile(settings, 'utf-8'));
    const all = file.hooks.PreToolUse.flatMap((g: { hooks: Array<{ command: string; [k: string]: unknown }> }) => g.hooks);
    const ours = all.filter((e: { [k: string]: unknown }) => e[SOURCE_KEY] === 'skill-a');
    expect(ours).toHaveLength(1);
    expect((ours[0] as { command: string }).command).toBe('echo new');
  });

  it('shrinking a skill drops events it no longer declares', async () => {
    await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo pre' }],
      PostToolUse: [{ type: 'command', command: 'echo post' }],
    }, settings);

    await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo pre' }],
      // PostToolUse omitted → should be dropped
    }, settings);

    const file = JSON.parse(await readFile(settings, 'utf-8'));
    expect(file.hooks.PreToolUse).toBeDefined();
    expect(file.hooks.PostToolUse).toBeUndefined();
  });

  it('removeManagedHooks drops only the named source', async () => {
    await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo a' }],
    }, settings);
    await applyManagedHooks('skill-b', {
      PreToolUse: [{ type: 'command', command: 'echo b' }],
    }, settings);

    const removed = await removeManagedHooks('skill-a', settings);
    expect(removed.removed).toBe(true);

    // skill-b's entry must survive skill-a's removal.
    const file = JSON.parse(await readFile(settings, 'utf-8'));
    const all = file.hooks.PreToolUse.flatMap((g: { hooks: Array<{ command: string; [k: string]: unknown }> }) => g.hooks);
    expect(all).toHaveLength(1);
    expect((all[0] as { command: string }).command).toBe('echo b');
  });

  it('removeManagedHooks is a no-op when the source has no entries', async () => {
    const result = await removeManagedHooks('never-installed', settings);
    expect(result.removed).toBe(false);
  });

  it('handles multiple hook events independently', async () => {
    await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo pre' }],
      PostToolUse: [{ type: 'command', command: 'echo post' }],
      SessionStart: [{ type: 'command', command: 'echo start' }],
    }, settings);

    const file = JSON.parse(await readFile(settings, 'utf-8'));
    expect(file.hooks.PreToolUse).toBeDefined();
    expect(file.hooks.PostToolUse).toBeDefined();
    expect(file.hooks.SessionStart).toBeDefined();
  });

  // NOTE(review): the test name says "updated=true on first call only",
  // but the body asserts updated=true on BOTH calls — the name appears
  // stale; consider renaming it to match the documented replace+re-add
  // behavior.
  it('idempotent — re-applying the same hooks reports updated=true on first call only', async () => {
    const first = await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo a' }],
    }, settings);
    expect(first.updated).toBe(true);

    // Re-applying ALWAYS rewrites our entry (we don't deep-equal them
    // for "no change"), but the resulting file is byte-identical except
    // for ordering. The test just confirms the file remains valid + well-shaped.
    const second = await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo a' }],
    }, settings);
    // updated=true is acceptable here; we replaced+re-added our entry.
    expect(second.updated).toBe(true);

    const file = JSON.parse(await readFile(settings, 'utf-8'));
    const all = file.hooks.PreToolUse.flatMap((g: { hooks: Array<{ command: string; [k: string]: unknown }> }) => g.hooks);
    const ours = all.filter((e: { [k: string]: unknown }) => e[SOURCE_KEY] === 'skill-a');
    expect(ours).toHaveLength(1);
  });

  it('survives empty settings.json', async () => {
    await writeFile(settings, '');
    await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo a' }],
    }, settings);
    const file = JSON.parse(await readFile(settings, 'utf-8'));
    expect(file.hooks.PreToolUse).toHaveLength(1);
  });

  it('survives JSONC line comments in settings.json', async () => {
    await writeFile(settings, '// preamble\n{ "hooks": {} }\n');
    await applyManagedHooks('skill-a', {
      PreToolUse: [{ type: 'command', command: 'echo a' }],
    }, settings);
    const file = JSON.parse(await readFile(settings, 'utf-8'));
    expect(file.hooks.PreToolUse).toHaveLength(1);
  });
});
|
||||
227
src/cli/tests/utils/mcpservers-materialiser.test.ts
Normal file
227
src/cli/tests/utils/mcpservers-materialiser.test.ts
Normal file
@@ -0,0 +1,227 @@
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
import { attachSkillMcpServers, parseMcpServerDeps } from '../../src/utils/mcpservers-materialiser.js';
|
||||
import type { ApiClient } from '../../src/api-client.js';
|
||||
import { ApiError } from '../../src/api-client.js';
|
||||
|
||||
/**
 * Mock shape mirroring the four HTTP verbs of ApiClient, each replaced
 * by a vitest mock so tests can stub responses per-path and assert on
 * call arguments. Instances are cast to ApiClient at call sites.
 */
interface MockClient {
  get: ReturnType<typeof vi.fn>;
  post: ReturnType<typeof vi.fn>;
  put: ReturnType<typeof vi.fn>;
  delete: ReturnType<typeof vi.fn>;
}
|
||||
|
||||
function makeClient(): MockClient {
|
||||
return {
|
||||
get: vi.fn(),
|
||||
post: vi.fn(async () => ({})),
|
||||
put: vi.fn(async () => ({})),
|
||||
delete: vi.fn(async () => undefined),
|
||||
};
|
||||
}
|
||||
|
||||
/** Shorthand for constructing an ApiError with a status code and an optional body (defaults to 'err'). */
function apiError(status: number, body = 'err'): ApiError {
  return new ApiError(status, body);
}
|
||||
|
||||
// Tests for the mcpServers materialiser: parseMcpServerDeps narrows
// untrusted metadata into a clean dependency list, and
// attachSkillMcpServers attaches those servers to a project via the
// mcpd API (resolving project id, checking existing attachments, and
// classifying outcomes into attached / alreadyAttached / missing /
// errors). All API traffic goes through the MockClient stub above.
describe('mcpservers-materialiser', () => {
  describe('parseMcpServerDeps', () => {
    it('returns [] for non-arrays', () => {
      expect(parseMcpServerDeps(null)).toEqual([]);
      expect(parseMcpServerDeps('foo')).toEqual([]);
      expect(parseMcpServerDeps({})).toEqual([]);
    });

    it('keeps valid entries and drops garbage', () => {
      const out = parseMcpServerDeps([
        { name: 'good', fromTemplate: 't', project: 'p' },
        { name: '', fromTemplate: 't' }, // empty name → drop
        { fromTemplate: 'no-name' }, // no name → drop
        { name: 'bare' }, // valid, minimal
        'string', // not an object → drop
      ]);
      expect(out).toEqual([
        { name: 'good', fromTemplate: 't', project: 'p' },
        { name: 'bare' },
      ]);
    });
  });

  describe('attachSkillMcpServers', () => {
    it('attaches a new server when not already present', async () => {
      const client = makeClient();
      client.get.mockImplementation(async (path: string) => {
        if (path === '/api/v1/projects') return [{ id: 'proj-1', name: 'demo' }];
        if (path === '/api/v1/projects/proj-1') return { servers: [] };
        if (path === '/api/v1/servers') return [{ name: 'my-grafana' }];
        throw new Error(`unexpected GET ${path}`);
      });

      const result = await attachSkillMcpServers(
        client as unknown as ApiClient,
        'demo',
        [{ name: 'my-grafana', fromTemplate: 'grafana' }],
      );

      expect(result.attached).toEqual(['my-grafana']);
      expect(result.alreadyAttached).toEqual([]);
      expect(result.missing).toEqual([]);
      expect(result.errors).toEqual([]);
      expect(client.post).toHaveBeenCalledWith('/api/v1/projects/proj-1/servers', { server: 'my-grafana' });
    });

    it('reports alreadyAttached without re-posting', async () => {
      const client = makeClient();
      client.get.mockImplementation(async (path: string) => {
        if (path === '/api/v1/projects') return [{ id: 'proj-1', name: 'demo' }];
        if (path === '/api/v1/projects/proj-1') return { servers: [{ server: { name: 'my-grafana' } }] };
        if (path === '/api/v1/servers') return [{ name: 'my-grafana' }];
        throw new Error(`unexpected GET ${path}`);
      });

      const result = await attachSkillMcpServers(
        client as unknown as ApiClient,
        'demo',
        [{ name: 'my-grafana' }],
      );

      expect(result.alreadyAttached).toEqual(['my-grafana']);
      expect(result.attached).toEqual([]);
      expect(client.post).not.toHaveBeenCalled();
    });

    it('warns + skips when server does not exist on mcpd', async () => {
      const client = makeClient();
      client.get.mockImplementation(async (path: string) => {
        if (path === '/api/v1/projects') return [{ id: 'proj-1', name: 'demo' }];
        if (path === '/api/v1/projects/proj-1') return { servers: [] };
        if (path === '/api/v1/servers') return [{ name: 'something-else' }];
        throw new Error(`unexpected GET ${path}`);
      });

      const warnings: string[] = [];
      const result = await attachSkillMcpServers(
        client as unknown as ApiClient,
        'demo',
        [{ name: 'my-grafana', fromTemplate: 'grafana' }],
        (m) => warnings.push(m),
      );

      expect(result.missing).toEqual(['my-grafana']);
      expect(result.attached).toEqual([]);
      expect(client.post).not.toHaveBeenCalled();
      // The warning should mention both the server name and its template
      // so the operator knows what to create.
      expect(warnings.some((w) => w.includes('my-grafana') && w.includes('grafana'))).toBe(true);
    });

    it('errors-out when the project does not exist', async () => {
      const client = makeClient();
      client.get.mockImplementation(async (path: string) => {
        if (path === '/api/v1/projects') return []; // no projects
        throw new Error(`unexpected GET ${path}`);
      });

      const result = await attachSkillMcpServers(
        client as unknown as ApiClient,
        'no-such-project',
        [{ name: 'my-grafana' }],
      );

      expect(result.errors).toHaveLength(1);
      expect(result.errors[0]?.error).toContain('Project');
      expect(client.post).not.toHaveBeenCalled();
    });

    it('treats 409 from POST as alreadyAttached (idempotent server-side)', async () => {
      const client = makeClient();
      client.get.mockImplementation(async (path: string) => {
        if (path === '/api/v1/projects') return [{ id: 'proj-1', name: 'demo' }];
        // attachments listing fails — fall back to attempting + handling 409
        if (path === '/api/v1/projects/proj-1') throw apiError(500, 'flake');
        if (path === '/api/v1/servers') return [{ name: 'my-grafana' }];
        throw new Error(`unexpected GET ${path}`);
      });
      client.post.mockRejectedValueOnce(apiError(409, 'already attached'));

      const result = await attachSkillMcpServers(
        client as unknown as ApiClient,
        'demo',
        [{ name: 'my-grafana' }],
      );

      expect(result.alreadyAttached).toEqual(['my-grafana']);
      expect(result.errors).toEqual([]);
    });

    it('treats 404 from POST as missing (server vanished mid-sync)', async () => {
      const client = makeClient();
      client.get.mockImplementation(async (path: string) => {
        if (path === '/api/v1/projects') return [{ id: 'proj-1', name: 'demo' }];
        if (path === '/api/v1/projects/proj-1') return { servers: [] };
        if (path === '/api/v1/servers') return [{ name: 'my-grafana' }]; // existed when we listed
        throw new Error(`unexpected GET ${path}`);
      });
      // …but vanished by the time we POSTed.
      client.post.mockRejectedValueOnce(apiError(404, 'gone'));

      const result = await attachSkillMcpServers(
        client as unknown as ApiClient,
        'demo',
        [{ name: 'my-grafana' }],
      );

      expect(result.missing).toEqual(['my-grafana']);
      expect(result.errors).toEqual([]);
    });

    it('skips deps that target a different project', async () => {
      const client = makeClient();
      client.get.mockImplementation(async (path: string) => {
        if (path === '/api/v1/projects') return [{ id: 'proj-1', name: 'demo' }];
        if (path === '/api/v1/projects/proj-1') return { servers: [] };
        if (path === '/api/v1/servers') return [{ name: 'my-grafana' }];
        throw new Error(`unexpected GET ${path}`);
      });

      const result = await attachSkillMcpServers(
        client as unknown as ApiClient,
        'demo',
        [{ name: 'my-grafana', project: 'other-project' }],
      );

      // A dep scoped to another project is silently skipped: not
      // attached, but not reported missing either.
      expect(result.attached).toEqual([]);
      expect(result.missing).toEqual([]);
      expect(client.post).not.toHaveBeenCalled();
    });

    it('continues past per-server errors', async () => {
      const client = makeClient();
      client.get.mockImplementation(async (path: string) => {
        if (path === '/api/v1/projects') return [{ id: 'proj-1', name: 'demo' }];
        if (path === '/api/v1/projects/proj-1') return { servers: [] };
        if (path === '/api/v1/servers') return [{ name: 'a' }, { name: 'b' }];
        throw new Error(`unexpected GET ${path}`);
      });
      client.post.mockImplementation(async (path: string, body) => {
        if ((body as { server: string }).server === 'a') throw apiError(500, 'boom');
        return {};
      });

      const result = await attachSkillMcpServers(
        client as unknown as ApiClient,
        'demo',
        [{ name: 'a' }, { name: 'b' }],
      );

      expect(result.errors).toHaveLength(1);
      expect(result.errors[0]?.server).toBe('a');
      expect(result.attached).toEqual(['b']);
    });

    it('returns empty on empty deps without making any calls', async () => {
      const client = makeClient();
      const result = await attachSkillMcpServers(client as unknown as ApiClient, 'demo', []);
      expect(result).toEqual({ attached: [], alreadyAttached: [], missing: [], errors: [] });
      expect(client.get).not.toHaveBeenCalled();
      expect(client.post).not.toHaveBeenCalled();
    });
  });
});
|
||||
223
src/cli/tests/utils/postinstall.test.ts
Normal file
223
src/cli/tests/utils/postinstall.test.ts
Normal file
@@ -0,0 +1,223 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { mkdtemp, rm, writeFile, chmod, readFile, mkdir } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
|
||||
import { runPostInstall, hashScript } from '../../src/utils/postinstall.js';
|
||||
|
||||
// Tests for the postInstall executor: hashScript (content-addressing
// used to decide re-runs) and runPostInstall (sandboxed script
// execution with curated env, timeout, path-escape rejection, and a
// bounded install log). Shell scripts are written into a per-test
// tmpdir; these cases assume a POSIX /bin/sh and `sleep` are available.
describe('postinstall executor', () => {
  let tmp: string;

  beforeEach(async () => {
    tmp = await mkdtemp(join(tmpdir(), 'mcpctl-postinstall-'));
  });

  afterEach(async () => {
    await rm(tmp, { recursive: true, force: true });
  });

  describe('hashScript', () => {
    it('returns deterministic sha256-prefixed hash', () => {
      expect(hashScript('hello')).toMatch(/^sha256:[0-9a-f]{64}$/);
      expect(hashScript('hello')).toBe(hashScript('hello'));
      expect(hashScript('hello')).not.toBe(hashScript('hellp'));
    });
  });

  describe('runPostInstall — success path', () => {
    it('runs a passing script and returns exit 0 + script hash', async () => {
      const installDir = join(tmp, 'skill');
      await mkdir(installDir, { recursive: true });
      const scriptPath = join(installDir, 'install.sh');
      await writeFile(scriptPath, '#!/bin/sh\necho hello-stdout\necho hello-stderr 1>&2\nexit 0\n');
      await chmod(scriptPath, 0o755);

      const result = await runPostInstall({
        installDir,
        scriptPath: 'install.sh',
        skillName: 'test-skill',
        semver: '0.1.0',
        logsDir: join(tmp, 'logs'),
      });

      expect(result.exitCode).toBe(0);
      expect(result.timedOut).toBe(false);
      expect(result.stdoutTail).toContain('hello-stdout');
      expect(result.stderrTail).toContain('hello-stderr');
      expect(result.scriptHash).toMatch(/^sha256:/);
    });

    it('passes curated env (MCPCTL_SKILL_NAME, _VERSION, _DIR, _PROJECT)', async () => {
      const installDir = join(tmp, 'skill');
      await mkdir(installDir, { recursive: true });
      const scriptPath = join(installDir, 'install.sh');
      // Write env vars to a file we can read back.
      const outFile = join(tmp, 'env-dump.txt');
      await writeFile(scriptPath, `#!/bin/sh
echo "name=$MCPCTL_SKILL_NAME" > ${JSON.stringify(outFile)}
echo "version=$MCPCTL_SKILL_VERSION" >> ${JSON.stringify(outFile)}
echo "dir=$MCPCTL_SKILL_DIR" >> ${JSON.stringify(outFile)}
echo "project=$MCPCTL_PROJECT" >> ${JSON.stringify(outFile)}
`);
      await chmod(scriptPath, 0o755);

      const result = await runPostInstall({
        installDir,
        scriptPath: 'install.sh',
        skillName: 'env-test',
        semver: '1.2.3',
        projectName: 'demo',
        logsDir: join(tmp, 'logs'),
      });
      expect(result.exitCode).toBe(0);

      const dumped = await readFile(outFile, 'utf-8');
      expect(dumped).toContain('name=env-test');
      expect(dumped).toContain('version=1.2.3');
      expect(dumped).toContain('dir=' + installDir);
      expect(dumped).toContain('project=demo');
    });

    it('chmods 0644 scripts to executable before spawn', async () => {
      const installDir = join(tmp, 'skill');
      await mkdir(installDir, { recursive: true });
      const scriptPath = join(installDir, 'install.sh');
      await writeFile(scriptPath, '#!/bin/sh\nexit 0\n');
      await chmod(scriptPath, 0o644); // not executable

      const result = await runPostInstall({
        installDir,
        scriptPath: 'install.sh',
        skillName: 't',
        semver: '0.1.0',
        logsDir: join(tmp, 'logs'),
      });

      expect(result.exitCode).toBe(0);
    });
  });

  describe('runPostInstall — failure paths', () => {
    it('captures non-zero exit code and returns it', async () => {
      const installDir = join(tmp, 'skill');
      await mkdir(installDir, { recursive: true });
      const scriptPath = join(installDir, 'fail.sh');
      await writeFile(scriptPath, '#!/bin/sh\necho oops 1>&2\nexit 7\n');
      await chmod(scriptPath, 0o755);

      const result = await runPostInstall({
        installDir,
        scriptPath: 'fail.sh',
        skillName: 't',
        semver: '0.1.0',
        logsDir: join(tmp, 'logs'),
      });

      expect(result.exitCode).toBe(7);
      expect(result.timedOut).toBe(false);
      expect(result.stderrTail).toContain('oops');
    });

    it('honors timeoutSec — kills via SIGTERM and reports timedOut=true', async () => {
      const installDir = join(tmp, 'skill');
      await mkdir(installDir, { recursive: true });
      const scriptPath = join(installDir, 'hang.sh');
      // `exec` so SIGTERM hits sleep directly — without it /bin/sh
      // catches the signal but the orphaned sleep keeps the streams
      // open until SIGKILL; the test then has to wait for the 2s grace
      // window before we force-kill, which is fine but flakier.
      await writeFile(scriptPath, '#!/bin/sh\nexec sleep 30\n');
      await chmod(scriptPath, 0o755);

      const start = Date.now();
      const result = await runPostInstall({
        installDir,
        scriptPath: 'hang.sh',
        skillName: 't',
        semver: '0.1.0',
        timeoutSec: 1,
        logsDir: join(tmp, 'logs'),
      });
      const elapsed = Date.now() - start;

      expect(result.timedOut).toBe(true);
      // 1s timeout + up to 2s grace before SIGKILL.
      expect(elapsed).toBeLessThan(5000);
      expect(elapsed).toBeGreaterThanOrEqual(1000);
    }, 15_000);

    it('rejects path-escape attempts', async () => {
      const installDir = join(tmp, 'skill');
      await mkdir(installDir, { recursive: true });

      await expect(runPostInstall({
        installDir,
        scriptPath: '../escape.sh',
        skillName: 't',
        semver: '0.1.0',
        logsDir: join(tmp, 'logs'),
      })).rejects.toThrow(/escapes skill dir/);
    });

    it('throws when the script does not exist', async () => {
      const installDir = join(tmp, 'skill');
      await mkdir(installDir, { recursive: true });

      await expect(runPostInstall({
        installDir,
        scriptPath: 'missing.sh',
        skillName: 't',
        semver: '0.1.0',
        logsDir: join(tmp, 'logs'),
      })).rejects.toThrow();
    });
  });

  describe('runPostInstall — install log', () => {
    it('writes stdout + stderr + exit summary to logsDir/install.log', async () => {
      const installDir = join(tmp, 'skill');
      await mkdir(installDir, { recursive: true });
      const scriptPath = join(installDir, 'install.sh');
      await writeFile(scriptPath, '#!/bin/sh\necho hello\nexit 0\n');
      await chmod(scriptPath, 0o755);

      const logsDir = join(tmp, 'logs');
      await runPostInstall({
        installDir,
        scriptPath: 'install.sh',
        skillName: 'log-test',
        semver: '0.1.0',
        logsDir,
      });

      const log = await readFile(join(logsDir, 'install.log'), 'utf-8');
      expect(log).toContain('log-test@0.1.0');
      expect(log).toContain('hello');
      expect(log).toContain('exit 0');
    });

    it('appends across runs without losing prior history', async () => {
      const installDir = join(tmp, 'skill');
      await mkdir(installDir, { recursive: true });
      const scriptPath = join(installDir, 'install.sh');
      await writeFile(scriptPath, '#!/bin/sh\necho run\nexit 0\n');
      await chmod(scriptPath, 0o755);

      const logsDir = join(tmp, 'logs');
      const input = {
        installDir,
        scriptPath: 'install.sh',
        skillName: 't',
        semver: '0.1.0',
        logsDir,
      };
      await runPostInstall(input);
      await runPostInstall(input);

      const log = await readFile(join(logsDir, 'install.log'), 'utf-8');
      // Two run headers separated by `===`.
      const headers = (log.match(/=== /g) ?? []).length;
      expect(headers).toBeGreaterThanOrEqual(2);
    });
  });
});
|
||||
@@ -129,10 +129,18 @@ export class HealthProbeRunner {
|
||||
result = await this.probeLiveness(server, timeoutMs);
|
||||
} else {
|
||||
const readinessCheck = healthCheck as HealthCheckSpec & { tool: string };
|
||||
if (server.transport === 'SSE' || server.transport === 'STREAMABLE_HTTP') {
|
||||
result = await this.probeHttp(instance, server, readinessCheck, timeoutMs);
|
||||
if (server.transport === 'STDIO') {
|
||||
// Route STDIO readiness through the proxy so probes hit the live
|
||||
// running container rather than spawning a fresh process inside
|
||||
// it. The legacy `probeStdio` (docker-exec a synthetic Node script
|
||||
// that re-spawns the package binary) only worked for
|
||||
// packageName-based servers — image-based STDIO servers (gitea,
|
||||
// docmost) returned a fake-unhealthy "No packageName or command"
|
||||
// before they even tried the tool. Going through mcpProxyService
|
||||
// also means readiness failures match production failures exactly.
|
||||
result = await this.probeReadinessViaProxy(server, readinessCheck, timeoutMs);
|
||||
} else {
|
||||
result = await this.probeStdio(instance, server, readinessCheck, timeoutMs);
|
||||
result = await this.probeHttp(instance, server, readinessCheck, timeoutMs);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
@@ -188,6 +196,71 @@ export class HealthProbeRunner {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Readiness probe via McpProxyService — sends `tools/call` against the
|
||||
* configured probe tool through the live running instance. Used by
|
||||
* STDIO servers; HTTP/SSE servers go through the bespoke `probeHttp`
|
||||
* paths that connect directly to the container's IP+port (those work
|
||||
* fine and are kept as-is to minimise the diff in this PR).
|
||||
*
|
||||
* If the tool returns a JSON-RPC `error` (e.g. gitea-mcp-server's
|
||||
* "token is required" when GITEA_ACCESS_TOKEN didn't resolve), we mark
|
||||
* the instance unhealthy with the upstream error message. That's how
|
||||
* we catch broken-by-empty-secret cases that liveness (`tools/list`)
|
||||
* would otherwise pass.
|
||||
*/
|
||||
private async probeReadinessViaProxy(
|
||||
server: McpServer,
|
||||
healthCheck: HealthCheckSpec & { tool: string },
|
||||
timeoutMs: number,
|
||||
): Promise<ProbeResult> {
|
||||
const start = Date.now();
|
||||
if (!this.mcpProxyService) {
|
||||
return { healthy: false, latencyMs: 0, message: 'mcpProxyService not wired — cannot run readiness probe' };
|
||||
}
|
||||
|
||||
const deadline = new Promise<ProbeResult>((resolve) => {
|
||||
setTimeout(() => resolve({
|
||||
healthy: false,
|
||||
latencyMs: timeoutMs,
|
||||
message: `Readiness probe timed out after ${timeoutMs}ms`,
|
||||
}), timeoutMs);
|
||||
});
|
||||
|
||||
const probe = this.mcpProxyService
|
||||
.execute({
|
||||
serverId: server.id,
|
||||
method: 'tools/call',
|
||||
params: { name: healthCheck.tool, arguments: healthCheck.arguments ?? {} },
|
||||
})
|
||||
.then((response): ProbeResult => {
|
||||
const latencyMs = Date.now() - start;
|
||||
if (response.error) {
|
||||
return {
|
||||
healthy: false,
|
||||
latencyMs,
|
||||
message: response.error.message ?? `tools/call ${healthCheck.tool} returned error`,
|
||||
};
|
||||
}
|
||||
// Some servers report tool-level failures inside the result body
|
||||
// (`{ isError: true, content: [...] }`) rather than as JSON-RPC
|
||||
// errors. Treat that as unhealthy too.
|
||||
const result = response.result as { isError?: boolean; content?: Array<{ text?: string }> } | undefined;
|
||||
if (result?.isError) {
|
||||
const text = result.content?.[0]?.text ?? `${healthCheck.tool} returned isError`;
|
||||
return { healthy: false, latencyMs, message: text };
|
||||
}
|
||||
return { healthy: true, latencyMs, message: 'ok' };
|
||||
})
|
||||
.catch((err: unknown): ProbeResult => ({
|
||||
healthy: false,
|
||||
latencyMs: Date.now() - start,
|
||||
message: err instanceof Error ? err.message : String(err),
|
||||
}));
|
||||
|
||||
return Promise.race([probe, deadline]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Liveness probe — sends tools/list via McpProxyService so the probe traverses
|
||||
* the exact code path production clients use. Works uniformly across every
|
||||
@@ -463,122 +536,14 @@ export class HealthProbeRunner {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Probe a STDIO MCP server by running `docker exec` with a disposable Node.js
|
||||
* script that pipes JSON-RPC messages into the package binary.
|
||||
*/
|
||||
private async probeStdio(
|
||||
instance: McpInstance,
|
||||
server: McpServer,
|
||||
healthCheck: HealthCheckSpec & { tool: string },
|
||||
timeoutMs: number,
|
||||
): Promise<ProbeResult> {
|
||||
if (!instance.containerId) {
|
||||
return { healthy: false, latencyMs: 0, message: 'No container ID' };
|
||||
}
|
||||
|
||||
const start = Date.now();
|
||||
const packageName = server.packageName as string | null;
|
||||
const command = server.command as string[] | null;
|
||||
|
||||
// Determine how to spawn the MCP server inside the container
|
||||
let spawnCmd: string[];
|
||||
if (packageName) {
|
||||
spawnCmd = ['npx', '--prefer-offline', '-y', packageName];
|
||||
} else if (command && command.length > 0) {
|
||||
spawnCmd = command;
|
||||
} else {
|
||||
return { healthy: false, latencyMs: 0, message: 'No packageName or command for STDIO server' };
|
||||
}
|
||||
|
||||
// Build JSON-RPC messages for the health probe
|
||||
const initMsg = JSON.stringify({
|
||||
jsonrpc: '2.0', id: 1, method: 'initialize',
|
||||
params: {
|
||||
protocolVersion: '2024-11-05',
|
||||
capabilities: {},
|
||||
clientInfo: { name: 'mcpctl-health', version: '0.0.1' },
|
||||
},
|
||||
});
|
||||
const initializedMsg = JSON.stringify({
|
||||
jsonrpc: '2.0', method: 'notifications/initialized',
|
||||
});
|
||||
const toolCallMsg = JSON.stringify({
|
||||
jsonrpc: '2.0', id: 2, method: 'tools/call',
|
||||
params: { name: healthCheck.tool, arguments: healthCheck.arguments ?? {} },
|
||||
});
|
||||
|
||||
// Use a Node.js inline script that:
|
||||
// 1. Spawns the MCP server binary
|
||||
// 2. Sends initialize + initialized + tool call via stdin
|
||||
// 3. Reads responses from stdout
|
||||
// 4. Exits with 0 if tool call succeeds, 1 if it fails
|
||||
const spawnArgs = JSON.stringify(spawnCmd);
|
||||
const probeScript = `
|
||||
const { spawn } = require('child_process');
|
||||
const args = ${spawnArgs};
|
||||
const proc = spawn(args[0], args.slice(1), { stdio: ['pipe', 'pipe', 'pipe'] });
|
||||
let output = '';
|
||||
let responded = false;
|
||||
proc.stdout.on('data', d => {
|
||||
output += d;
|
||||
const lines = output.split('\\n');
|
||||
for (const line of lines) {
|
||||
if (!line.trim()) continue;
|
||||
try {
|
||||
const msg = JSON.parse(line);
|
||||
if (msg.id === 2) {
|
||||
responded = true;
|
||||
if (msg.error) {
|
||||
process.stdout.write('ERROR:' + (msg.error.message || 'unknown'));
|
||||
proc.kill();
|
||||
process.exit(1);
|
||||
} else {
|
||||
process.stdout.write('OK');
|
||||
proc.kill();
|
||||
process.exit(0);
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
output = lines[lines.length - 1] || '';
|
||||
});
|
||||
proc.stderr.on('data', () => {});
|
||||
proc.on('error', e => { process.stdout.write('ERROR:' + e.message); process.exit(1); });
|
||||
proc.on('exit', (code) => { if (!responded) { process.stdout.write('ERROR:process exited ' + code); process.exit(1); } });
|
||||
setTimeout(() => { if (!responded) { process.stdout.write('ERROR:timeout'); proc.kill(); process.exit(1); } }, ${timeoutMs - 2000});
|
||||
proc.stdin.write(${JSON.stringify(initMsg)} + '\\n');
|
||||
setTimeout(() => {
|
||||
proc.stdin.write(${JSON.stringify(initializedMsg)} + '\\n');
|
||||
setTimeout(() => {
|
||||
proc.stdin.write(${JSON.stringify(toolCallMsg)} + '\\n');
|
||||
}, 500);
|
||||
}, 500);
|
||||
`.trim();
|
||||
|
||||
try {
|
||||
const result = await this.orchestrator.execInContainer(
|
||||
instance.containerId,
|
||||
['node', '-e', probeScript],
|
||||
{ timeoutMs },
|
||||
);
|
||||
|
||||
const latencyMs = Date.now() - start;
|
||||
|
||||
if (result.exitCode === 0 && result.stdout.includes('OK')) {
|
||||
return { healthy: true, latencyMs, message: 'ok' };
|
||||
}
|
||||
|
||||
// Extract error message
|
||||
const errorMatch = result.stdout.match(/ERROR:(.*)/);
|
||||
const errorMsg = errorMatch?.[1] ?? (result.stderr.trim() || `exit code ${result.exitCode}`);
|
||||
return { healthy: false, latencyMs, message: errorMsg };
|
||||
} catch (err) {
|
||||
return {
|
||||
healthy: false,
|
||||
latencyMs: Date.now() - start,
|
||||
message: err instanceof Error ? err.message : String(err),
|
||||
};
|
||||
}
|
||||
}
|
||||
// Note: a previous `probeStdio` implementation existed here that ran a
|
||||
// disposable Node script inside the container via `docker exec`,
|
||||
// re-spawning the package binary and piping JSON-RPC into it. It only
|
||||
// worked for packageName-based servers (the spawn step required an
|
||||
// npx-compatible package); image-based STDIO servers like
|
||||
// gitea-mcp-server fell through with "No packageName or command" and
|
||||
// were always reported unhealthy for the wrong reason. STDIO readiness
|
||||
// now goes through `probeReadinessViaProxy` which calls the live
|
||||
// running container — same code path as production traffic — and
|
||||
// surfaces the upstream error verbatim.
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { McpInstance } from '@prisma/client';
|
||||
import type { McpInstance, McpServer } from '@prisma/client';
|
||||
import type { IMcpInstanceRepository, IMcpServerRepository } from '../repositories/interfaces.js';
|
||||
import type { McpOrchestrator, ContainerSpec, ContainerInfo } from './orchestrator.js';
|
||||
import { NotFoundError } from './mcp-server.service.js';
|
||||
@@ -13,6 +13,36 @@ const RUNNER_IMAGES: Record<string, string> = {
|
||||
/** Network for MCP server containers (matches docker-compose mcp-servers network). */
|
||||
const MCP_SERVERS_NETWORK = process.env['MCPD_MCP_NETWORK'] ?? 'mcp-servers';
|
||||
|
||||
/**
|
||||
* Backoff schedule for instance startup failures (env resolution, container
|
||||
* creation, etc). Mirrors Kubernetes-style escalation: fast retries for
|
||||
* transient hiccups, then a longer pause once it's clear something is
|
||||
* persistently wrong.
|
||||
*
|
||||
* The retry state lives on `McpInstance.metadata` (no schema migration
|
||||
* needed) and is preserved across reconcile cycles by the in-place
|
||||
* `retryInstance` path so attemptCount actually accumulates.
|
||||
*/
|
||||
const FAST_RETRY_MS = 30_000; // first 5 attempts: 30s apart
|
||||
const SLOW_RETRY_MS = 5 * 60_000; // afterwards: 5 minutes
|
||||
const MAX_FAST_RETRIES = 5;
|
||||
|
||||
interface RetryMetadata {
|
||||
error?: string;
|
||||
attemptCount?: number;
|
||||
lastAttemptAt?: string;
|
||||
nextRetryAt?: string;
|
||||
[k: string]: unknown;
|
||||
}
|
||||
|
||||
function readRetryMeta(instance: McpInstance): RetryMetadata {
|
||||
return (instance.metadata ?? {}) as RetryMetadata;
|
||||
}
|
||||
|
||||
function nextDelayMs(attemptCount: number): number {
|
||||
return attemptCount <= MAX_FAST_RETRIES ? FAST_RETRY_MS : SLOW_RETRY_MS;
|
||||
}
|
||||
|
||||
export class InvalidStateError extends Error {
|
||||
readonly statusCode = 409;
|
||||
constructor(message: string) {
|
||||
@@ -118,8 +148,12 @@ export class InstanceService {
|
||||
* Reconcile ALL servers — the operator loop.
|
||||
*
|
||||
* For every server with replicas > 0, ensures the correct number of
|
||||
* healthy instances exist. Cleans up ERROR instances and starts
|
||||
* replacements. This is the core self-healing mechanism.
|
||||
* healthy instances exist. ERROR instances are not blindly recreated:
|
||||
* within their `nextRetryAt` window they're left alone (and counted
|
||||
* against the replica budget so we don't churn replacements while one
|
||||
* is in backoff); past their window they're retried in-place via
|
||||
* `retryInstance` so attemptCount accumulates and backoff escalates
|
||||
* correctly.
|
||||
*/
|
||||
async reconcileAll(): Promise<{ reconciled: number; errors: string[] }> {
|
||||
await this.syncStatus();
|
||||
@@ -128,6 +162,8 @@ export class InstanceService {
|
||||
let reconciled = 0;
|
||||
const errors: string[] = [];
|
||||
|
||||
const now = Date.now();
|
||||
|
||||
for (const server of servers) {
|
||||
if (server.replicas <= 0) continue;
|
||||
|
||||
@@ -136,17 +172,38 @@ export class InstanceService {
|
||||
const active = instances.filter((i) => i.status === 'RUNNING' || i.status === 'STARTING');
|
||||
const errored = instances.filter((i) => i.status === 'ERROR');
|
||||
|
||||
// Clean up ERROR instances so they don't accumulate
|
||||
// Partition ERROR instances by whether their backoff window has elapsed.
|
||||
const dueForRetry: McpInstance[] = [];
|
||||
const stillWaiting: McpInstance[] = [];
|
||||
for (const inst of errored) {
|
||||
await this.removeOne(inst);
|
||||
const meta = readRetryMeta(inst);
|
||||
const ts = meta.nextRetryAt ? Date.parse(meta.nextRetryAt) : 0;
|
||||
if (Number.isNaN(ts) || ts <= now) {
|
||||
dueForRetry.push(inst);
|
||||
} else {
|
||||
stillWaiting.push(inst);
|
||||
}
|
||||
}
|
||||
|
||||
// Scale up if needed
|
||||
const toStart = server.replicas - active.length;
|
||||
// Retry elapsed ones in-place. This preserves attemptCount across
|
||||
// attempts so the 30s × 5 → 5min schedule actually escalates.
|
||||
for (const inst of dueForRetry) {
|
||||
await this.retryInstance(inst);
|
||||
}
|
||||
|
||||
// Scale up only if we don't already have enough live attempts.
|
||||
// Live attempts = currently-running OR -starting + still-waiting
|
||||
// (in backoff) + just-retried (now STARTING via retryInstance).
|
||||
// Counting waiting + retried against the budget prevents tight
|
||||
// create-fail-create churn while previous attempts work through
|
||||
// their backoff schedule.
|
||||
const toStart = server.replicas - active.length - stillWaiting.length - dueForRetry.length;
|
||||
if (toStart > 0) {
|
||||
for (let i = 0; i < toStart; i++) {
|
||||
await this.startOne(server.id);
|
||||
}
|
||||
}
|
||||
if (toStart > 0 || dueForRetry.length > 0) {
|
||||
reconciled++;
|
||||
}
|
||||
} catch (err) {
|
||||
@@ -220,7 +277,12 @@ export class InstanceService {
|
||||
return this.orchestrator.getContainerLogs(instance.containerId, opts);
|
||||
}
|
||||
|
||||
/** Start a single instance for a server. */
|
||||
/**
|
||||
* Start a single instance for a server. Creates a fresh `STARTING` row
|
||||
* and hands off to `attemptStart` for the env+container work. On
|
||||
* failure, `attemptStart` marks the row `ERROR` with a backoff-aware
|
||||
* `nextRetryAt`; the reconciler picks it up later via `retryInstance`.
|
||||
*/
|
||||
private async startOne(serverId: string): Promise<McpInstance> {
|
||||
const server = await this.serverRepo.findById(serverId);
|
||||
if (!server) throw new NotFoundError(`McpServer '${serverId}' not found`);
|
||||
@@ -234,6 +296,49 @@ export class InstanceService {
|
||||
});
|
||||
}
|
||||
|
||||
const instance = await this.instanceRepo.create({
|
||||
serverId,
|
||||
status: 'STARTING',
|
||||
});
|
||||
return this.attemptStart(instance, server);
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-attempt a previously-errored instance in place, preserving its
|
||||
* `attemptCount` so the backoff schedule escalates correctly. Called
|
||||
* by `reconcileAll` for ERROR instances whose `nextRetryAt` has elapsed.
|
||||
*/
|
||||
private async retryInstance(instance: McpInstance): Promise<McpInstance> {
|
||||
const server = await this.serverRepo.findById(instance.serverId);
|
||||
if (!server) {
|
||||
// Server was deleted underneath us — nothing to retry against.
|
||||
return this.markInstanceError(instance, 'Server no longer exists');
|
||||
}
|
||||
|
||||
if (server.externalUrl) {
|
||||
// External servers don't need a container; the URL is the contract.
|
||||
return this.instanceRepo.updateStatus(instance.id, 'RUNNING', {
|
||||
metadata: { external: true, url: server.externalUrl },
|
||||
});
|
||||
}
|
||||
|
||||
// Reset transient fields but keep retry counters via the metadata
|
||||
// passed through `attemptStart` → `markInstanceError`.
|
||||
await this.instanceRepo.updateStatus(instance.id, 'STARTING', {});
|
||||
const refreshed = (await this.instanceRepo.findById(instance.id)) ?? instance;
|
||||
return this.attemptStart(refreshed, server);
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the env-resolution + container-creation steps for a STARTING
|
||||
* instance. On any failure, mark the instance `ERROR` with structured
|
||||
* retry metadata. Used by both initial start (`startOne`) and retry
|
||||
* (`retryInstance`).
|
||||
*/
|
||||
private async attemptStart(
|
||||
instance: McpInstance,
|
||||
server: McpServer,
|
||||
): Promise<McpInstance> {
|
||||
// Determine image + command based on server config:
|
||||
// 1. Explicit dockerImage → use as-is
|
||||
// 2. packageName → use runtime-specific runner image (node/python/go/...)
|
||||
@@ -253,11 +358,6 @@ export class InstanceService {
|
||||
image = server.name;
|
||||
}
|
||||
|
||||
let instance = await this.instanceRepo.create({
|
||||
serverId,
|
||||
status: 'STARTING',
|
||||
});
|
||||
|
||||
try {
|
||||
const spec: ContainerSpec = {
|
||||
image,
|
||||
@@ -265,7 +365,7 @@ export class InstanceService {
|
||||
hostPort: null,
|
||||
network: MCP_SERVERS_NETWORK,
|
||||
labels: {
|
||||
'mcpctl.server-id': serverId,
|
||||
'mcpctl.server-id': server.id,
|
||||
'mcpctl.instance-id': instance.id,
|
||||
},
|
||||
};
|
||||
@@ -283,7 +383,17 @@ export class InstanceService {
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve env vars from inline values and secret refs
|
||||
// Resolve env vars from inline values and secret refs.
|
||||
//
|
||||
// Failure here is FATAL for the start attempt: a container that
|
||||
// boots without its declared secrets will silently mis-behave (we
|
||||
// saw this with gitea-mcp-server starting up with an empty
|
||||
// GITEA_ACCESS_TOKEN when OpenBao was unreachable, then reporting
|
||||
// "healthy" while every authed call failed). Marking the instance
|
||||
// ERROR with a backoff-aware nextRetryAt is honest; the reconciler
|
||||
// will retry it in-place on the next tick whose nextRetryAt has
|
||||
// elapsed. Optional/missing env vars should be modeled as `value: ""`
|
||||
// entries on the server, not as silent secret-resolution failures.
|
||||
if (this.secretResolver) {
|
||||
try {
|
||||
const resolvedEnv = await resolveServerEnv(server, this.secretResolver);
|
||||
@@ -291,8 +401,8 @@ export class InstanceService {
|
||||
spec.env = resolvedEnv;
|
||||
}
|
||||
} catch (envErr) {
|
||||
// Log but don't prevent startup — env resolution failures are non-fatal
|
||||
// The container may still work if env vars are optional
|
||||
const msg = envErr instanceof Error ? envErr.message : String(envErr);
|
||||
return this.markInstanceError(instance, `secret resolution failed: ${msg}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -313,14 +423,39 @@ export class InstanceService {
|
||||
}
|
||||
|
||||
// Set STARTING — syncStatus will promote to RUNNING once the container is actually ready
|
||||
instance = await this.instanceRepo.updateStatus(instance.id, 'STARTING', updateFields);
|
||||
return this.instanceRepo.updateStatus(instance.id, 'STARTING', updateFields);
|
||||
} catch (err) {
|
||||
instance = await this.instanceRepo.updateStatus(instance.id, 'ERROR', {
|
||||
metadata: { error: err instanceof Error ? err.message : String(err) },
|
||||
});
|
||||
return this.markInstanceError(
|
||||
instance,
|
||||
err instanceof Error ? err.message : String(err),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return instance;
|
||||
/**
|
||||
* Mark an instance ERROR with a backoff-aware retry schedule. The
|
||||
* `attemptCount` accumulates across retries (preserved by
|
||||
* `retryInstance` which reuses the same row), so the schedule
|
||||
* actually escalates: 30s × 5 → 5min thereafter.
|
||||
*/
|
||||
private async markInstanceError(
|
||||
instance: McpInstance,
|
||||
error: string,
|
||||
): Promise<McpInstance> {
|
||||
const meta = readRetryMeta(instance);
|
||||
const attemptCount = (typeof meta.attemptCount === 'number' ? meta.attemptCount : 0) + 1;
|
||||
const delayMs = nextDelayMs(attemptCount);
|
||||
const now = new Date();
|
||||
const nextRetryAt = new Date(now.getTime() + delayMs).toISOString();
|
||||
return this.instanceRepo.updateStatus(instance.id, 'ERROR', {
|
||||
metadata: {
|
||||
...meta,
|
||||
error,
|
||||
attemptCount,
|
||||
lastAttemptAt: now.toISOString(),
|
||||
nextRetryAt,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
/** Stop and remove a single instance. */
|
||||
|
||||
@@ -334,20 +334,93 @@ describe('InstanceService', () => {
|
||||
expect(instanceRepo.create).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('cleans up ERROR instances and creates replacements', async () => {
|
||||
it('retries ERROR instances in-place when their backoff has elapsed (no delete, no new row)', async () => {
|
||||
const server = makeServer({ id: 'srv-1', replicas: 1 });
|
||||
vi.mocked(serverRepo.findAll).mockResolvedValue([server]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
// ERROR instance with no nextRetryAt → retry is due immediately.
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([
|
||||
makeInstance({ id: 'inst-dead', serverId: 'srv-1', status: 'ERROR', containerId: 'ctr-dead' }),
|
||||
]);
|
||||
|
||||
const result = await service.reconcileAll();
|
||||
|
||||
// Should delete ERROR instance and create a new one
|
||||
// Retry-in-place semantics: don't delete the row, don't create a
|
||||
// replacement. attemptCount needs to live on the same row so the
|
||||
// backoff schedule can actually escalate.
|
||||
expect(instanceRepo.delete).not.toHaveBeenCalled();
|
||||
expect(instanceRepo.create).not.toHaveBeenCalled();
|
||||
// retryInstance flips the row STARTING before attemptStart runs.
|
||||
expect(instanceRepo.updateStatus).toHaveBeenCalledWith('inst-dead', 'STARTING', expect.anything());
|
||||
expect(result.reconciled).toBe(1);
|
||||
expect(instanceRepo.delete).toHaveBeenCalledWith('inst-dead');
|
||||
expect(instanceRepo.create).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('leaves ERROR instances alone while their nextRetryAt is in the future', async () => {
|
||||
const server = makeServer({ id: 'srv-1', replicas: 1 });
|
||||
vi.mocked(serverRepo.findAll).mockResolvedValue([server]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
const futureRetry = new Date(Date.now() + 60_000).toISOString();
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([
|
||||
makeInstance({
|
||||
id: 'inst-waiting',
|
||||
serverId: 'srv-1',
|
||||
status: 'ERROR',
|
||||
metadata: { nextRetryAt: futureRetry, attemptCount: 2 },
|
||||
}),
|
||||
]);
|
||||
|
||||
const result = await service.reconcileAll();
|
||||
|
||||
// Within the backoff window the reconciler must not delete the row,
|
||||
// not retry it, and not spawn a replacement (counting it against
|
||||
// the replica budget is what prevents tight create-fail-create churn).
|
||||
expect(instanceRepo.delete).not.toHaveBeenCalled();
|
||||
expect(instanceRepo.create).not.toHaveBeenCalled();
|
||||
expect(orchestrator.createContainer).not.toHaveBeenCalled();
|
||||
expect(result.reconciled).toBe(0);
|
||||
});
|
||||
|
||||
it('escalates the backoff: attemptCount + nextRetryAt persist on retry failures', async () => {
|
||||
const server = makeServer({ id: 'srv-1', replicas: 1 });
|
||||
vi.mocked(serverRepo.findAll).mockResolvedValue([server]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
|
||||
// Fail container creation so attemptStart goes down the markInstanceError path.
|
||||
vi.mocked(orchestrator.createContainer).mockRejectedValue(new Error('boom'));
|
||||
|
||||
// Existing ERROR instance with attemptCount=2 (so the next failure
|
||||
// produces attemptCount=3, still inside the fast-retry window).
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([
|
||||
makeInstance({
|
||||
id: 'inst-1',
|
||||
serverId: 'srv-1',
|
||||
status: 'ERROR',
|
||||
metadata: { error: 'previous failure', attemptCount: 2, nextRetryAt: new Date(Date.now() - 1000).toISOString() },
|
||||
}),
|
||||
]);
|
||||
// retryInstance refreshes via findById; let it return the same row.
|
||||
vi.mocked(instanceRepo.findById).mockImplementation(async () => makeInstance({
|
||||
id: 'inst-1',
|
||||
serverId: 'srv-1',
|
||||
status: 'STARTING',
|
||||
metadata: { error: 'previous failure', attemptCount: 2, nextRetryAt: new Date(Date.now() - 1000).toISOString() },
|
||||
}));
|
||||
|
||||
await service.reconcileAll();
|
||||
|
||||
// Look at the last updateStatus call — it should be the ERROR transition
|
||||
// with attemptCount bumped to 3.
|
||||
const errorCalls = vi.mocked(instanceRepo.updateStatus).mock.calls.filter(
|
||||
(c) => c[1] === 'ERROR',
|
||||
);
|
||||
expect(errorCalls.length).toBeGreaterThan(0);
|
||||
const lastErrorCall = errorCalls[errorCalls.length - 1]!;
|
||||
const meta = (lastErrorCall[2] as { metadata?: Record<string, unknown> } | undefined)?.metadata;
|
||||
expect(meta).toBeDefined();
|
||||
expect((meta as Record<string, unknown>)['attemptCount']).toBe(3);
|
||||
expect((meta as Record<string, unknown>)['nextRetryAt']).toBeTypeOf('string');
|
||||
// Reason should reference the boom we threw.
|
||||
expect(String((meta as Record<string, unknown>)['error'])).toContain('boom');
|
||||
});
|
||||
|
||||
it('reconciles multiple servers independently', async () => {
|
||||
|
||||
@@ -192,25 +192,28 @@ describe('HealthProbeRunner', () => {
|
||||
expect(serverRepo.findById).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('probes STDIO instance with exec and marks healthy on success', async () => {
|
||||
it('probes STDIO instance via mcpProxyService and marks healthy on success', async () => {
|
||||
const instance = makeInstance();
|
||||
const server = makeServer();
|
||||
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([instance]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
vi.mocked(orchestrator.execInContainer).mockResolvedValue({
|
||||
exitCode: 0,
|
||||
stdout: 'OK',
|
||||
stderr: '',
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1,
|
||||
result: { content: [{ type: 'text', text: 'ok' }] },
|
||||
});
|
||||
|
||||
await runner.tick();
|
||||
|
||||
expect(orchestrator.execInContainer).toHaveBeenCalledWith(
|
||||
'container-abc',
|
||||
expect.arrayContaining(['node', '-e']),
|
||||
expect.objectContaining({ timeoutMs: 10000 }),
|
||||
);
|
||||
// STDIO readiness now goes through the proxy (the live container),
|
||||
// not via docker-exec into a synthetic spawn — see comment on
|
||||
// probeReadinessViaProxy for why.
|
||||
expect(orchestrator.execInContainer).not.toHaveBeenCalled();
|
||||
expect(mcpProxyService.execute).toHaveBeenCalledWith({
|
||||
serverId: 'srv-1',
|
||||
method: 'tools/call',
|
||||
params: { name: 'list_datasources', arguments: {} },
|
||||
});
|
||||
|
||||
expect(instanceRepo.updateStatus).toHaveBeenCalledWith(
|
||||
'inst-1',
|
||||
@@ -225,6 +228,57 @@ describe('HealthProbeRunner', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('marks unhealthy when proxy returns a JSON-RPC error (e.g. broken-secret auth failure)', async () => {
|
||||
const instance = makeInstance();
|
||||
const server = makeServer({
|
||||
healthCheck: { tool: 'get_me', intervalSeconds: 0, failureThreshold: 1 } as McpServer['healthCheck'],
|
||||
});
|
||||
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([instance]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1,
|
||||
error: { code: -32603, message: 'token is required' },
|
||||
});
|
||||
|
||||
await runner.tick();
|
||||
|
||||
expect(instanceRepo.updateStatus).toHaveBeenCalledWith(
|
||||
'inst-1',
|
||||
'RUNNING',
|
||||
expect.objectContaining({
|
||||
healthStatus: 'unhealthy',
|
||||
events: expect.arrayContaining([
|
||||
expect.objectContaining({ type: 'Warning', message: expect.stringContaining('token is required') }),
|
||||
]),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('marks unhealthy when proxy returns a tool-level error in result.isError', async () => {
|
||||
const instance = makeInstance();
|
||||
const server = makeServer({
|
||||
healthCheck: { tool: 'get_me', intervalSeconds: 0, failureThreshold: 1 } as McpServer['healthCheck'],
|
||||
});
|
||||
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([instance]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1,
|
||||
result: { isError: true, content: [{ type: 'text', text: 'auth failed: token is required' }] },
|
||||
});
|
||||
|
||||
await runner.tick();
|
||||
|
||||
const events = vi.mocked(instanceRepo.updateStatus).mock.calls[0]?.[2]?.events as Array<{ message: string }> | undefined;
|
||||
expect(events?.[events.length - 1]?.message).toContain('auth failed');
|
||||
expect(instanceRepo.updateStatus).toHaveBeenCalledWith(
|
||||
'inst-1',
|
||||
'RUNNING',
|
||||
expect.objectContaining({ healthStatus: 'unhealthy' }),
|
||||
);
|
||||
});
|
||||
|
||||
it('marks unhealthy after failureThreshold consecutive failures', async () => {
|
||||
const instance = makeInstance();
|
||||
const healthCheck: HealthCheckSpec = {
|
||||
@@ -237,10 +291,9 @@ describe('HealthProbeRunner', () => {
|
||||
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([instance]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
vi.mocked(orchestrator.execInContainer).mockResolvedValue({
|
||||
exitCode: 1,
|
||||
stdout: 'ERROR:connection refused',
|
||||
stderr: '',
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1,
|
||||
error: { code: -32603, message: 'connection refused' },
|
||||
});
|
||||
|
||||
// First failure → degraded
|
||||
@@ -274,15 +327,15 @@ describe('HealthProbeRunner', () => {
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
|
||||
// Two failures
|
||||
vi.mocked(orchestrator.execInContainer).mockResolvedValue({
|
||||
exitCode: 1, stdout: 'ERROR:fail', stderr: '',
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1, error: { code: -32603, message: 'fail' },
|
||||
});
|
||||
await runner.tick();
|
||||
await runner.tick();
|
||||
|
||||
// Then success — should reset to healthy
|
||||
vi.mocked(orchestrator.execInContainer).mockResolvedValue({
|
||||
exitCode: 0, stdout: 'OK', stderr: '',
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1, result: {},
|
||||
});
|
||||
await runner.tick();
|
||||
|
||||
@@ -290,13 +343,16 @@ describe('HealthProbeRunner', () => {
|
||||
expect(lastCall?.[2]).toEqual(expect.objectContaining({ healthStatus: 'healthy' }));
|
||||
});
|
||||
|
||||
it('handles exec timeout as failure', async () => {
|
||||
it('handles probe timeout as failure', async () => {
|
||||
const instance = makeInstance();
|
||||
const server = makeServer();
|
||||
const server = makeServer({
|
||||
healthCheck: { tool: 'list_datasources', intervalSeconds: 0, timeoutSeconds: 0.05, failureThreshold: 3 } as unknown as McpServer['healthCheck'],
|
||||
});
|
||||
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([instance]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
vi.mocked(orchestrator.execInContainer).mockRejectedValue(new Error('Exec timed out after 10000ms'));
|
||||
// Hang forever — the probe's internal deadline should fire instead.
|
||||
vi.mocked(mcpProxyService.execute).mockImplementation(() => new Promise(() => { /* never resolves */ }));
|
||||
|
||||
await runner.tick();
|
||||
|
||||
@@ -323,8 +379,8 @@ describe('HealthProbeRunner', () => {
|
||||
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([instance]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
vi.mocked(orchestrator.execInContainer).mockResolvedValue({
|
||||
exitCode: 0, stdout: 'OK', stderr: '',
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1, result: {},
|
||||
});
|
||||
|
||||
await runner.tick();
|
||||
@@ -343,17 +399,17 @@ describe('HealthProbeRunner', () => {
|
||||
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([instance]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
vi.mocked(orchestrator.execInContainer).mockResolvedValue({
|
||||
exitCode: 0, stdout: 'OK', stderr: '',
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1, result: {},
|
||||
});
|
||||
|
||||
// First tick: should probe
|
||||
await runner.tick();
|
||||
expect(orchestrator.execInContainer).toHaveBeenCalledTimes(1);
|
||||
expect(mcpProxyService.execute).toHaveBeenCalledTimes(1);
|
||||
|
||||
// Second tick immediately: should skip (300s interval not elapsed)
|
||||
await runner.tick();
|
||||
expect(orchestrator.execInContainer).toHaveBeenCalledTimes(1);
|
||||
expect(mcpProxyService.execute).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('cleans up probe states for removed instances', async () => {
|
||||
@@ -364,9 +420,12 @@ describe('HealthProbeRunner', () => {
|
||||
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([instance]);
|
||||
vi.mocked(serverRepo.findById).mockResolvedValue(server);
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1, result: {},
|
||||
});
|
||||
|
||||
await runner.tick();
|
||||
expect(orchestrator.execInContainer).toHaveBeenCalledTimes(1);
|
||||
expect(mcpProxyService.execute).toHaveBeenCalledTimes(1);
|
||||
|
||||
// Instance removed
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([]);
|
||||
@@ -375,7 +434,7 @@ describe('HealthProbeRunner', () => {
|
||||
// Re-add same instance — should probe again (state was cleaned)
|
||||
vi.mocked(instanceRepo.findAll).mockResolvedValue([instance]);
|
||||
await runner.tick();
|
||||
expect(orchestrator.execInContainer).toHaveBeenCalledTimes(2);
|
||||
expect(mcpProxyService.execute).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it('skips STDIO instances without containerId', async () => {
|
||||
@@ -397,8 +456,8 @@ describe('HealthProbeRunner', () => {
|
||||
arguments: {},
|
||||
};
|
||||
|
||||
vi.mocked(orchestrator.execInContainer).mockResolvedValue({
|
||||
exitCode: 0, stdout: 'OK', stderr: '',
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1, result: {},
|
||||
});
|
||||
|
||||
const result = await runner.probeInstance(instance, server, healthCheck);
|
||||
@@ -407,15 +466,14 @@ describe('HealthProbeRunner', () => {
|
||||
expect(result.message).toBe('ok');
|
||||
});
|
||||
|
||||
it('handles STDIO exec failure with error message', async () => {
|
||||
it('surfaces upstream JSON-RPC error message verbatim', async () => {
|
||||
const instance = makeInstance();
|
||||
const server = makeServer();
|
||||
const healthCheck: HealthCheckSpec = { tool: 'list_datasources', arguments: {} };
|
||||
|
||||
vi.mocked(orchestrator.execInContainer).mockResolvedValue({
|
||||
exitCode: 1,
|
||||
stdout: 'ERROR:ECONNREFUSED 10.0.0.1:3000',
|
||||
stderr: '',
|
||||
vi.mocked(mcpProxyService.execute).mockResolvedValue({
|
||||
jsonrpc: '2.0', id: 1,
|
||||
error: { code: -32603, message: 'ECONNREFUSED 10.0.0.1:3000' },
|
||||
});
|
||||
|
||||
const result = await runner.probeInstance(instance, server, healthCheck);
|
||||
|
||||
Reference in New Issue
Block a user