fix(agents): close gaps from /gstack-review
P1 — thread reads now enforce ownership
========================================
chat.service.ts / routes/agent-chat.ts
GET /api/v1/threads/:id/messages was previously RBAC-mapped to
view:agents (no resourceName scope) with the route comment promising
"service-level owner check enforces fine-grained access" — but the
service didn't actually check. Any caller with view:agents could read
another user's thread by guessing/learning the threadId. CUIDs are
hard to brute-force but they leak: SSE `final` chunks, agents-plugin
`_meta.threadId`, and several response bodies surface them. Now
ChatService.listMessages(threadId, ownerId) loads the thread, returns
404 (not 403, to avoid id-enumeration via differential status codes)
if ownerId doesn't match. Regression test in chat-service.test.ts
covers Alice/Bob isolation + nonexistent-thread same-shape 404.
P2 — AgentChatRequestSchema strict mode
========================================
validation/agent.schema.ts
`.merge()` does NOT inherit `.strict()` from AgentChatParamsSchema.
Typo'd fields (e.g. `temprature`) silently fell through and the agent
silently used the default — debuggable only by reading the LLM call
payload. Re-applied `.strict()` on the merged schema.
P2 — per-agent maxIterations override + clamp
==============================================
chat.service.ts
Loop cap was a hard-coded module constant (12), wrong for both
research-style agents (need higher) and cheap-probe agents (could opt
lower). Now reads `agent.extras.maxIterations`, clamps 1..50, falls
back to 12 default. The clamp is the soft-DoS guard: a hostile agent
definition with `maxIterations:1000000` can't burn unbounded LLM calls
per request. Both chat() and chatStream() use ctx.maxIterations now.
Regression test covers low-cap override (rejects with `exceeded 2`)
and hostile-value clamp (rejects with `exceeded 50`).
P3 — SSE write to closed socket
================================
routes/agent-chat.ts
When the upstream adapter throws after some chunks were already
written AND the client disconnected, the catch block tried to flush
more chunks to a closed socket. Without an `on('error')` handler
Node emits unhandled error events; once Pino is wired to alerts
this'd page on every disconnect-mid-stream. writeSseChunk now
checks `reply.raw.destroyed || writableEnded` before write.
P3 — BACKEND_TOKEN_DEAD preserves original stack
=================================================
services/secret-backend-rotator.service.ts
When wrapping mintRoleToken/lookupSelf failures as
BACKEND_TOKEN_DEAD, the new Error() discarded the original throw —
hard to tell whether the inner failure was a network blip vs an
OpenBao API mismatch vs DNS. Now uses `new Error(msg, { cause: err })`
so the inner stack survives.
P3 — .gitignore .claude/scheduled_tasks.lock
=============================================
This persisted state file was leaking into every `git status`.
Tests
=====
mcpd 761/761 (+2 regression tests). mcplocal 715/715. cli 430/430.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -44,3 +44,6 @@ logs.sh
|
|||||||
mcpctl-backup.json
|
mcpctl-backup.json
|
||||||
a.yaml
|
a.yaml
|
||||||
test-mcp.sh
|
test-mcp.sh
|
||||||
|
|
||||||
|
# Claude Code local state
|
||||||
|
.claude/scheduled_tasks.lock
|
||||||
|
|||||||
@@ -126,8 +126,9 @@ export function registerAgentChatRoutes(
|
|||||||
app.get<{ Params: { id: string } }>(
|
app.get<{ Params: { id: string } }>(
|
||||||
'/api/v1/threads/:id/messages',
|
'/api/v1/threads/:id/messages',
|
||||||
async (request, reply) => {
|
async (request, reply) => {
|
||||||
|
const ownerId = request.userId ?? 'system';
|
||||||
try {
|
try {
|
||||||
return await chat.listMessages(request.params.id);
|
return await chat.listMessages(request.params.id, ownerId);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof NotFoundError) {
|
if (err instanceof NotFoundError) {
|
||||||
reply.code(404);
|
reply.code(404);
|
||||||
@@ -140,5 +141,10 @@ export function registerAgentChatRoutes(
|
|||||||
}
|
}
|
||||||
|
|
||||||
function writeSseChunk(reply: FastifyReply, data: string): void {
|
function writeSseChunk(reply: FastifyReply, data: string): void {
|
||||||
|
// Guard against writing to a destroyed/closed socket — happens when the
|
||||||
|
// client disconnects mid-stream and we still try to flush an error payload
|
||||||
|
// from the catch path. Without the guard, Node emits an unhandled 'error'
|
||||||
|
// on the response and the noise pollutes logs.
|
||||||
|
if (reply.raw.destroyed || reply.raw.writableEnded) return;
|
||||||
reply.raw.write(`data: ${data}\n\n`);
|
reply.raw.write(`data: ${data}\n\n`);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,7 +35,27 @@ import type { AgentChatParams } from '../validation/agent.schema.js';
|
|||||||
import { NotFoundError } from './mcp-server.service.js';
|
import { NotFoundError } from './mcp-server.service.js';
|
||||||
|
|
||||||
export const TOOL_NAME_SEPARATOR = '__';
|
export const TOOL_NAME_SEPARATOR = '__';
|
||||||
|
/** Default tool-loop cap. Per-agent override via `agent.extras.maxIterations`, clamped to MIN..MAX. */
|
||||||
export const MAX_ITERATIONS = 12;
|
export const MAX_ITERATIONS = 12;
|
||||||
|
const MIN_ITERATIONS_CAP = 1;
|
||||||
|
const MAX_ITERATIONS_CAP = 50;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolve the loop cap for this turn:
|
||||||
|
* agent.extras.maxIterations → clamp(MIN_ITERATIONS_CAP, MAX_ITERATIONS_CAP) →
|
||||||
|
* fallback to MAX_ITERATIONS default.
|
||||||
|
*
|
||||||
|
* The clamp is the soft-DoS guard: a hostile agent definition can't pick a
|
||||||
|
* thousand-iteration cap, even with `create:agents` permission.
|
||||||
|
*/
|
||||||
|
function resolveMaxIterations(extras: Record<string, unknown> | null | undefined): number {
|
||||||
|
const raw = extras?.['maxIterations'];
|
||||||
|
if (typeof raw !== 'number' || !Number.isFinite(raw)) return MAX_ITERATIONS;
|
||||||
|
const truncated = Math.trunc(raw);
|
||||||
|
if (truncated < MIN_ITERATIONS_CAP) return MIN_ITERATIONS_CAP;
|
||||||
|
if (truncated > MAX_ITERATIONS_CAP) return MAX_ITERATIONS_CAP;
|
||||||
|
return truncated;
|
||||||
|
}
|
||||||
|
|
||||||
/** Project-scoped tool surface the chat loop calls into. Stub-friendly. */
|
/** Project-scoped tool surface the chat loop calls into. Stub-friendly. */
|
||||||
export interface ChatTool {
|
export interface ChatTool {
|
||||||
@@ -110,7 +130,16 @@ export class ChatService {
|
|||||||
return rows.map((r) => ({ id: r.id, title: r.title, lastTurnAt: r.lastTurnAt, createdAt: r.createdAt }));
|
return rows.map((r) => ({ id: r.id, title: r.title, lastTurnAt: r.lastTurnAt, createdAt: r.createdAt }));
|
||||||
}
|
}
|
||||||
|
|
||||||
async listMessages(threadId: string): Promise<ChatMessage[]> {
|
async listMessages(threadId: string, ownerId: string): Promise<ChatMessage[]> {
|
||||||
|
// Owner check guards `view:agents` from leaking another user's thread by
|
||||||
|
// ID. Thread IDs are CUIDs (hard to guess) but they leak through SSE
|
||||||
|
// `final` chunks, the agents-plugin tool _meta, and several response
|
||||||
|
// bodies, so id-knowledge is not a security boundary on its own. Return
|
||||||
|
// 404 (not 403) on mismatch to avoid id-enumeration via differential
|
||||||
|
// status codes.
|
||||||
|
const thread = await this.chatRepo.findThread(threadId);
|
||||||
|
if (thread === null) throw new NotFoundError(`Thread not found: ${threadId}`);
|
||||||
|
if (thread.ownerId !== ownerId) throw new NotFoundError(`Thread not found: ${threadId}`);
|
||||||
return this.chatRepo.listMessages(threadId);
|
return this.chatRepo.listMessages(threadId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -120,7 +149,7 @@ export class ChatService {
|
|||||||
let assistantFinal = '';
|
let assistantFinal = '';
|
||||||
let lastTurnIndex = ctx.startingTurnIndex;
|
let lastTurnIndex = ctx.startingTurnIndex;
|
||||||
try {
|
try {
|
||||||
for (let i = 0; i < MAX_ITERATIONS; i += 1) {
|
for (let i = 0; i < ctx.maxIterations; i += 1) {
|
||||||
const adapter = this.adapters.get(ctx.llmType);
|
const adapter = this.adapters.get(ctx.llmType);
|
||||||
const result = await adapter.infer({
|
const result = await adapter.infer({
|
||||||
body: this.buildBody(ctx),
|
body: this.buildBody(ctx),
|
||||||
@@ -179,7 +208,7 @@ export class ChatService {
|
|||||||
await this.chatRepo.touchThread(ctx.threadId);
|
await this.chatRepo.touchThread(ctx.threadId);
|
||||||
return { threadId: ctx.threadId, assistant: assistantFinal, turnIndex: lastTurnIndex };
|
return { threadId: ctx.threadId, assistant: assistantFinal, turnIndex: lastTurnIndex };
|
||||||
}
|
}
|
||||||
throw new Error(`Chat loop exceeded ${String(MAX_ITERATIONS)} iterations without a terminal turn`);
|
throw new Error(`Chat loop exceeded ${String(ctx.maxIterations)} iterations without a terminal turn`);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
await this.chatRepo.markPendingAsError(ctx.threadId);
|
await this.chatRepo.markPendingAsError(ctx.threadId);
|
||||||
throw err;
|
throw err;
|
||||||
@@ -190,7 +219,7 @@ export class ChatService {
|
|||||||
async *chatStream(args: ChatRequestArgs): AsyncGenerator<ChatStreamChunk> {
|
async *chatStream(args: ChatRequestArgs): AsyncGenerator<ChatStreamChunk> {
|
||||||
const ctx = await this.prepareContext(args);
|
const ctx = await this.prepareContext(args);
|
||||||
try {
|
try {
|
||||||
for (let i = 0; i < MAX_ITERATIONS; i += 1) {
|
for (let i = 0; i < ctx.maxIterations; i += 1) {
|
||||||
const adapter = this.adapters.get(ctx.llmType);
|
const adapter = this.adapters.get(ctx.llmType);
|
||||||
const accumulated: { content: string; toolCalls: Array<{ id: string; name: string; argumentsJson: string }> } = {
|
const accumulated: { content: string; toolCalls: Array<{ id: string; name: string; argumentsJson: string }> } = {
|
||||||
content: '',
|
content: '',
|
||||||
@@ -285,7 +314,7 @@ export class ChatService {
|
|||||||
yield { type: 'final', threadId: ctx.threadId, turnIndex: finalMsg.turnIndex };
|
yield { type: 'final', threadId: ctx.threadId, turnIndex: finalMsg.turnIndex };
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
throw new Error(`Chat loop exceeded ${String(MAX_ITERATIONS)} iterations without a terminal turn`);
|
throw new Error(`Chat loop exceeded ${String(ctx.maxIterations)} iterations without a terminal turn`);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
await this.chatRepo.markPendingAsError(ctx.threadId);
|
await this.chatRepo.markPendingAsError(ctx.threadId);
|
||||||
yield { type: 'error', message: (err as Error).message };
|
yield { type: 'error', message: (err as Error).message };
|
||||||
@@ -306,6 +335,7 @@ export class ChatService {
|
|||||||
toolList: ChatTool[];
|
toolList: ChatTool[];
|
||||||
projectId: string | null;
|
projectId: string | null;
|
||||||
startingTurnIndex: number;
|
startingTurnIndex: number;
|
||||||
|
maxIterations: number;
|
||||||
}> {
|
}> {
|
||||||
const agent = await this.agents.getByName(args.agentName);
|
const agent = await this.agents.getByName(args.agentName);
|
||||||
const llm = await this.llms.getByName(agent.llm.name);
|
const llm = await this.llms.getByName(agent.llm.name);
|
||||||
@@ -370,6 +400,7 @@ export class ChatService {
|
|||||||
toolList: filteredTools,
|
toolList: filteredTools,
|
||||||
projectId,
|
projectId,
|
||||||
startingTurnIndex,
|
startingTurnIndex,
|
||||||
|
maxIterations: resolveMaxIterations(agent.extras),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -136,7 +136,11 @@ export class SecretBackendRotator {
|
|||||||
`This is unrecoverable from inside mcpd — likely cause: OpenBao was re-initialized and all old tokens are invalid. ` +
|
`This is unrecoverable from inside mcpd — likely cause: OpenBao was re-initialized and all old tokens are invalid. ` +
|
||||||
`Remediation: mint a fresh token under role '${cfg.rotation.tokenRole}' using a working OpenBao admin token, ` +
|
`Remediation: mint a fresh token under role '${cfg.rotation.tokenRole}' using a working OpenBao admin token, ` +
|
||||||
`then \`mcpctl create secret ${cfg.tokenSecretRef.name} --data ${cfg.tokenSecretRef.key}=<new-token> --force\`. ` +
|
`then \`mcpctl create secret ${cfg.tokenSecretRef.name} --data ${cfg.tokenSecretRef.key}=<new-token> --force\`. ` +
|
||||||
`Original error: ${msg}`)
|
`Original error: ${msg}`,
|
||||||
|
// Preserve the original stack trace via Error.cause so the inner
|
||||||
|
// failure (network vs OpenBao API vs DNS) is recoverable from the
|
||||||
|
// wrapped throw.
|
||||||
|
{ cause: err })
|
||||||
: err;
|
: err;
|
||||||
const wrappedMsg = wrapped instanceof Error ? wrapped.message : String(wrapped);
|
const wrappedMsg = wrapped instanceof Error ? wrapped.message : String(wrapped);
|
||||||
await this.recordError(backendId, meta, wrappedMsg);
|
await this.recordError(backendId, meta, wrappedMsg);
|
||||||
|
|||||||
@@ -90,6 +90,10 @@ export const UpdateAgentSchema = z.object({
|
|||||||
});
|
});
|
||||||
|
|
||||||
/** Body schema for `POST /api/v1/agents/:name/chat`. */
|
/** Body schema for `POST /api/v1/agents/:name/chat`. */
|
||||||
|
// `.merge()` does NOT inherit `.strict()` from `AgentChatParamsSchema`, so we
|
||||||
|
// re-apply it on the merged schema. Without this, typo'd request fields (e.g.
|
||||||
|
// `temprature` instead of `temperature`) silently fall through and the agent
|
||||||
|
// uses the default — debuggable only by reading the LLM call payload.
|
||||||
export const AgentChatRequestSchema = AgentChatParamsSchema.merge(
|
export const AgentChatRequestSchema = AgentChatParamsSchema.merge(
|
||||||
z.object({
|
z.object({
|
||||||
threadId: z.string().min(1).optional(),
|
threadId: z.string().min(1).optional(),
|
||||||
@@ -105,7 +109,7 @@ export const AgentChatRequestSchema = AgentChatParamsSchema.merge(
|
|||||||
.optional(),
|
.optional(),
|
||||||
stream: z.boolean().optional(),
|
stream: z.boolean().optional(),
|
||||||
}),
|
}),
|
||||||
).refine((v) => v.message !== undefined || (v.messages?.length ?? 0) > 0, {
|
).strict().refine((v) => v.message !== undefined || (v.messages?.length ?? 0) > 0, {
|
||||||
message: 'Either `message` or `messages` is required',
|
message: 'Either `message` or `messages` is required',
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -410,4 +410,91 @@ describe('ChatService', () => {
|
|||||||
expect(ctx.body.tools).toHaveLength(1);
|
expect(ctx.body.tools).toHaveLength(1);
|
||||||
expect(ctx.body.tools?.[0]?.function.name).toBe(`s1${TOOL_NAME_SEPARATOR}a`);
|
expect(ctx.body.tools?.[0]?.function.name).toBe(`s1${TOOL_NAME_SEPARATOR}a`);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Regression: per-agent maxIterations override + clamp.
|
||||||
|
// Found by /gstack-review on 2026-04-25.
|
||||||
|
// Without the clamp, a hostile agent definition with `extras.maxIterations:1000000`
|
||||||
|
// could spin the loop into a near-infinite tool-call burn.
|
||||||
|
it('per-agent extras.maxIterations clamps below default and refuses absurd values', async () => {
|
||||||
|
const chatRepo = mockChatRepo();
|
||||||
|
const tools = mockTools({
|
||||||
|
listTools: vi.fn(async () => [{
|
||||||
|
name: `g${TOOL_NAME_SEPARATOR}t`, description: '', parameters: {},
|
||||||
|
}]),
|
||||||
|
callTool: vi.fn(async () => ({})),
|
||||||
|
});
|
||||||
|
// Agent with maxIterations=2 — only 2 tool-call rounds allowed before bail.
|
||||||
|
const agentsLowCap = {
|
||||||
|
getByName: vi.fn(async () => ({
|
||||||
|
id: 'agent-low', name: 'low', description: '', systemPrompt: '',
|
||||||
|
llm: { id: 'llm-1', name: 'qwen3-thinking' },
|
||||||
|
project: { id: 'proj-1', name: 'mcpctl-dev' },
|
||||||
|
proxyModelName: null, defaultParams: {},
|
||||||
|
extras: { maxIterations: 2 },
|
||||||
|
ownerId: 'owner-1', version: 1, createdAt: NOW, updatedAt: NOW,
|
||||||
|
})),
|
||||||
|
} as unknown as AgentService;
|
||||||
|
const adapter = scriptedAdapter([toolCall(`g${TOOL_NAME_SEPARATOR}t`, {})]);
|
||||||
|
const svc = new ChatService(
|
||||||
|
agentsLowCap, mockLlms(), adapterRegistry(adapter),
|
||||||
|
chatRepo, mockPromptRepo(), tools,
|
||||||
|
);
|
||||||
|
await expect(svc.chat({
|
||||||
|
agentName: 'low', userMessage: 'spin', ownerId: 'owner-1',
|
||||||
|
})).rejects.toThrow(/exceeded 2 iterations/);
|
||||||
|
|
||||||
|
// Hostile agent with maxIterations=1000000 — must clamp to 50, not iterate forever.
|
||||||
|
const agentsHostile = {
|
||||||
|
getByName: vi.fn(async () => ({
|
||||||
|
id: 'agent-bad', name: 'bad', description: '', systemPrompt: '',
|
||||||
|
llm: { id: 'llm-1', name: 'qwen3-thinking' },
|
||||||
|
project: { id: 'proj-1', name: 'mcpctl-dev' },
|
||||||
|
proxyModelName: null, defaultParams: {},
|
||||||
|
extras: { maxIterations: 1_000_000 },
|
||||||
|
ownerId: 'owner-1', version: 1, createdAt: NOW, updatedAt: NOW,
|
||||||
|
})),
|
||||||
|
} as unknown as AgentService;
|
||||||
|
const adapter2 = scriptedAdapter([toolCall(`g${TOOL_NAME_SEPARATOR}t`, {})]);
|
||||||
|
const chatRepo2 = mockChatRepo();
|
||||||
|
const svc2 = new ChatService(
|
||||||
|
agentsHostile, mockLlms(), adapterRegistry(adapter2),
|
||||||
|
chatRepo2, mockPromptRepo(), tools,
|
||||||
|
);
|
||||||
|
await expect(svc2.chat({
|
||||||
|
agentName: 'bad', userMessage: 'spin', ownerId: 'owner-1',
|
||||||
|
})).rejects.toThrow(/exceeded 50 iterations/);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Regression: thread message reads must enforce ownership.
|
||||||
|
// Found by /gstack-review on 2026-04-25.
|
||||||
|
// Without this, any caller with `view:agents` could read another user's thread
|
||||||
|
// by guessing/learning the threadId (CUIDs leak through SSE chunks + tool _meta).
|
||||||
|
it('listMessages refuses a thread owned by another user (404, not 403, to avoid id-enumeration)', async () => {
|
||||||
|
const chatRepo = mockChatRepo();
|
||||||
|
// Pre-seed a thread owned by 'alice'
|
||||||
|
await chatRepo.createThread({ agentId: 'agent-x', ownerId: 'alice' });
|
||||||
|
const aliceThread = chatRepo._threads[0]!;
|
||||||
|
await chatRepo.appendMessage({
|
||||||
|
threadId: aliceThread.id,
|
||||||
|
role: 'user',
|
||||||
|
content: 'private to alice',
|
||||||
|
});
|
||||||
|
|
||||||
|
const svc = new ChatService(
|
||||||
|
mockAgents(), mockLlms(), adapterRegistry(scriptedAdapter([chatCompletion('ok')])),
|
||||||
|
chatRepo, mockPromptRepo(), mockTools(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Bob requests Alice's thread by id — must 404.
|
||||||
|
await expect(svc.listMessages(aliceThread.id, 'bob'))
|
||||||
|
.rejects.toThrow(/not found/i);
|
||||||
|
|
||||||
|
// Alice gets her own messages.
|
||||||
|
const aliceMessages = await svc.listMessages(aliceThread.id, 'alice');
|
||||||
|
expect(aliceMessages.map((m) => m.content)).toEqual(['private to alice']);
|
||||||
|
|
||||||
|
// Genuinely missing thread — same 404 shape (no oracle leak).
|
||||||
|
await expect(svc.listMessages('cnonexistent000000000000000', 'alice'))
|
||||||
|
.rejects.toThrow(/not found/i);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user