Files
mcpctl/src/mcpd/tests/chat-service.test.ts
Michal 1f0be8a5c1 fix(agents): close gaps from /gstack-review
P1 — thread reads now enforce ownership
========================================
chat.service.ts / routes/agent-chat.ts
  GET /api/v1/threads/:id/messages was previously RBAC-mapped to
  view:agents (no resourceName scope) with the route comment promising
  "service-level owner check enforces fine-grained access" — but the
  service didn't actually check. Any caller with view:agents could read
  another user's thread by guessing/learning the threadId. CUIDs are
  hard to brute-force but they leak: SSE `final` chunks, agents-plugin
  `_meta.threadId`, and several response bodies surface them. Now
  ChatService.listMessages(threadId, ownerId) loads the thread, returns
  404 (not 403, to avoid id-enumeration via differential status codes)
  if ownerId doesn't match. Regression test in chat-service.test.ts
  covers Alice/Bob isolation + nonexistent-thread same-shape 404.

P2 — AgentChatRequestSchema strict mode
========================================
validation/agent.schema.ts
  `.merge()` does NOT inherit `.strict()` from AgentChatParamsSchema.
  Typo'd fields (e.g. `temprature`) fell through unnoticed and the agent
  silently used the default — debuggable only by reading the LLM call
  payload. Re-applied `.strict()` on the merged schema.

P2 — per-agent maxIterations override + clamp
==============================================
chat.service.ts
  Loop cap was a hard-coded module constant (12), wrong for both
  research-style agents (need higher) and cheap-probe agents (could opt
  lower). Now reads `agent.extras.maxIterations`, clamps 1..50, falls
  back to 12 default. The clamp is the soft-DoS guard: a hostile agent
  definition with `maxIterations:1000000` can't burn unbounded LLM calls
  per request. Both chat() and chatStream() use ctx.maxIterations now.
  Regression test covers low-cap override (rejects with `exceeded 2`)
  and hostile-value clamp (rejects with `exceeded 50`).

P3 — SSE write to closed socket
================================
routes/agent-chat.ts
  When the upstream adapter throws after some chunks were already
  written AND the client disconnected, the catch block tried to flush
  more chunks to a closed socket. Without an `on('error')` handler,
  Node emits unhandled 'error' events; once Pino is wired to alerting,
  this would page on every mid-stream disconnect. writeSseChunk now
  checks `reply.raw.destroyed || writableEnded` before writing.

P3 — BACKEND_TOKEN_DEAD preserves original stack
=================================================
services/secret-backend-rotator.service.ts
  When wrapping mintRoleToken/lookupSelf failures as
  BACKEND_TOKEN_DEAD, the new Error() discarded the original throw —
  hard to tell whether the inner failure was a network blip vs an
  OpenBao API mismatch vs DNS. Now uses `new Error(msg, { cause: err })`
  so the inner stack survives.

P3 — .gitignore .claude/scheduled_tasks.lock
=============================================
This persisted state file was leaking into every `git status`.

Tests
=====
mcpd 761/761 (+2 regression tests). mcplocal 715/715. cli 430/430.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 23:53:19 +01:00

501 lines
18 KiB
TypeScript

import { describe, it, expect, vi } from 'vitest';
import { ChatService, MAX_ITERATIONS, TOOL_NAME_SEPARATOR, type ChatToolDispatcher } from '../src/services/chat.service.js';
import type { AgentService } from '../src/services/agent.service.js';
import type { LlmService } from '../src/services/llm.service.js';
import type { LlmAdapterRegistry } from '../src/services/llm/dispatcher.js';
import type { LlmAdapter, NonStreamingResult, InferContext } from '../src/services/llm/types.js';
import type { IChatRepository } from '../src/repositories/chat.repository.js';
import type { IPromptRepository } from '../src/repositories/prompt.repository.js';
import type { ChatMessage, ChatThread, Prompt } from '@prisma/client';
const NOW = new Date();

/**
 * In-memory stand-in for IChatRepository; exposes its backing arrays as
 * `_msgs` / `_threads` so assertions can inspect persisted rows directly.
 */
function mockChatRepo(): IChatRepository & { _msgs: ChatMessage[]; _threads: ChatThread[] } {
  const messageRows: ChatMessage[] = [];
  const threadRows: ChatThread[] = [];
  let nextId = 1;
  return {
    _msgs: messageRows,
    _threads: threadRows,
    createThread: vi.fn(async ({ agentId, ownerId, title }) => {
      const row: ChatThread = {
        id: `thread-${String(nextId++)}`,
        agentId,
        ownerId,
        title: title ?? '',
        lastTurnAt: NOW,
        createdAt: NOW,
        updatedAt: NOW,
      };
      threadRows.push(row);
      return row;
    }),
    findThread: vi.fn(async (id: string) => threadRows.find((t) => t.id === id) ?? null),
    listThreadsByAgent: vi.fn(async (agentId: string) =>
      threadRows.filter((t) => t.agentId === agentId)),
    listMessages: vi.fn(async (threadId: string) => {
      const inThread = messageRows.filter((m) => m.threadId === threadId);
      return inThread.sort((a, b) => a.turnIndex - b.turnIndex);
    }),
    appendMessage: vi.fn(async (input) => {
      // When the caller does not pin a turnIndex, append at the end of the thread.
      const fallbackIndex = messageRows.filter((m) => m.threadId === input.threadId).length;
      const row: ChatMessage = {
        id: `msg-${String(nextId++)}`,
        threadId: input.threadId,
        turnIndex: input.turnIndex ?? fallbackIndex,
        role: input.role,
        content: input.content,
        toolCalls: (input.toolCalls ?? null) as ChatMessage['toolCalls'],
        toolCallId: input.toolCallId ?? null,
        status: input.status ?? 'complete',
        createdAt: NOW,
      };
      messageRows.push(row);
      return row;
    }),
    updateStatus: vi.fn(async (id: string, status) => {
      const row = messageRows.find((m) => m.id === id);
      if (row === undefined) throw new Error('not found');
      row.status = status;
      return row;
    }),
    markPendingAsError: vi.fn(async (threadId: string) => {
      const pending = messageRows.filter(
        (m) => m.threadId === threadId && m.status === 'pending',
      );
      for (const m of pending) m.status = 'error';
      return pending.length;
    }),
    touchThread: vi.fn(async () => undefined),
    nextTurnIndex: vi.fn(async (threadId: string) =>
      messageRows.filter((m) => m.threadId === threadId).length),
  };
}
/**
 * Minimal IPromptRepository fake backed by the supplied rows; the write
 * methods are bare spies since these tests never exercise them.
 */
function mockPromptRepo(rows: Prompt[] = []): IPromptRepository {
  const repo = {
    findAll: vi.fn(async () => rows),
    findGlobal: vi.fn(async () => rows.filter((p) => p.projectId === null)),
    findById: vi.fn(async (id: string) => rows.find((p) => p.id === id) ?? null),
    findByNameAndProject: vi.fn(async () => null),
    create: vi.fn(),
    update: vi.fn(),
    delete: vi.fn(),
  };
  return repo as unknown as IPromptRepository;
}
/** Tool dispatcher fake; individual methods can be overridden per test. */
function mockTools(impl: Partial<ChatToolDispatcher> = {}): ChatToolDispatcher {
  const listTools = impl.listTools ?? vi.fn(async () => []);
  const callTool = impl.callTool ?? vi.fn(async () => ({ ok: true }));
  return { listTools, callTool };
}
/**
 * AgentService fake. Every agent shares the same shape; the magic name
 * 'no-project' yields an agent with no project attached.
 */
function mockAgents(): AgentService {
  const getByName = vi.fn(async (name: string) => {
    const project = name === 'no-project'
      ? null
      : { id: 'proj-1', name: 'mcpctl-dev' };
    return {
      id: `agent-${name}`,
      name,
      description: 'desc',
      systemPrompt: 'You are a helpful agent.',
      llm: { id: 'llm-1', name: 'qwen3-thinking' },
      project,
      proxyModelName: null,
      defaultParams: { temperature: 0.5 },
      extras: {},
      ownerId: 'owner-1',
      version: 1,
      createdAt: NOW,
      updatedAt: NOW,
    };
  });
  return { getByName } as unknown as AgentService;
}
/** LlmService fake: resolves any name to an openai-type LLM and a fake API key. */
function mockLlms(): LlmService {
  const getByName = vi.fn(async (name: string) => ({
    id: 'llm-1',
    name,
    type: 'openai',
    model: 'qwen3-thinking',
    url: '',
    tier: 'fast',
    description: '',
    apiKeyRef: null,
    extraConfig: {},
    version: 1,
    createdAt: NOW,
    updatedAt: NOW,
  }));
  const resolveApiKey = vi.fn(async () => 'fake-key');
  return { getByName, resolveApiKey } as unknown as LlmService;
}
/**
 * Adapter that replays a scripted sequence of canned responses, one per
 * infer() call; once the script is exhausted it keeps returning the final
 * entry. infer is a vi.fn so tests can inspect the captured InferContext.
 */
function scriptedAdapter(responses: NonStreamingResult[]): LlmAdapter {
  let callIndex = 0;
  return {
    kind: 'scripted',
    infer: vi.fn(async (_ctx: InferContext) => {
      const next = responses[callIndex] ?? responses[responses.length - 1];
      callIndex += 1;
      // Only reachable when `responses` is empty.
      if (next === undefined) throw new Error('no scripted response');
      return next;
    }),
    stream: async function*(_ctx: InferContext) {
      yield { data: '[DONE]', done: true };
    },
  };
}
/** Wraps a single adapter as a registry that always returns it. */
function adapterRegistry(adapter: LlmAdapter): LlmAdapterRegistry {
  const registry = { get: () => adapter };
  return registry as unknown as LlmAdapterRegistry;
}
/** Builds a terminal chat.completion result whose single choice is plain text. */
function chatCompletion(content: string): NonStreamingResult {
  const choice = {
    index: 0,
    message: { role: 'assistant', content },
    finish_reason: 'stop',
  };
  return {
    status: 200,
    body: { id: 'cmpl-1', object: 'chat.completion', choices: [choice] },
  };
}
/** Builds a chat.completion result requesting exactly one function tool call. */
function toolCall(name: string, args: Record<string, unknown>): NonStreamingResult {
  const call = {
    id: `call-${name}`,
    type: 'function',
    function: { name, arguments: JSON.stringify(args) },
  };
  return {
    status: 200,
    body: {
      id: 'cmpl-1',
      object: 'chat.completion',
      choices: [{
        index: 0,
        message: { role: 'assistant', content: '', tool_calls: [call] },
        finish_reason: 'tool_calls',
      }],
    },
  };
}
describe('ChatService', () => {

it('plain text turn — persists user + assistant rows and returns the reply', async () => {
  const repo = mockChatRepo();
  const adapter = scriptedAdapter([chatCompletion('hello back')]);
  const service = new ChatService(
    mockAgents(), mockLlms(), adapterRegistry(adapter),
    repo, mockPromptRepo(), mockTools(),
  );

  const result = await service.chat({
    agentName: 'reviewer',
    userMessage: 'hi',
    ownerId: 'owner-1',
  });

  expect(result.assistant).toBe('hello back');
  // Exactly one user row and one completed assistant row were persisted.
  const rows = repo._msgs.filter((m) => m.threadId === result.threadId);
  expect(rows.map((m) => m.role)).toEqual(['user', 'assistant']);
  expect(rows[1]?.status).toBe('complete');
});
it('runs a full tool-use round-trip and ends with a text reply', async () => {
const chatRepo = mockChatRepo();
const tools = mockTools({
listTools: vi.fn(async () => [{
name: `grafana${TOOL_NAME_SEPARATOR}query`,
description: 'query grafana',
parameters: { type: 'object', properties: {} },
}]),
callTool: vi.fn(async () => ({ rows: [{ value: 42 }] })),
});
const adapter = scriptedAdapter([
toolCall(`grafana${TOOL_NAME_SEPARATOR}query`, { q: 'cpu' }),
chatCompletion('the answer is 42'),
]);
const svc = new ChatService(
mockAgents(), mockLlms(), adapterRegistry(adapter),
chatRepo, mockPromptRepo(), tools,
);
const result = await svc.chat({
agentName: 'reviewer',
userMessage: 'what is cpu?',
ownerId: 'owner-1',
});
expect(result.assistant).toBe('the answer is 42');
expect(tools.callTool).toHaveBeenCalledWith({
projectId: 'proj-1',
serverName: 'grafana',
toolName: 'query',
args: { q: 'cpu' },
});
const stored = chatRepo._msgs.filter((m) => m.threadId === result.threadId);
expect(stored.map((m) => m.role)).toEqual(['user', 'assistant', 'tool', 'assistant']);
// No `pending` rows leaked.
expect(stored.every((m) => m.status === 'complete')).toBe(true);
// Tool turn's toolCallId links back.
const toolTurn = stored.find((m) => m.role === 'tool');
expect(toolTurn?.toolCallId).toBe(`call-grafana${TOOL_NAME_SEPARATOR}query`);
});
it('caps the loop at MAX_ITERATIONS when the model never settles', async () => {
const chatRepo = mockChatRepo();
const tools = mockTools({
listTools: vi.fn(async () => [{
name: `g${TOOL_NAME_SEPARATOR}t`,
description: '',
parameters: { type: 'object' },
}]),
callTool: vi.fn(async () => ({})),
});
// Always return a tool_call → the loop never reaches a terminal turn.
const adapter = scriptedAdapter([toolCall(`g${TOOL_NAME_SEPARATOR}t`, {})]);
const svc = new ChatService(
mockAgents(), mockLlms(), adapterRegistry(adapter),
chatRepo, mockPromptRepo(), tools,
);
await expect(svc.chat({
agentName: 'reviewer',
userMessage: 'loop forever',
ownerId: 'owner-1',
})).rejects.toThrow(new RegExp(`exceeded ${String(MAX_ITERATIONS)}`));
// After failure, no row should remain `pending`.
expect(chatRepo._msgs.every((m) => m.status !== 'pending')).toBe(true);
});
it('flips pending rows to error when the adapter throws mid-loop', async () => {
const chatRepo = mockChatRepo();
const tools = mockTools({
listTools: vi.fn(async () => [{
name: `g${TOOL_NAME_SEPARATOR}t`, description: '', parameters: {},
}]),
callTool: vi.fn(async () => ({})),
});
const adapter: LlmAdapter = {
kind: 'fail-after-one',
infer: vi.fn()
.mockResolvedValueOnce(toolCall(`g${TOOL_NAME_SEPARATOR}t`, {}))
.mockRejectedValueOnce(new Error('upstream blew up')),
stream: async function*() { yield { data: '[DONE]', done: true }; },
};
const svc = new ChatService(
mockAgents(), mockLlms(), adapterRegistry(adapter),
chatRepo, mockPromptRepo(), tools,
);
await expect(svc.chat({
agentName: 'reviewer',
userMessage: 'go',
ownerId: 'owner-1',
})).rejects.toThrow('upstream blew up');
expect(chatRepo._msgs.some((m) => m.status === 'error')).toBe(false);
expect(chatRepo._msgs.every((m) => m.status !== 'pending')).toBe(true);
});
it('merges per-call params over agent.defaultParams (override wins)', async () => {
const chatRepo = mockChatRepo();
const adapter = scriptedAdapter([chatCompletion('ok')]);
const inferSpy = adapter.infer as ReturnType<typeof vi.fn>;
const svc = new ChatService(
mockAgents(), mockLlms(), adapterRegistry(adapter),
chatRepo, mockPromptRepo(), mockTools(),
);
await svc.chat({
agentName: 'reviewer',
userMessage: 'hi',
ownerId: 'owner-1',
params: { temperature: 0.9, max_tokens: 256 },
});
const ctx = inferSpy.mock.calls[0][0] as InferContext;
expect(ctx.body.temperature).toBe(0.9);
expect(ctx.body.max_tokens).toBe(256);
});
it('forwards `extra` keys into the body for provider-specific knobs', async () => {
const chatRepo = mockChatRepo();
const adapter = scriptedAdapter([chatCompletion('ok')]);
const inferSpy = adapter.infer as ReturnType<typeof vi.fn>;
const svc = new ChatService(
mockAgents(), mockLlms(), adapterRegistry(adapter),
chatRepo, mockPromptRepo(), mockTools(),
);
await svc.chat({
agentName: 'reviewer',
userMessage: 'hi',
ownerId: 'owner-1',
params: { extra: { metadata: { user_id: 'abc' }, repetition_penalty: 1.05 } },
});
const ctx = inferSpy.mock.calls[0][0] as InferContext;
expect((ctx.body as Record<string, unknown>)['repetition_penalty']).toBe(1.05);
expect((ctx.body as Record<string, unknown>)['metadata']).toEqual({ user_id: 'abc' });
});
it('builds a system block from agent.systemPrompt + project prompts (priority desc)', async () => {
const chatRepo = mockChatRepo();
const adapter = scriptedAdapter([chatCompletion('ok')]);
const inferSpy = adapter.infer as ReturnType<typeof vi.fn>;
const prompts: Prompt[] = [
{
id: 'p1', name: 'low', content: 'LOW prompt',
projectId: 'proj-1', priority: 1, summary: null, chapters: null,
linkTarget: null, version: 1, createdAt: NOW, updatedAt: NOW,
},
{
id: 'p2', name: 'high', content: 'HIGH prompt',
projectId: 'proj-1', priority: 9, summary: null, chapters: null,
linkTarget: null, version: 1, createdAt: NOW, updatedAt: NOW,
},
];
const svc = new ChatService(
mockAgents(), mockLlms(), adapterRegistry(adapter),
chatRepo, mockPromptRepo(prompts), mockTools(),
);
await svc.chat({ agentName: 'reviewer', userMessage: 'hi', ownerId: 'owner-1' });
const ctx = inferSpy.mock.calls[0][0] as InferContext;
const sys = ctx.body.messages.find((m) => m.role === 'system');
expect(typeof sys?.content).toBe('string');
const text = sys?.content as string;
// High-priority prompt comes before low-priority.
expect(text.indexOf('HIGH prompt')).toBeLessThan(text.indexOf('LOW prompt'));
// Agent's own system prompt leads.
expect(text.indexOf('You are a helpful agent.')).toBeLessThan(text.indexOf('HIGH prompt'));
});
it('refuses tool calls when the agent has no project attached', async () => {
const chatRepo = mockChatRepo();
const adapter = scriptedAdapter([toolCall(`x${TOOL_NAME_SEPARATOR}y`, {})]);
const tools = mockTools({
listTools: vi.fn(async () => [{ name: `x${TOOL_NAME_SEPARATOR}y`, description: '', parameters: {} }]),
});
const svc = new ChatService(
mockAgents(), mockLlms(), adapterRegistry(adapter),
chatRepo, mockPromptRepo(), tools,
);
await expect(svc.chat({
agentName: 'no-project',
userMessage: 'go',
ownerId: 'owner-1',
})).rejects.toThrow(/Project/);
});
it('honours tools_allowlist (filters tools before sending to adapter)', async () => {
const chatRepo = mockChatRepo();
const adapter = scriptedAdapter([chatCompletion('ok')]);
const inferSpy = adapter.infer as ReturnType<typeof vi.fn>;
const tools = mockTools({
listTools: vi.fn(async () => [
{ name: `s1${TOOL_NAME_SEPARATOR}a`, description: '', parameters: {} },
{ name: `s1${TOOL_NAME_SEPARATOR}b`, description: '', parameters: {} },
]),
});
const svc = new ChatService(
mockAgents(), mockLlms(), adapterRegistry(adapter),
chatRepo, mockPromptRepo(), tools,
);
await svc.chat({
agentName: 'reviewer',
userMessage: 'hi',
ownerId: 'owner-1',
params: { tools_allowlist: [`s1${TOOL_NAME_SEPARATOR}a`] },
});
const ctx = inferSpy.mock.calls[0][0] as InferContext;
expect(ctx.body.tools).toHaveLength(1);
expect(ctx.body.tools?.[0]?.function.name).toBe(`s1${TOOL_NAME_SEPARATOR}a`);
});
// Regression: per-agent maxIterations override + clamp.
// Found by /gstack-review on 2026-04-25.
// Without the clamp, a hostile agent definition with `extras.maxIterations:1000000`
// could spin the loop into a near-infinite tool-call burn.
it('per-agent extras.maxIterations clamps below default and refuses absurd values', async () => {
  const loopingTools = mockTools({
    listTools: vi.fn(async () => [{
      name: `g${TOOL_NAME_SEPARATOR}t`, description: '', parameters: {},
    }]),
    callTool: vi.fn(async () => ({})),
  });
  // Builds an AgentService whose single agent carries the given maxIterations.
  const agentStub = (id: string, name: string, maxIterations: number) => ({
    getByName: vi.fn(async () => ({
      id, name, description: '', systemPrompt: '',
      llm: { id: 'llm-1', name: 'qwen3-thinking' },
      project: { id: 'proj-1', name: 'mcpctl-dev' },
      proxyModelName: null, defaultParams: {},
      extras: { maxIterations },
      ownerId: 'owner-1', version: 1, createdAt: NOW, updatedAt: NOW,
    })),
  } as unknown as AgentService);

  // Agent with maxIterations=2 — only 2 tool-call rounds allowed before bail.
  const lowCapService = new ChatService(
    agentStub('agent-low', 'low', 2), mockLlms(),
    adapterRegistry(scriptedAdapter([toolCall(`g${TOOL_NAME_SEPARATOR}t`, {})])),
    mockChatRepo(), mockPromptRepo(), loopingTools,
  );
  await expect(lowCapService.chat({
    agentName: 'low', userMessage: 'spin', ownerId: 'owner-1',
  })).rejects.toThrow(/exceeded 2 iterations/);

  // Hostile agent with maxIterations=1000000 — must clamp to 50, not iterate forever.
  const hostileService = new ChatService(
    agentStub('agent-bad', 'bad', 1_000_000), mockLlms(),
    adapterRegistry(scriptedAdapter([toolCall(`g${TOOL_NAME_SEPARATOR}t`, {})])),
    mockChatRepo(), mockPromptRepo(), loopingTools,
  );
  await expect(hostileService.chat({
    agentName: 'bad', userMessage: 'spin', ownerId: 'owner-1',
  })).rejects.toThrow(/exceeded 50 iterations/);
});
// Regression: thread message reads must enforce ownership.
// Found by /gstack-review on 2026-04-25.
// Without this, any caller with `view:agents` could read another user's thread
// by guessing/learning the threadId (CUIDs leak through SSE chunks + tool _meta).
it('listMessages refuses a thread owned by another user (404, not 403, to avoid id-enumeration)', async () => {
  const repo = mockChatRepo();
  // Pre-seed a thread owned by 'alice' holding one private message.
  await repo.createThread({ agentId: 'agent-x', ownerId: 'alice' });
  const aliceThread = repo._threads[0]!;
  await repo.appendMessage({
    threadId: aliceThread.id,
    role: 'user',
    content: 'private to alice',
  });
  const service = new ChatService(
    mockAgents(), mockLlms(), adapterRegistry(scriptedAdapter([chatCompletion('ok')])),
    repo, mockPromptRepo(), mockTools(),
  );

  // Bob requests Alice's thread by id — must 404.
  await expect(service.listMessages(aliceThread.id, 'bob'))
    .rejects.toThrow(/not found/i);
  // Alice gets her own messages.
  const aliceMessages = await service.listMessages(aliceThread.id, 'alice');
  expect(aliceMessages.map((m) => m.content)).toEqual(['private to alice']);
  // Genuinely missing thread — same 404 shape (no oracle leak).
  await expect(service.listMessages('cnonexistent000000000000000', 'alice'))
    .rejects.toThrow(/not found/i);
});
});