Compare commits

..

3 Commits

Author SHA1 Message Date
Michal
3a28128fb4 feat(agent): MCP-correct chat agent shim on top of LiteLLM
New package @mcpctl/agent that replaces LiteLLM's broken MCP
integration (dropped Mcp-Session-Id, ignored tools/list_changed) with
a thin ~200 LOC loop built on @modelcontextprotocol/sdk +
openai SDK. LiteLLM stays in its actual lane — OpenAI-compatible model
routing — and this agent handles MCP correctly.

Core (src/agent.ts):
  - StreamableHTTPClientTransport for MCP (auto-preserves Mcp-Session-Id).
  - Re-fetches tools/list at the top of every loop so list_changed
    notifications surface new tools to the model on the next turn
    (fixes the gated-session case: begin_session reveals the full
    upstream tool set, next round's inference sees all of them).
  - OpenAI-compatible inference via process.env.AGENT_LLM_BASE_URL
    — points at LiteLLM or vLLM directly.
  - Graceful failure: broken tool calls are serialized back into the
    conversation as the tool's response, agent keeps going.
  - maxIterations cap stops runaway loops; hitIterationLimit surfaces
    truncation in the result.
  - Structural `McpLike` / `LlmLike` interfaces keep the loop testable
    without booting real SDKs.

CLI (src/cli.ts):
  mcpctl-agent run "<prompt>" \
    --model qwen3-thinking --project sre \
    [--system "..."] [--max-iterations N] [-o text|json] [--verbose]
  Env fallbacks: AGENT_MCP_URL, AGENT_MCP_TOKEN,
                 AGENT_LLM_BASE_URL, AGENT_LLM_API_KEY, AGENT_MODEL

Tests (7 cases):
  - direct answer (no tool call) → ok
  - single-round tool call + synthesis → message history correct
  - list_changed refresh: tools/list called at startup + after each
    round → next inference sees newly-exposed tools
  - maxIterations cap → hitIterationLimit flag set
  - failing tool → error serialized into conversation, agent recovers
  - systemPrompt prepended
  - mcp.close() runs even when loop throws (finally-block guarantee)

End-to-end verified against live cluster:
  Round 1: sees 1 tool (begin_session) → calls it
  Round 2: sees 115 tools (gate opened) → calls aws-docs/search_documentation
  Final: model synthesizes answer
  — LiteLLM's chat UI cannot do this today; this loop does.

Still to do (follow-up PRs):
  - Wire into mcpctl binary as `mcpctl agent run ...`
  - Docker image + Pulumi deploy for a long-running HTTP service mode
  - Minimal chat UI (HTMX or plain fetch)
  - Streaming responses

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 18:24:29 +01:00
Michal
6946250090 Revert "feat(mcplocal): per-McpToken gate-ungate cache so service tokens survive proxies"
All checks were successful
CI/CD / lint (push) Successful in 51s
CI/CD / typecheck (push) Successful in 1m46s
CI/CD / test (push) Successful in 1m3s
CI/CD / build (push) Successful in 2m14s
CI/CD / smoke (push) Successful in 4m43s
CI/CD / publish (push) Successful in 1m23s
This reverts commit 39df459bb1.
2026-04-18 18:16:18 +01:00
1480d268c7 Merge pull request #50 feat: McpToken — HTTP-mode mcplocal, CLI verbs, audit plumbing
Some checks failed
CI/CD / typecheck (push) Successful in 55s
CI/CD / lint (push) Successful in 1m42s
CI/CD / test (push) Successful in 1m5s
CI/CD / smoke (push) Failing after 3m40s
CI/CD / build (push) Successful in 3m52s
CI/CD / publish (push) Has been skipped
2026-04-18 16:37:50 +00:00
14 changed files with 649 additions and 191 deletions

105
pnpm-lock.yaml generated
View File

@@ -39,6 +39,28 @@ importers:
specifier: ^4.0.18
version: 4.0.18(@types/node@25.3.0)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)
src/agent:
dependencies:
'@mcpctl/shared':
specifier: workspace:*
version: link:../shared
'@modelcontextprotocol/sdk':
specifier: ^1.0.0
version: 1.26.0(zod@3.25.76)
commander:
specifier: ^13.0.0
version: 13.1.0
openai:
specifier: ^4.77.0
version: 4.104.0(ws@8.19.0)(zod@3.25.76)
devDependencies:
'@types/node':
specifier: ^25.3.0
version: 25.3.0
vitest:
specifier: ^4.0.0
version: 4.0.18(@types/node@25.3.0)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)
src/cli:
dependencies:
'@inkjs/ui':
@@ -989,6 +1011,10 @@ packages:
abbrev@1.1.1:
resolution: {integrity: sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==}
abort-controller@3.0.0:
resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==}
engines: {node: '>=6.5'}
abstract-logging@2.0.1:
resolution: {integrity: sha512-2BjRTZxTPvheOvGbBslFSYOUkr+SjPtOnrLP33f+VIWLzezQpZcqVg7ja3L4dBXmzzgwT+a029jRx5PCi3JuiA==}
@@ -1014,6 +1040,10 @@ packages:
resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==}
engines: {node: '>= 14'}
agentkeepalive@4.6.0:
resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==}
engines: {node: '>= 8.0.0'}
ajv-formats@3.0.1:
resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==}
peerDependencies:
@@ -1509,6 +1539,10 @@ packages:
resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==}
engines: {node: '>= 0.6'}
event-target-shim@5.0.1:
resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
engines: {node: '>=6'}
events-universal@1.0.1:
resolution: {integrity: sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==}
@@ -1610,10 +1644,17 @@ packages:
flatted@3.3.3:
resolution: {integrity: sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==}
form-data-encoder@1.7.2:
resolution: {integrity: sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==}
form-data@4.0.5:
resolution: {integrity: sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==}
engines: {node: '>= 6'}
formdata-node@4.4.1:
resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==}
engines: {node: '>= 12.20'}
forwarded@0.2.0:
resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==}
engines: {node: '>= 0.6'}
@@ -1726,6 +1767,9 @@ packages:
resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==}
engines: {node: '>= 6'}
humanize-ms@1.2.1:
resolution: {integrity: sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==}
iconv-lite@0.7.2:
resolution: {integrity: sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==}
engines: {node: '>=0.10.0'}
@@ -2012,6 +2056,11 @@ packages:
node-addon-api@5.1.0:
resolution: {integrity: sha512-eh0GgfEkpnoWDq+VY8OyvYhFEzBk6jIYbRKdIlyTiAXIVJ8PyBaKb0rp7oDtoddbdoHWhq8wwr+XZ81F1rpNdA==}
node-domexception@1.0.0:
resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==}
engines: {node: '>=10.5.0'}
deprecated: Use your platform's native DOMException instead
node-fetch-native@1.6.7:
resolution: {integrity: sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q==}
@@ -2073,6 +2122,18 @@ packages:
resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==}
engines: {node: '>=6'}
openai@4.104.0:
resolution: {integrity: sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==}
hasBin: true
peerDependencies:
ws: ^8.18.0
zod: ^3.23.8
peerDependenciesMeta:
ws:
optional: true
zod:
optional: true
openid-client@6.8.2:
resolution: {integrity: sha512-uOvTCndr4udZsKihJ68H9bUICrriHdUVJ6Az+4Ns6cW55rwM5h0bjVIzDz2SxgOI84LKjFyjOFvERLzdTUROGA==}
@@ -2647,6 +2708,10 @@ packages:
jsdom:
optional: true
web-streams-polyfill@4.0.0-beta.3:
resolution: {integrity: sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==}
engines: {node: '>= 14'}
webidl-conversions@3.0.1:
resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
@@ -3509,6 +3574,10 @@ snapshots:
abbrev@1.1.1: {}
abort-controller@3.0.0:
dependencies:
event-target-shim: 5.0.1
abstract-logging@2.0.1: {}
accepts@2.0.0:
@@ -3530,6 +3599,10 @@ snapshots:
agent-base@7.1.4: {}
agentkeepalive@4.6.0:
dependencies:
humanize-ms: 1.2.1
ajv-formats@3.0.1(ajv@8.18.0):
optionalDependencies:
ajv: 8.18.0
@@ -4020,6 +4093,8 @@ snapshots:
etag@1.8.1: {}
event-target-shim@5.0.1: {}
events-universal@1.0.1:
dependencies:
bare-events: 2.8.2
@@ -4168,6 +4243,8 @@ snapshots:
flatted@3.3.3: {}
form-data-encoder@1.7.2: {}
form-data@4.0.5:
dependencies:
asynckit: 0.4.0
@@ -4176,6 +4253,11 @@ snapshots:
hasown: 2.0.2
mime-types: 2.1.35
formdata-node@4.4.1:
dependencies:
node-domexception: 1.0.0
web-streams-polyfill: 4.0.0-beta.3
forwarded@0.2.0: {}
fresh@2.0.0: {}
@@ -4298,6 +4380,10 @@ snapshots:
transitivePeerDependencies:
- supports-color
humanize-ms@1.2.1:
dependencies:
ms: 2.1.3
iconv-lite@0.7.2:
dependencies:
safer-buffer: 2.1.2
@@ -4551,6 +4637,8 @@ snapshots:
node-addon-api@5.1.0: {}
node-domexception@1.0.0: {}
node-fetch-native@1.6.7: {}
node-fetch@2.7.0:
@@ -4600,6 +4688,21 @@ snapshots:
dependencies:
mimic-fn: 2.1.0
openai@4.104.0(ws@8.19.0)(zod@3.25.76):
dependencies:
'@types/node': 18.19.130
'@types/node-fetch': 2.6.13
abort-controller: 3.0.0
agentkeepalive: 4.6.0
form-data-encoder: 1.7.2
formdata-node: 4.4.1
node-fetch: 2.7.0
optionalDependencies:
ws: 8.19.0
zod: 3.25.76
transitivePeerDependencies:
- encoding
openid-client@6.8.2:
dependencies:
jose: 6.1.3
@@ -5211,6 +5314,8 @@ snapshots:
- tsx
- yaml
web-streams-polyfill@4.0.0-beta.3: {}
webidl-conversions@3.0.1: {}
whatwg-url@5.0.0:

28
src/agent/package.json Normal file
View File

@@ -0,0 +1,28 @@
{
"name": "@mcpctl/agent",
"version": "0.0.1",
"private": true,
"type": "module",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"bin": {
"mcpctl-agent": "./dist/cli.js"
},
"scripts": {
"build": "tsc --build",
"clean": "rimraf dist",
"run": "node dist/cli.js",
"test": "vitest",
"test:run": "vitest run"
},
"dependencies": {
"@mcpctl/shared": "workspace:*",
"@modelcontextprotocol/sdk": "^1.0.0",
"commander": "^13.0.0",
"openai": "^4.77.0"
},
"devDependencies": {
"@types/node": "^25.3.0",
"vitest": "^4.0.0"
}
}

201
src/agent/src/agent.ts Normal file
View File

@@ -0,0 +1,201 @@
/**
* MCP-aware chat agent loop.
*
* Correct where LiteLLM's integration is broken:
* - Uses `@modelcontextprotocol/sdk`'s `StreamableHTTPClientTransport`, which
* preserves `Mcp-Session-Id` across requests automatically.
* - Honors `notifications/tools/list_changed`: after every tool-call round we
* re-fetch the tool list before the next model inference, so an MCP server
* that reveals new tools mid-session (gated sessions, auto-install) shows
* them to the model on the next turn.
*
* Inference goes through an OpenAI-compatible endpoint (LiteLLM at
* http://litellm…:4000/v1 in this repo's deployment; vLLM works too). That
* keeps LiteLLM doing its actual job — model routing — and strips it of the
* MCP role it was failing at.
*/
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
import OpenAI from 'openai';
import type {
ChatCompletionMessageParam,
ChatCompletionTool,
ChatCompletionMessageToolCall,
} from 'openai/resources/chat/completions';
/**
 * Settings for a single agent run: where the MCP endpoint and the
 * OpenAI-compatible endpoint live, the credentials for each, which model to
 * ask, and the loop limits.
 */
export interface AgentConfig {
/** Full URL of the MCP endpoint, e.g. http://mcp.mcpctl.svc:3200/projects/sre/mcp */
mcpUrl: string;
/** Raw `mcpctl_pat_…` bearer for the MCP endpoint (sent as `Authorization: Bearer …`). */
mcpToken: string;
/** OpenAI-compatible base URL, e.g. http://litellm…:4000/v1 */
llmBaseUrl: string;
/** API key for the OpenAI-compatible endpoint (LiteLLM master key). */
llmApiKey: string;
/** Model name as known to the OpenAI endpoint, e.g. qwen3-thinking */
model: string;
/** Optional system prompt (prepended as `role:'system'` if given). */
systemPrompt?: string;
/** Hard cap on loop iterations; stops runaway agents. Default 20. */
maxIterations?: number;
/** Per-tool-call timeout ms passed to the MCP SDK. Default 60_000. */
toolTimeoutMs?: number;
}
/**
 * Optional collaborators for `runAgent`. Every field may be omitted: the
 * factories fall back to the module's default SDK-backed implementations and
 * the logger falls back to a silent no-op.
 */
export interface AgentDeps {
/** Injectable for tests. Creates the MCP Client + transport. */
mcpClientFactory?: (cfg: AgentConfig) => Promise<McpLike>;
/** Injectable for tests. Creates the OpenAI-compatible client. */
llmClientFactory?: (cfg: AgentConfig) => LlmLike;
/** Optional per-iteration logger (stdout, audit sink, etc.). */
log?: (line: string) => void;
}
/**
 * Structural typing for the MCP client surface we actually use. Keeps the
 * loop testable without importing the concrete SDK in test fixtures. Optional
 * fields are `T | undefined` (not `T?`) to stay compatible with the MCP SDK's
 * own types under `exactOptionalPropertyTypes`.
 */
export interface McpLike {
/** Current tool catalogue; the loop calls this once up front and again after every tool-call round. */
listTools(): Promise<{ tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }>;
/** Invoke one tool. A rejection is caught by the loop and reported to the model as the tool's result. */
callTool(args: { name: string; arguments: Record<string, unknown> }): Promise<unknown>;
/** Release the connection; the loop calls this from a `finally` block and ignores rejections. */
close(): Promise<void>;
}
/**
 * Structural typing for the one OpenAI-client method the loop uses:
 * `chat.completions.create`. Only `choices[0].message` of the response is
 * read by the loop (its `content` and `tool_calls`).
 */
export interface LlmLike {
chat: {
completions: {
create(body: {
model: string;
messages: ChatCompletionMessageParam[];
tools?: ChatCompletionTool[];
tool_choice?: 'auto' | 'none' | { type: 'function'; function: { name: string } };
}): Promise<{ choices: Array<{ message: { role: 'assistant'; content: string | null; tool_calls?: ChatCompletionMessageToolCall[] }; finish_reason?: string | null }> }>;
};
};
}
/** Outcome of one `runAgent` call. */
export interface AgentResult {
/** The final assistant message (after all tool-call rounds). */
finalText: string;
/** Full message history (system/user/assistant/tool), useful for eval + debugging. */
messages: ChatCompletionMessageParam[];
/** Number of tool-call rounds that ran. Zero if the model answered directly. */
rounds: number;
/** True if the loop terminated because `maxIterations` was hit. */
hitIterationLimit: boolean;
}
/**
 * Run one prompt through the tool-calling loop until the model answers
 * without requesting tools, or until `maxIterations` inferences have been made.
 *
 * Each iteration: ask the model (offering the current MCP tool list), execute
 * every tool call it requested, append the results to the conversation, then
 * re-fetch the tool list so newly revealed tools are visible next turn.
 *
 * @param prompt User message that starts the conversation.
 * @param config Endpoints, credentials, model name and loop limits.
 * @param deps   Optional injected factories / logger (used by tests).
 * @returns Final text, full message history, number of tool-call rounds, and
 *   whether the iteration cap cut the run short. When the cap is hit,
 *   `finalText` is the most recent non-empty assistant text (or `''`).
 * @throws Anything thrown by the MCP factory, `listTools`, or the LLM call,
 *   plus an `Error` when the LLM response carries no choices. A failing tool
 *   call does NOT throw: its error is fed back to the model as the tool result.
 */
export async function runAgent(prompt: string, config: AgentConfig, deps: AgentDeps = {}): Promise<AgentResult> {
  const log = deps.log ?? (() => { /* silent */ });
  const maxIterations = config.maxIterations ?? 20;
  // Thrown values are not guaranteed to be Error instances; never read
  // `.message` off an unknown value blindly.
  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  const mcp = await (deps.mcpClientFactory ?? defaultMcpFactory)(config);
  try {
    const llm = (deps.llmClientFactory ?? defaultLlmFactory)(config);

    const messages: ChatCompletionMessageParam[] = [];
    if (config.systemPrompt) messages.push({ role: 'system', content: config.systemPrompt });
    messages.push({ role: 'user', content: prompt });

    let tools = toOpenAiTools(await mcp.listTools());
    log(`[agent] starting with ${tools.length} MCP tools`);

    let rounds = 0;
    for (let i = 0; i < maxIterations; i++) {
      const body: Parameters<LlmLike['chat']['completions']['create']>[0] = {
        model: config.model,
        messages,
      };
      // Only advertise tools when there are some; some OpenAI-compatible
      // backends reject an empty `tools` array.
      if (tools.length > 0) {
        body.tools = tools;
        body.tool_choice = 'auto';
      }

      const reply = await llm.chat.completions.create(body);
      const choice = reply.choices?.[0];
      if (!choice) throw new Error('LLM returned no choices');
      const msg = choice.message;
      messages.push(msg);

      const toolCalls = msg.tool_calls ?? [];
      if (toolCalls.length === 0) {
        log(`[agent] done after ${rounds} tool-call round(s)`);
        return { finalText: msg.content ?? '', messages, rounds, hitIterationLimit: false };
      }

      rounds++;
      log(`[agent] round ${rounds}: model asked to call ${toolCalls.length} tool(s)`);

      for (const tc of toolCalls) {
        const name = tc.function.name;
        let args: Record<string, unknown> = {};
        try {
          const parsed: unknown = tc.function.arguments ? JSON.parse(tc.function.arguments) : {};
          // MCP tool arguments must be a JSON object; `null`, arrays and
          // scalars are valid JSON but not valid arguments.
          if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
            args = parsed as Record<string, unknown>;
          } else {
            log(`[agent] tool ${name}: arguments were not a JSON object — sending empty args`);
          }
        } catch (err) {
          log(`[agent] tool ${name}: could not parse arguments (${describeError(err)}) — sending empty args`);
        }
        log(`[agent] → ${name}(${truncate(JSON.stringify(args), 120)})`);

        let result: unknown;
        try {
          result = await mcp.callTool({ name, arguments: args });
        } catch (err) {
          // Graceful failure: the model sees the error text and can recover.
          const reason = describeError(err);
          result = { error: reason };
          log(`[agent] ← ERROR: ${reason}`);
        }

        messages.push({
          role: 'tool',
          tool_call_id: tc.id,
          // JSON.stringify(undefined) is `undefined`, which is not a valid
          // message content; fall back to the JSON literal `null`.
          content: typeof result === 'string' ? result : (JSON.stringify(result) ?? 'null'),
        });
      }

      // MCP server may have emitted notifications/tools/list_changed during a
      // tool call (e.g. gated sessions revealing tools after begin_session).
      // Simplest correctness: re-fetch after every round, before the next
      // inference, so the model always sees the fresh tool list.
      tools = toOpenAiTools(await mcp.listTools());
    }

    log(`[agent] hit iteration limit (${maxIterations}) — returning partial`);
    // Every completed iteration ends with a `tool` message, so the last entry
    // is never the assistant's. Walk back to the most recent assistant text so
    // the caller receives whatever partial answer exists.
    let tail = '';
    for (let idx = messages.length - 1; idx >= 0; idx--) {
      const candidate = messages[idx]!;
      if (candidate.role === 'assistant' && typeof candidate.content === 'string' && candidate.content !== '') {
        tail = candidate.content;
        break;
      }
    }
    return { finalText: tail, messages, rounds, hitIterationLimit: true };
  } finally {
    await mcp.close().catch(() => { /* best-effort */ });
  }
}
/**
 * Convert an MCP `tools/list` result into OpenAI function-tool definitions.
 * `description` and `parameters` are emitted only when the MCP tool defines
 * them, so absent fields stay absent instead of becoming `undefined` keys.
 */
function toOpenAiTools(listed: { tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }): ChatCompletionTool[] {
  const converted: ChatCompletionTool[] = [];
  for (const { name, description, inputSchema } of listed.tools) {
    const spec = {
      name,
      ...(description === undefined ? {} : { description }),
      ...(inputSchema === undefined ? {} : { parameters: inputSchema as Record<string, unknown> }),
    };
    converted.push({ type: 'function', function: spec } as ChatCompletionTool);
  }
  return converted;
}
/**
 * Shorten `s` to at most `n` characters for log output.
 *
 * Strings that already fit are returned unchanged. Longer strings keep their
 * first `n - 1` characters and get a trailing `…` so the reader can tell the
 * value was cut. A non-positive `n` yields the empty string.
 */
function truncate(s: string, n: number): string {
  if (s.length <= n) return s;
  // Guard: slice(0, n - 1) with n <= 0 would be a negative slice and return
  // almost the whole string instead of truncating.
  if (n <= 0) return '';
  return `${s.slice(0, n - 1)}…`;
}
/**
 * Build the real MCP client: connects over Streamable HTTP with the bearer
 * token from `cfg`, then exposes just the `McpLike` surface the loop needs.
 *
 * `cfg.toolTimeoutMs` (default 60_000) is applied to every `callTool`
 * request as the SDK's per-request timeout.
 *
 * @throws Whatever `client.connect` throws (bad URL, auth failure, network).
 */
async function defaultMcpFactory(cfg: AgentConfig): Promise<McpLike> {
  const client = new Client({ name: 'mcpctl-agent', version: '0.0.1' });
  const transport = new StreamableHTTPClientTransport(new URL(cfg.mcpUrl), {
    requestInit: { headers: { Authorization: `Bearer ${cfg.mcpToken}` } },
  });
  const toolTimeoutMs = cfg.toolTimeoutMs ?? 60_000;
  // The SDK's Transport interface declares `sessionId: string` while the
  // Streamable-HTTP transport starts with `sessionId: undefined` until
  // `initialize` populates it — that's legal at runtime but TS exactOptional
  // rules reject the direct assignment.
  await client.connect(transport as unknown as Parameters<Client['connect']>[0]);
  return {
    listTools: () => client.listTools() as Promise<{ tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }>,
    // `undefined` keeps the SDK's default result schema; the third argument
    // carries the per-request options, including the timeout.
    callTool: (args) => client.callTool(args, undefined, { timeout: toolTimeoutMs }),
    close: () => client.close(),
  };
}
/**
 * Build the real OpenAI-compatible client from `cfg`. The SDK client is
 * narrowed to `LlmLike`, the only surface the agent loop touches.
 */
function defaultLlmFactory(cfg: AgentConfig): LlmLike {
  const { llmBaseUrl: baseURL, llmApiKey: apiKey } = cfg;
  const client = new OpenAI({ baseURL, apiKey });
  return client as unknown as LlmLike;
}

107
src/agent/src/cli.ts Normal file
View File

@@ -0,0 +1,107 @@
#!/usr/bin/env node
/**
* `mcpctl-agent` CLI — standalone for now, will be wired into the mcpctl
* binary as `mcpctl agent run …` in a follow-up so the main CLI's permission
* model + completions pipeline can pick it up.
*
* Usage:
* mcpctl-agent run "analyse last week's slow grafana queries" \
* --model qwen3-thinking \
* --project sre
*
* Env reads (these are the same shape we'd mount from a k8s Secret/ConfigMap
* in the follow-up serve mode):
* AGENT_MCP_URL e.g. https://mcp.ad.itaz.eu/projects/sre/mcp
* AGENT_MCP_TOKEN mcpctl_pat_…
* AGENT_LLM_BASE_URL e.g. http://litellm.nvidia-nim.svc.cluster.local:4000/v1
* AGENT_LLM_API_KEY LiteLLM master key
* AGENT_MODEL default model (overridable with --model)
*/
import { Command } from 'commander';
import { runAgent, type AgentConfig } from './agent.js';
// Root command: carries only identity metadata; subcommands are attached below.
const program = new Command()
  .name('mcpctl-agent')
  .description('MCP-correct chat agent (preserves Mcp-Session-Id, honors tools/list_changed)')
  .version('0.0.1');
// `run` subcommand: one prompt in, one answer out. Flags win over the
// AGENT_* environment variables; anything still missing is a hard error.
program
  .command('run <prompt>')
  .description('One-shot: send a prompt, let the agent use MCP tools until it answers, print the final text')
  .option('--mcp-url <url>', 'MCP endpoint URL (default: $AGENT_MCP_URL)')
  .option('--mcp-token <bearer>', 'MCP bearer token (default: $AGENT_MCP_TOKEN)')
  .option('--llm-base-url <url>', 'OpenAI-compatible endpoint (default: $AGENT_LLM_BASE_URL)')
  .option('--llm-api-key <key>', 'API key (default: $AGENT_LLM_API_KEY)')
  .option('--model <name>', 'Model to use (default: $AGENT_MODEL)')
  .option('--project <name>', 'Override the MCP URL path to /projects/<name>/mcp against the base at $AGENT_MCP_URL')
  .option('--system <prompt>', 'System prompt (prepended)')
  .option('--max-iterations <n>', 'Max tool-call rounds (default 20)', '20')
  .option('-o, --output <format>', 'Output format: text | json', 'text')
  .option('--verbose', 'Log each loop iteration to stderr')
  .action(async (prompt: string, opts: {
    mcpUrl?: string;
    mcpToken?: string;
    llmBaseUrl?: string;
    llmApiKey?: string;
    model?: string;
    project?: string;
    system?: string;
    maxIterations: string;
    output: string;
    verbose?: boolean;
  }) => {
    // Validate up front: Number('abc') is NaN, which would make the agent
    // loop run zero times and print an empty answer with exit code 0.
    const maxIterations = Number(opts.maxIterations);
    if (!Number.isInteger(maxIterations) || maxIterations < 1) {
      throw new Error(`--max-iterations must be a positive integer (got "${opts.maxIterations}")`);
    }

    const mcpUrl = resolveMcpUrl(opts.mcpUrl, opts.project);
    const cfg: AgentConfig = {
      mcpUrl,
      mcpToken: required('--mcp-token / $AGENT_MCP_TOKEN', opts.mcpToken ?? process.env.AGENT_MCP_TOKEN),
      llmBaseUrl: required('--llm-base-url / $AGENT_LLM_BASE_URL', opts.llmBaseUrl ?? process.env.AGENT_LLM_BASE_URL),
      llmApiKey: required('--llm-api-key / $AGENT_LLM_API_KEY', opts.llmApiKey ?? process.env.AGENT_LLM_API_KEY),
      model: required('--model / $AGENT_MODEL', opts.model ?? process.env.AGENT_MODEL),
      maxIterations,
    };
    // Assigned conditionally so the key stays absent (not `undefined`) when
    // no system prompt was given.
    if (opts.system !== undefined) cfg.systemPrompt = opts.system;

    // Diagnostics go to stderr so stdout carries only the answer.
    const logFn = opts.verbose
      ? (line: string) => process.stderr.write(`${line}\n`)
      : () => { /* silent */ };

    const result = await runAgent(prompt, cfg, { log: logFn });

    if (opts.output === 'json') {
      process.stdout.write(`${JSON.stringify({
        finalText: result.finalText,
        rounds: result.rounds,
        hitIterationLimit: result.hitIterationLimit,
        messages: result.messages,
      }, null, 2)}\n`);
    } else {
      process.stdout.write(`${result.finalText}\n`);
      if (result.hitIterationLimit) process.stderr.write('[agent] hit --max-iterations limit; output may be incomplete\n');
    }
  });
// Entry point: any rejection from argument parsing or the action handler is
// reported as a one-line `error: …` on stderr and turned into exit code 1.
program.parseAsync(process.argv).catch((failure: unknown) => {
  const reason = failure instanceof Error ? failure.message : String(failure);
  process.stderr.write(`error: ${reason}\n`);
  process.exit(1);
});
/**
 * Work out the MCP endpoint URL from `--mcp-url` / `$AGENT_MCP_URL` and the
 * optional `--project` override.
 *
 * Without a project the configured URL is returned untouched. With one, a
 * trailing `/projects/<x>/mcp` segment is swapped for the requested project;
 * otherwise the URL is treated as an origin (trailing slashes stripped) and
 * `/projects/<project>/mcp` is appended.
 *
 * @throws Error when neither the flag nor the environment variable is set.
 */
function resolveMcpUrl(flag: string | undefined, project: string | undefined): string {
  const configured = flag ?? process.env.AGENT_MCP_URL;
  if (!configured) {
    throw new Error('--mcp-url or $AGENT_MCP_URL is required');
  }
  if (project === undefined) {
    return configured;
  }
  const projectPath = /^(.+?)\/projects\/[^/]+\/mcp\/?$/.exec(configured);
  const origin = projectPath ? projectPath[1] : configured.replace(/\/+$/, '');
  return `${origin}/projects/${encodeURIComponent(project)}/mcp`;
}
/**
 * Return `value` unchanged, or throw `"<label> is required"` when it is
 * `undefined`, `null`, or the empty string. Other falsy values (`0`, `false`)
 * are accepted.
 */
function required<T>(label: string, value: T | undefined | null): T {
  const isBlank = value === '';
  const isAbsent = value === undefined || value === null;
  if (isAbsent || isBlank) {
    throw new Error(`${label} is required`);
  }
  return value as T;
}

2
src/agent/src/index.ts Normal file
View File

@@ -0,0 +1,2 @@
export { runAgent } from './agent.js';
export type { AgentConfig, AgentDeps, AgentResult, McpLike, LlmLike } from './agent.js';

View File

@@ -0,0 +1,180 @@
import { describe, it, expect, vi } from 'vitest';
import { runAgent, type AgentConfig, type LlmLike, type McpLike } from '../src/agent.js';
// Baseline config shared by every case. The URLs and keys are placeholders:
// each test injects both client factories, so nothing here is ever dialled.
const BASE_CONFIG: AgentConfig = {
mcpUrl: 'http://mcp.example/projects/x/mcp',
mcpToken: 'mcpctl_pat_test',
llmBaseUrl: 'http://llm.example/v1',
llmApiKey: 'test',
model: 'qwen3-thinking',
};
/**
 * Build a mock MCP client. By default it lists no tools, answers every tool
 * call with a single text block `ok`, and closes cleanly; `overrides`
 * replaces any of the three methods.
 */
function makeMcp(overrides: Partial<McpLike> = {}): McpLike {
  const defaults: McpLike = {
    listTools: vi.fn(async () => ({ tools: [] })),
    callTool: vi.fn(async () => ({ content: [{ type: 'text', text: 'ok' }] })),
    close: vi.fn(async () => { /* noop */ }),
  };
  return { ...defaults, ...overrides };
}
/**
 * Build a mock LLM that plays back `replies` in order, one per
 * `chat.completions.create` call, and throws `LLM mock exhausted` once the
 * script runs out. A reply with `tool_calls` is reported with
 * `finish_reason: 'tool_calls'`, any other reply with `'stop'`.
 */
function makeLlm(replies: Array<{ content?: string | null; tool_calls?: Array<{ id: string; name: string; arguments: string }> }>): LlmLike {
  // Copy so the caller's array is never consumed or mutated.
  const script = replies.slice();
  let cursor = 0;

  const create = vi.fn(async () => {
    const next = script[cursor];
    cursor += 1;
    if (!next) throw new Error('LLM mock exhausted');

    const message: {
      role: 'assistant';
      content: string | null;
      tool_calls?: Array<{ id: string; type: 'function'; function: { name: string; arguments: string } }>;
    } = { role: 'assistant', content: next.content ?? null };

    const requested = next.tool_calls;
    if (requested) {
      // Expand the compact test shape into the OpenAI wire shape.
      message.tool_calls = requested.map(({ id, name, arguments: rawArgs }) => ({
        id,
        type: 'function' as const,
        function: { name, arguments: rawArgs },
      }));
    }

    const finishReason = requested ? 'tool_calls' : 'stop';
    return { choices: [{ message, finish_reason: finishReason }] };
  });

  return { chat: { completions: { create } } };
}
// Unit tests for the agent loop. Both client factories are injected in every
// case, so no real MCP server or LLM endpoint is contacted.
describe('runAgent', () => {
// The only scripted reply has no tool_calls, so the loop must return after
// the first inference without touching callTool.
it('returns directly when the model answers without tool calls', async () => {
const mcp = makeMcp();
const llm = makeLlm([{ content: 'hello world' }]);
const result = await runAgent('hi', BASE_CONFIG, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
expect(result.finalText).toBe('hello world');
expect(result.rounds).toBe(0);
expect(result.hitIterationLimit).toBe(false);
expect(mcp.callTool).not.toHaveBeenCalled();
expect(mcp.close).toHaveBeenCalled();
});
// One tool-call round followed by a plain answer; also checks that the JSON
// argument string was parsed into an object before reaching callTool.
it('executes a tool call, feeds the result back, and terminates on the next assistant turn', async () => {
const mcp = makeMcp({
listTools: vi.fn(async () => ({
tools: [{ name: 'search', description: 'search the docs', inputSchema: { type: 'object' } }],
})),
callTool: vi.fn(async () => ({ content: [{ type: 'text', text: 'a matching doc' }] })),
});
const llm = makeLlm([
{ tool_calls: [{ id: 'call-1', name: 'search', arguments: '{"q":"foo"}' }] },
{ content: 'final answer based on tool result' },
]);
const result = await runAgent('find foo', BASE_CONFIG, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
expect(result.finalText).toBe('final answer based on tool result');
expect(result.rounds).toBe(1);
expect(mcp.callTool).toHaveBeenCalledWith({ name: 'search', arguments: { q: 'foo' } });
// Messages should be: user → assistant (tool_calls) → tool → assistant (final)
expect(result.messages).toHaveLength(4);
expect(result.messages[0]!.role).toBe('user');
expect(result.messages[1]!.role).toBe('assistant');
expect(result.messages[2]!.role).toBe('tool');
expect(result.messages[3]!.role).toBe('assistant');
});
// Gated-session scenario: the first listing exposes one tool, the listing
// after the first round exposes three.
it('refetches tools/list between rounds to honor list_changed', async () => {
const listTools = vi.fn()
.mockResolvedValueOnce({ tools: [{ name: 'begin_session' }] })
.mockResolvedValueOnce({ tools: [{ name: 'begin_session' }, { name: 'search' }, { name: 'fetch' }] });
const mcp = makeMcp({ listTools });
const llm = makeLlm([
{ tool_calls: [{ id: 'c1', name: 'begin_session', arguments: '{}' }] },
{ content: 'done' },
]);
await runAgent('go', BASE_CONFIG, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
// Called at startup + after each round (one round here)
expect(listTools).toHaveBeenCalledTimes(2);
// The second chat.completions.create call should have received all 3 tools
const secondCall = (llm.chat.completions.create as unknown as { mock: { calls: Array<Array<{ tools?: unknown[] }>> } }).mock.calls[1]!;
expect(secondCall[0].tools).toHaveLength(3);
});
// The mock LLM requests a tool on every inference, so only the iteration
// cap can end the run; rounds therefore equals maxIterations.
it('stops after maxIterations and flags hitIterationLimit', async () => {
const mcp = makeMcp({
listTools: vi.fn(async () => ({ tools: [{ name: 'loop' }] })),
});
// Infinite tool-call stream
const llm: LlmLike = {
chat: {
completions: {
create: vi.fn(async () => ({
choices: [{
message: {
role: 'assistant',
content: null,
tool_calls: [{ id: 'x', type: 'function', function: { name: 'loop', arguments: '{}' } }],
},
finish_reason: 'tool_calls',
}],
})),
},
},
};
const result = await runAgent('trap me', { ...BASE_CONFIG, maxIterations: 3 }, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
expect(result.hitIterationLimit).toBe(true);
expect(result.rounds).toBe(3);
});
// callTool rejects; the error text must surface in the `tool` message and
// the run must still finish with the model's next answer.
it('serializes a failed tool call into the conversation instead of throwing', async () => {
const mcp = makeMcp({
listTools: vi.fn(async () => ({ tools: [{ name: 'fails' }] })),
callTool: vi.fn(async () => { throw new Error('upstream exploded'); }),
});
const llm = makeLlm([
{ tool_calls: [{ id: 'c1', name: 'fails', arguments: '{}' }] },
{ content: 'ok I saw the error, moving on' },
]);
const result = await runAgent('try the broken tool', BASE_CONFIG, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
expect(result.finalText).toBe('ok I saw the error, moving on');
const toolMsg = result.messages.find((m) => m.role === 'tool');
expect(toolMsg).toBeDefined();
expect(String(toolMsg!.content)).toContain('upstream exploded');
});
// Inspects the first request sent to the LLM: system message first, then user.
it('prepends systemPrompt when supplied', async () => {
const mcp = makeMcp();
const llm = makeLlm([{ content: 'fine' }]);
await runAgent('hi', { ...BASE_CONFIG, systemPrompt: 'you are a helpful assistant' }, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
const call = (llm.chat.completions.create as unknown as { mock: { calls: Array<Array<{ messages: Array<{ role: string; content: unknown }> }>> } }).mock.calls[0]![0];
expect(call.messages[0]).toEqual({ role: 'system', content: 'you are a helpful assistant' });
expect(call.messages[1]).toEqual({ role: 'user', content: 'hi' });
});
// listTools rejects before the first inference (the LLM script is empty and
// never consulted); close() must still run and the error must propagate.
it('closes the MCP client even when the loop throws', async () => {
const mcp = makeMcp({
listTools: vi.fn(async () => { throw new Error('mcp dead'); }),
});
const llm = makeLlm([]);
await expect(runAgent('x', BASE_CONFIG, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
})).rejects.toThrow('mcp dead');
expect(mcp.close).toHaveBeenCalled();
});
});

12
src/agent/tsconfig.json Normal file
View File

@@ -0,0 +1,12 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": "src",
"outDir": "dist",
"types": ["node"]
},
"include": ["src/**/*.ts"],
"references": [
{ "path": "../shared" }
]
}

View File

@@ -41,11 +41,6 @@ export class AuditCollector {
this.sessionPrincipals.set(sessionId, { ...existing, tokenName: token.tokenName, tokenSha: token.tokenSha });
}
/** Look up the McpToken SHA for a session. Returns undefined for non-HTTP-mode sessions. */
getSessionMcpTokenSha(sessionId: string): string | undefined {
return this.sessionPrincipals.get(sessionId)?.tokenSha;
}
/** Queue an audit event. Auto-fills projectName, userName, tokenName, and tokenSha. */
emit(event: Omit<AuditEvent, 'projectName'>): void {
const enriched: AuditEvent = { ...event, projectName: this.projectName };

View File

@@ -3,21 +3,6 @@
*
* Tracks whether a session has gone through the prompt selection flow.
* When gated, only begin_session is accessible. After ungating, all tools work.
*
* Per-token ungate cache:
* When the caller authenticated via an `McpToken` (HTTP-mode service agent),
* we also remember the ungate keyed on the token's SHA. Subsequent sessions
* from the same token automatically start ungated for a TTL window.
*
* Why: LiteLLM and similar MCP-proxying clients don't preserve the
* `mcp-session-id` header across chat completion calls, so every tool call
* lands on a fresh upstream session — which would otherwise be gated anew,
* forcing the agent into a begin_session loop. Keying on the token (which IS
* preserved, because it's in the Authorization header) gives us a stable
* identity that survives stateless proxies.
*
* Claude Code's stdio path keeps its session-id, so this code is a no-op for
* that case (session-id ungate still applies, token ungate is purely additive).
*/
import type { PromptIndexEntry, TagMatchResult } from './tag-matcher.js';
@@ -29,37 +14,15 @@ export interface SessionState {
briefing: string | null;
}
interface TokenUngateEntry {
tokenSha: string;
tags: string[];
ungatedAt: number;
retrievedPrompts: Set<string>;
}
/** Default TTL for per-token ungate cache (1 hour). Tunable via env for testing. */
const DEFAULT_TOKEN_UNGATE_TTL_MS = Number(process.env['MCPLOCAL_TOKEN_UNGATE_TTL_MS']) || 60 * 60 * 1000;
export class SessionGate {
private sessions = new Map<string, SessionState>();
private tokenUngates = new Map<string, TokenUngateEntry>();
private readonly tokenUngateTtlMs: number;
/**
 * @param tokenUngateTtlMs Age in milliseconds beyond which a token's ungate
 *   entry is treated as expired on lookup. Defaults to
 *   DEFAULT_TOKEN_UNGATE_TTL_MS.
 */
constructor(tokenUngateTtlMs = DEFAULT_TOKEN_UNGATE_TTL_MS) {
this.tokenUngateTtlMs = tokenUngateTtlMs;
}
/**
* Create a new session. Starts gated if the project is gated, UNLESS the
* caller's McpToken already ungated within the last TTL window — in which
* case the session inherits the previous tags + retrievedPrompts so the
* agent doesn't get the full gated greeting on every fresh session.
*/
createSession(sessionId: string, projectGated: boolean, tokenSha?: string): void {
const priorEntry = tokenSha ? this.getActiveTokenEntry(tokenSha) : null;
/** Create a new session. Starts gated if the project is gated. */
createSession(sessionId: string, projectGated: boolean): void {
this.sessions.set(sessionId, {
gated: projectGated && priorEntry === null,
tags: priorEntry ? [...priorEntry.tags] : [],
retrievedPrompts: priorEntry ? new Set(priorEntry.retrievedPrompts) : new Set(),
gated: projectGated,
tags: [],
retrievedPrompts: new Set(),
briefing: null,
});
}
@@ -74,37 +37,18 @@ export class SessionGate {
return this.sessions.get(sessionId)?.gated ?? false;
}
/** Reports whether `tokenSha` currently has a live (not yet expired) ungate entry. */
isTokenUngated(tokenSha: string): boolean {
  const liveEntry = this.getActiveTokenEntry(tokenSha);
  return liveEntry !== null;
}
/**
* Ungate a session after prompt selection is complete.
*
* When `tokenSha` is supplied, also remember the ungate keyed on the token
* so future sessions from the same token start ungated (survives proxies
* that drop `mcp-session-id`).
*/
ungate(sessionId: string, tags: string[], matchResult: TagMatchResult, tokenSha?: string): void {
/** Ungate a session after prompt selection is complete. */
ungate(sessionId: string, tags: string[], matchResult: TagMatchResult): void {
const session = this.sessions.get(sessionId);
if (!session) return;
session.gated = false;
session.tags = [...session.tags, ...tags];
// Track which prompts have been sent
for (const p of matchResult.fullContent) {
session.retrievedPrompts.add(p.name);
}
if (tokenSha !== undefined && tokenSha !== '') {
this.tokenUngates.set(tokenSha, {
tokenSha,
tags: [...session.tags],
ungatedAt: Date.now(),
retrievedPrompts: new Set(session.retrievedPrompts),
});
}
}
/** Record additional prompts retrieved via read_prompts. */
@@ -129,19 +73,4 @@ export class SessionGate {
removeSession(sessionId: string): void {
// Forget the per-session gate state; Map#delete is a no-op when the id is unknown.
this.sessions.delete(sessionId);
}
/**
 * Forget a token's ungate entry (e.g. on revocation signal).
 *
 * Only the token-keyed cache entry is removed; sessions that already exist
 * are not modified by this call.
 */
revokeToken(tokenSha: string): void {
this.tokenUngates.delete(tokenSha);
}
/**
 * Fetch the cached ungate entry for `tokenSha` if it is still within the TTL.
 *
 * An entry older than `tokenUngateTtlMs` is evicted as a side effect of the
 * lookup and reported as absent.
 *
 * @returns the live entry, or null when none exists or it has expired.
 */
private getActiveTokenEntry(tokenSha: string): TokenUngateEntry | null {
  const cached = this.tokenUngates.get(tokenSha);
  if (cached === undefined) return null;

  const expired = Date.now() - cached.ungatedAt > this.tokenUngateTtlMs;
  if (!expired) return cached;

  // Lazy eviction: expired entries are dropped the first time they are looked up.
  this.tokenUngates.delete(tokenSha);
  return null;
}
}

View File

@@ -25,13 +25,6 @@ export interface PluginContextDeps {
queueNotification: (notification: JsonRpcNotification) => void;
postToMcpd: (path: string, body: Record<string, unknown>) => Promise<unknown>;
auditCollector?: AuditCollector;
/**
* Resolves the principal's McpToken SHA for this session, if the caller
* authenticated via an McpToken. Called lazily so the value reflects the
* session's current state even when the token is attached after the plugin
* context is created.
*/
getMcpTokenSha?: () => string | undefined;
}
/**
@@ -62,11 +55,6 @@ export class PluginContextImpl implements PluginSessionContext {
this.deps = deps;
}
/**
 * McpToken SHA for the current caller, or undefined for STDIO/session-auth callers.
 * Delegates to the optional `getMcpTokenSha` dependency; when that dependency
 * was not supplied the result is undefined.
 */
getMcpTokenSha(): string | undefined {
  const { deps } = this;
  if (deps.getMcpTokenSha == null) return undefined;
  return deps.getMcpTokenSha();
}
/**
 * Register a virtual tool together with its handler, keyed by the tool's name.
 * NOTE(review): assumes `virtualTools` is a Map, so registering the same name
 * again replaces the earlier entry — confirm against the field declaration.
 */
registerTool(tool: ToolDefinition, handler: VirtualToolHandler): void {
  const registration = { definition: tool, handler };
  this.virtualTools.set(tool.name, registration);
}

View File

@@ -50,14 +50,6 @@ export interface PluginSessionContext {
// Audit event emission (auto-fills sessionId and projectName)
emitAuditEvent(event: Omit<AuditEvent, 'sessionId' | 'projectName'>): void;
/**
* McpToken SHA for the current caller, or undefined if the session was
* authenticated via a User session (STDIO/Claude Code path). Plugins can use
* this to key state on the token principal rather than the session-id —
* useful when the session-id doesn't survive a proxy (e.g. LiteLLM).
*/
getMcpTokenSha(): string | undefined;
}
// ── Virtual Server ──────────────────────────────────────────────────

View File

@@ -40,11 +40,7 @@ export function createGatePlugin(config: GatePluginConfig = {}): ProxyModelPlugi
description: 'Gated session flow: begin_session → prompt selection → ungate.',
async onSessionCreate(ctx) {
// Pass the caller's McpToken SHA so the gate can honor a cross-session
// ungate cache keyed on the token principal. Fixes the LiteLLM case where
// each tool call lands on a fresh mcp-session-id → would otherwise loop
// on begin_session forever.
sessionGate.createSession(ctx.sessionId, isGated, ctx.getMcpTokenSha());
sessionGate.createSession(ctx.sessionId, isGated);
// Register begin_session virtual tool
ctx.registerTool(getBeginSessionTool(llmSelector), async (args, callCtx) => {
@@ -268,9 +264,8 @@ async function handleBeginSession(
matchResult = tagMatcher.match(tags, promptIndex);
}
// Ungate the session (and remember the ungate per McpToken if this is a
// service-token request, so the next session from the same token skips the gate).
sessionGate.ungate(ctx.sessionId, tags, matchResult, ctx.getMcpTokenSha());
// Ungate the session
sessionGate.ungate(ctx.sessionId, tags, matchResult);
ctx.queueNotification('notifications/tools/list_changed');
// Audit: gate_decision for begin_session
@@ -456,8 +451,8 @@ async function handleGatedIntercept(
const promptIndex = await ctx.fetchPromptIndex();
const matchResult = tagMatcher.match(tags, promptIndex);
// Ungate the session (and remember per-token if the caller authenticated via an McpToken).
sessionGate.ungate(ctx.sessionId, tags, matchResult, ctx.getMcpTokenSha());
// Ungate the session
sessionGate.ungate(ctx.sessionId, tags, matchResult);
ctx.queueNotification('notifications/tools/list_changed');
// Audit: gate_decision for auto-intercept
@@ -527,7 +522,7 @@ async function handleGatedIntercept(
return response;
} catch {
// If prompt retrieval fails, just ungate and route normally
sessionGate.ungate(ctx.sessionId, tags, { fullContent: [], indexOnly: [], remaining: [] }, ctx.getMcpTokenSha());
sessionGate.ungate(ctx.sessionId, tags, { fullContent: [], indexOnly: [], remaining: [] });
ctx.queueNotification('notifications/tools/list_changed');
return ctx.routeToUpstream(request);
}

View File

@@ -198,10 +198,6 @@ export class McpRouter {
return this.mcpdClient.post(path, body);
},
...(this.auditCollector ? { auditCollector: this.auditCollector } : {}),
// Lazily resolve the caller's McpToken SHA via the audit collector's
// session principal map. The token is attached in onsessioninitialized,
// which runs before any plugin context is created, so this is stable.
getMcpTokenSha: () => this.auditCollector?.getSessionMcpTokenSha(sessionId),
};
ctx = new PluginContextImpl(deps);

View File

@@ -152,76 +152,4 @@ describe('SessionGate', () => {
expect(gate.isGated('s1')).toBe(false);
expect(gate.getSession('s2')!.tags).toEqual([]); // s2 untouched
});
// Covers the token-keyed ungate cache: inheritance across sessions, isolation
// between tokens, the STDIO (no token) path, TTL expiry, and explicit revocation.
describe('per-McpToken ungate cache', () => {
  it('new session from an already-ungated token starts ungated, with prior tags + prompts', () => {
    const gate = new SessionGate();
    gate.createSession('session-1', true, 'tokA');
    expect(gate.isGated('session-1')).toBe(true);
    gate.ungate('session-1', ['ops'], makeMatchResult(['runbook']), 'tokA');
    expect(gate.isTokenUngated('tokA')).toBe(true);
    // LiteLLM semantics: same token, brand-new session-id.
    gate.createSession('session-2', true, 'tokA');
    expect(gate.isGated('session-2')).toBe(false);
    const s2 = gate.getSession('session-2')!;
    expect(s2.tags).toContain('ops');
    expect(s2.retrievedPrompts.has('runbook')).toBe(true);
  });

  it('does not persist across tokens', () => {
    const gate = new SessionGate();
    gate.createSession('s1', true, 'tokA');
    gate.ungate('s1', ['ops'], makeMatchResult(['p']), 'tokA');
    // Different token → fresh gated session.
    gate.createSession('s2', true, 'tokB');
    expect(gate.isGated('s2')).toBe(true);
    expect(gate.isTokenUngated('tokB')).toBe(false);
  });

  it('is not triggered when no tokenSha is supplied (STDIO path)', () => {
    const gate = new SessionGate();
    gate.createSession('s1', true);
    gate.ungate('s1', ['ops'], makeMatchResult(['p']));
    // A second session with no token starts gated — STDIO semantics preserved.
    gate.createSession('s2', true);
    expect(gate.isGated('s2')).toBe(true);
  });

  it('honors the TTL window and expires', async () => {
    const gate = new SessionGate(50); // 50ms TTL for the test
    gate.createSession('s1', true, 'tokA');
    gate.ungate('s1', ['ops'], makeMatchResult(['p']), 'tokA');
    expect(gate.isTokenUngated('tokA')).toBe(true);
    // Wait past the TTL, then assert in the test body. Assertions made inside
    // the timer callback would throw as uncaught exceptions and leave the
    // promise pending, so a failure would show up as a timeout instead.
    await new Promise<void>((resolve) => setTimeout(resolve, 70));
    expect(gate.isTokenUngated('tokA')).toBe(false);
    gate.createSession('s2', true, 'tokA');
    expect(gate.isGated('s2')).toBe(true);
  });

  it('revokeToken clears the ungate entry immediately', () => {
    const gate = new SessionGate();
    gate.createSession('s1', true, 'tokA');
    gate.ungate('s1', ['ops'], makeMatchResult(['p']), 'tokA');
    expect(gate.isTokenUngated('tokA')).toBe(true);
    gate.revokeToken('tokA');
    expect(gate.isTokenUngated('tokA')).toBe(false);
    gate.createSession('s2', true, 'tokA');
    expect(gate.isGated('s2')).toBe(true);
  });

  it('empty-string tokenSha does not register an ungate entry', () => {
    const gate = new SessionGate();
    gate.createSession('s1', true, '');
    gate.ungate('s1', ['ops'], makeMatchResult(['p']), '');
    expect(gate.isTokenUngated('')).toBe(false);
  });
});
});