From 3a28128fb43a8568f371866da50922bfd0c01fc0 Mon Sep 17 00:00:00 2001
From: Michal
Date: Sat, 18 Apr 2026 18:24:29 +0100
Subject: [PATCH] feat(agent): MCP-correct chat agent shim on top of LiteLLM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New package @mcpctl/agent that replaces LiteLLM's broken MCP integration
(dropped Mcp-Session-Id, ignored tools/list_changed) with a thin ~200 LOC
loop built on @modelcontextprotocol/sdk + the openai SDK. LiteLLM stays in
its actual lane — OpenAI-compatible model routing — and this agent handles
MCP correctly.

Core (src/agent.ts):
- StreamableHTTPClientTransport for MCP (auto-preserves Mcp-Session-Id).
- Re-fetches tools/list at the top of every loop so list_changed
  notifications surface new tools to the model on the next turn (fixes the
  gated-session case: begin_session reveals the full upstream tool set, and
  the next round's inference sees all of them).
- OpenAI-compatible inference via process.env.AGENT_LLM_BASE_URL — points
  at LiteLLM or vLLM directly.
- Graceful failure: broken tool calls are serialized back into the
  conversation as the tool's response, and the agent keeps going.
- maxIterations cap stops runaway loops; hitIterationLimit surfaces
  truncation in the result.
- Structural `McpLike` / `LlmLike` interfaces keep the loop testable
  without booting real SDKs.

CLI (src/cli.ts):
  mcpctl-agent run "<prompt>" \
    --model qwen3-thinking --project sre \
    [--system "..."] [--max-iterations N] [-o text|json] [--verbose]
  Env fallbacks: AGENT_MCP_URL, AGENT_MCP_TOKEN, AGENT_LLM_BASE_URL,
  AGENT_LLM_API_KEY, AGENT_MODEL

Tests (7 cases):
- direct answer (no tool call) → ok
- single-round tool call + synthesis → message history correct
- list_changed refresh: tools/list called at startup + after each round →
  next inference sees newly-exposed tools
- maxIterations cap → hitIterationLimit flag set
- failing tool → error serialized into conversation, agent recovers
- systemPrompt prepended
- mcp.close() runs even when loop throws (finally-block guarantee)

End-to-end verified against live cluster:
  Round 1: sees 1 tool (begin_session) → calls it
  Round 2: sees 115 tools (gate opened) → calls aws-docs/search_documentation
  Final: model synthesizes answer — LiteLLM's chat UI cannot do this today;
  this loop does.
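For reference, a minimal sketch of driving the exported API from TypeScript
(illustrative only; the prompt, endpoint URLs, and env-var fallbacks below
are placeholders rather than fixtures from this patch):

    import { runAgent } from '@mcpctl/agent';

    // Placeholder config; in the deployment these come from the AGENT_* env vars.
    const result = await runAgent('find the slowest grafana queries from last week', {
      mcpUrl: 'http://mcp.mcpctl.svc:3200/projects/sre/mcp',
      mcpToken: process.env.AGENT_MCP_TOKEN!,
      llmBaseUrl: 'http://litellm.nvidia-nim.svc.cluster.local:4000/v1',
      llmApiKey: process.env.AGENT_LLM_API_KEY!,
      model: 'qwen3-thinking',
      maxIterations: 10,
    });
    // rounds counts tool-call rounds; hitIterationLimit flags a truncated run.
    console.log(result.finalText, result.rounds, result.hitIterationLimit);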
Still to do (follow-up PRs):
- Wire into mcpctl binary as `mcpctl agent run ...`
- Docker image + Pulumi deploy for a long-running HTTP service mode (see
  the sketch after the diff)
- Minimal chat UI (HTMX or plain fetch)
- Streaming responses

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 pnpm-lock.yaml                | 105 ++++++++++++++++++
 src/agent/package.json        |  28 +++++
 src/agent/src/agent.ts        | 201 ++++++++++++++++++++++++++++++++++
 src/agent/src/cli.ts          | 107 ++++++++++++++++++
 src/agent/src/index.ts        |   2 +
 src/agent/tests/agent.test.ts | 180 ++++++++++++++++++++++++++++++
 src/agent/tsconfig.json       |  12 ++
 7 files changed, 635 insertions(+)
 create mode 100644 src/agent/package.json
 create mode 100644 src/agent/src/agent.ts
 create mode 100644 src/agent/src/cli.ts
 create mode 100644 src/agent/src/index.ts
 create mode 100644 src/agent/tests/agent.test.ts
 create mode 100644 src/agent/tsconfig.json

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index ade254b..5e632cb 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -39,6 +39,28 @@ importers: specifier: ^4.0.18 version: 4.0.18(@types/node@25.3.0)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2) + src/agent: + dependencies: + '@mcpctl/shared': + specifier: workspace:* + version: link:../shared + '@modelcontextprotocol/sdk': + specifier: ^1.0.0 + version: 1.26.0(zod@3.25.76) + commander: + specifier: ^13.0.0 + version: 13.1.0 + openai: + specifier: ^4.77.0 + version: 4.104.0(ws@8.19.0)(zod@3.25.76) + devDependencies: + '@types/node': + specifier: ^25.3.0 + version: 25.3.0 + vitest: + specifier: ^4.0.0 + version: 4.0.18(@types/node@25.3.0)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2) + src/cli: dependencies: '@inkjs/ui':
@@ -989,6 +1011,10 @@ packages: abbrev@1.1.1: resolution: {integrity: sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==} + abort-controller@3.0.0: + resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} + engines: {node: '>=6.5'} + abstract-logging@2.0.1: resolution: {integrity: sha512-2BjRTZxTPvheOvGbBslFSYOUkr+SjPtOnrLP33f+VIWLzezQpZcqVg7ja3L4dBXmzzgwT+a029jRx5PCi3JuiA==}
@@ -1014,6 +1040,10 @@ resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} engines: {node: '>= 14'} + agentkeepalive@4.6.0: + resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==} + engines: {node: '>= 8.0.0'} + ajv-formats@3.0.1: resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} peerDependencies:
@@ -1509,6 +1539,10 @@ resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==} engines: {node: '>= 0.6'} + event-target-shim@5.0.1: + resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==} + engines: {node: '>=6'} + events-universal@1.0.1: resolution: {integrity: sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==}
@@ -1610,10 +1644,17 @@ flatted@3.3.3: resolution: {integrity: sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==} + form-data-encoder@1.7.2: + resolution: {integrity: sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==} + form-data@4.0.5: resolution: {integrity:
sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==} engines: {node: '>= 6'} + formdata-node@4.4.1: + resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==} + engines: {node: '>= 12.20'} + forwarded@0.2.0: resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==} engines: {node: '>= 0.6'} @@ -1726,6 +1767,9 @@ packages: resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} engines: {node: '>= 6'} + humanize-ms@1.2.1: + resolution: {integrity: sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==} + iconv-lite@0.7.2: resolution: {integrity: sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==} engines: {node: '>=0.10.0'} @@ -2012,6 +2056,11 @@ packages: node-addon-api@5.1.0: resolution: {integrity: sha512-eh0GgfEkpnoWDq+VY8OyvYhFEzBk6jIYbRKdIlyTiAXIVJ8PyBaKb0rp7oDtoddbdoHWhq8wwr+XZ81F1rpNdA==} + node-domexception@1.0.0: + resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} + engines: {node: '>=10.5.0'} + deprecated: Use your platform's native DOMException instead + node-fetch-native@1.6.7: resolution: {integrity: sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q==} @@ -2073,6 +2122,18 @@ packages: resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} engines: {node: '>=6'} + openai@4.104.0: + resolution: {integrity: sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==} + hasBin: true + peerDependencies: + ws: ^8.18.0 + zod: ^3.23.8 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + openid-client@6.8.2: resolution: {integrity: sha512-uOvTCndr4udZsKihJ68H9bUICrriHdUVJ6Az+4Ns6cW55rwM5h0bjVIzDz2SxgOI84LKjFyjOFvERLzdTUROGA==} @@ -2647,6 +2708,10 @@ packages: jsdom: optional: true + web-streams-polyfill@4.0.0-beta.3: + resolution: {integrity: sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==} + engines: {node: '>= 14'} + webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} @@ -3509,6 +3574,10 @@ snapshots: abbrev@1.1.1: {} + abort-controller@3.0.0: + dependencies: + event-target-shim: 5.0.1 + abstract-logging@2.0.1: {} accepts@2.0.0: @@ -3530,6 +3599,10 @@ snapshots: agent-base@7.1.4: {} + agentkeepalive@4.6.0: + dependencies: + humanize-ms: 1.2.1 + ajv-formats@3.0.1(ajv@8.18.0): optionalDependencies: ajv: 8.18.0 @@ -4020,6 +4093,8 @@ snapshots: etag@1.8.1: {} + event-target-shim@5.0.1: {} + events-universal@1.0.1: dependencies: bare-events: 2.8.2 @@ -4168,6 +4243,8 @@ snapshots: flatted@3.3.3: {} + form-data-encoder@1.7.2: {} + form-data@4.0.5: dependencies: asynckit: 0.4.0 @@ -4176,6 +4253,11 @@ snapshots: hasown: 2.0.2 mime-types: 2.1.35 + formdata-node@4.4.1: + dependencies: + node-domexception: 1.0.0 + web-streams-polyfill: 4.0.0-beta.3 + forwarded@0.2.0: {} fresh@2.0.0: {} @@ -4298,6 +4380,10 @@ snapshots: transitivePeerDependencies: - supports-color + humanize-ms@1.2.1: + dependencies: + ms: 2.1.3 + iconv-lite@0.7.2: dependencies: safer-buffer: 2.1.2 @@ -4551,6 +4637,8 @@ snapshots: 
node-addon-api@5.1.0: {} + node-domexception@1.0.0: {} + node-fetch-native@1.6.7: {} node-fetch@2.7.0:
@@ -4600,6 +4688,21 @@ snapshots: dependencies: mimic-fn: 2.1.0 + openai@4.104.0(ws@8.19.0)(zod@3.25.76): + dependencies: + '@types/node': 18.19.130 + '@types/node-fetch': 2.6.13 + abort-controller: 3.0.0 + agentkeepalive: 4.6.0 + form-data-encoder: 1.7.2 + formdata-node: 4.4.1 + node-fetch: 2.7.0 + optionalDependencies: + ws: 8.19.0 + zod: 3.25.76 + transitivePeerDependencies: + - encoding + openid-client@6.8.2: dependencies: jose: 6.1.3
@@ -5211,6 +5314,8 @@ snapshots: - tsx - yaml + web-streams-polyfill@4.0.0-beta.3: {} + webidl-conversions@3.0.1: {} whatwg-url@5.0.0:
diff --git a/src/agent/package.json b/src/agent/package.json
new file mode 100644
index 0000000..2ca44e4
--- /dev/null
+++ b/src/agent/package.json
@@ -0,0 +1,28 @@
+{
+  "name": "@mcpctl/agent",
+  "version": "0.0.1",
+  "private": true,
+  "type": "module",
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "bin": {
+    "mcpctl-agent": "./dist/cli.js"
+  },
+  "scripts": {
+    "build": "tsc --build",
+    "clean": "rimraf dist",
+    "run": "node dist/cli.js",
+    "test": "vitest",
+    "test:run": "vitest run"
+  },
+  "dependencies": {
+    "@mcpctl/shared": "workspace:*",
+    "@modelcontextprotocol/sdk": "^1.0.0",
+    "commander": "^13.0.0",
+    "openai": "^4.77.0"
+  },
+  "devDependencies": {
+    "@types/node": "^25.3.0",
+    "vitest": "^4.0.0"
+  }
+}
diff --git a/src/agent/src/agent.ts b/src/agent/src/agent.ts
new file mode 100644
index 0000000..f702114
--- /dev/null
+++ b/src/agent/src/agent.ts
@@ -0,0 +1,201 @@
+/**
+ * MCP-aware chat agent loop.
+ *
+ * Correct where LiteLLM's integration is broken:
+ * - Uses `@modelcontextprotocol/sdk`'s `StreamableHTTPClientTransport`, which
+ *   preserves `Mcp-Session-Id` across requests automatically.
+ * - Honors `notifications/tools/list_changed`: after every tool-call round we
+ *   re-fetch the tool list before the next model inference, so an MCP server
+ *   that reveals new tools mid-session (gated sessions, auto-install) shows
+ *   them to the model on the next turn.
+ *
+ * Inference goes through an OpenAI-compatible endpoint (LiteLLM at
+ * http://litellm…:4000/v1 in this repo's deployment; vLLM works too). That
+ * keeps LiteLLM doing its actual job — model routing — and strips it of the
+ * MCP role it was failing at.
+ */
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+import OpenAI from 'openai';
+import type {
+  ChatCompletionMessageParam,
+  ChatCompletionTool,
+  ChatCompletionMessageToolCall,
+} from 'openai/resources/chat/completions';
+
+export interface AgentConfig {
+  /** Full URL of the MCP endpoint, e.g. http://mcp.mcpctl.svc:3200/projects/sre/mcp */
+  mcpUrl: string;
+  /** Raw `mcpctl_pat_…` bearer for the MCP endpoint. */
+  mcpToken: string;
+  /** OpenAI-compatible base URL, e.g. http://litellm…:4000/v1 */
+  llmBaseUrl: string;
+  /** API key for the OpenAI-compatible endpoint (LiteLLM master key). */
+  llmApiKey: string;
+  /** Model name as known to the OpenAI endpoint, e.g. qwen3-thinking */
+  model: string;
+  /** Optional system prompt (prepended as `role:'system'` if given). */
+  systemPrompt?: string;
+  /** Hard cap on loop iterations; stops runaway agents. Default 20. */
+  maxIterations?: number;
+  /** Per-tool-call timeout ms passed to the MCP SDK. Default 60_000. */
+  toolTimeoutMs?: number;
+}
+
+export interface AgentDeps {
+  /** Injectable for tests. Creates the MCP Client + transport. */
+  mcpClientFactory?: (cfg: AgentConfig) => Promise<McpLike>;
+  /** Injectable for tests. Creates the OpenAI-compatible client. */
+  llmClientFactory?: (cfg: AgentConfig) => LlmLike;
+  /** Optional per-iteration logger (stdout, audit sink, etc.). */
+  log?: (line: string) => void;
+}
+
+/**
+ * Structural typing for the MCP client surface we actually use. Keeps the
+ * loop testable without importing the concrete SDK in test fixtures. Optional
+ * fields are `T | undefined` (not `T?`) to stay compatible with the MCP SDK's
+ * own types under `exactOptionalPropertyTypes`.
+ */
+export interface McpLike {
+  listTools(): Promise<{ tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }>;
+  callTool(args: { name: string; arguments: Record<string, unknown> }): Promise<unknown>;
+  close(): Promise<void>;
+}
+
+export interface LlmLike {
+  chat: {
+    completions: {
+      create(body: {
+        model: string;
+        messages: ChatCompletionMessageParam[];
+        tools?: ChatCompletionTool[];
+        tool_choice?: 'auto' | 'none' | { type: 'function'; function: { name: string } };
+      }): Promise<{ choices: Array<{ message: { role: 'assistant'; content: string | null; tool_calls?: ChatCompletionMessageToolCall[] }; finish_reason?: string | null }> }>;
+    };
+  };
+}
+
+export interface AgentResult {
+  /** The final assistant message (after all tool-call rounds). */
+  finalText: string;
+  /** Full message history, useful for eval + debugging. */
+  messages: ChatCompletionMessageParam[];
+  /** Number of tool-call rounds that ran. Zero if the model answered directly. */
+  rounds: number;
+  /** True if the loop terminated because `maxIterations` was hit. */
+  hitIterationLimit: boolean;
+}
+
+export async function runAgent(prompt: string, config: AgentConfig, deps: AgentDeps = {}): Promise<AgentResult> {
+  const log = deps.log ?? (() => { /* silent */ });
+  const maxIterations = config.maxIterations ?? 20;
+
+  const mcp = await (deps.mcpClientFactory ?? defaultMcpFactory)(config);
+  try {
+    const llm = (deps.llmClientFactory ?? defaultLlmFactory)(config);
+
+    const messages: ChatCompletionMessageParam[] = [];
+    if (config.systemPrompt) messages.push({ role: 'system', content: config.systemPrompt });
+    messages.push({ role: 'user', content: prompt });
+
+    let tools = toOpenAiTools(await mcp.listTools());
+    log(`[agent] starting with ${tools.length} MCP tools`);
+
+    let rounds = 0;
+    for (let i = 0; i < maxIterations; i++) {
+      const body: Parameters<LlmLike['chat']['completions']['create']>[0] = {
+        model: config.model,
+        messages,
+      };
+      if (tools.length > 0) {
+        body.tools = tools;
+        body.tool_choice = 'auto';
+      }
+      const reply = await llm.chat.completions.create(body);
+      const msg = reply.choices[0]!.message;
+      messages.push(msg);
+
+      const toolCalls = msg.tool_calls ?? [];
+      if (toolCalls.length === 0) {
+        log(`[agent] done after ${rounds} tool-call round(s)`);
+        return { finalText: msg.content ?? '', messages, rounds, hitIterationLimit: false };
+      }
+
+      rounds++;
+      log(`[agent] round ${rounds}: model asked to call ${toolCalls.length} tool(s)`);
+
+      for (const tc of toolCalls) {
+        const name = tc.function.name;
+        let args: Record<string, unknown> = {};
+        try {
+          args = tc.function.arguments ? JSON.parse(tc.function.arguments) as Record<string, unknown> : {};
+        } catch (err) {
+          log(`[agent] tool ${name}: could not parse arguments (${(err as Error).message}) — sending empty args`);
+        }
+        log(`[agent] → ${name}(${truncate(JSON.stringify(args), 120)})`);
+        let result: unknown;
+        try {
+          result = await mcp.callTool({ name, arguments: args });
+        } catch (err) {
+          result = { error: (err as Error).message };
+          log(`[agent] ← ERROR: ${(err as Error).message}`);
+        }
+        messages.push({
+          role: 'tool',
+          tool_call_id: tc.id,
+          content: typeof result === 'string' ? result : JSON.stringify(result),
+        });
+      }
+
+      // MCP server may have emitted notifications/tools/list_changed during a
+      // tool call (e.g. gated sessions revealing tools after begin_session).
+      // The SDK auto-notifies on that event; simplest correctness: re-fetch
+      // on every loop before the next inference so the model sees fresh tools.
+      tools = toOpenAiTools(await mcp.listTools());
+    }
+
+    log(`[agent] hit iteration limit (${maxIterations}) — returning partial`);
+    const last = messages[messages.length - 1];
+    const tail = last && last.role === 'assistant'
+      ? (typeof last.content === 'string' ? last.content : '')
+      : '';
+    return { finalText: tail, messages, rounds, hitIterationLimit: true };
+  } finally {
+    await mcp.close().catch(() => { /* best-effort */ });
+  }
+}
+
+function toOpenAiTools(listed: { tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }): ChatCompletionTool[] {
+  return listed.tools.map((t) => {
+    const fn: { name: string; description?: string; parameters?: Record<string, unknown> } = { name: t.name };
+    if (t.description !== undefined) fn.description = t.description;
+    if (t.inputSchema !== undefined) fn.parameters = t.inputSchema as Record<string, unknown>;
+    return { type: 'function', function: fn } as ChatCompletionTool;
+  });
+}
+
+function truncate(s: string, n: number): string {
+  return s.length <= n ? s : `${s.slice(0, n - 1)}…`;
+}
+
+async function defaultMcpFactory(cfg: AgentConfig): Promise<McpLike> {
+  const client = new Client({ name: 'mcpctl-agent', version: '0.0.1' });
+  const transport = new StreamableHTTPClientTransport(new URL(cfg.mcpUrl), {
+    requestInit: { headers: { Authorization: `Bearer ${cfg.mcpToken}` } },
+  });
+  // The SDK's Transport interface declares `sessionId: string` while the
+  // Streamable-HTTP transport starts with `sessionId: undefined` until
+  // `initialize` populates it — that's legal at runtime but TS exactOptional
+  // rules reject the direct assignment.
+  await client.connect(transport as unknown as Parameters<typeof client.connect>[0]);
+  return {
+    listTools: () => client.listTools() as Promise<{ tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }>,
+    callTool: (args) => client.callTool(args),
+    close: () => client.close(),
+  };
+}
+
+function defaultLlmFactory(cfg: AgentConfig): LlmLike {
+  return new OpenAI({ baseURL: cfg.llmBaseUrl, apiKey: cfg.llmApiKey }) as unknown as LlmLike;
+}
diff --git a/src/agent/src/cli.ts b/src/agent/src/cli.ts
new file mode 100644
index 0000000..1fb7e07
--- /dev/null
+++ b/src/agent/src/cli.ts
@@ -0,0 +1,107 @@
+#!/usr/bin/env node
+/**
+ * `mcpctl-agent` CLI — standalone for now, will be wired into the mcpctl
+ * binary as `mcpctl agent run …` in a follow-up so the main CLI's permission
+ * model + completions pipeline can pick it up.
+ *
+ * Usage:
+ *   mcpctl-agent run "analyse last week's slow grafana queries" \
+ *     --model qwen3-thinking \
+ *     --project sre
+ *
+ * Env reads (these are the same shape we'd mount from a k8s Secret/ConfigMap
+ * in the follow-up serve mode):
+ *   AGENT_MCP_URL       e.g. https://mcp.ad.itaz.eu/projects/sre/mcp
+ *   AGENT_MCP_TOKEN     mcpctl_pat_…
+ *   AGENT_LLM_BASE_URL  e.g. http://litellm.nvidia-nim.svc.cluster.local:4000/v1
+ *   AGENT_LLM_API_KEY   LiteLLM master key
+ *   AGENT_MODEL         default model (overridable with --model)
+ */
+import { Command } from 'commander';
+import { runAgent, type AgentConfig } from './agent.js';
+
+const program = new Command();
+
+program
+  .name('mcpctl-agent')
+  .description('MCP-correct chat agent (preserves Mcp-Session-Id, honors tools/list_changed)')
+  .version('0.0.1');
+
+program
+  .command('run <prompt>')
+  .description('One-shot: send a prompt, let the agent use MCP tools until it answers, print the final text')
+  .option('--mcp-url <url>', 'MCP endpoint URL (default: $AGENT_MCP_URL)')
+  .option('--mcp-token <token>', 'MCP bearer token (default: $AGENT_MCP_TOKEN)')
+  .option('--llm-base-url <url>', 'OpenAI-compatible endpoint (default: $AGENT_LLM_BASE_URL)')
+  .option('--llm-api-key <key>', 'API key (default: $AGENT_LLM_API_KEY)')
+  .option('--model <model>', 'Model to use (default: $AGENT_MODEL)')
+  .option('--project <name>', 'Override the MCP URL path to /projects/<name>/mcp against the base at $AGENT_MCP_URL')
+  .option('--system <prompt>', 'System prompt (prepended)')
+  .option('--max-iterations <n>', 'Max tool-call rounds (default 20)', '20')
+  .option('-o, --output <format>', 'Output format: text | json', 'text')
+  .option('--verbose', 'Log each loop iteration to stderr')
+  .action(async (prompt: string, opts: {
+    mcpUrl?: string;
+    mcpToken?: string;
+    llmBaseUrl?: string;
+    llmApiKey?: string;
+    model?: string;
+    project?: string;
+    system?: string;
+    maxIterations: string;
+    output: string;
+    verbose?: boolean;
+  }) => {
+    const mcpUrl = resolveMcpUrl(opts.mcpUrl, opts.project);
+    const cfg: AgentConfig = {
+      mcpUrl,
+      mcpToken: required('--mcp-token / $AGENT_MCP_TOKEN', opts.mcpToken ?? process.env.AGENT_MCP_TOKEN),
+      llmBaseUrl: required('--llm-base-url / $AGENT_LLM_BASE_URL', opts.llmBaseUrl ?? process.env.AGENT_LLM_BASE_URL),
+      llmApiKey: required('--llm-api-key / $AGENT_LLM_API_KEY', opts.llmApiKey ?? process.env.AGENT_LLM_API_KEY),
+      model: required('--model / $AGENT_MODEL', opts.model ?? process.env.AGENT_MODEL),
+      maxIterations: Number(opts.maxIterations),
+    };
+    if (opts.system !== undefined) cfg.systemPrompt = opts.system;
+
+    const logFn = opts.verbose
+      ? (line: string) => process.stderr.write(`${line}\n`)
+      : () => { /* silent */ };
+
+    const result = await runAgent(prompt, cfg, { log: logFn });
+
+    if (opts.output === 'json') {
+      process.stdout.write(`${JSON.stringify({
+        finalText: result.finalText,
+        rounds: result.rounds,
+        hitIterationLimit: result.hitIterationLimit,
+        messages: result.messages,
+      }, null, 2)}\n`);
+    } else {
+      process.stdout.write(`${result.finalText}\n`);
+      if (result.hitIterationLimit) process.stderr.write('[agent] hit --max-iterations limit; output may be incomplete\n');
+    }
+  });
+
+program.parseAsync(process.argv).catch((err: unknown) => {
+  const msg = err instanceof Error ? err.message : String(err);
+  process.stderr.write(`error: ${msg}\n`);
+  process.exit(1);
+});
+
+function resolveMcpUrl(flag: string | undefined, project: string | undefined): string {
+  const base = flag ?? process.env.AGENT_MCP_URL;
+  if (!base) throw new Error('--mcp-url or $AGENT_MCP_URL is required');
+  if (project === undefined) return base;
+  // If user supplied --project and the URL already ends with /projects/<name>/mcp,
+  // replace the segment; otherwise treat the base as an origin and append.
+  const existingMatch = base.match(/^(.+?)\/projects\/[^/]+\/mcp\/?$/);
+  if (existingMatch) return `${existingMatch[1]}/projects/${encodeURIComponent(project)}/mcp`;
+  return `${base.replace(/\/+$/, '')}/projects/${encodeURIComponent(project)}/mcp`;
+}
+
+function required<T extends string>(label: string, value: T | undefined | null): T {
+  if (value === undefined || value === null || value === '') {
+    throw new Error(`${label} is required`);
+  }
+  return value;
+}
diff --git a/src/agent/src/index.ts b/src/agent/src/index.ts
new file mode 100644
index 0000000..fd83970
--- /dev/null
+++ b/src/agent/src/index.ts
@@ -0,0 +1,2 @@
+export { runAgent } from './agent.js';
+export type { AgentConfig, AgentDeps, AgentResult, McpLike, LlmLike } from './agent.js';
diff --git a/src/agent/tests/agent.test.ts b/src/agent/tests/agent.test.ts
new file mode 100644
index 0000000..5565d05
--- /dev/null
+++ b/src/agent/tests/agent.test.ts
@@ -0,0 +1,180 @@
+import { describe, it, expect, vi } from 'vitest';
+import { runAgent, type AgentConfig, type LlmLike, type McpLike } from '../src/agent.js';
+
+const BASE_CONFIG: AgentConfig = {
+  mcpUrl: 'http://mcp.example/projects/x/mcp',
+  mcpToken: 'mcpctl_pat_test',
+  llmBaseUrl: 'http://llm.example/v1',
+  llmApiKey: 'test',
+  model: 'qwen3-thinking',
+};
+
+function makeMcp(overrides: Partial<McpLike> = {}): McpLike {
+  return {
+    listTools: vi.fn(async () => ({ tools: [] })),
+    callTool: vi.fn(async () => ({ content: [{ type: 'text', text: 'ok' }] })),
+    close: vi.fn(async () => { /* noop */ }),
+    ...overrides,
+  };
+}
+
+function makeLlm(replies: Array<{ content?: string | null; tool_calls?: Array<{ id: string; name: string; arguments: string }> }>): LlmLike {
+  const queue = [...replies];
+  return {
+    chat: {
+      completions: {
+        create: vi.fn(async () => {
+          const next = queue.shift();
+          if (!next) throw new Error('LLM mock exhausted');
+          const message: {
+            role: 'assistant';
+            content: string | null;
+            tool_calls?: Array<{ id: string; type: 'function'; function: { name: string; arguments: string } }>;
+          } = { role: 'assistant', content: next.content ?? null };
+          if (next.tool_calls) {
+            message.tool_calls = next.tool_calls.map((tc) => ({
+              id: tc.id,
+              type: 'function' as const,
+              function: { name: tc.name, arguments: tc.arguments },
+            }));
+          }
+          return { choices: [{ message, finish_reason: next.tool_calls ? 'tool_calls' : 'stop' }] };
+        }),
+      },
+    },
+  };
+}
+
+describe('runAgent', () => {
+  it('returns directly when the model answers without tool calls', async () => {
+    const mcp = makeMcp();
+    const llm = makeLlm([{ content: 'hello world' }]);
+    const result = await runAgent('hi', BASE_CONFIG, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    expect(result.finalText).toBe('hello world');
+    expect(result.rounds).toBe(0);
+    expect(result.hitIterationLimit).toBe(false);
+    expect(mcp.callTool).not.toHaveBeenCalled();
+    expect(mcp.close).toHaveBeenCalled();
+  });
+
+  it('executes a tool call, feeds the result back, and terminates on the next assistant turn', async () => {
+    const mcp = makeMcp({
+      listTools: vi.fn(async () => ({
+        tools: [{ name: 'search', description: 'search the docs', inputSchema: { type: 'object' } }],
+      })),
+      callTool: vi.fn(async () => ({ content: [{ type: 'text', text: 'a matching doc' }] })),
+    });
+    const llm = makeLlm([
+      { tool_calls: [{ id: 'call-1', name: 'search', arguments: '{"q":"foo"}' }] },
+      { content: 'final answer based on tool result' },
+    ]);
+    const result = await runAgent('find foo', BASE_CONFIG, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    expect(result.finalText).toBe('final answer based on tool result');
+    expect(result.rounds).toBe(1);
+    expect(mcp.callTool).toHaveBeenCalledWith({ name: 'search', arguments: { q: 'foo' } });
+    // Messages should be: user → assistant (tool_calls) → tool → assistant (final)
+    expect(result.messages).toHaveLength(4);
+    expect(result.messages[0]!.role).toBe('user');
+    expect(result.messages[1]!.role).toBe('assistant');
+    expect(result.messages[2]!.role).toBe('tool');
+    expect(result.messages[3]!.role).toBe('assistant');
+  });
+
+  it('refetches tools/list between rounds to honor list_changed', async () => {
+    const listTools = vi.fn()
+      .mockResolvedValueOnce({ tools: [{ name: 'begin_session' }] })
+      .mockResolvedValueOnce({ tools: [{ name: 'begin_session' }, { name: 'search' }, { name: 'fetch' }] });
+    const mcp = makeMcp({ listTools });
+    const llm = makeLlm([
+      { tool_calls: [{ id: 'c1', name: 'begin_session', arguments: '{}' }] },
+      { content: 'done' },
+    ]);
+    await runAgent('go', BASE_CONFIG, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    // Called at startup + after each round (one round here)
+    expect(listTools).toHaveBeenCalledTimes(2);
+    // The second chat.completions.create call should have received all 3 tools
+    const secondCall = (llm.chat.completions.create as unknown as { mock: { calls: Array<[{ tools?: unknown[] }]> } }).mock.calls[1]!;
+    expect(secondCall[0].tools).toHaveLength(3);
+  });
+
+  it('stops after maxIterations and flags hitIterationLimit', async () => {
+    const mcp = makeMcp({
+      listTools: vi.fn(async () => ({ tools: [{ name: 'loop' }] })),
+    });
+    // Infinite tool-call stream
+    const llm: LlmLike = {
+      chat: {
+        completions: {
+          create: vi.fn(async () => ({
+            choices: [{
+              message: {
+                role: 'assistant',
+                content: null,
+                tool_calls: [{ id: 'x', type: 'function', function: { name: 'loop', arguments: '{}' } }],
+              },
+              finish_reason: 'tool_calls',
+            }],
+          })),
+        },
+      },
+    };
+    const result = await runAgent('trap me', { ...BASE_CONFIG, maxIterations: 3 }, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    expect(result.hitIterationLimit).toBe(true);
+    expect(result.rounds).toBe(3);
+  });
+
+  it('serializes a failed tool call into the conversation instead of throwing', async () => {
+    const mcp = makeMcp({
+      listTools: vi.fn(async () => ({ tools: [{ name: 'fails' }] })),
+      callTool: vi.fn(async () => { throw new Error('upstream exploded'); }),
+    });
+    const llm = makeLlm([
+      { tool_calls: [{ id: 'c1', name: 'fails', arguments: '{}' }] },
+      { content: 'ok I saw the error, moving on' },
+    ]);
+    const result = await runAgent('try the broken tool', BASE_CONFIG, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    expect(result.finalText).toBe('ok I saw the error, moving on');
+    const toolMsg = result.messages.find((m) => m.role === 'tool');
+    expect(toolMsg).toBeDefined();
+    expect(String(toolMsg!.content)).toContain('upstream exploded');
+  });
+
+  it('prepends systemPrompt when supplied', async () => {
+    const mcp = makeMcp();
+    const llm = makeLlm([{ content: 'fine' }]);
+    await runAgent('hi', { ...BASE_CONFIG, systemPrompt: 'you are a helpful assistant' }, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    const call = (llm.chat.completions.create as unknown as { mock: { calls: Array<[{ messages: Array<{ role: string; content: string }> }]> } }).mock.calls[0]![0];
+    expect(call.messages[0]).toEqual({ role: 'system', content: 'you are a helpful assistant' });
+    expect(call.messages[1]).toEqual({ role: 'user', content: 'hi' });
+  });
+
+  it('closes the MCP client even when the loop throws', async () => {
+    const mcp = makeMcp({
+      listTools: vi.fn(async () => { throw new Error('mcp dead'); }),
+    });
+    const llm = makeLlm([]);
+    await expect(runAgent('x', BASE_CONFIG, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    })).rejects.toThrow('mcp dead');
+    expect(mcp.close).toHaveBeenCalled();
+  });
+});
diff --git a/src/agent/tsconfig.json b/src/agent/tsconfig.json
new file mode 100644
index 0000000..4c4fbfc
--- /dev/null
+++ b/src/agent/tsconfig.json
@@ -0,0 +1,12 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "rootDir": "src",
+    "outDir": "dist",
+    "types": ["node"]
+  },
+  "include": ["src/**/*.ts"],
+  "references": [
+    { "path": "../shared" }
+  ]
+}
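For the long-running HTTP service mode listed under "Still to do", a rough
sketch of what wrapping runAgent in a plain node:http server could look like
(hypothetical and not part of this patch; the route, port, and payload shape
are illustrative only):

    import { createServer } from 'node:http';
    import { runAgent, type AgentConfig } from '@mcpctl/agent';

    // Same AGENT_* env vars the CLI reads; assumed to be set in the pod.
    const cfg: AgentConfig = {
      mcpUrl: process.env.AGENT_MCP_URL!,
      mcpToken: process.env.AGENT_MCP_TOKEN!,
      llmBaseUrl: process.env.AGENT_LLM_BASE_URL!,
      llmApiKey: process.env.AGENT_LLM_API_KEY!,
      model: process.env.AGENT_MODEL!,
    };

    createServer((req, res) => {
      let body = '';
      req.on('data', (chunk) => { body += chunk; });
      req.on('end', () => {
        void (async () => {
          try {
            // Expects a JSON body like {"prompt": "..."} on any route.
            const { prompt } = JSON.parse(body) as { prompt: string };
            const result = await runAgent(prompt, cfg);
            res.writeHead(200, { 'content-type': 'application/json' });
            res.end(JSON.stringify({
              finalText: result.finalText,
              rounds: result.rounds,
              hitIterationLimit: result.hitIterationLimit,
            }));
          } catch (err) {
            res.writeHead(500, { 'content-type': 'text/plain' });
            res.end(err instanceof Error ? err.message : String(err));
          }
        })();
      });
    }).listen(3000);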