From 3a28128fb43a8568f371866da50922bfd0c01fc0 Mon Sep 17 00:00:00 2001
From: Michal
Date: Sat, 18 Apr 2026 18:24:29 +0100
Subject: [PATCH] feat(agent): MCP-correct chat agent shim on top of LiteLLM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New package @mcpctl/agent that replaces LiteLLM's broken MCP integration
(dropped Mcp-Session-Id, ignored tools/list_changed) with a thin ~200 LOC
loop built on @modelcontextprotocol/sdk + the openai SDK. LiteLLM stays in
its actual lane — OpenAI-compatible model routing — and this agent handles
MCP correctly.

Core (src/agent.ts):
- StreamableHTTPClientTransport for MCP (auto-preserves Mcp-Session-Id).
- Re-fetches tools/list at the top of every loop so list_changed
  notifications surface new tools to the model on the next turn (fixes the
  gated-session case: begin_session reveals the full upstream tool set, and
  the next round's inference sees all of them).
- OpenAI-compatible inference via process.env.AGENT_LLM_BASE_URL — points
  at LiteLLM or vLLM directly.
- Graceful failure: broken tool calls are serialized back into the
  conversation as the tool's response, and the agent keeps going.
- maxIterations cap stops runaway loops; hitIterationLimit surfaces
  truncation in the result.
- Structural `McpLike` / `LlmLike` interfaces keep the loop testable
  without booting real SDKs.

CLI (src/cli.ts):
  mcpctl-agent run "<prompt>" \
    --model qwen3-thinking --project sre \
    [--system "..."] [--max-iterations N] [-o text|json] [--verbose]
  Env fallbacks: AGENT_MCP_URL, AGENT_MCP_TOKEN, AGENT_LLM_BASE_URL,
  AGENT_LLM_API_KEY, AGENT_MODEL

Tests (7 cases):
- direct answer (no tool call) → ok
- single-round tool call + synthesis → message history correct
- list_changed refresh: tools/list called at startup + after each round →
  next inference sees newly-exposed tools
- maxIterations cap → hitIterationLimit flag set
- failing tool → error serialized into conversation, agent recovers
- systemPrompt prepended
- mcp.close() runs even when loop throws (finally-block guarantee)

End-to-end verified against live cluster:
  Round 1: sees 1 tool (begin_session) → calls it
  Round 2: sees 115 tools (gate opened) → calls aws-docs/search_documentation
  Final: model synthesizes answer — LiteLLM's chat UI cannot do this today;
  this loop does.
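For reference, a minimal sketch of driving the exported API from TypeScript
(illustrative only; the prompt, endpoint URLs, and env-var fallbacks below
are placeholders rather than fixtures from this patch):

    import { runAgent } from '@mcpctl/agent';

    // Placeholder config; in the deployment these come from the AGENT_* env vars.
    const result = await runAgent('find the slowest grafana queries from last week', {
      mcpUrl: 'http://mcp.mcpctl.svc:3200/projects/sre/mcp',
      mcpToken: process.env.AGENT_MCP_TOKEN!,
      llmBaseUrl: 'http://litellm.nvidia-nim.svc.cluster.local:4000/v1',
      llmApiKey: process.env.AGENT_LLM_API_KEY!,
      model: 'qwen3-thinking',
      maxIterations: 10,
    });
    // rounds counts tool-call rounds; hitIterationLimit flags a truncated run.
    console.log(result.finalText, result.rounds, result.hitIterationLimit);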
Still to do (follow-up PRs):
- Wire into mcpctl binary as `mcpctl agent run ...`
- Docker image + Pulumi deploy for a long-running HTTP service mode (see
  the sketch after the diff)
- Minimal chat UI (HTMX or plain fetch)
- Streaming responses

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 pnpm-lock.yaml                | 105 ++++++++++++++++++
 src/agent/package.json        |  28 +++++
 src/agent/src/agent.ts        | 201 ++++++++++++++++++++++++++++++++++
 src/agent/src/cli.ts          | 107 ++++++++++++++++++
 src/agent/src/index.ts        |   2 +
 src/agent/tests/agent.test.ts | 180 ++++++++++++++++++++++++++++++
 src/agent/tsconfig.json       |  12 ++
 7 files changed, 635 insertions(+)
 create mode 100644 src/agent/package.json
 create mode 100644 src/agent/src/agent.ts
 create mode 100644 src/agent/src/cli.ts
 create mode 100644 src/agent/src/index.ts
 create mode 100644 src/agent/tests/agent.test.ts
 create mode 100644 src/agent/tsconfig.json

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index ade254b..5e632cb 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -39,6 +39,28 @@ importers: specifier: ^4.0.18 version: 4.0.18(@types/node@25.3.0)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2) + src/agent: + dependencies: + '@mcpctl/shared': + specifier: workspace:* + version: link:../shared + '@modelcontextprotocol/sdk': + specifier: ^1.0.0 + version: 1.26.0(zod@3.25.76) + commander: + specifier: ^13.0.0 + version: 13.1.0 + openai: + specifier: ^4.77.0 + version: 4.104.0(ws@8.19.0)(zod@3.25.76) + devDependencies: + '@types/node': + specifier: ^25.3.0 + version: 25.3.0 + vitest: + specifier: ^4.0.0 + version: 4.0.18(@types/node@25.3.0)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2) + src/cli: dependencies: '@inkjs/ui':
@@ -989,6 +1011,10 @@ packages: abbrev@1.1.1: resolution: {integrity: sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==} + abort-controller@3.0.0: + resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} + engines: {node: '>=6.5'} + abstract-logging@2.0.1: resolution: {integrity: sha512-2BjRTZxTPvheOvGbBslFSYOUkr+SjPtOnrLP33f+VIWLzezQpZcqVg7ja3L4dBXmzzgwT+a029jRx5PCi3JuiA==}
@@ -1014,6 +1040,10 @@ resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} engines: {node: '>= 14'} + agentkeepalive@4.6.0: + resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==} + engines: {node: '>= 8.0.0'} + ajv-formats@3.0.1: resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} peerDependencies:
@@ -1509,6 +1539,10 @@ resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==} engines: {node: '>= 0.6'} + event-target-shim@5.0.1: + resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==} + engines: {node: '>=6'} + events-universal@1.0.1: resolution: {integrity: sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==}
@@ -1610,10 +1644,17 @@ flatted@3.3.3: resolution: {integrity: sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==} + form-data-encoder@1.7.2: + resolution: {integrity: sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==} + form-data@4.0.5: resolution: {integrity:
sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==} engines: {node: '>= 6'} + formdata-node@4.4.1: + resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==} + engines: {node: '>= 12.20'} + forwarded@0.2.0: resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==} engines: {node: '>= 0.6'} @@ -1726,6 +1767,9 @@ packages: resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} engines: {node: '>= 6'} + humanize-ms@1.2.1: + resolution: {integrity: sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==} + iconv-lite@0.7.2: resolution: {integrity: sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==} engines: {node: '>=0.10.0'} @@ -2012,6 +2056,11 @@ packages: node-addon-api@5.1.0: resolution: {integrity: sha512-eh0GgfEkpnoWDq+VY8OyvYhFEzBk6jIYbRKdIlyTiAXIVJ8PyBaKb0rp7oDtoddbdoHWhq8wwr+XZ81F1rpNdA==} + node-domexception@1.0.0: + resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} + engines: {node: '>=10.5.0'} + deprecated: Use your platform's native DOMException instead + node-fetch-native@1.6.7: resolution: {integrity: sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q==} @@ -2073,6 +2122,18 @@ packages: resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} engines: {node: '>=6'} + openai@4.104.0: + resolution: {integrity: sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==} + hasBin: true + peerDependencies: + ws: ^8.18.0 + zod: ^3.23.8 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + openid-client@6.8.2: resolution: {integrity: sha512-uOvTCndr4udZsKihJ68H9bUICrriHdUVJ6Az+4Ns6cW55rwM5h0bjVIzDz2SxgOI84LKjFyjOFvERLzdTUROGA==} @@ -2647,6 +2708,10 @@ packages: jsdom: optional: true + web-streams-polyfill@4.0.0-beta.3: + resolution: {integrity: sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==} + engines: {node: '>= 14'} + webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} @@ -3509,6 +3574,10 @@ snapshots: abbrev@1.1.1: {} + abort-controller@3.0.0: + dependencies: + event-target-shim: 5.0.1 + abstract-logging@2.0.1: {} accepts@2.0.0: @@ -3530,6 +3599,10 @@ snapshots: agent-base@7.1.4: {} + agentkeepalive@4.6.0: + dependencies: + humanize-ms: 1.2.1 + ajv-formats@3.0.1(ajv@8.18.0): optionalDependencies: ajv: 8.18.0 @@ -4020,6 +4093,8 @@ snapshots: etag@1.8.1: {} + event-target-shim@5.0.1: {} + events-universal@1.0.1: dependencies: bare-events: 2.8.2 @@ -4168,6 +4243,8 @@ snapshots: flatted@3.3.3: {} + form-data-encoder@1.7.2: {} + form-data@4.0.5: dependencies: asynckit: 0.4.0 @@ -4176,6 +4253,11 @@ snapshots: hasown: 2.0.2 mime-types: 2.1.35 + formdata-node@4.4.1: + dependencies: + node-domexception: 1.0.0 + web-streams-polyfill: 4.0.0-beta.3 + forwarded@0.2.0: {} fresh@2.0.0: {} @@ -4298,6 +4380,10 @@ snapshots: transitivePeerDependencies: - supports-color + humanize-ms@1.2.1: + dependencies: + ms: 2.1.3 + iconv-lite@0.7.2: dependencies: safer-buffer: 2.1.2 @@ -4551,6 +4637,8 @@ snapshots: 
node-addon-api@5.1.0: {} + node-domexception@1.0.0: {} + node-fetch-native@1.6.7: {} node-fetch@2.7.0:
@@ -4600,6 +4688,21 @@ snapshots: dependencies: mimic-fn: 2.1.0 + openai@4.104.0(ws@8.19.0)(zod@3.25.76): + dependencies: + '@types/node': 18.19.130 + '@types/node-fetch': 2.6.13 + abort-controller: 3.0.0 + agentkeepalive: 4.6.0 + form-data-encoder: 1.7.2 + formdata-node: 4.4.1 + node-fetch: 2.7.0 + optionalDependencies: + ws: 8.19.0 + zod: 3.25.76 + transitivePeerDependencies: + - encoding + openid-client@6.8.2: dependencies: jose: 6.1.3
@@ -5211,6 +5314,8 @@ snapshots: - tsx - yaml + web-streams-polyfill@4.0.0-beta.3: {} + webidl-conversions@3.0.1: {} whatwg-url@5.0.0:
diff --git a/src/agent/package.json b/src/agent/package.json
new file mode 100644
index 0000000..2ca44e4
--- /dev/null
+++ b/src/agent/package.json
@@ -0,0 +1,28 @@
+{
+  "name": "@mcpctl/agent",
+  "version": "0.0.1",
+  "private": true,
+  "type": "module",
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "bin": {
+    "mcpctl-agent": "./dist/cli.js"
+  },
+  "scripts": {
+    "build": "tsc --build",
+    "clean": "rimraf dist",
+    "run": "node dist/cli.js",
+    "test": "vitest",
+    "test:run": "vitest run"
+  },
+  "dependencies": {
+    "@mcpctl/shared": "workspace:*",
+    "@modelcontextprotocol/sdk": "^1.0.0",
+    "commander": "^13.0.0",
+    "openai": "^4.77.0"
+  },
+  "devDependencies": {
+    "@types/node": "^25.3.0",
+    "vitest": "^4.0.0"
+  }
+}
diff --git a/src/agent/src/agent.ts b/src/agent/src/agent.ts
new file mode 100644
index 0000000..f702114
--- /dev/null
+++ b/src/agent/src/agent.ts
@@ -0,0 +1,201 @@
+/**
+ * MCP-aware chat agent loop.
+ *
+ * Correct where LiteLLM's integration is broken:
+ * - Uses `@modelcontextprotocol/sdk`'s `StreamableHTTPClientTransport`, which
+ *   preserves `Mcp-Session-Id` across requests automatically.
+ * - Honors `notifications/tools/list_changed`: after every tool-call round we
+ *   re-fetch the tool list before the next model inference, so an MCP server
+ *   that reveals new tools mid-session (gated sessions, auto-install) shows
+ *   them to the model on the next turn.
+ *
+ * Inference goes through an OpenAI-compatible endpoint (LiteLLM at
+ * http://litellm…:4000/v1 in this repo's deployment; vLLM works too). That
+ * keeps LiteLLM doing its actual job — model routing — and strips it of the
+ * MCP role it was failing at.
+ */
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+import OpenAI from 'openai';
+import type {
+  ChatCompletionMessageParam,
+  ChatCompletionTool,
+  ChatCompletionMessageToolCall,
+} from 'openai/resources/chat/completions';
+
+export interface AgentConfig {
+  /** Full URL of the MCP endpoint, e.g. http://mcp.mcpctl.svc:3200/projects/sre/mcp */
+  mcpUrl: string;
+  /** Raw `mcpctl_pat_…` bearer for the MCP endpoint. */
+  mcpToken: string;
+  /** OpenAI-compatible base URL, e.g. http://litellm…:4000/v1 */
+  llmBaseUrl: string;
+  /** API key for the OpenAI-compatible endpoint (LiteLLM master key). */
+  llmApiKey: string;
+  /** Model name as known to the OpenAI endpoint, e.g. qwen3-thinking */
+  model: string;
+  /** Optional system prompt (prepended as `role:'system'` if given). */
+  systemPrompt?: string;
+  /** Hard cap on loop iterations; stops runaway agents. Default 20. */
+  maxIterations?: number;
+  /** Per-tool-call timeout ms passed to the MCP SDK. Default 60_000. */
+  toolTimeoutMs?: number;
+}
+
+export interface AgentDeps {
+  /** Injectable for tests. Creates the MCP Client + transport. */
+  mcpClientFactory?: (cfg: AgentConfig) => Promise<McpLike>;
+  /** Injectable for tests. Creates the OpenAI-compatible client. */
+  llmClientFactory?: (cfg: AgentConfig) => LlmLike;
+  /** Optional per-iteration logger (stdout, audit sink, etc.). */
+  log?: (line: string) => void;
+}
+
+/**
+ * Structural typing for the MCP client surface we actually use. Keeps the
+ * loop testable without importing the concrete SDK in test fixtures. Optional
+ * fields are `T | undefined` (not `T?`) to stay compatible with the MCP SDK's
+ * own types under `exactOptionalPropertyTypes`.
+ */
+export interface McpLike {
+  listTools(): Promise<{ tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }>;
+  callTool(args: { name: string; arguments: Record<string, unknown> }): Promise<unknown>;
+  close(): Promise<void>;
+}
+
+export interface LlmLike {
+  chat: {
+    completions: {
+      create(body: {
+        model: string;
+        messages: ChatCompletionMessageParam[];
+        tools?: ChatCompletionTool[];
+        tool_choice?: 'auto' | 'none' | { type: 'function'; function: { name: string } };
+      }): Promise<{ choices: Array<{ message: { role: 'assistant'; content: string | null; tool_calls?: ChatCompletionMessageToolCall[] }; finish_reason?: string | null }> }>;
+    };
+  };
+}
+
+export interface AgentResult {
+  /** The final assistant message (after all tool-call rounds). */
+  finalText: string;
+  /** Full message history, useful for eval + debugging. */
+  messages: ChatCompletionMessageParam[];
+  /** Number of tool-call rounds that ran. Zero if the model answered directly. */
+  rounds: number;
+  /** True if the loop terminated because `maxIterations` was hit. */
+  hitIterationLimit: boolean;
+}
+
+export async function runAgent(prompt: string, config: AgentConfig, deps: AgentDeps = {}): Promise<AgentResult> {
+  const log = deps.log ?? (() => { /* silent */ });
+  const maxIterations = config.maxIterations ?? 20;
+
+  const mcp = await (deps.mcpClientFactory ?? defaultMcpFactory)(config);
+  try {
+    const llm = (deps.llmClientFactory ?? defaultLlmFactory)(config);
+
+    const messages: ChatCompletionMessageParam[] = [];
+    if (config.systemPrompt) messages.push({ role: 'system', content: config.systemPrompt });
+    messages.push({ role: 'user', content: prompt });
+
+    let tools = toOpenAiTools(await mcp.listTools());
+    log(`[agent] starting with ${tools.length} MCP tools`);
+
+    let rounds = 0;
+    for (let i = 0; i < maxIterations; i++) {
+      const body: Parameters<LlmLike['chat']['completions']['create']>[0] = {
+        model: config.model,
+        messages,
+      };
+      if (tools.length > 0) {
+        body.tools = tools;
+        body.tool_choice = 'auto';
+      }
+      const reply = await llm.chat.completions.create(body);
+      const msg = reply.choices[0]!.message;
+      messages.push(msg);
+
+      const toolCalls = msg.tool_calls ?? [];
+      if (toolCalls.length === 0) {
+        log(`[agent] done after ${rounds} tool-call round(s)`);
+        return { finalText: msg.content ?? '', messages, rounds, hitIterationLimit: false };
+      }
+
+      rounds++;
+      log(`[agent] round ${rounds}: model asked to call ${toolCalls.length} tool(s)`);
+
+      for (const tc of toolCalls) {
+        const name = tc.function.name;
+        let args: Record<string, unknown> = {};
+        try {
+          args = tc.function.arguments ? JSON.parse(tc.function.arguments) as Record<string, unknown> : {};
+        } catch (err) {
+          log(`[agent] tool ${name}: could not parse arguments (${(err as Error).message}) — sending empty args`);
+        }
+        log(`[agent] → ${name}(${truncate(JSON.stringify(args), 120)})`);
+        let result: unknown;
+        try {
+          result = await mcp.callTool({ name, arguments: args });
+        } catch (err) {
+          result = { error: (err as Error).message };
+          log(`[agent] ← ERROR: ${(err as Error).message}`);
+        }
+        messages.push({
+          role: 'tool',
+          tool_call_id: tc.id,
+          content: typeof result === 'string' ? result : JSON.stringify(result),
+        });
+      }
+
+      // MCP server may have emitted notifications/tools/list_changed during a
+      // tool call (e.g. gated sessions revealing tools after begin_session).
+      // The SDK auto-notifies on that event; simplest correctness: re-fetch
+      // on every loop before the next inference so the model sees fresh tools.
+      tools = toOpenAiTools(await mcp.listTools());
+    }
+
+    log(`[agent] hit iteration limit (${maxIterations}) — returning partial`);
+    const last = messages[messages.length - 1];
+    const tail = last && last.role === 'assistant'
+      ? (typeof last.content === 'string' ? last.content : '')
+      : '';
+    return { finalText: tail, messages, rounds, hitIterationLimit: true };
+  } finally {
+    await mcp.close().catch(() => { /* best-effort */ });
+  }
+}
+
+function toOpenAiTools(listed: { tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }): ChatCompletionTool[] {
+  return listed.tools.map((t) => {
+    const fn: { name: string; description?: string; parameters?: Record<string, unknown> } = { name: t.name };
+    if (t.description !== undefined) fn.description = t.description;
+    if (t.inputSchema !== undefined) fn.parameters = t.inputSchema as Record<string, unknown>;
+    return { type: 'function', function: fn } as ChatCompletionTool;
+  });
+}
+
+function truncate(s: string, n: number): string {
+  return s.length <= n ? s : `${s.slice(0, n - 1)}…`;
+}
+
+async function defaultMcpFactory(cfg: AgentConfig): Promise<McpLike> {
+  const client = new Client({ name: 'mcpctl-agent', version: '0.0.1' });
+  const transport = new StreamableHTTPClientTransport(new URL(cfg.mcpUrl), {
+    requestInit: { headers: { Authorization: `Bearer ${cfg.mcpToken}` } },
+  });
+  // The SDK's Transport interface declares `sessionId: string` while the
+  // Streamable-HTTP transport starts with `sessionId: undefined` until
+  // `initialize` populates it — that's legal at runtime but TS exactOptional
+  // rules reject the direct assignment.
+  await client.connect(transport as unknown as Parameters<typeof client.connect>[0]);
+  return {
+    listTools: () => client.listTools() as Promise<{ tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }>,
+    callTool: (args) => client.callTool(args),
+    close: () => client.close(),
+  };
+}
+
+function defaultLlmFactory(cfg: AgentConfig): LlmLike {
+  return new OpenAI({ baseURL: cfg.llmBaseUrl, apiKey: cfg.llmApiKey }) as unknown as LlmLike;
+}
diff --git a/src/agent/src/cli.ts b/src/agent/src/cli.ts
new file mode 100644
index 0000000..1fb7e07
--- /dev/null
+++ b/src/agent/src/cli.ts
@@ -0,0 +1,107 @@
+#!/usr/bin/env node
+/**
+ * `mcpctl-agent` CLI — standalone for now, will be wired into the mcpctl
+ * binary as `mcpctl agent run …` in a follow-up so the main CLI's permission
+ * model + completions pipeline can pick it up.
+ *
+ * Usage:
+ *   mcpctl-agent run "analyse last week's slow grafana queries" \
+ *     --model qwen3-thinking \
+ *     --project sre
+ *
+ * Env reads (these are the same shape we'd mount from a k8s Secret/ConfigMap
+ * in the follow-up serve mode):
+ *   AGENT_MCP_URL       e.g. https://mcp.ad.itaz.eu/projects/sre/mcp
+ *   AGENT_MCP_TOKEN     mcpctl_pat_…
+ *   AGENT_LLM_BASE_URL  e.g. http://litellm.nvidia-nim.svc.cluster.local:4000/v1
+ *   AGENT_LLM_API_KEY   LiteLLM master key
+ *   AGENT_MODEL         default model (overridable with --model)
+ */
+import { Command } from 'commander';
+import { runAgent, type AgentConfig } from './agent.js';
+
+const program = new Command();
+
+program
+  .name('mcpctl-agent')
+  .description('MCP-correct chat agent (preserves Mcp-Session-Id, honors tools/list_changed)')
+  .version('0.0.1');
+
+program
+  .command('run <prompt>')
+  .description('One-shot: send a prompt, let the agent use MCP tools until it answers, print the final text')
+  .option('--mcp-url <url>', 'MCP endpoint URL (default: $AGENT_MCP_URL)')
+  .option('--mcp-token <token>', 'MCP bearer token (default: $AGENT_MCP_TOKEN)')
+  .option('--llm-base-url <url>', 'OpenAI-compatible endpoint (default: $AGENT_LLM_BASE_URL)')
+  .option('--llm-api-key <key>', 'API key (default: $AGENT_LLM_API_KEY)')
+  .option('--model <model>', 'Model to use (default: $AGENT_MODEL)')
+  .option('--project <name>', 'Override the MCP URL path to /projects/<name>/mcp against the base at $AGENT_MCP_URL')
+  .option('--system <prompt>', 'System prompt (prepended)')
+  .option('--max-iterations <n>', 'Max tool-call rounds (default 20)', '20')
+  .option('-o, --output <format>', 'Output format: text | json', 'text')
+  .option('--verbose', 'Log each loop iteration to stderr')
+  .action(async (prompt: string, opts: {
+    mcpUrl?: string;
+    mcpToken?: string;
+    llmBaseUrl?: string;
+    llmApiKey?: string;
+    model?: string;
+    project?: string;
+    system?: string;
+    maxIterations: string;
+    output: string;
+    verbose?: boolean;
+  }) => {
+    const mcpUrl = resolveMcpUrl(opts.mcpUrl, opts.project);
+    const cfg: AgentConfig = {
+      mcpUrl,
+      mcpToken: required('--mcp-token / $AGENT_MCP_TOKEN', opts.mcpToken ?? process.env.AGENT_MCP_TOKEN),
+      llmBaseUrl: required('--llm-base-url / $AGENT_LLM_BASE_URL', opts.llmBaseUrl ?? process.env.AGENT_LLM_BASE_URL),
+      llmApiKey: required('--llm-api-key / $AGENT_LLM_API_KEY', opts.llmApiKey ?? process.env.AGENT_LLM_API_KEY),
+      model: required('--model / $AGENT_MODEL', opts.model ?? process.env.AGENT_MODEL),
+      maxIterations: Number(opts.maxIterations),
+    };
+    if (opts.system !== undefined) cfg.systemPrompt = opts.system;
+
+    const logFn = opts.verbose
+      ? (line: string) => process.stderr.write(`${line}\n`)
+      : () => { /* silent */ };
+
+    const result = await runAgent(prompt, cfg, { log: logFn });
+
+    if (opts.output === 'json') {
+      process.stdout.write(`${JSON.stringify({
+        finalText: result.finalText,
+        rounds: result.rounds,
+        hitIterationLimit: result.hitIterationLimit,
+        messages: result.messages,
+      }, null, 2)}\n`);
+    } else {
+      process.stdout.write(`${result.finalText}\n`);
+      if (result.hitIterationLimit) process.stderr.write('[agent] hit --max-iterations limit; output may be incomplete\n');
+    }
+  });
+
+program.parseAsync(process.argv).catch((err: unknown) => {
+  const msg = err instanceof Error ? err.message : String(err);
+  process.stderr.write(`error: ${msg}\n`);
+  process.exit(1);
+});
+
+function resolveMcpUrl(flag: string | undefined, project: string | undefined): string {
+  const base = flag ?? process.env.AGENT_MCP_URL;
+  if (!base) throw new Error('--mcp-url or $AGENT_MCP_URL is required');
+  if (project === undefined) return base;
+  // If user supplied --project and the URL already ends with /projects/<name>/mcp,
+  // replace the segment; otherwise treat the base as an origin and append.
+  const existingMatch = base.match(/^(.+?)\/projects\/[^/]+\/mcp\/?$/);
+  if (existingMatch) return `${existingMatch[1]}/projects/${encodeURIComponent(project)}/mcp`;
+  return `${base.replace(/\/+$/, '')}/projects/${encodeURIComponent(project)}/mcp`;
+}
+
+function required<T extends string>(label: string, value: T | undefined | null): T {
+  if (value === undefined || value === null || value === '') {
+    throw new Error(`${label} is required`);
+  }
+  return value;
+}
diff --git a/src/agent/src/index.ts b/src/agent/src/index.ts
new file mode 100644
index 0000000..fd83970
--- /dev/null
+++ b/src/agent/src/index.ts
@@ -0,0 +1,2 @@
+export { runAgent } from './agent.js';
+export type { AgentConfig, AgentDeps, AgentResult, McpLike, LlmLike } from './agent.js';
diff --git a/src/agent/tests/agent.test.ts b/src/agent/tests/agent.test.ts
new file mode 100644
index 0000000..5565d05
--- /dev/null
+++ b/src/agent/tests/agent.test.ts
@@ -0,0 +1,180 @@
+import { describe, it, expect, vi } from 'vitest';
+import { runAgent, type AgentConfig, type LlmLike, type McpLike } from '../src/agent.js';
+
+const BASE_CONFIG: AgentConfig = {
+  mcpUrl: 'http://mcp.example/projects/x/mcp',
+  mcpToken: 'mcpctl_pat_test',
+  llmBaseUrl: 'http://llm.example/v1',
+  llmApiKey: 'test',
+  model: 'qwen3-thinking',
+};
+
+function makeMcp(overrides: Partial<McpLike> = {}): McpLike {
+  return {
+    listTools: vi.fn(async () => ({ tools: [] })),
+    callTool: vi.fn(async () => ({ content: [{ type: 'text', text: 'ok' }] })),
+    close: vi.fn(async () => { /* noop */ }),
+    ...overrides,
+  };
+}
+
+function makeLlm(replies: Array<{ content?: string | null; tool_calls?: Array<{ id: string; name: string; arguments: string }> }>): LlmLike {
+  const queue = [...replies];
+  return {
+    chat: {
+      completions: {
+        create: vi.fn(async () => {
+          const next = queue.shift();
+          if (!next) throw new Error('LLM mock exhausted');
+          const message: {
+            role: 'assistant';
+            content: string | null;
+            tool_calls?: Array<{ id: string; type: 'function'; function: { name: string; arguments: string } }>;
+          } = { role: 'assistant', content: next.content ?? null };
+          if (next.tool_calls) {
+            message.tool_calls = next.tool_calls.map((tc) => ({
+              id: tc.id,
+              type: 'function' as const,
+              function: { name: tc.name, arguments: tc.arguments },
+            }));
+          }
+          return { choices: [{ message, finish_reason: next.tool_calls ? 'tool_calls' : 'stop' }] };
+        }),
+      },
+    },
+  };
+}
+
+describe('runAgent', () => {
+  it('returns directly when the model answers without tool calls', async () => {
+    const mcp = makeMcp();
+    const llm = makeLlm([{ content: 'hello world' }]);
+    const result = await runAgent('hi', BASE_CONFIG, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    expect(result.finalText).toBe('hello world');
+    expect(result.rounds).toBe(0);
+    expect(result.hitIterationLimit).toBe(false);
+    expect(mcp.callTool).not.toHaveBeenCalled();
+    expect(mcp.close).toHaveBeenCalled();
+  });
+
+  it('executes a tool call, feeds the result back, and terminates on the next assistant turn', async () => {
+    const mcp = makeMcp({
+      listTools: vi.fn(async () => ({
+        tools: [{ name: 'search', description: 'search the docs', inputSchema: { type: 'object' } }],
+      })),
+      callTool: vi.fn(async () => ({ content: [{ type: 'text', text: 'a matching doc' }] })),
+    });
+    const llm = makeLlm([
+      { tool_calls: [{ id: 'call-1', name: 'search', arguments: '{"q":"foo"}' }] },
+      { content: 'final answer based on tool result' },
+    ]);
+    const result = await runAgent('find foo', BASE_CONFIG, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    expect(result.finalText).toBe('final answer based on tool result');
+    expect(result.rounds).toBe(1);
+    expect(mcp.callTool).toHaveBeenCalledWith({ name: 'search', arguments: { q: 'foo' } });
+    // Messages should be: user → assistant (tool_calls) → tool → assistant (final)
+    expect(result.messages).toHaveLength(4);
+    expect(result.messages[0]!.role).toBe('user');
+    expect(result.messages[1]!.role).toBe('assistant');
+    expect(result.messages[2]!.role).toBe('tool');
+    expect(result.messages[3]!.role).toBe('assistant');
+  });
+
+  it('refetches tools/list between rounds to honor list_changed', async () => {
+    const listTools = vi.fn()
+      .mockResolvedValueOnce({ tools: [{ name: 'begin_session' }] })
+      .mockResolvedValueOnce({ tools: [{ name: 'begin_session' }, { name: 'search' }, { name: 'fetch' }] });
+    const mcp = makeMcp({ listTools });
+    const llm = makeLlm([
+      { tool_calls: [{ id: 'c1', name: 'begin_session', arguments: '{}' }] },
+      { content: 'done' },
+    ]);
+    await runAgent('go', BASE_CONFIG, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    // Called at startup + after each round (one round here)
+    expect(listTools).toHaveBeenCalledTimes(2);
+    // The second chat.completions.create call should have received all 3 tools
+    const secondCall = (llm.chat.completions.create as unknown as { mock: { calls: Array<[{ tools?: unknown[] }]> } }).mock.calls[1]!;
+    expect(secondCall[0].tools).toHaveLength(3);
+  });
+
+  it('stops after maxIterations and flags hitIterationLimit', async () => {
+    const mcp = makeMcp({
+      listTools: vi.fn(async () => ({ tools: [{ name: 'loop' }] })),
+    });
+    // Infinite tool-call stream
+    const llm: LlmLike = {
+      chat: {
+        completions: {
+          create: vi.fn(async () => ({
+            choices: [{
+              message: {
+                role: 'assistant',
+                content: null,
+                tool_calls: [{ id: 'x', type: 'function', function: { name: 'loop', arguments: '{}' } }],
+              },
+              finish_reason: 'tool_calls',
+            }],
+          })),
+        },
+      },
+    };
+    const result = await runAgent('trap me', { ...BASE_CONFIG, maxIterations: 3 }, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    expect(result.hitIterationLimit).toBe(true);
+    expect(result.rounds).toBe(3);
+  });
+
+  it('serializes a failed tool call into the conversation instead of throwing', async () => {
+    const mcp = makeMcp({
+      listTools: vi.fn(async () => ({ tools: [{ name: 'fails' }] })),
+      callTool: vi.fn(async () => { throw new Error('upstream exploded'); }),
+    });
+    const llm = makeLlm([
+      { tool_calls: [{ id: 'c1', name: 'fails', arguments: '{}' }] },
+      { content: 'ok I saw the error, moving on' },
+    ]);
+    const result = await runAgent('try the broken tool', BASE_CONFIG, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    expect(result.finalText).toBe('ok I saw the error, moving on');
+    const toolMsg = result.messages.find((m) => m.role === 'tool');
+    expect(toolMsg).toBeDefined();
+    expect(String(toolMsg!.content)).toContain('upstream exploded');
+  });
+
+  it('prepends systemPrompt when supplied', async () => {
+    const mcp = makeMcp();
+    const llm = makeLlm([{ content: 'fine' }]);
+    await runAgent('hi', { ...BASE_CONFIG, systemPrompt: 'you are a helpful assistant' }, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    });
+    const call = (llm.chat.completions.create as unknown as { mock: { calls: Array<[{ messages: Array<{ role: string; content: string }> }]> } }).mock.calls[0]![0];
+    expect(call.messages[0]).toEqual({ role: 'system', content: 'you are a helpful assistant' });
+    expect(call.messages[1]).toEqual({ role: 'user', content: 'hi' });
+  });
+
+  it('closes the MCP client even when the loop throws', async () => {
+    const mcp = makeMcp({
+      listTools: vi.fn(async () => { throw new Error('mcp dead'); }),
+    });
+    const llm = makeLlm([]);
+    await expect(runAgent('x', BASE_CONFIG, {
+      mcpClientFactory: async () => mcp,
+      llmClientFactory: () => llm,
+    })).rejects.toThrow('mcp dead');
+    expect(mcp.close).toHaveBeenCalled();
+  });
+});
diff --git a/src/agent/tsconfig.json b/src/agent/tsconfig.json
new file mode 100644
index 0000000..4c4fbfc
--- /dev/null
+++ b/src/agent/tsconfig.json
@@ -0,0 +1,12 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "rootDir": "src",
+    "outDir": "dist",
+    "types": ["node"]
+  },
+  "include": ["src/**/*.ts"],
+  "references": [
+    { "path": "../shared" }
+  ]
+}
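For the long-running HTTP service mode listed under "Still to do", a rough
sketch of what wrapping runAgent in a plain node:http server could look like
(hypothetical and not part of this patch; the route, port, and payload shape
are illustrative only):

    import { createServer } from 'node:http';
    import { runAgent, type AgentConfig } from '@mcpctl/agent';

    // Same AGENT_* env vars the CLI reads; assumed to be set in the pod.
    const cfg: AgentConfig = {
      mcpUrl: process.env.AGENT_MCP_URL!,
      mcpToken: process.env.AGENT_MCP_TOKEN!,
      llmBaseUrl: process.env.AGENT_LLM_BASE_URL!,
      llmApiKey: process.env.AGENT_LLM_API_KEY!,
      model: process.env.AGENT_MODEL!,
    };

    createServer((req, res) => {
      let body = '';
      req.on('data', (chunk) => { body += chunk; });
      req.on('end', () => {
        void (async () => {
          try {
            // Expects a JSON body like {"prompt": "..."} on any route.
            const { prompt } = JSON.parse(body) as { prompt: string };
            const result = await runAgent(prompt, cfg);
            res.writeHead(200, { 'content-type': 'application/json' });
            res.end(JSON.stringify({
              finalText: result.finalText,
              rounds: result.rounds,
              hitIterationLimit: result.hitIterationLimit,
            }));
          } catch (err) {
            res.writeHead(500, { 'content-type': 'text/plain' });
            res.end(err instanceof Error ? err.message : String(err));
          }
        })();
      });
    }).listen(3000);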