From 8b56f09f2578af10079eebc1bb1f943977e832de Mon Sep 17 00:00:00 2001 From: Michal Date: Sat, 25 Apr 2026 17:08:37 +0100 Subject: [PATCH] feat(agents): smoke tests + README + docs (Stage 6, final) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the agents feature. Smoke tests (run via `pnpm test:smoke` against a live mcpd at $MCPD_URL, default https://mcpctl.ad.itaz.eu): * tests/smoke/agent.smoke.test.ts — full CRUD round-trip: create secret + Llm + agent with sampling defaults; `get agents` surfaces it; `get agent foo -o yaml | apply -f` round-trips identically; create + list a thread via the HTTP API; agent delete leaves Llm + secret intact (Restrict + SetNull as designed). Self- skips with a warning when /healthz is unreachable. * tests/smoke/agent-chat.smoke.test.ts — gated on MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY. Provisions secret + Llm + agent against a real upstream, runs `mcpctl chat -m … --no- stream` (asserts a reply lands), then runs the streaming default (asserts text on stdout + `(thread: …)` on stderr). The fast path for verifying the in-cluster qwen3-thinking deployment: MCPCTL_SMOKE_LLM_URL=http://litellm.nvidia-nim.svc.cluster.local:4000/v1 \ MCPCTL_SMOKE_LLM_MODEL=qwen3-thinking \ MCPCTL_SMOKE_LLM_KEY=$(pulumi config get --stack homelab \ secrets:litellmMcpctlGatewayToken) \ pnpm test:smoke Docs: * README.md — new "Agents" section under Resources with the qwen3-thinking quickstart and links to docs/agents.md and docs/chat.md. Adds llm + agent rows to the resources table. * docs/agents.md (new) — full reference: data model, chat-parameter table, HTTP API, RBAC mapping, tool-use loop semantics, yaml round-trip shorthand, the kubernetes-deployment wiring recipe, and a troubleshooting section (namespace collision, llm-in-use, pending-row recovery, Anthropic-tool limitation). * docs/chat.md (new) — user-facing `mcpctl chat` walkthrough: modes, per-call flags, slash-commands, threads, and a troubleshooting section. * CLAUDE.md — adds a "Resource types" cheatsheet with one-line pointers to each, including the new `agent` row that links to the docs. All suites still green: mcpd 759/759, mcplocal 715/715, cli 430/430. Smoke tests typecheck and self-skip when no live mcpd is reachable. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 15 ++ README.md | 47 ++++ docs/agents.md | 197 +++++++++++++++ docs/chat.md | 124 +++++++++ .../tests/smoke/agent-chat.smoke.test.ts | 149 +++++++++++ src/mcplocal/tests/smoke/agent.smoke.test.ts | 235 ++++++++++++++++++ 6 files changed, 767 insertions(+) create mode 100644 docs/agents.md create mode 100644 docs/chat.md create mode 100644 src/mcplocal/tests/smoke/agent-chat.smoke.test.ts create mode 100644 src/mcplocal/tests/smoke/agent.smoke.test.ts diff --git a/CLAUDE.md b/CLAUDE.md index 90b4c39..967d913 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -23,3 +23,18 @@ Key routing rules: - Architecture review → invoke plan-eng-review - Save progress, checkpoint, resume → invoke checkpoint - Code quality, health check → invoke health + +## Resource types + +`mcpctl` resource cheatsheet: + +- `server` — MCP server definition +- `instance` — running container (immutable, replicas-managed) +- `secret` / `secretbackend` — credentials +- `template` — reusable server blueprint +- `project` — workspace grouping servers, prompts, agents +- `llm` — server-managed LLM provider (api key + endpoint) +- `agent` — LLM persona pinned to one Llm; project attach surfaces project Prompts as system context, project MCP servers as tools, and exposes the agent itself as an MCP virtual server (`agent-/chat`). See `docs/agents.md`, `docs/chat.md`. +- `prompt` / `promptrequest` — curated content / pending proposal +- `rbac` — access control bindings +- `mcptoken` — bearer credentials for HTTP-mode mcplocal diff --git a/README.md b/README.md index bc58953..ce12cb4 100644 --- a/README.md +++ b/README.md @@ -494,11 +494,58 @@ new FileCache('ns', { maxSize: '10%' }) // 10% of partition | **secret** | Key-value credentials | API tokens, passwords | | **template** | Reusable server blueprint | Community server configs | | **project** | Workspace grouping servers | "monitoring", "home-automation" | +| **llm** | Server-managed LLM provider | OpenAI / Anthropic / vLLM endpoint + key | +| **agent** | LLM persona pinned to one Llm | "I review security; ask after each major change" | | **prompt** | Curated content for Claude | Instructions, docs, guides | | **promptrequest** | Pending prompt proposal | LLM-submitted, needs approval | | **rbac** | Access control bindings | Who can do what | | **serverattachment** | Server-to-project link | Virtual resource for `apply` | +## Agents + +An **Agent** is an LLM persona — a pinned `Llm`, a system prompt, an optional +project attach, and LiteLLM-style sampling defaults. Once attached to a +project, the agent inherits the project's prompts (merged into its system +block, sorted by priority) and gets to call the project's MCP servers as +tools during chat. + +Every agent is also exposed back to MCP clients as a virtual server named +`agent-` with one tool `chat`. So another Claude session connecting to +the same project sees, e.g., `agent-reviewer/chat` in `tools/list` with the +description "I review security design — ask me after each major change." +That's how agents consult each other. + +```bash +# 1) point at an LLM. For your in-cluster qwen3-thinking via LiteLLM: +mcpctl create secret litellm-key --data API_KEY=sk-… +mcpctl create llm qwen3-thinking \ + --type openai \ + --model qwen3-thinking \ + --url http://litellm.nvidia-nim.svc.cluster.local:4000/v1 \ + --api-key-ref litellm-key/API_KEY + +# 2) create an agent, pinned to that Llm and attached to a project +mcpctl create agent reviewer \ + --llm qwen3-thinking \ + --project mcpctl-dev \ + --description "I review security design — ask me after each major change." \ + --system-prompt-file ./prompts/reviewer.md \ + --default-temperature 0.2 --default-max-tokens 4096 + +# 3) chat with it (interactive REPL — Ctrl-D to exit) +mcpctl chat reviewer + +# Or one-shot +mcpctl chat reviewer -m "Look at PR #42 and tell me what's risky." + +# Resume a thread +mcpctl get threads --agent reviewer +mcpctl chat reviewer --thread +``` + +Full reference: [docs/agents.md](docs/agents.md). User-facing chat guide: +[docs/chat.md](docs/chat.md). + ## Commands ```bash diff --git a/docs/agents.md b/docs/agents.md new file mode 100644 index 0000000..fedab5e --- /dev/null +++ b/docs/agents.md @@ -0,0 +1,197 @@ +# Agents + +An `Agent` is an LLM persona pinned to a specific `Llm`, with a system prompt, +a description that surfaces in MCP `tools/list`, optional attachment to a +`Project`, and LiteLLM-style sampling defaults. Conversations are persisted +as `ChatThread` + `ChatMessage` rows so REPL sessions resume across runs. + +Two surfaces use an agent: + +1. **Direct chat** via `mcpctl chat ` (interactive REPL or one-shot + `-m "msg"`). Streams over SSE; tool calls and tool results print to + stderr in dim brackets. Slash-commands `/set`, `/system`, `/tools`, + `/clear`, `/save`, `/quit` adjust runtime behavior. + +2. **Virtual MCP server** registered into every project session by + mcplocal's agents plugin. The agent shows up as `agent-` with + one tool `chat`, whose description is the agent's own description. + Other Claude sessions / MCP clients see the agent as just another + tool in `tools/list` and can consult it. + +## Data model + +Three Prisma models added to `src/db/prisma/schema.prisma`: + +- **`Agent`** — `name` (unique), `description`, `systemPrompt`, `llmId` + (FK Restrict — an Llm in active use cannot be deleted), `projectId` + (FK SetNull — agents survive project deletion), `proxyModelName` + (optional informational override), `defaultParams` (Json, + LiteLLM-style), `extras` (Json, reserved for future LoRA / tool + allowlists), `ownerId`, version, timestamps. + +- **`ChatThread`** — `agentId`, `ownerId`, `title`, `lastTurnAt`, + timestamps. Cascade delete on agent. + +- **`ChatMessage`** — `threadId`, `turnIndex` (monotonic per thread, + enforced by `@@unique([threadId, turnIndex])`), `role` + (`'system' | 'user' | 'assistant' | 'tool'`), `content`, `toolCalls` + (Json — assistant turn's `[{id,name,arguments}]`), `toolCallId` + (which call a tool turn answers), `status` + (`'pending' | 'complete' | 'error'`), `createdAt`. Cascade delete + on thread. + +`status` stays `pending` while the orchestrator runs an in-flight assistant +or tool turn, then flips to `complete` once the round settles. On any +exception in the chat loop, every `pending` row in the thread is flipped to +`error` so the trail stays auditable. + +## Chat parameters (LiteLLM-style passthrough) + +Per-call resolution: request body → `agent.defaultParams` → adapter default. +Setting a key to `null` in the request explicitly clears a default. + +| Key | Type | Notes | +|---|---|---| +| `temperature` | number | 0..2 | +| `top_p` | number | 0..1 | +| `top_k` | integer | Anthropic-only; OpenAI ignores | +| `max_tokens` | integer | adapter clamps to provider max | +| `stop` | string \| string[] | up to 4 sequences | +| `presence_penalty` | number | OpenAI | +| `frequency_penalty` | number | OpenAI | +| `seed` | integer | reproducibility (provider-dependent) | +| `response_format` | object | `text` \| `json_object` \| `json_schema` | +| `tool_choice` | enum/object | `auto`\|`none`\|`required`\|`{type:'function',function:{name}}` | +| `tools_allowlist` | string[] | restricts which project MCP tools the agent can call this turn | +| `systemOverride` | string | replaces `agent.systemPrompt` for this call | +| `systemAppend` | string | concatenated to system block (after project Prompts) | +| `messages` | array | full message history override; if set, `message`/threadId history is ignored | +| `extra` | object | provider-specific knobs (Anthropic `metadata.user_id`, vLLM `repetition_penalty`) — adapters cherry-pick | + +## HTTP API (mcpd) + +``` +GET /api/v1/agents list (RBAC: view:agents) +GET /api/v1/agents/:idOrName describe (view:agents) +POST /api/v1/agents create (create:agents) +PUT /api/v1/agents/:idOrName update (edit:agents) +DELETE /api/v1/agents/:idOrName delete (delete:agents) +POST /api/v1/agents/:name/chat chat — non-streaming or SSE (run:agents:) +POST /api/v1/agents/:name/threads create thread (run:agents:) +GET /api/v1/agents/:name/threads list threads (run:agents:) +GET /api/v1/threads/:id/messages replay history (view:agents) +GET /api/v1/projects/:p/agents project-scoped list (view:projects:

) +``` + +The chat endpoint reuses the SSE pattern from `llm-infer.ts` exactly: same +headers (`text/event-stream`, `X-Accel-Buffering: no`), same `data: …\n\n` +framing, same `[DONE]` terminator. SSE chunk types: + +- `{type:'text', delta}` — assistant text increments +- `{type:'tool_call', toolName, args}` — model decided to call a tool +- `{type:'tool_result', toolName, ok}` — tool dispatch outcome +- `{type:'final', threadId, turnIndex}` — terminal turn +- `{type:'error', message}` — fatal error in the loop + +## Tool-use loop + +When the agent's project has MCP servers attached, mcpd's `ChatService` lists +each server's tools (via `mcp-proxy.service.ts` — same path real MCP traffic +uses) and presents them to the model namespaced as `__`. On a +`tool_calls` response the loop dispatches each call back through the same +proxy, persists the assistant + tool turns linked by `toolCallId`, and loops +(cap = 12 iterations) until the model returns terminal text. + +Persistence is **non-transactional across the loop** because tool calls can +take minutes; long-held DB transactions would starve other writers. + +## RBAC + +Agents are their own resource (`agents`), independent of project bindings. +Recommended: + +- `view:agents` — list / describe +- `create:agents` / `edit:agents` / `delete:agents` — CRUD +- `run:agents:` — drive a chat turn or manage its threads + +Project-attached agents do not implicitly inherit project RBAC. If a project +member should be able to chat with the project's agents, grant them +`run:agents:` (or wildcard `run:agents`) explicitly. + +## YAML round-trip + +`get agent foo -o yaml | mcpctl apply -f -` is a no-op. The `apply` schema +also accepts shorthand: + +```yaml +apiVersion: mcpctl.io/v1 +kind: agent +metadata: { name: deployer } +spec: + description: "I help you deploy code" + llm: qwen3-thinking # shorthand for `{ name: qwen3-thinking }` + project: mcpctl-dev # shorthand for `{ name: mcpctl-dev }` + systemPrompt: | + You are a deployment assistant for mcpctl. Always check fulldeploy.sh + and the k8s context before suggesting actions. + defaultParams: + temperature: 0.2 + max_tokens: 4096 + top_p: 0.9 + stop: [""] +``` + +## Wiring against your in-cluster qwen3-thinking + +The `kubernetes-deployment` repo provisions LiteLLM in the `nvidia-nim` +namespace (`http://litellm.nvidia-nim.svc.cluster.local:4000/v1` in-cluster, +`https://llm.ad.itaz.eu/v1` external) and a virtual key reserved for mcpctl +in the Pulumi secret `secrets:litellmMcpctlGatewayToken`. Pulling it once: + +```bash +cd /path/to/kubernetes-deployment +LITELLM_TOKEN=$(pulumi config get --stack homelab secrets:litellmMcpctlGatewayToken) + +# fallback if Pulumi isn't authed locally: +# LITELLM_TOKEN=$(kubectl --context worker0-k8s0 -n nvidia-nim get secret litellm-secrets \ +# -o jsonpath='{.data.LITELLM_MCPCTL_GATEWAY_TOKEN}' | base64 -d) + +cd /path/to/mcpctl +mcpctl create secret litellm-key --data "API_KEY=${LITELLM_TOKEN}" +mcpctl create llm qwen3-thinking \ + --type openai --model qwen3-thinking \ + --url http://litellm.nvidia-nim.svc.cluster.local:4000/v1 \ + --api-key-ref litellm-key/API_KEY \ + --description "Qwen3-30B-A3B-Thinking-FP8 via in-cluster vLLM behind LiteLLM" +mcpctl create agent reviewer \ + --llm qwen3-thinking \ + --description "I review what you're shipping; ask after each major change." \ + --default-temperature 0.2 --default-max-tokens 4096 +mcpctl chat reviewer +``` + +## Troubleshooting + +- **Namespace collision** in mcplocal: if a project has an upstream MCP + server literally named `agent-`, the agents plugin detects the + collision in `onSessionCreate`, skips that agent's registration, and + emits a `ctx.log.warn` line. Document the `agent-` prefix as reserved + on real server names. + +- **Llm-in-use blocks delete**: `Agent.llm` is `onDelete: Restrict`. Detach + every agent (or delete them) before deleting the underlying Llm. + +- **Stale `pending` rows**: a crash mid-loop leaves `pending` ChatMessage + rows. The next request recovers — `markPendingAsError` flips them on the + next failure path, and `loadHistory` filters out `error` rows when + rebuilding context for the next turn. + +- **`proxyModelName` is informational only** for agents. The agent's own + internal tool loop runs server-side in mcpd and bypasses mcplocal's + proxymodel pipeline entirely. Don't try to plumb it. + +- **Anthropic + tools**: the Anthropic adapter currently drops `tool` role + messages and doesn't translate OpenAI `tool_calls` to Anthropic + `tool_use` / `tool_result` blocks. Use an OpenAI-compatible provider + (LiteLLM, vLLM, OpenAI) for agents that need tool calling until that + translation lands. diff --git a/docs/chat.md b/docs/chat.md new file mode 100644 index 0000000..c93f270 --- /dev/null +++ b/docs/chat.md @@ -0,0 +1,124 @@ +# `mcpctl chat` + +Open an interactive chat session with an `Agent`, or send a single message +in one shot. See [agents.md](agents.md) for what an Agent is and how to +create one. + +## Modes + +```bash +mcpctl chat # interactive REPL, new thread +mcpctl chat --thread # interactive REPL, resume thread +mcpctl chat -m "hi" # one-shot, prints reply, no REPL +mcpctl chat -m "hi" --no-stream # one-shot, single JSON response (no SSE) +``` + +Streaming is on by default. Text deltas land on stdout as they arrive; tool +calls and tool results print to stderr in dim brackets so the chat output +stays clean. + +## Per-call flags + +All optional. They override the agent's `defaultParams` for this session +only — use the in-REPL `/save` slash-command to persist the current set +back to the agent. + +```bash +--system # replace agent.systemPrompt for this session +--system-file # read --system text from a file +--system-append # append to the agent system block (after project Prompts) +--temperature # 0..2 +--top-p # 0..1 +--top-k # integer; Anthropic-only, OpenAI ignores +--max-tokens # cap on assistant tokens +--seed # reproducibility (provider-dependent) +--stop # stop sequence (repeatable, up to 4) +--allow-tool # repeat to allowlist project MCP tools +--extra # provider-specific knob (repeatable) +--no-stream # disable SSE; single JSON response +``` + +`--extra` is the LiteLLM-style escape hatch: pass anything the underlying +adapter understands. Numeric values are auto-parsed (`--extra +repetition_penalty=1.1`); strings stay strings. + +## In-REPL slash-commands + +``` +/set KEY VALUE adjust an override for the rest of the session + (temperature, top-p, top-k, max-tokens, seed, stop, + or any provider-specific knob — unknown keys go + into `extra`) +/system set systemAppend for this turn onward (empty = clear) +/tools list MCP servers the agent can call as tools +/clear start a fresh thread (same agent) +/save PATCH agent.defaultParams = current overrides + (systemOverride / systemAppend are NOT persisted) +/quit, /exit leave the REPL (Ctrl-D works too) +``` + +## Threads + +Threads persist server-side. To resume: + +```bash +mcpctl get threads --agent reviewer +mcpctl chat reviewer --thread +``` + +A `mcpctl get thread ` reads the message log: + +```bash +mcpctl get thread c0abc… -o yaml +``` + +## Examples + +**Quick gut-check on a deploy:** + +```bash +$ mcpctl chat reviewer -m "is fulldeploy.sh safe to run on the current branch?" +Yes — I checked: tests are green on commit 727e7d6 and there's no +in-flight migration. The k8s context is worker0-k8s0 (production); confirm +that's intended before running. +(thread: cm9k…) +``` + +**Resuming with overrides:** + +```bash +$ mcpctl chat deployer --thread cm9k… --temperature 0.0 --max-tokens 256 +> walk me through what changed since the last deploy +… +``` + +**Pinning sampling defaults to the agent:** + +``` +$ mcpctl chat deployer --temperature 0.0 --max-tokens 8000 +> /save +(saved current overrides as agent.defaultParams) +> /quit +``` + +## Troubleshooting + +- **No agents appear in `tools/list`** — check the agent has a project + attach (`mcpctl describe agent `). The mcplocal plugin only + exposes agents on their attached project's session. + +- **Tool calls fail with `Project not found`** — the agent has no project + attach. Either attach it (`mcpctl edit agent ` and set the project + field), or expect text-only chat. + +- **Anthropic agents can't call tools** — known limitation; the Anthropic + adapter doesn't translate OpenAI tool format yet. Use LiteLLM or a + direct OpenAI-compatible provider for tool-using agents until the + translator ships. + +- **`mcpctl chat ` returns 404** — the agent name doesn't resolve. + `mcpctl get agents` to confirm spelling. + +- **REPL feels stuck** — agent tool calls can take minutes (e.g. running a + Grafana query). Watch stderr for `[tool_call: …]` / `[tool_result: …]` + brackets; those tell you the loop is alive. diff --git a/src/mcplocal/tests/smoke/agent-chat.smoke.test.ts b/src/mcplocal/tests/smoke/agent-chat.smoke.test.ts new file mode 100644 index 0000000..d96b03b --- /dev/null +++ b/src/mcplocal/tests/smoke/agent-chat.smoke.test.ts @@ -0,0 +1,149 @@ +/** + * Live-LLM smoke for agent chat. + * + * Runs only when MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY are set; the + * idea is to point this at a real OpenAI-compatible endpoint and confirm + * the openai-passthrough adapter delivers the user's message and returns + * an assistant reply. For the project's qwen3-thinking deployment: + * + * MCPCTL_SMOKE_LLM_URL=http://litellm.nvidia-nim.svc.cluster.local:4000/v1 \ + * MCPCTL_SMOKE_LLM_MODEL=qwen3-thinking \ + * MCPCTL_SMOKE_LLM_KEY=sk-... \ + * pnpm test:smoke + * + * If the env vars are missing the test self-skips without failing the + * pipeline (the agent CRUD smoke still runs in agent.smoke.test.ts). + */ +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import http from 'node:http'; +import https from 'node:https'; +import { execSync } from 'node:child_process'; + +const MCPD_URL = process.env.MCPD_URL ?? 'https://mcpctl.ad.itaz.eu'; +const LLM_URL = process.env.MCPCTL_SMOKE_LLM_URL; +const LLM_MODEL = process.env.MCPCTL_SMOKE_LLM_MODEL ?? 'qwen3-thinking'; +const LLM_KEY = process.env.MCPCTL_SMOKE_LLM_KEY; +const SUFFIX = Date.now().toString(36); +const SECRET_NAME = `smoke-chat-sec-${SUFFIX}`; +const LLM_NAME = `smoke-chat-llm-${SUFFIX}`; +const AGENT_NAME = `smoke-chat-agent-${SUFFIX}`; + +interface CliResult { code: number; stdout: string; stderr: string } + +function run(args: string): CliResult { + try { + const stdout = execSync(`mcpctl --direct ${args}`, { + encoding: 'utf-8', + timeout: 60_000, + stdio: ['ignore', 'pipe', 'pipe'], + }); + return { code: 0, stdout: stdout.trim(), stderr: '' }; + } catch (err) { + const e = err as { status?: number; stdout?: Buffer | string; stderr?: Buffer | string }; + return { + code: e.status ?? 1, + stdout: e.stdout ? (typeof e.stdout === 'string' ? e.stdout : e.stdout.toString('utf-8')) : '', + stderr: e.stderr ? (typeof e.stderr === 'string' ? e.stderr : e.stderr.toString('utf-8')) : '', + }; + } +} + +function healthz(url: string, timeoutMs = 5000): Promise { + return new Promise((resolve) => { + const parsed = new URL(`${url.replace(/\/$/, '')}/healthz`); + const driver = parsed.protocol === 'https:' ? https : http; + const req = driver.get({ + hostname: parsed.hostname, + port: parsed.port || (parsed.protocol === 'https:' ? 443 : 80), + path: parsed.pathname, + timeout: timeoutMs, + }, (res) => { resolve((res.statusCode ?? 500) < 500); res.resume(); }); + req.on('error', () => resolve(false)); + req.on('timeout', () => { req.destroy(); resolve(false); }); + }); +} + +let mcpdUp = false; +const liveLlmConfigured = LLM_URL !== undefined && LLM_KEY !== undefined; + +describe('agent chat smoke (live LLM)', () => { + beforeAll(async () => { + if (!liveLlmConfigured) { + // eslint-disable-next-line no-console + console.warn('\n ○ agent-chat smoke: skipped — set MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY to run against a real LLM.\n'); + return; + } + mcpdUp = await healthz(MCPD_URL); + if (!mcpdUp) { + // eslint-disable-next-line no-console + console.warn(`\n ○ agent-chat smoke: skipped — ${MCPD_URL}/healthz unreachable.\n`); + } + }, 20_000); + + afterAll(() => { + if (!liveLlmConfigured || !mcpdUp) return; + run(`delete agent ${AGENT_NAME}`); + run(`delete llm ${LLM_NAME}`); + run(`delete secret ${SECRET_NAME}`); + }); + + it('provisions secret + Llm + agent against the live endpoint', () => { + if (!liveLlmConfigured || !mcpdUp) return; + run(`delete secret ${SECRET_NAME}`); + run(`delete llm ${LLM_NAME}`); + run(`delete agent ${AGENT_NAME}`); + const sec = run(`create secret ${SECRET_NAME} --data API_KEY=${LLM_KEY!}`); + expect(sec.code, sec.stderr).toBe(0); + const llm = run([ + `create llm ${LLM_NAME}`, + '--type openai', + `--model ${LLM_MODEL}`, + `--url ${LLM_URL!}`, + `--api-key-ref ${SECRET_NAME}/API_KEY`, + ].join(' ')); + expect(llm.code, llm.stderr).toBe(0); + const agent = run([ + `create agent ${AGENT_NAME}`, + `--llm ${LLM_NAME}`, + `--description "live chat smoke"`, + `--system-prompt "You are a smoke test. Always reply with the single token READY."`, + '--default-temperature 0', + '--default-max-tokens 32', + ].join(' ')); + expect(agent.code, agent.stderr).toBe(0); + }); + + it('one-shot `mcpctl chat` sends a message and prints a reply', () => { + if (!liveLlmConfigured || !mcpdUp) return; + const result = run(`chat ${AGENT_NAME} -m "ping" --no-stream`); + expect(result.code, result.stderr).toBe(0); + expect(result.stdout.length).toBeGreaterThan(0); + // We can't bind too tightly to model output but the system prompt nudges + // toward "READY". Either way: we got a reply. + expect(result.stderr).toMatch(/thread:\s+c[a-z0-9]+/); + }); + + it('streaming `mcpctl chat` emits text deltas', () => { + if (!liveLlmConfigured || !mcpdUp) return; + // Default mode is streaming. Pipe stdout/stderr separately. + let stdout = ''; + let stderr = ''; + try { + const out = execSync(`mcpctl --direct chat ${AGENT_NAME} -m "say hello" 2> /tmp/agent-smoke-err`, { + encoding: 'utf-8', timeout: 60_000, + }); + stdout = out; + } catch (err) { + const e = err as { status?: number; stdout?: Buffer | string }; + stdout = e.stdout ? (typeof e.stdout === 'string' ? e.stdout : e.stdout.toString('utf-8')) : ''; + } + try { + // eslint-disable-next-line @typescript-eslint/no-require-imports + const fs = require('node:fs') as typeof import('node:fs'); + stderr = fs.readFileSync('/tmp/agent-smoke-err', 'utf-8'); + fs.unlinkSync('/tmp/agent-smoke-err'); + } catch { /* ignore */ } + expect(stdout.length).toBeGreaterThan(0); + expect(stderr).toMatch(/thread:\s+c[a-z0-9]+/); + }); +}); diff --git a/src/mcplocal/tests/smoke/agent.smoke.test.ts b/src/mcplocal/tests/smoke/agent.smoke.test.ts new file mode 100644 index 0000000..7944d22 --- /dev/null +++ b/src/mcplocal/tests/smoke/agent.smoke.test.ts @@ -0,0 +1,235 @@ +/** + * Smoke tests: Agent resource CRUD + thread management against a live mcpd. + * + * Validates Stages 1-5 end-to-end without requiring a live LLM upstream: + * 1. Create a secret + Llm referencing it. + * 2. Create an Agent pinned to that Llm with defaultParams. + * 3. `mcpctl get agents` shows the row; describe pretty-prints it. + * 4. `mcpctl get agent foo -o yaml | apply -f -` round-trips identically. + * 5. POST /api/v1/agents/:name/threads creates a thread; GET lists it. + * 6. Cleanup leaves the underlying Llm/Secret intact. + * + * Actual chat turns (which require a live LLM) live in agent-chat.smoke.test.ts + * and are gated on MCPCTL_SMOKE_LLM_URL being set. + */ +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import http from 'node:http'; +import https from 'node:https'; +import { execSync } from 'node:child_process'; +import { writeFileSync, unlinkSync, mkdtempSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +const MCPD_URL = process.env.MCPD_URL ?? 'https://mcpctl.ad.itaz.eu'; +const SUFFIX = Date.now().toString(36); +const SECRET_NAME = `smoke-agent-sec-${SUFFIX}`; +const LLM_NAME = `smoke-agent-llm-${SUFFIX}`; +const AGENT_NAME = `smoke-agent-${SUFFIX}`; + +interface CliResult { code: number; stdout: string; stderr: string } + +function run(args: string): CliResult { + try { + const stdout = execSync(`mcpctl --direct ${args}`, { + encoding: 'utf-8', + timeout: 30_000, + stdio: ['ignore', 'pipe', 'pipe'], + }); + return { code: 0, stdout: stdout.trim(), stderr: '' }; + } catch (err) { + const e = err as { status?: number; stdout?: Buffer | string; stderr?: Buffer | string }; + return { + code: e.status ?? 1, + stdout: e.stdout ? (typeof e.stdout === 'string' ? e.stdout : e.stdout.toString('utf-8')) : '', + stderr: e.stderr ? (typeof e.stderr === 'string' ? e.stderr : e.stderr.toString('utf-8')) : '', + }; + } +} + +function healthz(url: string, timeoutMs = 5000): Promise { + return new Promise((resolve) => { + const parsed = new URL(`${url.replace(/\/$/, '')}/healthz`); + const driver = parsed.protocol === 'https:' ? https : http; + const req = driver.get( + { + hostname: parsed.hostname, + port: parsed.port || (parsed.protocol === 'https:' ? 443 : 80), + path: parsed.pathname, + timeout: timeoutMs, + }, + (res) => { resolve((res.statusCode ?? 500) < 500); res.resume(); }, + ); + req.on('error', () => resolve(false)); + req.on('timeout', () => { req.destroy(); resolve(false); }); + }); +} + +let mcpdUp = false; + +describe('agent smoke', () => { + beforeAll(async () => { + mcpdUp = await healthz(MCPD_URL); + if (!mcpdUp) { + // eslint-disable-next-line no-console + console.warn(`\n ○ agent smoke: skipped — ${MCPD_URL}/healthz unreachable.\n`); + } + }, 20_000); + + afterAll(() => { + if (!mcpdUp) return; + run(`delete agent ${AGENT_NAME}`); + run(`delete llm ${LLM_NAME}`); + run(`delete secret ${SECRET_NAME}`); + }); + + it('creates a secret to back the Llm api key', () => { + if (!mcpdUp) return; + run(`delete secret ${SECRET_NAME}`); + const result = run(`create secret ${SECRET_NAME} --data API_KEY=sk-fake-smoke`); + expect(result.code, result.stderr).toBe(0); + }); + + it('creates an Llm pinned to that secret', () => { + if (!mcpdUp) return; + run(`delete llm ${LLM_NAME}`); + const result = run([ + `create llm ${LLM_NAME}`, + '--type openai', + '--model gpt-4o-mini', + '--url http://localhost:9999', + `--api-key-ref ${SECRET_NAME}/API_KEY`, + ].join(' ')); + expect(result.code, result.stderr).toBe(0); + }); + + it('creates an agent pinned to that Llm with sampling defaults', () => { + if (!mcpdUp) return; + run(`delete agent ${AGENT_NAME}`); + const result = run([ + `create agent ${AGENT_NAME}`, + `--llm ${LLM_NAME}`, + `--description "smoke agent for end-to-end CRUD"`, + `--system-prompt "You are a smoke-test agent."`, + '--default-temperature 0.2', + '--default-max-tokens 512', + ].join(' ')); + expect(result.code, result.stderr || result.stdout).toBe(0); + expect(result.stdout).toMatch(new RegExp(`agent '${AGENT_NAME}'`)); + }); + + it('lists the agent in `get agents`', () => { + if (!mcpdUp) return; + const result = run('get agents -o json'); + expect(result.code).toBe(0); + const rows = JSON.parse(result.stdout) as Array<{ name: string; llm: { name: string }; defaultParams: { temperature?: number } }>; + const row = rows.find((r) => r.name === AGENT_NAME); + expect(row, `agent ${AGENT_NAME} must be present`).toBeDefined(); + expect(row!.llm.name).toBe(LLM_NAME); + expect(row!.defaultParams.temperature).toBe(0.2); + }); + + it('round-trips yaml output through apply -f without diff', () => { + if (!mcpdUp) return; + const yaml = run(`get agent ${AGENT_NAME} -o yaml`); + expect(yaml.code).toBe(0); + expect(yaml.stdout).toMatch(/kind:\s+agent/i); + expect(yaml.stdout).toContain(`name: ${AGENT_NAME}`); + + const dir = mkdtempSync(join(tmpdir(), 'mcpctl-agent-smoke-')); + const path = join(dir, 'agent.yaml'); + const amended = yaml.stdout.replace( + 'smoke agent for end-to-end CRUD', + 'smoke agent (amended)', + ); + writeFileSync(path, amended); + try { + const applied = run(`apply -f ${path}`); + expect(applied.code, applied.stderr || applied.stdout).toBe(0); + const second = run(`get agent ${AGENT_NAME} -o json`); + const parsed = JSON.parse(second.stdout) as { description: string }; + expect(parsed.description).toBe('smoke agent (amended)'); + } finally { + unlinkSync(path); + } + }); + + it('creates a chat thread and the agent lists it', async () => { + if (!mcpdUp) return; + const create = await httpRequest('POST', `${MCPD_URL}/api/v1/agents/${AGENT_NAME}/threads`, { + title: 'smoke thread', + }); + expect(create.status).toBe(201); + const created = JSON.parse(create.body) as { id: string }; + expect(created.id).toMatch(/^c[a-z0-9]+/); + + const list = await httpRequest('GET', `${MCPD_URL}/api/v1/agents/${AGENT_NAME}/threads`, undefined); + expect(list.status).toBe(200); + const threads = JSON.parse(list.body) as Array<{ id: string; title: string }>; + expect(threads.some((t) => t.id === created.id && t.title === 'smoke thread')).toBe(true); + + const messages = await httpRequest('GET', `${MCPD_URL}/api/v1/threads/${created.id}/messages`, undefined); + expect(messages.status).toBe(200); + expect(JSON.parse(messages.body)).toEqual([]); + }); + + it('deletes the agent and leaves the underlying Llm + secret intact', () => { + if (!mcpdUp) return; + const del = run(`delete agent ${AGENT_NAME}`); + expect(del.code, del.stderr).toBe(0); + + const llm = run(`describe llm ${LLM_NAME}`); + expect(llm.code).toBe(0); + }); +}); + +interface HttpResponse { status: number; body: string } + +/** + * Async HTTP helper. Authenticates using the same token the CLI carries via + * `mcpctl --direct` (read from ~/.mcpctl/credentials.json). + */ +function httpRequest(method: string, urlStr: string, body: unknown): Promise { + return new Promise((resolve, reject) => { + const tokenRaw = readToken(); + const parsed = new URL(urlStr); + const driver = parsed.protocol === 'https:' ? https : http; + const headers: Record = { + Accept: 'application/json', + ...(body !== undefined ? { 'Content-Type': 'application/json' } : {}), + ...(tokenRaw !== null ? { Authorization: `Bearer ${tokenRaw}` } : {}), + }; + const req = driver.request({ + hostname: parsed.hostname, + port: parsed.port || (parsed.protocol === 'https:' ? 443 : 80), + path: parsed.pathname + parsed.search, + method, + headers, + timeout: 15_000, + }, (res) => { + const chunks: Buffer[] = []; + res.on('data', (c: Buffer) => chunks.push(c)); + res.on('end', () => { + resolve({ status: res.statusCode ?? 0, body: Buffer.concat(chunks).toString('utf-8') }); + }); + }); + req.on('error', reject); + req.on('timeout', () => { req.destroy(); reject(new Error(`httpRequest timeout: ${method} ${urlStr}`)); }); + if (body !== undefined) req.write(JSON.stringify(body)); + req.end(); + }); +} + +function readToken(): string | null { + try { + const home = process.env.HOME ?? ''; + const path = `${home}/.mcpctl/credentials.json`; + // eslint-disable-next-line @typescript-eslint/no-require-imports + const fs = require('node:fs') as typeof import('node:fs'); + if (!fs.existsSync(path)) return null; + const raw = fs.readFileSync(path, 'utf-8'); + const parsed = JSON.parse(raw) as { token?: string }; + return parsed.token ?? null; + } catch { + return null; + } +}