From 9fc31e5945792fdf5b2bfd4189c0373a0dd74716 Mon Sep 17 00:00:00 2001 From: Michal Date: Sat, 7 Mar 2026 23:37:07 +0000 Subject: [PATCH] docs: ProxyModel authoring guide in README, mark cache tasks done Co-Authored-By: Claude Opus 4.6 --- .taskmaster/tasks/tasks.json | 124 +++++++++---------- README.md | 232 +++++++++++++++++++++++++++++++++++ 2 files changed, 294 insertions(+), 62 deletions(-) diff --git a/.taskmaster/tasks/tasks.json b/.taskmaster/tasks/tasks.json index fffd263..d897f79 100644 --- a/.taskmaster/tasks/tasks.json +++ b/.taskmaster/tasks/tasks.json @@ -2078,9 +2078,9 @@ "dependencies": [ "82" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.043Z" + "updatedAt": "2026-03-07T01:27:15.554Z" }, { "id": "84", @@ -2106,9 +2106,9 @@ "dependencies": [ "79" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.050Z" + "updatedAt": "2026-03-07T02:38:57.221Z" }, { "id": "86", @@ -2120,9 +2120,9 @@ "dependencies": [ "78" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.059Z" + "updatedAt": "2026-03-07T02:38:57.226Z" }, { "id": "87", @@ -2132,7 +2132,7 @@ "testStrategy": "Database migration test: verify migration applies cleanly. API tests: verify proxyModel field is returned in project GET, can be updated via PATCH. 
CLI tests: verify `mcpctl describe project ` shows proxyModel.", "priority": "high", "dependencies": [], - "status": "deferred", + "status": "cancelled", "subtasks": [ { "id": 1, @@ -2145,7 +2145,7 @@ "parentId": "undefined" } ], - "updatedAt": "2026-02-28T01:07:00.065Z" + "updatedAt": "2026-03-07T01:27:15.571Z" }, { "id": "88", @@ -2157,9 +2157,9 @@ "dependencies": [ "87" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.071Z" + "updatedAt": "2026-03-07T23:36:15.209Z" }, { "id": "89", @@ -2172,9 +2172,9 @@ "78", "79" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.076Z" + "updatedAt": "2026-03-07T01:27:15.577Z" }, { "id": "90", @@ -2186,9 +2186,9 @@ "dependencies": [ "79" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.082Z" + "updatedAt": "2026-03-07T01:27:15.582Z" }, { "id": "91", @@ -2200,9 +2200,9 @@ "dependencies": [ "78" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.087Z" + "updatedAt": "2026-03-07T01:27:15.587Z" }, { "id": "92", @@ -2214,9 +2214,9 @@ "dependencies": [ "79" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.092Z" + "updatedAt": "2026-03-07T01:27:15.592Z" }, { "id": "93", @@ -2228,9 +2228,9 @@ "dependencies": [ "71" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.098Z" + "updatedAt": "2026-03-07T01:27:15.598Z" }, { "id": "94", @@ -2242,9 +2242,9 @@ "dependencies": [ "78" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.102Z" + "updatedAt": "2026-03-07T01:27:15.605Z" }, { "id": "95", @@ -2257,9 +2257,9 @@ "78", "79" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.107Z" + "updatedAt": 
"2026-03-07T01:27:15.613Z" }, { "id": "96", @@ -2271,9 +2271,9 @@ "dependencies": [ "79" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.116Z" + "updatedAt": "2026-03-07T01:27:15.619Z" }, { "id": "97", @@ -2285,9 +2285,9 @@ "dependencies": [ "78" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.122Z" + "updatedAt": "2026-03-07T01:27:15.625Z" }, { "id": "98", @@ -2299,9 +2299,9 @@ "dependencies": [ "73" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.128Z" + "updatedAt": "2026-03-07T23:36:15.877Z" }, { "id": "99", @@ -2313,9 +2313,9 @@ "dependencies": [ "98" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.134Z" + "updatedAt": "2026-03-07T23:36:15.892Z" }, { "id": "100", @@ -2327,9 +2327,9 @@ "dependencies": [ "98" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:11:23.496Z" + "updatedAt": "2026-03-07T23:36:15.902Z" }, { "id": "101", @@ -2341,9 +2341,9 @@ "dependencies": [ "98" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:11:23.501Z" + "updatedAt": "2026-03-07T23:36:15.957Z" }, { "id": "102", @@ -2355,9 +2355,9 @@ "dependencies": [ "98" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:11:23.507Z" + "updatedAt": "2026-03-07T23:36:15.981Z" }, { "id": "103", @@ -2380,9 +2380,9 @@ "101", "102" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.141Z" + "updatedAt": "2026-03-07T01:27:15.630Z" }, { "id": "104", @@ -2394,9 +2394,9 @@ "dependencies": [ "80" ], - "status": "deferred", + "status": "cancelled", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.147Z" + "updatedAt": "2026-03-07T01:27:15.636Z" }, { "id": "105", @@ -2408,9 +2408,9 @@ "dependencies": [ "104" ], - 
"status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:11:23.514Z" + "updatedAt": "2026-03-07T02:22:03.301Z" }, { "id": "106", @@ -2422,9 +2422,9 @@ "dependencies": [ "105" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:11:23.521Z" + "updatedAt": "2026-03-07T02:32:05.366Z" }, { "id": "107", @@ -2436,9 +2436,9 @@ "dependencies": [ "106" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:11:23.528Z" + "updatedAt": "2026-03-07T02:32:05.373Z" }, { "id": "108", @@ -2451,9 +2451,9 @@ "105", "82" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:11:23.534Z" + "updatedAt": "2026-03-07T02:22:03.308Z" }, { "id": "109", @@ -2468,9 +2468,9 @@ "107", "108" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:11:23.542Z" + "updatedAt": "2026-03-07T02:32:05.393Z" }, { "id": "110", @@ -2499,9 +2499,9 @@ "82", "83" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.158Z" + "updatedAt": "2026-03-07T03:02:47.422Z" }, { "id": "112", @@ -2516,9 +2516,9 @@ "93", "94" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:07:00.164Z" + "updatedAt": "2026-03-07T03:02:47.439Z" }, { "id": "113", @@ -2534,28 +2534,28 @@ "108", "109" ], - "status": "deferred", + "status": "done", "subtasks": [], - "updatedAt": "2026-02-28T01:11:23.552Z" + "updatedAt": "2026-03-07T02:22:17.268Z" }, { "id": "114", "title": "ProxyModel v2: Code-based MCP middleware plugin system", "description": "Redesign the ProxyModel framework from a YAML-configured content transformation pipeline into a full code-based MCP middleware plugin system. 
Proxy models become TypeScript files that can intercept any MCP request/response, create synthetic tools, maintain per-session state, and compose via multiple inheritance with compile-time conflict detection. The existing gate functionality (begin_session, tools/list filtering, prompt scoring, ungating) becomes the first proxy model implementation, proving the framework works by implementing gate entirely as a plugin with zero gate-specific code in router.ts.", "details": "## Vision\n\nA proxy model is a TypeScript code file (not YAML) that acts as full MCP middleware. It can:\n- Intercept any MCP request (initialize, tools/list, tools/call, resources/*, prompts/*)\n- Modify any response before it reaches the client\n- Create synthetic tools (e.g. begin_session doesn't exist upstream)\n- Maintain per-session state (gated/ungated, accumulated tags, etc.)\n- Access project resources (prompts, servers, config)\n- Transform content (what stages do today: paginate, section-split, etc.)\n\n## Key design decisions\n\n1. Code not YAML: Proxy models live as .ts files in a known directory (e.g. ~/.mcpctl/proxymodels/). File exists = model exists. No create/delete via CLI.\n2. Stages deprecated: No separate stage resource. Content transformation is just code inside the proxy model.\n3. Multiple inheritance: A model can extend [gate, subindex] to compose behaviors from multiple parents. Conflicts (two parents intercepting the same method incompatibly) detected at load/compile time, not runtime.\n4. Gate is just a proxy model: The ~300 lines of gate logic in router.ts move into a gate.ts proxy model file. Router becomes thin plumbing (~100 lines).\n5. gated:true replaced by proxyModel field: Projects get a proxyModel: gate field. If the assigned model implements gating, the project is gated. No separate boolean.\n6. Discoverable as resources: mcpctl get proxymodels lists available models (discovered from files). mcpctl describe proxymodel gate shows details. 
But no create/delete commands.\n7. Attached to projects: mcpctl edit project foo --proxyModel gate or via apply YAML.\n\n## Framework interface (sketch)\n\nexport interface ProxyModelContext {\n session: SessionState;\n project: ProjectConfig;\n upstream: UpstreamClient;\n llm?: LLMProvider;\n cache?: CacheProvider;\n}\n\nexport interface ProxyModel {\n name: string;\n extends?: string[];\n onInitialize?(ctx, request): Promise;\n onToolsList?(ctx): Promise;\n onToolCall?(ctx, name, args): Promise;\n onResourceRead?(ctx, uri): Promise;\n transformContent?(ctx, content, contentType): Promise;\n createSessionState?(): Record;\n}\n\n## Migration path\n\n1. Define the ProxyModel TypeScript interface\n2. Implement the plugin loader (discover .ts files, compile, validate inheritance, detect conflicts)\n3. Implement the router integration (router delegates to loaded proxy model)\n4. Extract gate logic from router.ts into gate.ts proxy model\n5. Extract content pipeline (passthrough, paginate, section-split) into proxy model code\n6. Add proxyModel field to Project schema (replaces gated boolean)\n7. Add CLI: get proxymodels, describe proxymodel, edit project --proxyModel\n8. Add smoke tests: gate proxy model produces identical behavior to current hardcoded gate\n9. Deprecate gated field (backward compat: gated:true maps to proxyModel:gate)\n\n## Supersedes\n\nThis task supersedes deferred tasks 83, 85-97, 98-99, 103, 104, 110, 111-112 which assumed the old YAML/stage architecture.", - "status": "in-progress", + "status": "done", "priority": "high", "dependencies": [], "testStrategy": "1. Gate proxy model smoke test: identical behavior to current hardcoded gate (begin_session, tools/list filtering, ungating). 2. Composition test: model extending [gate, paginate] inherits both behaviors. 3. Conflict detection test: two parents intercepting same hook differently = compile-time error. 4. Discovery test: drop a .ts file in proxymodels dir, mcpctl get proxymodels shows it. 5. 
Existing smoke tests (proxy-pipeline.test.ts) pass unchanged after migration.", "subtasks": [], - "updatedAt": "2026-02-28T03:37:04.389Z" + "updatedAt": "2026-03-07T01:26:57.383Z" } ], "metadata": { "version": "1.0.0", - "lastModified": "2026-02-28T03:37:04.390Z", + "lastModified": "2026-03-07T23:36:15.981Z", "taskCount": 114, - "completedCount": 80, + "completedCount": 96, "tags": [ "master" ] diff --git a/README.md b/README.md index 0dc1e34..10c2884 100644 --- a/README.md +++ b/README.md @@ -256,6 +256,215 @@ mcpctl describe proxymodel default # Pipeline details (stages, controller) mcpctl describe proxymodel gate # Plugin details (hooks, extends) ``` +### Custom Stages + +Drop `.js` or `.mjs` files in `~/.mcpctl/stages/` to add custom transformation stages. Each file must `export default` an async function matching the `StageHandler` contract: + +```javascript +// ~/.mcpctl/stages/redact-keys.js +export default async function(content, ctx) { + // ctx provides: contentType, sourceName, projectName, sessionId, + // originalContent, llm, cache, log, config + const redacted = content.replace(/([A-Z_]+_KEY)=\S+/g, '$1=***'); + ctx.log.info(`Redacted ${content.length - redacted.length} chars of secrets`); + return { content: redacted }; +} +``` + +Stages loaded from disk appear as `local` source. 
Use them in a custom ProxyModel YAML: + +```yaml +kind: ProxyModel +metadata: + name: secure-pipeline +spec: + stages: + - type: redact-keys # matches filename without extension + - type: section-split + - type: summarize-tree +``` + +**Stage contract reference:** + +| Field | Type | Description | +|-------|------|-------------| +| `content` | `string` | Input content (from previous stage or raw upstream) | +| `ctx.contentType` | `'toolResult' \| 'prompt' \| 'resource'` | What kind of content is being processed | +| `ctx.sourceName` | `string` | Tool name, prompt name, or resource URI | +| `ctx.originalContent` | `string` | The unmodified content before any stage ran | +| `ctx.llm` | `LLMProvider` | Call `ctx.llm.complete(prompt)` for LLM summarization | +| `ctx.cache` | `CacheProvider` | Call `ctx.cache.getOrCompute(key, fn)` to cache expensive results | +| `ctx.log` | `StageLogger` | `debug()`, `info()`, `warn()`, `error()` | +| `ctx.config` | `Record<string, unknown>` | Config values from the ProxyModel YAML | + +**Return value:** + +```typescript +{ content: string; sections?: Section[]; metadata?: Record<string, unknown> } +``` + +If `sections` is returned, the framework stores them and presents a table of contents to the client. The client can drill into individual sections via `_resultId` + `_section` parameters on subsequent tool or prompt calls. + +### Section Drill-Down + +When a stage (like `section-split`) produces sections, the pipeline automatically: + +1. Replaces the full content with a compact table of contents +2. Appends a `_resultId` for subsequent drill-down +3. Stores the full sections in memory (5-minute TTL) + +Claude then calls the same tool (or `prompts/get`) again with `_resultId` and `_section` parameters to retrieve a specific section. This works for both tool results and prompt responses. 
+ +``` +# What Claude sees (tool result): +3 sections (json): +[users] Users (4K chars) +[config] Config (1K chars) +[logs] Logs (8K chars) + +_resultId: pm-abc123 — use _resultId and _section parameters to drill into a section. + +# Claude drills down: +→ tools/call: grafana/query { _resultId: "pm-abc123", _section: "logs" } +← [full 8K content of the logs section] +``` + +### Hot-Reload + +Stages and ProxyModels reload automatically when files change — no restart needed. + +- **Stages** (`~/.mcpctl/stages/*.js`): File watcher with 300ms debounce. Add, edit, or remove stage files and they take effect on the next tool call. +- **ProxyModels** (`~/.mcpctl/proxymodels/*.yaml`): Re-read from disk on every request, so changes are always picked up. + +Force a manual reload via the HTTP API: + +```bash +curl -X POST http://localhost:3200/proxymodels/reload +# {"loaded": 3} + +curl http://localhost:3200/proxymodels/stages +# [{"name":"passthrough","source":"built-in"},{"name":"redact-keys","source":"local"},...] 
+``` + +### Built-in Stages Reference + +| Stage | Description | Key Config | +|-------|------------|------------| +| `passthrough` | Returns content unchanged | — | +| `paginate` | Splits large content into numbered pages | `pageSize` (default: 8000 chars) | +| `section-split` | Splits content into named sections by structure (headers, JSON keys, code boundaries) | `minSectionSize` (500), `maxSectionSize` (15000) | +| `summarize-tree` | Generates LLM summaries for each section | `maxTokens` (200), `maxDepth` (2) | + +`section-split` detects content type automatically: + +| Content Type | Split Strategy | +|-------------|---------------| +| JSON array | One section per array element, using `name`/`id`/`label` as section ID | +| JSON object | One section per top-level key | +| YAML | One section per top-level key | +| Markdown | One section per `##` header | +| Code | One section per function/class boundary | +| XML | One section per top-level element | + +### Pause Queue (Model Studio) + +The pause queue lets you intercept pipeline results in real-time — inspect what the pipeline produced, edit it, or drop it before Claude receives the response. 
+ +```bash +# Enable pause mode +curl -X PUT http://localhost:3200/pause -d '{"paused":true}' + +# View queued items (blocked tool calls waiting for your decision) +curl http://localhost:3200/pause/queue + +# Release an item (send transformed content to Claude) +curl -X POST http://localhost:3200/pause/queue/<id>/release + +# Edit and release (send your modified content instead) +curl -X POST http://localhost:3200/pause/queue/<id>/edit -d '{"content":"modified content"}' + +# Drop an item (send empty response) +curl -X POST http://localhost:3200/pause/queue/<id>/drop + +# Release all queued items at once +curl -X POST http://localhost:3200/pause/release-all + +# Disable pause mode +curl -X PUT http://localhost:3200/pause -d '{"paused":false}' +``` + +The pause queue is also available as MCP tools via `mcpctl console --stdin-mcp`, which gives Claude direct access to `pause`, `get_pause_queue`, and `release_paused` tools for self-monitoring. + +## LLM Providers + +ProxyModel stages that need LLM capabilities (like `summarize-tree`) use configurable providers. Configure in `~/.mcpctl/config.yaml`: + +```yaml +llm: + - name: vllm-local + type: openai-compatible + baseUrl: http://localhost:8000/v1 + model: Qwen/Qwen3-32B + - name: anthropic + type: anthropic + model: claude-sonnet-4-20250514 + # API key from: mcpctl create secret llm-keys --data ANTHROPIC_API_KEY=sk-... +``` + +Providers support **tiered routing** (`fast` for quick summaries, `heavy` for complex analysis) and **automatic failover** — if one provider is down, the next is tried. + +```bash +# Check active providers +mcpctl status # Shows LLM provider status + +# View provider details +curl http://localhost:3200/llm/providers +``` + +## Pipeline Cache + +ProxyModel pipelines cache LLM-generated results (summaries, section indexes) to avoid redundant API calls. The cache is persistent across mcplocal restarts. 
+ +### Namespace Isolation + +Each combination of **LLM provider + model + ProxyModel** gets its own cache namespace: + +``` +~/.mcpctl/cache/openai--gpt-4o--content-pipeline/ +~/.mcpctl/cache/anthropic--claude-sonnet-4-20250514--content-pipeline/ +~/.mcpctl/cache/vllm--qwen-72b--subindex/ +``` + +Switching LLM providers or models automatically uses a fresh cache — no stale results from a different model. + +### CLI Management + +```bash +# View cache statistics (per-namespace breakdown) +mcpctl cache stats + +# Clear all cache entries +mcpctl cache clear + +# Clear a specific namespace +mcpctl cache clear openai--gpt-4o--content-pipeline + +# Clear entries older than 7 days +mcpctl cache clear --older-than 7 +``` + +### Size Limits + +The cache enforces a configurable maximum size (default: 256MB). When exceeded, the least recently used entries are evicted first (LRU). Entries older than 30 days are automatically expired. + +Size can be specified as bytes, human-readable units, or a percentage of the filesystem: + +```typescript +new FileCache('ns', { maxSize: '512MB' }) // fixed size +new FileCache('ns', { maxSize: '1.5GB' }) // fractional units +new FileCache('ns', { maxSize: '10%' }) // 10% of partition +``` + ## Resources | Resource | What it is | Example | @@ -301,6 +510,8 @@ mcpctl delete server grafana mcpctl logs grafana # Container logs mcpctl console monitoring # Interactive MCP console mcpctl console --inspect # Traffic inspector +mcpctl console --audit # Audit event timeline +mcpctl console --stdin-mcp # Claude monitor (MCP tools for Claude) # Backup and restore mcpctl backup -o backup.json @@ -387,6 +598,27 @@ The traffic inspector watches MCP traffic from other clients in real-time: mcpctl console --inspect ``` +### Claude Monitor (stdin-mcp) + +Connect Claude itself as a monitor via the inspect MCP server: + +```bash +mcpctl console --stdin-mcp +``` + +This exposes MCP tools that let Claude observe and control traffic: + +| Tool | Description | +|------|------------| 
+| `list_models` | List configured LLM providers and their status | +| `list_stages` | List all available pipeline stages (built-in + custom) | +| `switch_model` | Change the active LLM provider for pipeline stages | +| `get_model_info` | Get details about a specific LLM provider | +| `reload_stages` | Force reload custom stages from disk | +| `pause` | Toggle pause mode (intercept pipeline results) | +| `get_pause_queue` | List items held in the pause queue | +| `release_paused` | Release, edit, or drop a paused item | + ## Architecture ```