feat: audit console TUI, system prompt management, and CLI improvements

Audit Console Phase 1: tool_call_trace emission from mcplocal router,
session_bind/rbac_decision event kinds, GET /audit/sessions endpoint,
full Ink TUI with session sidebar, event timeline, and detail view
(mcpctl console --audit).

System prompts: move 6 hardcoded LLM prompts to mcpctl-system project
with extensible ResourceRuleRegistry validation framework, template
variable enforcement ({{maxTokens}}, {{pageCount}}), and delete-resets-
to-default behavior. All consumers fetch via SystemPromptFetcher with
hardcoded fallbacks.

CLI: -p shorthand for --project across get/create/delete/config commands,
console auto-scroll improvements, shell completions regenerated.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Michal
2026-03-03 23:50:54 +00:00
parent 89f869f460
commit 5d859ca7d8
42 changed files with 1932 additions and 77 deletions

View File

@@ -47,6 +47,7 @@ function mockCtx(original: string, config: Record<string, unknown> = {}, llmAvai
llm: mockLlm,
cache: mockCache,
log: mockLog,
getSystemPrompt: async (_name: string, fallback: string) => fallback,
config,
};
}

View File

@@ -136,6 +136,7 @@ function createMockContext(original: string): StageContext {
llm: mockLlm,
cache: mockCache,
log: mockLog,
getSystemPrompt: async (_name: string, fallback: string) => fallback,
config: {},
};
}

View File

@@ -580,4 +580,93 @@ describe('McpRouter', () => {
expect(config!).toHaveProperty('llm', haLlm);
});
});
describe('tool_call_trace audit emission', () => {
it('emits tool_call_trace on successful tool call', async () => {
const alpha = mockUpstream('alpha', { tools: [{ name: 'do_thing' }] });
router.addUpstream(alpha);
await router.discoverTools();
const emitted: Array<Record<string, unknown>> = [];
const mockCollector = { emit: vi.fn((e: Record<string, unknown>) => emitted.push(e)) };
router.setAuditCollector(mockCollector as never);
await router.route(
{ jsonrpc: '2.0', id: 1, method: 'tools/call', params: { name: 'alpha/do_thing', arguments: { key: 'val' } } },
{ sessionId: 'sess-1' },
);
expect(mockCollector.emit).toHaveBeenCalledOnce();
const event = emitted[0]!;
expect(event['eventKind']).toBe('tool_call_trace');
expect(event['sessionId']).toBe('sess-1');
expect(event['serverName']).toBe('alpha');
expect(event['verified']).toBe(true);
const payload = event['payload'] as Record<string, unknown>;
expect(payload['toolName']).toBe('alpha/do_thing');
expect(payload['argKeys']).toBe('key');
expect(payload['durationMs']).toBeTypeOf('number');
expect(payload['resultSizeBytes']).toBeTypeOf('number');
expect(payload['error']).toBeNull();
});
it('does not emit when auditCollector is not set', async () => {
const alpha = mockUpstream('alpha', { tools: [{ name: 'do_thing' }] });
router.addUpstream(alpha);
await router.discoverTools();
// No setAuditCollector call — should not throw
const resp = await router.route(
{ jsonrpc: '2.0', id: 1, method: 'tools/call', params: { name: 'alpha/do_thing', arguments: {} } },
{ sessionId: 'sess-1' },
);
expect(resp.result).toBeDefined();
});
it('does not emit when sessionId is missing', async () => {
const alpha = mockUpstream('alpha', { tools: [{ name: 'do_thing' }] });
router.addUpstream(alpha);
await router.discoverTools();
const mockCollector = { emit: vi.fn() };
router.setAuditCollector(mockCollector as never);
await router.route(
{ jsonrpc: '2.0', id: 1, method: 'tools/call', params: { name: 'alpha/do_thing', arguments: {} } },
);
expect(mockCollector.emit).not.toHaveBeenCalled();
});
it('captures error in trace when upstream returns error', async () => {
const failing: UpstreamConnection = {
name: 'fail-srv',
isAlive: vi.fn(() => true),
close: vi.fn(async () => {}),
onNotification: vi.fn(),
send: vi.fn(async (req: JsonRpcRequest): Promise<JsonRpcResponse> => {
if (req.method === 'tools/list') {
return { jsonrpc: '2.0', id: req.id, result: { tools: [{ name: 'fail_tool' }] } };
}
return { jsonrpc: '2.0', id: req.id, error: { code: -32000, message: 'Something broke' } };
}),
};
router.addUpstream(failing);
await router.discoverTools();
const emitted: Array<Record<string, unknown>> = [];
const mockCollector = { emit: vi.fn((e: Record<string, unknown>) => emitted.push(e)) };
router.setAuditCollector(mockCollector as never);
await router.route(
{ jsonrpc: '2.0', id: 1, method: 'tools/call', params: { name: 'fail-srv/fail_tool', arguments: { a: 1, b: 2 } } },
{ sessionId: 'sess-err' },
);
expect(mockCollector.emit).toHaveBeenCalledOnce();
const payload = emitted[0]!['payload'] as Record<string, unknown>;
expect(payload['error']).toBe('Something broke');
expect(payload['argKeys']).toBe('a, b');
});
});
});

View File

@@ -0,0 +1,207 @@
/**
* Smoke tests: System prompts (LLM pipeline).
*
* Validates that the 6 LLM system prompts are created in mcpctl-system,
* that validation rejects edits missing required template variables,
* and that deletion resets to defaults.
*
* Run with: pnpm test:smoke
*/
import { describe, it, expect, beforeAll } from 'vitest';
import http from 'node:http';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { homedir } from 'node:os';
import { isMcplocalRunning, getMcpdUrl } from './mcp-client.js';
// Default mcpd base URL (from the shared ./mcp-client.js helper).
const MCPD_URL = getMcpdUrl();
/**
 * Read the CLI credentials file (~/.mcpctl/credentials) and extract the
 * bearer token plus the mcpd URL stored alongside it. A missing file or
 * unparsable JSON falls back to an empty token and the default MCPD_URL.
 */
function loadMcpdCredentials(): { token: string; url: string } {
  const credentialsPath = join(homedir(), '.mcpctl', 'credentials');
  try {
    const parsed = JSON.parse(readFileSync(credentialsPath, 'utf-8')) as {
      token?: string;
      mcpdUrl?: string;
    };
    return {
      token: parsed.token ?? '',
      url: parsed.mcpdUrl ?? MCPD_URL,
    };
  } catch {
    // File absent or malformed — run unauthenticated against the default URL.
    return { token: '', url: MCPD_URL };
  }
}
// Credentials are loaded once at module load; a URL found in the credentials
// file takes precedence over the environment-derived default.
const MCPD_CREDS = loadMcpdCredentials();
const MCPD_EFFECTIVE_URL = MCPD_CREDS.url || MCPD_URL;

/** System prompt record as returned by the mcpd prompts API. */
interface Prompt {
  id: string;
  name: string;
  content: string;
  priority: number;
  projectId: string;
}
/**
 * Minimal JSON-over-HTTP helper for the mcpd API.
 *
 * Serializes `body` (if provided) as JSON, attaches the bearer token from the
 * credentials file when available, and resolves with the HTTP status plus the
 * decoded response body. Non-JSON responses resolve with the raw text so
 * callers can still inspect error pages.
 *
 * @param method HTTP verb (GET/PUT/DELETE/...).
 * @param path   Path resolved against MCPD_EFFECTIVE_URL.
 * @param body   Optional JSON-serializable request body.
 * @returns Status code and decoded (or raw) response payload.
 */
function mcpdRequest<T>(method: string, path: string, body?: unknown): Promise<{ status: number; data: T }> {
  return new Promise((resolve, reject) => {
    const url = new URL(path, MCPD_EFFECTIVE_URL);
    const headers: Record<string, string> = {
      'Accept': 'application/json',
    };
    if (body !== undefined) headers['Content-Type'] = 'application/json';
    if (MCPD_CREDS.token) headers['Authorization'] = `Bearer ${MCPD_CREDS.token}`;
    const bodyStr = body !== undefined ? JSON.stringify(body) : undefined;
    if (bodyStr) headers['Content-Length'] = String(Buffer.byteLength(bodyStr));
    const req = http.request(url, { method, timeout: 10_000, headers }, (res) => {
      const chunks: Buffer[] = [];
      res.on('data', (chunk: Buffer) => chunks.push(chunk));
      res.on('end', () => {
        const raw = Buffer.concat(chunks).toString();
        try {
          resolve({ status: res.statusCode ?? 500, data: raw ? JSON.parse(raw) as T : (undefined as T) });
        } catch {
          // Response was not JSON — hand back the raw text as-is.
          resolve({ status: res.statusCode ?? 500, data: raw as unknown as T });
        }
      });
    });
    req.on('error', reject);
    // Node's 'timeout' event only signals socket-idle timeout; the request
    // keeps running (and leaks the socket) unless explicitly destroyed.
    // Destroying with an Error surfaces it via the 'error' handler above,
    // which rejects the promise.
    req.on('timeout', () => req.destroy(new Error('Request timeout')));
    if (bodyStr) req.write(bodyStr);
    req.end();
  });
}
// The six LLM pipeline prompts that must exist in the mcpctl-system
// project (see the file header comment).
const LLM_PROMPT_NAMES = [
  'llm-response-filter',
  'llm-request-optimization',
  'llm-pagination-index',
  'llm-gate-context-selector',
  'llm-summarize',
  'llm-paginate-titles',
];
describe('Smoke: System Prompts', () => {
  // Set in beforeAll; every test body early-returns when mcplocal is down.
  let available = false;
  // NOTE(review): written in beforeAll but never read afterwards — the
  // prompts API below is keyed by project name, not ID. Confirm if needed.
  let systemProjectId = '';
  // One-time snapshot of the system project's prompts. Tests that edit a
  // prompt restore the original `content` from this snapshot when done.
  let prompts: Prompt[] = [];
  beforeAll(async () => {
    console.log('');
    console.log(' ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
    console.log(' Smoke Test: System Prompts (LLM pipeline)');
    console.log(' ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
    available = await isMcplocalRunning();
    if (!available) {
      console.log('\n ✗ mcplocal not running — all tests will be skipped\n');
      return;
    }
    // Find mcpctl-system project
    const projectsResult = await mcpdRequest<Array<{ id: string; name: string }>>('GET', '/api/v1/projects');
    const systemProject = projectsResult.data.find((p) => p.name === 'mcpctl-system');
    if (!systemProject) {
      console.log('\n ✗ mcpctl-system project not found — tests will fail\n');
      return;
    }
    systemProjectId = systemProject.id;
    // Fetch all prompts for system project (API uses project name, not ID)
    const promptsResult = await mcpdRequest<Prompt[]>('GET', `/api/v1/prompts?project=mcpctl-system`);
    prompts = promptsResult.data;
    console.log(`\n ✓ Found ${prompts.length} system prompts\n`);
  });
  it('all 6 LLM prompts exist in mcpctl-system', () => {
    if (!available) return;
    const promptNames = prompts.map((p) => p.name);
    for (const name of LLM_PROMPT_NAMES) {
      expect(promptNames, `Missing system prompt: ${name}`).toContain(name);
    }
  });
  it('edit a prompt with no required vars succeeds', async () => {
    if (!available) return;
    // llm-response-filter has no required template variables, so appending
    // text must pass validation.
    const prompt = prompts.find((p) => p.name === 'llm-response-filter');
    expect(prompt).toBeDefined();
    const newContent = prompt!.content + '\n- Additional custom rule';
    const result = await mcpdRequest<Prompt>('PUT', `/api/v1/prompts/${prompt!.id}`, {
      content: newContent,
    });
    expect(result.status).toBe(200);
    expect(result.data.content).toBe(newContent);
    // Restore original
    await mcpdRequest('PUT', `/api/v1/prompts/${prompt!.id}`, {
      content: prompt!.content,
    });
  });
  it('edit llm-summarize removing {{maxTokens}} is rejected with 400', async () => {
    if (!available) return;
    const prompt = prompts.find((p) => p.name === 'llm-summarize');
    expect(prompt).toBeDefined();
    // Replacement content deliberately omits {{maxTokens}} — validation
    // should reject it, leaving the server-side prompt unchanged.
    const result = await mcpdRequest<{ message?: string }>('PUT', `/api/v1/prompts/${prompt!.id}`, {
      content: 'Summarize this content briefly.',
    });
    expect(result.status).toBe(400);
  });
  it('edit llm-paginate-titles removing {{pageCount}} is rejected with 400', async () => {
    if (!available) return;
    const prompt = prompts.find((p) => p.name === 'llm-paginate-titles');
    expect(prompt).toBeDefined();
    // Replacement content deliberately omits {{pageCount}}.
    const result = await mcpdRequest<{ message?: string }>('PUT', `/api/v1/prompts/${prompt!.id}`, {
      content: 'Generate some titles for pages.',
    });
    expect(result.status).toBe(400);
  });
  it('edit with required vars present succeeds', async () => {
    if (!available) return;
    const prompt = prompts.find((p) => p.name === 'llm-summarize');
    expect(prompt).toBeDefined();
    // Keeps {{maxTokens}}, so the edit must be accepted.
    const newContent = 'Custom: Summarize in about {{maxTokens}} tokens. Keep it concise.';
    const result = await mcpdRequest<Prompt>('PUT', `/api/v1/prompts/${prompt!.id}`, {
      content: newContent,
    });
    expect(result.status).toBe(200);
    expect(result.data.content).toBe(newContent);
    // Restore original
    await mcpdRequest('PUT', `/api/v1/prompts/${prompt!.id}`, {
      content: prompt!.content,
    });
  });
  it('delete a system prompt resets to default', async () => {
    if (!available) return;
    const prompt = prompts.find((p) => p.name === 'llm-gate-context-selector');
    expect(prompt).toBeDefined();
    // First, modify the prompt
    await mcpdRequest('PUT', `/api/v1/prompts/${prompt!.id}`, {
      content: 'Temporarily customized content.',
    });
    // Delete should reset to default, not actually delete
    const deleteResult = await mcpdRequest<Prompt>('DELETE', `/api/v1/prompts/${prompt!.id}`);
    expect(deleteResult.status).toBe(200);
    expect(deleteResult.data.content).toContain('context selection assistant');
    // Prompt should still exist
    const getResult = await mcpdRequest<Prompt>('GET', `/api/v1/prompts/${prompt!.id}`);
    expect(getResult.status).toBe(200);
    expect(getResult.data.name).toBe('llm-gate-context-selector');
  });
});

View File

@@ -0,0 +1,160 @@
import { describe, it, expect, vi } from 'vitest';
import type { StageContext, LLMProvider, CacheProvider, StageLogger, SystemPromptFetcher } from '../src/proxymodel/types.js';
import paginate from '../src/proxymodel/stages/paginate.js';
import summarizeTree from '../src/proxymodel/stages/summarize-tree.js';
/**
 * Construct a StageContext wired to in-memory fakes.
 *
 * The fake LLM answers title-generation prompts with a fixed two-title JSON
 * array and everything else with a canned summary string; its availability
 * is controlled by `opts.llmAvailable` (default false). The fake cache is a
 * plain Map. `opts.getSystemPrompt` defaults to an identity fetcher that
 * simply hands back the caller-supplied fallback prompt.
 */
function mockCtx(
  original: string,
  config: Record<string, unknown> = {},
  opts: { llmAvailable?: boolean; getSystemPrompt?: SystemPromptFetcher } = {},
): StageContext {
  const llm: LLMProvider = {
    async complete(prompt) {
      // Pagination-title requests get a deterministic two-element JSON array.
      const wantsTitles =
        prompt.includes('short descriptive titles') || prompt.includes('JSON array');
      if (wantsTitles) return '["Title A", "Title B"]';
      // Anything else (summarization) gets a canned summary.
      return `Summary of: ${prompt.slice(0, 40)}...`;
    },
    available: () => opts.llmAvailable ?? false,
  };

  const store = new Map<string, string>();
  const cacheProvider: CacheProvider = {
    async getOrCompute(key, compute) {
      const cached = store.get(key);
      if (cached !== undefined) return cached;
      const computed = await compute();
      store.set(key, computed);
      return computed;
    },
    hash(content) {
      return content.slice(0, 8);
    },
    async get(key) {
      return store.get(key) ?? null;
    },
    async set(key, value) {
      store.set(key, value);
    },
  };

  const logger: StageLogger = {
    debug: vi.fn(),
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
  };

  return {
    contentType: 'toolResult',
    sourceName: 'test/tool',
    projectName: 'test',
    sessionId: 'sess-1',
    originalContent: original,
    llm,
    cache: cacheProvider,
    log: logger,
    getSystemPrompt: opts.getSystemPrompt ?? (async (_name, fallback) => fallback),
    config,
  };
}
describe('System prompt fetching in stages', () => {
  describe('paginate stage', () => {
    it('uses getSystemPrompt to fetch paginate-titles prompt', async () => {
      const promptSpy = vi.fn(async (_name: string, fallback: string) => fallback);
      // 9000 chars exceeds the default pageSize (8000), forcing pagination.
      const body = 'A'.repeat(9000);
      const ctx = mockCtx(body, {}, { llmAvailable: true, getSystemPrompt: promptSpy });

      await paginate(body, ctx);

      expect(promptSpy).toHaveBeenCalledWith(
        'llm-paginate-titles',
        expect.stringContaining('{{pageCount}}'),
      );
    });

    it('falls back to hardcoded default when fetcher returns fallback', async () => {
      const body = 'B'.repeat(9000);
      const ctx = mockCtx(body, {}, { llmAvailable: true });

      const result = await paginate(body, ctx);

      // Pagination still happens using the built-in default prompt.
      expect(result.content).toContain('pages');
    });

    it('interpolates {{pageCount}} in the fetched template', async () => {
      let seenPrompt = '';
      const customFetcher: SystemPromptFetcher = async (name, fallback) =>
        name === 'llm-paginate-titles' ? 'Custom: generate {{pageCount}} titles please' : fallback;
      const recordingLlm: LLMProvider = {
        async complete(prompt) {
          seenPrompt = prompt;
          return '["A", "B"]';
        },
        available: () => true,
      };
      const body = 'C'.repeat(9000);
      const ctx = mockCtx(body, {}, { llmAvailable: true, getSystemPrompt: customFetcher });
      // Swap in the recording LLM so the final prompt text can be inspected.
      (ctx as { llm: LLMProvider }).llm = recordingLlm;

      await paginate(body, ctx);

      expect(seenPrompt).toContain('Custom: generate 2 titles please');
      expect(seenPrompt).not.toContain('{{pageCount}}');
    });
  });

  describe('summarize-tree stage', () => {
    it('uses getSystemPrompt to fetch llm-summarize prompt', async () => {
      const promptSpy = vi.fn(async (_name: string, fallback: string) => fallback);
      // Prose content > 2000 chars with headers triggers the LLM summary path.
      const sections = [
        '# Section 1\n' + 'Word '.repeat(500),
        '# Section 2\n' + 'Text '.repeat(500),
      ].join('\n\n');
      const ctx = mockCtx(sections, {}, { llmAvailable: true, getSystemPrompt: promptSpy });

      await summarizeTree(sections, ctx);

      expect(promptSpy).toHaveBeenCalledWith(
        'llm-summarize',
        expect.stringContaining('{{maxTokens}}'),
      );
    });

    it('interpolates {{maxTokens}} in the fetched template', async () => {
      let seenPrompt = '';
      const customFetcher: SystemPromptFetcher = async (name, fallback) =>
        name === 'llm-summarize' ? 'Custom summary in {{maxTokens}} tokens max' : fallback;
      const recordingLlm: LLMProvider = {
        async complete(prompt) {
          seenPrompt = prompt;
          return 'A brief summary';
        },
        available: () => true,
      };
      const sections = [
        '# Part A\n' + 'Content '.repeat(500),
        '# Part B\n' + 'More '.repeat(500),
      ].join('\n\n');
      const ctx = mockCtx(sections, {}, { llmAvailable: true, getSystemPrompt: customFetcher });
      (ctx as { llm: LLMProvider }).llm = recordingLlm;

      await summarizeTree(sections, ctx);

      expect(seenPrompt).toContain('Custom summary in 200 tokens max');
      expect(seenPrompt).not.toContain('{{maxTokens}}');
    });
  });
});