feat(mcpd): inference proxy — POST /api/v1/llms/:name/infer

Why: the point of the Llm resource (Phase 1) is that credentials never leave the server. This lands the proxy: clients POST OpenAI chat/completions to mcpd, mcpd attaches the provider API key server-side, and the response streams back as OpenAI-format SSE.

Design:
- Wire format client-side is always OpenAI chat/completions — every existing SDK speaks it. Adapters translate on the provider side.
- `openai | vllm | deepseek | ollama` → pure passthrough (they already speak OpenAI). `anthropic` → translator to/from Anthropic Messages API (system-string extraction, content-block flattening, SSE event remap).
- Plain fetch; no @anthropic-ai/sdk dep. Consistent with the OpenBao driver shape and keeps the proxy layer thin.
- `gemini-cli` intentionally rejected — subprocess providers need extra lifecycle plumbing; deferred to a follow-up.
- Streaming: adapters yield `StreamingChunk`s; the route frames them as `data: <json>\n\n` + terminal `data: [DONE]\n\n` so any OpenAI client works unchanged.

RBAC:
- New URL special-case in mapUrlToPermission: `POST /api/v1/llms/:name/infer` → `run:llms:<name>` (not the default create:llms). Users need an explicit `{role: 'run', resource: 'llms', [name: X]}` binding to call infer.
- Possession of `edit:llms` does NOT imply `run` — keeps catalogue management separate from spend.

Audit: route emits an `llm_inference_call` event per request (llm name, model, user/tokenSha, streaming, duration, status). main.ts wires it to the structured logger for now; hook is in place for a richer audit sink later.

Tests:
- 11 adapter tests (passthrough POST shape + default URLs + no-auth ollama + SSE forwarding; anthropic translate request/response + non-2xx wrap + SSE event translation; registry dispatch + caching + unsupported-provider).
- 7 route tests (404, 400, non-streaming dispatch + audit, apiKey failure, null apiKeyRef path, streaming SSE output, 502 on adapter error).
- Full suite 1830/1830 (+18 from Phase 1's 1812). TypeScript clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 22:43:55 +01:00
parent 6ff90a8228
commit 23f53a0798
8 changed files with 1079 additions and 0 deletions
--- a/src/mcpd/tests/llm-adapters.test.ts
+++ b/src/mcpd/tests/llm-adapters.test.ts
@@ -0,0 +1,210 @@
+import { describe, it, expect, vi } from 'vitest';
+import { OpenAiPassthroughAdapter } from '../src/services/llm/adapters/openai-passthrough.js';
+import { AnthropicAdapter } from '../src/services/llm/adapters/anthropic.js';
+import { LlmAdapterRegistry, UnsupportedProviderError } from '../src/services/llm/dispatcher.js';
+import type { InferContext } from '../src/services/llm/types.js';
+
+/**
+ * Builds a vitest mock that stands in for `fetch`.
+ *
+ * Each call is answered by the first entry of `responses` whose `match`
+ * pattern tests true against the stringified request URL. The reply body is
+ * `body` serialised as JSON when it is provided, otherwise `text` (or an
+ * empty string); the reply is always labelled `application/json`.
+ *
+ * @param responses Canned replies, checked in array order.
+ * @returns The mock; calling it with a URL that matches no entry rejects
+ *          with `unexpected fetch: <url>`.
+ */
+function mockFetch(responses: Array<{ match: RegExp; status: number; body?: unknown; text?: string }>): ReturnType<typeof vi.fn> {
+  const handler = async (input: string | URL, _init?: RequestInit) => {
+    const requested = String(input);
+    for (const canned of responses) {
+      if (!canned.match.test(requested)) continue;
+      const payload = canned.body === undefined ? (canned.text ?? '') : JSON.stringify(canned.body);
+      return new Response(payload, { status: canned.status, headers: { 'Content-Type': 'application/json' } });
+    }
+    // Nothing matched: fail loudly so an unplanned request is visible in the test output.
+    throw new Error(`unexpected fetch: ${requested}`);
+  };
+  return vi.fn(handler);
+}
+
+/**
+ * Produces an `InferContext` for adapter tests.
+ *
+ * The defaults are a single user message with an empty `model`, a
+ * `modelOverride` of `'default-model'`, an `apiKey` of `'test-key'`, an empty
+ * `url`, and an empty `extraConfig`.
+ *
+ * @param overrides Fields that replace the defaults (shallow merge).
+ * @returns The merged context.
+ */
+function makeCtx(overrides: Partial<InferContext> = {}): InferContext {
+  const defaults: InferContext = {
+    body: { model: '', messages: [{ role: 'user', content: 'hello' }] },
+    modelOverride: 'default-model',
+    apiKey: 'test-key',
+    url: '',
+    extraConfig: {},
+  };
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Wraps SSE event strings in a streaming `Response`.
+ *
+ * The events are joined with a blank line, a trailing blank line is appended,
+ * and the encoded text is enqueued as one chunk before the stream is closed.
+ *
+ * @param events Raw SSE event blocks (without the separating blank lines).
+ * @returns A 200 response with `Content-Type: text/event-stream`.
+ */
+function sseResponse(events: string[]): Response {
+  const payload = new TextEncoder().encode(`${events.join('\n\n')}\n\n`);
+  const source = new ReadableStream<Uint8Array>({
+    start(ctrl) {
+      ctrl.enqueue(payload);
+      ctrl.close();
+    },
+  });
+  const headers = { 'Content-Type': 'text/event-stream' };
+  return new Response(source, { status: 200, headers });
+}
+
+// Exercises OpenAiPassthroughAdapter against stubbed fetch implementations:
+// request URL/method/headers/body, default endpoint selection, and SSE streaming.
+describe('OpenAiPassthroughAdapter', () => {
+  it('infer: POSTs to <url>/v1/chat/completions with Authorization + body', async () => {
+    const fetchFn = mockFetch([
+      {
+        match: /\/v1\/chat\/completions$/,
+        status: 200,
+        body: { id: 'x', choices: [{ message: { role: 'assistant', content: 'hi' } }] },
+      },
+    ]);
+    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
+
+    const result = await adapter.infer(makeCtx({ url: 'https://api.example.com' }));
+    expect(result.status).toBe(200);
+
+    // Inspect what the adapter handed to fetch on its first call.
+    const [requestUrl, requestInit] = fetchFn.mock.calls[0] as [string, RequestInit];
+    expect(requestUrl).toBe('https://api.example.com/v1/chat/completions');
+    expect(requestInit.method).toBe('POST');
+    const sentHeaders = requestInit.headers as Record<string, string>;
+    expect(sentHeaders['Authorization']).toBe('Bearer test-key');
+    const payload = JSON.parse(requestInit.body as string) as { model: string; stream: boolean };
+    expect(payload.model).toBe('default-model'); // ctx body has model '', so modelOverride is expected here
+    expect(payload.stream).toBe(false);
+  });
+
+  it('infer: uses default URL for openai when url is empty', async () => {
+    const fetchFn = mockFetch([{ match: /api\.openai\.com/, status: 200, body: {} }]);
+    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
+
+    // makeCtx() leaves `url` as the empty string.
+    await adapter.infer(makeCtx());
+
+    const firstCall = fetchFn.mock.calls[0] as [string, RequestInit];
+    expect(firstCall[0]).toBe('https://api.openai.com/v1/chat/completions');
+  });
+
+  it('infer: throws for vllm when url is empty (no default)', async () => {
+    const stubFetch = vi.fn() as unknown as typeof fetch;
+    const adapter = new OpenAiPassthroughAdapter('vllm', { fetch: stubFetch });
+    const pending = adapter.infer(makeCtx());
+    await expect(pending).rejects.toThrow(/no default endpoint/);
+  });
+
+  it('infer: omits Authorization when apiKey is empty', async () => {
+    const fetchFn = mockFetch([{ match: /ollama/, status: 200, body: {} }]);
+    const adapter = new OpenAiPassthroughAdapter('ollama', { fetch: fetchFn as unknown as typeof fetch });
+
+    const keylessCtx = makeCtx({ url: 'http://ollama:11434', apiKey: '' });
+    await adapter.infer(keylessCtx);
+
+    const [, requestInit] = fetchFn.mock.calls[0] as [string, RequestInit];
+    const sentHeaders = requestInit.headers as Record<string, string>;
+    expect(sentHeaders['Authorization']).toBeUndefined();
+  });
+
+  it('stream: forwards SSE chunks and emits terminal [DONE]', async () => {
+    const upstreamEvents = [
+      'data: {"choices":[{"delta":{"content":"hi"}}]}',
+      'data: {"choices":[{"delta":{"content":"!"}}]}',
+      'data: [DONE]',
+    ];
+    const fetchFn = vi.fn(async () => sseResponse(upstreamEvents));
+    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
+    const ctx = makeCtx({ url: 'http://example', body: { model: '', messages: [], stream: true } });
+
+    // Drain the async iterator into an array so the chunks can be inspected by index.
+    const received: Array<{ data: string; done?: boolean }> = [];
+    for await (const chunk of adapter.stream(ctx)) {
+      received.push(chunk);
+    }
+
+    expect(received).toHaveLength(3);
+    expect(received[2]?.done).toBe(true);
+  });
+});
+
+// Exercises AnthropicAdapter against stubbed fetch implementations: request
+// translation, response translation, non-2xx handling, and SSE event translation.
+describe('AnthropicAdapter', () => {
+  it('infer: translates system+user messages, posts to /v1/messages', async () => {
+    const upstreamReply = {
+      id: 'msg_01', model: 'claude-3-5-sonnet-20241022', role: 'assistant',
+      content: [{ type: 'text', text: 'howdy' }],
+      stop_reason: 'end_turn',
+      usage: { input_tokens: 5, output_tokens: 2 },
+    };
+    const fetchFn = mockFetch([{ match: /\/v1\/messages$/, status: 200, body: upstreamReply }]);
+    const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
+    const ctx = makeCtx({
+      body: {
+        model: '',
+        messages: [
+          { role: 'system', content: 'be nice' },
+          { role: 'user', content: 'hi' },
+        ],
+      },
+      modelOverride: 'claude-3-5-sonnet-20241022',
+    });
+
+    const result = await adapter.infer(ctx);
+    expect(result.status).toBe(200);
+
+    // Outgoing request: endpoint and Anthropic-specific headers.
+    const [requestUrl, requestInit] = fetchFn.mock.calls[0] as [string, RequestInit];
+    expect(requestUrl).toBe('https://api.anthropic.com/v1/messages');
+    const sentHeaders = requestInit.headers as Record<string, string>;
+    expect(sentHeaders['x-api-key']).toBe('test-key');
+    expect(sentHeaders['anthropic-version']).toBeDefined();
+
+    // Outgoing body: system message pulled out into `system`, only the user turn left in `messages`.
+    type SentBody = {
+      model: string; system: string; messages: Array<{ role: string; content: string }>; max_tokens: number;
+    };
+    const payload = JSON.parse(requestInit.body as string) as SentBody;
+    expect(payload.model).toBe('claude-3-5-sonnet-20241022');
+    expect(payload.system).toBe('be nice');
+    expect(payload.messages).toEqual([{ role: 'user', content: 'hi' }]);
+    expect(payload.max_tokens).toBe(1024); // the ctx sets no max_tokens, so this is the expected default
+
+    // Returned body is expected in OpenAI chat.completion shape.
+    type CompletionBody = {
+      choices: Array<{ message: { content: string }; finish_reason: string }>;
+      usage: { total_tokens: number };
+    };
+    const completion = result.body as CompletionBody;
+    expect(completion.choices[0]!.message.content).toBe('howdy');
+    expect(completion.choices[0]!.finish_reason).toBe('stop');
+    expect(completion.usage.total_tokens).toBe(7);
+  });
+
+  it('infer: returns a synthetic error body on non-2xx', async () => {
+    const fetchFn = vi.fn(async () => new Response('boom', { status: 500 }));
+    const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
+    const ctx = makeCtx({ body: { model: '', messages: [{ role: 'user', content: 'x' }] } });
+
+    const result = await adapter.infer(ctx);
+
+    expect(result.status).toBe(500);
+    const failure = result.body as { error: { message: string } };
+    expect(failure.error.message).toMatch(/HTTP 500/);
+  });
+
+  it('stream: translates anthropic event stream into OpenAI chunks', async () => {
+    const upstreamEvents = [
+      'event: message_start\ndata: {"type":"message_start","message":{"id":"m","content":[]}}',
+      'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"he"}}',
+      'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"llo"}}',
+      'event: message_delta\ndata: {"type":"message_delta","delta":{"stop_reason":"end_turn"}}',
+      'event: message_stop\ndata: {"type":"message_stop"}',
+    ];
+    const fetchFn = vi.fn(async () => sseResponse(upstreamEvents));
+    const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
+    const ctx = makeCtx({ body: { model: '', messages: [{ role: 'user', content: 'hi' }], stream: true } });
+
+    // Drain the async iterator into an array so the chunks can be inspected by index.
+    const received: Array<{ data: string; done?: boolean }> = [];
+    for await (const chunk of adapter.stream(ctx)) {
+      received.push(chunk);
+    }
+
+    // Expected sequence: role-prime, two text deltas, finish-reason, [DONE].
+    const lastIndex = received.length - 1;
+    expect(received[lastIndex]?.data).toBe('[DONE]');
+    expect(received[lastIndex]?.done).toBe(true);
+
+    // Chunk 0 primes the assistant role.
+    type RoleChunk = { choices: [{ delta: { role: string; content: string } }] };
+    const rolePrime = JSON.parse(received[0]!.data) as RoleChunk;
+    expect(rolePrime.choices[0]!.delta.role).toBe('assistant');
+
+    // Chunks 1 and 2 carry the text deltas in upstream order.
+    type TextChunk = { choices: [{ delta: { content: string } }] };
+    const firstDelta = JSON.parse(received[1]!.data) as TextChunk;
+    const secondDelta = JSON.parse(received[2]!.data) as TextChunk;
+    expect(firstDelta.choices[0]!.delta.content).toBe('he');
+    expect(secondDelta.choices[0]!.delta.content).toBe('llo');
+
+    // Chunk 3 carries the finish reason; upstream `end_turn` is expected to surface as `stop`.
+    const finish = JSON.parse(received[3]!.data) as { choices: [{ finish_reason: string }] };
+    expect(finish.choices[0]!.finish_reason).toBe('stop');
+  });
+});
+
+// Exercises LlmAdapterRegistry: lookup by provider type, instance reuse, and
+// rejection of provider types it does not support.
+describe('LlmAdapterRegistry', () => {
+  it('returns the right adapter kind for each type', () => {
+    const registry = new LlmAdapterRegistry();
+    const supported = ['openai', 'vllm', 'deepseek', 'ollama', 'anthropic'] as const;
+    // Each lookup is expected to return an adapter whose `kind` equals the requested type.
+    for (const providerType of supported) {
+      expect(registry.get(providerType).kind).toBe(providerType);
+    }
+  });
+
+  it('caches adapters between calls', () => {
+    const registry = new LlmAdapterRegistry();
+    const firstLookup = registry.get('openai');
+    const secondLookup = registry.get('openai');
+    // toBe compares by identity, so both lookups must return the same instance.
+    expect(firstLookup).toBe(secondLookup);
+  });
+
+  it('rejects unsupported providers (gemini-cli is deferred)', () => {
+    const registry = new LlmAdapterRegistry();
+    const unsupported = ['gemini-cli', 'bogus'] as const;
+    for (const providerType of unsupported) {
+      expect(() => registry.get(providerType)).toThrow(UnsupportedProviderError);
+    }
+  });
+});