src/mcpd/tests/llm-infer-route.test.ts

import { describe, it, expect, vi, afterEach } from 'vitest';
import Fastify from 'fastify';
import type { FastifyInstance } from 'fastify';
import { registerLlmInferRoutes } from '../src/routes/llm-infer.js';
import { LlmAdapterRegistry } from '../src/services/llm/dispatcher.js';
import { errorHandler } from '../src/middleware/error-handler.js';
import type { LlmView } from '../src/services/llm.service.js';
import { NotFoundError } from '../src/services/mcp-server.service.js';

// Fastify instance under test. Assigned by setupApp() in each test and closed
// by the afterEach hook; it is unassigned until the first setupApp() call.
let app: FastifyInstance;

/**
 * Builds an `LlmView` fixture for the route tests.
 *
 * The defaults describe an `anthropic` Llm named `claude` that carries an
 * `apiKeyRef`. Any field supplied in `overrides` replaces the matching default.
 *
 * @param overrides - Fields to replace on the default fixture.
 * @returns A fresh fixture object; nothing is shared between calls.
 */
function makeLlmView(overrides: Partial<LlmView> = {}): LlmView {
  // Default secret reference; tests pass `apiKeyRef: null` to drop it.
  const apiKeyRef = { name: 'anthropic-key', key: 'token' };
  // Two distinct Date instances, taken at fixture-creation time.
  const createdAt = new Date();
  const updatedAt = new Date();

  return {
    id: 'llm-1',
    name: 'claude',
    type: 'anthropic',
    model: 'claude-3-5-sonnet-20241022',
    url: '',
    tier: 'heavy',
    description: '',
    apiKeyRef,
    extraConfig: {},
    version: 1,
    createdAt,
    updatedAt,
    // Spread last so caller-supplied fields win over the defaults.
    ...overrides,
  };
}

// Close the Fastify instance after every test. The guard covers the case where
// no test has assigned `app` yet.
afterEach(async () => {
  if (!app) return;
  await app.close();
});

/**
 * Wraps pre-formatted SSE event strings in a 200 `Response` with a streamed body.
 *
 * Events are separated by a blank line and the payload ends with one. The whole
 * payload is enqueued as a single chunk, after which the stream is closed.
 *
 * @param events - Already-formatted SSE events (e.g. `event: x\ndata: {...}`).
 * @returns A `Response` whose body is a `ReadableStream` of the encoded events.
 */
function sseResponse(events: string[]): Response {
  const payload = new TextEncoder().encode(`${events.join('\n\n')}\n\n`);
  return new Response(
    new ReadableStream<Uint8Array>({
      start(ctrl) {
        ctrl.enqueue(payload);
        ctrl.close();
      },
    }),
    { status: 200 },
  );
}

/**
 * The slice of the Llm service that these tests stub. `setupApp` casts a value
 * of this shape to the route's real `llmService` dependency type.
 */
interface LlmServiceLike {
  /** Looks up an Llm by name; the 404 test stubs this to throw `NotFoundError`. */
  getByName: (name: string) => Promise<LlmView>;
  /** Returns the provider API key; tests expect no call when `apiKeyRef` is null. */
  resolveApiKey: (name: string) => Promise<string>;
}

/**
 * Creates a Fastify instance with the project error handler and the infer
 * routes registered, stores it in the module-level `app`, and waits for it to
 * be ready.
 *
 * @param llmService - Stubbed service; cast to the route's real dependency type.
 * @param adapters - Adapter registry handed to the route.
 * @param onInferenceEvent - Optional audit callback. The key is left off the
 *   deps object entirely when this is undefined.
 * @returns The ready Fastify instance (the same object as `app`).
 */
async function setupApp(
  llmService: LlmServiceLike,
  adapters: LlmAdapterRegistry,
  onInferenceEvent?: Parameters<typeof registerLlmInferRoutes>[1]['onInferenceEvent'],
): Promise<FastifyInstance> {
  const routeDeps: Parameters<typeof registerLlmInferRoutes>[1] = {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    llmService: llmService as any,
    adapters,
    // Only add the key when a callback was actually supplied.
    ...(onInferenceEvent !== undefined ? { onInferenceEvent } : {}),
  };

  app = Fastify({ logger: false });
  app.setErrorHandler(errorHandler);
  registerLlmInferRoutes(app, routeDeps);
  await app.ready();
  return app;
}

/**
 * Route-level tests for `POST /api/v1/llms/:name/infer`.
 *
 * Each test stubs the Llm service and, where an upstream call is expected,
 * injects a fake `fetch` into `LlmAdapterRegistry`. Requests go through
 * Fastify's `inject`, so no socket is opened.
 */
describe('POST /api/v1/llms/:name/infer', () => {
  /** Fresh minimal chat payload: a single user message. */
  const userSaysHi = (): { messages: Array<{ role: string; content: string }> } => ({
    messages: [{ role: 'user', content: 'hi' }],
  });

  /** POSTs `payload` to the infer route of the Llm called `llmName`. */
  const postInfer = (llmName: string, payload: object) =>
    app.inject({
      method: 'POST',
      url: `/api/v1/llms/${llmName}/infer`,
      payload,
    });

  it('returns 404 when the Llm does not exist', async () => {
    const svc: LlmServiceLike = {
      getByName: async () => { throw new NotFoundError('Llm not found: missing'); },
      resolveApiKey: async () => '',
    };
    await setupApp(svc, new LlmAdapterRegistry());

    const res = await postInfer('missing', userSaysHi());

    expect(res.statusCode).toBe(404);
  });

  it('returns 400 when messages is missing', async () => {
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ apiKeyRef: null }),
      resolveApiKey: async () => '',
    };
    await setupApp(svc, new LlmAdapterRegistry());

    const res = await postInfer('claude', {});

    expect(res.statusCode).toBe(400);
  });

  it('dispatches non-streaming to the adapter and returns its JSON', async () => {
    // Upstream replies in Anthropic Messages shape; the route is expected to
    // hand back OpenAI-style `choices`.
    const fetchFn = vi.fn(async () => new Response(JSON.stringify({
      id: 'msg_1', model: 'claude-3-5-sonnet-20241022', role: 'assistant',
      content: [{ type: 'text', text: 'hello' }],
      stop_reason: 'end_turn',
      usage: { input_tokens: 1, output_tokens: 1 },
    }), { status: 200 }));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => 'sk-ant-xyz',
    };
    const events: unknown[] = [];
    await setupApp(svc, adapters, (e) => { events.push(e); });

    const res = await postInfer('claude', userSaysHi());

    expect(res.statusCode).toBe(200);
    const body = res.json<{ choices: Array<{ message: { content: string } }> }>();
    expect(body.choices[0]?.message.content).toBe('hello');

    // Exactly one audit event, carrying the call's identifying fields.
    expect(events).toHaveLength(1);
    expect(events[0]).toMatchObject({
      kind: 'llm_inference_call',
      llmName: 'claude',
      streaming: false,
      status: 200,
    });
  });

  it('500s when apiKey resolution fails', async () => {
    const adapters = new LlmAdapterRegistry();
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => { throw new Error('secret not found'); },
    };
    await setupApp(svc, adapters);

    const res = await postInfer('claude', userSaysHi());

    expect(res.statusCode).toBe(500);
    expect(res.json<{ error: string }>().error).toMatch(/secret not found/);
  });

  it('skips apiKey resolution when the Llm has no apiKeyRef', async () => {
    const fetchFn = vi.fn(async () => new Response(JSON.stringify({ id: 'x', choices: [] }), { status: 200 }));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const resolveSpy = vi.fn();
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ type: 'ollama', url: 'http://ollama:11434', apiKeyRef: null }),
      resolveApiKey: resolveSpy as unknown as LlmServiceLike['resolveApiKey'],
    };
    await setupApp(svc, adapters);

    const res = await postInfer('ollama-local', userSaysHi());

    expect(res.statusCode).toBe(200);
    expect(resolveSpy).not.toHaveBeenCalled();
  });

  it('streams SSE chunks for stream: true', async () => {
    // Upstream emits Anthropic-style SSE events; the route output must contain
    // `data:` frames and the terminal `[DONE]` marker.
    const fetchFn = vi.fn(async () => sseResponse([
      'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"hi"}}',
      'event: message_stop\ndata: {"type":"message_stop"}',
    ]));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => 'sk-ant-xyz',
    };
    // Collected as `unknown` and checked structurally, so no `any` cast is needed.
    const events: unknown[] = [];
    await setupApp(svc, adapters, (e) => { events.push(e); });

    const res = await postInfer('claude', { ...userSaysHi(), stream: true });

    expect(res.statusCode).toBe(200);
    expect(res.body).toContain('data:');
    expect(res.body).toContain('[DONE]');
    expect(events).toHaveLength(1);
    expect(events[0]).toMatchObject({ streaming: true });
  });

  it('502s on adapter errors (non-streaming)', async () => {
    const fetchFn = vi.fn(async () => { throw new Error('upstream down'); });
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ type: 'openai', url: 'http://example', apiKeyRef: null }),
      resolveApiKey: async () => '',
    };
    await setupApp(svc, adapters);

    const res = await postInfer('x', userSaysHi());

    expect(res.statusCode).toBe(502);
    expect(res.json<{ error: string }>().error).toMatch(/upstream down/);
  });
});
feat(mcpd): inference proxy — POST /api/v1/llms/:name/infer

Why: the point of the Llm resource (Phase 1) is that credentials never leave the server. This lands the proxy: clients POST OpenAI chat/completions to mcpd, mcpd attaches the provider API key server-side, and the response streams back as OpenAI-format SSE.

Design:
- Wire format client-side is always OpenAI chat/completions — every existing SDK speaks it. Adapters translate on the provider side.
- `openai | vllm | deepseek | ollama` → pure passthrough (they already speak OpenAI). `anthropic` → translator to/from Anthropic Messages API (system-string extraction, content-block flattening, SSE event remap).
- Plain fetch; no @anthropic-ai/sdk dep. Consistent with the OpenBao driver shape and keeps the proxy layer thin.
- `gemini-cli` intentionally rejected — subprocess providers need extra lifecycle plumbing; deferred to a follow-up.
- Streaming: adapters yield `StreamingChunk`s; the route frames them as `data: <json>\n\n` + terminal `data: [DONE]\n\n` so any OpenAI client works unchanged.

RBAC:
- New URL special-case in mapUrlToPermission: `POST /api/v1/llms/:name/infer` → `run:llms:<name>` (not the default create:llms). Users need an explicit `{role: 'run', resource: 'llms', [name: X]}` binding to call infer.
- Possession of `edit:llms` does NOT imply `run` — keeps catalogue management separate from spend.

Audit: route emits an `llm_inference_call` event per request (llm name, model, user/tokenSha, streaming, duration, status). main.ts wires it to the structured logger for now; hook is in place for a richer audit sink later.

Tests:
- 11 adapter tests (passthrough POST shape + default URLs + no-auth ollama + SSE forwarding; anthropic translate request/response + non-2xx wrap + SSE event translation; registry dispatch + caching + unsupported-provider).
- 7 route tests (404, 400, non-streaming dispatch + audit, apiKey failure, null apiKeyRef path, streaming SSE output, 502 on adapter error).
- Full suite 1830/1830 (+18 from Phase 1's 1812). TypeScript clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 22:43:55 +01:00
			`import { describe, it, expect, vi, afterEach } from 'vitest';`
			`import Fastify from 'fastify';`
			`import type { FastifyInstance } from 'fastify';`
			`import { registerLlmInferRoutes } from '../src/routes/llm-infer.js';`
			`import { LlmAdapterRegistry } from '../src/services/llm/dispatcher.js';`
			`import { errorHandler } from '../src/middleware/error-handler.js';`
			`import type { LlmView } from '../src/services/llm.service.js';`
			`import { NotFoundError } from '../src/services/mcp-server.service.js';`

			`let app: FastifyInstance;`

			`function makeLlmView(overrides: Partial<LlmView> = {}): LlmView {`
			`return {`
			`id: 'llm-1',`
			`name: 'claude',`
			`type: 'anthropic',`
			`model: 'claude-3-5-sonnet-20241022',`
			`url: '',`
			`tier: 'heavy',`
			`description: '',`
			`apiKeyRef: { name: 'anthropic-key', key: 'token' },`
			`extraConfig: {},`
			`version: 1,`
			`createdAt: new Date(),`
			`updatedAt: new Date(),`
			`...overrides,`
			`};`
			`}`

			`afterEach(async () => {`
			`if (app) await app.close();`
			`});`

			`function sseResponse(events: string[]): Response {`
			`const body = events.join('\n\n') + '\n\n';`
			`const stream = new ReadableStream<Uint8Array>({`
			`start(controller) {`
			`controller.enqueue(new TextEncoder().encode(body));`
			`controller.close();`
			`},`
			`});`
			`return new Response(stream, { status: 200 });`
			`}`

			`interface LlmServiceLike {`
			`getByName: (name: string) => Promise<LlmView>;`
			`resolveApiKey: (name: string) => Promise<string>;`
			`}`

			`async function setupApp(`
			`llmService: LlmServiceLike,`
			`adapters: LlmAdapterRegistry,`
			`onInferenceEvent?: Parameters<typeof registerLlmInferRoutes>[1]['onInferenceEvent'],`
			`): Promise<FastifyInstance> {`
			`app = Fastify({ logger: false });`
			`app.setErrorHandler(errorHandler);`
			`const deps: Parameters<typeof registerLlmInferRoutes>[1] = {`
			`// eslint-disable-next-line @typescript-eslint/no-explicit-any`
			`llmService: llmService as any,`
			`adapters,`
			`};`
			`if (onInferenceEvent !== undefined) deps.onInferenceEvent = onInferenceEvent;`
			`registerLlmInferRoutes(app, deps);`
			`await app.ready();`
			`return app;`
			`}`

			`describe('POST /api/v1/llms/:name/infer', () => {`
			`it('returns 404 when the Llm does not exist', async () => {`
			`const svc: LlmServiceLike = {`
			`getByName: async () => { throw new NotFoundError('Llm not found: missing'); },`
			`resolveApiKey: async () => '',`
			`};`
			`await setupApp(svc, new LlmAdapterRegistry());`
			`const res = await app.inject({`
			`method: 'POST',`
			`url: '/api/v1/llms/missing/infer',`
			`payload: { messages: [{ role: 'user', content: 'hi' }] },`
			`});`
			`expect(res.statusCode).toBe(404);`
			`});`

			`it('returns 400 when messages is missing', async () => {`
			`const svc: LlmServiceLike = {`
			`getByName: async () => makeLlmView({ apiKeyRef: null }),`
			`resolveApiKey: async () => '',`
			`};`
			`await setupApp(svc, new LlmAdapterRegistry());`
			`const res = await app.inject({`
			`method: 'POST',`
			`url: '/api/v1/llms/claude/infer',`
			`payload: {},`
			`});`
			`expect(res.statusCode).toBe(400);`
			`});`

			`it('dispatches non-streaming to the adapter and returns its JSON', async () => {`
			`const fetchFn = vi.fn(async () => new Response(JSON.stringify({`
			`id: 'msg_1', model: 'claude-3-5-sonnet-20241022', role: 'assistant',`
			`content: [{ type: 'text', text: 'hello' }],`
			`stop_reason: 'end_turn',`
			`usage: { input_tokens: 1, output_tokens: 1 },`
			`}), { status: 200 }));`
			`const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });`
			`const svc: LlmServiceLike = {`
			`getByName: async () => makeLlmView(),`
			`resolveApiKey: async () => 'sk-ant-xyz',`
			`};`
			`const events: unknown[] = [];`
			`await setupApp(svc, adapters, (e) => events.push(e));`

			`const res = await app.inject({`
			`method: 'POST',`
			`url: '/api/v1/llms/claude/infer',`
			`payload: { messages: [{ role: 'user', content: 'hi' }] },`
			`});`
			`expect(res.statusCode).toBe(200);`
			`const body = res.json<{ choices: Array<{ message: { content: string } }> }>();`
			`expect(body.choices[0]!.message.content).toBe('hello');`

			`// Audit event emitted`
			`expect(events).toHaveLength(1);`
			`expect((events[0] as { kind: string; llmName: string; status: number }).kind).toBe('llm_inference_call');`
			`expect((events[0] as { llmName: string }).llmName).toBe('claude');`
			`expect((events[0] as { streaming: boolean }).streaming).toBe(false);`
			`expect((events[0] as { status: number }).status).toBe(200);`
			`});`

			`it('500s when apiKey resolution fails', async () => {`
			`const adapters = new LlmAdapterRegistry();`
			`const svc: LlmServiceLike = {`
			`getByName: async () => makeLlmView(),`
			`resolveApiKey: async () => { throw new Error('secret not found'); },`
			`};`
			`await setupApp(svc, adapters);`

			`const res = await app.inject({`
			`method: 'POST',`
			`url: '/api/v1/llms/claude/infer',`
			`payload: { messages: [{ role: 'user', content: 'hi' }] },`
			`});`
			`expect(res.statusCode).toBe(500);`
			`expect(res.json<{ error: string }>().error).toMatch(/secret not found/);`
			`});`

			`it('skips apiKey resolution when the Llm has no apiKeyRef', async () => {`
			`const fetchFn = vi.fn(async () => new Response(JSON.stringify({ id: 'x', choices: [] }), { status: 200 }));`
			`const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });`
			`const resolveSpy = vi.fn();`
			`const svc: LlmServiceLike = {`
			`getByName: async () => makeLlmView({ type: 'ollama', url: 'http://ollama:11434', apiKeyRef: null }),`
			`resolveApiKey: resolveSpy as unknown as LlmServiceLike['resolveApiKey'],`
			`};`
			`await setupApp(svc, adapters);`

			`const res = await app.inject({`
			`method: 'POST',`
			`url: '/api/v1/llms/ollama-local/infer',`
			`payload: { messages: [{ role: 'user', content: 'hi' }] },`
			`});`
			`expect(res.statusCode).toBe(200);`
			`expect(resolveSpy).not.toHaveBeenCalled();`
			`});`

			`it('streams SSE chunks for stream: true', async () => {`
			`const fetchFn = vi.fn(async () => sseResponse([`
			`'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"hi"}}',`
			`'event: message_stop\ndata: {"type":"message_stop"}',`
			`]));`
			`const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });`
			`const svc: LlmServiceLike = {`
			`getByName: async () => makeLlmView(),`
			`resolveApiKey: async () => 'sk-ant-xyz',`
			`};`
			`const events: Array<{ streaming: boolean; status: number }> = [];`
			`// eslint-disable-next-line @typescript-eslint/no-explicit-any`
			`await setupApp(svc, adapters, ((e: any) => events.push(e)) as any);`

			`const res = await app.inject({`
			`method: 'POST',`
			`url: '/api/v1/llms/claude/infer',`
			`payload: { messages: [{ role: 'user', content: 'hi' }], stream: true },`
			`});`
			`expect(res.statusCode).toBe(200);`
			`expect(res.body).toContain('data:');`
			`expect(res.body).toContain('[DONE]');`
			`expect(events).toHaveLength(1);`
			`expect(events[0]!.streaming).toBe(true);`
			`});`

			`it('502s on adapter errors (non-streaming)', async () => {`
			`const fetchFn = vi.fn(async () => { throw new Error('upstream down'); });`
			`const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });`
			`const svc: LlmServiceLike = {`
			`getByName: async () => makeLlmView({ type: 'openai', url: 'http://example', apiKeyRef: null }),`
			`resolveApiKey: async () => '',`
			`};`
			`await setupApp(svc, adapters);`

			`const res = await app.inject({`
			`method: 'POST',`
			`url: '/api/v1/llms/x/infer',`
			`payload: { messages: [{ role: 'user', content: 'hi' }] },`
			`});`
			`expect(res.statusCode).toBe(502);`
			`expect(res.json<{ error: string }>().error).toMatch(/upstream down/);`
			`});`
			`});`