import { describe, it, expect, vi, afterEach } from 'vitest';
import Fastify from 'fastify';
import type { FastifyInstance } from 'fastify';
import { registerLlmInferRoutes } from '../src/routes/llm-infer.js';
import { LlmAdapterRegistry } from '../src/services/llm/dispatcher.js';
import { errorHandler } from '../src/middleware/error-handler.js';
import type { LlmView } from '../src/services/llm.service.js';
import { NotFoundError } from '../src/services/mcp-server.service.js';

/** Dependency bag accepted as the second argument of `registerLlmInferRoutes`. */
type LlmInferDeps = Parameters<typeof registerLlmInferRoutes>[1];

/** Fastify instance under test; (re)assigned by `setupApp` and closed in `afterEach`. */
let app: FastifyInstance;

/**
 * Builds an `LlmView` fixture with Anthropic-flavoured defaults.
 *
 * @param overrides - Fields that replace the defaults (spread last, so they win).
 * @returns A complete `LlmView` object.
 */
function makeLlmView(overrides: Partial<LlmView> = {}): LlmView {
  return {
    id: 'llm-1',
    name: 'claude',
    type: 'anthropic',
    model: 'claude-3-5-sonnet-20241022',
    url: '',
    tier: 'heavy',
    description: '',
    apiKeyRef: { name: 'anthropic-key', key: 'token' },
    extraConfig: {},
    version: 1,
    createdAt: new Date(),
    updatedAt: new Date(),
    ...overrides,
  };
}

// Close whatever app the last test created so instances do not pile up.
afterEach(async () => {
  if (app) await app.close();
});

/**
 * Wraps pre-formatted SSE event strings in a 200 `Response` with a streamed body.
 *
 * @param events - Event blocks; they are joined with a blank line and the body
 *   is terminated with a trailing blank line.
 * @returns A `Response` whose body stream emits the whole payload in one chunk.
 */
function sseResponse(events: string[]): Response {
  const body = events.join('\n\n') + '\n\n';
  const stream = new ReadableStream({
    start(controller) {
      controller.enqueue(new TextEncoder().encode(body));
      controller.close();
    },
  });
  return new Response(stream, { status: 200 });
}

/** The two service methods these tests stub out. */
interface LlmServiceLike {
  getByName: (name: string) => Promise<LlmView>;
  resolveApiKey: (name: string) => Promise<string>;
}

/**
 * Creates a Fastify app with the shared error handler and the infer routes
 * registered, stores it in the module-level `app`, and waits until it is ready.
 *
 * @param llmService - Stubbed service supplying `getByName` / `resolveApiKey`.
 * @param adapters - Adapter registry handed to the routes.
 * @param onInferenceEvent - Optional event sink; only set on the deps when provided.
 * @returns The ready Fastify instance (the same object as the module-level `app`).
 */
async function setupApp(
  llmService: LlmServiceLike,
  adapters: LlmAdapterRegistry,
  onInferenceEvent?: LlmInferDeps['onInferenceEvent'],
): Promise<FastifyInstance> {
  app = Fastify({ logger: false });
  app.setErrorHandler(errorHandler);

  const deps: LlmInferDeps = {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    llmService: llmService as any,
    adapters,
  };
  // Assign conditionally rather than passing an explicit `undefined` property.
  if (onInferenceEvent !== undefined) deps.onInferenceEvent = onInferenceEvent;

  registerLlmInferRoutes(app, deps);
  await app.ready();
  return app;
}

describe('POST /api/v1/llms/:name/infer', () => {
  it('returns 404 when the Llm does not exist', async () => {
    const svc: LlmServiceLike = {
      getByName: async () => {
        throw new NotFoundError('Llm not found: missing');
      },
      resolveApiKey: async () => '',
    };
    await setupApp(svc, new LlmAdapterRegistry());

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/missing/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });

    expect(res.statusCode).toBe(404);
  });

  it('returns 400 when messages is missing', async () => {
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ apiKeyRef: null }),
      resolveApiKey: async () => '',
    };
    await setupApp(svc, new LlmAdapterRegistry());

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: {},
    });

    expect(res.statusCode).toBe(400);
  });

  it('dispatches non-streaming to the adapter and returns its JSON', async () => {
    // Upstream replies with an Anthropic-shaped message body.
    const fetchFn = vi.fn(async () => new Response(JSON.stringify({
      id: 'msg_1',
      model: 'claude-3-5-sonnet-20241022',
      role: 'assistant',
      content: [{ type: 'text', text: 'hello' }],
      stop_reason: 'end_turn',
      usage: { input_tokens: 1, output_tokens: 1 },
    }), { status: 200 }));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => 'sk-ant-xyz',
    };
    const events: unknown[] = [];
    await setupApp(svc, adapters, (e) => events.push(e));

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });

    expect(res.statusCode).toBe(200);
    // The route's response is read through a `choices[].message.content` shape.
    const body = res.json<{ choices: Array<{ message: { content: string } }> }>();
    expect(body.choices[0]!.message.content).toBe('hello');

    // Audit event emitted
    expect(events).toHaveLength(1);
    const audit = events[0] as { kind: string; llmName: string; streaming: boolean; status: number };
    expect(audit.kind).toBe('llm_inference_call');
    expect(audit.llmName).toBe('claude');
    expect(audit.streaming).toBe(false);
    expect(audit.status).toBe(200);
  });

  it('500s when apiKey resolution fails', async () => {
    const adapters = new LlmAdapterRegistry();
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => {
        throw new Error('secret not found');
      },
    };
    await setupApp(svc, adapters);

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });

    expect(res.statusCode).toBe(500);
    expect(res.json<{ error: string }>().error).toMatch(/secret not found/);
  });

  it('skips apiKey resolution when the Llm has no apiKeyRef', async () => {
    const fetchFn = vi.fn(async () => new Response(JSON.stringify({ id: 'x', choices: [] }), { status: 200 }));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    // Bare spy: the test fails if the route ever asks for a key.
    const resolveSpy = vi.fn();
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ type: 'ollama', url: 'http://ollama:11434', apiKeyRef: null }),
      resolveApiKey: resolveSpy as unknown as LlmServiceLike['resolveApiKey'],
    };
    await setupApp(svc, adapters);

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/ollama-local/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });

    expect(res.statusCode).toBe(200);
    expect(resolveSpy).not.toHaveBeenCalled();
  });

  it('streams SSE chunks for stream: true', async () => {
    // Upstream emits one text delta followed by a stop event.
    const fetchFn = vi.fn(async () => sseResponse([
      'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"hi"}}',
      'event: message_stop\ndata: {"type":"message_stop"}',
    ]));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => 'sk-ant-xyz',
    };
    const events: Array<{ streaming: boolean; status: number }> = [];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await setupApp(svc, adapters, ((e: any) => events.push(e)) as any);

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }], stream: true },
    });

    expect(res.statusCode).toBe(200);
    expect(res.body).toContain('data:');
    expect(res.body).toContain('[DONE]');
    expect(events).toHaveLength(1);
    expect(events[0]!.streaming).toBe(true);
  });

  it('502s on adapter errors (non-streaming)', async () => {
    // The injected fetch rejects, standing in for an unreachable upstream.
    const fetchFn = vi.fn(async () => {
      throw new Error('upstream down');
    });
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ type: 'openai', url: 'http://example', apiKeyRef: null }),
      resolveApiKey: async () => '',
    };
    await setupApp(svc, adapters);

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/x/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });

    expect(res.statusCode).toBe(502);
    expect(res.json<{ error: string }>().error).toMatch(/upstream down/);
  });
});