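// Integration-style tests for POST /api/v1/llms/:name/infer. Requests go
// through app.inject() against a real Fastify instance; the LLM service and
// the adapter's fetch are replaced with test doubles.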
import { describe, it, expect, vi, afterEach } from 'vitest';
import Fastify from 'fastify';
import type { FastifyInstance } from 'fastify';
import { registerLlmInferRoutes } from '../src/routes/llm-infer.js';
import { LlmAdapterRegistry } from '../src/services/llm/dispatcher.js';
import { errorHandler } from '../src/middleware/error-handler.js';
import type { LlmView } from '../src/services/llm.service.js';
import { NotFoundError } from '../src/services/mcp-server.service.js';

let app: FastifyInstance;

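// Builds a complete LlmView fixture; pass overrides to vary fields per test.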
function makeLlmView(overrides: Partial<LlmView> = {}): LlmView {
  return {
    id: 'llm-1',
    name: 'claude',
    type: 'anthropic',
    model: 'claude-3-5-sonnet-20241022',
    url: '',
    tier: 'heavy',
    description: '',
    apiKeyRef: { name: 'anthropic-key', key: 'token' },
    extraConfig: {},
    version: 1,
    createdAt: new Date(),
    updatedAt: new Date(),
    ...overrides,
  };
}

afterEach(async () => {
  if (app) await app.close();
});

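// Fakes an upstream SSE reply: the given event frames are emitted through a
// ReadableStream body, the way a real streaming HTTP response would arrive.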
function sseResponse(events: string[]): Response {
  const body = events.join('\n\n') + '\n\n';
  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      controller.enqueue(new TextEncoder().encode(body));
      controller.close();
    },
  });
  return new Response(stream, { status: 200 });
}

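// The subset of the LLM service that the infer route actually touches.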
interface LlmServiceLike {
  getByName: (name: string) => Promise<LlmView>;
  resolveApiKey: (name: string) => Promise<string>;
}

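// Registers the infer routes on a fresh Fastify app wired to the given test
// doubles; the instance is stored in the module-level `app` so afterEach can
// close it.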
async function setupApp(
  llmService: LlmServiceLike,
  adapters: LlmAdapterRegistry,
  onInferenceEvent?: Parameters<typeof registerLlmInferRoutes>[1]['onInferenceEvent'],
): Promise<FastifyInstance> {
  app = Fastify({ logger: false });
  app.setErrorHandler(errorHandler);
  const deps: Parameters<typeof registerLlmInferRoutes>[1] = {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    llmService: llmService as any,
    adapters,
  };
  if (onInferenceEvent !== undefined) deps.onInferenceEvent = onInferenceEvent;
  registerLlmInferRoutes(app, deps);
  await app.ready();
  return app;
}

describe('POST /api/v1/llms/:name/infer', () => {
  it('returns 404 when the Llm does not exist', async () => {
    const svc: LlmServiceLike = {
      getByName: async () => { throw new NotFoundError('Llm not found: missing'); },
      resolveApiKey: async () => '',
    };
    await setupApp(svc, new LlmAdapterRegistry());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/missing/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(404);
  });

  it('returns 400 when messages is missing', async () => {
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ apiKeyRef: null }),
      resolveApiKey: async () => '',
    };
    await setupApp(svc, new LlmAdapterRegistry());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: {},
    });
    expect(res.statusCode).toBe(400);
  });

  it('dispatches non-streaming to the adapter and returns its JSON', async () => {
    const fetchFn = vi.fn(async () => new Response(JSON.stringify({
      id: 'msg_1', model: 'claude-3-5-sonnet-20241022', role: 'assistant',
      content: [{ type: 'text', text: 'hello' }],
      stop_reason: 'end_turn',
      usage: { input_tokens: 1, output_tokens: 1 },
    }), { status: 200 }));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => 'sk-ant-xyz',
    };
    const events: unknown[] = [];
    await setupApp(svc, adapters, (e) => events.push(e));

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(200);
    const body = res.json<{ choices: Array<{ message: { content: string } }> }>();
    expect(body.choices[0]!.message.content).toBe('hello');

    // Audit event emitted
    expect(events).toHaveLength(1);
    expect((events[0] as { kind: string; llmName: string; status: number }).kind).toBe('llm_inference_call');
    expect((events[0] as { llmName: string }).llmName).toBe('claude');
    expect((events[0] as { streaming: boolean }).streaming).toBe(false);
    expect((events[0] as { status: number }).status).toBe(200);
  });

  it('500s when apiKey resolution fails', async () => {
    const adapters = new LlmAdapterRegistry();
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => { throw new Error('secret not found'); },
    };
    await setupApp(svc, adapters);

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(500);
    expect(res.json<{ error: string }>().error).toMatch(/secret not found/);
  });

  it('skips apiKey resolution when the Llm has no apiKeyRef', async () => {
    const fetchFn = vi.fn(async () => new Response(JSON.stringify({ id: 'x', choices: [] }), { status: 200 }));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const resolveSpy = vi.fn();
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ type: 'ollama', url: 'http://ollama:11434', apiKeyRef: null }),
      resolveApiKey: resolveSpy as unknown as LlmServiceLike['resolveApiKey'],
    };
    await setupApp(svc, adapters);

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/ollama-local/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(200);
    expect(resolveSpy).not.toHaveBeenCalled();
  });

  it('streams SSE chunks for stream: true', async () => {
    const fetchFn = vi.fn(async () => sseResponse([
      'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"hi"}}',
      'event: message_stop\ndata: {"type":"message_stop"}',
    ]));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => 'sk-ant-xyz',
    };
    const events: Array<{ streaming: boolean; status: number }> = [];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await setupApp(svc, adapters, ((e: any) => events.push(e)) as any);

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }], stream: true },
    });
    expect(res.statusCode).toBe(200);
    expect(res.body).toContain('data:');
    expect(res.body).toContain('[DONE]');
    expect(events).toHaveLength(1);
    expect(events[0]!.streaming).toBe(true);
  });

  it('502s on adapter errors (non-streaming)', async () => {
    const fetchFn = vi.fn(async () => { throw new Error('upstream down'); });
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ type: 'openai', url: 'http://example', apiKeyRef: null }),
      resolveApiKey: async () => '',
    };
    await setupApp(svc, adapters);

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/x/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(502);
    expect(res.json<{ error: string }>().error).toMatch(/upstream down/);
  });
});