feat(mcpd): virtual-LLM routes + GC ticker (v1 Stage 3)
End-to-end backend wiring. After this stage, an mcplocal client can
register a provider, hold the SSE channel open, heartbeat, and have
its inference requests fanned through the relay — all without
touching the agent layer or the public-LLM path.
Routes (new file: routes/virtual-llms.ts):
- POST /api/v1/llms/_provider-register → returns { providerSessionId, llms[] }
- GET /api/v1/llms/_provider-stream → SSE channel keyed by the
  x-mcpctl-provider-session header. Emits `event: hello` on open,
  `event: task` on inference fan-out, and `: ping` every 20 s so
  intermediate proxies don't drop the idle connection.
- POST /api/v1/llms/_provider-heartbeat → bumps lastHeartbeatAt
- POST /api/v1/llms/_provider-task/:id/result → mcplocal pushes a task
  result back; the body is one of { error: 'msg' }, { chunk: { data,
  done? } }, or { status, body } (dispatch sketched below).
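
For orientation, a minimal sketch of the result-endpoint dispatch, assuming
the failTask/pushTaskChunk/completeTask signatures mocked in the new test
file; the 400 message wording is illustrative, and the real handler (plus
the other three routes) lives in routes/virtual-llms.ts:

    export function registerVirtualLlmRoutes(app: FastifyInstance, svc: VirtualLlmService): void {
      app.post<{ Params: { taskId: string } }>(
        '/api/v1/llms/_provider-task/:taskId/result',
        async (req, reply) => {
          const { taskId } = req.params;
          const body = req.body as {
            error?: string;
            chunk?: { data: string; done?: boolean };
            status?: number;
            body?: unknown;
          };
          if (typeof body.error === 'string') {
            svc.failTask(taskId, new Error(body.error));   // { error: 'msg' }
          } else if (body.chunk) {
            svc.pushTaskChunk(taskId, body.chunk);         // { chunk: { data, done? } }
          } else if (typeof body.status === 'number') {
            svc.completeTask(taskId, { status: body.status, body: body.body }); // { status, body }
          } else {
            return reply.code(400).send({ error: 'unrecognised result body' });
          }
          return reply.send({ ok: true });
        },
      );
      // _provider-register / _provider-stream / _provider-heartbeat elided here
    }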
LlmService:
- LlmView gains kind/status/lastHeartbeatAt/inactiveSince so that route
  handlers and the upcoming `mcpctl get llm` columns can branch on kind
  without re-fetching the row (shape sketched below).
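
The widened view, roughly — only the new fields are spelled out, the kind
union reflects the two values used in the tests, and the status type is a
loose placeholder since the exact union isn't visible in this diff:

    interface LlmView {
      // ...existing fields (name, type, model, description, apiKeyRef, extraConfig, version, ...)
      kind: 'public' | 'virtual';
      status: string;               // tests use 'active'
      lastHeartbeatAt: Date | null; // bumped by _provider-heartbeat
      inactiveSince: Date | null;   // presumably set when heartbeats lapse (assumption)
    }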
llm-infer.ts:
- Detects llm.kind === 'virtual' and delegates to
  VirtualLlmService.enqueueInferTask. Both streaming and non-streaming
  requests are supported; on 503 (publisher offline) the existing audit
  hook still fires with the right status code.
- Adds an optional `virtualLlms: VirtualLlmService` to LlmInferDeps;
  when the dep is absent (as in older test fixtures) the route returns
  a 500 with a clear "server misconfiguration" message rather than
  silently falling through to the public path against an empty URL.
  A sketch of the branch follows this list.
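
A sketch of that branch, assuming the enqueueInferTask shape mocked in the
tests — (llmName, payload, stream) → { taskId, done, onChunk } — with the
503 thrown by the service itself and mapped by the shared error handler;
the streaming content type below is illustrative, not confirmed:

    async function relayVirtualInfer(
      reply: FastifyReply,
      deps: LlmInferDeps,
      llm: LlmView,
      payload: unknown,
      wantStream: boolean,
    ): Promise<void> {
      if (!deps.virtualLlms) {
        // misconfiguration guard: never fall through to the public-adapter path
        reply.code(500).send({ error: 'virtual LLM dispatch unavailable' });
        return;
      }
      const task = await deps.virtualLlms.enqueueInferTask(llm.name, payload, wantStream);
      if (!wantStream) {
        const { status, body } = await task.done; // relayed verbatim from mcplocal
        reply.code(status).send(body);
        return;
      }
      reply.raw.writeHead(200, { 'content-type': 'text/event-stream' });
      const unsubscribe = task.onChunk((chunk) => reply.raw.write(chunk.data));
      await task.done; // resolves once mcplocal reports the final chunk
      unsubscribe();
      reply.raw.end();
    }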
main.ts:
- Constructs VirtualLlmService(llmRepo).
- Passes it to registerLlmInferRoutes.
- Calls registerVirtualLlmRoutes(app, virtualLlmService).
- Starts a 60 s GC ticker after app.listen; it is cleared on graceful
  shutdown alongside the existing reconcile timer (wiring sketched below).
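
Condensed, with the SIGTERM handler as an illustrative stand-in for
whatever the existing graceful-shutdown hook looks like:

    const virtualLlmService = new VirtualLlmService(llmRepo);
    registerLlmInferRoutes(app, { llmService, adapters, virtualLlms: virtualLlmService });
    registerVirtualLlmRoutes(app, virtualLlmService);

    await app.listen({ port });
    const gcTimer = setInterval(() => virtualLlmService.gcSweep(), 60_000);

    process.on('SIGTERM', async () => {
      clearInterval(gcTimer); // alongside the existing reconcile timer
      await app.close();
    });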
Tests: 11 new virtual-LLM route assertions (validation paths,
service plumbing for register/heartbeat/task-result) + 3 new
infer-route assertions (kind=virtual non-streaming relay, 503 path,
500 when virtualLlms dep missing). mcpd suite: 833/833 (was 819,
+14). Typecheck clean.
The full SSE handshake is exercised by the smoke test in Stage 6; under
app.inject the keep-alive holds the response open until close, so
unit-level SSE testing isn't worth the complexity here. (A real-listener
probe would look like the sketch below.)
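
If such a probe ever becomes worthwhile, the shape would be an ephemeral
listener plus a raw fetch — a sketch only, asserting just the `event: hello`
frame described above (AddressInfo comes from 'node:net'; fetch is the
Node 18+ global):

    await app.listen({ port: 0 });
    const { port } = app.server.address() as AddressInfo;
    const res = await fetch(`http://127.0.0.1:${port}/api/v1/llms/_provider-stream`, {
      headers: { 'x-mcpctl-provider-session': 'sess-abc' },
    });
    const reader = res.body!.getReader();
    const { value } = await reader.read();
    expect(new TextDecoder().decode(value)).toContain('event: hello');
    await reader.cancel(); // drop the keep-alive so the test can exit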
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
@@ -20,6 +20,10 @@ function makeLlmView(overrides: Partial<LlmView> = {}): LlmView {
    description: '',
    apiKeyRef: { name: 'anthropic-key', key: 'token' },
    extraConfig: {},
    kind: 'public',
    status: 'active',
    lastHeartbeatAt: null,
    inactiveSince: null,
    version: 1,
    createdAt: new Date(),
    updatedAt: new Date(),

@@ -205,4 +209,87 @@ describe('POST /api/v1/llms/:name/infer', () => {
    expect(res.statusCode).toBe(502);
    expect(res.json<{ error: string }>().error).toMatch(/upstream down/);
  });

  // ── Virtual-provider branch (kind=virtual) ──

  it('routes kind=virtual non-streaming through VirtualLlmService.enqueueInferTask', async () => {
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ kind: 'virtual', type: 'openai', apiKeyRef: null }),
      resolveApiKey: async () => '',
    };
    const enqueue = vi.fn(async () => ({
      taskId: 't-1',
      done: Promise.resolve({ status: 200, body: { choices: [{ message: { content: 'hello from relay' } }] } }),
      onChunk: () => () => undefined,
    }));
    app = Fastify({ logger: false });
    app.setErrorHandler(errorHandler);
    registerLlmInferRoutes(app, {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      llmService: svc as any,
      adapters: new LlmAdapterRegistry(),
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      virtualLlms: { enqueueInferTask: enqueue } as any,
    });
    await app.ready();

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/vllm-local/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(200);
    expect(res.json<{ choices: Array<{ message: { content: string } }> }>().choices[0]!.message.content).toBe('hello from relay');
    expect(enqueue).toHaveBeenCalledWith(
      'claude',
      expect.objectContaining({ messages: expect.any(Array) }),
      false,
    );
  });

  it('returns 503 when the publisher is offline (VirtualLlmService throws)', async () => {
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ kind: 'virtual', apiKeyRef: null, type: 'openai' }),
      resolveApiKey: async () => '',
    };
    const enqueue = vi.fn(async () => {
      throw Object.assign(new Error('no live SSE session; publisher offline'), { statusCode: 503 });
    });
    app = Fastify({ logger: false });
    app.setErrorHandler(errorHandler);
    registerLlmInferRoutes(app, {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      llmService: svc as any,
      adapters: new LlmAdapterRegistry(),
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      virtualLlms: { enqueueInferTask: enqueue } as any,
    });
    await app.ready();

    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/vllm-local/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(503);
    expect(res.json<{ error: string }>().error).toMatch(/publisher offline/);
  });

  it('returns 500 when virtualLlms dep is missing but the row is kind=virtual', async () => {
    // Defensive: prior test configurations may not pass virtualLlms. We
    // surface a clear server-misconfiguration error rather than calling
    // the public-adapter path, which would try to hit an empty URL.
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ kind: 'virtual', apiKeyRef: null, type: 'openai' }),
      resolveApiKey: async () => '',
    };
    await setupApp(svc, new LlmAdapterRegistry()); // no virtualLlms
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/vllm-local/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(500);
    expect(res.json<{ error: string }>().error).toMatch(/virtual LLM dispatch unavailable/);
  });
});
src/mcpd/tests/virtual-llm-routes.test.ts (new file, 184 lines)
@@ -0,0 +1,184 @@
import { describe, it, expect, vi, afterEach } from 'vitest';
import Fastify from 'fastify';
import type { FastifyInstance } from 'fastify';
import { registerVirtualLlmRoutes } from '../src/routes/virtual-llms.js';
import type {
  VirtualLlmService,
  VirtualSessionHandle,
} from '../src/services/virtual-llm.service.js';

let app: FastifyInstance;

afterEach(async () => {
  if (app) await app.close();
});

function fakeService(overrides: Partial<VirtualLlmService> = {}): VirtualLlmService {
  return {
    register: vi.fn(async (input) => ({
      providerSessionId: input.providerSessionId ?? 'sess-generated',
      llms: [],
    })),
    heartbeat: vi.fn(async () => undefined),
    bindSession: vi.fn(),
    unbindSession: vi.fn(async () => undefined),
    enqueueInferTask: vi.fn(),
    completeTask: vi.fn(() => true),
    pushTaskChunk: vi.fn(() => true),
    failTask: vi.fn(() => true),
    gcSweep: vi.fn(),
    ...overrides,
  } as unknown as VirtualLlmService;
}

async function setupApp(svc: VirtualLlmService): Promise<FastifyInstance> {
  app = Fastify({ logger: false });
  registerVirtualLlmRoutes(app, svc);
  await app.ready();
  return app;
}

describe('POST /api/v1/llms/_provider-register', () => {
  it('returns 400 when providers is missing or empty', async () => {
    await setupApp(fakeService());
    const a = await app.inject({ method: 'POST', url: '/api/v1/llms/_provider-register', payload: {} });
    expect(a.statusCode).toBe(400);
    const b = await app.inject({ method: 'POST', url: '/api/v1/llms/_provider-register', payload: { providers: [] } });
    expect(b.statusCode).toBe(400);
  });

  it('returns 400 when a provider entry is missing required fields', async () => {
    await setupApp(fakeService());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-register',
      payload: { providers: [{ name: 'incomplete' }] },
    });
    expect(res.statusCode).toBe(400);
  });

  it('forwards a valid registration to the service and returns 201', async () => {
    const register = vi.fn(async () => ({
      providerSessionId: 'sess-xyz',
      llms: [{ id: 'l1' }],
    }));
    await setupApp(fakeService({ register: register as unknown as VirtualLlmService['register'] }));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-register',
      payload: {
        providerSessionId: 'sess-xyz',
        providers: [{ name: 'vllm-local', type: 'openai', model: 'm', tier: 'fast', extraConfig: { gpu: 1 } }],
      },
    });
    expect(res.statusCode).toBe(201);
    expect(register).toHaveBeenCalledWith({
      providerSessionId: 'sess-xyz',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm', tier: 'fast', extraConfig: { gpu: 1 } }],
    });
    expect(res.json()).toMatchObject({ providerSessionId: 'sess-xyz' });
  });

  it('surfaces service errors with their declared status code (e.g. 409 conflict)', async () => {
    const register = vi.fn(async () => {
      throw Object.assign(new Error('Cannot publish over public LLM: dup'), { statusCode: 409 });
    });
    await setupApp(fakeService({ register: register as unknown as VirtualLlmService['register'] }));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-register',
      payload: { providers: [{ name: 'dup', type: 'openai', model: 'm' }] },
    });
    expect(res.statusCode).toBe(409);
    expect(res.json()).toMatchObject({ error: expect.stringMatching(/public LLM/) });
  });
});

describe('POST /api/v1/llms/_provider-heartbeat', () => {
  it('returns 400 without providerSessionId', async () => {
    await setupApp(fakeService());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-heartbeat',
      payload: {},
    });
    expect(res.statusCode).toBe(400);
  });

  it('forwards the sessionId to service.heartbeat', async () => {
    const heartbeat = vi.fn(async () => undefined);
    await setupApp(fakeService({ heartbeat }));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-heartbeat',
      payload: { providerSessionId: 'sess-abc' },
    });
    expect(res.statusCode).toBe(200);
    expect(heartbeat).toHaveBeenCalledWith('sess-abc');
  });
});

describe('POST /api/v1/llms/_provider-task/:taskId/result', () => {
  it('forwards { error } to service.failTask', async () => {
    const failTask = vi.fn(() => true);
    await setupApp(fakeService({ failTask }));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-task/t-1/result',
      payload: { error: 'upstream blew up' },
    });
    expect(res.statusCode).toBe(200);
    expect(failTask).toHaveBeenCalledWith('t-1', expect.objectContaining({ message: 'upstream blew up' }));
  });

  it('forwards { chunk } to service.pushTaskChunk', async () => {
    const pushTaskChunk = vi.fn(() => true);
    await setupApp(fakeService({ pushTaskChunk }));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-task/t-2/result',
      payload: { chunk: { data: 'hello' } },
    });
    expect(res.statusCode).toBe(200);
    expect(pushTaskChunk).toHaveBeenCalledWith('t-2', { data: 'hello' });
  });

  it('forwards { status, body } to service.completeTask', async () => {
    const completeTask = vi.fn(() => true);
    await setupApp(fakeService({ completeTask }));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-task/t-3/result',
      payload: { status: 200, body: { ok: true } },
    });
    expect(res.statusCode).toBe(200);
    expect(completeTask).toHaveBeenCalledWith('t-3', { status: 200, body: { ok: true } });
  });

  it('returns 400 for an empty/unrecognised result body', async () => {
    await setupApp(fakeService());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-task/t-4/result',
      payload: {},
    });
    expect(res.statusCode).toBe(400);
  });
});

describe('GET /api/v1/llms/_provider-stream', () => {
  it('returns 400 without the x-mcpctl-provider-session header', async () => {
    await setupApp(fakeService());
    const res = await app.inject({
      method: 'GET',
      url: '/api/v1/llms/_provider-stream',
    });
    expect(res.statusCode).toBe(400);
  });

  // Note: a full SSE handshake test would require a real HTTP listen
  // because `app.inject` holds the response open and never returns under
  // the `text/event-stream` keep-alive. The smoke test in Stage 6 spins
  // up a real listener and exercises the open → bind → task → close
  // round-trip end to end.
});