feat(mcpd): virtual-LLM routes + GC ticker (v1 Stage 3)

End-to-end backend wiring. After this stage, an mcplocal client can
register a provider, hold the SSE channel open, heartbeat, and have
its inference requests fanned through the relay — all without
touching the agent layer or the public-LLM path.

Routes (new file: routes/virtual-llms.ts):
  POST /api/v1/llms/_provider-register    → returns { providerSessionId, llms[] }
  GET  /api/v1/llms/_provider-stream      → SSE channel keyed by
                                            x-mcpctl-provider-session header.
                                            Emits `event: hello` on open,
                                            `event: task` on inference fan-out,
                                            `: ping` every 20 s for proxies.
  POST /api/v1/llms/_provider-heartbeat   → bumps lastHeartbeatAt
  POST /api/v1/llms/_provider-task/:id/result
                                          → mcplocal pushes result back;
                                            body shape is one of:
                                              { error: 'msg' }
                                              { chunk: { data, done? } }
                                              { status, body }

LlmService:
- LlmView gains kind/status/lastHeartbeatAt/inactiveSince so route
  handlers + the upcoming `mcpctl get llm` columns can branch on
  kind without re-fetching the row.

llm-infer.ts:
- Detects llm.kind === 'virtual' and delegates to
  VirtualLlmService.enqueueInferTask. Streaming + non-streaming both
  supported; on 503 (publisher offline) the existing audit hook still
  fires with the right status code.
- Adds optional `virtualLlms: VirtualLlmService` to LlmInferDeps;
  absence in test fixtures returns a 500 with a clear "server
  misconfiguration" message rather than silently falling through to
  the public path against an empty URL.

main.ts:
- Constructs VirtualLlmService(llmRepo).
- Passes it to registerLlmInferRoutes.
- Calls registerVirtualLlmRoutes(app, virtualLlmService).
- 60-s GC ticker started after app.listen; clears on graceful
  shutdown alongside the existing reconcile timer.

Tests: 11 new virtual-LLM route assertions (validation paths,
service plumbing for register/heartbeat/task-result) + 3 new
infer-route assertions (kind=virtual non-streaming relay, 503 path,
500 when virtualLlms dep missing). mcpd suite: 833/833 (was 819,
+14). Typecheck clean.

The full SSE handshake is exercised by the smoke test in Stage 6;
under app.inject the SSE keep-alive holds the response open until the
connection closes, so unit-level SSE testing isn't worth the
complexity here.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-04-27 14:15:18 +01:00
parent 2215922618
commit 192a3831df
6 changed files with 553 additions and 16 deletions

View File

@@ -20,6 +20,10 @@ function makeLlmView(overrides: Partial<LlmView> = {}): LlmView {
description: '',
apiKeyRef: { name: 'anthropic-key', key: 'token' },
extraConfig: {},
kind: 'public',
status: 'active',
lastHeartbeatAt: null,
inactiveSince: null,
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
@@ -205,4 +209,87 @@ describe('POST /api/v1/llms/:name/infer', () => {
expect(res.statusCode).toBe(502);
expect(res.json<{ error: string }>().error).toMatch(/upstream down/);
});
// ── Virtual-provider branch (kind=virtual) ──
// kind=virtual rows must be relayed through the virtual-LLM service rather
// than the public adapter path; the body resolved by the relay is returned
// verbatim to the caller.
it('routes kind=virtual non-streaming through VirtualLlmService.enqueueInferTask', async () => {
  const llmService: LlmServiceLike = {
    getByName: async () => makeLlmView({ kind: 'virtual', type: 'openai', apiKeyRef: null }),
    resolveApiKey: async () => '',
  };
  const relayBody = { choices: [{ message: { content: 'hello from relay' } }] };
  const enqueueInferTask = vi.fn(async () => ({
    taskId: 't-1',
    done: Promise.resolve({ status: 200, body: relayBody }),
    onChunk: () => () => undefined,
  }));
  app = Fastify({ logger: false });
  app.setErrorHandler(errorHandler);
  registerLlmInferRoutes(app, {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    llmService: llmService as any,
    adapters: new LlmAdapterRegistry(),
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    virtualLlms: { enqueueInferTask } as any,
  });
  await app.ready();
  const res = await app.inject({
    method: 'POST',
    url: '/api/v1/llms/vllm-local/infer',
    payload: { messages: [{ role: 'user', content: 'hi' }] },
  });
  expect(res.statusCode).toBe(200);
  const parsed = res.json<{ choices: Array<{ message: { content: string } }> }>();
  expect(parsed.choices[0]!.message.content).toBe('hello from relay');
  // Third argument is the streaming flag — non-streaming here.
  expect(enqueueInferTask).toHaveBeenCalledWith(
    'claude',
    expect.objectContaining({ messages: expect.any(Array) }),
    false,
  );
});
// A 503 from enqueueInferTask (no live SSE session bound) must surface to
// the HTTP caller with its status code and message intact.
it('returns 503 when the publisher is offline (VirtualLlmService throws)', async () => {
  const llmService: LlmServiceLike = {
    getByName: async () => makeLlmView({ kind: 'virtual', apiKeyRef: null, type: 'openai' }),
    resolveApiKey: async () => '',
  };
  const enqueueInferTask = vi.fn(async () => {
    throw Object.assign(new Error('no live SSE session; publisher offline'), { statusCode: 503 });
  });
  app = Fastify({ logger: false });
  app.setErrorHandler(errorHandler);
  registerLlmInferRoutes(app, {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    llmService: llmService as any,
    adapters: new LlmAdapterRegistry(),
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    virtualLlms: { enqueueInferTask } as any,
  });
  await app.ready();
  const res = await app.inject({
    method: 'POST',
    url: '/api/v1/llms/vllm-local/infer',
    payload: { messages: [{ role: 'user', content: 'hi' }] },
  });
  expect(res.statusCode).toBe(503);
  expect(res.json<{ error: string }>().error).toMatch(/publisher offline/);
});
it('returns 500 when virtualLlms dep is missing but the row is kind=virtual', async () => {
  // Defensive: older test fixtures (and misconfigured deployments) may wire
  // the infer routes without a VirtualLlmService. The route must report a
  // clear server-misconfiguration error instead of falling through to the
  // public-adapter path, which would attempt a request against an empty URL.
  const llmService: LlmServiceLike = {
    getByName: async () => makeLlmView({ kind: 'virtual', apiKeyRef: null, type: 'openai' }),
    resolveApiKey: async () => '',
  };
  await setupApp(llmService, new LlmAdapterRegistry()); // deliberately no virtualLlms
  const res = await app.inject({
    method: 'POST',
    url: '/api/v1/llms/vllm-local/infer',
    payload: { messages: [{ role: 'user', content: 'hi' }] },
  });
  expect(res.statusCode).toBe(500);
  expect(res.json<{ error: string }>().error).toMatch(/virtual LLM dispatch unavailable/);
});
});

View File

@@ -0,0 +1,184 @@
import { describe, it, expect, vi, afterEach } from 'vitest';
import Fastify from 'fastify';
import type { FastifyInstance } from 'fastify';
import { registerVirtualLlmRoutes } from '../src/routes/virtual-llms.js';
import type {
VirtualLlmService,
VirtualSessionHandle,
} from '../src/services/virtual-llm.service.js';
let app: FastifyInstance;

// Close the Fastify instance after every test so no open handles leak
// between cases.
afterEach(async () => {
  await app?.close();
});
/**
 * Builds a VirtualLlmService test double in which every method is a vi.fn
 * with a benign default; individual tests override only the method under
 * test via `overrides`.
 */
function fakeService(overrides: Partial<VirtualLlmService> = {}): VirtualLlmService {
  const defaults = {
    register: vi.fn(async (input) => ({
      providerSessionId: input.providerSessionId ?? 'sess-generated',
      llms: [],
    })),
    heartbeat: vi.fn(async () => undefined),
    bindSession: vi.fn(),
    unbindSession: vi.fn(async () => undefined),
    enqueueInferTask: vi.fn(),
    completeTask: vi.fn(() => true),
    pushTaskChunk: vi.fn(() => true),
    failTask: vi.fn(() => true),
    gcSweep: vi.fn(),
  };
  return { ...defaults, ...overrides } as unknown as VirtualLlmService;
}
/** Boots a fresh Fastify instance with the virtual-LLM routes mounted. */
async function setupApp(svc: VirtualLlmService): Promise<FastifyInstance> {
  const server = Fastify({ logger: false });
  app = server; // recorded before ready() so afterEach can close it on failure
  registerVirtualLlmRoutes(server, svc);
  await server.ready();
  return server;
}
describe('POST /api/v1/llms/_provider-register', () => {
  it('returns 400 when providers is missing or empty', async () => {
    await setupApp(fakeService());
    const missing = await app.inject({ method: 'POST', url: '/api/v1/llms/_provider-register', payload: {} });
    expect(missing.statusCode).toBe(400);
    const empty = await app.inject({ method: 'POST', url: '/api/v1/llms/_provider-register', payload: { providers: [] } });
    expect(empty.statusCode).toBe(400);
  });

  it('returns 400 when a provider entry is missing required fields', async () => {
    await setupApp(fakeService());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-register',
      payload: { providers: [{ name: 'incomplete' }] },
    });
    expect(res.statusCode).toBe(400);
  });

  it('forwards a valid registration to the service and returns 201', async () => {
    const register = vi.fn(async () => ({
      providerSessionId: 'sess-xyz',
      llms: [{ id: 'l1' }],
    }));
    await setupApp(fakeService({ register: register as unknown as VirtualLlmService['register'] }));
    // Single literal reused for both the request and the call assertion —
    // the route must pass the body through to the service unmodified.
    const registration = {
      providerSessionId: 'sess-xyz',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm', tier: 'fast', extraConfig: { gpu: 1 } }],
    };
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-register',
      payload: registration,
    });
    expect(res.statusCode).toBe(201);
    expect(register).toHaveBeenCalledWith(registration);
    expect(res.json()).toMatchObject({ providerSessionId: 'sess-xyz' });
  });

  it('surfaces service errors with their declared status code (e.g. 409 conflict)', async () => {
    const register = vi.fn(async () => {
      throw Object.assign(new Error('Cannot publish over public LLM: dup'), { statusCode: 409 });
    });
    await setupApp(fakeService({ register: register as unknown as VirtualLlmService['register'] }));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-register',
      payload: { providers: [{ name: 'dup', type: 'openai', model: 'm' }] },
    });
    expect(res.statusCode).toBe(409);
    expect(res.json()).toMatchObject({ error: expect.stringMatching(/public LLM/) });
  });
});
describe('POST /api/v1/llms/_provider-heartbeat', () => {
  it('returns 400 without providerSessionId', async () => {
    await setupApp(fakeService());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-heartbeat',
      payload: {},
    });
    expect(res.statusCode).toBe(400);
  });

  it('forwards the sessionId to service.heartbeat', async () => {
    const heartbeat = vi.fn(async () => undefined);
    await setupApp(fakeService({ heartbeat }));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-heartbeat',
      payload: { providerSessionId: 'sess-abc' },
    });
    expect(res.statusCode).toBe(200);
    expect(heartbeat).toHaveBeenCalledWith('sess-abc');
  });
});
describe('POST /api/v1/llms/_provider-task/:taskId/result', () => {
  // All four cases differ only in taskId and body shape; share the POST.
  const postResult = (taskId: string, payload: Record<string, unknown>) =>
    app.inject({ method: 'POST', url: `/api/v1/llms/_provider-task/${taskId}/result`, payload });

  it('forwards { error } to service.failTask', async () => {
    const failTask = vi.fn(() => true);
    await setupApp(fakeService({ failTask }));
    const res = await postResult('t-1', { error: 'upstream blew up' });
    expect(res.statusCode).toBe(200);
    expect(failTask).toHaveBeenCalledWith('t-1', expect.objectContaining({ message: 'upstream blew up' }));
  });

  it('forwards { chunk } to service.pushTaskChunk', async () => {
    const pushTaskChunk = vi.fn(() => true);
    await setupApp(fakeService({ pushTaskChunk }));
    const res = await postResult('t-2', { chunk: { data: 'hello' } });
    expect(res.statusCode).toBe(200);
    expect(pushTaskChunk).toHaveBeenCalledWith('t-2', { data: 'hello' });
  });

  it('forwards { status, body } to service.completeTask', async () => {
    const completeTask = vi.fn(() => true);
    await setupApp(fakeService({ completeTask }));
    const res = await postResult('t-3', { status: 200, body: { ok: true } });
    expect(res.statusCode).toBe(200);
    expect(completeTask).toHaveBeenCalledWith('t-3', { status: 200, body: { ok: true } });
  });

  it('returns 400 for an empty/unrecognised result body', async () => {
    await setupApp(fakeService());
    const res = await postResult('t-4', {});
    expect(res.statusCode).toBe(400);
  });
});
describe('GET /api/v1/llms/_provider-stream', () => {
  it('returns 400 without the x-mcpctl-provider-session header', async () => {
    await setupApp(fakeService());
    const res = await app.inject({
      method: 'GET',
      url: '/api/v1/llms/_provider-stream',
    });
    expect(res.statusCode).toBe(400);
  });

  // The happy-path SSE handshake is intentionally NOT unit-tested here:
  // `app.inject` holds a `text/event-stream` response open and never
  // resolves under the keep-alive, so it needs a real HTTP listener. The
  // Stage 6 smoke test starts one and covers the full
  // open → bind → task → close round trip.
});