feat(mcpd): virtual-LLM routes + GC ticker (v1 Stage 3)

End-to-end backend wiring. After this stage, an mcplocal client can
register a provider, hold the SSE channel open, heartbeat, and have
its inference requests fanned through the relay — all without
touching the agent layer or the public-LLM path.

Routes (new file: routes/virtual-llms.ts):
  POST /api/v1/llms/_provider-register    → returns { providerSessionId, llms[] }
  GET  /api/v1/llms/_provider-stream      → SSE channel keyed by
                                            x-mcpctl-provider-session header.
                                            Emits `event: hello` on open,
                                            `event: task` on inference fan-out,
                                            `: ping` every 20 s for proxies.
  POST /api/v1/llms/_provider-heartbeat   → bumps lastHeartbeatAt
  POST /api/v1/llms/_provider-task/:id/result
                                          → mcplocal pushes result back;
                                            body shape is one of:
                                              { error: 'msg' }
                                              { chunk: { data, done? } }
                                              { status, body }

LlmService:
- LlmView gains kind/status/lastHeartbeatAt/inactiveSince so route
  handlers + the upcoming `mcpctl get llm` columns can branch on
  kind without re-fetching the row.

llm-infer.ts:
- Detects llm.kind === 'virtual' and delegates to
  VirtualLlmService.enqueueInferTask. Streaming + non-streaming both
  supported; on 503 (publisher offline) the existing audit hook still
  fires with the right status code.
- Adds optional `virtualLlms: VirtualLlmService` to LlmInferDeps;
  absence in test fixtures returns a 500 with a clear "server
  misconfiguration" message rather than silently falling through to
  the public path against an empty URL.

main.ts:
- Constructs VirtualLlmService(llmRepo).
- Passes it to registerLlmInferRoutes.
- Calls registerVirtualLlmRoutes(app, virtualLlmService).
- 60-s GC ticker started after app.listen; clears on graceful
  shutdown alongside the existing reconcile timer.

Tests: 11 new virtual-LLM route assertions (validation paths,
service plumbing for register/heartbeat/task-result) + 3 new
infer-route assertions (kind=virtual non-streaming relay, 503 path,
500 when virtualLlms dep missing). mcpd suite: 833/833 (was 819,
+14). Typecheck clean.

The full SSE handshake is exercised by the smoke test in Stage 6;
under app.inject the SSE keep-alive holds the response open until the
connection closes, so unit-level SSE testing isn't worth the
complexity here.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-04-27 14:15:18 +01:00
parent 2215922618
commit 192a3831df
6 changed files with 553 additions and 16 deletions

View File

@@ -20,6 +20,10 @@ function makeLlmView(overrides: Partial<LlmView> = {}): LlmView {
description: '',
apiKeyRef: { name: 'anthropic-key', key: 'token' },
extraConfig: {},
kind: 'public',
status: 'active',
lastHeartbeatAt: null,
inactiveSince: null,
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
@@ -205,4 +209,87 @@ describe('POST /api/v1/llms/:name/infer', () => {
expect(res.statusCode).toBe(502);
expect(res.json<{ error: string }>().error).toMatch(/upstream down/);
});
// ── Virtual-provider branch (kind=virtual) ──
// kind=virtual rows must be relayed through the virtual-LLM service rather
// than the public adapter path; the body resolved by the relay is returned
// verbatim to the caller.
it('routes kind=virtual non-streaming through VirtualLlmService.enqueueInferTask', async () => {
  const llmService: LlmServiceLike = {
    getByName: async () => makeLlmView({ kind: 'virtual', type: 'openai', apiKeyRef: null }),
    resolveApiKey: async () => '',
  };
  const relayBody = { choices: [{ message: { content: 'hello from relay' } }] };
  const enqueueInferTask = vi.fn(async () => ({
    taskId: 't-1',
    done: Promise.resolve({ status: 200, body: relayBody }),
    onChunk: () => () => undefined,
  }));
  app = Fastify({ logger: false });
  app.setErrorHandler(errorHandler);
  registerLlmInferRoutes(app, {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    llmService: llmService as any,
    adapters: new LlmAdapterRegistry(),
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    virtualLlms: { enqueueInferTask } as any,
  });
  await app.ready();
  const res = await app.inject({
    method: 'POST',
    url: '/api/v1/llms/vllm-local/infer',
    payload: { messages: [{ role: 'user', content: 'hi' }] },
  });
  expect(res.statusCode).toBe(200);
  const parsed = res.json<{ choices: Array<{ message: { content: string } }> }>();
  expect(parsed.choices[0]!.message.content).toBe('hello from relay');
  // Third argument is the streaming flag — non-streaming here.
  expect(enqueueInferTask).toHaveBeenCalledWith(
    'claude',
    expect.objectContaining({ messages: expect.any(Array) }),
    false,
  );
});
// A 503 from enqueueInferTask (no live SSE session bound) must surface to
// the HTTP caller with its status code and message intact.
it('returns 503 when the publisher is offline (VirtualLlmService throws)', async () => {
  const llmService: LlmServiceLike = {
    getByName: async () => makeLlmView({ kind: 'virtual', apiKeyRef: null, type: 'openai' }),
    resolveApiKey: async () => '',
  };
  const enqueueInferTask = vi.fn(async () => {
    throw Object.assign(new Error('no live SSE session; publisher offline'), { statusCode: 503 });
  });
  app = Fastify({ logger: false });
  app.setErrorHandler(errorHandler);
  registerLlmInferRoutes(app, {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    llmService: llmService as any,
    adapters: new LlmAdapterRegistry(),
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    virtualLlms: { enqueueInferTask } as any,
  });
  await app.ready();
  const res = await app.inject({
    method: 'POST',
    url: '/api/v1/llms/vllm-local/infer',
    payload: { messages: [{ role: 'user', content: 'hi' }] },
  });
  expect(res.statusCode).toBe(503);
  expect(res.json<{ error: string }>().error).toMatch(/publisher offline/);
});
it('returns 500 when virtualLlms dep is missing but the row is kind=virtual', async () => {
  // Defensive: older test fixtures (and misconfigured deployments) may wire
  // the infer routes without a VirtualLlmService. The route must report a
  // clear server-misconfiguration error instead of falling through to the
  // public-adapter path, which would attempt a request against an empty URL.
  const llmService: LlmServiceLike = {
    getByName: async () => makeLlmView({ kind: 'virtual', apiKeyRef: null, type: 'openai' }),
    resolveApiKey: async () => '',
  };
  await setupApp(llmService, new LlmAdapterRegistry()); // deliberately no virtualLlms
  const res = await app.inject({
    method: 'POST',
    url: '/api/v1/llms/vllm-local/infer',
    payload: { messages: [{ role: 'user', content: 'hi' }] },
  });
  expect(res.statusCode).toBe(500);
  expect(res.json<{ error: string }>().error).toMatch(/virtual LLM dispatch unavailable/);
});
});

View File

@@ -0,0 +1,184 @@
import { describe, it, expect, vi, afterEach } from 'vitest';
import Fastify from 'fastify';
import type { FastifyInstance } from 'fastify';
import { registerVirtualLlmRoutes } from '../src/routes/virtual-llms.js';
import type {
VirtualLlmService,
VirtualSessionHandle,
} from '../src/services/virtual-llm.service.js';
let app: FastifyInstance;

// Close the Fastify instance after every test so no open handles leak
// between cases.
afterEach(async () => {
  await app?.close();
});
/**
 * Builds a VirtualLlmService test double in which every method is a vi.fn
 * with a benign default; individual tests override only the method under
 * test via `overrides`.
 */
function fakeService(overrides: Partial<VirtualLlmService> = {}): VirtualLlmService {
  const defaults = {
    register: vi.fn(async (input) => ({
      providerSessionId: input.providerSessionId ?? 'sess-generated',
      llms: [],
    })),
    heartbeat: vi.fn(async () => undefined),
    bindSession: vi.fn(),
    unbindSession: vi.fn(async () => undefined),
    enqueueInferTask: vi.fn(),
    completeTask: vi.fn(() => true),
    pushTaskChunk: vi.fn(() => true),
    failTask: vi.fn(() => true),
    gcSweep: vi.fn(),
  };
  return { ...defaults, ...overrides } as unknown as VirtualLlmService;
}
/** Boots a fresh Fastify instance with the virtual-LLM routes mounted. */
async function setupApp(svc: VirtualLlmService): Promise<FastifyInstance> {
  const server = Fastify({ logger: false });
  app = server; // recorded before ready() so afterEach can close it on failure
  registerVirtualLlmRoutes(server, svc);
  await server.ready();
  return server;
}
describe('POST /api/v1/llms/_provider-register', () => {
  it('returns 400 when providers is missing or empty', async () => {
    await setupApp(fakeService());
    const missing = await app.inject({ method: 'POST', url: '/api/v1/llms/_provider-register', payload: {} });
    expect(missing.statusCode).toBe(400);
    const empty = await app.inject({ method: 'POST', url: '/api/v1/llms/_provider-register', payload: { providers: [] } });
    expect(empty.statusCode).toBe(400);
  });

  it('returns 400 when a provider entry is missing required fields', async () => {
    await setupApp(fakeService());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-register',
      payload: { providers: [{ name: 'incomplete' }] },
    });
    expect(res.statusCode).toBe(400);
  });

  it('forwards a valid registration to the service and returns 201', async () => {
    const register = vi.fn(async () => ({
      providerSessionId: 'sess-xyz',
      llms: [{ id: 'l1' }],
    }));
    await setupApp(fakeService({ register: register as unknown as VirtualLlmService['register'] }));
    // Single literal reused for both the request and the call assertion —
    // the route must pass the body through to the service unmodified.
    const registration = {
      providerSessionId: 'sess-xyz',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm', tier: 'fast', extraConfig: { gpu: 1 } }],
    };
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-register',
      payload: registration,
    });
    expect(res.statusCode).toBe(201);
    expect(register).toHaveBeenCalledWith(registration);
    expect(res.json()).toMatchObject({ providerSessionId: 'sess-xyz' });
  });

  it('surfaces service errors with their declared status code (e.g. 409 conflict)', async () => {
    const register = vi.fn(async () => {
      throw Object.assign(new Error('Cannot publish over public LLM: dup'), { statusCode: 409 });
    });
    await setupApp(fakeService({ register: register as unknown as VirtualLlmService['register'] }));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-register',
      payload: { providers: [{ name: 'dup', type: 'openai', model: 'm' }] },
    });
    expect(res.statusCode).toBe(409);
    expect(res.json()).toMatchObject({ error: expect.stringMatching(/public LLM/) });
  });
});
describe('POST /api/v1/llms/_provider-heartbeat', () => {
  it('returns 400 without providerSessionId', async () => {
    await setupApp(fakeService());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-heartbeat',
      payload: {},
    });
    expect(res.statusCode).toBe(400);
  });

  it('forwards the sessionId to service.heartbeat', async () => {
    const heartbeat = vi.fn(async () => undefined);
    await setupApp(fakeService({ heartbeat }));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/_provider-heartbeat',
      payload: { providerSessionId: 'sess-abc' },
    });
    expect(res.statusCode).toBe(200);
    expect(heartbeat).toHaveBeenCalledWith('sess-abc');
  });
});
describe('POST /api/v1/llms/_provider-task/:taskId/result', () => {
  // All four cases differ only in taskId and body shape; share the POST.
  const postResult = (taskId: string, payload: Record<string, unknown>) =>
    app.inject({ method: 'POST', url: `/api/v1/llms/_provider-task/${taskId}/result`, payload });

  it('forwards { error } to service.failTask', async () => {
    const failTask = vi.fn(() => true);
    await setupApp(fakeService({ failTask }));
    const res = await postResult('t-1', { error: 'upstream blew up' });
    expect(res.statusCode).toBe(200);
    expect(failTask).toHaveBeenCalledWith('t-1', expect.objectContaining({ message: 'upstream blew up' }));
  });

  it('forwards { chunk } to service.pushTaskChunk', async () => {
    const pushTaskChunk = vi.fn(() => true);
    await setupApp(fakeService({ pushTaskChunk }));
    const res = await postResult('t-2', { chunk: { data: 'hello' } });
    expect(res.statusCode).toBe(200);
    expect(pushTaskChunk).toHaveBeenCalledWith('t-2', { data: 'hello' });
  });

  it('forwards { status, body } to service.completeTask', async () => {
    const completeTask = vi.fn(() => true);
    await setupApp(fakeService({ completeTask }));
    const res = await postResult('t-3', { status: 200, body: { ok: true } });
    expect(res.statusCode).toBe(200);
    expect(completeTask).toHaveBeenCalledWith('t-3', { status: 200, body: { ok: true } });
  });

  it('returns 400 for an empty/unrecognised result body', async () => {
    await setupApp(fakeService());
    const res = await postResult('t-4', {});
    expect(res.statusCode).toBe(400);
  });
});
describe('GET /api/v1/llms/_provider-stream', () => {
  it('returns 400 without the x-mcpctl-provider-session header', async () => {
    await setupApp(fakeService());
    const res = await app.inject({
      method: 'GET',
      url: '/api/v1/llms/_provider-stream',
    });
    expect(res.statusCode).toBe(400);
  });

  // The happy-path SSE handshake is intentionally NOT unit-tested here:
  // `app.inject` holds a `text/event-stream` response open and never
  // resolves under the keep-alive, so it needs a real HTTP listener. The
  // Stage 6 smoke test starts one and covers the full
  // open → bind → task → close round trip.
});