Files
mcpctl/src/mcpd/tests/virtual-llm-service.test.ts

348 lines
14 KiB
TypeScript
Raw Normal View History

feat(mcpd): VirtualLlmService + repo lifecycle helpers (v1 Stage 2)

The state machine for kind=virtual Llm rows. Wires the schema added in Stage 1 into something that can register, heartbeat, time out, and relay inference tasks. The HTTP routes (Stage 3) plug into this.

Repository (extends ILlmRepository):
- create/update accept kind/providerSessionId/lastHeartbeatAt/status/inactiveSince/type so VirtualLlmService can drive the lifecycle.
- findBySessionId(sessionId) — the reconnect lookup.
- findStaleVirtuals(cutoff) — heartbeat-stale rows for the GC sweep.
- findExpiredInactives(cutoff) — 4h-expired rows for deletion.

VirtualLlmService:
- register(): sticky-id-aware upsert. New names insert as kind=virtual/status=active. Existing virtual rows from the same session reactivate in place; existing inactive virtuals from a foreign session can be adopted (sticky reconnect). Refuses to overwrite a public row or a foreign session's still-active virtual.
- heartbeat(): bumps lastHeartbeatAt for every row owned by the session; revives inactive rows.
- bindSession()/unbindSession(): in-memory map of sessionId → SSE handle. Disconnect immediately flips owned rows to inactive AND rejects any in-flight tasks for that session.
- enqueueInferTask(): pushes an `infer` task frame to the SSE handle, returns a PendingTaskRef whose `done` resolves when the publisher POSTs the result back. Streaming variant exposes onChunk(cb).
- completeTask/pushTaskChunk/failTask: route-side hooks called from the result POST handler (lands in Stage 3).
- gcSweep(): flips heartbeat-stale active virtuals to inactive (90s cutoff), deletes inactives past 4h. Idempotent.

Lifecycle constants live in this file (HEARTBEAT_TIMEOUT_MS=90s, INACTIVE_RETENTION_MS=4h) so future stages can tune in one place.

18 new mocked-repo tests cover: register variants (insert, sticky reconnect, refuse public-overwrite, refuse foreign-session, adopt inactive-foreign), heartbeat-revive, unbind cascade, enqueue happy path + 503 paths (no session, inactive, public-Llm), complete/fail/streaming chunk fan-out, GC sweep flip + delete + idempotence.

mcpd suite: 819/819 (was 801, +18). Typecheck clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 14:05:19 +01:00
import { describe, it, expect, vi } from 'vitest';
import { VirtualLlmService, type VirtualSessionHandle } from '../src/services/virtual-llm.service.js';
import type { ILlmRepository } from '../src/repositories/llm.repository.js';
import type { Llm } from '@prisma/client';
/**
 * Builds an `Llm` row fixture for the tests below.
 *
 * The defaults describe an active virtual row named `vllm-local` that belongs
 * to session `s-1`, carries a freshly generated `llm-…` id and has its three
 * timestamps set to "now". Any field supplied in `overrides` replaces the
 * corresponding default.
 *
 * @param overrides Fields that should differ from the defaults.
 * @returns The defaults merged with `overrides` (overrides win).
 */
function makeLlm(overrides: Partial<Llm> = {}): Llm {
  // Short random base-36 suffix so rows built without an explicit id differ.
  const idSuffix = Math.random().toString(36).slice(2, 8);

  const defaults: Llm = {
    id: `llm-${idSuffix}`,
    name: 'vllm-local',
    type: 'openai',
    model: 'm',
    url: '',
    tier: 'fast',
    description: '',
    apiKeySecretId: null,
    apiKeySecretKey: null,
    extraConfig: {} as Llm['extraConfig'],
    kind: 'virtual',
    providerSessionId: 's-1',
    lastHeartbeatAt: new Date(),
    status: 'active',
    inactiveSince: null,
    version: 1,
    createdAt: new Date(),
    updatedAt: new Date(),
  };

  return { ...defaults, ...overrides };
}
/**
 * In-memory stand-in for `ILlmRepository`, backed by a `Map` keyed on row id.
 * Each method is wrapped in `vi.fn`, so it is a vitest mock function as well
 * as a working implementation.
 *
 * Notes on the fake's behaviour:
 * - `findByTier` always resolves to an empty list.
 * - `create` assigns ids of the form `llm-<n>`, counting up from the number of
 *   seeded rows, and defaults `kind` to `'public'` when the caller omits it.
 * - `update` copies only the fields it lists below, and only when the caller
 *   supplied a value other than `undefined`; it throws `not found` for an
 *   unknown id.
 *
 * @param initial Rows to seed the store with.
 * @returns A repository that reads and writes the in-memory store.
 */
function mockRepo(initial: Llm[] = []): ILlmRepository {
  const store = new Map<string, Llm>();
  for (const seed of initial) store.set(seed.id, seed);

  // Last numeric id handed out by `create`; starts at the seeded row count.
  let lastIssued = store.size;

  const allRows = (): Llm[] => Array.from(store.values());

  const repo: ILlmRepository = {
    findAll: vi.fn(async () => allRows()),

    findById: vi.fn(async (id: string) => {
      const hit = store.get(id);
      return hit ?? null;
    }),

    findByName: vi.fn(async (name: string) => {
      const hit = allRows().find((row) => row.name === name);
      return hit ?? null;
    }),

    findByTier: vi.fn(async () => []),

    findBySessionId: vi.fn(async (sid: string) =>
      allRows().filter((row) => row.providerSessionId === sid)),

    // Active virtual rows whose last heartbeat is strictly older than `cutoff`.
    findStaleVirtuals: vi.fn(async (cutoff: Date) =>
      allRows().filter((row) => {
        if (row.kind !== 'virtual') return false;
        if (row.status !== 'active') return false;
        const beat = row.lastHeartbeatAt;
        return beat !== null && beat < cutoff;
      })),

    // Inactive virtual rows that went inactive strictly before `cutoff`.
    findExpiredInactives: vi.fn(async (cutoff: Date) =>
      allRows().filter((row) => {
        if (row.kind !== 'virtual') return false;
        if (row.status !== 'inactive') return false;
        const since = row.inactiveSince;
        return since !== null && since < cutoff;
      })),

    create: vi.fn(async (data) => {
      lastIssued += 1;
      const { name, type, model } = data;
      const inserted = makeLlm({
        id: `llm-${String(lastIssued)}`,
        name,
        type,
        model,
        url: data.url ?? '',
        tier: data.tier ?? 'fast',
        description: data.description ?? '',
        kind: data.kind ?? 'public',
        providerSessionId: data.providerSessionId ?? null,
        status: data.status ?? 'active',
        lastHeartbeatAt: data.lastHeartbeatAt ?? null,
        inactiveSince: data.inactiveSince ?? null,
      });
      store.set(inserted.id, inserted);
      return inserted;
    }),

    update: vi.fn(async (id, data) => {
      const current = store.get(id);
      if (!current) throw new Error('not found');

      // `undefined` means "leave untouched"; `null` is a real value and is copied.
      const patch: Partial<Llm> = {};
      if (data.type !== undefined) patch.type = data.type;
      if (data.model !== undefined) patch.model = data.model;
      if (data.tier !== undefined) patch.tier = data.tier;
      if (data.description !== undefined) patch.description = data.description;
      if (data.kind !== undefined) patch.kind = data.kind;
      if (data.providerSessionId !== undefined) patch.providerSessionId = data.providerSessionId;
      if (data.status !== undefined) patch.status = data.status;
      if (data.lastHeartbeatAt !== undefined) patch.lastHeartbeatAt = data.lastHeartbeatAt;
      if (data.inactiveSince !== undefined) patch.inactiveSince = data.inactiveSince;

      const updated: Llm = { ...current, ...patch };
      store.set(id, updated);
      return updated;
    }),

    delete: vi.fn(async (id: string) => {
      store.delete(id);
    }),
  };

  return repo;
}
/**
 * Minimal session handle for tests: every frame passed to `pushTask` is
 * appended to the exposed `tasks` array so a test can inspect what was sent.
 *
 * @returns A handle with an initially empty `tasks` list and `alive: true`.
 */
function fakeSession(): VirtualSessionHandle & { tasks: Array<unknown>; alive: boolean } {
  const recorded: unknown[] = [];
  return {
    tasks: recorded,
    alive: true,
    pushTask: (frame) => {
      recorded.push(frame);
    },
  };
}
/**
 * Tests for VirtualLlmService, run against the in-memory `mockRepo`:
 * registration rules, heartbeat revival, session bind/unbind, inference task
 * relay (enqueue / complete / chunk / fail) and the GC sweep.
 */
describe('VirtualLlmService', () => {
  const SECOND_MS = 1_000;
  const MINUTE_MS = 60 * SECOND_MS;
  const HOUR_MS = 60 * MINUTE_MS;

  /** A Date `ms` milliseconds before the moment of the call. */
  const ago = (ms: number): Date => new Date(Date.now() - ms);

  /** Service over one active `vllm-local` row whose owning session `sess` is bound. */
  function liveVllmLocal() {
    const service = new VirtualLlmService(
      mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]),
    );
    const session = fakeSession();
    service.bindSession('sess', session);
    return { service, session };
  }

  it('register inserts new virtual rows with active status + sessionId', async () => {
    const service = new VirtualLlmService(mockRepo());

    const registration = await service.register({
      providerSessionId: null,
      providers: [
        { name: 'vllm-local', type: 'openai', model: 'Qwen/Qwen2.5-7B-Instruct-AWQ', tier: 'fast' },
      ],
    });

    // No session id was supplied, so the returned one must have a 36-char hex-and-dash shape.
    expect(registration.providerSessionId).toMatch(/^[0-9a-f-]{36}$/);
    expect(registration.llms).toHaveLength(1);
    const created = registration.llms[0]!;
    expect(created.kind).toBe('virtual');
    expect(created.status).toBe('active');
    expect(created.providerSessionId).toBe(registration.providerSessionId);
    expect(created.lastHeartbeatAt).not.toBeNull();
  });

  it('register reuses the same row on sticky reconnect (same name + sessionId)', async () => {
    const service = new VirtualLlmService(mockRepo());

    const initial = await service.register({
      providerSessionId: 'fixed-id',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm' }],
    });
    const originalRowId = initial.llms[0]!.id;
    expect(originalRowId).toMatch(/^llm-/);

    const reconnect = await service.register({
      providerSessionId: 'fixed-id',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm-updated' }],
    });
    const reused = reconnect.llms[0]!;
    expect(reused.id).toBe(originalRowId);
    expect(reused.model).toBe('m-updated');
  });

  it('register refuses to overwrite a public LLM with the same name', async () => {
    const publicRow = makeLlm({ name: 'qwen3-thinking', kind: 'public', providerSessionId: null });
    const service = new VirtualLlmService(mockRepo([publicRow]));

    const attempt = service.register({
      providerSessionId: 'sess-x',
      providers: [{ name: 'qwen3-thinking', type: 'openai', model: 'm' }],
    });

    await expect(attempt).rejects.toThrow(/Cannot publish over public/);
  });

  it('register refuses if another active session owns the name', async () => {
    const foreignRow = makeLlm({ name: 'vllm-local', providerSessionId: 'other', status: 'active' });
    const service = new VirtualLlmService(mockRepo([foreignRow]));

    const attempt = service.register({
      providerSessionId: 'mine',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm' }],
    });

    await expect(attempt).rejects.toThrow(/already active under a different session/);
  });

  it('register adopts an inactive virtual row from a different session (sticky reconnect after lapse)', async () => {
    const lapsedRow = makeLlm({
      name: 'vllm-local',
      providerSessionId: 'old-session',
      status: 'inactive',
      inactiveSince: new Date(),
    });
    const service = new VirtualLlmService(mockRepo([lapsedRow]));

    const registration = await service.register({
      providerSessionId: 'new-session',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm' }],
    });

    const adopted = registration.llms[0]!;
    expect(adopted.providerSessionId).toBe('new-session');
    expect(adopted.status).toBe('active');
    expect(adopted.inactiveSince).toBeNull();
  });

  it('heartbeat bumps lastHeartbeatAt + revives an inactive row', async () => {
    const fiveSecondsAgo = ago(5_000);
    const repo = mockRepo([makeLlm({
      name: 'vllm-local',
      providerSessionId: 'sess',
      status: 'inactive',
      lastHeartbeatAt: fiveSecondsAgo,
      inactiveSince: fiveSecondsAgo,
    })]);
    const service = new VirtualLlmService(repo);

    await service.heartbeat('sess');

    const revived = await repo.findByName('vllm-local');
    expect(revived?.status).toBe('active');
    expect(revived?.inactiveSince).toBeNull();
    expect(revived!.lastHeartbeatAt!.getTime()).toBeGreaterThan(fiveSecondsAgo.getTime());
  });

  it('unbindSession flips all owned rows to inactive immediately', async () => {
    const repo = mockRepo([
      makeLlm({ name: 'a', providerSessionId: 'sess' }),
      makeLlm({ name: 'b', providerSessionId: 'sess' }),
      makeLlm({ name: 'c', providerSessionId: 'other' }),
    ]);
    const service = new VirtualLlmService(repo);
    service.bindSession('sess', fakeSession());

    await service.unbindSession('sess');

    // Rows 'a' and 'b' belong to the unbound session; 'c' belongs to another one.
    const expectedStatus = [
      ['a', 'inactive'],
      ['b', 'inactive'],
      ['c', 'active'],
    ] as const;
    for (const [rowName, status] of expectedStatus) {
      expect((await repo.findByName(rowName))?.status).toBe(status);
    }
  });

  it('enqueueInferTask pushes a task frame to the SSE session', async () => {
    const { service, session } = liveVllmLocal();

    const ref = await service.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );

    expect(session.tasks).toHaveLength(1);
    const frame = session.tasks[0] as { kind: string; taskId: string; llmName: string; streaming: boolean };
    expect(frame.kind).toBe('infer');
    expect(frame.taskId).toBe(ref.taskId);
    expect(frame.llmName).toBe('vllm-local');
    expect(frame.streaming).toBe(false);
  });

  it('enqueueInferTask rejects when the publisher is offline (no session bound)', async () => {
    // The row exists and is active, but no session handle is ever bound.
    const service = new VirtualLlmService(
      mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]),
    );

    const attempt = service.enqueueInferTask('vllm-local', { model: 'm', messages: [] }, false);

    await expect(attempt).rejects.toThrow(/no live SSE session|publisher offline/);
  });

  it('enqueueInferTask rejects when the row is inactive', async () => {
    const inactiveRow = makeLlm({
      name: 'vllm-local',
      providerSessionId: 'sess',
      status: 'inactive',
      inactiveSince: new Date(),
    });
    const service = new VirtualLlmService(mockRepo([inactiveRow]));
    service.bindSession('sess', fakeSession());

    const attempt = service.enqueueInferTask('vllm-local', { model: 'm', messages: [] }, false);

    await expect(attempt).rejects.toThrow(/inactive|publisher offline/);
  });

  it('enqueueInferTask rejects when the LLM is public (not virtual)', async () => {
    const publicRow = makeLlm({ name: 'qwen3-thinking', kind: 'public', providerSessionId: null });
    const service = new VirtualLlmService(mockRepo([publicRow]));

    const attempt = service.enqueueInferTask('qwen3-thinking', { model: 'm', messages: [] }, false);

    await expect(attempt).rejects.toThrow(/not a virtual provider/);
  });

  it('completeTask resolves the pending non-streaming promise', async () => {
    const { service } = liveVllmLocal();
    const ref = await service.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );

    const accepted = service.completeTask(ref.taskId, { status: 200, body: { ok: true } });

    expect(accepted).toBe(true);
    await expect(ref.done).resolves.toEqual({ status: 200, body: { ok: true } });
  });

  it('streaming: pushTaskChunk fans chunks to subscribers; done resolves the ref', async () => {
    const { service } = liveVllmLocal();
    const ref = await service.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }], stream: true },
      true,
    );
    const received: Array<{ data: string; done?: boolean }> = [];
    ref.onChunk((chunk) => received.push(chunk));

    const outgoing: Array<{ data: string; done?: boolean }> = [
      { data: 'hello' },
      { data: ' world' },
      { data: '[DONE]', done: true },
    ];
    for (const chunk of outgoing) {
      expect(service.pushTaskChunk(ref.taskId, chunk)).toBe(true);
    }

    expect(received.map((chunk) => chunk.data)).toEqual(['hello', ' world', '[DONE]']);
    await expect(ref.done).resolves.toMatchObject({ status: 200 });
  });

  it('failTask rejects the pending promise with a clear error', async () => {
    const { service } = liveVllmLocal();
    const ref = await service.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );

    const accepted = service.failTask(ref.taskId, new Error('upstream blew up'));

    expect(accepted).toBe(true);
    await expect(ref.done).rejects.toThrow(/upstream blew up/);
  });

  it('unbindSession rejects in-flight tasks for that session', async () => {
    const { service } = liveVllmLocal();
    const ref = await service.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );

    await service.unbindSession('sess');

    await expect(ref.done).rejects.toThrow(/publisher disconnected/);
  });

  it('gcSweep flips heartbeat-stale active virtuals to inactive', async () => {
    const fiveMinutesAgo = ago(5 * MINUTE_MS); // older than the service's heartbeat cutoff
    const thirtySecondsAgo = ago(30 * SECOND_MS); // still inside the cutoff
    const repo = mockRepo([
      makeLlm({ name: 'stale', providerSessionId: 'a', status: 'active', lastHeartbeatAt: fiveMinutesAgo }),
      makeLlm({ name: 'fresh', providerSessionId: 'b', status: 'active', lastHeartbeatAt: thirtySecondsAgo }),
    ]);
    const service = new VirtualLlmService(repo);

    const sweep = await service.gcSweep();

    expect(sweep.markedInactive).toBe(1);
    expect((await repo.findByName('stale'))?.status).toBe('inactive');
    expect((await repo.findByName('fresh'))?.status).toBe('active');
  });

  it('gcSweep deletes virtuals inactive past the 4h retention window', async () => {
    const fiveHoursAgo = ago(5 * HOUR_MS);
    const oneHourAgo = ago(1 * HOUR_MS);
    const repo = mockRepo([
      makeLlm({ name: 'old', providerSessionId: 'a', status: 'inactive', inactiveSince: fiveHoursAgo }),
      makeLlm({ name: 'recent', providerSessionId: 'b', status: 'inactive', inactiveSince: oneHourAgo }),
      makeLlm({ name: 'public-survivor', providerSessionId: null, kind: 'public' }),
    ]);
    const service = new VirtualLlmService(repo);

    const sweep = await service.gcSweep();

    expect(sweep.deleted).toBe(1);
    expect(await repo.findByName('old')).toBeNull();
    expect(await repo.findByName('recent')).not.toBeNull();
    expect(await repo.findByName('public-survivor')).not.toBeNull();
  });

  it('gcSweep is idempotent — running twice in a row is a no-op the second time', async () => {
    const staleRow = makeLlm({
      name: 'stale',
      providerSessionId: 'a',
      status: 'active',
      lastHeartbeatAt: ago(5 * MINUTE_MS),
    });
    const service = new VirtualLlmService(mockRepo([staleRow]));

    const firstSweep = await service.gcSweep();
    const secondSweep = await service.gcSweep();

    expect(firstSweep.markedInactive).toBe(1);
    expect(secondSweep.markedInactive).toBe(0);
    expect(secondSweep.deleted).toBe(0);
  });
});