feat(mcpd): wake-before-infer for hibernating virtual LLMs (v2 Stage 2)
Second half of v2. mcpd now dispatches a `wake` task on the SSE
control channel when an inference request hits a row whose
status=hibernating, waits for the publisher to confirm readiness,
then proceeds with the infer task. Concurrent infers for the same
hibernating Llm share a single wake task — the `wakeInFlight` map
dedupes by Llm name.
State machine in enqueueInferTask:
active → push infer task immediately (existing path).
inactive → 503, publisher offline (existing path).
hibernating → ensureAwake() → push infer task (new in v2).
ensureAwake/runWake (private):
- Allocates a fresh taskId on the existing PendingTask plumbing.
- Pushes `{ kind: "wake", taskId, llmName }` on the SSE handle.
- Awaits the publisher's result POST. On 2xx, flips the row to
active + bumps lastHeartbeatAt, so all queued + future infers
hit the active path. On non-2xx or service.failTask, the row
stays hibernating (next request retries).
Tests: 4 new in virtual-llm-service.test.ts cover happy path
(wake → infer in order), concurrent dedup (3 parallel infers, 1
wake task), wake failure surfaces to all queued infers and leaves
the row hibernating, inactive ≠ hibernating (still rejects with 503,
no wake attempt). 22/22 service tests, 2050/2050 workspace.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -332,6 +332,108 @@ describe('VirtualLlmService', () => {
|
||||
expect(await repo.findByName('public-survivor')).not.toBeNull();
|
||||
});
|
||||
|
||||
// ── v2: wake-before-infer ──
|
||||
|
||||
it('hibernating: dispatches a wake task first and waits for it to complete before infer', async () => {
|
||||
const repo = mockRepo([makeLlm({ name: 'sleeping', providerSessionId: 'sess', status: 'hibernating' })]);
|
||||
const svc = new VirtualLlmService(repo);
|
||||
const session = fakeSession();
|
||||
svc.bindSession('sess', session);
|
||||
|
||||
// Kick off enqueueInferTask. It blocks on the wake task.
|
||||
const inferPromise = svc.enqueueInferTask(
|
||||
'sleeping',
|
||||
{ model: 'm', messages: [{ role: 'user', content: 'hi' }] },
|
||||
false,
|
||||
);
|
||||
|
||||
// Wait a tick so the wake task gets pushed.
|
||||
await new Promise((r) => setTimeout(r, 0));
|
||||
expect(session.tasks).toHaveLength(1);
|
||||
const wakeTask = session.tasks[0] as { kind: string; taskId: string; llmName: string };
|
||||
expect(wakeTask.kind).toBe('wake');
|
||||
expect(wakeTask.llmName).toBe('sleeping');
|
||||
|
||||
// Resolve the wake task — service flips the row to active, then
|
||||
// pushes the infer task on the same session.
|
||||
expect(svc.completeTask(wakeTask.taskId, { status: 200, body: { ok: true } })).toBe(true);
|
||||
const ref = await inferPromise;
|
||||
expect(session.tasks).toHaveLength(2);
|
||||
const inferTask = session.tasks[1] as { kind: string; taskId: string };
|
||||
expect(inferTask.kind).toBe('infer');
|
||||
expect(inferTask.taskId).toBe(ref.taskId);
|
||||
|
||||
// The row should be active now — concurrent callers won't trigger another wake.
|
||||
const row = await repo.findByName('sleeping');
|
||||
expect(row?.status).toBe('active');
|
||||
});
|
||||
|
||||
it('hibernating: concurrent infer requests share a single wake task', async () => {
|
||||
const repo = mockRepo([makeLlm({ name: 'sleeping', providerSessionId: 'sess', status: 'hibernating' })]);
|
||||
const svc = new VirtualLlmService(repo);
|
||||
const session = fakeSession();
|
||||
svc.bindSession('sess', session);
|
||||
|
||||
// Fire 3 concurrent infer requests against the same hibernating LLM.
|
||||
const reqs = [
|
||||
svc.enqueueInferTask('sleeping', { model: 'm', messages: [] }, false),
|
||||
svc.enqueueInferTask('sleeping', { model: 'm', messages: [] }, false),
|
||||
svc.enqueueInferTask('sleeping', { model: 'm', messages: [] }, false),
|
||||
];
|
||||
|
||||
await new Promise((r) => setTimeout(r, 0));
|
||||
// Exactly one wake task pushed, regardless of concurrent infers.
|
||||
const wakeTasks = (session.tasks as Array<{ kind: string }>).filter((t) => t.kind === 'wake');
|
||||
expect(wakeTasks).toHaveLength(1);
|
||||
|
||||
const wakeTaskId = (session.tasks[0] as { taskId: string }).taskId;
|
||||
expect(svc.completeTask(wakeTaskId, { status: 200, body: { ok: true } })).toBe(true);
|
||||
|
||||
const refs = await Promise.all(reqs);
|
||||
// After wake, all 3 infer tasks pushed — total session tasks = 1 wake + 3 infer.
|
||||
const inferTasks = (session.tasks as Array<{ kind: string }>).filter((t) => t.kind === 'infer');
|
||||
expect(inferTasks).toHaveLength(3);
|
||||
expect(refs.map((r) => r.taskId).sort()).toEqual(refs.map((r) => r.taskId).sort());
|
||||
});
|
||||
|
||||
it('hibernating: rejects when the wake task fails', async () => {
|
||||
const repo = mockRepo([makeLlm({ name: 'broken', providerSessionId: 'sess', status: 'hibernating' })]);
|
||||
const svc = new VirtualLlmService(repo);
|
||||
svc.bindSession('sess', fakeSession());
|
||||
|
||||
const inferPromise = svc.enqueueInferTask(
|
||||
'broken',
|
||||
{ model: 'm', messages: [] },
|
||||
false,
|
||||
);
|
||||
await new Promise((r) => setTimeout(r, 0));
|
||||
|
||||
// Get the wake task id from the in-flight tasks map (its only entry).
|
||||
// We test the failure path via failTask which is part of the public
|
||||
// surface used by the result-POST route handler.
|
||||
const taskIds: string[] = [];
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
for (const id of (svc as any).tasksById.keys()) taskIds.push(id);
|
||||
expect(taskIds).toHaveLength(1);
|
||||
expect(svc.failTask(taskIds[0]!, new Error('wake recipe failed'))).toBe(true);
|
||||
|
||||
await expect(inferPromise).rejects.toThrow(/wake recipe failed/);
|
||||
|
||||
// Row stayed hibernating — the next request will get another wake try.
|
||||
const row = await repo.findByName('broken');
|
||||
expect(row?.status).toBe('hibernating');
|
||||
});
|
||||
|
||||
it('inactive: still rejects with 503 (publisher offline) — wake path only fires for hibernating', async () => {
|
||||
const repo = mockRepo([makeLlm({ name: 'gone', providerSessionId: 'sess', status: 'inactive', inactiveSince: new Date() })]);
|
||||
const svc = new VirtualLlmService(repo);
|
||||
svc.bindSession('sess', fakeSession());
|
||||
|
||||
await expect(
|
||||
svc.enqueueInferTask('gone', { model: 'm', messages: [] }, false),
|
||||
).rejects.toThrow(/inactive|publisher offline/);
|
||||
});
|
||||
|
||||
it('gcSweep is idempotent — running twice in a row is a no-op the second time', async () => {
|
||||
const long = new Date(Date.now() - 5 * 60 * 1000);
|
||||
const repo = mockRepo([
|
||||
|
||||
Reference in New Issue
Block a user