import { describe, it, expect, vi } from 'vitest';
import { VirtualLlmService, type VirtualSessionHandle } from '../src/services/virtual-llm.service.js';
import type { ILlmRepository } from '../src/repositories/llm.repository.js';
import type { Llm } from '@prisma/client';

/**
 * Builds an `Llm` row for tests.
 *
 * Defaults describe an active virtual provider named `vllm-local` owned by
 * session `s-1`, with a random `llm-xxxxxx` id and "now" timestamps.
 *
 * @param overrides Fields that replace the defaults (spread last, so they win).
 * @returns A complete `Llm` object.
 */
function makeLlm(overrides: Partial<Llm> = {}): Llm {
  return {
    id: `llm-${Math.random().toString(36).slice(2, 8)}`,
    name: 'vllm-local',
    type: 'openai',
    model: 'm',
    url: '',
    tier: 'fast',
    description: '',
    apiKeySecretId: null,
    apiKeySecretKey: null,
    extraConfig: {} as Llm['extraConfig'],
    kind: 'virtual',
    providerSessionId: 's-1',
    lastHeartbeatAt: new Date(),
    status: 'active',
    inactiveSince: null,
    version: 1,
    createdAt: new Date(),
    updatedAt: new Date(),
    ...overrides,
  };
}

/**
 * In-memory `ILlmRepository` backed by a `Map` keyed on row id.
 *
 * Every method is a `vi.fn` so tests can also inspect calls. Notes on the
 * fake's behaviour:
 *  - `findByTier` always resolves to an empty array.
 *  - `create` assigns sequential ids (`llm-<n>`), continuing from the number
 *    of seeded rows.
 *  - `update` patches only the fields it lists below (it does not touch
 *    `name` or `url`) and throws `Error('not found')` for an unknown id.
 *
 * @param initial Rows to seed the store with.
 * @returns The mocked repository.
 */
function mockRepo(initial: Llm[] = []): ILlmRepository {
  const rows = new Map<string, Llm>(initial.map((l) => [l.id, l]));
  let counter = rows.size;
  return {
    findAll: vi.fn(async () => [...rows.values()]),
    findById: vi.fn(async (id: string) => rows.get(id) ?? null),
    findByName: vi.fn(async (name: string) => {
      for (const l of rows.values()) if (l.name === name) return l;
      return null;
    }),
    findByTier: vi.fn(async () => []),
    findBySessionId: vi.fn(async (sid: string) =>
      [...rows.values()].filter((l) => l.providerSessionId === sid)),
    // Active virtual rows whose last heartbeat is strictly older than `cutoff`.
    findStaleVirtuals: vi.fn(async (cutoff: Date) =>
      [...rows.values()].filter((l) =>
        l.kind === 'virtual'
        && l.status === 'active'
        && l.lastHeartbeatAt !== null
        && l.lastHeartbeatAt < cutoff)),
    // Inactive virtual rows that went inactive strictly before `cutoff`.
    findExpiredInactives: vi.fn(async (cutoff: Date) =>
      [...rows.values()].filter((l) =>
        l.kind === 'virtual'
        && l.status === 'inactive'
        && l.inactiveSince !== null
        && l.inactiveSince < cutoff)),
    create: vi.fn(async (data) => {
      counter += 1;
      const row = makeLlm({
        id: `llm-${String(counter)}`,
        name: data.name,
        type: data.type,
        model: data.model,
        url: data.url ?? '',
        tier: data.tier ?? 'fast',
        description: data.description ?? '',
        kind: data.kind ?? 'public',
        providerSessionId: data.providerSessionId ?? null,
        status: data.status ?? 'active',
        lastHeartbeatAt: data.lastHeartbeatAt ?? null,
        inactiveSince: data.inactiveSince ?? null,
      });
      rows.set(row.id, row);
      return row;
    }),
    update: vi.fn(async (id, data) => {
      const existing = rows.get(id);
      if (!existing) throw new Error('not found');
      // `undefined` means "leave unchanged"; `null` is a real value that is
      // written through (e.g. clearing `inactiveSince`).
      const next: Llm = {
        ...existing,
        ...(data.type !== undefined ? { type: data.type } : {}),
        ...(data.model !== undefined ? { model: data.model } : {}),
        ...(data.tier !== undefined ? { tier: data.tier } : {}),
        ...(data.description !== undefined ? { description: data.description } : {}),
        ...(data.kind !== undefined ? { kind: data.kind } : {}),
        ...(data.providerSessionId !== undefined ? { providerSessionId: data.providerSessionId } : {}),
        ...(data.status !== undefined ? { status: data.status } : {}),
        ...(data.lastHeartbeatAt !== undefined ? { lastHeartbeatAt: data.lastHeartbeatAt } : {}),
        ...(data.inactiveSince !== undefined ? { inactiveSince: data.inactiveSince } : {}),
      };
      rows.set(id, next);
      return next;
    }),
    delete: vi.fn(async (id: string) => {
      rows.delete(id);
    }),
  };
}

/**
 * Minimal session handle that records every pushed task frame in `tasks`
 * so tests can assert on what the service sent.
 */
function fakeSession(): VirtualSessionHandle & { tasks: Array<unknown>; alive: boolean } {
  const tasks: unknown[] = [];
  return {
    tasks,
    alive: true,
    pushTask(t) {
      tasks.push(t);
    },
  };
}

describe('VirtualLlmService', () => {
  it('register inserts new virtual rows with active status + sessionId', async () => {
    const repo = mockRepo();
    const svc = new VirtualLlmService(repo);
    const { providerSessionId, llms } = await svc.register({
      providerSessionId: null,
      providers: [
        { name: 'vllm-local', type: 'openai', model: 'Qwen/Qwen2.5-7B-Instruct-AWQ', tier: 'fast' },
      ],
    });
    expect(providerSessionId).toMatch(/^[0-9a-f-]{36}$/);
    expect(llms).toHaveLength(1);
    expect(llms[0]!.kind).toBe('virtual');
    expect(llms[0]!.status).toBe('active');
    expect(llms[0]!.providerSessionId).toBe(providerSessionId);
    expect(llms[0]!.lastHeartbeatAt).not.toBeNull();
  });

  it('register reuses the same row on sticky reconnect (same name + sessionId)', async () => {
    const repo = mockRepo();
    const svc = new VirtualLlmService(repo);
    const first = await svc.register({
      providerSessionId: 'fixed-id',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm' }],
    });
    expect(first.llms[0]!.id).toMatch(/^llm-/);
    const firstId = first.llms[0]!.id;

    const second = await svc.register({
      providerSessionId: 'fixed-id',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm-updated' }],
    });
    expect(second.llms[0]!.id).toBe(firstId);
    expect(second.llms[0]!.model).toBe('m-updated');
  });

  it('register refuses to overwrite a public LLM with the same name', async () => {
    const repo = mockRepo([makeLlm({ name: 'qwen3-thinking', kind: 'public', providerSessionId: null })]);
    const svc = new VirtualLlmService(repo);
    await expect(svc.register({
      providerSessionId: 'sess-x',
      providers: [{ name: 'qwen3-thinking', type: 'openai', model: 'm' }],
    })).rejects.toThrow(/Cannot publish over public/);
  });

  it('register refuses if another active session owns the name', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'other', status: 'active' })]);
    const svc = new VirtualLlmService(repo);
    await expect(svc.register({
      providerSessionId: 'mine',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm' }],
    })).rejects.toThrow(/already active under a different session/);
  });

  it('register adopts an inactive virtual row from a different session (sticky reconnect after lapse)', async () => {
    const repo = mockRepo([makeLlm({
      name: 'vllm-local',
      providerSessionId: 'old-session',
      status: 'inactive',
      inactiveSince: new Date(),
    })]);
    const svc = new VirtualLlmService(repo);
    const { llms } = await svc.register({
      providerSessionId: 'new-session',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm' }],
    });
    expect(llms[0]!.providerSessionId).toBe('new-session');
    expect(llms[0]!.status).toBe('active');
    expect(llms[0]!.inactiveSince).toBeNull();
  });

  it('heartbeat bumps lastHeartbeatAt + revives an inactive row', async () => {
    const past = new Date(Date.now() - 5_000);
    const repo = mockRepo([makeLlm({
      name: 'vllm-local',
      providerSessionId: 'sess',
      status: 'inactive',
      lastHeartbeatAt: past,
      inactiveSince: past,
    })]);
    const svc = new VirtualLlmService(repo);
    await svc.heartbeat('sess');
    const row = await repo.findByName('vllm-local');
    expect(row?.status).toBe('active');
    expect(row?.inactiveSince).toBeNull();
    expect(row!.lastHeartbeatAt!.getTime()).toBeGreaterThan(past.getTime());
  });

  it('unbindSession flips all owned rows to inactive immediately', async () => {
    const repo = mockRepo([
      makeLlm({ name: 'a', providerSessionId: 'sess' }),
      makeLlm({ name: 'b', providerSessionId: 'sess' }),
      makeLlm({ name: 'c', providerSessionId: 'other' }),
    ]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    await svc.unbindSession('sess');
    expect((await repo.findByName('a'))?.status).toBe('inactive');
    expect((await repo.findByName('b'))?.status).toBe('inactive');
    expect((await repo.findByName('c'))?.status).toBe('active');
  });

  it('enqueueInferTask pushes a task frame to the SSE session', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    const session = fakeSession();
    svc.bindSession('sess', session);

    const ref = await svc.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );
    expect(session.tasks).toHaveLength(1);
    const t = session.tasks[0] as { kind: string; taskId: string; llmName: string; streaming: boolean };
    expect(t.kind).toBe('infer');
    expect(t.taskId).toBe(ref.taskId);
    expect(t.llmName).toBe('vllm-local');
    expect(t.streaming).toBe(false);
  });

  it('enqueueInferTask rejects when the publisher is offline (no session bound)', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    await expect(
      svc.enqueueInferTask('vllm-local', { model: 'm', messages: [] }, false),
    ).rejects.toThrow(/no live SSE session|publisher offline/);
  });

  it('enqueueInferTask rejects when the row is inactive', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess', status: 'inactive', inactiveSince: new Date() })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    await expect(
      svc.enqueueInferTask('vllm-local', { model: 'm', messages: [] }, false),
    ).rejects.toThrow(/inactive|publisher offline/);
  });

  it('enqueueInferTask rejects when the LLM is public (not virtual)', async () => {
    const repo = mockRepo([makeLlm({ name: 'qwen3-thinking', kind: 'public', providerSessionId: null })]);
    const svc = new VirtualLlmService(repo);
    await expect(
      svc.enqueueInferTask('qwen3-thinking', { model: 'm', messages: [] }, false),
    ).rejects.toThrow(/not a virtual provider/);
  });

  it('completeTask resolves the pending non-streaming promise', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    const ref = await svc.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );
    expect(svc.completeTask(ref.taskId, { status: 200, body: { ok: true } })).toBe(true);
    await expect(ref.done).resolves.toEqual({ status: 200, body: { ok: true } });
  });

  it('streaming: pushTaskChunk fans chunks to subscribers; done resolves the ref', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    const ref = await svc.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }], stream: true },
      true,
    );
    const got: Array<{ data: string; done?: boolean }> = [];
    ref.onChunk((c) => got.push(c));

    expect(svc.pushTaskChunk(ref.taskId, { data: 'hello' })).toBe(true);
    expect(svc.pushTaskChunk(ref.taskId, { data: ' world' })).toBe(true);
    expect(svc.pushTaskChunk(ref.taskId, { data: '[DONE]', done: true })).toBe(true);

    expect(got.map((c) => c.data)).toEqual(['hello', ' world', '[DONE]']);
    await expect(ref.done).resolves.toMatchObject({ status: 200 });
  });

  it('failTask rejects the pending promise with a clear error', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    const ref = await svc.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );
    expect(svc.failTask(ref.taskId, new Error('upstream blew up'))).toBe(true);
    await expect(ref.done).rejects.toThrow(/upstream blew up/);
  });

  it('unbindSession rejects in-flight tasks for that session', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    const ref = await svc.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );
    await svc.unbindSession('sess');
    await expect(ref.done).rejects.toThrow(/publisher disconnected/);
  });

  it('gcSweep flips heartbeat-stale active virtuals to inactive', async () => {
    const long = new Date(Date.now() - 5 * 60 * 1000); // 5 min ago — past the 90-s cutoff
    const recent = new Date(Date.now() - 30 * 1000); // 30 s ago — within the cutoff
    const repo = mockRepo([
      makeLlm({ name: 'stale', providerSessionId: 'a', status: 'active', lastHeartbeatAt: long }),
      makeLlm({ name: 'fresh', providerSessionId: 'b', status: 'active', lastHeartbeatAt: recent }),
    ]);
    const svc = new VirtualLlmService(repo);
    const result = await svc.gcSweep();
    expect(result.markedInactive).toBe(1);
    expect((await repo.findByName('stale'))?.status).toBe('inactive');
    expect((await repo.findByName('fresh'))?.status).toBe('active');
  });

  it('gcSweep deletes virtuals inactive past the 4h retention window', async () => {
    const ancient = new Date(Date.now() - 5 * 60 * 60 * 1000); // 5 h ago
    const fresh = new Date(Date.now() - 1 * 60 * 60 * 1000); // 1 h ago
    const repo = mockRepo([
      makeLlm({ name: 'old', providerSessionId: 'a', status: 'inactive', inactiveSince: ancient }),
      makeLlm({ name: 'recent', providerSessionId: 'b', status: 'inactive', inactiveSince: fresh }),
      makeLlm({ name: 'public-survivor', providerSessionId: null, kind: 'public' }),
    ]);
    const svc = new VirtualLlmService(repo);
    const result = await svc.gcSweep();
    expect(result.deleted).toBe(1);
    expect(await repo.findByName('old')).toBeNull();
    expect(await repo.findByName('recent')).not.toBeNull();
    expect(await repo.findByName('public-survivor')).not.toBeNull();
  });

  // ── v2: wake-before-infer ──

  it('hibernating: dispatches a wake task first and waits for it to complete before infer', async () => {
    const repo = mockRepo([makeLlm({ name: 'sleeping', providerSessionId: 'sess', status: 'hibernating' })]);
    const svc = new VirtualLlmService(repo);
    const session = fakeSession();
    svc.bindSession('sess', session);

    // Kick off enqueueInferTask. It blocks on the wake task.
    const inferPromise = svc.enqueueInferTask(
      'sleeping',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );
    // Wait a tick so the wake task gets pushed.
    await new Promise((r) => setTimeout(r, 0));

    expect(session.tasks).toHaveLength(1);
    const wakeTask = session.tasks[0] as { kind: string; taskId: string; llmName: string };
    expect(wakeTask.kind).toBe('wake');
    expect(wakeTask.llmName).toBe('sleeping');

    // Resolve the wake task — service flips the row to active, then
    // pushes the infer task on the same session.
    expect(svc.completeTask(wakeTask.taskId, { status: 200, body: { ok: true } })).toBe(true);
    const ref = await inferPromise;

    expect(session.tasks).toHaveLength(2);
    const inferTask = session.tasks[1] as { kind: string; taskId: string };
    expect(inferTask.kind).toBe('infer');
    expect(inferTask.taskId).toBe(ref.taskId);

    // The row should be active now — concurrent callers won't trigger another wake.
    const row = await repo.findByName('sleeping');
    expect(row?.status).toBe('active');
  });

  it('hibernating: concurrent infer requests share a single wake task', async () => {
    const repo = mockRepo([makeLlm({ name: 'sleeping', providerSessionId: 'sess', status: 'hibernating' })]);
    const svc = new VirtualLlmService(repo);
    const session = fakeSession();
    svc.bindSession('sess', session);

    // Fire 3 concurrent infer requests against the same hibernating LLM.
    const reqs = [
      svc.enqueueInferTask('sleeping', { model: 'm', messages: [] }, false),
      svc.enqueueInferTask('sleeping', { model: 'm', messages: [] }, false),
      svc.enqueueInferTask('sleeping', { model: 'm', messages: [] }, false),
    ];
    await new Promise((r) => setTimeout(r, 0));

    // Exactly one wake task pushed, regardless of concurrent infers.
    const wakeTasks = (session.tasks as Array<{ kind: string }>).filter((t) => t.kind === 'wake');
    expect(wakeTasks).toHaveLength(1);

    const wakeTaskId = (session.tasks[0] as { taskId: string }).taskId;
    expect(svc.completeTask(wakeTaskId, { status: 200, body: { ok: true } })).toBe(true);
    const refs = await Promise.all(reqs);

    // After wake, all 3 infer tasks pushed — total session tasks = 1 wake + 3 infer.
    const inferTasks = (session.tasks as Array<{ kind: string; taskId: string }>)
      .filter((t) => t.kind === 'infer');
    expect(inferTasks).toHaveLength(3);
    // Every returned ref must match one of the infer frames actually pushed
    // to the session. (Comparing `refs` with itself would always pass.)
    expect(inferTasks.map((t) => t.taskId).sort()).toEqual(refs.map((r) => r.taskId).sort());
  });

  it('hibernating: rejects when the wake task fails', async () => {
    const repo = mockRepo([makeLlm({ name: 'broken', providerSessionId: 'sess', status: 'hibernating' })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());

    const inferPromise = svc.enqueueInferTask(
      'broken',
      { model: 'm', messages: [] },
      false,
    );
    await new Promise((r) => setTimeout(r, 0));

    // Get the wake task id from the in-flight tasks map (its only entry).
    // We test the failure path via failTask which is part of the public
    // surface used by the result-POST route handler.
    const taskIds: string[] = [];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    for (const id of (svc as any).tasksById.keys()) taskIds.push(id);
    expect(taskIds).toHaveLength(1);
    expect(svc.failTask(taskIds[0]!, new Error('wake recipe failed'))).toBe(true);

    await expect(inferPromise).rejects.toThrow(/wake recipe failed/);
    // Row stayed hibernating — the next request will get another wake try.
    const row = await repo.findByName('broken');
    expect(row?.status).toBe('hibernating');
  });

  it('inactive: still rejects with 503 (publisher offline) — wake path only fires for hibernating', async () => {
    const repo = mockRepo([makeLlm({ name: 'gone', providerSessionId: 'sess', status: 'inactive', inactiveSince: new Date() })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    await expect(
      svc.enqueueInferTask('gone', { model: 'm', messages: [] }, false),
    ).rejects.toThrow(/inactive|publisher offline/);
  });

  it('gcSweep is idempotent — running twice in a row is a no-op the second time', async () => {
    const long = new Date(Date.now() - 5 * 60 * 1000);
    const repo = mockRepo([
      makeLlm({ name: 'stale', providerSessionId: 'a', status: 'active', lastHeartbeatAt: long }),
    ]);
    const svc = new VirtualLlmService(repo);
    const first = await svc.gcSweep();
    const second = await svc.gcSweep();
    expect(first.markedInactive).toBe(1);
    expect(second.markedInactive).toBe(0);
    expect(second.deleted).toBe(0);
  });
});