// mcpctl/src/mcpd/tests/virtual-llm-service.test.ts

import { describe, it, expect, vi } from 'vitest';
import { VirtualLlmService, type VirtualSessionHandle } from '../src/services/virtual-llm.service.js';
import type { ILlmRepository } from '../src/repositories/llm.repository.js';
import type { Llm } from '@prisma/client';

/**
 * Test fixture: builds a complete `Llm` row from defaults describing an
 * active virtual provider, then applies the caller's overrides on top.
 *
 * Defaults include a random `llm-…` id, the name `vllm-local`, the provider
 * session `s-1`, and freshly created `Date`s for the heartbeat / created /
 * updated timestamps.
 *
 * @param overrides - Fields that replace the defaults (spread last, so they win).
 * @returns A new row object on every call.
 */
function makeLlm(overrides: Partial<Llm> = {}): Llm {
  const randomSuffix = Math.random().toString(36).slice(2, 8);
  const defaults: Llm = {
    id: `llm-${randomSuffix}`,
    name: 'vllm-local',
    type: 'openai',
    model: 'm',
    url: '',
    tier: 'fast',
    description: '',
    apiKeySecretId: null,
    apiKeySecretKey: null,
    extraConfig: {} as Llm['extraConfig'],
    kind: 'virtual',
    providerSessionId: 's-1',
    // Each timestamp is its own Date instance so callers can mutate one
    // without affecting the others.
    lastHeartbeatAt: new Date(),
    status: 'active',
    inactiveSince: null,
    version: 1,
    createdAt: new Date(),
    updatedAt: new Date(),
  };
  return { ...defaults, ...overrides };
}

/**
 * In-memory `ILlmRepository` double backed by a `Map` keyed on row id.
 * Every method is wrapped in `vi.fn` so tests can also assert on calls.
 *
 * - Query methods filter the current rows; `findByTier` always returns `[]`.
 * - `create` assigns ids of the form `llm-<n>`, counting up from the number
 *   of seeded rows, and fills unspecified fields via `makeLlm` plus the
 *   fallbacks listed in the body.
 * - `update` copies only a fixed set of fields, and only when the patch
 *   value is not `undefined`; it throws `Error('not found')` for unknown ids.
 *
 * @param initial - Rows to seed the store with.
 * @returns The mock repository.
 */
function mockRepo(initial: Llm[] = []): ILlmRepository {
  const store = new Map<string, Llm>();
  for (const seed of initial) store.set(seed.id, seed);
  let nextSeq = store.size;

  // Snapshot of all rows in insertion order.
  const snapshot = (): Llm[] => Array.from(store.values());
  // Rows matching a predicate.
  const where = (keep: (row: Llm) => boolean): Llm[] => snapshot().filter(keep);

  return {
    findAll: vi.fn(async () => snapshot()),
    findById: vi.fn(async (id: string) => store.get(id) ?? null),
    findByName: vi.fn(async (name: string) => snapshot().find((row) => row.name === name) ?? null),
    findByTier: vi.fn(async () => []),
    findBySessionId: vi.fn(async (sid: string) => where((row) => row.providerSessionId === sid)),
    findStaleVirtuals: vi.fn(async (cutoff: Date) =>
      where((row) => {
        if (row.kind !== 'virtual' || row.status !== 'active') return false;
        const beat = row.lastHeartbeatAt;
        return beat !== null && beat < cutoff;
      })),
    findExpiredInactives: vi.fn(async (cutoff: Date) =>
      where((row) => {
        if (row.kind !== 'virtual' || row.status !== 'inactive') return false;
        const since = row.inactiveSince;
        return since !== null && since < cutoff;
      })),
    create: vi.fn(async (data) => {
      nextSeq += 1;
      const created = makeLlm({
        id: `llm-${String(nextSeq)}`,
        name: data.name,
        type: data.type,
        model: data.model,
        url: data.url ?? '',
        tier: data.tier ?? 'fast',
        description: data.description ?? '',
        kind: data.kind ?? 'public',
        providerSessionId: data.providerSessionId ?? null,
        status: data.status ?? 'active',
        lastHeartbeatAt: data.lastHeartbeatAt ?? null,
        inactiveSince: data.inactiveSince ?? null,
      });
      store.set(created.id, created);
      return created;
    }),
    update: vi.fn(async (id, data) => {
      const current = store.get(id);
      if (!current) throw new Error('not found');

      // Collect only the fields the caller actually supplied; an explicit
      // `null` is a real value and is copied, `undefined` means "leave as is".
      const patch: Partial<Llm> = {};
      if (data.type !== undefined) patch.type = data.type;
      if (data.model !== undefined) patch.model = data.model;
      if (data.tier !== undefined) patch.tier = data.tier;
      if (data.description !== undefined) patch.description = data.description;
      if (data.kind !== undefined) patch.kind = data.kind;
      if (data.providerSessionId !== undefined) patch.providerSessionId = data.providerSessionId;
      if (data.status !== undefined) patch.status = data.status;
      if (data.lastHeartbeatAt !== undefined) patch.lastHeartbeatAt = data.lastHeartbeatAt;
      if (data.inactiveSince !== undefined) patch.inactiveSince = data.inactiveSince;

      const merged: Llm = { ...current, ...patch };
      store.set(id, merged);
      return merged;
    }),
    delete: vi.fn(async (id: string) => { store.delete(id); }),
  };
}

/**
 * Minimal recording stand-in for a session handle: every frame passed to
 * `pushTask` is appended to the exposed `tasks` array so a test can inspect
 * what was dispatched. `alive` starts out `true`.
 */
function fakeSession(): VirtualSessionHandle & { tasks: Array<unknown>; alive: boolean } {
  const recorded: unknown[] = [];
  const handle: VirtualSessionHandle & { tasks: Array<unknown>; alive: boolean } = {
    tasks: recorded,
    alive: true,
    pushTask: (frame) => { recorded.push(frame); },
  };
  return handle;
}

/**
 * Unit tests for `VirtualLlmService`, run against the in-memory `mockRepo`
 * and the recording `fakeSession` handle defined in this file.
 *
 * Covered areas: provider registration (new, sticky reconnect, name
 * conflicts), heartbeats, session bind/unbind, infer-task dispatch and
 * completion (streaming and non-streaming), the GC sweep, and the
 * wake-before-infer flow for hibernating rows.
 */
describe('VirtualLlmService', () => {
  // ── register ──

  it('register inserts new virtual rows with active status + sessionId', async () => {
    const repo = mockRepo();
    const svc = new VirtualLlmService(repo);
    // A null providerSessionId asks the service to mint one.
    const { providerSessionId, llms } = await svc.register({
      providerSessionId: null,
      providers: [
        { name: 'vllm-local', type: 'openai', model: 'Qwen/Qwen2.5-7B-Instruct-AWQ', tier: 'fast' },
      ],
    });
    // The minted id is expected to be 36 chars of hex digits and dashes.
    expect(providerSessionId).toMatch(/^[0-9a-f-]{36}$/);
    expect(llms).toHaveLength(1);
    expect(llms[0]!.kind).toBe('virtual');
    expect(llms[0]!.status).toBe('active');
    expect(llms[0]!.providerSessionId).toBe(providerSessionId);
    expect(llms[0]!.lastHeartbeatAt).not.toBeNull();
  });

  it('register reuses the same row on sticky reconnect (same name + sessionId)', async () => {
    const repo = mockRepo();
    const svc = new VirtualLlmService(repo);
    const first = await svc.register({
      providerSessionId: 'fixed-id',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm' }],
    });
    expect(first.llms[0]!.id).toMatch(/^llm-/);
    const firstId = first.llms[0]!.id;

    // Same session id + name again: the row id must be stable while the
    // changed field (model) is picked up.
    const second = await svc.register({
      providerSessionId: 'fixed-id',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm-updated' }],
    });
    expect(second.llms[0]!.id).toBe(firstId);
    expect(second.llms[0]!.model).toBe('m-updated');
  });

  it('register refuses to overwrite a public LLM with the same name', async () => {
    const repo = mockRepo([makeLlm({ name: 'qwen3-thinking', kind: 'public', providerSessionId: null })]);
    const svc = new VirtualLlmService(repo);
    await expect(svc.register({
      providerSessionId: 'sess-x',
      providers: [{ name: 'qwen3-thinking', type: 'openai', model: 'm' }],
    })).rejects.toThrow(/Cannot publish over public/);
  });

  it('register refuses if another active session owns the name', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'other', status: 'active' })]);
    const svc = new VirtualLlmService(repo);
    await expect(svc.register({
      providerSessionId: 'mine',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm' }],
    })).rejects.toThrow(/already active under a different session/);
  });

  it('register adopts an inactive virtual row from a different session (sticky reconnect after lapse)', async () => {
    const repo = mockRepo([makeLlm({
      name: 'vllm-local', providerSessionId: 'old-session',
      status: 'inactive', inactiveSince: new Date(),
    })]);
    const svc = new VirtualLlmService(repo);
    const { llms } = await svc.register({
      providerSessionId: 'new-session',
      providers: [{ name: 'vllm-local', type: 'openai', model: 'm' }],
    });
    // Ownership moves to the new session and the inactive markers are cleared.
    expect(llms[0]!.providerSessionId).toBe('new-session');
    expect(llms[0]!.status).toBe('active');
    expect(llms[0]!.inactiveSince).toBeNull();
  });

  // ── heartbeat / session lifecycle ──

  it('heartbeat bumps lastHeartbeatAt + revives an inactive row', async () => {
    const past = new Date(Date.now() - 5_000);
    const repo = mockRepo([makeLlm({
      name: 'vllm-local', providerSessionId: 'sess', status: 'inactive',
      lastHeartbeatAt: past, inactiveSince: past,
    })]);
    const svc = new VirtualLlmService(repo);
    await svc.heartbeat('sess');
    const row = await repo.findByName('vllm-local');
    expect(row?.status).toBe('active');
    expect(row?.inactiveSince).toBeNull();
    expect(row!.lastHeartbeatAt!.getTime()).toBeGreaterThan(past.getTime());
  });

  it('unbindSession flips all owned rows to inactive immediately', async () => {
    const repo = mockRepo([
      makeLlm({ name: 'a', providerSessionId: 'sess' }),
      makeLlm({ name: 'b', providerSessionId: 'sess' }),
      makeLlm({ name: 'c', providerSessionId: 'other' }),
    ]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    await svc.unbindSession('sess');
    expect((await repo.findByName('a'))?.status).toBe('inactive');
    expect((await repo.findByName('b'))?.status).toBe('inactive');
    // Row 'c' belongs to a different session and must be left alone.
    expect((await repo.findByName('c'))?.status).toBe('active');
  });

  // ── enqueueInferTask ──

  it('enqueueInferTask pushes a task frame to the SSE session', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    const session = fakeSession();
    svc.bindSession('sess', session);

    const ref = await svc.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );
    expect(session.tasks).toHaveLength(1);
    const t = session.tasks[0] as { kind: string; taskId: string; llmName: string; streaming: boolean };
    expect(t.kind).toBe('infer');
    expect(t.taskId).toBe(ref.taskId);
    expect(t.llmName).toBe('vllm-local');
    expect(t.streaming).toBe(false);
  });

  it('enqueueInferTask rejects when the publisher is offline (no session bound)', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    await expect(
      svc.enqueueInferTask('vllm-local', { model: 'm', messages: [] }, false),
    ).rejects.toThrow(/no live SSE session|publisher offline/);
  });

  it('enqueueInferTask rejects when the row is inactive', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess', status: 'inactive', inactiveSince: new Date() })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    await expect(
      svc.enqueueInferTask('vllm-local', { model: 'm', messages: [] }, false),
    ).rejects.toThrow(/inactive|publisher offline/);
  });

  it('enqueueInferTask rejects when the LLM is public (not virtual)', async () => {
    const repo = mockRepo([makeLlm({ name: 'qwen3-thinking', kind: 'public', providerSessionId: null })]);
    const svc = new VirtualLlmService(repo);
    await expect(
      svc.enqueueInferTask('qwen3-thinking', { model: 'm', messages: [] }, false),
    ).rejects.toThrow(/not a virtual provider/);
  });

  // ── task completion / failure ──

  it('completeTask resolves the pending non-streaming promise', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    const ref = await svc.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );
    expect(svc.completeTask(ref.taskId, { status: 200, body: { ok: true } })).toBe(true);
    await expect(ref.done).resolves.toEqual({ status: 200, body: { ok: true } });
  });

  it('streaming: pushTaskChunk fans chunks to subscribers; done resolves the ref', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    const ref = await svc.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }], stream: true },
      true,
    );
    const got: Array<{ data: string; done?: boolean }> = [];
    ref.onChunk((c) => got.push(c));

    expect(svc.pushTaskChunk(ref.taskId, { data: 'hello' })).toBe(true);
    expect(svc.pushTaskChunk(ref.taskId, { data: ' world' })).toBe(true);
    // The chunk flagged `done` is still delivered to subscribers and also
    // settles `ref.done`.
    expect(svc.pushTaskChunk(ref.taskId, { data: '[DONE]', done: true })).toBe(true);

    expect(got.map((c) => c.data)).toEqual(['hello', ' world', '[DONE]']);
    await expect(ref.done).resolves.toMatchObject({ status: 200 });
  });

  it('failTask rejects the pending promise with a clear error', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    const ref = await svc.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );
    expect(svc.failTask(ref.taskId, new Error('upstream blew up'))).toBe(true);
    await expect(ref.done).rejects.toThrow(/upstream blew up/);
  });

  it('unbindSession rejects in-flight tasks for that session', async () => {
    const repo = mockRepo([makeLlm({ name: 'vllm-local', providerSessionId: 'sess' })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());
    const ref = await svc.enqueueInferTask(
      'vllm-local',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );
    await svc.unbindSession('sess');
    await expect(ref.done).rejects.toThrow(/publisher disconnected/);
  });

  // ── gcSweep ──

  it('gcSweep flips heartbeat-stale active virtuals to inactive', async () => {
    const long = new Date(Date.now() - 5 * 60 * 1000); // 5 min ago — past the 90-s cutoff
    const recent = new Date(Date.now() - 30 * 1000);   // 30 s ago — within the cutoff
    const repo = mockRepo([
      makeLlm({ name: 'stale', providerSessionId: 'a', status: 'active', lastHeartbeatAt: long }),
      makeLlm({ name: 'fresh', providerSessionId: 'b', status: 'active', lastHeartbeatAt: recent }),
    ]);
    const svc = new VirtualLlmService(repo);
    const result = await svc.gcSweep();
    expect(result.markedInactive).toBe(1);
    expect((await repo.findByName('stale'))?.status).toBe('inactive');
    expect((await repo.findByName('fresh'))?.status).toBe('active');
  });

  it('gcSweep deletes virtuals inactive past the 4h retention window', async () => {
    const ancient = new Date(Date.now() - 5 * 60 * 60 * 1000); // 5 h ago
    const fresh = new Date(Date.now() - 1 * 60 * 60 * 1000);   // 1 h ago
    const repo = mockRepo([
      makeLlm({ name: 'old', providerSessionId: 'a', status: 'inactive', inactiveSince: ancient }),
      makeLlm({ name: 'recent', providerSessionId: 'b', status: 'inactive', inactiveSince: fresh }),
      makeLlm({ name: 'public-survivor', providerSessionId: null, kind: 'public' }),
    ]);
    const svc = new VirtualLlmService(repo);
    const result = await svc.gcSweep();
    expect(result.deleted).toBe(1);
    expect(await repo.findByName('old')).toBeNull();
    expect(await repo.findByName('recent')).not.toBeNull();
    expect(await repo.findByName('public-survivor')).not.toBeNull();
  });

  // ── v2: wake-before-infer ──

  it('hibernating: dispatches a wake task first and waits for it to complete before infer', async () => {
    const repo = mockRepo([makeLlm({ name: 'sleeping', providerSessionId: 'sess', status: 'hibernating' })]);
    const svc = new VirtualLlmService(repo);
    const session = fakeSession();
    svc.bindSession('sess', session);

    // Kick off enqueueInferTask. It blocks on the wake task.
    const inferPromise = svc.enqueueInferTask(
      'sleeping',
      { model: 'm', messages: [{ role: 'user', content: 'hi' }] },
      false,
    );

    // Wait a tick so the wake task gets pushed.
    await new Promise((r) => setTimeout(r, 0));
    expect(session.tasks).toHaveLength(1);
    const wakeTask = session.tasks[0] as { kind: string; taskId: string; llmName: string };
    expect(wakeTask.kind).toBe('wake');
    expect(wakeTask.llmName).toBe('sleeping');

    // Resolve the wake task — service flips the row to active, then
    // pushes the infer task on the same session.
    expect(svc.completeTask(wakeTask.taskId, { status: 200, body: { ok: true } })).toBe(true);
    const ref = await inferPromise;
    expect(session.tasks).toHaveLength(2);
    const inferTask = session.tasks[1] as { kind: string; taskId: string };
    expect(inferTask.kind).toBe('infer');
    expect(inferTask.taskId).toBe(ref.taskId);

    // The row should be active now — concurrent callers won't trigger another wake.
    const row = await repo.findByName('sleeping');
    expect(row?.status).toBe('active');
  });

  it('hibernating: concurrent infer requests share a single wake task', async () => {
    const repo = mockRepo([makeLlm({ name: 'sleeping', providerSessionId: 'sess', status: 'hibernating' })]);
    const svc = new VirtualLlmService(repo);
    const session = fakeSession();
    svc.bindSession('sess', session);

    // Fire 3 concurrent infer requests against the same hibernating LLM.
    const reqs = [
      svc.enqueueInferTask('sleeping', { model: 'm', messages: [] }, false),
      svc.enqueueInferTask('sleeping', { model: 'm', messages: [] }, false),
      svc.enqueueInferTask('sleeping', { model: 'm', messages: [] }, false),
    ];

    await new Promise((r) => setTimeout(r, 0));
    // Exactly one wake task pushed, regardless of concurrent infers.
    const wakeTasks = (session.tasks as Array<{ kind: string; taskId: string }>).filter((t) => t.kind === 'wake');
    expect(wakeTasks).toHaveLength(1);

    // Take the id from the filtered wake frame rather than assuming the
    // wake frame sits at index 0 of the session's task list.
    const wakeTaskId = wakeTasks[0]!.taskId;
    expect(svc.completeTask(wakeTaskId, { status: 200, body: { ok: true } })).toBe(true);

    const refs = await Promise.all(reqs);
    // After wake, all 3 infer tasks pushed — total session tasks = 1 wake + 3 infer.
    const inferTasks = (session.tasks as Array<{ kind: string; taskId: string }>).filter((t) => t.kind === 'infer');
    expect(inferTasks).toHaveLength(3);
    // Each returned ref must correspond to one of the infer frames that was
    // actually pushed to the session (order-independent comparison).
    expect(refs.map((r) => r.taskId).sort()).toEqual(inferTasks.map((t) => t.taskId).sort());
  });

  it('hibernating: rejects when the wake task fails', async () => {
    const repo = mockRepo([makeLlm({ name: 'broken', providerSessionId: 'sess', status: 'hibernating' })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());

    const inferPromise = svc.enqueueInferTask(
      'broken',
      { model: 'm', messages: [] },
      false,
    );
    await new Promise((r) => setTimeout(r, 0));

    // Get the wake task id from the in-flight tasks map (its only entry).
    // We test the failure path via failTask which is part of the public
    // surface used by the result-POST route handler.
    const taskIds: string[] = [];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    for (const id of (svc as any).tasksById.keys()) taskIds.push(id);
    expect(taskIds).toHaveLength(1);
    expect(svc.failTask(taskIds[0]!, new Error('wake recipe failed'))).toBe(true);

    await expect(inferPromise).rejects.toThrow(/wake recipe failed/);

    // Row stayed hibernating — the next request will get another wake try.
    const row = await repo.findByName('broken');
    expect(row?.status).toBe('hibernating');
  });

  it('inactive: still rejects with 503 (publisher offline) — wake path only fires for hibernating', async () => {
    const repo = mockRepo([makeLlm({ name: 'gone', providerSessionId: 'sess', status: 'inactive', inactiveSince: new Date() })]);
    const svc = new VirtualLlmService(repo);
    svc.bindSession('sess', fakeSession());

    await expect(
      svc.enqueueInferTask('gone', { model: 'm', messages: [] }, false),
    ).rejects.toThrow(/inactive|publisher offline/);
  });

  it('gcSweep is idempotent — running twice in a row is a no-op the second time', async () => {
    const long = new Date(Date.now() - 5 * 60 * 1000);
    const repo = mockRepo([
      makeLlm({ name: 'stale', providerSessionId: 'a', status: 'active', lastHeartbeatAt: long }),
    ]);
    const svc = new VirtualLlmService(repo);
    const first = await svc.gcSweep();
    const second = await svc.gcSweep();
    expect(first.markedInactive).toBe(1);
    // The first sweep already flipped the row; it was only just marked
    // inactive, so the second sweep neither re-marks nor deletes it.
    expect(second.markedInactive).toBe(0);
    expect(second.deleted).toBe(0);
  });
});