src/db/tests/inference-task-schema.test.ts

/**
 * v5 db-level tests for the InferenceTask queue. Exercises the actual
 * column shapes + index lookups; the mcpd-side service tests cover the
 * state machine + signal channels with a mocked repo.
 */
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import type { PrismaClient } from '@prisma/client';
import { setupTestDb, cleanupTestDb, clearAllTables } from './helpers.js';

/**
 * Inserts a throwaway user row and returns its id, for use as the
 * `ownerId` of the InferenceTask rows the tests create.
 *
 * The email combines the current millisecond timestamp with a random
 * base-36 suffix. The timestamp alone repeats when two owners are created
 * within the same millisecond, which would hand both rows the same address.
 * NOTE(review): presumably `User.email` is unique — confirm against the schema.
 *
 * @param prisma - Client connected to the test database.
 * @returns The id of the newly created user.
 */
async function makeOwner(prisma: PrismaClient): Promise<string> {
  const uniqueTag = `${String(Date.now())}-${Math.random().toString(36).slice(2)}`;
  const u = await prisma.user.create({
    data: { email: `owner-${uniqueTag}@test`, passwordHash: 'x' },
  });
  return u.id;
}

describe('InferenceTask schema (v5)', () => {
  // Timeout handed to the beforeAll hook, in milliseconds.
  const dbSetupTimeoutMs = 30_000;

  // Client shared by every test in this suite; assigned once in beforeAll.
  let prisma: PrismaClient;

  beforeAll(async (): Promise<void> => {
    const client = await setupTestDb();
    prisma = client;
  }, dbSetupTimeoutMs);

  afterAll(async (): Promise<void> => {
    await cleanupTestDb();
  });

  // Runs clearAllTables ahead of every test (presumably so rows from one
  // case cannot leak into the next — see the helper for what it removes).
  beforeEach(async (): Promise<void> => {
    await clearAllTables(prisma);
  });

  it('defaults a fresh row to status=pending with claim/completion fields null', async () => {
    const ownerId = await makeOwner(prisma);

    // Only routing fields, the request body and the owner are supplied;
    // every column asserted below is left for the database to fill in.
    const minimalTask = {
      poolName: 'qwen-pool',
      llmName: 'qwen-prod-1',
      model: 'qwen3-thinking',
      requestBody: { messages: [{ role: 'user', content: 'hi' }] },
      ownerId,
    };
    const created = await prisma.inferenceTask.create({ data: minimalTask });

    expect(created.status).toBe('pending');

    // Claim, stream and completion columns must all read back as null.
    const unsetColumns = [
      'claimedBy',
      'claimedAt',
      'streamStartedAt',
      'completedAt',
      'responseBody',
    ] as const;
    for (const column of unsetColumns) {
      expect(created[column]).toBeNull();
    }

    expect(created.streaming).toBe(false);
  });

  it('roundtrips streaming=true and a structured requestBody/responseBody', async () => {
    const ownerId = await makeOwner(prisma);

    // Request payload mixing an array of objects, a float and nested objects.
    const sentBody = {
      messages: [{ role: 'user', content: 'hello' }],
      temperature: 0.2,
      tools: [{ type: 'function', function: { name: 'noop' } }],
    };
    const inserted = await prisma.inferenceTask.create({
      data: {
        poolName: 'qwen-pool',
        llmName: 'qwen-prod-1',
        model: 'qwen3',
        requestBody: sentBody,
        streaming: true,
        ownerId,
      },
    });
    expect(inserted.streaming).toBe(true);
    expect(inserted.requestBody).toEqual(sentBody);

    // Completion is written by a follow-up update on the same row, then the
    // response body and timestamp are compared with what was sent.
    const replyBody = { choices: [{ message: { role: 'assistant', content: 'world' } }] };
    const finishedAt = new Date();
    const completed = await prisma.inferenceTask.update({
      where: { id: inserted.id },
      data: { status: 'completed', responseBody: replyBody, completedAt: finishedAt },
    });
    expect(completed.responseBody).toEqual(replyBody);
    expect(completed.completedAt?.getTime()).toBe(finishedAt.getTime());
  });

  it('compound index supports the dispatcher\'s drain query (status + poolName IN ...)', async () => {
    // The actual EXPLAIN/index-use check is too brittle for unit tests;
    // here we verify the QUERY shape that the repo's findPendingForPools
    // issues — same WHERE/ORDER BY — returns the expected rows in FIFO
    // order. Index usage is implied by the Prisma model definition.
    const ownerId = await makeOwner(prisma);

    // Short pause between inserts so consecutive rows get distinct createdAt
    // values. Rows sharing a createdAt have no defined relative order under
    // ORDER BY createdAt, which would make the FIFO assertions flaky.
    const pause = (): Promise<void> => new Promise((resolve) => setTimeout(resolve, 5));

    const t1 = await prisma.inferenceTask.create({
      data: { poolName: 'pool-a', llmName: 'a-1', model: 'm', requestBody: {}, ownerId },
    });
    await pause();
    const t2 = await prisma.inferenceTask.create({
      data: { poolName: 'pool-a', llmName: 'a-2', model: 'm', requestBody: {}, ownerId },
    });
    // The pool-b row must sort after t2, so this gap needs a pause as well.
    await pause();
    const t3 = await prisma.inferenceTask.create({
      data: { poolName: 'pool-b', llmName: 'b-1', model: 'm', requestBody: {}, ownerId },
    });
    // One row in pool-a is no longer pending — must be excluded.
    await prisma.inferenceTask.create({
      data: { poolName: 'pool-a', llmName: 'a-3', model: 'm', requestBody: {}, ownerId, status: 'completed' },
    });

    const drained = await prisma.inferenceTask.findMany({
      where: { status: 'pending', poolName: { in: ['pool-a', 'pool-b'] } },
      orderBy: { createdAt: 'asc' },
    });
    // Compare against the ids captured at insert time. Reading the third id
    // back out of `drained` would be tautological and would throw instead of
    // failing cleanly if fewer than three rows came back.
    expect(drained.map((r) => r.id)).toEqual([t1.id, t2.id, t3.id]);
    expect(drained.map((r) => r.poolName)).toEqual(['pool-a', 'pool-a', 'pool-b']);
  });

  it('claimedBy index supports unbindSession revert (worker disconnect path)', async () => {
    const ownerId = await makeOwner(prisma);

    // Columns shared by every fixture row; only status/claimedBy vary.
    const common = { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId };
    const fixtures = [
      { status: 'claimed', claimedBy: 'sess-A' },
      { status: 'running', claimedBy: 'sess-A' },
      { status: 'claimed', claimedBy: 'sess-B' },
      // Completed-but-claimedBy=sess-A row: must NOT revert (terminal state).
      { status: 'completed', claimedBy: 'sess-A' },
    ] as const;
    // Insert one at a time, in the listed order.
    for (const fixture of fixtures) {
      await prisma.inferenceTask.create({ data: { ...common, ...fixture } });
    }

    // Same filter shape as the disconnect path: rows held by one session
    // that are still in a non-terminal claimed/running state.
    const stillHeldByA = await prisma.inferenceTask.findMany({
      where: { claimedBy: 'sess-A', status: { in: ['claimed', 'running'] } },
    });
    expect(stillHeldByA).toHaveLength(2);
  });

  it('GC predicate (terminal + completedAt < cutoff) is index-friendly and filters correctly', async () => {
    const ownerId = await makeOwner(prisma);

    const hourMs = 60 * 60 * 1000;
    const dayMs = 24 * hourMs;
    // Timestamp the given number of milliseconds before "now".
    const msAgo = (ms: number): Date => new Date(Date.now() - ms);

    const old = msAgo(8 * dayMs); // 8 d ago
    const recent = msAgo(1 * hourMs); // 1 h ago

    // Columns shared by every fixture row.
    const common = { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId };
    const fixtures = [
      // Two terminal rows completed before the cutoff — the only expected matches.
      { status: 'completed', completedAt: old },
      { status: 'error', completedAt: old, errorMessage: 'boom' },
      // Inside retention — must not be picked up by GC.
      { status: 'completed', completedAt: recent },
      // Pending row — must not be picked up by terminal GC.
      { status: 'pending' },
    ] as const;
    // Insert one at a time, in the listed order.
    for (const fixture of fixtures) {
      await prisma.inferenceTask.create({ data: { ...common, ...fixture } });
    }

    const cutoff = msAgo(7 * dayMs);
    const expired = await prisma.inferenceTask.findMany({
      where: {
        status: { in: ['completed', 'error', 'cancelled'] },
        completedAt: { lt: cutoff },
      },
    });
    expect(expired).toHaveLength(2);
  });

  it('agentId is nullable — direct chat-llm tasks have no agent', async () => {
    const ownerId = await makeOwner(prisma);

    // agentId is passed as an explicit null rather than simply omitted.
    const taskWithoutAgent = {
      poolName: 'p',
      llmName: 'l',
      model: 'm',
      requestBody: {},
      ownerId,
      agentId: null,
    };
    const { agentId } = await prisma.inferenceTask.create({ data: taskWithoutAgent });

    expect(agentId).toBeNull();
  });
});
feat(mcpd+db): durable InferenceTask queue + state machine (v5 Stage 1) The persistence + signaling layer for v5. No integration with the existing in-flight inference path yet — that's Stage 2. This commit just lands the durable queue underneath, with a state machine that mcpd's HTTP handlers, the worker result-POST route, and the GC sweep will all build on. Schema (src/db/prisma/schema.prisma + migration): - New `InferenceTask` model + `InferenceTaskStatus` enum (pending|claimed|running|completed|error|cancelled). - Routing fields stored at enqueue time so a later rename of `Llm.poolName` doesn't reroute already-queued work: `poolName` (effective pool key), `llmName` (pinned target), `model`, `tier`. - Worker tracking: `claimedBy` (providerSessionId) + `claimedAt`, cleared on revert. - Bodies as `Json`: requestBody (always set), responseBody (set at completion). Streaming chunks are NOT persisted — too expensive at delta granularity. The final assembled body lands once per task. - Lifecycle timestamps: createdAt, claimedAt, streamStartedAt, completedAt. Plus ownerId (RBAC + audit) and agentId (null for direct chat-llm calls). - Indexes for the hot paths: (status, poolName) for the dispatcher's drain query, claimedBy for the disconnect revert, completedAt for the GC retention sweep, owner/agent for the async API listing. Repository (src/mcpd/src/repositories/inference-task.repository.ts): - CRUD + state transitions as conditional CAS via `updateMany`. Two workers racing to claim the same row both run the UPDATE; whichever the DB serializes first sees affected=1 and gets the row, the loser sees 0 and falls through to the next candidate. No application-level locking required. - findPendingForPools(poolNames[]) for the worker drain on bind. - findHeldBy(claimedBy) for the unbindSession revert. - findStalePending + findExpiredTerminal for the GC sweep. 
Service (src/mcpd/src/services/inference-task.service.ts): - Owns the in-process EventEmitter that wakes blocked HTTP handlers when a worker POSTs results. The DB row is the source of truth for state; the EventEmitter just signals "go re-read row X" so we don't have to poll. Single-instance assumption for v5; pg LISTEN/NOTIFY is the v6 swap when scaling horizontally — no schema change needed, just replace the emitter wakeup. - waitFor(taskId, timeoutMs) returns { done, chunks }: the terminal promise + an async iterator of streaming deltas. Throws on cancel (clear message) or error (worker's errorMessage propagates) or timeout. Polls the row once at subscribe time so an already-terminal task resolves immediately without waiting for an event that's never coming. - gcSweep flips stale pending rows to error (with a clear message about the timeout) and deletes terminal rows past retention. Defaults: 1h pending timeout, 7d terminal retention; both configurable. Tests: - 6 db-level schema tests (defaults, json roundtrip, drain query shape, claimedBy filter, GC predicate, agentId nullable). - 13 service tests covering enqueue, the CAS race on tryClaim, complete/fail/cancel, idempotent terminal transitions, revertHeldBy on disconnect, and the full waitFor signal lifecycle (immediate resolve, wake on event, chunk streaming, cancel/error/timeout paths). Plus a gcSweep test with a fixed clock. mcpd 881/881 (was 868; +13). db pool-schema 14/14, +6 new inference-task-schema. Pre-existing failures in models.test.ts (Secret FK fixture issue, also fails on main HEAD) are unrelated. Stage 2 (next): VirtualLlmService rewires through this — remove the in-memory pendingTasks map; enqueue creates a row, dispatch picks an active session, the result-route updates the row + emits the wakeup. Worker disconnect reverts; worker bind drains. 2026-04-28 02:14:45 +01:00			`/**`
			`* v5 db-level tests for the InferenceTask queue. Exercises the actual`
			`* column shapes + index lookups; the mcpd-side service tests cover the`
			`* state machine + signal channels with a mocked repo.`
			`*/`
			`import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';`
			`import type { PrismaClient } from '@prisma/client';`
			`import { setupTestDb, cleanupTestDb, clearAllTables } from './helpers.js';`

			`async function makeOwner(prisma: PrismaClient): Promise<string> {`
			`const u = await prisma.user.create({`
			data: { email: `owner-${String(Date.now())}@test`, passwordHash: 'x' },
			`});`
			`return u.id;`
			`}`

			`describe('InferenceTask schema (v5)', () => {`
			`let prisma: PrismaClient;`

			`beforeAll(async () => {`
			`prisma = await setupTestDb();`
			`}, 30_000);`

			`afterAll(async () => {`
			`await cleanupTestDb();`
			`});`

			`beforeEach(async () => {`
			`await clearAllTables(prisma);`
			`});`

			`it('defaults a fresh row to status=pending with claim/completion fields null', async () => {`
			`const ownerId = await makeOwner(prisma);`
			`const row = await prisma.inferenceTask.create({`
			`data: {`
			`poolName: 'qwen-pool',`
			`llmName: 'qwen-prod-1',`
			`model: 'qwen3-thinking',`
			`requestBody: { messages: [{ role: 'user', content: 'hi' }] },`
			`ownerId,`
			`},`
			`});`
			`expect(row.status).toBe('pending');`
			`expect(row.claimedBy).toBeNull();`
			`expect(row.claimedAt).toBeNull();`
			`expect(row.streamStartedAt).toBeNull();`
			`expect(row.completedAt).toBeNull();`
			`expect(row.responseBody).toBeNull();`
			`expect(row.streaming).toBe(false);`
			`});`

			`it('roundtrips streaming=true and a structured requestBody/responseBody', async () => {`
			`const ownerId = await makeOwner(prisma);`
			`const requestBody = {`
			`messages: [{ role: 'user', content: 'hello' }],`
			`temperature: 0.2,`
			`tools: [{ type: 'function', function: { name: 'noop' } }],`
			`};`
			`const row = await prisma.inferenceTask.create({`
			`data: {`
			`poolName: 'qwen-pool',`
			`llmName: 'qwen-prod-1',`
			`model: 'qwen3',`
			`requestBody,`
			`streaming: true,`
			`ownerId,`
			`},`
			`});`
			`expect(row.streaming).toBe(true);`
			`expect(row.requestBody).toEqual(requestBody);`

			`const completedAt = new Date();`
			`const responseBody = { choices: [{ message: { role: 'assistant', content: 'world' } }] };`
			`const updated = await prisma.inferenceTask.update({`
			`where: { id: row.id },`
			`data: { status: 'completed', responseBody, completedAt },`
			`});`
			`expect(updated.responseBody).toEqual(responseBody);`
			`expect(updated.completedAt?.getTime()).toBe(completedAt.getTime());`
			`});`

			`it('compound index supports the dispatcher\'s drain query (status + poolName IN ...)', async () => {`
			`// The actual EXPLAIN/index-use check is too brittle for unit tests;`
			`// here we verify the QUERY shape that the repo's findPendingForPools`
			`// issues — same WHERE/ORDER BY — returns the expected rows in FIFO`
			`// order. Index usage is implied by the Prisma model definition.`
			`const ownerId = await makeOwner(prisma);`
			`const t1 = await prisma.inferenceTask.create({`
			`data: { poolName: 'pool-a', llmName: 'a-1', model: 'm', requestBody: {}, ownerId },`
			`});`
			`await new Promise((r) => setTimeout(r, 5));`
			`const t2 = await prisma.inferenceTask.create({`
			`data: { poolName: 'pool-a', llmName: 'a-2', model: 'm', requestBody: {}, ownerId },`
			`});`
			`await prisma.inferenceTask.create({`
			`data: { poolName: 'pool-b', llmName: 'b-1', model: 'm', requestBody: {}, ownerId },`
			`});`
			`// One row in pool-a is no longer pending — must be excluded.`
			`await prisma.inferenceTask.create({`
			`data: { poolName: 'pool-a', llmName: 'a-3', model: 'm', requestBody: {}, ownerId, status: 'completed' },`
			`});`

			`const drained = await prisma.inferenceTask.findMany({`
			`where: { status: 'pending', poolName: { in: ['pool-a', 'pool-b'] } },`
			`orderBy: { createdAt: 'asc' },`
			`});`
			`expect(drained.map((r) => r.id)).toEqual([t1.id, t2.id, drained[2]!.id]);`
			`expect(drained.map((r) => r.poolName)).toEqual(['pool-a', 'pool-a', 'pool-b']);`
			`});`

			`it('claimedBy index supports unbindSession revert (worker disconnect path)', async () => {`
			`const ownerId = await makeOwner(prisma);`
			`await prisma.inferenceTask.create({`
			`data: { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId, status: 'claimed', claimedBy: 'sess-A' },`
			`});`
			`await prisma.inferenceTask.create({`
			`data: { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId, status: 'running', claimedBy: 'sess-A' },`
			`});`
			`await prisma.inferenceTask.create({`
			`data: { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId, status: 'claimed', claimedBy: 'sess-B' },`
			`});`
			`// Completed-but-claimedBy=sess-A row: must NOT revert (terminal state).`
			`await prisma.inferenceTask.create({`
			`data: { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId, status: 'completed', claimedBy: 'sess-A' },`
			`});`

			`const heldByA = await prisma.inferenceTask.findMany({`
			`where: { claimedBy: 'sess-A', status: { in: ['claimed', 'running'] } },`
			`});`
			`expect(heldByA).toHaveLength(2);`
			`});`

			`it('GC predicate (terminal + completedAt < cutoff) is index-friendly and filters correctly', async () => {`
			`const ownerId = await makeOwner(prisma);`
			`const old = new Date(Date.now() - 8 * 24 * 60 * 60 * 1000); // 8 d ago`
			`const recent = new Date(Date.now() - 1 * 60 * 60 * 1000); // 1 h ago`
			`await prisma.inferenceTask.create({`
			`data: { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId, status: 'completed', completedAt: old },`
			`});`
			`await prisma.inferenceTask.create({`
			`data: { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId, status: 'error', completedAt: old, errorMessage: 'boom' },`
			`});`
			`// Inside retention — must not be picked up by GC.`
			`await prisma.inferenceTask.create({`
			`data: { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId, status: 'completed', completedAt: recent },`
			`});`
			`// Pending row — must not be picked up by terminal GC.`
			`await prisma.inferenceTask.create({`
			`data: { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId, status: 'pending' },`
			`});`

			`const cutoff = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);`
			`const expired = await prisma.inferenceTask.findMany({`
			`where: {`
			`status: { in: ['completed', 'error', 'cancelled'] },`
			`completedAt: { lt: cutoff },`
			`},`
			`});`
			`expect(expired).toHaveLength(2);`
			`});`

			`it('agentId is nullable — direct chat-llm tasks have no agent', async () => {`
			`const ownerId = await makeOwner(prisma);`
			`const row = await prisma.inferenceTask.create({`
			`data: { poolName: 'p', llmName: 'l', model: 'm', requestBody: {}, ownerId, agentId: null },`
			`});`
			`expect(row.agentId).toBeNull();`
			`});`
			`});`