feat(mcpd+db): Llm.poolName + chat dispatcher pool failover (v4 Stage 1)

Adds LB-pool-by-shared-name without introducing a new resource. The
existing `Llm.name` stays globally unique; a new optional `poolName`
column declares membership in a pool. Multiple Llms sharing a non-null
`poolName` stack into one load-balanced pool that the chat dispatcher
expands at request time.

Effective pool key = `poolName ?? name`. Solo rows (poolName=null) are
addressable as a "pool of 1" via their own name, so existing single-Llm
agents and YAMLs keep working unchanged. A solo row whose name happens
to match an explicit poolName joins the same pool — by design — so an
operator can transparently promote an existing Llm to pool seed.

Dispatcher (chat.service): prepareContext now resolves a randomly
shuffled list of viable pool candidates (status != inactive) once per
turn. On transport-level failure (network, virtual publisher
disconnect), runOneInference and streamInference advance to the next
candidate in that list until one succeeds or the list is exhausted.
Streaming failover only covers "failed before first chunk" — once we've
yielded text, we're committed to that backend. Auth/4xx errors surfaced
as result.status are NOT retried; siblings with the same key/model would
fail identically.

When the agent's pinned Llm is itself inactive but a sibling pool
member is up, dispatch transparently uses the sibling — that's the
whole point. When every member is inactive, prepareContext throws a
clear "No active Llm in pool '<key>' (pinned: <name>)" error rather
than letting the dispatcher's "exhausted" branch surface it.

Tests:
- 5 new chat-service tests for pool dispatch / failover / pinned-down /
  all-inactive (chat-service.test.ts).
- 7 new db schema tests for the column, the unique-name invariant, the
  fallback-to-name semantics, and the solo-name-joins-explicit-pool
  edge case (llm-pool-schema.test.ts).
- mcpd 865/865 (was 860; +5), db pool-schema 7/7, no regressions.

Stage 2 (next): HTTP route /api/v1/llms/<name>/members + aggregate pool
stats on the existing single-Llm route, CLI POOL column + describe
block + --pool-name flag, yaml round-trip.
This commit is contained in:
Michal
2026-04-27 22:02:41 +01:00
parent f5bdeea8e7
commit 7949e1393d
9 changed files with 586 additions and 71 deletions

View File

@@ -0,0 +1,129 @@
/**
* v4 schema-level tests for `Llm.poolName` and the dispatcher's
* `findByPoolName` query semantics. Lives in the db package because it
* exercises the actual Prisma column + index — the mcpd-side unit tests
* already cover the dispatcher's behavior with a mocked LlmService.
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import type { PrismaClient } from '@prisma/client';
import { setupTestDb, cleanupTestDb, clearAllTables } from './helpers.js';
/**
 * Test-local re-implementation of the LlmRepository pool lookup, so the
 * schema can be verified directly against Prisma.
 *
 * Matches every Llm row whose `poolName` equals the given key, plus any row
 * whose `poolName` is NULL and whose `name` equals it. Rows are requested in
 * ascending `name` order.
 *
 * @param prisma - Client the query is issued through.
 * @param poolName - Pool key to look up.
 * @returns Whatever `prisma.llm.findMany` returns for that filter.
 */
function findByPoolName(prisma: PrismaClient, poolName: string) {
  // Rows that declare membership in the pool explicitly.
  const explicitMember = { poolName };
  // Rows without a pool fall back to their own name as the pool key.
  const hasNoPool = { poolName: null };
  const namedLikePool = { name: poolName };

  return prisma.llm.findMany({
    orderBy: { name: 'asc' },
    where: { OR: [explicitMember, { AND: [hasNoPool, namedLikePool] }] },
  });
}
describe('Llm.poolName (v4)', () => {
  let prisma: PrismaClient;

  /** Reduces a list of rows to just their `name` values (as a new array). */
  const namesOf = (rows: ReadonlyArray<{ name: string }>): string[] => rows.map((row) => row.name);

  // A single client from setupTestDb is shared by the whole suite; the hook
  // is given a 30s timeout.
  beforeAll(async () => {
    prisma = await setupTestDb();
  }, 30_000);

  afterAll(async () => {
    await cleanupTestDb();
  });

  // clearAllTables runs before every test so cases do not see each other's rows.
  beforeEach(async () => {
    await clearAllTables(prisma);
  });

  it('defaults poolName to NULL for freshly inserted rows', async () => {
    const { poolName } = await prisma.llm.create({
      data: { name: 'plain', type: 'openai', model: 'gpt-4o' },
    });

    expect(poolName).toBeNull();
  });

  it('allows multiple rows to share a poolName (the v4 stacking behavior)', async () => {
    // Inserted one at a time, in this order.
    for (const name of ['qwen-prod-1', 'qwen-prod-2', 'qwen-prod-3']) {
      await prisma.llm.create({
        data: { name, type: 'openai', model: 'qwen3-thinking', poolName: 'qwen-pool' },
      });
    }

    const pooled = await findByPoolName(prisma, 'qwen-pool');

    expect(namesOf(pooled).sort()).toEqual(['qwen-prod-1', 'qwen-prod-2', 'qwen-prod-3']);
  });

  it('keeps `name` globally unique even when multiple rows share a poolName', async () => {
    // The same row is inserted twice; only the second attempt must be rejected.
    const insertQwenProd1 = () =>
      prisma.llm.create({
        data: { name: 'qwen-prod-1', type: 'openai', model: 'qwen3-thinking', poolName: 'qwen-pool' },
      });

    await insertQwenProd1();

    await expect(insertQwenProd1()).rejects.toThrow();
  });

  it('falls back to `name` as the effective pool key for solo rows (poolName=NULL)', async () => {
    // A row without a poolName is reachable through its own name: a "pool of 1".
    await prisma.llm.create({
      data: { name: 'gpt-4o', type: 'openai', model: 'gpt-4o' },
    });

    const soloPool = await findByPoolName(prisma, 'gpt-4o');

    expect(namesOf(soloPool)).toEqual(['gpt-4o']);
  });

  it('a solo row with name=X joins the same pool as explicit poolName=X members', async () => {
    // Edge case: a solo Llm named "qwen-pool" already exists when another row
    // arrives with poolName=qwen-pool. The effective pool key
    // (poolName ?? name) is "qwen-pool" for both, so the lookup must return
    // both rows.
    await prisma.llm.create({
      data: { name: 'qwen-pool', type: 'openai', model: 'qwen3-thinking' },
    });
    await prisma.llm.create({
      data: { name: 'qwen-prod-2', type: 'openai', model: 'qwen3-thinking', poolName: 'qwen-pool' },
    });

    const merged = await findByPoolName(prisma, 'qwen-pool');

    expect(namesOf(merged).sort()).toEqual(['qwen-pool', 'qwen-prod-2']);
  });

  it('does not match a solo row by `name` when its poolName is set to something else', async () => {
    // A row with name=foo and poolName=bar belongs to pool "bar" only: the
    // explicit poolName wins over the name fallback, so looking up "foo"
    // must come back empty.
    await prisma.llm.create({
      data: { name: 'foo', type: 'openai', model: 'm', poolName: 'bar' },
    });

    const underFoo = await findByPoolName(prisma, 'foo');
    expect(namesOf(underFoo)).toEqual([]);

    const underBar = await findByPoolName(prisma, 'bar');
    expect(namesOf(underBar)).toEqual(['foo']);
  });

  it('updates poolName via update() and round-trips correctly', async () => {
    const { id, poolName: initialPool } = await prisma.llm.create({
      data: { name: 'qwen-prod-1', type: 'openai', model: 'qwen3-thinking' },
    });
    expect(initialPool).toBeNull();

    // Move the row into a pool...
    const { poolName: joinedPool } = await prisma.llm.update({
      where: { id },
      data: { poolName: 'qwen-pool' },
    });
    expect(joinedPool).toBe('qwen-pool');

    // ...then back to solo (NULL).
    const { poolName: clearedPool } = await prisma.llm.update({
      where: { id },
      data: { poolName: null },
    });
    expect(clearedPool).toBeNull();
  });
});