feat(mcpd+cli+mcplocal): /llms/<name>/members + POOL column + --pool-name (v4 Stage 2)

Surfaces the v4 pool model end-to-end:

- mcpd: GET /api/v1/llms/:name/members returns the effective pool the
  named anchor belongs to, plus aggregate stats (size, activeCount,
  explicit vs implicit pool key). RBAC inherits from `view:llms` —
  same as the single-Llm route. Members are full LlmView shapes so
  callers don't need a second roundtrip to render the pool block.

- mcpd: VirtualLlmService.register accepts an optional `poolName` on
  RegisterProviderInput; the route's `coerceProviderInput` validates
  the same character set as CreateLlmSchema.poolName. Backwards
  compatible — older mcplocals that don't send the field continue to
  publish solo Llms.

- CLI `get llm` table: new POOL column right after NAME. Solo rows
  show "-" so the "no pool / pool of 1" case is unambiguous (per
  user direction "make sure we see it, prominently visible and
  impossible to mistake").

- CLI `describe llm`: fetches /members and renders a Pool block at
  the top of the detail view when the row is in an explicit pool OR
  when its implicit pool has size > 1. Each member line shows
  kind/status; the anchor row gets "← this row". Block is suppressed
  for solo rows so describe stays compact in the common case.

- CLI `create llm --pool-name <name>` flag and apply schema both
  accept the new field. Yaml round-trip preserves it: get -o yaml
  emits `poolName: <name>`, apply -f re-imports it without diff.
  Verified end-to-end against the live mcpd.

- mcplocal: LlmProviderFileEntry gains optional `poolName`; main.ts
  and registrar.ts thread it through into the register payload. Use
  case for distributed inference: each user's mcplocal picks a
  unique `name` (e.g. `vllm-<host>-qwen3`) but a shared `poolName`
  (e.g. `user-vllm-qwen3-thinking`); agents see one logical pool
  that auto-grows as workers come online.

- Shell completions: regenerated from source via the existing
  scripts/generate-completions.ts. `--pool-name` now suggests in
  fish + bash for `mcpctl create llm`.

Tests: +3 new mcpd route tests for /members (explicit pool, solo
pool of 1, missing-anchor 404). All suites green:
  mcpd 868/868 (was 865, +3),
  mcplocal 723/723,
  cli 437/437.

Stage 3 (next): live smoke against 2 publishers sharing a pool name +
docs.
This commit is contained in:
Michal
2026-04-27 23:18:53 +01:00
parent 7949e1393d
commit e21f96080d
14 changed files with 213 additions and 6 deletions

View File

@@ -61,6 +61,10 @@ const LlmSpecSchema = z.object({
key: z.string().min(1),
}).nullable().optional(),
extraConfig: z.record(z.unknown()).default({}),
// v4: optional pool key. Same validation as on the mcpd side
// (CreateLlmSchema). Null means "solo Llm, effective pool key falls
// back to the row's own name".
poolName: z.string().min(1).max(100).regex(/^[a-z0-9-]+$/).nullable().optional(),
});
const AgentChatParamsAppliedSchema = z.object({

View File

@@ -263,6 +263,7 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
.option('--description <text>', 'Description')
.option('--api-key-ref <ref>', 'API key reference in SECRET/KEY form (e.g. anthropic-key/token)')
.option('--extra <entry>', 'Extra config key=value (repeat)', collect, [])
.option('--pool-name <pool>', 'Stack with other Llms sharing this pool name; agents pinned to any member dispatch across the pool')
.option('--force', 'Update if already exists')
.option('--skip-auth-check', 'Skip the upstream auth probe (for offline registration before infra exists)')
.action(async (name: string, opts) => {
@@ -274,6 +275,7 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
};
if (opts.url) body.url = opts.url;
if (opts.description !== undefined) body.description = opts.description;
if (opts.poolName !== undefined) body.poolName = opts.poolName;
if (opts.apiKeyRef) {
const slashIdx = (opts.apiKeyRef as string).indexOf('/');
if (slashIdx < 1) throw new Error(`Invalid --api-key-ref '${opts.apiKeyRef as string}'. Expected SECRET_NAME/KEY_NAME`);

View File

@@ -243,7 +243,15 @@ function formatSecretDetail(secret: Record<string, unknown>, showValues: boolean
return lines.join('\n');
}
function formatLlmDetail(llm: Record<string, unknown>): string {
/**
 * Shape of the `GET /api/v1/llms/:name/members` response that
 * `describe llm` consumes to render the Pool block.
 */
interface PoolMembersInfo {
  /** Effective pool key the anchor row resolved to. */
  poolName: string;
  /** The row's literal poolName field — null when the key falls back to the row's own name. */
  explicitPoolName: string | null;
  /** Total members in the pool, active and inactive alike. */
  size: number;
  /** Count of members whose status is 'active'. */
  activeCount: number;
  /** Member rows; fields are optional so partial payloads still render (displayed as '?'). */
  members: Array<{ id?: string; name: string; status?: string; kind?: string; url?: string }>;
}
function formatLlmDetail(llm: Record<string, unknown>, pool?: PoolMembersInfo): string {
const lines: string[] = [];
lines.push(`=== LLM: ${llm.name} ===`);
lines.push(`${pad('Name:')}${llm.name}`);
@@ -253,6 +261,29 @@ function formatLlmDetail(llm: Record<string, unknown>): string {
if (llm.url) lines.push(`${pad('URL:')}${llm.url}`);
if (llm.description) lines.push(`${pad('Description:')}${llm.description}`);
// v4 Pool block: only render when there's actually pool context to show.
// For solo Llms (poolName null AND pool size 1), suppress the section so
// describe stays compact for the common case. For explicit-pool members
// OR rows whose name is implicitly seeding a pool (size > 1), render up
// top so it's the first thing the operator sees — pool routing is a
// significant behavioral fact.
const poolNameVal = llm.poolName as string | null | undefined;
const isExplicitPool = poolNameVal !== null && poolNameVal !== undefined && poolNameVal !== '';
const isImplicitPool = pool !== undefined && pool.size > 1;
if (isExplicitPool || isImplicitPool) {
lines.push('');
lines.push('Pool:');
const effective = pool?.poolName ?? (poolNameVal ?? llm.name as string);
lines.push(` ${pad('Pool name:', 14)}${effective}${isExplicitPool ? '' : ' (implicit, falls back to name)'}`);
if (pool !== undefined) {
lines.push(` ${pad('Members:', 14)}${String(pool.size)} (${String(pool.activeCount)} active)`);
for (const m of pool.members) {
const youSuffix = m.name === llm.name ? ' ← this row' : '';
lines.push(` - ${m.name} [${m.kind ?? '?'}/${m.status ?? '?'}]${youSuffix}`);
}
}
}
const ref = llm.apiKeyRef as { name: string; key: string } | null | undefined;
lines.push('');
lines.push('API Key:');
@@ -982,9 +1013,22 @@ export function createDescribeCommand(deps: DescribeCommandDeps): Command {
case 'secretbackends':
deps.log(formatSecretBackendDetail(item));
break;
case 'llms':
deps.log(formatLlmDetail(item));
case 'llms': {
// v4: also fetch pool membership so the describe Pool block
// can show siblings + active counts. Best-effort — older
// mcpd versions without the /members route 404 here, in
// which case we render the row alone.
let poolInfo: PoolMembersInfo | undefined;
try {
poolInfo = await deps.client.get<PoolMembersInfo>(
`/api/v1/llms/${encodeURIComponent(item.name as string)}/members`,
);
} catch {
// Old mcpd without /members, or RBAC denial — fall back silently.
}
deps.log(formatLlmDetail(item, poolInfo));
break;
}
case 'projects': {
const [projectPrompts, llms] = await Promise.all([
deps.client

View File

@@ -136,10 +136,16 @@ interface LlmRow {
// mcpd responses that predate the kind/status columns).
kind?: 'public' | 'virtual';
status?: 'active' | 'inactive' | 'hibernating';
// v4: explicit pool key. NULL = solo Llm (effective pool = its own name).
poolName?: string | null;
}
// v4: POOL column placed right after NAME so an operator can't miss
// which Llms stack into the same dispatcher pool. Solo rows show "-"
// to make the "no pool / pool of 1" case unambiguous.
const llmColumns: Column<LlmRow>[] = [
{ header: 'NAME', key: 'name' },
{ header: 'POOL', key: (r) => (r.poolName !== null && r.poolName !== undefined && r.poolName !== '') ? r.poolName : '-', width: 18 },
{ header: 'KIND', key: (r) => r.kind ?? 'public', width: 8 },
{ header: 'STATUS', key: (r) => r.status ?? 'active', width: 12 },
{ header: 'TYPE', key: 'type', width: 12 },

View File

@@ -1,6 +1,6 @@
import type { FastifyInstance } from 'fastify';
import type { LlmService } from '../services/llm.service.js';
import { LlmAuthVerificationError } from '../services/llm.service.js';
import type { LlmService, LlmView } from '../services/llm.service.js';
import { LlmAuthVerificationError, effectivePoolName } from '../services/llm.service.js';
import { NotFoundError, ConflictError } from '../services/mcp-server.service.js';
export function registerLlmRoutes(
@@ -86,6 +86,44 @@ export function registerLlmRoutes(
throw err;
}
});
// v4: expose the effective pool that the named Llm belongs to. The explicit
// `/members` suffix keeps this route from colliding with the single-Llm
// `/api/v1/llms/:id` route — an Llm literally named "members" would
// otherwise be unaddressable.
// The response bundles members[] with a small stats header (size,
// activeCount, explicit vs effective pool key) so callers — agents.ts and
// the CLI — don't need a second roundtrip to compute pool stats.
app.get<{ Params: { name: string } }>('/api/v1/llms/:name/members', async (request, reply) => {
  try {
    const anchor = await getByIdOrName(service, request.params.name);
    const poolKey = effectivePoolName(anchor);
    const members = await service.listPoolMembers(poolKey);
    const activeCount = members.filter((member) => member.status === 'active').length;
    return {
      poolName: poolKey,
      explicitPoolName: anchor.poolName,
      size: members.length,
      activeCount,
      members,
    };
  } catch (err) {
    if (!(err instanceof NotFoundError)) throw err;
    reply.code(404);
    return { error: err.message };
  }
});
}
/** v4: convenience type for the new `/members` endpoint response. */
export interface PoolMembersResponse {
  /** Effective pool key (poolName ?? name on the anchor row). */
  poolName: string;
  /** Anchor row's literal poolName field — null when it falls back to its own name. */
  explicitPoolName: string | null;
  /** Total member count, inactive rows included. */
  size: number;
  /** Number of members whose status is 'active'. */
  activeCount: number;
  /** Full LlmView shapes so callers can render members without extra roundtrips. */
  members: LlmView[];
}
const CUID_RE = /^c[a-z0-9]{24}/i;

View File

@@ -201,6 +201,7 @@ function coerceProviderInput(raw: unknown): {
description?: string;
extraConfig?: Record<string, unknown>;
initialStatus?: 'active' | 'hibernating';
poolName?: string;
} {
if (raw === null || typeof raw !== 'object') {
throw Object.assign(new Error('provider entry must be an object'), { statusCode: 400 });
@@ -227,5 +228,11 @@ function coerceProviderInput(raw: unknown): {
if (o['initialStatus'] === 'active' || o['initialStatus'] === 'hibernating') {
out.initialStatus = o['initialStatus'];
}
// v4: optional pool key. Validation matches CreateLlmSchema.poolName so
// a publisher can't slip an uppercase or whitespace name past the
// virtual-publish path that the public CRUD path would reject.
if (typeof o['poolName'] === 'string' && /^[a-z0-9-]+$/.test(o['poolName']) && o['poolName'].length >= 1 && o['poolName'].length <= 100) {
out.poolName = o['poolName'];
}
return out;
}

View File

@@ -115,6 +115,17 @@ export class LlmService {
return this.repo.findByPoolName(poolName);
}
/**
 * v4: display-oriented variant of `findByPoolName` that resolves each raw
 * repo row into a full `LlmView` (apiKeyRef resolved via SecretService).
 * Consumed by the `GET /api/v1/llms/:name/members` route and by
 * `mcpctl describe llm`, both of which render the complete row shape.
 */
async listPoolMembers(poolName: string): Promise<LlmView[]> {
  const rows = await this.repo.findByPoolName(poolName);
  const views = rows.map((row) => this.toView(row));
  return Promise.all(views);
}
async create(input: unknown, opts: { skipAuthCheck?: boolean } = {}): Promise<LlmView> {
const data = CreateLlmSchema.parse(input);
const existing = await this.repo.findByName(data.name);

View File

@@ -47,6 +47,15 @@ export interface RegisterProviderInput {
* publish time.
*/
initialStatus?: 'active' | 'hibernating';
/**
* v4: optional pool key. When set, this virtual Llm row stacks with
* any other Llms (public OR virtual from any session) sharing the
* same value. The chat dispatcher then load-balances across all
* healthy members. Cluster-wide name uniqueness still applies — the
* publisher picks a unique `name` (e.g. `vllm-<host>-qwen3`) and
* shares the `poolName` with siblings.
*/
poolName?: string;
}
export interface RegisterResult {
@@ -147,6 +156,7 @@ export class VirtualLlmService implements IVirtualLlmService {
tier: p.tier ?? 'fast',
description: p.description ?? '',
...(p.extraConfig !== undefined ? { extraConfig: p.extraConfig } : {}),
...(p.poolName !== undefined ? { poolName: p.poolName } : {}),
kind: 'virtual',
providerSessionId: sessionId,
status: initialStatus,
@@ -180,6 +190,7 @@ export class VirtualLlmService implements IVirtualLlmService {
...(p.tier !== undefined ? { tier: p.tier } : {}),
...(p.description !== undefined ? { description: p.description } : {}),
...(p.extraConfig !== undefined ? { extraConfig: p.extraConfig } : {}),
...(p.poolName !== undefined ? { poolName: p.poolName } : {}),
kind: 'virtual',
providerSessionId: sessionId,
status: initialStatus,

View File

@@ -21,6 +21,12 @@ function makeLlm(overrides: Partial<Llm> = {}): Llm {
apiKeySecretId: null,
apiKeySecretKey: null,
extraConfig: {},
poolName: null,
kind: 'public',
providerSessionId: null,
lastHeartbeatAt: null,
status: 'active',
inactiveSince: null,
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
@@ -38,6 +44,17 @@ function mockRepo(initial: Llm[] = []): ILlmRepository {
return null;
}),
findByTier: vi.fn(async () => []),
findByPoolName: vi.fn(async (poolName: string) => {
const out: Llm[] = [];
for (const r of rows.values()) {
if (r.poolName === poolName) out.push(r);
else if (r.poolName === null && r.name === poolName) out.push(r);
}
return out;
}),
findBySessionId: vi.fn(async () => []),
findStaleVirtuals: vi.fn(async () => []),
findExpiredInactives: vi.fn(async () => []),
create: vi.fn(async (data) => {
const row = makeLlm({ id: 'new-id', name: data.name, type: data.type, model: data.model });
rows.set(row.id, row);
@@ -191,4 +208,50 @@ describe('Llm Routes', () => {
const res = await app.inject({ method: 'DELETE', url: '/api/v1/llms/missing' });
expect(res.statusCode).toBe(404);
});
// ── v4: GET /api/v1/llms/:name/members ──
it('GET /api/v1/llms/:name/members returns all members of an explicit pool', async () => {
  const repo = mockRepo([
    makeLlm({ id: 'l1', name: 'qwen-prod-1', poolName: 'qwen-pool', model: 'qwen3' }),
    makeLlm({ id: 'l2', name: 'qwen-prod-2', poolName: 'qwen-pool', model: 'qwen3' }),
    makeLlm({ id: 'l3', name: 'qwen-prod-3', poolName: 'qwen-pool', model: 'qwen3', status: 'inactive' }),
    makeLlm({ id: 'other', name: 'gpt-4o', poolName: null, model: 'gpt-4o' }),
  ]);
  await createApp(repo);
  // Any member name works as the anchor — the route first resolves the
  // anchor's effective pool key, then lists every row matching it.
  const response = await app.inject({ method: 'GET', url: '/api/v1/llms/qwen-prod-1/members' });
  expect(response.statusCode).toBe(200);
  const payload = response.json<{
    poolName: string;
    explicitPoolName: string | null;
    size: number;
    activeCount: number;
    members: Array<{ name: string }>;
  }>();
  expect(payload.explicitPoolName).toBe('qwen-pool');
  expect(payload.poolName).toBe('qwen-pool');
  expect(payload.size).toBe(3);
  expect(payload.activeCount).toBe(2);
  const memberNames = payload.members.map((m) => m.name).sort();
  expect(memberNames).toEqual(['qwen-prod-1', 'qwen-prod-2', 'qwen-prod-3']);
});
it('GET /api/v1/llms/:name/members for a solo Llm returns a pool of 1', async () => {
  await createApp(mockRepo([makeLlm({ id: 'solo', name: 'gpt-4o', poolName: null, model: 'gpt-4o' })]));
  const response = await app.inject({ method: 'GET', url: '/api/v1/llms/gpt-4o/members' });
  expect(response.statusCode).toBe(200);
  const payload = response.json<{ poolName: string; explicitPoolName: string | null; size: number; activeCount: number }>();
  // Implicit pool: the key falls back to the row's own name and the
  // explicit key stays null.
  expect(payload.explicitPoolName).toBeNull();
  expect(payload.poolName).toBe('gpt-4o');
  expect(payload.activeCount).toBe(1);
  expect(payload.size).toBe(1);
});
it('GET /api/v1/llms/:name/members returns 404 when the anchor name does not exist', async () => {
  // Empty repo — the anchor lookup itself should fail, not the pool scan.
  await createApp(mockRepo());
  const { statusCode } = await app.inject({ method: 'GET', url: '/api/v1/llms/nope/members' });
  expect(statusCode).toBe(404);
});
});

View File

@@ -93,6 +93,18 @@ export interface LlmProviderFileEntry {
* - `command`: spawn a shell command (e.g. `systemctl --user start vllm`)
*/
wake?: WakeRecipe;
/**
* v4: opt this provider into a load-balanced pool. When set, the
* published Llm row carries `poolName` and stacks with any other Llms
* (public OR virtual) sharing the same value. Agents pinned to any
* pool member dispatch across all healthy members at chat time.
*
* Convention for distributed-compute setups: each user's mcplocal
* picks a unique `name` (e.g. `vllm-<hostname>-qwen3`) but a shared
* `poolName` (e.g. `user-vllm-qwen3-thinking`). Result: agents see one
* logical pool that auto-grows as more workers come online.
*/
poolName?: string;
}
export type WakeRecipe =

View File

@@ -218,6 +218,7 @@ async function maybeStartVirtualLlmRegistrar(
};
if (entry.tier !== undefined) item.tier = entry.tier;
if (entry.wake !== undefined) item.wake = entry.wake;
if (entry.poolName !== undefined) item.poolName = entry.poolName;
published.push(item);
}
// v3: forward locally-declared agents alongside the providers. We

View File

@@ -54,6 +54,12 @@ export interface RegistrarPublishedProvider {
* the registrar runs this recipe and waits for the backend to come up.
*/
wake?: WakeRecipe;
/**
* v4: optional pool key. When set, the published Llm row carries
* `poolName` and stacks with any other Llms sharing the same value.
* Agents pinned to any pool member dispatch across all healthy members.
*/
poolName?: string;
}
/**
@@ -185,6 +191,7 @@ export class VirtualLlmRegistrar {
model: p.model,
...(p.tier !== undefined ? { tier: p.tier } : {}),
...(p.description !== undefined ? { description: p.description } : {}),
...(p.poolName !== undefined ? { poolName: p.poolName } : {}),
initialStatus,
};
}));