feat(mcpd+cli+mcplocal): /llms/<name>/members + POOL column + --pool-name (v4 Stage 2)
Surfaces the v4 pool model end-to-end:

- mcpd: GET /api/v1/llms/:name/members returns the effective pool the named anchor belongs to, plus aggregate stats (size, activeCount, explicit vs implicit pool key). RBAC inherits from `view:llms` — same as the single-Llm route. Members are full LlmView shapes so callers don't need a second roundtrip to render the pool block.
- mcpd: VirtualLlmService.register accepts an optional `poolName` on RegisterProviderInput; the route's `coerceProviderInput` validates the same character set as CreateLlmSchema.poolName. Backwards compatible — older mcplocals that don't send the field continue to publish solo Llms.
- CLI `get llm` table: new POOL column right after NAME. Solo rows show "-" so the "no pool / pool of 1" case is unambiguous (per user direction "make sure we see it, prominently visible and impossible to mistake").
- CLI `describe llm`: fetches /members and renders a Pool block at the top of the detail view when the row is in an explicit pool OR when its implicit pool has size > 1. Each member line shows kind/status; the anchor row gets "← this row". Block is suppressed for solo rows so describe stays compact in the common case.
- CLI `create llm --pool-name <name>` flag and apply schema both accept the new field. YAML round-trip preserves it: `get -o yaml` emits `poolName: <name>`, `apply -f` re-imports it without diff. Verified end-to-end against the live mcpd.
- mcplocal: LlmProviderFileEntry gains optional `poolName`; main.ts and registrar.ts thread it through into the register payload. Use case for distributed inference: each user's mcplocal picks a unique `name` (e.g. `vllm-<host>-qwen3`) but a shared `poolName` (e.g. `user-vllm-qwen3-thinking`); agents see one logical pool that auto-grows as workers come online.
- Shell completions: regenerated from source via the existing scripts/generate-completions.ts. `--pool-name` is now suggested in fish + bash for `mcpctl create llm`.
Tests: +3 new mcpd route tests for /members (explicit pool, solo pool of 1, missing-anchor 404). All suites green: mcpd 868/868 (was 865, +3), mcplocal 723/723, cli 437/437.

Stage 3 (next): live smoke against 2 publishers sharing a pool name + docs.
This commit is contained in:
@@ -61,6 +61,10 @@ const LlmSpecSchema = z.object({
|
||||
key: z.string().min(1),
|
||||
}).nullable().optional(),
|
||||
extraConfig: z.record(z.unknown()).default({}),
|
||||
// v4: optional pool key. Same validation as on the mcpd side
|
||||
// (CreateLlmSchema). Null means "solo Llm, effective pool key falls
|
||||
// back to the row's own name".
|
||||
poolName: z.string().min(1).max(100).regex(/^[a-z0-9-]+$/).nullable().optional(),
|
||||
});
|
||||
|
||||
const AgentChatParamsAppliedSchema = z.object({
|
||||
|
||||
@@ -263,6 +263,7 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
|
||||
.option('--description <text>', 'Description')
|
||||
.option('--api-key-ref <ref>', 'API key reference in SECRET/KEY form (e.g. anthropic-key/token)')
|
||||
.option('--extra <entry>', 'Extra config key=value (repeat)', collect, [])
|
||||
.option('--pool-name <pool>', 'Stack with other Llms sharing this pool name; agents pinned to any member dispatch across the pool')
|
||||
.option('--force', 'Update if already exists')
|
||||
.option('--skip-auth-check', 'Skip the upstream auth probe (for offline registration before infra exists)')
|
||||
.action(async (name: string, opts) => {
|
||||
@@ -274,6 +275,7 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
|
||||
};
|
||||
if (opts.url) body.url = opts.url;
|
||||
if (opts.description !== undefined) body.description = opts.description;
|
||||
if (opts.poolName !== undefined) body.poolName = opts.poolName;
|
||||
if (opts.apiKeyRef) {
|
||||
const slashIdx = (opts.apiKeyRef as string).indexOf('/');
|
||||
if (slashIdx < 1) throw new Error(`Invalid --api-key-ref '${opts.apiKeyRef as string}'. Expected SECRET_NAME/KEY_NAME`);
|
||||
|
||||
@@ -243,7 +243,15 @@ function formatSecretDetail(secret: Record<string, unknown>, showValues: boolean
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
function formatLlmDetail(llm: Record<string, unknown>): string {
|
||||
/**
 * Client-side shape of the `/api/v1/llms/:name/members` payload that the
 * describe command fetches and hands to `formatLlmDetail`.
 */
interface PoolMembersInfo {
  /** Pool key shown as "Pool name" in the describe output. */
  poolName: string;
  /** Explicit pool key reported for the anchor row, or null when none is set. */
  explicitPoolName: string | null;
  /** Number of rows in the pool. */
  size: number;
  /** Number of pool rows reported as active. */
  activeCount: number;
  /** One entry per pool row; only `name` is required for rendering. */
  members: {
    id?: string;
    name: string;
    status?: string;
    kind?: string;
    url?: string;
  }[];
}
|
||||
|
||||
function formatLlmDetail(llm: Record<string, unknown>, pool?: PoolMembersInfo): string {
|
||||
const lines: string[] = [];
|
||||
lines.push(`=== LLM: ${llm.name} ===`);
|
||||
lines.push(`${pad('Name:')}${llm.name}`);
|
||||
@@ -253,6 +261,29 @@ function formatLlmDetail(llm: Record<string, unknown>): string {
|
||||
if (llm.url) lines.push(`${pad('URL:')}${llm.url}`);
|
||||
if (llm.description) lines.push(`${pad('Description:')}${llm.description}`);
|
||||
|
||||
// v4 Pool block: only render when there's actually pool context to show.
|
||||
// For solo Llms (poolName null AND pool size 1), suppress the section so
|
||||
// describe stays compact for the common case. For explicit-pool members
|
||||
// OR rows whose name is implicitly seeding a pool (size > 1), render up
|
||||
// top so it's the first thing the operator sees — pool routing is a
|
||||
// significant behavioral fact.
|
||||
const poolNameVal = llm.poolName as string | null | undefined;
|
||||
const isExplicitPool = poolNameVal !== null && poolNameVal !== undefined && poolNameVal !== '';
|
||||
const isImplicitPool = pool !== undefined && pool.size > 1;
|
||||
if (isExplicitPool || isImplicitPool) {
|
||||
lines.push('');
|
||||
lines.push('Pool:');
|
||||
const effective = pool?.poolName ?? (poolNameVal ?? llm.name as string);
|
||||
lines.push(` ${pad('Pool name:', 14)}${effective}${isExplicitPool ? '' : ' (implicit, falls back to name)'}`);
|
||||
if (pool !== undefined) {
|
||||
lines.push(` ${pad('Members:', 14)}${String(pool.size)} (${String(pool.activeCount)} active)`);
|
||||
for (const m of pool.members) {
|
||||
const youSuffix = m.name === llm.name ? ' ← this row' : '';
|
||||
lines.push(` - ${m.name} [${m.kind ?? '?'}/${m.status ?? '?'}]${youSuffix}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ref = llm.apiKeyRef as { name: string; key: string } | null | undefined;
|
||||
lines.push('');
|
||||
lines.push('API Key:');
|
||||
@@ -982,9 +1013,22 @@ export function createDescribeCommand(deps: DescribeCommandDeps): Command {
|
||||
case 'secretbackends':
|
||||
deps.log(formatSecretBackendDetail(item));
|
||||
break;
|
||||
case 'llms':
|
||||
deps.log(formatLlmDetail(item));
|
||||
case 'llms': {
|
||||
// v4: also fetch pool membership so the describe Pool block
|
||||
// can show siblings + active counts. Best-effort — older
|
||||
// mcpd versions without the /members route 404 here, in
|
||||
// which case we render the row alone.
|
||||
let poolInfo: PoolMembersInfo | undefined;
|
||||
try {
|
||||
poolInfo = await deps.client.get<PoolMembersInfo>(
|
||||
`/api/v1/llms/${encodeURIComponent(item.name as string)}/members`,
|
||||
);
|
||||
} catch {
|
||||
// Old mcpd without /members, or RBAC denial — fall back silently.
|
||||
}
|
||||
deps.log(formatLlmDetail(item, poolInfo));
|
||||
break;
|
||||
}
|
||||
case 'projects': {
|
||||
const [projectPrompts, llms] = await Promise.all([
|
||||
deps.client
|
||||
|
||||
@@ -136,10 +136,16 @@ interface LlmRow {
|
||||
// mcpd responses that predate the kind/status columns).
|
||||
kind?: 'public' | 'virtual';
|
||||
status?: 'active' | 'inactive' | 'hibernating';
|
||||
// v4: explicit pool key. NULL = solo Llm (effective pool = its own name).
|
||||
poolName?: string | null;
|
||||
}
|
||||
|
||||
// v4: POOL column placed right after NAME so an operator can't miss
|
||||
// which Llms stack into the same dispatcher pool. Solo rows show "-"
|
||||
// to make the "no pool / pool of 1" case unambiguous.
|
||||
const llmColumns: Column<LlmRow>[] = [
|
||||
{ header: 'NAME', key: 'name' },
|
||||
{ header: 'POOL', key: (r) => (r.poolName !== null && r.poolName !== undefined && r.poolName !== '') ? r.poolName : '-', width: 18 },
|
||||
{ header: 'KIND', key: (r) => r.kind ?? 'public', width: 8 },
|
||||
{ header: 'STATUS', key: (r) => r.status ?? 'active', width: 12 },
|
||||
{ header: 'TYPE', key: 'type', width: 12 },
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import type { LlmService } from '../services/llm.service.js';
|
||||
import { LlmAuthVerificationError } from '../services/llm.service.js';
|
||||
import type { LlmService, LlmView } from '../services/llm.service.js';
|
||||
import { LlmAuthVerificationError, effectivePoolName } from '../services/llm.service.js';
|
||||
import { NotFoundError, ConflictError } from '../services/mcp-server.service.js';
|
||||
|
||||
export function registerLlmRoutes(
|
||||
@@ -86,6 +86,44 @@ export function registerLlmRoutes(
|
||||
throw err;
|
||||
}
|
||||
});
|
||||
|
||||
// v4: list all members of the effective pool that the named Llm belongs to.
|
||||
// The path uses an explicit `/members` suffix so it can't collide with the
|
||||
// single-Llm `/api/v1/llms/:id` route — an Llm whose name happens to be
|
||||
// "members" would otherwise be unaddressable.
|
||||
// Returns the members[] array AND a small header object so callers don't need a
|
||||
// second roundtrip to compute pool stats; agents.ts and the CLI both want
|
||||
// size + activeCount.
|
||||
// Pool membership lookup. The `:name` param is resolved to an anchor row,
// the anchor's effective pool key is derived once, and every row sharing
// that key is returned together with the aggregate counters.
app.get<{ Params: { name: string } }>('/api/v1/llms/:name/members', async (request, reply) => {
  try {
    const anchor = await getByIdOrName(service, request.params.name);
    const poolKey = effectivePoolName(anchor);
    const members = await service.listPoolMembers(poolKey);
    // Only rows whose status is exactly 'active' count towards activeCount.
    const activeCount = members.filter((m) => m.status === 'active').length;
    return {
      poolName: poolKey,
      explicitPoolName: anchor.poolName,
      size: members.length,
      activeCount,
      members,
    };
  } catch (err) {
    // Anything other than a missing anchor is re-thrown unchanged.
    if (!(err instanceof NotFoundError)) {
      throw err;
    }
    reply.code(404);
    return { error: err.message };
  }
});
|
||||
}
|
||||
|
||||
/**
 * v4: response body of `GET /api/v1/llms/:name/members`, exported so
 * callers can type the payload.
 */
export interface PoolMembersResponse {
  /** Effective pool key (poolName ?? name on the anchor row). */
  poolName: string;

  /** The anchor row's own poolName field; null when it falls back to its name. */
  explicitPoolName: string | null;

  /** Length of `members`. */
  size: number;

  /** How many entries in `members` have status 'active'. */
  activeCount: number;

  /** Full views of every row in the pool. */
  members: LlmView[];
}
|
||||
|
||||
/**
 * Matches a complete CUID-shaped id: a leading `c` followed by exactly 24
 * alphanumerics (case-insensitive).
 *
 * The pattern is anchored at both ends. Without the trailing `$`, any longer
 * hyphen-free string starting with `c` (for example a long Llm name) would
 * also match on its first 25 characters and be mistaken for an id.
 * NOTE(review): presumably consumed by getByIdOrName to tell ids from names;
 * confirm against that helper.
 */
const CUID_RE = /^c[a-z0-9]{24}$/i;
|
||||
|
||||
@@ -201,6 +201,7 @@ function coerceProviderInput(raw: unknown): {
|
||||
description?: string;
|
||||
extraConfig?: Record<string, unknown>;
|
||||
initialStatus?: 'active' | 'hibernating';
|
||||
poolName?: string;
|
||||
} {
|
||||
if (raw === null || typeof raw !== 'object') {
|
||||
throw Object.assign(new Error('provider entry must be an object'), { statusCode: 400 });
|
||||
@@ -227,5 +228,11 @@ function coerceProviderInput(raw: unknown): {
|
||||
if (o['initialStatus'] === 'active' || o['initialStatus'] === 'hibernating') {
|
||||
out.initialStatus = o['initialStatus'];
|
||||
}
|
||||
// v4: optional pool key. Validation matches CreateLlmSchema.poolName so
|
||||
// a publisher can't slip an uppercase or whitespace name past the
|
||||
// virtual-publish path that the public CRUD path would reject.
|
||||
if (typeof o['poolName'] === 'string' && /^[a-z0-9-]+$/.test(o['poolName']) && o['poolName'].length >= 1 && o['poolName'].length <= 100) {
|
||||
out.poolName = o['poolName'];
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -115,6 +115,17 @@ export class LlmService {
|
||||
return this.repo.findByPoolName(poolName);
|
||||
}
|
||||
|
||||
/**
 * v4: API/CLI-facing counterpart of `findByPoolName`. Loads every row
 * stored under `poolName` and converts each one with `toView`, so the
 * `GET /api/v1/llms/:name/members` route (and, through it,
 * `mcpctl describe llm`) gets full `LlmView` shapes for display.
 *
 * @param poolName Pool key to look up.
 * @returns One `LlmView` per matching row, in repository order.
 */
async listPoolMembers(poolName: string): Promise<LlmView[]> {
  const poolRows = await this.repo.findByPoolName(poolName);
  const pendingViews = poolRows.map((row) => this.toView(row));
  return Promise.all(pendingViews);
}
|
||||
|
||||
async create(input: unknown, opts: { skipAuthCheck?: boolean } = {}): Promise<LlmView> {
|
||||
const data = CreateLlmSchema.parse(input);
|
||||
const existing = await this.repo.findByName(data.name);
|
||||
|
||||
@@ -47,6 +47,15 @@ export interface RegisterProviderInput {
|
||||
* publish time.
|
||||
*/
|
||||
initialStatus?: 'active' | 'hibernating';
|
||||
/**
|
||||
* v4: optional pool key. When set, this virtual Llm row stacks with
|
||||
* any other Llms (public OR virtual from any session) sharing the
|
||||
* same value. The chat dispatcher then load-balances across all
|
||||
* healthy members. Cluster-wide name uniqueness still applies — the
|
||||
* publisher picks a unique `name` (e.g. `vllm-<host>-qwen3`) and
|
||||
* shares the `poolName` with siblings.
|
||||
*/
|
||||
poolName?: string;
|
||||
}
|
||||
|
||||
export interface RegisterResult {
|
||||
@@ -147,6 +156,7 @@ export class VirtualLlmService implements IVirtualLlmService {
|
||||
tier: p.tier ?? 'fast',
|
||||
description: p.description ?? '',
|
||||
...(p.extraConfig !== undefined ? { extraConfig: p.extraConfig } : {}),
|
||||
...(p.poolName !== undefined ? { poolName: p.poolName } : {}),
|
||||
kind: 'virtual',
|
||||
providerSessionId: sessionId,
|
||||
status: initialStatus,
|
||||
@@ -180,6 +190,7 @@ export class VirtualLlmService implements IVirtualLlmService {
|
||||
...(p.tier !== undefined ? { tier: p.tier } : {}),
|
||||
...(p.description !== undefined ? { description: p.description } : {}),
|
||||
...(p.extraConfig !== undefined ? { extraConfig: p.extraConfig } : {}),
|
||||
...(p.poolName !== undefined ? { poolName: p.poolName } : {}),
|
||||
kind: 'virtual',
|
||||
providerSessionId: sessionId,
|
||||
status: initialStatus,
|
||||
|
||||
@@ -21,6 +21,12 @@ function makeLlm(overrides: Partial<Llm> = {}): Llm {
|
||||
apiKeySecretId: null,
|
||||
apiKeySecretKey: null,
|
||||
extraConfig: {},
|
||||
poolName: null,
|
||||
kind: 'public',
|
||||
providerSessionId: null,
|
||||
lastHeartbeatAt: null,
|
||||
status: 'active',
|
||||
inactiveSince: null,
|
||||
version: 1,
|
||||
createdAt: new Date(),
|
||||
updatedAt: new Date(),
|
||||
@@ -38,6 +44,17 @@ function mockRepo(initial: Llm[] = []): ILlmRepository {
|
||||
return null;
|
||||
}),
|
||||
findByTier: vi.fn(async () => []),
|
||||
findByPoolName: vi.fn(async (poolName: string) => {
|
||||
const out: Llm[] = [];
|
||||
for (const r of rows.values()) {
|
||||
if (r.poolName === poolName) out.push(r);
|
||||
else if (r.poolName === null && r.name === poolName) out.push(r);
|
||||
}
|
||||
return out;
|
||||
}),
|
||||
findBySessionId: vi.fn(async () => []),
|
||||
findStaleVirtuals: vi.fn(async () => []),
|
||||
findExpiredInactives: vi.fn(async () => []),
|
||||
create: vi.fn(async (data) => {
|
||||
const row = makeLlm({ id: 'new-id', name: data.name, type: data.type, model: data.model });
|
||||
rows.set(row.id, row);
|
||||
@@ -191,4 +208,50 @@ describe('Llm Routes', () => {
|
||||
const res = await app.inject({ method: 'DELETE', url: '/api/v1/llms/missing' });
|
||||
expect(res.statusCode).toBe(404);
|
||||
});
|
||||
|
||||
// ── v4: GET /api/v1/llms/:name/members ──
|
||||
|
||||
it('GET /api/v1/llms/:name/members returns all members of an explicit pool', async () => {
  // Three rows share the explicit pool key (one of them inactive); the
  // fourth row is an unrelated solo Llm that must not show up in the result.
  const seedRows = [
    makeLlm({ id: 'l1', name: 'qwen-prod-1', poolName: 'qwen-pool', model: 'qwen3' }),
    makeLlm({ id: 'l2', name: 'qwen-prod-2', poolName: 'qwen-pool', model: 'qwen3' }),
    makeLlm({ id: 'l3', name: 'qwen-prod-3', poolName: 'qwen-pool', model: 'qwen3', status: 'inactive' }),
    makeLlm({ id: 'other', name: 'gpt-4o', poolName: null, model: 'gpt-4o' }),
  ];
  await createApp(mockRepo(seedRows));

  // Address the pool through one member's name; the route derives the
  // pool key from that anchor and lists every matching row.
  const response = await app.inject({ method: 'GET', url: '/api/v1/llms/qwen-prod-1/members' });
  expect(response.statusCode).toBe(200);

  type MembersPayload = {
    poolName: string;
    explicitPoolName: string | null;
    size: number;
    activeCount: number;
    members: Array<{ name: string }>;
  };
  const payload = response.json<MembersPayload>();
  expect(payload.poolName).toBe('qwen-pool');
  expect(payload.explicitPoolName).toBe('qwen-pool');
  expect(payload.size).toBe(3);
  expect(payload.activeCount).toBe(2);

  // Order is not part of the contract, so compare sorted names.
  const memberNames = payload.members.map((member) => member.name);
  memberNames.sort();
  expect(memberNames).toEqual(['qwen-prod-1', 'qwen-prod-2', 'qwen-prod-3']);
});
|
||||
|
||||
it('GET /api/v1/llms/:name/members for a solo Llm returns a pool of 1', async () => {
  // A single row without an explicit pool key: its own name is the pool key.
  const soloRow = makeLlm({ id: 'solo', name: 'gpt-4o', poolName: null, model: 'gpt-4o' });
  await createApp(mockRepo([soloRow]));

  const response = await app.inject({ method: 'GET', url: '/api/v1/llms/gpt-4o/members' });
  expect(response.statusCode).toBe(200);

  type PoolHeader = { poolName: string; explicitPoolName: string | null; size: number; activeCount: number };
  const payload = response.json<PoolHeader>();
  expect(payload.poolName).toBe('gpt-4o');
  expect(payload.explicitPoolName).toBeNull();
  expect(payload.size).toBe(1);
  expect(payload.activeCount).toBe(1);
});
|
||||
|
||||
it('GET /api/v1/llms/:name/members returns 404 when the anchor name does not exist', async () => {
  // Empty repository: no row can be resolved for the requested name.
  const emptyRepo = mockRepo();
  await createApp(emptyRepo);

  const response = await app.inject({ method: 'GET', url: '/api/v1/llms/nope/members' });
  expect(response.statusCode).toBe(404);
});
|
||||
});
|
||||
|
||||
@@ -93,6 +93,18 @@ export interface LlmProviderFileEntry {
|
||||
* - `command`: spawn a shell command (e.g. `systemctl --user start vllm`)
|
||||
*/
|
||||
wake?: WakeRecipe;
|
||||
/**
|
||||
* v4: opt this provider into a load-balanced pool. When set, the
|
||||
* published Llm row carries `poolName` and stacks with any other Llms
|
||||
* (public OR virtual) sharing the same value. Agents pinned to any
|
||||
* pool member dispatch across all healthy members at chat time.
|
||||
*
|
||||
* Convention for distributed-compute setups: each user's mcplocal
|
||||
* picks a unique `name` (e.g. `vllm-<hostname>-qwen3`) but a shared
|
||||
* `poolName` (e.g. `user-vllm-qwen3-thinking`). Result: agents see one
|
||||
* logical pool that auto-grows as more workers come online.
|
||||
*/
|
||||
poolName?: string;
|
||||
}
|
||||
|
||||
export type WakeRecipe =
|
||||
|
||||
@@ -218,6 +218,7 @@ async function maybeStartVirtualLlmRegistrar(
|
||||
};
|
||||
if (entry.tier !== undefined) item.tier = entry.tier;
|
||||
if (entry.wake !== undefined) item.wake = entry.wake;
|
||||
if (entry.poolName !== undefined) item.poolName = entry.poolName;
|
||||
published.push(item);
|
||||
}
|
||||
// v3: forward locally-declared agents alongside the providers. We
|
||||
|
||||
@@ -54,6 +54,12 @@ export interface RegistrarPublishedProvider {
|
||||
* the registrar runs this recipe and waits for the backend to come up.
|
||||
*/
|
||||
wake?: WakeRecipe;
|
||||
/**
|
||||
* v4: optional pool key. When set, the published Llm row carries
|
||||
* `poolName` and stacks with any other Llms sharing the same value.
|
||||
* Agents pinned to any pool member dispatch across all healthy members.
|
||||
*/
|
||||
poolName?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -185,6 +191,7 @@ export class VirtualLlmRegistrar {
|
||||
model: p.model,
|
||||
...(p.tier !== undefined ? { tier: p.tier } : {}),
|
||||
...(p.description !== undefined ? { description: p.description } : {}),
|
||||
...(p.poolName !== undefined ? { poolName: p.poolName } : {}),
|
||||
initialStatus,
|
||||
};
|
||||
}));
|
||||
|
||||
Reference in New Issue
Block a user