diff --git a/completions/mcpctl.bash b/completions/mcpctl.bash index 2b86325..21a1b1e 100644 --- a/completions/mcpctl.bash +++ b/completions/mcpctl.bash @@ -185,7 +185,7 @@ _mcpctl() { COMPREPLY=($(compgen -W "--data --force -h --help" -- "$cur")) ;; llm) - COMPREPLY=($(compgen -W "--type --model --url --tier --description --api-key-ref --extra --force --skip-auth-check -h --help" -- "$cur")) + COMPREPLY=($(compgen -W "--type --model --url --tier --description --api-key-ref --extra --pool-name --force --skip-auth-check -h --help" -- "$cur")) ;; agent) COMPREPLY=($(compgen -W "--llm --project --description --system-prompt --system-prompt-file --proxy-model --default-temperature --default-top-p --default-top-k --default-max-tokens --default-seed --default-stop --default-extra --default-params-file --force -h --help" -- "$cur")) diff --git a/completions/mcpctl.fish b/completions/mcpctl.fish index 810b375..afe320d 100644 --- a/completions/mcpctl.fish +++ b/completions/mcpctl.fish @@ -333,6 +333,7 @@ complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l tier -d 'Tier: fast complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l description -d 'Description' -x complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l api-key-ref -d 'API key reference in SECRET/KEY form (e.g. 
anthropic-key/token)' -x complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l extra -d 'Extra config key=value (repeat)' -x +complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l pool-name -d 'Stack with other Llms sharing this pool name; agents pinned to any member dispatch across the pool' -x complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l force -d 'Update if already exists' complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l skip-auth-check -d 'Skip the upstream auth probe (for offline registration before infra exists)' diff --git a/src/cli/src/commands/apply.ts b/src/cli/src/commands/apply.ts index 4eac13a..ecb7b63 100644 --- a/src/cli/src/commands/apply.ts +++ b/src/cli/src/commands/apply.ts @@ -61,6 +61,10 @@ const LlmSpecSchema = z.object({ key: z.string().min(1), }).nullable().optional(), extraConfig: z.record(z.unknown()).default({}), + // v4: optional pool key. Same validation as on the mcpd side + // (CreateLlmSchema). Null means "solo Llm, effective pool key falls + // back to the row's own name". + poolName: z.string().min(1).max(100).regex(/^[a-z0-9-]+$/).nullable().optional(), }); const AgentChatParamsAppliedSchema = z.object({ diff --git a/src/cli/src/commands/create.ts b/src/cli/src/commands/create.ts index a94f677..d0f4d69 100644 --- a/src/cli/src/commands/create.ts +++ b/src/cli/src/commands/create.ts @@ -263,6 +263,7 @@ export function createCreateCommand(deps: CreateCommandDeps): Command { .option('--description ', 'Description') .option('--api-key-ref ', 'API key reference in SECRET/KEY form (e.g. 
anthropic-key/token)') .option('--extra ', 'Extra config key=value (repeat)', collect, []) + .option('--pool-name ', 'Stack with other Llms sharing this pool name; agents pinned to any member dispatch across the pool') .option('--force', 'Update if already exists') .option('--skip-auth-check', 'Skip the upstream auth probe (for offline registration before infra exists)') .action(async (name: string, opts) => { @@ -274,6 +275,7 @@ export function createCreateCommand(deps: CreateCommandDeps): Command { }; if (opts.url) body.url = opts.url; if (opts.description !== undefined) body.description = opts.description; + if (opts.poolName !== undefined) body.poolName = opts.poolName; if (opts.apiKeyRef) { const slashIdx = (opts.apiKeyRef as string).indexOf('/'); if (slashIdx < 1) throw new Error(`Invalid --api-key-ref '${opts.apiKeyRef as string}'. Expected SECRET_NAME/KEY_NAME`); diff --git a/src/cli/src/commands/describe.ts b/src/cli/src/commands/describe.ts index 998d12d..bda4ffb 100644 --- a/src/cli/src/commands/describe.ts +++ b/src/cli/src/commands/describe.ts @@ -243,7 +243,15 @@ function formatSecretDetail(secret: Record, showValues: boolean return lines.join('\n'); } -function formatLlmDetail(llm: Record): string { +interface PoolMembersInfo { + poolName: string; + explicitPoolName: string | null; + size: number; + activeCount: number; + members: Array<{ id?: string; name: string; status?: string; kind?: string; url?: string }>; +} + +function formatLlmDetail(llm: Record, pool?: PoolMembersInfo): string { const lines: string[] = []; lines.push(`=== LLM: ${llm.name} ===`); lines.push(`${pad('Name:')}${llm.name}`); @@ -253,6 +261,29 @@ function formatLlmDetail(llm: Record): string { if (llm.url) lines.push(`${pad('URL:')}${llm.url}`); if (llm.description) lines.push(`${pad('Description:')}${llm.description}`); + // v4 Pool block: only render when there's actually pool context to show. 
+ // For solo Llms (poolName null AND pool size 1), suppress the section so + // describe stays compact for the common case. For explicit-pool members + // OR rows whose name is implicitly seeding a pool (size > 1), render up + // top so it's the first thing the operator sees — pool routing is a + // significant behavioral fact. + const poolNameVal = llm.poolName as string | null | undefined; + const isExplicitPool = poolNameVal !== null && poolNameVal !== undefined && poolNameVal !== ''; + const isImplicitPool = pool !== undefined && pool.size > 1; + if (isExplicitPool || isImplicitPool) { + lines.push(''); + lines.push('Pool:'); + const effective = pool?.poolName ?? (poolNameVal ?? llm.name as string); + lines.push(` ${pad('Pool name:', 14)}${effective}${isExplicitPool ? '' : ' (implicit, falls back to name)'}`); + if (pool !== undefined) { + lines.push(` ${pad('Members:', 14)}${String(pool.size)} (${String(pool.activeCount)} active)`); + for (const m of pool.members) { + const youSuffix = m.name === llm.name ? ' ← this row' : ''; + lines.push(` - ${m.name} [${m.kind ?? '?'}/${m.status ?? '?'}]${youSuffix}`); + } + } + } + const ref = llm.apiKeyRef as { name: string; key: string } | null | undefined; lines.push(''); lines.push('API Key:'); @@ -982,9 +1013,22 @@ export function createDescribeCommand(deps: DescribeCommandDeps): Command { case 'secretbackends': deps.log(formatSecretBackendDetail(item)); break; - case 'llms': - deps.log(formatLlmDetail(item)); + case 'llms': { + // v4: also fetch pool membership so the describe Pool block + // can show siblings + active counts. Best-effort — older + // mcpd versions without the /members route 404 here, in + // which case we render the row alone. + let poolInfo: PoolMembersInfo | undefined; + try { + poolInfo = await deps.client.get( + `/api/v1/llms/${encodeURIComponent(item.name as string)}/members`, + ); + } catch { + // Old mcpd without /members, or RBAC denial — fall back silently. 
+ } + deps.log(formatLlmDetail(item, poolInfo)); break; + } case 'projects': { const [projectPrompts, llms] = await Promise.all([ deps.client diff --git a/src/cli/src/commands/get.ts b/src/cli/src/commands/get.ts index f3e07ac..f931e30 100644 --- a/src/cli/src/commands/get.ts +++ b/src/cli/src/commands/get.ts @@ -136,10 +136,16 @@ interface LlmRow { // mcpd responses that predate the kind/status columns). kind?: 'public' | 'virtual'; status?: 'active' | 'inactive' | 'hibernating'; + // v4: explicit pool key. NULL = solo Llm (effective pool = its own name). + poolName?: string | null; } +// v4: POOL column placed right after NAME so an operator can't miss +// which Llms stack into the same dispatcher pool. Solo rows show "-" +// to make the "no pool / pool of 1" case unambiguous. const llmColumns: Column[] = [ { header: 'NAME', key: 'name' }, + { header: 'POOL', key: (r) => (r.poolName !== null && r.poolName !== undefined && r.poolName !== '') ? r.poolName : '-', width: 18 }, { header: 'KIND', key: (r) => r.kind ?? 'public', width: 8 }, { header: 'STATUS', key: (r) => r.status ?? 'active', width: 12 }, { header: 'TYPE', key: 'type', width: 12 }, diff --git a/src/mcpd/src/routes/llms.ts b/src/mcpd/src/routes/llms.ts index 7d34571..b153a9a 100644 --- a/src/mcpd/src/routes/llms.ts +++ b/src/mcpd/src/routes/llms.ts @@ -1,6 +1,6 @@ import type { FastifyInstance } from 'fastify'; -import type { LlmService } from '../services/llm.service.js'; -import { LlmAuthVerificationError } from '../services/llm.service.js'; +import type { LlmService, LlmView } from '../services/llm.service.js'; +import { LlmAuthVerificationError, effectivePoolName } from '../services/llm.service.js'; import { NotFoundError, ConflictError } from '../services/mcp-server.service.js'; export function registerLlmRoutes( @@ -86,6 +86,44 @@ export function registerLlmRoutes( throw err; } }); + + // v4: list all members of the effective pool that the named Llm belongs to. 
+ // The path uses an explicit `/members` suffix so it can't collide with the + // single-Llm `/api/v1/llms/:id` route — an Llm whose name happens to be + // "members" would otherwise be unaddressable. + // Returns the members[] array AND a small header object so callers don't need a + // second roundtrip to compute pool stats; agents.ts and the CLI both want + // size + activeCount. + app.get<{ Params: { name: string } }>('/api/v1/llms/:name/members', async (request, reply) => { + try { + const anchor = await getByIdOrName(service, request.params.name); + const members = await service.listPoolMembers(effectivePoolName(anchor)); + return { + poolName: effectivePoolName(anchor), + explicitPoolName: anchor.poolName, + size: members.length, + activeCount: members.filter((m) => m.status === 'active').length, + members, + }; + } catch (err) { + if (err instanceof NotFoundError) { + reply.code(404); + return { error: err.message }; + } + throw err; + } + }); +} + +/** v4: convenience type for the new `/members` endpoint response. */ +export interface PoolMembersResponse { + /** Effective pool key (poolName ?? name on the anchor row). */ + poolName: string; + /** Anchor row's literal poolName field — null when it falls back to its own name. 
*/ + explicitPoolName: string | null; + size: number; + activeCount: number; + members: LlmView[]; } const CUID_RE = /^c[a-z0-9]{24}/i; diff --git a/src/mcpd/src/routes/virtual-llms.ts b/src/mcpd/src/routes/virtual-llms.ts index d92bc8f..fbde2ba 100644 --- a/src/mcpd/src/routes/virtual-llms.ts +++ b/src/mcpd/src/routes/virtual-llms.ts @@ -201,6 +201,7 @@ function coerceProviderInput(raw: unknown): { description?: string; extraConfig?: Record; initialStatus?: 'active' | 'hibernating'; + poolName?: string; } { if (raw === null || typeof raw !== 'object') { throw Object.assign(new Error('provider entry must be an object'), { statusCode: 400 }); @@ -227,5 +228,11 @@ function coerceProviderInput(raw: unknown): { if (o['initialStatus'] === 'active' || o['initialStatus'] === 'hibernating') { out.initialStatus = o['initialStatus']; } + // v4: optional pool key. Validation matches CreateLlmSchema.poolName so + // a publisher can't slip an uppercase or whitespace name past the + // virtual-publish path that the public CRUD path would reject. + if (typeof o['poolName'] === 'string' && /^[a-z0-9-]+$/.test(o['poolName']) && o['poolName'].length >= 1 && o['poolName'].length <= 100) { + out.poolName = o['poolName']; + } return out; } diff --git a/src/mcpd/src/services/llm.service.ts b/src/mcpd/src/services/llm.service.ts index c3c7ff9..af37176 100644 --- a/src/mcpd/src/services/llm.service.ts +++ b/src/mcpd/src/services/llm.service.ts @@ -115,6 +115,17 @@ export class LlmService { return this.repo.findByPoolName(poolName); } + /** + * v4: API/CLI-facing version of `findByPoolName` that returns full + * `LlmView`s (apiKeyRef resolved via SecretService). Used by the + * `GET /api/v1/llms/:name/members` route and `mcpctl describe llm` — + * both of which want the full row shape for display. 
+ */ + async listPoolMembers(poolName: string): Promise { + const rows = await this.repo.findByPoolName(poolName); + return Promise.all(rows.map((r) => this.toView(r))); + } + async create(input: unknown, opts: { skipAuthCheck?: boolean } = {}): Promise { const data = CreateLlmSchema.parse(input); const existing = await this.repo.findByName(data.name); diff --git a/src/mcpd/src/services/virtual-llm.service.ts b/src/mcpd/src/services/virtual-llm.service.ts index fe70e9f..e0befd6 100644 --- a/src/mcpd/src/services/virtual-llm.service.ts +++ b/src/mcpd/src/services/virtual-llm.service.ts @@ -47,6 +47,15 @@ export interface RegisterProviderInput { * publish time. */ initialStatus?: 'active' | 'hibernating'; + /** + * v4: optional pool key. When set, this virtual Llm row stacks with + * any other Llms (public OR virtual from any session) sharing the + * same value. The chat dispatcher then load-balances across all + * healthy members. Cluster-wide name uniqueness still applies — the + * publisher picks a unique `name` (e.g. `vllm--qwen3`) and + * shares the `poolName` with siblings. + */ + poolName?: string; } export interface RegisterResult { @@ -147,6 +156,7 @@ export class VirtualLlmService implements IVirtualLlmService { tier: p.tier ?? 'fast', description: p.description ?? '', ...(p.extraConfig !== undefined ? { extraConfig: p.extraConfig } : {}), + ...(p.poolName !== undefined ? { poolName: p.poolName } : {}), kind: 'virtual', providerSessionId: sessionId, status: initialStatus, @@ -180,6 +190,7 @@ export class VirtualLlmService implements IVirtualLlmService { ...(p.tier !== undefined ? { tier: p.tier } : {}), ...(p.description !== undefined ? { description: p.description } : {}), ...(p.extraConfig !== undefined ? { extraConfig: p.extraConfig } : {}), + ...(p.poolName !== undefined ? 
{ poolName: p.poolName } : {}), kind: 'virtual', providerSessionId: sessionId, status: initialStatus, diff --git a/src/mcpd/tests/llm-routes.test.ts b/src/mcpd/tests/llm-routes.test.ts index 0a7ef6c..9ca116e 100644 --- a/src/mcpd/tests/llm-routes.test.ts +++ b/src/mcpd/tests/llm-routes.test.ts @@ -21,6 +21,12 @@ function makeLlm(overrides: Partial = {}): Llm { apiKeySecretId: null, apiKeySecretKey: null, extraConfig: {}, + poolName: null, + kind: 'public', + providerSessionId: null, + lastHeartbeatAt: null, + status: 'active', + inactiveSince: null, version: 1, createdAt: new Date(), updatedAt: new Date(), @@ -38,6 +44,17 @@ function mockRepo(initial: Llm[] = []): ILlmRepository { return null; }), findByTier: vi.fn(async () => []), + findByPoolName: vi.fn(async (poolName: string) => { + const out: Llm[] = []; + for (const r of rows.values()) { + if (r.poolName === poolName) out.push(r); + else if (r.poolName === null && r.name === poolName) out.push(r); + } + return out; + }), + findBySessionId: vi.fn(async () => []), + findStaleVirtuals: vi.fn(async () => []), + findExpiredInactives: vi.fn(async () => []), create: vi.fn(async (data) => { const row = makeLlm({ id: 'new-id', name: data.name, type: data.type, model: data.model }); rows.set(row.id, row); @@ -191,4 +208,50 @@ describe('Llm Routes', () => { const res = await app.inject({ method: 'DELETE', url: '/api/v1/llms/missing' }); expect(res.statusCode).toBe(404); }); + + // ── v4: GET /api/v1/llms/:name/members ── + + it('GET /api/v1/llms/:name/members returns all members of an explicit pool', async () => { + await createApp(mockRepo([ + makeLlm({ id: 'l1', name: 'qwen-prod-1', poolName: 'qwen-pool', model: 'qwen3' }), + makeLlm({ id: 'l2', name: 'qwen-prod-2', poolName: 'qwen-pool', model: 'qwen3' }), + makeLlm({ id: 'l3', name: 'qwen-prod-3', poolName: 'qwen-pool', model: 'qwen3', status: 'inactive' }), + makeLlm({ id: 'other', name: 'gpt-4o', poolName: null, model: 'gpt-4o' }), + ])); + // Hit via any pool 
member's name — the route resolves the anchor's + // effective pool key and lists all matching rows. + const res = await app.inject({ method: 'GET', url: '/api/v1/llms/qwen-prod-1/members' }); + expect(res.statusCode).toBe(200); + const body = res.json<{ + poolName: string; + explicitPoolName: string | null; + size: number; + activeCount: number; + members: Array<{ name: string }>; + }>(); + expect(body.poolName).toBe('qwen-pool'); + expect(body.explicitPoolName).toBe('qwen-pool'); + expect(body.size).toBe(3); + expect(body.activeCount).toBe(2); + expect(body.members.map((m) => m.name).sort()).toEqual(['qwen-prod-1', 'qwen-prod-2', 'qwen-prod-3']); + }); + + it('GET /api/v1/llms/:name/members for a solo Llm returns a pool of 1', async () => { + await createApp(mockRepo([ + makeLlm({ id: 'solo', name: 'gpt-4o', poolName: null, model: 'gpt-4o' }), + ])); + const res = await app.inject({ method: 'GET', url: '/api/v1/llms/gpt-4o/members' }); + expect(res.statusCode).toBe(200); + const body = res.json<{ poolName: string; explicitPoolName: string | null; size: number; activeCount: number }>(); + expect(body.poolName).toBe('gpt-4o'); + expect(body.explicitPoolName).toBeNull(); + expect(body.size).toBe(1); + expect(body.activeCount).toBe(1); + }); + + it('GET /api/v1/llms/:name/members returns 404 when the anchor name does not exist', async () => { + await createApp(mockRepo()); + const res = await app.inject({ method: 'GET', url: '/api/v1/llms/nope/members' }); + expect(res.statusCode).toBe(404); + }); }); diff --git a/src/mcplocal/src/http/config.ts b/src/mcplocal/src/http/config.ts index 9e9cd5e..d06b9b6 100644 --- a/src/mcplocal/src/http/config.ts +++ b/src/mcplocal/src/http/config.ts @@ -93,6 +93,18 @@ export interface LlmProviderFileEntry { * - `command`: spawn a shell command (e.g. `systemctl --user start vllm`) */ wake?: WakeRecipe; + /** + * v4: opt this provider into a load-balanced pool. 
When set, the + * published Llm row carries `poolName` and stacks with any other Llms + * (public OR virtual) sharing the same value. Agents pinned to any + * pool member dispatch across all healthy members at chat time. + * + * Convention for distributed-compute setups: each user's mcplocal + * picks a unique `name` (e.g. `vllm--qwen3`) but a shared + * `poolName` (e.g. `user-vllm-qwen3-thinking`). Result: agents see one + * logical pool that auto-grows as more workers come online. + */ + poolName?: string; } export type WakeRecipe = diff --git a/src/mcplocal/src/main.ts b/src/mcplocal/src/main.ts index 9300a20..c4d901a 100644 --- a/src/mcplocal/src/main.ts +++ b/src/mcplocal/src/main.ts @@ -218,6 +218,7 @@ async function maybeStartVirtualLlmRegistrar( }; if (entry.tier !== undefined) item.tier = entry.tier; if (entry.wake !== undefined) item.wake = entry.wake; + if (entry.poolName !== undefined) item.poolName = entry.poolName; published.push(item); } // v3: forward locally-declared agents alongside the providers. We diff --git a/src/mcplocal/src/providers/registrar.ts b/src/mcplocal/src/providers/registrar.ts index cd7e46d..aedc23e 100644 --- a/src/mcplocal/src/providers/registrar.ts +++ b/src/mcplocal/src/providers/registrar.ts @@ -54,6 +54,12 @@ export interface RegistrarPublishedProvider { * the registrar runs this recipe and waits for the backend to come up. */ wake?: WakeRecipe; + /** + * v4: optional pool key. When set, the published Llm row carries + * `poolName` and stacks with any other Llms sharing the same value. + * Agents pinned to any pool member dispatch across all healthy members. + */ + poolName?: string; } /** @@ -185,6 +191,7 @@ export class VirtualLlmRegistrar { model: p.model, ...(p.tier !== undefined ? { tier: p.tier } : {}), ...(p.description !== undefined ? { description: p.description } : {}), + ...(p.poolName !== undefined ? { poolName: p.poolName } : {}), initialStatus, }; }));