feat(mcpd+cli+mcplocal): /llms/<name>/members + POOL column + --pool-name (v4 Stage 2)

Surfaces the v4 pool model end-to-end:

- mcpd: GET /api/v1/llms/:name/members returns the effective pool the
  named anchor belongs to, plus aggregate stats (size, activeCount,
  explicit vs implicit pool key). RBAC inherits from `view:llms` —
  same as the single-Llm route. Members are full LlmView shapes so
  callers don't need a second roundtrip to render the pool block.

- mcpd: VirtualLlmService.register accepts an optional `poolName` on
  RegisterProviderInput; the route's `coerceProviderInput` validates
  the same character set as CreateLlmSchema.poolName. Backwards
  compatible — older mcplocals that don't send the field continue to
  publish solo Llms.

- CLI `get llm` table: new POOL column right after NAME. Solo rows
  show "-" so the "no pool / pool of 1" case is unambiguous (per
  user direction "make sure we see it, prominently visible and
  impossible to mistake").

- CLI `describe llm`: fetches /members and renders a Pool block at
  the top of the detail view when the row is in an explicit pool OR
  when its implicit pool has size > 1. Each member line shows
  kind/status; the anchor row gets "← this row". Block is suppressed
  for solo rows so describe stays compact in the common case.

- CLI `create llm --pool-name <name>` flag and apply schema both
  accept the new field. Yaml round-trip preserves it: get -o yaml
  emits `poolName: <name>`, apply -f re-imports it without diff.
  Verified end-to-end against the live mcpd.

- mcplocal: LlmProviderFileEntry gains optional `poolName`; main.ts
  and registrar.ts thread it through into the register payload. Use
  case for distributed inference: each user's mcplocal picks a
  unique `name` (e.g. `vllm-<host>-qwen3`) but a shared `poolName`
  (e.g. `user-vllm-qwen3-thinking`); agents see one logical pool
  that auto-grows as workers come online.

- Shell completions: regenerated from source via the existing
  scripts/generate-completions.ts. `--pool-name` now suggests in
  fish + bash for `mcpctl create llm`.

Tests: +3 new mcpd route tests for /members (explicit pool, solo
pool of 1, missing-anchor 404). All suites green:
  mcpd 868/868 (was 865, +3),
  mcplocal 723/723,
  cli 437/437.

Stage 3 (next): live smoke against 2 publishers sharing a pool name +
docs.
This commit is contained in:
Michal
2026-04-27 23:18:53 +01:00
parent 7949e1393d
commit e21f96080d
14 changed files with 213 additions and 6 deletions

View File

@@ -61,6 +61,10 @@ const LlmSpecSchema = z.object({
key: z.string().min(1),
}).nullable().optional(),
extraConfig: z.record(z.unknown()).default({}),
// v4: optional pool key. Same validation as on the mcpd side
// (CreateLlmSchema). Null means "solo Llm, effective pool key falls
// back to the row's own name".
poolName: z.string().min(1).max(100).regex(/^[a-z0-9-]+$/).nullable().optional(),
});
const AgentChatParamsAppliedSchema = z.object({

View File

@@ -263,6 +263,7 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
.option('--description <text>', 'Description')
.option('--api-key-ref <ref>', 'API key reference in SECRET/KEY form (e.g. anthropic-key/token)')
.option('--extra <entry>', 'Extra config key=value (repeat)', collect, [])
.option('--pool-name <pool>', 'Stack with other Llms sharing this pool name; agents pinned to any member dispatch across the pool')
.option('--force', 'Update if already exists')
.option('--skip-auth-check', 'Skip the upstream auth probe (for offline registration before infra exists)')
.action(async (name: string, opts) => {
@@ -274,6 +275,7 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
};
if (opts.url) body.url = opts.url;
if (opts.description !== undefined) body.description = opts.description;
if (opts.poolName !== undefined) body.poolName = opts.poolName;
if (opts.apiKeyRef) {
const slashIdx = (opts.apiKeyRef as string).indexOf('/');
if (slashIdx < 1) throw new Error(`Invalid --api-key-ref '${opts.apiKeyRef as string}'. Expected SECRET_NAME/KEY_NAME`);

View File

@@ -243,7 +243,15 @@ function formatSecretDetail(secret: Record<string, unknown>, showValues: boolean
return lines.join('\n');
}
function formatLlmDetail(llm: Record<string, unknown>): string {
/**
 * Shape of the `GET /api/v1/llms/:name/members` response that
 * `describe llm` consumes to render the Pool block.
 */
interface PoolMembersInfo {
  /** Effective pool key the anchor row resolved to. */
  poolName: string;
  /** The row's literal poolName field — null when the key falls back to the row's own name. */
  explicitPoolName: string | null;
  /** Total members in the pool, active and inactive alike. */
  size: number;
  /** Count of members whose status is 'active'. */
  activeCount: number;
  /** Member rows; fields are optional so partial payloads still render (displayed as '?'). */
  members: Array<{ id?: string; name: string; status?: string; kind?: string; url?: string }>;
}
function formatLlmDetail(llm: Record<string, unknown>, pool?: PoolMembersInfo): string {
const lines: string[] = [];
lines.push(`=== LLM: ${llm.name} ===`);
lines.push(`${pad('Name:')}${llm.name}`);
@@ -253,6 +261,29 @@ function formatLlmDetail(llm: Record<string, unknown>): string {
if (llm.url) lines.push(`${pad('URL:')}${llm.url}`);
if (llm.description) lines.push(`${pad('Description:')}${llm.description}`);
// v4 Pool block: only render when there's actually pool context to show.
// For solo Llms (poolName null AND pool size 1), suppress the section so
// describe stays compact for the common case. For explicit-pool members
// OR rows whose name is implicitly seeding a pool (size > 1), render up
// top so it's the first thing the operator sees — pool routing is a
// significant behavioral fact.
const poolNameVal = llm.poolName as string | null | undefined;
const isExplicitPool = poolNameVal !== null && poolNameVal !== undefined && poolNameVal !== '';
const isImplicitPool = pool !== undefined && pool.size > 1;
if (isExplicitPool || isImplicitPool) {
lines.push('');
lines.push('Pool:');
const effective = pool?.poolName ?? (poolNameVal ?? llm.name as string);
lines.push(` ${pad('Pool name:', 14)}${effective}${isExplicitPool ? '' : ' (implicit, falls back to name)'}`);
if (pool !== undefined) {
lines.push(` ${pad('Members:', 14)}${String(pool.size)} (${String(pool.activeCount)} active)`);
for (const m of pool.members) {
const youSuffix = m.name === llm.name ? ' ← this row' : '';
lines.push(` - ${m.name} [${m.kind ?? '?'}/${m.status ?? '?'}]${youSuffix}`);
}
}
}
const ref = llm.apiKeyRef as { name: string; key: string } | null | undefined;
lines.push('');
lines.push('API Key:');
@@ -982,9 +1013,22 @@ export function createDescribeCommand(deps: DescribeCommandDeps): Command {
case 'secretbackends':
deps.log(formatSecretBackendDetail(item));
break;
case 'llms':
deps.log(formatLlmDetail(item));
case 'llms': {
// v4: also fetch pool membership so the describe Pool block
// can show siblings + active counts. Best-effort — older
// mcpd versions without the /members route 404 here, in
// which case we render the row alone.
let poolInfo: PoolMembersInfo | undefined;
try {
poolInfo = await deps.client.get<PoolMembersInfo>(
`/api/v1/llms/${encodeURIComponent(item.name as string)}/members`,
);
} catch {
// Old mcpd without /members, or RBAC denial — fall back silently.
}
deps.log(formatLlmDetail(item, poolInfo));
break;
}
case 'projects': {
const [projectPrompts, llms] = await Promise.all([
deps.client

View File

@@ -136,10 +136,16 @@ interface LlmRow {
// mcpd responses that predate the kind/status columns).
kind?: 'public' | 'virtual';
status?: 'active' | 'inactive' | 'hibernating';
// v4: explicit pool key. NULL = solo Llm (effective pool = its own name).
poolName?: string | null;
}
// v4: POOL column placed right after NAME so an operator can't miss
// which Llms stack into the same dispatcher pool. Solo rows show "-"
// to make the "no pool / pool of 1" case unambiguous.
const llmColumns: Column<LlmRow>[] = [
{ header: 'NAME', key: 'name' },
{ header: 'POOL', key: (r) => (r.poolName !== null && r.poolName !== undefined && r.poolName !== '') ? r.poolName : '-', width: 18 },
{ header: 'KIND', key: (r) => r.kind ?? 'public', width: 8 },
{ header: 'STATUS', key: (r) => r.status ?? 'active', width: 12 },
{ header: 'TYPE', key: 'type', width: 12 },

View File

@@ -1,6 +1,6 @@
import type { FastifyInstance } from 'fastify';
import type { LlmService } from '../services/llm.service.js';
import { LlmAuthVerificationError } from '../services/llm.service.js';
import type { LlmService, LlmView } from '../services/llm.service.js';
import { LlmAuthVerificationError, effectivePoolName } from '../services/llm.service.js';
import { NotFoundError, ConflictError } from '../services/mcp-server.service.js';
export function registerLlmRoutes(
@@ -86,6 +86,44 @@ export function registerLlmRoutes(
throw err;
}
});
// v4: expose the effective pool that the named Llm belongs to. The explicit
// `/members` suffix keeps this route from colliding with the single-Llm
// `/api/v1/llms/:id` route — an Llm literally named "members" would
// otherwise be unaddressable.
// The response bundles members[] with a small stats header (size,
// activeCount, explicit vs effective pool key) so callers — agents.ts and
// the CLI — don't need a second roundtrip to compute pool stats.
app.get<{ Params: { name: string } }>('/api/v1/llms/:name/members', async (request, reply) => {
  try {
    const anchor = await getByIdOrName(service, request.params.name);
    const poolKey = effectivePoolName(anchor);
    const members = await service.listPoolMembers(poolKey);
    const activeCount = members.filter((member) => member.status === 'active').length;
    return {
      poolName: poolKey,
      explicitPoolName: anchor.poolName,
      size: members.length,
      activeCount,
      members,
    };
  } catch (err) {
    if (!(err instanceof NotFoundError)) throw err;
    reply.code(404);
    return { error: err.message };
  }
});
}
/** v4: convenience type for the new `/members` endpoint response. */
export interface PoolMembersResponse {
  /** Effective pool key (poolName ?? name on the anchor row). */
  poolName: string;
  /** Anchor row's literal poolName field — null when it falls back to its own name. */
  explicitPoolName: string | null;
  /** Total member count, inactive rows included. */
  size: number;
  /** Number of members whose status is 'active'. */
  activeCount: number;
  /** Full LlmView shapes so callers can render members without extra roundtrips. */
  members: LlmView[];
}
const CUID_RE = /^c[a-z0-9]{24}/i;

View File

@@ -201,6 +201,7 @@ function coerceProviderInput(raw: unknown): {
description?: string;
extraConfig?: Record<string, unknown>;
initialStatus?: 'active' | 'hibernating';
poolName?: string;
} {
if (raw === null || typeof raw !== 'object') {
throw Object.assign(new Error('provider entry must be an object'), { statusCode: 400 });
@@ -227,5 +228,11 @@ function coerceProviderInput(raw: unknown): {
if (o['initialStatus'] === 'active' || o['initialStatus'] === 'hibernating') {
out.initialStatus = o['initialStatus'];
}
// v4: optional pool key. Validation matches CreateLlmSchema.poolName so
// a publisher can't slip an uppercase or whitespace name past the
// virtual-publish path that the public CRUD path would reject.
if (typeof o['poolName'] === 'string' && /^[a-z0-9-]+$/.test(o['poolName']) && o['poolName'].length >= 1 && o['poolName'].length <= 100) {
out.poolName = o['poolName'];
}
return out;
}

View File

@@ -115,6 +115,17 @@ export class LlmService {
return this.repo.findByPoolName(poolName);
}
/**
 * v4: display-oriented variant of `findByPoolName` that resolves each raw
 * repo row into a full `LlmView` (apiKeyRef resolved via SecretService).
 * Consumed by the `GET /api/v1/llms/:name/members` route and by
 * `mcpctl describe llm`, both of which render the complete row shape.
 */
async listPoolMembers(poolName: string): Promise<LlmView[]> {
  const rows = await this.repo.findByPoolName(poolName);
  const views = rows.map((row) => this.toView(row));
  return Promise.all(views);
}
async create(input: unknown, opts: { skipAuthCheck?: boolean } = {}): Promise<LlmView> {
const data = CreateLlmSchema.parse(input);
const existing = await this.repo.findByName(data.name);

View File

@@ -47,6 +47,15 @@ export interface RegisterProviderInput {
* publish time.
*/
initialStatus?: 'active' | 'hibernating';
/**
* v4: optional pool key. When set, this virtual Llm row stacks with
* any other Llms (public OR virtual from any session) sharing the
* same value. The chat dispatcher then load-balances across all
* healthy members. Cluster-wide name uniqueness still applies — the
* publisher picks a unique `name` (e.g. `vllm-<host>-qwen3`) and
* shares the `poolName` with siblings.
*/
poolName?: string;
}
export interface RegisterResult {
@@ -147,6 +156,7 @@ export class VirtualLlmService implements IVirtualLlmService {
tier: p.tier ?? 'fast',
description: p.description ?? '',
...(p.extraConfig !== undefined ? { extraConfig: p.extraConfig } : {}),
...(p.poolName !== undefined ? { poolName: p.poolName } : {}),
kind: 'virtual',
providerSessionId: sessionId,
status: initialStatus,
@@ -180,6 +190,7 @@ export class VirtualLlmService implements IVirtualLlmService {
...(p.tier !== undefined ? { tier: p.tier } : {}),
...(p.description !== undefined ? { description: p.description } : {}),
...(p.extraConfig !== undefined ? { extraConfig: p.extraConfig } : {}),
...(p.poolName !== undefined ? { poolName: p.poolName } : {}),
kind: 'virtual',
providerSessionId: sessionId,
status: initialStatus,

View File

@@ -21,6 +21,12 @@ function makeLlm(overrides: Partial<Llm> = {}): Llm {
apiKeySecretId: null,
apiKeySecretKey: null,
extraConfig: {},
poolName: null,
kind: 'public',
providerSessionId: null,
lastHeartbeatAt: null,
status: 'active',
inactiveSince: null,
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
@@ -38,6 +44,17 @@ function mockRepo(initial: Llm[] = []): ILlmRepository {
return null;
}),
findByTier: vi.fn(async () => []),
findByPoolName: vi.fn(async (poolName: string) => {
const out: Llm[] = [];
for (const r of rows.values()) {
if (r.poolName === poolName) out.push(r);
else if (r.poolName === null && r.name === poolName) out.push(r);
}
return out;
}),
findBySessionId: vi.fn(async () => []),
findStaleVirtuals: vi.fn(async () => []),
findExpiredInactives: vi.fn(async () => []),
create: vi.fn(async (data) => {
const row = makeLlm({ id: 'new-id', name: data.name, type: data.type, model: data.model });
rows.set(row.id, row);
@@ -191,4 +208,50 @@ describe('Llm Routes', () => {
const res = await app.inject({ method: 'DELETE', url: '/api/v1/llms/missing' });
expect(res.statusCode).toBe(404);
});
// ── v4: GET /api/v1/llms/:name/members ──
it('GET /api/v1/llms/:name/members returns all members of an explicit pool', async () => {
  const repo = mockRepo([
    makeLlm({ id: 'l1', name: 'qwen-prod-1', poolName: 'qwen-pool', model: 'qwen3' }),
    makeLlm({ id: 'l2', name: 'qwen-prod-2', poolName: 'qwen-pool', model: 'qwen3' }),
    makeLlm({ id: 'l3', name: 'qwen-prod-3', poolName: 'qwen-pool', model: 'qwen3', status: 'inactive' }),
    makeLlm({ id: 'other', name: 'gpt-4o', poolName: null, model: 'gpt-4o' }),
  ]);
  await createApp(repo);
  // Any member name works as the anchor — the route first resolves the
  // anchor's effective pool key, then lists every row matching it.
  const response = await app.inject({ method: 'GET', url: '/api/v1/llms/qwen-prod-1/members' });
  expect(response.statusCode).toBe(200);
  const payload = response.json<{
    poolName: string;
    explicitPoolName: string | null;
    size: number;
    activeCount: number;
    members: Array<{ name: string }>;
  }>();
  expect(payload.explicitPoolName).toBe('qwen-pool');
  expect(payload.poolName).toBe('qwen-pool');
  expect(payload.size).toBe(3);
  expect(payload.activeCount).toBe(2);
  const memberNames = payload.members.map((m) => m.name).sort();
  expect(memberNames).toEqual(['qwen-prod-1', 'qwen-prod-2', 'qwen-prod-3']);
});
it('GET /api/v1/llms/:name/members for a solo Llm returns a pool of 1', async () => {
  await createApp(mockRepo([makeLlm({ id: 'solo', name: 'gpt-4o', poolName: null, model: 'gpt-4o' })]));
  const response = await app.inject({ method: 'GET', url: '/api/v1/llms/gpt-4o/members' });
  expect(response.statusCode).toBe(200);
  const payload = response.json<{ poolName: string; explicitPoolName: string | null; size: number; activeCount: number }>();
  // Implicit pool: the key falls back to the row's own name and the
  // explicit key stays null.
  expect(payload.explicitPoolName).toBeNull();
  expect(payload.poolName).toBe('gpt-4o');
  expect(payload.activeCount).toBe(1);
  expect(payload.size).toBe(1);
});
it('GET /api/v1/llms/:name/members returns 404 when the anchor name does not exist', async () => {
  // Empty repo — the anchor lookup itself should fail, not the pool scan.
  await createApp(mockRepo());
  const { statusCode } = await app.inject({ method: 'GET', url: '/api/v1/llms/nope/members' });
  expect(statusCode).toBe(404);
});
});

View File

@@ -93,6 +93,18 @@ export interface LlmProviderFileEntry {
* - `command`: spawn a shell command (e.g. `systemctl --user start vllm`)
*/
wake?: WakeRecipe;
/**
* v4: opt this provider into a load-balanced pool. When set, the
* published Llm row carries `poolName` and stacks with any other Llms
* (public OR virtual) sharing the same value. Agents pinned to any
* pool member dispatch across all healthy members at chat time.
*
* Convention for distributed-compute setups: each user's mcplocal
* picks a unique `name` (e.g. `vllm-<hostname>-qwen3`) but a shared
* `poolName` (e.g. `user-vllm-qwen3-thinking`). Result: agents see one
* logical pool that auto-grows as more workers come online.
*/
poolName?: string;
}
export type WakeRecipe =

View File

@@ -218,6 +218,7 @@ async function maybeStartVirtualLlmRegistrar(
};
if (entry.tier !== undefined) item.tier = entry.tier;
if (entry.wake !== undefined) item.wake = entry.wake;
if (entry.poolName !== undefined) item.poolName = entry.poolName;
published.push(item);
}
// v3: forward locally-declared agents alongside the providers. We

View File

@@ -54,6 +54,12 @@ export interface RegistrarPublishedProvider {
* the registrar runs this recipe and waits for the backend to come up.
*/
wake?: WakeRecipe;
/**
* v4: optional pool key. When set, the published Llm row carries
* `poolName` and stacks with any other Llms sharing the same value.
* Agents pinned to any pool member dispatch across all healthy members.
*/
poolName?: string;
}
/**
@@ -185,6 +191,7 @@ export class VirtualLlmRegistrar {
model: p.model,
...(p.tier !== undefined ? { tier: p.tier } : {}),
...(p.description !== undefined ? { description: p.description } : {}),
...(p.poolName !== undefined ? { poolName: p.poolName } : {}),
initialStatus,
};
}));