feat(cli): live "say hi" probe for server LLMs in mcpctl status
Some checks failed
CI/CD / lint (pull_request) Successful in 55s
CI/CD / test (pull_request) Successful in 1m13s
CI/CD / typecheck (pull_request) Successful in 3m10s
CI/CD / smoke (pull_request) Failing after 1m46s
CI/CD / build (pull_request) Successful in 3m24s
CI/CD / publish (pull_request) Has been skipped
`mcpctl status` was listing the server-side LLMs but said nothing about
whether each one actually serves inference. This adds a per-LLM probe
that POSTs a tiny prompt to /api/v1/llms/<name>/infer:

    messages: [{ role: 'user', content: "Say exactly the word 'hi' and nothing else." }]
    max_tokens: 8, temperature: 0
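
For illustration, a minimal TypeScript sketch of such a probe. The endpoint
and payload are taken from this message; the function name, fetch wiring,
and response shape are assumptions, not the actual implementation:

    // Hypothetical sketch: POST the tiny prompt and time the round trip.
    async function sayHiProbe(
      baseUrl: string,
      name: string,
    ): Promise<{ ok: boolean; ms: number; say?: string; error?: string }> {
      const started = Date.now();
      try {
        const res = await fetch(`${baseUrl}/api/v1/llms/${encodeURIComponent(name)}/infer`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            messages: [{ role: 'user', content: "Say exactly the word 'hi' and nothing else." }],
            max_tokens: 8,
            temperature: 0,
          }),
          signal: AbortSignal.timeout(15_000), // per-probe timeout, see below
        });
        const ms = Date.now() - started;
        if (!res.ok) return { ok: false, ms, error: `upstream returned ${res.status}` };
        const body = (await res.json()) as { content?: string }; // response shape assumed
        return { ok: true, ms, say: body.content?.trim() };
      } catch (err) {
        return { ok: false, ms: Date.now() - started, error: String(err) };
      }
    }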
Each registered LLM now gets a live health line above its detail line:

    Server LLMs: 2 registered (probing live "say hi"...)
      fast   qwen3-thinking  ✓ "hi" 312ms
             openai → qwen3-thinking  http://litellm.../v1  key:litellm/API_KEY
      heavy  sonnet          ✗ upstream auth failed: 401
             anthropic → claude-sonnet-4-5  provider default  no key
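
Rendering one of those lines from a probe result could look roughly like
this; the helper name and column widths are made up, only the ✓/✗ format
mirrors the sample above:

    // Sketch only: the result shape matches the `health: { ok, ms, say?, error? }`
    // field described below; padding widths are arbitrary.
    type ProbeResult = { ok: boolean; ms: number; say?: string; error?: string };

    function renderHealthLine(tier: string, name: string, r: ProbeResult): string {
      const status = r.ok ? `✓ "${r.say}" ${r.ms}ms` : `✗ ${r.error}`;
      return `  ${tier.padEnd(6)} ${name.padEnd(16)} ${status}`;
    }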
Probes run in parallel so a single slow LLM doesn't gate the others;
each has its own 15-second timeout. JSON/YAML output gains a
`health: { ok, ms, say?, error? }` field per server LLM so dashboards
get the same liveness signal.
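
A sketch of how that fan-out can be wired; Promise.allSettled and the names
here are illustrative (sayHiProbe is the hypothetical probe sketched above),
not the actual code:

    // Probe every LLM concurrently; a hung probe only burns its own
    // 15-second budget (enforced inside sayHiProbe via AbortSignal.timeout).
    async function probeAll(baseUrl: string, llms: Array<{ name: string }>) {
      const settled = await Promise.allSettled(
        llms.map((llm) => sayHiProbe(baseUrl, llm.name)),
      );
      return settled.map((r) =>
        r.status === 'fulfilled'
          ? r.value
          : { ok: false, ms: 15_000, error: String(r.reason) },
      );
    }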
Tests: 25/25 (was 24, +1 new for the failure-path render). Workspace
suite: 2006/2006 across 149 files.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
@@ -28,6 +28,7 @@ function baseDeps(overrides?: Partial<StatusCommandDeps>): Partial<StatusCommandDeps>
     checkHealth: async () => true,
     fetchProviders: async () => null,
     fetchServerLlms: async () => null,
+    probeServerLlm: async () => ({ ok: true, ms: 12, say: 'hi' }),
     isTTY: false,
     ...overrides,
   };
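
Read together with the assertions below, the stub suggests the new
dependency's shape; this is inferred from the diff, not copied from the
source:

    // Inferred: the new dep as the tests exercise it.
    interface StatusCommandDeps {
      // ...existing deps: checkHealth, fetchProviders, fetchServerLlms, isTTY...
      probeServerLlm: (llm: { name: string; url: string }) =>
        Promise<{ ok: boolean; ms: number; say?: string; error?: string }>;
    }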
@@ -210,14 +211,33 @@ describe('status command', () => {
         { id: 'l1', name: 'qwen3-thinking', type: 'openai', model: 'qwen3-thinking', tier: 'fast', url: 'http://x:4000/v1', apiKeyRef: { name: 'litellm', key: 'API_KEY' } },
         { id: 'l2', name: 'sonnet', type: 'anthropic', model: 'claude-sonnet-4-5', tier: 'heavy', url: '', apiKeyRef: null },
       ],
+      probeServerLlm: async () => ({ ok: true, ms: 42, say: 'hi' }),
     }));
     await cmd.parseAsync([], { from: 'user' });
     const out = output.join('\n');
     expect(out).toContain('Server LLMs: 2 registered');
-    expect(out).toContain('qwen3-thinking (openai → qwen3-thinking)');
-    expect(out).toContain('sonnet (anthropic → claude-sonnet-4-5)');
+    expect(out).toContain('qwen3-thinking');
+    expect(out).toContain('openai → qwen3-thinking');
+    expect(out).toContain('sonnet');
+    expect(out).toContain('anthropic → claude-sonnet-4-5');
+    expect(out).toMatch(/fast\s+qwen3-thinking/);
+    expect(out).toMatch(/heavy\s+sonnet/);
+    // Health probe result rendered for each LLM
+    expect(out).toContain('✓ "hi" 42ms');
   });

+  it('renders a failed "say hi" probe with the error message', async () => {
+    const cmd = createStatusCommand(baseDeps({
+      fetchServerLlms: async () => [
+        { id: 'l1', name: 'broken', type: 'openai', model: 'gpt-4o', tier: 'fast', url: 'http://x', apiKeyRef: null },
+      ],
+      probeServerLlm: async () => ({ ok: false, ms: 5000, error: 'upstream auth failed: 401' }),
+    }));
+    await cmd.parseAsync([], { from: 'user' });
+    const out = output.join('\n');
+    expect(out).toContain('Server LLMs: 1 registered');
+    expect(out).toContain('broken');
+    expect(out).toContain('✗ upstream auth failed: 401');
+  });
+
   it('renders "none registered" when mcpd has no Llm rows', async () => {
@@ -254,13 +274,22 @@ describe('status command', () => {
     expect(capturedToken).toBeNull();
   });

-  it('includes serverLlms in JSON output', async () => {
+  it('includes serverLlms with probed health in JSON output', async () => {
     const llms = [
       { id: 'l1', name: 'qwen3-thinking', type: 'openai', model: 'qwen3-thinking', tier: 'fast', url: 'http://x', apiKeyRef: null },
     ];
-    const cmd = createStatusCommand(baseDeps({ fetchServerLlms: async () => llms }));
+    const cmd = createStatusCommand(baseDeps({
+      fetchServerLlms: async () => llms,
+      probeServerLlm: async () => ({ ok: true, ms: 99, say: 'hi' }),
+    }));
     await cmd.parseAsync(['-o', 'json'], { from: 'user' });
-    const parsed = JSON.parse(output[0]) as { serverLlms?: typeof llms };
-    expect(parsed.serverLlms).toEqual(llms);
+    const parsed = JSON.parse(output[0]) as {
+      serverLlms?: Array<typeof llms[number] & { health: { ok: boolean; ms: number; say?: string } }>;
+    };
+    expect(parsed.serverLlms).toHaveLength(1);
+    expect(parsed.serverLlms![0]).toMatchObject({
+      name: 'qwen3-thinking',
+      health: { ok: true, ms: 99, say: 'hi' },
+    });
   });
 });