feat(cli): live "say hi" probe for server LLMs in mcpctl status

Status showed the server-side LLM list but gave no signal whether
each one actually serves inference. This adds a per-LLM probe that
POSTs a tiny prompt to /api/v1/llms/<name>/infer:

  messages: [{ role: 'user', content: "Say exactly the word 'hi' and nothing else." }]
  max_tokens: 8, temperature: 0
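
For reference, each probe boils down to roughly this (a sketch, not
the exact code: it assumes a global fetch and a JSON { content }
reply; the real implementation is injected as the probeServerLlm
dependency exercised in the tests below):

  type Health = { ok: boolean; ms: number; say?: string; error?: string };

  // Sketch of one "say hi" probe against the /infer endpoint above.
  async function probeServerLlm(baseUrl: string, name: string): Promise<Health> {
    const started = Date.now();
    try {
      const res = await fetch(`${baseUrl}/api/v1/llms/${encodeURIComponent(name)}/infer`, {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify({
          messages: [{ role: 'user', content: "Say exactly the word 'hi' and nothing else." }],
          max_tokens: 8,
          temperature: 0,
        }),
        signal: AbortSignal.timeout(15_000), // per-probe 15s budget
      });
      const ms = Date.now() - started;
      if (!res.ok) return { ok: false, ms, error: `upstream returned ${res.status}` };
      const data = (await res.json()) as { content?: string }; // response field name is an assumption
      return { ok: true, ms, say: data.content?.trim() };
    } catch (err) {
      return { ok: false, ms: Date.now() - started, error: err instanceof Error ? err.message : String(err) };
    }
  }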

Each registered LLM gets a health line, plus a detail line underneath:

  Server LLMs: 2 registered (probing live "say hi"...)
    fast   qwen3-thinking  ✓ "hi" 312ms
              openai → qwen3-thinking  http://litellm.../v1  key:litellm/API_KEY
    heavy  sonnet  ✗ upstream auth failed: 401
              anthropic → claude-sonnet-4-5  provider default  no key
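
The two-line layout above comes down to roughly this (illustrative
only; out, renderLlmLine, and the padding widths are assumptions,
but the fields match the test fixtures below):

  type ServerLlm = {
    name: string; type: string; model: string; tier: string; url: string;
    apiKeyRef: { name: string; key: string } | null;
  };

  // Sketch of the per-LLM renderer; `out` stands in for the line sink.
  function renderLlmLine(llm: ServerLlm, h: Health, out: (s: string) => void): void {
    const mark = h.ok ? `✓ "${h.say}" ${h.ms}ms` : `✗ ${h.error}`;
    const key = llm.apiKeyRef ? `key:${llm.apiKeyRef.name}/${llm.apiKeyRef.key}` : 'no key';
    out(`  ${llm.tier.padEnd(6)} ${llm.name}  ${mark}`);
    out(`            ${llm.type} → ${llm.model}  ${llm.url || 'provider default'}  ${key}`);
  }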

Probes run in parallel so a single slow LLM doesn't gate the others;
each has its own 15-second timeout. JSON/YAML output gains a
`health: { ok, ms, say?, error? }` field per server LLM so dashboards
get the same liveness signal.
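
Concretely, the fan-out is just a Promise.all over the registered
list (a sketch; the exact argument the injected probeServerLlm
takes isn't pinned down by the tests):

  // Probe every registered LLM concurrently; a slow or dead upstream
  // only costs its own 15s timeout, not everyone else's.
  const probed = await Promise.all(
    serverLlms.map(async (llm) => ({
      ...llm,
      health: await deps.probeServerLlm(llm),
    })),
  );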

Tests: 25/25 (was 24, +1 new for the failure-path render). Workspace
suite: 2006/2006 across 149 files.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

@@ -28,6 +28,7 @@ function baseDeps(overrides?: Partial<StatusCommandDeps>): Partial<StatusCommand
     checkHealth: async () => true,
     fetchProviders: async () => null,
     fetchServerLlms: async () => null,
+    probeServerLlm: async () => ({ ok: true, ms: 12, say: 'hi' }),
     isTTY: false,
     ...overrides,
   };
@@ -210,14 +211,33 @@ describe('status command', () => {
         { id: 'l1', name: 'qwen3-thinking', type: 'openai', model: 'qwen3-thinking', tier: 'fast', url: 'http://x:4000/v1', apiKeyRef: { name: 'litellm', key: 'API_KEY' } },
         { id: 'l2', name: 'sonnet', type: 'anthropic', model: 'claude-sonnet-4-5', tier: 'heavy', url: '', apiKeyRef: null },
       ],
+      probeServerLlm: async () => ({ ok: true, ms: 42, say: 'hi' }),
     }));
     await cmd.parseAsync([], { from: 'user' });
     const out = output.join('\n');
     expect(out).toContain('Server LLMs: 2 registered');
-    expect(out).toContain('qwen3-thinking (openai → qwen3-thinking)');
-    expect(out).toContain('sonnet (anthropic → claude-sonnet-4-5)');
+    expect(out).toContain('qwen3-thinking');
+    expect(out).toContain('openai → qwen3-thinking');
+    expect(out).toContain('sonnet');
+    expect(out).toContain('anthropic → claude-sonnet-4-5');
+    expect(out).toMatch(/fast\s+qwen3-thinking/);
+    expect(out).toMatch(/heavy\s+sonnet/);
+    // Health probe result rendered for each LLM
+    expect(out).toContain('✓ "hi" 42ms');
   });
+  it('renders a failed "say hi" probe with the error message', async () => {
+    const cmd = createStatusCommand(baseDeps({
+      fetchServerLlms: async () => [
+        { id: 'l1', name: 'broken', type: 'openai', model: 'gpt-4o', tier: 'fast', url: 'http://x', apiKeyRef: null },
+      ],
+      probeServerLlm: async () => ({ ok: false, ms: 5000, error: 'upstream auth failed: 401' }),
+    }));
+    await cmd.parseAsync([], { from: 'user' });
+    const out = output.join('\n');
+    expect(out).toContain('Server LLMs: 1 registered');
+    expect(out).toContain('broken');
+    expect(out).toContain('✗ upstream auth failed: 401');
+  });
   it('renders "none registered" when mcpd has no Llm rows', async () => {
@@ -254,13 +274,22 @@ describe('status command', () => {
     expect(capturedToken).toBeNull();
   });
-  it('includes serverLlms in JSON output', async () => {
+  it('includes serverLlms with probed health in JSON output', async () => {
     const llms = [
       { id: 'l1', name: 'qwen3-thinking', type: 'openai', model: 'qwen3-thinking', tier: 'fast', url: 'http://x', apiKeyRef: null },
     ];
-    const cmd = createStatusCommand(baseDeps({ fetchServerLlms: async () => llms }));
+    const cmd = createStatusCommand(baseDeps({
+      fetchServerLlms: async () => llms,
+      probeServerLlm: async () => ({ ok: true, ms: 99, say: 'hi' }),
+    }));
     await cmd.parseAsync(['-o', 'json'], { from: 'user' });
-    const parsed = JSON.parse(output[0]) as { serverLlms?: typeof llms };
-    expect(parsed.serverLlms).toEqual(llms);
+    const parsed = JSON.parse(output[0]) as {
+      serverLlms?: Array<typeof llms[number] & { health: { ok: boolean; ms: number; say?: string } }>;
+    };
+    expect(parsed.serverLlms).toHaveLength(1);
+    expect(parsed.serverLlms![0]).toMatchObject({
+      name: 'qwen3-thinking',
+      health: { ok: true, ms: 99, say: 'hi' },
+    });
   });
 });