feat(cli): live "say hi" probe for server LLMs in mcpctl status
Some checks failed
CI/CD / lint (pull_request) Successful in 55s
CI/CD / test (pull_request) Successful in 1m13s
CI/CD / typecheck (pull_request) Successful in 3m10s
CI/CD / smoke (pull_request) Failing after 1m46s
CI/CD / build (pull_request) Successful in 3m24s
CI/CD / publish (pull_request) Has been skipped
`mcpctl status` was listing the server-side LLMs but said nothing about
whether each one actually serves inference. This adds a per-LLM probe
that POSTs a tiny prompt to /api/v1/llms/<name>/infer:

    messages: [{ role: 'user', content: "Say exactly the word 'hi' and nothing else." }]
    max_tokens: 8, temperature: 0
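
For illustration, a minimal TypeScript sketch of such a probe. The endpoint
and payload are taken from this message; the function name, fetch wiring,
and response shape are assumptions, not the actual implementation:

    // Hypothetical sketch: POST the tiny prompt and time the round trip.
    async function sayHiProbe(
      baseUrl: string,
      name: string,
    ): Promise<{ ok: boolean; ms: number; say?: string; error?: string }> {
      const started = Date.now();
      try {
        const res = await fetch(`${baseUrl}/api/v1/llms/${encodeURIComponent(name)}/infer`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            messages: [{ role: 'user', content: "Say exactly the word 'hi' and nothing else." }],
            max_tokens: 8,
            temperature: 0,
          }),
          signal: AbortSignal.timeout(15_000), // per-probe timeout, see below
        });
        const ms = Date.now() - started;
        if (!res.ok) return { ok: false, ms, error: `upstream returned ${res.status}` };
        const body = (await res.json()) as { content?: string }; // response shape assumed
        return { ok: true, ms, say: body.content?.trim() };
      } catch (err) {
        return { ok: false, ms: Date.now() - started, error: String(err) };
      }
    }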
Each registered LLM now gets a live health line above its detail line:

    Server LLMs: 2 registered (probing live "say hi"...)
      fast   qwen3-thinking  ✓ "hi" 312ms
             openai → qwen3-thinking  http://litellm.../v1  key:litellm/API_KEY
      heavy  sonnet          ✗ upstream auth failed: 401
             anthropic → claude-sonnet-4-5  provider default  no key
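
Rendering one of those lines from a probe result could look roughly like
this; the helper name and column widths are made up, only the ✓/✗ format
mirrors the sample above:

    // Sketch only: the result shape matches the `health: { ok, ms, say?, error? }`
    // field described below; padding widths are arbitrary.
    type ProbeResult = { ok: boolean; ms: number; say?: string; error?: string };

    function renderHealthLine(tier: string, name: string, r: ProbeResult): string {
      const status = r.ok ? `✓ "${r.say}" ${r.ms}ms` : `✗ ${r.error}`;
      return `  ${tier.padEnd(6)} ${name.padEnd(16)} ${status}`;
    }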
Probes run in parallel so a single slow LLM doesn't gate the others;
each has its own 15-second timeout. JSON/YAML output gains a
`health: { ok, ms, say?, error? }` field per server LLM so dashboards
get the same liveness signal.
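
A sketch of how that fan-out can be wired; Promise.allSettled and the names
here are illustrative (sayHiProbe is the hypothetical probe sketched above),
not the actual code:

    // Probe every LLM concurrently; a hung probe only burns its own
    // 15-second budget (enforced inside sayHiProbe via AbortSignal.timeout).
    async function probeAll(baseUrl: string, llms: Array<{ name: string }>) {
      const settled = await Promise.allSettled(
        llms.map((llm) => sayHiProbe(baseUrl, llm.name)),
      );
      return settled.map((r) =>
        r.status === 'fulfilled'
          ? r.value
          : { ok: false, ms: 15_000, error: String(r.reason) },
      );
    }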
Tests: 25/25 (was 24, +1 new for the failure-path render). Workspace
suite: 2006/2006 across 149 files.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
@@ -28,6 +28,7 @@ function baseDeps(overrides?: Partial<StatusCommandDeps>): Partial<StatusCommandDeps>
     checkHealth: async () => true,
     fetchProviders: async () => null,
     fetchServerLlms: async () => null,
+    probeServerLlm: async () => ({ ok: true, ms: 12, say: 'hi' }),
     isTTY: false,
     ...overrides,
   };
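
Read together with the assertions below, the stub suggests the new
dependency's shape; this is inferred from the diff, not copied from the
source:

    // Inferred: the new dep as the tests exercise it.
    interface StatusCommandDeps {
      // ...existing deps: checkHealth, fetchProviders, fetchServerLlms, isTTY...
      probeServerLlm: (llm: { name: string; url: string }) =>
        Promise<{ ok: boolean; ms: number; say?: string; error?: string }>;
    }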
@@ -210,14 +211,33 @@ describe('status command', () => {
         { id: 'l1', name: 'qwen3-thinking', type: 'openai', model: 'qwen3-thinking', tier: 'fast', url: 'http://x:4000/v1', apiKeyRef: { name: 'litellm', key: 'API_KEY' } },
         { id: 'l2', name: 'sonnet', type: 'anthropic', model: 'claude-sonnet-4-5', tier: 'heavy', url: '', apiKeyRef: null },
       ],
+      probeServerLlm: async () => ({ ok: true, ms: 42, say: 'hi' }),
     }));
     await cmd.parseAsync([], { from: 'user' });
     const out = output.join('\n');
     expect(out).toContain('Server LLMs: 2 registered');
-    expect(out).toContain('qwen3-thinking (openai → qwen3-thinking)');
-    expect(out).toContain('sonnet (anthropic → claude-sonnet-4-5)');
+    expect(out).toContain('qwen3-thinking');
+    expect(out).toContain('openai → qwen3-thinking');
+    expect(out).toContain('sonnet');
+    expect(out).toContain('anthropic → claude-sonnet-4-5');
+    expect(out).toMatch(/fast\s+qwen3-thinking/);
+    expect(out).toMatch(/heavy\s+sonnet/);
+    // Health probe result rendered for each LLM
+    expect(out).toContain('✓ "hi" 42ms');
   });

+  it('renders a failed "say hi" probe with the error message', async () => {
+    const cmd = createStatusCommand(baseDeps({
+      fetchServerLlms: async () => [
+        { id: 'l1', name: 'broken', type: 'openai', model: 'gpt-4o', tier: 'fast', url: 'http://x', apiKeyRef: null },
+      ],
+      probeServerLlm: async () => ({ ok: false, ms: 5000, error: 'upstream auth failed: 401' }),
+    }));
+    await cmd.parseAsync([], { from: 'user' });
+    const out = output.join('\n');
+    expect(out).toContain('Server LLMs: 1 registered');
+    expect(out).toContain('broken');
+    expect(out).toContain('✗ upstream auth failed: 401');
+  });
+
   it('renders "none registered" when mcpd has no Llm rows', async () => {
@@ -254,13 +274,22 @@ describe('status command', () => {
     expect(capturedToken).toBeNull();
   });

-  it('includes serverLlms in JSON output', async () => {
+  it('includes serverLlms with probed health in JSON output', async () => {
     const llms = [
       { id: 'l1', name: 'qwen3-thinking', type: 'openai', model: 'qwen3-thinking', tier: 'fast', url: 'http://x', apiKeyRef: null },
     ];
-    const cmd = createStatusCommand(baseDeps({ fetchServerLlms: async () => llms }));
+    const cmd = createStatusCommand(baseDeps({
+      fetchServerLlms: async () => llms,
+      probeServerLlm: async () => ({ ok: true, ms: 99, say: 'hi' }),
+    }));
     await cmd.parseAsync(['-o', 'json'], { from: 'user' });
-    const parsed = JSON.parse(output[0]) as { serverLlms?: typeof llms };
-    expect(parsed.serverLlms).toEqual(llms);
+    const parsed = JSON.parse(output[0]) as {
+      serverLlms?: Array<typeof llms[number] & { health: { ok: boolean; ms: number; say?: string } }>;
+    };
+    expect(parsed.serverLlms).toHaveLength(1);
+    expect(parsed.serverLlms![0]).toMatchObject({
+      name: 'qwen3-thinking',
+      health: { ok: true, ms: 99, say: 'hi' },
+    });
   });
 });