diff --git a/src/cli/src/commands/status.ts b/src/cli/src/commands/status.ts index 567db36..c77e472 100644 --- a/src/cli/src/commands/status.ts +++ b/src/cli/src/commands/status.ts @@ -44,6 +44,19 @@ interface ServerLlm { apiKeyRef?: { name: string; key: string } | null; } +/** + * Result of a live "say hi" probe against a server LLM. `ok` says we got a + * 200 + non-empty content back; `say` is the trimmed first 16 chars of the + * reply for the user to spot-check (most LLMs say "hi", a misbehaving one + * says "Hello! How can I assist…"). `ms` is end-to-end including TLS. + */ +export interface ServerLlmHealth { + ok: boolean; + ms: number; + say?: string; + error?: string; +} + export interface StatusCommandDeps { configDeps: Partial; credentialsDeps: Partial; @@ -62,6 +75,12 @@ export interface StatusCommandDeps { * command stays printable even when mcpd is unreachable. */ fetchServerLlms: (mcpdUrl: string, token: string | null) => Promise; + /** + * Probe a single server LLM with a tiny "say hi" prompt to check it's + * actually serving inference. Used per-LLM in parallel after the fetch. + * Always resolves (never throws) so one bad LLM doesn't sink the section. + */ + probeServerLlm: (mcpdUrl: string, name: string, token: string | null) => Promise; isTTY: boolean; } @@ -205,6 +224,85 @@ function defaultFetchServerLlms(mcpdUrl: string, token: string | null): Promise< }); } +/** + * POST a tiny "say hi" prompt to /api/v1/llms//infer and decide if + * the LLM actually serves inference. Returns ok=true when the response is + * 200 with a non-empty assistant message; otherwise ok=false with an + * error string suitable for one-line display. + */ +const PROBE_TIMEOUT_MS = 15_000; +const PROBE_BODY = JSON.stringify({ + messages: [{ role: 'user', content: "Say exactly the word 'hi' and nothing else." }], + max_tokens: 8, + temperature: 0, +}); + +function defaultProbeServerLlm(mcpdUrl: string, name: string, token: string | null): Promise { + return new Promise((resolve) => { + const started = Date.now(); + const u = new URL(`${mcpdUrl}/api/v1/llms/${encodeURIComponent(name)}/infer`); + const driver = u.protocol === 'https:' ? https : http; + const headers: Record = { + 'Content-Type': 'application/json', + Accept: 'application/json', + 'Content-Length': String(Buffer.byteLength(PROBE_BODY)), + }; + if (token !== null) headers['Authorization'] = `Bearer ${token}`; + + let req: http.ClientRequest; + try { + req = driver.request({ + hostname: u.hostname, + port: u.port || (u.protocol === 'https:' ? 443 : 80), + path: u.pathname + u.search, + method: 'POST', + headers, + timeout: PROBE_TIMEOUT_MS, + }, (res) => { + const chunks: Buffer[] = []; + res.on('data', (c: Buffer) => chunks.push(c)); + res.on('end', () => { + const ms = Date.now() - started; + const body = Buffer.concat(chunks).toString('utf-8'); + if ((res.statusCode ?? 0) !== 200) { + // Pull out just the error message if the body is JSON, else the + // raw status — keeps the line tidy. + let msg = `HTTP ${String(res.statusCode ?? 0)}`; + try { + const parsed = JSON.parse(body) as { error?: string }; + if (typeof parsed.error === 'string') msg = parsed.error; + } catch { /* not JSON, fall through */ } + resolve({ ok: false, ms, error: msg.slice(0, 80) }); + return; + } + let content = ''; + try { + const parsed = JSON.parse(body) as { + choices?: Array<{ message?: { content?: string } }>; + }; + content = parsed.choices?.[0]?.message?.content?.trim() ?? 
''; + } catch { + resolve({ ok: false, ms, error: 'invalid response body' }); + return; + } + if (content === '') { + resolve({ ok: false, ms, error: 'empty content' }); + return; + } + resolve({ ok: true, ms, say: content.slice(0, 16) }); + }); + }); + } catch { + resolve({ ok: false, ms: Date.now() - started, error: 'request failed' }); + return; + } + req.on('error', (e) => resolve({ ok: false, ms: Date.now() - started, error: e.message.slice(0, 80) })); + req.on('timeout', () => { req.destroy(); resolve({ ok: false, ms: Date.now() - started, error: 'timeout' }); }); + req.write(PROBE_BODY); + req.end(); + }); +} + const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; const defaultDeps: StatusCommandDeps = { @@ -217,6 +315,7 @@ const defaultDeps: StatusCommandDeps = { fetchModels: defaultFetchModels, fetchProviders: defaultFetchProviders, fetchServerLlms: defaultFetchServerLlms, + probeServerLlm: defaultProbeServerLlm, isTTY: process.stdout.isTTY ?? false, }; @@ -278,7 +377,7 @@ function formatProviderStatus(name: string, info: ProvidersInfo, ansi: boolean): } export function createStatusCommand(deps?: Partial): Command { - const { configDeps, credentialsDeps, log, write, checkHealth, checkLlm, fetchModels, fetchProviders, fetchServerLlms, isTTY } = { ...defaultDeps, ...deps }; + const { configDeps, credentialsDeps, log, write, checkHealth, checkLlm, fetchModels, fetchProviders, fetchServerLlms, probeServerLlm, isTTY } = { ...defaultDeps, ...deps }; return new Command('status') .description('Show mcpctl status and connectivity') @@ -292,14 +391,25 @@ export function createStatusCommand(deps?: Partial): Command if (opts.output !== 'table') { // JSON/YAML: run everything in parallel, wait, output at once + const token = creds?.token ?? null; const [mcplocalReachable, mcpdReachable, llmStatus, providersInfo, serverLlms] = await Promise.all([ checkHealth(config.mcplocalUrl), checkHealth(config.mcpdUrl), llmLabel ? checkLlm(config.mcplocalUrl) : Promise.resolve(null), multiProvider ? fetchProviders(config.mcplocalUrl) : Promise.resolve(null), - fetchServerLlms(config.mcpdUrl, creds?.token ?? null), + fetchServerLlms(config.mcpdUrl, token), ]); + // Probe each server LLM in parallel — adds 0-2 sec to JSON mode but + // gives consumers (scripts, dashboards) the same liveness signal as + // the table view. + const serverLlmsWithHealth = serverLlms !== null + ? await Promise.all(serverLlms.map(async (l) => ({ + ...l, + health: await probeServerLlm(config.mcpdUrl, l.name, token), + }))) + : null; + const llm = llmLabel ? llmStatus === 'ok' ? llmLabel : `${llmLabel} (${llmStatus})` : null; @@ -316,7 +426,7 @@ export function createStatusCommand(deps?: Partial): Command llm, llmStatus, ...(providersInfo ? { providers: providersInfo } : {}), - ...(serverLlms !== null ? { serverLlms } : {}), + ...(serverLlmsWithHealth !== null ? { serverLlms: serverLlmsWithHealth } : {}), }; log(opts.output === 'json' ? formatJson(status) : formatYaml(status)); @@ -341,11 +451,12 @@ export function createStatusCommand(deps?: Partial): Command // Server LLMs (mcpd-managed) — fetched in parallel regardless of the // local-LLM config, so the section renders even on machines without // a configured client-side provider. - const serverLlmsPromise = fetchServerLlms(config.mcpdUrl, creds?.token ?? null); + const token = creds?.token ?? 
null; + const serverLlmsPromise = fetchServerLlms(config.mcpdUrl, token); if (!llmLabel) { log(`LLM: not configured (run 'mcpctl config setup')`); - await renderServerLlmsSection(serverLlmsPromise, isTTY); + await renderServerLlmsSection(serverLlmsPromise, config.mcpdUrl, token, isTTY); return; } @@ -409,17 +520,20 @@ export function createStatusCommand(deps?: Partial): Command } } - await renderServerLlmsSection(serverLlmsPromise, isTTY); + await renderServerLlmsSection(serverLlmsPromise, config.mcpdUrl, token, isTTY); }); /** - * Print a "Server LLMs:" section listing mcpd-managed Llm rows by tier. - * These are the rows created via `mcpctl create llm` — distinct from the - * mcplocal-side providers shown by the existing "LLM:" lines above. The - * caller awaits a pre-launched promise so this doesn't add round-trips. + * Print a "Server LLMs:" section listing mcpd-managed Llm rows by tier + * with a per-LLM "say hi" liveness probe. Distinct from the mcplocal-side + * providers shown by the existing "LLM:" lines above. The caller awaits a + * pre-launched promise so this doesn't add fetch round-trips, but the + * probe itself runs here (after the user has the rest of the screen). */ async function renderServerLlmsSection( serverLlmsPromise: Promise, + mcpdUrl: string, + token: string | null, ansi: boolean, ): Promise { const llms = await serverLlmsPromise; @@ -433,6 +547,14 @@ export function createStatusCommand(deps?: Partial): Command return; } + log(`Server LLMs: ${String(llms.length)} registered ${ansi ? DIM : ''}(probing live "say hi"...)${ansi ? RESET : ''}`); + + // Run all probes in parallel — one slow LLM doesn't block the others. + const healthByName = new Map(); + await Promise.all(llms.map(async (l) => { + healthByName.set(l.name, await probeServerLlm(mcpdUrl, l.name, token)); + })); + const byTier = new Map(); for (const l of llms) { const arr = byTier.get(l.tier) ?? []; @@ -440,19 +562,42 @@ export function createStatusCommand(deps?: Partial): Command byTier.set(l.tier, arr); } - log(`Server LLMs: ${String(llms.length)} registered`); // Print tiers in a stable order — fast/heavy first, then anything else. const tierOrder = ['fast', 'heavy', ...[...byTier.keys()].filter((t) => t !== 'fast' && t !== 'heavy').sort()]; for (const tier of tierOrder) { const rows = byTier.get(tier); if (rows === undefined || rows.length === 0) continue; - const formatted = rows.map((r) => { - const upstream = r.url !== '' ? r.url : 'provider default'; - const auth = r.apiKeyRef ? `key:${r.apiKeyRef.name}/${r.apiKeyRef.key}` : 'no key'; - const line = `${r.name} (${r.type} → ${r.model}) ${upstream} ${auth}`; - return ansi ? `${DIM}${line}${RESET}` : line; - }); + const formatted = rows.map((r) => formatServerLlmLine(r, healthByName.get(r.name), ansi)); log(` ${tier.padEnd(6)} ${formatted.join('\n ')}`); } } } + +/** + * Format a single server-LLM row plus its health-probe outcome on one line. + * Exported via module scope (not closure) so it stays cheap to test in + * isolation; takes \`ansi\` rather than reading a TTY at call time. + */ +function formatServerLlmLine(r: ServerLlm, h: ServerLlmHealth | undefined, ansi: boolean): string { + const upstream = r.url !== '' ? r.url : 'provider default'; + const auth = r.apiKeyRef ? `key:${r.apiKeyRef.name}/${r.apiKeyRef.key}` : 'no key'; + let healthStr: string; + if (h === undefined) { + healthStr = ansi ? `${DIM}? probe skipped${RESET}` : '? probe skipped'; + } else if (h.ok) { + const reply = h.say !== undefined ? 
`"${h.say}"` : 'ok'; + const ms = `${String(h.ms)}ms`; + healthStr = ansi ? `${GREEN}✓ ${reply} ${DIM}${ms}${RESET}` : `✓ ${reply} ${ms}`; + } else { + const err = h.error ?? 'failed'; + healthStr = ansi ? `${RED}✗ ${err}${RESET}` : `✗ ${err}`; + } + const meta = `${r.type} → ${r.model}`; + // Two-line layout: name + health on top, dim metadata indented below. + // Keeps the at-a-glance signal (✓/✗) close to the LLM name. + const head = `${r.name} ${healthStr}`; + const tail = ansi + ? `${DIM}${meta} ${upstream} ${auth}${RESET}` + : `${meta} ${upstream} ${auth}`; + return `${head}\n ${tail}`; +} diff --git a/src/cli/tests/commands/status.test.ts b/src/cli/tests/commands/status.test.ts index 469f158..4313a7e 100644 --- a/src/cli/tests/commands/status.test.ts +++ b/src/cli/tests/commands/status.test.ts @@ -28,6 +28,7 @@ function baseDeps(overrides?: Partial): Partial true, fetchProviders: async () => null, fetchServerLlms: async () => null, + probeServerLlm: async () => ({ ok: true, ms: 12, say: 'hi' }), isTTY: false, ...overrides, }; @@ -210,14 +211,33 @@ describe('status command', () => { { id: 'l1', name: 'qwen3-thinking', type: 'openai', model: 'qwen3-thinking', tier: 'fast', url: 'http://x:4000/v1', apiKeyRef: { name: 'litellm', key: 'API_KEY' } }, { id: 'l2', name: 'sonnet', type: 'anthropic', model: 'claude-sonnet-4-5', tier: 'heavy', url: '', apiKeyRef: null }, ], + probeServerLlm: async () => ({ ok: true, ms: 42, say: 'hi' }), })); await cmd.parseAsync([], { from: 'user' }); const out = output.join('\n'); expect(out).toContain('Server LLMs: 2 registered'); - expect(out).toContain('qwen3-thinking (openai → qwen3-thinking)'); - expect(out).toContain('sonnet (anthropic → claude-sonnet-4-5)'); + expect(out).toContain('qwen3-thinking'); + expect(out).toContain('openai → qwen3-thinking'); + expect(out).toContain('sonnet'); + expect(out).toContain('anthropic → claude-sonnet-4-5'); expect(out).toMatch(/fast\s+qwen3-thinking/); expect(out).toMatch(/heavy\s+sonnet/); + // Health probe result rendered for each LLM + expect(out).toContain('✓ "hi" 42ms'); + }); + + it('renders a failed "say hi" probe with the error message', async () => { + const cmd = createStatusCommand(baseDeps({ + fetchServerLlms: async () => [ + { id: 'l1', name: 'broken', type: 'openai', model: 'gpt-4o', tier: 'fast', url: 'http://x', apiKeyRef: null }, + ], + probeServerLlm: async () => ({ ok: false, ms: 5000, error: 'upstream auth failed: 401' }), + })); + await cmd.parseAsync([], { from: 'user' }); + const out = output.join('\n'); + expect(out).toContain('Server LLMs: 1 registered'); + expect(out).toContain('broken'); + expect(out).toContain('✗ upstream auth failed: 401'); }); it('renders "none registered" when mcpd has no Llm rows', async () => { @@ -254,13 +274,22 @@ describe('status command', () => { expect(capturedToken).toBeNull(); }); - it('includes serverLlms in JSON output', async () => { + it('includes serverLlms with probed health in JSON output', async () => { const llms = [ { id: 'l1', name: 'qwen3-thinking', type: 'openai', model: 'qwen3-thinking', tier: 'fast', url: 'http://x', apiKeyRef: null }, ]; - const cmd = createStatusCommand(baseDeps({ fetchServerLlms: async () => llms })); + const cmd = createStatusCommand(baseDeps({ + fetchServerLlms: async () => llms, + probeServerLlm: async () => ({ ok: true, ms: 99, say: 'hi' }), + })); await cmd.parseAsync(['-o', 'json'], { from: 'user' }); - const parsed = JSON.parse(output[0]) as { serverLlms?: typeof llms }; - expect(parsed.serverLlms).toEqual(llms); + 
const parsed = JSON.parse(output[0]) as {
+      serverLlms?: Array<ServerLlm & { health: ServerLlmHealth }>;
+    };
+    expect(parsed.serverLlms).toHaveLength(1);
+    expect(parsed.serverLlms![0]).toMatchObject({
+      name: 'qwen3-thinking',
+      health: { ok: true, ms: 99, say: 'hi' },
+    });
  });
});
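
A minimal consumer sketch, not part of the patch: since the JSON branch now attaches a health object to each server LLM, a script or dashboard can poll `mcpctl status -o json` and alert on `ok: false`. The `serverLlms` and health field names come from this diff; the `StatusJson` interface name, the standalone-script framing, and `mcpctl` being on PATH are assumptions for illustration.

import { execFileSync } from 'node:child_process';

// Shape of the fields this sketch relies on; it mirrors ServerLlmHealth and the
// serverLlmsWithHealth objects assembled in the JSON branch above. StatusJson is
// a name invented here, not something the CLI exports.
interface StatusJson {
  serverLlms?: Array<{
    name: string;
    tier: string;
    health: { ok: boolean; ms: number; say?: string; error?: string };
  }>;
}

const raw = execFileSync('mcpctl', ['status', '-o', 'json'], { encoding: 'utf-8' });
const status = JSON.parse(raw) as StatusJson;

// probeServerLlm always resolves, so every row carries a health object; the
// serverLlms key itself is omitted only when the fetch against mcpd fails.
for (const llm of status.serverLlms ?? []) {
  const state = llm.health.ok
    ? `ok (${String(llm.health.ms)}ms)`
    : `FAIL: ${llm.health.error ?? 'unknown'}`;
  console.log(`${llm.tier}/${llm.name}: ${state}`);
}

Because the probe never rejects, one misconfigured LLM degrades to `ok: false` in this output instead of failing the whole status command, which is what makes unattended polling like this safe.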