fix: cache LLM health check result for 10 minutes
Some checks are pending
CI / lint (push) Waiting to run
CI / typecheck (push) Waiting to run
CI / test (push) Waiting to run
CI / build (push) Blocked by required conditions
CI / package (push) Blocked by required conditions

Avoids burning tokens on every `mcpctl status` call. The /llm/health
endpoint now caches any completed check (including an unexpected
response) for 10 minutes, and failed checks for 1 minute.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Michal
2026-02-25 01:39:15 +00:00
parent 9bd3127519
commit 0824f8e635

View File

@@ -81,31 +81,47 @@ export async function createHttpServer(
reply.code(200).send({ status: 'ok' }); reply.code(200).send({ status: 'ok' });
}); });
// LLM health check — tests the active provider with a tiny prompt // LLM health check — cached to avoid burning tokens on every call.
// Does a real inference call at most once per 10 minutes.
let llmHealthCache: { result: Record<string, unknown>; expiresAt: number } | null = null;
const LLM_HEALTH_CACHE_MS = 10 * 60 * 1000; // 10 minutes
app.get('/llm/health', async (_request, reply) => { app.get('/llm/health', async (_request, reply) => {
const provider = deps.providerRegistry?.getActive() ?? null; const provider = deps.providerRegistry?.getActive() ?? null;
if (!provider) { if (!provider) {
reply.code(200).send({ status: 'not configured' }); reply.code(200).send({ status: 'not configured' });
return; return;
} }
// Return cached result if fresh
if (llmHealthCache && Date.now() < llmHealthCache.expiresAt) {
reply.code(200).send(llmHealthCache.result);
return;
}
try { try {
const result = await provider.complete({ const result = await provider.complete({
messages: [{ role: 'user', content: 'Respond with exactly: ok' }], messages: [{ role: 'user', content: 'Respond with exactly: ok' }],
maxTokens: 10, maxTokens: 10,
}); });
const ok = result.content.trim().toLowerCase().includes('ok'); const ok = result.content.trim().toLowerCase().includes('ok');
reply.code(200).send({ const response = {
status: ok ? 'ok' : 'unexpected response', status: ok ? 'ok' : 'unexpected response',
provider: provider.name, provider: provider.name,
response: result.content.trim().slice(0, 100), response: result.content.trim().slice(0, 100),
}); };
llmHealthCache = { result: response, expiresAt: Date.now() + LLM_HEALTH_CACHE_MS };
reply.code(200).send(response);
} catch (err) { } catch (err) {
const msg = (err as Error).message ?? String(err); const msg = (err as Error).message ?? String(err);
reply.code(200).send({ const response = {
status: 'error', status: 'error',
provider: provider.name, provider: provider.name,
error: msg.slice(0, 200), error: msg.slice(0, 200),
}); };
// Cache errors for 1 minute only (retry sooner)
llmHealthCache = { result: response, expiresAt: Date.now() + 60_000 };
reply.code(200).send(response);
} }
}); });