fix: cache LLM health check result for 10 minutes
Avoids burning tokens on every `mcpctl status` call. The /llm/health endpoint now caches successful results for 10 minutes, errors for 1 minute.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
@@ -81,31 +81,47 @@ export async function createHttpServer(
     reply.code(200).send({ status: 'ok' });
   });
 
-  // LLM health check — tests the active provider with a tiny prompt
+  // LLM health check — cached to avoid burning tokens on every call.
+  // Does a real inference call at most once per 10 minutes.
+  let llmHealthCache: { result: Record<string, unknown>; expiresAt: number } | null = null;
+  const LLM_HEALTH_CACHE_MS = 10 * 60 * 1000; // 10 minutes
 
   app.get('/llm/health', async (_request, reply) => {
     const provider = deps.providerRegistry?.getActive() ?? null;
     if (!provider) {
       reply.code(200).send({ status: 'not configured' });
       return;
     }
 
+    // Return cached result if fresh
+    if (llmHealthCache && Date.now() < llmHealthCache.expiresAt) {
+      reply.code(200).send(llmHealthCache.result);
+      return;
+    }
+
     try {
       const result = await provider.complete({
         messages: [{ role: 'user', content: 'Respond with exactly: ok' }],
         maxTokens: 10,
       });
       const ok = result.content.trim().toLowerCase().includes('ok');
-      reply.code(200).send({
+      const response = {
         status: ok ? 'ok' : 'unexpected response',
         provider: provider.name,
         response: result.content.trim().slice(0, 100),
-      });
+      };
+      llmHealthCache = { result: response, expiresAt: Date.now() + LLM_HEALTH_CACHE_MS };
+      reply.code(200).send(response);
     } catch (err) {
       const msg = (err as Error).message ?? String(err);
-      reply.code(200).send({
+      const response = {
         status: 'error',
         provider: provider.name,
         error: msg.slice(0, 200),
-      });
+      };
+      // Cache errors for 1 minute only (retry sooner)
+      llmHealthCache = { result: response, expiresAt: Date.now() + 60_000 };
+      reply.code(200).send(response);
     }
   });
 
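As a usage sketch (not part of this commit): a caller such as `mcpctl status` can probe the endpoint and rely on repeated requests inside the cache window being answered without a new inference call. The base URL below is an assumption for illustration, not taken from the change.

// Usage sketch — assumes the server listens on http://localhost:3000
// and a Node 18+ runtime with the global fetch API.
const res = await fetch('http://localhost:3000/llm/health');
const body = (await res.json()) as {
  status: string;
  provider?: string;
  response?: string;
  error?: string;
};
// Within 10 minutes of a successful check (or 1 minute after an error),
// this request is served from llmHealthCache and costs no tokens.
console.log(body.status, body.provider ?? '');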