From 0824f8e6355ac3818fc3ec6877d75b7cf92d5ca9 Mon Sep 17 00:00:00 2001
From: Michal
Date: Wed, 25 Feb 2026 01:39:15 +0000
Subject: [PATCH] fix: cache LLM health check result for 10 minutes

Avoids burning tokens on every `mcpctl status` call. The /llm/health
endpoint now caches successful results for 10min, errors for 1min.

Co-Authored-By: Claude Opus 4.6
---
 src/mcplocal/src/http/server.ts | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/mcplocal/src/http/server.ts b/src/mcplocal/src/http/server.ts
index 2c7e665..9ffd13d 100644
--- a/src/mcplocal/src/http/server.ts
+++ b/src/mcplocal/src/http/server.ts
@@ -81,31 +81,47 @@ export async function createHttpServer(
     reply.code(200).send({ status: 'ok' });
   });
 
-  // LLM health check — tests the active provider with a tiny prompt
+  // LLM health check — cached to avoid burning tokens on every call.
+  // Does a real inference call at most once per 10 minutes.
+  let llmHealthCache: { result: Record<string, unknown>; expiresAt: number } | null = null;
+  const LLM_HEALTH_CACHE_MS = 10 * 60 * 1000; // 10 minutes
+
   app.get('/llm/health', async (_request, reply) => {
     const provider = deps.providerRegistry?.getActive() ?? null;
     if (!provider) {
       reply.code(200).send({ status: 'not configured' });
       return;
     }
+
+    // Return cached result if fresh
+    if (llmHealthCache && Date.now() < llmHealthCache.expiresAt) {
+      reply.code(200).send(llmHealthCache.result);
+      return;
+    }
+
     try {
       const result = await provider.complete({
         messages: [{ role: 'user', content: 'Respond with exactly: ok' }],
         maxTokens: 10,
       });
       const ok = result.content.trim().toLowerCase().includes('ok');
-      reply.code(200).send({
+      const response = {
        status: ok ? 'ok' : 'unexpected response',
        provider: provider.name,
        response: result.content.trim().slice(0, 100),
-      });
+      };
+      llmHealthCache = { result: response, expiresAt: Date.now() + LLM_HEALTH_CACHE_MS };
+      reply.code(200).send(response);
     } catch (err) {
       const msg = (err as Error).message ??
        String(err);
-      reply.code(200).send({
+      const response = {
        status: 'error',
        provider: provider.name,
        error: msg.slice(0, 200),
-      });
+      };
+      // Cache errors for 1 minute only (retry sooner)
+      llmHealthCache = { result: response, expiresAt: Date.now() + 60_000 };
+      reply.code(200).send(response);
     }
   });