From 0824f8e6355ac3818fc3ec6877d75b7cf92d5ca9 Mon Sep 17 00:00:00 2001
From: Michal
Date: Wed, 25 Feb 2026 01:39:15 +0000
Subject: [PATCH] fix: cache LLM health check result for 10 minutes

Avoids burning tokens on every `mcpctl status` call. The /llm/health
endpoint now caches successful results for 10min, errors for 1min.

Co-Authored-By: Claude Opus 4.6
---
 src/mcplocal/src/http/server.ts | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/mcplocal/src/http/server.ts b/src/mcplocal/src/http/server.ts
index 2c7e665..9ffd13d 100644
--- a/src/mcplocal/src/http/server.ts
+++ b/src/mcplocal/src/http/server.ts
@@ -81,31 +81,47 @@ export async function createHttpServer(
     reply.code(200).send({ status: 'ok' });
   });
 
-  // LLM health check — tests the active provider with a tiny prompt
+  // LLM health check — cached to avoid burning tokens on every call.
+  // Does a real inference call at most once per 10 minutes.
+  let llmHealthCache: { result: Record<string, unknown>; expiresAt: number } | null = null;
+  const LLM_HEALTH_CACHE_MS = 10 * 60 * 1000; // 10 minutes
+
   app.get('/llm/health', async (_request, reply) => {
     const provider = deps.providerRegistry?.getActive() ?? null;
     if (!provider) {
       reply.code(200).send({ status: 'not configured' });
       return;
     }
+
+    // Return cached result if fresh
+    if (llmHealthCache && Date.now() < llmHealthCache.expiresAt) {
+      reply.code(200).send(llmHealthCache.result);
+      return;
+    }
+
     try {
       const result = await provider.complete({
         messages: [{ role: 'user', content: 'Respond with exactly: ok' }],
         maxTokens: 10,
       });
       const ok = result.content.trim().toLowerCase().includes('ok');
-      reply.code(200).send({
+      const response = {
        status: ok ? 'ok' : 'unexpected response',
        provider: provider.name,
        response: result.content.trim().slice(0, 100),
-      });
+      };
+      llmHealthCache = { result: response, expiresAt: Date.now() + LLM_HEALTH_CACHE_MS };
+      reply.code(200).send(response);
     } catch (err) {
       const msg = (err as Error).message ??
        String(err);
-      reply.code(200).send({
+      const response = {
        status: 'error',
        provider: provider.name,
        error: msg.slice(0, 200),
-      });
+      };
+      // Cache errors for 1 minute only (retry sooner)
+      llmHealthCache = { result: response, expiresAt: Date.now() + 60_000 };
+      reply.code(200).send(response);
     }
   });