fix: warm up ACP subprocess eagerly to avoid 30s cold start on status
The pool refactor made ACP client creation lazy, so the first /llm/health call had to spawn, initialize, and prompt Gemini all within a single request (30s+). warmup() now eagerly starts the subprocess when mcplocal boots. Models are also fetched in parallel with the LLM health check. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -47,7 +47,7 @@ function defaultCheckHealth(url: string): Promise<boolean> {
|
||||
*/
|
||||
function defaultCheckLlm(mcplocalUrl: string): Promise<string> {
|
||||
return new Promise((resolve) => {
|
||||
const req = http.get(`${mcplocalUrl}/llm/health`, { timeout: 30000 }, (res) => {
|
||||
const req = http.get(`${mcplocalUrl}/llm/health`, { timeout: 45000 }, (res) => {
|
||||
const chunks: Buffer[] = [];
|
||||
res.on('data', (chunk: Buffer) => chunks.push(chunk));
|
||||
res.on('end', () => {
|
||||
@@ -167,8 +167,9 @@ export function createStatusCommand(deps?: Partial<StatusCommandDeps>): Command
|
||||
return;
|
||||
}
|
||||
|
||||
// LLM check with spinner — queries mcplocal's /llm/health endpoint
|
||||
// LLM check + models fetch in parallel — queries mcplocal endpoints
|
||||
const llmPromise = checkLlm(config.mcplocalUrl);
|
||||
const modelsPromise = fetchModels(config.mcplocalUrl);
|
||||
|
||||
if (isTTY) {
|
||||
let frame = 0;
|
||||
@@ -177,7 +178,7 @@ export function createStatusCommand(deps?: Partial<StatusCommandDeps>): Command
|
||||
frame++;
|
||||
}, 80);
|
||||
|
||||
const llmStatus = await llmPromise;
|
||||
const [llmStatus, models] = await Promise.all([llmPromise, modelsPromise]);
|
||||
clearInterval(interval);
|
||||
|
||||
if (llmStatus === 'ok' || llmStatus === 'ok (key stored)') {
|
||||
@@ -185,20 +186,20 @@ export function createStatusCommand(deps?: Partial<StatusCommandDeps>): Command
|
||||
} else {
|
||||
write(`${CLEAR_LINE}LLM: ${llmLabel} ${RED}✗ ${llmStatus}${RESET}\n`);
|
||||
}
|
||||
if (models.length > 0) {
|
||||
log(`${DIM} Available: ${models.join(', ')}${RESET}`);
|
||||
}
|
||||
} else {
|
||||
// Non-TTY: no spinner, just wait and print
|
||||
const llmStatus = await llmPromise;
|
||||
const [llmStatus, models] = await Promise.all([llmPromise, modelsPromise]);
|
||||
if (llmStatus === 'ok' || llmStatus === 'ok (key stored)') {
|
||||
log(`LLM: ${llmLabel} ✓ ${llmStatus}`);
|
||||
} else {
|
||||
log(`LLM: ${llmLabel} ✗ ${llmStatus}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Show available models (non-blocking, best effort)
|
||||
const models = await fetchModels(config.mcplocalUrl);
|
||||
if (models.length > 0) {
|
||||
log(`${DIM} Available: ${models.join(', ')}${RESET}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -28,7 +28,9 @@ export async function createProviderFromConfig(
|
||||
const cfg: GeminiAcpConfig = {};
|
||||
if (config.binaryPath) cfg.binaryPath = config.binaryPath;
|
||||
if (config.model) cfg.defaultModel = config.model;
|
||||
registry.register(new GeminiAcpProvider(cfg));
|
||||
const provider = new GeminiAcpProvider(cfg);
|
||||
provider.warmup();
|
||||
registry.register(provider);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -83,6 +83,18 @@ export class GeminiAcpProvider implements LlmProvider {
|
||||
this.pool.clear();
|
||||
}
|
||||
|
||||
/**
 * Pre-start the ACP subprocess for the default model in the background
 * so that it is already initializing before the first request arrives
 * (intended to avoid the 30s cold-start seen on health checks).
 *
 * Returns immediately; the readiness promise is not awaited.
 */
warmup(): void {
  const { client } = this.getOrCreateEntry(this.defaultModel);
  // Startup failures are deliberately swallowed here; per the original
  // note, the next request is expected to retry initialization.
  const swallow = (): void => {};
  // `void` marks the promise as intentionally fire-and-forget.
  void client.ensureReady().catch(swallow);
}
|
||||
|
||||
/** Number of active pool entries (for testing). */
|
||||
get poolSize(): number {
|
||||
return this.pool.size;
|
||||
|
||||
Reference in New Issue
Block a user