fix: warm up ACP subprocess eagerly to avoid 30s cold-start on status
The pool refactor made ACP client creation lazy, so the first /llm/health call had to spawn, initialize, and prompt Gemini within a single request (30s+). warmup() now eagerly starts the subprocess when mcplocal boots. Models are also fetched in parallel with the LLM health check.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -47,7 +47,7 @@ function defaultCheckHealth(url: string): Promise<boolean> {
|
|||||||
*/
|
*/
|
||||||
function defaultCheckLlm(mcplocalUrl: string): Promise<string> {
|
function defaultCheckLlm(mcplocalUrl: string): Promise<string> {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
const req = http.get(`${mcplocalUrl}/llm/health`, { timeout: 30000 }, (res) => {
|
const req = http.get(`${mcplocalUrl}/llm/health`, { timeout: 45000 }, (res) => {
|
||||||
const chunks: Buffer[] = [];
|
const chunks: Buffer[] = [];
|
||||||
res.on('data', (chunk: Buffer) => chunks.push(chunk));
|
res.on('data', (chunk: Buffer) => chunks.push(chunk));
|
||||||
res.on('end', () => {
|
res.on('end', () => {
|
||||||
@@ -167,8 +167,9 @@ export function createStatusCommand(deps?: Partial<StatusCommandDeps>): Command
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// LLM check with spinner — queries mcplocal's /llm/health endpoint
|
// LLM check + models fetch in parallel — queries mcplocal endpoints
|
||||||
const llmPromise = checkLlm(config.mcplocalUrl);
|
const llmPromise = checkLlm(config.mcplocalUrl);
|
||||||
|
const modelsPromise = fetchModels(config.mcplocalUrl);
|
||||||
|
|
||||||
if (isTTY) {
|
if (isTTY) {
|
||||||
let frame = 0;
|
let frame = 0;
|
||||||
@@ -177,7 +178,7 @@ export function createStatusCommand(deps?: Partial<StatusCommandDeps>): Command
|
|||||||
frame++;
|
frame++;
|
||||||
}, 80);
|
}, 80);
|
||||||
|
|
||||||
const llmStatus = await llmPromise;
|
const [llmStatus, models] = await Promise.all([llmPromise, modelsPromise]);
|
||||||
clearInterval(interval);
|
clearInterval(interval);
|
||||||
|
|
||||||
if (llmStatus === 'ok' || llmStatus === 'ok (key stored)') {
|
if (llmStatus === 'ok' || llmStatus === 'ok (key stored)') {
|
||||||
@@ -185,20 +186,20 @@ export function createStatusCommand(deps?: Partial<StatusCommandDeps>): Command
|
|||||||
} else {
|
} else {
|
||||||
write(`${CLEAR_LINE}LLM: ${llmLabel} ${RED}✗ ${llmStatus}${RESET}\n`);
|
write(`${CLEAR_LINE}LLM: ${llmLabel} ${RED}✗ ${llmStatus}${RESET}\n`);
|
||||||
}
|
}
|
||||||
|
if (models.length > 0) {
|
||||||
|
log(`${DIM} Available: ${models.join(', ')}${RESET}`);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Non-TTY: no spinner, just wait and print
|
// Non-TTY: no spinner, just wait and print
|
||||||
const llmStatus = await llmPromise;
|
const [llmStatus, models] = await Promise.all([llmPromise, modelsPromise]);
|
||||||
if (llmStatus === 'ok' || llmStatus === 'ok (key stored)') {
|
if (llmStatus === 'ok' || llmStatus === 'ok (key stored)') {
|
||||||
log(`LLM: ${llmLabel} ✓ ${llmStatus}`);
|
log(`LLM: ${llmLabel} ✓ ${llmStatus}`);
|
||||||
} else {
|
} else {
|
||||||
log(`LLM: ${llmLabel} ✗ ${llmStatus}`);
|
log(`LLM: ${llmLabel} ✗ ${llmStatus}`);
|
||||||
}
|
}
|
||||||
}
|
if (models.length > 0) {
|
||||||
|
log(`${DIM} Available: ${models.join(', ')}${RESET}`);
|
||||||
// Show available models (non-blocking, best effort)
|
}
|
||||||
const models = await fetchModels(config.mcplocalUrl);
|
|
||||||
if (models.length > 0) {
|
|
||||||
log(`${DIM} Available: ${models.join(', ')}${RESET}`);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,7 +28,9 @@ export async function createProviderFromConfig(
|
|||||||
const cfg: GeminiAcpConfig = {};
|
const cfg: GeminiAcpConfig = {};
|
||||||
if (config.binaryPath) cfg.binaryPath = config.binaryPath;
|
if (config.binaryPath) cfg.binaryPath = config.binaryPath;
|
||||||
if (config.model) cfg.defaultModel = config.model;
|
if (config.model) cfg.defaultModel = config.model;
|
||||||
registry.register(new GeminiAcpProvider(cfg));
|
const provider = new GeminiAcpProvider(cfg);
|
||||||
|
provider.warmup();
|
||||||
|
registry.register(provider);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -83,6 +83,18 @@ export class GeminiAcpProvider implements LlmProvider {
|
|||||||
this.pool.clear();
|
this.pool.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Eagerly spawn the default model's ACP subprocess so it's ready
|
||||||
|
* for the first request (avoids 30s cold-start on health checks).
|
||||||
|
*/
|
||||||
|
warmup(): void {
|
||||||
|
const entry = this.getOrCreateEntry(this.defaultModel);
|
||||||
|
// Fire-and-forget: start the subprocess initialization in the background
|
||||||
|
entry.client.ensureReady().catch(() => {
|
||||||
|
// Ignore errors — next request will retry
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
/** Number of active pool entries (for testing). */
|
/** Number of active pool entries (for testing). */
|
||||||
get poolSize(): number {
|
get poolSize(): number {
|
||||||
return this.pool.size;
|
return this.pool.size;
|
||||||
|
|||||||
Reference in New Issue
Block a user