feat(cli+mcplocal): persistent provider disable/enable
Some checks failed
CI/CD / lint (pull_request) Successful in 55s
CI/CD / test (pull_request) Successful in 1m11s
CI/CD / typecheck (pull_request) Successful in 3m20s
CI/CD / smoke (pull_request) Failing after 52s
CI/CD / build (pull_request) Successful in 3m59s
CI/CD / publish (pull_request) Has been skipped

Adds two new subcommands on top of v7's provider lifecycle CLI:

  mcpctl provider disable vllm-local   # release GPU + survive restart
  mcpctl provider enable  vllm-local   # clear the flag, ready to chat

Use case: vLLM keeps crashing on engine init. `down` works for "now"
but the next chat triggers a restart; `disable` writes
`disabled: true` into the provider's entry in ~/.mcpctl/config.json
and short-circuits complete()/ensureRunning() until you re-enable.

Implementation:
- LlmProviderEntry / LlmProviderFileEntry: new optional `disabled` field
- ManagedVllmProvider: setDisabled(bool), isDisabled(), gate in
  complete()/ensureRunning(), expose `disabled` in getStatus()
- mcplocal HTTP: POST /llm/providers/:name/{disable,enable} write the
  config file and apply the change live; /start returns 409 when the
  target is disabled instead of silently failing
- Boot: createSingleProvider honors `entry.disabled` so a known-bad
  vLLM doesn't auto-start on the first chat after mcplocal restart
- CLI: `disable` / `enable` subcommands on `mcpctl provider`; status
  output now shows `(disabled)` next to the state

`enable` is live — provider stays in the registry while disabled, so
flipping the flag back is enough; no mcplocal restart needed.

Tests: cli 437/437, mcplocal 731/731.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-05-03 15:57:01 +01:00
parent fe27947f80
commit d04adb5623
8 changed files with 211 additions and 14 deletions

View File

@@ -1,14 +1,22 @@
/**
* `mcpctl provider <name> <up|down|status>`
* `mcpctl provider <action> <name>` — managed LLM lifecycle control.
*
* Lifecycle control for managed local LLM providers (vllm-managed). Talks
* to mcplocal's `/llm/providers/:name/{status,start,stop}` HTTP endpoints
* — non-managed providers (anthropic, openai, gemini-cli) get a clear
* error rather than a no-op.
* up — warmup() now (next chat is fast)
* down — dispose() now; auto-restarts on next chat
* status — state, pid, uptime, disabled flag
* disable — dispose() AND set `disabled: true` in ~/.mcpctl/config.json;
* survives mcplocal restarts; complete()/ensureRunning() short-
* circuit so the GPU process doesn't spawn until you `enable`
* enable — clear the disabled flag (live + on disk)
*
* Practical use: `mcpctl provider vllm-local down` to release GPU memory
* without restarting mcplocal (which would drop the SSE connection to mcpd
* and re-publish all virtual Llms).
* Talks to mcplocal's `/llm/providers/:name/{status,start,stop,enable,disable}`
* HTTP endpoints. Non-managed providers (anthropic, openai, gemini-cli)
* get a clear 400 rather than a no-op for the lifecycle actions.
*
* Practical use:
* `mcpctl provider down vllm-local` — release GPU memory now
* `mcpctl provider disable vllm-local` — release GPU AND prevent auto-start
* (e.g. when vLLM keeps crashing)
*/
import { Command } from 'commander';
import http from 'node:http';
@@ -25,6 +33,10 @@ interface ProviderStatusResponse {
lastError?: string | null;
pid?: number | null;
uptime?: number | null;
/** True when the persistent disable flag is set (config + live). */
disabled?: boolean;
/** Set by /enable + /disable — true if the config file was rewritten. */
persisted?: boolean;
}
interface ErrorResponse {
@@ -54,7 +66,8 @@ function formatStatus(s: ProviderStatusResponse): string {
if (!s.managed) {
return `${s.name}: unmanaged (no lifecycle — API-key or remote provider)`;
}
const lines = [`${s.name}: ${s.state ?? 'unknown'}`];
const stateLabel = s.disabled === true ? `${s.state ?? 'stopped'} (disabled)` : (s.state ?? 'unknown');
const lines = [`${s.name}: ${stateLabel}`];
if (s.pid !== null && s.pid !== undefined) lines.push(` pid: ${String(s.pid)}`);
if (s.uptime !== null && s.uptime !== undefined) {
const sec = s.uptime;
@@ -126,5 +139,47 @@ export function createProviderCommand(deps: ProviderCommandDeps): Command {
deps.log(`${status.name}: ${status.state ?? 'stopped'} (GPU released — next chat will trigger restart)`);
});
// `mcpctl provider disable <name>` — release the GPU now AND persist the
// flag so the provider won't auto-start after an mcplocal restart.
cmd
  .command('disable')
  .description('Persistently disable a managed provider (survives mcplocal restart)')
  .argument('<name>', 'Provider name (e.g. vllm-local)')
  .action(async (name: string) => {
    const url = `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/disable`;
    const response = await fetchJson<ProviderStatusResponse | ErrorResponse>(url, 'POST');
    if (response.status !== 200) {
      deps.log(`error: ${(response.body as ErrorResponse).error}`);
      process.exitCode = 1;
      return;
    }
    const body = response.body as ProviderStatusResponse;
    // `persisted` tells us whether mcplocal managed to rewrite the config
    // file; if not, the disable only lasts until mcplocal restarts.
    let persistedNote = ' (live only — provider is not in config file, restart will undo)';
    if (body.persisted === true) {
      persistedNote = ' (saved to ~/.mcpctl/config.json — survives restart)';
    }
    deps.log(`${body.name}: disabled${persistedNote}`);
  });
// `mcpctl provider enable <name>` — clear the disabled flag live and on
// disk; the provider is still registered, so no restart is required.
cmd
  .command('enable')
  .description('Re-enable a previously-disabled provider')
  .argument('<name>', 'Provider name (e.g. vllm-local)')
  .action(async (name: string) => {
    const url = `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/enable`;
    const response = await fetchJson<ProviderStatusResponse | ErrorResponse>(url, 'POST');
    if (response.status !== 200) {
      deps.log(`error: ${(response.body as ErrorResponse).error}`);
      process.exitCode = 1;
      return;
    }
    const body = response.body as ProviderStatusResponse;
    // Only mention the config file when mcplocal actually rewrote it.
    let persistedNote = '';
    if (body.persisted === true) {
      persistedNote = ' (saved to ~/.mcpctl/config.json)';
    }
    deps.log(`${body.name}: enabled${persistedNote} — next chat will start it`);
  });
return cmd;
}

View File

@@ -46,6 +46,9 @@ export const LlmProviderEntrySchema = z.object({
idleTimeoutMinutes: z.number().int().positive().optional(),
/** vllm-managed: extra args for `vllm serve` */
extraArgs: z.array(z.string()).optional(),
/** When true, mcplocal keeps the provider registered but suppresses
* auto-start. Toggle via `mcpctl provider {enable,disable} <name>`. */
disabled: z.boolean().optional(),
}).strict();
export type LlmProviderEntry = z.infer<typeof LlmProviderEntrySchema>;

View File

@@ -1,4 +1,4 @@
import { existsSync, readFileSync } from 'node:fs';
import { existsSync, readFileSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { homedir } from 'node:os';
@@ -64,6 +64,16 @@ export interface LlmProviderFileEntry {
idleTimeoutMinutes?: number;
/** vllm-managed: extra args for `vllm serve` */
extraArgs?: string[];
/**
* Persistent disable. When true:
* - mcplocal still instantiates the provider so `mcpctl provider enable`
* can flip it back live, but
* - `complete()` and `ensureRunning()` short-circuit with an error so
* the GPU process never spawns.
* Toggled via `mcpctl provider {enable,disable} <name>`. Survives mcplocal
* restarts because it lives here in the user's config file.
*/
disabled?: boolean;
/**
* If set, this local provider is allowed to substitute for the centralized
* Llm of this name when the mcpd inference proxy is unreachable.
@@ -180,6 +190,37 @@ function loadFullConfig(): McpctlConfig {
}
}
/**
 * Persist a `disabled: bool` flag onto the named provider in
 * ~/.mcpctl/config.json. Used by the `mcpctl provider {enable,disable}`
 * route to make the change survive an mcplocal restart.
 *
 * Best-effort by design: the caller has already applied the change live,
 * so a failure here only means the flag won't survive a restart — it must
 * never escape as an exception and turn the HTTP route into a 500.
 *
 * Returns true when the file was actually rewritten (provider found and
 * its flag changed); false when the file is missing, unreadable, or not
 * valid JSON, when the provider isn't in the config, or when the flag
 * already matches.
 */
export function setProviderDisabledInConfig(name: string, disabled: boolean): boolean {
  const configPath = join(homedir(), '.mcpctl', 'config.json');
  if (!existsSync(configPath)) return false;
  try {
    const raw = readFileSync(configPath, 'utf-8');
    // Round-trip via JSON.parse → mutate → JSON.stringify. We don't try to
    // preserve comments or formatting because the config file has always
    // been plain JSON (no JSONC) and the alternative — a structural editor
    // — is a lot of code for a feature toggle.
    const parsed = JSON.parse(raw) as McpctlConfig;
    if (parsed.llm === undefined || !isMultiConfig(parsed.llm)) return false;
    const entry = parsed.llm.providers.find((p) => p.name === name);
    if (entry === undefined) return false;
    const current = entry.disabled === true;
    if (current === disabled) return false;
    // Delete rather than write `disabled: false` so an enabled provider's
    // entry looks exactly as it did before the feature existed.
    if (disabled) entry.disabled = true; else delete entry.disabled;
    writeFileSync(configPath, JSON.stringify(parsed, null, 2) + '\n', 'utf-8');
    // Invalidate the cached config so subsequent loadLlmProviders() calls
    // see the change. (Boot-time only, but cheap and correct.)
    cachedConfig = null;
    return true;
  } catch {
    // Corrupt config or a read/write failure: report "not persisted" and
    // let the CLI surface the "live only" note instead of crashing.
    return false;
  }
}
/** Type guard: is config the multi-provider format? */
function isMultiConfig(llm: LlmFileConfig | LlmMultiFileConfig): llm is LlmMultiFileConfig {
return 'providers' in llm && Array.isArray((llm as LlmMultiFileConfig).providers);

View File

@@ -3,6 +3,7 @@ import type { FastifyInstance } from 'fastify';
import cors from '@fastify/cors';
import { APP_VERSION } from '@mcpctl/shared';
import type { HttpConfig } from './config.js';
import { setProviderDisabledInConfig } from './config.js';
import { McpdClient } from './mcpd-client.js';
import { registerProxyRoutes } from './routes/proxy.js';
import { registerMcpEndpoint } from './mcp-endpoint.js';
@@ -271,6 +272,10 @@ export async function createHttpServer(
reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to start)` });
return;
}
if ((provider as ManagedVllmProvider).isDisabled?.()) {
reply.code(409).send({ error: `Provider '${request.params.name}' is disabled — run \`mcpctl provider enable\` first` });
return;
}
(provider as ManagedVllmProvider).warmup();
// warmup() is fire-and-forget — return current state immediately so
// the CLI can show 'starting' and the user knows it's been kicked.
@@ -278,6 +283,50 @@ export async function createHttpServer(
reply.code(202).send({ name: provider.name, managed: true, ...status });
});
// Persistent disable: dispose the running process AND set the
// `disabled: true` flag on the provider's entry in ~/.mcpctl/config.json
// so the next mcplocal restart doesn't auto-start it. Live: complete()
// and ensureRunning() short-circuit immediately.
app.post<{ Params: { name: string } }>('/llm/providers/:name/disable', async (request, reply) => {
  const name = request.params.name;
  const provider = deps.providerRegistry?.get(name) ?? null;
  if (provider === null) {
    reply.code(404).send({ error: `Provider '${name}' not found` });
    return;
  }
  const managed = provider as ManagedVllmProvider;
  // A missing property reads as undefined, so this single typeof check
  // rejects both "no such member" and "member is not callable".
  if (typeof managed.setDisabled !== 'function') {
    reply.code(400).send({ error: `Provider '${name}' is not managed (no enable/disable surface)` });
    return;
  }
  managed.setDisabled(true);
  // Best-effort persist. If the entry isn't in the config (e.g. the
  // provider was registered programmatically) the live disable still
  // sticks for this mcplocal lifetime; we just can't persist it.
  const persisted = setProviderDisabledInConfig(name, true);
  const status = managed.getStatus();
  reply.code(200).send({ name: provider.name, managed: true, persisted, ...status });
});
// Inverse of /disable: clear the flag both live and on disk. The
// provider stays in the registry (we never removed it), so the next
// chat triggers ensureRunning() normally.
app.post<{ Params: { name: string } }>('/llm/providers/:name/enable', async (request, reply) => {
  const name = request.params.name;
  const provider = deps.providerRegistry?.get(name) ?? null;
  if (provider === null) {
    reply.code(404).send({ error: `Provider '${name}' not found` });
    return;
  }
  const managed = provider as ManagedVllmProvider;
  // Single typeof check covers both a missing member and a non-callable one.
  if (typeof managed.setDisabled !== 'function') {
    reply.code(400).send({ error: `Provider '${name}' is not managed (no enable/disable surface)` });
    return;
  }
  managed.setDisabled(false);
  const persisted = setProviderDisabledInConfig(name, false);
  const status = managed.getStatus();
  reply.code(200).send({ name: provider.name, managed: true, persisted, ...status });
});
// ProxyModel discovery endpoints
registerProxymodelEndpoint(app);

View File

@@ -141,7 +141,12 @@ async function createSingleProvider(
if (entry.maxModelLen !== undefined) cfg.maxModelLen = entry.maxModelLen;
if (entry.idleTimeoutMinutes !== undefined) cfg.idleTimeoutMinutes = entry.idleTimeoutMinutes;
if (entry.extraArgs !== undefined) cfg.extraArgs = entry.extraArgs;
return new ManagedVllmProvider(cfg);
const provider = new ManagedVllmProvider(cfg);
// v7+ persistent disable: honor the flag at boot so a known-bad vLLM
// doesn't auto-start on first chat. The provider stays registered so
// `mcpctl provider enable` can flip it back live without restart.
if (entry.disabled === true) provider.setDisabled(true);
return provider;
}
default:

View File

@@ -33,6 +33,8 @@ export interface ManagedVllmStatus {
lastError: string | null;
pid: number | null;
uptime: number | null;
/** Persistent disable. When true, complete()/ensureRunning() short-circuit. */
disabled: boolean;
}
const POLL_INTERVAL_MS = 2000;
@@ -54,6 +56,7 @@ export class ManagedVllmProvider implements LlmProvider {
private inner: OpenAiProvider | null = null;
private state: ManagedVllmState = 'stopped';
private lastError: string | null = null;
private disabled = false;
private lastUsed = 0;
private startedAt = 0;
private errorAt = 0;
@@ -86,6 +89,9 @@ export class ManagedVllmProvider implements LlmProvider {
}
async complete(options: CompletionOptions): Promise<CompletionResult> {
if (this.disabled) {
throw new Error('vllm-managed provider is disabled — run `mcpctl provider enable <name>` to re-enable');
}
await this.ensureRunning();
this.lastUsed = Date.now();
this.resetIdleTimer();
@@ -101,9 +107,11 @@ export class ManagedVllmProvider implements LlmProvider {
/**
* A managed provider is "available" unless in a permanent error state.
* When stopped, it can be auto-started on demand.
* When stopped, it can be auto-started on demand. Disabled providers
* report unavailable so health probes don't try to wake them.
*/
async isAvailable(): Promise<boolean> {
  // Disabled providers must look unavailable (health probes shouldn't wake
  // them); otherwise anything short of a permanent error is usable, since a
  // stopped provider auto-starts on demand.
  return !this.disabled && this.state !== 'error';
}
@@ -115,9 +123,34 @@ export class ManagedVllmProvider implements LlmProvider {
uptime: this.state === 'running' && this.startedAt > 0
? Math.floor((Date.now() - this.startedAt) / 1000)
: null,
disabled: this.disabled,
};
}
/**
* Toggle persistent disable. When set true, dispose any running process
* immediately and gate complete()/ensureRunning() so the next chat fails
* fast rather than spawning a new vLLM. The caller (HTTP endpoint) is
* responsible for persisting the flag to ~/.mcpctl/config.json.
*/
setDisabled(value: boolean): void {
if (this.disabled === value) return;
this.disabled = value;
if (value) {
this.killProcess();
this.clearIdleTimer();
// Reset the error cooldown so re-enable starts clean.
if (this.state === 'error') {
this.state = 'stopped';
this.lastError = null;
}
}
}
isDisabled(): boolean {
return this.disabled;
}
/** Eagerly start vLLM so it's ready when the first complete() call arrives. */
warmup(): void {
if (this.state === 'stopped') {
@@ -135,6 +168,9 @@ export class ManagedVllmProvider implements LlmProvider {
// --- Internal ---
async ensureRunning(): Promise<void> {
if (this.disabled) {
throw new Error('vllm-managed provider is disabled — run `mcpctl provider enable <name>` to re-enable');
}
if (this.state === 'running' && this.process && !this.process.killed) {
return;
}