feat(cli+mcplocal): persistent provider disable/enable
Some checks failed
CI/CD / lint (pull_request) Successful in 55s
CI/CD / test (pull_request) Successful in 1m11s
CI/CD / typecheck (pull_request) Successful in 3m20s
CI/CD / smoke (pull_request) Failing after 52s
CI/CD / build (pull_request) Successful in 3m59s
CI/CD / publish (pull_request) Has been skipped
Some checks failed
CI/CD / lint (pull_request) Successful in 55s
CI/CD / test (pull_request) Successful in 1m11s
CI/CD / typecheck (pull_request) Successful in 3m20s
CI/CD / smoke (pull_request) Failing after 52s
CI/CD / build (pull_request) Successful in 3m59s
CI/CD / publish (pull_request) Has been skipped
Adds two new subcommands on top of v7's provider lifecycle CLI:
mcpctl provider disable vllm-local # release GPU + survive restart
mcpctl provider enable vllm-local # clear the flag, ready to chat
Use case: vLLM keeps crashing on engine init. `down` works for "now"
but the next chat triggers a restart; `disable` writes
`disabled: true` into the provider's entry in ~/.mcpctl/config.json
and short-circuits complete()/ensureRunning() until you re-enable.
Implementation:
- LlmProviderEntry / LlmProviderFileEntry: new optional `disabled` field
- ManagedVllmProvider: setDisabled(bool), isDisabled(), gate in
complete()/ensureRunning(), expose `disabled` in getStatus()
- mcplocal HTTP: POST /llm/providers/:name/{disable,enable} write the
config file and apply the change live; /start returns 409 when the
target is disabled instead of silently failing
- Boot: createSingleProvider honors `entry.disabled` so a known-bad
vLLM doesn't auto-start on the first chat after mcplocal restart
- CLI: `disable` / `enable` subcommands on `mcpctl provider`; status
output now shows `(disabled)` next to the state
`enable` is live — provider stays in the registry while disabled, so
flipping the flag back is enough; no mcplocal restart needed.
Tests: cli 437/437, mcplocal 731/731.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,14 +1,22 @@
|
||||
/**
|
||||
* `mcpctl provider <name> <up|down|status>`
|
||||
* `mcpctl provider <action> <name>` — managed LLM lifecycle control.
|
||||
*
|
||||
* Lifecycle control for managed local LLM providers (vllm-managed). Talks
|
||||
* to mcplocal's `/llm/providers/:name/{status,start,stop}` HTTP endpoints
|
||||
* — non-managed providers (anthropic, openai, gemini-cli) get a clear
|
||||
* error rather than a no-op.
|
||||
* up — warmup() now (next chat is fast)
|
||||
* down — dispose() now; auto-restarts on next chat
|
||||
* status — state, pid, uptime, disabled flag
|
||||
* disable — dispose() AND set `disabled: true` in ~/.mcpctl/config.json;
|
||||
* survives mcplocal restarts; complete()/ensureRunning() short-
|
||||
* circuit so the GPU process doesn't spawn until you `enable`
|
||||
* enable — clear the disabled flag (live + on disk)
|
||||
*
|
||||
* Practical use: `mcpctl provider vllm-local down` to release GPU memory
|
||||
* without restarting mcplocal (which would drop the SSE connection to mcpd
|
||||
* and re-publish all virtual Llms).
|
||||
* Talks to mcplocal's `/llm/providers/:name/{status,start,stop,enable,disable}`
|
||||
* HTTP endpoints. Non-managed providers (anthropic, openai, gemini-cli)
|
||||
* get a clear 400 rather than a no-op for the lifecycle actions.
|
||||
*
|
||||
* Practical use:
|
||||
* `mcpctl provider down vllm-local` — release GPU memory now
|
||||
* `mcpctl provider disable vllm-local` — release GPU AND prevent auto-start
|
||||
* (e.g. when vLLM keeps crashing)
|
||||
*/
|
||||
import { Command } from 'commander';
|
||||
import http from 'node:http';
|
||||
@@ -25,6 +33,10 @@ interface ProviderStatusResponse {
|
||||
lastError?: string | null;
|
||||
pid?: number | null;
|
||||
uptime?: number | null;
|
||||
/** True when the persistent disable flag is set (config + live). */
|
||||
disabled?: boolean;
|
||||
/** Set by /enable + /disable — true if the config file was rewritten. */
|
||||
persisted?: boolean;
|
||||
}
|
||||
|
||||
interface ErrorResponse {
|
||||
@@ -54,7 +66,8 @@ function formatStatus(s: ProviderStatusResponse): string {
|
||||
if (!s.managed) {
|
||||
return `${s.name}: unmanaged (no lifecycle — API-key or remote provider)`;
|
||||
}
|
||||
const lines = [`${s.name}: ${s.state ?? 'unknown'}`];
|
||||
const stateLabel = s.disabled === true ? `${s.state ?? 'stopped'} (disabled)` : (s.state ?? 'unknown');
|
||||
const lines = [`${s.name}: ${stateLabel}`];
|
||||
if (s.pid !== null && s.pid !== undefined) lines.push(` pid: ${String(s.pid)}`);
|
||||
if (s.uptime !== null && s.uptime !== undefined) {
|
||||
const sec = s.uptime;
|
||||
@@ -126,5 +139,47 @@ export function createProviderCommand(deps: ProviderCommandDeps): Command {
|
||||
deps.log(`${status.name}: ${status.state ?? 'stopped'} (GPU released — next chat will trigger restart)`);
|
||||
});
|
||||
|
||||
cmd
|
||||
.command('disable')
|
||||
.description('Persistently disable a managed provider (survives mcplocal restart)')
|
||||
.argument('<name>', 'Provider name (e.g. vllm-local)')
|
||||
.action(async (name: string) => {
|
||||
const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
|
||||
`${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/disable`,
|
||||
'POST',
|
||||
);
|
||||
if (res.status !== 200) {
|
||||
deps.log(`error: ${(res.body as ErrorResponse).error}`);
|
||||
process.exitCode = 1;
|
||||
return;
|
||||
}
|
||||
const status = res.body as ProviderStatusResponse;
|
||||
const persistedNote = status.persisted === true
|
||||
? ' (saved to ~/.mcpctl/config.json — survives restart)'
|
||||
: ' (live only — provider is not in config file, restart will undo)';
|
||||
deps.log(`${status.name}: disabled${persistedNote}`);
|
||||
});
|
||||
|
||||
cmd
|
||||
.command('enable')
|
||||
.description('Re-enable a previously-disabled provider')
|
||||
.argument('<name>', 'Provider name (e.g. vllm-local)')
|
||||
.action(async (name: string) => {
|
||||
const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
|
||||
`${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/enable`,
|
||||
'POST',
|
||||
);
|
||||
if (res.status !== 200) {
|
||||
deps.log(`error: ${(res.body as ErrorResponse).error}`);
|
||||
process.exitCode = 1;
|
||||
return;
|
||||
}
|
||||
const status = res.body as ProviderStatusResponse;
|
||||
const persistedNote = status.persisted === true
|
||||
? ' (saved to ~/.mcpctl/config.json)'
|
||||
: '';
|
||||
deps.log(`${status.name}: enabled${persistedNote} — next chat will start it`);
|
||||
});
|
||||
|
||||
return cmd;
|
||||
}
|
||||
|
||||
@@ -46,6 +46,9 @@ export const LlmProviderEntrySchema = z.object({
|
||||
idleTimeoutMinutes: z.number().int().positive().optional(),
|
||||
/** vllm-managed: extra args for `vllm serve` */
|
||||
extraArgs: z.array(z.string()).optional(),
|
||||
/** When true, mcplocal keeps the provider registered but suppresses
|
||||
* auto-start. Toggle via `mcpctl provider {enable,disable} <name>`. */
|
||||
disabled: z.boolean().optional(),
|
||||
}).strict();
|
||||
|
||||
export type LlmProviderEntry = z.infer<typeof LlmProviderEntrySchema>;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { existsSync, readFileSync } from 'node:fs';
|
||||
import { existsSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { homedir } from 'node:os';
|
||||
|
||||
@@ -64,6 +64,16 @@ export interface LlmProviderFileEntry {
|
||||
idleTimeoutMinutes?: number;
|
||||
/** vllm-managed: extra args for `vllm serve` */
|
||||
extraArgs?: string[];
|
||||
/**
|
||||
* Persistent disable. When true:
|
||||
* - mcplocal still instantiates the provider so `mcpctl provider enable`
|
||||
* can flip it back live, but
|
||||
* - `complete()` and `ensureRunning()` short-circuit with an error so
|
||||
* the GPU process never spawns.
|
||||
* Toggled via `mcpctl provider {enable,disable} <name>`. Survives mcplocal
|
||||
* restarts because it lives here in the user's config file.
|
||||
*/
|
||||
disabled?: boolean;
|
||||
/**
|
||||
* If set, this local provider is allowed to substitute for the centralized
|
||||
* Llm of this name when the mcpd inference proxy is unreachable.
|
||||
@@ -180,6 +190,37 @@ function loadFullConfig(): McpctlConfig {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Persist a `disabled: bool` flag onto the named provider in
|
||||
* ~/.mcpctl/config.json. Used by the `mcpctl provider {enable,disable}`
|
||||
 * route to make the change survive an mcplocal restart.
|
||||
*
|
||||
* Returns true when the file was actually rewritten (provider found and
|
||||
* its flag changed), false when the provider isn't in the config or the
|
||||
* flag already matches.
|
||||
*/
|
||||
export function setProviderDisabledInConfig(name: string, disabled: boolean): boolean {
|
||||
const configPath = join(homedir(), '.mcpctl', 'config.json');
|
||||
if (!existsSync(configPath)) return false;
|
||||
const raw = readFileSync(configPath, 'utf-8');
|
||||
// Round-trip via JSON.parse → mutate → JSON.stringify. We don't try to
|
||||
// preserve comments or formatting because the config file has always
|
||||
// been plain JSON (no JSONC) and the alternative — a structural editor
|
||||
// — is a lot of code for a feature toggle.
|
||||
const parsed = JSON.parse(raw) as McpctlConfig;
|
||||
if (parsed.llm === undefined || !isMultiConfig(parsed.llm)) return false;
|
||||
const entry = parsed.llm.providers.find((p) => p.name === name);
|
||||
if (entry === undefined) return false;
|
||||
const current = entry.disabled === true;
|
||||
if (current === disabled) return false;
|
||||
if (disabled) entry.disabled = true; else delete entry.disabled;
|
||||
writeFileSync(configPath, JSON.stringify(parsed, null, 2) + '\n', 'utf-8');
|
||||
// Invalidate the cached config so subsequent loadLlmProviders() calls
|
||||
// see the change. (Boot-time only, but cheap and correct.)
|
||||
cachedConfig = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Type guard: is config the multi-provider format? */
|
||||
function isMultiConfig(llm: LlmFileConfig | LlmMultiFileConfig): llm is LlmMultiFileConfig {
|
||||
return 'providers' in llm && Array.isArray((llm as LlmMultiFileConfig).providers);
|
||||
|
||||
@@ -3,6 +3,7 @@ import type { FastifyInstance } from 'fastify';
|
||||
import cors from '@fastify/cors';
|
||||
import { APP_VERSION } from '@mcpctl/shared';
|
||||
import type { HttpConfig } from './config.js';
|
||||
import { setProviderDisabledInConfig } from './config.js';
|
||||
import { McpdClient } from './mcpd-client.js';
|
||||
import { registerProxyRoutes } from './routes/proxy.js';
|
||||
import { registerMcpEndpoint } from './mcp-endpoint.js';
|
||||
@@ -271,6 +272,10 @@ export async function createHttpServer(
|
||||
reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to start)` });
|
||||
return;
|
||||
}
|
||||
if ((provider as ManagedVllmProvider).isDisabled?.()) {
|
||||
reply.code(409).send({ error: `Provider '${request.params.name}' is disabled — run \`mcpctl provider enable\` first` });
|
||||
return;
|
||||
}
|
||||
(provider as ManagedVllmProvider).warmup();
|
||||
// warmup() is fire-and-forget — return current state immediately so
|
||||
// the CLI can show 'starting' and the user knows it's been kicked.
|
||||
@@ -278,6 +283,50 @@ export async function createHttpServer(
|
||||
reply.code(202).send({ name: provider.name, managed: true, ...status });
|
||||
});
|
||||
|
||||
// Persistent disable: dispose the running process AND set the
|
||||
// `disabled: true` flag on the provider's entry in ~/.mcpctl/config.json
|
||||
// so the next mcplocal restart doesn't auto-start it. Live: complete()
|
||||
// and ensureRunning() short-circuit immediately.
|
||||
app.post<{ Params: { name: string } }>('/llm/providers/:name/disable', async (request, reply) => {
|
||||
const registry = deps.providerRegistry;
|
||||
const provider = registry?.get(request.params.name) ?? null;
|
||||
if (provider === null) {
|
||||
reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
|
||||
return;
|
||||
}
|
||||
if (!('setDisabled' in provider) || typeof (provider as ManagedVllmProvider).setDisabled !== 'function') {
|
||||
reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (no enable/disable surface)` });
|
||||
return;
|
||||
}
|
||||
(provider as ManagedVllmProvider).setDisabled(true);
|
||||
// Best-effort persist. If the entry isn't in the config (e.g. the
|
||||
// provider was registered programmatically) the live disable still
|
||||
// sticks for this mcplocal lifetime; we just can't persist it.
|
||||
const persisted = setProviderDisabledInConfig(request.params.name, true);
|
||||
const status = (provider as ManagedVllmProvider).getStatus();
|
||||
reply.code(200).send({ name: provider.name, managed: true, persisted, ...status });
|
||||
});
|
||||
|
||||
// Inverse of /disable: clear the flag both live and on disk. The
|
||||
// provider stays in the registry (we never removed it), so the next
|
||||
// chat triggers ensureRunning() normally.
|
||||
app.post<{ Params: { name: string } }>('/llm/providers/:name/enable', async (request, reply) => {
|
||||
const registry = deps.providerRegistry;
|
||||
const provider = registry?.get(request.params.name) ?? null;
|
||||
if (provider === null) {
|
||||
reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
|
||||
return;
|
||||
}
|
||||
if (!('setDisabled' in provider) || typeof (provider as ManagedVllmProvider).setDisabled !== 'function') {
|
||||
reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (no enable/disable surface)` });
|
||||
return;
|
||||
}
|
||||
(provider as ManagedVllmProvider).setDisabled(false);
|
||||
const persisted = setProviderDisabledInConfig(request.params.name, false);
|
||||
const status = (provider as ManagedVllmProvider).getStatus();
|
||||
reply.code(200).send({ name: provider.name, managed: true, persisted, ...status });
|
||||
});
|
||||
|
||||
// ProxyModel discovery endpoints
|
||||
registerProxymodelEndpoint(app);
|
||||
|
||||
|
||||
@@ -141,7 +141,12 @@ async function createSingleProvider(
|
||||
if (entry.maxModelLen !== undefined) cfg.maxModelLen = entry.maxModelLen;
|
||||
if (entry.idleTimeoutMinutes !== undefined) cfg.idleTimeoutMinutes = entry.idleTimeoutMinutes;
|
||||
if (entry.extraArgs !== undefined) cfg.extraArgs = entry.extraArgs;
|
||||
return new ManagedVllmProvider(cfg);
|
||||
const provider = new ManagedVllmProvider(cfg);
|
||||
// v7+ persistent disable: honor the flag at boot so a known-bad vLLM
|
||||
// doesn't auto-start on first chat. The provider stays registered so
|
||||
// `mcpctl provider enable` can flip it back live without restart.
|
||||
if (entry.disabled === true) provider.setDisabled(true);
|
||||
return provider;
|
||||
}
|
||||
|
||||
default:
|
||||
|
||||
@@ -33,6 +33,8 @@ export interface ManagedVllmStatus {
|
||||
lastError: string | null;
|
||||
pid: number | null;
|
||||
uptime: number | null;
|
||||
/** Persistent disable. When true, complete()/ensureRunning() short-circuit. */
|
||||
disabled: boolean;
|
||||
}
|
||||
|
||||
const POLL_INTERVAL_MS = 2000;
|
||||
@@ -54,6 +56,7 @@ export class ManagedVllmProvider implements LlmProvider {
|
||||
private inner: OpenAiProvider | null = null;
|
||||
private state: ManagedVllmState = 'stopped';
|
||||
private lastError: string | null = null;
|
||||
private disabled = false;
|
||||
private lastUsed = 0;
|
||||
private startedAt = 0;
|
||||
private errorAt = 0;
|
||||
@@ -86,6 +89,9 @@ export class ManagedVllmProvider implements LlmProvider {
|
||||
}
|
||||
|
||||
async complete(options: CompletionOptions): Promise<CompletionResult> {
|
||||
if (this.disabled) {
|
||||
throw new Error('vllm-managed provider is disabled — run `mcpctl provider enable <name>` to re-enable');
|
||||
}
|
||||
await this.ensureRunning();
|
||||
this.lastUsed = Date.now();
|
||||
this.resetIdleTimer();
|
||||
@@ -101,9 +107,11 @@ export class ManagedVllmProvider implements LlmProvider {
|
||||
|
||||
/**
|
||||
* A managed provider is "available" unless in a permanent error state.
|
||||
* When stopped, it can be auto-started on demand.
|
||||
* When stopped, it can be auto-started on demand. Disabled providers
|
||||
* report unavailable so health probes don't try to wake them.
|
||||
*/
|
||||
async isAvailable(): Promise<boolean> {
|
||||
if (this.disabled) return false;
|
||||
return this.state !== 'error';
|
||||
}
|
||||
|
||||
@@ -115,9 +123,34 @@ export class ManagedVllmProvider implements LlmProvider {
|
||||
uptime: this.state === 'running' && this.startedAt > 0
|
||||
? Math.floor((Date.now() - this.startedAt) / 1000)
|
||||
: null,
|
||||
disabled: this.disabled,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Toggle persistent disable. When set true, dispose any running process
|
||||
* immediately and gate complete()/ensureRunning() so the next chat fails
|
||||
* fast rather than spawning a new vLLM. The caller (HTTP endpoint) is
|
||||
* responsible for persisting the flag to ~/.mcpctl/config.json.
|
||||
*/
|
||||
setDisabled(value: boolean): void {
|
||||
if (this.disabled === value) return;
|
||||
this.disabled = value;
|
||||
if (value) {
|
||||
this.killProcess();
|
||||
this.clearIdleTimer();
|
||||
// Reset the error cooldown so re-enable starts clean.
|
||||
if (this.state === 'error') {
|
||||
this.state = 'stopped';
|
||||
this.lastError = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
isDisabled(): boolean {
|
||||
return this.disabled;
|
||||
}
|
||||
|
||||
/** Eagerly start vLLM so it's ready when the first complete() call arrives. */
|
||||
warmup(): void {
|
||||
if (this.state === 'stopped') {
|
||||
@@ -135,6 +168,9 @@ export class ManagedVllmProvider implements LlmProvider {
|
||||
// --- Internal ---
|
||||
|
||||
async ensureRunning(): Promise<void> {
|
||||
if (this.disabled) {
|
||||
throw new Error('vllm-managed provider is disabled — run `mcpctl provider enable <name>` to re-enable');
|
||||
}
|
||||
if (this.state === 'running' && this.process && !this.process.killed) {
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user