diff --git a/completions/mcpctl.bash b/completions/mcpctl.bash
index 8f9a5d6..fec5261 100644
--- a/completions/mcpctl.bash
+++ b/completions/mcpctl.bash
@@ -347,7 +347,7 @@ _mcpctl() {
         provider)
             local provider_sub=$(_mcpctl_get_subcmd $subcmd_pos)
             if [[ -z "$provider_sub" ]]; then
-                COMPREPLY=($(compgen -W "status up down help" -- "$cur"))
+                COMPREPLY=($(compgen -W "status up down disable enable help" -- "$cur"))
             else
                 case "$provider_sub" in
                     status)
@@ -359,6 +359,12 @@ _mcpctl() {
                     down)
                         COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                         ;;
+                    disable)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    enable)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
                     *)
                         COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                         ;;
diff --git a/completions/mcpctl.fish b/completions/mcpctl.fish
index fa28aeb..def55a4 100644
--- a/completions/mcpctl.fish
+++ b/completions/mcpctl.fish
@@ -451,10 +451,12 @@ complete -c mcpctl -n "__mcpctl_subcmd_active cache clear" -l older-than -d 'Cle
 complete -c mcpctl -n "__mcpctl_subcmd_active cache clear" -s y -l yes -d 'Skip confirmation'
 
 # provider subcommands
-set -l provider_cmds status up down
+set -l provider_cmds status up down disable enable
 complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a status -d 'Show lifecycle state of a provider'
 complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a up -d 'Start a managed provider (warm up so first chat is fast)'
 complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a down -d 'Stop a managed provider now (releases GPU memory)'
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a disable -d 'Persistently disable a managed provider (survives mcplocal restart)'
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a enable -d 'Re-enable a previously-disabled provider'
 
 # test subcommands
 set -l test_cmds mcp
diff --git a/src/cli/src/commands/provider.ts b/src/cli/src/commands/provider.ts
index 93d6b8c..f341959 100644
--- a/src/cli/src/commands/provider.ts
+++ b/src/cli/src/commands/provider.ts
@@ -1,14 +1,22 @@
 /**
- * `mcpctl provider <name> <action>`
+ * `mcpctl provider <action> <name>` — managed LLM lifecycle control.
  *
- * Lifecycle control for managed local LLM providers (vllm-managed). Talks
- * to mcplocal's `/llm/providers/:name/{status,start,stop}` HTTP endpoints
- * — non-managed providers (anthropic, openai, gemini-cli) get a clear
- * error rather than a no-op.
+ *   up      — warmup() now (next chat is fast)
+ *   down    — dispose() now; auto-restarts on next chat
+ *   status  — state, pid, uptime, disabled flag
+ *   disable — dispose() AND set `disabled: true` in ~/.mcpctl/config.json;
+ *             survives mcplocal restarts; complete()/ensureRunning() short-
+ *             circuit so the GPU process doesn't spawn until you `enable`
+ *   enable  — clear the disabled flag (live + on disk)
  *
- * Practical use: `mcpctl provider vllm-local down` to release GPU memory
- * without restarting mcplocal (which would drop the SSE connection to mcpd
- * and re-publish all virtual Llms).
+ * Talks to mcplocal's `/llm/providers/:name/{status,start,stop,enable,disable}`
+ * HTTP endpoints. Non-managed providers (anthropic, openai, gemini-cli)
+ * get a clear 400 rather than a no-op for the lifecycle actions.
+ *
+ * Practical use:
+ *   `mcpctl provider down vllm-local`    — release GPU memory now
+ *   `mcpctl provider disable vllm-local` — release GPU AND prevent auto-start
+ *                                          (e.g. when vLLM keeps crashing)
  */
 import { Command } from 'commander';
 import http from 'node:http';
@@ -25,6 +33,10 @@ interface ProviderStatusResponse {
   lastError?: string | null;
   pid?: number | null;
   uptime?: number | null;
+  /** True when the persistent disable flag is set (config + live). */
+  disabled?: boolean;
+  /** Set by /enable + /disable — true if the config file was rewritten. */
+  persisted?: boolean;
 }
 
 interface ErrorResponse {
@@ -54,7 +66,8 @@ function formatStatus(s: ProviderStatusResponse): string {
   if (!s.managed) {
     return `${s.name}: unmanaged (no lifecycle — API-key or remote provider)`;
   }
-  const lines = [`${s.name}: ${s.state ?? 'unknown'}`];
+  const stateLabel = s.disabled === true ? `${s.state ?? 'stopped'} (disabled)` : (s.state ?? 'unknown');
+  const lines = [`${s.name}: ${stateLabel}`];
   if (s.pid !== null && s.pid !== undefined) lines.push(`  pid: ${String(s.pid)}`);
   if (s.uptime !== null && s.uptime !== undefined) {
     const sec = s.uptime;
@@ -126,5 +139,47 @@ export function createProviderCommand(deps: ProviderCommandDeps): Command {
       deps.log(`${status.name}: ${status.state ?? 'stopped'} (GPU released — next chat will trigger restart)`);
     });
 
+  cmd
+    .command('disable')
+    .description('Persistently disable a managed provider (survives mcplocal restart)')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/disable`,
+        'POST',
+      );
+      if (res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      const status = res.body as ProviderStatusResponse;
+      const persistedNote = status.persisted === true
+        ? ' (saved to ~/.mcpctl/config.json — survives restart)'
+        : ' (live only — provider is not in config file, restart will undo)';
+      deps.log(`${status.name}: disabled${persistedNote}`);
+    });
+
+  cmd
+    .command('enable')
+    .description('Re-enable a previously-disabled provider')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/enable`,
+        'POST',
+      );
+      if (res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      const status = res.body as ProviderStatusResponse;
+      const persistedNote = status.persisted === true
+        ? ' (saved to ~/.mcpctl/config.json)'
+        : '';
+      deps.log(`${status.name}: enabled${persistedNote} — next chat will start it`);
+    });
+
   return cmd;
 }
diff --git a/src/cli/src/config/schema.ts b/src/cli/src/config/schema.ts
index f3dd74b..e020ae6 100644
--- a/src/cli/src/config/schema.ts
+++ b/src/cli/src/config/schema.ts
@@ -46,6 +46,9 @@ export const LlmProviderEntrySchema = z.object({
   idleTimeoutMinutes: z.number().int().positive().optional(),
   /** vllm-managed: extra args for `vllm serve` */
   extraArgs: z.array(z.string()).optional(),
+  /** When true, mcplocal keeps the provider registered but suppresses
+   * auto-start. Toggle via `mcpctl provider {enable,disable} <name>`. */
+  disabled: z.boolean().optional(),
 }).strict();
 
 export type LlmProviderEntry = z.infer<typeof LlmProviderEntrySchema>;
diff --git a/src/mcplocal/src/http/config.ts b/src/mcplocal/src/http/config.ts
index b99ebb9..1dbe8ec 100644
--- a/src/mcplocal/src/http/config.ts
+++ b/src/mcplocal/src/http/config.ts
@@ -1,4 +1,4 @@
-import { existsSync, readFileSync } from 'node:fs';
+import { existsSync, readFileSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
 import { homedir } from 'node:os';
 
@@ -64,6 +64,16 @@ export interface LlmProviderFileEntry {
   idleTimeoutMinutes?: number;
   /** vllm-managed: extra args for `vllm serve` */
   extraArgs?: string[];
+  /**
+   * Persistent disable. When true:
+   *  - mcplocal still instantiates the provider so `mcpctl provider enable`
+   *    can flip it back live, but
+   *  - `complete()` and `ensureRunning()` short-circuit with an error so
+   *    the GPU process never spawns.
+   * Toggled via `mcpctl provider {enable,disable} <name>`. Survives mcplocal
+   * restarts because it lives here in the user's config file.
+   */
+  disabled?: boolean;
   /**
    * If set, this local provider is allowed to substitute for the centralized
    * Llm of this name when the mcpd inference proxy is unreachable.
@@ -180,6 +190,37 @@ function loadFullConfig(): McpctlConfig {
   }
 }
 
+/**
+ * Persist a `disabled: bool` flag onto the named provider in
+ * ~/.mcpctl/config.json. Used by the `mcpctl provider {enable,disable}`
+ * route to make the change survive a mcplocal restart.
+ *
+ * Returns true when the file was actually rewritten (provider found and
+ * its flag changed), false when the provider isn't in the config or the
+ * flag already matches.
+ */
+export function setProviderDisabledInConfig(name: string, disabled: boolean): boolean {
+  const configPath = join(homedir(), '.mcpctl', 'config.json');
+  if (!existsSync(configPath)) return false;
+  const raw = readFileSync(configPath, 'utf-8');
+  // Round-trip via JSON.parse → mutate → JSON.stringify. We don't try to
+  // preserve comments or formatting because the config file has always
+  // been plain JSON (no JSONC) and the alternative — a structural editor
+  // — is a lot of code for a feature toggle.
+  const parsed = JSON.parse(raw) as McpctlConfig;
+  if (parsed.llm === undefined || !isMultiConfig(parsed.llm)) return false;
+  const entry = parsed.llm.providers.find((p) => p.name === name);
+  if (entry === undefined) return false;
+  const current = entry.disabled === true;
+  if (current === disabled) return false;
+  if (disabled) entry.disabled = true; else delete entry.disabled;
+  writeFileSync(configPath, JSON.stringify(parsed, null, 2) + '\n', 'utf-8');
+  // Invalidate the cached config so subsequent loadLlmProviders() calls
+  // see the change. (Boot-time only, but cheap and correct.)
+  cachedConfig = null;
+  return true;
+}
+
 /** Type guard: is config the multi-provider format? */
 function isMultiConfig(llm: LlmFileConfig | LlmMultiFileConfig): llm is LlmMultiFileConfig {
   return 'providers' in llm && Array.isArray((llm as LlmMultiFileConfig).providers);
diff --git a/src/mcplocal/src/http/server.ts b/src/mcplocal/src/http/server.ts
index ad5c51c..56a0210 100644
--- a/src/mcplocal/src/http/server.ts
+++ b/src/mcplocal/src/http/server.ts
@@ -3,6 +3,7 @@ import type { FastifyInstance } from 'fastify';
 import cors from '@fastify/cors';
 import { APP_VERSION } from '@mcpctl/shared';
 import type { HttpConfig } from './config.js';
+import { setProviderDisabledInConfig } from './config.js';
 import { McpdClient } from './mcpd-client.js';
 import { registerProxyRoutes } from './routes/proxy.js';
 import { registerMcpEndpoint } from './mcp-endpoint.js';
@@ -271,6 +272,10 @@ export async function createHttpServer(
       reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to start)` });
       return;
     }
+    if ((provider as ManagedVllmProvider).isDisabled?.()) {
+      reply.code(409).send({ error: `Provider '${request.params.name}' is disabled — run \`mcpctl provider enable\` first` });
+      return;
+    }
     (provider as ManagedVllmProvider).warmup();
     // warmup() is fire-and-forget — return current state immediately so
     // the CLI can show 'starting' and the user knows it's been kicked.
@@ -278,6 +283,50 @@ export async function createHttpServer(
     const status = (provider as ManagedVllmProvider).getStatus();
     reply.code(202).send({ name: provider.name, managed: true, ...status });
   });
 
+  // Persistent disable: dispose the running process AND set the
+  // `disabled: true` flag on the provider's entry in ~/.mcpctl/config.json
+  // so the next mcplocal restart doesn't auto-start it. Live: complete()
+  // and ensureRunning() short-circuit immediately.
+  app.post<{ Params: { name: string } }>('/llm/providers/:name/disable', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('setDisabled' in provider) || typeof (provider as ManagedVllmProvider).setDisabled !== 'function') {
+      reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (no enable/disable surface)` });
+      return;
+    }
+    (provider as ManagedVllmProvider).setDisabled(true);
+    // Best-effort persist. If the entry isn't in the config (e.g. the
+    // provider was registered programmatically) the live disable still
+    // sticks for this mcplocal lifetime; we just can't persist it.
+    const persisted = setProviderDisabledInConfig(request.params.name, true);
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(200).send({ name: provider.name, managed: true, persisted, ...status });
+  });
+
+  // Inverse of /disable: clear the flag both live and on disk. The
+  // provider stays in the registry (we never removed it), so the next
+  // chat triggers ensureRunning() normally.
+  app.post<{ Params: { name: string } }>('/llm/providers/:name/enable', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('setDisabled' in provider) || typeof (provider as ManagedVllmProvider).setDisabled !== 'function') {
+      reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (no enable/disable surface)` });
+      return;
+    }
+    (provider as ManagedVllmProvider).setDisabled(false);
+    const persisted = setProviderDisabledInConfig(request.params.name, false);
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(200).send({ name: provider.name, managed: true, persisted, ...status });
+  });
+
   // ProxyModel discovery endpoints
   registerProxymodelEndpoint(app);
diff --git a/src/mcplocal/src/llm-config.ts b/src/mcplocal/src/llm-config.ts
index 5777249..6e8429e 100644
--- a/src/mcplocal/src/llm-config.ts
+++ b/src/mcplocal/src/llm-config.ts
@@ -141,7 +141,12 @@ async function createSingleProvider(
       if (entry.maxModelLen !== undefined) cfg.maxModelLen = entry.maxModelLen;
       if (entry.idleTimeoutMinutes !== undefined) cfg.idleTimeoutMinutes = entry.idleTimeoutMinutes;
       if (entry.extraArgs !== undefined) cfg.extraArgs = entry.extraArgs;
-      return new ManagedVllmProvider(cfg);
+      const provider = new ManagedVllmProvider(cfg);
+      // v7+ persistent disable: honor the flag at boot so a known-bad vLLM
+      // doesn't auto-start on first chat. The provider stays registered so
+      // `mcpctl provider enable` can flip it back live without restart.
+      if (entry.disabled === true) provider.setDisabled(true);
+      return provider;
     }
 
     default:
diff --git a/src/mcplocal/src/providers/vllm-managed.ts b/src/mcplocal/src/providers/vllm-managed.ts
index c12c906..2da26fc 100644
--- a/src/mcplocal/src/providers/vllm-managed.ts
+++ b/src/mcplocal/src/providers/vllm-managed.ts
@@ -33,6 +33,8 @@ export interface ManagedVllmStatus {
   lastError: string | null;
   pid: number | null;
   uptime: number | null;
+  /** Persistent disable. When true, complete()/ensureRunning() short-circuit. */
+  disabled: boolean;
 }
 
 const POLL_INTERVAL_MS = 2000;
@@ -54,6 +56,7 @@ export class ManagedVllmProvider implements LlmProvider {
   private inner: OpenAiProvider | null = null;
   private state: ManagedVllmState = 'stopped';
   private lastError: string | null = null;
+  private disabled = false;
   private lastUsed = 0;
   private startedAt = 0;
   private errorAt = 0;
@@ -86,6 +89,9 @@ export class ManagedVllmProvider implements LlmProvider {
   }
 
   async complete(options: CompletionOptions): Promise<CompletionResult> {
+    if (this.disabled) {
+      throw new Error('vllm-managed provider is disabled — run `mcpctl provider enable <name>` to re-enable');
+    }
     await this.ensureRunning();
     this.lastUsed = Date.now();
     this.resetIdleTimer();
@@ -101,9 +107,11 @@ export class ManagedVllmProvider implements LlmProvider {
   }
 
   /**
    * A managed provider is "available" unless in a permanent error state.
-   * When stopped, it can be auto-started on demand.
+   * When stopped, it can be auto-started on demand. Disabled providers
+   * report unavailable so health probes don't try to wake them.
    */
   async isAvailable(): Promise<boolean> {
+    if (this.disabled) return false;
     return this.state !== 'error';
   }
@@ -115,9 +123,34 @@ export class ManagedVllmProvider implements LlmProvider {
       uptime: this.state === 'running' && this.startedAt > 0
         ? Math.floor((Date.now() - this.startedAt) / 1000)
         : null,
+      disabled: this.disabled,
     };
   }
 
+  /**
+   * Toggle persistent disable. When set true, dispose any running process
+   * immediately and gate complete()/ensureRunning() so the next chat fails
+   * fast rather than spawning a new vLLM. The caller (HTTP endpoint) is
+   * responsible for persisting the flag to ~/.mcpctl/config.json.
+   */
+  setDisabled(value: boolean): void {
+    if (this.disabled === value) return;
+    this.disabled = value;
+    if (value) {
+      this.killProcess();
+      this.clearIdleTimer();
+      // Reset the error cooldown so re-enable starts clean.
+      if (this.state === 'error') {
+        this.state = 'stopped';
+        this.lastError = null;
+      }
+    }
+  }
+
+  isDisabled(): boolean {
+    return this.disabled;
+  }
+
   /** Eagerly start vLLM so it's ready when the first complete() call arrives. */
   warmup(): void {
     if (this.state === 'stopped') {
@@ -135,6 +168,9 @@ export class ManagedVllmProvider implements LlmProvider {
   // --- Internal ---
 
   async ensureRunning(): Promise<void> {
+    if (this.disabled) {
+      throw new Error('vllm-managed provider is disabled — run `mcpctl provider enable <name>` to re-enable');
+    }
     if (this.state === 'running' && this.process && !this.process.killed) {
       return;
     }