diff --git a/completions/mcpctl.bash b/completions/mcpctl.bash
index eb27698..8f9a5d6 100644
--- a/completions/mcpctl.bash
+++ b/completions/mcpctl.bash
@@ -5,7 +5,7 @@ _mcpctl() {
     local cur prev words cword
    _init_completion || return

-    local commands="status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache test migrate rotate"
+    local commands="status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache provider test migrate rotate"
     local project_commands="get describe delete logs create edit attach-server detach-server"
     local global_opts="-v --version --daemon-url --direct -p --project -h --help"
     local resources="servers instances secrets secretbackends llms agents personalities templates projects users groups rbac prompts promptrequests serverattachments proxymodels inference-tasks all"
@@ -344,6 +344,27 @@ _mcpctl() {
                 esac
             fi
             return ;;
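+        # provider: lifecycle control (start/stop/status) for managed local LLM providers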
+        provider)
+            local provider_sub=$(_mcpctl_get_subcmd $subcmd_pos)
+            if [[ -z "$provider_sub" ]]; then
+                COMPREPLY=($(compgen -W "status up down help" -- "$cur"))
+            else
+                case "$provider_sub" in
+                    status)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    up)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    down)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    *)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                esac
+            fi
+            return ;;
        test)
            local test_sub=$(_mcpctl_get_subcmd $subcmd_pos)
            if [[ -z "$test_sub" ]]; then
diff --git a/completions/mcpctl.fish b/completions/mcpctl.fish
index e4162a2..fa28aeb 100644
--- a/completions/mcpctl.fish
+++ b/completions/mcpctl.fish
@@ -4,7 +4,7 @@
 # Erase any stale completions from previous versions
 complete -c mcpctl -e

-set -l commands status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache test migrate rotate
+set -l commands status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache provider test migrate rotate
 set -l project_commands get describe delete logs create edit attach-server detach-server

 # Disable file completions by default
@@ -238,6 +238,7 @@ complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a approve -d 'Approve a pending prompt request (atomic: delete request, create prompt)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a console -d 'Interactive MCP console — unified timeline with tools, provenance, and lab replay'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a cache -d 'Manage ProxyModel pipeline cache'
+complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a provider -d 'Control local LLM providers (start/stop/status)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a test -d 'Utilities for testing MCP endpoints and config'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a migrate -d 'Move resources between backends (currently: secrets between SecretBackends)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a rotate -d 'Force rotation of a credential-rotating resource (currently: secretbackend)'
@@ -449,6 +450,12 @@ complete -c mcpctl -n "__fish_seen_subcommand_from cache; and not __fish_seen_su
 complete -c mcpctl -n "__mcpctl_subcmd_active cache clear" -l older-than -d 'Clear entries older than N days' -x
 complete -c mcpctl -n "__mcpctl_subcmd_active cache clear" -s y -l yes -d 'Skip confirmation'

+# provider subcommands
+set -l provider_cmds status up down
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a status -d 'Show lifecycle state of a provider'
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a up -d 'Start a managed provider (warm up so first chat is fast)'
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a down -d 'Stop a managed provider now (releases GPU memory)'
+
 # test subcommands
 set -l test_cmds mcp
 complete -c mcpctl -n "__fish_seen_subcommand_from test; and not __fish_seen_subcommand_from $test_cmds" -a mcp -d 'Verify a Streamable-HTTP MCP endpoint: health, initialize, tools/list, optionally call a tool.'
diff --git a/src/cli/src/commands/provider.ts b/src/cli/src/commands/provider.ts
new file mode 100644
index 0000000..93d6b8c
--- /dev/null
+++ b/src/cli/src/commands/provider.ts
@@ -0,0 +1,130 @@
+/**
+ * `mcpctl provider <status|up|down> <name>`
+ *
+ * Lifecycle control for managed local LLM providers (vllm-managed). Talks
+ * to mcplocal's `/llm/providers/:name/{status,start,stop}` HTTP endpoints
+ * — non-managed providers (anthropic, openai, gemini-cli) get a clear
+ * error rather than a no-op.
+ *
+ * Practical use: `mcpctl provider down vllm-local` to release GPU memory
+ * without restarting mcplocal (which would drop the SSE connection to mcpd
+ * and re-publish all virtual Llms).
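+ *
+ * Illustrative session (output depends on provider state):
+ *
+ *   $ mcpctl provider status vllm-local
+ *   vllm-local: running
+ *     pid: 12345
+ *     uptime: 42m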
+ */
+import { Command } from 'commander';
+import http from 'node:http';
+
+export interface ProviderCommandDeps {
+  log: (...args: string[]) => void;
+  mcplocalUrl?: string;
+}
+
+interface ProviderStatusResponse {
+  name: string;
+  managed: boolean;
+  state?: 'stopped' | 'starting' | 'running' | 'error';
+  lastError?: string | null;
+  pid?: number | null;
+  uptime?: number | null;
+}
+
+interface ErrorResponse {
+  error: string;
+}
+
+function fetchJson<T>(url: string, method: 'GET' | 'POST'): Promise<{ status: number; body: T }> {
+  return new Promise((resolve, reject) => {
+    const req = http.request(url, { method, timeout: 10_000 }, (res) => {
+      let data = '';
+      res.on('data', (chunk: Buffer) => { data += chunk.toString(); });
+      res.on('end', () => {
+        try {
+          resolve({ status: res.statusCode ?? 0, body: JSON.parse(data) as T });
+        } catch {
+          reject(new Error(`Invalid response from mcplocal: ${data.slice(0, 200)}`));
+        }
+      });
+    });
+    req.on('error', () => reject(new Error('Cannot connect to mcplocal. Is it running? (`systemctl --user status mcplocal`)')));
+    req.on('timeout', () => { req.destroy(); reject(new Error('mcplocal request timed out')); });
+    req.end();
+  });
+}
+
+function formatStatus(s: ProviderStatusResponse): string {
+  if (!s.managed) {
+    return `${s.name}: unmanaged (no lifecycle — API-key or remote provider)`;
+  }
+  const lines = [`${s.name}: ${s.state ?? 'unknown'}`];
+  if (s.pid !== null && s.pid !== undefined) lines.push(`  pid: ${String(s.pid)}`);
+  if (s.uptime !== null && s.uptime !== undefined) {
+    const sec = s.uptime;
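+    // e.g. 42 -> "42s", 125 -> "2m", 3900 -> "1h5m" (whole units, truncated)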
+    const fmt = sec < 60 ? `${String(sec)}s`
+      : sec < 3600 ? `${String(Math.floor(sec / 60))}m`
+      : `${String(Math.floor(sec / 3600))}h${String(Math.floor((sec % 3600) / 60))}m`;
+    lines.push(`  uptime: ${fmt}`);
+  }
+  if (s.lastError !== null && s.lastError !== undefined) lines.push(`  lastError: ${s.lastError}`);
+  return lines.join('\n');
+}
+
+export function createProviderCommand(deps: ProviderCommandDeps): Command {
+  const cmd = new Command('provider')
+    .description('Control local LLM providers (start/stop/status)');
+
+  const mcplocalUrl = deps.mcplocalUrl ?? 'http://localhost:3200';
+
+  cmd
+    .command('status')
+    .description('Show lifecycle state of a provider')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/status`,
+        'GET',
+      );
+      if (res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      deps.log(formatStatus(res.body as ProviderStatusResponse));
+    });
+
+  cmd
+    .command('up')
+    .description('Start a managed provider (warm up so first chat is fast)')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/start`,
+        'POST',
+      );
+      if (res.status !== 202 && res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      const status = res.body as ProviderStatusResponse;
+      deps.log(`${status.name}: ${status.state ?? 'starting'} (warmup kicked — chat to confirm it's ready)`);
+    });
+
+  cmd
+    .command('down')
+    .description('Stop a managed provider now (releases GPU memory)')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/stop`,
+        'POST',
+      );
+      if (res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      const status = res.body as ProviderStatusResponse;
+      deps.log(`${status.name}: ${status.state ?? 'stopped'} (GPU released — next chat will trigger restart)`);
+    });
+
+  return cmd;
+}
diff --git a/src/cli/src/index.ts b/src/cli/src/index.ts
index 4f54215..26bb8fb 100644
--- a/src/cli/src/index.ts
+++ b/src/cli/src/index.ts
@@ -18,6 +18,7 @@ import { createMcpCommand } from './commands/mcp.js';
 import { createPatchCommand } from './commands/patch.js';
 import { createConsoleCommand } from './commands/console/index.js';
 import { createCacheCommand } from './commands/cache.js';
+import { createProviderCommand } from './commands/provider.js';
 import { createChatCommand } from './commands/chat.js';
 import { createChatLlmCommand } from './commands/chat-llm.js';
 import { createMigrateCommand } from './commands/migrate.js';
@@ -280,6 +281,11 @@ export function createProgram(): Command {
     mcplocalUrl: config.mcplocalUrl,
   }));

+  program.addCommand(createProviderCommand({
+    log: (...args) => console.log(...args),
+    mcplocalUrl: config.mcplocalUrl,
+  }));
+
   program.addCommand(createTestCommand({
     log: (...args) => console.log(...args),
   }));
diff --git a/src/cli/tests/completions.test.ts b/src/cli/tests/completions.test.ts
index 084e68b..2e303ab 100644
--- a/src/cli/tests/completions.test.ts
+++ b/src/cli/tests/completions.test.ts
@@ -120,7 +120,16 @@ describe('fish completions', () => {

   it('non-project commands do not show with --project', () => {
     const nonProjectCmds = ['status', 'login', 'logout', 'config', 'apply', 'backup'];
-    const lines = fishFile.split('\n').filter((l) => l.startsWith('complete') && l.includes('-a '));
+    // Only check top-level command lines — those are the ones whose
+    // visibility is gated on `__mcpctl_has_project`. Lines scoped to a
+    // sub-command (e.g. `provider status`) live under a different
+    // `__fish_seen_subcommand_from <parent>` predicate and don't need
+    // the project guard.
+    const topLevelMarkers = ['$commands', '$project_commands'];
+    const lines = fishFile.split('\n').filter((l) => {
+      if (!l.startsWith('complete') || !l.includes('-a ')) return false;
+      return topLevelMarkers.some((m) => l.includes(m));
+    });

     for (const cmd of nonProjectCmds) {
       const cmdLines = lines.filter((l) => {
diff --git a/src/mcplocal/src/http/server.ts b/src/mcplocal/src/http/server.ts
index 9e5c48d..ad5c51c 100644
--- a/src/mcplocal/src/http/server.ts
+++ b/src/mcplocal/src/http/server.ts
@@ -220,6 +220,64 @@ export async function createHttpServer(
     });
   });

+  // Per-provider status (managed providers expose lifecycle state). Used by
+  // `mcpctl provider status` to read vllm-managed's state without
+  // burning a token like /llm/health does.
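+  // Illustrative call, assuming the default mcplocal port (3200):
+  //   $ curl -s localhost:3200/llm/providers/vllm-local/status
+  //   {"name":"vllm-local","managed":true,"state":"running","pid":12345,"uptime":2520}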
+  app.get<{ Params: { name: string } }>('/llm/providers/:name/status', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('getStatus' in provider) || typeof (provider as ManagedVllmProvider).getStatus !== 'function') {
+      // Non-managed providers (anthropic, openai, gemini-cli) have no
+      // lifecycle — they're always "ready" as long as the API key works.
+      reply.code(200).send({ name: provider.name, managed: false });
+      return;
+    }
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(200).send({ name: provider.name, managed: true, ...status });
+  });
+
+  // Stop a managed provider (free GPU memory). Non-managed providers get
+  // a 400 (there is no process to stop); on success returns 200 with the
+  // resulting status.
+  app.post<{ Params: { name: string } }>('/llm/providers/:name/stop', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('dispose' in provider) || typeof (provider as ManagedVllmProvider).dispose !== 'function') {
+      reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to stop)` });
+      return;
+    }
+    (provider as ManagedVllmProvider).dispose();
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(200).send({ name: provider.name, managed: true, ...status });
+  });
+
+  // Start (warm up) a managed provider so the first chat doesn't pay
+  // the model-load latency.
+  app.post<{ Params: { name: string } }>('/llm/providers/:name/start', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('warmup' in provider) || typeof (provider as ManagedVllmProvider).warmup !== 'function') {
+      reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to start)` });
+      return;
+    }
+    (provider as ManagedVllmProvider).warmup();
+    // warmup() is fire-and-forget — return current state immediately so
+    // the CLI can show 'starting' and the user knows it's been kicked.
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(202).send({ name: provider.name, managed: true, ...status });
+  });
+
   // ProxyModel discovery endpoints
   registerProxymodelEndpoint(app);