From 356cbe87b5a915209a290a41f74ead5262c1cbf1 Mon Sep 17 00:00:00 2001
From: Michal
Date: Wed, 29 Apr 2026 15:58:46 +0100
Subject: [PATCH] feat(cli+mcplocal): mcpctl provider {up,down,status} for
 managed LLMs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds lifecycle control for managed local LLM providers (vllm-managed)
without the nuclear option of restarting mcplocal.

Practical use:

  mcpctl provider vllm-local down    # release GPU memory now
  mcpctl provider vllm-local up      # warm up before the next chat
  mcpctl provider vllm-local status  # see state, pid, uptime
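
For a managed provider that is running, `status` prints along these
lines (pid and uptime values are illustrative; formatStatus() in
provider.ts defines the exact shape):

  $ mcpctl provider vllm-local status
  vllm-local: running
    pid: 48213
    uptime: 4m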

mcplocal exposes three new endpoints:

  GET  /llm/providers/:name/status → returns lifecycle state for managed
       providers, { managed: false } for unmanaged (anthropic, openai, …)
  POST /llm/providers/:name/start  → calls warmup() (202 + initial state)
  POST /llm/providers/:name/stop   → calls dispose() (200 + post-stop state)

Stop and start return 400 for non-managed providers — stopping an
API-key provider is meaningless. The CLI surfaces the error verbatim.
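
The endpoints are plain HTTP, so they can also be poked with curl
(3200 is the CLI's default mcplocal port; response bodies shown here
are illustrative):

  $ curl http://localhost:3200/llm/providers/vllm-local/status
  {"name":"vllm-local","managed":true,"state":"running","pid":48213,"uptime":240}

  $ curl -X POST http://localhost:3200/llm/providers/anthropic/stop
  {"error":"Provider 'anthropic' is not managed (nothing to stop)"}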

Restarting mcplocal would also free the GPU, but it would drop the SSE
connection to mcpd and force every virtual Llm to re-publish; this is
the targeted, non-disruptive escape hatch.

The completions test gained a `topLevelMarkers` filter so a sub-command
named `status` (under `provider`) doesn't trip the existing "non-project
commands must guard with __mcpctl_has_project" rule.

Tests: cli 437/437, mcplocal 731/731.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 completions/mcpctl.bash           |  23 +++++-
 completions/mcpctl.fish           |   9 ++-
 src/cli/src/commands/provider.ts  | 130 ++++++++++++++++++++++++++++++
 src/cli/src/index.ts              |   6 ++
 src/cli/tests/completions.test.ts |  11 ++-
 src/mcplocal/src/http/server.ts   |  58 +++++++++++++
 6 files changed, 234 insertions(+), 3 deletions(-)
 create mode 100644 src/cli/src/commands/provider.ts

diff --git a/completions/mcpctl.bash b/completions/mcpctl.bash
index eb27698..8f9a5d6 100644
--- a/completions/mcpctl.bash
+++ b/completions/mcpctl.bash
@@ -5,7 +5,7 @@ _mcpctl() {
     local cur prev words cword
     _init_completion || return
 
-    local commands="status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache test migrate rotate"
+    local commands="status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache provider test migrate rotate"
     local project_commands="get describe delete logs create edit attach-server detach-server"
     local global_opts="-v --version --daemon-url --direct -p --project -h --help"
     local resources="servers instances secrets secretbackends llms agents personalities templates projects users groups rbac prompts promptrequests serverattachments proxymodels inference-tasks all"
@@ -344,6 +344,27 @@ _mcpctl() {
             esac
         fi
         return ;;
+        provider)
+            local provider_sub=$(_mcpctl_get_subcmd $subcmd_pos)
+            if [[ -z "$provider_sub" ]]; then
+                COMPREPLY=($(compgen -W "status up down help" -- "$cur"))
+            else
+                case "$provider_sub" in
+                    status)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    up)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    down)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    *)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                esac
+            fi
+            return ;;
         test)
             local test_sub=$(_mcpctl_get_subcmd $subcmd_pos)
             if [[ -z "$test_sub" ]]; then
diff --git a/completions/mcpctl.fish b/completions/mcpctl.fish
index e4162a2..fa28aeb 100644
--- a/completions/mcpctl.fish
+++ b/completions/mcpctl.fish
@@ -4,7 +4,7 @@
 # Erase any stale completions from previous versions
 complete -c mcpctl -e
 
-set -l commands status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache test migrate rotate
+set -l commands status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache provider test migrate rotate
 set -l project_commands get describe delete logs create edit attach-server detach-server
 
 # Disable file completions by default
@@ -238,6 +238,7 @@
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a approve -d 'Approve a pending prompt request (atomic: delete request, create prompt)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a console -d 'Interactive MCP console — unified timeline with tools, provenance, and lab replay'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a cache -d 'Manage ProxyModel pipeline cache'
+complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a provider -d 'Control local LLM providers (start/stop/status)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a test -d 'Utilities for testing MCP endpoints and config'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a migrate -d 'Move resources between backends (currently: secrets between SecretBackends)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a rotate -d 'Force rotation of a credential-rotating resource (currently: secretbackend)'
@@ -449,6 +450,12 @@
 complete -c mcpctl -n "__mcpctl_subcmd_active cache clear" -l older-than -d 'Clear entries older than N days' -x
 complete -c mcpctl -n "__mcpctl_subcmd_active cache clear" -s y -l yes -d 'Skip confirmation'
 
+# provider subcommands
+set -l provider_cmds status up down
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a status -d 'Show lifecycle state of a provider'
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a up -d 'Start a managed provider (warm up so first chat is fast)'
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a down -d 'Stop a managed provider now (releases GPU memory)'
+
 # test subcommands
 set -l test_cmds mcp
 complete -c mcpctl -n "__fish_seen_subcommand_from test; and not __fish_seen_subcommand_from $test_cmds" -a mcp -d 'Verify a Streamable-HTTP MCP endpoint: health, initialize, tools/list, optionally call a tool.'
diff --git a/src/cli/src/commands/provider.ts b/src/cli/src/commands/provider.ts
new file mode 100644
index 0000000..93d6b8c
--- /dev/null
+++ b/src/cli/src/commands/provider.ts
@@ -0,0 +1,130 @@
+/**
+ * `mcpctl provider <status|up|down> <name>`
+ *
+ * Lifecycle control for managed local LLM providers (vllm-managed). Talks
+ * to mcplocal's `/llm/providers/:name/{status,start,stop}` HTTP endpoints
+ * — non-managed providers (anthropic, openai, gemini-cli) get a clear
+ * error rather than a no-op.
+ *
+ * Practical use: `mcpctl provider vllm-local down` to release GPU memory
+ * without restarting mcplocal (which would drop the SSE connection to mcpd
+ * and re-publish all virtual Llms).
+ */
+import { Command } from 'commander';
+import http from 'node:http';
+
+export interface ProviderCommandDeps {
+  log: (...args: string[]) => void;
+  mcplocalUrl?: string;
+}
+
+interface ProviderStatusResponse {
+  name: string;
+  managed: boolean;
+  state?: 'stopped' | 'starting' | 'running' | 'error';
+  lastError?: string | null;
+  pid?: number | null;
+  uptime?: number | null;
+}
+
+interface ErrorResponse {
+  error: string;
+}
+
+function fetchJson<T>(url: string, method: 'GET' | 'POST'): Promise<{ status: number; body: T }> {
+  return new Promise((resolve, reject) => {
+    const req = http.request(url, { method, timeout: 10_000 }, (res) => {
+      let data = '';
+      res.on('data', (chunk: Buffer) => { data += chunk.toString(); });
+      res.on('end', () => {
+        try {
+          resolve({ status: res.statusCode ?? 0, body: JSON.parse(data) as T });
+        } catch {
+          reject(new Error(`Invalid response from mcplocal: ${data.slice(0, 200)}`));
+        }
+      });
+    });
+    req.on('error', () => reject(new Error('Cannot connect to mcplocal. Is it running? (`systemctl --user status mcplocal`)')));
+    req.on('timeout', () => { req.destroy(); reject(new Error('mcplocal request timed out')); });
+    req.end();
+  });
+}
+
+function formatStatus(s: ProviderStatusResponse): string {
+  if (!s.managed) {
+    return `${s.name}: unmanaged (no lifecycle — API-key or remote provider)`;
+  }
+  const lines = [`${s.name}: ${s.state ?? 'unknown'}`];
+  if (s.pid !== null && s.pid !== undefined) lines.push(`  pid: ${String(s.pid)}`);
+  if (s.uptime !== null && s.uptime !== undefined) {
+    const sec = s.uptime;
+    const fmt = sec < 60 ? `${String(sec)}s`
+      : sec < 3600 ? `${String(Math.floor(sec / 60))}m`
+      : `${String(Math.floor(sec / 3600))}h${String(Math.floor((sec % 3600) / 60))}m`;
+    lines.push(`  uptime: ${fmt}`);
+  }
+  if (s.lastError !== null && s.lastError !== undefined) lines.push(`  lastError: ${s.lastError}`);
+  return lines.join('\n');
+}
+
+export function createProviderCommand(deps: ProviderCommandDeps): Command {
+  const cmd = new Command('provider')
+    .description('Control local LLM providers (start/stop/status)');
+
+  const mcplocalUrl = deps.mcplocalUrl ?? 'http://localhost:3200';
+
+  cmd
+    .command('status')
+    .description('Show lifecycle state of a provider')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/status`,
+        'GET',
+      );
+      if (res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      deps.log(formatStatus(res.body as ProviderStatusResponse));
+    });
+
+  cmd
+    .command('up')
+    .description('Start a managed provider (warm up so first chat is fast)')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/start`,
+        'POST',
+      );
+      if (res.status !== 202 && res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      const status = res.body as ProviderStatusResponse;
+      deps.log(`${status.name}: ${status.state ?? 'starting'} (warmup kicked — chat to confirm it's ready)`);
+    });
+
+  cmd
+    .command('down')
+    .description('Stop a managed provider now (releases GPU memory)')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/stop`,
+        'POST',
+      );
+      if (res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      const status = res.body as ProviderStatusResponse;
+      deps.log(`${status.name}: ${status.state ?? 'stopped'} (GPU released — next chat will trigger restart)`);
+    });
+
+  return cmd;
+}
diff --git a/src/cli/src/index.ts b/src/cli/src/index.ts
index 4f54215..26bb8fb 100644
--- a/src/cli/src/index.ts
+++ b/src/cli/src/index.ts
@@ -18,6 +18,7 @@ import { createMcpCommand } from './commands/mcp.js';
 import { createPatchCommand } from './commands/patch.js';
 import { createConsoleCommand } from './commands/console/index.js';
 import { createCacheCommand } from './commands/cache.js';
+import { createProviderCommand } from './commands/provider.js';
 import { createChatCommand } from './commands/chat.js';
 import { createChatLlmCommand } from './commands/chat-llm.js';
 import { createMigrateCommand } from './commands/migrate.js';
@@ -280,6 +281,11 @@ export function createProgram(): Command {
     mcplocalUrl: config.mcplocalUrl,
   }));
 
+  program.addCommand(createProviderCommand({
+    log: (...args) => console.log(...args),
+    mcplocalUrl: config.mcplocalUrl,
+  }));
+
   program.addCommand(createTestCommand({
     log: (...args) => console.log(...args),
   }));
diff --git a/src/cli/tests/completions.test.ts b/src/cli/tests/completions.test.ts
index 084e68b..2e303ab 100644
--- a/src/cli/tests/completions.test.ts
+++ b/src/cli/tests/completions.test.ts
@@ -120,7 +120,16 @@ describe('fish completions', () => {
   it('non-project commands do not show with --project', () => {
     const nonProjectCmds = ['status', 'login', 'logout', 'config', 'apply', 'backup'];
 
-    const lines = fishFile.split('\n').filter((l) => l.startsWith('complete') && l.includes('-a '));
+    // Only check top-level command lines — those are the ones whose
+    // visibility is gated on `__mcpctl_has_project`. Lines scoped to a
+    // sub-command (e.g. `provider status`) live under a different
+    // `__fish_seen_subcommand_from <parent>` predicate and don't need
+    // the project guard.
+    const topLevelMarkers = ['$commands', '$project_commands'];
+    const lines = fishFile.split('\n').filter((l) => {
+      if (!l.startsWith('complete') || !l.includes('-a ')) return false;
+      return topLevelMarkers.some((m) => l.includes(m));
+    });
 
     for (const cmd of nonProjectCmds) {
       const cmdLines = lines.filter((l) => {
diff --git a/src/mcplocal/src/http/server.ts b/src/mcplocal/src/http/server.ts
index 9e5c48d..ad5c51c 100644
--- a/src/mcplocal/src/http/server.ts
+++ b/src/mcplocal/src/http/server.ts
@@ -220,6 +220,64 @@ export async function createHttpServer(
     });
   });
 
+  // Per-provider status (managed providers expose lifecycle state). Used by
+  // `mcpctl provider status` to read vllm-managed's state without
+  // burning a token like /llm/health does.
+  app.get<{ Params: { name: string } }>('/llm/providers/:name/status', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('getStatus' in provider) || typeof (provider as ManagedVllmProvider).getStatus !== 'function') {
+      // Non-managed providers (anthropic, openai, gemini-cli) have no
+      // lifecycle — they're always "ready" as long as the API key works.
+      reply.code(200).send({ name: provider.name, managed: false });
+      return;
+    }
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(200).send({ name: provider.name, managed: true, ...status });
+  });
+
+  // Stop a managed provider (free GPU memory). Returns 400 for non-managed
+  // providers, and 200 with the resulting status otherwise.
+  app.post<{ Params: { name: string } }>('/llm/providers/:name/stop', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('dispose' in provider) || typeof (provider as ManagedVllmProvider).dispose !== 'function') {
+      reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to stop)` });
+      return;
+    }
+    (provider as ManagedVllmProvider).dispose();
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(200).send({ name: provider.name, managed: true, ...status });
+  });
+
+  // Start (warm up) a managed provider so the first chat doesn't pay
+  // the model-load latency.
+  app.post<{ Params: { name: string } }>('/llm/providers/:name/start', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('warmup' in provider) || typeof (provider as ManagedVllmProvider).warmup !== 'function') {
+      reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to start)` });
+      return;
+    }
+    (provider as ManagedVllmProvider).warmup();
+    // warmup() is fire-and-forget — return current state immediately so
+    // the CLI can show 'starting' and the user knows it's been kicked.
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(202).send({ name: provider.name, managed: true, ...status });
+  });
+
   // ProxyModel discovery endpoints
   registerProxymodelEndpoint(app);