diff --git a/completions/mcpctl.bash b/completions/mcpctl.bash
index 8f9a5d6..fec5261 100644
--- a/completions/mcpctl.bash
+++ b/completions/mcpctl.bash
@@ -347,7 +347,7 @@ _mcpctl() {
         provider)
             local provider_sub=$(_mcpctl_get_subcmd $subcmd_pos)
             if [[ -z "$provider_sub" ]]; then
-                COMPREPLY=($(compgen -W "status up down help" -- "$cur"))
+                COMPREPLY=($(compgen -W "status up down disable enable help" -- "$cur"))
             else
                 case "$provider_sub" in
                     status)
@@ -359,6 +359,12 @@ _mcpctl() {
                     down)
                         COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                         ;;
+                    disable)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    enable)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
                     *)
                         COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
                         ;;
diff --git a/completions/mcpctl.fish b/completions/mcpctl.fish
index fa28aeb..def55a4 100644
--- a/completions/mcpctl.fish
+++ b/completions/mcpctl.fish
@@ -451,10 +451,12 @@ complete -c mcpctl -n "__mcpctl_subcmd_active cache clear" -l older-than -d 'Cle
 complete -c mcpctl -n "__mcpctl_subcmd_active cache clear" -s y -l yes -d 'Skip confirmation'
 
 # provider subcommands
-set -l provider_cmds status up down
+set -l provider_cmds status up down disable enable
 complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a status -d 'Show lifecycle state of a provider'
 complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a up -d 'Start a managed provider (warm up so first chat is fast)'
 complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a down -d 'Stop a managed provider now (releases GPU memory)'
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a disable -d 'Persistently disable a managed provider (survives mcplocal restart)'
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a enable -d 'Re-enable a previously-disabled provider'
 
 # test subcommands
 set -l test_cmds mcp
diff --git a/src/cli/src/commands/provider.ts b/src/cli/src/commands/provider.ts
index 93d6b8c..f341959 100644
--- a/src/cli/src/commands/provider.ts
+++ b/src/cli/src/commands/provider.ts
@@ -1,14 +1,22 @@
 /**
- * `mcpctl provider <name> <action>`
+ * `mcpctl provider <action> <name>` — managed LLM lifecycle control.
  *
- * Lifecycle control for managed local LLM providers (vllm-managed). Talks
- * to mcplocal's `/llm/providers/:name/{status,start,stop}` HTTP endpoints
- * — non-managed providers (anthropic, openai, gemini-cli) get a clear
- * error rather than a no-op.
+ *   up      — warmup() now (next chat is fast)
+ *   down    — dispose() now; auto-restarts on next chat
+ *   status  — state, pid, uptime, disabled flag
+ *   disable — dispose() AND set `disabled: true` in ~/.mcpctl/config.json;
+ *             survives mcplocal restarts; complete()/ensureRunning() short-
+ *             circuit so the GPU process doesn't spawn until you `enable`
+ *   enable  — clear the disabled flag (live + on disk)
  *
- * Practical use: `mcpctl provider vllm-local down` to release GPU memory
- * without restarting mcplocal (which would drop the SSE connection to mcpd
- * and re-publish all virtual Llms).
+ * Talks to mcplocal's `/llm/providers/:name/{status,start,stop,enable,disable}`
+ * HTTP endpoints. Non-managed providers (anthropic, openai, gemini-cli)
+ * get a clear 400 rather than a no-op for the lifecycle actions.
+ *
+ * Practical use:
+ *   `mcpctl provider down vllm-local`    — release GPU memory now
+ *   `mcpctl provider disable vllm-local` — release GPU AND prevent auto-start
+ *                                          (e.g. when vLLM keeps crashing)
  */
 import { Command } from 'commander';
 import http from 'node:http';
@@ -25,6 +33,10 @@ interface ProviderStatusResponse {
   lastError?: string | null;
   pid?: number | null;
   uptime?: number | null;
+  /** True when the persistent disable flag is set (config + live). */
+  disabled?: boolean;
+  /** Set by /enable + /disable — true if the config file was rewritten. */
+  persisted?: boolean;
 }
 
 interface ErrorResponse {
@@ -54,7 +66,8 @@ function formatStatus(s: ProviderStatusResponse): string {
   if (!s.managed) {
     return `${s.name}: unmanaged (no lifecycle — API-key or remote provider)`;
   }
-  const lines = [`${s.name}: ${s.state ?? 'unknown'}`];
+  const stateLabel = s.disabled === true ? `${s.state ?? 'stopped'} (disabled)` : (s.state ?? 'unknown');
+  const lines = [`${s.name}: ${stateLabel}`];
   if (s.pid !== null && s.pid !== undefined) lines.push(`  pid: ${String(s.pid)}`);
   if (s.uptime !== null && s.uptime !== undefined) {
     const sec = s.uptime;
@@ -126,5 +139,47 @@ export function createProviderCommand(deps: ProviderCommandDeps): Command {
       deps.log(`${status.name}: ${status.state ?? 'stopped'} (GPU released — next chat will trigger restart)`);
     });
 
+  cmd
+    .command('disable')
+    .description('Persistently disable a managed provider (survives mcplocal restart)')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/disable`,
+        'POST',
+      );
+      if (res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      const status = res.body as ProviderStatusResponse;
+      const persistedNote = status.persisted === true
+        ? ' (saved to ~/.mcpctl/config.json — survives restart)'
+        : ' (live only — provider is not in config file, restart will undo)';
+      deps.log(`${status.name}: disabled${persistedNote}`);
+    });
+
+  cmd
+    .command('enable')
+    .description('Re-enable a previously-disabled provider')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/enable`,
+        'POST',
+      );
+      if (res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      const status = res.body as ProviderStatusResponse;
+      const persistedNote = status.persisted === true
+        ? ' (saved to ~/.mcpctl/config.json)'
+        : '';
+      deps.log(`${status.name}: enabled${persistedNote} — next chat will start it`);
+    });
+
   return cmd;
 }
diff --git a/src/cli/src/config/schema.ts b/src/cli/src/config/schema.ts
index f3dd74b..e020ae6 100644
--- a/src/cli/src/config/schema.ts
+++ b/src/cli/src/config/schema.ts
@@ -46,6 +46,9 @@ export const LlmProviderEntrySchema = z.object({
   idleTimeoutMinutes: z.number().int().positive().optional(),
   /** vllm-managed: extra args for `vllm serve` */
   extraArgs: z.array(z.string()).optional(),
+  /** When true, mcplocal keeps the provider registered but suppresses
+   * auto-start. Toggle via `mcpctl provider {enable,disable} <name>`. */
+  disabled: z.boolean().optional(),
 }).strict();
 
 export type LlmProviderEntry = z.infer<typeof LlmProviderEntrySchema>;
diff --git a/src/mcplocal/src/http/config.ts b/src/mcplocal/src/http/config.ts
index b99ebb9..1dbe8ec 100644
--- a/src/mcplocal/src/http/config.ts
+++ b/src/mcplocal/src/http/config.ts
@@ -1,4 +1,4 @@
-import { existsSync, readFileSync } from 'node:fs';
+import { existsSync, readFileSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
 import { homedir } from 'node:os';
 
@@ -64,6 +64,16 @@ export interface LlmProviderFileEntry {
   idleTimeoutMinutes?: number;
   /** vllm-managed: extra args for `vllm serve` */
   extraArgs?: string[];
+  /**
+   * Persistent disable. When true:
+   *  - mcplocal still instantiates the provider so `mcpctl provider enable`
+   *    can flip it back live, but
+   *  - `complete()` and `ensureRunning()` short-circuit with an error so
+   *    the GPU process never spawns.
+   * Toggled via `mcpctl provider {enable,disable} <name>`. Survives mcplocal
+   * restarts because it lives here in the user's config file.
+   */
+  disabled?: boolean;
   /**
    * If set, this local provider is allowed to substitute for the centralized
    * Llm of this name when the mcpd inference proxy is unreachable.
@@ -180,6 +190,37 @@ function loadFullConfig(): McpctlConfig {
   }
 }
 
+/**
+ * Persist a `disabled: bool` flag onto the named provider in
+ * ~/.mcpctl/config.json. Used by the `mcpctl provider {enable,disable}`
+ * route to make the change survive a mcplocal restart.
+ *
+ * Returns true when the file was actually rewritten (provider found and
+ * its flag changed), false when the provider isn't in the config or the
+ * flag already matches.
+ */
+export function setProviderDisabledInConfig(name: string, disabled: boolean): boolean {
+  const configPath = join(homedir(), '.mcpctl', 'config.json');
+  if (!existsSync(configPath)) return false;
+  const raw = readFileSync(configPath, 'utf-8');
+  // Round-trip via JSON.parse → mutate → JSON.stringify. We don't try to
+  // preserve comments or formatting because the config file has always
+  // been plain JSON (no JSONC) and the alternative — a structural editor
+  // — is a lot of code for a feature toggle.
+  const parsed = JSON.parse(raw) as McpctlConfig;
+  if (parsed.llm === undefined || !isMultiConfig(parsed.llm)) return false;
+  const entry = parsed.llm.providers.find((p) => p.name === name);
+  if (entry === undefined) return false;
+  const current = entry.disabled === true;
+  if (current === disabled) return false;
+  if (disabled) entry.disabled = true; else delete entry.disabled;
+  writeFileSync(configPath, JSON.stringify(parsed, null, 2) + '\n', 'utf-8');
+  // Invalidate the cached config so subsequent loadLlmProviders() calls
+  // see the change. (Boot-time only, but cheap and correct.)
+  cachedConfig = null;
+  return true;
+}
+
 /** Type guard: is config the multi-provider format? */
 function isMultiConfig(llm: LlmFileConfig | LlmMultiFileConfig): llm is LlmMultiFileConfig {
   return 'providers' in llm && Array.isArray((llm as LlmMultiFileConfig).providers);
diff --git a/src/mcplocal/src/http/server.ts b/src/mcplocal/src/http/server.ts
index ad5c51c..56a0210 100644
--- a/src/mcplocal/src/http/server.ts
+++ b/src/mcplocal/src/http/server.ts
@@ -3,6 +3,7 @@ import type { FastifyInstance } from 'fastify';
 import cors from '@fastify/cors';
 import { APP_VERSION } from '@mcpctl/shared';
 import type { HttpConfig } from './config.js';
+import { setProviderDisabledInConfig } from './config.js';
 import { McpdClient } from './mcpd-client.js';
 import { registerProxyRoutes } from './routes/proxy.js';
 import { registerMcpEndpoint } from './mcp-endpoint.js';
@@ -271,6 +272,10 @@ export async function createHttpServer(
       reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to start)` });
       return;
     }
+    if ((provider as ManagedVllmProvider).isDisabled?.()) {
+      reply.code(409).send({ error: `Provider '${request.params.name}' is disabled — run \`mcpctl provider enable\` first` });
+      return;
+    }
     (provider as ManagedVllmProvider).warmup();
     // warmup() is fire-and-forget — return current state immediately so
     // the CLI can show 'starting' and the user knows it's been kicked.
@@ -278,6 +283,50 @@ export async function createHttpServer(
     const status = (provider as ManagedVllmProvider).getStatus();
     reply.code(202).send({ name: provider.name, managed: true, ...status });
   });
 
+  // Persistent disable: dispose the running process AND set the
+  // `disabled: true` flag on the provider's entry in ~/.mcpctl/config.json
+  // so the next mcplocal restart doesn't auto-start it. Live: complete()
+  // and ensureRunning() short-circuit immediately.
+  app.post<{ Params: { name: string } }>('/llm/providers/:name/disable', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('setDisabled' in provider) || typeof (provider as ManagedVllmProvider).setDisabled !== 'function') {
+      reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (no enable/disable surface)` });
+      return;
+    }
+    (provider as ManagedVllmProvider).setDisabled(true);
+    // Best-effort persist. If the entry isn't in the config (e.g. the
+    // provider was registered programmatically) the live disable still
+    // sticks for this mcplocal lifetime; we just can't persist it.
+    const persisted = setProviderDisabledInConfig(request.params.name, true);
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(200).send({ name: provider.name, managed: true, persisted, ...status });
+  });
+
+  // Inverse of /disable: clear the flag both live and on disk. The
+  // provider stays in the registry (we never removed it), so the next
+  // chat triggers ensureRunning() normally.
+  app.post<{ Params: { name: string } }>('/llm/providers/:name/enable', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('setDisabled' in provider) || typeof (provider as ManagedVllmProvider).setDisabled !== 'function') {
+      reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (no enable/disable surface)` });
+      return;
+    }
+    (provider as ManagedVllmProvider).setDisabled(false);
+    const persisted = setProviderDisabledInConfig(request.params.name, false);
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(200).send({ name: provider.name, managed: true, persisted, ...status });
+  });
+
   // ProxyModel discovery endpoints
   registerProxymodelEndpoint(app);
diff --git a/src/mcplocal/src/llm-config.ts b/src/mcplocal/src/llm-config.ts
index 5777249..6e8429e 100644
--- a/src/mcplocal/src/llm-config.ts
+++ b/src/mcplocal/src/llm-config.ts
@@ -141,7 +141,12 @@ async function createSingleProvider(
       if (entry.maxModelLen !== undefined) cfg.maxModelLen = entry.maxModelLen;
       if (entry.idleTimeoutMinutes !== undefined) cfg.idleTimeoutMinutes = entry.idleTimeoutMinutes;
       if (entry.extraArgs !== undefined) cfg.extraArgs = entry.extraArgs;
-      return new ManagedVllmProvider(cfg);
+      const provider = new ManagedVllmProvider(cfg);
+      // v7+ persistent disable: honor the flag at boot so a known-bad vLLM
+      // doesn't auto-start on first chat. The provider stays registered so
+      // `mcpctl provider enable` can flip it back live without restart.
+      if (entry.disabled === true) provider.setDisabled(true);
+      return provider;
     }
 
     default:
diff --git a/src/mcplocal/src/providers/vllm-managed.ts b/src/mcplocal/src/providers/vllm-managed.ts
index c12c906..2da26fc 100644
--- a/src/mcplocal/src/providers/vllm-managed.ts
+++ b/src/mcplocal/src/providers/vllm-managed.ts
@@ -33,6 +33,8 @@ export interface ManagedVllmStatus {
   lastError: string | null;
   pid: number | null;
   uptime: number | null;
+  /** Persistent disable. When true, complete()/ensureRunning() short-circuit. */
+  disabled: boolean;
 }
 
 const POLL_INTERVAL_MS = 2000;
@@ -54,6 +56,7 @@ export class ManagedVllmProvider implements LlmProvider {
   private inner: OpenAiProvider | null = null;
   private state: ManagedVllmState = 'stopped';
   private lastError: string | null = null;
+  private disabled = false;
   private lastUsed = 0;
   private startedAt = 0;
   private errorAt = 0;
@@ -86,6 +89,9 @@ export class ManagedVllmProvider implements LlmProvider {
   }
 
   async complete(options: CompletionOptions): Promise<CompletionResult> {
+    if (this.disabled) {
+      throw new Error('vllm-managed provider is disabled — run `mcpctl provider enable <name>` to re-enable');
+    }
     await this.ensureRunning();
     this.lastUsed = Date.now();
     this.resetIdleTimer();
@@ -101,9 +107,11 @@ export class ManagedVllmProvider implements LlmProvider {
   }
 
   /**
    * A managed provider is "available" unless in a permanent error state.
-   * When stopped, it can be auto-started on demand.
+   * When stopped, it can be auto-started on demand. Disabled providers
+   * report unavailable so health probes don't try to wake them.
    */
   async isAvailable(): Promise<boolean> {
+    if (this.disabled) return false;
     return this.state !== 'error';
   }
@@ -115,9 +123,34 @@ export class ManagedVllmProvider implements LlmProvider {
       uptime: this.state === 'running' && this.startedAt > 0
         ? Math.floor((Date.now() - this.startedAt) / 1000)
         : null,
+      disabled: this.disabled,
     };
   }
 
+  /**
+   * Toggle persistent disable. When set true, dispose any running process
+   * immediately and gate complete()/ensureRunning() so the next chat fails
+   * fast rather than spawning a new vLLM. The caller (HTTP endpoint) is
+   * responsible for persisting the flag to ~/.mcpctl/config.json.
+   */
+  setDisabled(value: boolean): void {
+    if (this.disabled === value) return;
+    this.disabled = value;
+    if (value) {
+      this.killProcess();
+      this.clearIdleTimer();
+      // Reset the error cooldown so re-enable starts clean.
+      if (this.state === 'error') {
+        this.state = 'stopped';
+        this.lastError = null;
+      }
+    }
+  }
+
+  isDisabled(): boolean {
+    return this.disabled;
+  }
+
   /** Eagerly start vLLM so it's ready when the first complete() call arrives. */
   warmup(): void {
     if (this.state === 'stopped') {
@@ -135,6 +168,9 @@ export class ManagedVllmProvider implements LlmProvider {
   // --- Internal ---
 
   async ensureRunning(): Promise<void> {
+    if (this.disabled) {
+      throw new Error('vllm-managed provider is disabled — run `mcpctl provider enable <name>` to re-enable');
+    }
     if (this.state === 'running' && this.process && !this.process.killed) {
       return;
     }