feat(cli+mcplocal): persistent provider disable/enable
Some checks failed
CI/CD / lint (pull_request) Successful in 55s
CI/CD / test (pull_request) Successful in 1m11s
CI/CD / typecheck (pull_request) Successful in 3m20s
CI/CD / smoke (pull_request) Failing after 52s
CI/CD / build (pull_request) Successful in 3m59s
CI/CD / publish (pull_request) Has been skipped
Some checks failed
CI/CD / lint (pull_request) Successful in 55s
CI/CD / test (pull_request) Successful in 1m11s
CI/CD / typecheck (pull_request) Successful in 3m20s
CI/CD / smoke (pull_request) Failing after 52s
CI/CD / build (pull_request) Successful in 3m59s
CI/CD / publish (pull_request) Has been skipped
Adds two new subcommands on top of v7's provider lifecycle CLI:
mcpctl provider disable vllm-local # release GPU + survive restart
mcpctl provider enable vllm-local # clear the flag, ready to chat
Use case: vLLM keeps crashing on engine init. `down` works for "now"
but the next chat triggers a restart; `disable` writes
`disabled: true` into the provider's entry in ~/.mcpctl/config.json
and short-circuits complete()/ensureRunning() until you re-enable.
Implementation:
- LlmProviderEntry / LlmProviderFileEntry: new optional `disabled` field
- ManagedVllmProvider: setDisabled(bool), isDisabled(), gate in
complete()/ensureRunning(), expose `disabled` in getStatus()
- mcplocal HTTP: POST /llm/providers/:name/{disable,enable} write the
config file and apply the change live; /start returns 409 when the
target is disabled instead of silently failing
- Boot: createSingleProvider honors `entry.disabled` so a known-bad
vLLM doesn't auto-start on the first chat after mcplocal restart
- CLI: `disable` / `enable` subcommands on `mcpctl provider`; status
output now shows `(disabled)` next to the state
`enable` is live — provider stays in the registry while disabled, so
flipping the flag back is enough; no mcplocal restart needed.
Tests: cli 437/437, mcplocal 731/731.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,14 +1,22 @@
|
||||
/**
|
||||
* `mcpctl provider <name> <up|down|status>`
|
||||
* `mcpctl provider <action> <name>` — managed LLM lifecycle control.
|
||||
*
|
||||
* Lifecycle control for managed local LLM providers (vllm-managed). Talks
|
||||
* to mcplocal's `/llm/providers/:name/{status,start,stop}` HTTP endpoints
|
||||
* — non-managed providers (anthropic, openai, gemini-cli) get a clear
|
||||
* error rather than a no-op.
|
||||
* up — warmup() now (next chat is fast)
|
||||
* down — dispose() now; auto-restarts on next chat
|
||||
* status — state, pid, uptime, disabled flag
|
||||
* disable — dispose() AND set `disabled: true` in ~/.mcpctl/config.json;
|
||||
* survives mcplocal restarts; complete()/ensureRunning() short-
|
||||
* circuit so the GPU process doesn't spawn until you `enable`
|
||||
* enable — clear the disabled flag (live + on disk)
|
||||
*
|
||||
* Practical use: `mcpctl provider vllm-local down` to release GPU memory
|
||||
* without restarting mcplocal (which would drop the SSE connection to mcpd
|
||||
* and re-publish all virtual Llms).
|
||||
* Talks to mcplocal's `/llm/providers/:name/{status,start,stop,enable,disable}`
|
||||
* HTTP endpoints. Non-managed providers (anthropic, openai, gemini-cli)
|
||||
* get a clear 400 rather than a no-op for the lifecycle actions.
|
||||
*
|
||||
* Practical use:
|
||||
* `mcpctl provider down vllm-local` — release GPU memory now
|
||||
* `mcpctl provider disable vllm-local` — release GPU AND prevent auto-start
|
||||
* (e.g. when vLLM keeps crashing)
|
||||
*/
|
||||
import { Command } from 'commander';
|
||||
import http from 'node:http';
|
||||
@@ -25,6 +33,10 @@ interface ProviderStatusResponse {
|
||||
lastError?: string | null;
|
||||
pid?: number | null;
|
||||
uptime?: number | null;
|
||||
/** True when the persistent disable flag is set (config + live). */
|
||||
disabled?: boolean;
|
||||
/** Set by /enable + /disable — true if the config file was rewritten. */
|
||||
persisted?: boolean;
|
||||
}
|
||||
|
||||
interface ErrorResponse {
|
||||
@@ -54,7 +66,8 @@ function formatStatus(s: ProviderStatusResponse): string {
|
||||
if (!s.managed) {
|
||||
return `${s.name}: unmanaged (no lifecycle — API-key or remote provider)`;
|
||||
}
|
||||
const lines = [`${s.name}: ${s.state ?? 'unknown'}`];
|
||||
const stateLabel = s.disabled === true ? `${s.state ?? 'stopped'} (disabled)` : (s.state ?? 'unknown');
|
||||
const lines = [`${s.name}: ${stateLabel}`];
|
||||
if (s.pid !== null && s.pid !== undefined) lines.push(` pid: ${String(s.pid)}`);
|
||||
if (s.uptime !== null && s.uptime !== undefined) {
|
||||
const sec = s.uptime;
|
||||
@@ -126,5 +139,47 @@ export function createProviderCommand(deps: ProviderCommandDeps): Command {
|
||||
deps.log(`${status.name}: ${status.state ?? 'stopped'} (GPU released — next chat will trigger restart)`);
|
||||
});
|
||||
|
||||
cmd
|
||||
.command('disable')
|
||||
.description('Persistently disable a managed provider (survives mcplocal restart)')
|
||||
.argument('<name>', 'Provider name (e.g. vllm-local)')
|
||||
.action(async (name: string) => {
|
||||
const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
|
||||
`${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/disable`,
|
||||
'POST',
|
||||
);
|
||||
if (res.status !== 200) {
|
||||
deps.log(`error: ${(res.body as ErrorResponse).error}`);
|
||||
process.exitCode = 1;
|
||||
return;
|
||||
}
|
||||
const status = res.body as ProviderStatusResponse;
|
||||
const persistedNote = status.persisted === true
|
||||
? ' (saved to ~/.mcpctl/config.json — survives restart)'
|
||||
: ' (live only — provider is not in config file, restart will undo)';
|
||||
deps.log(`${status.name}: disabled${persistedNote}`);
|
||||
});
|
||||
|
||||
cmd
|
||||
.command('enable')
|
||||
.description('Re-enable a previously-disabled provider')
|
||||
.argument('<name>', 'Provider name (e.g. vllm-local)')
|
||||
.action(async (name: string) => {
|
||||
const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
|
||||
`${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/enable`,
|
||||
'POST',
|
||||
);
|
||||
if (res.status !== 200) {
|
||||
deps.log(`error: ${(res.body as ErrorResponse).error}`);
|
||||
process.exitCode = 1;
|
||||
return;
|
||||
}
|
||||
const status = res.body as ProviderStatusResponse;
|
||||
const persistedNote = status.persisted === true
|
||||
? ' (saved to ~/.mcpctl/config.json)'
|
||||
: '';
|
||||
deps.log(`${status.name}: enabled${persistedNote} — next chat will start it`);
|
||||
});
|
||||
|
||||
return cmd;
|
||||
}
|
||||
|
||||
@@ -46,6 +46,9 @@ export const LlmProviderEntrySchema = z.object({
|
||||
idleTimeoutMinutes: z.number().int().positive().optional(),
|
||||
/** vllm-managed: extra args for `vllm serve` */
|
||||
extraArgs: z.array(z.string()).optional(),
|
||||
/** When true, mcplocal keeps the provider registered but suppresses
|
||||
* auto-start. Toggle via `mcpctl provider {enable,disable} <name>`. */
|
||||
disabled: z.boolean().optional(),
|
||||
}).strict();
|
||||
|
||||
export type LlmProviderEntry = z.infer<typeof LlmProviderEntrySchema>;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { existsSync, readFileSync } from 'node:fs';
|
||||
import { existsSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { homedir } from 'node:os';
|
||||
|
||||
@@ -64,6 +64,16 @@ export interface LlmProviderFileEntry {
|
||||
idleTimeoutMinutes?: number;
|
||||
/** vllm-managed: extra args for `vllm serve` */
|
||||
extraArgs?: string[];
|
||||
/**
|
||||
* Persistent disable. When true:
|
||||
* - mcplocal still instantiates the provider so `mcpctl provider enable`
|
||||
* can flip it back live, but
|
||||
* - `complete()` and `ensureRunning()` short-circuit with an error so
|
||||
* the GPU process never spawns.
|
||||
* Toggled via `mcpctl provider {enable,disable} <name>`. Survives mcplocal
|
||||
* restarts because it lives here in the user's config file.
|
||||
*/
|
||||
disabled?: boolean;
|
||||
/**
|
||||
* If set, this local provider is allowed to substitute for the centralized
|
||||
* Llm of this name when the mcpd inference proxy is unreachable.
|
||||
@@ -180,6 +190,37 @@ function loadFullConfig(): McpctlConfig {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Persist a `disabled: bool` flag onto the named provider in
|
||||
* ~/.mcpctl/config.json. Used by the `mcpctl provider {enable,disable}`
|
||||
 * route to make the change survive an mcplocal restart.
|
||||
*
|
||||
* Returns true when the file was actually rewritten (provider found and
|
||||
* its flag changed), false when the provider isn't in the config or the
|
||||
* flag already matches.
|
||||
*/
|
||||
export function setProviderDisabledInConfig(name: string, disabled: boolean): boolean {
|
||||
const configPath = join(homedir(), '.mcpctl', 'config.json');
|
||||
if (!existsSync(configPath)) return false;
|
||||
const raw = readFileSync(configPath, 'utf-8');
|
||||
// Round-trip via JSON.parse → mutate → JSON.stringify. We don't try to
|
||||
// preserve comments or formatting because the config file has always
|
||||
// been plain JSON (no JSONC) and the alternative — a structural editor
|
||||
// — is a lot of code for a feature toggle.
|
||||
const parsed = JSON.parse(raw) as McpctlConfig;
|
||||
if (parsed.llm === undefined || !isMultiConfig(parsed.llm)) return false;
|
||||
const entry = parsed.llm.providers.find((p) => p.name === name);
|
||||
if (entry === undefined) return false;
|
||||
const current = entry.disabled === true;
|
||||
if (current === disabled) return false;
|
||||
if (disabled) entry.disabled = true; else delete entry.disabled;
|
||||
writeFileSync(configPath, JSON.stringify(parsed, null, 2) + '\n', 'utf-8');
|
||||
// Invalidate the cached config so subsequent loadLlmProviders() calls
|
||||
// see the change. (Boot-time only, but cheap and correct.)
|
||||
cachedConfig = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Type guard: is config the multi-provider format? */
|
||||
function isMultiConfig(llm: LlmFileConfig | LlmMultiFileConfig): llm is LlmMultiFileConfig {
|
||||
return 'providers' in llm && Array.isArray((llm as LlmMultiFileConfig).providers);
|
||||
|
||||
@@ -3,6 +3,7 @@ import type { FastifyInstance } from 'fastify';
|
||||
import cors from '@fastify/cors';
|
||||
import { APP_VERSION } from '@mcpctl/shared';
|
||||
import type { HttpConfig } from './config.js';
|
||||
import { setProviderDisabledInConfig } from './config.js';
|
||||
import { McpdClient } from './mcpd-client.js';
|
||||
import { registerProxyRoutes } from './routes/proxy.js';
|
||||
import { registerMcpEndpoint } from './mcp-endpoint.js';
|
||||
@@ -271,6 +272,10 @@ export async function createHttpServer(
|
||||
reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to start)` });
|
||||
return;
|
||||
}
|
||||
if ((provider as ManagedVllmProvider).isDisabled?.()) {
|
||||
reply.code(409).send({ error: `Provider '${request.params.name}' is disabled — run \`mcpctl provider enable\` first` });
|
||||
return;
|
||||
}
|
||||
(provider as ManagedVllmProvider).warmup();
|
||||
// warmup() is fire-and-forget — return current state immediately so
|
||||
// the CLI can show 'starting' and the user knows it's been kicked.
|
||||
@@ -278,6 +283,50 @@ export async function createHttpServer(
|
||||
reply.code(202).send({ name: provider.name, managed: true, ...status });
|
||||
});
|
||||
|
||||
// Persistent disable: dispose the running process AND set the
|
||||
// `disabled: true` flag on the provider's entry in ~/.mcpctl/config.json
|
||||
// so the next mcplocal restart doesn't auto-start it. Live: complete()
|
||||
// and ensureRunning() short-circuit immediately.
|
||||
app.post<{ Params: { name: string } }>('/llm/providers/:name/disable', async (request, reply) => {
|
||||
const registry = deps.providerRegistry;
|
||||
const provider = registry?.get(request.params.name) ?? null;
|
||||
if (provider === null) {
|
||||
reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
|
||||
return;
|
||||
}
|
||||
if (!('setDisabled' in provider) || typeof (provider as ManagedVllmProvider).setDisabled !== 'function') {
|
||||
reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (no enable/disable surface)` });
|
||||
return;
|
||||
}
|
||||
(provider as ManagedVllmProvider).setDisabled(true);
|
||||
// Best-effort persist. If the entry isn't in the config (e.g. the
|
||||
// provider was registered programmatically) the live disable still
|
||||
// sticks for this mcplocal lifetime; we just can't persist it.
|
||||
const persisted = setProviderDisabledInConfig(request.params.name, true);
|
||||
const status = (provider as ManagedVllmProvider).getStatus();
|
||||
reply.code(200).send({ name: provider.name, managed: true, persisted, ...status });
|
||||
});
|
||||
|
||||
// Inverse of /disable: clear the flag both live and on disk. The
|
||||
// provider stays in the registry (we never removed it), so the next
|
||||
// chat triggers ensureRunning() normally.
|
||||
app.post<{ Params: { name: string } }>('/llm/providers/:name/enable', async (request, reply) => {
|
||||
const registry = deps.providerRegistry;
|
||||
const provider = registry?.get(request.params.name) ?? null;
|
||||
if (provider === null) {
|
||||
reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
|
||||
return;
|
||||
}
|
||||
if (!('setDisabled' in provider) || typeof (provider as ManagedVllmProvider).setDisabled !== 'function') {
|
||||
reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (no enable/disable surface)` });
|
||||
return;
|
||||
}
|
||||
(provider as ManagedVllmProvider).setDisabled(false);
|
||||
const persisted = setProviderDisabledInConfig(request.params.name, false);
|
||||
const status = (provider as ManagedVllmProvider).getStatus();
|
||||
reply.code(200).send({ name: provider.name, managed: true, persisted, ...status });
|
||||
});
|
||||
|
||||
// ProxyModel discovery endpoints
|
||||
registerProxymodelEndpoint(app);
|
||||
|
||||
|
||||
@@ -141,7 +141,12 @@ async function createSingleProvider(
|
||||
if (entry.maxModelLen !== undefined) cfg.maxModelLen = entry.maxModelLen;
|
||||
if (entry.idleTimeoutMinutes !== undefined) cfg.idleTimeoutMinutes = entry.idleTimeoutMinutes;
|
||||
if (entry.extraArgs !== undefined) cfg.extraArgs = entry.extraArgs;
|
||||
return new ManagedVllmProvider(cfg);
|
||||
const provider = new ManagedVllmProvider(cfg);
|
||||
// v7+ persistent disable: honor the flag at boot so a known-bad vLLM
|
||||
// doesn't auto-start on first chat. The provider stays registered so
|
||||
// `mcpctl provider enable` can flip it back live without restart.
|
||||
if (entry.disabled === true) provider.setDisabled(true);
|
||||
return provider;
|
||||
}
|
||||
|
||||
default:
|
||||
|
||||
@@ -33,6 +33,8 @@ export interface ManagedVllmStatus {
|
||||
lastError: string | null;
|
||||
pid: number | null;
|
||||
uptime: number | null;
|
||||
/** Persistent disable. When true, complete()/ensureRunning() short-circuit. */
|
||||
disabled: boolean;
|
||||
}
|
||||
|
||||
const POLL_INTERVAL_MS = 2000;
|
||||
@@ -54,6 +56,7 @@ export class ManagedVllmProvider implements LlmProvider {
|
||||
private inner: OpenAiProvider | null = null;
|
||||
private state: ManagedVllmState = 'stopped';
|
||||
private lastError: string | null = null;
|
||||
private disabled = false;
|
||||
private lastUsed = 0;
|
||||
private startedAt = 0;
|
||||
private errorAt = 0;
|
||||
@@ -86,6 +89,9 @@ export class ManagedVllmProvider implements LlmProvider {
|
||||
}
|
||||
|
||||
async complete(options: CompletionOptions): Promise<CompletionResult> {
|
||||
if (this.disabled) {
|
||||
throw new Error('vllm-managed provider is disabled — run `mcpctl provider enable <name>` to re-enable');
|
||||
}
|
||||
await this.ensureRunning();
|
||||
this.lastUsed = Date.now();
|
||||
this.resetIdleTimer();
|
||||
@@ -101,9 +107,11 @@ export class ManagedVllmProvider implements LlmProvider {
|
||||
|
||||
/**
|
||||
* A managed provider is "available" unless in a permanent error state.
|
||||
* When stopped, it can be auto-started on demand.
|
||||
* When stopped, it can be auto-started on demand. Disabled providers
|
||||
* report unavailable so health probes don't try to wake them.
|
||||
*/
|
||||
async isAvailable(): Promise<boolean> {
|
||||
if (this.disabled) return false;
|
||||
return this.state !== 'error';
|
||||
}
|
||||
|
||||
@@ -115,9 +123,34 @@ export class ManagedVllmProvider implements LlmProvider {
|
||||
uptime: this.state === 'running' && this.startedAt > 0
|
||||
? Math.floor((Date.now() - this.startedAt) / 1000)
|
||||
: null,
|
||||
disabled: this.disabled,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Toggle persistent disable. When set true, dispose any running process
|
||||
* immediately and gate complete()/ensureRunning() so the next chat fails
|
||||
* fast rather than spawning a new vLLM. The caller (HTTP endpoint) is
|
||||
* responsible for persisting the flag to ~/.mcpctl/config.json.
|
||||
*/
|
||||
setDisabled(value: boolean): void {
|
||||
if (this.disabled === value) return;
|
||||
this.disabled = value;
|
||||
if (value) {
|
||||
this.killProcess();
|
||||
this.clearIdleTimer();
|
||||
// Reset the error cooldown so re-enable starts clean.
|
||||
if (this.state === 'error') {
|
||||
this.state = 'stopped';
|
||||
this.lastError = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
isDisabled(): boolean {
|
||||
return this.disabled;
|
||||
}
|
||||
|
||||
/** Eagerly start vLLM so it's ready when the first complete() call arrives. */
|
||||
warmup(): void {
|
||||
if (this.state === 'stopped') {
|
||||
@@ -135,6 +168,9 @@ export class ManagedVllmProvider implements LlmProvider {
|
||||
// --- Internal ---
|
||||
|
||||
async ensureRunning(): Promise<void> {
|
||||
if (this.disabled) {
|
||||
throw new Error('vllm-managed provider is disabled — run `mcpctl provider enable <name>` to re-enable');
|
||||
}
|
||||
if (this.state === 'running' && this.process && !this.process.killed) {
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user