diff --git a/completions/mcpctl.bash b/completions/mcpctl.bash
index eb27698..8f9a5d6 100644
--- a/completions/mcpctl.bash
+++ b/completions/mcpctl.bash
@@ -5,7 +5,7 @@ _mcpctl() {
     local cur prev words cword
    _init_completion || return

-    local commands="status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache test migrate rotate"
+    local commands="status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache provider test migrate rotate"
     local project_commands="get describe delete logs create edit attach-server detach-server"
     local global_opts="-v --version --daemon-url --direct -p --project -h --help"
     local resources="servers instances secrets secretbackends llms agents personalities templates projects users groups rbac prompts promptrequests serverattachments proxymodels inference-tasks all"
@@ -344,6 +344,27 @@ _mcpctl() {
                 esac
             fi
             return ;;
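+        # provider: lifecycle control (start/stop/status) for managed local LLM providers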
+        provider)
+            local provider_sub=$(_mcpctl_get_subcmd $subcmd_pos)
+            if [[ -z "$provider_sub" ]]; then
+                COMPREPLY=($(compgen -W "status up down help" -- "$cur"))
+            else
+                case "$provider_sub" in
+                    status)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    up)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    down)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                    *)
+                        COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
+                        ;;
+                esac
+            fi
+            return ;;
        test)
            local test_sub=$(_mcpctl_get_subcmd $subcmd_pos)
            if [[ -z "$test_sub" ]]; then
diff --git a/completions/mcpctl.fish b/completions/mcpctl.fish
index e4162a2..fa28aeb 100644
--- a/completions/mcpctl.fish
+++ b/completions/mcpctl.fish
@@ -4,7 +4,7 @@
 # Erase any stale completions from previous versions
 complete -c mcpctl -e

-set -l commands status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache test migrate rotate
+set -l commands status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache provider test migrate rotate
 set -l project_commands get describe delete logs create edit attach-server detach-server

 # Disable file completions by default
@@ -238,6 +238,7 @@ complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a approve -d 'Approve a pending prompt request (atomic: delete request, create prompt)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a console -d 'Interactive MCP console — unified timeline with tools, provenance, and lab replay'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a cache -d 'Manage ProxyModel pipeline cache'
+complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a provider -d 'Control local LLM providers (start/stop/status)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a test -d 'Utilities for testing MCP endpoints and config'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a migrate -d 'Move resources between backends (currently: secrets between SecretBackends)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a rotate -d 'Force rotation of a credential-rotating resource (currently: secretbackend)'
@@ -449,6 +450,12 @@ complete -c mcpctl -n "__fish_seen_subcommand_from cache; and not __fish_seen_su
 complete -c mcpctl -n "__mcpctl_subcmd_active cache clear" -l older-than -d 'Clear entries older than N days' -x
 complete -c mcpctl -n "__mcpctl_subcmd_active cache clear" -s y -l yes -d 'Skip confirmation'

+# provider subcommands
+set -l provider_cmds status up down
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a status -d 'Show lifecycle state of a provider'
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a up -d 'Start a managed provider (warm up so first chat is fast)'
+complete -c mcpctl -n "__fish_seen_subcommand_from provider; and not __fish_seen_subcommand_from $provider_cmds" -a down -d 'Stop a managed provider now (releases GPU memory)'
+
 # test subcommands
 set -l test_cmds mcp
 complete -c mcpctl -n "__fish_seen_subcommand_from test; and not __fish_seen_subcommand_from $test_cmds" -a mcp -d 'Verify a Streamable-HTTP MCP endpoint: health, initialize, tools/list, optionally call a tool.'
diff --git a/src/cli/src/commands/provider.ts b/src/cli/src/commands/provider.ts
new file mode 100644
index 0000000..93d6b8c
--- /dev/null
+++ b/src/cli/src/commands/provider.ts
@@ -0,0 +1,130 @@
+/**
+ * `mcpctl provider <status|up|down> <name>`
+ *
+ * Lifecycle control for managed local LLM providers (vllm-managed). Talks
+ * to mcplocal's `/llm/providers/:name/{status,start,stop}` HTTP endpoints
+ * — non-managed providers (anthropic, openai, gemini-cli) get a clear
+ * error rather than a no-op.
+ *
+ * Practical use: `mcpctl provider down vllm-local` to release GPU memory
+ * without restarting mcplocal (which would drop the SSE connection to mcpd
+ * and re-publish all virtual Llms).
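+ *
+ * Illustrative session (output depends on provider state):
+ *
+ *   $ mcpctl provider status vllm-local
+ *   vllm-local: running
+ *     pid: 12345
+ *     uptime: 42m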
+ */
+import { Command } from 'commander';
+import http from 'node:http';
+
+export interface ProviderCommandDeps {
+  log: (...args: string[]) => void;
+  mcplocalUrl?: string;
+}
+
+interface ProviderStatusResponse {
+  name: string;
+  managed: boolean;
+  state?: 'stopped' | 'starting' | 'running' | 'error';
+  lastError?: string | null;
+  pid?: number | null;
+  uptime?: number | null;
+}
+
+interface ErrorResponse {
+  error: string;
+}
+
+function fetchJson<T>(url: string, method: 'GET' | 'POST'): Promise<{ status: number; body: T }> {
+  return new Promise((resolve, reject) => {
+    const req = http.request(url, { method, timeout: 10_000 }, (res) => {
+      let data = '';
+      res.on('data', (chunk: Buffer) => { data += chunk.toString(); });
+      res.on('end', () => {
+        try {
+          resolve({ status: res.statusCode ?? 0, body: JSON.parse(data) as T });
+        } catch {
+          reject(new Error(`Invalid response from mcplocal: ${data.slice(0, 200)}`));
+        }
+      });
+    });
+    req.on('error', () => reject(new Error('Cannot connect to mcplocal. Is it running? (`systemctl --user status mcplocal`)')));
+    req.on('timeout', () => { req.destroy(); reject(new Error('mcplocal request timed out')); });
+    req.end();
+  });
+}
+
+function formatStatus(s: ProviderStatusResponse): string {
+  if (!s.managed) {
+    return `${s.name}: unmanaged (no lifecycle — API-key or remote provider)`;
+  }
+  const lines = [`${s.name}: ${s.state ?? 'unknown'}`];
+  if (s.pid !== null && s.pid !== undefined) lines.push(`  pid: ${String(s.pid)}`);
+  if (s.uptime !== null && s.uptime !== undefined) {
+    const sec = s.uptime;
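+    // e.g. 42 -> "42s", 125 -> "2m", 3900 -> "1h5m" (whole units, truncated)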
+    const fmt = sec < 60 ? `${String(sec)}s`
+      : sec < 3600 ? `${String(Math.floor(sec / 60))}m`
+      : `${String(Math.floor(sec / 3600))}h${String(Math.floor((sec % 3600) / 60))}m`;
+    lines.push(`  uptime: ${fmt}`);
+  }
+  if (s.lastError !== null && s.lastError !== undefined) lines.push(`  lastError: ${s.lastError}`);
+  return lines.join('\n');
+}
+
+export function createProviderCommand(deps: ProviderCommandDeps): Command {
+  const cmd = new Command('provider')
+    .description('Control local LLM providers (start/stop/status)');
+
+  const mcplocalUrl = deps.mcplocalUrl ?? 'http://localhost:3200';
+
+  cmd
+    .command('status')
+    .description('Show lifecycle state of a provider')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/status`,
+        'GET',
+      );
+      if (res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      deps.log(formatStatus(res.body as ProviderStatusResponse));
+    });
+
+  cmd
+    .command('up')
+    .description('Start a managed provider (warm up so first chat is fast)')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/start`,
+        'POST',
+      );
+      if (res.status !== 202 && res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      const status = res.body as ProviderStatusResponse;
+      deps.log(`${status.name}: ${status.state ?? 'starting'} (warmup kicked — chat to confirm it's ready)`);
+    });
+
+  cmd
+    .command('down')
+    .description('Stop a managed provider now (releases GPU memory)')
+    .argument('<name>', 'Provider name (e.g. vllm-local)')
+    .action(async (name: string) => {
+      const res = await fetchJson<ProviderStatusResponse | ErrorResponse>(
+        `${mcplocalUrl}/llm/providers/${encodeURIComponent(name)}/stop`,
+        'POST',
+      );
+      if (res.status !== 200) {
+        deps.log(`error: ${(res.body as ErrorResponse).error}`);
+        process.exitCode = 1;
+        return;
+      }
+      const status = res.body as ProviderStatusResponse;
+      deps.log(`${status.name}: ${status.state ?? 'stopped'} (GPU released — next chat will trigger restart)`);
+    });
+
+  return cmd;
+}
diff --git a/src/cli/src/index.ts b/src/cli/src/index.ts
index 4f54215..26bb8fb 100644
--- a/src/cli/src/index.ts
+++ b/src/cli/src/index.ts
@@ -18,6 +18,7 @@ import { createMcpCommand } from './commands/mcp.js';
 import { createPatchCommand } from './commands/patch.js';
 import { createConsoleCommand } from './commands/console/index.js';
 import { createCacheCommand } from './commands/cache.js';
+import { createProviderCommand } from './commands/provider.js';
 import { createChatCommand } from './commands/chat.js';
 import { createChatLlmCommand } from './commands/chat-llm.js';
 import { createMigrateCommand } from './commands/migrate.js';
@@ -280,6 +281,11 @@ export function createProgram(): Command {
     mcplocalUrl: config.mcplocalUrl,
   }));

+  program.addCommand(createProviderCommand({
+    log: (...args) => console.log(...args),
+    mcplocalUrl: config.mcplocalUrl,
+  }));
+
   program.addCommand(createTestCommand({
     log: (...args) => console.log(...args),
   }));
diff --git a/src/cli/tests/completions.test.ts b/src/cli/tests/completions.test.ts
index 084e68b..2e303ab 100644
--- a/src/cli/tests/completions.test.ts
+++ b/src/cli/tests/completions.test.ts
@@ -120,7 +120,16 @@ describe('fish completions', () => {

   it('non-project commands do not show with --project', () => {
     const nonProjectCmds = ['status', 'login', 'logout', 'config', 'apply', 'backup'];
-    const lines = fishFile.split('\n').filter((l) => l.startsWith('complete') && l.includes('-a '));
+    // Only check top-level command lines — those are the ones whose
+    // visibility is gated on `__mcpctl_has_project`. Lines scoped to a
+    // sub-command (e.g. `provider status`) live under a different
+    // `__fish_seen_subcommand_from <parent>` predicate and don't need
+    // the project guard.
+    const topLevelMarkers = ['$commands', '$project_commands'];
+    const lines = fishFile.split('\n').filter((l) => {
+      if (!l.startsWith('complete') || !l.includes('-a ')) return false;
+      return topLevelMarkers.some((m) => l.includes(m));
+    });

     for (const cmd of nonProjectCmds) {
       const cmdLines = lines.filter((l) => {
diff --git a/src/mcplocal/src/http/server.ts b/src/mcplocal/src/http/server.ts
index 9e5c48d..ad5c51c 100644
--- a/src/mcplocal/src/http/server.ts
+++ b/src/mcplocal/src/http/server.ts
@@ -220,6 +220,64 @@ export async function createHttpServer(
     });
   });

+  // Per-provider status (managed providers expose lifecycle state). Used by
+  // `mcpctl provider status` to read vllm-managed's state without
+  // burning a token like /llm/health does.
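+  // Illustrative call, assuming the default mcplocal port (3200):
+  //   $ curl -s localhost:3200/llm/providers/vllm-local/status
+  //   {"name":"vllm-local","managed":true,"state":"running","pid":12345,"uptime":2520}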
+  app.get<{ Params: { name: string } }>('/llm/providers/:name/status', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('getStatus' in provider) || typeof (provider as ManagedVllmProvider).getStatus !== 'function') {
+      // Non-managed providers (anthropic, openai, gemini-cli) have no
+      // lifecycle — they're always "ready" as long as the API key works.
+      reply.code(200).send({ name: provider.name, managed: false });
+      return;
+    }
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(200).send({ name: provider.name, managed: true, ...status });
+  });
+
+  // Stop a managed provider (free GPU memory). Non-managed providers get
+  // a 400 (there is no process to stop); on success returns 200 with the
+  // resulting status.
+  app.post<{ Params: { name: string } }>('/llm/providers/:name/stop', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('dispose' in provider) || typeof (provider as ManagedVllmProvider).dispose !== 'function') {
+      reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to stop)` });
+      return;
+    }
+    (provider as ManagedVllmProvider).dispose();
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(200).send({ name: provider.name, managed: true, ...status });
+  });
+
+  // Start (warm up) a managed provider so the first chat doesn't pay
+  // the model-load latency.
+  app.post<{ Params: { name: string } }>('/llm/providers/:name/start', async (request, reply) => {
+    const registry = deps.providerRegistry;
+    const provider = registry?.get(request.params.name) ?? null;
+    if (provider === null) {
+      reply.code(404).send({ error: `Provider '${request.params.name}' not found` });
+      return;
+    }
+    if (!('warmup' in provider) || typeof (provider as ManagedVllmProvider).warmup !== 'function') {
+      reply.code(400).send({ error: `Provider '${request.params.name}' is not managed (nothing to start)` });
+      return;
+    }
+    (provider as ManagedVllmProvider).warmup();
+    // warmup() is fire-and-forget — return current state immediately so
+    // the CLI can show 'starting' and the user knows it's been kicked.
+    const status = (provider as ManagedVllmProvider).getStatus();
+    reply.code(202).send({ name: provider.name, managed: true, ...status });
+  });
+
   // ProxyModel discovery endpoints
   registerProxymodelEndpoint(app);