From 7e6b0cab449e248e134e954e56b668c57a6b0594 Mon Sep 17 00:00:00 2001
From: Michal
Date: Mon, 27 Apr 2026 14:25:38 +0100
Subject: [PATCH] feat(cli): mcpctl chat-llm + KIND/STATUS columns (v1 Stage 5)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the loop on the user-facing surface:

  $ mcpctl get llm
  NAME            KIND     STATUS  TYPE    MODEL                      TIER  KEY  ID
  qwen3-thinking  public   active  openai  qwen3-thinking             fast  ...  ...
  vllm-local      virtual  active  openai  Qwen/Qwen2.5-7B-Instruct   fast  -    ...

  $ mcpctl chat-llm vllm-local
  ────────────────────────────────────────
  LLM: vllm-local openai → Qwen/Qwen2.5-7B-Instruct-AWQ
  Kind: virtual Status: active
  ────────────────────────────────────────
  > hello?
  Hi! …

New: chat-llm command (commands/chat-llm.ts)
- Stateless chat with any mcpd-registered LLM. No threads, no tools, no
  project prompts. POSTs to /api/v1/llms/<name>/infer; mcpd's
  kind=virtual branch handles relay-through-mcplocal transparently, so
  the same CLI command works for both public and virtual LLMs.
- Reuses installStatusBar / formatStats / recordDelta / styleStats /
  PhaseStats from chat.ts (now exported) so the bottom-row
  tokens-per-second ticker behaves identically to mcpctl chat.
- Flags: --message (one-shot), --system, --temperature, --max-tokens,
  --no-stream. Streaming uses OpenAI chat.completion.chunk SSE.
- REPL mode keeps a per-session history array so multi-turn flows feel
  natural; each turn is an independent inference call.

Updated: get.ts
- LlmRow gains optional kind/status fields.
- llmColumns layout: NAME, KIND, STATUS, TYPE, MODEL, TIER, KEY, ID.
  Defaults gracefully when older mcpd responses don't return them.

Updated: chat.ts
- Re-exports the helpers chat-llm.ts needs (PhaseStats, newPhase,
  recordDelta, formatStats, styleStats, styleThinking, STDERR_IS_TTY,
  StatusBar, installStatusBar). No behavior change.

Completions: chat-llm picks up the standard option enumeration
automatically; bash gets a special case for first-arg LLM-name
completion via _mcpctl_resource_names "llms".

CLI suite: 437/437 (was 430, +7 from auto-discovered test cases in the
regenerated completions golden). Workspace: 2043/2043 across 152 files.
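
For reference, the wire shape of one turn as the CLI sends and reads it
(illustrative values; an upstream provider may return extra fields that
the CLI simply ignores):

  POST /api/v1/llms/<name>/infer
  { "messages": [{ "role": "user", "content": "hello?" }], "stream": true }
  data: {"choices":[{"delta":{"content":"Hi"}}]}
  data: [DONE]

With --no-stream the same POST omits "stream": true and the reply is a
single { "choices": [{ "message": { "content": "Hi!" } }] } JSON body.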
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 completions/mcpctl.bash          |  11 +-
 completions/mcpctl.fish          |  10 +-
 scripts/generate-completions.ts  |  14 ++
 src/cli/src/commands/chat-llm.ts | 271 +++++++++++++++++++++++++++++++
 src/cli/src/commands/chat.ts     |  24 +--
 src/cli/src/commands/get.ts      |   6 +
 src/cli/src/index.ts             |   8 +
 7 files changed, 330 insertions(+), 14 deletions(-)
 create mode 100644 src/cli/src/commands/chat-llm.ts

diff --git a/completions/mcpctl.bash b/completions/mcpctl.bash
index d0ea0a4..2b86325 100644
--- a/completions/mcpctl.bash
+++ b/completions/mcpctl.bash
@@ -5,7 +5,7 @@ _mcpctl() {
   local cur prev words cword
   _init_completion || return
 
-  local commands="status login logout config get describe delete logs create edit apply chat patch backup approve console cache test migrate rotate"
+  local commands="status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache test migrate rotate"
   local project_commands="get describe delete logs create edit attach-server detach-server"
   local global_opts="-v --version --daemon-url --direct -p --project -h --help"
   local resources="servers instances secrets secretbackends llms agents personalities templates projects users groups rbac prompts promptrequests serverattachments proxymodels all"
@@ -247,6 +247,15 @@ _mcpctl() {
         COMPREPLY=($(compgen -W "-m --message --thread --system --system-file --system-append --personality --temperature --top-p --top-k --max-tokens --seed --stop --allow-tool --extra --no-stream -h --help" -- "$cur"))
       fi
       return ;;
+    chat-llm)
+      if [[ $((cword - subcmd_pos)) -eq 1 ]]; then
+        local names
+        names=$(_mcpctl_resource_names "llms")
+        COMPREPLY=($(compgen -W "$names -m --message --system --temperature --max-tokens --no-stream -h --help" -- "$cur"))
+      else
+        COMPREPLY=($(compgen -W "-m --message --system --temperature --max-tokens --no-stream -h --help" -- "$cur"))
+      fi
+      return ;;
     patch)
       if [[ -z "$resource_type" ]]; then
         COMPREPLY=($(compgen -W "$resources -h --help" -- "$cur"))
diff --git a/completions/mcpctl.fish b/completions/mcpctl.fish
index ed739a7..810b375 100644
--- a/completions/mcpctl.fish
+++ b/completions/mcpctl.fish
@@ -4,7 +4,7 @@
 # Erase any stale completions from previous versions
 complete -c mcpctl -e
 
-set -l commands status login logout config get describe delete logs create edit apply chat patch backup approve console cache test migrate rotate
+set -l commands status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache test migrate rotate
 set -l project_commands get describe delete logs create edit attach-server detach-server
 
 # Disable file completions by default
@@ -231,6 +231,7 @@ complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a edit -d 'Edit a resource in your default editor (server, project)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a apply -d 'Apply declarative configuration from a YAML or JSON file'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a chat -d 'Open an interactive chat session with an agent (REPL or one-shot).'
+complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a chat-llm -d 'Stateless chat with any registered LLM (public or virtual). No threads, no tools.'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a patch -d 'Patch a resource field (e.g. mcpctl patch project myproj llmProvider=none)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a backup -d 'Git-based backup status and management'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a approve -d 'Approve a pending prompt request (atomic: delete request, create prompt)'
@@ -518,6 +519,13 @@ complete -c mcpctl -n "__fish_seen_subcommand_from chat" -l allow-tool -d 'Restr
 complete -c mcpctl -n "__fish_seen_subcommand_from chat" -l extra -d 'Provider-specific knob k=v (repeatable)' -x
 complete -c mcpctl -n "__fish_seen_subcommand_from chat" -l no-stream -d 'Disable SSE streaming (single JSON response)'
 
+# chat-llm options
+complete -c mcpctl -n "__fish_seen_subcommand_from chat-llm" -s m -l message -d 'One-shot: send a single message and exit (no REPL)' -x
+complete -c mcpctl -n "__fish_seen_subcommand_from chat-llm" -l system -d 'Optional system prompt' -x
+complete -c mcpctl -n "__fish_seen_subcommand_from chat-llm" -l temperature -d 'Sampling temperature (0..2)' -x
+complete -c mcpctl -n "__fish_seen_subcommand_from chat-llm" -l max-tokens -d 'Maximum tokens in the assistant reply' -x
+complete -c mcpctl -n "__fish_seen_subcommand_from chat-llm" -l no-stream -d 'Disable SSE streaming (single JSON response)'
+
 # console options
 complete -c mcpctl -n "__fish_seen_subcommand_from console" -l stdin-mcp -d 'Run inspector as MCP server over stdin/stdout (for Claude)'
 complete -c mcpctl -n "__fish_seen_subcommand_from console" -l audit -d 'Browse audit events from mcpd'
diff --git a/scripts/generate-completions.ts b/scripts/generate-completions.ts
index a0941a1..c8d61fc 100644
--- a/scripts/generate-completions.ts
+++ b/scripts/generate-completions.ts
@@ -920,6 +920,20 @@ function emitBashCase(emit: (s: string) => void, cmd: CmdInfo, root: CmdInfo): v
     return;
   }
 
+  // chat-llm: first arg is LLM name
+  if (name === 'chat-llm') {
+    emit(`    ${name})`);
+    emit('      if [[ $((cword - subcmd_pos)) -eq 1 ]]; then');
+    emit('        local names');
+    emit('        names=$(_mcpctl_resource_names "llms")');
+    emit(`        COMPREPLY=($(compgen -W "$names ${optFlags}" -- "$cur"))`);
+    emit('      else');
+    emit(`        COMPREPLY=($(compgen -W "${optFlags}" -- "$cur"))`);
+    emit('      fi');
+    emit('      return ;;');
+    return;
+  }
+
   // console: first arg is project name
   if (name === 'console') {
     emit(`    ${name})`);
diff --git a/src/cli/src/commands/chat-llm.ts b/src/cli/src/commands/chat-llm.ts
new file mode 100644
index 0000000..b2c1d22
--- /dev/null
+++ b/src/cli/src/commands/chat-llm.ts
@@ -0,0 +1,271 @@
+/**
+ * `mcpctl chat-llm <name>` — stateless chat with any registered LLM.
+ *
+ * Distinct from `mcpctl chat <agent>`:
+ * - No threads, no history, no tools, no project prompts.
+ * - Just an OpenAI chat-completions round-trip per turn.
+ * - Works for both kinds of mcpd-registered LLMs:
+ *   * `kind=public` — direct upstream call (existing behavior).
+ *   * `kind=virtual` — relayed through the publishing mcplocal's SSE
+ *     channel (the v1 virtual-LLM feature).
+ *
+ * The CLI doesn't need to know which kind the LLM is; mcpd's
+ * `/api/v1/llms/:name/infer` route branches on `kind` server-side.
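+ *
+ * Each turn posts the full conversation so far (the server keeps no
+ * state between calls). Illustrative request body for a second REPL
+ * turn, with values made up for the example:
+ *
+ *   { "messages": [
+ *       { "role": "system",    "content": "..." },      // only with --system
+ *       { "role": "user",      "content": "hello?" },
+ *       { "role": "assistant", "content": "Hi!" },
+ *       { "role": "user",      "content": "and then?" } ] }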
+ */
+import { Command } from 'commander';
+import http from 'node:http';
+import https from 'node:https';
+import readline from 'node:readline';
+import type { ApiClient } from '../api-client.js';
+import {
+  formatStats,
+  installStatusBar,
+  newPhase,
+  recordDelta,
+  STDERR_IS_TTY,
+  styleStats,
+  type PhaseStats,
+  type StatusBar,
+} from './chat.js';
+
+const STREAM_TIMEOUT_MS = 600_000;
+
+export interface ChatLlmCommandDeps {
+  client: ApiClient;
+  baseUrl: string;
+  token?: string | undefined;
+  log: (...args: unknown[]) => void;
+}
+
+export function createChatLlmCommand(deps: ChatLlmCommandDeps): Command {
+  return new Command('chat-llm')
+    .description('Stateless chat with any registered LLM (public or virtual). No threads, no tools.')
+    .argument('<name>', 'LLM name (see `mcpctl get llm`)')
+    .option('-m, --message <text>', 'One-shot: send a single message and exit (no REPL)')
+    .option('--system <prompt>', 'Optional system prompt')
+    .option('--temperature <n>', 'Sampling temperature (0..2)', parseFloat)
+    .option('--max-tokens <n>', 'Maximum tokens in the assistant reply', parseFloatInt)
+    .option('--no-stream', 'Disable SSE streaming (single JSON response)')
+    .action(async (name: string, opts: ChatLlmOpts) => {
+      await printHeader(deps, name, opts.system);
+      if (opts.message !== undefined) {
+        await runOneShot(deps, name, opts);
+        return;
+      }
+      await runRepl(deps, name, opts);
+    });
+}
+
+interface ChatLlmOpts {
+  message?: string;
+  system?: string;
+  temperature?: number;
+  maxTokens?: number;
+  stream?: boolean;
+}
+
+interface LlmInfo {
+  name: string;
+  type: string;
+  model: string;
+  kind: 'public' | 'virtual';
+  status: 'active' | 'inactive' | 'hibernating';
+}
+
+async function printHeader(deps: ChatLlmCommandDeps, name: string, systemPrompt?: string): Promise<void> {
+  let info: LlmInfo;
+  try {
+    info = await deps.client.get<LlmInfo>(`/api/v1/llms/${encodeURIComponent(name)}`);
+  } catch (err) {
+    process.stderr.write(`(could not fetch LLM metadata: ${(err as Error).message})\n`);
+    return;
+  }
+  const sep = '─'.repeat(60);
+  const out = (s: string): void => { process.stderr.write(`${styleStats(s)}\n`); };
+  out(sep);
+  out(`LLM: ${info.name} ${info.type} → ${info.model}`);
+  out(`Kind: ${info.kind} Status: ${info.status}`);
+  if (systemPrompt !== undefined) {
+    out(`System: ${systemPrompt.slice(0, 120)}${systemPrompt.length > 120 ? '…' : ''}`);
+  }
+  out(sep);
+}
+
+async function runOneShot(deps: ChatLlmCommandDeps, name: string, opts: ChatLlmOpts): Promise<void> {
+  const messages = buildMessages([], opts.system, opts.message ?? '');
+  const bar = opts.stream === false ? null : installStatusBar();
+  try {
+    if (opts.stream === false) {
+      const reply = await postNonStream(deps, name, messages, opts);
+      process.stdout.write(`${reply}\n`);
+    } else {
+      await streamOnce(deps, name, messages, opts, bar);
+    }
+  } finally {
+    bar?.teardown();
+  }
+}
+
+async function runRepl(deps: ChatLlmCommandDeps, name: string, opts: ChatLlmOpts): Promise<void> {
+  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+  const ask = (q: string): Promise<string> => new Promise((resolve) => rl.question(q, resolve));
+  const history: Array<{ role: 'user' | 'assistant'; content: string }> = [];
+
+  const bar = opts.stream === false ? null : installStatusBar();
+  process.stderr.write(`Stateless chat with LLM '${name}'. Ctrl-D to exit.\n`);
+
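+  // Stateless by design: each turn below re-sends opts.system plus the
+  // full local `history` array; the server keeps no thread between calls.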
+  try {
+    while (true) {
+      let line: string;
+      try { line = await ask('> '); } catch { break; }
+      if (line === '') continue;
+
+      const messages = buildMessages(history, opts.system, line);
+      try {
+        let reply: string;
+        if (opts.stream === false) {
+          reply = await postNonStream(deps, name, messages, opts);
+          process.stdout.write(`${reply}\n`);
+        } else {
+          reply = await streamOnce(deps, name, messages, opts, bar);
+          process.stdout.write('\n');
+        }
+        history.push({ role: 'user', content: line });
+        history.push({ role: 'assistant', content: reply });
+      } catch (err) {
+        process.stderr.write(`error: ${(err as Error).message}\n`);
+      }
+    }
+    rl.close();
+  } finally {
+    bar?.teardown();
+  }
+}
+
+function buildMessages(
+  history: Array<{ role: 'user' | 'assistant'; content: string }>,
+  system: string | undefined,
+  user: string,
+): Array<{ role: 'system' | 'user' | 'assistant'; content: string }> {
+  const out: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
+  if (system !== undefined && system !== '') out.push({ role: 'system', content: system });
+  out.push(...history);
+  out.push({ role: 'user', content: user });
+  return out;
+}
+
+async function postNonStream(
+  deps: ChatLlmCommandDeps,
+  name: string,
+  messages: Array<{ role: string; content: string }>,
+  opts: ChatLlmOpts,
+): Promise<string> {
+  const body: Record<string, unknown> = { messages };
+  if (opts.temperature !== undefined) body['temperature'] = opts.temperature;
+  if (opts.maxTokens !== undefined) body['max_tokens'] = opts.maxTokens;
+  const res = await deps.client.post<{
+    choices?: Array<{ message?: { content?: string } }>;
+  }>(`/api/v1/llms/${encodeURIComponent(name)}/infer`, body);
+  return res.choices?.[0]?.message?.content ?? '';
+}
+
+/**
+ * Stream a single chat call against /api/v1/llms/:name/infer with stream=true.
+ * The response is OpenAI-style SSE (`data: <chat.completion.chunk JSON>`).
+ * Returns the assembled assistant content.
+ */
+function streamOnce(
+  deps: ChatLlmCommandDeps,
+  name: string,
+  messages: Array<{ role: string; content: string }>,
+  opts: ChatLlmOpts,
+  bar: StatusBar | null,
+): Promise<string> {
+  const url = new URL(`${deps.baseUrl}/api/v1/llms/${encodeURIComponent(name)}/infer`);
+  const reqBody: Record<string, unknown> = { messages, stream: true };
+  if (opts.temperature !== undefined) reqBody['temperature'] = opts.temperature;
+  if (opts.maxTokens !== undefined) reqBody['max_tokens'] = opts.maxTokens;
+  const payload = JSON.stringify(reqBody);
+  const stats = { thinking: newPhase(), content: newPhase() } satisfies { thinking: PhaseStats; content: PhaseStats };
+
+  const TICK_MS = 250;
+  let timer: NodeJS.Timeout | null = null;
+  function startTicker(): void {
+    if (timer !== null || bar === null) return;
+    timer = setInterval(() => bar.update(formatStats(stats, true)), TICK_MS);
+  }
+  function stopTicker(): void {
+    if (timer !== null) { clearInterval(timer); timer = null; }
+  }
+
+  return new Promise<string>((resolve, reject) => {
+    let assistant = '';
+    const driver = url.protocol === 'https:' ? https : http;
+    const req = driver.request({
+      hostname: url.hostname,
+      port: url.port || (url.protocol === 'https:' ? 443 : 80),
+      path: url.pathname + url.search,
+      method: 'POST',
+      timeout: STREAM_TIMEOUT_MS,
+      headers: {
+        'Content-Type': 'application/json',
+        Accept: 'text/event-stream',
+        ...(deps.token !== undefined ? { Authorization: `Bearer ${deps.token}` } : {}),
+      },
+    }, (res) => {
+      const status = res.statusCode ?? 0;
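+      // Non-2xx: buffer the whole body before rejecting so the error
+      // message carries the server's payload, not just the status code.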
+      if (status >= 400) {
+        const chunks: Buffer[] = [];
+        res.on('data', (c: Buffer) => chunks.push(c));
+        res.on('end', () => reject(new Error(`HTTP ${String(status)}: ${Buffer.concat(chunks).toString('utf-8')}`)));
+        return;
+      }
+      let buf = '';
+      res.setEncoding('utf-8');
+      res.on('data', (chunk: string) => {
+        buf += chunk;
+        let nl: number;
+        while ((nl = buf.indexOf('\n\n')) !== -1) {
+          const frame = buf.slice(0, nl);
+          buf = buf.slice(nl + 2);
+          for (const line of frame.split('\n')) {
+            if (!line.startsWith('data: ')) continue;
+            const data = line.slice(6);
+            if (data === '[DONE]') continue;
+            try {
+              const parsed = JSON.parse(data) as { choices?: Array<{ delta?: { content?: string } }> };
+              const piece = parsed.choices?.[0]?.delta?.content;
+              if (typeof piece === 'string' && piece !== '') {
+                recordDelta(stats.content, piece);
+                process.stdout.write(piece);
+                assistant += piece;
+                startTicker();
+              }
+            } catch {
+              // ignore malformed frames
+            }
+          }
+        }
+      });
+      res.on('end', () => {
+        stopTicker();
+        const final = formatStats(stats, false);
+        if (final !== '' && STDERR_IS_TTY) process.stderr.write(`\n${styleStats(`(${final})`)}`);
+        else if (final !== '') process.stderr.write(`\n(${final})`);
+        if (bar !== null && final !== '') bar.update(final);
+        resolve(assistant);
+      });
+      res.on('error', (err) => { stopTicker(); reject(err); });
+    });
+    req.on('error', (err) => { stopTicker(); reject(err); });
+    req.on('timeout', () => { stopTicker(); req.destroy(); reject(new Error('chat-llm stream timed out')); });
+    req.write(payload);
+    req.end();
+  });
+}
+
+function parseFloatInt(value: string): number {
+  const n = Number(value);
+  if (!Number.isInteger(n)) throw new Error(`expected integer, got '${value}'`);
+  return n;
+}
diff --git a/src/cli/src/commands/chat.ts b/src/cli/src/commands/chat.ts
index e302cb6..edece9c 100644
--- a/src/cli/src/commands/chat.ts
+++ b/src/cli/src/commands/chat.ts
@@ -525,24 +525,24 @@ interface ChatStreamFrame {
 // ANSI codes for the reasoning sidebar. Dim + italic visually separates
 // reasoning ("the model is thinking") from final assistant content. We only
 // emit the codes when stderr is a TTY — piping to a file should stay clean.
-const ANSI_DIM_ITALIC = '\x1b[2;3m';
-const ANSI_DIM = '\x1b[2m';
-const ANSI_RESET = '\x1b[0m';
-const STDERR_IS_TTY = process.stderr.isTTY === true;
-function styleThinking(s: string): string {
+export const ANSI_DIM_ITALIC = '\x1b[2;3m';
+export const ANSI_DIM = '\x1b[2m';
+export const ANSI_RESET = '\x1b[0m';
+export const STDERR_IS_TTY = process.stderr.isTTY === true;
+export function styleThinking(s: string): string {
   return STDERR_IS_TTY ? `${ANSI_DIM_ITALIC}${s}${ANSI_RESET}` : s;
 }
-function styleStats(s: string): string {
+export function styleStats(s: string): string {
   return STDERR_IS_TTY ? `${ANSI_DIM}${s}${ANSI_RESET}` : s;
 }
 
-interface PhaseStats {
+export interface PhaseStats {
   words: number;
   firstMs: number;
   lastMs: number;
 }
-function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; }
-function recordDelta(p: PhaseStats, delta: string): void {
+export function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; }
+export function recordDelta(p: PhaseStats, delta: string): void {
   const now = Date.now();
   if (p.firstMs === 0) p.firstMs = now;
   p.lastMs = now;
@@ -558,7 +558,7 @@ function formatPhase(label: string, p: PhaseStats): string | null {
   const rate = p.words / sec;
   return `${label}${String(p.words)}w · ${rate.toFixed(1)} w/s · ${sec.toFixed(1)}s`;
 }
-function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string {
+export function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string {
   const parts: string[] = [];
   const c = formatPhase('', s.content);
   if (c !== null) parts.push(c);
@@ -588,12 +588,12 @@ function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial:
  * a foreign terminal in a half-locked state if Ctrl-C / uncaught exception
  * fires mid-stream.
  */
-interface StatusBar {
+export interface StatusBar {
   update(text: string): void;
   teardown(): void;
 }
 
-function installStatusBar(): StatusBar | null {
+export function installStatusBar(): StatusBar | null {
   const out = process.stdout;
   if (!out.isTTY) return null;
   const initialRows = out.rows;
diff --git a/src/cli/src/commands/get.ts b/src/cli/src/commands/get.ts
index b56ec8d..c55f572 100644
--- a/src/cli/src/commands/get.ts
+++ b/src/cli/src/commands/get.ts
@@ -132,10 +132,16 @@ interface LlmRow {
   url: string;
   description: string;
   apiKeyRef: { name: string; key: string } | null;
+  // Virtual-provider lifecycle (optional for backward compat with older
+  // mcpd responses that predate the kind/status columns).
+  kind?: 'public' | 'virtual';
+  status?: 'active' | 'inactive' | 'hibernating';
 }
 
 const llmColumns: Column<LlmRow>[] = [
   { header: 'NAME', key: 'name' },
+  { header: 'KIND', key: (r) => r.kind ?? 'public', width: 8 },
+  { header: 'STATUS', key: (r) => r.status ?? 'active', width: 12 },
   { header: 'TYPE', key: 'type', width: 12 },
   { header: 'MODEL', key: 'model', width: 28 },
   { header: 'TIER', key: 'tier', width: 8 },
diff --git a/src/cli/src/index.ts b/src/cli/src/index.ts
index 6a0485f..4f54215 100644
--- a/src/cli/src/index.ts
+++ b/src/cli/src/index.ts
@@ -19,6 +19,7 @@ import { createPatchCommand } from './commands/patch.js';
 import { createConsoleCommand } from './commands/console/index.js';
 import { createCacheCommand } from './commands/cache.js';
 import { createChatCommand } from './commands/chat.js';
+import { createChatLlmCommand } from './commands/chat-llm.js';
 import { createMigrateCommand } from './commands/migrate.js';
 import { createRotateCommand } from './commands/rotate.js';
 import { ApiClient, ApiError } from './api-client.js';
@@ -241,6 +242,13 @@ export function createProgram(): Command {
     log: (...args) => console.log(...args),
   }));
 
+  program.addCommand(createChatLlmCommand({
+    client,
+    baseUrl,
+    ...(creds?.token !== undefined ? { token: creds.token } : {}),
+    log: (...args) => console.log(...args),
+  }));
+
   program.addCommand(createPatchCommand({
     client,
     log: (...args) => console.log(...args),