feat(cli): mcpctl chat-llm + KIND/STATUS columns (v1 Stage 5)

Closes the loop on the user-facing surface:

  $ mcpctl get llm
  NAME             KIND     STATUS    TYPE     MODEL                       TIER  KEY  ID
  qwen3-thinking   public   active    openai   qwen3-thinking              fast  ...  ...
  vllm-local       virtual  active    openai   Qwen/Qwen2.5-7B-Instruct    fast  -    ...

  $ mcpctl chat-llm vllm-local
  ────────────────────────────────────────
  LLM: vllm-local  openai → Qwen/Qwen2.5-7B-Instruct-AWQ
  Kind: virtual    Status: active
  ────────────────────────────────────────
  > hello?
  Hi! …

New: chat-llm command (commands/chat-llm.ts)
- Stateless chat with any mcpd-registered LLM. No threads, no tools,
  no project prompts. POSTs to /api/v1/llms/<name>/infer (request sketch
  after this list); mcpd's kind=virtual branch handles relay-through-mcplocal
  transparently, so the same CLI command works for both public and
  virtual LLMs.
- Reuses installStatusBar / formatStats / recordDelta / styleStats /
  PhaseStats from chat.ts (now exported) so the bottom-row words-per-
  second ticker behaves identically to mcpctl chat.
- Flags: --message (one-shot), --system, --temperature, --max-tokens,
  --no-stream. Streaming uses OpenAI chat.completion.chunk SSE.
- REPL mode keeps a per-session history array so multi-turn flows
  feel natural; each turn is an independent inference call.
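
For reference, a minimal sketch of the non-streaming round-trip the new
command performs (standalone and hypothetical — the helper name, base URL,
and token are placeholders; mcpctl resolves the real ones from its own
credentials):

  // Hypothetical standalone equivalent of postNonStream() in chat-llm.ts.
  async function inferOnce(baseUrl: string, token: string): Promise<string> {
    const res = await fetch(`${baseUrl}/api/v1/llms/vllm-local/infer`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
      body: JSON.stringify({
        messages: [{ role: 'user', content: 'hello?' }],
        temperature: 0.7, // optional, mirrors --temperature
        max_tokens: 256,  // optional, mirrors --max-tokens
      }),
    });
    const data = await res.json() as { choices?: Array<{ message?: { content?: string } }> };
    return data.choices?.[0]?.message?.content ?? '';
  }

With stream: true the same endpoint answers with OpenAI-style SSE frames
(data: <chat.completion.chunk>, terminated by data: [DONE]), which is what
streamOnce() consumes.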

Updated: get.ts
- LlmRow gains optional kind/status fields.
- llmColumns layout: NAME, KIND, STATUS, TYPE, MODEL, TIER, KEY, ID.
  The new columns default to public/active when older mcpd responses
  don't return the fields.
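
Illustrative only (throwaway helpers, not in the diff) — the same fallback
expressions the new columns use:

  const kindOf = (r: { kind?: 'public' | 'virtual' }) => r.kind ?? 'public';
  const statusOf = (r: { status?: 'active' | 'inactive' | 'hibernating' }) => r.status ?? 'active';
  kindOf({});    // 'public' — older mcpd response, field absent
  statusOf({});  // 'active'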

Updated: chat.ts
- Re-exports the helpers chat-llm.ts needs (PhaseStats, newPhase,
  recordDelta, formatStats, styleStats, styleThinking, STDERR_IS_TTY,
  StatusBar, installStatusBar). No behavior change.

Completions: chat-llm picks up the standard option enumeration
automatically; bash gets a special case for first-arg LLM-name
completion via _mcpctl_resource_names "llms".

CLI suite: 437/437 (was 430, +7 from auto-discovered test cases in
the regenerated completions golden). Workspace: 2043/2043 across
152 files.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Author:  Michal
Date:    2026-04-27 14:25:38 +01:00
Parent:  97174f450f
Commit:  7e6b0cab44

7 changed files with 330 additions and 14 deletions

commands/chat-llm.ts (new file)

@@ -0,0 +1,271 @@
/**
 * `mcpctl chat-llm <name>` — stateless chat with any registered LLM.
 *
 * Distinct from `mcpctl chat <agent>`:
 * - No threads, no history, no tools, no project prompts.
 * - Just an OpenAI chat-completions round-trip per turn.
 * - Works for both kinds of mcpd-registered LLMs:
 *   * `kind=public` — direct upstream call (existing behavior).
 *   * `kind=virtual` — relayed through the publishing mcplocal's SSE
 *     channel (the v1 virtual-LLM feature).
 *
 * The CLI doesn't need to know which kind the LLM is; mcpd's
 * `/api/v1/llms/:name/infer` route branches on `kind` server-side.
 */
import { Command } from 'commander';
import http from 'node:http';
import https from 'node:https';
import readline from 'node:readline';
import type { ApiClient } from '../api-client.js';
import {
  formatStats,
  installStatusBar,
  newPhase,
  recordDelta,
  STDERR_IS_TTY,
  styleStats,
  type PhaseStats,
  type StatusBar,
} from './chat.js';

const STREAM_TIMEOUT_MS = 600_000;

export interface ChatLlmCommandDeps {
  client: ApiClient;
  baseUrl: string;
  token?: string | undefined;
  log: (...args: unknown[]) => void;
}

export function createChatLlmCommand(deps: ChatLlmCommandDeps): Command {
  return new Command('chat-llm')
    .description('Stateless chat with any registered LLM (public or virtual). No threads, no tools.')
    .argument('<name>', 'LLM name (see `mcpctl get llm`)')
    .option('-m, --message <text>', 'One-shot: send a single message and exit (no REPL)')
    .option('--system <text>', 'Optional system prompt')
    .option('--temperature <n>', 'Sampling temperature (0..2)', parseFloat)
    .option('--max-tokens <n>', 'Maximum tokens in the assistant reply', parseFloatInt)
    .option('--no-stream', 'Disable SSE streaming (single JSON response)')
    .action(async (name: string, opts: ChatLlmOpts) => {
      await printHeader(deps, name, opts.system);
      if (opts.message !== undefined) {
        await runOneShot(deps, name, opts);
        return;
      }
      await runRepl(deps, name, opts);
    });
}
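
/** Parsed CLI flags. Commander's `--no-stream` arrives here as `stream: false`; when the flag is absent, streaming stays on. */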
interface ChatLlmOpts {
  message?: string;
  system?: string;
  temperature?: number;
  maxTokens?: number;
  stream?: boolean;
}

interface LlmInfo {
  name: string;
  type: string;
  model: string;
  kind: 'public' | 'virtual';
  status: 'active' | 'inactive' | 'hibernating';
}

async function printHeader(deps: ChatLlmCommandDeps, name: string, systemPrompt?: string): Promise<void> {
  let info: LlmInfo;
  try {
    info = await deps.client.get<LlmInfo>(`/api/v1/llms/${encodeURIComponent(name)}`);
  } catch (err) {
    process.stderr.write(`(could not fetch LLM metadata: ${(err as Error).message})\n`);
    return;
  }
  const sep = '─'.repeat(60);
  const out = (s: string): void => { process.stderr.write(`${styleStats(s)}\n`); };
  out(sep);
  out(`LLM: ${info.name}  ${info.type} → ${info.model}`);
  out(`Kind: ${info.kind}    Status: ${info.status}`);
  if (systemPrompt !== undefined) {
    out(`System: ${systemPrompt.slice(0, 120)}${systemPrompt.length > 120 ? '…' : ''}`);
  }
  out(sep);
}

async function runOneShot(deps: ChatLlmCommandDeps, name: string, opts: ChatLlmOpts): Promise<void> {
  const messages = buildMessages([], opts.system, opts.message ?? '');
  const bar = opts.stream === false ? null : installStatusBar();
  try {
    if (opts.stream === false) {
      const reply = await postNonStream(deps, name, messages, opts);
      process.stdout.write(`${reply}\n`);
    } else {
      await streamOnce(deps, name, messages, opts, bar);
    }
  } finally {
    bar?.teardown();
  }
}

async function runRepl(deps: ChatLlmCommandDeps, name: string, opts: ChatLlmOpts): Promise<void> {
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  const ask = (q: string): Promise<string> => new Promise((resolve) => rl.question(q, resolve));
  const history: Array<{ role: 'user' | 'assistant'; content: string }> = [];
  const bar = opts.stream === false ? null : installStatusBar();
  process.stderr.write(`Stateless chat with LLM '${name}'. Ctrl-D to exit.\n`);
  try {
    while (true) {
      let line: string;
      try { line = await ask('> '); } catch { break; }
      if (line === '') continue;
      const messages = buildMessages(history, opts.system, line);
      try {
        let reply: string;
        if (opts.stream === false) {
          reply = await postNonStream(deps, name, messages, opts);
          process.stdout.write(`${reply}\n`);
        } else {
          reply = await streamOnce(deps, name, messages, opts, bar);
          process.stdout.write('\n');
        }
        history.push({ role: 'user', content: line });
        history.push({ role: 'assistant', content: reply });
      } catch (err) {
        process.stderr.write(`error: ${(err as Error).message}\n`);
      }
    }
    rl.close();
  } finally {
    bar?.teardown();
  }
}
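
/** Build the OpenAI-style message array: optional system prompt first, then prior turns, then the new user message. */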
function buildMessages(
  history: Array<{ role: 'user' | 'assistant'; content: string }>,
  system: string | undefined,
  user: string,
): Array<{ role: 'system' | 'user' | 'assistant'; content: string }> {
  const out: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
  if (system !== undefined && system !== '') out.push({ role: 'system', content: system });
  out.push(...history);
  out.push({ role: 'user', content: user });
  return out;
}

async function postNonStream(
  deps: ChatLlmCommandDeps,
  name: string,
  messages: Array<{ role: string; content: string }>,
  opts: ChatLlmOpts,
): Promise<string> {
  const body: Record<string, unknown> = { messages };
  if (opts.temperature !== undefined) body['temperature'] = opts.temperature;
  if (opts.maxTokens !== undefined) body['max_tokens'] = opts.maxTokens;
  const res = await deps.client.post<{
    choices?: Array<{ message?: { content?: string } }>;
  }>(`/api/v1/llms/${encodeURIComponent(name)}/infer`, body);
  return res.choices?.[0]?.message?.content ?? '';
}

/**
 * Stream a single chat call against /api/v1/llms/:name/infer with stream=true.
 * The response is OpenAI-style SSE (`data: <chat.completion.chunk>`).
 * Returns the assembled assistant content.
 */
function streamOnce(
  deps: ChatLlmCommandDeps,
  name: string,
  messages: Array<{ role: string; content: string }>,
  opts: ChatLlmOpts,
  bar: StatusBar | null,
): Promise<string> {
  const url = new URL(`${deps.baseUrl}/api/v1/llms/${encodeURIComponent(name)}/infer`);
  const reqBody: Record<string, unknown> = { messages, stream: true };
  if (opts.temperature !== undefined) reqBody['temperature'] = opts.temperature;
  if (opts.maxTokens !== undefined) reqBody['max_tokens'] = opts.maxTokens;
  const payload = JSON.stringify(reqBody);
  const stats = { thinking: newPhase(), content: newPhase() } satisfies { thinking: PhaseStats; content: PhaseStats };
  const TICK_MS = 250;
  let timer: NodeJS.Timeout | null = null;
  function startTicker(): void {
    if (timer !== null || bar === null) return;
    timer = setInterval(() => bar.update(formatStats(stats, true)), TICK_MS);
  }
  function stopTicker(): void {
    if (timer !== null) { clearInterval(timer); timer = null; }
  }
  return new Promise<string>((resolve, reject) => {
    let assistant = '';
    const driver = url.protocol === 'https:' ? https : http;
    const req = driver.request({
      hostname: url.hostname,
      port: url.port || (url.protocol === 'https:' ? 443 : 80),
      path: url.pathname + url.search,
      method: 'POST',
      timeout: STREAM_TIMEOUT_MS,
      headers: {
        'Content-Type': 'application/json',
        Accept: 'text/event-stream',
        ...(deps.token !== undefined ? { Authorization: `Bearer ${deps.token}` } : {}),
      },
    }, (res) => {
      const status = res.statusCode ?? 0;
      if (status >= 400) {
        const chunks: Buffer[] = [];
        res.on('data', (c: Buffer) => chunks.push(c));
        res.on('end', () => reject(new Error(`HTTP ${String(status)}: ${Buffer.concat(chunks).toString('utf-8')}`)));
        return;
      }
      let buf = '';
      res.setEncoding('utf-8');
      res.on('data', (chunk: string) => {
        buf += chunk;
        let nl: number;
        while ((nl = buf.indexOf('\n\n')) !== -1) {
          const frame = buf.slice(0, nl);
          buf = buf.slice(nl + 2);
          for (const line of frame.split('\n')) {
            if (!line.startsWith('data: ')) continue;
            const data = line.slice(6);
            if (data === '[DONE]') continue;
            try {
              const parsed = JSON.parse(data) as { choices?: Array<{ delta?: { content?: string } }> };
              const piece = parsed.choices?.[0]?.delta?.content;
              if (typeof piece === 'string' && piece !== '') {
                recordDelta(stats.content, piece);
                process.stdout.write(piece);
                assistant += piece;
                startTicker();
              }
            } catch {
              // ignore malformed frames
            }
          }
        }
      });
      res.on('end', () => {
        stopTicker();
        const final = formatStats(stats, false);
        if (final !== '' && STDERR_IS_TTY) process.stderr.write(`\n${styleStats(`(${final})`)}`);
        else if (final !== '') process.stderr.write(`\n(${final})`);
        if (bar !== null && final !== '') bar.update(final);
        resolve(assistant);
      });
      res.on('error', (err) => { stopTicker(); reject(err); });
    });
    req.on('error', (err) => { stopTicker(); reject(err); });
    req.on('timeout', () => { stopTicker(); req.destroy(); reject(new Error('chat-llm stream timed out')); });
    req.write(payload);
    req.end();
  });
}
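
/**
 * Commander argument parser for --max-tokens: despite the name, it only
 * accepts whole numbers and throws on anything non-integer.
 */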
function parseFloatInt(value: string): number {
  const n = Number(value);
  if (!Number.isInteger(n)) throw new Error(`expected integer, got '${value}'`);
  return n;
}

commands/chat.ts

@@ -525,24 +525,24 @@ interface ChatStreamFrame {
 // ANSI codes for the reasoning sidebar. Dim + italic visually separates
 // reasoning ("the model is thinking") from final assistant content. We only
 // emit the codes when stderr is a TTY — piping to a file should stay clean.
-const ANSI_DIM_ITALIC = '\x1b[2;3m';
-const ANSI_DIM = '\x1b[2m';
-const ANSI_RESET = '\x1b[0m';
-const STDERR_IS_TTY = process.stderr.isTTY === true;
-function styleThinking(s: string): string {
+export const ANSI_DIM_ITALIC = '\x1b[2;3m';
+export const ANSI_DIM = '\x1b[2m';
+export const ANSI_RESET = '\x1b[0m';
+export const STDERR_IS_TTY = process.stderr.isTTY === true;
+export function styleThinking(s: string): string {
   return STDERR_IS_TTY ? `${ANSI_DIM_ITALIC}${s}${ANSI_RESET}` : s;
 }
-function styleStats(s: string): string {
+export function styleStats(s: string): string {
   return STDERR_IS_TTY ? `${ANSI_DIM}${s}${ANSI_RESET}` : s;
 }
-interface PhaseStats {
+export interface PhaseStats {
   words: number;
   firstMs: number;
   lastMs: number;
 }
-function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; }
-function recordDelta(p: PhaseStats, delta: string): void {
+export function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; }
+export function recordDelta(p: PhaseStats, delta: string): void {
   const now = Date.now();
   if (p.firstMs === 0) p.firstMs = now;
   p.lastMs = now;
@@ -558,7 +558,7 @@ function formatPhase(label: string, p: PhaseStats): string | null {
   const rate = p.words / sec;
   return `${label}${String(p.words)}w · ${rate.toFixed(1)} w/s · ${sec.toFixed(1)}s`;
 }
-function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string {
+export function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string {
   const parts: string[] = [];
   const c = formatPhase('', s.content);
   if (c !== null) parts.push(c);
@@ -588,12 +588,12 @@ function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial:
  * a foreign terminal in a half-locked state if Ctrl-C / uncaught exception
  * fires mid-stream.
  */
-interface StatusBar {
+export interface StatusBar {
   update(text: string): void;
   teardown(): void;
 }
-function installStatusBar(): StatusBar | null {
+export function installStatusBar(): StatusBar | null {
   const out = process.stdout;
   if (!out.isTTY) return null;
   const initialRows = out.rows;

get.ts

@@ -132,10 +132,16 @@ interface LlmRow {
   url: string;
   description: string;
   apiKeyRef: { name: string; key: string } | null;
+  // Virtual-provider lifecycle (optional for backward compat with older
+  // mcpd responses that predate the kind/status columns).
+  kind?: 'public' | 'virtual';
+  status?: 'active' | 'inactive' | 'hibernating';
 }
 const llmColumns: Column<LlmRow>[] = [
   { header: 'NAME', key: 'name' },
+  { header: 'KIND', key: (r) => r.kind ?? 'public', width: 8 },
+  { header: 'STATUS', key: (r) => r.status ?? 'active', width: 12 },
   { header: 'TYPE', key: 'type', width: 12 },
   { header: 'MODEL', key: 'model', width: 28 },
   { header: 'TIER', key: 'tier', width: 8 },

View File

@@ -19,6 +19,7 @@ import { createPatchCommand } from './commands/patch.js';
 import { createConsoleCommand } from './commands/console/index.js';
 import { createCacheCommand } from './commands/cache.js';
 import { createChatCommand } from './commands/chat.js';
+import { createChatLlmCommand } from './commands/chat-llm.js';
 import { createMigrateCommand } from './commands/migrate.js';
 import { createRotateCommand } from './commands/rotate.js';
 import { ApiClient, ApiError } from './api-client.js';
@@ -241,6 +242,13 @@ export function createProgram(): Command {
     log: (...args) => console.log(...args),
   }));
+  program.addCommand(createChatLlmCommand({
+    client,
+    baseUrl,
+    ...(creds?.token !== undefined ? { token: creds.token } : {}),
+    log: (...args) => console.log(...args),
+  }));
   program.addCommand(createPatchCommand({
     client,
     log: (...args) => console.log(...args),