feat(cli): mcpctl chat-llm + KIND/STATUS columns (v1 Stage 5)
Closes the loop on user-facing surface:

    $ mcpctl get llm
    NAME            KIND     STATUS  TYPE    MODEL                      TIER  KEY  ID
    qwen3-thinking  public   active  openai  qwen3-thinking             fast  ...  ...
    vllm-local      virtual  active  openai  Qwen/Qwen2.5-7B-Instruct   fast  -    ...

    $ mcpctl chat-llm vllm-local
    ────────────────────────────────────────
    LLM:  vllm-local  openai → Qwen/Qwen2.5-7B-Instruct-AWQ
    Kind: virtual     Status: active
    ────────────────────────────────────────
    > hello?
    Hi! …

New: chat-llm command (commands/chat-llm.ts)
- Stateless chat with any mcpd-registered LLM. No threads, no tools, no
  project prompts. POSTs to /api/v1/llms/<name>/infer; mcpd's kind=virtual
  branch handles relay-through-mcplocal transparently, so the same CLI
  command works for both public and virtual LLMs.
- Reuses installStatusBar / formatStats / recordDelta / styleStats /
  PhaseStats from chat.ts (now exported) so the bottom-row tokens-per-second
  ticker behaves identically to mcpctl chat.
- Flags: --message (one-shot), --system, --temperature, --max-tokens,
  --no-stream. Streaming uses OpenAI chat.completion.chunk SSE.
- REPL mode keeps a per-session history array so multi-turn flows feel
  natural; each turn is an independent inference call.

Updated: get.ts
- LlmRow gains optional kind/status fields.
- llmColumns layout: NAME, KIND, STATUS, TYPE, MODEL, TIER, KEY, ID.
  Defaults gracefully when older mcpd responses don't return them.

Updated: chat.ts
- Re-exports the helpers chat-llm.ts needs (PhaseStats, newPhase,
  recordDelta, formatStats, styleStats, styleThinking, STDERR_IS_TTY,
  StatusBar, installStatusBar). No behavior change.

Completions: chat-llm picks up the standard option enumeration
automatically; bash gets a special-case for first-arg LLM-name completion
via _mcpctl_resource_names "llms".

CLI suite: 437/437 (was 430, +7 from auto-discovered test cases in the
regenerated completions golden). Workspace: 2043/2043 across 152 files.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
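For quick reference, a minimal TypeScript sketch of the round-trip the new
command performs against mcpd's infer route. The helper name inferOnce and
the use of a Node >= 18 global fetch are illustrative assumptions; the real
CLI goes through ApiClient (non-streaming) or a raw http/https request
(streaming), as shown in the diff below.

    // Hypothetical sketch: one non-streaming inference call, roughly what
    // `mcpctl chat-llm <name> --message <text> --no-stream` does.
    async function inferOnce(baseUrl: string, name: string, prompt: string, token?: string): Promise<string> {
      const res = await fetch(`${baseUrl}/api/v1/llms/${encodeURIComponent(name)}/infer`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          ...(token !== undefined ? { Authorization: `Bearer ${token}` } : {}),
        },
        body: JSON.stringify({ messages: [{ role: 'user', content: prompt }] }),
      });
      if (!res.ok) throw new Error(`HTTP ${String(res.status)}`);
      // OpenAI-style chat.completion body; only the first choice is used here.
      const data = (await res.json()) as { choices?: Array<{ message?: { content?: string } }> };
      return data.choices?.[0]?.message?.content ?? '';
    }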
src/cli/src/commands/chat-llm.ts (new file, 271 lines)
@@ -0,0 +1,271 @@
/**
 * `mcpctl chat-llm <name>` — stateless chat with any registered LLM.
 *
 * Distinct from `mcpctl chat <agent>`:
 * - No threads, no history, no tools, no project prompts.
 * - Just an OpenAI chat-completions round-trip per turn.
 * - Works for both kinds of mcpd-registered LLMs:
 *   * `kind=public` — direct upstream call (existing behavior).
 *   * `kind=virtual` — relayed through the publishing mcplocal's SSE
 *     channel (the v1 virtual-LLM feature).
 *
 * The CLI doesn't need to know which kind the LLM is; mcpd's
 * `/api/v1/llms/:name/infer` route branches on `kind` server-side.
 */
import { Command } from 'commander';
import http from 'node:http';
import https from 'node:https';
import readline from 'node:readline';
import type { ApiClient } from '../api-client.js';
import {
  formatStats,
  installStatusBar,
  newPhase,
  recordDelta,
  STDERR_IS_TTY,
  styleStats,
  type PhaseStats,
  type StatusBar,
} from './chat.js';

const STREAM_TIMEOUT_MS = 600_000;

export interface ChatLlmCommandDeps {
  client: ApiClient;
  baseUrl: string;
  token?: string | undefined;
  log: (...args: unknown[]) => void;
}

export function createChatLlmCommand(deps: ChatLlmCommandDeps): Command {
  return new Command('chat-llm')
    .description('Stateless chat with any registered LLM (public or virtual). No threads, no tools.')
    .argument('<name>', 'LLM name (see `mcpctl get llm`)')
    .option('-m, --message <text>', 'One-shot: send a single message and exit (no REPL)')
    .option('--system <text>', 'Optional system prompt')
    .option('--temperature <n>', 'Sampling temperature (0..2)', parseFloat)
    .option('--max-tokens <n>', 'Maximum tokens in the assistant reply', parseFloatInt)
    .option('--no-stream', 'Disable SSE streaming (single JSON response)')
    .action(async (name: string, opts: ChatLlmOpts) => {
      await printHeader(deps, name, opts.system);
      if (opts.message !== undefined) {
        await runOneShot(deps, name, opts);
        return;
      }
      await runRepl(deps, name, opts);
    });
}

interface ChatLlmOpts {
  message?: string;
  system?: string;
  temperature?: number;
  maxTokens?: number;
  stream?: boolean;
}

interface LlmInfo {
  name: string;
  type: string;
  model: string;
  kind: 'public' | 'virtual';
  status: 'active' | 'inactive' | 'hibernating';
}

async function printHeader(deps: ChatLlmCommandDeps, name: string, systemPrompt?: string): Promise<void> {
  let info: LlmInfo;
  try {
    info = await deps.client.get<LlmInfo>(`/api/v1/llms/${encodeURIComponent(name)}`);
  } catch (err) {
    process.stderr.write(`(could not fetch LLM metadata: ${(err as Error).message})\n`);
    return;
  }
  const sep = '─'.repeat(60);
  const out = (s: string): void => { process.stderr.write(`${styleStats(s)}\n`); };
  out(sep);
  out(`LLM: ${info.name} ${info.type} → ${info.model}`);
  out(`Kind: ${info.kind} Status: ${info.status}`);
  if (systemPrompt !== undefined) {
    out(`System: ${systemPrompt.slice(0, 120)}${systemPrompt.length > 120 ? '…' : ''}`);
  }
  out(sep);
}

async function runOneShot(deps: ChatLlmCommandDeps, name: string, opts: ChatLlmOpts): Promise<void> {
  const messages = buildMessages([], opts.system, opts.message ?? '');
  const bar = opts.stream === false ? null : installStatusBar();
  try {
    if (opts.stream === false) {
      const reply = await postNonStream(deps, name, messages, opts);
      process.stdout.write(`${reply}\n`);
    } else {
      await streamOnce(deps, name, messages, opts, bar);
    }
  } finally {
    bar?.teardown();
  }
}

async function runRepl(deps: ChatLlmCommandDeps, name: string, opts: ChatLlmOpts): Promise<void> {
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  const ask = (q: string): Promise<string> => new Promise((resolve) => rl.question(q, resolve));
  const history: Array<{ role: 'user' | 'assistant'; content: string }> = [];

  const bar = opts.stream === false ? null : installStatusBar();
  process.stderr.write(`Stateless chat with LLM '${name}'. Ctrl-D to exit.\n`);

  try {
    while (true) {
      let line: string;
      try { line = await ask('> '); } catch { break; }
      if (line === '') continue;

      const messages = buildMessages(history, opts.system, line);
      try {
        let reply: string;
        if (opts.stream === false) {
          reply = await postNonStream(deps, name, messages, opts);
          process.stdout.write(`${reply}\n`);
        } else {
          reply = await streamOnce(deps, name, messages, opts, bar);
          process.stdout.write('\n');
        }
        history.push({ role: 'user', content: line });
        history.push({ role: 'assistant', content: reply });
      } catch (err) {
        process.stderr.write(`error: ${(err as Error).message}\n`);
      }
    }
    rl.close();
  } finally {
    bar?.teardown();
  }
}

function buildMessages(
  history: Array<{ role: 'user' | 'assistant'; content: string }>,
  system: string | undefined,
  user: string,
): Array<{ role: 'system' | 'user' | 'assistant'; content: string }> {
  const out: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
  if (system !== undefined && system !== '') out.push({ role: 'system', content: system });
  out.push(...history);
  out.push({ role: 'user', content: user });
  return out;
}

async function postNonStream(
  deps: ChatLlmCommandDeps,
  name: string,
  messages: Array<{ role: string; content: string }>,
  opts: ChatLlmOpts,
): Promise<string> {
  const body: Record<string, unknown> = { messages };
  if (opts.temperature !== undefined) body['temperature'] = opts.temperature;
  if (opts.maxTokens !== undefined) body['max_tokens'] = opts.maxTokens;
  const res = await deps.client.post<{
    choices?: Array<{ message?: { content?: string } }>;
  }>(`/api/v1/llms/${encodeURIComponent(name)}/infer`, body);
  return res.choices?.[0]?.message?.content ?? '';
}

/**
 * Stream a single chat call against /api/v1/llms/:name/infer with stream=true.
 * The response is OpenAI-style SSE (`data: <chat.completion.chunk>`).
 * Returns the assembled assistant content.
 */
function streamOnce(
  deps: ChatLlmCommandDeps,
  name: string,
  messages: Array<{ role: string; content: string }>,
  opts: ChatLlmOpts,
  bar: StatusBar | null,
): Promise<string> {
  const url = new URL(`${deps.baseUrl}/api/v1/llms/${encodeURIComponent(name)}/infer`);
  const reqBody: Record<string, unknown> = { messages, stream: true };
  if (opts.temperature !== undefined) reqBody['temperature'] = opts.temperature;
  if (opts.maxTokens !== undefined) reqBody['max_tokens'] = opts.maxTokens;
  const payload = JSON.stringify(reqBody);
  const stats = { thinking: newPhase(), content: newPhase() } satisfies { thinking: PhaseStats; content: PhaseStats };

  const TICK_MS = 250;
  let timer: NodeJS.Timeout | null = null;
  function startTicker(): void {
    if (timer !== null || bar === null) return;
    timer = setInterval(() => bar.update(formatStats(stats, true)), TICK_MS);
  }
  function stopTicker(): void {
    if (timer !== null) { clearInterval(timer); timer = null; }
  }

  return new Promise<string>((resolve, reject) => {
    let assistant = '';
    const driver = url.protocol === 'https:' ? https : http;
    const req = driver.request({
      hostname: url.hostname,
      port: url.port || (url.protocol === 'https:' ? 443 : 80),
      path: url.pathname + url.search,
      method: 'POST',
      timeout: STREAM_TIMEOUT_MS,
      headers: {
        'Content-Type': 'application/json',
        Accept: 'text/event-stream',
        ...(deps.token !== undefined ? { Authorization: `Bearer ${deps.token}` } : {}),
      },
    }, (res) => {
      const status = res.statusCode ?? 0;
      if (status >= 400) {
        const chunks: Buffer[] = [];
        res.on('data', (c: Buffer) => chunks.push(c));
        res.on('end', () => reject(new Error(`HTTP ${String(status)}: ${Buffer.concat(chunks).toString('utf-8')}`)));
        return;
      }
      let buf = '';
      res.setEncoding('utf-8');
      res.on('data', (chunk: string) => {
        buf += chunk;
        let nl: number;
        while ((nl = buf.indexOf('\n\n')) !== -1) {
          const frame = buf.slice(0, nl);
          buf = buf.slice(nl + 2);
          for (const line of frame.split('\n')) {
            if (!line.startsWith('data: ')) continue;
            const data = line.slice(6);
            if (data === '[DONE]') continue;
            try {
              const parsed = JSON.parse(data) as { choices?: Array<{ delta?: { content?: string } }> };
              const piece = parsed.choices?.[0]?.delta?.content;
              if (typeof piece === 'string' && piece !== '') {
                recordDelta(stats.content, piece);
                process.stdout.write(piece);
                assistant += piece;
                startTicker();
              }
            } catch {
              // ignore malformed frames
            }
          }
        }
      });
      res.on('end', () => {
        stopTicker();
        const final = formatStats(stats, false);
        if (final !== '' && STDERR_IS_TTY) process.stderr.write(`\n${styleStats(`(${final})`)}`);
        else if (final !== '') process.stderr.write(`\n(${final})`);
        if (bar !== null && final !== '') bar.update(final);
        resolve(assistant);
      });
      res.on('error', (err) => { stopTicker(); reject(err); });
    });
    req.on('error', (err) => { stopTicker(); reject(err); });
    req.on('timeout', () => { stopTicker(); req.destroy(); reject(new Error('chat-llm stream timed out')); });
    req.write(payload);
    req.end();
  });
}

function parseFloatInt(value: string): number {
  const n = Number(value);
  if (!Number.isInteger(n)) throw new Error(`expected integer, got '${value}'`);
  return n;
}
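For reference, streamOnce() above consumes OpenAI-style SSE framing; a
minimal illustration of one frame and how the delta is extracted. The
object field and any chunk fields beyond choices[].delta.content are
assumptions based on the chat.completion.chunk convention, not taken from
this diff.

    // Illustrative only: one SSE frame as parsed above. Frames are separated
    // by a blank line, and the stream ends with `data: [DONE]`.
    const frame = 'data: {"object":"chat.completion.chunk","choices":[{"delta":{"content":"Hi"}}]}';
    const parsed = JSON.parse(frame.slice('data: '.length)) as {
      choices?: Array<{ delta?: { content?: string } }>;
    };
    process.stdout.write(parsed.choices?.[0]?.delta?.content ?? ''); // prints "Hi"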
src/cli/src/commands/chat.ts
@@ -525,24 +525,24 @@ interface ChatStreamFrame {
 // ANSI codes for the reasoning sidebar. Dim + italic visually separates
 // reasoning ("the model is thinking") from final assistant content. We only
 // emit the codes when stderr is a TTY — piping to a file should stay clean.
-const ANSI_DIM_ITALIC = '\x1b[2;3m';
-const ANSI_DIM = '\x1b[2m';
-const ANSI_RESET = '\x1b[0m';
-const STDERR_IS_TTY = process.stderr.isTTY === true;
-function styleThinking(s: string): string {
+export const ANSI_DIM_ITALIC = '\x1b[2;3m';
+export const ANSI_DIM = '\x1b[2m';
+export const ANSI_RESET = '\x1b[0m';
+export const STDERR_IS_TTY = process.stderr.isTTY === true;
+export function styleThinking(s: string): string {
   return STDERR_IS_TTY ? `${ANSI_DIM_ITALIC}${s}${ANSI_RESET}` : s;
 }
-function styleStats(s: string): string {
+export function styleStats(s: string): string {
   return STDERR_IS_TTY ? `${ANSI_DIM}${s}${ANSI_RESET}` : s;
 }

-interface PhaseStats {
+export interface PhaseStats {
   words: number;
   firstMs: number;
   lastMs: number;
 }
-function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; }
-function recordDelta(p: PhaseStats, delta: string): void {
+export function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; }
+export function recordDelta(p: PhaseStats, delta: string): void {
   const now = Date.now();
   if (p.firstMs === 0) p.firstMs = now;
   p.lastMs = now;
@@ -558,7 +558,7 @@ function formatPhase(label: string, p: PhaseStats): string | null {
   const rate = p.words / sec;
   return `${label}${String(p.words)}w · ${rate.toFixed(1)} w/s · ${sec.toFixed(1)}s`;
 }
-function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string {
+export function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string {
   const parts: string[] = [];
   const c = formatPhase('', s.content);
   if (c !== null) parts.push(c);
@@ -588,12 +588,12 @@ function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial:
  * a foreign terminal in a half-locked state if Ctrl-C / uncaught exception
  * fires mid-stream.
  */
-interface StatusBar {
+export interface StatusBar {
   update(text: string): void;
   teardown(): void;
 }

-function installStatusBar(): StatusBar | null {
+export function installStatusBar(): StatusBar | null {
   const out = process.stdout;
   if (!out.isTTY) return null;
   const initialRows = out.rows;
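A quick illustration of how the newly exported helpers combine into the
tokens-per-second ticker text. The timing values in the comment are made
up; the exact string comes from formatPhase/formatStats above, and the word
counting inside recordDelta sits outside this hunk.

    import { formatStats, installStatusBar, newPhase, recordDelta } from './chat.js';

    const stats = { thinking: newPhase(), content: newPhase() };
    recordDelta(stats.content, 'Hello there world');  // stamps timestamps, accumulates the word count
    const line = formatStats(stats, true);            // e.g. "3w · 12.0 w/s · 0.3s" while streaming
    installStatusBar()?.update(line);                 // bottom-row ticker when stdout is a TTY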
src/cli/src/commands/get.ts
@@ -132,10 +132,16 @@ interface LlmRow {
   url: string;
   description: string;
   apiKeyRef: { name: string; key: string } | null;
+  // Virtual-provider lifecycle (optional for backward compat with older
+  // mcpd responses that predate the kind/status columns).
+  kind?: 'public' | 'virtual';
+  status?: 'active' | 'inactive' | 'hibernating';
 }

 const llmColumns: Column<LlmRow>[] = [
   { header: 'NAME', key: 'name' },
+  { header: 'KIND', key: (r) => r.kind ?? 'public', width: 8 },
+  { header: 'STATUS', key: (r) => r.status ?? 'active', width: 12 },
   { header: 'TYPE', key: 'type', width: 12 },
   { header: 'MODEL', key: 'model', width: 28 },
   { header: 'TIER', key: 'tier', width: 8 },
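Older mcpd responses that omit the new fields still render sensibly thanks
to the accessor fallbacks above; a tiny illustration, with LlmRow trimmed to
the relevant fields for brevity:

    interface LegacyRow { name: string; kind?: 'public' | 'virtual'; status?: string }
    const legacy: LegacyRow = { name: 'qwen3-thinking' };            // pre-upgrade mcpd: no kind/status
    console.log(legacy.kind ?? 'public', legacy.status ?? 'active'); // prints: public active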
@@ -19,6 +19,7 @@ import { createPatchCommand } from './commands/patch.js';
 import { createConsoleCommand } from './commands/console/index.js';
 import { createCacheCommand } from './commands/cache.js';
 import { createChatCommand } from './commands/chat.js';
+import { createChatLlmCommand } from './commands/chat-llm.js';
 import { createMigrateCommand } from './commands/migrate.js';
 import { createRotateCommand } from './commands/rotate.js';
 import { ApiClient, ApiError } from './api-client.js';
@@ -241,6 +242,13 @@ export function createProgram(): Command {
     log: (...args) => console.log(...args),
   }));

+  program.addCommand(createChatLlmCommand({
+    client,
+    baseUrl,
+    ...(creds?.token !== undefined ? { token: creds.token } : {}),
+    log: (...args) => console.log(...args),
+  }));
+
   program.addCommand(createPatchCommand({
     client,
     log: (...args) => console.log(...args),