feat(cli): mcpctl chat-llm + KIND/STATUS columns (v1 Stage 5)

Closes the loop on the user-facing surface:

  $ mcpctl get llm
  NAME             KIND     STATUS    TYPE     MODEL                       TIER  KEY  ID
  qwen3-thinking   public   active    openai   qwen3-thinking              fast  ...  ...
  vllm-local       virtual  active    openai   Qwen/Qwen2.5-7B-Instruct    fast  -    ...

  $ mcpctl chat-llm vllm-local
  ────────────────────────────────────────
  LLM: vllm-local  openai → Qwen/Qwen2.5-7B-Instruct-AWQ
  Kind: virtual    Status: active
  ────────────────────────────────────────
  > hello?
  Hi! …

New: chat-llm command (commands/chat-llm.ts)
- Stateless chat with any mcpd-registered LLM. No threads, no tools,
  no project prompts. POSTs to /api/v1/llms/<name>/infer (request sketch
  after this list); mcpd's kind=virtual branch handles relay-through-mcplocal
  transparently, so the same CLI command works for both public and
  virtual LLMs.
- Reuses installStatusBar / formatStats / recordDelta / styleStats /
  PhaseStats from chat.ts (now exported) so the bottom-row words-per-
  second ticker behaves identically to mcpctl chat.
- Flags: --message (one-shot), --system, --temperature, --max-tokens,
  --no-stream. Streaming uses OpenAI chat.completion.chunk SSE.
- REPL mode keeps a per-session history array so multi-turn flows
  feel natural; each turn is an independent inference call.
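
For reference, a minimal sketch of the non-streaming round-trip the new
command performs (standalone and hypothetical — the helper name, base URL,
and token are placeholders; mcpctl resolves the real ones from its own
credentials):

  // Hypothetical standalone equivalent of postNonStream() in chat-llm.ts.
  async function inferOnce(baseUrl: string, token: string): Promise<string> {
    const res = await fetch(`${baseUrl}/api/v1/llms/vllm-local/infer`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
      body: JSON.stringify({
        messages: [{ role: 'user', content: 'hello?' }],
        temperature: 0.7, // optional, mirrors --temperature
        max_tokens: 256,  // optional, mirrors --max-tokens
      }),
    });
    const data = await res.json() as { choices?: Array<{ message?: { content?: string } }> };
    return data.choices?.[0]?.message?.content ?? '';
  }

With stream: true the same endpoint answers with OpenAI-style SSE frames
(data: <chat.completion.chunk>, terminated by data: [DONE]), which is what
streamOnce() consumes.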

Updated: get.ts
- LlmRow gains optional kind/status fields.
- llmColumns layout: NAME, KIND, STATUS, TYPE, MODEL, TIER, KEY, ID.
  The new columns default to public/active when older mcpd responses
  don't return the fields.
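
Illustrative only (throwaway helpers, not in the diff) — the same fallback
expressions the new columns use:

  const kindOf = (r: { kind?: 'public' | 'virtual' }) => r.kind ?? 'public';
  const statusOf = (r: { status?: 'active' | 'inactive' | 'hibernating' }) => r.status ?? 'active';
  kindOf({});    // 'public' — older mcpd response, field absent
  statusOf({});  // 'active'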

Updated: chat.ts
- Re-exports the helpers chat-llm.ts needs (PhaseStats, newPhase,
  recordDelta, formatStats, styleStats, styleThinking, STDERR_IS_TTY,
  StatusBar, installStatusBar). No behavior change.

Completions: chat-llm picks up the standard option enumeration
automatically; bash gets a special case for first-arg LLM-name
completion via _mcpctl_resource_names "llms".

CLI suite: 437/437 (was 430, +7 from auto-discovered test cases in
the regenerated completions golden). Workspace: 2043/2043 across
152 files.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Author:  Michal
Date:    2026-04-27 14:25:38 +01:00
Parent:  97174f450f
Commit:  7e6b0cab44

7 changed files with 330 additions and 14 deletions

commands/chat-llm.ts (new file)

@@ -0,0 +1,271 @@
/**
 * `mcpctl chat-llm <name>` — stateless chat with any registered LLM.
 *
 * Distinct from `mcpctl chat <agent>`:
 * - No threads, no history, no tools, no project prompts.
 * - Just an OpenAI chat-completions round-trip per turn.
 * - Works for both kinds of mcpd-registered LLMs:
 *   * `kind=public` — direct upstream call (existing behavior).
 *   * `kind=virtual` — relayed through the publishing mcplocal's SSE
 *     channel (the v1 virtual-LLM feature).
 *
 * The CLI doesn't need to know which kind the LLM is; mcpd's
 * `/api/v1/llms/:name/infer` route branches on `kind` server-side.
 */
import { Command } from 'commander';
import http from 'node:http';
import https from 'node:https';
import readline from 'node:readline';
import type { ApiClient } from '../api-client.js';
import {
  formatStats,
  installStatusBar,
  newPhase,
  recordDelta,
  STDERR_IS_TTY,
  styleStats,
  type PhaseStats,
  type StatusBar,
} from './chat.js';

const STREAM_TIMEOUT_MS = 600_000;

export interface ChatLlmCommandDeps {
  client: ApiClient;
  baseUrl: string;
  token?: string | undefined;
  log: (...args: unknown[]) => void;
}

export function createChatLlmCommand(deps: ChatLlmCommandDeps): Command {
  return new Command('chat-llm')
    .description('Stateless chat with any registered LLM (public or virtual). No threads, no tools.')
    .argument('<name>', 'LLM name (see `mcpctl get llm`)')
    .option('-m, --message <text>', 'One-shot: send a single message and exit (no REPL)')
    .option('--system <text>', 'Optional system prompt')
    .option('--temperature <n>', 'Sampling temperature (0..2)', parseFloat)
    .option('--max-tokens <n>', 'Maximum tokens in the assistant reply', parseFloatInt)
    .option('--no-stream', 'Disable SSE streaming (single JSON response)')
    .action(async (name: string, opts: ChatLlmOpts) => {
      await printHeader(deps, name, opts.system);
      if (opts.message !== undefined) {
        await runOneShot(deps, name, opts);
        return;
      }
      await runRepl(deps, name, opts);
    });
}
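
/** Parsed CLI flags. Commander's `--no-stream` arrives here as `stream: false`; when the flag is absent, streaming stays on. */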
interface ChatLlmOpts {
  message?: string;
  system?: string;
  temperature?: number;
  maxTokens?: number;
  stream?: boolean;
}

interface LlmInfo {
  name: string;
  type: string;
  model: string;
  kind: 'public' | 'virtual';
  status: 'active' | 'inactive' | 'hibernating';
}

async function printHeader(deps: ChatLlmCommandDeps, name: string, systemPrompt?: string): Promise<void> {
  let info: LlmInfo;
  try {
    info = await deps.client.get<LlmInfo>(`/api/v1/llms/${encodeURIComponent(name)}`);
  } catch (err) {
    process.stderr.write(`(could not fetch LLM metadata: ${(err as Error).message})\n`);
    return;
  }
  const sep = '─'.repeat(60);
  const out = (s: string): void => { process.stderr.write(`${styleStats(s)}\n`); };
  out(sep);
  out(`LLM: ${info.name}  ${info.type} → ${info.model}`);
  out(`Kind: ${info.kind}    Status: ${info.status}`);
  if (systemPrompt !== undefined) {
    out(`System: ${systemPrompt.slice(0, 120)}${systemPrompt.length > 120 ? '…' : ''}`);
  }
  out(sep);
}

async function runOneShot(deps: ChatLlmCommandDeps, name: string, opts: ChatLlmOpts): Promise<void> {
  const messages = buildMessages([], opts.system, opts.message ?? '');
  const bar = opts.stream === false ? null : installStatusBar();
  try {
    if (opts.stream === false) {
      const reply = await postNonStream(deps, name, messages, opts);
      process.stdout.write(`${reply}\n`);
    } else {
      await streamOnce(deps, name, messages, opts, bar);
    }
  } finally {
    bar?.teardown();
  }
}

async function runRepl(deps: ChatLlmCommandDeps, name: string, opts: ChatLlmOpts): Promise<void> {
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  const ask = (q: string): Promise<string> => new Promise((resolve) => rl.question(q, resolve));
  const history: Array<{ role: 'user' | 'assistant'; content: string }> = [];
  const bar = opts.stream === false ? null : installStatusBar();
  process.stderr.write(`Stateless chat with LLM '${name}'. Ctrl-D to exit.\n`);
  try {
    while (true) {
      let line: string;
      try { line = await ask('> '); } catch { break; }
      if (line === '') continue;
      const messages = buildMessages(history, opts.system, line);
      try {
        let reply: string;
        if (opts.stream === false) {
          reply = await postNonStream(deps, name, messages, opts);
          process.stdout.write(`${reply}\n`);
        } else {
          reply = await streamOnce(deps, name, messages, opts, bar);
          process.stdout.write('\n');
        }
        history.push({ role: 'user', content: line });
        history.push({ role: 'assistant', content: reply });
      } catch (err) {
        process.stderr.write(`error: ${(err as Error).message}\n`);
      }
    }
    rl.close();
  } finally {
    bar?.teardown();
  }
}
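
/** Build the OpenAI-style message array: optional system prompt first, then prior turns, then the new user message. */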
function buildMessages(
  history: Array<{ role: 'user' | 'assistant'; content: string }>,
  system: string | undefined,
  user: string,
): Array<{ role: 'system' | 'user' | 'assistant'; content: string }> {
  const out: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
  if (system !== undefined && system !== '') out.push({ role: 'system', content: system });
  out.push(...history);
  out.push({ role: 'user', content: user });
  return out;
}

async function postNonStream(
  deps: ChatLlmCommandDeps,
  name: string,
  messages: Array<{ role: string; content: string }>,
  opts: ChatLlmOpts,
): Promise<string> {
  const body: Record<string, unknown> = { messages };
  if (opts.temperature !== undefined) body['temperature'] = opts.temperature;
  if (opts.maxTokens !== undefined) body['max_tokens'] = opts.maxTokens;
  const res = await deps.client.post<{
    choices?: Array<{ message?: { content?: string } }>;
  }>(`/api/v1/llms/${encodeURIComponent(name)}/infer`, body);
  return res.choices?.[0]?.message?.content ?? '';
}

/**
 * Stream a single chat call against /api/v1/llms/:name/infer with stream=true.
 * The response is OpenAI-style SSE (`data: <chat.completion.chunk>`).
 * Returns the assembled assistant content.
 */
function streamOnce(
  deps: ChatLlmCommandDeps,
  name: string,
  messages: Array<{ role: string; content: string }>,
  opts: ChatLlmOpts,
  bar: StatusBar | null,
): Promise<string> {
  const url = new URL(`${deps.baseUrl}/api/v1/llms/${encodeURIComponent(name)}/infer`);
  const reqBody: Record<string, unknown> = { messages, stream: true };
  if (opts.temperature !== undefined) reqBody['temperature'] = opts.temperature;
  if (opts.maxTokens !== undefined) reqBody['max_tokens'] = opts.maxTokens;
  const payload = JSON.stringify(reqBody);
  const stats = { thinking: newPhase(), content: newPhase() } satisfies { thinking: PhaseStats; content: PhaseStats };
  const TICK_MS = 250;
  let timer: NodeJS.Timeout | null = null;
  function startTicker(): void {
    if (timer !== null || bar === null) return;
    timer = setInterval(() => bar.update(formatStats(stats, true)), TICK_MS);
  }
  function stopTicker(): void {
    if (timer !== null) { clearInterval(timer); timer = null; }
  }
  return new Promise<string>((resolve, reject) => {
    let assistant = '';
    const driver = url.protocol === 'https:' ? https : http;
    const req = driver.request({
      hostname: url.hostname,
      port: url.port || (url.protocol === 'https:' ? 443 : 80),
      path: url.pathname + url.search,
      method: 'POST',
      timeout: STREAM_TIMEOUT_MS,
      headers: {
        'Content-Type': 'application/json',
        Accept: 'text/event-stream',
        ...(deps.token !== undefined ? { Authorization: `Bearer ${deps.token}` } : {}),
      },
    }, (res) => {
      const status = res.statusCode ?? 0;
      if (status >= 400) {
        const chunks: Buffer[] = [];
        res.on('data', (c: Buffer) => chunks.push(c));
        res.on('end', () => reject(new Error(`HTTP ${String(status)}: ${Buffer.concat(chunks).toString('utf-8')}`)));
        return;
      }
      let buf = '';
      res.setEncoding('utf-8');
      res.on('data', (chunk: string) => {
        buf += chunk;
        let nl: number;
        while ((nl = buf.indexOf('\n\n')) !== -1) {
          const frame = buf.slice(0, nl);
          buf = buf.slice(nl + 2);
          for (const line of frame.split('\n')) {
            if (!line.startsWith('data: ')) continue;
            const data = line.slice(6);
            if (data === '[DONE]') continue;
            try {
              const parsed = JSON.parse(data) as { choices?: Array<{ delta?: { content?: string } }> };
              const piece = parsed.choices?.[0]?.delta?.content;
              if (typeof piece === 'string' && piece !== '') {
                recordDelta(stats.content, piece);
                process.stdout.write(piece);
                assistant += piece;
                startTicker();
              }
            } catch {
              // ignore malformed frames
            }
          }
        }
      });
      res.on('end', () => {
        stopTicker();
        const final = formatStats(stats, false);
        if (final !== '' && STDERR_IS_TTY) process.stderr.write(`\n${styleStats(`(${final})`)}`);
        else if (final !== '') process.stderr.write(`\n(${final})`);
        if (bar !== null && final !== '') bar.update(final);
        resolve(assistant);
      });
      res.on('error', (err) => { stopTicker(); reject(err); });
    });
    req.on('error', (err) => { stopTicker(); reject(err); });
    req.on('timeout', () => { stopTicker(); req.destroy(); reject(new Error('chat-llm stream timed out')); });
    req.write(payload);
    req.end();
  });
}
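
/**
 * Commander argument parser for --max-tokens: despite the name, it only
 * accepts whole numbers and throws on anything non-integer.
 */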
function parseFloatInt(value: string): number {
  const n = Number(value);
  if (!Number.isInteger(n)) throw new Error(`expected integer, got '${value}'`);
  return n;
}

commands/chat.ts

@@ -525,24 +525,24 @@ interface ChatStreamFrame {
 // ANSI codes for the reasoning sidebar. Dim + italic visually separates
 // reasoning ("the model is thinking") from final assistant content. We only
 // emit the codes when stderr is a TTY — piping to a file should stay clean.
-const ANSI_DIM_ITALIC = '\x1b[2;3m';
-const ANSI_DIM = '\x1b[2m';
-const ANSI_RESET = '\x1b[0m';
-const STDERR_IS_TTY = process.stderr.isTTY === true;
-function styleThinking(s: string): string {
+export const ANSI_DIM_ITALIC = '\x1b[2;3m';
+export const ANSI_DIM = '\x1b[2m';
+export const ANSI_RESET = '\x1b[0m';
+export const STDERR_IS_TTY = process.stderr.isTTY === true;
+export function styleThinking(s: string): string {
   return STDERR_IS_TTY ? `${ANSI_DIM_ITALIC}${s}${ANSI_RESET}` : s;
 }
-function styleStats(s: string): string {
+export function styleStats(s: string): string {
   return STDERR_IS_TTY ? `${ANSI_DIM}${s}${ANSI_RESET}` : s;
 }
-interface PhaseStats {
+export interface PhaseStats {
   words: number;
   firstMs: number;
   lastMs: number;
 }
-function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; }
-function recordDelta(p: PhaseStats, delta: string): void {
+export function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; }
+export function recordDelta(p: PhaseStats, delta: string): void {
   const now = Date.now();
   if (p.firstMs === 0) p.firstMs = now;
   p.lastMs = now;
@@ -558,7 +558,7 @@ function formatPhase(label: string, p: PhaseStats): string | null {
   const rate = p.words / sec;
   return `${label}${String(p.words)}w · ${rate.toFixed(1)} w/s · ${sec.toFixed(1)}s`;
 }
-function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string {
+export function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string {
   const parts: string[] = [];
   const c = formatPhase('', s.content);
   if (c !== null) parts.push(c);
@@ -588,12 +588,12 @@ function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial:
  * a foreign terminal in a half-locked state if Ctrl-C / uncaught exception
  * fires mid-stream.
  */
-interface StatusBar {
+export interface StatusBar {
   update(text: string): void;
   teardown(): void;
 }
-function installStatusBar(): StatusBar | null {
+export function installStatusBar(): StatusBar | null {
   const out = process.stdout;
   if (!out.isTTY) return null;
   const initialRows = out.rows;

get.ts

@@ -132,10 +132,16 @@ interface LlmRow {
   url: string;
   description: string;
   apiKeyRef: { name: string; key: string } | null;
+  // Virtual-provider lifecycle (optional for backward compat with older
+  // mcpd responses that predate the kind/status columns).
+  kind?: 'public' | 'virtual';
+  status?: 'active' | 'inactive' | 'hibernating';
 }
 const llmColumns: Column<LlmRow>[] = [
   { header: 'NAME', key: 'name' },
+  { header: 'KIND', key: (r) => r.kind ?? 'public', width: 8 },
+  { header: 'STATUS', key: (r) => r.status ?? 'active', width: 12 },
   { header: 'TYPE', key: 'type', width: 12 },
   { header: 'MODEL', key: 'model', width: 28 },
   { header: 'TIER', key: 'tier', width: 8 },

View File

@@ -19,6 +19,7 @@ import { createPatchCommand } from './commands/patch.js';
 import { createConsoleCommand } from './commands/console/index.js';
 import { createCacheCommand } from './commands/cache.js';
 import { createChatCommand } from './commands/chat.js';
+import { createChatLlmCommand } from './commands/chat-llm.js';
 import { createMigrateCommand } from './commands/migrate.js';
 import { createRotateCommand } from './commands/rotate.js';
 import { ApiClient, ApiError } from './api-client.js';
@@ -241,6 +242,13 @@ export function createProgram(): Command {
     log: (...args) => console.log(...args),
   }));
+  program.addCommand(createChatLlmCommand({
+    client,
+    baseUrl,
+    ...(creds?.token !== undefined ? { token: creds.token } : {}),
+    log: (...args) => console.log(...args),
+  }));
   program.addCommand(createPatchCommand({
     client,
     log: (...args) => console.log(...args),