From 7e6b0cab449e248e134e954e56b668c57a6b0594 Mon Sep 17 00:00:00 2001
From: Michal
Date: Mon, 27 Apr 2026 14:25:38 +0100
Subject: [PATCH] feat(cli): mcpctl chat-llm + KIND/STATUS columns (v1 Stage 5)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the loop on the user-facing surface:

  $ mcpctl get llm
  NAME            KIND     STATUS  TYPE    MODEL                      TIER  KEY  ID
  qwen3-thinking  public   active  openai  qwen3-thinking             fast  ...  ...
  vllm-local      virtual  active  openai  Qwen/Qwen2.5-7B-Instruct   fast  -    ...

  $ mcpctl chat-llm vllm-local
  ────────────────────────────────────────
  LLM: vllm-local openai → Qwen/Qwen2.5-7B-Instruct-AWQ
  Kind: virtual Status: active
  ────────────────────────────────────────
  > hello?
  Hi! …

New: chat-llm command (commands/chat-llm.ts)
- Stateless chat with any mcpd-registered LLM. No threads, no tools, no
  project prompts. POSTs to /api/v1/llms/<name>/infer; mcpd's
  kind=virtual branch handles relay-through-mcplocal transparently, so
  the same CLI command works for both public and virtual LLMs.
- Reuses installStatusBar / formatStats / recordDelta / styleStats /
  PhaseStats from chat.ts (now exported) so the bottom-row
  tokens-per-second ticker behaves identically to mcpctl chat.
- Flags: --message (one-shot), --system, --temperature, --max-tokens,
  --no-stream. Streaming uses OpenAI chat.completion.chunk SSE.
- REPL mode keeps a per-session history array so multi-turn flows feel
  natural; each turn is an independent inference call.

Updated: get.ts
- LlmRow gains optional kind/status fields.
- llmColumns layout: NAME, KIND, STATUS, TYPE, MODEL, TIER, KEY, ID.
  Defaults gracefully when older mcpd responses don't return them.

Updated: chat.ts
- Re-exports the helpers chat-llm.ts needs (PhaseStats, newPhase,
  recordDelta, formatStats, styleStats, styleThinking, STDERR_IS_TTY,
  StatusBar, installStatusBar). No behavior change.

Completions: chat-llm picks up the standard option enumeration
automatically; bash gets a special case for first-arg LLM-name
completion via _mcpctl_resource_names "llms".

CLI suite: 437/437 (was 430, +7 from auto-discovered test cases in the
regenerated completions golden). Workspace: 2043/2043 across 152 files.
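
For reference, the wire shape of one turn as the CLI sends and reads it
(illustrative values; an upstream provider may return extra fields that
the CLI simply ignores):

  POST /api/v1/llms/<name>/infer
  { "messages": [{ "role": "user", "content": "hello?" }], "stream": true }
  data: {"choices":[{"delta":{"content":"Hi"}}]}
  data: [DONE]

With --no-stream the same POST omits "stream": true and the reply is a
single { "choices": [{ "message": { "content": "Hi!" } }] } JSON body.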
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 completions/mcpctl.bash          |  11 +-
 completions/mcpctl.fish          |  10 +-
 scripts/generate-completions.ts  |  14 ++
 src/cli/src/commands/chat-llm.ts | 271 +++++++++++++++++++++++++++++++
 src/cli/src/commands/chat.ts     |  24 +--
 src/cli/src/commands/get.ts      |   6 +
 src/cli/src/index.ts             |   8 +
 7 files changed, 330 insertions(+), 14 deletions(-)
 create mode 100644 src/cli/src/commands/chat-llm.ts

diff --git a/completions/mcpctl.bash b/completions/mcpctl.bash
index d0ea0a4..2b86325 100644
--- a/completions/mcpctl.bash
+++ b/completions/mcpctl.bash
@@ -5,7 +5,7 @@ _mcpctl() {
   local cur prev words cword
   _init_completion || return
 
-  local commands="status login logout config get describe delete logs create edit apply chat patch backup approve console cache test migrate rotate"
+  local commands="status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache test migrate rotate"
   local project_commands="get describe delete logs create edit attach-server detach-server"
   local global_opts="-v --version --daemon-url --direct -p --project -h --help"
   local resources="servers instances secrets secretbackends llms agents personalities templates projects users groups rbac prompts promptrequests serverattachments proxymodels all"
@@ -247,6 +247,15 @@ _mcpctl() {
         COMPREPLY=($(compgen -W "-m --message --thread --system --system-file --system-append --personality --temperature --top-p --top-k --max-tokens --seed --stop --allow-tool --extra --no-stream -h --help" -- "$cur"))
       fi
       return ;;
+    chat-llm)
+      if [[ $((cword - subcmd_pos)) -eq 1 ]]; then
+        local names
+        names=$(_mcpctl_resource_names "llms")
+        COMPREPLY=($(compgen -W "$names -m --message --system --temperature --max-tokens --no-stream -h --help" -- "$cur"))
+      else
+        COMPREPLY=($(compgen -W "-m --message --system --temperature --max-tokens --no-stream -h --help" -- "$cur"))
+      fi
+      return ;;
     patch)
       if [[ -z "$resource_type" ]]; then
         COMPREPLY=($(compgen -W "$resources -h --help" -- "$cur"))
diff --git a/completions/mcpctl.fish b/completions/mcpctl.fish
index ed739a7..810b375 100644
--- a/completions/mcpctl.fish
+++ b/completions/mcpctl.fish
@@ -4,7 +4,7 @@
 # Erase any stale completions from previous versions
 complete -c mcpctl -e
 
-set -l commands status login logout config get describe delete logs create edit apply chat patch backup approve console cache test migrate rotate
+set -l commands status login logout config get describe delete logs create edit apply chat chat-llm patch backup approve console cache test migrate rotate
 set -l project_commands get describe delete logs create edit attach-server detach-server
 
 # Disable file completions by default
@@ -231,6 +231,7 @@ complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a edit -d 'Edit a resource in your default editor (server, project)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a apply -d 'Apply declarative configuration from a YAML or JSON file'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a chat -d 'Open an interactive chat session with an agent (REPL or one-shot).'
+complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a chat-llm -d 'Stateless chat with any registered LLM (public or virtual). No threads, no tools.'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a patch -d 'Patch a resource field (e.g. mcpctl patch project myproj llmProvider=none)'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a backup -d 'Git-based backup status and management'
 complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a approve -d 'Approve a pending prompt request (atomic: delete request, create prompt)'
@@ -518,6 +519,13 @@ complete -c mcpctl -n "__fish_seen_subcommand_from chat" -l allow-tool -d 'Restr
 complete -c mcpctl -n "__fish_seen_subcommand_from chat" -l extra -d 'Provider-specific knob k=v (repeatable)' -x
 complete -c mcpctl -n "__fish_seen_subcommand_from chat" -l no-stream -d 'Disable SSE streaming (single JSON response)'
 
+# chat-llm options
+complete -c mcpctl -n "__fish_seen_subcommand_from chat-llm" -s m -l message -d 'One-shot: send a single message and exit (no REPL)' -x
+complete -c mcpctl -n "__fish_seen_subcommand_from chat-llm" -l system -d 'Optional system prompt' -x
+complete -c mcpctl -n "__fish_seen_subcommand_from chat-llm" -l temperature -d 'Sampling temperature (0..2)' -x
+complete -c mcpctl -n "__fish_seen_subcommand_from chat-llm" -l max-tokens -d 'Maximum tokens in the assistant reply' -x
+complete -c mcpctl -n "__fish_seen_subcommand_from chat-llm" -l no-stream -d 'Disable SSE streaming (single JSON response)'
+
 # console options
 complete -c mcpctl -n "__fish_seen_subcommand_from console" -l stdin-mcp -d 'Run inspector as MCP server over stdin/stdout (for Claude)'
 complete -c mcpctl -n "__fish_seen_subcommand_from console" -l audit -d 'Browse audit events from mcpd'
diff --git a/scripts/generate-completions.ts b/scripts/generate-completions.ts
index a0941a1..c8d61fc 100644
--- a/scripts/generate-completions.ts
+++ b/scripts/generate-completions.ts
@@ -920,6 +920,20 @@ function emitBashCase(emit: (s: string) => void, cmd: CmdInfo, root: CmdInfo): v
     return;
   }
 
+  // chat-llm: first arg is LLM name
+  if (name === 'chat-llm') {
+    emit(`    ${name})`);
+    emit('      if [[ $((cword - subcmd_pos)) -eq 1 ]]; then');
+    emit('        local names');
+    emit('        names=$(_mcpctl_resource_names "llms")');
+    emit(`        COMPREPLY=($(compgen -W "$names ${optFlags}" -- "$cur"))`);
+    emit('      else');
+    emit(`        COMPREPLY=($(compgen -W "${optFlags}" -- "$cur"))`);
+    emit('      fi');
+    emit('      return ;;');
+    return;
+  }
+
   // console: first arg is project name
   if (name === 'console') {
     emit(`    ${name})`);
diff --git a/src/cli/src/commands/chat-llm.ts b/src/cli/src/commands/chat-llm.ts
new file mode 100644
index 0000000..b2c1d22
--- /dev/null
+++ b/src/cli/src/commands/chat-llm.ts
@@ -0,0 +1,271 @@
+/**
+ * `mcpctl chat-llm <name>` — stateless chat with any registered LLM.
+ *
+ * Distinct from `mcpctl chat <agent>`:
+ * - No threads, no history, no tools, no project prompts.
+ * - Just an OpenAI chat-completions round-trip per turn.
+ * - Works for both kinds of mcpd-registered LLMs:
+ *   * `kind=public` — direct upstream call (existing behavior).
+ *   * `kind=virtual` — relayed through the publishing mcplocal's SSE
+ *     channel (the v1 virtual-LLM feature).
+ *
+ * The CLI doesn't need to know which kind the LLM is; mcpd's
+ * `/api/v1/llms/:name/infer` route branches on `kind` server-side.
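+ *
+ * Each turn posts the full conversation so far (the server keeps no
+ * state between calls). Illustrative request body for a second REPL
+ * turn, with values made up for the example:
+ *
+ *   { "messages": [
+ *       { "role": "system",    "content": "..." },      // only with --system
+ *       { "role": "user",      "content": "hello?" },
+ *       { "role": "assistant", "content": "Hi!" },
+ *       { "role": "user",      "content": "and then?" } ] }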
+ */
+import { Command } from 'commander';
+import http from 'node:http';
+import https from 'node:https';
+import readline from 'node:readline';
+import type { ApiClient } from '../api-client.js';
+import {
+  formatStats,
+  installStatusBar,
+  newPhase,
+  recordDelta,
+  STDERR_IS_TTY,
+  styleStats,
+  type PhaseStats,
+  type StatusBar,
+} from './chat.js';
+
+const STREAM_TIMEOUT_MS = 600_000;
+
+export interface ChatLlmCommandDeps {
+  client: ApiClient;
+  baseUrl: string;
+  token?: string | undefined;
+  log: (...args: unknown[]) => void;
+}
+
+export function createChatLlmCommand(deps: ChatLlmCommandDeps): Command {
+  return new Command('chat-llm')
+    .description('Stateless chat with any registered LLM (public or virtual). No threads, no tools.')
+    .argument('<name>', 'LLM name (see `mcpctl get llm`)')
+    .option('-m, --message <text>', 'One-shot: send a single message and exit (no REPL)')
+    .option('--system <prompt>', 'Optional system prompt')
+    .option('--temperature <n>', 'Sampling temperature (0..2)', parseFloat)
+    .option('--max-tokens <n>', 'Maximum tokens in the assistant reply', parseFloatInt)
+    .option('--no-stream', 'Disable SSE streaming (single JSON response)')
+    .action(async (name: string, opts: ChatLlmOpts) => {
+      await printHeader(deps, name, opts.system);
+      if (opts.message !== undefined) {
+        await runOneShot(deps, name, opts);
+        return;
+      }
+      await runRepl(deps, name, opts);
+    });
+}
+
+interface ChatLlmOpts {
+  message?: string;
+  system?: string;
+  temperature?: number;
+  maxTokens?: number;
+  stream?: boolean;
+}
+
+interface LlmInfo {
+  name: string;
+  type: string;
+  model: string;
+  kind: 'public' | 'virtual';
+  status: 'active' | 'inactive' | 'hibernating';
+}
+
+async function printHeader(deps: ChatLlmCommandDeps, name: string, systemPrompt?: string): Promise<void> {
+  let info: LlmInfo;
+  try {
+    info = await deps.client.get<LlmInfo>(`/api/v1/llms/${encodeURIComponent(name)}`);
+  } catch (err) {
+    process.stderr.write(`(could not fetch LLM metadata: ${(err as Error).message})\n`);
+    return;
+  }
+  const sep = '─'.repeat(60);
+  const out = (s: string): void => { process.stderr.write(`${styleStats(s)}\n`); };
+  out(sep);
+  out(`LLM: ${info.name} ${info.type} → ${info.model}`);
+  out(`Kind: ${info.kind} Status: ${info.status}`);
+  if (systemPrompt !== undefined) {
+    out(`System: ${systemPrompt.slice(0, 120)}${systemPrompt.length > 120 ? '…' : ''}`);
+  }
+  out(sep);
+}
+
+async function runOneShot(deps: ChatLlmCommandDeps, name: string, opts: ChatLlmOpts): Promise<void> {
+  const messages = buildMessages([], opts.system, opts.message ?? '');
+  const bar = opts.stream === false ? null : installStatusBar();
+  try {
+    if (opts.stream === false) {
+      const reply = await postNonStream(deps, name, messages, opts);
+      process.stdout.write(`${reply}\n`);
+    } else {
+      await streamOnce(deps, name, messages, opts, bar);
+    }
+  } finally {
+    bar?.teardown();
+  }
+}
+
+async function runRepl(deps: ChatLlmCommandDeps, name: string, opts: ChatLlmOpts): Promise<void> {
+  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+  const ask = (q: string): Promise<string> => new Promise((resolve) => rl.question(q, resolve));
+  const history: Array<{ role: 'user' | 'assistant'; content: string }> = [];
+
+  const bar = opts.stream === false ? null : installStatusBar();
+  process.stderr.write(`Stateless chat with LLM '${name}'. Ctrl-D to exit.\n`);
+
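+  // Stateless by design: each turn below re-sends opts.system plus the
+  // full local `history` array; the server keeps no thread between calls.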
+  try {
+    while (true) {
+      let line: string;
+      try { line = await ask('> '); } catch { break; }
+      if (line === '') continue;
+
+      const messages = buildMessages(history, opts.system, line);
+      try {
+        let reply: string;
+        if (opts.stream === false) {
+          reply = await postNonStream(deps, name, messages, opts);
+          process.stdout.write(`${reply}\n`);
+        } else {
+          reply = await streamOnce(deps, name, messages, opts, bar);
+          process.stdout.write('\n');
+        }
+        history.push({ role: 'user', content: line });
+        history.push({ role: 'assistant', content: reply });
+      } catch (err) {
+        process.stderr.write(`error: ${(err as Error).message}\n`);
+      }
+    }
+    rl.close();
+  } finally {
+    bar?.teardown();
+  }
+}
+
+function buildMessages(
+  history: Array<{ role: 'user' | 'assistant'; content: string }>,
+  system: string | undefined,
+  user: string,
+): Array<{ role: 'system' | 'user' | 'assistant'; content: string }> {
+  const out: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
+  if (system !== undefined && system !== '') out.push({ role: 'system', content: system });
+  out.push(...history);
+  out.push({ role: 'user', content: user });
+  return out;
+}
+
+async function postNonStream(
+  deps: ChatLlmCommandDeps,
+  name: string,
+  messages: Array<{ role: string; content: string }>,
+  opts: ChatLlmOpts,
+): Promise<string> {
+  const body: Record<string, unknown> = { messages };
+  if (opts.temperature !== undefined) body['temperature'] = opts.temperature;
+  if (opts.maxTokens !== undefined) body['max_tokens'] = opts.maxTokens;
+  const res = await deps.client.post<{
+    choices?: Array<{ message?: { content?: string } }>;
+  }>(`/api/v1/llms/${encodeURIComponent(name)}/infer`, body);
+  return res.choices?.[0]?.message?.content ?? '';
+}
+
+/**
+ * Stream a single chat call against /api/v1/llms/:name/infer with stream=true.
+ * The response is OpenAI-style SSE (`data: <chat.completion.chunk JSON>`).
+ * Returns the assembled assistant content.
+ */
+function streamOnce(
+  deps: ChatLlmCommandDeps,
+  name: string,
+  messages: Array<{ role: string; content: string }>,
+  opts: ChatLlmOpts,
+  bar: StatusBar | null,
+): Promise<string> {
+  const url = new URL(`${deps.baseUrl}/api/v1/llms/${encodeURIComponent(name)}/infer`);
+  const reqBody: Record<string, unknown> = { messages, stream: true };
+  if (opts.temperature !== undefined) reqBody['temperature'] = opts.temperature;
+  if (opts.maxTokens !== undefined) reqBody['max_tokens'] = opts.maxTokens;
+  const payload = JSON.stringify(reqBody);
+  const stats = { thinking: newPhase(), content: newPhase() } satisfies { thinking: PhaseStats; content: PhaseStats };
+
+  const TICK_MS = 250;
+  let timer: NodeJS.Timeout | null = null;
+  function startTicker(): void {
+    if (timer !== null || bar === null) return;
+    timer = setInterval(() => bar.update(formatStats(stats, true)), TICK_MS);
+  }
+  function stopTicker(): void {
+    if (timer !== null) { clearInterval(timer); timer = null; }
+  }
+
+  return new Promise<string>((resolve, reject) => {
+    let assistant = '';
+    const driver = url.protocol === 'https:' ? https : http;
+    const req = driver.request({
+      hostname: url.hostname,
+      port: url.port || (url.protocol === 'https:' ? 443 : 80),
+      path: url.pathname + url.search,
+      method: 'POST',
+      timeout: STREAM_TIMEOUT_MS,
+      headers: {
+        'Content-Type': 'application/json',
+        Accept: 'text/event-stream',
+        ...(deps.token !== undefined ? { Authorization: `Bearer ${deps.token}` } : {}),
+      },
+    }, (res) => {
+      const status = res.statusCode ?? 0;
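+      // Non-2xx: buffer the whole body before rejecting so the error
+      // message carries the server's payload, not just the status code.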
+      if (status >= 400) {
+        const chunks: Buffer[] = [];
+        res.on('data', (c: Buffer) => chunks.push(c));
+        res.on('end', () => reject(new Error(`HTTP ${String(status)}: ${Buffer.concat(chunks).toString('utf-8')}`)));
+        return;
+      }
+      let buf = '';
+      res.setEncoding('utf-8');
+      res.on('data', (chunk: string) => {
+        buf += chunk;
+        let nl: number;
+        while ((nl = buf.indexOf('\n\n')) !== -1) {
+          const frame = buf.slice(0, nl);
+          buf = buf.slice(nl + 2);
+          for (const line of frame.split('\n')) {
+            if (!line.startsWith('data: ')) continue;
+            const data = line.slice(6);
+            if (data === '[DONE]') continue;
+            try {
+              const parsed = JSON.parse(data) as { choices?: Array<{ delta?: { content?: string } }> };
+              const piece = parsed.choices?.[0]?.delta?.content;
+              if (typeof piece === 'string' && piece !== '') {
+                recordDelta(stats.content, piece);
+                process.stdout.write(piece);
+                assistant += piece;
+                startTicker();
+              }
+            } catch {
+              // ignore malformed frames
+            }
+          }
+        }
+      });
+      res.on('end', () => {
+        stopTicker();
+        const final = formatStats(stats, false);
+        if (final !== '' && STDERR_IS_TTY) process.stderr.write(`\n${styleStats(`(${final})`)}`);
+        else if (final !== '') process.stderr.write(`\n(${final})`);
+        if (bar !== null && final !== '') bar.update(final);
+        resolve(assistant);
+      });
+      res.on('error', (err) => { stopTicker(); reject(err); });
+    });
+    req.on('error', (err) => { stopTicker(); reject(err); });
+    req.on('timeout', () => { stopTicker(); req.destroy(); reject(new Error('chat-llm stream timed out')); });
+    req.write(payload);
+    req.end();
+  });
+}
+
+function parseFloatInt(value: string): number {
+  const n = Number(value);
+  if (!Number.isInteger(n)) throw new Error(`expected integer, got '${value}'`);
+  return n;
+}
diff --git a/src/cli/src/commands/chat.ts b/src/cli/src/commands/chat.ts
index e302cb6..edece9c 100644
--- a/src/cli/src/commands/chat.ts
+++ b/src/cli/src/commands/chat.ts
@@ -525,24 +525,24 @@ interface ChatStreamFrame {
 // ANSI codes for the reasoning sidebar. Dim + italic visually separates
 // reasoning ("the model is thinking") from final assistant content. We only
 // emit the codes when stderr is a TTY — piping to a file should stay clean.
-const ANSI_DIM_ITALIC = '\x1b[2;3m';
-const ANSI_DIM = '\x1b[2m';
-const ANSI_RESET = '\x1b[0m';
-const STDERR_IS_TTY = process.stderr.isTTY === true;
-function styleThinking(s: string): string {
+export const ANSI_DIM_ITALIC = '\x1b[2;3m';
+export const ANSI_DIM = '\x1b[2m';
+export const ANSI_RESET = '\x1b[0m';
+export const STDERR_IS_TTY = process.stderr.isTTY === true;
+export function styleThinking(s: string): string {
   return STDERR_IS_TTY ? `${ANSI_DIM_ITALIC}${s}${ANSI_RESET}` : s;
 }
-function styleStats(s: string): string {
+export function styleStats(s: string): string {
   return STDERR_IS_TTY ? `${ANSI_DIM}${s}${ANSI_RESET}` : s;
 }
 
-interface PhaseStats {
+export interface PhaseStats {
   words: number;
   firstMs: number;
   lastMs: number;
 }
-function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; }
-function recordDelta(p: PhaseStats, delta: string): void {
+export function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; }
+export function recordDelta(p: PhaseStats, delta: string): void {
   const now = Date.now();
   if (p.firstMs === 0) p.firstMs = now;
   p.lastMs = now;
@@ -558,7 +558,7 @@ function formatPhase(label: string, p: PhaseStats): string | null {
   const rate = p.words / sec;
   return `${label}${String(p.words)}w · ${rate.toFixed(1)} w/s · ${sec.toFixed(1)}s`;
 }
-function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string {
+export function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string {
   const parts: string[] = [];
   const c = formatPhase('', s.content);
   if (c !== null) parts.push(c);
@@ -588,12 +588,12 @@ function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial:
  * a foreign terminal in a half-locked state if Ctrl-C / uncaught exception
  * fires mid-stream.
  */
-interface StatusBar {
+export interface StatusBar {
   update(text: string): void;
   teardown(): void;
 }
 
-function installStatusBar(): StatusBar | null {
+export function installStatusBar(): StatusBar | null {
   const out = process.stdout;
   if (!out.isTTY) return null;
   const initialRows = out.rows;
diff --git a/src/cli/src/commands/get.ts b/src/cli/src/commands/get.ts
index b56ec8d..c55f572 100644
--- a/src/cli/src/commands/get.ts
+++ b/src/cli/src/commands/get.ts
@@ -132,10 +132,16 @@ interface LlmRow {
   url: string;
   description: string;
   apiKeyRef: { name: string; key: string } | null;
+  // Virtual-provider lifecycle (optional for backward compat with older
+  // mcpd responses that predate the kind/status columns).
+  kind?: 'public' | 'virtual';
+  status?: 'active' | 'inactive' | 'hibernating';
 }
 
 const llmColumns: Column<LlmRow>[] = [
   { header: 'NAME', key: 'name' },
+  { header: 'KIND', key: (r) => r.kind ?? 'public', width: 8 },
+  { header: 'STATUS', key: (r) => r.status ?? 'active', width: 12 },
   { header: 'TYPE', key: 'type', width: 12 },
   { header: 'MODEL', key: 'model', width: 28 },
   { header: 'TIER', key: 'tier', width: 8 },
diff --git a/src/cli/src/index.ts b/src/cli/src/index.ts
index 6a0485f..4f54215 100644
--- a/src/cli/src/index.ts
+++ b/src/cli/src/index.ts
@@ -19,6 +19,7 @@ import { createPatchCommand } from './commands/patch.js';
 import { createConsoleCommand } from './commands/console/index.js';
 import { createCacheCommand } from './commands/cache.js';
 import { createChatCommand } from './commands/chat.js';
+import { createChatLlmCommand } from './commands/chat-llm.js';
 import { createMigrateCommand } from './commands/migrate.js';
 import { createRotateCommand } from './commands/rotate.js';
 import { ApiClient, ApiError } from './api-client.js';
@@ -241,6 +242,13 @@ export function createProgram(): Command {
     log: (...args) => console.log(...args),
   }));
 
+  program.addCommand(createChatLlmCommand({
+    client,
+    baseUrl,
+    ...(creds?.token !== undefined ? { token: creds.token } : {}),
+    log: (...args) => console.log(...args),
+  }));
+
   program.addCommand(createPatchCommand({
     client,
     log: (...args) => console.log(...args),