From cc9822d38b8221bdc618927c22c8349084409d5d Mon Sep 17 00:00:00 2001 From: Michal Date: Sun, 26 Apr 2026 17:15:26 +0100 Subject: [PATCH] feat(chat): live words/sec ticker + final stats footer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While streaming, the REPL now shows a live word/sec counter on a status line one row below the cursor — refreshes every 250ms via ANSI cursor save+restore so it floats with the content as the response grows. After each response, a dim stats footer prints on stderr: (47w · 12.1 w/s · 3.9s | thinking 234w · 38.0 w/s · 6.2s) The ticker is stderr-only and only emits when stderr is a TTY — pipes to a file stay clean for grepping/redirect. Words are whitespace- separated tokens (good enough across English/code/Markdown without a tokenizer dependency; CJK under-counts but the rate is still directional). Both phases tracked separately: - thinking: reasoning_content from qwen3-thinking / deepseek-reasoner / o1, where the model's scratchpad is the long part - content: the actual assistant answer Final stats also added to the --no-stream path: total HTTP duration and word count, since we don't get per-token timing there. CLI suite still 430/430. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/cli/src/commands/chat.ts | 107 +++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 6 deletions(-) diff --git a/src/cli/src/commands/chat.ts b/src/cli/src/commands/chat.ts index bcd4a65..951954a 100644 --- a/src/cli/src/commands/chat.ts +++ b/src/cli/src/commands/chat.ts @@ -141,9 +141,12 @@ async function runOneShot( if (stream === false) { const body: Record = { message, ...overrides }; if (threadId !== undefined) body.threadId = threadId; + const startMs = Date.now(); const res = await chatRequestNonStream(deps, agent, body); + const sec = Math.max(0.05, (Date.now() - startMs) / 1000); + const words = (res.assistant.match(/\S+/g) ?? 
[]).length; process.stdout.write(`${res.assistant}\n`); - process.stderr.write(`(thread: ${res.threadId})\n`); + process.stderr.write(styleStats(`(${String(words)}w · ${(words / sec).toFixed(1)} w/s · ${sec.toFixed(1)}s)`) + ` thread:${res.threadId}\n`); return; } const finalThread = await streamOnce(deps, agent, message, threadId, overrides); @@ -365,6 +368,39 @@ async function streamOnce( const url = new URL(`${deps.baseUrl}/api/v1/agents/${encodeURIComponent(agent)}/chat`); const body = JSON.stringify({ message, threadId, stream: true, ...overrides }); + // Per-response counters. Updated on every text/thinking delta, surfaced + // via the live ticker (stderr) and the final stats footer. + const stats = { thinking: newPhase(), content: newPhase() }; + + // Live ticker: every TICK_MS, draws a stats line on a ledger one row below + // the current cursor using ANSI save/restore. The ledger floats with the + // content as it grows (terminal scrolls take the saved position with them + // on modern emulators). Disabled when stderr isn't a TTY (pipes stay clean). + const TICK_MS = 250; + let tickerTimer: NodeJS.Timeout | null = null; + let tickerActive = false; + function drawTicker(): void { + if (!STDERR_IS_TTY) return; + const text = formatStats(stats, true); + if (text === '') return; + // \x1b[s = save cursor, \n = down one (scrolls if at bottom), + // \x1b[K = clear line, write ticker, \x1b[u = restore. + process.stderr.write(`\x1b[s\n\x1b[K${styleStats(text)}\x1b[u`); + tickerActive = true; + } + function clearTicker(): void { + if (!STDERR_IS_TTY || !tickerActive) return; + process.stderr.write('\x1b[s\n\x1b[K\x1b[u'); + tickerActive = false; + } + function stopTicker(): void { + if (tickerTimer !== null) { + clearInterval(tickerTimer); + tickerTimer = null; + } + clearTicker(); + } + return new Promise((resolve, reject) => { const driver = url.protocol === 'https:' ? 
https : http; const req = driver.request({ @@ -402,14 +438,26 @@ async function streamOnce( const evt = JSON.parse(data) as ChatStreamFrame; switch (evt.type) { case 'text': - if (typeof evt.delta === 'string') process.stdout.write(evt.delta); + if (typeof evt.delta === 'string') { + recordDelta(stats.content, evt.delta); + process.stdout.write(evt.delta); + if (tickerTimer === null && STDERR_IS_TTY) { + tickerTimer = setInterval(drawTicker, TICK_MS); + } + } break; case 'thinking': // Reasoning models (qwen3-thinking, deepseek-reasoner, o1 // family) emit this for tens of seconds before producing // any content delta. Show it dim+italic on stderr so the // final answer (stdout) stays clean for grepping/redirect. - if (typeof evt.delta === 'string') process.stderr.write(styleThinking(evt.delta)); + if (typeof evt.delta === 'string') { + recordDelta(stats.thinking, evt.delta); + process.stderr.write(styleThinking(evt.delta)); + if (tickerTimer === null && STDERR_IS_TTY) { + tickerTimer = setInterval(drawTicker, TICK_MS); + } + } break; case 'tool_call': process.stderr.write(`\n[tool_call: ${evt.toolName ?? ''}]\n`); @@ -430,11 +478,21 @@ async function streamOnce( } } }); - res.on('end', () => resolve(resolvedThread)); - res.on('error', reject); + res.on('end', () => { + stopTicker(); + const final = formatStats(stats, false); + if (final !== '' && STDERR_IS_TTY) { + process.stderr.write(`\n${styleStats(`(${final})`)}`); + } else if (final !== '') { + process.stderr.write(`\n(${final})`); + } + resolve(resolvedThread); + }); + res.on('error', (err) => { stopTicker(); reject(err); }); }); - req.on('error', reject); + req.on('error', (err) => { stopTicker(); reject(err); }); req.on('timeout', () => { + stopTicker(); req.destroy(); reject(new Error('chat stream timed out')); }); @@ -457,11 +515,48 @@ interface ChatStreamFrame { // reasoning ("the model is thinking") from final assistant content. 
We only // emit the codes when stderr is a TTY — piping to a file should stay clean. const ANSI_DIM_ITALIC = '\x1b[2;3m'; +const ANSI_DIM = '\x1b[2m'; const ANSI_RESET = '\x1b[0m'; const STDERR_IS_TTY = process.stderr.isTTY === true; function styleThinking(s: string): string { return STDERR_IS_TTY ? `${ANSI_DIM_ITALIC}${s}${ANSI_RESET}` : s; } +function styleStats(s: string): string { + return STDERR_IS_TTY ? `${ANSI_DIM}${s}${ANSI_RESET}` : s; +} + +interface PhaseStats { + words: number; + firstMs: number; + lastMs: number; +} +function newPhase(): PhaseStats { return { words: 0, firstMs: 0, lastMs: 0 }; } +function recordDelta(p: PhaseStats, delta: string): void { + const now = Date.now(); + if (p.firstMs === 0) p.firstMs = now; + p.lastMs = now; + // Whitespace-separated tokens. Good enough across languages without a + // tokenizer dependency. CJK languages will under-count, but for English/ + // code/Markdown (the common case) this matches user expectations. + const matches = delta.match(/\S+/g); + if (matches !== null) p.words += matches.length; +} +function formatPhase(label: string, p: PhaseStats): string | null { + if (p.words === 0) return null; + const sec = Math.max(0.05, (p.lastMs - p.firstMs) / 1000); + const rate = p.words / sec; + return `${label}${String(p.words)}w · ${rate.toFixed(1)} w/s · ${sec.toFixed(1)}s`; +} +function formatStats(s: { thinking: PhaseStats; content: PhaseStats }, partial: boolean): string { + const parts: string[] = []; + const c = formatPhase('', s.content); + if (c !== null) parts.push(c); + const t = formatPhase('thinking ', s.thinking); + if (t !== null) parts.push(t); + if (parts.length === 0) return ''; + const prefix = partial ? '⏵ ' : ''; + return `${prefix}${parts.join(' | ')}`; +} function collect(value: string, prev: string[]): string[] { return [...prev, value];