feat(chat): surface reasoning_content as thinking chunks; fix --no-stream timeout
Reasoning models (qwen3-thinking, deepseek-reasoner, OpenAI o1 family) emit
their scratchpad as `delta.reasoning_content` (or `delta.reasoning`,
or `delta.provider_specific_fields.reasoning_content` when LiteLLM passes
through from vLLM) — separate from `delta.content`. Before this commit
mcpd's parseStreamingChunk only watched `content`, so the model's 30-90s
reasoning phase looked like dead air to the REPL: streaming connection
open, no chunks, no progress. Caught during the agents-feature shakedown
when qwen3-thinking sat silent for 90s on a docmost__list_pages call.
mcpd
====
chat.service.ts
- parseStreamingChunk extracts a `reasoningDelta` from the chunk body,
accepting all four spellings (reasoning_content / reasoning /
provider_specific_fields.{reasoning_content,reasoning}). Future
providers can add their own field names by extending the
fallback chain.
- chatStream yields `{ type: 'thinking', delta }` chunks as reasoning
arrives, alongside the existing `{ type: 'text', delta }` for content.
- Reasoning is intentionally NOT persisted to the thread. It's the
model's scratchpad, not part of the conversation. Subsequent turns
don't see it.
- Adds 'thinking' to the ChatStreamChunk.type union.
CLI
===
chat.ts
- streamOnce handles 'thinking' chunks: writes them dim+italic to
stderr (ANSI SGR 2;3, i.e. `ESC[2;3m`) so the model's reasoning visually
flows like a quote block while the final answer streams to stdout. Plain
text when stderr isn't a TTY (pipe to file → no escape codes leak).
- chatRequestNonStream replaces the shared ApiClient.post() for the
--no-stream path. ApiClient defaults to a 10s timeout, way too tight
for any chat that calls a tool: LLM round + tool dispatch + LLM
summary easily exceeds 10s. The new helper uses the same 600s timeout
the streaming path has been using all along.
Tests:
chat-service.test.ts (+2):
- reasoning_content deltas surface as `thinking` chunks (not text);
reasoning is NOT persisted to the assistant turn's content.
- LiteLLM's provider_specific_fields.reasoning_content shape parses
identically to the vendor-native shape.
mcpd 777/777, cli 430/430.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -411,6 +411,76 @@ describe('ChatService', () => {
|
||||
expect(ctx.body.tools?.[0]?.function.name).toBe(`s1${TOOL_NAME_SEPARATOR}a`);
|
||||
});
|
||||
|
||||
// Regression: reasoning_content (qwen3-thinking, deepseek-reasoner, o1)
|
||||
// streams as `thinking` chunks, separate from `text`.
|
||||
// Without this, the model's 30-90s reasoning phase looks like dead air to
|
||||
// the REPL — caught by user feedback during the agents-feature shakedown.
|
||||
it('chatStream surfaces reasoning_content deltas as `thinking` chunks', async () => {
  const repo = mockChatRepo();

  // Wraps a delta in a single-choice, OpenAI-format streaming frame.
  const frame = (delta: Record<string, unknown>, finish_reason: string | null) => ({
    data: JSON.stringify({ choices: [{ delta, finish_reason }] }),
  });

  // Scripted adapter: two reasoning deltas, one content delta, then the
  // [DONE] sentinel.
  const scriptedAdapter: LlmAdapter = {
    kind: 'scripted-thinking',
    infer: vi.fn(),
    async *stream() {
      yield frame({ reasoning_content: 'Let me think... ' }, null);
      yield frame({ reasoning_content: 'OK, ready.' }, null);
      yield frame({ content: 'DONE' }, 'stop');
      yield { data: '[DONE]', done: true };
    },
  };

  const service = new ChatService(
    mockAgents(), mockLlms(), adapterRegistry(scriptedAdapter),
    repo, mockPromptRepo(), mockTools(),
  );

  // Drain the stream, keeping only the fields the assertions look at.
  const seen: Array<{ type: string; delta?: string }> = [];
  const stream = service.chatStream({
    agentName: 'reviewer', userMessage: 'hi', ownerId: 'owner-1',
  });
  for await (const chunk of stream) {
    seen.push({ type: chunk.type, delta: chunk.delta });
  }

  // Deltas of every collected chunk of the given type, in arrival order.
  const deltasOf = (kind: string) =>
    seen.filter((c) => c.type === kind).map((c) => c.delta);

  // Two thinking chunks, one text chunk, and a final chunk are expected.
  expect(deltasOf('thinking')).toEqual(['Let me think... ', 'OK, ready.']);
  expect(deltasOf('text')).toEqual(['DONE']);
  expect(seen.find((c) => c.type === 'final')).toBeDefined();

  // The persisted assistant turn carries the content only, never the reasoning.
  const persisted = repo._msgs.find((m) => m.role === 'assistant');
  expect(persisted?.content).toBe('DONE');
  expect(persisted?.content).not.toContain('Let me think');
});
|
||||
|
||||
// Regression: provider_specific_fields.reasoning_content shape (LiteLLM
|
||||
// passthrough from vLLM) is also recognized.
|
||||
it('chatStream recognizes LiteLLM provider_specific_fields.reasoning_content', async () => {
  const chatRepo = mockChatRepo();
  // Scripted adapter: one reasoning delta nested under
  // `provider_specific_fields` (the LiteLLM passthrough shape), then one
  // plain content delta, then the [DONE] sentinel.
  const adapter: LlmAdapter = {
    kind: 'scripted-litellm',
    infer: vi.fn(),
    stream: async function*() {
      yield { data: JSON.stringify({ choices: [{ delta: { provider_specific_fields: { reasoning_content: 'thinking via litellm...' } }, finish_reason: null }] }) };
      yield { data: JSON.stringify({ choices: [{ delta: { content: 'ok' }, finish_reason: 'stop' }] }) };
      yield { data: '[DONE]', done: true };
    },
  };
  const svc = new ChatService(
    mockAgents(), mockLlms(), adapterRegistry(adapter),
    chatRepo, mockPromptRepo(), mockTools(),
  );

  const chunks: Array<{ type: string; delta?: string }> = [];
  for await (const chunk of svc.chatStream({
    agentName: 'reviewer', userMessage: 'hi', ownerId: 'owner-1',
  })) {
    chunks.push({ type: chunk.type, delta: chunk.delta });
  }

  // The nested reasoning field surfaces as a `thinking` chunk.
  expect(chunks.filter((c) => c.type === 'thinking').map((c) => c.delta))
    .toEqual(['thinking via litellm...']);

  // The content delta that follows still streams as `text`; the nested
  // reasoning must not be misrouted into (or swallow) the answer.
  expect(chunks.filter((c) => c.type === 'text').map((c) => c.delta)).toEqual(['ok']);

  // As with the vendor-native shape, only the assistant content is persisted
  // to the thread; the reasoning is not.
  const assistantTurn = chatRepo._msgs.find((m) => m.role === 'assistant');
  expect(assistantTurn?.content).toBe('ok');
});
|
||||
|
||||
// Regression: per-agent maxIterations override + clamp.
|
||||
// Found by /gstack-review on 2026-04-25.
|
||||
// Without the clamp, a hostile agent definition with `extras.maxIterations:1000000`
|
||||
|
||||
Reference in New Issue
Block a user