Files
mcpctl/src/mcplocal/tests/smoke/agent-chat.smoke.test.ts
Michal 610808b9e7
Some checks failed
CI/CD / lint (pull_request) Successful in 54s
CI/CD / test (pull_request) Successful in 1m7s
CI/CD / typecheck (pull_request) Successful in 2m37s
CI/CD / smoke (pull_request) Failing after 1m43s
CI/CD / build (pull_request) Successful in 5m42s
CI/CD / publish (pull_request) Has been skipped
fix(chat): real fixes for thinking-model + URL conventions, not test tweaks
Five real bugs surfaced by the agent-chat smoke against live
qwen3-thinking. None of these are fixed by changing the test — the
test was right to fail.

1. openai-passthrough adapter doubled `/v1` in the request URL. The
   adapter hard-codes `/v1/chat/completions` after the configured base,
   but every OpenAI-compat provider documents its base URL with a
   trailing `/v1` (api.openai.com/v1, llm.example.com/v1, …). Users
   pasting that conventional shape produced
   `https://x/v1/v1/chat/completions` → 404. endpointUrl now strips a
   trailing `/v1` so both forms canonicalize. `/v1beta` (Google
   Gemini-style) is preserved.

2. Non-streaming chat returned an empty assistant when thinking models
   (qwen3-thinking, deepseek-reasoner, OpenAI o1) emitted only
   `reasoning_content` with `content: null`. extractChoice now also
   pulls reasoning (every spelling the streaming parser already knows
   about), and a new pickAssistantText helper falls back to it when
   content is empty. A `[response truncated by max_tokens]` marker is
   appended when finish_reason is `length`, so users see the cut-off
   instead of guessing why the answer is short. Symmetric streaming
   fix: the chatStream loop accumulates reasoning and yields ONE
   synthesized `text` frame at the end when content stayed empty,
   keeping the CLI's stdout (which only prints `text` deltas) in sync
   with the persisted thread message.

3. `mcpctl get agent X -o yaml` emitted `kind: public` (the v3
   lifecycle field) instead of `kind: agent` (apply envelope), so
   round-tripping through `apply -f` failed. Same fix shape as the v1
   Llm strip in toApplyDocs — drop kind/status/lastHeartbeatAt/
   inactiveSince/providerSessionId for the agents resource too.

4. Non-streaming `mcpctl chat` printed `thread:<cuid>` (no space) on
   stderr; streaming printed `(thread: <cuid>)` (with space). Tests
   and any other regex watching for one form missed the other.
   Standardize on `thread: <cuid>` (single space) in both paths.

5. agent-chat.smoke's `run()` used `execSync`, which discards stderr on
   success — making any `expect(stderr).toMatch(...)` assertion
   structurally impossible to satisfy in the happy path. Switch to
   `spawnSync` so stderr is actually captured. Includes a small
   shell-style argv splitter so the existing call sites with quoted
   multi-word values (`--system-prompt "..."`) keep working.

Tests: +6 new mcpd unit tests (4 chat-service for the reasoning
fallback / truncation marker / content-preference / streaming synth;
2 llm-adapters for the URL strip + /v1beta preservation). Full mcpd
+ mcplocal + smoke green: 860/860 + 723/723 + 139/139.
2026-04-27 18:39:01 +01:00

166 lines
6.2 KiB
TypeScript

/**
* Live-LLM smoke for agent chat.
*
* Runs only when MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY are set; the
* idea is to point this at a real OpenAI-compatible endpoint and confirm
* the openai-passthrough adapter delivers the user's message and returns
* an assistant reply. For the project's qwen3-thinking deployment:
*
* MCPCTL_SMOKE_LLM_URL=http://litellm.nvidia-nim.svc.cluster.local:4000/v1 \
* MCPCTL_SMOKE_LLM_MODEL=qwen3-thinking \
* MCPCTL_SMOKE_LLM_KEY=sk-... \
* pnpm test:smoke
*
* If the env vars are missing the test self-skips without failing the
* pipeline (the agent CRUD smoke still runs in agent.smoke.test.ts).
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import http from 'node:http';
import https from 'node:https';
import { spawnSync, execSync } from 'node:child_process';
// Base URL whose `/healthz` endpoint is probed before the suite runs;
// overridable through the MCPD_URL environment variable.
const MCPD_URL = process.env.MCPD_URL ?? 'https://mcpctl.ad.itaz.eu';
// Live-LLM settings read from the environment. The URL and key have no
// default and stay `undefined` when unset; only the model name has a fallback.
const LLM_URL = process.env.MCPCTL_SMOKE_LLM_URL;
const LLM_MODEL = process.env.MCPCTL_SMOKE_LLM_MODEL ?? 'qwen3-thinking';
const LLM_KEY = process.env.MCPCTL_SMOKE_LLM_KEY;
// Base-36 rendering of the current timestamp, embedded in every resource
// name below so names are derived from the time this module was loaded.
const SUFFIX = Date.now().toString(36);
const SECRET_NAME = `smoke-chat-sec-${SUFFIX}`;
const LLM_NAME = `smoke-chat-llm-${SUFFIX}`;
const AGENT_NAME = `smoke-chat-agent-${SUFFIX}`;
/** Outcome of one CLI invocation: numeric exit code plus stdout/stderr text. */
interface CliResult { code: number; stdout: string; stderr: string }
/**
 * Run `mcpctl --direct <args>` synchronously and capture its outcome.
 *
 * Uses spawnSync rather than execSync: execSync returns only stdout on
 * success and discards stderr, which made any `thread:` assertion against a
 * successful chat impossible to evaluate.
 *
 * @param args Shell-style argument string; tokenized by `splitArgs`, so
 *   double-quoted multi-word values such as `--system-prompt "You are..."`
 *   stay together as one argv entry.
 * @returns The exit code (1 when the process produced no exit status, e.g.
 *   it could not be spawned or was killed), and trimmed stdout/stderr. When
 *   spawnSync reports a spawn-level error (binary missing, timeout), its
 *   message is appended to `stderr` so failing assertions that print stderr
 *   are not left with an empty message.
 */
function run(args: string): CliResult {
  const argv = splitArgs(args);
  const res = spawnSync('mcpctl', ['--direct', ...argv], {
    encoding: 'utf-8',
    timeout: 60_000,
  });

  const childStderr = (res.stderr ?? '').trim();
  // `res.error` is only set when the child could not be started or timed out;
  // in that case the child itself usually wrote nothing useful to stderr.
  const spawnFailure = res.error !== undefined ? `spawn error: ${res.error.message}` : '';

  return {
    code: res.status ?? 1,
    stdout: (res.stdout ?? '').trim(),
    stderr: [childStderr, spawnFailure].filter((part) => part.length > 0).join('\n'),
  };
}
/**
 * Tokenize a shell-style argv string with simple double-quote support — just
 * enough for the smoke test's call shapes. Not a full POSIX parser: there is
 * no escaping and no single-quote handling.
 *
 * Rules:
 * - Tokens are separated by runs of whitespace.
 * - A balanced `"..."` segment contributes its contents (quotes removed,
 *   inner whitespace kept) to the current token, whether it stands alone
 *   (`--system-prompt "You are a smoke test..."`) or is attached to other
 *   characters (`KEY="two words"` becomes the single token `KEY=two words`).
 * - `""` yields an empty-string token.
 * - A `"` with no closing partner is kept as a literal character.
 *
 * @param s The argument string to split.
 * @returns The argv entries in order; empty when `s` is empty or blank.
 */
function splitArgs(s: string): string[] {
  const out: string[] = [];
  const whitespace = /\s/;
  let token = '';
  // Tracks whether a token has been started; needed because `""` starts a
  // token whose text is still the empty string.
  let tokenOpen = false;
  let i = 0;

  while (i < s.length) {
    const ch = s.charAt(i);

    if (whitespace.test(ch)) {
      if (tokenOpen) {
        out.push(token);
        token = '';
        tokenOpen = false;
      }
      i += 1;
      continue;
    }

    if (ch === '"') {
      const closing = s.indexOf('"', i + 1);
      if (closing !== -1) {
        // Balanced quote: splice the contents into the current token.
        token += s.slice(i + 1, closing);
        tokenOpen = true;
        i = closing + 1;
        continue;
      }
      // Unterminated quote: fall through and keep it as a literal character.
    }

    token += ch;
    tokenOpen = true;
    i += 1;
  }

  if (tokenOpen) out.push(token);
  return out;
}
/**
 * Probe `<url>/healthz` with a single GET request.
 *
 * @param url Base URL; one trailing slash, if present, is dropped before
 *   `/healthz` is appended.
 * @param timeoutMs Value passed as the request's `timeout` option.
 * @returns A promise resolving to `true` when a response arrives with a
 *   status below 500 (a missing status counts as 500), and to `false` on a
 *   request error or timeout.
 */
function healthz(url: string, timeoutMs = 5000): Promise<boolean> {
  return new Promise((resolve) => {
    const target = new URL(`${url.replace(/\/$/, '')}/healthz`);
    const secure = target.protocol === 'https:';
    const client = secure ? https : http;
    const fallbackPort = secure ? 443 : 80;

    const req = client.get(
      {
        hostname: target.hostname,
        port: target.port || fallbackPort,
        path: target.pathname,
        timeout: timeoutMs,
      },
      (res) => {
        const status = res.statusCode ?? 500;
        resolve(status < 500);
        // Drain the body so the socket is released.
        res.resume();
      },
    );

    req.on('timeout', () => {
      req.destroy();
      resolve(false);
    });
    req.on('error', () => resolve(false));
  });
}
// Assigned in beforeAll from the `/healthz` probe of MCPD_URL; remains false
// when the probe is never attempted or fails.
let mcpdUp = false;
// True only when both the LLM URL and the LLM API key env vars are set.
const liveLlmConfigured = LLM_URL !== undefined && LLM_KEY !== undefined;
describe('agent chat smoke (live LLM)', () => {
// Decide once whether the suite can run: the live-LLM env vars must be set
// and the health probe of MCPD_URL must succeed. Otherwise print why the
// tests will be no-ops.
beforeAll(async () => {
  if (liveLlmConfigured) {
    mcpdUp = await healthz(MCPD_URL);
    if (mcpdUp) return;
    // eslint-disable-next-line no-console
    console.warn(`\n ○ agent-chat smoke: skipped — ${MCPD_URL}/healthz unreachable.\n`);
  } else {
    // eslint-disable-next-line no-console
    console.warn('\n ○ agent-chat smoke: skipped — set MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY to run against a real LLM.\n');
  }
}, 20_000);
// Tear down the resources created by this suite (agent, then Llm, then
// secret). Results are ignored; nothing is attempted when the suite was
// skipped.
afterAll(() => {
  if (liveLlmConfigured && mcpdUp) {
    const created = [`agent ${AGENT_NAME}`, `llm ${LLM_NAME}`, `secret ${SECRET_NAME}`];
    for (const resource of created) {
      run(`delete ${resource}`);
    }
  }
});
it('provisions secret + Llm + agent against the live endpoint', () => {
  if (!(liveLlmConfigured && mcpdUp)) return;

  // Clear out same-named leftovers first; the outcome of each delete is
  // ignored.
  for (const leftover of [`secret ${SECRET_NAME}`, `llm ${LLM_NAME}`, `agent ${AGENT_NAME}`]) {
    run(`delete ${leftover}`);
  }

  // Secret holding the API key that the Llm references below.
  const secretResult = run(`create secret ${SECRET_NAME} --data API_KEY=${LLM_KEY!}`);
  expect(secretResult.code, secretResult.stderr).toBe(0);

  // Llm pointing at the live endpoint.
  const llmCommand =
    `create llm ${LLM_NAME}` +
    ' --type openai' +
    ` --model ${LLM_MODEL}` +
    ` --url ${LLM_URL!}` +
    ` --api-key-ref ${SECRET_NAME}/API_KEY`;
  const llmResult = run(llmCommand);
  expect(llmResult.code, llmResult.stderr).toBe(0);

  // Agent bound to that Llm, with a tightly constrained system prompt.
  const agentCommand =
    `create agent ${AGENT_NAME}` +
    ` --llm ${LLM_NAME}` +
    ' --description "live chat smoke"' +
    ' --system-prompt "You are a smoke test. Always reply with the single token READY."' +
    ' --default-temperature 0' +
    ' --default-max-tokens 32';
  const agentResult = run(agentCommand);
  expect(agentResult.code, agentResult.stderr).toBe(0);
});
it('one-shot `mcpctl chat` sends a message and prints a reply', () => {
  if (!(liveLlmConfigured && mcpdUp)) return;

  const { code, stdout, stderr } = run(`chat ${AGENT_NAME} -m "ping" --no-stream`);
  expect(code, stderr).toBe(0);
  // The system prompt nudges the model toward "READY", but model output is
  // not pinned here: any non-empty reply counts.
  expect(stdout.length).toBeGreaterThan(0);
  // The thread id is expected on stderr.
  expect(stderr).toMatch(/thread:\s+c[a-z0-9]+/);
});
it('streaming `mcpctl chat` emits text deltas', () => {
  if (!liveLlmConfigured || !mcpdUp) return;
  // Default mode is streaming (no --no-stream flag). Go through run() so
  // stdout and stderr are captured separately by spawnSync, instead of
  // redirecting stderr through a shell into a fixed /tmp path and reading it
  // back: that path was shared between concurrent runs, assumed a POSIX shell
  // and /tmp, and silently produced an empty stderr if the read failed.
  // NOTE(review): the file-level `execSync` import may now be unused.
  const result = run(`chat ${AGENT_NAME} -m "say hello"`);
  // As before, the exit code itself is not asserted here; stderr is attached
  // as the failure message so an empty reply is diagnosable.
  expect(result.stdout.length, result.stderr).toBeGreaterThan(0);
  expect(result.stderr).toMatch(/thread:\s+c[a-z0-9]+/);
});
});