Some checks failed
CI/CD / lint (pull_request) Successful in 54s
CI/CD / test (pull_request) Successful in 1m7s
CI/CD / typecheck (pull_request) Successful in 2m37s
CI/CD / smoke (pull_request) Failing after 1m43s
CI/CD / build (pull_request) Successful in 5m42s
CI/CD / publish (pull_request) Has been skipped
Five real bugs surfaced by the agent-chat smoke against live qwen3-thinking. None of these are fixed by changing the test — the test was right to fail.

1. openai-passthrough adapter doubled `/v1` in the request URL. The adapter hard-codes `/v1/chat/completions` after the configured base, but every OpenAI-compat provider documents its base URL with a trailing `/v1` (api.openai.com/v1, llm.example.com/v1, …). Users pasting that conventional shape produced `https://x/v1/v1/chat/completions` → 404. endpointUrl now strips a trailing `/v1` so both forms canonicalize. `/v1beta` (Anthropic-style) is preserved.

2. Non-streaming chat returned an empty assistant when thinking models (qwen3-thinking, deepseek-reasoner, OpenAI o1) emitted only `reasoning_content` with `content: null`. extractChoice now also pulls reasoning (every spelling the streaming parser already knows about), and a new pickAssistantText helper falls back to it when content is empty. A `[response truncated by max_tokens]` marker is appended when finish_reason is `length`, so users see the cut-off instead of guessing why the answer is short. Symmetric streaming fix: the chatStream loop accumulates reasoning and yields ONE synthesized `text` frame at the end when content stayed empty, keeping the CLI's stdout (which only prints `text` deltas) in sync with the persisted thread message.

3. `mcpctl get agent X -o yaml` emitted `kind: public` (the v3 lifecycle field) instead of `kind: agent` (apply envelope), so round-tripping through `apply -f` failed. Same fix shape as the v1 Llm strip in toApplyDocs — drop kind/status/lastHeartbeatAt/inactiveSince/providerSessionId for the agents resource too.

4. Non-streaming `mcpctl chat` printed `thread:<cuid>` (no space) on stderr; streaming printed `(thread: <cuid>)` (with space). Tests and any other regex watching for one form missed the other. Standardize on `thread: <cuid>` (single space) in both paths.

5. agent-chat.smoke's `run()` used `execSync`, which discards stderr on success — making any `expect(stderr).toMatch(...)` assertion structurally impossible to satisfy in the happy path. Switch to `spawnSync` so stderr is actually captured. Includes a small shell-style argv splitter so the existing call sites with quoted multi-word values (`--system-prompt "..."`) keep working.

Tests: +6 new mcpd unit tests (4 chat-service for the reasoning fallback / truncation marker / content-preference / streaming synth; 2 llm-adapters for the URL strip + /v1beta preservation). Full mcpd + mcplocal + smoke green: 860/860 + 723/723 + 139/139.
166 lines
6.2 KiB
TypeScript
166 lines
6.2 KiB
TypeScript
/**
|
|
* Live-LLM smoke for agent chat.
|
|
*
|
|
* Runs only when MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY are set; the
|
|
* idea is to point this at a real OpenAI-compatible endpoint and confirm
|
|
* the openai-passthrough adapter delivers the user's message and returns
|
|
* an assistant reply. For the project's qwen3-thinking deployment:
|
|
*
|
|
* MCPCTL_SMOKE_LLM_URL=http://litellm.nvidia-nim.svc.cluster.local:4000/v1 \
|
|
* MCPCTL_SMOKE_LLM_MODEL=qwen3-thinking \
|
|
* MCPCTL_SMOKE_LLM_KEY=sk-... \
|
|
* pnpm test:smoke
|
|
*
|
|
* If the env vars are missing the test self-skips without failing the
|
|
* pipeline (the agent CRUD smoke still runs in agent.smoke.test.ts).
|
|
*/
|
|
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
|
import http from 'node:http';
|
|
import https from 'node:https';
|
|
import { spawnSync, execSync } from 'node:child_process';
|
|
|
|
/**
 * Read an environment variable, treating an empty or whitespace-only value
 * the same as an unset one.
 *
 * A variable that is exported but blank (as can happen when a CI variable or
 * secret is not defined) would otherwise defeat the `??` defaults below and
 * make the live-LLM settings look configured when they are not.
 *
 * @param name - Name of the environment variable to read.
 * @returns The raw value, or `undefined` when it is unset or blank.
 */
function envOrUndefined(name: string): string | undefined {
  const raw = process.env[name];
  return raw !== undefined && raw.trim() !== '' ? raw : undefined;
}

// Base URL of the mcpd instance probed by the health check; MCPD_URL overrides it.
const MCPD_URL = envOrUndefined('MCPD_URL') ?? 'https://mcpctl.ad.itaz.eu';

// Live LLM endpoint settings. URL and key have no default: when either is
// missing the value is `undefined`.
const LLM_URL = envOrUndefined('MCPCTL_SMOKE_LLM_URL');
const LLM_MODEL = envOrUndefined('MCPCTL_SMOKE_LLM_MODEL') ?? 'qwen3-thinking';
const LLM_KEY = envOrUndefined('MCPCTL_SMOKE_LLM_KEY');

// Time-derived suffix appended to every resource name created by this file.
const SUFFIX = Date.now().toString(36);
const SECRET_NAME = `smoke-chat-sec-${SUFFIX}`;
const LLM_NAME = `smoke-chat-llm-${SUFFIX}`;
const AGENT_NAME = `smoke-chat-agent-${SUFFIX}`;
|
|
|
|
/** Captured outcome of a single `mcpctl` invocation. */
interface CliResult {
  /** Exit status reported for the process. */
  code: number;
  /** Text the process wrote to standard output. */
  stdout: string;
  /** Text the process wrote to standard error. */
  stderr: string;
}
|
|
|
|
/**
 * Run `mcpctl --direct <args>` synchronously and capture its result.
 *
 * Uses spawnSync rather than execSync: execSync hands back only stdout on
 * success, which made any `thread:` assertion against the stderr of a
 * successful chat impossible to evaluate.
 *
 * @param args - Shell-style argument string; tokenized by `splitArgs`, so
 *   quoted multi-word values such as `--system-prompt "You are..."` stay one
 *   argument. No shell is involved.
 * @returns Exit code (1 when the process reported none, e.g. it failed to
 *   start or was killed), trimmed stdout, and trimmed stderr. When the process
 *   could not be spawned, timed out, or was terminated by a signal, a short
 *   bracketed note is appended to `stderr` so assertion messages built from it
 *   are not blank.
 */
function run(args: string): CliResult {
  const argv = splitArgs(args);
  const res = spawnSync('mcpctl', ['--direct', ...argv], {
    encoding: 'utf-8',
    timeout: 60_000,
  });

  // spawnSync reports launch failures and timeouts via `error`, and signal
  // terminations via `signal`, rather than through the child's own stderr.
  const stderrParts = [(res.stderr ?? '').trim()];
  if (res.error !== undefined) {
    stderrParts.push(`[spawn error] ${res.error.message}`);
  }
  if (res.signal) {
    stderrParts.push(`[terminated by signal ${res.signal}]`);
  }

  return {
    code: res.status ?? 1,
    stdout: (res.stdout ?? '').trim(),
    stderr: stderrParts.filter((part) => part !== '').join('\n'),
  };
}
|
|
|
|
/**
 * Tokenize a shell-style argv string with simple double-quote support — just
 * enough for the smoke test's call shapes. Not a full POSIX parser: there is
 * no escaping, no single-quote handling, and no variable expansion.
 *
 * Rules:
 * - Unquoted whitespace separates arguments.
 * - A double-quoted span keeps its whitespace and drops the quotes, wherever
 *   it appears in a token, so both `--system-prompt "You are a smoke test"`
 *   and `--system-prompt="You are a smoke test"` yield a single argument.
 * - `""` produces an empty-string argument.
 * - An unterminated quote runs to the end of the input.
 *
 * @param s - The argument string to split.
 * @returns The arguments in order; empty for blank input.
 */
function splitArgs(s: string): string[] {
  const out: string[] = [];
  const whitespace = /\s/;
  let current = '';
  // Tracks whether a token has started, so that `""` still emits an (empty) arg.
  let inToken = false;
  let inQuotes = false;

  for (const ch of s) {
    if (inQuotes) {
      if (ch === '"') {
        inQuotes = false;
      } else {
        current += ch;
      }
      continue;
    }
    if (ch === '"') {
      inQuotes = true;
      inToken = true;
      continue;
    }
    if (whitespace.test(ch)) {
      if (inToken) {
        out.push(current);
        current = '';
        inToken = false;
      }
      continue;
    }
    current += ch;
    inToken = true;
  }

  if (inToken) {
    out.push(current);
  }
  return out;
}
|
|
|
|
/**
 * Probe `<url>/healthz` with a single GET request.
 *
 * One trailing slash on `url` is removed before `/healthz` is appended. Only
 * the path portion of the resulting URL is sent.
 *
 * @param url - Base URL of the service to probe.
 * @param timeoutMs - Socket timeout for the request, in milliseconds.
 * @returns A promise that resolves to `true` when a response arrives with a
 *   status below 500 (a missing status is treated as 500), and to `false` on
 *   a request error or timeout.
 */
function healthz(url: string, timeoutMs = 5000): Promise<boolean> {
  return new Promise((resolve) => {
    const target = new URL(`${url.replace(/\/$/, '')}/healthz`);
    const secure = target.protocol === 'https:';
    const client = secure ? https : http;

    const options = {
      hostname: target.hostname,
      // Fall back to the scheme's default port when the URL has none.
      port: target.port || (secure ? 443 : 80),
      path: target.pathname,
      timeout: timeoutMs,
    };

    const probe = client.get(options, (response) => {
      resolve((response.statusCode ?? 500) < 500);
      // Drain the body so the socket is released.
      response.resume();
    });

    probe.on('error', () => resolve(false));
    probe.on('timeout', () => {
      probe.destroy();
      resolve(false);
    });
  });
}
|
|
|
|
/** Result of the mcpd health probe; assigned in the suite's `beforeAll`. */
let mcpdUp = false;

/**
 * True only when both the LLM URL and the API key carry a non-blank value.
 * A variable that is exported but empty is treated as missing, so the suite
 * self-skips instead of trying to provision an LLM with no URL or key.
 */
const liveLlmConfigured =
  (LLM_URL ?? '').trim() !== '' && (LLM_KEY ?? '').trim() !== '';
|
|
|
|
/**
 * Live-LLM chat smoke suite.
 *
 * Every test returns early (and therefore reports as passed) unless the LLM
 * env vars are set and the mcpd health probe succeeded in `beforeAll`. The
 * tests build on each other: the first provisions the secret, Llm and agent
 * that the two chat tests then talk to.
 */
describe('agent chat smoke (live LLM)', () => {
  beforeAll(async () => {
    if (!liveLlmConfigured) {
      // eslint-disable-next-line no-console
      console.warn('\n  ○ agent-chat smoke: skipped — set MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY to run against a real LLM.\n');
      return;
    }
    mcpdUp = await healthz(MCPD_URL);
    if (!mcpdUp) {
      // eslint-disable-next-line no-console
      console.warn(`\n  ○ agent-chat smoke: skipped — ${MCPD_URL}/healthz unreachable.\n`);
    }
  }, 20_000);

  afterAll(() => {
    if (!liveLlmConfigured || !mcpdUp) return;
    // Teardown in reverse dependency order; results are intentionally ignored.
    run(`delete agent ${AGENT_NAME}`);
    run(`delete llm ${LLM_NAME}`);
    run(`delete secret ${SECRET_NAME}`);
  });

  it('provisions secret + Llm + agent against the live endpoint', () => {
    if (!liveLlmConfigured || !mcpdUp) return;

    // Pre-clean any leftovers with the same names; failures here are ignored.
    run(`delete secret ${SECRET_NAME}`);
    run(`delete llm ${LLM_NAME}`);
    run(`delete agent ${AGENT_NAME}`);

    const sec = run(`create secret ${SECRET_NAME} --data API_KEY=${LLM_KEY!}`);
    expect(sec.code, sec.stderr).toBe(0);

    const llm = run([
      `create llm ${LLM_NAME}`,
      '--type openai',
      `--model ${LLM_MODEL}`,
      `--url ${LLM_URL!}`,
      `--api-key-ref ${SECRET_NAME}/API_KEY`,
    ].join(' '));
    expect(llm.code, llm.stderr).toBe(0);

    const agent = run([
      `create agent ${AGENT_NAME}`,
      `--llm ${LLM_NAME}`,
      `--description "live chat smoke"`,
      `--system-prompt "You are a smoke test. Always reply with the single token READY."`,
      '--default-temperature 0',
      '--default-max-tokens 32',
    ].join(' '));
    expect(agent.code, agent.stderr).toBe(0);
  });

  it('one-shot `mcpctl chat` sends a message and prints a reply', () => {
    if (!liveLlmConfigured || !mcpdUp) return;

    const result = run(`chat ${AGENT_NAME} -m "ping" --no-stream`);
    expect(result.code, result.stderr).toBe(0);
    // We can't bind too tightly to model output but the system prompt nudges
    // toward "READY". Either way: we got a reply.
    expect(result.stdout.length).toBeGreaterThan(0);
    expect(result.stderr).toMatch(/thread:\s+c[a-z0-9]+/);
  });

  it('streaming `mcpctl chat` emits text deltas', () => {
    if (!liveLlmConfigured || !mcpdUp) return;

    // Default mode is streaming. run() captures stdout and stderr separately,
    // so no shell redirection or temp file is needed. The exit code is
    // deliberately not asserted here; only the output is checked.
    const result = run(`chat ${AGENT_NAME} -m "say hello"`);
    expect(result.stdout.length, result.stderr).toBeGreaterThan(0);
    expect(result.stderr).toMatch(/thread:\s+c[a-z0-9]+/);
  });
});
|