/**
 * Live-LLM smoke for agent chat.
 *
 * Runs only when MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY are set; the
 * idea is to point this at a real OpenAI-compatible endpoint and confirm
 * the openai-passthrough adapter delivers the user's message and returns
 * an assistant reply. For the project's qwen3-thinking deployment:
 *
 *   MCPCTL_SMOKE_LLM_URL=http://litellm.nvidia-nim.svc.cluster.local:4000/v1 \
 *   MCPCTL_SMOKE_LLM_MODEL=qwen3-thinking \
 *   MCPCTL_SMOKE_LLM_KEY=sk-... \
 *   pnpm test:smoke
 *
 * If the env vars are missing (or blank) the test self-skips without failing
 * the pipeline (the agent CRUD smoke still runs in agent.smoke.test.ts).
 */
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import http from 'node:http';
import https from 'node:https';
import { spawnSync, execSync } from 'node:child_process';

const MCPD_URL = process.env.MCPD_URL ?? 'https://mcpctl.ad.itaz.eu';
const LLM_URL = process.env.MCPCTL_SMOKE_LLM_URL;
const LLM_MODEL = process.env.MCPCTL_SMOKE_LLM_MODEL ?? 'qwen3-thinking';
const LLM_KEY = process.env.MCPCTL_SMOKE_LLM_KEY;

// Time-based suffix so resources created by this run get their own names.
const SUFFIX = Date.now().toString(36);
const SECRET_NAME = `smoke-chat-sec-${SUFFIX}`;
const LLM_NAME = `smoke-chat-llm-${SUFFIX}`;
const AGENT_NAME = `smoke-chat-agent-${SUFFIX}`;

/** Outcome of one `mcpctl` invocation: exit code plus trimmed output streams. */
interface CliResult {
  code: number;
  stdout: string;
  stderr: string;
}

/**
 * Run `mcpctl --direct <args>` synchronously and capture both output streams.
 *
 * @param args Shell-style argument string; tokenized by {@link splitArgs}.
 * @returns Exit code (1 when the process produced no status, e.g. it could
 *   not be spawned or hit the 60s timeout) with trimmed stdout and stderr.
 *   When the spawn itself failed, the error message is appended to `stderr`
 *   so assertion messages are not empty.
 */
function run(args: string): CliResult {
  // spawnSync (not execSync) — execSync returns only stdout on success and
  // discards stderr, which made any `thread:` assertion against a successful
  // chat impossible to evaluate. Splitting the args correctly handles the
  // few existing call sites that quote-wrap multi-word values like
  // `--system-prompt "You are..."`.
  const argv = splitArgs(args);
  const res = spawnSync('mcpctl', ['--direct', ...argv], {
    encoding: 'utf-8',
    timeout: 60_000,
  });

  let stderr = (res.stderr ?? '').trim();
  if (res.error !== undefined) {
    // Surface spawn-level failures (binary missing, timeout) to the caller.
    stderr = stderr === '' ? res.error.message : `${stderr}\n${res.error.message}`;
  }

  return {
    code: res.status ?? 1,
    stdout: (res.stdout ?? '').trim(),
    stderr,
  };
}

/**
 * Tokenize a shell-style argv string with simple double-quote support — just
 * enough for the smoke test's call shapes. Not a full POSIX parser; we only
 * need to keep `--system-prompt "You are a smoke test..."` together as one
 * arg.
 *
 * @param s Argument string to split.
 * @returns Tokens in order; double-quoted spans are returned without quotes.
 */
function splitArgs(s: string): string[] {
  const out: string[] = [];
  // Group 1: contents of a double-quoted span. Group 2: a bare non-space run.
  const re = /"([^"]*)"|(\S+)/g;
  let m: RegExpExecArray | null;
  while ((m = re.exec(s)) !== null) {
    out.push(m[1] !== undefined ? m[1] : (m[2] ?? ''));
  }
  return out;
}

/**
 * Probe `<url>/healthz` with a GET request.
 *
 * @param url Base URL; a single trailing slash is stripped before appending.
 * @param timeoutMs Timeout passed to the request, in milliseconds.
 * @returns Resolves `true` when a response arrives with status below 500,
 *   `false` on a 5xx status, request error, or timeout. Never rejects.
 */
function healthz(url: string, timeoutMs = 5000): Promise<boolean> {
  return new Promise((resolve) => {
    const parsed = new URL(`${url.replace(/\/$/, '')}/healthz`);
    const driver = parsed.protocol === 'https:' ? https : http;
    const req = driver.get(
      {
        hostname: parsed.hostname,
        port: parsed.port || (parsed.protocol === 'https:' ? 443 : 80),
        path: parsed.pathname,
        timeout: timeoutMs,
      },
      (res) => {
        resolve((res.statusCode ?? 500) < 500);
        // Drain the body; only the status code matters here.
        res.resume();
      },
    );
    req.on('error', () => resolve(false));
    req.on('timeout', () => {
      req.destroy();
      resolve(false);
    });
  });
}

let mcpdUp = false;

// Blank values count as "not configured" so an empty variable skips the
// suite instead of running it against an empty URL or key.
const liveLlmConfigured =
  (LLM_URL ?? '').trim() !== '' && (LLM_KEY ?? '').trim() !== '';

describe('agent chat smoke (live LLM)', () => {
  beforeAll(async () => {
    if (!liveLlmConfigured) {
      // eslint-disable-next-line no-console
      console.warn('\n  ○ agent-chat smoke: skipped — set MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY to run against a real LLM.\n');
      return;
    }
    mcpdUp = await healthz(MCPD_URL);
    if (!mcpdUp) {
      // eslint-disable-next-line no-console
      console.warn(`\n  ○ agent-chat smoke: skipped — ${MCPD_URL}/healthz unreachable.\n`);
    }
  }, 20_000);

  afterAll(() => {
    if (!liveLlmConfigured || !mcpdUp) return;
    // Best-effort cleanup; results are intentionally not asserted.
    run(`delete agent ${AGENT_NAME}`);
    run(`delete llm ${LLM_NAME}`);
    run(`delete secret ${SECRET_NAME}`);
  });

  it('provisions secret + Llm + agent against the live endpoint', () => {
    if (!liveLlmConfigured || !mcpdUp) return;

    // Clear any leftovers with the same names before creating; failures here
    // are ignored.
    run(`delete secret ${SECRET_NAME}`);
    run(`delete llm ${LLM_NAME}`);
    run(`delete agent ${AGENT_NAME}`);

    const sec = run(`create secret ${SECRET_NAME} --data API_KEY=${LLM_KEY!}`);
    expect(sec.code, sec.stderr).toBe(0);

    const llm = run([
      `create llm ${LLM_NAME}`,
      '--type openai',
      `--model ${LLM_MODEL}`,
      `--url ${LLM_URL!}`,
      `--api-key-ref ${SECRET_NAME}/API_KEY`,
    ].join(' '));
    expect(llm.code, llm.stderr).toBe(0);

    const agent = run([
      `create agent ${AGENT_NAME}`,
      `--llm ${LLM_NAME}`,
      `--description "live chat smoke"`,
      `--system-prompt "You are a smoke test. Always reply with the single token READY."`,
      '--default-temperature 0',
      '--default-max-tokens 32',
    ].join(' '));
    expect(agent.code, agent.stderr).toBe(0);
  });

  it('one-shot `mcpctl chat` sends a message and prints a reply', () => {
    if (!liveLlmConfigured || !mcpdUp) return;
    const result = run(`chat ${AGENT_NAME} -m "ping" --no-stream`);
    expect(result.code, result.stderr).toBe(0);
    expect(result.stdout.length).toBeGreaterThan(0);
    // We can't bind too tightly to model output but the system prompt nudges
    // toward "READY". Either way: we got a reply.
    expect(result.stderr).toMatch(/thread:\s+c[a-z0-9]+/);
  });

  it('streaming `mcpctl chat` emits text deltas', () => {
    if (!liveLlmConfigured || !mcpdUp) return;
    // Default mode is streaming. `run` already captures stdout and stderr
    // separately via spawnSync, so no shell redirect or temp file is needed.
    const result = run(`chat ${AGENT_NAME} -m "say hello"`);
    expect(result.code, result.stderr).toBe(0);
    expect(result.stdout.length).toBeGreaterThan(0);
    expect(result.stderr).toMatch(/thread:\s+c[a-z0-9]+/);
  });
});