src/mcplocal/tests/smoke/agent-chat.smoke.test.ts

/**
 * Live-LLM smoke for agent chat.
 *
 * Runs only when MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY are set; the
 * idea is to point this at a real OpenAI-compatible endpoint and confirm
 * the openai-passthrough adapter delivers the user's message and returns
 * an assistant reply. For the project's qwen3-thinking deployment:
 *
 *   MCPCTL_SMOKE_LLM_URL=http://litellm.nvidia-nim.svc.cluster.local:4000/v1 \
 *   MCPCTL_SMOKE_LLM_MODEL=qwen3-thinking \
 *   MCPCTL_SMOKE_LLM_KEY=sk-... \
 *     pnpm test:smoke
 *
 * If the env vars are missing the test self-skips without failing the
 * pipeline (the agent CRUD smoke still runs in agent.smoke.test.ts).
 */
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import http from 'node:http';
import https from 'node:https';
import { execSync, spawnSync } from 'node:child_process';

// Settings with a default use `||` on purpose: an env var that is present but
// empty (e.g. exported as `MCPD_URL=`) must fall back to the default rather
// than hand an empty URL / model name to the CLI.
const MCPD_URL = process.env.MCPD_URL || 'https://mcpctl.ad.itaz.eu';
// No default: the suite self-skips unless both the URL and the key are set.
const LLM_URL = process.env.MCPCTL_SMOKE_LLM_URL;
const LLM_MODEL = process.env.MCPCTL_SMOKE_LLM_MODEL || 'qwen3-thinking';
const LLM_KEY = process.env.MCPCTL_SMOKE_LLM_KEY;
// Time-derived suffix so the resource names below differ from run to run.
const SUFFIX = Date.now().toString(36);
const SECRET_NAME = `smoke-chat-sec-${SUFFIX}`;
const LLM_NAME = `smoke-chat-llm-${SUFFIX}`;
const AGENT_NAME = `smoke-chat-agent-${SUFFIX}`;

/** Outcome of one `mcpctl` invocation: exit code plus both captured streams. */
interface CliResult { code: number; stdout: string; stderr: string }

/**
 * Runs `mcpctl --direct <args>` through the shell and reports how it went.
 *
 * Unlike `execSync`, which only hands back stdout when the command succeeds,
 * this captures stderr on the success path as well. Callers in this file
 * match on stderr after asserting exit code 0, so dropping it would make
 * those assertions impossible to satisfy.
 *
 * @param args - Raw argument string appended to `mcpctl --direct`. It is
 *   interpreted by the shell, so callers must quote values themselves.
 * @returns Exit code (1 when the process produced none, e.g. on timeout or
 *   spawn failure), stdout (trimmed on success only) and stderr.
 */
function run(args: string): CliResult {
  const proc = spawnSync(`mcpctl --direct ${args}`, {
    shell: true, // same shell-string semantics execSync provided
    encoding: 'utf-8',
    timeout: 60_000,
    stdio: ['ignore', 'pipe', 'pipe'],
  });
  // The streams are null when the process could not be spawned at all.
  const stdout = proc.stdout ?? '';
  const stderr = proc.stderr ?? '';

  if (proc.error === undefined && proc.status === 0) {
    return { code: 0, stdout: stdout.trim(), stderr };
  }
  return {
    // `status` is null when the child was killed (timeout/signal); never
    // report 0 for a run that ended with an error.
    code: proc.status || 1,
    stdout,
    // Surface spawn/timeout errors when the child itself wrote nothing.
    stderr: stderr || proc.error?.message || '',
  };
}

/**
 * Issues a GET to `<url>/healthz` (one trailing slash on `url` is dropped
 * first) and reports whether the server answered with a non-5xx status.
 *
 * @param url - Base URL of the service to probe; http and https are supported.
 * @param timeoutMs - Socket timeout passed to the request, in milliseconds.
 * @returns Resolves `true` for any status below 500, `false` on a 5xx status,
 *   a request error, or a timeout.
 */
function healthz(url: string, timeoutMs = 5000): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    const base = url.replace(/\/$/, '');
    const target = new URL(`${base}/healthz`);
    const secure = target.protocol === 'https:';
    const transport = secure ? https : http;
    const fallbackPort = secure ? 443 : 80;

    const request = transport.get(
      {
        hostname: target.hostname,
        port: target.port || fallbackPort,
        path: target.pathname,
        timeout: timeoutMs,
      },
      (response) => {
        // A missing status code is treated like a server error.
        const status = response.statusCode ?? 500;
        resolve(status < 500);
        // Drain the body so the socket is released.
        response.resume();
      },
    );

    request.on('error', () => {
      resolve(false);
    });
    request.on('timeout', () => {
      request.destroy();
      resolve(false);
    });
  });
}

// Flipped by the suite's beforeAll once the mcpd health probe has answered.
let mcpdUp = false;
// Truthiness rather than `!== undefined`: an env var that is set but empty
// (e.g. `MCPCTL_SMOKE_LLM_KEY=`) must count as "not configured" so the suite
// self-skips instead of provisioning against an empty URL / key.
const liveLlmConfigured = Boolean(LLM_URL) && Boolean(LLM_KEY);

/**
 * Live-LLM chat smoke suite.
 *
 * Every test is a no-op unless both LLM env vars are configured and the mcpd
 * health probe succeeded in beforeAll. When active it provisions a secret, an
 * Llm and an agent, exercises `mcpctl chat` in non-streaming and streaming
 * mode, and deletes the three resources again in afterAll.
 */
describe('agent chat smoke (live LLM)', () => {
  // Each CLI call may run for up to 60s and a test issues several of them, so
  // do not rely on the runner's default per-test / per-hook timeout.
  const CLI_TEST_TIMEOUT_MS = 180_000;

  /**
   * POSIX single-quote escaping. The key, URL and model come from the
   * environment and are spliced into a shell command line; quoting keeps
   * characters such as `&`, `?`, `$` or spaces from being interpreted.
   */
  const shq = (value: string): string => `'${value.replace(/'/g, `'\\''`)}'`;

  /**
   * Runs `mcpctl --direct chat <agent> <extraArgs>` and returns the exit code
   * together with BOTH output streams. The chat tests match a `thread:` marker
   * on stderr after a successful run, so stderr must be captured even when the
   * exit code is 0 — and without going through a shared temp file.
   */
  const chat = (extraArgs: string): CliResult => {
    const proc = spawnSync(`mcpctl --direct chat ${AGENT_NAME} ${extraArgs}`, {
      shell: true,
      encoding: 'utf-8',
      timeout: 60_000,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    const succeeded = proc.error === undefined && proc.status === 0;
    return {
      // `status` is null when the child was killed (timeout/signal).
      code: succeeded ? 0 : proc.status || 1,
      stdout: proc.stdout ?? '',
      stderr: (proc.stderr ?? '') || proc.error?.message || '',
    };
  };

  beforeAll(async () => {
    if (!liveLlmConfigured) {
      // eslint-disable-next-line no-console
      console.warn('\n  ○ agent-chat smoke: skipped — set MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY to run against a real LLM.\n');
      return;
    }
    mcpdUp = await healthz(MCPD_URL);
    if (!mcpdUp) {
      // eslint-disable-next-line no-console
      console.warn(`\n  ○ agent-chat smoke: skipped — ${MCPD_URL}/healthz unreachable.\n`);
    }
  }, 20_000);

  afterAll(() => {
    if (!liveLlmConfigured || !mcpdUp) return;
    // Best-effort cleanup; results are intentionally ignored. The agent is
    // removed first, then the Llm it references, then the secret.
    run(`delete agent ${AGENT_NAME}`);
    run(`delete llm ${LLM_NAME}`);
    run(`delete secret ${SECRET_NAME}`);
  }, CLI_TEST_TIMEOUT_MS);

  it('provisions secret + Llm + agent against the live endpoint', () => {
    if (!liveLlmConfigured || !mcpdUp) return;
    // Clear any leftovers with the same names; failures here are expected.
    run(`delete secret ${SECRET_NAME}`);
    run(`delete llm ${LLM_NAME}`);
    run(`delete agent ${AGENT_NAME}`);
    const sec = run(`create secret ${SECRET_NAME} --data API_KEY=${shq(LLM_KEY ?? '')}`);
    expect(sec.code, sec.stderr).toBe(0);
    const llm = run([
      `create llm ${LLM_NAME}`,
      '--type openai',
      `--model ${shq(LLM_MODEL)}`,
      `--url ${shq(LLM_URL ?? '')}`,
      `--api-key-ref ${SECRET_NAME}/API_KEY`,
    ].join(' '));
    expect(llm.code, llm.stderr).toBe(0);
    const agent = run([
      `create agent ${AGENT_NAME}`,
      `--llm ${LLM_NAME}`,
      `--description "live chat smoke"`,
      `--system-prompt "You are a smoke test. Always reply with the single token READY."`,
      '--default-temperature 0',
      '--default-max-tokens 32',
    ].join(' '));
    expect(agent.code, agent.stderr).toBe(0);
  }, CLI_TEST_TIMEOUT_MS);

  it('one-shot `mcpctl chat` sends a message and prints a reply', () => {
    if (!liveLlmConfigured || !mcpdUp) return;
    const result = chat('-m "ping" --no-stream');
    expect(result.code, result.stderr).toBe(0);
    expect(result.stdout.trim().length).toBeGreaterThan(0);
    // We can't bind too tightly to model output but the system prompt nudges
    // toward "READY". Either way: we got a reply.
    expect(result.stderr).toMatch(/thread:\s+c[a-z0-9]+/);
  }, CLI_TEST_TIMEOUT_MS);

  it('streaming `mcpctl chat` emits text deltas', () => {
    if (!liveLlmConfigured || !mcpdUp) return;
    // Default mode is streaming; stdout and stderr are captured separately.
    const result = chat('-m "say hello"');
    // A failed command that happened to print something must not pass.
    expect(result.code, result.stderr).toBe(0);
    expect(result.stdout.length).toBeGreaterThan(0);
    expect(result.stderr).toMatch(/thread:\s+c[a-z0-9]+/);
  }, CLI_TEST_TIMEOUT_MS);
});
feat(agents): smoke tests + README + docs (Stage 6, final) Closes the agents feature. Smoke tests (run via `pnpm test:smoke` against a live mcpd at $MCPD_URL, default https://mcpctl.ad.itaz.eu): * tests/smoke/agent.smoke.test.ts — full CRUD round-trip: create secret + Llm + agent with sampling defaults; `get agents` surfaces it; `get agent foo -o yaml | apply -f` round-trips identically; create + list a thread via the HTTP API; agent delete leaves Llm + secret intact (Restrict + SetNull as designed). Self-skips with a warning when /healthz is unreachable. * tests/smoke/agent-chat.smoke.test.ts — gated on MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY. Provisions secret + Llm + agent against a real upstream, runs `mcpctl chat -m … --no-stream` (asserts a reply lands), then runs the streaming default (asserts text on stdout + `(thread: …)` on stderr). The fast path for verifying the in-cluster qwen3-thinking deployment: MCPCTL_SMOKE_LLM_URL=http://litellm.nvidia-nim.svc.cluster.local:4000/v1 \ MCPCTL_SMOKE_LLM_MODEL=qwen3-thinking \ MCPCTL_SMOKE_LLM_KEY=$(pulumi config get --stack homelab \ secrets:litellmMcpctlGatewayToken) \ pnpm test:smoke Docs: * README.md — new "Agents" section under Resources with the qwen3-thinking quickstart and links to docs/agents.md and docs/chat.md. Adds llm + agent rows to the resources table. * docs/agents.md (new) — full reference: data model, chat-parameter table, HTTP API, RBAC mapping, tool-use loop semantics, yaml round-trip shorthand, the kubernetes-deployment wiring recipe, and a troubleshooting section (namespace collision, llm-in-use, pending-row recovery, Anthropic-tool limitation). * docs/chat.md (new) — user-facing `mcpctl chat` walkthrough: modes, per-call flags, slash-commands, threads, and a troubleshooting section. * CLAUDE.md — adds a "Resource types" cheatsheet with one-line pointers to each, including the new `agent` row that links to the docs. All suites still green: mcpd 759/759, mcplocal 715/715, cli 430/430. 
Smoke tests typecheck and self-skip when no live mcpd is reachable. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-04-25 17:08:37 +01:00			`/**`
			`* Live-LLM smoke for agent chat.`
			`*`
			`* Runs only when MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY are set; the`
			`* idea is to point this at a real OpenAI-compatible endpoint and confirm`
			`* the openai-passthrough adapter delivers the user's message and returns`
			`* an assistant reply. For the project's qwen3-thinking deployment:`
			`*`
			`* MCPCTL_SMOKE_LLM_URL=http://litellm.nvidia-nim.svc.cluster.local:4000/v1 \`
			`* MCPCTL_SMOKE_LLM_MODEL=qwen3-thinking \`
			`* MCPCTL_SMOKE_LLM_KEY=sk-... \`
			`* pnpm test:smoke`
			`*`
			`* If the env vars are missing the test self-skips without failing the`
			`* pipeline (the agent CRUD smoke still runs in agent.smoke.test.ts).`
			`*/`
			`import { describe, it, expect, beforeAll, afterAll } from 'vitest';`
			`import http from 'node:http';`
			`import https from 'node:https';`
			`import { execSync } from 'node:child_process';`

			`const MCPD_URL = process.env.MCPD_URL ?? 'https://mcpctl.ad.itaz.eu';`
			`const LLM_URL = process.env.MCPCTL_SMOKE_LLM_URL;`
			`const LLM_MODEL = process.env.MCPCTL_SMOKE_LLM_MODEL ?? 'qwen3-thinking';`
			`const LLM_KEY = process.env.MCPCTL_SMOKE_LLM_KEY;`
			`const SUFFIX = Date.now().toString(36);`
			const SECRET_NAME = `smoke-chat-sec-${SUFFIX}`;
			const LLM_NAME = `smoke-chat-llm-${SUFFIX}`;
			const AGENT_NAME = `smoke-chat-agent-${SUFFIX}`;

			`interface CliResult { code: number; stdout: string; stderr: string }`

			`function run(args: string): CliResult {`
			`try {`
			const stdout = execSync(`mcpctl --direct ${args}`, {
			`encoding: 'utf-8',`
			`timeout: 60_000,`
			`stdio: ['ignore', 'pipe', 'pipe'],`
			`});`
			`return { code: 0, stdout: stdout.trim(), stderr: '' };`
			`} catch (err) {`
			`const e = err as { status?: number; stdout?: Buffer \| string; stderr?: Buffer \| string };`
			`return {`
			`code: e.status ?? 1,`
			`stdout: e.stdout ? (typeof e.stdout === 'string' ? e.stdout : e.stdout.toString('utf-8')) : '',`
			`stderr: e.stderr ? (typeof e.stderr === 'string' ? e.stderr : e.stderr.toString('utf-8')) : '',`
			`};`
			`}`
			`}`

			`function healthz(url: string, timeoutMs = 5000): Promise<boolean> {`
			`return new Promise((resolve) => {`
			const parsed = new URL(`${url.replace(/\/$/, '')}/healthz`);
			`const driver = parsed.protocol === 'https:' ? https : http;`
			`const req = driver.get({`
			`hostname: parsed.hostname,`
			`port: parsed.port \|\| (parsed.protocol === 'https:' ? 443 : 80),`
			`path: parsed.pathname,`
			`timeout: timeoutMs,`
			`}, (res) => { resolve((res.statusCode ?? 500) < 500); res.resume(); });`
			`req.on('error', () => resolve(false));`
			`req.on('timeout', () => { req.destroy(); resolve(false); });`
			`});`
			`}`

			`let mcpdUp = false;`
			`const liveLlmConfigured = LLM_URL !== undefined && LLM_KEY !== undefined;`

			`describe('agent chat smoke (live LLM)', () => {`
			`beforeAll(async () => {`
			`if (!liveLlmConfigured) {`
			`// eslint-disable-next-line no-console`
			`console.warn('\n ○ agent-chat smoke: skipped — set MCPCTL_SMOKE_LLM_URL + MCPCTL_SMOKE_LLM_KEY to run against a real LLM.\n');`
			`return;`
			`}`
			`mcpdUp = await healthz(MCPD_URL);`
			`if (!mcpdUp) {`
			`// eslint-disable-next-line no-console`
			console.warn(`\n ○ agent-chat smoke: skipped — ${MCPD_URL}/healthz unreachable.\n`);
			`}`
			`}, 20_000);`

			`afterAll(() => {`
			`if (!liveLlmConfigured \|\| !mcpdUp) return;`
			run(`delete agent ${AGENT_NAME}`);
			run(`delete llm ${LLM_NAME}`);
			run(`delete secret ${SECRET_NAME}`);
			`});`

			`it('provisions secret + Llm + agent against the live endpoint', () => {`
			`if (!liveLlmConfigured \|\| !mcpdUp) return;`
			run(`delete secret ${SECRET_NAME}`);
			run(`delete llm ${LLM_NAME}`);
			run(`delete agent ${AGENT_NAME}`);
			const sec = run(`create secret ${SECRET_NAME} --data API_KEY=${LLM_KEY!}`);
			`expect(sec.code, sec.stderr).toBe(0);`
			`const llm = run([`
			`create llm ${LLM_NAME}`,
			`'--type openai',`
			`--model ${LLM_MODEL}`,
			`--url ${LLM_URL!}`,
			`--api-key-ref ${SECRET_NAME}/API_KEY`,
			`].join(' '));`
			`expect(llm.code, llm.stderr).toBe(0);`
			`const agent = run([`
			`create agent ${AGENT_NAME}`,
			`--llm ${LLM_NAME}`,
			`--description "live chat smoke"`,
			`--system-prompt "You are a smoke test. Always reply with the single token READY."`,
			`'--default-temperature 0',`
			`'--default-max-tokens 32',`
			`].join(' '));`
			`expect(agent.code, agent.stderr).toBe(0);`
			`});`

			it('one-shot `mcpctl chat` sends a message and prints a reply', () => {
			`if (!liveLlmConfigured \|\| !mcpdUp) return;`
			const result = run(`chat ${AGENT_NAME} -m "ping" --no-stream`);
			`expect(result.code, result.stderr).toBe(0);`
			`expect(result.stdout.length).toBeGreaterThan(0);`
			`// We can't bind too tightly to model output but the system prompt nudges`
			`// toward "READY". Either way: we got a reply.`
			`expect(result.stderr).toMatch(/thread:\s+c[a-z0-9]+/);`
			`});`

			it('streaming `mcpctl chat` emits text deltas', () => {
			`if (!liveLlmConfigured \|\| !mcpdUp) return;`
			`// Default mode is streaming. Pipe stdout/stderr separately.`
			`let stdout = '';`
			`let stderr = '';`
			`try {`
			const out = execSync(`mcpctl --direct chat ${AGENT_NAME} -m "say hello" 2> /tmp/agent-smoke-err`, {
			`encoding: 'utf-8', timeout: 60_000,`
			`});`
			`stdout = out;`
			`} catch (err) {`
			`const e = err as { status?: number; stdout?: Buffer \| string };`
			`stdout = e.stdout ? (typeof e.stdout === 'string' ? e.stdout : e.stdout.toString('utf-8')) : '';`
			`}`
			`try {`
			`// eslint-disable-next-line @typescript-eslint/no-require-imports`
			`const fs = require('node:fs') as typeof import('node:fs');`
			`stderr = fs.readFileSync('/tmp/agent-smoke-err', 'utf-8');`
			`fs.unlinkSync('/tmp/agent-smoke-err');`
			`} catch { /* ignore */ }`
			`expect(stdout.length).toBeGreaterThan(0);`
			`expect(stderr).toMatch(/thread:\s+c[a-z0-9]+/);`
			`});`
			`});`