feat(mcpd): inference proxy — POST /api/v1/llms/:name/infer
Why: the point of the Llm resource (Phase 1) is that credentials never leave
the server. This lands the proxy: clients POST OpenAI chat/completions to
mcpd, mcpd attaches the provider API key server-side, and the response
streams back as OpenAI-format SSE.
Design:
- Wire format client-side is always OpenAI chat/completions — every existing
SDK speaks it. Adapters translate on the provider side.
- `openai | vllm | deepseek | ollama` → pure passthrough (they already speak
OpenAI). `anthropic` → translator to/from Anthropic Messages API
(system-string extraction, content-block flattening, SSE event remap).
- Plain fetch; no @anthropic-ai/sdk dep. Consistent with the OpenBao driver
shape and keeps the proxy layer thin.
- `gemini-cli` intentionally rejected — subprocess providers need extra
lifecycle plumbing; deferred to a follow-up.
- Streaming: adapters yield `StreamingChunk`s; the route frames them as
`data: <json>\n\n` + terminal `data: [DONE]\n\n` so any OpenAI client
works unchanged.
RBAC:
- New URL special-case in mapUrlToPermission: `POST /api/v1/llms/:name/infer`
→ `run:llms:<name>` (not the default create:llms). Users need an explicit
`{role: 'run', resource: 'llms', [name: X]}` binding to call infer.
- Possession of `edit:llms` does NOT imply `run` — keeps catalogue
management separate from spend.
Audit: route emits an `llm_inference_call` event per request (llm name,
model, user/tokenSha, streaming, duration, status). main.ts wires it to the
structured logger for now; hook is in place for a richer audit sink later.
Tests:
- 11 adapter tests (passthrough POST shape + default URLs + no-auth ollama +
SSE forwarding; anthropic translate request/response + non-2xx wrap + SSE
event translation; registry dispatch + caching + unsupported-provider).
- 7 route tests (404, 400, non-streaming dispatch + audit, apiKey failure,
null apiKeyRef path, streaming SSE output, 502 on adapter error).
- Full suite 1830/1830 (+18 from Phase 1's 1812). TypeScript clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 22:43:55 +01:00
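A minimal sketch of the route-side framing described in the Streaming bullet
above. Everything here is illustrative (handler shape, names), assuming a
Node-style writable stream; the actual mcpd route may differ:

// Each chunk the adapter yields becomes one OpenAI-style SSE frame. The
// adapter's final chunk carries the literal "[DONE]" sentinel, so a single
// loop produces both the data frames and the terminator.
async function writeOpenAiSse(
  raw: NodeJS.WritableStream, // e.g. reply.raw in Fastify (assumed)
  chunks: AsyncIterable<{ data: string; done?: boolean }>,
): Promise<void> {
  for await (const chunk of chunks) {
    raw.write(`data: ${chunk.data}\n\n`); // JSON delta, or "[DONE]" when chunk.done
  }
  raw.end();
}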
import { describe, it, expect, vi } from 'vitest';

import { OpenAiPassthroughAdapter } from '../src/services/llm/adapters/openai-passthrough.js';
import { AnthropicAdapter } from '../src/services/llm/adapters/anthropic.js';
import { LlmAdapterRegistry, UnsupportedProviderError } from '../src/services/llm/dispatcher.js';
import type { InferContext } from '../src/services/llm/types.js';

// Builds a fetch mock that answers by URL pattern and fails loudly on any
// request the test didn't declare.
function mockFetch(responses: Array<{ match: RegExp; status: number; body?: unknown; text?: string }>): ReturnType<typeof vi.fn> {
  return vi.fn(async (input: string | URL, _init?: RequestInit) => {
    const url = String(input);
    const match = responses.find((r) => r.match.test(url));
    if (!match) throw new Error(`unexpected fetch: ${url}`);
    const body = match.body !== undefined ? JSON.stringify(match.body) : (match.text ?? '');
    return new Response(body, { status: match.status, headers: { 'Content-Type': 'application/json' } });
  });
}

// Minimal InferContext with sensible defaults; tests override what they need.
function makeCtx(overrides: Partial<InferContext> = {}): InferContext {
  return {
    body: { model: '', messages: [{ role: 'user', content: 'hello' }] },
    modelOverride: 'default-model',
    apiKey: 'test-key',
    url: '',
    extraConfig: {},
    ...overrides,
  };
}

// Helper to build a streaming Response from SSE lines.
function sseResponse(events: string[]): Response {
  const body = events.join('\n\n') + '\n\n';
  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      controller.enqueue(new TextEncoder().encode(body));
      controller.close();
    },
  });
  return new Response(stream, { status: 200, headers: { 'Content-Type': 'text/event-stream' } });
}

describe('OpenAiPassthroughAdapter', () => {
  it('infer: POSTs to <url>/v1/chat/completions with Authorization + body', async () => {
    const fetchFn = mockFetch([{
      match: /\/v1\/chat\/completions$/,
      status: 200,
      body: { id: 'x', choices: [{ message: { role: 'assistant', content: 'hi' } }] },
    }]);
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
    const ctx = makeCtx({ url: 'https://api.example.com' });
    const res = await adapter.infer(ctx);
    expect(res.status).toBe(200);
    const [url, init] = fetchFn.mock.calls[0] as [string, RequestInit];
    expect(url).toBe('https://api.example.com/v1/chat/completions');
    expect(init.method).toBe('POST');
    const headers = init.headers as Record<string, string>;
    expect(headers['Authorization']).toBe('Bearer test-key');
    const sent = JSON.parse(init.body as string) as { model: string; stream: boolean };
    expect(sent.model).toBe('default-model'); // filled from modelOverride
    expect(sent.stream).toBe(false);
  });

  it('infer: uses default URL for openai when url is empty', async () => {
    const fetchFn = mockFetch([{ match: /api\.openai\.com/, status: 200, body: {} }]);
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
    await adapter.infer(makeCtx());
    const [url] = fetchFn.mock.calls[0] as [string, RequestInit];
    expect(url).toBe('https://api.openai.com/v1/chat/completions');
  });

  it('infer: throws for vllm when url is empty (no default)', async () => {
    const adapter = new OpenAiPassthroughAdapter('vllm', { fetch: vi.fn() as unknown as typeof fetch });
    await expect(adapter.infer(makeCtx())).rejects.toThrow(/no default endpoint/);
  });
fix(chat): real fixes for thinking-model + URL conventions, not test tweaks
Five real bugs surfaced by the agent-chat smoke against live
qwen3-thinking. None of these are fixed by changing the test — the
test was right to fail.
1. openai-passthrough adapter doubled `/v1` in the request URL. The
adapter hard-codes `/v1/chat/completions` after the configured base,
but every OpenAI-compat provider documents its base URL with a
trailing `/v1` (api.openai.com/v1, llm.example.com/v1, …). Users
pasting that conventional shape produced
`https://x/v1/v1/chat/completions` → 404. endpointUrl now strips a
trailing `/v1` so both forms canonicalize. `/v1beta` (Anthropic-style)
is preserved.
2. Non-streaming chat returned an empty assistant message when thinking models
(qwen3-thinking, deepseek-reasoner, OpenAI o1) emitted only
`reasoning_content` with `content: null`. extractChoice now also
pulls reasoning (every spelling the streaming parser already knows
about), and a new pickAssistantText helper falls back to it when
content is empty. A `[response truncated by max_tokens]` marker is
appended when finish_reason is `length`, so users see the cut-off
instead of guessing why the answer is short. Symmetric streaming
fix: the chatStream loop accumulates reasoning and yields ONE
synthesized `text` frame at the end when content stayed empty,
keeping the CLI's stdout (which only prints `text` deltas) in sync
with the persisted thread message.
3. `mcpctl get agent X -o yaml` emitted `kind: public` (the v3
lifecycle field) instead of `kind: agent` (apply envelope), so
round-tripping through `apply -f` failed. Same fix shape as the v1
Llm strip in toApplyDocs — drop kind/status/lastHeartbeatAt/
inactiveSince/providerSessionId for the agents resource too.
4. Non-streaming `mcpctl chat` printed `thread:<cuid>` (no space) on
stderr; streaming printed `(thread: <cuid>)` (with space). Tests
and any other regex watching for one form missed the other.
Standardize on `thread: <cuid>` (single space) in both paths.
5. agent-chat.smoke's `run()` used `execSync`, which discards stderr on
success — making any `expect(stderr).toMatch(...)` assertion
structurally impossible to satisfy in the happy path. Switch to
`spawnSync` so stderr is actually captured. Includes a small
shell-style argv splitter so the existing call sites with quoted
multi-word values (`--system-prompt "..."`) keep working.
Tests: +6 new mcpd unit tests (4 chat-service for the reasoning
fallback / truncation marker / content-preference / streaming synth;
2 llm-adapters for the URL strip + /v1beta preservation). Full mcpd
+ mcplocal + smoke green: 860/860 + 723/723 + 139/139.
2026-04-27 18:39:01 +01:00
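A sketch of the endpoint normalization from fix (1), under the assumption that
the adapter joins base URL + '/v1/chat/completions' in a single helper (the
real endpointUrl may handle more cases):

// Strip trailing slashes, then an exact trailing "/v1" segment, so the
// conventional "<host>/v1" base and the bare "<host>" base canonicalize to
// the same request URL. "/v1beta" does not match the anchor and survives.
function endpointUrl(base: string): string {
  const trimmed = base.replace(/\/+$/, '').replace(/\/v1$/, '');
  return `${trimmed}/v1/chat/completions`;
}

// endpointUrl('https://llm.example.com/v1')  → 'https://llm.example.com/v1/chat/completions'
// endpointUrl('https://llm.example.com/v1/') → 'https://llm.example.com/v1/chat/completions'
// endpointUrl('https://x/v1beta')            → 'https://x/v1beta/v1/chat/completions'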
  it('infer: strips a trailing /v1 from the configured URL', async () => {
    // Users naturally paste the OpenAI-style base URL with /v1 because
    // every provider documents it that way (https://api.openai.com/v1,
    // https://llm.example.com/v1). The adapter then re-appends
    // /v1/chat/completions; without normalization this would produce a
    // doubled-/v1 404 against LiteLLM and friends.
    const fetchFn = mockFetch([{ match: /\/v1\/chat\/completions$/, status: 200, body: {} }]);
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
    await adapter.infer(makeCtx({ url: 'https://llm.example.com/v1' }));
    const [url1] = fetchFn.mock.calls[0] as [string];
    expect(url1).toBe('https://llm.example.com/v1/chat/completions');

    // Trailing slash + /v1 should also normalize correctly.
    const fetchFn2 = mockFetch([{ match: /\/v1\/chat\/completions$/, status: 200, body: {} }]);
    const adapter2 = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn2 as unknown as typeof fetch });
    await adapter2.infer(makeCtx({ url: 'https://llm.example.com/v1/' }));
    const [url2] = fetchFn2.mock.calls[0] as [string];
    expect(url2).toBe('https://llm.example.com/v1/chat/completions');
  });

  it('infer: preserves a trailing /v1beta suffix (only exact /v1 is stripped)', async () => {
    // Some providers expose `/v1beta` as a parallel API surface — don't
    // accidentally rewrite that to `/v1` or strip it.
    const fetchFn = mockFetch([{ match: /\/v1beta\/v1\/chat\/completions$/, status: 200, body: {} }]);
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
    await adapter.infer(makeCtx({ url: 'https://api.example.com/v1beta' }));
    const [url] = fetchFn.mock.calls[0] as [string];
    expect(url).toBe('https://api.example.com/v1beta/v1/chat/completions');
  });
  it('infer: omits Authorization when apiKey is empty', async () => {
    const fetchFn = mockFetch([{ match: /ollama/, status: 200, body: {} }]);
    const adapter = new OpenAiPassthroughAdapter('ollama', { fetch: fetchFn as unknown as typeof fetch });
    await adapter.infer(makeCtx({ url: 'http://ollama:11434', apiKey: '' }));
    const [, init] = fetchFn.mock.calls[0] as [string, RequestInit];
    const headers = init.headers as Record<string, string>;
    expect(headers['Authorization']).toBeUndefined();
  });

  it('stream: forwards SSE chunks and emits terminal [DONE]', async () => {
    const fetchFn = vi.fn(async () => sseResponse([
      'data: {"choices":[{"delta":{"content":"hi"}}]}',
      'data: {"choices":[{"delta":{"content":"!"}}]}',
      'data: [DONE]',
    ]));
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
    const ctx = makeCtx({ url: 'http://example', body: { model: '', messages: [], stream: true } });
    const chunks: { data: string; done?: boolean }[] = [];
    for await (const c of adapter.stream(ctx)) chunks.push(c);
    expect(chunks).toHaveLength(3);
    expect(chunks[2]?.done).toBe(true);
  });
});

describe('AnthropicAdapter', () => {
  it('infer: translates system+user messages, posts to /v1/messages', async () => {
    const fetchFn = mockFetch([{
      match: /\/v1\/messages$/,
      status: 200,
      body: {
        id: 'msg_01', model: 'claude-3-5-sonnet-20241022', role: 'assistant',
        content: [{ type: 'text', text: 'howdy' }],
        stop_reason: 'end_turn',
        usage: { input_tokens: 5, output_tokens: 2 },
      },
    }]);
    const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
    const ctx = makeCtx({
      body: {
        model: '', messages: [
          { role: 'system', content: 'be nice' },
          { role: 'user', content: 'hi' },
        ],
      },
      modelOverride: 'claude-3-5-sonnet-20241022',
    });
    const res = await adapter.infer(ctx);
    expect(res.status).toBe(200);

    const [url, init] = fetchFn.mock.calls[0] as [string, RequestInit];
    expect(url).toBe('https://api.anthropic.com/v1/messages');
    const headers = init.headers as Record<string, string>;
    expect(headers['x-api-key']).toBe('test-key');
    expect(headers['anthropic-version']).toBeDefined();

    const sent = JSON.parse(init.body as string) as {
      model: string; system: string; messages: Array<{ role: string; content: string }>; max_tokens: number;
    };
    expect(sent.model).toBe('claude-3-5-sonnet-20241022');
    expect(sent.system).toBe('be nice');
    expect(sent.messages).toEqual([{ role: 'user', content: 'hi' }]);
    expect(sent.max_tokens).toBe(1024); // default

    // Response shape: OpenAI chat.completion
    const body = res.body as { choices: Array<{ message: { content: string }; finish_reason: string }>; usage: { total_tokens: number } };
    expect(body.choices[0]!.message.content).toBe('howdy');
    expect(body.choices[0]!.finish_reason).toBe('stop');
    expect(body.usage.total_tokens).toBe(7);
  });

  it('infer: returns a synthetic error body on non-2xx', async () => {
    const fetchFn = vi.fn(async () => new Response('boom', { status: 500 }));
    const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
    const res = await adapter.infer(makeCtx({ body: { model: '', messages: [{ role: 'user', content: 'x' }] } }));
    expect(res.status).toBe(500);
    const body = res.body as { error: { message: string } };
    expect(body.error.message).toMatch(/HTTP 500/);
  });

  it('stream: translates anthropic event stream into OpenAI chunks', async () => {
    const events = [
      'event: message_start\ndata: {"type":"message_start","message":{"id":"m","content":[]}}',
      'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"he"}}',
      'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"llo"}}',
      'event: message_delta\ndata: {"type":"message_delta","delta":{"stop_reason":"end_turn"}}',
      'event: message_stop\ndata: {"type":"message_stop"}',
    ];
    const fetchFn = vi.fn(async () => sseResponse(events));
    const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
    const ctx = makeCtx({ body: { model: '', messages: [{ role: 'user', content: 'hi' }], stream: true } });

    const chunks: { data: string; done?: boolean }[] = [];
    for await (const c of adapter.stream(ctx)) chunks.push(c);

    // Expect: role-prime, two text deltas, finish-reason, [DONE]
    expect(chunks[chunks.length - 1]?.data).toBe('[DONE]');
    expect(chunks[chunks.length - 1]?.done).toBe(true);

    // First chunk is the role-prime (role: assistant, content: '').
    const first = JSON.parse(chunks[0]!.data) as { choices: [{ delta: { role: string; content: string } }] };
    expect(first.choices[0]!.delta.role).toBe('assistant');

    // Next two chunks carry the text.
    const d1 = JSON.parse(chunks[1]!.data) as { choices: [{ delta: { content: string } }] };
    const d2 = JSON.parse(chunks[2]!.data) as { choices: [{ delta: { content: string } }] };
    expect(d1.choices[0]!.delta.content).toBe('he');
    expect(d2.choices[0]!.delta.content).toBe('llo');

    // Finish-reason chunk.
    const stopped = JSON.parse(chunks[3]!.data) as { choices: [{ finish_reason: string }] };
    expect(stopped.choices[0]!.finish_reason).toBe('stop');
  });
});

describe('LlmAdapterRegistry', () => {
  it('returns the right adapter kind for each type', () => {
    const reg = new LlmAdapterRegistry();
    expect(reg.get('openai').kind).toBe('openai');
    expect(reg.get('vllm').kind).toBe('vllm');
    expect(reg.get('deepseek').kind).toBe('deepseek');
    expect(reg.get('ollama').kind).toBe('ollama');
    expect(reg.get('anthropic').kind).toBe('anthropic');
  });

  it('caches adapters between calls', () => {
    const reg = new LlmAdapterRegistry();
    const a = reg.get('openai');
    const b = reg.get('openai');
    expect(a).toBe(b);
  });

  it('rejects unsupported providers (gemini-cli is deferred)', () => {
    const reg = new LlmAdapterRegistry();
    expect(() => reg.get('gemini-cli')).toThrow(UnsupportedProviderError);
    expect(() => reg.get('bogus')).toThrow(UnsupportedProviderError);
  });
});
feat(llm): probe upstream auth at registration time
mcpd now runs a cheap auth probe whenever an Llm is created (or its
apiKeyRef/url is updated). Catches misconfigured tokens / wrong URLs at
registration with a 422 + structured error message, instead of silently
500-ing on first chat with a generic "fetch failed". Caught in the wild
today: the homelab Pulumi config exposed `MCPCTL_GATEWAY_TOKEN` (which
is mcpctl_pat_-prefixed, intended for LiteLLM→mcplocal direction) where
LiteLLM expects `LITELLM_MASTER_KEY` (sk-prefixed). The probe makes
this immediate.
Probe shape (LlmAdapter.verifyAuth):
- OpenAI passthrough → GET <url>/v1/models. Cheap, idempotent, gated
by the same auth as chat/completions.
- Anthropic → POST /v1/messages with max_tokens:1, "ping". Anthropic
has no list-models endpoint; this is the cheapest auth-exercising
call.
- Returns one of:
    { ok: true }
    { ok: false, reason: "auth", status, body } — 401/403, fail hard
    { ok: false, reason: "unreachable", error } — network, warn-only
    { ok: false, reason: "unexpected", status, body } — non-auth 4xx, warn-only
Behavior:
- LlmService.create()/update() runs the probe after resolveApiKey.
Throws LlmAuthVerificationError on `auth`, logs warn for
unreachable/unexpected, swallows for offline registration.
- Probe is skipped when there's no apiKeyRef (nothing to verify) or
when the caller passes skipAuthCheck=true.
- update() probes only when apiKeyRef OR url changes — pure
description/tier updates don't trigger upstream calls.
- Routes catch LlmAuthVerificationError and return 422 with
`{ error, status }`. The CLI surfaces the message verbatim via
ApiError.
Opt-out:
- CLI: `mcpctl create llm ... --skip-auth-check` for offline
registration before the upstream is reachable.
- HTTP: side-channel body field `_skipAuthCheck: true` (stripped
before validation, never persisted on the row).
Side fix in same commit (caught while testing): src/cli/src/index.ts
read `program.opts()` BEFORE `program.parse()`, so `--direct` was a
no-op for ApiClient — every command went to mcplocal regardless. Some
commands accidentally still worked because mcplocal forwards plain
`/api/v1/*` to mcpd, but flows that need direct SSE streaming (e.g.
`mcpctl chat`) couldn't reach mcpd. Fixed by peeking at process.argv
directly for the two global flags before Commander's parse runs.
Tests:
- llm-adapters.test.ts (+8): OpenAI 200/401/403/404/network, Anthropic
200/401/400 (typo'd model = unexpected, NOT auth — registration
shouldn't block on bad model names that surface at chat time).
- llm-service.test.ts (+6): create-throws-on-auth-fail (no row
written), warn-only on unreachable/unexpected, skipAuthCheck
bypass, no-key skip, update-only-probes-on-auth-affecting-change.
mcpd 775/775, mcplocal 715/715, cli 430/430.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 16:51:55 +01:00
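The result union described above, sketched as a TypeScript type (field names
taken from the commit message; the real declaration in mcpd may differ):

type VerifyAuthResult =
  | { ok: true }
  | { ok: false; reason: 'auth'; status: number; body: string }         // 401/403 → fail hard
  | { ok: false; reason: 'unreachable'; error: string }                 // network error → warn-only
  | { ok: false; reason: 'unexpected'; status: number; body: string };  // non-auth 4xx → warn-only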
describe('verifyAuth — registration-time probe', () => {
  it('OpenAI passthrough: 200 from /v1/models → ok', async () => {
    const fetchImpl = mockFetch([
      { match: /\/v1\/models$/, status: 200, body: { data: [{ id: 'gpt-4o-mini' }] } },
    ]);
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchImpl as unknown as typeof fetch });
    const result = await adapter.verifyAuth(makeCtx({ url: 'http://lite:4000', apiKey: 'sk-good' }));
    expect(result).toEqual({ ok: true });
    expect(fetchImpl).toHaveBeenCalledWith('http://lite:4000/v1/models', expect.objectContaining({ method: 'GET' }));
    const callInit = fetchImpl.mock.calls[0][1] as RequestInit;
    expect((callInit.headers as Record<string, string>)['Authorization']).toBe('Bearer sk-good');
  });

  it('OpenAI passthrough: 401 → reason=auth (caller throws)', async () => {
    const fetchImpl = mockFetch([
      { match: /\/v1\/models$/, status: 401, text: '{"error":"invalid_api_key"}' },
    ]);
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchImpl as unknown as typeof fetch });
    const result = await adapter.verifyAuth(makeCtx({ url: 'http://lite:4000', apiKey: 'sk-bad' }));
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('auth');
      if (result.reason === 'auth') {
        expect(result.status).toBe(401);
        expect(result.body).toContain('invalid_api_key');
      }
    }
  });

  it('OpenAI passthrough: 403 → reason=auth', async () => {
    const fetchImpl = mockFetch([
      { match: /\/v1\/models$/, status: 403, text: 'forbidden' },
    ]);
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchImpl as unknown as typeof fetch });
    const result = await adapter.verifyAuth(makeCtx({ url: 'http://lite:4000', apiKey: 'k' }));
    expect(result.ok).toBe(false);
    if (!result.ok) expect(result.reason).toBe('auth');
  });

  it('OpenAI passthrough: 404 (proxy without /v1/models) → reason=unexpected (warn-only)', async () => {
    const fetchImpl = mockFetch([
      { match: /\/v1\/models$/, status: 404, text: 'not found' },
    ]);
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchImpl as unknown as typeof fetch });
    const result = await adapter.verifyAuth(makeCtx({ url: 'http://lite:4000', apiKey: 'k' }));
    expect(result.ok).toBe(false);
    if (!result.ok) expect(result.reason).toBe('unexpected');
  });

  it('OpenAI passthrough: network error → reason=unreachable (warn-only)', async () => {
    const fetchImpl = vi.fn(async () => { throw new Error('ECONNREFUSED 127.0.0.1:9999'); });
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchImpl as unknown as typeof fetch });
    const result = await adapter.verifyAuth(makeCtx({ url: 'http://localhost:9999', apiKey: 'k' }));
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.reason).toBe('unreachable');
      if (result.reason === 'unreachable') {
        expect(result.error).toContain('ECONNREFUSED');
      }
    }
  });

  it('Anthropic: 200 from /v1/messages probe → ok', async () => {
    const fetchImpl = mockFetch([
      { match: /\/v1\/messages$/, status: 200, body: { id: 'msg_x', content: [{ type: 'text', text: 'pong' }] } },
    ]);
    const adapter = new AnthropicAdapter({ fetch: fetchImpl as unknown as typeof fetch });
    const result = await adapter.verifyAuth(makeCtx({ url: 'https://api.anthropic.com', apiKey: 'sk-ant-good' }));
    expect(result.ok).toBe(true);
    const callInit = fetchImpl.mock.calls[0][1] as RequestInit;
    expect((callInit.headers as Record<string, string>)['x-api-key']).toBe('sk-ant-good');
    const reqBody = JSON.parse(callInit.body as string) as { max_tokens: number };
    expect(reqBody.max_tokens).toBe(1);
  });

  it('Anthropic: 401 → reason=auth', async () => {
    const fetchImpl = mockFetch([
      { match: /\/v1\/messages$/, status: 401, text: '{"type":"authentication_error"}' },
    ]);
    const adapter = new AnthropicAdapter({ fetch: fetchImpl as unknown as typeof fetch });
    const result = await adapter.verifyAuth(makeCtx({ apiKey: 'bad' }));
    expect(result.ok).toBe(false);
    if (!result.ok) expect(result.reason).toBe('auth');
  });

  it('Anthropic: 400 (typo\'d model) → reason=unexpected, NOT auth', async () => {
    // Auth was fine; the request was rejected for a different reason. We
    // don't want to block registration on bad model names — that error
    // surfaces at chat time when the user actually picks a model.
    const fetchImpl = mockFetch([
      { match: /\/v1\/messages$/, status: 400, text: '{"error":"model not found"}' },
    ]);
    const adapter = new AnthropicAdapter({ fetch: fetchImpl as unknown as typeof fetch });
    const result = await adapter.verifyAuth(makeCtx({ apiKey: 'sk-ant-x', modelOverride: 'claude-fake' }));
    expect(result.ok).toBe(false);
    if (!result.ok) expect(result.reason).toBe('unexpected');
  });
});