Compare commits

..

2 Commits

Author SHA1 Message Date
9e3507752f Merge pull request 'feat(mcpd): Llm resource — CRUD + CLI + apply' (#52) from feat/llm into main
Some checks failed
CI/CD / lint (push) Has started running
CI/CD / typecheck (push) Has been cancelled
CI/CD / test (push) Has been cancelled
CI/CD / smoke (push) Has been cancelled
CI/CD / build (push) Has been cancelled
CI/CD / publish (push) Has been cancelled
2026-04-19 21:39:27 +00:00
97ac1e75ef Merge pull request 'feat(mcpd): pluggable SecretBackend + OpenBao driver + migrate' (#51) from feat/secretbackend into main
Some checks failed
CI/CD / lint (push) Has started running
CI/CD / test (push) Has been cancelled
CI/CD / typecheck (push) Has been cancelled
CI/CD / smoke (push) Has been cancelled
CI/CD / build (push) Has been cancelled
CI/CD / publish (push) Has been cancelled
2026-04-19 21:39:17 +00:00
28 changed files with 10 additions and 2283 deletions

View File

@@ -191,7 +191,7 @@ _mcpctl() {
COMPREPLY=($(compgen -W "--type --description --default --url --namespace --mount --path-prefix --token-secret --config --force -h --help" -- "$cur"))
;;
project)
COMPREPLY=($(compgen -W "-d --description --proxy-model --prompt --llm --llm-model --gated --no-gated --server --force -h --help" -- "$cur"))
COMPREPLY=($(compgen -W "-d --description --proxy-model --prompt --gated --no-gated --server --force -h --help" -- "$cur"))
;;
user)
COMPREPLY=($(compgen -W "--password --name --force -h --help" -- "$cur"))

View File

@@ -344,8 +344,6 @@ complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l force -d
complete -c mcpctl -n "__mcpctl_subcmd_active create project" -s d -l description -d 'Project description' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create project" -l proxy-model -d 'Plugin name (default, content-pipeline, gate, none)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create project" -l prompt -d 'Project-level prompt / instructions for the LLM' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create project" -l llm -d 'Name of an Llm resource (see \'mcpctl get llms\'), or \'none\' to disable' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create project" -l llm-model -d 'Override the model string for this project (defaults to the Llm\'s own model)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create project" -l gated -d '[deprecated: use --proxy-model default]'
complete -c mcpctl -n "__mcpctl_subcmd_active create project" -l no-gated -d '[deprecated: use --proxy-model content-pipeline]'
complete -c mcpctl -n "__mcpctl_subcmd_active create project" -l server -d 'Server name (repeat for multiple)' -x

View File

@@ -149,12 +149,7 @@ const ProjectSpecSchema = z.object({
prompt: z.string().max(10000).default(''),
proxyModel: z.string().optional(),
gated: z.boolean().optional(),
// Name of an `Llm` resource (see `mcpctl get llms`), or the literal 'none'
// to disable LLM features for this project. Unknown names fall back to the
// consumer's registry default — `mcpctl describe project` will flag that.
llmProvider: z.string().optional(),
// Override the model string for this project; defaults to the Llm's own
// model when unset.
llmModel: z.string().optional(),
servers: z.array(z.string()).default([]),
});

View File

@@ -378,8 +378,6 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
.option('-d, --description <text>', 'Project description', '')
.option('--proxy-model <name>', 'Plugin name (default, content-pipeline, gate, none)')
.option('--prompt <text>', 'Project-level prompt / instructions for the LLM')
.option('--llm <name>', "Name of an Llm resource (see 'mcpctl get llms'), or 'none' to disable")
.option('--llm-model <model>', 'Override the model string for this project (defaults to the Llm\'s own model)')
.option('--gated', '[deprecated: use --proxy-model default]')
.option('--no-gated', '[deprecated: use --proxy-model content-pipeline]')
.option('--server <name>', 'Server name (repeat for multiple)', collect, [])
@@ -399,8 +397,6 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
// Pass gated for backward compat with older mcpd
if (opts.gated !== undefined) body.gated = opts.gated as boolean;
if (opts.server.length > 0) body.servers = opts.server;
if (opts.llm) body.llmProvider = opts.llm;
if (opts.llmModel) body.llmModel = opts.llmModel;
try {
const project = await client.post<{ id: string; name: string }>('/api/v1/projects', body);

View File

@@ -137,7 +137,6 @@ function formatInstanceDetail(instance: Record<string, unknown>, inspect?: Recor
function formatProjectDetail(
project: Record<string, unknown>,
prompts: Array<{ name: string; priority: number; linkTarget: string | null }> = [],
knownLlmNames?: Set<string>,
): string {
const lines: string[] = [];
lines.push(`=== Project: ${project.name} ===`);
@@ -152,21 +151,8 @@ function formatProjectDetail(
lines.push('');
lines.push('Plugin Config:');
lines.push(` ${pad('Plugin:', 18)}${proxyModel}`);
if (llmProvider) {
// As of Phase 4, llmProvider names a centralized Llm resource (see
// `mcpctl get llms`). A value like "none" disables LLM for the project;
// anything else that doesn't match a registered Llm falls back to the
// registry default on consumers — flag it so operators notice.
const resolvable = knownLlmNames === undefined
|| llmProvider === 'none'
|| knownLlmNames.has(llmProvider);
if (resolvable) {
lines.push(` ${pad('LLM:', 18)}${llmProvider}`);
} else {
lines.push(` ${pad('LLM:', 18)}${llmProvider} [warning: no Llm registered with this name — will fall back to registry default]`);
}
}
if (llmModel) lines.push(` ${pad('LLM Model:', 18)}${llmModel} (override)`);
if (llmProvider) lines.push(` ${pad('LLM Provider:', 18)}${llmProvider}`);
if (llmModel) lines.push(` ${pad('LLM Model:', 18)}${llmModel}`);
// Servers section
const servers = project.servers as Array<{ server: { name: string } }> | undefined;
@@ -901,16 +887,10 @@ export function createDescribeCommand(deps: DescribeCommandDeps): Command {
deps.log(formatLlmDetail(item));
break;
case 'projects': {
const [projectPrompts, llms] = await Promise.all([
deps.client
const projectPrompts = await deps.client
.get<Array<{ name: string; priority: number; linkTarget: string | null }>>(`/api/v1/prompts?projectId=${item.id as string}`)
.catch(() => []),
deps.client
.get<Array<{ name: string }>>('/api/v1/llms')
.catch(() => [] as Array<{ name: string }>),
]);
const llmNames = new Set(llms.map((l) => l.name));
deps.log(formatProjectDetail(item, projectPrompts, llmNames));
.catch(() => []);
deps.log(formatProjectDetail(item, projectPrompts));
break;
}
case 'users': {

View File

@@ -108,77 +108,6 @@ describe('describe command', () => {
expect(text).not.toContain('Gated:');
});
it('shows project Llm reference without warning when the name matches a registered Llm', async () => {
const deps = makeDeps({
id: 'proj-1',
name: 'with-llm',
description: '',
ownerId: 'user-1',
proxyModel: 'default',
llmProvider: 'claude',
llmModel: 'claude-3-opus',
createdAt: '2025-01-01',
});
// /api/v1/llms returns a claude entry → no warning
deps.client = {
get: vi.fn(async (path: string) => {
if (path === '/api/v1/llms') return [{ name: 'claude' }];
return [];
}),
} as unknown as typeof deps.client;
const cmd = createDescribeCommand(deps);
await cmd.parseAsync(['node', 'test', 'project', 'proj-1']);
const text = deps.output.join('\n');
expect(text).toContain('LLM:');
expect(text).toContain('claude');
expect(text).not.toContain('warning:');
});
it('warns on describe project when llmProvider does not resolve to any registered Llm', async () => {
const deps = makeDeps({
id: 'proj-1',
name: 'orphan',
description: '',
ownerId: 'user-1',
proxyModel: 'default',
llmProvider: 'claude-ghost',
createdAt: '2025-01-01',
});
deps.client = {
get: vi.fn(async (path: string) => {
if (path === '/api/v1/llms') return [{ name: 'claude' }, { name: 'gpt-4o' }];
return [];
}),
} as unknown as typeof deps.client;
const cmd = createDescribeCommand(deps);
await cmd.parseAsync(['node', 'test', 'project', 'proj-1']);
const text = deps.output.join('\n');
expect(text).toContain('claude-ghost');
expect(text).toContain('warning:');
expect(text).toContain('fall back to registry default');
});
it('does not warn when llmProvider is "none" (explicit disable)', async () => {
const deps = makeDeps({
id: 'proj-1',
name: 'no-llm',
description: '',
ownerId: 'user-1',
proxyModel: 'default',
llmProvider: 'none',
createdAt: '2025-01-01',
});
deps.client = {
get: vi.fn(async () => []),
} as unknown as typeof deps.client;
const cmd = createDescribeCommand(deps);
await cmd.parseAsync(['node', 'test', 'project', 'proj-1']);
const text = deps.output.join('\n');
expect(text).toContain('LLM:');
expect(text).toContain('none');
expect(text).not.toContain('warning:');
});
it('shows project Plugin Config defaulting to "default" when proxyModel is empty', async () => {
const deps = makeDeps({
id: 'proj-1',

View File

@@ -28,9 +28,7 @@ import { registerSecretBackendRoutes } from './routes/secret-backends.js';
import { registerSecretMigrateRoutes } from './routes/secret-migrate.js';
import { LlmRepository } from './repositories/llm.repository.js';
import { LlmService } from './services/llm.service.js';
import { LlmAdapterRegistry } from './services/llm/dispatcher.js';
import { registerLlmRoutes } from './routes/llms.js';
import { registerLlmInferRoutes } from './routes/llm-infer.js';
import { PromptRepository } from './repositories/prompt.repository.js';
import { PromptRequestRepository } from './repositories/prompt-request.repository.js';
import { bootstrapSystemProject } from './bootstrap/system-project.js';
@@ -107,12 +105,6 @@ function mapUrlToPermission(method: string, url: string): PermissionCheck {
// /api/v1/secrets/migrate is a bulk cross-backend operation — treat as op, not a plain secret write.
if (url.startsWith('/api/v1/secrets/migrate')) return { kind: 'operation', operation: 'migrate-secrets' };
// /api/v1/llms/:name/infer → `run:llms:<name>` (not the default create:llms).
const inferMatch = url.match(/^\/api\/v1\/llms\/([^/?]+)\/infer/);
if (inferMatch?.[1]) {
return { kind: 'resource', resource: 'llms', action: 'run', resourceName: inferMatch[1] };
}
const resourceMap: Record<string, string | undefined> = {
'servers': 'servers',
'instances': 'instances',
@@ -342,7 +334,6 @@ async function main(): Promise<void> {
const secretService = new SecretService(secretRepo, secretBackendService);
const secretMigrateService = new SecretMigrateService(secretRepo, secretBackendService);
const llmService = new LlmService(llmRepo, secretService);
const llmAdapters = new LlmAdapterRegistry();
const instanceService = new InstanceService(instanceRepo, serverRepo, orchestrator, secretService);
serverService.setInstanceService(instanceService);
const projectService = new ProjectService(projectRepo, serverRepo);
@@ -484,23 +475,6 @@ async function main(): Promise<void> {
registerSecretBackendRoutes(app, secretBackendService);
registerSecretMigrateRoutes(app, secretMigrateService);
registerLlmRoutes(app, llmService);
registerLlmInferRoutes(app, {
llmService,
adapters: llmAdapters,
onInferenceEvent: (event) => {
app.log.info({
event: 'llm_inference_call',
llm: event.llmName,
model: event.model,
type: event.type,
userId: event.userId,
tokenSha: event.tokenSha,
streaming: event.streaming,
durationMs: event.durationMs,
status: event.status,
});
},
});
registerInstanceRoutes(app, instanceService);
registerProjectRoutes(app, projectService);
registerAuditLogRoutes(app, auditLogService);

View File

@@ -1,145 +0,0 @@
/**
* POST /api/v1/llms/:name/infer
*
* OpenAI-compatible chat completions endpoint. The RBAC check runs in the
* global hook — this URL maps to `run:llms:<name>`, not the default
* `create:llms`. See `main.ts:mapUrlToPermission`.
*
* Non-streaming: resolves the Llm, dispatches to the right provider adapter,
* returns the OpenAI chat.completion JSON.
*
* Streaming (`stream: true`): pipes adapter-emitted chunks back as
* `text/event-stream`. Adapters translate provider-native SSE into OpenAI
* `chat.completion.chunk`s so clients can use any OpenAI SDK unchanged.
*/
import type { FastifyInstance, FastifyReply } from 'fastify';
import type { LlmService } from '../services/llm.service.js';
import type { LlmAdapterRegistry } from '../services/llm/dispatcher.js';
import { NotFoundError } from '../services/mcp-server.service.js';
import type { OpenAiChatRequest, InferContext } from '../services/llm/types.js';
/** Dependency bag for the infer route — supplied by main.ts at registration. */
export interface LlmInferDeps {
  llmService: LlmService;
  adapters: LlmAdapterRegistry;
  /** Optional hook to emit audit events — consumer may ignore. */
  onInferenceEvent?: (event: InferenceAuditEvent) => void;
}

/**
 * Payload handed to `onInferenceEvent` for every inference call, success or
 * failure. `status` is the HTTP status returned (or 502 on adapter errors);
 * `durationMs` is wall-clock time measured from request entry.
 */
export interface InferenceAuditEvent {
  kind: 'llm_inference_call';
  /** Name of the Llm resource that served the call. */
  llmName: string;
  /** The Llm row's `model` field (a per-request body model may still override it in the adapter). */
  model: string;
  /** Provider type from the Llm row (e.g. 'openai', 'anthropic'). */
  type: string;
  userId?: string | undefined;
  tokenSha?: string | undefined;
  /** True when the client requested `stream: true`. */
  streaming: boolean;
  durationMs: number;
  status: number;
}
/**
 * Register `POST /api/v1/llms/:name/infer`.
 *
 * Resolves the named Llm, resolves its API key, dispatches to the provider
 * adapter, and either returns the OpenAI chat.completion JSON (non-streaming)
 * or pipes adapter chunks back as `text/event-stream` (streaming). An audit
 * event is emitted on every outcome when `deps.onInferenceEvent` is set.
 */
export function registerLlmInferRoutes(
  app: FastifyInstance,
  deps: LlmInferDeps,
): void {
  app.post<{ Params: { name: string }; Body: OpenAiChatRequest }>(
    '/api/v1/llms/:name/infer',
    async (request, reply) => {
      const started = Date.now();
      // Resolve the Llm row: 404 on unknown name, rethrow anything else.
      let llm;
      try {
        llm = await deps.llmService.getByName(request.params.name);
      } catch (err) {
        if (err instanceof NotFoundError) {
          reply.code(404);
          return { error: err.message };
        }
        throw err;
      }
      const body = (request.body ?? {}) as OpenAiChatRequest;
      if (!body.messages || body.messages.length === 0) {
        reply.code(400);
        return { error: 'messages is required' };
      }
      // Resolve API key (may be empty string for providers that don't take one).
      let apiKey = '';
      if (llm.apiKeyRef !== null) {
        try {
          apiKey = await deps.llmService.resolveApiKey(llm.name);
        } catch (err) {
          reply.code(500);
          return { error: `Failed to resolve API key: ${err instanceof Error ? err.message : String(err)}` };
        }
      }
      const ctx: InferContext = {
        body,
        modelOverride: llm.model,
        apiKey,
        url: llm.url,
        extraConfig: llm.extraConfig,
      };
      // FIX: `adapters.get` throws (e.g. UnsupportedProviderError) for Llm
      // types without an adapter; previously that escaped the handler as an
      // unhandled 500. Catch it here — before any response bytes are written —
      // and surface a 400 with the adapter registry's message instead.
      let adapter;
      try {
        adapter = deps.adapters.get(llm.type);
      } catch (err) {
        reply.code(400);
        return { error: err instanceof Error ? err.message : String(err) };
      }
      const streaming = body.stream === true;
      // Emit the audit event (if registered) with final status + duration.
      const audit = (status: number): void => {
        if (deps.onInferenceEvent === undefined) return;
        deps.onInferenceEvent({
          kind: 'llm_inference_call',
          llmName: llm.name,
          model: llm.model,
          type: llm.type,
          userId: request.userId,
          tokenSha: request.mcpToken?.tokenSha,
          streaming,
          durationMs: Date.now() - started,
          status,
        });
      };
      if (!streaming) {
        try {
          const result = await adapter.infer(ctx);
          reply.code(result.status);
          audit(result.status);
          return result.body;
        } catch (err) {
          audit(502);
          reply.code(502);
          return { error: err instanceof Error ? err.message : String(err) };
        }
      }
      // Streaming path — set SSE headers and pipe chunks.
      reply.raw.writeHead(200, {
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        Connection: 'keep-alive',
        'X-Accel-Buffering': 'no',
      });
      try {
        for await (const chunk of adapter.stream(ctx)) {
          writeSseChunk(reply, chunk.data);
          if (chunk.done === true) break;
        }
        audit(200);
      } catch (err) {
        // 200 + SSE headers are already on the wire — deliver the error
        // in-band as a final SSE payload, then terminate cleanly.
        const payload = JSON.stringify({
          error: { message: err instanceof Error ? err.message : String(err) },
        });
        writeSseChunk(reply, payload);
        writeSseChunk(reply, '[DONE]');
        audit(502);
      } finally {
        reply.raw.end();
      }
      return reply;
    },
  );
}
/** Frame one SSE payload (`data: <payload>\n\n`) and push it onto the raw response. */
function writeSseChunk(reply: FastifyReply, data: string): void {
  const frame = 'data: ' + data + '\n\n';
  reply.raw.write(frame);
}

View File

@@ -10,12 +10,9 @@ export function registerLlmRoutes(
return service.list();
});
// Accepts either CUID or human name. Used both by the CLI (which usually
// resolves to CUID first) and by FailoverRouter's RBAC pre-check (which
// hands over the user-facing name to avoid an extra round-trip).
app.get<{ Params: { id: string } }>('/api/v1/llms/:id', async (request, reply) => {
try {
return await getByIdOrName(service, request.params.id);
return await service.getById(request.params.id);
} catch (err) {
if (err instanceof NotFoundError) {
reply.code(404);
@@ -25,10 +22,6 @@ export function registerLlmRoutes(
}
});
// No explicit HEAD handler: Fastify auto-derives HEAD from GET, which runs
// the same RBAC hook + lookup and drops the body. That's exactly what
// FailoverRouter wants for its "can the caller still view this Llm?" probe.
app.post('/api/v1/llms', async (request, reply) => {
try {
const row = await service.create(request.body);
@@ -69,17 +62,3 @@ export function registerLlmRoutes(
}
});
}
// Loose CUID shape: leading `c` + 24 base36 chars. NOTE(review): the regex is
// not end-anchored, so any longer string with a CUID prefix also matches and
// will be routed to getById — confirm that is intentional.
const CUID_RE = /^c[a-z0-9]{24}/i;
/**
 * Look up by CUID first; if the input doesn't look like one, fall back to
 * findByName. Lets the same URL serve both `mcpctl describe llm <name>` and
 * the FailoverRouter's name-based RBAC check.
 */
async function getByIdOrName(service: LlmService, idOrName: string) {
  if (CUID_RE.test(idOrName)) {
    return service.getById(idOrName);
  }
  return service.getByName(idOrName);
}

View File

@@ -1,256 +0,0 @@
/**
* Anthropic adapter — translates between OpenAI chat/completions format and
* the Anthropic Messages API (`POST /v1/messages`).
*
* Key differences we translate:
* - OpenAI `role: 'system'` messages become a top-level `system` string.
* - Anthropic returns `content: [{ type: 'text', text }]` — we join into
* OpenAI's `content: "…"` string.
* - Streaming: Anthropic emits a sequence of
* `message_start / content_block_{start,delta,stop} / message_delta /
* message_stop` events. We translate those to OpenAI
* `chat.completion.chunk` deltas.
*
* This adapter implements the subset needed for plain-text chat — tool-use
* translation is intentionally left out for this phase; agents that need tool
* calling should target an OpenAI-compatible provider until the translator
* covers it.
*/
import type {
LlmAdapter,
InferContext,
NonStreamingResult,
StreamingChunk,
AdapterDeps,
OpenAiMessage,
} from '../types.js';
// Default endpoint used when the Llm row's `url` is empty.
const DEFAULT_ANTHROPIC_URL = 'https://api.anthropic.com';
// Pinned `anthropic-version` header value sent with every request.
const ANTHROPIC_VERSION = '2023-06-01';

/** Minimal shape of Anthropic's `POST /v1/messages` response — only the fields this adapter reads. */
interface AnthropicMessageResponse {
  id: string;
  model: string;
  role: 'assistant';
  // Text blocks are concatenated; any non-text block contributes ''.
  content: Array<{ type: 'text'; text: string } | { type: string; [k: string]: unknown }>;
  stop_reason?: string;
  usage?: { input_tokens: number; output_tokens: number };
}
export class AnthropicAdapter implements LlmAdapter {
  readonly kind = 'anthropic';
  private readonly fetchImpl: typeof globalThis.fetch;

  constructor(deps: AdapterDeps = {}) {
    // Injectable fetch so tests can stub the network.
    this.fetchImpl = deps.fetch ?? globalThis.fetch;
  }

  /**
   * Non-streaming inference: POST /v1/messages and translate the Anthropic
   * response into an OpenAI `chat.completion` body. Non-2xx answers are
   * relayed with the provider's status and a wrapped error message.
   */
  async infer(ctx: InferContext): Promise<NonStreamingResult> {
    // Strip trailing slashes so `${url}/v1/messages` never doubles a '/'.
    const url = (ctx.url !== '' ? ctx.url : DEFAULT_ANTHROPIC_URL).replace(/\/+$/, '');
    const body = this.toAnthropicRequest(ctx, false);
    const res = await this.fetchImpl(`${url}/v1/messages`, {
      method: 'POST',
      headers: this.headers(ctx),
      body: JSON.stringify(body),
    });
    if (!res.ok) {
      const text = await res.text().catch(() => '');
      return {
        status: res.status,
        body: { error: { message: `anthropic: HTTP ${String(res.status)} ${text}` } },
      };
    }
    const anth = await res.json() as AnthropicMessageResponse;
    return { status: 200, body: this.toOpenAiResponse(anth) };
  }

  /**
   * Streaming inference: consume Anthropic's SSE event stream and re-emit it
   * as OpenAI `chat.completion.chunk` payloads, ending with `[DONE]`.
   * Throws (instead of yielding) on a non-2xx/bodyless response or an
   * in-stream `error` event — the caller converts that to an SSE error frame.
   */
  async *stream(ctx: InferContext): AsyncGenerator<StreamingChunk> {
    const url = (ctx.url !== '' ? ctx.url : DEFAULT_ANTHROPIC_URL).replace(/\/+$/, '');
    const body = this.toAnthropicRequest(ctx, true);
    const res = await this.fetchImpl(`${url}/v1/messages`, {
      method: 'POST',
      headers: this.headers(ctx),
      body: JSON.stringify(body),
    });
    if (!res.ok || res.body === null) {
      const text = await res.text().catch(() => '');
      throw new Error(`anthropic stream: HTTP ${String(res.status)} ${text}`);
    }
    // Synthesized OpenAI chunk envelope fields, constant for the whole stream.
    const id = `chatcmpl-${cryptoNonce()}`;
    const model = body.model;
    const created = Math.floor(Date.now() / 1000);
    // Parse Anthropic SSE. Each event is `event: <name>\ndata: <json>\n\n`.
    const decoder = new TextDecoder();
    let buf = '';
    const reader = res.body.getReader();
    // Tracks whether the initial role-bearing chunk has been emitted yet
    // (OpenAI streams start with a `{ role: 'assistant', content: '' }` delta).
    let emittedFirst = false;
    // Build one serialized chat.completion.chunk with the given delta.
    const baseChunk = (delta: Record<string, unknown>, finishReason?: string): string => {
      const chunk = {
        id,
        object: 'chat.completion.chunk',
        created,
        model,
        choices: [{
          index: 0,
          delta,
          finish_reason: finishReason ?? null,
        }],
      };
      return JSON.stringify(chunk);
    };
    try {
      // eslint-disable-next-line no-constant-condition
      while (true) {
        const { value, done } = await reader.read();
        if (done) break;
        buf += decoder.decode(value, { stream: true });
        // Events are delimited by a blank line; keep any trailing partial
        // event in `buf` until its terminator arrives.
        let idx: number;
        while ((idx = buf.indexOf('\n\n')) !== -1) {
          const rawEvent = buf.slice(0, idx);
          buf = buf.slice(idx + 2);
          const parsed = parseSseEvent(rawEvent);
          if (parsed === null) continue;
          const { event, data } = parsed;
          if (event === 'content_block_delta') {
            // Incremental assistant text — forward as a content delta.
            const textDelta = (data as { delta?: { type?: string; text?: string } }).delta;
            if (textDelta?.type === 'text_delta' && typeof textDelta.text === 'string') {
              if (!emittedFirst) {
                yield { data: baseChunk({ role: 'assistant', content: '' }) };
                emittedFirst = true;
              }
              yield { data: baseChunk({ content: textDelta.text }) };
            }
          } else if (event === 'message_delta') {
            // Carries the stop reason — emit a finish_reason-only chunk.
            const stopReason = (data as { delta?: { stop_reason?: string } }).delta?.stop_reason;
            if (typeof stopReason === 'string') {
              yield { data: baseChunk({}, mapStopReason(stopReason)) };
            }
          } else if (event === 'message_stop') {
            yield { data: '[DONE]', done: true };
            return;
          } else if (event === 'error') {
            throw new Error(`anthropic stream error: ${JSON.stringify(data)}`);
          }
          // Other events (message_start, content_block_start/stop, ping)
          // carry nothing the OpenAI chunk format needs — ignored.
        }
      }
    } finally {
      reader.releaseLock();
    }
    // Anthropic closed without message_stop — give consumer a clean end.
    yield { data: '[DONE]', done: true };
  }

  /** Request headers: Anthropic auth uses `x-api-key`, not a Bearer token. */
  private headers(ctx: InferContext): Record<string, string> {
    return {
      'Content-Type': 'application/json',
      'x-api-key': ctx.apiKey,
      'anthropic-version': ANTHROPIC_VERSION,
    };
  }

  /** Translate the OpenAI request to the Anthropic Messages shape. */
  private toAnthropicRequest(ctx: InferContext, stream: boolean): {
    model: string;
    max_tokens: number;
    messages: Array<{ role: 'user' | 'assistant'; content: string }>;
    system?: string;
    stream?: boolean;
    temperature?: number;
    top_p?: number;
    stop_sequences?: string[];
  } {
    const { body } = ctx;
    const systemParts: string[] = [];
    const messages: Array<{ role: 'user' | 'assistant'; content: string }> = [];
    for (const msg of body.messages) {
      const text = normaliseContent(msg);
      if (msg.role === 'system') {
        // System messages become the top-level `system` string (joined below).
        systemParts.push(text);
      } else if (msg.role === 'user' || msg.role === 'assistant') {
        messages.push({ role: msg.role, content: text });
      }
      // `tool` role messages are dropped — tool translation is out of scope
      // for this phase.
    }
    const out: ReturnType<typeof this.toAnthropicRequest> = {
      model: body.model !== '' ? body.model : ctx.modelOverride,
      // max_tokens is mandatory on the Anthropic side; 1024 when unspecified.
      max_tokens: typeof body.max_tokens === 'number' ? body.max_tokens : 1024,
      messages,
    };
    if (systemParts.length > 0) out.system = systemParts.join('\n\n');
    if (stream) out.stream = true;
    if (typeof body.temperature === 'number') out.temperature = body.temperature;
    if (typeof body.top_p === 'number') out.top_p = body.top_p;
    if (body.stop !== undefined) {
      // OpenAI allows string | string[]; Anthropic only takes the array form.
      out.stop_sequences = Array.isArray(body.stop) ? body.stop : [body.stop];
    }
    return out;
  }

  /** Translate an Anthropic Messages response into OpenAI chat.completion shape. */
  private toOpenAiResponse(anth: AnthropicMessageResponse): Record<string, unknown> {
    // Join all text blocks; non-text blocks contribute nothing.
    const text = anth.content
      .map((c) => (c.type === 'text' && typeof (c as { text?: unknown }).text === 'string'
        ? (c as { text: string }).text
        : ''))
      .join('');
    return {
      id: `chatcmpl-${anth.id}`,
      object: 'chat.completion',
      created: Math.floor(Date.now() / 1000),
      model: anth.model,
      choices: [{
        index: 0,
        message: { role: 'assistant', content: text },
        finish_reason: mapStopReason(anth.stop_reason ?? 'end_turn'),
      }],
      usage: anth.usage ? {
        prompt_tokens: anth.usage.input_tokens,
        completion_tokens: anth.usage.output_tokens,
        total_tokens: anth.usage.input_tokens + anth.usage.output_tokens,
      } : undefined,
    };
  }
}
/** Flatten an OpenAI message's content (plain string or typed parts) into text. */
function normaliseContent(msg: OpenAiMessage): string {
  const { content } = msg;
  if (typeof content === 'string') {
    return content;
  }
  const pieces: string[] = [];
  for (const part of content) {
    pieces.push(typeof part.text === 'string' ? part.text : '');
  }
  return pieces.join('');
}
/** Map an Anthropic stop_reason onto OpenAI's finish_reason vocabulary; unknown values pass through. */
function mapStopReason(r: string): string {
  switch (r) {
    case 'end_turn':
    case 'stop_sequence':
      return 'stop';
    case 'max_tokens':
      return 'length';
    case 'tool_use':
      return 'tool_calls';
    default:
      return r;
  }
}
/**
 * Parse one raw SSE event ("event: <name>\ndata: <json>") into its name and
 * JSON-decoded payload. Returns null when no data line is present or the
 * accumulated payload is not valid JSON.
 */
function parseSseEvent(raw: string): { event: string; data: unknown } | null {
  let eventName = '';
  let payload = '';
  for (const line of raw.split('\n')) {
    if (line.startsWith('event:')) {
      eventName = line.slice('event:'.length).trim();
    } else if (line.startsWith('data:')) {
      payload += line.slice('data:'.length).trim();
    }
  }
  if (payload === '') {
    return null;
  }
  try {
    const data = JSON.parse(payload) as unknown;
    return { event: eventName, data };
  } catch {
    return null;
  }
}
/**
 * Short randomish id for synthesized chunk envelopes.
 * Not security-sensitive — Math.random is fine here.
 */
function cryptoNonce(): string {
  const base36 = Math.random().toString(36);
  // Drop the leading "0." and keep at most 8 characters.
  return base36.slice(2, 10);
}

View File

@@ -1,112 +0,0 @@
/**
* OpenAI-passthrough adapter.
*
* Covers any provider that already speaks OpenAI chat/completions on the
* wire: `openai`, `vllm`, `deepseek`, `ollama` (with their openai-compatible
* endpoint enabled). The adapter forwards the request body verbatim and
* streams the response straight through — no wire translation.
*
* Defaults when `url` is empty:
* - openai → https://api.openai.com
* - deepseek → https://api.deepseek.com
* - vllm/ollama → must be configured; these have no canonical public URL.
*/
import type { LlmAdapter, InferContext, NonStreamingResult, StreamingChunk, AdapterDeps } from '../types.js';
// Built-in endpoints for providers with a canonical public URL. `vllm` and
// `ollama` deliberately have no entry — their URL must be configured on the
// Llm row, and endpointUrl() throws if it isn't.
const DEFAULT_URLS: Record<string, string> = {
  openai: 'https://api.openai.com',
  deepseek: 'https://api.deepseek.com',
};
export class OpenAiPassthroughAdapter implements LlmAdapter {
  readonly kind: string;
  private readonly fetchImpl: typeof globalThis.fetch;

  constructor(kind: 'openai' | 'vllm' | 'deepseek' | 'ollama', deps: AdapterDeps = {}) {
    this.kind = kind;
    // Injectable fetch so tests can stub the network.
    this.fetchImpl = deps.fetch ?? globalThis.fetch;
  }

  /**
   * Non-streaming call: forward the body to <url>/v1/chat/completions and
   * relay the provider's JSON body + status verbatim (including errors).
   * NOTE(review): assumes the provider answers with JSON — a non-JSON error
   * body makes `res.json()` throw; confirm the calling route handles that.
   */
  async infer(ctx: InferContext): Promise<NonStreamingResult> {
    const url = this.endpointUrl(ctx.url);
    const body = this.prepareBody(ctx, false);
    const res = await this.fetchImpl(`${url}/v1/chat/completions`, {
      method: 'POST',
      headers: this.headers(ctx),
      body: JSON.stringify(body),
    });
    const json = await res.json() as unknown;
    return { status: res.status, body: json };
  }

  /**
   * Streaming call: forward the body with `stream: true` and re-frame the
   * provider's SSE into `StreamingChunk`s. Throws on a non-2xx/bodyless
   * response — the caller translates that into an in-band SSE error.
   */
  async *stream(ctx: InferContext): AsyncGenerator<StreamingChunk> {
    const url = this.endpointUrl(ctx.url);
    const body = this.prepareBody(ctx, true);
    const res = await this.fetchImpl(`${url}/v1/chat/completions`, {
      method: 'POST',
      headers: this.headers(ctx),
      body: JSON.stringify(body),
    });
    if (!res.ok || res.body === null) {
      const text = await res.text().catch(() => '');
      throw new Error(`${this.kind} stream: HTTP ${String(res.status)} ${text}`);
    }
    // Re-frame the provider's SSE stream into our `StreamingChunk` shape.
    // OpenAI-compat providers already emit `data: {...}` + `data: [DONE]` —
    // we just unwrap the `data: ` prefix, forward payloads, and emit a
    // single terminal `done` chunk so the consumer always gets one.
    const decoder = new TextDecoder();
    let buf = '';
    const reader = res.body.getReader();
    try {
      // eslint-disable-next-line no-constant-condition
      while (true) {
        const { value, done } = await reader.read();
        if (done) break;
        buf += decoder.decode(value, { stream: true });
        // Events are blank-line delimited; a trailing partial event stays
        // buffered until its terminator arrives in a later read.
        let idx: number;
        while ((idx = buf.indexOf('\n\n')) !== -1) {
          const event = buf.slice(0, idx);
          buf = buf.slice(idx + 2);
          for (const line of event.split('\n')) {
            if (!line.startsWith('data:')) continue;
            const payload = line.slice(5).trim();
            if (payload === '') continue;
            if (payload === '[DONE]') {
              yield { data: '[DONE]', done: true };
              return;
            }
            yield { data: payload };
          }
        }
      }
    } finally {
      reader.releaseLock();
    }
    // Provider closed without emitting [DONE] — give the consumer a clean end.
    yield { data: '[DONE]', done: true };
  }

  /**
   * Resolve the base URL: the configured one (trailing slashes stripped) or
   * this provider's default. Throws for providers with no default endpoint.
   */
  private endpointUrl(url: string): string {
    if (url !== '') return url.replace(/\/+$/, '');
    const def = DEFAULT_URLS[this.kind];
    if (def === undefined) {
      throw new Error(`${this.kind}: url is required (no default endpoint for this provider)`);
    }
    return def;
  }

  /** Bearer auth header is only attached when an API key was resolved. */
  private headers(ctx: InferContext): Record<string, string> {
    const headers: Record<string, string> = { 'Content-Type': 'application/json' };
    if (ctx.apiKey !== '') headers['Authorization'] = `Bearer ${ctx.apiKey}`;
    return headers;
  }

  /** Copy the request body, fill in the Llm's model if unset, force the stream flag. */
  private prepareBody(ctx: InferContext, stream: boolean): Record<string, unknown> {
    const out: Record<string, unknown> = { ...ctx.body };
    if (out.model === undefined || out.model === '') out.model = ctx.modelOverride;
    out.stream = stream;
    return out;
  }
}

View File

@@ -1,52 +0,0 @@
/**
* Adapter dispatcher for the inference proxy.
*
* `getAdapter(type)` returns the right adapter instance for an Llm's `type`
* column. Adapters are cached per-type — they carry no per-request state.
* The caller (the infer route) supplies the resolved API key + request body
* through `InferContext`, so a single adapter instance serves every Llm of
* that type.
*/
import type { LlmAdapter, AdapterDeps } from './types.js';
import { OpenAiPassthroughAdapter } from './adapters/openai-passthrough.js';
import { AnthropicAdapter } from './adapters/anthropic.js';
/** Raised when an Llm's `type` has no adapter implementation in this registry. */
export class UnsupportedProviderError extends Error {
  constructor(type: string) {
    const message = `Unsupported LLM provider: ${type}`;
    super(message);
    this.name = 'UnsupportedProviderError';
  }
}
/**
 * Maps an Llm row's `type` column to a provider adapter. Instances are
 * memoised per type — adapters hold no per-request state, so one instance
 * serves every Llm of that type.
 */
export class LlmAdapterRegistry {
  private readonly cache = new Map<string, LlmAdapter>();

  constructor(private readonly deps: AdapterDeps = {}) {}

  /** Return the adapter for `type`, constructing and caching it on first use. */
  get(type: string): LlmAdapter {
    let adapter = this.cache.get(type);
    if (adapter === undefined) {
      adapter = this.build(type);
      this.cache.set(type, adapter);
    }
    return adapter;
  }

  /** Construct a fresh adapter; throws UnsupportedProviderError for unknown types. */
  private build(type: string): LlmAdapter {
    if (type === 'openai' || type === 'vllm' || type === 'deepseek' || type === 'ollama') {
      // These providers speak OpenAI chat/completions natively — passthrough.
      return new OpenAiPassthroughAdapter(type, this.deps);
    }
    if (type === 'anthropic') {
      return new AnthropicAdapter(this.deps);
    }
    if (type === 'gemini-cli') {
      // Intentionally deferred — gemini-cli requires the binary on the mcpd
      // pod filesystem and subprocess lifecycle management. Flagged as
      // homelab-only in the plan; not landing in this phase.
      throw new UnsupportedProviderError(`${type} (subprocess providers are not supported in the proxy yet)`);
    }
    throw new UnsupportedProviderError(type);
  }
}

View File

@@ -1,70 +0,0 @@
/**
* Shared types for the LLM inference proxy.
*
* The wire format on the mcpctl side is OpenAI's chat/completions v1 — it's
* the de-facto lingua franca and every client library already speaks it.
* Provider-specific adapters translate to/from that shape.
*/
/** One chat message in OpenAI's wire format. */
export interface OpenAiMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  // Either a plain string or OpenAI's typed content-part array.
  content: string | Array<{ type: string; text?: string; [k: string]: unknown }>;
  name?: string;
  tool_call_id?: string;
  tool_calls?: Array<{ id: string; type: 'function'; function: { name: string; arguments: string } }>;
}

/** Incoming request body — OpenAI chat/completions v1. */
export interface OpenAiChatRequest {
  // May be '' — adapters then substitute the Llm row's model.
  model: string;
  messages: OpenAiMessage[];
  stream?: boolean;
  temperature?: number;
  max_tokens?: number;
  top_p?: number;
  stop?: string | string[];
  tools?: Array<{ type: 'function'; function: { name: string; description?: string; parameters?: Record<string, unknown> } }>;
  tool_choice?: unknown;
  // Passthrough: unknown extras forwarded as-is.
  [k: string]: unknown;
}

/** Per-request context handed from the infer route to an adapter. */
export interface InferContext {
  /** Normalised OpenAI-format body. Adapters read/transform from here. */
  body: OpenAiChatRequest;
  /** The Llm row's `model` field, used when the request body has an empty model. */
  modelOverride: string;
  /** The resolved API key, or empty string for providers that don't take one. */
  apiKey: string;
  /** Target URL from the Llm row (may be empty for provider-default). */
  url: string;
  /** Arbitrary config from the Llm row (e.g. vllm gpu settings). */
  extraConfig: Record<string, unknown>;
}

/** Result of a non-streaming adapter call. */
export interface NonStreamingResult {
  /** HTTP status to relay back to the client. */
  status: number;
  /** OpenAI chat.completion response body. */
  body: unknown;
}

/** One SSE frame produced by an adapter's stream(). */
export interface StreamingChunk {
  /** Raw SSE data payload. Consumer emits `data: <payload>\n\n`. */
  data: string;
  /** Mark the end of stream — consumer emits `data: [DONE]\n\n`. */
  done?: boolean;
}

/** Contract every provider adapter implements. */
export interface LlmAdapter {
  /** Provider type this adapter serves (matches the Llm row's `type`). */
  readonly kind: string;
  /** Non-streaming request. Returns the final chat.completion body. */
  infer(ctx: InferContext): Promise<NonStreamingResult>;
  /**
   * Streaming request. Yields OpenAI-format SSE chunks. Adapters translate
   * provider-native stream formats into OpenAI `chat.completion.chunk`s.
   */
  stream(ctx: InferContext): AsyncGenerator<StreamingChunk>;
}

/** Constructor dependencies shared by all adapters. */
export interface AdapterDeps {
  /** Injectable fetch for tests; defaults to globalThis.fetch. */
  fetch?: typeof globalThis.fetch;
}

View File

@@ -1,210 +0,0 @@
import { describe, it, expect, vi } from 'vitest';
import { OpenAiPassthroughAdapter } from '../src/services/llm/adapters/openai-passthrough.js';
import { AnthropicAdapter } from '../src/services/llm/adapters/anthropic.js';
import { LlmAdapterRegistry, UnsupportedProviderError } from '../src/services/llm/dispatcher.js';
import type { InferContext } from '../src/services/llm/types.js';
/**
 * Build a vi.fn() standing in for global fetch. Each call is matched against
 * `responses` by URL regex; the first hit yields a JSON Response, while an
 * unmatched URL rejects so stray requests fail the test loudly.
 */
function mockFetch(responses: Array<{ match: RegExp; status: number; body?: unknown; text?: string }>): ReturnType<typeof vi.fn> {
  const handler = async (input: string | URL, _init?: RequestInit) => {
    const target = String(input);
    const hit = responses.find((entry) => entry.match.test(target));
    if (!hit) throw new Error(`unexpected fetch: ${target}`);
    let payload: string;
    if (hit.body !== undefined) {
      payload = JSON.stringify(hit.body);
    } else {
      payload = hit.text ?? '';
    }
    return new Response(payload, { status: hit.status, headers: { 'Content-Type': 'application/json' } });
  };
  return vi.fn(handler);
}
/** Build an InferContext with test defaults, shallow-merged with `overrides`. */
function makeCtx(overrides: Partial<InferContext> = {}): InferContext {
  const defaults: InferContext = {
    body: { model: '', messages: [{ role: 'user', content: 'hello' }] },
    modelOverride: 'default-model',
    apiKey: 'test-key',
    url: '',
    extraConfig: {},
  };
  return { ...defaults, ...overrides };
}
// Helper to build a streaming Response from SSE lines.
// Helper to build a streaming Response whose body is the given SSE events,
// separated (and terminated) by blank lines, delivered in a single chunk.
function sseResponse(events: string[]): Response {
  const payload = `${events.join('\n\n')}\n\n`;
  const bytes = new TextEncoder().encode(payload);
  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      controller.enqueue(bytes);
      controller.close();
    },
  });
  return new Response(stream, { status: 200, headers: { 'Content-Type': 'text/event-stream' } });
}
// Covers the passthrough adapter: request construction (URL, method, auth
// header, model fill-in), provider-default endpoints, and SSE forwarding.
describe('OpenAiPassthroughAdapter', () => {
  it('infer: POSTs to <url>/v1/chat/completions with Authorization + body', async () => {
    const fetchFn = mockFetch([{
      match: /\/v1\/chat\/completions$/,
      status: 200,
      body: { id: 'x', choices: [{ message: { role: 'assistant', content: 'hi' } }] },
    }]);
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
    const ctx = makeCtx({ url: 'https://api.example.com' });
    const res = await adapter.infer(ctx);
    expect(res.status).toBe(200);
    // Inspect the outgoing request the adapter built: URL, method, headers, body.
    const [url, init] = fetchFn.mock.calls[0] as [string, RequestInit];
    expect(url).toBe('https://api.example.com/v1/chat/completions');
    expect(init.method).toBe('POST');
    const headers = init.headers as Record<string, string>;
    expect(headers['Authorization']).toBe('Bearer test-key');
    const sent = JSON.parse(init.body as string) as { model: string; stream: boolean };
    expect(sent.model).toBe('default-model'); // filled from modelOverride
    expect(sent.stream).toBe(false);
  });
  it('infer: uses default URL for openai when url is empty', async () => {
    const fetchFn = mockFetch([{ match: /api\.openai\.com/, status: 200, body: {} }]);
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
    await adapter.infer(makeCtx());
    const [url] = fetchFn.mock.calls[0] as [string, RequestInit];
    expect(url).toBe('https://api.openai.com/v1/chat/completions');
  });
  it('infer: throws for vllm when url is empty (no default)', async () => {
    const adapter = new OpenAiPassthroughAdapter('vllm', { fetch: vi.fn() as unknown as typeof fetch });
    await expect(adapter.infer(makeCtx())).rejects.toThrow(/no default endpoint/);
  });
  it('infer: omits Authorization when apiKey is empty', async () => {
    const fetchFn = mockFetch([{ match: /ollama/, status: 200, body: {} }]);
    const adapter = new OpenAiPassthroughAdapter('ollama', { fetch: fetchFn as unknown as typeof fetch });
    await adapter.infer(makeCtx({ url: 'http://ollama:11434', apiKey: '' }));
    const [, init] = fetchFn.mock.calls[0] as [string, RequestInit];
    const headers = init.headers as Record<string, string>;
    expect(headers['Authorization']).toBeUndefined();
  });
  it('stream: forwards SSE chunks and emits terminal [DONE]', async () => {
    const fetchFn = vi.fn(async () => sseResponse([
      'data: {"choices":[{"delta":{"content":"hi"}}]}',
      'data: {"choices":[{"delta":{"content":"!"}}]}',
      'data: [DONE]',
    ]));
    const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
    const ctx = makeCtx({ url: 'http://example', body: { model: '', messages: [], stream: true } });
    const chunks: { data: string; done?: boolean }[] = [];
    for await (const c of adapter.stream(ctx)) chunks.push(c);
    // Two data chunks plus the terminal [DONE] marker, flagged done.
    expect(chunks).toHaveLength(3);
    expect(chunks[2]?.done).toBe(true);
  });
});
// Covers the Anthropic adapter: OpenAI→Anthropic request translation
// (system prompt hoisting, default max_tokens), Anthropic→OpenAI response
// translation, error shaping, and stream-event transcoding.
describe('AnthropicAdapter', () => {
  it('infer: translates system+user messages, posts to /v1/messages', async () => {
    const fetchFn = mockFetch([{
      match: /\/v1\/messages$/,
      status: 200,
      body: {
        id: 'msg_01', model: 'claude-3-5-sonnet-20241022', role: 'assistant',
        content: [{ type: 'text', text: 'howdy' }],
        stop_reason: 'end_turn',
        usage: { input_tokens: 5, output_tokens: 2 },
      },
    }]);
    const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
    const ctx = makeCtx({
      body: {
        model: '', messages: [
          { role: 'system', content: 'be nice' },
          { role: 'user', content: 'hi' },
        ],
      },
      modelOverride: 'claude-3-5-sonnet-20241022',
    });
    const res = await adapter.infer(ctx);
    expect(res.status).toBe(200);
    const [url, init] = fetchFn.mock.calls[0] as [string, RequestInit];
    expect(url).toBe('https://api.anthropic.com/v1/messages');
    // Anthropic auth uses x-api-key + anthropic-version, not Authorization.
    const headers = init.headers as Record<string, string>;
    expect(headers['x-api-key']).toBe('test-key');
    expect(headers['anthropic-version']).toBeDefined();
    // The system message is hoisted into Anthropic's top-level `system` field.
    const sent = JSON.parse(init.body as string) as {
      model: string; system: string; messages: Array<{ role: string; content: string }>; max_tokens: number;
    };
    expect(sent.model).toBe('claude-3-5-sonnet-20241022');
    expect(sent.system).toBe('be nice');
    expect(sent.messages).toEqual([{ role: 'user', content: 'hi' }]);
    expect(sent.max_tokens).toBe(1024); // default
    // Response shape: OpenAI chat.completion
    const body = res.body as { choices: Array<{ message: { content: string }; finish_reason: string }>; usage: { total_tokens: number } };
    expect(body.choices[0]!.message.content).toBe('howdy');
    expect(body.choices[0]!.finish_reason).toBe('stop');
    expect(body.usage.total_tokens).toBe(7);
  });
  it('infer: returns a synthetic error body on non-2xx', async () => {
    const fetchFn = vi.fn(async () => new Response('boom', { status: 500 }));
    const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
    const res = await adapter.infer(makeCtx({ body: { model: '', messages: [{ role: 'user', content: 'x' }] } }));
    expect(res.status).toBe(500);
    const body = res.body as { error: { message: string } };
    expect(body.error.message).toMatch(/HTTP 500/);
  });
  it('stream: translates anthropic event stream into OpenAI chunks', async () => {
    const events = [
      'event: message_start\ndata: {"type":"message_start","message":{"id":"m","content":[]}}',
      'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"he"}}',
      'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"llo"}}',
      'event: message_delta\ndata: {"type":"message_delta","delta":{"stop_reason":"end_turn"}}',
      'event: message_stop\ndata: {"type":"message_stop"}',
    ];
    const fetchFn = vi.fn(async () => sseResponse(events));
    const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
    const ctx = makeCtx({ body: { model: '', messages: [{ role: 'user', content: 'hi' }], stream: true } });
    const chunks: { data: string; done?: boolean }[] = [];
    for await (const c of adapter.stream(ctx)) chunks.push(c);
    // Expect: role-prime, two text deltas, finish-reason, [DONE]
    expect(chunks[chunks.length - 1]?.data).toBe('[DONE]');
    expect(chunks[chunks.length - 1]?.done).toBe(true);
    // First chunk is the role-prime (role: assistant, content: '').
    const first = JSON.parse(chunks[0]!.data) as { choices: [{ delta: { role: string; content: string } }] };
    expect(first.choices[0]!.delta.role).toBe('assistant');
    // Next two chunks carry the text.
    const d1 = JSON.parse(chunks[1]!.data) as { choices: [{ delta: { content: string } }] };
    const d2 = JSON.parse(chunks[2]!.data) as { choices: [{ delta: { content: string } }] };
    expect(d1.choices[0]!.delta.content).toBe('he');
    expect(d2.choices[0]!.delta.content).toBe('llo');
    // Finish-reason chunk.
    const stopped = JSON.parse(chunks[3]!.data) as { choices: [{ finish_reason: string }] };
    expect(stopped.choices[0]!.finish_reason).toBe('stop');
  });
});
// Registry behavior: provider-type → adapter mapping, instance caching,
// and rejection of unsupported provider types.
describe('LlmAdapterRegistry', () => {
  it('returns the right adapter kind for each type', () => {
    const reg = new LlmAdapterRegistry();
    expect(reg.get('openai').kind).toBe('openai');
    expect(reg.get('vllm').kind).toBe('vllm');
    expect(reg.get('deepseek').kind).toBe('deepseek');
    expect(reg.get('ollama').kind).toBe('ollama');
    expect(reg.get('anthropic').kind).toBe('anthropic');
  });
  it('caches adapters between calls', () => {
    const reg = new LlmAdapterRegistry();
    const a = reg.get('openai');
    const b = reg.get('openai');
    // Same instance returned — adapters are memoised per type.
    expect(a).toBe(b);
  });
  it('rejects unsupported providers (gemini-cli is deferred)', () => {
    const reg = new LlmAdapterRegistry();
    expect(() => reg.get('gemini-cli')).toThrow(UnsupportedProviderError);
    expect(() => reg.get('bogus')).toThrow(UnsupportedProviderError);
  });
});

View File

@@ -1,208 +0,0 @@
import { describe, it, expect, vi, afterEach } from 'vitest';
import Fastify from 'fastify';
import type { FastifyInstance } from 'fastify';
import { registerLlmInferRoutes } from '../src/routes/llm-infer.js';
import { LlmAdapterRegistry } from '../src/services/llm/dispatcher.js';
import { errorHandler } from '../src/middleware/error-handler.js';
import type { LlmView } from '../src/services/llm.service.js';
import { NotFoundError } from '../src/services/mcp-server.service.js';
let app: FastifyInstance;
/** Fabricate an LlmView row (anthropic/claude defaults), merged with `overrides`. */
function makeLlmView(overrides: Partial<LlmView> = {}): LlmView {
  const base: LlmView = {
    id: 'llm-1',
    name: 'claude',
    type: 'anthropic',
    model: 'claude-3-5-sonnet-20241022',
    url: '',
    tier: 'heavy',
    description: '',
    apiKeyRef: { name: 'anthropic-key', key: 'token' },
    extraConfig: {},
    version: 1,
    createdAt: new Date(),
    updatedAt: new Date(),
  };
  return { ...base, ...overrides };
}
// Close the Fastify instance created by setupApp so each test starts clean.
afterEach(async () => {
  if (app) await app.close();
});
/** Build a 200 Response streaming the given SSE events (blank-line separated). */
function sseResponse(events: string[]): Response {
  const text = events.join('\n\n') + '\n\n';
  const encoded = new TextEncoder().encode(text);
  const stream = new ReadableStream<Uint8Array>({
    start(ctrl) {
      ctrl.enqueue(encoded);
      ctrl.close();
    },
  });
  return new Response(stream, { status: 200 });
}
// Structural slice of the real LlmService that the infer routes consume;
// lets tests supply plain-object fakes without the full service.
interface LlmServiceLike {
  getByName: (name: string) => Promise<LlmView>;
  resolveApiKey: (name: string) => Promise<string>;
}
/**
 * Boot a fresh Fastify app with the LLM infer routes wired to the supplied
 * (possibly faked) service and adapter registry. The instance is stored in
 * the module-level `app` so afterEach can close it.
 */
async function setupApp(
  llmService: LlmServiceLike,
  adapters: LlmAdapterRegistry,
  onInferenceEvent?: Parameters<typeof registerLlmInferRoutes>[1]['onInferenceEvent'],
): Promise<FastifyInstance> {
  app = Fastify({ logger: false });
  app.setErrorHandler(errorHandler);
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const deps: Parameters<typeof registerLlmInferRoutes>[1] = { llmService: llmService as any, adapters };
  // Attach the optional audit hook only when supplied.
  if (onInferenceEvent !== undefined) {
    deps.onInferenceEvent = onInferenceEvent;
  }
  registerLlmInferRoutes(app, deps);
  await app.ready();
  return app;
}
// Route-level tests for the inference proxy endpoint: error mapping (404 /
// 400 / 500 / 502), adapter dispatch, conditional API-key resolution,
// streaming SSE relay, and audit-event emission.
describe('POST /api/v1/llms/:name/infer', () => {
  it('returns 404 when the Llm does not exist', async () => {
    const svc: LlmServiceLike = {
      getByName: async () => { throw new NotFoundError('Llm not found: missing'); },
      resolveApiKey: async () => '',
    };
    await setupApp(svc, new LlmAdapterRegistry());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/missing/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(404);
  });
  it('returns 400 when messages is missing', async () => {
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ apiKeyRef: null }),
      resolveApiKey: async () => '',
    };
    await setupApp(svc, new LlmAdapterRegistry());
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: {},
    });
    expect(res.statusCode).toBe(400);
  });
  it('dispatches non-streaming to the adapter and returns its JSON', async () => {
    // Upstream (Anthropic-shaped) response the mocked fetch will serve.
    const fetchFn = vi.fn(async () => new Response(JSON.stringify({
      id: 'msg_1', model: 'claude-3-5-sonnet-20241022', role: 'assistant',
      content: [{ type: 'text', text: 'hello' }],
      stop_reason: 'end_turn',
      usage: { input_tokens: 1, output_tokens: 1 },
    }), { status: 200 }));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => 'sk-ant-xyz',
    };
    const events: unknown[] = [];
    await setupApp(svc, adapters, (e) => events.push(e));
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(200);
    const body = res.json<{ choices: Array<{ message: { content: string } }> }>();
    expect(body.choices[0]!.message.content).toBe('hello');
    // Audit event emitted
    expect(events).toHaveLength(1);
    expect((events[0] as { kind: string; llmName: string; status: number }).kind).toBe('llm_inference_call');
    expect((events[0] as { llmName: string }).llmName).toBe('claude');
    expect((events[0] as { streaming: boolean }).streaming).toBe(false);
    expect((events[0] as { status: number }).status).toBe(200);
  });
  it('500s when apiKey resolution fails', async () => {
    const adapters = new LlmAdapterRegistry();
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => { throw new Error('secret not found'); },
    };
    await setupApp(svc, adapters);
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(500);
    expect(res.json<{ error: string }>().error).toMatch(/secret not found/);
  });
  it('skips apiKey resolution when the Llm has no apiKeyRef', async () => {
    const fetchFn = vi.fn(async () => new Response(JSON.stringify({ id: 'x', choices: [] }), { status: 200 }));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const resolveSpy = vi.fn();
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ type: 'ollama', url: 'http://ollama:11434', apiKeyRef: null }),
      resolveApiKey: resolveSpy as unknown as LlmServiceLike['resolveApiKey'],
    };
    await setupApp(svc, adapters);
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/ollama-local/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(200);
    // No apiKeyRef on the row → the secret backend must never be consulted.
    expect(resolveSpy).not.toHaveBeenCalled();
  });
  it('streams SSE chunks for stream: true', async () => {
    const fetchFn = vi.fn(async () => sseResponse([
      'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"hi"}}',
      'event: message_stop\ndata: {"type":"message_stop"}',
    ]));
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView(),
      resolveApiKey: async () => 'sk-ant-xyz',
    };
    const events: Array<{ streaming: boolean; status: number }> = [];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await setupApp(svc, adapters, ((e: any) => events.push(e)) as any);
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/claude/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }], stream: true },
    });
    expect(res.statusCode).toBe(200);
    expect(res.body).toContain('data:');
    expect(res.body).toContain('[DONE]');
    expect(events).toHaveLength(1);
    expect(events[0]!.streaming).toBe(true);
  });
  it('502s on adapter errors (non-streaming)', async () => {
    const fetchFn = vi.fn(async () => { throw new Error('upstream down'); });
    const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
    const svc: LlmServiceLike = {
      getByName: async () => makeLlmView({ type: 'openai', url: 'http://example', apiKeyRef: null }),
      resolveApiKey: async () => '',
    };
    await setupApp(svc, adapters);
    const res = await app.inject({
      method: 'POST',
      url: '/api/v1/llms/x/infer',
      payload: { messages: [{ role: 'user', content: 'hi' }] },
    });
    expect(res.statusCode).toBe(502);
    expect(res.json<{ error: string }>().error).toMatch(/upstream down/);
  });
});

View File

@@ -104,25 +104,6 @@ describe('Llm Routes', () => {
expect(res.statusCode).toBe(404);
});
it('GET /api/v1/llms/:nameOrId resolves by human name when not a CUID', async () => {
await createApp(mockRepo([makeLlm({ id: 'llm-1', name: 'claude' })]));
const res = await app.inject({ method: 'GET', url: '/api/v1/llms/claude' });
expect(res.statusCode).toBe(200);
expect(res.json<{ name: string; id: string }>().name).toBe('claude');
});
it('HEAD /api/v1/llms/:name returns 200 for an existing Llm (failover RBAC pre-check)', async () => {
await createApp(mockRepo([makeLlm({ name: 'claude' })]));
const res = await app.inject({ method: 'HEAD', url: '/api/v1/llms/claude' });
expect(res.statusCode).toBe(200);
});
it('HEAD /api/v1/llms/:name returns 404 for a missing Llm', async () => {
await createApp(mockRepo());
const res = await app.inject({ method: 'HEAD', url: '/api/v1/llms/missing' });
expect(res.statusCode).toBe(404);
});
it('POST /api/v1/llms creates and returns 201', async () => {
await createApp(mockRepo());
const res = await app.inject({

View File

@@ -57,16 +57,9 @@ export async function refreshProjectUpstreams(
/**
* Fetch a project's LLM config (llmProvider, llmModel) from mcpd.
*
* Phase 4 redefines `llmProvider` semantically: it names a centralized `Llm`
* resource (see `mcpctl get llms`) — NOT a local provider. Consumers should
* resolve it through mcpd's inference proxy when reachable. The field remains
* a free-form string on the wire for backward compatibility; local overrides
* in `~/.mcpctl/config.json` still take priority, and unknown names fall
* through to the registry default.
* These are the project-level "recommendations" — local overrides take priority.
*/
export interface ProjectLlmConfig {
/** Name of an `Llm` resource on mcpd, or 'none' to disable LLM features. */
llmProvider?: string;
llmModel?: string;
proxyModel?: string;
@@ -74,31 +67,6 @@ export interface ProjectLlmConfig {
serverOverrides?: Record<string, { proxyModel?: string }>;
}
/**
 * Resolve a project's `llmProvider` against mcpd's Llm registry. Returns:
 *  - 'registered'  — an Llm with this name exists
 *  - 'disabled'    — value is the literal 'none'
 *  - 'unregistered'— no Llm matches (consumer should fall back to registry default)
 *  - 'unreachable' — mcpd couldn't be queried
 */
export type LlmReferenceStatus = 'registered' | 'disabled' | 'unregistered' | 'unreachable';
/**
 * Check whether a project's `llmProvider` names a real Llm on mcpd.
 *
 * Empty/undefined → 'unregistered'; the literal 'none' → 'disabled'.
 * Otherwise probe GET /api/v1/llms/:name: success → 'registered',
 * a 404-ish failure → 'unregistered', anything else → 'unreachable'.
 */
export async function resolveProjectLlmReference(
  mcpdClient: McpdClient,
  llmProvider: string | undefined,
): Promise<LlmReferenceStatus> {
  if (llmProvider === undefined || llmProvider === '') {
    return 'unregistered';
  }
  if (llmProvider === 'none') {
    return 'disabled';
  }
  try {
    await mcpdClient.get(`/api/v1/llms/${encodeURIComponent(llmProvider)}`);
  } catch (err) {
    // Client errors surface as message strings; sniff for a not-found shape.
    const message = err instanceof Error ? err.message : String(err);
    const looksMissing = message.includes('404') || message.toLowerCase().includes('not found');
    return looksMissing ? 'unregistered' : 'unreachable';
  }
  return 'registered';
}
export async function fetchProjectLlmConfig(
mcpdClient: McpdClient,
projectName: string,

View File

@@ -64,14 +64,6 @@ export interface LlmProviderFileEntry {
idleTimeoutMinutes?: number;
/** vllm-managed: extra args for `vllm serve` */
extraArgs?: string[];
/**
* If set, this local provider is allowed to substitute for the centralized
* Llm of this name when the mcpd inference proxy is unreachable.
* RBAC is still enforced — the caller must have view permission on the
* named Llm via mcpd before failover is permitted (fail-closed if mcpd
* itself can't be reached).
*/
failoverFor?: string;
}
export interface ProjectLlmOverride {

View File

@@ -101,16 +101,7 @@ export function registerProjectMcpEndpoint(app: FastifyInstance, mcpdClient: Mcp
complete: async () => '',
available: () => false,
};
// Build cache namespace: provider--model--proxymodel.
// Resolution order:
// 1. local ~/.mcpctl override
// 2. mcpdConfig.llmProvider (Phase 4: name of a centralized Llm)
// 3. local registry default (fast tier → active provider)
// 4. literal 'none'
// If (2) names an Llm the HTTP-mode proxy-model pipeline can route
// through mcpd's /api/v1/llms/:name/infer (pivot lands when the client
// integrates that path); meanwhile the value is still usable as a cache
// key, and the describe-project warning flags stale configs.
// Build cache namespace: provider--model--proxymodel
const llmProvider = localOverride?.provider ?? mcpdConfig.llmProvider
?? effectiveRegistry?.getTierProviders('fast')[0]
?? effectiveRegistry?.getActiveName()

View File

@@ -173,9 +173,6 @@ export async function createProvidersFromConfig(
if (entry.tier) {
registry.assignTier(provider.name, entry.tier);
}
if (entry.failoverFor) {
registry.registerFailover(entry.failoverFor, provider.name);
}
}
return registry;

View File

@@ -1,107 +0,0 @@
/**
* FailoverRouter — orchestrates "try mcpd's centralized Llm, fall back to a
* local provider when authorized" for clients that consume the inference
* proxy.
*
* Decision flow on a centralized inference call:
*
* 1. Call the primary (the supplied `primary` callback, typically an HTTP
* POST to mcpd /api/v1/llms/:name/infer).
* 2. If that succeeds → done.
* 3. If it fails AND a local provider is registered as failover for this
* Llm name → call mcpd /api/v1/llms/:name (RBAC-gated) to verify the
* caller still has permission to view this Llm. mcpd unreachable →
* fail-closed (re-throw the original error). 403 → fail-closed.
* 4. 200 → invoke the local provider's `complete()` and tag the result
* as `failover: true` for client-side audit.
*
* The check call uses HEAD to avoid pulling the Llm body (and any
* description / extraConfig) over the wire — mcpd treats both methods the
* same in the RBAC hook because the URL maps to the same permission.
*/
import type { LlmProvider } from './types.js';
import type { ProviderRegistry } from './registry.js';
/** Result of FailoverRouter.run: the value plus whether failover was taken. */
export interface FailoverDecision<T> {
  result: T;
  failover: boolean;
  /** Name of the local provider used (only set when failover === true). */
  via?: string;
}

/** Constructor dependencies for FailoverRouter. */
export interface FailoverRouterDeps {
  /** Injected fetch for the RBAC pre-check. Tests mock this. */
  fetch?: typeof globalThis.fetch;
  /** mcpd base URL (no trailing slash). */
  mcpdUrl: string;
  /** Bearer token to attach to the RBAC pre-check call. */
  bearerToken?: string;
}

/** Outcome of the RBAC pre-check. Used internally + exposed for tests. */
export type AuthCheckOutcome = 'allowed' | 'forbidden' | 'unreachable';
/**
 * Routes a centralized inference attempt, falling back to an authorized
 * local provider on primary failure. See the module header for the full
 * decision flow.
 */
export class FailoverRouter {
  private readonly fetchImpl: typeof globalThis.fetch;
  private readonly mcpdUrl: string;
  private readonly bearer: string | undefined;

  constructor(
    private readonly registry: ProviderRegistry,
    deps: FailoverRouterDeps,
  ) {
    this.fetchImpl = deps.fetch ?? globalThis.fetch;
    // Normalise away trailing slashes so URL building stays predictable.
    this.mcpdUrl = deps.mcpdUrl.replace(/\/+$/, '');
    if (deps.bearerToken !== undefined) this.bearer = deps.bearerToken;
  }

  /**
   * Run a primary inference attempt; on failure, fall back to the local
   * provider if one is registered for this Llm AND the caller still has
   * `view:llms:<llmName>` on mcpd.
   *
   * `primary` should reject (throw) when mcpd's proxy is unreachable or
   * returns a 5xx — that's the signal to consider failover. 4xx errors that
   * indicate a bad request are surfaced as-is; the router only retries on
   * primary failure shapes that look like an upstream/network issue.
   */
  async run<T>(
    llmName: string,
    primary: () => Promise<T>,
    localCall: (provider: LlmProvider) => Promise<T>,
  ): Promise<FailoverDecision<T>> {
    let primaryErr: unknown;
    try {
      return { result: await primary(), failover: false };
    } catch (err) {
      primaryErr = err;
    }
    const fallback = this.registry.getFailoverFor(llmName);
    if (fallback === null) throw primaryErr;
    // RBAC pre-check — both 'forbidden' and 'unreachable' fail closed by
    // re-throwing the original primary error.
    const auth = await this.checkAuth(llmName);
    if (auth !== 'allowed') throw primaryErr;
    const result = await localCall(fallback);
    return { result, failover: true, via: fallback.name };
  }

  /** RBAC pre-check exposed for tests / status-display callers. */
  async checkAuth(llmName: string): Promise<AuthCheckOutcome> {
    const endpoint = `${this.mcpdUrl}/api/v1/llms/${encodeURIComponent(llmName)}`;
    const headers: Record<string, string> = {};
    if (this.bearer !== undefined) headers['Authorization'] = `Bearer ${this.bearer}`;
    let response: Response;
    try {
      response = await this.fetchImpl(endpoint, { method: 'HEAD', headers });
    } catch {
      return 'unreachable';
    }
    switch (response.status) {
      case 200:
      case 204:
        return 'allowed';
      case 401:
      case 403:
        return 'forbidden';
      default:
        // Anything else (404, 500…) — treated as unreachable for the failover flow.
        return 'unreachable';
    }
  }
}

View File

@@ -8,8 +8,6 @@ export class ProviderRegistry {
private providers = new Map<string, LlmProvider>();
private activeProvider: string | null = null;
private tierProviders = new Map<Tier, string[]>();
/** Maps a centralized Llm name → local provider name that can substitute when mcpd is unreachable. */
private failoverMap = new Map<string, string>();
register(provider: LlmProvider): void {
this.providers.set(provider.name, provider);
@@ -33,30 +31,6 @@ export class ProviderRegistry {
this.tierProviders.set(tier, filtered);
}
}
// Remove from failover map (any entry whose local-provider value points at this name)
for (const [centralName, localName] of this.failoverMap) {
if (localName === name) this.failoverMap.delete(centralName);
}
}
/** Mark `localProviderName` as the failover for the centralized Llm named `centralLlmName`. */
registerFailover(centralLlmName: string, localProviderName: string): void {
if (!this.providers.has(localProviderName)) {
throw new Error(`Provider '${localProviderName}' is not registered`);
}
this.failoverMap.set(centralLlmName, localProviderName);
}
/** Look up the local provider that can substitute for a centralized Llm, if any. */
getFailoverFor(centralLlmName: string): LlmProvider | null {
const localName = this.failoverMap.get(centralLlmName);
if (localName === undefined) return null;
return this.providers.get(localName) ?? null;
}
/** Names of central Llms that have a local failover registered. Used in status output. */
listFailovers(): Array<{ centralLlmName: string; localProviderName: string }> {
return [...this.failoverMap.entries()].map(([centralLlmName, localProviderName]) => ({ centralLlmName, localProviderName }));
}
setActive(name: string): void {

View File

@@ -1,170 +0,0 @@
import { describe, it, expect, vi } from 'vitest';
import { ProviderRegistry } from '../src/providers/registry.js';
import { FailoverRouter } from '../src/providers/failover-router.js';
import type { LlmProvider, CompleteResponse } from '../src/providers/types.js';
/** Minimal LlmProvider stub whose methods are all vi.fn spies. */
function fakeProvider(name: string): LlmProvider {
  const complete = vi.fn(async (): Promise<CompleteResponse> => ({
    content: 'local response',
    finishReason: 'stop',
  }));
  const listModels = vi.fn(async () => [name]);
  const isAvailable = vi.fn(async () => true);
  return { name, complete, listModels, isAvailable };
}
/**
 * Mock fetch for the RBAC pre-check: optionally simulates a network error,
 * asserts the expected HTTP method and URL shape, then answers with `status`.
 */
function makeFetch(behaviour: { method: string; status?: number; throw?: boolean }): ReturnType<typeof vi.fn> {
  const impl = async (url: string | URL, init?: RequestInit) => {
    if (behaviour.throw === true) {
      throw new Error('connection refused');
    }
    expect(init?.method).toBe(behaviour.method);
    expect(String(url)).toMatch(/\/api\/v1\/llms\//);
    const status = behaviour.status ?? 200;
    return new Response(null, { status });
  };
  return vi.fn(impl);
}
// Failover-map bookkeeping on the registry: register, lookup, validation,
// cleanup on unregister, and listing for status output.
describe('ProviderRegistry — failover map', () => {
  it('registerFailover maps a central name → local provider name', () => {
    const reg = new ProviderRegistry();
    const local = fakeProvider('vllm-local');
    reg.register(local);
    reg.registerFailover('claude', 'vllm-local');
    const found = reg.getFailoverFor('claude');
    expect(found?.name).toBe('vllm-local');
  });
  it('getFailoverFor returns null when no map entry exists', () => {
    const reg = new ProviderRegistry();
    reg.register(fakeProvider('vllm-local'));
    expect(reg.getFailoverFor('claude')).toBeNull();
  });
  it('registerFailover throws when local provider is not registered', () => {
    const reg = new ProviderRegistry();
    expect(() => reg.registerFailover('claude', 'missing')).toThrow(/not registered/);
  });
  it('unregister removes failover entries that pointed at the removed provider', () => {
    const reg = new ProviderRegistry();
    reg.register(fakeProvider('vllm-local'));
    reg.registerFailover('claude', 'vllm-local');
    reg.unregister('vllm-local');
    // Dangling entries must be purged along with the provider.
    expect(reg.getFailoverFor('claude')).toBeNull();
    expect(reg.listFailovers()).toEqual([]);
  });
  it('listFailovers reports the current map', () => {
    const reg = new ProviderRegistry();
    reg.register(fakeProvider('vllm-local'));
    reg.registerFailover('claude', 'vllm-local');
    reg.registerFailover('opus', 'vllm-local');
    expect(reg.listFailovers()).toEqual([
      { centralLlmName: 'claude', localProviderName: 'vllm-local' },
      { centralLlmName: 'opus', localProviderName: 'vllm-local' },
    ]);
  });
});
// Router decision flow: primary-success short-circuit, authorized fallback,
// fail-closed on missing registration / 403 / unreachable mcpd, and the
// status-code → AuthCheckOutcome mapping.
describe('FailoverRouter', () => {
  it('returns primary result when primary succeeds', async () => {
    const reg = new ProviderRegistry();
    reg.register(fakeProvider('vllm-local'));
    reg.registerFailover('claude', 'vllm-local');
    const router = new FailoverRouter(reg, {
      mcpdUrl: 'http://mcpd',
      fetch: vi.fn() as unknown as typeof fetch,
    });
    const out = await router.run('claude', async () => 'central', async () => 'local');
    expect(out.failover).toBe(false);
    expect(out.result).toBe('central');
  });
  it('falls back to local when primary fails AND mcpd auth-checks 200', async () => {
    const reg = new ProviderRegistry();
    reg.register(fakeProvider('vllm-local'));
    reg.registerFailover('claude', 'vllm-local');
    const fetchFn = makeFetch({ method: 'HEAD', status: 200 });
    const router = new FailoverRouter(reg, {
      mcpdUrl: 'http://mcpd',
      fetch: fetchFn as unknown as typeof fetch,
      bearerToken: 'bearer-x',
    });
    const out = await router.run(
      'claude',
      async () => { throw new Error('upstream down'); },
      async (provider) => `via:${provider.name}`,
    );
    expect(out.failover).toBe(true);
    expect(out.via).toBe('vllm-local');
    expect(out.result).toBe('via:vllm-local');
    // Bearer was attached
    const [, init] = fetchFn.mock.calls[0] as [string, RequestInit];
    expect((init.headers as Record<string, string>)['Authorization']).toBe('Bearer bearer-x');
  });
  it('re-throws primary error when no local failover is registered', async () => {
    const reg = new ProviderRegistry();
    const router = new FailoverRouter(reg, {
      mcpdUrl: 'http://mcpd',
      fetch: vi.fn() as unknown as typeof fetch,
    });
    await expect(router.run(
      'claude',
      async () => { throw new Error('boom'); },
      async () => 'never',
    )).rejects.toThrow('boom');
  });
  it('re-throws (fail-closed) when mcpd returns 403 to the auth check', async () => {
    const reg = new ProviderRegistry();
    reg.register(fakeProvider('vllm-local'));
    reg.registerFailover('claude', 'vllm-local');
    const router = new FailoverRouter(reg, {
      mcpdUrl: 'http://mcpd',
      fetch: makeFetch({ method: 'HEAD', status: 403 }) as unknown as typeof fetch,
    });
    await expect(router.run(
      'claude',
      async () => { throw new Error('upstream down'); },
      async () => 'never',
    )).rejects.toThrow('upstream down');
  });
  it('re-throws (fail-closed) when mcpd itself is unreachable for the auth check', async () => {
    const reg = new ProviderRegistry();
    reg.register(fakeProvider('vllm-local'));
    reg.registerFailover('claude', 'vllm-local');
    const router = new FailoverRouter(reg, {
      mcpdUrl: 'http://mcpd',
      fetch: makeFetch({ method: 'HEAD', throw: true }) as unknown as typeof fetch,
    });
    await expect(router.run(
      'claude',
      async () => { throw new Error('upstream down'); },
      async () => 'never',
    )).rejects.toThrow('upstream down');
  });
  it('checkAuth maps responses correctly', async () => {
    const reg = new ProviderRegistry();
    const make = (status: number) => new FailoverRouter(reg, {
      mcpdUrl: 'http://mcpd',
      fetch: (async () => new Response(null, { status })) as unknown as typeof fetch,
    });
    expect(await make(200).checkAuth('claude')).toBe('allowed');
    expect(await make(204).checkAuth('claude')).toBe('allowed');
    expect(await make(401).checkAuth('claude')).toBe('forbidden');
    expect(await make(403).checkAuth('claude')).toBe('forbidden');
    expect(await make(404).checkAuth('claude')).toBe('unreachable');
    expect(await make(500).checkAuth('claude')).toBe('unreachable');
  });
});

View File

@@ -1,45 +0,0 @@
import { describe, it, expect, vi } from 'vitest';
import { resolveProjectLlmReference } from '../src/discovery.js';
import type { McpdClient } from '../src/http/mcpd-client.js';
// Wraps a bare `get` stub in the McpdClient shape. The double cast is
// deliberate: these tests only ever exercise `.get`, so no other client
// members are mocked.
function mockClient(get: (path: string) => Promise<unknown>): McpdClient {
  return { get } as unknown as McpdClient;
}
describe('resolveProjectLlmReference', () => {
  // Client whose `get` must never fire — used for short-circuit cases.
  const neverCalled = () =>
    mockClient(async () => { throw new Error('should not be called'); });

  it('returns "disabled" for the literal string "none"', async () => {
    expect(await resolveProjectLlmReference(neverCalled(), 'none')).toBe('disabled');
  });

  it('returns "unregistered" when llmProvider is empty or undefined', async () => {
    const client = neverCalled();
    expect(await resolveProjectLlmReference(client, undefined)).toBe('unregistered');
    expect(await resolveProjectLlmReference(client, '')).toBe('unregistered');
  });

  it('returns "registered" when mcpd returns 200 for the name', async () => {
    const get = vi.fn(async () => ({ name: 'claude' }));
    const status = await resolveProjectLlmReference(mockClient(get), 'claude');
    expect(status).toBe('registered');
    expect(get).toHaveBeenCalledWith('/api/v1/llms/claude');
  });

  it('returns "unregistered" on 404', async () => {
    const notFound = mockClient(async () => { throw new Error('HTTP 404 not found'); });
    expect(await resolveProjectLlmReference(notFound, 'missing')).toBe('unregistered');
  });

  it('returns "unreachable" on other errors (500, network)', async () => {
    // Both a 5xx error and a raw network failure map to "unreachable".
    for (const failure of ['HTTP 500 internal error', 'ECONNREFUSED']) {
      const broken = mockClient(async () => { throw new Error(failure); });
      expect(await resolveProjectLlmReference(broken, 'x')).toBe('unreachable');
    }
  });

  it('URL-encodes names with special characters', async () => {
    const get = vi.fn(async () => ({}));
    await resolveProjectLlmReference(mockClient(get), 'weird name/with/slashes');
    expect(get).toHaveBeenCalledWith('/api/v1/llms/weird%20name%2Fwith%2Fslashes');
  });
});

View File

@@ -1,214 +0,0 @@
/**
* Smoke tests: `POST /api/v1/llms/:name/infer` against live mcpd.
*
* Validates the Phase 2 inference proxy path without needing a real provider
* key. We exercise the error-shape guarantees:
* 1. Missing Llm → 404.
* 2. Existing Llm + empty body → 400.
* 3. Existing Llm pointed at an unreachable URL → 502 with an error body.
* 4. RBAC: non-admin calling infer without `run:llms:<name>` → 403 (skipped
* if we can't mint a scoped McpToken in this environment).
*
* The happy-path test needs a real provider, so we skip it by default and
* gate on LLM_INFER_SMOKE_REAL=1 + a working Llm name supplied via
* LLM_INFER_SMOKE_LLM.
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { execSync } from 'node:child_process';
import { readFileSync } from 'node:fs';
import http from 'node:http';
import https from 'node:https';
import { homedir } from 'node:os';
import { join } from 'node:path';
// Target mcpd instance; override with MCPD_URL for non-default environments.
const MCPD_URL = process.env.MCPD_URL ?? 'https://mcpctl.ad.itaz.eu';
// Per-run suffix so retried/parallel runs never collide on resource names.
const SUFFIX = Date.now().toString(36);
const SECRET_NAME = `smoke-infer-sec-${SUFFIX}`;
const LLM_NAME = `smoke-infer-${SUFFIX}`;
// Result of one `mcpctl` invocation: exit code plus both captured streams.
interface CliResult { code: number; stdout: string; stderr: string }
/**
 * Run `mcpctl --direct <args>` and capture exit code plus both streams.
 * Never throws: a non-zero exit is reported through `code`.
 */
function run(args: string): CliResult {
  const toText = (v: Buffer | string | undefined): string =>
    v === undefined || v === null || (v as string | Buffer) === ''
      ? ''
      : typeof v === 'string' ? v : v.toString('utf-8');
  try {
    const stdout = execSync(`mcpctl --direct ${args}`, {
      encoding: 'utf-8',
      timeout: 30_000,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    return { code: 0, stdout: stdout.trim(), stderr: '' };
  } catch (err) {
    const failure = err as { status?: number; stdout?: Buffer | string; stderr?: Buffer | string };
    return {
      code: failure.status ?? 1,
      stdout: toText(failure.stdout),
      stderr: toText(failure.stderr),
    };
  }
}
/** Probe `<url>/healthz`; resolves true iff it answers with a non-5xx status. */
function healthz(url: string, timeoutMs = 5000): Promise<boolean> {
  return new Promise((resolve) => {
    const target = new URL(`${url.replace(/\/$/, '')}/healthz`);
    const secure = target.protocol === 'https:';
    const driver = secure ? https : http;
    const req = driver.get(
      {
        hostname: target.hostname,
        port: target.port || (secure ? 443 : 80),
        path: target.pathname,
        timeout: timeoutMs,
      },
      (res) => {
        // Drain the body so the socket is released.
        res.resume();
        resolve((res.statusCode ?? 500) < 500);
      },
    );
    req.on('error', () => resolve(false));
    req.on('timeout', () => { req.destroy(); resolve(false); });
  });
}
/**
 * Look up the current session bearer so we can POST /infer directly.
 *
 * Precedence:
 * 1. MCPCTL_TOKEN env var (when non-empty).
 * 2. ~/.mcpctl/credentials.json — shape is { token } or { session: { token } };
 *    we stay defensive because the CLI may change it.
 *
 * @returns the token string, or undefined when none is available.
 */
function getBearer(): string | undefined {
  const envToken = process.env.MCPCTL_TOKEN;
  if (envToken !== undefined && envToken !== '') return envToken;
  try {
    // Read the credentials file directly instead of shelling out to `cat`:
    // no subprocess, no shell dependency, and it works beyond POSIX hosts.
    const raw = readFileSync(join(homedir(), '.mcpctl', 'credentials.json'), 'utf-8');
    const parsed = JSON.parse(raw) as Record<string, unknown>;
    const token = parsed.token ?? (parsed.session as { token?: string } | undefined)?.token;
    return typeof token === 'string' ? token : undefined;
  } catch {
    // Missing file / unreadable JSON → no bearer; callers skip direct POSTs.
    return undefined;
  }
}
/**
 * POST a JSON body to mcpd at `path`, optionally with a bearer token.
 * Resolves with the status code and the parsed body (raw text when the
 * reply is not valid JSON).
 */
async function post(
  path: string,
  body: unknown,
  bearer?: string,
): Promise<{ status: number; body: unknown }> {
  const target = new URL(`${MCPD_URL.replace(/\/$/, '')}${path}`);
  const secure = target.protocol === 'https:';
  const driver = secure ? https : http;
  const payload = JSON.stringify(body);
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    'Content-Length': Buffer.byteLength(payload).toString(),
  };
  if (bearer !== undefined) headers['Authorization'] = `Bearer ${bearer}`;
  return new Promise((resolve, reject) => {
    const req = driver.request(
      {
        hostname: target.hostname,
        port: target.port || (secure ? 443 : 80),
        path: target.pathname + target.search,
        method: 'POST',
        headers,
        timeout: 15_000,
      },
      (res) => {
        const chunks: Buffer[] = [];
        res.on('data', (chunk: Buffer) => chunks.push(chunk));
        res.on('end', () => {
          const raw = Buffer.concat(chunks).toString('utf-8');
          let decoded: unknown = raw;
          try { decoded = JSON.parse(raw); } catch { /* non-JSON body: keep the raw text */ }
          resolve({ status: res.statusCode ?? 0, body: decoded });
        });
      },
    );
    req.on('error', reject);
    req.on('timeout', () => { req.destroy(); reject(new Error('request timed out')); });
    req.write(payload);
    req.end();
  });
}
// Shared fixtures, populated in beforeAll. Every test returns early when mcpd
// is unreachable or no bearer could be resolved.
let mcpdUp = false;
let bearer: string | undefined;

describe('llm-infer smoke', () => {
  beforeAll(async () => {
    mcpdUp = await healthz(MCPD_URL);
    if (!mcpdUp) {
      // eslint-disable-next-line no-console
      console.warn(`\n ○ llm-infer smoke: skipped — ${MCPD_URL}/healthz unreachable.\n`);
      return;
    }
    bearer = getBearer();
    if (bearer === undefined) {
      // eslint-disable-next-line no-console
      console.warn('\n ○ llm-infer smoke: no bearer available (set MCPCTL_TOKEN or login). Direct POST tests will skip.\n');
    }
  }, 20_000);

  // Best-effort cleanup — `run` never throws, so missing resources are harmless.
  afterAll(() => {
    if (!mcpdUp) return;
    run(`delete llm ${LLM_NAME}`);
    run(`delete secret ${SECRET_NAME}`);
  });

  // Tests below run in declaration order: this one creates the fixtures the
  // 400/502 tests depend on.
  it('creates a fixture secret + Llm pointed at an unreachable URL', () => {
    if (!mcpdUp) return;
    run(`delete llm ${LLM_NAME}`);
    run(`delete secret ${SECRET_NAME}`);
    expect(run(`create secret ${SECRET_NAME} --data token=sk-fake`).code).toBe(0);
    const createLlm = run([
      `create llm ${LLM_NAME}`,
      '--type openai',
      '--model gpt-4o-mini',
      // Unroutable host so any actual upstream call returns an adapter error → 502
      '--url http://127.0.0.1:1',
      `--api-key-ref ${SECRET_NAME}/token`,
    ].join(' '));
    expect(createLlm.code, createLlm.stderr || createLlm.stdout).toBe(0);
  });

  it('returns 404 for an unknown Llm name', async () => {
    if (!mcpdUp || bearer === undefined) return;
    const res = await post('/api/v1/llms/__nonexistent_llm__/infer',
      { messages: [{ role: 'user', content: 'hi' }] }, bearer);
    expect(res.status).toBe(404);
  });

  it('returns 400 when messages is missing', async () => {
    if (!mcpdUp || bearer === undefined) return;
    const res = await post(`/api/v1/llms/${LLM_NAME}/infer`, {}, bearer);
    expect(res.status).toBe(400);
    const body = res.body as { error?: string };
    // The validation error must name the missing field.
    expect(body.error ?? '').toMatch(/messages/i);
  });

  it('returns 502 when the upstream provider is unreachable', async () => {
    if (!mcpdUp || bearer === undefined) return;
    const res = await post(`/api/v1/llms/${LLM_NAME}/infer`,
      { messages: [{ role: 'user', content: 'hi' }] }, bearer);
    // 502 is what the proxy returns on adapter errors; some paths may return
    // the upstream's own status if the request reached it, so accept any
    // non-2xx with an error body.
    expect(res.status).toBeGreaterThanOrEqual(400);
    expect(res.status).not.toBe(404);
    expect(res.status).not.toBe(400);
    const body = res.body as { error?: string | { message?: string } };
    const msg = typeof body.error === 'string' ? body.error : body.error?.message ?? '';
    expect(msg, 'error body must describe the failure').not.toBe('');
  }, 30_000);

  it('happy-path inference (opt-in: LLM_INFER_SMOKE_REAL=1 + LLM_INFER_SMOKE_LLM=<name>)', async () => {
    if (!mcpdUp || bearer === undefined) return;
    if (process.env.LLM_INFER_SMOKE_REAL !== '1') {
      // eslint-disable-next-line no-console
      console.warn(' ○ happy-path skipped — set LLM_INFER_SMOKE_REAL=1 and LLM_INFER_SMOKE_LLM=<name> of a working Llm.');
      return;
    }
    const name = process.env.LLM_INFER_SMOKE_LLM;
    if (name === undefined || name === '') {
      throw new Error('LLM_INFER_SMOKE_LLM must be set when LLM_INFER_SMOKE_REAL=1');
    }
    const res = await post(`/api/v1/llms/${name}/infer`, {
      messages: [{ role: 'user', content: 'Say "smoke-ok" and nothing else.' }],
      max_tokens: 8,
    }, bearer);
    expect(res.status).toBe(200);
    const body = res.body as { choices?: Array<{ message?: { content?: string } }> };
    const content = body.choices?.[0]?.message?.content ?? '';
    expect(content).toMatch(/smoke-ok/i);
  }, 60_000);
});

View File

@@ -1,162 +0,0 @@
/**
* Smoke tests: Llm resource CRUD + apiKeyRef linkage against live mcpd.
*
* Exercises the Phase 1 CLI contract end-to-end:
* 1. Create a secret carrying a fake API key.
* 2. `mcpctl create llm` referencing that secret via --api-key-ref.
* 3. `mcpctl describe llm` shows type/model/tier + the secret ref.
* 4. `mcpctl get llms -o yaml` round-trips cleanly into `apply -f`.
* 5. Delete llm + secret.
*
* Inference itself is covered in llm-infer.smoke.test.ts — this file is
* purely about the registry.
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import http from 'node:http';
import https from 'node:https';
import { execSync } from 'node:child_process';
import { writeFileSync, unlinkSync, mkdtempSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
// Target mcpd instance; override with MCPD_URL for non-default environments.
const MCPD_URL = process.env.MCPD_URL ?? 'https://mcpctl.ad.itaz.eu';
// Per-run suffix so retried/parallel runs never collide on resource names.
const SUFFIX = Date.now().toString(36);
const SECRET_NAME = `smoke-llm-sec-${SUFFIX}`;
const LLM_NAME = `smoke-llm-${SUFFIX}`;
// Result of one `mcpctl` invocation: exit code plus both captured streams.
interface CliResult { code: number; stdout: string; stderr: string }
/**
 * Invoke `mcpctl --direct <args>`; never throws — failures are reported
 * through `code` with whatever the process wrote to stdout/stderr.
 */
function run(args: string): CliResult {
  const asString = (v: Buffer | string | undefined): string => {
    if (!v) return '';
    return typeof v === 'string' ? v : v.toString('utf-8');
  };
  try {
    const stdout = execSync(`mcpctl --direct ${args}`, {
      encoding: 'utf-8',
      timeout: 30_000,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    return { code: 0, stdout: stdout.trim(), stderr: '' };
  } catch (err) {
    const failure = err as { status?: number; stdout?: Buffer | string; stderr?: Buffer | string };
    return {
      code: failure.status ?? 1,
      stdout: asString(failure.stdout),
      stderr: asString(failure.stderr),
    };
  }
}
/** True iff `<url>/healthz` responds with any status below 500 within the timeout. */
function healthz(url: string, timeoutMs = 5000): Promise<boolean> {
  return new Promise((resolve) => {
    const endpoint = new URL(`${url.replace(/\/$/, '')}/healthz`);
    const isHttps = endpoint.protocol === 'https:';
    const req = (isHttps ? https : http).get(
      {
        hostname: endpoint.hostname,
        port: endpoint.port || (isHttps ? 443 : 80),
        path: endpoint.pathname,
        timeout: timeoutMs,
      },
      (res) => {
        res.resume(); // drain so the socket is released
        resolve((res.statusCode ?? 500) < 500);
      },
    );
    req.on('error', () => resolve(false));
    req.on('timeout', () => { req.destroy(); resolve(false); });
  });
}
// Set in beforeAll; every test returns early when mcpd is unreachable.
let mcpdUp = false;

describe('llm smoke', () => {
  beforeAll(async () => {
    mcpdUp = await healthz(MCPD_URL);
    if (!mcpdUp) {
      // eslint-disable-next-line no-console
      console.warn(`\n ○ llm smoke: skipped — ${MCPD_URL}/healthz unreachable. Set MCPD_URL to override.\n`);
    }
  }, 20_000);

  // Best-effort cleanup — `run` never throws, so missing resources are harmless.
  afterAll(() => {
    if (!mcpdUp) return;
    run(`delete llm ${LLM_NAME}`);
    run(`delete secret ${SECRET_NAME}`);
  });

  // Tests run in declaration order; each later step depends on the one before.
  it('creates a secret to hold the fake API key', () => {
    if (!mcpdUp) return;
    run(`delete secret ${SECRET_NAME}`); // idempotent cleanup
    const result = run(`create secret ${SECRET_NAME} --data token=sk-fake-xyz`);
    expect(result.code, result.stderr).toBe(0);
  });

  it('creates an Llm pointing at the secret via --api-key-ref', () => {
    if (!mcpdUp) return;
    run(`delete llm ${LLM_NAME}`);
    const cmd = [
      `create llm ${LLM_NAME}`,
      '--type openai',
      '--model gpt-4o-mini',
      '--tier fast',
      '--url http://nowhere.example:9000',
      `--api-key-ref ${SECRET_NAME}/token`,
      '--description smoke-test',
    ].join(' ');
    const result = run(cmd);
    expect(result.code, result.stderr || result.stdout).toBe(0);
    expect(result.stdout).toMatch(new RegExp(`llm '${LLM_NAME}'`));
  });

  it('describe llm shows the secret ref in sectioned output', () => {
    if (!mcpdUp) return;
    const result = run(`describe llm ${LLM_NAME}`);
    expect(result.code, result.stderr).toBe(0);
    expect(result.stdout).toContain(`=== LLM: ${LLM_NAME} ===`);
    expect(result.stdout).toContain('Type:');
    expect(result.stdout).toContain('openai');
    expect(result.stdout).toContain('Model:');
    expect(result.stdout).toContain('gpt-4o-mini');
    expect(result.stdout).toContain('API Key:');
    expect(result.stdout).toContain(SECRET_NAME);
    expect(result.stdout).toContain('token');
    // Raw key value must NOT appear — only the ref
    expect(result.stdout).not.toContain('sk-fake-xyz');
  });

  it('get llms shows the row with KEY column rendered as "secret://name/key"', () => {
    if (!mcpdUp) return;
    const result = run('get llms');
    expect(result.code).toBe(0);
    expect(result.stdout).toContain(LLM_NAME);
    expect(result.stdout).toContain(`secret://${SECRET_NAME}/token`);
  });

  it('round-trips yaml output → apply -f', () => {
    if (!mcpdUp) return;
    const yaml = run(`get llm ${LLM_NAME} -o yaml`);
    expect(yaml.code).toBe(0);
    expect(yaml.stdout).toMatch(/kind:\s+llm/);
    expect(yaml.stdout).toContain(`name: ${LLM_NAME}`);
    expect(yaml.stdout).toContain(`name: ${SECRET_NAME}`); // apiKeyRef block
    // Change the description via apply -f with the YAML we just pulled.
    const dir = mkdtempSync(join(tmpdir(), 'mcpctl-smoke-'));
    const path = join(dir, 'llm.yaml');
    // NOTE(review): the string replace assumes the emitter renders exactly
    // `description: smoke-test` (unquoted) — confirm if the YAML emitter changes.
    const amended = yaml.stdout.replace('description: smoke-test', 'description: smoke-test-amended');
    writeFileSync(path, amended);
    try {
      const applied = run(`apply -f ${path}`);
      expect(applied.code, applied.stderr || applied.stdout).toBe(0);
      const described = run(`describe llm ${LLM_NAME}`);
      expect(described.stdout).toContain('smoke-test-amended');
    } finally {
      unlinkSync(path);
    }
  });

  it('deletes the llm and leaves the underlying secret intact', () => {
    if (!mcpdUp) return;
    const del = run(`delete llm ${LLM_NAME}`);
    expect(del.code, del.stderr).toBe(0);
    // Secret still exists (apiKeyRef uses onDelete: SetNull so the secret isn't touched)
    const secret = run(`describe secret ${SECRET_NAME}`);
    expect(secret.code).toBe(0);
  });
});

View File

@@ -1,130 +0,0 @@
/**
* Smoke tests: Project.llmProvider as Llm reference (Phase 4).
*
* Verifies the describe-project warning behavior against live mcpd:
* 1. Project with `--llm <existing>` → no warning.
* 2. Project with `--llm <nonexistent>` → describe flags the orphan.
* 3. Project with `--llm none` → explicit disable, no warning.
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import http from 'node:http';
import https from 'node:https';
import { execSync } from 'node:child_process';
// Target mcpd instance; override with MCPD_URL for non-default environments.
const MCPD_URL = process.env.MCPD_URL ?? 'https://mcpctl.ad.itaz.eu';
// Per-run suffix so retried/parallel runs never collide on resource names.
const SUFFIX = Date.now().toString(36);
const LLM_NAME = `smoke-proj-llm-${SUFFIX}`;
// One project per scenario: valid ref / orphan ref / explicit "none".
const PROJ_OK = `smoke-proj-ok-${SUFFIX}`;
const PROJ_ORPHAN = `smoke-proj-orphan-${SUFFIX}`;
const PROJ_NONE = `smoke-proj-none-${SUFFIX}`;
// Result of one `mcpctl` invocation: exit code plus both captured streams.
interface CliResult { code: number; stdout: string; stderr: string }
/**
 * Shell out to `mcpctl --direct <args>`. Exit code and captured output are
 * returned rather than thrown, so cleanup calls may fail silently.
 */
function run(args: string): CliResult {
  const decode = (stream: Buffer | string | undefined): string =>
    stream ? (typeof stream === 'string' ? stream : stream.toString('utf-8')) : '';
  try {
    const stdout = execSync(`mcpctl --direct ${args}`, {
      encoding: 'utf-8',
      timeout: 30_000,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    return { code: 0, stdout: stdout.trim(), stderr: '' };
  } catch (err) {
    const failure = err as { status?: number; stdout?: Buffer | string; stderr?: Buffer | string };
    return {
      code: failure.status ?? 1,
      stdout: decode(failure.stdout),
      stderr: decode(failure.stderr),
    };
  }
}
/** Resolve true when `<url>/healthz` answers with a non-5xx status in time. */
function healthz(url: string, timeoutMs = 5000): Promise<boolean> {
  return new Promise((resolve) => {
    const endpoint = new URL(`${url.replace(/\/$/, '')}/healthz`);
    const isHttps = endpoint.protocol === 'https:';
    const req = (isHttps ? https : http).get(
      {
        hostname: endpoint.hostname,
        port: endpoint.port || (isHttps ? 443 : 80),
        path: endpoint.pathname,
        timeout: timeoutMs,
      },
      (res) => {
        res.resume(); // drain so the socket is released
        resolve((res.statusCode ?? 500) < 500);
      },
    );
    req.on('error', () => resolve(false));
    req.on('timeout', () => { req.destroy(); resolve(false); });
  });
}
// Set in beforeAll; every test returns early when mcpd is unreachable.
let mcpdUp = false;

describe('project-llm-ref smoke', () => {
  beforeAll(async () => {
    mcpdUp = await healthz(MCPD_URL);
    if (!mcpdUp) {
      // eslint-disable-next-line no-console
      console.warn(`\n ○ project-llm-ref smoke: skipped — ${MCPD_URL}/healthz unreachable.\n`);
      return;
    }
    // Fixture: an Llm we can point projects at.
    run(`delete llm ${LLM_NAME}`);
    const createLlm = run([
      `create llm ${LLM_NAME}`,
      '--type openai',
      '--model gpt-4o-mini',
      '--tier fast',
      '--url http://127.0.0.1:1',
    ].join(' '));
    if (createLlm.code !== 0) {
      // eslint-disable-next-line no-console
      console.warn(` ○ could not create fixture Llm: ${createLlm.stderr || createLlm.stdout}`);
    }
  }, 30_000);

  // Best-effort cleanup — `run` never throws, so missing resources are harmless.
  afterAll(() => {
    if (!mcpdUp) return;
    run(`delete project ${PROJ_OK} --force`);
    run(`delete project ${PROJ_ORPHAN} --force`);
    run(`delete project ${PROJ_NONE} --force`);
    run(`delete llm ${LLM_NAME}`);
  });

  it('project with --llm pointing at a registered Llm describes without warning', () => {
    if (!mcpdUp) return;
    run(`delete project ${PROJ_OK} --force`);
    const created = run(`create project ${PROJ_OK} --llm ${LLM_NAME}`);
    expect(created.code, created.stderr || created.stdout).toBe(0);
    const described = run(`describe project ${PROJ_OK}`);
    expect(described.code).toBe(0);
    expect(described.stdout).toContain('LLM:');
    expect(described.stdout).toContain(LLM_NAME);
    expect(described.stdout).not.toContain('warning:');
  });

  it('project with --llm naming an unregistered Llm shows the warning line', () => {
    if (!mcpdUp) return;
    run(`delete project ${PROJ_ORPHAN} --force`);
    // claude-ghost-<suffix> is never created, so describe must flag the orphan.
    const created = run(`create project ${PROJ_ORPHAN} --llm claude-ghost-${SUFFIX}`);
    expect(created.code, created.stderr || created.stdout).toBe(0);
    const described = run(`describe project ${PROJ_ORPHAN}`);
    expect(described.code).toBe(0);
    expect(described.stdout).toContain(`claude-ghost-${SUFFIX}`);
    expect(described.stdout).toContain('warning:');
    expect(described.stdout).toContain('registry default');
  });

  it('project with --llm none treats it as an explicit disable (no warning)', () => {
    if (!mcpdUp) return;
    run(`delete project ${PROJ_NONE} --force`);
    const created = run(`create project ${PROJ_NONE} --llm none`);
    expect(created.code).toBe(0);
    const described = run(`describe project ${PROJ_NONE}`);
    expect(described.code).toBe(0);
    expect(described.stdout).toContain('LLM:');
    expect(described.stdout).toContain('none');
    expect(described.stdout).not.toContain('warning:');
  });
});

View File

@@ -1,146 +0,0 @@
/**
* Smoke tests: SecretBackend CRUD against live mcpd.
*
* Exercises the Phase 0 CLI contract end-to-end:
* 1. `mcpctl get secretbackends` — the seeded `default` (plaintext) row exists
* and is marked isDefault.
* 2. `mcpctl create secretbackend <name> --type plaintext` — create + list.
* 3. `mcpctl describe secretbackend <name>` — sectioned output; config
* values that look like credentials are masked.
* 4. `mcpctl delete secretbackend default` — fails with 409 (cannot delete
* the default row).
* 5. Cleanup: delete the test row; confirm it's gone.
*
* Target: mcpd direct (not mcplocal). We use `--direct` so the CLI bypasses
* mcplocal and hits mcpd at the configured URL. If mcpd is unreachable we
* skip with a clear message — same pattern as the mcptoken smoke.
*
* Run with: pnpm test:smoke
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import http from 'node:http';
import https from 'node:https';
import { execSync } from 'node:child_process';
// Target mcpd instance; override with MCPD_URL for non-default environments.
const MCPD_URL = process.env.MCPD_URL ?? 'https://mcpctl.ad.itaz.eu';
// Time-based suffix so retried/parallel runs never collide on the backend name.
const BACKEND_NAME = `smoke-sb-${Date.now().toString(36)}`;
// Result of one `mcpctl` invocation: exit code plus both captured streams.
interface CliResult { code: number; stdout: string; stderr: string }
/**
 * Execute `mcpctl --direct <args>` and report the outcome as a CliResult;
 * non-zero exits are captured, never thrown.
 */
function run(args: string): CliResult {
  const text = (v: Buffer | string | undefined): string =>
    v ? (typeof v === 'string' ? v : v.toString('utf-8')) : '';
  try {
    const stdout = execSync(`mcpctl --direct ${args}`, {
      encoding: 'utf-8',
      timeout: 30_000,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    return { code: 0, stdout: stdout.trim(), stderr: '' };
  } catch (err) {
    const failure = err as { status?: number; stdout?: Buffer | string; stderr?: Buffer | string };
    return {
      code: failure.status ?? 1,
      stdout: text(failure.stdout),
      stderr: text(failure.stderr),
    };
  }
}
/** Check `<url>/healthz`; any response below 500 counts as healthy. */
function healthz(url: string, timeoutMs = 5000): Promise<boolean> {
  return new Promise((resolve) => {
    const endpoint = new URL(`${url.replace(/\/$/, '')}/healthz`);
    const isHttps = endpoint.protocol === 'https:';
    const req = (isHttps ? https : http).get(
      {
        hostname: endpoint.hostname,
        port: endpoint.port || (isHttps ? 443 : 80),
        path: endpoint.pathname,
        timeout: timeoutMs,
      },
      (res) => {
        res.resume(); // drain so the socket is released
        resolve((res.statusCode ?? 500) < 500);
      },
    );
    req.on('error', () => resolve(false));
    req.on('timeout', () => { req.destroy(); resolve(false); });
  });
}
// Set in beforeAll; every test returns early when mcpd is unreachable.
let mcpdUp = false;

describe('secretbackend smoke', () => {
  beforeAll(async () => {
    mcpdUp = await healthz(MCPD_URL);
    if (!mcpdUp) {
      // eslint-disable-next-line no-console
      console.warn(`\n ○ secretbackend smoke: skipped — ${MCPD_URL}/healthz unreachable. Set MCPD_URL to override.\n`);
    }
  }, 20_000);

  // Best-effort cleanup — `run` never throws, so a missing row is harmless.
  afterAll(() => {
    if (!mcpdUp) return;
    run(`delete secretbackend ${BACKEND_NAME}`);
  });

  it('lists at least one secretbackend (the seeded plaintext default)', () => {
    if (!mcpdUp) return;
    const result = run('get secretbackends -o json');
    expect(result.code, result.stderr).toBe(0);
    const rows = JSON.parse(result.stdout) as Array<{ name: string; type: string; isDefault: boolean }>;
    expect(rows.length).toBeGreaterThan(0);
    const defaultRow = rows.find((r) => r.isDefault === true);
    expect(defaultRow, 'a default backend must exist').toBeDefined();
    expect(defaultRow!.type).toBe('plaintext');
  });

  it('creates a plaintext backend and round-trips it through describe', () => {
    if (!mcpdUp) return;
    // Idempotent cleanup in case a prior run left debris
    run(`delete secretbackend ${BACKEND_NAME}`);
    const created = run(`create secretbackend ${BACKEND_NAME} --type plaintext --description smoke-test`);
    expect(created.code, created.stderr || created.stdout).toBe(0);
    expect(created.stdout).toMatch(new RegExp(`secretbackend '${BACKEND_NAME}'`));
    const described = run(`describe secretbackend ${BACKEND_NAME}`);
    expect(described.code, described.stderr).toBe(0);
    expect(described.stdout).toContain(`=== SecretBackend: ${BACKEND_NAME} ===`);
    expect(described.stdout).toContain('Type:');
    expect(described.stdout).toContain('plaintext');
    expect(described.stdout).toContain('smoke-test');
  });

  it('refuses to delete the seeded default backend', () => {
    if (!mcpdUp) return;
    // Find whichever row is currently the default — we don't hard-code the name
    // because operators may have renamed or swapped it.
    const listed = run('get secretbackends -o json');
    expect(listed.code).toBe(0);
    const rows = JSON.parse(listed.stdout) as Array<{ name: string; isDefault: boolean }>;
    const def = rows.find((r) => r.isDefault);
    expect(def).toBeDefined();
    const del = run(`delete secretbackend ${def!.name}`);
    // 409 surfaces as exit 1 with a descriptive error
    expect(del.code).toBe(1);
    const combined = (del.stderr + del.stdout).toLowerCase();
    expect(combined).toMatch(/default|in use|cannot delete/);
  });

  it('round-trips get -o yaml → apply -f', () => {
    if (!mcpdUp) return;
    const yaml = run(`get secretbackend ${BACKEND_NAME} -o yaml`);
    expect(yaml.code).toBe(0);
    // Apply-compatible output must start with `kind: secretbackend`
    expect(yaml.stdout).toMatch(/kind:\s+secretbackend/);
    expect(yaml.stdout).toContain(`name: ${BACKEND_NAME}`);
    expect(yaml.stdout).toContain('type: plaintext');
  });

  it('deletes the test backend and confirms it is gone', () => {
    if (!mcpdUp) return;
    const del = run(`delete secretbackend ${BACKEND_NAME}`);
    expect(del.code, del.stderr).toBe(0);
    const listed = run('get secretbackends -o json');
    const rows = JSON.parse(listed.stdout) as Array<{ name: string }>;
    expect(rows.find((r) => r.name === BACKEND_NAME)).toBeUndefined();
  });
});