Compare commits


1 Commits

Author SHA1 Message Date
Michal
3a28128fb4 feat(agent): MCP-correct chat agent shim on top of LiteLLM
New package @mcpctl/agent that replaces LiteLLM's broken MCP
integration (dropped Mcp-Session-Id, ignored tools/list_changed) with
a thin ~200 LOC loop built on @modelcontextprotocol/sdk +
openai SDK. LiteLLM stays in its actual lane — OpenAI-compatible model
routing — and this agent handles MCP correctly.

Core (src/agent.ts):
  - StreamableHTTPClientTransport for MCP (auto-preserves Mcp-Session-Id).
  - Re-fetches tools/list at the top of every loop so list_changed
    notifications surface new tools to the model on the next turn
    (fixes the gated-session case: begin_session reveals the full
    upstream tool set, next round's inference sees all of them).
  - OpenAI-compatible inference via process.env.AGENT_LLM_BASE_URL
    — points at LiteLLM or vLLM directly.
  - Graceful failure: broken tool calls are serialized back into the
    conversation as the tool's response; the agent keeps going.
  - maxIterations cap stops runaway loops; hitIterationLimit surfaces
    truncation in the result.
  - Structural `McpLike` / `LlmLike` interfaces keep the loop testable
    without booting real SDKs (minimal usage sketch below).
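
Programmatic use (sketch — assumes only the API exported from
src/agent/src/index.ts below, the workspace package name @mcpctl/agent,
and the AGENT_* env vars listed under CLI; the prompt text and the model
fallback here are illustrative):

  import { runAgent, type AgentConfig } from '@mcpctl/agent';

  const config: AgentConfig = {
    mcpUrl: process.env.AGENT_MCP_URL!,          // …/projects/<proj>/mcp
    mcpToken: process.env.AGENT_MCP_TOKEN!,      // mcpctl_pat_…
    llmBaseUrl: process.env.AGENT_LLM_BASE_URL!, // LiteLLM or vLLM /v1
    llmApiKey: process.env.AGENT_LLM_API_KEY!,
    model: process.env.AGENT_MODEL ?? 'qwen3-thinking',
    maxIterations: 10,                           // optional; default 20
  };

  // Tool-call rounds run inside runAgent; the result carries the full
  // message history plus the truncation flag.
  const result = await runAgent("summarise this week's slow queries", config, {
    log: (line) => process.stderr.write(`${line}\n`), // optional logger
  });
  console.log(result.finalText);
  if (result.hitIterationLimit) {
    console.warn(`stopped at maxIterations after ${result.rounds} round(s)`);
  }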

CLI (src/cli.ts):
  mcpctl-agent run "<prompt>" \
    --model qwen3-thinking --project sre \
    [--system "..."] [--max-iterations N] [-o text|json] [--verbose]
  Env fallbacks: AGENT_MCP_URL, AGENT_MCP_TOKEN,
                 AGENT_LLM_BASE_URL, AGENT_LLM_API_KEY, AGENT_MODEL

Tests (7 cases):
  - direct answer (no tool call) → ok
  - single-round tool call + synthesis → message history correct
  - list_changed refresh: tools/list called at startup + after each
    round → next inference sees newly-exposed tools
  - maxIterations cap → hitIterationLimit flag set
  - failing tool → error serialized into conversation, agent recovers
  - systemPrompt prepended
  - mcp.close() runs even when loop throws (finally-block guarantee)

End-to-end verified against live cluster:
  Round 1: sees 1 tool (begin_session) → calls it
  Round 2: sees 115 tools (gate opened) → calls aws-docs/search_documentation
  Final: model synthesizes answer
  — LiteLLM's chat UI cannot do this today; this loop does.

Still to do (follow-up PRs):
  - Wire into mcpctl binary as `mcpctl agent run ...`
  - Docker image + Pulumi deploy for a long-running HTTP service mode
  - Minimal chat UI (HTMX or plain fetch)
  - Streaming responses

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 18:24:29 +01:00
59 changed files with 772 additions and 4115 deletions


@@ -5,11 +5,11 @@ _mcpctl() {
local cur prev words cword
_init_completion || return
local commands="status login logout config get describe delete logs create edit apply patch backup approve console cache test migrate"
local commands="status login logout config get describe delete logs create edit apply patch backup approve console cache test"
local project_commands="get describe delete logs create edit attach-server detach-server"
local global_opts="-v --version --daemon-url --direct -p --project -h --help"
local resources="servers instances secrets secretbackends llms templates projects users groups rbac prompts promptrequests serverattachments proxymodels all"
local resource_aliases="servers instances secrets secretbackends llms templates projects users groups rbac prompts promptrequests serverattachments proxymodels all server srv instance inst secret sec secretbackend sb llm template tpl project proj user group rbac-definition rbac-binding prompt promptrequest pr serverattachment sa proxymodel pm"
local resources="servers instances secrets templates projects users groups rbac prompts promptrequests serverattachments proxymodels all"
local resource_aliases="servers instances secrets templates projects users groups rbac prompts promptrequests serverattachments proxymodels all server srv instance inst secret sec template tpl project proj user group rbac-definition rbac-binding prompt promptrequest pr serverattachment sa proxymodel pm"
# Check if --project/-p was given
local has_project=false
@@ -175,7 +175,7 @@ _mcpctl() {
create)
local create_sub=$(_mcpctl_get_subcmd $subcmd_pos)
if [[ -z "$create_sub" ]]; then
COMPREPLY=($(compgen -W "server secret llm secretbackend project user group rbac mcptoken prompt serverattachment promptrequest help" -- "$cur"))
COMPREPLY=($(compgen -W "server secret project user group rbac mcptoken prompt serverattachment promptrequest help" -- "$cur"))
else
case "$create_sub" in
server)
@@ -184,12 +184,6 @@ _mcpctl() {
secret)
COMPREPLY=($(compgen -W "--data --force -h --help" -- "$cur"))
;;
llm)
COMPREPLY=($(compgen -W "--type --model --url --tier --description --api-key-ref --extra --force -h --help" -- "$cur"))
;;
secretbackend)
COMPREPLY=($(compgen -W "--type --description --default --url --namespace --mount --path-prefix --token-secret --config --force -h --help" -- "$cur"))
;;
project)
COMPREPLY=($(compgen -W "-d --description --proxy-model --prompt --gated --no-gated --server --force -h --help" -- "$cur"))
;;
@@ -335,21 +329,6 @@ _mcpctl() {
esac
fi
return ;;
migrate)
local migrate_sub=$(_mcpctl_get_subcmd $subcmd_pos)
if [[ -z "$migrate_sub" ]]; then
COMPREPLY=($(compgen -W "secrets help" -- "$cur"))
else
case "$migrate_sub" in
secrets)
COMPREPLY=($(compgen -W "--from --to --names --keep-source --dry-run -h --help" -- "$cur"))
;;
*)
COMPREPLY=($(compgen -W "-h --help" -- "$cur"))
;;
esac
fi
return ;;
help)
COMPREPLY=($(compgen -W "$commands" -- "$cur"))
return ;;


@@ -4,7 +4,7 @@
# Erase any stale completions from previous versions
complete -c mcpctl -e
set -l commands status login logout config get describe delete logs create edit apply patch backup approve console cache test migrate
set -l commands status login logout config get describe delete logs create edit apply patch backup approve console cache test
set -l project_commands get describe delete logs create edit attach-server detach-server
# Disable file completions by default
@@ -31,10 +31,10 @@ function __mcpctl_has_project
end
# Resource type detection
set -l resources servers instances secrets secretbackends llms templates projects users groups rbac prompts promptrequests serverattachments proxymodels all
set -l resources servers instances secrets templates projects users groups rbac prompts promptrequests serverattachments proxymodels all
function __mcpctl_needs_resource_type
set -l resource_aliases servers instances secrets secretbackends llms templates projects users groups rbac prompts promptrequests serverattachments proxymodels all server srv instance inst secret sec secretbackend sb llm template tpl project proj user group rbac-definition rbac-binding prompt promptrequest pr serverattachment sa proxymodel pm
set -l resource_aliases servers instances secrets templates projects users groups rbac prompts promptrequests serverattachments proxymodels all server srv instance inst secret sec template tpl project proj user group rbac-definition rbac-binding prompt promptrequest pr serverattachment sa proxymodel pm
set -l tokens (commandline -opc)
set -l found_cmd false
for tok in $tokens
@@ -59,8 +59,6 @@ function __mcpctl_resolve_resource
case server srv servers; echo servers
case instance inst instances; echo instances
case secret sec secrets; echo secrets
case secretbackend sb secretbackends; echo secretbackends
case llm llms; echo llms
case template tpl templates; echo templates
case project proj projects; echo projects
case user users; echo users
@@ -76,7 +74,7 @@ function __mcpctl_resolve_resource
end
function __mcpctl_get_resource_type
set -l resource_aliases servers instances secrets secretbackends llms templates projects users groups rbac prompts promptrequests serverattachments proxymodels all server srv instance inst secret sec secretbackend sb llm template tpl project proj user group rbac-definition rbac-binding prompt promptrequest pr serverattachment sa proxymodel pm
set -l resource_aliases servers instances secrets templates projects users groups rbac prompts promptrequests serverattachments proxymodels all server srv instance inst secret sec template tpl project proj user group rbac-definition rbac-binding prompt promptrequest pr serverattachment sa proxymodel pm
set -l tokens (commandline -opc)
set -l found_cmd false
for tok in $tokens
@@ -225,7 +223,7 @@ complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a describe -d 'Show detailed information about a resource'
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a delete -d 'Delete a resource (server, instance, secret, project, user, group, rbac)'
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a logs -d 'Get logs from an MCP server instance'
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a create -d 'Create a resource (server, secret, secretbackend, llm, project, user, group, rbac, serverattachment, prompt)'
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a create -d 'Create a resource (server, secret, project, user, group, rbac, serverattachment, prompt)'
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a edit -d 'Edit a resource in your default editor (server, project)'
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a apply -d 'Apply declarative configuration from a YAML or JSON file'
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a patch -d 'Patch a resource field (e.g. mcpctl patch project myproj llmProvider=none)'
@@ -234,14 +232,13 @@ complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a console -d 'Interactive MCP console — unified timeline with tools, provenance, and lab replay'
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a cache -d 'Manage ProxyModel pipeline cache'
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a test -d 'Utilities for testing MCP endpoints and config'
complete -c mcpctl -n "not __mcpctl_has_project; and not __fish_seen_subcommand_from $commands" -a migrate -d 'Move resources between backends (currently: secrets between SecretBackends)'
# Project-scoped commands (with --project)
complete -c mcpctl -n "__mcpctl_has_project; and not __fish_seen_subcommand_from $project_commands" -a get -d 'List resources (servers, projects, instances, all)'
complete -c mcpctl -n "__mcpctl_has_project; and not __fish_seen_subcommand_from $project_commands" -a describe -d 'Show detailed information about a resource'
complete -c mcpctl -n "__mcpctl_has_project; and not __fish_seen_subcommand_from $project_commands" -a delete -d 'Delete a resource (server, instance, secret, project, user, group, rbac)'
complete -c mcpctl -n "__mcpctl_has_project; and not __fish_seen_subcommand_from $project_commands" -a logs -d 'Get logs from an MCP server instance'
complete -c mcpctl -n "__mcpctl_has_project; and not __fish_seen_subcommand_from $project_commands" -a create -d 'Create a resource (server, secret, secretbackend, llm, project, user, group, rbac, serverattachment, prompt)'
complete -c mcpctl -n "__mcpctl_has_project; and not __fish_seen_subcommand_from $project_commands" -a create -d 'Create a resource (server, secret, project, user, group, rbac, serverattachment, prompt)'
complete -c mcpctl -n "__mcpctl_has_project; and not __fish_seen_subcommand_from $project_commands" -a edit -d 'Edit a resource in your default editor (server, project)'
complete -c mcpctl -n "__mcpctl_has_project; and not __fish_seen_subcommand_from $project_commands" -a attach-server -d 'Attach a server to a project (requires --project)'
complete -c mcpctl -n "__mcpctl_has_project; and not __fish_seen_subcommand_from $project_commands" -a detach-server -d 'Detach a server from a project (requires --project)'
@@ -284,11 +281,9 @@ complete -c mcpctl -n "__mcpctl_subcmd_active config claude-generate" -l stdout
complete -c mcpctl -n "__mcpctl_subcmd_active config impersonate" -l quit -d 'Stop impersonating and return to original identity'
# create subcommands
set -l create_cmds server secret llm secretbackend project user group rbac mcptoken prompt serverattachment promptrequest
set -l create_cmds server secret project user group rbac mcptoken prompt serverattachment promptrequest
complete -c mcpctl -n "__fish_seen_subcommand_from create; and not __fish_seen_subcommand_from $create_cmds" -a server -d 'Create an MCP server definition'
complete -c mcpctl -n "__fish_seen_subcommand_from create; and not __fish_seen_subcommand_from $create_cmds" -a secret -d 'Create a secret'
complete -c mcpctl -n "__fish_seen_subcommand_from create; and not __fish_seen_subcommand_from $create_cmds" -a llm -d 'Register a server-managed LLM (anthropic, openai, vllm, ollama, deepseek, gemini-cli)'
complete -c mcpctl -n "__fish_seen_subcommand_from create; and not __fish_seen_subcommand_from $create_cmds" -a secretbackend -d 'Create a secret backend (plaintext, openbao)'
complete -c mcpctl -n "__fish_seen_subcommand_from create; and not __fish_seen_subcommand_from $create_cmds" -a project -d 'Create a project'
complete -c mcpctl -n "__fish_seen_subcommand_from create; and not __fish_seen_subcommand_from $create_cmds" -a user -d 'Create a user'
complete -c mcpctl -n "__fish_seen_subcommand_from create; and not __fish_seen_subcommand_from $create_cmds" -a group -d 'Create a group'
@@ -318,28 +313,6 @@ complete -c mcpctl -n "__mcpctl_subcmd_active create server" -l force -d 'Update
complete -c mcpctl -n "__mcpctl_subcmd_active create secret" -l data -d 'Secret data KEY=value (repeat for multiple)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create secret" -l force -d 'Update if already exists'
# create llm options
complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l type -d 'Provider type (anthropic, openai, deepseek, vllm, ollama, gemini-cli)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l model -d 'Model identifier (e.g. claude-3-5-sonnet-20241022)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l url -d 'Endpoint URL (empty = provider default)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l tier -d 'Tier: fast or heavy' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l description -d 'Description' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l api-key-ref -d 'API key reference in SECRET/KEY form (e.g. anthropic-key/token)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l extra -d 'Extra config key=value (repeat)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create llm" -l force -d 'Update if already exists'
# create secretbackend options
complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l type -d 'Backend type (plaintext, openbao)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l description -d 'Description' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l default -d 'Promote this backend to default (atomically demotes the current one)'
complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l url -d 'openbao: vault URL (e.g. http://bao.example:8200)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l namespace -d 'openbao: X-Vault-Namespace header value' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l mount -d 'openbao: KV v2 mount point (default: secret)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l path-prefix -d 'openbao: path prefix under mount (default: mcpctl)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l token-secret -d 'openbao: token secret reference in SECRET/KEY form (e.g. bao-creds/token)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l config -d 'Extra config as key=value (repeat for multiple)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create secretbackend" -l force -d 'Update if already exists'
# create project options
complete -c mcpctl -n "__mcpctl_subcmd_active create project" -s d -l description -d 'Project description' -x
complete -c mcpctl -n "__mcpctl_subcmd_active create project" -l proxy-model -d 'Plugin name (default, content-pipeline, gate, none)' -x
@@ -418,17 +391,6 @@ complete -c mcpctl -n "__mcpctl_subcmd_active test mcp" -l timeout -d 'Per-reque
complete -c mcpctl -n "__mcpctl_subcmd_active test mcp" -s o -l output -d 'Output format: text or json' -x
complete -c mcpctl -n "__mcpctl_subcmd_active test mcp" -l no-health -d 'Skip the /healthz preflight check'
# migrate subcommands
set -l migrate_cmds secrets
complete -c mcpctl -n "__fish_seen_subcommand_from migrate; and not __fish_seen_subcommand_from $migrate_cmds" -a secrets -d 'Migrate secrets from one SecretBackend to another'
# migrate secrets options
complete -c mcpctl -n "__mcpctl_subcmd_active migrate secrets" -l from -d 'Source SecretBackend name' -x
complete -c mcpctl -n "__mcpctl_subcmd_active migrate secrets" -l to -d 'Destination SecretBackend name' -x
complete -c mcpctl -n "__mcpctl_subcmd_active migrate secrets" -l names -d 'Comma-separated secret names (default: all)' -x
complete -c mcpctl -n "__mcpctl_subcmd_active migrate secrets" -l keep-source -d 'Leave the source copy intact (default: delete from source after write+commit)'
complete -c mcpctl -n "__mcpctl_subcmd_active migrate secrets" -l dry-run -d 'Show which secrets would be migrated without touching them'
# status options
complete -c mcpctl -n "__fish_seen_subcommand_from status" -s o -l output -d 'output format (table, json, yaml)' -x


@@ -1,167 +0,0 @@
# Secret backends
`mcpctl` stores the raw data for `Secret` resources in a pluggable **backend**.
The default is `plaintext` — the secret payload lives in Postgres as plain JSON
— which is fine for laptop development but a poor fit for shared clusters. For
production, point at an external KV store and delete secrets from the DB after
migration.
This guide covers the model, the shipped drivers, and how to migrate without
downtime.
## Model
- A `SecretBackend` resource is a single named driver instance (e.g. a pointer
at one OpenBao deployment).
- Every `Secret` row carries a `backendId` FK — the backend that owns its data.
- Exactly one `SecretBackend` has `isDefault: true`. New secrets created through
the API/CLI land on that backend.
- The `plaintext` backend is seeded at startup and named `default`. It cannot
be deleted — there needs to always be one row where the driver's own
credentials can bootstrap from (see below).
## CLI
```bash
mcpctl get secretbackends # list backends
mcpctl describe secretbackend <name> # inspect config (credentials masked)
mcpctl create secretbackend <name> --type plaintext [--default] [--description ...]
mcpctl create secretbackend <name> --type openbao \
--url http://bao.example:8200 \
--token-secret bao-creds/token \
[--namespace <ns>] [--mount secret] [--path-prefix mcpctl] \
[--default]
mcpctl delete secretbackend <name> # blocked if any secret still points at it
mcpctl migrate secrets --from default --to bao
mcpctl migrate secrets --from default --to bao --names a,b --keep-source
mcpctl migrate secrets --from default --to bao --dry-run
```
Anything you can do with `create secretbackend` also works via `apply -f`:
```yaml
kind: secretbackend
name: bao
type: openbao
description: "shared cluster OpenBao"
isDefault: true
config:
url: http://bao.svc.cluster.local:8200
tokenSecretRef: { name: bao-creds, key: token }
namespace: platform
```
## Drivers
### plaintext
Trivial. `Secret.data` holds the JSON, `externalRef` is empty.
- Storage: Postgres column.
- Bootstrap: seeded as `default` at startup.
- Cost: zero setup, zero encryption at rest, full access for any DB reader.
Use for development, CI, or single-tenant self-hosts where the DB itself is
treated as sensitive.
### openbao
Talks HTTP to an [OpenBao](https://openbao.org) (MPL 2.0 Vault fork) KV v2
mount. Also compatible with HashiCorp Vault KV v2 — the wire protocol is the
same.
| Config key | Required? | Description |
|------------------|-----------|-------------|
| `url` | yes | Base URL, e.g. `http://bao.svc.cluster.local:8200`. |
| `tokenSecretRef` | yes | `{ name, key }` pointing at a `Secret` on the **plaintext** backend that holds the bootstrap token. |
| `mount` | no | KV v2 mount name. Default `secret`. |
| `pathPrefix` | no | Path prefix under the mount. Default `mcpctl`. Secrets land at `<mount>/<pathPrefix>/<secretName>`. |
| `namespace` | no | `X-Vault-Namespace` header for OpenBao/Vault Enterprise namespaces. |
The driver only stores a reference in `Secret.externalRef` (`mount/path`). The
`Secret.data` column is left empty for openbao-backed rows — you can safely
drop DB-level access to secrets after migration.
#### Required OpenBao policy
Minimum token policy for a backend that lives at `secret/mcpctl/`:
```hcl
path "secret/data/mcpctl/*" {
capabilities = ["create", "read", "update"]
}
path "secret/metadata/mcpctl/*" {
capabilities = ["list", "delete"]
}
path "secret/metadata/mcpctl/" {
capabilities = ["list"]
}
```
Grant `delete` on `metadata/...` only if you need mcpctl to fully remove
secrets — OpenBao soft-deletes until the metadata is gone.
#### Chicken-and-egg: where does the OpenBao token live?
mcpd reads the OpenBao token from a `Secret` on the **plaintext** backend.
That's the whole point of keeping plaintext around — it's the trust root:
1. Operator creates a plaintext `Secret` holding the bootstrap token.
2. Operator creates the `openbao` backend, pointing at that secret via
`tokenSecretRef`.
3. Operator runs `mcpctl migrate secrets --from default --to bao` to move all
other secrets off plaintext.
4. After migration, the only sensitive row left on plaintext is the OpenBao
token itself. DB access is now equivalent to OpenBao token access (a single
key), not equivalent to all API keys in the system.
Follow-up work (not shipped yet) replaces static token auth with Kubernetes
ServiceAccount auth so no bootstrap token is needed at all.
## Migration — `mcpctl migrate secrets`
Atomicity is **per secret**, not per batch. Remote writes can't roll back, so we
don't pretend. For each secret the service:
1. Reads the plaintext from the source driver.
2. Writes it to the destination driver.
3. Updates the `Secret` row: flips `backendId`, sets new `externalRef`, clears
`data`.
4. Deletes from source (skipped with `--keep-source`).
If the command is interrupted between step 2 and 3, the destination has an
orphan entry but the source still owns the row. Re-running is idempotent — the
service skips secrets that are already on the destination and picks up the
rest.
```bash
# Dry-run first: see what would move.
mcpctl migrate secrets --from default --to bao --dry-run
# Migrate everything.
mcpctl migrate secrets --from default --to bao
# Migrate a subset only.
mcpctl migrate secrets --from default --to bao --names api-keys,oauth-client
# Leave the source copy in place (useful for A/B validation).
mcpctl migrate secrets --from default --to bao --keep-source
```
The command prints a per-secret summary (migrated / skipped / failed) and exits
non-zero if any secret failed. Ctrl-C during the run is safe — restart when you
want, no duplicate writes.
## RBAC
- `resource: secretbackends` — gated like any other resource (`view`,
`create`, `edit`, `delete`).
- `role: run, action: migrate-secrets` — required to call
`POST /api/v1/secrets/migrate`.
Describe output masks config values whose keys look like credentials
(`token`, `secret`, `password`, `key`), so `mcpctl describe secretbackend` is
safe to paste into tickets.

105
pnpm-lock.yaml generated

@@ -39,6 +39,28 @@ importers:
specifier: ^4.0.18
version: 4.0.18(@types/node@25.3.0)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)
src/agent:
dependencies:
'@mcpctl/shared':
specifier: workspace:*
version: link:../shared
'@modelcontextprotocol/sdk':
specifier: ^1.0.0
version: 1.26.0(zod@3.25.76)
commander:
specifier: ^13.0.0
version: 13.1.0
openai:
specifier: ^4.77.0
version: 4.104.0(ws@8.19.0)(zod@3.25.76)
devDependencies:
'@types/node':
specifier: ^25.3.0
version: 25.3.0
vitest:
specifier: ^4.0.0
version: 4.0.18(@types/node@25.3.0)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)
src/cli:
dependencies:
'@inkjs/ui':
@@ -989,6 +1011,10 @@ packages:
abbrev@1.1.1:
resolution: {integrity: sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==}
abort-controller@3.0.0:
resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==}
engines: {node: '>=6.5'}
abstract-logging@2.0.1:
resolution: {integrity: sha512-2BjRTZxTPvheOvGbBslFSYOUkr+SjPtOnrLP33f+VIWLzezQpZcqVg7ja3L4dBXmzzgwT+a029jRx5PCi3JuiA==}
@@ -1014,6 +1040,10 @@ packages:
resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==}
engines: {node: '>= 14'}
agentkeepalive@4.6.0:
resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==}
engines: {node: '>= 8.0.0'}
ajv-formats@3.0.1:
resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==}
peerDependencies:
@@ -1509,6 +1539,10 @@ packages:
resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==}
engines: {node: '>= 0.6'}
event-target-shim@5.0.1:
resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
engines: {node: '>=6'}
events-universal@1.0.1:
resolution: {integrity: sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==}
@@ -1610,10 +1644,17 @@ packages:
flatted@3.3.3:
resolution: {integrity: sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==}
form-data-encoder@1.7.2:
resolution: {integrity: sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==}
form-data@4.0.5:
resolution: {integrity: sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==}
engines: {node: '>= 6'}
formdata-node@4.4.1:
resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==}
engines: {node: '>= 12.20'}
forwarded@0.2.0:
resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==}
engines: {node: '>= 0.6'}
@@ -1726,6 +1767,9 @@ packages:
resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==}
engines: {node: '>= 6'}
humanize-ms@1.2.1:
resolution: {integrity: sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==}
iconv-lite@0.7.2:
resolution: {integrity: sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==}
engines: {node: '>=0.10.0'}
@@ -2012,6 +2056,11 @@ packages:
node-addon-api@5.1.0:
resolution: {integrity: sha512-eh0GgfEkpnoWDq+VY8OyvYhFEzBk6jIYbRKdIlyTiAXIVJ8PyBaKb0rp7oDtoddbdoHWhq8wwr+XZ81F1rpNdA==}
node-domexception@1.0.0:
resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==}
engines: {node: '>=10.5.0'}
deprecated: Use your platform's native DOMException instead
node-fetch-native@1.6.7:
resolution: {integrity: sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q==}
@@ -2073,6 +2122,18 @@ packages:
resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==}
engines: {node: '>=6'}
openai@4.104.0:
resolution: {integrity: sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==}
hasBin: true
peerDependencies:
ws: ^8.18.0
zod: ^3.23.8
peerDependenciesMeta:
ws:
optional: true
zod:
optional: true
openid-client@6.8.2:
resolution: {integrity: sha512-uOvTCndr4udZsKihJ68H9bUICrriHdUVJ6Az+4Ns6cW55rwM5h0bjVIzDz2SxgOI84LKjFyjOFvERLzdTUROGA==}
@@ -2647,6 +2708,10 @@ packages:
jsdom:
optional: true
web-streams-polyfill@4.0.0-beta.3:
resolution: {integrity: sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==}
engines: {node: '>= 14'}
webidl-conversions@3.0.1:
resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
@@ -3509,6 +3574,10 @@ snapshots:
abbrev@1.1.1: {}
abort-controller@3.0.0:
dependencies:
event-target-shim: 5.0.1
abstract-logging@2.0.1: {}
accepts@2.0.0:
@@ -3530,6 +3599,10 @@ snapshots:
agent-base@7.1.4: {}
agentkeepalive@4.6.0:
dependencies:
humanize-ms: 1.2.1
ajv-formats@3.0.1(ajv@8.18.0):
optionalDependencies:
ajv: 8.18.0
@@ -4020,6 +4093,8 @@ snapshots:
etag@1.8.1: {}
event-target-shim@5.0.1: {}
events-universal@1.0.1:
dependencies:
bare-events: 2.8.2
@@ -4168,6 +4243,8 @@ snapshots:
flatted@3.3.3: {}
form-data-encoder@1.7.2: {}
form-data@4.0.5:
dependencies:
asynckit: 0.4.0
@@ -4176,6 +4253,11 @@ snapshots:
hasown: 2.0.2
mime-types: 2.1.35
formdata-node@4.4.1:
dependencies:
node-domexception: 1.0.0
web-streams-polyfill: 4.0.0-beta.3
forwarded@0.2.0: {}
fresh@2.0.0: {}
@@ -4298,6 +4380,10 @@ snapshots:
transitivePeerDependencies:
- supports-color
humanize-ms@1.2.1:
dependencies:
ms: 2.1.3
iconv-lite@0.7.2:
dependencies:
safer-buffer: 2.1.2
@@ -4551,6 +4637,8 @@ snapshots:
node-addon-api@5.1.0: {}
node-domexception@1.0.0: {}
node-fetch-native@1.6.7: {}
node-fetch@2.7.0:
@@ -4600,6 +4688,21 @@ snapshots:
dependencies:
mimic-fn: 2.1.0
openai@4.104.0(ws@8.19.0)(zod@3.25.76):
dependencies:
'@types/node': 18.19.130
'@types/node-fetch': 2.6.13
abort-controller: 3.0.0
agentkeepalive: 4.6.0
form-data-encoder: 1.7.2
formdata-node: 4.4.1
node-fetch: 2.7.0
optionalDependencies:
ws: 8.19.0
zod: 3.25.76
transitivePeerDependencies:
- encoding
openid-client@6.8.2:
dependencies:
jose: 6.1.3
@@ -5211,6 +5314,8 @@ snapshots:
- tsx
- yaml
web-streams-polyfill@4.0.0-beta.3: {}
webidl-conversions@3.0.1: {}
whatwg-url@5.0.0:


@@ -184,7 +184,7 @@ async function extractTree(): Promise<CmdInfo> {
// ============================================================
const CANONICAL_RESOURCES = [
'servers', 'instances', 'secrets', 'secretbackends', 'llms', 'templates', 'projects',
'servers', 'instances', 'secrets', 'templates', 'projects',
'users', 'groups', 'rbac', 'prompts', 'promptrequests',
'serverattachments', 'proxymodels', 'all',
];
@@ -193,8 +193,6 @@ const ALIAS_ENTRIES: [string, string][] = [
['server', 'servers'], ['srv', 'servers'],
['instance', 'instances'], ['inst', 'instances'],
['secret', 'secrets'], ['sec', 'secrets'],
['secretbackend', 'secretbackends'], ['sb', 'secretbackends'],
['llm', 'llms'], ['llms', 'llms'],
['template', 'templates'], ['tpl', 'templates'],
['project', 'projects'], ['proj', 'projects'],
['user', 'users'],

28
src/agent/package.json Normal file

@@ -0,0 +1,28 @@
{
"name": "@mcpctl/agent",
"version": "0.0.1",
"private": true,
"type": "module",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"bin": {
"mcpctl-agent": "./dist/cli.js"
},
"scripts": {
"build": "tsc --build",
"clean": "rimraf dist",
"run": "node dist/cli.js",
"test": "vitest",
"test:run": "vitest run"
},
"dependencies": {
"@mcpctl/shared": "workspace:*",
"@modelcontextprotocol/sdk": "^1.0.0",
"commander": "^13.0.0",
"openai": "^4.77.0"
},
"devDependencies": {
"@types/node": "^25.3.0",
"vitest": "^4.0.0"
}
}

201
src/agent/src/agent.ts Normal file

@@ -0,0 +1,201 @@
/**
* MCP-aware chat agent loop.
*
* Correct where LiteLLM's integration is broken:
* - Uses `@modelcontextprotocol/sdk`'s `StreamableHTTPClientTransport`, which
* preserves `Mcp-Session-Id` across requests automatically.
* - Honors `notifications/tools/list_changed`: after every tool-call round we
* re-fetch the tool list before the next model inference, so an MCP server
* that reveals new tools mid-session (gated sessions, auto-install) shows
* them to the model on the next turn.
*
* Inference goes through an OpenAI-compatible endpoint (LiteLLM at
* http://litellm…:4000/v1 in this repo's deployment; vLLM works too). That
* keeps LiteLLM doing its actual job — model routing — and strips it of the
* MCP role it was failing at.
*/
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
import OpenAI from 'openai';
import type {
ChatCompletionMessageParam,
ChatCompletionTool,
ChatCompletionMessageToolCall,
} from 'openai/resources/chat/completions';
export interface AgentConfig {
/** Full URL of the MCP endpoint, e.g. http://mcp.mcpctl.svc:3200/projects/sre/mcp */
mcpUrl: string;
/** Raw `mcpctl_pat_…` bearer for the MCP endpoint. */
mcpToken: string;
/** OpenAI-compatible base URL, e.g. http://litellm…:4000/v1 */
llmBaseUrl: string;
/** API key for the OpenAI-compatible endpoint (LiteLLM master key). */
llmApiKey: string;
/** Model name as known to the OpenAI endpoint, e.g. qwen3-thinking */
model: string;
/** Optional system prompt (prepended as `role:'system'` if given). */
systemPrompt?: string;
/** Hard cap on loop iterations; stops runaway agents. Default 20. */
maxIterations?: number;
/** Per-tool-call timeout ms passed to the MCP SDK. Default 60_000. */
toolTimeoutMs?: number;
}
export interface AgentDeps {
/** Injectable for tests. Creates the MCP Client + transport. */
mcpClientFactory?: (cfg: AgentConfig) => Promise<McpLike>;
/** Injectable for tests. Creates the OpenAI-compatible client. */
llmClientFactory?: (cfg: AgentConfig) => LlmLike;
/** Optional per-iteration logger (stdout, audit sink, etc.). */
log?: (line: string) => void;
}
/**
* Structural typing for the MCP client surface we actually use. Keeps the
* loop testable without importing the concrete SDK in test fixtures. Optional
* fields are `T | undefined` (not `T?`) to stay compatible with the MCP SDK's
* own types under `exactOptionalPropertyTypes`.
*/
export interface McpLike {
listTools(): Promise<{ tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }>;
callTool(args: { name: string; arguments: Record<string, unknown> }): Promise<unknown>;
close(): Promise<void>;
}
export interface LlmLike {
chat: {
completions: {
create(body: {
model: string;
messages: ChatCompletionMessageParam[];
tools?: ChatCompletionTool[];
tool_choice?: 'auto' | 'none' | { type: 'function'; function: { name: string } };
}): Promise<{ choices: Array<{ message: { role: 'assistant'; content: string | null; tool_calls?: ChatCompletionMessageToolCall[] }; finish_reason?: string | null }> }>;
};
};
}
export interface AgentResult {
/** The final assistant message (after all tool-call rounds). */
finalText: string;
/** Full message history, useful for eval + debugging. */
messages: ChatCompletionMessageParam[];
/** Number of tool-call rounds that ran. Zero if the model answered directly. */
rounds: number;
/** True if the loop terminated because `maxIterations` was hit. */
hitIterationLimit: boolean;
}
export async function runAgent(prompt: string, config: AgentConfig, deps: AgentDeps = {}): Promise<AgentResult> {
const log = deps.log ?? (() => { /* silent */ });
const maxIterations = config.maxIterations ?? 20;
const mcp = await (deps.mcpClientFactory ?? defaultMcpFactory)(config);
try {
const llm = (deps.llmClientFactory ?? defaultLlmFactory)(config);
const messages: ChatCompletionMessageParam[] = [];
if (config.systemPrompt) messages.push({ role: 'system', content: config.systemPrompt });
messages.push({ role: 'user', content: prompt });
let tools = toOpenAiTools(await mcp.listTools());
log(`[agent] starting with ${tools.length} MCP tools`);
let rounds = 0;
for (let i = 0; i < maxIterations; i++) {
const body: Parameters<LlmLike['chat']['completions']['create']>[0] = {
model: config.model,
messages,
};
if (tools.length > 0) {
body.tools = tools;
body.tool_choice = 'auto';
}
const reply = await llm.chat.completions.create(body);
const msg = reply.choices[0]!.message;
messages.push(msg);
const toolCalls = msg.tool_calls ?? [];
if (toolCalls.length === 0) {
log(`[agent] done after ${rounds} tool-call round(s)`);
return { finalText: msg.content ?? '', messages, rounds, hitIterationLimit: false };
}
rounds++;
log(`[agent] round ${rounds}: model asked to call ${toolCalls.length} tool(s)`);
for (const tc of toolCalls) {
const name = tc.function.name;
let args: Record<string, unknown> = {};
try {
args = tc.function.arguments ? JSON.parse(tc.function.arguments) as Record<string, unknown> : {};
} catch (err) {
log(`[agent] tool ${name}: could not parse arguments (${(err as Error).message}) — sending empty args`);
}
log(`[agent] → ${name}(${truncate(JSON.stringify(args), 120)})`);
let result: unknown;
try {
result = await mcp.callTool({ name, arguments: args });
} catch (err) {
result = { error: (err as Error).message };
log(`[agent] ← ERROR: ${(err as Error).message}`);
}
messages.push({
role: 'tool',
tool_call_id: tc.id,
content: typeof result === 'string' ? result : JSON.stringify(result),
});
}
// MCP server may have emitted notifications/tools/list_changed during a
// tool call (e.g. gated sessions revealing tools after begin_session).
// The SDK auto-notifies on that event; simplest correctness: re-fetch
// on every loop before the next inference so the model sees fresh tools.
tools = toOpenAiTools(await mcp.listTools());
}
log(`[agent] hit iteration limit (${maxIterations}) — returning partial`);
const last = messages[messages.length - 1];
const tail = last && last.role === 'assistant'
? (typeof last.content === 'string' ? last.content : '')
: '';
return { finalText: tail, messages, rounds, hitIterationLimit: true };
} finally {
await mcp.close().catch(() => { /* best-effort */ });
}
}
function toOpenAiTools(listed: { tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }): ChatCompletionTool[] {
return listed.tools.map((t) => {
const fn: { name: string; description?: string; parameters?: Record<string, unknown> } = { name: t.name };
if (t.description !== undefined) fn.description = t.description;
if (t.inputSchema !== undefined) fn.parameters = t.inputSchema as Record<string, unknown>;
return { type: 'function', function: fn } as ChatCompletionTool;
});
}
function truncate(s: string, n: number): string {
return s.length <= n ? s : `${s.slice(0, n - 1)}…`;
}
async function defaultMcpFactory(cfg: AgentConfig): Promise<McpLike> {
const client = new Client({ name: 'mcpctl-agent', version: '0.0.1' });
const transport = new StreamableHTTPClientTransport(new URL(cfg.mcpUrl), {
requestInit: { headers: { Authorization: `Bearer ${cfg.mcpToken}` } },
});
// The SDK's Transport interface declares `sessionId: string` while the
// Streamable-HTTP transport starts with `sessionId: undefined` until
// `initialize` populates it — that's legal at runtime but TS exactOptional
// rules reject the direct assignment.
await client.connect(transport as unknown as Parameters<Client['connect']>[0]);
return {
listTools: () => client.listTools() as Promise<{ tools: Array<{ name: string; description?: string | undefined; inputSchema?: unknown }> }>,
callTool: (args) => client.callTool(args),
close: () => client.close(),
};
}
function defaultLlmFactory(cfg: AgentConfig): LlmLike {
return new OpenAI({ baseURL: cfg.llmBaseUrl, apiKey: cfg.llmApiKey }) as unknown as LlmLike;
}

107
src/agent/src/cli.ts Normal file

@@ -0,0 +1,107 @@
#!/usr/bin/env node
/**
* `mcpctl-agent` CLI — standalone for now, will be wired into the mcpctl
* binary as `mcpctl agent run …` in a follow-up so the main CLI's permission
* model + completions pipeline can pick it up.
*
* Usage:
* mcpctl-agent run "analyse last week's slow grafana queries" \
* --model qwen3-thinking \
* --project sre
*
* Env reads (these are the same shape we'd mount from a k8s Secret/ConfigMap
* in the follow-up serve mode):
* AGENT_MCP_URL e.g. https://mcp.ad.itaz.eu/projects/sre/mcp
* AGENT_MCP_TOKEN mcpctl_pat_…
* AGENT_LLM_BASE_URL e.g. http://litellm.nvidia-nim.svc.cluster.local:4000/v1
* AGENT_LLM_API_KEY LiteLLM master key
* AGENT_MODEL default model (overridable with --model)
*/
import { Command } from 'commander';
import { runAgent, type AgentConfig } from './agent.js';
const program = new Command();
program
.name('mcpctl-agent')
.description('MCP-correct chat agent (preserves Mcp-Session-Id, honors tools/list_changed)')
.version('0.0.1');
program
.command('run <prompt>')
.description('One-shot: send a prompt, let the agent use MCP tools until it answers, print the final text')
.option('--mcp-url <url>', 'MCP endpoint URL (default: $AGENT_MCP_URL)')
.option('--mcp-token <bearer>', 'MCP bearer token (default: $AGENT_MCP_TOKEN)')
.option('--llm-base-url <url>', 'OpenAI-compatible endpoint (default: $AGENT_LLM_BASE_URL)')
.option('--llm-api-key <key>', 'API key (default: $AGENT_LLM_API_KEY)')
.option('--model <name>', 'Model to use (default: $AGENT_MODEL)')
.option('--project <name>', 'Override the MCP URL path to /projects/<name>/mcp against the base at $AGENT_MCP_URL')
.option('--system <prompt>', 'System prompt (prepended)')
.option('--max-iterations <n>', 'Max tool-call rounds (default 20)', '20')
.option('-o, --output <format>', 'Output format: text | json', 'text')
.option('--verbose', 'Log each loop iteration to stderr')
.action(async (prompt: string, opts: {
mcpUrl?: string;
mcpToken?: string;
llmBaseUrl?: string;
llmApiKey?: string;
model?: string;
project?: string;
system?: string;
maxIterations: string;
output: string;
verbose?: boolean;
}) => {
const mcpUrl = resolveMcpUrl(opts.mcpUrl, opts.project);
const cfg: AgentConfig = {
mcpUrl,
mcpToken: required('--mcp-token / $AGENT_MCP_TOKEN', opts.mcpToken ?? process.env.AGENT_MCP_TOKEN),
llmBaseUrl: required('--llm-base-url / $AGENT_LLM_BASE_URL', opts.llmBaseUrl ?? process.env.AGENT_LLM_BASE_URL),
llmApiKey: required('--llm-api-key / $AGENT_LLM_API_KEY', opts.llmApiKey ?? process.env.AGENT_LLM_API_KEY),
model: required('--model / $AGENT_MODEL', opts.model ?? process.env.AGENT_MODEL),
maxIterations: Number(opts.maxIterations),
};
if (opts.system !== undefined) cfg.systemPrompt = opts.system;
const logFn = opts.verbose
? (line: string) => process.stderr.write(`${line}\n`)
: () => { /* silent */ };
const result = await runAgent(prompt, cfg, { log: logFn });
if (opts.output === 'json') {
process.stdout.write(`${JSON.stringify({
finalText: result.finalText,
rounds: result.rounds,
hitIterationLimit: result.hitIterationLimit,
messages: result.messages,
}, null, 2)}\n`);
} else {
process.stdout.write(`${result.finalText}\n`);
if (result.hitIterationLimit) process.stderr.write('[agent] hit --max-iterations limit; output may be incomplete\n');
}
});
program.parseAsync(process.argv).catch((err: unknown) => {
const msg = err instanceof Error ? err.message : String(err);
process.stderr.write(`error: ${msg}\n`);
process.exit(1);
});
function resolveMcpUrl(flag: string | undefined, project: string | undefined): string {
const base = flag ?? process.env.AGENT_MCP_URL;
if (!base) throw new Error('--mcp-url or $AGENT_MCP_URL is required');
if (project === undefined) return base;
// If user supplied --project and the URL already ends with /projects/<x>/mcp,
// replace the segment; otherwise treat the base as an origin and append.
const existingMatch = base.match(/^(.+?)\/projects\/[^/]+\/mcp\/?$/);
if (existingMatch) return `${existingMatch[1]}/projects/${encodeURIComponent(project)}/mcp`;
return `${base.replace(/\/+$/, '')}/projects/${encodeURIComponent(project)}/mcp`;
}
function required<T>(label: string, value: T | undefined | null): T {
if (value === undefined || value === null || value === '') {
throw new Error(`${label} is required`);
}
return value;
}

2
src/agent/src/index.ts Normal file

@@ -0,0 +1,2 @@
export { runAgent } from './agent.js';
export type { AgentConfig, AgentDeps, AgentResult, McpLike, LlmLike } from './agent.js';


@@ -0,0 +1,180 @@
import { describe, it, expect, vi } from 'vitest';
import { runAgent, type AgentConfig, type LlmLike, type McpLike } from '../src/agent.js';
const BASE_CONFIG: AgentConfig = {
mcpUrl: 'http://mcp.example/projects/x/mcp',
mcpToken: 'mcpctl_pat_test',
llmBaseUrl: 'http://llm.example/v1',
llmApiKey: 'test',
model: 'qwen3-thinking',
};
function makeMcp(overrides: Partial<McpLike> = {}): McpLike {
return {
listTools: vi.fn(async () => ({ tools: [] })),
callTool: vi.fn(async () => ({ content: [{ type: 'text', text: 'ok' }] })),
close: vi.fn(async () => { /* noop */ }),
...overrides,
};
}
function makeLlm(replies: Array<{ content?: string | null; tool_calls?: Array<{ id: string; name: string; arguments: string }> }>): LlmLike {
const queue = [...replies];
return {
chat: {
completions: {
create: vi.fn(async () => {
const next = queue.shift();
if (!next) throw new Error('LLM mock exhausted');
const message: {
role: 'assistant';
content: string | null;
tool_calls?: Array<{ id: string; type: 'function'; function: { name: string; arguments: string } }>;
} = { role: 'assistant', content: next.content ?? null };
if (next.tool_calls) {
message.tool_calls = next.tool_calls.map((tc) => ({
id: tc.id,
type: 'function' as const,
function: { name: tc.name, arguments: tc.arguments },
}));
}
return { choices: [{ message, finish_reason: next.tool_calls ? 'tool_calls' : 'stop' }] };
}),
},
},
};
}
describe('runAgent', () => {
it('returns directly when the model answers without tool calls', async () => {
const mcp = makeMcp();
const llm = makeLlm([{ content: 'hello world' }]);
const result = await runAgent('hi', BASE_CONFIG, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
expect(result.finalText).toBe('hello world');
expect(result.rounds).toBe(0);
expect(result.hitIterationLimit).toBe(false);
expect(mcp.callTool).not.toHaveBeenCalled();
expect(mcp.close).toHaveBeenCalled();
});
it('executes a tool call, feeds the result back, and terminates on the next assistant turn', async () => {
const mcp = makeMcp({
listTools: vi.fn(async () => ({
tools: [{ name: 'search', description: 'search the docs', inputSchema: { type: 'object' } }],
})),
callTool: vi.fn(async () => ({ content: [{ type: 'text', text: 'a matching doc' }] })),
});
const llm = makeLlm([
{ tool_calls: [{ id: 'call-1', name: 'search', arguments: '{"q":"foo"}' }] },
{ content: 'final answer based on tool result' },
]);
const result = await runAgent('find foo', BASE_CONFIG, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
expect(result.finalText).toBe('final answer based on tool result');
expect(result.rounds).toBe(1);
expect(mcp.callTool).toHaveBeenCalledWith({ name: 'search', arguments: { q: 'foo' } });
// Messages should be: user → assistant (tool_calls) → tool → assistant (final)
expect(result.messages).toHaveLength(4);
expect(result.messages[0]!.role).toBe('user');
expect(result.messages[1]!.role).toBe('assistant');
expect(result.messages[2]!.role).toBe('tool');
expect(result.messages[3]!.role).toBe('assistant');
});
it('refetches tools/list between rounds to honor list_changed', async () => {
const listTools = vi.fn()
.mockResolvedValueOnce({ tools: [{ name: 'begin_session' }] })
.mockResolvedValueOnce({ tools: [{ name: 'begin_session' }, { name: 'search' }, { name: 'fetch' }] });
const mcp = makeMcp({ listTools });
const llm = makeLlm([
{ tool_calls: [{ id: 'c1', name: 'begin_session', arguments: '{}' }] },
{ content: 'done' },
]);
await runAgent('go', BASE_CONFIG, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
// Called at startup + after each round (one round here)
expect(listTools).toHaveBeenCalledTimes(2);
// The second chat.completions.create call should have received all 3 tools
const secondCall = (llm.chat.completions.create as unknown as { mock: { calls: Array<Array<{ tools?: unknown[] }>> } }).mock.calls[1]!;
expect(secondCall[0].tools).toHaveLength(3);
});
it('stops after maxIterations and flags hitIterationLimit', async () => {
const mcp = makeMcp({
listTools: vi.fn(async () => ({ tools: [{ name: 'loop' }] })),
});
// Infinite tool-call stream
const llm: LlmLike = {
chat: {
completions: {
create: vi.fn(async () => ({
choices: [{
message: {
role: 'assistant',
content: null,
tool_calls: [{ id: 'x', type: 'function', function: { name: 'loop', arguments: '{}' } }],
},
finish_reason: 'tool_calls',
}],
})),
},
},
};
const result = await runAgent('trap me', { ...BASE_CONFIG, maxIterations: 3 }, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
expect(result.hitIterationLimit).toBe(true);
expect(result.rounds).toBe(3);
});
it('serializes a failed tool call into the conversation instead of throwing', async () => {
const mcp = makeMcp({
listTools: vi.fn(async () => ({ tools: [{ name: 'fails' }] })),
callTool: vi.fn(async () => { throw new Error('upstream exploded'); }),
});
const llm = makeLlm([
{ tool_calls: [{ id: 'c1', name: 'fails', arguments: '{}' }] },
{ content: 'ok I saw the error, moving on' },
]);
const result = await runAgent('try the broken tool', BASE_CONFIG, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
expect(result.finalText).toBe('ok I saw the error, moving on');
const toolMsg = result.messages.find((m) => m.role === 'tool');
expect(toolMsg).toBeDefined();
expect(String(toolMsg!.content)).toContain('upstream exploded');
});
it('prepends systemPrompt when supplied', async () => {
const mcp = makeMcp();
const llm = makeLlm([{ content: 'fine' }]);
await runAgent('hi', { ...BASE_CONFIG, systemPrompt: 'you are a helpful assistant' }, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
});
const call = (llm.chat.completions.create as unknown as { mock: { calls: Array<Array<{ messages: Array<{ role: string; content: unknown }> }>> } }).mock.calls[0]![0];
expect(call.messages[0]).toEqual({ role: 'system', content: 'you are a helpful assistant' });
expect(call.messages[1]).toEqual({ role: 'user', content: 'hi' });
});
it('closes the MCP client even when the loop throws', async () => {
const mcp = makeMcp({
listTools: vi.fn(async () => { throw new Error('mcp dead'); }),
});
const llm = makeLlm([]);
await expect(runAgent('x', BASE_CONFIG, {
mcpClientFactory: async () => mcp,
llmClientFactory: () => llm,
})).rejects.toThrow('mcp dead');
expect(mcp.close).toHaveBeenCalled();
});
});

12
src/agent/tsconfig.json Normal file

@@ -0,0 +1,12 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": "src",
"outDir": "dist",
"types": ["node"]
},
"include": ["src/**/*.ts"],
"references": [
{ "path": "../shared" }
]
}


@@ -41,28 +41,6 @@ const SecretSpecSchema = z.object({
data: z.record(z.string()).default({}),
});
const SecretBackendSpecSchema = z.object({
name: z.string().min(1),
type: z.string().min(1),
description: z.string().default(''),
isDefault: z.boolean().optional(),
config: z.record(z.unknown()).default({}),
});
const LlmSpecSchema = z.object({
name: z.string().min(1).max(100).regex(/^[a-z0-9-]+$/),
type: z.enum(['anthropic', 'openai', 'deepseek', 'vllm', 'ollama', 'gemini-cli']),
model: z.string().min(1),
url: z.string().url().optional(),
tier: z.enum(['fast', 'heavy']).default('fast'),
description: z.string().max(500).default(''),
apiKeyRef: z.object({
name: z.string().min(1),
key: z.string().min(1),
}).nullable().optional(),
extraConfig: z.record(z.unknown()).default({}),
});
const TemplateEnvEntrySchema = z.object({
name: z.string().min(1),
description: z.string().optional(),
@@ -164,9 +142,7 @@ const McpTokenSpecSchema = z.object({
});
const ApplyConfigSchema = z.object({
secretbackends: z.array(SecretBackendSpecSchema).default([]),
secrets: z.array(SecretSpecSchema).default([]),
llms: z.array(LlmSpecSchema).default([]),
servers: z.array(ServerSpecSchema).default([]),
users: z.array(UserSpecSchema).default([]),
groups: z.array(GroupSpecSchema).default([]),
@@ -207,9 +183,7 @@ export function createApplyCommand(deps: ApplyCommandDeps): Command {
if (opts.dryRun) {
log('Dry run - would apply:');
if (config.secretbackends.length > 0) log(` ${config.secretbackends.length} secretbackend(s)`);
if (config.secrets.length > 0) log(` ${config.secrets.length} secret(s)`);
if (config.llms.length > 0) log(` ${config.llms.length} llm(s)`);
if (config.servers.length > 0) log(` ${config.servers.length} server(s)`);
if (config.users.length > 0) log(` ${config.users.length} user(s)`);
if (config.groups.length > 0) log(` ${config.groups.length} group(s)`);
@@ -255,8 +229,6 @@ const KIND_TO_RESOURCE: Record<string, string> = {
promptrequest: 'promptrequests',
serverattachment: 'serverattachments',
mcptoken: 'mcptokens',
secretbackend: 'secretbackends',
llm: 'llms',
};
/**
@@ -352,30 +324,6 @@ async function applyConfig(client: ApiClient, config: ApplyConfig, log: (...args
}
}
// Apply secret backends first — secrets reference them.
// When multiple backends claim isDefault: true, the server's atomic swap will
// leave whichever was applied last as the effective default.
for (const sb of config.secretbackends) {
try {
const existing = await cachedFindByName('secretbackends', sb.name);
if (existing) {
const updateBody: Record<string, unknown> = {
config: sb.config,
description: sb.description,
};
if (sb.isDefault !== undefined) updateBody.isDefault = sb.isDefault;
await withRetry(() => client.put(`/api/v1/secretbackends/${existing.id}`, updateBody));
log(`Updated secretbackend: ${sb.name}`);
} else {
await withRetry(() => client.post('/api/v1/secretbackends', sb));
invalidateCache('secretbackends');
log(`Created secretbackend: ${sb.name}`);
}
} catch (err) {
log(`Error applying secretbackend '${sb.name}': ${err instanceof Error ? err.message : err}`);
}
}
// Apply secrets
for (const secret of config.secrets) {
try {
@@ -393,25 +341,6 @@ async function applyConfig(client: ApiClient, config: ApplyConfig, log: (...args
}
}
// Apply LLMs (after secrets — apiKeyRef resolves to an existing Secret)
for (const llm of config.llms) {
try {
const existing = await cachedFindByName('llms', llm.name);
if (existing) {
// Exclude type on update — type is immutable.
const { name: _n, type: _t, ...updateBody } = llm;
await withRetry(() => client.put(`/api/v1/llms/${existing.id}`, updateBody));
log(`Updated llm: ${llm.name}`);
} else {
await withRetry(() => client.post('/api/v1/llms', llm));
invalidateCache('llms');
log(`Created llm: ${llm.name}`);
}
} catch (err) {
log(`Error applying llm '${llm.name}': ${err instanceof Error ? err.message : err}`);
}
}
// Apply servers
for (const server of config.servers) {
try {


@@ -88,7 +88,7 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
const { client, log } = deps;
const cmd = new Command('create')
.description('Create a resource (server, secret, secretbackend, llm, project, user, group, rbac, serverattachment, prompt)');
.description('Create a resource (server, secret, project, user, group, rbac, serverattachment, prompt)');
// --- create server ---
cmd.command('server')
@@ -252,125 +252,6 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
}
});
// --- create llm ---
cmd.command('llm')
.description('Register a server-managed LLM (anthropic, openai, vllm, ollama, deepseek, gemini-cli)')
.argument('<name>', 'LLM name (lowercase alphanumeric with hyphens)')
.requiredOption('--type <type>', 'Provider type (anthropic, openai, deepseek, vllm, ollama, gemini-cli)')
.requiredOption('--model <model>', 'Model identifier (e.g. claude-3-5-sonnet-20241022)')
.option('--url <url>', 'Endpoint URL (empty = provider default)')
.option('--tier <tier>', 'Tier: fast or heavy', 'fast')
.option('--description <text>', 'Description')
.option('--api-key-ref <ref>', 'API key reference in SECRET/KEY form (e.g. anthropic-key/token)')
.option('--extra <entry>', 'Extra config key=value (repeat)', collect, [])
.option('--force', 'Update if already exists')
.action(async (name: string, opts) => {
const body: Record<string, unknown> = {
name,
type: opts.type,
model: opts.model,
tier: opts.tier,
};
if (opts.url) body.url = opts.url;
if (opts.description !== undefined) body.description = opts.description;
if (opts.apiKeyRef) {
const slashIdx = (opts.apiKeyRef as string).indexOf('/');
if (slashIdx < 1) throw new Error(`Invalid --api-key-ref '${opts.apiKeyRef as string}'. Expected SECRET_NAME/KEY_NAME`);
body.apiKeyRef = {
name: (opts.apiKeyRef as string).slice(0, slashIdx),
key: (opts.apiKeyRef as string).slice(slashIdx + 1),
};
}
if (opts.extra && (opts.extra as string[]).length > 0) {
const extra: Record<string, unknown> = {};
for (const entry of opts.extra as string[]) {
const eqIdx = entry.indexOf('=');
if (eqIdx === -1) throw new Error(`Invalid --extra '${entry}'. Expected key=value`);
extra[entry.slice(0, eqIdx)] = entry.slice(eqIdx + 1);
}
body.extraConfig = extra;
}
try {
const row = await client.post<{ id: string; name: string }>('/api/v1/llms', body);
log(`llm '${row.name}' created (id: ${row.id})`);
} catch (err) {
if (err instanceof ApiError && err.status === 409 && opts.force) {
const existing = (await client.get<Array<{ id: string; name: string }>>('/api/v1/llms')).find((l) => l.name === name);
if (!existing) throw err;
const { name: _n, type: _t, ...updateBody } = body;
await client.put(`/api/v1/llms/${existing.id}`, updateBody);
log(`llm '${name}' updated (id: ${existing.id})`);
} else {
throw err;
}
}
});
// --- create secretbackend ---
cmd.command('secretbackend')
.alias('sb')
.description('Create a secret backend (plaintext, openbao)')
.argument('<name>', 'Backend name (lowercase, hyphens allowed)')
.requiredOption('--type <type>', 'Backend type (plaintext, openbao)')
.option('--description <text>', 'Description')
.option('--default', 'Promote this backend to default (atomically demotes the current one)')
.option('--url <url>', 'openbao: vault URL (e.g. http://bao.example:8200)')
.option('--namespace <ns>', 'openbao: X-Vault-Namespace header value')
.option('--mount <mount>', 'openbao: KV v2 mount point (default: secret)')
.option('--path-prefix <prefix>', 'openbao: path prefix under mount (default: mcpctl)')
.option('--token-secret <ref>', 'openbao: token secret reference in SECRET/KEY form (e.g. bao-creds/token)')
.option('--config <entry>', 'Extra config as key=value (repeat for multiple)', collect, [])
.option('--force', 'Update if already exists')
.action(async (name: string, opts) => {
const type = opts.type as string;
const config: Record<string, unknown> = {};
if (type === 'openbao') {
if (!opts.url) throw new Error('--url is required for openbao backend');
if (!opts.tokenSecret) throw new Error('--token-secret is required for openbao backend (format: SECRET/KEY)');
const slashIdx = (opts.tokenSecret as string).indexOf('/');
if (slashIdx < 1) throw new Error(`Invalid --token-secret '${opts.tokenSecret as string}'. Expected SECRET_NAME/KEY_NAME`);
config.url = opts.url;
config.tokenSecretRef = {
name: (opts.tokenSecret as string).slice(0, slashIdx),
key: (opts.tokenSecret as string).slice(slashIdx + 1),
};
if (opts.namespace) config.namespace = opts.namespace;
if (opts.mount) config.mount = opts.mount;
if (opts.pathPrefix) config.pathPrefix = opts.pathPrefix;
}
// Extra config key=value pairs (overwrite/extend above)
for (const entry of opts.config as string[]) {
const eqIdx = entry.indexOf('=');
if (eqIdx === -1) throw new Error(`Invalid --config '${entry}'. Expected key=value`);
config[entry.slice(0, eqIdx)] = entry.slice(eqIdx + 1);
}
const body: Record<string, unknown> = { name, type, config };
if (opts.description !== undefined) body.description = opts.description;
if (opts.default) body.isDefault = true;
try {
const row = await client.post<{ id: string; name: string }>('/api/v1/secretbackends', body);
log(`secretbackend '${row.name}' created (id: ${row.id})`);
if (opts.default) log(` promoted to default backend`);
} catch (err) {
if (err instanceof ApiError && err.status === 409 && opts.force) {
const existing = (await client.get<Array<{ id: string; name: string }>>('/api/v1/secretbackends')).find((b) => b.name === name);
if (!existing) throw err;
const updateBody: Record<string, unknown> = { config };
if (opts.description !== undefined) updateBody.description = opts.description;
if (opts.default) updateBody.isDefault = true;
await client.put(`/api/v1/secretbackends/${existing.id}`, updateBody);
log(`secretbackend '${name}' updated (id: ${existing.id})`);
} else {
throw err;
}
}
});
// --- create project ---
cmd.command('project')
.description('Create a project')


@@ -218,80 +218,6 @@ function formatSecretDetail(secret: Record<string, unknown>, showValues: boolean
return lines.join('\n');
}
function formatLlmDetail(llm: Record<string, unknown>): string {
const lines: string[] = [];
lines.push(`=== LLM: ${llm.name} ===`);
lines.push(`${pad('Name:')}${llm.name}`);
lines.push(`${pad('Type:')}${llm.type}`);
lines.push(`${pad('Model:')}${llm.model}`);
lines.push(`${pad('Tier:')}${llm.tier ?? 'fast'}`);
if (llm.url) lines.push(`${pad('URL:')}${llm.url}`);
if (llm.description) lines.push(`${pad('Description:')}${llm.description}`);
const ref = llm.apiKeyRef as { name: string; key: string } | null | undefined;
lines.push('');
lines.push('API Key:');
if (ref) {
lines.push(` ${pad('Secret:', 12)}${ref.name}`);
lines.push(` ${pad('Key:', 12)}${ref.key}`);
} else {
lines.push(' (none)');
}
const extra = llm.extraConfig as Record<string, unknown> | undefined;
if (extra && Object.keys(extra).length > 0) {
lines.push('');
lines.push('Extra Config:');
const keyW = Math.max(6, ...Object.keys(extra).map((k) => k.length)) + 2;
for (const [k, v] of Object.entries(extra)) {
let display: string;
if (v === null || v === undefined) display = '-';
else if (typeof v === 'object') display = JSON.stringify(v);
else display = String(v);
lines.push(` ${k.padEnd(keyW)}${display}`);
}
}
lines.push('');
lines.push('Metadata:');
lines.push(` ${pad('ID:', 12)}${llm.id}`);
if (llm.createdAt) lines.push(` ${pad('Created:', 12)}${llm.createdAt}`);
if (llm.updatedAt) lines.push(` ${pad('Updated:', 12)}${llm.updatedAt}`);
return lines.join('\n');
}
function formatSecretBackendDetail(backend: Record<string, unknown>): string {
const lines: string[] = [];
lines.push(`=== SecretBackend: ${backend.name} ===`);
lines.push(`${pad('Name:')}${backend.name}`);
lines.push(`${pad('Type:')}${backend.type}`);
lines.push(`${pad('Default:')}${backend.isDefault ? 'yes' : 'no'}`);
if (backend.description) lines.push(`${pad('Description:')}${backend.description}`);
const config = backend.config as Record<string, unknown> | undefined;
if (config && Object.keys(config).length > 0) {
lines.push('');
lines.push('Config:');
const keyW = Math.max(6, ...Object.keys(config).map((k) => k.length)) + 2;
for (const [key, value] of Object.entries(config)) {
let display: string;
if (value === null || value === undefined) display = '-';
else if (typeof value === 'object') display = JSON.stringify(value);
else display = String(value);
lines.push(` ${key.padEnd(keyW)}${display}`);
}
}
lines.push('');
lines.push('Metadata:');
lines.push(` ${pad('ID:', 12)}${backend.id}`);
if (backend.createdAt) lines.push(` ${pad('Created:', 12)}${backend.createdAt}`);
if (backend.updatedAt) lines.push(` ${pad('Updated:', 12)}${backend.updatedAt}`);
return lines.join('\n');
}
function formatTemplateDetail(template: Record<string, unknown>): string {
const lines: string[] = [];
lines.push(`=== Template: ${template.name} ===`);
@@ -880,12 +806,6 @@ export function createDescribeCommand(deps: DescribeCommandDeps): Command {
case 'templates':
deps.log(formatTemplateDetail(item));
break;
case 'secretbackends':
deps.log(formatSecretBackendDetail(item));
break;
case 'llms':
deps.log(formatLlmDetail(item));
break;
case 'projects': {
const projectPrompts = await deps.client
.get<Array<{ name: string; priority: number; linkTarget: string | null }>>(`/api/v1/prompts?projectId=${item.id as string}`)


@@ -119,43 +119,6 @@ const rbacColumns: Column<RbacRow>[] = [
{ header: 'ID', key: 'id' },
];
interface LlmRow {
id: string;
name: string;
type: string;
model: string;
tier: string;
url: string;
description: string;
apiKeyRef: { name: string; key: string } | null;
}
const llmColumns: Column<LlmRow>[] = [
{ header: 'NAME', key: 'name' },
{ header: 'TYPE', key: 'type', width: 12 },
{ header: 'MODEL', key: 'model', width: 28 },
{ header: 'TIER', key: 'tier', width: 8 },
{ header: 'KEY', key: (r) => r.apiKeyRef ? `secret://${r.apiKeyRef.name}/${r.apiKeyRef.key}` : '-', width: 34 },
{ header: 'ID', key: 'id' },
];
interface SecretBackendRow {
id: string;
name: string;
type: string;
isDefault: boolean;
description: string;
config?: Record<string, unknown>;
}
const secretBackendColumns: Column<SecretBackendRow>[] = [
{ header: 'NAME', key: 'name' },
{ header: 'TYPE', key: 'type', width: 14 },
{ header: 'DEFAULT', key: (r) => r.isDefault ? '*' : '', width: 8 },
{ header: 'DESCRIPTION', key: (r) => r.description || '-', width: 30 },
{ header: 'ID', key: 'id' },
];
interface McpTokenRow {
id: string;
name: string;
@@ -302,10 +265,6 @@ function getColumnsForResource(resource: string): Column<Record<string, unknown>
return proxymodelColumns as unknown as Column<Record<string, unknown>>[];
case 'mcptokens':
return mcpTokenColumns as unknown as Column<Record<string, unknown>>[];
case 'secretbackends':
return secretBackendColumns as unknown as Column<Record<string, unknown>>[];
case 'llms':
return llmColumns as unknown as Column<Record<string, unknown>>[];
default:
return [
{ header: 'ID', key: 'id' as keyof Record<string, unknown> },
@@ -328,8 +287,6 @@ const RESOURCE_KIND: Record<string, string> = {
promptrequests: 'promptrequest',
serverattachments: 'serverattachment',
mcptokens: 'mcptoken',
secretbackends: 'secretbackend',
llms: 'llm',
};
/**


@@ -1,80 +0,0 @@
import { Command } from 'commander';
import type { ApiClient } from '../api-client.js';
export interface MigrateCommandDeps {
client: ApiClient;
log: (...args: unknown[]) => void;
}
interface MigrateResult {
migrated: Array<{ name: string }>;
skipped: Array<{ name: string; reason: string }>;
failed: Array<{ name: string; error: string }>;
}
interface DryRunResult {
dryRun: true;
candidates: Array<{ id: string; name: string }>;
}
/**
* Top-level `mcpctl migrate <subcommand>` verb.
*
* Today only `secrets` is implemented (SecretBackend → SecretBackend move),
* but the command is structured so new migrations can slot in.
*
* Per-secret atomicity is handled server-side — if this command is interrupted
* mid-run, re-running is idempotent (skips secrets already on the destination).
*/
export function createMigrateCommand(deps: MigrateCommandDeps): Command {
const { client, log } = deps;
const cmd = new Command('migrate')
.description('Move resources between backends (currently: secrets between SecretBackends)');
cmd.command('secrets')
.description('Migrate secrets from one SecretBackend to another')
.requiredOption('--from <name>', 'Source SecretBackend name')
.requiredOption('--to <name>', 'Destination SecretBackend name')
.option('--names <csv>', 'Comma-separated secret names (default: all)')
.option('--keep-source', 'Leave the source copy intact (default: delete from source after write+commit)')
.option('--dry-run', 'Show which secrets would be migrated without touching them')
.action(async (opts) => {
const body: Record<string, unknown> = { from: opts.from, to: opts.to };
if (opts.names) body.names = (opts.names as string).split(',').map((s) => s.trim()).filter(Boolean);
if (opts.keepSource) body.keepSource = true;
if (opts.dryRun) body.dryRun = true;
if (opts.dryRun) {
const res = await client.post<DryRunResult>('/api/v1/secrets/migrate', body);
if (res.candidates.length === 0) {
log(`No secrets to migrate from '${opts.from as string}' to '${opts.to as string}'.`);
return;
}
log(`Dry run — ${String(res.candidates.length)} secret(s) would be migrated from '${opts.from as string}' → '${opts.to as string}':`);
for (const c of res.candidates) log(` - ${c.name}`);
return;
}
const res = await client.post<MigrateResult>('/api/v1/secrets/migrate', body);
if (res.migrated.length > 0) {
log(`Migrated ${String(res.migrated.length)} secret(s) from '${opts.from as string}' → '${opts.to as string}':`);
for (const m of res.migrated) log(`${m.name}`);
}
if (res.skipped.length > 0) {
log(`Skipped ${String(res.skipped.length)}:`);
for (const s of res.skipped) log(` - ${s.name}: ${s.reason}`);
}
if (res.failed.length > 0) {
log(`Failed ${String(res.failed.length)}:`);
for (const f of res.failed) log(`${f.name}: ${f.error}`);
process.exitCode = 1;
}
if (res.migrated.length === 0 && res.skipped.length === 0 && res.failed.length === 0) {
log(`No secrets to migrate from '${opts.from as string}' to '${opts.to as string}'.`);
}
});
return cmd;
}
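// A minimal sketch (not part of this diff) of driving the same endpoint
// programmatically, assuming an already-authenticated ApiClient. The body
// shape mirrors the CLI options above; backend names are placeholders.
async function migrateSecretsExample(client: ApiClient): Promise<void> {
  // Preview first: dryRun never touches the server-side secrets.
  const preview = await client.post<DryRunResult>('/api/v1/secrets/migrate', {
    from: 'default',
    to: 'bao',
    dryRun: true,
  });
  console.log(`${String(preview.candidates.length)} secret(s) would move`);
  // Then run it for real, keeping the source copies intact.
  const result = await client.post<MigrateResult>('/api/v1/secrets/migrate', {
    from: 'default',
    to: 'bao',
    keepSource: true,
  });
  console.log(`migrated=${String(result.migrated.length)} skipped=${String(result.skipped.length)} failed=${String(result.failed.length)}`);
}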


@@ -31,11 +31,6 @@ export const RESOURCE_ALIASES: Record<string, string> = {
mcptokens: 'mcptokens',
token: 'mcptokens',
tokens: 'mcptokens',
secretbackend: 'secretbackends',
secretbackends: 'secretbackends',
sb: 'secretbackends',
llm: 'llms',
llms: 'llms',
all: 'all',
};


@@ -18,7 +18,6 @@ import { createMcpCommand } from './commands/mcp.js';
import { createPatchCommand } from './commands/patch.js';
import { createConsoleCommand } from './commands/console/index.js';
import { createCacheCommand } from './commands/cache.js';
import { createMigrateCommand } from './commands/migrate.js';
import { ApiClient, ApiError } from './api-client.js';
import { loadConfig } from './config/index.js';
import { loadCredentials } from './auth/index.js';
@@ -250,11 +249,6 @@ export function createProgram(): Command {
log: (...args) => console.log(...args),
}));
program.addCommand(createMigrateCommand({
client,
log: (...args) => console.log(...args),
}));
return program;
}


@@ -111,79 +111,17 @@ model McpTemplate {
@@index([name])
}
// ── Secret Backends ──
//
// Pluggable storage for Secret.data. Default is `plaintext` (data stored in
// Secret.data JSON). Other drivers (e.g. `openbao`) store only a reference in
// Secret.externalRef and fetch actual values from the external system at read
// time. A `plaintext` row is seeded on first startup so the system always has
// a viable backend; additional backends are user-managed via
// `mcpctl create secretbackend`.
model SecretBackend {
id String @id @default(cuid())
name String @unique
type String // plaintext | openbao | (future: vault, aws-sm, ...)
config Json @default("{}") // type-specific: url, mount, namespace, tokenSecretRef
isDefault Boolean @default(false) // exactly one row has isDefault=true
description String @default("")
version Int @default(1)
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
secrets Secret[]
@@index([name])
@@index([isDefault])
}
// ── Secrets ──
model Secret {
id String @id @default(cuid())
name String @unique
backendId String // FK to SecretBackend — dispatches read/write
data Json @default("{}") // populated by plaintext backend only
externalRef String @default("") // populated by non-plaintext backends (e.g. "mount/path#v3")
version Int @default(1)
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
backend SecretBackend @relation(fields: [backendId], references: [id])
llms Llm[]
id String @id @default(cuid())
name String @unique
data Json @default("{}")
version Int @default(1)
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@index([name])
@@index([backendId])
}
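// Illustrative rows (not part of this schema): a plaintext-backed secret keeps
// its values in `data`, while an openbao-backed one keeps only a pointer in
// `externalRef` and fetches values from the vault at read time. Names and the
// exact externalRef layout are placeholders.
//   { name: "anthropic-key", backendId: "<plaintext backend>", data: { token: "sk-example" }, externalRef: "" }
//   { name: "db-creds",      backendId: "<openbao backend>",   data: {},                      externalRef: "secret/mcpctl/db-creds#v3" }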
// ── LLMs ──
//
// Server-managed LLM providers. Clients (agent, HTTP-mode mcplocal) send
// OpenAI-format requests to `mcpd /api/v1/llms/:name/infer` — mcpd attaches the
// provider API key server-side so credentials never leave the cluster.
// Credentials are stored by reference: `apiKeySecret` points at a Secret, and
// `apiKeySecretKey` names the key within that secret's data.
model Llm {
id String @id @default(cuid())
name String @unique
type String // anthropic | openai | deepseek | vllm | ollama | gemini-cli
model String // e.g. claude-3-5-sonnet-20241022
url String @default("") // endpoint (empty for provider default)
tier String @default("fast") // fast | heavy
description String @default("")
apiKeySecretId String? // FK to Secret
apiKeySecretKey String? // key inside the Secret's data
extraConfig Json @default("{}") // per-type extras
version Int @default(1)
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
apiKeySecret Secret? @relation(fields: [apiKeySecretId], references: [id], onDelete: SetNull)
@@index([name])
@@index([tier])
@@index([apiKeySecretId])
}
// ── Groups ──


@@ -1,53 +0,0 @@
/**
* Bootstrap the `plaintext` SecretBackend + backfill existing Secret rows.
*
* Runs on every mcpd startup. Idempotent:
* - if no SecretBackend exists, create `default` (type `plaintext`, isDefault=true)
* - if any Secret has no backendId (fresh after schema migration), point it at `default`
* - if no backend is currently flagged default, promote `default`
*
* Safe to run repeatedly; never destroys configuration.
*/
import type { PrismaClient } from '@prisma/client';
/** Well-known name for the always-present plaintext backend. */
export const DEFAULT_PLAINTEXT_BACKEND_NAME = 'default';
export async function bootstrapSecretBackends(prisma: PrismaClient): Promise<void> {
let plaintext = await prisma.secretBackend.findUnique({
where: { name: DEFAULT_PLAINTEXT_BACKEND_NAME },
});
if (plaintext === null) {
plaintext = await prisma.secretBackend.create({
data: {
name: DEFAULT_PLAINTEXT_BACKEND_NAME,
type: 'plaintext',
isDefault: true,
description: 'Default in-database plaintext backend. Seeded on first startup.',
},
});
}
const currentDefault = await prisma.secretBackend.findFirst({ where: { isDefault: true } });
if (currentDefault === null) {
await prisma.secretBackend.update({
where: { id: plaintext.id },
data: { isDefault: true },
});
}
// Backfill any secrets left with an empty backendId after the schema migration.
// `findMany({ where: { backendId: '' } })` catches rows that existed before
// the column was added and had a default-empty value assigned.
const orphans = await prisma.secret.findMany({
where: { backendId: '' },
select: { id: true },
});
if (orphans.length > 0) {
await prisma.secret.updateMany({
where: { id: { in: orphans.map((o) => o.id) } },
data: { backendId: plaintext.id },
});
}
}


@@ -20,17 +20,6 @@ import {
AuditEventRepository,
McpTokenRepository,
} from './repositories/index.js';
import { SecretBackendRepository } from './repositories/secret-backend.repository.js';
import { SecretBackendService } from './services/secret-backend.service.js';
import { SecretMigrateService } from './services/secret-migrate.service.js';
import { bootstrapSecretBackends } from './bootstrap/secret-backends.js';
import { registerSecretBackendRoutes } from './routes/secret-backends.js';
import { registerSecretMigrateRoutes } from './routes/secret-migrate.js';
import { LlmRepository } from './repositories/llm.repository.js';
import { LlmService } from './services/llm.service.js';
import { LlmAdapterRegistry } from './services/llm/dispatcher.js';
import { registerLlmRoutes } from './routes/llms.js';
import { registerLlmInferRoutes } from './routes/llm-infer.js';
import { PromptRepository } from './repositories/prompt.repository.js';
import { PromptRequestRepository } from './repositories/prompt-request.repository.js';
import { bootstrapSystemProject } from './bootstrap/system-project.js';
@@ -104,20 +93,11 @@ function mapUrlToPermission(method: string, url: string): PermissionCheck {
if (segment === 'backup') return { kind: 'operation', operation: 'backup' };
if (segment === 'restore') return { kind: 'operation', operation: 'restore' };
if (segment === 'audit-logs' && method === 'DELETE') return { kind: 'operation', operation: 'audit-purge' };
// /api/v1/secrets/migrate is a bulk cross-backend operation — treat as op, not a plain secret write.
if (url.startsWith('/api/v1/secrets/migrate')) return { kind: 'operation', operation: 'migrate-secrets' };
// /api/v1/llms/:name/infer → `run:llms:<name>` (not the default create:llms).
const inferMatch = url.match(/^\/api\/v1\/llms\/([^/?]+)\/infer/);
if (inferMatch?.[1]) {
return { kind: 'resource', resource: 'llms', action: 'run', resourceName: inferMatch[1] };
}
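// e.g. (illustrative) POST /api/v1/llms/claude-fast/infer resolves to
// { kind: 'resource', resource: 'llms', action: 'run', resourceName: 'claude-fast' },
// while every other /api/v1/llms/... URL falls through to the resourceMap below.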
const resourceMap: Record<string, string | undefined> = {
'servers': 'servers',
'instances': 'instances',
'secrets': 'secrets',
'secretbackends': 'secretbackends',
'projects': 'projects',
'templates': 'templates',
'users': 'users',
@@ -128,7 +108,6 @@ function mapUrlToPermission(method: string, url: string): PermissionCheck {
'prompts': 'prompts',
'promptrequests': 'promptrequests',
'mcptokens': 'mcptokens',
'llms': 'llms',
};
const resource = resourceMap[segment];
@@ -282,8 +261,6 @@ async function main(): Promise<void> {
// Repositories
const serverRepo = new McpServerRepository(prisma);
const secretRepo = new SecretRepository(prisma);
const secretBackendRepo = new SecretBackendRepository(prisma);
const llmRepo = new LlmRepository(prisma);
const instanceRepo = new McpInstanceRepository(prisma);
const projectRepo = new ProjectRepository(prisma);
const auditLogRepo = new AuditLogRepository(prisma);
@@ -294,20 +271,14 @@ async function main(): Promise<void> {
const groupRepo = new GroupRepository(prisma);
const mcpTokenRepo = new McpTokenRepository(prisma);
// SecretBackend bootstrap: ensure a `plaintext` default row exists and any
// pre-existing `Secret` rows are pointed at it. Idempotent per run.
await bootstrapSecretBackends(prisma);
// CUID detection for RBAC name resolution
const CUID_RE = /^c[^\s-]{8,}$/i;
const nameResolvers: Record<string, { findById(id: string): Promise<{ name: string } | null> }> = {
servers: serverRepo,
secrets: secretRepo,
secretbackends: secretBackendRepo,
projects: projectRepo,
groups: groupRepo,
mcptokens: mcpTokenRepo,
llms: llmRepo,
};
// Migrate legacy 'admin' role → granular roles
@@ -320,31 +291,9 @@ async function main(): Promise<void> {
// Services
const serverService = new McpServerService(serverRepo);
// SecretBackend service — needs a lazy bridge to the yet-to-be-constructed
// SecretService because the OpenBao driver's auth token lives in a plaintext
// Secret. The bridge defers the resolve until after `secretService` is
// assigned, breaking the circular dependency at construction time.
const secretResolverBridge = {
resolve: async (name: string, key: string): Promise<string> => secretService.resolve(name, key),
};
const secretBackendService = new SecretBackendService(secretBackendRepo, {
plaintext: {
listAllPlaintext: async () => {
const rows = await prisma.secret.findMany({
where: { backend: { type: 'plaintext' } },
select: { name: true, data: true },
});
return rows.map((r) => ({ name: r.name, data: r.data as Record<string, string> }));
},
},
secretRefResolver: secretResolverBridge,
});
const secretService = new SecretService(secretRepo, secretBackendService);
const secretMigrateService = new SecretMigrateService(secretRepo, secretBackendService);
const llmService = new LlmService(llmRepo, secretService);
const llmAdapters = new LlmAdapterRegistry();
const instanceService = new InstanceService(instanceRepo, serverRepo, orchestrator, secretService);
const instanceService = new InstanceService(instanceRepo, serverRepo, orchestrator, secretRepo);
serverService.setInstanceService(instanceService);
const secretService = new SecretService(secretRepo);
const projectService = new ProjectService(projectRepo, serverRepo);
const auditLogService = new AuditLogService(auditLogRepo);
const auditEventService = new AuditEventService(auditEventRepo);
@@ -364,7 +313,7 @@ async function main(): Promise<void> {
promptRuleRegistry.register(systemPromptVarsRule);
const promptService = new PromptService(promptRepo, promptRequestRepo, projectRepo, promptRuleRegistry);
const backupService = new BackupService(serverRepo, projectRepo, secretRepo, userRepo, groupRepo, rbacDefinitionRepo, promptRepo, templateRepo);
const restoreService = new RestoreService(serverRepo, projectRepo, secretRepo, secretService, userRepo, groupRepo, rbacDefinitionRepo, promptRepo, templateRepo);
const restoreService = new RestoreService(serverRepo, projectRepo, secretRepo, userRepo, groupRepo, rbacDefinitionRepo, promptRepo, templateRepo);
// Shared auth dependencies. Both the global auth hook and the per-route
// preHandler on /api/v1/mcp/proxy must know how to resolve both session
@@ -481,26 +430,6 @@ async function main(): Promise<void> {
registerMcpServerRoutes(app, serverService, instanceService);
registerTemplateRoutes(app, templateService);
registerSecretRoutes(app, secretService);
registerSecretBackendRoutes(app, secretBackendService);
registerSecretMigrateRoutes(app, secretMigrateService);
registerLlmRoutes(app, llmService);
registerLlmInferRoutes(app, {
llmService,
adapters: llmAdapters,
onInferenceEvent: (event) => {
app.log.info({
event: 'llm_inference_call',
llm: event.llmName,
model: event.model,
type: event.type,
userId: event.userId,
tokenSha: event.tokenSha,
streaming: event.streaming,
durationMs: event.durationMs,
status: event.status,
});
},
});
registerInstanceRoutes(app, instanceService);
registerProjectRoutes(app, projectService);
registerAuditLogRoutes(app, auditLogService);


@@ -1,6 +1,6 @@
import type { McpServer, McpInstance, AuditLog, AuditEvent, McpToken, Secret, InstanceStatus } from '@prisma/client';
import type { CreateMcpServerInput, UpdateMcpServerInput } from '../validation/mcp-server.schema.js';
import type { SecretRepoCreateInput, SecretRepoUpdateInput } from './secret.repository.js';
import type { CreateSecretInput, UpdateSecretInput } from '../validation/secret.schema.js';
export interface IMcpServerRepository {
findAll(): Promise<McpServer[]>;
@@ -24,9 +24,8 @@ export interface ISecretRepository {
findAll(): Promise<Secret[]>;
findById(id: string): Promise<Secret | null>;
findByName(name: string): Promise<Secret | null>;
findByBackend(backendId: string): Promise<Secret[]>;
create(data: SecretRepoCreateInput): Promise<Secret>;
update(id: string, data: SecretRepoUpdateInput): Promise<Secret>;
create(data: CreateSecretInput): Promise<Secret>;
update(id: string, data: UpdateSecretInput): Promise<Secret>;
delete(id: string): Promise<void>;
}


@@ -1,89 +0,0 @@
import type { PrismaClient, Llm, Prisma } from '@prisma/client';
export interface CreateLlmInput {
name: string;
type: string;
model: string;
url?: string;
tier?: string;
description?: string;
apiKeySecretId?: string | null;
apiKeySecretKey?: string | null;
extraConfig?: Record<string, unknown>;
}
export interface UpdateLlmInput {
model?: string;
url?: string;
tier?: string;
description?: string;
apiKeySecretId?: string | null;
apiKeySecretKey?: string | null;
extraConfig?: Record<string, unknown>;
}
export interface ILlmRepository {
findAll(): Promise<Llm[]>;
findById(id: string): Promise<Llm | null>;
findByName(name: string): Promise<Llm | null>;
findByTier(tier: string): Promise<Llm[]>;
create(data: CreateLlmInput): Promise<Llm>;
update(id: string, data: UpdateLlmInput): Promise<Llm>;
delete(id: string): Promise<void>;
}
export class LlmRepository implements ILlmRepository {
constructor(private readonly prisma: PrismaClient) {}
async findAll(): Promise<Llm[]> {
return this.prisma.llm.findMany({ orderBy: { name: 'asc' } });
}
async findById(id: string): Promise<Llm | null> {
return this.prisma.llm.findUnique({ where: { id } });
}
async findByName(name: string): Promise<Llm | null> {
return this.prisma.llm.findUnique({ where: { name } });
}
async findByTier(tier: string): Promise<Llm[]> {
return this.prisma.llm.findMany({ where: { tier }, orderBy: { name: 'asc' } });
}
async create(data: CreateLlmInput): Promise<Llm> {
return this.prisma.llm.create({
data: {
name: data.name,
type: data.type,
model: data.model,
url: data.url ?? '',
tier: data.tier ?? 'fast',
description: data.description ?? '',
apiKeySecretId: data.apiKeySecretId ?? null,
apiKeySecretKey: data.apiKeySecretKey ?? null,
extraConfig: (data.extraConfig ?? {}) as Prisma.InputJsonValue,
},
});
}
async update(id: string, data: UpdateLlmInput): Promise<Llm> {
const updateData: Prisma.LlmUpdateInput = {};
if (data.model !== undefined) updateData.model = data.model;
if (data.url !== undefined) updateData.url = data.url;
if (data.tier !== undefined) updateData.tier = data.tier;
if (data.description !== undefined) updateData.description = data.description;
if (data.apiKeySecretId !== undefined) {
updateData.apiKeySecret = data.apiKeySecretId === null
? { disconnect: true }
: { connect: { id: data.apiKeySecretId } };
}
if (data.apiKeySecretKey !== undefined) updateData.apiKeySecretKey = data.apiKeySecretKey;
if (data.extraConfig !== undefined) updateData.extraConfig = data.extraConfig as Prisma.InputJsonValue;
return this.prisma.llm.update({ where: { id }, data: updateData });
}
async delete(id: string): Promise<void> {
await this.prisma.llm.delete({ where: { id } });
}
}


@@ -1,103 +0,0 @@
import type { PrismaClient, SecretBackend, Prisma } from '@prisma/client';
export interface CreateSecretBackendInput {
name: string;
type: string;
config?: Record<string, unknown>;
isDefault?: boolean;
description?: string;
}
export interface UpdateSecretBackendInput {
config?: Record<string, unknown>;
isDefault?: boolean;
description?: string;
}
export interface ISecretBackendRepository {
findAll(): Promise<SecretBackend[]>;
findById(id: string): Promise<SecretBackend | null>;
findByName(name: string): Promise<SecretBackend | null>;
findDefault(): Promise<SecretBackend | null>;
create(data: CreateSecretBackendInput): Promise<SecretBackend>;
update(id: string, data: UpdateSecretBackendInput): Promise<SecretBackend>;
/**
* Atomically clear `isDefault` on every row except the one named, then set
* the given row as default. Used by `setDefault`.
*/
setAsDefault(id: string): Promise<SecretBackend>;
delete(id: string): Promise<void>;
/** Count secrets that still reference this backend — used to guard delete. */
countReferencingSecrets(backendId: string): Promise<number>;
}
export class SecretBackendRepository implements ISecretBackendRepository {
constructor(private readonly prisma: PrismaClient) {}
async findAll(): Promise<SecretBackend[]> {
return this.prisma.secretBackend.findMany({ orderBy: { name: 'asc' } });
}
async findById(id: string): Promise<SecretBackend | null> {
return this.prisma.secretBackend.findUnique({ where: { id } });
}
async findByName(name: string): Promise<SecretBackend | null> {
return this.prisma.secretBackend.findUnique({ where: { name } });
}
async findDefault(): Promise<SecretBackend | null> {
return this.prisma.secretBackend.findFirst({ where: { isDefault: true } });
}
async create(data: CreateSecretBackendInput): Promise<SecretBackend> {
return this.prisma.$transaction(async (tx) => {
if (data.isDefault === true) {
await tx.secretBackend.updateMany({ where: { isDefault: true }, data: { isDefault: false } });
}
return tx.secretBackend.create({
data: {
name: data.name,
type: data.type,
config: (data.config ?? {}) as Prisma.InputJsonValue,
isDefault: data.isDefault ?? false,
description: data.description ?? '',
},
});
});
}
async update(id: string, data: UpdateSecretBackendInput): Promise<SecretBackend> {
return this.prisma.$transaction(async (tx) => {
if (data.isDefault === true) {
await tx.secretBackend.updateMany({
where: { isDefault: true, NOT: { id } },
data: { isDefault: false },
});
}
const updateData: Prisma.SecretBackendUpdateInput = {};
if (data.config !== undefined) updateData.config = data.config as Prisma.InputJsonValue;
if (data.isDefault !== undefined) updateData.isDefault = data.isDefault;
if (data.description !== undefined) updateData.description = data.description;
return tx.secretBackend.update({ where: { id }, data: updateData });
});
}
async setAsDefault(id: string): Promise<SecretBackend> {
return this.prisma.$transaction(async (tx) => {
await tx.secretBackend.updateMany({
where: { isDefault: true, NOT: { id } },
data: { isDefault: false },
});
return tx.secretBackend.update({ where: { id }, data: { isDefault: true } });
});
}
async delete(id: string): Promise<void> {
await this.prisma.secretBackend.delete({ where: { id } });
}
async countReferencingSecrets(backendId: string): Promise<number> {
return this.prisma.secret.count({ where: { backendId } });
}
}


@@ -1,18 +1,6 @@
import { type PrismaClient, type Secret, type Prisma } from '@prisma/client';
import { type PrismaClient, type Secret } from '@prisma/client';
import type { ISecretRepository } from './interfaces.js';
export interface SecretRepoCreateInput {
name: string;
backendId: string;
data?: Record<string, string>;
externalRef?: string;
}
export interface SecretRepoUpdateInput {
data?: Record<string, string>;
externalRef?: string;
backendId?: string;
}
import type { CreateSecretInput, UpdateSecretInput } from '../validation/secret.schema.js';
export class SecretRepository implements ISecretRepository {
constructor(private readonly prisma: PrismaClient) {}
@@ -29,29 +17,20 @@ export class SecretRepository implements ISecretRepository {
return this.prisma.secret.findUnique({ where: { name } });
}
async findByBackend(backendId: string): Promise<Secret[]> {
return this.prisma.secret.findMany({ where: { backendId }, orderBy: { name: 'asc' } });
}
async create(data: SecretRepoCreateInput): Promise<Secret> {
async create(data: CreateSecretInput): Promise<Secret> {
return this.prisma.secret.create({
data: {
name: data.name,
backendId: data.backendId,
data: (data.data ?? {}) as Prisma.InputJsonValue,
externalRef: data.externalRef ?? '',
data: data.data,
},
});
}
async update(id: string, data: SecretRepoUpdateInput): Promise<Secret> {
const updateData: Prisma.SecretUpdateInput = {};
if (data.data !== undefined) updateData.data = data.data as Prisma.InputJsonValue;
if (data.externalRef !== undefined) updateData.externalRef = data.externalRef;
if (data.backendId !== undefined) {
updateData.backend = { connect: { id: data.backendId } };
}
return this.prisma.secret.update({ where: { id }, data: updateData });
async update(id: string, data: UpdateSecretInput): Promise<Secret> {
return this.prisma.secret.update({
where: { id },
data: { data: data.data },
});
}
async delete(id: string): Promise<void> {


@@ -1,145 +0,0 @@
/**
* POST /api/v1/llms/:name/infer
*
* OpenAI-compatible chat completions endpoint. The RBAC check runs in the
* global hook — this URL maps to `run:llms:<name>`, not the default
* `create:llms`. See `main.ts:mapUrlToPermission`.
*
* Non-streaming: resolves the Llm, dispatches to the right provider adapter,
* returns the OpenAI chat.completion JSON.
*
* Streaming (`stream: true`): pipes adapter-emitted chunks back as
* `text/event-stream`. Adapters translate provider-native SSE into OpenAI
* `chat.completion.chunk`s so clients can use any OpenAI SDK unchanged.
*/
import type { FastifyInstance, FastifyReply } from 'fastify';
import type { LlmService } from '../services/llm.service.js';
import type { LlmAdapterRegistry } from '../services/llm/dispatcher.js';
import { NotFoundError } from '../services/mcp-server.service.js';
import type { OpenAiChatRequest, InferContext } from '../services/llm/types.js';
export interface LlmInferDeps {
llmService: LlmService;
adapters: LlmAdapterRegistry;
/** Optional hook to emit audit events — consumer may ignore. */
onInferenceEvent?: (event: InferenceAuditEvent) => void;
}
export interface InferenceAuditEvent {
kind: 'llm_inference_call';
llmName: string;
model: string;
type: string;
userId?: string | undefined;
tokenSha?: string | undefined;
streaming: boolean;
durationMs: number;
status: number;
}
export function registerLlmInferRoutes(
app: FastifyInstance,
deps: LlmInferDeps,
): void {
app.post<{ Params: { name: string }; Body: OpenAiChatRequest }>(
'/api/v1/llms/:name/infer',
async (request, reply) => {
const started = Date.now();
let llm;
try {
llm = await deps.llmService.getByName(request.params.name);
} catch (err) {
if (err instanceof NotFoundError) {
reply.code(404);
return { error: err.message };
}
throw err;
}
const body = (request.body ?? {}) as OpenAiChatRequest;
if (!body.messages || body.messages.length === 0) {
reply.code(400);
return { error: 'messages is required' };
}
// Resolve API key (may be empty string for providers that don't take one).
let apiKey = '';
if (llm.apiKeyRef !== null) {
try {
apiKey = await deps.llmService.resolveApiKey(llm.name);
} catch (err) {
reply.code(500);
return { error: `Failed to resolve API key: ${err instanceof Error ? err.message : String(err)}` };
}
}
const ctx: InferContext = {
body,
modelOverride: llm.model,
apiKey,
url: llm.url,
extraConfig: llm.extraConfig,
};
const adapter = deps.adapters.get(llm.type);
const streaming = body.stream === true;
const audit = (status: number): void => {
if (deps.onInferenceEvent === undefined) return;
deps.onInferenceEvent({
kind: 'llm_inference_call',
llmName: llm.name,
model: llm.model,
type: llm.type,
userId: request.userId,
tokenSha: request.mcpToken?.tokenSha,
streaming,
durationMs: Date.now() - started,
status,
});
};
if (!streaming) {
try {
const result = await adapter.infer(ctx);
reply.code(result.status);
audit(result.status);
return result.body;
} catch (err) {
audit(502);
reply.code(502);
return { error: err instanceof Error ? err.message : String(err) };
}
}
// Streaming path — set SSE headers and pipe chunks.
reply.raw.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
'X-Accel-Buffering': 'no',
});
try {
for await (const chunk of adapter.stream(ctx)) {
writeSseChunk(reply, chunk.data);
if (chunk.done === true) break;
}
audit(200);
} catch (err) {
const payload = JSON.stringify({
error: { message: err instanceof Error ? err.message : String(err) },
});
writeSseChunk(reply, payload);
writeSseChunk(reply, '[DONE]');
audit(502);
} finally {
reply.raw.end();
}
return reply;
},
);
}
function writeSseChunk(reply: FastifyReply, data: string): void {
reply.raw.write(`data: ${data}\n\n`);
}
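// A minimal client sketch (not part of this diff) for the streaming path.
// Base URL, llm name, and the Authorization header are placeholders; an empty
// `model` defers to the server-side modelOverride (as the Anthropic adapter
// elsewhere in this change does). The SSE framing (`data: <chunk>\n\n`,
// terminated by `[DONE]`) matches writeSseChunk above.
async function streamExample(baseUrl: string, llmName: string, token: string, prompt: string): Promise<string> {
  const res = await fetch(`${baseUrl}/api/v1/llms/${llmName}/infer`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
    body: JSON.stringify({ model: '', stream: true, messages: [{ role: 'user', content: prompt }] }),
  });
  if (!res.ok || res.body === null) throw new Error(`infer failed: HTTP ${String(res.status)}`);
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buf = '';
  let text = '';
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    buf += decoder.decode(value, { stream: true });
    let idx: number;
    while ((idx = buf.indexOf('\n\n')) !== -1) {
      const line = buf.slice(0, idx).trim();
      buf = buf.slice(idx + 2);
      if (!line.startsWith('data: ')) continue;
      const data = line.slice(6);
      if (data === '[DONE]') return text;
      const chunk = JSON.parse(data) as { choices?: Array<{ delta?: { content?: string } }> };
      text += chunk.choices?.[0]?.delta?.content ?? '';
    }
  }
  return text;
}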


@@ -1,85 +0,0 @@
import type { FastifyInstance } from 'fastify';
import type { LlmService } from '../services/llm.service.js';
import { NotFoundError, ConflictError } from '../services/mcp-server.service.js';
export function registerLlmRoutes(
app: FastifyInstance,
service: LlmService,
): void {
app.get('/api/v1/llms', async () => {
return service.list();
});
// Accepts either CUID or human name. Used both by the CLI (which usually
// resolves to CUID first) and by FailoverRouter's RBAC pre-check (which
// hands over the user-facing name to avoid an extra round-trip).
app.get<{ Params: { id: string } }>('/api/v1/llms/:id', async (request, reply) => {
try {
return await getByIdOrName(service, request.params.id);
} catch (err) {
if (err instanceof NotFoundError) {
reply.code(404);
return { error: err.message };
}
throw err;
}
});
// No explicit HEAD handler: Fastify auto-derives HEAD from GET, which runs
// the same RBAC hook + lookup and drops the body. That's exactly what
// FailoverRouter wants for its "can the caller still view this Llm?" probe.
app.post('/api/v1/llms', async (request, reply) => {
try {
const row = await service.create(request.body);
reply.code(201);
return row;
} catch (err) {
if (err instanceof ConflictError) {
reply.code(409);
return { error: err.message };
}
throw err;
}
});
app.put<{ Params: { id: string } }>('/api/v1/llms/:id', async (request, reply) => {
try {
return await service.update(request.params.id, request.body);
} catch (err) {
if (err instanceof NotFoundError) {
reply.code(404);
return { error: err.message };
}
throw err;
}
});
app.delete<{ Params: { id: string } }>('/api/v1/llms/:id', async (request, reply) => {
try {
await service.delete(request.params.id);
reply.code(204);
return null;
} catch (err) {
if (err instanceof NotFoundError) {
reply.code(404);
return { error: err.message };
}
throw err;
}
});
}
const CUID_RE = /^c[a-z0-9]{24}/i;
/**
* Look up by CUID first; if the input doesn't look like one, fall back to
* findByName. Lets the same URL serve both `mcpctl describe llm <name>` and
* the FailoverRouter's name-based RBAC check.
*/
async function getByIdOrName(service: LlmService, idOrName: string) {
if (CUID_RE.test(idOrName)) {
return service.getById(idOrName);
}
return service.getByName(idOrName);
}
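// e.g. (illustrative) getByIdOrName(service, 'cm2x9k3f40000abcdefghijkl') looks
// like a CUID and goes through getById, while getByIdOrName(service, 'claude-fast')
// falls back to getByName, so GET /api/v1/llms/claude-fast serves both the CLI
// and FailoverRouter's name-based probe.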


@@ -1,89 +0,0 @@
import type { FastifyInstance } from 'fastify';
import type { SecretBackendService } from '../services/secret-backend.service.js';
import { SecretBackendInUseError } from '../services/secret-backend.service.js';
import { NotFoundError, ConflictError } from '../services/mcp-server.service.js';
export function registerSecretBackendRoutes(
app: FastifyInstance,
service: SecretBackendService,
): void {
app.get('/api/v1/secretbackends', async () => {
const rows = await service.list();
return rows.map(redactConfig);
});
app.get<{ Params: { id: string } }>('/api/v1/secretbackends/:id', async (request) => {
const row = await service.getById(request.params.id);
return redactConfig(row);
});
app.post('/api/v1/secretbackends', async (request, reply) => {
try {
const row = await service.create(request.body as {
name: string;
type: string;
config?: Record<string, unknown>;
isDefault?: boolean;
description?: string;
});
reply.code(201);
return redactConfig(row);
} catch (err) {
if (err instanceof ConflictError) {
reply.code(409);
return { error: err.message };
}
throw err;
}
});
app.put<{ Params: { id: string } }>('/api/v1/secretbackends/:id', async (request) => {
const row = await service.update(request.params.id, request.body as {
config?: Record<string, unknown>;
isDefault?: boolean;
description?: string;
});
return redactConfig(row);
});
app.post<{ Params: { id: string } }>('/api/v1/secretbackends/:id/default', async (request) => {
const row = await service.setDefault(request.params.id);
return redactConfig(row);
});
app.delete<{ Params: { id: string } }>('/api/v1/secretbackends/:id', async (request, reply) => {
try {
await service.delete(request.params.id);
reply.code(204);
return null;
} catch (err) {
if (err instanceof SecretBackendInUseError) {
reply.code(409);
return { error: err.message };
}
if (err instanceof NotFoundError) {
reply.code(404);
return { error: err.message };
}
throw err;
}
});
}
/**
* Mask any string value in `config` whose key looks like a credential
* (token/secret/password/key) with '***'. Non-string values, such as the
* tokenSecretRef object, pass through unchanged. Prevents accidental exposure
* of raw credentials via GET responses.
*/
function redactConfig<T extends { config: unknown }>(row: T): T {
const config = (row.config ?? {}) as Record<string, unknown>;
const cleaned: Record<string, unknown> = {};
for (const [k, v] of Object.entries(config)) {
if (/token|secret|password|key/i.test(k) && typeof v === 'string') {
cleaned[k] = '***';
} else {
cleaned[k] = v;
}
}
return { ...row, config: cleaned };
}
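// e.g. (illustrative) redactConfig({ config: { url: 'http://bao:8200', token: 'abc123',
// tokenSecretRef: { name: 'bao-creds', key: 'token' } } }) masks token to '***'
// but keeps url and the tokenSecretRef object as-is: only string values under
// credential-looking keys are rewritten.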


@@ -1,41 +0,0 @@
import type { FastifyInstance } from 'fastify';
import type { SecretMigrateService } from '../services/secret-migrate.service.js';
export function registerSecretMigrateRoutes(
app: FastifyInstance,
service: SecretMigrateService,
): void {
/**
* POST /api/v1/secrets/migrate
* body: { from: string, to: string, names?: string[], keepSource?: boolean, dryRun?: boolean }
* RBAC: operation `migrate-secrets` (role:run).
*/
app.post<{
Body: {
from: string;
to: string;
names?: string[];
keepSource?: boolean;
dryRun?: boolean;
};
}>('/api/v1/secrets/migrate', async (request, reply) => {
const { from, to, names, keepSource, dryRun } = request.body;
if (!from || !to) {
reply.code(400);
return { error: 'from and to are required' };
}
if (dryRun === true) {
const options: Parameters<SecretMigrateService['dryRun']>[0] = { from, to };
if (names !== undefined) options.names = names;
if (keepSource !== undefined) options.keepSource = keepSource;
const secrets = await service.dryRun(options);
return { dryRun: true, candidates: secrets.map((s) => ({ id: s.id, name: s.name })) };
}
const options: Parameters<SecretMigrateService['migrate']>[0] = { from, to };
if (names !== undefined) options.names = names;
if (keepSource !== undefined) options.keepSource = keepSource;
return service.migrate(options);
});
}


@@ -6,7 +6,6 @@ import type { IRbacDefinitionRepository } from '../../repositories/rbac-definiti
import type { IPromptRepository } from '../../repositories/prompt.repository.js';
import type { ITemplateRepository } from '../../repositories/template.repository.js';
import type { RbacRoleBinding } from '../../validation/rbac-definition.schema.js';
import type { SecretService } from '../secret.service.js';
import { decrypt } from './crypto.js';
import type { BackupBundle } from './backup-service.js';
@@ -42,7 +41,6 @@ export class RestoreService {
private serverRepo: IMcpServerRepository,
private projectRepo: IProjectRepository,
private secretRepo: ISecretRepository,
private secretService: SecretService,
private userRepo?: IUserRepository,
private groupRepo?: IGroupRepository,
private rbacRepo?: IRbacDefinitionRepository,
@@ -127,13 +125,16 @@ export class RestoreService {
result.secretsSkipped++;
continue;
}
// overwrite — route through SecretService so backend dispatch applies.
await this.secretService.update(existing.id, { data: secret.data });
// overwrite
await this.secretRepo.update(existing.id, { data: secret.data });
result.secretsCreated++;
continue;
}
await this.secretService.create({ name: secret.name, data: secret.data });
await this.secretRepo.create({
name: secret.name,
data: secret.data,
});
result.secretsCreated++;
} catch (err) {
result.errors.push(`Failed to restore secret "${secret.name}": ${err instanceof Error ? err.message : String(err)}`);


@@ -1,44 +1,42 @@
import type { McpServer } from '@prisma/client';
import type { ISecretRepository } from '../repositories/interfaces.js';
import type { ServerEnvEntry } from '../validation/mcp-server.schema.js';
/**
* Minimal dependency surface for the env resolver: anything that can turn a
* (secretName, key) pair into a string. Matches `SecretService.resolve()` so
* resolution now flows through the configured SecretBackend driver instead
* of reading `Secret.data` directly.
*/
export interface SecretResolver {
resolve(secretName: string, key: string): Promise<string>;
}
/**
* Resolve a server's env entries into a flat key-value map.
* - Inline `value` entries are used directly.
* - `valueFrom.secretRef` entries are looked up through the resolver.
* - `valueFrom.secretRef` entries are looked up from the secret repository.
* Throws if a referenced secret or key is missing.
*/
export async function resolveServerEnv(
server: McpServer,
resolver: SecretResolver,
secretRepo: ISecretRepository,
): Promise<Record<string, string>> {
const entries = server.env as ServerEnvEntry[];
if (!entries || entries.length === 0) return {};
const result: Record<string, string> = {};
const secretCache = new Map<string, Record<string, string>>();
for (const entry of entries) {
if (entry.value !== undefined) {
result[entry.name] = entry.value;
} else if (entry.valueFrom?.secretRef) {
const { name: secretName, key } = entry.valueFrom.secretRef;
try {
result[entry.name] = await resolver.resolve(secretName, key);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
throw new Error(
`Cannot resolve secret for server '${server.name}' env '${entry.name}': ${msg}`,
);
if (!secretCache.has(secretName)) {
const secret = await secretRepo.findByName(secretName);
if (!secret) {
throw new Error(`Secret '${secretName}' not found (referenced by server '${server.name}' env '${entry.name}')`);
}
secretCache.set(secretName, secret.data as Record<string, string>);
}
const data = secretCache.get(secretName)!;
if (!(key in data)) {
throw new Error(`Key '${key}' not found in secret '${secretName}' (referenced by server '${server.name}' env '${entry.name}')`);
}
result[entry.name] = data[key]!;
}
}
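// Illustrative input/output (not part of this diff), assuming a secret
// 'db-creds' whose data contains { password: 'hunter2' }:
//   env: [
//     { name: 'LOG_LEVEL', value: 'debug' },
//     { name: 'DB_PASSWORD', valueFrom: { secretRef: { name: 'db-creds', key: 'password' } } },
//   ]
// resolves to { LOG_LEVEL: 'debug', DB_PASSWORD: 'hunter2' }.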


@@ -1,8 +1,8 @@
import type { McpInstance } from '@prisma/client';
import type { IMcpInstanceRepository, IMcpServerRepository } from '../repositories/interfaces.js';
import type { IMcpInstanceRepository, IMcpServerRepository, ISecretRepository } from '../repositories/interfaces.js';
import type { McpOrchestrator, ContainerSpec, ContainerInfo } from './orchestrator.js';
import { NotFoundError } from './mcp-server.service.js';
import { resolveServerEnv, type SecretResolver } from './env-resolver.js';
import { resolveServerEnv } from './env-resolver.js';
/** Runner images for package-based MCP servers, keyed by runtime name. */
const RUNNER_IMAGES: Record<string, string> = {
@@ -26,7 +26,7 @@ export class InstanceService {
private instanceRepo: IMcpInstanceRepository,
private serverRepo: IMcpServerRepository,
private orchestrator: McpOrchestrator,
private secretResolver?: SecretResolver,
private secretRepo?: ISecretRepository,
) {}
async list(serverId?: string): Promise<McpInstance[]> {
@@ -284,9 +284,9 @@ export class InstanceService {
}
// Resolve env vars from inline values and secret refs
if (this.secretResolver) {
if (this.secretRepo) {
try {
const resolvedEnv = await resolveServerEnv(server, this.secretResolver);
const resolvedEnv = await resolveServerEnv(server, this.secretRepo);
if (Object.keys(resolvedEnv).length > 0) {
spec.env = resolvedEnv;
}


@@ -1,180 +0,0 @@
/**
* LlmService — CRUD over `Llm` rows plus credential resolution.
*
* Credentials are stored by reference: the row carries `(apiKeySecretId,
* apiKeySecretKey)`. Callers that need the raw key (the inference proxy, once
* it lands in Phase 2) call `resolveApiKey()`, which reads through the
* SecretService (whose own backend dispatch transparently hits plaintext or
* OpenBao as configured).
*
* The CLI/API accepts `apiKeyRef: { name, key }` — the service translates
* that to the FK pair.
*/
import type { Llm } from '@prisma/client';
import type { ILlmRepository } from '../repositories/llm.repository.js';
import type { SecretService } from './secret.service.js';
import {
CreateLlmSchema,
UpdateLlmSchema,
type CreateLlmInput,
type ApiKeyRef,
} from '../validation/llm.schema.js';
import { NotFoundError, ConflictError } from './mcp-server.service.js';
/** Shape returned by API layer — merges DB row with a human-readable apiKeyRef. */
export interface LlmView {
id: string;
name: string;
type: string;
model: string;
url: string;
tier: string;
description: string;
apiKeyRef: ApiKeyRef | null;
extraConfig: Record<string, unknown>;
version: number;
createdAt: Date;
updatedAt: Date;
}
export class LlmService {
constructor(
private readonly repo: ILlmRepository,
private readonly secrets: SecretService,
) {}
async list(): Promise<LlmView[]> {
const rows = await this.repo.findAll();
return Promise.all(rows.map((r) => this.toView(r)));
}
async getById(id: string): Promise<LlmView> {
const row = await this.repo.findById(id);
if (row === null) throw new NotFoundError(`Llm not found: ${id}`);
return this.toView(row);
}
async getByName(name: string): Promise<LlmView> {
const row = await this.repo.findByName(name);
if (row === null) throw new NotFoundError(`Llm not found: ${name}`);
return this.toView(row);
}
async create(input: unknown): Promise<LlmView> {
const data = CreateLlmSchema.parse(input);
const existing = await this.repo.findByName(data.name);
if (existing !== null) throw new ConflictError(`Llm already exists: ${data.name}`);
const apiKeyFields = await this.resolveApiKeyRefToIds(data.apiKeyRef);
const row = await this.repo.create({
name: data.name,
type: data.type,
model: data.model,
url: data.url ?? '',
tier: data.tier,
description: data.description,
apiKeySecretId: apiKeyFields.id,
apiKeySecretKey: apiKeyFields.key,
extraConfig: data.extraConfig,
});
return this.toView(row);
}
async update(id: string, input: unknown): Promise<LlmView> {
const data = UpdateLlmSchema.parse(input);
await this.getById(id);
const updateFields: Parameters<ILlmRepository['update']>[1] = {};
if (data.model !== undefined) updateFields.model = data.model;
if (data.url !== undefined) updateFields.url = data.url;
if (data.tier !== undefined) updateFields.tier = data.tier;
if (data.description !== undefined) updateFields.description = data.description;
if (data.extraConfig !== undefined) updateFields.extraConfig = data.extraConfig;
// apiKeyRef: null → explicit unlink; object → replace; undefined → leave alone.
if (data.apiKeyRef !== undefined) {
if (data.apiKeyRef === null) {
updateFields.apiKeySecretId = null;
updateFields.apiKeySecretKey = null;
} else {
const resolved = await this.resolveApiKeyRefToIds(data.apiKeyRef);
updateFields.apiKeySecretId = resolved.id;
updateFields.apiKeySecretKey = resolved.key;
}
}
const row = await this.repo.update(id, updateFields);
return this.toView(row);
}
async delete(id: string): Promise<void> {
await this.getById(id);
await this.repo.delete(id);
}
/**
* Return the raw API key string for a given Llm. Called by the inference
* proxy in Phase 2. Throws NotFoundError if the Llm has no apiKeyRef, or the
* referenced secret/key doesn't exist.
*/
async resolveApiKey(llmName: string): Promise<string> {
const row = await this.repo.findByName(llmName);
if (row === null) throw new NotFoundError(`Llm not found: ${llmName}`);
if (row.apiKeySecretId === null || row.apiKeySecretKey === null) {
throw new NotFoundError(`Llm '${llmName}' has no apiKeyRef configured`);
}
const secret = await this.secrets.getById(row.apiKeySecretId);
const data = await this.secrets.resolveData(secret);
const value = data[row.apiKeySecretKey];
if (value === undefined) {
throw new NotFoundError(`Secret '${secret.name}' has no key '${row.apiKeySecretKey}'`);
}
return value;
}
private async resolveApiKeyRefToIds(ref: ApiKeyRef | undefined): Promise<{ id: string | null; key: string | null }> {
if (ref === undefined) return { id: null, key: null };
const secret = await this.secrets.getByName(ref.name);
return { id: secret.id, key: ref.key };
}
private async toView(row: Llm): Promise<LlmView> {
let apiKeyRef: ApiKeyRef | null = null;
if (row.apiKeySecretId !== null && row.apiKeySecretKey !== null) {
const secret = await this.secrets.getById(row.apiKeySecretId).catch(() => null);
if (secret !== null) {
apiKeyRef = { name: secret.name, key: row.apiKeySecretKey };
}
}
return {
id: row.id,
name: row.name,
type: row.type,
model: row.model,
url: row.url,
tier: row.tier,
description: row.description,
apiKeyRef,
extraConfig: row.extraConfig as Record<string, unknown>,
version: row.version,
createdAt: row.createdAt,
updatedAt: row.updatedAt,
};
}
// ── Backup/restore helpers ──
async upsertByName(input: CreateLlmInput): Promise<LlmView> {
const existing = await this.repo.findByName(input.name);
if (existing !== null) {
return this.update(existing.id, input);
}
return this.create(input);
}
async deleteByName(name: string): Promise<void> {
const row = await this.repo.findByName(name);
if (row === null) return;
await this.delete(row.id);
}
}
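// A minimal usage sketch (not part of this diff): registering an Llm whose key
// lives in an existing Secret named 'anthropic-key'. create() turns apiKeyRef
// into the (apiKeySecretId, apiKeySecretKey) pair; resolveApiKey() later reads
// the raw value back through SecretService. Names are placeholders.
async function registerExample(llms: LlmService): Promise<void> {
  const view = await llms.create({
    name: 'claude-fast',
    type: 'anthropic',
    model: 'claude-3-5-sonnet-20241022',
    apiKeyRef: { name: 'anthropic-key', key: 'token' },
  });
  console.log(view.id, view.apiKeyRef); // apiKeyRef echoes { name: 'anthropic-key', key: 'token' }
  const rawKey = await llms.resolveApiKey('claude-fast');
  void rawKey; // handed to a provider adapter by the inference route, never returned to clients
}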


@@ -1,256 +0,0 @@
/**
* Anthropic adapter — translates between OpenAI chat/completions format and
* the Anthropic Messages API (`POST /v1/messages`).
*
* Key differences we translate:
* - OpenAI `role: 'system'` messages become a top-level `system` string.
* - Anthropic returns `content: [{ type: 'text', text }]` — we join into
* OpenAI's `content: "…"` string.
* - Streaming: Anthropic emits a sequence of
* `message_start / content_block_{start,delta,stop} / message_delta /
* message_stop` events. We translate those to OpenAI
* `chat.completion.chunk` deltas.
*
* This adapter implements the subset needed for plain-text chat — tool-use
* translation is intentionally left out for this phase; agents that need tool
* calling should target an OpenAI-compatible provider until the translator
* covers it.
*/
import type {
LlmAdapter,
InferContext,
NonStreamingResult,
StreamingChunk,
AdapterDeps,
OpenAiMessage,
} from '../types.js';
const DEFAULT_ANTHROPIC_URL = 'https://api.anthropic.com';
const ANTHROPIC_VERSION = '2023-06-01';
interface AnthropicMessageResponse {
id: string;
model: string;
role: 'assistant';
content: Array<{ type: 'text'; text: string } | { type: string; [k: string]: unknown }>;
stop_reason?: string;
usage?: { input_tokens: number; output_tokens: number };
}
export class AnthropicAdapter implements LlmAdapter {
readonly kind = 'anthropic';
private readonly fetchImpl: typeof globalThis.fetch;
constructor(deps: AdapterDeps = {}) {
this.fetchImpl = deps.fetch ?? globalThis.fetch;
}
async infer(ctx: InferContext): Promise<NonStreamingResult> {
const url = (ctx.url !== '' ? ctx.url : DEFAULT_ANTHROPIC_URL).replace(/\/+$/, '');
const body = this.toAnthropicRequest(ctx, false);
const res = await this.fetchImpl(`${url}/v1/messages`, {
method: 'POST',
headers: this.headers(ctx),
body: JSON.stringify(body),
});
if (!res.ok) {
const text = await res.text().catch(() => '');
return {
status: res.status,
body: { error: { message: `anthropic: HTTP ${String(res.status)} ${text}` } },
};
}
const anth = await res.json() as AnthropicMessageResponse;
return { status: 200, body: this.toOpenAiResponse(anth) };
}
async *stream(ctx: InferContext): AsyncGenerator<StreamingChunk> {
const url = (ctx.url !== '' ? ctx.url : DEFAULT_ANTHROPIC_URL).replace(/\/+$/, '');
const body = this.toAnthropicRequest(ctx, true);
const res = await this.fetchImpl(`${url}/v1/messages`, {
method: 'POST',
headers: this.headers(ctx),
body: JSON.stringify(body),
});
if (!res.ok || res.body === null) {
const text = await res.text().catch(() => '');
throw new Error(`anthropic stream: HTTP ${String(res.status)} ${text}`);
}
const id = `chatcmpl-${cryptoNonce()}`;
const model = body.model;
const created = Math.floor(Date.now() / 1000);
// Parse Anthropic SSE. Each event is `event: <name>\ndata: <json>\n\n`.
const decoder = new TextDecoder();
let buf = '';
const reader = res.body.getReader();
let emittedFirst = false;
const baseChunk = (delta: Record<string, unknown>, finishReason?: string): string => {
const chunk = {
id,
object: 'chat.completion.chunk',
created,
model,
choices: [{
index: 0,
delta,
finish_reason: finishReason ?? null,
}],
};
return JSON.stringify(chunk);
};
try {
// eslint-disable-next-line no-constant-condition
while (true) {
const { value, done } = await reader.read();
if (done) break;
buf += decoder.decode(value, { stream: true });
let idx: number;
while ((idx = buf.indexOf('\n\n')) !== -1) {
const rawEvent = buf.slice(0, idx);
buf = buf.slice(idx + 2);
const parsed = parseSseEvent(rawEvent);
if (parsed === null) continue;
const { event, data } = parsed;
if (event === 'content_block_delta') {
const textDelta = (data as { delta?: { type?: string; text?: string } }).delta;
if (textDelta?.type === 'text_delta' && typeof textDelta.text === 'string') {
if (!emittedFirst) {
yield { data: baseChunk({ role: 'assistant', content: '' }) };
emittedFirst = true;
}
yield { data: baseChunk({ content: textDelta.text }) };
}
} else if (event === 'message_delta') {
const stopReason = (data as { delta?: { stop_reason?: string } }).delta?.stop_reason;
if (typeof stopReason === 'string') {
yield { data: baseChunk({}, mapStopReason(stopReason)) };
}
} else if (event === 'message_stop') {
yield { data: '[DONE]', done: true };
return;
} else if (event === 'error') {
throw new Error(`anthropic stream error: ${JSON.stringify(data)}`);
}
}
}
} finally {
reader.releaseLock();
}
// Anthropic closed without message_stop — give the consumer a clean end.
yield { data: '[DONE]', done: true };
}
private headers(ctx: InferContext): Record<string, string> {
return {
'Content-Type': 'application/json',
'x-api-key': ctx.apiKey,
'anthropic-version': ANTHROPIC_VERSION,
};
}
/** Translate the OpenAI request to the Anthropic Messages shape. */
private toAnthropicRequest(ctx: InferContext, stream: boolean): {
model: string;
max_tokens: number;
messages: Array<{ role: 'user' | 'assistant'; content: string }>;
system?: string;
stream?: boolean;
temperature?: number;
top_p?: number;
stop_sequences?: string[];
} {
const { body } = ctx;
const systemParts: string[] = [];
const messages: Array<{ role: 'user' | 'assistant'; content: string }> = [];
for (const msg of body.messages) {
const text = normaliseContent(msg);
if (msg.role === 'system') {
systemParts.push(text);
} else if (msg.role === 'user' || msg.role === 'assistant') {
messages.push({ role: msg.role, content: text });
}
// `tool` role messages are dropped — tool translation is out of scope
// for this phase.
}
const out: ReturnType<typeof this.toAnthropicRequest> = {
model: body.model !== '' ? body.model : ctx.modelOverride,
max_tokens: typeof body.max_tokens === 'number' ? body.max_tokens : 1024,
messages,
};
if (systemParts.length > 0) out.system = systemParts.join('\n\n');
if (stream) out.stream = true;
if (typeof body.temperature === 'number') out.temperature = body.temperature;
if (typeof body.top_p === 'number') out.top_p = body.top_p;
if (body.stop !== undefined) {
out.stop_sequences = Array.isArray(body.stop) ? body.stop : [body.stop];
}
return out;
}
private toOpenAiResponse(anth: AnthropicMessageResponse): Record<string, unknown> {
const text = anth.content
.map((c) => (c.type === 'text' && typeof (c as { text?: unknown }).text === 'string'
? (c as { text: string }).text
: ''))
.join('');
return {
id: `chatcmpl-${anth.id}`,
object: 'chat.completion',
created: Math.floor(Date.now() / 1000),
model: anth.model,
choices: [{
index: 0,
message: { role: 'assistant', content: text },
finish_reason: mapStopReason(anth.stop_reason ?? 'end_turn'),
}],
usage: anth.usage ? {
prompt_tokens: anth.usage.input_tokens,
completion_tokens: anth.usage.output_tokens,
total_tokens: anth.usage.input_tokens + anth.usage.output_tokens,
} : undefined,
};
}
}
function normaliseContent(msg: OpenAiMessage): string {
if (typeof msg.content === 'string') return msg.content;
return msg.content
.map((part) => (typeof part.text === 'string' ? part.text : ''))
.join('');
}
function mapStopReason(r: string): string {
// Anthropic → OpenAI finish_reason
if (r === 'end_turn' || r === 'stop_sequence') return 'stop';
if (r === 'max_tokens') return 'length';
if (r === 'tool_use') return 'tool_calls';
return r;
}
function parseSseEvent(raw: string): { event: string; data: unknown } | null {
let event = '';
let dataLine = '';
for (const line of raw.split('\n')) {
if (line.startsWith('event:')) event = line.slice(6).trim();
else if (line.startsWith('data:')) dataLine += line.slice(5).trim();
}
if (dataLine === '') return null;
try {
return { event, data: JSON.parse(dataLine) as unknown };
} catch {
return null;
}
}
function cryptoNonce(): string {
// Not security-sensitive — just a short randomish id.
return Math.random().toString(36).slice(2, 10);
}
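
A minimal sketch of driving the adapter directly, for illustration only: the model id, env var, and import path are assumptions, and the ctx literal mirrors the InferContext shape from types.ts.

// Sketch: not part of the original file.
import { AnthropicAdapter } from './adapters/anthropic.js';

async function demo(): Promise<void> {
  const adapter = new AnthropicAdapter(); // defaults to globalThis.fetch
  const res = await adapter.infer({
    body: {
      model: '',
      messages: [
        { role: 'system', content: 'be terse' }, // folded into the top-level `system` string
        { role: 'user', content: 'ping' },
      ],
    },
    modelOverride: 'claude-3-5-sonnet-20241022', // assumed model id
    apiKey: process.env.ANTHROPIC_API_KEY ?? '', // assumed env var
    url: '', // empty → DEFAULT_ANTHROPIC_URL
    extraConfig: {},
  });
  // res.body is an OpenAI-shaped chat.completion; res.status mirrors the upstream HTTP status.
  console.log(res.status, res.body);
}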

View File

@@ -1,112 +0,0 @@
/**
* OpenAI-passthrough adapter.
*
* Covers any provider that already speaks OpenAI chat/completions on the
* wire: `openai`, `vllm`, `deepseek`, `ollama` (with their openai-compatible
 * endpoint enabled). The adapter forwards the request body as-is (only an
 * empty model and the stream flag are filled in) and streams the response
 * straight through with no wire translation.
*
* Defaults when `url` is empty:
* - openai → https://api.openai.com
* - deepseek → https://api.deepseek.com
* - vllm/ollama → must be configured; these have no canonical public URL.
*/
import type { LlmAdapter, InferContext, NonStreamingResult, StreamingChunk, AdapterDeps } from '../types.js';
const DEFAULT_URLS: Record<string, string> = {
openai: 'https://api.openai.com',
deepseek: 'https://api.deepseek.com',
};
export class OpenAiPassthroughAdapter implements LlmAdapter {
readonly kind: string;
private readonly fetchImpl: typeof globalThis.fetch;
constructor(kind: 'openai' | 'vllm' | 'deepseek' | 'ollama', deps: AdapterDeps = {}) {
this.kind = kind;
this.fetchImpl = deps.fetch ?? globalThis.fetch;
}
async infer(ctx: InferContext): Promise<NonStreamingResult> {
const url = this.endpointUrl(ctx.url);
const body = this.prepareBody(ctx, false);
const res = await this.fetchImpl(`${url}/v1/chat/completions`, {
method: 'POST',
headers: this.headers(ctx),
body: JSON.stringify(body),
});
const json = await res.json() as unknown;
return { status: res.status, body: json };
}
async *stream(ctx: InferContext): AsyncGenerator<StreamingChunk> {
const url = this.endpointUrl(ctx.url);
const body = this.prepareBody(ctx, true);
const res = await this.fetchImpl(`${url}/v1/chat/completions`, {
method: 'POST',
headers: this.headers(ctx),
body: JSON.stringify(body),
});
if (!res.ok || res.body === null) {
const text = await res.text().catch(() => '');
throw new Error(`${this.kind} stream: HTTP ${String(res.status)} ${text}`);
}
// Re-frame the provider's SSE stream into our `StreamingChunk` shape.
// OpenAI-compat providers already emit `data: {...}` + `data: [DONE]` —
// we just unwrap the `data: ` prefix, forward payloads, and emit a
// single terminal `done` chunk so the consumer always gets one.
const decoder = new TextDecoder();
let buf = '';
const reader = res.body.getReader();
try {
// eslint-disable-next-line no-constant-condition
while (true) {
const { value, done } = await reader.read();
if (done) break;
buf += decoder.decode(value, { stream: true });
let idx: number;
while ((idx = buf.indexOf('\n\n')) !== -1) {
const event = buf.slice(0, idx);
buf = buf.slice(idx + 2);
for (const line of event.split('\n')) {
if (!line.startsWith('data:')) continue;
const payload = line.slice(5).trim();
if (payload === '') continue;
if (payload === '[DONE]') {
yield { data: '[DONE]', done: true };
return;
}
yield { data: payload };
}
}
}
} finally {
reader.releaseLock();
}
// Provider closed without emitting [DONE] — give the consumer a clean end.
yield { data: '[DONE]', done: true };
}
private endpointUrl(url: string): string {
if (url !== '') return url.replace(/\/+$/, '');
const def = DEFAULT_URLS[this.kind];
if (def === undefined) {
throw new Error(`${this.kind}: url is required (no default endpoint for this provider)`);
}
return def;
}
private headers(ctx: InferContext): Record<string, string> {
const headers: Record<string, string> = { 'Content-Type': 'application/json' };
if (ctx.apiKey !== '') headers['Authorization'] = `Bearer ${ctx.apiKey}`;
return headers;
}
private prepareBody(ctx: InferContext, stream: boolean): Record<string, unknown> {
const out: Record<string, unknown> = { ...ctx.body };
if (out.model === undefined || out.model === '') out.model = ctx.modelOverride;
out.stream = stream;
return out;
}
}
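
A sketch of consuming the streaming generator and re-framing it as SSE on an HTTP response, for illustration only: the vLLM address, model name, minimal reply shape, and import paths are assumptions.

// Sketch: any writable HTTP response stands in for `reply`.
import type { InferContext } from './types.js';
import { OpenAiPassthroughAdapter } from './adapters/openai-passthrough.js';

async function pipeStream(reply: { write(s: string): void; end(): void }): Promise<void> {
  const adapter = new OpenAiPassthroughAdapter('vllm');
  const ctx: InferContext = {
    body: { model: '', messages: [{ role: 'user', content: 'hi' }], stream: true },
    modelOverride: 'qwen3-thinking', // assumed model name
    apiKey: '',                      // no Authorization header is sent when empty
    url: 'http://vllm:8000',         // assumed address; vllm has no default URL
    extraConfig: {},
  };
  for await (const chunk of adapter.stream(ctx)) {
    reply.write(`data: ${chunk.data}\n\n`); // chunk.data is an OpenAI chunk payload or '[DONE]'
  }
  reply.end();
}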

View File

@@ -1,52 +0,0 @@
/**
* Adapter dispatcher for the inference proxy.
*
* `getAdapter(type)` returns the right adapter instance for an Llm's `type`
* column. Adapters are cached per-type — they carry no per-request state.
* The caller (the infer route) supplies the resolved API key + request body
* through `InferContext`, so a single adapter instance serves every Llm of
* that type.
*/
import type { LlmAdapter, AdapterDeps } from './types.js';
import { OpenAiPassthroughAdapter } from './adapters/openai-passthrough.js';
import { AnthropicAdapter } from './adapters/anthropic.js';
export class UnsupportedProviderError extends Error {
constructor(type: string) {
super(`Unsupported LLM provider: ${type}`);
this.name = 'UnsupportedProviderError';
}
}
export class LlmAdapterRegistry {
private readonly cache = new Map<string, LlmAdapter>();
constructor(private readonly deps: AdapterDeps = {}) {}
get(type: string): LlmAdapter {
const cached = this.cache.get(type);
if (cached !== undefined) return cached;
const adapter = this.build(type);
this.cache.set(type, adapter);
return adapter;
}
private build(type: string): LlmAdapter {
switch (type) {
case 'openai':
case 'vllm':
case 'deepseek':
case 'ollama':
return new OpenAiPassthroughAdapter(type, this.deps);
case 'anthropic':
return new AnthropicAdapter(this.deps);
case 'gemini-cli':
// Intentionally deferred — gemini-cli requires the binary on the mcpd
// pod filesystem and subprocess lifecycle management. Flagged as
// homelab-only in the plan; not landing in this phase.
throw new UnsupportedProviderError(`${type} (subprocess providers are not supported in the proxy yet)`);
default:
throw new UnsupportedProviderError(type);
}
}
}
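
A sketch of how the registry is intended to be used: built once, consulted per request, with the deferred provider surfacing as a client error rather than a crash. The adapterFor helper is illustrative, not part of the deleted file.

// Sketch: one registry per process; adapters carry no per-request state.
import { LlmAdapterRegistry, UnsupportedProviderError } from './dispatcher.js';
import type { LlmAdapter } from './types.js';

const registry = new LlmAdapterRegistry(); // or: new LlmAdapterRegistry({ fetch: customFetch }) in tests

function adapterFor(llmType: string): LlmAdapter | null {
  try {
    return registry.get(llmType); // cached after the first call for this type
  } catch (err) {
    if (err instanceof UnsupportedProviderError) return null; // e.g. 'gemini-cli'; map to a 4xx upstream
    throw err;
  }
}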

View File

@@ -1,70 +0,0 @@
/**
* Shared types for the LLM inference proxy.
*
* The wire format on the mcpctl side is OpenAI's chat/completions v1 — it's
* the de-facto lingua franca and every client library already speaks it.
* Provider-specific adapters translate to/from that shape.
*/
export interface OpenAiMessage {
role: 'system' | 'user' | 'assistant' | 'tool';
content: string | Array<{ type: string; text?: string; [k: string]: unknown }>;
name?: string;
tool_call_id?: string;
tool_calls?: Array<{ id: string; type: 'function'; function: { name: string; arguments: string } }>;
}
export interface OpenAiChatRequest {
model: string;
messages: OpenAiMessage[];
stream?: boolean;
temperature?: number;
max_tokens?: number;
top_p?: number;
stop?: string | string[];
tools?: Array<{ type: 'function'; function: { name: string; description?: string; parameters?: Record<string, unknown> } }>;
tool_choice?: unknown;
// Passthrough: unknown extras forwarded as-is.
[k: string]: unknown;
}
export interface InferContext {
/** Normalised OpenAI-format body. Adapters read/transform from here. */
body: OpenAiChatRequest;
/** The Llm row's `model` field, used when the request body has an empty model. */
modelOverride: string;
/** The resolved API key, or empty string for providers that don't take one. */
apiKey: string;
/** Target URL from the Llm row (may be empty for provider-default). */
url: string;
/** Arbitrary config from the Llm row (e.g. vllm gpu settings). */
extraConfig: Record<string, unknown>;
}
export interface NonStreamingResult {
status: number;
/** OpenAI chat.completion response body. */
body: unknown;
}
export interface StreamingChunk {
/** Raw SSE data payload. Consumer emits `data: <payload>\n\n`. */
data: string;
/** Mark the end of stream — consumer emits `data: [DONE]\n\n`. */
done?: boolean;
}
export interface LlmAdapter {
readonly kind: string;
/** Non-streaming request. Returns the final chat.completion body. */
infer(ctx: InferContext): Promise<NonStreamingResult>;
/**
* Streaming request. Yields OpenAI-format SSE chunks. Adapters translate
* provider-native stream formats into OpenAI `chat.completion.chunk`s.
*/
stream(ctx: InferContext): AsyncGenerator<StreamingChunk>;
}
export interface AdapterDeps {
fetch?: typeof globalThis.fetch;
}
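
To make the contract concrete, a minimal adapter that satisfies LlmAdapter without any network I/O. This is a sketch useful as a test double, not one of the shipped providers; the import path is assumed.

// Sketch: an offline adapter that echoes the last user message back.
import type { LlmAdapter, InferContext, NonStreamingResult, StreamingChunk } from './types.js';

export class EchoAdapter implements LlmAdapter {
  readonly kind = 'echo';

  async infer(ctx: InferContext): Promise<NonStreamingResult> {
    const last = ctx.body.messages[ctx.body.messages.length - 1];
    const text = typeof last?.content === 'string' ? last.content : '';
    return {
      status: 200,
      body: {
        id: 'chatcmpl-echo',
        object: 'chat.completion',
        created: Math.floor(Date.now() / 1000),
        model: ctx.body.model !== '' ? ctx.body.model : ctx.modelOverride,
        choices: [{ index: 0, message: { role: 'assistant', content: text }, finish_reason: 'stop' }],
      },
    };
  }

  // A real adapter would yield OpenAI chat.completion.chunk payloads; one
  // full-body chunk is enough to exercise consumers in tests.
  async *stream(ctx: InferContext): AsyncGenerator<StreamingChunk> {
    const res = await this.infer(ctx);
    yield { data: JSON.stringify(res.body) };
    yield { data: '[DONE]', done: true };
  }
}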

View File

@@ -1,88 +0,0 @@
import type { SecretBackend } from '@prisma/client';
import type { ISecretBackendRepository } from '../repositories/secret-backend.repository.js';
import type { SecretBackendDriver } from './secret-backends/types.js';
import { createDriver, type DriverFactoryDeps } from './secret-backends/factory.js';
import { NotFoundError, ConflictError } from './mcp-server.service.js';
export class SecretBackendInUseError extends Error {
constructor(backendName: string, count: number) {
super(`SecretBackend '${backendName}' is still referenced by ${String(count)} secret(s); migrate them first`);
this.name = 'SecretBackendInUseError';
}
}
export class SecretBackendService {
private driverCache = new Map<string, SecretBackendDriver>(); // keyed by backend id
constructor(
private readonly repo: ISecretBackendRepository,
private readonly driverDeps: DriverFactoryDeps,
) {}
async list(): Promise<SecretBackend[]> {
return this.repo.findAll();
}
async getById(id: string): Promise<SecretBackend> {
const row = await this.repo.findById(id);
if (row === null) throw new NotFoundError(`SecretBackend not found: ${id}`);
return row;
}
async getByName(name: string): Promise<SecretBackend> {
const row = await this.repo.findByName(name);
if (row === null) throw new NotFoundError(`SecretBackend not found: ${name}`);
return row;
}
async getDefault(): Promise<SecretBackend> {
const row = await this.repo.findDefault();
if (row === null) {
throw new Error('No default SecretBackend configured. This shouldn\'t happen — the plaintext row should have been seeded on startup.');
}
return row;
}
async create(input: {
name: string;
type: string;
config?: Record<string, unknown>;
isDefault?: boolean;
description?: string;
}): Promise<SecretBackend> {
if (!input.name || !input.type) throw new Error('name and type are required');
const existing = await this.repo.findByName(input.name);
if (existing !== null) throw new ConflictError(`SecretBackend already exists: ${input.name}`);
return this.repo.create(input);
}
async update(id: string, input: { config?: Record<string, unknown>; isDefault?: boolean; description?: string }): Promise<SecretBackend> {
await this.getById(id);
const row = await this.repo.update(id, input);
this.driverCache.delete(id); // config may have changed; rebuild lazily
return row;
}
async setDefault(id: string): Promise<SecretBackend> {
await this.getById(id);
return this.repo.setAsDefault(id);
}
async delete(id: string): Promise<void> {
const row = await this.getById(id);
const count = await this.repo.countReferencingSecrets(id);
if (count > 0) throw new SecretBackendInUseError(row.name, count);
if (row.isDefault) throw new Error(`Cannot delete the default SecretBackend '${row.name}'; promote another one first`);
await this.repo.delete(id);
this.driverCache.delete(id);
}
/** Get the driver for a given backend id, creating + caching on first call. */
driverFor(backend: SecretBackend): SecretBackendDriver {
const cached = this.driverCache.get(backend.id);
if (cached) return cached;
const driver = createDriver(backend, this.driverDeps);
this.driverCache.set(backend.id, driver);
return driver;
}
}

View File

@@ -1,43 +0,0 @@
/**
* Build a `SecretBackendDriver` from a `SecretBackend` row.
*
 * Lives separately from the service because it's the only place aware of every
* driver type — adding a new backend means adding one case here and one
* driver file. Everything else (service, routes, CLI) is type-agnostic.
*/
import type { SecretBackend } from '@prisma/client';
import type { SecretBackendDriver, SecretRefResolver } from './types.js';
import { PlaintextDriver, type PlaintextDriverDeps } from './plaintext.js';
import { OpenBaoDriver, type OpenBaoConfig } from './openbao.js';
export interface DriverFactoryDeps {
plaintext: PlaintextDriverDeps;
/** Resolves `{secretName, key}` against the plaintext backend — used by remote drivers' auth. */
secretRefResolver: SecretRefResolver;
/** Overridable for tests. */
fetch?: typeof globalThis.fetch;
}
export function createDriver(row: SecretBackend, deps: DriverFactoryDeps): SecretBackendDriver {
switch (row.type) {
case 'plaintext':
return new PlaintextDriver(deps.plaintext);
case 'openbao': {
const cfg = row.config as unknown as OpenBaoConfig;
if (!cfg.url || !cfg.tokenSecretRef?.name || !cfg.tokenSecretRef?.key) {
throw new Error(
`SecretBackend '${row.name}' (openbao): config must provide url + tokenSecretRef {name, key}`,
);
}
const driverDeps: { fetch?: typeof globalThis.fetch; secretRefResolver: SecretRefResolver } = {
secretRefResolver: deps.secretRefResolver,
};
if (deps.fetch !== undefined) driverDeps.fetch = deps.fetch;
return new OpenBaoDriver(cfg, driverDeps);
}
default:
throw new Error(`Unknown SecretBackend type: ${row.type}`);
}
}
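
For reference, a sketch of the config JSON an 'openbao' SecretBackend row would carry into this factory; every value below is a placeholder.

// Sketch: the shape createDriver() validates for an 'openbao' row.
const exampleOpenBaoConfig = {
  url: 'https://openbao.internal:8200',                     // placeholder address
  mount: 'secret',                                          // optional; defaults to 'secret'
  pathPrefix: 'mcpctl',                                     // optional; defaults to 'mcpctl'
  tokenSecretRef: { name: 'openbao-token', key: 'token' },  // resolved through the plaintext backend
};
// A row missing `url` or either half of `tokenSecretRef` fails fast in the
// 'openbao' case above instead of erroring on the first secret read.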

View File

@@ -1,133 +0,0 @@
/**
* OpenBao (MPL 2.0 fork of HashiCorp Vault) driver for the KV v2 secrets engine.
*
* Uses the plain HTTP API — no third-party client — so we don't pick up a
* Vault SDK licensing headache. Endpoints touched:
*
* POST <url>/v1/<mount>/data/<path> -- write
* GET <url>/v1/<mount>/data/<path> -- read latest
* DELETE <url>/v1/<mount>/metadata/<path> -- full delete (all versions)
* LIST <url>/v1/<mount>/metadata/ -- for migration
*
* Auth: static token for v1. The token is stored in a `Secret` on the
* plaintext backend (see `config.tokenSecretRef = { name, key }`); the driver
* resolves it on construction via the injected `SecretRefResolver`. Follow-up
* work (not here) adds Kubernetes ServiceAccount auth.
*
* Path layout inside OpenBao:
* <mount>/<pathPrefix>/<secretName>
* `mount` and `pathPrefix` come from the backend's `config` JSON; defaults are
* `secret` and `mcpctl/`.
*/
import type { SecretBackendDriver, SecretData, ExternalRef, SecretRefResolver } from './types.js';
export interface OpenBaoConfig {
url: string;
mount?: string;
pathPrefix?: string;
namespace?: string;
tokenSecretRef: { name: string; key: string };
}
export interface OpenBaoDriverDeps {
/** Injected HTTP fetcher — mockable in tests. */
fetch?: typeof globalThis.fetch;
secretRefResolver: SecretRefResolver;
}
export class OpenBaoDriver implements SecretBackendDriver {
readonly kind = 'openbao';
private readonly url: string;
private readonly mount: string;
private readonly pathPrefix: string;
private readonly namespace: string | undefined;
private readonly tokenSecretRef: { name: string; key: string };
private readonly fetchImpl: typeof globalThis.fetch;
private readonly resolver: SecretRefResolver;
private cachedToken: string | undefined;
constructor(config: OpenBaoConfig, deps: OpenBaoDriverDeps) {
this.url = config.url.replace(/\/+$/, '');
this.mount = (config.mount ?? 'secret').replace(/^\/|\/$/g, '');
this.pathPrefix = (config.pathPrefix ?? 'mcpctl').replace(/^\/|\/$/g, '');
if (config.namespace !== undefined) this.namespace = config.namespace;
this.tokenSecretRef = config.tokenSecretRef;
this.fetchImpl = deps.fetch ?? globalThis.fetch;
this.resolver = deps.secretRefResolver;
}
async read(input: { name: string; externalRef: ExternalRef; data: SecretData }): Promise<SecretData> {
const path = this.pathFor(input.name);
const res = await this.request('GET', `/v1/${this.mount}/data/${path}`);
if (res.status === 404) {
throw new Error(`OpenBao: secret '${input.name}' not found at ${path}`);
}
if (!res.ok) throw new Error(`OpenBao read ${path}: HTTP ${res.status}`);
const body = await res.json() as { data?: { data?: SecretData } };
return body.data?.data ?? {};
}
async write(input: { name: string; data: SecretData }): Promise<{ externalRef: ExternalRef; storedData: SecretData }> {
const path = this.pathFor(input.name);
const res = await this.request('POST', `/v1/${this.mount}/data/${path}`, { data: input.data });
if (!res.ok) throw new Error(`OpenBao write ${path}: HTTP ${res.status}`);
return { externalRef: `${this.mount}/${path}`, storedData: {} };
}
async delete(input: { name: string; externalRef: ExternalRef }): Promise<void> {
const path = this.pathFor(input.name);
const res = await this.request('DELETE', `/v1/${this.mount}/metadata/${path}`);
if (!res.ok && res.status !== 404) {
throw new Error(`OpenBao delete ${path}: HTTP ${res.status}`);
}
}
async list(): Promise<Array<{ name: string; externalRef: ExternalRef }>> {
const listPath = this.pathPrefix === '' ? '' : `${this.pathPrefix}/`;
const res = await this.request('LIST', `/v1/${this.mount}/metadata/${listPath}`);
if (res.status === 404) return [];
if (!res.ok) throw new Error(`OpenBao list: HTTP ${res.status}`);
const body = await res.json() as { data?: { keys?: string[] } };
const keys = body.data?.keys ?? [];
return keys
.filter((k) => !k.endsWith('/'))
.map((k) => ({
name: k,
externalRef: `${this.mount}/${this.pathPrefix === '' ? '' : `${this.pathPrefix}/`}${k}`,
}));
}
async healthCheck(): Promise<{ ok: boolean; detail?: string }> {
try {
const res = await this.request('GET', '/v1/sys/health');
return { ok: res.ok, detail: `HTTP ${res.status}` };
} catch (err) {
return { ok: false, detail: err instanceof Error ? err.message : String(err) };
}
}
private pathFor(name: string): string {
const safe = encodeURIComponent(name);
return this.pathPrefix === '' ? safe : `${this.pathPrefix}/${safe}`;
}
private async getToken(): Promise<string> {
if (this.cachedToken !== undefined) return this.cachedToken;
const token = await this.resolver.resolve(this.tokenSecretRef.name, this.tokenSecretRef.key);
this.cachedToken = token;
return token;
}
private async request(method: string, path: string, body?: unknown): Promise<Response> {
const token = await this.getToken();
const headers: Record<string, string> = { 'X-Vault-Token': token };
if (this.namespace !== undefined) headers['X-Vault-Namespace'] = this.namespace;
if (body !== undefined) headers['Content-Type'] = 'application/json';
const init: RequestInit = { method, headers };
if (body !== undefined) init.body = JSON.stringify(body);
return this.fetchImpl(`${this.url}${path}`, init);
}
}
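
A sketch of constructing and exercising the driver against the default mount and prefix, for illustration: the server address, token, secret values, and stubbed resolver are placeholders.

// Sketch: with the defaults, a secret named 'ha-creds' lives at
//   <url>/v1/secret/data/mcpctl/ha-creds      (write / read)
//   <url>/v1/secret/metadata/mcpctl/ha-creds  (delete)
import { OpenBaoDriver } from './openbao.js';

async function demo(): Promise<void> {
  const driver = new OpenBaoDriver(
    { url: 'https://openbao.internal:8200', tokenSecretRef: { name: 'openbao-token', key: 'token' } },
    { secretRefResolver: { resolve: async () => 'hvs.placeholder-token' } }, // stubbed token lookup
  );
  const { externalRef } = await driver.write({
    name: 'ha-creds',
    data: { HOMEASSISTANT_TOKEN: 'secret-token-123' },
  });
  const data = await driver.read({ name: 'ha-creds', externalRef, data: {} });
  console.log(externalRef, data.HOMEASSISTANT_TOKEN);
}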

View File

@@ -1,44 +0,0 @@
/**
* Plaintext backend driver — stores Secret.data directly in the DB column.
*
* This is the bootstrap/default backend. It always exists (seeded on startup)
* so the system can hold its own backends' auth credentials (e.g. OpenBao
* token) somewhere before the real backend is configured.
*
* The driver is deliberately almost a no-op: the service writes to and reads
* from `Secret.data` directly. We still route through the driver interface so
* the service layer can stay uniform.
*/
import type { SecretBackendDriver, SecretData, ExternalRef } from './types.js';
export interface PlaintextDriverDeps {
/** Queries `prisma.secret.findMany(...)` for the `list` method (migration path). */
listAllPlaintext: () => Promise<Array<{ name: string; data: SecretData }>>;
}
export class PlaintextDriver implements SecretBackendDriver {
readonly kind = 'plaintext';
constructor(private readonly deps: PlaintextDriverDeps) {}
async read(input: { name: string; externalRef: ExternalRef; data: SecretData }): Promise<SecretData> {
return input.data;
}
async write(input: { name: string; data: SecretData }): Promise<{ externalRef: ExternalRef; storedData: SecretData }> {
return { externalRef: '', storedData: input.data };
}
async delete(_input: { name: string; externalRef: ExternalRef }): Promise<void> {
// The row deletion itself is the secret service's job; nothing remote to clean up here.
}
async list(): Promise<Array<{ name: string; externalRef: ExternalRef }>> {
const rows = await this.deps.listAllPlaintext();
return rows.map((r) => ({ name: r.name, externalRef: '' }));
}
async healthCheck(): Promise<{ ok: boolean; detail?: string }> {
return { ok: true, detail: 'plaintext backend (DB)' };
}
}

View File

@@ -1,68 +0,0 @@
/**
* SecretBackend driver interface.
*
* The plaintext backend stores `data` in the DB column directly.
* Remote backends (openbao, vault, cloud KV) store an opaque `externalRef`
* and fetch the actual data on demand.
*
* Drivers are stateless factories keyed on a `SecretBackend` config row.
* Secret management (CRUD, naming) stays in the service layer; drivers
* handle only the storage I/O.
*/
/**
* Opaque reference written by a driver on `write` and read back on `read`.
*
* For the plaintext driver this is unused — the data itself lives in
 * `Secret.data`. For openbao it's a string like `secret/mcpctl/mysecret`
* that tells the driver where to fetch on next `read`.
*/
export type ExternalRef = string;
/** The shape of secret data — a flat map of key → value. */
export type SecretData = Record<string, string>;
export interface SecretBackendDriver {
/** Human-readable identifier, included in errors. */
readonly kind: string;
/**
* Read the stored secret. For plaintext this is a no-op — the data is
* already in the Secret row and passed in here for symmetry. For remote
* backends this makes the network call.
*/
read(input: { name: string; externalRef: ExternalRef; data: SecretData }): Promise<SecretData>;
/**
* Store a new secret (or a new version of an existing one). Returns the
* reference (or an empty string for plaintext) + the `data` object that
* should be persisted on the Secret row (empty for remote backends).
*/
write(input: { name: string; data: SecretData }): Promise<{ externalRef: ExternalRef; storedData: SecretData }>;
/** Remove the secret from the backend. Idempotent — missing is OK. */
delete(input: { name: string; externalRef: ExternalRef }): Promise<void>;
/** List everything the backend knows about. Used for migration + drift detection. */
list(): Promise<Array<{ name: string; externalRef: ExternalRef }>>;
/** Optional: health probe. Used by `mcpctl describe secretbackend`. */
healthCheck?(): Promise<{ ok: boolean; detail?: string }>;
}
/** Stored config for a SecretBackend row; dispatched on `type`. */
export interface BackendRow {
id: string;
name: string;
type: string;
config: Record<string, unknown>;
}
/**
* Dependency passed to the openbao driver so it can resolve its own auth
* token (stored in the plaintext backend — chicken-and-egg bootstrap).
* Implemented by the SecretService so we don't have a circular import.
*/
export interface SecretRefResolver {
resolve(secretName: string, key: string): Promise<string>;
}
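
A minimal in-memory implementation of the interface, as a sketch of what a driver must provide. It is useful as a test double and is not one of the shipped drivers.

// Sketch: remote-style driver backed by a Map instead of a network call.
import type { SecretBackendDriver, SecretData, ExternalRef } from './types.js';

export class InMemoryDriver implements SecretBackendDriver {
  readonly kind = 'in-memory';
  private readonly store = new Map<string, SecretData>();

  async read(input: { name: string; externalRef: ExternalRef; data: SecretData }): Promise<SecretData> {
    return this.store.get(input.name) ?? {};
  }
  async write(input: { name: string; data: SecretData }): Promise<{ externalRef: ExternalRef; storedData: SecretData }> {
    this.store.set(input.name, input.data);
    return { externalRef: `mem/${input.name}`, storedData: {} }; // remote-style: nothing persisted on the row
  }
  async delete(input: { name: string; externalRef: ExternalRef }): Promise<void> {
    this.store.delete(input.name); // idempotent; missing is OK
  }
  async list(): Promise<Array<{ name: string; externalRef: ExternalRef }>> {
    return [...this.store.keys()].map((name) => ({ name, externalRef: `mem/${name}` }));
  }
  async healthCheck(): Promise<{ ok: boolean; detail?: string }> {
    return { ok: true, detail: `in-memory (${String(this.store.size)} entries)` };
  }
}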

View File

@@ -1,113 +0,0 @@
/**
* Move secrets from one SecretBackend to another.
*
* Per-secret atomicity: for each secret we
* 1. resolve the data via the source driver,
* 2. write it to the destination driver,
* 3. update the Secret row (flip backendId + set new externalRef, clear data),
* 4. optionally delete from source.
*
* If the process dies between 2 and 3, the destination has an orphan entry
* but the row still points at the source — restart is idempotent (skips rows
* already on destination). We never run a batch-wide transaction because each
* remote driver write is a real HTTP call that can't roll back.
*/
import type { Secret } from '@prisma/client';
import type { ISecretRepository } from '../repositories/interfaces.js';
import type { SecretBackendService } from './secret-backend.service.js';
export interface MigrateOptions {
/** Source backend name. */
from: string;
/** Destination backend name. */
to: string;
/** If provided, only migrate secrets with these names. Otherwise migrate all. */
names?: string[];
/** Leave the source copy intact after migration. Default false. */
keepSource?: boolean;
}
export interface MigrateResult {
migrated: Array<{ name: string }>;
skipped: Array<{ name: string; reason: string }>;
failed: Array<{ name: string; error: string }>;
}
export class SecretMigrateService {
constructor(
private readonly secretRepo: ISecretRepository,
private readonly backends: SecretBackendService,
) {}
async migrate(opts: MigrateOptions): Promise<MigrateResult> {
const source = await this.backends.getByName(opts.from);
const dest = await this.backends.getByName(opts.to);
if (source.id === dest.id) {
return { migrated: [], skipped: [], failed: [{ name: '*', error: 'source and destination are the same backend' }] };
}
const sourceDriver = this.backends.driverFor(source);
const destDriver = this.backends.driverFor(dest);
let secrets = await this.secretRepo.findByBackend(source.id);
if (opts.names && opts.names.length > 0) {
const wanted = new Set(opts.names);
secrets = secrets.filter((s) => wanted.has(s.name));
}
const result: MigrateResult = { migrated: [], skipped: [], failed: [] };
for (const secret of secrets) {
try {
// Skip if somehow already on destination (re-run safety).
if (secret.backendId === dest.id) {
result.skipped.push({ name: secret.name, reason: 'already on destination' });
continue;
}
const data = await sourceDriver.read({
name: secret.name,
externalRef: secret.externalRef,
data: secret.data as Record<string, string>,
});
const written = await destDriver.write({ name: secret.name, data });
await this.secretRepo.update(secret.id, {
backendId: dest.id,
data: written.storedData,
externalRef: written.externalRef,
});
if (opts.keepSource !== true) {
await sourceDriver.delete({ name: secret.name, externalRef: secret.externalRef })
.catch((err: unknown) => {
// Destination is intact; best-effort source cleanup. Log + continue.
const msg = err instanceof Error ? err.message : String(err);
result.skipped.push({ name: secret.name, reason: `migrated OK; source cleanup failed: ${msg}` });
});
}
result.migrated.push({ name: secret.name });
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
result.failed.push({ name: secret.name, error: msg });
}
}
return result;
}
/** List which secrets would be touched by a migrate run, without performing it. */
async dryRun(opts: MigrateOptions): Promise<Array<Secret>> {
const source = await this.backends.getByName(opts.from);
let secrets = await this.secretRepo.findByBackend(source.id);
if (opts.names && opts.names.length > 0) {
const wanted = new Set(opts.names);
secrets = secrets.filter((s) => wanted.has(s.name));
}
return secrets;
}
}
export interface SecretMigrateRouteDeps {
migrateService: SecretMigrateService;
}
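
A sketch of running the migration described above, with a dry run first; the backend names are placeholders and the wiring of migrateService is assumed.

// Sketch: plaintext → openbao, deleting the source copy once each row is flipped.
async function migrateAllSecrets(migrateService: SecretMigrateService): Promise<void> {
  const candidates = await migrateService.dryRun({ from: 'plaintext', to: 'openbao' });
  console.log(`would migrate ${String(candidates.length)} secret(s)`);

  const result = await migrateService.migrate({ from: 'plaintext', to: 'openbao', keepSource: false });
  // Re-running after a crash is safe: rows already pointing at the destination
  // are skipped, and a secret whose source cleanup failed shows up in both
  // `migrated` and `skipped`, with the cleanup error as the skip reason.
  console.log('migrated:', result.migrated.map((m) => m.name));
  console.log('skipped :', result.skipped);
  console.log('failed  :', result.failed);
}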

View File

@@ -1,23 +1,10 @@
/**
* SecretService — CRUD over `Secret` rows.
*
* Dispatches storage I/O through the `SecretBackendService`: on create/update
* the default backend's driver writes, and the resulting {externalRef,
* storedData} is persisted on the row. On read (`resolveData`) the row's
* `backendId` selects the driver, which fetches the actual data.
*/
import type { Secret } from '@prisma/client';
import type { ISecretRepository } from '../repositories/interfaces.js';
import type { SecretBackendService } from './secret-backend.service.js';
import { CreateSecretSchema, UpdateSecretSchema } from '../validation/secret.schema.js';
import { NotFoundError, ConflictError } from './mcp-server.service.js';
import type { SecretRefResolver } from './secret-backends/types.js';
export class SecretService implements SecretRefResolver {
constructor(
private readonly repo: ISecretRepository,
private readonly backends: SecretBackendService,
) {}
export class SecretService {
constructor(private readonly repo: ISecretRepository) {}
async list(): Promise<Secret[]> {
return this.repo.findAll();
@@ -39,79 +26,47 @@ export class SecretService implements SecretRefResolver {
return secret;
}
/** Return the secret's actual data by dispatching through its backend driver. */
async resolveData(secret: Secret): Promise<Record<string, string>> {
const backend = await this.backends.getById(secret.backendId);
const driver = this.backends.driverFor(backend);
return driver.read({
name: secret.name,
externalRef: secret.externalRef,
data: secret.data as Record<string, string>,
});
}
/** Convenience: resolve {secretName, key} → string. Implements SecretRefResolver. */
async resolve(secretName: string, key: string): Promise<string> {
const secret = await this.getByName(secretName);
const data = await this.resolveData(secret);
const value = data[key];
if (value === undefined) {
throw new NotFoundError(`Secret '${secretName}' has no key '${key}'`);
}
return value;
}
async create(input: unknown): Promise<Secret> {
const data = CreateSecretSchema.parse(input);
const existing = await this.repo.findByName(data.name);
if (existing !== null) {
throw new ConflictError(`Secret already exists: ${data.name}`);
}
const backend = await this.backends.getDefault();
const driver = this.backends.driverFor(backend);
const written = await driver.write({ name: data.name, data: data.data });
return this.repo.create({
name: data.name,
backendId: backend.id,
data: written.storedData,
externalRef: written.externalRef,
});
return this.repo.create(data);
}
async update(id: string, input: unknown): Promise<Secret> {
const data = UpdateSecretSchema.parse(input);
const existing = await this.getById(id);
const backend = await this.backends.getById(existing.backendId);
const driver = this.backends.driverFor(backend);
const written = await driver.write({ name: existing.name, data: data.data });
return this.repo.update(id, {
data: written.storedData,
externalRef: written.externalRef,
});
// Verify exists
await this.getById(id);
return this.repo.update(id, data);
}
async delete(id: string): Promise<void> {
const existing = await this.getById(id);
const backend = await this.backends.getById(existing.backendId);
const driver = this.backends.driverFor(backend);
await driver.delete({ name: existing.name, externalRef: existing.externalRef });
// Verify exists
await this.getById(id);
await this.repo.delete(id);
}
// ── Backup/restore helpers (preserved) ──
// ── Backup/restore helpers ──
async upsertByName(data: Record<string, unknown>): Promise<Secret> {
const name = data['name'] as string;
const existing = await this.repo.findByName(name);
if (existing !== null) {
return this.update(existing.id, data);
const { name: _, ...updateFields } = data;
return this.repo.update(existing.id, updateFields as Parameters<ISecretRepository['update']>[1]);
}
return this.create(data);
return this.repo.create(data as Parameters<ISecretRepository['create']>[0]);
}
async deleteByName(name: string): Promise<void> {
const existing = await this.repo.findByName(name);
if (existing === null) return;
await this.delete(existing.id);
await this.repo.delete(existing.id);
}
}

View File

@@ -1,39 +0,0 @@
import { z } from 'zod';
export const LLM_TYPES = ['anthropic', 'openai', 'deepseek', 'vllm', 'ollama', 'gemini-cli'] as const;
export const LLM_TIERS = ['fast', 'heavy'] as const;
/**
* Reference to a key inside a Secret. `name` is the Secret resource name;
* `key` is the JSON key inside that secret's `data` map. mcpd resolves the
* pair through SecretService at inference time, so credentials never leave
* the server.
*/
export const ApiKeyRefSchema = z.object({
name: z.string().min(1),
key: z.string().min(1),
});
export const CreateLlmSchema = z.object({
name: z.string().min(1).max(100).regex(/^[a-z0-9-]+$/, 'Name must be lowercase alphanumeric with hyphens'),
type: z.enum(LLM_TYPES),
model: z.string().min(1),
url: z.string().url().optional(),
tier: z.enum(LLM_TIERS).default('fast'),
description: z.string().max(500).default(''),
apiKeyRef: ApiKeyRefSchema.optional(),
extraConfig: z.record(z.unknown()).default({}),
});
export const UpdateLlmSchema = z.object({
model: z.string().min(1).optional(),
url: z.string().url().or(z.literal('')).optional(),
tier: z.enum(LLM_TIERS).optional(),
description: z.string().max(500).optional(),
apiKeyRef: ApiKeyRefSchema.nullable().optional(),
extraConfig: z.record(z.unknown()).optional(),
});
export type CreateLlmInput = z.infer<typeof CreateLlmSchema>;
export type UpdateLlmInput = z.infer<typeof UpdateLlmSchema>;
export type ApiKeyRef = z.infer<typeof ApiKeyRefSchema>;
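
A sketch of a payload the create schema accepts; the names and model id are placeholders.

// Sketch: CreateLlmSchema fills tier/description/extraConfig defaults when omitted.
const input = CreateLlmSchema.parse({
  name: 'claude-sonnet',                              // must match /^[a-z0-9-]+$/
  type: 'anthropic',
  model: 'claude-3-5-sonnet-20241022',
  tier: 'heavy',
  apiKeyRef: { name: 'anthropic-key', key: 'token' }, // a key inside that Secret's data map
});
// input is a CreateLlmInput; description defaults to '' and extraConfig to {}.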

View File

@@ -1,7 +1,7 @@
import { z } from 'zod';
export const RBAC_ROLES = ['edit', 'view', 'create', 'delete', 'run', 'expose'] as const;
export const RBAC_RESOURCES = ['*', 'servers', 'instances', 'secrets', 'secretbackends', 'llms', 'projects', 'templates', 'users', 'groups', 'rbac', 'prompts', 'promptrequests', 'mcptokens'] as const;
export const RBAC_RESOURCES = ['*', 'servers', 'instances', 'secrets', 'projects', 'templates', 'users', 'groups', 'rbac', 'prompts', 'promptrequests', 'mcptokens'] as const;
/** Singular→plural map for resource names. */
const RESOURCE_ALIASES: Record<string, string> = {
@@ -15,8 +15,6 @@ const RESOURCE_ALIASES: Record<string, string> = {
prompt: 'prompts',
promptrequest: 'promptrequests',
mcptoken: 'mcptokens',
secretbackend: 'secretbackends',
llm: 'llms',
};
/** Normalize a resource name to its canonical plural form. */

View File

@@ -9,25 +9,6 @@ import type { IProjectRepository } from '../src/repositories/project.repository.
import type { IUserRepository } from '../src/repositories/user.repository.js';
import type { IGroupRepository } from '../src/repositories/group.repository.js';
import type { IRbacDefinitionRepository } from '../src/repositories/rbac-definition.repository.js';
import type { SecretService } from '../src/services/secret.service.js';
/**
* Minimal SecretService shim over a mock repo — just the `.create()` / `.update()`
* methods that RestoreService calls. We don't need the backend-dispatch path
* here since the restore happy-path tests don't exercise remote backends.
*/
function mockSecretService(repo: ISecretRepository): SecretService {
return {
create: vi.fn(async (input: unknown) => {
const data = input as { name: string; data: Record<string, string> };
return repo.create({ name: data.name, backendId: 'backend-plaintext', data: data.data, externalRef: '' });
}),
update: vi.fn(async (id: string, input: unknown) => {
const data = input as { data: Record<string, string> };
return repo.update(id, { data: data.data });
}),
} as unknown as SecretService;
}
// Mock data
const mockServers = [
@@ -314,7 +295,7 @@ describe('RestoreService', () => {
(userRepo.findByEmail as ReturnType<typeof vi.fn>).mockResolvedValue(null);
(groupRepo.findByName as ReturnType<typeof vi.fn>).mockResolvedValue(null);
(rbacRepo.findByName as ReturnType<typeof vi.fn>).mockResolvedValue(null);
restoreService = new RestoreService(serverRepo, projectRepo, secretRepo, mockSecretService(secretRepo), userRepo, groupRepo, rbacRepo);
restoreService = new RestoreService(serverRepo, projectRepo, secretRepo, userRepo, groupRepo, rbacRepo);
});
const validBundle = {
@@ -595,7 +576,7 @@ describe('Backup Routes', () => {
(rGroupRepo.findByName as ReturnType<typeof vi.fn>).mockResolvedValue(null);
const rRbacRepo = mockRbacRepo();
(rRbacRepo.findByName as ReturnType<typeof vi.fn>).mockResolvedValue(null);
restoreService = new RestoreService(rSRepo, rPrRepo, rSecRepo, mockSecretService(rSecRepo), rUserRepo, rGroupRepo, rRbacRepo);
restoreService = new RestoreService(rSRepo, rPrRepo, rSecRepo, rUserRepo, rGroupRepo, rRbacRepo);
});
async function buildApp() {

View File

@@ -1,5 +1,6 @@
import { describe, it, expect, vi } from 'vitest';
import { resolveServerEnv, type SecretResolver } from '../src/services/env-resolver.js';
import { resolveServerEnv } from '../src/services/env-resolver.js';
import type { ISecretRepository } from '../src/repositories/interfaces.js';
import type { McpServer } from '@prisma/client';
function makeServer(env: unknown[]): McpServer {
@@ -22,16 +23,18 @@ function makeServer(env: unknown[]): McpServer {
} as McpServer;
}
/** A SecretResolver backed by a {secretName: {key: value}} map. */
function mockResolver(secrets: Record<string, Record<string, string>>): SecretResolver {
function mockSecretRepo(secrets: Record<string, Record<string, string>>): ISecretRepository {
return {
resolve: vi.fn(async (name: string, key: string): Promise<string> => {
findAll: vi.fn(async () => []),
findById: vi.fn(async () => null),
findByName: vi.fn(async (name: string) => {
const data = secrets[name];
if (!data) throw new Error(`Secret '${name}' not found`);
const value = data[key];
if (value === undefined) throw new Error(`Key '${key}' not found in secret '${name}'`);
return value;
if (!data) return null;
return { id: `sec-${name}`, name, data, version: 1, createdAt: new Date(), updatedAt: new Date() };
}),
create: vi.fn(async () => ({} as never)),
update: vi.fn(async () => ({} as never)),
delete: vi.fn(async () => {}),
};
}
@@ -41,7 +44,8 @@ describe('resolveServerEnv', () => {
{ name: 'FOO', value: 'bar' },
{ name: 'BAZ', value: 'qux' },
]);
const result = await resolveServerEnv(server, mockResolver({}));
const repo = mockSecretRepo({});
const result = await resolveServerEnv(server, repo);
expect(result).toEqual({ FOO: 'bar', BAZ: 'qux' });
});
@@ -49,8 +53,10 @@ describe('resolveServerEnv', () => {
const server = makeServer([
{ name: 'TOKEN', valueFrom: { secretRef: { name: 'ha-creds', key: 'HOMEASSISTANT_TOKEN' } } },
]);
const resolver = mockResolver({ 'ha-creds': { HOMEASSISTANT_TOKEN: 'secret-token-123' } });
const result = await resolveServerEnv(server, resolver);
const repo = mockSecretRepo({
'ha-creds': { HOMEASSISTANT_TOKEN: 'secret-token-123' },
});
const result = await resolveServerEnv(server, repo);
expect(result).toEqual({ TOKEN: 'secret-token-123' });
});
@@ -59,42 +65,48 @@ describe('resolveServerEnv', () => {
{ name: 'URL', value: 'https://ha.local' },
{ name: 'TOKEN', valueFrom: { secretRef: { name: 'creds', key: 'TOKEN' } } },
]);
const resolver = mockResolver({ creds: { TOKEN: 'my-token' } });
const result = await resolveServerEnv(server, resolver);
const repo = mockSecretRepo({
creds: { TOKEN: 'my-token' },
});
const result = await resolveServerEnv(server, repo);
expect(result).toEqual({ URL: 'https://ha.local', TOKEN: 'my-token' });
});
it('calls the resolver once per distinct ref', async () => {
it('caches secret lookups', async () => {
const server = makeServer([
{ name: 'A', valueFrom: { secretRef: { name: 'shared', key: 'KEY_A' } } },
{ name: 'B', valueFrom: { secretRef: { name: 'shared', key: 'KEY_B' } } },
]);
const resolver = mockResolver({ shared: { KEY_A: 'val-a', KEY_B: 'val-b' } });
const result = await resolveServerEnv(server, resolver);
const repo = mockSecretRepo({
shared: { KEY_A: 'val-a', KEY_B: 'val-b' },
});
const result = await resolveServerEnv(server, repo);
expect(result).toEqual({ A: 'val-a', B: 'val-b' });
// Resolver is called per-entry now — caching moved to the SecretService layer,
// which is where downstream drivers can be hit at most once per (name, key) pair.
expect(resolver.resolve).toHaveBeenCalledTimes(2);
expect(repo.findByName).toHaveBeenCalledTimes(1);
});
it('throws when secret not found', async () => {
const server = makeServer([
{ name: 'TOKEN', valueFrom: { secretRef: { name: 'missing', key: 'TOKEN' } } },
]);
await expect(resolveServerEnv(server, mockResolver({}))).rejects.toThrow(/Secret 'missing' not found/);
const repo = mockSecretRepo({});
await expect(resolveServerEnv(server, repo)).rejects.toThrow("Secret 'missing' not found");
});
it('throws when secret key not found', async () => {
const server = makeServer([
{ name: 'TOKEN', valueFrom: { secretRef: { name: 'creds', key: 'NONEXISTENT' } } },
]);
const resolver = mockResolver({ creds: { OTHER_KEY: 'val' } });
await expect(resolveServerEnv(server, resolver)).rejects.toThrow(/Key 'NONEXISTENT' not found/);
const repo = mockSecretRepo({
creds: { OTHER_KEY: 'val' },
});
await expect(resolveServerEnv(server, repo)).rejects.toThrow("Key 'NONEXISTENT' not found in secret 'creds'");
});
it('returns empty map for empty env', async () => {
const server = makeServer([]);
const result = await resolveServerEnv(server, mockResolver({}));
const repo = mockSecretRepo({});
const result = await resolveServerEnv(server, repo);
expect(result).toEqual({});
});
});

View File

@@ -1,210 +0,0 @@
import { describe, it, expect, vi } from 'vitest';
import { OpenAiPassthroughAdapter } from '../src/services/llm/adapters/openai-passthrough.js';
import { AnthropicAdapter } from '../src/services/llm/adapters/anthropic.js';
import { LlmAdapterRegistry, UnsupportedProviderError } from '../src/services/llm/dispatcher.js';
import type { InferContext } from '../src/services/llm/types.js';
function mockFetch(responses: Array<{ match: RegExp; status: number; body?: unknown; text?: string }>): ReturnType<typeof vi.fn> {
return vi.fn(async (input: string | URL, _init?: RequestInit) => {
const url = String(input);
const match = responses.find((r) => r.match.test(url));
if (!match) throw new Error(`unexpected fetch: ${url}`);
const body = match.body !== undefined ? JSON.stringify(match.body) : (match.text ?? '');
return new Response(body, { status: match.status, headers: { 'Content-Type': 'application/json' } });
});
}
function makeCtx(overrides: Partial<InferContext> = {}): InferContext {
return {
body: { model: '', messages: [{ role: 'user', content: 'hello' }] },
modelOverride: 'default-model',
apiKey: 'test-key',
url: '',
extraConfig: {},
...overrides,
};
}
// Helper to build a streaming Response from SSE lines.
function sseResponse(events: string[]): Response {
const body = events.join('\n\n') + '\n\n';
const stream = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(new TextEncoder().encode(body));
controller.close();
},
});
return new Response(stream, { status: 200, headers: { 'Content-Type': 'text/event-stream' } });
}
describe('OpenAiPassthroughAdapter', () => {
it('infer: POSTs to <url>/v1/chat/completions with Authorization + body', async () => {
const fetchFn = mockFetch([{
match: /\/v1\/chat\/completions$/,
status: 200,
body: { id: 'x', choices: [{ message: { role: 'assistant', content: 'hi' } }] },
}]);
const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
const ctx = makeCtx({ url: 'https://api.example.com' });
const res = await adapter.infer(ctx);
expect(res.status).toBe(200);
const [url, init] = fetchFn.mock.calls[0] as [string, RequestInit];
expect(url).toBe('https://api.example.com/v1/chat/completions');
expect(init.method).toBe('POST');
const headers = init.headers as Record<string, string>;
expect(headers['Authorization']).toBe('Bearer test-key');
const sent = JSON.parse(init.body as string) as { model: string; stream: boolean };
expect(sent.model).toBe('default-model'); // filled from modelOverride
expect(sent.stream).toBe(false);
});
it('infer: uses default URL for openai when url is empty', async () => {
const fetchFn = mockFetch([{ match: /api\.openai\.com/, status: 200, body: {} }]);
const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
await adapter.infer(makeCtx());
const [url] = fetchFn.mock.calls[0] as [string, RequestInit];
expect(url).toBe('https://api.openai.com/v1/chat/completions');
});
it('infer: throws for vllm when url is empty (no default)', async () => {
const adapter = new OpenAiPassthroughAdapter('vllm', { fetch: vi.fn() as unknown as typeof fetch });
await expect(adapter.infer(makeCtx())).rejects.toThrow(/no default endpoint/);
});
it('infer: omits Authorization when apiKey is empty', async () => {
const fetchFn = mockFetch([{ match: /ollama/, status: 200, body: {} }]);
const adapter = new OpenAiPassthroughAdapter('ollama', { fetch: fetchFn as unknown as typeof fetch });
await adapter.infer(makeCtx({ url: 'http://ollama:11434', apiKey: '' }));
const [, init] = fetchFn.mock.calls[0] as [string, RequestInit];
const headers = init.headers as Record<string, string>;
expect(headers['Authorization']).toBeUndefined();
});
it('stream: forwards SSE chunks and emits terminal [DONE]', async () => {
const fetchFn = vi.fn(async () => sseResponse([
'data: {"choices":[{"delta":{"content":"hi"}}]}',
'data: {"choices":[{"delta":{"content":"!"}}]}',
'data: [DONE]',
]));
const adapter = new OpenAiPassthroughAdapter('openai', { fetch: fetchFn as unknown as typeof fetch });
const ctx = makeCtx({ url: 'http://example', body: { model: '', messages: [], stream: true } });
const chunks: { data: string; done?: boolean }[] = [];
for await (const c of adapter.stream(ctx)) chunks.push(c);
expect(chunks).toHaveLength(3);
expect(chunks[2]?.done).toBe(true);
});
});
describe('AnthropicAdapter', () => {
it('infer: translates system+user messages, posts to /v1/messages', async () => {
const fetchFn = mockFetch([{
match: /\/v1\/messages$/,
status: 200,
body: {
id: 'msg_01', model: 'claude-3-5-sonnet-20241022', role: 'assistant',
content: [{ type: 'text', text: 'howdy' }],
stop_reason: 'end_turn',
usage: { input_tokens: 5, output_tokens: 2 },
},
}]);
const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
const ctx = makeCtx({
body: {
model: '', messages: [
{ role: 'system', content: 'be nice' },
{ role: 'user', content: 'hi' },
],
},
modelOverride: 'claude-3-5-sonnet-20241022',
});
const res = await adapter.infer(ctx);
expect(res.status).toBe(200);
const [url, init] = fetchFn.mock.calls[0] as [string, RequestInit];
expect(url).toBe('https://api.anthropic.com/v1/messages');
const headers = init.headers as Record<string, string>;
expect(headers['x-api-key']).toBe('test-key');
expect(headers['anthropic-version']).toBeDefined();
const sent = JSON.parse(init.body as string) as {
model: string; system: string; messages: Array<{ role: string; content: string }>; max_tokens: number;
};
expect(sent.model).toBe('claude-3-5-sonnet-20241022');
expect(sent.system).toBe('be nice');
expect(sent.messages).toEqual([{ role: 'user', content: 'hi' }]);
expect(sent.max_tokens).toBe(1024); // default
// Response shape: OpenAI chat.completion
const body = res.body as { choices: Array<{ message: { content: string }; finish_reason: string }>; usage: { total_tokens: number } };
expect(body.choices[0]!.message.content).toBe('howdy');
expect(body.choices[0]!.finish_reason).toBe('stop');
expect(body.usage.total_tokens).toBe(7);
});
it('infer: returns a synthetic error body on non-2xx', async () => {
const fetchFn = vi.fn(async () => new Response('boom', { status: 500 }));
const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
const res = await adapter.infer(makeCtx({ body: { model: '', messages: [{ role: 'user', content: 'x' }] } }));
expect(res.status).toBe(500);
const body = res.body as { error: { message: string } };
expect(body.error.message).toMatch(/HTTP 500/);
});
it('stream: translates anthropic event stream into OpenAI chunks', async () => {
const events = [
'event: message_start\ndata: {"type":"message_start","message":{"id":"m","content":[]}}',
'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"he"}}',
'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"llo"}}',
'event: message_delta\ndata: {"type":"message_delta","delta":{"stop_reason":"end_turn"}}',
'event: message_stop\ndata: {"type":"message_stop"}',
];
const fetchFn = vi.fn(async () => sseResponse(events));
const adapter = new AnthropicAdapter({ fetch: fetchFn as unknown as typeof fetch });
const ctx = makeCtx({ body: { model: '', messages: [{ role: 'user', content: 'hi' }], stream: true } });
const chunks: { data: string; done?: boolean }[] = [];
for await (const c of adapter.stream(ctx)) chunks.push(c);
// Expect: role-prime, two text deltas, finish-reason, [DONE]
expect(chunks[chunks.length - 1]?.data).toBe('[DONE]');
expect(chunks[chunks.length - 1]?.done).toBe(true);
// First chunk is the role-prime (role: assistant, content: '').
const first = JSON.parse(chunks[0]!.data) as { choices: [{ delta: { role: string; content: string } }] };
expect(first.choices[0]!.delta.role).toBe('assistant');
// Next two chunks carry the text.
const d1 = JSON.parse(chunks[1]!.data) as { choices: [{ delta: { content: string } }] };
const d2 = JSON.parse(chunks[2]!.data) as { choices: [{ delta: { content: string } }] };
expect(d1.choices[0]!.delta.content).toBe('he');
expect(d2.choices[0]!.delta.content).toBe('llo');
// Finish-reason chunk.
const stopped = JSON.parse(chunks[3]!.data) as { choices: [{ finish_reason: string }] };
expect(stopped.choices[0]!.finish_reason).toBe('stop');
});
});
describe('LlmAdapterRegistry', () => {
it('returns the right adapter kind for each type', () => {
const reg = new LlmAdapterRegistry();
expect(reg.get('openai').kind).toBe('openai');
expect(reg.get('vllm').kind).toBe('vllm');
expect(reg.get('deepseek').kind).toBe('deepseek');
expect(reg.get('ollama').kind).toBe('ollama');
expect(reg.get('anthropic').kind).toBe('anthropic');
});
it('caches adapters between calls', () => {
const reg = new LlmAdapterRegistry();
const a = reg.get('openai');
const b = reg.get('openai');
expect(a).toBe(b);
});
it('rejects unsupported providers (gemini-cli is deferred)', () => {
const reg = new LlmAdapterRegistry();
expect(() => reg.get('gemini-cli')).toThrow(UnsupportedProviderError);
expect(() => reg.get('bogus')).toThrow(UnsupportedProviderError);
});
});

View File

@@ -1,208 +0,0 @@
import { describe, it, expect, vi, afterEach } from 'vitest';
import Fastify from 'fastify';
import type { FastifyInstance } from 'fastify';
import { registerLlmInferRoutes } from '../src/routes/llm-infer.js';
import { LlmAdapterRegistry } from '../src/services/llm/dispatcher.js';
import { errorHandler } from '../src/middleware/error-handler.js';
import type { LlmView } from '../src/services/llm.service.js';
import { NotFoundError } from '../src/services/mcp-server.service.js';
let app: FastifyInstance;
function makeLlmView(overrides: Partial<LlmView> = {}): LlmView {
return {
id: 'llm-1',
name: 'claude',
type: 'anthropic',
model: 'claude-3-5-sonnet-20241022',
url: '',
tier: 'heavy',
description: '',
apiKeyRef: { name: 'anthropic-key', key: 'token' },
extraConfig: {},
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
...overrides,
};
}
afterEach(async () => {
if (app) await app.close();
});
function sseResponse(events: string[]): Response {
const body = events.join('\n\n') + '\n\n';
const stream = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(new TextEncoder().encode(body));
controller.close();
},
});
return new Response(stream, { status: 200 });
}
interface LlmServiceLike {
getByName: (name: string) => Promise<LlmView>;
resolveApiKey: (name: string) => Promise<string>;
}
async function setupApp(
llmService: LlmServiceLike,
adapters: LlmAdapterRegistry,
onInferenceEvent?: Parameters<typeof registerLlmInferRoutes>[1]['onInferenceEvent'],
): Promise<FastifyInstance> {
app = Fastify({ logger: false });
app.setErrorHandler(errorHandler);
const deps: Parameters<typeof registerLlmInferRoutes>[1] = {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
llmService: llmService as any,
adapters,
};
if (onInferenceEvent !== undefined) deps.onInferenceEvent = onInferenceEvent;
registerLlmInferRoutes(app, deps);
await app.ready();
return app;
}
describe('POST /api/v1/llms/:name/infer', () => {
it('returns 404 when the Llm does not exist', async () => {
const svc: LlmServiceLike = {
getByName: async () => { throw new NotFoundError('Llm not found: missing'); },
resolveApiKey: async () => '',
};
await setupApp(svc, new LlmAdapterRegistry());
const res = await app.inject({
method: 'POST',
url: '/api/v1/llms/missing/infer',
payload: { messages: [{ role: 'user', content: 'hi' }] },
});
expect(res.statusCode).toBe(404);
});
it('returns 400 when messages is missing', async () => {
const svc: LlmServiceLike = {
getByName: async () => makeLlmView({ apiKeyRef: null }),
resolveApiKey: async () => '',
};
await setupApp(svc, new LlmAdapterRegistry());
const res = await app.inject({
method: 'POST',
url: '/api/v1/llms/claude/infer',
payload: {},
});
expect(res.statusCode).toBe(400);
});
it('dispatches non-streaming to the adapter and returns its JSON', async () => {
const fetchFn = vi.fn(async () => new Response(JSON.stringify({
id: 'msg_1', model: 'claude-3-5-sonnet-20241022', role: 'assistant',
content: [{ type: 'text', text: 'hello' }],
stop_reason: 'end_turn',
usage: { input_tokens: 1, output_tokens: 1 },
}), { status: 200 }));
const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
const svc: LlmServiceLike = {
getByName: async () => makeLlmView(),
resolveApiKey: async () => 'sk-ant-xyz',
};
const events: unknown[] = [];
await setupApp(svc, adapters, (e) => events.push(e));
const res = await app.inject({
method: 'POST',
url: '/api/v1/llms/claude/infer',
payload: { messages: [{ role: 'user', content: 'hi' }] },
});
expect(res.statusCode).toBe(200);
const body = res.json<{ choices: Array<{ message: { content: string } }> }>();
expect(body.choices[0]!.message.content).toBe('hello');
// Audit event emitted
expect(events).toHaveLength(1);
expect((events[0] as { kind: string; llmName: string; status: number }).kind).toBe('llm_inference_call');
expect((events[0] as { llmName: string }).llmName).toBe('claude');
expect((events[0] as { streaming: boolean }).streaming).toBe(false);
expect((events[0] as { status: number }).status).toBe(200);
});
it('500s when apiKey resolution fails', async () => {
const adapters = new LlmAdapterRegistry();
const svc: LlmServiceLike = {
getByName: async () => makeLlmView(),
resolveApiKey: async () => { throw new Error('secret not found'); },
};
await setupApp(svc, adapters);
const res = await app.inject({
method: 'POST',
url: '/api/v1/llms/claude/infer',
payload: { messages: [{ role: 'user', content: 'hi' }] },
});
expect(res.statusCode).toBe(500);
expect(res.json<{ error: string }>().error).toMatch(/secret not found/);
});
it('skips apiKey resolution when the Llm has no apiKeyRef', async () => {
const fetchFn = vi.fn(async () => new Response(JSON.stringify({ id: 'x', choices: [] }), { status: 200 }));
const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
const resolveSpy = vi.fn();
const svc: LlmServiceLike = {
getByName: async () => makeLlmView({ type: 'ollama', url: 'http://ollama:11434', apiKeyRef: null }),
resolveApiKey: resolveSpy as unknown as LlmServiceLike['resolveApiKey'],
};
await setupApp(svc, adapters);
const res = await app.inject({
method: 'POST',
url: '/api/v1/llms/ollama-local/infer',
payload: { messages: [{ role: 'user', content: 'hi' }] },
});
expect(res.statusCode).toBe(200);
expect(resolveSpy).not.toHaveBeenCalled();
});
it('streams SSE chunks for stream: true', async () => {
const fetchFn = vi.fn(async () => sseResponse([
'event: content_block_delta\ndata: {"type":"content_block_delta","delta":{"type":"text_delta","text":"hi"}}',
'event: message_stop\ndata: {"type":"message_stop"}',
]));
const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
const svc: LlmServiceLike = {
getByName: async () => makeLlmView(),
resolveApiKey: async () => 'sk-ant-xyz',
};
const events: Array<{ streaming: boolean; status: number }> = [];
// eslint-disable-next-line @typescript-eslint/no-explicit-any
await setupApp(svc, adapters, ((e: any) => events.push(e)) as any);
const res = await app.inject({
method: 'POST',
url: '/api/v1/llms/claude/infer',
payload: { messages: [{ role: 'user', content: 'hi' }], stream: true },
});
expect(res.statusCode).toBe(200);
expect(res.body).toContain('data:');
expect(res.body).toContain('[DONE]');
expect(events).toHaveLength(1);
expect(events[0]!.streaming).toBe(true);
});
it('502s on adapter errors (non-streaming)', async () => {
const fetchFn = vi.fn(async () => { throw new Error('upstream down'); });
const adapters = new LlmAdapterRegistry({ fetch: fetchFn as unknown as typeof fetch });
const svc: LlmServiceLike = {
getByName: async () => makeLlmView({ type: 'openai', url: 'http://example', apiKeyRef: null }),
resolveApiKey: async () => '',
};
await setupApp(svc, adapters);
const res = await app.inject({
method: 'POST',
url: '/api/v1/llms/x/infer',
payload: { messages: [{ role: 'user', content: 'hi' }] },
});
expect(res.statusCode).toBe(502);
expect(res.json<{ error: string }>().error).toMatch(/upstream down/);
});
});

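For orientation, here is a minimal client-side sketch of the request/response contract these (now-removed) tests exercise. The base URL and bearer token are stand-ins; the payload and response shapes follow the fixtures above (404 unknown Llm, 400 missing messages, 502 adapter failure, OpenAI-style choices on success).

// Illustrative sketch only; MCPD_URL and TOKEN are hypothetical values.
const MCPD_URL = 'http://mcpd.example:8080';
const TOKEN = 'bearer-x';

async function infer(llmName: string, prompt: string): Promise<string> {
  const res = await fetch(`${MCPD_URL}/api/v1/llms/${encodeURIComponent(llmName)}/infer`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${TOKEN}` },
    body: JSON.stringify({ messages: [{ role: 'user', content: prompt }] }),
  });
  // 404 → unknown Llm, 400 → bad payload, 502 → adapter/upstream failure (see tests above).
  if (!res.ok) throw new Error(`infer failed: ${res.status}`);
  const body = (await res.json()) as { choices: Array<{ message: { content: string } }> };
  return body.choices[0]!.message.content;
}

// Streaming: add `stream: true` to the payload and read SSE `data:` lines until `data: [DONE]`.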
View File

@@ -1,194 +0,0 @@
import { describe, it, expect, vi, afterEach } from 'vitest';
import Fastify from 'fastify';
import type { FastifyInstance } from 'fastify';
import { registerLlmRoutes } from '../src/routes/llms.js';
import { LlmService } from '../src/services/llm.service.js';
import { errorHandler } from '../src/middleware/error-handler.js';
import type { ILlmRepository } from '../src/repositories/llm.repository.js';
import type { Llm, Secret } from '@prisma/client';
let app: FastifyInstance;
function makeLlm(overrides: Partial<Llm> = {}): Llm {
return {
id: 'llm-1',
name: 'claude',
type: 'anthropic',
model: 'claude-3-5-sonnet-20241022',
url: '',
tier: 'heavy',
description: '',
apiKeySecretId: null,
apiKeySecretKey: null,
extraConfig: {},
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
...overrides,
};
}
function mockRepo(initial: Llm[] = []): ILlmRepository {
const rows = new Map(initial.map((r) => [r.id, r]));
return {
findAll: vi.fn(async () => [...rows.values()]),
findById: vi.fn(async (id: string) => rows.get(id) ?? null),
findByName: vi.fn(async (name: string) => {
for (const r of rows.values()) if (r.name === name) return r;
return null;
}),
findByTier: vi.fn(async () => []),
create: vi.fn(async (data) => {
const row = makeLlm({ id: 'new-id', name: data.name, type: data.type, model: data.model });
rows.set(row.id, row);
return row;
}),
update: vi.fn(async (id, data) => {
const existing = rows.get(id)!;
const next: Llm = {
...existing,
...(data.model !== undefined ? { model: data.model } : {}),
};
rows.set(id, next);
return next;
}),
delete: vi.fn(async (id) => { rows.delete(id); }),
};
}
function mockSecretService() {
const sec: Secret = {
id: 'sec-1', name: 'anthropic-key', backendId: 'b', data: {}, externalRef: '',
version: 1, createdAt: new Date(), updatedAt: new Date(),
};
return {
getById: vi.fn(async (id: string) => {
if (id === sec.id) return sec;
throw new Error('not found');
}),
getByName: vi.fn(async (name: string) => {
if (name === sec.name) return sec;
throw new Error('not found');
}),
resolveData: vi.fn(async () => ({ token: 'sk-ant-xyz' })),
};
}
afterEach(async () => {
if (app) await app.close();
});
async function createApp(repo: ILlmRepository): Promise<FastifyInstance> {
app = Fastify({ logger: false });
app.setErrorHandler(errorHandler);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const service = new LlmService(repo, mockSecretService() as any);
registerLlmRoutes(app, service);
await app.ready();
return app;
}
describe('Llm Routes', () => {
it('GET /api/v1/llms returns a list', async () => {
await createApp(mockRepo([makeLlm()]));
const res = await app.inject({ method: 'GET', url: '/api/v1/llms' });
expect(res.statusCode).toBe(200);
const body = res.json<Array<{ name: string }>>();
expect(body).toHaveLength(1);
expect(body[0]!.name).toBe('claude');
});
it('GET /api/v1/llms/:id returns 404 when missing', async () => {
await createApp(mockRepo());
const res = await app.inject({ method: 'GET', url: '/api/v1/llms/missing' });
expect(res.statusCode).toBe(404);
});
it('GET /api/v1/llms/:nameOrId resolves by human name when not a CUID', async () => {
await createApp(mockRepo([makeLlm({ id: 'llm-1', name: 'claude' })]));
const res = await app.inject({ method: 'GET', url: '/api/v1/llms/claude' });
expect(res.statusCode).toBe(200);
expect(res.json<{ name: string; id: string }>().name).toBe('claude');
});
it('HEAD /api/v1/llms/:name returns 200 for an existing Llm (failover RBAC pre-check)', async () => {
await createApp(mockRepo([makeLlm({ name: 'claude' })]));
const res = await app.inject({ method: 'HEAD', url: '/api/v1/llms/claude' });
expect(res.statusCode).toBe(200);
});
it('HEAD /api/v1/llms/:name returns 404 for a missing Llm', async () => {
await createApp(mockRepo());
const res = await app.inject({ method: 'HEAD', url: '/api/v1/llms/missing' });
expect(res.statusCode).toBe(404);
});
it('POST /api/v1/llms creates and returns 201', async () => {
await createApp(mockRepo());
const res = await app.inject({
method: 'POST',
url: '/api/v1/llms',
payload: {
name: 'ollama-local',
type: 'ollama',
model: 'llama3',
url: 'http://localhost:11434',
},
});
expect(res.statusCode).toBe(201);
expect(res.json<{ name: string }>().name).toBe('ollama-local');
});
it('POST /api/v1/llms rejects bad input with 400', async () => {
await createApp(mockRepo());
const res = await app.inject({
method: 'POST',
url: '/api/v1/llms',
payload: { name: '', type: 'anthropic', model: 'x' },
});
expect(res.statusCode).toBe(400);
});
it('POST /api/v1/llms returns 409 when name exists', async () => {
await createApp(mockRepo([makeLlm({ name: 'claude' })]));
const res = await app.inject({
method: 'POST',
url: '/api/v1/llms',
payload: { name: 'claude', type: 'anthropic', model: 'x' },
});
expect(res.statusCode).toBe(409);
});
it('PUT /api/v1/llms/:id updates model', async () => {
await createApp(mockRepo([makeLlm({ id: 'llm-1' })]));
const res = await app.inject({
method: 'PUT',
url: '/api/v1/llms/llm-1',
payload: { model: 'claude-3-opus' },
});
expect(res.statusCode).toBe(200);
expect(res.json<{ model: string }>().model).toBe('claude-3-opus');
});
it('PUT /api/v1/llms/:id returns 404 when missing', async () => {
await createApp(mockRepo());
const res = await app.inject({
method: 'PUT',
url: '/api/v1/llms/missing',
payload: { model: 'x' },
});
expect(res.statusCode).toBe(404);
});
it('DELETE /api/v1/llms/:id returns 204', async () => {
await createApp(mockRepo([makeLlm({ id: 'llm-1' })]));
const res = await app.inject({ method: 'DELETE', url: '/api/v1/llms/llm-1' });
expect(res.statusCode).toBe(204);
});
it('DELETE /api/v1/llms/:id returns 404 when missing', async () => {
await createApp(mockRepo());
const res = await app.inject({ method: 'DELETE', url: '/api/v1/llms/missing' });
expect(res.statusCode).toBe(404);
});
});

View File

@@ -1,232 +0,0 @@
import { describe, it, expect, vi } from 'vitest';
import { LlmService } from '../src/services/llm.service.js';
import type { ILlmRepository } from '../src/repositories/llm.repository.js';
import type { Llm, Secret } from '@prisma/client';
function makeLlm(overrides: Partial<Llm> = {}): Llm {
return {
id: 'llm-1',
name: 'claude',
type: 'anthropic',
model: 'claude-3-5-sonnet-20241022',
url: '',
tier: 'heavy',
description: '',
apiKeySecretId: null,
apiKeySecretKey: null,
extraConfig: {},
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
...overrides,
};
}
function makeSecret(overrides: Partial<Secret> = {}): Secret {
return {
id: 'sec-anthropic',
name: 'anthropic-key',
backendId: 'backend-plaintext',
data: {},
externalRef: '',
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
...overrides,
};
}
function mockRepo(initial: Llm[] = []): ILlmRepository {
const rows = new Map<string, Llm>(initial.map((r) => [r.id, r]));
return {
findAll: vi.fn(async () => [...rows.values()]),
findById: vi.fn(async (id: string) => rows.get(id) ?? null),
findByName: vi.fn(async (name: string) => {
for (const r of rows.values()) if (r.name === name) return r;
return null;
}),
findByTier: vi.fn(async (tier: string) => [...rows.values()].filter((r) => r.tier === tier)),
create: vi.fn(async (data) => {
const row = makeLlm({
id: `llm-${String(rows.size + 1)}`,
name: data.name,
type: data.type,
model: data.model,
url: data.url ?? '',
tier: data.tier ?? 'fast',
description: data.description ?? '',
apiKeySecretId: data.apiKeySecretId ?? null,
apiKeySecretKey: data.apiKeySecretKey ?? null,
extraConfig: (data.extraConfig ?? {}) as Llm['extraConfig'],
});
rows.set(row.id, row);
return row;
}),
update: vi.fn(async (id, data) => {
const existing = rows.get(id);
if (!existing) throw new Error('not found');
const next: Llm = {
...existing,
...(data.model !== undefined ? { model: data.model } : {}),
...(data.url !== undefined ? { url: data.url } : {}),
...(data.tier !== undefined ? { tier: data.tier } : {}),
...(data.description !== undefined ? { description: data.description } : {}),
...(data.apiKeySecretId !== undefined ? { apiKeySecretId: data.apiKeySecretId } : {}),
...(data.apiKeySecretKey !== undefined ? { apiKeySecretKey: data.apiKeySecretKey } : {}),
...(data.extraConfig !== undefined ? { extraConfig: data.extraConfig as Llm['extraConfig'] } : {}),
};
rows.set(id, next);
return next;
}),
delete: vi.fn(async (id) => { rows.delete(id); }),
};
}
function mockSecrets(secretByName: Record<string, Secret>, resolved: Record<string, string> = {}): {
getById: ReturnType<typeof vi.fn>;
getByName: ReturnType<typeof vi.fn>;
resolveData: ReturnType<typeof vi.fn>;
} {
return {
getById: vi.fn(async (id: string) => {
for (const s of Object.values(secretByName)) if (s.id === id) return s;
throw new Error(`secret not found: ${id}`);
}),
getByName: vi.fn(async (name: string) => {
const s = secretByName[name];
if (!s) throw new Error(`secret not found: ${name}`);
return s;
}),
resolveData: vi.fn(async () => resolved),
};
}
describe('LlmService', () => {
it('create parses input and resolves apiKeyRef → secret id', async () => {
const repo = mockRepo();
const sec = makeSecret();
const secrets = mockSecrets({ 'anthropic-key': sec });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, secrets as any);
const view = await svc.create({
name: 'claude',
type: 'anthropic',
model: 'claude-3-5-sonnet-20241022',
tier: 'heavy',
apiKeyRef: { name: 'anthropic-key', key: 'token' },
});
expect(view.name).toBe('claude');
expect(view.apiKeyRef).toEqual({ name: 'anthropic-key', key: 'token' });
expect(secrets.getByName).toHaveBeenCalledWith('anthropic-key');
expect(repo.create).toHaveBeenCalledWith(expect.objectContaining({
apiKeySecretId: sec.id,
apiKeySecretKey: 'token',
}));
});
it('create without apiKeyRef leaves FK columns null', async () => {
const repo = mockRepo();
const secrets = mockSecrets({});
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, secrets as any);
const view = await svc.create({
name: 'ollama-local',
type: 'ollama',
model: 'llama3',
url: 'http://localhost:11434',
tier: 'fast',
});
expect(view.apiKeyRef).toBeNull();
expect(secrets.getByName).not.toHaveBeenCalled();
});
it('create rejects duplicate name', async () => {
const repo = mockRepo([makeLlm({ name: 'claude' })]);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, mockSecrets({}) as any);
await expect(svc.create({
name: 'claude', type: 'anthropic', model: 'x',
})).rejects.toThrow(/already exists/);
});
it('update with apiKeyRef null unlinks the secret', async () => {
const sec = makeSecret();
const repo = mockRepo([makeLlm({ apiKeySecretId: sec.id, apiKeySecretKey: 'token' })]);
const secrets = mockSecrets({ 'anthropic-key': sec });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, secrets as any);
await svc.update('llm-1', { apiKeyRef: null });
expect(repo.update).toHaveBeenCalledWith('llm-1', expect.objectContaining({
apiKeySecretId: null,
apiKeySecretKey: null,
}));
});
it('resolveApiKey reads through SecretService', async () => {
const sec = makeSecret();
const repo = mockRepo([makeLlm({ apiKeySecretId: sec.id, apiKeySecretKey: 'token' })]);
const secrets = mockSecrets({ 'anthropic-key': sec }, { token: 'sk-ant-xyz' });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, secrets as any);
const key = await svc.resolveApiKey('claude');
expect(key).toBe('sk-ant-xyz');
});
it('resolveApiKey throws when Llm has no apiKeyRef', async () => {
const repo = mockRepo([makeLlm()]);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, mockSecrets({}) as any);
await expect(svc.resolveApiKey('claude')).rejects.toThrow(/no apiKeyRef/);
});
it('resolveApiKey throws when the secret key is missing', async () => {
const sec = makeSecret();
const repo = mockRepo([makeLlm({ apiKeySecretId: sec.id, apiKeySecretKey: 'missing-key' })]);
const secrets = mockSecrets({ 'anthropic-key': sec }, { token: 'x' });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, secrets as any);
await expect(svc.resolveApiKey('claude')).rejects.toThrow(/no key 'missing-key'/);
});
it('list returns views with apiKeyRef rendered from secret name', async () => {
const sec = makeSecret();
const repo = mockRepo([makeLlm({ apiKeySecretId: sec.id, apiKeySecretKey: 'token' })]);
const secrets = mockSecrets({ 'anthropic-key': sec });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, secrets as any);
const items = await svc.list();
expect(items).toHaveLength(1);
expect(items[0]!.apiKeyRef).toEqual({ name: 'anthropic-key', key: 'token' });
});
it('delete happy path', async () => {
const repo = mockRepo([makeLlm()]);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, mockSecrets({}) as any);
await svc.delete('llm-1');
expect(repo.delete).toHaveBeenCalledWith('llm-1');
});
it('validation: rejects invalid type', async () => {
const repo = mockRepo();
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, mockSecrets({}) as any);
await expect(svc.create({ name: 'x', type: 'bogus', model: 'y' })).rejects.toThrow();
});
it('validation: rejects invalid tier', async () => {
const repo = mockRepo();
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const svc = new LlmService(repo, mockSecrets({}) as any);
await expect(svc.create({
name: 'x', type: 'openai', model: 'gpt-4', tier: 'warp-speed',
})).rejects.toThrow();
});
});

View File

@@ -1,132 +0,0 @@
import { describe, it, expect, vi } from 'vitest';
import { PlaintextDriver } from '../src/services/secret-backends/plaintext.js';
import { OpenBaoDriver } from '../src/services/secret-backends/openbao.js';
describe('PlaintextDriver', () => {
const driver = new PlaintextDriver({ listAllPlaintext: async () => [{ name: 'a', data: { k: 'v' } }] });
it('read returns the data passed in', async () => {
const result = await driver.read({ name: 's', externalRef: '', data: { token: 'abc' } });
expect(result).toEqual({ token: 'abc' });
});
it('write returns storedData = input, externalRef = empty', async () => {
const result = await driver.write({ name: 's', data: { k: 'v' } });
expect(result).toEqual({ externalRef: '', storedData: { k: 'v' } });
});
it('list delegates to the injected dep', async () => {
const list = await driver.list();
expect(list).toEqual([{ name: 'a', externalRef: '' }]);
});
it('delete is a no-op', async () => {
await expect(driver.delete({ name: 's', externalRef: '' })).resolves.toBeUndefined();
});
});
describe('OpenBaoDriver', () => {
function makeFetch(responses: Array<{ url: RegExp; status: number; body?: unknown }>): ReturnType<typeof vi.fn> {
return vi.fn(async (url: string | URL, _init?: RequestInit) => {
const urlStr = String(url);
const match = responses.find((r) => r.url.test(urlStr));
if (!match) throw new Error(`unexpected fetch: ${urlStr}`);
return new Response(match.body ? JSON.stringify(match.body) : '', { status: match.status });
});
}
const resolver = { resolve: vi.fn(async () => 'test-vault-token') };
it('write sends POST to .../data/<path> with {data: ...}', async () => {
const fetchFn = makeFetch([{ url: /\/v1\/secret\/data\/mcpctl\/mytoken$/, status: 200 }]);
const driver = new OpenBaoDriver(
{ url: 'http://bao.example:8200', tokenSecretRef: { name: 'bao', key: 'token' } },
{ fetch: fetchFn as unknown as typeof fetch, secretRefResolver: resolver },
);
const result = await driver.write({ name: 'mytoken', data: { api_key: 'secret-xyz' } });
expect(result.externalRef).toBe('secret/mcpctl/mytoken');
expect(result.storedData).toEqual({});
expect(fetchFn).toHaveBeenCalledTimes(1);
const [, init] = fetchFn.mock.calls[0] as [unknown, RequestInit];
expect(init.method).toBe('POST');
expect(JSON.parse(init.body as string)).toEqual({ data: { api_key: 'secret-xyz' } });
const headers = init.headers as Record<string, string>;
expect(headers['X-Vault-Token']).toBe('test-vault-token');
});
it('read returns body.data.data', async () => {
const fetchFn = makeFetch([{
url: /\/v1\/secret\/data\/mcpctl\/mytoken$/,
status: 200,
body: { data: { data: { api_key: 'secret-xyz' } } },
}]);
const driver = new OpenBaoDriver(
{ url: 'http://bao.example:8200', tokenSecretRef: { name: 'bao', key: 'token' } },
{ fetch: fetchFn as unknown as typeof fetch, secretRefResolver: resolver },
);
const result = await driver.read({ name: 'mytoken', externalRef: 'secret/mcpctl/mytoken', data: {} });
expect(result).toEqual({ api_key: 'secret-xyz' });
});
it('read throws when the path 404s', async () => {
const fetchFn = makeFetch([{ url: /\/data\//, status: 404 }]);
const driver = new OpenBaoDriver(
{ url: 'http://bao.example:8200', tokenSecretRef: { name: 'bao', key: 'token' } },
{ fetch: fetchFn as unknown as typeof fetch, secretRefResolver: resolver },
);
await expect(driver.read({ name: 'missing', externalRef: '', data: {} })).rejects.toThrow(/not found/);
});
it('delete swallows 404', async () => {
const fetchFn = makeFetch([{ url: /\/metadata\//, status: 404 }]);
const driver = new OpenBaoDriver(
{ url: 'http://bao.example:8200', tokenSecretRef: { name: 'bao', key: 'token' } },
{ fetch: fetchFn as unknown as typeof fetch, secretRefResolver: resolver },
);
await expect(driver.delete({ name: 'gone', externalRef: '' })).resolves.toBeUndefined();
});
it('list returns names from the metadata LIST call', async () => {
const fetchFn = makeFetch([{
url: /\/v1\/secret\/metadata\/mcpctl\/$/,
status: 200,
body: { data: { keys: ['token1', 'token2', 'sub-folder/'] } },
}]);
const driver = new OpenBaoDriver(
{ url: 'http://bao.example:8200', tokenSecretRef: { name: 'bao', key: 'token' } },
{ fetch: fetchFn as unknown as typeof fetch, secretRefResolver: resolver },
);
const result = await driver.list();
// Sub-folders (trailing slash) are excluded; only leaf keys are returned.
expect(result).toEqual([
{ name: 'token1', externalRef: 'secret/mcpctl/token1' },
{ name: 'token2', externalRef: 'secret/mcpctl/token2' },
]);
});
it('caches the vault token after first resolve', async () => {
const fetchFn = makeFetch([
{ url: /\/v1\/secret\/data\/mcpctl\//, status: 200, body: { data: { data: { k: 'v' } } } },
]);
const singleResolver = { resolve: vi.fn(async () => 'test-vault-token') };
const driver = new OpenBaoDriver(
{ url: 'http://bao.example:8200', tokenSecretRef: { name: 'bao', key: 'token' } },
{ fetch: fetchFn as unknown as typeof fetch, secretRefResolver: singleResolver },
);
await driver.read({ name: 'a', externalRef: '', data: {} });
await driver.read({ name: 'a', externalRef: '', data: {} });
expect(singleResolver.resolve).toHaveBeenCalledTimes(1);
});
it('propagates X-Vault-Namespace when configured', async () => {
const fetchFn = makeFetch([{ url: /\/v1\/secret\/data\/mcpctl\//, status: 200 }]);
const driver = new OpenBaoDriver(
{ url: 'http://bao.example:8200', namespace: 'myteam', tokenSecretRef: { name: 'bao', key: 'token' } },
{ fetch: fetchFn as unknown as typeof fetch, secretRefResolver: resolver },
);
await driver.write({ name: 'x', data: { k: 'v' } });
const [, init] = fetchFn.mock.calls[0] as [unknown, RequestInit];
const headers = init.headers as Record<string, string>;
expect(headers['X-Vault-Namespace']).toBe('myteam');
});
});

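For reference, a minimal sketch of driving the OpenBaoDriver the same way these (now-removed) tests do. The Bao URL and the token resolver are stand-ins; the path convention (secret/mcpctl/<name>) and the KV-v2 response shape (body.data.data) are taken from the expectations above.

// Sketch only: tokenResolver stands in for the real secretRefResolver wiring.
const tokenResolver = { resolve: async () => process.env.BAO_TOKEN ?? '' };
const driver = new OpenBaoDriver(
  { url: 'http://bao.example:8200', tokenSecretRef: { name: 'bao', key: 'token' } },
  { fetch: globalThis.fetch, secretRefResolver: tokenResolver },
);

// write → POST /v1/secret/data/mcpctl/mytoken with { data: ... }; nothing is stored locally.
const { externalRef } = await driver.write({ name: 'mytoken', data: { api_key: 'secret-xyz' } });
// read → GET the same path; the secret material comes back as body.data.data.
const value = await driver.read({ name: 'mytoken', externalRef, data: {} });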
View File

@@ -3,68 +3,43 @@ import Fastify from 'fastify';
import type { FastifyInstance } from 'fastify';
import { registerSecretRoutes } from '../src/routes/secrets.js';
import { SecretService } from '../src/services/secret.service.js';
import { SecretBackendService } from '../src/services/secret-backend.service.js';
import { errorHandler } from '../src/middleware/error-handler.js';
import type { ISecretRepository } from '../src/repositories/interfaces.js';
import type { ISecretBackendRepository } from '../src/repositories/secret-backend.repository.js';
import type { SecretBackend } from '@prisma/client';
let app: FastifyInstance;
const PLAINTEXT_BACKEND: SecretBackend = {
id: 'backend-plaintext',
name: 'default',
type: 'plaintext',
config: {},
isDefault: true,
description: '',
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
};
function makeSecret(overrides: Partial<{ id: string; name: string; data: Record<string, string>; externalRef: string; backendId: string }> = {}) {
return {
id: overrides.id ?? 'sec-1',
name: overrides.name ?? 'ha-creds',
backendId: overrides.backendId ?? PLAINTEXT_BACKEND.id,
data: overrides.data ?? { TOKEN: 'abc' },
externalRef: overrides.externalRef ?? '',
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
};
}
function mockRepo(): ISecretRepository {
let lastCreated: ReturnType<typeof makeSecret> | null = null;
let lastCreated: Record<string, unknown> | null = null;
return {
findAll: vi.fn(async () => [makeSecret()]),
findAll: vi.fn(async () => [
{ id: '1', name: 'ha-creds', data: { TOKEN: 'abc' }, version: 1, createdAt: new Date(), updatedAt: new Date() },
]),
findById: vi.fn(async (id: string) => {
if (lastCreated && lastCreated.id === id) return lastCreated;
if (lastCreated && (lastCreated as { id: string }).id === id) return lastCreated as never;
return null;
}),
findByName: vi.fn(async () => null),
findByBackend: vi.fn(async () => []),
create: vi.fn(async (data) => {
const secret = makeSecret({
const secret = {
id: 'new-id',
name: data.name,
data: data.data ?? {},
externalRef: data.externalRef ?? '',
backendId: data.backendId,
});
version: 1,
createdAt: new Date(),
updatedAt: new Date(),
};
lastCreated = secret;
return secret;
}),
update: vi.fn(async (id, data) => {
const secret = makeSecret({
const secret = {
id,
name: lastCreated?.name ?? 'ha-creds',
name: 'ha-creds',
data: data.data,
externalRef: data.externalRef,
backendId: data.backendId ?? PLAINTEXT_BACKEND.id,
});
version: 2,
createdAt: new Date(),
updatedAt: new Date(),
};
lastCreated = secret;
return secret;
}),
@@ -72,32 +47,14 @@ function mockRepo(): ISecretRepository {
};
}
function mockBackendRepo(): ISecretBackendRepository {
return {
findAll: vi.fn(async () => [PLAINTEXT_BACKEND]),
findById: vi.fn(async (id) => (id === PLAINTEXT_BACKEND.id ? PLAINTEXT_BACKEND : null)),
findByName: vi.fn(async (name) => (name === PLAINTEXT_BACKEND.name ? PLAINTEXT_BACKEND : null)),
findDefault: vi.fn(async () => PLAINTEXT_BACKEND),
create: vi.fn(async () => PLAINTEXT_BACKEND),
update: vi.fn(async () => PLAINTEXT_BACKEND),
setAsDefault: vi.fn(async () => PLAINTEXT_BACKEND),
delete: vi.fn(async () => {}),
countReferencingSecrets: vi.fn(async () => 0),
};
}
afterEach(async () => {
if (app) await app.close();
});
async function createApp(repo: ISecretRepository) {
function createApp(repo: ISecretRepository) {
app = Fastify({ logger: false });
app.setErrorHandler(errorHandler);
const backends = new SecretBackendService(mockBackendRepo(), {
plaintext: { listAllPlaintext: async () => [] },
secretRefResolver: { resolve: async () => '' },
});
const service = new SecretService(repo, backends);
const service = new SecretService(repo);
registerSecretRoutes(app, service);
return app.ready();
}
@@ -172,7 +129,7 @@ describe('Secret Routes', () => {
describe('PUT /api/v1/secrets/:id', () => {
it('updates a secret', async () => {
const repo = mockRepo();
vi.mocked(repo.findById).mockResolvedValue(makeSecret({ id: '1' }) as never);
vi.mocked(repo.findById).mockResolvedValue({ id: '1', name: 'ha-creds' } as never);
await createApp(repo);
const res = await app.inject({
method: 'PUT',
@@ -197,7 +154,7 @@ describe('Secret Routes', () => {
describe('DELETE /api/v1/secrets/:id', () => {
it('deletes a secret and returns 204', async () => {
const repo = mockRepo();
vi.mocked(repo.findById).mockResolvedValue(makeSecret({ id: '1' }) as never);
vi.mocked(repo.findById).mockResolvedValue({ id: '1', name: 'ha-creds' } as never);
await createApp(repo);
const res = await app.inject({ method: 'DELETE', url: '/api/v1/secrets/1' });
expect(res.statusCode).toBe(204);

View File

@@ -64,14 +64,6 @@ export interface LlmProviderFileEntry {
idleTimeoutMinutes?: number;
/** vllm-managed: extra args for `vllm serve` */
extraArgs?: string[];
/**
* If set, this local provider is allowed to substitute for the centralized
* Llm of this name when the mcpd inference proxy is unreachable.
* RBAC is still enforced — the caller must have view permission on the
* named Llm via mcpd before failover is permitted (fail-closed if mcpd
* itself can't be reached).
*/
failoverFor?: string;
}
export interface ProjectLlmOverride {

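For reference, a minimal sketch of how the removed failoverFor field was meant to appear in a provider-file entry. Only tier and failoverFor are visible in this hunk, so the example is deliberately partial and the values are hypothetical.

// Hypothetical entry; Partial<> because the rest of the interface sits outside this hunk.
const entry: Partial<LlmProviderFileEntry> = {
  tier: 'fast',
  // Allow this local provider to stand in for the centralized 'claude' Llm when the
  // mcpd inference proxy is unreachable; RBAC on 'claude' is still pre-checked first.
  failoverFor: 'claude',
};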
View File

@@ -173,9 +173,6 @@ export async function createProvidersFromConfig(
if (entry.tier) {
registry.assignTier(provider.name, entry.tier);
}
if (entry.failoverFor) {
registry.registerFailover(entry.failoverFor, provider.name);
}
}
return registry;

View File

@@ -1,107 +0,0 @@
/**
* FailoverRouter — orchestrates "try mcpd's centralized Llm, fall back to a
* local provider when authorized" for clients that consume the inference
* proxy.
*
* Decision flow on a centralized inference call:
*
* 1. Call the primary (the supplied `primary` callback, typically an HTTP
* POST to mcpd /api/v1/llms/:name/infer).
* 2. If that succeeds → done.
* 3. If it fails AND a local provider is registered as failover for this
* Llm name → call mcpd /api/v1/llms/:name (RBAC-gated) to verify the
* caller still has permission to view this Llm. mcpd unreachable →
* fail-closed (re-throw the original error). 403 → fail-closed.
* 4. 200 → invoke the local provider's `complete()` and tag the result
* as `failover: true` for client-side audit.
*
* The check call uses HEAD to avoid pulling the Llm body (and any
* description / extraConfig) over the wire — mcpd treats both methods the
* same in the RBAC hook because the URL maps to the same permission.
*/
import type { LlmProvider } from './types.js';
import type { ProviderRegistry } from './registry.js';
export interface FailoverDecision<T> {
result: T;
failover: boolean;
/** Name of the local provider used (only set when failover === true). */
via?: string;
}
export interface FailoverRouterDeps {
/** Injected fetch for the RBAC pre-check. Tests mock this. */
fetch?: typeof globalThis.fetch;
/** mcpd base URL (no trailing slash). */
mcpdUrl: string;
/** Bearer token to attach to the RBAC pre-check call. */
bearerToken?: string;
}
/** Outcome of the RBAC pre-check. Used internally + exposed for tests. */
export type AuthCheckOutcome = 'allowed' | 'forbidden' | 'unreachable';
export class FailoverRouter {
private readonly fetchImpl: typeof globalThis.fetch;
private readonly mcpdUrl: string;
private readonly bearer: string | undefined;
constructor(
private readonly registry: ProviderRegistry,
deps: FailoverRouterDeps,
) {
this.fetchImpl = deps.fetch ?? globalThis.fetch;
this.mcpdUrl = deps.mcpdUrl.replace(/\/+$/, '');
if (deps.bearerToken !== undefined) this.bearer = deps.bearerToken;
}
/**
* Run a primary inference attempt; on failure, fall back to the local
* provider if one is registered for this Llm AND the caller still has
* `view:llms:<llmName>` on mcpd.
*
* `primary` should reject (throw) when mcpd's proxy is unreachable or
* returns a 5xx — that's the signal to consider failover. 4xx errors that
* indicate a bad request are surfaced as-is; the router only retries on
* primary failure shapes that look like an upstream/network issue.
*/
async run<T>(
llmName: string,
primary: () => Promise<T>,
localCall: (provider: LlmProvider) => Promise<T>,
): Promise<FailoverDecision<T>> {
try {
const result = await primary();
return { result, failover: false };
} catch (primaryErr) {
const local = this.registry.getFailoverFor(llmName);
if (local === null) throw primaryErr;
const auth = await this.checkAuth(llmName);
if (auth !== 'allowed') {
// Fail-closed for forbidden AND unreachable.
throw primaryErr;
}
const result = await localCall(local);
return { result, failover: true, via: local.name };
}
}
/** RBAC pre-check exposed for tests / status-display callers. */
async checkAuth(llmName: string): Promise<AuthCheckOutcome> {
const url = `${this.mcpdUrl}/api/v1/llms/${encodeURIComponent(llmName)}`;
const headers: Record<string, string> = {};
if (this.bearer !== undefined) headers['Authorization'] = `Bearer ${this.bearer}`;
let res: Response;
try {
res = await this.fetchImpl(url, { method: 'HEAD', headers });
} catch {
return 'unreachable';
}
if (res.status === 200 || res.status === 204) return 'allowed';
if (res.status === 403 || res.status === 401) return 'forbidden';
// Anything else (404, 500…) — treat as unreachable for the failover flow.
return 'unreachable';
}
}

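For reference, a minimal sketch of how the (now-removed) router was meant to be driven, following the decision flow in the header comment. The registry wiring, mcpd URL, and the shape of both callbacks are assumptions; only the run()/checkAuth() contract comes from the code above.

// Sketch only: assumes `registry` already has a local provider registered as failover for 'claude'.
const router = new FailoverRouter(registry, {
  mcpdUrl: 'http://mcpd.example:8080',
  bearerToken: 'bearer-x', // stand-in token for the RBAC pre-check
});

const decision = await router.run(
  'claude',
  // Primary: centralized inference via mcpd; throwing here is the failover signal.
  async () => {
    const res = await fetch('http://mcpd.example:8080/api/v1/llms/claude/infer', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ messages: [{ role: 'user', content: 'hi' }] }),
    });
    if (!res.ok) throw new Error(`mcpd infer failed: ${res.status}`);
    return res.json();
  },
  // Local fallback: only reached after the HEAD /api/v1/llms/claude pre-check returns 200/204.
  // The argument shape for complete() is not shown in this diff, so it is assumed here.
  async (provider) => provider.complete({ messages: [{ role: 'user', content: 'hi' }] }),
);
if (decision.failover) console.warn('answered via local provider', decision.via);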
View File

@@ -8,8 +8,6 @@ export class ProviderRegistry {
private providers = new Map<string, LlmProvider>();
private activeProvider: string | null = null;
private tierProviders = new Map<Tier, string[]>();
/** Maps a centralized Llm name → local provider name that can substitute when mcpd is unreachable. */
private failoverMap = new Map<string, string>();
register(provider: LlmProvider): void {
this.providers.set(provider.name, provider);
@@ -33,30 +31,6 @@ export class ProviderRegistry {
this.tierProviders.set(tier, filtered);
}
}
// Remove from failover map (any entry whose local-provider value points at this name)
for (const [centralName, localName] of this.failoverMap) {
if (localName === name) this.failoverMap.delete(centralName);
}
}
/** Mark `localProviderName` as the failover for the centralized Llm named `centralLlmName`. */
registerFailover(centralLlmName: string, localProviderName: string): void {
if (!this.providers.has(localProviderName)) {
throw new Error(`Provider '${localProviderName}' is not registered`);
}
this.failoverMap.set(centralLlmName, localProviderName);
}
/** Look up the local provider that can substitute for a centralized Llm, if any. */
getFailoverFor(centralLlmName: string): LlmProvider | null {
const localName = this.failoverMap.get(centralLlmName);
if (localName === undefined) return null;
return this.providers.get(localName) ?? null;
}
/** Names of central Llms that have a local failover registered. Used in status output. */
listFailovers(): Array<{ centralLlmName: string; localProviderName: string }> {
return [...this.failoverMap.entries()].map(([centralLlmName, localProviderName]) => ({ centralLlmName, localProviderName }));
}
setActive(name: string): void {

View File

@@ -1,170 +0,0 @@
import { describe, it, expect, vi } from 'vitest';
import { ProviderRegistry } from '../src/providers/registry.js';
import { FailoverRouter } from '../src/providers/failover-router.js';
import type { LlmProvider, CompleteResponse } from '../src/providers/types.js';
function fakeProvider(name: string): LlmProvider {
const completeFn = vi.fn(async (): Promise<CompleteResponse> => ({
content: 'local response',
finishReason: 'stop',
}));
return {
name,
complete: completeFn,
listModels: vi.fn(async () => [name]),
isAvailable: vi.fn(async () => true),
};
}
function makeFetch(behaviour: { method: string; status?: number; throw?: boolean }): ReturnType<typeof vi.fn> {
return vi.fn(async (url: string | URL, init?: RequestInit) => {
if (behaviour.throw === true) throw new Error('connection refused');
expect(init?.method).toBe(behaviour.method);
expect(String(url)).toMatch(/\/api\/v1\/llms\//);
return new Response(null, { status: behaviour.status ?? 200 });
});
}
describe('ProviderRegistry — failover map', () => {
it('registerFailover maps a central name → local provider name', () => {
const reg = new ProviderRegistry();
const local = fakeProvider('vllm-local');
reg.register(local);
reg.registerFailover('claude', 'vllm-local');
const found = reg.getFailoverFor('claude');
expect(found?.name).toBe('vllm-local');
});
it('getFailoverFor returns null when no map entry exists', () => {
const reg = new ProviderRegistry();
reg.register(fakeProvider('vllm-local'));
expect(reg.getFailoverFor('claude')).toBeNull();
});
it('registerFailover throws when local provider is not registered', () => {
const reg = new ProviderRegistry();
expect(() => reg.registerFailover('claude', 'missing')).toThrow(/not registered/);
});
it('unregister removes failover entries that pointed at the removed provider', () => {
const reg = new ProviderRegistry();
reg.register(fakeProvider('vllm-local'));
reg.registerFailover('claude', 'vllm-local');
reg.unregister('vllm-local');
expect(reg.getFailoverFor('claude')).toBeNull();
expect(reg.listFailovers()).toEqual([]);
});
it('listFailovers reports the current map', () => {
const reg = new ProviderRegistry();
reg.register(fakeProvider('vllm-local'));
reg.registerFailover('claude', 'vllm-local');
reg.registerFailover('opus', 'vllm-local');
expect(reg.listFailovers()).toEqual([
{ centralLlmName: 'claude', localProviderName: 'vllm-local' },
{ centralLlmName: 'opus', localProviderName: 'vllm-local' },
]);
});
});
describe('FailoverRouter', () => {
it('returns primary result when primary succeeds', async () => {
const reg = new ProviderRegistry();
reg.register(fakeProvider('vllm-local'));
reg.registerFailover('claude', 'vllm-local');
const router = new FailoverRouter(reg, {
mcpdUrl: 'http://mcpd',
fetch: vi.fn() as unknown as typeof fetch,
});
const out = await router.run('claude', async () => 'central', async () => 'local');
expect(out.failover).toBe(false);
expect(out.result).toBe('central');
});
it('falls back to local when primary fails AND mcpd auth-checks 200', async () => {
const reg = new ProviderRegistry();
reg.register(fakeProvider('vllm-local'));
reg.registerFailover('claude', 'vllm-local');
const fetchFn = makeFetch({ method: 'HEAD', status: 200 });
const router = new FailoverRouter(reg, {
mcpdUrl: 'http://mcpd',
fetch: fetchFn as unknown as typeof fetch,
bearerToken: 'bearer-x',
});
const out = await router.run(
'claude',
async () => { throw new Error('upstream down'); },
async (provider) => `via:${provider.name}`,
);
expect(out.failover).toBe(true);
expect(out.via).toBe('vllm-local');
expect(out.result).toBe('via:vllm-local');
// Bearer was attached
const [, init] = fetchFn.mock.calls[0] as [string, RequestInit];
expect((init.headers as Record<string, string>)['Authorization']).toBe('Bearer bearer-x');
});
it('re-throws primary error when no local failover is registered', async () => {
const reg = new ProviderRegistry();
const router = new FailoverRouter(reg, {
mcpdUrl: 'http://mcpd',
fetch: vi.fn() as unknown as typeof fetch,
});
await expect(router.run(
'claude',
async () => { throw new Error('boom'); },
async () => 'never',
)).rejects.toThrow('boom');
});
it('re-throws (fail-closed) when mcpd returns 403 to the auth check', async () => {
const reg = new ProviderRegistry();
reg.register(fakeProvider('vllm-local'));
reg.registerFailover('claude', 'vllm-local');
const router = new FailoverRouter(reg, {
mcpdUrl: 'http://mcpd',
fetch: makeFetch({ method: 'HEAD', status: 403 }) as unknown as typeof fetch,
});
await expect(router.run(
'claude',
async () => { throw new Error('upstream down'); },
async () => 'never',
)).rejects.toThrow('upstream down');
});
it('re-throws (fail-closed) when mcpd itself is unreachable for the auth check', async () => {
const reg = new ProviderRegistry();
reg.register(fakeProvider('vllm-local'));
reg.registerFailover('claude', 'vllm-local');
const router = new FailoverRouter(reg, {
mcpdUrl: 'http://mcpd',
fetch: makeFetch({ method: 'HEAD', throw: true }) as unknown as typeof fetch,
});
await expect(router.run(
'claude',
async () => { throw new Error('upstream down'); },
async () => 'never',
)).rejects.toThrow('upstream down');
});
it('checkAuth maps responses correctly', async () => {
const reg = new ProviderRegistry();
const make = (status: number) => new FailoverRouter(reg, {
mcpdUrl: 'http://mcpd',
fetch: (async () => new Response(null, { status })) as unknown as typeof fetch,
});
expect(await make(200).checkAuth('claude')).toBe('allowed');
expect(await make(204).checkAuth('claude')).toBe('allowed');
expect(await make(401).checkAuth('claude')).toBe('forbidden');
expect(await make(403).checkAuth('claude')).toBe('forbidden');
expect(await make(404).checkAuth('claude')).toBe('unreachable');
expect(await make(500).checkAuth('claude')).toBe('unreachable');
});
});