feat: per-project LLM models, ACP session pool, smart pagination tests

- ACP session pool with per-model subprocesses and 8h idle eviction
- Per-project LLM config: local override → mcpd recommendation → global default
- Model override support in ResponsePaginator
- /llm/models endpoint + available models in mcpctl status
- Remove --llm-provider/--llm-model from create project (use edit/apply)
- 8 new smart pagination integration tests (e2e flow)
- 260 mcplocal tests, 330 CLI tests passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Michal
2026-02-25 01:29:38 +00:00
parent d2dedf74e5
commit 61a07024e9
14 changed files with 786 additions and 49 deletions

View File

@@ -196,8 +196,6 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
.argument('<name>', 'Project name')
.option('-d, --description <text>', 'Project description', '')
.option('--proxy-mode <mode>', 'Proxy mode (direct, filtered)')
.option('--llm-provider <name>', 'LLM provider name')
.option('--llm-model <name>', 'LLM model name')
.option('--prompt <text>', 'Project-level prompt / instructions for the LLM')
.option('--server <name>', 'Server name (repeat for multiple)', collect, [])
.option('--force', 'Update if already exists')
@@ -208,8 +206,6 @@ export function createCreateCommand(deps: CreateCommandDeps): Command {
proxyMode: opts.proxyMode ?? 'direct',
};
if (opts.prompt) body.prompt = opts.prompt;
if (opts.llmProvider) body.llmProvider = opts.llmProvider;
if (opts.llmModel) body.llmModel = opts.llmModel;
if (opts.server.length > 0) body.servers = opts.server;
try {

View File

@@ -22,6 +22,8 @@ export interface StatusCommandDeps {
checkHealth: (url: string) => Promise<boolean>;
/** Check LLM health via mcplocal's /llm/health endpoint */
checkLlm: (mcplocalUrl: string) => Promise<string>;
/** Fetch available models from mcplocal's /llm/models endpoint */
fetchModels: (mcplocalUrl: string) => Promise<string[]>;
isTTY: boolean;
}
@@ -70,6 +72,25 @@ function defaultCheckLlm(mcplocalUrl: string): Promise<string> {
});
}
/**
 * Best-effort fetch of the available model list from mcplocal's
 * /llm/models endpoint. Resolves to [] on connection error, timeout,
 * or a malformed body — this promise never rejects.
 */
function defaultFetchModels(mcplocalUrl: string): Promise<string[]> {
  return new Promise((resolve) => {
    const request = http.get(`${mcplocalUrl}/llm/models`, { timeout: 5000 }, (response) => {
      let raw = '';
      response.setEncoding('utf-8');
      response.on('data', (piece: string) => {
        raw += piece;
      });
      response.on('end', () => {
        try {
          const parsed = JSON.parse(raw) as { models?: string[] };
          resolve(parsed.models ?? []);
        } catch {
          resolve([]);
        }
      });
    });
    request.on('error', () => resolve([]));
    request.on('timeout', () => {
      request.destroy();
      resolve([]);
    });
  });
}
const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
const defaultDeps: StatusCommandDeps = {
@@ -79,11 +100,12 @@ const defaultDeps: StatusCommandDeps = {
write: (text) => process.stdout.write(text),
checkHealth: defaultCheckHealth,
checkLlm: defaultCheckLlm,
fetchModels: defaultFetchModels,
isTTY: process.stdout.isTTY ?? false,
};
export function createStatusCommand(deps?: Partial<StatusCommandDeps>): Command {
const { configDeps, credentialsDeps, log, write, checkHealth, checkLlm, isTTY } = { ...defaultDeps, ...deps };
const { configDeps, credentialsDeps, log, write, checkHealth, checkLlm, fetchModels, isTTY } = { ...defaultDeps, ...deps };
return new Command('status')
.description('Show mcpctl status and connectivity')
@@ -172,5 +194,11 @@ export function createStatusCommand(deps?: Partial<StatusCommandDeps>): Command
log(`LLM: ${llmLabel}${llmStatus}`);
}
}
// Show available models (non-blocking, best effort)
const models = await fetchModels(config.mcplocalUrl);
if (models.length > 0) {
log(`${DIM} Available: ${models.join(', ')}${RESET}`);
}
});
}

View File

@@ -30,8 +30,6 @@ describe('project with new fields', () => {
'project', 'smart-home',
'-d', 'Smart home project',
'--proxy-mode', 'filtered',
'--llm-provider', 'gemini-cli',
'--llm-model', 'gemini-2.0-flash',
'--server', 'my-grafana',
'--server', 'my-ha',
], { from: 'user' });
@@ -40,8 +38,6 @@ describe('project with new fields', () => {
name: 'smart-home',
description: 'Smart home project',
proxyMode: 'filtered',
llmProvider: 'gemini-cli',
llmModel: 'gemini-2.0-flash',
servers: ['my-grafana', 'my-ha'],
}));
});

View File

@@ -48,6 +48,33 @@ export async function refreshProjectUpstreams(
return syncUpstreams(router, mcpdClient, servers);
}
/** Project-level LLM "recommendation" as served by mcpd. */
export interface ProjectLlmConfig {
  llmProvider?: string;
  llmModel?: string;
}

/**
 * Fetch a project's LLM config (llmProvider, llmModel) from mcpd.
 * These are the project-level "recommendations" — local overrides take
 * priority over them. Any fetch or parse failure yields an empty config
 * rather than an error, so callers can always fall back to defaults.
 */
export async function fetchProjectLlmConfig(
  mcpdClient: McpdClient,
  projectName: string,
): Promise<ProjectLlmConfig> {
  try {
    const path = `/api/v1/projects/${encodeURIComponent(projectName)}`;
    const project = await mcpdClient.get<{
      llmProvider?: string;
      llmModel?: string;
    }>(path);
    // Only copy fields that are present and non-empty.
    return {
      ...(project.llmProvider ? { llmProvider: project.llmProvider } : {}),
      ...(project.llmModel ? { llmModel: project.llmModel } : {}),
    };
  } catch {
    return {};
  }
}
/** Shared sync logic: reconcile a router's upstreams with a server list. */
function syncUpstreams(router: McpRouter, mcpdClient: McpdClient, servers: McpdServer[]): string[] {
const registered: string[] = [];

View File

@@ -44,21 +44,54 @@ export interface LlmFileConfig {
binaryPath?: string;
}
/** Per-project LLM override stored under `projects.<name>.llm` in ~/.mcpctl/config.json. */
export interface ProjectLlmOverride {
  model?: string;
  provider?: string;
}
/** Full shape of ~/.mcpctl/config.json as read by loadFullConfig(). */
interface McpctlConfig {
  llm?: LlmFileConfig;
  projects?: Record<string, { llm?: ProjectLlmOverride }>;
}
/** Memoized parse of the config file, held for the process lifetime. */
let cachedConfig: McpctlConfig | null = null;

/**
 * Read and parse ~/.mcpctl/config.json, caching the parsed result.
 * A missing file or a read/parse failure yields {} and is NOT cached,
 * so a config file created later is still picked up on the next call.
 */
function loadFullConfig(): McpctlConfig {
  if (cachedConfig) return cachedConfig;
  try {
    const configPath = join(homedir(), '.mcpctl', 'config.json');
    if (!existsSync(configPath)) return {};
    cachedConfig = JSON.parse(readFileSync(configPath, 'utf-8')) as McpctlConfig;
    return cachedConfig;
  } catch {
    return {};
  }
}
/**
* Load LLM configuration from ~/.mcpctl/config.json.
* Returns undefined if no LLM section is configured.
*/
export function loadLlmConfig(): LlmFileConfig | undefined {
try {
const configPath = join(homedir(), '.mcpctl', 'config.json');
if (!existsSync(configPath)) return undefined;
const raw = readFileSync(configPath, 'utf-8');
const parsed = JSON.parse(raw) as { llm?: LlmFileConfig };
if (!parsed.llm?.provider || parsed.llm.provider === 'none') return undefined;
return parsed.llm;
} catch {
return undefined;
const config = loadFullConfig();
if (!config.llm?.provider || config.llm.provider === 'none') return undefined;
return config.llm;
}
/**
 * Look up the per-project LLM override from ~/.mcpctl/config.json.
 * Returns the project's model/provider override, or undefined when the
 * project has no entry (or the file has no `projects` section at all).
 */
export function loadProjectLlmOverride(projectName: string): ProjectLlmOverride | undefined {
  const projects = loadFullConfig().projects;
  return projects?.[projectName]?.llm;
}
/** Reset the memoized config so the next loadFullConfig() re-reads the file (for testing). */
export function resetConfigCache(): void {
  cachedConfig = null;
}
export function loadHttpConfig(env: Record<string, string | undefined> = process.env): HttpConfig {

View File

@@ -13,7 +13,8 @@ import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/
import type { JSONRPCMessage } from '@modelcontextprotocol/sdk/types.js';
import { McpRouter } from '../router.js';
import { ResponsePaginator } from '../llm/pagination.js';
import { refreshProjectUpstreams } from '../discovery.js';
import { refreshProjectUpstreams, fetchProjectLlmConfig } from '../discovery.js';
import { loadProjectLlmOverride } from './config.js';
import type { McpdClient } from './mcpd-client.js';
import type { ProviderRegistry } from '../providers/registry.js';
import type { JsonRpcRequest } from '../types.js';
@@ -46,8 +47,13 @@ export function registerProjectMcpEndpoint(app: FastifyInstance, mcpdClient: Mcp
const router = existing?.router ?? new McpRouter();
await refreshProjectUpstreams(router, mcpdClient, projectName, authToken);
// Wire pagination support with LLM provider if configured
router.setPaginator(new ResponsePaginator(providerRegistry ?? null));
// Resolve project LLM model: local override → mcpd recommendation → global default
const localOverride = loadProjectLlmOverride(projectName);
const mcpdConfig = await fetchProjectLlmConfig(mcpdClient, projectName);
const resolvedModel = localOverride?.model ?? mcpdConfig.llmModel ?? undefined;
// Wire pagination support with LLM provider and project model override
router.setPaginator(new ResponsePaginator(providerRegistry ?? null, {}, resolvedModel));
// Configure prompt resources with SA-scoped client for RBAC
const saClient = mcpdClient.withHeaders({ 'X-Service-Account': `project:${projectName}` });

View File

@@ -109,6 +109,21 @@ export async function createHttpServer(
}
});
// GET /llm/models — list the active LLM provider's available models.
// Always answers 200; when no provider is active (or listing fails)
// the response degrades to an empty model list.
app.get('/llm/models', async (_request, reply) => {
  const active = deps.providerRegistry?.getActive();
  if (!active) {
    reply.code(200).send({ models: [], provider: null });
    return;
  }
  let models: string[] = [];
  try {
    models = await active.listModels();
  } catch {
    models = [];
  }
  reply.code(200).send({ models, provider: active.name });
});
// Proxy management routes to mcpd
const mcpdClient = new McpdClient(config.mcpdUrl, config.mcpdToken);
registerProxyRoutes(app, mcpdClient);

View File

@@ -105,6 +105,7 @@ export class ResponsePaginator {
constructor(
private providers: ProviderRegistry | null,
config: Partial<PaginationConfig> = {},
private modelOverride?: string,
) {
this.config = { ...DEFAULT_PAGINATION_CONFIG, ...config };
}
@@ -129,7 +130,8 @@ export class ResponsePaginator {
try {
index = await this.generateSmartIndex(resultId, toolName, raw, pages);
} catch {
} catch (err) {
console.error(`[pagination] Smart index failed for ${toolName}, falling back to simple:`, err instanceof Error ? err.message : String(err));
index = this.generateSimpleIndex(resultId, toolName, raw, pages);
}
@@ -259,9 +261,12 @@ export class ResponsePaginator {
],
maxTokens: this.config.indexMaxTokens,
temperature: 0,
...(this.modelOverride ? { model: this.modelOverride } : {}),
});
const summaries = JSON.parse(result.content) as Array<{ page: number; summary: string }>;
// LLMs often wrap JSON in ```json ... ``` fences — strip them
const cleaned = result.content.replace(/^```(?:json)?\s*\n?/i, '').replace(/\n?```\s*$/i, '').trim();
const summaries = JSON.parse(cleaned) as Array<{ page: number; summary: string }>;
return {
resultId,

View File

@@ -11,39 +11,56 @@ export interface GeminiAcpConfig {
defaultModel?: string;
requestTimeoutMs?: number;
initTimeoutMs?: number;
/** Idle TTL for pooled sessions in ms (default: 8 hours) */
idleTtlMs?: number;
/** Override for testing — passed through to AcpClient */
spawn?: AcpClientConfig['spawn'];
}
/** One pooled ACP session: its client, last-use timestamp, and serial request queue. */
interface PoolEntry {
  client: AcpClient;    // persistent ACP client for one model
  lastUsed: number;     // Date.now() of the most recent request; drives idle eviction
  queue: Promise<void>; // tail of this entry's serial queue; new requests chain onto it
}
/**
* Gemini CLI provider using ACP (Agent Client Protocol) mode.
* Keeps the gemini process alive as a persistent subprocess, eliminating
* the ~10s cold-start per call. Auto-restarts on crash or timeout.
*
* Maintains a pool of persistent subprocesses keyed by model name.
* Each model gets its own `gemini --experimental-acp` subprocess with
* a serial request queue. Idle sessions are evicted after 8 hours.
*
* NOTE: Gemini ACP currently doesn't support per-session model selection,
* so all sessions use the same model. The pool infrastructure is ready for
* when vLLM/OpenAI providers are added (they support per-request model).
*/
export class GeminiAcpProvider implements LlmProvider {
  readonly name = 'gemini-cli';

  // NOTE: the rendered diff left the pre-pool fields (`private client:
  // AcpClient` and `private queue: Promise<void>`) fused into this span;
  // they are dead after the pool refactor (per-entry client/queue) and
  // are removed here.

  /** Pooled ACP sessions, keyed by model name. */
  private pool = new Map<string, PoolEntry>();
  private binaryPath: string;
  private defaultModel: string;
  private readonly requestTimeoutMs: number;
  private readonly initTimeoutMs: number;
  /** Idle TTL after which a pooled session is evicted (default: 8 hours). */
  private readonly idleTtlMs: number;
  /** Spawn override passed through to each AcpClient (testing). */
  private readonly spawnOverride?: AcpClientConfig['spawn'];
/**
 * @param config optional provider configuration; unset fields fall back to
 *   defaults: binary 'gemini', model 'gemini-2.5-flash', 60s request timeout,
 *   30s init timeout, 8h idle TTL.
 *
 * NOTE: the rendered diff fused the removed eager `new AcpClient(...)` /
 * `this.client = ...` lines into this span; clients are now created lazily
 * per model in getOrCreateEntry(), so only the settings are recorded here.
 */
constructor(config?: GeminiAcpConfig) {
  this.binaryPath = config?.binaryPath ?? 'gemini';
  this.defaultModel = config?.defaultModel ?? 'gemini-2.5-flash';
  this.requestTimeoutMs = config?.requestTimeoutMs ?? 60_000;
  this.initTimeoutMs = config?.initTimeoutMs ?? 30_000;
  this.idleTtlMs = config?.idleTtlMs ?? 8 * 60 * 60 * 1000; // 8 hours
  if (config?.spawn) this.spawnOverride = config.spawn;
}
/**
 * Run a completion against the pooled session for the requested model
 * (options.model, falling back to defaultModel). Requests for the same
 * model are serialized on that entry's queue; idle entries are evicted
 * opportunistically on each call.
 *
 * NOTE: the rendered diff left the removed one-line body
 * (`return this.enqueue(() => this.doComplete(options));`) fused above
 * the new body, making it unreachable; the stale line is removed here.
 */
async complete(options: CompletionOptions): Promise<CompletionResult> {
  const model = options.model ?? this.defaultModel;
  const entry = this.getOrCreateEntry(model);
  entry.lastUsed = Date.now();
  this.evictIdle();
  return this.enqueue(entry, () => this.doComplete(entry.client, options));
}
async listModels(): Promise<string[]> {
@@ -60,12 +77,51 @@ export class GeminiAcpProvider implements LlmProvider {
}
/**
 * Dispose every pooled AcpClient and clear the pool.
 *
 * NOTE: the rendered diff left the removed `this.client.dispose();` line
 * fused into this span; `this.client` no longer exists after the pool
 * refactor, so that stale line is dropped.
 */
dispose(): void {
  for (const entry of this.pool.values()) {
    entry.client.dispose();
  }
  this.pool.clear();
}
/** Number of active pool entries, i.e. distinct models currently pooled (for testing). */
get poolSize(): number {
  return this.pool.size;
}
// --- Private ---
// NOTE: the rendered diff left the removed old `doComplete(options)`
// signature line orphaned here; it is dropped (the new doComplete
// variant taking an explicit client lives further down).

/**
 * Return the pool entry for `model`, creating and caching a new
 * AcpClient (with its own serial queue) on first use of that model.
 */
private getOrCreateEntry(model: string): PoolEntry {
  const existing = this.pool.get(model);
  if (existing) return existing;
  const acpConfig: AcpClientConfig = {
    binaryPath: this.binaryPath,
    model,
    requestTimeoutMs: this.requestTimeoutMs,
    initTimeoutMs: this.initTimeoutMs,
  };
  if (this.spawnOverride) acpConfig.spawn = this.spawnOverride;
  const entry: PoolEntry = {
    client: new AcpClient(acpConfig),
    lastUsed: Date.now(),
    queue: Promise.resolve(),
  };
  this.pool.set(model, entry);
  return entry;
}
/** Dispose and drop every pooled session idle for longer than idleTtlMs. */
private evictIdle(): void {
  const cutoff = Date.now() - this.idleTtlMs;
  for (const [model, entry] of this.pool.entries()) {
    if (entry.lastUsed < cutoff) {
      entry.client.dispose();
      this.pool.delete(model);
    }
  }
}
private async doComplete(client: AcpClient, options: CompletionOptions): Promise<CompletionResult> {
const prompt = options.messages
.map((m) => {
if (m.role === 'system') return `System: ${m.content}`;
@@ -75,7 +131,7 @@ export class GeminiAcpProvider implements LlmProvider {
})
.join('\n\n');
const content = await this.client.prompt(prompt);
const content = await client.prompt(prompt);
return {
content: content.trim(),
@@ -85,9 +141,9 @@ export class GeminiAcpProvider implements LlmProvider {
};
}
private enqueue<T>(fn: () => Promise<T>): Promise<T> {
private enqueue<T>(entry: PoolEntry, fn: () => Promise<T>): Promise<T> {
const result = new Promise<T>((resolve, reject) => {
this.queue = this.queue.then(
entry.queue = entry.queue.then(
() => fn().then(resolve, reject),
() => fn().then(resolve, reject),
);

View File

@@ -69,7 +69,7 @@ describe('GeminiAcpProvider', () => {
expect(result.content).toBe('padded response');
});
it('serializes concurrent calls', async () => {
it('serializes concurrent calls to same model', async () => {
const callOrder: number[] = [];
let callCount = 0;
@@ -110,6 +110,70 @@ describe('GeminiAcpProvider', () => {
});
});
describe('session pool', () => {
it('creates separate pool entries for different models', async () => {
mockPrompt.mockResolvedValue('ok');
await provider.complete({ messages: [{ role: 'user', content: 'a' }], model: 'gemini-2.5-flash' });
await provider.complete({ messages: [{ role: 'user', content: 'b' }], model: 'gemini-2.5-pro' });
expect(provider.poolSize).toBe(2);
});
it('reuses existing pool entry for same model', async () => {
mockPrompt.mockResolvedValue('ok');
await provider.complete({ messages: [{ role: 'user', content: 'a' }], model: 'gemini-2.5-flash' });
await provider.complete({ messages: [{ role: 'user', content: 'b' }], model: 'gemini-2.5-flash' });
expect(provider.poolSize).toBe(1);
});
it('uses defaultModel when no model specified', async () => {
mockPrompt.mockResolvedValue('ok');
await provider.complete({ messages: [{ role: 'user', content: 'a' }] });
expect(provider.poolSize).toBe(1);
});
it('evicts idle sessions', async () => {
// Use a very short TTL for testing
const shortTtl = new GeminiAcpProvider({
binaryPath: '/usr/bin/gemini',
defaultModel: 'gemini-2.5-flash',
idleTtlMs: 1, // 1ms TTL
});
mockPrompt.mockResolvedValue('ok');
await shortTtl.complete({ messages: [{ role: 'user', content: 'a' }], model: 'model-a' });
expect(shortTtl.poolSize).toBe(1);
// Wait for TTL to expire
await new Promise((r) => setTimeout(r, 10));
// Next complete call triggers eviction of old entry and creates new one
await shortTtl.complete({ messages: [{ role: 'user', content: 'b' }], model: 'model-b' });
// model-a should have been evicted, only model-b remains
expect(shortTtl.poolSize).toBe(1);
expect(mockDispose).toHaveBeenCalled();
shortTtl.dispose();
});
it('dispose kills all pooled clients', async () => {
mockPrompt.mockResolvedValue('ok');
await provider.complete({ messages: [{ role: 'user', content: 'a' }], model: 'model-a' });
await provider.complete({ messages: [{ role: 'user', content: 'b' }], model: 'model-b' });
expect(provider.poolSize).toBe(2);
provider.dispose();
expect(provider.poolSize).toBe(0);
expect(mockDispose).toHaveBeenCalledTimes(2);
});
});
describe('listModels', () => {
it('returns static model list', async () => {
const models = await provider.listModels();
@@ -120,7 +184,9 @@ describe('GeminiAcpProvider', () => {
});
describe('dispose', () => {
it('delegates to AcpClient', () => {
it('delegates to all pooled AcpClients', async () => {
mockPrompt.mockResolvedValue('ok');
await provider.complete({ messages: [{ role: 'user', content: 'test' }] });
provider.dispose();
expect(mockDispose).toHaveBeenCalled();
});

View File

@@ -1,5 +1,5 @@
import { describe, it, expect, vi, afterEach } from 'vitest';
import { loadLlmConfig } from '../../src/http/config.js';
import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest';
import { loadLlmConfig, resetConfigCache } from '../../src/http/config.js';
import { existsSync, readFileSync } from 'node:fs';
vi.mock('node:fs', async () => {
@@ -11,6 +11,10 @@ vi.mock('node:fs', async () => {
};
});
beforeEach(() => {
resetConfigCache();
});
afterEach(() => {
vi.restoreAllMocks();
});

View File

@@ -6,13 +6,14 @@
* (node:http) and a mock LLM provider. No Docker or external services needed.
*/
import { describe, it, expect, beforeEach, afterEach, afterAll } from 'vitest';
import { describe, it, expect, vi, beforeEach, afterEach, afterAll } from 'vitest';
import { createServer, type Server, type IncomingMessage, type ServerResponse } from 'node:http';
import { McpRouter } from '../../src/router.js';
import { McpdUpstream } from '../../src/upstream/mcpd.js';
import { McpdClient } from '../../src/http/mcpd-client.js';
import { LlmProcessor, DEFAULT_PROCESSOR_CONFIG } from '../../src/llm/processor.js';
import { ResponsePaginator } from '../../src/llm/pagination.js';
import { ProviderRegistry } from '../../src/providers/registry.js';
import { TieredHealthMonitor } from '../../src/health/tiered.js';
import { refreshUpstreams } from '../../src/discovery.js';
@@ -1096,4 +1097,429 @@ describe('End-to-end integration: 3-tier architecture', () => {
}
});
});
// -----------------------------------------------------------------------
// 8. Smart pagination through the full pipeline
// -----------------------------------------------------------------------
describe('Smart pagination', () => {
// Helper: generate a large JSON response (~100KB)
function makeLargeToolResult(): { flows: Array<{ id: string; type: string; label: string; wires: string[] }> } {
return {
flows: Array.from({ length: 200 }, (_, i) => ({
id: `flow-${String(i).padStart(4, '0')}`,
type: i % 3 === 0 ? 'function' : i % 3 === 1 ? 'http request' : 'inject',
label: `Node ${String(i)}: ${i % 3 === 0 ? 'Data transform' : i % 3 === 1 ? 'API call' : 'Timer trigger'}`,
wires: [`flow-${String(i + 1).padStart(4, '0')}`],
})),
};
}
it('paginates large tool response with smart AI summaries through router', async () => {
const largeResult = makeLargeToolResult();
mockMcpd = await startMockMcpd({
servers: [{ id: 'srv-nodered', name: 'node-red', transport: 'stdio' }],
proxyResponses: new Map([
['srv-nodered:tools/list', {
result: { tools: [{ name: 'get_flows', description: 'Get all flows' }] },
}],
['srv-nodered:tools/call', {
result: largeResult,
}],
]),
});
const client = new McpdClient(mockMcpd.baseUrl, mockMcpd.config.expectedToken);
router = new McpRouter();
await refreshUpstreams(router, client);
await router.discoverTools();
// Set up paginator with LLM provider for smart summaries
const registry = new ProviderRegistry();
const completeFn = vi.fn().mockImplementation(() => ({
content: JSON.stringify([
{ page: 1, summary: 'Function nodes and data transforms (flow-0000 through flow-0050)' },
{ page: 2, summary: 'HTTP request nodes and API integrations (flow-0051 through flow-0100)' },
{ page: 3, summary: 'Inject/timer nodes and triggers (flow-0101 through flow-0150)' },
{ page: 4, summary: 'Remaining nodes and wire connections (flow-0151 through flow-0199)' },
]),
}));
const mockProvider: LlmProvider = {
name: 'test-paginator',
isAvailable: () => true,
complete: completeFn,
};
registry.register(mockProvider);
// Low threshold so our response triggers pagination
const paginator = new ResponsePaginator(registry, {
sizeThreshold: 1000,
pageSize: 8000,
});
router.setPaginator(paginator);
// Call the tool — should get pagination index, not raw data
const response = await router.route({
jsonrpc: '2.0',
id: 'paginate-1',
method: 'tools/call',
params: { name: 'node-red/get_flows', arguments: {} },
});
expect(response.error).toBeUndefined();
const result = response.result as { content: Array<{ type: string; text: string }> };
expect(result.content).toHaveLength(1);
const indexText = result.content[0]!.text;
// Verify smart index with AI summaries
expect(indexText).toContain('AI-generated summaries');
expect(indexText).toContain('Function nodes and data transforms');
expect(indexText).toContain('HTTP request nodes');
expect(indexText).toContain('_resultId');
expect(indexText).toContain('_page');
// LLM was called to generate summaries
expect(completeFn).toHaveBeenCalledOnce();
const llmCall = completeFn.mock.calls[0]![0]!;
expect(llmCall.messages[0].role).toBe('system');
expect(llmCall.messages[1].content).toContain('node-red/get_flows');
});
it('retrieves specific pages after pagination via _resultId/_page', async () => {
const largeResult = makeLargeToolResult();
mockMcpd = await startMockMcpd({
servers: [{ id: 'srv-nodered', name: 'node-red', transport: 'stdio' }],
proxyResponses: new Map([
['srv-nodered:tools/list', {
result: { tools: [{ name: 'get_flows', description: 'Get all flows' }] },
}],
['srv-nodered:tools/call', {
result: largeResult,
}],
]),
});
const client = new McpdClient(mockMcpd.baseUrl, mockMcpd.config.expectedToken);
router = new McpRouter();
await refreshUpstreams(router, client);
await router.discoverTools();
// Simple paginator (no LLM) for predictable behavior
const paginator = new ResponsePaginator(null, {
sizeThreshold: 1000,
pageSize: 8000,
});
router.setPaginator(paginator);
// First call — get the pagination index
const indexResponse = await router.route({
jsonrpc: '2.0',
id: 'idx-1',
method: 'tools/call',
params: { name: 'node-red/get_flows', arguments: {} },
});
expect(indexResponse.error).toBeUndefined();
const indexResult = indexResponse.result as { content: Array<{ text: string }> };
const indexText = indexResult.content[0]!.text;
const resultIdMatch = /"_resultId": "([^"]+)"/.exec(indexText);
expect(resultIdMatch).not.toBeNull();
const resultId = resultIdMatch![1]!;
// Second call — retrieve page 1 via _resultId/_page
const page1Response = await router.route({
jsonrpc: '2.0',
id: 'page-1',
method: 'tools/call',
params: {
name: 'node-red/get_flows',
arguments: { _resultId: resultId, _page: 1 },
},
});
expect(page1Response.error).toBeUndefined();
const page1Result = page1Response.result as { content: Array<{ text: string }> };
expect(page1Result.content[0]!.text).toContain('Page 1/');
// Page content should contain flow data
expect(page1Result.content[0]!.text).toContain('flow-');
// Third call — retrieve page 2
const page2Response = await router.route({
jsonrpc: '2.0',
id: 'page-2',
method: 'tools/call',
params: {
name: 'node-red/get_flows',
arguments: { _resultId: resultId, _page: 2 },
},
});
expect(page2Response.error).toBeUndefined();
const page2Result = page2Response.result as { content: Array<{ text: string }> };
expect(page2Result.content[0]!.text).toContain('Page 2/');
});
it('retrieves full content with _page=all', async () => {
const largeResult = makeLargeToolResult();
mockMcpd = await startMockMcpd({
servers: [{ id: 'srv-nodered', name: 'node-red', transport: 'stdio' }],
proxyResponses: new Map([
['srv-nodered:tools/list', {
result: { tools: [{ name: 'get_flows', description: 'Get all flows' }] },
}],
['srv-nodered:tools/call', {
result: largeResult,
}],
]),
});
const client = new McpdClient(mockMcpd.baseUrl, mockMcpd.config.expectedToken);
router = new McpRouter();
await refreshUpstreams(router, client);
await router.discoverTools();
const paginator = new ResponsePaginator(null, {
sizeThreshold: 1000,
pageSize: 8000,
});
router.setPaginator(paginator);
// Get index
const indexResponse = await router.route({
jsonrpc: '2.0',
id: 'all-idx',
method: 'tools/call',
params: { name: 'node-red/get_flows', arguments: {} },
});
const indexText = (indexResponse.result as { content: Array<{ text: string }> }).content[0]!.text;
const resultId = /"_resultId": "([^"]+)"/.exec(indexText)![1]!;
// Request all pages
const allResponse = await router.route({
jsonrpc: '2.0',
id: 'all-1',
method: 'tools/call',
params: {
name: 'node-red/get_flows',
arguments: { _resultId: resultId, _page: 'all' },
},
});
expect(allResponse.error).toBeUndefined();
const allResult = allResponse.result as { content: Array<{ text: string }> };
// Full response should contain the original JSON
const fullText = allResult.content[0]!.text;
expect(fullText).toContain('flow-0000');
expect(fullText).toContain('flow-0199');
// Should be the full serialized result
expect(JSON.parse(fullText)).toEqual(largeResult);
});
it('falls back to simple index when LLM fails', async () => {
const largeResult = makeLargeToolResult();
mockMcpd = await startMockMcpd({
servers: [{ id: 'srv-nodered', name: 'node-red', transport: 'stdio' }],
proxyResponses: new Map([
['srv-nodered:tools/list', {
result: { tools: [{ name: 'get_flows', description: 'Get all flows' }] },
}],
['srv-nodered:tools/call', {
result: largeResult,
}],
]),
});
const client = new McpdClient(mockMcpd.baseUrl, mockMcpd.config.expectedToken);
router = new McpRouter();
await refreshUpstreams(router, client);
await router.discoverTools();
// Set up paginator with a failing LLM
const registry = new ProviderRegistry();
registry.register(createFailingLlmProvider('broken-llm'));
const paginator = new ResponsePaginator(registry, {
sizeThreshold: 1000,
pageSize: 8000,
});
router.setPaginator(paginator);
const response = await router.route({
jsonrpc: '2.0',
id: 'fallback-idx',
method: 'tools/call',
params: { name: 'node-red/get_flows', arguments: {} },
});
expect(response.error).toBeUndefined();
const text = (response.result as { content: Array<{ text: string }> }).content[0]!.text;
// Should still paginate, just without AI summaries
expect(text).toContain('_resultId');
expect(text).not.toContain('AI-generated summaries');
expect(text).toContain('Page 1:');
});
it('returns expired cache message for stale _resultId', async () => {
router = new McpRouter();
const paginator = new ResponsePaginator(null, { sizeThreshold: 100, pageSize: 50 });
router.setPaginator(paginator);
// Try to retrieve a page with an unknown resultId
const response = await router.route({
jsonrpc: '2.0',
id: 'stale-1',
method: 'tools/call',
params: {
name: 'anything/tool',
arguments: { _resultId: 'nonexistent-id', _page: 1 },
},
});
expect(response.error).toBeUndefined();
const text = (response.result as { content: Array<{ text: string }> }).content[0]!.text;
expect(text).toContain('expired');
expect(text).toContain('re-call');
});
it('skips pagination for small responses', async () => {
mockMcpd = await startMockMcpd({
servers: [{ id: 'srv-small', name: 'smallserver', transport: 'stdio' }],
proxyResponses: new Map([
['srv-small:tools/list', {
result: { tools: [{ name: 'get_status', description: 'Get status' }] },
}],
['srv-small:tools/call', {
result: { status: 'ok', uptime: 12345 },
}],
]),
});
const client = new McpdClient(mockMcpd.baseUrl, mockMcpd.config.expectedToken);
router = new McpRouter();
await refreshUpstreams(router, client);
await router.discoverTools();
const paginator = new ResponsePaginator(null, { sizeThreshold: 80000, pageSize: 40000 });
router.setPaginator(paginator);
const response = await router.route({
jsonrpc: '2.0',
id: 'small-1',
method: 'tools/call',
params: { name: 'smallserver/get_status', arguments: {} },
});
expect(response.error).toBeUndefined();
// Should return the raw result directly, not a pagination index
expect(response.result).toEqual({ status: 'ok', uptime: 12345 });
});
it('handles markdown-fenced LLM responses (Gemini quirk)', async () => {
const largeResult = makeLargeToolResult();
mockMcpd = await startMockMcpd({
servers: [{ id: 'srv-nodered', name: 'node-red', transport: 'stdio' }],
proxyResponses: new Map([
['srv-nodered:tools/list', {
result: { tools: [{ name: 'get_flows', description: 'Get all flows' }] },
}],
['srv-nodered:tools/call', {
result: largeResult,
}],
]),
});
const client = new McpdClient(mockMcpd.baseUrl, mockMcpd.config.expectedToken);
router = new McpRouter();
await refreshUpstreams(router, client);
await router.discoverTools();
// Simulate Gemini wrapping JSON in ```json fences
const registry = new ProviderRegistry();
const mockProvider: LlmProvider = {
name: 'gemini-mock',
isAvailable: () => true,
complete: vi.fn().mockResolvedValue({
content: '```json\n' + JSON.stringify([
{ page: 1, summary: 'Climate automation flows' },
{ page: 2, summary: 'Lighting control flows' },
]) + '\n```',
}),
};
registry.register(mockProvider);
const paginator = new ResponsePaginator(registry, {
sizeThreshold: 1000,
pageSize: 8000,
});
router.setPaginator(paginator);
const response = await router.route({
jsonrpc: '2.0',
id: 'fence-1',
method: 'tools/call',
params: { name: 'node-red/get_flows', arguments: {} },
});
expect(response.error).toBeUndefined();
const text = (response.result as { content: Array<{ text: string }> }).content[0]!.text;
// Fences were stripped — smart summaries should appear
expect(text).toContain('AI-generated summaries');
expect(text).toContain('Climate automation flows');
expect(text).toContain('Lighting control flows');
});
it('passes model override to LLM when project has custom model', async () => {
const largeResult = makeLargeToolResult();
mockMcpd = await startMockMcpd({
servers: [{ id: 'srv-nodered', name: 'node-red', transport: 'stdio' }],
proxyResponses: new Map([
['srv-nodered:tools/list', {
result: { tools: [{ name: 'get_flows', description: 'Get all flows' }] },
}],
['srv-nodered:tools/call', {
result: largeResult,
}],
]),
});
const client = new McpdClient(mockMcpd.baseUrl, mockMcpd.config.expectedToken);
router = new McpRouter();
await refreshUpstreams(router, client);
await router.discoverTools();
const registry = new ProviderRegistry();
const completeFn = vi.fn().mockResolvedValue({
content: JSON.stringify([{ page: 1, summary: 'test' }]),
});
const mockProvider: LlmProvider = {
name: 'test-model-override',
isAvailable: () => true,
complete: completeFn,
};
registry.register(mockProvider);
// Paginator with per-project model override
const paginator = new ResponsePaginator(registry, {
sizeThreshold: 1000,
pageSize: 80000, // One big page so we get exactly 1 summary
}, 'gemini-2.5-pro');
router.setPaginator(paginator);
await router.route({
jsonrpc: '2.0',
id: 'model-1',
method: 'tools/call',
params: { name: 'node-red/get_flows', arguments: {} },
});
// Verify the model was passed through to the LLM call
expect(completeFn).toHaveBeenCalledOnce();
const llmOpts = completeFn.mock.calls[0]![0]!;
expect(llmOpts.model).toBe('gemini-2.5-pro');
});
});
});

View File

@@ -150,6 +150,25 @@ describe('ResponsePaginator', () => {
expect(text).toContain('HTTP request nodes and API integrations');
});
it('strips markdown code fences from LLM JSON response', async () => {
const summaries = [
{ page: 1, summary: 'Config section' },
{ page: 2, summary: 'Data section' },
];
// Gemini often wraps JSON in ```json ... ``` fences
const fenced = '```json\n' + JSON.stringify(summaries) + '\n```';
const registry = makeProvider(fenced);
const paginator = new ResponsePaginator(registry, { sizeThreshold: 100, pageSize: 60 });
const raw = makeLargeStringWithNewlines(150);
const result = await paginator.paginate('test/tool', raw);
expect(result).not.toBeNull();
const text = result!.content[0]!.text;
expect(text).toContain('AI-generated summaries');
expect(text).toContain('Config section');
expect(text).toContain('Data section');
});
it('falls back to simple index on LLM failure', async () => {
const provider: LlmProvider = {
name: 'test',
@@ -225,6 +244,56 @@ describe('ResponsePaginator', () => {
const text = result!.content[0]!.text;
expect(text).not.toContain('AI-generated summaries');
});
it('passes modelOverride to provider.complete()', async () => {
  const completeSpy = vi.fn().mockResolvedValue({
    content: JSON.stringify([{ page: 1, summary: 'test' }, { page: 2, summary: 'test2' }]),
  });
  const stubProvider: LlmProvider = {
    name: 'test',
    isAvailable: () => true,
    complete: completeSpy,
  };
  // Hand-rolled registry stand-in: always returns the stub provider.
  const stubRegistry = {
    getActive: () => stubProvider,
    register: vi.fn(),
    setActive: vi.fn(),
    listProviders: () => [{ name: 'test', available: true, active: true }],
  } as unknown as ProviderRegistry;

  // Third constructor arg is the per-project model override.
  const paginator = new ResponsePaginator(
    stubRegistry,
    { sizeThreshold: 100, pageSize: 60 },
    'gemini-2.5-pro',
  );
  await paginator.paginate('test/tool', makeLargeStringWithNewlines(150));

  expect(completeSpy).toHaveBeenCalledOnce();
  expect(completeSpy.mock.calls[0]![0]!.model).toBe('gemini-2.5-pro');
});
it('omits model when no modelOverride set', async () => {
  const completeSpy = vi.fn().mockResolvedValue({
    content: JSON.stringify([{ page: 1, summary: 'test' }, { page: 2, summary: 'test2' }]),
  });
  const stubProvider: LlmProvider = {
    name: 'test',
    isAvailable: () => true,
    complete: completeSpy,
  };
  // Hand-rolled registry stand-in: always returns the stub provider.
  const stubRegistry = {
    getActive: () => stubProvider,
    register: vi.fn(),
    setActive: vi.fn(),
    listProviders: () => [{ name: 'test', available: true, active: true }],
  } as unknown as ProviderRegistry;

  // No third constructor arg — no per-project model override.
  const paginator = new ResponsePaginator(stubRegistry, { sizeThreshold: 100, pageSize: 60 });
  await paginator.paginate('test/tool', makeLargeStringWithNewlines(150));

  expect(completeSpy).toHaveBeenCalledOnce();
  expect(completeSpy.mock.calls[0]![0]!.model).toBeUndefined();
});
});
// --- getPage ---

View File

@@ -6,8 +6,18 @@ import { registerProjectMcpEndpoint } from '../src/http/project-mcp-endpoint.js'
// Mock discovery module — we don't want real HTTP calls.
// NOTE: vi.mock() calls are hoisted to the top of the file by vitest,
// so the factory must not reference any non-hoisted local bindings.
// refreshProjectUpstreams resolves to a canned single-server list;
// fetchProjectLlmConfig resolves to an empty object (no mcpd-side LLM
// recommendation) so tests exercise the fallback path.
vi.mock('../src/discovery.js', () => ({
refreshProjectUpstreams: vi.fn(async () => ['mock-server']),
fetchProjectLlmConfig: vi.fn(async () => ({})),
}));
// Mock config module — don't read real config files.
// Keep every real export via importActual, but stub loadProjectLlmOverride
// to return undefined so no per-project LLM override is picked up from disk.
vi.mock('../src/http/config.js', async () => {
const actual = await vi.importActual<typeof import('../src/http/config.js')>('../src/http/config.js');
return {
...actual,
loadProjectLlmOverride: vi.fn(() => undefined),
};
});
import { refreshProjectUpstreams } from '../src/discovery.js';
function mockMcpdClient() {