Files
mcpctl/src/mcplocal/src/llm/pagination.ts
Michal 9ce705608b
Some checks failed
CI / lint (pull_request) Has been cancelled
CI / typecheck (pull_request) Has been cancelled
CI / test (pull_request) Has been cancelled
CI / build (pull_request) Has been cancelled
CI / package (pull_request) Has been cancelled
feat: tiered LLM providers (fast/heavy) with multi-provider config
Adds tier-based LLM routing so fast local models (vLLM, Ollama) handle
structured tasks while cloud models (Gemini, Anthropic) are reserved for
heavy reasoning. Single-provider configs continue to work via fallback.

- Tier type + ProviderRegistry with assignTier/getProvider/fallback chain
- Multi-provider config format: { providers: [{ name, type, tier, ... }] }
- NamedProvider wrapper for multiple instances of same provider type
- Setup wizard: Simple (legacy) / Advanced (fast+heavy tiers) modes
- Status display: tiered view with /llm/providers endpoint
- Call sites use getProvider('fast') instead of getActive()
- Full backward compatibility with existing single-provider configs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 02:16:08 +00:00

360 lines
11 KiB
TypeScript

import { randomUUID } from 'node:crypto';
import type { ProviderRegistry } from '../providers/registry.js';
import { estimateTokens } from './token-counter.js';
// --- Configuration ---
/**
 * Tunable limits for response pagination and for the in-memory cache of
 * paginated results.
 */
export interface PaginationConfig {
  /** Response length, in characters, at or above which pagination kicks in (default 80_000). */
  sizeThreshold: number;

  /** Target number of characters per page (default 40_000). */
  pageSize: number;

  /** Maximum number of results kept in the cache before older entries are evicted (default 64). */
  maxCachedResults: number;

  /** Lifetime of a cached result in milliseconds (default 300_000, i.e. 5 minutes). */
  ttlMs: number;

  /** Token budget for the LLM call that generates the page index (default 2048). */
  indexMaxTokens: number;
}
/**
 * Defaults applied to every setting the caller does not override.
 */
export const DEFAULT_PAGINATION_CONFIG: PaginationConfig = {
  // Paginate once a response reaches 80k characters...
  sizeThreshold: 80 * 1000,
  // ...in pages of roughly 40k characters each.
  pageSize: 40 * 1000,
  maxCachedResults: 64,
  // Five minutes, expressed in milliseconds.
  ttlMs: 5 * 60 * 1000,
  indexMaxTokens: 2048,
};
// --- Cache Entry ---
/**
 * Location of a single page inside the cached raw response string.
 */
interface PageInfo {
  /** Position of the page in the page list, counted from 0. */
  index: number;

  /** Offset of the page's first character within the raw string. */
  startChar: number;

  /** Offset just past the page's last character (exclusive bound). */
  endChar: number;

  /** Rough token count of the page's text. */
  estimatedTokens: number;
}
/**
 * Everything retained for one paginated tool response.
 */
interface CachedResult {
  /** Identifier handed back to the caller for follow-up page requests. */
  resultId: string;

  /** Name of the tool that produced the response. */
  toolName: string;

  /** The complete, unsplit response text. */
  raw: string;

  /** Page boundaries computed over `raw`. */
  pages: Array<PageInfo>;

  /** The index that was returned in place of the full response. */
  index: PaginationIndex;

  /** Timestamp (ms since epoch) at which the entry was cached. */
  createdAt: number;
}
// --- Index Types ---
/**
 * One row of the page index: where a page lives and what it contains.
 */
export interface PageSummary {
  /** Page number as exposed to callers, counted from 1. */
  page: number;

  /** Offset of the page's first character within the raw response. */
  startChar: number;

  /** Offset just past the page's last character (exclusive bound). */
  endChar: number;

  /** Rough token count of the page's text. */
  estimatedTokens: number;

  /** Human-readable description of the page's content. */
  summary: string;
}
/**
 * Table of contents returned to the caller in place of an oversized response.
 */
export interface PaginationIndex {
  /** Identifier to pass back when requesting individual pages. */
  resultId: string;

  /** Name of the tool that produced the response. */
  toolName: string;

  /** Length of the full response in characters. */
  totalSize: number;

  /** Rough token count of the full response. */
  totalTokens: number;

  /** Number of pages the response was split into. */
  totalPages: number;

  /** One entry per page, in page order. */
  pageSummaries: Array<PageSummary>;

  /** `'smart'` when summaries came from an LLM, `'simple'` for plain character-range descriptions. */
  indexType: 'simple' | 'smart';
}
// --- The MCP response format ---
/**
 * Tool-call result shape produced by the paginator: a list of text content parts.
 */
export interface PaginatedToolResponse {
  /** Text parts making up the response body. */
  content: { type: 'text'; text: string }[];
}
// --- LLM Prompt ---
/**
 * System prompt for the LLM call that summarizes each page of a paginated
 * response. It asks the model to answer with a JSON array of
 * `{ "page", "summary" }` objects.
 */
export const PAGINATION_INDEX_SYSTEM_PROMPT = [
  'You are a document indexing assistant. Given a large tool response split into pages, generate a concise summary for each page describing what data it contains.',
  'Rules:',
  '- For each page, write 1-2 sentences describing the key content',
  '- Be specific: mention entity names, IDs, counts, or key fields visible on that page',
  "- If it's JSON, describe the structure and notable entries",
  "- If it's text, describe the topics covered",
  '- Output valid JSON only: an array of objects with "page" (1-based number) and "summary" (string)',
  '- Example output: [{"page": 1, "summary": "Configuration nodes and global settings (inject, debug, function nodes 1-15)"}, {"page": 2, "summary": "HTTP request nodes and API integrations (nodes 16-40)"}]',
].join('\n');
/**
 * Handles transparent pagination of large MCP tool responses.
 *
 * When a tool response reaches the size threshold, it is cached and an
 * index is returned instead. The LLM can then request specific pages
 * via _page/_resultId parameters on subsequent tool calls.
 *
 * If an LLM provider is available, the index includes AI-generated
 * per-page summaries. Otherwise, simple character-range descriptions are used.
 */
export class ResponsePaginator {
  /**
   * Cached results keyed by resultId. The Map's insertion order doubles as
   * the recency order: the first key is the least recently used entry.
   */
  private readonly cache = new Map<string, CachedResult>();
  private readonly config: PaginationConfig;

  /**
   * @param providers LLM provider registry used for smart indexes, or null to always use simple indexes.
   * @param config Overrides merged on top of DEFAULT_PAGINATION_CONFIG.
   * @param modelOverride Optional model name forwarded to the provider for the index call.
   */
  constructor(
    private providers: ProviderRegistry | null,
    config: Partial<PaginationConfig> = {},
    private modelOverride?: string,
  ) {
    this.config = { ...DEFAULT_PAGINATION_CONFIG, ...config };
  }

  /**
   * Check if a raw response string should be paginated.
   *
   * @returns true when the response length is at or above the configured threshold.
   */
  shouldPaginate(raw: string): boolean {
    return raw.length >= this.config.sizeThreshold;
  }

  /**
   * Paginate a large response: cache it and return the index.
   *
   * A smart (LLM-summarized) index is attempted first; any failure there is
   * logged and a simple index is used instead, so this method does not reject
   * because of an index-generation error.
   *
   * @returns The index response, or null if the response is below threshold.
   */
  async paginate(toolName: string, raw: string): Promise<PaginatedToolResponse | null> {
    if (!this.shouldPaginate(raw)) return null;

    const resultId = randomUUID();
    const pages = this.splitPages(raw);

    let index: PaginationIndex;
    try {
      index = await this.generateSmartIndex(resultId, toolName, raw, pages);
    } catch (err) {
      console.error(`[pagination] Smart index failed for ${toolName}, falling back to simple:`, err instanceof Error ? err.message : String(err));
      index = this.generateSimpleIndex(resultId, toolName, raw, pages);
    }

    // Make room first (expired entries, then least recently used), then store.
    this.evictExpired();
    this.evictLRU();
    this.cache.set(resultId, {
      resultId,
      toolName,
      raw,
      pages,
      index,
      createdAt: Date.now(),
    });

    return this.formatIndexResponse(index);
  }

  /**
   * Serve a specific page from cache.
   *
   * A successful lookup marks the entry as most recently used, so a result
   * that is actively being paged through is the last candidate for eviction.
   * The TTL is still measured from creation time and is not extended.
   *
   * @param page 1-based page number, or 'all' for the complete response.
   * @returns The page (or an out-of-range error message), or null if the
   *   resultId is not found (cache miss / expired).
   */
  getPage(resultId: string, page: number | 'all'): PaginatedToolResponse | null {
    this.evictExpired();
    const entry = this.cache.get(resultId);
    if (!entry) return null;

    // Re-insert to move the entry to the most-recently-used end of the Map.
    this.cache.delete(resultId);
    this.cache.set(resultId, entry);

    if (page === 'all') {
      return {
        content: [{ type: 'text', text: entry.raw }],
      };
    }

    // Pages are 1-based in the API
    const pageInfo = entry.pages[page - 1];
    if (!pageInfo) {
      return {
        content: [{
          type: 'text',
          text: `Error: page ${String(page)} is out of range. This result has ${String(entry.pages.length)} pages (1-${String(entry.pages.length)}).`,
        }],
      };
    }

    const pageContent = entry.raw.slice(pageInfo.startChar, pageInfo.endChar);
    return {
      content: [{
        type: 'text',
        text: `[Page ${String(page)}/${String(entry.pages.length)} of result ${resultId}]\n\n${pageContent}`,
      }],
    };
  }

  /**
   * Check if a tool call has pagination parameters (_page / _resultId).
   *
   * `_page` must be the string 'all', a positive integer, or a non-blank
   * string that converts to a positive integer. Other value types (booleans,
   * arrays, objects, null) are rejected rather than coerced.
   *
   * @returns The parsed pagination request, or null if not a pagination request.
   */
  static extractPaginationParams(
    args: Record<string, unknown>,
  ): { resultId: string; page: number | 'all' } | null {
    const resultId = args['_resultId'];
    const pageParam = args['_page'];
    if (typeof resultId !== 'string' || pageParam === undefined) return null;
    if (pageParam === 'all') return { resultId, page: 'all' };

    let page: number;
    if (typeof pageParam === 'number') {
      page = pageParam;
    } else if (typeof pageParam === 'string' && pageParam.trim() !== '') {
      page = Number(pageParam);
    } else {
      // Number(true) === 1 and Number([2]) === 2; do not treat those as page numbers.
      return null;
    }

    if (!Number.isInteger(page) || page < 1) return null;
    return { resultId, page };
  }

  // --- Private methods ---

  /**
   * Split the raw string into consecutive, non-overlapping pages that
   * together cover it exactly. Breaks after a newline where one is available
   * within the page, and never between the halves of a surrogate pair.
   */
  private splitPages(raw: string): PageInfo[] {
    // A page size below 1 (or NaN) would never advance the offset; use the default instead.
    const configured = this.config.pageSize;
    const pageSize = configured >= 1 ? Math.floor(configured) : DEFAULT_PAGINATION_CONFIG.pageSize;

    const pages: PageInfo[] = [];
    let offset = 0;
    while (offset < raw.length) {
      const end = Math.min(offset + pageSize, raw.length);

      // Try to break at a newline boundary if we're not at the end
      let breakAt = end;
      if (end < raw.length) {
        const lastNewline = raw.lastIndexOf('\n', end);
        if (lastNewline > offset) {
          breakAt = lastNewline + 1;
        } else {
          const before = raw.charCodeAt(end - 1);
          const after = raw.charCodeAt(end);
          const splitsSurrogatePair =
            before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
          if (splitsSurrogatePair) {
            // Keep the pair together: step back when that still makes progress,
            // otherwise include the low surrogate in this page.
            breakAt = end - 1 > offset ? end - 1 : end + 1;
          }
        }
      }

      pages.push({
        index: pages.length,
        startChar: offset,
        endChar: breakAt,
        estimatedTokens: estimateTokens(raw.slice(offset, breakAt)),
      });
      offset = breakAt;
    }
    return pages;
  }

  /**
   * Build an index whose per-page summaries come from the 'fast' LLM provider.
   * Returns a simple index when no provider is available.
   *
   * @throws If the provider call fails or its output is not a JSON array;
   *   paginate() catches this and falls back to the simple index.
   */
  private async generateSmartIndex(
    resultId: string,
    toolName: string,
    raw: string,
    pages: PageInfo[],
  ): Promise<PaginationIndex> {
    const provider = this.providers?.getProvider('fast');
    if (!provider) {
      return this.generateSimpleIndex(resultId, toolName, raw, pages);
    }

    // Build a prompt with page previews (first ~500 chars of each page)
    const previews = pages.map((p, i) => {
      const preview = raw.slice(p.startChar, Math.min(p.startChar + 500, p.endChar));
      const truncated = p.endChar - p.startChar > 500 ? '\n[...]' : '';
      return `--- Page ${String(i + 1)} (chars ${String(p.startChar)}-${String(p.endChar)}, ~${String(p.estimatedTokens)} tokens) ---\n${preview}${truncated}`;
    }).join('\n\n');

    const result = await provider.complete({
      messages: [
        { role: 'system', content: PAGINATION_INDEX_SYSTEM_PROMPT },
        { role: 'user', content: `Tool: ${toolName}\nTotal size: ${String(raw.length)} chars, ${String(pages.length)} pages\n\n${previews}` },
      ],
      maxTokens: this.config.indexMaxTokens,
      temperature: 0,
      ...(this.modelOverride ? { model: this.modelOverride } : {}),
    });

    // LLMs often wrap JSON in ```json ... ``` fences — strip them
    const cleaned = result.content.replace(/^```(?:json)?\s*\n?/i, '').replace(/\n?```\s*$/i, '').trim();

    // Model output is untrusted: validate the shape instead of casting it.
    const parsed: unknown = JSON.parse(cleaned);
    if (!Array.isArray(parsed)) {
      throw new Error('LLM index response is not a JSON array');
    }
    const summaryByPage = new Map<number, string>();
    for (const item of parsed as unknown[]) {
      if (typeof item !== 'object' || item === null) continue;
      const { page, summary } = item as { page?: unknown; summary?: unknown };
      // Keep the first well-formed entry per page; ignore malformed ones.
      if (typeof page === 'number' && typeof summary === 'string' && !summaryByPage.has(page)) {
        summaryByPage.set(page, summary);
      }
    }

    return {
      resultId,
      toolName,
      totalSize: raw.length,
      totalTokens: estimateTokens(raw),
      totalPages: pages.length,
      indexType: 'smart',
      pageSummaries: pages.map((p, i) => ({
        page: i + 1,
        startChar: p.startChar,
        endChar: p.endChar,
        estimatedTokens: p.estimatedTokens,
        // Pages the model skipped (or described badly) get a plain placeholder.
        summary: summaryByPage.get(i + 1) ?? `Page ${String(i + 1)}`,
      })),
    };
  }

  /**
   * Build an index that describes each page by its character range and
   * estimated token count only (no LLM involved).
   */
  private generateSimpleIndex(
    resultId: string,
    toolName: string,
    raw: string,
    pages: PageInfo[],
  ): PaginationIndex {
    return {
      resultId,
      toolName,
      totalSize: raw.length,
      totalTokens: estimateTokens(raw),
      totalPages: pages.length,
      indexType: 'simple',
      pageSummaries: pages.map((p, i) => ({
        page: i + 1,
        startChar: p.startChar,
        endChar: p.endChar,
        estimatedTokens: p.estimatedTokens,
        summary: `Page ${String(i + 1)}: characters ${String(p.startChar)}-${String(p.endChar)} (~${String(p.estimatedTokens)} tokens)`,
      })),
    };
  }

  /**
   * Render the index as the text response shown to the LLM, including the
   * instructions for requesting pages via _resultId / _page.
   */
  private formatIndexResponse(index: PaginationIndex): PaginatedToolResponse {
    const lines = [
      `This response is too large to return directly (${String(index.totalSize)} chars, ~${String(index.totalTokens)} tokens).`,
      `It has been split into ${String(index.totalPages)} pages.`,
      '',
      'To retrieve a specific page, call this same tool again with additional arguments:',
      ` "_resultId": "${index.resultId}"`,
      ` "_page": <page_number> (1-${String(index.totalPages)})`,
      ' "_page": "all" (returns the full response)',
      '',
      `--- Page Index${index.indexType === 'smart' ? ' (AI-generated summaries)' : ''} ---`,
    ];
    for (const page of index.pageSummaries) {
      lines.push(` Page ${String(page.page)}: ${page.summary}`);
    }
    return {
      content: [{ type: 'text', text: lines.join('\n') }],
    };
  }

  /** Drop every entry whose age exceeds the configured TTL. */
  private evictExpired(): void {
    const now = Date.now();
    for (const [id, entry] of this.cache) {
      if (now - entry.createdAt > this.config.ttlMs) {
        this.cache.delete(id);
      }
    }
  }

  /**
   * Evict least recently used entries until there is room for one more.
   * The first Map key is the least recently used (see getPage).
   */
  private evictLRU(): void {
    while (this.cache.size >= this.config.maxCachedResults) {
      const oldest = this.cache.keys().next();
      if (oldest.done) break;
      this.cache.delete(oldest.value);
    }
  }

  /** Exposed for testing. */
  get cacheSize(): number {
    return this.cache.size;
  }

  /** Clear all cached results. */
  clearCache(): void {
    this.cache.clear();
  }
}