Adds tier-based LLM routing so fast local models (vLLM, Ollama) handle
structured tasks while cloud models (Gemini, Anthropic) are reserved for
heavy reasoning. Single-provider configs continue to work via fallback.
- Tier type + ProviderRegistry with assignTier/getProvider/fallback chain
- Multi-provider config format: { providers: [{ name, type, tier, ... }] }
- NamedProvider wrapper for multiple instances of same provider type
- Setup wizard: Simple (legacy) / Advanced (fast+heavy tiers) modes
- Status display: tiered view with /llm/providers endpoint
- Call sites use getProvider('fast') instead of getActive()
- Full backward compatibility with existing single-provider configs
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
360 lines · 11 KiB · TypeScript
import { randomUUID } from 'node:crypto';
|
|
import type { ProviderRegistry } from '../providers/registry.js';
|
|
import { estimateTokens } from './token-counter.js';
|
|
|
|
// --- Configuration ---
|
|
|
|
export interface PaginationConfig {
|
|
/** Character threshold above which responses get paginated (default 80_000) */
|
|
sizeThreshold: number;
|
|
/** Characters per page (default 40_000) */
|
|
pageSize: number;
|
|
/** Max cached results (LRU eviction) (default 64) */
|
|
maxCachedResults: number;
|
|
/** TTL for cached results in ms (default 300_000 = 5 min) */
|
|
ttlMs: number;
|
|
/** Max tokens for the LLM index generation call (default 2048) */
|
|
indexMaxTokens: number;
|
|
}
|
|
|
|
export const DEFAULT_PAGINATION_CONFIG: PaginationConfig = {
|
|
sizeThreshold: 80_000,
|
|
pageSize: 40_000,
|
|
maxCachedResults: 64,
|
|
ttlMs: 300_000,
|
|
indexMaxTokens: 2048,
|
|
};
|
|
|
|
// --- Cache Entry ---
|
|
|
|
interface PageInfo {
|
|
/** 0-based page index */
|
|
index: number;
|
|
/** Start character offset in the raw string */
|
|
startChar: number;
|
|
/** End character offset (exclusive) */
|
|
endChar: number;
|
|
/** Approximate token count */
|
|
estimatedTokens: number;
|
|
}
|
|
|
|
interface CachedResult {
|
|
resultId: string;
|
|
toolName: string;
|
|
raw: string;
|
|
pages: PageInfo[];
|
|
index: PaginationIndex;
|
|
createdAt: number;
|
|
}
|
|
|
|
// --- Index Types ---
|
|
|
|
export interface PageSummary {
|
|
page: number;
|
|
startChar: number;
|
|
endChar: number;
|
|
estimatedTokens: number;
|
|
summary: string;
|
|
}
|
|
|
|
export interface PaginationIndex {
|
|
resultId: string;
|
|
toolName: string;
|
|
totalSize: number;
|
|
totalTokens: number;
|
|
totalPages: number;
|
|
pageSummaries: PageSummary[];
|
|
indexType: 'smart' | 'simple';
|
|
}
|
|
|
|
// --- The MCP response format ---
|
|
|
|
export interface PaginatedToolResponse {
|
|
content: Array<{
|
|
type: 'text';
|
|
text: string;
|
|
}>;
|
|
}
|
|
|
|
// --- LLM Prompt ---
|
|
|
|
export const PAGINATION_INDEX_SYSTEM_PROMPT = `You are a document indexing assistant. Given a large tool response split into pages, generate a concise summary for each page describing what data it contains.
|
|
|
|
Rules:
|
|
- For each page, write 1-2 sentences describing the key content
|
|
- Be specific: mention entity names, IDs, counts, or key fields visible on that page
|
|
- If it's JSON, describe the structure and notable entries
|
|
- If it's text, describe the topics covered
|
|
- Output valid JSON only: an array of objects with "page" (1-based number) and "summary" (string)
|
|
- Example output: [{"page": 1, "summary": "Configuration nodes and global settings (inject, debug, function nodes 1-15)"}, {"page": 2, "summary": "HTTP request nodes and API integrations (nodes 16-40)"}]`;
|
|
|
|
/**
|
|
* Handles transparent pagination of large MCP tool responses.
|
|
*
|
|
* When a tool response exceeds the size threshold, it is cached and an
|
|
* index is returned instead. The LLM can then request specific pages
|
|
* via _page/_resultId parameters on subsequent tool calls.
|
|
*
|
|
* If an LLM provider is available, the index includes AI-generated
|
|
* per-page summaries. Otherwise, simple byte-range descriptions are used.
|
|
*/
|
|
export class ResponsePaginator {
|
|
private cache = new Map<string, CachedResult>();
|
|
private readonly config: PaginationConfig;
|
|
|
|
constructor(
|
|
private providers: ProviderRegistry | null,
|
|
config: Partial<PaginationConfig> = {},
|
|
private modelOverride?: string,
|
|
) {
|
|
this.config = { ...DEFAULT_PAGINATION_CONFIG, ...config };
|
|
}
|
|
|
|
/**
|
|
* Check if a raw response string should be paginated.
|
|
*/
|
|
shouldPaginate(raw: string): boolean {
|
|
return raw.length >= this.config.sizeThreshold;
|
|
}
|
|
|
|
/**
|
|
* Paginate a large response: cache it and return the index.
|
|
* Returns null if the response is below threshold.
|
|
*/
|
|
async paginate(toolName: string, raw: string): Promise<PaginatedToolResponse | null> {
|
|
if (!this.shouldPaginate(raw)) return null;
|
|
|
|
const resultId = randomUUID();
|
|
const pages = this.splitPages(raw);
|
|
let index: PaginationIndex;
|
|
|
|
try {
|
|
index = await this.generateSmartIndex(resultId, toolName, raw, pages);
|
|
} catch (err) {
|
|
console.error(`[pagination] Smart index failed for ${toolName}, falling back to simple:`, err instanceof Error ? err.message : String(err));
|
|
index = this.generateSimpleIndex(resultId, toolName, raw, pages);
|
|
}
|
|
|
|
// Store in cache
|
|
this.evictExpired();
|
|
this.evictLRU();
|
|
this.cache.set(resultId, {
|
|
resultId,
|
|
toolName,
|
|
raw,
|
|
pages,
|
|
index,
|
|
createdAt: Date.now(),
|
|
});
|
|
|
|
return this.formatIndexResponse(index);
|
|
}
|
|
|
|
/**
|
|
* Serve a specific page from cache.
|
|
* Returns null if the resultId is not found (cache miss / expired).
|
|
*/
|
|
getPage(resultId: string, page: number | 'all'): PaginatedToolResponse | null {
|
|
this.evictExpired();
|
|
const entry = this.cache.get(resultId);
|
|
if (!entry) return null;
|
|
|
|
if (page === 'all') {
|
|
return {
|
|
content: [{ type: 'text', text: entry.raw }],
|
|
};
|
|
}
|
|
|
|
// Pages are 1-based in the API
|
|
const pageInfo = entry.pages[page - 1];
|
|
if (!pageInfo) {
|
|
return {
|
|
content: [{
|
|
type: 'text',
|
|
text: `Error: page ${String(page)} is out of range. This result has ${String(entry.pages.length)} pages (1-${String(entry.pages.length)}).`,
|
|
}],
|
|
};
|
|
}
|
|
|
|
const pageContent = entry.raw.slice(pageInfo.startChar, pageInfo.endChar);
|
|
return {
|
|
content: [{
|
|
type: 'text',
|
|
text: `[Page ${String(page)}/${String(entry.pages.length)} of result ${resultId}]\n\n${pageContent}`,
|
|
}],
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Check if a tool call has pagination parameters (_page / _resultId).
|
|
* Returns the parsed pagination request, or null if not a pagination request.
|
|
*/
|
|
static extractPaginationParams(
|
|
args: Record<string, unknown>,
|
|
): { resultId: string; page: number | 'all' } | null {
|
|
const resultId = args['_resultId'];
|
|
const pageParam = args['_page'];
|
|
if (typeof resultId !== 'string' || pageParam === undefined) return null;
|
|
|
|
if (pageParam === 'all') return { resultId, page: 'all' };
|
|
|
|
const page = Number(pageParam);
|
|
if (!Number.isInteger(page) || page < 1) return null;
|
|
|
|
return { resultId, page };
|
|
}
|
|
|
|
// --- Private methods ---
|
|
|
|
private splitPages(raw: string): PageInfo[] {
|
|
const pages: PageInfo[] = [];
|
|
let offset = 0;
|
|
let pageIndex = 0;
|
|
|
|
while (offset < raw.length) {
|
|
const end = Math.min(offset + this.config.pageSize, raw.length);
|
|
// Try to break at a newline boundary if we're not at the end
|
|
let breakAt = end;
|
|
if (end < raw.length) {
|
|
const lastNewline = raw.lastIndexOf('\n', end);
|
|
if (lastNewline > offset) {
|
|
breakAt = lastNewline + 1;
|
|
}
|
|
}
|
|
|
|
pages.push({
|
|
index: pageIndex,
|
|
startChar: offset,
|
|
endChar: breakAt,
|
|
estimatedTokens: estimateTokens(raw.slice(offset, breakAt)),
|
|
});
|
|
|
|
offset = breakAt;
|
|
pageIndex++;
|
|
}
|
|
|
|
return pages;
|
|
}
|
|
|
|
private async generateSmartIndex(
|
|
resultId: string,
|
|
toolName: string,
|
|
raw: string,
|
|
pages: PageInfo[],
|
|
): Promise<PaginationIndex> {
|
|
const provider = this.providers?.getProvider('fast');
|
|
if (!provider) {
|
|
return this.generateSimpleIndex(resultId, toolName, raw, pages);
|
|
}
|
|
|
|
// Build a prompt with page previews (first ~500 chars of each page)
|
|
const previews = pages.map((p, i) => {
|
|
const preview = raw.slice(p.startChar, Math.min(p.startChar + 500, p.endChar));
|
|
const truncated = p.endChar - p.startChar > 500 ? '\n[...]' : '';
|
|
return `--- Page ${String(i + 1)} (chars ${String(p.startChar)}-${String(p.endChar)}, ~${String(p.estimatedTokens)} tokens) ---\n${preview}${truncated}`;
|
|
}).join('\n\n');
|
|
|
|
const result = await provider.complete({
|
|
messages: [
|
|
{ role: 'system', content: PAGINATION_INDEX_SYSTEM_PROMPT },
|
|
{ role: 'user', content: `Tool: ${toolName}\nTotal size: ${String(raw.length)} chars, ${String(pages.length)} pages\n\n${previews}` },
|
|
],
|
|
maxTokens: this.config.indexMaxTokens,
|
|
temperature: 0,
|
|
...(this.modelOverride ? { model: this.modelOverride } : {}),
|
|
});
|
|
|
|
// LLMs often wrap JSON in ```json ... ``` fences — strip them
|
|
const cleaned = result.content.replace(/^```(?:json)?\s*\n?/i, '').replace(/\n?```\s*$/i, '').trim();
|
|
const summaries = JSON.parse(cleaned) as Array<{ page: number; summary: string }>;
|
|
|
|
return {
|
|
resultId,
|
|
toolName,
|
|
totalSize: raw.length,
|
|
totalTokens: estimateTokens(raw),
|
|
totalPages: pages.length,
|
|
indexType: 'smart',
|
|
pageSummaries: pages.map((p, i) => ({
|
|
page: i + 1,
|
|
startChar: p.startChar,
|
|
endChar: p.endChar,
|
|
estimatedTokens: p.estimatedTokens,
|
|
summary: summaries.find((s) => s.page === i + 1)?.summary ?? `Page ${String(i + 1)}`,
|
|
})),
|
|
};
|
|
}
|
|
|
|
private generateSimpleIndex(
|
|
resultId: string,
|
|
toolName: string,
|
|
raw: string,
|
|
pages: PageInfo[],
|
|
): PaginationIndex {
|
|
return {
|
|
resultId,
|
|
toolName,
|
|
totalSize: raw.length,
|
|
totalTokens: estimateTokens(raw),
|
|
totalPages: pages.length,
|
|
indexType: 'simple',
|
|
pageSummaries: pages.map((p, i) => ({
|
|
page: i + 1,
|
|
startChar: p.startChar,
|
|
endChar: p.endChar,
|
|
estimatedTokens: p.estimatedTokens,
|
|
summary: `Page ${String(i + 1)}: characters ${String(p.startChar)}-${String(p.endChar)} (~${String(p.estimatedTokens)} tokens)`,
|
|
})),
|
|
};
|
|
}
|
|
|
|
private formatIndexResponse(index: PaginationIndex): PaginatedToolResponse {
|
|
const lines = [
|
|
`This response is too large to return directly (${String(index.totalSize)} chars, ~${String(index.totalTokens)} tokens).`,
|
|
`It has been split into ${String(index.totalPages)} pages.`,
|
|
'',
|
|
'To retrieve a specific page, call this same tool again with additional arguments:',
|
|
` "_resultId": "${index.resultId}"`,
|
|
` "_page": <page_number> (1-${String(index.totalPages)})`,
|
|
' "_page": "all" (returns the full response)',
|
|
'',
|
|
`--- Page Index${index.indexType === 'smart' ? ' (AI-generated summaries)' : ''} ---`,
|
|
];
|
|
|
|
for (const page of index.pageSummaries) {
|
|
lines.push(` Page ${String(page.page)}: ${page.summary}`);
|
|
}
|
|
|
|
return {
|
|
content: [{ type: 'text', text: lines.join('\n') }],
|
|
};
|
|
}
|
|
|
|
private evictExpired(): void {
|
|
const now = Date.now();
|
|
for (const [id, entry] of this.cache) {
|
|
if (now - entry.createdAt > this.config.ttlMs) {
|
|
this.cache.delete(id);
|
|
}
|
|
}
|
|
}
|
|
|
|
private evictLRU(): void {
|
|
while (this.cache.size >= this.config.maxCachedResults) {
|
|
const oldest = this.cache.keys().next();
|
|
if (oldest.done) break;
|
|
this.cache.delete(oldest.value);
|
|
}
|
|
}
|
|
|
|
/** Exposed for testing. */
|
|
get cacheSize(): number {
|
|
return this.cache.size;
|
|
}
|
|
|
|
/** Clear all cached results. */
|
|
clearCache(): void {
|
|
this.cache.clear();
|
|
}
|
|
}
|