feat: tiered LLM providers (fast/heavy) with multi-provider config
Some checks failed
CI / lint (pull_request) Has been cancelled
CI / typecheck (pull_request) Has been cancelled
CI / test (pull_request) Has been cancelled
CI / build (pull_request) Has been cancelled
CI / package (pull_request) Has been cancelled

Adds tier-based LLM routing so fast local models (vLLM, Ollama) handle
structured tasks while cloud models (Gemini, Anthropic) are reserved for
heavy reasoning. Single-provider configs continue to work via fallback.

- Tier type + ProviderRegistry with assignTier/getProvider/fallback chain
- Multi-provider config format: { providers: [{ name, type, tier, ... }] }
- NamedProvider wrapper for multiple instances of same provider type
- Setup wizard: Simple (legacy) / Advanced (fast+heavy tiers) modes
- Status display: tiered view with /llm/providers endpoint
- Call sites use getProvider('fast') instead of getActive()
- Full backward compatibility with existing single-provider configs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Michal
2026-02-25 02:16:08 +00:00
parent 0824f8e635
commit 9ce705608b
17 changed files with 834 additions and 285 deletions

View File

@@ -44,13 +44,27 @@ export interface LlmFileConfig {
binaryPath?: string;
}
/** Multi-provider entry from config file. */
export interface LlmProviderFileEntry {
  /** User-chosen instance name; may differ from `type` (e.g. "vllm-local" backed by the openai provider). */
  name: string;
  /** Provider implementation key, e.g. 'gemini-cli', 'ollama', 'anthropic', 'openai', 'deepseek', 'vllm'. */
  type: string;
  /** Default model identifier forwarded to the provider config. */
  model?: string;
  /** Base URL for HTTP-backed providers (ollama/openai/deepseek/vllm). */
  url?: string;
  /** Path to a local CLI binary (used by gemini-cli). */
  binaryPath?: string;
  /** Routing tier; entries without a tier participate only in legacy active-provider mode. */
  tier?: 'fast' | 'heavy';
}
export interface ProjectLlmOverride {
model?: string;
provider?: string;
}
interface LlmMultiFileConfig {
providers: LlmProviderFileEntry[];
}
interface McpctlConfig {
llm?: LlmFileConfig;
llm?: LlmFileConfig | LlmMultiFileConfig;
projects?: Record<string, { llm?: ProjectLlmOverride }>;
}
@@ -70,16 +84,58 @@ function loadFullConfig(): McpctlConfig {
}
}
/** Type guard: is config the multi-provider format? */
function isMultiConfig(llm: LlmFileConfig | LlmMultiFileConfig): llm is LlmMultiFileConfig {
  // Treat as multi-provider only when `providers` exists AND is an array.
  const candidate = llm as LlmMultiFileConfig;
  return 'providers' in llm && Array.isArray(candidate.providers);
}
/**
* Load LLM configuration from ~/.mcpctl/config.json.
* Returns undefined if no LLM section is configured.
* @deprecated Use loadLlmProviders() for multi-provider support.
*/
export function loadLlmConfig(): LlmFileConfig | undefined {
const config = loadFullConfig();
if (!config.llm?.provider || config.llm.provider === 'none') return undefined;
if (!config.llm) return undefined;
if (isMultiConfig(config.llm)) {
// Multi-provider format — return first provider as legacy compat
const first = config.llm.providers[0];
if (!first) return undefined;
const legacy: LlmFileConfig = { provider: first.type };
if (first.model) legacy.model = first.model;
if (first.url) legacy.url = first.url;
if (first.binaryPath) legacy.binaryPath = first.binaryPath;
return legacy;
}
if (!config.llm.provider || config.llm.provider === 'none') return undefined;
return config.llm;
}
/**
 * Load LLM providers from ~/.mcpctl/config.json.
 * Normalizes both legacy single-provider and multi-provider formats.
 * Returns empty array if no LLM is configured.
 */
export function loadLlmProviders(): LlmProviderFileEntry[] {
  const { llm } = loadFullConfig();
  if (!llm) return [];

  // Multi-provider format: use entries as-is, dropping explicit 'none' types.
  if (isMultiConfig(llm)) {
    return llm.providers.filter((entry) => entry.type !== 'none');
  }

  // Legacy single-provider format → normalize to one entry.
  if (!llm.provider || llm.provider === 'none') return [];
  const normalized: LlmProviderFileEntry = {
    name: llm.provider,
    type: llm.provider,
  };
  // Copy optional fields only when present (avoids explicit `undefined` props).
  if (llm.model) normalized.model = llm.model;
  if (llm.url) normalized.url = llm.url;
  if (llm.binaryPath) normalized.binaryPath = llm.binaryPath;
  return [normalized];
}
/**
* Load per-project LLM override from ~/.mcpctl/config.json.
* Returns the project-specific model/provider override, or undefined.

View File

@@ -87,7 +87,7 @@ export async function createHttpServer(
const LLM_HEALTH_CACHE_MS = 10 * 60 * 1000; // 10 minutes
app.get('/llm/health', async (_request, reply) => {
const provider = deps.providerRegistry?.getActive() ?? null;
const provider = deps.providerRegistry?.getProvider('fast') ?? null;
if (!provider) {
reply.code(200).send({ status: 'not configured' });
return;
@@ -127,7 +127,7 @@ export async function createHttpServer(
// LLM models — list available models from the active provider
app.get('/llm/models', async (_request, reply) => {
const provider = deps.providerRegistry?.getActive() ?? null;
const provider = deps.providerRegistry?.getProvider('fast') ?? null;
if (!provider) {
reply.code(200).send({ models: [], provider: null });
return;
@@ -140,6 +140,22 @@ export async function createHttpServer(
}
});
// LLM providers — list all registered providers with tier assignments
app.get('/llm/providers', async (_request, reply) => {
  const registry = deps.providerRegistry;
  // No registry configured → empty (but well-shaped) payload, still 200.
  if (!registry) {
    reply.code(200).send({ providers: [], tiers: { fast: [], heavy: [] } });
    return;
  }
  const tiers = {
    fast: registry.getTierProviders('fast'),
    heavy: registry.getTierProviders('heavy'),
  };
  reply.code(200).send({ providers: registry.list(), tiers });
});
// Proxy management routes to mcpd
const mcpdClient = new McpdClient(config.mcpdUrl, config.mcpdToken);
registerProxyRoutes(app, mcpdClient);

View File

@@ -1,11 +1,12 @@
import type { SecretStore } from '@mcpctl/shared';
import type { LlmFileConfig } from './http/config.js';
import type { LlmFileConfig, LlmProviderFileEntry } from './http/config.js';
import { ProviderRegistry } from './providers/registry.js';
import { GeminiAcpProvider } from './providers/gemini-acp.js';
import { OllamaProvider } from './providers/ollama.js';
import { AnthropicProvider } from './providers/anthropic.js';
import { OpenAiProvider } from './providers/openai.js';
import { DeepSeekProvider } from './providers/deepseek.js';
import type { LlmProvider } from './providers/types.js';
import type { GeminiAcpConfig } from './providers/gemini-acp.js';
import type { OllamaConfig } from './providers/ollama.js';
import type { AnthropicConfig } from './providers/anthropic.js';
@@ -13,87 +14,158 @@ import type { OpenAiConfig } from './providers/openai.js';
import type { DeepSeekConfig } from './providers/deepseek.js';
/**
* Create a ProviderRegistry from user config + secret store.
* Returns an empty registry if config is undefined or provider is 'none'.
* Thin wrapper that delegates all LlmProvider methods but overrides `name`.
* Used when the user's chosen name (e.g. "vllm-local") differs from the
* underlying provider's name (e.g. "openai").
*/
export async function createProviderFromConfig(
config: LlmFileConfig | undefined,
secretStore: SecretStore,
): Promise<ProviderRegistry> {
const registry = new ProviderRegistry();
if (!config?.provider || config.provider === 'none') return registry;
class NamedProvider implements LlmProvider {
readonly name: string;
private inner: LlmProvider;
switch (config.provider) {
constructor(name: string, inner: LlmProvider) {
this.name = name;
this.inner = inner;
}
complete(...args: Parameters<LlmProvider['complete']>) {
return this.inner.complete(...args);
}
listModels() {
return this.inner.listModels();
}
isAvailable() {
return this.inner.isAvailable();
}
dispose() {
this.inner.dispose?.();
}
}
/**
* Create a single LlmProvider from a provider entry config.
* Returns null if required config is missing (logs warning).
*/
async function createSingleProvider(
entry: LlmProviderFileEntry,
secretStore: SecretStore,
): Promise<LlmProvider | null> {
switch (entry.type) {
case 'gemini-cli': {
const cfg: GeminiAcpConfig = {};
if (config.binaryPath) cfg.binaryPath = config.binaryPath;
if (config.model) cfg.defaultModel = config.model;
if (entry.binaryPath) cfg.binaryPath = entry.binaryPath;
if (entry.model) cfg.defaultModel = entry.model;
const provider = new GeminiAcpProvider(cfg);
provider.warmup();
registry.register(provider);
break;
return provider;
}
case 'ollama': {
const cfg: OllamaConfig = {};
if (config.url) cfg.baseUrl = config.url;
if (config.model) cfg.defaultModel = config.model;
registry.register(new OllamaProvider(cfg));
break;
if (entry.url) cfg.baseUrl = entry.url;
if (entry.model) cfg.defaultModel = entry.model;
return new OllamaProvider(cfg);
}
case 'anthropic': {
const apiKey = await secretStore.get('anthropic-api-key');
if (!apiKey) {
process.stderr.write('Warning: Anthropic API key not found in secret store. Run "mcpctl config setup" to configure.\n');
return registry;
process.stderr.write(`Warning: Anthropic API key not found for provider "${entry.name}". Run "mcpctl config setup" to configure.\n`);
return null;
}
const cfg: AnthropicConfig = { apiKey };
if (config.model) cfg.defaultModel = config.model;
registry.register(new AnthropicProvider(cfg));
break;
if (entry.model) cfg.defaultModel = entry.model;
return new AnthropicProvider(cfg);
}
case 'openai': {
const apiKey = await secretStore.get('openai-api-key');
if (!apiKey) {
process.stderr.write('Warning: OpenAI API key not found in secret store. Run "mcpctl config setup" to configure.\n');
return registry;
process.stderr.write(`Warning: OpenAI API key not found for provider "${entry.name}". Run "mcpctl config setup" to configure.\n`);
return null;
}
const cfg: OpenAiConfig = { apiKey };
if (config.url) cfg.baseUrl = config.url;
if (config.model) cfg.defaultModel = config.model;
registry.register(new OpenAiProvider(cfg));
break;
if (entry.url) cfg.baseUrl = entry.url;
if (entry.model) cfg.defaultModel = entry.model;
return new OpenAiProvider(cfg);
}
case 'deepseek': {
const apiKey = await secretStore.get('deepseek-api-key');
if (!apiKey) {
process.stderr.write('Warning: DeepSeek API key not found in secret store. Run "mcpctl config setup" to configure.\n');
return registry;
process.stderr.write(`Warning: DeepSeek API key not found for provider "${entry.name}". Run "mcpctl config setup" to configure.\n`);
return null;
}
const cfg: DeepSeekConfig = { apiKey };
if (config.url) cfg.baseUrl = config.url;
if (config.model) cfg.defaultModel = config.model;
registry.register(new DeepSeekProvider(cfg));
break;
if (entry.url) cfg.baseUrl = entry.url;
if (entry.model) cfg.defaultModel = entry.model;
return new DeepSeekProvider(cfg);
}
case 'vllm': {
// vLLM uses OpenAI-compatible API
if (!config.url) {
process.stderr.write('Warning: vLLM URL not configured. Run "mcpctl config setup" to configure.\n');
return registry;
if (!entry.url) {
process.stderr.write(`Warning: vLLM URL not configured for provider "${entry.name}". Run "mcpctl config setup" to configure.\n`);
return null;
}
registry.register(new OpenAiProvider({
return new OpenAiProvider({
apiKey: 'unused',
baseUrl: config.url,
defaultModel: config.model ?? 'default',
}));
break;
baseUrl: entry.url,
defaultModel: entry.model ?? 'default',
});
}
default:
return null;
}
}
/**
 * Create a ProviderRegistry from multi-provider config entries + secret store.
 * Registers each provider, wraps with NamedProvider if needed, assigns tiers.
 */
export async function createProvidersFromConfig(
  entries: LlmProviderFileEntry[],
  secretStore: SecretStore,
): Promise<ProviderRegistry> {
  const registry = new ProviderRegistry();
  for (const entry of entries) {
    const built = await createSingleProvider(entry, secretStore);
    // null = missing key/url; a warning was already written by the factory.
    if (!built) continue;
    // Preserve the user's chosen name when it differs from the built-in provider name.
    const provider =
      built.name === entry.name ? built : new NamedProvider(entry.name, built);
    registry.register(provider);
    if (entry.tier) {
      registry.assignTier(provider.name, entry.tier);
    }
  }
  return registry;
}
/**
 * Create a ProviderRegistry from legacy single-provider config + secret store.
 * @deprecated Use createProvidersFromConfig() with loadLlmProviders() instead.
 */
export async function createProviderFromConfig(
  config: LlmFileConfig | undefined,
  secretStore: SecretStore,
): Promise<ProviderRegistry> {
  // 'none' / absent config → empty registry, same as before the tier feature.
  if (!config?.provider || config.provider === 'none') {
    return new ProviderRegistry();
  }
  // Normalize the legacy shape into one multi-provider entry and delegate.
  const entry: LlmProviderFileEntry = {
    name: config.provider,
    type: config.provider,
    ...(config.model ? { model: config.model } : {}),
    ...(config.url ? { url: config.url } : {}),
    ...(config.binaryPath ? { binaryPath: config.binaryPath } : {}),
  };
  return createProvidersFromConfig([entry], secretStore);
}

View File

@@ -242,7 +242,7 @@ export class ResponsePaginator {
raw: string,
pages: PageInfo[],
): Promise<PaginationIndex> {
const provider = this.providers?.getActive();
const provider = this.providers?.getProvider('fast');
if (!provider) {
return this.generateSimpleIndex(resultId, toolName, raw, pages);
}

View File

@@ -106,7 +106,7 @@ export class LlmProcessor {
return { optimized: false, params };
}
const provider = this.providers.getActive();
const provider = this.providers.getProvider('fast');
if (!provider) {
return { optimized: false, params };
}
@@ -142,7 +142,7 @@ export class LlmProcessor {
return { filtered: false, result: response.result, originalSize: raw.length, filteredSize: raw.length };
}
const provider = this.providers.getActive();
const provider = this.providers.getProvider('fast');
if (!provider) {
const raw = JSON.stringify(response.result);
return { filtered: false, result: response.result, originalSize: raw.length, filteredSize: raw.length };

View File

@@ -7,9 +7,9 @@ import { StdioProxyServer } from './server.js';
import { StdioUpstream } from './upstream/stdio.js';
import { HttpUpstream } from './upstream/http.js';
import { createHttpServer } from './http/server.js';
import { loadHttpConfig, loadLlmConfig } from './http/config.js';
import { loadHttpConfig, loadLlmProviders } from './http/config.js';
import type { HttpConfig } from './http/config.js';
import { createProviderFromConfig } from './llm-config.js';
import { createProvidersFromConfig } from './llm-config.js';
import { createSecretStore } from '@mcpctl/shared';
import type { ProviderRegistry } from './providers/registry.js';
@@ -65,13 +65,19 @@ export async function main(argv: string[] = process.argv): Promise<MainResult> {
const args = parseArgs(argv);
const httpConfig = loadHttpConfig();
// Load LLM provider from user config + secret store
const llmConfig = loadLlmConfig();
// Load LLM providers from user config + secret store
const llmEntries = loadLlmProviders();
const secretStore = await createSecretStore();
const providerRegistry = await createProviderFromConfig(llmConfig, secretStore);
const activeLlm = providerRegistry.getActive();
if (activeLlm) {
process.stderr.write(`LLM provider: ${activeLlm.name}\n`);
const providerRegistry = await createProvidersFromConfig(llmEntries, secretStore);
if (providerRegistry.hasTierConfig()) {
const fast = providerRegistry.getTierProviders('fast');
const heavy = providerRegistry.getTierProviders('heavy');
process.stderr.write(`LLM providers: fast=[${fast.join(',')}] heavy=[${heavy.join(',')}]\n`);
} else {
const activeLlm = providerRegistry.getActive();
if (activeLlm) {
process.stderr.write(`LLM provider: ${activeLlm.name}\n`);
}
}
let upstreamConfigs: UpstreamConfig[] = [];

View File

@@ -1,11 +1,13 @@
import type { LlmProvider } from './types.js';
import type { LlmProvider, Tier } from './types.js';
/**
* Registry for LLM providers. Supports switching the active provider at runtime.
* Registry for LLM providers. Supports tier-based routing (fast/heavy)
* with cross-tier fallback, and legacy single-provider mode.
*/
export class ProviderRegistry {
private providers = new Map<string, LlmProvider>();
private activeProvider: string | null = null;
private tierProviders = new Map<Tier, string[]>();
register(provider: LlmProvider): void {
this.providers.set(provider.name, provider);
@@ -20,6 +22,15 @@ export class ProviderRegistry {
const first = this.providers.keys().next();
this.activeProvider = first.done ? null : first.value;
}
// Remove from tier assignments
for (const [tier, names] of this.tierProviders) {
const filtered = names.filter((n) => n !== name);
if (filtered.length === 0) {
this.tierProviders.delete(tier);
} else {
this.tierProviders.set(tier, filtered);
}
}
}
setActive(name: string): void {
@@ -34,6 +45,42 @@ export class ProviderRegistry {
return this.providers.get(this.activeProvider) ?? null;
}
/** Assign a provider to a tier. Call order = priority within the tier. */
assignTier(providerName: string, tier: Tier): void {
  if (!this.providers.has(providerName)) {
    throw new Error(`Provider '${providerName}' is not registered`);
  }
  const current = this.tierProviders.get(tier) ?? [];
  // Idempotent: re-assigning keeps the original priority position.
  if (current.includes(providerName)) return;
  this.tierProviders.set(tier, [...current, providerName]);
}
/**
 * Get provider for a specific tier with fallback.
 * Resolution: requested tier → other tier → getActive() (legacy).
 */
getProvider(tier: Tier): LlmProvider | null {
  const otherTier: Tier = tier === 'fast' ? 'heavy' : 'fast';
  // Nullish chain mirrors the documented resolution order exactly.
  return this.firstInTier(tier) ?? this.firstInTier(otherTier) ?? this.getActive();
}
/** Get provider names assigned to a tier. */
getTierProviders(tier: Tier): string[] {
  const names = this.tierProviders.get(tier);
  return names ?? [];
}
/** Whether any tier assignments exist (vs legacy single-provider mode). */
hasTierConfig(): boolean {
  return this.tierProviders.size !== 0;
}
get(name: string): LlmProvider | undefined {
return this.providers.get(name);
}
@@ -46,10 +93,31 @@ export class ProviderRegistry {
return this.activeProvider;
}
/** Provider info for status display. */
listProviders(): Array<{ name: string; tiers: Tier[] }> {
  return this.list().map((name) => ({
    name,
    // Collect every tier whose member list mentions this provider.
    tiers: [...this.tierProviders.entries()]
      .filter(([, names]) => names.includes(name))
      .map(([tier]) => tier),
  }));
}
/** Dispose all registered providers that have a dispose method. */
disposeAll(): void {
  for (const p of this.providers.values()) {
    p.dispose?.();
  }
}
/** First registered provider in a tier's priority list, or null if none resolve. */
private firstInTier(tier: Tier): LlmProvider | null {
  for (const name of this.tierProviders.get(tier) ?? []) {
    const found = this.providers.get(name);
    if (found) return found;
  }
  return null;
}
}

View File

@@ -44,6 +44,9 @@ export interface CompletionOptions {
model?: string;
}
/** LLM provider tier. 'fast' = local inference, 'heavy' = cloud reasoning. */
export type Tier = 'fast' | 'heavy';
export interface LlmProvider {
/** Provider identifier (e.g., 'openai', 'anthropic', 'ollama') */
readonly name: string;

View File

@@ -116,9 +116,9 @@ describe('createProviderFromConfig', () => {
{ provider: 'vllm', model: 'my-model', url: 'http://gpu-server:8000' },
store,
);
// vLLM reuses OpenAI provider under the hood
// vLLM reuses OpenAI provider under the hood, wrapped with NamedProvider
expect(registry.getActive()).not.toBeNull();
expect(registry.getActive()!.name).toBe('openai');
expect(registry.getActive()!.name).toBe('vllm');
});
it('returns empty registry when vllm URL is missing', async () => {

View File

@@ -11,6 +11,7 @@ function makeProvider(response: string): ProviderRegistry {
};
return {
getActive: () => provider,
getProvider: () => provider,
register: vi.fn(),
setActive: vi.fn(),
listProviders: () => [{ name: 'test', available: true, active: true }],
@@ -177,6 +178,7 @@ describe('ResponsePaginator', () => {
};
const registry = {
getActive: () => provider,
getProvider: () => provider,
register: vi.fn(),
setActive: vi.fn(),
listProviders: () => [{ name: 'test', available: true, active: true }],
@@ -208,6 +210,7 @@ describe('ResponsePaginator', () => {
};
const registry = {
getActive: () => provider,
getProvider: () => provider,
register: vi.fn(),
setActive: vi.fn(),
listProviders: () => [{ name: 'test', available: true, active: true }],
@@ -231,6 +234,7 @@ describe('ResponsePaginator', () => {
it('falls back to simple when no active provider', async () => {
const registry = {
getActive: () => null,
getProvider: () => null,
register: vi.fn(),
setActive: vi.fn(),
listProviders: () => [],
@@ -256,6 +260,7 @@ describe('ResponsePaginator', () => {
};
const registry = {
getActive: () => provider,
getProvider: () => provider,
register: vi.fn(),
setActive: vi.fn(),
listProviders: () => [{ name: 'test', available: true, active: true }],
@@ -281,6 +286,7 @@ describe('ResponsePaginator', () => {
};
const registry = {
getActive: () => provider,
getProvider: () => provider,
register: vi.fn(),
setActive: vi.fn(),
listProviders: () => [{ name: 'test', available: true, active: true }],

View File

@@ -115,4 +115,105 @@ describe('ProviderRegistry', () => {
expect(models).toEqual(['anthropic-model-1', 'anthropic-model-2']);
});
// Covers the tier-routing API added to ProviderRegistry: assignment,
// tier-specific lookup, cross-tier fallback, legacy fallback, cleanup.
describe('tier management', () => {
  it('assigns providers to tiers', () => {
    registry.register(mockProvider('vllm'));
    registry.register(mockProvider('gemini'));
    registry.assignTier('vllm', 'fast');
    registry.assignTier('gemini', 'heavy');
    expect(registry.getTierProviders('fast')).toEqual(['vllm']);
    expect(registry.getTierProviders('heavy')).toEqual(['gemini']);
    expect(registry.hasTierConfig()).toBe(true);
  });

  it('getProvider returns tier-specific provider', () => {
    const vllm = mockProvider('vllm');
    const gemini = mockProvider('gemini');
    registry.register(vllm);
    registry.register(gemini);
    registry.assignTier('vllm', 'fast');
    registry.assignTier('gemini', 'heavy');
    // Identity assertions: the exact registered instances come back.
    expect(registry.getProvider('fast')).toBe(vllm);
    expect(registry.getProvider('heavy')).toBe(gemini);
  });

  it('getProvider falls back to other tier', () => {
    const vllm = mockProvider('vllm');
    registry.register(vllm);
    registry.assignTier('vllm', 'fast');
    // Requesting heavy but only fast exists → falls back to fast
    expect(registry.getProvider('heavy')).toBe(vllm);
  });

  it('getProvider falls back to getActive when no tiers', () => {
    const openai = mockProvider('openai');
    registry.register(openai);
    // No tier assignments → falls back to legacy getActive()
    expect(registry.getProvider('fast')).toBe(openai);
    expect(registry.getProvider('heavy')).toBe(openai);
    expect(registry.hasTierConfig()).toBe(false);
  });

  it('unregister removes from tier assignments', () => {
    registry.register(mockProvider('vllm'));
    registry.register(mockProvider('gemini'));
    registry.assignTier('vllm', 'fast');
    registry.assignTier('gemini', 'heavy');
    registry.unregister('vllm');
    // The removed provider's tier entry is gone; other tiers untouched.
    expect(registry.getTierProviders('fast')).toEqual([]);
    expect(registry.getTierProviders('heavy')).toEqual(['gemini']);
  });

  it('assignTier throws for unregistered provider', () => {
    expect(() => registry.assignTier('unknown', 'fast')).toThrow("Provider 'unknown' is not registered");
  });

  it('multiple providers in same tier uses first', () => {
    const vllm = mockProvider('vllm');
    const ollama = mockProvider('ollama');
    registry.register(vllm);
    registry.register(ollama);
    registry.assignTier('vllm', 'fast');
    registry.assignTier('ollama', 'fast');
    // Assignment order defines priority within a tier.
    expect(registry.getProvider('fast')).toBe(vllm);
    expect(registry.getTierProviders('fast')).toEqual(['vllm', 'ollama']);
  });

  it('listProviders includes tier info', () => {
    registry.register(mockProvider('vllm'));
    registry.register(mockProvider('gemini'));
    registry.assignTier('vllm', 'fast');
    registry.assignTier('gemini', 'heavy');
    const providers = registry.listProviders();
    expect(providers).toEqual([
      { name: 'vllm', tiers: ['fast'] },
      { name: 'gemini', tiers: ['heavy'] },
    ]);
  });

  it('disposeAll calls dispose on all providers', () => {
    const disposeFn = vi.fn();
    const provider = { ...mockProvider('test'), dispose: disposeFn };
    registry.register(provider);
    registry.disposeAll();
    expect(disposeFn).toHaveBeenCalledOnce();
  });
});
});