From 4cfdd805d8d2557d3ec53aa9be3bdd22419319ae Mon Sep 17 00:00:00 2001 From: Michal Date: Tue, 3 Mar 2026 22:04:58 +0000 Subject: [PATCH] feat: LLM provider failover in proxymodel adapter LLMProviderAdapter now tries all registered providers before giving up: 1. Named provider (if specified) 2. All 'fast' tier providers in order 3. All 'heavy' tier providers in order 4. Legacy active provider Previously, if the first provider (e.g., vllm-local) failed, the adapter threw immediately even though Anthropic and Gemini were available. Now it logs the failure and tries the next candidate. Co-Authored-By: Claude Opus 4.6 --- src/mcplocal/src/proxymodel/llm-adapter.ts | 90 ++++++++++++++---- .../tests/proxymodel-llm-adapter.test.ts | 93 ++++++++++++++++++- 2 files changed, 163 insertions(+), 20 deletions(-) diff --git a/src/mcplocal/src/proxymodel/llm-adapter.ts b/src/mcplocal/src/proxymodel/llm-adapter.ts index 2fa9639..ad8a6ff 100644 --- a/src/mcplocal/src/proxymodel/llm-adapter.ts +++ b/src/mcplocal/src/proxymodel/llm-adapter.ts @@ -1,8 +1,12 @@ /** * Adapts the internal ProviderRegistry into the public LLMProvider interface * that stages use via ctx.llm. + * + * Implements tier-aware failover: if the primary provider fails, tries + * remaining providers in the same tier, then cross-tier, then legacy active. */ import type { ProviderRegistry } from '../providers/registry.js'; +import type { LlmProvider, CompletionOptions } from '../providers/types.js'; import type { LLMProvider, LLMCompleteOptions } from './types.js'; export class LLMProviderAdapter implements LLMProvider { @@ -13,24 +17,81 @@ export class LLMProviderAdapter implements LLMProvider { ) {} async complete(prompt: string, options?: LLMCompleteOptions): Promise { - let provider; - if (this.providerName) { - provider = this.registry.get(this.providerName) ?? null; - } - if (!provider) { - provider = this.registry.getProvider('fast'); - } - if (!provider) { + const candidates = this.getCandidates(); + if (candidates.length === 0) { throw new Error('No LLM provider available'); } + const opts = this.buildOpts(prompt, options); + let lastError: Error | null = null; + + for (const provider of candidates) { + try { + const result = await provider.complete(opts); + return result.content; + } catch (err) { + lastError = err as Error; + process.stderr.write( + `[llm-adapter] ${provider.name} failed, trying next: ${lastError.message}\n`, + ); + } + } + + throw lastError ?? new Error('All LLM providers failed'); + } + + available(): boolean { + return this.getCandidates().length > 0; + } + + /** + * Build an ordered list of providers to try: + * 1. Named provider (if specified) + * 2. All 'fast' tier providers + * 3. All 'heavy' tier providers + * 4. Active provider (legacy fallback) + * Deduplicates by name. + */ + private getCandidates(): LlmProvider[] { + const seen = new Set(); + const candidates: LlmProvider[] = []; + + const add = (p: LlmProvider | null | undefined) => { + if (p && !seen.has(p.name)) { + seen.add(p.name); + candidates.push(p); + } + }; + + // Preferred provider first + if (this.providerName) { + add(this.registry.get(this.providerName)); + } + + // All fast tier providers + for (const name of this.registry.getTierProviders('fast')) { + add(this.registry.get(name)); + } + + // All heavy tier providers + for (const name of this.registry.getTierProviders('heavy')) { + add(this.registry.get(name)); + } + + // Legacy fallback + add(this.registry.getActive()); + + return candidates; + } + + private buildOpts(prompt: string, options?: LLMCompleteOptions): CompletionOptions { const messages = []; if (options?.system) { messages.push({ role: 'system' as const, content: options.system }); } messages.push({ role: 'user' as const, content: prompt }); - const opts: Parameters[0] = { + const opts: CompletionOptions = { messages, temperature: 0, }; @@ -40,15 +101,6 @@ export class LLMProviderAdapter implements LLMProvider { if (options?.maxTokens !== undefined) { opts.maxTokens = options.maxTokens; } - const result = await provider.complete(opts); - - return result.content; - } - - available(): boolean { - if (this.providerName) { - return this.registry.get(this.providerName) !== undefined; - } - return this.registry.getProvider('fast') !== null; + return opts; } } diff --git a/src/mcplocal/tests/proxymodel-llm-adapter.test.ts b/src/mcplocal/tests/proxymodel-llm-adapter.test.ts index f2f6442..815d46e 100644 --- a/src/mcplocal/tests/proxymodel-llm-adapter.test.ts +++ b/src/mcplocal/tests/proxymodel-llm-adapter.test.ts @@ -17,6 +17,15 @@ function mockProvider(name: string, response = 'mock response'): LlmProvider { }; } +function failingProvider(name: string, error = 'connection refused'): LlmProvider { + return { + name, + complete: vi.fn().mockRejectedValue(new Error(error)), + listModels: vi.fn().mockResolvedValue([]), + isAvailable: vi.fn().mockResolvedValue(true), + }; +} + describe('LLMProviderAdapter', () => { it('available() returns true when a provider is registered', () => { const registry = new ProviderRegistry(); @@ -45,7 +54,6 @@ describe('LLMProviderAdapter', () => { expect(result).toBe('mock response'); expect(provider.complete).toHaveBeenCalledWith({ messages: [{ role: 'user', content: 'summarize this' }], - maxTokens: undefined, temperature: 0, }); }); @@ -75,4 +83,87 @@ describe('LLMProviderAdapter', () => { await expect(adapter.complete('test')).rejects.toThrow('No LLM provider available'); }); + + // --- Failover tests --- + + it('falls back to next provider in same tier on failure', async () => { + const failing = failingProvider('vllm-local', 'vLLM startup timed out'); + const working = mockProvider('anthropic', 'anthropic response'); + const registry = new ProviderRegistry(); + registry.register(failing); + registry.register(working); + registry.assignTier('vllm-local', 'fast'); + registry.assignTier('anthropic', 'fast'); + + const adapter = new LLMProviderAdapter(registry); + const result = await adapter.complete('test'); + + expect(result).toBe('anthropic response'); + expect(failing.complete).toHaveBeenCalledOnce(); + expect(working.complete).toHaveBeenCalledOnce(); + }); + + it('falls back cross-tier when all fast providers fail', async () => { + const fastFail = failingProvider('vllm-local'); + const heavy = mockProvider('gemini', 'gemini response'); + const registry = new ProviderRegistry(); + registry.register(fastFail); + registry.register(heavy); + registry.assignTier('vllm-local', 'fast'); + registry.assignTier('gemini', 'heavy'); + + const adapter = new LLMProviderAdapter(registry); + const result = await adapter.complete('test'); + + expect(result).toBe('gemini response'); + expect(fastFail.complete).toHaveBeenCalledOnce(); + expect(heavy.complete).toHaveBeenCalledOnce(); + }); + + it('throws last error when all providers fail', async () => { + const fail1 = failingProvider('vllm', 'vLLM down'); + const fail2 = failingProvider('anthropic', 'rate limited'); + const registry = new ProviderRegistry(); + registry.register(fail1); + registry.register(fail2); + registry.assignTier('vllm', 'fast'); + registry.assignTier('anthropic', 'heavy'); + + const adapter = new LLMProviderAdapter(registry); + await expect(adapter.complete('test')).rejects.toThrow('rate limited'); + expect(fail1.complete).toHaveBeenCalledOnce(); + expect(fail2.complete).toHaveBeenCalledOnce(); + }); + + it('does not retry provider that already succeeded', async () => { + const fast = mockProvider('fast-provider', 'fast result'); + const heavy = mockProvider('heavy-provider', 'heavy result'); + const registry = new ProviderRegistry(); + registry.register(fast); + registry.register(heavy); + registry.assignTier('fast-provider', 'fast'); + registry.assignTier('heavy-provider', 'heavy'); + + const adapter = new LLMProviderAdapter(registry); + const result = await adapter.complete('test'); + + expect(result).toBe('fast result'); + expect(fast.complete).toHaveBeenCalledOnce(); + expect(heavy.complete).not.toHaveBeenCalled(); + }); + + it('prefers named provider but falls back on failure', async () => { + const named = failingProvider('preferred', 'preferred down'); + const fallback = mockProvider('fallback', 'fallback response'); + const registry = new ProviderRegistry(); + registry.register(named); + registry.register(fallback); + registry.assignTier('preferred', 'fast'); + registry.assignTier('fallback', 'fast'); + + const adapter = new LLMProviderAdapter(registry, 'preferred'); + const result = await adapter.complete('test'); + + expect(result).toBe('fallback response'); + }); });