feat: LLM provider failover in proxymodel adapter
LLMProviderAdapter now tries all registered providers before giving up:

1. Named provider (if specified)
2. All 'fast' tier providers, in order
3. All 'heavy' tier providers, in order
4. Legacy active provider

Previously, if the first provider (e.g., vllm-local) failed, the adapter threw immediately even though Anthropic and Gemini were available. Now it logs the failure and tries the next candidate.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,12 @@
|
|||||||
/**
|
/**
|
||||||
* Adapts the internal ProviderRegistry into the public LLMProvider interface
|
* Adapts the internal ProviderRegistry into the public LLMProvider interface
|
||||||
* that stages use via ctx.llm.
|
* that stages use via ctx.llm.
|
||||||
|
*
|
||||||
|
* Implements tier-aware failover: if the primary provider fails, tries
|
||||||
|
* remaining providers in the same tier, then cross-tier, then legacy active.
|
||||||
*/
|
*/
|
||||||
import type { ProviderRegistry } from '../providers/registry.js';
|
import type { ProviderRegistry } from '../providers/registry.js';
|
||||||
|
import type { LlmProvider, CompletionOptions } from '../providers/types.js';
|
||||||
import type { LLMProvider, LLMCompleteOptions } from './types.js';
|
import type { LLMProvider, LLMCompleteOptions } from './types.js';
|
||||||
|
|
||||||
export class LLMProviderAdapter implements LLMProvider {
|
export class LLMProviderAdapter implements LLMProvider {
|
||||||
@@ -13,24 +17,81 @@ export class LLMProviderAdapter implements LLMProvider {
|
|||||||
) {}
|
) {}
|
||||||
|
|
||||||
async complete(prompt: string, options?: LLMCompleteOptions): Promise<string> {
|
async complete(prompt: string, options?: LLMCompleteOptions): Promise<string> {
|
||||||
let provider;
|
const candidates = this.getCandidates();
|
||||||
if (this.providerName) {
|
if (candidates.length === 0) {
|
||||||
provider = this.registry.get(this.providerName) ?? null;
|
|
||||||
}
|
|
||||||
if (!provider) {
|
|
||||||
provider = this.registry.getProvider('fast');
|
|
||||||
}
|
|
||||||
if (!provider) {
|
|
||||||
throw new Error('No LLM provider available');
|
throw new Error('No LLM provider available');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const opts = this.buildOpts(prompt, options);
|
||||||
|
let lastError: Error | null = null;
|
||||||
|
|
||||||
|
for (const provider of candidates) {
|
||||||
|
try {
|
||||||
|
const result = await provider.complete(opts);
|
||||||
|
return result.content;
|
||||||
|
} catch (err) {
|
||||||
|
lastError = err as Error;
|
||||||
|
process.stderr.write(
|
||||||
|
`[llm-adapter] ${provider.name} failed, trying next: ${lastError.message}\n`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw lastError ?? new Error('All LLM providers failed');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether at least one provider can be tried.
 *
 * @returns true when the candidate list built by getCandidates() is non-empty.
 */
available(): boolean {
  const candidates = this.getCandidates();
  return candidates.length > 0;
}
|
||||||
|
|
||||||
|
/**
 * Assemble the failover order for a completion request.
 *
 * Providers are collected in this priority:
 *   1. The provider named by `this.providerName`, when one is set
 *   2. Every provider listed for the 'fast' tier
 *   3. Every provider listed for the 'heavy' tier
 *   4. The registry's active provider (legacy fallback)
 *
 * A provider is identified by its `name`; the first occurrence wins and
 * later duplicates are ignored, so no provider appears twice.
 *
 * @returns The providers to try, in order. Empty when nothing is registered.
 */
private getCandidates(): LlmProvider[] {
  // Map iteration follows insertion order, so it doubles as the ordered,
  // de-duplicated result list.
  const ordered = new Map<string, LlmProvider>();

  const consider = (provider: LlmProvider | null | undefined): void => {
    if (!provider || ordered.has(provider.name)) {
      return;
    }
    ordered.set(provider.name, provider);
  };

  // Explicitly requested provider goes to the front of the queue.
  if (this.providerName) {
    consider(this.registry.get(this.providerName));
  }

  // Tier members: all of 'fast' first, then all of 'heavy'.
  for (const tier of ['fast', 'heavy'] as const) {
    for (const name of this.registry.getTierProviders(tier)) {
      consider(this.registry.get(name));
    }
  }

  // Legacy fallback: whatever the registry considers active.
  consider(this.registry.getActive());

  return [...ordered.values()];
}
|
||||||
|
|
||||||
|
private buildOpts(prompt: string, options?: LLMCompleteOptions): CompletionOptions {
|
||||||
const messages = [];
|
const messages = [];
|
||||||
if (options?.system) {
|
if (options?.system) {
|
||||||
messages.push({ role: 'system' as const, content: options.system });
|
messages.push({ role: 'system' as const, content: options.system });
|
||||||
}
|
}
|
||||||
messages.push({ role: 'user' as const, content: prompt });
|
messages.push({ role: 'user' as const, content: prompt });
|
||||||
|
|
||||||
const opts: Parameters<typeof provider.complete>[0] = {
|
const opts: CompletionOptions = {
|
||||||
messages,
|
messages,
|
||||||
temperature: 0,
|
temperature: 0,
|
||||||
};
|
};
|
||||||
@@ -40,15 +101,6 @@ export class LLMProviderAdapter implements LLMProvider {
|
|||||||
if (options?.maxTokens !== undefined) {
|
if (options?.maxTokens !== undefined) {
|
||||||
opts.maxTokens = options.maxTokens;
|
opts.maxTokens = options.maxTokens;
|
||||||
}
|
}
|
||||||
const result = await provider.complete(opts);
|
return opts;
|
||||||
|
|
||||||
return result.content;
|
|
||||||
}
|
|
||||||
|
|
||||||
available(): boolean {
|
|
||||||
if (this.providerName) {
|
|
||||||
return this.registry.get(this.providerName) !== undefined;
|
|
||||||
}
|
|
||||||
return this.registry.getProvider('fast') !== null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,15 @@ function mockProvider(name: string, response = 'mock response'): LlmProvider {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Test double: builds an LlmProvider whose `complete` always rejects.
 *
 * @param name  Name reported by the stub provider.
 * @param error Message of the Error that `complete` rejects with.
 * @returns A stub whose `listModels` resolves to an empty array and whose
 *          `isAvailable` resolves to true.
 */
function failingProvider(name: string, error = 'connection refused'): LlmProvider {
  const rejection = new Error(error);
  const stub: LlmProvider = {
    name,
    complete: vi.fn().mockRejectedValue(rejection),
    listModels: vi.fn().mockResolvedValue([]),
    isAvailable: vi.fn().mockResolvedValue(true),
  };
  return stub;
}
|
||||||
|
|
||||||
describe('LLMProviderAdapter', () => {
|
describe('LLMProviderAdapter', () => {
|
||||||
it('available() returns true when a provider is registered', () => {
|
it('available() returns true when a provider is registered', () => {
|
||||||
const registry = new ProviderRegistry();
|
const registry = new ProviderRegistry();
|
||||||
@@ -45,7 +54,6 @@ describe('LLMProviderAdapter', () => {
|
|||||||
expect(result).toBe('mock response');
|
expect(result).toBe('mock response');
|
||||||
expect(provider.complete).toHaveBeenCalledWith({
|
expect(provider.complete).toHaveBeenCalledWith({
|
||||||
messages: [{ role: 'user', content: 'summarize this' }],
|
messages: [{ role: 'user', content: 'summarize this' }],
|
||||||
maxTokens: undefined,
|
|
||||||
temperature: 0,
|
temperature: 0,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -75,4 +83,87 @@ describe('LLMProviderAdapter', () => {
|
|||||||
|
|
||||||
await expect(adapter.complete('test')).rejects.toThrow('No LLM provider available');
|
await expect(adapter.complete('test')).rejects.toThrow('No LLM provider available');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// --- Failover tests ---
|
||||||
|
|
||||||
|
it('falls back to next provider in same tier on failure', async () => {
  // Two providers share the 'fast' tier; the first one registered always rejects.
  const broken = failingProvider('vllm-local', 'vLLM startup timed out');
  const healthy = mockProvider('anthropic', 'anthropic response');

  const registry = new ProviderRegistry();
  registry.register(broken);
  registry.register(healthy);
  registry.assignTier('vllm-local', 'fast');
  registry.assignTier('anthropic', 'fast');

  const answer = await new LLMProviderAdapter(registry).complete('test');

  // The answer comes from the second provider, and each was tried exactly once.
  expect(answer).toBe('anthropic response');
  expect(broken.complete).toHaveBeenCalledOnce();
  expect(healthy.complete).toHaveBeenCalledOnce();
});
|
||||||
|
|
||||||
|
it('falls back cross-tier when all fast providers fail', async () => {
  // The only 'fast' provider rejects; the 'heavy' tier has a working one.
  const brokenFast = failingProvider('vllm-local');
  const workingHeavy = mockProvider('gemini', 'gemini response');

  const registry = new ProviderRegistry();
  registry.register(brokenFast);
  registry.register(workingHeavy);
  registry.assignTier('vllm-local', 'fast');
  registry.assignTier('gemini', 'heavy');

  const answer = await new LLMProviderAdapter(registry).complete('test');

  // The heavy-tier provider supplies the answer after the fast one fails.
  expect(answer).toBe('gemini response');
  expect(brokenFast.complete).toHaveBeenCalledOnce();
  expect(workingHeavy.complete).toHaveBeenCalledOnce();
});
|
||||||
|
|
||||||
|
it('throws last error when all providers fail', async () => {
  // One rejecting provider per tier, each with a distinct error message.
  const brokenFast = failingProvider('vllm', 'vLLM down');
  const brokenHeavy = failingProvider('anthropic', 'rate limited');

  const registry = new ProviderRegistry();
  registry.register(brokenFast);
  registry.register(brokenHeavy);
  registry.assignTier('vllm', 'fast');
  registry.assignTier('anthropic', 'heavy');

  const pending = new LLMProviderAdapter(registry).complete('test');

  // The surfaced error is the one from the provider tried last (heavy tier).
  await expect(pending).rejects.toThrow('rate limited');
  expect(brokenFast.complete).toHaveBeenCalledOnce();
  expect(brokenHeavy.complete).toHaveBeenCalledOnce();
});
|
||||||
|
|
||||||
|
it('does not retry provider that already succeeded', async () => {
  // What this checks: once a provider succeeds, the adapter stops and
  // lower-priority providers are never called.
  const fastProvider = mockProvider('fast-provider', 'fast result');
  const heavyProvider = mockProvider('heavy-provider', 'heavy result');

  const registry = new ProviderRegistry();
  registry.register(fastProvider);
  registry.register(heavyProvider);
  registry.assignTier('fast-provider', 'fast');
  registry.assignTier('heavy-provider', 'heavy');

  const answer = await new LLMProviderAdapter(registry).complete('test');

  expect(answer).toBe('fast result');
  expect(fastProvider.complete).toHaveBeenCalledOnce();
  expect(heavyProvider.complete).not.toHaveBeenCalled();
});
|
||||||
|
|
||||||
|
it('prefers named provider but falls back on failure', async () => {
  // The adapter is pinned to 'preferred', which always rejects; 'fallback'
  // shares the 'fast' tier and works.
  const named = failingProvider('preferred', 'preferred down');
  const fallback = mockProvider('fallback', 'fallback response');
  const registry = new ProviderRegistry();
  registry.register(named);
  registry.register(fallback);
  registry.assignTier('preferred', 'fast');
  registry.assignTier('fallback', 'fast');

  const adapter = new LLMProviderAdapter(registry, 'preferred');
  const result = await adapter.complete('test');

  expect(result).toBe('fallback response');
  // Checking only the result would pass even if the named provider were
  // skipped entirely. Assert that it was attempted, and only once: it also
  // sits in the 'fast' tier and must not be tried a second time.
  expect(named.complete).toHaveBeenCalledOnce();
  expect(fallback.complete).toHaveBeenCalledOnce();
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user