fix: MCP proxy resilience — discovery cache, default liveness probes
Some checks failed
CI/CD / lint (push) Successful in 52s
CI/CD / typecheck (push) Successful in 1m51s
CI/CD / test (push) Successful in 1m1s
CI/CD / smoke (push) Failing after 3m21s
CI/CD / build (push) Successful in 4m9s
CI/CD / publish (push) Has been skipped

Adds a per-server tools/list cache in McpRouter (positive + negative TTL)
so a slow or dead upstream only stalls the first discovery call, not every
subsequent client request. The cache is invalidated on upstream add/remove.

Health probes now apply a default liveness spec (tools/list via the real
production path) to any RUNNING instance without an explicit healthCheck,
so synthetic and real failures converge on the same signal.

Includes supporting updates in mcpd-client, discovery, upstream/mcpd,
seeder, and fulldeploy/release scripts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-04-17 00:48:57 +01:00
parent c968d76e00
commit 3149ea3ae7
15 changed files with 499 additions and 32 deletions

View File

@@ -3,7 +3,7 @@ import { refreshUpstreams } from '../src/discovery.js';
import { McpRouter } from '../src/router.js';
function mockMcpdClient(servers: Array<{ id: string; name: string; transport: string }>) {
return {
const client = {
baseUrl: 'http://test:3100',
token: 'test-token',
get: vi.fn(async () => servers),
@@ -11,7 +11,10 @@ function mockMcpdClient(servers: Array<{ id: string; name: string; transport: st
put: vi.fn(),
delete: vi.fn(),
forward: vi.fn(),
withTimeout: vi.fn(() => client),
withHeaders: vi.fn(() => client),
};
return client;
}
describe('refreshUpstreams', () => {

View File

@@ -107,4 +107,38 @@ describe('McpdUpstream', () => {
const response = await upstream.send(request);
expect(response.error).toEqual({ code: -32601, message: 'Tool not found' });
});
// The three */list discovery methods must be posted through the dedicated
// discovery client and must never touch the main client.
it('routes */list methods through discoveryClient when provided', async () => {
// Main client is created without any canned responses.
const mainClient = mockMcpdClient();
// Canned responses for the discovery client. Keys look like '<serverId>:<method>';
// the mock helper is defined outside this view — confirm the key format there.
const discoveryClient = mockMcpdClient(new Map([
['srv-1:tools/list', { result: { tools: [] } }],
['srv-1:resources/list', { result: { resources: [] } }],
['srv-1:prompts/list', { result: { prompts: [] } }],
]));
// Fourth constructor argument is left undefined; the fifth is the discovery client.
const upstream = new McpdUpstream('srv-1', 'slack', mainClient as any, undefined, discoveryClient as any);
await upstream.send({ jsonrpc: '2.0', id: '1', method: 'tools/list' });
await upstream.send({ jsonrpc: '2.0', id: '2', method: 'resources/list' });
await upstream.send({ jsonrpc: '2.0', id: '3', method: 'prompts/list' });
// One post per list request on the discovery client, none on the main client.
expect(discoveryClient.post).toHaveBeenCalledTimes(3);
expect(mainClient.post).not.toHaveBeenCalled();
});
// Counterpart to the */list routing test: a tools/call request must still be
// posted through the main client even though a discovery client is configured.
it('routes tools/call through mainClient even when discoveryClient is set', async () => {
// Only the main client gets a canned tools/call response.
const mainClient = mockMcpdClient(new Map([
['srv-1:tools/call', { result: { ok: true } }],
]));
// Discovery client is created without any canned responses.
const discoveryClient = mockMcpdClient();
// Fourth constructor argument is left undefined; the fifth is the discovery client.
const upstream = new McpdUpstream('srv-1', 'slack', mainClient as any, undefined, discoveryClient as any);
await upstream.send({
jsonrpc: '2.0', id: '1', method: 'tools/call',
params: { name: 'noop', arguments: {} },
});
// Exactly one post on the main client, none on the discovery client.
expect(mainClient.post).toHaveBeenCalledTimes(1);
expect(discoveryClient.post).not.toHaveBeenCalled();
});
});

View File

@@ -3,7 +3,7 @@ import { refreshProjectUpstreams } from '../src/discovery.js';
import { McpRouter } from '../src/router.js';
function mockMcpdClient(servers: Array<{ id: string; name: string; transport: string }>) {
return {
const client = {
baseUrl: 'http://test:3100',
token: 'test-token',
get: vi.fn(async () => servers),
@@ -11,7 +11,10 @@ function mockMcpdClient(servers: Array<{ id: string; name: string; transport: st
put: vi.fn(),
delete: vi.fn(),
forward: vi.fn(async () => ({ status: 200, body: servers })),
withTimeout: vi.fn(() => client),
withHeaders: vi.fn(() => client),
};
return client;
}
describe('refreshProjectUpstreams', () => {

View File

@@ -0,0 +1,137 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { McpRouter } from '../src/router.js';
import type { UpstreamConnection, JsonRpcRequest, JsonRpcResponse } from '../src/types.js';
/**
 * Builds a fake upstream connection for router tests.
 *
 * The connection always reports itself alive, has a no-op `close`, and wraps
 * `send` in `vi.fn` so tests can count how often it is invoked.
 *
 * @param name - Name exposed on the returned connection.
 * @param opts - Canned behaviour. `tools` / `resources` are returned for
 *   `tools/list` / `resources/list` (an empty array when omitted). A truthy
 *   `err` makes every request fail with JSON-RPC error code -32603 carrying
 *   that message.
 * @returns A connection whose `send` answers any other method with error
 *   code -32601 and message 'not handled'.
 */
function mockUpstream(name: string, opts: { tools?: Array<{ name: string }>; resources?: Array<{ uri: string }>; err?: string } = {}): UpstreamConnection {
  const send = vi.fn(async (req: JsonRpcRequest): Promise<JsonRpcResponse> => {
    // A configured error takes precedence over every method-specific reply.
    if (opts.err) {
      return { jsonrpc: '2.0', id: req.id, error: { code: -32603, message: opts.err } };
    }

    switch (req.method) {
      case 'tools/list':
        return { jsonrpc: '2.0', id: req.id, result: { tools: opts.tools ?? [] } };
      case 'resources/list':
        return { jsonrpc: '2.0', id: req.id, result: { resources: opts.resources ?? [] } };
      default:
        return { jsonrpc: '2.0', id: req.id, error: { code: -32601, message: 'not handled' } };
    }
  });

  const connection = {
    name,
    isAlive: () => true,
    close: async () => {},
    send,
  };
  return connection as UpstreamConnection;
}
// Suite for McpRouter's discovery caching. Each test counts how many times a
// mock upstream's send() is invoked across repeated discoverTools() /
// discoverResources() calls to tell cached answers from fresh fetches.
describe('McpRouter discovery cache', () => {
let router: McpRouter;
// Fresh router per test, with fake timers installed and the system time pinned
// so expiry can be driven with vi.advanceTimersByTime().
beforeEach(() => {
router = new McpRouter();
vi.useFakeTimers();
vi.setSystemTime(new Date('2026-04-15T12:00:00Z'));
});
// Restore real timers after every test.
afterEach(() => {
vi.useRealTimers();
});
// Two back-to-back discoveries with no time advance → a single upstream call.
it('serves tools/list from cache on the second call within TTL', async () => {
const upstream = mockUpstream('slack', { tools: [{ name: 'search' }] });
router.addUpstream(upstream);
await router.discoverTools();
await router.discoverTools();
expect(upstream.send).toHaveBeenCalledTimes(1);
});
it('re-fetches after positive TTL expires', async () => {
const upstream = mockUpstream('slack', { tools: [{ name: 'search' }] });
router.addUpstream(upstream);
await router.discoverTools();
// Jump 31 s ahead — presumably just past the positive TTL; the TTL value
// lives in the router and is not visible here (confirm).
vi.advanceTimersByTime(31_000);
await router.discoverTools();
expect(upstream.send).toHaveBeenCalledTimes(2);
});
// An upstream that answers with a JSON-RPC error is queried once across
// three consecutive discoveries.
it('negative cache prevents repeated calls to a failing upstream', async () => {
const upstream = mockUpstream('broken', { err: 'mcpd proxy error: timeout' });
router.addUpstream(upstream);
await router.discoverTools();
await router.discoverTools();
await router.discoverTools();
expect(upstream.send).toHaveBeenCalledTimes(1);
});
it('negative cache expires after negative TTL', async () => {
const upstream = mockUpstream('broken', { err: 'mcpd proxy error: timeout' });
router.addUpstream(upstream);
await router.discoverTools();
// Assumes the negative TTL is shorter than 31 s — confirm against the router.
vi.advanceTimersByTime(31_000);
await router.discoverTools();
expect(upstream.send).toHaveBeenCalledTimes(2);
});
it('re-registering a server invalidates its cache entry', async () => {
const upstream1 = mockUpstream('slack', { tools: [{ name: 'v1' }] });
router.addUpstream(upstream1);
await router.discoverTools();
expect(upstream1.send).toHaveBeenCalledTimes(1);
// Second upstream is added under the same name without removing the first.
const upstream2 = mockUpstream('slack', { tools: [{ name: 'v2' }] });
router.addUpstream(upstream2);
const tools = await router.discoverTools();
expect(upstream2.send).toHaveBeenCalledTimes(1);
// Discovered tool names carry the upstream name as a prefix.
expect(tools.map((t) => t.name)).toEqual(['slack/v2']);
});
// NOTE(review): the preceding test shows that re-adding alone already forces a
// re-fetch, so this assertion may not isolate removeUpstream's own effect.
it('removeUpstream clears cache so follow-up add re-fetches', async () => {
const upstream1 = mockUpstream('slack', { tools: [{ name: 'v1' }] });
router.addUpstream(upstream1);
await router.discoverTools();
router.removeUpstream('slack');
const upstream2 = mockUpstream('slack', { tools: [{ name: 'v2' }] });
router.addUpstream(upstream2);
await router.discoverTools();
expect(upstream2.send).toHaveBeenCalledTimes(1);
});
it('one dead server does not block cached results for others', async () => {
const broken = mockUpstream('broken', { err: 'timeout' });
const healthy = mockUpstream('healthy', { tools: [{ name: 'ping' }] });
router.addUpstream(broken);
router.addUpstream(healthy);
// Only the healthy upstream contributes tools; the failing one adds nothing.
const first = await router.discoverTools();
expect(first.map((t) => t.name)).toEqual(['healthy/ping']);
// Second call: both come from cache.
const second = await router.discoverTools();
expect(second.map((t) => t.name)).toEqual(['healthy/ping']);
expect(broken.send).toHaveBeenCalledTimes(1);
expect(healthy.send).toHaveBeenCalledTimes(1);
});
it('discoverResources uses its own cache key independent of tools/list', async () => {
const upstream = mockUpstream('docs', { tools: [{ name: 'search' }], resources: [{ uri: 'doc://1' }] });
router.addUpstream(upstream);
// Alternate the two discovery kinds, each requested twice.
await router.discoverTools();
await router.discoverResources();
await router.discoverTools();
await router.discoverResources();
// Each method cached separately → exactly one call per method.
expect(upstream.send).toHaveBeenCalledTimes(2);
});
});