feat: automatic reconciliation loop for MCP server instances
mcpd now runs a periodic reconcileAll() every 30s that:
- Detects crashed/missing containers (syncStatus)
- Cleans up ERROR instances
- Creates replacement pods to match the desired replica count

This replaces the old syncStatus-only timer. Servers migrated from another deployment or recovering from node failures will automatically get their instances recreated.

6 new tests for reconcileAll covering: missing instances, skip replicas=0, already-at-count, ERROR cleanup, multi-server, error isolation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -294,4 +294,99 @@ describe('InstanceService', () => {
|
||||
expect(result.stdout).toBe('log output');
|
||||
});
|
||||
});
|
||||
|
||||
describe('reconcileAll', () => {
|
||||
it('creates missing instances for servers with replicas > 0', async () => {
  // Arrange: a single server asks for one replica while instanceRepo.findAll
  // resolves to an empty list, i.e. no instances exist.
  const grafana = makeServer({ id: 'srv-1', name: 'grafana', replicas: 1 });
  vi.mocked(instanceRepo.findAll).mockResolvedValue([]);
  vi.mocked(serverRepo.findById).mockResolvedValue(grafana);
  vi.mocked(serverRepo.findAll).mockResolvedValue([grafana]);

  // Act
  const { reconciled, errors } = await service.reconcileAll();

  // Assert: one server was reconciled, nothing failed, and an instance
  // record was created.
  expect(reconciled).toBe(1);
  expect(errors).toHaveLength(0);
  expect(instanceRepo.create).toHaveBeenCalled();
});
|
||||
|
||||
it('skips servers with replicas = 0', async () => {
  // Arrange: the only known server wants zero replicas and has no instances.
  const scaledToZero = makeServer({ id: 'srv-1', replicas: 0 });
  vi.mocked(instanceRepo.findAll).mockResolvedValue([]);
  vi.mocked(serverRepo.findAll).mockResolvedValue([scaledToZero]);

  // Act
  const { reconciled } = await service.reconcileAll();

  // Assert: nothing is counted as reconciled and no instance is created.
  expect(reconciled).toBe(0);
  expect(instanceRepo.create).not.toHaveBeenCalled();
});
|
||||
|
||||
it('does not create instances when already at desired count', async () => {
  // Arrange: one replica is requested and instanceRepo.findAll already
  // reports one RUNNING instance for that server.
  const srv = makeServer({ id: 'srv-1', replicas: 1 });
  const runningInstance = makeInstance({ id: 'inst-1', serverId: 'srv-1', status: 'RUNNING' });
  vi.mocked(serverRepo.findAll).mockResolvedValue([srv]);
  vi.mocked(instanceRepo.findAll).mockResolvedValue([runningInstance]);

  // Act
  const { reconciled } = await service.reconcileAll();

  // Assert: the server is left alone.
  expect(reconciled).toBe(0);
  expect(instanceRepo.create).not.toHaveBeenCalled();
});
|
||||
|
||||
it('cleans up ERROR instances and creates replacements', async () => {
  // Arrange: the server wants one replica, but its only instance is in the
  // ERROR state.
  const srv = makeServer({ id: 'srv-1', replicas: 1 });
  const deadInstance = makeInstance({
    id: 'inst-dead',
    serverId: 'srv-1',
    status: 'ERROR',
    containerId: 'ctr-dead',
  });
  vi.mocked(serverRepo.findAll).mockResolvedValue([srv]);
  vi.mocked(serverRepo.findById).mockResolvedValue(srv);
  vi.mocked(instanceRepo.findAll).mockResolvedValue([deadInstance]);

  // Act
  const { reconciled } = await service.reconcileAll();

  // Assert: the ERROR instance record is deleted and a new one is created.
  expect(reconciled).toBe(1);
  expect(instanceRepo.delete).toHaveBeenCalledWith('inst-dead');
  expect(instanceRepo.create).toHaveBeenCalled();
});
|
||||
|
||||
it('reconciles multiple servers independently', async () => {
  // Arrange: two servers, each asking for one replica.
  const grafana = makeServer({ id: 'srv-1', name: 'grafana', replicas: 1, dockerImage: 'grafana:latest' });
  const nodeRed = makeServer({ id: 'srv-2', name: 'node-red', replicas: 1, dockerImage: 'nodered:latest' });
  vi.mocked(serverRepo.findAll).mockResolvedValue([grafana, nodeRed]);
  vi.mocked(serverRepo.findById).mockImplementation(async (id) => {
    switch (id) {
      case 'srv-1':
        return grafana;
      case 'srv-2':
        return nodeRed;
      default:
        return null;
    }
  });
  // srv-1 already has a RUNNING instance; every other server id has none.
  vi.mocked(instanceRepo.findAll).mockImplementation(async (serverId) =>
    serverId === 'srv-1' ? [makeInstance({ serverId: 'srv-1', status: 'RUNNING' })] : [],
  );

  // Act
  const { reconciled } = await service.reconcileAll();

  // Assert: only srv-2 needed reconciliation.
  expect(reconciled).toBe(1);
});
|
||||
|
||||
it('collects errors without stopping other servers', async () => {
  // Arrange: both servers want one replica and neither has any instance.
  const brokenServer = makeServer({ id: 'srv-1', name: 'broken', replicas: 1 });
  const healthyServer = makeServer({ id: 'srv-2', name: 'healthy', replicas: 1, dockerImage: 'img:latest' });
  vi.mocked(serverRepo.findAll).mockResolvedValue([brokenServer, healthyServer]);
  // Only srv-2 can be looked up by id; srv-1 resolves to null, which this
  // test expects to surface as an error for that server.
  vi.mocked(serverRepo.findById).mockImplementation(async (id) => (id === 'srv-2' ? healthyServer : null));
  vi.mocked(instanceRepo.findAll).mockResolvedValue([]);

  // Act
  const { errors, reconciled } = await service.reconcileAll();

  // Assert: srv-1 errored (the message names it), srv-2 was still reconciled.
  expect(errors).toHaveLength(1);
  expect(errors[0]).toContain('broken');
  expect(reconciled).toBe(1);
});
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user