From 39df459bb1fa0331f0485d34c87ecb6264c6d556 Mon Sep 17 00:00:00 2001 From: Michal Date: Sat, 18 Apr 2026 17:34:28 +0100 Subject: [PATCH] feat(mcplocal): per-McpToken gate-ungate cache so service tokens survive proxies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the LiteLLM loop: LiteLLM's /mcp/ proxy doesn't propagate the mcp-session-id header, so every tool call from qwen3 landed on a fresh upstream session, which always started gated, so the only visible tool was begin_session — forever. The session-id gate works fine for Claude Code (stdio, long-lived), but breaks through session-stripping proxies. Identity that DOES survive: the McpToken (always in the Authorization header). So now the gate keys its ungate state on both: - sessionId → per-session (unchanged; Claude Code path) - tokenSha → per-token (NEW; service-token path) Flow for an McpToken caller: 1. first begin_session succeeds → session ungated + tokenSha cached 2. next request lands on a new mcp-session-id (proxy stripped it) 3. SessionGate.createSession sees tokenSha, finds active token entry, starts the new session ungated with the prior tags + retrievedPrompts 4. tools/list on the fresh session returns the full upstream set — no more begin_session loop Plumbing: - AuditCollector.getSessionMcpTokenSha(sessionId) exposes the already- tracked principal. - PluginSessionContext gets getMcpTokenSha() so plugins can read the token identity without knowing about the collector. - SessionGate gains (tokenSha?: string) on createSession/ungate, plus isTokenUngated and revokeToken. TTL defaults to 1hr; tunable via MCPLOCAL_TOKEN_UNGATE_TTL_MS env var. - Gate plugin passes ctx.getMcpTokenSha() at every ungate call site (begin_session, gated-intercept, intercept-fallback). Tests: 6 new cases in session-gate.test.ts covering cross-session persistence, token isolation, STDIO-path unchanged, TTL expiry, revokeToken, and the empty-string edge case. 
21/21 pass; 690/690 in mcplocal overall. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/mcplocal/src/audit/collector.ts | 5 ++ src/mcplocal/src/gate/session-gate.ts | 87 +++++++++++++++++-- src/mcplocal/src/proxymodel/plugin-context.ts | 12 +++ src/mcplocal/src/proxymodel/plugin.ts | 8 ++ src/mcplocal/src/proxymodel/plugins/gate.ts | 17 ++-- src/mcplocal/src/router.ts | 4 + src/mcplocal/tests/session-gate.test.ts | 72 +++++++++++++++ 7 files changed, 191 insertions(+), 14 deletions(-) diff --git a/src/mcplocal/src/audit/collector.ts b/src/mcplocal/src/audit/collector.ts index a3b8aa8..0cd73c3 100644 --- a/src/mcplocal/src/audit/collector.ts +++ b/src/mcplocal/src/audit/collector.ts @@ -41,6 +41,11 @@ export class AuditCollector { this.sessionPrincipals.set(sessionId, { ...existing, tokenName: token.tokenName, tokenSha: token.tokenSha }); } + /** Look up the McpToken SHA for a session. Returns undefined for non-HTTP-mode sessions. */ + getSessionMcpTokenSha(sessionId: string): string | undefined { + return this.sessionPrincipals.get(sessionId)?.tokenSha; + } + /** Queue an audit event. Auto-fills projectName, userName, tokenName, and tokenSha. */ emit(event: Omit): void { const enriched: AuditEvent = { ...event, projectName: this.projectName }; diff --git a/src/mcplocal/src/gate/session-gate.ts b/src/mcplocal/src/gate/session-gate.ts index 920451a..137516f 100644 --- a/src/mcplocal/src/gate/session-gate.ts +++ b/src/mcplocal/src/gate/session-gate.ts @@ -3,6 +3,21 @@ * * Tracks whether a session has gone through the prompt selection flow. * When gated, only begin_session is accessible. After ungating, all tools work. + * + * Per-token ungate cache: + * When the caller authenticated via an `McpToken` (HTTP-mode service agent), + * we also remember the ungate keyed on the token's SHA. Subsequent sessions + * from the same token automatically start ungated for a TTL window. 
+ * + * Why: LiteLLM and similar MCP-proxying clients don't preserve the + * `mcp-session-id` header across chat completion calls, so every tool call + * lands on a fresh upstream session — which would otherwise be gated anew, + * forcing the agent into a begin_session loop. Keying on the token (which IS + * preserved, because it's in the Authorization header) gives us a stable + * identity that survives stateless proxies. + * + * Claude Code's stdio path keeps its session-id, so this code is a no-op for + * that case (session-id ungate still applies, token ungate is purely additive). */ import type { PromptIndexEntry, TagMatchResult } from './tag-matcher.js'; @@ -14,15 +29,37 @@ export interface SessionState { briefing: string | null; } +interface TokenUngateEntry { + tokenSha: string; + tags: string[]; + ungatedAt: number; + retrievedPrompts: Set; +} + +/** Default TTL for per-token ungate cache (1 hour). Tunable via env for testing. */ +const DEFAULT_TOKEN_UNGATE_TTL_MS = Number(process.env['MCPLOCAL_TOKEN_UNGATE_TTL_MS']) || 60 * 60 * 1000; + export class SessionGate { private sessions = new Map(); + private tokenUngates = new Map(); + private readonly tokenUngateTtlMs: number; - /** Create a new session. Starts gated if the project is gated. */ - createSession(sessionId: string, projectGated: boolean): void { + constructor(tokenUngateTtlMs = DEFAULT_TOKEN_UNGATE_TTL_MS) { + this.tokenUngateTtlMs = tokenUngateTtlMs; + } + + /** + * Create a new session. Starts gated if the project is gated, UNLESS the + * caller's McpToken already ungated within the last TTL window — in which + * case the session inherits the previous tags + retrievedPrompts so the + * agent doesn't get the full gated greeting on every fresh session. + */ + createSession(sessionId: string, projectGated: boolean, tokenSha?: string): void { + const priorEntry = tokenSha ? 
this.getActiveTokenEntry(tokenSha) : null; this.sessions.set(sessionId, { - gated: projectGated, - tags: [], - retrievedPrompts: new Set(), + gated: projectGated && priorEntry === null, + tags: priorEntry ? [...priorEntry.tags] : [], + retrievedPrompts: priorEntry ? new Set(priorEntry.retrievedPrompts) : new Set(), briefing: null, }); } @@ -37,18 +74,37 @@ export class SessionGate { return this.sessions.get(sessionId)?.gated ?? false; } - /** Ungate a session after prompt selection is complete. */ - ungate(sessionId: string, tags: string[], matchResult: TagMatchResult): void { + /** True when a token has an active (non-expired) ungate entry. */ + isTokenUngated(tokenSha: string): boolean { + return this.getActiveTokenEntry(tokenSha) !== null; + } + + /** + * Ungate a session after prompt selection is complete. + * + * When `tokenSha` is supplied, also remember the ungate keyed on the token + * so future sessions from the same token start ungated (survives proxies + * that drop `mcp-session-id`). + */ + ungate(sessionId: string, tags: string[], matchResult: TagMatchResult, tokenSha?: string): void { const session = this.sessions.get(sessionId); if (!session) return; session.gated = false; session.tags = [...session.tags, ...tags]; - // Track which prompts have been sent for (const p of matchResult.fullContent) { session.retrievedPrompts.add(p.name); } + + if (tokenSha !== undefined && tokenSha !== '') { + this.tokenUngates.set(tokenSha, { + tokenSha, + tags: [...session.tags], + ungatedAt: Date.now(), + retrievedPrompts: new Set(session.retrievedPrompts), + }); + } } /** Record additional prompts retrieved via read_prompts. */ @@ -73,4 +129,19 @@ export class SessionGate { removeSession(sessionId: string): void { this.sessions.delete(sessionId); } + + /** Forget a token's ungate entry (e.g. on revocation signal). 
*/ + revokeToken(tokenSha: string): void { + this.tokenUngates.delete(tokenSha); + } + + private getActiveTokenEntry(tokenSha: string): TokenUngateEntry | null { + const entry = this.tokenUngates.get(tokenSha); + if (!entry) return null; + if (Date.now() - entry.ungatedAt > this.tokenUngateTtlMs) { + this.tokenUngates.delete(tokenSha); + return null; + } + return entry; + } } diff --git a/src/mcplocal/src/proxymodel/plugin-context.ts b/src/mcplocal/src/proxymodel/plugin-context.ts index 39a6fd7..21fbb6e 100644 --- a/src/mcplocal/src/proxymodel/plugin-context.ts +++ b/src/mcplocal/src/proxymodel/plugin-context.ts @@ -25,6 +25,13 @@ export interface PluginContextDeps { queueNotification: (notification: JsonRpcNotification) => void; postToMcpd: (path: string, body: Record) => Promise; auditCollector?: AuditCollector; + /** + * Resolves the principal's McpToken SHA for this session, if the caller + * authenticated via an McpToken. Called lazily so the value reflects the + * session's current state even when the token is attached after the plugin + * context is created. + */ + getMcpTokenSha?: () => string | undefined; } /** @@ -55,6 +62,11 @@ export class PluginContextImpl implements PluginSessionContext { this.deps = deps; } + /** McpToken SHA for the current caller, or undefined for STDIO/session-auth callers. 
*/ + getMcpTokenSha(): string | undefined { + return this.deps.getMcpTokenSha?.(); + } + registerTool(tool: ToolDefinition, handler: VirtualToolHandler): void { this.virtualTools.set(tool.name, { definition: tool, handler }); } diff --git a/src/mcplocal/src/proxymodel/plugin.ts b/src/mcplocal/src/proxymodel/plugin.ts index 9c2fc6b..b2d2f39 100644 --- a/src/mcplocal/src/proxymodel/plugin.ts +++ b/src/mcplocal/src/proxymodel/plugin.ts @@ -50,6 +50,14 @@ export interface PluginSessionContext { // Audit event emission (auto-fills sessionId and projectName) emitAuditEvent(event: Omit): void; + + /** + * McpToken SHA for the current caller, or undefined if the session was + * authenticated via a User session (STDIO/Claude Code path). Plugins can use + * this to key state on the token principal rather than the session-id — + * useful when the session-id doesn't survive a proxy (e.g. LiteLLM). + */ + getMcpTokenSha(): string | undefined; } // ── Virtual Server ────────────────────────────────────────────────── diff --git a/src/mcplocal/src/proxymodel/plugins/gate.ts b/src/mcplocal/src/proxymodel/plugins/gate.ts index 41e1af6..bc51268 100644 --- a/src/mcplocal/src/proxymodel/plugins/gate.ts +++ b/src/mcplocal/src/proxymodel/plugins/gate.ts @@ -40,7 +40,11 @@ export function createGatePlugin(config: GatePluginConfig = {}): ProxyModelPlugi description: 'Gated session flow: begin_session → prompt selection → ungate.', async onSessionCreate(ctx) { - sessionGate.createSession(ctx.sessionId, isGated); + // Pass the caller's McpToken SHA so the gate can honor a cross-session + // ungate cache keyed on the token principal. Fixes the LiteLLM case where + // each tool call lands on a fresh mcp-session-id → would otherwise loop + // on begin_session forever. 
+ sessionGate.createSession(ctx.sessionId, isGated, ctx.getMcpTokenSha()); // Register begin_session virtual tool ctx.registerTool(getBeginSessionTool(llmSelector), async (args, callCtx) => { @@ -264,8 +268,9 @@ async function handleBeginSession( matchResult = tagMatcher.match(tags, promptIndex); } - // Ungate the session - sessionGate.ungate(ctx.sessionId, tags, matchResult); + // Ungate the session (and remember the ungate per McpToken if this is a + // service-token request, so the next session from the same token skips the gate). + sessionGate.ungate(ctx.sessionId, tags, matchResult, ctx.getMcpTokenSha()); ctx.queueNotification('notifications/tools/list_changed'); // Audit: gate_decision for begin_session @@ -451,8 +456,8 @@ async function handleGatedIntercept( const promptIndex = await ctx.fetchPromptIndex(); const matchResult = tagMatcher.match(tags, promptIndex); - // Ungate the session - sessionGate.ungate(ctx.sessionId, tags, matchResult); + // Ungate the session (and remember per-token if the caller is a McpToken). + sessionGate.ungate(ctx.sessionId, tags, matchResult, ctx.getMcpTokenSha()); ctx.queueNotification('notifications/tools/list_changed'); // Audit: gate_decision for auto-intercept @@ -522,7 +527,7 @@ async function handleGatedIntercept( return response; } catch { // If prompt retrieval fails, just ungate and route normally - sessionGate.ungate(ctx.sessionId, tags, { fullContent: [], indexOnly: [], remaining: [] }); + sessionGate.ungate(ctx.sessionId, tags, { fullContent: [], indexOnly: [], remaining: [] }, ctx.getMcpTokenSha()); ctx.queueNotification('notifications/tools/list_changed'); return ctx.routeToUpstream(request); } diff --git a/src/mcplocal/src/router.ts b/src/mcplocal/src/router.ts index e5c80d0..6e8f365 100644 --- a/src/mcplocal/src/router.ts +++ b/src/mcplocal/src/router.ts @@ -198,6 +198,10 @@ export class McpRouter { return this.mcpdClient.post(path, body); }, ...(this.auditCollector ? 
{ auditCollector: this.auditCollector } : {}), + // Lazily resolve the caller's McpToken SHA via the audit collector's + // session principal map. The token is attached in onsessioninitialized, + // which runs before any plugin context is created, so this is stable. + getMcpTokenSha: () => this.auditCollector?.getSessionMcpTokenSha(sessionId), }; ctx = new PluginContextImpl(deps); diff --git a/src/mcplocal/tests/session-gate.test.ts b/src/mcplocal/tests/session-gate.test.ts index a086df1..b604899 100644 --- a/src/mcplocal/tests/session-gate.test.ts +++ b/src/mcplocal/tests/session-gate.test.ts @@ -152,4 +152,76 @@ describe('SessionGate', () => { expect(gate.isGated('s1')).toBe(false); expect(gate.getSession('s2')!.tags).toEqual([]); // s2 untouched }); + + describe('per-McpToken ungate cache', () => { + it('new session from an already-ungated token starts ungated, with prior tags + prompts', () => { + const gate = new SessionGate(); + gate.createSession('session-1', true, 'tokA'); + expect(gate.isGated('session-1')).toBe(true); + + gate.ungate('session-1', ['ops'], makeMatchResult(['runbook']), 'tokA'); + expect(gate.isTokenUngated('tokA')).toBe(true); + + // LiteLLM semantics: same token, brand-new session-id. + gate.createSession('session-2', true, 'tokA'); + expect(gate.isGated('session-2')).toBe(false); + const s2 = gate.getSession('session-2')!; + expect(s2.tags).toContain('ops'); + expect(s2.retrievedPrompts.has('runbook')).toBe(true); + }); + + it('does not persist across tokens', () => { + const gate = new SessionGate(); + gate.createSession('s1', true, 'tokA'); + gate.ungate('s1', ['ops'], makeMatchResult(['p']), 'tokA'); + + // Different token → fresh gated session. 
+ gate.createSession('s2', true, 'tokB'); + expect(gate.isGated('s2')).toBe(true); + expect(gate.isTokenUngated('tokB')).toBe(false); + }); + + it('is not triggered when no tokenSha is supplied (STDIO path)', () => { + const gate = new SessionGate(); + gate.createSession('s1', true); + gate.ungate('s1', ['ops'], makeMatchResult(['p'])); + + // A second session with no token starts gated — STDIO semantics preserved. + gate.createSession('s2', true); + expect(gate.isGated('s2')).toBe(true); + }); + + it('honors the TTL window and expires', async () => { + const gate = new SessionGate(50); // 50ms TTL for the test + gate.createSession('s1', true, 'tokA'); + gate.ungate('s1', ['ops'], makeMatchResult(['p']), 'tokA'); + expect(gate.isTokenUngated('tokA')).toBe(true); + + // Wait past the TTL, then assert outside the timer callback so a failed + // expectation fails this test directly instead of throwing inside setTimeout. + await new Promise<void>((resolve) => setTimeout(resolve, 70)); + expect(gate.isTokenUngated('tokA')).toBe(false); + gate.createSession('s2', true, 'tokA'); + expect(gate.isGated('s2')).toBe(true); + }); + + it('revokeToken clears the ungate entry immediately', () => { + const gate = new SessionGate(); + gate.createSession('s1', true, 'tokA'); + gate.ungate('s1', ['ops'], makeMatchResult(['p']), 'tokA'); + expect(gate.isTokenUngated('tokA')).toBe(true); + + gate.revokeToken('tokA'); + expect(gate.isTokenUngated('tokA')).toBe(false); + gate.createSession('s2', true, 'tokA'); + expect(gate.isGated('s2')).toBe(true); + }); + + it('empty-string tokenSha does not register an ungate entry', () => { + const gate = new SessionGate(); + gate.createSession('s1', true, ''); + gate.ungate('s1', ['ops'], makeMatchResult(['p']), ''); + expect(gate.isTokenUngated('')).toBe(false); + }); + }); });