feat(mcplocal): per-McpToken gate-ungate cache so service tokens survive proxies
All checks were successful
CI/CD / lint (pull_request) Successful in 1m0s
CI/CD / typecheck (pull_request) Successful in 1m51s
CI/CD / test (pull_request) Successful in 1m3s
CI/CD / build (pull_request) Successful in 2m13s
CI/CD / smoke (pull_request) Successful in 4m49s
CI/CD / publish (pull_request) Has been skipped
All checks were successful
CI/CD / lint (pull_request) Successful in 1m0s
CI/CD / typecheck (pull_request) Successful in 1m51s
CI/CD / test (pull_request) Successful in 1m3s
CI/CD / build (pull_request) Successful in 2m13s
CI/CD / smoke (pull_request) Successful in 4m49s
CI/CD / publish (pull_request) Has been skipped
Fixes the LiteLLM loop: LiteLLM's /mcp/ proxy doesn't propagate the
mcp-session-id header, so every tool call from qwen3 landed on a fresh
upstream session, which always started gated, so the only visible tool
was begin_session — forever.
The session-id gate works fine for Claude Code (stdio, long-lived), but
breaks through session-stripping proxies. Identity that DOES survive:
the McpToken (always in the Authorization header). So now the gate
keys its ungate state on both:
- sessionId → per-session (unchanged; Claude Code path)
- tokenSha → per-token (NEW; service-token path)
Flow for an McpToken caller:
1. first begin_session succeeds → session ungated + tokenSha cached
2. next request lands on a new mcp-session-id (proxy stripped it)
3. SessionGate.createSession sees tokenSha, finds active token entry,
starts the new session ungated with the prior tags + retrievedPrompts
4. tools/list on the fresh session returns the full upstream set — no
more begin_session loop
Plumbing:
- AuditCollector.getSessionMcpTokenSha(sessionId) exposes the already-
tracked principal.
- PluginSessionContext gets getMcpTokenSha() so plugins can read the
token identity without knowing about the collector.
- SessionGate gains (tokenSha?: string) on createSession/ungate, plus
isTokenUngated and revokeToken. TTL defaults to 1hr; tunable via
MCPLOCAL_TOKEN_UNGATE_TTL_MS env var.
- Gate plugin passes ctx.getMcpTokenSha() at every ungate call site
(begin_session, gated-intercept, intercept-fallback).
Tests: 6 new cases in session-gate.test.ts covering cross-session
persistence, token isolation, STDIO-path unchanged, TTL expiry,
revokeToken, and the empty-string edge case. 21/21 pass; 690/690 in
mcplocal overall.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -41,6 +41,11 @@ export class AuditCollector {
|
||||
this.sessionPrincipals.set(sessionId, { ...existing, tokenName: token.tokenName, tokenSha: token.tokenSha });
|
||||
}
|
||||
|
||||
/**
 * Returns the McpToken SHA recorded for `sessionId` in the session-principal
 * map, or `undefined` when the session has no entry or no token SHA attached
 * (e.g. non-HTTP-mode sessions).
 */
getSessionMcpTokenSha(sessionId: string): string | undefined {
  const principal = this.sessionPrincipals.get(sessionId);
  return principal?.tokenSha;
}
|
||||
|
||||
/** Queue an audit event. Auto-fills projectName, userName, tokenName, and tokenSha. */
|
||||
emit(event: Omit<AuditEvent, 'projectName'>): void {
|
||||
const enriched: AuditEvent = { ...event, projectName: this.projectName };
|
||||
|
||||
@@ -3,6 +3,21 @@
|
||||
*
|
||||
* Tracks whether a session has gone through the prompt selection flow.
|
||||
* When gated, only begin_session is accessible. After ungating, all tools work.
|
||||
*
|
||||
* Per-token ungate cache:
|
||||
* When the caller authenticated via an `McpToken` (HTTP-mode service agent),
|
||||
* we also remember the ungate keyed on the token's SHA. Subsequent sessions
|
||||
* from the same token automatically start ungated for a TTL window.
|
||||
*
|
||||
* Why: LiteLLM and similar MCP-proxying clients don't preserve the
|
||||
* `mcp-session-id` header across chat completion calls, so every tool call
|
||||
* lands on a fresh upstream session — which would otherwise be gated anew,
|
||||
* forcing the agent into a begin_session loop. Keying on the token (which IS
|
||||
* preserved, because it's in the Authorization header) gives us a stable
|
||||
* identity that survives stateless proxies.
|
||||
*
|
||||
* Claude Code's stdio path keeps its session-id, so this code is a no-op for
|
||||
* that case (session-id ungate still applies, token ungate is purely additive).
|
||||
*/
|
||||
|
||||
import type { PromptIndexEntry, TagMatchResult } from './tag-matcher.js';
|
||||
@@ -14,15 +29,37 @@ export interface SessionState {
|
||||
briefing: string | null;
|
||||
}
|
||||
|
||||
/** Cached ungate state for one McpToken, stored in `SessionGate.tokenUngates` keyed by the token SHA. */
interface TokenUngateEntry {
|
||||
/** SHA of the McpToken this entry belongs to (same value as the map key). */
tokenSha: string;
|
||||
/** Snapshot of the ungating session's tags; copied into new sessions created for the same token. */
tags: string[];
|
||||
/** `Date.now()` timestamp (ms) taken at ungate time; compared against the TTL to expire the entry. */
ungatedAt: number;
|
||||
/** Snapshot of the prompt names already recorded as retrieved by the ungating session. */
retrievedPrompts: Set<string>;
|
||||
}
|
||||
|
||||
/** Built-in TTL for the per-token ungate cache: 1 hour. */
const FALLBACK_TOKEN_UNGATE_TTL_MS = 60 * 60 * 1000;

/**
 * Parse a TTL override (in milliseconds) from raw env-var text.
 *
 * Only finite, strictly positive numbers are accepted. Anything else — unset,
 * empty, non-numeric, zero, negative, or infinite — falls back to the built-in
 * 1 hour. Rejecting negatives and `Infinity` matters: a negative TTL would make
 * every cached entry look expired (silently disabling the cache), and an
 * infinite one would keep a token ungated forever.
 *
 * @param raw The env-var value, or `undefined` when the variable is not set.
 * @returns The TTL to use, in milliseconds.
 */
function parseTokenUngateTtlMs(raw: string | undefined): number {
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : FALLBACK_TOKEN_UNGATE_TTL_MS;
}

/** Default TTL for per-token ungate cache (1 hour). Tunable via the MCPLOCAL_TOKEN_UNGATE_TTL_MS env var. */
const DEFAULT_TOKEN_UNGATE_TTL_MS = parseTokenUngateTtlMs(process.env['MCPLOCAL_TOKEN_UNGATE_TTL_MS']);
|
||||
|
||||
export class SessionGate {
|
||||
private sessions = new Map<string, SessionState>();
|
||||
private tokenUngates = new Map<string, TokenUngateEntry>();
|
||||
private readonly tokenUngateTtlMs: number;
|
||||
|
||||
/** Create a new session. Starts gated if the project is gated. */
|
||||
createSession(sessionId: string, projectGated: boolean): void {
|
||||
/**
 * @param tokenUngateTtlMs How long (in ms) a per-token ungate entry stays
 *   active. Falls back to `DEFAULT_TOKEN_UNGATE_TTL_MS` when omitted.
 */
constructor(tokenUngateTtlMs: number = DEFAULT_TOKEN_UNGATE_TTL_MS) {
  this.tokenUngateTtlMs = tokenUngateTtlMs;
}
|
||||
|
||||
/**
 * Register a new session under `sessionId`.
 *
 * The session starts gated only when the project is gated AND the caller's
 * McpToken has no active (non-expired) ungate entry. When such an entry
 * exists, the session starts ungated and receives copies of the entry's tags
 * and retrieved-prompt names. An absent or empty `tokenSha` never matches an
 * entry.
 */
createSession(sessionId: string, projectGated: boolean, tokenSha?: string): void {
  const cached = tokenSha ? this.getActiveTokenEntry(tokenSha) : null;

  if (cached !== null) {
    // Token ungated recently: inherit its state (copied, so the cache entry
    // is not shared with the session).
    this.sessions.set(sessionId, {
      gated: false,
      tags: [...cached.tags],
      retrievedPrompts: new Set(cached.retrievedPrompts),
      briefing: null,
    });
    return;
  }

  this.sessions.set(sessionId, {
    gated: projectGated,
    tags: [],
    retrievedPrompts: new Set(),
    briefing: null,
  });
}
|
||||
@@ -37,18 +74,37 @@ export class SessionGate {
|
||||
return this.sessions.get(sessionId)?.gated ?? false;
|
||||
}
|
||||
|
||||
/** Ungate a session after prompt selection is complete. */
|
||||
ungate(sessionId: string, tags: string[], matchResult: TagMatchResult): void {
|
||||
/** Reports whether `tokenSha` currently has an ungate entry that has not expired. */
isTokenUngated(tokenSha: string): boolean {
  const liveEntry = this.getActiveTokenEntry(tokenSha);
  return liveEntry !== null;
}
|
||||
|
||||
/**
 * Ungate a session after prompt selection is complete.
 *
 * Marks the session ungated, appends `tags` to the session's tags, and records
 * the name of every prompt in `matchResult.fullContent` as retrieved. Does
 * nothing when `sessionId` is unknown.
 *
 * When a non-empty `tokenSha` is supplied, also remember the ungate keyed on
 * the token so future sessions from the same token start ungated (survives
 * proxies that drop `mcp-session-id`). Any previous entry for the token is
 * replaced and its timestamp reset.
 *
 * @param sessionId Session to ungate.
 * @param tags Tags selected for this session.
 * @param matchResult Match result whose `fullContent` prompts were sent.
 * @param tokenSha McpToken SHA of the caller, if any.
 */
ungate(sessionId: string, tags: string[], matchResult: TagMatchResult, tokenSha?: string): void {
  const session = this.sessions.get(sessionId);
  if (!session) return;

  session.gated = false;
  session.tags = [...session.tags, ...tags];

  // Track which prompts have been sent
  for (const p of matchResult.fullContent) {
    session.retrievedPrompts.add(p.name);
  }

  // An empty string counts as "no token", matching createSession's truthiness check.
  if (!tokenSha) return;

  this.tokenUngates.set(tokenSha, {
    tokenSha,
    // De-duplicate the snapshot. A session that inherited these tags from the
    // cache and then ungates again would otherwise write them back doubled,
    // and the list would keep growing for as long as the token stays active.
    tags: Array.from(new Set(session.tags)),
    ungatedAt: Date.now(),
    retrievedPrompts: new Set(session.retrievedPrompts),
  });
}
|
||||
|
||||
/** Record additional prompts retrieved via read_prompts. */
|
||||
@@ -73,4 +129,19 @@ export class SessionGate {
|
||||
removeSession(sessionId: string): void {
|
||||
this.sessions.delete(sessionId);
|
||||
}
|
||||
|
||||
/**
 * Drop the cached ungate entry for `tokenSha` (e.g. on a revocation signal).
 * Only the token cache is touched; a no-op when the token has no entry.
 */
revokeToken(tokenSha: string): void {
  const { tokenUngates } = this;
  tokenUngates.delete(tokenSha);
}
|
||||
|
||||
/**
 * Fetch the ungate entry for `tokenSha` if it is still inside the TTL window.
 * An entry older than `tokenUngateTtlMs` is deleted on lookup and `null` is
 * returned, as it is when no entry exists.
 */
private getActiveTokenEntry(tokenSha: string): TokenUngateEntry | null {
  const cached = this.tokenUngates.get(tokenSha);
  if (cached === undefined) return null;

  const ageMs = Date.now() - cached.ungatedAt;
  const expired = ageMs > this.tokenUngateTtlMs;
  if (!expired) return cached;

  // Evict lazily so a stale entry cannot be picked up by a later lookup.
  this.tokenUngates.delete(tokenSha);
  return null;
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,13 @@ export interface PluginContextDeps {
|
||||
queueNotification: (notification: JsonRpcNotification) => void;
|
||||
postToMcpd: (path: string, body: Record<string, unknown>) => Promise<unknown>;
|
||||
auditCollector?: AuditCollector;
|
||||
/**
|
||||
* Resolves the principal's McpToken SHA for this session, if the caller
|
||||
* authenticated via an McpToken. Called lazily so the value reflects the
|
||||
* session's current state even when the token is attached after the plugin
|
||||
* context is created.
|
||||
*/
|
||||
getMcpTokenSha?: () => string | undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -55,6 +62,11 @@ export class PluginContextImpl implements PluginSessionContext {
|
||||
this.deps = deps;
|
||||
}
|
||||
|
||||
/**
 * Resolves the caller's McpToken SHA through the optional `getMcpTokenSha`
 * dependency. Yields `undefined` when no resolver was supplied or when the
 * resolver itself returns `undefined` (STDIO/session-auth callers).
 */
getMcpTokenSha(): string | undefined {
  if (this.deps.getMcpTokenSha == null) return undefined;
  return this.deps.getMcpTokenSha();
}
|
||||
|
||||
registerTool(tool: ToolDefinition, handler: VirtualToolHandler): void {
|
||||
this.virtualTools.set(tool.name, { definition: tool, handler });
|
||||
}
|
||||
|
||||
@@ -50,6 +50,14 @@ export interface PluginSessionContext {
|
||||
|
||||
// Audit event emission (auto-fills sessionId and projectName)
|
||||
emitAuditEvent(event: Omit<AuditEvent, 'sessionId' | 'projectName'>): void;
|
||||
|
||||
/**
|
||||
* McpToken SHA for the current caller, or undefined if the session was
|
||||
* authenticated via a User session (STDIO/Claude Code path). Plugins can use
|
||||
* this to key state on the token principal rather than the session-id —
|
||||
* useful when the session-id doesn't survive a proxy (e.g. LiteLLM).
|
||||
*/
|
||||
getMcpTokenSha(): string | undefined;
|
||||
}
|
||||
|
||||
// ── Virtual Server ──────────────────────────────────────────────────
|
||||
|
||||
@@ -40,7 +40,11 @@ export function createGatePlugin(config: GatePluginConfig = {}): ProxyModelPlugi
|
||||
description: 'Gated session flow: begin_session → prompt selection → ungate.',
|
||||
|
||||
async onSessionCreate(ctx) {
|
||||
sessionGate.createSession(ctx.sessionId, isGated);
|
||||
// Pass the caller's McpToken SHA so the gate can honor a cross-session
|
||||
// ungate cache keyed on the token principal. Fixes the LiteLLM case where
|
||||
// each tool call lands on a fresh mcp-session-id → would otherwise loop
|
||||
// on begin_session forever.
|
||||
sessionGate.createSession(ctx.sessionId, isGated, ctx.getMcpTokenSha());
|
||||
|
||||
// Register begin_session virtual tool
|
||||
ctx.registerTool(getBeginSessionTool(llmSelector), async (args, callCtx) => {
|
||||
@@ -264,8 +268,9 @@ async function handleBeginSession(
|
||||
matchResult = tagMatcher.match(tags, promptIndex);
|
||||
}
|
||||
|
||||
// Ungate the session
|
||||
sessionGate.ungate(ctx.sessionId, tags, matchResult);
|
||||
// Ungate the session (and remember the ungate per McpToken if this is a
|
||||
// service-token request, so the next session from the same token skips the gate).
|
||||
sessionGate.ungate(ctx.sessionId, tags, matchResult, ctx.getMcpTokenSha());
|
||||
ctx.queueNotification('notifications/tools/list_changed');
|
||||
|
||||
// Audit: gate_decision for begin_session
|
||||
@@ -451,8 +456,8 @@ async function handleGatedIntercept(
|
||||
const promptIndex = await ctx.fetchPromptIndex();
|
||||
const matchResult = tagMatcher.match(tags, promptIndex);
|
||||
|
||||
// Ungate the session
|
||||
sessionGate.ungate(ctx.sessionId, tags, matchResult);
|
||||
// Ungate the session (and remember per-token if the caller is an McpToken).
|
||||
sessionGate.ungate(ctx.sessionId, tags, matchResult, ctx.getMcpTokenSha());
|
||||
ctx.queueNotification('notifications/tools/list_changed');
|
||||
|
||||
// Audit: gate_decision for auto-intercept
|
||||
@@ -522,7 +527,7 @@ async function handleGatedIntercept(
|
||||
return response;
|
||||
} catch {
|
||||
// If prompt retrieval fails, just ungate and route normally
|
||||
sessionGate.ungate(ctx.sessionId, tags, { fullContent: [], indexOnly: [], remaining: [] });
|
||||
sessionGate.ungate(ctx.sessionId, tags, { fullContent: [], indexOnly: [], remaining: [] }, ctx.getMcpTokenSha());
|
||||
ctx.queueNotification('notifications/tools/list_changed');
|
||||
return ctx.routeToUpstream(request);
|
||||
}
|
||||
|
||||
@@ -198,6 +198,10 @@ export class McpRouter {
|
||||
return this.mcpdClient.post(path, body);
|
||||
},
|
||||
...(this.auditCollector ? { auditCollector: this.auditCollector } : {}),
|
||||
// Lazily resolve the caller's McpToken SHA via the audit collector's
|
||||
// session principal map. The token is attached in onsessioninitialized,
|
||||
// which runs before any plugin context is created, so this is stable.
|
||||
getMcpTokenSha: () => this.auditCollector?.getSessionMcpTokenSha(sessionId),
|
||||
};
|
||||
|
||||
ctx = new PluginContextImpl(deps);
|
||||
|
||||
@@ -152,4 +152,76 @@ describe('SessionGate', () => {
|
||||
expect(gate.isGated('s1')).toBe(false);
|
||||
expect(gate.getSession('s2')!.tags).toEqual([]); // s2 untouched
|
||||
});
|
||||
|
||||
describe('per-McpToken ungate cache', () => {
  it('new session from an already-ungated token starts ungated, with prior tags + prompts', () => {
    const gate = new SessionGate();
    gate.createSession('session-1', true, 'tokA');
    expect(gate.isGated('session-1')).toBe(true);

    gate.ungate('session-1', ['ops'], makeMatchResult(['runbook']), 'tokA');
    expect(gate.isTokenUngated('tokA')).toBe(true);

    // LiteLLM semantics: same token, brand-new session-id.
    gate.createSession('session-2', true, 'tokA');
    expect(gate.isGated('session-2')).toBe(false);
    const s2 = gate.getSession('session-2')!;
    expect(s2.tags).toContain('ops');
    expect(s2.retrievedPrompts.has('runbook')).toBe(true);
  });

  it('does not persist across tokens', () => {
    const gate = new SessionGate();
    gate.createSession('s1', true, 'tokA');
    gate.ungate('s1', ['ops'], makeMatchResult(['p']), 'tokA');

    // Different token → fresh gated session.
    gate.createSession('s2', true, 'tokB');
    expect(gate.isGated('s2')).toBe(true);
    expect(gate.isTokenUngated('tokB')).toBe(false);
  });

  it('is not triggered when no tokenSha is supplied (STDIO path)', () => {
    const gate = new SessionGate();
    gate.createSession('s1', true);
    gate.ungate('s1', ['ops'], makeMatchResult(['p']));

    // A second session with no token starts gated — STDIO semantics preserved.
    gate.createSession('s2', true);
    expect(gate.isGated('s2')).toBe(true);
  });

  it('honors the TTL window and expires', () => {
    // Drive the clock by hand instead of sleeping on a real timer: the test
    // stays deterministic on a loaded CI runner and costs no wall-clock time.
    const realNow = Date.now;
    let now = 1000000;
    Date.now = () => now;
    try {
      const gate = new SessionGate(50); // 50ms TTL for the test
      gate.createSession('s1', true, 'tokA');
      gate.ungate('s1', ['ops'], makeMatchResult(['p']), 'tokA');
      expect(gate.isTokenUngated('tokA')).toBe(true);

      now += 70; // step past the 50ms TTL
      expect(gate.isTokenUngated('tokA')).toBe(false);
      gate.createSession('s2', true, 'tokA');
      expect(gate.isGated('s2')).toBe(true);
    } finally {
      // Always restore the real clock so later tests are unaffected.
      Date.now = realNow;
    }
  });

  it('revokeToken clears the ungate entry immediately', () => {
    const gate = new SessionGate();
    gate.createSession('s1', true, 'tokA');
    gate.ungate('s1', ['ops'], makeMatchResult(['p']), 'tokA');
    expect(gate.isTokenUngated('tokA')).toBe(true);

    gate.revokeToken('tokA');
    expect(gate.isTokenUngated('tokA')).toBe(false);
    gate.createSession('s2', true, 'tokA');
    expect(gate.isGated('s2')).toBe(true);
  });

  it('empty-string tokenSha does not register an ungate entry', () => {
    const gate = new SessionGate();
    gate.createSession('s1', true, '');
    gate.ungate('s1', ['ops'], makeMatchResult(['p']), '');
    expect(gate.isTokenUngated('')).toBe(false);
  });
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user