= {
toolUseID: string
data: P
}
/**
 * Returns the given progress messages with hook-progress events removed.
 * Everything else (including messages with no `data`) passes through.
 */
export function filterToolProgressMessages(
  progressMessagesForMessage: ProgressMessage[],
): ProgressMessage[] {
  // Named predicate keeps the filter call readable; the type predicate
  // mirrors the original inline callback.
  const isNotHookProgress = (msg: ProgressMessage): msg is ProgressMessage =>
    msg.data?.type !== 'hook_progress'
  return progressMessagesForMessage.filter(isNotHookProgress)
}
/**
 * Result returned by a tool's `call()`.
 * NOTE(review): generic parameters were stripped by extraction (`data: T`
 * with no declared `T`) — restore before compiling.
 */
export type ToolResult = {
  // Tool-specific result payload.
  data: T
  // Additional conversation messages produced by the tool run.
  // NOTE(review): presumably appended to the conversation by the caller — confirm.
  newMessages?: (
    | UserMessage
    | AssistantMessage
    | AttachmentMessage
    | SystemMessage
  )[]
  // contextModifier is only honored for tools that aren't concurrency safe.
  contextModifier?: (context: ToolUseContext) => ToolUseContext
  /** MCP protocol metadata (structuredContent, _meta) to pass through to SDK consumers */
  mcpMeta?: {
    _meta?: Record
    structuredContent?: Record
  }
}
/** Callback a running tool invokes to report streaming progress updates. */
export type ToolCallProgress = (
  progress: ToolProgress,
) => void
// Type for any schema that outputs an object with string keys.
// Used as the constraint (and default) for a tool's input-schema type parameter.
export type AnyObject = z.ZodType<{ [key: string]: unknown }>
/**
 * Checks if a tool matches the given name (primary name or alias).
 * @param tool anything with a `name` and optional `aliases`
 * @param name the name to test against
 */
export function toolMatchesName(
  tool: { name: string; aliases?: string[] },
  name: string,
): boolean {
  // Primary name wins; otherwise fall back to the alias list, if present.
  if (tool.name === name) {
    return true
  }
  return tool.aliases != null && tool.aliases.includes(name)
}
/**
 * Finds a tool by name or alias from a list of tools.
 * Returns the first match, or undefined when nothing matches.
 */
export function findToolByName(tools: Tools, name: string): Tool | undefined {
  // Linear scan; tool sets are small. Delegates matching to toolMatchesName
  // so primary names and aliases are handled in one place.
  for (const tool of tools) {
    if (toolMatchesName(tool, name)) {
      return tool
    }
  }
  return undefined
}
/**
 * The full contract a tool implements: identity, input schema, execution,
 * permission checks, and all UI rendering hooks. Concrete tools are built
 * via `buildTool`, which supplies defaults for the commonly-stubbed methods.
 * NOTE(review): many type arguments in this file appear stripped by
 * extraction (bare `z.infer`, `Promise>`, `Partial>`, `Record`) — kept
 * verbatim here rather than guessed; restore before compiling.
 */
export type Tool<
  Input extends AnyObject = AnyObject,
  Output = unknown,
  P extends ToolProgressData = ToolProgressData,
> = {
  /**
   * Optional aliases for backwards compatibility when a tool is renamed.
   * The tool can be looked up by any of these names in addition to its primary name.
   */
  aliases?: string[]
  /**
   * One-line capability phrase used by ToolSearch for keyword matching.
   * Helps the model find this tool via keyword search when it's deferred.
   * 3–10 words, no trailing period.
   * Prefer terms not already in the tool name (e.g. 'jupyter' for NotebookEdit).
   */
  searchHint?: string
  /** Execute the tool. `onProgress`, when provided, receives streaming progress. */
  call(
    args: z.infer,
    context: ToolUseContext,
    canUseTool: CanUseToolFn,
    parentMessage: AssistantMessage,
    onProgress?: ToolCallProgress,
  ): Promise>
  /** Model-facing description; may depend on the input and session options. */
  description(
    input: z.infer,
    options: {
      isNonInteractiveSession: boolean
      toolPermissionContext: ToolPermissionContext
      tools: Tools
    },
  ): Promise
  /** Zod schema for the tool's input. */
  readonly inputSchema: Input
  // Type for MCP tools that can specify their input schema directly in JSON Schema format
  // rather than converting from Zod schema
  readonly inputJSONSchema?: ToolInputJSONSchema
  // Optional because TungstenTool doesn't define this. TODO: Make it required.
  // When we do that, we can also go through and make this a bit more type-safe.
  outputSchema?: z.ZodType
  /** Optional equivalence check between two inputs (NOTE(review): presumably
   * used to detect duplicate tool calls — confirm at call site). */
  inputsEquivalent?(a: z.infer, b: z.infer): boolean
  /** True when this call may run concurrently with other tool calls. */
  isConcurrencySafe(input: z.infer): boolean
  /** Whether the tool is available at all in the current session. */
  isEnabled(): boolean
  /** True when this call performs no writes for the given input. */
  isReadOnly(input: z.infer): boolean
  /** Defaults to false. Only set when the tool performs irreversible operations (delete, overwrite, send). */
  isDestructive?(input: z.infer): boolean
  /**
   * What should happen when the user submits a new message while this tool
   * is running.
   *
   * - `'cancel'` — stop the tool and discard its result
   * - `'block'` — keep running; the new message waits
   *
   * Defaults to `'block'` when not implemented.
   */
  interruptBehavior?(): 'cancel' | 'block'
  /**
   * Returns information about whether this tool use is a search or read operation
   * that should be collapsed into a condensed display in the UI. Examples include
   * file searching (Grep, Glob), file reading (Read), and bash commands like find,
   * grep, wc, etc.
   *
   * Returns an object indicating whether the operation is a search or read operation:
   * - `isSearch: true` for search operations (grep, find, glob patterns)
   * - `isRead: true` for read operations (cat, head, tail, file read)
   * - `isList: true` for directory-listing operations (ls, tree, du)
   * - All can be false if the operation shouldn't be collapsed
   */
  isSearchOrReadCommand?(input: z.infer): {
    isSearch: boolean
    isRead: boolean
    isList?: boolean
  }
  // NOTE(review): semantics not defined in this file — presumably whether the
  // tool reaches outside the local workspace; confirm before documenting further.
  isOpenWorld?(input: z.infer): boolean
  requiresUserInteraction?(): boolean
  // Set on tools backed by an MCP server / an LSP integration, respectively.
  isMcp?: boolean
  isLsp?: boolean
  /**
   * When true, this tool is deferred (sent with defer_loading: true) and requires
   * ToolSearch to be used before it can be called.
   */
  readonly shouldDefer?: boolean
  /**
   * When true, this tool is never deferred — its full schema appears in the
   * initial prompt even when ToolSearch is enabled. For MCP tools, set via
   * `_meta['anthropic/alwaysLoad']`. Use for tools the model must see on
   * turn 1 without a ToolSearch round-trip.
   */
  readonly alwaysLoad?: boolean
  /**
   * For MCP tools: the server and tool names as received from the MCP server (unnormalized).
   * Present on all MCP tools regardless of whether `name` is prefixed (mcp__server__tool)
   * or unprefixed (CLAUDE_AGENT_SDK_MCP_NO_PREFIX mode).
   */
  mcpInfo?: { serverName: string; toolName: string }
  /** Primary tool name; lookup also honors `aliases` (see toolMatchesName). */
  readonly name: string
  /**
   * Maximum size in characters for tool result before it gets persisted to disk.
   * When exceeded, the result is saved to a file and Claude receives a preview
   * with the file path instead of the full content.
   *
   * Set to Infinity for tools whose output must never be persisted (e.g. Read,
   * where persisting creates a circular Read→file→Read loop and the tool
   * already self-bounds via its own limits).
   */
  maxResultSizeChars: number
  /**
   * When true, enables strict mode for this tool, which causes the API to
   * more strictly adhere to tool instructions and parameter schemas.
   * Only applied when the tengu_tool_pear is enabled.
   */
  readonly strict?: boolean
  /**
   * Called on copies of tool_use input before observers see it (SDK stream,
   * transcript, canUseTool, PreToolUse/PostToolUse hooks). Mutate in place
   * to add legacy/derived fields. Must be idempotent. The original API-bound
   * input is never mutated (preserves prompt cache). Not re-applied when a
   * hook/permission returns a fresh updatedInput — those own their shape.
   */
  backfillObservableInput?(input: Record): void
  /**
   * Determines if this tool is allowed to run with this input in the current context.
   * It informs the model of why the tool use failed, and does not directly display any UI.
   * @param input
   * @param context
   */
  validateInput?(
    input: z.infer,
    context: ToolUseContext,
  ): Promise
  /**
   * Determines if the user is asked for permission. Only called after validateInput() passes.
   * General permission logic is in permissions.ts. This method contains tool-specific logic.
   * @param input
   * @param context
   */
  checkPermissions(
    input: z.infer,
    context: ToolUseContext,
  ): Promise
  // Optional method for tools that operate on a file path
  getPath?(input: z.infer): string
  /**
   * Prepare a matcher for hook `if` conditions (permission-rule patterns like
   * "git *" from "Bash(git *)"). Called once per hook-input pair; any
   * expensive parsing happens here. Returns a closure that is called per
   * hook pattern. If not implemented, only tool-name-level matching works.
   */
  preparePermissionMatcher?(
    input: z.infer,
  ): Promise<(pattern: string) => boolean>
  /** System-prompt text for this tool, given the session's tool/agent context. */
  prompt(options: {
    getToolPermissionContext: () => Promise
    tools: Tools
    agents: AgentDefinition[]
    allowedAgentTypes?: string[]
  }): Promise
  /** Display name shown in the UI; may vary with the (possibly partial) input. */
  userFacingName(input: Partial> | undefined): string
  /** Optional theme color behind the user-facing name. */
  userFacingNameBackgroundColor?(
    input: Partial> | undefined,
  ): keyof Theme | undefined
  /**
   * Transparent wrappers (e.g. REPL) delegate all rendering to their progress
   * handler, which emits native-looking blocks for each inner tool call.
   * The wrapper itself shows nothing.
   */
  isTransparentWrapper?(): boolean
  /**
   * Returns a short string summary of this tool use for display in compact views.
   * @param input The tool input
   * @returns A short string summary, or null to not display
   */
  getToolUseSummary?(input: Partial> | undefined): string | null
  /**
   * Returns a human-readable present-tense activity description for spinner display.
   * Example: "Reading src/foo.ts", "Running bun test", "Searching for pattern"
   * @param input The tool input
   * @returns Activity description string, or null to fall back to tool name
   */
  getActivityDescription?(
    input: Partial> | undefined,
  ): string | null
  /**
   * Returns a compact representation of this tool use for the auto-mode
   * security classifier. Examples: `ls -la` for Bash, `/tmp/x: new content`
   * for Edit. Return '' to skip this tool in the classifier transcript
   * (e.g. tools with no security relevance). May return an object to avoid
   * double-encoding when the caller JSON-wraps the value.
   */
  toAutoClassifierInput(input: z.infer): unknown
  /** Convert the tool's output into the API's tool_result block shape. */
  mapToolResultToToolResultBlockParam(
    content: Output,
    toolUseID: string,
  ): ToolResultBlockParam
  /**
   * Optional. When omitted, the tool result renders nothing (same as returning
   * null). Omit for tools whose results are surfaced elsewhere (e.g., TodoWrite
   * updates the todo panel, not the transcript).
   */
  renderToolResultMessage?(
    content: Output,
    progressMessagesForMessage: ProgressMessage[],
    options: {
      style?: 'condensed'
      theme: ThemeName
      tools: Tools
      verbose: boolean
      isTranscriptMode?: boolean
      isBriefOnly?: boolean
      /** Original tool_use input, when available. Useful for compact result
       * summaries that reference what was requested (e.g. "Sent to #foo"). */
      input?: unknown
    },
  ): React.ReactNode
  /**
   * Flattened text of what renderToolResultMessage shows IN TRANSCRIPT
   * MODE (verbose=true, isTranscriptMode=true). For transcript search
   * indexing: the index counts occurrences in this string, the highlight
   * overlay scans the actual screen buffer. For count ≡ highlight, this
   * must return the text that ends up visible — not the model-facing
   * serialization from mapToolResultToToolResultBlockParam (which adds
   * system-reminders, persisted-output wrappers).
   *
   * Chrome can be skipped (under-count is fine). "Found 3 files in 12ms"
   * isn't worth indexing. Phantoms are not fine — text that's claimed
   * here but doesn't render is a count≠highlight bug.
   *
   * Optional: omitted → field-name heuristic in transcriptSearch.ts.
   * Drift caught by test/utils/transcriptSearch.renderFidelity.test.tsx
   * which renders sample outputs and flags text that's indexed-but-not-
   * rendered (phantom) or rendered-but-not-indexed (under-count warning).
   */
  extractSearchText?(out: Output): string
  /**
   * Render the tool use message. Note that `input` is partial because we render
   * the message as soon as possible, possibly before tool parameters have fully
   * streamed in.
   */
  renderToolUseMessage(
    input: Partial>,
    options: { theme: ThemeName; verbose: boolean; commands?: Command[] },
  ): React.ReactNode
  /**
   * Returns true when the non-verbose rendering of this output is truncated
   * (i.e., clicking to expand would reveal more content). Gates
   * click-to-expand in fullscreen — only messages where verbose actually
   * shows more get a hover/click affordance. Unset means never truncated.
   */
  isResultTruncated?(output: Output): boolean
  /**
   * Renders an optional tag to display after the tool use message.
   * Used for additional metadata like timeout, model, resume ID, etc.
   * Returns null to not display anything.
   */
  renderToolUseTag?(input: Partial>): React.ReactNode
  /**
   * Optional. When omitted, no progress UI is shown while the tool runs.
   */
  renderToolUseProgressMessage?(
    progressMessagesForMessage: ProgressMessage[],
    options: {
      tools: Tools
      verbose: boolean
      terminalSize?: { columns: number; rows: number }
      inProgressToolCallCount?: number
      isTranscriptMode?: boolean
    },
  ): React.ReactNode
  /** Optional rendering for a queued (not-yet-started) tool use. */
  renderToolUseQueuedMessage?(): React.ReactNode
  /**
   * Optional. When omitted, falls back to the default rejected-message
   * rendering (component name lost in extraction — see original source).
   * Only define this for tools that need custom rejection UI (e.g., file edits
   * that show the rejected diff).
   */
  renderToolUseRejectedMessage?(
    input: z.infer,
    options: {
      columns: number
      messages: Message[]
      style?: 'condensed'
      theme: ThemeName
      tools: Tools
      verbose: boolean
      progressMessagesForMessage: ProgressMessage[]
      isTranscriptMode?: boolean
    },
  ): React.ReactNode
  /**
   * Optional. When omitted, falls back to the default error-message
   * rendering (component name lost in extraction — see original source).
   * Only define this for tools that need custom error UI (e.g., search tools
   * that show "File not found" instead of the raw error).
   */
  renderToolUseErrorMessage?(
    result: ToolResultBlockParam['content'],
    options: {
      progressMessagesForMessage: ProgressMessage[]
      tools: Tools
      verbose: boolean
      isTranscriptMode?: boolean
    },
  ): React.ReactNode
  /**
   * Renders multiple parallel tool uses of this tool as a group
   * (non-verbose mode only). In verbose mode, individual tool uses render
   * at their original positions.
   * @returns React node to render, or null to fall back to individual rendering
   */
  renderGroupedToolUse?(
    toolUses: Array<{
      param: ToolUseBlockParam
      isResolved: boolean
      isError: boolean
      isInProgress: boolean
      progressMessages: ProgressMessage[]
      result?: {
        param: ToolResultBlockParam
        output: unknown
      }
    }>,
    options: {
      shouldAnimate: boolean
      tools: Tools
    },
  ): React.ReactNode | null
}
/**
 * A collection of tools. Use this type instead of `Tool[]` to make it easier
 * to track where tool sets are assembled, passed, and filtered across the codebase.
 * `readonly` prevents in-place mutation of shared tool sets.
 */
export type Tools = readonly Tool[]
/**
 * Methods that `buildTool` supplies a default for. A `ToolDef` may omit these;
 * the resulting `Tool` always has them.
 * Keep this list in sync with the keys of `TOOL_DEFAULTS` below.
 */
type DefaultableToolKeys =
  | 'isEnabled'
  | 'isConcurrencySafe'
  | 'isReadOnly'
  | 'isDestructive'
  | 'checkPermissions'
  | 'toAutoClassifierInput'
  | 'userFacingName'
/**
 * Tool definition accepted by `buildTool`. Same shape as `Tool` but with the
 * defaultable methods optional — `buildTool` fills them in so callers always
 * see a complete `Tool`.
 * NOTE(review): the `Omit,`/`Partial,` below are missing their first type
 * arguments (stripped by extraction — presumably `Tool<Input, Output, P>`);
 * restore before compiling.
 */
export type ToolDef<
  Input extends AnyObject = AnyObject,
  Output = unknown,
  P extends ToolProgressData = ToolProgressData,
> = Omit, DefaultableToolKeys> &
  Partial, DefaultableToolKeys>>
/**
 * Type-level spread mirroring `{ ...TOOL_DEFAULTS, ...def }`. For each
 * defaultable key: if D provides it (required), D's type wins; if D omits
 * it or has it optional (inherited from Partial<> in the constraint), the
 * default fills in. All other keys come from D verbatim — preserving arity,
 * optional presence, and literal types exactly as `satisfies Tool` did.
 * NOTE(review): the type-parameter list and `Omit`'s arguments were stripped
 * by extraction (`BuiltTool = Omit &` — presumably `BuiltTool<D …> =
 * Omit<D, DefaultableToolKeys> & …`); restore before compiling.
 */
type BuiltTool = Omit & {
  [K in DefaultableToolKeys]-?: K extends keyof D
    ? undefined extends D[K]
      ? ToolDefaults[K]
      : D[K]
    : ToolDefaults[K]
}
/**
 * Build a complete `Tool` from a partial definition, filling in safe defaults
 * for the commonly-stubbed methods. All tool exports should go through this so
 * that defaults live in one place and callers never need `?.() ?? default`.
 *
 * Defaults (fail-closed where it matters):
 * - `isEnabled` → `true`
 * - `isConcurrencySafe` → `false` (assume not safe)
 * - `isReadOnly` → `false` (assume writes)
 * - `isDestructive` → `false`
 * - `checkPermissions` → `{ behavior: 'allow', updatedInput }` (defer to general permission system)
 * - `toAutoClassifierInput` → `''` (skip classifier — security-relevant tools must override)
 * - `userFacingName` → `name`
 */
const TOOL_DEFAULTS = {
  isEnabled: () => true,
  isConcurrencySafe: (_input?: unknown) => false,
  isReadOnly: (_input?: unknown) => false,
  isDestructive: (_input?: unknown) => false,
  checkPermissions: (
    input: { [key: string]: unknown },
    _ctx?: ToolUseContext,
  ): Promise =>
    Promise.resolve({ behavior: 'allow', updatedInput: input }),
  toAutoClassifierInput: (_input?: unknown) => '',
  // buildTool always overrides this with `() => def.name`, so the '' here
  // only contributes to the ToolDefaults type, never to a built tool's behavior.
  userFacingName: (_input?: unknown) => '',
}
// The defaults type is the ACTUAL shape of TOOL_DEFAULTS (optional params so
// both 0-arg and full-arg call sites type-check — stubs varied in arity and
// tests relied on that), not the interface's strict signatures.
// Derived via typeof, so edits to TOOL_DEFAULTS flow through automatically.
type ToolDefaults = typeof TOOL_DEFAULTS
// D infers the concrete object-literal type from the call site. The
// constraint provides contextual typing for method parameters; `any` in
// constraint position is structural and never leaks into the return type.
// BuiltTool mirrors runtime `{...TOOL_DEFAULTS, ...def}` at the type level.
// NOTE(review): the `any` arguments the comment refers to were stripped by
// extraction (bare `ToolDef` below); restore before compiling.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type AnyToolDef = ToolDef
/**
 * Assemble a complete tool: defaults first, then a name-derived
 * `userFacingName` fallback, then the definition itself so anything `def`
 * supplies wins. The `as` bridges the structural-any constraint and the
 * precise BuiltTool return; the 0-error typecheck across all 60+ tools
 * proves the type semantics.
 */
export function buildTool(def: D): BuiltTool {
  const definitionWithNameFallback = {
    userFacingName: () => def.name,
    ...def,
  }
  const built = {
    ...TOOL_DEFAULTS,
    ...definitionWithNameFallback,
  }
  return built as BuiltTool
}
================================================
FILE: restored-src/src/assistant/sessionHistory.ts
================================================
import axios from 'axios'
import { getOauthConfig } from '../constants/oauth.js'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import { logForDebugging } from '../utils/debug.js'
import { getOAuthHeaders, prepareApiRequest } from '../utils/teleport/api.js'
/** Default number of events fetched per history page. */
export const HISTORY_PAGE_SIZE = 100
/** One page of session history, as returned by fetchLatestEvents / fetchOlderEvents. */
export type HistoryPage = {
  /** Chronological order within the page. */
  events: SDKMessage[]
  /** Oldest event ID in this page → before_id cursor for next-older page. */
  firstId: string | null
  /** true = older events exist. */
  hasMore: boolean
}
/** Raw wire shape of the GET /v1/sessions/:id/events response (snake_case). */
type SessionEventsResponse = {
  data: SDKMessage[]
  has_more: boolean
  first_id: string | null
  last_id: string | null
}
/** Resolved request context: events-endpoint URL plus prepared auth headers. */
export type HistoryAuthCtx = {
  baseUrl: string
  // NOTE(review): type arguments stripped by extraction (bare `Record`) —
  // presumably Record<string, string>; restore before compiling.
  headers: Record
}
/** Prepare auth + headers + base URL once, reuse across pages. */
export async function createHistoryAuthCtx(
  sessionId: string,
): Promise {
  const { accessToken, orgUUID } = await prepareApiRequest()
  // Events endpoint for this session on the OAuth-configured API host.
  const baseUrl = `${getOauthConfig().BASE_API_URL}/v1/sessions/${sessionId}/events`
  const headers = {
    ...getOAuthHeaders(accessToken),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': orgUUID,
  }
  return { baseUrl, headers }
}
async function fetchPage(
ctx: HistoryAuthCtx,
params: Record,
label: string,
): Promise {
const resp = await axios
.get(ctx.baseUrl, {
headers: ctx.headers,
params,
timeout: 15000,
validateStatus: () => true,
})
.catch(() => null)
if (!resp || resp.status !== 200) {
logForDebugging(`[${label}] HTTP ${resp?.status ?? 'error'}`)
return null
}
return {
events: Array.isArray(resp.data.data) ? resp.data.data : [],
firstId: resp.data.first_id,
hasMore: resp.data.has_more,
}
}
/**
 * Newest page: last `limit` events, chronological, via anchor_to_latest.
 * has_more=true means older events exist.
 */
export async function fetchLatestEvents(
  ctx: HistoryAuthCtx,
  limit = HISTORY_PAGE_SIZE,
): Promise {
  const params = { anchor_to_latest: true, limit }
  return await fetchPage(ctx, params, 'fetchLatestEvents')
}
/** Older page: events immediately before `beforeId` cursor. */
export async function fetchOlderEvents(
  ctx: HistoryAuthCtx,
  beforeId: string,
  limit = HISTORY_PAGE_SIZE,
): Promise {
  const params = { before_id: beforeId, limit }
  return await fetchPage(ctx, params, 'fetchOlderEvents')
}
================================================
FILE: restored-src/src/bootstrap/state.ts
================================================
import type { BetaMessageStreamParams } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import type { Attributes, Meter, MetricOptions } from '@opentelemetry/api'
import type { logs } from '@opentelemetry/api-logs'
import type { LoggerProvider } from '@opentelemetry/sdk-logs'
import type { MeterProvider } from '@opentelemetry/sdk-metrics'
import type { BasicTracerProvider } from '@opentelemetry/sdk-trace-base'
import { realpathSync } from 'fs'
import sumBy from 'lodash-es/sumBy.js'
import { cwd } from 'process'
import type { HookEvent, ModelUsage } from 'src/entrypoints/agentSdkTypes.js'
import type { AgentColorName } from 'src/tools/AgentTool/agentColorManager.js'
import type { HookCallbackMatcher } from 'src/types/hooks.js'
// Indirection for browser-sdk build (package.json "browser" field swaps
// crypto.ts for crypto.browser.ts). Pure leaf re-export of node:crypto —
// zero circular-dep risk. Path-alias import bypasses bootstrap-isolation
// (rule only checks ./ and / prefixes); explicit disable documents intent.
// eslint-disable-next-line custom-rules/bootstrap-isolation
import { randomUUID } from 'src/utils/crypto.js'
import type { ModelSetting } from 'src/utils/model/model.js'
import type { ModelStrings } from 'src/utils/model/modelStrings.js'
import type { SettingSource } from 'src/utils/settings/constants.js'
import { resetSettingsCache } from 'src/utils/settings/settingsCache.js'
import type { PluginHookMatcher } from 'src/utils/settings/types.js'
import { createSignal } from 'src/utils/signal.js'
// Union type for registered hooks — either an SDK callback matcher or a
// native plugin hook matcher.
type RegisteredHookMatcher = HookCallbackMatcher | PluginHookMatcher
import type { SessionId } from 'src/types/ids.js'
// DO NOT ADD MORE STATE HERE - BE JUDICIOUS WITH GLOBAL STATE
// Discriminated union on `kind`: a plugin-provided channel (verified against
// its marketplace) vs a direct server channel.
// dev: true on entries that came via --dangerously-load-development-channels.
// The allowlist gate checks this per-entry (not the session-wide
// hasDevChannels bit) so passing both flags doesn't let the dev dialog's
// acceptance leak allowlist-bypass to the --channels entries.
export type ChannelEntry =
  | { kind: 'plugin'; name: string; marketplace: string; dev?: boolean }
  | { kind: 'server'; name: string; dev?: boolean }
/** Minimal counter surface: add a value, optionally with extra OTel attributes. */
export type AttributedCounter = {
  add(value: number, additionalAttributes?: Attributes): void
}
/**
 * Process-global session state. Kept deliberately small — see the warnings
 * above and below this type before adding fields.
 * NOTE(review): several container type arguments were stripped by extraction
 * (bare `Record`, `Map`, `Set`, `Omit`, `Array`, `Partial>`, `ReturnType`);
 * restore before compiling.
 */
type State = {
  // Process cwd captured at startup, before any in-session directory changes.
  originalCwd: string
  // Stable project root - set once at startup (including by --worktree flag),
  // never updated by mid-session EnterWorktreeTool.
  // Use for project identity (history, skills, sessions) not file operations.
  projectRoot: string
  // Session-cumulative cost and duration accounting.
  totalCostUSD: number
  totalAPIDuration: number
  totalAPIDurationWithoutRetries: number
  totalToolDuration: number
  // Per-turn duration/count accounting. NOTE(review): presumably reset each
  // turn — the reset site is not in this file; confirm before relying on it.
  turnHookDurationMs: number
  turnToolDurationMs: number
  turnClassifierDurationMs: number
  turnToolCount: number
  turnHookCount: number
  turnClassifierCount: number
  // Session start / last user interaction timestamps (epoch ms).
  startTime: number
  lastInteractionTime: number
  // Cumulative lines added/removed by edits this session.
  totalLinesAdded: number
  totalLinesRemoved: number
  // True once a model without known pricing was used (cost total is a floor).
  hasUnknownModelCost: boolean
  // Current working directory (may change mid-session; see originalCwd).
  cwd: string
  // Per-model usage, keyed by model name.
  modelUsage: { [modelName: string]: ModelUsage }
  mainLoopModelOverride: ModelSetting | undefined
  initialMainLoopModel: ModelSetting
  modelStrings: ModelStrings | null
  isInteractive: boolean
  kairosActive: boolean
  // When true, ensureToolResultPairing throws on mismatch instead of
  // repairing with synthetic placeholders. HFI opts in at startup so
  // trajectories fail fast rather than conditioning the model on fake
  // tool_results.
  strictToolResultPairing: boolean
  sdkAgentProgressSummariesEnabled: boolean
  userMsgOptIn: boolean
  clientType: string
  sessionSource: string | undefined
  questionPreviewFormat: 'markdown' | 'html' | undefined
  // Settings supplied via CLI flags: file path or inline JSON object.
  flagSettingsPath: string | undefined
  flagSettingsInline: Record | null
  allowedSettingSources: SettingSource[]
  // Credentials handed over at startup (undefined = never provided).
  sessionIngressToken: string | null | undefined
  oauthTokenFromFd: string | null | undefined
  apiKeyFromFd: string | null | undefined
  // Telemetry state
  meter: Meter | null
  sessionCounter: AttributedCounter | null
  locCounter: AttributedCounter | null
  prCounter: AttributedCounter | null
  commitCounter: AttributedCounter | null
  costCounter: AttributedCounter | null
  tokenCounter: AttributedCounter | null
  codeEditToolDecisionCounter: AttributedCounter | null
  activeTimeCounter: AttributedCounter | null
  statsStore: { observe(name: string, value: number): void } | null
  sessionId: SessionId
  // Parent session ID for tracking session lineage (e.g., plan mode -> implementation)
  parentSessionId: SessionId | undefined
  // Logger state
  loggerProvider: LoggerProvider | null
  eventLogger: ReturnType | null
  // Meter provider state
  meterProvider: MeterProvider | null
  // Tracer provider state
  tracerProvider: BasicTracerProvider | null
  // Agent color state
  agentColorMap: Map
  agentColorIndex: number
  // Last API request for bug reports
  lastAPIRequest: Omit | null
  // Messages from the last API request (ant-only; reference, not clone).
  // Captures the exact post-compaction, CLAUDE.md-injected message set sent
  // to the API so /share's serialized_conversation.json reflects reality.
  lastAPIRequestMessages: BetaMessageStreamParams['messages'] | null
  // Last auto-mode classifier request(s) for /share transcript
  lastClassifierRequests: unknown[] | null
  // CLAUDE.md content cached by context.ts for the auto-mode classifier.
  // Breaks the yoloClassifier → claudemd → filesystem → permissions cycle.
  cachedClaudeMdContent: string | null
  // In-memory error log for recent errors
  inMemoryErrorLog: Array<{ error: string; timestamp: string }>
  // Session-only plugins from --plugin-dir flag
  inlinePlugins: Array
  // Explicit --chrome / --no-chrome flag value (undefined = not set on CLI)
  chromeFlagOverride: boolean | undefined
  // Use cowork_plugins directory instead of plugins (--cowork flag or env var)
  useCoworkPlugins: boolean
  // Session-only bypass permissions mode flag (not persisted)
  sessionBypassPermissionsMode: boolean
  // Session-only flag gating the .claude/scheduled_tasks.json watcher
  // (useScheduledTasks). Set by cronScheduler.start() when the JSON has
  // entries, or by CronCreateTool. Not persisted.
  scheduledTasksEnabled: boolean
  // Session-only cron tasks created via CronCreate with durable: false.
  // Fire on schedule like file-backed tasks but are never written to
  // .claude/scheduled_tasks.json — they die with the process. Typed via
  // SessionCronTask below (not importing from cronTasks.ts keeps
  // bootstrap a leaf of the import DAG).
  sessionCronTasks: SessionCronTask[]
  // Teams created this session via TeamCreate. cleanupSessionTeams()
  // removes these on gracefulShutdown so subagent-created teams don't
  // persist on disk forever (gh-32730). TeamDelete removes entries to
  // avoid double-cleanup. Lives here (not teamHelpers.ts) so
  // resetStateForTests() clears it between tests.
  sessionCreatedTeams: Set
  // Session-only trust flag for home directory (not persisted to disk)
  // When running from home dir, trust dialog is shown but not saved to disk.
  // This flag allows features requiring trust to work during the session.
  sessionTrustAccepted: boolean
  // Session-only flag to disable session persistence to disk
  sessionPersistenceDisabled: boolean
  // Track if user has exited plan mode in this session (for re-entry guidance)
  hasExitedPlanMode: boolean
  // Track if we need to show the plan mode exit attachment (one-time notification)
  needsPlanModeExitAttachment: boolean
  // Track if we need to show the auto mode exit attachment (one-time notification)
  needsAutoModeExitAttachment: boolean
  // Track if LSP plugin recommendation has been shown this session (only show once)
  lspRecommendationShownThisSession: boolean
  // SDK init event state - jsonSchema for structured output
  initJsonSchema: Record | null
  // Registered hooks - SDK callbacks and plugin native hooks
  registeredHooks: Partial> | null
  // Cache for plan slugs: sessionId -> wordSlug
  planSlugCache: Map
  // Track teleported session for reliability logging
  teleportedSessionInfo: {
    isTeleported: boolean
    hasLoggedFirstMessage: boolean
    sessionId: string | null
  } | null
  // Track invoked skills for preservation across compaction
  // Keys are composite: `${agentId ?? ''}:${skillName}` to prevent cross-agent overwrites
  invokedSkills: Map<
    string,
    {
      skillName: string
      skillPath: string
      content: string
      invokedAt: number
      agentId: string | null
    }
  >
  // Track slow operations for dev bar display (ant-only)
  slowOperations: Array<{
    operation: string
    durationMs: number
    timestamp: number
  }>
  // SDK-provided betas (e.g., context-1m-2025-08-07)
  sdkBetas: string[] | undefined
  // Main thread agent type (from --agent flag or settings)
  mainThreadAgentType: string | undefined
  // Remote mode (--remote flag)
  isRemoteMode: boolean
  // Direct connect server URL (for display in header)
  directConnectServerUrl: string | undefined
  // System prompt section cache state
  systemPromptSectionCache: Map
  // Last date emitted to the model (for detecting midnight date changes)
  lastEmittedDate: string | null
  // Additional directories from --add-dir flag (for CLAUDE.md loading)
  additionalDirectoriesForClaudeMd: string[]
  // Channel server allowlist from --channels flag (servers whose channel
  // notifications should register this session). Parsed once in main.tsx —
  // the tag decides trust model: 'plugin' → marketplace verification +
  // allowlist, 'server' → allowlist always fails (schema is plugin-only).
  // Either kind needs entry.dev to bypass allowlist.
  allowedChannels: ChannelEntry[]
  // True if any entry in allowedChannels came from
  // --dangerously-load-development-channels (so ChannelsNotice can name the
  // right flag in policy-blocked messages)
  hasDevChannels: boolean
  // Dir containing the session's `.jsonl`; null = derive from originalCwd.
  sessionProjectDir: string | null
  // Cached prompt cache 1h TTL allowlist from GrowthBook (session-stable)
  promptCache1hAllowlist: string[] | null
  // Cached 1h TTL user eligibility (session-stable). Latched on first
  // evaluation so mid-session overage flips don't change the cache_control
  // TTL, which would bust the server-side prompt cache.
  promptCache1hEligible: boolean | null
  // Sticky-on latch for AFK_MODE_BETA_HEADER. Once auto mode is first
  // activated, keep sending the header for the rest of the session so
  // Shift+Tab toggles don't bust the ~50-70K token prompt cache.
  afkModeHeaderLatched: boolean | null
  // Sticky-on latch for FAST_MODE_BETA_HEADER. Once fast mode is first
  // enabled, keep sending the header so cooldown enter/exit doesn't
  // double-bust the prompt cache. The `speed` body param stays dynamic.
  fastModeHeaderLatched: boolean | null
  // Sticky-on latch for the cache-editing beta header. Once cached
  // microcompact is first enabled, keep sending the header so mid-session
  // GrowthBook/settings toggles don't bust the prompt cache.
  cacheEditingHeaderLatched: boolean | null
  // Sticky-on latch for clearing thinking from prior tool loops. Triggered
  // when >1h since last API call (confirmed cache miss — no cache-hit
  // benefit to keeping thinking). Once latched, stays on so the newly-warmed
  // thinking-cleared cache isn't busted by flipping back to keep:'all'.
  thinkingClearLatched: boolean | null
  // Current prompt ID (UUID) correlating a user prompt with subsequent OTel events
  promptId: string | null
  // Last API requestId for the main conversation chain (not subagents).
  // Updated after each successful API response for main-session queries.
  // Read at shutdown to send cache eviction hints to inference.
  lastMainRequestId: string | undefined
  // Timestamp (Date.now()) of the last successful API call completion.
  // Used to compute timeSinceLastApiCallMs in tengu_api_success for
  // correlating cache misses with idle time (cache TTL is ~5min).
  lastApiCompletionTimestamp: number | null
  // Set to true after compaction (auto or manual /compact). Consumed by
  // logAPISuccess to tag the first post-compaction API call so we can
  // distinguish compaction-induced cache misses from TTL expiry.
  pendingPostCompaction: boolean
}
// ALSO HERE - THINK THRICE BEFORE MODIFYING
/**
 * Builds a pristine State object for a fresh process.
 * Also consumed by resetStateForTests to restore every field, so every new
 * State field MUST get an entry here or tests will leak state between runs.
 */
function getInitialState(): State {
// Resolve symlinks in cwd to match behavior of shell.ts setCwd
// This ensures consistency with how paths are sanitized for session storage
let resolvedCwd = ''
// Guarded so this module can load in environments without process/fs
// (resolvedCwd stays '' there).
if (
typeof process !== 'undefined' &&
typeof process.cwd === 'function' &&
typeof realpathSync === 'function'
) {
const rawCwd = cwd()
try {
resolvedCwd = realpathSync(rawCwd).normalize('NFC')
} catch {
// File Provider EPERM on CloudStorage mounts (lstat per path component).
resolvedCwd = rawCwd.normalize('NFC')
}
}
const state: State = {
originalCwd: resolvedCwd,
projectRoot: resolvedCwd,
totalCostUSD: 0,
totalAPIDuration: 0,
totalAPIDurationWithoutRetries: 0,
totalToolDuration: 0,
turnHookDurationMs: 0,
turnToolDurationMs: 0,
turnClassifierDurationMs: 0,
turnToolCount: 0,
turnHookCount: 0,
turnClassifierCount: 0,
startTime: Date.now(),
lastInteractionTime: Date.now(),
totalLinesAdded: 0,
totalLinesRemoved: 0,
hasUnknownModelCost: false,
cwd: resolvedCwd,
modelUsage: {},
mainLoopModelOverride: undefined,
initialMainLoopModel: null,
modelStrings: null,
isInteractive: false,
kairosActive: false,
strictToolResultPairing: false,
sdkAgentProgressSummariesEnabled: false,
userMsgOptIn: false,
clientType: 'cli',
sessionSource: undefined,
questionPreviewFormat: undefined,
sessionIngressToken: undefined,
oauthTokenFromFd: undefined,
apiKeyFromFd: undefined,
flagSettingsPath: undefined,
flagSettingsInline: null,
allowedSettingSources: [
'userSettings',
'projectSettings',
'localSettings',
'flagSettings',
'policySettings',
],
// Telemetry state
meter: null,
sessionCounter: null,
locCounter: null,
prCounter: null,
commitCounter: null,
costCounter: null,
tokenCounter: null,
codeEditToolDecisionCounter: null,
activeTimeCounter: null,
statsStore: null,
sessionId: randomUUID() as SessionId,
parentSessionId: undefined,
// Logger state
loggerProvider: null,
eventLogger: null,
// Meter provider state
meterProvider: null,
tracerProvider: null,
// Agent color state
agentColorMap: new Map(),
agentColorIndex: 0,
// Last API request for bug reports
lastAPIRequest: null,
lastAPIRequestMessages: null,
// Last auto-mode classifier request(s) for /share transcript
lastClassifierRequests: null,
cachedClaudeMdContent: null,
// In-memory error log for recent errors
inMemoryErrorLog: [],
// Session-only plugins from --plugin-dir flag
inlinePlugins: [],
// Explicit --chrome / --no-chrome flag value (undefined = not set on CLI)
chromeFlagOverride: undefined,
// Use cowork_plugins directory instead of plugins
useCoworkPlugins: false,
// Session-only bypass permissions mode flag (not persisted)
sessionBypassPermissionsMode: false,
// Scheduled tasks disabled until flag or dialog enables them
scheduledTasksEnabled: false,
sessionCronTasks: [],
sessionCreatedTeams: new Set(),
// Session-only trust flag (not persisted to disk)
sessionTrustAccepted: false,
// Session-only flag to disable session persistence to disk
sessionPersistenceDisabled: false,
// Track if user has exited plan mode in this session
hasExitedPlanMode: false,
// Track if we need to show the plan mode exit attachment
needsPlanModeExitAttachment: false,
// Track if we need to show the auto mode exit attachment
needsAutoModeExitAttachment: false,
// Track if LSP plugin recommendation has been shown this session
lspRecommendationShownThisSession: false,
// SDK init event state
initJsonSchema: null,
registeredHooks: null,
// Cache for plan slugs
planSlugCache: new Map(),
// Track teleported session for reliability logging
teleportedSessionInfo: null,
// Track invoked skills for preservation across compaction
invokedSkills: new Map(),
// Track slow operations for dev bar display
slowOperations: [],
// SDK-provided betas
sdkBetas: undefined,
// Main thread agent type
mainThreadAgentType: undefined,
// Remote mode
isRemoteMode: false,
// Internal-only field; present solely for 'ant' users so the key does not
// exist at all on external builds.
...(process.env.USER_TYPE === 'ant'
? {
replBridgeActive: false,
}
: {}),
// Direct connect server URL
directConnectServerUrl: undefined,
// System prompt section cache state
systemPromptSectionCache: new Map(),
// Last date emitted to the model
lastEmittedDate: null,
// Additional directories from --add-dir flag (for CLAUDE.md loading)
additionalDirectoriesForClaudeMd: [],
// Channel server allowlist from --channels flag
allowedChannels: [],
hasDevChannels: false,
// Session project dir (null = derive from originalCwd)
sessionProjectDir: null,
// Prompt cache 1h allowlist (null = not yet fetched from GrowthBook)
promptCache1hAllowlist: null,
// Prompt cache 1h eligibility (null = not yet evaluated)
promptCache1hEligible: null,
// Beta header latches (null = not yet triggered)
afkModeHeaderLatched: null,
fastModeHeaderLatched: null,
cacheEditingHeaderLatched: null,
thinkingClearLatched: null,
// Current prompt ID
promptId: null,
lastMainRequestId: undefined,
lastApiCompletionTimestamp: null,
pendingPostCompaction: false,
}
return state
}
// AND ESPECIALLY HERE
// Process-wide mutable session state singleton. Mutated in place by the
// accessors below; resetStateForTests rewrites it field-by-field.
const STATE: State = getInitialState()
/** Current session id (changes via regenerateSessionId / switchSession). */
export function getSessionId(): SessionId {
return STATE.sessionId
}
/**
 * Replace the active session id with a freshly generated one.
 *
 * When `setCurrentAsParent` is true, the outgoing id is recorded as the
 * parent session before being replaced.
 */
export function regenerateSessionId(
  options: { setCurrentAsParent?: boolean } = {},
): SessionId {
  const outgoing = STATE.sessionId
  if (options.setCurrentAsParent) {
    STATE.parentSessionId = outgoing
  }
  // Drop the outgoing session's plan-slug entry so the Map doesn't
  // accumulate stale keys. Callers that need to carry the slug across
  // (REPL.tsx clearContext) read it before calling clearConversation.
  STATE.planSlugCache.delete(outgoing)
  // Regenerated sessions live in the current project: reset projectDir to
  // null so getTranscriptPath() derives from originalCwd.
  const fresh = randomUUID() as SessionId
  STATE.sessionId = fresh
  STATE.sessionProjectDir = null
  return fresh
}
/** Parent session id recorded by regenerateSessionId({ setCurrentAsParent }). */
export function getParentSessionId(): SessionId | undefined {
return STATE.parentSessionId
}
/**
 * Atomically switch the active session. `sessionId` and `sessionProjectDir`
 * always change together — there is no separate setter for either, so they
 * cannot drift out of sync (CC-34).
 *
 * @param projectDir — directory containing `.jsonl`. Omit (or
 * pass `null`) for sessions in the current project — the path will derive
 * from originalCwd at read time. Pass `dirname(transcriptPath)` when the
 * session lives in a different project directory (git worktrees,
 * cross-project resume). Every call resets the project dir; it never
 * carries over from the previous session.
 */
export function switchSession(
sessionId: SessionId,
projectDir: string | null = null,
): void {
// Drop the outgoing session's plan-slug entry so the Map stays bounded
// across repeated /resume. Only the current session's slug is ever read
// (plans.ts getPlanSlug defaults to getSessionId()).
STATE.planSlugCache.delete(STATE.sessionId)
STATE.sessionId = sessionId
STATE.sessionProjectDir = projectDir
sessionSwitched.emit(sessionId)
}
// Signal fired by switchSession after the id/projectDir pair is updated.
const sessionSwitched = createSignal<[id: SessionId]>()
/**
 * Register a callback that fires when switchSession changes the active
 * sessionId. bootstrap can't import listeners directly (DAG leaf), so
 * callers register themselves. concurrentSessions.ts uses this to keep the
 * PID file's sessionId in sync with --resume.
 */
export const onSessionSwitch = sessionSwitched.subscribe
/**
 * Project directory the current session's transcript lives in, or `null` if
 * the session was created in the current project (common case — derive from
 * originalCwd). See `switchSession()`.
 */
export function getSessionProjectDir(): string | null {
return STATE.sessionProjectDir
}
/** Working directory at session start (may be updated by setOriginalCwd). */
export function getOriginalCwd(): string {
return STATE.originalCwd
}
/**
 * Get the stable project root directory.
 * Unlike getOriginalCwd(), this is never updated by mid-session EnterWorktreeTool
 * (so skills/history stay stable when entering a throwaway worktree).
 * It IS set at startup by --worktree, since that worktree is the session's project.
 * Use for project identity (history, skills, sessions) not file operations.
 */
export function getProjectRoot(): string {
return STATE.projectRoot
}
// NFC-normalized to keep paths comparable across macOS Unicode forms.
export function setOriginalCwd(cwd: string): void {
STATE.originalCwd = cwd.normalize('NFC')
}
/**
 * Only for --worktree startup flag. Mid-session EnterWorktreeTool must NOT
 * call this — skills/history should stay anchored to where the session started.
 */
export function setProjectRoot(cwd: string): void {
STATE.projectRoot = cwd.normalize('NFC')
}
/** Current (possibly mid-session-changed) working directory. */
export function getCwdState(): string {
return STATE.cwd
}
export function setCwdState(cwd: string): void {
STATE.cwd = cwd.normalize('NFC')
}
/** URL set by direct-connect mode; undefined when not in that mode. */
export function getDirectConnectServerUrl(): string | undefined {
return STATE.directConnectServerUrl
}
export function setDirectConnectServerUrl(url: string): void {
STATE.directConnectServerUrl = url
}
/** Accumulate API wall time (with and without retry overhead). */
export function addToTotalDurationState(
duration: number,
durationWithoutRetries: number,
): void {
STATE.totalAPIDuration += duration
STATE.totalAPIDurationWithoutRetries += durationWithoutRetries
}
export function resetTotalDurationStateAndCost_FOR_TESTS_ONLY(): void {
STATE.totalAPIDuration = 0
STATE.totalAPIDurationWithoutRetries = 0
STATE.totalCostUSD = 0
}
/** Record cost and the latest per-model usage snapshot (replaces, not merges). */
export function addToTotalCostState(
cost: number,
modelUsage: ModelUsage,
model: string,
): void {
STATE.modelUsage[model] = modelUsage
STATE.totalCostUSD += cost
}
export function getTotalCostUSD(): number {
return STATE.totalCostUSD
}
export function getTotalAPIDuration(): number {
return STATE.totalAPIDuration
}
/** Wall-clock time since session start (or restored start, see setCostStateForRestore). */
export function getTotalDuration(): number {
return Date.now() - STATE.startTime
}
export function getTotalAPIDurationWithoutRetries(): number {
return STATE.totalAPIDurationWithoutRetries
}
export function getTotalToolDuration(): number {
return STATE.totalToolDuration
}
/** Accumulates both session totals and the per-turn tool counters. */
export function addToToolDuration(duration: number): void {
STATE.totalToolDuration += duration
STATE.turnToolDurationMs += duration
STATE.turnToolCount++
}
export function getTurnHookDurationMs(): number {
return STATE.turnHookDurationMs
}
export function addToTurnHookDuration(duration: number): void {
STATE.turnHookDurationMs += duration
STATE.turnHookCount++
}
export function resetTurnHookDuration(): void {
STATE.turnHookDurationMs = 0
STATE.turnHookCount = 0
}
export function getTurnHookCount(): number {
return STATE.turnHookCount
}
export function getTurnToolDurationMs(): number {
return STATE.turnToolDurationMs
}
export function resetTurnToolDuration(): void {
STATE.turnToolDurationMs = 0
STATE.turnToolCount = 0
}
export function getTurnToolCount(): number {
return STATE.turnToolCount
}
export function getTurnClassifierDurationMs(): number {
return STATE.turnClassifierDurationMs
}
export function addToTurnClassifierDuration(duration: number): void {
STATE.turnClassifierDurationMs += duration
STATE.turnClassifierCount++
}
export function resetTurnClassifierDuration(): void {
STATE.turnClassifierDurationMs = 0
STATE.turnClassifierCount = 0
}
export function getTurnClassifierCount(): number {
return STATE.turnClassifierCount
}
/** Optional stats sink; null until setStatsStore is called. */
export function getStatsStore(): {
observe(name: string, value: number): void
} | null {
return STATE.statsStore
}
export function setStatsStore(
store: { observe(name: string, value: number): void } | null,
): void {
STATE.statsStore = store
}
/**
 * Marks that an interaction occurred.
 *
 * By default the actual Date.now() call is deferred until the next Ink render
 * frame (via flushInteractionTime()) so we avoid calling Date.now() on every
 * single keypress.
 *
 * Pass `immediate = true` when calling from React useEffect callbacks or
 * other code that runs *after* the Ink render cycle has already flushed.
 * Without it the timestamp stays stale until the next render, which may never
 * come if the user is idle (e.g. permission dialog waiting for input).
 */
let interactionPending = false
export function updateLastInteractionTime(immediate?: boolean): void {
  if (!immediate) {
    // Cheap path: just set the dirty bit; Date.now() happens at flush.
    interactionPending = true
    return
  }
  flushInteractionTime_inner()
}
/**
 * If an interaction was recorded since the last flush, update the timestamp
 * now. Called by Ink before each render cycle so many keypresses collapse
 * into a single Date.now() call.
 */
export function flushInteractionTime(): void {
  if (!interactionPending) return
  flushInteractionTime_inner()
}
// Shared tail: stamp the time and clear the dirty bit.
function flushInteractionTime_inner(): void {
  STATE.lastInteractionTime = Date.now()
  interactionPending = false
}
/** Accumulate diff-stat style line counts across the session. */
export function addToTotalLinesChanged(added: number, removed: number): void {
STATE.totalLinesAdded += added
STATE.totalLinesRemoved += removed
}
export function getTotalLinesAdded(): number {
return STATE.totalLinesAdded
}
export function getTotalLinesRemoved(): number {
return STATE.totalLinesRemoved
}
// Token totals are derived on demand by summing the per-model usage map.
export function getTotalInputTokens(): number {
return sumBy(Object.values(STATE.modelUsage), 'inputTokens')
}
export function getTotalOutputTokens(): number {
return sumBy(Object.values(STATE.modelUsage), 'outputTokens')
}
export function getTotalCacheReadInputTokens(): number {
return sumBy(Object.values(STATE.modelUsage), 'cacheReadInputTokens')
}
export function getTotalCacheCreationInputTokens(): number {
return sumBy(Object.values(STATE.modelUsage), 'cacheCreationInputTokens')
}
export function getTotalWebSearchRequests(): number {
return sumBy(Object.values(STATE.modelUsage), 'webSearchRequests')
}
// Per-turn token accounting: snapshot at turn start, diff on read.
let outputTokensAtTurnStart = 0
let currentTurnTokenBudget: number | null = null
/** Output tokens produced since the last snapshotOutputTokensForTurn call. */
export function getTurnOutputTokens(): number {
return getTotalOutputTokens() - outputTokensAtTurnStart
}
export function getCurrentTurnTokenBudget(): number | null {
return currentTurnTokenBudget
}
let budgetContinuationCount = 0
/** Start a new turn: baseline the output-token counter and set its budget. */
export function snapshotOutputTokensForTurn(budget: number | null): void {
outputTokensAtTurnStart = getTotalOutputTokens()
currentTurnTokenBudget = budget
budgetContinuationCount = 0
}
export function getBudgetContinuationCount(): number {
return budgetContinuationCount
}
export function incrementBudgetContinuationCount(): void {
budgetContinuationCount++
}
// Latches true for the session; cleared only by resetCostState/reset for tests.
export function setHasUnknownModelCost(): void {
STATE.hasUnknownModelCost = true
}
export function hasUnknownModelCost(): boolean {
return STATE.hasUnknownModelCost
}
export function getLastMainRequestId(): string | undefined {
return STATE.lastMainRequestId
}
export function setLastMainRequestId(requestId: string): void {
STATE.lastMainRequestId = requestId
}
export function getLastApiCompletionTimestamp(): number | null {
return STATE.lastApiCompletionTimestamp
}
export function setLastApiCompletionTimestamp(timestamp: number): void {
STATE.lastApiCompletionTimestamp = timestamp
}
/** Mark that a compaction just occurred. The next API success event will
 * include isPostCompaction=true, then the flag auto-resets. */
export function markPostCompaction(): void {
STATE.pendingPostCompaction = true
}
/** Consume the post-compaction flag. Returns true once after compaction,
 * then returns false until the next compaction. */
export function consumePostCompaction(): boolean {
const was = STATE.pendingPostCompaction
STATE.pendingPostCompaction = false
return was
}
export function getLastInteractionTime(): number {
return STATE.lastInteractionTime
}
// Scroll drain suspension — background intervals check this before doing work
// so they don't compete with scroll frames for the event loop. Set by
// ScrollBox scrollBy/scrollTo, cleared SCROLL_DRAIN_IDLE_MS after the last
// scroll event. Module-scope (not in STATE) — ephemeral hot-path flag, no
// test-reset needed since the debounce timer self-clears.
const SCROLL_DRAIN_IDLE_MS = 150
let scrollIsDraining = false
let scrollDebounce: ReturnType<typeof setTimeout> | undefined
/** Mark that a scroll event just happened. Background intervals gate on
 * getIsScrollDraining() and skip their work until the debounce clears. */
export function markScrollActivity(): void {
  scrollIsDraining = true
  // Restart the debounce window on every scroll event.
  if (scrollDebounce !== undefined) {
    clearTimeout(scrollDebounce)
  }
  scrollDebounce = setTimeout(() => {
    scrollIsDraining = false
    scrollDebounce = undefined
  }, SCROLL_DRAIN_IDLE_MS)
  // unref (where available) so a pending debounce never keeps the process alive.
  scrollDebounce.unref?.()
}
/** True while scroll is actively draining (within 150ms of last event).
 * Intervals should early-return when this is set — the work picks up next
 * tick after scroll settles. */
export function getIsScrollDraining(): boolean {
  return scrollIsDraining
}
/** Await this before expensive one-shot work (network, subprocess) that could
 * coincide with scroll. Resolves immediately if not scrolling; otherwise
 * polls at the idle interval until the flag clears. */
export async function waitForScrollIdle(): Promise<void> {
  while (scrollIsDraining) {
    // bootstrap-isolation forbids importing sleep() from src/utils/
    // eslint-disable-next-line no-restricted-syntax
    await new Promise(resolve =>
      setTimeout(resolve, SCROLL_DRAIN_IDLE_MS).unref?.(),
    )
  }
}
/** Live per-model usage map (returned by reference — treat as read-only). */
export function getModelUsage(): { [modelName: string]: ModelUsage } {
return STATE.modelUsage
}
export function getUsageForModel(model: string): ModelUsage | undefined {
return STATE.modelUsage[model]
}
/**
 * Gets the model override set from the --model CLI flag or after the user
 * updates their configured model.
 */
export function getMainLoopModelOverride(): ModelSetting | undefined {
return STATE.mainLoopModelOverride
}
// NOTE(review): initialMainLoopModel starts as null in getInitialState; this
// returns it as ModelSetting — presumably ModelSetting admits null or callers
// only read after setInitialMainLoopModel. Confirm before tightening.
export function getInitialMainLoopModel(): ModelSetting {
return STATE.initialMainLoopModel
}
export function setMainLoopModelOverride(
model: ModelSetting | undefined,
): void {
STATE.mainLoopModelOverride = model
}
export function setInitialMainLoopModel(model: ModelSetting): void {
STATE.initialMainLoopModel = model
}
/** Beta header strings supplied by an SDK embedder, if any. */
export function getSdkBetas(): string[] | undefined {
return STATE.sdkBetas
}
export function setSdkBetas(betas: string[] | undefined): void {
STATE.sdkBetas = betas
}
/** Zero all cost/duration/line accounting and restart the wall clock. */
export function resetCostState(): void {
STATE.totalCostUSD = 0
STATE.totalAPIDuration = 0
STATE.totalAPIDurationWithoutRetries = 0
STATE.totalToolDuration = 0
STATE.startTime = Date.now()
STATE.totalLinesAdded = 0
STATE.totalLinesRemoved = 0
STATE.hasUnknownModelCost = false
STATE.modelUsage = {}
STATE.promptId = null
}
/**
 * Sets cost state values for session restore.
 * Called by restoreCostStateForSession in cost-tracker.ts.
 */
export function setCostStateForRestore({
totalCostUSD,
totalAPIDuration,
totalAPIDurationWithoutRetries,
totalToolDuration,
totalLinesAdded,
totalLinesRemoved,
lastDuration,
modelUsage,
}: {
totalCostUSD: number
totalAPIDuration: number
totalAPIDurationWithoutRetries: number
totalToolDuration: number
totalLinesAdded: number
totalLinesRemoved: number
lastDuration: number | undefined
modelUsage: { [modelName: string]: ModelUsage } | undefined
}): void {
STATE.totalCostUSD = totalCostUSD
STATE.totalAPIDuration = totalAPIDuration
STATE.totalAPIDurationWithoutRetries = totalAPIDurationWithoutRetries
STATE.totalToolDuration = totalToolDuration
STATE.totalLinesAdded = totalLinesAdded
STATE.totalLinesRemoved = totalLinesRemoved
// Restore per-model usage breakdown
if (modelUsage) {
STATE.modelUsage = modelUsage
}
// Adjust startTime to make wall duration accumulate
if (lastDuration) {
STATE.startTime = Date.now() - lastDuration
}
}
// Only used in tests
export function resetStateForTests(): void {
if (process.env.NODE_ENV !== 'test') {
throw new Error('resetStateForTests can only be called in tests')
}
// Field-by-field copy (not reassignment) so existing references to STATE
// observe the reset.
Object.entries(getInitialState()).forEach(([key, value]) => {
STATE[key as keyof State] = value as never
})
outputTokensAtTurnStart = 0
currentTurnTokenBudget = null
budgetContinuationCount = 0
sessionSwitched.clear()
}
// You shouldn't use this directly. See src/utils/model/modelStrings.ts::getModelStrings()
export function getModelStrings(): ModelStrings | null {
return STATE.modelStrings
}
// You shouldn't use this directly. See src/utils/model/modelStrings.ts
export function setModelStrings(modelStrings: ModelStrings): void {
STATE.modelStrings = modelStrings
}
// Test utility function to reset model strings for re-initialization.
// Separate from setModelStrings because we only want to accept 'null' in tests.
export function resetModelStringsForTestingOnly() {
STATE.modelStrings = null
}
/** Install the OTel meter and create every session counter via the factory. */
export function setMeter(
meter: Meter,
createCounter: (name: string, options: MetricOptions) => AttributedCounter,
): void {
STATE.meter = meter
// Initialize all counters using the provided factory
STATE.sessionCounter = createCounter('claude_code.session.count', {
description: 'Count of CLI sessions started',
})
STATE.locCounter = createCounter('claude_code.lines_of_code.count', {
description:
"Count of lines of code modified, with the 'type' attribute indicating whether lines were added or removed",
})
STATE.prCounter = createCounter('claude_code.pull_request.count', {
description: 'Number of pull requests created',
})
STATE.commitCounter = createCounter('claude_code.commit.count', {
description: 'Number of git commits created',
})
STATE.costCounter = createCounter('claude_code.cost.usage', {
description: 'Cost of the Claude Code session',
unit: 'USD',
})
STATE.tokenCounter = createCounter('claude_code.token.usage', {
description: 'Number of tokens used',
unit: 'tokens',
})
STATE.codeEditToolDecisionCounter = createCounter(
'claude_code.code_edit_tool.decision',
{
description:
'Count of code editing tool permission decisions (accept/reject) for Edit, Write, and NotebookEdit tools',
},
)
STATE.activeTimeCounter = createCounter('claude_code.active_time.total', {
description: 'Total active time in seconds',
unit: 's',
})
}
// Telemetry accessors — all null until setMeter/setLoggerProvider etc. run.
export function getMeter(): Meter | null {
return STATE.meter
}
export function getSessionCounter(): AttributedCounter | null {
return STATE.sessionCounter
}
export function getLocCounter(): AttributedCounter | null {
return STATE.locCounter
}
export function getPrCounter(): AttributedCounter | null {
return STATE.prCounter
}
export function getCommitCounter(): AttributedCounter | null {
return STATE.commitCounter
}
export function getCostCounter(): AttributedCounter | null {
return STATE.costCounter
}
export function getTokenCounter(): AttributedCounter | null {
return STATE.tokenCounter
}
export function getCodeEditToolDecisionCounter(): AttributedCounter | null {
return STATE.codeEditToolDecisionCounter
}
export function getActiveTimeCounter(): AttributedCounter | null {
return STATE.activeTimeCounter
}
export function getLoggerProvider(): LoggerProvider | null {
return STATE.loggerProvider
}
export function setLoggerProvider(provider: LoggerProvider | null): void {
STATE.loggerProvider = provider
}
export function getEventLogger(): ReturnType | null {
return STATE.eventLogger
}
export function setEventLogger(
logger: ReturnType | null,
): void {
STATE.eventLogger = logger
}
export function getMeterProvider(): MeterProvider | null {
return STATE.meterProvider
}
export function setMeterProvider(provider: MeterProvider | null): void {
STATE.meterProvider = provider
}
export function getTracerProvider(): BasicTracerProvider | null {
return STATE.tracerProvider
}
export function setTracerProvider(provider: BasicTracerProvider | null): void {
STATE.tracerProvider = provider
}
// Interactive/session-mode accessors.
export function getIsNonInteractiveSession(): boolean {
return !STATE.isInteractive
}
export function getIsInteractive(): boolean {
return STATE.isInteractive
}
export function setIsInteractive(value: boolean): void {
STATE.isInteractive = value
}
export function getClientType(): string {
return STATE.clientType
}
export function setClientType(type: string): void {
STATE.clientType = type
}
export function getSdkAgentProgressSummariesEnabled(): boolean {
return STATE.sdkAgentProgressSummariesEnabled
}
export function setSdkAgentProgressSummariesEnabled(value: boolean): void {
STATE.sdkAgentProgressSummariesEnabled = value
}
export function getKairosActive(): boolean {
return STATE.kairosActive
}
export function setKairosActive(value: boolean): void {
STATE.kairosActive = value
}
export function getStrictToolResultPairing(): boolean {
return STATE.strictToolResultPairing
}
export function setStrictToolResultPairing(value: boolean): void {
STATE.strictToolResultPairing = value
}
// Field name 'userMsgOptIn' avoids excluded-string substrings ('BriefTool',
// 'SendUserMessage' — case-insensitive). All callers are inside feature()
// guards so these accessors don't need their own (matches getKairosActive).
export function getUserMsgOptIn(): boolean {
return STATE.userMsgOptIn
}
export function setUserMsgOptIn(value: boolean): void {
STATE.userMsgOptIn = value
}
export function getSessionSource(): string | undefined {
return STATE.sessionSource
}
export function setSessionSource(source: string): void {
STATE.sessionSource = source
}
export function getQuestionPreviewFormat(): 'markdown' | 'html' | undefined {
return STATE.questionPreviewFormat
}
export function setQuestionPreviewFormat(format: 'markdown' | 'html'): void {
STATE.questionPreviewFormat = format
}
/** Live agent→color assignment map (mutated by callers). */
export function getAgentColorMap(): Map {
return STATE.agentColorMap
}
export function getFlagSettingsPath(): string | undefined {
return STATE.flagSettingsPath
}
export function setFlagSettingsPath(path: string | undefined): void {
STATE.flagSettingsPath = path
}
export function getFlagSettingsInline(): Record | null {
return STATE.flagSettingsInline
}
export function setFlagSettingsInline(
settings: Record | null,
): void {
STATE.flagSettingsInline = settings
}
// Credential-ish values handed over via flags/file descriptors.
// undefined = never set; null = explicitly cleared.
export function getSessionIngressToken(): string | null | undefined {
return STATE.sessionIngressToken
}
export function setSessionIngressToken(token: string | null): void {
STATE.sessionIngressToken = token
}
export function getOauthTokenFromFd(): string | null | undefined {
return STATE.oauthTokenFromFd
}
export function setOauthTokenFromFd(token: string | null): void {
STATE.oauthTokenFromFd = token
}
export function getApiKeyFromFd(): string | null | undefined {
return STATE.apiKeyFromFd
}
export function setApiKeyFromFd(key: string | null): void {
STATE.apiKeyFromFd = key
}
// Last API request capture for bug reports — params and messages are stored
// separately so the (large) message list can be cleared independently.
export function setLastAPIRequest(
params: Omit | null,
): void {
STATE.lastAPIRequest = params
}
export function getLastAPIRequest(): Omit<
BetaMessageStreamParams,
'messages'
> | null {
return STATE.lastAPIRequest
}
export function setLastAPIRequestMessages(
messages: BetaMessageStreamParams['messages'] | null,
): void {
STATE.lastAPIRequestMessages = messages
}
export function getLastAPIRequestMessages():
| BetaMessageStreamParams['messages']
| null {
return STATE.lastAPIRequestMessages
}
export function setLastClassifierRequests(requests: unknown[] | null): void {
STATE.lastClassifierRequests = requests
}
export function getLastClassifierRequests(): unknown[] | null {
return STATE.lastClassifierRequests
}
export function setCachedClaudeMdContent(content: string | null): void {
STATE.cachedClaudeMdContent = content
}
export function getCachedClaudeMdContent(): string | null {
return STATE.cachedClaudeMdContent
}
/**
 * Append an error to the bounded in-memory error log (surfaced in bug
 * reports). Oldest entries are evicted first once the cap is reached.
 *
 * Uses a while-loop rather than a single shift: the `if` form only upholds
 * the bound because this function is the sole writer; trimming in a loop
 * keeps the invariant even if the cap is lowered or the log is ever
 * bulk-populated elsewhere.
 */
export function addToInMemoryErrorLog(errorInfo: {
  error: string
  timestamp: string
}): void {
  const MAX_IN_MEMORY_ERRORS = 100
  // Evict oldest entries until there is room for the new one.
  while (STATE.inMemoryErrorLog.length >= MAX_IN_MEMORY_ERRORS) {
    STATE.inMemoryErrorLog.shift()
  }
  STATE.inMemoryErrorLog.push(errorInfo)
}
/** Which settings layers may be loaded (restricted via --setting-sources). */
export function getAllowedSettingSources(): SettingSource[] {
return STATE.allowedSettingSources
}
export function setAllowedSettingSources(sources: SettingSource[]): void {
STATE.allowedSettingSources = sources
}
export function preferThirdPartyAuthentication(): boolean {
// IDE extension should behave as 1P for authentication reasons.
return getIsNonInteractiveSession() && STATE.clientType !== 'claude-vscode'
}
export function setInlinePlugins(plugins: Array): void {
STATE.inlinePlugins = plugins
}
export function getInlinePlugins(): Array {
return STATE.inlinePlugins
}
export function setChromeFlagOverride(value: boolean | undefined): void {
STATE.chromeFlagOverride = value
}
export function getChromeFlagOverride(): boolean | undefined {
return STATE.chromeFlagOverride
}
export function setUseCoworkPlugins(value: boolean): void {
STATE.useCoworkPlugins = value
// Plugin directory changed, so cached settings are no longer valid.
resetSettingsCache()
}
export function getUseCoworkPlugins(): boolean {
return STATE.useCoworkPlugins
}
export function setSessionBypassPermissionsMode(enabled: boolean): void {
STATE.sessionBypassPermissionsMode = enabled
}
export function getSessionBypassPermissionsMode(): boolean {
return STATE.sessionBypassPermissionsMode
}
export function setScheduledTasksEnabled(enabled: boolean): void {
STATE.scheduledTasksEnabled = enabled
}
export function getScheduledTasksEnabled(): boolean {
return STATE.scheduledTasksEnabled
}
export type SessionCronTask = {
id: string
cron: string
prompt: string
createdAt: number
recurring?: boolean
/**
 * When set, the task was created by an in-process teammate (not the team lead).
 * The scheduler routes fires to that teammate's pendingUserMessages queue
 * instead of the main REPL command queue. Session-only — never written to disk.
 */
agentId?: string
}
/** Live list of session-scoped cron tasks (mutated by add/remove below). */
export function getSessionCronTasks(): SessionCronTask[] {
return STATE.sessionCronTasks
}
export function addSessionCronTask(task: SessionCronTask): void {
STATE.sessionCronTasks.push(task)
}
/**
 * Remove session cron tasks whose ids appear in `ids`.
 *
 * Returns the number of tasks actually removed. Callers use this to skip
 * downstream work (e.g. the disk read in removeCronTasks) when all ids
 * were accounted for here.
 */
export function removeSessionCronTasks(ids: readonly string[]): number {
  if (ids.length === 0) {
    return 0
  }
  const doomed = new Set(ids)
  const kept = STATE.sessionCronTasks.filter(task => !doomed.has(task.id))
  const removedCount = STATE.sessionCronTasks.length - kept.length
  // Only swap the array when something was actually filtered out.
  if (removedCount > 0) {
    STATE.sessionCronTasks = kept
  }
  return removedCount
}
// Session-only trust flag (not persisted to disk).
export function setSessionTrustAccepted(accepted: boolean): void {
STATE.sessionTrustAccepted = accepted
}
export function getSessionTrustAccepted(): boolean {
return STATE.sessionTrustAccepted
}
// Session-only switch to disable writing transcripts/session files to disk.
export function setSessionPersistenceDisabled(disabled: boolean): void {
STATE.sessionPersistenceDisabled = disabled
}
export function isSessionPersistenceDisabled(): boolean {
return STATE.sessionPersistenceDisabled
}
/** True once the user has exited plan mode at least once this session. */
export function hasExitedPlanModeInSession(): boolean {
return STATE.hasExitedPlanMode
}
export function setHasExitedPlanMode(value: boolean): void {
STATE.hasExitedPlanMode = value
}
/** Pending plan_mode_exit attachment flag (set by handlePlanModeTransition). */
export function needsPlanModeExitAttachment(): boolean {
return STATE.needsPlanModeExitAttachment
}
export function setNeedsPlanModeExitAttachment(value: boolean): void {
STATE.needsPlanModeExitAttachment = value
}
/**
 * React to a mode change with respect to plan mode.
 *
 * Entering plan mode clears any pending plan_mode_exit attachment (so a
 * quick toggle doesn't send both plan_mode and plan_mode_exit); leaving
 * plan mode queues the plan_mode_exit attachment. Transitions that don't
 * cross the plan-mode boundary are no-ops.
 */
export function handlePlanModeTransition(
  fromMode: string,
  toMode: string,
): void {
  const wasPlan = fromMode === 'plan'
  const nowPlan = toMode === 'plan'
  if (wasPlan === nowPlan) {
    // Stayed inside (or outside) plan mode — nothing to record.
    return
  }
  // Crossing the boundary: leaving plan sets the flag, entering clears it.
  STATE.needsPlanModeExitAttachment = wasPlan
}
/** Pending auto_mode_exit attachment flag (set by handleAutoModeTransition). */
export function needsAutoModeExitAttachment(): boolean {
return STATE.needsAutoModeExitAttachment
}
export function setNeedsAutoModeExitAttachment(value: boolean): void {
STATE.needsAutoModeExitAttachment = value
}
/**
 * React to a mode change with respect to auto mode.
 *
 * Entering auto clears any pending auto_mode_exit attachment (fast-toggle
 * protection); leaving auto queues it. Auto↔plan hops are ignored entirely.
 */
export function handleAutoModeTransition(
  fromMode: string,
  toMode: string,
): void {
  // Auto↔plan transitions are handled by prepareContextForPlanMode (auto may
  // stay active through plan if opted in) and ExitPlanMode (restores mode).
  // Skip both directions so this function only handles direct auto transitions.
  const isAutoPlanHop =
    (fromMode === 'auto' && toMode === 'plan') ||
    (fromMode === 'plan' && toMode === 'auto')
  if (isAutoPlanHop) {
    return
  }
  const wasAuto = fromMode === 'auto'
  const nowAuto = toMode === 'auto'
  if (wasAuto === nowAuto) {
    // Did not cross the auto-mode boundary — nothing to record.
    return
  }
  // Leaving auto sets the exit-attachment flag; entering auto clears it.
  STATE.needsAutoModeExitAttachment = wasAuto
}
// LSP plugin recommendation session tracking
export function hasShownLspRecommendationThisSession(): boolean {
return STATE.lspRecommendationShownThisSession
}
export function setLspRecommendationShownThisSession(value: boolean): void {
STATE.lspRecommendationShownThisSession = value
}
// SDK init event state
export function setInitJsonSchema(schema: Record): void {
STATE.initJsonSchema = schema
}
export function getInitJsonSchema(): Record | null {
return STATE.initJsonSchema
}
export function registerHookCallbacks(
hooks: Partial>,
): void {
if (!STATE.registeredHooks) {
STATE.registeredHooks = {}
}
// `registerHookCallbacks` may be called multiple times, so we need to merge (not overwrite)
for (const [event, matchers] of Object.entries(hooks)) {
const eventKey = event as HookEvent
if (!STATE.registeredHooks[eventKey]) {
STATE.registeredHooks[eventKey] = []
}
STATE.registeredHooks[eventKey]!.push(...matchers)
}
}
export function getRegisteredHooks(): Partial<
Record
> | null {
return STATE.registeredHooks
}
export function clearRegisteredHooks(): void {
STATE.registeredHooks = null
}
export function clearRegisteredPluginHooks(): void {
if (!STATE.registeredHooks) {
return
}
const filtered: Partial> = {}
for (const [event, matchers] of Object.entries(STATE.registeredHooks)) {
// Keep only callback hooks (those without pluginRoot)
const callbackHooks = matchers.filter(m => !('pluginRoot' in m))
if (callbackHooks.length > 0) {
filtered[event as HookEvent] = callbackHooks
}
}
STATE.registeredHooks = Object.keys(filtered).length > 0 ? filtered : null
}
export function resetSdkInitState(): void {
STATE.initJsonSchema = null
STATE.registeredHooks = null
}
export function getPlanSlugCache(): Map {
return STATE.planSlugCache
}
export function getSessionCreatedTeams(): Set {
return STATE.sessionCreatedTeams
}
// Teleported session tracking for reliability logging
/** Mark this session as teleported; first-message logging starts as false. */
export function setTeleportedSessionInfo(info: {
  sessionId: string | null
}): void {
  const { sessionId } = info
  STATE.teleportedSessionInfo = {
    isTeleported: true,
    hasLoggedFirstMessage: false,
    sessionId,
  }
}
/** Teleported-session tracking record, or null when not a teleported session. */
export function getTeleportedSessionInfo(): {
  isTeleported: boolean
  hasLoggedFirstMessage: boolean
  sessionId: string | null
} | null {
  return STATE.teleportedSessionInfo
}
/** Record that the first message of a teleported session has been logged. */
export function markFirstTeleportMessageLogged(): void {
  const info = STATE.teleportedSessionInfo
  if (!info) {
    return
  }
  info.hasLoggedFirstMessage = true
}
// Invoked skills tracking for preservation across compaction
/** Snapshot of a single skill invocation. */
export type InvokedSkillInfo = {
  skillName: string
  skillPath: string
  content: string
  // Date.now() at registration time (set by addInvokedSkill).
  invokedAt: number
  // null = invoked on the main thread (no agent); part of the map key.
  agentId: string | null
}
export function addInvokedSkill(
skillName: string,
skillPath: string,
content: string,
agentId: string | null = null,
): void {
const key = `${agentId ?? ''}:${skillName}`
STATE.invokedSkills.set(key, {
skillName,
skillPath,
content,
invokedAt: Date.now(),
agentId,
})
}
/** Shared Map of all invoked skills held in STATE (all agents). */
export function getInvokedSkills(): Map {
  return STATE.invokedSkills
}
/**
 * Invoked skills belonging to one agent (undefined/null both mean the
 * main thread). Returns a fresh Map; mutating it does not touch STATE.
 */
export function getInvokedSkillsForAgent(
  agentId: string | undefined | null,
): Map {
  const target = agentId ?? null
  const result = new Map()
  STATE.invokedSkills.forEach((skill, key) => {
    if (skill.agentId === target) {
      result.set(key, skill)
    }
  })
  return result
}
/**
 * Clear invoked skills, optionally keeping those of the given agents.
 * Main-thread entries (agentId === null) are never preserved by the set.
 */
export function clearInvokedSkills(
  preservedAgentIds?: ReadonlySet,
): void {
  if (!preservedAgentIds || preservedAgentIds.size === 0) {
    STATE.invokedSkills.clear()
    return
  }
  for (const [key, skill] of STATE.invokedSkills) {
    const keep = skill.agentId !== null && preservedAgentIds.has(skill.agentId)
    if (!keep) {
      STATE.invokedSkills.delete(key)
    }
  }
}
/** Remove every invoked-skill entry belonging to the given agent. */
export function clearInvokedSkillsForAgent(agentId: string): void {
  const staleKeys: string[] = []
  for (const [key, skill] of STATE.invokedSkills) {
    if (skill.agentId === agentId) {
      staleKeys.push(key)
    }
  }
  for (const key of staleKeys) {
    STATE.invokedSkills.delete(key)
  }
}
// Slow operations tracking for dev bar
// Cap on retained entries; oldest are dropped first.
const MAX_SLOW_OPERATIONS = 10
// Entries older than this (ms) are pruned on both write and read.
const SLOW_OPERATION_TTL_MS = 10000
/**
 * Record a slow operation for the dev bar (ant builds only).
 * Always reassigns STATE.slowOperations rather than mutating the existing
 * array, so references previously handed to callers stay immutable.
 */
export function addSlowOperation(operation: string, durationMs: number): void {
  if (process.env.USER_TYPE !== 'ant') return
  // Editor sessions (user drafting a prompt file in $EDITOR) are
  // intentionally slow — don't report them.
  if (operation.includes('exec') && operation.includes('claude-prompt-')) {
    return
  }
  const now = Date.now()
  // Drop stale entries, append the new one, then cap to the most recent.
  const fresh = STATE.slowOperations.filter(
    op => now - op.timestamp < SLOW_OPERATION_TTL_MS,
  )
  fresh.push({ operation, durationMs, timestamp: now })
  STATE.slowOperations =
    fresh.length > MAX_SLOW_OPERATIONS
      ? fresh.slice(-MAX_SLOW_OPERATIONS)
      : fresh
}
// Stable empty-array reference returned by getSlowOperations() so callers'
// setState() can bail out via Object.is when nothing is tracked.
const EMPTY_SLOW_OPERATIONS: ReadonlyArray<{
  operation: string
  durationMs: number
  timestamp: number
}> = []
/**
 * Current (non-expired) slow operations for the dev bar.
 *
 * Reference-stability contract: returns EMPTY_SLOW_OPERATIONS when nothing
 * is tracked, and keeps the same array reference across polls while no entry
 * has expired, so React callers can bail out of setState via Object.is.
 * Returning STATE.slowOperations directly is safe because addSlowOperation()
 * reassigns the array instead of mutating the one handed out here.
 */
export function getSlowOperations(): ReadonlyArray<{
  operation: string
  durationMs: number
  timestamp: number
}> {
  const ops = STATE.slowOperations
  if (ops.length === 0) {
    return EMPTY_SLOW_OPERATIONS
  }
  const now = Date.now()
  const isFresh = (op: { timestamp: number }) =>
    now - op.timestamp < SLOW_OPERATION_TTL_MS
  // Nothing expired: keep the reference stable, no allocation.
  if (ops.every(isFresh)) {
    return ops
  }
  STATE.slowOperations = ops.filter(isFresh)
  return STATE.slowOperations.length === 0
    ? EMPTY_SLOW_OPERATIONS
    : STATE.slowOperations
}
/** Agent type of the main thread, or undefined if unset. */
export function getMainThreadAgentType(): string | undefined {
  return STATE.mainThreadAgentType
}
/** Set (or clear, with undefined) the main thread's agent type. */
export function setMainThreadAgentType(agentType: string | undefined): void {
  STATE.mainThreadAgentType = agentType
}
/** Whether the process is running in remote mode. */
export function getIsRemoteMode(): boolean {
  return STATE.isRemoteMode
}
/** Set the remote-mode flag. */
export function setIsRemoteMode(value: boolean): void {
  STATE.isRemoteMode = value
}
// System prompt section accessors
/** Shared system-prompt-section cache Map; callers read/write it directly. */
export function getSystemPromptSectionCache(): Map {
  return STATE.systemPromptSectionCache
}
/** Cache one system-prompt section by name (null is a valid cached value). */
export function setSystemPromptSectionCacheEntry(
  name: string,
  value: string | null,
): void {
  STATE.systemPromptSectionCache.set(name, value)
}
/** Empty the system-prompt-section cache. */
export function clearSystemPromptSectionState(): void {
  STATE.systemPromptSectionCache.clear()
}
// Last emitted date accessors (for detecting midnight date changes)
/** Last emitted date string, or null if none recorded yet. */
export function getLastEmittedDate(): string | null {
  return STATE.lastEmittedDate
}
/** Record (or clear) the last emitted date string. */
export function setLastEmittedDate(date: string | null): void {
  STATE.lastEmittedDate = date
}
/** Additional directories considered for CLAUDE.md loading. */
export function getAdditionalDirectoriesForClaudeMd(): string[] {
  return STATE.additionalDirectoriesForClaudeMd
}
/** Replace the list of additional directories for CLAUDE.md loading. */
export function setAdditionalDirectoriesForClaudeMd(
  directories: string[],
): void {
  STATE.additionalDirectoriesForClaudeMd = directories
}
/** Currently allowed channel entries. */
export function getAllowedChannels(): ChannelEntry[] {
  return STATE.allowedChannels
}
/** Replace the allowed channel entries. */
export function setAllowedChannels(entries: ChannelEntry[]): void {
  STATE.allowedChannels = entries
}
/** Whether dev channels are present. */
export function getHasDevChannels(): boolean {
  return STATE.hasDevChannels
}
/** Set the dev-channels flag. */
export function setHasDevChannels(value: boolean): void {
  STATE.hasDevChannels = value
}
/** 1h-prompt-cache allowlist, or null when not loaded/applicable. */
export function getPromptCache1hAllowlist(): string[] | null {
  return STATE.promptCache1hAllowlist
}
/** Set (or clear) the 1h-prompt-cache allowlist. */
export function setPromptCache1hAllowlist(allowlist: string[] | null): void {
  STATE.promptCache1hAllowlist = allowlist
}
/** 1h-prompt-cache eligibility; null = not yet determined. */
export function getPromptCache1hEligible(): boolean | null {
  return STATE.promptCache1hEligible
}
/** Set (or reset to null) the 1h-prompt-cache eligibility. */
export function setPromptCache1hEligible(eligible: boolean | null): void {
  STATE.promptCache1hEligible = eligible
}
/** AFK-mode beta-header latch; null = not yet evaluated this conversation. */
export function getAfkModeHeaderLatched(): boolean | null {
  return STATE.afkModeHeaderLatched
}
/** Latch the AFK-mode beta-header decision. */
export function setAfkModeHeaderLatched(v: boolean): void {
  STATE.afkModeHeaderLatched = v
}
/** Fast-mode beta-header latch; null = not yet evaluated this conversation. */
export function getFastModeHeaderLatched(): boolean | null {
  return STATE.fastModeHeaderLatched
}
/** Latch the fast-mode beta-header decision. */
export function setFastModeHeaderLatched(v: boolean): void {
  STATE.fastModeHeaderLatched = v
}
/** Cache-editing beta-header latch; null = not yet evaluated. */
export function getCacheEditingHeaderLatched(): boolean | null {
  return STATE.cacheEditingHeaderLatched
}
/** Latch the cache-editing beta-header decision. */
export function setCacheEditingHeaderLatched(v: boolean): void {
  STATE.cacheEditingHeaderLatched = v
}
/** Thinking-clear latch; null = not yet evaluated this conversation. */
export function getThinkingClearLatched(): boolean | null {
  return STATE.thinkingClearLatched
}
/** Latch the thinking-clear decision. */
export function setThinkingClearLatched(v: boolean): void {
  STATE.thinkingClearLatched = v
}
/**
 * Reset beta header latches to null. Called on /clear and /compact so a
 * fresh conversation gets fresh header evaluation.
 */
export function clearBetaHeaderLatches(): void {
  // null (vs false) means "not yet evaluated" — see the latch getters above.
  STATE.afkModeHeaderLatched = null
  STATE.fastModeHeaderLatched = null
  STATE.cacheEditingHeaderLatched = null
  STATE.thinkingClearLatched = null
}
/** Current prompt ID, or null if none is set. */
export function getPromptId(): string | null {
  return STATE.promptId
}
/** Set (or clear) the current prompt ID. */
export function setPromptId(id: string | null): void {
  STATE.promptId = id
}
================================================
FILE: restored-src/src/bridge/bridgeApi.ts
================================================
import axios from 'axios'
import { debugBody, extractErrorDetail } from './debugUtils.js'
import {
BRIDGE_LOGIN_INSTRUCTION,
type BridgeApiClient,
type BridgeConfig,
type PermissionResponseEvent,
type WorkResponse,
} from './types.js'
/** Injected dependencies for the bridge API client. */
type BridgeApiDeps = {
  // API origin (production OAuth base URL or a dev override).
  baseUrl: string
  // OAuth access token source; undefined means "not logged in".
  getAccessToken: () => string | undefined
  // Sent as x-environment-runner-version on every request.
  runnerVersion: string
  // Optional sink for debug log lines.
  onDebug?: (msg: string) => void
  /**
   * Called on 401 to attempt OAuth token refresh. Returns true if refreshed,
   * in which case the request is retried once. Injected because
   * handleOAuth401Error from utils/auth.ts transitively pulls in config.ts →
   * file.ts → permissions/filesystem.ts → sessionStorage.ts → commands.ts
   * (~1300 modules). Daemon callers using env-var tokens omit this — their
   * tokens don't refresh, so 401 goes straight to BridgeFatalError.
   */
  onAuth401?: (staleAccessToken: string) => Promise
  /**
   * Returns the trusted device token to send as X-Trusted-Device-Token on
   * bridge API calls. Bridge sessions have SecurityTier=ELEVATED on the
   * server (CCR v2); when the server's enforcement flag is on,
   * ConnectBridgeWorker requires a trusted device at JWT-issuance.
   * Optional — when absent or returning undefined, the header is omitted
   * and the server falls through to its flag-off/no-op path. The CLI-side
   * gate is tengu_sessions_elevated_auth_enforcement (see trustedDevice.ts).
   */
  getTrustedDeviceToken?: () => string | undefined
}
// anthropic-beta header value opting bridge calls into the environments API.
const BETA_HEADER = 'environments-2025-11-01'
/** Allowlist pattern for server-provided IDs used in URL path segments. */
const SAFE_ID_PATTERN = /^[a-zA-Z0-9_-]+$/
/**
 * Validate that a server-provided ID is safe to interpolate into a URL path.
 * Prevents path traversal (e.g. `../../admin`) and injection via IDs that
 * contain slashes, dots, or other special characters.
 *
 * @returns the ID unchanged when safe
 * @throws Error when the ID is empty or contains disallowed characters
 */
export function validateBridgeId(id: string, label: string): string {
  const isSafe = id.length > 0 && SAFE_ID_PATTERN.test(id)
  if (isSafe) {
    return id
  }
  throw new Error(`Invalid ${label}: contains unsafe characters`)
}
/** Fatal bridge errors that should not be retried (e.g. auth failures). */
export class BridgeFatalError extends Error {
  /**
   * @param message human-readable failure description
   * @param status HTTP status that produced the failure
   * @param errorType server-provided error type, e.g. "environment_expired"
   */
  constructor(
    message: string,
    readonly status: number,
    readonly errorType?: string,
  ) {
    super(message)
    this.name = 'BridgeFatalError'
  }
}
/**
 * Build the bridge API client: registration, work polling/ack/stop,
 * session archive/reconnect, heartbeats, and permission-response events.
 * OAuth-authenticated endpoints go through withOAuthRetry; endpoints
 * authenticated with environment/session tokens call axios directly.
 */
export function createBridgeApiClient(deps: BridgeApiDeps): BridgeApiClient {
  // Forward to the injected debug sink (no-op when absent).
  function debug(msg: string): void {
    deps.onDebug?.(msg)
  }
  // Back-to-back empty poll responses; used to rate-limit "no work" logging.
  let consecutiveEmptyPolls = 0
  const EMPTY_POLL_LOG_INTERVAL = 100
  // Common headers for every call. `accessToken` is whichever credential the
  // endpoint expects (OAuth token, environment secret, or session token).
  function getHeaders(accessToken: string): Record {
    const headers: Record = {
      Authorization: `Bearer ${accessToken}`,
      'Content-Type': 'application/json',
      'anthropic-version': '2023-06-01',
      'anthropic-beta': BETA_HEADER,
      'x-environment-runner-version': deps.runnerVersion,
    }
    const deviceToken = deps.getTrustedDeviceToken?.()
    if (deviceToken) {
      headers['X-Trusted-Device-Token'] = deviceToken
    }
    return headers
  }
  // OAuth token for user-scoped calls; throws with login instructions when absent.
  function resolveAuth(): string {
    const accessToken = deps.getAccessToken()
    if (!accessToken) {
      throw new Error(BRIDGE_LOGIN_INSTRUCTION)
    }
    return accessToken
  }
  /**
   * Execute an OAuth-authenticated request with a single retry on 401.
   * On 401, attempts token refresh via handleOAuth401Error (same pattern as
   * withRetry.ts for v1/messages). If refresh succeeds, retries the request
   * once with the new token. If refresh fails or the retry also returns 401,
   * the 401 response is returned for handleErrorStatus to throw BridgeFatalError.
   */
  async function withOAuthRetry(
    fn: (accessToken: string) => Promise<{ status: number; data: T }>,
    context: string,
  ): Promise<{ status: number; data: T }> {
    const accessToken = resolveAuth()
    const response = await fn(accessToken)
    if (response.status !== 401) {
      return response
    }
    if (!deps.onAuth401) {
      debug(`[bridge:api] ${context}: 401 received, no refresh handler`)
      return response
    }
    // Attempt token refresh — matches the pattern in withRetry.ts
    debug(`[bridge:api] ${context}: 401 received, attempting token refresh`)
    const refreshed = await deps.onAuth401(accessToken)
    if (refreshed) {
      debug(`[bridge:api] ${context}: Token refreshed, retrying request`)
      const newToken = resolveAuth()
      const retryResponse = await fn(newToken)
      if (retryResponse.status !== 401) {
        return retryResponse
      }
      debug(`[bridge:api] ${context}: Retry after refresh also got 401`)
    } else {
      debug(`[bridge:api] ${context}: Token refresh failed`)
    }
    // Refresh failed — return 401 for handleErrorStatus to throw
    return response
  }
  return {
    // Register (or idempotently re-register) this machine as a bridge
    // environment. OAuth-authenticated.
    async registerBridgeEnvironment(
      config: BridgeConfig,
    ): Promise<{ environment_id: string; environment_secret: string }> {
      debug(
        `[bridge:api] POST /v1/environments/bridge bridgeId=${config.bridgeId}`,
      )
      const response = await withOAuthRetry(
        (token: string) =>
          axios.post<{
            environment_id: string
            environment_secret: string
          }>(
            `${deps.baseUrl}/v1/environments/bridge`,
            {
              machine_name: config.machineName,
              directory: config.dir,
              branch: config.branch,
              git_repo_url: config.gitRepoUrl,
              // Advertise session capacity so claude.ai/code can show
              // "2/4 sessions" badges and only block the picker when
              // actually at capacity. Backends that don't yet accept
              // this field will silently ignore it.
              max_sessions: config.maxSessions,
              // worker_type lets claude.ai filter environments by origin
              // (e.g. assistant picker only shows assistant-mode workers).
              // Desktop cowork app sends "cowork"; we send a distinct value.
              metadata: { worker_type: config.workerType },
              // Idempotent re-registration: if we have a backend-issued
              // environment_id from a prior session (--session-id resume),
              // send it back so the backend reattaches instead of creating
              // a new env. The backend may still hand back a fresh ID if
              // the old one expired — callers must compare the response.
              ...(config.reuseEnvironmentId && {
                environment_id: config.reuseEnvironmentId,
              }),
            },
            {
              headers: getHeaders(token),
              timeout: 15_000,
              validateStatus: status => status < 500,
            },
          ),
        'Registration',
      )
      handleErrorStatus(response.status, response.data, 'Registration')
      debug(
        `[bridge:api] POST /v1/environments/bridge -> ${response.status} environment_id=${response.data.environment_id}`,
      )
      debug(
        `[bridge:api] >>> ${debugBody({ machine_name: config.machineName, directory: config.dir, branch: config.branch, git_repo_url: config.gitRepoUrl, max_sessions: config.maxSessions, metadata: { worker_type: config.workerType } })}`,
      )
      debug(`[bridge:api] <<< ${debugBody(response.data)}`)
      return response.data
    },
    // Poll for pending work. Authenticated with the environment secret;
    // returns null when no work is available.
    async pollForWork(
      environmentId: string,
      environmentSecret: string,
      signal?: AbortSignal,
      reclaimOlderThanMs?: number,
    ): Promise {
      validateBridgeId(environmentId, 'environmentId')
      // Save and reset so errors break the "consecutive empty" streak.
      // Restored below when the response is truly empty.
      const prevEmptyPolls = consecutiveEmptyPolls
      consecutiveEmptyPolls = 0
      const response = await axios.get(
        `${deps.baseUrl}/v1/environments/${environmentId}/work/poll`,
        {
          headers: getHeaders(environmentSecret),
          params:
            reclaimOlderThanMs !== undefined
              ? { reclaim_older_than_ms: reclaimOlderThanMs }
              : undefined,
          timeout: 10_000,
          signal,
          validateStatus: status => status < 500,
        },
      )
      handleErrorStatus(response.status, response.data, 'Poll')
      // Empty body or null = no work available
      if (!response.data) {
        consecutiveEmptyPolls = prevEmptyPolls + 1
        // Log the 1st and every 100th empty poll, not every poll.
        if (
          consecutiveEmptyPolls === 1 ||
          consecutiveEmptyPolls % EMPTY_POLL_LOG_INTERVAL === 0
        ) {
          debug(
            `[bridge:api] GET .../work/poll -> ${response.status} (no work, ${consecutiveEmptyPolls} consecutive empty polls)`,
          )
        }
        return null
      }
      debug(
        `[bridge:api] GET .../work/poll -> ${response.status} workId=${response.data.id} type=${response.data.data?.type}${response.data.data?.id ? ` sessionId=${response.data.data.id}` : ''}`,
      )
      debug(`[bridge:api] <<< ${debugBody(response.data)}`)
      return response.data
    },
    // Acknowledge a work item so the server stops offering it. Session-token auth.
    async acknowledgeWork(
      environmentId: string,
      workId: string,
      sessionToken: string,
    ): Promise {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(workId, 'workId')
      debug(`[bridge:api] POST .../work/${workId}/ack`)
      const response = await axios.post(
        `${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/ack`,
        {},
        {
          headers: getHeaders(sessionToken),
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )
      handleErrorStatus(response.status, response.data, 'Acknowledge')
      debug(`[bridge:api] POST .../work/${workId}/ack -> ${response.status}`)
    },
    // Stop a work item (optionally forced). OAuth-authenticated.
    async stopWork(
      environmentId: string,
      workId: string,
      force: boolean,
    ): Promise {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(workId, 'workId')
      debug(`[bridge:api] POST .../work/${workId}/stop force=${force}`)
      const response = await withOAuthRetry(
        (token: string) =>
          axios.post(
            `${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/stop`,
            { force },
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'StopWork',
      )
      handleErrorStatus(response.status, response.data, 'StopWork')
      debug(`[bridge:api] POST .../work/${workId}/stop -> ${response.status}`)
    },
    // Remove this environment's registration. OAuth-authenticated.
    async deregisterEnvironment(environmentId: string): Promise {
      validateBridgeId(environmentId, 'environmentId')
      debug(`[bridge:api] DELETE /v1/environments/bridge/${environmentId}`)
      const response = await withOAuthRetry(
        (token: string) =>
          axios.delete(
            `${deps.baseUrl}/v1/environments/bridge/${environmentId}`,
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'Deregister',
      )
      handleErrorStatus(response.status, response.data, 'Deregister')
      debug(
        `[bridge:api] DELETE /v1/environments/bridge/${environmentId} -> ${response.status}`,
      )
    },
    // Archive a session; 409 means already archived and is treated as success.
    async archiveSession(sessionId: string): Promise {
      validateBridgeId(sessionId, 'sessionId')
      debug(`[bridge:api] POST /v1/sessions/${sessionId}/archive`)
      const response = await withOAuthRetry(
        (token: string) =>
          axios.post(
            `${deps.baseUrl}/v1/sessions/${sessionId}/archive`,
            {},
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'ArchiveSession',
      )
      // 409 = already archived (idempotent, not an error)
      if (response.status === 409) {
        debug(
          `[bridge:api] POST /v1/sessions/${sessionId}/archive -> 409 (already archived)`,
        )
        return
      }
      handleErrorStatus(response.status, response.data, 'ArchiveSession')
      debug(
        `[bridge:api] POST /v1/sessions/${sessionId}/archive -> ${response.status}`,
      )
    },
    // Reattach an existing session to this environment. OAuth-authenticated.
    async reconnectSession(
      environmentId: string,
      sessionId: string,
    ): Promise {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(sessionId, 'sessionId')
      debug(
        `[bridge:api] POST /v1/environments/${environmentId}/bridge/reconnect session_id=${sessionId}`,
      )
      const response = await withOAuthRetry(
        (token: string) =>
          axios.post(
            `${deps.baseUrl}/v1/environments/${environmentId}/bridge/reconnect`,
            { session_id: sessionId },
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'ReconnectSession',
      )
      handleErrorStatus(response.status, response.data, 'ReconnectSession')
      debug(`[bridge:api] POST .../bridge/reconnect -> ${response.status}`)
    },
    // Extend a work item's lease. Session-token auth.
    async heartbeatWork(
      environmentId: string,
      workId: string,
      sessionToken: string,
    ): Promise<{ lease_extended: boolean; state: string }> {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(workId, 'workId')
      debug(`[bridge:api] POST .../work/${workId}/heartbeat`)
      const response = await axios.post<{
        lease_extended: boolean
        state: string
        last_heartbeat: string
        ttl_seconds: number
      }>(
        `${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/heartbeat`,
        {},
        {
          headers: getHeaders(sessionToken),
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )
      handleErrorStatus(response.status, response.data, 'Heartbeat')
      debug(
        `[bridge:api] POST .../work/${workId}/heartbeat -> ${response.status} lease_extended=${response.data.lease_extended} state=${response.data.state}`,
      )
      return response.data
    },
    // Publish a permission-response event to the session. Session-token auth.
    async sendPermissionResponseEvent(
      sessionId: string,
      event: PermissionResponseEvent,
      sessionToken: string,
    ): Promise {
      validateBridgeId(sessionId, 'sessionId')
      debug(
        `[bridge:api] POST /v1/sessions/${sessionId}/events type=${event.type}`,
      )
      const response = await axios.post(
        `${deps.baseUrl}/v1/sessions/${sessionId}/events`,
        { events: [event] },
        {
          headers: getHeaders(sessionToken),
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )
      handleErrorStatus(
        response.status,
        response.data,
        'SendPermissionResponseEvent',
      )
      debug(
        `[bridge:api] POST /v1/sessions/${sessionId}/events -> ${response.status}`,
      )
      debug(`[bridge:api] >>> ${debugBody({ events: [event] })}`)
      debug(`[bridge:api] <<< ${debugBody(response.data)}`)
    },
  }
}
/**
 * Map a non-success bridge API response to a thrown error.
 * Known auth/expiry statuses become BridgeFatalError (not retried);
 * 429 and unknown statuses become plain Errors (retriable by callers).
 *
 * @param status HTTP status of the response
 * @param data response body, mined for error detail/type
 * @param context short operation name prefixed to error messages
 */
function handleErrorStatus(
  status: number,
  data: unknown,
  context: string,
): void {
  // NOTE(review): only 200/204 count as success — confirm the server never
  // returns other 2xx codes (e.g. 201) for these endpoints.
  if (status === 200 || status === 204) {
    return
  }
  const detail = extractErrorDetail(data)
  const errorType = extractErrorTypeFromData(data)
  switch (status) {
    case 401:
      throw new BridgeFatalError(
        `${context}: Authentication failed (401)${detail ? `: ${detail}` : ''}. ${BRIDGE_LOGIN_INSTRUCTION}`,
        401,
        errorType,
      )
    case 403:
      // 403 with an expiry-flavored error type means the session aged out,
      // not a permissions problem — show the restart message instead.
      throw new BridgeFatalError(
        isExpiredErrorType(errorType)
          ? 'Remote Control session has expired. Please restart with `claude remote-control` or /remote-control.'
          : `${context}: Access denied (403)${detail ? `: ${detail}` : ''}. Check your organization permissions.`,
        403,
        errorType,
      )
    case 404:
      throw new BridgeFatalError(
        detail ??
          `${context}: Not found (404). Remote Control may not be available for this organization.`,
        404,
        errorType,
      )
    case 410:
      // 410 Gone is always treated as an expired environment.
      throw new BridgeFatalError(
        detail ??
          'Remote Control session has expired. Please restart with `claude remote-control` or /remote-control.',
        410,
        errorType ?? 'environment_expired',
      )
    case 429:
      // Plain Error (not fatal) so callers may back off and retry.
      throw new Error(`${context}: Rate limited (429). Polling too frequently.`)
    default:
      throw new Error(
        `${context}: Failed with status ${status}${detail ? `: ${detail}` : ''}`,
      )
  }
}
/** Check whether an error type string indicates a session/environment expiry. */
export function isExpiredErrorType(errorType: string | undefined): boolean {
  if (!errorType) {
    return false
  }
  // Expiry shows up either as "*expired*" or "*lifetime*" error types.
  return ['expired', 'lifetime'].some(marker => errorType.includes(marker))
}
/**
 * Check whether a BridgeFatalError is a suppressible 403 permission error.
 * These are 403 errors for scopes like 'external_poll_sessions' or operations
 * like StopWork that fail because the user's role lacks 'environments:manage'.
 * They don't affect core functionality and shouldn't be shown to users.
 */
export function isSuppressible403(err: BridgeFatalError): boolean {
  if (err.status !== 403) {
    return false
  }
  return ['external_poll_sessions', 'environments:manage'].some(marker =>
    err.message.includes(marker),
  )
}
/**
 * Pull `error.type` out of an API error body, if present and a string.
 * Tolerates any shape — non-objects and missing fields yield undefined.
 */
function extractErrorTypeFromData(data: unknown): string | undefined {
  if (typeof data !== 'object' || data === null) {
    return undefined
  }
  const err = (data as { error?: unknown }).error
  if (typeof err !== 'object' || err === null) {
    return undefined
  }
  const type = (err as { type?: unknown }).type
  return typeof type === 'string' ? type : undefined
}
================================================
FILE: restored-src/src/bridge/bridgeConfig.ts
================================================
/**
* Shared bridge auth/URL resolution. Consolidates the ant-only
* CLAUDE_BRIDGE_* dev overrides that were previously copy-pasted across
* a dozen files — inboundAttachments, BriefTool/upload, bridgeMain,
* initReplBridge, remoteBridgeCore, daemon workers, /rename,
* /remote-control.
*
* Two layers: *Override() returns the ant-only env var (or undefined);
* the non-Override versions fall through to the real OAuth store/config.
* Callers that compose with a different auth source (e.g. daemon workers
* using IPC auth) use the Override getters directly.
*/
import { getOauthConfig } from '../constants/oauth.js'
import { getClaudeAIOAuthTokens } from '../utils/auth.js'
/** Ant-only dev override: CLAUDE_BRIDGE_OAUTH_TOKEN, else undefined. */
export function getBridgeTokenOverride(): string | undefined {
  if (process.env.USER_TYPE !== 'ant') {
    return undefined
  }
  // Empty string counts as unset.
  return process.env.CLAUDE_BRIDGE_OAUTH_TOKEN || undefined
}
/** Ant-only dev override: CLAUDE_BRIDGE_BASE_URL, else undefined. */
export function getBridgeBaseUrlOverride(): string | undefined {
  if (process.env.USER_TYPE !== 'ant') {
    return undefined
  }
  // Empty string counts as unset.
  return process.env.CLAUDE_BRIDGE_BASE_URL || undefined
}
/**
 * Access token for bridge API calls: dev override first, then the OAuth
 * keychain. Undefined means "not logged in".
 */
export function getBridgeAccessToken(): string | undefined {
  const override = getBridgeTokenOverride()
  if (override !== undefined) {
    return override
  }
  return getClaudeAIOAuthTokens()?.accessToken
}
/**
 * Base URL for bridge API calls: dev override first, then the production
 * OAuth config. Always returns a URL.
 */
export function getBridgeBaseUrl(): string {
  const override = getBridgeBaseUrlOverride()
  return override !== undefined ? override : getOauthConfig().BASE_API_URL
}
================================================
FILE: restored-src/src/bridge/bridgeDebug.ts
================================================
import { logForDebugging } from '../utils/debug.js'
import { BridgeFatalError } from './bridgeApi.js'
import type { BridgeApiClient } from './types.js'
/**
* Ant-only fault injection for manually testing bridge recovery paths.
*
* Real failure modes this targets (BQ 2026-03-12, 7-day window):
* poll 404 not_found_error — 147K sessions/week, dead onEnvironmentLost gate
* ws_closed 1002/1006 — 22K sessions/week, zombie poll after close
* register transient failure — residual: network blips during doReconnect
*
* Usage: /bridge-kick from the REPL while Remote Control is
* connected, then tail debug.log to watch the recovery machinery react.
*
* Module-level state is intentional here: one bridge per REPL process, the
* /bridge-kick slash command has no other way to reach into initBridgeCore's
* closures, and teardown clears the slot.
*/
/** One-shot fault to inject on the next matching api call. */
type BridgeFault = {
  // Which BridgeApiClient method the fault targets.
  method:
    | 'pollForWork'
    | 'registerBridgeEnvironment'
    | 'reconnectSession'
    | 'heartbeatWork'
  /** Fatal errors go through handleErrorStatus → BridgeFatalError. Transient
   * errors surface as plain axios rejections (5xx / network). Recovery code
   * distinguishes the two: fatal → teardown, transient → retry/backoff. */
  kind: 'fatal' | 'transient'
  // HTTP-like status embedded in the injected error message/BridgeFatalError.
  status: number
  // Optional server error type to attach to a fatal fault.
  errorType?: string
  /** Remaining injections. Decremented on consume; removed at 0. */
  count: number
}
/** Hooks exposed to the /bridge-kick slash command for fault testing. */
export type BridgeDebugHandle = {
  /** Invoke the transport's permanent-close handler directly. Tests the
   * ws_closed → reconnectEnvironmentWithSession escalation (#22148). */
  fireClose: (code: number) => void
  /** Call reconnectEnvironmentWithSession() — same as SIGUSR2 but
   * reachable from the slash command. */
  forceReconnect: () => void
  /** Queue a fault for the next N calls to the named api method. */
  injectFault: (fault: BridgeFault) => void
  /** Abort the at-capacity sleep so an injected poll fault lands
   * immediately instead of up to 10min later. */
  wakePollLoop: () => void
  /** env/session IDs for the debug.log grep. */
  describe: () => string
}
// Slot for the active bridge's debug handle (one bridge per REPL process).
let debugHandle: BridgeDebugHandle | null = null
// Pending one-shot faults, consumed by wrapApiForFaultInjection.
const faultQueue: BridgeFault[] = []
/** Install the active bridge's debug handle (replaces any previous one). */
export function registerBridgeDebugHandle(h: BridgeDebugHandle): void {
  debugHandle = h
}
/** Tear down: drop the handle and discard any queued faults. */
export function clearBridgeDebugHandle(): void {
  debugHandle = null
  faultQueue.length = 0
}
/** Active bridge debug handle, or null when no bridge is connected. */
export function getBridgeDebugHandle(): BridgeDebugHandle | null {
  return debugHandle
}
/** Queue a one-shot fault and log it for the debug.log grep. */
export function injectBridgeFault(fault: BridgeFault): void {
  faultQueue.push(fault)
  logForDebugging(
    `[bridge:debug] Queued fault: ${fault.method} ${fault.kind}/${fault.status}${fault.errorType ? `/${fault.errorType}` : ''} ×${fault.count}`,
  )
}
/**
 * Wrap a BridgeApiClient so each call first checks the fault queue. If a
 * matching fault is queued, throw the specified error instead of calling
 * through. Delegates everything else to the real client.
 *
 * Only called when USER_TYPE === 'ant' — zero overhead in external builds.
 */
export function wrapApiForFaultInjection(
  api: BridgeApiClient,
): BridgeApiClient {
  // Find the first queued fault for `method`, decrement its remaining
  // count (removing it at 0), and return it; null when none queued.
  function consume(method: BridgeFault['method']): BridgeFault | null {
    for (let i = 0; i < faultQueue.length; i++) {
      const fault = faultQueue[i]!
      if (fault.method !== method) continue
      fault.count -= 1
      if (fault.count <= 0) {
        faultQueue.splice(i, 1)
      }
      return fault
    }
    return null
  }
  function throwFault(fault: BridgeFault, context: string): never {
    logForDebugging(
      `[bridge:debug] Injecting ${fault.kind} fault into ${context}: status=${fault.status} errorType=${fault.errorType ?? 'none'}`,
    )
    if (fault.kind === 'fatal') {
      throw new BridgeFatalError(
        `[injected] ${context} ${fault.status}`,
        fault.status,
        fault.errorType,
      )
    }
    // Transient: mimic an axios rejection (5xx / network). No .status on
    // the error itself — that's how the catch blocks distinguish.
    throw new Error(`[injected transient] ${context} ${fault.status}`)
  }
  // Throw if a fault is queued for `method`; otherwise fall through.
  function maybeInject(method: BridgeFault['method'], context: string): void {
    const fault = consume(method)
    if (fault) {
      throwFault(fault, context)
    }
  }
  return {
    ...api,
    async pollForWork(envId, secret, signal, reclaimMs) {
      maybeInject('pollForWork', 'Poll')
      return api.pollForWork(envId, secret, signal, reclaimMs)
    },
    async registerBridgeEnvironment(config) {
      maybeInject('registerBridgeEnvironment', 'Registration')
      return api.registerBridgeEnvironment(config)
    },
    async reconnectSession(envId, sessionId) {
      maybeInject('reconnectSession', 'ReconnectSession')
      return api.reconnectSession(envId, sessionId)
    },
    async heartbeatWork(envId, workId, token) {
      maybeInject('heartbeatWork', 'Heartbeat')
      return api.heartbeatWork(envId, workId, token)
    },
  }
}
================================================
FILE: restored-src/src/bridge/bridgeEnabled.ts
================================================
import { feature } from 'bun:bundle'
import {
checkGate_CACHED_OR_BLOCKING,
getDynamicConfig_CACHED_MAY_BE_STALE,
getFeatureValue_CACHED_MAY_BE_STALE,
} from '../services/analytics/growthbook.js'
// Namespace import breaks the bridgeEnabled → auth → config → bridgeEnabled
// cycle — authModule.foo is a live binding, so by the time the helpers below
// call it, auth.js is fully loaded. Previously used require() for the same
// deferral, but require() hits a CJS cache that diverges from the ESM
// namespace after mock.module() (daemon/auth.test.ts), breaking spyOn.
import * as authModule from '../utils/auth.js'
import { isEnvTruthy } from '../utils/envUtils.js'
import { lt } from '../utils/semver.js'
/**
 * Runtime check for bridge mode entitlement.
 *
 * Remote Control requires a claude.ai subscription (the bridge auths to CCR
 * with the claude.ai OAuth token). isClaudeAISubscriber() excludes
 * Bedrock/Vertex/Foundry, apiKeyHelper/gateway deployments, env-var API keys,
 * and Console API logins — none of which have the OAuth token CCR needs.
 * See github.com/deshaw/anthropic-issues/issues/24.
 *
 * The `feature('BRIDGE_MODE')` guard ensures the GrowthBook string literal
 * is only referenced when bridge mode is enabled at build time.
 *
 * Non-blocking: uses the possibly-stale cached flag value, so it is safe
 * to call from render paths (contrast isBridgeEnabledBlocking below).
 */
export function isBridgeEnabled(): boolean {
  // Positive ternary pattern — see docs/feature-gating.md.
  // Negative pattern (if (!feature(...)) return) does not eliminate
  // inline string literals from external builds.
  return feature('BRIDGE_MODE')
    ? isClaudeAISubscriber() &&
        getFeatureValue_CACHED_MAY_BE_STALE('tengu_ccr_bridge', false)
    : false
}
/**
 * Blocking entitlement check for Remote Control.
 *
 * Returns cached `true` immediately (fast path). If the disk cache says
 * `false` or is missing, awaits GrowthBook init and fetches the fresh
 * server value (slow path, max ~5s), then writes it to disk.
 *
 * Use at entitlement gates where a stale `false` would unfairly block access.
 * For user-facing error paths, prefer `getBridgeDisabledReason()` which gives
 * a specific diagnostic. For render-body UI visibility checks, use
 * `isBridgeEnabled()` instead.
 */
export async function isBridgeEnabledBlocking(): Promise {
  // Same positive-ternary build-time gating pattern as isBridgeEnabled().
  return feature('BRIDGE_MODE')
    ? isClaudeAISubscriber() &&
        (await checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge'))
    : false
}
/**
* Diagnostic message for why Remote Control is unavailable, or null if
* it's enabled. Call this instead of a bare `isBridgeEnabledBlocking()`
* check when you need to show the user an actionable error.
*
* The GrowthBook gate targets on organizationUUID, which comes from
* config.oauthAccount — populated by /api/oauth/profile during login.
* That endpoint requires the user:profile scope. Tokens without it
* (setup-token, CLAUDE_CODE_OAUTH_TOKEN env var, or pre-scope-expansion
* logins) leave oauthAccount unpopulated, so the gate falls back to
* false and users see a dead-end "not enabled" message with no hint
* that re-login would fix it. See CC-1165 / gh-33105.
*/
export async function getBridgeDisabledReason(): Promise<string | null> {
  // Fix: return type was a bare `Promise` (type argument lost in the
  // restored source) — invalid TS. Every branch returns a diagnostic
  // string or null, hence Promise<string | null>.
  if (feature('BRIDGE_MODE')) {
    if (!isClaudeAISubscriber()) {
      return 'Remote Control requires a claude.ai subscription. Run `claude auth login` to sign in with your claude.ai account.'
    }
    // Scope check comes before the gate check: tokens without the
    // user:profile scope leave oauthAccount unpopulated, so the
    // org-targeted gate would fall back to false with no hint that
    // re-login fixes it (see JSDoc above, CC-1165 / gh-33105).
    if (!hasProfileScope()) {
      return 'Remote Control requires a full-scope login token. Long-lived tokens (from `claude setup-token` or CLAUDE_CODE_OAUTH_TOKEN) are limited to inference-only for security reasons. Run `claude auth login` to use Remote Control.'
    }
    if (!getOauthAccountInfo()?.organizationUuid) {
      return 'Unable to determine your organization for Remote Control eligibility. Run `claude auth login` to refresh your account information.'
    }
    // Blocking gate check: a stale cached `false` here would unfairly
    // block access at an entitlement gate.
    if (!(await checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge'))) {
      return 'Remote Control is not yet enabled for your account.'
    }
    return null
  }
  return 'Remote Control is not available in this build.'
}
// try/catch: main.tsx:5698 calls isBridgeEnabled() while defining the Commander
// program, before enableConfigs() runs. isClaudeAISubscriber() → getGlobalConfig()
// throws "Config accessed before allowed" there. Pre-config, no OAuth token can
// exist anyway — false is correct. Same swallow getFeatureValue_CACHED_MAY_BE_STALE
// already does at growthbook.ts:775-780.
function isClaudeAISubscriber(): boolean {
  // Swallow the pre-config "Config accessed before allowed" throw (see
  // the note above): before config is readable no OAuth token can exist,
  // so "not subscribed" is the correct answer.
  let subscribed = false
  try {
    subscribed = authModule.isClaudeAISubscriber()
  } catch {
    // fall through with false
  }
  return subscribed
}
function hasProfileScope(): boolean {
  // Same pre-config swallow as isClaudeAISubscriber above: no config
  // means no token, hence no profile scope.
  let scoped: boolean
  try {
    scoped = authModule.hasProfileScope()
  } catch {
    scoped = false
  }
  return scoped
}
function getOauthAccountInfo(): ReturnType<
  typeof authModule.getOauthAccountInfo
> {
  // Same pre-config swallow as the wrappers above: if config is not yet
  // readable, report "no account info" instead of throwing.
  let info: ReturnType<typeof authModule.getOauthAccountInfo>
  try {
    info = authModule.getOauthAccountInfo()
  } catch {
    info = undefined
  }
  return info
}
/**
* Runtime check for the env-less (v2) REPL bridge path.
* Returns true when the GrowthBook flag `tengu_bridge_repl_v2` is enabled.
*
* This gates which implementation initReplBridge uses — NOT whether bridge
* is available at all (see isBridgeEnabled above). Daemon/print paths stay
* on the env-based implementation regardless of this gate.
*/
export function isEnvLessBridgeEnabled(): boolean {
  // Positive feature() guard keeps the `tengu_bridge_repl_v2` literal out
  // of builds without BRIDGE_MODE (see docs/feature-gating.md).
  if (feature('BRIDGE_MODE')) {
    return getFeatureValue_CACHED_MAY_BE_STALE('tengu_bridge_repl_v2', false)
  }
  return false
}
/**
* Kill-switch for the `cse_*` → `session_*` client-side retag shim.
*
* The shim exists because compat/convert.go:27 validates TagSession and the
* claude.ai frontend routes on `session_*`, while v2 worker endpoints hand out
* `cse_*`. Once the server tags by environment_kind and the frontend accepts
* `cse_*` directly, flip this to false to make toCompatSessionId a no-op.
* Defaults to true — the shim stays active until explicitly disabled.
*/
export function isCseShimEnabled(): boolean {
  // Kill-switch defaults to ON in both branches: bridge builds read the
  // GrowthBook gate (default true); non-bridge builds keep the shim
  // unconditionally active. Positive guard strips the gate literal from
  // builds without BRIDGE_MODE.
  if (feature('BRIDGE_MODE')) {
    return getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_bridge_repl_v2_cse_shim_enabled',
      true,
    )
  }
  return true
}
/**
* Returns an error message if the current CLI version is below the
* minimum required for the v1 (env-based) Remote Control path, or null if the
* version is fine. The v2 (env-less) path uses checkEnvLessBridgeMinVersion()
* in envLessBridgeConfig.ts instead — the two implementations have independent
* version floors.
*
* Uses cached (non-blocking) GrowthBook config. If GrowthBook hasn't
* loaded yet, the default '0.0.0' means the check passes — a safe fallback.
*/
export function checkBridgeMinVersion(): string | null {
  // Positive pattern — see docs/feature-gating.md. A negative early-return
  // (if (!feature(...)) return) would leave the inline GrowthBook string
  // literal in external builds.
  let message: string | null = null
  if (feature('BRIDGE_MODE')) {
    const { minVersion } = getDynamicConfig_CACHED_MAY_BE_STALE<{
      minVersion: string
    }>('tengu_bridge_min_version', { minVersion: '0.0.0' })
    // Default '0.0.0' (GrowthBook not yet loaded) never satisfies lt(),
    // so the check passes — a safe fallback.
    if (minVersion && lt(MACRO.VERSION, minVersion)) {
      message = `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${minVersion} or higher is required. Run \`claude update\` to update.`
    }
  }
  return message
}
/**
* Default for remoteControlAtStartup when the user hasn't explicitly set it.
* When the CCR_AUTO_CONNECT build flag is present (ant-only) and the
* tengu_cobalt_harbor GrowthBook gate is on, all sessions connect to CCR by
* default — the user can still opt out by setting remoteControlAtStartup=false
* in config (explicit settings always win over this default).
*
* Defined here rather than in config.ts to avoid a direct
* config.ts → growthbook.ts import cycle (growthbook.ts → user.ts → config.ts).
*/
export function getCcrAutoConnectDefault(): boolean {
  // Positive guard so the `tengu_cobalt_harbor` literal is stripped from
  // builds without the ant-only CCR_AUTO_CONNECT build flag.
  if (feature('CCR_AUTO_CONNECT')) {
    return getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_harbor', false)
  }
  return false
}
/**
* Opt-in CCR mirror mode — every local session spawns an outbound-only
* Remote Control session that receives forwarded events. Separate from
* getCcrAutoConnectDefault (bidirectional Remote Control). Env var wins for
* local opt-in; GrowthBook controls rollout.
*/
export function isCcrMirrorEnabled(): boolean {
  // Positive guard strips the gate literal from non-CCR_MIRROR builds.
  // Env var is checked first so local opt-in wins without a GrowthBook
  // round-trip; GrowthBook controls the wider rollout.
  if (feature('CCR_MIRROR')) {
    if (isEnvTruthy(process.env.CLAUDE_CODE_CCR_MIRROR)) {
      return true
    }
    return getFeatureValue_CACHED_MAY_BE_STALE('tengu_ccr_mirror', false)
  }
  return false
}
================================================
FILE: restored-src/src/bridge/bridgeMain.ts
================================================
import { feature } from 'bun:bundle'
import { randomUUID } from 'crypto'
import { hostname, tmpdir } from 'os'
import { basename, join, resolve } from 'path'
import { getRemoteSessionUrl } from '../constants/product.js'
import { shutdownDatadog } from '../services/analytics/datadog.js'
import { shutdown1PEventLogging } from '../services/analytics/firstPartyEventLogger.js'
import { checkGate_CACHED_OR_BLOCKING } from '../services/analytics/growthbook.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
logEventAsync,
} from '../services/analytics/index.js'
import { isInBundledMode } from '../utils/bundledMode.js'
import { logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import { isEnvTruthy, isInProtectedNamespace } from '../utils/envUtils.js'
import { errorMessage } from '../utils/errors.js'
import { truncateToWidth } from '../utils/format.js'
import { logError } from '../utils/log.js'
import { sleep } from '../utils/sleep.js'
import { createAgentWorktree, removeAgentWorktree } from '../utils/worktree.js'
import {
BridgeFatalError,
createBridgeApiClient,
isExpiredErrorType,
isSuppressible403,
validateBridgeId,
} from './bridgeApi.js'
import { formatDuration } from './bridgeStatusUtil.js'
import { createBridgeLogger } from './bridgeUI.js'
import { createCapacityWake } from './capacityWake.js'
import { describeAxiosError } from './debugUtils.js'
import { createTokenRefreshScheduler } from './jwtUtils.js'
import { getPollIntervalConfig } from './pollConfig.js'
import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
import { createSessionSpawner, safeFilenameId } from './sessionRunner.js'
import { getTrustedDeviceToken } from './trustedDevice.js'
import {
BRIDGE_LOGIN_ERROR,
type BridgeApiClient,
type BridgeConfig,
type BridgeLogger,
DEFAULT_SESSION_TIMEOUT_MS,
type SessionDoneStatus,
type SessionHandle,
type SessionSpawner,
type SessionSpawnOpts,
type SpawnMode,
} from './types.js'
import {
buildCCRv2SdkUrl,
buildSdkUrl,
decodeWorkSecret,
registerWorker,
sameSessionId,
} from './workSecret.js'
/**
 * Backoff tuning for the bridge poll loop. The `conn*` fields govern
 * connection-error backoff and the `general*` fields govern other errors
 * (see connBackoff/generalBackoff tracking in runBridgeLoop). All values
 * are milliseconds.
 */
export type BackoffConfig = {
  /** Initial connection-error backoff delay (ms). */
  connInitialMs: number
  /** Maximum connection-error backoff delay (ms). */
  connCapMs: number
  /** Give up after connection errors persist this long (ms). */
  connGiveUpMs: number
  /** Initial general-error backoff delay (ms). */
  generalInitialMs: number
  /** Maximum general-error backoff delay (ms). */
  generalCapMs: number
  /** Give up after general errors persist this long (ms). */
  generalGiveUpMs: number
  /** SIGTERM→SIGKILL grace period on shutdown. Default 30s. */
  shutdownGraceMs?: number
  /** stopWorkWithRetry base delay (1s/2s/4s backoff). Default 1000ms. */
  stopWorkBaseDelayMs?: number
}
// Default backoff used when runBridgeLoop's caller does not supply one.
// The optional shutdownGraceMs / stopWorkBaseDelayMs are omitted here —
// presumably consumers fall back to the defaults documented on
// BackoffConfig (TODO confirm at the use sites).
const DEFAULT_BACKOFF: BackoffConfig = {
  connInitialMs: 2_000,
  connCapMs: 120_000, // 2 minutes
  connGiveUpMs: 600_000, // 10 minutes
  generalInitialMs: 500,
  generalCapMs: 30_000,
  generalGiveUpMs: 600_000, // 10 minutes
}
/** Status update interval for the live display (ms). */
const STATUS_UPDATE_INTERVAL_MS = 1_000
// Default session cap for multi-session spawn modes — not referenced in
// this chunk; presumably the fallback when --spawn is given without an
// explicit count. NOTE(review): confirm at the call site.
const SPAWN_SESSIONS_DEFAULT = 32
/**
* GrowthBook gate for multi-session spawn modes (--spawn / --capacity / --create-session-in-dir).
* Sibling of tengu_ccr_bridge_multi_environment (multiple envs per host:dir) —
* this one enables multiple sessions per environment.
* Rollout staged via targeting rules: ants first, then gradual external.
*
* Uses the blocking gate check so a stale disk-cache miss doesn't unfairly
* deny access. The fast path (cache has true) is still instant; only the
* cold-start path awaits the server fetch, and that fetch also seeds the
* disk cache for next time.
*/
async function isMultiSessionSpawnEnabled(): Promise<boolean> {
  // Fix: return type was a bare `Promise` (type argument lost in the
  // restored source) — `Promise` requires one type argument; the gate
  // check resolves to a boolean.
  // Blocking gate check (see JSDoc above): instant when the disk cache
  // has `true`; only the cold-start path awaits the server fetch.
  return checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge_multi_session')
}
/**
* Returns the threshold for detecting system sleep/wake in the poll loop.
* Must exceed the max backoff cap — otherwise normal backoff delays trigger
* false sleep detection (resetting the error budget indefinitely). Using
* 2× the connection backoff cap, matching the pattern in WebSocketTransport
* and replBridge.
*/
function pollSleepDetectionThresholdMs(backoff: BackoffConfig): number {
  // 2× the connection backoff cap so a maximal normal backoff delay can
  // never be mistaken for a system sleep/wake gap (which would reset the
  // error budget indefinitely). Matches WebSocketTransport / replBridge.
  const { connCapMs } = backoff
  return 2 * connCapMs
}
/**
* Returns the args that must precede CLI flags when spawning a child claude
* process. In compiled binaries, process.execPath is the claude binary itself
* and args go directly to it. In npm installs (node running cli.js),
* process.execPath is the node runtime — the child spawn must pass the script
* path as the first arg, otherwise node interprets --sdk-url as a node option
* and exits with "bad option: --sdk-url". See anthropics/claude-code#28334.
*/
function spawnScriptArgs(): string[] {
  const scriptPath = process.argv[1]
  // Compiled binary: execPath IS the claude binary, flags go straight to
  // it. npm install: execPath is node, so the child must receive the
  // script path first or node treats flags like --sdk-url as its own
  // options and exits ("bad option"). See anthropics/claude-code#28334.
  return isInBundledMode() || !scriptPath ? [] : [scriptPath]
}
/**
 * Spawn a session via the spawner, converting a throw into a returned
 * error string so the poll loop can report the failure and keep running.
 */
function safeSpawn(
  spawner: SessionSpawner,
  opts: SessionSpawnOpts,
  dir: string,
): SessionHandle | string {
  let handle: SessionHandle
  try {
    handle = spawner.spawn(opts, dir)
  } catch (err) {
    const reason = errorMessage(err)
    logError(new Error(`Session spawn failed: ${reason}`))
    return reason
  }
  return handle
}
export async function runBridgeLoop(
config: BridgeConfig,
environmentId: string,
environmentSecret: string,
api: BridgeApiClient,
spawner: SessionSpawner,
logger: BridgeLogger,
signal: AbortSignal,
backoffConfig: BackoffConfig = DEFAULT_BACKOFF,
initialSessionId?: string,
getAccessToken?: () => string | undefined | Promise,
): Promise {
// Local abort controller so that onSessionDone can stop the poll loop.
// Linked to the incoming signal so external aborts also work.
const controller = new AbortController()
if (signal.aborted) {
controller.abort()
} else {
signal.addEventListener('abort', () => controller.abort(), { once: true })
}
const loopSignal = controller.signal
const activeSessions = new Map()
const sessionStartTimes = new Map()
const sessionWorkIds = new Map()
// Compat-surface ID (session_*) computed once at spawn and cached so
// cleanup and status-update ticks use the same key regardless of whether
// the tengu_bridge_repl_v2_cse_shim_enabled gate flips mid-session.
const sessionCompatIds = new Map()
// Session ingress JWTs for heartbeat auth, keyed by sessionId.
// Stored separately from handle.accessToken because the token refresh
// scheduler overwrites that field with the OAuth token (~3h55m in).
const sessionIngressTokens = new Map()
const sessionTimers = new Map>()
const completedWorkIds = new Set()
const sessionWorktrees = new Map<
string,
{
worktreePath: string
worktreeBranch?: string
gitRoot?: string
hookBased?: boolean
}
>()
// Track sessions killed by the timeout watchdog so onSessionDone can
// distinguish them from server-initiated or shutdown interrupts.
const timedOutSessions = new Set()
// Sessions that already have a title (server-set or bridge-derived) so
// onFirstUserMessage doesn't clobber a user-assigned --name / web rename.
// Keyed by compatSessionId to match logger.setSessionTitle's key.
const titledSessions = new Set()
// Signal to wake the at-capacity sleep early when a session completes,
// so the bridge can immediately accept new work.
const capacityWake = createCapacityWake(loopSignal)
/**
* Heartbeat all active work items.
* Returns 'ok' if at least one heartbeat succeeded, 'auth_failed' if any
* got a 401/403 (JWT expired — re-queued via reconnectSession so the next
* poll delivers fresh work), or 'failed' if all failed for other reasons.
*/
async function heartbeatActiveWorkItems(): Promise<
'ok' | 'auth_failed' | 'fatal' | 'failed'
> {
let anySuccess = false
let anyFatal = false
const authFailedSessions: string[] = []
for (const [sessionId] of activeSessions) {
const workId = sessionWorkIds.get(sessionId)
const ingressToken = sessionIngressTokens.get(sessionId)
if (!workId || !ingressToken) {
continue
}
try {
await api.heartbeatWork(environmentId, workId, ingressToken)
anySuccess = true
} catch (err) {
logForDebugging(
`[bridge:heartbeat] Failed for sessionId=${sessionId} workId=${workId}: ${errorMessage(err)}`,
)
if (err instanceof BridgeFatalError) {
logEvent('tengu_bridge_heartbeat_error', {
status:
err.status as unknown as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
error_type: (err.status === 401 || err.status === 403
? 'auth_failed'
: 'fatal') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
if (err.status === 401 || err.status === 403) {
authFailedSessions.push(sessionId)
} else {
// 404/410 = environment expired or deleted — no point retrying
anyFatal = true
}
}
}
}
// JWT expired → trigger server-side re-dispatch. Without this, work stays
// ACK'd out of the Redis PEL and poll returns empty forever (CC-1263).
// The existingHandle path below delivers the fresh token to the child.
// sessionId is already in the format /bridge/reconnect expects: it comes
// from work.data.id, which matches the server's EnvironmentInstance store
// (cse_* under the compat gate, session_* otherwise).
for (const sessionId of authFailedSessions) {
logger.logVerbose(
`Session ${sessionId} token expired — re-queuing via bridge/reconnect`,
)
try {
await api.reconnectSession(environmentId, sessionId)
logForDebugging(
`[bridge:heartbeat] Re-queued sessionId=${sessionId} via bridge/reconnect`,
)
} catch (err) {
logger.logError(
`Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
)
logForDebugging(
`[bridge:heartbeat] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
{ level: 'error' },
)
}
}
if (anyFatal) {
return 'fatal'
}
if (authFailedSessions.length > 0) {
return 'auth_failed'
}
return anySuccess ? 'ok' : 'failed'
}
// Sessions spawned with CCR v2 env vars. v2 children cannot use OAuth
// tokens (CCR worker endpoints validate the JWT's session_id claim,
// register_worker.go:32), so onRefresh triggers server re-dispatch
// instead — the next poll delivers fresh work with a new JWT via the
// existingHandle path below.
const v2Sessions = new Set()
// Proactive token refresh: schedules a timer 5min before the session
// ingress JWT expires. v1 delivers OAuth directly; v2 calls
// reconnectSession to trigger server re-dispatch (CC-1263: without
// this, v2 daemon sessions silently die at ~5h since the server does
// not auto-re-dispatch ACK'd work on lease expiry).
const tokenRefresh = getAccessToken
? createTokenRefreshScheduler({
getAccessToken,
onRefresh: (sessionId, oauthToken) => {
const handle = activeSessions.get(sessionId)
if (!handle) {
return
}
if (v2Sessions.has(sessionId)) {
logger.logVerbose(
`Refreshing session ${sessionId} token via bridge/reconnect`,
)
void api
.reconnectSession(environmentId, sessionId)
.catch((err: unknown) => {
logger.logError(
`Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
)
logForDebugging(
`[bridge:token] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
{ level: 'error' },
)
})
} else {
handle.updateAccessToken(oauthToken)
}
},
label: 'bridge',
})
: null
const loopStartTime = Date.now()
// Track all in-flight cleanup promises (stopWork, worktree removal) so
// the shutdown sequence can await them before process.exit().
const pendingCleanups = new Set>()
function trackCleanup(p: Promise): void {
pendingCleanups.add(p)
void p.finally(() => pendingCleanups.delete(p))
}
let connBackoff = 0
let generalBackoff = 0
let connErrorStart: number | null = null
let generalErrorStart: number | null = null
let lastPollErrorTime: number | null = null
let statusUpdateTimer: ReturnType | null = null
// Set by BridgeFatalError and give-up paths so the shutdown block can
// skip the resume message (resume is impossible after env expiry/auth
// failure/sustained connection errors).
let fatalExit = false
logForDebugging(
`[bridge:work] Starting poll loop spawnMode=${config.spawnMode} maxSessions=${config.maxSessions} environmentId=${environmentId}`,
)
logForDiagnosticsNoPII('info', 'bridge_loop_started', {
max_sessions: config.maxSessions,
spawn_mode: config.spawnMode,
})
// For ant users, show where session debug logs will land so they can tail them.
// sessionRunner.ts uses the same base path. File appears once a session spawns.
if (process.env.USER_TYPE === 'ant') {
let debugGlob: string
if (config.debugFile) {
const ext = config.debugFile.lastIndexOf('.')
debugGlob =
ext > 0
? `${config.debugFile.slice(0, ext)}-*${config.debugFile.slice(ext)}`
: `${config.debugFile}-*`
} else {
debugGlob = join(tmpdir(), 'claude', 'bridge-session-*.log')
}
logger.setDebugLogPath(debugGlob)
}
logger.printBanner(config, environmentId)
// Seed the logger's session count + spawn mode before any render. Without
// this, setAttached() below renders with the logger's default sessionMax=1,
// showing "Capacity: 0/1" until the status ticker kicks in (which is gated
// by !initialSessionId and only starts after the poll loop picks up work).
logger.updateSessionCount(0, config.maxSessions, config.spawnMode)
// If an initial session was pre-created, show its URL from the start so
// the user can click through immediately (matching /remote-control behavior).
if (initialSessionId) {
logger.setAttached(initialSessionId)
}
/** Refresh the inline status display. Shows idle or active depending on state. */
function updateStatusDisplay(): void {
// Push the session count (no-op when maxSessions === 1) so the
// next renderStatusLine tick shows the current count.
logger.updateSessionCount(
activeSessions.size,
config.maxSessions,
config.spawnMode,
)
// Push per-session activity into the multi-session display.
for (const [sid, handle] of activeSessions) {
const act = handle.currentActivity
if (act) {
logger.updateSessionActivity(sessionCompatIds.get(sid) ?? sid, act)
}
}
if (activeSessions.size === 0) {
logger.updateIdleStatus()
return
}
// Show the most recently started session that is still actively working.
// Sessions whose current activity is 'result' or 'error' are between
// turns — the CLI emitted its result but the process stays alive waiting
// for the next user message. Skip updating so the status line keeps
// whatever state it had (Attached / session title).
const [sessionId, handle] = [...activeSessions.entries()].pop()!
const startTime = sessionStartTimes.get(sessionId)
if (!startTime) return
const activity = handle.currentActivity
if (!activity || activity.type === 'result' || activity.type === 'error') {
// Session is between turns — keep current status (Attached/titled).
// In multi-session mode, still refresh so bullet-list activities stay current.
if (config.maxSessions > 1) logger.refreshDisplay()
return
}
const elapsed = formatDuration(Date.now() - startTime)
// Build trail from recent tool activities (last 5)
const trail = handle.activities
.filter(a => a.type === 'tool_start')
.slice(-5)
.map(a => a.summary)
logger.updateSessionStatus(sessionId, elapsed, activity, trail)
}
/** Start the status display update ticker. */
function startStatusUpdates(): void {
stopStatusUpdates()
// Call immediately so the first transition (e.g. Connecting → Ready)
// happens without delay, avoiding concurrent timer races.
updateStatusDisplay()
statusUpdateTimer = setInterval(
updateStatusDisplay,
STATUS_UPDATE_INTERVAL_MS,
)
}
/** Stop the status display update ticker. */
function stopStatusUpdates(): void {
if (statusUpdateTimer) {
clearInterval(statusUpdateTimer)
statusUpdateTimer = null
}
}
function onSessionDone(
sessionId: string,
startTime: number,
handle: SessionHandle,
): (status: SessionDoneStatus) => void {
return (rawStatus: SessionDoneStatus): void => {
const workId = sessionWorkIds.get(sessionId)
activeSessions.delete(sessionId)
sessionStartTimes.delete(sessionId)
sessionWorkIds.delete(sessionId)
sessionIngressTokens.delete(sessionId)
const compatId = sessionCompatIds.get(sessionId) ?? sessionId
sessionCompatIds.delete(sessionId)
logger.removeSession(compatId)
titledSessions.delete(compatId)
v2Sessions.delete(sessionId)
// Clear per-session timeout timer
const timer = sessionTimers.get(sessionId)
if (timer) {
clearTimeout(timer)
sessionTimers.delete(sessionId)
}
// Clear token refresh timer
tokenRefresh?.cancel(sessionId)
// Wake the at-capacity sleep so the bridge can accept new work immediately
capacityWake.wake()
// If the session was killed by the timeout watchdog, treat it as a
// failed session (not a server/shutdown interrupt) so we still call
// stopWork and archiveSession below.
const wasTimedOut = timedOutSessions.delete(sessionId)
const status: SessionDoneStatus =
wasTimedOut && rawStatus === 'interrupted' ? 'failed' : rawStatus
const durationMs = Date.now() - startTime
logForDebugging(
`[bridge:session] sessionId=${sessionId} workId=${workId ?? 'unknown'} exited status=${status} duration=${formatDuration(durationMs)}`,
)
logEvent('tengu_bridge_session_done', {
status:
status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
duration_ms: durationMs,
})
logForDiagnosticsNoPII('info', 'bridge_session_done', {
status,
duration_ms: durationMs,
})
// Clear the status display before printing final log
logger.clearStatus()
stopStatusUpdates()
// Build error message from stderr if available
const stderrSummary =
handle.lastStderr.length > 0 ? handle.lastStderr.join('\n') : undefined
let failureMessage: string | undefined
switch (status) {
case 'completed':
logger.logSessionComplete(sessionId, durationMs)
break
case 'failed':
// Skip failure log during shutdown — the child exits non-zero when
// killed, which is expected and not a real failure.
// Also skip for timeout-killed sessions — the timeout watchdog
// already logged a clear timeout message.
if (!wasTimedOut && !loopSignal.aborted) {
failureMessage = stderrSummary ?? 'Process exited with error'
logger.logSessionFailed(sessionId, failureMessage)
logError(new Error(`Bridge session failed: ${failureMessage}`))
}
break
case 'interrupted':
logger.logVerbose(`Session ${sessionId} interrupted`)
break
}
// Notify the server that this work item is done. Skip for interrupted
// sessions — interrupts are either server-initiated (the server already
// knows) or caused by bridge shutdown (which calls stopWork() separately).
if (status !== 'interrupted' && workId) {
trackCleanup(
stopWorkWithRetry(
api,
environmentId,
workId,
logger,
backoffConfig.stopWorkBaseDelayMs,
),
)
completedWorkIds.add(workId)
}
// Clean up worktree if one was created for this session
const wt = sessionWorktrees.get(sessionId)
if (wt) {
sessionWorktrees.delete(sessionId)
trackCleanup(
removeAgentWorktree(
wt.worktreePath,
wt.worktreeBranch,
wt.gitRoot,
wt.hookBased,
).catch((err: unknown) =>
logger.logVerbose(
`Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
),
),
)
}
// Lifecycle decision: in multi-session mode, keep the bridge running
// after a session completes. In single-session mode, abort the poll
// loop so the bridge exits cleanly.
if (status !== 'interrupted' && !loopSignal.aborted) {
if (config.spawnMode !== 'single-session') {
// Multi-session: archive the completed session so it doesn't linger
// as stale in the web UI. archiveSession is idempotent (409 if already
// archived), so double-archiving at shutdown is safe.
// sessionId arrived as cse_* from the work poll (infrastructure-layer
// tag). archiveSession hits /v1/sessions/{id}/archive which is the
// compat surface and validates TagSession (session_*). Re-tag — same
// UUID underneath.
trackCleanup(
api
.archiveSession(compatId)
.catch((err: unknown) =>
logger.logVerbose(
`Failed to archive session ${sessionId}: ${errorMessage(err)}`,
),
),
)
logForDebugging(
`[bridge:session] Session ${status}, returning to idle (multi-session mode)`,
)
} else {
// Single-session: coupled lifecycle — tear down environment
logForDebugging(
`[bridge:session] Session ${status}, aborting poll loop to tear down environment`,
)
controller.abort()
return
}
}
if (!loopSignal.aborted) {
startStatusUpdates()
}
}
}
// Start the idle status display immediately — unless we have a pre-created
// session, in which case setAttached() already set up the display and the
// poll loop will start status updates when it picks up the session.
if (!initialSessionId) {
startStatusUpdates()
}
while (!loopSignal.aborted) {
// Fetched once per iteration — the GrowthBook cache refreshes every
// 5 min, so a loop running at the at-capacity rate picks up config
// changes within one sleep cycle.
const pollConfig = getPollIntervalConfig()
try {
const work = await api.pollForWork(
environmentId,
environmentSecret,
loopSignal,
pollConfig.reclaim_older_than_ms,
)
// Log reconnection if we were previously disconnected
const wasDisconnected =
connErrorStart !== null || generalErrorStart !== null
if (wasDisconnected) {
const disconnectedMs =
Date.now() - (connErrorStart ?? generalErrorStart ?? Date.now())
logger.logReconnected(disconnectedMs)
logForDebugging(
`[bridge:poll] Reconnected after ${formatDuration(disconnectedMs)}`,
)
logEvent('tengu_bridge_reconnected', {
disconnected_ms: disconnectedMs,
})
}
connBackoff = 0
generalBackoff = 0
connErrorStart = null
generalErrorStart = null
lastPollErrorTime = null
// Null response = no work available in the queue.
// Add a minimum delay to avoid hammering the server.
if (!work) {
// Use live check (not a snapshot) since sessions can end during poll.
const atCap = activeSessions.size >= config.maxSessions
if (atCap) {
const atCapMs = pollConfig.multisession_poll_interval_ms_at_capacity
// Heartbeat loops WITHOUT polling. When at-capacity polling is also
// enabled (atCapMs > 0), the loop tracks a deadline and breaks out
// to poll at that interval — heartbeat and poll compose instead of
// one suppressing the other. We break out to poll when:
// - Poll deadline reached (atCapMs > 0 only)
// - Auth fails (JWT expired → poll refreshes tokens)
// - Capacity wake fires (session ended → poll for new work)
// - Loop aborted (shutdown)
if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
logEvent('tengu_bridge_heartbeat_mode_entered', {
active_sessions: activeSessions.size,
heartbeat_interval_ms:
pollConfig.non_exclusive_heartbeat_interval_ms,
})
// Deadline computed once at entry — GB updates to atCapMs don't
// shift an in-flight deadline (next entry picks up the new value).
const pollDeadline = atCapMs > 0 ? Date.now() + atCapMs : null
let hbResult: 'ok' | 'auth_failed' | 'fatal' | 'failed' = 'ok'
let hbCycles = 0
while (
!loopSignal.aborted &&
activeSessions.size >= config.maxSessions &&
(pollDeadline === null || Date.now() < pollDeadline)
) {
// Re-read config each cycle so GrowthBook updates take effect
const hbConfig = getPollIntervalConfig()
if (hbConfig.non_exclusive_heartbeat_interval_ms <= 0) break
// Capture capacity signal BEFORE the async heartbeat call so
// a session ending during the HTTP request is caught by the
// subsequent sleep (instead of being lost to a replaced controller).
const cap = capacityWake.signal()
hbResult = await heartbeatActiveWorkItems()
if (hbResult === 'auth_failed' || hbResult === 'fatal') {
cap.cleanup()
break
}
hbCycles++
await sleep(
hbConfig.non_exclusive_heartbeat_interval_ms,
cap.signal,
)
cap.cleanup()
}
// Determine exit reason for telemetry
const exitReason =
hbResult === 'auth_failed' || hbResult === 'fatal'
? hbResult
: loopSignal.aborted
? 'shutdown'
: activeSessions.size < config.maxSessions
? 'capacity_changed'
: pollDeadline !== null && Date.now() >= pollDeadline
? 'poll_due'
: 'config_disabled'
logEvent('tengu_bridge_heartbeat_mode_exited', {
reason:
exitReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
heartbeat_cycles: hbCycles,
active_sessions: activeSessions.size,
})
if (exitReason === 'poll_due') {
// bridgeApi throttles empty-poll logs (EMPTY_POLL_LOG_INTERVAL=100)
// so the once-per-10min poll_due poll is invisible at counter=2.
// Log it here so verification runs see both endpoints in the debug log.
logForDebugging(
`[bridge:poll] Heartbeat poll_due after ${hbCycles} cycles — falling through to pollForWork`,
)
}
// On auth_failed or fatal, sleep before polling to avoid a tight
// poll+heartbeat loop. Auth_failed: heartbeatActiveWorkItems
// already called reconnectSession — the sleep gives the server
// time to propagate the re-queue. Fatal (404/410): may be a
// single work item GCd while the environment is still valid.
// Use atCapMs if enabled, else the heartbeat interval as a floor
// (guaranteed > 0 here) so heartbeat-only configs don't tight-loop.
if (hbResult === 'auth_failed' || hbResult === 'fatal') {
const cap = capacityWake.signal()
await sleep(
atCapMs > 0
? atCapMs
: pollConfig.non_exclusive_heartbeat_interval_ms,
cap.signal,
)
cap.cleanup()
}
} else if (atCapMs > 0) {
// Heartbeat disabled: slow poll as liveness signal.
const cap = capacityWake.signal()
await sleep(atCapMs, cap.signal)
cap.cleanup()
}
} else {
const interval =
activeSessions.size > 0
? pollConfig.multisession_poll_interval_ms_partial_capacity
: pollConfig.multisession_poll_interval_ms_not_at_capacity
await sleep(interval, loopSignal)
}
continue
}
// At capacity — we polled to keep the heartbeat alive, but cannot
// accept new work right now. We still enter the switch below so that
// token refreshes for existing sessions are processed (the case
// 'session' handler checks for existing sessions before the inner
// capacity guard).
const atCapacityBeforeSwitch = activeSessions.size >= config.maxSessions
// Skip work items that have already been completed and stopped.
// The server may re-deliver stale work before processing our stop
// request, which would otherwise cause a duplicate session spawn.
if (completedWorkIds.has(work.id)) {
logForDebugging(
`[bridge:work] Skipping already-completed workId=${work.id}`,
)
// Respect capacity throttle — without a sleep here, persistent stale
// redeliveries would tight-loop at poll-request speed (the !work
// branch above is the only sleep, and work != null skips it).
if (atCapacityBeforeSwitch) {
const cap = capacityWake.signal()
if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
await heartbeatActiveWorkItems()
await sleep(
pollConfig.non_exclusive_heartbeat_interval_ms,
cap.signal,
)
} else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
await sleep(
pollConfig.multisession_poll_interval_ms_at_capacity,
cap.signal,
)
}
cap.cleanup()
} else {
await sleep(1000, loopSignal)
}
continue
}
// Decode the work secret for session spawning and to extract the JWT
// used for the ack call below.
let secret
try {
secret = decodeWorkSecret(work.secret)
} catch (err) {
const errMsg = errorMessage(err)
logger.logError(
`Failed to decode work secret for workId=${work.id}: ${errMsg}`,
)
logEvent('tengu_bridge_work_secret_failed', {})
// Can't ack (needs the JWT we failed to decode). stopWork uses OAuth,
// so it's callable here — prevents XAUTOCLAIM from re-delivering this
// poisoned item every reclaim_older_than_ms cycle.
completedWorkIds.add(work.id)
trackCleanup(
stopWorkWithRetry(
api,
environmentId,
work.id,
logger,
backoffConfig.stopWorkBaseDelayMs,
),
)
// Respect capacity throttle before retrying — without a sleep here,
// repeated decode failures at capacity would tight-loop at
// poll-request speed (work != null skips the !work sleep above).
if (atCapacityBeforeSwitch) {
const cap = capacityWake.signal()
if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
await heartbeatActiveWorkItems()
await sleep(
pollConfig.non_exclusive_heartbeat_interval_ms,
cap.signal,
)
} else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
await sleep(
pollConfig.multisession_poll_interval_ms_at_capacity,
cap.signal,
)
}
cap.cleanup()
}
continue
}
// Explicitly acknowledge after committing to handle the work — NOT
// before. The at-capacity guard inside case 'session' can break
// without spawning; acking there would permanently lose the work.
// Ack failures are non-fatal: server re-delivers, and existingHandle
// / completedWorkIds paths handle the dedup.
const ackWork = async (): Promise => {
logForDebugging(`[bridge:work] Acknowledging workId=${work.id}`)
try {
await api.acknowledgeWork(
environmentId,
work.id,
secret.session_ingress_token,
)
} catch (err) {
logForDebugging(
`[bridge:work] Acknowledge failed workId=${work.id}: ${errorMessage(err)}`,
)
}
}
const workType: string = work.data.type
switch (work.data.type) {
case 'healthcheck':
await ackWork()
logForDebugging('[bridge:work] Healthcheck received')
logger.logVerbose('Healthcheck received')
break
case 'session': {
const sessionId = work.data.id
try {
validateBridgeId(sessionId, 'session_id')
} catch {
await ackWork()
logger.logError(`Invalid session_id received: ${sessionId}`)
break
}
// If the session is already running, deliver the fresh token so
// the child process can reconnect its WebSocket with the new
// session ingress token. This handles the case where the server
// re-dispatches work for an existing session after the WS drops.
const existingHandle = activeSessions.get(sessionId)
if (existingHandle) {
existingHandle.updateAccessToken(secret.session_ingress_token)
sessionIngressTokens.set(sessionId, secret.session_ingress_token)
sessionWorkIds.set(sessionId, work.id)
// Re-schedule next refresh from the fresh JWT's expiry. onRefresh
// branches on v2Sessions so both v1 and v2 are safe here.
tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
logForDebugging(
`[bridge:work] Updated access token for existing sessionId=${sessionId} workId=${work.id}`,
)
await ackWork()
break
}
// At capacity — token refresh for existing sessions is handled
// above, but we cannot spawn new ones. The post-switch capacity
// sleep will throttle the loop; just break here.
if (activeSessions.size >= config.maxSessions) {
logForDebugging(
`[bridge:work] At capacity (${activeSessions.size}/${config.maxSessions}), cannot spawn new session for workId=${work.id}`,
)
break
}
await ackWork()
const spawnStartTime = Date.now()
// CCR v2 path: register this bridge as the session worker, get the
// epoch, and point the child at /v1/code/sessions/{id}. The child
// already has the full v2 client (SSETransport + CCRClient) — same
// code path environment-manager launches in containers.
//
// v1 path: Session-Ingress WebSocket. Uses config.sessionIngressUrl
// (not secret.api_base_url, which may point to a remote proxy tunnel
// that doesn't know about locally-created sessions).
let sdkUrl: string
let useCcrV2 = false
let workerEpoch: number | undefined
// Server decides per-session via the work secret; env var is the
// ant-dev override (e.g. forcing v2 before the server flag is on).
if (
secret.use_code_sessions === true ||
isEnvTruthy(process.env.CLAUDE_BRIDGE_USE_CCR_V2)
) {
sdkUrl = buildCCRv2SdkUrl(config.apiBaseUrl, sessionId)
// Retry once on transient failure (network blip, 500) before
// permanently giving up and killing the session.
for (let attempt = 1; attempt <= 2; attempt++) {
try {
workerEpoch = await registerWorker(
sdkUrl,
secret.session_ingress_token,
)
useCcrV2 = true
logForDebugging(
`[bridge:session] CCR v2: registered worker sessionId=${sessionId} epoch=${workerEpoch} attempt=${attempt}`,
)
break
} catch (err) {
const errMsg = errorMessage(err)
if (attempt < 2) {
logForDebugging(
`[bridge:session] CCR v2: registerWorker attempt ${attempt} failed, retrying: ${errMsg}`,
)
await sleep(2_000, loopSignal)
if (loopSignal.aborted) break
continue
}
logger.logError(
`CCR v2 worker registration failed for session ${sessionId}: ${errMsg}`,
)
logError(new Error(`registerWorker failed: ${errMsg}`))
completedWorkIds.add(work.id)
trackCleanup(
stopWorkWithRetry(
api,
environmentId,
work.id,
logger,
backoffConfig.stopWorkBaseDelayMs,
),
)
}
}
if (!useCcrV2) break
} else {
sdkUrl = buildSdkUrl(config.sessionIngressUrl, sessionId)
}
// In worktree mode, on-demand sessions get an isolated git worktree
// so concurrent sessions don't interfere with each other's file
// changes. The pre-created initial session (if any) runs in
// config.dir so the user's first session lands in the directory they
// invoked `rc` from — matching the old single-session UX.
// In same-dir and single-session modes, all sessions share config.dir.
// Capture spawnMode before the await below — the `w` key handler
// mutates config.spawnMode directly, and createAgentWorktree can
// take 1-2s, so reading config.spawnMode after the await can
// produce contradictory analytics (spawn_mode:'same-dir', in_worktree:true).
const spawnModeAtDecision = config.spawnMode
let sessionDir = config.dir
let worktreeCreateMs = 0
if (
spawnModeAtDecision === 'worktree' &&
(initialSessionId === undefined ||
!sameSessionId(sessionId, initialSessionId))
) {
const wtStart = Date.now()
try {
const wt = await createAgentWorktree(
`bridge-${safeFilenameId(sessionId)}`,
)
worktreeCreateMs = Date.now() - wtStart
sessionWorktrees.set(sessionId, {
worktreePath: wt.worktreePath,
worktreeBranch: wt.worktreeBranch,
gitRoot: wt.gitRoot,
hookBased: wt.hookBased,
})
sessionDir = wt.worktreePath
logForDebugging(
`[bridge:session] Created worktree for sessionId=${sessionId} at ${wt.worktreePath}`,
)
} catch (err) {
const errMsg = errorMessage(err)
logger.logError(
`Failed to create worktree for session ${sessionId}: ${errMsg}`,
)
logError(new Error(`Worktree creation failed: ${errMsg}`))
completedWorkIds.add(work.id)
trackCleanup(
stopWorkWithRetry(
api,
environmentId,
work.id,
logger,
backoffConfig.stopWorkBaseDelayMs,
),
)
break
}
}
logForDebugging(
`[bridge:session] Spawning sessionId=${sessionId} sdkUrl=${sdkUrl}`,
)
// compat-surface session_* form for logger/Sessions-API calls.
// Work poll returns cse_* under v2 compat; convert before spawn so
// the onFirstUserMessage callback can close over it.
const compatSessionId = toCompatSessionId(sessionId)
const spawnResult = safeSpawn(
spawner,
{
sessionId,
sdkUrl,
accessToken: secret.session_ingress_token,
useCcrV2,
workerEpoch,
onFirstUserMessage: text => {
// Server-set titles (--name, web rename) win. fetchSessionTitle
// runs concurrently; if it already populated titledSessions,
// skip. If it hasn't resolved yet, the derived title sticks —
// acceptable since the server had no title at spawn time.
if (titledSessions.has(compatSessionId)) return
titledSessions.add(compatSessionId)
const title = deriveSessionTitle(text)
logger.setSessionTitle(compatSessionId, title)
logForDebugging(
`[bridge:title] derived title for ${compatSessionId}: ${title}`,
)
void import('./createSession.js')
.then(({ updateBridgeSessionTitle }) =>
updateBridgeSessionTitle(compatSessionId, title, {
baseUrl: config.apiBaseUrl,
}),
)
.catch(err =>
logForDebugging(
`[bridge:title] failed to update title for ${compatSessionId}: ${err}`,
{ level: 'error' },
),
)
},
},
sessionDir,
)
if (typeof spawnResult === 'string') {
logger.logError(
`Failed to spawn session ${sessionId}: ${spawnResult}`,
)
// Clean up worktree if one was created for this session
const wt = sessionWorktrees.get(sessionId)
if (wt) {
sessionWorktrees.delete(sessionId)
trackCleanup(
removeAgentWorktree(
wt.worktreePath,
wt.worktreeBranch,
wt.gitRoot,
wt.hookBased,
).catch((err: unknown) =>
logger.logVerbose(
`Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
),
),
)
}
completedWorkIds.add(work.id)
trackCleanup(
stopWorkWithRetry(
api,
environmentId,
work.id,
logger,
backoffConfig.stopWorkBaseDelayMs,
),
)
break
}
const handle = spawnResult
const spawnDurationMs = Date.now() - spawnStartTime
logEvent('tengu_bridge_session_started', {
active_sessions: activeSessions.size,
spawn_mode:
spawnModeAtDecision as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
in_worktree: sessionWorktrees.has(sessionId),
spawn_duration_ms: spawnDurationMs,
worktree_create_ms: worktreeCreateMs,
inProtectedNamespace: isInProtectedNamespace(),
})
logForDiagnosticsNoPII('info', 'bridge_session_started', {
spawn_mode: spawnModeAtDecision,
in_worktree: sessionWorktrees.has(sessionId),
spawn_duration_ms: spawnDurationMs,
worktree_create_ms: worktreeCreateMs,
})
activeSessions.set(sessionId, handle)
sessionWorkIds.set(sessionId, work.id)
sessionIngressTokens.set(sessionId, secret.session_ingress_token)
sessionCompatIds.set(sessionId, compatSessionId)
const startTime = Date.now()
sessionStartTimes.set(sessionId, startTime)
// Use a generic prompt description since we no longer get startup_context
logger.logSessionStart(sessionId, `Session ${sessionId}`)
// Compute the actual debug file path (mirrors sessionRunner.ts logic)
const safeId = safeFilenameId(sessionId)
let sessionDebugFile: string | undefined
if (config.debugFile) {
const ext = config.debugFile.lastIndexOf('.')
if (ext > 0) {
sessionDebugFile = `${config.debugFile.slice(0, ext)}-${safeId}${config.debugFile.slice(ext)}`
} else {
sessionDebugFile = `${config.debugFile}-${safeId}`
}
} else if (config.verbose || process.env.USER_TYPE === 'ant') {
sessionDebugFile = join(
tmpdir(),
'claude',
`bridge-session-${safeId}.log`,
)
}
if (sessionDebugFile) {
logger.logVerbose(`Debug log: ${sessionDebugFile}`)
}
// Register in the sessions Map before starting status updates so the
// first render tick shows the correct count and bullet list in sync.
logger.addSession(
compatSessionId,
getRemoteSessionUrl(compatSessionId, config.sessionIngressUrl),
)
// Start live status updates and transition to "Attached" state.
startStatusUpdates()
logger.setAttached(compatSessionId)
// One-shot title fetch. If the session already has a title (set via
// --name, web rename, or /remote-control), display it and mark as
// titled so the first-user-message fallback doesn't overwrite it.
// Otherwise onFirstUserMessage derives one from the first prompt.
void fetchSessionTitle(compatSessionId, config.apiBaseUrl)
.then(title => {
if (title && activeSessions.has(sessionId)) {
titledSessions.add(compatSessionId)
logger.setSessionTitle(compatSessionId, title)
logForDebugging(
`[bridge:title] server title for ${compatSessionId}: ${title}`,
)
}
})
.catch(err =>
logForDebugging(
`[bridge:title] failed to fetch title for ${compatSessionId}: ${err}`,
{ level: 'error' },
),
)
// Start per-session timeout watchdog
const timeoutMs =
config.sessionTimeoutMs ?? DEFAULT_SESSION_TIMEOUT_MS
if (timeoutMs > 0) {
const timer = setTimeout(
onSessionTimeout,
timeoutMs,
sessionId,
timeoutMs,
logger,
timedOutSessions,
handle,
)
sessionTimers.set(sessionId, timer)
}
// Schedule proactive token refresh before the JWT expires.
// onRefresh branches on v2Sessions: v1 delivers OAuth to the
// child, v2 triggers server re-dispatch via reconnectSession.
if (useCcrV2) {
v2Sessions.add(sessionId)
}
tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
void handle.done.then(onSessionDone(sessionId, startTime, handle))
break
}
default:
await ackWork()
// Gracefully ignore unknown work types. The backend may send new
// types before the bridge client is updated.
logForDebugging(
`[bridge:work] Unknown work type: ${workType}, skipping`,
)
break
}
// When at capacity, throttle the loop. The switch above still runs so
// existing-session token refreshes are processed, but we sleep here
// to avoid busy-looping. Include the capacity wake signal so the
// sleep is interrupted immediately when a session completes.
if (atCapacityBeforeSwitch) {
const cap = capacityWake.signal()
if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
await heartbeatActiveWorkItems()
await sleep(
pollConfig.non_exclusive_heartbeat_interval_ms,
cap.signal,
)
} else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
await sleep(
pollConfig.multisession_poll_interval_ms_at_capacity,
cap.signal,
)
}
cap.cleanup()
}
} catch (err) {
if (loopSignal.aborted) {
break
}
// Fatal errors (401/403) — no point retrying, auth won't fix itself
if (err instanceof BridgeFatalError) {
fatalExit = true
// Server-enforced expiry gets a clean status message, not an error
if (isExpiredErrorType(err.errorType)) {
logger.logStatus(err.message)
} else if (isSuppressible403(err)) {
// Cosmetic 403 errors (e.g., external_poll_sessions scope,
// environments:manage permission) — don't show to user
logForDebugging(`[bridge:work] Suppressed 403 error: ${err.message}`)
} else {
logger.logError(err.message)
logError(err)
}
logEvent('tengu_bridge_fatal_error', {
status: err.status,
error_type:
err.errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
logForDiagnosticsNoPII(
isExpiredErrorType(err.errorType) ? 'info' : 'error',
'bridge_fatal_error',
{ status: err.status, error_type: err.errorType },
)
break
}
const errMsg = describeAxiosError(err)
if (isConnectionError(err) || isServerError(err)) {
const now = Date.now()
// Detect system sleep/wake: if the gap since the last poll error
// greatly exceeds the expected backoff, the machine likely slept.
// Reset error tracking so the bridge retries with a fresh budget.
if (
lastPollErrorTime !== null &&
now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
) {
logForDebugging(
`[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
)
logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
gapMs: now - lastPollErrorTime,
})
connErrorStart = null
connBackoff = 0
generalErrorStart = null
generalBackoff = 0
}
lastPollErrorTime = now
if (!connErrorStart) {
connErrorStart = now
}
const elapsed = now - connErrorStart
if (elapsed >= backoffConfig.connGiveUpMs) {
logger.logError(
`Server unreachable for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
)
logEvent('tengu_bridge_poll_give_up', {
error_type:
'connection' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
elapsed_ms: elapsed,
})
logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
error_type: 'connection',
elapsed_ms: elapsed,
})
fatalExit = true
break
}
// Reset the other track when switching error types
generalErrorStart = null
generalBackoff = 0
connBackoff = connBackoff
? Math.min(connBackoff * 2, backoffConfig.connCapMs)
: backoffConfig.connInitialMs
const delay = addJitter(connBackoff)
logger.logVerbose(
`Connection error, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
)
logger.updateReconnectingStatus(
formatDelay(delay),
formatDuration(elapsed),
)
// The poll_due heartbeat-loop exit leaves a healthy lease exposed to
// this backoff path. Heartbeat before each sleep so /poll outages
// (the VerifyEnvironmentSecretAuth DB path heartbeat was introduced
// to avoid) don't kill the 300s lease TTL. No-op when activeSessions
// is empty or heartbeat is disabled.
if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
await heartbeatActiveWorkItems()
}
await sleep(delay, loopSignal)
} else {
const now = Date.now()
// Sleep detection for general errors (same logic as connection errors)
if (
lastPollErrorTime !== null &&
now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
) {
logForDebugging(
`[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
)
logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
gapMs: now - lastPollErrorTime,
})
connErrorStart = null
connBackoff = 0
generalErrorStart = null
generalBackoff = 0
}
lastPollErrorTime = now
if (!generalErrorStart) {
generalErrorStart = now
}
const elapsed = now - generalErrorStart
if (elapsed >= backoffConfig.generalGiveUpMs) {
logger.logError(
`Persistent errors for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
)
logEvent('tengu_bridge_poll_give_up', {
error_type:
'general' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
elapsed_ms: elapsed,
})
logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
error_type: 'general',
elapsed_ms: elapsed,
})
fatalExit = true
break
}
// Reset the other track when switching error types
connErrorStart = null
connBackoff = 0
generalBackoff = generalBackoff
? Math.min(generalBackoff * 2, backoffConfig.generalCapMs)
: backoffConfig.generalInitialMs
const delay = addJitter(generalBackoff)
logger.logVerbose(
`Poll failed, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
)
logger.updateReconnectingStatus(
formatDelay(delay),
formatDuration(elapsed),
)
if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
await heartbeatActiveWorkItems()
}
await sleep(delay, loopSignal)
}
}
}
// Clean up
stopStatusUpdates()
logger.clearStatus()
const loopDurationMs = Date.now() - loopStartTime
logEvent('tengu_bridge_shutdown', {
active_sessions: activeSessions.size,
loop_duration_ms: loopDurationMs,
})
logForDiagnosticsNoPII('info', 'bridge_shutdown', {
active_sessions: activeSessions.size,
loop_duration_ms: loopDurationMs,
})
// Graceful shutdown: kill active sessions, report them as interrupted,
// archive sessions, then deregister the environment so the web UI shows
// the bridge as offline.
// Collect all session IDs to archive on exit. This includes:
// 1. Active sessions (snapshot before killing — onSessionDone clears maps)
// 2. The initial auto-created session (may never have had work dispatched)
// api.archiveSession is idempotent (409 if already archived), so
// double-archiving is safe.
const sessionsToArchive = new Set(activeSessions.keys())
if (initialSessionId) {
sessionsToArchive.add(initialSessionId)
}
// Snapshot before killing — onSessionDone clears sessionCompatIds.
const compatIdSnapshot = new Map(sessionCompatIds)
if (activeSessions.size > 0) {
logForDebugging(
`[bridge:shutdown] Shutting down ${activeSessions.size} active session(s)`,
)
logger.logStatus(
`Shutting down ${activeSessions.size} active session(s)\u2026`,
)
// Snapshot work IDs before killing — onSessionDone clears the maps when
// each child exits, so we need a copy for the stopWork calls below.
const shutdownWorkIds = new Map(sessionWorkIds)
for (const [sessionId, handle] of activeSessions.entries()) {
logForDebugging(
`[bridge:shutdown] Sending SIGTERM to sessionId=${sessionId}`,
)
handle.kill()
}
const timeout = new AbortController()
await Promise.race([
Promise.allSettled([...activeSessions.values()].map(h => h.done)),
sleep(backoffConfig.shutdownGraceMs ?? 30_000, timeout.signal),
])
timeout.abort()
// SIGKILL any processes that didn't respond to SIGTERM within the grace window
for (const [sid, handle] of activeSessions.entries()) {
logForDebugging(`[bridge:shutdown] Force-killing stuck sessionId=${sid}`)
handle.forceKill()
}
// Clear any remaining session timeout and refresh timers
for (const timer of sessionTimers.values()) {
clearTimeout(timer)
}
sessionTimers.clear()
tokenRefresh?.cancelAll()
// Clean up any remaining worktrees from active sessions.
// Snapshot and clear the map first so onSessionDone (which may fire
// during the await below when handle.done resolves) won't try to
// remove the same worktrees again.
if (sessionWorktrees.size > 0) {
const remainingWorktrees = [...sessionWorktrees.values()]
sessionWorktrees.clear()
logForDebugging(
`[bridge:shutdown] Cleaning up ${remainingWorktrees.length} worktree(s)`,
)
await Promise.allSettled(
remainingWorktrees.map(wt =>
removeAgentWorktree(
wt.worktreePath,
wt.worktreeBranch,
wt.gitRoot,
wt.hookBased,
),
),
)
}
// Stop all active work items so the server knows they're done
await Promise.allSettled(
[...shutdownWorkIds.entries()].map(([sessionId, workId]) => {
return api
.stopWork(environmentId, workId, true)
.catch(err =>
logger.logVerbose(
`Failed to stop work ${workId} for session ${sessionId}: ${errorMessage(err)}`,
),
)
}),
)
}
// Ensure all in-flight cleanup (stopWork, worktree removal) from
// onSessionDone completes before deregistering — otherwise
// process.exit() can kill them mid-flight.
if (pendingCleanups.size > 0) {
await Promise.allSettled([...pendingCleanups])
}
// In single-session mode with a known session, leave the session and
// environment alive so `claude remote-control --session-id=` can resume.
// The backend GCs stale environments via a 4h TTL (BRIDGE_LAST_POLL_TTL).
// Archiving the session or deregistering the environment would make the
// printed resume command a lie — deregister deletes Firestore + Redis stream.
// Skip when the loop exited fatally (env expired, auth failed, give-up) —
// resume is impossible in those cases and the message would contradict the
// error already printed.
// feature('KAIROS') gate: --session-id is ant-only; without the gate,
// revert to the pre-PR behavior (archive + deregister on every shutdown).
if (
feature('KAIROS') &&
config.spawnMode === 'single-session' &&
initialSessionId &&
!fatalExit
) {
logger.logStatus(
`Resume this session by running \`claude remote-control --continue\``,
)
logForDebugging(
`[bridge:shutdown] Skipping archive+deregister to allow resume of session ${initialSessionId}`,
)
return
}
// Archive all known sessions so they don't linger as idle/running on the
// server after the bridge goes offline.
if (sessionsToArchive.size > 0) {
logForDebugging(
`[bridge:shutdown] Archiving ${sessionsToArchive.size} session(s)`,
)
await Promise.allSettled(
[...sessionsToArchive].map(sessionId =>
api
.archiveSession(
compatIdSnapshot.get(sessionId) ?? toCompatSessionId(sessionId),
)
.catch(err =>
logger.logVerbose(
`Failed to archive session ${sessionId}: ${errorMessage(err)}`,
),
),
),
)
}
// Deregister the environment so the web UI shows the bridge as offline
// and the Redis stream is cleaned up.
try {
await api.deregisterEnvironment(environmentId)
logForDebugging(
`[bridge:shutdown] Environment deregistered, bridge offline`,
)
logger.logVerbose('Environment deregistered.')
} catch (err) {
logger.logVerbose(`Failed to deregister environment: ${errorMessage(err)}`)
}
// Clear the crash-recovery pointer — the env is gone, pointer would be
// stale. The early return above (resumable SIGINT shutdown) skips this,
// leaving the pointer as a backup for the printed --session-id hint.
const { clearBridgePointer } = await import('./bridgePointer.js')
await clearBridgePointer(config.dir)
logger.logVerbose('Environment offline.')
}
/**
 * Error codes surfaced by Node's networking stack when the server is
 * unreachable. Poll failures with one of these codes go onto the
 * connection-error backoff track rather than the general-error track.
 */
const CONNECTION_ERROR_CODES = new Set([
  'ECONNREFUSED',
  'ECONNRESET',
  'ETIMEDOUT',
  'ENETUNREACH',
  'EHOSTUNREACH',
])

/**
 * True when `err` is an object carrying a string `code` that names a
 * known network-level failure (see CONNECTION_ERROR_CODES).
 */
export function isConnectionError(err: unknown): boolean {
  // Guard clauses: anything that isn't an object with a `code` property
  // cannot be a Node network error.
  if (!err || typeof err !== 'object' || !('code' in err)) {
    return false
  }
  const { code } = err
  return typeof code === 'string' && CONNECTION_ERROR_CODES.has(code)
}
/** Detect HTTP 5xx errors from axios (code: 'ERR_BAD_RESPONSE'). */
export function isServerError(err: unknown): boolean {
  // Only object-shaped errors with a `code` property can match.
  if (!err || typeof err !== 'object' || !('code' in err)) {
    return false
  }
  return err.code === 'ERR_BAD_RESPONSE'
}
/**
 * Add ±25% jitter to a delay value.
 * The result lies in [0.75·ms, 1.25·ms] for non-negative input, clamped at 0.
 */
function addJitter(ms: number): number {
  // Math.random() * 2 - 1 is uniform in [-1, 1); scale to ±25% of ms.
  const offset = ms * 0.25 * (Math.random() * 2 - 1)
  return Math.max(0, ms + offset)
}
/** Render a millisecond delay for logs: sub-second as "123ms", otherwise "1.5s". */
function formatDelay(ms: number): string {
  if (ms < 1000) {
    return `${Math.round(ms)}ms`
  }
  return `${(ms / 1000).toFixed(1)}s`
}
/**
 * Retry stopWork with exponential backoff (3 attempts, 1s/2s/4s base, jittered).
 * Ensures the server learns the work item ended, preventing server-side zombies.
 *
 * Never throws: fatal (auth/permission) errors stop retrying immediately,
 * and exhausting all attempts is logged rather than propagated.
 *
 * @param api           Bridge API client used for the stopWork call.
 * @param environmentId Environment that owns the work item.
 * @param workId        Work item to stop.
 * @param logger        Destination for user-visible error/verbose output.
 * @param baseDelayMs   Base backoff delay; doubled per attempt before jitter.
 */
async function stopWorkWithRetry(
  api: BridgeApiClient,
  environmentId: string,
  workId: string,
  logger: BridgeLogger,
  baseDelayMs = 1000,
): Promise<void> {
  const MAX_ATTEMPTS = 3
  let attempt = 0
  while (attempt < MAX_ATTEMPTS) {
    attempt += 1
    try {
      await api.stopWork(environmentId, workId, false)
      logForDebugging(
        `[bridge:work] stopWork succeeded for workId=${workId} on attempt ${attempt}/${MAX_ATTEMPTS}`,
      )
      return
    } catch (err) {
      if (err instanceof BridgeFatalError) {
        // Auth/permission errors won't be fixed by retrying — stop now.
        if (isSuppressible403(err)) {
          logForDebugging(
            `[bridge:work] Suppressed stopWork 403 for ${workId}: ${err.message}`,
          )
        } else {
          logger.logError(`Failed to stop work ${workId}: ${err.message}`)
        }
        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
          attempts: attempt,
          fatal: true,
        })
        return
      }
      const errMsg = errorMessage(err)
      if (attempt >= MAX_ATTEMPTS) {
        // Budget exhausted — surface the failure and give up.
        logger.logError(
          `Failed to stop work ${workId} after ${MAX_ATTEMPTS} attempts: ${errMsg}`,
        )
        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
          attempts: MAX_ATTEMPTS,
        })
      } else {
        // Exponential backoff with jitter before the next attempt.
        const delay = addJitter(baseDelayMs * 2 ** (attempt - 1))
        logger.logVerbose(
          `Failed to stop work ${workId} (attempt ${attempt}/${MAX_ATTEMPTS}), retrying in ${formatDelay(delay)}: ${errMsg}`,
        )
        await sleep(delay)
      }
    }
  }
}
/**
 * Watchdog callback fired when a session exceeds its configured lifetime.
 * Logs the timeout, records the session in `timedOutSessions` (presumably so
 * the exit handler can distinguish a deliberate timeout kill from a crash —
 * confirm against onSessionDone), then SIGTERMs the child via handle.kill().
 */
function onSessionTimeout(
  sessionId: string,
  timeoutMs: number,
  logger: BridgeLogger,
  timedOutSessions: Set<string>,
  handle: SessionHandle,
): void {
  const prettyDuration = formatDuration(timeoutMs)
  logForDebugging(
    `[bridge:session] sessionId=${sessionId} timed out after ${prettyDuration}`,
  )
  logEvent('tengu_bridge_session_timeout', { timeout_ms: timeoutMs })
  logger.logSessionFailed(
    sessionId,
    `Session timed out after ${prettyDuration}`,
  )
  timedOutSessions.add(sessionId)
  handle.kill()
}
/** Parsed command-line flags for `claude remote-control` (see parseArgs). */
export type ParsedArgs = {
  /** --verbose / -v: enable verbose output. */
  verbose: boolean
  /** --sandbox / --no-sandbox: last occurrence wins. */
  sandbox: boolean
  /** --debug-file: path for debug logs (resolved to an absolute path). */
  debugFile?: string
  /** --session-timeout: given in seconds on the CLI, stored in milliseconds. */
  sessionTimeoutMs?: number
  /** --permission-mode: permission mode for spawned sessions. */
  permissionMode?: string
  /** --name: display name for the session. */
  name?: string
  /** Value passed to --spawn (if any); undefined if no --spawn flag was given. */
  spawnMode: SpawnMode | undefined
  /** Value passed to --capacity (if any); undefined if no --capacity flag was given. */
  capacity: number | undefined
  /** --[no-]create-session-in-dir override; undefined = use default (on). */
  createSessionInDir: boolean | undefined
  /** Resume an existing session instead of creating a new one. */
  sessionId?: string
  /** Resume the last session in this directory (reads bridge-pointer.json). */
  continueSession: boolean
  /** --help / -h was given. */
  help: boolean
  /** Set when parsing failed; other fields hold whatever parsed before the error. */
  error?: string
}
/** Values accepted by the --spawn flag, in the order shown in error text. */
const SPAWN_FLAG_VALUES = ['session', 'same-dir', 'worktree'] as const

/**
 * Map a raw --spawn flag value to its SpawnMode, or return an error string
 * for invalid/missing input. 'session' is the CLI spelling of the internal
 * 'single-session' mode; the other two pass through unchanged.
 */
function parseSpawnValue(raw: string | undefined): SpawnMode | string {
  switch (raw) {
    case 'session':
      return 'single-session'
    case 'same-dir':
    case 'worktree':
      return raw
    default:
      return `--spawn requires one of: ${SPAWN_FLAG_VALUES.join(', ')} (got: ${raw ?? ''})`
  }
}
/**
 * Parse the --capacity flag value into a positive integer.
 *
 * Uses a strict digits-only check rather than bare parseInt so malformed
 * values like "3.7" or "5x" are rejected instead of being silently truncated
 * to their leading digits (parseInt('3.7', 10) === 3). Also rejects values
 * beyond Number.MAX_SAFE_INTEGER, which cannot be represented exactly.
 *
 * @param raw Raw flag value (undefined when the flag had no argument).
 * @returns The parsed capacity on success, or an error message string.
 */
function parseCapacityValue(raw: string | undefined): number | string {
  if (raw !== undefined && /^\d+$/.test(raw)) {
    const n = parseInt(raw, 10)
    if (n >= 1 && Number.isSafeInteger(n)) {
      return n
    }
  }
  return `--capacity requires a positive integer (got: ${raw ?? ''})`
}
/**
 * Parse `claude remote-control` CLI arguments into a ParsedArgs.
 *
 * Every flag supports both the space-separated form (`--flag value`) and the
 * `=` form (`--flag=value`). On any invalid input, parsing stops and the
 * returned object carries `error` (via the makeError closure) alongside
 * whatever values were successfully parsed before the failure.
 *
 * @param args Raw argv slice (program/script prefix already removed).
 */
export function parseArgs(args: string[]): ParsedArgs {
  let verbose = false
  let sandbox = false
  let debugFile: string | undefined
  let sessionTimeoutMs: number | undefined
  let permissionMode: string | undefined
  let name: string | undefined
  let help = false
  let spawnMode: SpawnMode | undefined
  let capacity: number | undefined
  let createSessionInDir: boolean | undefined
  let sessionId: string | undefined
  let continueSession = false
  for (let i = 0; i < args.length; i++) {
    const arg = args[i]!
    if (arg === '--help' || arg === '-h') {
      help = true
    } else if (arg === '--verbose' || arg === '-v') {
      verbose = true
    } else if (arg === '--sandbox') {
      sandbox = true
    } else if (arg === '--no-sandbox') {
      sandbox = false
    } else if (arg === '--debug-file' && i + 1 < args.length) {
      // Space-separated form consumes the next token via ++i. A value-less
      // `--debug-file` at the end of args fails the guard and falls through
      // to the unknown-argument error below.
      debugFile = resolve(args[++i]!)
    } else if (arg.startsWith('--debug-file=')) {
      debugFile = resolve(arg.slice('--debug-file='.length))
    } else if (arg === '--session-timeout' && i + 1 < args.length) {
      // Flag value is in seconds; stored internally as milliseconds.
      sessionTimeoutMs = parseInt(args[++i]!, 10) * 1000
    } else if (arg.startsWith('--session-timeout=')) {
      sessionTimeoutMs =
        parseInt(arg.slice('--session-timeout='.length), 10) * 1000
    } else if (arg === '--permission-mode' && i + 1 < args.length) {
      permissionMode = args[++i]!
    } else if (arg.startsWith('--permission-mode=')) {
      permissionMode = arg.slice('--permission-mode='.length)
    } else if (arg === '--name' && i + 1 < args.length) {
      name = args[++i]!
    } else if (arg.startsWith('--name=')) {
      name = arg.slice('--name='.length)
    } else if (
      // --session-id and --continue are gated behind the KAIROS feature
      // flag; without it they fall through to the unknown-argument error.
      feature('KAIROS') &&
      arg === '--session-id' &&
      i + 1 < args.length
    ) {
      sessionId = args[++i]!
      if (!sessionId) {
        return makeError('--session-id requires a value')
      }
    } else if (feature('KAIROS') && arg.startsWith('--session-id=')) {
      sessionId = arg.slice('--session-id='.length)
      if (!sessionId) {
        return makeError('--session-id requires a value')
      }
    } else if (feature('KAIROS') && (arg === '--continue' || arg === '-c')) {
      continueSession = true
    } else if (arg === '--spawn' || arg.startsWith('--spawn=')) {
      if (spawnMode !== undefined) {
        return makeError('--spawn may only be specified once')
      }
      // Accept both `--spawn=VALUE` and `--spawn VALUE`; a trailing bare
      // `--spawn` yields raw === undefined, which parseSpawnValue rejects.
      const raw = arg.startsWith('--spawn=')
        ? arg.slice('--spawn='.length)
        : args[++i]
      const v = parseSpawnValue(raw)
      if (v === 'single-session' || v === 'same-dir' || v === 'worktree') {
        spawnMode = v
      } else {
        // parseSpawnValue returned an error string.
        return makeError(v)
      }
    } else if (arg === '--capacity' || arg.startsWith('--capacity=')) {
      if (capacity !== undefined) {
        return makeError('--capacity may only be specified once')
      }
      const raw = arg.startsWith('--capacity=')
        ? arg.slice('--capacity='.length)
        : args[++i]
      const v = parseCapacityValue(raw)
      if (typeof v === 'number') capacity = v
      else return makeError(v)
    } else if (arg === '--create-session-in-dir') {
      createSessionInDir = true
    } else if (arg === '--no-create-session-in-dir') {
      createSessionInDir = false
    } else {
      return makeError(
        `Unknown argument: ${arg}\nRun 'claude remote-control --help' for usage.`,
      )
    }
  }
  // Note: gate check for --spawn/--capacity/--create-session-in-dir is in bridgeMain
  // (gate-aware error). Flag cross-validation happens here.
  // --capacity only makes sense for multi-session modes.
  if (spawnMode === 'single-session' && capacity !== undefined) {
    return makeError(
      `--capacity cannot be used with --spawn=session (single-session mode has fixed capacity 1).`,
    )
  }
  // --session-id / --continue resume a specific session on its original
  // environment; incompatible with spawn-related flags (which configure
  // fresh session creation), and mutually exclusive with each other.
  if (
    (sessionId || continueSession) &&
    (spawnMode !== undefined ||
      capacity !== undefined ||
      createSessionInDir !== undefined)
  ) {
    return makeError(
      `--session-id and --continue cannot be used with --spawn, --capacity, or --create-session-in-dir.`,
    )
  }
  if (sessionId && continueSession) {
    return makeError(`--session-id and --continue cannot be used together.`)
  }
  return {
    verbose,
    sandbox,
    debugFile,
    sessionTimeoutMs,
    permissionMode,
    name,
    spawnMode,
    capacity,
    createSessionInDir,
    sessionId,
    continueSession,
    help,
  }
  // Closure over the locals above: snapshot whatever has been parsed so far
  // and attach the error message.
  function makeError(error: string): ParsedArgs {
    return {
      verbose,
      sandbox,
      debugFile,
      sessionTimeoutMs,
      permissionMode,
      name,
      spawnMode,
      capacity,
      createSessionInDir,
      sessionId,
      continueSession,
      help,
      error,
    }
  }
}
/**
 * Print the `claude remote-control --help` text to stdout.
 *
 * Sections gated on isMultiSessionSpawnEnabled() (spawn/capacity flags and
 * the multi-session description) and feature('KAIROS') (resume flags) are
 * included only when those gates are on. Code lines and template-literal
 * contents below are intentionally left byte-identical — the literals ARE
 * the user-visible output.
 */
async function printHelp(): Promise {
// Use EXTERNAL_PERMISSION_MODES for help text — internal modes (bubble)
// are ant-only and auto is feature-gated; they're still accepted by validation.
const { EXTERNAL_PERMISSION_MODES } = await import('../types/permissions.js')
const modes = EXTERNAL_PERMISSION_MODES.join(', ')
// Multi-session spawn gate controls the optional help sections below.
const showServer = await isMultiSessionSpawnEnabled()
const serverOptions = showServer
? ` --spawn Spawn mode: same-dir, worktree, session
(default: same-dir)
--capacity Max concurrent sessions in worktree or
same-dir mode (default: ${SPAWN_SESSIONS_DEFAULT})
--[no-]create-session-in-dir Pre-create a session in the current
directory; in worktree mode this session
stays in cwd while on-demand sessions get
isolated worktrees (default: on)
`
: ''
const serverDescription = showServer
? `
Remote Control runs as a persistent server that accepts multiple concurrent
sessions in the current directory. One session is pre-created on start so
you have somewhere to type immediately. Use --spawn=worktree to isolate
each on-demand session in its own git worktree, or --spawn=session for
the classic single-session mode (exits when that session ends). Press 'w'
during runtime to toggle between same-dir and worktree.
`
: ''
const serverNote = showServer
? ` - Worktree mode requires a git repository or WorktreeCreate/WorktreeRemove hooks
`
: ''
// Assemble the final help text from the gated fragments above.
const help = `
Remote Control - Connect your local environment to claude.ai/code
USAGE
claude remote-control [options]
OPTIONS
--name Name for the session (shown in claude.ai/code)
${
feature('KAIROS')
? ` -c, --continue Resume the last session in this directory
--session-id Resume a specific session by ID (cannot be
used with spawn flags or --continue)
`
: ''
} --permission-mode Permission mode for spawned sessions
(${modes})
--debug-file Write debug logs to file
-v, --verbose Enable verbose output
-h, --help Show this help
${serverOptions}
DESCRIPTION
Remote Control allows you to control sessions on your local device from
claude.ai/code (https://claude.ai/code). Run this command in the
directory you want to work in, then connect from the Claude app or web.
${serverDescription}
NOTES
- You must be logged in with a Claude account that has a subscription
- Run \`claude\` first in the directory to accept the workspace trust dialog
${serverNote}`
// biome-ignore lint/suspicious/noConsole: intentional help output
console.log(help)
}
const TITLE_MAX_LEN = 80
/** Build a one-line session title from a user message, capped at TITLE_MAX_LEN. */
function deriveSessionTitle(text: string): string {
  // Strip leading/trailing padding, then squash every interior whitespace run
  // (newlines/tabs included) into a single space — multi-line text would
  // break the single-line status display.
  const singleLine = text.trim().replace(/\s+/g, ' ')
  return truncateToWidth(singleLine, TITLE_MAX_LEN)
}
/**
 * One-shot fetch of a session's title via GET /v1/sessions/{id}.
 *
 * Uses `getBridgeSession` from createSession.ts (ccr-byoc headers + org UUID)
 * rather than the environments-level bridgeApi client, whose headers make the
 * Sessions API return 404. Returns undefined if the session has no title yet
 * (empty/absent title is coerced to undefined) or the fetch fails — the
 * caller falls back to deriving a title from the first user message.
 */
async function fetchSessionTitle(
  compatSessionId: string,
  baseUrl: string,
): Promise<string | undefined> {
  const { getBridgeSession } = await import('./createSession.js')
  const session = await getBridgeSession(compatSessionId, { baseUrl })
  // `|| undefined` intentionally maps an empty-string title to undefined.
  return session?.title || undefined
}
export async function bridgeMain(args: string[]): Promise {
const parsed = parseArgs(args)
if (parsed.help) {
await printHelp()
return
}
if (parsed.error) {
// biome-ignore lint/suspicious/noConsole: intentional error output
console.error(`Error: ${parsed.error}`)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
const {
verbose,
sandbox,
debugFile,
sessionTimeoutMs,
permissionMode,
name,
spawnMode: parsedSpawnMode,
capacity: parsedCapacity,
createSessionInDir: parsedCreateSessionInDir,
sessionId: parsedSessionId,
continueSession,
} = parsed
// Mutable so --continue can set it from the pointer file. The #20460
// resume flow below then treats it the same as an explicit --session-id.
let resumeSessionId = parsedSessionId
// When --continue found a pointer, this is the directory it came from
// (may be a worktree sibling, not `dir`). On resume-flow deterministic
// failure, clear THIS file so --continue doesn't keep hitting the same
// dead session. Undefined for explicit --session-id (leaves pointer alone).
let resumePointerDir: string | undefined
const usedMultiSessionFeature =
parsedSpawnMode !== undefined ||
parsedCapacity !== undefined ||
parsedCreateSessionInDir !== undefined
// Validate permission mode early so the user gets an error before
// the bridge starts polling for work.
if (permissionMode !== undefined) {
const { PERMISSION_MODES } = await import('../types/permissions.js')
const valid: readonly string[] = PERMISSION_MODES
if (!valid.includes(permissionMode)) {
// biome-ignore lint/suspicious/noConsole: intentional error output
console.error(
`Error: Invalid permission mode '${permissionMode}'. Valid modes: ${valid.join(', ')}`,
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
}
const dir = resolve('.')
// The bridge fast-path bypasses init.ts, so we must enable config reading
// before any code that transitively calls getGlobalConfig()
const { enableConfigs, checkHasTrustDialogAccepted } = await import(
'../utils/config.js'
)
enableConfigs()
// Initialize analytics and error reporting sinks. The bridge bypasses the
// setup() init flow, so we call initSinks() directly to attach sinks here.
const { initSinks } = await import('../utils/sinks.js')
initSinks()
// Gate-aware validation: --spawn / --capacity / --create-session-in-dir require
// the multi-session gate. parseArgs has already validated flag combinations;
// here we only check the gate since that requires an async GrowthBook call.
// Runs after enableConfigs() (GrowthBook cache reads global config) and after
// initSinks() so the denial event can be enqueued.
const multiSessionEnabled = await isMultiSessionSpawnEnabled()
if (usedMultiSessionFeature && !multiSessionEnabled) {
await logEventAsync('tengu_bridge_multi_session_denied', {
used_spawn: parsedSpawnMode !== undefined,
used_capacity: parsedCapacity !== undefined,
used_create_session_in_dir: parsedCreateSessionInDir !== undefined,
})
// logEventAsync only enqueues — process.exit() discards buffered events.
// Flush explicitly, capped at 500ms to match gracefulShutdown.ts.
// (sleep() doesn't unref its timer, but process.exit() follows immediately
// so the ref'd timer can't delay shutdown.)
await Promise.race([
Promise.all([shutdown1PEventLogging(), shutdownDatadog()]),
sleep(500, undefined, { unref: true }),
]).catch(() => {})
// biome-ignore lint/suspicious/noConsole: intentional error output
console.error(
'Error: Multi-session Remote Control is not enabled for your account yet.',
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
// Set the bootstrap CWD so that trust checks, project config lookups, and
// git utilities (getBranch, getRemoteUrl) resolve against the correct path.
const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
setOriginalCwd(dir)
setCwdState(dir)
// The bridge bypasses main.tsx (which renders the interactive TrustDialog via showSetupScreens),
// so we must verify trust was previously established by a normal `claude` session.
if (!checkHasTrustDialogAccepted()) {
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.error(
`Error: Workspace not trusted. Please run \`claude\` in ${dir} first to review and accept the workspace trust dialog.`,
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
// Resolve auth
const { clearOAuthTokenCache, checkAndRefreshOAuthTokenIfNeeded } =
await import('../utils/auth.js')
const { getBridgeAccessToken, getBridgeBaseUrl } = await import(
'./bridgeConfig.js'
)
const bridgeToken = getBridgeAccessToken()
if (!bridgeToken) {
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.error(BRIDGE_LOGIN_ERROR)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
// First-time remote dialog — explain what bridge does and get consent
const {
getGlobalConfig,
saveGlobalConfig,
getCurrentProjectConfig,
saveCurrentProjectConfig,
} = await import('../utils/config.js')
if (!getGlobalConfig().remoteDialogSeen) {
const readline = await import('readline')
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
})
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.log(
'\nRemote Control lets you access this CLI session from the web (claude.ai/code)\nor the Claude app, so you can pick up where you left off on any device.\n\nYou can disconnect remote access anytime by running /remote-control again.\n',
)
const answer = await new Promise(resolve => {
rl.question('Enable Remote Control? (y/n) ', resolve)
})
rl.close()
saveGlobalConfig(current => {
if (current.remoteDialogSeen) return current
return { ...current, remoteDialogSeen: true }
})
if (answer.toLowerCase() !== 'y' && answer.toLowerCase() !== 'yes') {
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(0)
}
}
// --continue: resolve the most recent session from the crash-recovery
// pointer and chain into the #20460 --session-id flow. Worktree-aware:
// checks current dir first (fast path, zero exec), then fans out to git
// worktree siblings if that misses — the REPL bridge writes to
// getOriginalCwd() which EnterWorktreeTool/activeWorktreeSession can
// point at a worktree while the user's shell is at the repo root.
// KAIROS-gated at parseArgs — continueSession is always false in external
// builds, so this block tree-shakes.
if (feature('KAIROS') && continueSession) {
const { readBridgePointerAcrossWorktrees } = await import(
'./bridgePointer.js'
)
const found = await readBridgePointerAcrossWorktrees(dir)
if (!found) {
// biome-ignore lint/suspicious/noConsole: intentional error output
console.error(
`Error: No recent session found in this directory or its worktrees. Run \`claude remote-control\` to start a new one.`,
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
const { pointer, dir: pointerDir } = found
const ageMin = Math.round(pointer.ageMs / 60_000)
const ageStr = ageMin < 60 ? `${ageMin}m` : `${Math.round(ageMin / 60)}h`
const fromWt = pointerDir !== dir ? ` from worktree ${pointerDir}` : ''
// biome-ignore lint/suspicious/noConsole: intentional info output
console.error(
`Resuming session ${pointer.sessionId} (${ageStr} ago)${fromWt}\u2026`,
)
resumeSessionId = pointer.sessionId
// Track where the pointer came from so the #20460 exit(1) paths below
// clear the RIGHT file on deterministic failure — otherwise --continue
// would keep hitting the same dead session. May be a worktree sibling.
resumePointerDir = pointerDir
}
// In production, baseUrl is the Anthropic API (from OAuth config).
// CLAUDE_BRIDGE_BASE_URL overrides this for ant local dev only.
const baseUrl = getBridgeBaseUrl()
// For non-localhost targets, require HTTPS to protect credentials.
if (
baseUrl.startsWith('http://') &&
!baseUrl.includes('localhost') &&
!baseUrl.includes('127.0.0.1')
) {
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.error(
'Error: Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
// Session ingress URL for WebSocket connections. In production this is the
// same as baseUrl (Envoy routes /v1/session_ingress/* to session-ingress).
// Locally, session-ingress runs on a different port (9413) than the
// contain-provide-api (8211), so CLAUDE_BRIDGE_SESSION_INGRESS_URL must be
// set explicitly. Ant-only, matching CLAUDE_BRIDGE_BASE_URL.
const sessionIngressUrl =
process.env.USER_TYPE === 'ant' &&
process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
: baseUrl
const { getBranch, getRemoteUrl, findGitRoot } = await import(
'../utils/git.js'
)
// Precheck worktree availability for the first-run dialog and the `w`
// toggle. Unconditional so we know upfront whether worktree is an option.
const { hasWorktreeCreateHook } = await import('../utils/hooks.js')
const worktreeAvailable = hasWorktreeCreateHook() || findGitRoot(dir) !== null
// Load saved per-project spawn-mode preference. Gated by multiSessionEnabled
// so a GrowthBook rollback cleanly reverts users to single-session —
// otherwise a saved pref would silently re-enable multi-session behavior
// (worktree isolation, 32 max sessions, w toggle) despite the gate being off.
// Also guard against a stale worktree pref left over from when this dir WAS
// a git repo (or the user copied config) — clear it on disk so the warning
// doesn't repeat on every launch.
let savedSpawnMode = multiSessionEnabled
? getCurrentProjectConfig().remoteControlSpawnMode
: undefined
if (savedSpawnMode === 'worktree' && !worktreeAvailable) {
// biome-ignore lint/suspicious/noConsole: intentional warning output
console.error(
'Warning: Saved spawn mode is worktree but this directory is not a git repository. Falling back to same-dir.',
)
savedSpawnMode = undefined
saveCurrentProjectConfig(current => {
if (current.remoteControlSpawnMode === undefined) return current
return { ...current, remoteControlSpawnMode: undefined }
})
}
// First-run spawn-mode choice: ask once per project when the choice is
// meaningful (gate on, both modes available, no explicit override, not
// resuming). Saves to ProjectConfig so subsequent runs skip this.
if (
multiSessionEnabled &&
!savedSpawnMode &&
worktreeAvailable &&
parsedSpawnMode === undefined &&
!resumeSessionId &&
process.stdin.isTTY
) {
const readline = await import('readline')
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
})
// biome-ignore lint/suspicious/noConsole: intentional dialog output
console.log(
`\nClaude Remote Control is launching in spawn mode which lets you create new sessions in this project from Claude Code on Web or your Mobile app. Learn more here: https://code.claude.com/docs/en/remote-control\n\n` +
`Spawn mode for this project:\n` +
` [1] same-dir \u2014 sessions share the current directory (default)\n` +
` [2] worktree \u2014 each session gets an isolated git worktree\n\n` +
`This can be changed later or explicitly set with --spawn=same-dir or --spawn=worktree.\n`,
)
const answer = await new Promise(resolve => {
rl.question('Choose [1/2] (default: 1): ', resolve)
})
rl.close()
const chosen: 'same-dir' | 'worktree' =
answer.trim() === '2' ? 'worktree' : 'same-dir'
savedSpawnMode = chosen
logEvent('tengu_bridge_spawn_mode_chosen', {
spawn_mode:
chosen as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
saveCurrentProjectConfig(current => {
if (current.remoteControlSpawnMode === chosen) return current
return { ...current, remoteControlSpawnMode: chosen }
})
}
// Determine effective spawn mode.
// Precedence: resume > explicit --spawn > saved project pref > gate default
// - resuming via --continue / --session-id: always single-session (resume
// targets one specific session in its original directory)
// - explicit --spawn flag: use that value directly (does not persist)
// - saved ProjectConfig.remoteControlSpawnMode: set by first-run dialog or `w`
// - default with gate on: same-dir (persistent multi-session, shared cwd)
// - default with gate off: single-session (unchanged legacy behavior)
// Track how spawn mode was determined, for rollout analytics.
type SpawnModeSource = 'resume' | 'flag' | 'saved' | 'gate_default'
let spawnModeSource: SpawnModeSource
let spawnMode: SpawnMode
if (resumeSessionId) {
spawnMode = 'single-session'
spawnModeSource = 'resume'
} else if (parsedSpawnMode !== undefined) {
spawnMode = parsedSpawnMode
spawnModeSource = 'flag'
} else if (savedSpawnMode !== undefined) {
spawnMode = savedSpawnMode
spawnModeSource = 'saved'
} else {
spawnMode = multiSessionEnabled ? 'same-dir' : 'single-session'
spawnModeSource = 'gate_default'
}
const maxSessions =
spawnMode === 'single-session'
? 1
: (parsedCapacity ?? SPAWN_SESSIONS_DEFAULT)
// Pre-create an empty session on start so the user has somewhere to type
// immediately, running in the current directory (exempted from worktree
// creation in the spawn loop). On by default; --no-create-session-in-dir
// opts out for a pure on-demand server where every session is isolated.
// The effectiveResumeSessionId guard at the creation site handles the
// resume case (skip creation when resume succeeded; fall through to
// fresh creation on env-mismatch fallback).
const preCreateSession = parsedCreateSessionInDir ?? true
// Without --continue: a leftover pointer means the previous run didn't
// shut down cleanly (crash, kill -9, terminal closed). Clear it so the
// stale env doesn't linger past its relevance. Runs in all modes
// (clearBridgePointer is a no-op when no file exists) — covers the
// gate-transition case where a user crashed in single-session mode then
// starts fresh in worktree mode. Only single-session mode writes new
// pointers.
if (!resumeSessionId) {
const { clearBridgePointer } = await import('./bridgePointer.js')
await clearBridgePointer(dir)
}
// Worktree mode requires either git or WorktreeCreate/WorktreeRemove hooks.
// Only reachable via explicit --spawn=worktree (default is same-dir);
// saved worktree pref was already guarded above.
if (spawnMode === 'worktree' && !worktreeAvailable) {
// biome-ignore lint/suspicious/noConsole: intentional error output
console.error(
`Error: Worktree mode requires a git repository or WorktreeCreate hooks configured. Use --spawn=session for single-session mode.`,
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
const branch = await getBranch()
const gitRepoUrl = await getRemoteUrl()
const machineName = hostname()
const bridgeId = randomUUID()
const { handleOAuth401Error } = await import('../utils/auth.js')
const api = createBridgeApiClient({
baseUrl,
getAccessToken: getBridgeAccessToken,
runnerVersion: MACRO.VERSION,
onDebug: logForDebugging,
onAuth401: handleOAuth401Error,
getTrustedDeviceToken,
})
// When resuming a session via --session-id, fetch it to learn its
// environment_id and reuse that for registration (idempotent on the
// backend). Left undefined otherwise — the backend rejects
// client-generated UUIDs and will allocate a fresh environment.
// feature('KAIROS') gate: --session-id is ant-only; parseArgs already
// rejects the flag when the gate is off, so resumeSessionId is always
// undefined here in external builds — this guard is for tree-shaking.
let reuseEnvironmentId: string | undefined
if (feature('KAIROS') && resumeSessionId) {
try {
validateBridgeId(resumeSessionId, 'sessionId')
} catch {
// biome-ignore lint/suspicious/noConsole: intentional error output
console.error(
`Error: Invalid session ID "${resumeSessionId}". Session IDs must not contain unsafe characters.`,
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
// Proactively refresh the OAuth token — getBridgeSession uses raw axios
// without the withOAuthRetry 401-refresh logic. An expired-but-present
// token would otherwise produce a misleading "not found" error.
await checkAndRefreshOAuthTokenIfNeeded()
clearOAuthTokenCache()
const { getBridgeSession } = await import('./createSession.js')
const session = await getBridgeSession(resumeSessionId, {
baseUrl,
getAccessToken: getBridgeAccessToken,
})
if (!session) {
// Session gone on server → pointer is stale. Clear it so the user
// isn't re-prompted next launch. (Explicit --session-id leaves the
// pointer alone — it's an independent file they may not even have.)
// resumePointerDir may be a worktree sibling — clear THAT file.
if (resumePointerDir) {
const { clearBridgePointer } = await import('./bridgePointer.js')
await clearBridgePointer(resumePointerDir)
}
// biome-ignore lint/suspicious/noConsole: intentional error output
console.error(
`Error: Session ${resumeSessionId} not found. It may have been archived or expired, or your login may have lapsed (run \`claude /login\`).`,
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
if (!session.environment_id) {
if (resumePointerDir) {
const { clearBridgePointer } = await import('./bridgePointer.js')
await clearBridgePointer(resumePointerDir)
}
// biome-ignore lint/suspicious/noConsole: intentional error output
console.error(
`Error: Session ${resumeSessionId} has no environment_id. It may never have been attached to a bridge.`,
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
reuseEnvironmentId = session.environment_id
logForDebugging(
`[bridge:init] Resuming session ${resumeSessionId} on environment ${reuseEnvironmentId}`,
)
}
const config: BridgeConfig = {
dir,
machineName,
branch,
gitRepoUrl,
maxSessions,
spawnMode,
verbose,
sandbox,
bridgeId,
workerType: 'claude_code',
environmentId: randomUUID(),
reuseEnvironmentId,
apiBaseUrl: baseUrl,
sessionIngressUrl,
debugFile,
sessionTimeoutMs,
}
logForDebugging(
`[bridge:init] bridgeId=${bridgeId}${reuseEnvironmentId ? ` reuseEnvironmentId=${reuseEnvironmentId}` : ''} dir=${dir} branch=${branch} gitRepoUrl=${gitRepoUrl} machine=${machineName}`,
)
logForDebugging(
`[bridge:init] apiBaseUrl=${baseUrl} sessionIngressUrl=${sessionIngressUrl}`,
)
logForDebugging(
`[bridge:init] sandbox=${sandbox}${debugFile ? ` debugFile=${debugFile}` : ''}`,
)
// Register the bridge environment before entering the poll loop.
let environmentId: string
let environmentSecret: string
try {
const reg = await api.registerBridgeEnvironment(config)
environmentId = reg.environment_id
environmentSecret = reg.environment_secret
} catch (err) {
logEvent('tengu_bridge_registration_failed', {
status: err instanceof BridgeFatalError ? err.status : undefined,
})
// Registration failures are fatal — print a clean message instead of a stack trace.
// biome-ignore lint/suspicious/noConsole:: intentional console output
console.error(
err instanceof BridgeFatalError && err.status === 404
? 'Remote Control environments are not available for your account.'
: `Error: ${errorMessage(err)}`,
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
// Tracks whether the --session-id resume flow completed successfully.
// Used below to skip fresh session creation and seed initialSessionId.
// Cleared on env mismatch so we gracefully fall back to a new session.
let effectiveResumeSessionId: string | undefined
if (feature('KAIROS') && resumeSessionId) {
if (reuseEnvironmentId && environmentId !== reuseEnvironmentId) {
// Backend returned a different environment_id — the original env
// expired or was reaped. Reconnect won't work against the new env
// (session is bound to the old one). Log to sentry for visibility
// and fall through to fresh session creation on the new env.
logError(
new Error(
`Bridge resume env mismatch: requested ${reuseEnvironmentId}, backend returned ${environmentId}. Falling back to fresh session.`,
),
)
// biome-ignore lint/suspicious/noConsole: intentional warning output
console.warn(
`Warning: Could not resume session ${resumeSessionId} — its environment has expired. Creating a fresh session instead.`,
)
// Don't deregister — we're going to use this new environment.
// effectiveResumeSessionId stays undefined → fresh session path below.
} else {
// Force-stop any stale worker instances for this session and re-queue
// it so our poll loop picks it up. Must happen after registration so
// the backend knows a live worker exists for the environment.
//
// The pointer stores a session_* ID but /bridge/reconnect looks
// sessions up by their infra tag (cse_*) when ccr_v2_compat_enabled
// is on. Try both; the conversion is a no-op if already cse_*.
const infraResumeId = toInfraSessionId(resumeSessionId)
const reconnectCandidates =
infraResumeId === resumeSessionId
? [resumeSessionId]
: [resumeSessionId, infraResumeId]
let reconnected = false
let lastReconnectErr: unknown
for (const candidateId of reconnectCandidates) {
try {
await api.reconnectSession(environmentId, candidateId)
logForDebugging(
`[bridge:init] Session ${candidateId} re-queued via bridge/reconnect`,
)
effectiveResumeSessionId = resumeSessionId
reconnected = true
break
} catch (err) {
lastReconnectErr = err
logForDebugging(
`[bridge:init] reconnectSession(${candidateId}) failed: ${errorMessage(err)}`,
)
}
}
if (!reconnected) {
const err = lastReconnectErr
// Do NOT deregister on transient reconnect failure — at this point
// environmentId IS the session's own environment. Deregistering
// would make retry impossible. The backend's 4h TTL cleans up.
const isFatal = err instanceof BridgeFatalError
// Clear pointer only on fatal reconnect failure. Transient failures
// ("try running the same command again") should keep the pointer so
// next launch re-prompts — that IS the retry mechanism.
if (resumePointerDir && isFatal) {
const { clearBridgePointer } = await import('./bridgePointer.js')
await clearBridgePointer(resumePointerDir)
}
// biome-ignore lint/suspicious/noConsole: intentional error output
console.error(
isFatal
? `Error: ${errorMessage(err)}`
: `Error: Failed to reconnect session ${resumeSessionId}: ${errorMessage(err)}\nThe session may still be resumable — try running the same command again.`,
)
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(1)
}
}
}
logForDebugging(
`[bridge:init] Registered, server environmentId=${environmentId}`,
)
const startupPollConfig = getPollIntervalConfig()
logEvent('tengu_bridge_started', {
max_sessions: config.maxSessions,
has_debug_file: !!config.debugFile,
sandbox: config.sandbox,
verbose: config.verbose,
heartbeat_interval_ms:
startupPollConfig.non_exclusive_heartbeat_interval_ms,
spawn_mode:
config.spawnMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
spawn_mode_source:
spawnModeSource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
multi_session_gate: multiSessionEnabled,
pre_create_session: preCreateSession,
worktree_available: worktreeAvailable,
})
logForDiagnosticsNoPII('info', 'bridge_started', {
max_sessions: config.maxSessions,
sandbox: config.sandbox,
spawn_mode: config.spawnMode,
})
const spawner = createSessionSpawner({
execPath: process.execPath,
scriptArgs: spawnScriptArgs(),
env: process.env,
verbose,
sandbox,
debugFile,
permissionMode,
onDebug: logForDebugging,
onActivity: (sessionId, activity) => {
logForDebugging(
`[bridge:activity] sessionId=${sessionId} ${activity.type} ${activity.summary}`,
)
},
onPermissionRequest: (sessionId, request, _accessToken) => {
logForDebugging(
`[bridge:perm] sessionId=${sessionId} tool=${request.request.tool_name} request_id=${request.request_id} (not auto-approving)`,
)
},
})
const logger = createBridgeLogger({ verbose })
const { parseGitHubRepository } = await import('../utils/detectRepository.js')
const ownerRepo = gitRepoUrl ? parseGitHubRepository(gitRepoUrl) : null
// Use the repo name from the parsed owner/repo, or fall back to the dir basename
const repoName = ownerRepo ? ownerRepo.split('/').pop()! : basename(dir)
logger.setRepoInfo(repoName, branch)
// `w` toggle is available iff we're in a multi-session mode AND worktree
// is a valid option. When unavailable, the mode suffix and hint are hidden.
const toggleAvailable = spawnMode !== 'single-session' && worktreeAvailable
if (toggleAvailable) {
// Safe cast: spawnMode is not single-session (checked above), and the
// saved-worktree-in-non-git guard + exit check above ensure worktree
// is only reached when available.
logger.setSpawnModeDisplay(spawnMode as 'same-dir' | 'worktree')
}
// Listen for keys: space toggles QR code, w toggles spawn mode
const onStdinData = (data: Buffer): void => {
if (data[0] === 0x03 || data[0] === 0x04) {
// Ctrl+C / Ctrl+D — trigger graceful shutdown
process.emit('SIGINT')
return
}
if (data[0] === 0x20 /* space */) {
logger.toggleQr()
return
}
if (data[0] === 0x77 /* 'w' */) {
if (!toggleAvailable) return
const newMode: 'same-dir' | 'worktree' =
config.spawnMode === 'same-dir' ? 'worktree' : 'same-dir'
config.spawnMode = newMode
logEvent('tengu_bridge_spawn_mode_toggled', {
spawn_mode:
newMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
logger.logStatus(
newMode === 'worktree'
? 'Spawn mode: worktree (new sessions get isolated git worktrees)'
: 'Spawn mode: same-dir (new sessions share the current directory)',
)
logger.setSpawnModeDisplay(newMode)
logger.refreshDisplay()
saveCurrentProjectConfig(current => {
if (current.remoteControlSpawnMode === newMode) return current
return { ...current, remoteControlSpawnMode: newMode }
})
return
}
}
if (process.stdin.isTTY) {
process.stdin.setRawMode(true)
process.stdin.resume()
process.stdin.on('data', onStdinData)
}
const controller = new AbortController()
const onSigint = (): void => {
logForDebugging('[bridge:shutdown] SIGINT received, shutting down')
controller.abort()
}
const onSigterm = (): void => {
logForDebugging('[bridge:shutdown] SIGTERM received, shutting down')
controller.abort()
}
process.on('SIGINT', onSigint)
process.on('SIGTERM', onSigterm)
// Auto-create an empty session so the user has somewhere to type
// immediately (matching /remote-control behavior). Controlled by
// preCreateSession: on by default; --no-create-session-in-dir opts out.
// When a --session-id resume succeeded, skip creation entirely — the
// session already exists and bridge/reconnect has re-queued it.
// When resume was requested but failed on env mismatch, effectiveResumeSessionId
// is undefined, so we fall through to fresh session creation (honoring the
// "Creating a fresh session instead" warning printed above).
let initialSessionId: string | null =
feature('KAIROS') && effectiveResumeSessionId
? effectiveResumeSessionId
: null
if (preCreateSession && !(feature('KAIROS') && effectiveResumeSessionId)) {
const { createBridgeSession } = await import('./createSession.js')
try {
initialSessionId = await createBridgeSession({
environmentId,
title: name,
events: [],
gitRepoUrl,
branch,
signal: controller.signal,
baseUrl,
getAccessToken: getBridgeAccessToken,
permissionMode,
})
if (initialSessionId) {
logForDebugging(
`[bridge:init] Created initial session ${initialSessionId}`,
)
}
} catch (err) {
logForDebugging(
`[bridge:init] Session creation failed (non-fatal): ${errorMessage(err)}`,
)
}
}
// Crash-recovery pointer: write immediately so kill -9 at any point
// after this leaves a recoverable trail. Covers both fresh sessions and
// resumed ones (so a second crash after resume is still recoverable).
// Cleared when runBridgeLoop falls through to archive+deregister; left in
// place on the SIGINT resumable-shutdown return (backup for when the user
// closes the terminal before copying the printed --session-id hint).
// Refreshed hourly so a 5h+ session that crashes still has a fresh
// pointer (staleness checks file mtime, backend TTL is rolling-from-poll).
let pointerRefreshTimer: ReturnType | null = null
// Single-session only: --continue forces single-session mode on resume,
// so a pointer written in multi-session mode would contradict the user's
// config when they try to resume. The resumable-shutdown path is also
// gated to single-session (line ~1254) so the pointer would be orphaned.
if (initialSessionId && spawnMode === 'single-session') {
const { writeBridgePointer } = await import('./bridgePointer.js')
const pointerPayload = {
sessionId: initialSessionId,
environmentId,
source: 'standalone' as const,
}
await writeBridgePointer(config.dir, pointerPayload)
pointerRefreshTimer = setInterval(
writeBridgePointer,
60 * 60 * 1000,
config.dir,
pointerPayload,
)
// Don't let the interval keep the process alive on its own.
pointerRefreshTimer.unref?.()
}
try {
await runBridgeLoop(
config,
environmentId,
environmentSecret,
api,
spawner,
logger,
controller.signal,
undefined,
initialSessionId ?? undefined,
async () => {
// Clear the memoized OAuth token cache so we re-read from secure
// storage, picking up tokens refreshed by child processes.
clearOAuthTokenCache()
// Proactively refresh the token if it's expired on disk too.
await checkAndRefreshOAuthTokenIfNeeded()
return getBridgeAccessToken()
},
)
} finally {
if (pointerRefreshTimer !== null) {
clearInterval(pointerRefreshTimer)
}
process.off('SIGINT', onSigint)
process.off('SIGTERM', onSigterm)
process.stdin.off('data', onStdinData)
if (process.stdin.isTTY) {
process.stdin.setRawMode(false)
}
process.stdin.pause()
}
// The bridge bypasses init.ts (and its graceful shutdown handler), so we
// must exit explicitly.
// eslint-disable-next-line custom-rules/no-process-exit
process.exit(0)
}
// ─── Headless bridge (daemon worker) ────────────────────────────────────────
/**
 * Raised by runBridgeHeadless for configuration problems the supervisor
 * should NOT retry (trust not accepted, worktree unavailable,
 * http-not-https). The daemon worker catches it and exits with
 * EXIT_CODE_PERMANENT so the supervisor parks the worker rather than
 * respawning it on backoff.
 */
export class BridgeHeadlessPermanentError extends Error {
  // Explicit name so logs and serialized errors identify the class.
  override name = 'BridgeHeadlessPermanentError'

  constructor(message: string) {
    super(message)
  }
}
export type HeadlessBridgeOpts = {
  /** Workspace directory the bridge serves; the worker chdir()s here. */
  dir: string
  /** Optional title for the pre-created session (see createSessionOnStart). */
  name?: string
  /** Where spawned sessions run: the workspace itself or a git worktree. */
  spawnMode: 'same-dir' | 'worktree'
  /** Max concurrent sessions (becomes BridgeConfig.maxSessions). */
  capacity: number
  permissionMode?: string
  sandbox: boolean
  sessionTimeoutMs?: number
  /** When true, pre-create one session before entering the poll loop. */
  createSessionOnStart: boolean
  /** Returns the current access token, or undefined when logged out. */
  getAccessToken: () => string | undefined
  /**
   * Invoked by the API client on a 401 with the token that failed.
   * NOTE(review): the Promise type argument was lost in restoration —
   * `unknown` keeps every implementation assignable; confirm against the
   * original createBridgeApiClient signature.
   */
  onAuth401: (failedToken: string) => Promise<unknown>
  /** Line-oriented log sink (worker stdout pipe). */
  log: (s: string) => void
}
/**
 * Non-interactive bridge entrypoint for the `remoteControl` daemon worker.
 *
 * Linear subset of bridgeMain(): no readline dialogs, no stdin key handlers,
 * no TUI, no process.exit(). Config comes from the caller (daemon.json), auth
 * comes via IPC (supervisor's AuthManager), logs go to the worker's stdout
 * pipe. Throws on fatal errors — the worker catches and maps permanent vs
 * transient to the right exit code.
 *
 * Resolves cleanly when `signal` aborts and the poll loop tears down.
 *
 * @param opts caller-supplied config and auth hooks (from daemon.json/IPC)
 * @param signal abort to tear down the poll loop and let this resolve
 * @throws BridgeHeadlessPermanentError for non-retryable config problems
 * @throws Error for transient failures the supervisor should backoff-retry
 */
export async function runBridgeHeadless(
  opts: HeadlessBridgeOpts,
  signal: AbortSignal,
): Promise<void> {
  const { dir, log } = opts
  // Worker inherits the supervisor's CWD. chdir first so git utilities
  // (getBranch/getRemoteUrl) — which read from bootstrap CWD state set
  // below — resolve against the right repo.
  process.chdir(dir)
  const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
  setOriginalCwd(dir)
  setCwdState(dir)
  const { enableConfigs, checkHasTrustDialogAccepted } = await import(
    '../utils/config.js'
  )
  enableConfigs()
  const { initSinks } = await import('../utils/sinks.js')
  initSinks()
  if (!checkHasTrustDialogAccepted()) {
    throw new BridgeHeadlessPermanentError(
      `Workspace not trusted: ${dir}. Run \`claude\` in that directory first to accept the trust dialog.`,
    )
  }
  if (!opts.getAccessToken()) {
    // Transient — supervisor's AuthManager may pick up a token on next cycle.
    throw new Error(BRIDGE_LOGIN_ERROR)
  }
  const { getBridgeBaseUrl } = await import('./bridgeConfig.js')
  const baseUrl = getBridgeBaseUrl()
  if (
    baseUrl.startsWith('http://') &&
    !baseUrl.includes('localhost') &&
    !baseUrl.includes('127.0.0.1')
  ) {
    throw new BridgeHeadlessPermanentError(
      'Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
    )
  }
  // Internal (ant) builds may route session traffic to a dedicated ingress
  // URL via env var; everyone else shares the API base URL.
  const sessionIngressUrl =
    process.env.USER_TYPE === 'ant' &&
    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
      ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
      : baseUrl
  const { getBranch, getRemoteUrl, findGitRoot } = await import(
    '../utils/git.js'
  )
  const { hasWorktreeCreateHook } = await import('../utils/hooks.js')
  if (opts.spawnMode === 'worktree') {
    // Worktree mode needs either a real git repo or a hook that fabricates
    // the worktree — with neither, spawning would always fail. Permanent.
    const worktreeAvailable =
      hasWorktreeCreateHook() || findGitRoot(dir) !== null
    if (!worktreeAvailable) {
      throw new BridgeHeadlessPermanentError(
        `Worktree mode requires a git repository or WorktreeCreate hooks. Directory ${dir} has neither.`,
      )
    }
  }
  const branch = await getBranch()
  const gitRepoUrl = await getRemoteUrl()
  const machineName = hostname()
  const bridgeId = randomUUID()
  const config: BridgeConfig = {
    dir,
    machineName,
    branch,
    gitRepoUrl,
    maxSessions: opts.capacity,
    spawnMode: opts.spawnMode,
    verbose: false,
    sandbox: opts.sandbox,
    bridgeId,
    workerType: 'claude_code',
    environmentId: randomUUID(),
    apiBaseUrl: baseUrl,
    sessionIngressUrl,
    sessionTimeoutMs: opts.sessionTimeoutMs,
  }
  const api = createBridgeApiClient({
    baseUrl,
    getAccessToken: opts.getAccessToken,
    runnerVersion: MACRO.VERSION,
    onDebug: log,
    onAuth401: opts.onAuth401,
    getTrustedDeviceToken,
  })
  let environmentId: string
  let environmentSecret: string
  try {
    const reg = await api.registerBridgeEnvironment(config)
    environmentId = reg.environment_id
    environmentSecret = reg.environment_secret
  } catch (err) {
    // Transient — let supervisor backoff-retry.
    throw new Error(`Bridge registration failed: ${errorMessage(err)}`)
  }
  const spawner = createSessionSpawner({
    execPath: process.execPath,
    scriptArgs: spawnScriptArgs(),
    env: process.env,
    verbose: false,
    sandbox: opts.sandbox,
    permissionMode: opts.permissionMode,
    onDebug: log,
  })
  const logger = createHeadlessBridgeLogger(log)
  logger.printBanner(config, environmentId)
  let initialSessionId: string | undefined
  if (opts.createSessionOnStart) {
    const { createBridgeSession } = await import('./createSession.js')
    try {
      const sid = await createBridgeSession({
        environmentId,
        title: opts.name,
        events: [],
        gitRepoUrl,
        branch,
        signal,
        baseUrl,
        getAccessToken: opts.getAccessToken,
        permissionMode: opts.permissionMode,
      })
      if (sid) {
        initialSessionId = sid
        log(`created initial session ${sid}`)
      }
    } catch (err) {
      // Best-effort: the loop still starts without a pre-created session.
      log(`session pre-creation failed (non-fatal): ${errorMessage(err)}`)
    }
  }
  await runBridgeLoop(
    config,
    environmentId,
    environmentSecret,
    api,
    spawner,
    logger,
    signal,
    undefined,
    initialSessionId,
    async () => opts.getAccessToken(),
  )
}
/** BridgeLogger adapter that routes everything to a single line-log fn. */
function createHeadlessBridgeLogger(log: (s: string) => void): BridgeLogger {
  // Display-only hooks are no-ops: the headless worker has no TUI to drive.
  const ignore = (): void => {}
  return {
    printBanner: (cfg, envId) => {
      log(
        `registered environmentId=${envId} dir=${cfg.dir} spawnMode=${cfg.spawnMode} capacity=${cfg.maxSessions}`,
      )
    },
    logSessionStart: (id, _prompt) => {
      log(`session start ${id}`)
    },
    logSessionComplete: (id, ms) => {
      log(`session complete ${id} (${ms}ms)`)
    },
    logSessionFailed: (id, err) => {
      log(`session failed ${id}: ${err}`)
    },
    logStatus: log,
    logVerbose: log,
    logError: s => {
      log(`error: ${s}`)
    },
    logReconnected: ms => {
      log(`reconnected after ${ms}ms`)
    },
    addSession: (id, _url) => {
      log(`session attached ${id}`)
    },
    removeSession: id => {
      log(`session detached ${id}`)
    },
    updateIdleStatus: ignore,
    updateReconnectingStatus: ignore,
    updateSessionStatus: ignore,
    updateSessionActivity: ignore,
    updateSessionCount: ignore,
    updateFailedStatus: ignore,
    setSpawnModeDisplay: ignore,
    setRepoInfo: ignore,
    setDebugLogPath: ignore,
    setAttached: ignore,
    setSessionTitle: ignore,
    clearStatus: ignore,
    toggleQr: ignore,
    refreshDisplay: ignore,
  }
}
================================================
FILE: restored-src/src/bridge/bridgeMessaging.ts
================================================
/**
* Shared transport-layer helpers for bridge message handling.
*
* Extracted from replBridge.ts so both the env-based core (initBridgeCore)
* and the env-less core (initEnvLessBridgeCore) can use the same ingress
* parsing, control-request handling, and echo-dedup machinery.
*
* Everything here is pure — no closure over bridge-specific state. All
* collaborators (transport, sessionId, UUID sets, callbacks) are passed
* as params.
*/
import { randomUUID } from 'crypto'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import type {
SDKControlRequest,
SDKControlResponse,
} from '../entrypoints/sdk/controlTypes.js'
import type { SDKResultSuccess } from '../entrypoints/sdk/coreTypes.js'
import { logEvent } from '../services/analytics/index.js'
import { EMPTY_USAGE } from '../services/api/emptyUsage.js'
import type { Message } from '../types/message.js'
import { normalizeControlMessageKeys } from '../utils/controlMessageCompat.js'
import { logForDebugging } from '../utils/debug.js'
import { stripDisplayTagsAllowEmpty } from '../utils/displayTags.js'
import { errorMessage } from '../utils/errors.js'
import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
import { jsonParse } from '../utils/slowOperations.js'
import type { ReplBridgeTransport } from './replBridgeTransport.js'
// ─── Type guards ─────────────────────────────────────────────────────────────
/** Type predicate for parsed WebSocket messages. SDKMessage is a
 * discriminated union on `type` — validating that the discriminant exists
 * and is a string is enough here; callers narrow further via the union. */
export function isSDKMessage(value: unknown): value is SDKMessage {
  if (value === null || typeof value !== 'object') return false
  return 'type' in value && typeof value.type === 'string'
}
/** Type predicate for control_response messages from the server. */
export function isSDKControlResponse(
  value: unknown,
): value is SDKControlResponse {
  if (value === null || typeof value !== 'object') return false
  if (!('type' in value) || value.type !== 'control_response') return false
  return 'response' in value
}
/** Type predicate for control_request messages from the server. */
export function isSDKControlRequest(
  value: unknown,
): value is SDKControlRequest {
  if (value === null || typeof value !== 'object') return false
  if (!('type' in value) || value.type !== 'control_request') return false
  return 'request_id' in value && 'request' in value
}
/**
 * True for message types that should be forwarded to the bridge transport.
 * The server only wants user/assistant turns and slash-command system events;
 * everything else (tool_result, progress, etc.) is internal REPL chatter.
 */
export function isEligibleBridgeMessage(m: Message): boolean {
  if (m.type === 'user' || m.type === 'assistant') {
    // Virtual messages (REPL inner calls) are display-only — bridge/SDK
    // consumers see the REPL tool_use/result which summarizes the work.
    return !m.isVirtual
  }
  return m.type === 'system' && m.subtype === 'local_command'
}
/**
 * Extract title-worthy text from a Message for onUserMessage. Returns
 * undefined for messages that shouldn't title the session: non-user, meta
 * (nudges), tool results, compact summaries, non-human origins (task
 * notifications, channel messages), or content that
 * stripDisplayTagsAllowEmpty reduces to nothing.
 *
 * Synthetic interrupts ([Request interrupted by user]) are NOT filtered here —
 * isSyntheticMessage lives in messages.ts (heavy import, pulls command
 * registry). The initialMessages path in initReplBridge checks it; the
 * writeMessages path reaching an interrupt as the *first* message is
 * implausible (an interrupt implies a prior prompt already flowed through).
 */
export function extractTitleText(m: Message): string | undefined {
  if (m.type !== 'user' || m.isMeta || m.toolUseResult || m.isCompactSummary) {
    return undefined
  }
  if (m.origin && m.origin.kind !== 'human') return undefined
  const content = m.message.content
  // String content is used as-is; for block arrays the first text block wins.
  let candidate: string | undefined
  if (typeof content === 'string') {
    candidate = content
  } else {
    for (const block of content) {
      if (block.type === 'text') {
        candidate = block.text
        break
      }
    }
  }
  if (!candidate) return undefined
  const stripped = stripDisplayTagsAllowEmpty(candidate)
  return stripped || undefined
}
// ─── Ingress routing ─────────────────────────────────────────────────────────
/**
 * Parse an ingress WebSocket message and route it to the appropriate handler.
 * Ignores messages whose UUID is in recentPostedUUIDs (echoes of what we sent)
 * or in recentInboundUUIDs (re-deliveries we've already forwarded — e.g.
 * server replayed history after a transport swap lost the seq-num cursor).
 *
 * Parse failures are logged and swallowed so one malformed frame can't tear
 * down the socket handler.
 */
export function handleIngressMessage(
  data: string,
  recentPostedUUIDs: BoundedUUIDSet,
  recentInboundUUIDs: BoundedUUIDSet,
  // NOTE: the Promise type argument was lost in restoration; `Promise<void>`
  // matches the fire-and-forget `void` call below.
  onInboundMessage: ((msg: SDKMessage) => void | Promise<void>) | undefined,
  onPermissionResponse?: ((response: SDKControlResponse) => void) | undefined,
  onControlRequest?: ((request: SDKControlRequest) => void) | undefined,
): void {
  try {
    const parsed: unknown = normalizeControlMessageKeys(jsonParse(data))
    // control_response is not an SDKMessage — check before the type guard
    if (isSDKControlResponse(parsed)) {
      logForDebugging('[bridge:repl] Ingress message type=control_response')
      onPermissionResponse?.(parsed)
      return
    }
    // control_request from the server (initialize, set_model, can_use_tool).
    // Must respond promptly or the server kills the WS (~10-14s timeout).
    if (isSDKControlRequest(parsed)) {
      logForDebugging(
        `[bridge:repl] Inbound control_request subtype=${parsed.request.subtype}`,
      )
      onControlRequest?.(parsed)
      return
    }
    if (!isSDKMessage(parsed)) return
    // Check for UUID to detect echoes of our own messages
    const uuid =
      'uuid' in parsed && typeof parsed.uuid === 'string'
        ? parsed.uuid
        : undefined
    if (uuid && recentPostedUUIDs.has(uuid)) {
      logForDebugging(
        `[bridge:repl] Ignoring echo: type=${parsed.type} uuid=${uuid}`,
      )
      return
    }
    // Defensive dedup: drop inbound prompts we've already forwarded. The
    // SSE seq-num carryover (lastTransportSequenceNum) is the primary fix
    // for history-replay; this catches edge cases where that negotiation
    // fails (server ignores from_sequence_num, transport died before
    // receiving any frames, etc).
    if (uuid && recentInboundUUIDs.has(uuid)) {
      logForDebugging(
        `[bridge:repl] Ignoring re-delivered inbound: type=${parsed.type} uuid=${uuid}`,
      )
      return
    }
    logForDebugging(
      `[bridge:repl] Ingress message type=${parsed.type}${uuid ? ` uuid=${uuid}` : ''}`,
    )
    if (parsed.type === 'user') {
      if (uuid) recentInboundUUIDs.add(uuid)
      logEvent('tengu_bridge_message_received', {
        is_repl: true,
      })
      // Fire-and-forget — handler may be async (attachment resolution).
      void onInboundMessage?.(parsed)
    } else {
      logForDebugging(
        `[bridge:repl] Ignoring non-user inbound message: type=${parsed.type}`,
      )
    }
  } catch (err) {
    logForDebugging(
      `[bridge:repl] Failed to parse ingress message: ${errorMessage(err)}`,
    )
  }
}
// ─── Server-initiated control requests ───────────────────────────────────────
export type ServerControlRequestHandlers = {
  /** Where control_responses are written; null → requests are logged and dropped. */
  transport: ReplBridgeTransport | null
  /** Stamped onto every outgoing control_response event as session_id. */
  sessionId: string
  /**
   * When true, all mutable requests (interrupt, set_model, set_permission_mode,
   * set_max_thinking_tokens) reply with an error instead of false-success.
   * initialize still replies success — the server kills the connection otherwise.
   * Used by the outbound-only bridge mode and the SDK's /bridge subpath so claude.ai sees a
   * proper error instead of "action succeeded but nothing happened locally".
   */
  outboundOnly?: boolean
  /** Invoked on an 'interrupt' request; always acked with success. */
  onInterrupt?: () => void
  /** Invoked on 'set_model' with the requested model (may be undefined). */
  onSetModel?: (model: string | undefined) => void
  /** Invoked on 'set_max_thinking_tokens' with the requested value. */
  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
  /**
   * Policy verdict for 'set_permission_mode' — lets the caller veto the
   * change without this module importing permission-gate helpers.
   */
  onSetPermissionMode?: (
    mode: PermissionMode,
  ) => { ok: true } | { ok: false; error: string }
}
/** Error string sent for mutable control requests while outbound-only. */
const OUTBOUND_ONLY_ERROR =
  'This session is outbound-only. Enable Remote Control locally to allow inbound control.'
/**
 * Respond to inbound control_request messages from the server. The server
 * sends these for session lifecycle events (initialize, set_model) and
 * for turn-level coordination (interrupt, set_max_thinking_tokens). If we
 * don't respond, the server hangs and kills the WS after ~10-14s.
 *
 * Previously a closure inside initBridgeCore's onWorkReceived; now takes
 * collaborators as params so both cores can use it.
 */
export function handleServerControlRequest(
  request: SDKControlRequest,
  handlers: ServerControlRequestHandlers,
): void {
  const {
    transport,
    sessionId,
    outboundOnly,
    onInterrupt,
    onSetModel,
    onSetMaxThinkingTokens,
    onSetPermissionMode,
  } = handlers
  // No transport means nowhere to send a reply — log and drop the request.
  if (!transport) {
    logForDebugging(
      '[bridge:repl] Cannot respond to control_request: transport not configured',
    )
    return
  }
  let response: SDKControlResponse
  // Outbound-only: reply error for mutable requests so claude.ai doesn't show
  // false success. initialize must still succeed (server kills the connection
  // if it doesn't — see comment above).
  if (outboundOnly && request.request.subtype !== 'initialize') {
    response = {
      type: 'control_response',
      response: {
        subtype: 'error',
        request_id: request.request_id,
        error: OUTBOUND_ONLY_ERROR,
      },
    }
    const event = { ...response, session_id: sessionId }
    void transport.write(event)
    logForDebugging(
      `[bridge:repl] Rejected ${request.request.subtype} (outbound-only) request_id=${request.request_id}`,
    )
    return
  }
  switch (request.request.subtype) {
    case 'initialize':
      // Respond with minimal capabilities — the REPL handles
      // commands, models, and account info itself.
      response = {
        type: 'control_response',
        response: {
          subtype: 'success',
          request_id: request.request_id,
          response: {
            commands: [],
            output_style: 'normal',
            available_output_styles: ['normal'],
            models: [],
            account: {},
            pid: process.pid,
          },
        },
      }
      break
    case 'set_model':
      // Delegate, then ack — the callback is fire-and-forget from the
      // server's point of view.
      onSetModel?.(request.request.model)
      response = {
        type: 'control_response',
        response: {
          subtype: 'success',
          request_id: request.request_id,
        },
      }
      break
    case 'set_max_thinking_tokens':
      // Same fire-and-forget shape as set_model.
      onSetMaxThinkingTokens?.(request.request.max_thinking_tokens)
      response = {
        type: 'control_response',
        response: {
          subtype: 'success',
          request_id: request.request_id,
        },
      }
      break
    case 'set_permission_mode': {
      // The callback returns a policy verdict so we can send an error
      // control_response without importing isAutoModeGateEnabled /
      // isBypassPermissionsModeDisabled here (bootstrap-isolation). If no
      // callback is registered (daemon context, which doesn't wire this —
      // see daemonBridge.ts), return an error verdict rather than a silent
      // false-success: the mode is never actually applied in that context,
      // so success would lie to the client.
      const verdict = onSetPermissionMode?.(request.request.mode) ?? {
        ok: false,
        error:
          'set_permission_mode is not supported in this context (onSetPermissionMode callback not registered)',
      }
      if (verdict.ok) {
        response = {
          type: 'control_response',
          response: {
            subtype: 'success',
            request_id: request.request_id,
          },
        }
      } else {
        response = {
          type: 'control_response',
          response: {
            subtype: 'error',
            request_id: request.request_id,
            error: verdict.error,
          },
        }
      }
      break
    }
    case 'interrupt':
      onInterrupt?.()
      response = {
        type: 'control_response',
        response: {
          subtype: 'success',
          request_id: request.request_id,
        },
      }
      break
    default:
      // Unknown subtype — respond with error so the server doesn't
      // hang waiting for a reply that never comes.
      response = {
        type: 'control_response',
        response: {
          subtype: 'error',
          request_id: request.request_id,
          error: `REPL bridge does not handle control_request subtype: ${request.request.subtype}`,
        },
      }
  }
  // Every path above lands here: stamp the session id and ship the reply.
  const event = { ...response, session_id: sessionId }
  void transport.write(event)
  logForDebugging(
    `[bridge:repl] Sent control_response for ${request.request.subtype} request_id=${request.request_id} result=${response.response.subtype}`,
  )
}
// ─── Result message (for session archival on teardown) ───────────────────────
/**
 * Build a minimal `SDKResultSuccess` message for session archival.
 * The server needs this event before a WS close to trigger archival.
 * All counters and costs are zeroed — this is a teardown marker, not a
 * real turn result.
 */
export function makeResultMessage(sessionId: string): SDKResultSuccess {
  return {
    type: 'result',
    subtype: 'success',
    duration_ms: 0,
    duration_api_ms: 0,
    is_error: false,
    num_turns: 0,
    result: '',
    stop_reason: null,
    total_cost_usd: 0,
    // Fresh copy so consumers can't mutate the shared EMPTY_USAGE constant.
    usage: { ...EMPTY_USAGE },
    modelUsage: {},
    permission_denials: [],
    session_id: sessionId,
    // New random uuid for this synthetic event.
    uuid: randomUUID(),
  }
}
// ─── BoundedUUIDSet (echo-dedup ring buffer) ─────────────────────────────────
/**
 * FIFO-bounded set backed by a circular buffer. Evicts the oldest entry
 * when capacity is reached, keeping memory usage constant at O(capacity).
 *
 * Messages are added in chronological order, so evicted entries are always
 * the oldest. The caller relies on external ordering (the hook's
 * lastWrittenIndexRef) as the primary dedup — this set is a secondary
 * safety net for echo filtering and race-condition dedup.
 */
export class BoundedUUIDSet {
  /** Max entries retained. NOTE(review): assumed >= 1; capacity 0 would
   * make the modulo below produce NaN — confirm callers never pass 0. */
  private readonly capacity: number
  /** Circular buffer in insertion order; undefined = slot never occupied. */
  private readonly ring: (string | undefined)[]
  // Typed Set<string> (the bare `new Set()` inferred Set<unknown>).
  /** O(1) membership index over the ring's live contents. */
  private readonly set = new Set<string>()
  /** Next slot to write to (and evict from, once the ring has wrapped). */
  private writeIdx = 0
  constructor(capacity: number) {
    this.capacity = capacity
    this.ring = new Array(capacity)
  }
  /** Insert a UUID, evicting the oldest entry at capacity. No-op if present. */
  add(uuid: string): void {
    if (this.set.has(uuid)) return
    // Evict the entry at the current write position (if occupied)
    const evicted = this.ring[this.writeIdx]
    if (evicted !== undefined) {
      this.set.delete(evicted)
    }
    this.ring[this.writeIdx] = uuid
    this.set.add(uuid)
    this.writeIdx = (this.writeIdx + 1) % this.capacity
  }
  /** True while the UUID is within the retained window. */
  has(uuid: string): boolean {
    return this.set.has(uuid)
  }
  /** Drop all entries and reset the write cursor. */
  clear(): void {
    this.set.clear()
    this.ring.fill(undefined)
    this.writeIdx = 0
  }
}
================================================
FILE: restored-src/src/bridge/bridgePermissionCallbacks.ts
================================================
import type { PermissionUpdate } from '../utils/permissions/PermissionUpdateSchema.js'
type BridgePermissionResponse = {
  /** Verdict for the pending tool-use permission request. */
  behavior: 'allow' | 'deny'
  // Record's type arguments were lost in restoration; string-keyed unknown
  // matches how tool input is handled elsewhere in this module.
  updatedInput?: Record<string, unknown>
  /** Permission rule updates to persist alongside the verdict. */
  updatedPermissions?: PermissionUpdate[]
  /** Optional human-readable explanation accompanying the verdict. */
  message?: string
}
type BridgePermissionCallbacks = {
  /** Forward a permission prompt to the remote client. */
  sendRequest(
    requestId: string,
    toolName: string,
    // Record's type arguments were lost in restoration — string-keyed
    // unknown matches BridgePermissionResponse.updatedInput.
    input: Record<string, unknown>,
    toolUseId: string,
    description: string,
    permissionSuggestions?: PermissionUpdate[],
    blockedPath?: string,
  ): void
  /** Deliver a local verdict for a previously-sent request. */
  sendResponse(requestId: string, response: BridgePermissionResponse): void
  /** Cancel a pending control_request so the web app can dismiss its prompt. */
  cancelRequest(requestId: string): void
  /** Subscribe to the remote verdict for a request. */
  onResponse(
    requestId: string,
    handler: (response: BridgePermissionResponse) => void,
  ): () => void // returns unsubscribe
}
/** Type predicate for validating a parsed control_response payload
 * as a BridgePermissionResponse. Checks the required `behavior`
 * discriminant rather than using an unsafe `as` cast. */
function isBridgePermissionResponse(
  value: unknown,
): value is BridgePermissionResponse {
  if (value === null || typeof value !== 'object') return false
  if (!('behavior' in value)) return false
  return value.behavior === 'allow' || value.behavior === 'deny'
}
export { isBridgePermissionResponse }
export type { BridgePermissionCallbacks, BridgePermissionResponse }
================================================
FILE: restored-src/src/bridge/bridgePointer.ts
================================================
import { mkdir, readFile, stat, unlink, writeFile } from 'fs/promises'
import { dirname, join } from 'path'
import { z } from 'zod/v4'
import { logForDebugging } from '../utils/debug.js'
import { isENOENT } from '../utils/errors.js'
import { getWorktreePathsPortable } from '../utils/getWorktreePathsPortable.js'
import { lazySchema } from '../utils/lazySchema.js'
import {
getProjectsDir,
sanitizePath,
} from '../utils/sessionStoragePortable.js'
import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
/**
* Upper bound on worktree fanout. git worktree list is naturally bounded
* (50 is a LOT), but this caps the parallel stat() burst and guards against
* pathological setups. Above this, --continue falls back to current-dir-only.
*/
const MAX_WORKTREE_FANOUT = 50
/**
* Crash-recovery pointer for Remote Control sessions.
*
* Written immediately after a bridge session is created, periodically
* refreshed during the session, and cleared on clean shutdown. If the
* process dies unclean (crash, kill -9, terminal closed), the pointer
* persists. On next startup, `claude remote-control` detects it and offers
* to resume via the --session-id flow from #20460.
*
* Staleness is checked against the file's mtime (not an embedded timestamp)
* so that a periodic re-write with the same content serves as a refresh —
* matches the backend's rolling BRIDGE_LAST_POLL_TTL (4h) semantics. A
* bridge that's been polling for 5+ hours and then crashes still has a
* fresh pointer as long as the refresh ran within the window.
*
* Scoped per working directory (alongside transcript JSONL files) so two
* concurrent bridges in different repos don't clobber each other.
*/
export const BRIDGE_POINTER_TTL_MS = 4 * 60 * 60 * 1000
// Lazily-constructed zod schema for the pointer file's JSON payload.
// `source` records which bridge flavor wrote the pointer (standalone CLI
// vs the REPL bridge).
const BridgePointerSchema = lazySchema(() =>
  z.object({
    sessionId: z.string(),
    environmentId: z.string(),
    source: z.enum(['standalone', 'repl']),
  }),
)
export type BridgePointer = z.infer>
export function getBridgePointerPath(dir: string): string {
return join(getProjectsDir(), sanitizePath(dir), 'bridge-pointer.json')
}
/**
 * Write the pointer. Also used to refresh mtime during long sessions —
 * calling with the same IDs is a cheap no-content-change write that bumps
 * the staleness clock. Best-effort — a crash-recovery file must never
 * itself cause a crash. Logs and swallows on error.
 *
 * @param dir workspace directory the pointer is scoped to
 * @param pointer payload identifying the session/environment to resume
 */
export async function writeBridgePointer(
  dir: string,
  pointer: BridgePointer,
): Promise<void> {
  const path = getBridgePointerPath(dir)
  try {
    await mkdir(dirname(path), { recursive: true })
    await writeFile(path, jsonStringify(pointer), 'utf8')
    logForDebugging(`[bridge:pointer] wrote ${path}`)
  } catch (err: unknown) {
    logForDebugging(`[bridge:pointer] write failed: ${err}`, { level: 'warn' })
  }
}
/**
 * Read the pointer and its age (ms since last write). Operates directly
 * and handles errors — no existence check (CLAUDE.md TOCTOU rule). Returns
 * null on any failure: missing file, corrupted JSON, schema mismatch, or
 * stale (mtime > 4h ago). Stale/invalid pointers are deleted so they don't
 * keep re-prompting after the backend has already GC'd the env.
 */
export async function readBridgePointer(
  dir: string,
): Promise<(BridgePointer & { ageMs: number }) | null> {
  const path = getBridgePointerPath(dir)
  let contents: string
  let lastWriteMs: number
  try {
    // stat for mtime (the staleness anchor), then read. Both syscalls are
    // needed — mtime IS part of the returned data, not a TOCTOU guard.
    const { mtimeMs } = await stat(path)
    lastWriteMs = mtimeMs
    contents = await readFile(path, 'utf8')
  } catch {
    return null
  }
  const result = BridgePointerSchema().safeParse(safeJsonParse(contents))
  if (!result.success) {
    logForDebugging(`[bridge:pointer] invalid schema, clearing: ${path}`)
    await clearBridgePointer(dir)
    return null
  }
  const ageMs = Math.max(0, Date.now() - lastWriteMs)
  if (ageMs > BRIDGE_POINTER_TTL_MS) {
    logForDebugging(`[bridge:pointer] stale (>4h mtime), clearing: ${path}`)
    await clearBridgePointer(dir)
    return null
  }
  return { ...result.data, ageMs }
}
/**
 * Worktree-aware read for `--continue`. The REPL bridge writes its pointer
 * to `getOriginalCwd()` which EnterWorktreeTool/activeWorktreeSession can
 * mutate to a worktree path — but `claude remote-control --continue` runs
 * with `resolve('.')` = shell CWD. This fans out across git worktree
 * siblings to find the freshest pointer, matching /resume's semantics.
 *
 * Fast path: checks `dir` first. Only shells out to `git worktree list` if
 * that misses — the common case (pointer in launch dir) is one stat, zero
 * exec. Fanout reads run in parallel; capped at MAX_WORKTREE_FANOUT.
 *
 * Returns the pointer AND the dir it was found in, so the caller can clear
 * the right file on resume failure.
 */
export async function readBridgePointerAcrossWorktrees(
  dir: string,
): Promise<{ pointer: BridgePointer & { ageMs: number }; dir: string } | null> {
  // Fast path: the launch dir itself. Covers the standalone bridge (always
  // a hit) and the REPL bridge when no worktree mutation happened.
  const local = await readBridgePointer(dir)
  if (local) {
    return { pointer: local, dir }
  }
  // Fanout: scan worktree siblings. getWorktreePathsPortable has a 5s
  // timeout and returns [] on any error (not a git repo, git missing).
  const worktrees = await getWorktreePathsPortable(dir)
  if (worktrees.length <= 1) return null
  if (worktrees.length > MAX_WORKTREE_FANOUT) {
    logForDebugging(
      `[bridge:pointer] ${worktrees.length} worktrees exceeds fanout cap ${MAX_WORKTREE_FANOUT}, skipping`,
    )
    return null
  }
  // Skip the dir we already checked. sanitizePath normalizes case and
  // separators so git's C:/ vs stored c:/ output still dedupes on Windows.
  const selfKey = sanitizePath(dir)
  const siblings = worktrees.filter(wt => sanitizePath(wt) !== selfKey)
  // Parallel stat+read: each miss is one cheap ENOENT stat, each hit adds
  // a ~100-byte read. Overall latency ≈ the slowest single stat.
  const candidates = await Promise.all(
    siblings.map(async wt => {
      const pointer = await readBridgePointer(wt)
      return pointer ? { pointer, dir: wt } : null
    }),
  )
  // Freshest wins (lowest ageMs). The payload carries environmentId, so
  // resume reconnects to the right env regardless of which worktree
  // --continue was invoked from.
  let freshest: {
    pointer: BridgePointer & { ageMs: number }
    dir: string
  } | null = null
  for (const candidate of candidates) {
    if (candidate === null) continue
    if (!freshest || candidate.pointer.ageMs < freshest.pointer.ageMs) {
      freshest = candidate
    }
  }
  if (freshest) {
    logForDebugging(
      `[bridge:pointer] fanout found pointer in worktree ${freshest.dir} (ageMs=${freshest.pointer.ageMs})`,
    )
  }
  return freshest
}
/**
 * Delete the pointer. Idempotent — ENOENT is expected when the process
 * shut down clean previously; other failures are logged at warn level and
 * swallowed (crash-recovery cleanup must never crash the caller).
 */
export async function clearBridgePointer(dir: string): Promise<void> {
  const path = getBridgePointerPath(dir)
  try {
    await unlink(path)
    logForDebugging(`[bridge:pointer] cleared ${path}`)
  } catch (err: unknown) {
    if (!isENOENT(err)) {
      logForDebugging(`[bridge:pointer] clear failed: ${err}`, {
        level: 'warn',
      })
    }
  }
}
/** jsonParse wrapper that returns null instead of throwing on bad JSON. */
function safeJsonParse(raw: string): unknown {
  try {
    return jsonParse(raw)
  } catch {
    return null
  }
}
================================================
FILE: restored-src/src/bridge/bridgeStatusUtil.ts
================================================
import {
getClaudeAiBaseUrl,
getRemoteSessionUrl,
} from '../constants/product.js'
import { stringWidth } from '../ink/stringWidth.js'
import { formatDuration, truncateToWidth } from '../utils/format.js'
import { getGraphemeSegmenter } from '../utils/intl.js'
/** Bridge status state machine states. */
export type StatusState =
| 'idle'
| 'attached'
| 'titled'
| 'reconnecting'
| 'failed'
/** How long a tool activity line stays visible after last tool_start (ms). */
export const TOOL_DISPLAY_EXPIRY_MS = 30_000
/** Interval for the shimmer animation tick (ms). */
export const SHIMMER_INTERVAL_MS = 150
/** Current local wall-clock time as zero-padded `HH:MM:SS`. */
export function timestamp(): string {
  const now = new Date()
  const pad2 = (v: number): string => String(v).padStart(2, '0')
  return `${pad2(now.getHours())}:${pad2(now.getMinutes())}:${pad2(now.getSeconds())}`
}
export { formatDuration, truncateToWidth as truncatePrompt }
/** Abbreviate a tool activity summary for the trail display. */
export function abbreviateActivity(summary: string): string {
  // 30 columns is the trail's budget for a single activity entry.
  const maxWidth = 30
  return truncateToWidth(summary, maxWidth)
}
/** Build the connect URL shown when the bridge is idle. */
export function buildBridgeConnectUrl(
  environmentId: string,
  ingressUrl?: string,
): string {
  const base = getClaudeAiBaseUrl(undefined, ingressUrl)
  return `${base}/code?bridge=${environmentId}`
}
/**
 * Build the session URL shown when a session is attached. Delegates to
 * getRemoteSessionUrl for the cse_→session_ prefix translation, then appends
 * the v1-specific ?bridge={environmentId} query.
 */
export function buildBridgeSessionUrl(
  sessionId: string,
  environmentId: string,
  ingressUrl?: string,
): string {
  const sessionUrl = getRemoteSessionUrl(sessionId, ingressUrl)
  return `${sessionUrl}?bridge=${environmentId}`
}
/** Compute the glimmer index for a reverse-sweep shimmer animation. */
export function computeGlimmerIndex(
  tick: number,
  messageWidth: number,
): number {
  // The sweep starts 10 columns past the right edge and travels
  // right-to-left over width + 20 columns before wrapping.
  const travel = messageWidth + 20
  const start = messageWidth + 10
  return start - (tick % travel)
}
/**
 * Split text into three segments by visual column position for shimmer rendering.
 *
 * Uses grapheme segmentation and `stringWidth` so the split is correct for
 * multi-byte characters, emoji, and CJK glyphs.
 *
 * Returns `{ before, shimmer, after }` strings. Both renderers (chalk in
 * bridgeUI.ts and React/Ink in bridge.tsx) apply their own coloring to
 * these segments.
 */
export function computeShimmerSegments(
  text: string,
  glimmerIndex: number,
): { before: string; shimmer: string; after: string } {
  const textWidth = stringWidth(text)
  // The shimmer is a 3-column band centered on glimmerIndex.
  const bandStart = glimmerIndex - 1
  const bandEnd = glimmerIndex + 1
  // Band entirely offscreen → everything renders as "before".
  if (bandStart >= textWidth || bandEnd < 0) {
    return { before: text, shimmer: '', after: '' }
  }
  const clampedStart = Math.max(0, bandStart)
  const parts = { before: '', shimmer: '', after: '' }
  let col = 0
  // Walk grapheme clusters, bucketing each by the column it starts at.
  for (const { segment } of getGraphemeSegmenter().segment(text)) {
    const width = stringWidth(segment)
    if (col + width <= clampedStart) {
      parts.before += segment
    } else if (col > bandEnd) {
      parts.after += segment
    } else {
      parts.shimmer += segment
    }
    col += width
  }
  return parts
}
/** Computed bridge status label and color from connection state. */
export type BridgeStatusInfo = {
  // User-visible status label; the connecting variant ends in an ellipsis (U+2026).
  label:
    | 'Remote Control failed'
    | 'Remote Control reconnecting'
    | 'Remote Control active'
    | 'Remote Control connecting\u2026'
  // Semantic color token; renderers map these to their own color schemes.
  color: 'error' | 'warning' | 'success'
}
/** Derive a status label and color from the bridge connection state. */
export function getBridgeStatus({
  error,
  connected,
  sessionActive,
  reconnecting,
}: {
  error: string | undefined
  connected: boolean
  sessionActive: boolean
  reconnecting: boolean
}): BridgeStatusInfo {
  // Rules are evaluated in priority order; the first match wins.
  const rules: Array<{ matches: boolean; info: BridgeStatusInfo }> = [
    {
      matches: Boolean(error),
      info: { label: 'Remote Control failed', color: 'error' },
    },
    {
      matches: reconnecting,
      info: { label: 'Remote Control reconnecting', color: 'warning' },
    },
    {
      matches: sessionActive || connected,
      info: { label: 'Remote Control active', color: 'success' },
    },
  ]
  const hit = rules.find(rule => rule.matches)
  return hit?.info ?? { label: 'Remote Control connecting\u2026', color: 'warning' }
}
/** Footer text shown when bridge is idle (Ready state). */
export function buildIdleFooterText(url: string): string {
  const prefix = 'Code everywhere with the Claude app or '
  return prefix + url
}
/** Footer text shown when a session is active (Connected state). */
export function buildActiveFooterText(url: string): string {
  const prefix = 'Continue coding in the Claude app or '
  return prefix + url
}
/** Footer text shown when the bridge has failed. Constant — unlike the idle/active footers, there is no URL to embed. */
export const FAILED_FOOTER_TEXT = 'Something went wrong, please try again'
/**
 * Wrap text in an OSC 8 terminal hyperlink. Zero visual width for layout purposes.
 * strip-ansi (used by stringWidth) correctly strips these sequences, so
 * countVisualLines in bridgeUI.ts remains accurate.
 */
export function wrapWithOsc8Link(text: string, url: string): string {
  // ESC ] 8 ; ; <url> BEL … ESC ] 8 ; ; BEL (BEL-terminated OSC 8 form).
  const open = `\x1b]8;;${url}\x07`
  const close = '\x1b]8;;\x07'
  return open + text + close
}
================================================
FILE: restored-src/src/bridge/bridgeUI.ts
================================================
import chalk from 'chalk'
import { toString as qrToString } from 'qrcode'
import {
BRIDGE_FAILED_INDICATOR,
BRIDGE_READY_INDICATOR,
BRIDGE_SPINNER_FRAMES,
} from '../constants/figures.js'
import { stringWidth } from '../ink/stringWidth.js'
import { logForDebugging } from '../utils/debug.js'
import {
buildActiveFooterText,
buildBridgeConnectUrl,
buildBridgeSessionUrl,
buildIdleFooterText,
FAILED_FOOTER_TEXT,
formatDuration,
type StatusState,
TOOL_DISPLAY_EXPIRY_MS,
timestamp,
truncatePrompt,
wrapWithOsc8Link,
} from './bridgeStatusUtil.js'
import type {
BridgeConfig,
BridgeLogger,
SessionActivity,
SpawnMode,
} from './types.js'
// Options passed to qrcode's toString: UTF-8 text rendering, error
// correction level 'L' (keeps the QR small for long URLs), compact output.
const QR_OPTIONS = {
  type: 'utf8' as const,
  errorCorrectionLevel: 'L' as const,
  small: true,
}
/**
 * Generate a QR code for `url` and return its non-empty lines.
 *
 * Restored return type: the bare `Promise` (missing type argument) does not
 * compile; the body resolves to `string[]`.
 */
async function generateQr(url: string): Promise<string[]> {
  const qr = await qrToString(url, QR_OPTIONS)
  return qr.split('\n').filter((line: string) => line.length > 0)
}
/**
 * Create the chalk-based bridge logger/renderer (non-React CLI context).
 *
 * Maintains a redrawable status block at the bottom of the terminal
 * (spinner / status line / QR code / footer) plus permanent log lines
 * printed above it. The block is erased with ANSI cursor movement before
 * each redraw, so `statusLineCount` must track exactly how many visual
 * rows it occupies.
 *
 * Fixes vs. the restored source: `connectingTimer` is typed
 * `ReturnType<typeof setInterval>` (the bare `ReturnType` did not compile),
 * and the per-session bullet line uses an explicit `\n` instead of a raw
 * embedded newline in the template literal.
 */
export function createBridgeLogger(options: {
  verbose: boolean
  write?: (s: string) => void
}): BridgeLogger {
  const write = options.write ?? ((s: string) => process.stdout.write(s))
  const verbose = options.verbose
  // Track how many status lines are currently displayed at the bottom
  let statusLineCount = 0
  // Status state machine
  let currentState: StatusState = 'idle'
  let currentStateText = 'Ready'
  let repoName = ''
  let branch = ''
  let debugLogPath = ''
  // Connect URL (built in printBanner with correct base for staging/prod)
  let connectUrl = ''
  let cachedIngressUrl = ''
  let cachedEnvironmentId = ''
  let activeSessionUrl: string | null = null
  // QR code lines for the current URL
  let qrLines: string[] = []
  let qrVisible = false
  // Tool activity for the second status line
  let lastToolSummary: string | null = null
  let lastToolTime = 0
  // Session count indicator (shown when multi-session mode is enabled)
  let sessionActive = 0
  let sessionMax = 1
  // Spawn mode shown in the session-count line + gates the `w` hint
  let spawnModeDisplay: 'same-dir' | 'worktree' | null = null
  let spawnMode: SpawnMode = 'single-session'
  // Per-session display info for the multi-session bullet list (keyed by compat sessionId)
  const sessionDisplayInfo = new Map<
    string,
    { title?: string; url: string; activity?: SessionActivity }
  >()
  // Connecting spinner state
  let connectingTimer: ReturnType<typeof setInterval> | null = null
  let connectingTick = 0
  /**
   * Count how many visual terminal rows a string occupies, accounting for
   * line wrapping. Each `\n` is one row, and content wider than the terminal
   * wraps to additional rows.
   */
  function countVisualLines(text: string): number {
    // eslint-disable-next-line custom-rules/prefer-use-terminal-size
    const cols = process.stdout.columns || 80 // non-React CLI context
    let count = 0
    // Split on newlines to get logical lines
    for (const logical of text.split('\n')) {
      if (logical.length === 0) {
        // Empty segment between consecutive \n — counts as 1 row
        count++
        continue
      }
      const width = stringWidth(logical)
      count += Math.max(1, Math.ceil(width / cols))
    }
    // The trailing \n in "line\n" produces an empty last element — don't count it
    // because the cursor sits at the start of the next line, not a new visual row.
    if (text.endsWith('\n')) {
      count--
    }
    return count
  }
  /** Write a status line and track its visual line count. */
  function writeStatus(text: string): void {
    write(text)
    statusLineCount += countVisualLines(text)
  }
  /** Clear any currently displayed status lines. */
  function clearStatusLines(): void {
    if (statusLineCount <= 0) return
    logForDebugging(`[bridge:ui] clearStatusLines count=${statusLineCount}`)
    // Move cursor up to the start of the status block, then erase everything below
    write(`\x1b[${statusLineCount}A`) // cursor up N lines
    write('\x1b[J') // erase from cursor to end of screen
    statusLineCount = 0
  }
  /** Print a permanent log line, clearing status first and restoring after. */
  function printLog(line: string): void {
    clearStatusLines()
    write(line)
  }
  /** Regenerate the QR code with the given URL. */
  function regenerateQr(url: string): void {
    generateQr(url)
      .then(lines => {
        qrLines = lines
        renderStatusLine()
      })
      .catch(e => {
        logForDebugging(`QR code generation failed: ${e}`, { level: 'error' })
      })
  }
  /** Render the connecting spinner line (shown before first updateIdleStatus). */
  function renderConnectingLine(): void {
    clearStatusLines()
    const frame =
      BRIDGE_SPINNER_FRAMES[connectingTick % BRIDGE_SPINNER_FRAMES.length]!
    let suffix = ''
    if (repoName) {
      suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
    }
    if (branch) {
      suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
    }
    writeStatus(
      `${chalk.yellow(frame)} ${chalk.yellow('Connecting')}${suffix}\n`,
    )
  }
  /** Start the connecting spinner. Stopped by first updateIdleStatus(). */
  function startConnecting(): void {
    stopConnecting()
    renderConnectingLine()
    connectingTimer = setInterval(() => {
      connectingTick++
      renderConnectingLine()
    }, 150)
  }
  /** Stop the connecting spinner. */
  function stopConnecting(): void {
    if (connectingTimer) {
      clearInterval(connectingTimer)
      connectingTimer = null
    }
  }
  /** Render and write the current status lines based on state. */
  function renderStatusLine(): void {
    if (currentState === 'reconnecting' || currentState === 'failed') {
      // These states are handled separately (updateReconnectingStatus /
      // updateFailedStatus). Return before clearing so callers like toggleQr
      // and setSpawnModeDisplay don't blank the display during these states.
      return
    }
    clearStatusLines()
    const isIdle = currentState === 'idle'
    // QR code above the status line
    if (qrVisible) {
      for (const line of qrLines) {
        writeStatus(`${chalk.dim(line)}\n`)
      }
    }
    // Determine indicator and colors based on state
    const indicator = BRIDGE_READY_INDICATOR
    const indicatorColor = isIdle ? chalk.green : chalk.cyan
    const baseColor = isIdle ? chalk.green : chalk.cyan
    const stateText = baseColor(currentStateText)
    // Build the suffix with repo and branch
    let suffix = ''
    if (repoName) {
      suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
    }
    // In worktree mode each session gets its own branch, so showing the
    // bridge's branch would be misleading.
    if (branch && spawnMode !== 'worktree') {
      suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
    }
    if (process.env.USER_TYPE === 'ant' && debugLogPath) {
      writeStatus(
        `${chalk.yellow('[ANT-ONLY] Logs:')} ${chalk.dim(debugLogPath)}\n`,
      )
    }
    writeStatus(`${indicatorColor(indicator)} ${stateText}${suffix}\n`)
    // Session count and per-session list (multi-session mode only)
    if (sessionMax > 1) {
      const modeHint =
        spawnMode === 'worktree'
          ? 'New sessions will be created in an isolated worktree'
          : 'New sessions will be created in the current directory'
      writeStatus(
        ` ${chalk.dim(`Capacity: ${sessionActive}/${sessionMax} \u00b7 ${modeHint}`)}\n`,
      )
      for (const [, info] of sessionDisplayInfo) {
        const titleText = info.title
          ? truncatePrompt(info.title, 35)
          : chalk.dim('Attached')
        const titleLinked = wrapWithOsc8Link(titleText, info.url)
        const act = info.activity
        const showAct = act && act.type !== 'result' && act.type !== 'error'
        const actText = showAct
          ? chalk.dim(` ${truncatePrompt(act.summary, 40)}`)
          : ''
        // One bullet row per session; explicit \n keeps countVisualLines exact.
        writeStatus(` ${titleLinked}${actText}\n`)
      }
    }
    // Mode line for spawn modes with a single slot (or true single-session mode)
    if (sessionMax === 1) {
      const modeText =
        spawnMode === 'single-session'
          ? 'Single session \u00b7 exits when complete'
          : spawnMode === 'worktree'
            ? `Capacity: ${sessionActive}/1 \u00b7 New sessions will be created in an isolated worktree`
            : `Capacity: ${sessionActive}/1 \u00b7 New sessions will be created in the current directory`
      writeStatus(` ${chalk.dim(modeText)}\n`)
    }
    // Tool activity line for single-session mode
    if (
      sessionMax === 1 &&
      !isIdle &&
      lastToolSummary &&
      Date.now() - lastToolTime < TOOL_DISPLAY_EXPIRY_MS
    ) {
      writeStatus(` ${chalk.dim(truncatePrompt(lastToolSummary, 60))}\n`)
    }
    // Blank line separator before footer
    const url = activeSessionUrl ?? connectUrl
    if (url) {
      writeStatus('\n')
      const footerText = isIdle
        ? buildIdleFooterText(url)
        : buildActiveFooterText(url)
      const qrHint = qrVisible
        ? chalk.dim.italic('space to hide QR code')
        : chalk.dim.italic('space to show QR code')
      const toggleHint = spawnModeDisplay
        ? chalk.dim.italic(' \u00b7 w to toggle spawn mode')
        : ''
      writeStatus(`${chalk.dim(footerText)}\n`)
      writeStatus(`${qrHint}${toggleHint}\n`)
    }
  }
  return {
    printBanner(config: BridgeConfig, environmentId: string): void {
      cachedIngressUrl = config.sessionIngressUrl
      cachedEnvironmentId = environmentId
      connectUrl = buildBridgeConnectUrl(environmentId, cachedIngressUrl)
      regenerateQr(connectUrl)
      if (verbose) {
        write(chalk.dim(`Remote Control`) + ` v${MACRO.VERSION}\n`)
      }
      if (verbose) {
        if (config.spawnMode !== 'single-session') {
          write(chalk.dim(`Spawn mode: `) + `${config.spawnMode}\n`)
          write(
            chalk.dim(`Max concurrent sessions: `) + `${config.maxSessions}\n`,
          )
        }
        write(chalk.dim(`Environment ID: `) + `${environmentId}\n`)
      }
      if (config.sandbox) {
        write(chalk.dim(`Sandbox: `) + `${chalk.green('Enabled')}\n`)
      }
      write('\n')
      // Start connecting spinner — first updateIdleStatus() will stop it
      startConnecting()
    },
    logSessionStart(sessionId: string, prompt: string): void {
      if (verbose) {
        const short = truncatePrompt(prompt, 80)
        printLog(
          chalk.dim(`[${timestamp()}]`) +
            ` Session started: ${chalk.white(`"${short}"`)} (${chalk.dim(sessionId)})\n`,
        )
      }
    },
    logSessionComplete(sessionId: string, durationMs: number): void {
      printLog(
        chalk.dim(`[${timestamp()}]`) +
          ` Session ${chalk.green('completed')} (${formatDuration(durationMs)}) ${chalk.dim(sessionId)}\n`,
      )
    },
    logSessionFailed(sessionId: string, error: string): void {
      printLog(
        chalk.dim(`[${timestamp()}]`) +
          ` Session ${chalk.red('failed')}: ${error} ${chalk.dim(sessionId)}\n`,
      )
    },
    logStatus(message: string): void {
      printLog(chalk.dim(`[${timestamp()}]`) + ` ${message}\n`)
    },
    logVerbose(message: string): void {
      if (verbose) {
        printLog(chalk.dim(`[${timestamp()}] ${message}`) + '\n')
      }
    },
    logError(message: string): void {
      printLog(chalk.red(`[${timestamp()}] Error: ${message}`) + '\n')
    },
    logReconnected(disconnectedMs: number): void {
      printLog(
        chalk.dim(`[${timestamp()}]`) +
          ` ${chalk.green('Reconnected')} after ${formatDuration(disconnectedMs)}\n`,
      )
    },
    setRepoInfo(repo: string, branchName: string): void {
      repoName = repo
      branch = branchName
    },
    setDebugLogPath(path: string): void {
      debugLogPath = path
    },
    updateIdleStatus(): void {
      stopConnecting()
      currentState = 'idle'
      currentStateText = 'Ready'
      lastToolSummary = null
      lastToolTime = 0
      activeSessionUrl = null
      regenerateQr(connectUrl)
      renderStatusLine()
    },
    setAttached(sessionId: string): void {
      stopConnecting()
      currentState = 'attached'
      currentStateText = 'Connected'
      lastToolSummary = null
      lastToolTime = 0
      // Multi-session: keep footer/QR on the environment connect URL so users
      // can spawn more sessions. Per-session links are in the bullet list.
      if (sessionMax <= 1) {
        activeSessionUrl = buildBridgeSessionUrl(
          sessionId,
          cachedEnvironmentId,
          cachedIngressUrl,
        )
        regenerateQr(activeSessionUrl)
      }
      renderStatusLine()
    },
    updateReconnectingStatus(delayStr: string, elapsedStr: string): void {
      stopConnecting()
      clearStatusLines()
      currentState = 'reconnecting'
      // QR code above the status line
      if (qrVisible) {
        for (const line of qrLines) {
          writeStatus(`${chalk.dim(line)}\n`)
        }
      }
      const frame =
        BRIDGE_SPINNER_FRAMES[connectingTick % BRIDGE_SPINNER_FRAMES.length]!
      connectingTick++
      writeStatus(
        `${chalk.yellow(frame)} ${chalk.yellow('Reconnecting')} ${chalk.dim('\u00b7')} ${chalk.dim(`retrying in ${delayStr}`)} ${chalk.dim('\u00b7')} ${chalk.dim(`disconnected ${elapsedStr}`)}\n`,
      )
    },
    updateFailedStatus(error: string): void {
      stopConnecting()
      clearStatusLines()
      currentState = 'failed'
      let suffix = ''
      if (repoName) {
        suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
      }
      if (branch) {
        suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
      }
      writeStatus(
        `${chalk.red(BRIDGE_FAILED_INDICATOR)} ${chalk.red('Remote Control Failed')}${suffix}\n`,
      )
      writeStatus(`${chalk.dim(FAILED_FOOTER_TEXT)}\n`)
      if (error) {
        writeStatus(`${chalk.red(error)}\n`)
      }
    },
    updateSessionStatus(
      _sessionId: string,
      _elapsed: string,
      activity: SessionActivity,
      _trail: string[],
    ): void {
      // Cache tool activity for the second status line
      if (activity.type === 'tool_start') {
        lastToolSummary = activity.summary
        lastToolTime = Date.now()
      }
      renderStatusLine()
    },
    clearStatus(): void {
      stopConnecting()
      clearStatusLines()
    },
    toggleQr(): void {
      qrVisible = !qrVisible
      renderStatusLine()
    },
    updateSessionCount(active: number, max: number, mode: SpawnMode): void {
      if (sessionActive === active && sessionMax === max && spawnMode === mode)
        return
      sessionActive = active
      sessionMax = max
      spawnMode = mode
      // Don't re-render here — the status ticker calls renderStatusLine
      // on its own cadence, and the next tick will pick up the new values.
    },
    setSpawnModeDisplay(mode: 'same-dir' | 'worktree' | null): void {
      if (spawnModeDisplay === mode) return
      spawnModeDisplay = mode
      // Also sync the #21118-added spawnMode so the next render shows correct
      // mode hint + branch visibility. Don't render here — matches
      // updateSessionCount: called before printBanner (initial setup) and
      // again from the `w` handler (which follows with refreshDisplay).
      if (mode) spawnMode = mode
    },
    addSession(sessionId: string, url: string): void {
      sessionDisplayInfo.set(sessionId, { url })
    },
    updateSessionActivity(sessionId: string, activity: SessionActivity): void {
      const info = sessionDisplayInfo.get(sessionId)
      if (!info) return
      info.activity = activity
    },
    setSessionTitle(sessionId: string, title: string): void {
      const info = sessionDisplayInfo.get(sessionId)
      if (!info) return
      info.title = title
      // Guard against reconnecting/failed — renderStatusLine clears then returns
      // early for those states, which would erase the spinner/error.
      if (currentState === 'reconnecting' || currentState === 'failed') return
      if (sessionMax === 1) {
        // Single-session: show title in the main status line too.
        currentState = 'titled'
        currentStateText = truncatePrompt(title, 40)
      }
      renderStatusLine()
    },
    removeSession(sessionId: string): void {
      sessionDisplayInfo.delete(sessionId)
    },
    refreshDisplay(): void {
      // Skip during reconnecting/failed — renderStatusLine clears then returns
      // early for those states, which would erase the spinner/error.
      if (currentState === 'reconnecting' || currentState === 'failed') return
      renderStatusLine()
    },
  }
}
================================================
FILE: restored-src/src/bridge/capacityWake.ts
================================================
/**
* Shared capacity-wake primitive for bridge poll loops.
*
* Both replBridge.ts and bridgeMain.ts need to sleep while "at capacity"
* but wake early when either (a) the outer loop signal aborts (shutdown),
* or (b) capacity frees up (session done / transport lost). This module
* encapsulates the mutable wake-controller + two-signal merger that both
* poll loops previously duplicated byte-for-byte.
*/
export type CapacitySignal = { signal: AbortSignal; cleanup: () => void }
export type CapacityWake = {
  /**
   * Create a signal that aborts when either the outer loop signal or the
   * capacity-wake controller fires. Returns the merged signal and a cleanup
   * function that removes listeners when the sleep resolves normally
   * (without abort).
   */
  signal(): CapacitySignal
  /**
   * Abort the current at-capacity sleep and arm a fresh controller so the
   * poll loop immediately re-checks for new work.
   */
  wake(): void
}
export function createCapacityWake(outerSignal: AbortSignal): CapacityWake {
  // Re-armed on every wake(); sleeps listen on the controller live at the
  // time signal() was called.
  let wakeCtrl = new AbortController()

  const wake = (): void => {
    wakeCtrl.abort()
    wakeCtrl = new AbortController()
  }

  const signal = (): CapacitySignal => {
    const merged = new AbortController()
    const fire = (): void => merged.abort()
    // Either source already fired: return an immediately-aborted signal and
    // skip listener registration entirely.
    if (outerSignal.aborted || wakeCtrl.signal.aborted) {
      merged.abort()
      return { signal: merged.signal, cleanup: () => {} }
    }
    // Capture the current controller's signal so cleanup targets the same
    // object even if wake() swaps wakeCtrl in the meantime.
    const capSignal = wakeCtrl.signal
    outerSignal.addEventListener('abort', fire, { once: true })
    capSignal.addEventListener('abort', fire, { once: true })
    const cleanup = (): void => {
      outerSignal.removeEventListener('abort', fire)
      capSignal.removeEventListener('abort', fire)
    }
    return { signal: merged.signal, cleanup }
  }

  return { signal, wake }
}
================================================
FILE: restored-src/src/bridge/codeSessionApi.ts
================================================
/**
* Thin HTTP wrappers for the CCR v2 code-session API.
*
* Separate file from remoteBridgeCore.ts so the SDK /bridge subpath can
* export createCodeSession + fetchRemoteCredentials without bundling the
* heavy CLI tree (analytics, transport, etc.). Callers supply explicit
* accessToken + baseUrl — no implicit auth or config reads.
*/
import axios from 'axios'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { jsonStringify } from '../utils/slowOperations.js'
import { extractErrorDetail } from './debugUtils.js'
const ANTHROPIC_VERSION = '2023-06-01'
/**
 * Build the OAuth bearer headers for CCR code-session requests.
 *
 * Restored return type: the bare `Record` (missing type arguments) does not
 * compile; the value is a plain string→string header map.
 */
function oauthHeaders(accessToken: string): Record<string, string> {
  return {
    Authorization: `Bearer ${accessToken}`,
    'Content-Type': 'application/json',
    'anthropic-version': ANTHROPIC_VERSION,
  }
}
/**
 * Create a CCR v2 code session via POST /v1/code/sessions.
 *
 * Returns the new session's `cse_*` id, or null on any failure (request
 * error, non-2xx status, or malformed response body). All failures are
 * logged for debugging rather than thrown.
 *
 * Restored return type: the bare `Promise` (missing type argument) does not
 * compile; the body resolves to `string | null`.
 */
export async function createCodeSession(
  baseUrl: string,
  accessToken: string,
  title: string,
  timeoutMs: number,
  tags?: string[],
): Promise<string | null> {
  const url = `${baseUrl}/v1/code/sessions`
  let response
  try {
    response = await axios.post(
      url,
      // bridge: {} is the positive signal for the oneof runner — omitting it
      // (or sending environment_id: "") now 400s. BridgeRunner is an empty
      // message today; it's a placeholder for future bridge-specific options.
      { title, bridge: {}, ...(tags?.length ? { tags } : {}) },
      {
        headers: oauthHeaders(accessToken),
        timeout: timeoutMs,
        // 4xx resolves (handled below); only 5xx/network errors throw.
        validateStatus: s => s < 500,
      },
    )
  } catch (err: unknown) {
    logForDebugging(
      `[code-session] Session create request failed: ${errorMessage(err)}`,
    )
    return null
  }
  if (response.status !== 200 && response.status !== 201) {
    const detail = extractErrorDetail(response.data)
    logForDebugging(
      `[code-session] Session create failed ${response.status}${detail ? `: ${detail}` : ''}`,
    )
    return null
  }
  // Narrow the untyped response body step by step before trusting session.id.
  const data: unknown = response.data
  if (
    !data ||
    typeof data !== 'object' ||
    !('session' in data) ||
    !data.session ||
    typeof data.session !== 'object' ||
    !('id' in data.session) ||
    typeof data.session.id !== 'string' ||
    !data.session.id.startsWith('cse_')
  ) {
    logForDebugging(
      `[code-session] No session.id (cse_*) in response: ${jsonStringify(data).slice(0, 200)}`,
    )
    return null
  }
  return data.session.id
}
/**
 * Credentials from POST /bridge. JWT is opaque — do not decode.
 * Each /bridge call bumps worker_epoch server-side (it IS the register).
 */
export type RemoteCredentials = {
  // Opaque bearer token for the worker transport — never decode client-side.
  worker_jwt: string
  // Base URL the worker should send traffic to.
  api_base_url: string
  // Token lifetime — presumably seconds (OAuth convention); confirm with server docs.
  expires_in: number
  // Monotonic registration epoch; incremented by the server on every /bridge call.
  worker_epoch: number
}
export async function fetchRemoteCredentials(
sessionId: string,
baseUrl: string,
accessToken: string,
timeoutMs: number,
trustedDeviceToken?: string,
): Promise {
const url = `${baseUrl}/v1/code/sessions/${sessionId}/bridge`
const headers = oauthHeaders(accessToken)
if (trustedDeviceToken) {
headers['X-Trusted-Device-Token'] = trustedDeviceToken
}
let response
try {
response = await axios.post(
url,
{},
{
headers,
timeout: timeoutMs,
validateStatus: s => s < 500,
},
)
} catch (err: unknown) {
logForDebugging(
`[code-session] /bridge request failed: ${errorMessage(err)}`,
)
return null
}
if (response.status !== 200) {
const detail = extractErrorDetail(response.data)
logForDebugging(
`[code-session] /bridge failed ${response.status}${detail ? `: ${detail}` : ''}`,
)
return null
}
const data: unknown = response.data
if (
data === null ||
typeof data !== 'object' ||
!('worker_jwt' in data) ||
typeof data.worker_jwt !== 'string' ||
!('expires_in' in data) ||
typeof data.expires_in !== 'number' ||
!('api_base_url' in data) ||
typeof data.api_base_url !== 'string' ||
!('worker_epoch' in data)
) {
logForDebugging(
`[code-session] /bridge response malformed (need worker_jwt, expires_in, api_base_url, worker_epoch): ${jsonStringify(data).slice(0, 200)}`,
)
return null
}
// protojson serializes int64 as a string to avoid JS precision loss;
// Go may also return a number depending on encoder settings.
const rawEpoch = data.worker_epoch
const epoch = typeof rawEpoch === 'string' ? Number(rawEpoch) : rawEpoch
if (
typeof epoch !== 'number' ||
!Number.isFinite(epoch) ||
!Number.isSafeInteger(epoch)
) {
logForDebugging(
`[code-session] /bridge worker_epoch invalid: ${jsonStringify(rawEpoch)}`,
)
return null
}
return {
worker_jwt: data.worker_jwt,
api_base_url: data.api_base_url,
expires_in: data.expires_in,
worker_epoch: epoch,
}
}
================================================
FILE: restored-src/src/bridge/createSession.ts
================================================
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { extractErrorDetail } from './debugUtils.js'
import { toCompatSessionId } from './sessionIdCompat.js'
// Session "source" context entry: the git repository the session works from.
type GitSource = {
  type: 'git_repository'
  // Canonical https URL of the repository.
  url: string
  // Branch/ref to check out; omitted when none could be determined.
  revision?: string
}
// Session "outcome" context entry: where results are expected to land.
type GitOutcome = {
  type: 'git_repository'
  git_info: { type: 'github'; repo: string; branches: string[] }
}
// Events must be wrapped in { type: 'event', data: SDKMessage } for the
// POST /v1/sessions endpoint (discriminated union format).
type SessionEvent = {
  type: 'event'
  data: SDKMessage
}
/**
 * Create a session on a bridge environment via POST /v1/sessions.
 *
 * Used by both `claude remote-control` (empty session so the user has somewhere to
 * type immediately) and `/remote-control` (session pre-populated with conversation
 * history).
 *
 * Returns the session ID on success, or null if creation fails (non-fatal).
 *
 * Restored return type: the bare `Promise` (missing type argument) does not
 * compile; the body resolves to `string | null`. The two git-parse branches
 * previously duplicated the source/outcome construction verbatim; it is now
 * shared via a local helper.
 */
export async function createBridgeSession({
  environmentId,
  title,
  events,
  gitRepoUrl,
  branch,
  signal,
  baseUrl: baseUrlOverride,
  getAccessToken,
  permissionMode,
}: {
  environmentId: string
  title?: string
  events: SessionEvent[]
  gitRepoUrl: string | null
  branch: string
  signal: AbortSignal
  baseUrl?: string
  getAccessToken?: () => string | undefined
  permissionMode?: string
}): Promise<string | null> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { parseGitHubRepository } = await import('../utils/detectRepository.js')
  const { getDefaultBranch } = await import('../utils/git.js')
  const { getMainLoopModel } = await import('../utils/model/model.js')
  const { default: axios } = await import('axios')
  const accessToken =
    getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!accessToken) {
    logForDebugging('[bridge] No access token for session creation')
    return null
  }
  const orgUUID = await getOrganizationUUID()
  if (!orgUUID) {
    logForDebugging('[bridge] No org UUID for session creation')
    return null
  }
  // Build git source and outcome context
  let gitSource: GitSource | null = null
  let gitOutcome: GitOutcome | null = null
  if (gitRepoUrl) {
    const { parseGitRemote } = await import('../utils/detectRepository.js')
    // Both parse paths produce identical source/outcome shapes — only the
    // repository URL differs — so the construction is shared here.
    const buildGitContext = async (
      repoUrl: string,
      owner: string,
      name: string,
    ): Promise<void> => {
      const revision = branch || (await getDefaultBranch()) || undefined
      gitSource = {
        type: 'git_repository',
        url: repoUrl,
        revision,
      }
      gitOutcome = {
        type: 'git_repository',
        git_info: {
          type: 'github',
          repo: `${owner}/${name}`,
          branches: [`claude/${branch || 'task'}`],
        },
      }
    }
    const parsed = parseGitRemote(gitRepoUrl)
    if (parsed) {
      const { host, owner, name } = parsed
      await buildGitContext(`https://${host}/${owner}/${name}`, owner, name)
    } else {
      // Fallback: try parseGitHubRepository for owner/repo format
      const ownerRepo = parseGitHubRepository(gitRepoUrl)
      if (ownerRepo) {
        const [owner, name] = ownerRepo.split('/')
        if (owner && name) {
          await buildGitContext(
            `https://github.com/${owner}/${name}`,
            owner,
            name,
          )
        }
      }
    }
  }
  const requestBody = {
    ...(title !== undefined && { title }),
    events,
    session_context: {
      sources: gitSource ? [gitSource] : [],
      outcomes: gitOutcome ? [gitOutcome] : [],
      model: getMainLoopModel(),
    },
    environment_id: environmentId,
    source: 'remote-control',
    ...(permissionMode && { permission_mode: permissionMode }),
  }
  const headers = {
    ...getOAuthHeaders(accessToken),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': orgUUID,
  }
  const url = `${baseUrlOverride ?? getOauthConfig().BASE_API_URL}/v1/sessions`
  let response
  try {
    response = await axios.post(url, requestBody, {
      headers,
      signal,
      validateStatus: s => s < 500,
    })
  } catch (err: unknown) {
    logForDebugging(
      `[bridge] Session creation request failed: ${errorMessage(err)}`,
    )
    return null
  }
  const isSuccess = response.status === 200 || response.status === 201
  if (!isSuccess) {
    const detail = extractErrorDetail(response.data)
    logForDebugging(
      `[bridge] Session creation failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
    )
    return null
  }
  // Narrow the untyped body before trusting the id field.
  const sessionData: unknown = response.data
  if (
    !sessionData ||
    typeof sessionData !== 'object' ||
    !('id' in sessionData) ||
    typeof sessionData.id !== 'string'
  ) {
    logForDebugging('[bridge] No session ID in response')
    return null
  }
  return sessionData.id
}
/**
 * Fetch a bridge session via GET /v1/sessions/{id}.
 *
 * Returns the session's environment_id (for `--session-id` resume) and title,
 * or null on any failure. Uses the same org-scoped headers as create/archive —
 * the environments-level client in bridgeApi.ts uses a different beta header
 * and no org UUID, which makes the Sessions API return 404.
 */
export async function getBridgeSession(
  sessionId: string,
  opts?: { baseUrl?: string; getAccessToken?: () => string | undefined },
): Promise<{ environment_id?: string; title?: string } | null> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { default: axios } = await import('axios')

  const token = opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!token) {
    logForDebugging('[bridge] No access token for session fetch')
    return null
  }
  const organizationUuid = await getOrganizationUUID()
  if (!organizationUuid) {
    logForDebugging('[bridge] No org UUID for session fetch')
    return null
  }

  const requestHeaders = {
    ...getOAuthHeaders(token),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': organizationUuid,
  }
  const apiBase = opts?.baseUrl ?? getOauthConfig().BASE_API_URL
  const url = `${apiBase}/v1/sessions/${sessionId}`
  logForDebugging(`[bridge] Fetching session ${sessionId}`)

  let response
  try {
    response = await axios.get<{ environment_id?: string; title?: string }>(
      url,
      { headers: requestHeaders, timeout: 10_000, validateStatus: s => s < 500 },
    )
  } catch (err: unknown) {
    logForDebugging(
      `[bridge] Session fetch request failed: ${errorMessage(err)}`,
    )
    return null
  }
  if (response.status === 200) {
    return response.data
  }
  const detail = extractErrorDetail(response.data)
  logForDebugging(
    `[bridge] Session fetch failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
  )
  return null
}
/**
 * Archive a bridge session via POST /v1/sessions/{id}/archive.
 *
 * The CCR server never auto-archives sessions — archival is always an
 * explicit client action. Both `claude remote-control` (standalone bridge) and the
 * always-on `/remote-control` REPL bridge call this during shutdown to archive any
 * sessions that are still alive.
 *
 * The archive endpoint accepts sessions in any status (running, idle,
 * requires_action, pending) and returns 409 if already archived, making
 * it safe to call even if the server-side runner already archived the
 * session.
 *
 * Callers must handle errors — this function has no try/catch; 5xx,
 * timeouts, and network errors throw. Archival is best-effort during
 * cleanup; call sites wrap with .catch().
 *
 * Restored return type: the bare `Promise` (missing type argument) does not
 * compile; the function resolves with no value.
 */
export async function archiveBridgeSession(
  sessionId: string,
  opts?: {
    baseUrl?: string
    getAccessToken?: () => string | undefined
    timeoutMs?: number
  },
): Promise<void> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { default: axios } = await import('axios')
  const accessToken =
    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!accessToken) {
    logForDebugging('[bridge] No access token for session archive')
    return
  }
  const orgUUID = await getOrganizationUUID()
  if (!orgUUID) {
    logForDebugging('[bridge] No org UUID for session archive')
    return
  }
  const headers = {
    ...getOAuthHeaders(accessToken),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': orgUUID,
  }
  const url = `${opts?.baseUrl ?? getOauthConfig().BASE_API_URL}/v1/sessions/${sessionId}/archive`
  logForDebugging(`[bridge] Archiving session ${sessionId}`)
  // No try/catch by design — see the contract note above.
  const response = await axios.post(
    url,
    {},
    {
      headers,
      timeout: opts?.timeoutMs ?? 10_000,
      validateStatus: s => s < 500,
    },
  )
  if (response.status === 200) {
    logForDebugging(`[bridge] Session ${sessionId} archived successfully`)
  } else {
    const detail = extractErrorDetail(response.data)
    logForDebugging(
      `[bridge] Session archive failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
    )
  }
}
/**
 * Update the title of a bridge session via PATCH /v1/sessions/{id}.
 *
 * Called when the user renames a session via /rename while a bridge
 * connection is active, so the title stays in sync on claude.ai/code.
 *
 * Errors are swallowed — title sync is best-effort.
 *
 * Restored return type: the bare `Promise` (missing type argument) does not
 * compile; the function resolves with no value.
 */
export async function updateBridgeSessionTitle(
  sessionId: string,
  title: string,
  opts?: { baseUrl?: string; getAccessToken?: () => string | undefined },
): Promise<void> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { default: axios } = await import('axios')
  const accessToken =
    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!accessToken) {
    logForDebugging('[bridge] No access token for session title update')
    return
  }
  const orgUUID = await getOrganizationUUID()
  if (!orgUUID) {
    logForDebugging('[bridge] No org UUID for session title update')
    return
  }
  const headers = {
    ...getOAuthHeaders(accessToken),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': orgUUID,
  }
  // Compat gateway only accepts session_* (compat/convert.go:27). v2 callers
  // pass raw cse_*; retag here so all callers can pass whatever they hold.
  // Idempotent for v1's session_* and bridgeMain's pre-converted compatSessionId.
  const compatId = toCompatSessionId(sessionId)
  const url = `${opts?.baseUrl ?? getOauthConfig().BASE_API_URL}/v1/sessions/${compatId}`
  logForDebugging(`[bridge] Updating session title: ${compatId} → ${title}`)
  try {
    const response = await axios.patch(
      url,
      { title },
      { headers, timeout: 10_000, validateStatus: s => s < 500 },
    )
    if (response.status === 200) {
      logForDebugging(`[bridge] Session title updated successfully`)
    } else {
      const detail = extractErrorDetail(response.data)
      logForDebugging(
        `[bridge] Session title update failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
      )
    }
  } catch (err: unknown) {
    logForDebugging(
      `[bridge] Session title update request failed: ${errorMessage(err)}`,
    )
  }
}
================================================
FILE: restored-src/src/bridge/debugUtils.ts
================================================
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../services/analytics/index.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { jsonStringify } from '../utils/slowOperations.js'
/** Max characters of any debug payload before truncation (see debugTruncate/debugBody). */
const DEBUG_MSG_LIMIT = 2000
// JSON field names whose values must never appear verbatim in debug logs.
const SECRET_FIELD_NAMES = [
  'session_ingress_token',
  'environment_secret',
  'access_token',
  'secret',
  'token',
]
// Matches `"<field>": "<value>"` pairs for any secret field name, globally.
// Captures: [1] field name, [2] value. Values containing `"` stop the match
// early ([^"]*), which is acceptable for best-effort debug redaction.
const SECRET_PATTERN = new RegExp(
  `"(${SECRET_FIELD_NAMES.join('|')})"\\s*:\\s*"([^"]*)"`,
  'g',
)
// Values shorter than this are fully redacted; longer ones keep a
// prefix/suffix so operators can still correlate tokens across logs.
const REDACT_MIN_LENGTH = 16
/**
 * Mask secret field values inside a JSON-ish string. Values shorter than
 * REDACT_MIN_LENGTH are fully replaced with [REDACTED]; longer values keep
 * the first 8 and last 4 characters for log correlation.
 */
export function redactSecrets(s: string): string {
  const mask = (field: string, value: string): string => {
    const shown =
      value.length < REDACT_MIN_LENGTH
        ? '[REDACTED]'
        : `${value.slice(0, 8)}...${value.slice(-4)}`
    return `"${field}":"${shown}"`
  }
  return s.replace(SECRET_PATTERN, (_m, field: string, value: string) =>
    mask(field, value),
  )
}
/** Truncate a string for debug logging, collapsing newlines to literal "\n". */
export function debugTruncate(s: string): string {
  const flat = s.split('\n').join('\\n')
  return flat.length <= DEBUG_MSG_LIMIT
    ? flat
    : `${flat.slice(0, DEBUG_MSG_LIMIT)}... (${flat.length} chars)`
}
/** Redact secrets, then truncate a JSON-serializable value for debug logging. */
export function debugBody(data: unknown): string {
  const serialized = typeof data === 'string' ? data : jsonStringify(data)
  const clean = redactSecrets(serialized)
  return clean.length <= DEBUG_MSG_LIMIT
    ? clean
    : `${clean.slice(0, DEBUG_MSG_LIMIT)}... (${clean.length} chars)`
}
/**
* Extract a descriptive error message from an axios error (or any error).
* For HTTP errors, appends the server's response body message if available,
* since axios's default message only includes the status code.
*/
export function describeAxiosError(err: unknown): string {
const msg = errorMessage(err)
if (err && typeof err === 'object' && 'response' in err) {
const response = (err as { response?: { data?: unknown } }).response
if (response?.data && typeof response.data === 'object') {
const data = response.data as Record
const detail =
typeof data.message === 'string'
? data.message
: typeof data.error === 'object' &&
data.error &&
'message' in data.error &&
typeof (data.error as Record).message ===
'string'
? (data.error as Record).message
: undefined
if (detail) {
return `${msg}: ${detail}`
}
}
}
return msg
}
/**
 * Extract the HTTP status code from an axios error, if present.
 * Returns undefined for non-HTTP errors (e.g. network failures).
 */
export function extractHttpStatus(err: unknown): number | undefined {
  if (!err || typeof err !== 'object' || !('response' in err)) {
    return undefined
  }
  const response = (err as { response?: { status?: unknown } }).response
  return typeof response?.status === 'number' ? response.status : undefined
}
/**
 * Pull a human-readable message out of an API error response body.
 * Checks `data.message` first, then `data.error.message`.
 */
export function extractErrorDetail(data: unknown): string | undefined {
  if (!data || typeof data !== 'object') return undefined
  const body = data as { message?: unknown; error?: unknown }
  if (typeof body.message === 'string') {
    return body.message
  }
  const nested = body.error
  if (nested && typeof nested === 'object' && 'message' in nested) {
    const detail = (nested as { message?: unknown }).message
    if (typeof detail === 'string') return detail
  }
  return undefined
}
/**
 * Log a bridge init skip — debug message + `tengu_bridge_repl_skipped`
 * analytics event. Centralizes the event name and the AnalyticsMetadata
 * cast so call sites don't each repeat the 5-line boilerplate.
 */
export function logBridgeSkip(
  reason: string,
  debugMsg?: string,
  v2?: boolean,
): void {
  if (debugMsg) logForDebugging(debugMsg)
  const payload = {
    reason:
      reason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    // Only attach the v2 flag when the caller supplied it.
    ...(v2 !== undefined ? { v2 } : {}),
  }
  logEvent('tengu_bridge_repl_skipped', payload)
}
================================================
FILE: restored-src/src/bridge/envLessBridgeConfig.ts
================================================
import { z } from 'zod/v4'
import { getFeatureValue_DEPRECATED } from '../services/analytics/growthbook.js'
import { lazySchema } from '../utils/lazySchema.js'
import { lt } from '../utils/semver.js'
import { isEnvLessBridgeEnabled } from './bridgeEnabled.js'
/**
 * Timing/behavior knobs for the env-less (v2) bridge. Fetched from the
 * `tengu_bridge_repl_v2_config` GrowthBook feature and validated before use;
 * invalid configs fall back to DEFAULT_ENV_LESS_BRIDGE_CONFIG wholesale.
 */
export type EnvLessBridgeConfig = {
  // withRetry — init-phase backoff (createSession, POST /bridge, recovery /bridge)
  init_retry_max_attempts: number
  init_retry_base_delay_ms: number
  init_retry_jitter_fraction: number
  init_retry_max_delay_ms: number
  // axios timeout for POST /sessions, POST /bridge, POST /archive
  http_timeout_ms: number
  // BoundedUUIDSet ring size (echo + re-delivery dedup)
  uuid_dedup_buffer_size: number
  // CCRClient worker heartbeat cadence. Server TTL is 60s — 20s gives 3× margin.
  heartbeat_interval_ms: number
  // ±fraction of interval — per-beat jitter to spread fleet load.
  heartbeat_jitter_fraction: number
  // Fire proactive JWT refresh this long before expires_in. Larger buffer =
  // more frequent refresh (refresh cadence ≈ expires_in - buffer).
  token_refresh_buffer_ms: number
  // Archive POST timeout in teardown(). Distinct from http_timeout_ms because
  // gracefulShutdown races runCleanupFunctions() against a 2s cap — a 10s
  // axios timeout on a slow/stalled archive burns the whole budget on a
  // request that forceExit will kill anyway.
  teardown_archive_timeout_ms: number
  // Deadline for onConnect after transport.connect(). If neither onConnect
  // nor onClose fires before this, emit tengu_bridge_repl_connect_timeout
  // — the only telemetry for the ~1% of sessions that emit `started` then
  // go silent (no error, no event, just nothing).
  connect_timeout_ms: number
  // Semver floor for the env-less bridge path. Separate from the v1
  // tengu_bridge_min_version config so a v2-specific bug can force upgrades
  // without blocking v1 (env-based) clients, and vice versa.
  min_version: string
  // When true, tell users their claude.ai app may be too old to see v2
  // sessions — lets us roll the v2 bridge before the app ships the new
  // session-list query.
  should_show_app_upgrade_message: boolean
}
/**
 * Fallback config used when GrowthBook is unreachable or the fetched value
 * fails schema validation. Every value here must satisfy the schema's
 * floors/caps so the fallback itself is always valid.
 */
export const DEFAULT_ENV_LESS_BRIDGE_CONFIG: EnvLessBridgeConfig = {
  init_retry_max_attempts: 3,
  init_retry_base_delay_ms: 500,
  init_retry_jitter_fraction: 0.25,
  init_retry_max_delay_ms: 4000,
  http_timeout_ms: 10_000,
  uuid_dedup_buffer_size: 2000,
  heartbeat_interval_ms: 20_000,
  heartbeat_jitter_fraction: 0.1,
  token_refresh_buffer_ms: 300_000,
  teardown_archive_timeout_ms: 1500,
  connect_timeout_ms: 15_000,
  // '0.0.0' floor means "no minimum" — every client version passes.
  min_version: '0.0.0',
  should_show_app_upgrade_message: false,
}
// Floors reject the whole object on violation (fall back to DEFAULT) rather
// than partially trusting — same defense-in-depth as pollConfig.ts.
const envLessBridgeConfigSchema = lazySchema(() =>
  z.object({
    init_retry_max_attempts: z.number().int().min(1).max(10).default(3),
    init_retry_base_delay_ms: z.number().int().min(100).default(500),
    init_retry_jitter_fraction: z.number().min(0).max(1).default(0.25),
    init_retry_max_delay_ms: z.number().int().min(500).default(4000),
    http_timeout_ms: z.number().int().min(2000).default(10_000),
    uuid_dedup_buffer_size: z.number().int().min(100).max(50_000).default(2000),
    // Server TTL is 60s. Floor 5s prevents thrash; cap 30s keeps ≥2× margin.
    heartbeat_interval_ms: z
      .number()
      .int()
      .min(5000)
      .max(30_000)
      .default(20_000),
    // ±fraction per beat. Cap 0.5: at max interval (30s) × 1.5 = 45s worst case,
    // still under the 60s TTL.
    heartbeat_jitter_fraction: z.number().min(0).max(0.5).default(0.1),
    // Floor 30s prevents tight-looping. Cap 30min rejects buffer-vs-delay
    // semantic inversion: ops entering expires_in-5min (the *delay until
    // refresh*) instead of 5min (the *buffer before expiry*) yields
    // delayMs = expires_in - buffer ≈ 5min instead of ≈4h. Both are positive
    // durations so .min() alone can't distinguish; .max() catches the
    // inverted value since buffer ≥ 30min is nonsensical for a multi-hour JWT.
    token_refresh_buffer_ms: z
      .number()
      .int()
      .min(30_000)
      .max(1_800_000)
      .default(300_000),
    // Cap 2000 keeps this under gracefulShutdown's 2s cleanup race — a higher
    // timeout just lies to axios since forceExit kills the socket regardless.
    teardown_archive_timeout_ms: z
      .number()
      .int()
      .min(500)
      .max(2000)
      .default(1500),
    // Observed p99 connect is ~2-3s; 15s is ~5× headroom. Floor 5s bounds
    // false-positive rate under transient slowness; cap 60s bounds how long
    // a truly-stalled session stays dark.
    connect_timeout_ms: z.number().int().min(5_000).max(60_000).default(15_000),
    min_version: z
      .string()
      .refine(v => {
        // NOTE(review): lt() is invoked only for its throw-on-unparsable
        // behavior — the comparison result against '0.0.0' is discarded.
        // Assumes utils/semver's lt throws on invalid input; confirm.
        try {
          lt(v, '0.0.0')
          return true
        } catch {
          return false
        }
      })
      .default('0.0.0'),
    should_show_app_upgrade_message: z.boolean().default(false),
  }),
)
/**
* Fetch the env-less bridge timing config from GrowthBook. Read once per
* initEnvLessBridgeCore call — config is fixed for the lifetime of a bridge
* session.
*
* Uses the blocking getter (not _CACHED_MAY_BE_STALE) because /remote-control
* runs well after GrowthBook init — initializeGrowthBook() resolves instantly,
* so there's no startup penalty, and we get the fresh in-memory remoteEval
* value instead of the stale-on-first-read disk cache. The _DEPRECATED suffix
* warns against startup-path usage, which this isn't.
*/
export async function getEnvLessBridgeConfig(): Promise {
const raw = await getFeatureValue_DEPRECATED(
'tengu_bridge_repl_v2_config',
DEFAULT_ENV_LESS_BRIDGE_CONFIG,
)
const parsed = envLessBridgeConfigSchema().safeParse(raw)
return parsed.success ? parsed.data : DEFAULT_ENV_LESS_BRIDGE_CONFIG
}
/**
 * Returns an error message if the current CLI version is below the minimum
 * required for the env-less (v2) bridge path, or null if the version is fine.
 *
 * v2 analogue of checkBridgeMinVersion() — reads from tengu_bridge_repl_v2_config
 * instead of tengu_bridge_min_version so the two implementations can enforce
 * independent floors.
 */
export async function checkEnvLessBridgeMinVersion(): Promise<string | null> {
  const cfg = await getEnvLessBridgeConfig()
  if (cfg.min_version && lt(MACRO.VERSION, cfg.min_version)) {
    return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${cfg.min_version} or higher is required. Run \`claude update\` to update.`
  }
  return null
}
/**
 * Whether to nudge users toward upgrading their claude.ai app when a
 * Remote Control session starts. True only when the v2 bridge is active
 * AND the should_show_app_upgrade_message config bit is set — lets us
 * roll the v2 bridge before the app ships the new session-list query.
 */
export async function shouldShowAppUpgradeMessage(): Promise<boolean> {
  if (!isEnvLessBridgeEnabled()) return false
  const cfg = await getEnvLessBridgeConfig()
  return cfg.should_show_app_upgrade_message
}
================================================
FILE: restored-src/src/bridge/flushGate.ts
================================================
/**
 * State machine for gating message writes during an initial flush.
 *
 * When a bridge session starts, historical messages are flushed to the
 * server via a single HTTP POST. During that flush, new messages must
 * be queued to prevent them from arriving at the server interleaved
 * with the historical messages.
 *
 * Lifecycle:
 *   start()      → enqueue() returns true, items are queued
 *   end()        → returns queued items for draining, enqueue() returns false
 *   drop()       → discards queued items (permanent transport close)
 *   deactivate() → clears active flag without dropping items
 *                  (transport replacement — new transport will drain)
 */
export class FlushGate<T> {
  private _active = false
  private _pending: T[] = []
  /** Whether a flush is currently in progress (enqueue() is queuing). */
  get active(): boolean {
    return this._active
  }
  /** Number of items currently queued behind the gate. */
  get pendingCount(): number {
    return this._pending.length
  }
  /** Mark flush as in-progress. enqueue() will start queuing items. */
  start(): void {
    this._active = true
  }
  /**
   * End the flush and return any queued items for draining.
   * Caller is responsible for sending the returned items.
   */
  end(): T[] {
    this._active = false
    // splice(0) drains in place and returns items in insertion order.
    return this._pending.splice(0)
  }
  /**
   * If flush is active, queue the items and return true.
   * If flush is not active, return false (caller should send directly).
   */
  enqueue(...items: T[]): boolean {
    if (!this._active) return false
    this._pending.push(...items)
    return true
  }
  /**
   * Discard all queued items (permanent transport close).
   * Returns the number of items dropped.
   */
  drop(): number {
    this._active = false
    const count = this._pending.length
    this._pending.length = 0
    return count
  }
  /**
   * Clear the active flag without dropping queued items.
   * Used when the transport is replaced (onWorkReceived) — the new
   * transport's flush will drain the pending items.
   */
  deactivate(): void {
    this._active = false
  }
}
================================================
FILE: restored-src/src/bridge/inboundAttachments.ts
================================================
/**
* Resolve file_uuid attachments on inbound bridge user messages.
*
* Web composer uploads via cookie-authed /api/{org}/upload, sends file_uuid
* alongside the message. Here we fetch each via GET /api/oauth/files/{uuid}/content
* (oauth-authed, same store), write to ~/.claude/uploads/{sessionId}/, and
* return @path refs to prepend. Claude's Read tool takes it from there.
*
* Best-effort: any failure (no token, network, non-2xx, disk) logs debug and
* skips that attachment. The message still reaches Claude, just without @path.
*/
import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
import axios from 'axios'
import { randomUUID } from 'crypto'
import { mkdir, writeFile } from 'fs/promises'
import { basename, join } from 'path'
import { z } from 'zod/v4'
import { getSessionId } from '../bootstrap/state.js'
import { logForDebugging } from '../utils/debug.js'
import { getClaudeConfigHomeDir } from '../utils/envUtils.js'
import { lazySchema } from '../utils/lazySchema.js'
import { getBridgeAccessToken, getBridgeBaseUrl } from './bridgeConfig.js'
/** Per-attachment download timeout for GET .../files/{uuid}/content. */
const DOWNLOAD_TIMEOUT_MS = 30_000
/** Namespaced debug logger for inbound-attachment resolution. */
function debug(msg: string): void {
  logForDebugging('[bridge:inbound-attach] ' + msg)
}
const attachmentSchema = lazySchema(() =>
z.object({
file_uuid: z.string(),
file_name: z.string(),
}),
)
const attachmentsArraySchema = lazySchema(() => z.array(attachmentSchema()))
export type InboundAttachment = z.infer>
/** Pull file_attachments off a loosely-typed inbound message. */
export function extractInboundAttachments(msg: unknown): InboundAttachment[] {
  const hasField =
    typeof msg === 'object' && msg !== null && 'file_attachments' in msg
  if (!hasField) return []
  const result = attachmentsArraySchema().safeParse(
    (msg as { file_attachments: unknown }).file_attachments,
  )
  // Malformed attachment lists are treated the same as absent ones.
  return result.success ? result.data : []
}
/**
 * Strip path components and keep only filename-safe chars. file_name comes
 * from the network (web composer), so treat it as untrusted even though the
 * composer controls it.
 */
function sanitizeFileName(name: string): string {
  const cleaned = basename(name).replace(/[^a-zA-Z0-9._-]/g, '_')
  return cleaned.length > 0 ? cleaned : 'attachment'
}
/** Per-session download directory: <claude home>/uploads/<session id>. */
function uploadsDir(): string {
  const root = getClaudeConfigHomeDir()
  return join(root, 'uploads', getSessionId())
}
/**
 * Fetch + write one attachment. Returns the absolute path on success,
 * undefined on any failure (no token, non-200, network error, disk error) —
 * failures are logged at debug level and never thrown.
 */
async function resolveOne(att: InboundAttachment): Promise<string | undefined> {
  const token = getBridgeAccessToken()
  if (!token) {
    debug('skip: no oauth token')
    return undefined
  }
  let data: Buffer
  try {
    // getOauthConfig() (via getBridgeBaseUrl) throws on a non-allowlisted
    // CLAUDE_CODE_CUSTOM_OAUTH_URL — keep it inside the try so a bad
    // FedStart URL degrades to "no @path" instead of crashing print.ts's
    // reader loop (which has no catch around the await).
    const url = `${getBridgeBaseUrl()}/api/oauth/files/${encodeURIComponent(att.file_uuid)}/content`
    const response = await axios.get(url, {
      headers: { Authorization: `Bearer ${token}` },
      responseType: 'arraybuffer',
      timeout: DOWNLOAD_TIMEOUT_MS,
      // Never throw on HTTP status — non-200 is handled explicitly below.
      validateStatus: () => true,
    })
    if (response.status !== 200) {
      debug(`fetch ${att.file_uuid} failed: status=${response.status}`)
      return undefined
    }
    data = Buffer.from(response.data)
  } catch (e) {
    debug(`fetch ${att.file_uuid} threw: ${e}`)
    return undefined
  }
  // uuid-prefix makes collisions impossible across messages and within one
  // (same filename, different files). 8 chars is enough — this isn't security.
  const safeName = sanitizeFileName(att.file_name)
  const prefix = (
    att.file_uuid.slice(0, 8) || randomUUID().slice(0, 8)
  ).replace(/[^a-zA-Z0-9_-]/g, '_')
  const dir = uploadsDir()
  const outPath = join(dir, `${prefix}-${safeName}`)
  try {
    await mkdir(dir, { recursive: true })
    await writeFile(outPath, data)
  } catch (e) {
    debug(`write ${outPath} failed: ${e}`)
    return undefined
  }
  debug(`resolved ${att.file_uuid} → ${outPath} (${data.length} bytes)`)
  return outPath
}
/**
 * Resolve all attachments on an inbound message to a prefix string of
 * @path refs. Empty string if none resolved.
 */
export async function resolveInboundAttachments(
  attachments: InboundAttachment[],
): Promise<string> {
  if (attachments.length === 0) return ''
  debug(`resolving ${attachments.length} attachment(s)`)
  // Downloads are independent — run them in parallel; failures resolve to
  // undefined (resolveOne never throws) so Promise.all is safe here.
  const paths = await Promise.all(attachments.map(resolveOne))
  const ok = paths.filter((p): p is string => p !== undefined)
  if (ok.length === 0) return ''
  // Quoted form — extractAtMentionedFiles truncates unquoted @refs at the
  // first space, which breaks any home dir with spaces (/Users/John Smith/).
  return ok.map(p => `@"${p}"`).join(' ') + ' '
}
/**
* Prepend @path refs to content, whichever form it's in.
* Targets the LAST text block — processUserInputBase reads inputString
* from processedBlocks[processedBlocks.length - 1], so putting refs in
* block[0] means they're silently ignored for [text, image] content.
*/
export function prependPathRefs(
content: string | Array,
prefix: string,
): string | Array {
if (!prefix) return content
if (typeof content === 'string') return prefix + content
const i = content.findLastIndex(b => b.type === 'text')
if (i !== -1) {
const b = content[i]!
if (b.type === 'text') {
return [
...content.slice(0, i),
{ ...b, text: prefix + b.text },
...content.slice(i + 1),
]
}
}
// No text block — append one at the end so it's last.
return [...content, { type: 'text', text: prefix.trimEnd() }]
}
/**
* Convenience: extract + resolve + prepend. No-op when the message has no
* file_attachments field (fast path — no network, returns same reference).
*/
export async function resolveAndPrepend(
msg: unknown,
content: string | Array,
): Promise> {
const attachments = extractInboundAttachments(msg)
if (attachments.length === 0) return content
const prefix = await resolveInboundAttachments(attachments)
return prependPathRefs(content, prefix)
}
================================================
FILE: restored-src/src/bridge/inboundMessages.ts
================================================
import type {
Base64ImageSource,
ContentBlockParam,
ImageBlockParam,
} from '@anthropic-ai/sdk/resources/messages.mjs'
import type { UUID } from 'crypto'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import { detectImageFormatFromBase64 } from '../utils/imageResizer.js'
/**
* Process an inbound user message from the bridge, extracting content
* and UUID for enqueueing. Supports both string content and
* ContentBlockParam[] (e.g. messages containing images).
*
* Normalizes image blocks from bridge clients that may use camelCase
* `mediaType` instead of snake_case `media_type` (mobile-apps#5825).
*
* Returns the extracted fields, or undefined if the message should be
* skipped (non-user type, missing/empty content).
*/
export function extractInboundMessageFields(
msg: SDKMessage,
):
| { content: string | Array; uuid: UUID | undefined }
| undefined {
if (msg.type !== 'user') return undefined
const content = msg.message?.content
if (!content) return undefined
if (Array.isArray(content) && content.length === 0) return undefined
const uuid =
'uuid' in msg && typeof msg.uuid === 'string'
? (msg.uuid as UUID)
: undefined
return {
content: Array.isArray(content) ? normalizeImageBlocks(content) : content,
uuid,
}
}
/**
* Normalize image content blocks from bridge clients. iOS/web clients may
* send `mediaType` (camelCase) instead of `media_type` (snake_case), or
* omit the field entirely. Without normalization, the bad block poisons
* the session — every subsequent API call fails with
* "media_type: Field required".
*
* Fast-path scan returns the original array reference when no
* normalization is needed (zero allocation on the happy path).
*/
export function normalizeImageBlocks(
blocks: Array,
): Array {
if (!blocks.some(isMalformedBase64Image)) return blocks
return blocks.map(block => {
if (!isMalformedBase64Image(block)) return block
const src = block.source as unknown as Record
const mediaType =
typeof src.mediaType === 'string' && src.mediaType
? src.mediaType
: detectImageFormatFromBase64(block.source.data)
return {
...block,
source: {
type: 'base64' as const,
media_type: mediaType as Base64ImageSource['media_type'],
data: block.source.data,
},
}
})
}
/** True for base64 image blocks missing the required snake_case media_type. */
function isMalformedBase64Image(
  block: ContentBlockParam,
): block is ImageBlockParam & { source: Base64ImageSource } {
  if (block.type !== 'image' || block.source?.type !== 'base64') return false
  return !(block.source as unknown as Record<string, unknown>).media_type
}
================================================
FILE: restored-src/src/bridge/initReplBridge.ts
================================================
/**
* REPL-specific wrapper around initBridgeCore. Owns the parts that read
* bootstrap state — gates, cwd, session ID, git context, OAuth, title
* derivation — then delegates to the bootstrap-free core.
*
* Split out of replBridge.ts because the sessionStorage import
* (getCurrentSessionTitle) transitively pulls in src/commands.ts → the
* entire slash command + React component tree (~1300 modules). Keeping
* initBridgeCore in a file that doesn't touch sessionStorage lets
* daemonBridge.ts import the core without bloating the Agent SDK bundle.
*
* Called via dynamic import by useReplBridge (auto-start) and print.ts
* (SDK -p mode via query.enableRemoteControl).
*/
import { feature } from 'bun:bundle'
import { hostname } from 'os'
import { getOriginalCwd, getSessionId } from '../bootstrap/state.js'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import type { SDKControlResponse } from '../entrypoints/sdk/controlTypes.js'
import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
import { getOrganizationUUID } from '../services/oauth/client.js'
import {
isPolicyAllowed,
waitForPolicyLimitsToLoad,
} from '../services/policyLimits/index.js'
import type { Message } from '../types/message.js'
import {
checkAndRefreshOAuthTokenIfNeeded,
getClaudeAIOAuthTokens,
handleOAuth401Error,
} from '../utils/auth.js'
import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
import { logForDebugging } from '../utils/debug.js'
import { stripDisplayTagsAllowEmpty } from '../utils/displayTags.js'
import { errorMessage } from '../utils/errors.js'
import { getBranch, getRemoteUrl } from '../utils/git.js'
import { toSDKMessages } from '../utils/messages/mappers.js'
import {
getContentText,
getMessagesAfterCompactBoundary,
isSyntheticMessage,
} from '../utils/messages.js'
import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
import { getCurrentSessionTitle } from '../utils/sessionStorage.js'
import {
extractConversationText,
generateSessionTitle,
} from '../utils/sessionTitle.js'
import { generateShortWordSlug } from '../utils/words.js'
import {
getBridgeAccessToken,
getBridgeBaseUrl,
getBridgeTokenOverride,
} from './bridgeConfig.js'
import {
checkBridgeMinVersion,
isBridgeEnabledBlocking,
isCseShimEnabled,
isEnvLessBridgeEnabled,
} from './bridgeEnabled.js'
import {
archiveBridgeSession,
createBridgeSession,
updateBridgeSessionTitle,
} from './createSession.js'
import { logBridgeSkip } from './debugUtils.js'
import { checkEnvLessBridgeMinVersion } from './envLessBridgeConfig.js'
import { getPollIntervalConfig } from './pollConfig.js'
import type { BridgeState, ReplBridgeHandle } from './replBridge.js'
import { initBridgeCore } from './replBridge.js'
import { setCseShimGate } from './sessionIdCompat.js'
import type { BridgeWorkerType } from './types.js'
/**
 * Options for initReplBridge. All callbacks are optional; absent callbacks
 * leave the corresponding inbound control feature unwired.
 */
export type InitBridgeOptions = {
  // Called for each inbound message from the bridge; may be async.
  onInboundMessage?: (msg: SDKMessage) => void | Promise<void>
  onPermissionResponse?: (response: SDKControlResponse) => void
  onInterrupt?: () => void
  onSetModel?: (model: string | undefined) => void
  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
  // Returns { ok: false, error } when the requested mode is rejected.
  onSetPermissionMode?: (
    mode: PermissionMode,
  ) => { ok: true } | { ok: false; error: string }
  onStateChange?: (state: BridgeState, detail?: string) => void
  initialMessages?: Message[]
  // Explicit session name from `/remote-control `. When set, overrides
  // the title derived from the conversation or /rename.
  initialName?: string
  // Fresh view of the full conversation at call time. Used by onUserMessage's
  // count-3 derivation to call generateSessionTitle over the full conversation.
  // Optional — print.ts's SDK enableRemoteControl path has no REPL message
  // array; count-3 falls back to the single message text when absent.
  getMessages?: () => Message[]
  // UUIDs already flushed in a prior bridge session. Messages with these
  // UUIDs are excluded from the initial flush to avoid poisoning the
  // server (duplicate UUIDs across sessions cause the WS to be killed).
  // Mutated in place — newly flushed UUIDs are added after each flush.
  previouslyFlushedUUIDs?: Set<string>
  /** See BridgeCoreParams.perpetual. */
  perpetual?: boolean
  /**
   * When true, the bridge only forwards events outbound (no SSE inbound
   * stream). Used by CCR mirror mode — local sessions visible on claude.ai
   * without enabling inbound control.
   */
  outboundOnly?: boolean
  tags?: string[]
}
export async function initReplBridge(
options?: InitBridgeOptions,
): Promise {
const {
onInboundMessage,
onPermissionResponse,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
onStateChange,
initialMessages,
getMessages,
previouslyFlushedUUIDs,
initialName,
perpetual,
outboundOnly,
tags,
} = options ?? {}
// Wire the cse_ shim kill switch so toCompatSessionId respects the
// GrowthBook gate. Daemon/SDK paths skip this — shim defaults to active.
setCseShimGate(isCseShimEnabled)
// 1. Runtime gate
if (!(await isBridgeEnabledBlocking())) {
logBridgeSkip('not_enabled', '[bridge:repl] Skipping: bridge not enabled')
return null
}
// 1b. Minimum version check — deferred to after the v1/v2 branch below,
// since each implementation has its own floor (tengu_bridge_min_version
// for v1, tengu_bridge_repl_v2_config.min_version for v2).
// 2. Check OAuth — must be signed in with claude.ai. Runs before the
// policy check so console-auth users get the actionable "/login" hint
// instead of a misleading policy error from a stale/wrong-org cache.
if (!getBridgeAccessToken()) {
logBridgeSkip('no_oauth', '[bridge:repl] Skipping: no OAuth tokens')
onStateChange?.('failed', '/login')
return null
}
// 3. Check organization policy — remote control may be disabled
await waitForPolicyLimitsToLoad()
if (!isPolicyAllowed('allow_remote_control')) {
logBridgeSkip(
'policy_denied',
'[bridge:repl] Skipping: allow_remote_control policy not allowed',
)
onStateChange?.('failed', "disabled by your organization's policy")
return null
}
// When CLAUDE_BRIDGE_OAUTH_TOKEN is set (ant-only local dev), the bridge
// uses that token directly via getBridgeAccessToken() — keychain state is
// irrelevant. Skip 2b/2c to preserve that decoupling: an expired keychain
// token shouldn't block a bridge connection that doesn't use it.
if (!getBridgeTokenOverride()) {
// 2a. Cross-process backoff. If N prior processes already saw this exact
// dead token (matched by expiresAt), skip silently — no event, no refresh
// attempt. The count threshold tolerates transient refresh failures (auth
// server 5xx, lockfile errors per auth.ts:1437/1444/1485): each process
// independently retries until 3 consecutive failures prove the token dead.
// Mirrors useReplBridge's MAX_CONSECUTIVE_INIT_FAILURES for in-process.
// The expiresAt key is content-addressed: /login → new token → new expiresAt
// → this stops matching without any explicit clear.
const cfg = getGlobalConfig()
if (
cfg.bridgeOauthDeadExpiresAt != null &&
(cfg.bridgeOauthDeadFailCount ?? 0) >= 3 &&
getClaudeAIOAuthTokens()?.expiresAt === cfg.bridgeOauthDeadExpiresAt
) {
logForDebugging(
`[bridge:repl] Skipping: cross-process backoff (dead token seen ${cfg.bridgeOauthDeadFailCount} times)`,
)
return null
}
// 2b. Proactively refresh if expired. Mirrors bridgeMain.ts:2096 — the REPL
// bridge fires at useEffect mount BEFORE any v1/messages call, making this
// usually the first OAuth request of the session. Without this, ~9% of
// registrations hit the server with a >8h-expired token → 401 → withOAuthRetry
// recovers, but the server logs a 401 we can avoid. VPN egress IPs observed
// at 30:1 401:200 when many unrelated users cluster at the 8h TTL boundary.
//
// Fresh-token cost: one memoized read + one Date.now() comparison (~µs).
// checkAndRefreshOAuthTokenIfNeeded clears its own cache in every path that
// touches the keychain (refresh success, lockfile race, throw), so no
// explicit clearOAuthTokenCache() here — that would force a blocking
// keychain spawn on the 91%+ fresh-token path.
await checkAndRefreshOAuthTokenIfNeeded()
// 2c. Skip if token is still expired post-refresh-attempt. Env-var / FD
// tokens (auth.ts:894-917) have expiresAt=null → never trip this. But a
// keychain token whose refresh token is dead (password change, org left,
// token GC'd) has expiresAt ({
...c,
bridgeOauthDeadExpiresAt: deadExpiresAt,
bridgeOauthDeadFailCount:
c.bridgeOauthDeadExpiresAt === deadExpiresAt
? (c.bridgeOauthDeadFailCount ?? 0) + 1
: 1,
}))
return null
}
}
// 4. Compute baseUrl — needed by both v1 (env-based) and v2 (env-less)
// paths. Hoisted above the v2 gate so both can use it.
const baseUrl = getBridgeBaseUrl()
// 5. Derive session title. Precedence: explicit initialName → /rename
// (session storage) → last meaningful user message → generated slug.
// Cosmetic only (claude.ai session list); the model never sees it.
// Two flags: `hasExplicitTitle` (initialName or /rename — never auto-
// overwrite) vs. `hasTitle` (any title, including auto-derived — blocks
// the count-1 re-derivation but not count-3). The onUserMessage callback
// (wired to both v1 and v2 below) derives from the 1st prompt and again
// from the 3rd so mobile/web show a title that reflects more context.
// The slug fallback (e.g. "remote-control-graceful-unicorn") makes
// auto-started sessions distinguishable in the claude.ai list before the
// first prompt.
let title = `remote-control-${generateShortWordSlug()}`
let hasTitle = false
let hasExplicitTitle = false
if (initialName) {
title = initialName
hasTitle = true
hasExplicitTitle = true
} else {
const sessionId = getSessionId()
const customTitle = sessionId
? getCurrentSessionTitle(sessionId)
: undefined
if (customTitle) {
title = customTitle
hasTitle = true
hasExplicitTitle = true
} else if (initialMessages && initialMessages.length > 0) {
// Find the last user message that has meaningful content. Skip meta
// (nudges), tool results, compact summaries ("This session is being
// continued…"), non-human origins (task notifications, channel pushes),
// and synthetic interrupts ([Request interrupted by user]) — none are
// human-authored. Same filter as extractTitleText + isSyntheticMessage.
for (let i = initialMessages.length - 1; i >= 0; i--) {
const msg = initialMessages[i]!
if (
msg.type !== 'user' ||
msg.isMeta ||
msg.toolUseResult ||
msg.isCompactSummary ||
(msg.origin && msg.origin.kind !== 'human') ||
isSyntheticMessage(msg)
)
continue
const rawContent = getContentText(msg.message.content)
if (!rawContent) continue
const derived = deriveTitle(rawContent)
if (!derived) continue
title = derived
hasTitle = true
break
}
}
}
// Shared by both v1 and v2 — fires on every title-worthy user message until
// it returns true. At count 1: deriveTitle placeholder immediately, then
// generateSessionTitle (Haiku, sentence-case) fire-and-forget upgrade. At
// count 3: re-generate over the full conversation. Skips entirely if the
// title is explicit (/remote-control or /rename) — re-checks
// sessionStorage at call time so /rename between messages isn't clobbered.
// Skips count 1 if initialMessages already derived (that title is fresh);
// still refreshes at count 3. v2 passes cse_*; updateBridgeSessionTitle
// retags internally.
let userMessageCount = 0
let lastBridgeSessionId: string | undefined
let genSeq = 0
const patch = (
derived: string,
bridgeSessionId: string,
atCount: number,
): void => {
hasTitle = true
title = derived
logForDebugging(
`[bridge:repl] derived title from message ${atCount}: ${derived}`,
)
void updateBridgeSessionTitle(bridgeSessionId, derived, {
baseUrl,
getAccessToken: getBridgeAccessToken,
}).catch(() => {})
}
// Fire-and-forget Haiku generation with post-await guards. Re-checks /rename
// (sessionStorage), v1 env-lost (lastBridgeSessionId), and same-session
// out-of-order resolution (genSeq — count-1's Haiku resolving after count-3
// would clobber the richer title). generateSessionTitle never rejects.
const generateAndPatch = (input: string, bridgeSessionId: string): void => {
const gen = ++genSeq
const atCount = userMessageCount
void generateSessionTitle(input, AbortSignal.timeout(15_000)).then(
generated => {
if (
generated &&
gen === genSeq &&
lastBridgeSessionId === bridgeSessionId &&
!getCurrentSessionTitle(getSessionId())
) {
patch(generated, bridgeSessionId, atCount)
}
},
)
}
const onUserMessage = (text: string, bridgeSessionId: string): boolean => {
if (hasExplicitTitle || getCurrentSessionTitle(getSessionId())) {
return true
}
// v1 env-lost re-creates the session with a new ID. Reset the count so
// the new session gets its own count-3 derivation; hasTitle stays true
// (new session was created via getCurrentTitle(), which reads the count-1
// title from this closure), so count-1 of the fresh cycle correctly skips.
if (
lastBridgeSessionId !== undefined &&
lastBridgeSessionId !== bridgeSessionId
) {
userMessageCount = 0
}
lastBridgeSessionId = bridgeSessionId
userMessageCount++
if (userMessageCount === 1 && !hasTitle) {
const placeholder = deriveTitle(text)
if (placeholder) patch(placeholder, bridgeSessionId, userMessageCount)
generateAndPatch(text, bridgeSessionId)
} else if (userMessageCount === 3) {
const msgs = getMessages?.()
const input = msgs
? extractConversationText(getMessagesAfterCompactBoundary(msgs))
: text
generateAndPatch(input, bridgeSessionId)
}
// Also re-latches if v1 env-lost resets the transport's done flag past 3.
return userMessageCount >= 3
}
const initialHistoryCap = getFeatureValue_CACHED_WITH_REFRESH(
'tengu_bridge_initial_history_cap',
200,
5 * 60 * 1000,
)
// Fetch orgUUID before the v1/v2 branch — both paths need it. v1 for
// environment registration; v2 for archive (which lives at the compat
// /v1/sessions/{id}/archive, not /v1/code/sessions). Without it, v2
// archive 404s and sessions stay alive in CCR after /exit.
const orgUUID = await getOrganizationUUID()
if (!orgUUID) {
logBridgeSkip('no_org_uuid', '[bridge:repl] Skipping: no org UUID')
onStateChange?.('failed', '/login')
return null
}
// ── GrowthBook gate: env-less bridge ──────────────────────────────────
// When enabled, skips the Environments API layer entirely (no register/
// poll/ack/heartbeat) and connects directly via POST /bridge → worker_jwt.
// See server PR #292605 (renamed in #293280). REPL-only — daemon/print stay
// on env-based.
//
// NAMING: "env-less" is distinct from "CCR v2" (the /worker/* transport).
// The env-based path below can ALSO use CCR v2 via CLAUDE_CODE_USE_CCR_V2.
// tengu_bridge_repl_v2 gates env-less (no poll loop), not transport version.
//
// perpetual (assistant-mode session continuity via bridge-pointer.json) is
// env-coupled and not yet implemented here — fall back to env-based when set
// so KAIROS users don't silently lose cross-restart continuity.
if (isEnvLessBridgeEnabled() && !perpetual) {
const versionError = await checkEnvLessBridgeMinVersion()
if (versionError) {
logBridgeSkip(
'version_too_old',
`[bridge:repl] Skipping: ${versionError}`,
true,
)
onStateChange?.('failed', 'run `claude update` to upgrade')
return null
}
logForDebugging(
'[bridge:repl] Using env-less bridge path (tengu_bridge_repl_v2)',
)
const { initEnvLessBridgeCore } = await import('./remoteBridgeCore.js')
return initEnvLessBridgeCore({
baseUrl,
orgUUID,
title,
getAccessToken: getBridgeAccessToken,
onAuth401: handleOAuth401Error,
toSDKMessages,
initialHistoryCap,
initialMessages,
// v2 always creates a fresh server session (new cse_* id), so
// previouslyFlushedUUIDs is not passed — there's no cross-session
// UUID collision risk, and the ref persists across enable→disable→
// re-enable cycles which would cause the new session to receive zero
// history (all UUIDs already in the set from the prior enable).
// v1 handles this by calling previouslyFlushedUUIDs.clear() on fresh
// session creation (replBridge.ts:768); v2 skips the param entirely.
onInboundMessage,
onUserMessage,
onPermissionResponse,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
onStateChange,
outboundOnly,
tags,
})
}
// ── v1 path: env-based (register/poll/ack/heartbeat) ──────────────────
const versionError = checkBridgeMinVersion()
if (versionError) {
logBridgeSkip('version_too_old', `[bridge:repl] Skipping: ${versionError}`)
onStateChange?.('failed', 'run `claude update` to upgrade')
return null
}
// Gather git context — this is the bootstrap-read boundary.
// Everything from here down is passed explicitly to bridgeCore.
const branch = await getBranch()
const gitRepoUrl = await getRemoteUrl()
const sessionIngressUrl =
process.env.USER_TYPE === 'ant' &&
process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
: baseUrl
// Assistant-mode sessions advertise a distinct worker_type so the web UI
// can filter them into a dedicated picker. KAIROS guard keeps the
// assistant module out of external builds entirely.
let workerType: BridgeWorkerType = 'claude_code'
if (feature('KAIROS')) {
/* eslint-disable @typescript-eslint/no-require-imports */
const { isAssistantMode } =
require('../assistant/index.js') as typeof import('../assistant/index.js')
/* eslint-enable @typescript-eslint/no-require-imports */
if (isAssistantMode()) {
workerType = 'claude_code_assistant'
}
}
// 6. Delegate. BridgeCoreHandle is a structural superset of
// ReplBridgeHandle (adds writeSdkMessages which REPL callers don't use),
// so no adapter needed — just the narrower type on the way out.
return initBridgeCore({
dir: getOriginalCwd(),
machineName: hostname(),
branch,
gitRepoUrl,
title,
baseUrl,
sessionIngressUrl,
workerType,
getAccessToken: getBridgeAccessToken,
createSession: opts =>
createBridgeSession({
...opts,
events: [],
baseUrl,
getAccessToken: getBridgeAccessToken,
}),
archiveSession: sessionId =>
archiveBridgeSession(sessionId, {
baseUrl,
getAccessToken: getBridgeAccessToken,
// gracefulShutdown.ts:407 races runCleanupFunctions against 2s.
// Teardown also does stopWork (parallel) + deregister (sequential),
// so archive can't have the full budget. 1.5s matches v2's
// teardown_archive_timeout_ms default.
timeoutMs: 1500,
}).catch((err: unknown) => {
// archiveBridgeSession has no try/catch — 5xx/timeout/network throw
// straight through. Previously swallowed silently, making archive
// failures BQ-invisible and undiagnosable from debug logs.
logForDebugging(
`[bridge:repl] archiveBridgeSession threw: ${errorMessage(err)}`,
{ level: 'error' },
)
}),
// getCurrentTitle is read on reconnect-after-env-lost to re-title the new
// session. /rename writes to session storage; onUserMessage mutates
// `title` directly — both paths are picked up here.
getCurrentTitle: () => getCurrentSessionTitle(getSessionId()) ?? title,
onUserMessage,
toSDKMessages,
onAuth401: handleOAuth401Error,
getPollIntervalConfig,
initialHistoryCap,
initialMessages,
previouslyFlushedUUIDs,
onInboundMessage,
onPermissionResponse,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
onStateChange,
perpetual,
})
}
const TITLE_MAX_LEN = 50
/**
 * Quick placeholder title: strip display tags, keep only the first sentence,
 * collapse whitespace onto a single line, and truncate to 50 characters.
 * Returns undefined when nothing human-readable remains (e.g. the message
 * consisted solely of injected display tags). Superseded by
 * generateSessionTitle once Haiku resolves (~1-15s).
 */
function deriveTitle(raw: string): string | undefined {
  // Display tags (IDE/hook-injected context wrappers) are noise for titles.
  // stripDisplayTagsAllowEmpty returns '' (not the original string) for
  // pure-tag messages, so those fall through to the undefined return below.
  const clean = stripDisplayTagsAllowEmpty(raw)
  // The opening sentence usually carries the intent; the rest tends to be
  // supporting detail. Capture group instead of lookbehind — keeps the YARR
  // JIT happy.
  const sentenceMatch = /^(.*?[.!?])\s/.exec(clean)
  const firstSentence = sentenceMatch?.[1] ?? clean
  // Titles render on a single line in the claude.ai session list — flatten
  // every whitespace run (newlines, tabs) to a single space.
  const flat = firstSentence.replace(/\s+/g, ' ').trim()
  if (!flat) return undefined
  if (flat.length <= TITLE_MAX_LEN) return flat
  return flat.slice(0, TITLE_MAX_LEN - 1) + '\u2026'
}
================================================
FILE: restored-src/src/bridge/jwtUtils.ts
================================================
import { logEvent } from '../services/analytics/index.js'
import { logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import { errorMessage } from '../utils/errors.js'
import { jsonParse } from '../utils/slowOperations.js'
/**
 * Format a millisecond duration as a human-readable string (e.g. "5m 30s").
 *
 * Rounds to the nearest whole second first, then splits into minutes and
 * seconds. Rounding before splitting fixes two display bugs in the previous
 * implementation: 119_999ms rendered as "1m 60s" (seconds remainder rounded
 * up to 60 without carrying), and 59_500-59_999ms rendered as "60s" instead
 * of "1m".
 */
function formatDuration(ms: number): string {
  const totalSeconds = Math.round(ms / 1000)
  if (totalSeconds < 60) return `${totalSeconds}s`
  const m = Math.floor(totalSeconds / 60)
  const s = totalSeconds % 60
  return s > 0 ? `${m}m ${s}s` : `${m}m`
}
/**
 * Decode a JWT's payload segment without verifying the signature.
 * Strips the `sk-ant-si-` session-ingress prefix if present.
 * Returns the parsed JSON payload as `unknown`, or `null` if the
 * token is malformed or the payload is not valid JSON.
 */
export function decodeJwtPayload(token: string): unknown | null {
  const prefix = 'sk-ant-si-'
  const jwt = token.startsWith(prefix) ? token.slice(prefix.length) : token
  // A well-formed JWT is exactly three dot-separated base64url segments;
  // the middle one is the payload.
  const segments = jwt.split('.')
  const payloadSegment = segments.length === 3 ? segments[1] : undefined
  if (!payloadSegment) return null
  try {
    const decoded = Buffer.from(payloadSegment, 'base64url').toString('utf8')
    return jsonParse(decoded)
  } catch {
    // Invalid base64url or non-JSON payload — treat as undecodable.
    return null
  }
}
/**
 * Decode the `exp` (expiry) claim from a JWT without verifying the signature.
 * @returns The `exp` value in Unix seconds, or `null` if unparseable
 */
export function decodeJwtExpiry(token: string): number | null {
  const payload = decodeJwtPayload(token)
  // Guard clauses: reject non-object payloads, then missing/non-numeric exp.
  if (payload === null || typeof payload !== 'object') return null
  if (!('exp' in payload)) return null
  return typeof payload.exp === 'number' ? payload.exp : null
}
/** Refresh buffer: request a new token before expiry. */
const TOKEN_REFRESH_BUFFER_MS = 5 * 60 * 1000
/** Fallback refresh interval when the new token's expiry is unknown. */
const FALLBACK_REFRESH_INTERVAL_MS = 30 * 60 * 1000 // 30 minutes
/** Max consecutive failures before giving up on the refresh chain. */
const MAX_REFRESH_FAILURES = 3
/** Retry delay when getAccessToken returns undefined. */
const REFRESH_RETRY_DELAY_MS = 60_000
/**
* Creates a token refresh scheduler that proactively refreshes session tokens
* before they expire. Used by both the standalone bridge and the REPL bridge.
*
* When a token is about to expire, the scheduler calls `onRefresh` with the
* session ID and the bridge's OAuth access token. The caller is responsible
* for delivering the token to the appropriate transport (child process stdin
* for standalone bridge, WebSocket reconnect for REPL bridge).
*/
export function createTokenRefreshScheduler({
getAccessToken,
onRefresh,
label,
refreshBufferMs = TOKEN_REFRESH_BUFFER_MS,
}: {
getAccessToken: () => string | undefined | Promise
onRefresh: (sessionId: string, oauthToken: string) => void
label: string
/** How long before expiry to fire refresh. Defaults to 5 min. */
refreshBufferMs?: number
}): {
schedule: (sessionId: string, token: string) => void
scheduleFromExpiresIn: (sessionId: string, expiresInSeconds: number) => void
cancel: (sessionId: string) => void
cancelAll: () => void
} {
const timers = new Map>()
const failureCounts = new Map()
// Generation counter per session — incremented by schedule() and cancel()
// so that in-flight async doRefresh() calls can detect when they've been
// superseded and should skip setting follow-up timers.
const generations = new Map()
function nextGeneration(sessionId: string): number {
const gen = (generations.get(sessionId) ?? 0) + 1
generations.set(sessionId, gen)
return gen
}
function schedule(sessionId: string, token: string): void {
const expiry = decodeJwtExpiry(token)
if (!expiry) {
// Token is not a decodable JWT (e.g. an OAuth token passed from the
// REPL bridge WebSocket open handler). Preserve any existing timer
// (such as the follow-up refresh set by doRefresh) so the refresh
// chain is not broken.
logForDebugging(
`[${label}:token] Could not decode JWT expiry for sessionId=${sessionId}, token prefix=${token.slice(0, 15)}…, keeping existing timer`,
)
return
}
// Clear any existing refresh timer — we have a concrete expiry to replace it.
const existing = timers.get(sessionId)
if (existing) {
clearTimeout(existing)
}
// Bump generation to invalidate any in-flight async doRefresh.
const gen = nextGeneration(sessionId)
const expiryDate = new Date(expiry * 1000).toISOString()
const delayMs = expiry * 1000 - Date.now() - refreshBufferMs
if (delayMs <= 0) {
logForDebugging(
`[${label}:token] Token for sessionId=${sessionId} expires=${expiryDate} (past or within buffer), refreshing immediately`,
)
void doRefresh(sessionId, gen)
return
}
logForDebugging(
`[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires=${expiryDate}, buffer=${refreshBufferMs / 1000}s)`,
)
const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
timers.set(sessionId, timer)
}
/**
* Schedule refresh using an explicit TTL (seconds until expiry) rather
* than decoding a JWT's exp claim. Used by callers whose JWT is opaque
* (e.g. POST /v1/code/sessions/{id}/bridge returns expires_in directly).
*/
function scheduleFromExpiresIn(
sessionId: string,
expiresInSeconds: number,
): void {
const existing = timers.get(sessionId)
if (existing) clearTimeout(existing)
const gen = nextGeneration(sessionId)
// Clamp to 30s floor — if refreshBufferMs exceeds the server's expires_in
// (e.g. very large buffer for frequent-refresh testing, or server shortens
// expires_in unexpectedly), unclamped delayMs ≤ 0 would tight-loop.
const delayMs = Math.max(expiresInSeconds * 1000 - refreshBufferMs, 30_000)
logForDebugging(
`[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires_in=${expiresInSeconds}s, buffer=${refreshBufferMs / 1000}s)`,
)
const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
timers.set(sessionId, timer)
}
async function doRefresh(sessionId: string, gen: number): Promise {
let oauthToken: string | undefined
try {
oauthToken = await getAccessToken()
} catch (err) {
logForDebugging(
`[${label}:token] getAccessToken threw for sessionId=${sessionId}: ${errorMessage(err)}`,
{ level: 'error' },
)
}
// If the session was cancelled or rescheduled while we were awaiting,
// the generation will have changed — bail out to avoid orphaned timers.
if (generations.get(sessionId) !== gen) {
logForDebugging(
`[${label}:token] doRefresh for sessionId=${sessionId} stale (gen ${gen} vs ${generations.get(sessionId)}), skipping`,
)
return
}
if (!oauthToken) {
const failures = (failureCounts.get(sessionId) ?? 0) + 1
failureCounts.set(sessionId, failures)
logForDebugging(
`[${label}:token] No OAuth token available for refresh, sessionId=${sessionId} (failure ${failures}/${MAX_REFRESH_FAILURES})`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'bridge_token_refresh_no_oauth')
// Schedule a retry so the refresh chain can recover if the token
// becomes available again (e.g. transient cache clear during refresh).
// Cap retries to avoid spamming on genuine failures.
if (failures < MAX_REFRESH_FAILURES) {
const retryTimer = setTimeout(
doRefresh,
REFRESH_RETRY_DELAY_MS,
sessionId,
gen,
)
timers.set(sessionId, retryTimer)
}
return
}
// Reset failure counter on successful token retrieval
failureCounts.delete(sessionId)
logForDebugging(
`[${label}:token] Refreshing token for sessionId=${sessionId}: new token prefix=${oauthToken.slice(0, 15)}…`,
)
logEvent('tengu_bridge_token_refreshed', {})
onRefresh(sessionId, oauthToken)
// Schedule a follow-up refresh so long-running sessions stay authenticated.
// Without this, the initial one-shot timer leaves the session vulnerable
// to token expiry if it runs past the first refresh window.
const timer = setTimeout(
doRefresh,
FALLBACK_REFRESH_INTERVAL_MS,
sessionId,
gen,
)
timers.set(sessionId, timer)
logForDebugging(
`[${label}:token] Scheduled follow-up refresh for sessionId=${sessionId} in ${formatDuration(FALLBACK_REFRESH_INTERVAL_MS)}`,
)
}
function cancel(sessionId: string): void {
// Bump generation to invalidate any in-flight async doRefresh.
nextGeneration(sessionId)
const timer = timers.get(sessionId)
if (timer) {
clearTimeout(timer)
timers.delete(sessionId)
}
failureCounts.delete(sessionId)
}
function cancelAll(): void {
// Bump all generations so in-flight doRefresh calls are invalidated.
for (const sessionId of generations.keys()) {
nextGeneration(sessionId)
}
for (const timer of timers.values()) {
clearTimeout(timer)
}
timers.clear()
failureCounts.clear()
}
return { schedule, scheduleFromExpiresIn, cancel, cancelAll }
}
================================================
FILE: restored-src/src/bridge/pollConfig.ts
================================================
import { z } from 'zod/v4'
import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
import { lazySchema } from '../utils/lazySchema.js'
import {
DEFAULT_POLL_CONFIG,
type PollIntervalConfig,
} from './pollConfigDefaults.js'
// .min(100) on the seek-work intervals restores the old Math.max(..., 100)
// defense-in-depth floor against fat-fingered GrowthBook values. Unlike a
// clamp, Zod rejects the whole object on violation — a config with one bad
// field falls back to DEFAULT_POLL_CONFIG entirely rather than being
// partially trusted.
//
// The at_capacity intervals use a 0-or-≥100 refinement: 0 means "disabled"
// (heartbeat-only mode), ≥100 is the fat-finger floor. Values 1–99 are
// rejected so unit confusion (ops thinks seconds, enters 10) doesn't poll
// every 10ms against the VerifyEnvironmentSecretAuth DB path.
//
// The object-level refines require at least one at-capacity liveness
// mechanism enabled: heartbeat OR the relevant poll interval. Without this,
// the hb=0, atCapMs=0 drift config (ops disables heartbeat without
// restoring at_capacity) falls through every throttle site with no sleep —
// tight-looping /poll at HTTP-round-trip speed.
/** Shared refine error params for the "0 (disabled) or ≥100ms" rule below. */
const zeroOrAtLeast100 = {
  message: 'must be 0 (disabled) or ≥100ms',
}
/**
 * Zod schema for the tengu_bridge_poll_interval_config GrowthBook flag.
 * Wrapped in lazySchema so the zod object is only built on first use.
 * Consumed by getPollIntervalConfig below, which falls back to
 * DEFAULT_POLL_CONFIG in full when parsing fails (see the file-level
 * comment above for the rejection-over-clamping rationale).
 */
const pollIntervalConfigSchema = lazySchema(() =>
  z
    .object({
      poll_interval_ms_not_at_capacity: z.number().int().min(100),
      // 0 = no at-capacity polling. Independent of heartbeat — both can be
      // enabled (heartbeat runs, periodically breaks out to poll).
      poll_interval_ms_at_capacity: z
        .number()
        .int()
        .refine(v => v === 0 || v >= 100, zeroOrAtLeast100),
      // 0 = disabled; positive value = heartbeat at this interval while at
      // capacity. Runs alongside at-capacity polling, not instead of it.
      // Named non_exclusive to distinguish from the old heartbeat_interval_ms
      // (either-or semantics in pre-#22145 clients). .default(0) so existing
      // GrowthBook configs without this field parse successfully.
      non_exclusive_heartbeat_interval_ms: z.number().int().min(0).default(0),
      // Multisession (bridgeMain.ts) intervals. Defaults match the
      // single-session values so existing configs without these fields
      // preserve current behavior.
      multisession_poll_interval_ms_not_at_capacity: z
        .number()
        .int()
        .min(100)
        .default(
          DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_not_at_capacity,
        ),
      multisession_poll_interval_ms_partial_capacity: z
        .number()
        .int()
        .min(100)
        .default(
          DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_partial_capacity,
        ),
      multisession_poll_interval_ms_at_capacity: z
        .number()
        .int()
        .refine(v => v === 0 || v >= 100, zeroOrAtLeast100)
        .default(DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_at_capacity),
      // .min(1) matches the server's ge=1 constraint (work_v1.py:230).
      reclaim_older_than_ms: z.number().int().min(1).default(5000),
      session_keepalive_interval_v2_ms: z
        .number()
        .int()
        .min(0)
        .default(120_000),
    })
    // Object-level invariant: the single-session at-capacity loop must have
    // at least one liveness mechanism (heartbeat or poll) enabled.
    .refine(
      cfg =>
        cfg.non_exclusive_heartbeat_interval_ms > 0 ||
        cfg.poll_interval_ms_at_capacity > 0,
      {
        message:
          'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or poll_interval_ms_at_capacity > 0',
      },
    )
    // Same invariant for the multisession at-capacity loop.
    .refine(
      cfg =>
        cfg.non_exclusive_heartbeat_interval_ms > 0 ||
        cfg.multisession_poll_interval_ms_at_capacity > 0,
      {
        message:
          'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or multisession_poll_interval_ms_at_capacity > 0',
      },
    ),
)
/**
 * Read the bridge poll interval config from GrowthBook (5-minute refresh
 * window) and validate it against the schema. A flag value that is absent,
 * malformed, or only partially specified falls back to DEFAULT_POLL_CONFIG
 * in full — partial configs are never trusted.
 *
 * Shared by bridgeMain.ts (standalone) and replBridge.ts (REPL) so ops
 * can tune both poll rates fleet-wide with a single config push.
 */
export function getPollIntervalConfig(): PollIntervalConfig {
  const flagValue = getFeatureValue_CACHED_WITH_REFRESH(
    'tengu_bridge_poll_interval_config',
    DEFAULT_POLL_CONFIG,
    5 * 60 * 1000,
  )
  const result = pollIntervalConfigSchema().safeParse(flagValue)
  if (result.success) {
    return result.data
  }
  return DEFAULT_POLL_CONFIG
}
================================================
FILE: restored-src/src/bridge/pollConfigDefaults.ts
================================================
/**
* Bridge poll interval defaults. Extracted from pollConfig.ts so callers
* that don't need live GrowthBook tuning (daemon via Agent SDK) can avoid
* the growthbook.ts → config.ts → file.ts → sessionStorage.ts → commands.ts
* transitive dependency chain.
*/
/**
 * Poll interval while actively seeking work (no transport / below
 * maxSessions). Governs user-visible "connecting…" latency on initial work
 * pickup and recovery speed after the server re-dispatches a work item.
 */
const SEEKING_WORK_POLL_MS = 2000
/**
 * Poll interval once the transport is connected. Runs independently of
 * heartbeat — when both are enabled, the heartbeat loop breaks out to poll
 * at this interval. Set to 0 to disable at-capacity polling entirely.
 *
 * Server-side constraints that bound this value:
 * - BRIDGE_LAST_POLL_TTL = 4h (Redis key expiry → environment auto-archived)
 * - max_poll_stale_seconds = 24h (session-creation health gate, currently disabled)
 *
 * 10 minutes gives 24× headroom on the Redis TTL while still picking up
 * server-initiated token-rotation redispatches within one poll cycle.
 * The transport auto-reconnects internally for 10 minutes on transient WS
 * failures, so poll is not the recovery path — it's strictly a liveness
 * signal plus a backstop for permanent close.
 */
const CONNECTED_POLL_MS = 600_000
/**
 * Multisession bridge (bridgeMain.ts) poll intervals. These default to the
 * single-session values so existing GrowthBook configs without the
 * multisession fields preserve current behavior; ops can tune them
 * independently via the tengu_bridge_poll_interval_config GB flag.
 */
const MULTI_SEEKING_WORK_POLL_MS = SEEKING_WORK_POLL_MS
const MULTI_PARTIAL_CAPACITY_POLL_MS = SEEKING_WORK_POLL_MS
const MULTI_CONNECTED_POLL_MS = CONNECTED_POLL_MS
export type PollIntervalConfig = {
  poll_interval_ms_not_at_capacity: number
  poll_interval_ms_at_capacity: number
  non_exclusive_heartbeat_interval_ms: number
  multisession_poll_interval_ms_not_at_capacity: number
  multisession_poll_interval_ms_partial_capacity: number
  multisession_poll_interval_ms_at_capacity: number
  reclaim_older_than_ms: number
  session_keepalive_interval_v2_ms: number
}
export const DEFAULT_POLL_CONFIG: PollIntervalConfig = {
  poll_interval_ms_not_at_capacity: SEEKING_WORK_POLL_MS,
  poll_interval_ms_at_capacity: CONNECTED_POLL_MS,
  // 0 = disabled. When > 0, at-capacity loops send per-work-item heartbeats
  // at this interval. Independent of poll_interval_ms_at_capacity — both may
  // run (heartbeat periodically yields to poll). 60s gives 5× headroom under
  // the server's 300s heartbeat TTL. Named non_exclusive to distinguish from
  // the old heartbeat_interval_ms field (either-or semantics in pre-#22145
  // clients — heartbeat suppressed poll). Old clients ignore this key; ops
  // can set both fields during rollout.
  non_exclusive_heartbeat_interval_ms: 0,
  multisession_poll_interval_ms_not_at_capacity: MULTI_SEEKING_WORK_POLL_MS,
  multisession_poll_interval_ms_partial_capacity:
    MULTI_PARTIAL_CAPACITY_POLL_MS,
  multisession_poll_interval_ms_at_capacity: MULTI_CONNECTED_POLL_MS,
  // Poll query param: reclaim unacknowledged work items older than this.
  // Matches the server's DEFAULT_RECLAIM_OLDER_THAN_MS (work_service.py:24).
  // Enables picking up stale-pending work after JWT expiry, when the prior
  // ack failed because the session_ingress_token was already stale.
  reclaim_older_than_ms: 5000,
  // 0 = disabled. When > 0, push a silent {type:'keep_alive'} frame to
  // session-ingress at this interval so upstream proxies don't GC an idle
  // remote-control session. 2 min is the default. _v2: bridge-only gate
  // (pre-v2 clients read the old key, new clients ignore it).
  session_keepalive_interval_v2_ms: 120_000,
}
================================================
FILE: restored-src/src/bridge/remoteBridgeCore.ts
================================================
// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
/**
* Env-less Remote Control bridge core.
*
* "Env-less" = no Environments API layer. Distinct from "CCR v2" (the
* /worker/* transport protocol) — the env-based path (replBridge.ts) can also
* use CCR v2 transport via CLAUDE_CODE_USE_CCR_V2. This file is about removing
* the poll/dispatch layer, not about which transport protocol is underneath.
*
* Unlike initBridgeCore (env-based, ~2400 lines), this connects directly
* to the session-ingress layer without the Environments API work-dispatch
* layer:
*
* 1. POST /v1/code/sessions (OAuth, no env_id) → session.id
* 2. POST /v1/code/sessions/{id}/bridge (OAuth) → {worker_jwt, expires_in, api_base_url, worker_epoch}
* Each /bridge call bumps epoch — it IS the register. No separate /worker/register.
* 3. createV2ReplTransport(worker_jwt, worker_epoch) → SSE + CCRClient
* 4. createTokenRefreshScheduler → proactive /bridge re-call (new JWT + new epoch)
* 5. 401 on SSE → rebuild transport with fresh /bridge credentials (same seq-num)
*
* No register/poll/ack/stop/heartbeat/deregister environment lifecycle.
* The Environments API historically existed because CCR's /worker/*
* endpoints required a session_id+role=worker JWT that only the work-dispatch
* layer could mint. Server PR #292605 (renamed in #293280) adds the /bridge endpoint as a direct
* OAuth→worker_jwt exchange, making the env layer optional for REPL sessions.
*
* Gated by `tengu_bridge_repl_v2` GrowthBook flag in initReplBridge.ts.
* REPL-only — daemon/print stay on env-based.
*/
import { feature } from 'bun:bundle'
import axios from 'axios'
import {
createV2ReplTransport,
type ReplBridgeTransport,
} from './replBridgeTransport.js'
import { buildCCRv2SdkUrl } from './workSecret.js'
import { toCompatSessionId } from './sessionIdCompat.js'
import { FlushGate } from './flushGate.js'
import { createTokenRefreshScheduler } from './jwtUtils.js'
import { getTrustedDeviceToken } from './trustedDevice.js'
import {
getEnvLessBridgeConfig,
type EnvLessBridgeConfig,
} from './envLessBridgeConfig.js'
import {
handleIngressMessage,
handleServerControlRequest,
makeResultMessage,
isEligibleBridgeMessage,
extractTitleText,
BoundedUUIDSet,
} from './bridgeMessaging.js'
import { logBridgeSkip } from './debugUtils.js'
import { logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import { isInProtectedNamespace } from '../utils/envUtils.js'
import { errorMessage } from '../utils/errors.js'
import { sleep } from '../utils/sleep.js'
import { registerCleanup } from '../utils/cleanupRegistry.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../services/analytics/index.js'
import type { ReplBridgeHandle, BridgeState } from './replBridge.js'
import type { Message } from '../types/message.js'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import type {
SDKControlRequest,
SDKControlResponse,
} from '../entrypoints/sdk/controlTypes.js'
import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
const ANTHROPIC_VERSION = '2023-06-01'
// Telemetry discriminator for ws_connected. 'initial' is the default and
// never passed to rebuildTransport (which can only be called post-init);
// Exclude<> makes that constraint explicit at both signatures.
type ConnectCause = 'initial' | 'proactive_refresh' | 'auth_401_recovery'
/**
 * Build the standard header set for OAuth-authenticated requests to the
 * session-ingress API: bearer auth, JSON content type, and the pinned
 * anthropic-version.
 *
 * Fix: the restored source declared the return type as bare `Record`, which
 * is invalid TypeScript (Record requires two type arguments — stripped
 * during extraction). Restored to `Record<string, string>`, matching the
 * literal returned.
 */
function oauthHeaders(accessToken: string): Record<string, string> {
  return {
    Authorization: `Bearer ${accessToken}`,
    'Content-Type': 'application/json',
    'anthropic-version': ANTHROPIC_VERSION,
  }
}
/**
 * Inputs to initEnvLessBridgeCore. Callbacks are all optional except the
 * token getter and the Message→SDKMessage mapper; undefined callbacks
 * simply disable the corresponding feature.
 */
export type EnvLessBridgeParams = {
  baseUrl: string
  orgUUID: string
  // Initial session title sent to POST /v1/code/sessions.
  title: string
  // Returns the currently stored OAuth token, or undefined when none exists.
  // May return an *expired* token as a non-null string (callers below note
  // that truthiness does not mean validity).
  getAccessToken: () => string | undefined
  // Invoked with the stale token to force an OAuth refresh before retrying.
  // NOTE(review): bare `Promise` — type argument appears stripped in
  // extraction (likely Promise<void>); confirm against original source.
  onAuth401?: (staleAccessToken: string) => Promise
  /**
   * Converts internal Message[] → SDKMessage[] for writeMessages() and the
   * initial-flush/drain paths. Injected rather than imported — mappers.ts
   * transitively pulls in src/commands.ts (entire command registry + React
   * tree) which would bloat bundles that don't already have it.
   */
  toSDKMessages: (messages: Message[]) => SDKMessage[]
  // Max number of history messages flushed on connect; values <= 0 disable
  // the cap (see flushHistory's `initialHistoryCap > 0` guard).
  initialHistoryCap: number
  initialMessages?: Message[]
  // NOTE(review): bare `Promise` — likely `void | Promise<void>`; confirm.
  onInboundMessage?: (msg: SDKMessage) => void | Promise
  /**
   * Fired on each title-worthy user message seen in writeMessages() until
   * the callback returns true (done). Mirrors replBridge.ts's onUserMessage —
   * caller derives a title and PATCHes /v1/sessions/{id} so auto-started
   * sessions don't stay at the generic fallback. The caller owns the
   * derive-at-count-1-and-3 policy; the transport just keeps calling until
   * told to stop. sessionId is the raw cse_* — updateBridgeSessionTitle
   * retags internally.
   */
  onUserMessage?: (text: string, sessionId: string) => boolean
  onPermissionResponse?: (response: SDKControlResponse) => void
  onInterrupt?: () => void
  onSetModel?: (model: string | undefined) => void
  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
  // Returns ok:false with a reason when the requested mode is rejected.
  onSetPermissionMode?: (
    mode: PermissionMode,
  ) => { ok: true } | { ok: false; error: string }
  onStateChange?: (state: BridgeState, detail?: string) => void
  /**
   * When true, skip opening the SSE read stream — only the CCRClient write
   * path is activated. Threaded to createV2ReplTransport and
   * handleServerControlRequest.
   */
  outboundOnly?: boolean
  /** Free-form tags for session categorization (e.g. ['ccr-mirror']). */
  tags?: string[]
}
/**
 * Create a session, fetch a worker JWT, connect the v2 transport.
 *
 * Returns null on any pre-flight failure (session create failed, /bridge
 * failed, transport setup failed). Caller (initReplBridge) surfaces this
 * as a generic "initialization failed" state.
 *
 * Lifecycle (section markers below): create session → fetch credentials →
 * build transport → wire callbacks → connect; a JWT refresh scheduler and a
 * 401-recovery path both rebuild the transport in-place; teardown archives
 * the session and is registered with the process cleanup registry.
 *
 * NOTE(review): the return annotation below reads bare `Promise` — the type
 * argument (presumably `Promise<ReplBridgeHandle | null>`) appears stripped
 * in extraction; confirm against the original source.
 */
export async function initEnvLessBridgeCore(
  params: EnvLessBridgeParams,
): Promise {
  const {
    baseUrl,
    orgUUID,
    title,
    getAccessToken,
    onAuth401,
    toSDKMessages,
    initialHistoryCap,
    initialMessages,
    onInboundMessage,
    onUserMessage,
    onPermissionResponse,
    onInterrupt,
    onSetModel,
    onSetMaxThinkingTokens,
    onSetPermissionMode,
    onStateChange,
    outboundOnly,
    tags,
  } = params
  const cfg = await getEnvLessBridgeConfig()
  // ── 1. Create session (POST /v1/code/sessions, no env_id) ───────────────
  const accessToken = getAccessToken()
  if (!accessToken) {
    logForDebugging('[remote-bridge] No OAuth token')
    return null
  }
  const createdSessionId = await withRetry(
    () =>
      createCodeSession(baseUrl, accessToken, title, cfg.http_timeout_ms, tags),
    'createCodeSession',
    cfg,
  )
  if (!createdSessionId) {
    onStateChange?.('failed', 'Session creation failed — see debug log')
    logBridgeSkip('v2_session_create_failed', undefined, true)
    return null
  }
  const sessionId: string = createdSessionId
  logForDebugging(`[remote-bridge] Created session ${sessionId}`)
  logForDiagnosticsNoPII('info', 'bridge_repl_v2_session_created')
  // ── 2. Fetch bridge credentials (POST /bridge → worker_jwt, expires_in, api_base_url) ──
  const credentials = await withRetry(
    () =>
      fetchRemoteCredentials(
        sessionId,
        baseUrl,
        accessToken,
        cfg.http_timeout_ms,
      ),
    'fetchRemoteCredentials',
    cfg,
  )
  if (!credentials) {
    onStateChange?.('failed', 'Remote credentials fetch failed — see debug log')
    logBridgeSkip('v2_remote_creds_failed', undefined, true)
    // Best-effort: don't leave an orphaned server session behind.
    void archiveSession(
      sessionId,
      baseUrl,
      accessToken,
      orgUUID,
      cfg.http_timeout_ms,
    )
    return null
  }
  logForDebugging(
    `[remote-bridge] Fetched bridge credentials (expires_in=${credentials.expires_in}s)`,
  )
  // ── 3. Build v2 transport (SSETransport + CCRClient) ────────────────────
  const sessionUrl = buildCCRv2SdkUrl(credentials.api_base_url, sessionId)
  logForDebugging(`[remote-bridge] v2 session URL: ${sessionUrl}`)
  let transport: ReplBridgeTransport
  try {
    transport = await createV2ReplTransport({
      sessionUrl,
      ingressToken: credentials.worker_jwt,
      sessionId,
      epoch: credentials.worker_epoch,
      heartbeatIntervalMs: cfg.heartbeat_interval_ms,
      heartbeatJitterFraction: cfg.heartbeat_jitter_fraction,
      // Per-instance closure — keeps the worker JWT out of
      // process.env.CLAUDE_CODE_SESSION_ACCESS_TOKEN, which mcp/client.ts
      // reads ungatedly and would otherwise send to user-configured ws/http
      // MCP servers. Frozen-at-construction is correct: transport is fully
      // rebuilt on refresh (rebuildTransport below).
      getAuthToken: () => credentials.worker_jwt,
      outboundOnly,
    })
  } catch (err) {
    logForDebugging(
      `[remote-bridge] v2 transport setup failed: ${errorMessage(err)}`,
      { level: 'error' },
    )
    onStateChange?.('failed', `Transport setup failed: ${errorMessage(err)}`)
    logBridgeSkip('v2_transport_setup_failed', undefined, true)
    // Best-effort: don't leave an orphaned server session behind.
    void archiveSession(
      sessionId,
      baseUrl,
      accessToken,
      orgUUID,
      cfg.http_timeout_ms,
    )
    return null
  }
  logForDebugging(
    `[remote-bridge] v2 transport created (epoch=${credentials.worker_epoch})`,
  )
  onStateChange?.('ready')
  // ── 4. State ────────────────────────────────────────────────────────────
  // Echo dedup: messages we POST come back on the read stream. Seeded with
  // initial message UUIDs so server echoes of flushed history are recognized.
  // Both sets cover initial UUIDs — recentPostedUUIDs is a 2000-cap ring buffer
  // and could evict them after enough live writes; initialMessageUUIDs is the
  // unbounded fallback. Defense-in-depth; mirrors replBridge.ts.
  const recentPostedUUIDs = new BoundedUUIDSet(cfg.uuid_dedup_buffer_size)
  const initialMessageUUIDs = new Set() // NOTE(review): likely Set<string>, type arg stripped
  if (initialMessages) {
    for (const msg of initialMessages) {
      initialMessageUUIDs.add(msg.uuid)
      recentPostedUUIDs.add(msg.uuid)
    }
  }
  // Defensive dedup for re-delivered inbound prompts (seq-num negotiation
  // edge cases, server history replay after transport swap).
  const recentInboundUUIDs = new BoundedUUIDSet(cfg.uuid_dedup_buffer_size)
  // FlushGate: queue live writes while the history flush POST is in flight,
  // so the server receives [history..., live...] in order.
  const flushGate = new FlushGate()
  let initialFlushDone = false
  let tornDown = false
  let authRecoveryInFlight = false
  // Latch for onUserMessage — flips true when the callback returns true
  // (policy says "done deriving"). sessionId is const (no re-create path —
  // rebuildTransport swaps JWT/epoch, same session), so no reset needed.
  let userMessageCallbackDone = !onUserMessage
  // Telemetry: why did onConnect fire? Set by rebuildTransport before
  // wireTransportCallbacks; read asynchronously by onConnect. Race-safe
  // because authRecoveryInFlight serializes rebuild callers, and a fresh
  // initEnvLessBridgeCore() call gets a fresh closure defaulting to 'initial'.
  let connectCause: ConnectCause = 'initial'
  // Deadline for onConnect after transport.connect(). Cleared by onConnect
  // (connected) and onClose (got a close — not silent). If neither fires
  // before cfg.connect_timeout_ms, onConnectTimeout emits — the only
  // signal for the `started → (silence)` gap.
  let connectDeadline: ReturnType | undefined // NOTE(review): likely ReturnType<typeof setTimeout>, type arg stripped
  function onConnectTimeout(cause: ConnectCause): void {
    if (tornDown) return
    logEvent('tengu_bridge_repl_connect_timeout', {
      v2: true,
      elapsed_ms: cfg.connect_timeout_ms,
      cause:
        cause as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
  }
  // ── 5. JWT refresh scheduler ────────────────────────────────────────────
  // Schedule a callback 5min before expiry (per response.expires_in). On fire,
  // re-fetch /bridge with OAuth → rebuild transport with fresh credentials.
  // Each /bridge call bumps epoch server-side, so a JWT-only swap would leave
  // the old CCRClient heartbeating with a stale epoch → 409 within 20s.
  // JWT is opaque — do not decode.
  const refresh = createTokenRefreshScheduler({
    refreshBufferMs: cfg.token_refresh_buffer_ms,
    getAccessToken: async () => {
      // Unconditionally refresh OAuth before calling /bridge — getAccessToken()
      // returns expired tokens as non-null strings (doesn't check expiresAt),
      // so truthiness doesn't mean valid. Pass the stale token to onAuth401
      // so handleOAuth401Error's keychain-comparison can detect parallel refresh.
      const stale = getAccessToken()
      if (onAuth401) await onAuth401(stale ?? '')
      return getAccessToken() ?? stale
    },
    onRefresh: (sid, oauthToken) => {
      void (async () => {
        // Laptop wake: overdue proactive timer + SSE 401 fire ~simultaneously.
        // Claim the flag BEFORE the /bridge fetch so the other path skips
        // entirely — prevents double epoch bump (each /bridge call bumps; if
        // both fetch, the first rebuild gets a stale epoch and 409s).
        if (authRecoveryInFlight || tornDown) {
          logForDebugging(
            '[remote-bridge] Recovery already in flight, skipping proactive refresh',
          )
          return
        }
        authRecoveryInFlight = true
        try {
          const fresh = await withRetry(
            () =>
              fetchRemoteCredentials(
                sid,
                baseUrl,
                oauthToken,
                cfg.http_timeout_ms,
              ),
            'fetchRemoteCredentials (proactive)',
            cfg,
          )
          if (!fresh || tornDown) return
          await rebuildTransport(fresh, 'proactive_refresh')
          logForDebugging(
            '[remote-bridge] Transport rebuilt (proactive refresh)',
          )
        } catch (err) {
          logForDebugging(
            `[remote-bridge] Proactive refresh rebuild failed: ${errorMessage(err)}`,
            { level: 'error' },
          )
          logForDiagnosticsNoPII(
            'error',
            'bridge_repl_v2_proactive_refresh_failed',
          )
          if (!tornDown) {
            onStateChange?.('failed', `Refresh failed: ${errorMessage(err)}`)
          }
        } finally {
          authRecoveryInFlight = false
        }
      })()
    },
    label: 'remote',
  })
  refresh.scheduleFromExpiresIn(sessionId, credentials.expires_in)
  // ── 6. Wire callbacks (extracted so transport-rebuild can re-wire) ──────
  function wireTransportCallbacks(): void {
    transport.setOnConnect(() => {
      clearTimeout(connectDeadline)
      logForDebugging('[remote-bridge] v2 transport connected')
      logForDiagnosticsNoPII('info', 'bridge_repl_v2_transport_connected')
      logEvent('tengu_bridge_repl_ws_connected', {
        v2: true,
        cause:
          connectCause as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      if (!initialFlushDone && initialMessages && initialMessages.length > 0) {
        initialFlushDone = true
        // Capture current transport — if 401/teardown happens mid-flush,
        // the stale .finally() must not drain the gate or signal connected.
        // (Same guard pattern as replBridge.ts:1119.)
        const flushTransport = transport
        void flushHistory(initialMessages)
          .catch(e =>
            logForDebugging(`[remote-bridge] flushHistory failed: ${e}`),
          )
          .finally(() => {
            // authRecoveryInFlight catches the v1-vs-v2 asymmetry: v1 nulls
            // transport synchronously in setOnClose (replBridge.ts:1175), so
            // transport !== flushTransport trips immediately. v2 doesn't null —
            // transport reassigned only at rebuildTransport:346, 3 awaits deep.
            // authRecoveryInFlight is set synchronously at rebuildTransport entry.
            if (
              transport !== flushTransport ||
              tornDown ||
              authRecoveryInFlight
            ) {
              return
            }
            drainFlushGate()
            onStateChange?.('connected')
          })
      } else if (!flushGate.active) {
        onStateChange?.('connected')
      }
    })
    transport.setOnData((data: string) => {
      handleIngressMessage(
        data,
        recentPostedUUIDs,
        recentInboundUUIDs,
        onInboundMessage,
        // Remote client answered the permission prompt — the turn resumes.
        // Without this the server stays on requires_action until the next
        // user message or turn-end result.
        onPermissionResponse
          ? res => {
              transport.reportState('running')
              onPermissionResponse(res)
            }
          : undefined,
        req =>
          handleServerControlRequest(req, {
            transport,
            sessionId,
            onInterrupt,
            onSetModel,
            onSetMaxThinkingTokens,
            onSetPermissionMode,
            outboundOnly,
          }),
      )
    })
    transport.setOnClose((code?: number) => {
      clearTimeout(connectDeadline)
      if (tornDown) return
      logForDebugging(`[remote-bridge] v2 transport closed (code=${code})`)
      logEvent('tengu_bridge_repl_ws_closed', { code, v2: true })
      // onClose fires only for TERMINAL failures: 401 (JWT invalid),
      // 4090 (CCR epoch mismatch), 4091 (CCR init failed), or SSE 10-min
      // reconnect budget exhausted. Transient disconnects are handled
      // transparently inside SSETransport. 401 we can recover from (fetch
      // fresh JWT, rebuild transport); all other codes are dead-ends.
      if (code === 401 && !authRecoveryInFlight) {
        void recoverFromAuthFailure()
        return
      }
      onStateChange?.('failed', `Transport closed (code ${code})`)
    })
  }
  // ── 7. Transport rebuild (shared by proactive refresh + 401 recovery) ──
  // Every /bridge call bumps epoch server-side. Both refresh paths must
  // rebuild the transport with the new epoch — a JWT-only swap leaves the
  // old CCRClient heartbeating stale epoch → 409. SSE resumes from the old
  // transport's high-water-mark seq-num so no server-side replay.
  // Caller MUST set authRecoveryInFlight = true before calling (synchronously,
  // before any await) and clear it in a finally. This function doesn't manage
  // the flag — moving it here would be too late to prevent a double /bridge
  // fetch, and each fetch bumps epoch.
  async function rebuildTransport(
    fresh: RemoteCredentials,
    cause: Exclude, // NOTE(review): likely Exclude<ConnectCause, 'initial'>, type args stripped
  ): Promise {
    connectCause = cause
    // Queue writes during rebuild — once /bridge returns, the old transport's
    // epoch is stale and its next write/heartbeat 409s. Without this gate,
    // writeMessages adds UUIDs to recentPostedUUIDs then writeBatch silently
    // no-ops (closed uploader after 409) → permanent silent message loss.
    flushGate.start()
    try {
      const seq = transport.getLastSequenceNum()
      transport.close()
      transport = await createV2ReplTransport({
        sessionUrl: buildCCRv2SdkUrl(fresh.api_base_url, sessionId),
        ingressToken: fresh.worker_jwt,
        sessionId,
        epoch: fresh.worker_epoch,
        heartbeatIntervalMs: cfg.heartbeat_interval_ms,
        heartbeatJitterFraction: cfg.heartbeat_jitter_fraction,
        initialSequenceNum: seq,
        getAuthToken: () => fresh.worker_jwt,
        outboundOnly,
      })
      if (tornDown) {
        // Teardown fired during the async createV2ReplTransport window.
        // Don't wire/connect/schedule — we'd re-arm timers after cancelAll()
        // and fire onInboundMessage into a torn-down bridge.
        transport.close()
        return
      }
      wireTransportCallbacks()
      transport.connect()
      connectDeadline = setTimeout(
        onConnectTimeout,
        cfg.connect_timeout_ms,
        connectCause,
      )
      refresh.scheduleFromExpiresIn(sessionId, fresh.expires_in)
      // Drain queued writes into the new uploader. Runs before
      // ccr.initialize() resolves (transport.connect() is fire-and-forget),
      // but the uploader serializes behind the initial PUT /worker. If
      // init fails (4091), events drop — but only recentPostedUUIDs
      // (per-instance) is populated, so re-enabling the bridge re-flushes.
      drainFlushGate()
    } finally {
      // End the gate on failure paths too — drainFlushGate already ended
      // it on success. Queued messages are dropped (transport still dead).
      flushGate.drop()
    }
  }
  // ── 8. 401 recovery (OAuth refresh + rebuild) ───────────────────────────
  async function recoverFromAuthFailure(): Promise {
    // setOnClose already guards `!authRecoveryInFlight` but that check and
    // this set must be atomic against onRefresh — claim synchronously before
    // any await. Laptop wake fires both paths ~simultaneously.
    if (authRecoveryInFlight) return
    authRecoveryInFlight = true
    onStateChange?.('reconnecting', 'JWT expired — refreshing')
    logForDebugging('[remote-bridge] 401 on SSE — attempting JWT refresh')
    try {
      // Unconditionally try OAuth refresh — getAccessToken() returns expired
      // tokens as non-null strings, so !oauthToken doesn't catch expiry.
      // Pass the stale token so handleOAuth401Error's keychain-comparison
      // can detect if another tab already refreshed.
      const stale = getAccessToken()
      if (onAuth401) await onAuth401(stale ?? '')
      const oauthToken = getAccessToken() ?? stale
      if (!oauthToken || tornDown) {
        if (!tornDown) {
          onStateChange?.('failed', 'JWT refresh failed: no OAuth token')
        }
        return
      }
      const fresh = await withRetry(
        () =>
          fetchRemoteCredentials(
            sessionId,
            baseUrl,
            oauthToken,
            cfg.http_timeout_ms,
          ),
        'fetchRemoteCredentials (recovery)',
        cfg,
      )
      if (!fresh || tornDown) {
        if (!tornDown) {
          onStateChange?.('failed', 'JWT refresh failed after 401')
        }
        return
      }
      // If 401 interrupted the initial flush, writeBatch may have silently
      // no-op'd on the closed uploader (ccr.close() ran in the SSE wrapper
      // before our setOnClose callback). Reset so the new onConnect re-flushes.
      // (v1 scopes initialFlushDone inside the per-transport closure at
      // replBridge.ts:1027 so it resets naturally; v2 has it at outer scope.)
      initialFlushDone = false
      await rebuildTransport(fresh, 'auth_401_recovery')
      logForDebugging('[remote-bridge] Transport rebuilt after 401')
    } catch (err) {
      logForDebugging(
        `[remote-bridge] 401 recovery failed: ${errorMessage(err)}`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'bridge_repl_v2_jwt_refresh_failed')
      if (!tornDown) {
        onStateChange?.('failed', `JWT refresh failed: ${errorMessage(err)}`)
      }
    } finally {
      authRecoveryInFlight = false
    }
  }
  wireTransportCallbacks()
  // Start flushGate BEFORE connect so writeMessages() during handshake
  // queues instead of racing the history POST.
  if (initialMessages && initialMessages.length > 0) {
    flushGate.start()
  }
  transport.connect()
  connectDeadline = setTimeout(
    onConnectTimeout,
    cfg.connect_timeout_ms,
    connectCause,
  )
  // ── 9. History flush + drain helpers ────────────────────────────────────
  function drainFlushGate(): void {
    const msgs = flushGate.end()
    if (msgs.length === 0) return
    for (const msg of msgs) recentPostedUUIDs.add(msg.uuid)
    const events = toSDKMessages(msgs).map(m => ({
      ...m,
      session_id: sessionId,
    }))
    if (msgs.some(m => m.type === 'user')) {
      transport.reportState('running')
    }
    logForDebugging(
      `[remote-bridge] Drained ${msgs.length} queued message(s) after flush`,
    )
    void transport.writeBatch(events)
  }
  async function flushHistory(msgs: Message[]): Promise {
    // v2 always creates a fresh server session (unconditional createCodeSession
    // above) — no session reuse, no double-post risk. Unlike v1, we do NOT
    // filter by previouslyFlushedUUIDs: that set persists across REPL enable/
    // disable cycles (useRef), so it would wrongly suppress history on re-enable.
    const eligible = msgs.filter(isEligibleBridgeMessage)
    const capped =
      initialHistoryCap > 0 && eligible.length > initialHistoryCap
        ? eligible.slice(-initialHistoryCap)
        : eligible
    if (capped.length < eligible.length) {
      logForDebugging(
        `[remote-bridge] Capped initial flush: ${eligible.length} -> ${capped.length} (cap=${initialHistoryCap})`,
      )
    }
    const events = toSDKMessages(capped).map(m => ({
      ...m,
      session_id: sessionId,
    }))
    if (events.length === 0) return
    // Mid-turn init: if Remote Control is enabled while a query is running,
    // the last eligible message is a user prompt or tool_result (both 'user'
    // type). Without this the init PUT's 'idle' sticks until the next user-
    // type message forwards via writeMessages — which for a pure-text turn
    // is never (only assistant chunks stream post-init). Check eligible (pre-
    // cap), not capped: the cap may truncate to a user message even when the
    // actual trailing message is assistant.
    if (eligible.at(-1)?.type === 'user') {
      transport.reportState('running')
    }
    logForDebugging(`[remote-bridge] Flushing ${events.length} history events`)
    await transport.writeBatch(events)
  }
  // ── 10. Teardown ──────────────────────────────────────────────────────────
  // On SIGINT/SIGTERM//exit, gracefulShutdown races runCleanupFunctions()
  // against a 2s cap before forceExit kills the process. Budget accordingly:
  // - archive: teardown_archive_timeout_ms (default 1500, cap 2000)
  // - result write: fire-and-forget, archive latency covers the drain
  // - 401 retry: only if first archive 401s, shares the same budget
  async function teardown(): Promise {
    if (tornDown) return
    tornDown = true
    refresh.cancelAll()
    clearTimeout(connectDeadline)
    flushGate.drop()
    // Fire the result message before archive — transport.write() only awaits
    // enqueue (SerialBatchEventUploader resolves once buffered, drain is
    // async). Archiving before close() gives the uploader's drain loop a
    // window (typical archive ≈ 100-500ms) to POST the result without an
    // explicit sleep. close() sets closed=true which interrupts drain at the
    // next while-check, so close-before-archive drops the result.
    transport.reportState('idle')
    void transport.write(makeResultMessage(sessionId))
    let token = getAccessToken()
    let status = await archiveSession(
      sessionId,
      baseUrl,
      token,
      orgUUID,
      cfg.teardown_archive_timeout_ms,
    )
    // Token is usually fresh (refresh scheduler runs 5min before expiry) but
    // laptop-wake past the refresh window leaves getAccessToken() returning a
    // stale string. Retry once on 401 — onAuth401 (= handleOAuth401Error)
    // clears keychain cache + force-refreshes. No proactive refresh on the
    // happy path: handleOAuth401Error force-refreshes even valid tokens,
    // which would waste budget 99% of the time. try/catch mirrors
    // recoverFromAuthFailure: keychain reads can throw (macOS locked after
    // wake); an uncaught throw here would skip transport.close + telemetry.
    if (status === 401 && onAuth401) {
      try {
        await onAuth401(token ?? '')
        token = getAccessToken()
        status = await archiveSession(
          sessionId,
          baseUrl,
          token,
          orgUUID,
          cfg.teardown_archive_timeout_ms,
        )
      } catch (err) {
        logForDebugging(
          `[remote-bridge] Teardown 401 retry threw: ${errorMessage(err)}`,
          { level: 'error' },
        )
      }
    }
    transport.close()
    // Collapse the raw archive outcome into the BQ categorical (see
    // ArchiveTelemetryStatus below for why timeout/error merge).
    const archiveStatus: ArchiveTelemetryStatus =
      status === 'no_token'
        ? 'skipped_no_token'
        : status === 'timeout' || status === 'error'
          ? 'network_error'
          : status >= 500
            ? 'server_5xx'
            : status >= 400
              ? 'server_4xx'
              : 'ok'
    logForDebugging(`[remote-bridge] Torn down (archive=${status})`)
    logForDiagnosticsNoPII('info', 'bridge_repl_v2_teardown')
    logEvent(
      feature('CCR_MIRROR') && outboundOnly
        ? 'tengu_ccr_mirror_teardown'
        : 'tengu_bridge_repl_teardown',
      {
        v2: true,
        archive_status:
          archiveStatus as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        archive_ok: typeof status === 'number' && status < 400,
        archive_http_status: typeof status === 'number' ? status : undefined,
        archive_timeout: status === 'timeout',
        archive_no_token: status === 'no_token',
      },
    )
  }
  const unregister = registerCleanup(teardown)
  if (feature('CCR_MIRROR') && outboundOnly) {
    logEvent('tengu_ccr_mirror_started', {
      v2: true,
      expires_in_s: credentials.expires_in,
    })
  } else {
    logEvent('tengu_bridge_repl_started', {
      has_initial_messages: !!(initialMessages && initialMessages.length > 0),
      v2: true,
      expires_in_s: credentials.expires_in,
      inProtectedNamespace: isInProtectedNamespace(),
    })
  }
  // ── 11. Handle ──────────────────────────────────────────────────────────
  return {
    bridgeSessionId: sessionId,
    environmentId: '',
    sessionIngressUrl: credentials.api_base_url,
    writeMessages(messages) {
      const filtered = messages.filter(
        m =>
          isEligibleBridgeMessage(m) &&
          !initialMessageUUIDs.has(m.uuid) &&
          !recentPostedUUIDs.has(m.uuid),
      )
      if (filtered.length === 0) return
      // Fire onUserMessage for title derivation. Scan before the flushGate
      // check — prompts are title-worthy even if they queue. Keeps calling
      // on every title-worthy message until the callback returns true; the
      // caller owns the policy (derive at 1st and 3rd, skip if explicit).
      if (!userMessageCallbackDone) {
        for (const m of filtered) {
          const text = extractTitleText(m)
          if (text !== undefined && onUserMessage?.(text, sessionId)) {
            userMessageCallbackDone = true
            break
          }
        }
      }
      if (flushGate.enqueue(...filtered)) {
        logForDebugging(
          `[remote-bridge] Queued ${filtered.length} message(s) during flush`,
        )
        return
      }
      for (const msg of filtered) recentPostedUUIDs.add(msg.uuid)
      const events = toSDKMessages(filtered).map(m => ({
        ...m,
        session_id: sessionId,
      }))
      // v2 does not derive worker_status from events server-side (unlike v1
      // session-ingress session_status_updater.go). Push it from here so the
      // CCR web session list shows Running instead of stuck on Idle. A user
      // message in the batch marks turn start. CCRClient.reportState dedupes
      // consecutive same-state pushes.
      if (filtered.some(m => m.type === 'user')) {
        transport.reportState('running')
      }
      logForDebugging(`[remote-bridge] Sending ${filtered.length} message(s)`)
      void transport.writeBatch(events)
    },
    // Pre-mapped SDKMessages (daemon path): skip any whose uuid was already
    // posted, record the rest, and batch-write. No flushGate interaction —
    // callers of this path don't participate in the history flush.
    writeSdkMessages(messages: SDKMessage[]) {
      const filtered = messages.filter(
        m => !m.uuid || !recentPostedUUIDs.has(m.uuid),
      )
      if (filtered.length === 0) return
      for (const msg of filtered) {
        if (msg.uuid) recentPostedUUIDs.add(msg.uuid)
      }
      const events = filtered.map(m => ({ ...m, session_id: sessionId }))
      void transport.writeBatch(events)
    },
    sendControlRequest(request: SDKControlRequest) {
      if (authRecoveryInFlight) {
        logForDebugging(
          `[remote-bridge] Dropping control_request during 401 recovery: ${request.request_id}`,
        )
        return
      }
      const event = { ...request, session_id: sessionId }
      if (request.request.subtype === 'can_use_tool') {
        transport.reportState('requires_action')
      }
      void transport.write(event)
      logForDebugging(
        `[remote-bridge] Sent control_request request_id=${request.request_id}`,
      )
    },
    sendControlResponse(response: SDKControlResponse) {
      if (authRecoveryInFlight) {
        logForDebugging(
          '[remote-bridge] Dropping control_response during 401 recovery',
        )
        return
      }
      const event = { ...response, session_id: sessionId }
      transport.reportState('running')
      void transport.write(event)
      logForDebugging('[remote-bridge] Sent control_response')
    },
    sendControlCancelRequest(requestId: string) {
      if (authRecoveryInFlight) {
        logForDebugging(
          `[remote-bridge] Dropping control_cancel_request during 401 recovery: ${requestId}`,
        )
        return
      }
      const event = {
        type: 'control_cancel_request' as const,
        request_id: requestId,
        session_id: sessionId,
      }
      // Hook/classifier/channel/recheck resolved the permission locally —
      // interactiveHandler calls only cancelRequest (no sendResponse) on
      // those paths, so without this the server stays on requires_action.
      transport.reportState('running')
      void transport.write(event)
      logForDebugging(
        `[remote-bridge] Sent control_cancel_request request_id=${requestId}`,
      )
    },
    sendResult() {
      if (authRecoveryInFlight) {
        logForDebugging('[remote-bridge] Dropping result during 401 recovery')
        return
      }
      transport.reportState('idle')
      void transport.write(makeResultMessage(sessionId))
      logForDebugging(`[remote-bridge] Sent result`)
    },
    async teardown() {
      unregister()
      await teardown()
    },
  }
}
// ─── Session API (v2 /code/sessions, no env) ─────────────────────────────────

/**
 * Retry an async init call with exponential backoff + symmetric jitter.
 *
 * The wrapped `fn` signals failure by resolving to null (it is expected not
 * to throw); the first non-null result short-circuits the loop. Resolves to
 * null once cfg.init_retry_max_attempts attempts all came back null.
 *
 * NOTE(review): the generic parameter was stripped in the extracted source
 * (bare `Promise` in both positions) — restored as `<T>` so call sites keep
 * their concrete result type (session id string, RemoteCredentials, …).
 *
 * @param fn    Operation to retry; resolves null on failure.
 * @param label Human-readable name used in debug log lines.
 * @param cfg   Supplies attempt count, base/max delay and jitter fraction.
 */
async function withRetry<T>(
  fn: () => Promise<T | null>,
  label: string,
  cfg: EnvLessBridgeConfig,
): Promise<T | null> {
  const max = cfg.init_retry_max_attempts
  for (let attempt = 1; attempt <= max; attempt++) {
    const result = await fn()
    if (result !== null) return result
    if (attempt < max) {
      // Exponential backoff (base * 2^(attempt-1)) with jitter drawn
      // uniformly from ±jitter_fraction, clamped to max_delay.
      const base = cfg.init_retry_base_delay_ms * 2 ** (attempt - 1)
      const jitter =
        base * cfg.init_retry_jitter_fraction * (2 * Math.random() - 1)
      const delay = Math.min(base + jitter, cfg.init_retry_max_delay_ms)
      logForDebugging(
        `[remote-bridge] ${label} failed (attempt ${attempt}/${max}), retrying in ${Math.round(delay)}ms`,
      )
      await sleep(delay)
    }
  }
  return null
}
// Moved to codeSessionApi.ts so the SDK /bridge subpath can bundle them
// without pulling in this file's heavy CLI tree (analytics, transport).
export {
createCodeSession,
type RemoteCredentials,
} from './codeSessionApi.js'
import {
createCodeSession,
fetchRemoteCredentials as fetchRemoteCredentialsRaw,
type RemoteCredentials,
} from './codeSessionApi.js'
import { getBridgeBaseUrlOverride } from './bridgeConfig.js'
// CLI-side wrapper that applies the CLAUDE_BRIDGE_BASE_URL dev override and
// injects the trusted-device token (both are env/GrowthBook reads that the
// SDK-facing codeSessionApi.ts export must stay free of).
//
// Resolves to the credentials from codeSessionApi (with api_base_url swapped
// back to baseUrl when the dev override is active) or null when the
// underlying fetch failed.
//
// NOTE(review): the original return annotation was a bare `Promise` (type
// argument stripped in extraction, presumably
// Promise<RemoteCredentials | null>); left to inference here so the
// signature is valid — confirm and re-annotate against the original source.
export async function fetchRemoteCredentials(
  sessionId: string,
  baseUrl: string,
  accessToken: string,
  timeoutMs: number,
) {
  const creds = await fetchRemoteCredentialsRaw(
    sessionId,
    baseUrl,
    accessToken,
    timeoutMs,
    getTrustedDeviceToken(),
  )
  if (!creds) return null
  // Dev override active → keep talking to the overridden base URL rather
  // than the worker URL the server handed back.
  return getBridgeBaseUrlOverride()
    ? { ...creds, api_base_url: baseUrl }
    : creds
}
// Every outcome archiveSession can report: an HTTP status code, a client-side
// axios timeout ('timeout'), any other network/throw path ('error'), or a
// skipped call because no OAuth token was available ('no_token').
type ArchiveStatus = number | 'timeout' | 'error' | 'no_token'

// Single categorical for BQ `GROUP BY archive_status`. The booleans on
// _teardown predate this and are redundant with it (except archive_timeout,
// which distinguishes ECONNABORTED from other network errors — both map to
// 'network_error' here since the dominant cause in a 1.5s window is timeout).
type ArchiveTelemetryStatus =
  | 'ok'
  | 'skipped_no_token'
  | 'network_error'
  | 'server_4xx'
  | 'server_5xx'

/**
 * Archive a bridge session via the compat layer (best effort).
 *
 * Never throws: every failure mode is folded into the ArchiveStatus return
 * so teardown telemetry can record it.
 *
 * NOTE(review): return annotation restored — the extracted source read a
 * bare `Promise`; `Promise<ArchiveStatus>` matches every return statement.
 *
 * @param accessToken OAuth token; when absent the call is skipped entirely.
 * @returns HTTP status (validateStatus accepts all), 'timeout' on axios
 *          ECONNABORTED, 'error' on any other throw, 'no_token' when skipped.
 */
async function archiveSession(
  sessionId: string,
  baseUrl: string,
  accessToken: string | undefined,
  orgUUID: string,
  timeoutMs: number,
): Promise<ArchiveStatus> {
  if (!accessToken) return 'no_token'
  // Archive lives at the compat layer (/v1/sessions/*, not /v1/code/sessions).
  // compat.parseSessionID only accepts TagSession (session_*), so retag cse_*.
  // anthropic-beta + x-organization-uuid are required — without them the
  // compat gateway 404s before reaching the handler.
  //
  // Unlike bridgeMain.ts (which caches compatId in sessionCompatIds to keep
  // in-memory titledSessions/logger keys consistent across a mid-session
  // gate flip), this compatId is only a server URL path segment — no
  // in-memory state. Fresh compute matches whatever the server currently
  // validates: if the gate is OFF, the server has been updated to accept
  // cse_* and we correctly send it.
  const compatId = toCompatSessionId(sessionId)
  try {
    const response = await axios.post(
      `${baseUrl}/v1/sessions/${compatId}/archive`,
      {},
      {
        headers: {
          ...oauthHeaders(accessToken),
          'anthropic-beta': 'ccr-byoc-2025-07-29',
          'x-organization-uuid': orgUUID,
        },
        timeout: timeoutMs,
        // Accept every status — the caller maps 4xx/5xx to telemetry buckets.
        validateStatus: () => true,
      },
    )
    logForDebugging(
      `[remote-bridge] Archive ${compatId} status=${response.status}`,
    )
    return response.status
  } catch (err) {
    const msg = errorMessage(err)
    logForDebugging(`[remote-bridge] Archive failed: ${msg}`)
    // ECONNABORTED is axios's timeout code; everything else is generic.
    return axios.isAxiosError(err) && err.code === 'ECONNABORTED'
      ? 'timeout'
      : 'error'
  }
}
================================================
FILE: restored-src/src/bridge/replBridge.ts
================================================
// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
import { randomUUID } from 'crypto'
import {
createBridgeApiClient,
BridgeFatalError,
isExpiredErrorType,
isSuppressible403,
} from './bridgeApi.js'
import type { BridgeConfig, BridgeApiClient } from './types.js'
import { logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../services/analytics/index.js'
import { registerCleanup } from '../utils/cleanupRegistry.js'
import {
handleIngressMessage,
handleServerControlRequest,
makeResultMessage,
isEligibleBridgeMessage,
extractTitleText,
BoundedUUIDSet,
} from './bridgeMessaging.js'
import {
decodeWorkSecret,
buildSdkUrl,
buildCCRv2SdkUrl,
sameSessionId,
} from './workSecret.js'
import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
import { updateSessionBridgeId } from '../utils/concurrentSessions.js'
import { getTrustedDeviceToken } from './trustedDevice.js'
import { HybridTransport } from '../cli/transports/HybridTransport.js'
import {
type ReplBridgeTransport,
createV1ReplTransport,
createV2ReplTransport,
} from './replBridgeTransport.js'
import { updateSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
import { isEnvTruthy, isInProtectedNamespace } from '../utils/envUtils.js'
import { validateBridgeId } from './bridgeApi.js'
import {
describeAxiosError,
extractHttpStatus,
logBridgeSkip,
} from './debugUtils.js'
import type { Message } from '../types/message.js'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
import type {
SDKControlRequest,
SDKControlResponse,
} from '../entrypoints/sdk/controlTypes.js'
import { createCapacityWake, type CapacitySignal } from './capacityWake.js'
import { FlushGate } from './flushGate.js'
import {
DEFAULT_POLL_CONFIG,
type PollIntervalConfig,
} from './pollConfigDefaults.js'
import { errorMessage } from '../utils/errors.js'
import { sleep } from '../utils/sleep.js'
import {
wrapApiForFaultInjection,
registerBridgeDebugHandle,
clearBridgeDebugHandle,
injectBridgeFault,
} from './bridgeDebug.js'
export type ReplBridgeHandle = {
  bridgeSessionId: string
  environmentId: string
  sessionIngressUrl: string
  writeMessages(messages: Message[]): void
  writeSdkMessages(messages: SDKMessage[]): void
  sendControlRequest(request: SDKControlRequest): void
  sendControlResponse(response: SDKControlResponse): void
  sendControlCancelRequest(requestId: string): void
  sendResult(): void
  /** Resolves once bridge shutdown is complete. */
  teardown(): Promise<void>
}
/** Connection lifecycle states reported through onStateChange. */
export type BridgeState = 'ready' | 'connected' | 'reconnecting' | 'failed'
/**
 * Explicit-param input to initBridgeCore. Everything initReplBridge reads
 * from bootstrap state (cwd, session ID, git, OAuth) becomes a field here.
 * A daemon caller (Agent SDK, PR 4) that never runs main.tsx fills these
 * in itself.
 */
export type BridgeCoreParams = {
  dir: string
  machineName: string
  branch: string
  gitRepoUrl: string | null
  title: string
  baseUrl: string
  sessionIngressUrl: string
  /**
   * Opaque string sent as metadata.worker_type. Use BridgeWorkerType for
   * the two CLI-originated values; daemon callers may send any string the
   * backend recognizes (it's just a filter key on the web side).
   */
  workerType: string
  getAccessToken: () => string | undefined
  /**
   * POST /v1/sessions. Injected because `createSession.ts` lazy-loads
   * `auth.ts`/`model.ts`/`oauth/client.ts` and `bun --outfile` inlines
   * dynamic imports — the lazy-load doesn't help, the whole REPL tree ends
   * up in the Agent SDK bundle.
   *
   * REPL wrapper passes `createBridgeSession` from `createSession.ts`.
   * Daemon wrapper passes `createBridgeSessionLean` from `sessionApi.ts`
   * (HTTP-only, orgUUID+model supplied by the daemon caller).
   *
   * Receives `gitRepoUrl`+`branch` so the REPL wrapper can build the git
   * source/outcome for claude.ai's session card. Daemon ignores them.
   */
  createSession: (opts: {
    environmentId: string
    title: string
    gitRepoUrl: string | null
    branch: string
    signal: AbortSignal
  }) => Promise<string | null>
  // NOTE(review): return type arg restored — initBridgeCore treats any
  // falsy result as failure; confirm null vs undefined at the impls.
  /**
   * POST /v1/sessions/{id}/archive. Same injection rationale. Best-effort;
   * the callback MUST NOT throw.
   */
  archiveSession: (sessionId: string) => Promise<void>
  /**
   * Invoked on reconnect-after-env-lost to refresh the title. REPL wrapper
   * reads session storage (picks up /rename); daemon returns the static
   * title. Defaults to () => title.
   */
  getCurrentTitle?: () => string
  /**
   * Converts internal Message[] → SDKMessage[] for writeMessages() and the
   * initial-flush/drain paths. REPL wrapper passes the real toSDKMessages
   * from utils/messages/mappers.ts. Daemon callers that only use
   * writeSdkMessages() and pass no initialMessages can omit this — those
   * code paths are unreachable.
   *
   * Injected rather than imported because mappers.ts transitively pulls in
   * src/commands.ts via messages.ts → api.ts → prompts.ts, dragging the
   * entire command registry + React tree into the Agent SDK bundle.
   */
  toSDKMessages?: (messages: Message[]) => SDKMessage[]
  /**
   * OAuth 401 refresh handler passed to createBridgeApiClient. REPL wrapper
   * passes handleOAuth401Error; daemon passes its AuthManager's handler.
   * Injected because utils/auth.ts transitively pulls in the command
   * registry via config.ts → file.ts → permissions/filesystem.ts →
   * sessionStorage.ts → commands.ts.
   */
  onAuth401?: (staleAccessToken: string) => Promise<void>
  // NOTE(review): type arg restored as void — confirm the handler's
  // resolved value is really unused by createBridgeApiClient.
  /**
   * Poll interval config getter for the work-poll heartbeat loop. REPL
   * wrapper passes the GrowthBook-backed getPollIntervalConfig (allows ops
   * to live-tune poll rates fleet-wide). Daemon passes a static config
   * with a 60s heartbeat (5× headroom under the 300s work-lease TTL).
   * Injected because growthbook.ts transitively pulls in the command
   * registry via the same config.ts chain.
   */
  getPollIntervalConfig?: () => PollIntervalConfig
  /**
   * Max initial messages to replay on connect. REPL wrapper reads from the
   * tengu_bridge_initial_history_cap GrowthBook flag. Daemon passes no
   * initialMessages so this is never read. Default 200 matches the flag
   * default.
   */
  initialHistoryCap?: number
  // Same REPL-flush machinery as InitBridgeOptions — daemon omits these.
  initialMessages?: Message[]
  previouslyFlushedUUIDs?: Set<string>
  onInboundMessage?: (msg: SDKMessage) => void
  onPermissionResponse?: (response: SDKControlResponse) => void
  onInterrupt?: () => void
  onSetModel?: (model: string | undefined) => void
  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
  /**
   * Returns a policy verdict so this module can emit an error control_response
   * without importing the policy checks itself (bootstrap-isolation constraint).
   * The callback must guard `auto` (isAutoModeGateEnabled) and
   * `bypassPermissions` (isBypassPermissionsModeDisabled AND
   * isBypassPermissionsModeAvailable) BEFORE calling transitionPermissionMode —
   * that function's internal auto-gate check is a defensive throw, not a
   * graceful guard, and its side-effect order is setAutoModeActive(true) then
   * throw, which corrupts the 3-way invariant documented in src/CLAUDE.md if
   * the callback lets the throw escape here.
   */
  onSetPermissionMode?: (
    mode: PermissionMode,
  ) => { ok: true } | { ok: false; error: string }
  onStateChange?: (state: BridgeState, detail?: string) => void
  /**
   * Fires on each real user message to flow through writeMessages() until
   * the callback returns true (done). Mirrors remoteBridgeCore.ts's
   * onUserMessage so the REPL bridge can derive a session title from early
   * prompts when none was set at init time (e.g. user runs /remote-control
   * on an empty conversation, then types). Tool-result wrappers, meta
   * messages, and display-tag-only messages are skipped. Receives
   * currentSessionId so the wrapper can PATCH the title without a closure
   * dance to reach the not-yet-returned handle. The caller owns the
   * derive-at-count-1-and-3 policy; the transport just keeps calling until
   * told to stop. Not fired for the writeSdkMessages daemon path (daemon
   * sets its own title at init). Distinct from SessionSpawnOpts's
   * onFirstUserMessage (spawn-bridge, PR #21250), which stays fire-once.
   */
  onUserMessage?: (text: string, sessionId: string) => boolean
  /** See InitBridgeOptions.perpetual. */
  perpetual?: boolean
  /**
   * Seeds lastTransportSequenceNum — the SSE event-stream high-water mark
   * that's carried across transport swaps within one process. Daemon callers
   * pass the value they persisted at shutdown so the FIRST SSE connect of a
   * fresh process sends from_sequence_num and the server doesn't replay full
   * history. REPL callers omit (fresh session each run → 0 is correct).
   */
  initialSSESequenceNum?: number
}
/**
 * Superset of ReplBridgeHandle. Adds getSSESequenceNum for daemon callers
 * that persist the SSE seq-num across process restarts and pass it back as
 * initialSSESequenceNum on the next start. REPL callers can ignore the
 * extra accessor.
 */
export type BridgeCoreHandle = ReplBridgeHandle & {
  /**
   * Current SSE sequence-number high-water mark. Updates as transports
   * swap. Daemon callers persist this on shutdown and pass it back as
   * initialSSESequenceNum on next start.
   */
  getSSESequenceNum(): number
}
/**
 * Poll error recovery constants. When the work poll starts failing (e.g.
 * server 500s), we use exponential backoff and give up after this timeout.
 * This is deliberately long — the server is the authority on when a session
 * is truly dead. As long as the server accepts our poll, we keep waiting
 * for it to re-dispatch the work item.
 */
// Initial backoff delay after the first poll failure.
const POLL_ERROR_INITIAL_DELAY_MS = 2_000
// Ceiling on the backoff delay between failing poll attempts.
const POLL_ERROR_MAX_DELAY_MS = 60_000
// Total budget of continuous poll failure (15 min) before giving up.
const POLL_ERROR_GIVE_UP_MS = 15 * 60 * 1000
// Monotonically increasing counter for distinguishing init calls in logs
let initSequence = 0
/**
* Bootstrap-free core: env registration → session creation → poll loop →
* ingress WS → teardown. Reads nothing from bootstrap/state or
* sessionStorage — all context comes from params. Caller (initReplBridge
* below, or a daemon in PR 4) has already passed entitlement gates and
* gathered git/auth/title.
*
* Returns null on registration or session-creation failure.
*/
export async function initBridgeCore(
params: BridgeCoreParams,
): Promise {
const {
dir,
machineName,
branch,
gitRepoUrl,
title,
baseUrl,
sessionIngressUrl,
workerType,
getAccessToken,
createSession,
archiveSession,
getCurrentTitle = () => title,
toSDKMessages = () => {
throw new Error(
'BridgeCoreParams.toSDKMessages not provided. Pass it if you use writeMessages() or initialMessages — daemon callers that only use writeSdkMessages() never hit this path.',
)
},
onAuth401,
getPollIntervalConfig = () => DEFAULT_POLL_CONFIG,
initialHistoryCap = 200,
initialMessages,
previouslyFlushedUUIDs,
onInboundMessage,
onPermissionResponse,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
onStateChange,
onUserMessage,
perpetual,
initialSSESequenceNum = 0,
} = params
const seq = ++initSequence
// bridgePointer import hoisted: perpetual mode reads it before register;
// non-perpetual writes it after session create; both use clear at teardown.
const { writeBridgePointer, clearBridgePointer, readBridgePointer } =
await import('./bridgePointer.js')
// Perpetual mode: read the crash-recovery pointer and treat it as prior
// state. The pointer is written unconditionally after session create
// (crash-recovery for all sessions); perpetual mode just skips the
// teardown clear so it survives clean exits too. Only reuse 'repl'
// pointers — a crashed standalone bridge (`claude remote-control`)
// writes source:'standalone' with a different workerType.
const rawPrior = perpetual ? await readBridgePointer(dir) : null
const prior = rawPrior?.source === 'repl' ? rawPrior : null
logForDebugging(
`[bridge:repl] initBridgeCore #${seq} starting (initialMessages=${initialMessages?.length ?? 0}${prior ? ` perpetual prior=env:${prior.environmentId}` : ''})`,
)
// 5. Register bridge environment
const rawApi = createBridgeApiClient({
baseUrl,
getAccessToken,
runnerVersion: MACRO.VERSION,
onDebug: logForDebugging,
onAuth401,
getTrustedDeviceToken,
})
// Ant-only: interpose so /bridge-kick can inject poll/register/heartbeat
// failures. Zero cost in external builds (rawApi passes through unchanged).
const api =
process.env.USER_TYPE === 'ant' ? wrapApiForFaultInjection(rawApi) : rawApi
const bridgeConfig: BridgeConfig = {
dir,
machineName,
branch,
gitRepoUrl,
maxSessions: 1,
spawnMode: 'single-session',
verbose: false,
sandbox: false,
bridgeId: randomUUID(),
workerType,
environmentId: randomUUID(),
reuseEnvironmentId: prior?.environmentId,
apiBaseUrl: baseUrl,
sessionIngressUrl,
}
let environmentId: string
let environmentSecret: string
try {
const reg = await api.registerBridgeEnvironment(bridgeConfig)
environmentId = reg.environment_id
environmentSecret = reg.environment_secret
} catch (err) {
logBridgeSkip(
'registration_failed',
`[bridge:repl] Environment registration failed: ${errorMessage(err)}`,
)
// Stale pointer may be the cause (expired/deleted env) — clear it so
// the next start doesn't retry the same dead ID.
if (prior) {
await clearBridgePointer(dir)
}
onStateChange?.('failed', errorMessage(err))
return null
}
logForDebugging(`[bridge:repl] Environment registered: ${environmentId}`)
logForDiagnosticsNoPII('info', 'bridge_repl_env_registered')
logEvent('tengu_bridge_repl_env_registered', {})
/**
 * Reconnect-in-place: if the just-registered environmentId matches what
 * was requested, call reconnectSession to force-stop stale workers and
 * re-queue the session. Used at init (perpetual mode — env is alive but
 * idle after clean teardown) and in doReconnect() Strategy 1 (env lost
 * then resurrected). Returns true on success; caller falls back to
 * fresh session creation on false.
 */
async function tryReconnectInPlace(
  requestedEnvId: string,
  sessionId: string,
): Promise<boolean> {
  if (environmentId !== requestedEnvId) {
    logForDebugging(
      `[bridge:repl] Env mismatch (requested ${requestedEnvId}, got ${environmentId}) — cannot reconnect in place`,
    )
    return false
  }
  // The pointer stores what createBridgeSession returned (session_*,
  // compat/convert.go:41). /bridge/reconnect is an environments-layer
  // endpoint — once the server's ccr_v2_compat_enabled gate is on it
  // looks sessions up by their infra tag (cse_*) and returns "Session
  // not found" for the session_* costume. We don't know the gate state
  // pre-poll, so try both; the re-tag is a no-op if the ID is already
  // cse_* (doReconnect Strategy 1 path — currentSessionId never mutates
  // to cse_* but future-proof the check).
  const infraId = toInfraSessionId(sessionId)
  const candidates =
    infraId === sessionId ? [sessionId] : [sessionId, infraId]
  for (const id of candidates) {
    try {
      await api.reconnectSession(environmentId, id)
      logForDebugging(
        `[bridge:repl] Reconnected session ${id} in place on env ${environmentId}`,
      )
      return true
    } catch (err) {
      logForDebugging(
        `[bridge:repl] reconnectSession(${id}) failed: ${errorMessage(err)}`,
      )
    }
  }
  logForDebugging(
    '[bridge:repl] reconnectSession exhausted — falling through to fresh session',
  )
  return false
}
// Perpetual init: env is alive but has no queued work after clean
// teardown. reconnectSession re-queues it. doReconnect() has the same
// call but only fires on poll 404 (env dead);
// here the env is alive but idle.
const reusedPriorSession = prior
? await tryReconnectInPlace(prior.environmentId, prior.sessionId)
: false
if (prior && !reusedPriorSession) {
await clearBridgePointer(dir)
}
// 6. Create session on the bridge. Initial messages are NOT included as
// session creation events because those use STREAM_ONLY persistence and
// are published before the CCR UI subscribes, so they get lost. Instead,
// initial messages are flushed via the ingress WebSocket once it connects.
// Mutable session ID — updated when the environment+session pair is
// re-created after a connection loss.
let currentSessionId: string
if (reusedPriorSession && prior) {
currentSessionId = prior.sessionId
logForDebugging(
`[bridge:repl] Perpetual session reused: ${currentSessionId}`,
)
// Server already has all initialMessages from the prior CLI run. Mark
// them as previously-flushed so the initial flush filter excludes them
// (previouslyFlushedUUIDs is a fresh Set on every CLI start). Duplicate
// UUIDs cause the server to kill the WebSocket.
if (initialMessages && previouslyFlushedUUIDs) {
for (const msg of initialMessages) {
previouslyFlushedUUIDs.add(msg.uuid)
}
}
} else {
const createdSessionId = await createSession({
environmentId,
title,
gitRepoUrl,
branch,
signal: AbortSignal.timeout(15_000),
})
if (!createdSessionId) {
logForDebugging(
'[bridge:repl] Session creation failed, deregistering environment',
)
logEvent('tengu_bridge_repl_session_failed', {})
await api.deregisterEnvironment(environmentId).catch(() => {})
onStateChange?.('failed', 'Session creation failed')
return null
}
currentSessionId = createdSessionId
logForDebugging(`[bridge:repl] Session created: ${currentSessionId}`)
}
// Crash-recovery pointer: written now so a kill -9 at any point after
// this leaves a recoverable trail. Cleared in teardown (non-perpetual)
// or left alone (perpetual mode — pointer survives clean exit too).
// `claude remote-control --continue` from the same directory will detect
// it and offer to resume.
await writeBridgePointer(dir, {
sessionId: currentSessionId,
environmentId,
source: 'repl',
})
logForDiagnosticsNoPII('info', 'bridge_repl_session_created')
logEvent('tengu_bridge_repl_started', {
has_initial_messages: !!(initialMessages && initialMessages.length > 0),
inProtectedNamespace: isInProtectedNamespace(),
})
// UUIDs of initial messages. Used for dedup in writeMessages to avoid
// re-sending messages that were already flushed on WebSocket open.
const initialMessageUUIDs = new Set()
if (initialMessages) {
for (const msg of initialMessages) {
initialMessageUUIDs.add(msg.uuid)
}
}
// Bounded ring buffer of UUIDs for messages we've already sent to the
// server via the ingress WebSocket. Serves two purposes:
// 1. Echo filtering — ignore our own messages bouncing back on the WS.
// 2. Secondary dedup in writeMessages — catch race conditions where
// the hook's index-based tracking isn't sufficient.
//
// Seeded with initialMessageUUIDs so that when the server echoes back
// the initial conversation context over the ingress WebSocket, those
// messages are recognized as echoes and not re-injected into the REPL.
//
// Capacity of 2000 covers well over any realistic echo window (echoes
// arrive within milliseconds) and any messages that might be re-encountered
// after compaction. The hook's lastWrittenIndexRef is the primary dedup;
// this is a safety net.
const recentPostedUUIDs = new BoundedUUIDSet(2000)
for (const uuid of initialMessageUUIDs) {
recentPostedUUIDs.add(uuid)
}
// Bounded set of INBOUND prompt UUIDs we've already forwarded to the REPL.
// Defensive dedup for when the server re-delivers prompts (seq-num
// negotiation failure, server edge cases, transport swap races). The
// seq-num carryover below is the primary fix; this is the safety net.
const recentInboundUUIDs = new BoundedUUIDSet(2000)
// 7. Start poll loop for work items — this is what makes the session
// "live" on claude.ai. When a user types there, the backend dispatches
// a work item to our environment. We poll for it, get the ingress token,
// and connect the ingress WebSocket.
//
// The poll loop keeps running: when work arrives it connects the ingress
// WebSocket, and if the WebSocket drops unexpectedly (code != 1000) it
// resumes polling to get a fresh ingress token and reconnect.
const pollController = new AbortController()
// Adapter over either HybridTransport (v1: WS reads + POST writes to
// Session-Ingress) or SSETransport+CCRClient (v2: SSE reads + POST
// writes to CCR /worker/*). The v1/v2 choice is made in onWorkReceived:
// server-driven via secret.use_code_sessions, with CLAUDE_BRIDGE_USE_CCR_V2
// as an ant-dev override.
let transport: ReplBridgeTransport | null = null
// Bumped on every onWorkReceived. Captured in createV2ReplTransport's .then()
// closure to detect stale resolutions: if two calls race while transport is
// null, both registerWorker() (bumping server epoch), and whichever resolves
// SECOND is the correct one — but the transport !== null check gets this
// backwards (first-to-resolve installs, second discards). The generation
// counter catches it independent of transport state.
let v2Generation = 0
// SSE sequence-number high-water mark carried across transport swaps.
// Without this, each new SSETransport starts at 0, sends no
// from_sequence_num / Last-Event-ID on its first connect, and the server
// replays the entire session event history — every prompt ever sent
// re-delivered as fresh inbound messages on every onWorkReceived.
//
// Seed only when we actually reconnected the prior session. If
// `reusedPriorSession` is false we fell through to `createSession()` —
// the caller's persisted seq-num belongs to a dead session and applying
// it to the fresh stream (starting at 1) silently drops events. Same
// hazard as doReconnect Strategy 2; same fix as the reset there.
let lastTransportSequenceNum = reusedPriorSession ? initialSSESequenceNum : 0
// Track the current work ID so teardown can call stopWork
let currentWorkId: string | null = null
// Session ingress JWT for the current work item — used for heartbeat auth.
let currentIngressToken: string | null = null
// Signal to wake the at-capacity sleep early when the transport is lost,
// so the poll loop immediately switches back to fast polling for new work.
const capacityWake = createCapacityWake(pollController.signal)
const wakePollLoop = capacityWake.wake
const capacitySignal = capacityWake.signal
// Gates message writes during the initial flush to prevent ordering
// races where new messages arrive at the server interleaved with history.
const flushGate = new FlushGate()
// Latch for onUserMessage — flips true when the callback returns true
// (policy says "done deriving"). If no callback, skip scanning entirely
// (daemon path — no title derivation needed).
let userMessageCallbackDone = !onUserMessage
// Shared counter for environment re-creations, used by both
// onEnvironmentLost and the abnormal-close handler.
const MAX_ENVIRONMENT_RECREATIONS = 3
let environmentRecreations = 0
let reconnectPromise: Promise<boolean> | null = null
/**
 * Recover from onEnvironmentLost (poll returned 404 — env was reaped
 * server-side). Tries two strategies in order:
 *
 * 1. Reconnect-in-place: idempotent re-register with reuseEnvironmentId
 *    → if the backend returns the same env ID, call reconnectSession()
 *    to re-queue the existing session. currentSessionId stays the same;
 *    the URL on the user's phone stays valid; previouslyFlushedUUIDs is
 *    preserved so history isn't re-sent.
 *
 * 2. Fresh session fallback: if the backend returns a different env ID
 *    (original TTL-expired, e.g. laptop slept >4h) or reconnectSession()
 *    throws, archive the old session and create a new one on the
 *    now-registered env. Old behavior before #20460 primitives landed.
 *
 * Uses a promise-based reentrancy guard so concurrent callers share the
 * same reconnection attempt.
 */
async function reconnectEnvironmentWithSession(): Promise<boolean> {
  if (reconnectPromise) {
    return reconnectPromise
  }
  reconnectPromise = doReconnect()
  try {
    return await reconnectPromise
  } finally {
    reconnectPromise = null
  }
}
/**
 * One reconnection attempt. Statement order here is load-bearing (see the
 * inline race notes); only the return type annotation is restored — the
 * body is unchanged. Returns true when recovery succeeded (or the poll
 * loop recovered on its own), false on failure or teardown-in-progress.
 */
async function doReconnect(): Promise<boolean> {
  environmentRecreations++
  // Invalidate any in-flight v2 handshake — the environment is being
  // recreated, so a stale transport arriving post-reconnect would be
  // pointed at a dead session.
  v2Generation++
  logForDebugging(
    `[bridge:repl] Reconnecting after env lost (attempt ${environmentRecreations}/${MAX_ENVIRONMENT_RECREATIONS})`,
  )
  if (environmentRecreations > MAX_ENVIRONMENT_RECREATIONS) {
    logForDebugging(
      `[bridge:repl] Environment reconnect limit reached (${MAX_ENVIRONMENT_RECREATIONS}), giving up`,
    )
    return false
  }
  // Close the stale transport. Capture seq BEFORE close — if Strategy 1
  // (tryReconnectInPlace) succeeds we keep the SAME session, and the
  // next transport must resume where this one left off, not replay from
  // the last transport-swap checkpoint.
  if (transport) {
    const seq = transport.getLastSequenceNum()
    if (seq > lastTransportSequenceNum) {
      lastTransportSequenceNum = seq
    }
    transport.close()
    transport = null
  }
  // Transport is gone — wake the poll loop out of its at-capacity
  // heartbeat sleep so it can fast-poll for re-dispatched work.
  wakePollLoop()
  // Reset flush gate so writeMessages() hits the !transport guard
  // instead of silently queuing into a dead buffer.
  flushGate.drop()
  // Release the current work item (force=false — we may want the session
  // back). Best-effort: the env is probably gone, so this likely 404s.
  if (currentWorkId) {
    const workIdBeingCleared = currentWorkId
    await api
      .stopWork(environmentId, workIdBeingCleared, false)
      .catch(() => {})
    // When doReconnect runs concurrently with the poll loop (ws_closed
    // handler case — void-called, unlike the awaited onEnvironmentLost
    // path), onWorkReceived can fire during the stopWork await and set
    // a fresh currentWorkId. If it did, the poll loop has already
    // recovered on its own — defer to it rather than proceeding to
    // archiveSession, which would destroy the session its new
    // transport is connected to.
    if (currentWorkId !== workIdBeingCleared) {
      logForDebugging(
        '[bridge:repl] Poll loop recovered during stopWork await — deferring to it',
      )
      environmentRecreations = 0
      return true
    }
    currentWorkId = null
    currentIngressToken = null
  }
  // Bail out if teardown started while we were awaiting
  if (pollController.signal.aborted) {
    logForDebugging('[bridge:repl] Reconnect aborted by teardown')
    return false
  }
  // Strategy 1: idempotent re-register with the server-issued env ID.
  // If the backend resurrects the same env (fresh secret), we can
  // reconnect the existing session. If it hands back a different ID, the
  // original env is truly gone and we fall through to a fresh session.
  const requestedEnvId = environmentId
  bridgeConfig.reuseEnvironmentId = requestedEnvId
  try {
    const reg = await api.registerBridgeEnvironment(bridgeConfig)
    environmentId = reg.environment_id
    environmentSecret = reg.environment_secret
  } catch (err) {
    bridgeConfig.reuseEnvironmentId = undefined
    logForDebugging(
      `[bridge:repl] Environment re-registration failed: ${errorMessage(err)}`,
    )
    return false
  }
  // Clear before any await — a stale value would poison the next fresh
  // registration if doReconnect runs again.
  bridgeConfig.reuseEnvironmentId = undefined
  logForDebugging(
    `[bridge:repl] Re-registered: requested=${requestedEnvId} got=${environmentId}`,
  )
  // Bail out if teardown started while we were registering
  if (pollController.signal.aborted) {
    logForDebugging(
      '[bridge:repl] Reconnect aborted after env registration, cleaning up',
    )
    await api.deregisterEnvironment(environmentId).catch(() => {})
    return false
  }
  // Same race as above, narrower window: poll loop may have set up a
  // transport during the registerBridgeEnvironment await. Bail before
  // tryReconnectInPlace/archiveSession kill it server-side.
  if (transport !== null) {
    logForDebugging(
      '[bridge:repl] Poll loop recovered during registerBridgeEnvironment await — deferring to it',
    )
    environmentRecreations = 0
    return true
  }
  // Strategy 1: same helper as perpetual init. currentSessionId stays
  // the same on success; URL on mobile/web stays valid;
  // previouslyFlushedUUIDs preserved (no re-flush).
  if (await tryReconnectInPlace(requestedEnvId, currentSessionId)) {
    logEvent('tengu_bridge_repl_reconnected_in_place', {})
    environmentRecreations = 0
    return true
  }
  // Env differs → TTL-expired/reaped; or reconnect failed.
  // Don't deregister — we have a fresh secret for this env either way.
  if (environmentId !== requestedEnvId) {
    logEvent('tengu_bridge_repl_env_expired_fresh_session', {})
  }
  // Strategy 2: fresh session on the now-registered environment.
  // Archive the old session first — it's orphaned (bound to a dead env,
  // or reconnectSession rejected it). Don't deregister the env — we just
  // got a fresh secret for it and are about to use it.
  await archiveSession(currentSessionId)
  // Bail out if teardown started while we were archiving
  if (pollController.signal.aborted) {
    logForDebugging(
      '[bridge:repl] Reconnect aborted after archive, cleaning up',
    )
    await api.deregisterEnvironment(environmentId).catch(() => {})
    return false
  }
  // Re-read the current title in case the user renamed the session.
  // REPL wrapper reads session storage; daemon wrapper returns the
  // original title (nothing to refresh).
  const currentTitle = getCurrentTitle()
  // Create a new session on the now-registered environment
  const newSessionId = await createSession({
    environmentId,
    title: currentTitle,
    gitRepoUrl,
    branch,
    signal: AbortSignal.timeout(15_000),
  })
  if (!newSessionId) {
    logForDebugging(
      '[bridge:repl] Session creation failed during reconnection',
    )
    return false
  }
  // Bail out if teardown started during session creation (up to 15s)
  if (pollController.signal.aborted) {
    logForDebugging(
      '[bridge:repl] Reconnect aborted after session creation, cleaning up',
    )
    await archiveSession(newSessionId)
    return false
  }
  currentSessionId = newSessionId
  // Re-publish to the PID file so peer dedup (peerRegistry.ts) picks up the
  // new ID — setReplBridgeHandle only fires at init/teardown, not reconnect.
  void updateSessionBridgeId(toCompatSessionId(newSessionId)).catch(() => {})
  // Reset per-session transport state IMMEDIATELY after the session swap,
  // before any await. If this runs after `await writeBridgePointer` below,
  // there's a window where handle.bridgeSessionId already returns session B
  // but getSSESequenceNum() still returns session A's seq — a daemon
  // persistState() in that window writes {bridgeSessionId: B, seq: OLD_A},
  // which PASSES the session-ID validation check and defeats it entirely.
  //
  // The SSE seq-num is scoped to the session's event stream — carrying it
  // over leaves the transport's lastSequenceNum stuck high (seq only
  // advances when received > last), and its next internal reconnect would
  // send from_sequence_num=OLD_SEQ against a stream starting at 1 → all
  // events in the gap silently dropped. Inbound UUID dedup is also
  // session-scoped.
  lastTransportSequenceNum = 0
  recentInboundUUIDs.clear()
  // Title derivation is session-scoped too: if the user typed during the
  // createSession await above, the callback fired against the OLD archived
  // session ID (PATCH lost) and the new session got `currentTitle` captured
  // BEFORE they typed. Reset so the next prompt can re-derive. Self-
  // correcting: if the caller's policy is already done (explicit title or
  // count ≥ 3), it returns true on the first post-reset call and re-latches.
  userMessageCallbackDone = !onUserMessage
  logForDebugging(`[bridge:repl] Re-created session: ${currentSessionId}`)
  // Rewrite the crash-recovery pointer with the new IDs so a crash after
  // this point resumes the right session. (The reconnect-in-place path
  // above doesn't touch the pointer — same session, same env.)
  await writeBridgePointer(dir, {
    sessionId: currentSessionId,
    environmentId,
    source: 'repl',
  })
  // Clear flushed UUIDs so initial messages are re-sent to the new session.
  // UUIDs are scoped per-session on the server, so re-flushing is safe.
  previouslyFlushedUUIDs?.clear()
  // Reset the counter so independent reconnections hours apart don't
  // exhaust the limit — it guards against rapid consecutive failures,
  // not lifetime total.
  environmentRecreations = 0
  return true
}
// Helper: get the current OAuth access token for session ingress auth.
// Unlike the JWT path, OAuth tokens are refreshed by the standard OAuth
// flow — no proactive scheduler needed.
function getOAuthToken(): string | undefined {
return getAccessToken()
}
// Drain any messages that were queued during the initial flush.
// Called after writeBatch completes (or fails) so queued messages
// are sent in order after the historical messages.
function drainFlushGate(): void {
const msgs = flushGate.end()
if (msgs.length === 0) return
if (!transport) {
logForDebugging(
`[bridge:repl] Cannot drain ${msgs.length} pending message(s): no transport`,
)
return
}
for (const msg of msgs) {
recentPostedUUIDs.add(msg.uuid)
}
const sdkMessages = toSDKMessages(msgs)
const events = sdkMessages.map(sdkMsg => ({
...sdkMsg,
session_id: currentSessionId,
}))
logForDebugging(
`[bridge:repl] Drained ${msgs.length} pending message(s) after flush`,
)
void transport.writeBatch(events)
}
// Teardown reference — set after definition below. All callers are async
// callbacks that run after assignment, so the reference is always valid.
let doTeardownImpl: (() => Promise<void>) | null = null
// Fire-and-forget teardown entry point for sync callback sites.
function triggerTeardown(): void {
  void doTeardownImpl?.()
}
/**
 * Body of the transport's setOnClose callback, hoisted to initBridgeCore
 * scope so /bridge-kick can fire it directly. setOnClose wraps this with
 * a stale-transport guard; debugFireClose calls it bare.
 *
 * With autoReconnect:true, this only fires on: clean close (1000),
 * permanent server rejection (4001/1002/4003), or 10-min budget
 * exhaustion. Transient drops are retried internally by the transport.
 *
 * @param closeCode WS close code reported by the transport; may be
 *   undefined when no code was observed.
 */
function handleTransportPermanentClose(closeCode: number | undefined): void {
logForDebugging(
`[bridge:repl] Transport permanently closed: code=${closeCode}`,
)
logEvent('tengu_bridge_repl_ws_closed', {
code: closeCode,
})
// Capture SSE seq high-water mark before nulling. When called from
// setOnClose the guard guarantees transport !== null; when fired from
// /bridge-kick it may already be null (e.g. fired twice) — skip.
if (transport) {
const closedSeq = transport.getLastSequenceNum()
if (closedSeq > lastTransportSequenceNum) {
lastTransportSequenceNum = closedSeq
}
transport = null
}
// Transport is gone — wake the poll loop out of its at-capacity
// heartbeat sleep so it's fast-polling by the time the reconnect
// below completes and the server re-queues work.
wakePollLoop()
// Reset flush state so writeMessages() hits the !transport guard
// (with a warning log) instead of silently queuing into a buffer
// that will never be drained. Unlike onWorkReceived (which
// preserves pending messages for the new transport), onClose is
// a permanent close — no new transport will drain these.
const dropped = flushGate.drop()
if (dropped > 0) {
logForDebugging(
`[bridge:repl] Dropping ${dropped} pending message(s) on transport close (code=${closeCode})`,
{ level: 'warn' },
)
}
if (closeCode === 1000) {
// Clean close — session ended normally. Tear down the bridge.
onStateChange?.('failed', 'session ended')
pollController.abort()
triggerTeardown()
return
}
// Transport reconnect budget exhausted or permanent server
// rejection. By this point the env has usually been reaped
// server-side (BQ 2026-03-12: ~98% of ws_closed never recover
// via poll alone). stopWork(force=false) can't re-dispatch work
// from an archived env; reconnectEnvironmentWithSession can
// re-activate it via POST /bridge/reconnect, or fall through
// to a fresh session if the env is truly gone. The poll loop
// (already woken above) picks up the re-queued work once
// doReconnect completes.
onStateChange?.(
'reconnecting',
`Remote Control connection lost (code ${closeCode})`,
)
logForDebugging(
`[bridge:repl] Transport reconnect budget exhausted (code=${closeCode}), attempting env reconnect`,
)
void reconnectEnvironmentWithSession().then(success => {
if (success) return
// doReconnect has four abort-check return-false sites for
// teardown-in-progress. Don't pollute the BQ failure signal
// or double-teardown when the user just quit.
if (pollController.signal.aborted) return
// doReconnect returns false (never throws) on genuine failure.
// The dangerous case: registerBridgeEnvironment succeeded (so
// environmentId now points at a fresh valid env) but
// createSession failed — poll loop would poll a sessionless
// env getting null work with no errors, never hitting any
// give-up path. Tear down explicitly.
logForDebugging(
'[bridge:repl] reconnectEnvironmentWithSession resolved false — tearing down',
)
logEvent('tengu_bridge_repl_reconnect_failed', {
close_code: closeCode,
})
onStateChange?.('failed', 'reconnection failed')
triggerTeardown()
})
}
// Ant-only manual-testing hook: SIGUSR2 forces doReconnect() immediately,
// skipping the ~30s poll wait, so the result is observable in the debug
// log right away. USR signals don't exist on Windows — `process.on`
// would throw there, hence the platform guard.
let sigusr2Handler: (() => void) | undefined
if (process.env.USER_TYPE === 'ant' && process.platform !== 'win32') {
  const onSigusr2 = (): void => {
    logForDebugging(
      '[bridge:repl] SIGUSR2 received — forcing doReconnect() for testing',
    )
    void reconnectEnvironmentWithSession()
  }
  sigusr2Handler = onSigusr2
  process.on('SIGUSR2', onSigusr2)
}
// Ant-only: /bridge-kick fault injection. handleTransportPermanentClose
// is defined below and assigned into this slot so the slash command can
// invoke it directly — the real setOnClose callback is buried inside
// wireTransport which is itself inside onWorkReceived.
let debugFireClose: ((code: number) => void) | null = null
if (process.env.USER_TYPE === 'ant') {
  // Named locals instead of inline object members — same handle shape.
  const fireClose = (code: number): void => {
    if (!debugFireClose) {
      logForDebugging('[bridge:debug] fireClose: no transport wired yet')
      return
    }
    logForDebugging(`[bridge:debug] fireClose(${code}) — injecting`)
    debugFireClose(code)
  }
  const forceReconnect = (): void => {
    logForDebugging('[bridge:debug] forceReconnect — injecting')
    void reconnectEnvironmentWithSession()
  }
  const describe = (): string =>
    `env=${environmentId} session=${currentSessionId} transport=${transport?.getStateLabel() ?? 'null'} workId=${currentWorkId ?? 'null'}`
  registerBridgeDebugHandle({
    fireClose,
    forceReconnect,
    injectFault: injectBridgeFault,
    wakePollLoop,
    describe,
  })
}
// Options for the background work-poll loop. Every callback below closes
// over initBridgeCore's mutable state (transport, currentWorkId, flushGate,
// …) so the loop always observes live values, never snapshots.
const pollOpts = {
api,
getCredentials: () => ({ environmentId, environmentSecret }),
signal: pollController.signal,
getPollIntervalConfig,
onStateChange,
getWsState: () => transport?.getStateLabel() ?? 'null',
// REPL bridge is single-session: having any transport == at capacity.
// No need to check isConnectedStatus() — even while the transport is
// auto-reconnecting internally (up to 10 min), poll is heartbeat-only.
isAtCapacity: () => transport !== null,
capacitySignal,
onFatalError: triggerTeardown,
// Heartbeat payload for the current work item; null when no work is held.
getHeartbeatInfo: () => {
if (!currentWorkId || !currentIngressToken) {
return null
}
return {
environmentId,
workId: currentWorkId,
sessionToken: currentIngressToken,
}
},
// Work-item JWT expired (or work gone). The transport is useless —
// SSE reconnects and CCR writes use the same stale token. Without
// this callback the poll loop would do a 10-min at-capacity backoff,
// during which the work lease (300s TTL) expires and the server stops
// forwarding prompts → ~25-min dead window observed in daemon logs.
// Kill the transport + work state so isAtCapacity()=false; the loop
// fast-polls and picks up the server's re-dispatched work in seconds.
onHeartbeatFatal: (err: BridgeFatalError) => {
logForDebugging(
`[bridge:repl] heartbeatWork fatal (status=${err.status}) — tearing down work item for fast re-dispatch`,
)
if (transport) {
const seq = transport.getLastSequenceNum()
if (seq > lastTransportSequenceNum) {
lastTransportSequenceNum = seq
}
transport.close()
transport = null
}
flushGate.drop()
// force=false → server re-queues. Likely already expired, but
// idempotent and makes re-dispatch immediate if not.
if (currentWorkId) {
void api
.stopWork(environmentId, currentWorkId, false)
.catch((e: unknown) => {
logForDebugging(
`[bridge:repl] stopWork after heartbeat fatal: ${errorMessage(e)}`,
)
})
}
currentWorkId = null
currentIngressToken = null
wakePollLoop()
onStateChange?.(
'reconnecting',
'Work item lease expired, fetching fresh token',
)
},
// Environment deleted server-side — try a full env+session reconnect
// and hand the loop fresh credentials, or null to give up.
async onEnvironmentLost() {
const success = await reconnectEnvironmentWithSession()
if (!success) {
return null
}
return { environmentId, environmentSecret }
},
// Fires each time the server dispatches a work item; builds (or
// replaces) the ingress transport for the session.
onWorkReceived: (
workSessionId: string,
ingressToken: string,
workId: string,
serverUseCcrV2: boolean,
) => {
// When new work arrives while a transport is already open, the
// server has decided to re-dispatch (e.g. token rotation, server
// restart). Close the existing transport and reconnect — discarding
// the work causes a stuck 'reconnecting' state if the old WS dies
// shortly after (the server won't re-dispatch a work item it
// already delivered).
// ingressToken (JWT) is stored for heartbeat auth (both v1 and v2).
// Transport auth diverges — see the v1/v2 split below.
if (transport?.isConnectedStatus()) {
logForDebugging(
`[bridge:repl] Work received while transport connected, replacing with fresh token (workId=${workId})`,
)
}
logForDebugging(
`[bridge:repl] Work received: workId=${workId} workSessionId=${workSessionId} currentSessionId=${currentSessionId} match=${sameSessionId(workSessionId, currentSessionId)}`,
)
// Refresh the crash-recovery pointer's mtime. Staleness checks file
// mtime (not embedded timestamp) so this re-write bumps the clock —
// a 5h+ session that crashes still has a fresh pointer. Fires once
// per work dispatch (infrequent — bounded by user message rate).
void writeBridgePointer(dir, {
sessionId: currentSessionId,
environmentId,
source: 'repl',
})
// Reject foreign session IDs — the server shouldn't assign sessions
// from other environments. Since we create env+session as a pair,
// a mismatch indicates an unexpected server-side reassignment.
//
// Compare by underlying UUID, not by tagged-ID prefix. When CCR
// v2's compat layer serves the session, createBridgeSession gets
// session_* from the v1-facing API (compat/convert.go:41) but the
// infrastructure layer delivers cse_* in the work queue
// (container_manager.go:129). Same UUID, different tag.
if (!sameSessionId(workSessionId, currentSessionId)) {
logForDebugging(
`[bridge:repl] Rejecting foreign session: expected=${currentSessionId} got=${workSessionId}`,
)
return
}
currentWorkId = workId
currentIngressToken = ingressToken
// Server decides per-session (secret.use_code_sessions from the work
// secret, threaded through runWorkPollLoop). The env var is an ant-dev
// override for forcing v2 before the server flag is on for your user —
// requires ccr_v2_compat_enabled server-side or registerWorker 404s.
//
// Kept separate from CLAUDE_CODE_USE_CCR_V2 (the child-SDK transport
// selector set by sessionRunner/environment-manager) to avoid the
// inheritance hazard in spawn mode where the parent's orchestrator
// var would leak into a v1 child.
const useCcrV2 =
serverUseCcrV2 || isEnvTruthy(process.env.CLAUDE_BRIDGE_USE_CCR_V2)
// Auth is the one place v1 and v2 diverge hard:
//
// - v1 (Session-Ingress): accepts OAuth OR JWT. We prefer OAuth
//   because the standard OAuth refresh flow handles expiry — no
//   separate JWT refresh scheduler needed.
//
// - v2 (CCR /worker/*): REQUIRES the JWT. register_worker.go:32
//   validates the session_id claim, which OAuth tokens don't carry.
//   The JWT from the work secret has both that claim and the worker
//   role (environment_auth.py:856). JWT refresh: when it expires the
//   server re-dispatches work with a fresh one, and onWorkReceived
//   fires again. createV2ReplTransport stores it via
//   updateSessionIngressAuthToken() before touching the network.
let v1OauthToken: string | undefined
if (!useCcrV2) {
v1OauthToken = getOAuthToken()
if (!v1OauthToken) {
logForDebugging(
'[bridge:repl] No OAuth token available for session ingress, skipping work',
)
return
}
updateSessionIngressAuthToken(v1OauthToken)
}
logEvent('tengu_bridge_repl_work_received', {})
// Close the previous transport. Nullify BEFORE calling close() so
// the close callback doesn't treat the programmatic close as
// "session ended normally" and trigger a full teardown.
if (transport) {
const oldTransport = transport
transport = null
// Capture the SSE sequence high-water mark so the next transport
// resumes the stream instead of replaying from seq 0. Use max() —
// a transport that died early (never received any frames) would
// otherwise reset a non-zero mark back to 0.
const oldSeq = oldTransport.getLastSequenceNum()
if (oldSeq > lastTransportSequenceNum) {
lastTransportSequenceNum = oldSeq
}
oldTransport.close()
}
// Reset flush state — the old flush (if any) is no longer relevant.
// Preserve pending messages so they're drained after the new
// transport's flush completes (the hook has already advanced its
// lastWrittenIndex and won't re-send them).
flushGate.deactivate()
// Closure adapter over the shared handleServerControlRequest —
// captures transport/currentSessionId so the transport.setOnData
// callback below doesn't need to thread them through.
const onServerControlRequest = (request: SDKControlRequest): void =>
handleServerControlRequest(request, {
transport,
sessionId: currentSessionId,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
})
let initialFlushDone = false
// Wire callbacks onto a freshly constructed transport and connect.
// Extracted so the (sync) v1 and (async) v2 construction paths can
// share the identical callback + flush machinery.
const wireTransport = (newTransport: ReplBridgeTransport): void => {
transport = newTransport
newTransport.setOnConnect(() => {
// Guard: if transport was replaced by a newer onWorkReceived call
// while the WS was connecting, ignore this stale callback.
if (transport !== newTransport) return
logForDebugging('[bridge:repl] Ingress transport connected')
logEvent('tengu_bridge_repl_ws_connected', {})
// Update the env var with the latest OAuth token so POST writes
// (which read via getSessionIngressAuthToken()) use a fresh token.
// v2 skips this — createV2ReplTransport already stored the JWT,
// and overwriting it with OAuth would break subsequent /worker/*
// requests (session_id claim check).
if (!useCcrV2) {
const freshToken = getOAuthToken()
if (freshToken) {
updateSessionIngressAuthToken(freshToken)
}
}
// Reset teardownStarted so future teardowns are not blocked.
teardownStarted = false
// Flush initial messages only on first connect, not on every
// WS reconnection. Re-flushing would cause duplicate messages.
// IMPORTANT: onStateChange('connected') is deferred until the
// flush completes. This prevents writeMessages() from sending
// new messages that could arrive at the server interleaved with
// the historical messages, and delays the web UI from showing
// the session as active until history is persisted.
if (
!initialFlushDone &&
initialMessages &&
initialMessages.length > 0
) {
initialFlushDone = true
// Cap the initial flush to the most recent N messages. The full
// history is UI-only (model doesn't see it) and large replays cause
// slow session-ingress persistence (each event is a threadstore write)
// plus elevated Firestore pressure. A 0 or negative cap disables it.
const historyCap = initialHistoryCap
const eligibleMessages = initialMessages.filter(
m =>
isEligibleBridgeMessage(m) &&
!previouslyFlushedUUIDs?.has(m.uuid),
)
const cappedMessages =
historyCap > 0 && eligibleMessages.length > historyCap
? eligibleMessages.slice(-historyCap)
: eligibleMessages
if (cappedMessages.length < eligibleMessages.length) {
logForDebugging(
`[bridge:repl] Capped initial flush: ${eligibleMessages.length} -> ${cappedMessages.length} (cap=${historyCap})`,
)
logEvent('tengu_bridge_repl_history_capped', {
eligible_count: eligibleMessages.length,
capped_count: cappedMessages.length,
})
}
const sdkMessages = toSDKMessages(cappedMessages)
if (sdkMessages.length > 0) {
logForDebugging(
`[bridge:repl] Flushing ${sdkMessages.length} initial message(s) via transport`,
)
const events = sdkMessages.map(sdkMsg => ({
...sdkMsg,
session_id: currentSessionId,
}))
const dropsBefore = newTransport.droppedBatchCount
void newTransport
.writeBatch(events)
.then(() => {
// If any batch was dropped during this flush (SI down for
// maxConsecutiveFailures attempts), flush() still resolved
// normally but the events were NOT delivered. Don't mark
// UUIDs as flushed — keep them eligible for re-send on the
// next onWorkReceived (JWT refresh re-dispatch, line ~1144).
if (newTransport.droppedBatchCount > dropsBefore) {
logForDebugging(
`[bridge:repl] Initial flush dropped ${newTransport.droppedBatchCount - dropsBefore} batch(es) — not marking ${sdkMessages.length} UUID(s) as flushed`,
)
return
}
if (previouslyFlushedUUIDs) {
for (const sdkMsg of sdkMessages) {
if (sdkMsg.uuid) {
previouslyFlushedUUIDs.add(sdkMsg.uuid)
}
}
}
})
.catch(e =>
logForDebugging(`[bridge:repl] Initial flush failed: ${e}`),
)
.finally(() => {
// Guard: if transport was replaced during the flush,
// don't signal connected or drain — the new transport
// owns the lifecycle now.
if (transport !== newTransport) return
drainFlushGate()
onStateChange?.('connected')
})
} else {
// All initial messages were already flushed (filtered by
// previouslyFlushedUUIDs). No flush POST needed — clear
// the flag and signal connected immediately. This is the
// first connect for this transport (inside !initialFlushDone),
// so no flush POST is in-flight — the flag was set before
// connect() and must be cleared here.
drainFlushGate()
onStateChange?.('connected')
}
} else if (!flushGate.active) {
// No initial messages or already flushed on first connect.
// WS auto-reconnect path — only signal connected if no flush
// POST is in-flight. If one is, .finally() owns the lifecycle.
onStateChange?.('connected')
}
})
newTransport.setOnData(data => {
handleIngressMessage(
data,
recentPostedUUIDs,
recentInboundUUIDs,
onInboundMessage,
onPermissionResponse,
onServerControlRequest,
)
})
// Body lives at initBridgeCore scope so /bridge-kick can call it
// directly via debugFireClose. All referenced closures (transport,
// wakePollLoop, flushGate, reconnectEnvironmentWithSession, etc.)
// are already at that scope. The only lexical dependency on
// wireTransport was `newTransport.getLastSequenceNum()` — but after
// the guard below passes we know transport === newTransport.
debugFireClose = handleTransportPermanentClose
newTransport.setOnClose(closeCode => {
// Guard: if transport was replaced, ignore stale close.
if (transport !== newTransport) return
handleTransportPermanentClose(closeCode)
})
// Start the flush gate before connect() to cover the WS handshake
// window. Between transport assignment and setOnConnect firing,
// writeMessages() could send messages via HTTP POST before the
// initial flush starts. Starting the gate here ensures those
// calls are queued. If there are no initial messages, the gate
// stays inactive.
if (
!initialFlushDone &&
initialMessages &&
initialMessages.length > 0
) {
flushGate.start()
}
newTransport.connect()
} // end wireTransport
// Bump unconditionally — ANY new transport (v1 or v2) invalidates an
// in-flight v2 handshake. Also bumped in doReconnect().
v2Generation++
if (useCcrV2) {
// workSessionId is the cse_* form (infrastructure-layer ID from the
// work queue), which is what /v1/code/sessions/{id}/worker/* wants.
// The session_* form (currentSessionId) is NOT usable here —
// handler/convert.go:30 validates TagCodeSession.
const sessionUrl = buildCCRv2SdkUrl(baseUrl, workSessionId)
const thisGen = v2Generation
logForDebugging(
`[bridge:repl] CCR v2: sessionUrl=${sessionUrl} session=${workSessionId} gen=${thisGen}`,
)
void createV2ReplTransport({
sessionUrl,
ingressToken,
sessionId: workSessionId,
initialSequenceNum: lastTransportSequenceNum,
}).then(
t => {
// Teardown started while registerWorker was in flight. Teardown
// saw transport === null and skipped close(); installing now
// would leak CCRClient heartbeat timers and reset
// teardownStarted via wireTransport's side effects.
if (pollController.signal.aborted) {
t.close()
return
}
// onWorkReceived may have fired again while registerWorker()
// was in flight (server re-dispatch with a fresh JWT). The
// transport !== null check alone gets the race wrong when BOTH
// attempts saw transport === null — it keeps the first resolver
// (stale epoch) and discards the second (correct epoch). The
// generation check catches it regardless of transport state.
if (thisGen !== v2Generation) {
logForDebugging(
`[bridge:repl] CCR v2: discarding stale handshake gen=${thisGen} current=${v2Generation}`,
)
t.close()
return
}
wireTransport(t)
},
(err: unknown) => {
logForDebugging(
`[bridge:repl] CCR v2: createV2ReplTransport failed: ${errorMessage(err)}`,
{ level: 'error' },
)
logEvent('tengu_bridge_repl_ccr_v2_init_failed', {})
// If a newer attempt is in flight or already succeeded, don't
// touch its work item — our failure is irrelevant.
if (thisGen !== v2Generation) return
// Release the work item so the server re-dispatches immediately
// instead of waiting for its own timeout. currentWorkId was set
// above; without this, the session looks stuck to the user.
if (currentWorkId) {
void api
.stopWork(environmentId, currentWorkId, false)
.catch((e: unknown) => {
logForDebugging(
`[bridge:repl] stopWork after v2 init failure: ${errorMessage(e)}`,
)
})
currentWorkId = null
currentIngressToken = null
}
wakePollLoop()
},
)
} else {
// v1: HybridTransport (WS reads + POST writes to Session-Ingress).
// autoReconnect is true (default) — when the WS dies, the transport
// reconnects automatically with exponential backoff. POST writes
// continue during reconnection (they use getSessionIngressAuthToken()
// independently of WS state). The poll loop remains as a secondary
// fallback if the reconnect budget is exhausted (10 min).
//
// Auth: uses OAuth tokens directly instead of the JWT from the work
// secret. refreshHeaders picks up the latest OAuth token on each
// WS reconnect attempt.
const wsUrl = buildSdkUrl(sessionIngressUrl, workSessionId)
logForDebugging(`[bridge:repl] Ingress URL: ${wsUrl}`)
logForDebugging(
`[bridge:repl] Creating HybridTransport: session=${workSessionId}`,
)
// v1OauthToken was validated non-null above (we'd have returned early).
const oauthToken = v1OauthToken ?? ''
wireTransport(
createV1ReplTransport(
new HybridTransport(
new URL(wsUrl),
{
Authorization: `Bearer ${oauthToken}`,
'anthropic-version': '2023-06-01',
},
workSessionId,
() => ({
Authorization: `Bearer ${getOAuthToken() ?? oauthToken}`,
'anthropic-version': '2023-06-01',
}),
// Cap retries so a persistently-failing session-ingress can't
// pin the uploader drain loop for the lifetime of the bridge.
// 50 attempts ≈ 20 min (15s POST timeout + 8s backoff + jitter
// per cycle at steady state). Bridge-only — 1P keeps indefinite.
{
maxConsecutiveFailures: 50,
isBridge: true,
onBatchDropped: () => {
onStateChange?.(
'reconnecting',
'Lost sync with Remote Control — events could not be delivered',
)
// SI has been down ~20 min. Wake the poll loop so that when
// SI recovers, next poll → onWorkReceived → fresh transport
// → initial flush succeeds → onStateChange('connected') at
// ~line 1420. Without this, state stays 'reconnecting' even
// after SI recovers — daemon.ts:437 denies all permissions,
// useReplBridge.ts:311 keeps replBridgeSessionActive=false.
// If the env was archived during the outage, poll 404 →
// onEnvironmentLost recovery path handles it.
wakePollLoop()
},
},
),
),
)
}
},
}
void startWorkPollLoop(pollOpts)
// Perpetual mode: refresh the crash-recovery pointer's mtime once an hour.
// The onWorkReceived refresh only fires per user prompt, so a daemon idle
// for >4h would otherwise go stale and the next restart would discard the
// pointer (readBridgePointer TTL check) and mint a fresh session. The
// standalone bridge (bridgeMain.ts) runs the same hourly timer.
let pointerRefreshTimer: ReturnType<typeof setInterval> | null = null
if (perpetual) {
  pointerRefreshTimer = setInterval(() => {
    // doReconnect() reassigns currentSessionId/environmentId non-
    // atomically (env at ~:634, session at ~:719, awaits in between).
    // A tick inside that window could race doReconnect's own pointer
    // write at ~:740 and pin the pointer to the archived old session.
    // doReconnect writes the pointer itself, so skipping is free.
    if (reconnectPromise) return
    void writeBridgePointer(dir, {
      sessionId: currentSessionId,
      environmentId,
      source: 'repl',
    })
  }, 60 * 60_000)
}
pointerRefreshTimer?.unref?.()
// Periodic silent keep_alive so upstream proxies / session-ingress don't
// GC an idle remote control session. keep_alive never reaches client UIs
// (Query.ts drops it; web/iOS/Android never see it in their message loop).
// Interval is GrowthBook-driven (tengu_bridge_poll_interval_config
// session_keepalive_interval_v2_ms, default 120s); 0 disables the timer.
const keepAliveIntervalMs =
  getPollIntervalConfig().session_keepalive_interval_v2_ms
let keepAliveTimer: ReturnType<typeof setInterval> | null = null
if (keepAliveIntervalMs > 0) {
  keepAliveTimer = setInterval(() => {
    if (!transport) return
    logForDebugging('[bridge:repl] keep_alive sent')
    void transport.write({ type: 'keep_alive' }).catch((err: unknown) => {
      logForDebugging(
        `[bridge:repl] keep_alive write failed: ${errorMessage(err)}`,
      )
    })
  }, keepAliveIntervalMs)
}
keepAliveTimer?.unref?.()
// Shared teardown sequence used by both cleanup registration and
// the explicit teardown() method on the returned handle.
let teardownStarted = false
// Fix: the return annotation was a bare `Promise` (missing its type
// argument), which does not compile — annotate as Promise<void>.
doTeardownImpl = async (): Promise<void> => {
if (teardownStarted) {
logForDebugging(
`[bridge:repl] Teardown already in progress, skipping duplicate call env=${environmentId} session=${currentSessionId}`,
)
return
}
teardownStarted = true
const teardownStart = Date.now()
logForDebugging(
`[bridge:repl] Teardown starting: env=${environmentId} session=${currentSessionId} workId=${currentWorkId ?? 'none'} transportState=${transport?.getStateLabel() ?? 'null'}`,
)
if (pointerRefreshTimer !== null) {
clearInterval(pointerRefreshTimer)
}
if (keepAliveTimer !== null) {
clearInterval(keepAliveTimer)
}
if (sigusr2Handler) {
process.off('SIGUSR2', sigusr2Handler)
}
if (process.env.USER_TYPE === 'ant') {
clearBridgeDebugHandle()
debugFireClose = null
}
pollController.abort()
logForDebugging('[bridge:repl] Teardown: poll loop aborted')
// Capture the live transport's seq BEFORE close() — close() is sync
// (just aborts the SSE fetch) and does NOT invoke onClose, so the
// setOnClose capture path never runs for explicit teardown.
// Without this, getSSESequenceNum() after teardown returns the stale
// lastTransportSequenceNum (captured at the last transport swap), and
// daemon callers persisting that value lose all events since then.
if (transport) {
const finalSeq = transport.getLastSequenceNum()
if (finalSeq > lastTransportSequenceNum) {
lastTransportSequenceNum = finalSeq
}
}
if (perpetual) {
// Perpetual teardown is LOCAL-ONLY — do not send result, do not call
// stopWork, do not close the transport. All of those signal the
// server (and any mobile/attach subscribers) that the session is
// ending. Instead: stop polling, let the socket die with the
// process; the backend times the work-item lease back to pending on
// its own (TTL 300s). Next daemon start reads the pointer and
// reconnectSession re-queues work.
transport = null
flushGate.drop()
// Refresh the pointer mtime so that sessions lasting longer than
// BRIDGE_POINTER_TTL_MS (4h) don't appear stale on next start.
await writeBridgePointer(dir, {
sessionId: currentSessionId,
environmentId,
source: 'repl',
})
logForDebugging(
`[bridge:repl] Teardown (perpetual): leaving env=${environmentId} session=${currentSessionId} alive on server, duration=${Date.now() - teardownStart}ms`,
)
return
}
// Fire the result message, then archive, THEN close. transport.write()
// only enqueues (SerialBatchEventUploader resolves on buffer-add); the
// stopWork/archive latency (~200-500ms) is the drain window for the
// result POST. Closing BEFORE archive meant relying on HybridTransport's
// void-ed 3s grace period, which nothing awaits — forceExit can kill the
// socket mid-POST. Same reorder as remoteBridgeCore.ts teardown (#22803).
const teardownTransport = transport
transport = null
flushGate.drop()
if (teardownTransport) {
void teardownTransport.write(makeResultMessage(currentSessionId))
}
const stopWorkP = currentWorkId
? api
.stopWork(environmentId, currentWorkId, true)
.then(() => {
logForDebugging('[bridge:repl] Teardown: stopWork completed')
})
.catch((err: unknown) => {
logForDebugging(
`[bridge:repl] Teardown stopWork failed: ${errorMessage(err)}`,
)
})
: Promise.resolve()
// Run stopWork and archiveSession in parallel. gracefulShutdown.ts:407
// races runCleanupFunctions() against 2s (NOT the 5s outer failsafe),
// so archive is capped at 1.5s at the injection site to stay under budget.
// archiveSession is contractually no-throw; the injected implementations
// log their own success/failure internally.
await Promise.all([stopWorkP, archiveSession(currentSessionId)])
teardownTransport?.close()
logForDebugging('[bridge:repl] Teardown: transport closed')
await api.deregisterEnvironment(environmentId).catch((err: unknown) => {
logForDebugging(
`[bridge:repl] Teardown deregister failed: ${errorMessage(err)}`,
)
})
// Clear the crash-recovery pointer — explicit disconnect or clean REPL
// exit means the user is done with this session. Crash/kill-9 never
// reaches this line, leaving the pointer for next-launch recovery.
await clearBridgePointer(dir)
logForDebugging(
`[bridge:repl] Teardown complete: env=${environmentId} duration=${Date.now() - teardownStart}ms`,
)
}
// 8. Register cleanup for graceful shutdown
const unregister = registerCleanup(() => doTeardownImpl?.())
logForDebugging(
`[bridge:repl] Ready: env=${environmentId} session=${currentSessionId}`,
)
onStateChange?.('ready')
return {
get bridgeSessionId() {
return currentSessionId
},
get environmentId() {
return environmentId
},
getSSESequenceNum() {
// lastTransportSequenceNum only updates when a transport is CLOSED
// (captured at swap/onClose). During normal operation the CURRENT
// transport's live seq isn't reflected there. Merge both so callers
// (e.g. daemon persistState()) get the actual high-water mark.
const live = transport?.getLastSequenceNum() ?? 0
return Math.max(lastTransportSequenceNum, live)
},
sessionIngressUrl,
writeMessages(messages) {
// Filter to user/assistant messages that haven't already been sent.
// Two layers of dedup:
// - initialMessageUUIDs: messages sent as session creation events
// - recentPostedUUIDs: messages recently sent via POST
const filtered = messages.filter(
m =>
isEligibleBridgeMessage(m) &&
!initialMessageUUIDs.has(m.uuid) &&
!recentPostedUUIDs.has(m.uuid),
)
if (filtered.length === 0) return
// Fire onUserMessage for title derivation. Scan before the flushGate
// check — prompts are title-worthy even if they queue behind the
// initial history flush. Keeps calling on every title-worthy message
// until the callback returns true; the caller owns the policy.
if (!userMessageCallbackDone) {
for (const m of filtered) {
const text = extractTitleText(m)
if (text !== undefined && onUserMessage?.(text, currentSessionId)) {
userMessageCallbackDone = true
break
}
}
}
// Queue messages while the initial flush is in progress to prevent
// them from arriving at the server interleaved with history.
if (flushGate.enqueue(...filtered)) {
logForDebugging(
`[bridge:repl] Queued ${filtered.length} message(s) during initial flush`,
)
return
}
if (!transport) {
const types = filtered.map(m => m.type).join(',')
logForDebugging(
`[bridge:repl] Transport not configured, dropping ${filtered.length} message(s) [${types}] for session=${currentSessionId}`,
{ level: 'warn' },
)
return
}
// Track in the bounded ring buffer for echo filtering and dedup.
for (const msg of filtered) {
recentPostedUUIDs.add(msg.uuid)
}
logForDebugging(
`[bridge:repl] Sending ${filtered.length} message(s) via transport`,
)
// Convert to SDK format and send via HTTP POST (HybridTransport).
// The web UI receives them via the subscribe WebSocket.
const sdkMessages = toSDKMessages(filtered)
const events = sdkMessages.map(sdkMsg => ({
...sdkMsg,
session_id: currentSessionId,
}))
void transport.writeBatch(events)
},
writeSdkMessages(messages) {
// Daemon path: query() already yields SDKMessage, skip conversion.
// Still run echo dedup (server bounces writes back on the WS).
// No initialMessageUUIDs filter — daemon has no initial messages.
// No flushGate — daemon never starts it (no initial flush).
const filtered = messages.filter(
m => !m.uuid || !recentPostedUUIDs.has(m.uuid),
)
if (filtered.length === 0) return
if (!transport) {
logForDebugging(
`[bridge:repl] Transport not configured, dropping ${filtered.length} SDK message(s) for session=${currentSessionId}`,
{ level: 'warn' },
)
return
}
for (const msg of filtered) {
if (msg.uuid) recentPostedUUIDs.add(msg.uuid)
}
const events = filtered.map(m => ({ ...m, session_id: currentSessionId }))
void transport.writeBatch(events)
},
sendControlRequest(request: SDKControlRequest) {
if (!transport) {
logForDebugging(
'[bridge:repl] Transport not configured, skipping control_request',
)
return
}
const event = { ...request, session_id: currentSessionId }
void transport.write(event)
logForDebugging(
`[bridge:repl] Sent control_request request_id=${request.request_id}`,
)
},
sendControlResponse(response: SDKControlResponse) {
if (!transport) {
logForDebugging(
'[bridge:repl] Transport not configured, skipping control_response',
)
return
}
const event = { ...response, session_id: currentSessionId }
void transport.write(event)
logForDebugging('[bridge:repl] Sent control_response')
},
sendControlCancelRequest(requestId: string) {
if (!transport) {
logForDebugging(
'[bridge:repl] Transport not configured, skipping control_cancel_request',
)
return
}
const event = {
type: 'control_cancel_request' as const,
request_id: requestId,
session_id: currentSessionId,
}
void transport.write(event)
logForDebugging(
`[bridge:repl] Sent control_cancel_request request_id=${requestId}`,
)
},
sendResult() {
  // Emits a synthesized result message for the current session.
  if (!transport) {
    logForDebugging(
      `[bridge:repl] sendResult: skipping, transport not configured session=${currentSessionId}`,
    )
    return
  }
  const result = makeResultMessage(currentSessionId)
  void transport.write(result)
  logForDebugging(
    `[bridge:repl] Sent result for session=${currentSessionId}`,
  )
},
async teardown() {
  // Remove this bridge's registration, then run the optional teardown
  // hook (may be unset if initialization never completed).
  unregister()
  await doTeardownImpl?.()
  logForDebugging('[bridge:repl] Torn down')
  logEvent('tengu_bridge_repl_teardown', {})
},
}
}
/**
* Persistent poll loop for work items. Runs in the background for the
* lifetime of the bridge connection.
*
* When a work item arrives, acknowledges it and calls onWorkReceived
* with the session ID and ingress token (which connects the ingress
* WebSocket). Then continues polling — the server will dispatch a new
* work item if the ingress WebSocket drops, allowing automatic
* reconnection without tearing down the bridge.
*/
async function startWorkPollLoop({
  api,
  getCredentials,
  signal,
  onStateChange,
  onWorkReceived,
  onEnvironmentLost,
  getWsState,
  isAtCapacity,
  capacitySignal,
  onFatalError,
  getPollIntervalConfig = () => DEFAULT_POLL_CONFIG,
  getHeartbeatInfo,
  onHeartbeatFatal,
}: {
  api: BridgeApiClient
  getCredentials: () => { environmentId: string; environmentSecret: string }
  signal: AbortSignal
  onStateChange?: (state: BridgeState, detail?: string) => void
  onWorkReceived: (
    sessionId: string,
    ingressToken: string,
    workId: string,
    useCodeSessions: boolean,
  ) => void
  /** Called when the environment has been deleted. Returns new credentials or null. */
  onEnvironmentLost?: () => Promise<{
    environmentId: string
    environmentSecret: string
  } | null>
  /** Returns the current WebSocket readyState label for diagnostic logging. */
  getWsState?: () => string
  /**
   * Returns true when the caller cannot accept new work (transport already
   * connected). When true, the loop polls at the configured at-capacity
   * interval as a heartbeat only. Server-side BRIDGE_LAST_POLL_TTL is
   * 4 hours — anything shorter than that is sufficient for liveness.
   */
  isAtCapacity?: () => boolean
  /**
   * Produces a signal that aborts when capacity frees up (transport lost),
   * merged with the loop signal. Used to interrupt the at-capacity sleep
   * so recovery polling starts immediately.
   */
  capacitySignal?: () => CapacitySignal
  /** Called on unrecoverable errors (e.g. server-side expiry) to trigger full teardown. */
  onFatalError?: () => void
  /** Poll interval config getter — defaults to DEFAULT_POLL_CONFIG. */
  getPollIntervalConfig?: () => PollIntervalConfig
  /**
   * Returns the current work ID and session ingress token for heartbeat.
   * When null, heartbeat is not possible (no active work item).
   */
  getHeartbeatInfo?: () => {
    environmentId: string
    workId: string
    sessionToken: string
  } | null
  /**
   * Called when heartbeatWork throws BridgeFatalError (401/403/404/410 —
   * JWT expired or work item gone). Caller should tear down the transport
   * + work state so isAtCapacity() flips to false and the loop fast-polls
   * for the server's re-dispatched work item. When provided, the loop
   * SKIPS the at-capacity backoff sleep (which would otherwise cause a
   * ~10-minute dead window before recovery). When omitted, falls back to
   * the backoff sleep to avoid a tight poll+heartbeat loop.
   */
  onHeartbeatFatal?: (err: BridgeFatalError) => void
}): Promise<void> {
  const MAX_ENVIRONMENT_RECREATIONS = 3
  logForDebugging(
    `[bridge:repl] Starting work poll loop for env=${getCredentials().environmentId}`,
  )
  let consecutiveErrors = 0
  let firstErrorTime: number | null = null
  let lastPollErrorTime: number | null = null
  let environmentRecreations = 0
  // Set when the at-capacity sleep overruns its deadline by a large margin
  // (process suspension). Consumed at the top of the next iteration to
  // force one fast-poll cycle — isAtCapacity() is `transport !== null`,
  // which stays true while the transport auto-reconnects, so the poll
  // loop would otherwise go straight back to a 10-minute sleep on a
  // transport that may be pointed at a dead socket.
  let suspensionDetected = false
  while (!signal.aborted) {
    // Capture credentials outside try so the catch block can detect
    // whether a concurrent reconnection replaced the environment.
    const { environmentId: envId, environmentSecret: envSecret } =
      getCredentials()
    const pollConfig = getPollIntervalConfig()
    try {
      const work = await api.pollForWork(
        envId,
        envSecret,
        signal,
        pollConfig.reclaim_older_than_ms,
      )
      // A successful poll proves the env is genuinely healthy — reset the
      // env-loss counter so events hours apart each start fresh. Outside
      // the state-change guard below because onEnvLost's success path
      // already emits 'ready'; emitting again here would be a duplicate.
      // (onEnvLost returning creds does NOT reset this — that would break
      // oscillation protection when the new env immediately dies.)
      environmentRecreations = 0
      // Reset error tracking on successful poll
      if (consecutiveErrors > 0) {
        logForDebugging(
          `[bridge:repl] Poll recovered after ${consecutiveErrors} consecutive error(s)`,
        )
        consecutiveErrors = 0
        firstErrorTime = null
        lastPollErrorTime = null
        onStateChange?.('ready')
      }
      if (!work) {
        // Read-and-clear: after a detected suspension, skip the at-capacity
        // branch exactly once. The pollForWork above already refreshed the
        // server's BRIDGE_LAST_POLL_TTL; this fast cycle gives any
        // re-dispatched work item a chance to land before we go back under.
        const skipAtCapacityOnce = suspensionDetected
        suspensionDetected = false
        if (isAtCapacity?.() && capacitySignal && !skipAtCapacityOnce) {
          const atCapMs = pollConfig.poll_interval_ms_at_capacity
          // Heartbeat loops WITHOUT polling. When at-capacity polling is also
          // enabled (atCapMs > 0), the loop tracks a deadline and breaks out
          // to poll at that interval — heartbeat and poll compose instead of
          // one suppressing the other. Breaks out when:
          // - Poll deadline reached (atCapMs > 0 only)
          // - Auth fails (JWT expired → poll refreshes tokens)
          // - Capacity wake fires (transport lost → poll for new work)
          // - Heartbeat config disabled (GrowthBook update)
          // - Loop aborted (shutdown)
          if (
            pollConfig.non_exclusive_heartbeat_interval_ms > 0 &&
            getHeartbeatInfo
          ) {
            logEvent('tengu_bridge_heartbeat_mode_entered', {
              heartbeat_interval_ms:
                pollConfig.non_exclusive_heartbeat_interval_ms,
            })
            // Deadline computed once at entry — GB updates to atCapMs don't
            // shift an in-flight deadline (next entry picks up the new value).
            const pollDeadline = atCapMs > 0 ? Date.now() + atCapMs : null
            let needsBackoff = false
            let hbCycles = 0
            while (
              !signal.aborted &&
              isAtCapacity() &&
              (pollDeadline === null || Date.now() < pollDeadline)
            ) {
              const hbConfig = getPollIntervalConfig()
              if (hbConfig.non_exclusive_heartbeat_interval_ms <= 0) break
              const info = getHeartbeatInfo()
              if (!info) break
              // Capture capacity signal BEFORE the async heartbeat call so
              // a transport loss during the HTTP request is caught by the
              // subsequent sleep.
              const cap = capacitySignal()
              try {
                await api.heartbeatWork(
                  info.environmentId,
                  info.workId,
                  info.sessionToken,
                )
              } catch (err) {
                logForDebugging(
                  `[bridge:repl:heartbeat] Failed: ${errorMessage(err)}`,
                )
                if (err instanceof BridgeFatalError) {
                  cap.cleanup()
                  logEvent('tengu_bridge_heartbeat_error', {
                    status:
                      err.status as unknown as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
                    error_type: (err.status === 401 || err.status === 403
                      ? 'auth_failed'
                      : 'fatal') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
                  })
                  // JWT expired (401/403) or work item gone (404/410).
                  // Either way the current transport is dead — SSE
                  // reconnects and CCR writes will fail on the same
                  // stale token. If the caller gave us a recovery hook,
                  // tear down work state and skip backoff: isAtCapacity()
                  // flips to false, next outer-loop iteration fast-polls
                  // for the server's re-dispatched work item. Without
                  // the hook, backoff to avoid tight poll+heartbeat loop.
                  if (onHeartbeatFatal) {
                    onHeartbeatFatal(err)
                    logForDebugging(
                      `[bridge:repl:heartbeat] Fatal (status=${err.status}), work state cleared — fast-polling for re-dispatch`,
                    )
                  } else {
                    needsBackoff = true
                  }
                  break
                }
              }
              hbCycles++
              await sleep(
                hbConfig.non_exclusive_heartbeat_interval_ms,
                cap.signal,
              )
              cap.cleanup()
            }
            const exitReason = needsBackoff
              ? 'error'
              : signal.aborted
                ? 'shutdown'
                : !isAtCapacity()
                  ? 'capacity_changed'
                  : pollDeadline !== null && Date.now() >= pollDeadline
                    ? 'poll_due'
                    : 'config_disabled'
            logEvent('tengu_bridge_heartbeat_mode_exited', {
              reason:
                exitReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
              heartbeat_cycles: hbCycles,
            })
            // On auth_failed or fatal, backoff before polling to avoid a
            // tight poll+heartbeat loop. Fall through to the shared sleep
            // below — it's the same capacitySignal-wrapped sleep the legacy
            // path uses, and both need the suspension-overrun check.
            if (!needsBackoff) {
              if (exitReason === 'poll_due') {
                // bridgeApi throttles empty-poll logs (EMPTY_POLL_LOG_INTERVAL=100)
                // so the once-per-10min poll_due poll is invisible at counter=2.
                // Log it here so verification runs see both endpoints in the debug log.
                logForDebugging(
                  `[bridge:repl] Heartbeat poll_due after ${hbCycles} cycles — falling through to pollForWork`,
                )
              }
              continue
            }
          }
          // At-capacity sleep — reached by both the legacy path (heartbeat
          // disabled) and the heartbeat-backoff path (needsBackoff=true).
          // Merged so the suspension detector covers both; previously the
          // backoff path had no overrun check and could go straight back
          // under for 10 min after a laptop wake. Use atCapMs when enabled,
          // else the heartbeat interval as a floor (guaranteed > 0 on the
          // backoff path) so heartbeat-only configs don't tight-loop.
          const sleepMs =
            atCapMs > 0
              ? atCapMs
              : pollConfig.non_exclusive_heartbeat_interval_ms
          if (sleepMs > 0) {
            const cap = capacitySignal()
            const sleepStart = Date.now()
            await sleep(sleepMs, cap.signal)
            cap.cleanup()
            // Process-suspension detector. A setTimeout overshooting its
            // deadline by 60s means the process was suspended (laptop lid,
            // SIGSTOP, VM pause) — even a pathological GC pause is seconds,
            // not minutes. Early aborts (wakePollLoop → cap.signal) produce
            // overrun < 0 and fall through. Note: this only catches sleeps
            // that outlast their deadline; WebSocketTransport's ping
            // interval (10s granularity) is the primary detector for shorter
            // suspensions. This is the backstop for when that detector isn't
            // running (transport mid-reconnect, interval stopped).
            const overrun = Date.now() - sleepStart - sleepMs
            if (overrun > 60_000) {
              logForDebugging(
                `[bridge:repl] At-capacity sleep overran by ${Math.round(overrun / 1000)}s — process suspension detected, forcing one fast-poll cycle`,
              )
              logEvent('tengu_bridge_repl_suspension_detected', {
                overrun_ms: overrun,
              })
              suspensionDetected = true
            }
          }
        } else {
          await sleep(pollConfig.poll_interval_ms_not_at_capacity, signal)
        }
        continue
      }
      // Decode before type dispatch — need the JWT for the explicit ack.
      let secret
      try {
        secret = decodeWorkSecret(work.secret)
      } catch (err) {
        logForDebugging(
          `[bridge:repl] Failed to decode work secret: ${errorMessage(err)}`,
        )
        logEvent('tengu_bridge_repl_work_secret_failed', {})
        // Can't ack (needs the JWT we failed to decode). stopWork uses OAuth.
        // Prevents XAUTOCLAIM re-delivering this poisoned item every cycle.
        await api.stopWork(envId, work.id, false).catch(() => {})
        continue
      }
      // Explicitly acknowledge to prevent redelivery. Non-fatal on failure:
      // server re-delivers, and the onWorkReceived callback handles dedup.
      logForDebugging(`[bridge:repl] Acknowledging workId=${work.id}`)
      try {
        await api.acknowledgeWork(envId, work.id, secret.session_ingress_token)
      } catch (err) {
        logForDebugging(
          `[bridge:repl] Acknowledge failed workId=${work.id}: ${errorMessage(err)}`,
        )
      }
      if (work.data.type === 'healthcheck') {
        logForDebugging('[bridge:repl] Healthcheck received')
        continue
      }
      if (work.data.type === 'session') {
        const workSessionId = work.data.id
        try {
          validateBridgeId(workSessionId, 'session_id')
        } catch {
          logForDebugging(
            `[bridge:repl] Invalid session_id in work: ${workSessionId}`,
          )
          continue
        }
        onWorkReceived(
          workSessionId,
          secret.session_ingress_token,
          work.id,
          secret.use_code_sessions === true,
        )
        logForDebugging('[bridge:repl] Work accepted, continuing poll loop')
      }
    } catch (err) {
      if (signal.aborted) break
      // Detect permanent "environment deleted" error — no amount of
      // retrying will recover. Re-register a new environment instead.
      // Checked BEFORE the generic BridgeFatalError bail. pollForWork uses
      // validateStatus: s => s < 500, so 404 is always wrapped into a
      // BridgeFatalError by handleErrorStatus() — never an axios-shaped
      // error. The poll endpoint's only path param is the env ID; 404
      // unambiguously means env-gone (no-work is a 200 with null body).
      // The server sends error.type='not_found_error' (standard Anthropic
      // API shape), not a bridge-specific string — but status===404 is
      // the real signal and survives body-shape changes.
      if (
        err instanceof BridgeFatalError &&
        err.status === 404 &&
        onEnvironmentLost
      ) {
        // If credentials have already been refreshed by a concurrent
        // reconnection (e.g. WS close handler), the stale poll's error
        // is expected — skip onEnvironmentLost and retry with fresh creds.
        const currentEnvId = getCredentials().environmentId
        if (envId !== currentEnvId) {
          logForDebugging(
            `[bridge:repl] Stale poll error for old env=${envId}, current env=${currentEnvId} — skipping onEnvironmentLost`,
          )
          consecutiveErrors = 0
          firstErrorTime = null
          continue
        }
        environmentRecreations++
        logForDebugging(
          `[bridge:repl] Environment deleted, attempting re-registration (attempt ${environmentRecreations}/${MAX_ENVIRONMENT_RECREATIONS})`,
        )
        logEvent('tengu_bridge_repl_env_lost', {
          attempt: environmentRecreations,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
        if (environmentRecreations > MAX_ENVIRONMENT_RECREATIONS) {
          logForDebugging(
            `[bridge:repl] Environment re-registration limit reached (${MAX_ENVIRONMENT_RECREATIONS}), giving up`,
          )
          onStateChange?.(
            'failed',
            'Environment deleted and re-registration limit reached',
          )
          onFatalError?.()
          break
        }
        onStateChange?.('reconnecting', 'environment lost, recreating session')
        const newCreds = await onEnvironmentLost()
        // doReconnect() makes several sequential network calls (1-5s).
        // If the user triggered teardown during that window, its internal
        // abort checks return false — but we need to re-check here to
        // avoid emitting a spurious 'failed' + onFatalError() during
        // graceful shutdown.
        if (signal.aborted) break
        if (newCreds) {
          // Credentials are updated in the outer scope via
          // reconnectEnvironmentWithSession — getCredentials() will
          // return the fresh values on the next poll iteration.
          // Do NOT reset environmentRecreations here — onEnvLost returning
          // creds only proves we tried to fix it, not that the env is
          // healthy. A successful poll (above) is the reset point; if the
          // new env immediately dies again we still want the limit to fire.
          consecutiveErrors = 0
          firstErrorTime = null
          onStateChange?.('ready')
          logForDebugging(
            `[bridge:repl] Re-registered environment: ${newCreds.environmentId}`,
          )
          continue
        }
        onStateChange?.(
          'failed',
          'Environment deleted and re-registration failed',
        )
        onFatalError?.()
        break
      }
      // Fatal errors (401/403/404/410) — no point retrying
      if (err instanceof BridgeFatalError) {
        const isExpiry = isExpiredErrorType(err.errorType)
        const isSuppressible = isSuppressible403(err)
        logForDebugging(
          `[bridge:repl] Fatal poll error: ${err.message} (status=${err.status}, type=${err.errorType ?? 'unknown'})${isSuppressible ? ' (suppressed)' : ''}`,
        )
        logEvent('tengu_bridge_repl_fatal_error', {
          status: err.status,
          error_type:
            err.errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        })
        logForDiagnosticsNoPII(
          isExpiry ? 'info' : 'error',
          'bridge_repl_fatal_error',
          { status: err.status, error_type: err.errorType },
        )
        // Cosmetic 403 errors (e.g., external_poll_sessions scope,
        // environments:manage permission) — suppress user-visible error
        // but always trigger teardown so cleanup runs.
        if (!isSuppressible) {
          onStateChange?.(
            'failed',
            isExpiry
              ? 'session expired · /remote-control to reconnect'
              : err.message,
          )
        }
        // Always trigger teardown — matches bridgeMain.ts where fatalExit=true
        // is unconditional and post-loop cleanup always runs.
        onFatalError?.()
        break
      }
      const now = Date.now()
      // Detect system sleep/wake: if the gap since the last poll error
      // greatly exceeds the max backoff delay, the machine likely slept.
      // Reset error tracking so we retry with a fresh budget instead of
      // immediately giving up.
      if (
        lastPollErrorTime !== null &&
        now - lastPollErrorTime > POLL_ERROR_MAX_DELAY_MS * 2
      ) {
        logForDebugging(
          `[bridge:repl] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting poll error budget`,
        )
        logForDiagnosticsNoPII('info', 'bridge_repl_poll_sleep_detected', {
          gapMs: now - lastPollErrorTime,
        })
        consecutiveErrors = 0
        firstErrorTime = null
      }
      lastPollErrorTime = now
      consecutiveErrors++
      if (firstErrorTime === null) {
        firstErrorTime = now
      }
      const elapsed = now - firstErrorTime
      const httpStatus = extractHttpStatus(err)
      const errMsg = describeAxiosError(err)
      const wsLabel = getWsState?.() ?? 'unknown'
      logForDebugging(
        `[bridge:repl] Poll error (attempt ${consecutiveErrors}, elapsed ${Math.round(elapsed / 1000)}s, ws=${wsLabel}): ${errMsg}`,
      )
      logEvent('tengu_bridge_repl_poll_error', {
        status: httpStatus,
        consecutiveErrors,
        elapsedMs: elapsed,
      } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
      // Only transition to 'reconnecting' on the first error — stay
      // there until a successful poll (avoid flickering the UI state).
      if (consecutiveErrors === 1) {
        onStateChange?.('reconnecting', errMsg)
      }
      // Give up after continuous failures
      if (elapsed >= POLL_ERROR_GIVE_UP_MS) {
        logForDebugging(
          `[bridge:repl] Poll failures exceeded ${POLL_ERROR_GIVE_UP_MS / 1000}s (${consecutiveErrors} errors), giving up`,
        )
        logForDiagnosticsNoPII('info', 'bridge_repl_poll_give_up')
        logEvent('tengu_bridge_repl_poll_give_up', {
          consecutiveErrors,
          elapsedMs: elapsed,
          lastStatus: httpStatus,
        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
        onStateChange?.('failed', 'connection to server lost')
        break
      }
      // Exponential backoff: 2s → 4s → 8s → 16s → 32s → 60s (cap)
      const backoff = Math.min(
        POLL_ERROR_INITIAL_DELAY_MS * 2 ** (consecutiveErrors - 1),
        POLL_ERROR_MAX_DELAY_MS,
      )
      // The poll_due heartbeat-loop exit leaves a healthy lease exposed to
      // this backoff path. Heartbeat before each sleep so /poll outages
      // (the VerifyEnvironmentSecretAuth DB path heartbeat was introduced to
      // avoid) don't kill the 300s lease TTL.
      if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
        const info = getHeartbeatInfo?.()
        if (info) {
          try {
            await api.heartbeatWork(
              info.environmentId,
              info.workId,
              info.sessionToken,
            )
          } catch {
            // Best-effort — if heartbeat also fails the lease dies, same as
            // pre-poll_due behavior (where the only heartbeat-loop exits were
            // ones where the lease was already dying).
          }
        }
      }
      await sleep(backoff, signal)
    }
  }
  logForDebugging(
    `[bridge:repl] Work poll loop ended (aborted=${signal.aborted}) env=${getCredentials().environmentId}`,
  )
}
// Exported for testing only — the underscore-prefixed "ForTesting" aliases
// signal these are not part of the module's public API.
export {
  startWorkPollLoop as _startWorkPollLoopForTesting,
  POLL_ERROR_INITIAL_DELAY_MS as _POLL_ERROR_INITIAL_DELAY_MS_ForTesting,
  POLL_ERROR_MAX_DELAY_MS as _POLL_ERROR_MAX_DELAY_MS_ForTesting,
  POLL_ERROR_GIVE_UP_MS as _POLL_ERROR_GIVE_UP_MS_ForTesting,
}
================================================
FILE: restored-src/src/bridge/replBridgeHandle.ts
================================================
import { updateSessionBridgeId } from '../utils/concurrentSessions.js'
import type { ReplBridgeHandle } from './replBridge.js'
import { toCompatSessionId } from './sessionIdCompat.js'
/**
* Global pointer to the active REPL bridge handle, so callers outside
* useReplBridge's React tree (tools, slash commands) can invoke handle methods
* like subscribePR. Same one-bridge-per-process justification as bridgeDebug.ts
* — the handle's closure captures the sessionId and getAccessToken that created
* the session, and re-deriving those independently (BriefTool/upload.ts pattern)
* risks staging/prod token divergence.
*
* Set from useReplBridge.tsx when init completes; cleared on teardown.
*/
// One handle per process — set when init completes, cleared on teardown.
let handle: ReplBridgeHandle | null = null
export function setReplBridgeHandle(h: ReplBridgeHandle | null): void {
  handle = h
  // Advertise (or clear) our bridge session ID on the session record so
  // other local peers can dedup us out of their bridge list — local is
  // preferred. Best-effort: failures are swallowed.
  const compatId = getSelfBridgeCompatId() ?? null
  void updateSessionBridgeId(compatId).catch(() => {})
}
export function getReplBridgeHandle(): ReplBridgeHandle | null {
  // Returns the process-wide handle set by setReplBridgeHandle, or null
  // when no REPL bridge is active.
  return handle
}
/**
* Our own bridge session ID in the session_* compat format the API returns
* in /v1/sessions responses — or undefined if bridge isn't connected.
*/
export function getSelfBridgeCompatId(): string | undefined {
  // No active bridge → no compat ID to report.
  const active = getReplBridgeHandle()
  if (!active) return undefined
  return toCompatSessionId(active.bridgeSessionId)
}
================================================
FILE: restored-src/src/bridge/replBridgeTransport.ts
================================================
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import { CCRClient } from '../cli/transports/ccrClient.js'
import type { HybridTransport } from '../cli/transports/HybridTransport.js'
import { SSETransport } from '../cli/transports/SSETransport.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { updateSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
import type { SessionState } from '../utils/sessionState.js'
import { registerWorker } from './workSecret.js'
/**
* Transport abstraction for replBridge. Covers exactly the surface that
* replBridge.ts uses against HybridTransport so the v1/v2 choice is
* confined to the construction site.
*
* - v1: HybridTransport (WS reads + POST writes to Session-Ingress)
* - v2: SSETransport (reads) + CCRClient (writes to CCR v2 /worker/*)
*
* The v2 write path goes through CCRClient.writeEvent → SerialBatchEventUploader,
* NOT through SSETransport.write() — SSETransport.write() targets the
* Session-Ingress POST URL shape, which is wrong for CCR v2.
*/
export type ReplBridgeTransport = {
  write(message: StdoutMessage): Promise<void>
  writeBatch(messages: StdoutMessage[]): Promise<void>
  close(): void
  isConnectedStatus(): boolean
  getStateLabel(): string
  setOnData(callback: (data: string) => void): void
  setOnClose(callback: (closeCode?: number) => void): void
  setOnConnect(callback: () => void): void
  connect(): void
  /**
   * High-water mark of the underlying read stream's event sequence numbers.
   * replBridge reads this before swapping transports so the new one can
   * resume from where the old one left off (otherwise the server replays
   * the entire session history from seq 0).
   *
   * v1 returns 0 — Session-Ingress WS doesn't use SSE sequence numbers;
   * replay-on-reconnect is handled by the server-side message cursor.
   */
  getLastSequenceNum(): number
  /**
   * Monotonic count of batches dropped via maxConsecutiveFailures.
   * Snapshot before writeBatch() and compare after to detect silent drops
   * (writeBatch() resolves normally even when batches were dropped).
   * v2 returns 0 — the v2 write path doesn't set maxConsecutiveFailures.
   */
  readonly droppedBatchCount: number
  /**
   * PUT /worker state (v2 only; v1 is a no-op). `requires_action` tells
   * the backend a permission prompt is pending — claude.ai shows the
   * "waiting for input" indicator. REPL/daemon callers don't need this
   * (user watches the REPL locally); multi-session worker callers do.
   */
  reportState(state: SessionState): void
  /** PUT /worker external_metadata (v2 only; v1 is a no-op). */
  reportMetadata(metadata: Record<string, unknown>): void
  /**
   * POST /worker/events/{id}/delivery (v2 only; v1 is a no-op). Populates
   * CCR's processing_at/processed_at columns. `received` is auto-fired by
   * CCRClient on every SSE frame and is not exposed here.
   */
  reportDelivery(eventId: string, status: 'processing' | 'processed'): void
  /**
   * Drain the write queue before close() (v2 only; v1 resolves
   * immediately — HybridTransport POSTs are already awaited per-write).
   */
  flush(): Promise<void>
}
/**
* v1 adapter: HybridTransport already has the full surface (it extends
* WebSocketTransport which has setOnConnect + getStateLabel). This is a
* no-op wrapper that exists only so replBridge's `transport` variable
* has a single type.
*/
export function createV1ReplTransport(
  hybrid: HybridTransport,
): ReplBridgeTransport {
  // Thin pass-through: HybridTransport already exposes the full surface
  // (it extends WebSocketTransport, which has setOnConnect + getStateLabel).
  // The wrapper exists only so replBridge's `transport` variable has a
  // single type across v1 and v2.
  return {
    write(msg) {
      return hybrid.write(msg)
    },
    writeBatch(msgs) {
      return hybrid.writeBatch(msgs)
    },
    close() {
      hybrid.close()
    },
    isConnectedStatus() {
      return hybrid.isConnectedStatus()
    },
    getStateLabel() {
      return hybrid.getStateLabel()
    },
    setOnData(cb) {
      hybrid.setOnData(cb)
    },
    setOnClose(cb) {
      hybrid.setOnClose(cb)
    },
    setOnConnect(cb) {
      hybrid.setOnConnect(cb)
    },
    connect() {
      void hybrid.connect()
    },
    getLastSequenceNum() {
      // v1 Session-Ingress WS doesn't use SSE sequence numbers; replay
      // semantics are different. Always return 0 so the seq-num carryover
      // logic in replBridge is a no-op for v1.
      return 0
    },
    get droppedBatchCount() {
      return hybrid.droppedBatchCount
    },
    // v2-only surface: no-ops on the v1 adapter.
    reportState: () => {},
    reportMetadata: () => {},
    reportDelivery: () => {},
    // HybridTransport awaits each POST per-write; nothing queued to drain.
    flush: () => Promise.resolve(),
  }
}
/**
* v2 adapter: wrap SSETransport (reads) + CCRClient (writes, heartbeat,
* state, delivery tracking).
*
* Auth: v2 endpoints validate the JWT's session_id claim (register_worker.go:32)
* and worker role (environment_auth.py:856). OAuth tokens have neither.
* This is the inverse of the v1 replBridge path, which deliberately uses OAuth.
* The JWT is refreshed when the poll loop re-dispatches work — the caller
* invokes createV2ReplTransport again with the fresh token.
*
* Registration happens here (not in the caller) so the entire v2 handshake
* is one async step. registerWorker failure propagates — replBridge will
* catch it and stay on the poll loop.
*/
export async function createV2ReplTransport(opts: {
sessionUrl: string
ingressToken: string
sessionId: string
/**
* SSE sequence-number high-water mark from the previous transport.
* Passed to the new SSETransport so its first connect() sends
* from_sequence_num / Last-Event-ID and the server resumes from where
* the old stream left off. Without this, every transport swap asks the
* server to replay the entire session history from seq 0.
*/
initialSequenceNum?: number
/**
* Worker epoch from POST /bridge response. When provided, the server
* already bumped epoch (the /bridge call IS the register — see server
* PR #293280). When omitted (v1 CCR-v2 path via replBridge.ts poll loop),
* call registerWorker as before.
*/
epoch?: number
/** CCRClient heartbeat interval. Defaults to 20s when omitted. */
heartbeatIntervalMs?: number
/** ±fraction per-beat jitter. Defaults to 0 (no jitter) when omitted. */
heartbeatJitterFraction?: number
/**
* When true, skip opening the SSE read stream — only the CCRClient write
* path is activated. Use for mirror-mode attachments that forward events
* but never receive inbound prompts or control requests.
*/
outboundOnly?: boolean
/**
* Per-instance auth header source. When provided, CCRClient + SSETransport
* read auth from this closure instead of the process-wide
* CLAUDE_CODE_SESSION_ACCESS_TOKEN env var. Required for callers managing
* multiple concurrent sessions — the env-var path stomps across sessions.
* When omitted, falls back to the env var (single-session callers).
*/
getAuthToken?: () => string | undefined
}): Promise {
const {
sessionUrl,
ingressToken,
sessionId,
initialSequenceNum,
getAuthToken,
} = opts
// Auth header builder. If getAuthToken is provided, read from it
// (per-instance, multi-session safe). Otherwise write ingressToken to
// the process-wide env var (legacy single-session path — CCRClient's
// default getAuthHeaders reads it via getSessionIngressAuthHeaders).
let getAuthHeaders: (() => Record) | undefined
if (getAuthToken) {
getAuthHeaders = (): Record => {
const token = getAuthToken()
if (!token) return {}
return { Authorization: `Bearer ${token}` }
}
} else {
// CCRClient.request() and SSETransport.connect() both read auth via
// getSessionIngressAuthHeaders() → this env var. Set it before either
// touches the network.
updateSessionIngressAuthToken(ingressToken)
}
const epoch = opts.epoch ?? (await registerWorker(sessionUrl, ingressToken))
logForDebugging(
`[bridge:repl] CCR v2: worker sessionId=${sessionId} epoch=${epoch}${opts.epoch !== undefined ? ' (from /bridge)' : ' (via registerWorker)'}`,
)
// Derive SSE stream URL. Same logic as transportUtils.ts:26-33 but
// starting from an http(s) base instead of a --sdk-url that might be ws://.
const sseUrl = new URL(sessionUrl)
sseUrl.pathname = sseUrl.pathname.replace(/\/$/, '') + '/worker/events/stream'
const sse = new SSETransport(
sseUrl,
{},
sessionId,
undefined,
initialSequenceNum,
getAuthHeaders,
)
let onCloseCb: ((closeCode?: number) => void) | undefined
const ccr = new CCRClient(sse, new URL(sessionUrl), {
getAuthHeaders,
heartbeatIntervalMs: opts.heartbeatIntervalMs,
heartbeatJitterFraction: opts.heartbeatJitterFraction,
// Default is process.exit(1) — correct for spawn-mode children. In-process,
// that kills the REPL. Close instead: replBridge's onClose wakes the poll
// loop, which picks up the server's re-dispatch (with fresh epoch).
onEpochMismatch: () => {
logForDebugging(
'[bridge:repl] CCR v2: epoch superseded (409) — closing for poll-loop recovery',
)
// Close resources in a try block so the throw always executes.
// If ccr.close() or sse.close() throw, we still need to unwind
// the caller (request()) — otherwise handleEpochMismatch's `never`
// return type is violated at runtime and control falls through.
try {
ccr.close()
sse.close()
onCloseCb?.(4090)
} catch (closeErr: unknown) {
logForDebugging(
`[bridge:repl] CCR v2: error during epoch-mismatch cleanup: ${errorMessage(closeErr)}`,
{ level: 'error' },
)
}
// Don't return — the calling request() code continues after the 409
// branch, so callers see the logged warning and a false return. We
// throw to unwind; the uploaders catch it as a send failure.
throw new Error('epoch superseded')
},
})
// CCRClient's constructor wired sse.setOnEvent → reportDelivery('received').
// remoteIO.ts additionally sends 'processing'/'processed' via
// setCommandLifecycleListener, which the in-process query loop fires. This
// transport's only caller (replBridge/daemonBridge) has no such wiring — the
// daemon's agent child is a separate process (ProcessTransport), and its
// notifyCommandLifecycle calls fire with listener=null in its own module
// scope. So events stay at 'received' forever, and reconnectSession re-queues
// them on every daemon restart (observed: 21→24→25 phantom prompts as
// "user sent a new message while you were working" system-reminders).
//
// Fix: ACK 'processed' immediately alongside 'received'. The window between
// SSE receipt and transcript-write is narrow (queue → SDK → child stdin →
// model); a crash there loses one prompt vs. the observed N-prompt flood on
// every restart. Overwrite the constructor's wiring to do both — setOnEvent
// replaces, not appends (SSETransport.ts:658).
sse.setOnEvent(event => {
ccr.reportDelivery(event.event_id, 'received')
ccr.reportDelivery(event.event_id, 'processed')
})
// Both sse.connect() and ccr.initialize() are deferred to connect() below.
// replBridge's calling order is newTransport → setOnConnect → setOnData →
// setOnClose → connect(), and both calls need those callbacks wired first:
// sse.connect() opens the stream (events flow to onData/onClose immediately),
// and ccr.initialize().then() fires onConnectCb.
//
// onConnect fires once ccr.initialize() resolves. Writes go via
// CCRClient HTTP POST (SerialBatchEventUploader), not SSE, so the
// write path is ready the moment workerEpoch is set. SSE.connect()
// awaits its read loop and never resolves — don't gate on it.
// The SSE stream opens in parallel (~30ms) and starts delivering
// inbound events via setOnData; outbound doesn't need to wait for it.
let onConnectCb: (() => void) | undefined
let ccrInitialized = false
let closed = false
return {
write(msg) {
return ccr.writeEvent(msg)
},
async writeBatch(msgs) {
// SerialBatchEventUploader already batches internally (maxBatchSize=100);
// sequential enqueue preserves order and the uploader coalesces.
// Check closed between writes to avoid sending partial batches after
// transport teardown (epoch mismatch, SSE drop).
for (const m of msgs) {
if (closed) break
await ccr.writeEvent(m)
}
},
close() {
closed = true
ccr.close()
sse.close()
},
isConnectedStatus() {
// Write-readiness, not read-readiness — replBridge checks this
// before calling writeBatch. SSE open state is orthogonal.
return ccrInitialized
},
getStateLabel() {
// SSETransport doesn't expose its state string; synthesize from
// what we can observe. replBridge only uses this for debug logging.
if (sse.isClosedStatus()) return 'closed'
if (sse.isConnectedStatus()) return ccrInitialized ? 'connected' : 'init'
return 'connecting'
},
setOnData(cb) {
sse.setOnData(cb)
},
setOnClose(cb) {
onCloseCb = cb
// SSE reconnect-budget exhaustion fires onClose(undefined) — map to
// 4092 so ws_closed telemetry can distinguish it from HTTP-status
// closes (SSETransport:280 passes response.status). Stop CCRClient's
// heartbeat timer before notifying replBridge. (sse.close() doesn't
// invoke this, so the epoch-mismatch path above isn't double-firing.)
sse.setOnClose(code => {
ccr.close()
cb(code ?? 4092)
})
},
setOnConnect(cb) {
onConnectCb = cb
},
getLastSequenceNum() {
return sse.getLastSequenceNum()
},
// v2 write path (CCRClient) doesn't set maxConsecutiveFailures — no drops.
droppedBatchCount: 0,
reportState(state) {
ccr.reportState(state)
},
reportMetadata(metadata) {
ccr.reportMetadata(metadata)
},
reportDelivery(eventId, status) {
ccr.reportDelivery(eventId, status)
},
flush() {
return ccr.flush()
},
connect() {
// Outbound-only: skip the SSE read stream entirely — no inbound
// events to receive, no delivery ACKs to send. Only the CCRClient
// write path (POST /worker/events) and heartbeat are needed.
if (!opts.outboundOnly) {
// Fire-and-forget — SSETransport.connect() awaits readStream()
// (the read loop) and only resolves on stream close/error. The
// spawn-mode path in remoteIO.ts does the same void discard.
void sse.connect()
}
void ccr.initialize(epoch).then(
() => {
ccrInitialized = true
logForDebugging(
`[bridge:repl] v2 transport ready for writes (epoch=${epoch}, sse=${sse.isConnectedStatus() ? 'open' : 'opening'})`,
)
onConnectCb?.()
},
(err: unknown) => {
logForDebugging(
`[bridge:repl] CCR v2 initialize failed: ${errorMessage(err)}`,
{ level: 'error' },
)
// Close transport resources and notify replBridge via onClose
// so the poll loop can retry on the next work dispatch.
// Without this callback, replBridge never learns the transport
// failed to initialize and sits with transport === null forever.
ccr.close()
sse.close()
onCloseCb?.(4091) // 4091 = init failure, distinguishable from 4090 epoch mismatch
},
)
},
}
}
================================================
FILE: restored-src/src/bridge/sessionIdCompat.ts
================================================
/**
* Session ID tag translation helpers for the CCR v2 compat layer.
*
* Lives in its own file (rather than workSecret.ts) so that sessionHandle.ts
* and replBridgeTransport.ts (bridge.mjs entry points) can import from
* workSecret.ts without pulling in these retag functions.
*
* The isCseShimEnabled kill switch is injected via setCseShimGate() to avoid
* a static import of bridgeEnabled.ts → growthbook.ts → config.ts — all
* banned from the sdk.mjs bundle (scripts/build-agent-sdk.sh). Callers that
* already import bridgeEnabled.ts register the gate; the SDK path never does,
* so the shim defaults to active (matching isCseShimEnabled()'s own default).
*/
/**
 * GrowthBook gate for the cse_ shim, injected at runtime rather than imported
 * statically (the bridgeEnabled.ts → growthbook.ts → config.ts chain is banned
 * from the sdk.mjs bundle). Stays undefined until setCseShimGate() runs; an
 * unregistered gate means the shim is active.
 */
let _isCseShimEnabled: (() => boolean) | undefined

/**
 * Register the kill-switch gate for the cse_ → session_ shim. Bridge init
 * code that already imports bridgeEnabled.ts calls this; the SDK path never
 * does, so the shim defaults to active there.
 */
export function setCseShimGate(gate: () => boolean): void {
  _isCseShimEnabled = gate
}

/**
 * Re-tag a `cse_*` session ID as `session_*` for the v1 compat API.
 *
 * The work poll delivers `cse_*` (what worker endpoints want), but the
 * client-facing compat endpoints (/v1/sessions/{id} and friends) validate the
 * `session_*` tag — same UUID, different costume. IDs without the `cse_`
 * prefix pass through untouched, as does everything when the registered gate
 * reports the shim disabled.
 */
export function toCompatSessionId(id: string): string {
  if (!id.startsWith('cse_')) return id
  // Evaluate the gate only after the prefix matched, so non-cse IDs never
  // trigger a feature-flag read; no gate registered ⇒ shim active.
  const shimActive = _isCseShimEnabled?.() ?? true
  return shimActive ? `session_${id.slice('cse_'.length)}` : id
}
/**
 * Re-tag a `session_*` session ID as `cse_*` for infrastructure-layer calls.
 *
 * Inverse of toCompatSessionId. The bridge/reconnect endpoint sits below the
 * compat layer and, with ccr_v2_compat_enabled on server-side, looks sessions
 * up by their infra tag (`cse_*`) — while createBridgeSession returns
 * `session_*`, which is what bridge-pointer stores. Same UUID, wrong tag, so
 * perpetual reconnect gets "Session not found" without this re-tag. IDs
 * without the `session_` prefix pass through unchanged.
 */
export function toInfraSessionId(id: string): string {
  const prefix = 'session_'
  return id.startsWith(prefix) ? `cse_${id.slice(prefix.length)}` : id
}
================================================
FILE: restored-src/src/bridge/sessionRunner.ts
================================================
import { type ChildProcess, spawn } from 'child_process'
import { createWriteStream, type WriteStream } from 'fs'
import { tmpdir } from 'os'
import { dirname, join } from 'path'
import { createInterface } from 'readline'
import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
import { debugTruncate } from './debugUtils.js'
import type {
SessionActivity,
SessionDoneStatus,
SessionHandle,
SessionSpawner,
SessionSpawnOpts,
} from './types.js'
// Cap on recent activity entries retained per session — the consumer is
// below this excerpt; NOTE(review): confirm enforcement site.
const MAX_ACTIVITIES = 10
// Cap on retained child-process stderr lines — presumably for crash
// diagnostics; NOTE(review): usage not visible here, confirm.
const MAX_STDERR_LINES = 10
/**
 * Sanitize a session ID for embedding in a file name.
 *
 * Every character outside [a-zA-Z0-9_-] becomes an underscore, which
 * neutralizes path separators and `../` traversal sequences along with any
 * other filesystem-hostile characters.
 */
export function safeFilenameId(id: string): string {
  // \w is exactly [A-Za-z0-9_], so [^\w-] matches the same set the doc names.
  return id.replace(/[^\w-]/g, '_')
}
/**
 * A control_request emitted by the child CLI when it needs permission to
 * execute a **specific** tool invocation (not a general capability check).
 * The bridge forwards this to the server so the user can approve/deny.
 */
export type PermissionRequest = {
  type: 'control_request'
  // Correlates the eventual control response back to this request.
  request_id: string
  request: {
    /** Per-invocation permission check — "may I run this tool with these inputs?" */
    subtype: 'can_use_tool'
    tool_name: string
    // Tool input payload; shape varies per tool, so it stays an open record.
    // (Restored from bare `Record`, which needs two type arguments.)
    input: Record<string, unknown>
    tool_use_id: string
  }
}
/**
 * Dependencies injected into the session spawner, gathered into one bag so
 * the spawner avoids process-global reads.
 */
type SessionSpawnerDeps = {
  // Executable to spawn: the claude binary itself, or the node runtime for
  // npm installs (see scriptArgs below).
  execPath: string
  /**
   * Arguments that must precede the CLI flags when spawning. Empty for
   * compiled binaries (where execPath is the claude binary itself); contains
   * the script path (process.argv[1]) for npm installs where execPath is the
   * node runtime. Without this, node sees --sdk-url as a node option and
   * exits with "bad option: --sdk-url" (see anthropics/claude-code#28334).
   */
  scriptArgs: string[]
  // Environment passed to the child process.
  env: NodeJS.ProcessEnv
  verbose: boolean
  sandbox: boolean
  // Optional path for debug output — NOTE(review): exact sink not visible in
  // this excerpt; confirm against the spawner implementation.
  debugFile?: string
  // Presumably forwarded to the CLI's permission-mode flag — TODO confirm.
  permissionMode?: string
  // Sink for debug log lines.
  onDebug: (msg: string) => void
  // Fired for each activity extracted from the child's NDJSON output.
  onActivity?: (sessionId: string, activity: SessionActivity) => void
  // Fired when the child emits a can_use_tool control_request.
  onPermissionRequest?: (
    sessionId: string,
    request: PermissionRequest,
    accessToken: string,
  ) => void
}
/** Map tool names to human-readable verbs for the status display. */
// Restored type arguments: bare `Record` does not compile (two are required).
const TOOL_VERBS: Record<string, string> = {
  Read: 'Reading',
  Write: 'Writing',
  Edit: 'Editing',
  MultiEdit: 'Editing',
  Bash: 'Running',
  Glob: 'Searching',
  Grep: 'Searching',
  WebFetch: 'Fetching',
  WebSearch: 'Searching',
  Task: 'Running task',
  FileReadTool: 'Reading',
  FileWriteTool: 'Writing',
  FileEditTool: 'Editing',
  GlobTool: 'Searching',
  GrepTool: 'Searching',
  BashTool: 'Running',
  NotebookEditTool: 'Editing notebook',
  LSP: 'LSP',
}

/**
 * Build a short "verb target" summary of a tool invocation for the status
 * display, e.g. "Reading /tmp/a.ts".
 *
 * @param name tool name; mapped through TOOL_VERBS, falling back to the raw
 *   name for unknown tools
 * @param input tool input; the first recognized field (file_path, filePath,
 *   pattern, command, url, query) becomes the target, with commands truncated
 *   to 60 chars
 * @returns "verb target", or just the verb when no target field is present
 */
function toolSummary(name: string, input: Record<string, unknown>): string {
  const verb = TOOL_VERBS[name] ?? name
  const target =
    (input.file_path as string) ??
    (input.filePath as string) ??
    (input.pattern as string) ??
    (input.command as string | undefined)?.slice(0, 60) ??
    (input.url as string) ??
    (input.query as string) ??
    ''
  if (target) {
    return `${verb} ${target}`
  }
  return verb
}
/**
 * Parse one NDJSON line from the child CLI and extract zero or more
 * status-display activities.
 *
 * Recognized message types:
 * - 'assistant': each tool_use content block yields a 'tool_start' activity
 *   (summarized via toolSummary); each non-empty text block yields a 'text'
 *   activity truncated to 80 chars.
 * - 'result': subtype 'success' yields a 'result' activity; any other defined
 *   subtype yields an 'error' activity using the first msg.errors entry when
 *   present, else "Error: <subtype>".
 * Unparseable or non-object lines and all other message types yield [].
 *
 * @param line one line of the child's NDJSON output
 * @param sessionId used only for debug-log attribution
 * @param onDebug sink for verbose activity logging
 */
function extractActivities(
  line: string,
  sessionId: string,
  onDebug: (msg: string) => void,
): SessionActivity[] {
  let parsed: unknown
  try {
    // jsonParse is a project helper — presumed JSON.parse-compatible (throws
    // on invalid input); the catch treats bad lines as "no activity".
    parsed = jsonParse(line)
  } catch {
    return []
  }
  if (!parsed || typeof parsed !== 'object') {
    return []
  }
  // Restored `Record<string, unknown>` throughout — bare `Record` does not
  // compile (two type arguments required).
  const msg = parsed as Record<string, unknown>
  const activities: SessionActivity[] = []
  // One shared timestamp for every activity extracted from this line.
  const now = Date.now()
  switch (msg.type) {
    case 'assistant': {
      const message = msg.message as Record<string, unknown> | undefined
      if (!message) break
      const content = message.content
      if (!Array.isArray(content)) break
      for (const block of content) {
        if (!block || typeof block !== 'object') continue
        const b = block as Record<string, unknown>
        if (b.type === 'tool_use') {
          const name = (b.name as string) ?? 'Tool'
          const input = (b.input as Record<string, unknown>) ?? {}
          const summary = toolSummary(name, input)
          activities.push({
            type: 'tool_start',
            summary,
            timestamp: now,
          })
          onDebug(
            `[bridge:activity] sessionId=${sessionId} tool_use name=${name} ${inputPreview(input)}`,
          )
        } else if (b.type === 'text') {
          const text = (b.text as string) ?? ''
          if (text.length > 0) {
            activities.push({
              type: 'text',
              summary: text.slice(0, 80),
              timestamp: now,
            })
            onDebug(
              `[bridge:activity] sessionId=${sessionId} text "${text.slice(0, 100)}"`,
            )
          }
        }
      }
      break
    }
    case 'result': {
      const subtype = msg.subtype as string | undefined
      if (subtype === 'success') {
        activities.push({
          type: 'result',
          summary: 'Session completed',
          timestamp: now,
        })
        onDebug(
          `[bridge:activity] sessionId=${sessionId} result subtype=success`,
        )
      } else if (subtype) {
        const errors = msg.errors as string[] | undefined
        const errorSummary = errors?.[0] ?? `Error: ${subtype}`
        activities.push({
          type: 'error',
          summary: errorSummary,
          timestamp: now,
        })
        onDebug(
          `[bridge:activity] sessionId=${sessionId} result subtype=${subtype} error="${errorSummary}"`,
        )
      } else {
        onDebug(
          `[bridge:activity] sessionId=${sessionId} result subtype=undefined`,
        )
      }
      break
    }
    default:
      break
  }
  return activities
}
/**
 * Extract plain text from a replayed SDKUserMessage NDJSON line. Returns the
 * trimmed text if this looks like a real human-authored message, otherwise
 * undefined so the caller keeps waiting for the first real message.
 *
 * @param msg a parsed NDJSON message object (restored `Record<string,
 *   unknown>` — bare `Record` does not compile)
 * @returns trimmed non-empty text, or undefined for tool-result wrappers,
 *   synthetic/replay messages, and messages without text content
 */
function extractUserMessageText(
  msg: Record<string, unknown>,
): string | undefined {
  // Skip tool-result user messages (wrapped subagent results) and synthetic
  // caveat messages — neither is human-authored.
  if (msg.parent_tool_use_id != null || msg.isSynthetic || msg.isReplay)
    return undefined
  const message = msg.message as Record<string, unknown> | undefined
  const content = message?.content
  let text: string | undefined
  if (typeof content === 'string') {
    text = content
  } else if (Array.isArray(content)) {
    // Block-array form: take the first text block only.
    for (const block of content) {
      if (
        block &&
        typeof block === 'object' &&
        (block as Record<string, unknown>).type === 'text'
      ) {
        text = (block as Record<string, unknown>).text as string | undefined
        break
      }
    }
  }
  text = text?.trim()
  // Whitespace-only content is treated the same as no content.
  return text ? text : undefined
}
/** Build a short preview of tool input for debug logging. */
function inputPreview(input: Record