Repository: ntegrals/openbrowser
Branch: master
Commit: 622f36985df6
Files: 119
Total size: 697.5 KB

Directory structure:
gitextract_rxlca7z1/

├── .github/
│   ├── CONTRIBUTING.md
│   └── workflows/
│       └── ci.yml
├── .gitignore
├── LICENSE
├── README.md
├── biome.json
├── bunfig.toml
├── package.json
├── packages/
│   ├── cli/
│   │   ├── package.json
│   │   ├── src/
│   │   │   ├── commands/
│   │   │   │   ├── click.ts
│   │   │   │   ├── eval.ts
│   │   │   │   ├── extract.ts
│   │   │   │   ├── interactive.ts
│   │   │   │   ├── open.ts
│   │   │   │   ├── run.ts
│   │   │   │   ├── screenshot.ts
│   │   │   │   ├── sessions.ts
│   │   │   │   ├── state.ts
│   │   │   │   └── type.ts
│   │   │   ├── display.ts
│   │   │   ├── globals.ts
│   │   │   ├── index.ts
│   │   │   ├── protocol.ts
│   │   │   ├── server.ts
│   │   │   └── sessions.ts
│   │   └── tsconfig.json
│   ├── core/
│   │   ├── package.json
│   │   ├── src/
│   │   │   ├── agent/
│   │   │   │   ├── agent.test.ts
│   │   │   │   ├── agent.ts
│   │   │   │   ├── conversation/
│   │   │   │   │   ├── service.ts
│   │   │   │   │   ├── types.ts
│   │   │   │   │   └── utils.ts
│   │   │   │   ├── conversation.test.ts
│   │   │   │   ├── evaluator.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── instructions/
│   │   │   │   │   ├── instructions-compact.md
│   │   │   │   │   ├── instructions-direct.md
│   │   │   │   │   └── instructions.md
│   │   │   │   ├── instructions.ts
│   │   │   │   ├── replay-recorder.ts
│   │   │   │   ├── stall-detector.test.ts
│   │   │   │   ├── stall-detector.ts
│   │   │   │   └── types.ts
│   │   │   ├── bridge/
│   │   │   │   ├── adapter.ts
│   │   │   │   ├── client.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── mcp-types.ts
│   │   │   │   ├── server.test.ts
│   │   │   │   └── server.ts
│   │   │   ├── commands/
│   │   │   │   ├── catalog/
│   │   │   │   │   ├── catalog.ts
│   │   │   │   │   └── types.ts
│   │   │   │   ├── catalog.test.ts
│   │   │   │   ├── executor.test.ts
│   │   │   │   ├── executor.ts
│   │   │   │   ├── extraction/
│   │   │   │   │   └── extractor.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── types.ts
│   │   │   │   └── utils.ts
│   │   │   ├── config/
│   │   │   │   ├── config.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── types.ts
│   │   │   ├── errors.ts
│   │   │   ├── index.ts
│   │   │   ├── logging.ts
│   │   │   ├── metering/
│   │   │   │   ├── index.ts
│   │   │   │   ├── tracker.test.ts
│   │   │   │   ├── tracker.ts
│   │   │   │   └── types.ts
│   │   │   ├── model/
│   │   │   │   ├── adapters/
│   │   │   │   │   └── vercel.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── interface.ts
│   │   │   │   ├── messages.ts
│   │   │   │   ├── schema-optimizer.ts
│   │   │   │   └── types.ts
│   │   │   ├── page/
│   │   │   │   ├── content-extractor.ts
│   │   │   │   ├── index.ts
│   │   │   │   ├── page-analyzer.test.ts
│   │   │   │   ├── page-analyzer.ts
│   │   │   │   ├── renderer/
│   │   │   │   │   ├── interactive-elements.ts
│   │   │   │   │   ├── layer-order.ts
│   │   │   │   │   └── tree-renderer.ts
│   │   │   │   ├── renderer.test.ts
│   │   │   │   ├── snapshot-builder.ts
│   │   │   │   └── types.ts
│   │   │   ├── sandbox/
│   │   │   │   ├── file-access.ts
│   │   │   │   └── index.ts
│   │   │   ├── telemetry.ts
│   │   │   ├── types.ts
│   │   │   ├── utils.ts
│   │   │   └── viewport/
│   │   │       ├── event-hub.ts
│   │   │       ├── events.ts
│   │   │       ├── guard-base.ts
│   │   │       ├── guards/
│   │   │       │   ├── blank-page.ts
│   │   │       │   ├── crash.ts
│   │   │       │   ├── default-handler.ts
│   │   │       │   ├── downloads.ts
│   │   │       │   ├── har-capture.ts
│   │   │       │   ├── local-instance.ts
│   │   │       │   ├── page-ready.ts
│   │   │       │   ├── permissions.ts
│   │   │       │   ├── persistence.ts
│   │   │       │   ├── popups.ts
│   │   │       │   ├── screenshot.ts
│   │   │       │   ├── url-policy.ts
│   │   │       │   └── video-capture.ts
│   │   │       ├── index.ts
│   │   │       ├── launch-profile.test.ts
│   │   │       ├── launch-profile.ts
│   │   │       ├── types.ts
│   │   │       ├── viewport.ts
│   │   │       └── visual-tracer.ts
│   │   └── tsconfig.json
│   └── sandbox/
│       ├── package.json
│       ├── src/
│       │   ├── index.ts
│       │   ├── sandbox.ts
│       │   └── types.ts
│       └── tsconfig.json
├── tsconfig.base.json
└── tsconfig.json

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/CONTRIBUTING.md
================================================
# Contributing to Open Browser

Thank you for your interest in contributing!

## Getting Started

1. Fork the repository
2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/openbrowser.git`
3. Install dependencies: `bun install`
4. Create a branch: `git checkout -b my-feature`
5. Make your changes and add tests
6. Run tests: `bun run test`
7. Submit a pull request

## Code Style

We use [Biome](https://biomejs.dev/) for formatting and linting. Run `bun run format` before committing.

## Reporting Issues

Please use GitHub Issues to report bugs or request features. Include:
- Steps to reproduce
- Expected vs actual behavior
- Browser and OS version


================================================
FILE: .github/workflows/ci.yml
================================================
# Continuous integration: installs dependencies with Bun, then type-checks
# (build), runs the test suites, and lints.
name: CI

on:
  # `master` is listed alongside `main` so the workflow also fires when the
  # repository's default branch is named `master`.
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: oven-sh/setup-bun@v2
      - run: bun install
      - run: bun run build
      - run: bun run test
      - run: bun run lint


================================================
FILE: .gitignore
================================================
node_modules/
dist/
.env
*.tsbuildinfo
.DS_Store
traces/
coverage/
recordings/
tmp/
*.log


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2024-2026 Open Browser Contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
<h1 align="center">Open Browser</h1>

<p align="center">
  <b>AI-powered autonomous web browsing framework for TypeScript.</b>
</p>

<p align="center">
  <a href="https://github.com/ntegrals/openbrowser/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
  <a href="https://github.com/ntegrals/openbrowser"><img src="https://img.shields.io/github/stars/ntegrals/openbrowser?style=social" alt="GitHub stars"></a>
</p>

<img src="./media/header.png" alt="Header">

---

Give an AI agent a browser. It clicks, types, navigates, and extracts data — autonomously completing tasks on any website. Built on Playwright with first-class support for OpenAI, Anthropic, and Google models.

> **Production-ready since v1.0.** Contributions welcome.

## Why Open Browser?

- **Autonomous agents**: Describe a task in natural language, and an AI agent navigates the web to complete it — clicking, typing, scrolling, and extracting data without manual scripting
- **Multi-model support**: Works with OpenAI, Anthropic, and Google out of the box via the Vercel AI SDK — swap models with a single flag
- **Interactive REPL**: Drop into a live browser session and issue commands interactively — great for debugging, prototyping, and exploration
- **Sandboxed execution**: Run agents in resource-limited environments with CPU/memory monitoring, timeouts, and domain restrictions
- **Production-ready**: Stall detection, cost tracking, session management, replay recording, and comprehensive error handling
- **Open source**: MIT licensed, fully extensible, bring your own API keys

## Quick Start

```bash
# Install dependencies
bun install

# Set up your API keys
cp .env.example .env
# Edit .env with your API keys

# Run an agent
bun run open-browser run "Find the top story on Hacker News and summarize it"

# Or open a browser interactively
bun run open-browser interactive
```

## Architecture

Open Browser is a monorepo with three packages:

| Package                     | Description                                                                |
| --------------------------- | -------------------------------------------------------------------------- |
| **`open-browser`**          | Core library — agent logic, browser control, DOM analysis, LLM integration |
| **`@open-browser/cli`**     | Command-line interface for running agents and browser commands             |
| **`@open-browser/sandbox`** | Sandboxed execution with resource limits and monitoring                    |

## CLI Commands

### Run an AI Agent

```bash
open-browser run <task> [options]
```

Describe what you want done. The agent figures out the rest.

```bash
# Search and extract information
open-browser run "Find the price of the MacBook Pro on apple.com"

# Fill out forms
open-browser run "Sign up for the newsletter on example.com with test@email.com"

# Multi-step workflows
open-browser run "Go to GitHub, find the open-browser repo, and star it"
```

| Option                       | Description                               |
| ---------------------------- | ----------------------------------------- |
| `-m, --model <model>`        | Model to use (default: `gpt-4o`)          |
| `-p, --provider <provider>`  | Provider: `openai`, `anthropic`, `google` |
| `--headless / --no-headless` | Hide or show the browser window           |
| `--max-steps <n>`            | Max agent steps (default: `25`)           |
| `-v, --verbose`              | Show detailed step info                   |
| `--no-cost`                  | Hide cost tracking                        |

### Browser Commands

```bash
open-browser open <url>              # Open a URL
open-browser click <selector>        # Click an element
open-browser type <selector> <text>  # Type into an input
open-browser screenshot [output]     # Capture a screenshot
open-browser eval <expression>       # Run JavaScript on the page
open-browser extract <goal>          # Extract content as markdown
open-browser state                   # Show current URL, title, and tabs
open-browser sessions                # List active browser sessions
```

### Interactive REPL

```bash
open-browser interactive
```

Drop into a live `browser>` prompt with full control:

```
browser> open https://news.ycombinator.com
browser> extract "top 5 stories with titles and points"
browser> click .morelink
browser> screenshot front-page.png
browser> help
```

## Using as a Library

```typescript
import { Agent, createViewport, createModel } from 'open-browser'

const viewport = await createViewport({ headless: true })
const model = createModel('openai', 'gpt-4o')

const agent = new Agent({
  viewport,
  model,
  task: 'Go to example.com and extract the main heading',
  settings: {
    stepLimit: 50,
    enableScreenshots: true,
  },
})

const result = await agent.run()
console.log(result)
```

### Sandboxed Execution

Run agents with resource limits and monitoring:

```typescript
import { Sandbox } from '@open-browser/sandbox'

const sandbox = new Sandbox({
  timeout: 300_000, // 5 minute timeout
  maxMemoryMB: 512, // Memory limit
  allowedDomains: ['example.com'],
  stepLimit: 100,
  captureOutput: true,
})

const result = await sandbox.run({
  task: 'Complete the checkout form',
  model: languageModel,
})

console.log(result.metrics) // steps, URLs visited, CPU time
```

## Configuration

### Environment Variables

```bash
# LLM Provider Keys (at least one required)
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_GENERATIVE_AI_API_KEY=...

# Browser
BROWSER_HEADLESS=true
BROWSER_DISABLE_SECURITY=false

# Recording & Debugging
OPEN_BROWSER_TRACE_PATH=./traces
OPEN_BROWSER_SAVE_RECORDING_PATH=./recordings
```

### Agent Configuration

| Setting             | Default  | Description                               |
| ------------------- | -------- | ----------------------------------------- |
| `stepLimit`         | `100`    | Maximum agent iterations                  |
| `commandsPerStep`   | `10`     | Actions per agent step                    |
| `failureThreshold`  | `5`      | Consecutive failures before stopping      |
| `enableScreenshots` | `true`   | Include page screenshots in agent context |
| `contextWindowSize` | `128000` | Token budget for conversation             |
| `allowedUrls`       | `[]`     | Restrict navigation to specific URLs      |
| `blockedUrls`       | `[]`     | Block navigation to specific URLs         |

### Viewport Configuration

| Setting            | Default         | Description                                 |
| ------------------ | --------------- | ------------------------------------------- |
| `headless`         | `true`          | Run browser without visible window          |
| `width` / `height` | `1280` / `1100` | Browser window dimensions                   |
| `relaxedSecurity`  | `false`         | Disable browser security features           |
| `proxy`            | —               | Proxy server configuration                  |
| `cookieFile`       | —               | Path to cookie file for persistent sessions |

## How It Works

```
                    ┌─────────────┐
  "Book a flight"   │             │
  ───────────────►  │    Agent    │  ◄── LLM (OpenAI / Anthropic / Google)
                    │             │
                    └──────┬──────┘
                           │
                    ┌──────▼──────┐
                    │   Commands  │  click, type, scroll, extract, navigate...
                    └──────┬──────┘
                           │
                    ┌──────▼──────┐
                    │  Viewport   │  Playwright browser instance
                    └──────┬──────┘
                           │
                    ┌──────▼──────┐
                    │  DOM / Page │  Snapshot, interactive elements, content
                    └─────────────┘
```

1. You describe a **task** in natural language
2. The **Agent** sends the current page state + task to an LLM
3. The LLM decides what **commands** to execute (click, type, navigate, extract...)
4. Commands execute against the **Viewport** (Playwright browser)
5. The agent observes the result, detects stalls, and loops until the task is complete

## Model Support

| Provider      | Example Models                                  | Flag           |
| ------------- | ----------------------------------------------- | -------------- |
| **OpenAI**    | `gpt-4o`, `gpt-4o-mini`, `o1`                   | `-p openai`    |
| **Anthropic** | `claude-sonnet-4-5-20250929`, `claude-opus-4-6` | `-p anthropic` |
| **Google**    | `gemini-2.0-flash`, `gemini-2.5-pro`            | `-p google`    |

## Project Structure

```
packages/
├── core/                    # Core library (open-browser)
│   └── src/
│       ├── agent/           # Agent logic, conversation, stall detection
│       ├── commands/        # Action schemas and executor (25+ commands)
│       ├── viewport/        # Browser control, events, guards
│       ├── page/            # DOM analysis, content extraction
│       ├── model/           # LLM adapter and message formatting
│       ├── metering/        # Cost tracking
│       ├── bridge/          # IPC server/client
│       └── config/          # Configuration types
├── cli/                     # CLI (@open-browser/cli)
│   └── src/
│       ├── commands/        # CLI command implementations
│       └── index.ts         # Entry point
└── sandbox/                 # Sandbox (@open-browser/sandbox)
    └── src/
        └── sandbox.ts       # Resource-limited execution
```

## Development

```bash
# Install dependencies
bun install

# Type check
bun run build

# Run tests
bun run test

# Lint
bun run lint

# Format
bun run format
```

## Contributing

Contributions are welcome! Please see [CONTRIBUTING.md](.github/CONTRIBUTING.md) for guidelines.

## License

[MIT](LICENSE)


================================================
FILE: biome.json
================================================
{
  "$schema": "https://biomejs.dev/schemas/1.9.0/schema.json",
  "organizeImports": {
    "enabled": true
  },
  "linter": {
    "enabled": true,
    "rules": {
      "recommended": true,
      "complexity": {
        "noForEach": "off"
      },
      "style": {
        "noNonNullAssertion": "off",
        "useConst": "warn"
      },
      "suspicious": {
        "noExplicitAny": "off"
      }
    }
  },
  "formatter": {
    "enabled": true,
    "indentStyle": "tab",
    "indentWidth": 2,
    "lineWidth": 120
  },
  "javascript": {
    "formatter": {
      "quoteStyle": "single",
      "semicolons": "always",
      "trailingCommas": "all"
    }
  },
  "files": {
    "ignore": ["node_modules", "dist", "*.json", "*.d.ts"]
  }
}


================================================
FILE: bunfig.toml
================================================
[install]
peer = false

[test]
timeout = 60000


================================================
FILE: package.json
================================================
{
  "name": "open-browser-monorepo",
  "private": true,
  "workspaces": ["packages/*"],
  "scripts": {
    "build": "bun run --filter '*' build",
    "test": "bun run --filter '*' test",
    "lint": "biome check .",
    "format": "biome format --write ."
  },
  "devDependencies": {
    "@biomejs/biome": "^1.9.4",
    "@types/bun": "^1.2.0",
    "typescript": "^5.8.0"
  },
  "trustedDependencies": [
    "@biomejs/biome"
  ]
}


================================================
FILE: packages/cli/package.json
================================================
{
  "name": "@open-browser/cli",
  "version": "1.1.0",
  "description": "CLI for Open Browser - AI-powered autonomous web browsing",
  "type": "module",
  "main": "src/index.ts",
  "bin": {
    "open-browser": "src/index.ts"
  },
  "scripts": {
    "build": "tsc --noEmit",
    "test": "bun test",
    "start": "bun run src/index.ts"
  },
  "dependencies": {
    "open-browser": "workspace:*",
    "commander": "^12.1.0",
    "chalk": "^5.4.0"
  },
  "license": "MIT"
}


================================================
FILE: packages/cli/src/commands/click.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';

/**
 * Registers the `click` subcommand on the given Commander program.
 *
 * The subcommand looks up a browser session (the one named by `--session`, or
 * the session manager's default when the flag is absent) and clicks the element
 * matching the CSS selector. The process exits with code 1 when no session is
 * found or when the click throws.
 *
 * @param program - Commander program to attach the subcommand to.
 */
export function registerClickCommand(program: Command): void {
	const runClick = async (selector: string, options: { session?: string }): Promise<void> => {
		try {
			const requestedId = options.session;
			const browser = requestedId ? sessionManager.get(requestedId) : sessionManager.getDefault();

			if (!browser) {
				console.error(chalk.red('No active session. Use "open" command first.'));
				process.exit(1);
			}

			await browser.click(selector);
			console.log(chalk.green('Clicked:'), selector);
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			console.error(chalk.red('Failed to click:'), reason);
			process.exit(1);
		}
	};

	program
		.command('click')
		.description('Click on an element matching the given CSS selector')
		.argument('<selector>', 'CSS selector of the element to click')
		.option('-s, --session <id>', 'Session ID to use')
		.action(runClick);
}


================================================
FILE: packages/cli/src/commands/eval.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';

/**
 * Registers the `eval` subcommand on the given Commander program.
 *
 * The subcommand evaluates a JavaScript expression through the selected browser
 * session and prints the result: `undefined` and `null` are printed dimmed,
 * objects are pretty-printed as JSON with 2-space indentation, and every other
 * value is printed via `String()`. The process exits with code 1 when no
 * session is found or when evaluation throws.
 *
 * @param program - Commander program to attach the subcommand to.
 */
export function registerEvalCommand(program: Command): void {
	// Converts an evaluation result into the text that gets printed for it.
	const formatResult = (value: unknown): string => {
		if (value === undefined) {
			return chalk.dim('undefined');
		}
		if (value === null) {
			return chalk.dim('null');
		}
		return typeof value === 'object' ? JSON.stringify(value, null, 2) : String(value);
	};

	const runEval = async (expression: string, options: { session?: string }): Promise<void> => {
		try {
			const requestedId = options.session;
			const browser = requestedId ? sessionManager.get(requestedId) : sessionManager.getDefault();

			if (!browser) {
				console.error(chalk.red('No active session. Use "open" command first.'));
				process.exit(1);
			}

			const result = await browser.evaluate(expression);
			console.log(formatResult(result));
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			console.error(chalk.red('Evaluation failed:'), reason);
			process.exit(1);
		}
	};

	program
		.command('eval')
		.description('Evaluate a JavaScript expression in the browser')
		.argument('<expression>', 'JavaScript expression to evaluate')
		.option('-s, --session <id>', 'Session ID to use')
		.action(runEval);
}


================================================
FILE: packages/cli/src/commands/extract.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { extractMarkdown } from 'open-browser';
import { sessionManager } from '../globals.js';

/**
 * Registers the `extract` subcommand on the given Commander program.
 *
 * The subcommand runs `extractMarkdown` on the selected session's current page
 * and prints the markdown, or a notice when nothing was extracted. The `goal`
 * argument is only echoed as a label; it is not passed to the extractor. The
 * process exits with code 1 when no session is found or when extraction throws.
 *
 * @param program - Commander program to attach the subcommand to.
 */
export function registerExtractCommand(program: Command): void {
	const runExtract = async (goal: string, options: { session?: string }): Promise<void> => {
		try {
			const requestedId = options.session;
			const browser = requestedId ? sessionManager.get(requestedId) : sessionManager.getDefault();

			if (!browser) {
				console.error(chalk.red('No active session. Use "open" command first.'));
				process.exit(1);
			}

			console.log(chalk.dim(`Extracting: ${goal}`));

			const markdown = await extractMarkdown(browser.currentPage);

			if (markdown) {
				console.log(markdown);
			} else {
				console.log(chalk.yellow('No content extracted from the page.'));
			}
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			console.error(chalk.red('Extraction failed:'), reason);
			process.exit(1);
		}
	};

	program
		.command('extract')
		.description('Extract content from the current page as markdown')
		.argument('<goal>', 'Description of what to extract (used as a label)')
		.option('-s, --session <id>', 'Session ID to use')
		.action(runExtract);
}


================================================
FILE: packages/cli/src/commands/interactive.ts
================================================
import * as readline from 'node:readline';
import type { Command } from 'commander';
import chalk from 'chalk';
import {
	Viewport,
	extractMarkdown,
} from 'open-browser';
import {
	Spinner,
	displayInfo,
	displayError,
	displaySeparator,
} from '../display.js';

/** Options Commander passes to the `interactive` command's action handler. */
interface InteractiveOptions {
	/** Value of the `--headless` flag (defaults to `false`); forwarded to the `Viewport` constructor. */
	headless: boolean;
}

/**
 * Registers the `interactive` command (alias `repl`): a REPL-like session for
 * browser automation.
 *
 * The action starts a `Viewport`, then reads lines from stdin. Each non-blank
 * line is tokenized with `parseCommandLine` and dispatched to `handleCommand`;
 * type `help` inside the session for the command list. Errors thrown by a
 * command are displayed and the session continues. When the readline interface
 * closes (a command asked to quit, or stdin ended) the browser is closed and
 * the process exits with code 0. A failure while starting up exits with code 1.
 *
 * @param program - Commander program to attach the command to.
 */
export function registerInteractiveCommand(program: Command): void {
	program
		.command('interactive')
		.alias('repl')
		.description('Start an interactive browser session (REPL mode)')
		.option('--headless', 'Run browser in headless mode', false)
		.action(async (options: InteractiveOptions) => {
			console.log(chalk.bold.white('Interactive Browser Session'));
			console.log(chalk.dim('Type "help" for available commands, "quit" to exit.'));
			displaySeparator();

			let browser: Viewport | null = null;

			try {
				const spinner = new Spinner('Starting browser...');
				spinner.start();

				browser = new Viewport({
					headless: options.headless,
				});
				await browser.start();

				spinner.stop(chalk.green('Browser ready.'));
				console.log('');

				const rl = readline.createInterface({
					input: process.stdin,
					output: process.stdout,
					prompt: chalk.cyan('browser> '),
					terminal: true,
				});

				rl.prompt();

				rl.on('line', async (line) => {
					const trimmed = line.trim();
					if (!trimmed) {
						rl.prompt();
						return;
					}

					const [command, ...args] = parseCommandLine(trimmed);

					// A non-blank line can still tokenize to nothing (e.g. `""`). Treat it
					// like a blank line instead of failing on `command.toLowerCase()`.
					if (command === undefined) {
						rl.prompt();
						return;
					}

					try {
						const shouldQuit = await handleCommand(
							command.toLowerCase(),
							args,
							browser!,
						);
						if (shouldQuit) {
							// Closing the interface triggers the 'close' handler below,
							// which shuts the browser down and exits.
							rl.close();
							return;
						}
					} catch (error) {
						displayError(
							error instanceof Error ? error.message : String(error),
						);
					}

					rl.prompt();
				});

				rl.on('close', async () => {
					console.log('');
					displayInfo('Closing browser session...');
					if (browser) {
						// Best-effort shutdown: errors while closing are ignored.
						await browser.close().catch(() => {});
					}
					process.exit(0);
				});
			} catch (error) {
				displayError(
					error instanceof Error ? error.message : String(error),
				);
				if (browser) {
					await browser.close().catch(() => {});
				}
				process.exit(1);
			}
		});
}

// ── Command Parsing ──

/**
 * Splits a REPL input line into tokens.
 *
 * Spaces and tabs separate tokens. Single or double quotes group characters
 * (including whitespace and the other quote character) into the current token;
 * the quote characters themselves are dropped. A quote that is never closed
 * simply runs to the end of the input. Empty tokens are never emitted, so `""`
 * on its own yields no token.
 *
 * @param input - Raw line typed by the user.
 * @returns The tokens in order of appearance.
 */
function parseCommandLine(input: string): string[] {
	const result: string[] = [];
	let buffer = '';
	let openQuote: string | null = null;

	// Emits the pending token, if any, and starts a new one.
	const flush = (): void => {
		if (buffer) {
			result.push(buffer);
			buffer = '';
		}
	};

	for (const ch of input) {
		if (openQuote !== null) {
			// Inside quotes everything is literal except the matching closer.
			if (ch === openQuote) {
				openQuote = null;
			} else {
				buffer += ch;
			}
			continue;
		}

		switch (ch) {
			case '"':
			case "'":
				openQuote = ch;
				break;
			case ' ':
			case '\t':
				flush();
				break;
			default:
				buffer += ch;
		}
	}

	flush();
	return result;
}

// ── Command Handler ──

/**
 * Executes one interactive-session command against the browser.
 *
 * Recognized commands (aliases in parentheses): `open` (`goto`, `navigate`),
 * `click` (`tap`), `type`, `eval` (`js`), `extract` (`markdown`),
 * `screenshot` (`capture`), `state` (`info`), `back`, `forward`, `tabs`,
 * `url`, `title`, `reload` (`refresh`), `wait`, `help`, and `quit`
 * (`exit`, `q`). Anything else prints an "Unknown command" notice.
 *
 * @param command - Command name; the switch matches lower-case names exactly.
 * @param args - Remaining tokens of the input line.
 * @param browser - Viewport the command operates on.
 * @returns `true` when the session should end (`quit`/`exit`/`q`), otherwise `false`.
 */
async function handleCommand(
	command: string,
	args: string[],
	browser: Viewport,
): Promise<boolean> {
	switch (command) {
		case 'open':
		case 'goto':
		case 'navigate': {
			const url = args[0];
			if (!url) {
				displayError('Usage: open <url>');
				return false;
			}
			const spinner = new Spinner(`Navigating to ${url}...`);
			spinner.start();
			await browser.navigate(url);
			const finalUrl = browser.currentPage.url();
			spinner.stop(`${chalk.green('Loaded:')} ${finalUrl}`);
			return false;
		}

		// `click` is the name shown in the help text and usage message; `tap` is
		// kept as an alias.
		case 'click':
		case 'tap': {
			// Tokens are re-joined so selectors containing spaces work unquoted.
			const selector = args.join(' ');
			if (!selector) {
				displayError('Usage: click <selector>');
				return false;
			}
			await browser.click(selector);
			console.log(chalk.green('Clicked:'), selector);
			return false;
		}

		case 'type': {
			const selector = args[0];
			const text = args.slice(1).join(' ');
			if (!selector || !text) {
				displayError('Usage: type <selector> <text>');
				return false;
			}
			await browser.type(selector, text);
			console.log(chalk.green('Typed:'), text);
			return false;
		}

		case 'eval':
		case 'js': {
			const expression = args.join(' ');
			if (!expression) {
				displayError('Usage: eval <expression>');
				return false;
			}
			const result = await browser.evaluate(expression);
			if (result === undefined) {
				console.log(chalk.dim('undefined'));
			} else if (result === null) {
				console.log(chalk.dim('null'));
			} else if (typeof result === 'object') {
				console.log(JSON.stringify(result, null, 2));
			} else {
				console.log(String(result));
			}
			return false;
		}

		case 'extract':
		case 'markdown': {
			const spinner = new Spinner('Extracting page content...');
			spinner.start();
			const markdown = await extractMarkdown(browser.currentPage);
			spinner.stop();
			if (markdown) {
				// Show first 2000 chars
				const preview = markdown.length > 2000
					? `${markdown.slice(0, 2000)}\n${chalk.dim(`... (${markdown.length} chars total)`)}`
					: markdown;
				console.log(preview);
			} else {
				console.log(chalk.yellow('No content found.'));
			}
			return false;
		}

		// `screenshot` is the name shown in the help text; `capture` is kept as
		// an alias.
		case 'screenshot':
		case 'capture': {
			const outputPath = args[0] || 'screenshot.png';
			const result = await browser.screenshot(false);
			const fs = await import('node:fs');
			const path = await import('node:path');
			const buffer = Buffer.from(result.base64, 'base64');
			const resolved = path.resolve(outputPath);
			fs.writeFileSync(resolved, buffer);
			console.log(chalk.green('Screenshot saved:'), resolved);
			console.log(chalk.dim(`${result.width}x${result.height}`));
			return false;
		}

		case 'state':
		case 'info': {
			const state = await browser.getState();
			console.log(`${chalk.white('URL:')}   ${state.url}`);
			console.log(`${chalk.white('Title:')} ${state.title}`);
			// The tab list is only shown when more than one tab is open.
			if (state.tabs.length > 1) {
				console.log(`${chalk.white('Tabs:')}`);
				for (const tab of state.tabs) {
					const marker = tab.isActive ? chalk.cyan(' > ') : '   ';
					console.log(`${marker}[${tab.tabId}] ${tab.title || '(untitled)'} - ${tab.url}`);
				}
			}
			return false;
		}

		case 'back': {
			// Best-effort: navigation errors and timeouts are ignored.
			await browser.currentPage.goBack({ timeout: 5000 }).catch(() => {});
			console.log(chalk.green('Navigated back'));
			return false;
		}

		case 'forward': {
			// Best-effort: navigation errors and timeouts are ignored.
			await browser.currentPage.goForward({ timeout: 5000 }).catch(() => {});
			console.log(chalk.green('Navigated forward'));
			return false;
		}

		case 'tabs': {
			const state = await browser.getState();
			for (const tab of state.tabs) {
				const marker = tab.isActive ? chalk.cyan(' > ') : '   ';
				console.log(`${marker}[${tab.tabId}] ${tab.title || '(untitled)'} - ${tab.url}`);
			}
			return false;
		}

		case 'url': {
			console.log(browser.currentPage.url());
			return false;
		}

		case 'title': {
			const title = await browser.currentPage.title();
			console.log(title);
			return false;
		}

		case 'reload':
		case 'refresh': {
			// Best-effort: reload errors and timeouts are ignored.
			await browser.currentPage.reload({ timeout: 10000 }).catch(() => {});
			console.log(chalk.green('Page reloaded'));
			return false;
		}

		case 'wait': {
			const ms = Number.parseInt(args[0] || '1000', 10);
			// Reject non-numeric or negative durations instead of printing
			// "Waiting NaNms..." and returning immediately.
			if (Number.isNaN(ms) || ms < 0) {
				displayError('Usage: wait [ms]');
				return false;
			}
			console.log(chalk.dim(`Waiting ${ms}ms...`));
			await new Promise((resolve) => setTimeout(resolve, ms));
			return false;
		}

		case 'help': {
			printHelp();
			return false;
		}

		case 'quit':
		case 'exit':
		case 'q': {
			return true;
		}

		default: {
			console.log(chalk.yellow(`Unknown command: ${command}`));
			console.log(chalk.dim('Type "help" for available commands.'));
			return false;
		}
	}
}

/**
 * Prints the interactive session's help text: a bold heading, a blank line,
 * then one line per entry with the usage string in cyan, padded to 25
 * characters, followed by its description.
 */
function printHelp(): void {
	const entries: Array<[usage: string, description: string]> = [
		['open <url>', 'Navigate to a URL'],
		['click <selector>', 'Click an element'],
		['type <selector> <text>', 'Type text into an element'],
		['eval <expression>', 'Run JavaScript in the browser'],
		['extract', 'Extract page content as markdown'],
		['screenshot [path]', 'Take a screenshot'],
		['state', 'Show current browser state'],
		['back', 'Navigate back'],
		['forward', 'Navigate forward'],
		['tabs', 'List open tabs'],
		['url', 'Show current URL'],
		['title', 'Show current page title'],
		['reload', 'Reload the current page'],
		['wait [ms]', 'Wait for the specified time'],
		['help', 'Show this help message'],
		['quit', 'Exit the interactive session'],
	];

	console.log(chalk.bold('Available commands:'));
	console.log('');

	entries.forEach(([usage, description]) => {
		const paddedUsage = usage.padEnd(25);
		console.log(`  ${chalk.cyan(paddedUsage)} ${description}`);
	});
}


================================================
FILE: packages/cli/src/commands/open.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';

/**
 * Register the `open` command: navigate a browser session to a URL.
 *
 * With `--session <id>` the named session must already exist; otherwise the
 * default session is reused, or a new one is created when none exists.
 * Prints the session id and the page URL after navigation. Any failure is
 * reported on stderr and the process exits with status 1.
 *
 * @param program - The commander program to attach the command to.
 */
export function registerOpenCommand(program: Command): void {
	program
		.command('open')
		.description('Open a URL in the browser')
		.argument('<url>', 'URL to navigate to')
		.option('--headless', 'Run in headless mode', false)
		.option('-s, --session <id>', 'Reuse an existing session')
		.action(async (url: string, options: { headless: boolean; session?: string }) => {
			try {
				let sessionId = options.session;
				let target = sessionId ? sessionManager.get(sessionId) : undefined;

				// An explicitly requested session must exist.
				if (sessionId && !target) {
					console.error(chalk.red(`Session "${sessionId}" not found.`));
					process.exit(1);
				}

				if (!sessionId) {
					// Try to reuse the default session, or create a new one
					sessionId =
						sessionManager.getDefaultId() ||
						(await sessionManager.create({ headless: options.headless }));
					target = sessionManager.get(sessionId);
				}

				await target!.navigate(url);

				// Look the session up again after navigation before reading the page URL.
				const landedUrl = sessionManager.get(sessionId)!.currentPage.url();

				console.log(chalk.green('Session:'), sessionId);
				console.log(chalk.green('URL:'), landedUrl);
			} catch (error) {
				const reason = error instanceof Error ? error.message : String(error);
				console.error(chalk.red('Failed to open URL:'), reason);
				process.exit(1);
			}
		});
}


================================================
FILE: packages/cli/src/commands/run.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import {
	Agent,
	Viewport,
	VercelModelAdapter,
	type LanguageModel,
	type CommandResult,
	type StepRecord,
} from 'open-browser';
import {
	Spinner,
	displayStep,
	displayTotalCost,
	displayResult,
	displayHeader,
	displaySeparator,
	displayError,
} from '../display.js';

/**
 * Options object handed to the `run` command's action callback.
 *
 * NOTE(review): commander derives option keys from the long flag name, so the
 * `--max-steps <n>` flag registered for this command is stored as `maxSteps`
 * and the negatable `--no-cost` flag is stored as `cost` (default `true`).
 * As declared, `stepLimit` and `noCost` are therefore not populated by any
 * flag registered for `run`.
 */
interface RunOptions {
	/** Model ID passed to the provider factory (`-m, --model`). */
	model: string;
	/** Provider name: `openai`, `anthropic`, or `google` (`-p, --provider`). */
	provider: string;
	/** Whether the browser runs headless (`--headless` / `--no-headless`). */
	headless: boolean;
	/** Maximum number of agent steps. */
	stepLimit: number;
	/** Print a separator after each step (`-v, --verbose`). */
	verbose: boolean;
	/** When true, the cost/timing summary is suppressed. */
	noCost: boolean;
}

/**
 * Build a language model for the given provider and wrap it in a
 * {@link VercelModelAdapter}.
 *
 * Provider SDKs are loaded lazily with dynamic `import()`, so only the
 * requested provider's package is evaluated. Each provider factory is
 * created with an empty settings object.
 *
 * @param provider - One of `openai`, `anthropic`, `google`.
 * @param modelId - Model identifier forwarded to the provider factory.
 * @returns The adapted model.
 * @throws Error when `provider` is not one of the supported names.
 */
async function createModel(provider: string, modelId: string): Promise<LanguageModel> {
	type Loader = () => Promise<import('ai').LanguageModelV1>;

	// A Map (rather than an object literal) keeps lookups restricted to the
	// keys listed here.
	const loaders = new Map<string, Loader>([
		[
			'openai',
			async () => {
				const { createOpenAI } = await import('@ai-sdk/openai');
				return createOpenAI({})(modelId);
			},
		],
		[
			'anthropic',
			async () => {
				const { createAnthropic } = await import('@ai-sdk/anthropic');
				return createAnthropic({})(modelId);
			},
		],
		[
			'google',
			async () => {
				const { createGoogleGenerativeAI } = await import('@ai-sdk/google');
				return createGoogleGenerativeAI({})(modelId);
			},
		],
	]);

	const load = loaders.get(provider);
	if (load === undefined) {
		throw new Error(
			`Unsupported provider: ${provider}. ` +
			'Supported: openai, anthropic, google',
		);
	}

	const languageModel = await load();
	return new VercelModelAdapter({ model: languageModel });
}

/**
 * Register the `run` command: start a browser, drive an {@link Agent} through
 * the given task, print per-step progress and a final summary, then exit with
 * status 0 when the agent reports success and 1 otherwise.
 *
 * @param program - The commander program to attach the command to.
 */
export function registerRunCommand(program: Command): void {
	// commander derives option keys from the long flag name: `--max-steps`
	// is stored as `maxSteps` and the negatable `--no-cost` as `cost`
	// (default true). The legacy `stepLimit` / `noCost` keys are still
	// honoured as a fallback.
	type ParsedRunOptions = RunOptions & { maxSteps?: string | number; cost?: boolean };

	program
		.command('run')
		.description('Run an AI agent to complete a browser task')
		.argument('<task>', 'Description of the task for the agent to complete')
		.option('-m, --model <model>', 'Model ID to use', 'gpt-4o')
		.option('-p, --provider <provider>', 'LLM provider (openai, anthropic, google)', 'openai')
		.option('--headless', 'Run browser in headless mode', true)
		.option('--no-headless', 'Show the browser window')
		.option('--max-steps <n>', 'Maximum number of agent steps', '25')
		.option('-v, --verbose', 'Show detailed step information', false)
		.option('--no-cost', 'Hide cost tracking information')
		.action(async (task: string, options: ParsedRunOptions) => {
			const rawStepLimit = options.maxSteps ?? options.stepLimit;
			const stepLimit = Number.parseInt(String(rawStepLimit), 10);

			// Reject junk such as `--max-steps abc` or `--max-steps 0` up front
			// instead of handing NaN / a non-positive limit to the agent.
			if (!Number.isFinite(stepLimit) || stepLimit < 1) {
				displayError(
					`Invalid --max-steps value "${String(rawStepLimit)}": expected a positive integer.`,
				);
				process.exit(1);
			}

			const showCost = options.cost !== false && !options.noCost;

			displayHeader(`Agent Task: ${task}`);
			console.log(
				`${chalk.dim('model:')} ${options.model}  ` +
				`${chalk.dim('provider:')} ${options.provider}  ` +
				`${chalk.dim('max steps:')} ${stepLimit}`,
			);
			displaySeparator();

			const spinner = new Spinner('Starting browser...');
			spinner.start();

			let browser: Viewport | null = null;
			// Assume failure until the agent reports otherwise.
			let exitCode = 1;

			try {
				// Initialize the LLM
				spinner.update('Loading model...');
				const model = await createModel(options.provider, options.model);

				// Initialize the browser
				spinner.update('Starting browser...');
				browser = new Viewport({
					headless: options.headless,
				});
				await browser.start();
				spinner.update('Browser ready, starting agent...');

				// Track per-step timing
				const stepTimings = new Map<number, number>();
				let currentStepStart = 0;

				// Create the agent
				const agent = new Agent({
					task,
					model,
					browser,
					settings: {
						stepLimit,
					},
					onStepStart: (step) => {
						currentStepStart = Date.now();
						stepTimings.set(step, currentStepStart);
						spinner.update(`Step ${step}: thinking...`);
					},
					onStepEnd: (step, results) => {
						const durationMs = Date.now() - (stepTimings.get(step) ?? currentStepStart);

						// Stop the spinner so step output is not interleaved with spinner frames.
						spinner.stop();

						// Display each action result for this step
						for (const result of results) {
							displayStep({
								step,
								action: extractActionName(result),
								target: extractActionTarget(result),
								durationMs,
								success: result.success,
								error: result.error,
								extractedContent: result.extractedContent,
							});
						}

						if (options.verbose) {
							displaySeparator();
						}

						// Restart spinner for next step
						spinner.start();
						spinner.update(`Step ${step + 1}: thinking...`);
					},
				});

				spinner.update('Agent running...');

				// Execute the agent
				const result = await agent.run();

				spinner.stop();

				// Display result
				displayResult(result.success, result.finalResult);

				// Display cost summary
				if (showCost && result.totalCost) {
					displayTotalCost({
						steps: result.history.entries.length,
						inputTokens: result.totalCost.totalInputTokens,
						outputTokens: result.totalCost.totalOutputTokens,
						totalCost: result.totalCost.totalCost,
						durationMs: computeTotalDuration(result.history.entries),
					});
				} else if (showCost) {
					// Show basic timing even without cost data
					const totalMs = computeTotalDuration(result.history.entries);
					console.log('');
					console.log(
						chalk.dim(
							`Completed in ${result.history.entries.length} step(s), ` +
							`${(totalMs / 1000).toFixed(1)}s`,
						),
					);
				}

				// Display errors if any
				if (result.errors.length > 0) {
					console.log('');
					console.log(chalk.bold.yellow('Errors encountered:'));
					for (const err of result.errors) {
						console.log(`  ${chalk.red('-')} ${err}`);
					}
				}

				exitCode = result.success ? 0 : 1;
			} catch (error) {
				spinner.stop();
				displayError(
					error instanceof Error ? error.message : String(error),
				);
				exitCode = 1;
			} finally {
				if (browser) {
					// Best-effort cleanup: a failing close must not mask the run's outcome.
					await browser.close().catch(() => {});
				}
			}

			// Exit only after cleanup: calling process.exit() inside try/catch
			// would end the process before the finally block could close the
			// browser.
			process.exit(exitCode);
		});
}

// ── Helpers ──

/**
 * Derive a display label for a command result. Precedence: `done` when the
 * result is final, `extract` when it carries extracted content, otherwise
 * `action` or `failed_action` depending on the success flag.
 */
function extractActionName(result: CommandResult): string {
	const { isDone, extractedContent, success } = result;

	let label: string;
	if (isDone) {
		label = 'done';
	} else if (extractedContent) {
		label = 'extract';
	} else if (success) {
		label = 'action';
	} else {
		label = 'failed_action';
	}
	return label;
}

/**
 * Return the first 80 characters of the result's extracted content, or
 * `undefined` when there is no (non-empty) extracted content.
 */
function extractActionTarget(result: CommandResult): string | undefined {
	const content = result.extractedContent;
	return content ? content.slice(0, 80) : undefined;
}

/**
 * Sum the `duration` field over all step records (0 for an empty list).
 */
function computeTotalDuration(entries: StepRecord[]): number {
	let total = 0;
	for (const entry of entries) {
		total = total + entry.duration;
	}
	return total;
}


================================================
FILE: packages/cli/src/commands/screenshot.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import * as fs from 'node:fs';
import * as path from 'node:path';
import { sessionManager } from '../globals.js';

/**
 * Register the `screenshot` command: capture the current page of a session
 * and write it to disk as a decoded base64 image.
 *
 * Uses the session named by `--session`, or the default session otherwise.
 * Prints the resolved output path and the image dimensions. Any failure is
 * reported on stderr and the process exits with status 1.
 *
 * @param program - The commander program to attach the command to.
 */
export function registerScreenshotCommand(program: Command): void {
	program
		.command('screenshot')
		.description('Take a screenshot of the current page')
		.argument('[output]', 'Output file path', 'screenshot.png')
		.option('-s, --session <id>', 'Session ID to use')
		.option('--full-page', 'Capture the full page', false)
		.action(async (output: string, options: { session?: string; fullPage: boolean }) => {
			try {
				const { session, fullPage } = options;
				const target = session ? sessionManager.get(session) : sessionManager.getDefault();

				if (!target) {
					console.error(chalk.red('No active session. Use "open" command first.'));
					process.exit(1);
				}

				const shot = await target.screenshot(fullPage);

				// The screenshot arrives base64-encoded; decode before writing.
				const destination = path.resolve(output);
				fs.writeFileSync(destination, Buffer.from(shot.base64, 'base64'));

				console.log(chalk.green('Screenshot saved:'), destination);
				console.log(chalk.green('Dimensions:'), `${shot.width}x${shot.height}`);
			} catch (error) {
				const reason = error instanceof Error ? error.message : String(error);
				console.error(chalk.red('Failed to take screenshot:'), reason);
				process.exit(1);
			}
		});
}


================================================
FILE: packages/cli/src/commands/sessions.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';

/**
 * Register the session-management commands:
 *
 * - `sessions` lists every active session with its creation and last-access
 *   times.
 * - `sessions:close [id]` closes one session, or all sessions when no id is
 *   given; an unknown id is an error.
 *
 * Failures are reported on stderr and the process exits with status 1.
 *
 * @param program - The commander program to attach the commands to.
 */
export function registerSessionsCommand(program: Command): void {
	const describeError = (error: unknown): string =>
		error instanceof Error ? error.message : String(error);
	const formatTime = (timestamp: number): string => new Date(timestamp).toLocaleTimeString();

	program
		.command('sessions')
		.description('List all active browser sessions')
		.action(() => {
			try {
				const active = sessionManager.list();

				if (active.length === 0) {
					console.log(chalk.yellow('No active sessions.'));
					return;
				}

				console.log(chalk.bold(`Active Sessions (${active.length}):`));
				for (const { id, createdAt, lastAccessedAt } of active) {
					const created = formatTime(createdAt);
					const accessed = formatTime(lastAccessedAt);
					console.log(`  ${chalk.cyan(id)}  created ${created}  last used ${accessed}`);
				}
			} catch (error) {
				console.error(chalk.red('Failed to list sessions:'), describeError(error));
				process.exit(1);
			}
		});

	program
		.command('sessions:close')
		.description('Close a specific session or all sessions')
		.argument('[id]', 'Session ID to close (omit to close all)')
		.action(async (id?: string) => {
			try {
				if (!id) {
					// Read the count before closing; it is zero afterwards.
					const total = sessionManager.activeCount;
					await sessionManager.closeAll();
					console.log(chalk.green(`Closed ${total} session(s).`));
					return;
				}

				const wasClosed = await sessionManager.close(id);
				if (!wasClosed) {
					console.error(chalk.red(`Session "${id}" not found.`));
					process.exit(1);
				}
				console.log(chalk.green('Closed session:'), id);
			} catch (error) {
				console.error(chalk.red('Failed to close session:'), describeError(error));
				process.exit(1);
			}
		});
}


================================================
FILE: packages/cli/src/commands/state.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';

/**
 * Register the `state` command: print the URL, title, and tab list of a
 * session, marking the active tab with an arrow.
 *
 * Uses the session named by `--session`, or the default session otherwise.
 * Failures are reported on stderr and the process exits with status 1.
 *
 * @param program - The commander program to attach the command to.
 */
export function registerStateCommand(program: Command): void {
	program
		.command('state')
		.description('Print the current browser state (URL, title, tabs)')
		.option('-s, --session <id>', 'Session ID to use')
		.action(async (options: { session?: string }) => {
			try {
				const { session } = options;
				const target = session ? sessionManager.get(session) : sessionManager.getDefault();

				if (!target) {
					console.error(chalk.red('No active session. Use "open" command first.'));
					process.exit(1);
				}

				const { url, title, tabs } = await target.getState();

				console.log(chalk.bold('Browser State'));
				console.log(chalk.green('URL:'), url);
				console.log(chalk.green('Title:'), title);
				console.log(chalk.green('Tabs:'), tabs.length);

				tabs.forEach((tab) => {
					const marker = tab.isActive ? chalk.cyan('→') : ' ';
					const label = tab.title || '(untitled)';
					console.log(`  ${marker} [${tab.tabId}] ${label} - ${tab.url}`);
				});
			} catch (error) {
				const reason = error instanceof Error ? error.message : String(error);
				console.error(chalk.red('Failed to get state:'), reason);
				process.exit(1);
			}
		});
}


================================================
FILE: packages/cli/src/commands/type.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';

/**
 * Register the `type` command: type text into the element matching a CSS
 * selector in a session.
 *
 * Uses the session named by `--session`, or the default session otherwise.
 * Failures are reported on stderr and the process exits with status 1.
 *
 * @param program - The commander program to attach the command to.
 */
export function registerTypeCommand(program: Command): void {
	program
		.command('type')
		.description('Type text into an element matching the given CSS selector')
		.argument('<selector>', 'CSS selector of the input element')
		.argument('<text>', 'Text to type into the element')
		.option('-s, --session <id>', 'Session ID to use')
		.action(async (selector: string, text: string, options: { session?: string }) => {
			try {
				const { session } = options;
				const target = session ? sessionManager.get(session) : sessionManager.getDefault();

				if (!target) {
					console.error(chalk.red('No active session. Use "open" command first.'));
					process.exit(1);
				}

				await target.type(selector, text);
				console.log(chalk.green('Typed into:'), selector);
			} catch (error) {
				const reason = error instanceof Error ? error.message : String(error);
				console.error(chalk.red('Failed to type:'), reason);
				process.exit(1);
			}
		});
}


================================================
FILE: packages/cli/src/display.ts
================================================
import chalk from 'chalk';

// ── Spinner ──

const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];

/**
 * Single-line terminal spinner written to stdout.
 *
 * While running, a frame plus the current message is redrawn every 80 ms
 * using a carriage return. `update()` only swaps the message; it takes effect
 * on the next redraw.
 */
export class Spinner {
	private intervalId: ReturnType<typeof setInterval> | null = null;
	private frameIndex = 0;
	private message: string;

	constructor(message: string) {
		this.message = message;
	}

	/** Begin animating from the first frame. No-op when already running. */
	start(): void {
		if (!this.intervalId) {
			this.frameIndex = 0;

			const drawFrame = (): void => {
				const glyph = SPINNER_FRAMES[this.frameIndex % SPINNER_FRAMES.length];
				process.stdout.write(`\r${chalk.cyan(glyph)} ${this.message}`);
				this.frameIndex += 1;
			};

			this.intervalId = setInterval(drawFrame, 80);
		}
	}

	/** Replace the message shown next to the spinner. */
	update(message: string): void {
		this.message = message;
	}

	/**
	 * Stop animating, erase the spinner line, and optionally print a final
	 * message on its own line.
	 */
	stop(finalMessage?: string): void {
		const timer = this.intervalId;
		if (timer) {
			clearInterval(timer);
			this.intervalId = null;
		}

		// Clear the spinner line
		process.stdout.write('\r\x1b[K');

		if (finalMessage) {
			console.log(finalMessage);
		}
	}
}

// ── Step Display ──

/** Data rendered by `displayStep` for one action result within an agent step. */
export interface StepDisplayInfo {
	/** Step number shown in the `Step N` label. */
	step: number;
	/** Short action label printed after the status icon. */
	action: string;
	/** Optional target description; printed on a `target:` line when set. */
	target?: string;
	/** Step duration in milliseconds, printed with an `ms` suffix. */
	durationMs: number;
	/** Selects the ✓ (true) or ✗ (false) status icon. */
	success: boolean;
	/** Optional error text; printed on an `error:` line when set. */
	error?: string;
	/** Optional extracted content; printed on an `output:` line, truncated to 120 characters. */
	extractedContent?: string;
}

/**
 * Print one agent step: a headline with step number, status icon, action
 * label and duration, followed by optional `target:`, `error:` and `output:`
 * detail lines. Extracted content longer than 120 characters is truncated
 * with a trailing ellipsis.
 */
export function displayStep(info: StepDisplayInfo): void {
	const { step, action, target, durationMs, success, error, extractedContent } = info;

	const headline = [
		chalk.bold.white(`Step ${step}`),
		success ? chalk.green('✓') : chalk.red('✗'),
		chalk.yellow(action),
		chalk.dim(`${durationMs}ms`),
	].join(' ');
	console.log(headline);

	if (target) {
		console.log(`  ${chalk.dim('target:')} ${target}`);
	}

	if (error) {
		console.log(`  ${chalk.red('error:')} ${error}`);
	}

	if (extractedContent) {
		let preview = extractedContent;
		if (extractedContent.length > 120) {
			preview = `${extractedContent.slice(0, 120)}...`;
		}
		console.log(`  ${chalk.dim('output:')} ${preview}`);
	}
}

// ── Cost Display ──

/** Token counts and cost rendered by `displayStepCost` and `displayTotalCost`. */
export interface CostDisplayInfo {
	/** Number of input tokens. */
	inputTokens: number;
	/** Number of output tokens. */
	outputTokens: number;
	/** Cost figure; printed with a `$` prefix and four decimal places. */
	totalCost: number;
}

/**
 * Print a dimmed, indented line with the token usage and cost of one step.
 */
export function displayStepCost(info: CostDisplayInfo): void {
	const { inputTokens, outputTokens, totalCost } = info;

	const usage = `tokens: ${inputTokens.toLocaleString()} in / ${outputTokens.toLocaleString()} out`;
	const price = `cost: $${totalCost.toFixed(4)}`;

	console.log(`  ${chalk.dim(usage)}  ${chalk.dim(price)}`);
}

/**
 * Print the end-of-run summary box: step count, duration in seconds, input,
 * output and combined token counts, and total cost.
 */
export function displayTotalCost(info: CostDisplayInfo & { steps: number; durationMs: number }): void {
	const rule = chalk.dim('─'.repeat(50));
	const seconds = (info.durationMs / 1000).toFixed(1);
	const combinedTokens = info.inputTokens + info.outputTokens;

	const lines = [
		'',
		chalk.bold('Summary'),
		rule,
		`  ${chalk.white('Steps:')}        ${info.steps}`,
		`  ${chalk.white('Duration:')}     ${seconds}s`,
		`  ${chalk.white('Input tokens:')} ${info.inputTokens.toLocaleString()}`,
		`  ${chalk.white('Output tokens:')} ${info.outputTokens.toLocaleString()}`,
		`  ${chalk.white('Total tokens:')} ${combinedTokens.toLocaleString()}`,
		`  ${chalk.white('Total cost:')}   $${info.totalCost.toFixed(4)}`,
		rule,
	];

	for (const line of lines) {
		console.log(line);
	}
}

// ── Progress Bar ──

/**
 * Redraw a single-line progress bar on stdout (no trailing newline; the line
 * starts with a carriage return so successive calls overwrite each other).
 *
 * The completion ratio is clamped to the range 0..1, so out-of-range inputs
 * render an empty or full bar instead of throwing: a negative ratio or
 * negative width would otherwise reach `String.prototype.repeat` with a
 * negative count (RangeError), and `0 / 0` would print `NaN%`.
 *
 * @param current - Units completed so far.
 * @param total - Units in total.
 * @param width - Bar width in characters (default 30).
 */
export function displayProgressBar(current: number, total: number, width = 30): void {
	const rawRatio = current / total;
	// NaN (e.g. 0 / 0) is shown as 0%; everything else is clamped into [0, 1].
	const ratio = Number.isNaN(rawRatio) ? 0 : Math.min(Math.max(rawRatio, 0), 1);

	// A non-finite or negative width degrades to an empty bar.
	const barWidth = Number.isFinite(width) && width > 0 ? width : 0;

	const filled = Math.round(ratio * barWidth);
	const empty = Math.max(0, barWidth - filled);
	const bar = chalk.green('█'.repeat(filled)) + chalk.dim('░'.repeat(empty));
	const pct = (ratio * 100).toFixed(0).padStart(3);
	process.stdout.write(`\r  [${bar}] ${pct}% (${current}/${total})`);
}

// ── Result Display ──

/**
 * Print the final task outcome: a blank line, a green success or red failure
 * headline, and — when `output` is non-empty — a `Result:` section with the
 * output text.
 */
export function displayResult(success: boolean, output?: string): void {
	const headline = success
		? chalk.bold.green('Task completed successfully')
		: chalk.bold.red('Task failed');

	console.log('');
	console.log(headline);

	if (!output) {
		return;
	}

	console.log('');
	console.log(chalk.bold('Result:'));
	console.log(output);
}

// ── Helpers ──

/** Write `message` to stderr behind a red `Error:` label. */
export function displayError(message: string): void {
	const label = chalk.red('Error:');
	console.error(label, message);
}

/** Write `message` via `console.warn` behind a yellow `Warning:` label. */
export function displayWarning(message: string): void {
	const label = chalk.yellow('Warning:');
	console.warn(label, message);
}

/** Write `message` to stdout behind a blue `Info:` label. */
export function displayInfo(message: string): void {
	const label = chalk.blue('Info:');
	console.log(label, message);
}

/** Print a dimmed horizontal rule, 60 characters wide. */
export function displaySeparator(): void {
	const rule = '─'.repeat(60);
	console.log(chalk.dim(rule));
}

/** Print a blank line, the title in bold white, and a dimmed double rule (60 characters). */
export function displayHeader(title: string): void {
	const underline = chalk.dim('═'.repeat(60));
	const heading = chalk.bold.white(title);

	console.log('');
	console.log(heading);
	console.log(underline);
}


================================================
FILE: packages/cli/src/globals.ts
================================================
import { SessionManager } from './sessions.js';

/** Module-level SessionManager instance shared by the CLI command modules that import it. */
export const sessionManager = new SessionManager();


================================================
FILE: packages/cli/src/index.ts
================================================
#!/usr/bin/env bun
import { Command } from 'commander';
import { registerOpenCommand } from './commands/open.js';
import { registerClickCommand } from './commands/click.js';
import { registerTypeCommand } from './commands/type.js';
import { registerStateCommand } from './commands/state.js';
import { registerScreenshotCommand } from './commands/screenshot.js';
import { registerEvalCommand } from './commands/eval.js';
import { registerExtractCommand } from './commands/extract.js';
import { registerSessionsCommand } from './commands/sessions.js';
import { registerRunCommand } from './commands/run.js';
import { registerInteractiveCommand } from './commands/interactive.js';

const program = new Command();

program.name('open-browser');
program.description('AI-powered autonomous web browsing CLI');
program.version('0.1.0');

// Command registrars, applied in the order listed.
const registrars: ReadonlyArray<(target: Command) => void> = [
	// ── Browser manipulation commands ──
	registerOpenCommand,
	registerClickCommand,
	registerTypeCommand,
	registerStateCommand,
	registerScreenshotCommand,
	registerEvalCommand,
	registerExtractCommand,
	registerSessionsCommand,

	// ── Agent and interactive commands ──
	registerRunCommand,
	registerInteractiveCommand,
];

for (const register of registrars) {
	register(program);
}

program.parse();


================================================
FILE: packages/cli/src/protocol.ts
================================================
/** A request sent to the CLI server: one JSON object per line. */
export interface CLIRequest {
	/** Correlation id echoed back in the matching response. */
	id: string;
	/** Command name. */
	command: string;
	/** Command arguments, keyed by argument name. */
	args: Record<string, unknown>;
}

/** A response sent by the CLI server: one JSON object per line. */
export interface CLIResponse {
	/** Id of the request this response answers. */
	id: string;
	/** Whether the command succeeded. */
	success: boolean;
	/** Optional command result payload. */
	data?: unknown;
	/** Optional error message. */
	error?: string;
}

/** True for non-null, non-array objects (i.e. something decoded from a JSON `{...}`). */
function isPlainObject(value: unknown): value is Record<string, unknown> {
	return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Decode one line of JSON, returning `undefined` when it is not valid JSON. */
function decodeLine(data: string): unknown {
	try {
		return JSON.parse(data.trim());
	} catch {
		return undefined;
	}
}

/** Encode a request as a single newline-terminated JSON line. */
export function serializeRequest(req: CLIRequest): string {
	return `${JSON.stringify(req)}\n`;
}

/**
 * Decode and validate a request line.
 *
 * Returns `null` when the text is not valid JSON or does not have the
 * {@link CLIRequest} shape: `id` and `command` must be strings, and `args`,
 * when present, must be an object. A missing or `null` `args` is normalized
 * to `{}` so handlers can always index into it.
 */
export function parseRequest(data: string): CLIRequest | null {
	const parsed = decodeLine(data);
	if (!isPlainObject(parsed)) return null;

	const { id, command, args } = parsed;
	if (typeof id !== 'string' || typeof command !== 'string') return null;

	let normalizedArgs: Record<string, unknown>;
	if (args === undefined || args === null) {
		normalizedArgs = {};
	} else if (isPlainObject(args)) {
		normalizedArgs = args;
	} else {
		return null;
	}

	return { id, command, args: normalizedArgs };
}

/** Encode a response as a single newline-terminated JSON line. */
export function serializeResponse(res: CLIResponse): string {
	return `${JSON.stringify(res)}\n`;
}

/**
 * Decode and validate a response line.
 *
 * Returns `null` when the text is not valid JSON or does not have the
 * {@link CLIResponse} shape: `id` must be a string, `success` a boolean, and
 * `error`, when present, a string.
 */
export function parseResponse(data: string): CLIResponse | null {
	const parsed = decodeLine(data);
	if (!isPlainObject(parsed)) return null;

	const { id, success, error } = parsed;
	if (typeof id !== 'string' || typeof success !== 'boolean') return null;
	if (error !== undefined && typeof error !== 'string') return null;

	const response: CLIResponse = { id, success };
	if ('data' in parsed) response.data = parsed.data;
	if (error !== undefined) response.error = error;
	return response;
}


================================================
FILE: packages/cli/src/server.ts
================================================
import * as net from 'node:net';
import * as fs from 'node:fs';
import * as path from 'node:path';
import * as os from 'node:os';
import { SessionManager } from './sessions.js';
import { type CLIRequest, type CLIResponse, parseRequest, serializeResponse } from './protocol.js';

const SOCKET_DIR = path.join(os.tmpdir(), 'open-browser');
const SOCKET_PATH = path.join(SOCKET_DIR, 'server.sock');

/**
 * Line-oriented JSON server listening on a socket under the OS temp
 * directory. Each request line is dispatched to a browser session held by
 * this server's own {@link SessionManager}, and one response line is written
 * back per parsed request.
 *
 * NOTE(review): the `eval` command runs caller-supplied JavaScript in the
 * browser, and the socket lives in a shared temp directory — confirm who is
 * expected to be able to connect.
 */
export class CLIServer {
	private server: net.Server | null = null;
	readonly sessions: SessionManager;

	constructor() {
		this.sessions = new SessionManager();
	}

	/**
	 * Create the socket directory if needed, remove any stale socket file, and
	 * start listening.
	 *
	 * @returns The socket path once the server is listening.
	 */
	async start(): Promise<string> {
		fs.mkdirSync(SOCKET_DIR, { recursive: true });

		// Clean up stale socket (`force` makes this a no-op when none exists)
		fs.rmSync(SOCKET_PATH, { force: true });

		return new Promise((resolve, reject) => {
			const server = net.createServer((socket) => {
				// Decode at the stream level so a multi-byte UTF-8 character that
				// is split across two chunks is reassembled instead of corrupted.
				socket.setEncoding('utf8');
				let buffer = '';

				socket.on('data', async (data) => {
					buffer += data.toString();
					const lines = buffer.split('\n');
					// The last piece is an incomplete line; keep it for the next chunk.
					buffer = lines.pop() ?? '';

					for (const line of lines) {
						if (!line.trim()) continue;
						const request = parseRequest(line);
						if (request) {
							const response = await this.handleRequest(request);
							socket.write(serializeResponse(response));
						}
					}
				});

				socket.on('error', () => {
					// Client disconnected
				});
			});

			this.server = server;
			server.on('error', reject);
			server.listen(SOCKET_PATH, () => {
				resolve(SOCKET_PATH);
			});
		});
	}

	/**
	 * Execute one request. Never rejects: any error thrown while handling the
	 * command is converted into a `success: false` response carrying the
	 * error message.
	 */
	private async handleRequest(request: CLIRequest): Promise<CLIResponse> {
		try {
			switch (request.command) {
				case 'open': {
					const url = this.requireString(request, 'url');
					let sessionId = request.args.session as string | undefined;

					if (!sessionId) {
						sessionId = this.sessions.getDefaultId();
					}

					if (!sessionId) {
						sessionId = await this.sessions.create({
							headless: request.args.headless as boolean | undefined,
						});
					}

					const browser = this.sessions.get(sessionId);
					if (!browser) {
						// Only reachable when the caller named a session that does not exist.
						throw new Error(`Session "${sessionId}" not found.`);
					}
					await browser.navigate(url);

					return {
						id: request.id,
						success: true,
						data: { sessionId, url: browser.currentPage.url() },
					};
				}

				case 'tap': {
					const browser = this.getSessionBrowser(request);
					const selector = this.requireString(request, 'selector');
					await browser.click(selector);
					return { id: request.id, success: true };
				}

				case 'type': {
					const browser = this.getSessionBrowser(request);
					const selector = this.requireString(request, 'selector');
					const text = this.requireString(request, 'text');
					await browser.type(selector, text);
					return { id: request.id, success: true };
				}

				case 'state': {
					const browser = this.getSessionBrowser(request);
					const state = await browser.getState();
					return { id: request.id, success: true, data: state };
				}

				case 'capture': {
					const browser = this.getSessionBrowser(request);
					const result = await browser.screenshot(request.args.fullPage as boolean);
					return { id: request.id, success: true, data: result };
				}

				case 'eval': {
					const browser = this.getSessionBrowser(request);
					const expression = this.requireString(request, 'expression');
					const result = await browser.evaluate(expression);
					return { id: request.id, success: true, data: result };
				}

				case 'sessions': {
					return {
						id: request.id,
						success: true,
						data: this.sessions.list(),
					};
				}

				case 'close': {
					const sessionId = request.args.session as string | undefined;
					if (sessionId) {
						await this.sessions.close(sessionId);
					} else {
						await this.sessions.closeAll();
					}
					return { id: request.id, success: true };
				}

				default:
					return {
						id: request.id,
						success: false,
						error: `Unknown command: ${request.command}`,
					};
			}
		} catch (error) {
			return {
				id: request.id,
				success: false,
				error: error instanceof Error ? error.message : String(error),
			};
		}
	}

	/**
	 * Read a string argument from the request.
	 *
	 * @throws Error when the argument is missing or not a string, so the
	 * caller gets a descriptive error response instead of a failure deep
	 * inside the browser layer.
	 */
	private requireString(request: CLIRequest, key: string): string {
		const value = request.args[key];
		if (typeof value !== 'string') {
			throw new Error(`Missing or invalid "${key}" argument for "${request.command}" command.`);
		}
		return value;
	}

	/**
	 * Resolve the browser a request targets: the session named in
	 * `args.session`, or the default session when none is named.
	 *
	 * @throws Error when the named session does not exist, or when no session
	 * is active at all.
	 */
	private getSessionBrowser(request: CLIRequest) {
		const sessionId = request.args.session as string | undefined;
		const browser = sessionId
			? this.sessions.get(sessionId)
			: this.sessions.getDefault();

		if (!browser) {
			throw new Error(
				sessionId
					? `Session "${sessionId}" not found.`
					: 'No active session. Use "open" command first.',
			);
		}

		return browser;
	}

	/**
	 * Close every session, then stop listening and remove the socket file.
	 * The socket server is shut down even when closing a session fails; that
	 * failure is rethrown afterwards.
	 */
	async stop(): Promise<void> {
		const server = this.server;
		this.server = null;

		try {
			await this.sessions.closeAll();
		} finally {
			if (server) {
				await new Promise<void>((resolve) => {
					server.close(() => {
						fs.rmSync(SOCKET_PATH, { force: true });
						resolve();
					});
				});
			}
		}
	}

	/** Path of the socket the server listens on. */
	static get socketPath(): string {
		return SOCKET_PATH;
	}
}


================================================
FILE: packages/cli/src/sessions.ts
================================================
import { Viewport, type ViewportOptions } from 'open-browser';
import { nanoid } from 'nanoid';

/** Bookkeeping record for one live browser session. */
interface ManagedSession {
	/** Session id (the key in the manager's map). */
	id: string;
	/** The browser instance owned by this session. */
	browser: Viewport;
	/** Creation time, in milliseconds since the epoch (`Date.now()`). */
	createdAt: number;
	/** Time of the most recent lookup, in milliseconds since the epoch. */
	lastAccessedAt: number;
}

/**
 * In-memory registry of browser sessions, keyed by a short random id.
 * The "default" session is the oldest one still registered (Map insertion
 * order).
 */
export class SessionManager {
	private sessions = new Map<string, ManagedSession>();

	/**
	 * Start a new browser and register it.
	 *
	 * @param options - Forwarded to the `Viewport` constructor.
	 * @returns The new session's id.
	 * @throws Whatever `browser.start()` throws; in that case the browser is
	 * closed (best effort) and nothing is registered.
	 */
	async create(options?: ViewportOptions): Promise<string> {
		const id = nanoid(8);
		const browser = new Viewport(options);

		try {
			await browser.start();
		} catch (error) {
			// Do not leak a half-started browser when startup fails.
			await browser.close().catch(() => {});
			throw error;
		}

		const now = Date.now();
		this.sessions.set(id, {
			id,
			browser,
			createdAt: now,
			lastAccessedAt: now,
		});

		return id;
	}

	/** Look up a session's browser by id, refreshing its last-access time. */
	get(id: string): Viewport | undefined {
		const session = this.sessions.get(id);
		if (session) {
			session.lastAccessedAt = Date.now();
			return session.browser;
		}
		return undefined;
	}

	/**
	 * Close and unregister one session.
	 *
	 * @returns `false` when no session has this id, `true` otherwise.
	 * @throws Whatever `browser.close()` throws; the session is unregistered
	 * regardless, so a failed close cannot leave a dead entry behind.
	 */
	async close(id: string): Promise<boolean> {
		const session = this.sessions.get(id);
		if (!session) return false;

		try {
			await session.browser.close();
		} finally {
			this.sessions.delete(id);
		}
		return true;
	}

	/**
	 * Close and unregister every session.
	 *
	 * Every browser gets a close attempt even when an earlier one fails. The
	 * registry is emptied in all cases, and the first failure (if any) is
	 * rethrown once all attempts have finished.
	 */
	async closeAll(): Promise<void> {
		const pending = [...this.sessions.values()];
		this.sessions.clear();

		let failed = false;
		let firstError: unknown;
		for (const session of pending) {
			try {
				await session.browser.close();
			} catch (error) {
				if (!failed) {
					failed = true;
					firstError = error;
				}
			}
		}

		if (failed) {
			throw firstError;
		}
	}

	/** Snapshot of all sessions' ids and timestamps (no browser handles). */
	list(): Array<{ id: string; createdAt: number; lastAccessedAt: number }> {
		return [...this.sessions.values()].map((s) => ({
			id: s.id,
			createdAt: s.createdAt,
			lastAccessedAt: s.lastAccessedAt,
		}));
	}

	/** Number of registered sessions. */
	get activeCount(): number {
		return this.sessions.size;
	}

	/** Browser of the first registered session, refreshing its last-access time. */
	getDefault(): Viewport | undefined {
		const first = this.sessions.values().next();
		if (first.done) return undefined;
		first.value.lastAccessedAt = Date.now();
		return first.value.browser;
	}

	/** Id of the first registered session; does not touch its last-access time. */
	getDefaultId(): string | undefined {
		const first = this.sessions.keys().next();
		return first.done ? undefined : first.value;
	}
}


================================================
FILE: packages/cli/tsconfig.json
================================================
{
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "rootDir": "src",
    "outDir": "dist"
  },
  "include": ["src/**/*.ts"]
}


================================================
FILE: packages/core/package.json
================================================
{
  "name": "open-browser",
  "version": "1.1.0",
  "description": "AI-powered autonomous web browsing library for TypeScript",
  "type": "module",
  "main": "src/index.ts",
  "types": "src/index.ts",
  "exports": {
    ".": "./src/index.ts"
  },
  "scripts": {
    "build": "tsc --noEmit",
    "test": "bun test",
    "lint": "biome check src/"
  },
  "dependencies": {
    "ai": "^4.2.0",
    "@ai-sdk/openai": "^1.1.0",
    "@ai-sdk/anthropic": "^1.1.0",
    "@ai-sdk/google": "^1.1.0",
    "zod": "^3.24.0",
    "playwright": "^1.51.0",
    "mitt": "^3.0.2",
    "nanoid": "^5.1.0",
    "turndown": "^7.2.1",
    "dotenv": "^16.5.0"
  },
  "devDependencies": {
    "@types/turndown": "^5.0.5"
  },
  "peerDependencies": {
    "sharp": ">=0.33.0"
  },
  "peerDependenciesMeta": {
    "sharp": {
      "optional": true
    }
  },
  "license": "MIT"
}


================================================
FILE: packages/core/src/agent/agent.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import { Agent, type AgentOptions } from '../agent/agent.js';
import type { PageAnalyzer } from '../page/page-analyzer.js';

// ── Mock PageAnalyzer factory (injected via AgentOptions.domService) ──

// Shared `extractState` mock: resolves to a fresh, fixed page snapshot with a
// single interactive button on every call.
const mockExtractState = mock(async () => {
	const pageWidth = 1280;
	const viewportHeight = 1100;
	const documentHeight = 2000;

	return {
		tree: '<div>[1] <button>Click me</button></div>',
		selectorMap: { 1: 'button' },
		elementCount: 10,
		interactiveElementCount: 1,
		scrollPosition: { x: 0, y: 0 },
		viewportSize: { width: pageWidth, height: viewportHeight },
		documentSize: { width: pageWidth, height: documentHeight },
		pixelsAbove: 0,
		pixelsBelow: 900,
	};
});

/**
 * Build a PageAnalyzer stand-in whose methods are all mocks. `extractState`
 * is the shared `mockExtractState`; every other method gets its own fresh
 * mock returning an empty/neutral value.
 */
function createMockPageAnalyzer(): PageAnalyzer {
	const asyncNoop = () => mock(async () => {});
	const syncNoop = () => mock(() => {});
	const returnsNull = () => mock(() => null);

	const analyzer = {
		extractState: mockExtractState,
		clickElementByIndex: asyncNoop(),
		getCachedTree: returnsNull(),
		getCachedSelectorMap: returnsNull(),
		clearCache: syncNoop(),
		getInteractedElements: mock(() => []),
		clearInteractedElements: syncNoop(),
		getElementSelector: mock(async () => undefined),
		getElementByBackendNodeId: mock(async () => null),
		clickAtCoordinates: asyncNoop(),
		inputTextByIndex: asyncNoop(),
		extractWithIframes: mock(async () => ({ mainTree: null, iframeTrees: [] })),
	};

	// Only the methods listed above are stubbed, hence the double cast.
	return analyzer as unknown as PageAnalyzer;
}
import type { RunOutcome } from './types.js';
import type { LanguageModel, InferenceOptions } from '../model/interface.js';
import type { InferenceResult, InferenceUsage } from '../model/types.js';
import type { Viewport } from '../viewport/viewport.js';
import type { ViewportSnapshot } from '../viewport/types.js';
import type { CommandExecutor } from '../commands/executor.js';
import type { Command, CommandResult, ExecutionContext } from '../commands/types.js';
import type { CommandCatalog } from '../commands/catalog/catalog.js';

// ── Mock Factories ──

/** Build a usage record; `totalTokens` is the sum of the two inputs (defaults 100 and 50). */
function createMockUsage(input = 100, output = 50): InferenceUsage {
	const totalTokens = input + output;
	return { inputTokens: input, outputTokens: output, totalTokens };
}

/**
 * Build a LanguageModel stub that replays scripted responses.
 *
 * Call N returns the Nth scripted response; once the script is exhausted the
 * last response is repeated. Without a script, a single "tap element 1"
 * response is used. Every call reports the default mock usage and a `stop`
 * finish reason.
 */
function createMockModel(options?: {
	responses?: Array<{
		currentState: { evaluation: string; memory: string; nextGoal: string };
		actions: Command[];
	}>;
	modelId?: string;
}): LanguageModel {
	const script = options?.responses ?? [
		{
			currentState: {
				evaluation: 'Page loaded',
				memory: '',
				nextGoal: 'Click element',
			},
			actions: [{ action: 'tap', index: 1, clickCount: 1 } as Command],
		},
	];
	let invocations = 0;

	const invoke = async <T>(_options: InferenceOptions<T>): Promise<InferenceResult<T>> => {
		// Clamp to the final entry once the script runs out.
		const lastIndex = script.length - 1;
		const cursor = invocations < lastIndex ? invocations : lastIndex;
		invocations += 1;

		return {
			parsed: script[cursor] as unknown as T,
			usage: createMockUsage(),
			finishReason: 'stop',
		};
	};

	const model: LanguageModel = {
		modelId: options?.modelId ?? 'test-model',
		provider: 'custom',
		invoke,
	};
	return model;
}

/**
 * Creates a mock model that taps for `doneOnStep - 1` steps and then finishes.
 *
 * Intermediate step `i` (1-based) emits a `tap` on element index `i`; the final
 * scripted decision is a successful `finish` action carrying `result` as its text.
 *
 * @param doneOnStep 1-based step on which the `finish` action is emitted.
 * @param result Text reported by the `finish` action (default 'Task completed').
 */
function createDoneOnStepModel(doneOnStep: number, result = 'Task completed'): LanguageModel {
	type ScriptedDecision = {
		currentState: { evaluation: string; memory: string; nextGoal: string };
		actions: Command[];
	};

	const intermediateDecision = (step: number): ScriptedDecision => ({
		currentState: {
			evaluation: `Step ${step} assessment`,
			memory: '',
			nextGoal: `Goal for step ${step + 1}`,
		},
		actions: [{ action: 'tap', index: step, clickCount: 1 } as Command],
	});

	const finishDecision: ScriptedDecision = {
		currentState: {
			evaluation: 'Task done',
			memory: '',
			nextGoal: 'Report result',
		},
		actions: [{ action: 'finish', text: result, success: true } as Command],
	};

	const script: ScriptedDecision[] = [];
	let step = 1;
	while (step < doneOnStep) {
		script.push(intermediateDecision(step));
		step += 1;
	}
	script.push(finishDecision);

	return createMockModel({ responses: script });
}

/**
 * Returns a fresh snapshot describing one active tab open on https://example.com.
 * The page-level `url`/`title` mirror those of that single tab.
 */
function createMockBrowserState(): ViewportSnapshot {
	const url = 'https://example.com';
	const title = 'Example Page';
	const onlyTab = { tabId: 0 as any, url, title, isActive: true };

	return {
		url,
		title,
		tabs: [onlyTab],
		activeTabIndex: 0,
	};
}

/**
 * Builds a CommandCatalog stand-in whose methods are all mocks.
 * Lookups report "nothing registered"; the two description getters return fixed text.
 */
function createMockRegistry(): CommandCatalog {
	const actionDescriptions = 'click: Click on an element';
	const promptDescription = 'click: Click on an element by its index\ngo_to_url: Navigate to a URL';

	const catalogStub = {
		register: mock(() => {}),
		get: mock(() => undefined),
		getAll: mock(() => []),
		getActionDescriptions: mock(() => actionDescriptions),
		getPromptDescription: mock(() => promptDescription),
		has: mock(() => false),
	};

	return catalogStub as unknown as CommandCatalog;
}

/**
 * Builds a CommandExecutor stand-in backed by a mock registry.
 *
 * `executeActions` resolves to `actionResults` (or a single `{ success: true }`
 * result when none are given); `executeAction` resolves to the first of those.
 *
 * @param actionResults Results every executed batch should resolve to.
 */
function createMockTools(actionResults?: CommandResult[]): CommandExecutor {
	const fallbackResults: CommandResult[] = [{ success: true }];
	const cannedResults = actionResults ?? fallbackResults;

	const executeActions = mock(
		async (_actions: Command[], _ctx: ExecutionContext) => cannedResults,
	);
	const executeAction = mock(
		async (_action: Command, _ctx: ExecutionContext) => cannedResults[0],
	);

	const executorStub = {
		registry: createMockRegistry(),
		commandsPerStep: 10,
		setCoordinateClicking: mock(() => {}),
		executeActions,
		executeAction,
	};

	return executorStub as unknown as CommandExecutor;
}

/**
 * Builds a Viewport stand-in for agent tests.
 *
 * @param overrides.browserState Snapshot returned by `getState` (defaults to the mock browser state).
 * @param overrides.isConnected Value of the `isConnected` flag (defaults to `true`).
 */
function createMockBrowser(overrides?: {
	browserState?: ViewportSnapshot;
	isConnected?: boolean;
}): Viewport {
	const WIDTH = 1280;
	const HEIGHT = 1100;
	const snapshot = overrides?.browserState ?? createMockBrowserState();
	const connected = overrides?.isConnected ?? true;

	// Minimal page and CDP handles: only the members listed here are stubbed.
	const pageStub = {
		viewportSize: () => ({ width: WIDTH, height: HEIGHT }),
		evaluate: mock(async () => ({})),
	} as any;
	const cdpStub = {
		send: mock(async () => ({})),
	} as any;

	const viewportStub = {
		isConnected: connected,
		start: mock(async () => {}),
		getState: mock(async () => snapshot),
		screenshot: mock(async () => ({ base64: 'fake_screenshot', width: WIDTH, height: HEIGHT })),
		navigate: mock(async () => {}),
		currentPage: pageStub,
		cdp: cdpStub,
	};

	return viewportStub as unknown as Viewport;
}

/**
 * Assembles AgentOptions wired entirely to mocks, with zero delays and a step limit of 5.
 *
 * `overrides` replaces top-level fields wholesale: passing `settings` swaps out the
 * whole default settings object rather than merging into it.
 *
 * @param overrides Top-level option fields to replace.
 */
function createDefaultAgentOptions(overrides?: Partial<AgentOptions>): AgentOptions {
	const baseline: AgentOptions = {
		task: 'Find the price of the product',
		model: createDoneOnStepModel(2),
		browser: createMockBrowser(),
		tools: createMockTools([{ success: true, isDone: false }]),
		domService: createMockPageAnalyzer(),
		settings: {
			stepLimit: 5,
			enableScreenshots: false,
			commandDelayMs: 0,
			retryDelay: 0,
			autoNavigateToUrls: false,
			contextWindowSize: 50000,
		},
	};

	return { ...baseline, ...overrides };
}

// ── Tests ──

describe('Agent', () => {
	describe('constructor', () => {
		// These cases only construct an Agent; run() is never called.

		test('creates agent with default settings merged', () => {
			const { step, isRunning, isDone, failureCount, consecutiveFailures } = new Agent(
				createDefaultAgentOptions(),
			).getState();

			expect(step).toBe(0);
			expect(isRunning).toBe(false);
			expect(isDone).toBe(false);
			expect(failureCount).toBe(0);
			expect(consecutiveFailures).toBe(0);
		});

		test('overrides default settings with provided values', () => {
			const options = createDefaultAgentOptions({
				settings: {
					stepLimit: 50,
					enableScreenshots: false,
					commandDelayMs: 0,
					retryDelay: 0,
					autoNavigateToUrls: false,
					contextWindowSize: 50000,
				},
			});

			expect(new Agent(options).getState().stepLimit).toBe(50);
		});

		test('initializes cost tracking to zero', () => {
			const { totalCost, totalInputTokens, totalOutputTokens } = new Agent(
				createDefaultAgentOptions(),
			).getAccumulatedCost();

			expect(totalCost).toBe(0);
			expect(totalInputTokens).toBe(0);
			expect(totalOutputTokens).toBe(0);
		});

		test('initializes empty history', () => {
			const log = new Agent(createDefaultAgentOptions()).getHistory();

			expect(log.entries).toHaveLength(0);
			expect(log.task).toBe('Find the price of the product');
		});

		test('uses custom tools when provided', () => {
			const options = createDefaultAgentOptions({ tools: createMockTools() });

			expect(new Agent(options)).toBeDefined();
		});
	});

	describe('run() basic flow', () => {
		/** Tool results for a batch whose single action reports completion with `text`. */
		const doneResults = (text: string): CommandResult[] => [
			{ success: true, isDone: true, extractedContent: text },
		];

		test('completes when done action is returned', async () => {
			const answer = 'The price is $42';
			const agent = new Agent(
				createDefaultAgentOptions({
					model: createDoneOnStepModel(1, answer),
					tools: createMockTools(doneResults(answer)),
				}),
			);

			const outcome = await agent.run();

			expect(outcome.finalResult).toBe('The price is $42');
			expect(outcome.success).toBe(true);
			expect(outcome.errors).toHaveLength(0);
		});

		test('sets isRunning to false after completion', async () => {
			const agent = new Agent(
				createDefaultAgentOptions({
					model: createDoneOnStepModel(1, 'Done'),
					tools: createMockTools(doneResults('Done')),
				}),
			);

			await agent.run();

			expect(agent.getState().isRunning).toBe(false);
		});

		test('calls onStepStart callback', async () => {
			const observedSteps: number[] = [];

			// The first executed batch succeeds without finishing; the second reports done.
			let batches = 0;
			const tools = createMockTools();
			(tools.executeActions as any) = mock(async () => {
				batches += 1;
				return batches >= 2
					? [{ success: true, isDone: true, extractedContent: 'Result' }]
					: [{ success: true }];
			});

			const agent = new Agent(
				createDefaultAgentOptions({
					model: createDoneOnStepModel(2, 'Result'),
					tools,
					onStepStart: (step) => observedSteps.push(step),
				}),
			);
			await agent.run();

			expect(observedSteps.length).toBeGreaterThan(0);
			expect(observedSteps[0]).toBe(1);
		});

		test('calls onDone callback with result', async () => {
			let captured: RunOutcome | undefined;
			const agent = new Agent(
				createDefaultAgentOptions({
					model: createDoneOnStepModel(1, 'Final answer'),
					tools: createMockTools(doneResults('Final answer')),
					onDone: (outcome) => {
						captured = outcome;
					},
				}),
			);

			await agent.run();

			expect(captured).toBeDefined();
			expect(captured!.finalResult).toBe('Final answer');
		});

		test('starts browser if not connected', async () => {
			const disconnectedBrowser = createMockBrowser({ isConnected: false });
			const agent = new Agent(
				createDefaultAgentOptions({
					browser: disconnectedBrowser,
					model: createDoneOnStepModel(1, 'Result'),
					tools: createMockTools(doneResults('Result')),
				}),
			);

			await agent.run();

			expect(disconnectedBrowser.start).toHaveBeenCalled();
		});
	});

	describe('step execution', () => {
		/** Options for a run whose first step already reports done with the text 'Done'. */
		function singleStepOptions(extra?: Partial<AgentOptions>): AgentOptions {
			return createDefaultAgentOptions({
				model: createDoneOnStepModel(1, 'Done'),
				tools: createMockTools([{ success: true, isDone: true, extractedContent: 'Done' }]),
				...extra,
			});
		}

		/** Executor whose batches merely succeed until call number `doneOnCall`, which reports done. */
		function toolsDoneOnCall(doneOnCall: number): CommandExecutor {
			let calls = 0;
			const tools = createMockTools();
			(tools.executeActions as any) = mock(async () => {
				calls += 1;
				if (calls < doneOnCall) {
					return [{ success: true }];
				}
				return [{ success: true, isDone: true, extractedContent: 'Done' }];
			});
			return tools;
		}

		test('invokes browser.getState() on each step', async () => {
			const browser = createMockBrowser();

			await new Agent(singleStepOptions({ browser })).run();

			expect(browser.getState).toHaveBeenCalled();
		});

		test('invokes PageAnalyzer.extractState on each step', async () => {
			// Drop calls recorded by earlier tests so only this run is observed.
			mockExtractState.mockClear();

			await new Agent(singleStepOptions()).run();

			expect(mockExtractState).toHaveBeenCalled();
		});

		test('records history entries for each step', async () => {
			const agent = new Agent(
				createDefaultAgentOptions({
					model: createDoneOnStepModel(3, 'Done'),
					tools: toolsDoneOnCall(3),
				}),
			);

			await agent.run();

			expect(agent.getHistory().entries.length).toBeGreaterThanOrEqual(1);
		});

		test('token usage is tracked across steps', async () => {
			const agent = new Agent(
				createDefaultAgentOptions({
					model: createDoneOnStepModel(2, 'Done'),
					tools: toolsDoneOnCall(2),
				}),
			);

			await agent.run();

			const { totalInputTokens, totalOutputTokens } = agent.getState();
			expect(totalInputTokens).toBeGreaterThan(0);
			expect(totalOutputTokens).toBeGreaterThan(0);
		});
	});

	describe('failure recovery', () => {
		/** Zero-delay settings with the given step limit and failure threshold. */
		const recoverySettings = (stepLimit: number, failureThreshold: number) => ({
			stepLimit,
			failureThreshold,
			retryDelay: 0,
			enableScreenshots: false,
			commandDelayMs: 0,
			autoNavigateToUrls: false,
			contextWindowSize: 50000,
		});

		/** Model whose every invoke() rejects; `describeFailure` gets the 1-based call number. */
		const alwaysFailingModel = (describeFailure: (attempt: number) => string): LanguageModel => {
			let attempts = 0;
			return {
				modelId: 'test-model',
				provider: 'custom',
				invoke: async <T>(): Promise<InferenceResult<T>> => {
					attempts += 1;
					throw new Error(describeFailure(attempts));
				},
			};
		};

		test('consecutive failures increment failure count', async () => {
			const agent = new Agent(
				createDefaultAgentOptions({
					model: alwaysFailingModel((callCount) => `Simulated error ${callCount}`),
					settings: recoverySettings(10, 3),
				}),
			);

			const outcome = await agent.run();

			expect(outcome.errors.length).toBeGreaterThan(0);
		});

		test('agent records error about consecutive failures after failureThreshold', async () => {
			const agent = new Agent(
				createDefaultAgentOptions({
					model: alwaysFailingModel((callCount) => `Error ${callCount}`),
					settings: recoverySettings(20, 3),
				}),
			);

			const outcome = await agent.run();

			const mentionsConsecutiveFailures = outcome.errors.some((message) =>
				message.includes('consecutive failures'),
			);
			expect(mentionsConsecutiveFailures).toBe(true);
		});

		test('successful step resets consecutive failure count', async () => {
			// The first invoke() throws; every later call returns a finish decision.
			let attempts = 0;
			const flakyModel: LanguageModel = {
				modelId: 'test-model',
				provider: 'custom',
				invoke: async <T>(): Promise<InferenceResult<T>> => {
					attempts += 1;
					if (attempts === 1) {
						throw new Error('Transient error');
					}
					const decision = {
						currentState: { evaluation: 'Done', memory: '', nextGoal: '' },
						actions: [{ action: 'finish', text: 'Success', success: true }],
					};
					return {
						parsed: decision as unknown as T,
						usage: createMockUsage(),
						finishReason: 'stop',
					};
				},
			};

			const agent = new Agent(
				createDefaultAgentOptions({
					model: flakyModel,
					tools: createMockTools([
						{ success: true, isDone: true, extractedContent: 'Success' },
					]),
					settings: recoverySettings(10, 5),
				}),
			);

			const outcome = await agent.run();

			expect(outcome.finalResult).toBe('Success');
		});
	});

	describe('done action detection and result extraction', () => {
		test('detects done action and extracts result text', async () => {
			const agent = new Agent(
				createDefaultAgentOptions({
					model: createDoneOnStepModel(1, 'Product costs $99'),
					tools: createMockTools([
						{ success: true, isDone: true, extractedContent: 'Product costs $99' },
					]),
				}),
			);

			const outcome = await agent.run();

			expect(outcome.finalResult).toBe('Product costs $99');
			expect(outcome.success).toBe(true);
		});

		test('handles done action with success=false', async () => {
			// Both the model's finish action and the executor result flag the run as unsuccessful.
			const giveUpDecision = {
				currentState: { evaluation: 'Cannot find', memory: '', nextGoal: '' },
				actions: [{ action: 'finish', text: 'Could not find', success: false } as Command],
			};
			const agent = new Agent(
				createDefaultAgentOptions({
					model: createMockModel({ responses: [giveUpDecision] }),
					tools: createMockTools([
						{ success: false, isDone: true, extractedContent: 'Could not find' },
					]),
				}),
			);

			const outcome = await agent.run();

			expect(outcome.finalResult).toBe('Could not find');
			expect(outcome.success).toBe(false);
		});
	});

	describe('pause / resume / stop', () => {
		test('pause sets isPaused flag', () => {
			const agent = new Agent(createDefaultAgentOptions());
			agent.pause();
			expect(agent.getState().isPaused).toBe(true);
		});

		test('resume clears isPaused flag', () => {
			const agent = new Agent(createDefaultAgentOptions());
			agent.pause();
			agent.resume();
			expect(agent.getState().isPaused).toBe(false);
		});

		test('stop sets isRunning to false', async () => {
			// The default mock executor resolves every batch to [{ success: true }] and the
			// default mock model keeps tapping, so nothing in this run ever reports done.
			const tools = createMockTools();
			const model = createMockModel();
			const agent = new Agent(
				createDefaultAgentOptions({
					model,
					tools,
					settings: {
						stepLimit: 100,
						enableScreenshots: false,
						commandDelayMs: 0,
						retryDelay: 0,
						autoNavigateToUrls: false,
						contextWindowSize: 50000,
					},
				}),
			);

			const runPromise = agent.run();

			// Stop after a brief moment.
			// NOTE(review): with every dependency mocked and all delays at zero, the run may
			// already have reached its step limit by the time this timer fires — confirm that
			// stop() is really what ends the run if this test is meant to prove that.
			await new Promise((r) => setTimeout(r, 50));
			agent.stop();

			await runPromise;
			const state = agent.getState();
			expect(state.isRunning).toBe(false);
		});
	});

	describe('max steps reached', () => {
		/** Agent that can never finish: the model keeps tapping and no result reports done. */
		function createEndlessAgent(configuredStepLimit: number): Agent {
			return new Agent(
				createDefaultAgentOptions({
					model: createMockModel(),
					tools: createMockTools([{ success: true }]),
					settings: {
						stepLimit: configuredStepLimit,
						enableScreenshots: false,
						commandDelayMs: 0,
						retryDelay: 0,
						autoNavigateToUrls: false,
						contextWindowSize: 50000,
					},
				}),
			);
		}

		/** True when any reported error mentions the step limit. */
		const reportsStepLimit = (outcome: RunOutcome): boolean =>
			outcome.errors.some((message) => message.includes('maximum steps'));

		test('returns error when max steps exceeded without done', async () => {
			const outcome = await createEndlessAgent(3).run();

			expect(reportsStepLimit(outcome)).toBe(true);
		});

		test('run() accepts stepLimit parameter to override settings', async () => {
			// Settings allow 100 steps; the argument passed to run() is 2.
			const outcome = await createEndlessAgent(100).run(2);

			expect(reportsStepLimit(outcome)).toBe(true);
		});
	});

	describe('sensitive data filtering', () => {
		test('filters sensitive values from action results', async () => {
			const secrets = {
				apiKey: 'sk-12345',
				password: 'hunter2',
			};
			const tools = createMockTools([
				{
					success: true,
					isDone: true,
					extractedContent: 'Your API key is sk-12345 and password is hunter2',
				},
			]);

			const model = createDoneOnStepModel(1, 'Done');
			const agent = new Agent(
				createDefaultAgentOptions({
					model,
					tools,
					settings: {
						stepLimit: 5,
						enableScreenshots: false,
						commandDelayMs: 0,
						retryDelay: 0,
						autoNavigateToUrls: false,
						contextWindowSize: 50000,
						maskedValues: secrets,
					},
				}),
			);

			await agent.run();

			const history = agent.getHistory();
			// Guard against a vacuous pass: without recorded entries the loop below checks nothing.
			expect(history.entries.length).toBeGreaterThanOrEqual(1);

			let inspectedResults = 0;
			for (const entry of history.entries) {
				for (const ar of entry.actionResults) {
					if (!ar.extractedContent) {
						continue;
					}
					inspectedResults++;
					for (const secret of Object.values(secrets)) {
						expect(ar.extractedContent).not.toContain(secret);
					}
				}
			}
			// At least one recorded result must actually have been checked for leaked secrets.
			expect(inspectedResults).toBeGreaterThan(0);
		});

		test('returns unmodified results when no sensitive data configured', async () => {
			const tools = createMockTools([
				{
					success: true,
					isDone: true,
					extractedContent: 'Plain text result',
				},
			]);

			const model = createDoneOnStepModel(1, 'Done');
			const agent = new Agent(
				createDefaultAgentOptions({ model, tools }),
			);

			const result = await agent.run();
			expect(result.finalResult).toBe('Plain text result');
		});
	});

	describe('history recording', () => {
		/** Runs an agent that reports done on its first step and returns the recorded history. */
		async function historyAfterSingleStep() {
			const agent = new Agent(
				createDefaultAgentOptions({
					model: createDoneOnStepModel(1, 'Done'),
					tools: createMockTools([
						{ success: true, isDone: true, extractedContent: 'Done' },
					]),
				}),
			);
			await agent.run();
			return agent.getHistory();
		}

		test('history entries contain step number', async () => {
			// Two executed batches: the first just succeeds, the second reports done.
			let batches = 0;
			const tools = createMockTools();
			(tools.executeActions as any) = mock(async () => {
				batches += 1;
				return batches >= 2
					? [{ success: true, isDone: true, extractedContent: 'Done' }]
					: [{ success: true }];
			});

			const agent = new Agent(
				createDefaultAgentOptions({ model: createDoneOnStepModel(2, 'Done'), tools }),
			);
			await agent.run();

			const log = agent.getHistory();
			expect(log.entries.length).toBeGreaterThanOrEqual(1);
			expect(log.entries[0].step).toBe(1);
		});

		test('history entries contain browser state info', async () => {
			const log = await historyAfterSingleStep();

			expect(log.entries.length).toBeGreaterThanOrEqual(1);
			const firstEntry = log.entries[0];
			expect(firstEntry.browserState.url).toBe('https://example.com');
			expect(firstEntry.browserState.title).toBe('Example Page');
		});

		test('history entries contain usage info', async () => {
			const log = await historyAfterSingleStep();

			expect(log.entries.length).toBeGreaterThanOrEqual(1);
			const firstEntry = log.entries[0];
			expect(firstEntry.usage).toBeDefined();
			// 100 / 50 are the defaults reported by the mock model's usage.
			expect(firstEntry.usage!.inputTokens).toBe(100);
			expect(firstEntry.usage!.outputTokens).toBe(50);
		});

		test('history is finalized after run', async () => {
			const log = await historyAfterSingleStep();

			expect(log.endTime).toBeDefined();
			expect(log.totalDuration).toBeDefined();
		});
	});

	describe('cost tracking', () => {
		test('cumulative cost accumulates across steps', async () => {
			// Three executed batches; only the third one reports done.
			let batches = 0;
			const tools = createMockTools();
			(tools.executeActions as any) = mock(async () => {
				batches += 1;
				return batches >= 3
					? [{ success: true, isDone: true, extractedContent: 'Done' }]
					: [{ success: true }];
			});

			const agent = new Agent(
				createDefaultAgentOptions({ model: createDoneOnStepModel(3, 'Done'), tools }),
			);
			await agent.run();

			// Lower bounds equal the usage of a single mocked model call (100 in / 50 out).
			const { totalInputTokens, totalOutputTokens } = agent.getAccumulatedCost();
			expect(totalInputTokens).toBeGreaterThanOrEqual(100);
			expect(totalOutputTokens).toBeGreaterThanOrEqual(50);
		});
	});

	describe('follow-up tasks', () => {
		test('addNewTask stores follow-up tasks', () => {
			const agent = new Agent(createDefaultAgentOptions());
			agent.addNewTask('Follow up: check price again');
			agent.addNewTask('Follow up: compare with competitor');

			// Tasks come back in insertion order.
			const [firstTask, secondTask, ...remaining] = agent.getFollowUpTasks();
			expect([firstTask, secondTask, ...remaining]).toHaveLength(2);
			expect(firstTask).toBe('Follow up: check price again');
			expect(secondTask).toBe('Follow up: compare with competitor');
		});

		test('getFollowUpTasks returns a copy', () => {
			const agent = new Agent(createDefaultAgentOptions());
			agent.addNewTask('Task 1');

			const firstRead = agent.getFollowUpTasks();
			const secondRead = agent.getFollowUpTasks();

			// Equal contents, distinct array instances.
			expect(firstRead).toEqual(secondRead);
			expect(firstRead).not.toBe(secondRead);
		});
	});

	describe('getState', () => {
		test('returns a copy of the state', () => {
			const agent = new Agent(createDefaultAgentOptions());

			const firstRead = agent.getState();
			const secondRead = agent.getState();

			// Equal contents, distinct object instances.
			expect(firstRead).toEqual(secondRead);
			expect(firstRead).not.toBe(secondRead);
		});

		test('tracks current URL after run', async () => {
			const agent = new Agent(
				createDefaultAgentOptions({
					model: createDoneOnStepModel(1, 'Done'),
					tools: createMockTools([
						{ success: true, isDone: true, extractedContent: 'Done' },
					]),
				}),
			);

			await agent.run();

			// The URL is the one reported by the mock browser snapshot.
			expect(agent.getState().currentUrl).toBe('https://example.com');
		});
	});

	describe('getAccumulatedCost', () => {
		test('returns a copy of cost data', () => {
			const agent = new Agent(createDefaultAgentOptions());

			const firstRead = agent.getAccumulatedCost();
			const secondRead = agent.getAccumulatedCost();

			// Equal contents, distinct object instances.
			expect(firstRead).toEqual(secondRead);
			expect(firstRead).not.toBe(secondRead);
		});
	});

	describe('run result structure', () => {
		/** Runs an agent that finishes on its first step with 'Answer' and returns the outcome. */
		const runToAnswer = () =>
			new Agent(
				createDefaultAgentOptions({
					model: createDoneOnStepModel(1, 'Answer'),
					tools: createMockTools([
						{ success: true, isDone: true, extractedContent: 'Answer' },
					]),
				}),
			).run();

		test('result contains all expected fields', async () => {
			const outcome = await runToAnswer();

			const expectedFields = ['finalResult', 'success', 'history', 'errors', 'totalCost'];
			for (const field of expectedFields) {
				expect(outcome).toHaveProperty(field);
			}
		});

		test('result.history is an ExecutionLog', async () => {
			const { history } = await runToAnswer();

			expect(history).toBeDefined();
			expect(history.task).toBe('Find the price of the product');
			// `finalResult` is exposed on the log as a method, not as a plain value.
			expect(typeof history.finalResult).toBe('function');
		});
	});
});


================================================
FILE: packages/core/src/agent/agent.ts
================================================
import { z, ZodError } from 'zod';
import type { LanguageModel, InferenceOptions } from '../model/interface.js';
import type { Viewport } from '../viewport/viewport.js';
import type { FileAccess } from '../sandbox/file-access.js';
import { PageAnalyzer } from '../page/page-analyzer.js';
import { CommandExecutor } from '../commands/executor.js';
import type { Command, CommandResult, ExecutionContext } from '../commands/types.js';
import { CommandSchema } from '../commands/types.js';
import { InstructionBuilder } from './instructions.js';
import { ConversationManager } from './conversation/service.js';
import { StallDetector, hashPageTree, hashTextContent } from './stall-detector.js';
import { ReplayRecorder } from './replay-recorder.js';
import { ResultEvaluator } from './evaluator.js';
import {
	type AgentConfig,
	type AgentState,
	type AgentDecision,
	type StepRecord,
	ExecutionLog,
	type RunOutcome,
	type AccumulatedCost,
	type EvaluationResult,
	type QuickCheckResult,
	ReasoningSchema,
	AgentDecisionCompactSchema,
	AgentDecisionDirectSchema,
	PlanRevisionSchema,
	DEFAULT_AGENT_CONFIG,
	calculateStepCost,
	supportsDeepReasoning,
	supportsCoordinateMode,
	isCompactModel,
} from './types.js';
import {
	AgentError,
	StepLimitExceededError,
	AgentStalledError,
	ModelThrottledError,
} from '../errors.js';
import {
	Timer,
	sleep,
	truncateText,
	withDeadline,
	extractUrls,
	escapeRegExp,
} from '../utils.js';
import { createLogger } from '../logging.js';

// Module-level logger, created with the name 'agent'.
const logger = createLogger('agent');

// ── Agent Options ──

/**
 * Construction options for {@link Agent}.
 *
 * Only `task`, `model` and `browser` are required; every other collaborator is
 * optional and, where the constructor shows it, replaced by a default instance.
 */
export interface AgentOptions {
	/** Task text; the constructor copies it into the agent settings as `task`. */
	task: string;
	/** Main language model; also the fallback for the judge and extraction models. */
	model: LanguageModel;
	/** Browser viewport the agent is given to work with. */
	browser: Viewport;
	/** Command executor to use (defaults to a new CommandExecutor built from the settings) */
	tools?: CommandExecutor;
	/** Pre-configured PageAnalyzer instance (defaults to a new PageAnalyzer) */
	domService?: PageAnalyzer;
	/** Overrides spread on top of DEFAULT_AGENT_CONFIG by the constructor. */
	settings?: Partial<AgentConfig>;
	/** Separate model for the judge (defaults to main model) */
	judgeModel?: LanguageModel;
	/** Separate model for extraction actions (defaults to main model) */
	extractionModel?: LanguageModel;
	/** File system access for sandbox operations */
	fileSystem?: FileAccess;
	/** Receives a step number. NOTE(review): presumably fired when a step begins — call site not visible here. */
	onStepStart?: (step: number) => void;
	/** Receives a step number and command results. NOTE(review): presumably fired when a step ends — confirm. */
	onStepEnd?: (step: number, result: CommandResult[]) => void;
	/** Receives the run outcome. NOTE(review): presumably fired once the run completes — confirm. */
	onDone?: (result: RunOutcome) => void;
}

// ── Agent ──

export class Agent {
	private model: LanguageModel;
	private browser: Viewport;
	private tools: CommandExecutor;
	private domService: PageAnalyzer;
	private messageManager: ConversationManager;
	private loopDetector: StallDetector;
	private gifRecorder?: ReplayRecorder;
	private judge?: ResultEvaluator;
	private settings: AgentConfig;
	private extractionModel?: LanguageModel;
	private fileSystem?: FileAccess;

	private state: AgentState;
	private historyList: ExecutionLog;
	private startTime = 0;
	private followUpTasks: string[] = [];

	private onStepStart?: (step: number) => void;
	private onStepEnd?: (step: number, result: CommandResult[]) => void;
	private onDone?: (result: RunOutcome) => void;

	/**
	 * Assemble an agent from the supplied options.
	 *
	 * Settings are resolved first (defaults, then caller overrides, then the
	 * task), because every collaborator created below is configured from them.
	 * Collaborators the caller did not provide are created with defaults.
	 *
	 * @param options Task, model, browser and optional overrides/callbacks.
	 */
	constructor(options: AgentOptions) {
		const { model, browser, task, settings: overrides } = options;

		this.model = model;
		this.browser = browser;
		this.settings = { ...DEFAULT_AGENT_CONFIG, ...overrides, task };
		this.extractionModel = options.extractionModel;
		this.fileSystem = options.fileSystem;

		// Command executor: extraction falls back to the main model.
		const buildDefaultExecutor = (): CommandExecutor =>
			new CommandExecutor({
				model: this.extractionModel ?? this.model,
				allowedUrls: this.settings.allowedUrls,
				blockedUrls: this.settings.blockedUrls,
				commandsPerStep: this.settings.commandsPerStep,
			});
		this.tools = options.tools ?? buildDefaultExecutor();

		// Page analyzer
		const buildDefaultAnalyzer = (): PageAnalyzer =>
			new PageAnalyzer({
				capturedAttributes: this.settings.capturedAttributes,
			});
		this.domService = options.domService ?? buildDefaultAnalyzer();

		// Conversation bookkeeping
		this.messageManager = new ConversationManager({
			contextWindowSize: this.settings.contextWindowSize,
			includeLastScreenshot: this.settings.enableScreenshots,
			maskedValues: this.settings.maskedValues,
			compaction: this.settings.conversationCompaction,
		});

		this.loopDetector = new StallDetector();

		// Replay recording is opt-in via an output path.
		const replayPath = this.settings.replayOutputPath;
		if (replayPath) {
			this.gifRecorder = new ReplayRecorder({ outputPath: replayPath });
		}

		// A judge is needed for either the full evaluation or the quick check.
		const wantsJudge =
			this.settings.enableEvaluation || this.settings.enableSimpleJudge;
		if (wantsJudge) {
			this.judge = new ResultEvaluator(options.judgeModel ?? this.model);
		}

		// Auto-enable coordinate clicking for supported models
		if (
			this.settings.autoEnableCoordinateClicking &&
			supportsCoordinateMode(this.model.modelId)
		) {
			this.tools.setCoordinateClicking(true);
			logger.info(`Coordinate clicking auto-enabled for model ${this.model.modelId}`);
		}

		// Fresh run state: nothing executed, nothing spent.
		this.state = {
			step: 0,
			stepLimit: this.settings.stepLimit,
			failureCount: 0,
			consecutiveFailures: 0,
			isRunning: false,
			isPaused: false,
			isDone: false,
			totalInputTokens: 0,
			totalOutputTokens: 0,
			cumulativeCost: {
				totalInputTokens: 0,
				totalOutputTokens: 0,
				totalInputCost: 0,
				totalOutputCost: 0,
				totalCost: 0,
			},
		};

		this.historyList = new ExecutionLog({ task: this.settings.task });

		// Lifecycle callbacks
		this.onStepStart = options.onStepStart;
		this.onStepEnd = options.onStepEnd;
		this.onDone = options.onDone;
	}

	// ────────────────────────────────────────
	//  Main run loop
	// ────────────────────────────────────────

	/**
	 * Run the agent loop until the task is reported done, the agent is stopped,
	 * or the step budget is used up.
	 *
	 * Before the loop: starts the browser if needed, builds the system prompt,
	 * optionally navigates to the first URL in the task and runs the preflight
	 * commands. After the loop: optionally runs the full evaluation, finalizes
	 * the history and invokes `onDone`.
	 *
	 * Step-limit, stall and agent errors are reported through
	 * `RunOutcome.errors` instead of being thrown.
	 *
	 * @param stepLimit Optional per-run override for `settings.stepLimit`.
	 * @returns The final result (if any), success flag, history, collected
	 *   error messages, judge verdicts and accumulated cost.
	 * @throws Any error from the startup phase, and any error escaping the loop
	 *   that is not an AgentError, AgentStalledError or StepLimitExceededError.
	 */
	async run(stepLimit?: number): Promise<RunOutcome> {
		const effectiveMaxSteps = stepLimit ?? this.settings.stepLimit;
		this.state.stepLimit = effectiveMaxSteps;
		this.state.isRunning = true;
		this.startTime = Date.now();

		// Ensure browser is started
		if (!this.browser.isConnected) {
			await this.browser.start();
		}

		// Build system prompt (may be rebuilt per step if dynamicCommandSchema is on)
		this.rebuildInstructionBuilder();

		// URL extraction: auto-navigate to first URL found in task text
		if (this.settings.autoNavigateToUrls) {
			await this.autoNavigateFromTask();
		}

		// Execute initial actions before the main loop
		if (this.settings.preflightCommands.length > 0) {
			await this.executeInitialActions();
		}

		const errors: string[] = [];
		let finalResult: string | undefined;
		let success = false;
		let judgement: EvaluationResult | undefined;
		let simpleJudgement: QuickCheckResult | undefined;
		// Consecutive rate-limit hits. Only drives the backoff exponent, so that
		// throttling never counts toward `failureThreshold`.
		let throttleStreak = 0;

		try {
			for (let step = 1; step <= effectiveMaxSteps; step++) {
				if (!this.state.isRunning || this.state.isDone) break;

				// Pause support. Also watch isRunning so that stop() called while
				// paused ends the run instead of waiting here forever.
				while (this.state.isPaused && this.state.isRunning) {
					await sleep(100);
				}
				if (!this.state.isRunning) break;

				this.state.step = step;
				this.onStepStart?.(step);

				try {
					// Wrap step execution in optional timeout
					const stepPromise = this.executeStep(step, effectiveMaxSteps);
					const result = this.settings.stepDeadlineMs > 0
						? await withDeadline(
								stepPromise,
								this.settings.stepDeadlineMs,
								`Step ${step} timed out after ${this.settings.stepDeadlineMs}ms`,
						  )
						: await stepPromise;

					this.state.consecutiveFailures = 0;
					throttleStreak = 0;

					// Check if done
					const doneResult = result.find((r) => r.isDone);
					if (doneResult) {
						finalResult = doneResult.extractedContent;
						success = doneResult.success;

						// Simple judge: quick validation before accepting the result
						if (this.settings.enableSimpleJudge && this.judge && finalResult) {
							simpleJudgement = await this.judge.simpleEvaluate(
								this.settings.task,
								finalResult,
							);

							if (simpleJudgement.shouldRetry && step < effectiveMaxSteps) {
								logger.info(
									`Simple judge suggests retry: ${simpleJudgement.reason}`,
								);
								this.messageManager.addCommandResultMessage(
									`The result was reviewed and found lacking: ${simpleJudgement.reason}. ` +
									'Please try a different approach to complete the task.',
									step,
								);
								// The rejected answer must not be reported as a success if
								// no later step produces an accepted one.
								success = false;
								// Don't mark as done -- continue the loop
								continue;
							}
						}

						this.state.isDone = true;
						break;
					}

					// NOTE(review): not reached for the step that finishes the task or
					// for a judge-triggered retry (both leave the iteration above).
					this.onStepEnd?.(step, result);

					// Planning: periodically update the plan
					if (this.settings.enableStrategy && this.shouldUpdatePlan(step)) {
						await this.updatePlan(step);
					}

					// Replan on stall: if loop detector shows stuck + planning enabled
					if (this.settings.restrategizeOnStall && this.settings.enableStrategy) {
						const loopCheck = this.loopDetector.isStuck();
						if (loopCheck.stuck && loopCheck.severity >= 2) {
							logger.info('Agent stalled, triggering replan');
							await this.updatePlan(step);
						}
					}

					// Message compaction: every N steps (LLM-based)
					if (this.messageManager.shouldCompactWithLlm()) {
						const compacted = await this.messageManager.compactWithLlm(this.model);
						if (compacted) {
							logger.debug(`Messages compacted at step ${step}`);
						}
					}

					// Save conversation per step if configured
					if (this.settings.conversationOutputPath) {
						await this.saveConversation(step);
					}
				} catch (error) {
					// A severe stall is terminal. executeStep raises it before any new
					// action is recorded, so retrying would only burn steps; hand it
					// to the outer handler, which reports it in `errors`.
					if (error instanceof AgentStalledError) {
						throw error;
					}

					// Rate limit retry with exponential backoff
					if (error instanceof ModelThrottledError) {
						const waitMs = error.retryAfterMs ?? Math.min(
							60_000,
							this.settings.retryDelay * 1000 * 2 ** throttleStreak,
						);
						logger.warn(`Rate limited, waiting ${waitMs}ms before retry`);
						await sleep(waitMs);
						throttleStreak++;
						// Don't count rate limits toward max failures.
						// Note: `continue` still advances the loop's step counter.
						continue;
					}

					const message = error instanceof Error ? error.message : String(error);
					errors.push(`Step ${step}: ${message}`);

					this.state.failureCount++;
					this.state.consecutiveFailures++;

					if (this.state.consecutiveFailures >= this.settings.failureThreshold) {
						// Failure recovery: make one final LLM call to diagnose
						const failureSummary = await this.makeFailureRecoveryCall(errors);
						if (failureSummary) {
							finalResult = failureSummary;
						}

						throw new AgentError(
							`Too many consecutive failures (${this.state.consecutiveFailures})`,
						);
					}

					// Add error message to conversation
					this.messageManager.addCommandResultMessage(
						`Error: ${truncateText(message, 400)}`,
						step,
					);

					// Wait before retry
					await sleep(this.settings.retryDelay * 1000);
				}
			}

			if (!this.state.isDone && this.state.step >= effectiveMaxSteps) {
				throw new StepLimitExceededError(this.state.step, effectiveMaxSteps);
			}
		} catch (error) {
			if (
				error instanceof StepLimitExceededError ||
				error instanceof AgentStalledError ||
				error instanceof AgentError
			) {
				errors.push(error.message);
			} else {
				throw error;
			}
		} finally {
			this.state.isRunning = false;

			// Save recording. Best-effort: a throw from inside `finally` would
			// replace an in-flight error or reject an otherwise complete run.
			if (this.gifRecorder) {
				try {
					await this.gifRecorder.save();
				} catch (saveError) {
					logger.warn(
						`Failed to save replay recording: ${
							saveError instanceof Error ? saveError.message : String(saveError)
						}`,
					);
				}
			}
		}

		// Full judge evaluation after completion
		if (this.settings.enableEvaluation && this.judge && finalResult) {
			judgement = await this.judge.evaluate(
				this.settings.task,
				finalResult,
				this.historyList.entries,
				{
					expectedOutcome: this.settings.expectedOutcome,
					includeScreenshots: this.settings.enableScreenshots,
				},
			);
		}

		// Finalize history
		this.historyList.finish();

		const runResult: RunOutcome = {
			finalResult,
			success,
			history: this.historyList,
			errors,
			judgement,
			simpleJudgement,
			totalCost: { ...this.state.cumulativeCost },
		};

		this.onDone?.(runResult);
		return runResult;
	}

	// ────────────────────────────────────────
	//  Step Execution
	// ────────────────────────────────────────

	/**
	 * Execute a single agent step: observe the page, ask the model for a
	 * decision, run the returned commands and record the outcome.
	 *
	 * Sequence: read browser state -> (optionally) rebuild the system prompt ->
	 * extract the DOM -> (optionally) take a screenshot -> stall check ->
	 * append the state message -> invoke the model -> execute the commands ->
	 * feed stall detection -> append results to the conversation -> append a
	 * history entry.
	 *
	 * @param step Step number being executed.
	 * @param stepLimit Step budget, passed into the state prompt.
	 * @returns The command results as returned by the executor (not passed
	 *   through `filterSensitiveData`).
	 * @throws AgentStalledError when the stall detector reports severity >= 3.
	 */
	private async executeStep(step: number, stepLimit: number): Promise<CommandResult[]> {
		const timer = new Timer();

		// Get browser state
		const browserState = await this.browser.getState();
		this.state.currentUrl = browserState.url;

		// Dynamic action schema: rebuild system prompt per step based on current URL
		if (this.settings.dynamicCommandSchema) {
			this.rebuildInstructionBuilder(browserState.url);
		}

		// Extract DOM
		const domState = await this.domService.extractState(
			this.browser.currentPage,
			this.browser.cdp!,
		);

		// Take screenshot if using vision
		let screenshot: string | undefined;
		if (this.settings.enableScreenshots) {
			const screenshotResult = await this.browser.screenshot();
			screenshot = screenshotResult.base64;

			// Replay frames are only captured when screenshots are enabled;
			// each frame is labelled with the current URL.
			if (this.gifRecorder) {
				const actionLabel = browserState.url;
				this.gifRecorder.addFrame(screenshot, step, actionLabel);
			}
		}

		// Build state message
		const stateText = InstructionBuilder.buildStatePrompt(
			browserState.url,
			browserState.title,
			browserState.tabs,
			domState.tree,
			step,
			stepLimit,
			domState.pixelsAbove,
			domState.pixelsBelow,
		);

		// Check for loop
		const loopCheck = this.loopDetector.isStuck();
		let additionalContext = '';
		if (loopCheck.stuck) {
			additionalContext = InstructionBuilder.buildLoopNudge(
				this.loopDetector.getLoopNudgeMessage(),
			);

			// Severe loop: throw stuck error
			// (thrown before the state message is added and before the model is called)
			if (loopCheck.severity >= 3) {
				throw new AgentStalledError(
					`Agent stuck: ${loopCheck.reason} (severity ${loopCheck.severity})`,
				);
			}
		}

		// Add plan context if planning is enabled
		if (this.settings.enableStrategy && this.state.currentPlan) {
			additionalContext += InstructionBuilder.buildPlanPrompt(this.state.currentPlan);
		}

		// Add messages
		this.messageManager.addStateMessage(
			stateText + additionalContext,
			screenshot,
			step,
		);

		// Determine output schema based on mode
		const outputSchema = this.getOutputSchema();

		// Invoke LLM with optional timeout and Zod recovery
		const completion = await this.invokeLlmWithRecovery(outputSchema, step);

		// Update token tracking
		this.state.totalInputTokens += completion.usage.inputTokens;
		this.state.totalOutputTokens += completion.usage.outputTokens;

		// Cost tracking
		this.updateCostTracking(completion.usage.inputTokens, completion.usage.outputTokens, step);

		const output = completion.parsed;

		// Normalize output to standard AgentDecision shape
		const normalizedOutput = this.normalizeOutput(output);

		// Add assistant response
		// (only the reasoning part, `currentState`, is stored; the actions are not)
		this.messageManager.addAssistantMessage(
			JSON.stringify(normalizedOutput.currentState),
			step,
		);

		// Execute actions
		const context: ExecutionContext = {
			page: this.browser.currentPage,
			cdpSession: this.browser.cdp!,
			domService: this.domService,
			browserSession: this.browser,
			extractionLlm: this.extractionModel,
			fileSystem: this.fileSystem,
			maskedValues: this.settings.maskedValues,
		};

		const actions = normalizedOutput.actions as Command[];
		const results = await this.tools.executeActions(actions, context);

		// Record for loop detection (with enhanced fingerprint)
		// The fingerprint uses the page state captured BEFORE the actions ran.
		this.loopDetector.recordAction(actions);
		this.loopDetector.recordFingerprint({
			url: browserState.url,
			domHash: hashPageTree(domState.tree),
			scrollY: domState.scrollPosition.y,
			elementCount: domState.elementCount,
			textHash: hashTextContent(domState.tree.slice(0, 2000)),
		});

		// Filter sensitive data from results
		const filteredResults = this.filterSensitiveData(results);

		// Add action results to conversation
		// One line per result, paired with the action at the same index.
		const resultText = filteredResults
			.map((r, i) => {
				const actionName = actions[i]?.action ?? 'unknown';
				const status = r.success ? 'success' : `error: ${r.error}`;
				const content = r.extractedContent
					? `\nContent: ${r.extractedContent}`
					: '';
				return `${actionName}: ${status}${content}`;
			})
			.join('\n');

		if (resultText) {
			this.messageManager.addCommandResultMessage(resultText, step);
		}

		// Wait between actions
		// NOTE(review): the setting is named `...Ms` but is multiplied by 1000
		// before being passed to sleep(); the other `...Ms` settings in this
		// class are used without scaling -- confirm the intended unit.
		if (this.settings.commandDelayMs > 0) {
			await sleep(this.settings.commandDelayMs * 1000);
		}

		// Record history entry
		const entry: StepRecord = {
			step,
			timestamp: Date.now(),
			browserState: {
				url: browserState.url,
				title: browserState.title,
				tabs: browserState.tabs,
				// Only actions that carry an `index` property are listed here.
				interactedElements: actions
					.filter((a): a is Command & { index: number } => 'index' in a)
					.map((a) => ({
						index: a.index,
						description: '',
						action: a.action,
					})),
				screenshot,
			},
			agentOutput: normalizedOutput as AgentDecision,
			actionResults: filteredResults,
			usage: completion.usage,
			duration: timer.elapsed(),
			metadata: {
				stepNumber: step,
				durationMs: timer.elapsed(),
				inputTokens: completion.usage.inputTokens,
				outputTokens: completion.usage.outputTokens,
				actionCount: actions.length,
				url: browserState.url,
				startedAt: Date.now() - timer.elapsed(),
				completedAt: Date.now(),
			},
		};

		this.historyList.addEntry(entry);

		// NOTE(review): history and conversation receive `filteredResults`,
		// while the caller receives the unfiltered `results` -- confirm intended.
		return results;
	}

	// ────────────────────────────────────────
	//  LLM Invocation with Zod Recovery
	// ────────────────────────────────────────

	/**
	 * Ask the model for a structured decision, re-prompting when the response
	 * fails schema validation.
	 *
	 * On a ZodError the validation issues are appended to the conversation and
	 * the call is repeated, for as long as the attempt counter is below 2.
	 * Every other error (including rate limits, which the main loop handles)
	 * is propagated unchanged.
	 *
	 * @param outputSchema Schema the response must satisfy.
	 * @param step Step number, used to tag the corrective message.
	 * @param retryCount Attempt counter to start from (defaults to 0).
	 * @returns The parsed response together with its token usage.
	 */
	private async invokeLlmWithRecovery(
		outputSchema: z.ZodType<unknown>,
		step: number,
		retryCount = 0,
	): Promise<{
		parsed: Record<string, unknown>;
		usage: { inputTokens: number; outputTokens: number; totalTokens: number };
	}> {
		for (let attempt = retryCount; ; attempt++) {
			// Rebuilt on every attempt so corrective messages are included.
			const invokeOptions: InferenceOptions<unknown> = {
				messages: this.messageManager.getMessages(),
				responseSchema: outputSchema,
				schemaName: this.getSchemaName(),
				schemaDescription: 'Agent decision with current state assessment and actions to take',
			};

			// Extended thinking: pass thinking budget as maxTokens
			const deepReasoning =
				this.settings.enableDeepReasoning &&
				supportsDeepReasoning(this.model.modelId);
			if (deepReasoning) {
				invokeOptions.maxTokens = this.settings.reasoningBudget;
			}

			try {
				// Apply the model deadline only when one is configured.
				const pending = this.model.invoke(invokeOptions);
				const deadlineMs = this.settings.modelDeadlineMs;
				const completion =
					deadlineMs > 0
						? await withDeadline(
								pending,
								deadlineMs,
								`LLM call timed out after ${deadlineMs}ms`,
						  )
						: await pending;

				return {
					parsed: completion.parsed as Record<string, unknown>,
					usage: completion.usage,
				};
			} catch (error) {
				// Only validation failures are retried, and only while attempts remain.
				if (!(error instanceof ZodError) || attempt >= 2) {
					throw error;
				}

				logger.warn(
					`Zod validation failed (attempt ${attempt + 1}), re-prompting LLM`,
				);

				const issueLines: string[] = [];
				for (const issue of error.issues) {
					issueLines.push(`- ${issue.path.join('.')}: ${issue.message}`);
				}
				const issues = issueLines.join('\n');

				this.messageManager.addCommandResultMessage(
					'Your previous response had a validation error. ' +
					'Please fix the following issues and respond again:\n' +
					`${issues}\n\n` +
					'Make sure your response matches the expected JSON schema exactly.',
					step,
				);
			}
		}
	}

	// ────────────────────────────────────────
	//  Output Schema Selection
	// ────────────────────────────────────────

	/**
	 * Pick the response schema for the current mode.
	 *
	 * Precedence: compact mode (setting or compact model), then direct mode
	 * (deep reasoning enabled and supported by the model), then the full
	 * schema with reasoning state plus typed commands.
	 */
	private getOutputSchema(): z.ZodType<unknown> {
		const { compactMode, enableDeepReasoning } = this.settings;
		const { modelId } = this.model;

		// Flash mode: simpler schema for cheaper / faster models
		const useCompact = compactMode || isCompactModel(modelId);
		if (useCompact) {
			return AgentDecisionCompactSchema as z.ZodType<unknown>;
		}

		// Extended thinking: model reasons internally, skip brain schema
		const useDirect = enableDeepReasoning && supportsDeepReasoning(modelId);
		if (useDirect) {
			return AgentDecisionDirectSchema as z.ZodType<unknown>;
		}

		// Default full schema with brain + typed action union
		const fullSchema = z.object({
			currentState: ReasoningSchema,
			actions: z.array(CommandSchema),
		});
		return fullSchema as z.ZodType<unknown>;
	}

	/**
	 * Name passed to the model alongside the response schema. Uses the same
	 * precedence as `getOutputSchema`: compact, then direct, then default.
	 */
	private getSchemaName(): string {
		const { compactMode, enableDeepReasoning } = this.settings;
		const { modelId } = this.model;

		const useCompact = compactMode || isCompactModel(modelId);
		if (useCompact) return 'AgentDecisionCompact';

		const useDirect = enableDeepReasoning && supportsDeepReasoning(modelId);
		return useDirect ? 'AgentDecisionDirect' : 'AgentDecision';
	}

	/**
	 * Bring any of the response shapes into the standard AgentDecision form.
	 *
	 * - An output carrying `currentState` is returned as-is.
	 * - `{ goal, actions }` (compact): the goal is used as both evaluation and next goal.
	 * - `{ actions }` (direct): the reasoning fields are left empty.
	 * - Anything else is returned unchanged.
	 */
	private normalizeOutput(output: Record<string, unknown>): AgentDecision {
		// Standard schema passthrough
		if ('currentState' in output) {
			return output as AgentDecision;
		}

		const actions = (output.actions ?? []) as Record<string, unknown>[];

		// Flash schema: { goal, actions }
		if ('goal' in output) {
			const goal = String(output.goal ?? '');
			return {
				currentState: { evaluation: goal, memory: '', nextGoal: goal },
				actions,
			};
		}

		// No-thinking schema: { actions } only
		if ('actions' in output) {
			return {
				currentState: { evaluation: '', memory: '', nextGoal: '' },
				actions,
			};
		}

		// Unrecognized shape: hand it back untouched
		return output as AgentDecision;
	}

	// ────────────────────────────────────────
	//  Planning System
	// ────────────────────────────────────────

	/**
	 * Whether a periodic plan refresh is due at `step`.
	 *
	 * True when planning is enabled and at least `strategyInterval` steps
	 * (5 when the setting is not positive) have passed since the last plan.
	 */
	private shouldUpdatePlan(step: number): boolean {
		const { enableStrategy, strategyInterval } = this.settings;
		if (!enableStrategy) return false;

		const interval = strategyInterval > 0 ? strategyInterval : 5;
		const stepsSincePlan = step - (this.state.lastPlanStep ?? 0);
		return stepsSincePlan >= interval;
	}

	/**
	 * Ask the model for a revised plan and store it in the agent state.
	 *
	 * The prompt contains the task, the step counter, the current plan (if
	 * any) and the evaluations of the last five history entries. Failures are
	 * logged as warnings and otherwise ignored, leaving the plan unchanged.
	 *
	 * @param step Current step; recorded as `lastPlanStep` on success.
	 */
	private async updatePlan(step: number): Promise<void> {
		try {
			const progressLines: string[] = [];
			for (const e of this.historyList.entries.slice(-5)) {
				const evaluation = e.agentOutput.currentState?.evaluation ?? '(no eval)';
				progressLines.push(`Step ${e.step}: ${evaluation}`);
			}
			const recentHistory = progressLines.join('\n');

			const existingPlan = this.state.currentPlan
				? `Current plan:\n${this.state.currentPlan}\n\n`
				: '';

			const planPrompt = [
				`Task: ${this.settings.task}\n\n`,
				`Current step: ${step}/${this.state.stepLimit}\n`,
				existingPlan,
				`Recent progress:\n${recentHistory}\n\n`,
				'Based on the current progress, provide an updated plan. ',
				'Include what has been accomplished and what remains.',
			].join('');

			// Use ephemeral message so the plan prompt doesn't persist
			this.messageManager.addEphemeralMessage(planPrompt);

			const completion = await this.model.invoke({
				messages: this.messageManager.getMessages(),
				responseSchema: PlanRevisionSchema,
				schemaName: 'PlanRevision',
				temperature: 0.3,
			});

			const revision = completion.parsed;
			this.state.currentPlan = revision.plan;
			this.state.lastPlanStep = step;

			logger.info(`Plan updated at step ${step}: ${revision.reasoning}`);
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			logger.warn(`Plan update failed at step ${step}: ${reason}`);
		}
	}

	// ────────────────────────────────────────
	//  System Prompt Management
	// ────────────────────────────────────────

	/**
	 * Build the system prompt from the current settings and command registry
	 * and install it on the conversation manager.
	 *
	 * @param pageUrl Optional current page URL, forwarded to the builder so the
	 *   registry can filter action descriptions to domain-relevant ones.
	 */
	private rebuildInstructionBuilder(pageUrl?: string): void {
		const builder = InstructionBuilder.fromSettings(
			this.settings,
			this.tools.registry,
			pageUrl,
		);
		const promptText = builder.build();
		this.messageManager.setInstructionBuilder(promptText);
	}

	// ────────────────────────────────────────
	//  URL Extraction from Task Text
	// ────────────────────────────────────────

	/**
	 * Navigate to the first URL found in the task text, if there is one.
	 * Navigation failures are logged as warnings and do not abort the run.
	 */
	private async autoNavigateFromTask(): Promise<void> {
		const candidates = extractUrls(this.settings.task);
		if (candidates.length === 0) {
			return;
		}

		const target = candidates[0];
		logger.info(`Auto-navigating to URL found in task: ${target}`);

		try {
			await this.browser.navigate(target);
			// Give the page a moment to load
			await sleep(1000);
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			logger.warn(`Auto-navigation to ${target} failed: ${reason}`);
		}
	}

	// ────────────────────────────────────────
	//  Initial Actions
	// ────────────────────────────────────────

	/**
	 * Run the configured preflight commands one after another before the main
	 * loop starts. A failing command is logged as a warning and the remaining
	 * commands still run. Ends with a 500 ms pause.
	 */
	private async executeInitialActions(): Promise<void> {
		const preflight = this.settings.preflightCommands;
		logger.info(`Executing ${preflight.length} initial action(s)`);

		// One execution context is shared by all preflight commands.
		const context: ExecutionContext = {
			page: this.browser.currentPage,
			cdpSession: this.browser.cdp!,
			domService: this.domService,
			browserSession: this.browser,
			extractionLlm: this.extractionModel,
			fileSystem: this.fileSystem,
			maskedValues: this.settings.maskedValues,
		};

		for (const action of preflight) {
			try {
				await this.tools.executeAction(action, context);
				logger.debug(`Initial action ${action.action} completed`);
			} catch (error) {
				const reason = error instanceof Error ? error.message : String(error);
				logger.warn(`Initial action ${action.action} failed: ${reason}`);
			}
		}

		await sleep(500);
	}

	// ────────────────────────────────────────
	//  Failure Recovery
	// ────────────────────────────────────────

	/**
	 * Last-resort diagnostic: after too many failures, ask the model to
	 * summarize what went wrong based on the five most recent errors.
	 *
	 * @param errors Error messages collected so far.
	 * @returns A "Task failed. Diagnosis: ... Suggestion: ..." string, or
	 *   undefined when the diagnostic call itself fails.
	 */
	private async makeFailureRecoveryCall(
		errors: string[],
	): Promise<string | undefined> {
		try {
			const recentErrors = errors.slice(-5).join('\n');

			const recoverySchema = z.object({
				diagnosis: z.string().describe('What went wrong'),
				suggestion: z.string().describe('What could be tried differently'),
			});

			const systemText =
				'You are a diagnostic assistant. Analyze the errors that occurred during ' +
				'a web browsing automation task and provide a brief diagnosis.';
			const userText =
				`Task: ${this.settings.task}\n\n` +
				`Errors encountered:\n${recentErrors}\n\n` +
				'Provide a brief diagnosis of what went wrong and what could be tried differently.';

			const completion = await this.model.invoke({
				messages: [
					{ role: 'system' as const, content: systemText },
					{ role: 'user' as const, content: userText },
				],
				responseSchema: recoverySchema,
				schemaName: 'FailureRecovery',
				temperature: 0,
			});

			const { diagnosis, suggestion } = completion.parsed;
			const summary = `Task failed. Diagnosis: ${diagnosis}. Suggestion: ${suggestion}`;
			logger.info(`Failure recovery: ${summary}`);
			return summary;
		} catch {
			// Deliberately best-effort: the caller falls back to no summary.
			logger.debug('Failure recovery call itself failed');
			return undefined;
		}
	}

	// ────────────────────────────────────────
	//  Cost Tracking
	// ────────────────────────────────────────

	/**
	 * Add one step's token usage to the cumulative totals and, when a cost
	 * could be calculated for the model, add the monetary cost as well.
	 *
	 * @param inputTokens Input tokens consumed by the step.
	 * @param outputTokens Output tokens produced by the step.
	 * @param step Step number, used only for the debug log line.
	 */
	private updateCostTracking(
		inputTokens: number,
		outputTokens: number,
		step: number,
	): void {
		const stepCost = calculateStepCost(
			inputTokens,
			outputTokens,
			this.model.modelId,
		);

		const totals = this.state.cumulativeCost;
		totals.totalInputTokens += inputTokens;
		totals.totalOutputTokens += outputTokens;

		// Token counts are always tracked; cost only when one was calculated.
		if (!stepCost) {
			return;
		}

		totals.totalInputCost += stepCost.inputCost;
		totals.totalOutputCost += stepCost.outputCost;
		totals.totalCost += stepCost.totalCost;

		logger.debug(
			`Step ${step} cost: $${stepCost.totalCost.toFixed(4)} ` +
			`(cumulative: $${totals.totalCost.toFixed(4)})`,
		);
	}

	// ────────────────────────────────────────
	//  Sensitive Data Filtering
	// ────────────────────────────────────────

	/**
	 * Replace every occurrence of a configured masked value in the results'
	 * `extractedContent` with a `<key>` placeholder.
	 *
	 * Results without extracted content are returned untouched; the input
	 * array and its elements are never mutated.
	 *
	 * @param results Command results to sanitize.
	 * @returns The same array when nothing is configured to mask, otherwise a
	 *   new array with masked copies.
	 */
	private filterSensitiveData(results: CommandResult[]): CommandResult[] {
		const maskedValues = this.settings.maskedValues;
		if (!maskedValues) return results;

		// Compile each pattern once instead of once per result.
		const replacements = Object.entries(maskedValues)
			// An empty value would compile to an empty pattern, which matches
			// between every character and floods the text with placeholders.
			.filter(([, value]) => typeof value === 'string' && value.length > 0)
			// Longest first, so a secret that contains another secret is masked
			// as a whole instead of leaving its remainder visible.
			.sort(([, a], [, b]) => b.length - a.length)
			.map(([key, value]) => ({
				pattern: new RegExp(escapeRegExp(value), 'g'),
				placeholder: `<${key}>`,
			}));

		if (replacements.length === 0) return results;

		return results.map((r) => {
			if (!r.extractedContent) return r;

			let content = r.extractedContent;
			for (const { pattern, placeholder } of replacements) {
				// Replacer function: keeps `$&`, `$1`, ... in a key from being
				// interpreted as replacement patterns.
				content = content.replace(pattern, () => placeholder);
			}

			return { ...r, extractedContent: content };
		});
	}

	// ────────────────────────────────────────
	//  Save Conversation
	// ────────────────────────────────────────

	/**
	 * Write the conversation to the configured output path, substituting every
	 * `{step}` placeholder in the path with the step number. Does nothing when
	 * no path is configured; failures are logged at debug level only.
	 *
	 * @param step Step number inserted into the path.
	 */
	private async saveConversation(step: number): Promise<void> {
		const pathTemplate = this.settings.conversationOutputPath;
		if (!pathTemplate) {
			return;
		}

		try {
			const filePath = pathTemplate.replace(/\{step\}/g, String(step));
			await this.messageManager.saveToFile(filePath);
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			logger.debug(`Failed to save conversation at step ${step}: ${reason}`);
		}
	}

	// ────────────────────────────────────────
	//  Follow-up Tasks
	// ────────────────────────────────────────

	/**
	 * Queue a follow-up task. The task is only stored; callers read the queue
	 * back through getFollowUpTasks().
	 *
	 * @param task Task description to remember.
	 */
	addNewTask(task: string): void {
		this.followUpTasks.push(task);

		// Log a shortened preview to keep the log line compact.
		const preview = truncateText(task, 100);
		logger.info(`Follow-up task added: ${preview}`);
	}

	/**
	 * Follow-up tasks added so far, in insertion order.
	 *
	 * @returns A copy, so callers cannot modify the internal queue.
	 */
	getFollowUpTasks(): string[] {
		return this.followUpTasks.slice();
	}

	// ────────────────────────────────────────
	//  Control Methods
	// ────────────────────────────────────────

	/**
	 * Set the paused flag. `run()` polls this flag before starting each step
	 * and waits while it is set; a step already in progress is not interrupted.
	 */
	pause(): void {
		this.state.isPaused = true;
	}

	/** Clear the paused flag so the run loop can continue with the next step. */
	resume(): void {
		this.state.isPaused = false;
	}

	/**
	 * Clear the running flag. `run()` checks it at the top of each step
	 * iteration; a step already in progress is not interrupted.
	 */
	stop(): void {
		this.state.isRunning = false;
	}

	/**
	 * Snapshot of the agent's current state.
	 *
	 * @returns A copy of the state. The nested `cumulativeCost` object is
	 *   copied as well, so callers cannot mutate the agent's running totals
	 *   through the snapshot.
	 */
	getState(): AgentState {
		return {
			...this.state,
			cumulativeCost: { ...this.state.cumulativeCost },
		};
	}

	/**
	 * The execution log the agent appends step records to.
	 *
	 * @returns The agent's own ExecutionLog instance (not a copy).
	 */
	getHistory(): ExecutionLog {
		return this.historyList;
	}

	/**
	 * Token and cost totals accumulated so far.
	 *
	 * @returns A copy of the cumulative cost record.
	 */
	getAccumulatedCost(): AccumulatedCost {
		return { ...this.state.cumulativeCost };
	}
}


================================================
FILE: packages/core/src/agent/conversation/service.ts
================================================
import { z } from 'zod';
import type { Message } from '../../model/messages.js';
import {
	systemMessage,
	userMessage,
	assistantMessage,
	imageContent,
	textContent,
	type ContentPart,
} from '../../model/messages.js';
import type { LanguageModel } from '../../model/interface.js';
import type {
	ConversationManagerOptions,
	TrackedMessage,
	ConversationManagerState,
	ConversationEntry,
	SerializedTrackedMessage,
	MessageCategory,
} from './types.js';
import {
	estimateTokens,
	estimateMessageTokens,
	redactMessages,
	extractTextContent,
	truncate,
} from './utils.js';

// ── LLM Compaction Summary Schema ──

// Structured response shape with a single `summary` string.
// Presumably used by the LLM-based compaction further down in this file -- confirm.
const CompactionSummarySchema = z.object({
	summary: z.string().describe('Concise summary of the conversation so far'),
});

// ── ConversationManager ──

export class ConversationManager {
	private messages: TrackedMessage[] = [];
	private systemPromptMessage: Message | null = null;
	private systemPromptText: string | null = null;
	private options: ConversationManagerOptions;
	private historyItems: ConversationEntry[] = [];
	private currentStep = 0;
	private lastCompactionStep = 0;

	/**
	 * Create an empty manager. The options object is stored by reference (not copied)
	 * and read on every call, e.g. contextWindowSize, maskedValues, compaction.
	 */
	constructor(options: ConversationManagerOptions) {
		this.options = options;
	}

	// ────────────────────────────────────────
	//  System Prompt
	// ────────────────────────────────────────

	/**
	 * Install (or replace) the system prompt.
	 * Keeps both the raw text, which save() persists, and the system-role message
	 * built from it, which getMessages() emits first.
	 */
	setInstructionBuilder(prompt: string): void {
		this.systemPromptText = prompt;
		const built = systemMessage(prompt);
		this.systemPromptMessage = built;
	}

	// ────────────────────────────────────────
	//  Add Messages
	// ────────────────────────────────────────

	/**
	 * Append a browser-state message (user role) for the given step.
	 *
	 * @param stateText  Textual description of the page/agent state.
	 * @param screenshot Optional screenshot payload; attached as a PNG image part
	 *                   only when `options.includeLastScreenshot` is enabled.
	 * @param step       Step number; when given it also becomes the current step.
	 *
	 * The history entry's `hasScreenshot` reflects whether an image was actually
	 * attached to the message, not merely whether a screenshot was supplied.
	 */
	addStateMessage(
		stateText: string,
		screenshot?: string,
		step?: number,
	): void {
		const content: ContentPart[] = [textContent(stateText)];

		let screenshotAttached = false;
		if (screenshot && this.options.includeLastScreenshot) {
			content.push(imageContent(screenshot, 'image/png'));
			screenshotAttached = true;
		}

		if (step !== undefined) this.currentStep = step;

		this.messages.push({
			message: userMessage(content),
			isCompactable: true,
			tokenEstimate: estimateMessageTokens(content),
			step,
			category: 'state',
			addedAt: Date.now(),
		});

		this.recordConversationEntry(step ?? this.currentStep, 'state', stateText, screenshotAttached);
	}

	/**
	 * Append an assistant-role message (compactable) and record it in the history.
	 * When `step` is given it becomes the current step; otherwise the history entry
	 * is filed under the current step.
	 */
	addAssistantMessage(text: string, step?: number): void {
		if (step !== undefined) {
			this.currentStep = step;
		}

		const tracked: TrackedMessage = {
			message: assistantMessage(text),
			isCompactable: true,
			tokenEstimate: estimateTokens(text),
			step,
			category: 'assistant',
			addedAt: Date.now(),
		};
		this.messages.push(tracked);

		// currentStep already equals `step` whenever one was supplied.
		this.recordConversationEntry(this.currentStep, 'assistant', text);
	}

	/**
	 * Append the textual result of a command as a user-role message (compactable,
	 * category 'action_result') and record it in the history.
	 * When `step` is given it becomes the current step.
	 */
	addCommandResultMessage(text: string, step?: number): void {
		if (step !== undefined) {
			this.currentStep = step;
		}

		const tracked: TrackedMessage = {
			message: userMessage(text),
			isCompactable: true,
			tokenEstimate: estimateTokens(text),
			step,
			category: 'action_result',
			addedAt: Date.now(),
		};
		this.messages.push(tracked);

		// currentStep already equals `step` whenever one was supplied.
		this.recordConversationEntry(this.currentStep, 'action_result', text);
	}

	/**
	 * Append a user-authored message. It is marked non-compactable, carries no step
	 * number of its own, and its history entry is filed under the current step.
	 */
	addUserMessage(text: string): void {
		const tracked: TrackedMessage = {
			message: userMessage(text),
			isCompactable: false,
			tokenEstimate: estimateTokens(text),
			category: 'user',
			addedAt: Date.now(),
		};
		this.messages.push(tracked);

		this.recordConversationEntry(this.currentStep, 'user', text);
	}

	/**
	 * Add a temporary message, e.g. a one-shot instruction that should not persist
	 * across steps. It is flagged `ephemeral`, is not compactable, and is NOT
	 * recorded in the history items. getMessages() returns it and its cleanup pass
	 * (consumeEphemeralMessages) drops it afterwards.
	 *
	 * @param text Message text.
	 * @param role Role to emit the message under; defaults to 'user'.
	 */
	addEphemeralMessage(text: string, role: 'user' | 'assistant' = 'user'): void {
		const fromUser = role === 'user';

		this.messages.push({
			message: fromUser ? userMessage(text) : assistantMessage(text),
			isCompactable: false,
			tokenEstimate: estimateTokens(text),
			category: fromUser ? 'user' : 'assistant',
			ephemeral: true,
			ephemeralRead: false,
			addedAt: Date.now(),
		});
	}

	// ────────────────────────────────────────
	//  Get Messages (with compaction + filtering)
	// ────────────────────────────────────────

	/**
	 * Build the message list to send to the model.
	 *
	 * Order: system prompt (if set) followed by all tracked messages. Basic
	 * compaction runs first when the estimated total exceeds
	 * options.contextWindowSize. After the list is built the ephemeral cleanup
	 * pass runs, and finally configured masked values are redacted in a copy.
	 */
	getMessages(): Message[] {
		// Compaction rewrites this.messages, so it must run before they are copied out.
		if (this.estimateTotalTokens() > this.options.contextWindowSize) {
			this.compact();
		}

		const head: Message[] = this.systemPromptMessage ? [this.systemPromptMessage] : [];
		const outgoing: Message[] = head.concat(this.messages.map((tracked) => tracked.message));

		// Ephemeral bookkeeping happens only after the outgoing list is complete.
		this.consumeEphemeralMessages();

		const secrets = this.options.maskedValues;
		const hasSecrets = secrets !== undefined && secrets !== null && Object.keys(secrets).length > 0;
		return hasSecrets ? redactMessages(outgoing, secrets) : outgoing;
	}

	// ────────────────────────────────────────
	//  Ephemeral Message Lifecycle
	// ────────────────────────────────────────

	/**
	 * Drop ephemeral messages once they have been delivered.
	 *
	 * getMessages() calls this after it has built its result, so every ephemeral
	 * message still stored has just been handed out exactly once. Removing them
	 * here gives the documented one-shot behavior. The previous two-phase
	 * "mark as read now, delete on the next pass" scheme left each ephemeral
	 * message in the list for a second getMessages() call.
	 */
	private consumeEphemeralMessages(): void {
		this.messages = this.messages.filter((m) => !m.ephemeral);
	}

	// ────────────────────────────────────────
	//  Token Estimation
	// ────────────────────────────────────────

	/**
	 * Estimated token footprint of the conversation: the system prompt (counted
	 * only when its content is a plain string) plus the cached `tokenEstimate`
	 * of every tracked message.
	 */
	estimateTotalTokens(): number {
		const prompt = this.systemPromptMessage;
		let promptTokens = 0;
		if (prompt) {
			const promptText = typeof prompt.content === 'string' ? prompt.content : '';
			promptTokens = estimateTokens(promptText);
		}

		return this.messages.reduce(
			(running, tracked) => running + tracked.tokenEstimate,
			promptTokens,
		);
	}

	// ────────────────────────────────────────
	//  Basic Compaction (image removal + old message replacement)
	// ────────────────────────────────────────

	/**
	 * Synchronous, model-free compaction, run by getMessages() when the estimated
	 * total exceeds options.contextWindowSize.
	 *
	 * Pass 1 walks newest → oldest and strips image parts from every compactable
	 * message except the most recent one that carries an image.
	 *
	 * Pass 2 runs only while still over budget and with more than 4 stored
	 * messages. It replaces compactable messages, oldest first, with a one-line
	 * placeholder. Each message is visited at most once, and existing placeholders
	 * (or messages no larger than their placeholder) are skipped, so the pass
	 * always terminates. Previously the first compactable index, the placeholder
	 * itself, was re-selected forever when one replacement was not enough.
	 */
	private compact(): void {
		const isImagePart = (p: unknown): boolean =>
			typeof p === 'object' && p !== null && (p as ContentPart).type === 'image';

		// Pass 1: remove screenshots from older messages (keep only the newest).
		let keptNewestImage = false;
		for (let i = this.messages.length - 1; i >= 0; i--) {
			const msg = this.messages[i];
			if (!msg.isCompactable) continue;

			const content = msg.message.content;
			if (!Array.isArray(content) || !content.some(isImagePart)) continue;

			if (!keptNewestImage) {
				keptNewestImage = true;
				continue;
			}

			const textOnly = content.filter((p) => !isImagePart(p));
			// A message consisting solely of images is left untouched.
			if (textOnly.length > 0) {
				msg.message = userMessage(textOnly as ContentPart[]);
				msg.tokenEstimate = estimateMessageTokens(textOnly);
			}
		}

		// Pass 2: replacements are one-for-one, so the length guard is loop-invariant.
		if (this.messages.length <= 4) return;

		for (let idx = 0; idx < this.messages.length; idx++) {
			if (this.estimateTotalTokens() <= this.options.contextWindowSize) break;

			const candidate = this.messages[idx];
			if (!candidate.isCompactable || candidate.category === 'compaction_summary') {
				continue;
			}

			const summary = `[Step ${candidate.step ?? '?'} state omitted to save tokens]`;
			const summaryTokens = estimateTokens(summary);
			// Replacing a message that is already this small would not save anything.
			if (summaryTokens >= candidate.tokenEstimate) continue;

			this.messages[idx] = {
				message: userMessage(summary),
				isCompactable: true,
				tokenEstimate: summaryTokens,
				step: candidate.step,
				category: 'compaction_summary',
				addedAt: Date.now(),
			};
		}
	}

	// ────────────────────────────────────────
	//  LLM-Based Compaction
	// ────────────────────────────────────────

	/**
	 * Run LLM-based message compaction: send the older portion of the conversation
	 * to a summarization model and replace it with a single summary message.
	 *
	 * Skipped (returns false) when no compaction policy or model is available, when
	 * fewer than `compaction.interval` steps have passed since the last compaction,
	 * when the estimate is already at or below the target, or when there is nothing
	 * to summarize. The most recent messages (up to 6, at most half of the list)
	 * are kept verbatim.
	 *
	 * @param model Optional model override; defaults to options.compactionModel.
	 * @returns true if compaction was performed, false if skipped or if the model
	 *   call failed (failures are swallowed on purpose; basic compaction in
	 *   getMessages() remains the fallback).
	 */
	async compactWithLlm(model?: LanguageModel): Promise<boolean> {
		const compactionConfig = this.options.compaction;
		if (!compactionConfig) return false;

		const llm = model ?? this.options.compactionModel;
		if (!llm) return false;

		// Only compact if enough steps have passed since last compaction
		if (
			compactionConfig.interval > 0 &&
			this.currentStep - this.lastCompactionStep < compactionConfig.interval
		) {
			return false;
		}

		const targetTokens =
			compactionConfig.targetTokens ??
			Math.floor(this.options.contextWindowSize * 0.6);

		// If we're under the target, no need to compact
		if (this.estimateTotalTokens() <= targetTokens) return false;

		// Split messages: keep the last few messages intact, summarize the rest
		const keepCount = Math.min(6, Math.floor(this.messages.length / 2));
		const toSummarize = this.messages.slice(0, this.messages.length - keepCount);
		const toKeep = this.messages.slice(this.messages.length - keepCount);

		if (toSummarize.length === 0) return false;

		// Label the summary with the newest step that is actually known. The last
		// summarized message may carry no step (user or ephemeral message).
		let lastSummarizedStep: number | undefined;
		for (let i = toSummarize.length - 1; i >= 0; i--) {
			const candidateStep = toSummarize[i].step;
			if (candidateStep !== undefined) {
				lastSummarizedStep = candidateStep;
				break;
			}
		}

		// Build a transcript of the messages to summarize (each entry capped at 500 chars)
		const transcript = toSummarize
			.map((m) => {
				const role = m.message.role;
				const text = extractTextContent(m.message);
				const stepLabel = m.step !== undefined ? ` (step ${m.step})` : '';
				return `[${role}${stepLabel}]: ${truncate(text, 500)}`;
			})
			.join('\n');

		const prompt = [
			systemMessage(
				'You are a conversation summarizer. Summarize the following agent-browser conversation transcript. ' +
				'Preserve key facts: URLs visited, actions taken, errors encountered, extracted data, and the current task state. ' +
				'Be concise but complete.',
			),
			userMessage(
				`Summarize this conversation transcript:\n\n${transcript}`,
			),
		];

		try {
			const completion = await llm.invoke({
				messages: prompt,
				responseSchema: CompactionSummarySchema,
				schemaName: 'CompactionSummary',
				schemaDescription: 'A concise summary of the conversation so far',
				maxTokens: compactionConfig.maxTokens,
				temperature: 0,
			});

			const summaryText = `[Conversation summary of steps 1-${lastSummarizedStep ?? '?'}]\n${completion.parsed.summary}`;

			// Replace the summarized messages with a single summary
			this.messages = [
				{
					message: userMessage(summaryText),
					isCompactable: false, // Don't re-compact the summary
					tokenEstimate: estimateTokens(summaryText),
					category: 'compaction_summary',
					addedAt: Date.now(),
				},
				...toKeep,
			];

			this.lastCompactionStep = this.currentStep;
			return true;
		} catch {
			// Deliberate best-effort: if LLM compaction fails, leave messages untouched.
			return false;
		}
	}

	/**
	 * Convenience predicate for callers deciding whether to invoke compactWithLlm().
	 * True only when a compaction policy with a positive interval is configured,
	 * at least `interval` steps have elapsed since the last LLM compaction, and the
	 * token estimate exceeds the target (policy.targetTokens, else 60% of the
	 * context window).
	 */
	shouldCompactWithLlm(): boolean {
		const policy = this.options.compaction;
		if (!policy) return false;
		if (policy.interval <= 0) return false;

		const stepsSinceLast = this.currentStep - this.lastCompactionStep;
		if (!(stepsSinceLast >= policy.interval)) return false;

		const threshold = policy.targetTokens ?? this.options.contextWindowSize * 0.6;
		return this.estimateTotalTokens() > threshold;
	}

	// ────────────────────────────────────────
	//  History Items & Description
	// ────────────────────────────────────────

	/**
	 * Append one entry to the structured history.
	 * The summary is capped at 120 characters and the stored content at 2000.
	 */
	private recordConversationEntry(
		step: number,
		category: MessageCategory,
		content: string,
		hasScreenshot?: boolean,
	): void {
		const entry: ConversationEntry = {
			step,
			category,
			summary: truncate(content, 120),
			content: truncate(content, 2000),
			hasScreenshot,
			timestamp: Date.now(),
		};
		this.historyItems.push(entry);
	}

	/**
	 * Render the recorded history as text, one "Step N:" section per step in
	 * ascending step order.
	 *
	 * @param stepLimitShown Maximum number of steps rendered in full. With more
	 *   steps than this, the first ceil(limit / 2) and the last remaining ones are
	 *   shown, with an "N steps omitted" line between them.
	 * @returns The description, or '(no history)' when nothing has been recorded.
	 */
	agentHistoryDescription(stepLimitShown = 10): string {
		// Bucket entries by step, preserving insertion order inside each bucket.
		const grouped = new Map<number, ConversationEntry[]>();
		for (const entry of this.historyItems) {
			const bucket = grouped.get(entry.step);
			if (bucket === undefined) {
				grouped.set(entry.step, [entry]);
			} else {
				bucket.push(entry);
			}
		}

		if (grouped.size === 0) return '(no history)';

		const ordered = Array.from(grouped.keys()).sort((x, y) => x - y);
		const describe = (stepNum: number): string =>
			this.formatStepDescription(stepNum, grouped.get(stepNum)!);

		if (ordered.length <= stepLimitShown) {
			return ordered.map((stepNum) => describe(stepNum)).join('\n');
		}

		// Too many steps: head, an omission marker, then tail.
		const headCount = Math.ceil(stepLimitShown / 2);
		const tailCount = stepLimitShown - headCount;
		const head = ordered.slice(0, headCount).map((stepNum) => describe(stepNum));
		const tail = ordered
			.slice(ordered.length - tailCount)
			.map((stepNum) => describe(stepNum));
		const omittedCount = ordered.length - headCount - tailCount;

		return [...head, `  ... (${omittedCount} steps omitted) ...`, ...tail].join('\n');
	}

	/**
	 * Format one step as "Step N:" followed by one indented "<Label>: <summary>"
	 * line per entry. Categories without a dedicated label fall back to the raw
	 * category name.
	 */
	private formatStepDescription(step: number, items: ConversationEntry[]): string {
		const labelFor = (category: MessageCategory): string => {
			switch (category) {
				case 'state':
					return 'State';
				case 'assistant':
					return 'Agent';
				case 'action_result':
					return 'Result';
				case 'user':
					return 'User';
				default:
					return category;
			}
		};

		const body = items
			.map((entry) => `${labelFor(entry.category)}: ${entry.summary}`)
			.join('\n  ');
		return `Step ${step}:\n  ${body}`;
	}

	/**
	 * Get all recorded history items.
	 * Returns the live internal array (typed readonly), not a copy; entries added
	 * later are visible through the same reference until load()/clear() replace it.
	 */
	getConversationEntrys(): readonly ConversationEntry[] {
		return this.historyItems;
	}

	// ────────────────────────────────────────
	//  Save / Load (Conversation Persistence)
	// ────────────────────────────────────────

	/**
	 * Serialize the current state to a persistence-friendly snapshot.
	 *
	 * Only text is persisted: multi-part content is flattened to its text parts,
	 * so image parts (screenshots) are dropped. Ephemeral messages are skipped.
	 * The serialized form has no ephemeral flag, so saving them would make load()
	 * resurrect them as permanent messages.
	 */
	save(): ConversationManagerState {
		const serialized: SerializedTrackedMessage[] = this.messages
			.filter((m) => !m.ephemeral)
			.map((m) => ({
				role: m.message.role,
				content: extractTextContent(m.message),
				isCompactable: m.isCompactable,
				tokenEstimate: m.tokenEstimate,
				step: m.step,
				category: m.category,
			}));

		return {
			systemPrompt: this.systemPromptText,
			messages: serialized,
			historyItems: [...this.historyItems],
			currentStep: this.currentStep,
		};
	}

	/**
	 * Restore the ConversationManager from a previously saved state.
	 * This replaces all current messages and history.
	 *
	 * The new message and history lists are built before any field is assigned,
	 * so a malformed snapshot throws without leaving the manager half-loaded.
	 * Messages with role 'assistant' are restored as assistant messages; every
	 * other role becomes a user message. The LLM-compaction marker is reset, as
	 * in clear() and resetMessages(), because it refers to the replaced conversation.
	 */
	load(state: ConversationManagerState): void {
		const restoredMessages: TrackedMessage[] = state.messages.map((s) => ({
			message:
				s.role === 'assistant'
					? assistantMessage(s.content)
					: userMessage(s.content),
			isCompactable: s.isCompactable,
			tokenEstimate: s.tokenEstimate,
			step: s.step,
			category: s.category,
			addedAt: Date.now(),
		}));
		const restoredHistory: ConversationEntry[] = [...state.historyItems];

		if (state.systemPrompt) {
			this.setInstructionBuilder(state.systemPrompt);
		} else {
			this.systemPromptMessage = null;
			this.systemPromptText = null;
		}

		this.messages = restoredMessages;
		this.historyItems = restoredHistory;
		this.currentStep = state.currentStep;
		this.lastCompactionStep = 0;
	}

	/**
	 * Write the snapshot produced by save() to `filePath` as pretty-printed JSON
	 * (2-space indent, UTF-8), creating parent directories as needed.
	 *
	 * @returns The path that was written, i.e. `filePath` unchanged.
	 */
	async saveToFile(filePath: string): Promise<string> {
		const fs = await import('node:fs/promises');
		const path = await import('node:path');

		await fs.mkdir(path.dirname(filePath), { recursive: true });
		await fs.writeFile(filePath, JSON.stringify(this.save(), null, 2), 'utf-8');
		return filePath;
	}

	/**
	 * Load conversation state from a JSON file written by saveToFile().
	 *
	 * The parsed JSON is shape-checked before it is handed to load(), so a
	 * truncated or unrelated file is rejected with a descriptive error instead of
	 * failing part-way through.
	 *
	 * @throws Error if the file cannot be read, is not valid JSON, or does not
	 *   look like a ConversationManagerState.
	 */
	async loadFromFile(filePath: string): Promise<void> {
		const { readFile } = await import('node:fs/promises');
		const raw = await readFile(filePath, 'utf-8');
		const parsed: unknown = JSON.parse(raw);

		if (typeof parsed !== 'object' || parsed === null) {
			throw new Error(`Invalid conversation state in ${filePath}: expected a JSON object`);
		}

		const candidate = parsed as Partial<ConversationManagerState>;
		const promptOk =
			candidate.systemPrompt === undefined ||
			candidate.systemPrompt === null ||
			typeof candidate.systemPrompt === 'string';
		if (
			!promptOk ||
			!Array.isArray(candidate.messages) ||
			!Array.isArray(candidate.historyItems) ||
			typeof candidate.currentStep !== 'number'
		) {
			throw new Error(
				`Invalid conversation state in ${filePath}: expected systemPrompt, messages[], historyItems[] and numeric currentStep`,
			);
		}

		this.load(candidate as ConversationManagerState);
	}

	// ────────────────────────────────────────
	//  Accessors
	// ────────────────────────────────────────

	/** Number of messages currently held, counting the system prompt (if set) as one. */
	get messageCount(): number {
		const systemSlots = this.systemPromptMessage ? 1 : 0;
		return systemSlots + this.messages.length;
	}

	/** Current step: the most recent step passed to an add*Message call, or restored by load(). */
	get step(): number {
		return this.currentStep;
	}

	/**
	 * Reset the conversation: drops all messages and history items and zeroes both
	 * step counters. The system prompt is left in place.
	 */
	clear(): void {
		this.currentStep = 0;
		this.lastCompactionStep = 0;
		this.historyItems = [];
		this.messages = [];
	}

	/**
	 * Drop every tracked message while keeping the history items, the current
	 * step and the system prompt. The LLM-compaction marker is reset as well.
	 * Useful for restarting the message context without losing the history summary.
	 */
	resetMessages(): void {
		this.lastCompactionStep = 0;
		this.messages = [];
	}
}


================================================
FILE: packages/core/src/agent/conversation/types.ts
================================================
import type { Message } from '../../model/messages.js';
import type { CompactionPolicy } from '../types.js';
import type { LanguageModel } from '../../model/interface.js';

// ── Conversation Manager Options ──

/** Configuration accepted by the ConversationManager constructor. */
export interface ConversationManagerOptions {
	/** Token budget; getMessages() runs basic compaction when the estimate exceeds it. */
	contextWindowSize: number;
	/**
	 * Custom token estimator.
	 * NOTE(review): ConversationManager never reads this option; it always uses
	 * the estimateTokens helper from utils — confirm whether it should be honored.
	 */
	estimateTokens?: (text: string) => number;
	/** When false, screenshots passed to addStateMessage() are not attached to the message. */
	includeLastScreenshot: boolean;
	/** Sensitive key-value pairs to mask in outgoing messages. */
	maskedValues?: Record<string, string>;
	/** LLM-based compaction configuration. */
	compaction?: CompactionPolicy;
	/** LanguageModel used for LLM-based compaction. Ignored if compaction is not set. */
	compactionModel?: LanguageModel;
}

// ── Managed Message ──

/**
 * Semantic category attached to tracked messages and history entries.
 * 'compaction_summary' marks text produced by compaction (a placeholder or an LLM summary).
 */
export type MessageCategory =
	| 'system'
	| 'state'
	| 'action_result'
	| 'assistant'
	| 'user'
	| 'compaction_summary';

/** A model message plus the bookkeeping ConversationManager needs for compaction and cleanup. */
export interface TrackedMessage {
	/** The message as it is sent to the model. */
	message: Message;
	/** Whether basic compaction may strip images from, or replace, this message. */
	isCompactable: boolean;
	/** Cached token estimate, summed by estimateTotalTokens(). */
	tokenEstimate: number;
	/** Step the message was added for, when the caller supplied one. */
	step?: number;
	/** Semantic category for structured history tracking. */
	category?: MessageCategory;
	/** When true, this message is temporary and is dropped by the ephemeral cleanup in getMessages(). */
	ephemeral?: boolean;
	/** When true, this message has already been read (consumed) in an ephemeral pass. */
	ephemeralRead?: boolean;
	/** Timestamp when this message was added. */
	addedAt?: number;
}

// ── History Item ──

/**
 * A structured entry in the agent's conversation history, richer than TrackedMessage.
 * Used for building human-readable summaries and for save/load.
 */
export interface ConversationEntry {
	/** Step number this item belongs to. */
	step: number;
	/** Category of this history item. */
	category: MessageCategory;
	/** Brief human-readable summary: ConversationManager stores the message text cut to 120 characters. */
	summary: string;
	/** The message text, cut to 2000 characters by ConversationManager. */
	content?: string;
	/** Whether this item included a screenshot. */
	hasScreenshot?: boolean;
	/** Timestamp (Date.now()) taken when the entry was recorded. */
	timestamp: number;
}

// ── Conversation Manager State (persistence) ──

/**
 * Serializable snapshot of the ConversationManager for save/load.
 */
export interface ConversationManagerState {
	/** Raw system prompt text, or null when none was set. */
	systemPrompt: string | null;
	/** Tracked messages in conversation order, flattened to text. */
	messages: SerializedTrackedMessage[];
	/** Copy of the structured history entries. */
	historyItems: ConversationEntry[];
	/** Step count at the time of snapshot. */
	currentStep: number;
}

/**
 * Serializable form of TrackedMessage. Message content is flattened to plain
 * text: ConversationManager.save() keeps only the text parts, so image parts
 * (base64 screenshots) are dropped rather than replaced with a placeholder.
 */
export interface SerializedTrackedMessage {
	/** Original message role; load() restores 'assistant' as assistant and anything else as user. */
	role: string;
	/** Text content of the message. */
	content: string;
	isCompactable: boolean;
	tokenEstimate: number;
	step?: number;
	category?: MessageCategory;
}


================================================
FILE: packages/core/src/agent/conversation/utils.ts
================================================
import type { Message } from '../../model/messages.js';
import type { ContentPart } from '../../model/messages.js';

/**
 * Approximate the token count of a string with a flat heuristic of four
 * characters per token, rounded up. The empty string yields 0.
 */
export function estimateTokens(text: string): number {
	const CHARS_PER_TOKEN = 4;
	const charCount = text.length;
	return Math.ceil(charCount / CHARS_PER_TOKEN);
}

/**
 * Estimate the tokens of message content.
 * A plain string is measured with estimateTokens(). For a part array, text
 * parts are measured the same way, each image part counts as a flat 1000, and
 * anything else (non-objects, unknown part types) contributes nothing.
 */
export function estimateMessageTokens(content: string | unknown[]): number {
	if (typeof content === 'string') {
		return estimateTokens(content);
	}

	const IMAGE_TOKEN_COST = 1000; // Approximate cost for an image
	let sum = 0;
	for (const part of content) {
		if (part === null || typeof part !== 'object') continue;

		const record = part as Record<string, unknown>;
		switch (record.type) {
			case 'text':
				if (typeof record.text === 'string') {
					sum += estimateTokens(record.text);
				}
				break;
			case 'image':
				sum += IMAGE_TOKEN_COST;
				break;
			default:
				break;
		}
	}
	return sum;
}

// ── Sensitive Data Filtering ──

const MASK = '***';

/**
 * Replace all occurrences of each sensitive value in `text` with a mask.
 * Keys are used only for logging context; values are the secrets to redact.
 *
 * All secrets are matched in a single pass with longer secrets tried first, so:
 *  - a secret that contains a shorter secret is masked whole instead of leaving
 *    its tail behind (e.g. "password123" with "pass" also configured), and
 *  - mask characters inserted for one secret are never re-scanned and replaced
 *    again by another (e.g. a secret value of "*").
 *
 * Empty values are ignored. Returns `text` unchanged when nothing is maskable.
 */
export function redactSensitiveValues(
	text: string,
	maskedValues: Record<string, string>,
): string {
	const patterns = Object.values(maskedValues)
		.filter((value) => typeof value === 'string' && value.length > 0)
		// Longest first: regex alternation takes the first alternative that matches.
		.sort((a, b) => b.length - a.length)
		// Escape regex special characters so each value is matched literally.
		.map((value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

	if (patterns.length === 0) return text;

	// A function replacer avoids any `$` interpretation of the mask string.
	return text.replace(new RegExp(patterns.join('|'), 'g'), () => MASK);
}

/**
 * Produce a copy of `message` with sensitive values masked in its text.
 * String content is masked directly; in part arrays only text parts are masked
 * and other parts (images) are passed through untouched. The input message is
 * never mutated; it is returned as-is when there is nothing to mask or its
 * content is neither a string nor an array.
 */
export function redactMessage(
	message: Message,
	maskedValues: Record<string, string>,
): Message {
	if (Object.keys(maskedValues).length === 0) return message;

	const { content } = message;

	if (typeof content === 'string') {
		const maskedText = redactSensitiveValues(content, maskedValues);
		return { ...message, content: maskedText } as Message;
	}

	if (!Array.isArray(content)) return message;

	const maskedParts = (content as ContentPart[]).map((part) =>
		part.type === 'text'
			? { ...part, text: redactSensitiveValues(part.text, maskedValues) }
			: part, // Images are left as-is (binary data)
	);
	return { ...message, content: maskedParts } as Message;
}

/**
 * Mask sensitive data in every message of the list.
 * With no masked values configured the input array itself is returned;
 * otherwise a new array of redacted copies is produced.
 */
export function redactMessages(
	messages: Message[],
	maskedValues: Record<string, string>,
): Message[] {
	const nothingToMask = Object.keys(maskedValues).length === 0;
	if (nothingToMask) {
		return messages;
	}

	const redacted: Message[] = messages.map((original) => redactMessage(original, maskedValues));
	return redacted;
}

/**
 * Flatten a Message to plain text.
 * String content is returned unchanged; for a part array the text parts are
 * joined with newlines and every other part is ignored. Any other content
 * shape yields the empty string.
 */
export function extractTextContent(message: Message): string {
	const { content } = message;

	if (typeof content === 'string') return content;
	if (!Array.isArray(content)) return '';

	const texts: string[] = [];
	(content as ContentPart[]).forEach((part) => {
		if (part.type === 'text') {
			texts.push(part.text);
		}
	});
	return texts.join('\n');
}

/**
 * Truncate a string to at most `maxLen` characters, appending an ellipsis if truncated.
 *
 * When `maxLen` is too small to hold any text plus the three-character ellipsis
 * (below 3), the text is hard-cut to `maxLen` characters instead. A zero or
 * negative `maxLen` yields the empty string. Previously such limits produced a
 * negative slice end, returning a result longer than requested.
 *
 * @param text   Input string.
 * @param maxLen Maximum length of the returned string.
 */
export function truncate(text: string, maxLen: number): string {
	if (text.length <= maxLen) return text;
	if (maxLen <= 0) return '';
	// Not enough room for the ellipsis: plain cut, never exceeding maxLen.
	if (maxLen < 3) return text.slice(0, maxLen);
	return `${text.slice(0, maxLen - 3)}...`;
}


================================================
FILE: packages/core/src/agent/conversation.test.ts
================================================
import { test, expect, describe, beforeEach } from 'bun:test';
import { ConversationManager } from './conversation/service.js';
import type { ConversationManagerOptions } from './conversation/types.js';
import type { LanguageModel, InferenceOptions } from '../model/interface.js';
import type { InferenceResult } from '../model/types.js';

// ── Helpers ──

/**
 * Test factory: a ConversationManager with a 10000-token window and screenshots
 * enabled, with any of those defaults overridable per test.
 */
function createManager(
	overrides: Partial<ConversationManagerOptions> = {},
): ConversationManager {
	const defaults: ConversationManagerOptions = {
		contextWindowSize: 10000,
		includeLastScreenshot: true,
	};
	const options: ConversationManagerOptions = { ...defaults, ...overrides };
	return new ConversationManager(options);
}

/**
 * Test double for a LanguageModel: every invoke() resolves with a parsed
 * `{ summary }` payload, fixed token usage and a 'stop' finish reason,
 * regardless of the options passed in.
 */
function createMockModel(summary = 'Summary of the conversation'): LanguageModel {
	async function invoke<T>(_options: InferenceOptions<T>): Promise<InferenceResult<T>> {
		const parsed = { summary } as unknown as T;
		return {
			parsed,
			usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
			finishReason: 'stop',
		};
	}

	return {
		modelId: 'test-model',
		provider: 'custom',
		invoke,
	};
}

// ── Tests ──

describe('ConversationManager', () => {
	let mm: ConversationManager;

	beforeEach(() => {
		mm = createManager();
	});

	describe('system prompt', () => {
		test('setInstructionBuilder stores the system prompt', () => {
			mm.setInstructionBuilder('You are a helpful assistant');
			const messages = mm.getMessages();
			expect(messages[0]).toEqual({
				role: 'system',
				content: 'You are a helpful assistant',
			});
		});

		test('system prompt appears first in getMessages', () => {
			mm.setInstructionBuilder('System');
			mm.addStateMessage('State text', undefined, 1);
			const messages = mm.getMessages();
			expect(messages[0].role).toBe('system');
			expect(messages[1].role).toBe('user');
		});

		test('changing system prompt replaces the previous one', () => {
			mm.setInstructionBuilder('First');
			mm.setInstructionBuilder('Second');
			const messages = mm.getMessages();
			const systemMessages = messages.filter((m) => m.role === 'system');
			expect(systemMessages).toHaveLength(1);
			expect(systemMessages[0].content).toBe('Second');
		});
	});

	describe('addStateMessage', () => {
		test('adds a user message with state text', () => {
			mm.addStateMessage('Page state info', undefined, 1);
			const messages = mm.getMessages();
			expect(messages).toHaveLength(1);
			expect(messages[0].role).toBe('user');
		});

		test('includes screenshot when provided and vision enabled', () => {
			mm.addStateMessage('State', 'base64screenshot', 1);
			const messages = mm.getMessages();
			const content = messages[0].content;
			expect(Array.isArray(content)).toBe(true);
			if (Array.isArray(content)) {
				expect(content).toHaveLength(2);
				expect(content[0]).toEqual({ type: 'text', text: 'State' });
				expect(content[1]).toHaveProperty('type', 'image');
			}
		});

		test('excludes screenshot when vision disabled', () => {
			const noVision = createManager({ includeLastScreenshot: false });
			noVision.addStateMessage('State', 'base64screenshot', 1);
			const messages = noVision.getMessages();
			const content = messages[0].content;
			// Content should be text-only array
			expect(Array.isArray(content)).toBe(true);
			if (Array.isArray(content)) {
				expect(content).toHaveLength(1);
				expect(content[0]).toHaveProperty('type', 'text');
			}
		});

		test('updates messageCount', () => {
			expect(mm.messageCount).toBe(0);
			mm.addStateMessage('State 1', undefined, 1);
			expect(mm.messageCount).toBe(1);
			mm.addStateMessage('State 2', undefined, 2);
			expect(mm.messageCount).toBe(2);
		});
	});

	describe('addAssistantMessage', () => {
		test('adds an assistant role message', () => {
			mm.addAssistantMessage('Agent response', 1);
			const messages = mm.getMessages();
			expect(messages[0].role).toBe('assistant');
			expect(messages[0].content).toBe('Agent response');
		});
	});

	describe('addCommandResultMessage', () => {
		test('adds a user role message for action results', () => {
			mm.addCommandResultMessage('click: success', 1);
			const messages = mm.getMessages();
			expect(messages[0].role).toBe('user');
			expect(messages[0].content).toBe('click: success');
		});
	});

	describe('getMessages ordering', () => {
		test('returns messages in correct order', () => {
			mm.setInstructionBuilder('System prompt');
			mm.addStateMessage('State text', undefined, 1);
			mm.addAssistantMessage('Agent thought', 1);
			mm.addCommandResultMessage('Action result', 1);

			const messages = mm.getMessages();
			expect(messages).toHaveLength(4);
			expect(messages[0].role).toBe('system');
			expect(messages[1].role).toBe('user');
			expect(messages[2].role).toBe('assistant');
			expect(messages[3].role).toBe('user');
		});
	});

	describe('compaction - screenshot removal', () => {
		test('removes old screenshots when over token budget, keeps last', () => {
			// 3 screenshots: each ~1000 tokens for image + ~2 for text = ~3006 total.
			// Budget of 1500: after removing 2 old screenshots (saving 2000),
			// total becomes ~1006 < 1500, so compact exits successfully.
			const small = createManager({ contextWindowSize: 1500 });
			small.addStateMessage('State 1', 'screenshot1', 1);
			small.addStateMessage('State 2', 'screenshot2', 2);
			small.addStateMessage('State 3', 'screenshot3', 3);

			const messages = small.getMessages();
			// After compaction, older screenshots should be removed
			// The last message should still have its image
			const lastMessage = messages[messages.length - 1];
			const lastContent = lastMessage.content;
			expect(Array.isArray(lastContent)).toBe(true);
			if (Array.isArray(lastContent)) {
				const hasImage = lastContent.some(
					(p: any) => typeof p === 'object' && p.type === 'image',
				);
				expect(hasImage).toBe(true);

				// Older messages should have had their images removed
				const firstMsg = messages[0];
				const firstContent = firstMsg.content;
				if (Array.isArray(firstContent)) {
					const firstHasImage = firstContent.some(
						(p: any) => typeof p === 'object' && p.type === 'image',
					);
					expect(firstHasImage).toBe(false);
				}
			}
		});
	});

	describe('compaction - token budget behavior', () => {
		test('does not trigger compaction when under budget', () => {
			// Budget of 10000 means no compaction needed for a few messages
			const large = createManager({ contextWindowSize: 10000, includeLastScreenshot: false });
			large.addStateMessage('Short state', undefined, 1);
			large.addAssistantMessage('Short response', 1);

			const messages = large.getMessages();
			// No summaries should be present
			const summaryMessages = messages.filter(
				(m) =>
					typeof m.content === 'string' &&
					m.content.includes('omitted to save tokens'),
			);
			expect(summaryMessages).toHaveLength(0);
		});

		test('estimateTotalTokens reflects actual message content', () => {
			const mm2 = createManager({ contextWindowSize: 100000, includeLastScreenshot: false });
			mm2.addStateMessage('A'.repeat(400), undefined, 1); // ~100 tokens
			mm2.addStateMessage('B'.repeat(800), undefined, 2); // ~200 tokens

			const total = mm2.estimateTotalTokens();
			// Total should be roughly 300 tokens for 1200 chars
			expect(total).toBeGreaterThanOrEqual(250);
			expect(total).toBeLessThanOrEqual(400);
		});
	});

	describe('token estimation', () => {
		test('estimateTotalTokens includes system prompt', () => {
			mm.setInstructionBuilder('System prompt text');
			const tokensWithSystem = mm.estimateTotalTokens();
			expect(tokensWithSystem).toBeGreaterThan(0);
		});

		test('estimateTotalTokens grows with messages', () => {
			const before = mm.estimateTotalTokens();
			mm.addStateMessage('Some state text', undefined, 1);
			const after = mm.estimateTotalTokens();
			expect(after).toBeGreaterThan(before);
		});

		test('estimateTotalTokens counts images as ~1000 tokens', () => {
			mm.addStateMessage('Text', 'screenshot', 1);
			const tokens = mm.estimateTotalTokens();
			// Text ~4 chars = 1 token, plus ~1000 for image
			expect(tokens).toBeGreaterThanOrEqual(1000);
		});
	});

	describe('history items', () => {
		test('records history for each added message', () => {
			mm.addStateMessage('State text', undefined, 1);
			mm.addAssistantMessage('Agent response', 1);
			mm.addCommandResultMessage('Result text', 1);

			const items = mm.getConversationEntrys();
			expect(items).toHaveLength(3);
			expect(items[0].category).toBe('state');
			expect(items[1].category).toBe('assistant');
			expect(items[2].category).toBe('action_result');
		});

		test('history items include step number', () => {
			mm.addStateMessage('State', undefined, 5);
			const items = mm.getConversationEntrys();
			expect(items[0].step).toBe(5);
		});

		test('history items include truncated summary', () => {
			const longText = 'a'.repeat(200);
			mm.addStateMessage(longText, undefined, 1);
			const items = mm.getConversationEntrys();
			// Summary should be truncated to 120 chars
			expect(items[0].summary.length).toBeLessThanOrEqual(123); // 120 + '...'
		});

		test('history items track screenshot presence', () => {
			mm.addStateMessage('State', 'screenshot_data', 1);
			const items = mm.getConversationEntrys();
			expect(items[0].hasScreenshot).toBe(true);
		});
	});

	describe('agentHistoryDescription', () => {
		test('returns "(no history)" when empty', () => {
			const description = mm.agentHistoryDescription();
			expect(description).toBe('(no history)');
		});

		test('shows all steps when under stepLimitShown', () => {
			const steps = [1, 2];
			for (const step of steps) {
				mm.addStateMessage(`State ${step}`, undefined, step);
				mm.addAssistantMessage(`Agent ${step}`, step);
			}

			const description = mm.agentHistoryDescription(10);
			for (const step of steps) {
				expect(description).toContain(`Step ${step}:`);
			}
		});

		test('truncates with "steps omitted" when exceeding stepLimitShown', () => {
			for (let step = 1; step < 21; step += 1) {
				mm.addStateMessage(`State ${step}`, undefined, step);
				mm.addAssistantMessage(`Agent ${step}`, step);
			}

			const description = mm.agentHistoryDescription(4);
			expect(description).toContain('steps omitted');

			// With a limit of 4, the first two and the last two steps stay visible.
			for (const label of ['Step 1:', 'Step 2:', 'Step 19:', 'Step 20:']) {
				expect(description).toContain(label);
			}
		});

		test('includes category prefixes in description', () => {
			mm.addStateMessage('Page loaded', undefined, 1);
			mm.addAssistantMessage('Clicking button', 1);
			mm.addCommandResultMessage('click: success', 1);

			const description = mm.agentHistoryDescription();
			for (const prefix of ['State:', 'Agent:', 'Result:']) {
				expect(description).toContain(prefix);
			}
		});
	});

	describe('ephemeral messages', () => {
		// Ephemeral messages are meant to be seen by the model once and then dropped.
		// These tests pin the observable lifecycle across successive getMessages() calls.

		test('ephemeral message appears in first getMessages call', () => {
			mm.addEphemeralMessage('Temporary instruction');
			const messages = mm.getMessages();
			const found = messages.some(
				(m) => typeof m.content === 'string' && m.content === 'Temporary instruction',
			);
			expect(found).toBe(true);
		});

		test('ephemeral message is removed after being consumed', () => {
			mm.addEphemeralMessage('Temp');

			// First call: message is present and gets marked as read
			const first = mm.getMessages();
			expect(first.some((m) => typeof m.content === 'string' && m.content === 'Temp')).toBe(true);

			// Second call: message is still in result (removal happens after building result),
			// then gets removed during consumeEphemeralMessages.
			// Only the side effect matters here, so the return value is intentionally discarded.
			mm.getMessages();

			// Third call: message is now actually gone from this.messages
			const third = mm.getMessages();
			const found = third.some(
				(m) => typeof m.content === 'string' && m.content === 'Temp',
			);
			expect(found).toBe(false);
		});

		test('ephemeral message with assistant role', () => {
			mm.addEphemeralMessage('Agent thought', 'assistant');
			const messages = mm.getMessages();
			const found = messages.find(
				(m) => m.role === 'assistant' && m.content === 'Agent thought',
			);
			expect(found).toBeDefined();
		});

		test('multiple ephemeral messages all appear then get cleaned up', () => {
			mm.addEphemeralMessage('Temp 1');
			mm.addEphemeralMessage('Temp 2');

			// First call: both present, marked as read
			const first = mm.getMessages();
			expect(first).toHaveLength(2);

			// Second call: still in result (removal after build), then removed
			mm.getMessages();

			// Third call: messages have been removed
			const third = mm.getMessages();
			expect(third).toHaveLength(0);
		});
	});

	describe('save / load round-trip', () => {
		// Snapshot `mm`, then load that snapshot into a freshly created manager.
		const reloadIntoFresh = () => {
			const snapshot = mm.save();
			const fresh = createManager();
			fresh.load(snapshot);
			return fresh;
		};

		test('save and load preserves system prompt', () => {
			mm.setInstructionBuilder('My system prompt');
			mm.addStateMessage('State 1', undefined, 1);

			const [head] = reloadIntoFresh().getMessages();
			expect(head.role).toBe('system');
			expect(head.content).toBe('My system prompt');
		});

		test('save and load preserves messages', () => {
			mm.addStateMessage('State 1', undefined, 1);
			mm.addAssistantMessage('Response 1', 1);
			mm.addCommandResultMessage('Result 1', 1);

			const roles = reloadIntoFresh()
				.getMessages()
				.map((message) => message.role);
			expect(roles).toEqual(['user', 'assistant', 'user']);
		});

		test('save and load preserves history items', () => {
			mm.addStateMessage('State 1', undefined, 1);
			mm.addAssistantMessage('Response 1', 1);

			const categories = reloadIntoFresh()
				.getConversationEntrys()
				.map((entry) => entry.category);
			expect(categories).toEqual(['state', 'assistant']);
		});

		test('save and load preserves currentStep', () => {
			mm.addStateMessage('State', undefined, 7);

			// The step must be present in the snapshot itself...
			const snapshot = mm.save();
			expect(snapshot.currentStep).toBe(7);

			// ...and must be visible on a manager restored from it.
			const fresh = createManager();
			fresh.load(snapshot);
			expect(fresh.step).toBe(7);
		});

		test('save strips screenshots (text only in serialized form)', () => {
			mm.addStateMessage('State with screenshot', 'base64data', 1);

			const snapshot = mm.save();
			// The raw image payload must not leak into any serialized message.
			for (const serialized of snapshot.messages) {
				expect(serialized.content).not.toContain('base64data');
			}
		});

		test('load with null system prompt clears system prompt', () => {
			mm.setInstructionBuilder('Initial prompt');

			const snapshot = mm.save();
			snapshot.systemPrompt = null;
			mm.load(snapshot);

			const roles = mm.getMessages().map((message) => message.role);
			expect(roles.includes('system')).toBe(false);
		});
	});

	describe('sensitive data filtering', () => {
		/**
		 * Flatten message content to plain text so assertions do not depend on
		 * whether the manager emitted a bare string or an array of content parts.
		 *
		 * Throws on any other shape: an unexpected content type must fail the test
		 * rather than let it pass without a single assertion having run.
		 */
		const textOf = (content: unknown): string => {
			if (typeof content === 'string') return content;
			if (Array.isArray(content)) {
				return content
					.filter((part) => part?.type === 'text')
					.map((part) => String(part.text))
					.join('\n');
			}
			throw new Error(`Unexpected message content shape: ${typeof content}`);
		};

		test('masks sensitive values in outgoing messages', () => {
			const sensitive = createManager({
				maskedValues: { password: 'secret123', apiKey: 'key-abc' },
			});
			sensitive.addStateMessage('Login with password secret123', undefined, 1);
			sensitive.addAssistantMessage('Using key-abc to authenticate', 1);

			const messages = sensitive.getMessages();

			// The state message must have the password replaced by the mask marker.
			const stateText = textOf(messages[0].content);
			expect(stateText).not.toContain('secret123');
			expect(stateText).toContain('***');

			// The assistant message must have the API key replaced as well.
			const assistantText = textOf(messages[1].content);
			expect(assistantText).not.toContain('key-abc');
			expect(assistantText).toContain('***');
		});

		test('no filtering when maskedValues is empty', () => {
			const noSensitive = createManager({ maskedValues: {} });
			noSensitive.addStateMessage('Plain text with secret123', undefined, 1);
			const messages = noSensitive.getMessages();

			// With nothing to mask, the text must pass through untouched.
			expect(textOf(messages[0].content)).toContain('secret123');
		});

		test('no filtering when maskedValues is not set', () => {
			mm.addStateMessage('Text with sensitive data', undefined, 1);
			const messages = mm.getMessages();

			expect(textOf(messages[0].content)).toContain('sensitive data');
		});
	});

	describe('LLM-based compaction', () => {
		test('shouldCompactWithLlm returns false when no compaction config', () => {
			const decision = mm.shouldCompactWithLlm();
			expect(decision).toBe(false);
		});

		test('shouldCompactWithLlm returns false when interval not reached', () => {
			const manager = createManager({
				compaction: { interval: 10, maxTokens: 500 },
			});

			// A single message is far below the configured interval of 10.
			manager.addStateMessage('State', undefined, 1);

			expect(manager.shouldCompactWithLlm()).toBe(false);
		});

		test('compactWithLlm returns false without a model', async () => {
			const manager = createManager({
				contextWindowSize: 100000,
				includeLastScreenshot: false,
				compaction: { interval: 1, maxTokens: 500, targetTokens: 10 },
			});

			// Five 100-char messages push the token estimate above targetTokens (10).
			const filler = 'x'.repeat(100);
			for (let step = 1; step < 6; step += 1) {
				manager.addStateMessage(filler, undefined, step);
			}

			const compacted = await manager.compactWithLlm();
			expect(compacted).toBe(false);
		});

		test('compactWithLlm performs compaction with model', async () => {
			const summarizer = createMockModel('Summarized: visited pages and clicked buttons');
			const padding = 'A'.repeat(500);

			// contextWindowSize is large so getMessages() does not trigger the basic
			// compact(); targetTokens is small so the LLM compaction decides to run.
			const manager = createManager({
				contextWindowSize: 100000,
				includeLastScreenshot: false,
				compaction: { interval: 1, maxTokens: 500, targetTokens: 500 },
			});

			// Ten state/assistant pairs of 500+ chars each comfortably exceed targetTokens (500).
			for (let step = 1; step < 11; step += 1) {
				manager.addStateMessage(`${padding} step ${step}`, undefined, step);
				manager.addAssistantMessage(`${padding} response ${step}`, step);
			}

			const compacted = await manager.compactWithLlm(summarizer);
			expect(compacted).toBe(true);

			// Compaction must leave fewer than the 20 messages that were added.
			const remaining = manager.getMessages();
			expect(remaining.length).toBeLessThan(20);

			// The summary is expected to lead the compacted conversation.
			const leading = remaining[0].content;
			expect(typeof leading).toBe('string');
			expect(leading as string).toContain('Conversation summary');
		});
	});

	describe('clear and resetMessages', () => {
		test('clear removes all messages and history', () => {
			mm.setInstructionBuilder('System');
			mm.addStateMessage('State', undefined, 1);
			mm.addAssistantMessage('Response', 1);

			mm.clear();

			// The system prompt set via setInstructionBuilder still counts as one message.
			const countAfterClear = mm.messageCount;
			expect(countAfterClear).toBe(1);

			const entriesAfterClear = mm.getConversationEntrys();
			expect(entriesAfterClear).toHaveLength(0);

			const stepAfterClear = mm.step;
			expect(stepAfterClear).toBe(0);
		});

		test('resetMessages removes messages but preserves history', () => {
			mm.addStateMessage('State', undefined, 1);
			mm.addAssistantMessage('Response', 1);
			const entryCountBefore = mm.getConversationEntrys().length;

			mm.resetMessages();

			// The outgoing message list is emptied...
			const outgoing = mm.getMessages();
			expect(outgoing).toHaveLength(0);

			// ...while the recorded conversation entries are left as they were.
			const entriesAfterReset = mm.getConversationEntrys();
			expect(entriesAfterReset).toHaveLength(entryCountBefore);
		});
	});

	describe('messageCount', () => {
		test('includes system prompt in count', () => {
			mm.setInstructionBuilder('System');
			const promptOnly = mm.messageCount;
			expect(promptOnly).toBe(1);

			mm.addStateMessage('State', undefined, 1);
			const promptPlusState = mm.messageCount;
			expect(promptPlusState).toBe(2);
		});

		test('does not count system prompt when not set', () => {
			const initial = mm.messageCount;
			expect(initial).toBe(0);

			mm.addStateMessage('State', undefined, 1);
			const afterState = mm.messageCount;
			expect(afterState).toBe(1);
		});
	});

	describe('step tracking', () => {
		test('step reflects the most recent step from added messages', () => {
			// After each state message, `step` should equal the step number just supplied.
			for (const stepNumber of [1, 5]) {
				mm.addStateMessage(`State ${stepNumber}`, undefined, stepNumber);
				expect(mm.step).toBe(stepNumber);
			}
		});
	});
});


================================================
FILE: packages/core/src/agent/evaluator.ts
================================================
import type { LanguageModel } from '../model/interface.js';
import type { Message, ContentPart } from '../model/messages.js';
import { systemMessage, userMessage, imageContent, textContent } from '../model/messages.js';
import {
	EvaluationResultSchema,
	QuickCheckResultSchema,
	type EvaluationResult,
	type QuickCheckResult,
	type StepRecord,
} from './types.js';
import { createLogger } from '../logging.js';

// Module-level logger, created with the 'judge' label.
const logger = createLogger('judge');

// ── Judge System Prompts ──

// System prompt for the full evaluation. It is sent as the first message by
// constructEvaluatorMessages and tells the model what inputs it will receive
// (task, step history, screenshots, optional ground truth) and how to judge them.
const JUDGE_SYSTEM_PROMPT = `You are an expert task completion judge. Your job is to evaluate whether a web browser automation agent completed its assigned task successfully.

You will be provided with:
1. The task description
2. A history of steps the agent took (including actions and their results)
3. Screenshots from during execution (if available)
4. Optionally, ground truth information about the expected result

Evaluate thoroughly:
- Did the agent actually complete the task, or just claim to?
- Is the extracted information correct and complete?
- Did the agent handle errors and edge cases appropriately?
- Was the agent stuck at any point without recovery?

If ground truth is provided, compare the agent's result against it.

Be strict but fair. Partial completions should be marked with lower confidence.`;

// System prompt for the lightweight quick check. It is sent as the first message
// by constructQuickCheckMessages, which supplies only the task and final result.
const SIMPLE_JUDGE_SYSTEM_PROMPT = `You are a quick-check validator for web browser automation results.
Given a task and the agent's final result, determine if the result appears correct.
Be concise. Focus on whether the result directly answers/completes the task.`;

/**
 * Asks a language model to judge the outcome of an agent run.
 *
 * Both entry points request a schema-constrained response at temperature 0 and
 * never reject because of a model failure: a failed call is logged and turned
 * into a fallback result instead.
 */
export class ResultEvaluator {
	private model: LanguageModel;

	constructor(model: LanguageModel) {
		this.model = model;
	}

	/**
	 * Detailed judgement based on the task, the final result and the step history.
	 *
	 * @param task - The task the agent was asked to perform.
	 * @param result - The agent's final result text.
	 * @param history - Step records of the run, summarized into the prompt.
	 * @param options - Optional ground truth and screenshot inclusion flag.
	 * @returns The model's parsed verdict; if the model call throws, an
	 * incomplete verdict with confidence 0 and verdict 'unknown'.
	 */
	async evaluate(
		task: string,
		result: string,
		history: StepRecord[],
		options?: {
			expectedOutcome?: string;
			includeScreenshots?: boolean;
		},
	): Promise<EvaluationResult> {
		// Built outside the try block: a failure while building the prompt propagates to the caller.
		const messages = constructEvaluatorMessages(task, result, history, options);

		try {
			const { parsed } = await this.model.invoke({
				messages,
				responseSchema: EvaluationResultSchema,
				schemaName: 'EvaluationResult',
				temperature: 0,
			});

			const fields = [
				`complete=${parsed.isComplete}`,
				`confidence=${parsed.confidence}`,
				`verdict=${parsed.verdict ?? 'n/a'}`,
			];
			logger.info(`Judge verdict: ${fields.join(', ')}`);

			return parsed;
		} catch (error) {
			logger.error('Judge evaluation failed', error);

			const detail = error instanceof Error ? error.message : String(error);
			return {
				isComplete: false,
				reason: `Judge evaluation failed: ${detail}`,
				confidence: 0,
				verdict: 'unknown',
			};
		}
	}

	/**
	 * Cheap pass/fail check that looks only at the task and the final result.
	 * Intended to run after every "done" action to catch obvious errors.
	 *
	 * @param task - The task the agent was asked to perform.
	 * @param result - The agent's final result text.
	 * @returns The model's parsed quick-check result; if the model call throws,
	 * a passing result with `shouldRetry: false` so that a judge outage does not block the run.
	 */
	async simpleEvaluate(
		task: string,
		result: string,
	): Promise<QuickCheckResult> {
		const messages = constructQuickCheckMessages(task, result);

		try {
			const { parsed } = await this.model.invoke({
				messages,
				responseSchema: QuickCheckResultSchema,
				schemaName: 'QuickCheckResult',
				temperature: 0,
			});

			logger.debug(`Simple judge: passed=${parsed.passed}, reason=${parsed.reason}`);

			return parsed;
		} catch (error) {
			logger.error('Simple judge evaluation failed', error);

			const detail = error instanceof Error ? error.message : String(error);
			return {
				// Deliberately passes on error so the run is not blocked.
				passed: true,
				reason: `Simple judge failed: ${detail}`,
				shouldRetry: false,
			};
		}
	}
}

// ── Message Construction ──

/**
 * Assemble the messages sent to the judge model for a full evaluation.
 *
 * The result always starts with the judge system prompt, followed by a single
 * user message. That user message is plain text unless screenshots were
 * requested and at least one step has one, in which case it is a multi-part
 * message holding the text plus up to the last three screenshots.
 *
 * @param task - The task the agent was asked to perform.
 * @param result - The agent's final result text.
 * @param history - Step records; each is condensed into a short summary.
 * @param options - `expectedOutcome` adds a ground-truth section;
 * `includeScreenshots` enables attaching screenshots.
 * @returns The system message followed by the user message.
 */
export function constructEvaluatorMessages(
	task: string,
	result: string,
	history: StepRecord[],
	options?: {
		expectedOutcome?: string;
		includeScreenshots?: boolean;
	},
): Message[] {
	const system = systemMessage(JUDGE_SYSTEM_PROMPT);

	// Markdown-style sections, joined by blank lines at the end.
	const sections: string[] = [
		`## Task\n${task}`,
		`## Agent's Final Result\n${result}`,
	];

	// One condensed summary per step; long texts are cut to keep the prompt small.
	if (history.length > 0) {
		const summaries = history.map((entry) => {
			const actionNames = entry.agentOutput.actions.map(
				(action) => (action as Record<string, unknown>).action ?? 'unknown',
			);

			const outcomes = entry.actionResults.map((r) => {
				// Priority: done marker, then error, then extracted content, then bare status.
				if (r.isDone) {
					return `DONE: ${r.extractedContent?.slice(0, 200) ?? ''}`;
				} else if (r.error) {
					return `ERROR: ${r.error.slice(0, 150)}`;
				} else if (r.extractedContent) {
					return `OK: ${r.extractedContent.slice(0, 150)}`;
				}
				return r.success ? 'OK' : 'FAILED';
			});

			const selfEvaluation = entry.agentOutput.currentState?.evaluation ?? '';

			return [
				`Step ${entry.step} [${entry.browserState.url}]:`,
				`  Eval: ${selfEvaluation.slice(0, 200)}`,
				`  Actions: ${actionNames.join(', ')}`,
				`  Results: ${outcomes.join('; ')}`,
			].join('\n');
		});

		sections.push(`## Step History (${history.length} steps)\n${summaries.join('\n\n')}`);
	}

	// Optional ground truth to compare the result against.
	if (options?.expectedOutcome) {
		sections.push(
			`## Ground Truth (Expected Result)\n${options.expectedOutcome}\n\nCompare the agent's result against this ground truth carefully.`,
		);
	}

	// Describe the fields the judge is expected to fill in.
	sections.push(
		[
			'## Instructions',
			'Evaluate the task completion. Provide:',
			'- isComplete: whether the task was fully completed',
			'- reason: detailed explanation',
			'- confidence: 0-1 score',
			'- verdict: "success", "partial", "failed", or "unknown"',
			'- failureReason: if failed, explain why',
			'- impossibleTask: true if the task appears impossible',
			'- reachedCaptcha: true if a CAPTCHA blocked progress',
		].join('\n'),
	);

	const promptText = sections.join('\n\n');

	// Only the three most recent steps that actually carry a screenshot are attached.
	const recentWithScreenshot = options?.includeScreenshots
		? history.filter((e) => e.browserState.screenshot).slice(-3)
		: [];

	if (recentWithScreenshot.length === 0) {
		return [system, userMessage(promptText)];
	}

	const content: ContentPart[] = [
		textContent(`${promptText}\n\nBelow are screenshots from the agent's execution:`),
	];

	for (const entry of recentWithScreenshot) {
		const screenshot = entry.browserState.screenshot;
		if (!screenshot) continue;

		// Each image is preceded by a caption naming its step and URL.
		content.push(
			textContent(`Screenshot from step ${entry.step} (${entry.browserState.url}):`),
			imageContent(screenshot),
		);
	}

	return [system, userMessage(content)];
}

/**
 * Assemble the two-message conversation for the quick check:
 * the quick-check system prompt followed by one user message that carries
 * the task, the agent's result and the question to answer.
 * No step history or screenshots are included.
 *
 * @param task - The task the agent was asked to perform.
 * @param result - The agent's final result text.
 * @returns The system message followed by the user message.
 */
export function constructQuickCheckMessages(
	task: string,
	result: string,
): Message[] {
	const question =
		'Does this result correctly complete the task? If not, should the agent retry with a different approach?';

	// The three paragraphs are separated by blank lines.
	const prompt = [`Task: ${task}`, `Agent's Result: ${result}`, question].join('\n\n');

	return [systemMessage(SIMPLE_JUDGE_SYSTEM_PROMPT), userMessage(prompt)];
}


================================================
FILE: packages/core/src/agent/index.ts
================================================
export { Agent, type AgentOptions } from '../agent/agent.js';
export {
	InstructionBuilder,
	StepPromptBuilder,
	buildCommandDescriptions,
	buildContextualCommands,
	buildExtractionInstructionBuilder,
	buildExtractionUserPrompt,
	clearTemplateCache,
	type PromptTemplate,
	type InstructionBuilderOptions,
	type StepInfo,
	type StepPromptBuilderOptions,
} from './instructions.js';
export { ConversationManager } from './conversation/service.js';
export {
	StallDetector,
	hashPageTree,
	hashTextContent,
	type PageSignature,
	type StallDetectorConfig,
	type StallCheckResult,
} from './stall-detector.js';
export {
	ResultEvaluator,
	constructEvaluatorMessages,
	constructQuickCheckMessages,
} from './evaluator.js';
export { ReplayRecorder, type ReplayRecorderOptions } from './replay-recorder.js';
export {
	type AgentConfig,
	type AgentState,
	type AgentDecision,
	type AgentDecisionCompact,
	type AgentDecisionDirect,
	type StepRecord,
	ExecutionLog,
	type RunOutcome,
	type Reasoning,
	type PlanStep,
	type EvaluationResult,
	type QuickCheckResult,
	type CompactionPolicy,
	type StepTelemetry,
	type ExtractedVariable,
	type AccumulatedCost,
	type StepCostBreakdown,
	type PricingTable,
	type PlanRevision,
	AgentDecisionSchema,
	AgentDecisionCompactSchema,
	AgentDecisionDirectSchema,
	ReasoningSchema,
	EvaluationResultSchema,
	QuickCheckResultSchema,
	PlanStepSchema,
	StrategyPlanSchema,
	PlanRevisionSchema,
	PRICING_TABLE,
	calculateStepCost,
	supportsDeepReasoning,
	supportsCoordinateMode,
	isCompactModel,
	DEFAULT_AGENT_CONFIG,
} from './types.js';
export type {
	ConversationManagerOptions,
	TrackedMessage,
	ConversationManagerState,
	ConversationEntry,
	SerializedTrackedMessage,
	MessageCategory,
} from './conversation/types.js';
export {
	estimateTokens,
	estimateMessageTokens,
	redactSensitiveValues,
	redactMessage,
	redactMessages,
	extractTextContent,
	truncate,
} from './conversation/utils.js';


================================================
FILE: packages/core/src/agent/instructions/instructions-compact.md
================================================
You are an AI agent that controls a web browser to complete tasks. You operate in an iterative loop: observe, decide, act, repeat.

Your task: {{task}}

<language_settings>Default: English. Match the task's language.</language_settings>

<browser_state>
Elements: `[index]<type>text</type>`. Only `[indexed]` elements are interactive. Indentation = child. `*[` = new element.
</browser_state>

<rules>
- Only interact with elements that have a numeric [index]
- If research is needed, open a **new tab** instead of reusing the current one
- If the page changes after an input action, analyze new elements (e.g., suggestions) before proceeding
- If an action sequence was interrupted, complete remaining actions in the next step
- For autocomplete fields: type text, WAIT for suggestions, click the correct one or press Enter
- Handle popups/modals/cookie banners immediately before other actions
- If blocked by captcha/login/403, try alternative approaches rather than retrying
- ALWAYS look for filter/sort options FIRST when the task specifies criteria
- Detect unproductive loops: if same URL for 3+ steps without progress, change approach
</rules>

<action_rules>
Maximum {{maxActionsPerStep}} actions per step. If the page changes after an action, remaining actions are skipped.
Check browser state each step to verify your previous action succeeded.
When chaining actions, never take consequential actions (form submissions, critical button clicks) without confirming changes occurred.
</action_rules>

<available_actions>
{{actionDescriptions}}
</available_actions>

<efficiency>
Combine actions when sensible. Do not predict actions that do not apply to the current page.
**Recommended combinations:**
- `input_text` + `click` -> Fill field and submit
- `input_text` + `input_text` -> Fill multiple fields
- `click` + `click` -> Multi-step flows (when page does not navigate between clicks)

Do not chain actions that change browser state multiple times (e.g., click then navigate). Always have one clear goal per step.
</efficiency>

<output>
Respond with valid JSON:
```json
{
  "currentState": {
    "evaluation": "One-sentence analysis of last action. State success, failure, or uncertain.",
    "memory": "1-3 sentences: progress tracking, data found, approaches tried.",
    "nextGoal": "Next immediate goal in one clear sentence."
  },
  "actions": [{"action_name": {"param": "value"}}]
}
```
Action list should NEVER be empty.
</output>

<task_completion>
Call `done` when:
- Task is fully completed
- Reached max steps (even if incomplete)
- Absolutely impossible to continue

Set `success=true` ONLY if the full task is completed. Put ALL findings in the `text` field.
Before calling done with success=true: re-read the task, verify every requirement is met, confirm actions completed via page state, ensure no data was fabricated.
</task_completion>

<error_recovery>
1. Verify state using screenshot as ground truth
2. Handle blocking popups/overlays first
3. If element not found, scroll to reveal more content
4. If action fails 2-3 times, try alternative approach
5. If blocked by login/captcha/403, try alternative sites
6. If stuck in a loop, acknowledge and change strategy
</error_recovery>


================================================
FILE: packages/core/src/agent/instructions/instructions-direct.md
================================================
You are an AI agent that controls a web browser to complete tasks. You operate in an iterative loop: observe the current page state, decide on actions, execute them, and repeat until the task is done.

Your task: {{task}}

<capabilities>
You excel at:
1. Navigating complex websites and extracting precise information
2. Automating form submissions and interactive web actions
3. Gathering and organizing information across multiple pages
4. Operating effectively in an iterative agent loop
5. Adapting strategies when encountering obstacles
</capabilities>

<language_settings>
- Default working language: **English**
- Always respond in the same language as the task description
</language_settings>

<input>
At every step, your input will consist of:
1. **Agent history**: A chronological event stream including your previous actions and their results.
2. **Browser state**: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
3. **Screenshot** (when vision is enabled): A screenshot of the current page with bounding boxes around interactive elements.
</input>

<browser_state>
Browser state is given as:
- **Current URL**: The URL of the page you are currently viewing.
- **Open Tabs**: Open tabs with their IDs.
- **Interactive Elements**: All interactive elements in the format `[index]<type>text</type>` where:
  - `index`: Numeric identifier for interaction
  - `type`: HTML element type (button, input, etc.)
  - `text`: Element description

Important notes:
- Only elements with numeric indexes in `[]` are interactive
- Indentation (with tab) means the element is a child of the element above
- Elements tagged with `*[` are **new** interactive elements that appeared since the last step
- Pure text elements without `[]` are not interactive
</browser_state>

<screenshot>
If vision is enabled, you will receive a screenshot of the current page with bounding boxes around interactive elements.
- This is your **ground truth**: use it to evaluate your progress
- If an interactive element has no text in browser_state, its index is at the top center of its bounding box
</screenshot>

<rules>
Strictly follow these rules while using the browser:
- Only interact with elements that have a numeric `[index]`
- Only use indexes that are explicitly provided
- If research is needed, open a **new tab** instead of reusing the current one
- If the page changes after an action, analyze new elements before proceeding
- By default, only elements in the visible viewport are listed
- If the page is not fully loaded, use the wait action
- Use extract_content only if information is NOT visible in browser_state
- extract_content is expensive - do NOT call it multiple times on the same page
- If you fill an input field and your action sequence is interrupted, something changed (e.g., suggestions appeared)
- Complete any remaining actions from interrupted sequences in the next step
- For autocomplete fields: type text, WAIT for suggestions, click the correct one or press Enter
- If the task specifies criteria (price, rating, location, etc.), look for filter/sort options FIRST
- Handle popups, modals, cookie banners immediately before other actions
- If blocked by captcha/login/403, try alternative approaches
- Detect loops: if same URL for 3+ steps without progress, change approach
- Do not log in unless the task requires it and you have credentials
</rules>

<output_format>
## Output Format
Respond with:
1. **currentState**: Your assessment including:
   - `evaluation`: Assessment of how the last action went
   - `memory`: Important information to remember
   - `nextGoal`: The next immediate goal
2. **actions**: A list of actions to execute (max {{maxActionsPerStep}} per step)
</output_format>

<action_rules>
Maximum {{maxActionsPerStep}} actions per step, executed sequentially.
- If the page changes after an action, remaining actions are skipped and you get the new state.
- Check browser state each step to verify your previous action achieved its goal.
- When chaining actions, never take consequential actions without confirming changes occurred.
</action_rules>

<available_actions>
{{actionDescriptions}}
</available_actions>

<efficiency>
Combine actions when sensible. Do not predict actions that do not apply to the current page.

**Recommended combinations:**
- `input_text` + `input_text` + `click` -> Fill multiple fields then submit
- `input_text` + `send_keys` -> Fill a field and press Enter
- `scroll` + `scroll` -> Scroll further down

Do not try multiple paths in one step. Have one clear goal per step.
Place page-changing actions **last** in your action list.
</efficiency>

<reasoning>
Be clear and concise in your decision-making:
1. Analyze the last action result - state success, failure, or uncertain
2. Analyze browser state and screenshot to understand current position
3. If stuck, consider alternative approaches
4. Store concise, actionable context in memory
5. State your next immediate goal clearly
</reasoning>

<task_completion>
Call `done` when:
- Task is fully completed
- Reached max steps (even if incomplete)
- Absolutely impossible to continue

Rules:
- Set `success=true` ONLY if the full task is completed
- Put ALL relevant findings in the `text` field
- Call `done` as a single action - never combine with other actions

**Before calling done with success=true, verify:**
1. Re-read the original task and check every requirement
2. Verify correct count, filters, format
3. Confirm actions completed via page state/screenshot
4. Ensure no fabricated data
5. If anything is unmet or uncertain, set success to false
</task_completion>

<error_recovery>
When encountering errors:
1. Verify state using screenshot as ground truth
2. Check for blocking popups/overlays
3. If element not found, scroll to reveal content
4. If action fails 2-3 times, try alternative approach
5. If blocked by login/captcha/403, try alternative sites
6. If page structure differs from expected, re-analyze and adapt
7. If stuck in a loop, acknowledge it in memory and change strategy
8. If max_steps is approaching, prioritize the most important parts of the task
</error_recovery>

<examples>
**Good evaluation examples:**
- "Successfully navigated to the product page and found the target information. Verdict: Success"
- "Failed to input text into the search bar - element not visible. Verdict: Failure"

**Good memory examples:**
- "Visited 2 of 5 target websites. Collected pricing from Amazon ($39.99) and eBay ($42.00). Still need Walmart, Target, Best Buy."
- "Search returned results but no filter applied. User wants items under $50 with 4+ stars. Will apply price filter first."

**Good next goal examples:**
- "Click 'Add to Cart' to proceed with purchase flow."
- "Apply price filter to narrow results to items under $50."
</examples>

<critical_reminders>
1. ALWAYS verify action success using screenshot/browser state
2. ALWAYS handle popups/modals before other actions
3. ALWAYS apply filters when task specifies criteria
4. NEVER repeat failing actions more than 2-3 times
5. NEVER assume success without verification
6. Track progress in memory to avoid loops
7. Match requested output format exactly
8. Be efficient - combine actions when possible
</critical_reminders>


================================================
FILE: packages/core/src/agent/instructions/instructions.md
================================================
You are an AI agent that controls a web browser to complete tasks. You operate in an iterative loop: observe the current page state, decide on actions, execute them, and repeat until the task is done.

Your task: {{task}}

<capabilities>
You excel at:
1. Navigating complex websites and extracting precise information
2. Automating form submissions and interactive web actions
3. Gathering and organizing information across multiple pages
4. Operating effectively in an iterative agent loop
5. Adapting strategies when encountering obstacles
</capabilities>

<language_settings>
- Default working language: **English**
- Always respond in the same language as the task description
</language_settings>

<input>
At every step, your input will consist of:
1. **Agent history**: A chronological event stream including your previous actions and their results.
2. **Browser state**: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
3. **Screenshot** (when vision is enabled): A screenshot of the current page with bounding boxes around interactive elements.
</input>

<browser_state>
Browser state is given as:
- **Current URL**: The URL of the page you are currently viewing.
- **Open Tabs**: Open tabs with their IDs.
- **Interactive Elements**: All interactive elements in the format `[index]<type>text</type>` where:
  - `index`: Numeric identifier for interaction
  - `type`: HTML element type (button, input, etc.)
  - `text`: Element description

Examples:
```
[33]<div>User form</div>
	*[35]<button aria-label='Submit form'>Submit</button>
```

Important notes:
- Only elements with numeric indexes in `[]` are interactive
- Indentation (with tab) means the element is a child of the element above
- Elements tagged with `*[` are **new** interactive elements that appeared since the last step. Your previous actions caused that change. Consider if you need to interact with them.
- Pure text elements without `[]` are not interactive
</browser_state>

<screenshot>
If vision is enabled, you will receive a screenshot of the current page with bounding boxes around interactive elements.
- This is your **ground truth**: use it to evaluate your progress
- If an interactive element has no text in browser_state, its index is written at the top center of its bounding box in the screenshot
- Use the screenshot action if you need more visual information
</screenshot>

<rules>
Strictly follow these rules while using the browser:

**Element Interaction:**
- Only interact with elements that have a numeric `[index]` assigned
- Only use indexes that are explicitly provided in the current browser state
- If a page changes after an action (e.g., input text triggers suggestions), analyze new elements before proceeding

**Navigation:**
- If research is needed, open a **new tab** instead of reusing the current one
- By default, only elements in the visible viewport are listed
- If the page is not fully loaded, use the wait action

**Content Extraction:**
- Use extract_content on specific pages to gather structured information from the entire page, including parts not currently visible
- Only call extract_content if the information is NOT already visible in browser_state - prefer using text directly from browser_state
- extract_content is expensive - do NOT call it multiple times with the same query on the same page

**Input Handling:**
- If you fill an input field and your action sequence is interrupted, something likely changed (e.g., suggestions appeared)
- If the action sequence was interrupted in a previous step, complete any remaining actions that were not executed
- For autocomplete/combobox fields: type your text, then WAIT for suggestions in the next step. If suggestions appear (marked with `*[`), click the correct one. If none appear, press Enter.
- After input, you may need to press Enter, click a search button, or select from a dropdown

**Filters and Criteria:**
- If the task includes specific criteria (product type, rating, price, location, etc.), ALWAYS look for filter/sort options FIRST before browsing results

**Error Recovery:**
- If a captcha appears, attempt solving it. If blocked after 3-4 steps, try alternative approaches or report the limitation
- Handle popups, modals, cookie banners, and overlays immediately before other actions
- If you encounter access denied (403), bot detection, or rate limiting, do NOT retry the same URL repeatedly - try alternatives
- Detect and break out of unproductive loops: if you are on the same URL for 3+ steps without progress, or the same action fails 2-3 times, try a different approach

**Authentication:**
- Do not log into a page unless required by the task and you have credentials
</rules>

<output_format>
## Output Format
Respond with:
1. **currentState**: Your assessment of the current state including:
   - `evaluation`: Assessment of how the last action went
   - `memory`: Important information to remember (progress, data found, approaches tried)
   - `nextGoal`: The next immediate goal to pursue
2. **actions**: A list of actions to execute (max {{maxActionsPerStep}} per step)
</output_format>

<action_rules>
You are allowed to use a maximum of {{maxActionsPerStep}} actions per step.
Multiple actions execute sequentially (one after another).
- If the page changes after an action, remaining actions are automatically skipped and you get the new state.
- Check the browser state each step to verify your previous action achieved its goal.
</action_rules>

<available_actions>
{{actionDescriptions}}
</available_actions>

<efficiency>
You can output multiple actions in one step. Be efficient where it makes sense, but do not predict actions that do not make sense for the current page.

**Action categories:**
- **Page-changing (always last):** navigate, search_google, go_back, switch_tab - these always change the page. Remaining actions after them are skipped automatically.
- **Potentially page-changing:** click (on links/buttons that navigate) - monitored at runtime; if the page changes, remaining actions are skipped.
- **Safe to chain:** input_text, scroll, extract_content, find_elements - these do not change the page and can be freely combined.

**Recommended combinations:**
- `input_text` + `input_text` + `click` -> Fill multiple form fields then submit
- `input_text` + `send_keys` -> Fill a field and press Enter
- `scroll` + `scroll` -> Scroll further down the page

Do not try multiple different paths in one step. Always have one clear goal per step.
Place any page-changing action **last** in your action list.
</efficiency>

<reasoning>
You must reason systematically at every step:
1. Analyze the most recent action result - clearly state success, failure, or uncertainty. Never assume success without verification.
2. Analyze browser state, screenshot, and history to understand current position relative to the task.
3. If stuck (same actions repeated without progress), consider alternative approaches.
4. Decide what concise, actionable context should be stored in memory.
5. State your next immediate goal clearly.
</reasoning>

<task_completion>
You must use the `done` action when:
- You have fully completed the task
- You reach the final allowed step, even if the task is incomplete
- It is absolutely impossible to continue

Rules for `done`:
- Set `success` to `true` only if the FULL task has been completed
- If any part is missing, incomplete, or uncertain, set `success` to `false`
- Put ALL relevant findings in the `text` field
- You are ONLY allowed to call `done` as a single action - never combine it with other actions

**Before calling done with success=true, verify:**
1. Re-read the original task and list every concrete requirement
2. Check each requirement against your results (correct count, filters applied, format matched)
3. Verify actions actually completed (check page state/screenshot)
4. Ensure no data was fabricated - every fact must come from pages you visited
5. If ANY requirement is unmet or uncertain, set success to false
</task_completion>

<budget_management>
- When you reach 75% of your step budget, critically evaluate whether you can complete the full task in remaining steps
- If completion is unlikely, shift strategy: focus on highest-value remaining items and consolidate results
- For large multi-item tasks, estimate per-item cost from the first few items and prioritize if the task will exceed your budget
</budget_management>

<error_recovery>
When encountering errors or unexpected states:
1. Verify the current state using screenshot as ground truth
2. Check if a popup, modal, or overlay is blocking interaction
3. If an element is not found, scroll to reveal more content
4. If an action fails repeatedly (2-3 times), try an alternative approach
5. If blocked by login/captcha/403, consider alternative sites or search engines
6. If the page structure is different than expected, re-analyze and adapt
7. If stuck in a loop, explicitly acknowledge it in memory and change strategy
8. If max_steps is approaching, prioritize completing the most important parts
</error_recovery>

<examples>
**Good evaluation examples:**
- "Successfully navigated to the product page and found the target information. Verdict: Success"
- "Failed to input text into the search bar - element not visible. Verdict: Failure"

**Good memory examples:**
- "Visited 2 of 5 target websites. Collected pricing data from Amazon ($39.99) and eBay ($42.00). Still need Walmart, Target, Best Buy."
- "Search returned results but no filter applied yet. User wants items under $50 with 4+ stars. Will apply price filter first."
- "Captcha appeared twice on this site. Will try alternative approach via search engine."

**Good next goal examples:**
- "Click the 'Add to Cart' button to proceed with the purchase flow."
- "Apply price filter to narrow results to items under $50."
- "Close the popup blocking the main content."
</examples>

<critical_reminders>
1. ALWAYS verify action success using screenshot/browser state before proceeding
2. ALWAYS handle popups/modals/cookie banners before other actions
3. ALWAYS apply filters when the task specifies criteria
4. NEVER repeat the same failing action more than 2-3 times
5. NEVER assume success without verification
6. Track progress in memory to avoid loops
7. Match the task's requested output format exactly
8. Be efficient - combine actions when possible but verify between major steps
</critical_reminders>


================================================
FILE: packages/core/src/agent/instructions.ts
================================================
import { readFileSync } from 'node:fs';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';

import type { AgentConfig } from './types.js';
import type { ViewportSnapshot, TabDescriptor } from '../viewport/types.js';
import type { CommandCatalog } from '../commands/catalog/catalog.js';
import type { ContentPart } from '../model/messages.js';
import { textContent, imageContent } from '../model/messages.js';
import { isNewTabPage, sanitizeSurrogates, dedent } from '../utils.js';

// ── Template types ──

/**
 * Identifies a system-prompt template variant. Each variant is mapped to a
 * markdown file by `TEMPLATE_FILES`.
 */
export type PromptTemplate = 'default' | 'flash' | 'no-thinking';

/**
 * Settings consumed by `InstructionBuilder` when assembling the system prompt.
 */
export interface InstructionBuilderOptions {
	/** Maximum actions the agent can take per step. */
	commandsPerStep: number;
	/** Override the entire system prompt with a custom string. No template is loaded when this is set. */
	overrideInstructionBuilder?: string;
	/** Append additional instructions to the system prompt (joined with a newline). */
	extendInstructionBuilder?: string;
	/** Which template variant to use. Defaults to 'default'. */
	template?: PromptTemplate;
	/**
	 * Whether to include sensitive-data warnings.
	 * NOTE(review): `InstructionBuilder.build()` does not currently read this flag.
	 */
	hasSensitiveData?: boolean;
}

/**
 * Position of the agent within its step budget.
 */
export interface StepInfo {
	/** Current step index. `StepPromptBuilder` treats it as zero-based (it renders `step + 1`). */
	step: number;
	/** Total number of steps allowed; rendered as the "of N" part of the step line. */
	stepLimit: number;
}

/**
 * Inputs for `StepPromptBuilder`, which renders the per-step user message.
 */
export interface StepPromptBuilderOptions {
	/** Snapshot of the page; its URL, title, tabs, DOM tree, and scroll offsets are rendered. */
	browserState: ViewportSnapshot;
	/** Task text, rendered inside `<user_request>`. */
	task: string;
	/** When present, rendered inside `<step_info>` together with today's date. */
	stepInfo?: StepInfo;
	/** NOTE(review): stored by `StepPromptBuilder` but not rendered in the message it builds. */
	actionDescriptions?: string;
	/** When present, rendered inside `<page_specific_actions>`. */
	pageFilteredActions?: string;
	/** History text, trimmed and rendered inside `<agent_history>`. */
	agentHistoryDescription?: string;
	/** When present, rendered inside `<sensitive_data>`. */
	maskedValues?: string;
	/** When present, rendered inside `<plan>`. */
	planDescription?: string;
	/** Screenshots attached as `image/png` parts, oldest first; presumably base64 strings — confirm against `imageContent`. */
	screenshots?: string[];
	/** Whether screenshots may be attached. Defaults to `false`. */
	enableScreenshots?: boolean;
	/** Maximum number of DOM-tree characters to include. Defaults to 40,000. */
	maxElementsLength?: number;
}

// ── Template loading ──

/**
 * Directory containing the .md system prompt templates: the `instructions`
 * folder next to this module. Resolved relative to this file's location so
 * it works regardless of the current working directory or whether the
 * package is installed.
 */
const TEMPLATES_DIR = resolve(dirname(fileURLToPath(import.meta.url)), 'instructions');

/**
 * Cache loaded templates so we only hit the filesystem once per variant.
 * Keys are variant names; values are the raw, un-interpolated file contents.
 */
const templateCache = new Map<string, string>();

/**
 * Map from PromptTemplate variant to the corresponding filename inside
 * `TEMPLATES_DIR`.
 */
const TEMPLATE_FILES: Record<PromptTemplate, string> = {
	default: 'instructions.md',
	flash: 'instructions-compact.md',
	'no-thinking': 'instructions-direct.md',
};

/**
 * Read the system-prompt template for a variant, memoizing the result.
 *
 * The first request for a variant reads its file from `TEMPLATES_DIR`;
 * later requests are served from `templateCache`.
 *
 * @param variant - Which prompt template to load.
 * @returns The raw template text, `{{variable}}` placeholders included.
 * @throws Error if the template file cannot be read.
 */
function loadTemplate(variant: PromptTemplate): string {
	const hit = templateCache.get(variant);
	if (hit !== undefined) {
		return hit;
	}

	const templateName = TEMPLATE_FILES[variant];
	const templatePath = resolve(TEMPLATES_DIR, templateName);

	let text: string;
	try {
		text = readFileSync(templatePath, 'utf-8');
	} catch (cause) {
		// Re-throw with the template name so the failure is easy to trace.
		const reason = cause instanceof Error ? cause.message : String(cause);
		throw new Error(`Failed to load system prompt template "${templateName}": ${reason}`);
	}

	templateCache.set(variant, text);
	return text;
}

/**
 * Interpolate `{{key}}` placeholders in a template string.
 *
 * Only the map's own keys are considered. Names that merely exist on
 * `Object.prototype` (`toString`, `constructor`, ...) are treated as
 * unmatched, so a template containing `{{constructor}}` is never replaced
 * with a stringified built-in function.
 *
 * Unmatched placeholders are left as-is so downstream code can detect them.
 *
 * @param template - Text containing zero or more `{{word}}` placeholders.
 * @param variables - Replacement values keyed by placeholder name.
 * @returns The template with every known placeholder substituted.
 */
function interpolate(template: string, variables: Record<string, string>): string {
	return template.replace(/\{\{(\w+)\}\}/g, (placeholder: string, key: string) => {
		// `in` would also match inherited members; require an own property.
		if (!Object.prototype.hasOwnProperty.call(variables, key)) {
			return placeholder;
		}
		const value = variables[key];
		return value !== undefined ? value : placeholder;
	});
}

/**
 * Clear the template cache. Useful for testing or hot-reloading.
 *
 * After this call, the next `loadTemplate` request for any variant reads
 * its file from disk again.
 */
export function clearTemplateCache(): void {
	templateCache.clear();
}

// ── InstructionBuilder ──

/**
 * Builds the system prompt for the browser automation agent.
 *
 * In the simplest case it loads a `.md` template from the `instructions/`
 * directory next to this module and interpolates `{{task}}`,
 * `{{maxActionsPerStep}}` (also exposed as `{{commandsPerStep}}`), and
 * `{{actionDescriptions}}`.
 *
 * The class also exposes static helpers for building per-step state messages,
 * action results, and other ancillary prompt fragments that are injected as
 * user messages during the agent loop.
 */
export class InstructionBuilder {
	private readonly options: InstructionBuilderOptions;
	private readonly actionDescriptions: string;

	/**
	 * @param options - Prompt settings (per-step action limit, override/extension text, template variant).
	 * @param actionDescriptions - Pre-rendered action list for the `{{actionDescriptions}}` slot.
	 */
	constructor(options: InstructionBuilderOptions, actionDescriptions: string) {
		this.options = options;
		this.actionDescriptions = actionDescriptions;
	}

	/**
	 * Build and return the complete system prompt string.
	 *
	 * If `overrideInstructionBuilder` is set, it is returned verbatim (after
	 * optional extension) and no template is loaded. Otherwise, the
	 * appropriate `.md` template is loaded and interpolated with the current
	 * settings.
	 *
	 * @returns The system prompt text.
	 * @throws Error if the selected template file cannot be read.
	 */
	build(): string {
		if (this.options.overrideInstructionBuilder) {
			return this.appendExtension(this.options.overrideInstructionBuilder);
		}

		const variant = this.options.template ?? 'default';
		const template = loadTemplate(variant);

		// The templates spell the per-step limit `{{maxActionsPerStep}}`, while the
		// setting is called `commandsPerStep`. Supply both so neither spelling
		// reaches the model as a raw placeholder.
		const perStepLimit = String(this.options.commandsPerStep);
		const variables: Record<string, string> = {
			task: '(set per-step in user messages)',
			commandsPerStep: perStepLimit,
			maxActionsPerStep: perStepLimit,
			actionDescriptions: this.actionDescriptions,
		};

		return this.appendExtension(interpolate(template, variables));
	}

	/** Append `extendInstructionBuilder` on a new line when it is set. */
	private appendExtension(prompt: string): string {
		const extension = this.options.extendInstructionBuilder;
		return extension ? `${prompt}\n${extension}` : prompt;
	}

	/**
	 * Convenience: create an InstructionBuilder from AgentConfig + a CommandCatalog.
	 * Pulls action descriptions directly from the registry, optionally
	 * filtered by the current page URL.
	 *
	 * No template variant is forwarded, so the resulting builder uses the
	 * 'default' template.
	 *
	 * @param settings - Agent configuration supplying the limit and override/extension text.
	 * @param registry - Catalog that renders the action descriptions.
	 * @param pageUrl - Optional URL passed to the registry when rendering descriptions.
	 */
	static fromSettings(settings: AgentConfig, registry: CommandCatalog, pageUrl?: string): InstructionBuilder {
		const descriptions = registry.getPromptDescription(pageUrl);

		return new InstructionBuilder(
			{
				commandsPerStep: settings.commandsPerStep,
				overrideInstructionBuilder: settings.overrideInstructionBuilder,
				extendInstructionBuilder: settings.extendInstructionBuilder,
				hasSensitiveData: settings.maskedValues !== undefined,
			},
			descriptions,
		);
	}

	// ── Static prompt fragment builders ──

	/** Render the task as a single-line prompt fragment. */
	static buildTaskPrompt(task: string): string {
		return `Your current task: ${task}`;
	}

	/**
	 * Render a plain-text description of the current page.
	 *
	 * @param url - URL of the current page.
	 * @param title - Title of the current page.
	 * @param tabs - Open tabs; listed only when there is more than one.
	 * @param domTree - Serialized page content, appended last.
	 * @param step - Step number shown in the `[Step x/y]` header.
	 * @param stepLimit - Step budget shown in the header.
	 * @param pixelsAbove - Pixels scrolled past; reported only when positive.
	 * @param pixelsBelow - Pixels remaining below the viewport; reported only when positive.
	 * @returns The fragments joined with newlines.
	 */
	static buildStatePrompt(
		url: string,
		title: string,
		tabs: Array<{ url: string; title: string; isActive: boolean }>,
		domTree: string,
		step: number,
		stepLimit: number,
		pixelsAbove?: number,
		pixelsBelow?: number,
	): string {
		const parts: string[] = [];

		parts.push(`[Step ${step}/${stepLimit}]`);
		parts.push(`Current URL: ${url}`);
		parts.push(`Page Title: ${title}`);

		if (tabs.length > 1) {
			const tabList = tabs
				.map((t, i) => `  [${i}] ${t.isActive ? '(active) ' : ''}${t.title} - ${t.url}`)
				.join('\n');
			parts.push(`Open Tabs:\n${tabList}`);
		}

		if (pixelsAbove !== undefined && pixelsAbove > 0) {
			parts.push(`Scroll position: ${pixelsAbove}px from top`);
		}
		if (pixelsBelow !== undefined && pixelsBelow > 0) {
			parts.push(`${pixelsBelow}px of content below the visible area`);
		}

		// The leading newline leaves a blank line before the page content.
		parts.push(`\nPage content:\n${domTree}`);

		return parts.join('\n');
	}

	/**
	 * Render the results of the previous step's actions.
	 *
	 * @param results - Action/result pairs in execution order.
	 * @returns An empty string when there are no results; otherwise the pairs separated by `---` lines.
	 */
	static buildCommandResultPrompt(results: Array<{ action: string; result: string }>): string {
		if (results.length === 0) return '';

		const formatted = results
			.map((r) => `Action: ${r.action}\nResult: ${r.result}`)
			.join('\n---\n');

		return `Previous action results:\n${formatted}`;
	}

	/** Wrap a message as an `IMPORTANT:` nudge on its own line. */
	static buildLoopNudge(message: string): string {
		return `\nIMPORTANT: ${message}`;
	}

	/** Render the current plan under a `Current plan:` heading. */
	static buildPlanPrompt(currentPlan: string): string {
		return `\nCurrent plan:\n${currentPlan}`;
	}
}

// ── StepPromptBuilder ──

/**
 * Constructs the per-step user message for the agent.
 *
 * Each step of the agent loop sends a user message containing:
 * - The current browser state (URL, tabs, interactive elements)
 * - Scroll position and page boundaries
 * - Agent history summary
 * - Step information (step N of M)
 * - Optionally: screenshots, sensitive data warnings, plan description
 * - Optionally: page-specific action descriptions
 *
 * The message can be returned as a plain string or as a multipart content
 * array (text + images) when vision is enabled.
 */
export class StepPromptBuilder {
	private browserState: ViewportSnapshot;
	private task: string;
	private stepInfo?: StepInfo;
	// Accepted for interface compatibility; not rendered by this class.
	private actionDescriptions?: string;
	private pageFilteredActions?: string;
	private agentHistoryDescription?: string;
	private maskedValues?: string;
	private planDescription?: string;
	private screenshots: string[];
	private enableScreenshots: boolean;
	private maxElementsLength: number;

	/**
	 * @param options - Message inputs. `screenshots` defaults to an empty list,
	 *   `enableScreenshots` to `false`, and `maxElementsLength` to 40,000.
	 */
	constructor(options: StepPromptBuilderOptions) {
		this.browserState = options.browserState;
		this.task = options.task;
		this.stepInfo = options.stepInfo;
		this.actionDescriptions = options.actionDescriptions;
		this.pageFilteredActions = options.pageFilteredActions;
		this.agentHistoryDescription = options.agentHistoryDescription;
		this.maskedValues = options.maskedValues;
		this.planDescription = options.planDescription;
		this.screenshots = options.screenshots ?? [];
		this.enableScreenshots = options.enableScreenshots ?? false;
		this.maxElementsLength = options.maxElementsLength ?? 40_000;
	}

	/**
	 * Build the user message content.
	 *
	 * When vision is disabled (or no screenshots are available), returns a
	 * single string. When vision is enabled and screenshots exist, returns
	 * a `ContentPart[]` array: the state text followed by a label and an
	 * image part for each screenshot. The last screenshot is labelled
	 * "Current screenshot:", all earlier ones "Previous screenshot:".
	 *
	 * @returns The message as plain text or as multipart content.
	 */
	getUserMessage(): string | ContentPart[] {
		// Skip screenshots on step 0 for new-tab pages with a single tab
		const isBlankFirstStep =
			isNewTabPage(this.browserState.url) &&
			this.stepInfo?.step === 0 &&
			this.browserState.tabs.length <= 1;
		const effectiveVision = this.enableScreenshots && !isBlankFirstStep;

		const stateDescription = this.buildStateDescription();

		if (!effectiveVision || this.screenshots.length === 0) {
			return stateDescription;
		}

		const parts: ContentPart[] = [textContent(stateDescription)];
		const lastIndex = this.screenshots.length - 1;
		this.screenshots.forEach((screenshot, i) => {
			parts.push(textContent(i === lastIndex ? 'Current screenshot:' : 'Previous screenshot:'));
			parts.push(imageContent(screenshot, 'image/png'));
		});
		return parts;
	}

	/**
	 * Build the complete text description of the current state.
	 * This includes agent history, agent state (task, step info, plan),
	 * and browser state (URL, tabs, elements, scroll position), each
	 * separated by a blank line.
	 */
	private buildStateDescription(): string {
		const sections: string[] = [
			this.buildAgentHistorySection(),
			// Agent state (task, step info, plan, sensitive data)
			this.buildAgentStateSection(),
			// Browser state (tabs, scroll info, elements)
			this.buildBrowserStateSection(),
		];

		// Page-specific actions (if any domain-filtered actions apply)
		if (this.pageFilteredActions) {
			sections.push(
				`<page_specific_actions>\n${this.pageFilteredActions}\n</page_specific_actions>`,
			);
		}

		// Sanitize surrogates to prevent JSON serialization issues
		return sanitizeSurrogates(sections.join('\n\n'));
	}

	/** Wrap the trimmed history text (or nothing) in `<agent_history>` tags. */
	private buildAgentHistorySection(): string {
		const history = this.agentHistoryDescription?.trim() ?? '';
		return `<agent_history>\n${history}\n</agent_history>`;
	}

	/**
	 * Render the `<agent_state>` section: the task, then the plan, sensitive
	 * data, and step info (with today's date) when each is available.
	 */
	private buildAgentStateSection(): string {
		const parts: string[] = [];

		parts.push(`<user_request>\n${this.task}\n</user_request>`);

		if (this.planDescription) {
			parts.push(`<plan>\n${this.planDescription}\n</plan>`);
		}

		if (this.maskedValues) {
			parts.push(`<sensitive_data>${this.maskedValues}</sensitive_data>`);
		}

		if (this.stepInfo) {
			// ISO date only (YYYY-MM-DD), taken from the UTC timestamp.
			const today = new Date().toISOString().slice(0, 10);
			// `step` is zero-based internally; show it one-based.
			parts.push(
				`<step_info>Step ${this.stepInfo.step + 1} of ${this.stepInfo.stepLimit} | Today: ${today}</step_info>`,
			);
		}

		return `<agent_state>\n${parts.join('\n')}\n</agent_state>`;
	}

	/** Render the `<browser_state>` section: tabs, page info, then elements. */
	private buildBrowserStateSection(): string {
		const parts: string[] = [];

		const tabsText = this.buildTabsText();
		if (tabsText) {
			parts.push(tabsText);
		}

		const pageInfo = this.buildPageInfoText();
		if (pageInfo) {
			parts.push(pageInfo);
		}

		parts.push(this.buildElementsText());

		return `<browser_state>\n${parts.join('\n')}\n</browser_state>`;
	}

	/**
	 * List the open tabs, preceded by a "Current tab" line when exactly one
	 * tab matches the snapshot's URL and title. Tab ids are shortened to
	 * their last four characters and titles to 30 characters.
	 *
	 * @returns The tab listing, or an empty string when there are no tabs.
	 */
	private buildTabsText(): string {
		const { tabs, url, title } = this.browserState;
		if (tabs.length === 0) return '';

		// Only report a current tab when the URL/title match is unambiguous.
		const matches = tabs.filter((t) => t.url === url && t.title === title);
		const currentTabId = matches.length === 1 ? matches[0]?.tabId : undefined;

		const lines: string[] = [];
		// Nullish check rather than truthiness: a tab id of 0 is still a valid id.
		if (currentTabId != null) {
			lines.push(`Current tab: ${String(currentTabId).slice(-4)}`);
		}

		lines.push('Available tabs:');
		for (const tab of tabs) {
			lines.push(`Tab ${String(tab.tabId).slice(-4)}: ${tab.url} - ${tab.title.slice(0, 30)}`);
		}

		return lines.join('\n');
	}

	/**
	 * Describe how much content lies above and below the viewport, in
	 * "pages" of roughly 900px each.
	 *
	 * @returns A `<page_info>` element, or an empty string when neither offset is positive.
	 */
	private buildPageInfoText(): string {
		const { pixelsAbove, pixelsBelow } = this.browserState;
		const parts: string[] = [];

		if (pixelsAbove !== undefined && pixelsAbove > 0) {
			// Estimate "pages above" assuming ~900px viewport height
			const pagesAbove = (pixelsAbove / 900).toFixed(1);
			parts.push(`${pagesAbove} pages above`);
		}
		if (pixelsBelow !== undefined && pixelsBelow > 0) {
			const pagesBelow = (pixelsBelow / 900).toFixed(1);
			parts.push(`${pagesBelow} pages below`);
		}

		if (parts.length === 0) return '';
		return `<page_info>${parts.join(', ')}</page_info>`;
	}

	/**
	 * Render the interactive-elements listing from the DOM tree, truncated
	 * to `maxElementsLength` characters, with `[Start of page]` /
	 * `[End of page]` markers when nothing lies above / below the viewport.
	 */
	private buildElementsText(): string {
		let elementsText = this.browserState.domTree ?? '';

		if (!elementsText) {
			return 'Interactive elements:\nempty page';
		}

		// Truncate if too long
		let truncatedNote = '';
		if (elementsText.length > this.maxElementsLength) {
			elementsText = elementsText.slice(0, this.maxElementsLength);
			truncatedNote = ` (truncated to ${this.maxElementsLength} characters)`;
		}

		// Add start/end of page markers based on scroll position
		const hasContentAbove =
			this.browserState.pixelsAbove !== undefined && this.browserState.pixelsAbove > 0;
		const hasContentBelow =
			this.browserState.pixelsBelow !== undefined && this.browserState.pixelsBelow > 0;

		if (!hasContentAbove) {
			elementsText = `[Start of page]\n${elementsText}`;
		}
		if (!hasContentBelow) {
			elementsText = `${elementsText}\n[End of page]`;
		}

		return `Interactive elements${truncatedNote}:\n${elementsText}`;
	}
}

// ── Dynamic action descriptions ──

/**
 * Render the action descriptions destined for the system prompt's
 * `{{actionDescriptions}}` slot.
 *
 * @param registry - Catalog that produces the formatted description text.
 * @param pageUrl - Optional URL forwarded to the catalog for filtering.
 * @returns The formatted descriptions exactly as the catalog returns them.
 */
export function buildCommandDescriptions(registry: CommandCatalog, pageUrl?: string): string {
	const descriptions = registry.getPromptDescription(pageUrl);
	return descriptions;
}

/**
 * Describe the actions that apply specifically to the domain of the
 * current page.
 *
 * The result is meant for the `<page_specific_actions>` section of the
 * per-step user message.
 *
 * @param registry - Catalog queried for all actions and for the domain's actions.
 * @param pageUrl - URL of the current page; its domain selects the actions.
 * @returns A bulleted list of domain-filtered actions, or `undefined` when
 *   the domain exposes every registered action or none of its actions
 *   carries a domain filter.
 */
export function buildContextualCommands(registry: CommandCatalog, pageUrl: string): string | undefined {
	const everyAction = registry.getAll();
	const availableHere = registry.getActionsForDomain(extractDomain(pageUrl));

	// Same count as the full catalog: nothing beyond what is already shown.
	if (availableHere.length === everyAction.length) {
		return undefined;
	}

	// Keep only actions that declare a non-empty domain filter.
	const bulletLines: string[] = [];
	for (const action of availableHere) {
		if (action.domainFilter && action.domainFilter.length > 0) {
			bulletLines.push(`- ${action.name}: ${action.description}`);
		}
	}

	if (bulletLines.length === 0) {
		return undefined;
	}

	return `The following actions are available on this page:\n${bulletLines.join('\n')}`;
}

// ── Rerun / extraction prompt helpers ──

/**
 * Build a system prompt for the extraction/AI-step action used during reruns.
 *
 * The prompt text is fixed; it is passed through `dedent` before being
 * returned (presumably to strip the source-level indentation — confirm
 * against the helper in `../utils.js`).
 *
 * @returns The extraction system prompt.
 */
export function buildExtractionInstructionBuilder(): string {
	return dedent(`
		You are an expert at extracting data from webpages.

		You will be given:
		1. A query describing what to extract
		2. The markdown of the webpage (filtered to remove noise)
		3. Optionally, a screenshot of the current page state

		Instructions:
		- Extract information from the webpage that is relevant to the query
		- ONLY use the information available in the webpage - do not make up information
		- If the information is not available, mention that clearly
		- If the query asks for all items, list all of them

		Output:
		- Present ALL relevant information in a concise way
		- Do not use conversational format - directly output the relevant information
		- If information is unavailable, state that clearly
	`);
}

/**
 * Assemble the user prompt for the extraction/AI-step action.
 *
 * @param query - What to extract; wrapped in `<query>` tags.
 * @param statsSummary - Summary of the content; wrapped in `<content_stats>` tags.
 * @param content - The page content; wrapped in `<webpage_content>` tags.
 * @returns The three tagged sections, separated by blank lines.
 */
export function buildExtractionUserPrompt(
	query: string,
	statsSummary: string,
	content: string,
): string {
	const querySection = `<query>\n${query}\n</query>`;
	const statsSection = `<content_stats>\n${statsSummary}\n</content_stats>`;
	const contentSection = `<webpage_content>\n${content}\n</webpage_content>`;

	return `${querySection}\n\n${statsSection}\n\n${contentSection}`;
}

// ── Helpers ──

/**
 * Reduce a URL to its lowercase hostname without a leading `www.`.
 *
 * @param url - Absolute URL to parse.
 * @returns The normalized hostname, or an empty string when the URL cannot be parsed.
 */
function extractDomain(url: string): string {
	let host: string;
	try {
		host = new URL(url).hostname;
	} catch {
		return '';
	}

	const withoutWww = host.replace(/^www\./, '');
	return withoutWww.toLowerCase();
}


================================================
FILE: packages/core/src/agent/replay-recorder.ts
================================================
import * as fs from 'node:fs';
import * as path from 'node:path';
import { createLogger } from '../logging.js';

const logger = createLogger('gif-recorder');

/**
 * Configuration for `ReplayRecorder`.
 */
export interface ReplayRecorderOptions {
	/** Output file path. Extension determines format (.gif or .png for fallback). */
	outputPath: string;
	/** Delay between frames in milliseconds. Defaults to 500. */
	frameDelay?: number;
	/** Resize frames to this width (maintains aspect ratio). 0 = no resize. Defaults to 800. */
	resizeWidth?: number;
	/** Quality (1-30, lower = better quality). Only used for GIF encoding. Defaults to 10. */
	quality?: number;
}

/**
 * One recorded screenshot held in memory until the recording is saved.
 */
interface FrameData {
	/** Image bytes decoded from the base64 screenshot passed to `addFrame`. */
	buffer: Buffer;
	/** Step number for the frame; `addFrame` falls back to the frame's 1-based position when omitted. */
	stepNumber: number;
	/** Optional label text (e.g., the action taken). */
	label?: string;
}

/**
 * Records agent screenshots and encodes them into an animated GIF.
 *
 * Uses the `sharp` library (optional dependency) for image processing
 * and compositing step-number overlays. If sharp is not available,
 * only the individual PNG frames and a preview image are written.
 *
 * All derived paths (`.gif`, `.png`, `_frames`, `_preview.png`) are built from
 * the base name of the output path, so an output path without an extension,
 * or one whose directory contains a dot, is handled correctly.
 *
 * Usage:
 *   const recorder = new ReplayRecorder({ outputPath: './recording.gif' });
 *   recorder.addFrame(screenshotBase64, 1);
 *   // ... more frames ...
 *   await recorder.save(); // -> path to GIF or frames directory
 */
export class ReplayRecorder {
	private frames: FrameData[] = [];
	private outputPath: string;
	private frameDelay: number;
	private resizeWidth: number;
	// Stored for completeness; the encoder below does not currently read it.
	private quality: number;

	/**
	 * @param options - Output path plus optional frame delay (default 500 ms),
	 *                  resize width (default 800, 0 disables resizing) and
	 *                  quality (default 10).
	 */
	constructor(options: ReplayRecorderOptions) {
		this.outputPath = options.outputPath;
		this.frameDelay = options.frameDelay ?? 500;
		this.resizeWidth = options.resizeWidth ?? 800;
		this.quality = options.quality ?? 10;
	}

	/**
	 * Add a screenshot frame to the recording.
	 * @param screenshotBase64 - PNG screenshot as base64 string
	 * @param stepNumber - Step number for the overlay annotation; defaults to
	 *                     the 1-based position of the frame in the recording
	 * @param label - Optional label text (e.g., the action taken)
	 */
	addFrame(screenshotBase64: string, stepNumber?: number, label?: string): void {
		const buffer = Buffer.from(screenshotBase64, 'base64');
		this.frames.push({
			buffer,
			stepNumber: stepNumber ?? this.frames.length + 1,
			label,
		});
	}

	/**
	 * Save the recording. Always writes the individual frames and a preview,
	 * then attempts GIF encoding with sharp unless `generateGif` is `false`.
	 *
	 * @param generateGif - true to generate a GIF, a string to override the
	 *                      output path, false to only save individual frames
	 * @returns The path of the encoded image on success. When no frames were
	 *          recorded, when `generateGif` is `false`, or when encoding
	 *          fails, the requested output path is returned unchanged.
	 */
	async save(generateGif: string | boolean = true): Promise<string> {
		if (this.frames.length === 0) {
			logger.debug('No frames to save');
			return this.outputPath;
		}

		const effectivePath = typeof generateGif === 'string' ? generateGif : this.outputPath;
		const dir = path.dirname(effectivePath);
		if (!fs.existsSync(dir)) {
			fs.mkdirSync(dir, { recursive: true });
		}

		// Always save individual frames as fallback / debug
		await this.saveFrames(effectivePath);

		if (generateGif === false) {
			return effectivePath;
		}

		// Try to generate actual GIF using sharp
		try {
			const gifPath = await this.encodeGif(effectivePath);
			logger.info(`GIF saved: ${gifPath} (${this.frames.length} frames)`);
			return gifPath;
		} catch (error) {
			logger.warn(
				`GIF encoding failed, falling back to individual frames: ${
					error instanceof Error ? error.message : String(error)
				}`,
			);
			return effectivePath;
		}
	}

	/**
	 * Encode frames into an animated GIF using sharp.
	 * Sharp must be installed as a peer dependency.
	 *
	 * @param outputPath - Base path; the result is written next to it with a
	 *                     `.gif` (or, on animation failure, `.png`) extension.
	 * @returns Path of the file that was written.
	 * @throws Error when sharp cannot be imported, or when frame processing or
	 *         the static fallback write fails.
	 */
	private async encodeGif(outputPath: string): Promise<string> {
		// Dynamic import -- sharp is an optional dependency.
		// eslint-disable-next-line @typescript-eslint/no-explicit-any
		let sharpModule: any;
		try {
			// Indirect dynamic import avoids TS2307 for optional peer deps
			const moduleName = 'sharp';
			sharpModule = await import(/* webpackIgnore: true */ moduleName);
		} catch {
			throw new Error(
				'sharp is not installed. Install it with: npm install sharp',
			);
		}

		// Resolve the default export (handles both ESM and CJS)
		const sharp = sharpModule.default ?? sharpModule;

		const gifPath = ReplayRecorder.swapExtension(outputPath, '.gif');
		const processedFrames: Buffer[] = [];

		for (const frame of this.frames) {
			let img = sharp(frame.buffer);

			// Resize if configured
			if (this.resizeWidth > 0) {
				img = img.resize(this.resizeWidth, undefined, {
					fit: 'inside',
					withoutEnlargement: true,
				});
			}

			// Composite a step number overlay onto the frame
			const overlayBuffer = this.createStepOverlaySvg(
				frame.stepNumber,
				frame.label,
			);

			img = img.composite([
				{
					input: Buffer.from(overlayBuffer),
					gravity: 'northwest',
				},
			]);

			// Convert to PNG for further processing
			const processed = await img
				.flatten({ background: { r: 255, g: 255, b: 255 } })
				.png()
				.toBuffer();

			processedFrames.push(processed);
		}

		// Attempt to assemble an animated GIF from the processed frames
		try {
			// The first frame's dimensions define the canvas for every frame.
			const firstFrame = sharp(processedFrames[0]);
			const metadata = await firstFrame.metadata();
			const width = metadata.width ?? this.resizeWidth;
			const height = metadata.height ?? 600;

			// Convert each frame to raw RGBA
			const rawFrames: Buffer[] = [];
			for (const frameBuffer of processedFrames) {
				const raw = await sharp(frameBuffer)
					.resize(width, height, {
						fit: 'contain',
						background: { r: 255, g: 255, b: 255 },
					})
					.raw()
					.ensureAlpha()
					.toBuffer();
				rawFrames.push(raw);
			}

			// Concatenate all raw frames and encode as animated GIF.
			// NOTE(review): confirm that sharp accepts `pages` inside the `raw`
			// input options; if it rejects this call the catch below writes a
			// static PNG instead.
			const combinedRaw = Buffer.concat(rawFrames);
			await sharp(combinedRaw, {
				raw: {
					width,
					height,
					channels: 4,
					pages: rawFrames.length,
				},
			})
				.gif({
					delay: Array(rawFrames.length).fill(this.frameDelay),
					loop: 0,
				})
				.toFile(gifPath);

			return gifPath;
		} catch (animatedError) {
			// If animated GIF creation fails, save the last frame as a static image
			logger.debug(
				`Animated GIF assembly failed, saving static image: ${
					animatedError instanceof Error
						? animatedError.message
						: String(animatedError)
				}`,
			);
			const lastFrame = processedFrames[processedFrames.length - 1];
			const staticPath = ReplayRecorder.swapExtension(outputPath, '.png');
			await sharp(lastFrame).png().toFile(staticPath);
			return staticPath;
		}
	}

	/**
	 * Create an SVG overlay with the step number and optional label.
	 * Returns an SVG string that can be composited onto the frame.
	 *
	 * @param stepNumber - Number rendered after "Step".
	 * @param label - Optional text; only its first 40 characters are shown.
	 */
	private createStepOverlaySvg(stepNumber: number, label?: string): string {
		const labelText = label ? ` - ${label.slice(0, 40)}` : '';
		const text = `Step ${stepNumber}${labelText}`;
		// Rough width estimate: 10px per character plus padding, at least 200px.
		const width = Math.max(200, text.length * 10 + 20);
		const height = 36;

		return `<svg width="${width}" height="${height}" xmlns="http://www.w3.org/2000/svg">
			<rect x="0" y="0" width="${width}" height="${height}" rx="4" fill="rgba(0,0,0,0.7)"/>
			<text x="10" y="24" font-family="monospace" font-size="16" fill="white">${this.escapeXml(text)}</text>
		</svg>`;
	}

	/**
	 * Save individual PNG frames to a `<name>_frames` directory alongside the
	 * output path, plus the last frame as `<name>_preview.png`.
	 *
	 * Frames are named after their step number, so two frames recorded with
	 * the same step number end up in the same file.
	 *
	 * @returns The directory the frames were written to.
	 */
	private async saveFrames(outputPath: string): Promise<string> {
		const framesDir = ReplayRecorder.swapExtension(outputPath, '_frames');
		if (!fs.existsSync(framesDir)) {
			fs.mkdirSync(framesDir, { recursive: true });
		}

		for (const frame of this.frames) {
			const framePath = path.join(
				framesDir,
				`frame_${frame.stepNumber.toString().padStart(4, '0')}.png`,
			);
			fs.writeFileSync(framePath, frame.buffer);
		}

		// Also save the last frame as the preview image
		if (this.frames.length > 0) {
			const lastFrame = this.frames[this.frames.length - 1];
			const previewPath = ReplayRecorder.swapExtension(outputPath, '_preview.png');
			fs.writeFileSync(previewPath, lastFrame.buffer);
		}

		logger.debug(`Saved ${this.frames.length} frames to ${framesDir}`);
		return framesDir;
	}

	/**
	 * Replace the extension of `filePath`'s final component with `suffix`.
	 *
	 * Only the base name is inspected, so dots in directory names are left
	 * alone and a path without an extension simply gets `suffix` appended
	 * (`recording` -> `recording_frames`, `./out/recording` ->
	 * `./out/recording.gif`).
	 */
	private static swapExtension(filePath: string, suffix: string): string {
		const { root, dir, name } = path.parse(filePath);
		return path.format({ root, dir, name: `${name}${suffix}` });
	}

	/** Escape XML special characters for SVG text content */
	private escapeXml(text: string): string {
		return text
			.replace(/&/g, '&amp;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;')
			.replace(/"/g, '&quot;')
			.replace(/'/g, '&apos;');
	}

	/** Number of frames currently held by the recorder. */
	get frameCount(): number {
		return this.frames.length;
	}

	/** Drop all recorded frames. */
	clear(): void {
		this.frames = [];
	}
}


================================================
FILE: packages/core/src/agent/stall-detector.test.ts
================================================
import { test, expect, describe, beforeEach } from 'bun:test';
import {
	StallDetector,
	hashPageTree,
	hashTextContent,
	type PageSignature,
} from './stall-detector.js';
import type { Command } from '../commands/types.js';

// ── Helpers ──

/** Build a single-click `tap` command for the element at `index`. */
function clickAction(index: number): Command {
	const command: Command = { action: 'tap', index, clickCount: 1 };
	return command;
}

/** Build a `type_text` command that clears the element at `index` before typing `text`. */
function inputAction(index: number, text: string): Command {
	const command: Command = { action: 'type_text', index, text, clearFirst: true };
	return command;
}

/** Build a `navigate` command for `url`. */
function navigateAction(url: string): Command {
	const command: Command = { action: 'navigate', url };
	return command;
}

/** Build a `scroll` command; `index` is passed through as given (possibly `undefined`). */
function scrollAction(direction: 'up' | 'down', index?: number): Command {
	const command: Command = { action: 'scroll', direction, index };
	return command;
}

/** Build a successful `finish` command carrying `text`. */
function doneAction(text: string): Command {
	const command: Command = { action: 'finish', text, success: true };
	return command;
}

/** Build a `web_search` command for `query`. */
function searchGoogleAction(query: string): Command {
	const command: Command = { action: 'web_search', query };
	return command;
}

/**
 * Build a page signature for tests: a fixed baseline with any field
 * replaced by the matching entry in `overrides`.
 */
function makeFingerprint(overrides: Partial<PageSignature> = {}): PageSignature {
	const baseline: PageSignature = {
		url: 'https://example.com',
		domHash: 'abc123',
		scrollY: 0,
		elementCount: 50,
		textHash: 'texthash1',
	};
	return { ...baseline, ...overrides };
}

// ── Tests ──

describe('StallDetector', () => {
	let detector: StallDetector;

	beforeEach(() => {
		detector = new StallDetector();
	});

	// A detector with no recorded history must report a clean state everywhere.
	describe('initial state', () => {
		test('isStuck returns not stuck when no actions recorded', () => {
			const { stuck, severity } = detector.isStuck();

			expect(stuck).toBe(false);
			expect(severity).toBe(0);
		});

		test('getTotalRepetitions returns 0 initially', () => {
			const total = detector.getTotalRepetitions();
			expect(total).toBe(0);
		});

		test('getLoopNudgeMessage returns empty string when not stuck', () => {
			const message = detector.getLoopNudgeMessage();
			expect(message).toBe('');
		});
	});

	// Trailing runs of identical action steps are compared against maxRepeatedActions.
	describe('recordAction and repeated action detection', () => {
		test('does not flag non-repeated actions', () => {
			for (const index of [1, 2, 3]) {
				detector.recordAction([clickAction(index)]);
			}

			expect(detector.isStuck().stuck).toBe(false);
		});

		test('flags the same action repeated maxRepeatedActions times (default 3)', () => {
			for (let attempt = 0; attempt < 3; attempt++) {
				detector.recordAction([clickAction(5)]);
			}

			const { stuck, reason } = detector.isStuck();
			expect(stuck).toBe(true);
			expect(reason).toContain('repeated');
			expect(reason).toContain('3');
		});

		test('flags repeated multi-action steps', () => {
			// The whole step (click followed by input) is the unit being compared.
			const step: Command[] = [clickAction(1), inputAction(2, 'hello')];
			for (let attempt = 0; attempt < 3; attempt++) {
				detector.recordAction(step);
			}

			expect(detector.isStuck().stuck).toBe(true);
		});

		test('does not flag when only two repeated actions (below threshold)', () => {
			for (let attempt = 0; attempt < 2; attempt++) {
				detector.recordAction([clickAction(5)]);
			}

			expect(detector.isStuck().stuck).toBe(false);
		});

		test('custom maxRepeatedActions threshold', () => {
			// With maxRepeatedActions=5, three trailing repeats must not trigger.
			// The distinct leading action makes the last four entries [10,1,1,1],
			// which the A->B->A->B check does not match, so only the
			// repeated-action threshold is exercised here.
			const custom = new StallDetector({ maxRepeatedActions: 5 });
			custom.recordAction([clickAction(10)]);
			for (let attempt = 0; attempt < 3; attempt++) {
				custom.recordAction([clickAction(1)]);
			}
			expect(custom.isStuck().stuck).toBe(false);

			// Two more identical clicks bring the trailing run to 5.
			for (let attempt = 0; attempt < 2; attempt++) {
				custom.recordAction([clickAction(1)]);
			}
			expect(custom.isStuck().stuck).toBe(true);
		});
	});

	// Two actions alternating over the last four steps count as a loop.
	describe('action cycle detection (A -> B -> A -> B)', () => {
		test('detects alternating two-action cycle', () => {
			for (const index of [1, 2, 1, 2]) {
				detector.recordAction([clickAction(index)]);
			}

			const { stuck, reason } = detector.isStuck();
			expect(stuck).toBe(true);
			expect(reason).toContain('cycle');
		});

		test('does not falsely detect A -> B -> A -> C as a cycle', () => {
			for (const index of [1, 2, 1, 3]) {
				detector.recordAction([clickAction(index)]);
			}

			expect(detector.isStuck().stuck).toBe(false);
		});
	});

	// A three-action sequence repeated twice in a row counts as a loop.
	describe('triple cycle detection (A -> B -> C -> A -> B -> C)', () => {
		test('detects 3-step cycle', () => {
			for (const index of [1, 2, 3, 1, 2, 3]) {
				detector.recordAction([clickAction(index)]);
			}

			const { stuck, reason } = detector.isStuck();
			expect(stuck).toBe(true);
			expect(reason).toContain('3-step');
		});

		test('does not detect partial triple cycle', () => {
			// Five entries only; the triple check needs six.
			for (const index of [1, 2, 3, 1, 2]) {
				detector.recordAction([clickAction(index)]);
			}

			expect(detector.isStuck().stuck).toBe(false);
		});
	});

	// Identical page fingerprints on consecutive steps mean the page did not change.
	describe('fingerprint-based stuck detection', () => {
		test('detects repeated page fingerprints', () => {
			const signature = makeFingerprint();
			for (let step = 0; step < 3; step++) {
				detector.recordFingerprint(signature);
			}

			const { stuck, reason } = detector.isStuck();
			expect(stuck).toBe(true);
			expect(reason).toContain('Page state unchanged');
		});

		test('different fingerprints do not trigger stuck', () => {
			for (const domHash of ['hash1', 'hash2', 'hash3']) {
				detector.recordFingerprint(makeFingerprint({ domHash }));
			}

			expect(detector.isStuck().stuck).toBe(false);
		});

		test('scroll position bucketed (200px buckets) - same bucket triggers stuck', () => {
			// 0, 50 and 100 all fall into the first 200px bucket.
			for (const scrollY of [0, 50, 100]) {
				detector.recordFingerprint(makeFingerprint({ scrollY }));
			}

			expect(detector.isStuck().stuck).toBe(true);
		});

		test('different scroll buckets not considered stuck', () => {
			for (const scrollY of [0, 200, 400]) {
				detector.recordFingerprint(makeFingerprint({ scrollY }));
			}

			expect(detector.isStuck().stuck).toBe(false);
		});

		test('custom maxRepeatedFingerprints threshold', () => {
			const custom = new StallDetector({ maxRepeatedFingerprints: 5 });
			const signature = makeFingerprint();

			// Four identical fingerprints stay below the custom threshold...
			let recorded = 0;
			while (recorded < 4) {
				custom.recordFingerprint(signature);
				recorded++;
			}
			expect(custom.isStuck().stuck).toBe(false);

			// ...and the fifth one reaches it.
			custom.recordFingerprint(signature);
			expect(custom.isStuck().stuck).toBe(true);
		});
	});

	// Same URL with a (nearly) constant element count across steps is treated as stagnation.
	describe('consecutive stagnant pages detection', () => {
		test('detects stagnant pages with same URL and similar element count', () => {
			const detector5 = new StallDetector({ maxStagnantPages: 5 });

			// domHash and scrollY differ per step so the fingerprint hashes are
			// all distinct; only URL and elementCount stay the same.
			const signatures = [0, 1, 2, 3, 4].map((step) =>
				makeFingerprint({
					domHash: `hash_${step}`,
					scrollY: step * 200,
					elementCount: 50,
				}),
			);
			for (const signature of signatures) {
				detector5.recordFingerprint(signature);
			}

			const { stuck, reason } = detector5.isStuck();
			expect(stuck).toBe(true);
			expect(reason).toContain('stagnant');
		});

		test('different URLs do not trigger stagnant detection', () => {
			const signatures = [0, 1, 2, 3, 4].map((step) =>
				makeFingerprint({
					url: `https://example.com/page${step}`,
					domHash: `hash_${step}`,
					scrollY: step * 200,
					elementCount: 50,
				}),
			);
			for (const signature of signatures) {
				detector.recordFingerprint(signature);
			}

			expect(detector.isStuck().stuck).toBe(false);
		});
	});

	// Severity and nudge wording depend on how many repetitions were counted.
	describe('escalating nudge messages', () => {
		test('severity 0 for repetitions below 5', () => {
			// Three repetitions are enough to be stuck, but not to escalate.
			let recorded = 0;
			while (recorded < 3) {
				detector.recordAction([clickAction(1)]);
				recorded++;
			}

			const { stuck, severity } = detector.isStuck();
			expect(stuck).toBe(true);
			expect(severity).toBe(0);
		});

		test('severity 1 at 5+ total repetitions via cycle detection', () => {
			// The cycle path derives severity from the running total, so a
			// total of 5 accumulated over two detections yields severity 1.
			const det = new StallDetector({ maxRepeatedActions: 3 });

			// First detection: three identical clicks add 3 to the total.
			let recorded = 0;
			while (recorded < 3) {
				det.recordAction([clickAction(1)]);
				recorded++;
			}
			det.isStuck();

			// Second detection: 10 -> 20 -> 10 -> 20 is a two-action cycle,
			// which adds 2 more (total 5).
			for (const index of [10, 20, 10, 20]) {
				det.recordAction([clickAction(index)]);
			}

			const { stuck, severity } = det.isStuck();
			expect(stuck).toBe(true);
			expect(severity).toBe(1);
		});

		test('nudge message contains appropriate text', () => {
			let recorded = 0;
			while (recorded < 3) {
				detector.recordAction([clickAction(1)]);
				recorded++;
			}

			const message = detector.getLoopNudgeMessage();
			expect(message).toContain('Warning:');
			expect(message.length).toBeGreaterThan(0);
		});
	});

	// Actions are compared through a normalized key; these tests pin which
	// parameters take part in that key and which are ignored.
	describe('action hash normalization', () => {
		test('click actions normalized by index only', () => {
			// Taps on the same element must produce the same key regardless of
			// clickCount. Mixing both variants in ONE detector proves that:
			// the run only reaches three repetitions if the keys are equal.
			const singleClick: Command = { action: 'tap', index: 5, clickCount: 1 };
			const doubleClick: Command = { action: 'tap', index: 5, clickCount: 2 };

			const mixed = new StallDetector();
			mixed.recordAction([singleClick]);
			mixed.recordAction([doubleClick]);
			mixed.recordAction([singleClick]);

			const result = mixed.isStuck();
			expect(result.stuck).toBe(true);
			expect(result.reason).toContain('repeated');

			// A tap on a different index is a different action and breaks the run.
			const distinct = new StallDetector();
			distinct.recordAction([singleClick]);
			distinct.recordAction([{ action: 'tap', index: 6, clickCount: 1 }]);
			distinct.recordAction([singleClick]);
			expect(distinct.isStuck().stuck).toBe(false);
		});

		test('search queries normalized for order independence', () => {
			// "best pizza NYC" and "NYC best pizza" should produce same hash
			const d = new StallDetector();
			d.recordAction([searchGoogleAction('best pizza NYC')]);
			d.recordAction([searchGoogleAction('NYC best pizza')]);
			d.recordAction([searchGoogleAction('pizza best NYC')]);

			expect(d.isStuck().stuck).toBe(true);
		});

		test('different navigate URLs not considered same action', () => {
			detector.recordAction([navigateAction('https://a.com')]);
			detector.recordAction([navigateAction('https://b.com')]);
			detector.recordAction([navigateAction('https://c.com')]);

			expect(detector.isStuck().stuck).toBe(false);
		});

		test('scroll actions include direction and index', () => {
			// Same direction, same index -> stuck
			for (let i = 0; i < 3; i++) {
				detector.recordAction([scrollAction('down', 1)]);
			}
			expect(detector.isStuck().stuck).toBe(true);

			// A different direction on the same element is a different action.
			const byDirection = new StallDetector();
			byDirection.recordAction([scrollAction('down', 1)]);
			byDirection.recordAction([scrollAction('up', 1)]);
			byDirection.recordAction([scrollAction('down', 1)]);
			expect(byDirection.isStuck().stuck).toBe(false);

			// The same direction on a different element is a different action.
			const byIndex = new StallDetector();
			byIndex.recordAction([scrollAction('down', 1)]);
			byIndex.recordAction([scrollAction('down', 2)]);
			byIndex.recordAction([scrollAction('down', 1)]);
			expect(byIndex.isStuck().stuck).toBe(false);
		});

		test('done actions include text prefix', () => {
			detector.recordAction([doneAction('Task completed successfully')]);
			detector.recordAction([doneAction('Task completed successfully')]);
			detector.recordAction([doneAction('Task completed successfully')]);

			expect(detector.isStuck().stuck).toBe(true);
		});
	});

	// reset() must drop both histories and the accumulated repetition total.
	describe('reset', () => {
		test('clears all history and repetitions', () => {
			let step = 0;
			while (step < 3) {
				detector.recordAction([clickAction(1)]);
				detector.recordFingerprint(makeFingerprint());
				step++;
			}
			const before = detector.isStuck();
			expect(before.stuck).toBe(true);

			detector.reset();

			const after = detector.isStuck();
			expect(after.stuck).toBe(false);
			expect(detector.getTotalRepetitions()).toBe(0);
			expect(detector.getLoopNudgeMessage()).toBe('');
		});
	});

	// Trimming old history must not hide a repetition at the end of it.
	describe('window size pruning', () => {
		test('keeps action history within bounds', () => {
			const smallWindow = new StallDetector({ windowSize: 5 });

			// Fifteen distinct actions overflow the retained history...
			const uniqueIndexes = Array.from({ length: 15 }, (_, position) => position);
			for (const index of uniqueIndexes) {
				smallWindow.recordAction([clickAction(index)]);
			}

			// ...followed by the same action three times in a row.
			let repeats = 0;
			while (repeats < 3) {
				smallWindow.recordAction([clickAction(99)]);
				repeats++;
			}

			// The trailing repetition is still detected.
			expect(smallWindow.isStuck().stuck).toBe(true);
		});
	});
});

// hashPageTree: deterministic, input-sensitive, base-36 output.
describe('hashPageTree', () => {
	test('produces consistent hash for same input', () => {
		const markup = '<div>hello</div>';
		expect(hashPageTree(markup)).toBe(hashPageTree(markup));
	});

	test('produces different hash for different input', () => {
		const helloHash = hashPageTree('<div>hello</div>');
		const worldHash = hashPageTree('<div>world</div>');
		expect(helloHash).not.toBe(worldHash);
	});

	test('returns a base-36 string', () => {
		const digest = hashPageTree('some content');
		expect(typeof digest).toBe('string');
		// Digits 0-9 and letters a-z, optionally preceded by a minus sign.
		expect(digest).toMatch(/^-?[0-9a-z]+$/);
	});

	test('handles empty string', () => {
		expect(hashPageTree('')).toBe('0');
	});
});

// hashTextContent: the hash ignores case, repeated whitespace and punctuation.
describe('hashTextContent', () => {
	test('produces consistent hash for same input', () => {
		const sample = 'Hello World';
		expect(hashTextContent(sample)).toBe(hashTextContent(sample));
	});

	test('normalizes case: same hash for different casing', () => {
		const mixedCase = hashTextContent('Hello World');
		const lowerCase = hashTextContent('hello world');
		expect(mixedCase).toBe(lowerCase);
	});

	test('normalizes whitespace: collapses multiple spaces', () => {
		const spacedOut = hashTextContent('hello    world');
		const singleSpaced = hashTextContent('hello world');
		expect(spacedOut).toBe(singleSpaced);
	});

	test('removes punctuation for content-based matching', () => {
		const punctuated = hashTextContent('hello, world!');
		const plain = hashTextContent('hello world');
		expect(punctuated).toBe(plain);
	});

	test('handles empty string', () => {
		expect(hashTextContent('')).toBe('0');
	});
});


================================================
FILE: packages/core/src/agent/stall-detector.ts
================================================
import type { Command } from '../commands/types.js';

// ── Enhanced Page Fingerprint ──

/**
 * Snapshot of the page state recorded after a step and compared across steps
 * to decide whether the page has changed.
 */
export interface PageSignature {
	/** Page URL; compared verbatim. */
	url: string;
	/** Hash of the DOM tree; compared verbatim. */
	domHash: string;
	/** Vertical scroll offset; compared in buckets of 200 rather than exactly. */
	scrollY: number;
	/** Element count; when present it also feeds the stagnant-page comparison. */
	elementCount?: number;
	/** Hash of the page text; included in the comparison only when non-empty. */
	textHash?: string;
}

/** Thresholds controlling when `StallDetector` reports a stall. */
export interface StallDetectorConfig {
	/** Number of identical trailing action steps at which a stall is reported. */
	maxRepeatedActions: number;
	/** Number of identical trailing page fingerprints at which a stall is reported. */
	maxRepeatedFingerprints: number;
	/** History size unit; each history is trimmed to the most recent `2 * windowSize` entries. */
	windowSize: number;
	/** Number of consecutive stagnant pages before raising stall alert */
	maxStagnantPages: number;
}

/** Defaults applied for every option the caller does not override. */
const DEFAULT_OPTIONS: StallDetectorConfig = {
	maxRepeatedActions: 3,
	maxRepeatedFingerprints: 3,
	windowSize: 10,
	maxStagnantPages: 5,
};

/** Outcome of a stall check. */
export interface StallCheckResult {
	/** True when one of the stall checks matched. */
	stuck: boolean;
	/** Human-readable description of the check that matched; absent when not stuck. */
	reason?: string;
	/**
	 * Escalation level: 0 = below the first escalation threshold, 1 = mild,
	 * 2 = moderate, 3 = severe. A result can be `stuck: true` with severity 0
	 * when the repetition count that triggered it is still below 5.
	 */
	severity: number;
}

/**
 * Nudge messages that escalate in urgency as repetitions increase.
 * Thresholds: 5 repetitions = mild, 8 = moderate, 12 = severe.
 *
 * Entries are ordered by ascending threshold; the detector picks the last
 * entry whose threshold has been reached and uses the first entry when none
 * has been reached yet.
 */
const ESCALATING_NUDGES = [
	{
		threshold: 5,
		severity: 1,
		message:
			'You seem to be repeating similar actions. Consider trying a different approach:\n' +
			'- Click a different element\n' +
			'- Try an alternative navigation path\n' +
			'- Use search to find what you need',
	},
	{
		threshold: 8,
		severity: 2,
		message:
			'WARNING: You are stuck in a loop and have been repeating actions. You MUST change your approach:\n' +
			'- Navigate to a completely different page\n' +
			'- Try a fundamentally different strategy\n' +
			'- If the current approach is not working, consider using the done action to report the issue',
	},
	{
		threshold: 12,
		severity: 3,
		message:
			'CRITICAL: You have been stuck for many steps. This approach is NOT working.\n' +
			'You MUST either:\n' +
			'1. Use the done action to report that the task cannot be completed with your current approach\n' +
			'2. Navigate to a completely different website or page\n' +
			'3. Try a radically different interaction method\n' +
			'Do NOT repeat the same actions again.',
	},
];

/**
 * Detects when the agent has stopped making progress.
 *
 * Two histories are kept: one normalized key per recorded action step and one
 * entry per recorded page signature. `isStuck()` inspects the tail of both
 * and reports the first check that matches:
 *
 * 1. the same action step repeated `maxRepeatedActions` times,
 * 2. two different steps alternating (A -> B -> A -> B),
 * 3. a three-step sequence repeated twice (A -> B -> C -> A -> B -> C),
 * 4. the same page fingerprint repeated `maxRepeatedFingerprints` times,
 * 5. `maxStagnantPages` consecutive pages with the same URL and a similar
 *    element count.
 *
 * Note that `isStuck()` is not a pure query: every positive result adds to a
 * running repetition total that drives the escalation of the nudge message,
 * and `getLoopNudgeMessage()` calls `isStuck()` internally.
 */
export class StallDetector {
	private actionHistory: string[] = [];
	private fingerprintHistory: PageSignature[] = [];
	private fingerprintHashes: string[] = [];
	private options: StallDetectorConfig;
	private totalRepetitions = 0;

	/**
	 * @param options - Overrides for individual thresholds; anything omitted
	 *                  falls back to the module defaults.
	 */
	constructor(options?: Partial<StallDetectorConfig>) {
		this.options = { ...DEFAULT_OPTIONS, ...options };
	}

	/**
	 * Record the commands executed in one step as a single history entry.
	 */
	recordAction(actions: Command[]): void {
		const key = this.normalizeActionHash(actions);
		this.actionHistory.push(key);

		// Keep only the most recent 2 * windowSize entries
		if (this.actionHistory.length > this.options.windowSize * 2) {
			this.actionHistory = this.actionHistory.slice(-this.options.windowSize * 2);
		}
	}

	/**
	 * Record the page signature observed after a step.
	 */
	recordFingerprint(fingerprint: PageSignature): void {
		this.fingerprintHistory.push(fingerprint);
		const hash = this.hashFingerprint(fingerprint);
		this.fingerprintHashes.push(hash);

		// Both arrays are trimmed together so their entries stay aligned.
		if (this.fingerprintHistory.length > this.options.windowSize * 2) {
			this.fingerprintHistory = this.fingerprintHistory.slice(-this.options.windowSize * 2);
			this.fingerprintHashes = this.fingerprintHashes.slice(-this.options.windowSize * 2);
		}
	}

	/**
	 * Run the stall checks against the current histories.
	 *
	 * Side effect: a positive result increases the running repetition total
	 * (by the repetition count for run-based checks, by 2 or 3 for cycles).
	 *
	 * @returns The first matching check, or `{ stuck: false, severity: 0 }`.
	 */
	isStuck(): StallCheckResult {
		// Check for repeated actions
		const actionRepetitions = this.countTrailingRepetitions(this.actionHistory);

		if (actionRepetitions >= this.options.maxRepeatedActions) {
			this.totalRepetitions += actionRepetitions;
			const severity = this.getSeverity(actionRepetitions);
			return {
				stuck: true,
				reason: `Same action repeated ${actionRepetitions} times`,
				severity,
			};
		}

		// Check for action cycle (A -> B -> A -> B)
		if (this.actionHistory.length >= 4) {
			const [first, second, third, fourth] = this.actionHistory.slice(-4);
			// A and B must differ: four identical actions are a plain repetition
			// and are governed by maxRepeatedActions, not by the cycle check.
			if (first !== second && first === third && second === fourth) {
				this.totalRepetitions += 2;
				return {
					stuck: true,
					reason: 'Detected action cycle (alternating between two actions)',
					severity: this.getSeverity(this.totalRepetitions),
				};
			}
		}

		// Check for triple cycle (A -> B -> C -> A -> B -> C)
		if (this.actionHistory.length >= 6) {
			const last6 = this.actionHistory.slice(-6);
			// Same reasoning as above: a run of one single action is not a cycle.
			const isUniformRun = last6[0] === last6[1] && last6[1] === last6[2];
			if (
				!isUniformRun &&
				last6[0] === last6[3] &&
				last6[1] === last6[4] &&
				last6[2] === last6[5]
			) {
				this.totalRepetitions += 3;
				return {
					stuck: true,
					reason: 'Detected 3-step action cycle',
					severity: this.getSeverity(this.totalRepetitions),
				};
			}
		}

		// Check for repeated fingerprints (same page state)
		const fpRepetitions = this.countTrailingRepetitions(this.fingerprintHashes);

		if (fpRepetitions >= this.options.maxRepeatedFingerprints) {
			this.totalRepetitions += fpRepetitions;
			return {
				stuck: true,
				reason: `Page state unchanged for ${fpRepetitions} steps`,
				severity: this.getSeverity(fpRepetitions),
			};
		}

		// Check for consecutive stagnant pages (URL + elementCount unchanged)
		const stagnantCount = this.countConsecutiveStagnantPages();
		if (stagnantCount >= this.options.maxStagnantPages) {
			this.totalRepetitions += stagnantCount;
			return {
				stuck: true,
				reason: `Page appears stagnant for ${stagnantCount} consecutive steps (same URL and element structure)`,
				severity: this.getSeverity(stagnantCount),
			};
		}

		return { stuck: false, severity: 0 };
	}

	/**
	 * Build the warning to show the agent, or `''` when it is not stuck.
	 *
	 * Runs `isStuck()` internally, so calling this also adds to the running
	 * repetition total when a stall is detected.
	 */
	getLoopNudgeMessage(): string {
		const result = this.isStuck();
		if (!result.stuck) {
			return '';
		}

		// Find the appropriate escalating nudge
		const nudge = this.getEscalatingNudge();
		return `Warning: ${result.reason ?? 'You appear to be stuck'}.\n${nudge}`;
	}

	/** Get total number of detected repetitions across the session */
	getTotalRepetitions(): number {
		return this.totalRepetitions;
	}

	/** Forget all recorded history and the accumulated repetition total. */
	reset(): void {
		this.actionHistory = [];
		this.fingerprintHistory = [];
		this.fingerprintHashes = [];
		this.totalRepetitions = 0;
	}

	// ── Private helpers ──

	/**
	 * Normalize action hash for better deduplication:
	 * - Sort search token strings for order-independent matching
	 * - Use element index (not full params) for click actions
	 * - Use URL (not full params) for navigate actions
	 *
	 * The keys of all commands in the step are joined with `|`.
	 */
	private normalizeActionHash(actions: Command[]): string {
		const normalized = actions.map((action) => {
			switch (action.action) {
				case 'tap':
					// Normalize click: use index as the primary key, ignore transient params
					return `click:${action.index}`;

				case 'type_text':
					return `input_text:${action.index}:${action.text}`;

				case 'navigate':
					// Normalize: just the URL
					return `go_to_url:${action.url}`;

				case 'web_search':
					// Sort search terms for order-independent matching
					return `search_google:${this.normalizeSearchQuery(action.query)}`;

				case 'search': {
					const q = 'query' in action ? String((action as Record<string, unknown>).query) : '';
					return `search_page:${this.normalizeSearchQuery(q)}`;
				}

				case 'scroll':
					return `scroll:${action.direction}:${action.index ?? 'page'}`;

				case 'finish':
					// Only the first 50 characters of the final text take part in the key
					return `done:${action.text.slice(0, 50)}`;

				default:
					// Generic fallback: action name + stringified params
					return JSON.stringify(action);
			}
		});

		return normalized.join('|');
	}

	/**
	 * Normalize a search query by lowercasing and sorting tokens.
	 * "best pizza NYC" and "NYC best pizza" produce the same hash.
	 */
	private normalizeSearchQuery(query: string): string {
		return query
			.toLowerCase()
			.split(/\s+/)
			.filter(Boolean)
			.sort()
			.join(' ');
	}

	/**
	 * Hash a page fingerprint for quick equality checks.
	 * Includes URL, DOM hash and scroll position bucket (200px wide), plus
	 * element count and text hash when they are present.
	 */
	private hashFingerprint(fp: PageSignature): string {
		const scrollBucket = Math.floor(fp.scrollY / 200);
		const parts = [
			fp.url,
			fp.domHash,
			scrollBucket.toString(),
		];
		if (fp.elementCount !== undefined) {
			parts.push(`e:${fp.elementCount}`);
		}
		if (fp.textHash) {
			parts.push(`t:${fp.textHash}`);
		}
		return parts.join('|');
	}

	/**
	 * Count how many trailing entries in a history array are identical.
	 */
	private countTrailingRepetitions(history: string[]): number {
		if (history.length === 0) return 0;
		const last = history[history.length - 1];
		let count = 0;
		for (let i = history.length - 1; i >= 0; i--) {
			if (history[i] === last) {
				count++;
			} else {
				break;
			}
		}
		return count;
	}

	/**
	 * Count consecutive stagnant pages: same URL and similar element count.
	 * "Similar" means the difference to the latest page is at most
	 * max(10, 5% of the latest element count). Pages without an element
	 * count are compared by URL only. Returns 0 with fewer than two entries.
	 */
	private countConsecutiveStagnantPages(): number {
		if (this.fingerprintHistory.length < 2) return 0;

		const latest = this.fingerprintHistory[this.fingerprintHistory.length - 1];
		let count = 1;

		for (let i = this.fingerprintHistory.length - 2; i >= 0; i--) {
			const fp = this.fingerprintHistory[i];
			if (fp.url !== latest.url) break;

			if (latest.elementCount !== undefined && fp.elementCount !== undefined) {
				const diff = Math.abs(latest.elementCount - fp.elementCount);
				const threshold = Math.max(10, Math.floor(latest.elementCount * 0.05));
				if (diff > threshold) break;
			}

			count++;
		}

		return count;
	}

	/**
	 * Map repetition count to severity level (0-3).
	 */
	private getSeverity(repetitions: number): number {
		if (repetitions >= 12) return 3;
		if (repetitions >= 8) return 2;
		if (repetitions >= 5) return 1;
		return 0;
	}

	/**
	 * Get the appropriate escalating nudge message based on total repetitions.
	 * Falls back to the mildest message while no threshold has been reached.
	 */
	private getEscalatingNudge(): string {
		// Pick the highest-threshold nudge that applies
		let bestNudge = ESCALATING_NUDGES[0];
		for (const nudge of ESCALATING_NUDGES) {
			if (this.totalRepetitions >= nudge.threshold) {
				bestNudge = nudge;
			}
		}
		return bestNudge.message;
	}
}

/**
 * Fold a DOM tree string into a 32-bit hash (`hash * 31 + charCode` per
 * UTF-16 code unit, wrapped to a signed 32-bit integer).
 * Intended for cheap fingerprint comparison, not for security purposes.
 *
 * @param domTree - Serialized DOM tree.
 * @returns The hash in base 36; negative values carry a leading `-`, and the
 *          empty string hashes to `'0'`.
 */
export function hashPageTree(domTree: string): string {
	let accumulator = 0;
	let position = 0;
	while (position < domTree.length) {
		// Math.imul(x, 31) is the wrapped 32-bit form of (x << 5) - x.
		accumulator = (Math.imul(accumulator, 31) + domTree.charCodeAt(position)) | 0;
		position += 1;
	}
	return accumulator.toString(36);
}

/**
 * Compute a content-based hash from visible page text.
 * More robust than a DOM hash for detecting actual content changes because
 * case, punctuation and whitespace layout are normalized away first.
 *
 * Normalization steps, in order:
 * 1. lowercase;
 * 2. drop everything that is not a letter, combining mark, digit, underscore
 *    or whitespace (Unicode-aware, so non-Latin text is preserved instead of
 *    being stripped to an empty string);
 * 3. collapse whitespace runs to a single space — done after punctuation
 *    removal so that "a , b" and "a b" normalize identically;
 * 4. trim.
 *
 * @param text - Visible text of the page.
 * @returns Base-36 rendering of a signed 32-bit hash (`"0"` for empty input).
 */
export function hashTextContent(text: string): string {
	const normalized = text
		.toLowerCase()
		.replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
		.replace(/\s+/g, ' ')
		.trim();

	let hash = 0;
	for (let i = 0; i < normalized.length; i++) {
		const char = normalized.charCodeAt(i);
		// hash * 31 + char, wrapped to a signed 32-bit integer
		hash = ((hash << 5) - hash + char) | 0;
	}
	return hash.toString(36);
}


================================================
FILE: packages/core/src/agent/types.ts
================================================
import { z } from 'zod';
import type { Command, CommandResult } from '../commands/types.js';
import type { ViewportSnapshot, ViewportHistory } from '../viewport/types.js';
import type { InferenceUsage } from '../model/types.js';

// ── Agent Settings ──

/**
 * Full set of options controlling a single agent run.
 * Baseline values for the required fields are provided by `DEFAULT_AGENT_CONFIG`;
 * the optional (`?`) fields have no default there.
 */
export interface AgentConfig {
	/** Task text for the run (empty string in the defaults). */
	task: string;
	// NOTE(review): presumably the maximum number of steps per run — confirm against the agent loop.
	stepLimit: number;
	commandsPerStep: number;
	failureThreshold: number;
	// NOTE(review): the unit is not visible in this file (default is 10) — confirm before changing.
	retryDelay: number;
	enableScreenshots: boolean;
	enableScreenshotsForTextExtraction: boolean;
	// NOTE(review): presumably a token count (default 128000) — confirm.
	contextWindowSize: number;
	/** Attribute names to capture; the defaults list HTML/ARIA attributes such as `title` and `aria-label`. */
	capturedAttributes: string[];
	commandDelayMs: number;
	allowedUrls?: string[];
	blockedUrls?: string[];
	traceOutputPath?: string;
	replayOutputPath?: string;
	strategyInterval: number;
	maskedValues?: Record<string, string>;
	overrideInstructionBuilder?: string;
	extendInstructionBuilder?: string;
	inlineCommands: boolean;
	/** Optional conversation compaction settings; see `CompactionPolicy`. */
	conversationCompaction?: CompactionPolicy;

	// Extended thinking
	enableDeepReasoning: boolean;
	reasoningBudget: number;

	// Flash mode
	compactMode: boolean;

	// Timeouts (0 = no timeout)
	stepDeadlineMs: number;
	modelDeadlineMs: number;

	// Planning system
	enableStrategy: boolean;
	restrategizeOnStall: boolean;

	// URL extraction from task text
	autoNavigateToUrls: boolean;

	// Coordinate clicking auto-enable per model
	autoEnableCoordinateClicking: boolean;

	// Judge integration
	enableEvaluation: boolean;
	enableSimpleJudge: boolean;
	expectedOutcome?: string;

	// Demo mode
	enableVisualTracer: boolean;

	// Initial actions before main loop
	preflightCommands: Command[];

	// Save conversation per step
	conversationOutputPath?: string;

	// Dynamic action schema rebuild per step
	dynamicCommandSchema: boolean;
}

/**
 * Baseline values for every required `AgentConfig` field.
 * Optional fields (URL allow/block lists, output paths, masked values,
 * instruction overrides, compaction policy, expected outcome) are left unset.
 */
export const DEFAULT_AGENT_CONFIG: AgentConfig = {
	task: '',
	stepLimit: 100,
	commandsPerStep: 10,
	failureThreshold: 5,
	retryDelay: 10,
	enableScreenshots: true,
	enableScreenshotsForTextExtraction: false,
	contextWindowSize: 128000,
	capturedAttributes: [
		'title', 'type', 'name', 'role', 'tabindex',
		'aria-label', 'placeholder', 'value', 'alt', 'aria-expanded',
	],
	commandDelayMs: 1,
	strategyInterval: 0,
	inlineCommands: true,

	// Every feature toggle below is off by default except autoNavigateToUrls.
	enableDeepReasoning: false,
	reasoningBudget: 10000,
	compactMode: false,
	// 0 disables the corresponding timeout (see AgentConfig).
	stepDeadlineMs: 0,
	modelDeadlineMs: 0,
	enableStrategy: false,
	restrategizeOnStall: false,
	autoNavigateToUrls: true,
	autoEnableCoordinateClicking: false,
	enableEvaluation: false,
	enableSimpleJudge: false,
	enableVisualTracer: false,
	preflightCommands: [],
	dynamicCommandSchema: false,
};

// ── Message Compaction Settings ──

/**
 * Settings for periodic LLM-based compaction of the conversation.
 * Referenced from `AgentConfig.conversationCompaction`.
 */
export interface CompactionPolicy {
	/** Run LLM-based compaction every N steps (0 = disabled). */
	interval: number;
	/** Model ID to use for summarization. If omitted, uses the agent's main model. */
	model?: string;
	/** Max tokens for the compaction summary output. */
	maxTokens: number;
	/** Target token budget after compaction. Defaults to 60% of contextWindowSize. */
	targetTokens?: number;
}

// ── Agent Brain (LLM thought process) ──

/**
 * Zod schema for the model's per-step reasoning: an assessment of the current
 * state, a memory note, and the next immediate goal (all required strings).
 */
export const ReasoningSchema = z.object({
	evaluation: z.string().describe('Assessment of the current state'),
	memory: z.string().describe('Important information to remember'),
	nextGoal: z.string().describe('Next immediate goal'),
});

/** Static type inferred from `ReasoningSchema`. */
export type Reasoning = z.infer<typeof ReasoningSchema>;

// ── Agent Output (what LLM returns each step) ──

/**
 * Zod schema for the full decision the model returns each step.
 *
 * `actions` is validated only as an array of records with unknown values; the
 * individual action shapes are not checked by this schema. The optional
 * top-level `evaluation`, `memory` and `nextGoal` duplicate the fields of
 * `currentState`.
 */
export const AgentDecisionSchema = z.object({
	currentState: ReasoningSchema,
	actions: z.array(z.record(z.unknown())).describe('Actions to execute'),
	thinking: z.string().optional().describe('Extended thinking / chain-of-thought'),
	evaluation: z.string().optional().describe('Top-level evaluation (mirrors currentState.evaluation for convenience)'),
	memory: z.string().optional().describe('Top-level memory note (mirrors currentState.memory for convenience)'),
	nextGoal: z.string().optional().describe('Top-level next goal (mirrors currentState.nextGoal for convenience)'),
});

/** Static type inferred from `AgentDecisionSchema`. */
export type AgentDecision = z.infer<typeof AgentDecisionSchema>;

/**
 * Simplified output schema for flash / lightweight models that skip extended thinking.
 * Only contains the essential fields: current state evaluation + actions.
 *
 * Unlike `ReasoningSchema`, `currentState` here has no `memory` field, and the
 * decision has no `thinking` or mirrored top-level fields.
 */
export const AgentDecisionCompactSchema = z.object({
	currentState: z.object({
		evaluation: z.string().describe('Brief assessment'),
		nextGoal: z.string().describe('Next immediate goal'),
	}),
	actions: z.array(z.record(z.unknown())).describe('Actions to execute'),
});

/** Static type inferred from `AgentDecisionCompactSchema`. */
export type AgentDecisionCompact = z.infer<typeof AgentDecisionCompactSchema>;

/**
 * Output variant that omits the extended thinking field.
 * Used when the model does not support or should not produce chain-of-thought.
 *
 * Keeps the same `currentState` (`ReasoningSchema`) and `actions` shape as
 * `AgentDecisionSchema`, without any of its optional top-level fields.
 */
export const AgentDecisionDirectSchema = z.object({
	currentState: ReasoningSchema,
	actions: z.array(z.record(z.unknown())).describe('Actions to execute'),
});

/** Static type inferred from `AgentDecisionDirectSchema`. */
export type AgentDecisionDirect = z.infer<typeof AgentDecisionDirectSchema>;

// ── Step Metadata ──

/**
 * Timing, token and context metadata recorded for one step.
 * Attached to a history entry through `StepRecord.metadata`.
 */
export interface StepTelemetry {
	/** Step number (1-based). */
	stepNumber: number;
	/** Wall-clock duration of this step in milliseconds. */
	durationMs: number;
	/** Token usage for this step. */
	inputTokens: number;
	outputTokens: number;
	/** Number of actions attempted in this step. */
	actionCount: number;
	/** URL at the start of this step. */
	url?: string;
	/** Path to screenshot file if one was saved. */
	screenshotPath?: string;
	/** Timestamp when the step started. */
	// NOTE(review): presumably epoch milliseconds like the Date.now() values used elsewhere in this file — confirm.
	startedAt: number;
	/** Timestamp when the step completed. */
	completedAt: number;
}

// ── Detected Variable ──

/**
 * A variable or piece of data detected during agent execution,
 * e.g. a confirmation number, order ID, or extracted value.
 *
 * Carried per step in `StepRecord.detectedVariables` and per run in
 * `RunOutcome.detectedVariables`.
 */
export interface ExtractedVariable {
	/** Human-readable name (e.g. "order_id", "confirmation_number"). */
	name: string;
	/** The detected value as a string. */
	value: string;
	/** Where this variable was found. */
	source: 'extraction' | 'action_result' | 'page_content' | 'user_input';
	/** Step number where this variable was detected. */
	step?: number;
}

// ── Agent State ──

/**
 * Runtime status and accounting fields tracked for an agent run: step
 * progress, failure counters, lifecycle flags, token totals and cost.
 */
export interface AgentState {
	step: number;
	stepLimit: number;
	// NOTE(review): presumably failureCount is the running total while
	// consecutiveFailures resets on success — confirm against the agent loop.
	failureCount: number;
	consecutiveFailures: number;
	isRunning: boolean;
	isPaused: boolean;
	isDone: boolean;
	lastResult?: string;
	currentUrl?: string;
	totalInputTokens: number;
	totalOutputTokens: number;
	/** Running cost totals; see `AccumulatedCost`. */
	cumulativeCost: AccumulatedCost;
	currentPlan?: string;
	lastPlanStep?: number;
}

// ── History ──

/**
 * One entry of the execution history: the browser state, the model's decision
 * and the command results for a single step, plus optional error, usage,
 * telemetry and detected variables. Stored in `ExecutionLog.entries`.
 */
export interface StepRecord {
	step: number;
	timestamp: number;
	/** Browser snapshot for this step; `ExecutionLog` reads its `url` and `screenshot`. */
	browserState: ViewportHistory;
	agentOutput: AgentDecision;
	/** Per-command results; `ExecutionLog` reads `isDone`, `extractedContent` and `error` from them. */
	actionResults: CommandResult[];
	error?: string;
	/** Token usage; summed into the `ExecutionLog` totals when present. */
	usage?: InferenceUsage;
	// NOTE(review): presumably milliseconds, like StepTelemetry.durationMs — confirm.
	duration: number;
	metadata?: StepTelemetry;
	detectedVariables?: ExtractedVariable[];
}

/**
 * Concrete class wrapping agent execution history with helper methods.
 *
 * Replaces the plain ExecutionLog interface so that consumers can call
 * convenience methods like `finalResult()`, `isDone()`, `urls()`, etc.
 *
 * Aggregate totals (`totalSteps`, `totalInputTokens`, `totalOutputTokens`)
 * are derived from `entries`; call `recomputeTotals()` after mutating
 * `entries` directly.
 */
export class ExecutionLog {
	readonly entries: StepRecord[];
	readonly task: string;
	readonly startTime: number;
	endTime?: number;
	totalDuration?: number;
	totalSteps: number;
	totalInputTokens: number;
	totalOutputTokens: number;

	/**
	 * @param init.entries - Initial entries; the array is used as-is (not copied). Defaults to `[]`.
	 * @param init.task - Task text this history belongs to.
	 * @param init.startTime - Start timestamp; defaults to `Date.now()`.
	 */
	constructor(init: {
		entries?: StepRecord[];
		task: string;
		startTime?: number;
	}) {
		this.entries = init.entries ?? [];
		this.task = init.task;
		this.startTime = init.startTime ?? Date.now();
		this.totalSteps = this.entries.length;
		this.totalInputTokens = 0;
		this.totalOutputTokens = 0;
		this.recomputeTotals();
	}

	/** Recalculate aggregate totals from entries. Called internally and from static factories. */
	recomputeTotals(): void {
		this.totalSteps = this.entries.length;
		this.totalInputTokens = 0;
		this.totalOutputTokens = 0;
		for (const entry of this.entries) {
			// Entries without usage data contribute nothing to the token totals.
			if (entry.usage) {
				this.totalInputTokens += entry.usage.inputTokens;
				this.totalOutputTokens += entry.usage.outputTokens;
			}
		}
	}

	/** Push a new entry and update totals. */
	addEntry(entry: StepRecord): void {
		this.entries.push(entry);
		this.recomputeTotals();
	}

	/** Mark the history as finished: stamps `endTime`/`totalDuration` and refreshes totals. */
	finish(): void {
		this.endTime = Date.now();
		this.totalDuration = this.endTime - this.startTime;
		this.recomputeTotals();
	}

	/**
	 * Returns the final result text from the last "done" action, or undefined
	 * if the agent never completed with a done action.
	 *
	 * Steps are scanned newest-first; a done result with empty
	 * `extractedContent` is skipped.
	 */
	finalResult(): string | undefined {
		for (let i = this.entries.length - 1; i >= 0; i--) {
			const entry = this.entries[i];
			for (const result of entry.actionResults) {
				if (result.isDone && result.extractedContent) {
					return result.extractedContent;
				}
			}
		}
		return undefined;
	}

	/**
	 * Whether the agent reached a "done" action at any point.
	 */
	isDone(): boolean {
		return this.entries.some((entry) =>
			entry.actionResults.some((r) => r.isDone),
		);
	}

	/**
	 * Deduplicated list of all URLs visited during execution (in order of first visit).
	 */
	urls(): string[] {
		const seen = new Set<string>();
		const result: string[] = [];
		for (const entry of this.entries) {
			const url = entry.browserState.url;
			if (url && !seen.has(url)) {
				seen.add(url);
				result.push(url);
			}
		}
		return result;
	}

	/**
	 * All screenshot base64 strings collected during execution (chronological).
	 */
	screenshots(): string[] {
		const result: string[] = [];
		for (const entry of this.entries) {
			if (entry.browserState.screenshot) {
				result.push(entry.browserState.screenshot);
			}
		}
		return result;
	}

	/**
	 * All errors encountered during execution: for each step, the step-level
	 * error first, then the errors of its action results.
	 */
	errors(): string[] {
		const result: string[] = [];
		for (const entry of this.entries) {
			if (entry.error) {
				result.push(entry.error);
			}
			for (const ar of entry.actionResults) {
				if (ar.error) {
					result.push(ar.error);
				}
			}
		}
		return result;
	}

	/**
	 * All detected variables across all steps.
	 */
	allExtractedVariables(): ExtractedVariable[] {
		const result: ExtractedVariable[] = [];
		for (const entry of this.entries) {
			if (entry.detectedVariables) {
				result.push(...entry.detectedVariables);
			}
		}
		return result;
	}

	/**
	 * Serialize the full history to a JSON-compatible object for saving to disk.
	 */
	toJSON(): Record<string, unknown> {
		return {
			task: this.task,
			startTime: this.startTime,
			endTime: this.endTime,
			totalDuration: this.totalDuration,
			totalSteps: this.totalSteps,
			totalInputTokens: this.totalInputTokens,
			totalOutputTokens: this.totalOutputTokens,
			entries: this.entries.map((e) => ({
				...e,
				// Strip screenshot data from serialized form to keep file size down
				browserState: {
					...e.browserState,
					screenshot: e.browserState.screenshot ? '[screenshot omitted]' : undefined,
				},
			})),
		};
	}

	/**
	 * Save the history to a file at the given path (JSON format).
	 * Parent directories are created as needed.
	 * Returns the written path.
	 */
	async saveToFile(filePath: string): Promise<string> {
		const { writeFile, mkdir } = await import('node:fs/promises');
		const { dirname } = await import('node:path');
		await mkdir(dirname(filePath), { recursive: true });
		const json = JSON.stringify(this.toJSON(), null, 2);
		await writeFile(filePath, json, 'utf-8');
		return filePath;
	}

	/**
	 * Load history from a JSON file. Screenshots will be placeholders.
	 *
	 * The top-level shape is validated before use; individual entries are
	 * trusted as written by `saveToFile`. Missing or wrongly-typed scalar
	 * fields fall back to their defaults (`''` for task, `Date.now()` for
	 * startTime, `undefined` for endTime/totalDuration).
	 *
	 * @throws Error if the file cannot be read, is not valid JSON, is not a
	 *   JSON object, or has an `entries` field that is not an array.
	 */
	static async loadFromFile(filePath: string): Promise<ExecutionLog> {
		const { readFile } = await import('node:fs/promises');
		const raw = await readFile(filePath, 'utf-8');

		const parsed: unknown = JSON.parse(raw);
		if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
			throw new Error(`Invalid execution log in ${filePath}: expected a JSON object`);
		}
		const data = parsed as Record<string, unknown>;

		const rawEntries = data.entries ?? [];
		if (!Array.isArray(rawEntries)) {
			throw new Error(`Invalid execution log in ${filePath}: "entries" must be an array`);
		}

		const list = new ExecutionLog({
			task: typeof data.task === 'string' ? data.task : '',
			startTime: typeof data.startTime === 'number' ? data.startTime : Date.now(),
		});
		list.endTime = typeof data.endTime === 'number' ? data.endTime : undefined;
		list.totalDuration = typeof data.totalDuration === 'number' ? data.totalDuration : undefined;

		for (const entry of rawEntries as StepRecord[]) {
			list.entries.push(entry);
		}
		// Entries were pushed directly, so the derived totals must be refreshed.
		list.recomputeTotals();
		return list;
	}
}

// ── Plan ──

/**
 * Zod schema for one plan step: numeric id, description, a status drawn from
 * a fixed set of six values, and an optional free-form note.
 */
export const PlanStepSchema = z.object({
	id: z.number(),
	description: z.string(),
	status: z.enum(['pending', 'in_progress', 'completed', 'failed', 'blocked', 'skipped']),
	note: z.string().optional(),
});

/** Static type inferred from `PlanStepSchema`. */
export type PlanStep = z.infer<typeof PlanStepSchema>;

/** Zod schema for a whole plan: an object holding the list of plan steps under `items`. */
export const StrategyPlanSchema = z.object({
	items: z.array(PlanStepSchema),
});

// ── Judgement ──

/**
 * Zod schema for a detailed evaluation of a run: completion flag, reason and a
 * confidence constrained to the range [0, 1], plus optional verdict and
 * failure diagnostics.
 */
export const EvaluationResultSchema = z.object({
	isComplete: z.boolean(),
	reason: z.string(),
	confidence: z.number().min(0).max(1),
	verdict: z.string().optional().describe('Short human-readable verdict (e.g. "success", "partial", "failed")'),
	failureReason: z.string().optional().describe('Detailed reason if the task failed'),
	impossibleTask: z.boolean().optional().describe('Whether the task appears impossible to complete'),
	reachedCaptcha: z.boolean().optional().describe('Whether a CAPTCHA was encountered that blocked progress'),
});

/** Static type inferred from `EvaluationResultSchema`. */
export type EvaluationResult = z.infer<typeof EvaluationResultSchema>;

/**
 * Lightweight judgement result for simple pass/fail evaluation
 * without confidence scoring or detailed analysis.
 *
 * Surfaced on a run through `RunOutcome.simpleJudgement`.
 */
export const QuickCheckResultSchema = z.object({
	passed: z.boolean(),
	reason: z.string(),
	shouldRetry: z.boolean().optional().describe('Whether the agent should retry with a different approach'),
});

/** Static type inferred from `QuickCheckResultSchema`. */
export type QuickCheckResult = z.infer<typeof QuickCheckResultSchema>;

// ── Cost Tracking ──

/** Cost of a single step, split by direction, in the currency of `PRICING_TABLE` (USD). */
export interface StepCostBreakdown {
	inputCost: number;
	outputCost: number;
	totalCost: number;
}

/** Running token and cost totals accumulated over a run. */
export interface AccumulatedCost {
	totalInputTokens: number;
	totalOutputTokens: number;
	totalInputCost: number;
	totalOutputCost: number;
	totalCost: number;
}

/** Per-model pricing in USD per 1M tokens */
export interface PricingTable {
	inputPer1M: number;
	outputPer1M: number;
}

/**
 * Known pricing, keyed by model-id prefix. Lookups match a model id against
 * these keys by prefix, so dated or suffixed ids (e.g. `gpt-4o-2024-08-06`)
 * resolve to their base entry.
 */
export const PRICING_TABLE: Record<string, PricingTable> = {
	'gpt-4o': { inputPer1M: 2.5, outputPer1M: 10 },
	'gpt-4o-mini': { inputPer1M: 0.15, outputPer1M: 0.6 },
	'gpt-4-turbo': { inputPer1M: 10, outputPer1M: 30 },
	'claude-3-opus': { inputPer1M: 15, outputPer1M: 75 },
	'claude-3-5-sonnet': { inputPer1M: 3, outputPer1M: 15 },
	'claude-3-5-haiku': { inputPer1M: 0.8, outputPer1M: 4 },
	'claude-3-haiku': { inputPer1M: 0.25, outputPer1M: 1.25 },
	'gemini-2.0-flash': { inputPer1M: 0.1, outputPer1M: 0.4 },
	'gemini-1.5-pro': { inputPer1M: 1.25, outputPer1M: 5 },
	'gemini-1.5-flash': { inputPer1M: 0.075, outputPer1M: 0.3 },
};

/**
 * Compute the cost of one step from its token counts.
 *
 * The pricing entry is chosen by the longest table key that is a prefix of
 * `modelId`. Longest-match matters because keys can be prefixes of each other
 * (`gpt-4o` vs `gpt-4o-mini`); taking the first match in table order would
 * bill `gpt-4o-mini` at `gpt-4o` rates.
 *
 * @param inputTokens - Prompt tokens consumed by the step.
 * @param outputTokens - Completion tokens produced by the step.
 * @param modelId - Model identifier, possibly with a version/date suffix.
 * @returns The cost breakdown, or `undefined` when no table key is a prefix of `modelId`.
 */
export function calculateStepCost(
	inputTokens: number,
	outputTokens: number,
	modelId: string,
): StepCostBreakdown | undefined {
	let pricing: PricingTable | undefined;
	let matchedPrefixLength = -1;
	for (const [prefix, candidate] of Object.entries(PRICING_TABLE)) {
		if (prefix.length > matchedPrefixLength && modelId.startsWith(prefix)) {
			pricing = candidate;
			matchedPrefixLength = prefix.length;
		}
	}
	if (!pricing) return undefined;

	const inputCost = (inputTokens / 1_000_000) * pricing.inputPer1M;
	const outputCost = (outputTokens / 1_000_000) * pricing.outputPer1M;
	return { inputCost, outputCost, totalCost: inputCost + outputCost };
}

// ── Plan Update ──

/** Zod schema for a plan update: the revised plan text and the reasoning behind the revision. */
export const PlanRevisionSchema = z.object({
	plan: z.string().describe('Updated plan based on current progress'),
	reasoning: z.string().describe('Why the plan was updated'),
});

/** Static type inferred from `PlanRevisionSchema`. */
export type PlanRevision = z.infer<typeof PlanRevisionSchema>;

// ── Model capability helpers ──

// Model-id fragments treated as supporting extended thinking.
const EXTENDED_THINKING_MODELS = [
	'claude-3-5-sonnet',
	'claude-3-opus',
	'claude-3-7-sonnet',
	'claude-4',
	'o1',
	'o1-pro',
	'o3',
	'o3-mini',
	'gemini-2.0-flash-thinking',
	'deepseek-r1',
];

/**
 * Whether `modelId` contains any of the extended-thinking model fragments.
 * Matching is by substring, so provider prefixes and version suffixes are tolerated.
 */
export function supportsDeepReasoning(modelId: string): boolean {
	for (const fragment of EXTENDED_THINKING_MODELS) {
		if (modelId.indexOf(fragment) !== -1) {
			return true;
		}
	}
	return false;
}

// Model-id fragments treated as capable of coordinate-based clicking.
const COORDINATE_CLICK_MODELS = [
	'gpt-4o',
	'claude-3-5-sonnet',
	'claude-4',
	'gemini-2.0',
	'gemini-1.5-pro',
];

/**
 * Whether `modelId` contains any of the coordinate-click model fragments.
 * Matching is by substring, so e.g. `gpt-4o-mini` also matches via `gpt-4o`.
 */
export function supportsCoordinateMode(modelId: string): boolean {
	for (const fragment of COORDINATE_CLICK_MODELS) {
		if (modelId.indexOf(fragment) !== -1) {
			return true;
		}
	}
	return false;
}

// Model-id fragments identifying flash / lightweight models.
const FLASH_MODELS = [
	'gpt-4o-mini',
	'claude-3-haiku',
	'claude-3-5-haiku',
	'gemini-1.5-flash',
	'gemini-2.0-flash',
];

/**
 * Whether `modelId` contains any of the flash-model fragments.
 * Matching is by substring, so version suffixes are tolerated.
 */
export function isCompactModel(modelId: string): boolean {
	for (const fragment of FLASH_MODELS) {
		if (modelId.indexOf(fragment) !== -1) {
			return true;
		}
	}
	return false;
}

// ── Agent Run Result ──

/**
 * Result of a complete agent run: the outcome flags, the full execution
 * history, collected errors, and optional judgement and cost information.
 */
export interface RunOutcome {
	finalResult?: string;
	success: boolean;
	/** Step-by-step history of the run. */
	history: ExecutionLog;
	errors: string[];
	detectedVariables?: ExtractedVariable[];
	/** Detailed evaluation; see `EvaluationResultSchema`. */
	judgement?: EvaluationResult;
	/** Pass/fail evaluation; see `QuickCheckResultSchema`. */
	simpleJudgement?: QuickCheckResult;
	totalCost?: AccumulatedCost;
}


================================================
FILE: packages/core/src/bridge/adapter.ts
================================================
import { z, type ZodTypeAny } from 'zod';
import type { CommandExecutor } from '../commands/executor.js';

/**
 * Tool description in the shape produced by `BridgeAdapter.getToolDefinitions()`:
 * a name, a human-readable description and a JSON-Schema-like input schema.
 */
export interface MCPToolDefinition {
	name: string;
	description: string;
	inputSchema: Record<string, unknown>;
}

/**
 * Exposes the commands registered on a `CommandExecutor` as MCP tool
 * definitions, converting each command's Zod schema into a JSON-Schema-like
 * object and prefixing tool names with `browser_`.
 */
export class BridgeAdapter {
	/** Prefix added to every command name to form its MCP tool name. */
	private static readonly TOOL_PREFIX = 'browser_';

	private tools: CommandExecutor;

	constructor(tools: CommandExecutor) {
		this.tools = tools;
	}

	/** Build one tool definition per registered command. */
	getToolDefinitions(): MCPToolDefinition[] {
		return this.tools.registry.getAll().map((action) => ({
			name: `${BridgeAdapter.TOOL_PREFIX}${action.name}`,
			description: action.description,
			inputSchema: this.zodToJsonSchema(action.schema),
		}));
	}

	/** Prefixed tool names for every registered command. */
	getToolNames(): string[] {
		return this.tools.registry.getNames().map((name) => `${BridgeAdapter.TOOL_PREFIX}${name}`);
	}

	/**
	 * Strip the tool prefix from an MCP tool name.
	 *
	 * @returns The underlying command name, or `null` if the name lacks the prefix.
	 */
	parseToolName(mcpToolName: string): string | null {
		if (mcpToolName.startsWith(BridgeAdapter.TOOL_PREFIX)) {
			return mcpToolName.slice(BridgeAdapter.TOOL_PREFIX.length);
		}
		return null;
	}

	/**
	 * Convert a command schema to a JSON-Schema-like object.
	 *
	 * Non-object schemas yield a bare `{ type: 'object' }`. For object schemas,
	 * a field is listed under `required` unless it is wrapped in `ZodOptional`
	 * or `ZodDefault` — a field with a default may be omitted by the caller, so
	 * it must not be advertised as required.
	 */
	private zodToJsonSchema(schema: ZodTypeAny): Record<string, unknown> {
		const jsonSchema: Record<string, unknown> = { type: 'object' };

		if (schema instanceof z.ZodObject) {
			const shape = schema.shape;
			const properties: Record<string, unknown> = {};
			const required: string[] = [];

			for (const [key, value] of Object.entries(shape)) {
				const fieldSchema = value as ZodTypeAny;
				properties[key] = this.fieldToJsonSchema(fieldSchema);
				const mayBeOmitted =
					fieldSchema instanceof z.ZodOptional || fieldSchema instanceof z.ZodDefault;
				if (!mayBeOmitted) {
					required.push(key);
				}
			}

			jsonSchema.properties = properties;
			if (required.length > 0) {
				jsonSchema.required = required;
			}
		}

		return jsonSchema;
	}

	/**
	 * Convert a single field schema.
	 *
	 * Handles string, number, boolean, enum, array, literal and nested object
	 * schemas, and unwraps `ZodOptional` / `ZodDefault`. Anything else falls
	 * back to `{ type: 'object' }`.
	 */
	private fieldToJsonSchema(schema: ZodTypeAny): Record<string, unknown> {
		if (schema instanceof z.ZodOptional) {
			// The description may live on the wrapper only (e.g. `.optional().describe(...)`).
			return this.withFallbackDescription(
				this.fieldToJsonSchema(schema.unwrap()),
				schema.description,
			);
		}
		if (schema instanceof z.ZodDefault) {
			const inner = this.withFallbackDescription(
				this.fieldToJsonSchema(schema.removeDefault()),
				schema.description,
			);
			inner.default = schema._def.defaultValue();
			return inner;
		}
		if (schema instanceof z.ZodString) {
			return { type: 'string', description: schema.description };
		}
		if (schema instanceof z.ZodNumber) {
			return { type: 'number', description: schema.description };
		}
		if (schema instanceof z.ZodBoolean) {
			return { type: 'boolean', description: schema.description };
		}
		if (schema instanceof z.ZodEnum) {
			return { type: 'string', enum: schema.options, description: schema.description };
		}
		if (schema instanceof z.ZodArray) {
			return {
				type: 'array',
				items: this.fieldToJsonSchema(schema.element),
				description: schema.description,
			};
		}
		if (schema instanceof z.ZodLiteral) {
			return { const: schema.value, description: schema.description };
		}
		if (schema instanceof z.ZodObject) {
			// Nested objects keep their properties instead of collapsing to a bare object.
			return { ...this.zodToJsonSchema(schema), description: schema.description };
		}
		return { type: 'object', description: schema.description };
	}

	/** Set `description` on `json` only when it does not already carry one. */
	private withFallbackDescription(
		json: Record<string, unknown>,
		description: string | undefined,
	): Record<string, unknown> {
		if (json.description === undefined && description !== undefined) {
			json.description = description;
		}
		return json;
	}
}


================================================
FILE: packages/core/src/bridge/client.ts
================================================
import { type ChildProcess, spawn } from 'node:child_process';
import { EventEmitter } from 'node:events';
import type { CustomCommandSpec } from '../commands/types.js';
import { createLogger } from '../logging.js';

const logger = createLogger('mcp-client');

// ── Types ──

/** Options for `BridgeClient`: how to launch the MCP server process and how to supervise it. */
export interface BridgeClientOptions {
	/** Executable used to spawn the MCP server process. */
	command: string;
	/** Arguments passed to the spawned process (defaults to none). */
	args?: string[];
	/** Extra environment variables, merged over the current process environment. */
	env?: Record<string, string>;
	/** Timeout per JSON-RPC request in ms (default: 30_000) */
	requestTimeoutMs?: number;
	/** Maximum reconnection attempts (default: 5) */
	maxReconnectAttempts?: number;
	/** Initial reconnection delay in ms, doubles each attempt (default: 1000) */
	reconnectDelayMs?: number;
	/** Interval between health checks in ms (0 to disable, default: 0) */
	healthCheckIntervalMs?: number;
}

/** A tool as returned by the server's `tools/list` response and cached by `BridgeClient`. */
export interface MCPTool {
	name: string;
	description: string;
	inputSchema: Record<string, unknown>;
}

/** Connection lifecycle states reported by `BridgeClient.state` and the `stateChange` event. */
export type MCPConnectionState = 'disconnected' | 'connecting' | 'connected' | 'reconnecting';

/** Bookkeeping for one in-flight JSON-RPC request, keyed by request id in `BridgeClient`. */
interface PendingRequest {
	/** Settles the caller's promise with the response result. */
	resolve: (value: unknown) => void;
	/** Settles the caller's promise with a failure (error response, timeout, disconnect). */
	reject: (error: Error) => void;
	/** Per-request timeout timer; cleared once the request settles. */
	timer: ReturnType<typeof setTimeout>;
	/** JSON-RPC method name of the request. */
	method: string;
}

/** Event map for `BridgeClient`: event name → listener argument tuple. */
export interface BridgeClientEvents {
	/** Emitted whenever the connection state actually changes. */
	stateChange: [state: MCPConnectionState, previousState: MCPConnectionState];
	/** Emitted on server process errors and when reconnection is given up. */
	error: [error: Error];
	/** Emitted for every server message that carries no `id`. */
	notification: [method: string, params: Record<string, unknown> | undefined];
}

/**
 * MCP client that connects to external MCP servers and converts their tools
 * into custom browser actions.
 *
 * Features:
 * - Reconnection with exponential backoff
 * - Per-call request timeout
 * - Concurrent request multiplexing (multiple in-flight requests)
 * - Tool list caching with invalidation
 * - Health check / ping
 * - Event emitter for connection state changes
 * - Graceful shutdown with pending request drain
 *
 * Robustness notes:
 * - `error` events are only emitted when an `error` listener is registered;
 *   otherwise the error is logged. (EventEmitter throws on an unhandled
 *   `error` event, which would abort cleanup and crash the host.)
 * - Events from a child process that has been replaced or torn down are
 *   ignored, so a late `close` from an old process cannot clobber a new one.
 * - A failed connect or reconnect attempt never leaves a child process behind.
 */
export class BridgeClient extends EventEmitter<BridgeClientEvents> {
	private process: ChildProcess | null = null;
	private requestId = 0;
	private pendingRequests = new Map<string | number, PendingRequest>();
	private options: BridgeClientOptions;
	/** Partial stdout line carried over between `data` chunks. */
	private buffer = '';

	// ── Connection state ──
	private _state: MCPConnectionState = 'disconnected';
	private reconnectAttempts = 0;
	private reconnectTimer: ReturnType<typeof setTimeout> | null = null;
	/** Set by disconnect() so that scheduled or in-flight reconnects stop instead of respawning. */
	private shutdownRequested = false;

	// ── Tool caching ──
	private cachedTools: MCPTool[] | null = null;
	private toolsCacheTimestamp = 0;

	// ── Health check ──
	private healthCheckTimer: ReturnType<typeof setInterval> | null = null;

	// ── Config ──
	private readonly requestTimeoutMs: number;
	private readonly maxReconnectAttempts: number;
	private readonly reconnectDelayMs: number;
	private readonly healthCheckIntervalMs: number;

	constructor(options: BridgeClientOptions) {
		super();
		this.options = options;
		this.requestTimeoutMs = options.requestTimeoutMs ?? 30_000;
		this.maxReconnectAttempts = options.maxReconnectAttempts ?? 5;
		this.reconnectDelayMs = options.reconnectDelayMs ?? 1000;
		this.healthCheckIntervalMs = options.healthCheckIntervalMs ?? 0;
	}

	// ── Public accessors ──

	get state(): MCPConnectionState {
		return this._state;
	}

	get isConnected(): boolean {
		return this._state === 'connected';
	}

	// ── Connection lifecycle ──

	/**
	 * Spawn the server process, perform the MCP handshake, warm the tool cache
	 * and start health checks. No-op when already connected.
	 *
	 * @throws If the handshake or the initial tool listing fails. When the
	 *   handshake fails, the child process is killed and the state returns to
	 *   `disconnected`.
	 */
	async connect(): Promise<void> {
		if (this._state === 'connected') {
			logger.debug('Already connected, skipping connect()');
			return;
		}

		this.shutdownRequested = false;
		// A manual connect supersedes any scheduled reconnect attempt.
		this.clearReconnectTimer();

		this.setState('connecting');
		try {
			await this.spawnProcess();
			await this.initialize();
		} catch (error) {
			// Do not leave a half-started child or a stuck 'connecting' state behind.
			this.killProcess();
			this.setState('disconnected');
			throw error;
		}
		this.setState('connected');
		this.reconnectAttempts = 0;

		// Warm the tool cache
		await this.listTools();

		// Start health checks if configured
		this.startHealthChecks();

		logger.info(`Connected to MCP server: ${this.options.command}`);
	}

	/** Spawn the server process and wire its stdio and lifecycle events. */
	private async spawnProcess(): Promise<void> {
		// Never overwrite a live child reference; that would orphan the old process.
		this.killProcess();
		this.buffer = '';

		const child = spawn(this.options.command, this.options.args ?? [], {
			stdio: ['pipe', 'pipe', 'pipe'],
			env: { ...process.env, ...this.options.env },
		});
		this.process = child;

		// Handlers below must only act while `child` is still the active process.
		const isCurrent = (): boolean => this.process === child;

		child.stdout?.setEncoding('utf-8');
		child.stdout?.on('data', (data: string) => {
			if (!isCurrent()) return;
			this.buffer += data;
			this.processBuffer();
		});

		child.stderr?.on('data', (data: Buffer) => {
			logger.warn(`[MCP stderr] ${data.toString().trimEnd()}`);
		});

		// A write to an already-closed pipe surfaces as an 'error' on stdin; without
		// a listener that would be an uncaught exception.
		child.stdin?.on('error', (error: Error) => {
			logger.warn(`MCP stdin error: ${error.message}`);
		});

		child.on('close', (code: number | null) => {
			logger.info(`MCP server process exited with code ${code}`);
			if (isCurrent()) {
				this.handleProcessClose();
			}
		});

		child.on('error', (error: Error) => {
			logger.error(`MCP server process error: ${error.message}`);
			if (!isCurrent()) return;
			// Clean up first so internal state is consistent even if a listener throws.
			this.handleProcessClose();
			this.emitError(error);
		});
	}

	/** Perform the MCP `initialize` handshake followed by the `initialized` notification. */
	private async initialize(): Promise<void> {
		await this.send('initialize', {
			protocolVersion: '2024-11-05',
			capabilities: {},
			clientInfo: { name: 'open-browser', version: '0.1.0' },
		});

		// Send initialized notification (no id, no response expected)
		this.sendNotification('notifications/initialized');
	}

	// ── State management ──

	/** Update the connection state and emit `stateChange` when it actually changes. */
	private setState(newState: MCPConnectionState): void {
		const previousState = this._state;
		if (previousState === newState) return;

		this._state = newState;
		logger.debug(`Connection state: ${previousState} -> ${newState}`);
		this.emit('stateChange', newState, previousState);
	}

	/** Emit `error` only when someone listens; an unhandled `error` event would throw. */
	private emitError(error: Error): void {
		if (this.listenerCount('error') > 0) {
			this.emit('error', error);
		} else {
			logger.warn(`Unhandled MCP client error: ${error.message}`);
		}
	}

	/** Reject every in-flight request with `reason` and clear the pending map. */
	private rejectAllPending(reason: string): void {
		for (const pending of this.pendingRequests.values()) {
			clearTimeout(pending.timer);
			pending.reject(new Error(reason));
		}
		this.pendingRequests.clear();
	}

	/** Detach from and kill the current child process, if any. */
	private killProcess(): void {
		const child = this.process;
		if (!child) return;

		this.process = null;
		child.removeAllListeners();
		// Keep a no-op listener so a late 'error' from the dying child cannot crash the host.
		child.on('error', () => {});
		child.kill();
	}

	private clearReconnectTimer(): void {
		if (this.reconnectTimer) {
			clearTimeout(this.reconnectTimer);
			this.reconnectTimer = null;
		}
	}

	// ── Reconnection ──

	/**
	 * Handle loss of the current server process: fail pending requests, release
	 * the child, then either schedule a reconnect (if we were connected) or
	 * settle into `disconnected`.
	 */
	private handleProcessClose(): void {
		const wasPreviouslyConnected = this._state === 'connected';

		this.stopHealthChecks();
		this.rejectAllPending('MCP server disconnected');
		this.killProcess();
		this.buffer = '';

		if (wasPreviouslyConnected && !this.shutdownRequested) {
			this.attemptReconnect();
		} else if (this._state !== 'reconnecting') {
			// While reconnecting, the in-flight attempt observes the rejection and retries itself.
			this.setState('disconnected');
		}
	}

	/** Schedule the next reconnect attempt with exponential backoff, or give up. */
	private attemptReconnect(): void {
		// Never reconnect after an explicit disconnect, and never schedule twice.
		if (this.shutdownRequested || this.reconnectTimer) return;

		if (this.reconnectAttempts >= this.maxReconnectAttempts) {
			logger.error(`Max reconnection attempts (${this.maxReconnectAttempts}) reached`);
			this.setState('disconnected');
			this.emitError(new Error('MCP server reconnection failed after all attempts'));
			return;
		}

		this.setState('reconnecting');
		this.reconnectAttempts++;

		const delay = this.reconnectDelayMs * 2 ** (this.reconnectAttempts - 1);
		logger.info(
			`Reconnecting in ${delay}ms (attempt ${this.reconnectAttempts}/${this.maxReconnectAttempts})`,
		);

		this.reconnectTimer = setTimeout(() => {
			this.reconnectTimer = null;
			void this.runReconnectAttempt();
		}, delay);
	}

	/** One reconnect attempt: respawn, handshake, refresh tools; on failure clean up and retry. */
	private async runReconnectAttempt(): Promise<void> {
		const attempt = this.reconnectAttempts;
		let child: ChildProcess | null = null;

		try {
			await this.spawnProcess();
			child = this.process;
			await this.initialize();
			// disconnect() or a manual connect() may have taken over while we were waiting.
			if (this.shutdownRequested || this.process !== child) return;

			this.setState('connected');
			this.reconnectAttempts = 0;

			// Invalidate tool cache on reconnect -- server may have changed
			this.invalidateToolCache();
			await this.listTools();
			if (this.shutdownRequested || this.process !== child) return;

			this.startHealthChecks();
			logger.info('Reconnected to MCP server');
		} catch (error) {
			// A different, live process means someone else took over; leave it alone.
			const superseded = this.process !== null && this.process !== child;
			if (this.shutdownRequested || superseded) return;

			logger.warn(
				`Reconnect attempt ${attempt} failed: ${
					error instanceof Error ? error.message : String(error)
				}`,
			);
			// E.g. a handshake timeout leaves the child running; do not leak it.
			this.killProcess();
			this.attemptReconnect();
		}
	}

	// ── Tool caching ──

	/** Return the server's tool list, fetching and caching it on first use. */
	async listTools(): Promise<MCPTool[]> {
		if (this.cachedTools) {
			return this.cachedTools;
		}

		const result = (await this.send('tools/list', {})) as { tools?: MCPTool[] } | undefined;
		const tools = Array.isArray(result?.tools) ? result.tools : [];
		this.cachedTools = tools;
		this.toolsCacheTimestamp = Date.now();

		logger.debug(`Cached ${tools.length} tools from MCP server`);
		return tools;
	}

	/** Get cached tools synchronously. Returns empty array if cache is cold. */
	getTools(): MCPTool[] {
		return this.cachedTools ?? [];
	}

	/** Force-invalidate the tool cache. Next listTools() call will re-fetch. */
	invalidateToolCache(): void {
		this.cachedTools = null;
		this.toolsCacheTimestamp = 0;
	}

	/** Milliseconds elapsed since the tool cache was last populated, or 0 if the cache is empty. */
	get toolsCacheAge(): number {
		return this.toolsCacheTimestamp > 0 ? Date.now() - this.toolsCacheTimestamp : 0;
	}

	// ── Tool invocation ──

	/**
	 * Wrap every cached MCP tool as a custom command named `mcp_<tool>` whose
	 * handler forwards its params to `callTool`.
	 */
	toCustomActions(): CustomCommandSpec[] {
		const { z } = require('zod');
		const tools = this.getTools();

		return tools.map((tool) => ({
			name: `mcp_${tool.name}`,
			description: `[MCP] ${tool.description}`,
			// The tool's own inputSchema is not translated to Zod, so accept any keys.
			// A plain z.object({}) strips unknown keys when parsing, which would drop
			// every argument before it reaches the tool.
			schema: z.object({}).passthrough(),
			handler: async (params: Record<string, unknown>) => {
				const result = await this.callTool(tool.name, params);
				return {
					success: true,
					extractedContent: typeof result === 'string' ? result : JSON.stringify(result),
				};
			},
		}));
	}

	/**
	 * Invoke a tool on the server.
	 *
	 * @returns The text of the first `text` content item, or the raw result
	 *   when there is none.
	 * @throws If the request fails or the server flags the result with `isError`.
	 */
	async callTool(name: string, args: Record<string, unknown>): Promise<unknown> {
		const result = (await this.send('tools/call', { name, arguments: args })) as
			| {
					content?: Array<{ type: string; text?: string }>;
					isError?: boolean;
			  }
			| undefined;

		const firstText = result?.content?.find((c) => c.type === 'text')?.text;

		if (result?.isError) {
			throw new Error(firstText ?? 'MCP tool call failed');
		}

		return firstText ?? result;
	}

	// ── Health check ──

	/** Send a ping to verify the server is responsive. Rejects if no pong within timeout. */
	async ping(): Promise<void> {
		await this.send('ping', {});
	}

	/** (Re)start the periodic ping when a positive interval is configured. */
	private startHealthChecks(): void {
		this.stopHealthChecks();

		if (this.healthCheckIntervalMs <= 0) return;

		this.healthCheckTimer = setInterval(async () => {
			try {
				await this.ping();
			} catch {
				logger.warn('Health check failed');
			}
		}, this.healthCheckIntervalMs);
	}

	private stopHealthChecks(): void {
		if (this.healthCheckTimer) {
			clearInterval(this.healthCheckTimer);
			this.healthCheckTimer = null;
		}
	}

	// ── JSON-RPC transport ──

	/**
	 * Send a JSON-RPC request and resolve with its result.
	 * Rejects on an error response, on timeout, or when the connection is lost.
	 */
	private send(method: string, params?: Record<string, unknown>): Promise<unknown> {
		if (!this.process?.stdin?.writable) {
			return Promise.reject(new Error('MCP client is not connected'));
		}

		const id = ++this.requestId;

		return new Promise((resolve, reject) => {
			// Per-call timeout
			const timer = setTimeout(() => {
				this.pendingRequests.delete(id);
				reject(new Error(`MCP request timed out after ${this.requestTimeoutMs}ms: ${method}`));
			}, this.requestTimeoutMs);

			this.pendingRequests.set(id, { resolve, reject, timer, method });

			const request = JSON.stringify({
				jsonrpc: '2.0',
				id,
				method,
				params,
			});

			this.process?.stdin?.write(`${request}\n`);
		});
	}

	/** Send a JSON-RPC notification (no id, no response expected). */
	private sendNotification(method: string, params?: Record<string, unknown>): void {
		if (!this.process?.stdin?.writable) return;

		const notification = JSON.stringify({
			jsonrpc: '2.0',
			method,
			...(params ? { params } : {}),
		});

		this.process.stdin.write(`${notification}\n`);
	}

	/** Consume complete newline-delimited JSON messages from the stdout buffer. */
	private processBuffer(): void {
		const lines = this.buffer.split('\n');
		// The last element is an incomplete line (or ''); keep it for the next chunk.
		this.buffer = lines.pop() ?? '';

		for (const line of lines) {
			if (!line.trim()) continue;
			try {
				const message = JSON.parse(line);
				// Valid JSON that is not an object (e.g. a bare number) is not a JSON-RPC message.
				if (typeof message !== 'object' || message === null) continue;

				// JSON-RPC notification from server (no id field)
				if (message.id === undefined || message.id === null) {
					if (typeof message.method === 'string') {
						this.handleServerNotification(message);
					}
					continue;
				}

				// Response to a pending request
				const pending = this.pendingRequests.get(message.id);
				if (pending) {
					clearTimeout(pending.timer);
					this.pendingRequests.delete(message.id);
					if (message.error) {
						pending.reject(new Error(message.error.message ?? 'MCP request failed'));
					} else {
						pending.resolve(message.result);
					}
				}
			} catch (error) {
				// Best-effort: one bad line must not stall the remaining messages.
				logger.debug(
					`Ignoring unprocessable MCP message: ${
						error instanceof Error ? error.message : String(error)
					}`,
				);
			}
		}
	}

	/** Forward a server notification as an event and react to tool-list changes. */
	private handleServerNotification(message: {
		method: string;
		params?: Record<string, unknown>;
	}): void {
		logger.debug(`Server notification: ${message.method}`);
		this.emit('notification', message.method, message.params);

		// If server signals tool list changed, invalidate cache
		if (message.method === 'notifications/tools/list_changed') {
			this.invalidateToolCache();
		}
	}

	// ── Graceful shutdown ──

	/**
	 * Disconnect gracefully: wait for pending requests to drain (up to a timeout),
	 * then kill the server process. Also cancels any scheduled or in-flight
	 * reconnect.
	 *
	 * @param drainTimeoutMs - Maximum time to wait for in-flight requests.
	 */
	async disconnect(drainTimeoutMs = 5000): Promise<void> {
		this.shutdownRequested = true;
		this.stopHealthChecks();
		this.clearReconnectTimer();

		// Wait for pending requests to drain
		if (this.pendingRequests.size > 0) {
			logger.debug(
				`Waiting for ${this.pendingRequests.size} pending request(s) to drain...`,
			);

			let drainTimer: ReturnType<typeof setTimeout> | undefined;
			await Promise.race([
				this.waitForPendingDrain(),
				new Promise<void>((resolve) => {
					drainTimer = setTimeout(resolve, drainTimeoutMs);
				}),
			]);
			// Do not keep the event loop alive for the rest of the drain window.
			clearTimeout(drainTimer);
		}

		// Reject any still-pending requests
		this.rejectAllPending('MCP client shutting down');

		// Kill the process
		this.killProcess();

		// A reconnect that completed during the drain may have restarted health checks.
		this.stopHealthChecks();

		this.buffer = '';
		this.setState('disconnected');
		logger.info('MCP client disconnected');
	}

	/** Resolve once no requests are in flight (polled every 50 ms). */
	private waitForPendingDrain(): Promise<void> {
		return new Promise<void>((resolve) => {
			const check = () => {
				if (this.pendingRequests.size === 0) {
					resolve();
				} else {
					setTimeout(check, 50);
				}
			};
			check();
		});
	}

	/** Get the number of in-flight requests. */
	get pendingRequestCount(): number {
		return this.pendingRequests.size;
	}
}


================================================
FILE: packages/core/src/bridge/index.ts
================================================
export { BridgeServer, type BridgeServerOptions } from './server.js';
export { BridgeClient, type BridgeClientOptions } from './client.js';
export { BridgeAdapter } from './adapter.js';


================================================
FILE: packages/core/src/bridge/mcp-types.ts
================================================
/**
 * Experimental MCP (Model Context Protocol) server types.
 * @experimental
 */

/**
 * Options bag for the experimental MCP server. Every field is optional and
 * this module defines no defaults for them.
 */
export interface MCPServerOptions {
	/** NOTE(review): presumably the port the server listens on — confirm against the consumer. */
	port?: number;
	/** NOTE(review): presumably the host/interface to bind to — confirm against the consumer. */
	host?: string;
	/** Capabilities selected from the {@link MCPCapability} union. */
	capabilities?: MCPCapability[];
}

/** Closed set of capability names that may appear in `MCPServerOptions.capabilities`. */
export type MCPCapability = 'browse' | 'extract' | 'screenshot' | 'interact';

/**
 * Minimal request shape: a method name plus a required params object.
 *
 * NOTE(review): `./server.js` exports a different `MCPRequest` that carries
 * the JSON-RPC framing (`jsonrpc`, optional `id`, optional `params`). The two
 * are not interchangeable — check which one a call site actually imports.
 */
export interface MCPRequest {
	method: string;
	params: Record<string, unknown>;
}

/**
 * Minimal response shape: an optional result and an optional error object.
 * The type itself does not enforce that exactly one of them is present.
 *
 * NOTE(review): `./server.js` exports a different `MCPResponse` that also
 * carries `jsonrpc` and `id` — check which one a call site actually imports.
 */
export interface MCPResponse {
	result?: unknown;
	error?: { code: number; message: string };
}


================================================
FILE: packages/core/src/bridge/server.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import { BridgeServer, type MCPRequest, type MCPResponse } from './server.js';
import { CommandExecutor } from '../commands/executor.js';

// ── Mock factories ──

/**
 * Builds a stand-in viewport whose async members are bun mocks.
 * Every call to a factory below yields a fresh mock, so call counts on one
 * member never leak into another.
 */
function makeMockViewport() {
	const resolvesVoid = () => mock(() => Promise.resolve());
	const resolvesEmptyObject = () => mock(() => Promise.resolve({}));

	const currentPage = {
		goBack: resolvesVoid(),
		evaluate: resolvesEmptyObject(),
		mouse: { click: resolvesVoid() },
		keyboard: { press: resolvesVoid() },
	};

	const cdp = {
		send: resolvesEmptyObject(),
	};

	// The payload literals stay inside the mock bodies so each invocation
	// resolves with a brand-new object.
	const screenshot = mock(() =>
		Promise.resolve({ base64: 'abc123', width: 1280, height: 800 }),
	);
	const getState = mock(() =>
		Promise.resolve({
			url: 'https://example.com',
			title: 'Example',
			tabs: [{ url: 'https://example.com', title: 'Example' }],
		}),
	);

	const viewport: any = {
		currentPage,
		cdp,
		navigate: resolvesVoid(),
		waitForPageReady: resolvesVoid(),
		switchTab: resolvesVoid(),
		newTab: resolvesVoid(),
		closeTab: resolvesVoid(),
		screenshot,
		isConnected: true,
		getState,
	};
	return viewport;
}

/**
 * Builds a stand-in page analyzer: `extractState` resolves with a fixed DOM
 * snapshot, and the element helpers resolve without doing anything.
 */
function makeMockPageAnalyzer() {
	// The snapshot literal lives inside the mock so every call gets a new object.
	const extractState = mock(() =>
		Promise.resolve({
			tree: '<html>...</html>',
			selectorMap: {},
			elementCount: 5,
			interactiveElementCount: 2,
			scrollPosition: { x: 0, y: 0 },
			viewportSize: { width: 1280, height: 800 },
			documentSize: { width: 1280, height: 2000 },
			pixelsAbove: 0,
			pixelsBelow: 1200,
		}),
	);

	const analyzer: any = {
		extractState,
		clickElementByIndex: mock(() => Promise.resolve()),
		inputTextByIndex: mock(() => Promise.resolve()),
		getElementSelector: mock(() => Promise.resolve('#el')),
	};
	return analyzer;
}

/**
 * Creates a JSON-RPC 2.0 request object for the tests.
 * `id` defaults to 1; the `params` key is only present when a truthy
 * `params` argument is supplied.
 */
function makeRequest(
	method: string,
	id: number | string = 1,
	params?: Record<string, unknown>,
): MCPRequest & { id: number | string } {
	const request: MCPRequest & { id: number | string } = {
		jsonrpc: '2.0',
		id,
		method,
	};

	if (params) {
		request.params = params;
	}

	return request;
}

// ── Tests ──

// Protocol-level tests for BridgeServer: every case feeds a JSON-RPC message
// into handleRequest/handleMessage and inspects the returned response object.
describe('BridgeServer', () => {
	let server: BridgeServer;
	let browser: ReturnType<typeof makeMockViewport>;
	let domService: ReturnType<typeof makeMockPageAnalyzer>;
	let tools: CommandExecutor;

	// Fresh mocks and a fresh server per test so recorded calls never leak between cases.
	beforeEach(() => {
		browser = makeMockViewport();
		domService = makeMockPageAnalyzer();
		tools = new CommandExecutor();

		server = new BridgeServer({
			browser,
			domService,
			tools,
			name: 'test-server',
			version: '1.0.0',
		});
	});

	describe('handleRequest: initialize', () => {
		test('returns server info and capabilities', async () => {
			const response = await server.handleRequest(makeRequest('initialize'));

			expect(response.jsonrpc).toBe('2.0');
			expect(response.id).toBe(1);
			expect(response.result).toBeDefined();

			const result = response.result as any;
			expect(result.protocolVersion).toBe('2024-11-05');
			expect(result.serverInfo.name).toBe('test-server');
			expect(result.serverInfo.version).toBe('1.0.0');
			expect(result.capabilities.tools).toBeDefined();
			expect(result.capabilities.resources).toBeDefined();
			expect(result.capabilities.resources.subscribe).toBe(true);
		});
	});

	describe('handleRequest: tools/list', () => {
		test('returns list of available tools', async () => {
			const response = await server.handleRequest(makeRequest('tools/list'));

			expect(response.result).toBeDefined();
			const result = response.result as any;
			expect(Array.isArray(result.tools)).toBe(true);
			expect(result.tools.length).toBeGreaterThan(0);

			// Each tool should have name, description, inputSchema
			const firstTool = result.tools[0];
			expect(firstTool.name).toBeDefined();
			expect(firstTool.description).toBeDefined();
			expect(firstTool.inputSchema).toBeDefined();

			// Tool names should be prefixed with browser_
			expect(firstTool.name.startsWith('browser_')).toBe(true);
		});
	});

	describe('handleRequest: tools/call', () => {
		test('executes a browser tool and returns result', async () => {
			const response = await server.handleRequest(
				makeRequest('tools/call', 1, {
					name: 'browser_tap',
					arguments: { index: 0 },
				}),
			);

			expect(response.result).toBeDefined();
			const result = response.result as any;
			expect(result.content).toBeDefined();
			expect(Array.isArray(result.content)).toBe(true);
			expect(result.content[0].type).toBe('text');
			expect(result.isError).toBe(false);
		});

		test('returns error for unknown tool', async () => {
			const response = await server.handleRequest(
				makeRequest('tools/call', 1, {
					name: 'unknown_tool',
					arguments: {},
				}),
			);

			expect(response.error).toBeDefined();
			expect(response.error!.code).toBe(-32602);
			expect(response.error!.message).toContain('Unknown tool');
		});

		test('returns error for tool that does not start with browser_', async () => {
			const response = await server.handleRequest(
				makeRequest('tools/call', 1, {
					name: 'not_browser_tool',
					arguments: {},
				}),
			);

			expect(response.error).toBeDefined();
			expect(response.error!.code).toBe(-32602);
		});

		test('returns success content for done action', async () => {
			const response = await server.handleRequest(
				makeRequest('tools/call', 1, {
					name: 'browser_finish',
					arguments: { text: 'All done' },
				}),
			);

			expect(response.result).toBeDefined();
			const result = response.result as any;
			expect(result.content[0].text).toContain('All done');
		});
	});

	describe('handleRequest: resources/list', () => {
		test('returns available resources', async () => {
			const response = await server.handleRequest(makeRequest('resources/list'));

			expect(response.result).toBeDefined();
			const result = response.result as any;
			expect(Array.isArray(result.resources)).toBe(true);

			const uris = result.resources.map((r: any) => r.uri);
			expect(uris).toContain('browser://state');
			expect(uris).toContain('browser://dom');
			expect(uris).toContain('browser://screenshot');
			expect(uris).toContain('browser://tabs');

			// Each resource should have standard fields
			for (const resource of result.resources) {
				expect(resource.name).toBeDefined();
				expect(resource.description).toBeDefined();
				expect(resource.mimeType).toBeDefined();
			}
		});
	});

	describe('handleRequest: resources/read', () => {
		test('reads browser://state resource', async () => {
			const response = await server.handleRequest(
				makeRequest('resources/read', 1, { uri: 'browser://state' }),
			);

			expect(response.result).toBeDefined();
			const result = response.result as any;
			expect(result.contents).toBeDefined();
			expect(result.contents[0].uri).toBe('browser://state');
			expect(result.contents[0].mimeType).toBe('application/json');
			expect(result.contents[0].text).toBeDefined();

			const state = JSON.parse(result.contents[0].text);
			expect(state.url).toBe('https://example.com');
		});

		test('reads browser://dom resource', async () => {
			const response = await server.handleRequest(
				makeRequest('resources/read', 1, { uri: 'browser://dom' }),
			);

			expect(response.result).toBeDefined();
			const result = response.result as any;
			expect(result.contents[0].uri).toBe('browser://dom');
			expect(result.contents[0].mimeType).toBe('text/plain');
			expect(result.contents[0].text).toContain('<html>');
		});

		test('reads browser://screenshot resource', async () => {
			const response = await server.handleRequest(
				makeRequest('resources/read', 1, { uri: 'browser://screenshot' }),
			);

			expect(response.result).toBeDefined();
			const result = response.result as any;
			expect(result.contents[0].uri).toBe('browser://screenshot');
			expect(result.contents[0].mimeType).toBe('image/png');
			expect(result.contents[0].blob).toBe('abc123');
		});

		test('reads browser://tabs resource', async () => {
			const response = await server.handleRequest(
				makeRequest('resources/read', 1, { uri: 'browser://tabs' }),
			);

			expect(response.result).toBeDefined();
			const result = response.result as any;
			expect(result.contents[0].uri).toBe('browser://tabs');
			const tabs = JSON.parse(result.contents[0].text);
			expect(Array.isArray(tabs)).toBe(true);
		});

		test('returns error for unknown resource URI', async () => {
			const response = await server.handleRequest(
				makeRequest('resources/read', 1, { uri: 'browser://nonexistent' }),
			);

			expect(response.error).toBeDefined();
			expect(response.error!.message).toContain('Unknown resource URI');
		});

		test('returns error when uri parameter is missing', async () => {
			const response = await server.handleRequest(
				makeRequest('resources/read', 1, {}),
			);

			expect(response.error).toBeDefined();
			expect(response.error!.message).toContain('Missing required parameter');
		});
	});

	describe('handleRequest: unknown method', () => {
		test('returns method not found error', async () => {
			const response = await server.handleRequest(
				makeRequest('unknown/method'),
			);

			expect(response.error).toBeDefined();
			expect(response.error!.code).toBe(-32601);
			expect(response.error!.message).toContain('Method not found');
		});
	});

	describe('handleRequest: ping', () => {
		test('responds to ping', async () => {
			const response = await server.handleRequest(makeRequest('ping'));

			expect(response.jsonrpc).toBe('2.0');
			expect(response.result).toEqual({});
		});
	});

	describe('handleRequest: resources/subscribe', () => {
		test('subscribes to a valid resource', async () => {
			const response = await server.handleRequest(
				makeRequest('resources/subscribe', 1, { uri: 'browser://state' }),
			);

			expect(response.result).toEqual({});
			expect(response.error).toBeUndefined();
		});

		test('returns error for unknown resource URI', async () => {
			const response = await server.handleRequest(
				makeRequest('resources/subscribe', 1, { uri: 'browser://invalid' }),
			);

			expect(response.error).toBeDefined();
			expect(response.error!.message).toContain('Unknown resource URI');
		});

		test('returns error when uri is missing', async () => {
			const response = await server.handleRequest(
				makeRequest('resources/subscribe', 1, {}),
			);

			expect(response.error).toBeDefined();
		});
	});

	describe('handleRequest: resources/unsubscribe', () => {
		test('unsubscribes from a resource', async () => {
			// First subscribe
			await server.handleRequest(
				makeRequest('resources/subscribe', 1, { uri: 'browser://state' }),
			);

			// Then unsubscribe
			const response = await server.handleRequest(
				makeRequest('resources/unsubscribe', 2, { uri: 'browser://state' }),
			);

			expect(response.result).toEqual({});
		});

		test('returns error when uri is missing', async () => {
			const response = await server.handleRequest(
				makeRequest('resources/unsubscribe', 1, {}),
			);

			expect(response.error).toBeDefined();
		});
	});

	describe('error handling', () => {
		test('returns error response for synchronously thrown errors', async () => {
			// Reading an unknown resource URI makes the server's resource reader
			// throw; the failure must come back as a JSON-RPC error response
			// instead of a rejected promise.
			const response = await server.handleRequest(
				makeRequest('resources/read', 1, { uri: 'browser://nonexistent' }),
			);

			expect(response.jsonrpc).toBe('2.0');
			expect(response.error).toBeDefined();
			expect(response.error!.message).toContain('Unknown resource URI');
		});

		test('returns error for tools/call when execution fails', async () => {
			// Modify the domService to throw on clickElementByIndex
			domService.clickElementByIndex = mock(() =>
				Promise.reject(new Error('Unexpected crash')),
			);

			const failServer = new BridgeServer({
				browser,
				domService,
				tools,
			});

			// The failure may surface either as an error-bearing response or as a
			// rejected promise. Record which one happened first and assert
			// afterwards: expectations placed inside the try block would throw
			// into the catch and be silently accepted, so the test could never fail.
			let response: MCPResponse | undefined;
			let rejection: unknown;
			try {
				response = await failServer.handleRequest(
					makeRequest('tools/call', 1, {
						name: 'browser_tap',
						arguments: { index: 0 },
					}),
				);
			} catch (error) {
				rejection = error;
			}

			if (response) {
				// A returned response must flag the failure one way or the other
				expect(response.jsonrpc).toBe('2.0');
				const hasError = response.error !== undefined;
				const hasIsError = (response.result as any)?.isError === true;
				expect(hasError || hasIsError).toBe(true);
			} else {
				// No response means the call must have rejected
				expect(rejection).toBeDefined();
			}
		});
	});

	describe('handleMessage (with notifications)', () => {
		test('returns null for notification (no id)', async () => {
			const notification: MCPRequest = {
				jsonrpc: '2.0',
				method: 'notifications/initialized',
			};

			const response = await server.handleMessage(notification);
			expect(response).toBeNull();
		});

		test('returns response for request (with id)', async () => {
			const request: MCPRequest = {
				jsonrpc: '2.0',
				id: 1,
				method: 'ping',
			};

			const response = await server.handleMessage(request);
			expect(response).not.toBeNull();
			expect(response!.result).toEqual({});
		});
	});
});


================================================
FILE: packages/core/src/bridge/server.ts
================================================
import type { IncomingMessage, ServerResponse } from 'node:http';
import type { Viewport } from '../viewport/viewport.js';
import type { PageAnalyzer } from '../page/page-analyzer.js';
import type { CommandExecutor } from '../commands/executor.js';
import type { ExecutionContext } from '../commands/types.js';
import { BridgeAdapter, type MCPToolDefinition } from './adapter.js';
import { createLogger } from '../logging.js';

// Module-scoped logger shared by everything in this file; 'mcp-server' is the
// name handed to createLogger.
const logger = createLogger('mcp-server');

// ── JSON-RPC types ──

/** Constructor options for {@link BridgeServer}. */
export interface BridgeServerOptions {
	/** Viewport the server reads state, tabs and screenshots from and hands to executed tools. */
	browser: Viewport;
	/** Page analyzer used to extract the DOM tree and passed to executed tools. */
	domService: PageAnalyzer;
	/** Command executor whose registry runs tool calls; also wrapped by the BridgeAdapter. */
	tools: CommandExecutor;
	/** Server name reported by `initialize` and `/health` (default: 'open-browser'). */
	name?: string;
	/** Server version reported by `initialize` and `/health` (default: '0.1.0'). */
	version?: string;
	/** Port for SSE transport (default: 3100) */
	ssePort?: number;
}

/**
 * Incoming JSON-RPC 2.0 message. A message without an `id` is treated as a
 * notification by `BridgeServer.handleMessage` and receives no response.
 */
export interface MCPRequest {
	jsonrpc: '2.0';
	/** Request identifier echoed back in the response; absent for notifications. */
	id?: string | number;
	method: string;
	params?: Record<string, unknown>;
}

/**
 * Outgoing JSON-RPC 2.0 response. The handlers in `BridgeServer` populate
 * either `result` or `error`; the type itself does not enforce that.
 */
export interface MCPResponse {
	jsonrpc: '2.0';
	/** Identifier copied from the request being answered. */
	id: string | number;
	result?: unknown;
	/** JSON-RPC error object: numeric code, message, and optional extra data. */
	error?: { code: number; message: string; data?: unknown };
}

/**
 * Server-to-client JSON-RPC notification (carries no `id`). `BridgeServer`
 * builds these for progress updates and resource-changed events.
 */
export interface MCPNotification {
	jsonrpc: '2.0';
	method: string;
	params?: Record<string, unknown>;
}

// ── Resource types ──

/** Descriptor of a readable resource, as returned by `resources/list`. */
export interface MCPResource {
	/** Identifier passed to `resources/read` / `resources/subscribe`, e.g. 'browser://state'. */
	uri: string;
	name: string;
	description: string;
	/** MIME type of the content produced when the resource is read. */
	mimeType: string;
}

/**
 * Content of one resource as returned by `resources/read`. `BridgeServer`
 * fills `text` for the state, DOM and tabs resources and `blob` for the
 * screenshot resource.
 */
export interface MCPResourceContent {
	uri: string;
	mimeType: string;
	/** Textual payload (JSON or plain text). */
	text?: string;
	/** Base64 string payload; the screenshot resource stores the screenshot's base64 data here. */
	blob?: string;
}

// ── Subscription tracking ──

/**
 * Element type of the per-URI subscription sets kept by `BridgeServer`.
 *
 * NOTE(review): `resources/subscribe` only creates an empty set for the URI and
 * never adds a `ResourceSubscription` to it, and `notify` is not called anywhere
 * in the class — confirm whether per-subscriber delivery is still planned.
 */
interface ResourceSubscription {
	uri: string;
	/** Callback that receives the notification to send to the client */
	notify: (notification: MCPNotification) => void;
}

/**
 * MCP (Model Context Protocol) server that exposes browser actions as tools
 * and browser state as resources. Supports stdio and SSE transports.
 *
 * Implements:
 * - initialize / tools/list / tools/call (existing)
 * - resources/list / resources/read (browser state as resources)
 * - resources/subscribe / resources/unsubscribe (live updates)
 * - notifications/progress (step progress notifications)
 * - SSE transport via HTTP
 */
export class BridgeServer {
	private controller: BridgeAdapter;
	private browser: Viewport;
	private domService: PageAnalyzer;
	private tools: CommandExecutor;
	private name: string;
	private version: string;
	private ssePort: number;

	/** Active SSE connections that receive notifications */
	private sseClients = new Set<ServerResponse>();

	/** Resource subscriptions keyed by URI */
	private subscriptions = new Map<string, Set<ResourceSubscription>>();

	/** Last screenshot base64 cache for resource reads */
	private lastScreenshotBase64: string | null = null;

	/** HTTP server reference for SSE transport */
	private httpServer: import('node:http').Server | null = null;

	/**
	 * @param options Browser, DOM service and command executor to expose, plus
	 *   optional server name (default 'open-browser'), version (default '0.1.0')
	 *   and SSE port (default 3100).
	 */
	constructor(options: BridgeServerOptions) {
		this.browser = options.browser;
		this.domService = options.domService;
		this.tools = options.tools;
		this.controller = new BridgeAdapter(options.tools);
		this.name = options.name ?? 'open-browser';
		this.version = options.version ?? '0.1.0';
		this.ssePort = options.ssePort ?? 3100;
	}

	// ── Static resource definitions ──

	/** Resources advertised by `resources/list` and accepted by `resources/subscribe`. */
	private getResourceDefinitions(): MCPResource[] {
		return [
			{
				uri: 'browser://state',
				name: 'Browser State',
				description: 'Current browser state summary including URL, title, and active tab',
				mimeType: 'application/json',
			},
			{
				uri: 'browser://dom',
				name: 'DOM Tree',
				description: 'Current page DOM tree serialized for LLM consumption',
				mimeType: 'text/plain',
			},
			{
				uri: 'browser://screenshot',
				name: 'Screenshot',
				description: 'Last screenshot of the current page as base64 PNG',
				mimeType: 'image/png',
			},
			{
				uri: 'browser://tabs',
				name: 'Open Tabs',
				description: 'List of all open browser tabs with URLs and titles',
				mimeType: 'application/json',
			},
		];
	}

	// ── Request dispatcher ──

	/**
	 * Entry point for any incoming JSON-RPC message.
	 *
	 * @returns the response for a request, or `null` when the message is a
	 *   notification (no `id`), which gets no reply.
	 */
	async handleMessage(message: MCPRequest): Promise<MCPResponse | null> {
		// JSON-RPC notifications have no `id` field -- they are fire-and-forget
		if (message.id === undefined || message.id === null) {
			await this.handleNotification(message);
			return null;
		}

		return this.handleRequest(message as MCPRequest & { id: string | number });
	}

	/**
	 * Dispatch a request to its protocol handler.
	 *
	 * Unknown methods yield a -32601 response. Anything a handler throws or
	 * rejects with is converted into a -32603 (internal error) response that
	 * carries the request's id, so this method itself does not reject.
	 */
	async handleRequest(request: MCPRequest & { id: string | number }): Promise<MCPResponse> {
		try {
			switch (request.method) {
				case 'initialize':
					return this.handleInitialize(request);
				case 'tools/list':
					return this.handleToolsList(request);
				case 'tools/call':
					// `return await` (not a bare `return`) keeps the promise inside this
					// try block; otherwise a rejection would skip the catch below and
					// escape to the transport as an unrelated error.
					return await this.handleToolsCall(request);
				case 'resources/list':
					return this.handleResourcesList(request);
				case 'resources/read':
					return await this.handleResourcesRead(request);
				case 'resources/subscribe':
					return this.handleResourcesSubscribe(request);
				case 'resources/unsubscribe':
					return this.handleResourcesUnsubscribe(request);
				case 'ping':
					return { jsonrpc: '2.0', id: request.id, result: {} };
				default:
					return {
						jsonrpc: '2.0',
						id: request.id,
						error: { code: -32601, message: `Method not found: ${request.method}` },
					};
			}
		} catch (error) {
			return {
				jsonrpc: '2.0',
				id: request.id,
				error: {
					code: -32603,
					message: error instanceof Error ? error.message : String(error),
				},
			};
		}
	}

	/** Handle incoming JSON-RPC notifications (no response expected). */
	private async handleNotification(message: MCPRequest): Promise<void> {
		switch (message.method) {
			case 'notifications/initialized':
				logger.debug('Client confirmed initialization');
				break;
			case 'notifications/cancelled': {
				// Only logged: no in-flight work is actually cancelled here.
				const requestId = message.params?.requestId;
				logger.debug(`Client cancelled request ${requestId}`);
				break;
			}
			default:
				logger.debug(`Received unknown notification: ${message.method}`);
		}
	}

	// ── Protocol handlers ──

	/** Answer `initialize` with the protocol version, capabilities and server info. */
	private handleInitialize(request: MCPRequest & { id: string | number }): MCPResponse {
		return {
			jsonrpc: '2.0',
			id: request.id,
			result: {
				protocolVersion: '2024-11-05',
				capabilities: {
					tools: {},
					resources: {
						subscribe: true,
						listChanged: true,
					},
				},
				serverInfo: {
					name: this.name,
					version: this.version,
				},
			},
		};
	}

	/** Answer `tools/list` with name, description and input schema of every adapter tool. */
	private handleToolsList(request: MCPRequest & { id: string | number }): MCPResponse {
		const tools = this.controller.getToolDefinitions();
		return {
			jsonrpc: '2.0',
			id: request.id,
			result: {
				tools: tools.map((t) => ({
					name: t.name,
					description: t.description,
					inputSchema: t.inputSchema,
				})),
			},
		};
	}

	/**
	 * Execute a tool named in `params.name` with `params.arguments`.
	 *
	 * Returns -32602 when the name is missing or not recognised by the adapter.
	 * Otherwise emits progress notifications around the execution, signals
	 * subscribers that state/DOM may have changed, and wraps the command result
	 * as text content. Failures thrown by the executor propagate to the caller.
	 */
	private async handleToolsCall(request: MCPRequest & { id: string | number }): Promise<MCPResponse> {
		const params = request.params ?? {};
		const toolName = params.name;

		// `params` comes straight from the client, so the name cannot be trusted
		// to be a string.
		if (typeof toolName !== 'string') {
			return {
				jsonrpc: '2.0',
				id: request.id,
				error: { code: -32602, message: 'Missing required parameter: name' },
			};
		}

		const args = (params.arguments ?? {}) as Record<string, unknown>;

		const actionName = this.controller.parseToolName(toolName);
		if (!actionName) {
			return {
				jsonrpc: '2.0',
				id: request.id,
				error: { code: -32602, message: `Unknown tool: ${toolName}` },
			};
		}

		// Emit progress notification at start
		this.emitProgress(request.id, 0, `Executing ${toolName}...`);

		const context: ExecutionContext = {
			page: this.browser.currentPage,
			cdpSession: this.browser.cdp!,
			domService: this.domService,
			browserSession: this.browser,
		};

		const result = await this.tools.registry.execute(actionName, args, context);

		// Emit progress notification at completion
		this.emitProgress(request.id, 1, 'Complete');

		// Notify subscribers that browser state may have changed
		this.notifyResourceChanged('browser://state');
		this.notifyResourceChanged('browser://dom');

		return {
			jsonrpc: '2.0',
			id: request.id,
			result: {
				content: [
					{
						type: 'text',
						text: result.extractedContent ?? (result.success ? 'Success' : `Error: ${result.error}`),
					},
				],
				isError: !result.success,
			},
		};
	}

	// ── Resource handlers ──

	/** Answer `resources/list` with the static resource definitions. */
	private handleResourcesList(request: MCPRequest & { id: string | number }): MCPResponse {
		return {
			jsonrpc: '2.0',
			id: request.id,
			result: {
				resources: this.getResourceDefinitions(),
			},
		};
	}

	/**
	 * Answer `resources/read` for `params.uri`.
	 * A missing uri, an unknown uri, or any failure while reading is reported
	 * as a -32602 error response.
	 */
	private async handleResourcesRead(request: MCPRequest & { id: string | number }): Promise<MCPResponse> {
		const uri = request.params?.uri as string;
		if (!uri) {
			return {
				jsonrpc: '2.0',
				id: request.id,
				error: { code: -32602, message: 'Missing required parameter: uri' },
			};
		}

		try {
			const content = await this.readResource(uri);
			return {
				jsonrpc: '2.0',
				id: request.id,
				result: {
					contents: [content],
				},
			};
		} catch (error) {
			return {
				jsonrpc: '2.0',
				id: request.id,
				error: {
					code: -32602,
					message: error instanceof Error ? error.message : String(error),
				},
			};
		}
	}

	/**
	 * Produce the current content of one resource.
	 * @throws Error when `uri` is not one of the known `browser://` resources.
	 */
	private async readResource(uri: string): Promise<MCPResourceContent> {
		switch (uri) {
			case 'browser://state': {
				const state = await this.browser.getState();
				return {
					uri,
					mimeType: 'application/json',
					text: JSON.stringify(state, null, 2),
				};
			}
			case 'browser://dom': {
				const domState = await this.domService.extractState(
					this.browser.currentPage,
					this.browser.cdp!,
				);
				return {
					uri,
					mimeType: 'text/plain',
					text: domState.tree,
				};
			}
			case 'browser://screenshot': {
				// Takes a new screenshot on every read and remembers its base64 data.
				const screenshot = await this.browser.screenshot();
				this.lastScreenshotBase64 = screenshot.base64;
				return {
					uri,
					mimeType: 'image/png',
					blob: screenshot.base64,
				};
			}
			case 'browser://tabs': {
				const state = await this.browser.getState();
				return {
					uri,
					mimeType: 'application/json',
					text: JSON.stringify(state.tabs, null, 2),
				};
			}
			default:
				throw new Error(`Unknown resource URI: ${uri}`);
		}
	}

	/**
	 * Answer `resources/subscribe`: validates the uri and marks it as
	 * subscribed so later change notifications for it are broadcast.
	 */
	private handleResourcesSubscribe(request: MCPRequest & { id: string | number }): MCPResponse {
		const uri = request.params?.uri as string;
		if (!uri) {
			return {
				jsonrpc: '2.0',
				id: request.id,
				error: { code: -32602, message: 'Missing required parameter: uri' },
			};
		}

		const validUris = new Set(this.getResourceDefinitions().map((r) => r.uri));
		if (!validUris.has(uri)) {
			return {
				jsonrpc: '2.0',
				id: request.id,
				error: { code: -32602, message: `Unknown resource URI: ${uri}` },
			};
		}

		// Only the presence of the key matters: notifyResourceChanged checks
		// whether the uri is tracked and then broadcasts to all SSE clients.
		if (!this.subscriptions.has(uri)) {
			this.subscriptions.set(uri, new Set());
		}

		logger.debug(`Client subscribed to resource: ${uri}`);
		return { jsonrpc: '2.0', id: request.id, result: {} };
	}

	/** Answer `resources/unsubscribe`: stops change notifications for the uri. */
	private handleResourcesUnsubscribe(request: MCPRequest & { id: string | number }): MCPResponse {
		const uri = request.params?.uri as string;
		if (!uri) {
			return {
				jsonrpc: '2.0',
				id: request.id,
				error: { code: -32602, message: 'Missing required parameter: uri' },
			};
		}

		this.subscriptions.delete(uri);
		logger.debug(`Client unsubscribed from resource: ${uri}`);
		return { jsonrpc: '2.0', id: request.id, result: {} };
	}

	// ── Notification emission ──

	/**
	 * Emit a progress notification for an in-flight request.
	 *
	 * @param requestId id of the request, sent as the progress token
	 * @param progress current progress out of a fixed total of 1
	 * @param message optional human-readable status; omitted when empty
	 */
	emitProgress(requestId: string | number, progress: number, message?: string): void {
		const notification: MCPNotification = {
			jsonrpc: '2.0',
			method: 'notifications/progress',
			params: {
				progressToken: requestId,
				progress,
				total: 1,
				...(message ? { message } : {}),
			},
		};
		this.broadcastNotification(notification);
	}

	/** Notify subscribers that a resource has changed (no-op for unsubscribed uris). */
	private notifyResourceChanged(uri: string): void {
		if (!this.subscriptions.has(uri)) return;

		const notification: MCPNotification = {
			jsonrpc: '2.0',
			method: 'notifications/resources/updated',
			params: { uri },
		};
		this.broadcastNotification(notification);
	}

	/**
	 * Send a notification to every connected SSE client.
	 * Nothing is written to stdout here, so stdio-only clients do not receive
	 * notifications.
	 */
	private broadcastNotification(notification: MCPNotification): void {
		const serialized = JSON.stringify(notification);

		for (const client of this.sseClients) {
			try {
				client.write(`data: ${serialized}\n\n`);
			} catch {
				// Client may have disconnected; drop it so later broadcasts skip it
				this.sseClients.delete(client);
			}
		}
	}

	// ── Stdio transport ──

	/**
	 * Serve newline-delimited JSON-RPC over stdin/stdout.
	 *
	 * Each complete line is parsed and dispatched; responses are written as one
	 * JSON document per line. A line that cannot be parsed or dispatched is
	 * answered with a -32700 response using id 0. The process exits when stdin
	 * ends.
	 */
	async startStdio(): Promise<void> {
		const stdin = process.stdin;
		const stdout = process.stdout;

		stdin.setEncoding('utf-8');

		// Holds the trailing partial line between 'data' events
		let buffer = '';

		stdin.on('data', async (data: string) => {
			buffer += data;

			const lines = buffer.split('\n');
			buffer = lines.pop() ?? '';

			for (const line of lines) {
				if (!line.trim()) continue;

				try {
					const message = JSON.parse(line) as MCPRequest;
					const response = await this.handleMessage(message);
					if (response) {
						stdout.write(`${JSON.stringify(response)}\n`);
					}
				} catch {
					const errorResponse: MCPResponse = {
						jsonrpc: '2.0',
						id: 0,
						error: { code: -32700, message: 'Parse error' },
					};
					stdout.write(`${JSON.stringify(errorResponse)}\n`);
				}
			}
		});

		stdin.on('end', () => {
			process.exit(0);
		});
	}

	// ── SSE transport ──

	/**
	 * Start an HTTP server that exposes the MCP protocol over Server-Sent Events.
	 *
	 * Endpoints:
	 * - GET  /sse       -- SSE event stream for notifications and responses
	 * - POST /message   -- Send JSON-RPC requests
	 * - GET  /health    -- Health check
	 *
	 * @param port port to listen on; falls back to the configured `ssePort`
	 * @returns a promise that resolves once the server is listening and rejects
	 *   if listening fails (for example when the port is already in use)
	 */
	async startSSE(port?: number): Promise<void> {
		const http = await import('node:http');
		const listenPort = port ?? this.ssePort;

		const server = http.createServer(async (req: IncomingMessage, res: ServerResponse) => {
			// CORS headers for browser clients
			res.setHeader('Access-Control-Allow-Origin', '*');
			res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
			res.setHeader('Access-Control-Allow-Headers', 'Content-Type');

			if (req.method === 'OPTIONS') {
				res.writeHead(204);
				res.end();
				return;
			}

			const url = req.url ?? '/';

			if (req.method === 'GET' && url === '/sse') {
				this.handleSSEConnection(res);
				return;
			}

			if (req.method === 'POST' && url === '/message') {
				await this.handleSSEMessage(req, res);
				return;
			}

			if (req.method === 'GET' && url === '/health') {
				res.writeHead(200, { 'Content-Type': 'application/json' });
				res.end(JSON.stringify({
					status: 'ok',
					server: this.name,
					version: this.version,
					browserConnected: this.browser.isConnected,
				}));
				return;
			}

			res.writeHead(404, { 'Content-Type': 'application/json' });
			res.end(JSON.stringify({ error: 'Not found' }));
		});
		this.httpServer = server;

		return new Promise<void>((resolve, reject) => {
			// A failed listen() emits 'error' instead of calling the callback;
			// without a listener the returned promise would never settle.
			const onListenError = (error: Error): void => {
				if (this.httpServer === server) {
					this.httpServer = null;
				}
				reject(error);
			};
			server.once('error', onListenError);

			server.listen(listenPort, () => {
				// Startup succeeded; this listener only exists to report listen failures.
				server.off('error', onListenError);
				logger.info(`MCP SSE server listening on port ${listenPort}`);
				resolve();
			});
		});
	}

	/** Open an SSE stream on `res`, announce the POST endpoint and track the client until it closes. */
	private handleSSEConnection(res: ServerResponse): void {
		res.writeHead(200, {
			'Content-Type': 'text/event-stream',
			'Cache-Control': 'no-cache',
			Connection: 'keep-alive',
		});

		// Send endpoint info as the first event so the client knows where to POST
		const endpointEvent = JSON.stringify({ endpoint: '/message' });
		res.write(`event: endpoint\ndata: ${endpointEvent}\n\n`);

		this.sseClients.add(res);
		logger.debug(`SSE client connected (total: ${this.sseClients.size})`);

		res.on('close', () => {
			this.sseClients.delete(res);
			logger.debug(`SSE client disconnected (total: ${this.sseClients.size})`);
		});
	}

	/**
	 * Handle `POST /message`: read the body, dispatch it, and reply.
	 *
	 * Requests get a 200 JSON response that is also pushed to every SSE client;
	 * notifications get an empty 202. A body that cannot be read, parsed or
	 * dispatched is answered with a 400 parse-error response.
	 */
	private async handleSSEMessage(req: IncomingMessage, res: ServerResponse): Promise<void> {
		try {
			// Decode at the stream level so a multi-byte UTF-8 character split
			// across two chunks is not corrupted by per-chunk string conversion.
			req.setEncoding('utf-8');

			let body = '';
			for await (const chunk of req) {
				body += chunk;
			}

			const message = JSON.parse(body) as MCPRequest;
			const response = await this.handleMessage(message);

			if (response) {
				const serialized = JSON.stringify(response);

				// Send response both as HTTP response and as SSE event
				res.writeHead(200, { 'Content-Type': 'application/json' });
				res.end(serialized);

				// Also push to SSE stream for clients that expect it there
				for (const client of this.sseClients) {
					try {
						client.write(`event: message\ndata: ${serialized}\n\n`);
					} catch {
						this.sseClients.delete(client);
					}
				}
			} else {
				// Notification -- no response body
				res.writeHead(202);
				res.end();
			}
		} catch {
			// If a reply was already started, writing a second status line would throw.
			if (!res.headersSent) {
				res.writeHead(400, { 'Content-Type': 'application/json' });
				res.end(JSON.stringify({ jsonrpc: '2.0', id: 0, error: { code: -32700, message: 'Parse error' } }));
			}
		}
	}

	/** Stop the SSE HTTP server and disconnect all clients. */
	async stopSSE(): Promise<void> {
		for (const client of this.sseClients) {
			try {
				client.end();
			} catch {
				// Ignore
			}
		}
		this.sseClients.clear();

		if (this.httpServer) {
			return new Promise<void>((resolve) => {
				this.httpServer!.close(() => {
					this.httpServer = null;
					logger.info('MCP SSE server stopped');
					resolve();
				});
			});
		}
	}

	/** Stop all transports and clean up. */
	async stop(): Promise<void> {
		await this.stopSSE();
		this.subscriptions.clear();
	}
}


================================================
FILE: packages/core/src/commands/catalog/catalog.ts
================================================
import { z, type ZodTypeAny } from 'zod';
import type { CatalogEntry, CatalogOptions } from './types.js';
import type { CommandResult, ExecutionContext, CustomCommandSpec } from '../types.js';
import { CommandFailedError } from '../../errors.js';
import { escapeRegExp } from '../../utils.js';

// ── Special parameter names ──
// A handler parameter carrying one of these names is filled in from the
// ExecutionContext rather than taken from the action's validated params.

const SPECIAL_PARAMS = new Set<string>([
	'browserSession',
	'cdpSession',
	'page',
	'domService',
	'extractionLlm',
	'fileSystem',
	'maskedValues',
]);

/**
 * Parse the top-level parameter names from a function's source text
 * (`handler.toString()`).
 *
 * Recognised shapes: parenthesised arrow functions, single bare-parameter
 * arrows (`x => …`), `function` expressions/declarations (optionally `async`
 * or generator), and method shorthand (`name(a, b) { … }`).
 *
 * The parameter list is read with a balanced scan, so default values that
 * contain parentheses, brackets, commas or quoted strings (for example
 * `(page = getPage(), cdpSession)`) do not cut the list short.
 *
 * Only top-level parameters are reported. A destructured parameter is returned
 * as its pattern text with the outer bracket stripped (for example `{ a, b }`
 * becomes `"a, b"`); it is not expanded into individual names.
 *
 * @param handler Function whose signature should be inspected.
 * @returns One cleaned name per top-level parameter; an empty array when the
 *          source has no recognisable parameter list (e.g. native code with
 *          no parameters).
 */
function inspectHandlerParams(handler: Function): string[] {
	const source = handler.toString();

	// Single parameter without parens: `x => …` / `async x => …`
	const bareName = /^(?:async\s+)?([\w$]+)\s*=>/.exec(source)?.[1];
	if (bareName !== undefined) {
		return [bareName];
	}

	// Everything up to and including the opening paren of the parameter list:
	// `(`, `async (`, `function name(`, `async function* name(`, `name(`, `*name(`.
	const head = /^(?:async\s*)?(?:function\b\s*\*?\s*[\w$]*\s*|\*?\s*[\w$]+\s*)?\(/.exec(source);
	if (!head) {
		return [];
	}

	// Clean up: remove defaults, type annotations and the outer destructuring bracket
	return splitTopLevelParams(source, head[0].length).map((raw) => {
		// Remove default values: param = defaultVal
		const withoutDefault = (raw.split('=')[0] ?? '').trim();
		// Remove type annotations: param: Type
		const withoutType = (withoutDefault.split(':')[0] ?? '').trim();
		// For our purposes we only care about top-level named params
		return withoutType.replace(/^[{[(]|[})\]]$/g, '').trim();
	});
}

/**
 * Split a parameter list into its top-level entries.
 *
 * Scanning starts just after the opening paren at `start` and stops at the
 * matching closing paren. Commas nested inside braces, brackets, parens or
 * quoted strings do not split. Regex literals and comments inside the
 * parameter list are not understood and are scanned as plain text.
 */
function splitTopLevelParams(source: string, start: number): string[] {
	const pieces: string[] = [];
	let depth = 0;
	let quote = '';
	let current = '';

	for (let i = start; i < source.length; i++) {
		const char = source.charAt(i);

		if (quote !== '') {
			current += char;
			if (char === '\\') {
				// Keep the escaped character so an escaped quote does not end the string
				i++;
				current += source.charAt(i);
			} else if (char === quote) {
				quote = '';
			}
			continue;
		}

		if (char === '"' || char === "'" || char === '`') {
			quote = char;
			current += char;
		} else if (char === '{' || char === '[' || char === '(') {
			depth++;
			current += char;
		} else if (char === ')' && depth === 0) {
			// Closing paren of the parameter list itself
			break;
		} else if (char === '}' || char === ']' || char === ')') {
			depth--;
			current += char;
		} else if (char === ',' && depth === 0) {
			pieces.push(current.trim());
			current = '';
		} else {
			current += char;
		}
	}

	if (current.trim()) {
		pieces.push(current.trim());
	}
	return pieces;
}

/**
 * Work out which special parameters a handler asks for.
 *
 * The handler's parameter names are read via `inspectHandlerParams` and
 * narrowed down to those listed in `SPECIAL_PARAMS`.
 */
function detectSpecialParams(handler: Function): Set<string> {
	const requested = inspectHandlerParams(handler).filter((paramName) =>
		SPECIAL_PARAMS.has(paramName),
	);
	return new Set<string>(requested);
}

/** Accessors that read each special parameter off an ExecutionContext. */
const SPECIAL_PARAM_RESOLVERS = new Map<string, (context: ExecutionContext) => unknown>([
	['browserSession', (context) => context.browserSession],
	['cdpSession', (context) => context.cdpSession],
	['page', (context) => context.page],
	['domService', (context) => context.domService],
	['extractionLlm', (context) => context.extractionLlm],
	['fileSystem', (context) => context.fileSystem],
	['maskedValues', (context) => context.maskedValues],
]);

/**
 * Look up the value of a special parameter on the ExecutionContext.
 *
 * @returns The matching context field, or `undefined` when `name` is not one
 *          of the known special parameters.
 */
function resolveSpecialParam(
	name: string,
	context: ExecutionContext,
): unknown {
	const read = SPECIAL_PARAM_RESOLVERS.get(name);
	return read ? read(context) : undefined;
}

/**
 * Registry of named actions.
 *
 * Each entry pairs a Zod schema with a handler. The catalog validates params,
 * injects context-derived "special" parameters, runs handlers, and can
 * describe or filter its entries (by page domain, by terminating flag).
 */
export class CommandCatalog {
	private actions = new Map<string, CatalogEntry>();
	private specialParamsCache = new Map<string, Set<string>>();
	private options: CatalogOptions;

	constructor(options?: CatalogOptions) {
		this.options = options ?? {};
	}

	/**
	 * Add (or replace) an action.
	 *
	 * The call is a no-op when the name is listed in `excludeActions`, or when
	 * `includeActions` is non-empty and does not list the name.
	 */
	register(action: CatalogEntry): void {
		if (this.options.excludeActions?.includes(action.name)) return;
		if (
			this.options.includeActions &&
			this.options.includeActions.length > 0 &&
			!this.options.includeActions.includes(action.name)
		) {
			return;
		}

		this.actions.set(action.name, action);

		// Pre-compute which special parameters the handler expects
		const specialParams = detectSpecialParams(action.handler);
		if (specialParams.size > 0) {
			this.specialParamsCache.set(action.name, specialParams);
		} else {
			// Replacing an action must not keep the previous handler's entry around
			this.specialParamsCache.delete(action.name);
		}
	}

	/** Register an action described by a CustomCommandSpec. */
	registerCustom(definition: CustomCommandSpec): void {
		this.register({
			name: definition.name,
			description: definition.description,
			schema: definition.schema,
			handler: definition.handler,
			terminatesSequence: definition.terminatesSequence,
		});
	}

	/** Remove an action and its cached special-parameter set. */
	unregister(name: string): void {
		this.actions.delete(name);
		this.specialParamsCache.delete(name);
	}

	get(name: string): CatalogEntry | undefined {
		return this.actions.get(name);
	}

	has(name: string): boolean {
		return this.actions.has(name);
	}

	getAll(): CatalogEntry[] {
		return [...this.actions.values()];
	}

	getNames(): string[] {
		return [...this.actions.keys()];
	}

	/**
	 * Validate `params` against the action's schema, inject special parameters
	 * from `context`, and run the handler.
	 *
	 * @throws CommandFailedError when the action is not registered, when schema
	 *         validation fails, or when the handler throws. A CommandFailedError
	 *         raised by the handler is re-thrown unchanged; any other error is
	 *         wrapped, with the original attached as `cause` when it is an Error.
	 */
	async execute(
		name: string,
		params: Record<string, unknown>,
		context: ExecutionContext,
	): Promise<CommandResult> {
		const action = this.actions.get(name);
		if (!action) {
			throw new CommandFailedError(name, `Action "${name}" is not registered`);
		}

		try {
			// Validate params against schema
			const validated = action.schema.parse(params);

			// Inject special parameters from context into the validated params
			const enriched = this.injectSpecialParams(name, validated, context);

			return await action.handler(enriched, context);
		} catch (error) {
			if (error instanceof CommandFailedError) throw error;

			const message = error instanceof Error ? error.message : String(error);
			throw new CommandFailedError(name, message, {
				cause: error instanceof Error ? error : undefined,
			});
		}
	}

	/**
	 * Return the set of special parameter names detected for a given action.
	 * Returns an empty set if no special params were detected.
	 *
	 * The result is a copy, so callers cannot alter the catalog's cache.
	 */
	getSpecialParams(name: string): Set<string> {
		return new Set(this.specialParamsCache.get(name));
	}

	/**
	 * Inject special parameters from ExecutionContext into the params object.
	 * Special params are resolved from context and merged into the params
	 * so the handler can destructure them directly from its first argument.
	 *
	 * Keys already present in `params` win, and context values that resolve to
	 * `undefined` are not added. `params` itself is never mutated.
	 */
	private injectSpecialParams(
		actionName: string,
		params: Record<string, unknown>,
		context: ExecutionContext,
	): Record<string, unknown> {
		const specialParams = this.specialParamsCache.get(actionName);
		if (!specialParams || specialParams.size === 0) {
			return params;
		}

		const enriched = { ...params };
		for (const paramName of specialParams) {
			// Only inject if not already present in the validated params
			if (!(paramName in enriched)) {
				const value = resolveSpecialParam(paramName, context);
				if (value !== undefined) {
					enriched[paramName] = value;
				}
			}
		}
		return enriched;
	}

	/**
	 * Build one schema covering every registered action.
	 *
	 * Object schemas are extended with an `action` literal equal to the action
	 * name; non-object schemas are used as they are. With no actions the result
	 * is `{ action: string }`, with one action it is that action's schema, and
	 * otherwise a union of all of them.
	 */
	buildDynamicSchema(): z.ZodType {
		const actionSchemas = this.getAll().map((action) => {
			if (action.schema instanceof z.ZodObject) {
				return action.schema.extend({
					action: z.literal(action.name),
				});
			}
			return action.schema;
		});

		if (actionSchemas.length === 0) {
			return z.object({ action: z.string() });
		}

		if (actionSchemas.length === 1) {
			return actionSchemas[0];
		}

		return z.union(actionSchemas as [ZodTypeAny, ZodTypeAny, ...ZodTypeAny[]]);
	}

	get size(): number {
		return this.actions.size;
	}

	// ── Prompt description ──

	/**
	 * Build a formatted multi-line description of all available actions.
	 * Optionally filter by page URL domain so only relevant actions appear.
	 *
	 * When `pageUrl` is missing or cannot be parsed, no filtering is applied.
	 * Domain patterns are compared case-insensitively, the same way
	 * `getActionsForDomain` compares them.
	 */
	getPromptDescription(pageUrl?: string): string {
		let actions = this.getAll();

		// If a URL is provided, filter out actions whose domainFilter does not match
		if (pageUrl) {
			const domain = extractDomain(pageUrl);
			if (domain) {
				actions = actions.filter((a) => CommandCatalog.matchesDomain(a, domain));
			}
		}

		const lines: string[] = [];
		for (const action of actions) {
			const termFlag = action.terminatesSequence ? ' [terminates]' : '';
			lines.push(`- ${action.name}: ${action.description}${termFlag}`);

			// Describe the schema parameters
			if (action.schema instanceof z.ZodObject) {
				const shape = action.schema.shape as Record<string, ZodTypeAny>;
				for (const [key, zodType] of Object.entries(shape)) {
					if (key === 'action') continue;
					const desc = zodType.description ?? '';
					const isOptional = zodType.isOptional?.() ?? false;
					const optLabel = isOptional ? ' (optional)' : '';
					lines.push(`    ${key}${optLabel}: ${desc}`);
				}
			}
		}

		return lines.join('\n');
	}

	// ── Domain-based filtering ──

	/**
	 * Return actions that have a domainFilter matching the given domain,
	 * plus all actions that have no domainFilter (universal actions).
	 *
	 * The domain is lowercased and a leading `www.` is dropped before matching.
	 */
	getActionsForDomain(domain: string): CatalogEntry[] {
		// Lowercase first so an upper-case "WWW." prefix is stripped too
		const normalized = domain.toLowerCase().replace(/^www\./, '');

		return this.getAll().filter((action) =>
			CommandCatalog.matchesDomain(action, normalized),
		);
	}

	/**
	 * Whether `action` applies to `normalizedDomain` (already lowercased, without
	 * a leading `www.`). Actions without a domainFilter apply everywhere; others
	 * apply when a pattern equals the domain or is a parent domain of it.
	 */
	private static matchesDomain(action: CatalogEntry, normalizedDomain: string): boolean {
		if (!action.domainFilter || action.domainFilter.length === 0) return true;

		return action.domainFilter.some((pattern) => {
			const p = pattern.toLowerCase();
			return normalizedDomain === p || normalizedDomain.endsWith(`.${p}`);
		});
	}

	// ── Sensitive data replacement ──

	/**
	 * Replace sensitive data values in text with `<key>` placeholders.
	 * Longer values take precedence over shorter ones to avoid partial
	 * replacements.
	 *
	 * All values are replaced in a single pass, so text inside a placeholder that
	 * was just inserted is never matched again by another value. The placeholder
	 * is produced by a replacer function, so `$` sequences in a key are emitted
	 * literally. Empty values are ignored; when two keys share a value, the first
	 * one (after sorting) names the placeholder.
	 */
	replaceSensitiveData(
		text: string,
		maskedValues: Record<string, string>,
	): string {
		if (!text) return text;

		// Sort entries by value length descending so longer values are tried first
		const entries = Object.entries(maskedValues).sort(
			(a, b) => b[1].length - a[1].length,
		);

		const keyByValue = new Map<string, string>();
		for (const [key, value] of entries) {
			if (!value) continue;
			if (!keyByValue.has(value)) {
				keyByValue.set(value, key);
			}
		}

		// An empty alternation would match at every position, so bail out early
		if (keyByValue.size === 0) return text;

		const pattern = new RegExp(
			[...keyByValue.keys()].map((value) => escapeRegExp(value)).join('|'),
			'g',
		);

		return text.replace(pattern, (match) => {
			const key = keyByValue.get(match);
			return key === undefined ? match : `<${key}>`;
		});
	}

	// ── Actions that terminate the sequence ──

	/**
	 * Return the names of all actions marked as terminatesSequence.
	 */
	getTerminatingActions(): string[] {
		return this.getAll()
			.filter((a) => a.terminatesSequence)
			.map((a) => a.name);
	}

	/**
	 * Check whether a given action name is marked as terminatesSequence.
	 */
	isTerminating(name: string): boolean {
		const action = this.actions.get(name);
		return action?.terminatesSequence === true;
	}
}

// ── Helpers ──

/**
 * Pull the hostname out of a URL, lowercased and without a leading `www.`.
 * Returns `null` when the string cannot be parsed as a URL.
 */
function extractDomain(url: string): string | null {
	let hostname: string;
	try {
		({ hostname } = new URL(url));
	} catch {
		return null;
	}
	return hostname.replace(/^www\./, '').toLowerCase();
}


================================================
FILE: packages/core/src/commands/catalog/types.ts
================================================
import type { z } from 'zod';
import type { CommandResult, ExecutionContext } from '../types.js';

/** A single action that can be registered in a CommandCatalog. */
export interface CatalogEntry {
	/** Unique action name; the catalog keys its entries by this value. */
	name: string;
	/** Human-readable summary, printed next to the name in the prompt description. */
	description: string;
	/** Zod schema the raw params are parsed against before the handler runs. */
	schema: z.ZodTypeAny;
	/** Runs the action with the validated params and the execution context. */
	handler: (params: Record<string, unknown>, context: ExecutionContext) => Promise<CommandResult>;
	/** When true, the action is reported as terminating the sequence. */
	terminatesSequence?: boolean;
	/**
	 * Domains the action applies to. A pattern matches a domain that is equal
	 * to it or is a subdomain of it. Missing or empty means the action applies
	 * everywhere.
	 */
	domainFilter?: string[];
}

/** Options controlling which actions a CommandCatalog accepts on registration. */
export interface CatalogOptions {
	/** Action names that are silently skipped when registered. */
	excludeActions?: string[];
	/** When non-empty, only action names listed here are registered. */
	includeActions?: string[];
}


================================================
FILE: packages/core/src/commands/catalog.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import { z } from 'zod';
import { CommandCatalog } from './catalog/catalog.js';
import { CommandFailedError } from '../errors.js';
import type { ExecutionContext, CommandResult } from './types.js';

// ── Helpers ──

/** Create a mocked handler that resolves with `result` (plain success by default). */
function makeHandler(
	result: CommandResult = { success: true },
): (params: Record<string, unknown>, ctx: ExecutionContext) => Promise<CommandResult> {
	const handler = mock(async () => result);
	return handler;
}

/** Create an ExecutionContext of empty stubs, with any `overrides` applied on top. */
function makeContext(overrides: Partial<ExecutionContext> = {}): ExecutionContext {
	const stubs: ExecutionContext = {
		page: {} as any,
		cdpSession: {} as any,
		domService: {} as any,
		browserSession: {} as any,
	};
	return { ...stubs, ...overrides };
}

// Schema shared by most tests: a required string `value` and an optional number `count`.
const testSchema = z.object({
	value: z.string(),
	count: z.number().optional(),
});

// ── Tests ──

describe('CommandCatalog', () => {
	let registry: CommandCatalog

	beforeEach(() => {
		registry = new CommandCatalog();
	});

	describe('register and unregister', () => {
		test('registers an action', () => {
			registry.register({
				name: 'test_action',
				description: 'A test action',
				schema: testSchema,
				handler: makeHandler(),
			});

			expect(registry.has('test_action')).toBe(true);
			expect(registry.size).toBe(1);
		});

		test('unregisters an action', () => {
			registry.register({
				name: 'test_action',
				description: 'A test action',
				schema: testSchema,
				handler: makeHandler(),
			});

			registry.unregister('test_action');
			expect(registry.has('test_action')).toBe(false);
			expect(registry.size).toBe(0);
		});

		test('get returns registered action', () => {
			registry.register({
				name: 'my_action',
				description: 'Mine',
				schema: testSchema,
				handler: makeHandler(),
			});

			const action = registry.get('my_action');
			expect(action).toBeDefined();
			expect(action!.name).toBe('my_action');
			expect(action!.description).toBe('Mine');
		});

		test('get returns undefined for unregistered action', () => {
			expect(registry.get('nonexistent')).toBeUndefined();
		});

		test('respects excludeActions option', () => {
			const filtered = new CommandCatalog({ excludeActions: ['blocked'] });

			filtered.register({
				name: 'blocked',
				description: 'Should not register',
				schema: testSchema,
				handler: makeHandler(),
			});

			filtered.register({
				name: 'allowed',
				description: 'Should register',
				schema: testSchema,
				handler: makeHandler(),
			});

			expect(filtered.has('blocked')).toBe(false);
			expect(filtered.has('allowed')).toBe(true);
		});

		test('respects includeActions option', () => {
			const filtered = new CommandCatalog({ includeActions: ['only_this'] });

			filtered.register({
				name: 'only_this',
				description: 'Should register',
				schema: testSchema,
				handler: makeHandler(),
			});

			filtered.register({
				name: 'other',
				description: 'Should not register',
				schema: testSchema,
				handler: makeHandler(),
			});

			expect(filtered.has('only_this')).toBe(true);
			expect(filtered.has('other')).toBe(false);
		});
	});

	describe('getAll and getNames', () => {
		test('returns all registered actions', () => {
			registry.register({
				name: 'alpha',
				description: 'Alpha',
				schema: testSchema,
				handler: makeHandler(),
			});
			registry.register({
				name: 'beta',
				description: 'Beta',
				schema: testSchema,
				handler: makeHandler(),
			});

			const all = registry.getAll();
			expect(all).toHaveLength(2);

			const names = registry.getNames();
			expect(names).toContain('alpha');
			expect(names).toContain('beta');
		});
	});

	describe('execute', () => {
		test('executes registered action with valid params', async () => {
			const handler = makeHandler({ success: true, extractedContent: 'result' });
			registry.register({
				name: 'exec_test',
				description: 'Test execute',
				schema: testSchema,
				handler,
			});

			const ctx = makeContext();
			const result = await registry.execute('exec_test', { value: 'hello' }, ctx);

			expect(result.success).toBe(true);
			expect(result.extractedContent).toBe('result');
			expect(handler).toHaveBeenCalledTimes(1);
		});

		test('throws CommandFailedError for unregistered action', async () => {
			const ctx = makeContext();

			await expect(
				registry.execute('nonexistent', {}, ctx),
			).rejects.toThrow(CommandFailedError);
		});

		test('throws CommandFailedError when schema validation fails', async () => {
			registry.register({
				name: 'strict',
				description: 'Strict schema',
				schema: z.object({ required: z.string() }),
				handler: makeHandler(),
			});

			const ctx = makeContext();

			await expect(
				registry.execute('strict', { wrong: 'param' }, ctx),
			).rejects.toThrow(CommandFailedError);
		});

		test('wraps handler errors in CommandFailedError', async () => {
			registry.register({
				name: 'failing',
				description: 'Fails',
				schema: testSchema,
				handler: async () => {
					throw new Error('Internal failure');
				},
			});

			const ctx = makeContext();

			await expect(
				registry.execute('failing', { value: 'x' }, ctx),
			).rejects.toThrow(CommandFailedError);
		});

		test('re-throws CommandFailedError without wrapping', async () => {
			const original = new CommandFailedError('tool', 'original error');
			registry.register({
				name: 'rethrow',
				description: 'Rethrow',
				schema: testSchema,
				handler: async () => {
					throw original;
				},
			});

			const ctx = makeContext();

			try {
				await registry.execute('rethrow', { value: 'x' }, ctx);
				expect.unreachable('Should have thrown');
			} catch (error) {
				expect(error).toBe(original);
			}
		});
	});

	describe('domain-based filtering', () => {
		test('returns universal actions for any domain', () => {
			registry.register({
				name: 'universal',
				description: 'No filter',
				schema: testSchema,
				handler: makeHandler(),
			});

			const actions = registry.getActionsForDomain('example.com');
			expect(actions.map((a) => a.name)).toContain('universal');
		});

		test('returns domain-specific actions matching the domain', () => {
			registry.register({
				name: 'github_only',
				description: 'GitHub',
				schema: testSchema,
				handler: makeHandler(),
				domainFilter: ['github.com'],
			});

			const githubActions = registry.getActionsForDomain('github.com');
			expect(githubActions.map((a) => a.name)).toContain('github_only');

			const otherActions = registry.getActionsForDomain('example.com');
			expect(otherActions.map((a) => a.name)).not.toContain('github_only');
		});

		test('matches subdomains', () => {
			registry.register({
				name: 'google_all',
				description: 'Google subdomains',
				schema: testSchema,
				handler: makeHandler(),
				domainFilter: ['google.com'],
			});

			const actions = registry.getActionsForDomain('mail.google.com');
			expect(actions.map((a) => a.name)).toContain('google_all');
		});

		test('strips www prefix from domain', () => {
			registry.register({
				name: 'example',
				description: 'Example',
				schema: testSchema,
				handler: makeHandler(),
				domainFilter: ['example.com'],
			});

			const actions = registry.getActionsForDomain('www.example.com');
			expect(actions.map((a) => a.name)).toContain('example');
		});
	});

	describe('terminatesSequence flag', () => {
		test('isTerminating returns true for terminating actions', () => {
			registry.register({
				name: 'finish',
				description: 'Finish',
				schema: testSchema,
				handler: makeHandler(),
				terminatesSequence: true,
			});

			expect(registry.isTerminating('finish')).toBe(true);
		});

		test('isTerminating returns false for non-terminating actions', () => {
			registry.register({
				name: 'continue',
				description: 'Continue',
				schema: testSchema,
				handler: makeHandler(),
			});

			expect(registry.isTerminating('continue')).toBe(false);
		});

		test('getTerminatingActions returns all terminating action names', () => {
			registry.register({
				name: 'finish',
				description: 'Done',
				schema: testSchema,
				handler: makeHandler(),
				terminatesSequence: true,
			});
			registry.register({
				name: 'abort',
				description: 'Abort',
				schema: testSchema,
				handler: makeHandler(),
				terminatesSequence: true,
			});
			registry.register({
				name: 'tap',
				description: 'Click',
				schema: testSchema,
				handler: makeHandler(),
			});

			const terminating = registry.getTerminatingActions();
			expect(terminating).toContain('finish');
			expect(terminating).toContain('abort');
			expect(terminating).not.toContain('tap');
		});
	});

	describe('getPromptDescription', () => {
		test('returns formatted description of all actions', () => {
			registry.register({
				name: 'tap',
				description: 'Click on an element',
				schema: z.object({
					index: z.number().describe('Element index'),
				}),
				handler: makeHandler(),
			});
			registry.register({
				name: 'finish',
				description: 'Mark task as done',
				schema: z.object({
					text: z.string().describe('Result text'),
				}),
				handler: makeHandler(),
				terminatesSequence: true,
			});

			const desc = registry.getPromptDescription();
			expect(desc).toContain('- tap: Click on an element');
			expect(desc).toContain('index');
			expect(desc).toContain('Element index');
			expect(desc).toContain('- finish: Mark task as done [terminates]');
		});

		test('filters by page URL domain', () => {
			registry.register({
				name: 'universal',
				description: 'Universal action',
				schema: testSchema,
				handler: makeHandler(),
			});
			registry.register({
				name: 'github_only',
				description: 'GitHub action',
				schema: testSchema,
				handler: makeHandler(),
				domainFilter: ['github.com'],
			});

			const githubDesc = registry.getPromptDescription('https://github.com/repo');
			expect(githubDesc).toContain('universal');
			expect(githubDesc).toContain('github_only');

			const otherDesc = registry.getPromptDescription('https://example.com');
			expect(otherDesc).toContain('universal');
			expect(otherDesc).not.toContain('github_only');
		});
	});

	describe('sensitive data replacement', () => {
		test('replaces sensitive values with placeholders', () => {
			const result = registry.replaceSensitiveData(
				'The password is hunter2 and the key is abc123',
				{ PASSWORD: 'hunter2', API_KEY: 'abc123' },
			);

			expect(result).toBe('The password is <PASSWORD> and the key is <API_KEY>');
		});

		test('replaces longer values first to avoid partial replacements', () => {
			const result = registry.replaceSensitiveData(
				'Token: my-long-secret-token and key: secret',
				{ TOKEN: 'my-long-secret-token', KEY: 'secret' },
			);

			// "my-long-secret-token" should be replaced first, not the inner "secret"
			expect(result).toBe('Token: <TOKEN> and key: <KEY>');
		});

		test('handles empty text', () => {
			const result = registry.replaceSensitiveData('', { KEY: 'value' });
			expect(result).toBe('');
		});

		test('handles empty sensitive data', () => {
			const result = registry.replaceSensitiveData('some text', {});
			expect(result).toBe('some text');
		});

		test('handles special regex characters in values', () => {
			const result = registry.replaceSensitiveData(
				'Found: $100.00 (USD)',
				{ PRICE: '$100.00' },
			);

			expect(result).toBe('Found: <PRICE> (USD)');
		});
	});

	describe('parameter inspection and injection', () => {
		test('detects no special parameters for conventionally named handler arguments', () => {
			registry.register({
				name: 'with_page',
				description: 'Uses page',
				schema: z.object({}),
				handler: async (params, ctx) => {
					return { success: true };
				},
			});

			// The handler doesn't use named special params, so set should be empty
			const special = registry.getSpecialParams('with_page');
			expect(special.size).toBe(0);
		});

		test('detects special parameters named in the handler signature', () => {
			registry.register({
				name: 'named_special',
				description: 'Names special params in its signature',
				schema: z.object({}),
				handler: async (page, cdpSession) => {
					return { success: true };
				},
			});

			// Detection is purely name-based: both names are in the special list
			const special = registry.getSpecialParams('named_special');
			expect(special.has('page')).toBe(true);
			expect(special.has('cdpSession')).toBe(true);
			expect(special.size).toBe(2);
		});

		test('returns empty set for unregistered action', () => {
			const special = registry.getSpecialParams('nonexistent');
			expect(special.size).toBe(0);
		});
	});

	describe('buildDynamicSchema', () => {
		test('builds a union schema from registered actions', () => {
			registry.register({
				name: 'tap',
				description: 'Click',
				schema: z.object({ index: z.number() }),
				handler: makeHandler(),
			});
			registry.register({
				name: 'finish',
				description: 'Done',
				schema: z.object({ text: z.string() }),
				handler: makeHandler(),
			});

			const schema = registry.buildDynamicSchema();
			expect(schema).toBeDefined();

			// Should parse a click action
			const clickResult = schema.safeParse({ action: 'tap', index: 5 });
			expect(clickResult.success).toBe(true);

			// Should parse a done action
			const doneResult = schema.safeParse({ action: 'finish', text: 'finished' });
			expect(doneResult.success).toBe(true);
		});

		test('returns simple object schema when no actions registered', () => {
			const schema = registry.buildDynamicSchema();
			const result = schema.safeParse({ action: 'anything' });
			expect(result.success).toBe(true);
		});

		test('returns single schema when only one action registered', () => {
			registry.register({
				name: 'only',
				description: 'Only action',
				schema: z.object({ x: z.number() }),
				handler: makeHandler(),
			});

			const schema = registry.buildDynamicSchema();
			const result = schema.safeParse({ action: 'only', x: 42 });
			expect(result.success).toBe(true);
		});
	});

	describe('registerCustom', () => {
		test('registers a custom action definition', () => {
			registry.registerCustom({
				name: 'custom_action',
				description: 'A custom action',
				schema: z.object({ query: z.string() }),
				handler: async () => ({ success: true }),
			});

			expect(registry.has('custom_action')).toBe(true);
		});

		test('registers with terminatesSequence flag', () => {
			registry.registerCustom({
				name: 'custom_done',
				description: 'Custom done',
				schema: z.object({}),
				handler: async () => ({ success: true, isDone: true }),
				terminatesSequence: true,
			});

			expect(registry.isTerminating('custom_done')).toBe(true);
		});
	});
});


================================================
FILE: packages/core/src/commands/executor.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import { CommandExecutor } from './executor.js';
import type { Command, ExecutionContext, CommandResult } from './types.js';
import { UrlBlockedError, CommandFailedError } from '../errors.js';

// ── Mock factories ──

/** Build a stand-in DOM service whose methods are all mocks. */
function makeMockPageAnalyzer() {
	// Each call yields a fresh state object describing an empty 1280x800 page
	const extractState = mock(async () => ({
		tree: '<html></html>',
		selectorMap: {},
		elementCount: 0,
		interactiveElementCount: 0,
		scrollPosition: { x: 0, y: 0 },
		viewportSize: { width: 1280, height: 800 },
		documentSize: { width: 1280, height: 2000 },
		pixelsAbove: 0,
		pixelsBelow: 0,
	}));

	const analyzer = {
		clickElementByIndex: mock(async () => {}),
		inputTextByIndex: mock(async () => {}),
		getElementSelector: mock(async () => '#selector'),
		extractState,
	};
	return analyzer as any;
}

/** Build a stand-in browser session with mocked navigation, tab and screenshot calls. */
function makeMockViewport() {
	const resolvesVoid = () => mock(async () => {});

	const viewport = {
		navigate: resolvesVoid(),
		waitForPageReady: resolvesVoid(),
		switchTab: resolvesVoid(),
		newTab: resolvesVoid(),
		closeTab: resolvesVoid(),
		screenshot: mock(async () => ({ base64: 'abc', width: 1280, height: 800 })),
		currentPage: makeMockPage(),
		cdp: makeMockCdpSession(),
		isConnected: true,
	};
	return viewport as any;
}

/** Build a stand-in page exposing the mocked methods the commands call. */
function makeMockPage() {
	const resolvesVoid = () => mock(async () => {});

	const page = {
		goBack: resolvesVoid(),
		evaluate: mock(async () => []),
		mouse: { click: resolvesVoid() },
		keyboard: { press: resolvesVoid() },
		fill: resolvesVoid(),
		click: resolvesVoid(),
		selectOption: resolvesVoid(),
		// Every lookup yields a new element handle with its own setInputFiles mock
		$: mock(async () => ({ setInputFiles: resolvesVoid() })),
	};
	return page as any;
}

/** Build a stand-in CDP session whose `send` resolves with an empty object. */
function makeMockCdpSession() {
	const send = mock(async () => ({}));
	return { send } as any;
}

/**
 * Assemble an ExecutionContext from a mocked browser session: its page and CDP
 * session, plus a mocked DOM service. `overrides` replace any of these fields.
 */
function makeContext(overrides: Partial<ExecutionContext> = {}): ExecutionContext {
	const browserSession = makeMockViewport();
	const defaults: ExecutionContext = {
		page: browserSession.currentPage,
		cdpSession: browserSession.cdp,
		domService: makeMockPageAnalyzer(),
		browserSession,
	};
	return { ...defaults, ...overrides };
}

/**
 * Helper to create action objects. Zod schemas with .default() produce
 * required fields in the inferred output type, but at runtime the defaults
 * are applied during validation. The plain record is asserted to `Command`
 * so tests can omit fields that have Zod defaults.
 */
function action(a: Record<string, unknown>): Command {
	return a as Command;
}

// ── Tests ──

describe('CommandExecutor', () => {
	let tools: CommandExecutor;

	beforeEach(() => {
		tools = new CommandExecutor();
	});

	describe('constructor and registration', () => {
		test('registers all built-in actions', () => {
			const names = tools.registry.getNames();

			// Built-in command names the executor registers, including the
			// file-upload and dropdown commands
			const expectedNames = [
				'tap',
				'type_text',
				'navigate',
				'back',
				'scroll',
				'press_keys',
				'extract',
				'finish',
				'focus_tab',
				'new_tab',
				'close_tab',
				'web_search',
				'upload',
				'select',
				'capture',
				'read_page',
				'wait',
				'scroll_to',
				'find',
				'search',
				'extract_structured',
			];

			for (const name of expectedNames) {
				expect(names).toContain(name);
			}
		});

		test('has default commandsPerStep of 10', () => {
			expect(tools.commandsPerStep).toBe(10);
		});

		test('respects custom commandsPerStep', () => {
			const custom = new CommandExecutor({ commandsPerStep: 5 });
			expect(custom.commandsPerStep).toBe(5);
		});
	});

	describe('click action', () => {
		test('delegates to domService.clickElementByIndex', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'tap', index: 0 }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.domService.clickElementByIndex).toHaveBeenCalledWith(
				ctx.page,
				ctx.cdpSession,
				0,
			);
		});

		test('supports multiple clicks via clickCount', async () => {
			const ctx = makeContext();
			await tools.executeAction(
				action({ action: 'tap', index: 0, clickCount: 3 }),
				ctx,
			);

			// First call + 2 additional
			expect(ctx.domService.clickElementByIndex).toHaveBeenCalledTimes(3);
		});

		test('uses coordinate-based clicking when enabled', async () => {
			tools.setCoordinateClicking(true);
			const ctx = makeContext();

			const result = await tools.executeAction(
				action({ action: 'tap', index: 0, coordinateX: 100, coordinateY: 200 }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.page.mouse.click).toHaveBeenCalledWith(100, 200);
			// domService should NOT have been called
			expect(ctx.domService.clickElementByIndex).not.toHaveBeenCalled();
		});

		test('coordinate click supports clickCount', async () => {
			tools.setCoordinateClicking(true);
			const ctx = makeContext();

			await tools.executeAction(
				action({ action: 'tap', index: 0, coordinateX: 50, coordinateY: 50, clickCount: 2 }),
				ctx,
			);

			expect(ctx.page.mouse.click).toHaveBeenCalledTimes(2);
		});

		test('falls back to index-based click when coordinate clicking disabled', async () => {
			// Default: coordinate clicking is disabled
			const ctx = makeContext();

			await tools.executeAction(
				action({ action: 'tap', index: 0, coordinateX: 100, coordinateY: 200 }),
				ctx,
			);

			// Should use domService, not coordinates
			expect(ctx.domService.clickElementByIndex).toHaveBeenCalled();
		});
	});

	describe('navigate action', () => {
		test('navigates to valid URL', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'navigate', url: 'https://example.com' }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.browserSession.navigate).toHaveBeenCalledWith('https://example.com');
		});

		test('throws CommandFailedError wrapping UrlBlockedError for blocked URL', async () => {
			const restricted = new CommandExecutor({ blockedUrls: ['evil.com'] });
			const ctx = makeContext();

			await expect(
				restricted.executeAction(
					action({ action: 'navigate', url: 'https://evil.com/page' }),
					ctx,
				),
			).rejects.toThrow(CommandFailedError);
		});

		test('throws when URL not in allowlist', async () => {
			const restricted = new CommandExecutor({ allowedUrls: ['safe.com'] });
			const ctx = makeContext();

			await expect(
				restricted.executeAction(
					action({ action: 'navigate', url: 'https://other.com' }),
					ctx,
				),
			).rejects.toThrow(CommandFailedError);
		});
	});

	describe('input_text action', () => {
		test('inputs text into element', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'type_text', index: 3, text: 'hello' }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.domService.inputTextByIndex).toHaveBeenCalledWith(
				ctx.page,
				ctx.cdpSession,
				3,
				'hello',
				true, // clearFirst defaults to true
			);
		});

		test('passes clearFirst=false when specified', async () => {
			const ctx = makeContext();
			await tools.executeAction(
				action({ action: 'type_text', index: 0, text: 'append', clearFirst: false }),
				ctx,
			);

			expect(ctx.domService.inputTextByIndex).toHaveBeenCalledWith(
				ctx.page,
				ctx.cdpSession,
				0,
				'append',
				false,
			);
		});
	});

	describe('scroll action', () => {
		test('scrolls the page when no index provided', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'scroll', direction: 'down' }),
				ctx,
			);

			expect(result.success).toBe(true);
		});

		test('scrolls an element when index is provided', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'scroll', direction: 'up', index: 5 }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.domService.getElementSelector).toHaveBeenCalledWith(5);
		});
	});

	describe('search_google action', () => {
		test('navigates to Google search URL', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'web_search', query: 'bun test runner' }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.browserSession.navigate).toHaveBeenCalled();
			const navigateArg = (ctx.browserSession.navigate as any).mock.calls[0][0] as string;
			expect(navigateArg).toContain('google.com/search');
			expect(navigateArg).toContain('bun%20test%20runner');
		});
	});

	describe('done action', () => {
		test('returns isDone=true with text', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'finish', text: 'Task completed successfully' }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(result.isDone).toBe(true);
			expect(result.extractedContent).toBe('Task completed successfully');
			expect(result.includeInMemory).toBe(true);
		});

		test('respects explicit success=false', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'finish', text: 'Could not complete', success: false }),
				ctx,
			);

			expect(result.success).toBe(false);
			expect(result.isDone).toBe(true);
		});
	});

	describe('go_back action', () => {
		test('calls page.goBack and waits for ready', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'back' }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.page.goBack).toHaveBeenCalled();
			expect(ctx.browserSession.waitForPageReady).toHaveBeenCalled();
		});
	});

	describe('send_keys action', () => {
		test('presses keyboard keys', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'press_keys', keys: 'Enter' }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.page.keyboard.press).toHaveBeenCalledWith('Enter');
		});
	});

	describe('find_elements action', () => {
		test('returns found elements description', async () => {
			const page = makeMockPage();
			page.evaluate = mock(() =>
				Promise.resolve([
					{ tag: 'button', text: 'Submit', attributes: { id: 'btn-submit' } },
					{ tag: 'a', text: 'Home', attributes: {} },
				]),
			);
			const ctx = makeContext({ page });

			const result = await tools.executeAction(
				action({ action: 'find', query: 'submit' }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(result.extractedContent).toContain('Found 2 element(s)');
			expect(result.extractedContent).toContain('button');
			expect(result.extractedContent).toContain('Submit');
		});

		test('returns message when no elements found', async () => {
			const page = makeMockPage();
			page.evaluate = mock(() => Promise.resolve([]));
			const ctx = makeContext({ page });

			const result = await tools.executeAction(
				action({ action: 'find', query: 'nonexistent' }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(result.extractedContent).toContain('No elements found');
		});
	});

	describe('extract_content action (fallback, no LLM)', () => {
		test('returns error/fallback when no extraction service', async () => {
			// Tools without model won't have an extraction service
			// The handler falls back to extractMarkdown which we mock via page.evaluate
			const ctx = makeContext();
			// extractMarkdown eventually calls page.evaluate
			// For this test, just verify no crash. The actual extractMarkdown module
			// import might require more setup, so we test the branch
			try {
				await tools.executeAction(
					action({ action: 'extract', goal: 'get all links' }),
					ctx,
				);
			} catch {
				// Expected - extractMarkdown import/evaluation may fail in test env
			}
		});
	});

	describe('search_page action (multi-engine)', () => {
		test('navigates to DuckDuckGo when specified', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'search', query: 'hello', engine: 'duckduckgo' }),
				ctx,
			);

			expect(result.success).toBe(true);
			const url = (ctx.browserSession.navigate as any).mock.calls[0][0] as string;
			expect(url).toContain('duckduckgo.com');
		});

		test('navigates to Bing when specified', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'search', query: 'hello', engine: 'bing' }),
				ctx,
			);

			expect(result.success).toBe(true);
			const url = (ctx.browserSession.navigate as any).mock.calls[0][0] as string;
			expect(url).toContain('bing.com/search');
		});

		test('defaults to Google', async () => {
			const ctx = makeContext();
			await tools.executeAction(
				action({ action: 'search', query: 'hello' }),
				ctx,
			);

			const url = (ctx.browserSession.navigate as any).mock.calls[0][0] as string;
			expect(url).toContain('google.com/search');
		});
	});

	describe('sensitive data masking', () => {
		test('masks sensitive data in action results', async () => {
			const ctx = makeContext({
				maskedValues: {
					PASSWORD: 'secret123',
					API_KEY: 'sk-abc',
				},
			});

			// Execute done action with text containing sensitive data
			const result = await tools.executeActions(
				[action({ action: 'finish', text: 'Found password: secret123 and key: sk-abc' })],
				ctx,
			);

			expect(result[0].success).toBe(true);
			expect(result[0].extractedContent).toContain('<PASSWORD>');
			expect(result[0].extractedContent).toContain('<API_KEY>');
			expect(result[0].extractedContent).not.toContain('secret123');
			expect(result[0].extractedContent).not.toContain('sk-abc');
		});

		test('does not mask when no sensitive data configured', async () => {
			const ctx = makeContext(); // no maskedValues

			const result = await tools.executeActions(
				[action({ action: 'finish', text: 'Plain text with no secrets' })],
				ctx,
			);

			expect(result[0].extractedContent).toBe('Plain text with no secrets');
		});
	});

	describe('action sequence execution', () => {
		test('executes multiple actions in sequence', async () => {
			const ctx = makeContext();

			const results = await tools.executeActions(
				[
					action({ action: 'tap', index: 0 }),
					action({ action: 'tap', index: 1 }),
				],
				ctx,
			);

			expect(results).toHaveLength(2);
			expect(results[0].success).toBe(true);
			expect(results[1].success).toBe(true);
		});

		test('stops at done action', async () => {
			const ctx = makeContext();

			const results = await tools.executeActions(
				[
					action({ action: 'tap', index: 0 }),
					action({ action: 'finish', text: 'Finished' }),
					action({ action: 'tap', index: 1 }), // should not execute
				],
				ctx,
			);

			expect(results).toHaveLength(2);
			expect(results[1].isDone).toBe(true);
		});

		test('respects commandsPerStep limit', async () => {
			const limited = new CommandExecutor({ commandsPerStep: 2 });
			const ctx = makeContext();

			const results = await limited.executeActions(
				[
					action({ action: 'tap', index: 0 }),
					action({ action: 'tap', index: 1 }),
					action({ action: 'tap', index: 2 }), // should not execute (limit=2)
				],
				ctx,
			);

			expect(results).toHaveLength(2);
		});

		test('handles errors gracefully in sequence', async () => {
			const ctx = makeContext();
			ctx.domService.clickElementByIndex = mock(() =>
				Promise.reject(new Error('Element is not visible')),
			);

			const results = await tools.executeActions(
				[action({ action: 'tap', index: 0 })],
				ctx,
			);

			expect(results).toHaveLength(1);
			expect(results[0].success).toBe(false);
			expect(results[0].error).toBeDefined();
			expect(results[0].error).toContain('not visible');
		});

		test('stops sequence on non-retryable error', async () => {
			const ctx = makeContext();
			ctx.domService.clickElementByIndex = mock(() =>
				Promise.reject(new Error('browser has been closed')),
			);

			const results = await tools.executeActions(
				[
					action({ action: 'tap', index: 0 }),
					action({ action: 'tap', index: 1 }), // should not run
				],
				ctx,
			);

			expect(results).toHaveLength(1);
			expect(results[0].success).toBe(false);
		});

		test('continues after retryable error', async () => {
			const ctx = makeContext();
			let callCount = 0;
			ctx.domService.clickElementByIndex = mock(() => {
				callCount++;
				if (callCount === 1) {
					return Promise.reject(new Error('Element is not visible'));
				}
				return Promise.resolve();
			});

			const results = await tools.executeActions(
				[
					action({ action: 'tap', index: 0 }),
					action({ action: 'tap', index: 1 }),
				],
				ctx,
			);

			expect(results).toHaveLength(2);
			expect(results[0].success).toBe(false);
			expect(results[1].success).toBe(true);
		});

		test('masks sensitive data in error messages', async () => {
			const ctx = makeContext({
				maskedValues: { TOKEN: 'my-secret-token' },
			});
			ctx.domService.clickElementByIndex = mock(() =>
				Promise.reject(new Error('Failed with my-secret-token')),
			);

			const results = await tools.executeActions(
				[action({ action: 'tap', index: 0 })],
				ctx,
			);

			expect(results[0].error).not.toContain('my-secret-token');
			expect(results[0].error).toContain('<TOKEN>');
		});
	});

	describe('switch_tab action', () => {
		test('switches to specified tab', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'focus_tab', tabIndex: 1 }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.browserSession.switchTab).toHaveBeenCalledWith(1);
		});
	});

	describe('open_tab action', () => {
		test('opens new tab with URL', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'new_tab', url: 'https://example.com' }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.browserSession.newTab).toHaveBeenCalledWith('https://example.com');
		});

		test('throws for blocked URL', async () => {
			const restricted = new CommandExecutor({ blockedUrls: ['banned.com'] });
			const ctx = makeContext();

			await expect(
				restricted.executeAction(
					action({ action: 'new_tab', url: 'https://banned.com' }),
					ctx,
				),
			).rejects.toThrow(CommandFailedError);
		});
	});

	describe('close_tab action', () => {
		test('closes specified tab', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'close_tab', tabIndex: 2 }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(ctx.browserSession.closeTab).toHaveBeenCalledWith(2);
		});
	});

	describe('screenshot action', () => {
		test('takes a screenshot', async () => {
			const ctx = makeContext();
			const result = await tools.executeAction(
				action({ action: 'capture' }),
				ctx,
			);

			expect(result.success).toBe(true);
			expect(result.extractedContent).toContain('Screenshot taken');
			expect(ctx.browserSession.screenshot).toHaveBeenCalled();
		});
	});

	describe('setCoordinateClicking', () => {
		// A tap that carries both an element index and explicit coordinates, so
		// the toggle alone decides which click path is taken
		const tapWithCoordinates = () =>
			action({ action: 'tap', index: 0, coordinateX: 10, coordinateY: 20 });

		test('enables coordinate-based clicking', async () => {
			tools.setCoordinateClicking(true);
			const ctx = makeContext();

			await tools.executeAction(tapWithCoordinates(), ctx);

			expect(ctx.page.mouse.click).toHaveBeenCalledWith(10, 20);
			expect(ctx.domService.clickElementByIndex).not.toHaveBeenCalled();
		});

		test('disables coordinate-based clicking', async () => {
			tools.setCoordinateClicking(true);
			tools.setCoordinateClicking(false);
			const ctx = makeContext();

			await tools.executeAction(tapWithCoordinates(), ctx);

			// With the toggle off again the coordinates are ignored
			expect(ctx.page.mouse.click).not.toHaveBeenCalled();
			expect(ctx.domService.clickElementByIndex).toHaveBeenCalledWith(
				ctx.page,
				ctx.cdpSession,
				0,
			);
		});
	});
});


================================================
FILE: packages/core/src/commands/executor.ts
================================================
import type { Page, CDPSession } from 'playwright';
import { z } from 'zod';
import { CommandCatalog } from './catalog/catalog.js';
import type {
	Command,
	CommandResult,
	ExecutionContext,
	InterpretedViewportError,
	ViewportErrorCategory,
} from './types.js';
import {
	TapCommandSchema,
	TypeTextCommandSchema,
	NavigateCommandSchema,
	BackCommandSchema,
	ScrollCommandSchema,
	PressKeysCommandSchema,
	ExtractCommandSchema,
	FinishCommandSchema,
	FocusTabCommandSchema,
	NewTabCommandSchema,
	CloseTabCommandSchema,
	WebSearchCommandSchema,
	UploadCommandSchema,
	SelectCommandSchema,
	CaptureCommandSchema,
	ReadPageCommandSchema,
	WaitCommandSchema,
	ScrollToCommandSchema,
	FindCommandSchema,
	SearchCommandSchema,
	ListOptionsCommandSchema,
	PickOptionCommandSchema,
	ExtractStructuredCommandSchema,
} from './types.js';
import type { Viewport } from '../viewport/viewport.js';
import type { PageAnalyzer } from '../page/page-analyzer.js';
import type { LanguageModel } from '../model/interface.js';
import { ContentExtractor } from './extraction/extractor.js';
import { scrollPage, scrollElement, buildGoogleSearchUrl } from './utils.js';
import { extractMarkdown } from '../page/content-extractor.js';
import { isUrlPermitted } from '../utils.js';
import {
	UrlBlockedError,
	NavigationFailedError,
	ViewportCrashedError,
} from '../errors.js';
import { sleep } from '../utils.js';

/** Construction options for CommandExecutor; every field is optional. */
export interface CommandExecutorOptions {
	/** When set, the constructor builds a ContentExtractor around this model. */
	model?: LanguageModel;
	/** Passed to `isUrlPermitted` as the allow-list when a handler checks a URL. */
	allowedUrls?: string[];
	/** Passed to `isUrlPermitted` as the block-list when a handler checks a URL. */
	blockedUrls?: string[];
	/** Stored as `commandsPerStep`; the constructor falls back to 10 when omitted. */
	commandsPerStep?: number;
}

export class CommandExecutor {
	readonly registry: CommandCatalog
	private extractionService?: ContentExtractor;
	private allowedUrls?: string[];
	private blockedUrls?: string[];
	readonly commandsPerStep: number;
	private coordinateClickingEnabled = false;

	/**
	 * Creates an executor with a new catalog, applies the supplied options,
	 * and then registers the built-in commands.
	 *
	 * @param options - Optional settings; `commandsPerStep` falls back to 10.
	 */
	constructor(options?: CommandExecutorOptions) {
		const model = options?.model;

		this.registry = new CommandCatalog();
		this.commandsPerStep = options?.commandsPerStep ?? 10;
		this.allowedUrls = options?.allowedUrls;
		this.blockedUrls = options?.blockedUrls;

		// The executor-level extraction service only exists when a model is given
		if (model) {
			this.extractionService = new ContentExtractor(model);
		}

		this.registerBuiltinActions();
	}

	/**
	 * Turns coordinate-based clicking on or off.
	 *
	 * While it is on, a `tap` command that carries both `coordinateX` and
	 * `coordinateY` clicks at that position with `page.mouse.click`; otherwise
	 * the element index is used.
	 *
	 * @param enabled - `true` to allow coordinate clicks, `false` to ignore them.
	 */
	setCoordinateClicking(enabled: boolean): void {
		this.coordinateClickingEnabled = enabled;
	}

	private registerBuiltinActions(): void {
		// Click
		// Clicks at explicit page coordinates when coordinate clicking is enabled
		// and both coordinates are supplied; otherwise clicks the indexed element.
		this.registry.register({
			name: 'tap',
			description: 'Click on an element by its index',
			schema: TapCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const { index, clickCount, coordinateX, coordinateY } = params as {
					index: number;
					clickCount?: number;
					coordinateX?: number;
					coordinateY?: number;
				};

				// One repeat count shared by both paths: anything that is not greater
				// than 1 (missing, zero, negative) means a single click, so neither
				// path can report success without having clicked
				const clicks = clickCount !== undefined && clickCount > 1 ? clickCount : 1;

				// Coordinate-based clicking
				if (
					this.coordinateClickingEnabled &&
					coordinateX !== undefined &&
					coordinateY !== undefined
				) {
					for (let i = 0; i < clicks; i++) {
						await ctx.page.mouse.click(coordinateX, coordinateY);
					}
					return { success: true };
				}

				// Index-based clicking
				for (let i = 0; i < clicks; i++) {
					await ctx.domService.clickElementByIndex(ctx.page, ctx.cdpSession, index);
				}
				return { success: true };
			},
		});

		// Type text into the element at the given index
		this.registry.register({
			name: 'type_text',
			description: 'Type text into an input element',
			schema: TypeTextCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const input = params as {
					index: number;
					text: string;
					clearFirst?: boolean;
				};
				// The field is cleared first unless the caller opts out
				const shouldClear = input.clearFirst ?? true;

				await ctx.domService.inputTextByIndex(
					ctx.page,
					ctx.cdpSession,
					input.index,
					input.text,
					shouldClear,
				);
				return { success: true };
			},
		});

		// Navigate the session to a URL, subject to the allow/block lists
		this.registry.register({
			name: 'navigate',
			description: 'Navigate to a URL',
			schema: NavigateCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const target = (params as { url: string }).url;

				const permitted = isUrlPermitted(target, this.allowedUrls, this.blockedUrls);
				if (!permitted) {
					throw new UrlBlockedError(target);
				}

				await ctx.browserSession.navigate(target);
				return { success: true };
			},
		});

		// Step back in history, then wait for the page to be ready
		this.registry.register({
			name: 'back',
			description: 'Go back to previous page',
			schema: BackCommandSchema.omit({ action: true }),
			handler: async (_params, ctx) => {
				// A rejected goBack (e.g. the 5s timeout) is deliberately ignored
				const goingBack = ctx.page.goBack({ timeout: 5000 });
				await goingBack.catch(() => {});

				await ctx.browserSession.waitForPageReady();
				return { success: true };
			},
		});

		// Scroll
		// Scrolls the whole page, or a single element when an index is given.
		this.registry.register({
			name: 'scroll',
			description: 'Scroll the page or an element',
			schema: ScrollCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const { direction, amount, index } = params as {
					direction: 'up' | 'down';
					amount?: number;
					index?: number;
				};

				// No index: scroll the page itself
				if (index === undefined) {
					await scrollPage(ctx.page, direction, amount);
					return { success: true };
				}

				// An index that resolves to no selector is reported as a failure
				// instead of claiming a scroll that never happened
				const selector = await ctx.domService.getElementSelector(index);
				if (!selector) {
					return { success: false, error: `Element ${index} not found` };
				}

				await scrollElement(ctx.page, selector, direction, amount);
				return { success: true };
			},
		});

		// Press a key or key combination on the page keyboard
		this.registry.register({
			name: 'press_keys',
			description: 'Send keyboard keys (e.g., Enter, Escape, Control+a)',
			schema: PressKeysCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const combo = (params as { keys: string }).keys;
				await ctx.page.keyboard.press(combo);
				return { success: true };
			},
		});

		// Pull goal-directed information out of the current page
		this.registry.register({
			name: 'extract',
			description: 'Extract specific information from the current page',
			schema: ExtractCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const { goal, outputSchema } = params as {
					goal: string;
					outputSchema?: Record<string, unknown>;
				};

				// A model on the context wins over the executor-level service
				const contextModel = ctx.extractionLlm;
				const extractor = contextModel
					? new ContentExtractor(contextModel)
					: this.extractionService;

				// Without any extractor, return the first 5000 characters of markdown
				if (!extractor) {
					const pageMarkdown = await extractMarkdown(ctx.page);
					return {
						success: true,
						extractedContent: pageMarkdown.slice(0, 5000),
						includeInMemory: true,
					};
				}

				// With an output schema the extractor works on up to 8000 characters
				// of markdown; otherwise it is handed the page directly
				let extracted;
				if (outputSchema) {
					const pageMarkdown = await extractMarkdown(ctx.page);
					extracted = await extractor.extractFromText(
						pageMarkdown.slice(0, 8000),
						goal,
						outputSchema,
					);
				} else {
					extracted = await extractor.extract(ctx.page, goal);
				}

				return { success: true, extractedContent: extracted, includeInMemory: true };
			},
		});

		// Finish the task; registered as a sequence-terminating command
		this.registry.register({
			name: 'finish',
			description: 'Mark the task as completed with a result',
			schema: FinishCommandSchema.omit({ action: true }),
			terminatesSequence: true,
			handler: async (params) => {
				const outcome = params as { text: string; success?: boolean };
				// Success is assumed unless the caller says otherwise
				const succeeded = outcome.success ?? true;
				return {
					success: succeeded,
					isDone: true,
					extractedContent: outcome.text,
					includeInMemory: true,
				};
			},
		});

		// Bring another tab into focus
		this.registry.register({
			name: 'focus_tab',
			description: 'Switch to a different browser tab',
			schema: FocusTabCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const target = (params as { tabIndex: number }).tabIndex;
				await ctx.browserSession.switchTab(target);
				return { success: true };
			},
		});

		// Open a URL in a new tab, subject to the allow/block lists
		this.registry.register({
			name: 'new_tab',
			description: 'Open a new tab with a URL',
			schema: NewTabCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const target = (params as { url: string }).url;

				const permitted = isUrlPermitted(target, this.allowedUrls, this.blockedUrls);
				if (!permitted) {
					throw new UrlBlockedError(target);
				}

				await ctx.browserSession.newTab(target);
				return { success: true };
			},
		});

		// Close a tab; the index is optional and forwarded as-is
		this.registry.register({
			name: 'close_tab',
			description: 'Close a browser tab',
			schema: CloseTabCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const target = (params as { tabIndex?: number }).tabIndex;
				await ctx.browserSession.closeTab(target);
				return { success: true };
			},
		});

		// Search Google
		// Builds a Google search URL for the query and navigates to it.
		this.registry.register({
			name: 'web_search',
			description: 'Search Google for a query',
			schema: WebSearchCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const { query } = params as { query: string };
				const url = buildGoogleSearchUrl(query);

				// Enforce the allow/block lists here too, as the other navigating
				// commands do, so a search cannot sidestep a URL restriction
				if (!isUrlPermitted(url, this.allowedUrls, this.blockedUrls)) {
					throw new UrlBlockedError(url);
				}

				await ctx.browserSession.navigate(url);
				return { success: true };
			},
		});

		// Attach files to the file input at the given index
		this.registry.register({
			name: 'upload',
			description: 'Upload files to a file input',
			schema: UploadCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const upload = params as { index: number; filePaths: string[] };

				// When the context has a file system, paths that do not start with
				// "/" are resolved against its sandbox directory
				let paths = upload.filePaths;
				if (ctx.fileSystem) {
					const sandboxRoot = ctx.fileSystem.getSandboxDir();
					const nodePath = await import('node:path');
					paths = upload.filePaths.map((candidate) => {
						if (candidate.startsWith('/')) {
							return candidate;
						}
						return nodePath.resolve(sandboxRoot, candidate);
					});
				}

				const selector = await ctx.domService.getElementSelector(upload.index);
				if (!selector) {
					return { success: false, error: `Element ${upload.index} not found` };
				}

				const inputHandle = await ctx.page.$(selector);
				if (!inputHandle) {
					return { success: false, error: `File input element not found` };
				}

				await inputHandle.setInputFiles(paths);
				return { success: true };
			},
		});

		// Choose a value in the dropdown at the given index
		this.registry.register({
			name: 'select',
			description: 'Select an option in a dropdown',
			schema: SelectCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const choice = params as { index: number; value: string };

				const selector = await ctx.domService.getElementSelector(choice.index);
				if (selector) {
					await ctx.page.selectOption(selector, choice.value);
					return { success: true };
				}

				// The index did not resolve to a selector
				return { success: false, error: `Element ${choice.index} not found` };
			},
		});

		// Capture a screenshot and report its dimensions
		this.registry.register({
			name: 'capture',
			description: 'Take a screenshot of the current page',
			schema: CaptureCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const wantFullPage = (params as { fullPage?: boolean }).fullPage;
				const shot = await ctx.browserSession.screenshot(wantFullPage);

				// Only the size is surfaced in the result text
				const summary = `Screenshot taken (${shot.width}x${shot.height})`;
				return { success: true, extractedContent: summary };
			},
		});

		// Return the page as markdown, truncated to 10000 characters
		this.registry.register({
			name: 'read_page',
			description: 'Read the text content of the current page',
			schema: ReadPageCommandSchema.omit({ action: true }),
			handler: async (_params, ctx) => {
				const pageMarkdown = await extractMarkdown(ctx.page);
				const truncated = pageMarkdown.slice(0, 10000);
				return { success: true, extractedContent: truncated, includeInMemory: true };
			},
		});

		// Pause for a number of seconds (3 when none is given)
		this.registry.register({
			name: 'wait',
			description: 'Wait for a specified number of seconds',
			schema: WaitCommandSchema.omit({ action: true }),
			handler: async (params) => {
				const requested = (params as { seconds?: number }).seconds;
				const delayMs = (requested ?? 3) * 1000;
				await sleep(delayMs);
				return { success: true };
			},
		});

		// ── New actions ──

		// Scroll the first element containing the given text into view
		this.registry.register({
			name: 'scroll_to',
			description: 'Scroll to a specific text on the page',
			schema: ScrollToCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const wanted = (params as { text: string }).text;

				// Runs inside the page: resolves to true when a match was scrolled to
				const located = await ctx.page.evaluate((searchText: string) => {
					const needle = searchText.toLowerCase();

					// Walk text nodes only, accepting those that contain the needle
					// (case-insensitive)
					const textWalker = document.createTreeWalker(
						document.body,
						NodeFilter.SHOW_TEXT,
						{
							acceptNode: (candidate) => {
								const content = candidate.textContent;
								const matches = content && content.toLowerCase().includes(needle);
								return matches ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_REJECT;
							},
						},
					);

					const host = textWalker.nextNode()?.parentElement;
					if (!host) {
						return false;
					}

					host.scrollIntoView({ behavior: 'smooth', block: 'center' });
					return true;
				}, wanted);

				if (located) {
					// Allow time for the smooth scroll to finish
					await sleep(500);
					return { success: true };
				}

				return {
					success: false,
					error: `Text "${wanted}" not found on the page`,
				};
			},
		});

		// Find elements
		// Lists up to 20 elements whose text, aria-label, placeholder or title
		// contains the query (case-insensitive).
		this.registry.register({
			name: 'find',
			description: 'Find elements on the page matching a description',
			schema: FindCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const { query } = params as { query: string };

				// Runs inside the page and returns plain serializable descriptors
				const elements = await ctx.page.evaluate((searchQuery: string) => {
					const results: Array<{
						tag: string;
						text: string;
						attributes: Record<string, string>;
					}> = [];
					const queryLower = searchQuery.toLowerCase();

					// An element can match several selectors below (for example a
					// <button aria-label="...">); track visited elements so each one
					// is reported once and duplicates do not eat into the result cap
					const visited = new Set<Element>();

					// Search through interactive and content elements
					const selectors = [
						'a',
						'button',
						'input',
						'select',
						'textarea',
						'[role="button"]',
						'[role="link"]',
						'[role="tab"]',
						'[role="menuitem"]',
						'h1',
						'h2',
						'h3',
						'h4',
						'h5',
						'h6',
						'label',
						'[aria-label]',
					];

					for (const selector of selectors) {
						for (const el of document.querySelectorAll(selector)) {
							if (visited.has(el)) continue;
							visited.add(el);

							const htmlEl = el as HTMLElement;
							const text = (htmlEl.innerText || htmlEl.textContent || '').trim();
							const ariaLabel = el.getAttribute('aria-label') || '';
							const placeholder = el.getAttribute('placeholder') || '';
							const title = el.getAttribute('title') || '';

							const searchableText =
								`${text} ${ariaLabel} ${placeholder} ${title}`.toLowerCase();

							if (searchableText.includes(queryLower)) {
								const attrs: Record<string, string> = {};
								if (el.id) attrs.id = el.id;
								// className is not a string on some elements (e.g. SVG)
								if (el.className && typeof el.className === 'string') {
									attrs.class = el.className;
								}
								if (ariaLabel) attrs['aria-label'] = ariaLabel;
								if (placeholder) attrs.placeholder = placeholder;

								results.push({
									tag: el.tagName.toLowerCase(),
									text: text.slice(0, 100),
									attributes: attrs,
								});
							}

							// Cap at 20 results
							if (results.length >= 20) break;
						}
						if (results.length >= 20) break;
					}

					return results;
				}, query);

				if (elements.length === 0) {
					return {
						success: true,
						extractedContent: `No elements found matching "${query}"`,
						includeInMemory: true,
					};
				}

				// One line per match: position in this result list, tag with its
				// collected attributes, then the (truncated) text
				const descriptions = elements.map((el, i) => {
					const attrStr = Object.entries(el.attributes)
						.map(([k, v]) => `${k}="${v}"`)
						.join(' ');
					return `[${i}] <${el.tag}${attrStr ? ` ${attrStr}` : ''}> ${el.text}`;
				});

				return {
					success: true,
					extractedContent: `Found ${elements.length} element(s):\n${descriptions.join('\n')}`,
					includeInMemory: true,
				};
			},
		});

		// Search page (multi-engine)
		this.registry.register({
			name: 'search',
			description: 'Search the web using a specified search engine',
			schema: SearchCommandSchema.omit({ action: true }),
			// Builds the results-page URL for the chosen engine (google when none is
			// given), enforces the URL allow/block lists, then navigates to it.
			handler: async (params, ctx) => {
				const request = params as {
					query: string;
					engine?: 'google' | 'duckduckgo' | 'bing';
				};

				const targetUrl = buildSearchUrl(request.query, request.engine ?? 'google');

				const permitted = isUrlPermitted(targetUrl, this.allowedUrls, this.blockedUrls);
				if (!permitted) {
					throw new UrlBlockedError(targetUrl);
				}

				await ctx.browserSession.navigate(targetUrl);
				return { success: true };
			},
		});

		// Get dropdown options
		this.registry.register({
			name: 'list_options',
			description: 'Get all options from a select/dropdown element',
			schema: ListOptionsCommandSchema.omit({ action: true }),
			// Resolves the indexed element to a selector, reads its <option> entries
			// inside the page, and reports them as a numbered text list.
			handler: async (params, ctx) => {
				const elementIndex = (params as { index: number }).index;
				const cssSelector = await ctx.domService.getElementSelector(elementIndex);
				if (!cssSelector) {
					return { success: false, error: `Element ${elementIndex} not found` };
				}

				// Returns null when the selector does not resolve to a <select>.
				const entries = await ctx.page.evaluate((sel: string) => {
					const target = document.querySelector(sel) as HTMLSelectElement | null;
					if (target === null || target.tagName !== 'SELECT') {
						return null;
					}

					const collected: Array<{ value: string; text: string; selected: boolean }> = [];
					for (const option of Array.from(target.options)) {
						collected.push({
							value: option.value,
							text: option.text.trim(),
							selected: option.selected,
						});
					}
					return collected;
				}, cssSelector);

				if (entries === null) {
					return {
						success: false,
						error: `Element ${elementIndex} is not a select element`,
					};
				}

				const lines: string[] = [];
				entries.forEach((entry, position) => {
					const marker = entry.selected ? ' [selected]' : '';
					lines.push(`[${position}] "${entry.text}" (value="${entry.value}")${marker}`);
				});

				return {
					success: true,
					extractedContent: `Dropdown options:\n${lines.join('\n')}`,
					includeInMemory: true,
				};
			},
		});

		// Select dropdown option (by text match)
		this.registry.register({
			name: 'pick_option',
			description: 'Select a dropdown option by its visible text',
			schema: PickOptionCommandSchema.omit({ action: true }),
			/**
			 * Finds the option whose visible text matches `optionText`
			 * (case-insensitive; an exact match is preferred over a substring match)
			 * and selects that option by its position in the <select>.
			 *
			 * Selecting by index rather than by the option's value string matters:
			 * Playwright treats a bare string as "value OR label", so a value that
			 * collides with another option's label, or duplicate values, could
			 * select a different option than the one matched by text.
			 */
			handler: async (params, ctx) => {
				const { index, optionText } = params as {
					index: number;
					optionText: string;
				};
				const selector = await ctx.domService.getElementSelector(index);
				if (!selector) {
					return { success: false, error: `Element ${index} not found` };
				}

				// Locate the matching option's position inside the page. Returns null
				// when the element is not a <select> or no option text matches.
				const matchedIndex = await ctx.page.evaluate(
					({ sel, text }: { sel: string; text: string }) => {
						const selectEl = document.querySelector(sel) as HTMLSelectElement | null;
						if (!selectEl || selectEl.tagName !== 'SELECT') return null;

						const textLower = text.toLowerCase();
						const labels = Array.from(selectEl.options, (opt) =>
							opt.text.trim().toLowerCase(),
						);

						// Try exact match first
						const exact = labels.indexOf(textLower);
						if (exact !== -1) return exact;

						// Try partial / includes match
						const partial = labels.findIndex((label) => label.includes(textLower));
						return partial !== -1 ? partial : null;
					},
					{ sel: selector, text: optionText },
				);

				if (matchedIndex === null) {
					return {
						success: false,
						error: `No option matching "${optionText}" found in dropdown at element ${index}`,
					};
				}

				await ctx.page.selectOption(selector, { index: matchedIndex });
				return { success: true };
			},
		});

		// Structured output
		this.useStructuredOutputAction();
	}

	/**
	 * Registers the `extract_structured` command.
	 *
	 * The handler converts the current page to markdown, truncates it to the
	 * requested length (8000 characters when none is given) and asks an
	 * extraction service for JSON matching the caller-supplied schema. Failures
	 * are reported as unsuccessful results instead of being thrown.
	 */
	private useStructuredOutputAction(): void {
		this.registry.register({
			name: 'extract_structured',
			description:
				'Extract structured data from the current page content. Returns JSON conforming to the provided schema.',
			schema: ExtractStructuredCommandSchema.omit({ action: true }),
			handler: async (params, ctx) => {
				const request = params as {
					goal: string;
					outputSchema: Record<string, unknown>;
					maxContentLength?: number;
				};

				// A model supplied on the execution context wins over the
				// executor-level extraction service.
				let extractor = this.extractionService;
				if (ctx.extractionLlm) {
					extractor = new ContentExtractor(ctx.extractionLlm);
				}

				if (!extractor) {
					return {
						success: false,
						error:
							'No extraction LLM configured. Provide a model via CommandExecutorOptions or ExecutionContext.extractionLlm.',
					};
				}

				const pageMarkdown = await extractMarkdown(ctx.page);
				if (pageMarkdown.trim() === '') {
					return {
						success: false,
						error: 'No content found on the page for structured extraction.',
					};
				}

				const excerpt = pageMarkdown.slice(0, request.maxContentLength ?? 8000);

				try {
					const json = await extractor.extractFromText(
						excerpt,
						request.goal,
						request.outputSchema,
					);
					return { success: true, extractedContent: json, includeInMemory: true };
				} catch (error) {
					const reason = error instanceof Error ? error.message : String(error);
					return {
						success: false,
						error: `Structured extraction failed: ${reason}`,
					};
				}
			},
		});
	}

	/**
	 * Runs a single command: the `action` discriminator selects the registered
	 * handler and every remaining field is forwarded as its parameters.
	 */
	async executeAction(
		action: Command,
		context: ExecutionContext,
	): Promise<CommandResult> {
		const { action: commandName, ...commandArgs } = action;
		const pending = this.registry.execute(commandName, commandArgs, context);
		return pending;
	}

	/**
	 * Runs commands in order, at most `commandsPerStep` of them, and returns one
	 * result per command that was attempted.
	 *
	 * The sequence stops early when a result is marked done, when the command is
	 * registered as terminating, or when a thrown error is classified as not
	 * retryable. Thrown errors never propagate: they are classified, masked and
	 * recorded as failed results.
	 */
	async executeActions(
		actions: Command[],
		context: ExecutionContext,
	): Promise<CommandResult[]> {
		const collected: CommandResult[] = [];
		const stepBudget = Math.min(actions.length, this.commandsPerStep);
		let cursor = 0;

		while (cursor < stepBudget) {
			const command = actions[cursor];
			cursor += 1;

			try {
				// Sensitive values are scrubbed before the result is recorded.
				const outcome = this.maskSensitiveResult(
					await this.executeAction(command, context),
					context,
				);
				collected.push(outcome);

				// A finished task or a terminating command ends the sequence.
				if (outcome.isDone || this.registry.isTerminating(command.action)) {
					break;
				}
			} catch (error) {
				// Turn the raw browser error into a message plus a suggested next step.
				const verdict = classifyViewportError(error);
				const described = `${verdict.message} | Suggestion: ${verdict.suggestion}`;

				collected.push({
					success: false,
					error: this.maskSensitiveText(described, context),
				});

				// Non-retryable failures (e.g. a browser crash) abort the remaining commands.
				if (!verdict.isRetryable) {
					break;
				}
			}
		}

		return collected;
	}

	// ── Sensitive data masking ──

	/**
	 * Returns a copy of a CommandResult whose non-empty `extractedContent` and
	 * `error` fields have sensitive values replaced. When the context carries no
	 * masked values the original result object is returned unchanged.
	 */
	private maskSensitiveResult(
		result: CommandResult,
		context: ExecutionContext,
	): CommandResult {
		const secrets = context.maskedValues;
		if (!secrets) return result;

		const scrubbed: CommandResult = { ...result };
		for (const field of ['extractedContent', 'error'] as const) {
			const current = scrubbed[field];
			// Empty or missing fields are left exactly as they were.
			if (current) {
				scrubbed[field] = this.registry.replaceSensitiveData(current, secrets);
			}
		}
		return scrubbed;
	}

	/**
	 * Replaces sensitive values inside a plain string. The text is returned
	 * untouched when the context carries no masked values.
	 */
	private maskSensitiveText(
		text: string,
		context: ExecutionContext,
	): string {
		const secrets = context.maskedValues;
		return secrets ? this.registry.replaceSensitiveData(text, secrets) : text;
	}
}

// ── Helpers ──

/**
 * Builds the results-page URL for `query` on the given search engine.
 * The query is percent-encoded with encodeURIComponent.
 */
function buildSearchUrl(
	query: string,
	engine: 'google' | 'duckduckgo' | 'bing',
): string {
	const q = encodeURIComponent(query);
	const urlByEngine: Record<'google' | 'duckduckgo' | 'bing', string> = {
		google: `https://www.google.com/search?q=${q}&udm=14`,
		duckduckgo: `https://duckduckgo.com/?q=${q}`,
		bing: `https://www.bing.com/search?q=${q}`,
	};
	return urlByEngine[engine];
}

// ── Browser error interpretation ──

/**
 * Error pattern matcher: maps regex patterns against error messages to
 * categories, human-readable messages, and actionable suggestions.
 *
 * Order matters: classifyViewportError walks this list from the top and
 * returns the first entry whose pattern matches, so specific patterns must be
 * listed before the broader ones that would also match (for example the
 * individual `net::ERR_*` codes come before the generic `net::ERR_` entry).
 *
 * Fields:
 * - `pattern`: regex tested against the raw error message.
 * - `category`: classification reported for a match.
 * - `message`: builds the human-readable message from the regex match.
 * - `suggestion`: advice on what to try next.
 * - `isRetryable`: whether the caller may continue after this error.
 */
const ERROR_PATTERNS: Array<{
	pattern: RegExp;
	category: ViewportErrorCategory;
	message: (match: RegExpMatchArray) => string;
	suggestion: string;
	isRetryable: boolean;
}> = [
	// ── Specific network error codes ──
	{
		pattern: /net::ERR_NAME_NOT_RESOLVED/i,
		category: 'network',
		message: () => 'DNS resolution failed - the domain could not be found.',
		suggestion: 'Check the URL for typos or try a different URL.',
		isRetryable: false,
	},
	{
		pattern: /net::ERR_CONNECTION_REFUSED/i,
		category: 'network',
		message: () => 'Connection refused by the server.',
		suggestion: 'The server may be down. Try again later or use a different URL.',
		isRetryable: true,
	},
	{
		pattern: /net::ERR_CONNECTION_TIMED_OUT/i,
		category: 'network',
		message: () => 'Connection timed out.',
		suggestion: 'The server is not responding. Try again or use a different URL.',
		isRetryable: true,
	},
	{
		pattern: /net::ERR_SSL/i,
		category: 'network',
		message: () => 'SSL/TLS connection error.',
		suggestion: 'The site has an invalid certificate. Try an alternative URL.',
		isRetryable: false,
	},
	{
		pattern: /net::ERR_CERT/i,
		category: 'network',
		message: () => 'Certificate verification failed.',
		suggestion: 'The site has a certificate issue. Try a different URL.',
		isRetryable: false,
	},
	{
		pattern: /net::ERR_ABORTED/i,
		category: 'navigation',
		message: () => 'Navigation was aborted.',
		suggestion: 'The page load was interrupted. Try navigating again.',
		isRetryable: true,
	},
	// Catch-all for any other net::ERR_ code; must stay after the specific codes above.
	{
		pattern: /net::ERR_/i,
		category: 'network',
		message: (m) => `Network error: ${m[0]}`,
		suggestion: 'A network error occurred. Check the URL and try again.',
		isRetryable: true,
	},
	// ── Timeouts ──
	{
		pattern: /Navigation timeout of \d+ms exceeded/i,
		category: 'timeout',
		message: () => 'Page navigation timed out.',
		suggestion: 'The page took too long to load. Try again or navigate to a simpler page.',
		isRetryable: true,
	},
	{
		pattern: /Timeout \d+ms exceeded/i,
		category: 'timeout',
		message: () => 'Operation timed out.',
		suggestion: 'The operation took too long. Try a simpler action or wait and retry.',
		isRetryable: true,
	},
	{
		pattern: /waiting for selector/i,
		category: 'timeout',
		message: () => 'Timed out waiting for an element to appear.',
		suggestion: 'The element may not exist on this page. Check the page content and try a different selector or index.',
		isRetryable: true,
	},
	// ── Element state problems ──
	{
		pattern: /Element is not visible/i,
		category: 'element_not_interactable',
		message: () => 'The element exists but is not visible.',
		suggestion: 'Try scrolling to make the element visible, or use a different element.',
		isRetryable: true,
	},
	{
		pattern: /Element is not attached to the DOM/i,
		category: 'element_stale',
		message: () => 'The element reference is stale - the element was removed from the page.',
		suggestion: 'The page content has changed. Re-read the page and use updated element indices.',
		isRetryable: true,
	},
	{
		pattern: /Element is outside of the viewport/i,
		category: 'element_not_interactable',
		message: () => 'The element is outside the visible viewport.',
		suggestion: 'Scroll to bring the element into view before interacting with it.',
		isRetryable: true,
	},
	{
		pattern: /Element is not (?:enabled|editable)/i,
		category: 'element_not_interactable',
		message: () => 'The element is disabled or read-only.',
		suggestion: 'The element cannot be interacted with in its current state. Look for an alternative element or action.',
		isRetryable: false,
	},
	{
		pattern: /intercepts pointer events/i,
		category: 'element_not_interactable',
		message: () => 'Another element is covering the target element.',
		suggestion: 'An overlay or dialog may be blocking the click. Try closing it first, or use send_keys as an alternative.',
		isRetryable: true,
	},
	// Matches "Element not found", "Element 12 not found", "Node not found", etc.
	{
		pattern: /(?:Element|Node)\s+(?:\d+\s+)?not found/i,
		category: 'element_not_found',
		message: () => 'The specified element was not found on the page.',
		suggestion: 'The element index may be invalid. Re-read the page content to get updated element indices.',
		isRetryable: true,
	},
	{
		pattern: /frame was detached/i,
		category: 'element_stale',
		message: () => 'The frame containing the element has been detached.',
		suggestion: 'The page structure changed. Navigate to a stable page and retry.',
		isRetryable: true,
	},
	// ── Browser/session failures (all non-retryable) ──
	{
		pattern: /browser has been closed/i,
		category: 'crash',
		message: () => 'The browser has been closed unexpectedly.',
		suggestion: 'The browser session is no longer available.',
		isRetryable: false,
	},
	{
		pattern: /Target (?:page|context|browser) (?:closed|crashed)/i,
		category: 'crash',
		message: () => 'The browser page or context has crashed.',
		suggestion: 'The browser session is no longer available.',
		isRetryable: false,
	},
	{
		pattern: /Protocol error/i,
		category: 'crash',
		message: () => 'Browser protocol communication error.',
		suggestion: 'The browser may have crashed or become unresponsive.',
		isRetryable: false,
	},
	// ── Permissions ──
	{
		pattern: /Permission denied|not allowed/i,
		category: 'permission',
		message: () => 'Permission denied for this operation.',
		suggestion: 'The action requires permissions that are not available. Try an alternative approach.',
		isRetryable: false,
	},
];

/**
 * Turns any thrown value into a structured InterpretedViewportError.
 *
 * Resolution order:
 * 1. Known error classes (navigation failure, viewport crash, blocked URL).
 * 2. The first ERROR_PATTERNS entry whose regex matches the error message.
 * 3. A retryable 'unknown' fallback carrying the raw message.
 *
 * @param error The thrown value; non-Error values are stringified.
 * @returns Category, message, suggestion and retryability for the error.
 */
export function classifyViewportError(error: unknown): InterpretedViewportError {
	const text = error instanceof Error ? error.message : String(error);

	// Typed errors raised by this project carry their own classification.
	if (error instanceof NavigationFailedError) {
		return {
			category: 'navigation',
			message: `Navigation failed for ${error.url}: ${text}`,
			suggestion: 'Check the URL for correctness and try again.',
			isRetryable: true,
		};
	}
	if (error instanceof ViewportCrashedError) {
		return {
			category: 'crash',
			message: text,
			suggestion: 'The browser has crashed and the session must be restarted.',
			isRetryable: false,
		};
	}
	if (error instanceof UrlBlockedError) {
		return {
			category: 'permission',
			message: text,
			suggestion: 'This URL is blocked by the allowed/blocked URL configuration. Use a different URL.',
			isRetryable: false,
		};
	}

	// Otherwise classify by message text; the first matching pattern wins.
	for (const { pattern, category, message, suggestion, isRetryable } of ERROR_PATTERNS) {
		const hit = pattern.exec(text);
		if (hit === null) continue;
		return { category, message: message(hit), suggestion, isRetryable };
	}

	// Nothing recognised the error.
	return {
		category: 'unknown',
		message: text,
		suggestion: 'An unexpected error occurred. Try a different action or approach.',
		isRetryable: true,
	};
}


================================================
FILE: packages/core/src/commands/extraction/extractor.ts
================================================
import type { Page } from 'playwright';
import type { LanguageModel } from '../../model/interface.js';
import { z } from 'zod';
import {
	extractMarkdown,
	chunkText,
	extractLinks as extractPageLinks,
} from '../../page/content-extractor.js';
import { systemMessage, userMessage } from '../../model/messages.js';

/**
 * Response schema used for free-text extraction and for combining chunk
 * results: the extracted text plus a confidence score between 0 and 1.
 */
const ExtractionResultSchema = z.object({
	content: z.string().describe('The extracted information'),
	confidence: z.number().min(0).max(1).describe('Confidence in the extraction (0-1)'),
});

/** Parsed shape of an ExtractionResultSchema response. */
type ExtractionResult = z.infer<typeof ExtractionResultSchema>;

/**
 * Extracts information from page content by prompting a language model.
 *
 * Supports free-text extraction (with chunking for long pages), extraction
 * validated against a Zod schema, extraction guided by a raw JSON schema, and
 * plain link listing.
 */
export class ContentExtractor {
	private model: LanguageModel;

	constructor(model: LanguageModel) {
		this.model = model;
	}

	/**
	 * Extract free-text information about `goal` from a page.
	 *
	 * Pages of up to 8000 markdown characters are sent in one request. Longer
	 * pages are split into 6000-character chunks, each chunk is queried in turn,
	 * and the useful answers are merged with a final model call.
	 *
	 * @param page Page to read.
	 * @param goal What to extract.
	 * @param startFromChar Optional offset into the page markdown; ignored unless positive.
	 * @returns The extracted text, or a fixed "nothing found" sentence.
	 */
	async extract(page: Page, goal: string, startFromChar?: number): Promise<string> {
		const markdown = await extractMarkdown(page, {
			startFromChar: startFromChar && startFromChar > 0 ? startFromChar : undefined,
		});

		if (!markdown.trim()) {
			return 'No content found on the page.';
		}

		// For short pages, extract directly
		if (markdown.length <= 8000) {
			return this.extractFromText(markdown, goal);
		}

		// For longer pages, chunk and extract from each chunk
		const chunks = chunkText(markdown, 6000);
		const results: string[] = [];

		for (const chunk of chunks) {
			const result = await this.extractFromText(chunk, goal);
			// Skip chunks where the model reported the sentinel "nothing found" answer.
			if (result && result !== 'No relevant information found.') {
				results.push(result);
			}
		}

		if (results.length === 0) {
			return 'No relevant information found on the page.';
		}

		if (results.length === 1) {
			return results[0];
		}

		// Combine results
		return this.combineExtractions(results, goal);
	}

	// ── Structured extraction ──

	/**
	 * Extract information from a page and validate against a Zod schema.
	 * The LLM is prompted to return JSON conforming to the schema, then the
	 * output is parsed/validated with Zod.
	 *
	 * Only the first 8000 characters of the page markdown are sent.
	 *
	 * @throws Error when the page has no content.
	 * @throws SyntaxError when the model output is not valid JSON.
	 * @throws whatever `schema.parse` throws when the JSON does not match the schema.
	 */
	async extractStructured<T>(
		page: Page,
		goal: string,
		schema: z.ZodType<T>,
	): Promise<T> {
		const markdown = await extractMarkdown(page);

		if (!markdown.trim()) {
			throw new Error('No content found on the page for structured extraction.');
		}

		// Build a JSON schema description for the prompt. For object schemas each
		// field is rendered as its description when present, otherwise as its
		// Zod type name.
		const schemaDescription =
			schema instanceof z.ZodObject
				? JSON.stringify(
						(schema as z.ZodObject<z.ZodRawShape>).shape,
						(_key, value) => {
							if (value?._def?.description) return `(${value._def.description})`;
							if (value?._def?.typeName) return value._def.typeName;
							return value;
						},
						2,
					)
				: 'See schema constraints';

		const text = markdown.length > 8000 ? markdown.slice(0, 8000) : markdown;

		const StructuredOutputSchema = z.object({
			result: z.string().describe('JSON string conforming to the requested schema'),
		});

		const response = await this.model.invoke({
			messages: [
				systemMessage(
					'You are a precise information extractor. Extract the requested information from the provided text and return it as a valid JSON string in the "result" field. The JSON must conform to the schema described below.',
				),
				userMessage(
					`Goal: ${goal}\n\nExpected schema:\n${schemaDescription}\n\nText content:\n${text}\n\nReturn the extracted data as a JSON string in the "result" field.`,
				),
			],
			responseSchema: StructuredOutputSchema,
			schemaName: 'StructuredOutput',
			temperature: 0,
		});

		const parsed = this.parseModelJson(response.parsed.result);
		return schema.parse(parsed);
	}

	// ── Link extraction ──

	/**
	 * Extract all links from a page, returning text, url, and whether external.
	 */
	async extractLinks(
		page: Page,
	): Promise<Array<{ text: string; url: string; isExternal: boolean }>> {
		return extractPageLinks(page);
	}

	// ── Text extraction with optional JSON schema ──

	/**
	 * Extract information about `goal` from already-available text.
	 *
	 * @param text Content to search.
	 * @param goal What to extract.
	 * @param outputJsonSchema When given, the model is asked for JSON conforming
	 *   to this schema and the result is a compact JSON string.
	 * @returns The extracted text, or a JSON string when a schema was supplied.
	 * @throws SyntaxError when a schema was supplied and the model output is not valid JSON.
	 */
	async extractFromText(
		text: string,
		goal: string,
		outputJsonSchema?: Record<string, unknown>,
	): Promise<string> {
		// If a JSON schema is provided, ask the LLM to produce structured output
		if (outputJsonSchema) {
			return this.extractFromTextWithJsonSchema(text, goal, outputJsonSchema);
		}

		const result = await this.model.invoke({
			messages: [
				systemMessage(
					'You are a precise information extractor. Extract only the requested information from the provided text. Be concise and accurate.',
				),
				userMessage(
					`Goal: ${goal}\n\nText content:\n${text}\n\nExtract the information specified in the goal. If the information is not found, say "No relevant information found."`,
				),
			],
			responseSchema: ExtractionResultSchema,
			schemaName: 'ExtractionResult',
			temperature: 0,
		});

		return result.parsed.content;
	}

	// ── Private helpers ──

	/**
	 * Ask the model for JSON matching `jsonSchema` and return it re-serialized
	 * compactly. The output is only checked for being valid JSON; it is not
	 * validated against the schema here.
	 */
	private async extractFromTextWithJsonSchema(
		text: string,
		goal: string,
		jsonSchema: Record<string, unknown>,
	): Promise<string> {
		const schemaStr = JSON.stringify(jsonSchema, null, 2);

		const JsonExtractionSchema = z.object({
			json: z.string().describe('JSON conforming to the requested schema'),
		});

		const result = await this.model.invoke({
			messages: [
				systemMessage(
					'You are a precise information extractor. Extract the requested information and return it as valid JSON conforming to the provided schema. Put the JSON string in the "json" field.',
				),
				userMessage(
					`Goal: ${goal}\n\nRequired JSON schema:\n${schemaStr}\n\nText content:\n${text}\n\nExtract and return as JSON.`,
				),
			],
			responseSchema: JsonExtractionSchema,
			schemaName: 'JsonExtraction',
			temperature: 0,
		});

		// Validate the JSON parses correctly
		const parsed = this.parseModelJson(result.parsed.json);
		return JSON.stringify(parsed);
	}

	/**
	 * Parse a JSON string produced by the model.
	 *
	 * The raw string is tried first, so any output that parsed before still
	 * parses to the same value. If that fails and the output is wrapped in a
	 * markdown code fence (``` or ```json), the fenced body is tried next.
	 *
	 * @throws SyntaxError with a descriptive message when no attempt succeeds.
	 */
	private parseModelJson(raw: string): unknown {
		try {
			return JSON.parse(raw);
		} catch (firstError) {
			const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(raw.trim());
			if (fenced) {
				try {
					return JSON.parse(fenced[1]);
				} catch {
					// Fall through and report the failure of the original attempt.
				}
			}
			const reason = firstError instanceof Error ? firstError.message : String(firstError);
			throw new SyntaxError(`Model returned invalid JSON: ${reason}`);
		}
	}

	/**
	 * Merge per-chunk extraction results into one answer with a final model call.
	 */
	private async combineExtractions(results: string[], goal: string): Promise<string> {
		const combined = results.map((r, i) => `Part ${i + 1}:\n${r}`).join('\n\n');

		const result = await this.model.invoke({
			messages: [
				systemMessage(
					'Combine the following extracted information into a single coherent response. Remove duplicates and organize logically.',
				),
				userMessage(`Goal: ${goal}\n\nExtracted parts:\n${combined}`),
			],
			responseSchema: ExtractionResultSchema,
			schemaName: 'ExtractionResult',
			temperature: 0,
		});

		return result.parsed.content;
	}
}


================================================
FILE: packages/core/src/commands/index.ts
================================================
export { CommandExecutor, type CommandExecutorOptions, classifyViewportError } from './executor.js';
export { CommandCatalog } from './catalog/catalog.js';
export { ContentExtractor } from './extraction/extractor.js';
export { type CatalogEntry, type CatalogOptions } from './catalog/types.js';
export {
	CommandSchema,
	type Command,
	type CommandName,
	type CommandResult,
	type ExecutionContext,
	type CustomCommandSpec,
	type ViewportErrorCategory,
	type InterpretedViewportError,
	TapCommandSchema,
	TypeTextCommandSchema,
	NavigateCommandSchema,
	BackCommandSchema,
	ScrollCommandSchema,
	PressKeysCommandSchema,
	ExtractCommandSchema,
	FinishCommandSchema,
	FocusTabCommandSchema,
	NewTabCommandSchema,
	CloseTabCommandSchema,
	WebSearchCommandSchema,
	UploadCommandSchema,
	SelectCommandSchema,
	CaptureCommandSchema,
	ReadPageCommandSchema,
	WaitCommandSchema,
	ScrollToCommandSchema,
	FindCommandSchema,
	SearchCommandSchema,
	ListOptionsCommandSchema,
	PickOptionCommandSchema,
	ExtractStructuredCommandSchema,
} from './types.js';


================================================
FILE: packages/core/src/commands/types.ts
================================================
import { z } from 'zod';

// ── Individual action schemas ──

/** `tap`: click an element by index, optionally several times or at explicit coordinates. */
export const TapCommandSchema = z.object({
	action: z.literal('tap'),
	index: z.number().describe('Element index to click'),
	clickCount: z.number().optional().default(1).describe('Number of clicks'),
	coordinateX: z.number().optional().describe('X coordinate for coordinate-based clicking'),
	coordinateY: z.number().optional().describe('Y coordinate for coordinate-based clicking'),
});

/** `type_text`: type text into an element; existing text is cleared first by default. */
export const TypeTextCommandSchema = z.object({
	action: z.literal('type_text'),
	index: z.number().describe('Element index to type into'),
	text: z.string().describe('Text to input'),
	clearFirst: z.boolean().optional().default(true).describe('Clear existing text first'),
});

/** `navigate`: go to a URL. */
export const NavigateCommandSchema = z.object({
	action: z.literal('navigate'),
	url: z.string().describe('URL to navigate to'),
});

/** `back`: takes no parameters. */
export const BackCommandSchema = z.object({
	action: z.literal('back'),
});

/** `scroll`: scroll up or down, optionally within a specific element. */
export const ScrollCommandSchema = z.object({
	action: z.literal('scroll'),
	direction: z.enum(['up', 'down']).describe('Scroll direction'),
	amount: z.number().optional().describe('Scroll amount in pixels or pages'),
	index: z.number().optional().describe('Element index to scroll within'),
	pages: z.number().optional().describe('Number of pages to scroll (fractional allowed)'),
});

/** `press_keys`: send a key or key combination. */
export const PressKeysCommandSchema = z.object({
	action: z.literal('press_keys'),
	keys: z.string().describe('Keys to send (e.g., "Enter", "Escape", "Control+a")'),
});

/** `extract`: extract information from the page, optionally shaped by a JSON schema. */
export const ExtractCommandSchema = z.object({
	action: z.literal('extract'),
	goal: z.string().describe('What information to extract from the page'),
	outputSchema: z.record(z.unknown()).optional().describe('Optional JSON schema for structured output'),
});

/** `finish`: final result text plus a success flag that defaults to true. */
export const FinishCommandSchema = z.object({
	action: z.literal('finish'),
	text: z.string().describe('Final result text'),
	success: z.boolean().optional().default(true),
});

/** `focus_tab`: switch to a tab by index. */
export const FocusTabCommandSchema = z.object({
	action: z.literal('focus_tab'),
	tabIndex: z.number().describe('Tab index to switch to'),
});

/** `new_tab`: open a URL in a new tab. */
export const NewTabCommandSchema = z.object({
	action: z.literal('new_tab'),
	url: z.string().describe('URL to open in new tab'),
});

/** `close_tab`: close a tab by index, or the current tab when the index is omitted. */
export const CloseTabCommandSchema = z.object({
	action: z.literal('close_tab'),
	tabIndex: z.number().optional().describe('Tab index to close (current if omitted)'),
});

/** `web_search`: search by query only (no engine parameter, unlike `search`). */
export const WebSearchCommandSchema = z.object({
	action: z.literal('web_search'),
	query: z.string().describe('Search query'),
});

/** `upload`: attach one or more files to a file input element. */
export const UploadCommandSchema = z.object({
	action: z.literal('upload'),
	index: z.number().describe('File input element index'),
	filePaths: z.array(z.string()).describe('File paths to upload'),
});

/** `select`: choose a dropdown option by its value. */
export const SelectCommandSchema = z.object({
	action: z.literal('select'),
	index: z.number().describe('Select element index'),
	value: z.string().describe('Option value to select'),
});

/** `capture`: `fullPage` defaults to false. */
export const CaptureCommandSchema = z.object({
	action: z.literal('capture'),
	fullPage: z.boolean().optional().default(false),
});

/** `read_page`: takes no parameters. */
export const ReadPageCommandSchema = z.object({
	action: z.literal('read_page'),
});

/** `wait`: pause for a number of seconds (3 by default). */
export const WaitCommandSchema = z.object({
	action: z.literal('wait'),
	seconds: z.number().optional().default(3).describe('Seconds to wait'),
});

// ── New action schemas ──

/** `scroll_to`: scroll to the given text on the page. */
export const ScrollToCommandSchema = z.object({
	action: z.literal('scroll_to'),
	text: z.string().describe('Text to scroll to on the page'),
});

/** `find`: look up page elements matching a textual query. */
export const FindCommandSchema = z.object({
	action: z.literal('find'),
	query: z.string().describe('Description of elements to find (e.g., "all submit buttons")'),
});

/** `search`: web search on a chosen engine; the engine defaults to google. */
export const SearchCommandSchema = z.object({
	action: z.literal('search'),
	query: z.string().describe('Search query'),
	engine: z.enum(['google', 'duckduckgo', 'bing']).optional().default('google'),
});

/** `list_options`: list the options of a select element. */
export const ListOptionsCommandSchema = z.object({
	action: z.literal('list_options'),
	index: z.number().describe('Select element index'),
});

/** `pick_option`: choose a dropdown option by its text. */
export const PickOptionCommandSchema = z.object({
	action: z.literal('pick_option'),
	index: z.number().describe('Select element index'),
	optionText: z.string().describe('Text of the option to select'),
});

/**
 * `extract_structured`: extract data from the page as JSON conforming to a
 * caller-provided JSON schema. `maxContentLength` defaults to 8000 characters.
 */
export const ExtractStructuredCommandSchema = z.object({
	action: z.literal('extract_structured'),
	goal: z.string().describe('Description of what data to extract from the page'),
	outputSchema: z
		.record(z.unknown())
		.describe(
			'JSON Schema describing the structure of the expected output. The LLM will return data conforming to this schema.',
		),
	maxContentLength: z
		.number()
		.optional()
		.default(8000)
		.describe('Maximum number of characters of page content to send to the LLM'),
});

// ── Discriminated union of all actions ──

/**
 * Union of every built-in command schema, discriminated on the `action`
 * literal. A schema must be listed here to be part of the `Command` type.
 */
export const CommandSchema = z.discriminatedUnion('action', [
	TapCommandSchema,
	TypeTextCommandSchema,
	NavigateCommandSchema,
	BackCommandSchema,
	ScrollCommandSchema,
	PressKeysCommandSchema,
	ExtractCommandSchema,
	FinishCommandSchema,
	FocusTabCommandSchema,
	NewTabCommandSchema,
	CloseTabCommandSchema,
	WebSearchCommandSchema,
	UploadCommandSchema,
	SelectCommandSchema,
	CaptureCommandSchema,
	ReadPageCommandSchema,
	WaitCommandSchema,
	ScrollToCommandSchema,
	FindCommandSchema,
	SearchCommandSchema,
	ListOptionsCommandSchema,
	PickOptionCommandSchema,
	ExtractStructuredCommandSchema,
]);

/** A parsed command: any member of the CommandSchema union. */
export type Command = z.infer<typeof CommandSchema>;

/** The set of `action` literals across all built-in commands. */
export type CommandName = Command['action'];

// ── Action result ──

/** Outcome of running a single command. */
export interface CommandResult {
	/** Whether the command completed without a reported failure. */
	success: boolean;
	/** Text produced by the command (search hits, extracted data, option lists, ...). */
	extractedContent?: string;
	/** Failure description when `success` is false. */
	error?: string;
	/** When set, the executor stops running the remaining commands of the step. */
	isDone?: boolean;
	// NOTE(review): presumably asks the agent to keep extractedContent in its memory — confirm against the consumer.
	includeInMemory?: boolean;
}

// ── Browser error categories ──

/** Coarse classification assigned to an error raised while running a command. */
export type ViewportErrorCategory =
	| 'navigation'
	| 'element_not_found'
	| 'element_stale'
	| 'element_not_interactable'
	| 'timeout'
	| 'permission'
	| 'network'
	| 'crash'
	| 'unknown';

/** Structured interpretation of an error, as produced by classifyViewportError. */
export interface InterpretedViewportError {
	/** Classification of the error. */
	category: ViewportErrorCategory;
	/** Human-readable description of what went wrong. */
	message: string;
	/** Advice on what to try next. */
	suggestion: string;
	/** False when the command sequence should be aborted (e.g. a browser crash). */
	isRetryable: boolean;
}

// ── Custom action definition ──

/** Definition of a command that can be registered alongside the built-in ones. */
export interface CustomCommandSpec {
	/** Name the command is registered and invoked under. */
	name: string;
	/** Human-readable description of what the command does. */
	description: string;
	/** Zod object schema describing the command's parameters. */
	schema: z.ZodObject<any>;
	/** Implementation; receives the command parameters and the execution context. */
	handler: (params: Record<string, unknown>, context: ExecutionContext) => Promise<CommandResult>;
	// NOTE(review): presumably makes the executor stop the current sequence after this command — confirm in the catalog.
	terminatesSequence?: boolean;
}

/** Runtime objects handed to every command handler. */
export interface ExecutionContext {
	/** Playwright page the command operates on. */
	page: import('playwright').Page;
	/** Playwright CDP session. */
	cdpSession: import('playwright').CDPSession;
	/** Page analyzer; handlers use it to turn an element index into a selector. */
	domService: import('../page/page-analyzer.js').PageAnalyzer;
	/** Viewport wrapper; handlers use it for navigation. */
	browserSession: import('../viewport/viewport.js').Viewport;
	/** Optional model for extraction; takes precedence over the executor-level extraction service. */
	extractionLlm?: import('../model/interface.js').LanguageModel;
	/** Optional sandboxed file access. */
	fileSystem?: import('../sandbox/file-access.js').FileAccess;
	/** Sensitive values to be masked in command results and error messages. */
	maskedValues?: Record<string, string>;
}


================================================
FILE: packages/core/src/commands/utils.ts
================================================
import type { Page } from 'playwright';

/**
 * Scrolls the page window vertically and then waits 200 ms for it to settle.
 *
 * @param page Page to scroll.
 * @param direction 'down' scrolls by a positive offset, anything else by a negative one.
 * @param amount Distance in pixels; 500 when omitted.
 */
export async function scrollPage(
	page: Page,
	direction: 'up' | 'down',
	amount?: number,
): Promise<void> {
	const distance = amount ?? 500;
	const offsetY = direction === 'down' ? distance : -distance;

	await page.evaluate((dy) => {
		window.scrollBy(0, dy);
	}, offsetY);

	// Give the scroll a moment to complete.
	const settle = new Promise((done) => setTimeout(done, 200));
	await settle;
}

/**
 * Scrolls the first element matching `selector` vertically and then waits
 * 200 ms. Nothing happens inside the page when no element matches.
 *
 * @param page Page containing the element.
 * @param selector CSS selector of the element to scroll.
 * @param direction 'down' scrolls by a positive offset, anything else by a negative one.
 * @param amount Distance in pixels; 300 when omitted.
 */
export async function scrollElement(
	page: Page,
	selector: string,
	direction: 'up' | 'down',
	amount?: number,
): Promise<void> {
	const distance = amount ?? 300;
	const payload = {
		sel: selector,
		d: direction === 'down' ? distance : -distance,
	};

	await page.evaluate(({ sel, d }) => {
		const target = document.querySelector(sel);
		if (target) {
			target.scrollBy(0, d);
		}
	}, payload);

	const settle = new Promise((done) => setTimeout(done, 200));
	await settle;
}

/**
 * Builds a Google search URL for `query`.
 *
 * @param query - Raw search text; it is percent-encoded with `encodeURIComponent`.
 * @returns The search URL with the fixed `udm=14` parameter appended.
 */
export function buildGoogleSearchUrl(query: string): string {
	const encodedQuery = encodeURIComponent(query);
	return 'https://www.google.com/search?q=' + encodedQuery + '&udm=14';
}


================================================
FILE: packages/core/src/config/config.ts
================================================
import { config as loadDotenv } from 'dotenv';
import * as path from 'node:path';
import * as os from 'node:os';
import * as fs from 'node:fs';
import { type GlobalConfig, GlobalConfigSchema, type ConfigFileContents } from './types.js';
import type { DeepPartial } from '../types.js';
import { createLogger } from '../logging.js';

const logger = createLogger('config');

let _instance: Config | undefined;

/**
 * Process-wide configuration singleton.
 *
 * The effective configuration is assembled from three layers and validated
 * with `GlobalConfigSchema`. Precedence, lowest to highest:
 *   1. defaults derived from environment variables (after loading `.env`),
 *   2. the JSON config file at `Config.configFilePath`,
 *   3. overrides passed to the first `Config.instance()` call.
 */
export class Config {
	/** Validated configuration with schema defaults applied. */
	readonly config: GlobalConfig;

	private constructor(overrides: DeepPartial<GlobalConfig> = {}) {
		loadDotenv();

		// Later arguments win: env-derived defaults < config file < explicit overrides.
		const fileConfig = Config.loadConfigFile();
		const merged = this.deepMerge(
			this.mergeEnvDefaults({}),
			fileConfig,
			overrides,
		);
		this.config = GlobalConfigSchema.parse(merged);
	}

	/**
	 * Returns the shared instance, creating it on first use.
	 *
	 * @param overrides - Applied only when the instance is created. On later
	 *   calls they are ignored; call `Config.reset()` first to rebuild.
	 */
	static instance(overrides?: DeepPartial<GlobalConfig>): Config {
		if (!_instance) {
			_instance = new Config(overrides);
		} else if (overrides !== undefined) {
			logger.debug('Config.instance() called with overrides after initialization; overrides ignored');
		}
		return _instance;
	}

	/** Drops the shared instance so the next `instance()` call rebuilds it. */
	static reset(): void {
		_instance = undefined;
	}

	/**
	 * Builds the environment-derived layer of the configuration.
	 *
	 * Values present in `overrides` take precedence over the environment. The
	 * overrides are spread first so they can never replace the merged `browser`
	 * object wholesale or clobber an env-provided path with `undefined`.
	 */
	private mergeEnvDefaults(overrides: DeepPartial<GlobalConfig>): DeepPartial<GlobalConfig> {
		const env = process.env;

		// `||` on purpose: an empty proxy variable counts as unset.
		const inheritedProxy = env.HTTPS_PROXY || env.HTTP_PROXY;
		const proxy = env.OPEN_BROWSER_PROXY_SERVER
			? {
					server: env.OPEN_BROWSER_PROXY_SERVER,
					username: env.OPEN_BROWSER_PROXY_USERNAME,
					password: env.OPEN_BROWSER_PROXY_PASSWORD,
				}
			: inheritedProxy
				? { server: inheritedProxy }
				: undefined;

		return {
			...overrides,
			browser: {
				// Headless unless explicitly disabled; relaxed security only when explicitly enabled.
				headless: env.BROWSER_HEADLESS !== 'false',
				relaxedSecurity: env.BROWSER_DISABLE_SECURITY === 'true',
				browserBinaryPath: env.BROWSER_BINARY_PATH,
				userDataDir: env.BROWSER_USER_DATA_DIR,
				...(proxy ? { proxy } : {}),
				...overrides.browser,
			},
			tracePath: overrides.tracePath ?? env.OPEN_BROWSER_TRACE_PATH,
			recordingPath: overrides.recordingPath ?? env.OPEN_BROWSER_SAVE_RECORDING_PATH,
		};
	}

	/**
	 * Recursively merges plain objects left to right into a new object.
	 * Arrays and primitives replace the previous value; `undefined` values and
	 * falsy layers are skipped so they never erase an earlier setting.
	 */
	private deepMerge(...objects: DeepPartial<GlobalConfig>[]): DeepPartial<GlobalConfig> {
		const isMergeable = (candidate: unknown): candidate is Record<string, unknown> =>
			candidate !== null && typeof candidate === 'object' && !Array.isArray(candidate);

		const result: Record<string, unknown> = {};

		for (const obj of objects) {
			if (!obj) continue;
			for (const [key, value] of Object.entries(obj)) {
				const existing = result[key];
				if (isMergeable(value) && isMergeable(existing)) {
					result[key] = this.deepMerge(
						existing as DeepPartial<GlobalConfig>,
						value as DeepPartial<GlobalConfig>,
					);
				} else if (value !== undefined) {
					result[key] = value;
				}
			}
		}

		return result as DeepPartial<GlobalConfig>;
	}

	/** Shortcut for `config.browser`. */
	get browser() {
		return this.config.browser;
	}

	/** Shortcut for `config.agent`. */
	get agent() {
		return this.config.agent;
	}

	/** `~/.open-browser`; the directory is created on access if it is missing. */
	static get configDir(): string {
		const dir = path.join(os.homedir(), '.open-browser');
		if (!fs.existsSync(dir)) {
			fs.mkdirSync(dir, { recursive: true });
		}
		return dir;
	}

	/** `<configDir>/tmp`; the directory is created on access if it is missing. */
	static get tmpDir(): string {
		const dir = path.join(Config.configDir, 'tmp');
		if (!fs.existsSync(dir)) {
			fs.mkdirSync(dir, { recursive: true });
		}
		return dir;
	}

	/** `<configDir>/config.json` (accessing it creates `configDir` as a side effect). */
	static get configFilePath(): string {
		return path.join(Config.configDir, 'config.json');
	}

	/**
	 * Reads the JSON config file, if present.
	 *
	 * Never throws: a missing file yields `{}`, and an unreadable, malformed or
	 * non-object file is reported with a warning and also yields `{}`.
	 */
	static loadConfigFile(): DeepPartial<GlobalConfig> {
		try {
			const filePath = Config.configFilePath;
			if (fs.existsSync(filePath)) {
				const raw = fs.readFileSync(filePath, 'utf-8');
				const parsed: unknown = JSON.parse(raw);
				// Valid JSON can still be `null`, an array or a scalar; only an object is a usable layer.
				if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
					logger.warn(`Ignoring config file ${filePath}: expected a JSON object at the top level`);
					return {};
				}
				logger.debug(`Loaded config from ${filePath}`);
				return parsed as ConfigFileContents;
			}
		} catch (error) {
			logger.warn(`Failed to load config file: ${error}`);
		}
		return {};
	}

	/**
	 * Writes `config` as pretty-printed JSON to `Config.configFilePath`.
	 *
	 * A newly created file is restricted to the owner (mode 0600) because the
	 * contents may include proxy credentials; the mode of an existing file is
	 * left unchanged.
	 *
	 * @throws Propagates file-system errors from creating the directory or writing the file.
	 */
	static saveConfigFile(config: ConfigFileContents): void {
		const filePath = Config.configFilePath;
		const dir = path.dirname(filePath);
		if (!fs.existsSync(dir)) {
			fs.mkdirSync(dir, { recursive: true });
		}
		fs.writeFileSync(filePath, JSON.stringify(config, null, 2), {
			encoding: 'utf-8',
			mode: 0o600,
		});
		logger.info(`Config saved to ${filePath}`);
	}

	/**
	 * Best-effort container detection: true when `/.dockerenv` exists or
	 * `/proc/1/cgroup` mentions `docker` or `kubepods`.
	 */
	static isDocker(): boolean {
		try {
			if (fs.existsSync('/.dockerenv')) return true;
			if (fs.existsSync('/proc/1/cgroup')) {
				const cgroup = fs.readFileSync('/proc/1/cgroup', 'utf-8');
				return cgroup.includes('docker') || cgroup.includes('kubepods');
			}
		} catch {
			// Probe files could not be read; treat as "not Docker".
		}
		return false;
	}

	/**
	 * Whether a graphical display is assumed to be available: always true on
	 * Windows and macOS, otherwise requires `DISPLAY` or `WAYLAND_DISPLAY`.
	 */
	static hasDisplay(): boolean {
		if (process.platform === 'win32') return true;
		if (process.platform === 'darwin') return true;
		return !!process.env.DISPLAY || !!process.env.WAYLAND_DISPLAY;
	}
}


================================================
FILE: packages/core/src/config/index.ts
================================================
export { Config } from './config.js';
export {
	type ViewportConfig,
	ViewportConfigSchema,
	type AgentConfig,
	AgentConfigSchema,
	type GlobalConfig,
	GlobalConfigSchema,
} from './types.js';


================================================
FILE: packages/core/src/config/types.ts
================================================
import { z } from 'zod';

/**
 * Zod schema for proxy settings: `server` is the only required field;
 * credentials and the bypass list are optional.
 */
export const ProxyConfigSchema = z.object({
	server: z.string(),
	username: z.string().optional(),
	password: z.string().optional(),
	bypass: z.array(z.string()).optional(),
});

/** Static type inferred from `ProxyConfigSchema`. */
export type ProxyConfig = z.infer<typeof ProxyConfigSchema>;

/**
 * Zod schema for the browser (viewport) settings. Every field either declares
 * a default or is optional, so no field is required in the input.
 */
export const ViewportConfigSchema = z.object({
	headless: z.boolean().default(true),
	relaxedSecurity: z.boolean().default(false),
	extraChromiumArgs: z.array(z.string()).default([]),
	// Window size defaults (presumably pixels — confirm with the launcher).
	windowWidth: z.number().default(1280),
	windowHeight: z.number().default(1100),
	proxy: ProxyConfigSchema.optional(),
	// Page-load wait tuning; the `Ms` suffix suggests milliseconds.
	minWaitPageLoadMs: z.number().default(500),
	waitForNetworkIdleMs: z.number().default(1000),
	maxWaitPageLoadMs: z.number().default(5000),
	cookieFile: z.string().optional(),
	minimumWaitBetweenActions: z.number().default(1000),
	maxErrorLength: z.number().default(400),
	// NOTE(review): `commandsPerStep` is also declared on AgentConfigSchema with the same default.
	commandsPerStep: z.number().default(10),
	browserBinaryPath: z.string().optional(),
	userDataDir: z.string().optional(),
	persistAfterClose: z.boolean().default(false),
	channelName: z.string().optional(),
	deterministicRendering: z.boolean().default(false),
	maxIframes: z.number().default(3),
	downloadsPath: z.string().optional(),
});

/** Static type inferred from `ViewportConfigSchema`. */
export type ViewportConfig = z.infer<typeof ViewportConfigSchema>;

/**
 * Zod schema for agent settings. Every field either declares a default or is
 * optional, so no field is required in the input.
 */
export const AgentConfigSchema = z.object({
	stepLimit: z.number().default(100),
	// NOTE(review): `commandsPerStep` is also declared on ViewportConfigSchema with the same default.
	commandsPerStep: z.number().default(10),
	failureThreshold: z.number().default(5),
	// NOTE(review): unit is not stated here (seconds vs. milliseconds) — confirm with the consumer.
	retryDelay: z.number().default(10),
	enableScreenshots: z.boolean().default(true),
	enableScreenshotsForTextExtraction: z.boolean().default(false),
	contextWindowSize: z.number().default(128000),
	inlineCommands: z.boolean().default(true),
	// Default list of attribute names.
	capturedAttributes: z.array(z.string()).default([
		'title',
		'type',
		'name',
		'role',
		'tabindex',
		'aria-label',
		'placeholder',
		'value',
		'alt',
		'aria-expanded',
	]),
	commandDelayMs: z.number().default(1),
	allowedUrls: z.array(z.string()).optional(),
	blockedUrls: z.array(z.string()).optional(),
	traceOutputPath: z.string().optional(),
	replayOutputPath: z.string().optional(),
	strategyInterval: z.number().default(0),
	// Unvalidated (`z.any()`): any value is accepted for the planner model.
	plannerModel: z.any().optional(),
	enableStrategy: z.boolean().default(false),
	enableEvaluation: z.boolean().default(false),
	// Timeouts; the magnitudes suggest milliseconds — confirm with the consumer.
	stepTimeout: z.number().default(60000),
	llmTimeout: z.number().default(30000),
	maxElementsInDom: z.number().default(2000),
	coordinateClicking: z.boolean().default(false),
	compactMode: z.boolean().default(false),
});

/** Static type inferred from `AgentConfigSchema`. */
export type AgentConfig = z.infer<typeof AgentConfigSchema>;

/**
 * Top-level Zod schema: the browser and agent schemas (each defaulting to
 * `{}` when absent) plus the `tracePath` and `recordingPath` strings.
 */
export const GlobalConfigSchema = z.object({
	browser: ViewportConfigSchema.default({}),
	agent: AgentConfigSchema.default({}),
	tracePath: z.string().default('./traces'),
	recordingPath: z.string().default('./recordings'),
});

/** Static type inferred from `GlobalConfigSchema`. */
export type GlobalConfig = z.infer<typeof GlobalConfigSchema>;

/**
 * Shape of the JSON config file: every section and path is optional, and the
 * `browser` / `agent` sections may be partial.
 */
export interface ConfigFileContents {
	browser?: Partial<ViewportConfig>;
	agent?: Partial<AgentConfig>;
	tracePath?: string;
	recordingPath?: string;
}


================================================
FILE: packages/core/src/errors.ts
================================================
/**
 * Root of the error hierarchy in this module; every other error class here
 * derives from it, directly or indirectly.
 */
export class OpenBrowserError extends Error {
	/**
	 * @param message - Error message.
	 * @param options - Standard `ErrorOptions` (e.g. `cause`), forwarded to `Error`.
	 */
	constructor(message: string, options?: ErrorOptions) {
		super(message, options);
		this.name = 'OpenBrowserError';
	}
}

/** Base class for the viewport (browser) related errors in this module. */
export class ViewportError extends OpenBrowserError {
	/**
	 * @param message - Error message.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(message: string, options?: ErrorOptions) {
		super(message, options);
		this.name = 'ViewportError';
	}
}

/** Viewport error whose name indicates a failed browser launch. */
export class LaunchFailedError extends ViewportError {
	/**
	 * @param message - Error message.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(message: string, options?: ErrorOptions) {
		super(message, options);
		this.name = 'LaunchFailedError';
	}
}

/** Viewport error for a failed navigation; keeps the URL that was involved. */
export class NavigationFailedError extends ViewportError {
	/** URL associated with the failed navigation. */
	public readonly url: string;

	/**
	 * @param message - Error message.
	 * @param url - URL associated with the failure.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(message: string, url: string, options?: ErrorOptions) {
		super(message, options);
		this.url = url;
		this.name = 'NavigationFailedError';
	}
}

/** Viewport error for a crashed browser. */
export class ViewportCrashedError extends ViewportError {
	/**
	 * @param message - Error message; defaults to `'Browser has crashed'`.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(message = 'Browser has crashed', options?: ErrorOptions) {
		super(message, options);
		this.name = 'ViewportCrashedError';
	}
}

/** Base class for the agent related errors in this module. */
export class AgentError extends OpenBrowserError {
	/**
	 * @param message - Error message.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(message: string, options?: ErrorOptions) {
		super(message, options);
		this.name = 'AgentError';
	}
}

/** Agent error for an agent that is stuck in a loop. */
export class AgentStalledError extends AgentError {
	/**
	 * @param message - Error message; defaults to `'Agent is stuck in a loop'`.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(message = 'Agent is stuck in a loop', options?: ErrorOptions) {
		super(message, options);
		this.name = 'AgentStalledError';
	}
}

/** Agent error raised when the step budget is used up; records both counters. */
export class StepLimitExceededError extends AgentError {
	/** Number of steps taken when the limit was hit. */
	public readonly stepsTaken: number;
	/** Configured maximum number of steps. */
	public readonly stepLimit: number;

	/**
	 * @param stepsTaken - Steps taken so far; included in the message.
	 * @param stepLimit - Maximum allowed steps; included in the message.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(stepsTaken: number, stepLimit: number, options?: ErrorOptions) {
		const summary = `Agent reached maximum steps (${stepsTaken}/${stepLimit})`;
		super(summary, options);
		this.name = 'StepLimitExceededError';
		this.stepsTaken = stepsTaken;
		this.stepLimit = stepLimit;
	}
}

/** Error for a URL that is not allowed; keeps the offending URL. */
export class UrlBlockedError extends OpenBrowserError {
	/** The URL that was rejected. */
	public readonly url: string;

	/**
	 * @param url - Rejected URL; embedded in the message as `URL not allowed: <url>`.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(url: string, options?: ErrorOptions) {
		super(`URL not allowed: ${url}`, options);
		this.name = 'UrlBlockedError';
		this.url = url;
	}
}

/** Error whose name indicates a failure while extracting page content. */
export class PageExtractionError extends OpenBrowserError {
	/**
	 * @param message - Error message.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(message: string, options?: ErrorOptions) {
		super(message, options);
		this.name = 'PageExtractionError';
	}
}

/** Base class for the model related errors in this module. */
export class ModelError extends OpenBrowserError {
	/**
	 * @param message - Error message.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(message: string, options?: ErrorOptions) {
		super(message, options);
		this.name = 'ModelError';
	}
}

/** Model error for throttling; may carry a retry delay. */
export class ModelThrottledError extends ModelError {
	/** Optional delay before retrying; the name indicates milliseconds. */
	public readonly retryAfterMs?: number;

	/**
	 * @param message - Error message.
	 * @param retryAfterMs - Optional retry delay, stored as-is.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(message: string, retryAfterMs?: number, options?: ErrorOptions) {
		super(message, options);
		this.name = 'ModelThrottledError';
		this.retryAfterMs = retryAfterMs;
	}
}

/** Error for a failed tool/command; keeps the tool name. */
export class CommandFailedError extends OpenBrowserError {
	/** Name of the tool that failed. */
	public readonly toolName: string;

	/**
	 * @param toolName - Name of the failing tool; quoted in the message.
	 * @param message - Failure detail appended after the tool name.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(toolName: string, message: string, options?: ErrorOptions) {
		const description = `Tool "${toolName}" failed: ${message}`;
		super(description, options);
		this.name = 'CommandFailedError';
		this.toolName = toolName;
	}
}

/**
 * Viewport error enriched with page context. The step number and page URL are
 * woven into the message; all three context values are kept as fields.
 */
export class ContextualViewportError extends ViewportError {
	/** URL of the page when the error occurred. */
	public readonly pageUrl: string;
	/** Title of the page when the error occurred (not part of the message). */
	public readonly pageTitle: string;
	/** Step number used as the message prefix. */
	public readonly stepNumber: number;

	/**
	 * @param message - Core error message.
	 * @param context - Page URL, page title and step number to attach.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(
		message: string,
		context: { pageUrl: string; pageTitle: string; stepNumber: number },
		options?: ErrorOptions,
	) {
		const { pageUrl, pageTitle, stepNumber } = context;
		super(`[Step ${stepNumber}] ${message} (url: ${pageUrl})`, options);
		this.name = 'ContextualViewportError';
		this.pageUrl = pageUrl;
		this.pageTitle = pageTitle;
		this.stepNumber = stepNumber;
	}
}

/** Model error attributed to a specific provider, optionally with a status code. */
export class ProviderError extends ModelError {
	/** Provider identifier; also used as the `[provider]` message prefix. */
	public readonly provider: string;
	/** Status code reported for the failure, when one is known. */
	public readonly statusCode?: number;

	/**
	 * @param provider - Provider identifier.
	 * @param message - Failure detail placed after the provider prefix.
	 * @param statusCode - Optional status code.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(
		provider: string,
		message: string,
		statusCode?: number,
		options?: ErrorOptions,
	) {
		super(`[${provider}] ${message}`, options);
		this.name = 'ProviderError';
		this.provider = provider;
		this.statusCode = statusCode;
	}

	/**
	 * True when the status code is 429 or at least 500; false for any other
	 * code and when no status code is known.
	 */
	get isRetryable(): boolean {
		const code = this.statusCode;
		return code !== undefined && (code === 429 || code >= 500);
	}
}

/** Error for a failed validation; keeps the field name and the issue list. */
export class SchemaViolationError extends OpenBrowserError {
	/** Name of the field that failed validation. */
	public readonly field: string;
	/** Individual issue descriptions (the caller's array is stored as-is). */
	public readonly issues: string[];

	/**
	 * @param field - Field name, quoted in the message.
	 * @param issues - Issue descriptions, joined with `'; '` in the message.
	 * @param options - Standard `ErrorOptions`, forwarded to the parent class.
	 */
	constructor(field: string, issues: string[], options?: ErrorOptions) {
		const joinedIssues = issues.join('; ');
		super(`Validation failed for "${field}": ${joinedIssues}`, options);
		this.name = 'SchemaViolationError';
		this.field = field;
		this.issues = issues;
	}
}


================================================
FILE: packages/core/src/index.ts
================================================
// ── Core types ──
export {
	type TargetId,
	type SessionId,
	type ElementRef,
	type TabId,
	targetId,
	sessionId,
	elementIndex,
	tabId,
	type Result,
	ok,
	err,
	type Position,
	type Rect,
	LogLevel,
	type DeepPartial,
	type Awaitable,
} from './types.js';

// ── Errors ──
export {
	OpenBrowserError,
	ViewportError,
	LaunchFailedError,
	NavigationFailedError,
	ViewportCrashedError,
	ContextualViewportError,
	AgentError,
	AgentStalledError,
	StepLimitExceededError,
	UrlBlockedError,
	PageExtractionError,
	ModelError,
	ModelThrottledError,
	CommandFailedError,
	ProviderError,
	SchemaViolationError,
} from './errors.js';

// ── Logging ──
export {
	Logger,
	createLogger,
	setGlobalLogLevel,
	getGlobalLogLevel,
	setLogColors,
	setLogTimestamps,
} from './logging.js';

// ── Observability ──
export {
	timed,
	withTiming,
	Stopwatch,
	type TimingResult,
} from './telemetry.js';

// ── Utils ──
export { generateId, matchesUrlPattern, isUrlPermitted, sleep, withDeadline, Timer } from './utils.js';

// ── Config ──
export { Config } from './config/index.js';
export type { ViewportConfig, AgentConfig as AgentConfigSchema, GlobalConfig } from './config/index.js';

// ── LLM ──
export {
	type LanguageModel,
	type InferenceOptions,
	type ModelProvider,
	type InferenceResult,
	type InferenceUsage,
	type Message,
	type SystemMessage,
	type UserMessage,
	type AssistantMessage,
	type ToolResultMessage,
	type ToolCall,
	type ContentPart,
	type TextContent,
	type ImageContent,
	systemMessage,
	userMessage,
	assistantMessage,
	toolResultMessage,
	textContent,
	imageContent,
	VercelModelAdapter,
	type VercelModelAdapterOptions,
	zodToJsonSchema,
	optimizeSchemaForModel,
	optimizeJsonSchemaForModel,
	type SchemaOptimizationOptions,
} from './model/index.js';

// ── Browser ──
export {
	Viewport,
	type ViewportOptions,
	LaunchProfile,
	EventHub,
	BaseGuard,
	type GuardContext,
	VisualTracer,
	type VisualTracerOptions,
	type TabDescriptor,
	type ViewportSnapshot,
	type ViewportHistory,
	type LaunchOptions,
	type PageState,
	type ViewportEventMap,
	type ViewportRequestMap,
	type NavigateEvent,
	type ClickEvent,
	type InputEvent,
	type ScrollEvent,
	type ScreenshotEvent,
	type ScreenshotResult,
	type DownloadEvent,
	type PopupEvent,
	type SecurityEvent,
	type CrashEvent,
} from './viewport/index.js';

// ── DOM ──
export {
	PageAnalyzer,
	type PageAnalyzerOptions,
	SnapshotBuilder,
	TreeRenderer,
	type RendererOptions,
	extractMarkdown,
	htmlToMarkdown,
	extractTextContent,
	extractLinks,
	chunkText,
	type MarkdownExtractionOptions,
	type PageTreeNode,
	type SelectorIndex,
	type RenderedPageState,
	type DOMRect,
	type CDPSnapshotResult,
	type AXNode,
	type TargetInfo,
	type TargetAllTrees,
	type InteractedElement,
	type MatchLevel,
	type SimplifiedNode,
} from './page/index.js';

// ── FileAccess ──
export {
	FileAccess,
	type FileAccessOptions,
	type FileInfo,
	type FileAccessState,
} from './sandbox/index.js';

// ── Commands ──
export {
	CommandExecutor,
	type CommandExecutorOptions,
	classifyViewportError,
	CommandCatalog,
	ContentExtractor,
	type CatalogEntry,
	type CatalogOptions,
	CommandSchema,
	type Command,
	type CommandName,
	type CommandResult,
	type ExecutionContext,
	type CustomCommandSpec,
	type ViewportErrorCategory,
	type InterpretedViewportError,
	TapCommandSchema,
	TypeTextCommandSchema,
	NavigateCommandSchema,
	BackCommandSchema,
	ScrollCommandSchema,
	PressKeysCommandSchema,
	ExtractCommandSchema,
	FinishCommandSchema,
	FocusTabCommandSchema,
	NewTabCommandSchema,
	CloseTabCommandSchema,
	WebSearchCommandSchema,
	UploadCommandSchema,
	SelectCommandSchema,
	CaptureCommandSchema,
	ReadPageCommandSchema,
	WaitCommandSchema,
	ScrollToCommandSchema,
	FindCommandSchema,
	SearchCommandSchema,
	ListOptionsCommandSchema,
	PickOptionCommandSchema,
	ExtractStructuredCommandSchema,
} from './commands/index.js';

// ── Agent ──
export {
	Agent,
	type AgentOptions,
	InstructionBuilder,
	StepPromptBuilder,
	buildCommandDescriptions,
	buildContextualCommands,
	buildExtractionInstructionBuilder,
	buildExtractionUserPrompt,
	clearTemplateCache,
	type PromptTemplate,
	type InstructionBuilderOptions,
	type StepInfo,
	type StepPromptBuilderOptions,
	ConversationManager,
	StallDetector,
	hashPageTree,
	hashTextContent,
	type PageSignature,
	type StallDetectorConfig,
	type StallCheckResult,
	ResultEvaluator,
	constructEvaluatorMessages,
	constructQuickCheckMessages,
	ReplayRecorder,
	type ReplayRecorderOptions,
	type AgentConfig,
	type AgentState,
	type AgentDecision,
	type AgentDecisionCompact,
	type AgentDecisionDirect,
	type StepRecord,
	ExecutionLog,
	type RunOutcome,
	type Reasoning,
	type PlanStep,
	type EvaluationResult,
	type QuickCheckResult,
	type CompactionPolicy,
	type StepTelemetry,
	type ExtractedVariable,
	type AccumulatedCost,
	type StepCostBreakdown,
	type PricingTable as AgentPricingTable,
	type PlanRevision,
	AgentDecisionSchema,
	AgentDecisionCompactSchema,
	AgentDecisionDirectSchema,
	ReasoningSchema,
	EvaluationResultSchema,
	QuickCheckResultSchema,
	PlanStepSchema,
	StrategyPlanSchema,
	PlanRevisionSchema,
	PRICING_TABLE,
	calculateStepCost,
	supportsDeepReasoning,
	supportsCoordinateMode,
	isCompactModel,
	DEFAULT_AGENT_CONFIG,
	type ConversationManagerOptions,
	type TrackedMessage,
	type ConversationManagerState,
	type ConversationEntry,
	type SerializedTrackedMessage,
	type MessageCategory,
	estimateTokens,
	estimateMessageTokens,
	redactSensitiveValues,
	redactMessage,
	redactMessages,
	extractTextContent as extractMessageTextContent,
	truncate,
} from './agent/index.js';

// ── Bridge ──
export { BridgeServer, type BridgeServerOptions, BridgeClient, type BridgeClientOptions, BridgeAdapter } from './bridge/index.js';

// ── Metering ──
export {
	UsageMeter,
	CompositeUsageMeter,
	BudgetDepletedError,
	estimateTokenCount,
	DEFAULT_COST_RATES,
	type UsageRecord,
	type CostRates,
	type PricingTable,
	type ModelRole,
	type ActionUsageRecord,
	type MeteringSummary,
	type ModelUsageBreakdown,
	type RoleUsageBreakdown,
	type BudgetPolicy,
	type BudgetState,
} from './metering/index.js';


================================================
FILE: packages/core/src/logging.ts
================================================
import { LogLevel } from './types.js';

// Display label for each log level.
const LEVEL_NAMES: Record<number, string> = {
	[LogLevel.DEBUG]: 'DEBUG',
	[LogLevel.INFO]: 'INFO',
	[LogLevel.WARN]: 'WARN',
	[LogLevel.ERROR]: 'ERROR',
};

// ANSI escape sequence used to color each level's label.
const LEVEL_COLORS: Record<number, string> = {
	[LogLevel.DEBUG]: '\x1b[36m', // cyan
	[LogLevel.INFO]: '\x1b[32m',  // green
	[LogLevel.WARN]: '\x1b[33m',  // yellow
	[LogLevel.ERROR]: '\x1b[31m', // red
};

// ANSI escape sequences: reset all attributes, dim, bold.
const RESET = '\x1b[0m';
const DIM = '\x1b[2m';
const BOLD = '\x1b[1m';

// Module-wide logging state, changed through the exported setters below.
let globalLevel: LogLevel = LogLevel.INFO; // threshold for loggers without their own level
let useColors = true; // wrap output segments in ANSI codes
let logTimestamps = true; // prefix each line with a time-of-day stamp

/** Sets the module-wide level used by loggers that have no level of their own. */
export function setGlobalLogLevel(level: LogLevel): void {
	globalLevel = level;
}

/** Returns the current module-wide log level. */
export function getGlobalLogLevel(): LogLevel {
	return globalLevel;
}

/** Turns ANSI coloring of formatted log lines on or off. */
export function setLogColors(enabled: boolean): void {
	useColors = enabled;
}

/** Turns the timestamp prefix of formatted log lines on or off. */
export function setLogTimestamps(enabled: boolean): void {
	logTimestamps = enabled;
}

/**
 * Formats the current local time as `HH:MM:SS.mmm`, zero-padded.
 */
function formatTimestamp(): string {
	const now = new Date();
	const pad = (value: number, width: number): string => value.toString().padStart(width, '0');

	const clock = [now.getHours(), now.getMinutes(), now.getSeconds()]
		.map((unit) => pad(unit, 2))
		.join(':');

	return `${clock}.${pad(now.getMilliseconds(), 3)}`;
}

/**
 * Builds one log line: optional timestamp, level label padded to five
 * characters, bracketed logger name, then the message, joined by single
 * spaces. ANSI codes are added only when colors are enabled.
 *
 * @param level - Level of the entry; unmapped levels are labelled `UNKNOWN`.
 * @param name - Logger name shown in brackets.
 * @param message - Message text, appended unchanged.
 */
function formatMessage(
	level: LogLevel,
	name: string,
	message: string,
): string {
	const label = (LEVEL_NAMES[level] ?? 'UNKNOWN').padEnd(5);
	const tint = LEVEL_COLORS[level] ?? '';
	const tag = `[${name}]`;

	const segments: string[] = [];

	if (logTimestamps) {
		const stamp = formatTimestamp();
		segments.push(useColors ? `${DIM}${stamp}${RESET}` : stamp);
	}

	segments.push(
		useColors ? `${tint}${label}${RESET}` : label,
		useColors ? `${BOLD}${tag}${RESET}` : tag,
		message,
	);

	return segments.join(' ');
}

/**
 * Named logger that writes formatted lines to the console. Each logger can
 * carry its own level; without one it follows the module-wide level.
 */
export class Logger {
	/** Name shown in brackets on every line. */
	readonly name: string;
	/** Per-logger level; `null` means "use the module-wide level". */
	private level: LogLevel | null = null;

	constructor(name: string) {
		this.name = name;
	}

	/** Pins this logger to `level`, regardless of the module-wide level. */
	setLevel(level: LogLevel): void {
		this.level = level;
	}

	/** Level in effect: the logger's own level if set, else the module-wide one. */
	getEffectiveLevel(): LogLevel {
		return this.level != null ? this.level : globalLevel;
	}

	/** True when entries at `level` would be written by this logger. */
	isEnabled(level: LogLevel): boolean {
		const threshold = this.getEffectiveLevel();
		return level >= threshold;
	}

	/** Logs at DEBUG level; extra `args` are passed through to the console. */
	debug(message: string, ...args: unknown[]): void {
		this.log(LogLevel.DEBUG, message, ...args);
	}

	/** Logs at INFO level; extra `args` are passed through to the console. */
	info(message: string, ...args: unknown[]): void {
		this.log(LogLevel.INFO, message, ...args);
	}

	/** Logs at WARN level; extra `args` are passed through to the console. */
	warn(message: string, ...args: unknown[]): void {
		this.log(LogLevel.WARN, message, ...args);
	}

	/** Logs at ERROR level; extra `args` are passed through to the console. */
	error(message: string, ...args: unknown[]): void {
		this.log(LogLevel.ERROR, message, ...args);
	}

	/**
	 * Formats and emits one entry if the level is enabled. ERROR goes to
	 * `console.error`, WARN to `console.warn`, everything else to `console.log`.
	 */
	private log(level: LogLevel, message: string, ...args: unknown[]): void {
		if (!this.isEnabled(level)) {
			return;
		}

		const line = formatMessage(level, this.name, message);

		if (level === LogLevel.ERROR) {
			console.error(line, ...args);
		} else if (level === LogLevel.WARN) {
			console.warn(line, ...args);
		} else {
			console.log(line, ...args);
		}
	}
}

// Loggers already handed out, keyed by name.
const loggerCache = new Map<string, Logger>();

/**
 * Returns the logger registered under `name`, creating and caching it on
 * first request. Repeated calls with the same name return the same instance.
 */
export function createLogger(name: string): Logger {
	const cached = loggerCache.get(name);
	if (cached) {
		return cached;
	}

	const created = new Logger(name);
	loggerCache.set(name, created);
	return created;
}


================================================
FILE: packages/core/src/metering/index.ts
================================================
export { UsageMeter, CompositeUsageMeter, BudgetDepletedError, estimateTokenCount } from './tracker.js';
export {
	DEFAULT_COST_RATES,
	type UsageRecord,
	type CostRates,
	type PricingTable,
	type ModelRole,
	type ActionUsageRecord,
	type MeteringSummary,
	type ModelUsageBreakdown,
	type RoleUsageBreakdown,
	type BudgetPolicy,
	type BudgetState,
} from './types.js';


================================================
FILE: packages/core/src/metering/tracker.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import {
	UsageMeter,
	CompositeUsageMeter,
	BudgetDepletedError,
	estimateTokenCount,
} from './tracker.js';
import type { PricingTable } from './types.js';

// ── Shared pricing for predictable cost calculations ──

// Fixed per-model rates (cost per million tokens, per the field names) so the
// cost assertions below have exact expected values.
const TEST_PRICING: PricingTable = {
	'gpt-4o': { inputCostPerMillion: 2.5, outputCostPerMillion: 10.0 },
	'gpt-4o-mini': { inputCostPerMillion: 0.15, outputCostPerMillion: 0.6 },
	'claude-3-5-sonnet': { inputCostPerMillion: 3.0, outputCostPerMillion: 15.0 },
};

// ── UsageMeter ──

// Suite for the single-model meter. Each test gets a fresh meter bound to
// 'gpt-4o' and the fixed TEST_PRICING table.
describe('UsageMeter', () => {
	let tracker: UsageMeter;

	beforeEach(() => {
		tracker = new UsageMeter('gpt-4o', TEST_PRICING);
	});

	// record(input, output) feeds the running totals returned by getTotalUsage().
	describe('record and getTotalUsage', () => {
		test('records token usage and returns totals', () => {
			tracker.record(100, 50);

			const usage = tracker.getTotalUsage();
			expect(usage.inputTokens).toBe(100);
			expect(usage.outputTokens).toBe(50);
			expect(usage.totalTokens).toBe(150);
		});

		test('accumulates across multiple records', () => {
			tracker.record(100, 50);
			tracker.record(200, 100);
			tracker.record(300, 150);

			const usage = tracker.getTotalUsage();
			expect(usage.inputTokens).toBe(600);
			expect(usage.outputTokens).toBe(300);
			expect(usage.totalTokens).toBe(900);
		});

		// Callers must not be able to mutate internal state through the returned object.
		test('returns a copy of usage object', () => {
			tracker.record(100, 50);
			const usage1 = tracker.getTotalUsage();
			const usage2 = tracker.getTotalUsage();
			expect(usage1).not.toBe(usage2);
			expect(usage1).toEqual(usage2);
		});
	});

	// Cost = tokens / 1M * rate, summed over input and output.
	describe('getEstimatedCost', () => {
		test('computes correct cost for gpt-4o', () => {
			// gpt-4o: $2.50/M input, $10.00/M output
			tracker.record(1_000_000, 500_000);

			const cost = tracker.getEstimatedCost();
			// input: 1M * 2.5/M = 2.5; output: 0.5M * 10/M = 5.0
			expect(cost).toBeCloseTo(7.5, 4);
		});

		// A model with no entry in the pricing table is costed at zero.
		test('returns 0 for unknown model', () => {
			const unknown = new UsageMeter('unknown-model', TEST_PRICING);
			unknown.record(1000, 500);

			expect(unknown.getEstimatedCost()).toBe(0);
		});

		// Formatted cost is a dollar sign followed by a number with four decimals.
		test('formats cost as dollar string', () => {
			tracker.record(100_000, 50_000);
			const formatted = tracker.getEstimatedCostFormatted();
			expect(formatted).toMatch(/^\$\d+\.\d{4}$/);
		});
	});

	// Every record() call is kept as its own entry, in call order.
	describe('getStepUsages', () => {
		test('tracks per-step usage', () => {
			tracker.record(100, 50);
			tracker.record(200, 100);

			const steps = tracker.getStepUsages();
			expect(steps).toHaveLength(2);
			expect(steps[0]).toEqual({ inputTokens: 100, outputTokens: 50, totalTokens: 150 });
			expect(steps[1]).toEqual({ inputTokens: 200, outputTokens: 100, totalTokens: 300 });
		});

		test('returns a copy of step usages array', () => {
			tracker.record(100, 50);
			const steps1 = tracker.getStepUsages();
			const steps2 = tracker.getStepUsages();
			expect(steps1).not.toBe(steps2);
		});
	});

	// Only the presence of the summary's labelled sections is checked, not exact numbers.
	describe('getSummary', () => {
		test('returns formatted summary string', () => {
			tracker.record(1000, 500);

			const summary = tracker.getSummary();
			expect(summary).toContain('Model: gpt-4o');
			expect(summary).toContain('Steps: 1');
			expect(summary).toContain('Input tokens:');
			expect(summary).toContain('Output tokens:');
			expect(summary).toContain('Total tokens:');
			expect(summary).toContain('Estimated cost: $');
		});
	});

	// reset() clears totals, per-step entries and therefore the cost.
	describe('reset', () => {
		test('resets all usage data', () => {
			tracker.record(1000, 500);
			tracker.record(2000, 1000);

			tracker.reset();

			const usage = tracker.getTotalUsage();
			expect(usage.inputTokens).toBe(0);
			expect(usage.outputTokens).toBe(0);
			expect(usage.totalTokens).toBe(0);
			expect(tracker.getStepUsages()).toHaveLength(0);
			expect(tracker.getEstimatedCost()).toBe(0);
		});
	});

	// Versioned model IDs should resolve to the pricing entry they contain.
	describe('partial model matching', () => {
		test('matches model by partial ID', () => {
			// "gpt-4o" pricing should match "gpt-4o-2024-08-06" via partial match
			const versioned = new UsageMeter('gpt-4o-2024-08-06', TEST_PRICING);
			versioned.record(1_000_000, 0);

			// Should find gpt-4o pricing ($2.50/M input)
			expect(versioned.getEstimatedCost()).toBeCloseTo(2.5, 4);
		});
	});
});

// ── CompositeUsageMeter ──

describe('CompositeUsageMeter', () => {
	let multiTracker: CompositeUsageMeter;

	beforeEach(() => {
		multiTracker = new CompositeUsageMeter(TEST_PRICING);
	});

	// record() feeds the per-model meters; getTotalUsage() sums across all of them.
	describe('record and getTotalUsage', () => {
		test('records usage for a single model', () => {
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 1000,
				outputTokens: 500,
			});

			const usage = multiTracker.getTotalUsage();
			expect(usage.inputTokens).toBe(1000);
			expect(usage.outputTokens).toBe(500);
			expect(usage.totalTokens).toBe(1500);
		});

		test('aggregates across multiple models', () => {
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 1000,
				outputTokens: 500,
			});
			multiTracker.record({
				modelId: 'gpt-4o-mini',
				role: 'extraction',
				inputTokens: 2000,
				outputTokens: 800,
			});

			// Totals are the sum over both models: 1000 + 2000 in, 500 + 800 out.
			const usage = multiTracker.getTotalUsage();
			expect(usage.inputTokens).toBe(3000);
			expect(usage.outputTokens).toBe(1300);
			expect(usage.totalTokens).toBe(4300);
		});

		test('returns estimated cost for the recorded call', () => {
			// record() returns the cost of this single call, not the running total.
			const cost = multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 1_000_000,
				outputTokens: 0,
			});

			// gpt-4o: $2.50/M input
			expect(cost).toBeCloseTo(2.5, 4);
		});
	});

	// getTotalCost() adds up the estimated cost of every per-model meter.
	describe('getTotalCost', () => {
		test('sums costs across all models', () => {
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 1_000_000,
				outputTokens: 0,
			});
			multiTracker.record({
				modelId: 'gpt-4o-mini',
				role: 'extraction',
				inputTokens: 1_000_000,
				outputTokens: 0,
			});

			const totalCost = multiTracker.getTotalCost();
			// gpt-4o: $2.50; gpt-4o-mini: $0.15
			expect(totalCost).toBeCloseTo(2.65, 4);
		});

		test('formats total cost', () => {
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 100_000,
				outputTokens: 50_000,
			});

			// Expected shape: a dollar sign followed by a number with exactly four decimals.
			const formatted = multiTracker.getTotalCostFormatted();
			expect(formatted).toMatch(/^\$\d+\.\d{4}$/);
		});
	});

	// getTracker() exposes the per-model UsageMeter and lazily creates missing ones.
	describe('getTracker', () => {
		test('returns per-model tracker', () => {
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 500,
				outputTokens: 200,
			});

			const tracker = multiTracker.getTracker('gpt-4o');
			expect(tracker.getTotalUsage().inputTokens).toBe(500);
		});

		test('creates tracker on first access', () => {
			// No usage was recorded for this model; the meter is created empty on demand.
			const tracker = multiTracker.getTracker('claude-3-5-sonnet');
			expect(tracker).toBeDefined();
			expect(tracker.getTotalUsage().totalTokens).toBe(0);
		});
	});

	// Budget behaviour: threshold callbacks, exhaustion handling and state reporting.
	// Cost figures assume the gpt-4o input rate of $2.50 per million tokens noted in
	// the comments below, so 200_000 input tokens cost $0.50.
	describe('budget alerts', () => {
		test('fires threshold callback when cost crosses threshold', () => {
			const thresholdCrossed = mock(() => {});
			multiTracker.setBudget({
				maxCostUsd: 1.0,
				thresholds: [0.5, 0.8, 1.0],
				onThresholdCrossed: thresholdCrossed,
			});

			// Record enough to cross 0.5 threshold ($0.50)
			// gpt-4o: $2.50/M input -> need 200k tokens for $0.50
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 200_000,
				outputTokens: 0,
			});

			// Only the 0.5 threshold is reached; 0.8 and 1.0 stay pending.
			expect(thresholdCrossed).toHaveBeenCalledTimes(1);
			// Callback arguments are (currentCost, threshold, maxCost).
			const call = (thresholdCrossed as any).mock.calls[0];
			expect(call[1]).toBe(0.5); // threshold
			expect(call[2]).toBe(1.0); // maxCost
		});

		test('fires multiple thresholds as cost increases', () => {
			const thresholdCrossed = mock(() => {});
			multiTracker.setBudget({
				maxCostUsd: 1.0,
				thresholds: [0.5, 1.0],
				onThresholdCrossed: thresholdCrossed,
			});

			// Cross 0.5 threshold
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 200_000,
				outputTokens: 0,
			});

			// Cross 1.0 threshold
			// (cumulative cost is now $1.00; no onBudgetExhausted is configured, so
			// reaching the limit does not throw)
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 200_000,
				outputTokens: 0,
			});

			expect(thresholdCrossed).toHaveBeenCalledTimes(2);
		});

		test('does not fire same threshold twice', () => {
			const thresholdCrossed = mock(() => {});
			multiTracker.setBudget({
				maxCostUsd: 1.0,
				thresholds: [0.5],
				onThresholdCrossed: thresholdCrossed,
			});

			// Cross 0.5 threshold twice
			// (the second record keeps the cost above the threshold, which must not re-fire)
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 200_000,
				outputTokens: 0,
			});
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 10_000,
				outputTokens: 0,
			});

			expect(thresholdCrossed).toHaveBeenCalledTimes(1);
		});

		test('throws BudgetDepletedError when budget exceeded and callback returns false', () => {
			// A $0.01 budget is far exceeded by a single $2.50 call.
			multiTracker.setBudget({
				maxCostUsd: 0.01,
				thresholds: [1.0],
				onThresholdCrossed: () => {},
				onBudgetExhausted: () => false,
			});

			expect(() =>
				multiTracker.record({
					modelId: 'gpt-4o',
					role: 'main',
					inputTokens: 1_000_000,
					outputTokens: 0,
				}),
			).toThrow(BudgetDepletedError);
		});

		test('allows continuing when onBudgetExhausted returns true', () => {
			// Same overspend as above, but the callback opts in to continuing.
			multiTracker.setBudget({
				maxCostUsd: 0.01,
				thresholds: [1.0],
				onThresholdCrossed: () => {},
				onBudgetExhausted: () => true,
			});

			expect(() =>
				multiTracker.record({
					modelId: 'gpt-4o',
					role: 'main',
					inputTokens: 1_000_000,
					outputTokens: 0,
				}),
			).not.toThrow();
		});

		test('getBudgetState reflects current state', () => {
			multiTracker.setBudget({
				maxCostUsd: 10.0,
				thresholds: [0.5],
				onThresholdCrossed: () => {},
			});

			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 1_000_000,
				outputTokens: 0,
			});

			// $2.50 spent out of $10.00 is a quarter of the budget.
			const status = multiTracker.getBudgetState();
			expect(status.maxCostUsd).toBe(10.0);
			expect(status.currentCostUsd).toBeCloseTo(2.5, 2);
			expect(status.fractionUsed).toBeCloseTo(0.25, 2);
			expect(status.isExhausted).toBe(false);
		});

		test('clearBudget removes budget configuration', () => {
			multiTracker.setBudget({
				maxCostUsd: 1.0,
				thresholds: [0.5],
				onThresholdCrossed: () => {},
			});

			multiTracker.clearBudget();

			// Without a budget the limit and fraction are undefined and nothing is exhausted.
			const status = multiTracker.getBudgetState();
			expect(status.maxCostUsd).toBeUndefined();
			expect(status.fractionUsed).toBeUndefined();
			expect(status.isExhausted).toBe(false);
		});
	});

	// getSummary() returns the structured MeteringSummary; getSummaryText() renders it
	// as text.
	describe('MeteringSummary generation', () => {
		test('generates comprehensive summary', () => {
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 1000,
				outputTokens: 500,
				stepIndex: 0,
				actionName: 'tap',
			});
			multiTracker.record({
				modelId: 'gpt-4o-mini',
				role: 'extraction',
				inputTokens: 2000,
				outputTokens: 300,
				stepIndex: 1,
				actionName: 'extract',
			});

			const summary = multiTracker.getSummary();

			// Aggregates over both calls.
			expect(summary.totalInputTokens).toBe(3000);
			expect(summary.totalOutputTokens).toBe(800);
			expect(summary.totalTokens).toBe(3800);
			expect(summary.totalCalls).toBe(2);
			expect(summary.totalEstimatedCost).toBeGreaterThan(0);

			// By model breakdown
			expect(summary.byModel).toHaveLength(2);
			const gpt4o = summary.byModel.find((m) => m.modelId === 'gpt-4o');
			expect(gpt4o).toBeDefined();
			expect(gpt4o!.inputTokens).toBe(1000);
			expect(gpt4o!.callCount).toBe(1);

			// By role breakdown
			expect(summary.byRole).toHaveLength(2);
			const mainRole = summary.byRole.find((r) => r.role === 'main');
			expect(mainRole).toBeDefined();
			expect(mainRole!.callCount).toBe(1);

			// Action trace
			// (entries are kept in the order the calls were recorded)
			expect(summary.actionTrace).toHaveLength(2);
			expect(summary.actionTrace[0].actionName).toBe('tap');
			expect(summary.actionTrace[1].actionName).toBe('extract');

			// Duration
			// (defined because the first record() starts the session timer)
			expect(summary.durationMs).toBeDefined();
			expect(summary.durationMs!).toBeGreaterThanOrEqual(0);
		});

		test('generates human-readable summary text', () => {
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 10000,
				outputTokens: 5000,
			});

			// Only section headings and labels are asserted, not the formatted numbers.
			const text = multiTracker.getSummaryText();
			expect(text).toContain('Token Usage Summary');
			expect(text).toContain('Total:');
			expect(text).toContain('Cost:');
			expect(text).toContain('Calls:');
			expect(text).toContain('Duration:');
			expect(text).toContain('By Role');
			expect(text).toContain('By Model');
		});

		test('includes budget info in summary text when configured', () => {
			// An empty thresholds list still configures a budget; it just never alerts.
			multiTracker.setBudget({
				maxCostUsd: 5.0,
				thresholds: [],
				onThresholdCrossed: () => {},
			});

			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 100_000,
				outputTokens: 0,
			});

			const text = multiTracker.getSummaryText();
			expect(text).toContain('Budget:');
			// The limit is rendered with four decimals.
			expect(text).toContain('$5.0000');
		});
	});

	// reset() wipes usage, trace, crossed thresholds and the timer, but keeps the
	// configured budget (the second test relies on the budget surviving a reset).
	describe('reset', () => {
		test('clears all tracking data', () => {
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 1000,
				outputTokens: 500,
			});
			multiTracker.record({
				modelId: 'gpt-4o-mini',
				role: 'extraction',
				inputTokens: 500,
				outputTokens: 200,
			});

			multiTracker.reset();

			const usage = multiTracker.getTotalUsage();
			expect(usage.totalTokens).toBe(0);
			expect(multiTracker.getTotalCost()).toBe(0);

			const summary = multiTracker.getSummary();
			expect(summary.totalCalls).toBe(0);
			expect(summary.byModel).toHaveLength(0);
			expect(summary.byRole).toHaveLength(0);
			// The session timer is cleared too, so no duration is reported.
			expect(summary.durationMs).toBeUndefined();
		});

		test('resets budget thresholds', () => {
			const thresholdCrossed = mock(() => {});
			multiTracker.setBudget({
				maxCostUsd: 1.0,
				thresholds: [0.5],
				onThresholdCrossed: thresholdCrossed,
			});

			// Cross 0.5 threshold
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 200_000,
				outputTokens: 0,
			});

			multiTracker.reset();

			// reset() clears both the crossed-threshold set and the per-model meters, so
			// the cost starts from 0 again. Recording the same $0.50 therefore crosses
			// the 0.5 threshold a second time and fires the callback again.
			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 200_000,
				outputTokens: 0,
			});

			// Both before and after reset should have fired
			expect(thresholdCrossed).toHaveBeenCalledTimes(2);
		});
	});

	// The session timer starts either on the first record() or via an explicit start().
	describe('auto-start', () => {
		test('automatically starts timer on first record', () => {
			// Before anything is recorded there is no start time, hence no duration.
			const summary1 = multiTracker.getSummary();
			expect(summary1.durationMs).toBeUndefined();

			multiTracker.record({
				modelId: 'gpt-4o',
				role: 'main',
				inputTokens: 100,
				outputTokens: 50,
			});

			const summary2 = multiTracker.getSummary();
			expect(summary2.durationMs).toBeDefined();
		});

		test('explicit start() sets the timer', () => {
			multiTracker.start();

			// A duration is reported even though no usage has been recorded yet.
			const summary = multiTracker.getSummary();
			expect(summary.durationMs).toBeDefined();
			expect(summary.durationMs!).toBeGreaterThanOrEqual(0);
		});
	});
});

// ── estimateTokenCount ──

// The estimate is ceil(text.length / 4); these cases cover a typical string,
// the empty string and the round-up boundary.
describe('estimateTokenCount', () => {
	test('estimates roughly 1 token per 4 chars', () => {
		expect(estimateTokenCount('hello world')).toBe(3); // ceil(11/4)
	});

	test('returns 0 for empty string', () => {
		expect(estimateTokenCount('')).toBe(0);
	});

	test('rounds up', () => {
		expect(estimateTokenCount('a')).toBe(1); // ceil(1/4) = 1
	});
});

// ── BudgetDepletedError ──

// Checks the error's public fields and that both amounts appear in the message
// formatted with four decimals.
describe('BudgetDepletedError', () => {
	test('has correct properties', () => {
		// Constructor arguments are (currentCost, maxCost).
		const error = new BudgetDepletedError(5.5, 5.0);
		expect(error.name).toBe('BudgetDepletedError');
		expect(error.currentCost).toBe(5.5);
		expect(error.maxCost).toBe(5.0);
		expect(error.message).toContain('$5.5000');
		expect(error.message).toContain('$5.0000');
	});

	test('is instanceof Error', () => {
		const error = new BudgetDepletedError(1, 1);
		expect(error instanceof Error).toBe(true);
	});
});


================================================
FILE: packages/core/src/metering/tracker.ts
================================================
import type {
	UsageRecord,
	CostRates,
	PricingTable,
	ModelRole,
	ActionUsageRecord,
	MeteringSummary,
	ModelUsageBreakdown,
	RoleUsageBreakdown,
	BudgetPolicy,
	BudgetState,
} from './types.js';
import { DEFAULT_COST_RATES } from './types.js';

// ── Single-model tracker (unchanged public API) ──

/**
 * Accumulates token usage for one model and estimates what it cost.
 *
 * Each `record()` call is stored as its own step and added to the running
 * totals. Cost is computed from the pricing entry resolved for `modelId`; when
 * no entry can be resolved the estimated cost is 0.
 */
export class UsageMeter {
	private usage: UsageRecord = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
	private readonly pricing: PricingTable;
	private readonly modelId: string;
	private stepUsages: UsageRecord[] = [];

	/**
	 * @param modelId Model identifier used for the pricing lookup and shown in the summary.
	 * @param customPricing Pricing table to use instead of `DEFAULT_COST_RATES`.
	 */
	constructor(modelId: string, customPricing?: PricingTable) {
		this.modelId = modelId;
		this.pricing = customPricing ?? DEFAULT_COST_RATES;
	}

	/** Add one call's token counts to the totals and append it to the step history. */
	record(inputTokens: number, outputTokens: number): void {
		const stepUsage: UsageRecord = {
			inputTokens,
			outputTokens,
			totalTokens: inputTokens + outputTokens,
		};

		this.usage.inputTokens += inputTokens;
		this.usage.outputTokens += outputTokens;
		this.usage.totalTokens += inputTokens + outputTokens;
		this.stepUsages.push(stepUsage);
	}

	/** Running totals, returned as a copy so callers cannot alter the meter's state. */
	getTotalUsage(): UsageRecord {
		return { ...this.usage };
	}

	/**
	 * Per-call history in recording order.
	 *
	 * Both the array and the records inside it are copies: handing out the stored
	 * record objects would let a caller rewrite the history by mutating an entry.
	 */
	getStepUsages(): UsageRecord[] {
		return this.stepUsages.map((step) => ({ ...step }));
	}

	/** Estimated cost in USD of everything recorded so far; 0 when the model has no pricing. */
	getEstimatedCost(): number {
		const cost = this.getModelCost();
		if (!cost) return 0;

		return (
			(this.usage.inputTokens / 1_000_000) * cost.inputCostPerMillion +
			(this.usage.outputTokens / 1_000_000) * cost.outputCostPerMillion
		);
	}

	/** Estimated cost rendered as a dollar amount with four decimals, e.g. `$0.7500`. */
	getEstimatedCostFormatted(): string {
		const cost = this.getEstimatedCost();
		return `$${cost.toFixed(4)}`;
	}

	/** Pricing entry for this meter's model, or undefined when none can be resolved. */
	private getModelCost(): CostRates | undefined {
		return resolveModelCost(this.modelId, this.pricing);
	}

	/** Multi-line, human-readable report: model, step count, token counts and cost. */
	getSummary(): string {
		const lines = [
			`Model: ${this.modelId}`,
			`Steps: ${this.stepUsages.length}`,
			`Input tokens: ${this.usage.inputTokens.toLocaleString()}`,
			`Output tokens: ${this.usage.outputTokens.toLocaleString()}`,
			`Total tokens: ${this.usage.totalTokens.toLocaleString()}`,
			`Estimated cost: ${this.getEstimatedCostFormatted()}`,
		];
		return lines.join('\n');
	}

	/** Clear the totals and the step history; model ID and pricing are kept. */
	reset(): void {
		this.usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
		this.stepUsages = [];
	}
}

// ── Multi-model tracker ──

/**
 * Tracks token usage across multiple LLM roles (main, extraction, judge, compaction)
 * with per-action cost breakdown, budget alerts, and comprehensive summaries.
 *
 * Usage is stored twice: as one `UsageMeter` per model ID (source of the totals
 * and of the budget check) and as a chronological action trace (source of the
 * per-model / per-role breakdowns and of the call count).
 */
export class CompositeUsageMeter {
	private readonly pricing: PricingTable;
	private readonly trackers = new Map<string, UsageMeter>();
	private readonly actionTrace: ActionUsageRecord[] = [];
	private budgetConfig: BudgetPolicy | undefined;
	private crossedThresholds = new Set<number>();
	private startTime: number | undefined;

	/** @param customPricing Pricing table to use instead of `DEFAULT_COST_RATES`. */
	constructor(customPricing?: PricingTable) {
		this.pricing = customPricing ?? DEFAULT_COST_RATES;
	}

	/** Start the session timer. Called automatically on first record if not called explicitly. */
	start(): void {
		this.startTime = Date.now();
	}

	/**
	 * Configure budget alerts. Thresholds default to [0.5, 0.8, 1.0].
	 * Any previously crossed thresholds are forgotten.
	 * Returns this for chaining.
	 */
	setBudget(config: BudgetPolicy): this {
		this.budgetConfig = {
			...config,
			// Keep a private, ascending copy: thresholds crossed by the same call then
			// fire lowest-first, and later changes to the caller's array have no effect.
			thresholds: [...(config.thresholds ?? [0.5, 0.8, 1.0])].sort((a, b) => a - b),
		};
		this.crossedThresholds.clear();
		return this;
	}

	/** Clear the budget configuration. */
	clearBudget(): void {
		this.budgetConfig = undefined;
		this.crossedThresholds.clear();
	}

	/**
	 * Record token usage for a specific model and role.
	 * Returns the estimated cost for this single call.
	 * Throws BudgetDepletedError if the budget is exhausted and onBudgetExhausted
	 * returns false; the usage has already been recorded when that happens.
	 */
	record(opts: {
		modelId: string;
		role: ModelRole;
		inputTokens: number;
		outputTokens: number;
		stepIndex?: number;
		actionName?: string;
	}): number {
		// Compare against undefined rather than truthiness so a timestamp of 0 still counts.
		if (this.startTime === undefined) this.start();

		// Get or create per-model tracker
		const tracker = this.getOrCreateTracker(opts.modelId);
		tracker.record(opts.inputTokens, opts.outputTokens);

		// Compute cost for this call
		const cost = computeCost(opts.inputTokens, opts.outputTokens, opts.modelId, this.pricing);

		// Append to action trace; without an explicit step index the entry's
		// position in the trace is used.
		const entry: ActionUsageRecord = {
			stepIndex: opts.stepIndex ?? this.actionTrace.length,
			actionName: opts.actionName ?? 'unknown',
			role: opts.role,
			modelId: opts.modelId,
			usage: {
				inputTokens: opts.inputTokens,
				outputTokens: opts.outputTokens,
				totalTokens: opts.inputTokens + opts.outputTokens,
			},
			cost,
			timestamp: Date.now(),
		};
		this.actionTrace.push(entry);

		// Check budget thresholds
		this.checkBudget();

		return cost;
	}

	/** Get the per-model UsageMeter (creates one if missing). */
	getTracker(modelId: string): UsageMeter {
		return this.getOrCreateTracker(modelId);
	}

	/** Total estimated cost across all models. */
	getTotalCost(): number {
		let total = 0;
		for (const tracker of this.trackers.values()) {
			total += tracker.getEstimatedCost();
		}
		return total;
	}

	/** Formatted total cost string. */
	getTotalCostFormatted(): string {
		return `$${this.getTotalCost().toFixed(4)}`;
	}

	/** Aggregate token usage across all models. */
	getTotalUsage(): UsageRecord {
		let inputTokens = 0;
		let outputTokens = 0;
		for (const tracker of this.trackers.values()) {
			const u = tracker.getTotalUsage();
			inputTokens += u.inputTokens;
			outputTokens += u.outputTokens;
		}
		return { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens };
	}

	/** Get the current budget status. */
	getBudgetState(): BudgetState {
		const currentCost = this.getTotalCost();
		const maxCost = this.budgetConfig?.maxCostUsd;

		let fractionUsed: number | undefined;
		if (maxCost != null) {
			// A zero budget with zero spend would be 0 / 0 (NaN). Report it as fully
			// used instead, which agrees with isExhausted below.
			fractionUsed = maxCost === 0 && currentCost === 0 ? 1 : currentCost / maxCost;
		}

		return {
			currentCostUsd: currentCost,
			maxCostUsd: maxCost,
			fractionUsed,
			isExhausted: maxCost != null ? currentCost >= maxCost : false,
			crossedThresholds: [...this.crossedThresholds].sort((a, b) => a - b),
		};
	}

	/** Build a full MeteringSummary with per-model and per-role breakdowns. */
	getSummary(): MeteringSummary {
		const totalUsage = this.getTotalUsage();

		return {
			totalInputTokens: totalUsage.inputTokens,
			totalOutputTokens: totalUsage.outputTokens,
			totalTokens: totalUsage.totalTokens,
			totalEstimatedCost: this.getTotalCost(),
			totalCalls: this.actionTrace.length,
			byModel: this.buildModelBreakdown(),
			byRole: this.buildRoleBreakdown(),
			// Copy the entries as well as the array so callers cannot edit the stored trace.
			actionTrace: this.actionTrace.map((entry) => ({ ...entry, usage: { ...entry.usage } })),
			durationMs: this.startTime !== undefined ? Date.now() - this.startTime : undefined,
		};
	}

	/** Human-readable summary string. */
	getSummaryText(): string {
		const s = this.getSummary();
		const lines: string[] = [
			'=== Token Usage Summary ===',
			`Total: ${s.totalTokens.toLocaleString()} tokens (${s.totalInputTokens.toLocaleString()} in / ${s.totalOutputTokens.toLocaleString()} out)`,
			`Cost: $${s.totalEstimatedCost.toFixed(4)}`,
			`Calls: ${s.totalCalls}`,
		];

		if (s.durationMs != null) {
			lines.push(`Duration: ${(s.durationMs / 1000).toFixed(1)}s`);
		}

		if (s.byRole.length > 0) {
			lines.push('', '--- By Role ---');
			for (const r of s.byRole) {
				lines.push(
					`  ${r.role}: ${r.totalTokens.toLocaleString()} tokens, $${r.estimatedCost.toFixed(4)} (${r.callCount} calls)`,
				);
			}
		}

		if (s.byModel.length > 0) {
			lines.push('', '--- By Model ---');
			for (const m of s.byModel) {
				lines.push(
					`  ${m.modelId}: ${m.totalTokens.toLocaleString()} tokens, $${m.estimatedCost.toFixed(4)} (${m.callCount} calls)`,
				);
			}
		}

		const budget = this.getBudgetState();
		if (budget.maxCostUsd != null) {
			const pct = ((budget.fractionUsed ?? 0) * 100).toFixed(1);
			lines.push(
				'',
				`Budget: $${budget.currentCostUsd.toFixed(4)} / $${budget.maxCostUsd.toFixed(4)} (${pct}%)`,
			);
		}

		return lines.join('\n');
	}

	/** Reset all tracking data. The budget configuration itself is kept. */
	reset(): void {
		for (const tracker of this.trackers.values()) {
			tracker.reset();
		}
		this.trackers.clear();
		this.actionTrace.length = 0;
		this.crossedThresholds.clear();
		this.startTime = undefined;
	}

	// ── Private helpers ──

	/** Look up the meter for a model ID, creating and registering it on first use. */
	private getOrCreateTracker(modelId: string): UsageMeter {
		let tracker = this.trackers.get(modelId);
		if (!tracker) {
			tracker = new UsageMeter(modelId, this.pricing);
			this.trackers.set(modelId, tracker);
		}
		return tracker;
	}

	/**
	 * Fire callbacks for thresholds newly reached by the current total cost, then
	 * enforce the hard limit. Does nothing when no budget is configured.
	 */
	private checkBudget(): void {
		if (!this.budgetConfig) return;

		const currentCost = this.getTotalCost();
		const { maxCostUsd, thresholds, onThresholdCrossed, onBudgetExhausted } = this.budgetConfig;

		// Check each threshold (already in ascending order, see setBudget)
		for (const threshold of thresholds ?? []) {
			if (this.crossedThresholds.has(threshold)) continue;

			const thresholdCost = maxCostUsd * threshold;
			if (currentCost >= thresholdCost) {
				// Mark before calling back so each threshold fires at most once.
				this.crossedThresholds.add(threshold);
				onThresholdCrossed(currentCost, threshold, maxCostUsd);
			}
		}

		// Check full exhaustion; without an onBudgetExhausted handler nothing is thrown.
		if (currentCost >= maxCostUsd) {
			if (onBudgetExhausted) {
				const allow = onBudgetExhausted(currentCost, maxCostUsd);
				if (!allow) {
					throw new BudgetDepletedError(currentCost, maxCostUsd);
				}
			}
		}
	}

	/** Aggregate the action trace by model ID, most expensive model first. */
	private buildModelBreakdown(): ModelUsageBreakdown[] {
		const map = new Map<string, ModelUsageBreakdown>();

		for (const entry of this.actionTrace) {
			let mb = map.get(entry.modelId);
			if (!mb) {
				mb = {
					modelId: entry.modelId,
					inputTokens: 0,
					outputTokens: 0,
					totalTokens: 0,
					estimatedCost: 0,
					callCount: 0,
				};
				map.set(entry.modelId, mb);
			}
			mb.inputTokens += entry.usage.inputTokens;
			mb.outputTokens += entry.usage.outputTokens;
			mb.totalTokens += entry.usage.totalTokens;
			mb.estimatedCost += entry.cost;
			mb.callCount++;
		}

		return [...map.values()].sort((a, b) => b.estimatedCost - a.estimatedCost);
	}

	/** Aggregate the action trace by role, most expensive role first. */
	private buildRoleBreakdown(): RoleUsageBreakdown[] {
		const map = new Map<ModelRole, RoleUsageBreakdown>();

		for (const entry of this.actionTrace) {
			let rb = map.get(entry.role);
			if (!rb) {
				rb = {
					role: entry.role,
					inputTokens: 0,
					outputTokens: 0,
					totalTokens: 0,
					estimatedCost: 0,
					callCount: 0,
				};
				map.set(entry.role, rb);
			}
			rb.inputTokens += entry.usage.inputTokens;
			rb.outputTokens += entry.usage.outputTokens;
			rb.totalTokens += entry.usage.totalTokens;
			rb.estimatedCost += entry.cost;
			rb.callCount++;
		}

		return [...map.values()].sort((a, b) => b.estimatedCost - a.estimatedCost);
	}
}

// ── Budget error ──

/**
 * Error raised by the budget check when spending has reached the configured
 * limit and the exhaustion handler declined to continue.
 *
 * The two amounts are exposed as fields and also embedded in the message,
 * formatted with four decimals.
 */
export class BudgetDepletedError extends Error {
	/** Total estimated cost in USD at the moment the limit was hit. */
	readonly currentCost: number;
	/** Configured spending limit in USD. */
	readonly maxCost: number;

	constructor(currentCost: number, maxCost: number) {
		super(BudgetDepletedError.describe(currentCost, maxCost));
		this.name = 'BudgetDepletedError';
		this.currentCost = currentCost;
		this.maxCost = maxCost;
	}

	/** Build the human-readable message for the given amounts. */
	private static describe(spent: number, limit: number): string {
		const spentText = spent.toFixed(4);
		const limitText = limit.toFixed(4);
		return `Token budget exhausted: $${spentText} spent, limit is $${limitText}`;
	}
}

// ── Shared utilities ──

/**
 * Rough token estimate for a piece of text: one token per four characters
 * (as counted by `text.length`), rounded up. The empty string yields 0.
 */
export function estimateTokenCount(text: string): number {
	const charsPerToken = 4;
	const charCount = text.length;
	const fullTokens = Math.floor(charCount / charsPerToken);
	// A trailing partial group of characters still counts as one token.
	return charCount % charsPerToken === 0 ? fullTokens : fullTokens + 1;
}

/**
 * Resolve pricing for a model ID.
 *
 * Lookup order:
 *  1. Exact key match (own keys only, so names like "constructor" never hit
 *     `Object.prototype`).
 *  2. The longest key that is contained in `modelId`. Preferring the longest
 *     key makes "gpt-4o-mini-2024-07-18" resolve to "gpt-4o-mini" rather than to
 *     the shorter "gpt-4o" that happens to be listed first.
 *  3. The shortest key that contains `modelId` (an abbreviated ID).
 *
 * Ties keep the first key in table order. An empty `modelId`, or empty keys,
 * never match by substring.
 *
 * @returns The matching rates, or undefined when nothing matches.
 */
function resolveModelCost(modelId: string, pricing: PricingTable): CostRates | undefined {
	if (Object.prototype.hasOwnProperty.call(pricing, modelId)) return pricing[modelId];

	// Every string contains the empty string, so it would match every key.
	if (modelId.length === 0) return undefined;

	let containedKey: string | undefined; // longest key found inside modelId
	let containingKey: string | undefined; // shortest key that has modelId inside it

	for (const key of Object.keys(pricing)) {
		if (key.length === 0) continue;

		if (modelId.includes(key)) {
			if (containedKey === undefined || key.length > containedKey.length) {
				containedKey = key;
			}
		} else if (key.includes(modelId)) {
			if (containingKey === undefined || key.length < containingKey.length) {
				containingKey = key;
			}
		}
	}

	const matchedKey = containedKey ?? containingKey;
	return matchedKey === undefined ? undefined : pricing[matchedKey];
}

/**
 * Estimated USD cost of one call.
 *
 * Looks up the rates for `modelId` in `pricing` and applies the per-million
 * input and output rates to the given token counts. Returns 0 when no rates can
 * be resolved for the model.
 */
function computeCost(
	inputTokens: number,
	outputTokens: number,
	modelId: string,
	pricing: PricingTable,
): number {
	const rates = resolveModelCost(modelId, pricing);
	if (!rates) {
		return 0;
	}

	const inputCost = (inputTokens / 1_000_000) * rates.inputCostPerMillion;
	const outputCost = (outputTokens / 1_000_000) * rates.outputCostPerMillion;
	return inputCost + outputCost;
}


================================================
FILE: packages/core/src/metering/types.ts
================================================
/** Token counts for a single call or for an accumulated total. */
export interface UsageRecord {
	/** Tokens consumed by the prompt / input side. */
	inputTokens: number;
	/** Tokens produced by the model. */
	outputTokens: number;
	/** Sum of inputTokens and outputTokens. */
	totalTokens: number;
}

/** Price of one model, in USD per one million tokens. */
export interface CostRates {
	/** USD charged per one million input tokens. */
	inputCostPerMillion: number;
	/** USD charged per one million output tokens. */
	outputCostPerMillion: number;
}

/**
 * Pricing lookup table keyed by model ID.
 * The cost lookup in tracker.ts tries an exact key first and then falls back to
 * substring matching between the requested model ID and the keys.
 */
export interface PricingTable {
	[modelId: string]: CostRates;
}

/**
 * Role that a model can serve in the agent pipeline.
 * - main: primary reasoning / action-selection model
 * - extraction: lightweight model for page content extraction
 * - judge: evaluates task completion
 * - compaction: summarizes / compresses conversation history
 *
 * The role is an attribution label supplied with each recorded call; the
 * per-role breakdown of a usage summary groups calls by it.
 */
export type ModelRole = 'main' | 'extraction' | 'judge' | 'compaction';

/** Token usage attributed to a single agent action (step). */
export interface ActionUsageRecord {
	/** Step number given by the caller; defaults to the entry's position in the trace. */
	stepIndex: number;
	/** Name of the action given by the caller; 'unknown' when none was supplied. */
	actionName: string;
	/** Pipeline role the call was made for. */
	role: ModelRole;
	/** Model ID the call was made against. */
	modelId: string;
	/** Token counts of this call alone. */
	usage: UsageRecord;
	/** Estimated cost of this call in USD (0 when the model has no pricing). */
	cost: number;
	/** Date.now() value captured when the call was recorded. */
	timestamp: number;
}

/** Per-model aggregated usage, summed over every recorded call for that model ID. */
export interface ModelUsageBreakdown {
	modelId: string;
	inputTokens: number;
	outputTokens: number;
	totalTokens: number;
	/** Sum of the per-call estimated costs, in USD. */
	estimatedCost: number;
	/** Number of recorded calls that used this model. */
	callCount: number;
}

/** Per-role aggregated usage, summed over every recorded call made for that role. */
export interface RoleUsageBreakdown {
	role: ModelRole;
	inputTokens: number;
	outputTokens: number;
	totalTokens: number;
	/** Sum of the per-call estimated costs, in USD. */
	estimatedCost: number;
	/** Number of recorded calls made for this role. */
	callCount: number;
}

/** Comprehensive usage summary across all models and roles. */
export interface MeteringSummary {
	/** Aggregate across everything (applies to the five total* fields below). */
	totalInputTokens: number;
	totalOutputTokens: number;
	totalTokens: number;
	/** Total estimated cost in USD. */
	totalEstimatedCost: number;
	/** Number of recorded calls (one per action trace entry). */
	totalCalls: number;

	/** Breakdown by model ID, sorted by estimated cost, highest first. */
	byModel: ModelUsageBreakdown[];

	/** Breakdown by role, sorted by estimated cost, highest first. */
	byRole: RoleUsageBreakdown[];

	/** Per-action cost trace (chronological). */
	actionTrace: ActionUsageRecord[];

	/**
	 * Wall-clock duration of the tracked session in ms (if available).
	 * Undefined until the session timer has been started.
	 */
	durationMs?: number;
}

/** Configuration for budget alerts. */
export interface BudgetPolicy {
	/** Maximum allowed cost in USD. */
	maxCostUsd: number;

	/**
	 * Warning thresholds as fractions of maxCostUsd (e.g. [0.5, 0.8, 1.0]).
	 * Callbacks fire when cost first crosses each threshold.
	 * When omitted, the meter uses [0.5, 0.8, 1.0].
	 */
	thresholds?: number[];

	/**
	 * Called each time a threshold is crossed, at most once per threshold.
	 * Receives the current total cost, the threshold fraction and maxCostUsd.
	 */
	onThresholdCrossed: (currentCost: number, threshold: number, maxCost: number) => void;

	/**
	 * Called when the budget is fully exhausted. Return true to allow continuing.
	 * Returning false makes the meter throw BudgetDepletedError. When this
	 * callback is omitted, exhaustion does not throw.
	 */
	onBudgetExhausted?: (currentCost: number, maxCost: number) => boolean;
}

/** Status of budget consumption. */
export interface BudgetState {
	/** Total estimated cost so far, in USD. */
	currentCostUsd: number;
	/** Configured limit in USD; undefined if no budget set. */
	maxCostUsd: number | undefined;
	/** Fraction 0..1+ of budget consumed. undefined if no budget set. */
	fractionUsed: number | undefined;
	/** True when a budget is set and the current cost has reached it. */
	isExhausted: boolean;
	/** Threshold fractions that have already fired, in ascending order. */
	crossedThresholds: number[];
}

// ── Comprehensive default pricing ──

/**
 * Built-in pricing table, used by the usage meters when no custom table is
 * passed to their constructor. All rates are USD per one million tokens.
 *
 * Keys are base model names; the cost lookup also accepts model IDs that
 * contain a key as a substring (for example a dated variant of a model).
 *
 * NOTE(review): the figures are hard-coded and will drift from the providers'
 * current price lists; confirm them before relying on the estimates.
 */
export const DEFAULT_COST_RATES: PricingTable = {
	// OpenAI
	'gpt-4o': { inputCostPerMillion: 2.5, outputCostPerMillion: 10.0 },
	'gpt-4o-mini': { inputCostPerMillion: 0.15, outputCostPerMillion: 0.6 },
	'gpt-4-turbo': { inputCostPerMillion: 10.0, outputCostPerMillion: 30.0 },
	'gpt-4.5-preview': { inputCostPerMillion: 75.0, outputCostPerMillion: 150.0 },
	'o1': { inputCostPerMillion: 15.0, outputCostPerMillion: 60.0 },
	'o1-mini': { inputCostPerMillion: 3.0, outputCostPerMillion: 12.0 },
	'o1-preview': { inputCostPerMillion: 15.0, outputCostPerMillion: 60.0 },
	'o3-mini': { inputCostPerMillion: 1.1, outputCostPerMillion: 4.4 },

	// Anthropic
	'claude-3-5-sonnet': { inputCostPerMillion: 3.0, outputCostPerMillion: 15.0 },
	'claude-3-5-haiku': { inputCostPerMillion: 0.8, outputCostPerMillion: 4.0 },
	'claude-3-opus': { inputCostPerMillion: 15.0, outputCostPerMillion: 75.0 },
	'claude-3-haiku': { inputCostPerMillion: 0.25, outputCostPerMillion: 1.25 },
	'claude-4-sonnet': { inputCostPerMillion: 3.0, outputCostPerMillion: 15.0 },
	'claude-4-opus': { inputCostPerMillion: 15.0, outputCostPerMillion: 75.0 },

	// Google
	'gemini-1.5-pro': { inputCostPerMillion: 1.25, outputCostPerMillion: 5.0 },
	'gemini-1.5-flash': { inputCostPerMillion: 0.075, outputCostPerMillion: 0.3 },
	'gemini-2.0-flash': { inputCostPerMillion: 0.1, outputCostPerMillion: 0.4 },
	'gemini-2.0-pro': { inputCostPerMillion: 1.25, outputCostPerMillion: 5.0 },
	'gemini-2.5-pro': { inputCostPerMillion: 1.25, outputCostPerMillion: 10.0 },
	'gemini-2.5-flash': { inputCostPerMillion: 0.15, outputCostPerMillion: 0.6 },

	// Mistral
	'mistral-large': { inputCostPerMillion: 2.0, outputCostPerMillion: 6.0 },
	'mistral-small': { inputCostPerMillion: 0.2, outputCostPerMillion: 0.6 },
	'codestral': { inputCostPerMillion: 0.3, outputCostPerMillion: 0.9 },

	// DeepSeek
	'deepseek-chat': { inputCostPerMillion: 0.14, outputCostPerMillion: 0.28 },
	'deepseek-reasoner': { inputCostPerMillion: 0.55, outputCostPerMillion: 2.19 },
};


================================================
FILE: packages/core/src/model/adapters/vercel.ts
================================================
import { generateObject, type CoreMessage, type CoreUserMessage } from 'ai';
import type { LanguageModelV1 } from 'ai';
import type { ZodType } from 'zod';
import type { LanguageModel, InferenceOptions, ModelProvider } from '../interface.js';
import type { InferenceResult, InferenceUsage } from '../types.js';
import type { Message, ContentPart } from '../messages.js';
import { ModelError, ModelThrottledError } from '../../errors.js';

/** Construction options for VercelModelAdapter. */
export interface VercelModelAdapterOptions {
	/** AI SDK model instance that every invoke() call is sent to. */
	model: LanguageModelV1;
	/** Override provider detection (otherwise inferred from model.provider or modelId). */
	provider?: ModelProvider;
	/** Default sampling temperature; the adapter falls back to 0 when omitted. */
	temperature?: number;
	/** Default output token cap; the adapter falls back to 4096 when omitted. */
	maxTokens?: number;
	/** Retry count handed to generateObject; the adapter falls back to 3 when omitted. */
	maxRetries?: number;
}

/**
 * LanguageModel implementation that delegates to the AI SDK's `generateObject`.
 *
 * It converts the project's message format into the SDK's `CoreMessage` format,
 * requests a schema-validated object, and maps failures onto `ModelError` /
 * `ModelThrottledError`.
 */
export class VercelModelAdapter implements LanguageModel {
	private readonly model: LanguageModelV1;
	private readonly defaultTemperature: number;
	private readonly defaultMaxTokens: number;
	private readonly maxRetries: number;
	private readonly _provider: ModelProvider;

	/** Applies the defaults: temperature 0, maxTokens 4096, maxRetries 3, inferred provider. */
	constructor(options: VercelModelAdapterOptions) {
		this.model = options.model;
		this.defaultTemperature = options.temperature ?? 0;
		this.defaultMaxTokens = options.maxTokens ?? 4096;
		this.maxRetries = options.maxRetries ?? 3;
		this._provider = options.provider ?? inferProvider(this.model.modelId, this.model.provider);
	}

	/** Model ID reported by the wrapped SDK model. */
	get modelId(): string {
		return this.model.modelId;
	}

	/** Provider given explicitly in the options, or inferred at construction time. */
	get provider(): ModelProvider {
		return this._provider;
	}

	/**
	 * Run one structured-output request.
	 *
	 * Per-call `temperature` / `maxTokens` override the adapter defaults.
	 *
	 * @returns The parsed object, token usage and mapped finish reason.
	 * @throws ModelThrottledError when the failure looks like rate limiting
	 *   (HTTP 429 or a message mentioning "rate limit", in any letter case).
	 * @throws ModelError for every other failure, with the original error as `cause`.
	 */
	async invoke<T>(options: InferenceOptions<T>): Promise<InferenceResult<T>> {
		const messages = this.convertMessages(options.messages);

		try {
			const result = await generateObject({
				model: this.model,
				schema: options.responseSchema as ZodType<T>,
				schemaName: options.schemaName ?? 'AgentDecision',
				schemaDescription: options.schemaDescription,
				messages,
				temperature: options.temperature ?? this.defaultTemperature,
				maxTokens: options.maxTokens ?? this.defaultMaxTokens,
				maxRetries: this.maxRetries,
			});

			// `?? 0` alone is not enough here: a missing count may arrive as NaN
			// (NOTE(review): some providers appear to report NaN when usage is
			// unavailable), and NaN would poison every total it is later added to.
			const inputTokens = VercelModelAdapter.toTokenCount(result.usage?.promptTokens);
			const outputTokens = VercelModelAdapter.toTokenCount(result.usage?.completionTokens);
			const usage: InferenceUsage = {
				inputTokens,
				outputTokens,
				totalTokens: inputTokens + outputTokens,
			};

			return {
				parsed: result.object,
				usage,
				finishReason: mapFinishReason(result.finishReason),
			};
		} catch (error: unknown) {
			const failure = VercelModelAdapter.describeFailure(error);

			if (failure.statusCode === 429 || /rate limit/i.test(failure.message ?? '')) {
				throw new ModelThrottledError(
					failure.message ?? 'Rate limited',
					VercelModelAdapter.parseRetryAfterMs(failure.retryAfter),
				);
			}
			throw new ModelError(
				`LLM invocation failed: ${failure.message ?? String(error)}`,
				{ cause: error },
			);
		}
	}

	/** Convert project messages into the SDK's message format, one to one. */
	private convertMessages(messages: Message[]): CoreMessage[] {
		return messages.map((msg): CoreMessage => {
			switch (msg.role) {
				case 'system':
					return { role: 'system', content: msg.content };

				case 'user': {
					if (typeof msg.content === 'string') {
						return { role: 'user', content: msg.content };
					}
					return {
						role: 'user',
						content: msg.content.map((part) => this.convertContentPart(part)),
					} as CoreUserMessage;
				}

				case 'assistant': {
					// Assistant messages are sent as text only; non-text parts are
					// replaced by an "[image]" placeholder.
					const content = typeof msg.content === 'string'
						? msg.content
						: msg.content.map((part) => {
								if (part.type === 'text') return { type: 'text' as const, text: part.text };
								return { type: 'text' as const, text: '[image]' };
							});
					return { role: 'assistant', content };
				}

				case 'tool':
					// Tool results are flattened into a plain user message.
					return {
						role: 'user',
						content: `[Tool Result (${msg.toolCallId})]: ${msg.content}`,
					};
			}
		});
	}

	/** Convert one user content part; base64 images pass through as data, URL images as `URL`. */
	private convertContentPart(
		part: ContentPart,
	): { type: 'text'; text: string } | { type: 'image'; image: string | URL } {
		switch (part.type) {
			case 'text':
				return { type: 'text', text: part.text };
			case 'image':
				if (part.source.type === 'base64') {
					return {
						type: 'image',
						image: part.source.data,
					};
				}
				return {
					type: 'image',
					image: new URL(part.source.url),
				};
		}
	}

	/** Coerce a reported token count to a usable number: anything not finite and positive becomes 0. */
	private static toTokenCount(value: unknown): number {
		return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : 0;
	}

	/**
	 * Pull the fields needed for error classification out of an unknown thrown value.
	 *
	 * Status code and headers are also looked up one level down in `lastError`,
	 * and headers under both `responseHeaders` and `headers`.
	 * NOTE(review): this assumes the SDK wraps the provider error in `lastError`
	 * once retries are exhausted and exposes headers as `responseHeaders`;
	 * confirm against the installed `ai` version.
	 */
	private static describeFailure(error: unknown): {
		statusCode: unknown;
		message: string | undefined;
		retryAfter: unknown;
	} {
		const outer = VercelModelAdapter.asRecord(error);
		const inner = VercelModelAdapter.asRecord(outer?.lastError);
		const headers =
			VercelModelAdapter.asRecord(outer?.responseHeaders) ??
			VercelModelAdapter.asRecord(outer?.headers) ??
			VercelModelAdapter.asRecord(inner?.responseHeaders) ??
			VercelModelAdapter.asRecord(inner?.headers);
		const message = outer?.message;

		return {
			statusCode: outer?.statusCode ?? inner?.statusCode,
			message: typeof message === 'string' ? message : undefined,
			retryAfter: headers?.['retry-after'],
		};
	}

	/** View a non-null object as a string-keyed record; anything else yields undefined. */
	private static asRecord(value: unknown): Record<string, unknown> | undefined {
		return typeof value === 'object' && value !== null
			? (value as Record<string, unknown>)
			: undefined;
	}

	/**
	 * Convert a Retry-After header value into a delay in milliseconds.
	 *
	 * Accepts a number of seconds or an HTTP date. Returns undefined when the
	 * value is missing, negative or unparseable, so callers never receive NaN.
	 */
	private static parseRetryAfterMs(value: unknown): number | undefined {
		if (typeof value !== 'string' && typeof value !== 'number') return undefined;

		const text = String(value).trim();
		if (text === '') return undefined;

		// Try the numeric form first so that digit-only values are never handed
		// to the date parser.
		const seconds = Number(text);
		if (Number.isFinite(seconds)) {
			return seconds >= 0 ? Math.round(seconds * 1000) : undefined;
		}

		const retryAt = Date.parse(text);
		return Number.isNaN(retryAt) ? undefined : Math.max(0, retryAt - Date.now());
	}
}

/**
 * Narrows an arbitrary finish-reason string to the known set.
 * Any value that is not one of the recognised reasons maps to `'other'`.
 */
function mapFinishReason(
	reason: string,
): 'stop' | 'length' | 'content-filter' | 'tool-calls' | 'error' | 'other' {
	const recognised = ['stop', 'length', 'content-filter', 'tool-calls', 'error'] as const;
	const match = recognised.find((candidate) => candidate === reason);
	return match ?? 'other';
}

/**
 * Ordered (pattern, provider) pairs; the first matching pattern wins.
 *
 * The `o1` / `o3` alternatives are delimited so that they only match as a
 * standalone token (`o1`, `o3-mini`, `openai/o1`) and not inside unrelated
 * identifiers such as `sao10k/...`.
 */
const PROVIDER_PATTERNS: Array<[RegExp, ModelProvider]> = [
	[/anthropic|claude/i, 'anthropic'],
	[/openai|gpt|(?<![a-z0-9])o[13](?![a-z0-9])/i, 'openai'],
	[/google|gemini/i, 'google'],
	[/mistral/i, 'mistral'],
	[/deepseek/i, 'deepseek'],
	[/groq/i, 'groq'],
	[/fireworks/i, 'fireworks'],
	[/together/i, 'together'],
];

/** Returns the provider of the first pattern matching `text`, if any. */
function matchProviderPattern(text: string): ModelProvider | undefined {
	for (const [pattern, provider] of PROVIDER_PATTERNS) {
		if (pattern.test(text)) return provider;
	}
	return undefined;
}

/**
 * Infers the provider for a model.
 *
 * The explicit `providerHint` is consulted first so that a model hosted by
 * one provider but named after another (for example `openai/gpt-oss-120b`
 * served by Groq) is attributed to the hosting provider. Only when the hint
 * is absent or unrecognised is the model id itself inspected.
 *
 * @param modelId - Model identifier string.
 * @param providerHint - Optional provider name reported by the caller.
 * @returns The matched provider, or `'custom'` when nothing matches.
 */
function inferProvider(modelId: string, providerHint?: string): ModelProvider {
	const fromHint = providerHint ? matchProviderPattern(providerHint) : undefined;
	return fromHint ?? matchProviderPattern(modelId) ?? 'custom';
}


================================================
FILE: packages/core/src/model/index.ts
================================================
export { type LanguageModel, type InferenceOptions, type ModelProvider } from './interface.js';
export { type InferenceResult, type InferenceUsage } from './types.js';
export {
	type Message,
	type SystemMessage,
	type UserMessage,
	type AssistantMessage,
	type ToolResultMessage,
	type ToolCall,
	type ContentPart,
	type TextContent,
	type ImageContent,
	systemMessage,
	userMessage,
	assistantMessage,
	toolResultMessage,
	textContent,
	imageContent,
} from './messages.js';
export { VercelModelAdapter, type VercelModelAdapterOptions } from './adapters/vercel.js';
export {
	zodToJsonSchema,
	optimizeSchemaForModel,
	optimizeJsonSchemaForModel,
	type SchemaOptimizationOptions,
} from './schema-optimizer.js';


================================================
FILE: packages/core/src/model/interface.ts
================================================
import type { ZodType } from 'zod';
import type { Message } from './messages.js';
import type { InferenceResult } from './types.js';

/**
 * Known LLM provider identifiers.
 *
 * `'custom'` is the catch-all member for models that do not map to any of
 * the named providers.
 */
export type ModelProvider =
	| 'anthropic'
	| 'openai'
	| 'google'
	| 'mistral'
	| 'deepseek'
	| 'groq'
	| 'fireworks'
	| 'together'
	| 'custom';

/**
 * Options for a single structured-output call made through
 * `LanguageModel.invoke`.
 *
 * The type parameter `T` is the type described by `responseSchema`.
 */
export interface InferenceOptions<T> {
	/** Conversation to send to the model. */
	messages: Message[];
	/** Zod schema describing the expected response of type `T`. */
	responseSchema: ZodType<T>;
	/** Optional name for the response schema — presumably forwarded to the provider; confirm in the adapter. */
	schemaName?: string;
	/** Optional description for the response schema — presumably forwarded to the provider; confirm in the adapter. */
	schemaDescription?: string;
	/** Optional sampling temperature. */
	temperature?: number;
	/** Optional token limit — NOTE(review): presumably caps generated output tokens; confirm in the adapter. */
	maxTokens?: number;

	/**
	 * Token budget for extended thinking / chain-of-thought.
	 * Only honored by models that support thinking (Claude 3.5+, o1, etc.).
	 * Set to 0 to disable thinking even when the model supports it.
	 */
	thinkingBudget?: number;

	/**
	 * Enable prompt caching for this call. When true, the adapter should
	 * set cache-control headers / parameters where the provider supports it
	 * (e.g. Anthropic prompt caching, OpenAI predicted outputs).
	 */
	cache?: boolean;

	/**
	 * Per-call timeout in milliseconds. Overrides any default timeout
	 * configured on the LanguageModel instance.
	 */
	timeout?: number;
}

/**
 * Provider-agnostic contract for a model that produces structured output.
 */
export interface LanguageModel {
	/**
	 * Runs one inference call and resolves with a result whose `parsed`
	 * value has the type described by `options.responseSchema`.
	 */
	invoke<T>(options: InferenceOptions<T>): Promise<InferenceResult<T>>;

	/** The model identifier string (e.g. "claude-3-5-sonnet-20241022"). */
	readonly modelId: string;

	/** The LLM provider this model belongs to. */
	readonly provider: ModelProvider;
}


================================================
FILE: packages/core/src/model/messages.ts
================================================
/** A plain-text fragment of a multi-part message. */
export interface TextContent {
	type: 'text';
	text: string;
}

/**
 * An image fragment of a multi-part message. The image is supplied either
 * inline (`base64` data plus its media type) or by reference (`url`).
 */
export interface ImageContent {
	type: 'image';
	source:
		| { type: 'base64'; mediaType: string; data: string }
		| { type: 'url'; url: string };
}

/** One fragment of multi-part message content, discriminated by `type`. */
export type ContentPart = TextContent | ImageContent;

/** A system message; content is always plain text. */
export interface SystemMessage {
	role: 'system';
	content: string;
}

/** A user message with either plain-text or multi-part content. */
export interface UserMessage {
	role: 'user';
	content: string | ContentPart[];
}

/** An assistant message, optionally carrying the tool calls it made. */
export interface AssistantMessage {
	role: 'assistant';
	content: string | ContentPart[];
	toolCalls?: ToolCall[];
}

/** The textual result of a tool call, linked to the call by `toolCallId`. */
export interface ToolResultMessage {
	role: 'tool';
	toolCallId: string;
	content: string;
}

/** A single tool invocation: its id, the tool name and its arguments. */
export interface ToolCall {
	id: string;
	name: string;
	args: Record<string, unknown>;
}

/** Any conversation message, discriminated by `role`. */
export type Message = SystemMessage | UserMessage | AssistantMessage | ToolResultMessage;

// ── Helpers ──

/** Builds a `system` message from plain text. */
export function systemMessage(content: string): SystemMessage {
	const message: SystemMessage = { role: 'system', content: content };
	return message;
}

/** Builds a `user` message from plain text or a list of content parts. */
export function userMessage(content: string | ContentPart[]): UserMessage {
	const message: UserMessage = { role: 'user', content: content };
	return message;
}

/**
 * Builds an `assistant` message. The `toolCalls` key is always present on
 * the result and is `undefined` when no tool calls are supplied.
 */
export function assistantMessage(
	content: string | ContentPart[],
	toolCalls?: ToolCall[],
): AssistantMessage {
	const message: AssistantMessage = { role: 'assistant', content: content, toolCalls: toolCalls };
	return message;
}

/** Builds a `tool` message carrying the result of the given tool call. */
export function toolResultMessage(toolCallId: string, content: string): ToolResultMessage {
	const message: ToolResultMessage = { role: 'tool', toolCallId: toolCallId, content: content };
	return message;
}

/** Wraps a string in a text content part. */
export function textContent(text: string): TextContent {
	const part: TextContent = { type: 'text', text: text };
	return part;
}

/**
 * Wraps base64-encoded image data in an image content part.
 *
 * @param base64 - The base64 payload, stored as `source.data`.
 * @param mediaType - Media type of the image; defaults to `image/png`.
 */
export function imageContent(base64: string, mediaType = 'image/png'): ImageContent {
	const source = { type: 'base64' as const, mediaType: mediaType, data: base64 };
	return { type: 'image', source: source };
}


================================================
FILE: packages/core/src/model/schema-optimizer.ts
================================================
import { z, type ZodTypeAny } from 'zod';
import type { ModelProvider } from './interface.js';

// ── Configuration ──

/**
 * Tuning knobs for schema optimization. Every field is optional; omitted
 * numeric limits fall back to the module defaults.
 */
export interface SchemaOptimizationOptions {
	/** LLM provider to apply provider-specific tweaks for. */
	provider?: ModelProvider;

	/**
	 * Maximum number of variants in a discriminated union before collapsing
	 * infrequently used ones into a generic fallback.
	 * Defaults to 15.
	 */
	maxUnionVariants?: number;

	/**
	 * Maximum nesting depth before flattening deeply nested objects
	 * into dot-separated flat keys.
	 * Defaults to 4.
	 */
	maxNestingDepth?: number;

	/**
	 * Maximum number of enum values before collapsing similar ones.
	 * Defaults to 30.
	 */
	maxEnumValues?: number;
}

/**
 * Default numeric limits used when the caller does not supply them.
 * `provider` is deliberately excluded: it has no default.
 */
const DEFAULTS: Required<Omit<SchemaOptimizationOptions, 'provider'>> = {
	maxUnionVariants: 15,
	maxNestingDepth: 4,
	maxEnumValues: 30,
};

// ── Main entry point ──

/**
 * Optimizes a JSON Schema (as a plain object) for LLM consumption.
 * Applies, in order: union collapsing, enum simplification, nested object
 * flattening and, when a provider is given, provider-specific tweaks.
 *
 * The input is deep-cloned first, so the caller's schema is never mutated.
 *
 * Each numeric limit falls back to its default when it is omitted *or*
 * explicitly `undefined`. (Spreading the options over the defaults would let
 * an explicit `undefined` overwrite the default and turn the limit into
 * `NaN` arithmetic further down.)
 *
 * @param schema - JSON Schema to optimize.
 * @param options - Limits and optional provider.
 * @returns A new, optimized schema object.
 */
export function optimizeJsonSchemaForModel(
	schema: Record<string, unknown>,
	options: SchemaOptimizationOptions = {},
): Record<string, unknown> {
	const maxUnionVariants = options.maxUnionVariants ?? DEFAULTS.maxUnionVariants;
	const maxEnumValues = options.maxEnumValues ?? DEFAULTS.maxEnumValues;
	const maxNestingDepth = options.maxNestingDepth ?? DEFAULTS.maxNestingDepth;

	let result = structuredClone(schema);

	result = collapseUnions(result, maxUnionVariants);
	result = collapseEnums(result, maxEnumValues);
	result = flattenNesting(result, maxNestingDepth);

	if (options.provider) {
		result = applyProviderTweaks(result, options.provider);
	}

	return result;
}

/**
 * Zod-level optimization for top-level unions.
 *
 * When `schema` is a discriminated union or a plain union with more variants
 * than `maxUnionVariants`, the result is a plain `z.union` of the first
 * `maxUnionVariants - 1` variants plus a permissive passthrough object that
 * stands in for the rest. Any other schema is returned unchanged.
 *
 * For deeper optimization, convert to JSON Schema with zodToJsonSchema() and
 * call optimizeJsonSchemaForModel().
 */
export function optimizeSchemaForModel<T extends ZodTypeAny>(
	schema: T,
	options: SchemaOptimizationOptions = {},
): T {
	// Returns the collapsed union when there are too many variants, else undefined.
	const collapseIfOversized = (
		variants: ZodTypeAny[],
		fallbackDescription: string,
	): T | undefined => {
		const limit = options.maxUnionVariants ?? DEFAULTS.maxUnionVariants;
		if (!(variants.length > limit)) return undefined;

		const head = variants.slice(0, limit - 1);
		const fallback = z.object({}).passthrough().describe(fallbackDescription);
		const members = [...head, fallback] as unknown as [ZodTypeAny, ZodTypeAny, ...ZodTypeAny[]];
		return z.union(members) as any;
	};

	if (schema instanceof z.ZodDiscriminatedUnion) {
		const collapsed = collapseIfOversized(
			[...schema.options.values()] as ZodTypeAny[],
			'Other action (see documentation)',
		);
		if (collapsed !== undefined) return collapsed;
	}

	if (schema instanceof z.ZodUnion) {
		const collapsed = collapseIfOversized(schema.options as ZodTypeAny[], 'Other variant');
		if (collapsed !== undefined) return collapsed;
	}

	return schema;
}

// ── Union collapsing ──

/**
 * Caps the size of every `oneOf` / `anyOf` in the schema tree.
 *
 * A union with more than `maxVariants` entries keeps its first
 * `maxVariants - 1` entries; the remainder is replaced by one permissive
 * object schema whose description states how many variants it stands for.
 * When a node has both keys, only `oneOf` is considered.
 */
function collapseUnions(
	schema: Record<string, unknown>,
	maxVariants: number,
): Record<string, unknown> {
	const capUnion: SchemaVisitor = (node) => {
		let unionKey: 'oneOf' | 'anyOf';
		if (node.oneOf) {
			unionKey = 'oneOf';
		} else if (node.anyOf) {
			unionKey = 'anyOf';
		} else {
			return node;
		}

		const variants = node[unionKey] as Record<string, unknown>[];
		if (!Array.isArray(variants) || variants.length <= maxVariants) return node;

		const replacedCount = variants.length - maxVariants + 1;
		const catchAll: Record<string, unknown> = {
			type: 'object',
			description: `One of ${replacedCount} additional variants (see documentation)`,
			additionalProperties: true,
		};

		return { ...node, [unionKey]: [...variants.slice(0, maxVariants - 1), catchAll] };
	};

	return walkSchema(schema, capUnion);
}

// ── Enum collapsing ──

/**
 * Shrinks every enum in the schema tree that has more than `maxValues`
 * entries.
 *
 * Values whose string form is equal ignoring case are first reduced to their
 * first occurrence. If the list is still too long it is cut to `maxValues`
 * entries and the node's description is extended (or created) with the number
 * of values that were dropped.
 */
function collapseEnums(
	schema: Record<string, unknown>,
	maxValues: number,
): Record<string, unknown> {
	return walkSchema(schema, (node) => {
		if (!Array.isArray(node.enum)) return node;

		const values = node.enum as unknown[];
		if (values.length <= maxValues) return node;

		// Remember the first value seen for each lower-cased string form;
		// Map iteration preserves that first-seen order.
		const firstByKey = new Map<string, unknown>();
		for (const value of values) {
			const key = String(value).toLowerCase();
			if (!firstByKey.has(key)) {
				firstByKey.set(key, value);
			}
		}
		const unique = [...firstByKey.values()];

		if (unique.length <= maxValues) {
			return { ...node, enum: unique };
		}

		const omitted = unique.length - maxValues;
		const description = node.description
			? `${node.description} (${omitted} more values omitted)`
			: `${omitted} additional values omitted`;
		return { ...node, enum: unique.slice(0, maxValues), description };
	});
}

// ── Nested object flattening ──

/**
 * Flattens objects nested beyond maxDepth by lifting nested properties
 * to the parent level with dot-separated keys.
 *
 * Depth is counted in object levels from the root: the root's own properties
 * live at depth 0, the properties of those objects at depth 1, and so on.
 * An object-typed property found at depth >= `maxDepth` is removed and its
 * properties are lifted into the enclosing object as `parent.child` keys
 * (recursively, so everything below that point ends up flat).
 *
 * The input is not mutated; untouched sub-schemas are returned by reference.
 *
 * @param schema - Root JSON Schema node.
 * @param maxDepth - Number of object levels to keep nested.
 * @returns The schema with over-deep objects flattened.
 */
function flattenNesting(
	schema: Record<string, unknown>,
	maxDepth: number,
): Record<string, unknown> {
	return flattenSchemaNode(schema, 0, maxDepth);
}

/** True for non-null, non-array objects, i.e. values that can be a schema node. */
function isSchemaObject(value: unknown): value is Record<string, unknown> {
	return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Processes one schema node located `depth` object levels below the root.
 * Array items, union branches and `additionalProperties` are traversed at the
 * same depth as the node that holds them. Returns `node` itself when nothing
 * changed, otherwise a shallow copy carrying the rewritten parts.
 */
function flattenSchemaNode(
	node: Record<string, unknown>,
	depth: number,
	maxDepth: number,
): Record<string, unknown> {
	let result = node;
	// Copy-on-write: only clone the node once something actually changes.
	const replace = (key: string, value: unknown): void => {
		if (result === node) result = { ...node };
		result[key] = value;
	};

	if (isSchemaObject(node.items)) {
		const items = flattenSchemaNode(node.items, depth, maxDepth);
		if (items !== node.items) replace('items', items);
	}

	for (const combiner of ['oneOf', 'anyOf', 'allOf'] as const) {
		const branches = node[combiner];
		if (!Array.isArray(branches)) continue;
		const next = branches.map((branch: unknown) =>
			isSchemaObject(branch) ? flattenSchemaNode(branch, depth, maxDepth) : branch,
		);
		if (next.some((branch, i) => branch !== branches[i])) replace(combiner, next);
	}

	if (isSchemaObject(node.additionalProperties)) {
		const extra = flattenSchemaNode(node.additionalProperties, depth, maxDepth);
		if (extra !== node.additionalProperties) replace('additionalProperties', extra);
	}

	const properties = node.properties;
	if (node.type !== 'object' || !isSchemaObject(properties)) return result;

	const flatProps: Record<string, unknown> = {};
	const flatRequired: string[] = [];
	const origRequired = new Set(
		Array.isArray(node.required) ? (node.required as string[]) : [],
	);

	flattenProperties(
		properties as Record<string, Record<string, unknown>>,
		origRequired,
		'',
		depth,
		maxDepth,
		flatProps,
		flatRequired,
	);

	// Leave `properties` / `required` alone when no key was lifted and no
	// nested schema was rewritten.
	const origKeys = Object.keys(properties);
	const flatKeys = Object.keys(flatProps);
	const unchanged =
		flatKeys.length === origKeys.length &&
		flatKeys.every((k) => flatProps[k] === properties[k]);
	if (unchanged) return result;

	replace('properties', flatProps);
	if (flatRequired.length > 0) {
		replace('required', flatRequired);
	} else {
		// `result` is already a private copy here, so deleting is safe.
		delete result.required;
	}
	return result;
}

/**
 * Copies `properties` into `out`, keyed by `prefix`-qualified names.
 *
 * Object-typed properties with at least one child are lifted (their children
 * are written to `out` under `key.child` names) when `currentDepth` has
 * reached `maxDepth`; every other property is kept, after its own subtree has
 * been processed one level deeper.
 *
 * A lifted child is reported as required only when both it and the object it
 * was lifted from were required, so flattening never makes an optional
 * branch mandatory.
 *
 * @param properties - Property map of the object being processed.
 * @param required - Names in `properties` that count as required.
 * @param prefix - Dot-joined path of the lifted ancestors ('' at the top).
 * @param currentDepth - Object depth of `properties` below the root.
 * @param maxDepth - Depth at which nested objects start being lifted.
 * @param out - Receives the resulting properties.
 * @param outRequired - Receives the resulting required keys.
 */
function flattenProperties(
	properties: Record<string, Record<string, unknown>>,
	required: Set<string>,
	prefix: string,
	currentDepth: number,
	maxDepth: number,
	out: Record<string, unknown>,
	outRequired: string[],
): void {
	for (const [key, schema] of Object.entries(properties)) {
		const fullKey = prefix ? `${prefix}.${key}` : key;
		const isRequired = required.has(key);

		const children =
			isSchemaObject(schema) && schema.type === 'object' && isSchemaObject(schema.properties)
				? schema.properties
				: undefined;

		if (children && Object.keys(children).length > 0 && currentDepth >= maxDepth) {
			// Flatten: lift child properties up
			const childRequired = new Set<string>(
				isRequired && Array.isArray(schema.required) ? (schema.required as string[]) : [],
			);
			flattenProperties(
				children as Record<string, Record<string, unknown>>,
				childRequired,
				fullKey,
				currentDepth + 1,
				maxDepth,
				out,
				outRequired,
			);
			continue;
		}

		// Keep the property, but process whatever is nested inside it.
		out[fullKey] = isSchemaObject(schema)
			? flattenSchemaNode(schema, currentDepth + 1, maxDepth)
			: schema;
		if (isRequired) {
			outRequired.push(fullKey);
		}
	}
}

// ── Provider-specific tweaks ──

/**
 * Dispatches to the provider-specific schema rewrite:
 * - `google` → Gemini tweaks
 * - `openai` → OpenAI tweaks
 * - any other provider → schema returned untouched
 */
function applyProviderTweaks(
	schema: Record<string, unknown>,
	provider: ModelProvider,
): Record<string, unknown> {
	if (provider === 'google') {
		return applyGeminiTweaks(schema);
	}
	if (provider === 'openai') {
		return applyOpenAITweaks(schema);
	}
	return schema;
}

/**
 * Gives every property of every object node a description.
 *
 * Properties that already have a truthy `description` are kept as they are;
 * the others receive one derived from the property name. Written for Gemini,
 * which per the original author needs descriptions on all object properties.
 */
function applyGeminiTweaks(schema: Record<string, unknown>): Record<string, unknown> {
	return walkSchema(schema, (node) => {
		if (node.type !== 'object' || !node.properties) return node;

		const described: Record<string, Record<string, unknown>> = {};
		const entries = Object.entries(
			node.properties as Record<string, Record<string, unknown>>,
		);

		for (const [name, propSchema] of entries) {
			described[name] = propSchema.description
				? propSchema
				: { ...propSchema, description: humanizePropertyName(name) };
		}

		return { ...node, properties: described };
	});
}

/**
 * Rewrites every `type: 'object'` node for OpenAI:
 * - drops `additionalProperties` when it is exactly `false`
 * - when the node has properties but no `required` list, marks every
 *   property as required
 *
 * NOTE(review): OpenAI's strict Structured Outputs mode documents
 * `additionalProperties: false` as mandatory on objects — confirm that
 * removing it here is intended for the mode this schema is sent in.
 */
function applyOpenAITweaks(schema: Record<string, unknown>): Record<string, unknown> {
	return walkSchema(schema, (node) => {
		if (node.type !== 'object') return node;

		// Copy the node, leaving out `additionalProperties` only when it is `false`.
		const { additionalProperties, ...withoutAdditional } = node;
		const cleaned: Record<string, unknown> =
			additionalProperties === false ? withoutAdditional : { ...node };

		const hasProperties = Boolean(cleaned.properties);
		const hasRequired = Boolean(cleaned.required);
		if (hasProperties && !hasRequired) {
			cleaned.required = Object.keys(cleaned.properties as object);
		}

		return cleaned;
	});
}

// ── Schema walking utility ──

/** Receives a (copied) schema node and returns the node to use in its place. */
type SchemaVisitor = (node: Record<string, unknown>) => Record<string, unknown>;

/**
 * Walks a JSON Schema tree depth-first and applies `visitor` to every node
 * after its children (post-order).
 *
 * Children are looked for under `properties`, a single-schema `items`,
 * `oneOf` / `anyOf` / `allOf` and an object-valued `additionalProperties`.
 * Each visited node is a shallow copy, so the input tree is not modified by
 * the walk itself.
 */
function walkSchema(
	schema: Record<string, unknown>,
	visitor: SchemaVisitor,
): Record<string, unknown> {
	const current: Record<string, unknown> = { ...schema };
	const descend = (child: unknown): Record<string, unknown> =>
		walkSchema(child as Record<string, unknown>, visitor);

	const { properties, items, additionalProperties } = current;

	// properties: only plain-object values are treated as sub-schemas
	if (properties && typeof properties === 'object') {
		const walked: Record<string, unknown> = {};
		for (const [name, child] of Object.entries(properties as Record<string, unknown>)) {
			const isSubSchema = Boolean(child) && typeof child === 'object' && !Array.isArray(child);
			walked[name] = isSubSchema ? descend(child) : child;
		}
		current.properties = walked;
	}

	// items: tuple-style (array) items are left untouched
	if (items && typeof items === 'object' && !Array.isArray(items)) {
		current.items = descend(items);
	}

	// oneOf / anyOf / allOf
	for (const combiner of ['oneOf', 'anyOf', 'allOf'] as const) {
		const branches = current[combiner];
		if (!Array.isArray(branches)) continue;
		current[combiner] = branches.map((branch: unknown) =>
			typeof branch === 'object' && branch !== null ? descend(branch) : branch,
		);
	}

	// additionalProperties given as a schema
	if (additionalProperties && typeof additionalProperties === 'object') {
		current.additionalProperties = descend(additionalProperties);
	}

	return visitor(current);
}

// ── Helpers ──

/**
 * Converts a camelCase, PascalCase, snake_case or kebab-case property name
 * into a short human-readable phrase: words are lower-cased, separated by
 * single spaces, and the first word is capitalized.
 *
 * Acronym boundaries are respected (`parseURLString` → `Parse url string`),
 * and leading, trailing or repeated separators never produce empty words or
 * stray spaces (`_private_field` → `Private field`).
 *
 * @param name - The raw property name.
 * @returns The humanized phrase, or `name` unchanged when it contains no
 *   word characters at all (for example `''` or `'__'`).
 */
function humanizePropertyName(name: string): string {
	const words = name
		// lower→upper boundary: "firstName" → "first Name"
		.replace(/([a-z])([A-Z])/g, '$1 $2')
		// acronym followed by a capitalized word: "HTMLParser" → "HTML Parser"
		.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
		.replace(/[_-]/g, ' ')
		.toLowerCase()
		.split(/\s+/)
		.filter((word) => word.length > 0);

	const [first, ...rest] = words;
	if (first === undefined) return name;

	// Capitalize first word
	return [first.charAt(0).toUpperCase() + first.slice(1), ...rest].join(' ');
}

// ── zodToJsonSchema (existing, unchanged) ──

/**
 * Converts a Zod schema to a JSON Schema representation suitable for LLM tool use.
 *
 * Supported: object, string, number, boolean, array, optional, default,
 * enum, literal, union, discriminated union, nullable and record. Any other
 * Zod type falls back to `{ type: 'object' }`.
 *
 * Object properties are listed in `required` unless they are `ZodOptional`.
 *
 * A description attached to a wrapper (`.optional()`, `.default()`,
 * `.nullable()`) is carried onto the emitted schema, so
 * `z.string().optional().describe('x')` keeps its description instead of
 * losing it when the wrapper is unwrapped.
 *
 * @param schema - The Zod schema to convert.
 * @returns A plain JSON Schema object.
 */
export function zodToJsonSchema(schema: ZodTypeAny): Record<string, unknown> {
	const jsonSchema: Record<string, unknown> = {};

	// The wrapper's description, when present, is the most specific one.
	const withWrapperDescription = (target: Record<string, unknown>): Record<string, unknown> => {
		if (schema.description) {
			target.description = schema.description;
		}
		return target;
	};

	if (schema instanceof z.ZodObject) {
		jsonSchema.type = 'object';
		const shape = schema.shape;
		const properties: Record<string, unknown> = {};
		const required: string[] = [];

		for (const [key, value] of Object.entries(shape)) {
			properties[key] = zodToJsonSchema(value as ZodTypeAny);
			if (!(value instanceof z.ZodOptional)) {
				required.push(key);
			}
		}

		jsonSchema.properties = properties;
		if (required.length > 0) {
			jsonSchema.required = required;
		}
	} else if (schema instanceof z.ZodString) {
		jsonSchema.type = 'string';
	} else if (schema instanceof z.ZodNumber) {
		jsonSchema.type = 'number';
	} else if (schema instanceof z.ZodBoolean) {
		jsonSchema.type = 'boolean';
	} else if (schema instanceof z.ZodArray) {
		jsonSchema.type = 'array';
		jsonSchema.items = zodToJsonSchema(schema.element);
	} else if (schema instanceof z.ZodOptional) {
		// Optionality is expressed through the parent's `required` list.
		return withWrapperDescription(zodToJsonSchema(schema.unwrap()));
	} else if (schema instanceof z.ZodDefault) {
		const inner = zodToJsonSchema(schema.removeDefault());
		inner.default = schema._def.defaultValue();
		return withWrapperDescription(inner);
	} else if (schema instanceof z.ZodEnum) {
		jsonSchema.type = 'string';
		jsonSchema.enum = schema.options;
	} else if (schema instanceof z.ZodLiteral) {
		jsonSchema.const = schema.value;
	} else if (schema instanceof z.ZodUnion) {
		jsonSchema.oneOf = (schema.options as ZodTypeAny[]).map(zodToJsonSchema);
	} else if (schema instanceof z.ZodDiscriminatedUnion) {
		jsonSchema.oneOf = [...schema.options.values()].map((opt: ZodTypeAny) =>
			zodToJsonSchema(opt),
		);
	} else if (schema instanceof z.ZodNullable) {
		const inner = zodToJsonSchema(schema.unwrap());
		const nullable: Record<string, unknown> = { oneOf: [inner, { type: 'null' }] };
		// Only describe the union itself when the inner branch does not
		// already carry the very same text.
		if (schema.description && inner.description !== schema.description) {
			nullable.description = schema.description;
		}
		return nullable;
	} else if (schema instanceof z.ZodRecord) {
		jsonSchema.type = 'object';
		jsonSchema.additionalProperties = zodToJsonSchema(schema.element);
	} else {
		jsonSchema.type = 'object';
	}

	if (schema.description) {
		jsonSchema.description = schema.description;
	}

	return jsonSchema;
}


================================================
FILE: packages/core/src/model/types.ts
================================================
import { z } from 'zod';

/** Token accounting for a single inference call. */
export interface InferenceUsage {
	/** Tokens consumed by the prompt / input. */
	inputTokens: number;
	/** Tokens produced by the model. */
	outputTokens: number;
	/** Combined token count for the call. */
	totalTokens: number;
}

/**
 * Outcome of one inference call.
 *
 * The type parameter `T` is the type of the structured value in `parsed`
 * (defaults to `unknown`).
 */
export interface InferenceResult<T = unknown> {
	/** The structured response. */
	parsed: T;
	/** Raw model text, when the adapter provides it. */
	rawText?: string;
	/** Token usage for the call. */
	usage: InferenceUsage;
	/** Why generation ended; values outside the known set are reported as `'other'`. */
	finishReason: 'stop' | 'length' | 'content-filter' | 'tool-calls' | 'error' | 'other';
}

/** Zod schema with the same three numeric fields as `InferenceUsage`, for runtime validation. */
export const InferenceUsageSchema = z.object({
	inputTokens: z.number(),
	outputTokens: z.number(),
	totalTokens: z.number(),
});


================================================
FILE: packages/core/src/page/content-extractor.ts
================================================
import TurndownService from 'turndown';
import type { Page } from 'playwright';

let turndownInstance: TurndownService | null = null;

/**
 * Returns the shared Turndown converter, creating and configuring it on
 * first use.
 *
 * Configuration: ATX headings, fenced code blocks, `*` for emphasis;
 * script/style/nav/footer/header/noscript elements are dropped; tables are
 * rendered through `htmlTableToMarkdown`; `<pre><code>` blocks become fenced
 * blocks tagged with the language found by `detectCodeLanguage`.
 */
function getTurndown(): TurndownService {
	if (turndownInstance) {
		return turndownInstance;
	}

	const service = new TurndownService({
		headingStyle: 'atx',
		codeBlockStyle: 'fenced',
		emDelimiter: '*',
	});
	// Publish the instance before configuring it, as the cache is module-wide.
	turndownInstance = service;

	// Non-content elements are removed entirely.
	service.remove(['script', 'style', 'nav', 'footer', 'header', 'noscript']);

	// Tables are converted by the dedicated helper instead of Turndown's default.
	service.addRule('table', {
		filter: 'table',
		replacement: (_content, node) => htmlTableToMarkdown(node as HTMLTableElement),
	});

	// <pre> whose first child is <code>: emit a fenced block with a language tag.
	// Supports patterns: language-xxx, lang-xxx, highlight-xxx, brush:xxx, and bare lang names.
	service.addRule('codeBlock', {
		filter: (node) => {
			if (node.nodeName !== 'PRE') return false;
			const first = node.firstChild;
			return first !== null && first.nodeName === 'CODE';
		},
		replacement: (_content, node) => {
			const codeEl = node.firstChild as HTMLElement;
			const lang = detectCodeLanguage(codeEl);
			const code = codeEl?.textContent ?? '';
			return `\n\`\`\`${lang}\n${code}\n\`\`\`\n`;
		},
	});

	return service;
}

/**
 * Renders an HTML table as a Markdown pipe table.
 *
 * The first non-empty row becomes the header. Shorter rows are padded with
 * empty cells to the widest row. Cell text is reduced to a single line
 * (every run of whitespace, including newlines, becomes one space) because a
 * line break inside a cell would split the Markdown row; `|` is escaped.
 *
 * @param table - The table element to convert.
 * @returns The Markdown table wrapped in newlines, or `''` when the table
 *   has no cells.
 */
function htmlTableToMarkdown(table: HTMLTableElement): string {
	const rows: string[][] = [];

	for (const row of table.querySelectorAll('tr')) {
		const cells: string[] = [];
		for (const cell of row.querySelectorAll('th, td')) {
			const text = (cell.textContent ?? '').replace(/\s+/g, ' ').trim();
			cells.push(text.replace(/\|/g, '\\|'));
		}
		if (cells.length > 0) {
			rows.push(cells);
		}
	}

	const [header, ...body] = rows;
	if (header === undefined) return '';

	// Pad rows to same column count
	const maxCols = Math.max(...rows.map((r) => r.length));
	for (const row of rows) {
		while (row.length < maxCols) {
			row.push('');
		}
	}

	const lines = [
		`| ${header.join(' | ')} |`,
		`| ${header.map(() => '---').join(' | ')} |`,
		...body.map((cells) => `| ${cells.join(' | ')} |`),
	];

	return '\n' + lines.join('\n') + '\n';
}

/**
 * Language names accepted when a class token is used on its own
 * (for example `class="hljs python"`).
 */
const KNOWN_LANGUAGES = new Set([
	'javascript', 'typescript', 'python', 'ruby', 'java', 'go', 'rust', 'c',
	'cpp', 'csharp', 'swift', 'kotlin', 'scala', 'php', 'perl', 'lua',
	'bash', 'shell', 'sh', 'zsh', 'powershell', 'sql', 'html', 'css',
	'scss', 'less', 'json', 'yaml', 'yml', 'xml', 'toml', 'ini',
	'markdown', 'md', 'jsx', 'tsx', 'graphql', 'r', 'matlab', 'dart',
	'elixir', 'erlang', 'haskell', 'ocaml', 'clojure', 'vim', 'dockerfile',
	'makefile', 'cmake', 'protobuf', 'terraform', 'hcl',
]);

/**
 * Works out the language tag for a code element. Sources are tried in this
 * order and the first hit is returned lower-cased:
 * 1. the `data-lang` attribute
 * 2. a `language-xxx` or `lang-xxx` class
 * 3. a `highlight-xxx` class
 * 4. a `brush: xxx` class (SyntaxHighlighter)
 * 5. any class token that is itself a known language name
 *
 * Returns `''` when the element is null or nothing matches.
 */
function detectCodeLanguage(codeEl: HTMLElement | null): string {
	if (!codeEl) return '';

	const dataLang = codeEl.getAttribute?.('data-lang') ?? '';
	if (dataLang) return dataLang.toLowerCase();

	const className = codeEl.getAttribute?.('class') ?? '';
	if (!className) return '';

	// Class-name conventions, in priority order; group 1 is the language.
	const classPatterns = [/(?:language|lang)-(\w+)/, /highlight-(\w+)/, /brush:\s*(\w+)/];
	for (const pattern of classPatterns) {
		const found = className.match(pattern);
		if (found) return found[1].toLowerCase();
	}

	// Last resort: a bare token such as "python".
	const bareToken = className
		.split(/\s+/)
		.map((token) => token.toLowerCase())
		.find((token) => KNOWN_LANGUAGES.has(token));

	return bareToken ?? '';
}

/**
 * Remembers how far into a page's content a reader has progressed, so that
 * successive reads can continue where the previous one stopped.
 */
export class ReadingState {
	private charOffset = 0;
	private totalLength = 0;
	private pageUrl = '';

	/** Character offset at which the next read starts. */
	get currentOffset(): number {
		return this.charOffset;
	}

	/** Total length of the content as of the last `update`. */
	get contentLength(): number {
		return this.totalLength;
	}

	/** True while the offset is still short of the known content length. */
	get hasMore(): boolean {
		return this.totalLength > this.charOffset;
	}

	/** Consumed share of the content, from 0 to 1 (0 when the content is empty). */
	get progress(): number {
		return this.totalLength === 0 ? 0 : Math.min(1, this.charOffset / this.totalLength);
	}

	/** Moves the offset forward by `chars`, never past the known content length. */
	advance(chars: number): void {
		const next = this.charOffset + chars;
		this.charOffset = Math.min(next, this.totalLength);
	}

	/**
	 * Records the current page URL and content length. A URL different from
	 * the stored one restarts reading at offset 0.
	 */
	update(url: string, totalLength: number): void {
		const isNewPage = url !== this.pageUrl;
		if (isNewPage) {
			this.pageUrl = url;
			this.charOffset = 0;
		}
		this.totalLength = totalLength;
	}

	/** Forgets the URL, the length and the offset. */
	reset(): void {
		this.pageUrl = '';
		this.totalLength = 0;
		this.charOffset = 0;
	}
}

/** Options accepted by `extractMarkdown`. */
export interface MarkdownExtractionOptions {
	/** Character offset to start from; takes priority over the offset tracked by `readingState`. */
	startFromChar?: number;
	/** Maximum number of characters of page content to return (a truncation notice may be appended on top). */
	maxLength?: number;
	/** When true, a "## Links" section listing the page's links is appended. */
	extractLinks?: boolean;
	/** Tracker that is updated with the page URL / content length and advanced by the amount read. */
	readingState?: ReadingState;
}

/**
 * Extracts the page's main content as Markdown.
 *
 * The HTML is taken from the first element matching
 * `main, article, [role="main"], .content, #content`, falling back to the
 * whole body. The Markdown is then sliced from the starting offset, capped at
 * `maxLength` (preferring to cut at a paragraph break when one lies in the
 * last 20% of the window), annotated with a truncation notice, and optionally
 * followed by a list of the page's links.
 *
 * When a `readingState` is supplied it is updated with the page URL and full
 * content length, and afterwards positioned at the end of the slice that was
 * actually returned — also when an explicit `startFromChar` overrode the
 * tracked offset, so the next state-driven read continues right after it.
 *
 * @param page - Playwright page to read from.
 * @param options - Offset, length limit, link extraction and reading state.
 * @returns The extracted Markdown.
 */
export async function extractMarkdown(
	page: Page,
	options?: MarkdownExtractionOptions,
): Promise<string> {
	const html = await page.evaluate(() => {
		// Try to get main content first
		const main = document.querySelector('main, article, [role="main"], .content, #content');
		if (main) return main.innerHTML;

		// Fallback to body
		return document.body?.innerHTML ?? '';
	});

	let markdown = htmlToMarkdown(html);
	const fullLength = markdown.length;

	// Update reading state if provided
	const readingState = options?.readingState;
	if (readingState) {
		readingState.update(page.url(), fullLength);
	}

	// Determine the starting offset: explicit option takes priority,
	// then reading state's tracked position, then 0. Negative offsets are
	// treated as 0 so the bookkeeping below stays consistent.
	const requestedOffset = options?.startFromChar ??
		(readingState ? readingState.currentOffset : 0);
	const startOffset = Math.max(0, requestedOffset);

	if (startOffset > 0) {
		markdown = markdown.slice(startOffset);
	}

	// Apply max length
	let truncated = false;
	if (options?.maxLength && markdown.length > options.maxLength) {
		markdown = markdown.slice(0, options.maxLength);
		// Try to break at a paragraph boundary
		const lastParagraph = markdown.lastIndexOf('\n\n');
		if (lastParagraph > markdown.length * 0.8) {
			markdown = markdown.slice(0, lastParagraph);
		}
		truncated = true;
	}

	// Move the reading state to the end of what was just returned.
	// advance() is relative to the tracked offset, so pass the distance from
	// there to `startOffset + consumed`; advance() itself clamps at the end.
	if (readingState) {
		readingState.advance(startOffset + markdown.length - readingState.currentOffset);
	}

	if (truncated) {
		const remaining = fullLength - startOffset - markdown.length;
		markdown += `\n\n[... content truncated, ~${remaining} chars remaining]`;
	}

	// Append links section if requested
	if (options?.extractLinks) {
		const links = await extractLinks(page);
		if (links.length > 0) {
			markdown += '\n\n## Links\n';
			for (const link of links) {
				const marker = link.isExternal ? ' (external)' : '';
				markdown += `- [${link.text}](${link.url})${marker}\n`;
			}
		}
	}

	return markdown;
}

/**
 * Converts an HTML fragment to Markdown with the shared Turndown converter
 * and tidies the whitespace of the result.
 *
 * Only trailing spaces/tabs are stripped from each line. Leading indentation
 * is kept on purpose: it is significant inside fenced code blocks and for
 * nested lists. Runs of three or more newlines are collapsed to one blank
 * line, and the whole result is trimmed.
 *
 * @param html - HTML source to convert.
 * @returns The cleaned-up Markdown.
 */
export function htmlToMarkdown(html: string): string {
	const turndown = getTurndown();
	const markdown = turndown.turndown(html);

	return markdown
		// Trailing whitespace first, so whitespace-only lines become empty
		// and are caught by the blank-line collapse below.
		.replace(/[ \t]+$/gm, '')
		.replace(/\n{3,}/g, '\n\n')
		.trim();
}

/**
 * Collects the links of a page.
 *
 * Runs in the page and looks at every `a[href]`. Anchors whose href is
 * empty, starts with `#` or `javascript:`, cannot be resolved against the
 * current location, or whose text is empty are skipped. Link text is trimmed
 * and limited to 200 characters; a link is external when its hostname differs
 * from the page's hostname.
 */
export async function extractLinks(
	page: Page,
): Promise<Array<{ text: string; url: string; isExternal: boolean }>> {
	return page.evaluate(() => {
		const pageHost = window.location.hostname;
		const collected: Array<{ text: string; url: string; isExternal: boolean }> = [];

		document.querySelectorAll('a[href]').forEach((anchor) => {
			const href = anchor.getAttribute('href');
			if (!href) return;
			if (href.startsWith('#') || href.startsWith('javascript:')) return;

			// Resolve relative hrefs against the current location.
			let url: string;
			try {
				url = new URL(href, window.location.href).href;
			} catch {
				return;
			}

			const text = (anchor.textContent ?? '').trim().slice(0, 200);
			if (!text) return;

			let isExternal = false;
			try {
				isExternal = new URL(url).hostname !== pageHost;
			} catch {
				// ignore
			}

			collected.push({ text, url, isExternal });
		});

		return collected;
	});
}

/**
 * Returns the rendered text (`innerText`) of the page's main content element
 * (`main, article, [role="main"], .content, #content`), or of the body when
 * no such element exists. Yields `''` when neither is available.
 */
export async function extractTextContent(page: Page): Promise<string> {
	return page.evaluate(() => {
		const container =
			document.querySelector('main, article, [role="main"], .content, #content') ??
			document.body;
		const element = container as HTMLElement | null;
		return element?.innerText ?? '';
	});
}

/**
 * Splits text into chunks of at most `maxChunkSize` characters.
 *
 * Paragraphs (separated by blank lines) are packed together while they fit.
 * A paragraph that is too long on its own is split into sentences, and a
 * sentence that is still too long is cut into fixed-size pieces, so no chunk
 * exceeds the limit even for text without paragraph or sentence breaks.
 * Chunks are trimmed and empty chunks are never emitted.
 *
 * @param text - The text to split.
 * @param maxChunkSize - Maximum chunk length in characters. Values below 1
 *   disable the fixed-size cutting step.
 * @returns The chunks in order; `[text]` when the text already fits.
 */
export function chunkText(text: string, maxChunkSize: number): string[] {
	if (text.length <= maxChunkSize) return [text];

	const chunks: string[] = [];
	let currentChunk = '';

	// Emits the pending chunk (if it has any content) and starts a new one.
	const flush = (): void => {
		const trimmed = currentChunk.trim();
		if (trimmed) chunks.push(trimmed);
		currentChunk = '';
	};

	// Cuts off full-size pieces of an over-long run and returns the remainder.
	const cutOversized = (piece: string): string => {
		let rest = piece;
		while (maxChunkSize >= 1 && rest.length > maxChunkSize) {
			let cut = Math.floor(maxChunkSize);
			// Do not separate a surrogate pair when there is room to back off.
			const lastUnit = rest.charCodeAt(cut - 1);
			if (cut > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;

			const head = rest.slice(0, cut).trim();
			if (head) chunks.push(head);
			rest = rest.slice(cut);
		}
		return rest;
	};

	for (const para of text.split(/\n\n+/)) {
		if (currentChunk.length + para.length + 2 > maxChunkSize) {
			flush();

			if (para.length > maxChunkSize) {
				// If a single paragraph is too long, split by sentences
				for (const sentence of para.split(/(?<=[.!?])\s+/)) {
					if (currentChunk.length + sentence.length + 1 > maxChunkSize) {
						flush();
						currentChunk = cutOversized(sentence);
					} else {
						currentChunk += (currentChunk ? ' ' : '') + sentence;
					}
				}
			} else {
				currentChunk = para;
			}
		} else {
			currentChunk += (currentChunk ? '\n\n' : '') + para;
		}
	}

	flush();
	return chunks;
}


================================================
FILE: packages/core/src/page/index.ts
================================================
export { PageAnalyzer, type PageAnalyzerOptions } from './page-analyzer.js';
export { SnapshotBuilder } from './snapshot-builder.js';
export { TreeRenderer, type RendererOptions } from './renderer/tree-renderer.js';
export {
	extractMarkdown,
	htmlToMarkdown,
	extractTextContent,
	extractLinks,
	chunkText,
	type MarkdownExtractionOptions,
} from './content-extractor.js';
export {
	type PageTreeNode,
	type SelectorIndex,
	type RenderedPageState,
	type DOMRect,
	type CDPSnapshotResult,
	type AXNode,
	type TargetInfo,
	type TargetAllTrees,
	type InteractedElement,
	type MatchLevel,
	type SimplifiedNode,
} from './types.js';


================================================
FILE: packages/core/src/page/page-analyzer.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import { PageAnalyzer } from './page-analyzer.js';
import { PageExtractionError } from '../errors.js';
import type { PageTreeNode, SelectorIndex, RenderedPageState } from './types.js';
import type { ElementRef } from '../types.js';

// ── Mock factories ──

/**
 * Builds a minimal stand-in for a Playwright page: a fixed 1280x800
 * viewport, mocked `evaluate` / `click` / `fill`, mocked mouse and keyboard,
 * and no frames. Entries in `overrides` replace the defaults of the same name.
 */
function makeMockPage(overrides: Record<string, unknown> = {}) {
	const resolvesToNothing = () => mock(() => Promise.resolve());

	const defaults = {
		viewportSize: () => ({ width: 1280, height: 800 }),
		evaluate: mock(() => Promise.resolve({ x: 0, y: 0 })),
		click: resolvesToNothing(),
		fill: resolvesToNothing(),
		mouse: {
			click: resolvesToNothing(),
		},
		keyboard: {
			type: resolvesToNothing(),
		},
		frames: () => [],
	};

	return { ...defaults, ...overrides } as any;
}

/**
 * Creates a stand-in for a Playwright `CDPSession` whose `send` resolves to an
 * empty object unless replaced through `overrides`.
 */
function makeMockCdpSession(overrides: Record<string, unknown> = {}) {
	const defaultSession = {
		send: mock(() => Promise.resolve({})),
	};

	return { ...defaultSession, ...overrides } as any;
}

/**
 * Builds a `PageTreeNode` for tests: a visible, non-interactive `div` with no
 * attributes or children, with any supplied fields layered on top.
 */
function makeNode(overrides: Partial<PageTreeNode> = {}): PageTreeNode {
	// A fresh object (and fresh `attributes` / `children` containers) per call,
	// so nodes created by different tests never share mutable state.
	const node: PageTreeNode = {
		tagName: 'div',
		nodeType: 'element',
		attributes: {},
		children: [],
		isVisible: true,
		isInteractive: false,
		isClickable: false,
		isEditable: false,
		isScrollable: false,
	};

	return Object.assign(node, overrides);
}

// ── Tests ──

// Unit tests for PageAnalyzer. Private state and private helpers are reached
// through `as any` casts so each behaviour can be exercised without a browser.
describe('PageAnalyzer', () => {
	let service: PageAnalyzer;

	beforeEach(() => {
		service = new PageAnalyzer();
	});

	describe('constructor defaults', () => {
		test('has default viewport expansion of 0', () => {
			// Assert the actual defaults instead of mere existence.
			const internals = service as any;
			expect(internals.viewportExpansion).toBe(0);
			expect(internals.maxIframes).toBe(3);
			expect(internals.maxElementsInDom).toBe(2000);
			expect(internals.capturedAttributes).toContain('aria-label');
		});

		test('accepts custom options', () => {
			const custom = new PageAnalyzer({
				viewportExpansion: 500,
				maxElementsInDom: 100,
				maxIframes: 1,
				capturedAttributes: ['title'],
			});
			const internals = custom as any;
			expect(internals.viewportExpansion).toBe(500);
			expect(internals.maxElementsInDom).toBe(100);
			expect(internals.maxIframes).toBe(1);
			expect(internals.capturedAttributes).toEqual(['title']);
		});
	});

	describe('cache management', () => {
		test('getCachedTree returns null initially', () => {
			expect(service.getCachedTree()).toBeNull();
		});

		test('getCachedSelectorMap returns null initially', () => {
			expect(service.getCachedSelectorMap()).toBeNull();
		});

		test('clearCache resets tree and selector map', () => {
			// Seed the caches first so the reset is actually observable.
			const tree = makeNode();
			const selectorMap: SelectorIndex = {
				0: { cssSelector: '#btn', tagName: 'button' },
			};
			(service as any).cachedTree = tree;
			(service as any).cachedSelectorMap = selectorMap;
			expect(service.getCachedTree()).toBe(tree);
			expect(service.getCachedSelectorMap()).toBe(selectorMap);

			service.clearCache();

			expect(service.getCachedTree()).toBeNull();
			expect(service.getCachedSelectorMap()).toBeNull();
		});
	});

	describe('interaction recording', () => {
		test('getInteractedElements returns empty array initially', () => {
			expect(service.getInteractedElements()).toEqual([]);
		});

		test('clearInteractedElements resets the list', () => {
			// Record something first; clearing an empty list proves nothing.
			(service as any).recordInteraction(0, 'button', 'click');
			expect(service.getInteractedElements()).toHaveLength(1);

			service.clearInteractedElements();

			expect(service.getInteractedElements()).toEqual([]);
		});

		test('getInteractedElements returns a copy', () => {
			const elements = service.getInteractedElements();
			expect(elements).not.toBe(service.getInteractedElements());

			// Mutating the returned array must not leak into the analyzer.
			elements.push({
				index: 0 as ElementRef,
				tagName: 'div',
				action: 'click',
				timestamp: 0,
			});
			expect(service.getInteractedElements()).toEqual([]);
		});
	});

	describe('clickElementByIndex', () => {
		test('throws PageExtractionError when element not in selector map', async () => {
			const page = makeMockPage();
			const cdp = makeMockCdpSession();

			await expect(
				service.clickElementByIndex(page, cdp, 42),
			).rejects.toThrow(PageExtractionError);
		});

		test('Strategy 1: uses CDP box model when backendNodeId is available', async () => {
			const page = makeMockPage();
			const cdp = makeMockCdpSession({
				send: mock(() =>
					Promise.resolve({
						model: {
							content: [10, 10, 110, 10, 110, 60, 10, 60],
						},
					}),
				),
			});

			// Inject a selector map with a backendNodeId
			const selectorMap: SelectorIndex = {
				0: {
					cssSelector: '#btn',
					backendNodeId: 123,
					tagName: 'button',
				},
			};
			(service as any).cachedSelectorMap = selectorMap;

			await service.clickElementByIndex(page, cdp, 0);

			// Should have used mouse.click with center coordinates
			expect(page.mouse.click).toHaveBeenCalledTimes(1);
			// Center of quad: ((10+110+110+10)/4, (10+10+60+60)/4) = (60, 35)
			expect(page.mouse.click).toHaveBeenCalledWith(60, 35);
			// The later strategies must not have been attempted
			expect(page.click).not.toHaveBeenCalled();

			// Should have recorded the interaction
			const interactions = service.getInteractedElements();
			expect(interactions).toHaveLength(1);
			expect(interactions[0].action).toBe('click');
			expect(interactions[0].tagName).toBe('button');
		});

		test('Strategy 2: falls back to JS getBoundingClientRect when CDP fails', async () => {
			const evaluateMock = mock(() =>
				Promise.resolve({ x: 50, y: 25 }),
			);
			const page = makeMockPage({ evaluate: evaluateMock });
			const cdp = makeMockCdpSession({
				send: mock(() => Promise.reject(new Error('CDP failed'))),
			});

			const selectorMap: SelectorIndex = {
				0: {
					cssSelector: '#btn',
					backendNodeId: 123,
					tagName: 'button',
				},
			};
			(service as any).cachedSelectorMap = selectorMap;

			await service.clickElementByIndex(page, cdp, 0);

			// Should have called page.evaluate (JS fallback)
			expect(evaluateMock).toHaveBeenCalled();
			// Then mouse.click with the returned coords
			expect(page.mouse.click).toHaveBeenCalledWith(50, 25);
			expect(page.click).not.toHaveBeenCalled();
			expect(service.getInteractedElements()).toHaveLength(1);
		});

		test('Strategy 3: falls back to CSS selector click when JS rect returns null', async () => {
			const evaluateMock = mock(() => Promise.resolve(null));
			const page = makeMockPage({ evaluate: evaluateMock });
			const cdp = makeMockCdpSession({
				send: mock(() => Promise.reject(new Error('CDP failed'))),
			});

			const selectorMap: SelectorIndex = {
				0: {
					cssSelector: '.my-btn',
					backendNodeId: 123,
					tagName: 'button',
				},
			};
			(service as any).cachedSelectorMap = selectorMap;

			await service.clickElementByIndex(page, cdp, 0);

			// Should have fallen through to page.click(cssSelector)
			expect(page.click).toHaveBeenCalledWith('.my-btn', { timeout: 5000 });
			// No coordinate click happens on this path
			expect(page.mouse.click).not.toHaveBeenCalled();
			expect(service.getInteractedElements()).toHaveLength(1);
		});

		test('uses CSS selector click when no backendNodeId', async () => {
			const evaluateMock = mock(() => Promise.resolve(null));
			const page = makeMockPage({ evaluate: evaluateMock });
			const cdp = makeMockCdpSession();

			const selectorMap: SelectorIndex = {
				0: {
					cssSelector: '#submit',
					tagName: 'button',
					// No backendNodeId
				},
			};
			(service as any).cachedSelectorMap = selectorMap;

			await service.clickElementByIndex(page, cdp, 0);

			// Without a backendNodeId the CDP strategy is skipped entirely
			expect(cdp.send).not.toHaveBeenCalled();
			expect(page.click).toHaveBeenCalledWith('#submit', { timeout: 5000 });
		});
	});

	describe('clickAtCoordinates', () => {
		test('clicks at the specified coordinates', async () => {
			const page = makeMockPage();
			await service.clickAtCoordinates(page, 100, 200);
			expect(page.mouse.click).toHaveBeenCalledWith(100, 200);
			// Raw coordinate clicks are not tied to an element index
			expect(service.getInteractedElements()).toEqual([]);
		});
	});

	describe('inputTextByIndex', () => {
		test('throws when element not in selector map', async () => {
			const page = makeMockPage();
			const cdp = makeMockCdpSession();

			await expect(
				service.inputTextByIndex(page, cdp, 99, 'hello'),
			).rejects.toThrow(PageExtractionError);
		});

		test('fills input with text when clearFirst is true (default)', async () => {
			const page = makeMockPage();
			const cdp = makeMockCdpSession();

			(service as any).cachedSelectorMap = {
				0: { cssSelector: '#name', tagName: 'input' },
			};

			await service.inputTextByIndex(page, cdp, 0, 'Alice');

			expect(page.fill).toHaveBeenCalledWith('#name', 'Alice');
			expect(page.keyboard.type).not.toHaveBeenCalled();
			expect(service.getInteractedElements()).toHaveLength(1);
			expect(service.getInteractedElements()[0].action).toBe('input');
		});

		test('types text without clearing when clearFirst is false', async () => {
			const page = makeMockPage();
			const cdp = makeMockCdpSession();

			(service as any).cachedSelectorMap = {
				0: { cssSelector: '#name', tagName: 'input' },
			};

			await service.inputTextByIndex(page, cdp, 0, 'Bob', false);

			expect(page.click).toHaveBeenCalledWith('#name');
			expect(page.keyboard.type).toHaveBeenCalledWith('Bob');
			expect(page.fill).not.toHaveBeenCalled();
			expect(service.getInteractedElements()[0].action).toBe('input');
		});
	});

	describe('getElementSelector', () => {
		test('returns undefined when no selector map cached', async () => {
			const result = await service.getElementSelector(0);
			expect(result).toBeUndefined();
		});

		test('returns CSS selector when element is in the map', async () => {
			(service as any).cachedSelectorMap = {
				5: { cssSelector: '.item-5', tagName: 'div' },
			};

			const result = await service.getElementSelector(5);
			expect(result).toBe('.item-5');
		});
	});

	describe('getElementByBackendNodeId', () => {
		test('returns selector with ID when available', async () => {
			const cdp = makeMockCdpSession({
				send: mock(() =>
					Promise.resolve({
						node: {
							nodeName: 'DIV',
							attributes: ['id', 'main-content', 'class', 'wrapper'],
						},
					}),
				),
			});

			const result = await service.getElementByBackendNodeId(cdp, 42);
			expect(result).toEqual({ selector: '#main-content' });
		});

		test('returns tag name when no ID attribute', async () => {
			const cdp = makeMockCdpSession({
				send: mock(() =>
					Promise.resolve({
						node: {
							nodeName: 'BUTTON',
							attributes: ['class', 'primary'],
						},
					}),
				),
			});

			const result = await service.getElementByBackendNodeId(cdp, 42);
			expect(result).toEqual({ selector: 'button' });
		});

		test('returns null when CDP call fails', async () => {
			const cdp = makeMockCdpSession({
				send: mock(() => Promise.reject(new Error('not found'))),
			});

			const result = await service.getElementByBackendNodeId(cdp, 42);
			expect(result).toBeNull();
		});

		test('returns null when node has no result', async () => {
			const cdp = makeMockCdpSession({
				send: mock(() => Promise.resolve({ node: null })),
			});

			const result = await service.getElementByBackendNodeId(cdp, 42);
			expect(result).toBeNull();
		});
	});

	describe('collectHiddenElementHints (via private access)', () => {
		test('collects hints for elements below the viewport', () => {
			const root = makeNode({
				children: [
					makeNode({
						tagName: 'button',
						isInteractive: true,
						isVisible: false,
						highlightIndex: 0 as ElementRef,
						ariaLabel: 'Submit form',
						rect: { x: 0, y: 2000, width: 100, height: 30 },
					}),
				],
			});

			const viewport = { width: 1280, height: 800 };
			const scroll = { x: 0, y: 0 };

			const hints = (service as any).collectHiddenElementHints(root, viewport, scroll);

			expect(hints).toHaveLength(1);
			expect(hints[0]).toContain('Submit form');
			expect(hints[0]).toContain('pages below');
		});

		test('collects hints for elements above the viewport', () => {
			const root = makeNode({
				children: [
					makeNode({
						tagName: 'a',
						isInteractive: true,
						isVisible: false,
						highlightIndex: 1 as ElementRef,
						text: 'Top link',
						rect: { x: 0, y: 100, width: 80, height: 20 },
					}),
				],
			});

			const viewport = { width: 1280, height: 800 };
			const scroll = { x: 0, y: 1600 }; // scrolled way down

			const hints = (service as any).collectHiddenElementHints(root, viewport, scroll);

			expect(hints).toHaveLength(1);
			expect(hints[0]).toContain('Top link');
			expect(hints[0]).toContain('pages above');
		});

		test('ignores visible or non-interactive elements', () => {
			const root = makeNode({
				children: [
					makeNode({
						tagName: 'button',
						isInteractive: true,
						isVisible: true, // visible elements are not collected
						highlightIndex: 0 as ElementRef,
						rect: { x: 0, y: 2000, width: 100, height: 30 },
					}),
					makeNode({
						tagName: 'div',
						isInteractive: false, // non-interactive
						isVisible: false,
						highlightIndex: 1 as ElementRef,
						rect: { x: 0, y: 2000, width: 100, height: 30 },
					}),
				],
			});

			const viewport = { width: 1280, height: 800 };
			const scroll = { x: 0, y: 0 };

			const hints = (service as any).collectHiddenElementHints(root, viewport, scroll);
			expect(hints).toHaveLength(0);
		});
	});

	describe('applyViewportThresholdFilter (via private access)', () => {
		test('removes highlightIndex from elements outside expanded viewport', () => {
			const outsideNode = makeNode({
				tagName: 'button',
				highlightIndex: 0 as ElementRef,
				rect: { x: 0, y: 5000, width: 100, height: 30 },
			});
			const insideNode = makeNode({
				tagName: 'input',
				highlightIndex: 1 as ElementRef,
				rect: { x: 0, y: 200, width: 200, height: 30 },
			});
			const root = makeNode({
				children: [outsideNode, insideNode],
			});

			const viewport = { width: 1280, height: 800 };
			const scroll = { x: 0, y: 0 };

			(service as any).applyViewportThresholdFilter(root, viewport, scroll);

			// The outside node should have its highlightIndex removed
			expect(outsideNode.highlightIndex).toBeUndefined();
			// The inside node should keep its highlightIndex
			expect(insideNode.highlightIndex).toBe(1 as ElementRef);
		});

		test('keeps elements within the viewport expansion margin', () => {
			const svc = new PageAnalyzer({ viewportExpansion: 500 });
			const nearNode = makeNode({
				tagName: 'a',
				highlightIndex: 0 as ElementRef,
				rect: { x: 0, y: 1100, width: 100, height: 30 },
			});
			const root = makeNode({ children: [nearNode] });

			(svc as any).applyViewportThresholdFilter(
				root,
				{ width: 1280, height: 800 },
				{ x: 0, y: 0 },
			);

			// y=1100 is within 0..800+500=1300, so should be kept
			expect(nearNode.highlightIndex).toBe(0 as ElementRef);
		});

		test('removes elements far to the right of the viewport', () => {
			const farRightNode = makeNode({
				tagName: 'button',
				highlightIndex: 0 as ElementRef,
				rect: { x: 5000, y: 100, width: 100, height: 30 },
			});
			const root = makeNode({ children: [farRightNode] });

			(service as any).applyViewportThresholdFilter(
				root,
				{ width: 1280, height: 800 },
				{ x: 0, y: 0 },
			);

			expect(farRightNode.highlightIndex).toBeUndefined();
		});
	});

	describe('integrateShadowDOMChildren (via private access)', () => {
		test('merges shadow children into the children array', () => {
			const shadowChild = makeNode({ tagName: 'span', text: 'shadow' });
			const regularChild = makeNode({ tagName: 'p', text: 'regular' });
			const root = makeNode({
				children: [regularChild],
				shadowChildren: [shadowChild],
			});

			(service as any).integrateShadowDOMChildren(root);

			expect(root.children).toHaveLength(2);
			expect(root.children[0].tagName).toBe('span'); // shadow comes first
			expect(root.children[1].tagName).toBe('p');
			expect(root.children[0].isShadowRoot).toBe(true);
			expect(root.children[0].parentNode).toBe(root);
			expect(root.shadowChildren).toBeUndefined();
		});

		test('handles nodes with no shadow children', () => {
			const root = makeNode({
				children: [makeNode({ tagName: 'div' })],
			});

			(service as any).integrateShadowDOMChildren(root);
			expect(root.children).toHaveLength(1);
		});
	});
});


================================================
FILE: packages/core/src/page/page-analyzer.ts
================================================
import type { CDPSession, Page } from 'playwright';
import { SnapshotBuilder } from './snapshot-builder.js';
import { TreeRenderer, type RendererOptions } from './renderer/tree-renderer.js';
import type {
	PageTreeNode,
	RenderedPageState,
	SelectorIndex,
	TargetInfo,
	TargetAllTrees,
	InteractedElement,
} from './types.js';
import { PageExtractionError } from '../errors.js';
import { createLogger } from '../logging.js';
import { timed } from '../telemetry.js';
import type { ElementRef } from '../types.js';

// Module-level logger shared by everything in this file, created under the name 'dom'.
const logger = createLogger('dom');

/**
 * Construction options for {@link PageAnalyzer}. Every field is optional; the
 * defaults listed below are the ones applied by the `PageAnalyzer` constructor.
 */
export interface PageAnalyzerOptions {
	/**
	 * Extra options for the internal `TreeRenderer`. They are spread after
	 * `capturedAttributes` and `maxElementsInDom`, so they override both.
	 */
	serializer?: Partial<RendererOptions>;
	/**
	 * Attribute names passed to the snapshot builder and the renderer.
	 * Defaults to: title, type, name, role, tabindex, aria-label, placeholder,
	 * value, alt, aria-expanded.
	 */
	capturedAttributes?: string[];
	/** Maximum number of child frames examined by `extractWithIframes`. Default: 3. */
	maxIframes?: number;
	/**
	 * Margin in pixels added around the viewport when deciding which elements
	 * keep their highlight index. Default: 0. A negative value skips the
	 * viewport filter altogether.
	 */
	viewportExpansion?: number;
	/** Forwarded to the `TreeRenderer` as `maxElementsInDom`. Default: 2000. */
	maxElementsInDom?: number;
}

/**
 * Captures the DOM of a Playwright page through CDP, renders it into the
 * textual state handed to the LLM, and maps element indices from the most
 * recent extraction back to concrete page interactions (click / text input).
 *
 * The analyzer is stateful: each successful extraction replaces the cached
 * tree, selector map and hidden-element hints, and each successful indexed
 * click or input is appended to an interaction log.
 */
export class PageAnalyzer {
	/** Maximum number of hidden-element hints written verbatim into the rendered tree. */
	private static readonly MAX_RENDERED_HINTS = 10;

	private readonly snapshotProcessor: SnapshotBuilder;
	private readonly serializer: TreeRenderer;
	private readonly capturedAttributes: string[];
	private readonly maxIframes: number;
	private readonly viewportExpansion: number;
	private readonly maxElementsInDom: number;

	private cachedTree: PageTreeNode | null = null;
	private cachedSelectorMap: SelectorIndex | null = null;
	private interactedElements: InteractedElement[] = [];
	private hiddenElementHints: string[] = [];

	/**
	 * @param options Optional overrides; unspecified fields fall back to the
	 *   defaults assigned below.
	 */
	constructor(options?: PageAnalyzerOptions) {
		this.snapshotProcessor = new SnapshotBuilder();
		this.capturedAttributes = options?.capturedAttributes ?? [
			'title', 'type', 'name', 'role', 'tabindex',
			'aria-label', 'placeholder', 'value', 'alt', 'aria-expanded',
		];
		this.maxIframes = options?.maxIframes ?? 3;
		this.viewportExpansion = options?.viewportExpansion ?? 0;
		this.maxElementsInDom = options?.maxElementsInDom ?? 2000;
		this.serializer = new TreeRenderer({
			capturedAttributes: this.capturedAttributes,
			maxElementsInDom: this.maxElementsInDom,
			// Caller-supplied renderer options win over the two values above.
			...options?.serializer,
		});
	}

	/**
	 * Extract the current page state, wrapped in the `dom-extract` telemetry timer.
	 *
	 * @returns The rendered page state produced by the tree renderer.
	 * @throws PageExtractionError when any step of the extraction fails.
	 */
	async extractState(
		page: Page,
		cdpSession: CDPSession,
	): Promise<RenderedPageState> {
		const { result } = await timed('dom-extract', () =>
			this._extractState(page, cdpSession),
		);
		return result;
	}

	/**
	 * Untimed extraction pipeline: snapshot -> tree -> shadow DOM merge ->
	 * hidden-element hints -> viewport filter -> serialization.
	 * Updates `cachedTree`, `cachedSelectorMap` and `hiddenElementHints`.
	 *
	 * @throws PageExtractionError wrapping whatever failed underneath.
	 */
	private async _extractState(
		page: Page,
		cdpSession: CDPSession,
	): Promise<RenderedPageState> {
		try {
			// Capture CDP snapshot
			const { domSnapshot, axTree } = await this.snapshotProcessor.captureSnapshot(cdpSession);

			// Viewport is synchronous in Playwright; fall back when it is unset.
			const viewportSize = page.viewportSize() ?? { width: 1280, height: 1100 };

			// Scroll offset and document size are independent, so fetch them in parallel.
			const [scrollPosition, documentSize] = await Promise.all([
				page.evaluate(() => ({ x: window.scrollX, y: window.scrollY })),
				page.evaluate(() => ({
					width: document.documentElement.scrollWidth,
					height: document.documentElement.scrollHeight,
				})),
			]);

			// Build enhanced DOM tree
			const { root } = this.snapshotProcessor.buildTree(
				domSnapshot,
				axTree,
				viewportSize,
				this.capturedAttributes,
			);

			// Traverse shadow DOM roots and merge their children into the main tree
			this.integrateShadowDOMChildren(root);

			// Collect scroll hints BEFORE the viewport filter runs. The filter
			// clears `highlightIndex` on off-viewport nodes, and hint collection
			// only considers nodes that still carry one - running it afterwards
			// would drop exactly the elements the hints are meant to describe.
			this.hiddenElementHints = this.collectHiddenElementHints(
				root,
				viewportSize,
				scrollPosition,
			);

			// Filter interactive elements by viewport visibility threshold.
			// Elements far outside the expanded viewport are stripped of their
			// highlight index so they do not clutter the serialized output.
			if (this.viewportExpansion >= 0) {
				this.applyViewportThresholdFilter(root, viewportSize, scrollPosition);
			}

			this.cachedTree = root;

			// Serialize for LLM
			const state = this.serializer.serializeTree(
				root,
				scrollPosition,
				viewportSize,
				documentSize,
			);

			this.cachedSelectorMap = state.selectorMap;

			// Append hidden element hints (capped, with a "... and N more" tail)
			const hints = this.hiddenElementHints;
			if (hints.length > 0) {
				const limit = PageAnalyzer.MAX_RENDERED_HINTS;
				state.tree += '\n\n--- Hidden interactive elements (scroll to access) ---\n';
				state.tree += hints.slice(0, limit).join('\n');
				if (hints.length > limit) {
					state.tree += `\n... and ${hints.length - limit} more`;
				}
			}

			logger.debug(
				`Extracted DOM: ${state.elementCount} elements, ${state.interactiveElementCount} interactive`,
			);

			return state;
		} catch (error) {
			throw new PageExtractionError(
				`Failed to extract DOM state: ${error instanceof Error ? error.message : String(error)}`,
				{ cause: error instanceof Error ? error : undefined },
			);
		}
	}

	/**
	 * Extract the main frame tree plus a placeholder tree for each child frame.
	 *
	 * At most `maxIframes` child frames (in `page.frames()` order) are examined;
	 * blank and duplicate URLs among them are skipped. A frame whose body can be
	 * evaluated and is non-empty gets a placeholder node directly; otherwise the
	 * frame is looked up as a separate CDP target. Failures for individual
	 * frames are logged at debug level and do not fail the call.
	 *
	 * @throws PageExtractionError when the main frame extraction fails.
	 */
	async extractWithIframes(
		page: Page,
		cdpSession: CDPSession,
	): Promise<TargetAllTrees> {
		await this._extractState(page, cdpSession);
		const mainTree = this.cachedTree;
		if (!mainTree) {
			// Defensive: a successful extraction always sets the cached tree.
			throw new PageExtractionError('Main frame tree is unavailable after DOM extraction');
		}

		const iframeTrees: TargetAllTrees['iframeTrees'] = [];

		try {
			// Index 0 is the main frame; take the next `maxIframes` frames.
			const childFrames = page.frames().slice(1, this.maxIframes + 1);
			const processedUrls = new Set<string>();

			for (const frame of childFrames) {
				try {
					const url = frame.url();
					if (!url || url === 'about:blank' || processedUrls.has(url)) continue;
					processedUrls.add(url);

					const targetInfo: TargetInfo = {
						targetId: url,
						type: 'iframe',
						url,
						attached: true,
					};

					// Try direct frame evaluation first. Only a boolean crosses the
					// wire; the markup itself is never used.
					const hasContent = await frame
						.evaluate(() => (document.body?.innerHTML ?? '').length > 0)
						.catch(() => false);
					if (hasContent) {
						iframeTrees.push({
							targetInfo,
							tree: {
								tagName: 'iframe',
								nodeType: 'element',
								attributes: { src: url },
								children: [],
								isVisible: true,
								isInteractive: false,
								isClickable: false,
								isEditable: false,
								isScrollable: false,
								text: `[iframe: ${url}]`,
							},
						});
						continue;
					}

					// Cross-origin: use CDP Target discovery to attach a session
					const iframeTree = await this.extractCrossOriginIframe(cdpSession, url);
					if (iframeTree) {
						iframeTrees.push({
							targetInfo,
							tree: iframeTree,
						});
					}
				} catch (error) {
					logger.debug(`Failed to extract iframe ${frame.url()}: ${error}`);
				}
			}
		} catch (error) {
			logger.debug(`Failed to extract iframe trees: ${error}`);
		}

		return { mainTree, iframeTrees };
	}

	/**
	 * Locate the CDP target of a cross-origin iframe, attach to it, request a
	 * DOM snapshot and return a placeholder node for the frame.
	 *
	 * NOTE(review): the snapshot response is not parsed; it only decides which
	 * label the placeholder node receives. The returned node never has children.
	 *
	 * @returns A placeholder node, or `null` when no matching target exists or
	 *   any CDP call fails (failures are logged at debug level).
	 */
	private async extractCrossOriginIframe(
		cdpSession: CDPSession,
		iframeUrl: string,
	): Promise<PageTreeNode | null> {
		try {
			const { targetInfos } = await cdpSession.send('Target.getTargets', {}) as unknown as {
				targetInfos: Array<{ targetId: string; type: string; url: string; attached: boolean }>;
			};

			const iframeTarget = targetInfos.find(
				(t) => t.type === 'iframe' && t.url === iframeUrl,
			);
			if (!iframeTarget) {
				logger.debug(`No CDP target found for cross-origin iframe: ${iframeUrl}`);
				return null;
			}

			// Attach to the iframe target
			const { sessionId: iframeSessionId } = await cdpSession.send('Target.attachToTarget', {
				targetId: iframeTarget.targetId,
				flatten: true,
			}) as unknown as { sessionId: string };

			try {
				// Request a DOM snapshot from the iframe session
				const snapshotResult = await cdpSession.send('Target.sendMessageToTarget', {
					sessionId: iframeSessionId,
					message: JSON.stringify({
						id: 1,
						method: 'DOMSnapshot.captureSnapshot',
						params: {
							computedStyles: ['display', 'visibility', 'opacity'],
							includeDOMRects: true,
						},
					}),
				}) as unknown;

				// Any object response switches the label to the "content" variant.
				const gotResponse = Boolean(snapshotResult) && typeof snapshotResult === 'object';

				return {
					tagName: 'iframe',
					nodeType: 'element',
					attributes: { src: iframeUrl },
					children: [],
					isVisible: true,
					isInteractive: false,
					isClickable: false,
					isEditable: false,
					isScrollable: false,
					text: gotResponse
						? `[cross-origin iframe content: ${iframeUrl}]`
						: `[cross-origin iframe: ${iframeUrl}]`,
				};
			} finally {
				// Always detach, even when the snapshot request throws; detach
				// errors are deliberately ignored (best-effort cleanup).
				await cdpSession.send('Target.detachFromTarget', {
					sessionId: iframeSessionId,
				}).catch(() => {});
			}
		} catch (error) {
			logger.debug(`CDP cross-origin iframe extraction failed for ${iframeUrl}: ${error}`);
			return null;
		}
	}

	/**
	 * Collect human-readable hints about indexed interactive elements that are
	 * not visible and start above or below the current viewport.
	 *
	 * Distances are expressed in viewport heights ("pages"), one decimal place.
	 * Element `rect.y` is compared against the scroll offset, i.e. it is treated
	 * as a document coordinate.
	 *
	 * @returns One line per qualifying element, in tree traversal order.
	 */
	private collectHiddenElementHints(
		root: PageTreeNode,
		viewportSize: { width: number; height: number },
		scrollPosition: { x: number; y: number },
	): string[] {
		const hints: string[] = [];
		const viewportTop = scrollPosition.y;
		const viewportBottom = viewportTop + viewportSize.height;

		// Shared formatter for both directions.
		const formatHint = (
			node: PageTreeNode,
			distancePx: number,
			direction: 'above' | 'below',
		): string => {
			const pages = (distancePx / viewportSize.height).toFixed(1);
			const desc = node.ariaLabel || node.text?.trim()?.slice(0, 50) || node.tagName;
			return `${node.tagName} '${desc}' is ~${pages} pages ${direction}`;
		};

		const visit = (node: PageTreeNode): void => {
			const qualifies =
				node.isInteractive &&
				node.rect &&
				!node.isVisible &&
				node.highlightIndex !== undefined;

			if (qualifies && node.rect) {
				const elementY = node.rect.y;
				if (elementY > viewportBottom) {
					hints.push(formatHint(node, elementY - viewportBottom, 'below'));
				} else if (elementY < viewportTop) {
					hints.push(formatHint(node, viewportTop - elementY, 'above'));
				}
			}
			for (const child of node.children) {
				visit(child);
			}
		};

		visit(root);
		return hints;
	}

	/**
	 * Apply viewport threshold filtering to the tree (mutates nodes in place).
	 * Indexed elements whose rects fall entirely outside the expanded viewport
	 * have their highlightIndex removed so they are not serialized as interactive.
	 * The expansion margin is controlled by viewportExpansion (in pixels).
	 */
	private applyViewportThresholdFilter(
		root: PageTreeNode,
		viewportSize: { width: number; height: number },
		scrollPosition: { x: number; y: number },
	): void {
		const expansion = this.viewportExpansion;
		const vpTop = scrollPosition.y - expansion;
		const vpBottom = scrollPosition.y + viewportSize.height + expansion;
		const vpLeft = scrollPosition.x - expansion;
		const vpRight = scrollPosition.x + viewportSize.width + expansion;

		const visit = (node: PageTreeNode): void => {
			if (node.highlightIndex !== undefined && node.rect) {
				const nodeBottom = node.rect.y + node.rect.height;
				const nodeRight = node.rect.x + node.rect.width;

				// Element is entirely outside the expanded viewport
				const outsideVertically = nodeBottom < vpTop || node.rect.y > vpBottom;
				const outsideHorizontally = nodeRight < vpLeft || node.rect.x > vpRight;

				if (outsideVertically || outsideHorizontally) {
					// Remove the highlight index so it will not appear in the serialized map,
					// but keep the node in the tree for structure.
					node.highlightIndex = undefined;
				}
			}
			for (const child of node.children) {
				visit(child);
			}
		};

		visit(root);
	}

	/**
	 * Walk the tree and integrate shadow DOM children (mutates nodes in place).
	 * Nodes that have shadowChildren get those children merged into the
	 * regular children array so downstream serialization handles them uniformly.
	 */
	private integrateShadowDOMChildren(root: PageTreeNode): void {
		const visit = (node: PageTreeNode): void => {
			const shadowChildren = node.shadowChildren;
			if (shadowChildren && shadowChildren.length > 0) {
				for (const shadowChild of shadowChildren) {
					shadowChild.parentNode = node;
					shadowChild.isShadowRoot = true;
				}
				// Shadow children go first, ahead of the regular children.
				node.children = [...shadowChildren, ...node.children];
				node.shadowChildren = undefined;
			}
			// Recurse over the merged list so nested shadow roots are handled too.
			for (const child of node.children) {
				visit(child);
			}
		};

		visit(root);
	}

	/**
	 * Look up the CSS selector recorded for an element index in the last extraction.
	 *
	 * @returns The selector, or `undefined` when nothing is cached for `index`.
	 */
	async getElementSelector(index: number): Promise<string | undefined> {
		return this.cachedSelectorMap?.[index]?.cssSelector;
	}

	/**
	 * Describe a node through CDP and derive a simple selector for it: an id
	 * selector when the node has a non-empty `id`, otherwise its lower-cased
	 * node name.
	 *
	 * @returns `{ selector }`, or `null` when the node is unknown or the CDP call fails.
	 */
	async getElementByBackendNodeId(
		cdpSession: CDPSession,
		backendNodeId: number,
	): Promise<{ selector: string } | null> {
		try {
			const result = await cdpSession.send('DOM.describeNode', {
				backendNodeId,
			}) as { node: { nodeName: string; attributes?: string[] } };

			if (!result?.node) return null;

			// CDP returns attributes as a flat [name, value, name, value, ...] list.
			const attrs = result.node.attributes ?? [];
			for (let i = 0; i < attrs.length; i += 2) {
				const value = attrs[i + 1];
				if (attrs[i] === 'id' && value) {
					return { selector: PageAnalyzer.toIdSelector(value) };
				}
			}

			return { selector: result.node.nodeName.toLowerCase() };
		} catch {
			return null;
		}
	}

	/**
	 * Build a selector that matches the element with the given id.
	 * Plain identifier-like ids keep the short `#id` form; anything else
	 * (leading digit, colon, dot, spaces, ...) would be an invalid `#` selector,
	 * so it is emitted as a quoted attribute selector instead.
	 */
	private static toIdSelector(id: string): string {
		if (/^[A-Za-z_][\w-]*$/.test(id)) {
			return `#${id}`;
		}
		const escaped = id.replace(/["\\]/g, '\\$&');
		return `[id="${escaped}"]`;
	}

	/**
	 * Click an element using a fallback chain:
	 * 1. CDP box model (most reliable for overlapping elements)
	 * 2. JS getBoundingClientRect
	 * 3. CSS selector click
	 *
	 * A successful click is recorded in the interaction log.
	 *
	 * @throws PageExtractionError when `index` is not in the cached selector map.
	 *   Errors from the final CSS selector click propagate unchanged.
	 */
	async clickElementByIndex(
		page: Page,
		cdpSession: CDPSession,
		index: number,
	): Promise<void> {
		const selectorInfo = this.cachedSelectorMap?.[index];
		if (!selectorInfo) {
			throw new PageExtractionError(`Element with index ${index} not found in selector map`);
		}

		// Strategy 1: CDP box model click
		if (selectorInfo.backendNodeId) {
			try {
				const { model } = await cdpSession.send('DOM.getBoxModel', {
					backendNodeId: selectorInfo.backendNodeId,
				}) as { model: { content: number[] } };

				// A quad is four (x, y) corner pairs; anything shorter would yield
				// NaN coordinates, so fall through to the next strategy instead.
				const quad = model?.content;
				if (quad && quad.length >= 8) {
					const [x1, y1, x2, y2, x3, y3, x4, y4] = quad;
					const centerX = (x1 + x2 + x3 + x4) / 4;
					const centerY = (y1 + y2 + y3 + y4) / 4;

					await page.mouse.click(centerX, centerY);
					this.recordInteraction(index, selectorInfo.tagName, 'click');
					return;
				}
			} catch {
				logger.debug(`CDP box model click failed for index ${index}, trying JS fallback`);
			}
		}

		// Strategy 2: JS getBoundingClientRect
		try {
			const rect = await page.evaluate((sel: string) => {
				const el = document.querySelector(sel);
				if (!el) return null;
				const r = el.getBoundingClientRect();
				return { x: r.x + r.width / 2, y: r.y + r.height / 2 };
			}, selectorInfo.cssSelector);

			if (rect) {
				await page.mouse.click(rect.x, rect.y);
				this.recordInteraction(index, selectorInfo.tagName, 'click');
				return;
			}
		} catch {
			logger.debug(`JS rect click failed for index ${index}, trying CSS selector`);
		}

		// Strategy 3: CSS selector
		await page.click(selectorInfo.cssSelector, { timeout: 5000 });
		this.recordInteraction(index, selectorInfo.tagName, 'click');
	}

	/**
	 * Click at specific coordinates on the page.
	 * Not tied to an element index, so nothing is added to the interaction log.
	 */
	async clickAtCoordinates(
		page: Page,
		x: number,
		y: number,
	): Promise<void> {
		await page.mouse.click(x, y);
	}

	/**
	 * Enter text into the element registered under `index`.
	 *
	 * @param _cdpSession Unused; kept for signature parity with the click API.
	 * @param clearFirst When true (default) the field is filled via `page.fill`;
	 *   when false the element is clicked and the text is typed via the keyboard.
	 * @throws PageExtractionError when `index` is not in the cached selector map.
	 */
	async inputTextByIndex(
		page: Page,
		_cdpSession: CDPSession,
		index: number,
		text: string,
		clearFirst = true,
	): Promise<void> {
		const selectorInfo = this.cachedSelectorMap?.[index];
		if (!selectorInfo) {
			throw new PageExtractionError(`Element with index ${index} not found in selector map`);
		}

		const selector = selectorInfo.cssSelector;

		if (clearFirst) {
			await page.fill(selector, text);
		} else {
			await page.click(selector);
			await page.keyboard.type(text);
		}

		this.recordInteraction(index, selectorInfo.tagName, 'input');
	}

	/** Append one entry, stamped with the current time, to the interaction log. */
	private recordInteraction(
		index: number,
		tagName: string,
		action: string,
	): void {
		this.interactedElements.push({
			index: index as ElementRef,
			tagName,
			action,
			timestamp: Date.now(),
		});
	}

	/** Returns a shallow copy of the interaction log, so callers cannot mutate it. */
	getInteractedElements(): InteractedElement[] {
		return [...this.interactedElements];
	}

	/** Empties the interaction log. */
	clearInteractedElements(): void {
		this.interactedElements = [];
	}

	/** Tree from the most recent extraction, or `null` if none is cached. */
	getCachedTree(): PageTreeNode | null {
		return this.cachedTree;
	}

	/** Selector map from the most recent extraction, or `null` if none is cached. */
	getCachedSelectorMap(): SelectorIndex | null {
		return this.cachedSelectorMap;
	}

	/** Drops the cached tree, selector map and hints; the interaction log is kept. */
	clearCache(): void {
		this.cachedTree = null;
		this.cachedSelectorMap = null;
		this.hiddenElementHints = [];
	}
}


================================================
FILE: packages/core/src/page/renderer/interactive-elements.ts
================================================
import type { PageTreeNode } from '../types.js';

/** Tag names treated as clickable regardless of role or attributes. */
const ALWAYS_CLICKABLE_TAGS = new Set([
	'a',
	'button',
	'input',
	'select',
	'textarea',
	'summary',
]);

/** Role values (computed role or `role` attribute) that mark an element as clickable. */
const CLICKABLE_ROLES = new Set([
	'button',
	'link',
	'menuitem',
	'option',
	'tab',
	'treeitem',
	'checkbox',
	'radio',
	'switch',
]);

/**
 * Decide whether a tree node should be treated as clickable.
 *
 * The checks run in order and stop at the first match: natively clickable
 * tag, clickable computed role, `onclick` attribute, a `tabindex` other than
 * `-1`, clickable `role` attribute. When none match, the node's own
 * `isClickable` flag is returned.
 */
export function isClickableElement(node: PageTreeNode): boolean {
	const hasClickableRole = (value: string | undefined): boolean =>
		value != null && CLICKABLE_ROLES.has(value);
	const hasFocusableTabindex = (value: string | undefined): boolean =>
		Boolean(value) && value !== '-1';

	return (
		ALWAYS_CLICKABLE_TAGS.has(node.tagName) ||
		hasClickableRole(node.role) ||
		Boolean(node.attributes['onclick']) ||
		hasFocusableTabindex(node.attributes['tabindex']) ||
		hasClickableRole(node.attributes['role']) ||
		node.isClickable
	);
}

/**
 * Produce a short human-readable label for a clickable node.
 *
 * The first non-blank candidate wins, in this order: `ariaLabel`, the node
 * text (trimmed and capped at 50 characters), then the `title`, `alt` and
 * `placeholder` attributes. Blank (empty or whitespace-only) candidates are
 * skipped so they cannot mask a meaningful fallback further down the list.
 *
 * @returns The chosen label, or the node's tag name when no candidate has content.
 */
export function getClickableDescription(node: PageTreeNode): string {
	const hasContent = (value: string | undefined | null): value is string =>
		typeof value === 'string' && value.trim().length > 0;

	if (hasContent(node.ariaLabel)) {
		return node.ariaLabel;
	}

	if (hasContent(node.text)) {
		return node.text.trim().slice(0, 50);
	}

	// Attribute fallbacks, most descriptive first.
	for (const attributeName of ['title', 'alt', 'placeholder']) {
		const value = node.attributes[attributeName];
		if (hasContent(value)) {
			return value;
		}
	}

	return node.tagName;
}


================================================
FILE: packages/core/src/page/renderer/layer-order.ts
================================================
import type { PageTreeNode, DOMRect } from '../types.js';

/**
 * Drops interactive elements that are covered by another element painted above them.
 *
 * Visible nodes with a rect are bucketed into 50px grid cells keyed by the
 * top-left corner of their rect. Within a bucket every pair is compared: when
 * the two rects overlap by at least 50% of the smaller one, the node with the
 * lower `paintOrder` (missing values count as 0) is marked as hidden. Ties
 * hide neither node. Nodes without a rect, or that are not visible, are never
 * compared and are always kept.
 *
 * @param nodes - Candidate nodes; the array itself is returned when it is empty.
 * @returns The input nodes, in their original order, minus the hidden ones.
 */
export function filterByPaintOrder(nodes: PageTreeNode[]): PageTreeNode[] {
	if (nodes.length === 0) return nodes;

	const cellSize = 50;
	const buckets = new Map<string, PageTreeNode[]>();

	// Bucket nodes by the grid cell containing their top-left corner.
	for (const candidate of nodes) {
		if (!candidate.rect || !candidate.isVisible) continue;

		const column = Math.floor(candidate.rect.x / cellSize);
		const row = Math.floor(candidate.rect.y / cellSize);
		const bucketKey = `${column},${row}`;

		const bucket = buckets.get(bucketKey);
		if (bucket) {
			bucket.push(candidate);
		} else {
			buckets.set(bucketKey, [candidate]);
		}
	}

	const occluded = new Set<PageTreeNode>();

	buckets.forEach((bucket) => {
		if (bucket.length < 2) return;

		for (const [firstIdx, first] of bucket.entries()) {
			for (let secondIdx = firstIdx + 1; secondIdx < bucket.length; secondIdx++) {
				const second = bucket[secondIdx];
				if (!rectsOverlap(first.rect!, second.rect!, 0.5)) continue;

				const firstPaint = first.paintOrder ?? 0;
				const secondPaint = second.paintOrder ?? 0;

				// Only a strictly lower paint order loses; equal orders keep both.
				if (firstPaint < secondPaint) {
					occluded.add(first);
				} else if (secondPaint < firstPaint) {
					occluded.add(second);
				}
			}
		}
	});

	return nodes.filter((candidate) => !occluded.has(candidate));
}

/**
 * Tests whether two rects overlap by at least `threshold` of the smaller rect's area.
 *
 * @param a - First rect.
 * @param b - Second rect.
 * @param threshold - Minimum ratio of intersection area to the smaller rect's area.
 * @returns `false` when the smaller area is not positive; otherwise whether the
 *          ratio reaches the threshold.
 */
function rectsOverlap(a: DOMRect, b: DOMRect, threshold: number): boolean {
	const sharedWidth = Math.min(a.x + a.width, b.x + b.width) - Math.max(a.x, b.x);
	const sharedHeight = Math.min(a.y + a.height, b.y + b.height) - Math.max(a.y, b.y);

	// Negative extents mean the rects do not touch on that axis.
	const intersectionArea = Math.max(0, sharedWidth) * Math.max(0, sharedHeight);
	const minArea = Math.min(a.width * a.height, b.width * b.height);

	if (!(minArea > 0)) {
		return false;
	}
	return intersectionArea / minArea >= threshold;
}


================================================
FILE: packages/core/src/page/renderer/tree-renderer.ts
================================================
import type { PageTreeNode, SelectorIndex, RenderedPageState } from '../types.js';
import type { ElementRef } from '../../types.js';
import { isClickableElement, getClickableDescription } from './interactive-elements.js';
import { filterByPaintOrder } from './layer-order.js';

/** Settings that control how TreeRenderer serializes a page tree. */
export interface RendererOptions {
	/** Names of DOM attributes copied into each serialized tag when non-empty. */
	capturedAttributes: string[];
	/** Nodes nested deeper than this are not serialized. */
	maxDepth: number;
	/** When true, interactive elements are passed through filterByPaintOrder first. */
	filterPaintOrder: boolean;
	/** Element cap; once exceeded, serialization stops and a truncation note is appended. */
	maxElementsInDom: number;
	/** When true, each <svg> subtree is rendered as a single placeholder line. */
	collapseSvg: boolean;
	/** When true, long runs of similar non-interactive siblings are summarized. */
	deduplicateSiblings: boolean;
	/** A sibling run is summarized only when its length is greater than this value. */
	siblingDeduplicateThreshold: number;
	/** Child/parent area ratio above which a generic single-child wrapper is skipped. */
	containmentThreshold: number;
}

/** Baseline renderer settings; constructor overrides are merged on top of these. */
const DEFAULT_OPTIONS: RendererOptions = {
	capturedAttributes: [
		'title',
		'type',
		'name',
		'role',
		'tabindex',
		'aria-label',
		'placeholder',
		'value',
		'alt',
		'aria-expanded',
	],
	maxDepth: 100,
	filterPaintOrder: true,
	maxElementsInDom: 2000,
	collapseSvg: true,
	deduplicateSiblings: true,
	siblingDeduplicateThreshold: 5,
	containmentThreshold: 0.95,
};

/** Lower-case tag names treated as SVG content by the renderer. */
const SVG_TAGS = new Set<string>([
	'svg',
	'path',
	'rect',
	'circle',
	'ellipse',
	'line',
	'polyline',
	'polygon',
	'g',
	'defs',
	'use',
	'symbol',
	'clippath',
	'lineargradient',
	'radialgradient',
	'stop',
	'text',
	'tspan',
	'mask',
	'filter',
]);

/**
 * Serializes a page tree into an indented, tag-like text representation and
 * builds the selector map for highlighted elements.
 */
export class TreeRenderer {
	/** Tags that isRedundantWrapper is allowed to skip as pass-through wrappers. */
	private static readonly GENERIC_WRAPPER_TAGS: ReadonlySet<string> = new Set([
		'div', 'span', 'section', 'article', 'main',
	]);

	/** Ids matching this pattern can be written as `#id` without any escaping. */
	private static readonly SIMPLE_ID_PATTERN = /^[A-Za-z_][A-Za-z0-9_-]*$/;

	/** How many siblings of a deduplicated run are rendered before the summary line. */
	private static readonly DEDUP_SHOWN_SIBLINGS = 3;

	private options: RendererOptions;

	/**
	 * @param options - Overrides merged on top of DEFAULT_OPTIONS.
	 */
	constructor(options?: Partial<RendererOptions>) {
		this.options = { ...DEFAULT_OPTIONS, ...options };
	}

	/**
	 * Renders the tree rooted at `root` and collects selector information for
	 * every highlighted element that survives paint-order and off-canvas filtering.
	 *
	 * @param root - Root of the page tree.
	 * @param scrollPosition - Current scroll offset of the page.
	 * @param viewportSize - Size of the visible viewport.
	 * @param documentSize - Full size of the document.
	 * @returns The serialized tree text, the selector map, element counts and
	 *          the scroll/viewport/document metrics (including pixels above/below).
	 */
	serializeTree(
		root: PageTreeNode,
		scrollPosition: { x: number; y: number },
		viewportSize: { width: number; height: number },
		documentSize: { width: number; height: number },
	): RenderedPageState {
		const selectorMap: SelectorIndex = {};
		const interactiveElements: PageTreeNode[] = [];

		// Collect interactive elements
		this.collectInteractiveElements(root, interactiveElements);

		// Filter by paint order if enabled
		let visibleElements = this.options.filterPaintOrder
			? filterByPaintOrder(interactiveElements)
			: interactiveElements;

		// Drop degenerate/off-canvas elements; elements that are merely scrolled
		// out of view are kept and additionally recorded in offScreenHidden.
		const offScreenHidden: PageTreeNode[] = [];
		visibleElements = this.filterOffScreenElements(
			visibleElements,
			scrollPosition,
			viewportSize,
			documentSize,
			offScreenHidden,
		);

		// Build selector map
		for (const node of visibleElements) {
			if (node.highlightIndex !== undefined) {
				selectorMap[node.highlightIndex] = {
					cssSelector: node.cssSelector ?? this.buildCssSelector(node),
					xpath: node.xpath,
					backendNodeId: node.backendNodeId,
					tagName: node.tagName,
					role: node.role,
					ariaLabel: node.ariaLabel,
					text: node.text?.trim()?.slice(0, 100),
				};
			}
		}

		// Serialize to text with element cap
		const lines: string[] = [];
		const maxElements = this.options.maxElementsInDom;

		const countingContext = { count: 0, maxReached: false };
		this.serializeNode(root, lines, 0, selectorMap, countingContext, maxElements);
		const elementCount = Object.keys(selectorMap).length;

		if (countingContext.maxReached) {
			lines.push(`\n[... DOM truncated at ${maxElements} elements]`);
		}

		// Append hidden element hint section for off-screen interactive elements
		const hiddenHints = this.formatHiddenElementHints(offScreenHidden, scrollPosition, viewportSize);
		if (hiddenHints.length > 0) {
			lines.push('');
			lines.push('--- Off-screen interactive elements ---');
			for (const hint of hiddenHints.slice(0, 15)) {
				lines.push(hint);
			}
			if (hiddenHints.length > 15) {
				lines.push(`... and ${hiddenHints.length - 15} more off-screen elements`);
			}
		}

		const pixelsAbove = scrollPosition.y;
		const pixelsBelow = Math.max(0, documentSize.height - scrollPosition.y - viewportSize.height);

		return {
			tree: lines.join('\n'),
			selectorMap,
			elementCount,
			interactiveElementCount: visibleElements.length,
			scrollPosition,
			viewportSize,
			documentSize,
			pixelsAbove,
			pixelsBelow,
		};
	}

	/**
	 * Appends the text form of `node` (and, recursively, its children) to `lines`.
	 *
	 * @param node - Node to render.
	 * @param lines - Output buffer; one entry per rendered line.
	 * @param depth - Current nesting depth, used for tab indentation.
	 * @param selectorMap - Highlight indices are only printed for entries present here.
	 * @param ctx - Shared element counter and truncation flag.
	 * @param maxElements - Element cap after which rendering stops.
	 */
	private serializeNode(
		node: PageTreeNode,
		lines: string[],
		depth: number,
		selectorMap: SelectorIndex,
		ctx: { count: number; maxReached: boolean },
		maxElements: number,
	): void {
		if (depth > this.options.maxDepth) return;
		if (ctx.maxReached) return;
		if (!node.isVisible && node.nodeType === 'element' && node.children.length === 0) return;

		const indent = '\t'.repeat(depth);

		if (node.nodeType === 'text') {
			const text = node.text?.trim();
			if (text) {
				lines.push(`${indent}${text}`);
			}
			return;
		}

		// Skip invisible non-interactive containers with no visible children
		if (!node.isVisible && !node.isInteractive && !this.hasVisibleDescendant(node)) {
			return;
		}

		// Collapse an <svg> subtree into a single placeholder line whose text is
		// the best description resolveSvgDescription can find.
		if (this.options.collapseSvg && node.tagName === 'svg') {
			const desc = this.resolveSvgDescription(node);
			if (node.highlightIndex !== undefined && selectorMap[node.highlightIndex]) {
				lines.push(`${indent}[${node.highlightIndex}]<svg>${desc}</svg>`);
			} else {
				lines.push(`${indent}<svg>${desc}</svg>`);
			}
			ctx.count++;
			return;
		}

		// Skip inner SVG elements
		if (SVG_TAGS.has(node.tagName) && node.tagName !== 'svg') {
			return;
		}

		ctx.count++;
		if (ctx.count > maxElements) {
			ctx.maxReached = true;
			return;
		}

		// Redundant wrappers are not printed; their children are rendered at the
		// wrapper's own depth instead.
		if (this.isRedundantWrapper(node)) {
			for (const child of node.children) {
				this.serializeNode(child, lines, depth, selectorMap, ctx, maxElements);
			}
			return;
		}

		// Build tag representation
		const parts: string[] = [];

		// Highlight index for interactive elements
		if (node.highlightIndex !== undefined && selectorMap[node.highlightIndex]) {
			parts.push(`[${node.highlightIndex}]`);
		}

		// Tag name
		parts.push(`<${node.tagName}`);

		// Attributes
		const attrParts: string[] = [];
		const hasLiveValue = node.inputValue !== undefined;
		let roleAttributeEmitted = false;
		for (const attr of this.options.capturedAttributes) {
			// The live input value is appended below; skipping the static attribute
			// keeps the tag from carrying two conflicting value="..." entries.
			if (attr === 'value' && hasLiveValue) continue;

			const value = node.attributes[attr];
			if (value !== undefined && value !== '') {
				attrParts.push(`${attr}="${value}"`);
				if (attr === 'role') roleAttributeEmitted = true;
			}
		}

		// Fill in role / aria-label from the node itself only when the DOM
		// attribute did not already provide one, so neither is printed twice.
		if (node.role && !roleAttributeEmitted) {
			attrParts.push(`role="${node.role}"`);
		}
		if (node.ariaLabel && !node.attributes['aria-label']) {
			attrParts.push(`aria-label="${node.ariaLabel}"`);
		}

		if (attrParts.length > 0) {
			parts.push(` ${attrParts.join(' ')}`);
		}

		// Input value
		if (hasLiveValue) {
			parts.push(` value="${node.inputValue}"`);
		}

		parts.push('>');

		// Inline text for leaf elements
		const inlineText = this.getInlineText(node);
		if (inlineText) {
			parts.push(inlineText);
			parts.push(`</${node.tagName}>`);
			lines.push(`${indent}${parts.join('')}`);
			return;
		}

		lines.push(`${indent}${parts.join('')}`);

		// Deduplicate similar siblings
		if (this.options.deduplicateSiblings) {
			this.serializeChildrenWithDedup(node.children, lines, depth + 1, selectorMap, ctx, maxElements);
		} else {
			for (const child of node.children) {
				this.serializeNode(child, lines, depth + 1, selectorMap, ctx, maxElements);
			}
		}

		// Closing tag only if there were children
		if (node.children.some((c) => c.isVisible || c.nodeType === 'text')) {
			lines.push(`${indent}</${node.tagName}>`);
		}
	}

	/**
	 * Serializes children, summarizing long runs of similar siblings.
	 *
	 * A run is a sequence of consecutive element siblings that share a tag name
	 * and are neither interactive nor contain interactive descendants. When a
	 * run is longer than `siblingDeduplicateThreshold`, only its first few
	 * members are rendered, followed by a "... and N more" line for the rest.
	 * The summary line is omitted when nothing was left out, which can happen
	 * when the configured threshold is smaller than the number of shown siblings.
	 */
	private serializeChildrenWithDedup(
		children: PageTreeNode[],
		lines: string[],
		depth: number,
		selectorMap: SelectorIndex,
		ctx: { count: number; maxReached: boolean },
		maxElements: number,
	): void {
		const threshold = this.options.siblingDeduplicateThreshold;
		let i = 0;

		while (i < children.length) {
			if (ctx.maxReached) return;

			const child = children[i];

			// Find run of same-tag non-interactive siblings
			let runEnd = i + 1;
			if (
				child.nodeType === 'element' &&
				!child.isInteractive &&
				!this.hasInteractiveDescendant(child)
			) {
				while (
					runEnd < children.length &&
					children[runEnd].nodeType === 'element' &&
					children[runEnd].tagName === child.tagName &&
					!children[runEnd].isInteractive &&
					!this.hasInteractiveDescendant(children[runEnd])
				) {
					runEnd++;
				}
			}

			const runLength = runEnd - i;
			if (runLength > threshold) {
				// Never claim to show more siblings than the run actually has.
				const shownCount = Math.min(TreeRenderer.DEDUP_SHOWN_SIBLINGS, runLength);
				for (let j = i; j < i + shownCount; j++) {
					this.serializeNode(children[j], lines, depth, selectorMap, ctx, maxElements);
				}
				const omittedCount = runLength - shownCount;
				if (omittedCount > 0) {
					const indent = '\t'.repeat(depth);
					lines.push(`${indent}... and ${omittedCount} more <${child.tagName}> elements`);
				}
				i = runEnd;
			} else {
				this.serializeNode(child, lines, depth, selectorMap, ctx, maxElements);
				i++;
			}
		}
	}

	/**
	 * Checks whether a node is a redundant wrapper: a generic tag with no
	 * highlight index and no interactivity, exactly one visible/interactive/text
	 * child, and a child rect whose area exceeds `containmentThreshold` of the
	 * node's own rect area.
	 */
	private isRedundantWrapper(node: PageTreeNode): boolean {
		if (node.highlightIndex !== undefined) return false;
		if (node.isInteractive) return false;

		const visibleChildren = node.children.filter(
			(c) => c.isVisible || c.isInteractive || c.nodeType === 'text',
		);

		if (visibleChildren.length !== 1) return false;

		if (!TreeRenderer.GENERIC_WRAPPER_TAGS.has(node.tagName)) return false;

		// Compare areas: the wrapper is redundant when the child covers nearly all of it.
		const onlyChild = visibleChildren[0];
		if (node.rect && onlyChild.rect) {
			const parentArea = node.rect.width * node.rect.height;
			const childArea = onlyChild.rect.width * onlyChild.rect.height;
			if (parentArea > 0 && childArea / parentArea > this.options.containmentThreshold) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Returns text that can be printed on the same line as the tag: the node's
	 * own trimmed text for childless nodes, or the trimmed text of a sole text
	 * child. Returns `null` when the node has other children.
	 */
	private getInlineText(node: PageTreeNode): string | null {
		if (node.children.length === 0) {
			return node.text?.trim() || null;
		}
		if (
			node.children.length === 1 &&
			node.children[0].nodeType === 'text' &&
			node.children[0].text
		) {
			return node.children[0].text.trim();
		}
		return null;
	}

	/** True when any descendant is visible or interactive. */
	private hasVisibleDescendant(node: PageTreeNode): boolean {
		for (const child of node.children) {
			if (child.isVisible || child.isInteractive) return true;
			if (this.hasVisibleDescendant(child)) return true;
		}
		return false;
	}

	/** True when any descendant is interactive or carries a highlight index. */
	private hasInteractiveDescendant(node: PageTreeNode): boolean {
		for (const child of node.children) {
			if (child.isInteractive || child.highlightIndex !== undefined) return true;
			if (this.hasInteractiveDescendant(child)) return true;
		}
		return false;
	}

	/** Collects, in document order, every visible node that has a highlight index. */
	private collectInteractiveElements(
		node: PageTreeNode,
		result: PageTreeNode[],
	): void {
		if (node.highlightIndex !== undefined && node.isVisible) {
			result.push(node);
		}
		for (const child of node.children) {
			this.collectInteractiveElements(child, result);
		}
	}

	/**
	 * Builds a fallback CSS selector by walking up `parentNode` links until an
	 * element with an id, or the `html` element, is reached. Steps without an
	 * id use the tag name, plus `:nth-of-type(n)` when same-tag siblings exist.
	 */
	private buildCssSelector(node: PageTreeNode): string {
		const parts: string[] = [];
		let current: PageTreeNode | undefined = node;

		while (current && current.tagName !== 'html') {
			let selector = current.tagName;

			if (current.attributes['id']) {
				// An id anchors the selector; nothing above it is needed.
				parts.unshift(this.buildIdSelector(current.attributes['id']));
				break;
			}

			if (current.parentNode) {
				const siblings = current.parentNode.children.filter(
					(c) => c.tagName === current!.tagName,
				);
				if (siblings.length > 1) {
					const idx = siblings.indexOf(current) + 1;
					selector += `:nth-of-type(${idx})`;
				}
			}

			parts.unshift(selector);
			current = current.parentNode;
		}

		return parts.join(' > ');
	}

	/**
	 * Returns a selector matching the given id. Plain identifier-like ids use
	 * the `#id` form; anything else (for example ids containing `:` or starting
	 * with a digit, which are not valid after `#` unescaped) is written as a
	 * quoted `[id="..."]` attribute selector with the string properly escaped.
	 */
	private buildIdSelector(id: string): string {
		if (TreeRenderer.SIMPLE_ID_PATTERN.test(id)) {
			return `#${id}`;
		}
		const escaped = id
			.replace(/\\/g, '\\\\')
			.replace(/"/g, '\\"')
			// Raw line breaks are not allowed inside a CSS string; use hex escapes.
			.replace(/[\n\r\f]/g, (ch) => `\\${ch.charCodeAt(0).toString(16)} `);
		return `[id="${escaped}"]`;
	}

	/**
	 * Removes interactive elements with degenerate rects (non-positive width or
	 * height) or rects positioned more than 5000px outside the document
	 * (e.g. `left: -9999px`). Elements without a rect are kept. Elements that
	 * are merely outside the current viewport are kept in the result and also
	 * appended to `offScreenHidden` for hint formatting.
	 */
	private filterOffScreenElements(
		elements: PageTreeNode[],
		scrollPosition: { x: number; y: number },
		viewportSize: { width: number; height: number },
		documentSize: { width: number; height: number },
		offScreenHidden: PageTreeNode[],
	): PageTreeNode[] {
		// Anything positioned more than this many pixels outside the document
		// is almost certainly a hidden/off-canvas element (e.g. left: -9999px).
		const offCanvasThreshold = 5000;

		const vpTop = scrollPosition.y;
		const vpBottom = scrollPosition.y + viewportSize.height;
		const vpLeft = scrollPosition.x;
		const vpRight = scrollPosition.x + viewportSize.width;

		const result: PageTreeNode[] = [];

		for (const node of elements) {
			if (!node.rect) {
				result.push(node);
				continue;
			}

			const { x, y, width, height } = node.rect;

			// Degenerate rects: negative dimensions or zero-area
			if (width <= 0 || height <= 0) {
				continue;
			}

			// Off-canvas positioning (common CSS hidden pattern: left: -9999px)
			if (
				x + width < -offCanvasThreshold ||
				y + height < -offCanvasThreshold ||
				x > documentSize.width + offCanvasThreshold ||
				y > documentSize.height + offCanvasThreshold
			) {
				continue;
			}

			// Check if the element is inside the current viewport
			const nodeBottom = y + height;
			const nodeRight = x + width;
			const inViewport =
				nodeBottom >= vpTop &&
				y <= vpBottom &&
				nodeRight >= vpLeft &&
				x <= vpRight;

			// Every element reaching this point stays selectable; those outside
			// the viewport are additionally tracked for the hint section.
			result.push(node);
			if (!inViewport) {
				offScreenHidden.push(node);
			}
		}

		return result;
	}

	/**
	 * Formats one hint line per off-screen element, stating whether it lies
	 * below, above, or horizontally outside the viewport, with the vertical
	 * distance expressed in viewport heights ("pages").
	 */
	private formatHiddenElementHints(
		offScreenElements: PageTreeNode[],
		scrollPosition: { x: number; y: number },
		viewportSize: { width: number; height: number },
	): string[] {
		if (offScreenElements.length === 0) return [];

		const vpBottom = scrollPosition.y + viewportSize.height;
		const vpTop = scrollPosition.y;
		const hints: string[] = [];

		for (const node of offScreenElements) {
			if (!node.rect) continue;
			const desc = this.getNodeDescription(node);
			const elementY = node.rect.y;

			if (elementY > vpBottom) {
				const pxBelow = elementY - vpBottom;
				const pagesBelow = (pxBelow / viewportSize.height).toFixed(1);
				hints.push(`  ${node.tagName} "${desc}" ~${pagesBelow} pages below`);
			} else if (elementY + node.rect.height < vpTop) {
				const pxAbove = vpTop - (elementY + node.rect.height);
				const pagesAbove = (pxAbove / viewportSize.height).toFixed(1);
				hints.push(`  ${node.tagName} "${desc}" ~${pagesAbove} pages above`);
			} else {
				// Off to the side
				hints.push(`  ${node.tagName} "${desc}" off-screen horizontally`);
			}
		}

		return hints;
	}

	/**
	 * Returns a short (at most 60 characters) description of a node for hint
	 * text: aria label, trimmed text, title or placeholder, else the tag name.
	 */
	private getNodeDescription(node: PageTreeNode): string {
		if (node.ariaLabel) return node.ariaLabel.slice(0, 60);
		if (node.text) return node.text.trim().slice(0, 60);
		if (node.attributes['title']) return node.attributes['title'].slice(0, 60);
		if (node.attributes['placeholder']) return node.attributes['placeholder'].slice(0, 60);
		return node.tagName;
	}

	/**
	 * Resolves the placeholder text for a collapsed SVG.
	 *
	 * The SVG's own aria label or `title` attribute wins. Otherwise the subtree
	 * is searched depth-first for a `<title>` element's text or a nested
	 * `<svg>` carrying a label. Falls back to "icon".
	 *
	 * Each descendant is visited at most once, so deeply nested SVG wrappers
	 * stay linear in the number of nodes.
	 */
	private resolveSvgDescription(node: PageTreeNode): string {
		const ownLabel =
			node.ariaLabel || node.attributes['aria-label'] || node.attributes['title'] || '';
		if (ownLabel) {
			// The node's own label takes precedence, so the subtree search is unnecessary.
			return ownLabel;
		}

		let deepLabel = '';
		const visitSvgChildren = (n: PageTreeNode): void => {
			for (const child of n.children) {
				if (child.tagName === 'title' && child.text) {
					deepLabel = child.text.trim();
					return;
				}
				if (child.tagName === 'svg') {
					// Nested SVG -- check it for labels before descending
					const nested =
						child.ariaLabel ||
						child.attributes['aria-label'] ||
						child.attributes['title'] ||
						'';
					if (nested) {
						deepLabel = nested;
						return;
					}
					visitSvgChildren(child);
					if (deepLabel) return;
				} else if (SVG_TAGS.has(child.tagName)) {
					// `svg` is itself in SVG_TAGS; the else-branch keeps nested
					// SVGs from being traversed a second time.
					visitSvgChildren(child);
					if (deepLabel) return;
				}
			}
		};

		visitSvgChildren(node);

		return deepLabel || 'icon';
	}
}


================================================
FILE: packages/core/src/page/renderer.test.ts
================================================
import { test, expect, describe, beforeEach } from 'bun:test';
import { TreeRenderer } from './renderer/tree-renderer.js';
import type { PageTreeNode, SelectorIndex } from './types.js';
import type { ElementRef } from '../types.js';

// ── Helpers ──

/**
 * Test factory: creates a visible, non-interactive `div` element node with no
 * attributes or children, then applies `overrides` on top of those defaults.
 */
function makeNode(overrides: Partial<PageTreeNode> = {}): PageTreeNode {
	const defaults: PageTreeNode = {
		tagName: 'div',
		nodeType: 'element',
		attributes: {},
		children: [],
		isVisible: true,
		isInteractive: false,
		isClickable: false,
		isEditable: false,
		isScrollable: false,
	};
	return Object.assign(defaults, overrides);
}

/** Test factory: creates a childless text node (empty tag name) carrying `text`. */
function makeTextNode(text: string): PageTreeNode {
	const textNodeOverrides: Partial<PageTreeNode> = {
		nodeType: 'text',
		tagName: '',
		children: [],
		text,
	};
	return makeNode(textNodeOverrides);
}

// Shared fixtures: page scrolled to the top, a 1280x800 viewport, and a
// 1280x3000 document.
const defaultScroll = {
	x: 0,
	y: 0,
};
const defaultViewport = {
	width: 1280,
	height: 800,
};
const defaultDocSize = {
	width: 1280,
	height: 3000,
};

// ── Tests ──

describe('TreeRenderer', () => {
	let serializer: TreeRenderer;

	beforeEach(() => {
		serializer = new TreeRenderer({
			capturedAttributes: ['title', 'role', 'aria-label', 'placeholder'],
			filterPaintOrder: false,
		});
	});

	describe('basic tree serialization', () => {
		test('serializes a simple root with text child', () => {
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'body',
						children: [
							makeNode({
								tagName: 'h1',
								text: 'Hello World',
								children: [],
							}),
						],
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			expect(state.tree).toContain('h1');
			expect(state.tree).toContain('Hello World');
			expect(state.scrollPosition).toEqual(defaultScroll);
			expect(state.viewportSize).toEqual(defaultViewport);
		});

		test('includes element count and interactive element count', () => {
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'button',
						isInteractive: true,
						isVisible: true,
						highlightIndex: 0 as ElementRef,
						text: 'Click me',
						cssSelector: '#btn',
					}),
					makeNode({
						tagName: 'p',
						text: 'Paragraph',
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			expect(state.interactiveElementCount).toBeGreaterThanOrEqual(1);
			expect(state.elementCount).toBeGreaterThanOrEqual(1);
		});

		test('builds selector map for interactive elements with highlightIndex', () => {
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'button',
						isInteractive: true,
						isVisible: true,
						highlightIndex: 0 as ElementRef,
						cssSelector: '#submit-btn',
						text: 'Submit',
						role: 'button',
						ariaLabel: 'Submit form',
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			expect(state.selectorMap[0]).toBeDefined();
			expect(state.selectorMap[0].cssSelector).toBe('#submit-btn');
			expect(state.selectorMap[0].tagName).toBe('button');
			expect(state.selectorMap[0].role).toBe('button');
			expect(state.selectorMap[0].ariaLabel).toBe('Submit form');
		});

		test('includes highlight index in serialized output', () => {
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'a',
						isInteractive: true,
						isVisible: true,
						highlightIndex: 3 as ElementRef,
						cssSelector: 'a.link',
						text: 'Link text',
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('[3]');
		});

		test('computes pixelsAbove and pixelsBelow', () => {
			const root = makeNode({ tagName: 'html' });
			const state = serializer.serializeTree(
				root,
				{ x: 0, y: 400 },
				{ width: 1280, height: 800 },
				{ width: 1280, height: 2000 },
			);

			expect(state.pixelsAbove).toBe(400);
			expect(state.pixelsBelow).toBe(800); // 2000 - 400 - 800
		});
	});

	describe('SVG collapse', () => {
		test('collapses SVG to placeholder with icon label', () => {
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'svg',
						isVisible: true,
						attributes: {},
						children: [
							makeNode({
								tagName: 'path',
								isVisible: true,
								attributes: { d: 'M0 0L10 10' },
							}),
						],
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('<svg>icon</svg>');
		});

		test('uses aria-label from SVG if available', () => {
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'svg',
						isVisible: true,
						ariaLabel: 'Search icon',
						attributes: {},
						children: [],
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('<svg>Search icon</svg>');
		});

		test('finds title in nested SVG structure', () => {
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'svg',
						isVisible: true,
						attributes: {},
						children: [
							makeNode({
								tagName: 'title',
								isVisible: true,
								text: 'Close button',
								nodeType: 'element',
								attributes: {},
								children: [],
							}),
						],
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('<svg>Close button</svg>');
		});

		test('includes highlight index on interactive SVG', () => {
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'svg',
						isVisible: true,
						isInteractive: true,
						highlightIndex: 5 as ElementRef,
						cssSelector: 'svg.icon',
						attributes: {},
						children: [],
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('[5]<svg>');
		});

		test('does not collapse SVG when collapseSvg is disabled', () => {
			const noCollapse = new TreeRenderer({
				collapseSvg: false,
				filterPaintOrder: false,
			});
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'svg',
						isVisible: true,
						attributes: {},
						children: [
							makeNode({
								tagName: 'rect',
								isVisible: true,
								attributes: {},
							}),
						],
					}),
				],
			});

			const state = noCollapse.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			// Should not be collapsed to a single <svg>icon</svg> placeholder
			expect(state.tree).toContain('<svg>');
			// Inner SVG elements (path, rect, etc.) are always skipped by the
			// SVG_TAGS filter, so they won't appear. The key difference is
			// collapseSvg=false does NOT produce the collapsed placeholder format.
			expect(state.tree).not.toContain('<svg>icon</svg>');
		});
	});

	describe('sibling deduplication', () => {
		test('deduplicates runs of same-tag non-interactive siblings', () => {
			// Create 8 identical li elements (threshold = 5)
			const listItems = Array.from({ length: 8 }, (_, i) =>
				makeNode({
					tagName: 'li',
					isVisible: true,
					text: `Item ${i}`,
					children: [],
				}),
			);

			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'ul',
						isVisible: true,
						children: listItems,
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			// Should show first 3 and then "... and 5 more" summary
			expect(state.tree).toContain('Item 0');
			expect(state.tree).toContain('Item 1');
			expect(state.tree).toContain('Item 2');
			expect(state.tree).toContain('... and 5 more <li> elements');
			expect(state.tree).not.toContain('Item 7');
		});

		test('does not deduplicate when below threshold', () => {
			const items = Array.from({ length: 3 }, (_, i) =>
				makeNode({
					tagName: 'li',
					isVisible: true,
					text: `Item ${i}`,
					children: [],
				}),
			);

			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'ul',
						isVisible: true,
						children: items,
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			expect(state.tree).toContain('Item 0');
			expect(state.tree).toContain('Item 1');
			expect(state.tree).toContain('Item 2');
			expect(state.tree).not.toContain('... and');
		});

		test('does not deduplicate siblings with interactive descendants', () => {
			const items = Array.from({ length: 8 }, (_, i) =>
				makeNode({
					tagName: 'li',
					isVisible: true,
					children: [
						makeNode({
							tagName: 'a',
							isInteractive: i === 4, // one has interactive child
							isVisible: true,
							highlightIndex: i === 4 ? (10 as ElementRef) : undefined,
							text: `Link ${i}`,
						}),
					],
				}),
			);

			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'ul',
						isVisible: true,
						children: items,
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			// Because item 4 has an interactive descendant, the run is broken
			// and items should not all be deduped away
			expect(state.tree).toContain('Link 4');
		});

		test('does not deduplicate when deduplicateSiblings is disabled', () => {
			const noDedup = new TreeRenderer({
				deduplicateSiblings: false,
				filterPaintOrder: false,
			});

			const items = Array.from({ length: 8 }, (_, i) =>
				makeNode({
					tagName: 'li',
					isVisible: true,
					text: `Item ${i}`,
					children: [],
				}),
			);

			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'ul',
						isVisible: true,
						children: items,
					}),
				],
			});

			const state = noDedup.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).not.toContain('... and');
			expect(state.tree).toContain('Item 7');
		});
	});

	describe('max elements cap', () => {
		test('truncates tree when max elements is exceeded', () => {
			const small = new TreeRenderer({
				maxElementsInDom: 5,
				filterPaintOrder: false,
				deduplicateSiblings: false,
			});

			const children = Array.from({ length: 20 }, (_, i) =>
				makeNode({
					tagName: 'p',
					isVisible: true,
					text: `Para ${i}`,
					children: [],
				}),
			);

			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'body',
						isVisible: true,
						children,
					}),
				],
			});

			const state = small.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('DOM truncated at 5 elements');
		});
	});

	// Each case builds html > wrapper > child where wrapper and child share an
	// identical rect, and varies one property of the wrapper (tag, highlightIndex).
	describe('containment threshold (redundant wrappers)', () => {
		test('skips redundant div wrapper when child fills parent', () => {
			const innerButton = makeNode({
				tagName: 'button',
				isVisible: true,
				isInteractive: true,
				highlightIndex: 0 as ElementRef,
				cssSelector: 'button',
				text: 'Click',
				rect: { x: 0, y: 0, width: 200, height: 50 },
			});

			// Non-interactive div with exactly the same rect as its only child.
			const wrapper = makeNode({
				tagName: 'div',
				isVisible: true,
				isInteractive: false,
				rect: { x: 0, y: 0, width: 200, height: 50 },
				children: [innerButton],
			});

			const root = makeNode({
				tagName: 'html',
				children: [wrapper],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			// The redundant div wrapper should be skipped in output;
			// the button should appear directly
			// NOTE(review): the assertions below only check that the button is
			// present; they would also pass if the div were still rendered.
			// Consider additionally asserting that '<div' is absent.
			expect(state.tree).toContain('button');
			expect(state.tree).toContain('Click');
		});

		test('does not skip wrapper when it has a highlightIndex', () => {
			const inner = makeNode({
				tagName: 'span',
				isVisible: true,
				text: 'Text',
				rect: { x: 0, y: 0, width: 100, height: 20 },
			});

			// Same geometry as the child, but the wrapper itself carries an index.
			const wrapper = makeNode({
				tagName: 'div',
				isVisible: true,
				highlightIndex: 1 as ElementRef,
				cssSelector: 'div#parent',
				rect: { x: 0, y: 0, width: 100, height: 20 },
				children: [inner],
			});

			const root = makeNode({
				tagName: 'html',
				children: [wrapper],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('<div');
		});

		test('does not skip non-generic tag wrappers', () => {
			const inner = makeNode({
				tagName: 'p',
				isVisible: true,
				text: 'Hello',
				rect: { x: 0, y: 0, width: 100, height: 20 },
			});

			const wrapper = makeNode({
				tagName: 'nav', // not in genericTags set
				isVisible: true,
				rect: { x: 0, y: 0, width: 100, height: 20 },
				children: [inner],
			});

			const root = makeNode({
				tagName: 'html',
				children: [wrapper],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('<nav');
		});
	});

	// Checks which indexed elements end up in state.selectorMap depending on
	// the element's rect. Every case uses a single interactive child with
	// highlightIndex 0 and inspects selectorMap[0].
	describe('off-screen element filtering', () => {
		test('filters out elements with degenerate rects (zero area)', () => {
			const zeroWidth = makeNode({
				tagName: 'button',
				isVisible: true,
				isInteractive: true,
				highlightIndex: 0 as ElementRef,
				cssSelector: 'button.hidden',
				rect: { x: 0, y: 0, width: 0, height: 30 },
			});

			const root = makeNode({
				tagName: 'html',
				children: [zeroWidth],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			// Zero-width element should be filtered from the selector map
			expect(state.selectorMap[0]).toBeUndefined();
		});

		test('filters out elements with extreme off-canvas positioning', () => {
			// x = -10000 with a normal size; the selector name suggests the
			// screen-reader-only pattern of parking content far off canvas.
			const offCanvas = makeNode({
				tagName: 'a',
				isVisible: true,
				isInteractive: true,
				highlightIndex: 0 as ElementRef,
				cssSelector: 'a.sr-only',
				rect: { x: -10000, y: 0, width: 100, height: 20 },
			});

			const root = makeNode({
				tagName: 'html',
				children: [offCanvas],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.selectorMap[0]).toBeUndefined();
		});

		test('keeps elements that are off-viewport but within document bounds', () => {
			const belowViewport = makeNode({
				tagName: 'button',
				isVisible: true,
				isInteractive: true,
				highlightIndex: 0 as ElementRef,
				cssSelector: 'button.below',
				rect: { x: 100, y: 2000, width: 100, height: 30 },
			});

			const root = makeNode({
				tagName: 'html',
				children: [belowViewport],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			// Should be kept in selector map even though off-viewport
			expect(state.selectorMap[0]).toBeDefined();
			expect(state.selectorMap[0].cssSelector).toBe('button.below');
		});
	});

	// Checks the textual hints that the serialized tree carries for interactive
	// elements positioned outside the current viewport.
	describe('hidden element hints formatting', () => {
		test('formats hints for off-screen elements below viewport', () => {
			const belowElement = makeNode({
				tagName: 'button',
				isVisible: true,
				isInteractive: true,
				highlightIndex: 0 as ElementRef,
				cssSelector: 'button.far',
				ariaLabel: 'Load more',
				rect: { x: 100, y: 2400, width: 100, height: 30 },
			});

			const root = makeNode({
				tagName: 'html',
				children: [belowElement],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			expect(state.tree).toContain('Off-screen interactive elements');
			expect(state.tree).toContain('Load more');
			expect(state.tree).toContain('pages below');
		});

		test('formats hints for elements above viewport', () => {
			const aboveElement = makeNode({
				tagName: 'a',
				isVisible: true,
				isInteractive: true,
				highlightIndex: 0 as ElementRef,
				cssSelector: 'a.header',
				ariaLabel: 'Home link',
				rect: { x: 100, y: 50, width: 100, height: 30 },
			});

			const root = makeNode({
				tagName: 'html',
				children: [aboveElement],
			});

			// Scrolled down so element is above
			const state = serializer.serializeTree(
				root,
				{ x: 0, y: 1000 },
				defaultViewport,
				defaultDocSize,
			);

			expect(state.tree).toContain('Home link');
			expect(state.tree).toContain('pages above');
		});

		test('limits hints to 15 off-screen elements', () => {
			// 20 buttons stacked 100px apart starting at y = 2000.
			const children = Array.from({ length: 20 }, (_, i) =>
				makeNode({
					tagName: 'button',
					isVisible: true,
					isInteractive: true,
					highlightIndex: i as ElementRef,
					cssSelector: `button.item-${i}`,
					ariaLabel: `Button ${i}`,
					rect: { x: 100, y: 2000 + i * 100, width: 100, height: 30 },
				}),
			);

			const root = makeNode({
				tagName: 'html',
				children,
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);

			// Should cap at 15 and say "... and N more"
			// NOTE(review): only the presence of the overflow suffix is asserted;
			// neither the cap of 15 nor the value of N is pinned by this test.
			expect(state.tree).toContain('more off-screen elements');
		});
	});

	// Checks that attribute-like data reaches the serialized tree as
	// name="value" pairs, whether it comes from the `attributes` record or
	// from dedicated node properties (role, ariaLabel, inputValue).
	describe('attributes serialization', () => {
		test('includes configured attributes in output', () => {
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'input',
						isVisible: true,
						attributes: {
							placeholder: 'Enter email',
							title: 'Email field',
						},
						children: [],
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('placeholder="Enter email"');
			expect(state.tree).toContain('title="Email field"');
		});

		test('includes role and aria-label from node properties', () => {
			// `attributes` is deliberately empty: role / aria-label must come
			// from the node's own `role` and `ariaLabel` fields.
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'div',
						isVisible: true,
						role: 'navigation',
						ariaLabel: 'Main menu',
						attributes: {},
						children: [makeTextNode('Menu')],
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('role="navigation"');
			expect(state.tree).toContain('aria-label="Main menu"');
		});

		test('includes input value in output', () => {
			// The value is supplied through `inputValue`, not through `attributes`.
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'input',
						isVisible: true,
						inputValue: 'current text',
						attributes: {},
						children: [],
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('value="current text"');
		});
	});

	// Covers the two ways text can be attached to an element in the fixture
	// tree: the element's own `text` field, or a child created by makeTextNode.
	describe('text node handling', () => {
		test('renders text content inline for leaf elements', () => {
			// Leaf <p>: text lives on the element itself, no children.
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'p',
						isVisible: true,
						text: 'Hello world',
						children: [],
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('Hello world');
		});

		test('renders text node children', () => {
			// <p> without its own text; the content comes from a text-node child.
			const root = makeNode({
				tagName: 'html',
				children: [
					makeNode({
						tagName: 'p',
						isVisible: true,
						children: [makeTextNode('Some text content')],
					}),
				],
			});

			const state = serializer.serializeTree(root, defaultScroll, defaultViewport, defaultDocSize);
			expect(state.tree).toContain('Some text content');
		});
	});
});


================================================
FILE: packages/core/src/page/snapshot-builder.ts
================================================
import type { CDPSession } from 'playwright';
import type {
	CDPSnapshotResult,
	AXNode,
	PageTreeNode,
	DOMRect,
} from './types.js';
import { type ElementRef, elementIndex } from '../types.js';

/**
 * Lower-cased tag names that SnapshotBuilder treats as interactive (and as
 * clickable) regardless of ARIA role or CDP clickability.
 */
const INTERACTIVE_TAGS = new Set([
	'a', 'button', 'input', 'select', 'textarea', 'details', 'summary',
	'label', 'option', 'fieldset', 'legend',
]);

/**
 * Accessibility role values that mark a node as interactive when its tag
 * alone does not.
 */
const INTERACTIVE_ROLES = new Set([
	'button', 'link', 'textbox', 'checkbox', 'radio', 'combobox',
	'listbox', 'menu', 'menuitem', 'menuitemcheckbox', 'menuitemradio',
	'option', 'searchbox', 'slider', 'spinbutton', 'switch', 'tab',
	'treeitem', 'gridcell', 'columnheader', 'rowheader',
]);

/**
 * Tags that are never reported as visible, even when the snapshot carries a
 * non-empty layout box for them.
 */
const INVISIBLE_TAGS = new Set([
	'script', 'style', 'link', 'meta', 'head', 'noscript', 'template',
]);

/**
 * Accessibility node as delivered in the flat `Accessibility.getFullAXTree`
 * list, where parent/child links are expressed through `childIds`.
 */
type FlatAXNode = AXNode & { childIds?: string[] };

/**
 * Captures a DOM snapshot plus accessibility tree over CDP and converts them
 * into a PageTreeNode hierarchy annotated with visibility, interactivity and
 * highlight indexes.
 */
export class SnapshotBuilder {
	/** Next highlight index to hand out; reset at the start of every buildTree call. */
	private indexCounter = 0;

	/**
	 * Fetches the DOM snapshot and the full accessibility tree in parallel.
	 *
	 * `Accessibility.getFullAXTree` answers with a flat node list; the nodes are
	 * linked into a nested tree (via their `childIds`) before the first node is
	 * returned as the root, so that buildTree can reach every AX node.
	 *
	 * @param cdpSession - Open CDP session for the target page.
	 * @returns The raw DOM snapshot and the root accessibility node. When the
	 *   accessibility list is empty a synthetic `WebArea` root is returned.
	 */
	async captureSnapshot(cdpSession: CDPSession): Promise<{
		domSnapshot: CDPSnapshotResult;
		axTree: AXNode;
	}> {
		const [domSnapshot, axTree] = await Promise.all([
			cdpSession.send('DOMSnapshot.captureSnapshot', {
				computedStyles: ['display', 'visibility', 'opacity', 'overflow'],
				includeDOMRects: true,
				includePaintOrder: true,
			}) as Promise<unknown> as Promise<CDPSnapshotResult>,
			cdpSession.send('Accessibility.getFullAXTree', {}) as Promise<unknown> as Promise<{ nodes: FlatAXNode[] }>,
		]);

		const flatNodes = axTree.nodes ?? [];
		this.linkAXChildren(flatNodes);

		// The first entry of the flat list is used as the root node
		const rootAx: AXNode = flatNodes[0] ?? {
			nodeId: '0',
			role: { value: 'WebArea' },
		};

		return { domSnapshot, axTree: rootAx };
	}

	/**
	 * Converts a captured snapshot into a PageTreeNode tree.
	 *
	 * Only the first document of the snapshot is processed.
	 *
	 * @param snapshot - Result of `DOMSnapshot.captureSnapshot`.
	 * @param axTree - Root accessibility node with nested `children`.
	 * @param viewportSize - Forwarded to the node builder (currently unused there).
	 * @param capturedAttributes - Attribute names to keep on each node; an empty
	 *   list keeps every attribute.
	 * @returns The root node and the number of highlight indexes assigned.
	 */
	buildTree(
		snapshot: CDPSnapshotResult,
		axTree: AXNode,
		viewportSize: { width: number; height: number },
		capturedAttributes: string[] = [],
	): { root: PageTreeNode; indexCounter: number } {
		this.indexCounter = 0;
		const doc = snapshot.documents[0];
		if (!doc) {
			return {
				root: this.createEmptyNode(),
				indexCounter: 0,
			};
		}

		const { nodes, layout, strings } = doc;

		// Build backend node ID → AX node map
		const axNodeMap = new Map<number, AXNode>();
		this.buildAXMap(axTree, axNodeMap);

		// Build layout index map (DOM node index → layout record)
		const layoutMap = new Map<number, { bounds: number[]; text?: string; paintOrder?: number }>();
		for (let i = 0; i < layout.nodeIndex.length; i++) {
			const nodeIdx = layout.nodeIndex[i];
			layoutMap.set(nodeIdx, {
				bounds: layout.bounds[i],
				// -1 is the "no string" marker in the snapshot's string table indexes
				text: layout.text[i] !== -1 ? strings[layout.text[i]] : undefined,
				paintOrder: layout.paintOrder?.[i],
			});
		}

		// Build clickable set
		const clickableSet = new Set<number>();
		if (nodes.isClickable) {
			for (const idx of nodes.isClickable.index) {
				clickableSet.add(idx);
			}
		}

		// Build input value map
		const inputValueMap = new Map<number, string>();
		if (nodes.inputValue) {
			for (let i = 0; i < nodes.inputValue.index.length; i++) {
				const nodeIdx = nodes.inputValue.index[i];
				const valueIdx = nodes.inputValue.value[i];
				inputValueMap.set(nodeIdx, strings[valueIdx]);
			}
		}

		// Resolve the child lists once; the snapshot may only carry parentIndex
		const childIndexes = this.resolveChildIndexes(nodes);

		// Build the tree recursively
		const root = this.buildNodeTree(
			0,
			nodes,
			strings,
			layoutMap,
			axNodeMap,
			clickableSet,
			inputValueMap,
			viewportSize,
			capturedAttributes,
			childIndexes,
		);

		return { root, indexCounter: this.indexCounter };
	}

	/**
	 * Returns, for every node index, the indexes of its children.
	 *
	 * Uses `childNodeIndexes` when the snapshot provides it; otherwise the
	 * lists are derived from `parentIndex`, preserving node order.
	 */
	private resolveChildIndexes(
		nodes: CDPSnapshotResult['documents'][0]['nodes'],
	): number[][] {
		if (nodes.childNodeIndexes) {
			return nodes.childNodeIndexes;
		}

		const total = nodes.nodeType.length;
		const derived: number[][] = Array.from({ length: total }, () => []);
		const parents = nodes.parentIndex ?? [];
		for (let i = 0; i < parents.length; i++) {
			const parent = parents[i];
			// Skip the root (-1), out-of-range parents and self references
			if (parent >= 0 && parent < total && parent !== i) {
				derived[parent].push(i);
			}
		}
		return derived;
	}

	/**
	 * Recursively builds the PageTreeNode for `nodeIndex` and all descendants.
	 *
	 * @param childIndexes - Child index lists per node; defaults to the
	 *   snapshot's own `childNodeIndexes` when omitted.
	 */
	private buildNodeTree(
		nodeIndex: number,
		nodes: CDPSnapshotResult['documents'][0]['nodes'],
		strings: string[],
		layoutMap: Map<number, { bounds: number[]; text?: string; paintOrder?: number }>,
		axNodeMap: Map<number, AXNode>,
		clickableSet: Set<number>,
		inputValueMap: Map<number, string>,
		viewportSize: { width: number; height: number },
		capturedAttributes: string[],
		childIndexes: number[][] = nodes.childNodeIndexes ?? [],
	): PageTreeNode {
		const nodeType = nodes.nodeType[nodeIndex];
		const tagName = strings[nodes.nodeName[nodeIndex]]?.toLowerCase() ?? '';
		const backendNodeId = nodes.backendNodeId[nodeIndex];

		// Check layout: a node without a layout record is never visible
		const layoutInfo = layoutMap.get(nodeIndex);
		let rect: DOMRect | undefined;
		let isVisible = false;

		if (layoutInfo) {
			const [x, y, w, h] = layoutInfo.bounds;
			rect = { x, y, width: w, height: h };
			isVisible = w > 0 && h > 0 && !INVISIBLE_TAGS.has(tagName);
		}

		// Parse attributes: the raw array alternates name/value string indexes
		const rawAttrs = nodes.attributes[nodeIndex] ?? [];
		const attributes: Record<string, string> = {};
		for (let i = 0; i < rawAttrs.length; i += 2) {
			const name = strings[rawAttrs[i]];
			const value = strings[rawAttrs[i + 1]];
			if (name && (capturedAttributes.length === 0 || capturedAttributes.includes(name))) {
				attributes[name] = value ?? '';
			}
		}

		// Get AX info
		const axNode = axNodeMap.get(backendNodeId);
		const role = axNode?.role?.value;
		const ariaLabel = axNode?.name?.value;

		// Determine interactivity.
		// NOTE(review): tabindex / contenteditable / role are read from the
		// filtered `attributes` record, so a non-empty capturedAttributes list
		// that omits them hides these signals — confirm this is intended.
		const isInteractive =
			INTERACTIVE_TAGS.has(tagName) ||
			(role ? INTERACTIVE_ROLES.has(role) : false) ||
			clickableSet.has(nodeIndex) ||
			attributes['tabindex'] !== undefined ||
			attributes['contenteditable'] === 'true';

		const isEditable =
			tagName === 'input' ||
			tagName === 'textarea' ||
			attributes['contenteditable'] === 'true' ||
			role === 'textbox' ||
			role === 'searchbox';

		const isScrollable =
			tagName === 'body' || tagName === 'html' || attributes['role'] === 'scrollbar';

		// Build node (nodeType 3 is a DOM text node)
		const node: PageTreeNode = {
			tagName,
			nodeType: nodeType === 3 ? 'text' : 'element',
			text: nodeType === 3 ? strings[nodes.nodeValue[nodeIndex]] : layoutInfo?.text,
			attributes,
			children: [],
			isVisible,
			rect,
			role: role && role !== 'none' && role !== 'generic' ? role : undefined,
			ariaLabel,
			isInteractive,
			isClickable: clickableSet.has(nodeIndex) || INTERACTIVE_TAGS.has(tagName),
			isEditable,
			isScrollable,
			backendNodeId,
			paintOrder: layoutInfo?.paintOrder,
			inputValue: inputValueMap.get(nodeIndex),
		};

		// Assign highlight index for interactive/visible elements
		if (isInteractive && isVisible) {
			node.highlightIndex = elementIndex(this.indexCounter++);
		}

		// Build children
		const ownChildren: number[] = childIndexes[nodeIndex] ?? [];
		for (const childIdx of ownChildren) {
			const child = this.buildNodeTree(
				childIdx,
				nodes,
				strings,
				layoutMap,
				axNodeMap,
				clickableSet,
				inputValueMap,
				viewportSize,
				capturedAttributes,
				childIndexes,
			);
			child.parentNode = node;
			node.children.push(child);
		}

		return node;
	}

	/**
	 * Nests a flat accessibility node list in place: every node that has
	 * `childIds` but no `children` yet gets a `children` array holding the
	 * referenced nodes. Ids that are not present in the list are skipped.
	 */
	private linkAXChildren(flatNodes: FlatAXNode[]): void {
		const byId = new Map<string, FlatAXNode>();
		for (const axNode of flatNodes) {
			byId.set(axNode.nodeId, axNode);
		}

		for (const axNode of flatNodes) {
			if (axNode.children || !axNode.childIds) continue;
			const linked: AXNode[] = [];
			for (const childId of axNode.childIds) {
				const child = byId.get(childId);
				if (child) linked.push(child);
			}
			axNode.children = linked;
		}
	}

	/**
	 * Fills `map` with backendDOMNodeId → AX node for `node` and all of its
	 * descendants. Traversal is iterative (pre-order) and guards against
	 * revisiting a node, so malformed cyclic links cannot loop forever.
	 */
	private buildAXMap(node: AXNode, map: Map<number, AXNode>): void {
		const visited = new Set<AXNode>();
		const pending: AXNode[] = [node];

		while (pending.length > 0) {
			const current = pending.pop();
			if (!current || visited.has(current)) continue;
			visited.add(current);

			if (current.backendDOMNodeId) {
				map.set(current.backendDOMNodeId, current);
			}

			const kids = current.children;
			if (kids) {
				// Push in reverse so children are visited in their original order
				for (let i = kids.length - 1; i >= 0; i--) {
					pending.push(kids[i]);
				}
			}
		}
	}

	/** Placeholder root returned when the snapshot contains no document. */
	private createEmptyNode(): PageTreeNode {
		return {
			tagName: 'html',
			nodeType: 'element',
			attributes: {},
			children: [],
			isVisible: false,
			isInteractive: false,
			isClickable: false,
			isEditable: false,
			isScrollable: false,
		};
	}
}


================================================
FILE: packages/core/src/page/types.ts
================================================
import type { ElementRef } from '../types.js';

/**
 * Plain rectangle described by its x/y origin and its width/height.
 * NOTE(review): shares its name with the DOM lib's global `DOMRect`; modules
 * importing this type shadow the global one.
 */
export interface DOMRect {
	x: number;
	y: number;
	width: number;
	height: number;
}

/** Identity and state of a single browser target (page, iframe, worker, ...). */
export interface TargetInfo {
	targetId: string;
	/** Kind of target; anything not covered explicitly is reported as 'other'. */
	type: 'page' | 'iframe' | 'worker' | 'other';
	url: string;
	title?: string;
	/** Whether a session is attached — presumably a CDP session; verify against the producer. */
	attached: boolean;
}

/** Page tree of the main target together with the trees of its iframe targets. */
export interface TargetAllTrees {
	mainTree: PageTreeNode;
	/** One entry per iframe target, each with its own tree. */
	iframeTrees: Array<{
		targetInfo: TargetInfo;
		tree: PageTreeNode;
		/** Optional id of the node hosting the iframe — id space not visible here; TODO confirm. */
		parentNodeId?: number;
	}>;
}

/** Record of one interaction with an indexed element. */
export interface InteractedElement {
	/** Highlight index of the element that was acted on. */
	index: ElementRef;
	tagName: string;
	text?: string;
	role?: string;
	ariaLabel?: string;
	/** Name of the performed action (free-form string). */
	action: string;
	/** When the interaction happened — unit/epoch not visible here; TODO confirm. */
	timestamp: number;
}

/**
 * Match quality levels, exposed both as a constant object (runtime values)
 * and as a string-literal union type of the same name.
 */
export const MatchLevel = {
	EXACT: 'exact',
	PARTIAL: 'partial',
	FUZZY: 'fuzzy',
	NONE: 'none',
} as const;
/** Union of the string values of the `MatchLevel` object. */
export type MatchLevel = (typeof MatchLevel)[keyof typeof MatchLevel];

/** Reduced tree node: tag, optional text, attributes and children only. */
export interface SimplifiedNode {
	tag: string;
	text?: string;
	attrs: Record<string, string>;
	children: SimplifiedNode[];
	/** Highlight index, when the node has one. */
	index?: ElementRef;
	isInteractive: boolean;
}

/**
 * One node of the page tree: DOM identity plus layout, accessibility,
 * interaction and selector metadata. Most fields beyond the core ones are
 * optional and only present when the producer could determine them.
 */
export interface PageTreeNode {
	/** Tag name; SnapshotBuilder stores it lower-cased. */
	tagName: string;
	nodeType: 'element' | 'text';
	text?: string;
	attributes: Record<string, string>;
	children: PageTreeNode[];

	// Layout info
	isVisible: boolean;
	rect?: DOMRect;

	// A11y info
	/** Accessibility role; SnapshotBuilder leaves it unset for 'none' and 'generic'. */
	role?: string;
	ariaLabel?: string;
	ariaExpanded?: boolean;

	// Interaction info
	isInteractive: boolean;
	isClickable: boolean;
	isEditable: boolean;
	isScrollable: boolean;

	// Index for LLM reference
	/** SnapshotBuilder assigns this only to nodes that are both interactive and visible. */
	highlightIndex?: ElementRef;

	// Parent reference (not serialized)
	parentNode?: PageTreeNode;

	// CDP node info
	backendNodeId?: number;
	nodeId?: number;

	// Selector info
	cssSelector?: string;
	xpath?: string;

	// Shadow DOM
	isShadowRoot?: boolean;
	shadowChildren?: PageTreeNode[];

	// Input state
	inputValue?: string;
	isChecked?: boolean;
	selectedOption?: string;

	// Paint order for z-index filtering
	paintOrder?: number;
}

/**
 * Lookup table from a numeric element index to the data needed to locate
 * and describe that element again (selector plus descriptive fields).
 */
export interface SelectorIndex {
	[index: number]: {
		cssSelector: string;
		xpath?: string;
		backendNodeId?: number;
		tagName: string;
		role?: string;
		ariaLabel?: string;
		text?: string;
	};
}

/** Serialized page state: textual tree, selector lookup and scroll/size metrics. */
export interface RenderedPageState {
	/** Textual rendering of the page tree. */
	tree: string;
	selectorMap: SelectorIndex;
	elementCount: number;
	interactiveElementCount: number;
	scrollPosition: { x: number; y: number };
	viewportSize: { width: number; height: number };
	documentSize: { width: number; height: number };
	/** Amount of content above the viewport — presumably in pixels, per the name. */
	pixelsAbove: number;
	/** Amount of content below the viewport — presumably in pixels, per the name. */
	pixelsBelow: number;
}

/**
 * Object-per-node view of a DOM snapshot node (in contrast to the
 * column-oriented arrays of CDPSnapshotResult).
 */
export interface CDPDOMNode {
	nodeType: number;
	nodeName: string;
	nodeValue: string;
	backendNodeId: number;
	/** Indexes of this node's children within the node list. */
	childNodeIndexes?: number[];
	/** Attribute list — presumably flattened name/value pairs as in CDP; TODO confirm. */
	attributes?: string[];
	parentIndex?: number;
	contentDocumentIndex?: number;
	shadowRootType?: string;
	isClickable?: boolean;
	inputValue?: { value: string; type?: string };
	currentSourceURL?: string;
	textValue?: string;
	layoutNodeIndex?: number;
}

/** Object-per-node view of a layout record belonging to a DOM snapshot node. */
export interface CDPLayoutNode {
	/** Index of the DOM node this layout record belongs to. */
	nodeIndex: number;
	/** Bounding box numbers — SnapshotBuilder reads such arrays as [x, y, width, height]. */
	bounds: number[];
	text?: string;
	stackingContexts?: { index: number }[];
	paintOrder?: number;
	isStackingContext?: boolean;
}

/**
 * Column-oriented DOM snapshot: per document, parallel arrays indexed by node
 * (or layout) position, with all strings stored once in `strings` and
 * referenced by index.
 */
export interface CDPSnapshotResult {
	documents: Array<{
		/** Per-node columns; entry i of each array describes node i. */
		nodes: {
			nodeType: number[];
			/** Index into `strings`. */
			nodeName: number[];
			/** Index into `strings`. */
			nodeValue: number[];
			backendNodeId: number[];
			// NOTE(review): the CDP NodeTreeSnapshot schema documents parentIndex
			// but, as far as I can tell, not childNodeIndexes — confirm whether
			// this field is ever populated by DOMSnapshot.captureSnapshot.
			childNodeIndexes?: number[][];
			/** Per node: alternating name/value indexes into `strings`. */
			attributes: Array<number[]>;
			parentIndex: number[];
			contentDocumentIndex?: { index: number[] };
			shadowRootType?: { index: number[]; value: number[] };
			/** Sparse list of node indexes that are clickable. */
			isClickable?: { index: number[] };
			/** Sparse pairs: node index and the `strings` index of its value. */
			inputValue?: { index: number[]; value: number[] };
			currentSourceURL?: { index: number[]; value: number[] };
		};
		/** Per-layout-record columns; `nodeIndex[i]` names the DOM node of record i. */
		layout: {
			nodeIndex: number[];
			bounds: number[][];
			/** Index into `strings`; SnapshotBuilder treats -1 as "no text". */
			text: number[];
			stackingContexts?: { index: number[] };
			paintOrder?: number[];
			styles: number[][];
		};
		textBoxes: {
			layoutIndex: number[];
			bounds: number[][];
		};
		/** Shared string table referenced by the index columns above. */
		strings: string[];
	}>;
}

/**
 * Accessibility tree node.
 *
 * CDP's `Accessibility.getFullAXTree` delivers nodes as a flat list whose
 * parent/child relations are expressed through `childIds` / `parentId`;
 * `children` holds the nested form once those ids have been resolved.
 */
export interface AXNode {
	nodeId: string;
	role: { value: string };
	/** Accessible name. */
	name?: { value: string };
	description?: { value: string };
	value?: { value: string };
	properties?: Array<{
		name: string;
		value: { value: unknown };
	}>;
	/** Nested child nodes (resolved form). */
	children?: AXNode[];
	/** Ids of the child nodes as delivered in the flat CDP node list. */
	childIds?: string[];
	/** Id of the parent node as delivered in the flat CDP node list. */
	parentId?: string;
	/** Backend id of the DOM node this accessibility node describes. */
	backendDOMNodeId?: number;
	ignored?: boolean;
}


================================================
FILE: packages/core/src/sandbox/file-access.ts
================================================
import * as fs from 'node:fs';
import * as path from 'node:path';
import { createLogger } from '../logging.js';

const logger = createLogger('filesystem');

/**
 * Default set of file extensions (lower-case, with leading dot) that may be
 * written when FileAccessOptions.allowedExtensions is not supplied.
 * NOTE(review): entries are compared against path.extname(), which — per the
 * Node docs — yields '' for a dotfile such as ".env"; verify that the '.env'
 * entry behaves as intended.
 */
const ALLOWED_EXTENSIONS = new Set([
	'.txt', '.md', '.json', '.csv', '.html', '.xml', '.yaml', '.yml',
	'.js', '.ts', '.py', '.rb', '.go', '.rs', '.java', '.c', '.cpp',
	'.css', '.scss', '.less', '.svg', '.log', '.env', '.toml', '.ini',
	'.sh', '.bash', '.zsh', '.sql', '.graphql',
]);

/** Default per-file size limit in bytes, applied to both reads and writes. */
const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB

/** Construction options for FileAccess. */
export interface FileAccessOptions {
	/** Root directory of the sandbox; resolved to an absolute path and created if missing. */
	sandboxDir: string;
	/** Extensions permitted for writes; defaults to the module's built-in list. */
	allowedExtensions?: Set<string>;
	/** Maximum size in bytes for a single read or write; defaults to 10MB. */
	maxFileSize?: number;
	/** When true, write and delete throw; defaults to false. */
	readOnly?: boolean;
}

/** Metadata describing one file or directory entry inside the sandbox. */
export interface FileInfo {
	/** Base name of the entry. */
	name: string;
	/** Absolute path of the entry. */
	path: string;
	/** Size in bytes. */
	size: number;
	isDirectory: boolean;
	modifiedAt: Date;
	/** Lower-cased extension as returned by path.extname (includes the dot). */
	extension: string;
}

/** Bookkeeping kept by FileAccess about the files it knows of. */
export interface FileAccessState {
	/** Known files, keyed by their path relative to the sandbox directory. */
	files: Map<string, FileInfo>;
	/** Running total of tracked file sizes in bytes. */
	totalSize: number;
	/** Number of read/write/list/delete operations performed so far. */
	operationCount: number;
}

/**
 * File-system facade confined to a single sandbox directory.
 *
 * Every path handed to the public methods is interpreted relative to the
 * sandbox root and rejected when it resolves outside of it. Writes are
 * additionally restricted by extension and size, reads by size and by a
 * deny-list of binary extensions.
 *
 * NOTE(review): containment is checked lexically on the resolved path;
 * symbolic links inside the sandbox are not resolved before the check.
 */
export class FileAccess {
	/** Extensions that `read` refuses because the content is not text. */
	private static readonly BINARY_EXTENSIONS: ReadonlySet<string> = new Set([
		'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.webp',
		'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
		'.zip', '.tar', '.gz', '.7z', '.rar',
		'.exe', '.dll', '.so', '.dylib',
		'.mp3', '.mp4', '.avi', '.mkv', '.wav',
		'.woff', '.woff2', '.ttf', '.eot',
	]);

	private sandboxDir: string;
	private allowedExtensions: Set<string>;
	private maxFileSize: number;
	private readOnly: boolean;
	private state: FileAccessState;

	/**
	 * Creates the sandbox directory when missing and indexes the files that
	 * already live directly inside it.
	 */
	constructor(options: FileAccessOptions) {
		this.sandboxDir = path.resolve(options.sandboxDir);
		this.allowedExtensions = options.allowedExtensions ?? ALLOWED_EXTENSIONS;
		this.maxFileSize = options.maxFileSize ?? MAX_FILE_SIZE;
		this.readOnly = options.readOnly ?? false;

		this.state = {
			files: new Map(),
			totalSize: 0,
			operationCount: 0,
		};

		// Ensure sandbox directory exists
		if (!fs.existsSync(this.sandboxDir)) {
			fs.mkdirSync(this.sandboxDir, { recursive: true });
		}

		// Index existing files
		this.indexDirectory();
	}

	/**
	 * Records every regular file directly inside the sandbox root in the
	 * state (sub-directories are not descended into). Failures are logged at
	 * debug level and otherwise ignored.
	 */
	private indexDirectory(): void {
		try {
			const entries = fs.readdirSync(this.sandboxDir, { withFileTypes: true });
			for (const entry of entries) {
				const fullPath = path.join(this.sandboxDir, entry.name);
				if (entry.isFile()) {
					const stat = fs.statSync(fullPath);
					this.state.files.set(entry.name, {
						name: entry.name,
						path: fullPath,
						size: stat.size,
						isDirectory: false,
						modifiedAt: stat.mtime,
						extension: path.extname(entry.name).toLowerCase(),
					});
					this.state.totalSize += stat.size;
				}
			}
		} catch {
			logger.debug('Failed to index sandbox directory');
		}
	}

	/**
	 * Resolves `relativePath` against the sandbox root.
	 *
	 * @throws Error when the resolved path lies outside the sandbox. The check
	 *   compares path segments, so a sibling directory that merely shares the
	 *   sandbox's name as a prefix (e.g. "sandbox-evil") is rejected as well.
	 */
	private resolvePath(relativePath: string): string {
		const resolved = path.resolve(this.sandboxDir, relativePath);
		const fromRoot = path.relative(this.sandboxDir, resolved);
		const escapesSandbox =
			fromRoot === '..' ||
			fromRoot.startsWith(`..${path.sep}`) ||
			// Different drive/root: path.relative falls back to an absolute path
			path.isAbsolute(fromRoot);
		if (escapesSandbox) {
			throw new Error(`Path traversal detected: ${relativePath}`);
		}
		return resolved;
	}

	/** Canonical state-map key for a resolved path: its path relative to the sandbox root. */
	private stateKey(fullPath: string): string {
		return path.relative(this.sandboxDir, fullPath);
	}

	/** @throws Error when the file's extension is not in the allowed set. */
	private validateExtension(filePath: string): void {
		const ext = path.extname(filePath).toLowerCase();
		if (!this.allowedExtensions.has(ext)) {
			throw new Error(
				`File extension "${ext}" is not allowed. Allowed: ${[...this.allowedExtensions].join(', ')}`,
			);
		}
	}

	/** Whether the path's extension is on the binary deny-list. */
	private isBinaryFile(filePath: string): boolean {
		const ext = path.extname(filePath).toLowerCase();
		return FileAccess.BINARY_EXTENSIONS.has(ext);
	}

	/**
	 * Reads a text file from the sandbox as UTF-8.
	 *
	 * @throws Error when the path escapes the sandbox, does not exist, is not a
	 *   regular file, has a binary extension, or exceeds the size limit.
	 */
	async read(relativePath: string): Promise<string> {
		const fullPath = this.resolvePath(relativePath);

		if (!fs.existsSync(fullPath)) {
			throw new Error(`File not found: ${relativePath}`);
		}

		if (this.isBinaryFile(fullPath)) {
			throw new Error(`Cannot read binary file: ${relativePath}`);
		}

		const stat = fs.statSync(fullPath);
		// Fail with a clear message instead of a raw EISDIR from readFileSync
		if (!stat.isFile()) {
			throw new Error(`Not a file: ${relativePath}`);
		}
		if (stat.size > this.maxFileSize) {
			throw new Error(
				`File too large: ${(stat.size / 1024 / 1024).toFixed(1)}MB (max: ${(this.maxFileSize / 1024 / 1024).toFixed(1)}MB)`,
			);
		}

		this.state.operationCount++;
		logger.debug(`Read file: ${relativePath} (${stat.size} bytes)`);
		return fs.readFileSync(fullPath, 'utf-8');
	}

	/**
	 * Writes `content` as UTF-8, creating parent directories as needed.
	 *
	 * The tracked total size is adjusted by the difference to the previously
	 * tracked size of the same file, so overwriting does not double-count.
	 *
	 * @throws Error when read-only, when the path escapes the sandbox, when
	 *   the extension is not allowed, or when the content exceeds the limit.
	 */
	async write(relativePath: string, content: string): Promise<void> {
		if (this.readOnly) {
			throw new Error('File system is read-only');
		}

		const fullPath = this.resolvePath(relativePath);
		this.validateExtension(fullPath);

		const contentSize = Buffer.byteLength(content, 'utf-8');
		if (contentSize > this.maxFileSize) {
			throw new Error(`Content too large: ${(contentSize / 1024 / 1024).toFixed(1)}MB`);
		}

		// Ensure parent directory exists
		const dir = path.dirname(fullPath);
		if (!fs.existsSync(dir)) {
			fs.mkdirSync(dir, { recursive: true });
		}

		fs.writeFileSync(fullPath, content, 'utf-8');

		const info: FileInfo = {
			name: path.basename(relativePath),
			path: fullPath,
			size: contentSize,
			isDirectory: false,
			modifiedAt: new Date(),
			extension: path.extname(relativePath).toLowerCase(),
		};

		// Normalised key: "./a.txt" and "a.txt" refer to the same entry
		const key = this.stateKey(fullPath);
		const previousSize = this.state.files.get(key)?.size ?? 0;
		this.state.files.set(key, info);
		this.state.totalSize += contentSize - previousSize;
		this.state.operationCount++;
		logger.debug(`Wrote file: ${relativePath} (${contentSize} bytes)`);
	}

	/**
	 * Lists the entries of a sandbox directory (non-recursive).
	 *
	 * @returns Metadata for every entry, or an empty array when the directory
	 *   does not exist.
	 * @throws Error when the path escapes the sandbox.
	 */
	async list(relativeDir = '.'): Promise<FileInfo[]> {
		const fullPath = this.resolvePath(relativeDir);

		if (!fs.existsSync(fullPath)) {
			return [];
		}

		const entries = fs.readdirSync(fullPath, { withFileTypes: true });
		const result: FileInfo[] = [];

		for (const entry of entries) {
			const entryPath = path.join(fullPath, entry.name);
			const stat = fs.statSync(entryPath);
			result.push({
				name: entry.name,
				path: entryPath,
				size: stat.size,
				isDirectory: entry.isDirectory(),
				modifiedAt: stat.mtime,
				extension: path.extname(entry.name).toLowerCase(),
			});
		}

		this.state.operationCount++;
		return result;
	}

	/**
	 * Deletes a file from the sandbox.
	 *
	 * Only files that were being tracked reduce the tracked total size, so
	 * the total can no longer drift below the sum of tracked files.
	 *
	 * @throws Error when read-only, when the path escapes the sandbox, or when
	 *   the file does not exist.
	 */
	async delete(relativePath: string): Promise<void> {
		if (this.readOnly) {
			throw new Error('File system is read-only');
		}

		const fullPath = this.resolvePath(relativePath);

		if (!fs.existsSync(fullPath)) {
			throw new Error(`File not found: ${relativePath}`);
		}

		fs.unlinkSync(fullPath);

		const key = this.stateKey(fullPath);
		const tracked = this.state.files.get(key);
		if (tracked) {
			this.state.files.delete(key);
			this.state.totalSize -= tracked.size;
		}
		this.state.operationCount++;
		logger.debug(`Deleted file: ${relativePath}`);
	}

	/**
	 * Whether the given sandbox-relative path exists.
	 *
	 * @throws Error when the path escapes the sandbox.
	 */
	async exists(relativePath: string): Promise<boolean> {
		const fullPath = this.resolvePath(relativePath);
		return fs.existsSync(fullPath);
	}

	/** Returns a snapshot of the bookkeeping state; the file map is a copy. */
	getState(): FileAccessState {
		return {
			files: new Map(this.state.files),
			totalSize: this.state.totalSize,
			operationCount: this.state.operationCount,
		};
	}

	/** Absolute path of the sandbox root. */
	getSandboxDir(): string {
		return this.sandboxDir;
	}
}


================================================
FILE: packages/core/src/sandbox/index.ts
================================================
export { FileAccess, type FileAccessOptions, type FileInfo, type FileAccessState } from './file-access.js';


================================================
FILE: packages/core/src/telemetry.ts
================================================
import { createLogger } from './logging.js';

const logger = createLogger('perf');

/** Value produced by a timed call together with how long the call took. */
export interface TimingResult<T> {
	/** The wrapped function's resolved value. */
	result: T;
	/** Elapsed time in milliseconds, measured with performance.now(). */
	durationMs: number;
}

/**
 * Runs `fn`, measures how long it takes and logs the duration at debug level.
 *
 * @param label - Prefix used in the log line.
 * @param fn - Async operation to measure.
 * @returns The operation's value plus the elapsed milliseconds.
 * @throws Whatever `fn` rejects with, after logging a FAILED line.
 */
export async function timed<T>(
	label: string,
	fn: () => Promise<T>,
): Promise<TimingResult<T>> {
	const startedAt = performance.now();
	const sinceStart = (): number => performance.now() - startedAt;

	try {
		const result = await fn();
		const durationMs = sinceStart();
		logger.debug(`${label}: ${durationMs.toFixed(1)}ms`);
		return { result, durationMs };
	} catch (error) {
		const failedAfterMs = sinceStart();
		logger.debug(`${label}: FAILED after ${failedAfterMs.toFixed(1)}ms`);
		throw error;
	}
}

/**
 * Wraps `fn` so that every invocation is measured via `timed` under `label`.
 * The wrapper has the same parameters as `fn` and resolves to the same value;
 * the timing information itself is only logged, not returned.
 */
export function withTiming<Args extends unknown[], R>(
	label: string,
	fn: (...args: Args) => Promise<R>,
): (...args: Args) => Promise<R> {
	const timedCall = async (...args: Args): Promise<R> => {
		const outcome = await timed(label, () => fn(...args));
		return outcome.result;
	};
	return timedCall;
}

/**
 * Manually driven stopwatch: starts on construction, records labelled split
 * times (each measured from the start, not from the previous split) and can
 * render them as a text summary.
 */
export class Stopwatch {
	private startTime: number = performance.now();
	private splits: Array<{ label: string; timeMs: number }> = [];

	/** Records the time elapsed since start under `label` and returns it. */
	split(label: string): number {
		const timeMs = performance.now() - this.startTime;
		this.splits.push({ label, timeMs });
		return timeMs;
	}

	/** Milliseconds elapsed since construction or the last reset. */
	elapsed(): number {
		const now = performance.now();
		return now - this.startTime;
	}

	/** Restarts the clock and discards all recorded splits. */
	reset(): void {
		this.splits = [];
		this.startTime = performance.now();
	}

	/** Returns a copy of the recorded splits. */
	getSplits(): Array<{ label: string; timeMs: number }> {
		return this.splits.slice();
	}

	/** One indented line per split followed by a `total` line. */
	summary(): string {
		const rows: string[] = [];
		for (const { label, timeMs } of this.splits) {
			rows.push(`  ${label}: ${timeMs.toFixed(1)}ms`);
		}
		rows.push(`  total: ${this.elapsed().toFixed(1)}ms`);
		return rows.join('\n');
	}
}


================================================
FILE: packages/core/src/types.ts
================================================
import { z } from 'zod';

// ── Branded types for compile-time safety ──

/** Phantom key used only at the type level to tell branded types apart. */
declare const __brand: unique symbol;

/** `T` tagged with the brand `B`; identical to `T` at runtime. */
type Brand<T, B extends string> = T & { readonly [__brand]: B };

export type TargetId = Brand<string, 'TargetId'>;
export type SessionId = Brand<string, 'SessionId'>;
export type ElementRef = Brand<number, 'ElementRef'>;
export type TabId = Brand<number, 'TabId'>;

/** Tags a plain string as a TargetId (no runtime conversion). */
export const targetId = (id: string): TargetId => id as TargetId;

/** Tags a plain string as a SessionId (no runtime conversion). */
export const sessionId = (id: string): SessionId => id as SessionId;

/** Tags a plain number as an ElementRef (no runtime conversion). */
export const elementIndex = (index: number): ElementRef => index as ElementRef;

/** Tags a plain number as a TabId (no runtime conversion). */
export const tabId = (id: number): TabId => id as TabId;

// ── Result type for error handling ──

/** Outcome of an operation: either a success carrying `value` or a failure carrying `error`. */
export type Result<T, E = Error> = { ok: true; value: T } | { ok: false; error: E };

/** Wraps `value` in a successful Result. */
export function ok<T>(value: T): Result<T, never> {
	const success: Result<T, never> = { ok: true, value };
	return success;
}

/** Wraps `error` in a failed Result. */
export function err<E>(error: E): Result<never, E> {
	const failure: Result<never, E> = { ok: false, error };
	return failure;
}

// ── Position & geometry ──

/** Zod schema for a 2D point with numeric `x` and `y`. */
export const PositionSchema = z.object({
	x: z.number(),
	y: z.number(),
});
/** Static type inferred from PositionSchema. */
export type Position = z.infer<typeof PositionSchema>;

/** Zod schema for a rectangle with numeric `x`, `y`, `width` and `height`. */
export const RectSchema = z.object({
	x: z.number(),
	y: z.number(),
	width: z.number(),
	height: z.number(),
});
/** Static type inferred from RectSchema. */
export type Rect = z.infer<typeof RectSchema>;

// ── Common enums ──

/**
 * Log severity levels as ascending numbers (DEBUG = 0 … ERROR = 3), exposed
 * both as a constant object and as a numeric-literal union type.
 */
export const LogLevel = {
	DEBUG: 0,
	INFO: 1,
	WARN: 2,
	ERROR: 3,
} as const;
/** Union of the numeric values of the `LogLevel` object. */
export type LogLevel = (typeof LogLevel)[keyof typeof LogLevel];

// ── Utility types ──

/**
 * Makes every property of `T` optional, recursing into any property whose
 * type extends `object` (which includes arrays and functions).
 */
export type DeepPartial<T> = {
	[P in keyof T]?: T[P] extends object ? DeepPartial<T[P]> : T[P];
};

/** A value that is available either directly or through a Promise. */
export type Awaitable<T> = T | Promise<T>;


================================================
FILE: packages/core/src/utils.ts
================================================
import { nanoid } from 'nanoid';

// ── ID generation ──

/**
 * Generates a random identifier by delegating to nanoid.
 *
 * @param size - Value passed to nanoid as the id size (default 12).
 */
export function generateId(size = 12): string {
	return nanoid(size);
}

// ── URL matching ──

/**
 * Tests a URL against a host/path pattern.
 *
 * Supported patterns:
 * - `*` matches everything.
 * - `example.com` / `https://example.com` require an exact host match.
 * - `*.example.com` matches `example.com` itself and any true subdomain
 *   (a host that merely ends with the same characters, such as
 *   `evilexample.com`, does not match).
 * - A path other than `/` or `/*` must match the start of the URL's path;
 *   `*` inside the path is a wildcard, every other character is literal.
 *
 * The pattern's scheme is not compared. When either argument cannot be
 * parsed as a URL the function falls back to a plain substring test.
 *
 * @param url - Absolute URL to test.
 * @param pattern - Pattern as described above; a missing scheme is assumed.
 * @returns Whether the URL matches the pattern.
 */
export function matchesUrlPattern(url: string, pattern: string): boolean {
	if (pattern === '*') return true;

	try {
		const urlObj = new URL(url);
		const patternObj = new URL(pattern.includes('://') ? pattern : `https://${pattern}`);

		if (patternObj.hostname.startsWith('*.')) {
			const baseDomain = patternObj.hostname.slice(2);
			// Require a label boundary so "evilexample.com" cannot pass for "*.example.com"
			const isSubdomain = urlObj.hostname.endsWith(`.${baseDomain}`);
			if (!isSubdomain && urlObj.hostname !== baseDomain) {
				return false;
			}
		} else if (urlObj.hostname !== patternObj.hostname) {
			return false;
		}

		if (patternObj.pathname !== '/' && patternObj.pathname !== '/*') {
			const patternPath = patternObj.pathname
				// Escape regex metacharacters (all but "*") so that e.g. "." stays literal
				.replace(/[.+?^${}()|[\]\\]/g, '\\$&')
				.replace(/\*/g, '.*');
			const regex = new RegExp(`^${patternPath}`);
			if (!regex.test(urlObj.pathname)) {
				return false;
			}
		}

		return true;
	} catch {
		return url.includes(pattern);
	}
}

/**
 * Decides whether a URL may be visited given optional allow and block lists.
 *
 * A match in `blockedUrls` always denies. Otherwise a non-empty
 * `allowedUrls` list must contain a matching pattern; with no allow list
 * (or an empty one) the URL is permitted.
 */
export function isUrlPermitted(
	url: string,
	allowedUrls?: string[],
	blockedUrls?: string[],
): boolean {
	const matches = (pattern: string): boolean => matchesUrlPattern(url, pattern);

	const isBlocked = blockedUrls?.some((pattern) => matches(pattern)) ?? false;
	if (isBlocked) {
		return false;
	}

	if (!allowedUrls || allowedUrls.length === 0) {
		return true;
	}
	return allowedUrls.some((pattern) => matches(pattern));
}

// ── Text utilities ──

/**
 * Cleans text for display: drops control characters (tab, line feed and
 * carriage return are kept at this stage), collapses every whitespace run
 * into a single space and trims both ends.
 */
export function sanitizeText(text: string): string {
	const withoutControlChars = text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '');
	const singleSpaced = withoutControlChars.replace(/\s+/g, ' ');
	return singleSpaced.trim();
}

/**
 * Shortens `text` to at most `maxLength` characters, ending in `suffix` when
 * something was cut off.
 *
 * @param text - Text to shorten.
 * @param maxLength - Maximum length of the returned string.
 * @param suffix - Marker appended to truncated text (default `...`).
 * @returns `text` unchanged when it already fits; otherwise a string of at
 *   most `maxLength` characters. When `maxLength` leaves no room for the
 *   suffix, the text is cut hard without it; a non-positive `maxLength`
 *   yields an empty string.
 */
export function truncateText(text: string, maxLength: number, suffix = '...'): string {
	if (text.length <= maxLength) return text;
	if (maxLength <= 0) return '';
	// A negative slice end would count from the end of the string and return
	// more than maxLength characters, so handle the "no room for suffix" case.
	if (suffix.length > maxLength) return text.slice(0, maxLength);
	return text.slice(0, maxLength - suffix.length) + suffix;
}

/**
 * Strips everything that looks like a tag — a `<`, any run of characters
 * other than `>`, then `>` — and returns the remaining text.
 */
export function removeTags(html: string): string {
	const tagPattern = /<[^>]*>/g;
	const textOnly = html.replace(tagPattern, '');
	return textOnly;
}

// ── Timing ──

/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
export function sleep(ms: number): Promise<void> {
	return new Promise<void>((done) => {
		setTimeout(done, ms);
	});
}

/**
 * Races `promise` against a timeout.
 *
 * @param promise - Operation to wait for.
 * @param ms - Deadline in milliseconds.
 * @param message - Message of the Error raised when the deadline passes first.
 * @returns The value `promise` resolves with.
 * @throws Error with `message` on timeout, or whatever `promise` rejects with.
 *
 * The timeout timer is cleared as soon as the race settles, so a finished
 * operation no longer leaves a pending timer behind that keeps the event
 * loop alive until the deadline. The underlying operation itself is not
 * cancelled on timeout.
 */
export async function withDeadline<T>(
	promise: Promise<T>,
	ms: number,
	message = 'Operation timed out',
): Promise<T> {
	let timer: ReturnType<typeof setTimeout> | undefined;
	const deadline = new Promise<never>((_, reject) => {
		timer = setTimeout(() => reject(new Error(message)), ms);
	});

	try {
		return await Promise.race([promise, deadline]);
	} finally {
		clearTimeout(timer);
	}
}

/** Wall-clock timer based on Date.now(); starts when constructed. */
export class Timer {
	private startTime: number = Date.now();

	/** Milliseconds since construction or the last reset. */
	elapsed(): number {
		const now = Date.now();
		return now - this.startTime;
	}

	/** Same as `elapsed()`, expressed in seconds. */
	elapsedSeconds(): number {
		const elapsedMs = this.elapsed();
		return elapsedMs / 1000;
	}

	/** Restarts the timer from the current time. */
	reset(): void {
		this.startTime = Date.now();
	}
}

// ── Retry ──

/** Tuning knobs for withRetry. */
export interface RetryOptions {
	/** Number of retries after the first attempt (total attempts = maxRetries + 1). */
	maxRetries: number;
	/** Delay before the first retry, in milliseconds. */
	initialDelayMs: number;
	/** Upper bound for the delay between attempts, in milliseconds. */
	maxDelayMs: number;
	/** Factor the delay is multiplied by after each failed attempt. */
	backoffFactor: number;
}

/** Defaults: 3 retries, starting at 1s, doubling, capped at 30s. */
const DEFAULT_RETRY: RetryOptions = {
	maxRetries: 3,
	initialDelayMs: 1000,
	maxDelayMs: 30000,
	backoffFactor: 2,
};

/**
 * Calls `fn` until it succeeds or the retry budget is used up, waiting with
 * exponential backoff between attempts.
 *
 * `fn` is always attempted at least once: a negative or non-numeric
 * `maxRetries` is treated as 0 instead of skipping the call entirely.
 *
 * @param fn - Operation to run.
 * @param options - Overrides for the default retry settings.
 * @returns The first successful result of `fn`.
 * @throws The error of the last failed attempt; non-Error rejections are
 *   wrapped in an Error carrying their string form.
 */
export async function withRetry<T>(
	fn: () => Promise<T>,
	options: Partial<RetryOptions> = {},
): Promise<T> {
	const opts = { ...DEFAULT_RETRY, ...options };
	// Math.max yields NaN for NaN/undefined input; "|| 0" maps that to 0 as well
	const maxRetries = Math.max(0, opts.maxRetries) || 0;
	let lastError: Error | undefined;
	let delay = opts.initialDelayMs;

	for (let attempt = 0; attempt <= maxRetries; attempt++) {
		try {
			return await fn();
		} catch (error) {
			lastError = error instanceof Error ? error : new Error(String(error));
			if (attempt < maxRetries) {
				await sleep(Math.min(delay, opts.maxDelayMs));
				delay *= opts.backoffFactor;
			}
		}
	}

	// The loop always runs once, so lastError is set here; the fallback only
	// guarantees that a real Error is thrown in every case.
	throw lastError ?? new Error('withRetry: operation failed without an error');
}

// ── Misc ──

/**
 * Partition `items` into buckets keyed by `keyFn(item)`.
 *
 * Buckets are collected in a `Map` first and converted to a plain object at the
 * end. Accumulating directly on `{}` breaks for keys that collide with
 * `Object.prototype` members (e.g. `"constructor"` or `"toString"`): the inherited
 * value is found instead of a bucket and `.push` throws.
 *
 * @param items Items to group; the array is not modified.
 * @param keyFn Computes the bucket key for an item.
 * @returns A plain object mapping each key to its items, in encounter order.
 */
export function groupBy<T, K extends string | number>(
	items: T[],
	keyFn: (item: T) => K,
): Record<K, T[]> {
	const buckets = new Map<string, T[]>();

	for (const item of items) {
		// Object keys are strings anyway; normalising here keeps 1 and "1" in one bucket.
		const key = String(keyFn(item));
		const bucket = buckets.get(key);
		if (bucket) {
			bucket.push(item);
		} else {
			buckets.set(key, [item]);
		}
	}

	return Object.fromEntries(buckets) as Record<K, T[]>;
}

/**
 * Remove the common leading indentation from a multi-line string.
 *
 * A blank first line and a blank last line are dropped, which suits template
 * literals that open and close on their own lines. The indent to remove is the
 * smallest leading-whitespace width among the non-blank lines. If there are no
 * non-blank lines the ORIGINAL string is returned unchanged.
 *
 * @param str Text to dedent.
 * @returns The dedented text.
 */
export function dedent(str: string): string {
	const lines = str.split('\n');
	if (lines[0]?.trim() === '') lines.shift();
	if (lines[lines.length - 1]?.trim() === '') lines.pop();

	let minIndent = Number.POSITIVE_INFINITY;
	for (const line of lines) {
		// Blank lines do not take part in the indent calculation.
		if (line.trim().length === 0) continue;

		const leading = line.match(/^(\s*)/);
		const width = leading ? leading[1].length : 0;
		if (width < minIndent) minIndent = width;
	}

	if (minIndent === Number.POSITIVE_INFINITY) return str;

	const dedented: string[] = [];
	for (const line of lines) {
		dedented.push(line.slice(minIndent));
	}
	return dedented.join('\n');
}

// ── URL utilities ──

/**
 * Match a URL's host against a domain pattern.
 *
 * Supported pattern forms:
 * - plain domain: `example.com`
 * - wildcard domain: `*.example.com` (matches the base domain and any subdomain)
 * - either of the above followed by a port and/or path, e.g. `example.com:8080/path/*`
 * - full URL: `https://example.com/anything`
 *
 * Only the host takes part in the comparison: scheme, port and path of both the
 * URL and the pattern are ignored, a leading `www.` is dropped on both sides,
 * and the pattern is compared case-insensitively (URL hostnames are already
 * lower-cased by the URL parser).
 *
 * If `url` (or a `://` pattern) cannot be parsed, this falls back to a plain
 * substring test of `pattern` within `url`.
 *
 * @param url URL to test.
 * @param pattern Domain pattern as described above.
 * @returns `true` when the URL's host matches the pattern.
 */
export function matchUrlWithDomainPattern(url: string, pattern: string): boolean {
	try {
		const urlHost = new URL(url).hostname.replace(/^www\./, '');

		// Reduce the pattern to its host part before comparing: drop any path, then
		// any trailing ":port". Full-URL patterns go through the URL parser instead.
		const rawPatternHost = pattern.includes('://')
			? new URL(pattern).hostname
			: (pattern.split('/')[0] ?? '').replace(/:\d+$/, '');
		const patternHost = rawPatternHost.toLowerCase();

		if (patternHost.startsWith('*.')) {
			const base = patternHost.slice(2);
			// The leading dot in the suffix check prevents "evil-example.com" from
			// matching "*.example.com".
			return urlHost === base || urlHost.endsWith(`.${base}`);
		}

		return urlHost === patternHost.replace(/^www\./, '');
	} catch {
		return url.includes(pattern);
	}
}

/** URLs that browsers use for an empty or freshly opened tab. */
const NEW_TAB_URLS = new Set([
	'about:blank',
	'about:newtab',
	'chrome://newtab/',
	'chrome://new-tab-page/',
	'edge://newtab/',
	'about:home',
]);

/**
 * Tell whether `url` denotes an empty / new-tab page.
 *
 * @param url URL reported for the tab; the empty string counts as a new tab.
 * @returns `true` for the empty string and for every entry of `NEW_TAB_URLS`.
 */
export function isNewTabPage(url: string): boolean {
	if (url === '') return true;
	return NEW_TAB_URLS.has(url);
}

/**
 * Replace every unpaired UTF-16 surrogate with U+FFFD (the replacement character)
 * to prevent JSON serialization issues.
 *
 * A high surrogate counts as unpaired when it is not immediately followed by a
 * low surrogate; a low surrogate counts as unpaired when it is not immediately
 * preceded by a high surrogate. Well-formed pairs are left untouched.
 *
 * @param text Possibly ill-formed UTF-16 text.
 * @returns Text in which each lone surrogate has been replaced.
 */
export function sanitizeSurrogates(text: string): string {
	const replacementChar = '\uFFFD';
	const loneSurrogate =
		/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
	return text.replace(loneSurrogate, replacementChar);
}

/** Matches `http://` / `https://` URLs up to the first whitespace or URL-unsafe delimiter. */
const URL_REGEX = /https?:\/\/[^\s<>"{}|\\^`\[\]]+/g;

/**
 * Collect every http(s) URL that appears in `text`, in order of appearance.
 *
 * @param text Text to scan.
 * @returns The matched URL strings; an empty array when there are none.
 */
export function extractUrls(text: string): string[] {
	const found = text.match(URL_REGEX);
	if (found === null) return [];
	return [...found];
}

/**
 * Backslash-escape every character that has a special meaning in a regular
 * expression, so the result can be embedded in a `RegExp` as a literal.
 *
 * @param string Text to escape.
 * @returns The escaped text.
 */
export function escapeRegExp(string: string): string {
	const specialChars = /[.*+?^${}()|[\]\\]/g;
	// `$&` re-inserts the matched character after the added backslash.
	const escaped = string.replace(specialChars, '\\$&');
	return escaped;
}


================================================
FILE: packages/core/src/viewport/event-hub.ts
================================================
/** Listener for a fire-and-forget event. */
type Handler<T = unknown> = (payload: T) => void;
/** Responder for a request/response event; there is at most one per event name. */
type RequestHandler<Req = unknown, Res = unknown> = (payload: Req) => Promise<Res>;

/**
 * Small typed in-process event bus with two channels:
 *
 * - **events** (`on` / `once` / `emit` / `off`): any number of listeners per
 *   event; every emission is appended to a bounded history buffer.
 * - **requests** (`onRequest` / `request`): a single responder per event name
 *   whose result is awaited with a timeout.
 *
 * `EventMap` maps event names to payload types; `RequestMap` maps request names
 * to `{ request, response }` type pairs.
 */
export class EventHub<
	EventMap extends { [K in keyof EventMap]: EventMap[K] } = Record<string, unknown>,
	RequestMap extends { [K in keyof RequestMap]: { request: unknown; response: unknown } } = Record<
		string,
		{ request: unknown; response: unknown }
	>,
> {
	private handlers = new Map<string, Set<Handler>>();
	private requestHandlers = new Map<string, RequestHandler>();
	private history: Array<{ event: string; payload: unknown; timestamp: number }> = [];
	private maxHistory: number;

	/**
	 * @param options.maxHistory Maximum number of emitted events kept in the history buffer (default 100).
	 */
	constructor(options?: { maxHistory?: number }) {
		this.maxHistory = options?.maxHistory ?? 100;
	}

	/**
	 * Subscribe `handler` to `event`.
	 *
	 * @returns A function that removes this subscription.
	 */
	on<K extends keyof EventMap & string>(event: K, handler: Handler<EventMap[K]>): () => void {
		let listeners = this.handlers.get(event);
		if (!listeners) {
			listeners = new Set();
			this.handlers.set(event, listeners);
		}
		listeners.add(handler as Handler);

		return () => {
			this.handlers.get(event)?.delete(handler as Handler);
		};
	}

	/**
	 * Subscribe `handler` for a single emission of `event`.
	 *
	 * @returns A function that cancels the subscription if it has not fired yet.
	 */
	once<K extends keyof EventMap & string>(event: K, handler: Handler<EventMap[K]>): () => void {
		const wrappedHandler: Handler<EventMap[K]> = (payload) => {
			// Unsubscribe first so a re-entrant emit from `handler` cannot fire it twice.
			off();
			handler(payload);
		};
		const off = this.on(event, wrappedHandler);
		return off;
	}

	/**
	 * Record the emission in the history and invoke every listener synchronously.
	 * A listener that throws is logged and does not prevent the others from running.
	 */
	emit<K extends keyof EventMap & string>(event: K, payload: EventMap[K]): void {
		this.recordHistory(event, payload);
		const handlers = this.handlers.get(event);
		if (handlers) {
			for (const handler of handlers) {
				try {
					handler(payload);
				} catch (error) {
					console.error(`Error in event handler for "${event}":`, error);
				}
			}
		}
	}

	/**
	 * Register the responder for `event`, replacing any previous one.
	 *
	 * @returns A function that unregisters this responder. It is a no-op if the
	 * responder has since been replaced, so a stale unsubscribe cannot remove a
	 * newer registration.
	 */
	onRequest<K extends keyof RequestMap & string>(
		event: K,
		handler: RequestHandler<RequestMap[K]['request'], RequestMap[K]['response']>,
	): () => void {
		const registered = handler as RequestHandler;
		this.requestHandlers.set(event, registered);
		return () => {
			if (this.requestHandlers.get(event) === registered) {
				this.requestHandlers.delete(event);
			}
		};
	}

	/**
	 * Send `payload` to the responder registered for `event` and await its answer.
	 *
	 * @param timeoutMs Maximum time to wait for the responder (default 30 000 ms).
	 * @throws Error if no responder is registered, if the responder rejects, or if it
	 * does not settle within `timeoutMs`.
	 */
	async request<K extends keyof RequestMap & string>(
		event: K,
		payload: RequestMap[K]['request'],
		timeoutMs = 30000,
	): Promise<RequestMap[K]['response']> {
		const handler = this.requestHandlers.get(event);
		if (!handler) {
			throw new Error(`No handler registered for request "${event}"`);
		}

		let timer: ReturnType<typeof setTimeout> | undefined;
		const timeout = new Promise<never>((_, reject) => {
			timer = setTimeout(
				() => reject(new Error(`Request "${event}" timed out after ${timeoutMs}ms`)),
				timeoutMs,
			);
		});

		try {
			const result = await Promise.race([handler(payload), timeout]);
			return result as RequestMap[K]['response'];
		} finally {
			// Without this every request would leave a live timer behind for up to
			// `timeoutMs`, keeping the process from exiting.
			clearTimeout(timer);
		}
	}

	/**
	 * Remove one listener, or all listeners of `event` when `handler` is omitted.
	 */
	off<K extends keyof EventMap & string>(event: K, handler?: Handler<EventMap[K]>): void {
		if (handler) {
			this.handlers.get(event)?.delete(handler as Handler);
		} else {
			this.handlers.delete(event);
		}
	}

	/** Drop every event listener and every request responder. */
	removeAllListeners(): void {
		this.handlers.clear();
		this.requestHandlers.clear();
	}

	/**
	 * @param event When given, only emissions of that event are returned.
	 * @returns A new array of history records, oldest first.
	 */
	getHistory(event?: string): Array<{ event: string; payload: unknown; timestamp: number }> {
		if (event) {
			return this.history.filter((h) => h.event === event);
		}
		return [...this.history];
	}

	/** Empty the history buffer. */
	clearHistory(): void {
		this.history = [];
	}

	/** Append to the history and trim it to the newest `maxHistory` records. */
	private recordHistory(event: string, payload: unknown): void {
		this.history.push({ event, payload, timestamp: Date.now() });
		if (this.history.length > this.maxHistory) {
			this.history = this.history.slice(-this.maxHistory);
		}
	}
}


================================================
FILE: packages/core/src/viewport/events.ts
================================================
import type { ElementRef } from '../types.js';

// ── Event payload types ──

/** Payload of the `navigation` event. */
export interface NavigateEvent {
	/** Target URL. */
	url: string;
	/** Load milestone associated with the navigation, if any. */
	waitUntil?: 'load' | 'domcontentloaded' | 'networkidle';
}

/** Payload of the `click` event. */
export interface ClickEvent {
	/** Reference to the element involved (see `ElementRef` in `../types.js`). */
	elementIndex: ElementRef;
	clickCount?: number;
}

/** Payload of the `input` event. */
export interface InputEvent {
	/** Reference to the element involved. */
	elementIndex: ElementRef;
	text: string;
	// presumably: clear the field's existing content before typing — confirm against the emitter
	clearFirst?: boolean;
}

/** Payload of the `selection` event. */
export interface SelectOptionEvent {
	/** Reference to the element involved. */
	elementIndex: ElementRef;
	value: string;
}

/** Payload of the `scroll` event. */
export interface ScrollEvent {
	direction: 'up' | 'down';
	// NOTE(review): the unit of `amount` (pixels, pages, ...) is not defined in this file
	amount?: number;
	/** Optional element reference; its exact role is defined by the emitter. */
	elementIndex?: ElementRef;
}

/** Payload of the `capture` event and request body of `get-screenshot`. */
export interface ScreenshotEvent {
	fullPage?: boolean;
}

/** Payload of the `capture-result` event and response body of `get-screenshot`. */
export interface ScreenshotResult {
	/** Image data as a base64 string. */
	base64: string;
	width: number;
	height: number;
}

/** Payload of the `tab-changed` event. */
export interface TabSwitchEvent {
	tabIndex: number;
}

/** Payload of the `file-uploaded` event. */
export interface FileUploadEvent {
	/** Reference to the element involved. */
	elementIndex: ElementRef;
	filePaths: string[];
}

/** Payload of the `keystroke` event. */
export interface KeyPressEvent {
	key: string;
}

/** Payload of the `viewport-state` event and response body of `get-state`. */
export interface BrowserStateEvent {
	url: string;
	title: string;
	tabCount: number;
}

/** Payload of the `download` event. */
export interface DownloadEvent {
	url: string;
	suggestedFilename: string;
	/** Location of the saved file; absent while the download has only started. */
	path?: string;
}

/** Payload of the `popup` event. */
export interface PopupEvent {
	url: string;
	type: 'popup' | 'dialog';
}

/** Payload of the `policy-violation` event. */
export interface SecurityEvent {
	type: 'navigation-blocked' | 'download-blocked' | 'popup-blocked';
	url: string;
	/** Human-readable explanation of why the action was blocked. */
	reason: string;
}

/** Payload of the `crash` event. */
export interface CrashEvent {
	reason: string;
}

// ── Event map ──

/**
 * Maps every fire-and-forget viewport event name to its payload type.
 * Intended as the `EventMap` type argument of `EventHub`.
 * Events typed `void` carry no payload.
 */
export interface ViewportEventMap {
	'navigation': NavigateEvent;
	'click': ClickEvent;
	'input': InputEvent;
	'selection': SelectOptionEvent;
	'scroll': ScrollEvent;
	'capture': ScreenshotEvent;
	'capture-result': ScreenshotResult;
	'tab-changed': TabSwitchEvent;
	'tab-closed': { tabIndex: number };
	'tab-opened': { url: string };
	'file-uploaded': FileUploadEvent;
	'keystroke': KeyPressEvent;
	'viewport-state': BrowserStateEvent;
	'download': DownloadEvent;
	'popup': PopupEvent;
	'policy-violation': SecurityEvent;
	'crash': CrashEvent;
	'page-ready': { url: string };
	'content-ready': void;
	'shutdown': void;
}

// ── Request-response event map ──

/**
 * Maps every request/response viewport event name to its request and response
 * payload types. Intended as the `RequestMap` type argument of `EventHub`.
 */
export interface ViewportRequestMap {
	'get-screenshot': { request: ScreenshotEvent; response: ScreenshotResult };
	'get-state': { request: void; response: BrowserStateEvent };
}


================================================
FILE: packages/core/src/viewport/guard-base.ts
================================================
import type { Page, BrowserContext } from 'playwright';
import type { EventHub } from './event-hub.js';
import type { ViewportEventMap, ViewportRequestMap } from './events.js';

/**
 * Everything a guard needs to observe the browser: the Playwright page and
 * browser context it watches, plus the event hub it reports to.
 */
export interface GuardContext {
	/** Page being watched. Not readonly: a guard may replace it (CrashGuard does after a crash). */
	page: Page;
	/** Browser context that owns `page`. */
	context: BrowserContext;
	/** Hub used to emit and subscribe to viewport events. */
	eventBus: EventHub<ViewportEventMap, ViewportRequestMap>;
}

/**
 * Base class for browser watchdogs that monitor and react to browser events.
 * Each watchdog handles a specific concern (security, popups, downloads, etc.).
 *
 * Lifecycle: `attach(ctx)` stores the context and runs the subclass `setup()`;
 * `detach()` runs every registered cleanup function and then `teardown()`.
 * Subclasses register listeners in `setup()` and push the matching removal
 * callbacks onto `cleanupFns`.
 */
export abstract class BaseGuard {
	/** Context supplied to `attach()`; only valid after `attach()` has been called. */
	protected ctx!: GuardContext;
	/** Undo callbacks collected during `setup()`; run (and emptied) on detach. */
	protected cleanupFns: Array<() => void> = [];
	private _active = false;

	/** `true` between a successful `attach()` and the next `detach()`. */
	get active(): boolean {
		return this._active;
	}

	/** Identifier of the guard. */
	abstract readonly name: string;
	/** Numeric priority; how it is ordered is decided by whoever manages the guards. */
	abstract readonly priority: number;

	/**
	 * Bind the guard to `ctx` and run its `setup()`.
	 *
	 * If `setup()` throws, the guard is rolled back: it is marked inactive and any
	 * cleanup functions registered before the failure are run, so a half-attached
	 * guard does not leave listeners behind. The error is then rethrown.
	 */
	async attach(ctx: GuardContext): Promise<void> {
		this.ctx = ctx;
		this._active = true;
		try {
			await this.setup();
		} catch (error) {
			this._active = false;
			this.runCleanupFns();
			throw error;
		}
	}

	/** Deactivate the guard: run all cleanup functions, then `teardown()`. */
	async detach(): Promise<void> {
		this._active = false;
		this.runCleanupFns();
		await this.teardown();
	}

	/** Register listeners and other resources; called once per `attach()`. */
	protected abstract setup(): Promise<void>;

	/** Hook that runs after the cleanup functions during `detach()`. */
	protected async teardown(): Promise<void> {
		// Override if needed
	}

	/**
	 * Subscribe to a hub event and register the unsubscribe callback for cleanup.
	 */
	protected onEvent<K extends keyof ViewportEventMap & string>(
		event: K,
		handler: (payload: ViewportEventMap[K]) => void,
	): void {
		const off = this.ctx.eventBus.on(event, handler);
		this.cleanupFns.push(off);
	}

	/** Run every cleanup function once, ignoring individual failures, and empty the list. */
	private runCleanupFns(): void {
		const pending = this.cleanupFns;
		this.cleanupFns = [];
		for (const cleanup of pending) {
			try {
				cleanup();
			} catch {
				// Ignore cleanup errors
			}
		}
	}
}


================================================
FILE: packages/core/src/viewport/guards/blank-page.ts
================================================
import { BaseGuard } from '../guard-base.js';

/**
 * Handles about:blank pages. If the page navigates to about:blank,
 * attempts to navigate back to the previous page.
 */
export class BlankPageGuard extends BaseGuard {
	readonly name = 'about-blank';
	readonly priority = 400;

	protected async setup(): Promise<void> {
		// Pin the page this guard is bound to. `ctx.page` is mutable (CrashGuard
		// reassigns it after a crash), so reading it lazily could make the handler
		// inspect, and the cleanup unsubscribe from, a different page than the one
		// the listener was registered on.
		const page = this.ctx.page;

		const handler = () => {
			if (page.url() !== 'about:blank') return;
			page.goBack().catch(() => {
				// Cannot go back; ignore
			});
		};

		page.on('framenavigated', handler);
		this.cleanupFns.push(() => page.off('framenavigated', handler));
	}
}


================================================
FILE: packages/core/src/viewport/guards/crash.ts
================================================
import { BaseGuard } from '../guard-base.js';

/**
 * Monitors for browser page crashes. Emits crash events
 * and attempts recovery by creating a new page.
 *
 * After a successful recovery `ctx.page` points at the replacement page and,
 * while the guard is still active, that page is watched for crashes as well.
 */
export class CrashGuard extends BaseGuard {
	readonly name = 'crash';
	readonly priority = 500;

	protected async setup(): Promise<void> {
		// Each watched page gets its own listener, and its cleanup is bound to that
		// exact page. Reading `this.ctx.page` at cleanup time would target the
		// replacement page and leave the listener on the crashed one.
		const watch = (page: typeof this.ctx.page): void => {
			const onCrash = () => {
				this.ctx.eventBus.emit('crash', {
					reason: 'Page crashed unexpectedly',
				});

				// Attempt recovery by creating a new page
				this.ctx.context
					.newPage()
					.then((newPage) => {
						this.ctx.page = newPage;
						// Do not register new listeners once the guard has been detached.
						if (this.active) watch(newPage);
					})
					.catch(() => {
						// Recovery failed; context may be closed
					});
			};

			page.on('crash', onCrash);
			this.cleanupFns.push(() => page.off('crash', onCrash));
		};

		watch(this.ctx.page);
	}
}


================================================
FILE: packages/core/src/viewport/guards/default-handler.ts
================================================
import type { Dialog } from 'playwright';
import { BaseGuard } from '../guard-base.js';

/**
 * Fallback handler for browser dialogs (alert / confirm / prompt / beforeunload).
 *
 * Every dialog is reported on the event hub as a `popup` event of type `dialog`
 * and is then automatically ACCEPTED so that the page cannot block on it.
 */
export class DefaultHandlerGuard extends BaseGuard {
	readonly name = 'default-action';
	readonly priority = 100;

	protected async setup(): Promise<void> {
		// Pin the page so the listener is removed from the same page it was added
		// to, even if `ctx.page` is reassigned later (CrashGuard does this on recovery).
		const page = this.ctx.page;

		const handler = async (dialog: Dialog) => {
			this.ctx.eventBus.emit('popup', {
				url: page.url(),
				type: 'dialog',
			});
			try {
				await dialog.accept();
			} catch {
				// Dialog may already be dismissed
			}
		};

		page.on('dialog', handler);
		this.cleanupFns.push(() => page.off('dialog', handler));
	}
}


================================================
FILE: packages/core/src/viewport/guards/downloads.ts
================================================
import type { Download } from 'playwright';
import * as fs from 'node:fs';
import * as path from 'node:path';
import * as crypto from 'node:crypto';
import { BaseGuard } from '../guard-base.js';
import { createLogger } from '../../logging.js';

const logger = createLogger('watchdog:downloads');

// ── Options ──

/** Options accepted by the `DownloadGuard` constructor. */
export interface DownloadGuardOptions {
	/**
	 * Directory to save downloads to. Defaults to the `open-browser-downloads`
	 * folder inside `$TMPDIR`, or inside `/tmp` when `TMPDIR` is not set.
	 */
	downloadsPath?: string;
	/**
	 * Automatically accept all downloads without prompting. Defaults to true.
	 * When false, the CDP download behavior is left unconfigured.
	 */
	autoAccept?: boolean;
	/**
	 * Settings for PDF printing when a page triggers a print-to-PDF download.
	 * NOTE(review): DownloadGuard stores this value but does not read it anywhere
	 * else in its own code — confirm where it is consumed.
	 */
	pdfSettings?: {
		printBackground: boolean;
		landscape: boolean;
	};
}

// ── Download tracking ──

/** Lifecycle state of a tracked download. */
export type DownloadStatus = 'started' | 'completed' | 'failed';

/** Record kept by `DownloadGuard` for every download it has seen. */
export interface DownloadInfo {
	/** URL the download was fetched from. */
	url: string;
	/** Filename suggested for the download. */
	suggestedFilename: string;
	/** Final location on disk; set once the download has completed. */
	savedPath?: string;
	status: DownloadStatus;
	/** `Date.now()` value taken when the download was first observed. */
	startTime: number;
	/** `Date.now()` value taken when the download completed or failed. */
	endTime?: number;
	/** Size in bytes of the saved file; unset if the file could not be stat'ed. */
	fileSize?: number;
}

// ── Watchdog ──

/**
 * Monitors for file downloads with full lifecycle tracking.
 *
 * Features:
 * - Configures CDP download behavior for reliable acceptance
 * - Tracks every download from start to completion/failure
 * - Deduplicates filenames with UUID suffixes when collisions occur
 * - Confines saved files to the downloads directory (the suggested filename is
 *   reduced to its base name before it is joined to the directory)
 * - Provides download history and a promise-based wait API
 */
export class DownloadGuard extends BaseGuard {
	readonly name = 'downloads';
	readonly priority = 300;

	private readonly options: Required<DownloadGuardOptions>;
	private readonly downloads = new Map<string, DownloadInfo>();
	private downloadCounter = 0;

	/**
	 * Listeners waiting for the next download to complete.
	 * Each call to `waitForDownload` pushes a resolver here;
	 * it is removed once a download completes or the timeout fires.
	 */
	private pendingWaiters: Array<{
		resolve: (info: DownloadInfo) => void;
		reject: (err: Error) => void;
		timer: ReturnType<typeof setTimeout>;
	}> = [];

	constructor(options?: DownloadGuardOptions) {
		super();
		const defaultPath = path.join(
			(typeof process !== 'undefined' && process.env.TMPDIR) || '/tmp',
			'open-browser-downloads',
		);
		this.options = {
			downloadsPath: options?.downloadsPath ?? defaultPath,
			autoAccept: options?.autoAccept ?? true,
			pdfSettings: options?.pdfSettings ?? {
				printBackground: true,
				landscape: false,
			},
		};
	}

	// ── Setup / Teardown ──

	protected async setup(): Promise<void> {
		// Ensure the downloads directory exists.
		this.ensureDownloadsDir();

		// Try to enable CDP-level auto-accept so the browser never shows a
		// "Save As" dialog, even for cross-origin downloads.
		await this.configureCdpDownloadBehavior();

		// Listen for Playwright download events on the page.
		const handler = (download: Download) => {
			this.handleDownload(download).catch((err) => {
				logger.error('Unhandled error processing download', err);
			});
		};

		// Pin the page: `ctx.page` may be reassigned later (CrashGuard does this on
		// recovery) and the listener must be removed from the page it was added to.
		const page = this.ctx.page;
		page.on('download', handler);
		this.cleanupFns.push(() => page.off('download', handler));

		logger.debug(`Downloads watchdog active – saving to ${this.options.downloadsPath}`);
	}

	protected async teardown(): Promise<void> {
		// Reject any pending waiters so they don't hang forever.
		for (const waiter of this.pendingWaiters) {
			clearTimeout(waiter.timer);
			waiter.reject(new Error('DownloadGuard detached before download completed'));
		}
		this.pendingWaiters = [];
		logger.debug('Downloads watchdog detached');
	}

	// ── CDP configuration ──

	private async configureCdpDownloadBehavior(): Promise<void> {
		if (!this.options.autoAccept) return;

		try {
			const cdpSession = await this.ctx.page.context().newCDPSession(this.ctx.page);
			await (cdpSession.send('Page.setDownloadBehavior', {
				behavior: 'allow',
				downloadPath: this.options.downloadsPath,
			}) as Promise<unknown> as Promise<void>);

			this.cleanupFns.push(() => {
				cdpSession.detach().catch(() => {
					// Session may already be closed.
				});
			});

			logger.debug('CDP download behavior set to "allow"');
		} catch (err) {
			// CDP may not be available (e.g. Firefox). Fall back to Playwright-only handling.
			logger.warn('Could not set CDP download behavior – falling back to Playwright handling', err);
		}
	}

	// ── Download handler ──

	/**
	 * Track one download from start to finish: record it, emit a `download` event,
	 * save the file, then emit a second `download` event carrying the saved path
	 * and resolve pending waiters. Failures are recorded and logged, not thrown.
	 */
	private async handleDownload(download: Download): Promise<void> {
		const id = `dl_${++this.downloadCounter}`;
		const suggestedFilename = download.suggestedFilename();
		const url = download.url();

		const info: DownloadInfo = {
			url,
			suggestedFilename,
			status: 'started',
			startTime: Date.now(),
		};
		this.downloads.set(id, info);

		logger.info(`Download started: ${suggestedFilename} (${url})`);

		// Emit the initial event so consumers know a download has begun.
		this.ctx.eventBus.emit('download', {
			url,
			suggestedFilename,
		});

		try {
			const destPath = this.resolveUniquePath(suggestedFilename);

			// Save the file to our chosen path.
			await download.saveAs(destPath);

			// Gather file size.
			let fileSize: number | undefined;
			try {
				const stat = fs.statSync(destPath);
				fileSize = stat.size;
			} catch {
				// File may have been moved/deleted by another process.
			}

			info.savedPath = destPath;
			info.status = 'completed';
			info.endTime = Date.now();
			info.fileSize = fileSize;

			const elapsed = info.endTime - info.startTime;
			logger.info(
				`Download completed: ${suggestedFilename} → ${destPath} (${formatBytes(fileSize)} in ${elapsed}ms)`,
			);

			// Emit a follow-up download event with the saved path.
			this.ctx.eventBus.emit('download', {
				url,
				suggestedFilename,
				path: destPath,
			});

			// Resolve any pending waiters.
			this.notifyWaiters(info);
		} catch (err) {
			info.status = 'failed';
			info.endTime = Date.now();

			const reason = err instanceof Error ? err.message : String(err);
			logger.error(`Download failed: ${suggestedFilename} – ${reason}`);
		}
	}

	// ── Filename collision handling ──

	/**
	 * Returns a path inside the downloads directory. If a file with the same
	 * name already exists, a short UUID is inserted before the extension.
	 *
	 * The suggested filename originates from the remote server, so only its base
	 * name is used: directory components such as `../` must never be able to move
	 * the file outside the downloads directory.
	 */
	private resolveUniquePath(suggestedFilename: string): string {
		const baseName = path.basename(suggestedFilename);
		const safeName =
			baseName === '' || baseName === '.' || baseName === '..' ? 'download' : baseName;

		const candidate = path.join(this.options.downloadsPath, safeName);

		if (!fs.existsSync(candidate)) {
			return candidate;
		}

		const ext = path.extname(safeName);
		const base = path.basename(safeName, ext);
		const uuid = crypto.randomUUID().slice(0, 8);
		const uniqueName = `${base}-${uuid}${ext}`;

		logger.debug(`File "${suggestedFilename}" already exists – saving as "${uniqueName}"`);
		return path.join(this.options.downloadsPath, uniqueName);
	}

	// ── Directory helpers ──

	private ensureDownloadsDir(): void {
		if (!fs.existsSync(this.options.downloadsPath)) {
			fs.mkdirSync(this.options.downloadsPath, { recursive: true });
			logger.debug(`Created downloads directory: ${this.options.downloadsPath}`);
		}
	}

	// ── Public API ──

	/**
	 * Returns a snapshot of all tracked downloads (both in-progress and finished).
	 */
	getDownloadHistory(): DownloadInfo[] {
		return Array.from(this.downloads.values());
	}

	/**
	 * Returns a promise that resolves with the `DownloadInfo` of the next
	 * download that completes (or rejects after `timeout` ms).
	 *
	 * Failed downloads do not settle the promise; it keeps waiting for the next
	 * successful one until the timeout fires or the guard is detached.
	 *
	 * @param timeout Maximum milliseconds to wait. Defaults to 30 000 ms.
	 */
	waitForDownload(timeout = 30_000): Promise<DownloadInfo> {
		return new Promise<DownloadInfo>((resolve, reject) => {
			const timer = setTimeout(() => {
				this.removePendingWaiter(waiter);
				reject(new Error(`waitForDownload timed out after ${timeout}ms`));
			}, timeout);

			const waiter = { resolve, reject, timer };
			this.pendingWaiters.push(waiter);
		});
	}

	// ── Waiter helpers ──

	/** Resolve (and remove) every pending waiter with the completed download. */
	private notifyWaiters(info: DownloadInfo): void {
		const waiters = this.pendingWaiters.splice(0);
		for (const waiter of waiters) {
			clearTimeout(waiter.timer);
			waiter.resolve(info);
		}
	}

	private removePendingWaiter(waiter: (typeof this.pendingWaiters)[number]): void {
		const idx = this.pendingWaiters.indexOf(waiter);
		if (idx !== -1) {
			this.pendingWaiters.splice(idx, 1);
		}
	}
}

// ── Helpers ──

/**
 * Render a byte count for log output.
 *
 * @param bytes Size in bytes, or `undefined`/`null` when unknown.
 * @returns `'? bytes'` when unknown, otherwise the size in B, KB or MB
 * (KB and MB use one decimal place and 1024-based units).
 */
function formatBytes(bytes: number | undefined): string {
	const KIB = 1024;
	const MIB = KIB * KIB;

	if (bytes == null) {
		return '? bytes';
	}
	if (bytes < KIB) {
		return `${bytes} B`;
	}
	if (bytes < MIB) {
		const kib = bytes / KIB;
		return `${kib.toFixed(1)} KB`;
	}
	const mib = bytes / MIB;
	return `${mib.toFixed(1)} MB`;
}


================================================
FILE: packages/core/src/viewport/guards/har-capture.ts
================================================
import { writeFile, mkdir } from 'node:fs/promises';
import { dirname } from 'node:path';
import type { CDPSession } from 'playwright';
import { BaseGuard } from '../guard-base.js';

// ── HAR 1.2 types ──

/** `request` object of a HAR entry. */
interface HarRequest {
	method: string;
	url: string;
	httpVersion: string;
	headers: Array<{ name: string; value: string }>;
	/** Query parameters parsed from `url`. */
	queryString: Array<{ name: string; value: string }>;
	// HarCaptureGuard writes -1 for both sizes on requests.
	headersSize: number;
	bodySize: number;
}

/** `response` object of a HAR entry. */
interface HarResponse {
	status: number;
	statusText: string;
	httpVersion: string;
	headers: Array<{ name: string; value: string }>;
	content: {
		size: number;
		mimeType: string;
	};
	headersSize: number;
	bodySize: number;
	/** Redirect target; empty string when there is none. */
	redirectURL: string;
}

/** One request/response pair in the HAR log. */
interface HarEntry {
	/** ISO-8601 timestamp string. */
	startedDateTime: string;
	/** Total elapsed time of the request in milliseconds. */
	time: number;
	request: HarRequest;
	response: HarResponse;
	/** Always written as an empty object; cache details are not captured. */
	cache: Record<string, never>;
	/** Timing breakdown in milliseconds. */
	timings: {
		send: number;
		wait: number;
		receive: number;
	};
}

/** A request that has been sent but whose entry has not been finalized yet. */
interface PendingRequest {
	requestId: string;
	/** CDP monotonic timestamp (seconds, arbitrary origin); only valid for computing durations. */
	startTime: number;
	/** Wall-clock time of the request in seconds since the Unix epoch. */
	wallTime: number;
	method: string;
	url: string;
	headers: Record<string, string>;
}

/** The subset of a CDP `Network.Response` that is kept for the HAR entry. */
interface ResponseInfo {
	status: number;
	statusText: string;
	headers: Record<string, string>;
	mimeType: string;
	encodedDataLength: number;
}

/**
 * Records network traffic in HAR 1.2 format using CDP Network domain events.
 * On teardown, writes the complete HAR log to the configured output path.
 *
 * Notes:
 * - CDP reports two clocks on `Network.requestWillBeSent`: `timestamp` is
 *   monotonic and `wallTime` is epoch-based. Durations use the former,
 *   `startedDateTime` uses the latter.
 * - A redirect reuses the `requestId` of the request that was redirected and is
 *   announced through `redirectResponse` on the next `requestWillBeSent`. Each
 *   hop is written as its own entry.
 * - Requests still in flight at teardown are not written.
 */
export class HarCaptureGuard extends BaseGuard {
	readonly name = 'har-recording';
	readonly priority = 500;

	private readonly outputPath: string;
	private cdpSession: CDPSession | null = null;
	private pendingRequests = new Map<string, PendingRequest>();
	private responses = new Map<string, ResponseInfo>();
	private entries: HarEntry[] = [];

	/**
	 * @param outputPath File the HAR log is written to on teardown; missing parent
	 * directories are created.
	 */
	constructor(outputPath: string) {
		super();
		this.outputPath = outputPath;
	}

	protected async setup(): Promise<void> {
		const session = await this.ctx.page.context().newCDPSession(this.ctx.page);
		this.cdpSession = session;

		// Register the detach immediately so the session is released even if one of
		// the following steps fails.
		this.cleanupFns.push(() => {
			session.detach().catch(() => {
				// Ignore detach errors during cleanup
			});
		});

		// Listeners are attached before the domain is enabled so that no early
		// event can be missed.
		session.on('Network.requestWillBeSent', (params) => {
			const { requestId, request, timestamp, wallTime, redirectResponse } = params as {
				requestId: string;
				request: { method: string; url: string; headers: Record<string, string> };
				timestamp: number;
				wallTime?: number;
				redirectResponse?: ResponseInfo;
			};

			// A redirect hop: close the entry of the request that was redirected
			// before the follow-up request overwrites it under the same id.
			if (redirectResponse && this.pendingRequests.has(requestId)) {
				this.responses.set(requestId, {
					status: redirectResponse.status,
					statusText: redirectResponse.statusText,
					headers: redirectResponse.headers,
					mimeType: redirectResponse.mimeType,
					encodedDataLength: redirectResponse.encodedDataLength,
				});
				this.finalizeEntry(requestId, timestamp, redirectResponse.encodedDataLength ?? 0);
			}

			this.pendingRequests.set(requestId, {
				requestId,
				startTime: timestamp,
				wallTime: wallTime ?? Date.now() / 1000,
				method: request.method,
				url: request.url,
				headers: request.headers,
			});
		});

		session.on('Network.responseReceived', (params) => {
			const { requestId, response } = params as {
				requestId: string;
				response: ResponseInfo;
			};

			this.responses.set(requestId, {
				status: response.status,
				statusText: response.statusText,
				headers: response.headers,
				mimeType: response.mimeType,
				encodedDataLength: response.encodedDataLength,
			});
		});

		session.on('Network.loadingFinished', (params) => {
			const { requestId, timestamp, encodedDataLength } = params as {
				requestId: string;
				timestamp: number;
				encodedDataLength: number;
			};

			this.finalizeEntry(requestId, timestamp, encodedDataLength);
		});

		session.on('Network.loadingFailed', (params) => {
			const { requestId, timestamp } = params as {
				requestId: string;
				timestamp: number;
			};

			// Still record failed requests with a zero-length response
			this.finalizeEntry(requestId, timestamp, 0);
		});

		await session.send('Network.enable');
	}

	/**
	 * Turn the pending request (and its response, if one was seen) into a HAR
	 * entry and forget both. Does nothing for unknown request ids.
	 *
	 * @param endTimestamp CDP monotonic timestamp (seconds) at which the request ended.
	 * @param encodedDataLength Number of bytes received over the wire.
	 */
	private finalizeEntry(requestId: string, endTimestamp: number, encodedDataLength: number): void {
		const pending = this.pendingRequests.get(requestId);
		if (!pending) return;

		const response = this.responses.get(requestId);
		// Both timestamps are monotonic seconds, so their difference is a valid duration.
		const elapsedMs = Math.max(0, (endTimestamp - pending.startTime) * 1000);

		const harRequest: HarRequest = {
			method: pending.method,
			url: pending.url,
			httpVersion: 'HTTP/1.1',
			headers: toHeaderArray(pending.headers),
			queryString: parseQueryString(pending.url),
			headersSize: -1,
			bodySize: -1,
		};

		const harResponse: HarResponse = response
			? {
					status: response.status,
					statusText: response.statusText,
					httpVersion: 'HTTP/1.1',
					headers: toHeaderArray(response.headers),
					content: {
						size: encodedDataLength,
						mimeType: response.mimeType,
					},
					headersSize: -1,
					bodySize: encodedDataLength,
					redirectURL: this.headerValue(response.headers, 'location') ?? '',
				}
			: {
					status: 0,
					statusText: '',
					httpVersion: 'HTTP/1.1',
					headers: [],
					content: { size: 0, mimeType: '' },
					headersSize: -1,
					bodySize: 0,
					redirectURL: '',
				};

		this.entries.push({
			// Must come from the epoch-based clock; the monotonic timestamp has an
			// arbitrary origin and would produce a meaningless date.
			startedDateTime: new Date(pending.wallTime * 1000).toISOString(),
			time: elapsedMs,
			request: harRequest,
			response: harResponse,
			cache: {},
			timings: {
				send: 0,
				wait: elapsedMs,
				receive: 0,
			},
		});

		this.pendingRequests.delete(requestId);
		this.responses.delete(requestId);
	}

	/** Look up a header by name, ignoring case (header names are case-insensitive). */
	private headerValue(headers: Record<string, string>, name: string): string | undefined {
		const wanted = name.toLowerCase();
		for (const [key, value] of Object.entries(headers)) {
			if (key.toLowerCase() === wanted) return value;
		}
		return undefined;
	}

	/** Serialize all finalized entries as a HAR 1.2 document and write it to `outputPath`. */
	protected override async teardown(): Promise<void> {
		const har = {
			log: {
				version: '1.2',
				creator: {
					name: 'open-browser',
					version: '1.0.0',
				},
				entries: this.entries,
			},
		};

		await mkdir(dirname(this.outputPath), { recursive: true });
		await writeFile(this.outputPath, JSON.stringify(har, null, 2), 'utf-8');
	}
}

// ── Helpers ──

/**
 * Convert a header dictionary into the `{ name, value }` list used by HAR.
 *
 * @param headers Header names mapped to their values.
 * @returns One `{ name, value }` object per header, in the dictionary's own key order.
 */
function toHeaderArray(headers: Record<string, string>): Array<{ name: string; value: string }> {
	const list: Array<{ name: string; value: string }> = [];
	for (const [name, value] of Object.entries(headers)) {
		list.push({ name, value });
	}
	return list;
}

/**
 * Extract the query parameters of `url` as the `{ name, value }` list used by HAR.
 *
 * @param url Absolute URL to parse.
 * @returns One entry per query parameter in order of appearance (repeated names
 * are kept); an empty list when `url` cannot be parsed.
 */
function parseQueryString(url: string): Array<{ name: string; value: string }> {
	let params: URLSearchParams;
	try {
		params = new URL(url).searchParams;
	} catch {
		return [];
	}

	const pairs: Array<{ name: string; value: string }> = [];
	params.forEach((value, name) => {
		pairs.push({ name, value });
	});
	return pairs;
}


================================================
FILE: packages/core/src/viewport/guards/local-instance.ts
================================================
import { BaseGuard } from '../guard-base.js';

/**
 * Ensures a usable local browser page is present when the guard is attached.
 *
 * Setup fails fast if the page has already been closed, so the problem surfaces
 * at attach time rather than on the first real interaction.
 */
export class LocalInstanceGuard extends BaseGuard {
	readonly name = 'local-browser';
	readonly priority = 10;

	/**
	 * @throws Error if the page is already closed.
	 */
	protected async setup(): Promise<void> {
		// `page.url()` is a synchronous accessor and does not fail for a closed
		// page, so it cannot serve as a liveness probe; `isClosed()` reports the
		// actual state.
		if (this.ctx.page.isClosed()) {
			throw new Error('LocalInstanceGuard: the browser page is closed');
		}
	}
}


================================================
FILE: packages/core/src/viewport/guards/page-ready.ts
================================================
import { BaseGuard } from '../guard-base.js';
import { createLogger } from '../../logging.js';

const logger = createLogger('watchdog:dom');

// ── Options ──

/** Tuning options accepted by the `PageReadyGuard` constructor. */
export interface PageReadyGuardOptions {
	/**
	 * Milliseconds of mutation silence required before the DOM is considered
	 * "stable". Defaults to 500 ms.
	 */
	idleTimeoutMs?: number;

	/**
	 * Debounce interval for grouping rapid-fire mutation callbacks.
	 * Defaults to 100 ms.
	 */
	debounceMs?: number;
}

// ── Load-state tracking ──

/** Page lifecycle milestones that `PageReadyGuard` records in its `reachedStates` set. */
export type LoadState = 'domcontentloaded' | 'load' | 'networkidle';

// ── Watchdog ──

/**
 * Monitors DOM readiness and mutation activity.
 *
 * Features:
 * - Listens for the Playwright page events `domcontentloaded` and `load`,
 *   and waits in the background for the `networkidle` load state
 * - Injects a MutationObserver via `page.evaluate` to detect in-page DOM
 *   changes and determine when the page has "settled"
 * - Emits `content-ready` on the event bus each time the DOM settles
 *   (no mutations for `idleTimeoutMs`)
 * - Exposes `waitForDomStable()` for external consumers
 * - Tracks a cumulative mutation count for debugging
 */
export class PageReadyGuard extends BaseGuard {
	readonly name = 'dom';
	readonly priority = 200;

	private readonly idleTimeoutMs: number;
	private readonly debounceMs: number;

	/** Which lifecycle states have been observed since the guard was set up. */
	private reachedStates = new Set<LoadState>();

	/** Running total of the counts reported by the in-page observer (useful for debugging). */
	private mutationCount = 0;

	/** Whether we currently consider the DOM to be stable. */
	private stable = false;

	/**
	 * Set by `teardown()` and cleared by `setup()`. Late callbacks (the exposed
	 * mutation bridge stays registered on the page) must not re-arm timers or
	 * emit events once the guard has been torn down.
	 */
	private tornDown = false;

	/** Timer handle for the idle-detection window. */
	private idleTimer: ReturnType<typeof setTimeout> | null = null;

	/** Timer handle for the debounce window. */
	private debounceTimer: ReturnType<typeof setTimeout> | null = null;

	/** Resolvers for external callers waiting on `waitForDomStable`. */
	private stableWaiters: Array<{
		resolve: () => void;
		reject: (err: Error) => void;
		timer: ReturnType<typeof setTimeout>;
	}> = [];

	/** Name under which the Node-side mutation callback is exposed to the page. */
	private readonly exposedFnName = '__ob_dom_mutation';

	/**
	 * @param options Optional timing overrides; see `PageReadyGuardOptions`.
	 */
	constructor(options?: PageReadyGuardOptions) {
		super();
		this.idleTimeoutMs = options?.idleTimeoutMs ?? 500;
		this.debounceMs = options?.debounceMs ?? 100;
	}

	// ── Setup ──

	protected async setup(): Promise<void> {
		this.reachedStates.clear();
		this.mutationCount = 0;
		this.stable = false;
		this.tornDown = false;

		// 1. Standard lifecycle events.
		this.setupLifecycleListeners();

		// 2. MutationObserver bridge via an exposed function.
		await this.setupMutationObserver();

		// 3. Open the first idle window. Without this, a page that is already
		//    loaded and quiet never produces an event, so it would never be
		//    reported as stable. Skip if a callback already armed a timer.
		if (!this.stable && !this.idleTimer && !this.debounceTimer) {
			this.resetIdleTimer();
		}

		logger.debug(
			`DOM watchdog active (idleTimeout=${this.idleTimeoutMs}ms, debounce=${this.debounceMs}ms)`,
		);
	}

	// ── Teardown ──

	protected async teardown(): Promise<void> {
		this.tornDown = true;
		this.clearTimers();

		// Reject pending waiters.
		for (const waiter of this.stableWaiters) {
			clearTimeout(waiter.timer);
			waiter.reject(new Error('PageReadyGuard detached before DOM became stable'));
		}
		this.stableWaiters = [];

		logger.debug(
			`DOM watchdog detached (observed ${this.mutationCount} mutation batches)`,
		);
	}

	// ── Lifecycle listeners ──

	private setupLifecycleListeners(): void {
		const onDomContentLoaded = () => {
			this.reachedStates.add('domcontentloaded');
			logger.debug('Page reached domcontentloaded');
			// A new document was parsed: stability of the previous document
			// must not carry over, otherwise `content-ready` is never re-emitted
			// and `waitForDomStable()` resolves against stale state.
			this.stable = false;
			this.resetIdleTimer();
		};

		const onLoad = () => {
			this.reachedStates.add('load');
			logger.debug('Page reached load');
			this.resetIdleTimer();
		};

		this.ctx.page.on('domcontentloaded', onDomContentLoaded);
		this.ctx.page.on('load', onLoad);

		this.cleanupFns.push(
			() => this.ctx.page.off('domcontentloaded', onDomContentLoaded),
			() => this.ctx.page.off('load', onLoad),
		);

		// `networkidle` is not a standard event – we wait for it asynchronously
		// after page load to avoid blocking setup.
		const watchNetworkIdle = async () => {
			try {
				await this.ctx.page.waitForLoadState('networkidle');
				if (!this.active) return;
				this.reachedStates.add('networkidle');
				logger.debug('Page reached networkidle');
				this.resetIdleTimer();
			} catch {
				// Navigation may have occurred or page closed – ignore.
			}
		};

		// Fire-and-forget; errors are handled inside the watcher.
		void watchNetworkIdle();
	}

	// ── MutationObserver bridge ──

	private async setupMutationObserver(): Promise<void> {
		// Expose a function so the in-page MutationObserver can call back into Node.
		try {
			await this.ctx.page.exposeFunction(this.exposedFnName, (count: number) => {
				this.onMutationBatch(count);
			});
		} catch {
			// Function may already be exposed from a previous attach cycle.
			logger.debug('Mutation bridge function already exposed – reusing');
		}

		// Inject the observer. We re-inject on every `domcontentloaded` so it
		// survives navigations.
		const injectObserver = async () => {
			try {
				await this.ctx.page.evaluate((fnName: string) => {
					const win = window as unknown as Record<string, unknown>;

					// Avoid double-installing on the same document.
					if (win.__ob_observer_installed) return;
					win.__ob_observer_installed = true;

					let pending = 0;
					const observer = new MutationObserver((mutations) => {
						pending += mutations.length;
					});

					observer.observe(document.documentElement, {
						childList: true,
						subtree: true,
						attributes: true,
						characterData: true,
					});

					// Flush accumulated mutation count periodically rather than on
					// every single micro-mutation.
					setInterval(() => {
						if (pending > 0) {
							const count = pending;
							pending = 0;
							const fn = win[fnName];
							if (typeof fn === 'function') fn(count);
						}
					}, 50);
				}, this.exposedFnName);
			} catch {
				// Page may have navigated away or closed.
			}
		};

		// Inject immediately for the current document...
		await injectObserver();

		// ...and re-inject on future navigations.
		const onDomContentLoaded = () => {
			void injectObserver();
		};
		this.ctx.page.on('domcontentloaded', onDomContentLoaded);
		this.cleanupFns.push(() => this.ctx.page.off('domcontentloaded', onDomContentLoaded));
	}

	// ── Mutation handling ──

	/**
	 * Handles one flush from the in-page observer.
	 *
	 * @param count Number of mutation records accumulated since the last flush.
	 */
	private onMutationBatch(count: number): void {
		// The exposed bridge outlives the guard; ignore flushes after teardown.
		if (this.tornDown) return;

		this.mutationCount += count;
		this.stable = false;

		// A pending idle window was measured from before this mutation; letting
		// it fire during the debounce window would report a DOM that is still
		// changing as stable.
		if (this.idleTimer) {
			clearTimeout(this.idleTimer);
			this.idleTimer = null;
		}

		// Debounce: delay the idle-timer reset so we don't restart it on
		// every single mutation callback.
		if (this.debounceTimer) {
			clearTimeout(this.debounceTimer);
		}
		this.debounceTimer = setTimeout(() => {
			this.debounceTimer = null;
			this.resetIdleTimer();
		}, this.debounceMs);
	}

	// ── Idle detection ──

	/** (Re)starts the idle window; when it elapses the DOM is marked stable. */
	private resetIdleTimer(): void {
		// Listeners may still fire between teardown and listener removal.
		if (this.tornDown) return;

		if (this.idleTimer) {
			clearTimeout(this.idleTimer);
		}

		this.idleTimer = setTimeout(() => {
			this.idleTimer = null;
			this.markStable();
		}, this.idleTimeoutMs);
	}

	/** Flips to the stable state, emits `content-ready` and releases waiters. */
	private markStable(): void {
		if (this.stable) return;

		this.stable = true;
		logger.debug(
			`DOM stable after ${this.mutationCount} mutation batches ` +
			`(states: ${[...this.reachedStates].join(', ') || 'none'})`,
		);

		this.ctx.eventBus.emit('content-ready', undefined as void);
		this.notifyStableWaiters();
	}

	// ── Public API ──

	/**
	 * Returns a promise that resolves once the DOM is considered stable
	 * (no mutations for `idleTimeoutMs`).
	 *
	 * If the DOM is already stable the promise resolves immediately.
	 * The promise rejects if the timeout elapses first or the guard is torn
	 * down while waiting.
	 *
	 * @param timeout Maximum milliseconds to wait. Defaults to 10 000 ms.
	 */
	waitForDomStable(timeout = 10_000): Promise<void> {
		if (this.stable) {
			return Promise.resolve();
		}

		return new Promise<void>((resolve, reject) => {
			const timer = setTimeout(() => {
				this.removeStableWaiter(waiter);
				reject(new Error(`waitForDomStable timed out after ${timeout}ms`));
			}, timeout);

			const waiter = { resolve, reject, timer };
			this.stableWaiters.push(waiter);
		});
	}

	/**
	 * Returns the set of lifecycle states observed since setup.
	 */
	getReachedStates(): ReadonlySet<LoadState> {
		return this.reachedStates;
	}

	/**
	 * Returns the cumulative mutation count reported since the watchdog was
	 * attached.
	 */
	getMutationCount(): number {
		return this.mutationCount;
	}

	/**
	 * Whether the DOM is currently considered stable.
	 */
	isStable(): boolean {
		return this.stable;
	}

	// ── Waiter helpers ──

	private notifyStableWaiters(): void {
		const waiters = this.stableWaiters.splice(0);
		for (const waiter of waiters) {
			clearTimeout(waiter.timer);
			waiter.resolve();
		}
	}

	private removeStableWaiter(waiter: (typeof this.stableWaiters)[number]): void {
		const idx = this.stableWaiters.indexOf(waiter);
		if (idx !== -1) {
			this.stableWaiters.splice(idx, 1);
		}
	}

	// ── Timer cleanup ──

	private clearTimers(): void {
		if (this.idleTimer) {
			clearTimeout(this.idleTimer);
			this.idleTimer = null;
		}
		if (this.debounceTimer) {
			clearTimeout(this.debounceTimer);
			this.debounceTimer = null;
		}
	}
}


================================================
FILE: packages/core/src/viewport/guards/permissions.ts
================================================
import type { CDPSession } from 'playwright';
import { BaseGuard } from '../guard-base.js';

/**
 * Grants browser permissions (geolocation, notifications, camera, etc.)
 * via CDP. Re-grants permissions when the page navigates to a new origin.
 */
export class PermissionsGuard extends BaseGuard {
	readonly name = 'permissions';
	readonly priority = 400;

	private readonly permissions: string[];
	private cdpSession: CDPSession | null = null;

	/**
	 * Origin of the latest grant that is in flight or has succeeded. Used to
	 * skip duplicate grants; cleared again if the grant fails so it is retried.
	 */
	private lastOrigin: string | null = null;

	/**
	 * @param permissions Permission names passed verbatim to CDP
	 *   `Browser.grantPermissions`.
	 */
	constructor(permissions: string[]) {
		super();
		this.permissions = permissions;
	}

	protected async setup(): Promise<void> {
		// Each attach cycle opens a new CDP session, so do not trust the
		// origin remembered from a previous cycle.
		this.lastOrigin = null;

		const session = await this.ctx.page.context().newCDPSession(this.ctx.page);
		this.cdpSession = session;

		// Grant permissions for the current page origin
		await this.grantForCurrentPage();

		// Re-grant permissions when navigating to a new origin
		const handler = () => {
			this.grantForCurrentPage().catch(() => {
				// Ignore errors from navigations to about:blank, etc.
			});
		};

		this.ctx.page.on('framenavigated', handler);
		this.cleanupFns.push(() => this.ctx.page.off('framenavigated', handler));
		this.cleanupFns.push(() => {
			session.detach().catch(() => {
				// Ignore detach errors during cleanup
			});
			// Drop the reference so late handlers cannot use a detached session.
			if (this.cdpSession === session) {
				this.cdpSession = null;
			}
		});
	}

	/**
	 * Grants the configured permissions to the origin of the page's current
	 * URL. Does nothing for unparsable URLs, non-http origins, or an origin
	 * that was already handled.
	 *
	 * @throws Whatever the CDP call rejects with; the origin is then forgotten
	 *   so that a later navigation to it triggers another attempt.
	 */
	private async grantForCurrentPage(): Promise<void> {
		// Without a session nothing can be granted; bail out before recording
		// the origin so it is not mistaken for an already-granted one.
		const session = this.cdpSession;
		if (!session) return;

		const url = this.ctx.page.url();
		let origin: string;
		try {
			origin = new URL(url).origin;
		} catch {
			return;
		}

		// Skip non-http origins and avoid re-granting for the same origin
		if (!origin.startsWith('http') || origin === this.lastOrigin) return;

		// Record the origin up front so overlapping navigation events for the
		// same origin do not issue duplicate grants.
		this.lastOrigin = origin;

		// CDP types require PermissionType[] but we accept string[] for ergonomics
		type SendFn = (method: string, params: Record<string, unknown>) => Promise<unknown>;
		try {
			await (session.send as unknown as SendFn)(
				'Browser.grantPermissions',
				{ permissions: this.permissions, origin },
			);
		} catch (err) {
			// The grant did not go through: allow a retry for this origin.
			if (this.lastOrigin === origin) {
				this.lastOrigin = null;
			}
			throw err;
		}
	}
}


================================================
FILE: packages/core/src/viewport/guards/persistence.ts
================================================
import { readFile, writeFile, mkdir } from 'node:fs/promises';
import { dirname } from 'node:path';
import { BaseGuard } from '../guard-base.js';

/**
 * Handles saving and restoring browser storage state (cookies, localStorage).
 * Persists state to a file so it can be restored across sessions.
 */
export class PersistenceGuard extends BaseGuard {
	readonly name = 'storage-state';
	readonly priority = 600;

	private readonly storagePath: string;

	/**
	 * @param storagePath File that `save()` writes to and `setup()` reads from.
	 */
	constructor(storagePath: string) {
		super();
		this.storagePath = storagePath;
	}

	/**
	 * Best-effort restore: reads the state file and, if it contains cookies,
	 * adds them to the browser context. Any failure along the way (missing
	 * file, malformed JSON, rejected cookies) is swallowed.
	 */
	protected async setup(): Promise<void> {
		type StoredCookie = {
			name: string;
			value: string;
			domain: string;
			path: string;
			expires?: number;
			httpOnly?: boolean;
			secure?: boolean;
			sameSite?: 'Strict' | 'Lax' | 'None';
		};

		try {
			const raw = await readFile(this.storagePath, 'utf-8');
			const { cookies } = JSON.parse(raw) as { cookies?: StoredCookie[] };
			if (!cookies) return;
			await this.ctx.context.addCookies(cookies);
		} catch {
			// Nothing usable on disk (absent or invalid) – begin with a clean slate.
		}
	}

	/**
	 * Serialises the context's current storage state as pretty-printed JSON
	 * into the configured file, creating parent directories as needed.
	 */
	async save(): Promise<void> {
		const snapshot = await this.ctx.context.storageState();
		const serialized = JSON.stringify(snapshot, null, 2);

		const parentDir = dirname(this.storagePath);
		await mkdir(parentDir, { recursive: true });
		await writeFile(this.storagePath, serialized, 'utf-8');
	}
}


================================================
FILE: packages/core/src/viewport/guards/popups.ts
================================================
import type { Page } from 'playwright';
import { BaseGuard } from '../guard-base.js';

/**
 * Watches the browser context for newly created pages (popups, new
 * windows/tabs). For each one it emits a `tab-opened` event carrying the
 * page URL and then tries to bring that page to the front.
 */
export class PopupGuard extends BaseGuard {
	readonly name = 'popups';
	readonly priority = 150;

	protected async setup(): Promise<void> {
		// Runs a page operation and discards any failure; the page may already
		// have been closed by the time we get to it.
		const attempt = async (operation: () => Promise<unknown>): Promise<void> => {
			try {
				await operation();
			} catch {
				// Intentionally ignored.
			}
		};

		const onPageCreated = async (popup: Page): Promise<void> => {
			// Give the new page a chance to reach DOMContentLoaded first.
			await attempt(() => popup.waitForLoadState('domcontentloaded'));

			this.ctx.eventBus.emit('tab-opened', { url: popup.url() });

			// Move focus to the freshly opened page.
			await attempt(() => popup.bringToFront());
		};

		this.ctx.context.on('page', onPageCreated);
		this.cleanupFns.push(() => this.ctx.context.off('page', onPageCreated));
	}
}


================================================
FILE: packages/core/src/viewport/guards/screenshot.ts
================================================
import type { ScreenshotEvent, ScreenshotResult } from '../events.js';
import { BaseGuard } from '../guard-base.js';

/**
 * Handles screenshot requests by registering a request handler
 * for 'get-screenshot' on the event bus.
 */
export class ScreenshotGuard extends BaseGuard {
	readonly name = 'screenshot';
	readonly priority = 700;

	/**
	 * Registers the `get-screenshot` request handler and schedules its
	 * removal for cleanup.
	 */
	protected async setup(): Promise<void> {
		// Captures a PNG of the page and reports it together with the current
		// viewport dimensions (0x0 when the page has no viewport size).
		const capture = async (event: ScreenshotEvent): Promise<ScreenshotResult> => {
			const wantsFullPage = event?.fullPage ?? false;
			const png = await this.ctx.page.screenshot({
				fullPage: wantsFullPage,
				type: 'png',
			});

			const encoded = png.toString('base64');
			const size = this.ctx.page.viewportSize();
			const width = size?.width ?? 0;
			const height = size?.height ?? 0;

			return { base64: encoded, width, height };
		};

		const unsubscribe = this.ctx.eventBus.onRequest('get-screenshot', capture);
		this.cleanupFns.push(unsubscribe);
	}
}


================================================
FILE: packages/core/src/viewport/guards/url-policy.ts
================================================
import type { Route } from 'playwright';
import { BaseGuard } from '../guard-base.js';
import { isUrlPermitted } from '../../utils.js';

/**
 * Monitors for security concerns by intercepting navigation requests.
 * Checks URLs against allowed/blocked lists before permitting navigation.
 */
export class UrlPolicyGuard extends BaseGuard {
	readonly name = 'policy-violation';
	readonly priority = 50;

	private readonly allowedUrls: string[];
	private readonly blockedUrls: string[];

	/**
	 * @param allowedUrls Allow-list handed to `isUrlPermitted`. Defaults to empty.
	 * @param blockedUrls Block-list handed to `isUrlPermitted`. Defaults to empty.
	 */
	constructor(allowedUrls: string[] = [], blockedUrls: string[] = []) {
		super();
		this.allowedUrls = allowedUrls;
		this.blockedUrls = blockedUrls;
	}

	/**
	 * Installs a catch-all route on the page. Navigation requests whose URL
	 * is rejected by `isUrlPermitted` are aborted after a `policy-violation`
	 * event is emitted; every other request is continued unchanged.
	 */
	protected async setup(): Promise<void> {
		const enforcePolicy = async (route: Route): Promise<void> => {
			const request = route.request();
			const url = request.url();

			// Only navigations are subject to the policy; the URL check is
			// skipped entirely for sub-resource requests.
			const permitted =
				!request.isNavigationRequest() ||
				isUrlPermitted(url, this.allowedUrls, this.blockedUrls);

			if (permitted) {
				await route.continue();
				return;
			}

			this.ctx.eventBus.emit('policy-violation', {
				type: 'navigation-blocked',
				url,
				reason: `URL not allowed by security policy: ${url}`,
			});
			await route.abort('blockedbyclient');
		};

		await this.ctx.page.route('**/*', enforcePolicy);

		this.cleanupFns.push(() => {
			this.ctx.page.unroute('**/*', enforcePolicy).catch(() => {
				// Unrouting is best-effort while cleaning up.
			});
		});
	}
}


================================================
FILE: packages/core/src/viewport/guards/video-capture.ts
================================================
import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import type { CDPSession } from 'playwright';
import { BaseGuard } from '../guard-base.js';
import { createLogger } from '../../logging.js';

const logger = createLogger('watchdog:video-recording');

// ── Options ──

/**
 * Caller-facing configuration for `VideoCaptureGuard`. Only `outputPath` is
 * required; omitted fields receive the defaults listed below.
 */
export interface VideoRecordingOptions {
	/**
	 * Path for the Playwright trace archive (.zip).
	 *
	 * In screencast mode nothing is written to this exact path: frames and a
	 * `manifest.json` go into a `screencast-frames` directory created inside
	 * this path's parent directory.
	 */
	outputPath: string;
	/**
	 * Recording mode. `'tracing'` uses Playwright's built-in tracing API
	 * (screenshots + DOM snapshots). `'screencast'` falls back to CDP
	 * Page.startScreencast for raw frame capture. `'auto'` tries tracing
	 * first and falls back to screencast on failure.
	 *
	 * @default 'auto'
	 */
	mode?: 'tracing' | 'screencast' | 'auto';
	/**
	 * Maximum frames per second for CDP screencast mode.
	 * Ignored when using Playwright tracing.
	 *
	 * Converted to the screencast's `everyNthFrame` setting as
	 * `round(60 / maxFrameRate)` with a minimum of 1, i.e. the conversion
	 * treats the browser as producing 60 frames per second.
	 *
	 * @default 5
	 */
	maxFrameRate?: number;
	/**
	 * Screencast image format.
	 * @default 'jpeg'
	 */
	format?: 'jpeg' | 'png';
	/**
	 * Screencast image quality (1-100). Only applies to JPEG; it is not sent
	 * to the browser when `format` is `'png'`.
	 * @default 60
	 */
	quality?: number;
	/**
	 * Maximum width of captured screencast frames in pixels.
	 * The browser scales down if the viewport is larger.
	 *
	 * @default 1280
	 */
	maxWidth?: number;
	/**
	 * Maximum height of captured screencast frames in pixels.
	 * @default 720
	 */
	maxHeight?: number;
}

// ── Resolved defaults ──

/**
 * `VideoRecordingOptions` after defaulting: the same fields, but none of
 * them optional. Produced by `resolveOptions`.
 */
interface ResolvedOptions {
	outputPath: string;
	mode: 'tracing' | 'screencast' | 'auto';
	maxFrameRate: number;
	format: 'jpeg' | 'png';
	quality: number;
	maxWidth: number;
	maxHeight: number;
}

/**
 * Fills in defaults for every optional recording option and sanitises the
 * numeric ones so that unusable values never reach the browser.
 *
 * - `maxFrameRate`: must be a finite number > 0, otherwise the default (5).
 * - `quality`: rounded and clamped to 1–100; non-finite values use the default (60).
 * - `maxWidth` / `maxHeight`: rounded to whole pixels (minimum 1); values that
 *   are non-finite or not > 0 use the defaults (1280 / 720).
 *
 * Valid inputs are passed through unchanged.
 *
 * @param opts Caller-supplied options.
 * @returns Options with every field populated.
 */
function resolveOptions(opts: VideoRecordingOptions): ResolvedOptions {
	const isUsable = (value: number | undefined): value is number =>
		typeof value === 'number' && Number.isFinite(value);

	const positiveOr = (value: number | undefined, fallback: number): number =>
		isUsable(value) && value > 0 ? value : fallback;

	const wholePixels = (value: number | undefined, fallback: number): number =>
		Math.max(1, Math.round(positiveOr(value, fallback)));

	const quality = isUsable(opts.quality)
		? Math.min(100, Math.max(1, Math.round(opts.quality)))
		: 60;

	return {
		outputPath: opts.outputPath,
		mode: opts.mode ?? 'auto',
		// A rate of 0/NaN would turn into an Infinity/NaN frame divisor downstream.
		maxFrameRate: positiveOr(opts.maxFrameRate, 5),
		format: opts.format ?? 'jpeg',
		quality,
		maxWidth: wholePixels(opts.maxWidth, 1280),
		maxHeight: wholePixels(opts.maxHeight, 720),
	};
}

// ── Watchdog ──

/**
 * Records browser activity using Playwright's tracing API or CDP
 * Page.startScreencast as a fallback.
 *
 * - **Tracing mode** captures screenshots and DOM snapshots viewable in
 *   the Playwright Trace Viewer. Produces a `.zip` archive.
 * - **Screencast mode** uses CDP to capture individual frames at a
 *   configurable frame rate and quality. Produces numbered image files
 *   written into a directory alongside the output path.
 *
 * Supports pause/resume so callers can temporarily halt recording
 * (e.g. during long waits) and restart without losing earlier frames.
 */
export class VideoCaptureGuard extends BaseGuard {
	readonly name = 'video-recording';
	readonly priority = 500;

	private readonly options: ResolvedOptions;

	// ── Tracing state ──
	private tracingStarted = false;

	// ── Screencast state ──
	private cdpSession: CDPSession | null = null;
	private screencastActive = false;
	private paused = false;
	private frameCount = 0;
	/** Captured frames (base64 payload + CDP timestamp), held in memory until saved. */
	private readonly frames: Array<{ data: string; timestamp: number }> = [];

	/**
	 * @param options Recording configuration; defaults are applied via `resolveOptions`.
	 */
	constructor(options: VideoRecordingOptions) {
		super();
		this.options = resolveOptions(options);
	}

	// ── Setup ──

	/**
	 * Starts recording according to `mode`: tracing first for `'tracing'` and
	 * `'auto'`, CDP screencast for `'screencast'` or as the `'auto'` fallback.
	 * Failures are logged rather than thrown.
	 */
	protected async setup(): Promise<void> {
		const { mode } = this.options;

		if (mode === 'tracing' || mode === 'auto') {
			const tracingOk = await this.startTracing();
			if (tracingOk) return;
			if (mode === 'tracing') {
				logger.warn('Tracing failed and mode is "tracing" – recording will be unavailable');
				return;
			}
			logger.info('Tracing unavailable, falling back to CDP screencast');
		}

		await this.startScreencast();
	}

	// ── Teardown ──

	/** Stops whichever recording mechanism is running and persists its output. */
	protected override async teardown(): Promise<void> {
		if (this.tracingStarted) {
			await this.stopTracing();
		} else if (this.screencastActive) {
			await this.stopScreencast();
		}
	}

	// ── Pause / Resume ──

	/**
	 * Temporarily pauses frame capture (screencast only).
	 * Tracing mode does not support granular pause/resume.
	 */
	pause(): void {
		if (!this.screencastActive || this.paused) return;
		this.paused = true;
		logger.debug('Screencast paused');
	}

	/**
	 * Resumes frame capture after a pause (screencast only).
	 */
	resume(): void {
		if (!this.screencastActive || !this.paused) return;
		this.paused = false;
		logger.debug('Screencast resumed');
	}

	/** Whether the recording is currently paused. */
	get isPaused(): boolean {
		return this.paused;
	}

	/** Number of frames captured so far (screencast mode). */
	get capturedFrameCount(): number {
		return this.frameCount;
	}

	// ── Tracing ──

	/**
	 * Attempts to start Playwright tracing on the context.
	 *
	 * @returns `true` when tracing started, `false` when it could not be started.
	 */
	private async startTracing(): Promise<boolean> {
		try {
			await this.ctx.context.tracing.start({
				screenshots: true,
				snapshots: true,
			});
			this.tracingStarted = true;
			logger.info('Playwright tracing started');
			return true;
		} catch (err) {
			const reason = err instanceof Error ? err.message : String(err);
			logger.debug(`Could not start tracing: ${reason}`);
			return false;
		}
	}

	/** Stops tracing and writes the archive to `outputPath`; errors are logged. */
	private async stopTracing(): Promise<void> {
		try {
			await mkdir(dirname(this.options.outputPath), { recursive: true });
			await this.ctx.context.tracing.stop({
				path: this.options.outputPath,
			});
			logger.info(`Trace saved to ${this.options.outputPath}`);
		} catch (err) {
			const reason = err instanceof Error ? err.message : String(err);
			logger.error(`Failed to save trace: ${reason}`);
		}
		this.tracingStarted = false;
	}

	// ── Screencast ──

	/**
	 * Opens a CDP session and starts `Page.startScreencast`. On failure the
	 * error is logged and the session (if one was created) is detached so it
	 * does not leak.
	 */
	private async startScreencast(): Promise<void> {
		let session: CDPSession | null = null;

		try {
			const created = await this.ctx.page.context().newCDPSession(this.ctx.page);
			session = created;
			this.cdpSession = created;

			// A new capture session always starts unpaused, regardless of how
			// a previous session ended.
			this.paused = false;

			created.on('Page.screencastFrame', (params) => {
				const { data, metadata, sessionId } = params as {
					data: string;
					metadata: { timestamp: number };
					sessionId: number;
				};

				// Acknowledge the frame so the browser keeps sending them.
				created.send('Page.screencastFrameAck', { sessionId }).catch(() => {
					// Ignore ack errors; session may have closed.
				});

				if (this.paused) return;

				this.frameCount++;
				this.frames.push({ data, timestamp: metadata.timestamp });

				if (this.frameCount % 50 === 0) {
					logger.debug(`Screencast: captured ${this.frameCount} frames`);
				}
			});

			// Guard the divisor: a non-positive or non-finite rate would yield
			// an Infinity/NaN `everyNthFrame`. Fall back to the default 5 fps.
			const requestedFps = this.options.maxFrameRate;
			const fps = Number.isFinite(requestedFps) && requestedFps > 0 ? requestedFps : 5;

			await (created.send('Page.startScreencast', {
				format: this.options.format,
				quality: this.options.format === 'jpeg' ? this.options.quality : undefined,
				maxWidth: this.options.maxWidth,
				maxHeight: this.options.maxHeight,
				everyNthFrame: Math.max(1, Math.round(60 / fps)),
			}) as Promise<unknown> as Promise<void>);

			this.screencastActive = true;

			this.cleanupFns.push(() => {
				created.detach().catch(() => {
					// Ignore detach errors during cleanup.
				});
			});

			logger.info(
				`CDP screencast started (${this.options.maxWidth}x${this.options.maxHeight}, ` +
					`${this.options.format} q${this.options.quality}, ~${this.options.maxFrameRate} fps)`,
			);
		} catch (err) {
			const reason = err instanceof Error ? err.message : String(err);
			logger.error(`Failed to start CDP screencast: ${reason}`);

			// No cleanup function was registered for a session that failed to
			// start, so release it here.
			if (session && !this.screencastActive) {
				await session.detach().catch(() => {
					// Session may already be gone.
				});
				if (this.cdpSession === session) {
					this.cdpSession = null;
				}
			}
		}
	}

	/**
	 * Stops the screencast and writes the captured frames to disk. A failure
	 * while writing is logged instead of propagating out of teardown, matching
	 * how trace-saving failures are handled.
	 */
	private async stopScreencast(): Promise<void> {
		if (!this.cdpSession) return;

		try {
			await (this.cdpSession.send('Page.stopScreencast') as Promise<unknown> as Promise<void>);
		} catch {
			// Session may already be closed.
		}

		this.screencastActive = false;
		// `resume()` is a no-op once inactive, so clear the flag here to avoid
		// reporting a stale paused state.
		this.paused = false;
		logger.info(`Screencast stopped – ${this.frameCount} frames captured`);

		try {
			await this.saveFrames();
		} catch (err) {
			const reason = err instanceof Error ? err.message : String(err);
			logger.error(`Failed to save screencast frames: ${reason}`);
		}
	}

	/**
	 * Writes every captured frame as a numbered image file, plus a
	 * `manifest.json`, into a `screencast-frames` directory next to the
	 * configured output path.
	 */
	private async saveFrames(): Promise<void> {
		if (this.frames.length === 0) {
			logger.debug('No screencast frames to save');
			return;
		}

		const framesDir = join(dirname(this.options.outputPath), 'screencast-frames');
		await mkdir(framesDir, { recursive: true });

		const ext = this.options.format === 'png' ? 'png' : 'jpg';
		const manifest: Array<{ file: string; timestamp: number }> = [];

		// Written one at a time to keep the number of open files bounded.
		for (let i = 0; i < this.frames.length; i++) {
			const frame = this.frames[i];
			const filename = `frame-${String(i).padStart(5, '0')}.${ext}`;
			const filePath = join(framesDir, filename);
			await writeFile(filePath, Buffer.from(frame.data, 'base64'));
			manifest.push({ file: filename, timestamp: frame.timestamp });
		}

		// Write a JSON manifest alongside the frames for downstream tooling.
		const manifestPath = join(framesDir, 'manifest.json');
		await writeFile(manifestPath, JSON.stringify(manifest, null, 2), 'utf-8');

		logger.info(`Saved ${this.frames.length} frames to ${framesDir}`);
	}
}


================================================
FILE: packages/core/src/viewport/index.ts
================================================
export { Viewport, type ViewportOptions } from './viewport.js';
export { LaunchProfile } from './launch-profile.js';
export { EventHub } from './event-hub.js';
export { BaseGuard, type GuardContext } from './guard-base.js';
export { VisualTracer, type VisualTracerOptions } from './visual-tracer.js';
export {
	type TabDescriptor,
	type ViewportSnapshot,
	type ViewportHistory,
	type LaunchOptions,
	type PageState,
} from './types.js';
export {
	type ViewportEventMap,
	type ViewportRequestMap,
	type NavigateEvent,
	type ClickEvent,
	type InputEvent,
	type ScrollEvent,
	type ScreenshotEvent,
	type ScreenshotResult,
	type DownloadEvent,
	type PopupEvent,
	type SecurityEvent,
	type CrashEvent,
} from './events.js';


================================================
FILE: packages/core/src/viewport/launch-profile.test.ts
================================================
import { test, expect, describe } from 'bun:test';
import {
	LaunchProfile,
	CHROME_AUTOMATION_FLAGS,
	CHROME_STRIPPED_FEATURES,
	ANTI_DETECTION_FLAGS,
	CONTAINER_FLAGS,
	REPRODUCIBLE_RENDER_FLAGS,
	RELAXED_SECURITY_FLAGS,
} from './launch-profile.js';

describe('LaunchProfile', () => {
	describe('static create', () => {
		test('returns a LaunchProfile instance', () => {
			const profile = LaunchProfile.create();
			expect(profile).toBeInstanceOf(LaunchProfile);
		});
	});

	describe('default build', () => {
		test('produces headless true by default', () => {
			const opts = LaunchProfile.create().build();
			expect(opts.headless).toBe(true);
		});

		test('produces default window size 1280x1100', () => {
			const opts = LaunchProfile.create().build();
			expect(opts.windowWidth).toBe(1280);
			expect(opts.windowHeight).toBe(1100);
		});

		test('persistAfterClose defaults to false', () => {
			const opts = LaunchProfile.create().build();
			expect(opts.persistAfterClose).toBe(false);
		});

		test('relaxedSecurity defaults to false', () => {
			const opts = LaunchProfile.create().build();
			expect(opts.relaxedSecurity).toBe(false);
		});

		test('includes CHROME_AUTOMATION_FLAGS in extraArgs', () => {
			const opts = LaunchProfile.create().build();
			for (const arg of CHROME_AUTOMATION_FLAGS) {
				expect(opts.extraArgs).toContain(arg);
			}
		});

		test('includes disabled components feature flag', () => {
			const opts = LaunchProfile.create().build();
			const disableFeatures = opts.extraArgs.find((a) =>
				a.startsWith('--disable-features='),
			);
			expect(disableFeatures).toBeDefined();
			for (const component of CHROME_STRIPPED_FEATURES) {
				expect(disableFeatures).toContain(component);
			}
		});

		test('includes window-size arg', () => {
			const opts = LaunchProfile.create().build();
			expect(opts.extraArgs).toContain('--window-size=1280,1100');
		});

		test('proxy is undefined by default', () => {
			const opts = LaunchProfile.create().build();
			expect(opts.proxy).toBeUndefined();
		});

		test('userDataDir is undefined by default', () => {
			const opts = LaunchProfile.create().build();
			expect(opts.userDataDir).toBeUndefined();
		});

		test('channelName is undefined by default', () => {
			const opts = LaunchProfile.create().build();
			expect(opts.channelName).toBeUndefined();
		});
	});

	describe('.headless()', () => {
		test('headless(true) sets headless to true', () => {
			const opts = LaunchProfile.create().headless(true).build();
			expect(opts.headless).toBe(true);
		});

		test('headless(false) sets headless to false', () => {
			const opts = LaunchProfile.create().headless(false).build();
			expect(opts.headless).toBe(false);
		});

		test('headless() with no argument defaults to true', () => {
			const opts = LaunchProfile.create().headless().build();
			expect(opts.headless).toBe(true);
		});
	});

	describe('.headful() equivalent', () => {
		test('headless(false) creates headful mode', () => {
			const opts = LaunchProfile.create().headless(false).build();
			expect(opts.headless).toBe(false);
		});
	});

	describe('.stealthMode()', () => {
		test('adds stealth args when enabled', () => {
			const opts = LaunchProfile.create().stealthMode().build();
			for (const arg of ANTI_DETECTION_FLAGS) {
				expect(opts.extraArgs).toContain(arg);
			}
		});

		test('does not add stealth args when disabled', () => {
			const opts = LaunchProfile.create().stealthMode(false).build();
			// ANTI_DETECTION_FLAGS[1] is --disable-features=AutomationControlled
			// which won't be in the base args (only in ANTI_DETECTION_FLAGS)
			// But CHROME_AUTOMATION_FLAGS also contains --disable-blink-features=AutomationControlled
			// so check for the features one specifically
			const stealthOnlyArg = '--disable-features=AutomationControlled';
			const hasStealthOnlyArg = opts.extraArgs.some(
				(a) => a === stealthOnlyArg,
			);
			expect(hasStealthOnlyArg).toBe(false);
		});

		test('returns this for chaining', () => {
			const profile = LaunchProfile.create();
			const result = profile.stealthMode();
			expect(result).toBe(profile);
		});
	});

	describe('.dockerMode()', () => {
		test('adds docker args when enabled', () => {
			const opts = LaunchProfile.create().dockerMode().build();
			for (const arg of CONTAINER_FLAGS) {
				expect(opts.extraArgs).toContain(arg);
			}
		});

		test('does not add docker args when disabled', () => {
			const opts = LaunchProfile.create().dockerMode(false).build();
			// --no-sandbox should not be present when docker mode is off
			expect(opts.extraArgs).not.toContain('--no-sandbox');
		});
	});

	describe('.deterministicRendering()', () => {
		test('adds deterministic rendering args when enabled', () => {
			const opts = LaunchProfile.create().deterministicRendering().build();
			for (const arg of REPRODUCIBLE_RENDER_FLAGS) {
				expect(opts.extraArgs).toContain(arg);
			}
		});

		test('does not add deterministic args when disabled', () => {
			const opts = LaunchProfile.create().deterministicRendering(false).build();
			expect(opts.extraArgs).not.toContain('--deterministic-mode');
		});
	});

	describe('.relaxedSecurity()', () => {
		test('adds security-disable args when enabled', () => {
			const opts = LaunchProfile.create().relaxedSecurity().build();
			expect(opts.relaxedSecurity).toBe(true);
			for (const arg of RELAXED_SECURITY_FLAGS) {
				expect(opts.extraArgs).toContain(arg);
			}
		});

		test('does not add security args when disabled', () => {
			const opts = LaunchProfile.create().relaxedSecurity(false).build();
			expect(opts.relaxedSecurity).toBe(false);
			expect(opts.extraArgs).not.toContain('--disable-web-security');
		});
	});

	describe('.downloadsPath()', () => {
		test('adds download-default-directory arg', () => {
			const opts = LaunchProfile.create()
				.downloadsPath('/tmp/downloads')
				.build();
			expect(opts.extraArgs).toContain(
				'--download-default-directory=/tmp/downloads',
			);
		});
	});

	describe('.maxIframes()', () => {
		test('returns this for chaining', () => {
			const profile = LaunchProfile.create();
			const result = profile.maxIframes(5);
			expect(result).toBe(profile);
		});
	});

	describe('.addExtension()', () => {
		test('adds single extension path to load-extension arg', () => {
			const opts = LaunchProfile.create()
				.addExtension('/path/to/ext1')
				.build();
			const loadExtArg = opts.extraArgs.find((a) =>
				a.startsWith('--load-extension='),
			);
			expect(loadExtArg).toBe('--load-extension=/path/to/ext1');
		});

		test('adds multiple extensions as comma-separated list', () => {
			const opts = LaunchProfile.create()
				.addExtension('/path/to/ext1')
				.addExtension('/path/to/ext2')
				.build();
			const loadExtArg = opts.extraArgs.find((a) =>
				a.startsWith('--load-extension='),
			);
			expect(loadExtArg).toBe(
				'--load-extension=/path/to/ext1,/path/to/ext2',
			);
		});

		test('no load-extension arg when no extensions added', () => {
			const opts = LaunchProfile.create().build();
			const loadExtArg = opts.extraArgs.find((a) =>
				a.startsWith('--load-extension='),
			);
			expect(loadExtArg).toBeUndefined();
		});
	});

	describe('.windowSize()', () => {
		test('sets custom window dimensions', () => {
			// The size must show up both as structured options and as a Chromium switch.
			const { windowWidth, windowHeight, extraArgs } = LaunchProfile.create()
				.windowSize(1920, 1080)
				.build();

			expect(windowWidth).toBe(1920);
			expect(windowHeight).toBe(1080);
			expect(extraArgs).toContain('--window-size=1920,1080');
		});
	});

	describe('.proxy()', () => {
		const SERVER = 'http://proxy:8080';

		test('sets proxy server', () => {
			// Credentials that were not supplied are carried as explicit undefined values.
			const { proxy } = LaunchProfile.create().proxy(SERVER).build();

			expect(proxy).toEqual({
				server: 'http://proxy:8080',
				username: undefined,
				password: undefined,
			});
		});

		test('sets proxy with credentials', () => {
			const { proxy } = LaunchProfile.create().proxy(SERVER, 'user', 'pass').build();

			expect(proxy).toEqual({
				server: 'http://proxy:8080',
				username: 'user',
				password: 'pass',
			});
		});
	});

	describe('.userDataDir()', () => {
		test('sets user data directory', () => {
			// The directory is passed through to the built options unchanged.
			const { userDataDir } = LaunchProfile.create().userDataDir('/tmp/chrome-data').build();

			expect(userDataDir).toBe('/tmp/chrome-data');
		});
	});

	describe('.browserBinary()', () => {
		test('sets browser binary path', () => {
			// browserBinary() feeds the `browserBinaryPath` field of the built options.
			const { browserBinaryPath } = LaunchProfile.create()
				.browserBinary('/usr/bin/chromium')
				.build();

			expect(browserBinaryPath).toBe('/usr/bin/chromium');
		});
	});

	describe('.persistAfterClose()', () => {
		test('sets persistAfterClose to true', () => {
			// Calling the setter without an argument enables the option.
			const built = LaunchProfile.create().persistAfterClose().build();

			expect(built.persistAfterClose).toBe(true);
		});

		test('sets persistAfterClose to false', () => {
			const built = LaunchProfile.create().persistAfterClose(false).build();

			expect(built.persistAfterClose).toBe(false);
		});
	});

	describe('.channel()', () => {
		test('sets channel name', () => {
			// channel() feeds the `channelName` field of the built options.
			const { channelName } = LaunchProfile.create().channel('chrome').build();

			expect(channelName).toBe('chrome');
		});
	});

	describe('.extraArgs()', () => {
		test('appends extra args to the end', () => {
			const { extraArgs } = LaunchProfile.create()
				.extraArgs('--custom-flag', '--another-flag')
				.build();

			expect(extraArgs).toContain('--custom-flag');
			expect(extraArgs).toContain('--another-flag');
		});

		test('user extra args can override earlier args', () => {
			const { extraArgs } = LaunchProfile.create().extraArgs('--override=value').build();

			// User-supplied switches are emitted after every built-in one, so the
			// trailing element of the array must be the user's flag.
			const trailing = extraArgs.slice(-1);
			expect(trailing).toContain('--override=value');
		});
	});

	describe('builder chaining', () => {
		test('multiple methods can be chained together', () => {
			const built = LaunchProfile.create()
				.headless(false)
				.stealthMode()
				.dockerMode()
				.deterministicRendering()
				.windowSize(800, 600)
				.downloadsPath('/downloads')
				.addExtension('/ext')
				.persistAfterClose()
				.build();

			// Scalar options set along the chain.
			expect(built.headless).toBe(false);
			expect(built.persistAfterClose).toBe(true);
			expect(built.windowWidth).toBe(800);
			expect(built.windowHeight).toBe(600);
			expect(built.extraArgs).toContain('--window-size=800,600');

			// Every flag of every enabled mode must be present in the final arg list.
			const expectedModeFlags = [
				...ANTI_DETECTION_FLAGS,
				...CONTAINER_FLAGS,
				...REPRODUCIBLE_RENDER_FLAGS,
			];
			for (const flag of expectedModeFlags) {
				expect(built.extraArgs).toContain(flag);
			}
		});
	});
});

describe('CHROME_AUTOMATION_FLAGS', () => {
	test('is a non-empty array', () => {
		expect(Array.isArray(CHROME_AUTOMATION_FLAGS)).toBe(true);
		expect(CHROME_AUTOMATION_FLAGS.length).toBeGreaterThan(10);
	});

	test('contains essential flags', () => {
		// Spot-check a few switches the automation setup relies on.
		const essentials = ['--no-first-run', '--disable-popup-blocking', '--disable-infobars'];
		for (const flag of essentials) {
			expect(CHROME_AUTOMATION_FLAGS).toContain(flag);
		}
	});

	test('all entries are strings starting with --', () => {
		CHROME_AUTOMATION_FLAGS.forEach((flag) => {
			expect(typeof flag).toBe('string');
			expect(flag.startsWith('--')).toBe(true);
		});
	});
});

describe('CHROME_STRIPPED_FEATURES', () => {
	test('is a non-empty array', () => {
		expect(Array.isArray(CHROME_STRIPPED_FEATURES)).toBe(true);
		expect(CHROME_STRIPPED_FEATURES.length).toBeGreaterThan(10);
	});

	test('contains known components', () => {
		// Spot-check a few feature names that must stay in the list.
		const known = ['Translate', 'MediaRouter', 'Prerender2'];
		for (const feature of known) {
			expect(CHROME_STRIPPED_FEATURES).toContain(feature);
		}
	});

	test('all entries are non-empty strings', () => {
		CHROME_STRIPPED_FEATURES.forEach((feature) => {
			expect(typeof feature).toBe('string');
			expect(feature.length).toBeGreaterThan(0);
		});
	});
});


================================================
FILE: packages/core/src/viewport/launch-profile.ts
================================================
import type { LaunchOptions } from './types.js';
import { Config } from '../config/config.js';

/**
 * Baseline Chromium switches applied to every profile built by
 * `LaunchProfile.build()`, before any mode-specific flags.
 *
 * They turn off first-run prompts, background services, throttling and
 * other features that are not needed while the browser is automated.
 */
export const CHROME_AUTOMATION_FLAGS: string[] = [
	'--no-first-run',
	'--no-default-browser-check',
	'--disable-background-networking',
	'--disable-background-timer-throttling',
	'--disable-backgrounding-occluded-windows',
	'--disable-breakpad',
	'--disable-component-update',
	'--disable-default-apps',
	'--disable-dev-shm-usage',
	// Deliberately carries an empty value.
	'--disable-extensions-except=',
	'--disable-hang-monitor',
	'--disable-ipc-flooding-protection',
	'--disable-popup-blocking',
	'--disable-prompt-on-repost',
	'--disable-renderer-backgrounding',
	'--disable-sync',
	'--disable-translate',
	'--metrics-recording-only',
	'--no-pings',
	'--password-store=basic',
	'--use-mock-keychain',
	'--disable-blink-features=AutomationControlled',
	'--disable-infobars',
	'--disable-session-crashed-bubble',
	'--force-color-profile=srgb',
];

/**
 * Chromium feature names that `LaunchProfile.build()` joins with commas into
 * a single `--disable-features=` switch, to cut resource usage and keep
 * background services from interfering with automation.
 */
export const CHROME_STRIPPED_FEATURES: string[] = [
	'InterestFeedContentSuggestions',
	'Translate',
	'OptimizationHints',
	'MediaRouter',
	'DialMediaRouteProvider',
	'CalculatorTool',
	'CrashedTabFinder',
	'AutofillServerCommunication',
	'BackgroundTracing',
	'NtpTiles',
	'OneGoogleBar',
	'ReadLater',
	'NTPArticleSuggestions',
	'CrossDeviceSync',
	'PrivacySandboxSettings4',
	'SidePanelPinning',
	'HistoryEmbeddings',
	'PrivacySandboxPromptV2',
	'GlobalMediaControls',
	'ComposeService',
	'AutofillFeature',
	'NTPSigninPromo',
	'Prerender2',
	'TabGroupsSave',
];

/** Switches appended by `LaunchProfile.build()` when stealth mode is enabled. */
export const ANTI_DETECTION_FLAGS: string[] = [
	'--disable-blink-features=AutomationControlled',
	'--disable-features=AutomationControlled',
];

/** Switches appended by `LaunchProfile.build()` when Docker mode is enabled. */
export const CONTAINER_FLAGS: string[] = [
	'--no-sandbox',
	'--disable-gpu',
	'--disable-software-rasterizer',
	'--disable-setuid-sandbox',
	'--single-process',
];

/** Switches appended by `LaunchProfile.build()` when `relaxedSecurity` is enabled. */
export const RELAXED_SECURITY_FLAGS: string[] = [
	'--disable-web-security',
	'--disable-site-isolation-trials',
	'--disable-features=IsolateOrigins,site-per-process',
];

/** Switches appended by `LaunchProfile.build()` when deterministic rendering is enabled. */
export const REPRODUCIBLE_RENDER_FLAGS: string[] = [
	'--deterministic-mode',
	'--disable-skia-runtime-opts',
	'--disable-font-subpixel-positioning',
	'--force-color-profile=srgb',
	'--disable-lcd-text',
];

/**
 * Fluent builder for browser launch configuration.
 * Replaces the Python ViewportConfig with a chainable API.
 *
 * Every setter records a value and returns the builder itself; `build()`
 * turns the recorded state into a complete `LaunchOptions` object whose
 * `extraArgs` contains the full Chromium command line.
 */
export class LaunchProfile {
	/** Options that map one-to-one onto `LaunchOptions` fields. */
	private options: Partial<LaunchOptions> = {};
	private _stealthMode = false;
	private _dockerMode = false;
	private _deterministicRendering = false;
	// NOTE(review): recorded by maxIframes() but not consumed by build(); confirm
	// whether it is meant to be surfaced through LaunchOptions.
	private _maxIframes = 3;
	private _downloadsPath?: string;
	private _extensions: string[] = [];

	/** Creates an empty profile; equivalent to `new LaunchProfile()`. */
	static create(): LaunchProfile {
		return new LaunchProfile();
	}

	/** Sets headless mode (`build()` defaults to `true` when never called). */
	headless(value = true): this {
		this.options.headless = value;
		return this;
	}

	/** Toggles relaxed security; when enabled, `build()` appends RELAXED_SECURITY_FLAGS. */
	relaxedSecurity(value = true): this {
		this.options.relaxedSecurity = value;
		return this;
	}

	/** Sets the window size used for the options and the `--window-size` switch. */
	windowSize(width: number, height: number): this {
		this.options.windowWidth = width;
		this.options.windowHeight = height;
		return this;
	}

	/** Sets the proxy server and, optionally, its credentials. */
	proxy(server: string, username?: string, password?: string): this {
		this.options.proxy = { server, username, password };
		return this;
	}

	/** Sets the user data directory passed through as `userDataDir`. */
	userDataDir(dir: string): this {
		this.options.userDataDir = dir;
		return this;
	}

	/** Sets the browser executable path passed through as `browserBinaryPath`. */
	browserBinary(path: string): this {
		this.options.browserBinaryPath = path;
		return this;
	}

	/** Sets the `persistAfterClose` option (`build()` defaults to `false`). */
	persistAfterClose(value = true): this {
		this.options.persistAfterClose = value;
		return this;
	}

	/** Sets the browser channel passed through as `channelName`. */
	channel(name: string): this {
		this.options.channelName = name;
		return this;
	}

	/**
	 * Appends user-supplied switches. Repeated calls accumulate; `build()` emits
	 * them after all built-in switches so they can override earlier values.
	 */
	extraArgs(...args: string[]): this {
		this.options.extraArgs = [...(this.options.extraArgs ?? []), ...args];
		return this;
	}

	/** Toggles stealth mode; when enabled, `build()` appends ANTI_DETECTION_FLAGS. */
	stealthMode(value = true): this {
		this._stealthMode = value;
		return this;
	}

	/** Toggles Docker mode; when enabled, `build()` appends CONTAINER_FLAGS. */
	dockerMode(value = true): this {
		this._dockerMode = value;
		return this;
	}

	/** Toggles deterministic rendering; when enabled, `build()` appends REPRODUCIBLE_RENDER_FLAGS. */
	deterministicRendering(value = true): this {
		this._deterministicRendering = value;
		return this;
	}

	/** Sets the directory emitted as `--download-default-directory=<path>`. */
	downloadsPath(path: string): this {
		this._downloadsPath = path;
		return this;
	}

	/** Records the iframe limit (currently not reflected in the built options). */
	maxIframes(max: number): this {
		this._maxIframes = max;
		return this;
	}

	/** Adds an extension path; all paths are emitted as one comma-separated list. */
	addExtension(extensionPath: string): this {
		this._extensions.push(extensionPath);
		return this;
	}

	/**
	 * Auto-detect and apply Docker settings if running inside a container.
	 * Enables Docker mode when `Config.isDocker()` is true and additionally
	 * forces headless mode when `Config.hasDisplay()` is false.
	 */
	autoDetect(): this {
		if (Config.isDocker()) {
			this._dockerMode = true;
			// Force headless in Docker if no display
			if (!Config.hasDisplay()) {
				this.options.headless = true;
			}
		}
		return this;
	}

	/**
	 * Produces the final launch options.
	 *
	 * Switch order in `extraArgs`: automation defaults, stripped features,
	 * mode-specific flags, window size, extensions, downloads path, a merged
	 * `--disable-features` switch (only when more than one was emitted), and
	 * finally the user's extra args.
	 *
	 * @returns A fully populated `LaunchOptions` object with defaults applied.
	 */
	build(): LaunchOptions {
		const args = [...CHROME_AUTOMATION_FLAGS];

		// Disabled components
		args.push('--disable-component-extensions-with-background-pages');
		args.push(`--disable-features=${CHROME_STRIPPED_FEATURES.join(',')}`);

		// Mode-specific args
		if (this._stealthMode) {
			args.push(...ANTI_DETECTION_FLAGS);
		}

		if (this._dockerMode) {
			args.push(...CONTAINER_FLAGS);
		}

		if (this._deterministicRendering) {
			args.push(...REPRODUCIBLE_RENDER_FLAGS);
		}

		if (this.options.relaxedSecurity) {
			args.push(...RELAXED_SECURITY_FLAGS);
		}

		// Window size
		const width = this.options.windowWidth ?? 1280;
		const height = this.options.windowHeight ?? 1100;
		args.push(`--window-size=${width},${height}`);

		// Extensions: the automation defaults carry an empty
		// `--disable-extensions-except=`, so the same paths are also listed as
		// exceptions (the pairing Playwright documents for loading extensions).
		if (this._extensions.length > 0) {
			const extensionList = this._extensions.join(',');
			args.push(`--disable-extensions-except=${extensionList}`);
			args.push(`--load-extension=${extensionList}`);
		}

		// Downloads
		if (this._downloadsPath) {
			args.push(`--download-default-directory=${this._downloadsPath}`);
		}

		// Chromium keeps only the last value of a repeated switch. Without a merged
		// switch, the stealth / relaxed-security `--disable-features=` flags would
		// discard the stripped-feature list. The individual flags are kept so
		// callers that look for them still find them.
		const mergedDisableFeatures = LaunchProfile.mergeDisabledFeatures(args);
		if (mergedDisableFeatures) {
			args.push(mergedDisableFeatures);
		}

		// User extra args (last, so they can override)
		if (this.options.extraArgs) {
			args.push(...this.options.extraArgs);
		}

		return {
			headless: this.options.headless ?? true,
			relaxedSecurity: this.options.relaxedSecurity ?? false,
			extraArgs: args,
			windowWidth: width,
			windowHeight: height,
			proxy: this.options.proxy,
			userDataDir: this.options.userDataDir,
			browserBinaryPath: this.options.browserBinaryPath,
			persistAfterClose: this.options.persistAfterClose ?? false,
			channelName: this.options.channelName,
		};
	}

	/**
	 * Combines every `--disable-features=` switch in `args` into one switch that
	 * lists each feature once, in first-seen order.
	 *
	 * @returns The merged switch, or `undefined` when fewer than two such
	 *   switches are present (nothing to merge).
	 */
	private static mergeDisabledFeatures(args: readonly string[]): string | undefined {
		const prefix = '--disable-features=';
		const switches = args.filter((arg) => arg.startsWith(prefix));
		if (switches.length < 2) {
			return undefined;
		}

		const features = new Set<string>();
		for (const flag of switches) {
			for (const rawName of flag.slice(prefix.length).split(',')) {
				const name = rawName.trim();
				if (name.length > 0) {
					features.add(name);
				}
			}
		}

		return features.size > 0 ? `${prefix}${Array.from(features).join(',')}` : undefined;
	}
}


================================================
FILE: packages/core/src/viewport/types.ts
================================================
import { z } from 'zod';
import type { TabId } from '../types.js';

/** Describes one browser tab: its id, current URL and title, and whether it is the active tab. */
export interface TabDescriptor {
	tabId: TabId;
	url: string;
	title: string;
	isActive: boolean;
}

/**
 * Point-in-time description of the browser: current URL and title, the tab
 * list with the index of the active tab, and optional captured artifacts.
 */
export interface ViewportSnapshot {
	url: string;
	title: string;
	tabs: TabDescriptor[];
	activeTabIndex: number;
	// NOTE(review): encoding of the screenshot string is not defined here — confirm with the producer.
	screenshot?: string;
	domTree?: string;
	// Keyed by number; presumably an element index mapped to a selector — TODO confirm.
	selectorMap?: Record<number, string>;
	// Presumably scroll offsets around the visible area, in pixels — TODO confirm.
	pixelsAbove?: number;
	pixelsBelow?: number;
}

/**
 * History record of a browser state: URL, title and tabs, together with the
 * elements that were interacted with and an optional screenshot.
 */
export interface ViewportHistory {
	url: string;
	title: string;
	tabs: TabDescriptor[];
	// One entry per interacted element: its index, a description, and the action name.
	interactedElements: Array<{
		index: number;
		description: string;
		action: string;
	}>;
	screenshot?: string;
}

/** Proxy settings: a mandatory server plus optional credentials. */
const ProxyOptionsSchema = z.object({
	server: z.string(),
	username: z.string().optional(),
	password: z.string().optional(),
});

/**
 * Zod schema for browser launch options. Fields with a `.default(...)` are
 * filled in when omitted from the parsed input; the rest stay optional.
 */
export const LaunchOptionsSchema = z.object({
	headless: z.boolean().default(true),
	relaxedSecurity: z.boolean().default(false),
	extraArgs: z.array(z.string()).default([]),
	windowWidth: z.number().default(1280),
	windowHeight: z.number().default(1100),
	proxy: ProxyOptionsSchema.optional(),
	userDataDir: z.string().optional(),
	browserBinaryPath: z.string().optional(),
	persistAfterClose: z.boolean().default(false),
	channelName: z.string().optional(),
});

/** Launch options as produced by parsing with `LaunchOptionsSchema`. */
export type LaunchOptions = z.infer<typeof LaunchOptionsSchema>;

/** Minimal description of a page: URL and title, with optional content and screenshot. */
export interface PageState {
	url: string;
	title: string;
	content?: string;
	screenshot?: string;
}


================================================
FILE: packages/core/src/viewport/viewport.ts
================================================
import {
	chromium,
	type Browser,
	type BrowserContext,
	type Page,
	type CDPSession,
} from 'playwright';
import { EventHub } from './event-hub.js';
import type { ViewportEventMap, ViewportRequestMap } from './events.js';
import type { LaunchOptions, ViewportSnapshot, TabDescriptor } from './types.js';
import { LaunchProfile } from './launch-profile.js';
import { BaseGuard, type GuardContext } from './guard-base.js';
import { LaunchFailedError, ViewportCrashedError } from '../errors.js';
import { tabId, targetId, type TargetId } from '../types.js';
import { createLogger } from '../logging.js';
import { timed } from '../telemetry.js';
import { isNewTabPage } from '../utils.js';

// Watchdogs
import { LocalInstanceGuard } from './guards/local-instance.js';
import { UrlPolicyGuard } from './guards/url-policy.js';
import { DefaultHandlerGuard } from './guards/default-handler.js';
import { PopupGuard } from './guards/popups.js';
import { PageReadyGuard } from './guards/page-ready.js';
import { DownloadGuard } from './guards/downloads.js';
import { BlankPageGuard } from './guards/blank-page.js';
import { CrashGuard } from './guards/crash.js';
import { PersistenceGuard } from './guards/persistence.js';
import { ScreenshotGuard } from './guards/screenshot.js';

const logger = createLogger('browser-session');

// ── Multi-target tracking ──

/**
 * Represents a single CDP target (page, iframe, service worker, etc.).
 * Instances are built from the `targetInfos` returned by `Target.getTargets`.
 */
export interface Target {
	targetId: TargetId;
	// Raw CDP type strings are mapped onto this union; unrecognised ones presumably become 'other' — TODO confirm.
	type: 'page' | 'iframe' | 'service_worker' | 'worker' | 'other';
	url: string;
	title: string;
}

/**
 * Viewport dimensions as detected via CDP.
 * `width`/`height` come from `window.innerWidth`/`innerHeight`,
 * `deviceScaleFactor` from `window.devicePixelRatio`, and `isMobile` from a
 * user-agent test; when detection is unavailable the launch window size is used.
 */
export interface ViewportInfo {
	width: number;
	height: number;
	deviceScaleFactor: number;
	isMobile: boolean;
}

/** Construction options for `Viewport`. All fields are optional. */
export interface ViewportOptions {
	/** Launch options (or use LaunchProfile); ignored when `profile` is provided */
	launchOptions?: Partial<LaunchOptions>;
	/** Pre-built browser profile; when set, its `build()` result is used as the launch options */
	profile?: LaunchProfile;
	/** Connect to existing browser via WebSocket URL (takes precedence over `cdpUrl`) */
	wsEndpoint?: string;
	/** Connect to existing browser via CDP URL */
	cdpUrl?: string;
	/** Headless mode shortcut; takes precedence over `launchOptions.headless` when no `profile` is given */
	headless?: boolean;
	/** Allowed URLs for security watchdog */
	allowedUrls?: string[];
	/** Blocked URLs for security watchdog */
	blockedUrls?: string[];
	/** Storage state file path */
	storageStatePath?: string;
	/** Extra watchdogs */
	watchdogs?: BaseGuard[];
	/** Minimum wait after page load (ms); defaults to 500 */
	minWaitPageLoadMs?: number;
	/** Wait for network idle (ms); defaults to 1000 */
	waitForNetworkIdleMs?: number;
	/** Max wait for page load (ms); defaults to 5000 */
	maxWaitPageLoadMs?: number;
	/** Max reconnection attempts; defaults to 3 */
	maxReconnectAttempts?: number;
	/** Initial delay between reconnection attempts (ms), doubled after each failure; defaults to 1000 */
	reconnectDelayMs?: number;
}

export class Viewport {
	// Playwright handles; all are null until start() succeeds and are reset
	// to null again when state is cleaned up for a reconnect.
	private browser: Browser | null = null;
	private context: BrowserContext | null = null;
	private _currentPage: Page | null = null;
	private cdpSession: CDPSession | null = null;

	/** Event hub used to publish lifecycle events such as 'crash' and 'viewport-state'. */
	readonly eventBus: EventHub<ViewportEventMap, ViewportRequestMap>;
	/** Currently attached guards; emptied during reconnect cleanup. */
	private watchdogs: BaseGuard[] = [];
	/** Options exactly as passed to the constructor. */
	private options: ViewportOptions;
	/** Fully resolved launch options (from the profile or from defaults). */
	private launchOptions: LaunchOptions;
	private _isConnected = false;

	// Timing and retry settings resolved once in the constructor.
	private readonly minWaitPageLoadMs: number;
	private readonly waitForNetworkIdleMs: number;
	private readonly maxWaitPageLoadMs: number;
	private readonly maxReconnectAttempts: number;
	private readonly reconnectDelayMs: number;

	/** Tracks known CDP targets keyed by targetId */
	private knownTargets = new Map<string, Target>();

	/** Cached viewport info, invalidated on page/tab switch */
	private cachedViewport: ViewportInfo | null = null;

	/** Tracks whether a reconnection is currently in progress */
	private reconnecting = false;

	/**
	 * Resolves launch options and timing settings; no browser is started here.
	 *
	 * When `options.profile` is given its `build()` result is used verbatim.
	 * Otherwise launch options are assembled from `options.launchOptions`, with
	 * the `options.headless` shortcut taking precedence for the headless flag.
	 */
	constructor(options: ViewportOptions = {}) {
		this.options = options;
		this.eventBus = new EventHub({ maxHistory: 200 });

		const { profile, launchOptions: overrides } = options;
		this.launchOptions = profile
			? profile.build()
			: {
					headless: options.headless ?? overrides?.headless ?? true,
					relaxedSecurity: overrides?.relaxedSecurity ?? false,
					extraArgs: overrides?.extraArgs ?? [],
					windowWidth: overrides?.windowWidth ?? 1280,
					windowHeight: overrides?.windowHeight ?? 1100,
					proxy: overrides?.proxy,
					userDataDir: overrides?.userDataDir,
					browserBinaryPath: overrides?.browserBinaryPath,
					persistAfterClose: overrides?.persistAfterClose ?? false,
					channelName: overrides?.channelName,
				};

		// Timing / retry knobs, each with its documented default.
		const {
			minWaitPageLoadMs = 500,
			waitForNetworkIdleMs = 1000,
			maxWaitPageLoadMs = 5000,
			maxReconnectAttempts = 3,
			reconnectDelayMs = 1000,
		} = {
			minWaitPageLoadMs: options.minWaitPageLoadMs ?? undefined,
			waitForNetworkIdleMs: options.waitForNetworkIdleMs ?? undefined,
			maxWaitPageLoadMs: options.maxWaitPageLoadMs ?? undefined,
			maxReconnectAttempts: options.maxReconnectAttempts ?? undefined,
			reconnectDelayMs: options.reconnectDelayMs ?? undefined,
		};
		this.minWaitPageLoadMs = minWaitPageLoadMs;
		this.waitForNetworkIdleMs = waitForNetworkIdleMs;
		this.maxWaitPageLoadMs = maxWaitPageLoadMs;
		this.maxReconnectAttempts = maxReconnectAttempts;
		this.reconnectDelayMs = reconnectDelayMs;
	}

	/** Whether the session is connected: set by start()/reconnect(), cleared on disconnect or cleanup. */
	get isConnected(): boolean {
		return this._isConnected;
	}

	/**
	 * The page the session is currently bound to.
	 * @throws ViewportCrashedError when no page is available.
	 */
	get currentPage(): Page {
		const page = this._currentPage;
		if (page) {
			return page;
		}
		throw new ViewportCrashedError('No active page');
	}

	/**
	 * The browser context the session is currently bound to.
	 * @throws ViewportCrashedError when no context is available.
	 */
	get browserContext(): BrowserContext {
		const activeContext = this.context;
		if (activeContext) {
			return activeContext;
		}
		throw new ViewportCrashedError('No active browser context');
	}

	/** The CDP session attached to the current page, or `null` when none is attached. */
	get cdp(): CDPSession | null {
		return this.cdpSession;
	}

	// ── Lifecycle ──

	/**
	 * Starts the session: obtains a browser (WebSocket endpoint, CDP URL, or a
	 * local launch, in that order of precedence), binds a context and page,
	 * attaches a CDP session, wires handlers and guards, and emits the initial
	 * lifecycle events ('content-ready', 'page-ready' for non-new-tab URLs, and
	 * 'viewport-state').
	 *
	 * @throws LaunchFailedError wrapping any error raised during startup.
	 */
	async start(): Promise<void> {
		const { durationMs } = await timed('browser-session.start', async () => {
			try {
				logger.info('Starting browser session');

				if (this.options.wsEndpoint) {
					logger.debug(`Connecting via WebSocket: ${this.options.wsEndpoint}`);
					this.browser = await chromium.connect(this.options.wsEndpoint);
				} else if (this.options.cdpUrl) {
					logger.debug(`Connecting via CDP: ${this.options.cdpUrl}`);
					this.browser = await chromium.connectOverCDP(this.options.cdpUrl);
				} else {
					this.browser = await this.launchBrowser();
				}

				// Prefer a context the browser already has (e.g. when attaching to a
				// running instance); otherwise create one.
				const contexts = this.browser.contexts();
				if (contexts.length > 0) {
					this.context = contexts[0];
					logger.debug('Reusing existing browser context');
				} else {
					this.context = await this.createContext();
					logger.debug('Created new browser context');
				}

				// Same for the page: reuse the first existing one or open a new one.
				const pages = this.context.pages();
				if (pages.length > 0) {
					this._currentPage = pages[0];
				} else {
					this._currentPage = await this.context.newPage();
				}

				// Create CDP session
				this.cdpSession = await this._currentPage.context().newCDPSession(this._currentPage);

				this._isConnected = true;

				// Wire up disconnect detection on the browser
				this.setupDisconnectHandler();

				// Discover initial targets
				await this.refreshTargets();

				// Detect initial viewport via CDP
				this.cachedViewport = null;
				await this.detectViewport();

				// Initialize watchdogs
				await this.initializeWatchdogs();

				// Set up page lifecycle listeners on the context
				this.setupPageLifecycleListeners();

				const pageUrl = this._currentPage.url();
				const pageTitle = await this._currentPage.title();

				// Emit initial lifecycle events
				this.eventBus.emit('content-ready', undefined as any);

				if (!isNewTabPage(pageUrl)) {
					this.eventBus.emit('page-ready', { url: pageUrl });
				}

				this.eventBus.emit('viewport-state', {
					url: pageUrl,
					title: pageTitle,
					tabCount: this.context.pages().length,
				});

				logger.info(`Browser session started: ${pageUrl}`);
			} catch (error) {
				// NOTE(review): a browser obtained above is not closed here, and
				// `_isConnected` may already be true when a later step fails — confirm
				// whether callers are expected to clean up after a failed start().
				throw new LaunchFailedError(
					`Failed to start browser: ${error instanceof Error ? error.message : String(error)}`,
					{ cause: error instanceof Error ? error : undefined },
				);
			}
		});

		logger.debug(`start() completed in ${durationMs.toFixed(1)}ms`);
	}

	/**
	 * Subscribes to the browser's 'disconnected' event: marks the session as
	 * disconnected and publishes a 'crash' event. No-op without a browser.
	 */
	private setupDisconnectHandler(): void {
		const browser = this.browser;
		if (browser === null) {
			return;
		}

		const onDisconnected = (): void => {
			logger.warn('Browser disconnected');
			this._isConnected = false;
			this.eventBus.emit('crash', { reason: 'Browser disconnected unexpectedly' });
		};

		browser.on('disconnected', onDisconnected);
	}

	/**
	 * Subscribes to the context's 'page' event. For each new page (tab) it
	 * emits 'tab-opened', refreshes the target list, emits an updated
	 * 'viewport-state', and logs when the page finishes loading.
	 * No-op without a context.
	 */
	private setupPageLifecycleListeners(): void {
		const context = this.context;
		if (context === null) {
			return;
		}

		const onPageCreated = async (page: Page): Promise<void> => {
			const createdUrl = page.url();
			logger.debug(`New page created: ${createdUrl}`);
			this.eventBus.emit('tab-opened', { url: createdUrl });

			// The target list changes whenever a page appears.
			await this.refreshTargets();

			// Publish the refreshed browser state, described from the point of
			// view of the current page (falling back to the new page's URL).
			try {
				const active = this._currentPage;
				const stateUrl = active?.url() ?? createdUrl;
				const stateTitle = active ? await active.title() : '';
				this.eventBus.emit('viewport-state', {
					url: stateUrl,
					title: stateTitle,
					tabCount: this.context?.pages().length ?? 1,
				});
			} catch {
				// Page might be closed already
			}

			// Log once the new tab has loaded a real (non new-tab) URL.
			page.on('load', () => {
				const loadedUrl = page.url();
				if (isNewTabPage(loadedUrl)) {
					return;
				}
				logger.debug(`Page loaded in new tab: ${loadedUrl}`);
			});
		};

		context.on('page', onPageCreated);
	}

	// ── Multi-target tracking ──

	/**
	 * Re-queries CDP for the current targets (pages, iframes, workers, etc.),
	 * updates the internal target map, and returns a snapshot of it.
	 *
	 * @returns The known targets after the refresh, as a new array.
	 */
	async getTargets(): Promise<Target[]> {
		await this.refreshTargets();
		return [...this.knownTargets.values()];
	}

	/**
	 * Rebuilds `knownTargets` from CDP `Target.getTargets`.
	 * Does nothing without a CDP session; a failing query is logged at debug
	 * level and leaves the previous map untouched.
	 */
	private async refreshTargets(): Promise<void> {
		const session = this.cdpSession;
		if (!session) {
			return;
		}

		type RawTargetInfo = { targetId: string; type: string; url: string; title: string };

		try {
			const response: unknown = await session.send('Target.getTargets');
			const { targetInfos } = response as { targetInfos: RawTargetInfo[] };

			// Replace the map wholesale so targets that disappeared are dropped.
			this.knownTargets.clear();
			for (const { targetId: rawId, type: rawType, url, title } of targetInfos) {
				this.knownTargets.set(rawId, {
					targetId: targetId(rawId),
					type: normalizeTargetType(rawType),
					url,
					title,
				});
			}

			logger.debug(`Refreshed targets: ${this.knownTargets.size} found`);
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			logger.debug(`Failed to refresh targets: ${reason}`);
		}
	}

	/**
	 * Find a target by its targetId.
	 * Looks only at the map filled by the last refresh; no CDP query is made.
	 *
	 * @returns The matching target, or `undefined` when the id is not known.
	 */
	findTarget(id: TargetId): Target | undefined {
		return this.knownTargets.get(id);
	}

	/**
	 * Refreshes the target list and returns only targets of type 'page'
	 * whose URL is not a new-tab page.
	 */
	async getPageTargets(): Promise<Target[]> {
		const allTargets = await this.getTargets();
		return allTargets.filter(({ type, url }) => type === 'page' && !isNewTabPage(url));
	}

	// ── Viewport detection via CDP ──

	/**
	 * Returns the viewport dimensions, measuring them in-page through CDP
	 * `Runtime.evaluate` on first use and serving the cached value afterwards.
	 *
	 * Without a CDP session, or when the evaluation fails, the configured
	 * launch window size is returned (scale factor 1, not mobile). That
	 * fallback is cached as well, until the cache is invalidated.
	 */
	async detectViewport(): Promise<ViewportInfo> {
		if (this.cachedViewport) {
			return this.cachedViewport;
		}

		// Builds and caches the launch-option based fallback.
		const cacheLaunchDefaults = (): ViewportInfo => {
			const defaults: ViewportInfo = {
				width: this.launchOptions.windowWidth,
				height: this.launchOptions.windowHeight,
				deviceScaleFactor: 1,
				isMobile: false,
			};
			this.cachedViewport = defaults;
			return defaults;
		};

		const session = this.cdpSession;
		if (!session) {
			// Fallback to launch options if no CDP session
			return cacheLaunchDefaults();
		}

		try {
			const { result: evaluation } = await timed('detectViewport', async () => {
				const response: unknown = await session.send(
					'Runtime.evaluate',
					{
						expression: `JSON.stringify({
							width: window.innerWidth,
							height: window.innerHeight,
							deviceScaleFactor: window.devicePixelRatio || 1,
							isMobile: /Mobi|Android/i.test(navigator.userAgent)
						})`,
						returnByValue: true,
					},
				);
				return response as { result: { value: string } };
			});

			// The page hands the measurements back as a JSON string.
			const detected = JSON.parse(evaluation.result.value) as ViewportInfo;
			this.cachedViewport = detected;
			logger.debug(
				`Viewport detected: ${detected.width}x${detected.height} @${detected.deviceScaleFactor}x`,
			);
			return detected;
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			logger.warn(`Viewport detection failed, using defaults: ${reason}`);
			return cacheLaunchDefaults();
		}
	}

	/**
	 * Invalidates the cached viewport, forcing a fresh CDP detection on next access.
	 * Only clears the cache; detection itself happens in the next detectViewport() call.
	 */
	invalidateViewportCache(): void {
		this.cachedViewport = null;
	}

	// ── Reconnection logic ──

	/**
	 * Attempts to reconnect to the browser after a disconnect. Uses the original
	 * connection method (wsEndpoint, cdpUrl, or local launch). Retries up to
	 * maxReconnectAttempts, waiting reconnectDelayMs before the second attempt
	 * and doubling the wait after every further failure.
	 *
	 * State created by a failed attempt (browser, CDP session, guards) is
	 * discarded before the next attempt so nothing leaks between retries.
	 * When every attempt fails a 'crash' event is emitted.
	 *
	 * @returns true if reconnection succeeded; false if all attempts failed or
	 *   another reconnection was already in progress.
	 */
	async reconnect(): Promise<boolean> {
		if (this.reconnecting) {
			logger.warn('Reconnection already in progress, skipping');
			return false;
		}

		this.reconnecting = true;
		logger.info('Attempting to reconnect browser session');

		try {
			// Clean up current state without emitting close event
			await this.cleanupForReconnect();

			let delay = this.reconnectDelayMs;

			for (let attempt = 1; attempt <= this.maxReconnectAttempts; attempt++) {
				logger.info(`Reconnect attempt ${attempt}/${this.maxReconnectAttempts}`);

				try {
					if (this.options.wsEndpoint) {
						this.browser = await chromium.connect(this.options.wsEndpoint);
					} else if (this.options.cdpUrl) {
						this.browser = await chromium.connectOverCDP(this.options.cdpUrl);
					} else {
						// For locally launched browsers, we need to launch a new instance
						this.browser = await this.launchBrowser();
					}

					// Re-establish context
					const contexts = this.browser.contexts();
					if (contexts.length > 0) {
						this.context = contexts[0];
					} else {
						this.context = await this.createContext();
					}

					// Re-establish page
					const pages = this.context.pages();
					if (pages.length > 0) {
						this._currentPage = pages[0];
					} else {
						this._currentPage = await this.context.newPage();
					}

					// Re-create CDP session
					this.cdpSession = await this._currentPage.context().newCDPSession(this._currentPage);

					this._isConnected = true;
					this.cachedViewport = null;

					// Re-wire handlers
					this.setupDisconnectHandler();
					this.setupPageLifecycleListeners();

					// Refresh targets after reconnect
					await this.refreshTargets();

					// Re-initialize watchdogs
					await this.initializeWatchdogs();

					logger.info(`Reconnected successfully on attempt ${attempt}`);

					// Emit lifecycle events for the reconnected state
					const url = this._currentPage.url();
					const title = await this._currentPage.title();

					this.eventBus.emit('viewport-state', {
						url,
						title,
						tabCount: this.context.pages().length,
					});

					return true;
				} catch (error) {
					logger.warn(
						`Reconnect attempt ${attempt} failed: ${error instanceof Error ? error.message : String(error)}`,
					);

					// The attempt may have got as far as launching a browser, attaching a
					// CDP session or guards, or flagging the session as connected. Drop all
					// of it so the next attempt starts clean and a locally launched
					// browser is not left running.
					await this.cleanupForReconnect();

					if (attempt < this.maxReconnectAttempts) {
						await new Promise((resolve) => setTimeout(resolve, delay));
						delay *= 2; // Exponential backoff
					}
				}
			}

			logger.error(`All ${this.maxReconnectAttempts} reconnect attempts failed`);
			this.eventBus.emit('crash', { reason: 'Reconnection failed after all attempts' });
			return false;
		} finally {
			// Always release the guard flag, whether the attempts succeeded or not.
			this.reconnecting = false;
		}
	}

	/**
	 * Tears down the current session state ahead of a reconnect attempt.
	 * Does not emit lifecycle events itself and leaves the event bus untouched.
	 *
	 * Guards and the CDP session are detached, a locally launched browser is
	 * closed, and all handles and caches are reset. Errors raised while
	 * detaching or closing are ignored.
	 */
	private async cleanupForReconnect(): Promise<void> {
		// Runs a teardown step and swallows whatever it throws.
		const bestEffort = async (step: () => unknown): Promise<void> => {
			try {
				await step();
			} catch {
				// Ignore teardown errors during reconnect
			}
		};

		// Detach watchdogs
		for (const guard of this.watchdogs) {
			await bestEffort(() => guard.detach());
		}
		this.watchdogs = [];

		// Detach CDP session
		const session = this.cdpSession;
		if (session) {
			await bestEffort(() => session.detach());
			this.cdpSession = null;
		}

		// Don't close the browser if connecting remotely -- it's already disconnected
		const browser = this.browser;
		const isRemote = Boolean(this.options.wsEndpoint) || Boolean(this.options.cdpUrl);
		if (browser && !isRemote) {
			await bestEffort(() => browser.close());
		}

		// Reset every handle and cache tied to the old connection.
		this.browser = null;
		this.context = null;
		this._currentPage = null;
		this._isConnected = false;
		this.knownTargets.clear();
		this.cachedViewport = null;
	}

	// ── DOM stability ──

	/**
	 * Waits until the DOM of the current page stops mutating.
	 *
	 * A MutationObserver is installed in-page on `document.body`; the wait ends
	 * once no mutation has been seen for `quietPeriodMs`, or when `timeout`
	 * elapses, whichever comes first. Errors from the in-page evaluation (for
	 * example when the page navigates or closes) are logged and swallowed.
	 *
	 * @param timeout - Maximum time to wait in ms (default: 3000)
	 * @param quietPeriodMs - How long the DOM must be silent to be considered stable (default: 300)
	 */
	async waitForStableDOM(timeout = 3000, quietPeriodMs = 300): Promise<void> {
		const page = this.currentPage;

		const { durationMs } = await timed('waitForStableDOM', async () => {
			try {
				await page.evaluate(
					({ timeoutMs, quietMs }) =>
						new Promise<void>((resolve) => {
							let quietTimer: ReturnType<typeof setTimeout> | undefined;
							let deadlineTimer: ReturnType<typeof setTimeout> | undefined;

							// Stops observing, cancels both timers and completes the wait.
							const settle = (): void => {
								observer.disconnect();
								clearTimeout(quietTimer);
								clearTimeout(deadlineTimer);
								resolve();
							};

							// (Re)starts the quiet-period countdown.
							const restartQuietTimer = (): void => {
								clearTimeout(quietTimer);
								quietTimer = setTimeout(settle, quietMs);
							};

							// Every mutation pushes the quiet period back.
							const observer = new MutationObserver(restartQuietTimer);
							observer.observe(document.body, {
								childList: true,
								subtree: true,
								attributes: true,
								characterData: true,
							});

							// Arm the quiet timer right away so a page with no
							// mutations at all still resolves after quietMs.
							restartQuietTimer();

							// Hard deadline: resolve even if mutations never stop.
							deadlineTimer = setTimeout(settle, timeoutMs);
						}),
					{ timeoutMs: timeout, quietMs: quietPeriodMs },
				);
			} catch (error) {
				// If the page navigated or was closed, just return
				const reason = error instanceof Error ? error.message : String(error);
				logger.debug(`waitForStableDOM interrupted: ${reason}`);
			}
		});

		logger.debug(`DOM stabilized in ${durationMs.toFixed(1)}ms`);
	}

	// ── Visible HTML extraction ──

	/**
	 * Returns the HTML of the elements currently visible in the viewport.
	 *
	 * Evaluated in-page: every direct child of `<body>` that is rendered (not
	 * `display: none`, `visibility: hidden` or `opacity: 0`) and whose bounding
	 * box overlaps the window is cloned. Descendants whose computed style is
	 * `display: none` or `visibility: hidden` are removed from the clone, and
	 * the clones' `outerHTML` strings are joined with newlines.
	 *
	 * @returns The serialized HTML; `<body></body>` when the document has no
	 *   body, or the first 50000 characters of `body.innerHTML` when no child
	 *   of the body qualifies as visible.
	 */
	async getVisibleHtml(): Promise<string> {
		const page = this.currentPage;

		const { result: html } = await timed('getVisibleHtml', async () => {
			return page.evaluate(() => {
				function isInViewport(el: Element): boolean {
					const rect = el.getBoundingClientRect();
					// Element is at least partially visible
					return (
						rect.bottom > 0 &&
						rect.right > 0 &&
						rect.top < window.innerHeight &&
						rect.left < window.innerWidth &&
						rect.width > 0 &&
						rect.height > 0
					);
				}

				function isVisible(el: Element): boolean {
					const style = window.getComputedStyle(el);
					return (
						style.display !== 'none' &&
						style.visibility !== 'hidden' &&
						style.opacity !== '0' &&
						isInViewport(el)
					);
				}

				function isHiddenByStyle(el: Element): boolean {
					const style = window.getComputedStyle(el);
					return style.display === 'none' || style.visibility === 'hidden';
				}

				// Walk the DOM and collect visible top-level elements
				const visibleParts: string[] = [];
				const body = document.body;
				if (!body) return '<body></body>';

				for (const child of Array.from(body.children)) {
					if (!isVisible(child)) continue;

					// Computed styles are only meaningful for elements attached to the
					// document; on a detached clone they come back empty, so hidden
					// nodes would never be detected there. Hidden descendants are
					// therefore identified on the live subtree and removed from the
					// clone by position: a deep clone has the same structure, so both
					// querySelectorAll('*') lists line up index by index.
					const liveDescendants = Array.from(child.querySelectorAll('*'));
					const clone = child.cloneNode(true) as Element;
					const clonedDescendants = Array.from(clone.querySelectorAll('*'));

					liveDescendants.forEach((live, index) => {
						if (isHiddenByStyle(live)) {
							clonedDescendants[index]?.remove();
						}
					});

					visibleParts.push(clone.outerHTML);
				}

				if (visibleParts.length === 0) {
					// Fallback: return the body's innerHTML truncated
					return body.innerHTML.slice(0, 50000);
				}

				return visibleParts.join('\n');
			});
		});

		return html;
	}

	// ── Launch & context setup (existing) ──

	/**
	 * Starts a Chromium process using the session's launch options.
	 *
	 * Empty `browserBinaryPath` / `channelName` values are forwarded as
	 * `undefined`, and a proxy object is only supplied when one is configured.
	 *
	 * @returns The launched browser.
	 */
	private async launchBrowser(): Promise<Browser> {
		const chromiumArgs = this.buildChromiumArgs();
		logger.debug(`Launching chromium with ${chromiumArgs.length} args`);

		const opts = this.launchOptions;
		const proxy = opts.proxy
			? {
					server: opts.proxy.server,
					username: opts.proxy.username,
					password: opts.proxy.password,
				}
			: undefined;

		return chromium.launch({
			headless: opts.headless,
			args: chromiumArgs,
			executablePath: opts.browserBinaryPath || undefined,
			channel: opts.channelName || undefined,
			proxy,
		});
	}

	/**
	 * Assembles the Chromium command-line flags: the window size first, then the
	 * caller-supplied extra arguments, then (only with `relaxedSecurity`) the
	 * flags that disable web security and site isolation.
	 */
	private buildChromiumArgs(): string[] {
		const { windowWidth, windowHeight, extraArgs, relaxedSecurity } = this.launchOptions;

		const relaxedSecurityFlags = relaxedSecurity
			? [
					'--disable-web-security',
					'--disable-site-isolation-trials',
					'--disable-features=IsolateOrigins,site-per-process',
				]
			: [];

		return [`--window-size=${windowWidth},${windowHeight}`, ...extraArgs, ...relaxedSecurityFlags];
	}

	/**
	 * Opens a new browser context on the launched browser. The viewport matches
	 * the configured window size, downloads are accepted, and HTTPS errors are
	 * ignored only when `relaxedSecurity` is set.
	 */
	private async createContext(): Promise<BrowserContext> {
		const { windowWidth: width, windowHeight: height, relaxedSecurity } = this.launchOptions;

		return this.browser!.newContext({
			viewport: { width, height },
			userAgent: undefined, // Use default
			javaScriptEnabled: true,
			ignoreHTTPSErrors: relaxedSecurity,
			acceptDownloads: true,
		});
	}

	/**
	 * Builds the watchdog list (built-in guards, then caller-supplied ones, then
	 * an optional persistence guard), orders it by ascending `priority`, and
	 * attaches every watchdog to the current page/context one after another.
	 */
	private async initializeWatchdogs(): Promise<void> {
		const { allowedUrls, blockedUrls, storageStatePath } = this.options;

		this.watchdogs = [
			new LocalInstanceGuard(),
			new UrlPolicyGuard(allowedUrls, blockedUrls),
			new DefaultHandlerGuard(),
			new PopupGuard(),
			new PageReadyGuard(),
			new DownloadGuard(),
			new BlankPageGuard(),
			new CrashGuard(),
			new ScreenshotGuard(),
			...(this.options.watchdogs ?? []),
			// State persistence is opt-in: only present when a path is configured
			...(storageStatePath ? [new PersistenceGuard(storageStatePath)] : []),
		];

		// Lower priority number runs first
		this.watchdogs.sort((left, right) => left.priority - right.priority);

		const guardContext: GuardContext = {
			page: this._currentPage!,
			context: this.context!,
			eventBus: this.eventBus,
		};

		// Attach sequentially, in priority order
		for (const guard of this.watchdogs) {
			await guard.attach(guardContext);
		}

		logger.debug(`Initialized ${this.watchdogs.length} watchdogs`);
	}

	// ── Navigation & interaction (existing, enhanced) ──

	/**
	 * Navigates the current page to `url` and waits for it to settle.
	 *
	 * The navigation waits for `domcontentloaded`, bounded by
	 * `maxWaitPageLoadMs`. A timeout is tolerated because the page may still be
	 * loading and usable; every other failure is rethrown. Afterwards the method
	 * waits for page readiness, drops the cached viewport, refreshes the target
	 * list, and emits `page-ready` followed by `viewport-state`.
	 *
	 * @param url Destination URL.
	 * @throws Whatever `page.goto` throws, unless it is a timeout.
	 */
	async navigate(url: string): Promise<void> {
		const page = this.currentPage;

		logger.debug(`Navigating to: ${url}`);

		try {
			await page.goto(url, {
				waitUntil: 'domcontentloaded',
				timeout: this.maxWaitPageLoadMs,
			});
		} catch (error) {
			// Only a positively identified timeout is swallowed. Anything else,
			// including a thrown value that is not an Error instance, must
			// propagate instead of being silently dropped.
			const isTimeout =
				error instanceof Error &&
				(error.name === 'TimeoutError' || error.message.includes('Timeout'));
			if (!isTimeout) {
				throw error;
			}
			logger.debug(`Navigation to ${url} timed out; continuing with the partially loaded page`);
		}

		await this.waitForPageReady();

		// Invalidate viewport cache after navigation (page dimensions may change)
		this.cachedViewport = null;

		// Refresh targets (navigation may create/destroy targets)
		await this.refreshTargets();

		this.eventBus.emit('page-ready', { url: page.url() });
		this.eventBus.emit('viewport-state', {
			url: page.url(),
			title: await page.title(),
			tabCount: this.context!.pages().length,
		});
	}

	/**
	 * Gives the current page time to settle: always sleeps for
	 * `minWaitPageLoadMs`, then waits for the `networkidle` load state for at most
	 * `waitForNetworkIdleMs`. Failing to reach network idle is not an error.
	 */
	async waitForPageReady(): Promise<void> {
		const page = this.currentPage;

		// Unconditional minimum delay
		const settleMs = this.minWaitPageLoadMs;
		await new Promise((done) => setTimeout(done, settleMs));

		// Best-effort network-idle wait
		const idleOptions = { timeout: this.waitForNetworkIdleMs };
		try {
			await page.waitForLoadState('networkidle', idleOptions);
		} catch {
			// Not reaching network idle in time is acceptable
		}
	}

	/**
	 * Clicks the element matching `selector` on the current page via
	 * `page.click`, with a 5000 ms timeout.
	 */
	async click(selector: string): Promise<void> {
		const page = this.currentPage;
		const clickOptions = { timeout: 5000 };
		await page.click(selector, clickOptions);
	}

	/**
	 * Sets the value of the element matching `selector` to `text` using
	 * `page.fill` on the current page.
	 */
	async type(selector: string, text: string): Promise<void> {
		const page = this.currentPage;
		await page.fill(selector, text);
	}

	/** Presses `key` through the current page's keyboard. */
	async pressKey(key: string): Promise<void> {
		const { keyboard } = this.currentPage;
		await keyboard.press(key);
	}

	/**
	 * Captures a PNG screenshot of the current page.
	 *
	 * @param fullPage Forwarded to `page.screenshot`; defaults to `false`.
	 * @returns The image as base64 plus the page's viewport size, falling back to
	 * the configured window size when the page reports no viewport.
	 */
	async screenshot(fullPage = false): Promise<{ base64: string; width: number; height: number }> {
		const page = this.currentPage;
		const png = await page.screenshot({ type: 'png', fullPage });
		const size = page.viewportSize();

		const width = size?.width ?? this.launchOptions.windowWidth;
		const height = size?.height ?? this.launchOptions.windowHeight;

		return { base64: png.toString('base64'), width, height };
	}

	/**
	 * Produces a snapshot of the session: the current page's URL and title plus
	 * one descriptor per open tab in the context.
	 *
	 * Tab titles are fetched concurrently; a tab whose title cannot be read is
	 * reported with an empty title.
	 */
	async getState(): Promise<ViewportSnapshot> {
		const page = this.currentPage;
		const pages = this.context!.pages();
		const activeIndex = pages.indexOf(page);

		const tabs: TabDescriptor[] = await Promise.all(
			pages.map(async (tabPage, index) => {
				// Identity and URL are captured synchronously, before any await
				const id = tabId(index);
				const url = tabPage.url();

				let title = '';
				try {
					title = await tabPage.title();
				} catch {
					title = '';
				}

				return { tabId: id, url, title, isActive: index === activeIndex };
			}),
		);

		const url = page.url();
		const title = await page.title();

		return { url, title, tabs, activeTabIndex: activeIndex };
	}

	/**
	 * Makes the tab at `tabIndex` the session's current page.
	 *
	 * The page is brought to the front, a new CDP session is created for it, the
	 * cached viewport is dropped, targets are refreshed, and `tab-changed` is
	 * emitted.
	 *
	 * @throws Error when `tabIndex` is negative or not below the number of tabs.
	 */
	async switchTab(tabIndex: number): Promise<void> {
		const pages = this.context!.pages();
		const tabCount = pages.length;

		const outOfRange = tabIndex < 0 || tabIndex >= tabCount;
		if (outOfRange) {
			throw new Error(`Invalid tab index: ${tabIndex}. Available tabs: ${tabCount}`);
		}

		this._currentPage = pages[tabIndex];
		await this._currentPage.bringToFront();

		// The CDP session is per page, so it has to be rebuilt for the new tab
		const owningContext = this._currentPage.context();
		this.cdpSession = await owningContext.newCDPSession(this._currentPage);

		// Viewport metrics belong to the previous tab
		this.cachedViewport = null;

		await this.refreshTargets();

		this.eventBus.emit('tab-changed', { tabIndex });
	}

	/**
	 * Closes a tab and re-points the session at one of the remaining tabs.
	 *
	 * After closing, the current page becomes the tab at
	 * `min(index, remaining - 1)`, a new CDP session is created for it, the
	 * cached viewport is dropped, targets are refreshed, and `tab-closed` is
	 * emitted.
	 *
	 * NOTE(review): the current page is re-pointed even when the closed tab was
	 * not the current one — confirm that this is intended.
	 *
	 * @param tabIndex Index of the tab to close; defaults to the current page.
	 * @throws Error when only one tab is open, or when the index does not refer
	 * to an open tab.
	 */
	async closeTab(tabIndex?: number): Promise<void> {
		const pages = this.context!.pages();
		const index = tabIndex ?? pages.indexOf(this.currentPage);

		if (pages.length <= 1) {
			throw new Error('Cannot close the last tab');
		}

		// Covers negative, too-large, fractional and NaN indexes (and a current
		// page that is not part of this context, where indexOf yields -1) with
		// the same explicit error switchTab raises, instead of crashing on
		// `undefined.close()`.
		const pageToClose = pages[index];
		if (!pageToClose) {
			throw new Error(`Invalid tab index: ${index}. Available tabs: ${pages.length}`);
		}

		await pageToClose.close();

		// Switch to remaining page
		const remainingPages = this.context!.pages();
		if (remainingPages.length > 0) {
			const newIndex = Math.min(index, remainingPages.length - 1);
			this._currentPage = remainingPages[newIndex];
			await this._currentPage.bringToFront();
			this.cdpSession = await this._currentPage.context().newCDPSession(this._currentPage);
		}

		// Invalidate caches
		this.cachedViewport = null;

		// Refresh targets after closing a tab
		await this.refreshTargets();

		this.eventBus.emit('tab-closed', { tabIndex: index });
	}

	/**
	 * Opens a new tab, makes it the current page, and optionally navigates it.
	 *
	 * The new page is bound through `setPage` (current page, CDP session and
	 * viewport cache) before any navigation starts. That way `navigate()` never
	 * runs with the previous tab's CDP session, and a failed navigation cannot
	 * leave the session pointing at a page whose CDP session belongs to another
	 * tab. Sessions created by `switchTab`/`setPage` are likewise kept across
	 * later `navigate()` calls, so creating the session first follows the same
	 * pattern.
	 *
	 * @param url Optional URL to load in the new tab.
	 * @throws Whatever `navigate` throws when `url` is given.
	 */
	async newTab(url?: string): Promise<void> {
		const page = await this.context!.newPage();

		// Sets _currentPage, creates the page's CDP session, clears the viewport cache
		await this.setPage(page);

		if (url) {
			await this.navigate(url);
		}
	}

	/**
	 * Evaluates `expression` in the current page and returns the result typed as
	 * `T`. The type is asserted, not checked at runtime.
	 */
	async evaluate<T>(expression: string): Promise<T> {
		const outcome = await this.currentPage.evaluate(expression);
		return outcome as T;
	}

	/**
	 * Adopts `page` as the session's current page: creates a CDP session for it
	 * on its own context and drops the cached viewport.
	 */
	async setPage(page: Page): Promise<void> {
		this._currentPage = page;

		const owningContext = page.context();
		this.cdpSession = await owningContext.newCDPSession(page);

		// Cached metrics described the previous page
		this.cachedViewport = null;
	}

	// ── Cleanup ──

	/**
	 * Shuts the session down and releases everything it holds.
	 *
	 * Steps, all best-effort so that one failure cannot prevent the rest:
	 * detach every watchdog, detach the CDP session, close the browser (skipped
	 * when `persistAfterClose` is set), reset the session's references and
	 * caches, emit `shutdown`, and remove all event-bus listeners.
	 */
	async close(): Promise<void> {
		logger.info('Closing browser session');

		// Detach all watchdogs. Each detach is isolated: a throwing watchdog must
		// not skip the remaining watchdogs or the browser shutdown below, which
		// would leak the browser process.
		for (const watchdog of this.watchdogs) {
			try {
				await watchdog.detach();
			} catch (error) {
				logger.debug(
					`Watchdog detach failed: ${error instanceof Error ? error.message : String(error)}`,
				);
			}
		}
		this.watchdogs = [];

		// Close CDP session
		if (this.cdpSession) {
			try {
				await this.cdpSession.detach();
			} catch {
				// Ignore
			}
			this.cdpSession = null;
		}

		// Close browser
		if (this.browser && !this.launchOptions.persistAfterClose) {
			try {
				await this.browser.close();
			} catch {
				// Ignore
			}
		}

		this.browser = null;
		this.context = null;
		this._currentPage = null;
		this._isConnected = false;
		this.knownTargets.clear();
		this.cachedViewport = null;

		// Listeners are notified first, then dropped
		this.eventBus.emit('shutdown', undefined as any);
		this.eventBus.removeAllListeners();

		logger.info('Browser session closed');
	}

	/**
	 * AsyncDisposable support: disposing the session is equivalent to calling
	 * `close()`.
	 */
	async [Symbol.asyncDispose](): Promise<void> {
		return this.close();
	}
}

// ── Helpers ──

/**
 * CDP target type strings that map onto a specific member of our Target type
 * union. `shared_worker` is folded into `worker`.
 */
const TARGET_TYPE_BY_CDP_TYPE: ReadonlyMap<
	string,
	'page' | 'iframe' | 'service_worker' | 'worker'
> = new Map([
	['page', 'page'],
	['iframe', 'iframe'],
	['service_worker', 'service_worker'],
	['worker', 'worker'],
	['shared_worker', 'worker'],
]);

/**
 * Maps a CDP target type string onto our Target type union.
 *
 * @param cdpType Target type as reported by CDP.
 * @returns The matching union member, or `'other'` for any unlisted type.
 */
function normalizeTargetType(
	cdpType: string,
): 'page' | 'iframe' | 'service_worker' | 'worker' | 'other' {
	return TARGET_TYPE_BY_CDP_TYPE.get(cdpType) ?? 'other';
}


================================================
FILE: packages/core/src/viewport/visual-tracer.ts
================================================
import type { Page } from 'playwright';

/**
 * Optional settings accepted by the `VisualTracer` constructor. Omitted keys
 * fall back to `DEFAULT_OPTIONS`.
 */
export interface VisualTracerOptions {
	/** CSS color used as the background fill of the `highlightElement` overlay. */
	highlightColor?: string;
	/** Time in milliseconds an overlay stays on screen before it is faded out or removed. */
	highlightDuration?: number;
	/** Base font size in CSS pixels for overlay text; some labels scale or offset it. */
	annotationFontSize?: number;
	/**
	 * NOTE(review): not read in the visible part of this file — presumably
	 * toggles the timeline overlay; confirm against callers.
	 */
	showTimeline?: boolean;
	/**
	 * NOTE(review): not read in the visible part of this file — presumably
	 * toggles the coordinate overlay; confirm against callers.
	 */
	showCoordinates?: boolean;
	/**
	 * CSS colors keyed by action name. The tracer reads `click`, `scroll`,
	 * `type`, `navigate` and `default`; entries are merged over the defaults.
	 */
	actionColors?: Record<string, string>;
}

/**
 * Baseline values for every tracer option. The `VisualTracer` constructor
 * spreads caller options over these, merging `actionColors` key by key.
 */
const DEFAULT_OPTIONS: Required<VisualTracerOptions> = {
	// Translucent red fill for element highlights
	highlightColor: 'rgba(255, 0, 0, 0.3)',
	// Milliseconds; used as the delay before overlays fade out or are removed
	highlightDuration: 2000,
	// CSS pixels
	annotationFontSize: 14,
	showTimeline: false,
	showCoordinates: false,
	// Per-action accent colors; `default` is the fallback entry
	actionColors: {
		click: '#ff4444',
		scroll: '#44aaff',
		type: '#44cc44',
		navigate: '#ff9900',
		default: '#aa44ff',
	},
};

// Attribute set (with an empty value) on the root element of every overlay the
// tracer injects into the page. Presumably used to locate or exclude those
// overlays later — the consumer is outside the visible part of this file.
const OVERLAY_ATTR = 'data-demo-mode-overlay';

export class VisualTracer {
	private options: Required<VisualTracerOptions>;

	/**
	 * Creates a tracer whose settings are `DEFAULT_OPTIONS` overridden by
	 * `options`. `actionColors` is merged key by key, so callers can override a
	 * single action color without restating the others.
	 *
	 * @param options Optional overrides; keys that are missing or explicitly
	 * `undefined` keep their default value.
	 */
	constructor(options?: VisualTracerOptions) {
		// A key passed explicitly as `undefined` would otherwise replace its
		// default through the spread below and surface as the text "undefined"
		// inside the generated CSS.
		const provided = Object.fromEntries(
			Object.entries(options ?? {}).filter(([, value]) => value !== undefined),
		) as VisualTracerOptions;

		this.options = {
			...DEFAULT_OPTIONS,
			...provided,
			actionColors: { ...DEFAULT_OPTIONS.actionColors, ...provided.actionColors },
		};
	}

	// ───────────────────────────────────────────
	// Existing methods
	// ───────────────────────────────────────────

	/**
	 * Draws a filled, red-bordered box over the first element matching
	 * `selector`, optionally with a text label above it. The box fades out after
	 * `highlightDuration` milliseconds and is then removed. Does nothing when no
	 * element matches.
	 *
	 * @param page Page to draw on.
	 * @param selector CSS selector of the element to highlight.
	 * @param label Optional caption rendered above the box.
	 */
	async highlightElement(page: Page, selector: string, label?: string): Promise<void> {
		await page.evaluate(
			({ selector: targetSelector, color: fill, duration: visibleMs, label: caption, fontSize: textSize, attr: markerAttr }) => {
				const target = document.querySelector(targetSelector);
				if (target === null) return;

				const rect = target.getBoundingClientRect();
				const box = document.createElement('div');
				box.setAttribute(markerAttr, '');
				box.style.cssText = `
					position: fixed;
					left: ${rect.left}px;
					top: ${rect.top}px;
					width: ${rect.width}px;
					height: ${rect.height}px;
					background: ${fill};
					border: 2px solid red;
					pointer-events: none;
					z-index: 999999;
					transition: opacity 0.3s;
				`;

				if (caption) {
					const captionEl = document.createElement('div');
					captionEl.textContent = caption;
					captionEl.style.cssText = `
						position: absolute;
						top: -24px;
						left: 0;
						background: red;
						color: white;
						padding: 2px 6px;
						font-size: ${textSize}px;
						font-family: monospace;
						border-radius: 3px;
						white-space: nowrap;
					`;
					box.appendChild(captionEl);
				}

				document.body.appendChild(box);

				// Fade first, then drop the node once the 0.3s transition is over
				const fadeOutAndRemove = () => {
					box.style.opacity = '0';
					setTimeout(() => box.remove(), 300);
				};
				setTimeout(fadeOutAndRemove, visibleMs);
			},
			{
				selector,
				color: this.options.highlightColor,
				duration: this.options.highlightDuration,
				label,
				fontSize: this.options.annotationFontSize,
				attr: OVERLAY_ATTR,
			},
		);
	}

	/**
	 * Shows a toast in the bottom-right corner with the action name in bold and,
	 * when given, a details line below it. The toast fades out after 2000 ms and
	 * is then removed.
	 *
	 * `action` and `details` are rendered as plain text: markup inside them is
	 * displayed literally and never parsed as HTML.
	 *
	 * @param page Page to draw on.
	 * @param action Name of the action.
	 * @param details Optional extra text shown on a second line.
	 */
	async showAction(page: Page, action: string, details?: string): Promise<void> {
		await page.evaluate(
			({ action, details, fontSize, attr }) => {
				const toast = document.createElement('div');
				toast.setAttribute(attr, '');
				toast.style.cssText = `
					position: fixed;
					bottom: 20px;
					right: 20px;
					background: rgba(0, 0, 0, 0.8);
					color: white;
					padding: 12px 20px;
					border-radius: 8px;
					font-family: monospace;
					font-size: ${fontSize}px;
					z-index: 999999;
					max-width: 400px;
					transition: opacity 0.3s;
				`;

				// Build nodes instead of assigning innerHTML: the strings can carry
				// arbitrary text (URLs, typed input, page content), and interpolating
				// them into markup would inject HTML or script into the page.
				const heading = document.createElement('strong');
				heading.textContent = action;
				toast.appendChild(heading);
				if (details) {
					toast.appendChild(document.createElement('br'));
					toast.appendChild(document.createTextNode(details));
				}

				document.body.appendChild(toast);
				setTimeout(() => {
					toast.style.opacity = '0';
					// Remove once the 0.3s opacity transition has finished
					setTimeout(() => toast.remove(), 300);
				}, 2000);
			},
			{ action, details, fontSize: this.options.annotationFontSize, attr: OVERLAY_ATTR },
		);
	}

	// ───────────────────────────────────────────
	// Action-specific visual overlays
	// ───────────────────────────────────────────

	/**
	 * Shows an expanding-ring ripple with a center dot at the given viewport
	 * coordinates, optionally with a label next to the click point. The overlay
	 * fades out after `highlightDuration` milliseconds and is then removed.
	 *
	 * @param page Page to draw on.
	 * @param x Horizontal viewport coordinate in CSS pixels.
	 * @param y Vertical viewport coordinate in CSS pixels.
	 * @param label Optional text shown beside the click point.
	 */
	async highlightClick(page: Page, x: number, y: number, label?: string): Promise<void> {
		await page.evaluate(
			({ x, y, label, color, duration, fontSize, attr }) => {
				const container = document.createElement('div');
				container.setAttribute(attr, '');
				// The opacity transition lives on the container because it is the
				// container's opacity that is set to 0 below; without it the overlay
				// disappears abruptly instead of fading.
				container.style.cssText = `
					position: fixed;
					left: 0;
					top: 0;
					width: 100%;
					height: 100%;
					pointer-events: none;
					z-index: 999999;
					transition: opacity 0.3s;
				`;

				// Inject keyframes for the expanding ring
				const styleEl = document.createElement('style');
				styleEl.textContent = `
					@keyframes demo-click-ring {
						0% { transform: translate(-50%, -50%) scale(0); opacity: 1; }
						70% { opacity: 0.6; }
						100% { transform: translate(-50%, -50%) scale(1); opacity: 0; }
					}
				`;
				container.appendChild(styleEl);

				// Create three staggered rings for a ripple effect
				for (let i = 0; i < 3; i++) {
					const ring = document.createElement('div');
					ring.style.cssText = `
						position: fixed;
						left: ${x}px;
						top: ${y}px;
						width: 60px;
						height: 60px;
						border: 3px solid ${color};
						border-radius: 50%;
						pointer-events: none;
						animation: demo-click-ring ${duration * 0.6}ms ease-out ${i * 120}ms forwards;
					`;
					container.appendChild(ring);
				}

				// Small filled dot at click center
				const dot = document.createElement('div');
				dot.style.cssText = `
					position: fixed;
					left: ${x}px;
					top: ${y}px;
					width: 10px;
					height: 10px;
					background: ${color};
					border-radius: 50%;
					transform: translate(-50%, -50%);
					pointer-events: none;
					transition: opacity 0.3s;
				`;
				container.appendChild(dot);

				// Optional label
				if (label) {
					const labelEl = document.createElement('div');
					labelEl.textContent = label;
					labelEl.style.cssText = `
						position: fixed;
						left: ${x + 16}px;
						top: ${y - 12}px;
						background: ${color};
						color: white;
						padding: 2px 8px;
						font-size: ${fontSize}px;
						font-family: monospace;
						border-radius: 3px;
						white-space: nowrap;
						pointer-events: none;
					`;
					container.appendChild(labelEl);
				}

				document.body.appendChild(container);
				setTimeout(() => {
					container.style.opacity = '0';
					// Remove once the 0.3s fade has finished
					setTimeout(() => container.remove(), 300);
				}, duration);
			},
			{
				x,
				y,
				label,
				color: this.options.actionColors.click,
				duration: this.options.highlightDuration,
				fontSize: this.options.annotationFontSize,
				attr: OVERLAY_ATTR,
			},
		);
	}

	/**
	 * Shows animated arrows and a "Scroll up"/"Scroll down" label on the right
	 * side of the viewport to indicate the scroll direction. The overlay fades
	 * out after `highlightDuration` milliseconds and is then removed.
	 *
	 * @param page Page to draw on.
	 * @param direction Direction the page is being scrolled.
	 */
	async highlightScroll(page: Page, direction: 'up' | 'down'): Promise<void> {
		await page.evaluate(
			({ direction, color, duration, fontSize, attr }) => {
				const container = document.createElement('div');
				container.setAttribute(attr, '');
				// The opacity transition lives on the container because it is the
				// container's opacity that is set to 0 below; without it the overlay
				// disappears abruptly instead of fading.
				container.style.cssText = `
					position: fixed;
					left: 0;
					top: 0;
					width: 100%;
					height: 100%;
					pointer-events: none;
					z-index: 999999;
					transition: opacity 0.3s;
				`;

				const styleEl = document.createElement('style');
				// Arrows drift 40px in the scroll direction while fading
				const translateY = direction === 'up' ? '-40px' : '40px';
				styleEl.textContent = `
					@keyframes demo-scroll-arrow {
						0% { opacity: 0; transform: translateX(-50%) translateY(0); }
						30% { opacity: 1; }
						100% { opacity: 0; transform: translateX(-50%) translateY(${translateY}); }
					}
				`;
				container.appendChild(styleEl);

				const arrowChar = direction === 'up' ? '\u25B2' : '\u25BC';

				// Show three staggered arrows along the right side
				for (let i = 0; i < 3; i++) {
					const arrow = document.createElement('div');
					// NOTE(review): this value is used as a percentage of the viewport
					// height, so the later arrows land at 100%/140% (up) or 120% (down),
					// at or beyond the bottom edge. Confirm the intended spacing.
					const topOffset = direction === 'up' ? 60 + i * 40 : 40 + i * 40;
					arrow.textContent = arrowChar;
					arrow.style.cssText = `
						position: fixed;
						right: 30px;
						top: ${topOffset}%;
						transform: translateX(-50%);
						color: ${color};
						font-size: ${fontSize * 2}px;
						pointer-events: none;
						animation: demo-scroll-arrow ${duration * 0.5}ms ease-out ${i * 150}ms forwards;
					`;
					container.appendChild(arrow);
				}

				// Direction label
				const label = document.createElement('div');
				label.textContent = `Scroll ${direction}`;
				label.style.cssText = `
					position: fixed;
					right: 12px;
					top: 50%;
					transform: translateY(-50%);
					background: ${color};
					color: white;
					padding: 4px 12px;
					font-size: ${fontSize}px;
					font-family: monospace;
					border-radius: 4px;
					pointer-events: none;
					transition: opacity 0.3s;
				`;
				container.appendChild(label);

				document.body.appendChild(container);
				setTimeout(() => {
					container.style.opacity = '0';
					// Remove once the 0.3s fade has finished
					setTimeout(() => container.remove(), 300);
				}, duration);
			},
			{
				direction,
				color: this.options.actionColors.scroll,
				duration: this.options.highlightDuration,
				fontSize: this.options.annotationFontSize,
				attr: OVERLAY_ATTR,
			},
		);
	}

	/**
	 * Outlines the first element matching `selector` and shows a keyboard icon
	 * with a preview of the text being typed just below it. Previews longer than
	 * 40 characters are cut to 37 characters plus "...". The overlay fades out
	 * after `highlightDuration` milliseconds and is then removed. Does nothing
	 * when no element matches.
	 *
	 * @param page Page to draw on.
	 * @param selector CSS selector of the element receiving the text.
	 * @param text Text being typed; rendered as plain text.
	 */
	async highlightType(page: Page, selector: string, text: string): Promise<void> {
		await page.evaluate(
			({ selector, text, color, duration, fontSize, attr }) => {
				const element = document.querySelector(selector);
				if (!element) return;

				const rect = element.getBoundingClientRect();

				const container = document.createElement('div');
				container.setAttribute(attr, '');
				// The opacity transition lives on the container because it is the
				// container's opacity that is set to 0 below; without it the overlay
				// disappears abruptly instead of fading.
				container.style.cssText = `
					position: fixed;
					left: 0;
					top: 0;
					width: 100%;
					height: 100%;
					pointer-events: none;
					z-index: 999999;
					transition: opacity 0.3s;
				`;

				const styleEl = document.createElement('style');
				styleEl.textContent = `
					@keyframes demo-type-blink {
						0%, 100% { border-right-color: transparent; }
						50% { border-right-color: white; }
					}
					@keyframes demo-type-fadein {
						0% { opacity: 0; transform: translateY(4px); }
						100% { opacity: 1; transform: translateY(0); }
					}
				`;
				container.appendChild(styleEl);

				// Highlight the target element
				const highlight = document.createElement('div');
				highlight.style.cssText = `
					position: fixed;
					left: ${rect.left - 2}px;
					top: ${rect.top - 2}px;
					width: ${rect.width + 4}px;
					height: ${rect.height + 4}px;
					border: 2px solid ${color};
					border-radius: 3px;
					pointer-events: none;
					transition: opacity 0.3s;
				`;
				container.appendChild(highlight);

				// Keyboard icon (simplified as a unicode symbol + label)
				const kbIcon = document.createElement('div');
				kbIcon.style.cssText = `
					position: fixed;
					left: ${rect.left}px;
					top: ${rect.bottom + 6}px;
					display: flex;
					align-items: center;
					gap: 6px;
					animation: demo-type-fadein 0.2s ease-out forwards;
					pointer-events: none;
				`;

				const iconSpan = document.createElement('span');
				iconSpan.textContent = '\u2328';
				iconSpan.style.cssText = `
					font-size: ${fontSize * 1.4}px;
					color: ${color};
				`;
				kbIcon.appendChild(iconSpan);

				// Text preview bubble with blinking cursor
				const textBubble = document.createElement('div');
				const truncated = text.length > 40 ? `${text.slice(0, 37)}...` : text;
				textBubble.textContent = truncated;
				textBubble.style.cssText = `
					background: ${color};
					color: white;
					padding: 3px 10px;
					font-size: ${fontSize}px;
					font-family: monospace;
					border-radius: 4px;
					white-space: nowrap;
					border-right: 2px solid white;
					animation: demo-type-blink 0.7s step-end infinite;
				`;
				kbIcon.appendChild(textBubble);

				container.appendChild(kbIcon);

				document.body.appendChild(container);
				setTimeout(() => {
					container.style.opacity = '0';
					// Remove once the 0.3s fade has finished
					setTimeout(() => container.remove(), 300);
				}, duration);
			},
			{
				selector,
				text,
				color: this.options.actionColors.type,
				duration: this.options.highlightDuration,
				fontSize: this.options.annotationFontSize,
				attr: OVERLAY_ATTR,
			},
		);
	}

	/**
	 * Slides a URL-bar style banner in from the top of the viewport to announce
	 * a navigation. The banner shows a globe icon, the target URL and a
	 * "Navigate" tag, with a thin progress bar along the top edge. The whole
	 * overlay is removed `highlightDuration + 100` milliseconds later.
	 *
	 * @param page Page to draw on.
	 * @param url URL being navigated to; rendered as plain text.
	 */
	async highlightNavigation(page: Page, url: string): Promise<void> {
		await page.evaluate(
			({ url: targetUrl, color: accent, duration: totalMs, fontSize: textSize, attr: markerAttr }) => {
				const root = document.createElement('div');
				root.setAttribute(markerAttr, '');
				root.style.cssText = `
					position: fixed;
					left: 0;
					top: 0;
					width: 100%;
					height: 100%;
					pointer-events: none;
					z-index: 999999;
				`;

				const keyframes = document.createElement('style');
				keyframes.textContent = `
					@keyframes demo-nav-slide {
						0% { transform: translateY(-100%); opacity: 0; }
						15% { transform: translateY(0); opacity: 1; }
						85% { transform: translateY(0); opacity: 1; }
						100% { transform: translateY(-100%); opacity: 0; }
					}
					@keyframes demo-nav-progress {
						0% { width: 0%; }
						100% { width: 100%; }
					}
				`;
				root.appendChild(keyframes);

				// Banner that slides in and back out over the full duration
				const banner = document.createElement('div');
				banner.style.cssText = `
					position: fixed;
					top: 0;
					left: 0;
					right: 0;
					background: rgba(0, 0, 0, 0.9);
					padding: 10px 16px;
					display: flex;
					align-items: center;
					gap: 10px;
					animation: demo-nav-slide ${totalMs}ms ease-in-out forwards;
					border-bottom: 2px solid ${accent};
				`;

				// Leading globe glyph
				const globeIcon = document.createElement('span');
				globeIcon.textContent = '\uD83C\uDF10';
				globeIcon.style.cssText = `font-size: ${textSize * 1.2}px;`;
				banner.appendChild(globeIcon);

				// Pill holding the URL, ellipsized when too long
				const addressPill = document.createElement('div');
				addressPill.style.cssText = `
					flex: 1;
					background: rgba(255, 255, 255, 0.1);
					border: 1px solid rgba(255, 255, 255, 0.2);
					border-radius: 20px;
					padding: 6px 14px;
					color: white;
					font-size: ${textSize}px;
					font-family: monospace;
					white-space: nowrap;
					overflow: hidden;
					text-overflow: ellipsis;
				`;
				addressPill.textContent = targetUrl;
				banner.appendChild(addressPill);

				// Trailing action tag
				const actionTag = document.createElement('div');
				actionTag.textContent = 'Navigate';
				actionTag.style.cssText = `
					background: ${accent};
					color: white;
					padding: 4px 10px;
					font-size: ${textSize - 2}px;
					font-family: monospace;
					border-radius: 4px;
					white-space: nowrap;
				`;
				banner.appendChild(actionTag);

				root.appendChild(banner);

				// Thin bar that fills from left to right
				const progressBar = document.createElement('div');
				progressBar.style.cssText = `
					position: fixed;
					top: 0;
					left: 0;
					height: 3px;
					background: ${accent};
					animation: demo-nav-progress ${totalMs * 0.7}ms ease-out forwards;
					z-index: 1;
				`;
				root.appendChild(progressBar);

				document.body.appendChild(root);

				// The slide-out is part of the animation, so only removal is scheduled
				const removeOverlay = () => root.remove();
				setTimeout(removeOverlay, totalMs + 100);
			},
			{
				url,
				color: this.options.actionColors.navigate,
				duration: this.options.highlightDuration,
				fontSize: this.options.annotationFontSize,
				attr: OVERLAY_ATTR,
			},
		);
	}

	// ───────────────────────────────────────────
	// Multi-element and composite overlays
	// ───────────────────────────────────────────

	/**
	 * Highlights several elements at once to show a sequence of targets.
	 *
	 * Each entry whose selector matches gets an outline box, a numbered badge
	 * (its 1-based position in `elements`) and a text label, appearing with a
	 * 150 ms stagger per position. Consecutive entries are joined by a dashed
	 * line between the centers of their bounding boxes. Entries whose selector
	 * matches nothing are skipped but still consume a number. The overlay fades
	 * out after `highlightDuration` milliseconds and is then removed.
	 *
	 * @param page Page to draw on.
	 * @param elements Ordered selectors with the label to show for each.
	 */
	async showElementSequence(
		page: Page,
		elements: Array<{ selector: string; label: string }>,
	): Promise<void> {
		await page.evaluate(
			({ elements, color, duration, fontSize, attr }) => {
				// Full-viewport, click-through root that holds every overlay node
				const container = document.createElement('div');
				container.setAttribute(attr, '');
				container.style.cssText = `
					position: fixed;
					left: 0;
					top: 0;
					width: 100%;
					height: 100%;
					pointer-events: none;
					z-index: 999999;
				`;

				const styleEl = document.createElement('style');
				styleEl.textContent = `
					@keyframes demo-seq-appear {
						0% { transform: scale(0); opacity: 0; }
						60% { transform: scale(1.15); }
						100% { transform: scale(1); opacity: 1; }
					}
				`;
				container.appendChild(styleEl);

				// Collect one bounding rect per entry, in order. A zero-sized rect
				// stands in for selectors that match nothing so indices stay aligned.
				const rects: DOMRect[] = [];
				for (const { selector } of elements) {
					const el = document.querySelector(selector);
					if (el) {
						rects.push(el.getBoundingClientRect());
					} else {
						rects.push(new DOMRect(0, 0, 0, 0));
					}
				}

				// SVG for connecting lines
				if (rects.length > 1) {
					const svg = document.createElementNS('http://www.w3.org/2000/svg', 'svg');
					svg.style.cssText = `
						position: fixed;
						left: 0;
						top: 0;
						width: 100%;
						height: 100%;
						pointer-events: none;
					`;
					for (let i = 0; i < rects.length - 1; i++) {
						const from = rects[i];
						const to = rects[i + 1];
						// Skip pairs where either end has zero width (this includes the
						// placeholder rect used for unmatched selectors)
						if (from.width === 0 || to.width === 0) continue;
						// Dashed line from center to center
						const line = document.createElementNS('http://www.w3.org/2000/svg', 'line');
						line.setAttribute('x1', String(from.left + from.width / 2));
						line.setAttribute('y1', String(from.top + from.height / 2));
						line.setAttribute('x2', String(to.left + to.width / 2));
						line.setAttribute('y2', String(to.top + to.height / 2));
						line.setAttribute('stroke', color);
						line.setAttribute('stroke-width', '2');
						line.setAttribute('stroke-dasharray', '6,4');
						line.setAttribute('opacity', '0.5');
						svg.appendChild(line);
					}
					container.appendChild(svg);
				}

				// Numbered badges and highlight boxes for each element
				elements.forEach(({ selector, label }, index) => {
					// The element is looked up again here rather than reusing `rects`
					const el = document.querySelector(selector);
					if (!el) return;

					const rect = el.getBoundingClientRect();

					// Highlight box, 3px larger than the element on every side
					const box = document.createElement('div');
					box.style.cssText = `
						position: fixed;
						left: ${rect.left - 3}px;
						top: ${rect.top - 3}px;
						width: ${rect.width + 6}px;
						height: ${rect.height + 6}px;
						border: 2px solid ${color};
						border-radius: 4px;
						pointer-events: none;
						animation: demo-seq-appear 0.3s ease-out ${index * 150}ms both;
					`;
					container.appendChild(box);

					// Numbered badge centered on the element's top-left corner
					const badge = document.createElement('div');
					badge.style.cssText = `
						position: fixed;
						left: ${rect.left - 12}px;
						top: ${rect.top - 12}px;
						width: 24px;
						height: 24px;
						background: ${color};
						color: white;
						border-radius: 50%;
						display: flex;
						align-items: center;
						justify-content: center;
						font-size: ${fontSize - 2}px;
						font-family: monospace;
						font-weight: bold;
						pointer-events: none;
						animation: demo-seq-appear 0.3s ease-out ${index * 150}ms both;
					`;
					badge.textContent = String(index + 1);
					container.appendChild(badge);

					// Label text, appearing 80ms after its box and badge
					const labelEl = document.createElement('div');
					labelEl.textContent = label;
					labelEl.style.cssText = `
						position: fixed;
						left: ${rect.left + 16}px;
						top: ${rect.top - 28}px;
						background: ${color};
						color: white;
						padding: 2px 8px;
						font-size: ${fontSize}px;
						font-family: monospace;
						border-radius: 3px;
						white-space: nowrap;
						pointer-events: none;
						animation: demo-seq-appear 0.3s ease-out ${index * 150 + 80}ms both;
					`;
					container.appendChild(labelEl);
				});

				document.body.appendChild(container);
				setTimeout(() => {
					// Opacity and transition are set in the same task, so the fade
					// still animates; the node is removed once it has finished
					container.style.opacity = '0';
					container.style.transition = 'opacity 0.3s';
					setTimeout(() => container.remove(), 300);
				}, duration);
			},
			{
				elements,
				color: this.options.actionColors.default,
				duration: this.options.highlightDuration,
				fontSize: this.options.annotationFontSize,
				attr: OVERLAY_ATTR,
			},
		);
	}

	/**
	 * Shows a horizontal timeline panel at the bottom of the viewport that
	 * summarizes the actions taken.
	 *
	 * Every step is drawn as a colored dot with the action name and its local
	 * time next to it; steps are joined by short connector lines. Successful
	 * steps use the configured color for their action (lower-cased name, falling
	 * back to the `default` color); failed steps are drawn in red. The panel
	 * slides in and out over `highlightDuration` milliseconds and the overlay is
	 * removed 100 ms after that.
	 *
	 * Action names are rendered as plain text and never parsed as HTML.
	 *
	 * @param page Page to draw on.
	 * @param steps Steps in display order; `timestamp` is passed to `new Date()`.
	 */
	async showTimeline(
		page: Page,
		steps: Array<{ action: string; timestamp: number; success: boolean }>,
	): Promise<void> {
		await page.evaluate(
			({ steps, colors, duration, fontSize, attr }) => {
				const container = document.createElement('div');
				container.setAttribute(attr, '');
				container.style.cssText = `
					position: fixed;
					left: 0;
					top: 0;
					width: 100%;
					height: 100%;
					pointer-events: none;
					z-index: 999999;
				`;

				const styleEl = document.createElement('style');
				styleEl.textContent = `
					@keyframes demo-timeline-slide {
						0% { transform: translateY(100%); opacity: 0; }
						10% { transform: translateY(0); opacity: 1; }
						90% { transform: translateY(0); opacity: 1; }
						100% { transform: translateY(100%); opacity: 0; }
					}
					@keyframes demo-timeline-dot {
						0% { transform: scale(0); }
						60% { transform: scale(1.3); }
						100% { transform: scale(1); }
					}
				`;
				container.appendChild(styleEl);

				// Timeline panel
				const panel = document.createElement('div');
				panel.style.cssText = `
					position: fixed;
					bottom: 0;
					left: 0;
					right: 0;
					background: rgba(0, 0, 0, 0.92);
					padding: 14px 20px 18px;
					animation: demo-timeline-slide ${duration}ms ease-in-out forwards;
					border-top: 2px solid rgba(255, 255, 255, 0.15);
				`;

				// Title
				const title = document.createElement('div');
				title.textContent = 'Action Timeline';
				title.style.cssText = `
					color: rgba(255, 255, 255, 0.6);
					font-size: ${fontSize - 2}px;
					font-family: monospace;
					margin-bottom: 10px;
					text-transform: uppercase;
					letter-spacing: 1px;
				`;
				panel.appendChild(title);

				// Timeline track
				const track = document.createElement('div');
				track.style.cssText = `
					display: flex;
					align-items: center;
					gap: 0;
					overflow-x: auto;
					padding-bottom: 4px;
				`;

				steps.forEach((step, index) => {
					// Step item
					const item = document.createElement('div');
					item.style.cssText = `
						display: flex;
						align-items: center;
						flex-shrink: 0;
					`;

					// Dot color: only the map's own keys count, so an action named
					// e.g. "constructor" cannot resolve to an inherited Object member
					const actionKey = step.action.toLowerCase();
					const ownColor = Object.prototype.hasOwnProperty.call(colors, actionKey)
						? colors[actionKey]
						: undefined;
					const dotColor = step.success ? (ownColor || colors.default) : '#ff4444';

					const dot = document.createElement('div');
					dot.style.cssText = `
						width: 14px;
						height: 14px;
						border-radius: 50%;
						background: ${dotColor};
						border: 2px solid ${step.success ? 'transparent' : '#ff0000'};
						flex-shrink: 0;
						animation: demo-timeline-dot 0.3s ease-out ${index * 100}ms both;
					`;
					item.appendChild(dot);

					// Action name and time next to the dot
					const label = document.createElement('div');
					const time = new Date(step.timestamp).toLocaleTimeString([], {
						hour: '2-digit',
						minute: '2-digit',
						second: '2-digit',
					});

					// Build nodes instead of assigning innerHTML: the action name is
					// free-form text, and interpolating it into markup would inject
					// HTML or script into the page.
					const actionLine = document.createElement('div');
					actionLine.textContent = step.action;
					actionLine.style.cssText = `color: white; font-size: ${fontSize - 1}px; font-family: monospace; white-space: nowrap;`;
					label.appendChild(actionLine);

					const timeLine = document.createElement('div');
					timeLine.textContent = time;
					timeLine.style.cssText = `color: rgba(255,255,255,0.4); font-size: ${fontSize - 3}px; font-family: monospace;`;
					label.appendChild(timeLine);

					label.style.cssText = `
						margin-left: 4px;
						margin-right: 4px;
					`;
					item.appendChild(label);

					track.appendChild(item);

					// Connector line between steps
					if (index < steps.length - 1) {
						const connector = document.createElement('div');
						connector.style.cssText = `
							width: 30px;
							height: 2px;
							background: rgba(255, 255, 255, 0.2);
							flex-shrink: 0;
							margin: 0 2px;
						`;
						track.appendChild(connector);
					}
				});

				panel.appendChild(track);
				container.appendChild(panel);

				document.body.appendChild(container);
				// The slide-out is part of the panel animation; only removal is scheduled
				setTimeout(() => container.remove(), duration + 100);
			},
			{
				steps,
				colors: this.options.actionColors,
				duration: this.options.highlightDuration,
				fontSize: this.options.annotationFontSize,
				attr: OVERLAY_ATTR,
			},
		);
	}

	/**
	 * Renders a temporary crosshair overlay with a coordinate readout.
	 *
	 * Inside the page this builds a click-through, full-viewport container
	 * holding: a faint horizontal and vertical guide through the point, four
	 * short thicker marks around it, and a label with the rounded coordinates.
	 * After the configured highlight duration the container fades out over
	 * 300ms and is removed from the DOM.
	 *
	 * @param page - Page in which the overlay is drawn.
	 * @param x - Horizontal position in pixels (applied as a fixed-position `left`).
	 * @param y - Vertical position in pixels (applied as a fixed-position `top`).
	 */
	async showCoordinates(page: Page, x: number, y: number): Promise<void> {
		await page.evaluate(
			(args) => {
				const { x, y, color, duration, fontSize, attr } = args;

				const overlay = document.createElement('div');
				overlay.setAttribute(attr, '');
				overlay.style.cssText = `
					position: fixed;
					left: 0;
					top: 0;
					width: 100%;
					height: 100%;
					pointer-events: none;
					z-index: 999999;
				`;

				// Creates a styled <div>, attaches it to the overlay and hands it back.
				const addBox = (css: string): HTMLDivElement => {
					const box = document.createElement('div');
					box.style.cssText = css;
					overlay.appendChild(box);
					return box;
				};

				// Faint guide spanning the full width at height y
				addBox(`
					position: fixed;
					left: 0;
					top: ${y}px;
					width: 100%;
					height: 1px;
					background: ${color};
					opacity: 0.4;
					pointer-events: none;
				`);

				// Faint guide spanning the full height at offset x
				addBox(`
					position: fixed;
					left: ${x}px;
					top: 0;
					width: 1px;
					height: 100%;
					background: ${color};
					opacity: 0.4;
					pointer-events: none;
				`);

				// Short, thicker marks around the centre: [left, top, width, height]
				const armLength = 12;
				const armSpan = armLength - 3;
				const centerMarks: Array<[number, number, number, number]> = [
					[x - armLength, y, armSpan, 2], // left of the point
					[x + 3, y, armSpan, 2], // right of the point
					[x, y - armLength, 2, armSpan], // above the point
					[x, y + 3, 2, armSpan], // below the point
				];
				centerMarks.forEach(([left, top, width, height]) => {
					addBox(`
						position: fixed;
						left: ${left}px;
						top: ${top}px;
						width: ${width}px;
						height: ${height}px;
						background: ${color};
						pointer-events: none;
					`);
				});

				// Readout placed just below and to the right of the point
				const readout = addBox(`
					position: fixed;
					left: ${x + 14}px;
					top: ${y + 14}px;
					background: rgba(0, 0, 0, 0.8);
					color: ${color};
					padding: 3px 8px;
					font-size: ${fontSize}px;
					font-family: monospace;
					border-radius: 3px;
					border: 1px solid ${color};
					white-space: nowrap;
					pointer-events: none;
				`);
				readout.textContent = `(${Math.round(x)}, ${Math.round(y)})`;

				document.body.appendChild(overlay);

				// Fade out once the highlight duration has elapsed, then drop the node.
				const fadeOutAndRemove = (): void => {
					overlay.style.opacity = '0';
					overlay.style.transition = 'opacity 0.3s';
					setTimeout(() => overlay.remove(), 300);
				};
				setTimeout(fadeOutAndRemove, duration);
			},
			{
				x,
				y,
				color: this.options.actionColors.default,
				duration: this.options.highlightDuration,
				fontSize: this.options.annotationFontSize,
				attr: OVERLAY_ATTR,
			},
		);
	}

	// ───────────────────────────────────────────
	// Cleanup
	// ───────────────────────────────────────────

	/**
	 * Deletes every element on the page that carries the overlay marker
	 * attribute, i.e. all demo-mode overlays that are still present.
	 *
	 * @param page - Page whose overlays should be removed.
	 */
	async clearOverlays(page: Page): Promise<void> {
		await page.evaluate(
			(args) => {
				// Attribute-presence selector: matches any element tagged as an overlay.
				const selector = `[${args.attr}]`;
				document.querySelectorAll(selector).forEach((node) => {
					node.remove();
				});
			},
			{ attr: OVERLAY_ATTR },
		);
	}
}


================================================
FILE: packages/core/tsconfig.json
================================================
{
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "rootDir": "src",
    "outDir": "dist"
  },
  "include": ["src/**/*.ts"]
}


================================================
FILE: packages/sandbox/package.json
================================================
{
  "name": "@open-browser/sandbox",
  "version": "1.1.0",
  "description": "Sandboxed execution environment for Open Browser",
  "type": "module",
  "main": "src/index.ts",
  "types": "src/index.ts",
  "exports": {
    ".": "./src/index.ts"
  },
  "scripts": {
    "build": "tsc --noEmit",
    "test": "bun test"
  },
  "dependencies": {
    "open-browser": "workspace:*"
  },
  "license": "MIT"
}


================================================
FILE: packages/sandbox/src/index.ts
================================================
/**
 * Public entry point of the sandbox package: re-exports the `Sandbox` class
 * together with its option, result, error, metrics, captured-output and
 * resource-snapshot types.
 */
export { Sandbox } from './sandbox.js';
export type {
	SandboxOptions,
	SandboxResult,
	SandboxError,
	SandboxErrorCategory,
	SandboxMetrics,
	CapturedOutput,
	ResourceSnapshot,
} from './types.js';


================================================
FILE: packages/sandbox/src/sandbox.ts
================================================
import type {
	SandboxOptions,
	SandboxResult,
	SandboxError,
	SandboxMetrics,
	CapturedOutput,
	ResourceSnapshot,
	SandboxErrorCategory,
} from './types.js';
import { Viewport, Agent, type AgentOptions, type CommandResult } from 'open-browser';

// ── Defaults ──

/**
 * Baseline configuration for a sandbox. Caller-supplied options are layered
 * on top of these values when a `Sandbox` is constructed.
 */
const DEFAULT_OPTIONS: Required<SandboxOptions> = {
	// Wall-clock budget for one run: five minutes, in milliseconds.
	timeout: 5 * 60 * 1_000,
	// Memory ceiling in megabytes.
	maxMemoryMB: 512,
	// Empty lists: no allow-list and no block-list by default.
	allowedDomains: [],
	blockedDomains: [],
	enableNetworking: true,
	enableFileAccess: false,
	// Resolved once, when this module is first evaluated.
	workDir: process.cwd(),
	// Resource usage is sampled once per second.
	resourceCheckIntervalMs: 1_000,
	captureOutput: true,
	stepLimit: 100,
};

// ── Resource Monitor ──

/**
 * Samples memory and CPU usage of the current Node.js process while a
 * sandbox run is in progress.
 *
 * A snapshot is taken immediately on `start()`, then on every interval tick,
 * and once more on `stop()`. When a snapshot's RSS exceeds the configured
 * limit the OOM callback is invoked, at most once per `start()`.
 *
 * NOTE(review): the figures come from `process.memoryUsage()` /
 * `process.cpuUsage()`, i.e. they describe this Node.js process only. If the
 * browser runs in a separate OS process its memory is presumably not
 * included - confirm against the browser launcher.
 */
class ResourceMonitor {
	private intervalId: ReturnType<typeof setInterval> | null = null;
	private snapshots: ResourceSnapshot[] = [];
	private peakMemoryMB = 0;
	private startCpuUsage: NodeJS.CpuUsage | null = null;
	private readonly limitMB: number;
	private onOOM: (() => void) | null = null;
	/** True once the OOM callback has fired for the current start() cycle. */
	private oomReported = false;

	/**
	 * @param limitMB - RSS threshold in megabytes above which OOM is reported.
	 */
	constructor(limitMB: number) {
		this.limitMB = limitMB;
	}

	/**
	 * Begin periodic sampling. Calling `start()` while already running
	 * replaces the previous timer instead of leaking it.
	 *
	 * @param intervalMs - Delay between snapshots in milliseconds.
	 * @param onOOM - Invoked (once) when a snapshot's RSS exceeds the limit.
	 */
	start(intervalMs: number, onOOM: () => void): void {
		// Drop any timer left over from an earlier start() so it cannot keep
		// ticking (and keep the event loop alive) after we lose its handle.
		this.clearTimer();

		this.startCpuUsage = process.cpuUsage();
		this.onOOM = onOOM;
		this.oomReported = false;
		this.takeSnapshot();

		this.intervalId = setInterval(() => {
			this.takeSnapshot();
		}, intervalMs);
	}

	/**
	 * Stop periodic sampling and record one final snapshot.
	 */
	stop(): void {
		this.clearTimer();
		// Final snapshot
		this.takeSnapshot();
	}

	/** Cancel the sampling timer, if one is active. */
	private clearTimer(): void {
		if (this.intervalId !== null) {
			clearInterval(this.intervalId);
			this.intervalId = null;
		}
	}

	/**
	 * Record current memory/CPU figures, update the peak RSS and report OOM
	 * when the limit is exceeded.
	 */
	private takeSnapshot(): void {
		const mem = process.memoryUsage();
		// CPU time is measured relative to start() when available.
		const cpu = this.startCpuUsage
			? process.cpuUsage(this.startCpuUsage)
			: process.cpuUsage();

		const bytesPerMB = 1024 * 1024;
		const rssMB = mem.rss / bytesPerMB;

		this.snapshots.push({
			timestampMs: Date.now(),
			heapUsedMB: mem.heapUsed / bytesPerMB,
			heapTotalMB: mem.heapTotal / bytesPerMB,
			rssMB,
			externalMB: mem.external / bytesPerMB,
			// process.cpuUsage() reports microseconds; convert to milliseconds.
			cpuUserMs: cpu.user / 1000,
			cpuSystemMs: cpu.system / 1000,
		});

		if (rssMB > this.peakMemoryMB) {
			this.peakMemoryMB = rssMB;
		}

		// Check OOM condition against RSS (total process memory). Report it a
		// single time rather than on every subsequent sample.
		if (rssMB > this.limitMB && this.onOOM && !this.oomReported) {
			this.oomReported = true;
			this.onOOM();
		}
	}

	/** Highest RSS seen in any snapshot, in MB, rounded to two decimals. */
	getPeakMemoryMB(): number {
		return Math.round(this.peakMemoryMB * 100) / 100;
	}

	/**
	 * Total CPU time (user + system) consumed since `start()`, in whole
	 * milliseconds. Returns 0 if the monitor was never started.
	 */
	getCpuTimeMs(): number {
		if (!this.startCpuUsage) return 0;
		const usage = process.cpuUsage(this.startCpuUsage);
		return Math.round((usage.user + usage.system) / 1000);
	}

	/** A copy of all snapshots recorded so far, oldest first. */
	getSnapshots(): ResourceSnapshot[] {
		return [...this.snapshots];
	}

	/** Current process RSS in MB, rounded to two decimals. */
	getCurrentMemoryMB(): number {
		const mem = process.memoryUsage();
		return Math.round((mem.rss / (1024 * 1024)) * 100) / 100;
	}
}

// ── Output Capture ──

/**
 * Records everything written to `process.stdout` and `process.stderr`
 * between `start()` and `stop()`, while still forwarding each write to the
 * stream's previous `write` implementation.
 *
 * The hooks are installed on the process-global streams, so output from any
 * code running in this process during the capture window is recorded.
 * Overlapping captures are tolerated: each hook keeps its own reference to
 * the `write` function it replaced, so stopping captures in any order never
 * leaves a broken `write` behind.
 */
class OutputCapture {
	private stdoutChunks: string[] = [];
	private stderrChunks: string[] = [];
	private restoreStdout: (() => void) | null = null;
	private restoreStderr: (() => void) | null = null;
	private active = false;
	// One decoder is enough; non-streaming decode() keeps no state between calls.
	private readonly decoder = new TextDecoder();

	/**
	 * Begin capturing. Discards output from any previous capture window.
	 * Does nothing if a capture is already active.
	 */
	start(): void {
		if (this.active) return;
		this.active = true;
		this.stdoutChunks = [];
		this.stderrChunks = [];

		this.restoreStdout = this.intercept(process.stdout, this.stdoutChunks);
		this.restoreStderr = this.intercept(process.stderr, this.stderrChunks);
	}

	/**
	 * Stop capturing and undo the stream hooks. Does nothing if no capture
	 * is active. Captured text stays available through `getOutput()`.
	 */
	stop(): void {
		if (!this.active) return;
		this.active = false;

		this.restoreStdout?.();
		this.restoreStdout = null;
		this.restoreStderr?.();
		this.restoreStderr = null;
	}

	/**
	 * Returns the text captured so far, with chunks concatenated in write order.
	 */
	getOutput(): CapturedOutput {
		return {
			stdout: this.stdoutChunks.join(''),
			stderr: this.stderrChunks.join(''),
		};
	}

	/**
	 * Replace `stream.write` with a hook that appends each chunk to `sink`
	 * and then forwards the call unchanged to the previous `write`.
	 *
	 * @returns A function that stops recording and, if the hook is still the
	 *          stream's current `write`, puts the previous function back.
	 */
	private intercept(stream: NodeJS.WriteStream, sink: string[]): () => void {
		// Captured in the closure (not read from `this` at call time) so the
		// hook keeps working as a pass-through even after this capture stopped.
		const previousWrite = stream.write;
		let recording = true;

		const hook = ((chunk: string | Uint8Array, ...rest: unknown[]): boolean => {
			if (recording) {
				sink.push(typeof chunk === 'string' ? chunk : this.decoder.decode(chunk));
			}
			// Still write to the underlying stream for real-time visibility.
			return Reflect.apply(previousWrite, stream, [chunk, ...rest]) as boolean;
		}) as typeof stream.write;

		stream.write = hook;

		return () => {
			recording = false;
			// Only unhook when we are on top; if someone else hooked the stream
			// after us, leave their hook in place (ours is now a pass-through).
			if (stream.write === hook) {
				stream.write = previousWrite;
			}
		};
	}
}

// ── Sandbox ──

/**
 * Runs a browser agent task under supervision: a fresh headless browser per
 * run, a wall-clock timeout, a memory limit checked by periodic sampling,
 * optional stdout/stderr capture, and execution metrics.
 *
 * Notes grounded in this file:
 * - `enableNetworking`, `enableFileAccess` and `workDir` are stored in the
 *   options but are not read by any method of this class.
 * - Output capture hooks the process-global stdout/stderr, so runs executing
 *   concurrently in one process will see each other's output.
 */
export class Sandbox {
	private options: Required<SandboxOptions>;

	/**
	 * @param options - Overrides for the defaults. Properties that are absent
	 *                  or explicitly `undefined` fall back to the default.
	 */
	constructor(options?: SandboxOptions) {
		// Drop explicit `undefined` values so they cannot clobber a default
		// (e.g. `{ timeout: undefined }` would otherwise time out immediately).
		const provided = Object.fromEntries(
			Object.entries(options ?? {}).filter(([, value]) => value !== undefined),
		) as SandboxOptions;

		this.options = { ...DEFAULT_OPTIONS, ...provided };

		// Own copies of the list options: never alias the shared defaults or
		// an array the caller may keep mutating.
		this.options.allowedDomains = [...this.options.allowedDomains];
		this.options.blockedDomains = [...this.options.blockedDomains];
	}

	/**
	 * Run an agent task inside the sandbox with resource monitoring,
	 * output capture, and timeout enforcement.
	 *
	 * Never rejects for run-time failures: errors thrown while starting the
	 * browser or running the agent are classified and returned as a failed
	 * `SandboxResult`. The browser is always closed before this resolves.
	 *
	 * @param agentOptions - Agent configuration without `browser`; the sandbox
	 *        supplies its own browser and overrides `settings.allowedUrls`,
	 *        `settings.blockedUrls` and `settings.stepLimit`. Caller-provided
	 *        `onStepStart` / `onStepEnd` hooks are still invoked.
	 * @returns The outcome, including memory usage, captured output (when
	 *          enabled) and metrics.
	 */
	async run(agentOptions: Omit<AgentOptions, 'browser'>): Promise<SandboxResult> {
		const startTime = Date.now();
		const resourceMonitor = new ResourceMonitor(this.options.maxMemoryMB);
		const outputCapture = new OutputCapture();

		// Track visited URLs and step/action counts
		const visitedUrls = new Set<string>();
		let stepsExecuted = 0;
		let totalActions = 0;

		// OOM abort controller
		let oomTriggered = false;
		const abortController = new AbortController();

		// Set once the timeout timer exists so `finally` can cancel it.
		let cancelTimeout: (() => void) | null = null;

		const browser = new Viewport({
			headless: true,
			allowedUrls: this.options.allowedDomains.length > 0
				? this.options.allowedDomains
				: undefined,
			blockedUrls: this.options.blockedDomains.length > 0
				? this.options.blockedDomains
				: undefined,
		});

		// Remember the URL currently shown, ignoring blank pages.
		const recordCurrentUrl = (): void => {
			try {
				const url = browser.currentPage?.url();
				if (url && url !== 'about:blank') {
					visitedUrls.add(url);
				}
			} catch {
				// Page may not be ready
			}
		};

		// Start resource monitoring with OOM callback
		resourceMonitor.start(this.options.resourceCheckIntervalMs, () => {
			if (!oomTriggered) {
				oomTriggered = true;
				abortController.abort();
			}
		});

		// Start output capture
		if (this.options.captureOutput) {
			outputCapture.start();
		}

		try {
			await browser.start();

			const agent = new Agent({
				...agentOptions,
				browser,
				settings: {
					...agentOptions.settings,
					allowedUrls: this.options.allowedDomains,
					blockedUrls: this.options.blockedDomains,
					stepLimit: this.options.stepLimit,
				},
				onStepStart: (step) => {
					stepsExecuted = step;
					recordCurrentUrl();
					// Delegate to caller's onStepStart if provided
					agentOptions.onStepStart?.(step);
				},
				onStepEnd: (step, results) => {
					totalActions += results.length;
					// The URL may have changed during the step.
					recordCurrentUrl();
					agentOptions.onStepEnd?.(step, results);
				},
			});

			const timeout = this.createTimeoutPromise(startTime);
			cancelTimeout = timeout.cancel;

			// Race the agent execution against timeout and OOM.
			// NOTE(review): when the timeout or OOM branch wins, the agent is
			// not told to stop; it keeps going until the browser is closed in
			// `finally`. If Agent exposes a stop/abort API, call it here.
			const result = await Promise.race([
				this.executeAgent(agent, startTime),
				timeout.promise,
				this.createOOMPromise(abortController.signal, startTime, resourceMonitor),
			]);

			// Build metrics
			const metrics = this.buildMetrics(
				startTime,
				resourceMonitor,
				stepsExecuted,
				visitedUrls,
				totalActions,
			);

			return {
				...result,
				// Keep a value the winning branch already reported (the OOM
				// branch reports peak memory); otherwise use the current RSS.
				memoryUsageMB: result.memoryUsageMB ?? resourceMonitor.getCurrentMemoryMB(),
				capturedOutput: this.options.captureOutput ? outputCapture.getOutput() : undefined,
				metrics,
			};
		} catch (error) {
			const sandboxError = this.classifyError(error, oomTriggered);
			const metrics = this.buildMetrics(
				startTime,
				resourceMonitor,
				stepsExecuted,
				visitedUrls,
				totalActions,
			);

			return {
				success: false,
				error: sandboxError,
				errorMessage: sandboxError.message,
				duration: Date.now() - startTime,
				memoryUsageMB: resourceMonitor.getCurrentMemoryMB(),
				capturedOutput: this.options.captureOutput ? outputCapture.getOutput() : undefined,
				metrics,
			};
		} finally {
			// Without this the pending timeout timer would keep the event loop
			// alive for up to `timeout` ms after the run has finished.
			cancelTimeout?.();
			// Always clean up in reverse order
			resourceMonitor.stop();
			if (this.options.captureOutput) {
				outputCapture.stop();
			}
			await this.forceCleanup(browser);
		}
	}

	/**
	 * Execute the agent and wrap the result.
	 */
	private async executeAgent(
		agent: Agent,
		startTime: number,
	): Promise<SandboxResult> {
		const result = await agent.run();
		return {
			success: result.success,
			output: result.finalResult,
			duration: Date.now() - startTime,
		};
	}

	/**
	 * Create a promise that resolves (never rejects) with a `timeout` failure
	 * once `options.timeout` milliseconds have elapsed.
	 *
	 * @returns The promise plus a `cancel` function that clears the timer;
	 *          after `cancel()` the promise simply never settles.
	 */
	private createTimeoutPromise(
		startTime: number,
	): { promise: Promise<SandboxResult>; cancel: () => void } {
		let timer: ReturnType<typeof setTimeout> | undefined;

		const promise = new Promise<SandboxResult>((resolve) => {
			timer = setTimeout(() => {
				const message = `Sandbox timeout after ${this.options.timeout}ms`;
				resolve({
					success: false,
					error: { category: 'timeout', message },
					errorMessage: message,
					duration: Date.now() - startTime,
				});
			}, this.options.timeout);
		});

		const cancel = (): void => {
			if (timer !== undefined) {
				clearTimeout(timer);
				timer = undefined;
			}
		};

		return { promise, cancel };
	}

	/**
	 * Create a promise that resolves (never rejects) with an `oom` failure
	 * when the given AbortSignal fires, or immediately if it already has.
	 */
	private createOOMPromise(
		signal: AbortSignal,
		startTime: number,
		monitor: ResourceMonitor,
	): Promise<SandboxResult> {
		return new Promise<SandboxResult>((resolve) => {
			const onAbort = () => {
				const message = `Memory limit exceeded: ${monitor.getPeakMemoryMB()}MB > ${this.options.maxMemoryMB}MB`;
				resolve({
					success: false,
					error: { category: 'oom', message },
					errorMessage: message,
					duration: Date.now() - startTime,
					memoryUsageMB: monitor.getPeakMemoryMB(),
				});
			};

			if (signal.aborted) {
				onAbort();
			} else {
				signal.addEventListener('abort', onAbort, { once: true });
			}
		});
	}

	/**
	 * Classify an error into a SandboxError with the appropriate category.
	 *
	 * Classification is by substring match on the error message, checked in
	 * this order: crash, timeout, agent_error, browser_error; anything else
	 * is `unknown`. If OOM was already triggered the error is reported as
	 * `oom` regardless of its message.
	 *
	 * @param error - The thrown value (an Error or anything else).
	 * @param oomTriggered - Whether the memory limit had been exceeded.
	 */
	private classifyError(error: unknown, oomTriggered: boolean): SandboxError {
		if (oomTriggered) {
			return {
				category: 'oom',
				message: 'Execution terminated due to memory limit exceeded',
				stack: error instanceof Error ? error.stack : undefined,
			};
		}

		const message = error instanceof Error ? error.message : String(error);
		const stack = error instanceof Error ? error.stack : undefined;

		// Detect browser crashes
		if (
			message.includes('browser has been closed') ||
			message.includes('Target page') ||
			message.includes('Target closed') ||
			message.includes('Protocol error')
		) {
			return { category: 'crash', message, stack };
		}

		// Detect timeout patterns
		if (
			message.includes('timeout') ||
			message.includes('Timeout') ||
			message.includes('ETIMEDOUT')
		) {
			return { category: 'timeout', message, stack };
		}

		// Detect agent-specific errors
		if (
			message.includes('Agent') ||
			message.includes('maximum steps') ||
			message.includes('stuck in a loop')
		) {
			return { category: 'agent_error', message, stack };
		}

		// Detect browser/navigation errors
		if (
			message.includes('net::ERR_') ||
			message.includes('Navigation') ||
			message.includes('navigation')
		) {
			return { category: 'browser_error', message, stack };
		}

		return { category: 'unknown', message, stack };
	}

	/**
	 * Build metrics from the execution data.
	 */
	private buildMetrics(
		startTime: number,
		monitor: ResourceMonitor,
		stepsExecuted: number,
		visitedUrls: Set<string>,
		totalActions: number,
	): SandboxMetrics {
		return {
			durationMs: Date.now() - startTime,
			peakMemoryMB: monitor.getPeakMemoryMB(),
			stepsExecuted,
			pagesVisited: visitedUrls.size,
			visitedUrls: [...visitedUrls],
			totalActions,
			cpuTimeMs: monitor.getCpuTimeMs(),
		};
	}

	/**
	 * Force cleanup of browser resources. Catches and ignores errors
	 * since the browser may already be crashed or closed. Waits at most
	 * five seconds for the browser to close.
	 */
	private async forceCleanup(browser: Viewport): Promise<void> {
		let graceTimer: ReturnType<typeof setTimeout> | undefined;
		try {
			await Promise.race([
				browser.close(),
				// Give cleanup 5 seconds max, then move on
				new Promise<void>((resolve) => {
					graceTimer = setTimeout(resolve, 5_000);
				}),
			]);
		} catch {
			// Browser may already be closed or crashed - ignore
		} finally {
			// Don't leave the grace timer pending once close() has settled.
			if (graceTimer !== undefined) {
				clearTimeout(graceTimer);
			}
		}
	}

	/**
	 * Get the current sandbox configuration.
	 *
	 * @returns A copy; mutating it (including its domain lists) does not
	 *          affect the sandbox.
	 */
	getOptions(): Readonly<Required<SandboxOptions>> {
		return {
			...this.options,
			allowedDomains: [...this.options.allowedDomains],
			blockedDomains: [...this.options.blockedDomains],
		};
	}
}


================================================
FILE: packages/sandbox/src/types.ts
================================================
// ── Sandbox configuration ──

/**
 * Configuration accepted by the `Sandbox` constructor. Every field is
 * optional; omitted fields take the defaults noted on each property.
 */
export interface SandboxOptions {
	/** Maximum execution time in milliseconds (default: 300000 = 5 minutes) */
	timeout?: number;
	/** Maximum memory usage in MB (default: 512) */
	maxMemoryMB?: number;
	/** Domains the agent is allowed to visit (default: empty list) */
	allowedDomains?: string[];
	/** Domains the agent is blocked from visiting (default: empty list) */
	blockedDomains?: string[];
	/**
	 * Whether network access is allowed (default: true)
	 * NOTE(review): not read by `Sandbox` in sandbox.ts - confirm enforcement.
	 */
	enableNetworking?: boolean;
	/**
	 * Whether file system access is allowed (default: false)
	 * NOTE(review): not read by `Sandbox` in sandbox.ts - confirm enforcement.
	 */
	enableFileAccess?: boolean;
	/**
	 * Working directory for the sandbox (default: `process.cwd()`)
	 * NOTE(review): not read by `Sandbox` in sandbox.ts - confirm usage.
	 */
	workDir?: string;
	/** Interval in ms to check resource usage (default: 1000) */
	resourceCheckIntervalMs?: number;
	/** Whether to capture stdout/stderr from the agent execution (default: true) */
	captureOutput?: boolean;
	/** Maximum number of agent steps (default: 100) */
	stepLimit?: number;
}

// ── Sandbox error categories ──

/**
 * Coarse classification of why a sandbox run failed.
 *
 * - `timeout`: the sandbox time limit elapsed, or the error message mentions a timeout.
 * - `oom`: the sampled process memory exceeded the configured limit.
 * - `crash`: the error message indicates the browser/target was closed or a protocol error.
 * - `agent_error`: the error message points at the agent (e.g. step limit, stuck in a loop).
 * - `browser_error`: the error message indicates a network or navigation failure.
 * - `unknown`: none of the above patterns matched.
 */
export type SandboxErrorCategory =
	| 'timeout'
	| 'oom'
	| 'crash'
	| 'agent_error'
	| 'browser_error'
	| 'unknown';

/** Structured description of a failed sandbox run. */
export interface SandboxError {
	/** Coarse failure class, usable for programmatic branching */
	category: SandboxErrorCategory;
	/** Human-readable description of the failure */
	message: string;
	/** Original stack trace if available */
	stack?: string;
}

// ── Output capture ──

/** Text written to the process's standard streams while a run was being captured. */
export interface CapturedOutput {
	/** Everything written to stdout, concatenated in write order */
	stdout: string;
	/** Everything written to stderr, concatenated in write order */
	stderr: string;
}

// ── Metrics ──

/** Execution statistics collected for a single sandbox run. */
export interface SandboxMetrics {
	/** Total execution time in milliseconds */
	durationMs: number;
	/** Peak memory usage in MB */
	peakMemoryMB: number;
	/** Number of agent steps executed */
	stepsExecuted: number;
	/** Number of unique pages visited */
	pagesVisited: number;
	/** URLs of pages visited */
	visitedUrls: string[];
	/** Number of actions taken across all steps */
	totalActions: number;
	/** CPU time used (user + system) in milliseconds */
	cpuTimeMs: number;
}

// ── Sandbox result ──

/**
 * Outcome of a sandbox run. Failures are reported through `success: false`
 * together with `error` / `errorMessage`.
 */
export interface SandboxResult {
	/** Whether the agent run completed successfully */
	success: boolean;
	/** Final result produced by the agent, when it finished */
	output?: string;
	/** Structured failure details, present when the run failed */
	error?: SandboxError;
	/** Legacy string error for backwards compatibility */
	errorMessage?: string;
	/** Elapsed wall-clock time in milliseconds */
	duration: number;
	/** Process memory (RSS) in MB reported for the run */
	memoryUsageMB?: number;
	/** Captured stdout/stderr from the execution */
	capturedOutput?: CapturedOutput;
	/** Detailed execution metrics */
	metrics?: SandboxMetrics;
}

// ── Resource monitor state ──

/**
 * One sample of the Node.js process's resource usage, as recorded by the
 * sandbox's resource monitor. Memory figures are megabytes (bytes / 1024^2).
 */
export interface ResourceSnapshot {
	/** Time the sample was taken (`Date.now()`, ms since the Unix epoch) */
	timestampMs: number;
	/** `process.memoryUsage().heapUsed` in MB */
	heapUsedMB: number;
	/** `process.memoryUsage().heapTotal` in MB */
	heapTotalMB: number;
	/** `process.memoryUsage().rss` in MB; the value compared against the memory limit */
	rssMB: number;
	/** `process.memoryUsage().external` in MB */
	externalMB: number;
	/** User CPU time in milliseconds, measured from the start of monitoring */
	cpuUserMs: number;
	/** System CPU time in milliseconds, measured from the start of monitoring */
	cpuSystemMs: number;
}


================================================
FILE: packages/sandbox/tsconfig.json
================================================
{
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "rootDir": "src",
    "outDir": "dist"
  },
  "include": ["src/**/*.ts"]
}


================================================
FILE: tsconfig.base.json
================================================
{
  "compilerOptions": {
    "target": "ESNext",
    "module": "ESNext",
    "moduleResolution": "bundler",
    "esModuleInterop": true,
    "strict": true,
    "skipLibCheck": true,
    "declaration": true,
    "declarationMap": true,
    "sourceMap": true,
    "outDir": "dist",
    "rootDir": "src",
    "composite": true,
    "incremental": true,
    "resolveJsonModule": true,
    "isolatedModules": true,
    "forceConsistentCasingInFileNames": true,
    "noUnusedLocals": false,
    "noUnusedParameters": false,
    "noFallthroughCasesInSwitch": true,
    "allowImportingTsExtensions": true,
    "noEmit": true,
    "types": ["bun"]
  }
}


================================================
FILE: tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "ESNext",
    "module": "ESNext",
    "moduleResolution": "bundler",
    "lib": ["ESNext"],
    "outDir": "dist",
    "rootDir": "src",
    "strict": true,
    "esModuleInterop": true,
    "declaration": true,
    "declarationMap": true,
    "sourceMap": true,
    "resolveJsonModule": true,
    "forceConsistentCasingInFileNames": true,
    "skipLibCheck": true
  },
  "include": ["src/**/*.ts"],
  "exclude": ["node_modules", "dist", "**/__tests__/**"]
}