Repository: ntegrals/openbrowser
Branch: master
Commit: 622f36985df6
Files: 119
Total size: 697.5 KB
Directory structure:
gitextract_rxlca7z1/
├── .github/
│ ├── CONTRIBUTING.md
│ └── workflows/
│ └── ci.yml
├── .gitignore
├── LICENSE
├── README.md
├── biome.json
├── bunfig.toml
├── package.json
├── packages/
│ ├── cli/
│ │ ├── package.json
│ │ ├── src/
│ │ │ ├── commands/
│ │ │ │ ├── click.ts
│ │ │ │ ├── eval.ts
│ │ │ │ ├── extract.ts
│ │ │ │ ├── interactive.ts
│ │ │ │ ├── open.ts
│ │ │ │ ├── run.ts
│ │ │ │ ├── screenshot.ts
│ │ │ │ ├── sessions.ts
│ │ │ │ ├── state.ts
│ │ │ │ └── type.ts
│ │ │ ├── display.ts
│ │ │ ├── globals.ts
│ │ │ ├── index.ts
│ │ │ ├── protocol.ts
│ │ │ ├── server.ts
│ │ │ └── sessions.ts
│ │ └── tsconfig.json
│ ├── core/
│ │ ├── package.json
│ │ ├── src/
│ │ │ ├── agent/
│ │ │ │ ├── agent.test.ts
│ │ │ │ ├── agent.ts
│ │ │ │ ├── conversation/
│ │ │ │ │ ├── service.ts
│ │ │ │ │ ├── types.ts
│ │ │ │ │ └── utils.ts
│ │ │ │ ├── conversation.test.ts
│ │ │ │ ├── evaluator.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── instructions/
│ │ │ │ │ ├── instructions-compact.md
│ │ │ │ │ ├── instructions-direct.md
│ │ │ │ │ └── instructions.md
│ │ │ │ ├── instructions.ts
│ │ │ │ ├── replay-recorder.ts
│ │ │ │ ├── stall-detector.test.ts
│ │ │ │ ├── stall-detector.ts
│ │ │ │ └── types.ts
│ │ │ ├── bridge/
│ │ │ │ ├── adapter.ts
│ │ │ │ ├── client.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── mcp-types.ts
│ │ │ │ ├── server.test.ts
│ │ │ │ └── server.ts
│ │ │ ├── commands/
│ │ │ │ ├── catalog/
│ │ │ │ │ ├── catalog.ts
│ │ │ │ │ └── types.ts
│ │ │ │ ├── catalog.test.ts
│ │ │ │ ├── executor.test.ts
│ │ │ │ ├── executor.ts
│ │ │ │ ├── extraction/
│ │ │ │ │ └── extractor.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── types.ts
│ │ │ │ └── utils.ts
│ │ │ ├── config/
│ │ │ │ ├── config.ts
│ │ │ │ ├── index.ts
│ │ │ │ └── types.ts
│ │ │ ├── errors.ts
│ │ │ ├── index.ts
│ │ │ ├── logging.ts
│ │ │ ├── metering/
│ │ │ │ ├── index.ts
│ │ │ │ ├── tracker.test.ts
│ │ │ │ ├── tracker.ts
│ │ │ │ └── types.ts
│ │ │ ├── model/
│ │ │ │ ├── adapters/
│ │ │ │ │ └── vercel.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── interface.ts
│ │ │ │ ├── messages.ts
│ │ │ │ ├── schema-optimizer.ts
│ │ │ │ └── types.ts
│ │ │ ├── page/
│ │ │ │ ├── content-extractor.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── page-analyzer.test.ts
│ │ │ │ ├── page-analyzer.ts
│ │ │ │ ├── renderer/
│ │ │ │ │ ├── interactive-elements.ts
│ │ │ │ │ ├── layer-order.ts
│ │ │ │ │ └── tree-renderer.ts
│ │ │ │ ├── renderer.test.ts
│ │ │ │ ├── snapshot-builder.ts
│ │ │ │ └── types.ts
│ │ │ ├── sandbox/
│ │ │ │ ├── file-access.ts
│ │ │ │ └── index.ts
│ │ │ ├── telemetry.ts
│ │ │ ├── types.ts
│ │ │ ├── utils.ts
│ │ │ └── viewport/
│ │ │ ├── event-hub.ts
│ │ │ ├── events.ts
│ │ │ ├── guard-base.ts
│ │ │ ├── guards/
│ │ │ │ ├── blank-page.ts
│ │ │ │ ├── crash.ts
│ │ │ │ ├── default-handler.ts
│ │ │ │ ├── downloads.ts
│ │ │ │ ├── har-capture.ts
│ │ │ │ ├── local-instance.ts
│ │ │ │ ├── page-ready.ts
│ │ │ │ ├── permissions.ts
│ │ │ │ ├── persistence.ts
│ │ │ │ ├── popups.ts
│ │ │ │ ├── screenshot.ts
│ │ │ │ ├── url-policy.ts
│ │ │ │ └── video-capture.ts
│ │ │ ├── index.ts
│ │ │ ├── launch-profile.test.ts
│ │ │ ├── launch-profile.ts
│ │ │ ├── types.ts
│ │ │ ├── viewport.ts
│ │ │ └── visual-tracer.ts
│ │ └── tsconfig.json
│ └── sandbox/
│ ├── package.json
│ ├── src/
│ │ ├── index.ts
│ │ ├── sandbox.ts
│ │ └── types.ts
│ └── tsconfig.json
├── tsconfig.base.json
└── tsconfig.json
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/CONTRIBUTING.md
================================================
# Contributing to Open Browser
Thank you for your interest in contributing!
## Getting Started
1. Fork the repository
2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/openbrowser.git`
3. Install dependencies: `bun install`
4. Create a branch: `git checkout -b my-feature`
5. Make your changes and add tests
6. Run tests: `bun run test`
7. Submit a pull request
## Code Style
We use [Biome](https://biomejs.dev/) for formatting and linting. Run `bun run format` before committing.
## Reporting Issues
Please use GitHub Issues to report bugs or request features. Include:
- Steps to reproduce
- Expected vs actual behavior
- Browser and OS version
================================================
FILE: .github/workflows/ci.yml
================================================
# Continuous integration: install, type-check, test and lint with Bun.
name: CI

on:
  # NOTE(review): the workflow only listed `main`; `master` is added so the
  # checks also run when the default branch is named `master` — confirm the
  # repository's actual default branch and drop whichever entry is unused.
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: oven-sh/setup-bun@v2
      - run: bun install
      - run: bun run build
      - run: bun run test
      - run: bun run lint
================================================
FILE: .gitignore
================================================
node_modules/
dist/
.env
*.tsbuildinfo
.DS_Store
traces/
coverage/
recordings/
tmp/
*.log
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2024-2026 Open Browser Contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# Open Browser
AI-powered autonomous web browsing framework for TypeScript.
---
Give an AI agent a browser. It clicks, types, navigates, and extracts data — autonomously completing tasks on any website. Built on Playwright with first-class support for OpenAI, Anthropic, and Google models.
> **Production-ready since v1.0.** Contributions welcome.
## Why Open Browser?
- **Autonomous agents**: Describe a task in natural language, and an AI agent navigates the web to complete it — clicking, typing, scrolling, and extracting data without manual scripting
- **Multi-model support**: Works with OpenAI, Anthropic, and Google out of the box via the Vercel AI SDK — swap models with a single flag
- **Interactive REPL**: Drop into a live browser session and issue commands interactively — great for debugging, prototyping, and exploration
- **Sandboxed execution**: Run agents in resource-limited environments with CPU/memory monitoring, timeouts, and domain restrictions
- **Production-ready**: Stall detection, cost tracking, session management, replay recording, and comprehensive error handling
- **Open source**: MIT licensed, fully extensible, bring your own API keys
## Quick Start
```bash
# Install dependencies
bun install
# Set up your API keys (the repository ships no .env.example, so create the file yourself)
touch .env
# Edit .env with your API keys (see "Environment Variables" below)
# Run an agent
bun run open-browser run "Find the top story on Hacker News and summarize it"
# Or open a browser interactively
bun run open-browser interactive
```
## Architecture
Open Browser is a monorepo with three packages:
| Package | Description |
| --------------------------- | -------------------------------------------------------------------------- |
| **`open-browser`** | Core library — agent logic, browser control, DOM analysis, LLM integration |
| **`@open-browser/cli`** | Command-line interface for running agents and browser commands |
| **`@open-browser/sandbox`** | Sandboxed execution with resource limits and monitoring |
## CLI Commands
### Run an AI Agent
```bash
open-browser run <task> [options]
```
Describe what you want done. The agent figures out the rest.
```bash
# Search and extract information
open-browser run "Find the price of the MacBook Pro on apple.com"
# Fill out forms
open-browser run "Sign up for the newsletter on example.com with test@email.com"
# Multi-step workflows
open-browser run "Go to GitHub, find the open-browser repo, and star it"
```
| Option | Description |
| ---------------------------- | ----------------------------------------- |
| `-m, --model <model>` | Model to use (default: `gpt-4o`) |
| `-p, --provider <provider>` | Provider: `openai`, `anthropic`, `google` |
| `--headless / --no-headless` | Show or hide the browser window |
| `--max-steps <n>` | Max agent steps (default: `25`) |
| `-v, --verbose` | Show detailed step info |
| `--no-cost` | Hide cost tracking |
### Browser Commands
```bash
open-browser open <url> # Open a URL
open-browser click <selector> # Click an element
open-browser type <selector> <text> # Type into an input
open-browser screenshot [output] # Capture a screenshot
open-browser eval <expression> # Run JavaScript on the page
open-browser extract <goal> # Extract content as markdown
open-browser state # Show current URL, title, and tabs
open-browser sessions # List active browser sessions
```
### Interactive REPL
```bash
open-browser interactive
```
Drop into a live `browser>` prompt with full control:
```
browser> open https://news.ycombinator.com
browser> extract "top 5 stories with titles and points"
browser> click .morelink
browser> screenshot front-page.png
browser> help
```
## Using as a Library
```typescript
import { Agent, createViewport, createModel } from 'open-browser'
const viewport = await createViewport({ headless: true })
const model = createModel('openai', 'gpt-4o')
const agent = new Agent({
viewport,
model,
task: 'Go to example.com and extract the main heading',
settings: {
stepLimit: 50,
enableScreenshots: true,
},
})
const result = await agent.run()
console.log(result)
```
### Sandboxed Execution
Run agents with resource limits and monitoring:
```typescript
import { Sandbox } from '@open-browser/sandbox'
const sandbox = new Sandbox({
timeout: 300_000, // 5 minute timeout
maxMemoryMB: 512, // Memory limit
allowedDomains: ['example.com'],
stepLimit: 100,
captureOutput: true,
})
const result = await sandbox.run({
task: 'Complete the checkout form',
model: languageModel,
})
console.log(result.metrics) // steps, URLs visited, CPU time
```
## Configuration
### Environment Variables
```bash
# LLM Provider Keys (at least one required)
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_GENERATIVE_AI_API_KEY=...
# Browser
BROWSER_HEADLESS=true
BROWSER_DISABLE_SECURITY=false
# Recording & Debugging
OPEN_BROWSER_TRACE_PATH=./traces
OPEN_BROWSER_SAVE_RECORDING_PATH=./recordings
```
### Agent Configuration
| Setting | Default | Description |
| ------------------- | -------- | ----------------------------------------- |
| `stepLimit` | `100` | Maximum agent iterations |
| `commandsPerStep` | `10` | Actions per agent step |
| `failureThreshold` | `5` | Consecutive failures before stopping |
| `enableScreenshots` | `true` | Include page screenshots in agent context |
| `contextWindowSize` | `128000` | Token budget for conversation |
| `allowedUrls` | `[]` | Restrict navigation to specific URLs |
| `blockedUrls` | `[]` | Block navigation to specific URLs |
### Viewport Configuration
| Setting | Default | Description |
| ------------------ | --------------- | ------------------------------------------- |
| `headless` | `true` | Run browser without visible window |
| `width` / `height` | `1280` / `1100` | Browser window dimensions |
| `relaxedSecurity` | `false` | Disable browser security features |
| `proxy` | — | Proxy server configuration |
| `cookieFile` | — | Path to cookie file for persistent sessions |
## How It Works
```
┌─────────────┐
"Book a flight" │ │
───────────────► │ Agent │ ◄── LLM (OpenAI / Anthropic / Google)
│ │
└──────┬──────┘
│
┌──────▼──────┐
│ Commands │ click, type, scroll, extract, navigate...
└──────┬──────┘
│
┌──────▼──────┐
│ Viewport │ Playwright browser instance
└──────┬──────┘
│
┌──────▼──────┐
│ DOM / Page │ Snapshot, interactive elements, content
└─────────────┘
```
1. You describe a **task** in natural language
2. The **Agent** sends the current page state + task to an LLM
3. The LLM decides what **commands** to execute (click, type, navigate, extract...)
4. Commands execute against the **Viewport** (Playwright browser)
5. The agent observes the result, detects stalls, and loops until the task is complete
## Model Support
| Provider | Example Models | Flag |
| ------------- | ----------------------------------------------- | -------------- |
| **OpenAI** | `gpt-4o`, `gpt-4o-mini`, `o1` | `-p openai` |
| **Anthropic** | `claude-sonnet-4-5-20250929`, `claude-opus-4-6` | `-p anthropic` |
| **Google** | `gemini-2.0-flash`, `gemini-2.5-pro` | `-p google` |
## Project Structure
```
packages/
├── core/ # Core library (open-browser)
│ └── src/
│ ├── agent/ # Agent logic, conversation, stall detection
│ ├── commands/ # Action schemas and executor (25+ commands)
│ ├── viewport/ # Browser control, events, guards
│ ├── page/ # DOM analysis, content extraction
│ ├── model/ # LLM adapter and message formatting
│ ├── metering/ # Cost tracking
│ ├── bridge/ # IPC server/client
│ └── config/ # Configuration types
├── cli/ # CLI (@open-browser/cli)
│ └── src/
│ ├── commands/ # CLI command implementations
│ └── index.ts # Entry point
└── sandbox/ # Sandbox (@open-browser/sandbox)
└── src/
└── sandbox.ts # Resource-limited execution
```
## Development
```bash
# Install dependencies
bun install
# Type check
bun run build
# Run tests
bun run test
# Lint
bun run lint
# Format
bun run format
```
## Contributing
Contributions are welcome! Please see [CONTRIBUTING.md](.github/CONTRIBUTING.md) for guidelines.
## License
[MIT](LICENSE)
================================================
FILE: biome.json
================================================
{
"$schema": "https://biomejs.dev/schemas/1.9.0/schema.json",
"organizeImports": {
"enabled": true
},
"linter": {
"enabled": true,
"rules": {
"recommended": true,
"complexity": {
"noForEach": "off"
},
"style": {
"noNonNullAssertion": "off",
"useConst": "warn"
},
"suspicious": {
"noExplicitAny": "off"
}
}
},
"formatter": {
"enabled": true,
"indentStyle": "tab",
"indentWidth": 2,
"lineWidth": 120
},
"javascript": {
"formatter": {
"quoteStyle": "single",
"semicolons": "always",
"trailingCommas": "all"
}
},
"files": {
"ignore": ["node_modules", "dist", "*.json", "*.d.ts"]
}
}
================================================
FILE: bunfig.toml
================================================
[install]
peer = false
[test]
timeout = 60000
================================================
FILE: package.json
================================================
{
"name": "open-browser-monorepo",
"private": true,
"workspaces": ["packages/*"],
"scripts": {
"build": "bun run --filter '*' build",
"test": "bun run --filter '*' test",
"lint": "biome check .",
"format": "biome format --write ."
},
"devDependencies": {
"@biomejs/biome": "^1.9.4",
"@types/bun": "^1.2.0",
"typescript": "^5.8.0"
},
"trustedDependencies": [
"@biomejs/biome"
]
}
================================================
FILE: packages/cli/package.json
================================================
{
"name": "@open-browser/cli",
"version": "1.1.0",
"description": "CLI for Open Browser - AI-powered autonomous web browsing",
"type": "module",
"main": "src/index.ts",
"bin": {
"open-browser": "src/index.ts"
},
"scripts": {
"build": "tsc --noEmit",
"test": "bun test",
"start": "bun run src/index.ts"
},
"dependencies": {
"open-browser": "workspace:*",
"commander": "^12.1.0",
"chalk": "^5.4.0"
},
"license": "MIT"
}
================================================
FILE: packages/cli/src/commands/click.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';
/**
 * Registers the `click` subcommand on the given Commander program.
 *
 * The command takes one required CSS selector and an optional `--session <id>`.
 * When no session ID is supplied the session manager's default session is used.
 * The process exits with code 1 when no session is available or the click fails.
 *
 * @param program - Commander program to attach the subcommand to.
 */
export function registerClickCommand(program: Command): void {
	program
		.command('click')
		.description('Click on an element matching the given CSS selector')
		// `<selector>` declares a required positional argument.
		.argument('<selector>', 'CSS selector of the element to click')
		// The `<id>` placeholder is what makes Commander capture a value;
		// without it `--session` is parsed as a boolean flag.
		.option('-s, --session <id>', 'Session ID to use')
		.action(async (selector: string, options: { session?: string }) => {
			try {
				const browser = options.session
					? sessionManager.get(options.session)
					: sessionManager.getDefault();
				if (!browser) {
					console.error(chalk.red('No active session. Use "open" command first.'));
					process.exit(1);
				}
				await browser.click(selector);
				console.log(chalk.green('Clicked:'), selector);
			} catch (error) {
				console.error(chalk.red('Failed to click:'), error instanceof Error ? error.message : String(error));
				process.exit(1);
			}
		});
}
================================================
FILE: packages/cli/src/commands/eval.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';
/**
 * Registers the `eval` subcommand on the given Commander program.
 *
 * The expression is evaluated through the session's `evaluate` method and the
 * result is printed: `undefined`/`null` dimmed, objects as pretty-printed JSON,
 * everything else via `String()`. The process exits with code 1 when no session
 * is available or evaluation throws.
 *
 * @param program - Commander program to attach the subcommand to.
 */
export function registerEvalCommand(program: Command): void {
	program
		.command('eval')
		.description('Evaluate a JavaScript expression in the browser')
		// `<expression>` declares a required positional argument.
		.argument('<expression>', 'JavaScript expression to evaluate')
		// The `<id>` placeholder is what makes Commander capture a value;
		// without it `--session` is parsed as a boolean flag.
		.option('-s, --session <id>', 'Session ID to use')
		.action(async (expression: string, options: { session?: string }) => {
			try {
				const browser = options.session
					? sessionManager.get(options.session)
					: sessionManager.getDefault();
				if (!browser) {
					console.error(chalk.red('No active session. Use "open" command first.'));
					process.exit(1);
				}
				const result = await browser.evaluate(expression);
				if (result === undefined) {
					console.log(chalk.dim('undefined'));
				} else if (result === null) {
					console.log(chalk.dim('null'));
				} else if (typeof result === 'object') {
					console.log(JSON.stringify(result, null, 2));
				} else {
					console.log(String(result));
				}
			} catch (error) {
				console.error(chalk.red('Evaluation failed:'), error instanceof Error ? error.message : String(error));
				process.exit(1);
			}
		});
}
================================================
FILE: packages/cli/src/commands/extract.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { extractMarkdown } from 'open-browser';
import { sessionManager } from '../globals.js';
/**
 * Registers the `extract` subcommand on the given Commander program.
 *
 * The current page of the selected session is converted to markdown with
 * `extractMarkdown` and printed. The `goal` argument is only echoed as a label;
 * it is not passed to the extractor. The process exits with code 1 when no
 * session is available or extraction throws.
 *
 * @param program - Commander program to attach the subcommand to.
 */
export function registerExtractCommand(program: Command): void {
	program
		.command('extract')
		.description('Extract content from the current page as markdown')
		// `<goal>` declares a required positional argument.
		.argument('<goal>', 'Description of what to extract (used as a label)')
		// The `<id>` placeholder is what makes Commander capture a value;
		// without it `--session` is parsed as a boolean flag.
		.option('-s, --session <id>', 'Session ID to use')
		.action(async (goal: string, options: { session?: string }) => {
			try {
				const browser = options.session
					? sessionManager.get(options.session)
					: sessionManager.getDefault();
				if (!browser) {
					console.error(chalk.red('No active session. Use "open" command first.'));
					process.exit(1);
				}
				console.log(chalk.dim(`Extracting: ${goal}`));
				const markdown = await extractMarkdown(browser.currentPage);
				if (!markdown) {
					console.log(chalk.yellow('No content extracted from the page.'));
				} else {
					console.log(markdown);
				}
			} catch (error) {
				console.error(chalk.red('Extraction failed:'), error instanceof Error ? error.message : String(error));
				process.exit(1);
			}
		});
}
================================================
FILE: packages/cli/src/commands/interactive.ts
================================================
import * as readline from 'node:readline';
import type { Command } from 'commander';
import chalk from 'chalk';
import {
Viewport,
extractMarkdown,
} from 'open-browser';
import {
Spinner,
displayInfo,
displayError,
displaySeparator,
} from '../display.js';
/** Options object passed to the `interactive` command's action handler. */
interface InteractiveOptions {
// Value of the `--headless` flag; forwarded to the Viewport constructor.
headless: boolean;
}
/**
 * Registers the `interactive` (alias `repl`) subcommand: a REPL-like session
 * for browser automation.
 *
 * A Viewport is started, then each non-empty input line is tokenized with
 * `parseCommandLine` and dispatched to `handleCommand`. Errors thrown by a
 * command are displayed and the prompt continues. Closing the readline
 * interface (or a command that requests quit) closes the browser and exits
 * with code 0; a failure during startup exits with code 1.
 *
 * @param program - Commander program to attach the subcommand to.
 */
export function registerInteractiveCommand(program: Command): void {
	program
		.command('interactive')
		.alias('repl')
		.description('Start an interactive browser session (REPL mode)')
		.option('--headless', 'Run browser in headless mode', false)
		.action(async (options: InteractiveOptions) => {
			console.log(chalk.bold.white('Interactive Browser Session'));
			console.log(chalk.dim('Type "help" for available commands, "quit" to exit.'));
			displaySeparator();
			let browser: Viewport | null = null;
			try {
				const spinner = new Spinner('Starting browser...');
				spinner.start();
				browser = new Viewport({
					headless: options.headless,
				});
				await browser.start();
				spinner.stop(chalk.green('Browser ready.'));
				console.log('');
				const rl = readline.createInterface({
					input: process.stdin,
					output: process.stdout,
					prompt: chalk.cyan('browser> '),
					terminal: true,
				});
				rl.prompt();
				rl.on('line', async (line) => {
					const trimmed = line.trim();
					if (!trimmed) {
						rl.prompt();
						return;
					}
					const [command, ...args] = parseCommandLine(trimmed);
					// A line made only of quote characters (e.g. `""`) tokenizes to
					// nothing; treat it like an empty line instead of failing on
					// `undefined.toLowerCase()`.
					if (!command) {
						rl.prompt();
						return;
					}
					try {
						const shouldQuit = await handleCommand(
							command.toLowerCase(),
							args,
							browser!,
						);
						if (shouldQuit) {
							// Triggers the 'close' handler below, which shuts down the browser.
							rl.close();
							return;
						}
					} catch (error) {
						displayError(
							error instanceof Error ? error.message : String(error),
						);
					}
					rl.prompt();
				});
				rl.on('close', async () => {
					console.log('');
					displayInfo('Closing browser session...');
					if (browser) {
						// Best-effort shutdown: close errors are ignored.
						await browser.close().catch(() => {});
					}
					process.exit(0);
				});
			} catch (error) {
				displayError(
					error instanceof Error ? error.message : String(error),
				);
				if (browser) {
					await browser.close().catch(() => {});
				}
				process.exit(1);
			}
		});
}
// ── Command Parsing ──
/**
 * Splits a REPL input line into whitespace-separated tokens.
 *
 * Spaces and tabs separate tokens. Single or double quotes group characters
 * (including whitespace and the other quote character) into the current token;
 * the quote characters themselves are dropped. There is no escape character,
 * an unterminated quote simply runs to the end of the input, and a token that
 * ends up empty (e.g. `""`) is not emitted.
 *
 * @param input - Raw command line.
 * @returns The tokens in order of appearance.
 */
function parseCommandLine(input: string): string[] {
	const result: string[] = [];
	let buffer = '';
	let openQuote: string | null = null;

	// Emit the pending token, if any, and reset the buffer.
	const flush = (): void => {
		if (buffer) {
			result.push(buffer);
			buffer = '';
		}
	};

	for (const ch of input) {
		if (openQuote) {
			// Inside quotes: everything is literal until the matching quote.
			if (ch === openQuote) {
				openQuote = null;
			} else {
				buffer += ch;
			}
			continue;
		}
		switch (ch) {
			case '"':
			case "'":
				openQuote = ch;
				break;
			case ' ':
			case '\t':
				flush();
				break;
			default:
				buffer += ch;
		}
	}

	flush();
	return result;
}
// ── Command Handler ──
/**
 * Executes one REPL command against the given browser session.
 *
 * Matching is case-sensitive against lower-case names, so callers should pass
 * the command lower-cased. `click` and `screenshot` are the documented names;
 * `tap` and `capture` are kept as aliases. Errors thrown by the underlying
 * browser calls propagate to the caller, except for `back`, `forward` and
 * `reload`, whose failures are deliberately ignored.
 *
 * @param command - Command name (first token of the input line).
 * @param args - Remaining tokens of the input line.
 * @param browser - Viewport the command operates on.
 * @returns `true` when the session should end (`quit`, `exit`, `q`), otherwise `false`.
 */
async function handleCommand(
	command: string,
	args: string[],
	browser: Viewport,
): Promise<boolean> {
	switch (command) {
		case 'open':
		case 'goto':
		case 'navigate': {
			const url = args[0];
			if (!url) {
				displayError('Usage: open <url>');
				return false;
			}
			const spinner = new Spinner(`Navigating to ${url}...`);
			spinner.start();
			await browser.navigate(url);
			const finalUrl = browser.currentPage.url();
			spinner.stop(`${chalk.green('Loaded:')} ${finalUrl}`);
			return false;
		}
		case 'click':
		case 'tap': {
			// Re-join the tokens so selectors containing spaces work unquoted.
			const selector = args.join(' ');
			if (!selector) {
				displayError('Usage: click <selector>');
				return false;
			}
			await browser.click(selector);
			console.log(chalk.green('Clicked:'), selector);
			return false;
		}
		case 'type': {
			const selector = args[0];
			const text = args.slice(1).join(' ');
			if (!selector || !text) {
				displayError('Usage: type <selector> <text>');
				return false;
			}
			await browser.type(selector, text);
			console.log(chalk.green('Typed:'), text);
			return false;
		}
		case 'eval':
		case 'js': {
			const expression = args.join(' ');
			if (!expression) {
				displayError('Usage: eval <expression>');
				return false;
			}
			const result = await browser.evaluate(expression);
			if (result === undefined) {
				console.log(chalk.dim('undefined'));
			} else if (result === null) {
				console.log(chalk.dim('null'));
			} else if (typeof result === 'object') {
				console.log(JSON.stringify(result, null, 2));
			} else {
				console.log(String(result));
			}
			return false;
		}
		case 'extract':
		case 'markdown': {
			const spinner = new Spinner('Extracting page content...');
			spinner.start();
			const markdown = await extractMarkdown(browser.currentPage);
			spinner.stop();
			if (markdown) {
				// Show first 2000 chars
				const preview = markdown.length > 2000
					? `${markdown.slice(0, 2000)}\n${chalk.dim(`... (${markdown.length} chars total)`)}`
					: markdown;
				console.log(preview);
			} else {
				console.log(chalk.yellow('No content found.'));
			}
			return false;
		}
		case 'screenshot':
		case 'capture': {
			const outputPath = args[0] || 'screenshot.png';
			const result = await browser.screenshot(false);
			const fs = await import('node:fs');
			const path = await import('node:path');
			// The screenshot is returned base64-encoded; decode before writing.
			const buffer = Buffer.from(result.base64, 'base64');
			const resolved = path.resolve(outputPath);
			fs.writeFileSync(resolved, buffer);
			console.log(chalk.green('Screenshot saved:'), resolved);
			console.log(chalk.dim(`${result.width}x${result.height}`));
			return false;
		}
		case 'state':
		case 'info': {
			const state = await browser.getState();
			console.log(`${chalk.white('URL:')} ${state.url}`);
			console.log(`${chalk.white('Title:')} ${state.title}`);
			// The tab list is only shown when more than one tab is open.
			if (state.tabs.length > 1) {
				console.log(`${chalk.white('Tabs:')}`);
				for (const tab of state.tabs) {
					const marker = tab.isActive ? chalk.cyan(' > ') : ' ';
					console.log(`${marker}[${tab.tabId}] ${tab.title || '(untitled)'} - ${tab.url}`);
				}
			}
			return false;
		}
		case 'back': {
			// Failures (e.g. the 5s timeout) are ignored; the message prints regardless.
			await browser.currentPage.goBack({ timeout: 5000 }).catch(() => {});
			console.log(chalk.green('Navigated back'));
			return false;
		}
		case 'forward': {
			// Failures (e.g. the 5s timeout) are ignored; the message prints regardless.
			await browser.currentPage.goForward({ timeout: 5000 }).catch(() => {});
			console.log(chalk.green('Navigated forward'));
			return false;
		}
		case 'tabs': {
			const state = await browser.getState();
			for (const tab of state.tabs) {
				const marker = tab.isActive ? chalk.cyan(' > ') : ' ';
				console.log(`${marker}[${tab.tabId}] ${tab.title || '(untitled)'} - ${tab.url}`);
			}
			return false;
		}
		case 'url': {
			console.log(browser.currentPage.url());
			return false;
		}
		case 'title': {
			const title = await browser.currentPage.title();
			console.log(title);
			return false;
		}
		case 'reload':
		case 'refresh': {
			// Failures (e.g. the 10s timeout) are ignored; the message prints regardless.
			await browser.currentPage.reload({ timeout: 10000 }).catch(() => {});
			console.log(chalk.green('Page reloaded'));
			return false;
		}
		case 'wait': {
			const ms = Number.parseInt(args[0] || '1000', 10);
			// Reject non-numeric or negative input instead of "waiting NaNms".
			if (!Number.isFinite(ms) || ms < 0) {
				displayError('Usage: wait [ms]');
				return false;
			}
			console.log(chalk.dim(`Waiting ${ms}ms...`));
			await new Promise((resolve) => setTimeout(resolve, ms));
			return false;
		}
		case 'help': {
			printHelp();
			return false;
		}
		case 'quit':
		case 'exit':
		case 'q': {
			return true;
		}
		default: {
			console.log(chalk.yellow(`Unknown command: ${command}`));
			console.log(chalk.dim('Type "help" for available commands.'));
			return false;
		}
	}
}
/**
 * Prints the list of interactive-session commands, one per line, with the
 * command syntax padded to 25 columns followed by a short description.
 */
function printHelp(): void {
	console.log(chalk.bold('Available commands:'));
	console.log('');
	// [syntax, description] pairs; `<x>` marks a required and `[x]` an optional argument.
	const commands: ReadonlyArray<readonly [string, string]> = [
		['open <url>', 'Navigate to a URL'],
		['click <selector>', 'Click an element'],
		['type <selector> <text>', 'Type text into an element'],
		['eval <expression>', 'Run JavaScript in the browser'],
		['extract', 'Extract page content as markdown'],
		['screenshot [path]', 'Take a screenshot'],
		['state', 'Show current browser state'],
		['back', 'Navigate back'],
		['forward', 'Navigate forward'],
		['tabs', 'List open tabs'],
		['url', 'Show current URL'],
		['title', 'Show current page title'],
		['reload', 'Reload the current page'],
		['wait [ms]', 'Wait for the specified time'],
		['help', 'Show this help message'],
		['quit', 'Exit the interactive session'],
	];
	for (const [cmd, desc] of commands) {
		console.log(` ${chalk.cyan(cmd.padEnd(25))} ${desc}`);
	}
}
================================================
FILE: packages/cli/src/commands/open.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';
/**
 * Registers the `open` subcommand on the given Commander program.
 *
 * With `--session <id>` the URL is opened in that existing session (exit code 1
 * if it does not exist). Otherwise the default session is reused, or a new one
 * is created with the requested `--headless` setting. On success the session ID
 * and the page's final URL are printed; any failure exits with code 1.
 *
 * @param program - Commander program to attach the subcommand to.
 */
export function registerOpenCommand(program: Command): void {
	program
		.command('open')
		.description('Open a URL in the browser')
		// `<url>` declares a required positional argument.
		.argument('<url>', 'URL to navigate to')
		.option('--headless', 'Run in headless mode', false)
		// The `<id>` placeholder is what makes Commander capture a value;
		// without it `--session` is parsed as a boolean flag.
		.option('-s, --session <id>', 'Reuse an existing session')
		.action(async (url: string, options: { headless: boolean; session?: string }) => {
			try {
				let sessionId = options.session;
				if (sessionId) {
					const browser = sessionManager.get(sessionId);
					if (!browser) {
						console.error(chalk.red(`Session "${sessionId}" not found.`));
						process.exit(1);
					}
					await browser.navigate(url);
				} else {
					// Try to reuse the default session, or create a new one
					sessionId = sessionManager.getDefaultId();
					if (!sessionId) {
						sessionId = await sessionManager.create({
							headless: options.headless,
						});
					}
					const browser = sessionManager.get(sessionId)!;
					await browser.navigate(url);
				}
				const browser = sessionManager.get(sessionId)!;
				// Report the URL after navigation, which may differ from the input (redirects).
				const finalUrl = browser.currentPage.url();
				console.log(chalk.green('Session:'), sessionId);
				console.log(chalk.green('URL:'), finalUrl);
			} catch (error) {
				console.error(chalk.red('Failed to open URL:'), error instanceof Error ? error.message : String(error));
				process.exit(1);
			}
		});
}
================================================
FILE: packages/cli/src/commands/run.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import {
Agent,
Viewport,
VercelModelAdapter,
type LanguageModel,
type CommandResult,
type StepRecord,
} from 'open-browser';
import {
Spinner,
displayStep,
displayTotalCost,
displayResult,
displayHeader,
displaySeparator,
displayError,
} from '../display.js';
/**
 * Options object Commander passes to the `run` command's action handler.
 *
 * Commander derives property names from the long flag: `--max-steps` becomes
 * `maxSteps`, and a negatable `--no-cost` becomes `cost` (default `true`,
 * `false` when the flag is given). The legacy `stepLimit` / `noCost` names are
 * kept as optional fields for backward compatibility, but Commander does not
 * populate them.
 */
interface RunOptions {
	/** Model ID from `-m, --model`. */
	model: string;
	/** Provider name from `-p, --provider`. */
	provider: string;
	/** `true` unless `--no-headless` was given. */
	headless: boolean;
	/** Raw `--max-steps` value (a string; its default is `'25'`). */
	maxSteps?: string;
	/** Legacy name; not set by Commander for the `--max-steps` flag. */
	stepLimit?: number;
	/** Value of `-v, --verbose`. */
	verbose: boolean;
	/** `false` when `--no-cost` was given, otherwise `true`. */
	cost?: boolean;
	/** Legacy name; not set by Commander for the `--no-cost` flag. */
	noCost?: boolean;
}
/**
 * Dynamically imports the Vercel AI SDK provider package for `provider`,
 * instantiates the model identified by `modelId`, and wraps it in a
 * `VercelModelAdapter`.
 *
 * Provider packages are imported lazily so only the selected one is loaded.
 * Each provider factory is created with an empty settings object.
 *
 * @param provider - One of `openai`, `anthropic`, `google`.
 * @param modelId - Provider-specific model identifier.
 * @returns The adapter wrapping the created language model.
 * @throws Error when `provider` is not one of the supported names.
 */
async function createModel(provider: string, modelId: string): Promise<LanguageModel> {
	let languageModel: import('ai').LanguageModelV1;
	switch (provider) {
		case 'openai': {
			const { createOpenAI } = await import('@ai-sdk/openai');
			const openai = createOpenAI({});
			languageModel = openai(modelId);
			break;
		}
		case 'anthropic': {
			const { createAnthropic } = await import('@ai-sdk/anthropic');
			const anthropic = createAnthropic({});
			languageModel = anthropic(modelId);
			break;
		}
		case 'google': {
			const { createGoogleGenerativeAI } = await import('@ai-sdk/google');
			const google = createGoogleGenerativeAI({});
			languageModel = google(modelId);
			break;
		}
		default:
			throw new Error(
				`Unsupported provider: ${provider}. ` +
					'Supported: openai, anthropic, google',
			);
	}
	return new VercelModelAdapter({ model: languageModel });
}
/**
 * Registers the `run` subcommand, which drives an AI agent through a browser
 * task and reports per-step progress, the final result, cost/timing and errors.
 *
 * The process exits with code 0 when the agent reports success and 1 otherwise
 * (including invalid options and thrown errors). The exit happens only after
 * the browser has been closed.
 *
 * @param program - Commander program to attach the subcommand to.
 */
export function registerRunCommand(program: Command): void {
	program
		.command('run')
		.description('Run an AI agent to complete a browser task')
		.argument('<task>', 'Description of the task for the agent to complete')
		// Value placeholders are required for Commander to capture an option value.
		.option('-m, --model <model>', 'Model ID to use', 'gpt-4o')
		.option('-p, --provider <provider>', 'LLM provider (openai, anthropic, google)', 'openai')
		.option('--headless', 'Run browser in headless mode', true)
		.option('--no-headless', 'Show the browser window')
		.option('--max-steps <n>', 'Maximum number of agent steps', '25')
		.option('-v, --verbose', 'Show detailed step information', false)
		.option('--no-cost', 'Hide cost tracking information')
		.action(async (task: string, options: RunOptions & { maxSteps?: string | number; cost?: boolean }) => {
			// Commander exposes `--max-steps` as `maxSteps`; `stepLimit` is only a fallback.
			const rawStepLimit = options.maxSteps ?? options.stepLimit;
			const stepLimit = Number.parseInt(String(rawStepLimit), 10);
			if (!Number.isInteger(stepLimit) || stepLimit <= 0) {
				displayError(`Invalid --max-steps value: ${String(rawStepLimit)} (expected a positive integer)`);
				process.exit(1);
			}
			// Commander exposes `--no-cost` as `cost: false`; `noCost` is only a fallback.
			const showCost = options.cost !== false && !options.noCost;

			displayHeader(`Agent Task: ${task}`);
			console.log(
				`${chalk.dim('model:')} ${options.model} ` +
					`${chalk.dim('provider:')} ${options.provider} ` +
					`${chalk.dim('max steps:')} ${stepLimit}`,
			);
			displaySeparator();
			const spinner = new Spinner('Starting browser...');
			spinner.start();
			let browser: Viewport | null = null;
			// Exit is deferred until after `finally`: calling process.exit() inside
			// the try block would terminate before the browser is closed.
			let exitCode = 1;
			try {
				// Initialize the LLM
				spinner.update('Loading model...');
				const model = await createModel(options.provider, options.model);
				// Initialize the browser
				spinner.update('Starting browser...');
				browser = new Viewport({
					headless: options.headless,
				});
				await browser.start();
				spinner.update('Browser ready, starting agent...');
				// Track per-step timing (step number -> start timestamp in ms)
				const stepTimings = new Map<number, number>();
				let currentStepStart = 0;
				// Create the agent
				const agent = new Agent({
					task,
					model,
					browser,
					settings: {
						stepLimit,
					},
					onStepStart: (step) => {
						currentStepStart = Date.now();
						stepTimings.set(step, currentStepStart);
						spinner.update(`Step ${step}: thinking...`);
					},
					onStepEnd: (step, results) => {
						const durationMs = Date.now() - (stepTimings.get(step) ?? currentStepStart);
						spinner.stop();
						// Display each action result for this step
						for (const result of results) {
							displayStep({
								step,
								action: extractActionName(result),
								target: extractActionTarget(result),
								durationMs,
								success: result.success,
								error: result.error,
								extractedContent: result.extractedContent,
							});
						}
						if (options.verbose) {
							displaySeparator();
						}
						// Restart spinner for next step
						spinner.start();
						spinner.update(`Step ${step + 1}: thinking...`);
					},
				});
				spinner.update('Agent running...');
				// Execute the agent
				const result = await agent.run();
				spinner.stop();
				// Display result
				displayResult(result.success, result.finalResult);
				// Display cost summary
				if (showCost && result.totalCost) {
					displayTotalCost({
						steps: result.history.entries.length,
						inputTokens: result.totalCost.totalInputTokens,
						outputTokens: result.totalCost.totalOutputTokens,
						totalCost: result.totalCost.totalCost,
						durationMs: computeTotalDuration(result.history.entries),
					});
				} else if (showCost) {
					// Show basic timing even without cost data
					const totalMs = computeTotalDuration(result.history.entries);
					console.log('');
					console.log(
						chalk.dim(
							`Completed in ${result.history.entries.length} step(s), ` +
								`${(totalMs / 1000).toFixed(1)}s`,
						),
					);
				}
				// Display errors if any
				if (result.errors.length > 0) {
					console.log('');
					console.log(chalk.bold.yellow('Errors encountered:'));
					for (const err of result.errors) {
						console.log(` ${chalk.red('-')} ${err}`);
					}
				}
				exitCode = result.success ? 0 : 1;
			} catch (error) {
				spinner.stop();
				displayError(
					error instanceof Error ? error.message : String(error),
				);
				exitCode = 1;
			} finally {
				if (browser) {
					// Best-effort shutdown: close errors are ignored.
					await browser.close().catch(() => {});
				}
			}
			// Exit with appropriate code
			process.exit(exitCode);
		});
}
// ── Helpers ──
/**
 * Derive a short display label for a command result: `done` for a finishing
 * result, `extract` when content was extracted, otherwise `action` or
 * `failed_action` depending on the success flag.
 */
function extractActionName(result: CommandResult): string {
  let label: string;
  if (result.isDone) {
    label = 'done';
  } else if (result.extractedContent) {
    label = 'extract';
  } else if (result.success) {
    label = 'action';
  } else {
    label = 'failed_action';
  }
  return label;
}
/**
 * Return the first 80 characters of the extracted content to show as the
 * step's target, or `undefined` when nothing (or an empty string) was extracted.
 */
function extractActionTarget(result: CommandResult): string | undefined {
  const content = result.extractedContent;
  return content ? content.slice(0, 80) : undefined;
}
/** Add up the `duration` of every recorded step; an empty history yields 0. */
function computeTotalDuration(entries: StepRecord[]): number {
  let total = 0;
  for (const entry of entries) {
    total = total + entry.duration;
  }
  return total;
}
================================================
FILE: packages/cli/src/commands/screenshot.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import * as fs from 'node:fs';
import * as path from 'node:path';
import { sessionManager } from '../globals.js';
/**
 * Register the `screenshot` sub-command, which captures the current page of a
 * session and writes the decoded PNG data to disk.
 *
 * Exits with code 1 when no matching session exists or the capture fails.
 *
 * @param program - The root commander program to attach the command to.
 */
export function registerScreenshotCommand(program: Command): void {
  program
    .command('screenshot')
    .description('Take a screenshot of the current page')
    .argument('[output]', 'Output file path', 'screenshot.png')
    // The `<id>` placeholder makes commander treat the option as value-taking
    // rather than as a boolean flag.
    .option('-s, --session <id>', 'Session ID to use')
    .option('--full-page', 'Capture the full page', false)
    .action(async (output: string, options: { session?: string; fullPage: boolean }) => {
      try {
        const browser = options.session
          ? sessionManager.get(options.session)
          : sessionManager.getDefault();
        if (!browser) {
          // Distinguish "unknown ID" from "nothing is open" so the hint is accurate.
          const reason = options.session
            ? `Session "${options.session}" not found.`
            : 'No active session. Use "open" command first.';
          console.error(chalk.red(reason));
          process.exit(1);
        }

        const result = await browser.screenshot(options.fullPage);
        const buffer = Buffer.from(result.base64, 'base64');
        const outputPath = path.resolve(output);
        fs.writeFileSync(outputPath, buffer);

        console.log(chalk.green('Screenshot saved:'), outputPath);
        console.log(chalk.green('Dimensions:'), `${result.width}x${result.height}`);
      } catch (error) {
        console.error(
          chalk.red('Failed to take screenshot:'),
          error instanceof Error ? error.message : String(error),
        );
        process.exit(1);
      }
    });
}
================================================
FILE: packages/cli/src/commands/sessions.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';
/**
 * Register the session housekeeping commands:
 * `sessions` prints every tracked session, and `sessions:close` closes one
 * session by ID or, when no ID is given, all of them.
 *
 * @param program - The root commander program to attach the commands to.
 */
export function registerSessionsCommand(program: Command): void {
  // Shared formatting of a caught value for the error output below.
  const reasonOf = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  program
    .command('sessions')
    .description('List all active browser sessions')
    .action(() => {
      try {
        const active = sessionManager.list();
        if (active.length === 0) {
          console.log(chalk.yellow('No active sessions.'));
          return;
        }
        console.log(chalk.bold(`Active Sessions (${active.length}):`));
        active.forEach((entry) => {
          const createdAt = new Date(entry.createdAt).toLocaleTimeString();
          const lastUsedAt = new Date(entry.lastAccessedAt).toLocaleTimeString();
          console.log(` ${chalk.cyan(entry.id)} created ${createdAt} last used ${lastUsedAt}`);
        });
      } catch (error) {
        console.error(chalk.red('Failed to list sessions:'), reasonOf(error));
        process.exit(1);
      }
    });

  program
    .command('sessions:close')
    .description('Close a specific session or all sessions')
    .argument('[id]', 'Session ID to close (omit to close all)')
    .action(async (id?: string) => {
      try {
        if (!id) {
          // Capture the count first: closeAll() empties the registry.
          const total = sessionManager.activeCount;
          await sessionManager.closeAll();
          console.log(chalk.green(`Closed ${total} session(s).`));
          return;
        }
        const wasTracked = await sessionManager.close(id);
        if (wasTracked) {
          console.log(chalk.green('Closed session:'), id);
        } else {
          console.error(chalk.red(`Session "${id}" not found.`));
          process.exit(1);
        }
      } catch (error) {
        console.error(chalk.red('Failed to close session:'), reasonOf(error));
        process.exit(1);
      }
    });
}
================================================
FILE: packages/cli/src/commands/state.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';
/**
 * Register the `state` sub-command, which prints the URL, title and tab list
 * of a session's browser.
 *
 * Exits with code 1 when no matching session exists or the state lookup fails.
 *
 * @param program - The root commander program to attach the command to.
 */
export function registerStateCommand(program: Command): void {
  program
    .command('state')
    .description('Print the current browser state (URL, title, tabs)')
    // The `<id>` placeholder makes commander treat the option as value-taking
    // rather than as a boolean flag.
    .option('-s, --session <id>', 'Session ID to use')
    .action(async (options: { session?: string }) => {
      try {
        const browser = options.session
          ? sessionManager.get(options.session)
          : sessionManager.getDefault();
        if (!browser) {
          // Distinguish "unknown ID" from "nothing is open" so the hint is accurate.
          const reason = options.session
            ? `Session "${options.session}" not found.`
            : 'No active session. Use "open" command first.';
          console.error(chalk.red(reason));
          process.exit(1);
        }

        const state = await browser.getState();
        console.log(chalk.bold('Browser State'));
        console.log(chalk.green('URL:'), state.url);
        console.log(chalk.green('Title:'), state.title);
        console.log(chalk.green('Tabs:'), state.tabs.length);
        for (const tab of state.tabs) {
          // An arrow marks the active tab; other tabs get a blank of equal width.
          const marker = tab.isActive ? chalk.cyan('→') : ' ';
          console.log(` ${marker} [${tab.tabId}] ${tab.title || '(untitled)'} - ${tab.url}`);
        }
      } catch (error) {
        console.error(
          chalk.red('Failed to get state:'),
          error instanceof Error ? error.message : String(error),
        );
        process.exit(1);
      }
    });
}
================================================
FILE: packages/cli/src/commands/type.ts
================================================
import type { Command } from 'commander';
import chalk from 'chalk';
import { sessionManager } from '../globals.js';
/**
 * Register the `type` sub-command, which types text into the element matched
 * by a CSS selector in a session's browser.
 *
 * Exits with code 1 when no matching session exists or typing fails.
 *
 * @param program - The root commander program to attach the command to.
 */
export function registerTypeCommand(program: Command): void {
  program
    .command('type')
    .description('Type text into an element matching the given CSS selector')
    // Both positionals are required, so they are declared with angle brackets.
    .argument('<selector>', 'CSS selector of the input element')
    .argument('<text>', 'Text to type into the element')
    // The `<id>` placeholder makes commander treat the option as value-taking
    // rather than as a boolean flag.
    .option('-s, --session <id>', 'Session ID to use')
    .action(async (selector: string, text: string, options: { session?: string }) => {
      try {
        const browser = options.session
          ? sessionManager.get(options.session)
          : sessionManager.getDefault();
        if (!browser) {
          // Distinguish "unknown ID" from "nothing is open" so the hint is accurate.
          const reason = options.session
            ? `Session "${options.session}" not found.`
            : 'No active session. Use "open" command first.';
          console.error(chalk.red(reason));
          process.exit(1);
        }

        await browser.type(selector, text);
        console.log(chalk.green('Typed into:'), selector);
      } catch (error) {
        console.error(
          chalk.red('Failed to type:'),
          error instanceof Error ? error.message : String(error),
        );
        process.exit(1);
      }
    });
}
================================================
FILE: packages/cli/src/display.ts
================================================
import chalk from 'chalk';
// ── Spinner ──
/** Braille frames cycled through to animate the spinner. */
const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];

/**
 * Minimal terminal spinner that redraws a single stdout line every 80 ms.
 */
export class Spinner {
  // Typed via ReturnType so the handle matches whatever setInterval returns in
  // the host runtime.
  private intervalId: ReturnType<typeof setInterval> | null = null;
  private frameIndex = 0;
  private message: string;

  /**
   * @param message - Text shown next to the animated frame.
   */
  constructor(message: string) {
    this.message = message;
  }

  /** Begin animating. Calling this while already running is a no-op. */
  start(): void {
    if (this.intervalId) return;
    this.frameIndex = 0;
    this.intervalId = setInterval(() => {
      const frame = SPINNER_FRAMES[this.frameIndex % SPINNER_FRAMES.length];
      // `\r` returns to column 0 so each frame overwrites the previous one.
      process.stdout.write(`\r${chalk.cyan(frame)} ${this.message}`);
      this.frameIndex++;
    }, 80);
  }

  /** Replace the message; it is picked up on the next redraw. */
  update(message: string): void {
    this.message = message;
  }

  /**
   * Stop animating and clear the spinner line.
   *
   * @param finalMessage - Optional line printed after the spinner is cleared.
   */
  stop(finalMessage?: string): void {
    if (this.intervalId) {
      clearInterval(this.intervalId);
      this.intervalId = null;
    }
    // Clear the spinner line
    process.stdout.write('\r\x1b[K');
    if (finalMessage) {
      console.log(finalMessage);
    }
  }
}
// ── Step Display ──
/** Data rendered by `displayStep` for one action result of an agent step. */
export interface StepDisplayInfo {
  /** Step number shown in the `Step N` label. */
  step: number;
  /** Action label shown after the status icon. */
  action: string;
  /** Optional value printed on its own `target:` line. */
  target?: string;
  /** Duration printed with an `ms` suffix. */
  durationMs: number;
  /** Selects the check mark (true) or cross (false) status icon. */
  success: boolean;
  /** Optional message printed on its own `error:` line. */
  error?: string;
  /** Optional content printed on an `output:` line, truncated to 120 characters. */
  extractedContent?: string;
}
/**
 * Print one agent step: a headline with step number, status icon, action and
 * duration, followed by optional `target`, `error` and `output` detail lines.
 */
export function displayStep(info: StepDisplayInfo): void {
  const { step, action, durationMs, success, target, error, extractedContent } = info;

  const headline = [
    chalk.bold.white(`Step ${step}`),
    success ? chalk.green('✓') : chalk.red('✗'),
    chalk.yellow(action),
    chalk.dim(`${durationMs}ms`),
  ].join(' ');
  console.log(headline);

  if (target) {
    console.log(` ${chalk.dim('target:')} ${target}`);
  }
  if (error) {
    console.log(` ${chalk.red('error:')} ${error}`);
  }
  if (extractedContent) {
    // Long output is cut to 120 characters and marked with an ellipsis.
    let preview = extractedContent;
    if (extractedContent.length > 120) {
      preview = `${extractedContent.slice(0, 120)}...`;
    }
    console.log(` ${chalk.dim('output:')} ${preview}`);
  }
}
// ── Cost Display ──
/** Token counts and cost rendered by `displayStepCost` and `displayTotalCost`. */
export interface CostDisplayInfo {
  /** Number of input tokens, printed with locale grouping. */
  inputTokens: number;
  /** Number of output tokens, printed with locale grouping. */
  outputTokens: number;
  /** Cost printed after a `$` sign with four decimal places. */
  totalCost: number;
}
/**
 * Print a single dimmed line with the input/output token counts and the cost
 * of one step.
 */
export function displayStepCost(info: CostDisplayInfo): void {
  const inputCount = info.inputTokens.toLocaleString();
  const outputCount = info.outputTokens.toLocaleString();
  const tokenPart = chalk.dim(`tokens: ${inputCount} in / ${outputCount} out`);
  const costPart = chalk.dim(`cost: $${info.totalCost.toFixed(4)}`);
  console.log(` ${tokenPart} ${costPart}`);
}
/**
 * Print the end-of-run summary box: step count, duration in seconds, token
 * counts (input, output, combined) and the total cost.
 */
export function displayTotalCost(info: CostDisplayInfo & { steps: number; durationMs: number }): void {
  const rule = chalk.dim('─'.repeat(50));
  const combinedTokens = info.inputTokens + info.outputTokens;

  // Label/value pairs in the order they are printed.
  const rows: Array<[string, string]> = [
    ['Steps:', `${info.steps}`],
    ['Duration:', `${(info.durationMs / 1000).toFixed(1)}s`],
    ['Input tokens:', info.inputTokens.toLocaleString()],
    ['Output tokens:', info.outputTokens.toLocaleString()],
    ['Total tokens:', combinedTokens.toLocaleString()],
    ['Total cost:', `$${info.totalCost.toFixed(4)}`],
  ];

  console.log('');
  console.log(chalk.bold('Summary'));
  console.log(rule);
  for (const [label, value] of rows) {
    console.log(` ${chalk.white(label)} ${value}`);
  }
  console.log(rule);
}
// ── Progress Bar ──
/**
 * Redraw an in-place progress bar on the current stdout line.
 *
 * The completion ratio is clamped to [0, 1]; a non-positive `total` (or a NaN
 * ratio) is rendered as 0% instead of printing `NaN`, and negative inputs no
 * longer make `String.prototype.repeat` throw a RangeError.
 *
 * @param current - Units completed so far.
 * @param total - Units expected in total.
 * @param width - Bar width in characters (negative values are treated as 0).
 */
export function displayProgressBar(current: number, total: number, width = 30): void {
  const rawRatio = total > 0 ? current / total : 0;
  const ratio = Number.isNaN(rawRatio) ? 0 : Math.min(Math.max(rawRatio, 0), 1);
  const barWidth = Number.isFinite(width) ? Math.max(0, width) : 0;
  const filled = Math.round(ratio * barWidth);
  const empty = Math.max(0, barWidth - filled);
  const bar = chalk.green('█'.repeat(filled)) + chalk.dim('░'.repeat(empty));
  const pct = (ratio * 100).toFixed(0).padStart(3);
  // `\r` returns to column 0 so successive calls overwrite the same line.
  process.stdout.write(`\r [${bar}] ${pct}% (${current}/${total})`);
}
// ── Result Display ──
/**
 * Print the final task verdict and, when provided, the agent's output under a
 * `Result:` heading.
 */
export function displayResult(success: boolean, output?: string): void {
  const verdict = success
    ? chalk.bold.green('Task completed successfully')
    : chalk.bold.red('Task failed');
  console.log('');
  console.log(verdict);

  if (!output) {
    return;
  }
  console.log('');
  console.log(chalk.bold('Result:'));
  console.log(output);
}
// ── Helpers ──
/** Write `message` to stderr behind a red `Error:` label. */
export function displayError(message: string): void {
  const label = chalk.red('Error:');
  console.error(label, message);
}
/** Emit `message` via `console.warn` behind a yellow `Warning:` label. */
export function displayWarning(message: string): void {
  const label = chalk.yellow('Warning:');
  console.warn(label, message);
}
/** Print `message` to stdout behind a blue `Info:` label. */
export function displayInfo(message: string): void {
  const label = chalk.blue('Info:');
  console.log(label, message);
}
/** Print a dimmed horizontal rule, 60 characters wide. */
export function displaySeparator(): void {
  const rule = '─'.repeat(60);
  console.log(chalk.dim(rule));
}
/** Print a blank line, the bold title, and a 60-character double rule. */
export function displayHeader(title: string): void {
  const underline = chalk.dim('═'.repeat(60));
  const heading = chalk.bold.white(title);
  console.log('');
  console.log(heading);
  console.log(underline);
}
================================================
FILE: packages/cli/src/globals.ts
================================================
import { SessionManager } from './sessions.js';
/** Single SessionManager instance created at module load and imported by the CLI commands. */
export const sessionManager = new SessionManager();
================================================
FILE: packages/cli/src/index.ts
================================================
#!/usr/bin/env bun
import { Command } from 'commander';
import { registerOpenCommand } from './commands/open.js';
import { registerClickCommand } from './commands/click.js';
import { registerTypeCommand } from './commands/type.js';
import { registerStateCommand } from './commands/state.js';
import { registerScreenshotCommand } from './commands/screenshot.js';
import { registerEvalCommand } from './commands/eval.js';
import { registerExtractCommand } from './commands/extract.js';
import { registerSessionsCommand } from './commands/sessions.js';
import { registerRunCommand } from './commands/run.js';
import { registerInteractiveCommand } from './commands/interactive.js';
// Root CLI program; every sub-command below attaches itself to this instance.
const program = new Command();
program.name('open-browser');
program.description('AI-powered autonomous web browsing CLI');
program.version('0.1.0');

// Registration order is preserved from top to bottom.
const registrars: Array<(target: Command) => void> = [
  // ── Browser manipulation commands ──
  registerOpenCommand,
  registerClickCommand,
  registerTypeCommand,
  registerStateCommand,
  registerScreenshotCommand,
  registerEvalCommand,
  registerExtractCommand,
  registerSessionsCommand,
  // ── Agent and interactive commands ──
  registerRunCommand,
  registerInteractiveCommand,
];
for (const register of registrars) {
  register(program);
}

program.parse();
================================================
FILE: packages/cli/src/protocol.ts
================================================
/** One command sent to the CLI server, encoded as a single JSON line. */
export interface CLIRequest {
  /** Correlation ID echoed back in the matching response. */
  id: string;
  /** Name of the command to execute. */
  command: string;
  /** Command arguments; values are untyped and must be narrowed by the handler. */
  args: Record<string, unknown>;
}
/** Reply to a CLIRequest, encoded as a single JSON line. */
export interface CLIResponse {
  /** ID of the request this response answers. */
  id: string;
  /** Whether the command completed without error. */
  success: boolean;
  /** Optional command-specific payload. */
  data?: unknown;
  /** Optional error message describing a failure. */
  error?: string;
}
/** Encode a request as one JSON line terminated by a newline. */
export function serializeRequest(req: CLIRequest): string {
  const json = JSON.stringify(req);
  return `${json}\n`;
}
/**
 * Decode one JSON line into a request.
 *
 * The decoded value is validated rather than blindly cast: it must be a plain
 * object with string `id` and `command` fields. A missing `args` is normalised
 * to an empty object; an `args` that is present but not a plain object is
 * rejected.
 *
 * @param data - Raw line received from the client (surrounding whitespace is ignored).
 * @returns The request, or `null` when the line is not valid JSON or does not
 *          have the expected shape.
 */
export function parseRequest(data: string): CLIRequest | null {
  let decoded: unknown;
  try {
    decoded = JSON.parse(data.trim());
  } catch {
    return null;
  }

  if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
    return null;
  }
  const candidate = decoded as Record<string, unknown>;
  if (typeof candidate.id !== 'string' || typeof candidate.command !== 'string') {
    return null;
  }

  const rawArgs = candidate.args ?? {};
  if (typeof rawArgs !== 'object' || Array.isArray(rawArgs)) {
    return null;
  }

  return {
    id: candidate.id,
    command: candidate.command,
    args: rawArgs as Record<string, unknown>,
  };
}
/** Encode a response as one JSON line terminated by a newline. */
export function serializeResponse(res: CLIResponse): string {
  const json = JSON.stringify(res);
  return `${json}\n`;
}
/**
 * Decode one JSON line into a response.
 *
 * The decoded value is validated rather than blindly cast: it must be a plain
 * object with a string `id` and a boolean `success`.
 *
 * @param data - Raw line received from the server (surrounding whitespace is ignored).
 * @returns The response, or `null` when the line is not valid JSON or does not
 *          have the expected shape.
 */
export function parseResponse(data: string): CLIResponse | null {
  let decoded: unknown;
  try {
    decoded = JSON.parse(data.trim());
  } catch {
    return null;
  }

  if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
    return null;
  }
  const candidate = decoded as Record<string, unknown>;
  if (typeof candidate.id !== 'string' || typeof candidate.success !== 'boolean') {
    return null;
  }

  return decoded as CLIResponse;
}
================================================
FILE: packages/cli/src/server.ts
================================================
import * as net from 'node:net';
import * as fs from 'node:fs';
import * as path from 'node:path';
import * as os from 'node:os';
import { SessionManager } from './sessions.js';
import { type CLIRequest, type CLIResponse, parseRequest, serializeResponse } from './protocol.js';
// Directory under the OS temp dir that holds the server's socket file.
const SOCKET_DIR = path.join(os.tmpdir(), 'open-browser');
// Full path of the socket the CLI server listens on.
const SOCKET_PATH = path.join(SOCKET_DIR, 'server.sock');
/**
 * Local command server for the CLI.
 *
 * Listens on a socket at `SOCKET_PATH`, reads newline-delimited JSON requests
 * from each connection, executes them against browser sessions held in a
 * SessionManager, and writes one JSON response line per request.
 */
export class CLIServer {
  private server: net.Server | null = null;
  readonly sessions: SessionManager;

  constructor() {
    this.sessions = new SessionManager();
  }

  /**
   * Create the socket directory if needed, remove any stale socket file and
   * start listening.
   *
   * @returns The socket path once the server is listening. The promise rejects
   *          with the first server `error` event.
   */
  async start(): Promise<string> {
    if (!fs.existsSync(SOCKET_DIR)) {
      fs.mkdirSync(SOCKET_DIR, { recursive: true });
    }
    // Clean up stale socket
    if (fs.existsSync(SOCKET_PATH)) {
      fs.unlinkSync(SOCKET_PATH);
    }

    return new Promise<string>((resolve, reject) => {
      const server = net.createServer((socket) => {
        // Holds a trailing partial line until the rest of it arrives.
        let buffer = '';
        socket.on('data', async (data) => {
          buffer += data.toString();
          const lines = buffer.split('\n');
          buffer = lines.pop() ?? '';
          for (const line of lines) {
            if (!line.trim()) continue;
            const request = parseRequest(line);
            if (!request) continue;
            const response = await this.handleRequest(request);
            socket.write(this.encodeResponse(response));
          }
        });
        socket.on('error', () => {
          // Client disconnected
        });
      });
      this.server = server;
      server.on('error', reject);
      server.listen(SOCKET_PATH, () => {
        resolve(SOCKET_PATH);
      });
    });
  }

  /**
   * Serialize a response, falling back to an error response when the payload
   * cannot be JSON-encoded (for example a BigInt or a circular structure).
   * Without this, the failure would surface as an unhandled rejection in the
   * async `data` handler.
   */
  private encodeResponse(response: CLIResponse): string {
    try {
      return serializeResponse(response);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      return serializeResponse({
        id: response.id,
        success: false,
        error: `Failed to serialize response: ${reason}`,
      });
    }
  }

  /**
   * Execute one request. Never rejects: any thrown error is converted into a
   * `success: false` response carrying the error message.
   */
  private async handleRequest(request: CLIRequest): Promise<CLIResponse> {
    try {
      switch (request.command) {
        case 'open': {
          const url = request.args.url as string;
          // Resolution order: explicit session, existing default, new session.
          let sessionId = request.args.session as string | undefined;
          if (!sessionId) {
            sessionId = this.sessions.getDefaultId();
          }
          if (!sessionId) {
            sessionId = await this.sessions.create({
              headless: request.args.headless as boolean | undefined,
            });
          }
          const browser = this.sessions.get(sessionId);
          if (!browser) {
            // Only reachable when the caller named a session that does not exist.
            throw new Error(`Session "${sessionId}" not found.`);
          }
          await browser.navigate(url);
          return {
            id: request.id,
            success: true,
            data: { sessionId, url: browser.currentPage.url() },
          };
        }
        case 'tap': {
          const browser = this.getSessionBrowser(request);
          const selector = request.args.selector as string;
          await browser.click(selector);
          return { id: request.id, success: true };
        }
        case 'type': {
          const browser = this.getSessionBrowser(request);
          const selector = request.args.selector as string;
          const text = request.args.text as string;
          await browser.type(selector, text);
          return { id: request.id, success: true };
        }
        case 'state': {
          const browser = this.getSessionBrowser(request);
          const state = await browser.getState();
          return { id: request.id, success: true, data: state };
        }
        case 'capture': {
          const browser = this.getSessionBrowser(request);
          const result = await browser.screenshot(request.args.fullPage as boolean);
          return { id: request.id, success: true, data: result };
        }
        case 'eval': {
          const browser = this.getSessionBrowser(request);
          const expression = request.args.expression as string;
          const result = await browser.evaluate(expression);
          return { id: request.id, success: true, data: result };
        }
        case 'sessions': {
          return {
            id: request.id,
            success: true,
            data: this.sessions.list(),
          };
        }
        case 'close': {
          const sessionId = request.args.session as string | undefined;
          if (sessionId) {
            await this.sessions.close(sessionId);
          } else {
            await this.sessions.closeAll();
          }
          return { id: request.id, success: true };
        }
        default:
          return {
            id: request.id,
            success: false,
            error: `Unknown command: ${request.command}`,
          };
      }
    } catch (error) {
      return {
        id: request.id,
        success: false,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /**
   * Resolve the browser a request targets: the named session when
   * `args.session` is set, otherwise the default session.
   *
   * @throws Error when no matching session exists.
   */
  private getSessionBrowser(request: CLIRequest) {
    const sessionId = request.args.session as string | undefined;
    const browser = sessionId
      ? this.sessions.get(sessionId)
      : this.sessions.getDefault();
    if (!browser) {
      throw new Error('No active session. Use "open" command first.');
    }
    return browser;
  }

  /**
   * Close every session, stop listening and remove the socket file.
   * Calling it when the server is not running only closes the sessions.
   */
  async stop(): Promise<void> {
    await this.sessions.closeAll();
    const server = this.server;
    if (!server) {
      return;
    }
    // Drop the handle first so a repeated stop() does not close it twice.
    this.server = null;
    return new Promise<void>((resolve) => {
      server.close(() => {
        if (fs.existsSync(SOCKET_PATH)) {
          fs.unlinkSync(SOCKET_PATH);
        }
        resolve();
      });
    });
  }

  /** Path of the socket the server listens on. */
  static get socketPath(): string {
    return SOCKET_PATH;
  }
}
================================================
FILE: packages/cli/src/sessions.ts
================================================
import { Viewport, type ViewportOptions } from 'open-browser';
import { nanoid } from 'nanoid';
/** Bookkeeping record SessionManager keeps for each browser it created. */
interface ManagedSession {
  /** Session ID, also used as the key in the manager's map. */
  id: string;
  /** The browser instance owned by this session. */
  browser: Viewport;
  /** `Date.now()` timestamp taken when the session was registered. */
  createdAt: number;
  /** `Date.now()` timestamp refreshed whenever the browser is handed out. */
  lastAccessedAt: number;
}
/**
 * Registry of live browser sessions keyed by a short random ID.
 *
 * The "default" session is the first one in insertion order.
 */
export class SessionManager {
  private sessions = new Map<string, ManagedSession>();

  /**
   * Start a new browser and register it.
   *
   * @param options - Options forwarded to the Viewport constructor.
   * @returns The ID of the new session.
   */
  async create(options?: ViewportOptions): Promise<string> {
    const id = nanoid(8);
    const browser = new Viewport(options);
    await browser.start();
    const now = Date.now();
    this.sessions.set(id, {
      id,
      browser,
      createdAt: now,
      lastAccessedAt: now,
    });
    return id;
  }

  /** Look up a session's browser by ID and refresh its last-access time. */
  get(id: string): Viewport | undefined {
    const session = this.sessions.get(id);
    if (session) {
      session.lastAccessedAt = Date.now();
      return session.browser;
    }
    return undefined;
  }

  /**
   * Close one session.
   *
   * The session is removed from the registry even when closing its browser
   * throws, so a broken browser cannot linger as the default session; the
   * error is still propagated to the caller.
   *
   * @returns `false` when no session has the given ID, `true` otherwise.
   */
  async close(id: string): Promise<boolean> {
    const session = this.sessions.get(id);
    if (!session) return false;
    try {
      await session.browser.close();
    } finally {
      this.sessions.delete(id);
    }
    return true;
  }

  /**
   * Close every session.
   *
   * Every browser gets a close attempt and the registry is always emptied,
   * even when some closes fail; the first failure is rethrown afterwards.
   */
  async closeAll(): Promise<void> {
    const open = [...this.sessions.values()];
    this.sessions.clear();
    const outcomes = await Promise.allSettled(open.map((session) => session.browser.close()));
    const failure = outcomes.find(
      (outcome): outcome is PromiseRejectedResult => outcome.status === 'rejected',
    );
    if (failure) {
      throw failure.reason;
    }
  }

  /** Snapshot of all sessions' IDs and timestamps (without the browsers). */
  list(): Array<{ id: string; createdAt: number; lastAccessedAt: number }> {
    return [...this.sessions.values()].map((s) => ({
      id: s.id,
      createdAt: s.createdAt,
      lastAccessedAt: s.lastAccessedAt,
    }));
  }

  /** Number of sessions currently registered. */
  get activeCount(): number {
    return this.sessions.size;
  }

  /** Browser of the first registered session, refreshing its last-access time. */
  getDefault(): Viewport | undefined {
    const first = this.sessions.values().next();
    if (first.done) return undefined;
    first.value.lastAccessedAt = Date.now();
    return first.value.browser;
  }

  /** ID of the first registered session, if any. */
  getDefaultId(): string | undefined {
    const first = this.sessions.keys().next();
    return first.done ? undefined : first.value;
  }
}
================================================
FILE: packages/cli/tsconfig.json
================================================
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": "src",
"outDir": "dist"
},
"include": ["src/**/*.ts"]
}
================================================
FILE: packages/core/package.json
================================================
{
"name": "open-browser",
"version": "1.1.0",
"description": "AI-powered autonomous web browsing library for TypeScript",
"type": "module",
"main": "src/index.ts",
"types": "src/index.ts",
"exports": {
".": "./src/index.ts"
},
"scripts": {
"build": "tsc --noEmit",
"test": "bun test",
"lint": "biome check src/"
},
"dependencies": {
"ai": "^4.2.0",
"@ai-sdk/openai": "^1.1.0",
"@ai-sdk/anthropic": "^1.1.0",
"@ai-sdk/google": "^1.1.0",
"zod": "^3.24.0",
"playwright": "^1.51.0",
"mitt": "^3.0.2",
"nanoid": "^5.1.0",
"turndown": "^7.2.1",
"dotenv": "^16.5.0"
},
"devDependencies": {
"@types/turndown": "^5.0.5"
},
"peerDependencies": {
"sharp": ">=0.33.0"
},
"peerDependenciesMeta": {
"sharp": {
"optional": true
}
},
"license": "MIT"
}
================================================
FILE: packages/core/src/agent/agent.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import { Agent, type AgentOptions } from '../agent/agent.js';
import type { PageAnalyzer } from '../page/page-analyzer.js';
// ── Mock PageAnalyzer factory (injected via AgentOptions.domService) ──
// Shared mock for PageAnalyzer.extractState; module-level so tests can inspect
// and clear its call history.
const mockExtractState = mock(async () => ({
  // Kept on one line with an escaped newline: a raw line break inside a
  // quoted string literal does not parse.
  tree: '[1]\n',
  selectorMap: { 1: 'button' },
  elementCount: 10,
  interactiveElementCount: 1,
  scrollPosition: { x: 0, y: 0 },
  viewportSize: { width: 1280, height: 1100 },
  documentSize: { width: 1280, height: 2000 },
  pixelsAbove: 0,
  pixelsBelow: 900,
}));
/**
 * Build a PageAnalyzer test double. `extractState` is the shared
 * `mockExtractState`; every other method is a fresh no-op mock.
 */
function createMockPageAnalyzer(): PageAnalyzer {
  const analyzer = {
    extractState: mockExtractState,
    clickElementByIndex: mock(async () => {}),
    getCachedTree: mock(() => null),
    getCachedSelectorMap: mock(() => null),
    clearCache: mock(() => {}),
    getInteractedElements: mock(() => []),
    clearInteractedElements: mock(() => {}),
    getElementSelector: mock(async () => undefined),
    getElementByBackendNodeId: mock(async () => null),
    clickAtCoordinates: mock(async () => {}),
    inputTextByIndex: mock(async () => {}),
    extractWithIframes: mock(async () => ({ mainTree: null, iframeTrees: [] })),
  };
  // Only the methods above are stubbed, hence the double cast.
  return analyzer as unknown as PageAnalyzer;
}
import type { RunOutcome } from './types.js';
import type { LanguageModel, InferenceOptions } from '../model/interface.js';
import type { InferenceResult, InferenceUsage } from '../model/types.js';
import type { Viewport } from '../viewport/viewport.js';
import type { ViewportSnapshot } from '../viewport/types.js';
import type { CommandExecutor } from '../commands/executor.js';
import type { Command, CommandResult, ExecutionContext } from '../commands/types.js';
import type { CommandCatalog } from '../commands/catalog/catalog.js';
// ── Mock Factories ──
/** Build a usage record whose total is the sum of the input and output counts. */
function createMockUsage(input = 100, output = 50): InferenceUsage {
  const totalTokens = input + output;
  return {
    inputTokens: input,
    outputTokens: output,
    totalTokens,
  };
}
/**
 * Build a LanguageModel test double that replays scripted responses.
 *
 * Each `invoke` call returns the next entry of `responses`; once the script is
 * exhausted the last entry is returned for every further call. Without
 * `responses`, a single "tap element 1" response is used.
 *
 * @param options.responses - Scripted model outputs, in call order.
 * @param options.modelId - Model ID to report (defaults to `test-model`).
 */
function createMockModel(options?: {
  responses?: Array<{
    currentState: { evaluation: string; memory: string; nextGoal: string };
    actions: Command[];
  }>;
  modelId?: string;
}): LanguageModel {
  let callCount = 0;
  const responses = options?.responses ?? [
    {
      currentState: {
        evaluation: 'Page loaded',
        memory: '',
        nextGoal: 'Click element',
      },
      actions: [{ action: 'tap', index: 1, clickCount: 1 } as Command],
    },
  ];
  return {
    modelId: options?.modelId ?? 'test-model',
    provider: 'custom',
    // NOTE(review): assumes InferenceOptions is generic over the parsed type — confirm against model/interface.ts.
    invoke: async <T>(_options: InferenceOptions<T>): Promise<InferenceResult<T>> => {
      // Clamp to the last scripted response once the script runs out.
      const responseIndex = Math.min(callCount, responses.length - 1);
      callCount++;
      return {
        parsed: responses[responseIndex] as unknown as T,
        usage: createMockUsage(),
        finishReason: 'stop',
      };
    },
  };
}
/**
 * Build a mock model that emits a `tap` action on every step before
 * `doneOnStep` and a successful `finish` action carrying `result` on that step.
 */
function createDoneOnStepModel(doneOnStep: number, result = 'Task completed'): LanguageModel {
  type ScriptedResponse = {
    currentState: { evaluation: string; memory: string; nextGoal: string };
    actions: Command[];
  };
  const script: ScriptedResponse[] = [];

  // Steps 1 .. doneOnStep-1 each tap the element whose index equals the step.
  let stepNumber = 1;
  while (stepNumber < doneOnStep) {
    script.push({
      currentState: {
        evaluation: `Step ${stepNumber} assessment`,
        memory: '',
        nextGoal: `Goal for step ${stepNumber + 1}`,
      },
      actions: [{ action: 'tap', index: stepNumber, clickCount: 1 } as Command],
    });
    stepNumber++;
  }

  const finishing: ScriptedResponse = {
    currentState: {
      evaluation: 'Task done',
      memory: '',
      nextGoal: 'Report result',
    },
    actions: [{ action: 'finish', text: result, success: true } as Command],
  };
  script.push(finishing);

  return createMockModel({ responses: script });
}
/** Build a snapshot describing a single active tab on https://example.com. */
function createMockBrowserState(): ViewportSnapshot {
  const pageUrl = 'https://example.com';
  const pageTitle = 'Example Page';
  const onlyTab = { tabId: 0 as any, url: pageUrl, title: pageTitle, isActive: true };
  return {
    url: pageUrl,
    title: pageTitle,
    tabs: [onlyTab],
    activeTabIndex: 0,
  };
}
/** Build a CommandCatalog test double with canned description strings. */
function createMockRegistry(): CommandCatalog {
  const catalog = {
    register: mock(() => {}),
    get: mock(() => undefined),
    getAll: mock(() => []),
    getActionDescriptions: mock(() => 'click: Click on an element'),
    getPromptDescription: mock(() => 'click: Click on an element by its index\ngo_to_url: Navigate to a URL'),
    has: mock(() => false),
  };
  // Only the methods above are stubbed, hence the double cast.
  return catalog as unknown as CommandCatalog;
}
/**
 * Build a CommandExecutor test double.
 *
 * `executeActions` resolves to `actionResults` (or a single successful result
 * when omitted); `executeAction` resolves to the first entry of that list.
 */
function createMockTools(actionResults?: CommandResult[]): CommandExecutor {
  const fallbackResults: CommandResult[] = [{ success: true }];
  // Resolved on every call, mirroring the nullish fallback at call time.
  const resolveResults = (): CommandResult[] => actionResults ?? fallbackResults;

  const executor = {
    registry: createMockRegistry(),
    commandsPerStep: 10,
    setCoordinateClicking: mock(() => {}),
    executeActions: mock(async (_actions: Command[], _ctx: ExecutionContext) => {
      return resolveResults();
    }),
    executeAction: mock(async (_action: Command, _ctx: ExecutionContext) => {
      return resolveResults()[0];
    }),
  };
  return executor as unknown as CommandExecutor;
}
/**
 * Build a Viewport test double.
 *
 * @param overrides.browserState - Snapshot returned by `getState` (defaults to
 *        `createMockBrowserState()`).
 * @param overrides.isConnected - Value of the `isConnected` flag (defaults to true).
 */
function createMockBrowser(overrides?: {
  browserState?: ViewportSnapshot;
  isConnected?: boolean;
}): Viewport {
  const snapshot = overrides?.browserState ?? createMockBrowserState();
  const connected = overrides?.isConnected ?? true;

  const page = {
    viewportSize: () => ({ width: 1280, height: 1100 }),
    evaluate: mock(async () => ({})),
  } as any;
  const cdpSession = {
    send: mock(async () => ({})),
  } as any;

  const viewport = {
    isConnected: connected,
    start: mock(async () => {}),
    getState: mock(async () => snapshot),
    screenshot: mock(async () => ({ base64: 'fake_screenshot', width: 1280, height: 1100 })),
    navigate: mock(async () => {}),
    currentPage: page,
    cdp: cdpSession,
  };
  return viewport as unknown as Viewport;
}
/**
 * Build a complete AgentOptions object for tests: a model that finishes on
 * step 2, mocked browser/tools/page analyzer, and settings with delays,
 * screenshots and URL auto-navigation disabled.
 *
 * @param overrides - Top-level fields to replace. The merge is shallow, so a
 *        `settings` override replaces the whole default settings object.
 */
function createDefaultAgentOptions(overrides?: Partial<AgentOptions>): AgentOptions {
  return {
    task: 'Find the price of the product',
    model: createDoneOnStepModel(2),
    browser: createMockBrowser(),
    tools: createMockTools([{ success: true, isDone: false }]),
    domService: createMockPageAnalyzer(),
    settings: {
      stepLimit: 5,
      enableScreenshots: false,
      commandDelayMs: 0,
      retryDelay: 0,
      autoNavigateToUrls: false,
      contextWindowSize: 50000,
    },
    ...overrides,
  };
}
// ── Tests ──
describe('Agent', () => {
describe('constructor', () => {
test('creates agent with default settings merged', () => {
const agent = new Agent(createDefaultAgentOptions());
const state = agent.getState();
expect(state.step).toBe(0);
expect(state.isRunning).toBe(false);
expect(state.isDone).toBe(false);
expect(state.failureCount).toBe(0);
expect(state.consecutiveFailures).toBe(0);
});
test('overrides default settings with provided values', () => {
const agent = new Agent(
createDefaultAgentOptions({
settings: {
stepLimit: 50,
enableScreenshots: false,
commandDelayMs: 0,
retryDelay: 0,
autoNavigateToUrls: false,
contextWindowSize: 50000,
},
}),
);
const state = agent.getState();
expect(state.stepLimit).toBe(50);
});
test('initializes cost tracking to zero', () => {
const agent = new Agent(createDefaultAgentOptions());
const cost = agent.getAccumulatedCost();
expect(cost.totalCost).toBe(0);
expect(cost.totalInputTokens).toBe(0);
expect(cost.totalOutputTokens).toBe(0);
});
test('initializes empty history', () => {
const agent = new Agent(createDefaultAgentOptions());
const history = agent.getHistory();
expect(history.entries).toHaveLength(0);
expect(history.task).toBe('Find the price of the product');
});
test('uses custom tools when provided', () => {
const customTools = createMockTools();
const agent = new Agent(createDefaultAgentOptions({ tools: customTools }));
expect(agent).toBeDefined();
});
});
describe('run() basic flow', () => {
test('completes when done action is returned', async () => {
const doneModel = createDoneOnStepModel(1, 'The price is $42');
const tools = createMockTools([
{ success: true, isDone: true, extractedContent: 'The price is $42' },
]);
const agent = new Agent(
createDefaultAgentOptions({ model: doneModel, tools }),
);
const result = await agent.run();
expect(result.finalResult).toBe('The price is $42');
expect(result.success).toBe(true);
expect(result.errors).toHaveLength(0);
});
test('sets isRunning to false after completion', async () => {
const doneModel = createDoneOnStepModel(1, 'Done');
const tools = createMockTools([
{ success: true, isDone: true, extractedContent: 'Done' },
]);
const agent = new Agent(
createDefaultAgentOptions({ model: doneModel, tools }),
);
await agent.run();
const state = agent.getState();
expect(state.isRunning).toBe(false);
});
test('calls onStepStart callback', async () => {
const stepStarts: number[] = [];
const doneModel = createDoneOnStepModel(2, 'Result');
let callCount = 0;
const tools = createMockTools();
(tools.executeActions as any) = mock(async () => {
callCount++;
if (callCount >= 2) {
return [{ success: true, isDone: true, extractedContent: 'Result' }];
}
return [{ success: true }];
});
const agent = new Agent(
createDefaultAgentOptions({
model: doneModel,
tools,
onStepStart: (step) => stepStarts.push(step),
}),
);
await agent.run();
expect(stepStarts.length).toBeGreaterThan(0);
expect(stepStarts[0]).toBe(1);
});
test('calls onDone callback with result', async () => {
let doneResult: RunOutcome | undefined;
const doneModel = createDoneOnStepModel(1, 'Final answer');
const tools = createMockTools([
{ success: true, isDone: true, extractedContent: 'Final answer' },
]);
const agent = new Agent(
createDefaultAgentOptions({
model: doneModel,
tools,
onDone: (r) => { doneResult = r; },
}),
);
await agent.run();
expect(doneResult).toBeDefined();
expect(doneResult!.finalResult).toBe('Final answer');
});
test('starts browser if not connected', async () => {
const browser = createMockBrowser({ isConnected: false });
const doneModel = createDoneOnStepModel(1, 'Result');
const tools = createMockTools([
{ success: true, isDone: true, extractedContent: 'Result' },
]);
const agent = new Agent(
createDefaultAgentOptions({ browser, model: doneModel, tools }),
);
await agent.run();
expect(browser.start).toHaveBeenCalled();
});
});
describe('step execution', () => {
  /** Fresh result list that marks the task as finished with the text 'Done'. */
  const finishedResults = () => [
    { success: true, isDone: true, extractedContent: 'Done' },
  ];

  /**
   * Mock tools whose executeActions reports a plain success until the
   * `finishAt`-th call, and completion from that call onwards.
   */
  const toolsFinishingAt = (finishAt: number) => {
    let calls = 0;
    const tools = createMockTools();
    (tools.executeActions as any) = mock(async () => {
      calls += 1;
      return calls >= finishAt ? finishedResults() : [{ success: true }];
    });
    return tools;
  };

  test('invokes browser.getState() on each step', async () => {
    const browser = createMockBrowser();
    const model = createDoneOnStepModel(1, 'Done');
    const tools = createMockTools(finishedResults());

    await new Agent(createDefaultAgentOptions({ browser, model, tools })).run();

    expect(browser.getState).toHaveBeenCalled();
  });

  test('invokes PageAnalyzer.extractState on each step', async () => {
    const model = createDoneOnStepModel(1, 'Done');
    const tools = createMockTools(finishedResults());
    // Drop calls recorded by earlier tests so only this run is observed.
    mockExtractState.mockClear();

    await new Agent(createDefaultAgentOptions({ model, tools })).run();

    expect(mockExtractState).toHaveBeenCalled();
  });

  test('records history entries for each step', async () => {
    const tools = toolsFinishingAt(3);
    const model = createDoneOnStepModel(3, 'Done');
    const agent = new Agent(createDefaultAgentOptions({ model, tools }));

    await agent.run();

    expect(agent.getHistory().entries.length).toBeGreaterThanOrEqual(1);
  });

  test('token usage is tracked across steps', async () => {
    const tools = toolsFinishingAt(2);
    const model = createDoneOnStepModel(2, 'Done');
    const agent = new Agent(createDefaultAgentOptions({ model, tools }));

    await agent.run();

    const { totalInputTokens, totalOutputTokens } = agent.getState();
    expect(totalInputTokens).toBeGreaterThan(0);
    expect(totalOutputTokens).toBeGreaterThan(0);
  });
});
// Exercises how Agent.run() reacts when the model throws on invoke.
// NOTE(review): the `Promise> =>` return annotations below look like they lost
// their generic arguments (and a leading type-parameter list) during
// extraction; confirm against the upstream test file before relying on them.
describe('failure recovery', () => {
// A model that always throws: every step fails, so run() must report errors.
// Only the presence of errors is asserted, not the failure counters themselves.
test('consecutive failures increment failure count', async () => {
let callCount = 0;
const errorModel: LanguageModel = {
modelId: 'test-model',
provider: 'custom',
invoke: async (): Promise> => {
callCount++;
throw new Error(`Simulated error ${callCount}`);
},
};
const agent = new Agent(
createDefaultAgentOptions({
model: errorModel,
settings: {
stepLimit: 10,
failureThreshold: 3,
retryDelay: 0,
enableScreenshots: false,
commandDelayMs: 0,
autoNavigateToUrls: false,
contextWindowSize: 50000,
},
}),
);
const result = await agent.run();
expect(result.errors.length).toBeGreaterThan(0);
});
// Once consecutive failures reach failureThreshold, run() records an error
// whose text contains 'consecutive failures'.
test('agent records error about consecutive failures after failureThreshold', async () => {
let callCount = 0;
const errorModel: LanguageModel = {
modelId: 'test-model',
provider: 'custom',
invoke: async (): Promise> => {
callCount++;
throw new Error(`Error ${callCount}`);
},
};
const agent = new Agent(
createDefaultAgentOptions({
model: errorModel,
settings: {
stepLimit: 20,
failureThreshold: 3,
retryDelay: 0,
enableScreenshots: false,
commandDelayMs: 0,
autoNavigateToUrls: false,
contextWindowSize: 50000,
},
}),
);
const result = await agent.run();
const hasFailureError = result.errors.some(
(e) => e.includes('consecutive failures'),
);
expect(hasFailureError).toBe(true);
});
// The model throws once and then returns a finish action. The run is expected
// to recover and complete; the counter reset is only checked indirectly via
// the final result.
test('successful step resets consecutive failure count', async () => {
let callCount = 0;
const model: LanguageModel = {
modelId: 'test-model',
provider: 'custom',
invoke: async (): Promise> => {
callCount++;
// First call simulates a transient failure.
if (callCount === 1) {
throw new Error('Transient error');
}
return {
parsed: {
currentState: { evaluation: 'Done', memory: '', nextGoal: '' },
actions: [{ action: 'finish', text: 'Success', success: true }],
} as unknown as T,
usage: createMockUsage(),
finishReason: 'stop',
};
},
};
const tools = createMockTools([
{ success: true, isDone: true, extractedContent: 'Success' },
]);
const agent = new Agent(
createDefaultAgentOptions({
model,
tools,
settings: {
stepLimit: 10,
failureThreshold: 5,
retryDelay: 0,
enableScreenshots: false,
commandDelayMs: 0,
autoNavigateToUrls: false,
contextWindowSize: 50000,
},
}),
);
const result = await agent.run();
expect(result.finalResult).toBe('Success');
});
});
describe('done action detection and result extraction', () => {
  // A result flagged isDone ends the run; its content becomes finalResult.
  test('detects done action and extracts result text', async () => {
    const tools = createMockTools([
      { success: true, isDone: true, extractedContent: 'Product costs $99' },
    ]);
    const model = createDoneOnStepModel(1, 'Product costs $99');

    const outcome = await new Agent(
      createDefaultAgentOptions({ model, tools }),
    ).run();

    expect(outcome.finalResult).toBe('Product costs $99');
    expect(outcome.success).toBe(true);
  });

  // The done result's own success flag is what run() reports.
  test('handles done action with success=false', async () => {
    const giveUpDecision = {
      currentState: { evaluation: 'Cannot find', memory: '', nextGoal: '' },
      actions: [{ action: 'finish', text: 'Could not find', success: false } as Command],
    };
    const model = createMockModel({ responses: [giveUpDecision] });
    const tools = createMockTools([
      { success: false, isDone: true, extractedContent: 'Could not find' },
    ]);

    const outcome = await new Agent(
      createDefaultAgentOptions({ model, tools }),
    ).run();

    expect(outcome.finalResult).toBe('Could not find');
    expect(outcome.success).toBe(false);
  });
});
describe('pause / resume / stop', () => {
  test('pause sets isPaused flag', () => {
    const agent = new Agent(createDefaultAgentOptions());
    agent.pause();
    expect(agent.getState().isPaused).toBe(true);
  });

  test('resume clears isPaused flag', () => {
    const agent = new Agent(createDefaultAgentOptions());
    agent.pause();
    agent.resume();
    expect(agent.getState().isPaused).toBe(false);
  });

  // stop() is triggered from inside the second executeActions call instead of
  // after a wall-clock delay. With fully mocked collaborators the run can
  // finish long before a timer fires, and run() clears isRunning in its own
  // `finally`, so a timer-based version passes even if stop() does nothing.
  // NOTE(review): assumes stop() makes the run loop exit via state.isRunning;
  // confirm against Agent.stop().
  test('stop sets isRunning to false', async () => {
    const stepLimit = 100;
    const stopOnCall = 2;
    let stepCount = 0;
    const tools = createMockTools();
    (tools.executeActions as any) = mock(async () => {
      stepCount++;
      if (stepCount === stopOnCall) {
        agent.stop();
      }
      return [{ success: true }];
    });
    const model = createMockModel();
    const agent = new Agent(
      createDefaultAgentOptions({
        model,
        tools,
        settings: {
          stepLimit,
          enableScreenshots: false,
          commandDelayMs: 0,
          retryDelay: 0,
          autoNavigateToUrls: false,
          contextWindowSize: 50000,
        },
      }),
    );

    await agent.run();

    expect(agent.getState().isRunning).toBe(false);
    // The loop must have ended because of stop(), not because the budget ran out.
    expect(stepCount).toBeLessThan(stepLimit);
  });
});
describe('max steps reached', () => {
  /** Settings shared by both scenarios; only the configured step limit differs. */
  const settingsWithLimit = (stepLimit: number) => ({
    stepLimit,
    enableScreenshots: false,
    commandDelayMs: 0,
    retryDelay: 0,
    autoNavigateToUrls: false,
    contextWindowSize: 50000,
  });

  /** True when any recorded error mentions the step limit. */
  const mentionsStepLimit = (errors: string[]) =>
    errors.some((message) => message.includes('maximum steps'));

  /** Agent whose model and tools never signal completion. */
  const neverFinishingAgent = (configuredLimit: number) => {
    const model = createMockModel();
    const tools = createMockTools([{ success: true }]);
    return new Agent(
      createDefaultAgentOptions({
        model,
        tools,
        settings: settingsWithLimit(configuredLimit),
      }),
    );
  };

  test('returns error when max steps exceeded without done', async () => {
    const outcome = await neverFinishingAgent(3).run();
    expect(mentionsStepLimit(outcome.errors)).toBe(true);
  });

  // The argument to run() takes precedence over settings.stepLimit.
  test('run() accepts stepLimit parameter to override settings', async () => {
    const outcome = await neverFinishingAgent(100).run(2);
    expect(mentionsStepLimit(outcome.errors)).toBe(true);
  });
});
describe('sensitive data filtering', () => {
  // Values listed in settings.maskedValues must not appear in the action
  // results stored in the execution history.
  test('filters sensitive values from action results', async () => {
    const tools = createMockTools([
      {
        success: true,
        isDone: true,
        extractedContent: 'Your API key is sk-12345 and password is hunter2',
      },
    ]);
    const model = createDoneOnStepModel(1, 'Done');
    const agent = new Agent(
      createDefaultAgentOptions({
        model,
        tools,
        settings: {
          stepLimit: 5,
          enableScreenshots: false,
          commandDelayMs: 0,
          retryDelay: 0,
          autoNavigateToUrls: false,
          contextWindowSize: 50000,
          maskedValues: {
            apiKey: 'sk-12345',
            password: 'hunter2',
          },
        },
      }),
    );

    await agent.run();

    const history = agent.getHistory();
    let inspectedResults = 0;
    for (const entry of history.entries) {
      for (const ar of entry.actionResults) {
        inspectedResults++;
        if (ar.extractedContent) {
          expect(ar.extractedContent).not.toContain('sk-12345');
          expect(ar.extractedContent).not.toContain('hunter2');
        }
      }
    }
    // Without this the loop above passes vacuously when nothing was recorded.
    expect(inspectedResults).toBeGreaterThan(0);
  });

  test('returns unmodified results when no sensitive data configured', async () => {
    const tools = createMockTools([
      {
        success: true,
        isDone: true,
        extractedContent: 'Plain text result',
      },
    ]);
    const model = createDoneOnStepModel(1, 'Done');
    const agent = new Agent(
      createDefaultAgentOptions({ model, tools }),
    );

    const result = await agent.run();

    expect(result.finalResult).toBe('Plain text result');
  });
});
describe('history recording', () => {
  /** Runs an agent that finishes on its first step and returns its history. */
  const historyAfterSingleStepRun = async () => {
    const doneModel = createDoneOnStepModel(1, 'Done');
    const tools = createMockTools([
      { success: true, isDone: true, extractedContent: 'Done' },
    ]);
    const agent = new Agent(
      createDefaultAgentOptions({ model: doneModel, tools }),
    );
    await agent.run();
    return agent.getHistory();
  };

  test('history entries contain step number', async () => {
    // Two-step run: plain success first, completion on the second call.
    let executions = 0;
    const tools = createMockTools();
    (tools.executeActions as any) = mock(async () => {
      executions += 1;
      return executions >= 2
        ? [{ success: true, isDone: true, extractedContent: 'Done' }]
        : [{ success: true }];
    });
    const model = createDoneOnStepModel(2, 'Done');
    const agent = new Agent(createDefaultAgentOptions({ model, tools }));

    await agent.run();

    const { entries } = agent.getHistory();
    expect(entries.length).toBeGreaterThanOrEqual(1);
    expect(entries[0].step).toBe(1);
  });

  test('history entries contain browser state info', async () => {
    const { entries } = await historyAfterSingleStepRun();
    expect(entries.length).toBeGreaterThanOrEqual(1);
    expect(entries[0].browserState.url).toBe('https://example.com');
    expect(entries[0].browserState.title).toBe('Example Page');
  });

  test('history entries contain usage info', async () => {
    const { entries } = await historyAfterSingleStepRun();
    expect(entries.length).toBeGreaterThanOrEqual(1);
    expect(entries[0].usage).toBeDefined();
    expect(entries[0].usage!.inputTokens).toBe(100);
    expect(entries[0].usage!.outputTokens).toBe(50);
  });

  test('history is finalized after run', async () => {
    const history = await historyAfterSingleStepRun();
    expect(history.endTime).toBeDefined();
    expect(history.totalDuration).toBeDefined();
  });
});
describe('cost tracking', () => {
  // Three-step run; the accumulated totals must cover at least one step's usage.
  test('cumulative cost accumulates across steps', async () => {
    const finishOnCall = 3;
    let executions = 0;
    const tools = createMockTools();
    (tools.executeActions as any) = mock(async () => {
      executions += 1;
      if (executions < finishOnCall) {
        return [{ success: true }];
      }
      return [{ success: true, isDone: true, extractedContent: 'Done' }];
    });
    const model = createDoneOnStepModel(finishOnCall, 'Done');
    const agent = new Agent(createDefaultAgentOptions({ model, tools }));

    await agent.run();

    const { totalInputTokens, totalOutputTokens } = agent.getAccumulatedCost();
    expect(totalInputTokens).toBeGreaterThanOrEqual(100);
    expect(totalOutputTokens).toBeGreaterThanOrEqual(50);
  });
});
describe('follow-up tasks', () => {
  // Tasks come back in insertion order.
  test('addNewTask stores follow-up tasks', () => {
    const agent = new Agent(createDefaultAgentOptions());
    agent.addNewTask('Follow up: check price again');
    agent.addNewTask('Follow up: compare with competitor');

    const [first, second, ...rest] = agent.getFollowUpTasks();

    expect([first, second, ...rest]).toHaveLength(2);
    expect(first).toBe('Follow up: check price again');
    expect(second).toBe('Follow up: compare with competitor');
  });

  // Each call yields an equal but distinct array.
  test('getFollowUpTasks returns a copy', () => {
    const agent = new Agent(createDefaultAgentOptions());
    agent.addNewTask('Task 1');

    const firstRead = agent.getFollowUpTasks();
    const secondRead = agent.getFollowUpTasks();

    expect(firstRead).toEqual(secondRead);
    expect(firstRead).not.toBe(secondRead);
  });
});
describe('getState', () => {
  // Two reads are structurally equal but never the same object.
  test('returns a copy of the state', () => {
    const agent = new Agent(createDefaultAgentOptions());

    const firstSnapshot = agent.getState();
    const secondSnapshot = agent.getState();

    expect(firstSnapshot).toEqual(secondSnapshot);
    expect(firstSnapshot).not.toBe(secondSnapshot);
  });

  test('tracks current URL after run', async () => {
    const doneModel = createDoneOnStepModel(1, 'Done');
    const tools = createMockTools([
      { success: true, isDone: true, extractedContent: 'Done' },
    ]);
    const agent = new Agent(
      createDefaultAgentOptions({ model: doneModel, tools }),
    );

    await agent.run();

    const { currentUrl } = agent.getState();
    expect(currentUrl).toBe('https://example.com');
  });
});
describe('getAccumulatedCost', () => {
  // Two reads are structurally equal but never the same object.
  test('returns a copy of cost data', () => {
    const agent = new Agent(createDefaultAgentOptions());

    const firstRead = agent.getAccumulatedCost();
    const secondRead = agent.getAccumulatedCost();

    expect(firstRead).toEqual(secondRead);
    expect(firstRead).not.toBe(secondRead);
  });
});
describe('run result structure', () => {
  /** Runs an agent that finishes on step one with 'Answer' and returns the outcome. */
  const runToAnswer = async () => {
    const doneModel = createDoneOnStepModel(1, 'Answer');
    const tools = createMockTools([
      { success: true, isDone: true, extractedContent: 'Answer' },
    ]);
    const agent = new Agent(
      createDefaultAgentOptions({ model: doneModel, tools }),
    );
    return agent.run();
  };

  test('result contains all expected fields', async () => {
    const outcome = await runToAnswer();
    for (const field of ['finalResult', 'success', 'history', 'errors', 'totalCost']) {
      expect(outcome).toHaveProperty(field);
    }
  });

  test('result.history is an ExecutionLog', async () => {
    const { history } = await runToAnswer();
    expect(history).toBeDefined();
    expect(history.task).toBe('Find the price of the product');
    expect(typeof history.finalResult).toBe('function');
  });
});
});
================================================
FILE: packages/core/src/agent/agent.ts
================================================
import { z, ZodError } from 'zod';
import type { LanguageModel, InferenceOptions } from '../model/interface.js';
import type { Viewport } from '../viewport/viewport.js';
import type { FileAccess } from '../sandbox/file-access.js';
import { PageAnalyzer } from '../page/page-analyzer.js';
import { CommandExecutor } from '../commands/executor.js';
import type { Command, CommandResult, ExecutionContext } from '../commands/types.js';
import { CommandSchema } from '../commands/types.js';
import { InstructionBuilder } from './instructions.js';
import { ConversationManager } from './conversation/service.js';
import { StallDetector, hashPageTree, hashTextContent } from './stall-detector.js';
import { ReplayRecorder } from './replay-recorder.js';
import { ResultEvaluator } from './evaluator.js';
import {
type AgentConfig,
type AgentState,
type AgentDecision,
type StepRecord,
ExecutionLog,
type RunOutcome,
type AccumulatedCost,
type EvaluationResult,
type QuickCheckResult,
ReasoningSchema,
AgentDecisionCompactSchema,
AgentDecisionDirectSchema,
PlanRevisionSchema,
DEFAULT_AGENT_CONFIG,
calculateStepCost,
supportsDeepReasoning,
supportsCoordinateMode,
isCompactModel,
} from './types.js';
import {
AgentError,
StepLimitExceededError,
AgentStalledError,
ModelThrottledError,
} from '../errors.js';
import {
Timer,
sleep,
truncateText,
withDeadline,
extractUrls,
escapeRegExp,
} from '../utils.js';
import { createLogger } from '../logging.js';
// Module-level logger used by the Agent below, created under the name 'agent'.
const logger = createLogger('agent');
// ── Agent Options ──
/**
 * Options accepted by the Agent constructor.
 */
export interface AgentOptions {
  /** Task description; stored on the agent settings as `task`. */
  task: string;
  /** Model invoked on each step to decide the next actions. */
  model: LanguageModel;
  /** Browser the agent drives; `run()` starts it when it is not connected. */
  browser: Viewport;
  /** Command executor (defaults to a new CommandExecutor built from the settings) */
  tools?: CommandExecutor;
  /** Pre-configured PageAnalyzer instance (defaults to a new PageAnalyzer) */
  domService?: PageAnalyzer;
  /** Overrides layered on top of DEFAULT_AGENT_CONFIG; omitted keys keep their defaults. */
  settings?: Partial<AgentConfig>;
  /** Separate model for the judge (defaults to main model) */
  judgeModel?: LanguageModel;
  /** Separate model for extraction actions (defaults to main model) */
  extractionModel?: LanguageModel;
  /** File system access for sandbox operations */
  fileSystem?: FileAccess;
  /** Called with the 1-based step number before each step executes. */
  onStepStart?: (step: number) => void;
  /** Called with a step's command results after a step that did not finish the run. */
  onStepEnd?: (step: number, result: CommandResult[]) => void;
  /** Called with the final outcome just before `run()` returns it. */
  onDone?: (result: RunOutcome) => void;
}
// ── Agent ──
/**
 * Runs a task by looping over steps: read browser and page state, ask the
 * model for a decision, execute the chosen commands, and record the outcome
 * until a command reports completion, the step limit is hit, or too many
 * consecutive steps fail.
 */
export class Agent {
// Model asked for a decision on every step.
private model: LanguageModel;
// Browser session being driven.
private browser: Viewport;
// Executes the commands chosen by the model.
private tools: CommandExecutor;
// Extracts the page state passed to the model.
private domService: PageAnalyzer;
// Holds the conversation sent to the model.
private messageManager: ConversationManager;
// Detects repeated actions / unchanged pages.
private loopDetector: StallDetector;
// Only created when settings.replayOutputPath is set.
private gifRecorder?: ReplayRecorder;
// Only created when enableEvaluation or enableSimpleJudge is on.
private judge?: ResultEvaluator;
// DEFAULT_AGENT_CONFIG merged with caller overrides and the task.
private settings: AgentConfig;
// Optional model handed to the executor / execution context for extraction.
private extractionModel?: LanguageModel;
// Optional file access handed to the execution context.
private fileSystem?: FileAccess;
// Mutable run state (step counters, flags, token and cost totals).
private state: AgentState;
// Per-step records for the current task.
private historyList: ExecutionLog;
// Set to Date.now() when run() begins.
private startTime = 0;
// Follow-up task strings; NOTE(review): presumably filled by addNewTask(), which is outside this excerpt.
private followUpTasks: string[] = [];
// Optional lifecycle callbacks copied from the constructor options.
private onStepStart?: (step: number) => void;
private onStepEnd?: (step: number, result: CommandResult[]) => void;
private onDone?: (result: RunOutcome) => void;
/**
 * Build an agent from the supplied options: merge settings over the
 * defaults, create any collaborator that was not injected, and reset the
 * run state and execution log.
 */
constructor(options: AgentOptions) {
  const { task, model, browser, extractionModel, fileSystem } = options;

  this.model = model;
  this.browser = browser;
  // Defaults first, caller overrides second; the task always comes from options.task.
  const settings: AgentConfig = { ...DEFAULT_AGENT_CONFIG, ...options.settings, task };
  this.settings = settings;
  this.extractionModel = extractionModel;
  this.fileSystem = fileSystem;

  // Lifecycle callbacks
  this.onStepStart = options.onStepStart;
  this.onStepEnd = options.onStepEnd;
  this.onDone = options.onDone;

  // Collaborators: use the injected instance or fall back to a fresh one.
  this.tools =
    options.tools ??
    new CommandExecutor({
      model: extractionModel ?? model,
      allowedUrls: settings.allowedUrls,
      blockedUrls: settings.blockedUrls,
      commandsPerStep: settings.commandsPerStep,
    });
  this.domService =
    options.domService ??
    new PageAnalyzer({ capturedAttributes: settings.capturedAttributes });
  this.messageManager = new ConversationManager({
    contextWindowSize: settings.contextWindowSize,
    includeLastScreenshot: settings.enableScreenshots,
    maskedValues: settings.maskedValues,
    compaction: settings.conversationCompaction,
  });
  this.loopDetector = new StallDetector();

  // Replay recording is opt-in via an output path.
  if (settings.replayOutputPath) {
    this.gifRecorder = new ReplayRecorder({ outputPath: settings.replayOutputPath });
  }

  // Either judge mode needs an evaluator; it falls back to the main model.
  const wantsJudge = settings.enableEvaluation || settings.enableSimpleJudge;
  if (wantsJudge) {
    this.judge = new ResultEvaluator(options.judgeModel ?? model);
  }

  // Auto-enable coordinate clicking for supported models
  if (settings.autoEnableCoordinateClicking && supportsCoordinateMode(model.modelId)) {
    this.tools.setCoordinateClicking(true);
    logger.info(`Coordinate clicking auto-enabled for model ${this.model.modelId}`);
  }

  // Fresh counters and flags for a run that has not started yet.
  const zeroCost: AccumulatedCost = {
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalInputCost: 0,
    totalOutputCost: 0,
    totalCost: 0,
  };
  this.state = {
    step: 0,
    stepLimit: settings.stepLimit,
    failureCount: 0,
    consecutiveFailures: 0,
    isRunning: false,
    isPaused: false,
    isDone: false,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    cumulativeCost: zeroCost,
  };
  this.historyList = new ExecutionLog({ task: settings.task });
}
// ────────────────────────────────────────
// Main run loop
// ────────────────────────────────────────
/**
 * Execute the task until a command reports completion, the step budget is
 * exhausted, the run is stopped, or too many consecutive steps fail.
 *
 * Step-limit, stall and agent errors are collected into `errors` on the
 * returned outcome; any other error thrown outside a step propagates to the
 * caller. `onStepEnd` fires only for steps that do not finish the run, and
 * `onDone` fires once with the final outcome.
 *
 * @param stepLimit Optional override for `settings.stepLimit` for this run.
 * @returns The final result text, success flag, history, errors, optional
 *   judge verdicts and accumulated cost.
 */
async run(stepLimit?: number): Promise<RunOutcome> {
  const effectiveMaxSteps = stepLimit ?? this.settings.stepLimit;
  this.state.stepLimit = effectiveMaxSteps;
  this.state.isRunning = true;
  this.startTime = Date.now();

  // Ensure browser is started
  if (!this.browser.isConnected) {
    await this.browser.start();
  }

  // Build system prompt (may be rebuilt per step if dynamicCommandSchema is on)
  this.rebuildInstructionBuilder();

  // URL extraction: auto-navigate to first URL found in task text
  if (this.settings.autoNavigateToUrls) {
    await this.autoNavigateFromTask();
  }

  // Execute initial actions before the main loop
  if (this.settings.preflightCommands.length > 0) {
    await this.executeInitialActions();
  }

  const errors: string[] = [];
  let finalResult: string | undefined;
  let success = false;
  let judgement: EvaluationResult | undefined;
  let simpleJudgement: QuickCheckResult | undefined;
  // Consecutive throttle responses. Drives the backoff exponent only, so rate
  // limits never count toward failureThreshold.
  let throttleStreak = 0;

  try {
    for (let step = 1; step <= effectiveMaxSteps; step++) {
      if (!this.state.isRunning || this.state.isDone) break;

      // Pause support. Also watch isRunning so a run that is stopped while
      // paused leaves the wait instead of polling forever.
      while (this.state.isPaused && this.state.isRunning) {
        await sleep(100);
      }
      if (!this.state.isRunning) break;

      this.state.step = step;
      this.onStepStart?.(step);

      try {
        // Wrap step execution in optional timeout
        const stepPromise = this.executeStep(step, effectiveMaxSteps);
        const result = this.settings.stepDeadlineMs > 0
          ? await withDeadline(
              stepPromise,
              this.settings.stepDeadlineMs,
              `Step ${step} timed out after ${this.settings.stepDeadlineMs}ms`,
            )
          : await stepPromise;

        this.state.consecutiveFailures = 0;
        throttleStreak = 0;

        // Check if done
        const doneResult = result.find((r) => r.isDone);
        if (doneResult) {
          finalResult = doneResult.extractedContent;
          success = doneResult.success;

          // Simple judge: quick validation before accepting the result
          if (this.settings.enableSimpleJudge && this.judge && finalResult) {
            simpleJudgement = await this.judge.simpleEvaluate(
              this.settings.task,
              finalResult,
            );
            if (simpleJudgement.shouldRetry && step < effectiveMaxSteps) {
              logger.info(
                `Simple judge suggests retry: ${simpleJudgement.reason}`,
              );
              this.messageManager.addCommandResultMessage(
                `The result was reviewed and found lacking: ${simpleJudgement.reason}. ` +
                  'Please try a different approach to complete the task.',
                step,
              );
              // Don't mark as done -- continue the loop
              continue;
            }
          }

          this.state.isDone = true;
          break;
        }

        this.onStepEnd?.(step, result);

        // Planning: periodically update the plan
        if (this.settings.enableStrategy && this.shouldUpdatePlan(step)) {
          await this.updatePlan(step);
        }

        // Replan on stall: if loop detector shows stuck + planning enabled
        if (this.settings.restrategizeOnStall && this.settings.enableStrategy) {
          const loopCheck = this.loopDetector.isStuck();
          if (loopCheck.stuck && loopCheck.severity >= 2) {
            logger.info('Agent stalled, triggering replan');
            await this.updatePlan(step);
          }
        }

        // Message compaction: every N steps (LLM-based)
        if (this.messageManager.shouldCompactWithLlm()) {
          const compacted = await this.messageManager.compactWithLlm(this.model);
          if (compacted) {
            logger.debug(`Messages compacted at step ${step}`);
          }
        }

        // Save conversation per step if configured
        if (this.settings.conversationOutputPath) {
          await this.saveConversation(step);
        }
      } catch (error) {
        // A severe stall is raised before the model is consulted, so retrying
        // the step cannot change the outcome. Let it end the run through the
        // outer handler instead of burning retries as ordinary failures.
        if (error instanceof AgentStalledError) {
          throw error;
        }

        // Rate limit retry with exponential backoff
        if (error instanceof ModelThrottledError) {
          const waitMs = error.retryAfterMs ?? Math.min(
            60_000,
            this.settings.retryDelay * 1000 * 2 ** throttleStreak,
          );
          logger.warn(`Rate limited, waiting ${waitMs}ms before retry`);
          await sleep(waitMs);
          throttleStreak++;
          // Don't count rate limits toward max failures
          continue;
        }

        const message = error instanceof Error ? error.message : String(error);
        errors.push(`Step ${step}: ${message}`);
        this.state.failureCount++;
        this.state.consecutiveFailures++;

        if (this.state.consecutiveFailures >= this.settings.failureThreshold) {
          // Failure recovery: make one final LLM call to diagnose
          const failureSummary = await this.makeFailureRecoveryCall(errors);
          if (failureSummary) {
            finalResult = failureSummary;
          }
          throw new AgentError(
            `Too many consecutive failures (${this.state.consecutiveFailures})`,
          );
        }

        // Add error message to conversation
        this.messageManager.addCommandResultMessage(
          `Error: ${truncateText(message, 400)}`,
          step,
        );

        // Wait before retry
        await sleep(this.settings.retryDelay * 1000);
      }
    }

    if (!this.state.isDone && this.state.step >= effectiveMaxSteps) {
      throw new StepLimitExceededError(this.state.step, effectiveMaxSteps);
    }
  } catch (error) {
    if (
      error instanceof StepLimitExceededError ||
      error instanceof AgentStalledError ||
      error instanceof AgentError
    ) {
      errors.push(error.message);
    } else {
      throw error;
    }
  } finally {
    this.state.isRunning = false;
    // Save recording
    if (this.gifRecorder) {
      await this.gifRecorder.save();
    }
  }

  // Full judge evaluation after completion
  if (this.settings.enableEvaluation && this.judge && finalResult) {
    judgement = await this.judge.evaluate(
      this.settings.task,
      finalResult,
      this.historyList.entries,
      {
        expectedOutcome: this.settings.expectedOutcome,
        includeScreenshots: this.settings.enableScreenshots,
      },
    );
  }

  // Finalize history
  this.historyList.finish();

  const runResult: RunOutcome = {
    finalResult,
    success,
    history: this.historyList,
    errors,
    judgement,
    simpleJudgement,
    totalCost: { ...this.state.cumulativeCost },
  };
  this.onDone?.(runResult);
  return runResult;
}
// ────────────────────────────────────────
// Step Execution
// ────────────────────────────────────────
/**
 * Perform one observe → decide → act cycle and append it to the history.
 *
 * Reads browser and page state, optionally takes a screenshot, asks the
 * model for a decision, executes the chosen commands, feeds the (masked)
 * results back into the conversation and records a StepRecord.
 *
 * @param step 1-based number of the step being executed.
 * @param stepLimit Step budget of the current run, shown to the model.
 * @returns The command results exactly as returned by the executor.
 *   NOTE(review): these are the results before sensitive-data filtering;
 *   only the conversation and the history receive the filtered copy.
 *   Confirm that callers are meant to see the raw values.
 * @throws AgentStalledError when the stall detector reports severity >= 3.
 */
private async executeStep(step: number, stepLimit: number): Promise<CommandResult[]> {
  const timer = new Timer();

  // Get browser state
  const browserState = await this.browser.getState();
  this.state.currentUrl = browserState.url;

  // Dynamic action schema: rebuild system prompt per step based on current URL
  if (this.settings.dynamicCommandSchema) {
    this.rebuildInstructionBuilder(browserState.url);
  }

  // Extract DOM
  const domState = await this.domService.extractState(
    this.browser.currentPage,
    this.browser.cdp!,
  );

  // Take screenshot if using vision
  let screenshot: string | undefined;
  if (this.settings.enableScreenshots) {
    const screenshotResult = await this.browser.screenshot();
    screenshot = screenshotResult.base64;
    if (this.gifRecorder) {
      const actionLabel = browserState.url;
      this.gifRecorder.addFrame(screenshot, step, actionLabel);
    }
  }

  // Build state message
  const stateText = InstructionBuilder.buildStatePrompt(
    browserState.url,
    browserState.title,
    browserState.tabs,
    domState.tree,
    step,
    stepLimit,
    domState.pixelsAbove,
    domState.pixelsBelow,
  );

  // Check for loop
  const loopCheck = this.loopDetector.isStuck();
  let additionalContext = '';
  if (loopCheck.stuck) {
    additionalContext = InstructionBuilder.buildLoopNudge(
      this.loopDetector.getLoopNudgeMessage(),
    );
    // Severe loop: throw stuck error
    if (loopCheck.severity >= 3) {
      throw new AgentStalledError(
        `Agent stuck: ${loopCheck.reason} (severity ${loopCheck.severity})`,
      );
    }
  }

  // Add plan context if planning is enabled
  if (this.settings.enableStrategy && this.state.currentPlan) {
    additionalContext += InstructionBuilder.buildPlanPrompt(this.state.currentPlan);
  }

  // Add messages
  this.messageManager.addStateMessage(
    stateText + additionalContext,
    screenshot,
    step,
  );

  // Determine output schema based on mode
  const outputSchema = this.getOutputSchema();

  // Invoke LLM with optional timeout and Zod recovery
  const completion = await this.invokeLlmWithRecovery(outputSchema, step);

  // Update token tracking
  this.state.totalInputTokens += completion.usage.inputTokens;
  this.state.totalOutputTokens += completion.usage.outputTokens;

  // Cost tracking
  this.updateCostTracking(completion.usage.inputTokens, completion.usage.outputTokens, step);

  const output = completion.parsed;

  // Normalize output to standard AgentDecision shape
  const normalizedOutput = this.normalizeOutput(output);

  // Add assistant response
  this.messageManager.addAssistantMessage(
    JSON.stringify(normalizedOutput.currentState),
    step,
  );

  // Execute actions
  const context: ExecutionContext = {
    page: this.browser.currentPage,
    cdpSession: this.browser.cdp!,
    domService: this.domService,
    browserSession: this.browser,
    extractionLlm: this.extractionModel,
    fileSystem: this.fileSystem,
    maskedValues: this.settings.maskedValues,
  };
  const actions = normalizedOutput.actions as Command[];
  const results = await this.tools.executeActions(actions, context);

  // Record for loop detection (with enhanced fingerprint)
  this.loopDetector.recordAction(actions);
  this.loopDetector.recordFingerprint({
    url: browserState.url,
    domHash: hashPageTree(domState.tree),
    scrollY: domState.scrollPosition.y,
    elementCount: domState.elementCount,
    textHash: hashTextContent(domState.tree.slice(0, 2000)),
  });

  // Filter sensitive data from results
  const filteredResults = this.filterSensitiveData(results);

  // Add action results to conversation
  const resultText = filteredResults
    .map((r, i) => {
      const actionName = actions[i]?.action ?? 'unknown';
      const status = r.success ? 'success' : `error: ${r.error}`;
      const content = r.extractedContent
        ? `\nContent: ${r.extractedContent}`
        : '';
      return `${actionName}: ${status}${content}`;
    })
    .join('\n');
  if (resultText) {
    this.messageManager.addCommandResultMessage(resultText, step);
  }

  // Wait between actions
  // NOTE(review): the setting is named "...Ms" yet is multiplied by 1000
  // before being passed to sleep(), which is called with milliseconds
  // elsewhere in this file. Confirm the intended unit of commandDelayMs.
  if (this.settings.commandDelayMs > 0) {
    await sleep(this.settings.commandDelayMs * 1000);
  }

  // Read the clock and the timer once so duration, durationMs, startedAt and
  // completedAt describe the same instant and stay mutually consistent.
  const completedAt = Date.now();
  const durationMs = timer.elapsed();

  // Record history entry
  const entry: StepRecord = {
    step,
    timestamp: completedAt,
    browserState: {
      url: browserState.url,
      title: browserState.title,
      tabs: browserState.tabs,
      interactedElements: actions
        .filter((a): a is Command & { index: number } => 'index' in a)
        .map((a) => ({
          index: a.index,
          description: '',
          action: a.action,
        })),
      screenshot,
    },
    agentOutput: normalizedOutput as AgentDecision,
    actionResults: filteredResults,
    usage: completion.usage,
    duration: durationMs,
    metadata: {
      stepNumber: step,
      durationMs,
      inputTokens: completion.usage.inputTokens,
      outputTokens: completion.usage.outputTokens,
      actionCount: actions.length,
      url: browserState.url,
      startedAt: completedAt - durationMs,
      completedAt,
    },
  };
  this.historyList.addEntry(entry);

  return results;
}
// ────────────────────────────────────────
// LLM Invocation with Zod Recovery
// ────────────────────────────────────────
/**
 * Invoke the model with the current conversation and the given response
 * schema, re-prompting when the response fails schema validation.
 *
 * On a ZodError the validation issues are appended to the conversation and
 * the call is repeated, for at most two re-prompts. Every other error,
 * including ModelThrottledError, is rethrown unchanged so the run loop can
 * handle it.
 *
 * @param outputSchema Schema the model's response must satisfy.
 * @param step Step number used to tag messages added to the conversation.
 * @param retryCount Number of validation re-prompts already made.
 * @returns The parsed model output together with its token usage.
 */
private async invokeLlmWithRecovery(
  outputSchema: z.ZodType,
  step: number,
  retryCount = 0,
): Promise<{
  parsed: Record<string, unknown>;
  usage: { inputTokens: number; outputTokens: number; totalTokens: number };
}> {
  const messages = this.messageManager.getMessages();
  const invokeOptions: InferenceOptions = {
    messages,
    responseSchema: outputSchema,
    schemaName: this.getSchemaName(),
    schemaDescription: 'Agent decision with current state assessment and actions to take',
  };

  // Extended thinking: pass thinking budget as maxTokens
  if (
    this.settings.enableDeepReasoning &&
    supportsDeepReasoning(this.model.modelId)
  ) {
    invokeOptions.maxTokens = this.settings.reasoningBudget;
  }

  try {
    // Wrap LLM call in optional timeout
    const invokePromise = this.model.invoke(invokeOptions);
    const completion =
      this.settings.modelDeadlineMs > 0
        ? await withDeadline(
            invokePromise,
            this.settings.modelDeadlineMs,
            `LLM call timed out after ${this.settings.modelDeadlineMs}ms`,
          )
        : await invokePromise;

    return {
      parsed: completion.parsed as Record<string, unknown>,
      usage: completion.usage,
    };
  } catch (error) {
    // Zod validation error recovery: re-prompt with the error details
    if (error instanceof ZodError && retryCount < 2) {
      logger.warn(
        `Zod validation failed (attempt ${retryCount + 1}), re-prompting LLM`,
      );
      const issues = error.issues
        .map((issue) => `- ${issue.path.join('.')}: ${issue.message}`)
        .join('\n');
      this.messageManager.addCommandResultMessage(
        'Your previous response had a validation error. ' +
          'Please fix the following issues and respond again:\n' +
          `${issues}\n\n` +
          'Make sure your response matches the expected JSON schema exactly.',
        step,
      );
      return this.invokeLlmWithRecovery(outputSchema, step, retryCount + 1);
    }

    // Everything else (rate limits included) is handled by the main loop.
    throw error;
  }
}
// ────────────────────────────────────────
// Output Schema Selection
// ────────────────────────────────────────
/**
 * Decide which decision-schema variant applies to the configured model.
 * Single source of truth for getOutputSchema() and getSchemaName(), so the
 * schema handed to the model and the name it is sent under cannot disagree.
 */
private resolveDecisionMode(): 'compact' | 'direct' | 'full' {
  // Flash mode: simpler schema for cheaper / faster models
  if (this.settings.compactMode || isCompactModel(this.model.modelId)) {
    return 'compact';
  }
  // Extended thinking: model reasons internally, skip brain schema
  if (
    this.settings.enableDeepReasoning &&
    supportsDeepReasoning(this.model.modelId)
  ) {
    return 'direct';
  }
  return 'full';
}

/** Response schema the model must satisfy for the current mode. */
private getOutputSchema(): z.ZodType {
  switch (this.resolveDecisionMode()) {
    case 'compact':
      return AgentDecisionCompactSchema as z.ZodType;
    case 'direct':
      return AgentDecisionDirectSchema as z.ZodType;
    default:
      // Default full schema with brain + typed action union
      return z.object({
        currentState: ReasoningSchema,
        actions: z.array(CommandSchema),
      }) as z.ZodType;
  }
}

/** Name under which the schema for the current mode is sent to the model. */
private getSchemaName(): string {
  switch (this.resolveDecisionMode()) {
    case 'compact':
      return 'AgentDecisionCompact';
    case 'direct':
      return 'AgentDecisionDirect';
    default:
      return 'AgentDecision';
  }
}
/**
 * Normalize the various output schema shapes into the standard AgentDecision.
 *
 * - `{ goal, actions }` (compact): the goal becomes both `evaluation` and `nextGoal`.
 * - `{ actions }` only (direct): an empty `currentState` is filled in.
 * - anything else is treated as the standard shape and passed through.
 *
 * In every case a missing (`null` / `undefined`) `actions` value becomes an
 * empty array, so callers can always iterate the result.
 */
private normalizeOutput(output: Record<string, unknown>): AgentDecision {
  const hasCurrentState = 'currentState' in output;

  // Flash schema: { goal, actions }
  if ('goal' in output && !hasCurrentState) {
    const goal = String(output.goal ?? '');
    return {
      currentState: {
        evaluation: goal,
        memory: '',
        nextGoal: goal,
      },
      actions: (output.actions ?? []) as Record<string, unknown>[],
    };
  }

  // No-thinking schema: { actions } only
  if (!hasCurrentState && 'actions' in output) {
    return {
      currentState: {
        evaluation: '',
        memory: '',
        nextGoal: '',
      },
      actions: (output.actions ?? []) as Record<string, unknown>[],
    };
  }

  // Standard schema passthrough. Only a missing `actions` is patched, to
  // match the defaulting done by the two branches above.
  if (output.actions == null) {
    return { ...output, actions: [] } as AgentDecision;
  }
  return output as AgentDecision;
}
// ────────────────────────────────────────
// Planning System
// ────────────────────────────────────────
/**
 * Decide whether the plan is due for a refresh at `step`.
 *
 * @param step Current step number.
 * @returns `false` when `settings.enableStrategy` is off; otherwise `true`
 *   once at least `strategyInterval` steps (5 when the setting is not
 *   positive) have passed since `state.lastPlanStep` (treated as 0 if unset).
 */
private shouldUpdatePlan(step: number): boolean {
  const { enableStrategy, strategyInterval } = this.settings;
  if (!enableStrategy) return false;

  const interval = strategyInterval > 0 ? strategyInterval : 5;
  const stepsSinceLastPlan = step - (this.state.lastPlanStep ?? 0);
  return stepsSinceLastPlan >= interval;
}
/**
 * Ask the model for a revised plan and store it on the agent state.
 *
 * Builds a prompt from the task, the current plan (if any) and the
 * evaluations of the last five history entries, sends it as an ephemeral
 * message, and on success writes `state.currentPlan` / `state.lastPlanStep`.
 * Any failure is logged as a warning and otherwise ignored, so a failed plan
 * update never aborts the run.
 *
 * @param step Current step number, recorded as the step of the new plan.
 */
private async updatePlan(step: number): Promise<void> {
  try {
    const recentHistory = this.historyList.entries
      .slice(-5)
      .map(
        (e) =>
          `Step ${e.step}: ${e.agentOutput.currentState?.evaluation ?? '(no eval)'}`,
      )
      .join('\n');
    const planPrompt =
      `Task: ${this.settings.task}\n\n` +
      `Current step: ${step}/${this.state.stepLimit}\n` +
      (this.state.currentPlan
        ? `Current plan:\n${this.state.currentPlan}\n\n`
        : '') +
      `Recent progress:\n${recentHistory}\n\n` +
      'Based on the current progress, provide an updated plan. ' +
      'Include what has been accomplished and what remains.';
    // Use ephemeral message so the plan prompt doesn't persist
    this.messageManager.addEphemeralMessage(planPrompt);
    const completion = await this.model.invoke({
      messages: this.messageManager.getMessages(),
      responseSchema: PlanRevisionSchema,
      schemaName: 'PlanRevision',
      temperature: 0.3,
    });
    this.state.currentPlan = completion.parsed.plan;
    this.state.lastPlanStep = step;
    logger.info(`Plan updated at step ${step}: ${completion.parsed.reasoning}`);
  } catch (error) {
    logger.warn(
      `Plan update failed at step ${step}: ${
        error instanceof Error ? error.message : String(error)
      }`,
    );
  }
}
// ────────────────────────────────────────
// System Prompt Management
// ────────────────────────────────────────
/**
 * Regenerate the system prompt and hand it to the message manager.
 *
 * @param pageUrl Optional current page URL, forwarded to
 *   `InstructionBuilder.fromSettings` together with the settings and the
 *   tool registry.
 */
private rebuildInstructionBuilder(pageUrl?: string): void {
  const promptText = InstructionBuilder.fromSettings(
    this.settings,
    this.tools.registry,
    pageUrl,
  ).build();
  this.messageManager.setInstructionBuilder(promptText);
}
// ────────────────────────────────────────
// URL Extraction from Task Text
// ────────────────────────────────────────
/**
 * If the task text contains a URL, navigate the browser to the first one.
 *
 * Navigation is best-effort: a failure is logged as a warning and the
 * method still resolves normally.
 */
private async autoNavigateFromTask(): Promise<void> {
  const [firstUrl] = extractUrls(this.settings.task);
  // Nothing to do when the task mentions no URL.
  if (firstUrl === undefined) return;

  logger.info(`Auto-navigating to URL found in task: ${firstUrl}`);
  try {
    await this.browser.navigate(firstUrl);
    // Give the page a moment to load
    await sleep(1000);
  } catch (error) {
    logger.warn(
      `Auto-navigation to ${firstUrl} failed: ${
        error instanceof Error ? error.message : String(error)
      }`,
    );
  }
}
// ────────────────────────────────────────
// Initial Actions
// ────────────────────────────────────────
/**
 * Run the configured preflight commands, in order, before the main loop.
 *
 * Each command is executed independently: a failing command is logged as a
 * warning and the remaining commands still run. After the last command the
 * method waits 500 ms. When no preflight commands are configured it returns
 * immediately without logging or waiting.
 */
private async executeInitialActions(): Promise<void> {
  const commands = this.settings.preflightCommands ?? [];
  if (commands.length === 0) return;

  logger.info(`Executing ${commands.length} initial action(s)`);
  // NOTE(review): the context is built once, so `page` is whatever
  // `browser.currentPage` was before the first command ran — confirm that is
  // intended if a preflight command can switch pages.
  const context: ExecutionContext = {
    page: this.browser.currentPage,
    cdpSession: this.browser.cdp!,
    domService: this.domService,
    browserSession: this.browser,
    extractionLlm: this.extractionModel,
    fileSystem: this.fileSystem,
    maskedValues: this.settings.maskedValues,
  };
  for (const action of commands) {
    try {
      await this.tools.executeAction(action, context);
      logger.debug(`Initial action ${action.action} completed`);
    } catch (error) {
      logger.warn(
        `Initial action ${action.action} failed: ${
          error instanceof Error ? error.message : String(error)
        }`,
      );
    }
  }
  await sleep(500);
}
// ────────────────────────────────────────
// Failure Recovery
// ────────────────────────────────────────
/**
 * On max failures, make one final LLM call to produce a diagnostic summary.
 *
 * Only the last five entries of `errors` are sent to the model, together
 * with the task text.
 *
 * @param errors Error messages collected during the run, oldest first.
 * @returns A "Task failed. Diagnosis: … Suggestion: …" string, or
 *   `undefined` if the recovery call itself fails (the reason is logged at
 *   debug level).
 */
private async makeFailureRecoveryCall(
  errors: string[],
): Promise<string | undefined> {
  try {
    const errorSummary = errors.slice(-5).join('\n');
    const recoverySchema = z.object({
      diagnosis: z.string().describe('What went wrong'),
      suggestion: z.string().describe('What could be tried differently'),
    });
    const completion = await this.model.invoke({
      messages: [
        {
          role: 'system' as const,
          content:
            'You are a diagnostic assistant. Analyze the errors that occurred during ' +
            'a web browsing automation task and provide a brief diagnosis.',
        },
        {
          role: 'user' as const,
          content:
            `Task: ${this.settings.task}\n\n` +
            `Errors encountered:\n${errorSummary}\n\n` +
            'Provide a brief diagnosis of what went wrong and what could be tried differently.',
        },
      ],
      responseSchema: recoverySchema,
      schemaName: 'FailureRecovery',
      temperature: 0,
    });
    const result =
      `Task failed. Diagnosis: ${completion.parsed.diagnosis}. ` +
      `Suggestion: ${completion.parsed.suggestion}`;
    logger.info(`Failure recovery: ${result}`);
    return result;
  } catch (error) {
    // Best-effort call: never throw from here, but keep the reason visible.
    logger.debug(
      `Failure recovery call itself failed: ${
        error instanceof Error ? error.message : String(error)
      }`,
    );
    return undefined;
  }
}
// ────────────────────────────────────────
// Cost Tracking
// ────────────────────────────────────────
/**
 * Fold one step's token usage into the cumulative cost counters.
 *
 * Token totals are always updated. Monetary totals are updated (and the step
 * cost logged at debug level) only when `calculateStepCost` returns a value
 * for this model.
 *
 * @param inputTokens Input tokens consumed by the step.
 * @param outputTokens Output tokens produced by the step.
 * @param step Step number, used only in the debug log line.
 */
private updateCostTracking(
  inputTokens: number,
  outputTokens: number,
  step: number,
): void {
  const stepCost = calculateStepCost(
    inputTokens,
    outputTokens,
    this.model.modelId,
  );

  const totals = this.state.cumulativeCost;
  totals.totalInputTokens += inputTokens;
  totals.totalOutputTokens += outputTokens;

  if (!stepCost) return;

  totals.totalInputCost += stepCost.inputCost;
  totals.totalOutputCost += stepCost.outputCost;
  totals.totalCost += stepCost.totalCost;

  const stepDollars = stepCost.totalCost.toFixed(4);
  const runningDollars = totals.totalCost.toFixed(4);
  logger.debug(
    `Step ${step} cost: $${stepDollars} ` + `(cumulative: $${runningDollars})`,
  );
}
// ────────────────────────────────────────
// Sensitive Data Filtering
// ────────────────────────────────────────
/**
 * Replace every configured secret value found in `extractedContent` with a
 * `<key>` placeholder, where `key` is the secret's name in
 * `settings.maskedValues`.
 *
 * Empty secret values are ignored, and longer secrets are replaced first so
 * a secret that is a prefix of another cannot leave the longer one's tail
 * exposed. Matching is literal (no regular-expression or `$`-pattern
 * interpretation).
 *
 * @param results Command results to filter.
 * @returns `results` itself when there is nothing to mask; otherwise a new
 *   array in which results that carry `extractedContent` are shallow copies
 *   with the masked text.
 */
private filterSensitiveData(results: CommandResult[]): CommandResult[] {
  const maskedValues = this.settings.maskedValues;
  if (!maskedValues) return results;

  const secrets = Object.entries(maskedValues)
    // An empty value would match at every position of the text.
    .filter(([, value]) => Boolean(value))
    .sort(([, a], [, b]) => b.length - a.length);
  if (secrets.length === 0) return results;

  return results.map((r) => {
    if (!r.extractedContent) return r;
    let content = r.extractedContent;
    for (const [key, value] of secrets) {
      // split/join replaces literally, so neither the secret nor the key is
      // interpreted as a pattern.
      content = content.split(value).join(`<${key}>`);
    }
    return { ...r, extractedContent: content };
  });
}
// ────────────────────────────────────────
// Save Conversation
// ────────────────────────────────────────
/**
 * Write the conversation to `settings.conversationOutputPath`, if configured.
 *
 * Every `{step}` token in the configured path is replaced by the step number.
 * Failures are logged at debug level and never propagate.
 *
 * @param step Current step number.
 */
private async saveConversation(step: number): Promise<void> {
  const template = this.settings.conversationOutputPath;
  if (!template) return;
  try {
    const filePath = template.replace(/\{step\}/g, step.toString());
    await this.messageManager.saveToFile(filePath);
  } catch (error) {
    logger.debug(
      `Failed to save conversation at step ${step}: ${
        error instanceof Error ? error.message : String(error)
      }`,
    );
  }
}
// ────────────────────────────────────────
// Follow-up Tasks
// ────────────────────────────────────────
/**
 * Queue a follow-up task. The task is appended to the agent's follow-up list
 * (readable through getFollowUpTasks()) and an info line with the first 100
 * characters of the task is logged.
 *
 * @param task Task description to queue.
 */
addNewTask(task: string): void {
  const queue = this.followUpTasks;
  queue.push(task);

  const preview = truncateText(task, 100);
  logger.info(`Follow-up task added: ${preview}`);
}
/**
 * @returns A copy of the queued follow-up tasks, in insertion order.
 *   Mutating the returned array does not affect the agent's own queue.
 */
getFollowUpTasks(): string[] {
  return this.followUpTasks.slice();
}
// ────────────────────────────────────────
// Control Methods
// ────────────────────────────────────────
/** Set the `isPaused` flag on the agent state. */
pause(): void {
  const { state } = this;
  state.isPaused = true;
}
/** Clear the `isPaused` flag on the agent state. */
resume(): void {
  const { state } = this;
  state.isPaused = false;
}
/** Clear the `isRunning` flag on the agent state. */
stop(): void {
  const { state } = this;
  state.isRunning = false;
}
/**
 * @returns A snapshot of the agent state. The top-level object and the
 *   nested `cumulativeCost` totals are copied, so writing to either on the
 *   returned value does not change the agent's own counters. Other nested
 *   values are still shared by reference.
 */
getState(): AgentState {
  return {
    ...this.state,
    cumulativeCost: { ...this.state.cumulativeCost },
  };
}
/**
 * @returns The agent's execution log. This is the live object, not a copy.
 */
getHistory(): ExecutionLog {
  const { historyList } = this;
  return historyList;
}
/**
 * @returns A shallow copy of the cumulative token and cost totals.
 */
getAccumulatedCost(): AccumulatedCost {
  const totals = this.state.cumulativeCost;
  return { ...totals };
}
}
================================================
FILE: packages/core/src/agent/conversation/service.ts
================================================
import { z } from 'zod';
import type { Message } from '../../model/messages.js';
import {
systemMessage,
userMessage,
assistantMessage,
imageContent,
textContent,
type ContentPart,
} from '../../model/messages.js';
import type { LanguageModel } from '../../model/interface.js';
import type {
ConversationManagerOptions,
TrackedMessage,
ConversationManagerState,
ConversationEntry,
SerializedTrackedMessage,
MessageCategory,
} from './types.js';
import {
estimateTokens,
estimateMessageTokens,
redactMessages,
extractTextContent,
truncate,
} from './utils.js';
// ── LLM Compaction Summary Schema ──
/** Text field that carries the model-written summary. */
const compactionSummaryField = z
  .string()
  .describe('Concise summary of the conversation so far');

/** Structured response requested from the model during LLM-based compaction. */
const CompactionSummarySchema = z.object({ summary: compactionSummaryField });
// ── ConversationManager ──
/**
 * Holds the running agent conversation: the system prompt, the tracked
 * message list that is sent to the model, and a parallel human-readable
 * history log.
 *
 * The manager also trims the message list when its estimated token total
 * exceeds `options.contextWindowSize`, can summarize older messages with an
 * LLM, and can save/restore itself as JSON.
 */
export class ConversationManager {
  private messages: TrackedMessage[] = [];
  private systemPromptMessage: Message | null = null;
  private systemPromptText: string | null = null;
  private options: ConversationManagerOptions;
  private historyItems: ConversationEntry[] = [];
  private currentStep = 0;
  private lastCompactionStep = 0;

  constructor(options: ConversationManagerOptions) {
    this.options = options;
  }

  // ────────────────────────────────────────
  // System Prompt
  // ────────────────────────────────────────

  /** Set (or replace) the system prompt that is placed first in getMessages(). */
  setInstructionBuilder(prompt: string): void {
    this.systemPromptText = prompt;
    this.systemPromptMessage = systemMessage(prompt);
  }

  // ────────────────────────────────────────
  // Add Messages
  // ────────────────────────────────────────

  /**
   * Append a browser-state message (user role).
   *
   * @param stateText Text description of the current state.
   * @param screenshot Optional screenshot, attached as a PNG image part only
   *   when `options.includeLastScreenshot` is enabled.
   * @param step Step number; when given it also becomes the current step.
   */
  addStateMessage(
    stateText: string,
    screenshot?: string,
    step?: number,
  ): void {
    const content: ContentPart[] = [textContent(stateText)];
    const attachScreenshot = Boolean(
      screenshot && this.options.includeLastScreenshot,
    );
    if (screenshot && attachScreenshot) {
      content.push(imageContent(screenshot, 'image/png'));
    }
    if (step !== undefined) this.currentStep = step;
    this.messages.push({
      message: userMessage(content),
      isCompactable: true,
      tokenEstimate: estimateMessageTokens(content),
      step,
      category: 'state',
      addedAt: Date.now(),
    });
    // Record a screenshot only when one was actually attached to the message.
    this.recordConversationEntry(
      step ?? this.currentStep,
      'state',
      stateText,
      attachScreenshot,
    );
  }

  /** Append the agent's own response (assistant role). */
  addAssistantMessage(text: string, step?: number): void {
    if (step !== undefined) this.currentStep = step;
    this.messages.push({
      message: assistantMessage(text),
      isCompactable: true,
      tokenEstimate: estimateTokens(text),
      step,
      category: 'assistant',
      addedAt: Date.now(),
    });
    this.recordConversationEntry(step ?? this.currentStep, 'assistant', text);
  }

  /** Append the textual result of executed commands (user role). */
  addCommandResultMessage(text: string, step?: number): void {
    if (step !== undefined) this.currentStep = step;
    this.messages.push({
      message: userMessage(text),
      isCompactable: true,
      tokenEstimate: estimateTokens(text),
      step,
      category: 'action_result',
      addedAt: Date.now(),
    });
    this.recordConversationEntry(step ?? this.currentStep, 'action_result', text);
  }

  /**
   * Append a user message that basic compaction never replaces
   * (`isCompactable: false`). It is recorded under the current step.
   */
  addUserMessage(text: string): void {
    this.messages.push({
      message: userMessage(text),
      isCompactable: false,
      tokenEstimate: estimateTokens(text),
      category: 'user',
      addedAt: Date.now(),
    });
    this.recordConversationEntry(this.currentStep, 'user', text);
  }

  /**
   * Add an ephemeral message that is included in the next getMessages() call
   * and then automatically removed. Useful for one-shot instructions or
   * temporary context that should not persist across steps.
   *
   * Ephemeral messages are not written to the history log and are not
   * included in save().
   */
  addEphemeralMessage(text: string, role: 'user' | 'assistant' = 'user'): void {
    const msg =
      role === 'user' ? userMessage(text) : assistantMessage(text);
    this.messages.push({
      message: msg,
      isCompactable: false,
      tokenEstimate: estimateTokens(text),
      category: role === 'user' ? 'user' : 'assistant',
      ephemeral: true,
      ephemeralRead: false,
      addedAt: Date.now(),
    });
  }

  // ────────────────────────────────────────
  // Get Messages (with compaction + filtering)
  // ────────────────────────────────────────

  /**
   * Build the message list to send to the model: system prompt first (if
   * set), then every tracked message in insertion order.
   *
   * Side effects: runs basic compaction first when the estimated total
   * exceeds `options.contextWindowSize`, and drops ephemeral messages once
   * they have been returned. When `options.maskedValues` is non-empty the
   * returned messages are redacted copies.
   */
  getMessages(): Message[] {
    // Compact before snapshotting so the result reflects the trimmed list.
    if (this.estimateTotalTokens() > this.options.contextWindowSize) {
      this.compact();
    }

    const result: Message[] = [];
    if (this.systemPromptMessage) {
      result.push(this.systemPromptMessage);
    }
    for (const managed of this.messages) {
      result.push(managed.message);
    }

    // Ephemeral messages have now been delivered exactly once.
    this.consumeEphemeralMessages();

    // Apply sensitive data filtering
    const maskedValues = this.options.maskedValues;
    if (maskedValues && Object.keys(maskedValues).length > 0) {
      return redactMessages(result, maskedValues);
    }
    return result;
  }

  // ────────────────────────────────────────
  // Ephemeral Message Lifecycle
  // ────────────────────────────────────────

  /**
   * Remove every ephemeral message. Called by getMessages() right after the
   * messages were copied into its result, so each ephemeral message is
   * returned by exactly one getMessages() call.
   */
  private consumeEphemeralMessages(): void {
    this.messages = this.messages.filter((m) => !m.ephemeral);
  }

  // ────────────────────────────────────────
  // Token Estimation
  // ────────────────────────────────────────

  /**
   * Sum of the stored per-message token estimates plus an estimate for the
   * system prompt (counted only when its content is a plain string).
   */
  estimateTotalTokens(): number {
    let total = 0;
    if (this.systemPromptMessage) {
      total += estimateTokens(
        typeof this.systemPromptMessage.content === 'string'
          ? this.systemPromptMessage.content
          : '',
      );
    }
    for (const managed of this.messages) {
      total += managed.tokenEstimate;
    }
    return total;
  }

  // ────────────────────────────────────────
  // Basic Compaction (image removal + old message replacement)
  // ────────────────────────────────────────

  /**
   * Basic compaction: first strip screenshots from all but the newest
   * compactable message that has one; if the total is still over budget,
   * replace the oldest compactable messages with short placeholders.
   */
  private compact(): void {
    this.stripOlderScreenshots();
    this.replaceOldestWithPlaceholders();
  }

  /** Remove image parts from every compactable message except the newest one that has an image. */
  private stripOlderScreenshots(): void {
    const isImagePart = (p: unknown): boolean =>
      typeof p === 'object' && p !== null && (p as ContentPart).type === 'image';

    let keptNewest = false;
    for (let i = this.messages.length - 1; i >= 0; i--) {
      const msg = this.messages[i];
      if (!msg.isCompactable) continue;
      const content = msg.message.content;
      if (!Array.isArray(content) || !content.some(isImagePart)) continue;

      if (!keptNewest) {
        // Walking newest → oldest: the first image found is the one to keep.
        keptNewest = true;
        continue;
      }
      const withoutImages = content.filter((p) => !isImagePart(p));
      // A message made only of images is left untouched rather than emptied.
      if (withoutImages.length > 0) {
        msg.message = userMessage(withoutImages as ContentPart[]);
        msg.tokenEstimate = estimateMessageTokens(withoutImages);
      }
    }
  }

  /**
   * Replace compactable messages, oldest first, with a one-line placeholder
   * until the estimated total fits `options.contextWindowSize`.
   *
   * Existing placeholders/summaries and messages that are not larger than
   * their placeholder are skipped, so every replacement lowers the total and
   * the scan always terminates. Nothing is replaced when there are four or
   * fewer tracked messages.
   */
  private replaceOldestWithPlaceholders(): void {
    if (this.messages.length <= 4) return;

    const budget = this.options.contextWindowSize;
    let total = this.estimateTotalTokens();

    for (let i = 0; i < this.messages.length && total > budget; i++) {
      const current = this.messages[i];
      if (!current.isCompactable) continue;
      if (current.category === 'compaction_summary') continue;

      const placeholder = `[Step ${current.step ?? '?'} state omitted to save tokens]`;
      const placeholderTokens = estimateTokens(placeholder);
      if (placeholderTokens >= current.tokenEstimate) continue;

      this.messages[i] = {
        message: userMessage(placeholder),
        isCompactable: true,
        tokenEstimate: placeholderTokens,
        step: current.step,
        category: 'compaction_summary',
        addedAt: Date.now(),
      };
      total -= current.tokenEstimate - placeholderTokens;
    }
  }

  // ────────────────────────────────────────
  // LLM-Based Compaction
  // ────────────────────────────────────────

  /** Token total above which LLM compaction is worthwhile: the configured target, or 60% of the context window. */
  private compactionTargetTokens(
    config: NonNullable<ConversationManagerOptions['compaction']>,
  ): number {
    return (
      config.targetTokens ?? Math.floor(this.options.contextWindowSize * 0.6)
    );
  }

  /**
   * Run LLM-based message compaction: send the older portion of the conversation
   * to a summarization model and replace it with a single summary message.
   *
   * Call this periodically (e.g. every N steps as configured in compaction.interval).
   *
   * @param model Model to summarize with; defaults to `options.compactionModel`.
   * @returns true if compaction was performed, false if it was skipped (no
   *   config, no model, interval not reached, already under target, nothing
   *   to summarize) or the model call failed.
   */
  async compactWithLlm(model?: LanguageModel): Promise<boolean> {
    const compactionConfig = this.options.compaction;
    if (!compactionConfig) return false;
    const llm = model ?? this.options.compactionModel;
    if (!llm) return false;

    // Only compact if enough steps have passed since last compaction
    if (
      compactionConfig.interval > 0 &&
      this.currentStep - this.lastCompactionStep < compactionConfig.interval
    ) {
      return false;
    }

    // If we're under the target, no need to compact
    if (this.estimateTotalTokens() <= this.compactionTargetTokens(compactionConfig)) {
      return false;
    }

    // Split messages: keep the last few messages intact, summarize the rest
    const keepCount = Math.min(6, Math.floor(this.messages.length / 2));
    const splitAt = this.messages.length - keepCount;
    const toSummarize = this.messages.slice(0, splitAt);
    const toKeep = this.messages.slice(splitAt);
    if (toSummarize.length === 0) return false;

    // Build a transcript of the messages to summarize
    const transcript = toSummarize
      .map((m) => {
        const role = m.message.role;
        const text = extractTextContent(m.message);
        const stepLabel = m.step !== undefined ? ` (step ${m.step})` : '';
        return `[${role}${stepLabel}]: ${truncate(text, 500)}`;
      })
      .join('\n');

    const prompt = [
      systemMessage(
        'You are a conversation summarizer. Summarize the following agent-browser conversation transcript. ' +
          'Preserve key facts: URLs visited, actions taken, errors encountered, extracted data, and the current task state. ' +
          'Be concise but complete.',
      ),
      userMessage(
        `Summarize this conversation transcript:\n\n${transcript}`,
      ),
    ];

    // Label the summary with the last step number present in the summarized
    // span; the final message may be one that carries no step.
    let lastSummarizedStep: number | undefined;
    for (let i = toSummarize.length - 1; i >= 0; i--) {
      const candidate = toSummarize[i].step;
      if (candidate !== undefined) {
        lastSummarizedStep = candidate;
        break;
      }
    }

    try {
      const completion = await llm.invoke({
        messages: prompt,
        responseSchema: CompactionSummarySchema,
        schemaName: 'CompactionSummary',
        schemaDescription: 'A concise summary of the conversation so far',
        maxTokens: compactionConfig.maxTokens,
        temperature: 0,
      });
      const summaryText = `[Conversation summary of steps 1-${lastSummarizedStep ?? '?'}]\n${completion.parsed.summary}`;

      // Replace the summarized messages with a single summary
      this.messages = [
        {
          message: userMessage(summaryText),
          isCompactable: false, // Don't re-compact the summary
          tokenEstimate: estimateTokens(summaryText),
          category: 'compaction_summary',
          addedAt: Date.now(),
        },
        ...toKeep,
      ];
      this.lastCompactionStep = this.currentStep;
      return true;
    } catch {
      // Deliberately best-effort: on failure the messages are left untouched
      // and basic compaction in getMessages() remains the safety net.
      return false;
    }
  }

  /**
   * Check whether LLM compaction should run at the current step.
   * This is a convenience check; the caller can use it to decide whether
   * to call compactWithLlm().
   *
   * @returns true when compaction is configured with a positive interval,
   *   that many steps have passed since the last LLM compaction, and the
   *   estimated total exceeds the compaction target.
   */
  shouldCompactWithLlm(): boolean {
    const config = this.options.compaction;
    if (!config || config.interval <= 0) return false;
    return (
      this.currentStep - this.lastCompactionStep >= config.interval &&
      this.estimateTotalTokens() > this.compactionTargetTokens(config)
    );
  }

  // ────────────────────────────────────────
  // History Items & Description
  // ────────────────────────────────────────

  /** Append an entry to the history log; `summary` is capped at 120 characters and `content` at 2000. */
  private recordConversationEntry(
    step: number,
    category: MessageCategory,
    content: string,
    hasScreenshot?: boolean,
  ): void {
    this.historyItems.push({
      step,
      category,
      summary: truncate(content, 120),
      content: truncate(content, 2000),
      hasScreenshot,
      timestamp: Date.now(),
    });
  }

  /**
   * Build a human-readable description of the agent's history,
   * with "N steps omitted" truncation for long histories.
   *
   * @param stepLimitShown Maximum number of steps to show in full detail.
   *   If the history is longer, middle steps are replaced with a "N steps omitted" line.
   *   Negative values are treated as 0 and fractions are rounded down.
   * @returns The description, or `'(no history)'` when nothing was recorded.
   */
  agentHistoryDescription(stepLimitShown = 10): string {
    // Group history items by step
    const byStep = new Map<number, ConversationEntry[]>();
    for (const item of this.historyItems) {
      const existing = byStep.get(item.step);
      if (existing) {
        existing.push(item);
      } else {
        byStep.set(item.step, [item]);
      }
    }

    const stepNumbers = [...byStep.keys()].sort((a, b) => a - b);
    if (stepNumbers.length === 0) return '(no history)';

    const describe = (stepNum: number): string =>
      this.formatStepDescription(stepNum, byStep.get(stepNum) ?? []);

    // Keep the slice arithmetic below well-defined for odd inputs.
    const limit = Math.max(0, Math.floor(stepLimitShown));
    if (stepNumbers.length <= limit) {
      return stepNumbers.map(describe).join('\n');
    }

    // Show first few, omitted middle, last few
    const headCount = Math.ceil(limit / 2);
    const tailCount = limit - headCount;
    const headSteps = stepNumbers.slice(0, headCount);
    const tailSteps = stepNumbers.slice(stepNumbers.length - tailCount);
    const omittedCount = stepNumbers.length - headCount - tailCount;

    const lines: string[] = [
      ...headSteps.map(describe),
      ` ... (${omittedCount} steps omitted) ...`,
      ...tailSteps.map(describe),
    ];
    return lines.join('\n');
  }

  /** Render one step as a "Step N:" header followed by one labelled line per history entry. */
  private formatStepDescription(step: number, items: ConversationEntry[]): string {
    const parts = items.map((item) => {
      const prefix = item.category === 'state' ? 'State' :
        item.category === 'assistant' ? 'Agent' :
        item.category === 'action_result' ? 'Result' :
        item.category === 'user' ? 'User' : item.category;
      return `${prefix}: ${item.summary}`;
    });
    return `Step ${step}:\n ${parts.join('\n ')}`;
  }

  /** Get all recorded history items. */
  getConversationEntrys(): readonly ConversationEntry[] {
    return this.historyItems;
  }

  // ────────────────────────────────────────
  // Save / Load (Conversation Persistence)
  // ────────────────────────────────────────

  /**
   * Serialize the current state to a persistence-friendly snapshot.
   * Only the text parts of each message are kept (image parts are dropped),
   * and ephemeral messages are left out because they are not meant to persist.
   */
  save(): ConversationManagerState {
    const serialized: SerializedTrackedMessage[] = this.messages
      .filter((m) => !m.ephemeral)
      .map((m) => ({
        role: m.message.role,
        content: extractTextContent(m.message),
        isCompactable: m.isCompactable,
        tokenEstimate: m.tokenEstimate,
        step: m.step,
        category: m.category,
      }));
    return {
      systemPrompt: this.systemPromptText,
      messages: serialized,
      historyItems: [...this.historyItems],
      currentStep: this.currentStep,
    };
  }

  /**
   * Restore the ConversationManager from a previously saved state.
   * This replaces all current messages and history. Messages whose saved
   * role is not `'assistant'` are restored as user messages. The LLM
   * compaction marker is reset so it cannot point past the restored step.
   */
  load(state: ConversationManagerState): void {
    if (state.systemPrompt) {
      this.setInstructionBuilder(state.systemPrompt);
    } else {
      this.systemPromptMessage = null;
      this.systemPromptText = null;
    }
    this.messages = state.messages.map((s) => ({
      message:
        s.role === 'assistant'
          ? assistantMessage(s.content)
          : userMessage(s.content),
      isCompactable: s.isCompactable,
      tokenEstimate: s.tokenEstimate,
      step: s.step,
      category: s.category,
      addedAt: Date.now(),
    }));
    this.historyItems = [...state.historyItems];
    this.currentStep = state.currentStep;
    this.lastCompactionStep = 0;
  }

  /**
   * Save the conversation state to a JSON file, creating parent directories
   * as needed.
   *
   * @returns The path that was written.
   */
  async saveToFile(filePath: string): Promise<string> {
    const { writeFile, mkdir } = await import('node:fs/promises');
    const { dirname } = await import('node:path');
    await mkdir(dirname(filePath), { recursive: true });
    const json = JSON.stringify(this.save(), null, 2);
    await writeFile(filePath, json, 'utf-8');
    return filePath;
  }

  /**
   * Load conversation state from a JSON file.
   *
   * @throws Error if the file cannot be read or parsed, or if the parsed
   *   value lacks the `messages` / `historyItems` arrays.
   */
  async loadFromFile(filePath: string): Promise<void> {
    const { readFile } = await import('node:fs/promises');
    const raw = await readFile(filePath, 'utf-8');
    const parsed: unknown = JSON.parse(raw);

    // Minimal shape check so a wrong file fails here with a clear message
    // instead of deep inside load().
    const candidate = parsed as Partial<ConversationManagerState> | null;
    if (
      typeof candidate !== 'object' ||
      candidate === null ||
      !Array.isArray(candidate.messages) ||
      !Array.isArray(candidate.historyItems)
    ) {
      throw new Error(`Invalid conversation state file: ${filePath}`);
    }
    this.load(candidate as ConversationManagerState);
  }

  // ────────────────────────────────────────
  // Accessors
  // ────────────────────────────────────────

  /** Number of tracked messages, plus one when a system prompt is set. */
  get messageCount(): number {
    return this.messages.length + (this.systemPromptMessage ? 1 : 0);
  }

  /** The most recent step number passed to one of the add*Message methods (or restored by load). */
  get step(): number {
    return this.currentStep;
  }

  /** Drop all messages and history and reset the step counters. The system prompt is kept. */
  clear(): void {
    this.messages = [];
    this.historyItems = [];
    this.currentStep = 0;
    this.lastCompactionStep = 0;
  }

  /**
   * Remove all messages but preserve history items and step counter.
   * Useful when restarting message context without losing the history summary.
   */
  resetMessages(): void {
    this.messages = [];
    this.lastCompactionStep = 0;
  }
}
================================================
FILE: packages/core/src/agent/conversation/types.ts
================================================
import type { Message } from '../../model/messages.js';
import type { CompactionPolicy } from '../types.js';
import type { LanguageModel } from '../../model/interface.js';
// ── Conversation Manager Options ──
/** Construction options for ConversationManager. */
export interface ConversationManagerOptions {
  /** Estimated-token budget; exceeding it triggers compaction of the message list. */
  contextWindowSize: number;
  /**
   * Optional custom token estimator.
   * NOTE(review): nothing in the visible ConversationManager code reads this
   * option — confirm whether it is still wired up anywhere.
   */
  estimateTokens?: (text: string) => number;
  /** When true, a screenshot passed with a state message is attached to that message. */
  includeLastScreenshot: boolean;
  /** Sensitive key-value pairs to mask in outgoing messages. */
  maskedValues?: Record<string, string>;
  /** LLM-based compaction configuration. */
  compaction?: CompactionPolicy;
  /** LanguageModel used for LLM-based compaction. Ignored if compaction is not set. */
  compactionModel?: LanguageModel;
}
// ── Managed Message ──
/**
 * Semantic label attached to tracked messages and history entries.
 * `'compaction_summary'` marks text that stands in for compacted messages.
 */
export type MessageCategory =
| 'system'
| 'state'
| 'action_result'
| 'assistant'
| 'user'
| 'compaction_summary';
/** A model message together with the bookkeeping the conversation manager keeps for it. */
export interface TrackedMessage {
/** The message as it is sent to the model. */
message: Message;
/** Whether compaction may strip or replace this message. */
isCompactable: boolean;
/** Estimated token cost of `message`. */
tokenEstimate: number;
/** Step number the message was added for, when known. */
step?: number;
/** Semantic category for structured history tracking. */
category?: MessageCategory;
/** When true, this message is only included on the next getMessages() call then removed. */
ephemeral?: boolean;
/** When true, this message has already been read (consumed) in an ephemeral pass. */
ephemeralRead?: boolean;
/** Timestamp when this message was added. */
addedAt?: number;
}
// ── History Item ──
/**
 * A structured entry in the agent's conversation history, richer than TrackedMessage.
 * Used for building human-readable summaries and for save/load.
 */
export interface ConversationEntry {
/** Step number this item belongs to. */
step: number;
/** Category of this history item. */
category: MessageCategory;
/** Brief human-readable summary of this item (e.g. "Clicked element 5" or "Navigated to google.com"). */
summary: string;
/** The full text content (truncated for large payloads). */
content?: string;
/** Whether this item included a screenshot. */
hasScreenshot?: boolean;
/** Time the entry was recorded. NOTE(review): presumably epoch milliseconds — confirm against the producer. */
timestamp: number;
}
// ── Conversation Manager State (persistence) ──
/**
 * Serializable snapshot of the ConversationManager for save/load.
 */
export interface ConversationManagerState {
/** System prompt text, or null when none was set. */
systemPrompt: string | null;
/** Tracked messages in their serialized, text-only form. */
messages: SerializedTrackedMessage[];
/** The recorded history entries. */
historyItems: ConversationEntry[];
/** Step count at the time of snapshot. */
currentStep: number;
}
/**
 * Serializable form of TrackedMessage. `content` is plain text, so image
 * parts (base64 screenshots) of the original message are not carried over.
 */
export interface SerializedTrackedMessage {
/** Role of the original message (for example 'user' or 'assistant'). */
role: string;
/** Text content of the original message. */
content: string;
/** Whether compaction may strip or replace this message. */
isCompactable: boolean;
/** Estimated token cost stored with the original message. */
tokenEstimate: number;
/** Step number the message was added for, when known. */
step?: number;
/** Semantic category of the message, when known. */
category?: MessageCategory;
}
================================================
FILE: packages/core/src/agent/conversation/utils.ts
================================================
import type { Message } from '../../model/messages.js';
import type { ContentPart } from '../../model/messages.js';
/**
 * Cheap token-count heuristic: one token per four characters, rounded up.
 *
 * @param text Text to measure.
 * @returns Estimated token count (0 for an empty string).
 */
export function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const tokens = text.length / charsPerToken;
  return Math.ceil(tokens);
}
/**
 * Estimate the token cost of message content.
 *
 * @param content Either a plain string or an array of content parts.
 * @returns For a string, its text estimate. For an array, the sum over its
 *   parts: text parts use the text estimate, image parts count as a flat
 *   1000 tokens, and anything else contributes 0.
 */
export function estimateMessageTokens(content: string | unknown[]): number {
  if (typeof content === 'string') {
    return estimateTokens(content);
  }
  let total = 0;
  for (const part of content) {
    if (typeof part !== 'object' || part === null) continue;
    const p = part as Record<string, unknown>;
    if (p.type === 'text' && typeof p.text === 'string') {
      total += estimateTokens(p.text);
    } else if (p.type === 'image') {
      total += 1000; // Approximate cost for an image
    }
  }
  return total;
}
// ── Sensitive Data Filtering ──
/** Text that replaces every redacted secret. */
const MASK = '***';

/**
 * Replace all occurrences of each sensitive value in `text` with a mask.
 *
 * Only the values of `maskedValues` are used; the keys are ignored. Empty
 * values are skipped. Longer secrets are masked first, so when one secret is
 * a substring of another the longer one is fully hidden instead of leaving
 * its tail behind. Matching is literal.
 *
 * @param text Text to redact.
 * @param maskedValues Map of name → secret value.
 * @returns `text` with every secret occurrence replaced by `***`.
 */
export function redactSensitiveValues(
  text: string,
  maskedValues: Record<string, string>,
): string {
  const secrets = Object.values(maskedValues)
    .filter((value) => Boolean(value))
    .sort((a, b) => b.length - a.length);

  let result = text;
  for (const secret of secrets) {
    // split/join is a literal replace-all; no regex escaping is needed.
    result = result.split(secret).join(MASK);
  }
  return result;
}
/**
 * Deep-filter a Message, masking any sensitive values found in text content.
 * Returns a new message (does not mutate the original).
 *
 * String content is redacted directly. For array content, only `text` parts
 * are redacted; other parts (such as images) are passed through unchanged.
 *
 * @param message Message to redact.
 * @param maskedValues Map of name → secret value.
 * @returns The original message object when `maskedValues` is empty or the
 *   content is neither a string nor an array; otherwise a redacted copy.
 */
export function redactMessage(
  message: Message,
  maskedValues: Record<string, string>,
): Message {
  if (Object.keys(maskedValues).length === 0) return message;

  const content = message.content;
  if (typeof content === 'string') {
    return {
      ...message,
      content: redactSensitiveValues(content, maskedValues),
    } as Message;
  }
  if (Array.isArray(content)) {
    const redactedParts = (content as ContentPart[]).map((part) =>
      part.type === 'text'
        ? { ...part, text: redactSensitiveValues(part.text, maskedValues) }
        : part, // Images are left as-is (binary data)
    );
    return {
      ...message,
      content: redactedParts,
    } as Message;
  }
  return message;
}
/**
 * Filter an array of Messages, masking sensitive data in each.
 *
 * @param messages Messages to redact.
 * @param maskedValues Map of name → secret value.
 * @returns The input array itself when `maskedValues` is empty; otherwise a
 *   new array of redacted messages in the same order.
 */
export function redactMessages(
  messages: Message[],
  maskedValues: Record<string, string>,
): Message[] {
  if (Object.keys(maskedValues).length === 0) return messages;
  return messages.map((m) => redactMessage(m, maskedValues));
}
/**
 * Extract the text content from a Message as a plain string.
 *
 * @param message Message to read.
 * @returns String content as-is; for multi-part content, all text parts
 *   joined with newlines (non-text parts are ignored); `''` for any other
 *   content shape.
 */
export function extractTextContent(message: Message): string {
  const content = message.content;
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  return (content as ContentPart[])
    .filter(
      (p): p is Extract<ContentPart, { type: 'text' }> => p.type === 'text',
    )
    .map((p) => p.text)
    .join('\n');
}
/**
 * Truncate a string to at most `maxLen` characters, appending an ellipsis if
 * truncated.
 *
 * @param text Text to shorten.
 * @param maxLen Maximum length of the result. When it is below 3 there is no
 *   room for the three-character ellipsis, so the text is simply cut to
 *   `maxLen` characters (an empty string for 0 or negative values).
 * @returns `text` unchanged when it already fits; otherwise the shortened text.
 */
export function truncate(text: string, maxLen: number): string {
  if (text.length <= maxLen) return text;
  if (maxLen < 3) {
    // A negative slice end would count from the end of the string and return
    // more than maxLen characters.
    return text.slice(0, Math.max(0, maxLen));
  }
  return `${text.slice(0, maxLen - 3)}...`;
}
================================================
FILE: packages/core/src/agent/conversation.test.ts
================================================
import { test, expect, describe, beforeEach } from 'bun:test';
import { ConversationManager } from './conversation/service.js';
import type { ConversationManagerOptions } from './conversation/types.js';
import type { LanguageModel, InferenceOptions } from '../model/interface.js';
import type { InferenceResult } from '../model/types.js';
// ── Helpers ──
/**
 * Build a ConversationManager for tests: a 10000-token context window with
 * screenshots enabled, with any field replaceable through `overrides`.
 */
function createManager(
  overrides: Partial<ConversationManagerOptions> = {},
): ConversationManager {
  return new ConversationManager({
    contextWindowSize: 10000,
    includeLastScreenshot: true,
    ...overrides,
  });
}
/**
 * Build a stub LanguageModel whose `invoke` always resolves with
 * `{ summary }` as the parsed result and fixed usage numbers, ignoring the
 * options it is called with.
 *
 * @param summary Summary text the stub returns.
 */
function createMockModel(summary = 'Summary of the conversation'): LanguageModel {
  return {
    modelId: 'test-model',
    provider: 'custom',
    invoke: async <T>(
      _options: InferenceOptions<T>,
    ): Promise<InferenceResult<T>> => {
      return {
        // The stub does not know T; tests only read `summary`.
        parsed: { summary } as unknown as T,
        usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
        finishReason: 'stop',
      };
    },
  };
}
// ── Tests ──
describe('ConversationManager', () => {
let mm: ConversationManager;
beforeEach(() => {
mm = createManager();
});
describe('system prompt', () => {
test('setInstructionBuilder stores the system prompt', () => {
mm.setInstructionBuilder('You are a helpful assistant');
const messages = mm.getMessages();
expect(messages[0]).toEqual({
role: 'system',
content: 'You are a helpful assistant',
});
});
test('system prompt appears first in getMessages', () => {
mm.setInstructionBuilder('System');
mm.addStateMessage('State text', undefined, 1);
const messages = mm.getMessages();
expect(messages[0].role).toBe('system');
expect(messages[1].role).toBe('user');
});
test('changing system prompt replaces the previous one', () => {
mm.setInstructionBuilder('First');
mm.setInstructionBuilder('Second');
const messages = mm.getMessages();
const systemMessages = messages.filter((m) => m.role === 'system');
expect(systemMessages).toHaveLength(1);
expect(systemMessages[0].content).toBe('Second');
});
});
// addStateMessage: state snapshots become `user` messages, optionally carrying a screenshot part.
describe('addStateMessage', () => {
  test('adds a user message with state text', () => {
    mm.addStateMessage('Page state info', undefined, 1);
    const all = mm.getMessages();
    expect(all).toHaveLength(1);
    expect(all[0].role).toBe('user');
  });

  test('includes screenshot when provided and vision enabled', () => {
    mm.addStateMessage('State', 'base64screenshot', 1);
    const payload = mm.getMessages()[0].content;
    expect(Array.isArray(payload)).toBe(true);
    // The assertion above already failed the test if this is not an array;
    // the ternary only exists to narrow the type.
    const parts = Array.isArray(payload) ? payload : [];
    expect(parts).toHaveLength(2);
    expect(parts[0]).toEqual({ type: 'text', text: 'State' });
    expect(parts[1]).toHaveProperty('type', 'image');
  });

  test('excludes screenshot when vision disabled', () => {
    const textOnlyManager = createManager({ includeLastScreenshot: false });
    textOnlyManager.addStateMessage('State', 'base64screenshot', 1);
    const payload = textOnlyManager.getMessages()[0].content;
    // Content is expected to be an array holding only the text part.
    expect(Array.isArray(payload)).toBe(true);
    const parts = Array.isArray(payload) ? payload : [];
    expect(parts).toHaveLength(1);
    expect(parts[0]).toHaveProperty('type', 'text');
  });

  test('updates messageCount', () => {
    expect(mm.messageCount).toBe(0);
    for (const step of [1, 2]) {
      mm.addStateMessage(`State ${step}`, undefined, step);
      expect(mm.messageCount).toBe(step);
    }
  });
});
// addAssistantMessage: the text is stored verbatim under the `assistant` role.
describe('addAssistantMessage', () => {
  test('adds an assistant role message', () => {
    const reply = 'Agent response';
    mm.addAssistantMessage(reply, 1);
    const [first] = mm.getMessages();
    expect(first.role).toBe('assistant');
    expect(first.content).toBe(reply);
  });
});
// addCommandResultMessage: command outcomes are fed back to the model as `user` messages.
describe('addCommandResultMessage', () => {
  test('adds a user role message for action results', () => {
    const outcome = 'click: success';
    mm.addCommandResultMessage(outcome, 1);
    const [first] = mm.getMessages();
    expect(first.role).toBe('user');
    expect(first.content).toBe(outcome);
  });
});
// getMessages ordering: system prompt first, then messages in insertion order.
describe('getMessages ordering', () => {
  test('returns messages in correct order', () => {
    mm.setInstructionBuilder('System prompt');
    mm.addStateMessage('State text', undefined, 1);
    mm.addAssistantMessage('Agent thought', 1);
    mm.addCommandResultMessage('Action result', 1);

    // Comparing the full role sequence checks both the count (4) and the order.
    const roles = mm.getMessages().map((msg) => msg.role);
    expect(roles).toEqual(['system', 'user', 'assistant', 'user']);
  });
});
// Basic compaction: when over the token budget, older screenshots are dropped
// while the most recent one is kept.
describe('compaction - screenshot removal', () => {
  test('removes old screenshots when over token budget, keeps last', () => {
    // True when `content` is a part array containing at least one image part.
    const containsImage = (content: unknown): boolean =>
      Array.isArray(content) &&
      content.some((part: any) => typeof part === 'object' && part.type === 'image');

    // 3 screenshots: each ~1000 tokens for image + ~2 for text = ~3006 total.
    // Budget of 1500: after removing 2 old screenshots (saving 2000),
    // total becomes ~1006 < 1500, so compact exits successfully.
    const tightBudget = createManager({ contextWindowSize: 1500 });
    for (const step of [1, 2, 3]) {
      tightBudget.addStateMessage(`State ${step}`, `screenshot${step}`, step);
    }

    const compacted = tightBudget.getMessages();
    const newest = compacted[compacted.length - 1];
    const oldest = compacted[0];

    // The latest message must still be multi-part and still carry its image.
    expect(Array.isArray(newest.content)).toBe(true);
    expect(containsImage(newest.content)).toBe(true);
    // The oldest message must no longer carry an image.
    expect(containsImage(oldest.content)).toBe(false);
  });
});
// Token budget behaviour: no compaction artefacts under budget, and the estimate tracks content size.
describe('compaction - token budget behavior', () => {
  test('does not trigger compaction when under budget', () => {
    // Budget of 10000 means no compaction needed for a few messages
    const roomy = createManager({ contextWindowSize: 10000, includeLastScreenshot: false });
    roomy.addStateMessage('Short state', undefined, 1);
    roomy.addAssistantMessage('Short response', 1);

    // No message may contain the "omitted" placeholder text.
    const sawSummary = roomy
      .getMessages()
      .some(
        (msg) =>
          typeof msg.content === 'string' && msg.content.includes('omitted to save tokens'),
      );
    expect(sawSummary).toBe(false);
  });

  test('estimateTotalTokens reflects actual message content', () => {
    const manager = createManager({ contextWindowSize: 100000, includeLastScreenshot: false });
    manager.addStateMessage('A'.repeat(400), undefined, 1); // ~100 tokens
    manager.addStateMessage('B'.repeat(800), undefined, 2); // ~200 tokens

    // Roughly 300 tokens for 1200 chars; allow slack for the estimator's heuristics.
    const estimate = manager.estimateTotalTokens();
    expect(estimate).toBeGreaterThanOrEqual(250);
    expect(estimate).toBeLessThanOrEqual(400);
  });
});
// estimateTotalTokens: covers the system prompt, grows with messages, and prices images at ~1000.
describe('token estimation', () => {
  test('estimateTotalTokens includes system prompt', () => {
    mm.setInstructionBuilder('System prompt text');
    expect(mm.estimateTotalTokens()).toBeGreaterThan(0);
  });

  test('estimateTotalTokens grows with messages', () => {
    const baseline = mm.estimateTotalTokens();
    mm.addStateMessage('Some state text', undefined, 1);
    expect(mm.estimateTotalTokens()).toBeGreaterThan(baseline);
  });

  test('estimateTotalTokens counts images as ~1000 tokens', () => {
    mm.addStateMessage('Text', 'screenshot', 1);
    // Text ~4 chars = 1 token, plus ~1000 for image
    expect(mm.estimateTotalTokens()).toBeGreaterThanOrEqual(1000);
  });
});
// Conversation entries: one entry per added message, with category, step, summary and screenshot flag.
describe('history items', () => {
  test('records history for each added message', () => {
    mm.addStateMessage('State text', undefined, 1);
    mm.addAssistantMessage('Agent response', 1);
    mm.addCommandResultMessage('Result text', 1);

    // The category sequence checks both the entry count (3) and each entry's category.
    const categories = mm.getConversationEntrys().map((entry) => entry.category);
    expect(categories).toEqual(['state', 'assistant', 'action_result']);
  });

  test('history items include step number', () => {
    mm.addStateMessage('State', undefined, 5);
    const [entry] = mm.getConversationEntrys();
    expect(entry.step).toBe(5);
  });

  test('history items include truncated summary', () => {
    mm.addStateMessage('a'.repeat(200), undefined, 1);
    const [entry] = mm.getConversationEntrys();
    // Summary should be truncated to 120 chars
    expect(entry.summary.length).toBeLessThanOrEqual(123); // 120 + '...'
  });

  test('history items track screenshot presence', () => {
    mm.addStateMessage('State', 'screenshot_data', 1);
    const [entry] = mm.getConversationEntrys();
    expect(entry.hasScreenshot).toBe(true);
  });
});
// agentHistoryDescription: textual digest of the recorded steps, elided in the middle when long.
describe('agentHistoryDescription', () => {
  // Records one state message and one assistant message for the given step.
  const recordStep = (step: number): void => {
    mm.addStateMessage(`State ${step}`, undefined, step);
    mm.addAssistantMessage(`Agent ${step}`, step);
  };

  test('returns "(no history)" when empty', () => {
    expect(mm.agentHistoryDescription()).toBe('(no history)');
  });

  test('shows all steps when under stepLimitShown', () => {
    recordStep(1);
    recordStep(2);
    const digest = mm.agentHistoryDescription(10);
    for (const label of ['Step 1:', 'Step 2:']) {
      expect(digest).toContain(label);
    }
  });

  test('truncates with "steps omitted" when exceeding stepLimitShown', () => {
    Array.from({ length: 20 }, (_, index) => index + 1).forEach(recordStep);
    const digest = mm.agentHistoryDescription(4);
    expect(digest).toContain('steps omitted');
    // Should show first 2 and last 2 steps
    for (const label of ['Step 1:', 'Step 2:', 'Step 19:', 'Step 20:']) {
      expect(digest).toContain(label);
    }
  });

  test('includes category prefixes in description', () => {
    mm.addStateMessage('Page loaded', undefined, 1);
    mm.addAssistantMessage('Clicking button', 1);
    mm.addCommandResultMessage('click: success', 1);
    const digest = mm.agentHistoryDescription();
    for (const prefix of ['State:', 'Agent:', 'Result:']) {
      expect(digest).toContain(prefix);
    }
  });
});
// Ephemeral messages: visible to the next getMessages() call(s), then dropped.
// These tests pin the observed lifecycle: present on the first read, gone by the third.
describe('ephemeral messages', () => {
  // True when some message's content is exactly the string `text`.
  const hasContent = (messages: { content: unknown }[], text: string): boolean =>
    messages.some((message) => message.content === text);

  test('ephemeral message appears in first getMessages call', () => {
    mm.addEphemeralMessage('Temporary instruction');
    expect(hasContent(mm.getMessages(), 'Temporary instruction')).toBe(true);
  });

  test('ephemeral message is removed after being consumed', () => {
    mm.addEphemeralMessage('Temp');
    // First call: message is present and gets marked as read
    expect(hasContent(mm.getMessages(), 'Temp')).toBe(true);
    // Second call: message is still in result (removal happens after building result),
    // then gets removed during consumeEphemeralMessages. The result is deliberately
    // not inspected; the call is made only for its side effect.
    mm.getMessages();
    // Third call: message is now actually gone from this.messages
    expect(hasContent(mm.getMessages(), 'Temp')).toBe(false);
  });

  test('ephemeral message with assistant role', () => {
    mm.addEphemeralMessage('Agent thought', 'assistant');
    const match = mm
      .getMessages()
      .find((m) => m.role === 'assistant' && m.content === 'Agent thought');
    expect(match).toBeDefined();
  });

  test('multiple ephemeral messages all appear then get cleaned up', () => {
    mm.addEphemeralMessage('Temp 1');
    mm.addEphemeralMessage('Temp 2');
    // First call: both present, marked as read
    expect(mm.getMessages()).toHaveLength(2);
    // Second call: still in result (removal after build), then removed
    mm.getMessages();
    // Third call: messages have been removed
    expect(mm.getMessages()).toHaveLength(0);
  });
});
// Persistence: save() output fed to load() on a fresh manager must reproduce the conversation.
describe('save / load round-trip', () => {
  // Loads the given snapshot into a brand-new manager and returns that manager.
  const restoreFrom = (snapshot: ReturnType<typeof mm.save>) => {
    const fresh = createManager();
    fresh.load(snapshot);
    return fresh;
  };

  test('save and load preserves system prompt', () => {
    mm.setInstructionBuilder('My system prompt');
    mm.addStateMessage('State 1', undefined, 1);
    const [first] = restoreFrom(mm.save()).getMessages();
    expect(first.role).toBe('system');
    expect(first.content).toBe('My system prompt');
  });

  test('save and load preserves messages', () => {
    mm.addStateMessage('State 1', undefined, 1);
    mm.addAssistantMessage('Response 1', 1);
    mm.addCommandResultMessage('Result 1', 1);
    // The role sequence checks both the message count (3) and the order.
    const roles = restoreFrom(mm.save())
      .getMessages()
      .map((msg) => msg.role);
    expect(roles).toEqual(['user', 'assistant', 'user']);
  });

  test('save and load preserves history items', () => {
    mm.addStateMessage('State 1', undefined, 1);
    mm.addAssistantMessage('Response 1', 1);
    const categories = restoreFrom(mm.save())
      .getConversationEntrys()
      .map((entry) => entry.category);
    expect(categories).toEqual(['state', 'assistant']);
  });

  test('save and load preserves currentStep', () => {
    mm.addStateMessage('State', undefined, 7);
    const snapshot = mm.save();
    expect(snapshot.currentStep).toBe(7);
    expect(restoreFrom(snapshot).step).toBe(7);
  });

  test('save strips screenshots (text only in serialized form)', () => {
    mm.addStateMessage('State with screenshot', 'base64data', 1);
    // Serialized content should be text-only, no base64
    mm.save().messages.forEach((serialized) => {
      expect(serialized.content).not.toContain('base64data');
    });
  });

  test('load with null system prompt clears system prompt', () => {
    mm.setInstructionBuilder('Initial prompt');
    const snapshot = mm.save();
    snapshot.systemPrompt = null;
    mm.load(snapshot);
    const stillHasSystem = mm.getMessages().some((msg) => msg.role === 'system');
    expect(stillHasSystem).toBe(false);
  });
});
// Sensitive data filtering: values listed in `maskedValues` must not leak into outgoing messages.
//
// Message content may be a plain string or an array of content parts. All assertions go
// through `textOf`, so they run regardless of which shape the manager produces. The earlier
// form guarded each assertion behind a shape check and could pass without asserting anything.
describe('sensitive data filtering', () => {
  // Flattens message content to its text; throws on shapes that carry no text at all,
  // so an unexpected shape fails the test instead of silently skipping assertions.
  const textOf = (content: unknown): string => {
    if (typeof content === 'string') return content;
    if (Array.isArray(content)) {
      const texts = content
        .filter((part: any) => part?.type === 'text')
        .map((part: any) => String(part.text));
      if (texts.length === 0) {
        throw new Error('Message content has no text part');
      }
      return texts.join('\n');
    }
    throw new Error(`Unexpected message content shape: ${typeof content}`);
  };

  test('masks sensitive values in outgoing messages', () => {
    const sensitive = createManager({
      maskedValues: { password: 'secret123', apiKey: 'key-abc' },
    });
    sensitive.addStateMessage('Login with password secret123', undefined, 1);
    sensitive.addAssistantMessage('Using key-abc to authenticate', 1);
    const [stateMsg, assistantMsg] = sensitive.getMessages();

    // The raw secret must never appear, whatever the content shape.
    expect(textOf(stateMsg.content)).not.toContain('secret123');
    // The '***' marker was only ever pinned for plain-string content; keep that scope.
    if (typeof stateMsg.content === 'string') {
      expect(stateMsg.content).toContain('***');
    }

    const assistantText = textOf(assistantMsg.content);
    expect(assistantText).not.toContain('key-abc');
    if (typeof assistantMsg.content === 'string') {
      expect(assistantMsg.content).toContain('***');
    }
  });

  test('no filtering when maskedValues is empty', () => {
    const noSensitive = createManager({ maskedValues: {} });
    noSensitive.addStateMessage('Plain text with secret123', undefined, 1);
    const [first] = noSensitive.getMessages();
    expect(textOf(first.content)).toContain('secret123');
  });

  test('no filtering when maskedValues is not set', () => {
    mm.addStateMessage('Text with sensitive data', undefined, 1);
    const [first] = mm.getMessages();
    expect(textOf(first.content)).toContain('sensitive data');
  });
});
// LLM-based compaction: gated by config and interval, requires a model, and replaces
// older messages with a summary when it runs.
describe('LLM-based compaction', () => {
  test('shouldCompactWithLlm returns false when no compaction config', () => {
    expect(mm.shouldCompactWithLlm()).toBe(false);
  });

  test('shouldCompactWithLlm returns false when interval not reached', () => {
    const manager = createManager({
      compaction: { interval: 10, maxTokens: 500 },
    });
    // Only 1 message, interval not reached
    manager.addStateMessage('State', undefined, 1);
    expect(manager.shouldCompactWithLlm()).toBe(false);
  });

  test('compactWithLlm returns false without a model', async () => {
    const manager = createManager({
      contextWindowSize: 100000,
      includeLastScreenshot: false,
      compaction: { interval: 1, maxTokens: 500, targetTokens: 10 },
    });
    // Add enough messages so estimateTotalTokens > targetTokens (10)
    const filler = 'x'.repeat(100);
    for (const step of [1, 2, 3, 4, 5]) {
      manager.addStateMessage(filler, undefined, step);
    }
    const compacted = await manager.compactWithLlm();
    expect(compacted).toBe(false);
  });

  test('compactWithLlm performs compaction with model', async () => {
    const summarizer = createMockModel('Summarized: visited pages and clicked buttons');
    // Use large contextWindowSize so getMessages() doesn't trigger basic compact(),
    // but low targetTokens so the LLM compaction decides to run.
    const manager = createManager({
      contextWindowSize: 100000,
      includeLastScreenshot: false,
      compaction: { interval: 1, maxTokens: 500, targetTokens: 500 },
    });
    // Add lots of messages to exceed targetTokens (500).
    // Each 500-char message = ~125 tokens. 10 messages = ~1250 tokens > 500.
    const padding = 'A'.repeat(500);
    for (let step = 1; step <= 10; step += 1) {
      manager.addStateMessage(`${padding} step ${step}`, undefined, step);
      manager.addAssistantMessage(`${padding} response ${step}`, step);
    }

    const compacted = await manager.compactWithLlm(summarizer);
    expect(compacted).toBe(true);

    // After compaction, message count should be reduced
    const remaining = manager.getMessages();
    expect(remaining.length).toBeLessThan(20);
    // First message should be the summary
    const summary = remaining[0].content;
    expect(typeof summary).toBe('string');
    expect(summary as string).toContain('Conversation summary');
  });
});
// clear() wipes messages, history and step; resetMessages() wipes messages only.
describe('clear and resetMessages', () => {
  test('clear removes all messages and history', () => {
    mm.setInstructionBuilder('System');
    mm.addStateMessage('State', undefined, 1);
    mm.addAssistantMessage('Response', 1);

    mm.clear();

    expect(mm.messageCount).toBe(1); // system prompt still present via setInstructionBuilder
    expect(mm.getConversationEntrys()).toHaveLength(0);
    expect(mm.step).toBe(0);
  });

  test('resetMessages removes messages but preserves history', () => {
    mm.addStateMessage('State', undefined, 1);
    mm.addAssistantMessage('Response', 1);
    const entryCountBefore = mm.getConversationEntrys().length;

    mm.resetMessages();

    // Messages cleared
    expect(mm.getMessages()).toHaveLength(0);
    // History preserved
    expect(mm.getConversationEntrys()).toHaveLength(entryCountBefore);
  });
});
// messageCount: counts stored messages plus one for the system prompt when it is set.
describe('messageCount', () => {
  test('includes system prompt in count', () => {
    mm.setInstructionBuilder('System');
    const withPromptOnly = mm.messageCount;
    expect(withPromptOnly).toBe(1);
    mm.addStateMessage('State', undefined, 1);
    const withPromptAndState = mm.messageCount;
    expect(withPromptAndState).toBe(2);
  });

  test('does not count system prompt when not set', () => {
    const initial = mm.messageCount;
    expect(initial).toBe(0);
    mm.addStateMessage('State', undefined, 1);
    const afterState = mm.messageCount;
    expect(afterState).toBe(1);
  });
});
// step: mirrors the step number passed with the latest added message.
describe('step tracking', () => {
  test('step reflects the most recent step from added messages', () => {
    const additions: Array<[string, number]> = [
      ['State 1', 1],
      ['State 5', 5],
    ];
    for (const [text, step] of additions) {
      mm.addStateMessage(text, undefined, step);
      expect(mm.step).toBe(step);
    }
  });
});
});
================================================
FILE: packages/core/src/agent/evaluator.ts
================================================
import type { LanguageModel } from '../model/interface.js';
import type { Message, ContentPart } from '../model/messages.js';
import { systemMessage, userMessage, imageContent, textContent } from '../model/messages.js';
import {
EvaluationResultSchema,
QuickCheckResultSchema,
type EvaluationResult,
type QuickCheckResult,
type StepRecord,
} from './types.js';
import { createLogger } from '../logging.js';
const logger = createLogger('judge');
// ── Judge System Prompts ──
/**
 * System prompt for the detailed judge.
 * Used as the first (system) message built by `constructEvaluatorMessages`.
 * The text is sent to the model verbatim, so any edit changes judge behaviour.
 */
const JUDGE_SYSTEM_PROMPT = `You are an expert task completion judge. Your job is to evaluate whether a web browser automation agent completed its assigned task successfully.
You will be provided with:
1. The task description
2. A history of steps the agent took (including actions and their results)
3. Screenshots from during execution (if available)
4. Optionally, ground truth information about the expected result
Evaluate thoroughly:
- Did the agent actually complete the task, or just claim to?
- Is the extracted information correct and complete?
- Did the agent handle errors and edge cases appropriately?
- Was the agent stuck at any point without recovery?
If ground truth is provided, compare the agent's result against it.
Be strict but fair. Partial completions should be marked with lower confidence.`;
/**
 * System prompt for the lightweight quick-check judge.
 * Used as the first (system) message built by `constructQuickCheckMessages`,
 * which supplies only the task and the agent's final result.
 */
const SIMPLE_JUDGE_SYSTEM_PROMPT = `You are a quick-check validator for web browser automation results.
Given a task and the agent's final result, determine if the result appears correct.
Be concise. Focus on whether the result directly answers/completes the task.`;
/**
 * Judges an agent run by asking a language model for a structured verdict.
 *
 * Both entry points catch model/parsing failures and return a fallback result
 * instead of throwing, so callers never need their own try/catch.
 */
export class ResultEvaluator {
  private readonly model: LanguageModel;

  /**
   * @param model Language model used for every judge call made by this instance.
   */
  constructor(model: LanguageModel) {
    this.model = model;
  }

  /** Converts an arbitrary thrown value into a human-readable message. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Full evaluation with step history, screenshots, and optional ground truth.
   * Provides detailed verdict with failure analysis.
   *
   * @param task Original task description given to the agent.
   * @param result The agent's final result text.
   * @param history Recorded steps of the run, summarised into the judge prompt.
   * @param options Optional ground truth (`expectedOutcome`) and screenshot inclusion flag.
   * @returns The model's parsed verdict. On any failure, a result with
   *   `isComplete: false`, `confidence: 0` and `verdict: 'unknown'` whose `reason`
   *   carries the error message.
   */
  async evaluate(
    task: string,
    result: string,
    history: StepRecord[],
    options?: {
      expectedOutcome?: string;
      includeScreenshots?: boolean;
    },
  ): Promise<EvaluationResult> {
    const messages = constructEvaluatorMessages(task, result, history, options);
    try {
      const completion = await this.model.invoke({
        messages,
        responseSchema: EvaluationResultSchema,
        schemaName: 'EvaluationResult',
        temperature: 0,
      });
      logger.info(
        `Judge verdict: complete=${completion.parsed.isComplete}, ` +
          `confidence=${completion.parsed.confidence}, ` +
          `verdict=${completion.parsed.verdict ?? 'n/a'}`,
      );
      return completion.parsed;
    } catch (error) {
      logger.error('Judge evaluation failed', error);
      // A failed judge call is reported as "not complete, zero confidence" rather than rethrown.
      return {
        isComplete: false,
        reason: `Judge evaluation failed: ${ResultEvaluator.describeError(error)}`,
        confidence: 0,
        verdict: 'unknown',
      };
    }
  }

  /**
   * Lightweight always-on validation.
   * Quick pass/fail check without detailed history analysis.
   * Useful for running after every "done" action to catch obvious errors.
   *
   * @param task Original task description given to the agent.
   * @param result The agent's final result text.
   * @returns The model's parsed quick-check result. On any failure, a result with
   *   `passed: true` and `shouldRetry: false` so a broken judge never blocks the agent.
   */
  async simpleEvaluate(
    task: string,
    result: string,
  ): Promise<QuickCheckResult> {
    const messages = constructQuickCheckMessages(task, result);
    try {
      const completion = await this.model.invoke({
        messages,
        responseSchema: QuickCheckResultSchema,
        schemaName: 'QuickCheckResult',
        temperature: 0,
      });
      logger.debug(
        `Simple judge: passed=${completion.parsed.passed}, reason=${completion.parsed.reason}`,
      );
      return completion.parsed;
    } catch (error) {
      logger.error('Simple judge evaluation failed', error);
      return {
        passed: true, // Default to pass on error to avoid blocking
        reason: `Simple judge failed: ${ResultEvaluator.describeError(error)}`,
        shouldRetry: false,
      };
    }
  }
}
// ── Message Construction ──
/**
 * Render one recorded step as a short multi-line summary for the judge prompt.
 * Free-text fields are clipped (150-200 chars) to keep long runs from bloating the prompt.
 */
function summarizeStepForJudge(entry: StepRecord): string {
  const actions = entry.agentOutput.actions
    .map((a) => {
      // Actions are treated as loose objects; only their `action` field is read.
      const actionObj = a as Record<string, unknown>;
      return actionObj.action ?? 'unknown';
    })
    .join(', ');

  const results = entry.actionResults
    .map((r) => {
      // Precedence: a "done" result wins over an error, which wins over extracted content.
      if (r.isDone) return `DONE: ${r.extractedContent?.slice(0, 200) ?? ''}`;
      if (r.error) return `ERROR: ${r.error.slice(0, 150)}`;
      if (r.extractedContent) return `OK: ${r.extractedContent.slice(0, 150)}`;
      return r.success ? 'OK' : 'FAILED';
    })
    .join('; ');

  const evaluation = entry.agentOutput.currentState?.evaluation ?? '';

  return (
    `Step ${entry.step} [${entry.browserState.url}]:\n` +
    `  Eval: ${evaluation.slice(0, 200)}\n` +
    `  Actions: ${actions}\n` +
    `  Results: ${results}`
  );
}

/**
 * Build the full message array for detailed judge evaluation.
 * Includes step-by-step history, screenshots (if enabled), and ground truth.
 *
 * @param task Original task description given to the agent.
 * @param result The agent's final result text.
 * @param history Recorded steps; omitted from the prompt when empty.
 * @param options `expectedOutcome` adds a ground-truth section; `includeScreenshots`
 *   attaches the last three steps that have a screenshot.
 * @returns A system message followed by exactly one user message. The user message is
 *   multi-part (text + images) when screenshots are attached, otherwise a plain string.
 */
export function constructEvaluatorMessages(
  task: string,
  result: string,
  history: StepRecord[],
  options?: {
    expectedOutcome?: string;
    includeScreenshots?: boolean;
  },
): Message[] {
  const messages: Message[] = [systemMessage(JUDGE_SYSTEM_PROMPT)];

  // Build the evaluation prompt section by section; sections are joined with blank lines.
  const parts: string[] = [
    `## Task\n${task}`,
    `## Agent's Final Result\n${result}`,
  ];

  // Step history summary
  if (history.length > 0) {
    const stepSummaries = history.map(summarizeStepForJudge);
    parts.push(`## Step History (${history.length} steps)\n${stepSummaries.join('\n\n')}`);
  }

  // Ground truth
  if (options?.expectedOutcome) {
    parts.push(
      `## Ground Truth (Expected Result)\n${options.expectedOutcome}\n\n` +
        'Compare the agent\'s result against this ground truth carefully.',
    );
  }

  parts.push(
    '## Instructions\n' +
      'Evaluate the task completion. Provide:\n' +
      '- isComplete: whether the task was fully completed\n' +
      '- reason: detailed explanation\n' +
      '- confidence: 0-1 score\n' +
      '- verdict: "success", "partial", "failed", or "unknown"\n' +
      '- failureReason: if failed, explain why\n' +
      '- impossibleTask: true if the task appears impossible\n' +
      '- reachedCaptcha: true if a CAPTCHA blocked progress',
  );

  const prompt = parts.join('\n\n');

  // If screenshots are requested and available, include the last few
  const screenshotEntries = options?.includeScreenshots
    ? history.filter((e) => e.browserState.screenshot).slice(-3) // Last 3 screenshots
    : [];

  if (screenshotEntries.length === 0) {
    messages.push(userMessage(prompt));
    return messages;
  }

  const content: ContentPart[] = [
    textContent(`${prompt}\n\nBelow are screenshots from the agent's execution:`),
  ];
  for (const entry of screenshotEntries) {
    // Re-checked so the compiler narrows `screenshot` to a defined value.
    if (entry.browserState.screenshot) {
      content.push(
        textContent(`Screenshot from step ${entry.step} (${entry.browserState.url}):`),
      );
      content.push(imageContent(entry.browserState.screenshot));
    }
  }
  messages.push(userMessage(content));
  return messages;
}
/**
 * Assemble the two-message prompt for the lightweight quick-check judge.
 * Carries only the task and the agent's result; step history and screenshots are left out.
 *
 * @param task Original task description given to the agent.
 * @param result The agent's final result text.
 * @returns The quick-check system message followed by a single user message.
 */
export function constructQuickCheckMessages(
  task: string,
  result: string,
): Message[] {
  // Three paragraphs separated by blank lines: task, result, then the question.
  const question = [
    `Task: ${task}`,
    `Agent's Result: ${result}`,
    'Does this result correctly complete the task? If not, should the agent retry with a different approach?',
  ].join('\n\n');

  const instructions = systemMessage(SIMPLE_JUDGE_SYSTEM_PROMPT);
  return [instructions, userMessage(question)];
}
================================================
FILE: packages/core/src/agent/index.ts
================================================
export { Agent, type AgentOptions } from '../agent/agent.js';
export {
InstructionBuilder,
StepPromptBuilder,
buildCommandDescriptions,
buildContextualCommands,
buildExtractionInstructionBuilder,
buildExtractionUserPrompt,
clearTemplateCache,
type PromptTemplate,
type InstructionBuilderOptions,
type StepInfo,
type StepPromptBuilderOptions,
} from './instructions.js';
export { ConversationManager } from './conversation/service.js';
export {
StallDetector,
hashPageTree,
hashTextContent,
type PageSignature,
type StallDetectorConfig,
type StallCheckResult,
} from './stall-detector.js';
export {
ResultEvaluator,
constructEvaluatorMessages,
constructQuickCheckMessages,
} from './evaluator.js';
export { ReplayRecorder, type ReplayRecorderOptions } from './replay-recorder.js';
export {
type AgentConfig,
type AgentState,
type AgentDecision,
type AgentDecisionCompact,
type AgentDecisionDirect,
type StepRecord,
ExecutionLog,
type RunOutcome,
type Reasoning,
type PlanStep,
type EvaluationResult,
type QuickCheckResult,
type CompactionPolicy,
type StepTelemetry,
type ExtractedVariable,
type AccumulatedCost,
type StepCostBreakdown,
type PricingTable,
type PlanRevision,
AgentDecisionSchema,
AgentDecisionCompactSchema,
AgentDecisionDirectSchema,
ReasoningSchema,
EvaluationResultSchema,
QuickCheckResultSchema,
PlanStepSchema,
StrategyPlanSchema,
PlanRevisionSchema,
PRICING_TABLE,
calculateStepCost,
supportsDeepReasoning,
supportsCoordinateMode,
isCompactModel,
DEFAULT_AGENT_CONFIG,
} from './types.js';
export type {
ConversationManagerOptions,
TrackedMessage,
ConversationManagerState,
ConversationEntry,
SerializedTrackedMessage,
MessageCategory,
} from './conversation/types.js';
export {
estimateTokens,
estimateMessageTokens,
redactSensitiveValues,
redactMessage,
redactMessages,
extractTextContent,
truncate,
} from './conversation/utils.js';
================================================
FILE: packages/core/src/agent/instructions/instructions-compact.md
================================================
You are an AI agent that controls a web browser to complete tasks. You operate in an iterative loop: observe, decide, act, repeat.
Your task: {{task}}
Default: English. Match the task's language.
Elements: `[index]text`. Only `[indexed]` elements are interactive. Indentation = child. `*[` = new element.
- Only interact with elements that have a numeric [index]
- If research is needed, open a **new tab** instead of reusing the current one
- If the page changes after an input action, analyze new elements (e.g., suggestions) before proceeding
- If an action sequence was interrupted, complete remaining actions in the next step
- For autocomplete fields: type text, WAIT for suggestions, click the correct one or press Enter
- Handle popups/modals/cookie banners immediately before other actions
- If blocked by captcha/login/403, try alternative approaches rather than retrying
- ALWAYS look for filter/sort options FIRST when the task specifies criteria
- Detect unproductive loops: if same URL for 3+ steps without progress, change approach
Maximum {{maxActionsPerStep}} actions per step. If the page changes after an action, remaining actions are skipped.
Check browser state each step to verify your previous action succeeded.
When chaining actions, never take consequential actions (form submissions, critical button clicks) without confirming changes occurred.
{{actionDescriptions}}
Combine actions when sensible. Do not predict actions that do not apply to the current page.
**Recommended combinations:**
- `input_text` + `click` -> Fill field and submit
- `input_text` + `input_text` -> Fill multiple fields
- `click` + `click` -> Multi-step flows (when page does not navigate between clicks)
Do not chain actions that change browser state multiple times (e.g., click then navigate). Always have one clear goal per step.
Call `done` when:
- Task is fully completed
- Reached max steps (even if incomplete)
- Absolutely impossible to continue
Set `success=true` ONLY if the full task is completed. Put ALL findings in the `text` field.
Before calling done with success=true: re-read the task, verify every requirement is met, confirm actions completed via page state, ensure no data was fabricated.
1. Verify state using screenshot as ground truth
2. Handle blocking popups/overlays first
3. If element not found, scroll to reveal more content
4. If action fails 2-3 times, try alternative approach
5. If blocked by login/captcha/403, try alternative sites
6. If stuck in a loop, acknowledge and change strategy
================================================
FILE: packages/core/src/agent/instructions/instructions-direct.md
================================================
You are an AI agent that controls a web browser to complete tasks. You operate in an iterative loop: observe the current page state, decide on actions, execute them, and repeat until the task is done.
Your task: {{task}}
You excel at:
1. Navigating complex websites and extracting precise information
2. Automating form submissions and interactive web actions
3. Gathering and organizing information across multiple pages
4. Operating effectively in an iterative agent loop
5. Adapting strategies when encountering obstacles
- Default working language: **English**
- Always respond in the same language as the task description
At every step, your input will consist of:
1. **Agent history**: A chronological event stream including your previous actions and their results.
2. **Browser state**: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
3. **Screenshot** (when vision is enabled): A screenshot of the current page with bounding boxes around interactive elements.
Browser state is given as:
- **Current URL**: The URL of the page you are currently viewing.
- **Open Tabs**: Open tabs with their IDs.
- **Interactive Elements**: All interactive elements in the format `[index]<type>text</type>` where:
- `index`: Numeric identifier for interaction
- `type`: HTML element type (button, input, etc.)
- `text`: Element description
Important notes:
- Only elements with numeric indexes in `[]` are interactive
- Indentation (with tab) means the element is a child of the element above
- Elements tagged with `*[` are **new** interactive elements that appeared since the last step
- Pure text elements without `[]` are not interactive
If vision is enabled, you will receive a screenshot of the current page with bounding boxes around interactive elements.
- This is your **ground truth**: use it to evaluate your progress
- If an interactive element has no text in browser_state, its index is at the top center of its bounding box
Strictly follow these rules while using the browser:
- Only interact with elements that have a numeric `[index]`
- Only use indexes that are explicitly provided
- If research is needed, open a **new tab** instead of reusing the current one
- If the page changes after an action, analyze new elements before proceeding
- By default, only elements in the visible viewport are listed
- If the page is not fully loaded, use the wait action
- Use extract_content only if information is NOT visible in browser_state
- extract_content is expensive - do NOT call it multiple times on the same page
- If you fill an input field and your action sequence is interrupted, something changed (e.g., suggestions appeared)
- Complete any remaining actions from interrupted sequences in the next step
- For autocomplete fields: type text, WAIT for suggestions, click the correct one or press Enter
- If the task specifies criteria (price, rating, location, etc.), look for filter/sort options FIRST
- Handle popups, modals, cookie banners immediately before other actions
- If blocked by captcha/login/403, try alternative approaches
- Detect loops: if same URL for 3+ steps without progress, change approach
- Do not log in unless the task requires it and you have credentials
## Output Format
Respond with:
1. **currentState**: Your assessment including:
- `evaluation`: Assessment of how the last action went
- `memory`: Important information to remember
- `nextGoal`: The next immediate goal
2. **actions**: A list of actions to execute (max {{maxActionsPerStep}} per step)
Maximum {{maxActionsPerStep}} actions per step, executed sequentially.
- If the page changes after an action, remaining actions are skipped and you get the new state.
- Check browser state each step to verify your previous action achieved its goal.
- When chaining actions, never take consequential actions without confirming changes occurred.
{{actionDescriptions}}
Combine actions when sensible. Do not predict actions that do not apply to the current page.
**Recommended combinations:**
- `input_text` + `input_text` + `click` -> Fill multiple fields then submit
- `input_text` + `send_keys` -> Fill a field and press Enter
- `scroll` + `scroll` -> Scroll further down
Do not try multiple paths in one step. Have one clear goal per step.
Place page-changing actions **last** in your action list.
Be clear and concise in your decision-making:
1. Analyze the last action result - state success, failure, or uncertain
2. Analyze browser state and screenshot to understand current position
3. If stuck, consider alternative approaches
4. Store concise, actionable context in memory
5. State your next immediate goal clearly
Call `done` when:
- Task is fully completed
- Reached max steps (even if incomplete)
- Absolutely impossible to continue
Rules:
- Set `success=true` ONLY if the full task is completed
- Put ALL relevant findings in the `text` field
- Call `done` as a single action - never combine with other actions
**Before calling done with success=true, verify:**
1. Re-read the original task and check every requirement
2. Verify correct count, filters, format
3. Confirm actions completed via page state/screenshot
4. Ensure no fabricated data
5. If anything is unmet or uncertain, set success to false
When encountering errors:
1. Verify state using screenshot as ground truth
2. Check for blocking popups/overlays
3. If element not found, scroll to reveal content
4. If action fails 2-3 times, try alternative approach
5. If blocked by login/captcha/403, try alternative sites
6. If page structure differs from expected, re-analyze and adapt
7. If stuck in loop, acknowledge in memory and change strategy
8. If max_steps approaching, prioritize most important parts
**Good evaluation examples:**
- "Successfully navigated to the product page and found the target information. Verdict: Success"
- "Failed to input text into the search bar - element not visible. Verdict: Failure"
**Good memory examples:**
- "Visited 2 of 5 target websites. Collected pricing from Amazon ($39.99) and eBay ($42.00). Still need Walmart, Target, Best Buy."
- "Search returned results but no filter applied. User wants items under $50 with 4+ stars. Will apply price filter first."
**Good next goal examples:**
- "Click 'Add to Cart' to proceed with purchase flow."
- "Apply price filter to narrow results to items under $50."
1. ALWAYS verify action success using screenshot/browser state
2. ALWAYS handle popups/modals before other actions
3. ALWAYS apply filters when task specifies criteria
4. NEVER repeat failing actions more than 2-3 times
5. NEVER assume success without verification
6. Track progress in memory to avoid loops
7. Match requested output format exactly
8. Be efficient - combine actions when possible
================================================
FILE: packages/core/src/agent/instructions/instructions.md
================================================
You are an AI agent that controls a web browser to complete tasks. You operate in an iterative loop: observe the current page state, decide on actions, execute them, and repeat until the task is done.
Your task: {{task}}
You excel at:
1. Navigating complex websites and extracting precise information
2. Automating form submissions and interactive web actions
3. Gathering and organizing information across multiple pages
4. Operating effectively in an iterative agent loop
5. Adapting strategies when encountering obstacles
- Default working language: **English**
- Always respond in the same language as the task description
At every step, your input will consist of:
1. **Agent history**: A chronological event stream including your previous actions and their results.
2. **Browser state**: Current URL, open tabs, interactive elements indexed for actions, and visible page content.
3. **Screenshot** (when vision is enabled): A screenshot of the current page with bounding boxes around interactive elements.
Browser state is given as:
- **Current URL**: The URL of the page you are currently viewing.
- **Open Tabs**: Open tabs with their IDs.
- **Interactive Elements**: All interactive elements in the format `[index]<type>text</type>` where:
- `index`: Numeric identifier for interaction
- `type`: HTML element type (button, input, etc.)
- `text`: Element description
Examples:
```
[33]User form
*[35]
```
Important notes:
- Only elements with numeric indexes in `[]` are interactive
- Indentation (with tab) means the element is a child of the element above
- Elements tagged with `*[` are **new** interactive elements that appeared since the last step. Your previous actions caused that change. Consider if you need to interact with them.
- Pure text elements without `[]` are not interactive
If vision is enabled, you will receive a screenshot of the current page with bounding boxes around interactive elements.
- This is your **ground truth**: use it to evaluate your progress
- If an interactive element has no text in browser_state, its index is written at the top center of its bounding box in the screenshot
- Use the screenshot action if you need more visual information
Strictly follow these rules while using the browser:
**Element Interaction:**
- Only interact with elements that have a numeric `[index]` assigned
- Only use indexes that are explicitly provided in the current browser state
- If a page changes after an action (e.g., input text triggers suggestions), analyze new elements before proceeding
**Navigation:**
- If research is needed, open a **new tab** instead of reusing the current one
- By default, only elements in the visible viewport are listed
- If the page is not fully loaded, use the wait action
**Content Extraction:**
- Use extract_content on specific pages to gather structured information from the entire page, including parts not currently visible
- Only call extract_content if the information is NOT already visible in browser_state - prefer using text directly from browser_state
- extract_content is expensive - do NOT call it multiple times with the same query on the same page
**Input Handling:**
- If you fill an input field and your action sequence is interrupted, something likely changed (e.g., suggestions appeared)
- If the action sequence was interrupted in a previous step, complete any remaining actions that were not executed
- For autocomplete/combobox fields: type your text, then WAIT for suggestions in the next step. If suggestions appear (marked with `*[`), click the correct one. If none appear, press Enter.
- After input, you may need to press Enter, click a search button, or select from a dropdown
**Filters and Criteria:**
- If the task includes specific criteria (product type, rating, price, location, etc.), ALWAYS look for filter/sort options FIRST before browsing results
**Error Recovery:**
- If a captcha appears, attempt solving it. If blocked after 3-4 steps, try alternative approaches or report the limitation
- Handle popups, modals, cookie banners, and overlays immediately before other actions
- If you encounter access denied (403), bot detection, or rate limiting, do NOT retry the same URL repeatedly - try alternatives
- Detect and break out of unproductive loops: if you are on the same URL for 3+ steps without progress, or the same action fails 2-3 times, try a different approach
**Authentication:**
- Do not log into a page unless required by the task and you have credentials
## Output Format
Respond with:
1. **currentState**: Your assessment of the current state including:
- `evaluation`: Assessment of how the last action went
- `memory`: Important information to remember (progress, data found, approaches tried)
- `nextGoal`: The next immediate goal to pursue
2. **actions**: A list of actions to execute (max {{maxActionsPerStep}} per step)
You are allowed to use a maximum of {{maxActionsPerStep}} actions per step.
Multiple actions execute sequentially (one after another).
- If the page changes after an action, remaining actions are automatically skipped and you get the new state.
- Check the browser state each step to verify your previous action achieved its goal.
{{actionDescriptions}}
You can output multiple actions in one step. Be efficient where it makes sense, but do not predict actions that do not make sense for the current page.
**Action categories:**
- **Page-changing (always last):** navigate, search_google, go_back, switch_tab - these always change the page. Remaining actions after them are skipped automatically.
- **Potentially page-changing:** click (on links/buttons that navigate) - monitored at runtime; if the page changes, remaining actions are skipped.
- **Safe to chain:** input_text, scroll, extract_content, find_elements - these do not change the page and can be freely combined.
**Recommended combinations:**
- `input_text` + `input_text` + `click` -> Fill multiple form fields then submit
- `input_text` + `send_keys` -> Fill a field and press Enter
- `scroll` + `scroll` -> Scroll further down the page
Do not try multiple different paths in one step. Always have one clear goal per step.
Place any page-changing action **last** in your action list.
You must reason systematically at every step:
1. Analyze the most recent action result - clearly state success, failure, or uncertainty. Never assume success without verification.
2. Analyze browser state, screenshot, and history to understand current position relative to the task.
3. If stuck (same actions repeated without progress), consider alternative approaches.
4. Decide what concise, actionable context should be stored in memory.
5. State your next immediate goal clearly.
You must use the `done` action when:
- You have fully completed the task
- You reach the final allowed step, even if the task is incomplete
- It is absolutely impossible to continue
Rules for `done`:
- Set `success` to `true` only if the FULL task has been completed
- If any part is missing, incomplete, or uncertain, set `success` to `false`
- Put ALL relevant findings in the `text` field
- You are ONLY allowed to call `done` as a single action - never combine it with other actions
**Before calling done with success=true, verify:**
1. Re-read the original task and list every concrete requirement
2. Check each requirement against your results (correct count, filters applied, format matched)
3. Verify actions actually completed (check page state/screenshot)
4. Ensure no data was fabricated - every fact must come from pages you visited
5. If ANY requirement is unmet or uncertain, set success to false
- When you reach 75% of your step budget, critically evaluate whether you can complete the full task in remaining steps
- If completion is unlikely, shift strategy: focus on highest-value remaining items and consolidate results
- For large multi-item tasks, estimate per-item cost from the first few items and prioritize if the task will exceed your budget
When encountering errors or unexpected states:
1. Verify the current state using screenshot as ground truth
2. Check if a popup, modal, or overlay is blocking interaction
3. If an element is not found, scroll to reveal more content
4. If an action fails repeatedly (2-3 times), try an alternative approach
5. If blocked by login/captcha/403, consider alternative sites or search engines
6. If the page structure is different than expected, re-analyze and adapt
7. If stuck in a loop, explicitly acknowledge it in memory and change strategy
8. If max_steps is approaching, prioritize completing the most important parts
**Good evaluation examples:**
- "Successfully navigated to the product page and found the target information. Verdict: Success"
- "Failed to input text into the search bar - element not visible. Verdict: Failure"
**Good memory examples:**
- "Visited 2 of 5 target websites. Collected pricing data from Amazon ($39.99) and eBay ($42.00). Still need Walmart, Target, Best Buy."
- "Search returned results but no filter applied yet. User wants items under $50 with 4+ stars. Will apply price filter first."
- "Captcha appeared twice on this site. Will try alternative approach via search engine."
**Good next goal examples:**
- "Click the 'Add to Cart' button to proceed with the purchase flow."
- "Apply price filter to narrow results to items under $50."
- "Close the popup blocking the main content."
1. ALWAYS verify action success using screenshot/browser state before proceeding
2. ALWAYS handle popups/modals/cookie banners before other actions
3. ALWAYS apply filters when the task specifies criteria
4. NEVER repeat the same failing action more than 2-3 times
5. NEVER assume success without verification
6. Track progress in memory to avoid loops
7. Match the task's requested output format exactly
8. Be efficient - combine actions when possible but verify between major steps
================================================
FILE: packages/core/src/agent/instructions.ts
================================================
import { readFileSync } from 'node:fs';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import type { AgentConfig } from './types.js';
import type { ViewportSnapshot, TabDescriptor } from '../viewport/types.js';
import type { CommandCatalog } from '../commands/catalog/catalog.js';
import type { ContentPart } from '../model/messages.js';
import { textContent, imageContent } from '../model/messages.js';
import { isNewTabPage, sanitizeSurrogates, dedent } from '../utils.js';
// ── Template types ──
/**
 * Names the system-prompt template variant to load. Each variant is backed by
 * one markdown file: 'default' -> instructions.md, 'flash' ->
 * instructions-compact.md, 'no-thinking' -> instructions-direct.md.
 */
export type PromptTemplate = 'default' | 'flash' | 'no-thinking';
/**
 * Settings consumed by InstructionBuilder when assembling the system prompt.
 */
export interface InstructionBuilderOptions {
/** Maximum actions the agent can take per step; stringified and interpolated into the template. */
commandsPerStep: number;
/** Override the entire system prompt with a custom string. When set (non-empty), no template is loaded. */
overrideInstructionBuilder?: string;
/** Append additional instructions to the system prompt, separated by a newline. Applied to both the override and the template path. */
extendInstructionBuilder?: string;
/** Which template variant to use. Defaults to 'default'. */
template?: PromptTemplate;
/**
 * Whether to include sensitive-data warnings.
 * NOTE(review): no reader of this flag is visible in this file section — confirm where it is consumed.
 */
hasSensitiveData?: boolean;
}
/**
 * Position of the agent within its step budget.
 */
export interface StepInfo {
/** Zero-based index of the current step; rendered to the model as `step + 1`. */
step: number;
/** Total number of steps allowed; rendered as the "of N" part of the step line. */
stepLimit: number;
}
/**
 * Inputs for StepPromptBuilder, which renders the per-step user message.
 */
export interface StepPromptBuilderOptions {
/** Page snapshot; its url, title, tabs, domTree, pixelsAbove and pixelsBelow are read when rendering. */
browserState: ViewportSnapshot;
/** Task text, rendered in the agent-state section. */
task: string;
/** Current step / step limit. When present, a "Step N of M | Today: YYYY-MM-DD" line is added. */
stepInfo?: StepInfo;
/** Stored by the builder. NOTE(review): not read when rendering in the code visible here — confirm usage. */
actionDescriptions?: string;
/** Extra action descriptions for the current page; appended as a trailing section when set. */
pageFilteredActions?: string;
/** Summary of previous steps; trimmed and rendered as the first section (empty when omitted). */
agentHistoryDescription?: string;
/** Text about masked/sensitive values; appended verbatim to the agent-state section when set. */
maskedValues?: string;
/** Current plan text; included in the agent-state section when set. */
planDescription?: string;
/** Screenshots, oldest first; the last one is labelled "Current screenshot:". Passed to imageContent as 'image/png' — presumably base64 data, confirm. Defaults to []. */
screenshots?: string[];
/** Whether screenshots may be attached to the message. Defaults to false. */
enableScreenshots?: boolean;
/** Maximum number of characters of the DOM tree text to include before truncating. Defaults to 40_000. */
maxElementsLength?: number;
}
// ── Template loading ──
/**
 * Directory containing the .md system prompt templates.
 * Resolved relative to this file's location so it works regardless of
 * the current working directory or whether the package is installed.
 */
const TEMPLATES_DIR = resolve(dirname(fileURLToPath(import.meta.url)), 'instructions');

/** Cache of loaded templates, keyed by variant, so each file is read at most once. */
const templateCache = new Map<PromptTemplate, string>();

/**
 * Map from PromptTemplate variant to the corresponding filename inside
 * TEMPLATES_DIR.
 */
const TEMPLATE_FILES: Readonly<Record<PromptTemplate, string>> = {
  default: 'instructions.md',
  flash: 'instructions-compact.md',
  'no-thinking': 'instructions-direct.md',
};

/**
 * Load a system-prompt template from disk. Results are cached.
 *
 * @param variant - Which prompt template to load.
 * @returns The raw template string with `{{variable}}` placeholders.
 * @throws If the variant is unknown or the template file cannot be read.
 */
function loadTemplate(variant: PromptTemplate): string {
  const cached = templateCache.get(variant);
  if (cached !== undefined) return cached;

  // Untyped (plain JS) callers can pass an arbitrary string; fail with a clear
  // message instead of letting resolve() throw on an undefined path segment.
  const filename: string | undefined = TEMPLATE_FILES[variant];
  if (filename === undefined) {
    throw new Error(`Unknown system prompt template variant "${String(variant)}"`);
  }

  const filepath = resolve(TEMPLATES_DIR, filename);
  try {
    const content = readFileSync(filepath, 'utf-8');
    templateCache.set(variant, content);
    return content;
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to load system prompt template "${filename}": ${message}`);
  }
}
/**
 * Interpolate `{{key}}` placeholders in a template string.
 *
 * Only own properties of `variables` are substituted. Placeholders whose key
 * is absent (including names that merely exist on `Object.prototype`, such as
 * `toString` or `constructor`) are left as-is so downstream code can detect
 * them.
 *
 * @param template - Text containing zero or more `{{word}}` placeholders.
 * @param variables - Replacement values keyed by placeholder name.
 * @returns The template with every known placeholder replaced.
 */
function interpolate(template: string, variables: Record<string, string>): string {
  return template.replace(/\{\{(\w+)\}\}/g, (match: string, key: string): string => {
    // `key in variables` would also match inherited members; check own keys only.
    if (!Object.prototype.hasOwnProperty.call(variables, key)) return match;
    return variables[key] ?? match;
  });
}
/**
 * Drop every cached template so the next load re-reads its file from disk.
 * Intended for tests and hot-reload scenarios.
 */
export function clearTemplateCache(): void {
  const cache = templateCache;
  cache.clear();
}
// ── InstructionBuilder ──
/**
 * Builds the system prompt for the browser automation agent.
 *
 * In the simplest case it loads a `.md` template from the `instructions/`
 * directory and interpolates `{{task}}`, `{{maxActionsPerStep}}` (also
 * available as `{{commandsPerStep}}`), and `{{actionDescriptions}}`.
 *
 * The class also exposes static helpers for building per-step state messages,
 * action results, and other ancillary prompt fragments that are injected as
 * user messages during the agent loop.
 */
export class InstructionBuilder {
  private options: InstructionBuilderOptions;
  private actionDescriptions: string;

  /**
   * @param options - Prompt settings (per-step action limit, override/extension text, template variant).
   * @param actionDescriptions - Text substituted for the `{{actionDescriptions}}` placeholder.
   */
  constructor(options: InstructionBuilderOptions, actionDescriptions: string) {
    this.options = options;
    this.actionDescriptions = actionDescriptions;
  }

  /**
   * Build and return the complete system prompt string.
   *
   * If `overrideInstructionBuilder` is set, it is used verbatim. Otherwise the
   * selected `.md` template is loaded and interpolated with the current
   * settings. In both cases `extendInstructionBuilder`, when set, is appended
   * after a newline.
   *
   * @returns The system prompt text.
   * @throws If the template file cannot be loaded.
   */
  build(): string {
    let prompt: string;

    if (this.options.overrideInstructionBuilder) {
      prompt = this.options.overrideInstructionBuilder;
    } else {
      const template = loadTemplate(this.options.template ?? 'default');
      const perStepLimit = String(this.options.commandsPerStep);
      const variables: Record<string, string> = {
        task: '(set per-step in user messages)',
        // The bundled templates spell this placeholder `{{maxActionsPerStep}}`;
        // `commandsPerStep` is kept for templates written against the option name.
        maxActionsPerStep: perStepLimit,
        commandsPerStep: perStepLimit,
        actionDescriptions: this.actionDescriptions,
      };
      prompt = interpolate(template, variables);
    }

    if (this.options.extendInstructionBuilder) {
      prompt += `\n${this.options.extendInstructionBuilder}`;
    }
    return prompt;
  }

  /**
   * Convenience: create an InstructionBuilder from AgentConfig + a CommandCatalog.
   * Pulls action descriptions directly from the registry, optionally
   * filtered by the current page URL.
   *
   * @param settings - Agent configuration supplying the per-step limit, override/extension text and masked values.
   * @param registry - Catalog asked for the prompt description of available commands.
   * @param pageUrl - Optional page URL forwarded to the catalog.
   */
  static fromSettings(
    settings: AgentConfig,
    registry: CommandCatalog,
    pageUrl?: string,
  ): InstructionBuilder {
    const descriptions = registry.getPromptDescription(pageUrl);
    return new InstructionBuilder(
      {
        commandsPerStep: settings.commandsPerStep,
        overrideInstructionBuilder: settings.overrideInstructionBuilder,
        extendInstructionBuilder: settings.extendInstructionBuilder,
        hasSensitiveData: settings.maskedValues !== undefined,
      },
      descriptions,
    );
  }

  // ── Static prompt fragment builders ──

  /** Format the task as a one-line prompt fragment. */
  static buildTaskPrompt(task: string): string {
    return `Your current task: ${task}`;
  }

  /**
   * Render a plain-text description of the current page state.
   *
   * The tab list is included only when more than one tab is open; scroll
   * hints are included only for positive pixel counts.
   */
  static buildStatePrompt(
    url: string,
    title: string,
    tabs: Array<{ url: string; title: string; isActive: boolean }>,
    domTree: string,
    step: number,
    stepLimit: number,
    pixelsAbove?: number,
    pixelsBelow?: number,
  ): string {
    const parts: string[] = [
      `[Step ${step}/${stepLimit}]`,
      `Current URL: ${url}`,
      `Page Title: ${title}`,
    ];

    if (tabs.length > 1) {
      const tabList = tabs
        .map((t, i) => `  [${i}] ${t.isActive ? '(active) ' : ''}${t.title} - ${t.url}`)
        .join('\n');
      parts.push(`Open Tabs:\n${tabList}`);
    }
    if (pixelsAbove !== undefined && pixelsAbove > 0) {
      parts.push(`Scroll position: ${pixelsAbove}px from top`);
    }
    if (pixelsBelow !== undefined && pixelsBelow > 0) {
      parts.push(`${pixelsBelow}px of content below the visible area`);
    }
    parts.push(`\nPage content:\n${domTree}`);

    return parts.join('\n');
  }

  /** Format the results of the previous step's actions; returns '' when there are none. */
  static buildCommandResultPrompt(results: Array<{ action: string; result: string }>): string {
    if (results.length === 0) return '';
    const formatted = results
      .map((r) => `Action: ${r.action}\nResult: ${r.result}`)
      .join('\n---\n');
    return `Previous action results:\n${formatted}`;
  }

  /** Wrap a nudge message (e.g. after a detected loop) so it stands out to the model. */
  static buildLoopNudge(message: string): string {
    return `\nIMPORTANT: ${message}`;
  }

  /** Format the current plan as a prompt fragment. */
  static buildPlanPrompt(currentPlan: string): string {
    return `\nCurrent plan:\n${currentPlan}`;
  }
}
// ── StepPromptBuilder ──
/**
* Constructs the per-step user message for the agent.
*
* Each step of the agent loop sends a user message containing:
* - The current browser state (URL, tabs, interactive elements)
* - Scroll position and page boundaries
* - Agent history summary
* - Step information (step N of M)
* - Optionally: screenshots, sensitive data warnings, plan description
* - Optionally: page-specific action descriptions
*
* The message can be returned as a plain string or as a multipart content
* array (text + images) when vision is enabled.
*/
export class StepPromptBuilder {
private browserState: ViewportSnapshot;
private task: string;
private stepInfo?: StepInfo;
private actionDescriptions?: string;
private pageFilteredActions?: string;
private agentHistoryDescription?: string;
private maskedValues?: string;
private planDescription?: string;
private screenshots: string[];
private enableScreenshots: boolean;
private maxElementsLength: number;
constructor(options: StepPromptBuilderOptions) {
this.browserState = options.browserState;
this.task = options.task;
this.stepInfo = options.stepInfo;
this.actionDescriptions = options.actionDescriptions;
this.pageFilteredActions = options.pageFilteredActions;
this.agentHistoryDescription = options.agentHistoryDescription;
this.maskedValues = options.maskedValues;
this.planDescription = options.planDescription;
this.screenshots = options.screenshots ?? [];
this.enableScreenshots = options.enableScreenshots ?? false;
this.maxElementsLength = options.maxElementsLength ?? 40_000;
}
/**
* Build the user message content.
*
* When vision is disabled (or no screenshots are available), returns a
* single string. When vision is enabled and screenshots exist, returns
* a `ContentPart[]` array interleaving text and image parts.
*/
getUserMessage(): string | ContentPart[] {
// Skip screenshots on step 0 for new-tab pages with a single tab
let effectiveVision = this.enableScreenshots;
if (
isNewTabPage(this.browserState.url) &&
this.stepInfo?.step === 0 &&
this.browserState.tabs.length <= 1
) {
effectiveVision = false;
}
const stateDescription = this.buildStateDescription();
if (effectiveVision && this.screenshots.length > 0) {
const parts: ContentPart[] = [textContent(stateDescription)];
for (let i = 0; i < this.screenshots.length; i++) {
const label =
i === this.screenshots.length - 1 ? 'Current screenshot:' : 'Previous screenshot:';
parts.push(textContent(label));
parts.push(imageContent(this.screenshots[i], 'image/png'));
}
return parts;
}
return stateDescription;
}
/**
* Build the complete text description of the current state.
* This includes agent history, agent state (task, step info, plan),
* and browser state (URL, tabs, elements, scroll position).
*/
private buildStateDescription(): string {
const sections: string[] = [];
// Agent history
sections.push(this.buildAgentHistorySection());
// Agent state (task, step info, plan, sensitive data)
sections.push(this.buildAgentStateSection());
// Browser state (URL, tabs, elements)
sections.push(this.buildBrowserStateSection());
// Page-specific actions (if any domain-filtered actions apply)
if (this.pageFilteredActions) {
sections.push(
`\n${this.pageFilteredActions}\n`,
);
}
// Sanitize surrogates to prevent JSON serialization issues
return sanitizeSurrogates(sections.join('\n\n'));
}
private buildAgentHistorySection(): string {
const history = this.agentHistoryDescription?.trim() ?? '';
return `\n${history}\n`;
}
private buildAgentStateSection(): string {
const parts: string[] = [];
parts.push(`\n${this.task}\n`);
if (this.planDescription) {
parts.push(`\n${this.planDescription}\n`);
}
if (this.maskedValues) {
parts.push(`${this.maskedValues}`);
}
if (this.stepInfo) {
const today = new Date().toISOString().slice(0, 10);
parts.push(
`Step ${this.stepInfo.step + 1} of ${this.stepInfo.stepLimit} | Today: ${today}`,
);
}
return `\n${parts.join('\n')}\n`;
}
private buildBrowserStateSection(): string {
const parts: string[] = [];
// Tabs
const tabsText = this.buildTabsText();
if (tabsText) {
parts.push(tabsText);
}
// Scroll / page info
const pageInfo = this.buildPageInfoText();
if (pageInfo) {
parts.push(pageInfo);
}
// Interactive elements
parts.push(this.buildElementsText());
return `\n${parts.join('\n')}\n`;
}
private buildTabsText(): string {
const { tabs, url, title } = this.browserState;
if (tabs.length === 0) return '';
// Try to identify the current tab
const currentCandidates = tabs.filter((t) => t.url === url && t.title === title);
const currentTabId =
currentCandidates.length === 1 ? currentCandidates[0].tabId : undefined;
const lines: string[] = [];
if (currentTabId) {
lines.push(`Current tab: ${String(currentTabId).slice(-4)}`);
}
lines.push('Available tabs:');
for (const tab of tabs) {
lines.push(`Tab ${String(tab.tabId).slice(-4)}: ${tab.url} - ${tab.title.slice(0, 30)}`);
}
return lines.join('\n');
}
/**
 * Describe how much content lies above and below the viewport.
 *
 * Pixel offsets are converted to "pages" using an assumed viewport height of
 * roughly 900px and formatted with one decimal place.
 *
 * @returns e.g. "1.5 pages above, 0.3 pages below", or '' when neither
 *          offset is a positive number.
 */
private buildPageInfoText(): string {
  const { pixelsAbove, pixelsBelow } = this.browserState;
  const offsets = [
    [pixelsAbove, 'pages above'],
    [pixelsBelow, 'pages below'],
  ] as const;

  const segments: string[] = [];
  for (const [pixels, label] of offsets) {
    if (pixels !== undefined && pixels > 0) {
      segments.push(`${(pixels / 900).toFixed(1)} ${label}`);
    }
  }
  return segments.join(', ');
}
/**
 * Render the interactive-elements block from the serialized DOM tree.
 *
 * - A missing or empty tree yields the fixed "empty page" text.
 * - Text longer than `maxElementsLength` is cut and the header notes it.
 * - "[Start of page]" / "[End of page]" markers are added when there is no
 *   positive scroll offset above / below the viewport.
 */
private buildElementsText(): string {
  const { domTree, pixelsAbove, pixelsBelow } = this.browserState;
  const fullText = domTree ?? '';
  if (!fullText) {
    return 'Interactive elements:\nempty page';
  }

  const limit = this.maxElementsLength;
  const wasTruncated = fullText.length > limit;
  const visibleText = wasTruncated ? fullText.slice(0, limit) : fullText;
  const truncatedNote = wasTruncated ? ` (truncated to ${limit} characters)` : '';

  // Markers are driven purely by the scroll offsets, not by truncation.
  const hasContentAbove = pixelsAbove !== undefined && pixelsAbove > 0;
  const hasContentBelow = pixelsBelow !== undefined && pixelsBelow > 0;
  const startMarker = hasContentAbove ? '' : '[Start of page]\n';
  const endMarker = hasContentBelow ? '' : '\n[End of page]';

  return `Interactive elements${truncatedNote}:\n${startMarker}${visibleText}${endMarker}`;
}
}
// ── Dynamic action descriptions ──
/**
 * Produce the action-description text for a command registry.
 *
 * Delegates to `registry.getPromptDescription`, forwarding the optional page
 * URL so the registry can filter by the current page. The result is meant to
 * fill the system prompt's `{{actionDescriptions}}` slot.
 *
 * @param registry - Catalog that knows how to describe its commands.
 * @param pageUrl - Current page URL, if any.
 * @returns The formatted description string from the registry.
 */
export function buildCommandDescriptions(registry: CommandCatalog, pageUrl?: string): string {
  const description = registry.getPromptDescription(pageUrl);
  return description;
}
/**
 * Build a description of actions that are specific to the current page's domain.
 *
 * The registry is asked for the actions available on the page's domain; of
 * those, only actions carrying a non-empty `domainFilter` are listed, since
 * unfiltered actions belong to the universal set.
 *
 * The result is injected as a dedicated page-specific-actions section in the
 * per-step user message when the page URL triggers extra actions.
 *
 * @param registry - Catalog to query for domain-applicable actions.
 * @param pageUrl - URL of the current page; an unparsable URL maps to the
 *                  empty domain.
 * @returns The formatted list, or `undefined` if there are no domain-specific
 *          actions beyond the universal set.
 */
export function buildContextualCommands(registry: CommandCatalog, pageUrl: string): string | undefined {
  const domainActions = registry.getActionsForDomain(extractDomain(pageUrl));

  // Decide purely on the presence of a domain filter. Comparing the count of
  // domain actions with the count of all registered actions is not a valid
  // shortcut: when every domain-filtered action happens to match this page
  // the counts are equal and the page-specific actions would be hidden.
  const extraActions = domainActions.filter(
    (a) => a.domainFilter && a.domainFilter.length > 0,
  );
  if (extraActions.length === 0) return undefined;

  const lines = extraActions.map((a) => `- ${a.name}: ${a.description}`);
  return `The following actions are available on this page:\n${lines.join('\n')}`;
}
// ── Rerun / extraction prompt helpers ──
/**
 * Build the system prompt for the extraction/AI-step action used during reruns.
 *
 * The prompt is a fixed template with no interpolated values; it is passed
 * through `dedent` before being returned.
 *
 * @returns The extraction system prompt text.
 */
export function buildExtractionInstructionBuilder(): string {
return dedent(`
You are an expert at extracting data from webpages.
You will be given:
1. A query describing what to extract
2. The markdown of the webpage (filtered to remove noise)
3. Optionally, a screenshot of the current page state
Instructions:
- Extract information from the webpage that is relevant to the query
- ONLY use the information available in the webpage - do not make up information
- If the information is not available, mention that clearly
- If the query asks for all items, list all of them
Output:
- Present ALL relevant information in a concise way
- Do not use conversational format - directly output the relevant information
- If information is unavailable, state that clearly
`);
}
/**
 * Assemble the user prompt for the extraction/AI-step action.
 *
 * Each of the three inputs becomes its own newline-wrapped section, and the
 * sections are separated by a blank line, in the order query, stats, content.
 *
 * @param query - What should be extracted.
 * @param statsSummary - Summary of content statistics to show the model.
 * @param content - The page content to extract from.
 */
export function buildExtractionUserPrompt(
  query: string,
  statsSummary: string,
  content: string,
): string {
  const querySection = `\n${query}\n`;
  const statsSection = `\n${statsSummary}\n`;
  const contentSection = `\n${content}\n`;
  return `${querySection}\n\n${statsSection}\n\n${contentSection}`;
}
// ── Helpers ──
/**
 * Extract the lower-cased hostname of a URL with a single leading "www."
 * removed. Returns an empty string when the input cannot be parsed as a URL.
 */
function extractDomain(url: string): string {
  let hostname: string;
  try {
    ({ hostname } = new URL(url));
  } catch {
    return '';
  }
  const withoutWww = hostname.replace(/^www\./, '');
  return withoutWww.toLowerCase();
}
================================================
FILE: packages/core/src/agent/replay-recorder.ts
================================================
import * as fs from 'node:fs';
import * as path from 'node:path';
import { createLogger } from '../logging.js';
const logger = createLogger('gif-recorder');
/** Construction options for a replay recorder. */
export interface ReplayRecorderOptions {
/** Output file path. Extension determines format (.gif or .png for fallback). */
outputPath: string;
/** Delay between frames in milliseconds. The recorder defaults this to 500. */
frameDelay?: number;
/** Resize frames to this width (maintains aspect ratio). 0 = no resize. The recorder defaults this to 800. */
resizeWidth?: number;
/**
 * Quality (1-30, lower = better quality). Only used for GIF encoding.
 * The recorder defaults this to 10.
 * NOTE(review): the recorder stores this value but the visible encoder code
 * never reads it — confirm whether it is meant to be wired in.
 */
quality?: number;
}
/** One buffered screenshot together with its overlay annotation data. */
interface FrameData {
// Decoded image bytes of the screenshot.
buffer: Buffer;
// Step number shown in the overlay and used in the frame file name.
stepNumber: number;
// Optional text appended to the overlay after the step number.
label?: string;
}
/**
 * Records agent screenshots and encodes them into an animated GIF.
 *
 * Frames are buffered in memory by `addFrame` and written out by `save`.
 * The raw PNG frames are always written to disk; GIF encoding additionally
 * needs the optional `sharp` package and degrades gracefully to the raw
 * frames when sharp is missing or encoding fails.
 *
 * Usage:
 *   const recorder = new ReplayRecorder({ outputPath: './recording.gif' });
 *   recorder.addFrame(screenshotBase64, 1);
 *   // ... more frames ...
 *   await recorder.save(); // -> path of the GIF (or of the fallback output)
 */
export class ReplayRecorder {
  private frames: FrameData[] = [];
  private outputPath: string;
  private frameDelay: number;
  private resizeWidth: number;
  // NOTE(review): stored for API compatibility but not read by the encoder.
  private quality: number;

  constructor(options: ReplayRecorderOptions) {
    this.outputPath = options.outputPath;
    this.frameDelay = options.frameDelay ?? 500;
    this.resizeWidth = options.resizeWidth ?? 800;
    this.quality = options.quality ?? 10;
  }

  /**
   * Add a screenshot frame to the recording.
   * @param screenshotBase64 - PNG screenshot as base64 string
   * @param stepNumber - Step number for the overlay annotation; defaults to
   *                     the 1-based position of the frame in the recording
   * @param label - Optional label text (e.g., the action taken)
   */
  addFrame(screenshotBase64: string, stepNumber?: number, label?: string): void {
    const buffer = Buffer.from(screenshotBase64, 'base64');
    this.frames.push({
      buffer,
      stepNumber: stepNumber ?? this.frames.length + 1,
      label,
    });
  }

  /**
   * Save the recording. Always writes the individual PNG frames, then
   * attempts GIF encoding with sharp unless that is disabled.
   *
   * @param generateGif - true to generate a GIF, a string to override the
   *                      output path, false to only save individual frames
   * @returns The path of the encoded file, or the requested output path when
   *          there are no frames, GIF generation is disabled, or encoding
   *          failed
   */
  async save(generateGif: string | boolean = true): Promise<string> {
    if (this.frames.length === 0) {
      logger.debug('No frames to save');
      return this.outputPath;
    }

    const effectivePath = typeof generateGif === 'string' ? generateGif : this.outputPath;
    fs.mkdirSync(path.dirname(effectivePath), { recursive: true });

    // Always save individual frames as fallback / debug
    await this.saveFrames(effectivePath);
    if (generateGif === false) {
      return effectivePath;
    }

    // Try to generate actual GIF using sharp
    try {
      const gifPath = await this.encodeGif(effectivePath);
      logger.info(`GIF saved: ${gifPath} (${this.frames.length} frames)`);
      return gifPath;
    } catch (error) {
      logger.warn(
        `GIF encoding failed, falling back to individual frames: ${
          error instanceof Error ? error.message : String(error)
        }`,
      );
      return effectivePath;
    }
  }

  /**
   * Encode frames into an animated GIF using sharp.
   * Sharp must be installed as a peer dependency.
   *
   * @returns Path of the written GIF, or of a static PNG of the last frame
   *          when animated assembly fails.
   * @throws Error when sharp cannot be imported or a frame cannot be processed.
   */
  private async encodeGif(outputPath: string): Promise<string> {
    // Dynamic import -- sharp is an optional dependency.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let sharpModule: any;
    try {
      // Indirect dynamic import avoids TS2307 for optional peer deps
      const moduleName = 'sharp';
      sharpModule = await import(/* webpackIgnore: true */ moduleName);
    } catch {
      throw new Error(
        'sharp is not installed. Install it with: npm install sharp',
      );
    }
    // Resolve the default export (handles both ESM and CJS)
    const sharp = sharpModule.default ?? sharpModule;

    const gifPath = this.replaceExtension(outputPath, '.gif');
    const processedFrames: Buffer[] = [];

    for (const frame of this.frames) {
      let img = sharp(frame.buffer);

      // Resize if configured
      if (this.resizeWidth > 0) {
        img = img.resize(this.resizeWidth, undefined, {
          fit: 'inside',
          withoutEnlargement: true,
        });
      }

      // Composite a step number overlay onto the frame
      const overlaySvg = this.createStepOverlaySvg(frame.stepNumber, frame.label);
      img = img.composite([
        {
          input: Buffer.from(overlaySvg),
          gravity: 'northwest',
        },
      ]);

      // Convert to PNG for further processing
      const processed = await img
        .flatten({ background: { r: 255, g: 255, b: 255 } })
        .png()
        .toBuffer();
      processedFrames.push(processed);
    }

    // Attempt to assemble an animated GIF from the processed frames
    try {
      // All frames are normalized to the dimensions of the first one.
      const firstFrame = sharp(processedFrames[0]);
      const metadata = await firstFrame.metadata();
      const width = metadata.width ?? this.resizeWidth;
      const height = metadata.height ?? 600;

      // Convert each frame to raw RGBA
      const rawFrames: Buffer[] = [];
      for (const frameBuffer of processedFrames) {
        const raw = await sharp(frameBuffer)
          .resize(width, height, {
            fit: 'contain',
            background: { r: 255, g: 255, b: 255 },
          })
          .raw()
          .ensureAlpha()
          .toBuffer();
        rawFrames.push(raw);
      }

      // Concatenate all raw frames and encode as animated GIF
      const combinedRaw = Buffer.concat(rawFrames);
      await sharp(combinedRaw, {
        raw: {
          width,
          height,
          channels: 4,
          pages: rawFrames.length,
        },
      })
        .gif({
          delay: Array(rawFrames.length).fill(this.frameDelay),
          loop: 0,
        })
        .toFile(gifPath);
      return gifPath;
    } catch (animatedError) {
      // If animated GIF creation fails, save the last frame as a static image
      logger.debug(
        `Animated GIF assembly failed, saving static image: ${
          animatedError instanceof Error
            ? animatedError.message
            : String(animatedError)
        }`,
      );
      const lastFrame = processedFrames[processedFrames.length - 1];
      const staticPath = this.replaceExtension(outputPath, '.png');
      await sharp(lastFrame).png().toFile(staticPath);
      return staticPath;
    }
  }

  /**
   * Create an SVG overlay with the step number and optional label.
   * Returns an SVG string that can be composited onto the frame.
   *
   * The label is capped at 40 characters and all text is XML-escaped so that
   * characters such as `<` or `&` cannot break the SVG document.
   */
  private createStepOverlaySvg(stepNumber: number, label?: string): string {
    const labelText = label ? ` - ${label.slice(0, 40)}` : '';
    const text = `Step ${stepNumber}${labelText}`;
    // Rough width estimate: ~10px per character plus padding, min 200px.
    const width = Math.max(200, text.length * 10 + 20);
    const height = 36;
    return [
      `<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}">`,
      `<rect x="0" y="0" width="${width}" height="${height}" fill="black" fill-opacity="0.7"/>`,
      `<text x="10" y="24" font-family="sans-serif" font-size="16" fill="white">${this.escapeXml(text)}</text>`,
      '</svg>',
    ].join('');
  }

  /**
   * Save individual PNG frames to a directory alongside the output path.
   * Also writes the last frame as `<name>_preview.png`.
   *
   * Frames are named after their step number, so two frames recorded with
   * the same step number end up in the same file (the later one wins).
   *
   * @returns The directory the frames were written to.
   */
  private async saveFrames(outputPath: string): Promise<string> {
    const framesDir = this.replaceExtension(outputPath, '_frames');
    fs.mkdirSync(framesDir, { recursive: true });

    for (const frame of this.frames) {
      const framePath = path.join(
        framesDir,
        `frame_${frame.stepNumber.toString().padStart(4, '0')}.png`,
      );
      fs.writeFileSync(framePath, frame.buffer);
    }

    // Also save the last frame as the preview image
    if (this.frames.length > 0) {
      const lastFrame = this.frames[this.frames.length - 1];
      const previewPath = this.replaceExtension(outputPath, '_preview.png');
      fs.writeFileSync(previewPath, lastFrame.buffer);
    }

    logger.debug(`Saved ${this.frames.length} frames to ${framesDir}`);
    return framesDir;
  }

  /**
   * Replace the file extension of `filePath` with `replacement`, or append
   * `replacement` when the path has no extension.
   *
   * Only the final path segment is inspected, so dots in directory names
   * (e.g. "./recording" or "out/v1.2/recording") are left untouched.
   */
  private replaceExtension(filePath: string, replacement: string): string {
    const ext = path.extname(filePath);
    const stem = ext ? filePath.slice(0, -ext.length) : filePath;
    return stem + replacement;
  }

  /** Escape XML special characters for SVG text content */
  private escapeXml(text: string): string {
    // '&' must be escaped first so already-produced entities are not re-escaped.
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }

  /** Number of frames currently buffered. */
  get frameCount(): number {
    return this.frames.length;
  }

  /** Drop all buffered frames. */
  clear(): void {
    this.frames = [];
  }
}
================================================
FILE: packages/core/src/agent/stall-detector.test.ts
================================================
import { test, expect, describe, beforeEach } from 'bun:test';
import {
StallDetector,
hashPageTree,
hashTextContent,
type PageSignature,
} from './stall-detector.js';
import type { Command } from '../commands/types.js';
// ── Helpers ──
/** Build a single-click `tap` command for the element at `index`. */
function clickAction(index: number): Command {
  const command: Command = { action: 'tap', index, clickCount: 1 };
  return command;
}
/** Build a `type_text` command that clears the field before typing `text`. */
function inputAction(index: number, text: string): Command {
  const command: Command = { action: 'type_text', index, text, clearFirst: true };
  return command;
}
/** Build a `navigate` command for the given URL. */
function navigateAction(url: string): Command {
  const command: Command = { action: 'navigate', url };
  return command;
}
/**
 * Build a `scroll` command. `index` is passed through as-is, so it is
 * present (as `undefined`) even when the caller omits it.
 */
function scrollAction(direction: 'up' | 'down', index?: number): Command {
  const command: Command = { action: 'scroll', direction, index };
  return command;
}
/** Build a successful `finish` command carrying the given result text. */
function doneAction(text: string): Command {
  const command: Command = { action: 'finish', text, success: true };
  return command;
}
/** Build a `web_search` command for the given query. */
function searchGoogleAction(query: string): Command {
  const command: Command = { action: 'web_search', query };
  return command;
}
/**
 * Build a page signature with stable defaults for tests.
 *
 * @param overrides - Fields to replace in the default signature; anything
 *                    not listed keeps its default value.
 */
function makeFingerprint(overrides: Partial<PageSignature> = {}): PageSignature {
  return {
    url: 'https://example.com',
    domHash: 'abc123',
    scrollY: 0,
    elementCount: 50,
    textHash: 'texthash1',
    ...overrides,
  };
}
// ── Tests ──
// Behavioral tests for StallDetector: repetition, cycle, fingerprint and
// stagnation detection, severity escalation, action-key normalization,
// reset and history pruning.
describe('StallDetector', () => {
  let detector: StallDetector;

  beforeEach(() => {
    detector = new StallDetector();
  });

  describe('initial state', () => {
    test('isStuck returns not stuck when no actions recorded', () => {
      const result = detector.isStuck();
      expect(result.stuck).toBe(false);
      expect(result.severity).toBe(0);
    });

    test('getTotalRepetitions returns 0 initially', () => {
      expect(detector.getTotalRepetitions()).toBe(0);
    });

    test('getLoopNudgeMessage returns empty string when not stuck', () => {
      expect(detector.getLoopNudgeMessage()).toBe('');
    });
  });

  describe('recordAction and repeated action detection', () => {
    test('does not flag non-repeated actions', () => {
      detector.recordAction([clickAction(1)]);
      detector.recordAction([clickAction(2)]);
      detector.recordAction([clickAction(3)]);
      const result = detector.isStuck();
      expect(result.stuck).toBe(false);
    });

    test('flags the same action repeated maxRepeatedActions times (default 3)', () => {
      detector.recordAction([clickAction(5)]);
      detector.recordAction([clickAction(5)]);
      detector.recordAction([clickAction(5)]);
      const result = detector.isStuck();
      expect(result.stuck).toBe(true);
      expect(result.reason).toContain('repeated');
      expect(result.reason).toContain('3');
    });

    test('flags repeated multi-action steps', () => {
      const actions: Command[] = [clickAction(1), inputAction(2, 'hello')];
      detector.recordAction(actions);
      detector.recordAction(actions);
      detector.recordAction(actions);
      const result = detector.isStuck();
      expect(result.stuck).toBe(true);
    });

    test('does not flag when only two repeated actions (below threshold)', () => {
      detector.recordAction([clickAction(5)]);
      detector.recordAction([clickAction(5)]);
      const result = detector.isStuck();
      expect(result.stuck).toBe(false);
    });

    test('custom maxRepeatedActions threshold', () => {
      // With maxRepeatedActions=5, only 5+ trailing repeats should trigger.
      // Note: cycle detection (A->B->A->B) fires with 4 identical actions
      // because all 4 being the same matches the pattern. So we can only test
      // that at exactly 3 trailing repeats (below our custom threshold of 5,
      // and below the cycle check threshold of 4 identical entries), it's not stuck.
      const custom = new StallDetector({ maxRepeatedActions: 5 });
      custom.recordAction([clickAction(10)]); // prefix to avoid cycle match
      custom.recordAction([clickAction(1)]);
      custom.recordAction([clickAction(1)]);
      custom.recordAction([clickAction(1)]);
      // 3 trailing repeats < 5 threshold, and cycle check sees [10,1,1,1] which is not A->B->A->B
      expect(custom.isStuck().stuck).toBe(false);
      // Add two more to reach 5 trailing repeats
      custom.recordAction([clickAction(1)]);
      custom.recordAction([clickAction(1)]);
      expect(custom.isStuck().stuck).toBe(true);
    });
  });

  describe('action cycle detection (A -> B -> A -> B)', () => {
    test('detects alternating two-action cycle', () => {
      detector.recordAction([clickAction(1)]);
      detector.recordAction([clickAction(2)]);
      detector.recordAction([clickAction(1)]);
      detector.recordAction([clickAction(2)]);
      const result = detector.isStuck();
      expect(result.stuck).toBe(true);
      expect(result.reason).toContain('cycle');
    });

    test('does not falsely detect A -> B -> A -> C as a cycle', () => {
      detector.recordAction([clickAction(1)]);
      detector.recordAction([clickAction(2)]);
      detector.recordAction([clickAction(1)]);
      detector.recordAction([clickAction(3)]);
      const result = detector.isStuck();
      expect(result.stuck).toBe(false);
    });
  });

  describe('triple cycle detection (A -> B -> C -> A -> B -> C)', () => {
    test('detects 3-step cycle', () => {
      detector.recordAction([clickAction(1)]);
      detector.recordAction([clickAction(2)]);
      detector.recordAction([clickAction(3)]);
      detector.recordAction([clickAction(1)]);
      detector.recordAction([clickAction(2)]);
      detector.recordAction([clickAction(3)]);
      const result = detector.isStuck();
      expect(result.stuck).toBe(true);
      expect(result.reason).toContain('3-step');
    });

    test('does not detect partial triple cycle', () => {
      detector.recordAction([clickAction(1)]);
      detector.recordAction([clickAction(2)]);
      detector.recordAction([clickAction(3)]);
      detector.recordAction([clickAction(1)]);
      detector.recordAction([clickAction(2)]);
      // Only 5 entries, needs 6 for triple check
      const result = detector.isStuck();
      expect(result.stuck).toBe(false);
    });
  });

  describe('fingerprint-based stuck detection', () => {
    test('detects repeated page fingerprints', () => {
      const fp = makeFingerprint();
      detector.recordFingerprint(fp);
      detector.recordFingerprint(fp);
      detector.recordFingerprint(fp);
      const result = detector.isStuck();
      expect(result.stuck).toBe(true);
      expect(result.reason).toContain('Page state unchanged');
    });

    test('different fingerprints do not trigger stuck', () => {
      detector.recordFingerprint(makeFingerprint({ domHash: 'hash1' }));
      detector.recordFingerprint(makeFingerprint({ domHash: 'hash2' }));
      detector.recordFingerprint(makeFingerprint({ domHash: 'hash3' }));
      const result = detector.isStuck();
      expect(result.stuck).toBe(false);
    });

    test('scroll position bucketed (200px buckets) - same bucket triggers stuck', () => {
      // scrollY 0, 50 and 100 are in the same bucket (all floor to 0)
      detector.recordFingerprint(makeFingerprint({ scrollY: 0 }));
      detector.recordFingerprint(makeFingerprint({ scrollY: 50 }));
      detector.recordFingerprint(makeFingerprint({ scrollY: 100 }));
      const result = detector.isStuck();
      expect(result.stuck).toBe(true);
    });

    test('different scroll buckets not considered stuck', () => {
      detector.recordFingerprint(makeFingerprint({ scrollY: 0 }));
      detector.recordFingerprint(makeFingerprint({ scrollY: 200 }));
      detector.recordFingerprint(makeFingerprint({ scrollY: 400 }));
      const result = detector.isStuck();
      expect(result.stuck).toBe(false);
    });

    test('custom maxRepeatedFingerprints threshold', () => {
      const custom = new StallDetector({ maxRepeatedFingerprints: 5 });
      const fp = makeFingerprint();
      for (let i = 0; i < 4; i++) {
        custom.recordFingerprint(fp);
      }
      expect(custom.isStuck().stuck).toBe(false);
      custom.recordFingerprint(fp);
      expect(custom.isStuck().stuck).toBe(true);
    });
  });

  describe('consecutive stagnant pages detection', () => {
    test('detects stagnant pages with same URL and similar element count', () => {
      const detector5 = new StallDetector({ maxStagnantPages: 5 });
      for (let i = 0; i < 5; i++) {
        // Different domHash/scrollY so fingerprint hashing is distinct,
        // but same URL and elementCount triggers stagnant detection.
        detector5.recordFingerprint(
          makeFingerprint({
            domHash: `hash_${i}`,
            scrollY: i * 200,
            elementCount: 50,
          }),
        );
      }
      const result = detector5.isStuck();
      expect(result.stuck).toBe(true);
      expect(result.reason).toContain('stagnant');
    });

    test('different URLs do not trigger stagnant detection', () => {
      for (let i = 0; i < 5; i++) {
        detector.recordFingerprint(
          makeFingerprint({
            url: `https://example.com/page${i}`,
            domHash: `hash_${i}`,
            scrollY: i * 200,
            elementCount: 50,
          }),
        );
      }
      const result = detector.isStuck();
      expect(result.stuck).toBe(false);
    });
  });

  describe('escalating nudge messages', () => {
    test('severity 0 for repetitions below 5', () => {
      // 3 repetitions -> gets flagged as stuck but severity 0
      for (let i = 0; i < 3; i++) {
        detector.recordAction([clickAction(1)]);
      }
      const result = detector.isStuck();
      expect(result.stuck).toBe(true);
      expect(result.severity).toBe(0);
    });

    test('severity 1 at 5+ total repetitions via cycle detection', () => {
      // Cycle detection path uses getSeverity(this.totalRepetitions)
      // so accumulating enough totalRepetitions can reach severity 1.
      const det = new StallDetector({ maxRepeatedActions: 3 });
      // First: accumulate 3 via repeated actions
      for (let i = 0; i < 3; i++) {
        det.recordAction([clickAction(1)]);
      }
      det.isStuck(); // totalRepetitions += 3
      // Break the trailing sequence, then trigger a 2-cycle
      det.recordAction([clickAction(10)]);
      // A->B->A->B cycle adds 2 to totalRepetitions -> total 5
      det.recordAction([clickAction(20)]);
      det.recordAction([clickAction(10)]);
      det.recordAction([clickAction(20)]);
      const result = det.isStuck();
      expect(result.stuck).toBe(true);
      // totalRepetitions = 3 + 2 = 5, getSeverity(5) = 1
      expect(result.severity).toBe(1);
    });

    test('nudge message contains appropriate text', () => {
      for (let i = 0; i < 3; i++) {
        detector.recordAction([clickAction(1)]);
      }
      const msg = detector.getLoopNudgeMessage();
      expect(msg).toContain('Warning:');
      expect(msg.length).toBeGreaterThan(0);
    });
  });

  describe('action hash normalization', () => {
    test('click actions normalized by index only', () => {
      // Same index but different click counts, recorded in ONE detector:
      // they only count as three repeats if clickCount is ignored by the key.
      const singleClick: Command = { action: 'tap', index: 5, clickCount: 1 };
      const doubleClick: Command = { action: 'tap', index: 5, clickCount: 2 };
      detector.recordAction([singleClick]);
      detector.recordAction([doubleClick]);
      detector.recordAction([singleClick]);
      expect(detector.isStuck().stuck).toBe(true);
    });

    test('search queries normalized for order independence', () => {
      // "best pizza NYC" and "NYC best pizza" should produce same hash
      const d = new StallDetector();
      d.recordAction([searchGoogleAction('best pizza NYC')]);
      d.recordAction([searchGoogleAction('NYC best pizza')]);
      d.recordAction([searchGoogleAction('pizza best NYC')]);
      expect(d.isStuck().stuck).toBe(true);
    });

    test('different navigate URLs not considered same action', () => {
      detector.recordAction([navigateAction('https://a.com')]);
      detector.recordAction([navigateAction('https://b.com')]);
      detector.recordAction([navigateAction('https://c.com')]);
      expect(detector.isStuck().stuck).toBe(false);
    });

    test('scroll actions include direction and index', () => {
      // Same direction, same index -> stuck
      for (let i = 0; i < 3; i++) {
        detector.recordAction([scrollAction('down', 1)]);
      }
      expect(detector.isStuck().stuck).toBe(true);
    });

    test('done actions include text prefix', () => {
      // Only the first 50 characters of the text are part of the key, so
      // texts that differ after that prefix still count as repeats.
      const prefix = 'x'.repeat(50);
      detector.recordAction([doneAction(`${prefix} first`)]);
      detector.recordAction([doneAction(`${prefix} second`)]);
      detector.recordAction([doneAction(`${prefix} third`)]);
      expect(detector.isStuck().stuck).toBe(true);
    });
  });

  describe('reset', () => {
    test('clears all history and repetitions', () => {
      for (let i = 0; i < 3; i++) {
        detector.recordAction([clickAction(1)]);
        detector.recordFingerprint(makeFingerprint());
      }
      expect(detector.isStuck().stuck).toBe(true);
      detector.reset();
      expect(detector.isStuck().stuck).toBe(false);
      expect(detector.getTotalRepetitions()).toBe(0);
      expect(detector.getLoopNudgeMessage()).toBe('');
    });
  });

  describe('window size pruning', () => {
    test('keeps action history within bounds', () => {
      const smallWindow = new StallDetector({ windowSize: 5 });
      // Record 15 unique actions, then 3 repeated
      for (let i = 0; i < 15; i++) {
        smallWindow.recordAction([clickAction(i)]);
      }
      // Now repeat same action 3 times
      for (let i = 0; i < 3; i++) {
        smallWindow.recordAction([clickAction(99)]);
      }
      // Should still detect the repetition
      expect(smallWindow.isStuck().stuck).toBe(true);
    });
  });
});
// hashPageTree: determinism, sensitivity to input, output alphabet and the
// empty-input value.
describe('hashPageTree', () => {
  test('produces consistent hash for same input', () => {
    const hash1 = hashPageTree('<div>hello</div>');
    const hash2 = hashPageTree('<div>hello</div>');
    expect(hash1).toBe(hash2);
  });

  test('produces different hash for different input', () => {
    const hash1 = hashPageTree('<div>hello</div>');
    const hash2 = hashPageTree('<div>world</div>');
    expect(hash1).not.toBe(hash2);
  });

  test('returns a base-36 string', () => {
    const hash = hashPageTree('some content');
    expect(typeof hash).toBe('string');
    // Base-36 characters: 0-9, a-z, and optional leading minus
    expect(hash).toMatch(/^-?[0-9a-z]+$/);
  });

  test('handles empty string', () => {
    const hash = hashPageTree('');
    expect(hash).toBe('0');
  });
});
// hashTextContent: determinism plus the case, whitespace and punctuation
// normalization that makes the hash content-based.
describe('hashTextContent', () => {
  test('produces consistent hash for same input', () => {
    const hash1 = hashTextContent('Hello World');
    const hash2 = hashTextContent('Hello World');
    expect(hash1).toBe(hash2);
  });

  test('normalizes case: same hash for different casing', () => {
    const hash1 = hashTextContent('Hello World');
    const hash2 = hashTextContent('hello world');
    expect(hash1).toBe(hash2);
  });

  test('normalizes whitespace: collapses multiple spaces', () => {
    // The first input must really contain a run of spaces; comparing two
    // identical strings would pass without any normalization.
    const hash1 = hashTextContent('hello    world');
    const hash2 = hashTextContent('hello world');
    expect(hash1).toBe(hash2);
  });

  test('removes punctuation for content-based matching', () => {
    const hash1 = hashTextContent('hello, world!');
    const hash2 = hashTextContent('hello world');
    expect(hash1).toBe(hash2);
  });

  test('handles empty string', () => {
    const hash = hashTextContent('');
    expect(hash).toBe('0');
  });
});
================================================
FILE: packages/core/src/agent/stall-detector.ts
================================================
import type { Command } from '../commands/types.js';
// ── Enhanced Page Fingerprint ──
/**
 * Snapshot of the page state recorded for a step and compared across steps
 * to detect that the page is not changing.
 */
export interface PageSignature {
/** Page URL; part of the equality key and required to match for stagnation. */
url: string;
/** Hash of the DOM tree; part of the equality key. */
domHash: string;
/** Scroll offset; the equality key uses it in buckets of 200, not exactly. */
scrollY: number;
/** Element count; added to the equality key when defined, and compared with a tolerance for stagnation. */
elementCount?: number;
/** Hash of the page text; added to the equality key when non-empty. */
textHash?: string;
}
/** Thresholds and history size used by the stall detector. */
export interface StallDetectorConfig {
/** Number of identical trailing action keys at which the agent is reported stuck. */
maxRepeatedActions: number;
/** Number of identical trailing page fingerprints at which the agent is reported stuck. */
maxRepeatedFingerprints: number;
/** History window; the detector keeps at most twice this many entries per history. */
windowSize: number;
/** Number of consecutive stagnant pages before raising stall alert */
maxStagnantPages: number;
}
/** Defaults applied for any option the caller does not supply. */
const DEFAULT_OPTIONS: StallDetectorConfig = {
maxRepeatedActions: 3,
maxRepeatedFingerprints: 3,
windowSize: 10,
maxStagnantPages: 5,
};
/** Outcome of a stall check. */
export interface StallCheckResult {
/** True when one of the stall heuristics matched. */
stuck: boolean;
/** Human-readable description of the matched heuristic; absent when not stuck. */
reason?: string;
/**
 * Escalation level: 0 = none, 1 = mild, 2 = moderate, 3 = severe.
 * Note that 0 does not imply "not stuck": per the unit tests, a stall detected
 * below the first escalation threshold is reported with `stuck: true` and
 * severity 0.
 */
severity: number;
}
/**
 * Nudge messages that escalate in urgency as repetitions increase.
 * Thresholds: 5 repetitions = mild, 8 = moderate, 12 = severe.
 *
 * Entries are ordered by ascending threshold; each pairs a repetition
 * threshold with a severity level and the guidance text for the agent.
 */
const ESCALATING_NUDGES = [
{
threshold: 5,
severity: 1,
message:
'You seem to be repeating similar actions. Consider trying a different approach:\n' +
'- Click a different element\n' +
'- Try an alternative navigation path\n' +
'- Use search to find what you need',
},
{
threshold: 8,
severity: 2,
message:
'WARNING: You are stuck in a loop and have been repeating actions. You MUST change your approach:\n' +
'- Navigate to a completely different page\n' +
'- Try a fundamentally different strategy\n' +
'- If the current approach is not working, consider using the done action to report the issue',
},
{
threshold: 12,
severity: 3,
message:
'CRITICAL: You have been stuck for many steps. This approach is NOT working.\n' +
'You MUST either:\n' +
'1. Use the done action to report that the task cannot be completed with your current approach\n' +
'2. Navigate to a completely different website or page\n' +
'3. Try a radically different interaction method\n' +
'Do NOT repeat the same actions again.',
},
];
export class StallDetector {
private actionHistory: string[] = [];
private fingerprintHistory: PageSignature[] = [];
private fingerprintHashes: string[] = [];
private options: StallDetectorConfig;
private totalRepetitions = 0;
/**
 * @param options - Optional overrides; any option not supplied falls back to
 *                  its value in `DEFAULT_OPTIONS`.
 */
constructor(options?: Partial<StallDetectorConfig>) {
  this.options = { ...DEFAULT_OPTIONS, ...options };
}
/**
 * Record the actions of one step as a single normalized history entry and
 * trim the history to at most `windowSize * 2` entries.
 */
recordAction(actions: Command[]): void {
  this.actionHistory.push(this.normalizeActionHash(actions));

  const maxEntries = this.options.windowSize * 2;
  if (this.actionHistory.length > maxEntries) {
    this.actionHistory = this.actionHistory.slice(-maxEntries);
  }
}
/**
 * Record the page signature of one step together with its hash. Both
 * histories are trimmed in lockstep to at most `windowSize * 2` entries.
 */
recordFingerprint(fingerprint: PageSignature): void {
  this.fingerprintHistory.push(fingerprint);
  this.fingerprintHashes.push(this.hashFingerprint(fingerprint));

  const maxEntries = this.options.windowSize * 2;
  if (this.fingerprintHistory.length > maxEntries) {
    this.fingerprintHistory = this.fingerprintHistory.slice(-maxEntries);
    this.fingerprintHashes = this.fingerprintHashes.slice(-maxEntries);
  }
}
/**
 * Run the stall heuristics in priority order and report the first match:
 * repeated action, 2-step cycle, 3-step cycle, repeated page fingerprint,
 * then consecutive stagnant pages.
 *
 * NOTE: this is not a pure query. Every positive result adds to
 * `totalRepetitions`, so calling it repeatedly on an unchanged history keeps
 * increasing the total.
 *
 * Severity is derived from the local repetition count for the repeated-action,
 * fingerprint and stagnation checks, but from the accumulated
 * `totalRepetitions` for the two cycle checks.
 *
 * @returns `{ stuck: false, severity: 0 }` when no heuristic matches.
 */
isStuck(): StallCheckResult {
// Check for repeated actions
const actionRepetitions = this.countTrailingRepetitions(this.actionHistory);
if (actionRepetitions >= this.options.maxRepeatedActions) {
this.totalRepetitions += actionRepetitions;
const severity = this.getSeverity(actionRepetitions);
return {
stuck: true,
reason: `Same action repeated ${actionRepetitions} times`,
severity,
};
}
// Check for action cycle (A -> B -> A -> B)
// Four identical entries also satisfy this pattern (A === B).
if (this.actionHistory.length >= 4) {
const last4 = this.actionHistory.slice(-4);
if (last4[0] === last4[2] && last4[1] === last4[3]) {
this.totalRepetitions += 2;
return {
stuck: true,
reason: 'Detected action cycle (alternating between two actions)',
severity: this.getSeverity(this.totalRepetitions),
};
}
}
// Check for triple cycle (A -> B -> C -> A -> B -> C)
if (this.actionHistory.length >= 6) {
const last6 = this.actionHistory.slice(-6);
if (
last6[0] === last6[3] &&
last6[1] === last6[4] &&
last6[2] === last6[5]
) {
this.totalRepetitions += 3;
return {
stuck: true,
reason: 'Detected 3-step action cycle',
severity: this.getSeverity(this.totalRepetitions),
};
}
}
// Check for repeated fingerprints (same page state)
const fpRepetitions = this.countTrailingRepetitions(this.fingerprintHashes);
if (fpRepetitions >= this.options.maxRepeatedFingerprints) {
this.totalRepetitions += fpRepetitions;
return {
stuck: true,
reason: `Page state unchanged for ${fpRepetitions} steps`,
severity: this.getSeverity(fpRepetitions),
};
}
// Check for consecutive stagnant pages (URL + elementCount unchanged)
const stagnantCount = this.countConsecutiveStagnantPages();
if (stagnantCount >= this.options.maxStagnantPages) {
this.totalRepetitions += stagnantCount;
return {
stuck: true,
reason: `Page appears stagnant for ${stagnantCount} consecutive steps (same URL and element structure)`,
severity: this.getSeverity(stagnantCount),
};
}
return { stuck: false, severity: 0 };
}
/**
 * Build the warning text to show the agent when it is stuck.
 *
 * Runs a stall check (which, like any `isStuck` call, may add to the total
 * repetition count) and, when stuck, combines the reason with the current
 * escalating nudge.
 *
 * @returns The warning message, or '' when the agent is not stuck.
 */
getLoopNudgeMessage(): string {
  const { stuck, reason } = this.isStuck();
  if (!stuck) return '';

  // Find the appropriate escalating nudge
  const nudge = this.getEscalatingNudge();
  const headline = reason ?? 'You appear to be stuck';
  return `Warning: ${headline}.\n${nudge}`;
}
/**
 * Total repetitions accumulated by positive stall checks since construction
 * or the last `reset()`.
 */
getTotalRepetitions(): number {
  const { totalRepetitions } = this;
  return totalRepetitions;
}
/** Forget all recorded actions and fingerprints and zero the repetition total. */
reset(): void {
  this.totalRepetitions = 0;
  this.fingerprintHashes = [];
  this.fingerprintHistory = [];
  this.actionHistory = [];
}
// ── Private helpers ──
/**
 * Build the normalized history key for one step's list of actions.
 *
 * Each action is reduced to the fields that identify "the same attempt":
 * - tap: element index only (click count and other params are ignored)
 * - type_text: element index and text
 * - navigate: URL only
 * - web_search / search: query with lower-cased, sorted tokens, so word
 *   order does not matter
 * - scroll: direction and element index ('page' when no index is given)
 * - finish: first 50 characters of the text
 * - anything else: the JSON serialization of the whole action
 *
 * The per-action keys are joined with '|'.
 */
private normalizeActionHash(actions: Command[]): string {
  const normalized = actions.map((action) => {
    switch (action.action) {
      case 'tap':
        // Normalize click: use index as the primary key, ignore transient params
        return `click:${action.index}`;
      case 'type_text':
        return `input_text:${action.index}:${action.text}`;
      case 'navigate':
        // Normalize: just the URL
        return `go_to_url:${action.url}`;
      case 'web_search':
        // Sort search terms for order-independent matching
        return `search_google:${this.normalizeSearchQuery(action.query)}`;
      case 'search': {
        // The query is read defensively; a missing one becomes the empty query.
        const q = 'query' in action ? String((action as Record<string, unknown>).query) : '';
        return `search_page:${this.normalizeSearchQuery(q)}`;
      }
      case 'scroll':
        return `scroll:${action.direction}:${action.index ?? 'page'}`;
      case 'finish':
        return `done:${action.text.slice(0, 50)}`;
      default:
        // Generic fallback: action name + stringified params
        return JSON.stringify(action);
    }
  });
  return normalized.join('|');
}
/**
 * Canonicalize a search query: lowercase it, split on whitespace, drop empty
 * tokens, sort the tokens and re-join them with single spaces.
 * "best pizza NYC" and "NYC best pizza" therefore yield the same string.
 */
private normalizeSearchQuery(query: string): string {
  const tokens = query
    .toLowerCase()
    .split(/\s+/)
    .filter((token) => token.length > 0);
  tokens.sort();
  return tokens.join(' ');
}
/**
 * Build a comparison key for a page fingerprint.
 *
 * The key is `url|domHash|scrollBucket`, where the scroll bucket is
 * `floor(scrollY / 200)`, followed by `|e:<elementCount>` when an element
 * count is present and `|t:<textHash>` when a non-empty text hash is present.
 */
private hashFingerprint(fp: PageSignature): string {
  const { url, domHash, scrollY, elementCount, textHash } = fp;
  const segments = [url, domHash, String(Math.floor(scrollY / 200))];

  if (elementCount !== undefined) {
    segments.push(`e:${elementCount}`);
  }
  if (textHash) {
    segments.push(`t:${textHash}`);
  }

  return segments.join('|');
}
/**
 * Length of the run of identical values at the end of `history`
 * (0 for an empty array, at least 1 otherwise).
 */
private countTrailingRepetitions(history: string[]): number {
  const size = history.length;
  if (size === 0) return 0;

  const tail = history[size - 1];
  // Walk backwards until the first entry that differs from the last one.
  let cursor = size - 1;
  while (cursor >= 0 && history[cursor] === tail) {
    cursor--;
  }
  return size - 1 - cursor;
}
/**
 * Count how many of the most recent fingerprints look like the same page as
 * the latest one: identical URL and, when both element counts are known, a
 * count difference no larger than max(10, 5% of the latest count).
 *
 * @returns 0 when fewer than two fingerprints exist; otherwise the streak
 *   length including the latest fingerprint itself.
 */
private countConsecutiveStagnantPages(): number {
  const pages = this.fingerprintHistory;
  if (pages.length < 2) return 0;

  const newest = pages[pages.length - 1];
  let streak = 1;
  let idx = pages.length - 2;
  while (idx >= 0) {
    const earlier = pages[idx];
    if (earlier.url !== newest.url) break;
    // Element counts are only compared when both fingerprints carry one.
    if (newest.elementCount !== undefined && earlier.elementCount !== undefined) {
      const tolerance = Math.max(10, Math.floor(newest.elementCount * 0.05));
      if (Math.abs(newest.elementCount - earlier.elementCount) > tolerance) break;
    }
    streak++;
    idx--;
  }
  return streak;
}
/**
 * Translate a repetition count into a severity level:
 * 12+ → 3, 8+ → 2, 5+ → 1, anything lower → 0.
 */
private getSeverity(repetitions: number): number {
  // Ordered from the highest threshold down so the first hit wins.
  const tiers: ReadonlyArray<readonly [number, number]> = [
    [12, 3],
    [8, 2],
    [5, 1],
  ];
  for (const [minimum, level] of tiers) {
    if (repetitions >= minimum) return level;
  }
  return 0;
}
/**
 * Select the nudge text for the current repetition total: the last entry of
 * ESCALATING_NUDGES (in array order) whose threshold has been reached, or the
 * first entry when none has.
 */
private getEscalatingNudge(): string {
  const reached = ESCALATING_NUDGES.filter(
    (candidate) => this.totalRepetitions >= candidate.threshold,
  );
  const chosen = reached.length > 0 ? reached[reached.length - 1] : ESCALATING_NUDGES[0];
  return chosen.message;
}
}
/**
 * Fold a DOM tree string into a signed 32-bit rolling hash (multiplier 31 over
 * UTF-16 code units) and return it in base 36.
 * Intended for cheap equality comparison of page fingerprints.
 */
export function hashPageTree(domTree: string): string {
  let acc = 0;
  let pos = 0;
  while (pos < domTree.length) {
    // imul keeps the multiplication in 32-bit space; `| 0` wraps the sum.
    acc = (Math.imul(acc, 31) + domTree.charCodeAt(pos)) | 0;
    pos += 1;
  }
  return acc.toString(36);
}
/**
 * Hash visible page text after normalizing it, so that differences in case,
 * whitespace runs and punctuation do not change the result.
 *
 * Normalization: lowercase → collapse whitespace runs to one space → remove
 * every character that is neither a word character nor whitespace → trim.
 * The normalized text is folded into a signed 32-bit rolling hash
 * (multiplier 31) and returned in base 36.
 */
export function hashTextContent(text: string): string {
  const lowered = text.toLowerCase();
  const collapsed = lowered.replace(/\s+/g, ' ');
  const cleaned = collapsed.replace(/[^\w\s]/g, '').trim();

  let acc = 0;
  for (let pos = 0; pos < cleaned.length; pos += 1) {
    acc = (Math.imul(acc, 31) + cleaned.charCodeAt(pos)) | 0;
  }
  return acc.toString(36);
}
================================================
FILE: packages/core/src/agent/types.ts
================================================
import { z } from 'zod';
import type { Command, CommandResult } from '../commands/types.js';
import type { ViewportSnapshot, ViewportHistory } from '../viewport/types.js';
import type { InferenceUsage } from '../model/types.js';
// ── Agent Settings ──
/**
 * Settings that control an agent run.
 *
 * DEFAULT_AGENT_CONFIG supplies a value for every non-optional field; the
 * optional (`?`) fields have no entry there.
 */
export interface AgentConfig {
task: string;
stepLimit: number;
commandsPerStep: number;
failureThreshold: number;
retryDelay: number;
enableScreenshots: boolean;
enableScreenshotsForTextExtraction: boolean;
contextWindowSize: number;
capturedAttributes: string[];
commandDelayMs: number;
allowedUrls?: string[];
blockedUrls?: string[];
traceOutputPath?: string;
replayOutputPath?: string;
strategyInterval: number;
// NOTE(review): `Record` is written without type arguments here — confirm the intended key/value types.
maskedValues?: Record;
overrideInstructionBuilder?: string;
extendInstructionBuilder?: string;
inlineCommands: boolean;
// See CompactionPolicy for the individual compaction settings.
conversationCompaction?: CompactionPolicy;
// Extended thinking
enableDeepReasoning: boolean;
reasoningBudget: number;
// Flash mode
compactMode: boolean;
// Timeouts (0 = no timeout)
stepDeadlineMs: number;
modelDeadlineMs: number;
// Planning system
enableStrategy: boolean;
restrategizeOnStall: boolean;
// URL extraction from task text
autoNavigateToUrls: boolean;
// Coordinate clicking auto-enable per model
autoEnableCoordinateClicking: boolean;
// Judge integration
enableEvaluation: boolean;
enableSimpleJudge: boolean;
expectedOutcome?: string;
// Demo mode
enableVisualTracer: boolean;
// Initial actions before main loop
preflightCommands: Command[];
// Save conversation per step
conversationOutputPath?: string;
// Dynamic action schema rebuild per step
dynamicCommandSchema: boolean;
}
/**
 * Baseline AgentConfig values. Every required field is given a value here;
 * optional fields (e.g. allowedUrls, traceOutputPath, expectedOutcome) are
 * left unset.
 */
export const DEFAULT_AGENT_CONFIG: AgentConfig = {
// Empty placeholder task.
task: '',
stepLimit: 100,
commandsPerStep: 10,
failureThreshold: 5,
retryDelay: 10,
enableScreenshots: true,
enableScreenshotsForTextExtraction: false,
contextWindowSize: 128000,
capturedAttributes: [
'title', 'type', 'name', 'role', 'tabindex',
'aria-label', 'placeholder', 'value', 'alt', 'aria-expanded',
],
commandDelayMs: 1,
strategyInterval: 0,
inlineCommands: true,
enableDeepReasoning: false,
reasoningBudget: 10000,
compactMode: false,
// 0 = no timeout (see the timeout fields on AgentConfig).
stepDeadlineMs: 0,
modelDeadlineMs: 0,
enableStrategy: false,
restrategizeOnStall: false,
autoNavigateToUrls: true,
autoEnableCoordinateClicking: false,
enableEvaluation: false,
enableSimpleJudge: false,
enableVisualTracer: false,
preflightCommands: [],
dynamicCommandSchema: false,
};
// ── Message Compaction Settings ──
/**
 * Settings for periodic LLM-based compaction (summarization) of the
 * conversation. Referenced from AgentConfig.conversationCompaction.
 */
export interface CompactionPolicy {
/** Run LLM-based compaction every N steps (0 = disabled). */
interval: number;
/** Model ID to use for summarization. If omitted, uses the agent's main model. */
model?: string;
/** Max tokens for the compaction summary output. */
maxTokens: number;
/** Target token budget after compaction. Defaults to 60% of contextWindowSize. */
targetTokens?: number;
}
// ── Agent Brain (LLM thought process) ──
/** The model's structured self-assessment that accompanies each decision. */
export const ReasoningSchema = z.object({
  evaluation: z.string().describe('Assessment of the current state'),
  memory: z.string().describe('Important information to remember'),
  nextGoal: z.string().describe('Next immediate goal'),
});

/** Static type of a value accepted by ReasoningSchema. */
export type Reasoning = z.infer<typeof ReasoningSchema>;

// ── Agent Output (what LLM returns each step) ──

/**
 * Full per-step output: reasoning state, the list of actions, and optional
 * top-level fields that mirror `currentState` plus an optional thinking trace.
 * Actions are loosely typed records here (arbitrary keys, unknown values).
 */
export const AgentDecisionSchema = z.object({
  currentState: ReasoningSchema,
  actions: z.array(z.record(z.unknown())).describe('Actions to execute'),
  thinking: z.string().optional().describe('Extended thinking / chain-of-thought'),
  evaluation: z.string().optional().describe('Top-level evaluation (mirrors currentState.evaluation for convenience)'),
  memory: z.string().optional().describe('Top-level memory note (mirrors currentState.memory for convenience)'),
  nextGoal: z.string().optional().describe('Top-level next goal (mirrors currentState.nextGoal for convenience)'),
});

/** Static type of a value accepted by AgentDecisionSchema. */
export type AgentDecision = z.infer<typeof AgentDecisionSchema>;

/**
 * Simplified output schema for flash / lightweight models that skip extended thinking.
 * Only contains the essential fields: current state evaluation + actions.
 */
export const AgentDecisionCompactSchema = z.object({
  currentState: z.object({
    evaluation: z.string().describe('Brief assessment'),
    nextGoal: z.string().describe('Next immediate goal'),
  }),
  actions: z.array(z.record(z.unknown())).describe('Actions to execute'),
});

/** Static type of a value accepted by AgentDecisionCompactSchema. */
export type AgentDecisionCompact = z.infer<typeof AgentDecisionCompactSchema>;

/**
 * Output variant that omits the extended thinking field.
 * Used when the model does not support or should not produce chain-of-thought.
 */
export const AgentDecisionDirectSchema = z.object({
  currentState: ReasoningSchema,
  actions: z.array(z.record(z.unknown())).describe('Actions to execute'),
});

/** Static type of a value accepted by AgentDecisionDirectSchema. */
export type AgentDecisionDirect = z.infer<typeof AgentDecisionDirectSchema>;
// ── Step Metadata ──
/** Timing, token and bookkeeping data recorded for one agent step. */
export interface StepTelemetry {
/** Step number (1-based). */
stepNumber: number;
/** Wall-clock duration of this step in milliseconds. */
durationMs: number;
/** Token usage for this step. */
inputTokens: number;
/** Output-side token usage for this step. */
outputTokens: number;
/** Number of actions attempted in this step. */
actionCount: number;
/** URL at the start of this step. */
url?: string;
/** Path to screenshot file if one was saved. */
screenshotPath?: string;
/** Timestamp when the step started. */
startedAt: number;
/** Timestamp when the step completed. */
completedAt: number;
}
// ── Detected Variable ──
/**
 * A named piece of data detected during agent execution,
 * e.g. a confirmation number, order ID, or extracted value.
 * The value is always carried as a string.
 */
export interface ExtractedVariable {
/** Human-readable name (e.g. "order_id", "confirmation_number"). */
name: string;
/** The detected value as a string. */
value: string;
/** Where this variable was found (one of four fixed origins). */
source: 'extraction' | 'action_result' | 'page_content' | 'user_input';
/** Step number where this variable was detected; may be absent. */
step?: number;
}
// ── Agent State ──
/**
 * Snapshot of an agent's progress: step counters, failure counters, status
 * flags, and running token/cost totals.
 */
export interface AgentState {
step: number;
stepLimit: number;
failureCount: number;
consecutiveFailures: number;
isRunning: boolean;
isPaused: boolean;
isDone: boolean;
lastResult?: string;
currentUrl?: string;
totalInputTokens: number;
totalOutputTokens: number;
// Running token and cost totals; see AccumulatedCost for the individual fields.
cumulativeCost: AccumulatedCost;
currentPlan?: string;
// NOTE(review): presumably the step at which currentPlan was last produced — confirm against the agent loop.
lastPlanStep?: number;
}
// ── History ──
/**
 * One entry of the execution history: what the browser looked like, what the
 * model decided, and what each executed action returned for a single step.
 * ExecutionLog aggregates these records.
 */
export interface StepRecord {
step: number;
timestamp: number;
browserState: ViewportHistory;
agentOutput: AgentDecision;
actionResults: CommandResult[];
/** Step-level error message, if any (action-level errors live on actionResults). */
error?: string;
/** Token usage for the step; ExecutionLog sums these into its totals. */
usage?: InferenceUsage;
duration: number;
metadata?: StepTelemetry;
detectedVariables?: ExtractedVariable[];
}
/**
 * Concrete class wrapping agent execution history with helper methods.
 *
 * Replaces the plain ExecutionLog interface so that consumers can call
 * convenience methods like `finalResult()`, `isDone()`, `urls()`, etc.
 *
 * Aggregate totals (`totalSteps`, `totalInputTokens`, `totalOutputTokens`)
 * are derived from `entries`; use `addEntry()` (or call `recomputeTotals()`
 * after mutating `entries` directly) to keep them in sync.
 */
export class ExecutionLog {
  readonly entries: StepRecord[];
  readonly task: string;
  readonly startTime: number;
  endTime?: number;
  totalDuration?: number;
  totalSteps: number;
  totalInputTokens: number;
  totalOutputTokens: number;

  /**
   * @param init.entries   Pre-existing step records (the array is used as-is, not copied).
   * @param init.task      Task description the history belongs to.
   * @param init.startTime Start timestamp; defaults to `Date.now()`.
   */
  constructor(init: {
    entries?: StepRecord[];
    task: string;
    startTime?: number;
  }) {
    this.entries = init.entries ?? [];
    this.task = init.task;
    this.startTime = init.startTime ?? Date.now();
    // Explicit initial values satisfy definite-assignment checks; the real
    // totals are computed right after.
    this.totalSteps = this.entries.length;
    this.totalInputTokens = 0;
    this.totalOutputTokens = 0;
    this.recomputeTotals();
  }

  /** Recalculate aggregate totals from entries. Called internally and from static factories. */
  recomputeTotals(): void {
    this.totalSteps = this.entries.length;
    this.totalInputTokens = 0;
    this.totalOutputTokens = 0;
    for (const entry of this.entries) {
      if (entry.usage) {
        this.totalInputTokens += entry.usage.inputTokens;
        this.totalOutputTokens += entry.usage.outputTokens;
      }
    }
  }

  /** Push a new entry and update totals. */
  addEntry(entry: StepRecord): void {
    this.entries.push(entry);
    this.recomputeTotals();
  }

  /** Mark the history as finished: stamps `endTime` and derives `totalDuration`. */
  finish(): void {
    this.endTime = Date.now();
    this.totalDuration = this.endTime - this.startTime;
    this.recomputeTotals();
  }

  /**
   * Returns the final result text from the last "done" action, or undefined
   * if the agent never completed with a done action that carried content.
   * Entries are searched newest-first.
   */
  finalResult(): string | undefined {
    for (let i = this.entries.length - 1; i >= 0; i--) {
      const entry = this.entries[i];
      for (const result of entry.actionResults) {
        if (result.isDone && result.extractedContent) {
          return result.extractedContent;
        }
      }
    }
    return undefined;
  }

  /**
   * Whether the agent reached a "done" action at any point.
   */
  isDone(): boolean {
    return this.entries.some((entry) =>
      entry.actionResults.some((r) => r.isDone),
    );
  }

  /**
   * Deduplicated list of all URLs visited during execution (in order of first visit).
   */
  urls(): string[] {
    const seen = new Set<string>();
    const result: string[] = [];
    for (const entry of this.entries) {
      const url = entry.browserState.url;
      if (url && !seen.has(url)) {
        seen.add(url);
        result.push(url);
      }
    }
    return result;
  }

  /**
   * All screenshot base64 strings collected during execution (chronological).
   */
  screenshots(): string[] {
    const result: string[] = [];
    for (const entry of this.entries) {
      if (entry.browserState.screenshot) {
        result.push(entry.browserState.screenshot);
      }
    }
    return result;
  }

  /**
   * All errors encountered during execution: for each entry, its step-level
   * error first, then the errors of its action results.
   */
  errors(): string[] {
    const result: string[] = [];
    for (const entry of this.entries) {
      if (entry.error) {
        result.push(entry.error);
      }
      for (const ar of entry.actionResults) {
        if (ar.error) {
          result.push(ar.error);
        }
      }
    }
    return result;
  }

  /**
   * All detected variables across all steps.
   */
  allExtractedVariables(): ExtractedVariable[] {
    const result: ExtractedVariable[] = [];
    for (const entry of this.entries) {
      if (entry.detectedVariables) {
        result.push(...entry.detectedVariables);
      }
    }
    return result;
  }

  /**
   * Serialize the full history to a JSON-compatible object for saving to disk.
   * Screenshot payloads are replaced by a short placeholder string.
   */
  toJSON(): Record<string, unknown> {
    return {
      task: this.task,
      startTime: this.startTime,
      endTime: this.endTime,
      totalDuration: this.totalDuration,
      totalSteps: this.totalSteps,
      totalInputTokens: this.totalInputTokens,
      totalOutputTokens: this.totalOutputTokens,
      entries: this.entries.map((e) => ({
        ...e,
        // Strip screenshot data from serialized form to keep file size down
        browserState: {
          ...e.browserState,
          screenshot: e.browserState.screenshot ? '[screenshot omitted]' : undefined,
        },
      })),
    };
  }

  /**
   * Rebuild a history from the object shape produced by `toJSON()`.
   *
   * Only the top-level shape is validated; individual entries are trusted to
   * be well-formed StepRecords. Missing or mistyped scalar fields fall back
   * to defaults (`task` → '', `startTime` → now, `endTime`/`totalDuration`
   * → undefined). Totals are recomputed from the entries rather than read
   * from the serialized data.
   *
   * @throws Error if `data` is not a plain object or `entries` is present
   *   but not an array.
   */
  static fromJSON(data: unknown): ExecutionLog {
    if (typeof data !== 'object' || data === null || Array.isArray(data)) {
      throw new Error('ExecutionLog: serialized history must be a JSON object');
    }
    const record = data as Record<string, unknown>;

    const rawEntries = record.entries ?? [];
    if (!Array.isArray(rawEntries)) {
      throw new Error('ExecutionLog: "entries" must be an array');
    }

    const log = new ExecutionLog({
      task: typeof record.task === 'string' ? record.task : '',
      startTime: typeof record.startTime === 'number' ? record.startTime : Date.now(),
      entries: [...(rawEntries as StepRecord[])],
    });
    log.endTime = typeof record.endTime === 'number' ? record.endTime : undefined;
    log.totalDuration =
      typeof record.totalDuration === 'number' ? record.totalDuration : undefined;
    return log;
  }

  /**
   * Save the history to a file at the given path (JSON format), creating
   * parent directories as needed.
   * Returns the written path.
   */
  async saveToFile(filePath: string): Promise<string> {
    const { writeFile, mkdir } = await import('node:fs/promises');
    const { dirname } = await import('node:path');
    await mkdir(dirname(filePath), { recursive: true });
    const json = JSON.stringify(this.toJSON(), null, 2);
    await writeFile(filePath, json, 'utf-8');
    return filePath;
  }

  /**
   * Load history from a JSON file. Screenshots will be placeholders.
   *
   * @throws If the file cannot be read, is not valid JSON, or does not have
   *   the shape accepted by `fromJSON()`.
   */
  static async loadFromFile(filePath: string): Promise<ExecutionLog> {
    const { readFile } = await import('node:fs/promises');
    const raw = await readFile(filePath, 'utf-8');
    const parsed: unknown = JSON.parse(raw);
    return ExecutionLog.fromJSON(parsed);
  }
}
// ── Plan ──
/** One item of a strategy plan: numeric id, description, status and an optional note. */
export const PlanStepSchema = z.object({
  id: z.number(),
  description: z.string(),
  status: z.enum(['pending', 'in_progress', 'completed', 'failed', 'blocked', 'skipped']),
  note: z.string().optional(),
});

/** Static type of a value accepted by PlanStepSchema. */
export type PlanStep = z.infer<typeof PlanStepSchema>;

/** A plan as a list of plan steps under `items`. */
export const StrategyPlanSchema = z.object({
  items: z.array(PlanStepSchema),
});
// ── Judgement ──
/**
 * Detailed judgement of a run: completion flag, reason, a confidence in
 * [0, 1], and optional diagnostic fields.
 */
export const EvaluationResultSchema = z.object({
  isComplete: z.boolean(),
  reason: z.string(),
  confidence: z.number().min(0).max(1),
  verdict: z.string().optional().describe('Short human-readable verdict (e.g. "success", "partial", "failed")'),
  failureReason: z.string().optional().describe('Detailed reason if the task failed'),
  impossibleTask: z.boolean().optional().describe('Whether the task appears impossible to complete'),
  reachedCaptcha: z.boolean().optional().describe('Whether a CAPTCHA was encountered that blocked progress'),
});

/** Static type of a value accepted by EvaluationResultSchema. */
export type EvaluationResult = z.infer<typeof EvaluationResultSchema>;

/**
 * Lightweight judgement result for simple pass/fail evaluation
 * without confidence scoring or detailed analysis.
 */
export const QuickCheckResultSchema = z.object({
  passed: z.boolean(),
  reason: z.string(),
  shouldRetry: z.boolean().optional().describe('Whether the agent should retry with a different approach'),
});

/** Static type of a value accepted by QuickCheckResultSchema. */
export type QuickCheckResult = z.infer<typeof QuickCheckResultSchema>;
// ── Cost Tracking ──
/** Cost of a single step in USD, split by token direction. */
export interface StepCostBreakdown {
  inputCost: number;
  outputCost: number;
  totalCost: number;
}

/** Running token and cost totals. */
export interface AccumulatedCost {
  totalInputTokens: number;
  totalOutputTokens: number;
  totalInputCost: number;
  totalOutputCost: number;
  totalCost: number;
}

/** Per-model pricing in USD per 1M tokens */
export interface PricingTable {
  inputPer1M: number;
  outputPer1M: number;
}

/**
 * Known prices keyed by model-id prefix. Several keys are prefixes of one
 * another (e.g. `gpt-4o` / `gpt-4o-mini`), so lookups must prefer the longest
 * matching key — see calculateStepCost.
 */
export const PRICING_TABLE: Record<string, PricingTable> = {
  'gpt-4o': { inputPer1M: 2.5, outputPer1M: 10 },
  'gpt-4o-mini': { inputPer1M: 0.15, outputPer1M: 0.6 },
  'gpt-4-turbo': { inputPer1M: 10, outputPer1M: 30 },
  'claude-3-opus': { inputPer1M: 15, outputPer1M: 75 },
  'claude-3-5-sonnet': { inputPer1M: 3, outputPer1M: 15 },
  'claude-3-5-haiku': { inputPer1M: 0.8, outputPer1M: 4 },
  'claude-3-haiku': { inputPer1M: 0.25, outputPer1M: 1.25 },
  'gemini-2.0-flash': { inputPer1M: 0.1, outputPer1M: 0.4 },
  'gemini-1.5-pro': { inputPer1M: 1.25, outputPer1M: 5 },
  'gemini-1.5-flash': { inputPer1M: 0.075, outputPer1M: 0.3 },
};

/**
 * Compute the USD cost of one step for the given model.
 *
 * The model is matched against PRICING_TABLE by prefix, choosing the LONGEST
 * key that `modelId` starts with. (Taking the first match in table order
 * would price `gpt-4o-mini-…` at the `gpt-4o` rate, because `gpt-4o` is a
 * prefix of it and appears first.)
 *
 * @param inputTokens  Number of prompt tokens.
 * @param outputTokens Number of completion tokens.
 * @param modelId      Model identifier, e.g. `gpt-4o-mini-2024-07-18`.
 * @returns The cost breakdown, or `undefined` when no table key is a prefix
 *   of `modelId`.
 */
export function calculateStepCost(
  inputTokens: number,
  outputTokens: number,
  modelId: string,
): StepCostBreakdown | undefined {
  let pricing: PricingTable | undefined;
  let matchedLength = -1;
  for (const [prefix, candidate] of Object.entries(PRICING_TABLE)) {
    if (prefix.length > matchedLength && modelId.startsWith(prefix)) {
      pricing = candidate;
      matchedLength = prefix.length;
    }
  }
  if (!pricing) return undefined;

  const inputCost = (inputTokens / 1_000_000) * pricing.inputPer1M;
  const outputCost = (outputTokens / 1_000_000) * pricing.outputPer1M;
  return { inputCost, outputCost, totalCost: inputCost + outputCost };
}
// ── Plan Update ──
/** A revised plan together with the model's justification for the revision. */
export const PlanRevisionSchema = z.object({
  plan: z.string().describe('Updated plan based on current progress'),
  reasoning: z.string().describe('Why the plan was updated'),
});

/** Static type of a value accepted by PlanRevisionSchema. */
export type PlanRevision = z.infer<typeof PlanRevisionSchema>;
// ── Model capability helpers ──
/** Substrings that mark a model id as capable of extended thinking. */
const EXTENDED_THINKING_MODELS = [
  'claude-3-5-sonnet', 'claude-3-opus', 'claude-3-7-sonnet', 'claude-4',
  'o1', 'o1-pro', 'o3', 'o3-mini',
  'gemini-2.0-flash-thinking',
  'deepseek-r1',
];

/**
 * Whether `modelId` contains any of the extended-thinking markers.
 * Matching is by substring, anywhere in the id.
 */
export function supportsDeepReasoning(modelId: string): boolean {
  for (const marker of EXTENDED_THINKING_MODELS) {
    if (modelId.includes(marker)) {
      return true;
    }
  }
  return false;
}
/** Substrings that mark a model id as suitable for coordinate-based clicking. */
const COORDINATE_CLICK_MODELS = [
  'gpt-4o', 'claude-3-5-sonnet', 'claude-4', 'gemini-2.0', 'gemini-1.5-pro',
];

/**
 * Whether `modelId` contains any of the coordinate-click markers.
 * Matching is by substring, so e.g. `gpt-4o-mini` matches via `gpt-4o`.
 */
export function supportsCoordinateMode(modelId: string): boolean {
  const hit = COORDINATE_CLICK_MODELS.findIndex((marker) => modelId.includes(marker));
  return hit !== -1;
}
/** Substrings that mark a model id as a flash / lightweight model. */
const FLASH_MODELS = [
  'gpt-4o-mini', 'claude-3-haiku', 'claude-3-5-haiku', 'gemini-1.5-flash', 'gemini-2.0-flash',
];

/**
 * Whether `modelId` contains any of the flash-model markers
 * (substring match anywhere in the id).
 */
export function isCompactModel(modelId: string): boolean {
  let matched = false;
  for (let i = 0; i < FLASH_MODELS.length && !matched; i++) {
    matched = modelId.indexOf(FLASH_MODELS[i]) !== -1;
  }
  return matched;
}
// ── Agent Run Result ──
/**
 * Result object for a completed agent run: outcome flag, final text (if any),
 * the full execution history, collected errors, and optional evaluation and
 * cost data.
 */
export interface RunOutcome {
finalResult?: string;
success: boolean;
history: ExecutionLog;
errors: string[];
detectedVariables?: ExtractedVariable[];
/** Detailed judgement (see EvaluationResultSchema). */
judgement?: EvaluationResult;
/** Pass/fail judgement (see QuickCheckResultSchema). */
simpleJudgement?: QuickCheckResult;
totalCost?: AccumulatedCost;
}
================================================
FILE: packages/core/src/bridge/adapter.ts
================================================
import { z, type ZodTypeAny } from 'zod';
import type { CommandExecutor } from '../commands/executor.js';
/**
 * Tool description in the shape BridgeAdapter.getToolDefinitions() produces:
 * a tool name, a description, and a JSON-Schema-like object for its input.
 */
export interface MCPToolDefinition {
name: string;
description: string;
// NOTE(review): `Record` is written without type arguments here — confirm the intended key/value types.
inputSchema: Record;
}
/**
 * Presents the commands registered on a CommandExecutor as MCP tool
 * definitions. Tool names are the command names prefixed with `browser_`,
 * and each command's Zod schema is converted to a JSON-Schema-like object.
 *
 * The Zod → JSON Schema conversion is intentionally small: it understands
 * strings, numbers, booleans, enums, arrays, literals, nested objects and
 * optional/default wrappers. Any other Zod type is reported as a bare
 * `{ type: 'object' }`.
 */
export class BridgeAdapter {
  /** Prefix added to every command name when it is exposed as an MCP tool. */
  private static readonly TOOL_PREFIX = 'browser_';

  private tools: CommandExecutor;

  constructor(tools: CommandExecutor) {
    this.tools = tools;
  }

  /** One MCP tool definition per registered command. */
  getToolDefinitions(): MCPToolDefinition[] {
    return this.tools.registry.getAll().map((action) => ({
      name: `${BridgeAdapter.TOOL_PREFIX}${action.name}`,
      description: action.description,
      inputSchema: this.zodToJsonSchema(action.schema),
    }));
  }

  /** MCP tool names (prefixed command names) for all registered commands. */
  getToolNames(): string[] {
    return this.tools.registry.getNames().map((name) => `${BridgeAdapter.TOOL_PREFIX}${name}`);
  }

  /**
   * Map an MCP tool name back to the underlying command name.
   *
   * @returns The name without the `browser_` prefix, or `null` when the
   *   given name does not carry the prefix.
   */
  parseToolName(mcpToolName: string): string | null {
    const prefix = BridgeAdapter.TOOL_PREFIX;
    return mcpToolName.startsWith(prefix) ? mcpToolName.slice(prefix.length) : null;
  }

  /**
   * Convert a command schema to a JSON-Schema-like object.
   *
   * For a ZodObject the result lists every field under `properties`; a field
   * is listed in `required` only if the schema rejects `undefined` for it.
   * This uses Zod's own `isOptional()` so that fields with a `.default()`
   * (which callers may omit) are not advertised as required.
   * Non-object schemas yield a bare `{ type: 'object' }`.
   */
  private zodToJsonSchema(schema: ZodTypeAny): Record<string, unknown> {
    const jsonSchema: Record<string, unknown> = { type: 'object' };

    if (schema instanceof z.ZodObject) {
      const properties: Record<string, unknown> = {};
      const required: string[] = [];

      for (const [key, value] of Object.entries(schema.shape)) {
        const fieldSchema = value as ZodTypeAny;
        properties[key] = this.fieldToJsonSchema(fieldSchema);
        if (!fieldSchema.isOptional()) {
          required.push(key);
        }
      }

      jsonSchema.properties = properties;
      if (required.length > 0) {
        jsonSchema.required = required;
      }
    }

    return jsonSchema;
  }

  /** Convert a single field schema; unsupported Zod types fall back to `{ type: 'object' }`. */
  private fieldToJsonSchema(schema: ZodTypeAny): Record<string, unknown> {
    if (schema instanceof z.ZodString) {
      return { type: 'string', description: schema.description };
    }
    if (schema instanceof z.ZodNumber) {
      return { type: 'number', description: schema.description };
    }
    if (schema instanceof z.ZodBoolean) {
      return { type: 'boolean', description: schema.description };
    }
    if (schema instanceof z.ZodEnum) {
      return { type: 'string', enum: schema.options, description: schema.description };
    }
    if (schema instanceof z.ZodArray) {
      return {
        type: 'array',
        items: this.fieldToJsonSchema(schema.element),
        description: schema.description,
      };
    }
    if (schema instanceof z.ZodObject) {
      // Nested objects keep their own properties/required lists.
      return { ...this.zodToJsonSchema(schema), description: schema.description };
    }
    if (schema instanceof z.ZodOptional) {
      return this.inheritDescription(
        this.fieldToJsonSchema(schema.unwrap()),
        schema.description,
      );
    }
    if (schema instanceof z.ZodDefault) {
      const inner = this.inheritDescription(
        this.fieldToJsonSchema(schema.removeDefault()),
        schema.description,
      );
      return { ...inner, default: schema._def.defaultValue() };
    }
    if (schema instanceof z.ZodLiteral) {
      return { const: schema.value };
    }
    return { type: 'object', description: schema.description };
  }

  /**
   * Carry a wrapper's description over to the unwrapped schema when the inner
   * schema has none, e.g. for `z.string().optional().describe('…')` where the
   * description lives on the optional wrapper only.
   */
  private inheritDescription(
    inner: Record<string, unknown>,
    description: string | undefined,
  ): Record<string, unknown> {
    if (description !== undefined && inner.description === undefined) {
      return { ...inner, description };
    }
    return inner;
  }
}
================================================
FILE: packages/core/src/bridge/client.ts
================================================
import { type ChildProcess, spawn } from 'node:child_process';
import { EventEmitter } from 'node:events';
import type { CustomCommandSpec } from '../commands/types.js';
import { createLogger } from '../logging.js';
const logger = createLogger('mcp-client');
// ── Types ──
/** Construction options for BridgeClient. */
export interface BridgeClientOptions {
/** Executable that is spawned as the MCP server process. */
command: string;
/** Arguments passed to the executable (none when omitted). */
args?: string[];
/** Extra environment variables, merged over the current process environment. */
// NOTE(review): `Record` is written without type arguments here — confirm the intended key/value types.
env?: Record;
/** Timeout per JSON-RPC request in ms (default: 30_000) */
requestTimeoutMs?: number;
/** Maximum reconnection attempts (default: 5) */
maxReconnectAttempts?: number;
/** Initial reconnection delay in ms, doubles each attempt (default: 1000) */
reconnectDelayMs?: number;
/** Interval between health checks in ms (0 to disable, default: 0) */
healthCheckIntervalMs?: number;
}
/** A tool as listed by the MCP server in its `tools/list` response. */
export interface MCPTool {
name: string;
description: string;
// NOTE(review): `Record` is written without type arguments here — confirm the intended key/value types.
inputSchema: Record;
}
/** Connection lifecycle states reported by BridgeClient.state and its `stateChange` event. */
export type MCPConnectionState = 'disconnected' | 'connecting' | 'connected' | 'reconnecting';
/**
 * Bookkeeping for one in-flight JSON-RPC request: the promise callbacks to
 * settle, the timeout timer to clear once it settles, and the method name
 * (used in the timeout error message).
 */
interface PendingRequest {
resolve: (value: unknown) => void;
reject: (error: Error) => void;
// NOTE(review): `ReturnType` is written without a type argument here — confirm the intended timer type.
timer: ReturnType;
method: string;
}
/**
 * Events emitted by BridgeClient, keyed by event name with the listener
 * argument tuple as the value.
 */
export interface BridgeClientEvents {
/** Emitted whenever the connection state actually changes. */
stateChange: [state: MCPConnectionState, previousState: MCPConnectionState];
/** Emitted on server process errors and when reconnection is abandoned. */
error: [error: Error];
/** Emitted for every JSON-RPC notification received from the server. */
// NOTE(review): `Record` is written without type arguments here — confirm the intended key/value types.
notification: [method: string, params: Record | undefined];
}
/**
* MCP client that connects to external MCP servers and converts their tools
* into custom browser actions.
*
* Features:
* - Reconnection with exponential backoff
* - Per-call request timeout
* - Concurrent request multiplexing (multiple in-flight requests)
* - Tool list caching with invalidation
* - Health check / ping
* - Event emitter for connection state changes
* - Graceful shutdown with pending request drain
*/
export class BridgeClient extends EventEmitter {
private process: ChildProcess | null = null;
private requestId = 0;
private pendingRequests = new Map();
private options: BridgeClientOptions;
private buffer = '';
// ── Connection state ──
private _state: MCPConnectionState = 'disconnected';
private reconnectAttempts = 0;
private reconnectTimer: ReturnType | null = null;
// ── Tool caching ──
private cachedTools: MCPTool[] | null = null;
private toolsCacheTimestamp = 0;
// ── Health check ──
private healthCheckTimer: ReturnType | null = null;
// ── Config ──
private readonly requestTimeoutMs: number;
private readonly maxReconnectAttempts: number;
private readonly reconnectDelayMs: number;
private readonly healthCheckIntervalMs: number;
constructor(options: BridgeClientOptions) {
super();
this.options = options;
this.requestTimeoutMs = options.requestTimeoutMs ?? 30_000;
this.maxReconnectAttempts = options.maxReconnectAttempts ?? 5;
this.reconnectDelayMs = options.reconnectDelayMs ?? 1000;
this.healthCheckIntervalMs = options.healthCheckIntervalMs ?? 0;
}
// ── Public accessors ──
get state(): MCPConnectionState {
return this._state;
}
get isConnected(): boolean {
return this._state === 'connected';
}
// ── Connection lifecycle ──
async connect(): Promise {
if (this._state === 'connected') {
logger.debug('Already connected, skipping connect()');
return;
}
this.setState('connecting');
await this.spawnProcess();
await this.initialize();
this.setState('connected');
this.reconnectAttempts = 0;
// Warm the tool cache
await this.listTools();
// Start health checks if configured
this.startHealthChecks();
logger.info(`Connected to MCP server: ${this.options.command}`);
}
private async spawnProcess(): Promise {
this.process = spawn(this.options.command, this.options.args ?? [], {
stdio: ['pipe', 'pipe', 'pipe'],
env: { ...process.env, ...this.options.env },
});
this.process.stdout?.setEncoding('utf-8');
this.process.stdout?.on('data', (data: string) => {
this.buffer += data;
this.processBuffer();
});
this.process.stderr?.on('data', (data: Buffer) => {
logger.warn(`[MCP stderr] ${data.toString().trimEnd()}`);
});
this.process.on('close', (code: number | null) => {
logger.info(`MCP server process exited with code ${code}`);
this.handleProcessClose();
});
this.process.on('error', (error: Error) => {
logger.error(`MCP server process error: ${error.message}`);
this.emit('error', error);
this.handleProcessClose();
});
}
private async initialize(): Promise {
await this.send('initialize', {
protocolVersion: '2024-11-05',
capabilities: {},
clientInfo: { name: 'open-browser', version: '0.1.0' },
});
// Send initialized notification (no id, no response expected)
this.sendNotification('notifications/initialized');
}
// ── State management ──
private setState(newState: MCPConnectionState): void {
const previousState = this._state;
if (previousState === newState) return;
this._state = newState;
logger.debug(`Connection state: ${previousState} -> ${newState}`);
this.emit('stateChange', newState, previousState);
}
// ── Reconnection ──
private handleProcessClose(): void {
const wasPreviouslyConnected = this._state === 'connected';
// Reject all pending requests
for (const [id, pending] of this.pendingRequests) {
clearTimeout(pending.timer);
pending.reject(new Error('MCP server disconnected'));
}
this.pendingRequests.clear();
this.process = null;
this.buffer = '';
if (wasPreviouslyConnected) {
this.attemptReconnect();
} else {
this.setState('disconnected');
}
}
private attemptReconnect(): void {
if (this.reconnectAttempts >= this.maxReconnectAttempts) {
logger.error(`Max reconnection attempts (${this.maxReconnectAttempts}) reached`);
this.setState('disconnected');
this.emit('error', new Error('MCP server reconnection failed after all attempts'));
return;
}
this.setState('reconnecting');
this.reconnectAttempts++;
const delay = this.reconnectDelayMs * 2 ** (this.reconnectAttempts - 1);
logger.info(
`Reconnecting in ${delay}ms (attempt ${this.reconnectAttempts}/${this.maxReconnectAttempts})`,
);
this.reconnectTimer = setTimeout(async () => {
this.reconnectTimer = null;
try {
await this.spawnProcess();
await this.initialize();
this.setState('connected');
this.reconnectAttempts = 0;
// Invalidate tool cache on reconnect -- server may have changed
this.invalidateToolCache();
await this.listTools();
this.startHealthChecks();
logger.info('Reconnected to MCP server');
} catch (error) {
logger.warn(
`Reconnect attempt ${this.reconnectAttempts} failed: ${
error instanceof Error ? error.message : String(error)
}`,
);
this.attemptReconnect();
}
}, delay);
}
// ── Tool caching ──
async listTools(): Promise {
if (this.cachedTools) {
return this.cachedTools;
}
const result = (await this.send('tools/list', {})) as { tools: MCPTool[] };
this.cachedTools = result.tools ?? [];
this.toolsCacheTimestamp = Date.now();
logger.debug(`Cached ${this.cachedTools.length} tools from MCP server`);
return this.cachedTools;
}
/** Get cached tools synchronously. Returns empty array if cache is cold. */
getTools(): MCPTool[] {
return this.cachedTools ?? [];
}
/** Force-invalidate the tool cache. Next listTools() call will re-fetch. */
invalidateToolCache(): void {
this.cachedTools = null;
this.toolsCacheTimestamp = 0;
}
/** Returns when the tool cache was last populated (epoch ms), or 0 if empty. */
get toolsCacheAge(): number {
return this.toolsCacheTimestamp > 0 ? Date.now() - this.toolsCacheTimestamp : 0;
}
// ── Tool invocation ──
toCustomActions(): CustomCommandSpec[] {
const { z } = require('zod');
const tools = this.getTools();
return tools.map((tool) => ({
name: `mcp_${tool.name}`,
description: `[MCP] ${tool.description}`,
schema: z.object({}),
handler: async (params: Record) => {
const result = await this.callTool(tool.name, params);
return {
success: true,
extractedContent: typeof result === 'string' ? result : JSON.stringify(result),
};
},
}));
}
async callTool(name: string, args: Record): Promise {
const result = (await this.send('tools/call', { name, arguments: args })) as {
content: Array<{ type: string; text?: string }>;
isError?: boolean;
};
if (result.isError) {
const errorText = result.content?.find((c) => c.type === 'text')?.text;
throw new Error(errorText ?? 'MCP tool call failed');
}
const textContent = result.content?.find((c) => c.type === 'text');
return textContent?.text ?? result;
}
// ── Health check ──
/** Send a ping to verify the server is responsive. Rejects if no pong within timeout. */
async ping(): Promise {
await this.send('ping', {});
}
private startHealthChecks(): void {
this.stopHealthChecks();
if (this.healthCheckIntervalMs <= 0) return;
this.healthCheckTimer = setInterval(async () => {
try {
await this.ping();
} catch {
logger.warn('Health check failed');
}
}, this.healthCheckIntervalMs);
}
private stopHealthChecks(): void {
if (this.healthCheckTimer) {
clearInterval(this.healthCheckTimer);
this.healthCheckTimer = null;
}
}
// ── JSON-RPC transport ──
private send(method: string, params?: Record): Promise {
if (!this.process?.stdin?.writable) {
return Promise.reject(new Error('MCP client is not connected'));
}
const id = ++this.requestId;
return new Promise((resolve, reject) => {
// Per-call timeout
const timer = setTimeout(() => {
this.pendingRequests.delete(id);
reject(new Error(`MCP request timed out after ${this.requestTimeoutMs}ms: ${method}`));
}, this.requestTimeoutMs);
this.pendingRequests.set(id, { resolve, reject, timer, method });
const request = JSON.stringify({
jsonrpc: '2.0',
id,
method,
params,
});
this.process?.stdin?.write(`${request}\n`);
});
}
/** Send a JSON-RPC notification (no id, no response expected). */
private sendNotification(method: string, params?: Record): void {
if (!this.process?.stdin?.writable) return;
const notification = JSON.stringify({
jsonrpc: '2.0',
method,
...(params ? { params } : {}),
});
this.process.stdin.write(`${notification}\n`);
}
private processBuffer(): void {
const lines = this.buffer.split('\n');
this.buffer = lines.pop() ?? '';
for (const line of lines) {
if (!line.trim()) continue;
try {
const message = JSON.parse(line);
// JSON-RPC notification from server (no id field)
if (message.id === undefined || message.id === null) {
this.handleServerNotification(message);
continue;
}
// Response to a pending request
const pending = this.pendingRequests.get(message.id);
if (pending) {
clearTimeout(pending.timer);
this.pendingRequests.delete(message.id);
if (message.error) {
pending.reject(new Error(message.error.message));
} else {
pending.resolve(message.result);
}
}
} catch {
// Ignore malformed responses
}
}
}
/**
 * React to a server-initiated notification: log it, re-emit it as a
 * `'notification'` event, and drop the cached tool list when the server
 * reports that its tools changed.
 */
private handleServerNotification(message: {
  method: string;
  params?: Record<string, unknown>;
}): void {
  const { method, params } = message;

  logger.debug(`Server notification: ${method}`);
  this.emit('notification', method, params);

  // If server signals tool list changed, invalidate cache
  if (method === 'notifications/tools/list_changed') {
    this.invalidateToolCache();
  }
}
// ── Graceful shutdown ──
/**
 * Disconnect gracefully: wait for pending requests to drain (up to a timeout),
 * then kill the server process.
 *
 * Requests still in flight once the drain window closes are rejected with
 * "MCP client shutting down".
 *
 * @param drainTimeoutMs Maximum time to wait for in-flight requests (default 5000 ms).
 */
async disconnect(drainTimeoutMs = 5000): Promise<void> {
  this.stopHealthChecks();
  if (this.reconnectTimer) {
    clearTimeout(this.reconnectTimer);
    this.reconnectTimer = null;
  }

  // Wait for pending requests to drain
  if (this.pendingRequests.size > 0) {
    logger.debug(
      `Waiting for ${this.pendingRequests.size} pending request(s) to drain...`,
    );
    let drainTimer: ReturnType<typeof setTimeout> | undefined;
    const drainWindow = new Promise<void>((resolve) => {
      drainTimer = setTimeout(resolve, drainTimeoutMs);
    });
    try {
      await Promise.race([this.waitForPendingDrain(), drainWindow]);
    } finally {
      // Without this the timer would keep the event loop alive for the
      // rest of the window even though the drain already finished.
      clearTimeout(drainTimer);
    }
  }

  // Reject any still-pending requests
  for (const pending of this.pendingRequests.values()) {
    clearTimeout(pending.timer);
    pending.reject(new Error('MCP client shutting down'));
  }
  this.pendingRequests.clear();

  // Kill the process
  if (this.process) {
    this.process.removeAllListeners();
    this.process.kill();
    this.process = null;
  }

  this.buffer = '';
  this.setState('disconnected');
  logger.info('MCP client disconnected');
}
/** Resolve once no requests are in flight, re-checking every 50 ms. */
private waitForPendingDrain(): Promise<void> {
  return new Promise<void>((resolve) => {
    const poll = (): void => {
      if (this.pendingRequests.size > 0) {
        setTimeout(poll, 50);
        return;
      }
      resolve();
    };
    poll();
  });
}
/** Number of requests that were sent and have not been settled yet. */
get pendingRequestCount(): number {
  const { size } = this.pendingRequests;
  return size;
}
}
================================================
FILE: packages/core/src/bridge/index.ts
================================================
export { BridgeServer, type BridgeServerOptions } from './server.js';
export { BridgeClient, type BridgeClientOptions } from './client.js';
export { BridgeAdapter } from './adapter.js';
================================================
FILE: packages/core/src/bridge/mcp-types.ts
================================================
/**
* Experimental MCP (Model Context Protocol) server types.
* @experimental
*/
/** Options bag for the experimental MCP server; every field is optional. */
export interface MCPServerOptions {
// NOTE(review): presumably the port to listen on; no default is visible in this file.
port?: number;
// NOTE(review): presumably the host/interface to bind to; confirm against the consumer.
host?: string;
/** Capabilities to enable; see {@link MCPCapability} for the allowed values. */
capabilities?: MCPCapability[];
}
/** The closed set of capability names accepted in {@link MCPServerOptions.capabilities}. */
export type MCPCapability = 'browse' | 'extract' | 'screenshot' | 'interact';
/** A request addressed to the experimental MCP server. */
export interface MCPRequest {
  /** Name of the method to invoke. */
  method: string;
  /** Method arguments keyed by parameter name; values are untyped until validated. */
  params: Record<string, unknown>;
}
/** Outcome of an {@link MCPRequest}; both fields are optional at the type level. */
export interface MCPResponse {
/** Payload of a successful call. */
result?: unknown;
/** Failure details: a numeric code plus a human-readable message. */
error?: { code: number; message: string };
}
================================================
FILE: packages/core/src/bridge/server.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import { BridgeServer, type MCPRequest, type MCPResponse } from './server.js';
import { CommandExecutor } from '../commands/executor.js';
// ── Mock factories ──
/**
 * Build a stand-in for the viewport whose async members are bun mocks that
 * resolve immediately. Every call returns brand-new mocks.
 */
function makeMockViewport() {
  // Each helper call creates a distinct mock so call counts stay independent.
  const resolvesVoid = () => mock(() => Promise.resolve());
  const resolvesEmptyObject = () => mock(() => Promise.resolve({}));

  const currentPage = {
    goBack: resolvesVoid(),
    evaluate: resolvesEmptyObject(),
    mouse: { click: resolvesVoid() },
    keyboard: { press: resolvesVoid() },
  };

  const viewport = {
    currentPage,
    cdp: {
      send: resolvesEmptyObject(),
    },
    navigate: resolvesVoid(),
    waitForPageReady: resolvesVoid(),
    switchTab: resolvesVoid(),
    newTab: resolvesVoid(),
    closeTab: resolvesVoid(),
    screenshot: mock(() =>
      Promise.resolve({ base64: 'abc123', width: 1280, height: 800 }),
    ),
    isConnected: true,
    getState: mock(() =>
      Promise.resolve({
        url: 'https://example.com',
        title: 'Example',
        tabs: [{ url: 'https://example.com', title: 'Example' }],
      }),
    ),
  };

  return viewport as any;
}
/**
 * Build a stand-in for the page analyzer: `extractState` resolves to a fixed
 * DOM snapshot and the element helpers resolve immediately.
 */
function makeMockPageAnalyzer() {
  const extractState = mock(() =>
    Promise.resolve({
      tree: '...',
      selectorMap: {},
      elementCount: 5,
      interactiveElementCount: 2,
      scrollPosition: { x: 0, y: 0 },
      viewportSize: { width: 1280, height: 800 },
      documentSize: { width: 1280, height: 2000 },
      pixelsAbove: 0,
      pixelsBelow: 1200,
    }),
  );
  const clickElementByIndex = mock(() => Promise.resolve());
  const inputTextByIndex = mock(() => Promise.resolve());
  const getElementSelector = mock(() => Promise.resolve('#el'));

  return {
    extractState,
    clickElementByIndex,
    inputTextByIndex,
    getElementSelector,
  } as any;
}
/**
 * Build a JSON-RPC 2.0 request for the server under test.
 *
 * @param method JSON-RPC method name.
 * @param id     Request id (defaults to 1).
 * @param params Optional parameters; the `params` key is omitted when not given.
 */
function makeRequest(
  method: string,
  id: number | string = 1,
  params?: Record<string, unknown>,
): MCPRequest & { id: number | string } {
  const request: MCPRequest & { id: number | string } = {
    jsonrpc: '2.0',
    id,
    method,
  };
  if (params) {
    request.params = params;
  }
  return request;
}
// ── Tests ──
describe('BridgeServer', () => {
  let server: BridgeServer;
  let browser: ReturnType<typeof makeMockViewport>;
  let domService: ReturnType<typeof makeMockPageAnalyzer>;
  let tools: CommandExecutor;

  // Fresh mocks and a fresh server per test so call counts and
  // subscriptions never leak from one test into the next.
  beforeEach(() => {
    browser = makeMockViewport();
    domService = makeMockPageAnalyzer();
    tools = new CommandExecutor();
    server = new BridgeServer({
      browser,
      domService,
      tools,
      name: 'test-server',
      version: '1.0.0',
    });
  });

  describe('handleRequest: initialize', () => {
    test('returns server info and capabilities', async () => {
      const response = await server.handleRequest(makeRequest('initialize'));

      expect(response.jsonrpc).toBe('2.0');
      expect(response.id).toBe(1);
      expect(response.result).toBeDefined();

      const result = response.result as any;
      expect(result.protocolVersion).toBe('2024-11-05');
      expect(result.serverInfo.name).toBe('test-server');
      expect(result.serverInfo.version).toBe('1.0.0');
      expect(result.capabilities.tools).toBeDefined();
      expect(result.capabilities.resources).toBeDefined();
      expect(result.capabilities.resources.subscribe).toBe(true);
    });
  });

  describe('handleRequest: tools/list', () => {
    test('returns list of available tools', async () => {
      const response = await server.handleRequest(makeRequest('tools/list'));

      expect(response.result).toBeDefined();
      const result = response.result as any;
      expect(Array.isArray(result.tools)).toBe(true);
      expect(result.tools.length).toBeGreaterThan(0);

      // Each tool should have name, description, inputSchema
      const firstTool = result.tools[0];
      expect(firstTool.name).toBeDefined();
      expect(firstTool.description).toBeDefined();
      expect(firstTool.inputSchema).toBeDefined();

      // Tool names should be prefixed with browser_
      expect(firstTool.name.startsWith('browser_')).toBe(true);
    });
  });

  describe('handleRequest: tools/call', () => {
    test('executes a browser tool and returns result', async () => {
      const response = await server.handleRequest(
        makeRequest('tools/call', 1, {
          name: 'browser_tap',
          arguments: { index: 0 },
        }),
      );

      expect(response.result).toBeDefined();
      const result = response.result as any;
      expect(result.content).toBeDefined();
      expect(Array.isArray(result.content)).toBe(true);
      expect(result.content[0].type).toBe('text');
      expect(result.isError).toBe(false);
    });

    test('returns error for unknown tool', async () => {
      const response = await server.handleRequest(
        makeRequest('tools/call', 1, {
          name: 'unknown_tool',
          arguments: {},
        }),
      );

      expect(response.error).toBeDefined();
      expect(response.error!.code).toBe(-32602);
      expect(response.error!.message).toContain('Unknown tool');
    });

    test('returns error for tool that does not start with browser_', async () => {
      const response = await server.handleRequest(
        makeRequest('tools/call', 1, {
          name: 'not_browser_tool',
          arguments: {},
        }),
      );

      expect(response.error).toBeDefined();
      expect(response.error!.code).toBe(-32602);
    });

    test('returns success content for done action', async () => {
      const response = await server.handleRequest(
        makeRequest('tools/call', 1, {
          name: 'browser_finish',
          arguments: { text: 'All done' },
        }),
      );

      expect(response.result).toBeDefined();
      const result = response.result as any;
      expect(result.content[0].text).toContain('All done');
    });
  });

  describe('handleRequest: resources/list', () => {
    test('returns available resources', async () => {
      const response = await server.handleRequest(makeRequest('resources/list'));

      expect(response.result).toBeDefined();
      const result = response.result as any;
      expect(Array.isArray(result.resources)).toBe(true);

      const uris = result.resources.map((r: any) => r.uri);
      expect(uris).toContain('browser://state');
      expect(uris).toContain('browser://dom');
      expect(uris).toContain('browser://screenshot');
      expect(uris).toContain('browser://tabs');

      // Each resource should have standard fields
      for (const resource of result.resources) {
        expect(resource.name).toBeDefined();
        expect(resource.description).toBeDefined();
        expect(resource.mimeType).toBeDefined();
      }
    });
  });

  describe('handleRequest: resources/read', () => {
    test('reads browser://state resource', async () => {
      const response = await server.handleRequest(
        makeRequest('resources/read', 1, { uri: 'browser://state' }),
      );

      expect(response.result).toBeDefined();
      const result = response.result as any;
      expect(result.contents).toBeDefined();
      expect(result.contents[0].uri).toBe('browser://state');
      expect(result.contents[0].mimeType).toBe('application/json');
      expect(result.contents[0].text).toBeDefined();

      const state = JSON.parse(result.contents[0].text);
      expect(state.url).toBe('https://example.com');
    });

    test('reads browser://dom resource', async () => {
      const response = await server.handleRequest(
        makeRequest('resources/read', 1, { uri: 'browser://dom' }),
      );

      expect(response.result).toBeDefined();
      const result = response.result as any;
      expect(result.contents[0].uri).toBe('browser://dom');
      expect(result.contents[0].mimeType).toBe('text/plain');

      // The served text must be exactly the tree the page analyzer produced;
      // asserting on an empty substring would pass for any string at all.
      const { tree } = await domService.extractState();
      expect(result.contents[0].text).toBe(tree);
    });

    test('reads browser://screenshot resource', async () => {
      const response = await server.handleRequest(
        makeRequest('resources/read', 1, { uri: 'browser://screenshot' }),
      );

      expect(response.result).toBeDefined();
      const result = response.result as any;
      expect(result.contents[0].uri).toBe('browser://screenshot');
      expect(result.contents[0].mimeType).toBe('image/png');
      expect(result.contents[0].blob).toBe('abc123');
    });

    test('reads browser://tabs resource', async () => {
      const response = await server.handleRequest(
        makeRequest('resources/read', 1, { uri: 'browser://tabs' }),
      );

      expect(response.result).toBeDefined();
      const result = response.result as any;
      expect(result.contents[0].uri).toBe('browser://tabs');

      const tabs = JSON.parse(result.contents[0].text);
      expect(Array.isArray(tabs)).toBe(true);
    });

    test('returns error for unknown resource URI', async () => {
      const response = await server.handleRequest(
        makeRequest('resources/read', 1, { uri: 'browser://nonexistent' }),
      );

      expect(response.error).toBeDefined();
      expect(response.error!.message).toContain('Unknown resource URI');
    });

    test('returns error when uri parameter is missing', async () => {
      const response = await server.handleRequest(
        makeRequest('resources/read', 1, {}),
      );

      expect(response.error).toBeDefined();
      expect(response.error!.message).toContain('Missing required parameter');
    });
  });

  describe('handleRequest: unknown method', () => {
    test('returns method not found error', async () => {
      const response = await server.handleRequest(
        makeRequest('unknown/method'),
      );

      expect(response.error).toBeDefined();
      expect(response.error!.code).toBe(-32601);
      expect(response.error!.message).toContain('Method not found');
    });
  });

  describe('handleRequest: ping', () => {
    test('responds to ping', async () => {
      const response = await server.handleRequest(makeRequest('ping'));

      expect(response.jsonrpc).toBe('2.0');
      expect(response.result).toEqual({});
    });
  });

  describe('handleRequest: resources/subscribe', () => {
    test('subscribes to a valid resource', async () => {
      const response = await server.handleRequest(
        makeRequest('resources/subscribe', 1, { uri: 'browser://state' }),
      );

      expect(response.result).toEqual({});
      expect(response.error).toBeUndefined();
    });

    test('returns error for unknown resource URI', async () => {
      const response = await server.handleRequest(
        makeRequest('resources/subscribe', 1, { uri: 'browser://invalid' }),
      );

      expect(response.error).toBeDefined();
      expect(response.error!.message).toContain('Unknown resource URI');
    });

    test('returns error when uri is missing', async () => {
      const response = await server.handleRequest(
        makeRequest('resources/subscribe', 1, {}),
      );

      expect(response.error).toBeDefined();
    });
  });

  describe('handleRequest: resources/unsubscribe', () => {
    test('unsubscribes from a resource', async () => {
      // First subscribe
      await server.handleRequest(
        makeRequest('resources/subscribe', 1, { uri: 'browser://state' }),
      );

      // Then unsubscribe
      const response = await server.handleRequest(
        makeRequest('resources/unsubscribe', 2, { uri: 'browser://state' }),
      );

      expect(response.result).toEqual({});
    });

    test('returns error when uri is missing', async () => {
      const response = await server.handleRequest(
        makeRequest('resources/unsubscribe', 1, {}),
      );

      expect(response.error).toBeDefined();
    });
  });

  describe('error handling', () => {
    test('returns error response for synchronously thrown errors', async () => {
      // An unknown URI makes the resource reader throw; the handler must turn
      // that into a JSON-RPC error response instead of rejecting.
      const response = await server.handleRequest(
        makeRequest('resources/read', 1, { uri: 'browser://nonexistent' }),
      );

      expect(response.jsonrpc).toBe('2.0');
      expect(response.error).toBeDefined();
      expect(response.error!.message).toContain('Unknown resource URI');
    });

    test('returns error for tools/call when execution fails', async () => {
      // Modify the domService to throw on clickElementByIndex
      domService.clickElementByIndex = mock(() =>
        Promise.reject(new Error('Unexpected crash')),
      );

      const failServer = new BridgeServer({
        browser,
        domService,
        tools,
      });

      // Depending on where the failure is caught, handleRequest either
      // resolves with an error-bearing response or rejects. Both outcomes are
      // accepted, but the assertions live outside the try block so a failing
      // expectation can never be mistaken for the "rejected" outcome.
      let response: MCPResponse | undefined;
      let rejection: unknown;
      try {
        response = await failServer.handleRequest(
          makeRequest('tools/call', 1, {
            name: 'browser_tap',
            arguments: { index: 0 },
          }),
        );
      } catch (error) {
        rejection = error;
      }

      if (response === undefined) {
        // The error propagated as a rejection.
        expect(rejection).toBeDefined();
        return;
      }

      // A returned response must flag the failure one way or the other.
      expect(response.jsonrpc).toBe('2.0');
      const hasError = response.error !== undefined;
      const hasIsError = (response.result as any)?.isError === true;
      expect(hasError || hasIsError).toBe(true);
    });
  });

  describe('handleMessage (with notifications)', () => {
    test('returns null for notification (no id)', async () => {
      const notification: MCPRequest = {
        jsonrpc: '2.0',
        method: 'notifications/initialized',
      };

      const response = await server.handleMessage(notification);
      expect(response).toBeNull();
    });

    test('returns response for request (with id)', async () => {
      const request: MCPRequest = {
        jsonrpc: '2.0',
        id: 1,
        method: 'ping',
      };

      const response = await server.handleMessage(request);
      expect(response).not.toBeNull();
      expect(response!.result).toEqual({});
    });
  });
});
================================================
FILE: packages/core/src/bridge/server.ts
================================================
import type { IncomingMessage, ServerResponse } from 'node:http';
import type { Viewport } from '../viewport/viewport.js';
import type { PageAnalyzer } from '../page/page-analyzer.js';
import type { CommandExecutor } from '../commands/executor.js';
import type { ExecutionContext } from '../commands/types.js';
import { BridgeAdapter, type MCPToolDefinition } from './adapter.js';
import { createLogger } from '../logging.js';
const logger = createLogger('mcp-server');
// ── JSON-RPC types ──
/** Constructor options for {@link BridgeServer}. */
export interface BridgeServerOptions {
/** Viewport the server drives and reads state, screenshots and tabs from. */
browser: Viewport;
/** Page analyzer used to extract the DOM tree and passed to executed commands. */
domService: PageAnalyzer;
/** Command executor whose registry runs the tools and backs the tool adapter. */
tools: CommandExecutor;
/** Server name reported by `initialize` and `/health` (default: 'open-browser'). */
name?: string;
/** Server version reported by `initialize` and `/health` (default: '0.1.0'). */
version?: string;
/** Port for SSE transport (default: 3100) */
ssePort?: number;
}
/**
 * An incoming JSON-RPC 2.0 message. A message without an `id` is a
 * notification and receives no response.
 */
export interface MCPRequest {
  jsonrpc: '2.0';
  /** Request id echoed back in the response; absent for notifications. */
  id?: string | number;
  /** Method name, e.g. 'tools/call' or 'resources/read'. */
  method: string;
  /** Method arguments; values are untyped until the handler narrows them. */
  params?: Record<string, unknown>;
}
/** A JSON-RPC 2.0 response: carries `result` on success or `error` on failure. */
export interface MCPResponse {
jsonrpc: '2.0';
/** Id of the request this response answers. */
id: string | number;
/** Method-specific payload of a successful call. */
result?: unknown;
/** Failure details; the handlers in this file use codes such as -32601, -32602, -32603 and -32700. */
error?: { code: number; message: string; data?: unknown };
}
/** A server-to-client JSON-RPC 2.0 notification; it has no `id` and expects no reply. */
export interface MCPNotification {
  jsonrpc: '2.0';
  /** Notification name, e.g. 'notifications/progress'. */
  method: string;
  /** Notification payload, if any. */
  params?: Record<string, unknown>;
}
// ── Resource types ──
/** Descriptor of a readable resource as returned by `resources/list`. */
export interface MCPResource {
/** Unique identifier, e.g. 'browser://state'. */
uri: string;
/** Short display name. */
name: string;
/** Description of what the resource contains. */
description: string;
/** MIME type of the content returned when the resource is read. */
mimeType: string;
}
/** Content of a single resource as returned by `resources/read`. */
export interface MCPResourceContent {
/** URI of the resource that was read. */
uri: string;
/** MIME type of the payload. */
mimeType: string;
/** Textual payload (used for the JSON and plain-text resources). */
text?: string;
/** Base64 payload (used for the screenshot resource). */
blob?: string;
}
// ── Subscription tracking ──
/**
 * A single subscriber to a resource URI.
 * NOTE(review): no code visible in this file constructs one; confirm it is still needed.
 */
interface ResourceSubscription {
/** URI of the resource being watched. */
uri: string;
/** Callback that receives the notification to send to the client */
notify: (notification: MCPNotification) => void;
}
/**
* MCP (Model Context Protocol) server that exposes browser actions as tools
* and browser state as resources. Supports stdio and SSE transports.
*
* Implements:
* - initialize / tools/list / tools/call (existing)
* - resources/list / resources/read (browser state as resources)
* - resources/subscribe / resources/unsubscribe (live updates)
* - notifications/progress (step progress notifications)
* - SSE transport via HTTP
*/
export class BridgeServer {
private controller: BridgeAdapter;
private browser: Viewport;
private domService: PageAnalyzer;
private tools: CommandExecutor;
private name: string;
private version: string;
private ssePort: number;
/** Active SSE connections that receive notifications */
private sseClients = new Set();
/** Resource subscriptions keyed by URI */
private subscriptions = new Map>();
/** Last screenshot base64 cache for resource reads */
private lastScreenshotBase64: string | null = null;
/** HTTP server reference for SSE transport */
private httpServer: import('node:http').Server | null = null;
constructor(options: BridgeServerOptions) {
this.browser = options.browser;
this.domService = options.domService;
this.tools = options.tools;
this.controller = new BridgeAdapter(options.tools);
this.name = options.name ?? 'open-browser';
this.version = options.version ?? '0.1.0';
this.ssePort = options.ssePort ?? 3100;
}
// ── Static resource definitions ──
private getResourceDefinitions(): MCPResource[] {
return [
{
uri: 'browser://state',
name: 'Browser State',
description: 'Current browser state summary including URL, title, and active tab',
mimeType: 'application/json',
},
{
uri: 'browser://dom',
name: 'DOM Tree',
description: 'Current page DOM tree serialized for LLM consumption',
mimeType: 'text/plain',
},
{
uri: 'browser://screenshot',
name: 'Screenshot',
description: 'Last screenshot of the current page as base64 PNG',
mimeType: 'image/png',
},
{
uri: 'browser://tabs',
name: 'Open Tabs',
description: 'List of all open browser tabs with URLs and titles',
mimeType: 'application/json',
},
];
}
// ── Request dispatcher ──
async handleMessage(message: MCPRequest): Promise {
// JSON-RPC notifications have no `id` field -- they are fire-and-forget
if (message.id === undefined || message.id === null) {
await this.handleNotification(message);
return null;
}
return this.handleRequest(message as MCPRequest & { id: string | number });
}
async handleRequest(request: MCPRequest & { id: string | number }): Promise {
try {
switch (request.method) {
case 'initialize':
return this.handleInitialize(request);
case 'tools/list':
return this.handleToolsList(request);
case 'tools/call':
return this.handleToolsCall(request);
case 'resources/list':
return this.handleResourcesList(request);
case 'resources/read':
return this.handleResourcesRead(request);
case 'resources/subscribe':
return this.handleResourcesSubscribe(request);
case 'resources/unsubscribe':
return this.handleResourcesUnsubscribe(request);
case 'ping':
return { jsonrpc: '2.0', id: request.id, result: {} };
default:
return {
jsonrpc: '2.0',
id: request.id,
error: { code: -32601, message: `Method not found: ${request.method}` },
};
}
} catch (error) {
return {
jsonrpc: '2.0',
id: request.id,
error: {
code: -32603,
message: error instanceof Error ? error.message : String(error),
},
};
}
}
/** Handle incoming JSON-RPC notifications (no response expected). */
private async handleNotification(message: MCPRequest): Promise {
switch (message.method) {
case 'notifications/initialized':
logger.debug('Client confirmed initialization');
break;
case 'notifications/cancelled': {
const requestId = message.params?.requestId;
logger.debug(`Client cancelled request ${requestId}`);
break;
}
default:
logger.debug(`Received unknown notification: ${message.method}`);
}
}
// ── Protocol handlers ──
private handleInitialize(request: MCPRequest & { id: string | number }): MCPResponse {
return {
jsonrpc: '2.0',
id: request.id,
result: {
protocolVersion: '2024-11-05',
capabilities: {
tools: {},
resources: {
subscribe: true,
listChanged: true,
},
},
serverInfo: {
name: this.name,
version: this.version,
},
},
};
}
private handleToolsList(request: MCPRequest & { id: string | number }): MCPResponse {
const tools = this.controller.getToolDefinitions();
return {
jsonrpc: '2.0',
id: request.id,
result: {
tools: tools.map((t) => ({
name: t.name,
description: t.description,
inputSchema: t.inputSchema,
})),
},
};
}
private async handleToolsCall(request: MCPRequest & { id: string | number }): Promise {
const params = request.params ?? {};
const toolName = params.name as string;
const args = (params.arguments ?? {}) as Record;
const actionName = this.controller.parseToolName(toolName);
if (!actionName) {
return {
jsonrpc: '2.0',
id: request.id,
error: { code: -32602, message: `Unknown tool: ${toolName}` },
};
}
// Emit progress notification at start
this.emitProgress(request.id, 0, `Executing ${toolName}...`);
const context: ExecutionContext = {
page: this.browser.currentPage,
cdpSession: this.browser.cdp!,
domService: this.domService,
browserSession: this.browser,
};
const result = await this.tools.registry.execute(actionName, args, context);
// Emit progress notification at completion
this.emitProgress(request.id, 1, 'Complete');
// Notify subscribers that browser state may have changed
this.notifyResourceChanged('browser://state');
this.notifyResourceChanged('browser://dom');
return {
jsonrpc: '2.0',
id: request.id,
result: {
content: [
{
type: 'text',
text: result.extractedContent ?? (result.success ? 'Success' : `Error: ${result.error}`),
},
],
isError: !result.success,
},
};
}
// ── Resource handlers ──
private handleResourcesList(request: MCPRequest & { id: string | number }): MCPResponse {
return {
jsonrpc: '2.0',
id: request.id,
result: {
resources: this.getResourceDefinitions(),
},
};
}
private async handleResourcesRead(request: MCPRequest & { id: string | number }): Promise {
const uri = request.params?.uri as string;
if (!uri) {
return {
jsonrpc: '2.0',
id: request.id,
error: { code: -32602, message: 'Missing required parameter: uri' },
};
}
try {
const content = await this.readResource(uri);
return {
jsonrpc: '2.0',
id: request.id,
result: {
contents: [content],
},
};
} catch (error) {
return {
jsonrpc: '2.0',
id: request.id,
error: {
code: -32602,
message: error instanceof Error ? error.message : String(error),
},
};
}
}
private async readResource(uri: string): Promise {
switch (uri) {
case 'browser://state': {
const state = await this.browser.getState();
return {
uri,
mimeType: 'application/json',
text: JSON.stringify(state, null, 2),
};
}
case 'browser://dom': {
const domState = await this.domService.extractState(
this.browser.currentPage,
this.browser.cdp!,
);
return {
uri,
mimeType: 'text/plain',
text: domState.tree,
};
}
case 'browser://screenshot': {
const screenshot = await this.browser.screenshot();
this.lastScreenshotBase64 = screenshot.base64;
return {
uri,
mimeType: 'image/png',
blob: screenshot.base64,
};
}
case 'browser://tabs': {
const state = await this.browser.getState();
return {
uri,
mimeType: 'application/json',
text: JSON.stringify(state.tabs, null, 2),
};
}
default:
throw new Error(`Unknown resource URI: ${uri}`);
}
}
private handleResourcesSubscribe(request: MCPRequest & { id: string | number }): MCPResponse {
const uri = request.params?.uri as string;
if (!uri) {
return {
jsonrpc: '2.0',
id: request.id,
error: { code: -32602, message: 'Missing required parameter: uri' },
};
}
const validUris = new Set(this.getResourceDefinitions().map((r) => r.uri));
if (!validUris.has(uri)) {
return {
jsonrpc: '2.0',
id: request.id,
error: { code: -32602, message: `Unknown resource URI: ${uri}` },
};
}
// The subscription is tracked; actual notification delivery happens
// via emitNotification which writes to all connected transports
if (!this.subscriptions.has(uri)) {
this.subscriptions.set(uri, new Set());
}
logger.debug(`Client subscribed to resource: ${uri}`);
return { jsonrpc: '2.0', id: request.id, result: {} };
}
private handleResourcesUnsubscribe(request: MCPRequest & { id: string | number }): MCPResponse {
const uri = request.params?.uri as string;
if (!uri) {
return {
jsonrpc: '2.0',
id: request.id,
error: { code: -32602, message: 'Missing required parameter: uri' },
};
}
this.subscriptions.delete(uri);
logger.debug(`Client unsubscribed from resource: ${uri}`);
return { jsonrpc: '2.0', id: request.id, result: {} };
}
// ── Notification emission ──
/** Emit a progress notification for an in-flight request. */
emitProgress(requestId: string | number, progress: number, message?: string): void {
const notification: MCPNotification = {
jsonrpc: '2.0',
method: 'notifications/progress',
params: {
progressToken: requestId,
progress,
total: 1,
...(message ? { message } : {}),
},
};
this.broadcastNotification(notification);
}
/** Notify subscribers that a resource has changed. */
private notifyResourceChanged(uri: string): void {
if (!this.subscriptions.has(uri)) return;
const notification: MCPNotification = {
jsonrpc: '2.0',
method: 'notifications/resources/updated',
params: { uri },
};
this.broadcastNotification(notification);
}
/** Send a notification to all connected transports (SSE clients + stdio). */
private broadcastNotification(notification: MCPNotification): void {
const serialized = JSON.stringify(notification);
// SSE clients
for (const client of this.sseClients) {
try {
client.write(`data: ${serialized}\n\n`);
} catch {
// Client may have disconnected; will be cleaned up
this.sseClients.delete(client);
}
}
}
// ── Stdio transport ──
async startStdio(): Promise {
const stdin = process.stdin;
const stdout = process.stdout;
stdin.setEncoding('utf-8');
let buffer = '';
stdin.on('data', async (data: string) => {
buffer += data;
const lines = buffer.split('\n');
buffer = lines.pop() ?? '';
for (const line of lines) {
if (!line.trim()) continue;
try {
const message = JSON.parse(line) as MCPRequest;
const response = await this.handleMessage(message);
if (response) {
stdout.write(`${JSON.stringify(response)}\n`);
}
} catch {
const errorResponse: MCPResponse = {
jsonrpc: '2.0',
id: 0,
error: { code: -32700, message: 'Parse error' },
};
stdout.write(`${JSON.stringify(errorResponse)}\n`);
}
}
});
stdin.on('end', () => {
process.exit(0);
});
}
// ── SSE transport ──
/**
* Start an HTTP server that exposes the MCP protocol over Server-Sent Events.
*
* Endpoints:
* - GET /sse -- SSE event stream for notifications and responses
* - POST /message -- Send JSON-RPC requests
* - GET /health -- Health check
*/
async startSSE(port?: number): Promise {
const http = await import('node:http');
const listenPort = port ?? this.ssePort;
this.httpServer = http.createServer(async (req: IncomingMessage, res: ServerResponse) => {
// CORS headers for browser clients
res.setHeader('Access-Control-Allow-Origin', '*');
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
if (req.method === 'OPTIONS') {
res.writeHead(204);
res.end();
return;
}
const url = req.url ?? '/';
if (req.method === 'GET' && url === '/sse') {
this.handleSSEConnection(res);
return;
}
if (req.method === 'POST' && url === '/message') {
await this.handleSSEMessage(req, res);
return;
}
if (req.method === 'GET' && url === '/health') {
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({
status: 'ok',
server: this.name,
version: this.version,
browserConnected: this.browser.isConnected,
}));
return;
}
res.writeHead(404, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: 'Not found' }));
});
return new Promise((resolve) => {
this.httpServer!.listen(listenPort, () => {
logger.info(`MCP SSE server listening on port ${listenPort}`);
resolve();
});
});
}
private handleSSEConnection(res: ServerResponse): void {
res.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
});
// Send endpoint info as the first event so the client knows where to POST
const endpointEvent = JSON.stringify({ endpoint: '/message' });
res.write(`event: endpoint\ndata: ${endpointEvent}\n\n`);
this.sseClients.add(res);
logger.debug(`SSE client connected (total: ${this.sseClients.size})`);
res.on('close', () => {
this.sseClients.delete(res);
logger.debug(`SSE client disconnected (total: ${this.sseClients.size})`);
});
}
private async handleSSEMessage(req: IncomingMessage, res: ServerResponse): Promise {
let body = '';
for await (const chunk of req) {
body += chunk;
}
try {
const message = JSON.parse(body) as MCPRequest;
const response = await this.handleMessage(message);
if (response) {
// Send response both as HTTP response and as SSE event
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(response));
// Also push to SSE stream for clients that expect it there
const serialized = JSON.stringify(response);
for (const client of this.sseClients) {
try {
client.write(`event: message\ndata: ${serialized}\n\n`);
} catch {
this.sseClients.delete(client);
}
}
} else {
// Notification -- no response body
res.writeHead(202);
res.end();
}
} catch {
res.writeHead(400, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ jsonrpc: '2.0', id: 0, error: { code: -32700, message: 'Parse error' } }));
}
}
/** Stop the SSE HTTP server and disconnect all clients. */
async stopSSE(): Promise {
for (const client of this.sseClients) {
try {
client.end();
} catch {
// Ignore
}
}
this.sseClients.clear();
if (this.httpServer) {
return new Promise((resolve) => {
this.httpServer!.close(() => {
this.httpServer = null;
logger.info('MCP SSE server stopped');
resolve();
});
});
}
}
/** Stop all transports and clean up. */
async stop(): Promise {
await this.stopSSE();
this.subscriptions.clear();
}
}
================================================
FILE: packages/core/src/commands/catalog/catalog.ts
================================================
import { z, type ZodTypeAny } from 'zod';
import type { CatalogEntry, CatalogOptions } from './types.js';
import type { CommandResult, ExecutionContext, CustomCommandSpec } from '../types.js';
import { CommandFailedError } from '../../errors.js';
import { escapeRegExp } from '../../utils.js';
// ── Special parameter names ──
// These parameter names, when found in a handler's function signature,
// are automatically injected from the ExecutionContext instead of from
// the action's validated params.
// Each name listed here has a matching case in resolveSpecialParam below;
// keep the two in sync when adding or removing a name.
const SPECIAL_PARAMS = new Set([
'browserSession',
'cdpSession',
'page',
'domService',
'extractionLlm',
'fileSystem',
'maskedValues',
]);
/**
 * Parse the parameter names from a function's source text.
 *
 * Supports `(a, b) => …`, `async (a, b) => …`, `function name(a, b) …`,
 * `async function (a, b) …` and the paren-less single-parameter arrow
 * `a => …`. Any other shape yields an empty list.
 *
 * The parameter list is found by walking to the parenthesis that balances
 * the opening one, so default values that contain calls, arrow functions or
 * string literals with brackets do not cut the list short.
 *
 * @param handler Function whose `toString()` source is inspected.
 * @returns One entry per top-level parameter, stripped of default values and
 *          type annotations. A destructured parameter is returned as a single
 *          entry with its outer braces/brackets removed.
 */
function inspectHandlerParams(handler: Function): string[] {
  const source = handler.toString();

  // Everything up to and including the `(` that opens the parameter list.
  const header = source.match(/^(?:async\s+)?(?:function\s*\w*\s*)?\(/);
  if (!header) {
    // Single param without parens: a =>
    const singleParamArrow = source.match(/^(?:async\s+)?(\w+)\s*=>/);
    const name = singleParamArrow?.[1];
    return name ? [name] : [];
  }

  // Split on top-level commas while tracking bracket depth, stopping at the
  // `)` that closes the parameter list.
  const params: string[] = [];
  let depth = 0;
  let current = '';
  let closed = false;

  for (let i = header[0].length; i < source.length; i++) {
    const char = source[i];

    if (char === '"' || char === "'" || char === '`') {
      // Copy a string literal verbatim so brackets and commas inside it
      // are not mistaken for structure.
      let end = i + 1;
      while (end < source.length && source[end] !== char) {
        if (source[end] === '\\') end++;
        end++;
      }
      current += source.slice(i, end + 1);
      i = end;
      continue;
    }

    if (char === '{' || char === '[' || char === '(') {
      depth++;
      current += char;
    } else if (char === ')' && depth === 0) {
      closed = true;
      break;
    } else if (char === '}' || char === ']' || char === ')') {
      depth--;
      current += char;
    } else if (char === ',' && depth === 0) {
      params.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }

  // An unterminated list means the source could not be understood.
  if (!closed) {
    return [];
  }
  if (current.trim()) {
    params.push(current.trim());
  }

  // Clean up: remove type annotations, defaults, destructuring
  return params.map((param) => {
    // Remove default values: param = defaultVal
    const withoutDefault = (param.split('=')[0] ?? '').trim();
    // Remove type annotations: param: Type
    const withoutType = (withoutDefault.split(':')[0] ?? '').trim();
    // Only top-level named params matter to callers, so a destructured
    // pattern is just stripped of its outer braces/brackets.
    return withoutType.replace(/^[{[(]|[})\]]$/g, '').trim();
  });
}
/**
 * Detect which special parameters a handler function expects.
 *
 * The handler's parameter names are read with `inspectHandlerParams` and
 * intersected with `SPECIAL_PARAMS`.
 *
 * @param handler Handler whose signature is inspected.
 * @returns The special parameter names found, in signature order; empty when none.
 */
function detectSpecialParams(handler: Function): Set<string> {
  const detected = new Set<string>();
  for (const name of inspectHandlerParams(handler)) {
    if (SPECIAL_PARAMS.has(name)) {
      detected.add(name);
    }
  }
  return detected;
}
/**
 * Look up the ExecutionContext member that backs a special parameter name.
 *
 * @param name Special parameter name to resolve.
 * @param context Execution context the value is read from.
 * @returns The matching context member, or `undefined` for any other name.
 */
function resolveSpecialParam(
  name: string,
  context: ExecutionContext,
): unknown {
  if (name === 'browserSession') return context.browserSession;
  if (name === 'cdpSession') return context.cdpSession;
  if (name === 'page') return context.page;
  if (name === 'domService') return context.domService;
  if (name === 'extractionLlm') return context.extractionLlm;
  if (name === 'fileSystem') return context.fileSystem;
  if (name === 'maskedValues') return context.maskedValues;
  return undefined;
}
/**
 * Registry of named actions.
 *
 * Stores catalog entries by name, validates and executes them, and derives
 * prompt text and schemas from the registered set. For every registered
 * handler it also records which special (context-injected) parameter names the
 * handler's signature mentions, so `execute` can merge those values into the
 * params object before the handler runs.
 */
export class CommandCatalog {
  private actions = new Map<string, CatalogEntry>();
  private specialParamsCache = new Map<string, Set<string>>();
  private options: CatalogOptions;

  /**
   * @param options Optional include/exclude lists applied by `register`.
   */
  constructor(options?: CatalogOptions) {
    this.options = options ?? {};
  }

  /**
   * Add an action to the catalog, replacing any entry with the same name.
   *
   * The call is a silent no-op when the name is listed in `excludeActions`, or
   * when `includeActions` is non-empty and does not list the name.
   */
  register(action: CatalogEntry): void {
    const { excludeActions, includeActions } = this.options;
    if (excludeActions?.includes(action.name)) return;
    if (
      includeActions &&
      includeActions.length > 0 &&
      !includeActions.includes(action.name)
    ) {
      return;
    }

    this.actions.set(action.name, action);

    // Pre-compute which special parameters the handler expects
    const specialParams = detectSpecialParams(action.handler);
    if (specialParams.size > 0) {
      this.specialParamsCache.set(action.name, specialParams);
    } else {
      // Re-registering a name must not keep the previous handler's entry,
      // otherwise values would be injected that the new handler never asked for.
      this.specialParamsCache.delete(action.name);
    }
  }

  /**
   * Register a custom command by copying its name, description, schema,
   * handler and `terminatesSequence` flag into a catalog entry.
   */
  registerCustom(definition: CustomCommandSpec): void {
    this.register({
      name: definition.name,
      description: definition.description,
      schema: definition.schema,
      handler: definition.handler,
      terminatesSequence: definition.terminatesSequence,
    });
  }

  /** Remove an action and its cached special-parameter set. */
  unregister(name: string): void {
    this.actions.delete(name);
    this.specialParamsCache.delete(name);
  }

  /** Return the entry registered under `name`, if any. */
  get(name: string): CatalogEntry | undefined {
    return this.actions.get(name);
  }

  /** Whether an action is registered under `name`. */
  has(name: string): boolean {
    return this.actions.has(name);
  }

  /** All registered entries, in registration order. */
  getAll(): CatalogEntry[] {
    return [...this.actions.values()];
  }

  /** All registered action names, in registration order. */
  getNames(): string[] {
    return [...this.actions.keys()];
  }

  /**
   * Validate `params` against the action's schema, inject special parameters
   * from `context`, and invoke the handler.
   *
   * @throws CommandFailedError when the action is not registered, when schema
   *   validation fails, or when the handler throws. A CommandFailedError thrown
   *   by the handler is re-thrown unchanged; any other error is wrapped with
   *   its message and, if it is an `Error`, attached as `cause`.
   */
  async execute(
    name: string,
    params: Record<string, unknown>,
    context: ExecutionContext,
  ): Promise<CommandResult> {
    const action = this.actions.get(name);
    if (!action) {
      throw new CommandFailedError(name, `Action "${name}" is not registered`);
    }

    try {
      // Validate params against schema
      const validated = action.schema.parse(params);
      // Inject special parameters from context into the validated params
      const enriched = this.injectSpecialParams(name, validated, context);
      return await action.handler(enriched, context);
    } catch (error) {
      if (error instanceof CommandFailedError) throw error;
      const message = error instanceof Error ? error.message : String(error);
      throw new CommandFailedError(name, message, {
        cause: error instanceof Error ? error : undefined,
      });
    }
  }

  /**
   * Return the set of special parameter names detected for a given action.
   * Returns an empty set if no special params were detected.
   */
  getSpecialParams(name: string): Set<string> {
    return this.specialParamsCache.get(name) ?? new Set<string>();
  }

  /**
   * Inject special parameters from ExecutionContext into the params object.
   * Special params are resolved from context and merged into a copy of the
   * params so the handler can destructure them directly from its first
   * argument. A key already present in the validated params is never
   * overwritten, and values that resolve to `undefined` are not added.
   */
  private injectSpecialParams(
    actionName: string,
    params: Record<string, unknown>,
    context: ExecutionContext,
  ): Record<string, unknown> {
    const specialParams = this.specialParamsCache.get(actionName);
    if (!specialParams || specialParams.size === 0) {
      return params;
    }

    const enriched = { ...params };
    for (const paramName of specialParams) {
      // Only inject if not already present in the validated params
      if (!(paramName in enriched)) {
        const value = resolveSpecialParam(paramName, context);
        if (value !== undefined) {
          enriched[paramName] = value;
        }
      }
    }
    return enriched;
  }

  /**
   * Build one schema covering every registered action.
   *
   * Object schemas are extended with an `action` literal equal to the action
   * name; other schemas are used as-is. With no actions the result accepts any
   * object with a string `action`; with one action it is that action's schema;
   * otherwise it is a union of all of them.
   */
  buildDynamicSchema(): z.ZodType {
    const actionSchemas = this.getAll().map((action) => {
      if (action.schema instanceof z.ZodObject) {
        return action.schema.extend({
          action: z.literal(action.name),
        });
      }
      return action.schema;
    });

    if (actionSchemas.length === 0) {
      return z.object({ action: z.string() });
    }
    if (actionSchemas.length === 1) {
      return actionSchemas[0];
    }
    return z.union(actionSchemas as [ZodTypeAny, ZodTypeAny, ...ZodTypeAny[]]);
  }

  /** Number of registered actions. */
  get size(): number {
    return this.actions.size;
  }

  // ── Prompt description ──

  /**
   * Build a formatted multi-line description of all available actions.
   * Optionally filter by page URL domain so only relevant actions appear.
   * A URL that cannot be parsed disables filtering.
   */
  getPromptDescription(pageUrl?: string): string {
    let actions = this.getAll();

    // If a URL is provided, filter out actions whose domainFilter does not match
    if (pageUrl) {
      const domain = extractDomain(pageUrl);
      if (domain) {
        actions = actions.filter((a) => this.matchesDomain(a, domain));
      }
    }

    const lines: string[] = [];
    for (const action of actions) {
      const termFlag = action.terminatesSequence ? ' [terminates]' : '';
      lines.push(`- ${action.name}: ${action.description}${termFlag}`);

      // Describe the schema parameters
      if (action.schema instanceof z.ZodObject) {
        const shape = action.schema.shape as Record<string, ZodTypeAny>;
        for (const [key, zodType] of Object.entries(shape)) {
          if (key === 'action') continue;
          const desc = zodType.description ?? '';
          const isOptional = zodType.isOptional?.() ?? false;
          const optLabel = isOptional ? ' (optional)' : '';
          lines.push(` ${key}${optLabel}: ${desc}`);
        }
      }
    }
    return lines.join('\n');
  }

  // ── Domain-based filtering ──

  /**
   * Return actions that have a domainFilter matching the given domain,
   * plus all actions that have no domainFilter (universal actions).
   * A leading `www.` is stripped from `domain` and matching is case-insensitive.
   */
  getActionsForDomain(domain: string): CatalogEntry[] {
    const normalized = domain.replace(/^www\./, '').toLowerCase();
    return this.getAll().filter((action) => this.matchesDomain(action, normalized));
  }

  /**
   * Whether `entry` applies to an already-normalized domain: entries without a
   * domainFilter always apply; otherwise some pattern must equal the domain or
   * be a parent of it. Patterns are lowercased before comparison so the prompt
   * description and `getActionsForDomain` agree.
   */
  private matchesDomain(entry: CatalogEntry, domain: string): boolean {
    const patterns = entry.domainFilter;
    if (!patterns || patterns.length === 0) return true;
    return patterns.some((pattern) => {
      const p = pattern.toLowerCase();
      return domain === p || domain.endsWith(`.${p}`);
    });
  }

  // ── Sensitive data replacement ──

  /**
   * Replace sensitive data values in text with `<KEY>` placeholders.
   * Values are processed longest-first to avoid partial replacements, and
   * empty values are ignored.
   */
  replaceSensitiveData(
    text: string,
    maskedValues: Record<string, string>,
  ): string {
    if (!text) return text;

    // Sort entries by value length descending so longer values are replaced first
    const entries = Object.entries(maskedValues)
      .filter(([, value]) => Boolean(value))
      .sort((a, b) => b[1].length - a[1].length);

    let result = text;
    for (const [key, value] of entries) {
      const pattern = new RegExp(escapeRegExp(value), 'g');
      const placeholder = `<${key}>`;
      // A replacer function keeps "$&", "$1", "$$" etc. in a key literal;
      // a replacement string would expand them.
      result = result.replace(pattern, () => placeholder);
    }
    return result;
  }

  // ── Actions that terminate the sequence ──

  /**
   * Return the names of all actions marked as terminatesSequence.
   */
  getTerminatingActions(): string[] {
    return this.getAll()
      .filter((a) => a.terminatesSequence)
      .map((a) => a.name);
  }

  /**
   * Check whether a given action name is marked as terminatesSequence.
   */
  isTerminating(name: string): boolean {
    const action = this.actions.get(name);
    return action?.terminatesSequence === true;
  }
}
// ── Helpers ──
/**
 * Pull the hostname out of a URL, without a leading `www.` and lowercased.
 * Returns `null` when the input cannot be parsed as a URL.
 */
function extractDomain(url: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return null;
  }
  const host = parsed.hostname;
  const bare = host.startsWith('www.') ? host.slice('www.'.length) : host;
  return bare.toLowerCase();
}
================================================
FILE: packages/core/src/commands/catalog/types.ts
================================================
import type { z } from 'zod';
import type { CommandResult, ExecutionContext } from '../types.js';
/** One registrable action: its metadata, parameter schema and handler. */
export interface CatalogEntry {
  /** Unique action name; used as the catalog key. */
  name: string;
  /** Human-readable summary shown in the prompt description. */
  description: string;
  /** Zod schema the incoming params are parsed with before the handler runs. */
  schema: z.ZodTypeAny;
  /** Implementation invoked with the parsed params and the execution context. */
  handler: (params: Record<string, unknown>, context: ExecutionContext) => Promise<CommandResult>;
  /** Marks the action as one that terminates the sequence. */
  terminatesSequence?: boolean;
  /** Domain patterns the action is limited to; absent or empty means universal. */
  domainFilter?: string[];
}
/** Registration filters for a catalog. */
export interface CatalogOptions {
  /** When non-empty, only actions with these names are registered. */
  includeActions?: string[];
  /** Actions with these names are never registered. */
  excludeActions?: string[];
}
================================================
FILE: packages/core/src/commands/catalog.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import { z } from 'zod';
import { CommandCatalog } from './catalog/catalog.js';
import { CommandFailedError } from '../errors.js';
import type { ExecutionContext, CommandResult } from './types.js';
// ── Helpers ──
/**
 * Create a bun `mock` handler that resolves to `result` regardless of input.
 *
 * @param result Value the handler resolves with; defaults to `{ success: true }`.
 */
function makeHandler(
  result: CommandResult = { success: true },
): (params: Record<string, unknown>, ctx: ExecutionContext) => Promise<CommandResult> {
  return mock(() => Promise.resolve(result));
}
/**
 * Build an ExecutionContext whose `page`, `cdpSession`, `domService` and
 * `browserSession` are empty stub objects.
 *
 * @param overrides Fields that replace or extend the stubbed defaults.
 */
function makeContext(overrides: Partial<ExecutionContext> = {}): ExecutionContext {
  return {
    page: {} as any,
    cdpSession: {} as any,
    domService: {} as any,
    browserSession: {} as any,
    ...overrides,
  };
}
// Shared fixture schema: a required string `value` and an optional numeric `count`.
const testSchema = z.object({
  value: z.string(),
  count: z.optional(z.number()),
});
// ── Tests ──
describe('CommandCatalog', () => {
// A fresh catalog is created before every test so registrations do not carry over.
let registry: CommandCatalog
beforeEach(() => {
registry = new CommandCatalog();
});
// Registration lifecycle: register / unregister / get, plus the
// excludeActions and includeActions constructor options.
describe('register and unregister', () => {
test('registers an action', () => {
registry.register({
name: 'test_action',
description: 'A test action',
schema: testSchema,
handler: makeHandler(),
});
expect(registry.has('test_action')).toBe(true);
expect(registry.size).toBe(1);
});
test('unregisters an action', () => {
registry.register({
name: 'test_action',
description: 'A test action',
schema: testSchema,
handler: makeHandler(),
});
registry.unregister('test_action');
expect(registry.has('test_action')).toBe(false);
expect(registry.size).toBe(0);
});
test('get returns registered action', () => {
registry.register({
name: 'my_action',
description: 'Mine',
schema: testSchema,
handler: makeHandler(),
});
const action = registry.get('my_action');
expect(action).toBeDefined();
expect(action!.name).toBe('my_action');
expect(action!.description).toBe('Mine');
});
test('get returns undefined for unregistered action', () => {
expect(registry.get('nonexistent')).toBeUndefined();
});
// These two cases use their own catalog because the options are fixed at construction.
test('respects excludeActions option', () => {
const filtered = new CommandCatalog({ excludeActions: ['blocked'] });
filtered.register({
name: 'blocked',
description: 'Should not register',
schema: testSchema,
handler: makeHandler(),
});
filtered.register({
name: 'allowed',
description: 'Should register',
schema: testSchema,
handler: makeHandler(),
});
expect(filtered.has('blocked')).toBe(false);
expect(filtered.has('allowed')).toBe(true);
});
test('respects includeActions option', () => {
const filtered = new CommandCatalog({ includeActions: ['only_this'] });
filtered.register({
name: 'only_this',
description: 'Should register',
schema: testSchema,
handler: makeHandler(),
});
filtered.register({
name: 'other',
description: 'Should not register',
schema: testSchema,
handler: makeHandler(),
});
expect(filtered.has('only_this')).toBe(true);
expect(filtered.has('other')).toBe(false);
});
});
// Enumeration: getAll returns every entry and getNames every registered name.
describe('getAll and getNames', () => {
test('returns all registered actions', () => {
registry.register({
name: 'alpha',
description: 'Alpha',
schema: testSchema,
handler: makeHandler(),
});
registry.register({
name: 'beta',
description: 'Beta',
schema: testSchema,
handler: makeHandler(),
});
const all = registry.getAll();
expect(all).toHaveLength(2);
const names = registry.getNames();
expect(names).toContain('alpha');
expect(names).toContain('beta');
});
});
// execute(): the success path and the ways a failure surfaces as CommandFailedError.
describe('execute', () => {
test('executes registered action with valid params', async () => {
const handler = makeHandler({ success: true, extractedContent: 'result' });
registry.register({
name: 'exec_test',
description: 'Test execute',
schema: testSchema,
handler,
});
const ctx = makeContext();
const result = await registry.execute('exec_test', { value: 'hello' }, ctx);
expect(result.success).toBe(true);
expect(result.extractedContent).toBe('result');
expect(handler).toHaveBeenCalledTimes(1);
});
test('throws CommandFailedError for unregistered action', async () => {
const ctx = makeContext();
await expect(
registry.execute('nonexistent', {}, ctx),
).rejects.toThrow(CommandFailedError);
});
test('throws CommandFailedError when schema validation fails', async () => {
registry.register({
name: 'strict',
description: 'Strict schema',
schema: z.object({ required: z.string() }),
handler: makeHandler(),
});
const ctx = makeContext();
// The params omit the `required` key, so schema parsing rejects them.
await expect(
registry.execute('strict', { wrong: 'param' }, ctx),
).rejects.toThrow(CommandFailedError);
});
test('wraps handler errors in CommandFailedError', async () => {
registry.register({
name: 'failing',
description: 'Fails',
schema: testSchema,
handler: async () => {
throw new Error('Internal failure');
},
});
const ctx = makeContext();
await expect(
registry.execute('failing', { value: 'x' }, ctx),
).rejects.toThrow(CommandFailedError);
});
test('re-throws CommandFailedError without wrapping', async () => {
const original = new CommandFailedError('tool', 'original error');
registry.register({
name: 'rethrow',
description: 'Rethrow',
schema: testSchema,
handler: async () => {
throw original;
},
});
const ctx = makeContext();
// Identity check: the very same error instance must come back out.
try {
await registry.execute('rethrow', { value: 'x' }, ctx);
expect.unreachable('Should have thrown');
} catch (error) {
expect(error).toBe(original);
}
});
});
// getActionsForDomain(): universal actions, exact matches, subdomains and the www prefix.
describe('domain-based filtering', () => {
test('returns universal actions for any domain', () => {
registry.register({
name: 'universal',
description: 'No filter',
schema: testSchema,
handler: makeHandler(),
});
const actions = registry.getActionsForDomain('example.com');
expect(actions.map((a) => a.name)).toContain('universal');
});
test('returns domain-specific actions matching the domain', () => {
registry.register({
name: 'github_only',
description: 'GitHub',
schema: testSchema,
handler: makeHandler(),
domainFilter: ['github.com'],
});
const githubActions = registry.getActionsForDomain('github.com');
expect(githubActions.map((a) => a.name)).toContain('github_only');
const otherActions = registry.getActionsForDomain('example.com');
expect(otherActions.map((a) => a.name)).not.toContain('github_only');
});
test('matches subdomains', () => {
registry.register({
name: 'google_all',
description: 'Google subdomains',
schema: testSchema,
handler: makeHandler(),
domainFilter: ['google.com'],
});
const actions = registry.getActionsForDomain('mail.google.com');
expect(actions.map((a) => a.name)).toContain('google_all');
});
test('strips www prefix from domain', () => {
registry.register({
name: 'example',
description: 'Example',
schema: testSchema,
handler: makeHandler(),
domainFilter: ['example.com'],
});
const actions = registry.getActionsForDomain('www.example.com');
expect(actions.map((a) => a.name)).toContain('example');
});
});
// isTerminating() and getTerminatingActions() driven by the terminatesSequence flag.
describe('terminatesSequence flag', () => {
test('isTerminating returns true for terminating actions', () => {
registry.register({
name: 'finish',
description: 'Finish',
schema: testSchema,
handler: makeHandler(),
terminatesSequence: true,
});
expect(registry.isTerminating('finish')).toBe(true);
});
test('isTerminating returns false for non-terminating actions', () => {
// The flag is omitted entirely here rather than set to false.
registry.register({
name: 'continue',
description: 'Continue',
schema: testSchema,
handler: makeHandler(),
});
expect(registry.isTerminating('continue')).toBe(false);
});
test('getTerminatingActions returns all terminating action names', () => {
registry.register({
name: 'finish',
description: 'Done',
schema: testSchema,
handler: makeHandler(),
terminatesSequence: true,
});
registry.register({
name: 'abort',
description: 'Abort',
schema: testSchema,
handler: makeHandler(),
terminatesSequence: true,
});
registry.register({
name: 'tap',
description: 'Click',
schema: testSchema,
handler: makeHandler(),
});
const terminating = registry.getTerminatingActions();
expect(terminating).toContain('finish');
expect(terminating).toContain('abort');
expect(terminating).not.toContain('tap');
});
});
// getPromptDescription(): line format and filtering by the page URL's domain.
describe('getPromptDescription', () => {
test('returns formatted description of all actions', () => {
registry.register({
name: 'tap',
description: 'Click on an element',
schema: z.object({
index: z.number().describe('Element index'),
}),
handler: makeHandler(),
});
registry.register({
name: 'finish',
description: 'Mark task as done',
schema: z.object({
text: z.string().describe('Result text'),
}),
handler: makeHandler(),
terminatesSequence: true,
});
const desc = registry.getPromptDescription();
expect(desc).toContain('- tap: Click on an element');
expect(desc).toContain('index');
expect(desc).toContain('Element index');
// Terminating actions carry a " [terminates]" suffix on their header line.
expect(desc).toContain('- finish: Mark task as done [terminates]');
});
test('filters by page URL domain', () => {
registry.register({
name: 'universal',
description: 'Universal action',
schema: testSchema,
handler: makeHandler(),
});
registry.register({
name: 'github_only',
description: 'GitHub action',
schema: testSchema,
handler: makeHandler(),
domainFilter: ['github.com'],
});
const githubDesc = registry.getPromptDescription('https://github.com/repo');
expect(githubDesc).toContain('universal');
expect(githubDesc).toContain('github_only');
const otherDesc = registry.getPromptDescription('https://example.com');
expect(otherDesc).toContain('universal');
expect(otherDesc).not.toContain('github_only');
});
});
// replaceSensitiveData(): each masked value is swapped for a `<KEY>` placeholder
// named after its key in the maskedValues record.
describe('sensitive data replacement', () => {
  test('replaces sensitive values with placeholders', () => {
    const result = registry.replaceSensitiveData(
      'The password is hunter2 and the key is abc123',
      { PASSWORD: 'hunter2', API_KEY: 'abc123' },
    );
    expect(result).toBe('The password is <PASSWORD> and the key is <API_KEY>');
  });

  test('replaces longer values first to avoid partial replacements', () => {
    const result = registry.replaceSensitiveData(
      'Token: my-long-secret-token and key: secret',
      { TOKEN: 'my-long-secret-token', KEY: 'secret' },
    );
    // "my-long-secret-token" should be replaced first, not the inner "secret"
    expect(result).toBe('Token: <TOKEN> and key: <KEY>');
  });

  test('handles empty text', () => {
    const result = registry.replaceSensitiveData('', { KEY: 'value' });
    expect(result).toBe('');
  });

  test('handles empty sensitive data', () => {
    const result = registry.replaceSensitiveData('some text', {});
    expect(result).toBe('some text');
  });

  test('handles special regex characters in values', () => {
    // "$" and "." must be matched literally, not as regex syntax.
    const result = registry.replaceSensitiveData(
      'Found: $100.00 (USD)',
      { PRICE: '$100.00' },
    );
    expect(result).toBe('Found: <PRICE> (USD)');
  });
});
// getSpecialParams(): reports the special parameter names detected for an action.
// NOTE(review): both cases only cover the "nothing detected" outcome; no test here
// registers a handler whose signature names a special parameter.
describe('parameter inspection and injection', () => {
test('detects special parameters from handler function', () => {
registry.register({
name: 'with_page',
description: 'Uses page',
schema: z.object({}),
handler: async (params, ctx) => {
return { success: true };
},
});
// The handler doesn't use named special params, so set should be empty
const special = registry.getSpecialParams('with_page');
expect(special.size).toBe(0);
});
test('returns empty set for unregistered action', () => {
const special = registry.getSpecialParams('nonexistent');
expect(special.size).toBe(0);
});
});
// buildDynamicSchema(): the three shapes it can return (union, fallback object, single schema).
describe('buildDynamicSchema', () => {
test('builds a union schema from registered actions', () => {
registry.register({
name: 'tap',
description: 'Click',
schema: z.object({ index: z.number() }),
handler: makeHandler(),
});
registry.register({
name: 'finish',
description: 'Done',
schema: z.object({ text: z.string() }),
handler: makeHandler(),
});
const schema = registry.buildDynamicSchema();
expect(schema).toBeDefined();
// Should parse a click action
const clickResult = schema.safeParse({ action: 'tap', index: 5 });
expect(clickResult.success).toBe(true);
// Should parse a done action
const doneResult = schema.safeParse({ action: 'finish', text: 'finished' });
expect(doneResult.success).toBe(true);
});
test('returns simple object schema when no actions registered', () => {
// With an empty catalog any string `action` is accepted.
const schema = registry.buildDynamicSchema();
const result = schema.safeParse({ action: 'anything' });
expect(result.success).toBe(true);
});
test('returns single schema when only one action registered', () => {
registry.register({
name: 'only',
description: 'Only action',
schema: z.object({ x: z.number() }),
handler: makeHandler(),
});
const schema = registry.buildDynamicSchema();
const result = schema.safeParse({ action: 'only', x: 42 });
expect(result.success).toBe(true);
});
});
// registerCustom(): custom definitions end up in the same catalog, flag included.
describe('registerCustom', () => {
test('registers a custom action definition', () => {
registry.registerCustom({
name: 'custom_action',
description: 'A custom action',
schema: z.object({ query: z.string() }),
handler: async () => ({ success: true }),
});
expect(registry.has('custom_action')).toBe(true);
});
test('registers with terminatesSequence flag', () => {
registry.registerCustom({
name: 'custom_done',
description: 'Custom done',
schema: z.object({}),
handler: async () => ({ success: true, isDone: true }),
terminatesSequence: true,
});
expect(registry.isTerminating('custom_done')).toBe(true);
});
});
});
================================================
FILE: packages/core/src/commands/executor.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import { CommandExecutor } from './executor.js';
import type { Command, ExecutionContext, CommandResult } from './types.js';
import { UrlBlockedError, CommandFailedError } from '../errors.js';
// ── Mock factories ──
/**
 * Stub of the DOM service: click/input resolve with nothing, the selector
 * lookup resolves to '#selector', and extractState resolves to an empty page
 * state with a 1280x800 viewport over a 1280x2000 document.
 */
function makeMockPageAnalyzer() {
  const emptyState = () =>
    Promise.resolve({
      tree: '',
      selectorMap: {},
      elementCount: 0,
      interactiveElementCount: 0,
      scrollPosition: { x: 0, y: 0 },
      viewportSize: { width: 1280, height: 800 },
      documentSize: { width: 1280, height: 2000 },
      pixelsAbove: 0,
      pixelsBelow: 0,
    });

  const analyzer: any = {
    clickElementByIndex: mock(() => Promise.resolve()),
    inputTextByIndex: mock(() => Promise.resolve()),
    getElementSelector: mock(() => Promise.resolve('#selector')),
    extractState: mock(emptyState),
  };
  return analyzer;
}
/**
 * Stub of the browser session: navigation and tab operations resolve with
 * nothing, screenshot resolves to a fixed 1280x800 payload, and the session
 * exposes a mock page and a mock CDP session and reports itself connected.
 */
function makeMockViewport() {
  const noop = () => Promise.resolve();
  const fakeShot = () =>
    Promise.resolve({ base64: 'abc', width: 1280, height: 800 });

  const viewport: any = {
    navigate: mock(noop),
    waitForPageReady: mock(noop),
    switchTab: mock(noop),
    newTab: mock(noop),
    closeTab: mock(noop),
    screenshot: mock(fakeShot),
    currentPage: makeMockPage(),
    cdp: makeMockCdpSession(),
    isConnected: true,
  };
  return viewport;
}
/**
 * Stub of a page: every interaction resolves with nothing, `evaluate` resolves
 * to an empty array, and `$` resolves to an element handle whose
 * `setInputFiles` is itself a mock.
 */
function makeMockPage() {
  const noop = () => Promise.resolve();
  const mouse = { click: mock(noop) };
  const keyboard = { press: mock(noop) };

  const page: any = {
    goBack: mock(noop),
    evaluate: mock(() => Promise.resolve([])),
    mouse,
    keyboard,
    fill: mock(noop),
    click: mock(noop),
    selectOption: mock(noop),
    $: mock(() => Promise.resolve({ setInputFiles: mock(() => Promise.resolve()) })),
  };
  return page;
}
/** Stub of a CDP session whose `send` resolves to an empty object. */
function makeMockCdpSession() {
  const session: any = {
    send: mock(() => Promise.resolve({})),
  };
  return session;
}
/**
 * Build an ExecutionContext backed by mocks. `page` and `cdpSession` are the
 * same objects the mock browser session exposes, so assertions can be made
 * through either reference.
 *
 * @param overrides Fields that replace or extend the mocked defaults.
 */
function makeContext(overrides: Partial<ExecutionContext> = {}): ExecutionContext {
  const browser = makeMockViewport();
  return {
    page: browser.currentPage,
    cdpSession: browser.cdp,
    domService: makeMockPageAnalyzer(),
    browserSession: browser,
    ...overrides,
  };
}
/**
 * Helper to create action objects. Zod schemas with .default() produce
 * required fields in the inferred output type, but at runtime the defaults
 * are applied during validation. Accepting a loose record and asserting it
 * to `Command` lets tests omit fields that have Zod defaults.
 *
 * @param a Raw action object; it is returned unchanged.
 */
function action(a: Record<string, unknown>): Command {
  return a as Command;
}
// ── Tests ──
describe('CommandExecutor', () => {
// A default-configured executor is created before every test.
let tools: CommandExecutor;
beforeEach(() => {
tools = new CommandExecutor();
});
// Construction: the built-in action names and the commandsPerStep setting.
describe('constructor and registration', () => {
test('registers all built-in actions', () => {
const names = tools.registry.getNames();
expect(names).toContain('tap');
expect(names).toContain('type_text');
expect(names).toContain('navigate');
expect(names).toContain('back');
expect(names).toContain('scroll');
expect(names).toContain('press_keys');
expect(names).toContain('extract');
expect(names).toContain('finish');
expect(names).toContain('focus_tab');
expect(names).toContain('new_tab');
expect(names).toContain('close_tab');
expect(names).toContain('web_search');
expect(names).toContain('capture');
expect(names).toContain('read_page');
expect(names).toContain('wait');
expect(names).toContain('scroll_to');
expect(names).toContain('find');
expect(names).toContain('search');
expect(names).toContain('extract_structured');
});
test('has default commandsPerStep of 10', () => {
expect(tools.commandsPerStep).toBe(10);
});
test('respects custom commandsPerStep', () => {
const custom = new CommandExecutor({ commandsPerStep: 5 });
expect(custom.commandsPerStep).toBe(5);
});
});
// The suite title says "click"; the action name these tests send is 'tap'.
// Covers index-based clicking, clickCount, and the opt-in coordinate mode.
describe('click action', () => {
test('delegates to domService.clickElementByIndex', async () => {
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'tap', index: 0 }),
ctx,
);
expect(result.success).toBe(true);
expect(ctx.domService.clickElementByIndex).toHaveBeenCalledWith(
ctx.page,
ctx.cdpSession,
0,
);
});
test('supports multiple clicks via clickCount', async () => {
const ctx = makeContext();
await tools.executeAction(
action({ action: 'tap', index: 0, clickCount: 3 }),
ctx,
);
// First call + 2 additional
expect(ctx.domService.clickElementByIndex).toHaveBeenCalledTimes(3);
});
test('uses coordinate-based clicking when enabled', async () => {
tools.setCoordinateClicking(true);
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'tap', index: 0, coordinateX: 100, coordinateY: 200 }),
ctx,
);
expect(result.success).toBe(true);
expect(ctx.page.mouse.click).toHaveBeenCalledWith(100, 200);
// domService should NOT have been called
expect(ctx.domService.clickElementByIndex).not.toHaveBeenCalled();
});
test('coordinate click supports clickCount', async () => {
tools.setCoordinateClicking(true);
const ctx = makeContext();
await tools.executeAction(
action({ action: 'tap', index: 0, coordinateX: 50, coordinateY: 50, clickCount: 2 }),
ctx,
);
expect(ctx.page.mouse.click).toHaveBeenCalledTimes(2);
});
test('falls back to index-based click when coordinate clicking disabled', async () => {
// Default: coordinate clicking is disabled
const ctx = makeContext();
await tools.executeAction(
action({ action: 'tap', index: 0, coordinateX: 100, coordinateY: 200 }),
ctx,
);
// Should use domService, not coordinates
expect(ctx.domService.clickElementByIndex).toHaveBeenCalled();
});
});
// 'navigate': the allowed path plus rejection via the blockedUrls and allowedUrls options.
describe('navigate action', () => {
test('navigates to valid URL', async () => {
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'navigate', url: 'https://example.com' }),
ctx,
);
expect(result.success).toBe(true);
expect(ctx.browserSession.navigate).toHaveBeenCalledWith('https://example.com');
});
test('throws CommandFailedError wrapping UrlBlockedError for blocked URL', async () => {
const restricted = new CommandExecutor({ blockedUrls: ['evil.com'] });
const ctx = makeContext();
// Only the outer error type is asserted; the wrapped cause is not inspected.
await expect(
restricted.executeAction(
action({ action: 'navigate', url: 'https://evil.com/page' }),
ctx,
),
).rejects.toThrow(CommandFailedError);
});
test('throws when URL not in allowlist', async () => {
const restricted = new CommandExecutor({ allowedUrls: ['safe.com'] });
const ctx = makeContext();
await expect(
restricted.executeAction(
action({ action: 'navigate', url: 'https://other.com' }),
ctx,
),
).rejects.toThrow(CommandFailedError);
});
});
// The suite title says "input_text"; the action name these tests send is 'type_text'.
describe('input_text action', () => {
test('inputs text into element', async () => {
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'type_text', index: 3, text: 'hello' }),
ctx,
);
expect(result.success).toBe(true);
expect(ctx.domService.inputTextByIndex).toHaveBeenCalledWith(
ctx.page,
ctx.cdpSession,
3,
'hello',
true, // clearFirst defaults to true
);
});
test('passes clearFirst=false when specified', async () => {
const ctx = makeContext();
await tools.executeAction(
action({ action: 'type_text', index: 0, text: 'append', clearFirst: false }),
ctx,
);
expect(ctx.domService.inputTextByIndex).toHaveBeenCalledWith(
ctx.page,
ctx.cdpSession,
0,
'append',
false,
);
});
});
// 'scroll': page-level scrolling versus scrolling a specific indexed element.
describe('scroll action', () => {
test('scrolls the page when no index provided', async () => {
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'scroll', direction: 'down' }),
ctx,
);
expect(result.success).toBe(true);
});
test('scrolls an element when index is provided', async () => {
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'scroll', direction: 'up', index: 5 }),
ctx,
);
expect(result.success).toBe(true);
// The element path is identified by the selector lookup for the given index.
expect(ctx.domService.getElementSelector).toHaveBeenCalledWith(5);
});
});
// The suite title says "search_google"; the action name this test sends is 'web_search'.
describe('search_google action', () => {
test('navigates to Google search URL', async () => {
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'web_search', query: 'bun test runner' }),
ctx,
);
expect(result.success).toBe(true);
expect(ctx.browserSession.navigate).toHaveBeenCalled();
// Inspect the first argument of the first navigate call.
const navigateArg = (ctx.browserSession.navigate as any).mock.calls[0][0] as string;
expect(navigateArg).toContain('google.com/search');
expect(navigateArg).toContain('bun%20test%20runner');
});
});
// The suite title says "done"; the action name these tests send is 'finish'.
describe('done action', () => {
test('returns isDone=true with text', async () => {
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'finish', text: 'Task completed successfully' }),
ctx,
);
expect(result.success).toBe(true);
expect(result.isDone).toBe(true);
expect(result.extractedContent).toBe('Task completed successfully');
expect(result.includeInMemory).toBe(true);
});
test('respects explicit success=false', async () => {
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'finish', text: 'Could not complete', success: false }),
ctx,
);
// The sequence still ends even though the outcome is reported as a failure.
expect(result.success).toBe(false);
expect(result.isDone).toBe(true);
});
});
// The suite title says "go_back"; the action name this test sends is 'back'.
describe('go_back action', () => {
test('calls page.goBack and waits for ready', async () => {
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'back' }),
ctx,
);
expect(result.success).toBe(true);
expect(ctx.page.goBack).toHaveBeenCalled();
expect(ctx.browserSession.waitForPageReady).toHaveBeenCalled();
});
});
// The suite title says "send_keys"; the action name this test sends is 'press_keys'.
describe('send_keys action', () => {
test('presses keyboard keys', async () => {
const ctx = makeContext();
const result = await tools.executeAction(
action({ action: 'press_keys', keys: 'Enter' }),
ctx,
);
expect(result.success).toBe(true);
expect(ctx.page.keyboard.press).toHaveBeenCalledWith('Enter');
});
});
// The suite title says "find_elements"; the action name these tests send is 'find'.
// page.evaluate is replaced per test to control what the search returns.
describe('find_elements action', () => {
test('returns found elements description', async () => {
const page = makeMockPage();
page.evaluate = mock(() =>
Promise.resolve([
{ tag: 'button', text: 'Submit', attributes: { id: 'btn-submit' } },
{ tag: 'a', text: 'Home', attributes: {} },
]),
);
const ctx = makeContext({ page });
const result = await tools.executeAction(
action({ action: 'find', query: 'submit' }),
ctx,
);
expect(result.success).toBe(true);
expect(result.extractedContent).toContain('Found 2 element(s)');
expect(result.extractedContent).toContain('button');
expect(result.extractedContent).toContain('Submit');
});
test('returns message when no elements found', async () => {
const page = makeMockPage();
page.evaluate = mock(() => Promise.resolve([]));
const ctx = makeContext({ page });
const result = await tools.executeAction(
action({ action: 'find', query: 'nonexistent' }),
ctx,
);
// An empty result is still reported as a successful action.
expect(result.success).toBe(true);
expect(result.extractedContent).toContain('No elements found');
});
});
// The suite title says "extract_content"; the action name this test sends is 'extract'.
// NOTE(review): this test contains no assertions and its try/catch swallows every
// error, so it passes whether the action succeeds or throws.
describe('extract_content action (fallback, no LLM)', () => {
test('returns error/fallback when no extraction service', async () => {
// Tools without model won't have an extraction service
// The handler falls back to extractMarkdown which we mock via page.evaluate
const ctx = makeContext();
// extractMarkdown eventually calls page.evaluate
// For this test, just verify no crash. The actual extractMarkdown module
// import might require more setup, so we test the branch
try {
await tools.executeAction(
action({ action: 'extract', goal: 'get all links' }),
ctx,
);
} catch {
// Expected - extractMarkdown import/evaluation may fail in test env
}
});
});
describe('search_page action (multi-engine)', () => {
  // Reads the URL passed to the first browserSession.navigate call.
  const firstNavigatedUrl = (context: ReturnType<typeof makeContext>): string =>
    (context.browserSession.navigate as any).mock.calls[0][0] as string;

  test('navigates to DuckDuckGo when specified', async () => {
    const context = makeContext();

    const outcome = await tools.executeAction(
      action({ action: 'search', query: 'hello', engine: 'duckduckgo' }),
      context,
    );

    expect(outcome.success).toBe(true);
    expect(firstNavigatedUrl(context)).toContain('duckduckgo.com');
  });

  test('navigates to Bing when specified', async () => {
    const context = makeContext();

    const outcome = await tools.executeAction(
      action({ action: 'search', query: 'hello', engine: 'bing' }),
      context,
    );

    expect(outcome.success).toBe(true);
    expect(firstNavigatedUrl(context)).toContain('bing.com/search');
  });

  test('defaults to Google', async () => {
    // No engine supplied: the handler falls back to Google.
    const context = makeContext();

    await tools.executeAction(action({ action: 'search', query: 'hello' }), context);

    expect(firstNavigatedUrl(context)).toContain('google.com/search');
  });
});
// Verifies that executeActions scrubs the values configured in
// ExecutionContext.maskedValues from the results it returns.
describe('sensitive data masking', () => {
test('masks sensitive data in action results', async () => {
const ctx = makeContext({
maskedValues: {
PASSWORD: 'secret123',
API_KEY: 'sk-abc',
},
});
// Execute done action with text containing sensitive data
const result = await tools.executeActions(
[action({ action: 'finish', text: 'Found password: secret123 and key: sk-abc' })],
ctx,
);
expect(result[0].success).toBe(true);
// NOTE(review): toContain('') is true for every string, so the next two
// assertions check nothing. The expected placeholder literals appear to have
// been lost; confirm the placeholder format produced by
// registry.replaceSensitiveData and restore them.
expect(result[0].extractedContent).toContain('');
expect(result[0].extractedContent).toContain('');
// These two are the assertions that actually prove the raw secrets are gone.
expect(result[0].extractedContent).not.toContain('secret123');
expect(result[0].extractedContent).not.toContain('sk-abc');
});
test('does not mask when no sensitive data configured', async () => {
const ctx = makeContext(); // no maskedValues
const result = await tools.executeActions(
[action({ action: 'finish', text: 'Plain text with no secrets' })],
ctx,
);
// Without maskedValues the text must pass through unchanged.
expect(result[0].extractedContent).toBe('Plain text with no secrets');
});
});
// Covers executeActions: ordering, early termination, the per-step command
// limit, and how thrown errors are turned into failed results.
describe('action sequence execution', () => {
test('executes multiple actions in sequence', async () => {
const ctx = makeContext();
const results = await tools.executeActions(
[
action({ action: 'tap', index: 0 }),
action({ action: 'tap', index: 1 }),
],
ctx,
);
expect(results).toHaveLength(2);
expect(results[0].success).toBe(true);
expect(results[1].success).toBe(true);
});
// 'finish' returns isDone, which ends the sequence; later commands are dropped.
test('stops at done action', async () => {
const ctx = makeContext();
const results = await tools.executeActions(
[
action({ action: 'tap', index: 0 }),
action({ action: 'finish', text: 'Finished' }),
action({ action: 'tap', index: 1 }), // should not execute
],
ctx,
);
expect(results).toHaveLength(2);
expect(results[1].isDone).toBe(true);
});
// commandsPerStep caps how many commands a single executeActions call runs.
test('respects commandsPerStep limit', async () => {
const limited = new CommandExecutor({ commandsPerStep: 2 });
const ctx = makeContext();
const results = await limited.executeActions(
[
action({ action: 'tap', index: 0 }),
action({ action: 'tap', index: 1 }),
action({ action: 'tap', index: 2 }), // should not execute (limit=2)
],
ctx,
);
expect(results).toHaveLength(2);
});
// A rejected handler becomes a failed result instead of rejecting the call.
test('handles errors gracefully in sequence', async () => {
const ctx = makeContext();
ctx.domService.clickElementByIndex = mock(() =>
Promise.reject(new Error('Element is not visible')),
);
const results = await tools.executeActions(
[action({ action: 'tap', index: 0 })],
ctx,
);
expect(results).toHaveLength(1);
expect(results[0].success).toBe(false);
expect(results[0].error).toBeDefined();
expect(results[0].error).toContain('not visible');
});
// 'browser has been closed' is classified as a non-retryable crash, so the
// second command must never run.
test('stops sequence on non-retryable error', async () => {
const ctx = makeContext();
ctx.domService.clickElementByIndex = mock(() =>
Promise.reject(new Error('browser has been closed')),
);
const results = await tools.executeActions(
[
action({ action: 'tap', index: 0 }),
action({ action: 'tap', index: 1 }), // should not run
],
ctx,
);
expect(results).toHaveLength(1);
expect(results[0].success).toBe(false);
});
// 'Element is not visible' is classified as retryable, so the sequence
// continues with the next command after recording the failure.
test('continues after retryable error', async () => {
const ctx = makeContext();
let callCount = 0;
ctx.domService.clickElementByIndex = mock(() => {
callCount++;
// Only the first click fails; every later click succeeds.
if (callCount === 1) {
return Promise.reject(new Error('Element is not visible'));
}
return Promise.resolve();
});
const results = await tools.executeActions(
[
action({ action: 'tap', index: 0 }),
action({ action: 'tap', index: 1 }),
],
ctx,
);
expect(results).toHaveLength(2);
expect(results[0].success).toBe(false);
expect(results[1].success).toBe(true);
});
// Error text is masked too, not just extractedContent.
test('masks sensitive data in error messages', async () => {
const ctx = makeContext({
maskedValues: { TOKEN: 'my-secret-token' },
});
ctx.domService.clickElementByIndex = mock(() =>
Promise.reject(new Error('Failed with my-secret-token')),
);
const results = await tools.executeActions(
[action({ action: 'tap', index: 0 })],
ctx,
);
expect(results[0].error).not.toContain('my-secret-token');
// NOTE(review): toContain('') is vacuously true; the expected placeholder
// literal appears to have been lost. Confirm the placeholder format produced
// by registry.replaceSensitiveData and restore it.
expect(results[0].error).toContain('');
});
});
describe('switch_tab action', () => {
  test('switches to specified tab', async () => {
    // Arrange
    const context = makeContext();
    const focusSecondTab = action({ action: 'focus_tab', tabIndex: 1 });

    // Act
    const outcome = await tools.executeAction(focusSecondTab, context);

    // Assert: the tab index is forwarded to the browser session.
    expect(outcome.success).toBe(true);
    expect(context.browserSession.switchTab).toHaveBeenCalledWith(1);
  });
});
describe('open_tab action', () => {
  test('opens new tab with URL', async () => {
    const context = makeContext();
    const openExample = action({ action: 'new_tab', url: 'https://example.com' });

    const outcome = await tools.executeAction(openExample, context);

    expect(outcome.success).toBe(true);
    expect(context.browserSession.newTab).toHaveBeenCalledWith('https://example.com');
  });

  test('throws for blocked URL', async () => {
    // An executor configured with a block list must refuse matching URLs.
    const restricted = new CommandExecutor({ blockedUrls: ['banned.com'] });
    const context = makeContext();
    const openBanned = action({ action: 'new_tab', url: 'https://banned.com' });

    const attempt = restricted.executeAction(openBanned, context);

    await expect(attempt).rejects.toThrow(CommandFailedError);
  });
});
describe('close_tab action', () => {
  test('closes specified tab', async () => {
    // Arrange
    const context = makeContext();
    const closeThirdTab = action({ action: 'close_tab', tabIndex: 2 });

    // Act
    const outcome = await tools.executeAction(closeThirdTab, context);

    // Assert: the tab index is forwarded to the browser session.
    expect(outcome.success).toBe(true);
    expect(context.browserSession.closeTab).toHaveBeenCalledWith(2);
  });
});
describe('screenshot action', () => {
  test('takes a screenshot', async () => {
    const context = makeContext();

    const { success, extractedContent } = await tools.executeAction(
      action({ action: 'capture' }),
      context,
    );

    // The handler reports a textual confirmation and delegates to the session.
    expect(success).toBe(true);
    expect(extractedContent).toContain('Screenshot taken');
    expect(context.browserSession.screenshot).toHaveBeenCalled();
  });
});
describe('setCoordinateClicking', () => {
  // Both cases are smoke tests: they only check that toggling the flag does
  // not throw. The flag itself is private and is not observed here.
  test('enables coordinate-based clicking', () => {
    tools.setCoordinateClicking(true);
    // Verified through click behavior in click action tests above
    expect(tools).toBeDefined();
  });

  test('disables coordinate-based clicking', () => {
    // Turn the feature on, then off again.
    for (const enabled of [true, false]) {
      tools.setCoordinateClicking(enabled);
    }
    expect(tools).toBeDefined();
  });
});
});
================================================
FILE: packages/core/src/commands/executor.ts
================================================
import type { Page, CDPSession } from 'playwright';
import { z } from 'zod';
import { CommandCatalog } from './catalog/catalog.js';
import type {
Command,
CommandResult,
ExecutionContext,
InterpretedViewportError,
ViewportErrorCategory,
} from './types.js';
import {
TapCommandSchema,
TypeTextCommandSchema,
NavigateCommandSchema,
BackCommandSchema,
ScrollCommandSchema,
PressKeysCommandSchema,
ExtractCommandSchema,
FinishCommandSchema,
FocusTabCommandSchema,
NewTabCommandSchema,
CloseTabCommandSchema,
WebSearchCommandSchema,
UploadCommandSchema,
SelectCommandSchema,
CaptureCommandSchema,
ReadPageCommandSchema,
WaitCommandSchema,
ScrollToCommandSchema,
FindCommandSchema,
SearchCommandSchema,
ListOptionsCommandSchema,
PickOptionCommandSchema,
ExtractStructuredCommandSchema,
} from './types.js';
import type { Viewport } from '../viewport/viewport.js';
import type { PageAnalyzer } from '../page/page-analyzer.js';
import type { LanguageModel } from '../model/interface.js';
import { ContentExtractor } from './extraction/extractor.js';
import { scrollPage, scrollElement, buildGoogleSearchUrl } from './utils.js';
import { extractMarkdown } from '../page/content-extractor.js';
import { isUrlPermitted } from '../utils.js';
import {
UrlBlockedError,
NavigationFailedError,
ViewportCrashedError,
} from '../errors.js';
import { sleep } from '../utils.js';
/**
 * Construction options for {@link CommandExecutor}. Every field is optional.
 */
export interface CommandExecutorOptions {
/** When set, the executor builds a ContentExtractor from this model. */
model?: LanguageModel;
/** Passed to isUrlPermitted as the allow list for URL-taking commands. */
allowedUrls?: string[];
/** Passed to isUrlPermitted as the block list for URL-taking commands. */
blockedUrls?: string[];
/** Upper bound on commands run per executeActions call; the executor defaults it to 10. */
commandsPerStep?: number;
}
/**
 * Registers the built-in browser commands in a {@link CommandCatalog} and runs
 * single commands or bounded command sequences against an ExecutionContext.
 */
export class CommandExecutor {
  /** Catalog holding every registered command definition. */
  readonly registry: CommandCatalog;
  /** Extractor built from `options.model`; undefined when no model was supplied. */
  private extractionService?: ContentExtractor;
  private allowedUrls?: string[];
  private blockedUrls?: string[];
  /** Maximum number of commands run by one executeActions call (default 10). */
  readonly commandsPerStep: number;
  private coordinateClickingEnabled = false;

  /**
   * @param options Optional URL allow/block lists, per-step command limit, and
   *   a model used to build the default extraction service.
   */
  constructor(options?: CommandExecutorOptions) {
    this.registry = new CommandCatalog();
    this.allowedUrls = options?.allowedUrls;
    this.blockedUrls = options?.blockedUrls;
    this.commandsPerStep = options?.commandsPerStep ?? 10;
    if (options?.model) {
      this.extractionService = new ContentExtractor(options.model);
    }
    this.registerBuiltinActions();
  }

  /**
   * Enable or disable coordinate-based clicking.
   * When enabled, click actions with coordinateX/coordinateY will use
   * page.mouse.click instead of element index lookup.
   */
  setCoordinateClicking(enabled: boolean): void {
    this.coordinateClickingEnabled = enabled;
  }

  /**
   * Guard shared by every command that navigates to a URL.
   *
   * @throws UrlBlockedError when the allow/block lists reject `url`.
   */
  private assertUrlPermitted(url: string): void {
    if (!isUrlPermitted(url, this.allowedUrls, this.blockedUrls)) {
      throw new UrlBlockedError(url);
    }
  }

  /**
   * Pick the extractor for a command: a fresh one built from the context's
   * extraction LLM when present, otherwise the executor-level service (which
   * may be undefined).
   */
  private resolveExtractionService(ctx: ExecutionContext): ContentExtractor | undefined {
    const extractionModel = ctx.extractionLlm;
    return extractionModel ? new ContentExtractor(extractionModel) : this.extractionService;
  }

  /** Register every built-in command with the catalog. */
  private registerBuiltinActions(): void {
    // Click
    this.registry.register({
      name: 'tap',
      description: 'Click on an element by its index',
      schema: TapCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { index, clickCount, coordinateX, coordinateY } = params as {
          index: number;
          clickCount?: number;
          coordinateX?: number;
          coordinateY?: number;
        };

        // Coordinate-based clicking: only when enabled AND both coordinates given.
        if (
          this.coordinateClickingEnabled &&
          coordinateX !== undefined &&
          coordinateY !== undefined
        ) {
          const clicks = clickCount ?? 1;
          for (let i = 0; i < clicks; i++) {
            await ctx.page.mouse.click(coordinateX, coordinateY);
          }
          return { success: true };
        }

        // Index-based clicking: always click once, then repeat for multi-clicks.
        await ctx.domService.clickElementByIndex(ctx.page, ctx.cdpSession, index);
        if (clickCount && clickCount > 1) {
          for (let i = 1; i < clickCount; i++) {
            await ctx.domService.clickElementByIndex(ctx.page, ctx.cdpSession, index);
          }
        }
        return { success: true };
      },
    });

    // Input text
    this.registry.register({
      name: 'type_text',
      description: 'Type text into an input element',
      schema: TypeTextCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { index, text, clearFirst } = params as {
          index: number;
          text: string;
          clearFirst?: boolean;
        };
        // The field is cleared first unless the caller opts out.
        await ctx.domService.inputTextByIndex(
          ctx.page,
          ctx.cdpSession,
          index,
          text,
          clearFirst ?? true,
        );
        return { success: true };
      },
    });

    // Navigate
    this.registry.register({
      name: 'navigate',
      description: 'Navigate to a URL',
      schema: NavigateCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { url } = params as { url: string };
        this.assertUrlPermitted(url);
        await ctx.browserSession.navigate(url);
        return { success: true };
      },
    });

    // Go back
    this.registry.register({
      name: 'back',
      description: 'Go back to previous page',
      schema: BackCommandSchema.omit({ action: true }),
      handler: async (_params, ctx) => {
        // Best effort: a failed or timed-out goBack is deliberately ignored and
        // the handler still waits for the page to settle.
        await ctx.page.goBack({ timeout: 5000 }).catch(() => {});
        await ctx.browserSession.waitForPageReady();
        return { success: true };
      },
    });

    // Scroll
    this.registry.register({
      name: 'scroll',
      description: 'Scroll the page or an element',
      schema: ScrollCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { direction, amount, index } = params as {
          direction: 'up' | 'down';
          amount?: number;
          index?: number;
        };
        if (index === undefined) {
          await scrollPage(ctx.page, direction, amount);
          return { success: true };
        }
        const selector = await ctx.domService.getElementSelector(index);
        if (!selector) {
          // Report the miss instead of claiming success when nothing scrolled,
          // matching the other index-based commands.
          return { success: false, error: `Element ${index} not found` };
        }
        await scrollElement(ctx.page, selector, direction, amount);
        return { success: true };
      },
    });

    // Send keys
    this.registry.register({
      name: 'press_keys',
      description: 'Send keyboard keys (e.g., Enter, Escape, Control+a)',
      schema: PressKeysCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { keys } = params as { keys: string };
        await ctx.page.keyboard.press(keys);
        return { success: true };
      },
    });

    // Extract content
    this.registry.register({
      name: 'extract',
      description: 'Extract specific information from the current page',
      schema: ExtractCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { goal, outputSchema } = params as {
          goal: string;
          outputSchema?: Record<string, unknown>;
        };

        // Use the extraction LLM from context if available, otherwise fall back
        const service = this.resolveExtractionService(ctx);
        if (!service) {
          // Fallback: just extract markdown
          const markdown = await extractMarkdown(ctx.page);
          return {
            success: true,
            extractedContent: markdown.slice(0, 5000),
            includeInMemory: true,
          };
        }

        // If an outputSchema is provided, use structured extraction from text
        if (outputSchema) {
          const markdown = await extractMarkdown(ctx.page);
          const content = await service.extractFromText(
            markdown.slice(0, 8000),
            goal,
            outputSchema,
          );
          return { success: true, extractedContent: content, includeInMemory: true };
        }

        const content = await service.extract(ctx.page, goal);
        return { success: true, extractedContent: content, includeInMemory: true };
      },
    });

    // Done
    this.registry.register({
      name: 'finish',
      description: 'Mark the task as completed with a result',
      schema: FinishCommandSchema.omit({ action: true }),
      terminatesSequence: true,
      handler: async (params) => {
        const { text, success } = params as { text: string; success?: boolean };
        return {
          success: success ?? true,
          isDone: true,
          extractedContent: text,
          includeInMemory: true,
        };
      },
    });

    // Switch tab
    this.registry.register({
      name: 'focus_tab',
      description: 'Switch to a different browser tab',
      schema: FocusTabCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { tabIndex } = params as { tabIndex: number };
        await ctx.browserSession.switchTab(tabIndex);
        return { success: true };
      },
    });

    // Open tab
    this.registry.register({
      name: 'new_tab',
      description: 'Open a new tab with a URL',
      schema: NewTabCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { url } = params as { url: string };
        this.assertUrlPermitted(url);
        await ctx.browserSession.newTab(url);
        return { success: true };
      },
    });

    // Close tab
    this.registry.register({
      name: 'close_tab',
      description: 'Close a browser tab',
      schema: CloseTabCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { tabIndex } = params as { tabIndex?: number };
        await ctx.browserSession.closeTab(tabIndex);
        return { success: true };
      },
    });

    // Search Google
    this.registry.register({
      name: 'web_search',
      description: 'Search Google for a query',
      schema: WebSearchCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { query } = params as { query: string };
        const url = buildGoogleSearchUrl(query);
        // Apply the same allow/block policy as the other navigating commands.
        this.assertUrlPermitted(url);
        await ctx.browserSession.navigate(url);
        return { success: true };
      },
    });

    // Upload file
    this.registry.register({
      name: 'upload',
      description: 'Upload files to a file input',
      schema: UploadCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { index, filePaths } = params as { index: number; filePaths: string[] };

        // If a fileSystem is available in context, resolve relative paths
        // against the sandbox directory; absolute paths are passed through.
        let resolvedPaths = filePaths;
        if (ctx.fileSystem) {
          const sandboxDir = ctx.fileSystem.getSandboxDir();
          const { isAbsolute, resolve: pathResolve } = await import('node:path');
          resolvedPaths = filePaths.map((fp) =>
            isAbsolute(fp) ? fp : pathResolve(sandboxDir, fp),
          );
        }

        const selector = await ctx.domService.getElementSelector(index);
        if (!selector) {
          return { success: false, error: `Element ${index} not found` };
        }
        const fileInput = await ctx.page.$(selector);
        if (!fileInput) {
          return { success: false, error: `File input element not found` };
        }
        await fileInput.setInputFiles(resolvedPaths);
        return { success: true };
      },
    });

    // Select option
    this.registry.register({
      name: 'select',
      description: 'Select an option in a dropdown',
      schema: SelectCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { index, value } = params as { index: number; value: string };
        const selector = await ctx.domService.getElementSelector(index);
        if (!selector) {
          return { success: false, error: `Element ${index} not found` };
        }
        await ctx.page.selectOption(selector, value);
        return { success: true };
      },
    });

    // Screenshot
    this.registry.register({
      name: 'capture',
      description: 'Take a screenshot of the current page',
      schema: CaptureCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { fullPage } = params as { fullPage?: boolean };
        const result = await ctx.browserSession.screenshot(fullPage);
        return {
          success: true,
          extractedContent: `Screenshot taken (${result.width}x${result.height})`,
        };
      },
    });

    // Read content
    this.registry.register({
      name: 'read_page',
      description: 'Read the text content of the current page',
      schema: ReadPageCommandSchema.omit({ action: true }),
      handler: async (_params, ctx) => {
        const markdown = await extractMarkdown(ctx.page);
        return {
          success: true,
          extractedContent: markdown.slice(0, 10000),
          includeInMemory: true,
        };
      },
    });

    // Wait
    this.registry.register({
      name: 'wait',
      description: 'Wait for a specified number of seconds',
      schema: WaitCommandSchema.omit({ action: true }),
      handler: async (params) => {
        const { seconds } = params as { seconds?: number };
        // Defaults to a three second pause.
        await sleep((seconds ?? 3) * 1000);
        return { success: true };
      },
    });

    // ── New actions ──

    // Scroll to text
    this.registry.register({
      name: 'scroll_to',
      description: 'Scroll to a specific text on the page',
      schema: ScrollToCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { text } = params as { text: string };
        const found = await ctx.page.evaluate((searchText: string) => {
          // Lower-case the needle once instead of once per visited text node.
          const needle = searchText.toLowerCase();
          // Use TreeWalker to find text nodes containing the search text
          const walker = document.createTreeWalker(
            document.body,
            NodeFilter.SHOW_TEXT,
            {
              acceptNode(node) {
                if (node.textContent && node.textContent.toLowerCase().includes(needle)) {
                  return NodeFilter.FILTER_ACCEPT;
                }
                return NodeFilter.FILTER_REJECT;
              },
            },
          );
          const node = walker.nextNode();
          if (!node?.parentElement) return false;
          node.parentElement.scrollIntoView({
            behavior: 'smooth',
            block: 'center',
          });
          return true;
        }, text);

        if (!found) {
          return {
            success: false,
            error: `Text "${text}" not found on the page`,
          };
        }
        // Allow time for the smooth scroll to finish
        await sleep(500);
        return { success: true };
      },
    });

    // Find elements
    this.registry.register({
      name: 'find',
      description: 'Find elements on the page matching a description',
      schema: FindCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { query } = params as { query: string };
        const elements = await ctx.page.evaluate((searchQuery: string) => {
          const results: Array<{
            tag: string;
            text: string;
            attributes: Record<string, string>;
          }> = [];
          const queryLower = searchQuery.toLowerCase();
          // An element can match several selectors (e.g. a <button> that also
          // carries aria-label); track visited elements so each is reported once.
          const visited = new Set<Element>();

          // Search through interactive and content elements
          const selectors = [
            'a',
            'button',
            'input',
            'select',
            'textarea',
            '[role="button"]',
            '[role="link"]',
            '[role="tab"]',
            '[role="menuitem"]',
            'h1',
            'h2',
            'h3',
            'h4',
            'h5',
            'h6',
            'label',
            '[aria-label]',
          ];

          for (const selector of selectors) {
            for (const el of document.querySelectorAll(selector)) {
              if (visited.has(el)) continue;
              visited.add(el);

              const htmlEl = el as HTMLElement;
              const text = (htmlEl.innerText || htmlEl.textContent || '').trim();
              const ariaLabel = el.getAttribute('aria-label') || '';
              const placeholder = el.getAttribute('placeholder') || '';
              const title = el.getAttribute('title') || '';
              const searchableText =
                `${text} ${ariaLabel} ${placeholder} ${title}`.toLowerCase();

              if (searchableText.includes(queryLower)) {
                const attrs: Record<string, string> = {};
                if (el.id) attrs.id = el.id;
                // className is not a string for SVG elements, hence the typeof check.
                if (el.className && typeof el.className === 'string') {
                  attrs.class = el.className;
                }
                if (ariaLabel) attrs['aria-label'] = ariaLabel;
                if (placeholder) attrs.placeholder = placeholder;
                results.push({
                  tag: el.tagName.toLowerCase(),
                  text: text.slice(0, 100),
                  attributes: attrs,
                });
              }
              // Cap at 20 results
              if (results.length >= 20) break;
            }
            if (results.length >= 20) break;
          }
          return results;
        }, query);

        if (elements.length === 0) {
          return {
            success: true,
            extractedContent: `No elements found matching "${query}"`,
            includeInMemory: true,
          };
        }

        const descriptions = elements.map((el, i) => {
          const attrStr = Object.entries(el.attributes)
            .map(([k, v]) => `${k}="${v}"`)
            .join(' ');
          return `[${i}] <${el.tag}${attrStr ? ` ${attrStr}` : ''}> ${el.text}`;
        });
        return {
          success: true,
          extractedContent: `Found ${elements.length} element(s):\n${descriptions.join('\n')}`,
          includeInMemory: true,
        };
      },
    });

    // Search page (multi-engine)
    this.registry.register({
      name: 'search',
      description: 'Search the web using a specified search engine',
      schema: SearchCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { query, engine } = params as {
          query: string;
          engine?: 'google' | 'duckduckgo' | 'bing';
        };
        const searchEngine = engine ?? 'google';
        const url = buildSearchUrl(query, searchEngine);
        this.assertUrlPermitted(url);
        await ctx.browserSession.navigate(url);
        return { success: true };
      },
    });

    // Get dropdown options
    this.registry.register({
      name: 'list_options',
      description: 'Get all options from a select/dropdown element',
      schema: ListOptionsCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { index } = params as { index: number };
        const selector = await ctx.domService.getElementSelector(index);
        if (!selector) {
          return { success: false, error: `Element ${index} not found` };
        }

        // null signals "not a <select>" back to the Node side.
        const options = await ctx.page.evaluate((sel: string) => {
          const selectEl = document.querySelector(sel) as HTMLSelectElement | null;
          if (!selectEl || selectEl.tagName !== 'SELECT') {
            return null;
          }
          return Array.from(selectEl.options).map((opt) => ({
            value: opt.value,
            text: opt.text.trim(),
            selected: opt.selected,
          }));
        }, selector);

        if (!options) {
          return {
            success: false,
            error: `Element ${index} is not a select element`,
          };
        }

        const formatted = options
          .map(
            (opt, i) =>
              `[${i}] "${opt.text}" (value="${opt.value}")${opt.selected ? ' [selected]' : ''}`,
          )
          .join('\n');
        return {
          success: true,
          extractedContent: `Dropdown options:\n${formatted}`,
          includeInMemory: true,
        };
      },
    });

    // Select dropdown option (by text match)
    this.registry.register({
      name: 'pick_option',
      description: 'Select a dropdown option by its visible text',
      schema: PickOptionCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { index, optionText } = params as {
          index: number;
          optionText: string;
        };
        const selector = await ctx.domService.getElementSelector(index);
        if (!selector) {
          return { success: false, error: `Element ${index} not found` };
        }

        // Find the option value by matching text content
        const matchedValue = await ctx.page.evaluate(
          ({ sel, text }: { sel: string; text: string }) => {
            const selectEl = document.querySelector(sel) as HTMLSelectElement | null;
            if (!selectEl || selectEl.tagName !== 'SELECT') return null;
            const textLower = text.toLowerCase();
            // Try exact match first
            for (const opt of selectEl.options) {
              if (opt.text.trim().toLowerCase() === textLower) {
                return opt.value;
              }
            }
            // Try partial / includes match
            for (const opt of selectEl.options) {
              if (opt.text.trim().toLowerCase().includes(textLower)) {
                return opt.value;
              }
            }
            return null;
          },
          { sel: selector, text: optionText },
        );

        if (matchedValue === null) {
          return {
            success: false,
            error: `No option matching "${optionText}" found in dropdown at element ${index}`,
          };
        }
        await ctx.page.selectOption(selector, matchedValue);
        return { success: true };
      },
    });

    // Structured output
    this.useStructuredOutputAction();
  }

  /**
   * Register the structured_output action.
   * Uses the extraction LLM to produce structured JSON output from
   * the current page content according to a caller-provided JSON schema.
   */
  private useStructuredOutputAction(): void {
    this.registry.register({
      name: 'extract_structured',
      description:
        'Extract structured data from the current page content. Returns JSON conforming to the provided schema.',
      schema: ExtractStructuredCommandSchema.omit({ action: true }),
      handler: async (params, ctx) => {
        const { goal, outputSchema, maxContentLength } = params as {
          goal: string;
          outputSchema: Record<string, unknown>;
          maxContentLength?: number;
        };
        const contentLimit = maxContentLength ?? 8000;

        // Resolve the extraction model: prefer context-provided, fall back to Tools-level
        const service = this.resolveExtractionService(ctx);
        if (!service) {
          return {
            success: false,
            error:
              'No extraction LLM configured. Provide a model via CommandExecutorOptions or ExecutionContext.extractionLlm.',
          };
        }

        // Extract page content as markdown
        const markdown = await extractMarkdown(ctx.page);
        if (!markdown.trim()) {
          return {
            success: false,
            error: 'No content found on the page for structured extraction.',
          };
        }

        const truncatedContent = markdown.slice(0, contentLimit);
        try {
          const result = await service.extractFromText(
            truncatedContent,
            goal,
            outputSchema,
          );
          return {
            success: true,
            extractedContent: result,
            includeInMemory: true,
          };
        } catch (error) {
          // Extraction failures are reported as a failed result, not rethrown.
          const message =
            error instanceof Error ? error.message : String(error);
          return {
            success: false,
            error: `Structured extraction failed: ${message}`,
          };
        }
      },
    });
  }

  /**
   * Execute a single command through the catalog.
   *
   * The `action` field selects the registered command; the remaining fields
   * are passed to it as parameters. Errors from the catalog are not caught here.
   */
  async executeAction(
    action: Command,
    context: ExecutionContext,
  ): Promise<CommandResult> {
    const { action: actionName, ...params } = action;
    return this.registry.execute(actionName, params, context);
  }

  /**
   * Execute commands in order, running at most `commandsPerStep` of them.
   *
   * The sequence stops early when a result has `isDone`, when the catalog
   * reports the command as terminating, or when a thrown error is classified
   * as non-retryable. Thrown errors never escape: each becomes a failed result
   * whose message carries the interpreted error and a suggestion. Sensitive
   * values from `context.maskedValues` are masked in results and error text.
   *
   * @returns One result per command that was actually attempted.
   */
  async executeActions(
    actions: Command[],
    context: ExecutionContext,
  ): Promise<CommandResult[]> {
    const results: CommandResult[] = [];
    const limit = Math.min(actions.length, this.commandsPerStep);

    for (let i = 0; i < limit; i++) {
      try {
        const result = await this.executeAction(actions[i], context);
        // Mask sensitive data in extracted content
        const maskedResult = this.maskSensitiveResult(result, context);
        results.push(maskedResult);

        // Stop if we hit a terminating action (done, or custom terminatesSequence)
        if (maskedResult.isDone) break;
        const actionName = actions[i].action;
        if (this.registry.isTerminating(actionName)) break;
      } catch (error) {
        // Interpret the browser error for a more meaningful result
        const interpreted = classifyViewportError(error);
        const errorMessage = `${interpreted.message} | Suggestion: ${interpreted.suggestion}`;
        // Mask sensitive data in error messages too
        const maskedMessage = this.maskSensitiveText(errorMessage, context);
        results.push({
          success: false,
          error: maskedMessage,
        });
        // If the error is not retryable (e.g., browser crash), stop the sequence
        if (!interpreted.isRetryable) break;
      }
    }
    return results;
  }

  // ── Sensitive data masking ──

  /**
   * Mask sensitive data values in a CommandResult's extractedContent and error fields.
   * Returns the input unchanged when the context has no maskedValues; otherwise
   * returns a shallow copy so the original result is not mutated.
   */
  private maskSensitiveResult(
    result: CommandResult,
    context: ExecutionContext,
  ): CommandResult {
    if (!context.maskedValues) return result;

    const masked = { ...result };
    if (masked.extractedContent) {
      masked.extractedContent = this.registry.replaceSensitiveData(
        masked.extractedContent,
        context.maskedValues,
      );
    }
    if (masked.error) {
      masked.error = this.registry.replaceSensitiveData(
        masked.error,
        context.maskedValues,
      );
    }
    return masked;
  }

  /**
   * Mask sensitive data in a plain text string.
   * Returns the text unchanged when the context has no maskedValues.
   */
  private maskSensitiveText(
    text: string,
    context: ExecutionContext,
  ): string {
    if (!context.maskedValues) return text;
    return this.registry.replaceSensitiveData(text, context.maskedValues);
  }
}
// ── Helpers ──
/**
 * Build the results-page URL for `query` on the chosen search engine.
 * The query is percent-encoded with encodeURIComponent before insertion.
 */
function buildSearchUrl(
  query: string,
  engine: 'google' | 'duckduckgo' | 'bing',
): string {
  const q = encodeURIComponent(query);
  // One URL template per supported engine.
  const urlFor: Record<'google' | 'duckduckgo' | 'bing', (term: string) => string> = {
    google: (term) => `https://www.google.com/search?q=${term}&udm=14`,
    duckduckgo: (term) => `https://duckduckgo.com/?q=${term}`,
    bing: (term) => `https://www.bing.com/search?q=${term}`,
  };
  return urlFor[engine]?.(q);
}
// ── Browser error interpretation ──
/**
 * Error pattern matcher: maps regex patterns against error messages to
 * categories, human-readable messages, and actionable suggestions.
 *
 * Order matters: classifyViewportError returns the first entry whose pattern
 * matches, so specific patterns (e.g. net::ERR_ABORTED) must stay ahead of the
 * generic ones they overlap with (net::ERR_).
 */
const ERROR_PATTERNS: Array<{
  pattern: RegExp;
  category: ViewportErrorCategory;
  message: (match: RegExpMatchArray) => string;
  suggestion: string;
  isRetryable: boolean;
}> = [
  // ── Network ──
  {
    pattern: /net::ERR_NAME_NOT_RESOLVED/i,
    category: 'network',
    message: () => 'DNS resolution failed - the domain could not be found.',
    suggestion: 'Check the URL for typos or try a different URL.',
    isRetryable: false,
  },
  {
    pattern: /net::ERR_CONNECTION_REFUSED/i,
    category: 'network',
    message: () => 'Connection refused by the server.',
    suggestion: 'The server may be down. Try again later or use a different URL.',
    isRetryable: true,
  },
  {
    pattern: /net::ERR_CONNECTION_TIMED_OUT/i,
    category: 'network',
    message: () => 'Connection timed out.',
    suggestion: 'The server is not responding. Try again or use a different URL.',
    isRetryable: true,
  },
  {
    pattern: /net::ERR_SSL/i,
    category: 'network',
    message: () => 'SSL/TLS connection error.',
    suggestion: 'The site has an invalid certificate. Try an alternative URL.',
    isRetryable: false,
  },
  {
    pattern: /net::ERR_CERT/i,
    category: 'network',
    message: () => 'Certificate verification failed.',
    suggestion: 'The site has a certificate issue. Try a different URL.',
    isRetryable: false,
  },
  {
    pattern: /net::ERR_ABORTED/i,
    category: 'navigation',
    message: () => 'Navigation was aborted.',
    suggestion: 'The page load was interrupted. Try navigating again.',
    isRetryable: true,
  },
  // Catch-all for any other net::ERR_ code; must come after the specific ones.
  {
    pattern: /net::ERR_/i,
    category: 'network',
    message: (m) => `Network error: ${m[0]}`,
    suggestion: 'A network error occurred. Check the URL and try again.',
    isRetryable: true,
  },
  // ── Timeouts ──
  {
    pattern: /Navigation timeout of \d+ms exceeded/i,
    category: 'timeout',
    message: () => 'Page navigation timed out.',
    suggestion: 'The page took too long to load. Try again or navigate to a simpler page.',
    isRetryable: true,
  },
  {
    pattern: /Timeout \d+ms exceeded/i,
    category: 'timeout',
    message: () => 'Operation timed out.',
    suggestion: 'The operation took too long. Try a simpler action or wait and retry.',
    isRetryable: true,
  },
  {
    pattern: /waiting for selector/i,
    category: 'timeout',
    message: () => 'Timed out waiting for an element to appear.',
    suggestion: 'The element may not exist on this page. Check the page content and try a different selector or index.',
    isRetryable: true,
  },
  // ── Element state ──
  {
    pattern: /Element is not visible/i,
    category: 'element_not_interactable',
    message: () => 'The element exists but is not visible.',
    suggestion: 'Try scrolling to make the element visible, or use a different element.',
    isRetryable: true,
  },
  {
    pattern: /Element is not attached to the DOM/i,
    category: 'element_stale',
    message: () => 'The element reference is stale - the element was removed from the page.',
    suggestion: 'The page content has changed. Re-read the page and use updated element indices.',
    isRetryable: true,
  },
  {
    pattern: /Element is outside of the viewport/i,
    category: 'element_not_interactable',
    message: () => 'The element is outside the visible viewport.',
    suggestion: 'Scroll to bring the element into view before interacting with it.',
    isRetryable: true,
  },
  {
    pattern: /Element is not (?:enabled|editable)/i,
    category: 'element_not_interactable',
    message: () => 'The element is disabled or read-only.',
    suggestion: 'The element cannot be interacted with in its current state. Look for an alternative element or action.',
    isRetryable: false,
  },
  {
    pattern: /intercepts pointer events/i,
    category: 'element_not_interactable',
    message: () => 'Another element is covering the target element.',
    // The keyboard command is registered as 'press_keys'; name it exactly so
    // the suggestion points at a command that exists.
    suggestion: 'An overlay or dialog may be blocking the click. Try closing it first, or use press_keys as an alternative.',
    isRetryable: true,
  },
  {
    pattern: /(?:Element|Node)\s+(?:\d+\s+)?not found/i,
    category: 'element_not_found',
    message: () => 'The specified element was not found on the page.',
    suggestion: 'The element index may be invalid. Re-read the page content to get updated element indices.',
    isRetryable: true,
  },
  {
    pattern: /frame was detached/i,
    category: 'element_stale',
    message: () => 'The frame containing the element has been detached.',
    suggestion: 'The page structure changed. Navigate to a stable page and retry.',
    isRetryable: true,
  },
  // ── Crashes ──
  {
    pattern: /browser has been closed/i,
    category: 'crash',
    message: () => 'The browser has been closed unexpectedly.',
    suggestion: 'The browser session is no longer available.',
    isRetryable: false,
  },
  {
    pattern: /Target (?:page|context|browser) (?:closed|crashed)/i,
    category: 'crash',
    message: () => 'The browser page or context has crashed.',
    suggestion: 'The browser session is no longer available.',
    isRetryable: false,
  },
  {
    pattern: /Protocol error/i,
    category: 'crash',
    message: () => 'Browser protocol communication error.',
    suggestion: 'The browser may have crashed or become unresponsive.',
    isRetryable: false,
  },
  // ── Permissions ──
  {
    pattern: /Permission denied|not allowed/i,
    category: 'permission',
    message: () => 'Permission denied for this operation.',
    suggestion: 'The action requires permissions that are not available. Try an alternative approach.',
    isRetryable: false,
  },
];
/**
 * Turn any thrown value into an InterpretedViewportError: a category, a
 * readable message, a suggestion for the next step, and a retryable flag.
 *
 * Resolution order: the project's own error classes first, then the first
 * matching entry of ERROR_PATTERNS, then a retryable 'unknown' fallback that
 * carries the raw message.
 */
export function classifyViewportError(error: unknown): InterpretedViewportError {
  // Non-Error values are stringified so pattern matching still works.
  const rawMessage = error instanceof Error ? error.message : String(error);

  // 1. Typed errors carry their meaning in the class itself.
  if (error instanceof NavigationFailedError) {
    const { url } = error;
    return {
      category: 'navigation',
      message: `Navigation failed for ${url}: ${rawMessage}`,
      suggestion: 'Check the URL for correctness and try again.',
      isRetryable: true,
    };
  }
  if (error instanceof ViewportCrashedError) {
    return {
      category: 'crash',
      message: rawMessage,
      suggestion: 'The browser has crashed and the session must be restarted.',
      isRetryable: false,
    };
  }
  if (error instanceof UrlBlockedError) {
    return {
      category: 'permission',
      message: rawMessage,
      suggestion: 'This URL is blocked by the allowed/blocked URL configuration. Use a different URL.',
      isRetryable: false,
    };
  }

  // 2. Message-based classification: first matching pattern wins.
  for (const { pattern, category, message, suggestion, isRetryable } of ERROR_PATTERNS) {
    const hit = rawMessage.match(pattern);
    if (hit === null) continue;
    return { category, message: message(hit), suggestion, isRetryable };
  }

  // 3. Nothing recognised: pass the raw message through and allow a retry.
  return {
    category: 'unknown',
    message: rawMessage,
    suggestion: 'An unexpected error occurred. Try a different action or approach.',
    isRetryable: true,
  };
}
================================================
FILE: packages/core/src/commands/extraction/extractor.ts
================================================
import type { Page } from 'playwright';
import type { LanguageModel } from '../../model/interface.js';
import { z } from 'zod';
import {
extractMarkdown,
chunkText,
extractLinks as extractPageLinks,
} from '../../page/content-extractor.js';
import { systemMessage, userMessage } from '../../model/messages.js';
/** Response shape requested from the model for free-text extraction calls. */
const ExtractionResultSchema = z.object({
  content: z.string().describe('The extracted information'),
  confidence: z.number().min(0).max(1).describe('Confidence in the extraction (0-1)'),
});

/** Static type of a validated {@link ExtractionResultSchema} value. */
type ExtractionResult = z.infer<typeof ExtractionResultSchema>;
/**
 * Goal-directed content extraction from a page, backed by a language model.
 *
 * Page content is obtained as markdown via `extractMarkdown`, then handed to
 * the model together with the caller's goal. Every model call uses
 * `temperature: 0` and a zod `responseSchema`.
 */
export class ContentExtractor {
  /** Markdown at or below this length is sent to the model in a single call. */
  private static readonly SINGLE_PASS_LIMIT = 8000;
  /** Size argument passed to `chunkText` when the page exceeds the single-pass limit. */
  private static readonly CHUNK_SIZE = 6000;
  /** Structured extraction only sends this many leading characters of the page. */
  private static readonly STRUCTURED_TEXT_LIMIT = 8000;
  /** Sentinel the extraction prompt asks the model to answer with when nothing matches. */
  private static readonly NOTHING_FOUND = 'No relevant information found.';

  private readonly model: LanguageModel;

  constructor(model: LanguageModel) {
    this.model = model;
  }

  /**
   * Extract free-text information matching `goal` from the page.
   *
   * Short pages are processed in one model call. Longer pages are chunked,
   * each chunk is queried in turn, and multiple non-empty answers are merged
   * by a final model call.
   *
   * @param page - Page to read.
   * @param goal - Description of the information wanted.
   * @param startFromChar - Optional offset forwarded to `extractMarkdown`; ignored unless > 0.
   * @returns The extracted text, or a fixed "nothing found" message.
   */
  async extract(page: Page, goal: string, startFromChar?: number): Promise<string> {
    const offset = startFromChar !== undefined && startFromChar > 0 ? startFromChar : undefined;
    const markdown = await extractMarkdown(page, { startFromChar: offset });

    if (!markdown.trim()) {
      return 'No content found on the page.';
    }

    // For short pages, extract directly.
    if (markdown.length <= ContentExtractor.SINGLE_PASS_LIMIT) {
      return this.extractFromText(markdown, goal);
    }

    // For longer pages, query chunk by chunk. Calls stay sequential so only
    // one model request is in flight at a time, as in the original flow.
    const results: string[] = [];
    for (const chunk of chunkText(markdown, ContentExtractor.CHUNK_SIZE)) {
      const answer = await this.extractFromText(chunk, goal);
      if (answer && !ContentExtractor.isNothingFound(answer)) {
        results.push(answer);
      }
    }

    const [first, ...others] = results;
    if (first === undefined) {
      return 'No relevant information found on the page.';
    }
    if (others.length === 0) {
      return first;
    }

    // Several chunks produced something: let the model merge them.
    return this.combineExtractions(results, goal);
  }

  // ── Structured extraction ──

  /**
   * Extract information from a page and validate it against a Zod schema.
   * The model is prompted to return a JSON string, which is parsed and then
   * validated with `schema.parse`.
   *
   * Only the leading part of the page (see `STRUCTURED_TEXT_LIMIT`) is sent.
   * For `ZodObject` schemas the prompt contains a one-level summary of the
   * shape: each field is rendered as its description when present, otherwise
   * as its Zod type name. Other schema kinds get a generic placeholder.
   *
   * @throws Error when the page yields no content.
   * @throws SyntaxError when the model's payload is not valid JSON.
   * @throws Whatever `schema.parse` throws when validation fails.
   */
  async extractStructured<T>(
    page: Page,
    goal: string,
    schema: z.ZodType<T>,
  ): Promise<T> {
    const markdown = await extractMarkdown(page);

    if (!markdown.trim()) {
      throw new Error('No content found on the page for structured extraction.');
    }

    // Build a JSON schema description for the prompt.
    const schemaDescription =
      schema instanceof z.ZodObject
        ? JSON.stringify(
            schema.shape,
            (_key, value) => {
              if (value?._def?.description) return `(${value._def.description})`;
              if (value?._def?.typeName) return value._def.typeName;
              return value;
            },
            2,
          )
        : 'See schema constraints';

    const text = markdown.slice(0, ContentExtractor.STRUCTURED_TEXT_LIMIT);

    const StructuredOutputSchema = z.object({
      result: z.string().describe('JSON string conforming to the requested schema'),
    });

    const response = await this.model.invoke({
      messages: [
        systemMessage(
          'You are a precise information extractor. Extract the requested information from the provided text and return it as a valid JSON string in the "result" field. The JSON must conform to the schema described below.',
        ),
        userMessage(
          `Goal: ${goal}\n\nExpected schema:\n${schemaDescription}\n\nText content:\n${text}\n\nReturn the extracted data as a JSON string in the "result" field.`,
        ),
      ],
      responseSchema: StructuredOutputSchema,
      schemaName: 'StructuredOutput',
      temperature: 0,
    });

    const parsed = ContentExtractor.parseModelJson(response.parsed.result, 'Structured extraction');
    return schema.parse(parsed);
  }

  // ── Link extraction ──

  /**
   * Extract all links from a page by delegating to the page-level
   * `extractLinks` helper; the result is returned unchanged.
   */
  async extractLinks(
    page: Page,
  ): Promise<Awaited<ReturnType<typeof extractPageLinks>>> {
    return extractPageLinks(page);
  }

  // ── Text extraction with optional JSON schema ──

  /**
   * Extract information matching `goal` from already-obtained text.
   *
   * @param text - Text to search.
   * @param goal - Description of the information wanted.
   * @param outputJsonSchema - When given, the model is asked for JSON matching
   *   this schema and the (re-serialized) JSON string is returned instead.
   * @returns The model's answer; the model is told to answer with the
   *   "No relevant information found." sentinel when nothing matches.
   */
  async extractFromText(
    text: string,
    goal: string,
    outputJsonSchema?: Record<string, unknown>,
  ): Promise<string> {
    // If a JSON schema is provided, ask the LLM to produce structured output.
    if (outputJsonSchema) {
      return this.extractFromTextWithJsonSchema(text, goal, outputJsonSchema);
    }

    const result = await this.model.invoke({
      messages: [
        systemMessage(
          'You are a precise information extractor. Extract only the requested information from the provided text. Be concise and accurate.',
        ),
        userMessage(
          `Goal: ${goal}\n\nText content:\n${text}\n\nExtract the information specified in the goal. If the information is not found, say "${ContentExtractor.NOTHING_FOUND}"`,
        ),
      ],
      responseSchema: ExtractionResultSchema,
      schemaName: 'ExtractionResult',
      temperature: 0,
    });

    return result.parsed.content;
  }

  // ── Private helpers ──

  /**
   * Ask the model for JSON conforming to `jsonSchema` and return it as a
   * compact JSON string. The payload is parsed first so malformed output
   * fails here rather than downstream.
   *
   * @throws SyntaxError when the model's payload is not valid JSON.
   */
  private async extractFromTextWithJsonSchema(
    text: string,
    goal: string,
    jsonSchema: Record<string, unknown>,
  ): Promise<string> {
    const schemaStr = JSON.stringify(jsonSchema, null, 2);

    const JsonExtractionSchema = z.object({
      json: z.string().describe('JSON conforming to the requested schema'),
    });

    const result = await this.model.invoke({
      messages: [
        systemMessage(
          'You are a precise information extractor. Extract the requested information and return it as valid JSON conforming to the provided schema. Put the JSON string in the "json" field.',
        ),
        userMessage(
          `Goal: ${goal}\n\nRequired JSON schema:\n${schemaStr}\n\nText content:\n${text}\n\nExtract and return as JSON.`,
        ),
      ],
      responseSchema: JsonExtractionSchema,
      schemaName: 'JsonExtraction',
      temperature: 0,
    });

    // Validate the JSON parses correctly, then normalise its formatting.
    const parsed = ContentExtractor.parseModelJson(result.parsed.json, 'JSON extraction');
    return JSON.stringify(parsed);
  }

  /** Merge several per-chunk answers into one response via a model call. */
  private async combineExtractions(results: string[], goal: string): Promise<string> {
    const combined = results.map((part, i) => `Part ${i + 1}:\n${part}`).join('\n\n');

    const result = await this.model.invoke({
      messages: [
        systemMessage(
          'Combine the following extracted information into a single coherent response. Remove duplicates and organize logically.',
        ),
        userMessage(`Goal: ${goal}\n\nExtracted parts:\n${combined}`),
      ],
      responseSchema: ExtractionResultSchema,
      schemaName: 'ExtractionResult',
      temperature: 0,
    });

    return result.parsed.content;
  }

  /**
   * True when `answer` is the "nothing found" sentinel, tolerating surrounding
   * whitespace, letter case and a missing trailing period.
   */
  private static isNothingFound(answer: string): boolean {
    const normalise = (value: string): string => value.trim().replace(/\.$/, '').toLowerCase();
    return normalise(answer) === normalise(ContentExtractor.NOTHING_FOUND);
  }

  /**
   * Parse a JSON payload produced by the model. A payload wrapped in a
   * Markdown code fence is unwrapped first.
   *
   * @throws SyntaxError naming the calling operation when parsing fails.
   */
  private static parseModelJson(raw: string, operation: string): unknown {
    const trimmed = raw.trim();
    const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
    const candidate = fenced?.[1] ?? trimmed;
    try {
      return JSON.parse(candidate);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`${operation}: model returned invalid JSON (${reason})`);
    }
  }
}
================================================
FILE: packages/core/src/commands/index.ts
================================================
export { CommandExecutor, type CommandExecutorOptions, classifyViewportError } from './executor.js';
export { CommandCatalog } from './catalog/catalog.js';
export { ContentExtractor } from './extraction/extractor.js';
export { type CatalogEntry, type CatalogOptions } from './catalog/types.js';
export {
CommandSchema,
type Command,
type CommandName,
type CommandResult,
type ExecutionContext,
type CustomCommandSpec,
type ViewportErrorCategory,
type InterpretedViewportError,
TapCommandSchema,
TypeTextCommandSchema,
NavigateCommandSchema,
BackCommandSchema,
ScrollCommandSchema,
PressKeysCommandSchema,
ExtractCommandSchema,
FinishCommandSchema,
FocusTabCommandSchema,
NewTabCommandSchema,
CloseTabCommandSchema,
WebSearchCommandSchema,
UploadCommandSchema,
SelectCommandSchema,
CaptureCommandSchema,
ReadPageCommandSchema,
WaitCommandSchema,
ScrollToCommandSchema,
FindCommandSchema,
SearchCommandSchema,
ListOptionsCommandSchema,
PickOptionCommandSchema,
ExtractStructuredCommandSchema,
} from './types.js';
================================================
FILE: packages/core/src/commands/types.ts
================================================
import { z } from 'zod';
// ── Individual action schemas ──
// Each schema below describes one agent command. The `action` literal is the
// tag used by the `CommandSchema` discriminated union further down.

/** Click the element at `index`; `clickCount` falls back to 1. Optional X/Y are for coordinate-based clicking. */
export const TapCommandSchema = z.object({
action: z.literal('tap'),
index: z.number().describe('Element index to click'),
clickCount: z.number().optional().default(1).describe('Number of clicks'),
coordinateX: z.number().optional().describe('X coordinate for coordinate-based clicking'),
coordinateY: z.number().optional().describe('Y coordinate for coordinate-based clicking'),
});
/** Type `text` into the element at `index`; `clearFirst` falls back to true. */
export const TypeTextCommandSchema = z.object({
action: z.literal('type_text'),
index: z.number().describe('Element index to type into'),
text: z.string().describe('Text to input'),
clearFirst: z.boolean().optional().default(true).describe('Clear existing text first'),
});
/** Navigate to `url`. */
export const NavigateCommandSchema = z.object({
action: z.literal('navigate'),
url: z.string().describe('URL to navigate to'),
});
/** "back" command; carries no parameters. */
export const BackCommandSchema = z.object({
action: z.literal('back'),
});
/**
 * Scroll up or down, optionally inside the element at `index`.
 * NOTE(review): both `amount` ("pixels or pages") and `pages` are accepted;
 * how the executor reconciles them is not visible here - confirm.
 */
export const ScrollCommandSchema = z.object({
action: z.literal('scroll'),
direction: z.enum(['up', 'down']).describe('Scroll direction'),
amount: z.number().optional().describe('Scroll amount in pixels or pages'),
index: z.number().optional().describe('Element index to scroll within'),
pages: z.number().optional().describe('Number of pages to scroll (fractional allowed)'),
});
/** Send a key or key combination given as a string. */
export const PressKeysCommandSchema = z.object({
action: z.literal('press_keys'),
keys: z.string().describe('Keys to send (e.g., "Enter", "Escape", "Control+a")'),
});
/** Extract information matching `goal`, optionally shaped by a JSON schema. */
export const ExtractCommandSchema = z.object({
action: z.literal('extract'),
goal: z.string().describe('What information to extract from the page'),
outputSchema: z.record(z.unknown()).optional().describe('Optional JSON schema for structured output'),
});
/** Final result text plus a `success` flag that falls back to true. */
export const FinishCommandSchema = z.object({
action: z.literal('finish'),
text: z.string().describe('Final result text'),
success: z.boolean().optional().default(true),
});
/** Switch to the tab at `tabIndex`. */
export const FocusTabCommandSchema = z.object({
action: z.literal('focus_tab'),
tabIndex: z.number().describe('Tab index to switch to'),
});
/** Open `url` in a new tab. */
export const NewTabCommandSchema = z.object({
action: z.literal('new_tab'),
url: z.string().describe('URL to open in new tab'),
});
/** Close the tab at `tabIndex`, or the current tab when omitted. */
export const CloseTabCommandSchema = z.object({
action: z.literal('close_tab'),
tabIndex: z.number().optional().describe('Tab index to close (current if omitted)'),
});
/** Web search for `query`. See also `SearchCommandSchema`, which adds an engine choice. */
export const WebSearchCommandSchema = z.object({
action: z.literal('web_search'),
query: z.string().describe('Search query'),
});
/** Upload the files at `filePaths` through the file input at `index`. */
export const UploadCommandSchema = z.object({
action: z.literal('upload'),
index: z.number().describe('File input element index'),
filePaths: z.array(z.string()).describe('File paths to upload'),
});
/** Select the option with the given `value` in the select element at `index`. */
export const SelectCommandSchema = z.object({
action: z.literal('select'),
index: z.number().describe('Select element index'),
value: z.string().describe('Option value to select'),
});
/** "capture" command; `fullPage` falls back to false. */
export const CaptureCommandSchema = z.object({
action: z.literal('capture'),
fullPage: z.boolean().optional().default(false),
});
/** "read_page" command; carries no parameters. */
export const ReadPageCommandSchema = z.object({
action: z.literal('read_page'),
});
/** Wait for `seconds`, which falls back to 3. */
export const WaitCommandSchema = z.object({
action: z.literal('wait'),
seconds: z.number().optional().default(3).describe('Seconds to wait'),
});
// ── New action schemas ──
/** Scroll to the given `text` on the page. */
export const ScrollToCommandSchema = z.object({
action: z.literal('scroll_to'),
text: z.string().describe('Text to scroll to on the page'),
});
/** Find elements matching a natural-language `query`. */
export const FindCommandSchema = z.object({
action: z.literal('find'),
query: z.string().describe('Description of elements to find (e.g., "all submit buttons")'),
});
/** Search for `query` with a chosen engine; `engine` falls back to 'google'. */
export const SearchCommandSchema = z.object({
action: z.literal('search'),
query: z.string().describe('Search query'),
engine: z.enum(['google', 'duckduckgo', 'bing']).optional().default('google'),
});
/** "list_options" command targeting the select element at `index`. */
export const ListOptionsCommandSchema = z.object({
action: z.literal('list_options'),
index: z.number().describe('Select element index'),
});
/** Pick an option by its visible text in the select element at `index`. */
export const PickOptionCommandSchema = z.object({
action: z.literal('pick_option'),
index: z.number().describe('Select element index'),
optionText: z.string().describe('Text of the option to select'),
});
/**
 * Extract data conforming to a required JSON schema.
 * Unlike `ExtractCommandSchema`, `outputSchema` is mandatory here, and
 * `maxContentLength` (falls back to 8000) caps the page content sent to the LLM.
 */
export const ExtractStructuredCommandSchema = z.object({
action: z.literal('extract_structured'),
goal: z.string().describe('Description of what data to extract from the page'),
outputSchema: z
.record(z.unknown())
.describe(
'JSON Schema describing the structure of the expected output. The LLM will return data conforming to this schema.',
),
maxContentLength: z
.number()
.optional()
.default(8000)
.describe('Maximum number of characters of page content to send to the LLM'),
});
// ── Discriminated union of all actions ──
/**
 * Union of every built-in command, discriminated on the `action` literal.
 * Parsing a value with this schema selects the member schema by `action`
 * and validates the remaining fields against it.
 */
export const CommandSchema = z.discriminatedUnion('action', [
  TapCommandSchema,
  TypeTextCommandSchema,
  NavigateCommandSchema,
  BackCommandSchema,
  ScrollCommandSchema,
  PressKeysCommandSchema,
  ExtractCommandSchema,
  FinishCommandSchema,
  FocusTabCommandSchema,
  NewTabCommandSchema,
  CloseTabCommandSchema,
  WebSearchCommandSchema,
  UploadCommandSchema,
  SelectCommandSchema,
  CaptureCommandSchema,
  ReadPageCommandSchema,
  WaitCommandSchema,
  ScrollToCommandSchema,
  FindCommandSchema,
  SearchCommandSchema,
  ListOptionsCommandSchema,
  PickOptionCommandSchema,
  ExtractStructuredCommandSchema,
]);

/** A validated command: the output type of {@link CommandSchema}. */
export type Command = z.infer<typeof CommandSchema>;

/** The set of `action` tags accepted by {@link CommandSchema}. */
export type CommandName = Command['action'];
// ── Action result ──
/**
 * Outcome of executing a single command.
 * Only `success` is required. The notes on the optional fields are inferred
 * from their names - confirm against the executor.
 */
export interface CommandResult {
success: boolean;
// presumably text produced by the command (e.g. extraction output)
extractedContent?: string;
// presumably a human-readable failure description
error?: string;
// presumably set when the command ends the run (cf. the `finish` command)
isDone?: boolean;
// presumably controls whether the result is kept in the agent's conversation memory
includeInMemory?: boolean;
}
// ── Browser error categories ──
/**
 * Coarse classification assigned to a browser/tool error.
 * `'unknown'` is the fallback used when no specific category applies.
 */
export type ViewportErrorCategory =
| 'navigation'
| 'element_not_found'
| 'element_stale'
| 'element_not_interactable'
| 'timeout'
| 'permission'
| 'network'
| 'crash'
| 'unknown';
/**
 * Structured interpretation of an error: its category, a human-readable
 * message, an actionable suggestion, and whether retrying may help.
 */
export interface InterpretedViewportError {
category: ViewportErrorCategory;
message: string;
suggestion: string;
isRetryable: boolean;
}
// ── Custom action definition ──
/**
 * Definition of a user-supplied command: a name, a description, a zod object
 * schema for its parameters, and an async handler that receives the
 * parameters plus an {@link ExecutionContext}.
 *
 * NOTE(review): `z.ZodObject`, `Record` and `Promise` appear here without
 * type arguments, which does not type-check. The arguments were probably lost
 * in extraction (the handler presumably resolves to a `CommandResult`) -
 * restore them from the upstream file.
 */
export interface CustomCommandSpec {
name: string;
description: string;
schema: z.ZodObject;
handler: (params: Record, context: ExecutionContext) => Promise;
// presumably marks commands after which no further commands in the same step should run - confirm
terminatesSequence?: boolean;
}
/**
 * Handles passed to command handlers: the Playwright page and CDP session,
 * the page analyzer, the owning viewport, and optional extras (an extraction
 * model, sandboxed file access, masked values).
 *
 * Types are referenced through inline `import(...)` types, so this module
 * needs no top-level imports for them.
 *
 * NOTE(review): `maskedValues` is typed as bare `Record`; its type arguments
 * appear to have been lost in extraction - restore them from the upstream file.
 */
export interface ExecutionContext {
page: import('playwright').Page;
cdpSession: import('playwright').CDPSession;
domService: import('../page/page-analyzer.js').PageAnalyzer;
browserSession: import('../viewport/viewport.js').Viewport;
extractionLlm?: import('../model/interface.js').LanguageModel;
fileSystem?: import('../sandbox/file-access.js').FileAccess;
maskedValues?: Record;
}
================================================
FILE: packages/core/src/commands/utils.ts
================================================
import type { Page } from 'playwright';
/**
 * Scroll the page's window vertically.
 *
 * @param page - Playwright page to scroll.
 * @param direction - `'down'` scrolls by a positive delta, `'up'` by a negative one.
 * @param amount - Distance passed to `window.scrollBy`; defaults to 500.
 * @returns Resolves after the scroll call and a fixed 200 ms settle delay.
 */
export async function scrollPage(
  page: Page,
  direction: 'up' | 'down',
  amount?: number,
): Promise<void> {
  const distance = amount ?? 500;
  const delta = direction === 'down' ? distance : -distance;

  // The callback runs in the page context, so the delta is passed as an argument.
  await page.evaluate((d) => {
    window.scrollBy(0, d);
  }, delta);

  // Wait for scroll to complete.
  await new Promise<void>((resolve) => setTimeout(resolve, 200));
}
/**
 * Scroll the first element matching `selector` vertically.
 * If no element matches, nothing is scrolled and no error is raised.
 *
 * @param page - Playwright page containing the element.
 * @param selector - CSS selector resolved with `document.querySelector`.
 * @param direction - `'down'` scrolls by a positive delta, `'up'` by a negative one.
 * @param amount - Distance passed to `Element.scrollBy`; defaults to 300.
 * @returns Resolves after the scroll call and a fixed 200 ms settle delay.
 */
export async function scrollElement(
  page: Page,
  selector: string,
  direction: 'up' | 'down',
  amount?: number,
): Promise<void> {
  const distance = amount ?? 300;
  const delta = direction === 'down' ? distance : -distance;

  // The callback runs in the page context; selector and delta travel as one argument.
  await page.evaluate(
    ({ sel, d }) => {
      const el = document.querySelector(sel);
      if (el) el.scrollBy(0, d);
    },
    { sel: selector, d: delta },
  );

  // Wait for scroll to complete.
  await new Promise<void>((resolve) => setTimeout(resolve, 200));
}
/**
 * Build a Google search URL for `query`.
 * The query is percent-encoded with `encodeURIComponent`, and the fixed
 * `udm=14` parameter is appended.
 */
export function buildGoogleSearchUrl(query: string): string {
  const encodedQuery = encodeURIComponent(query);
  return 'https://www.google.com/search?q=' + encodedQuery + '&udm=14';
}
================================================
FILE: packages/core/src/config/config.ts
================================================
import { config as loadDotenv } from 'dotenv';
import * as path from 'node:path';
import * as os from 'node:os';
import * as fs from 'node:fs';
import { type GlobalConfig, GlobalConfigSchema, type ConfigFileContents } from './types.js';
import type { DeepPartial } from '../types.js';
import { createLogger } from '../logging.js';
const logger = createLogger('config');

/** Lazily created singleton; set by `Config.instance()` and cleared by `Config.reset()`. */
let _instance: Config | undefined;

/**
 * Process-wide configuration.
 *
 * The effective configuration is assembled from three layers, lowest to
 * highest precedence: environment-derived defaults, the JSON config file in
 * the config directory, and overrides passed to {@link Config.instance}. The
 * merged result is validated (and defaulted) by `GlobalConfigSchema`.
 */
export class Config {
  /** Keys never copied during merging, so file content cannot tamper with prototypes. */
  private static readonly UNSAFE_KEYS: ReadonlySet<string> = new Set([
    '__proto__',
    'constructor',
    'prototype',
  ]);

  readonly config: GlobalConfig;

  private constructor(overrides: DeepPartial<GlobalConfig> = {}) {
    loadDotenv();

    // Later arguments win: env defaults < config file < explicit overrides.
    const fileConfig = Config.loadConfigFile();
    const merged = this.deepMerge(this.mergeEnvDefaults({}), fileConfig, overrides);

    this.config = GlobalConfigSchema.parse(merged);
  }

  /**
   * Return the singleton, creating it on first use.
   *
   * @param overrides - Applied only when the instance is created by this call;
   *   they are ignored (with a debug log) once an instance exists. Call
   *   {@link Config.reset} first to rebuild with new overrides.
   */
  static instance(overrides?: DeepPartial<GlobalConfig>): Config {
    if (!_instance) {
      _instance = new Config(overrides);
    } else if (overrides !== undefined) {
      logger.debug('Config already initialized; overrides ignored. Call Config.reset() to rebuild.');
    }
    return _instance;
  }

  /** Drop the singleton so the next `instance()` call rebuilds it. */
  static reset(): void {
    _instance = undefined;
  }

  /**
   * Build defaults from environment variables, letting `overrides` win.
   * `browser` fields are merged key by key, so a partial `overrides.browser`
   * does not discard the environment-derived browser settings.
   */
  private mergeEnvDefaults(overrides: DeepPartial<GlobalConfig>): DeepPartial<GlobalConfig> {
    const env = process.env;
    const { browser: browserOverrides, ...otherOverrides } = overrides;

    // `||` is deliberate: an empty proxy variable counts as unset.
    const fallbackProxyServer = env.HTTPS_PROXY || env.HTTP_PROXY;
    const proxy = env.OPEN_BROWSER_PROXY_SERVER
      ? {
          server: env.OPEN_BROWSER_PROXY_SERVER,
          username: env.OPEN_BROWSER_PROXY_USERNAME,
          password: env.OPEN_BROWSER_PROXY_PASSWORD,
        }
      : fallbackProxyServer
        ? { server: fallbackProxyServer }
        : undefined;

    return {
      ...otherOverrides,
      browser: {
        headless: env.BROWSER_HEADLESS !== 'false',
        relaxedSecurity: env.BROWSER_DISABLE_SECURITY === 'true',
        browserBinaryPath: env.BROWSER_BINARY_PATH,
        userDataDir: env.BROWSER_USER_DATA_DIR,
        ...(proxy ? { proxy } : {}),
        ...browserOverrides,
      },
      tracePath: overrides.tracePath ?? env.OPEN_BROWSER_TRACE_PATH,
      recordingPath: overrides.recordingPath ?? env.OPEN_BROWSER_SAVE_RECORDING_PATH,
    };
  }

  /**
   * Recursively merge plain objects left to right into a fresh object.
   * `undefined` values never overwrite; arrays and `null` replace wholesale.
   * Inputs are not mutated.
   */
  private deepMerge(...objects: DeepPartial<GlobalConfig>[]): DeepPartial<GlobalConfig> {
    const result: Record<string, unknown> = {};

    for (const obj of objects) {
      if (!obj) continue;

      for (const [key, value] of Object.entries(obj)) {
        if (value === undefined || Config.UNSAFE_KEYS.has(key)) continue;

        const existing = result[key];
        result[key] =
          Config.isPlainObject(value) && Config.isPlainObject(existing)
            ? this.deepMerge(
                existing as DeepPartial<GlobalConfig>,
                value as DeepPartial<GlobalConfig>,
              )
            : value;
      }
    }

    return result as DeepPartial<GlobalConfig>;
  }

  /** True for non-null, non-array objects. */
  private static isPlainObject(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }

  /** Create `dir` (and parents) if it does not exist yet, then return it. */
  private static ensureDir(dir: string): string {
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir, { recursive: true });
    }
    return dir;
  }

  get browser() {
    return this.config.browser;
  }

  get agent() {
    return this.config.agent;
  }

  /** `~/.open-browser`; created on first access. */
  static get configDir(): string {
    return Config.ensureDir(path.join(os.homedir(), '.open-browser'));
  }

  /** `tmp` directory inside the config directory; created on first access. */
  static get tmpDir(): string {
    return Config.ensureDir(path.join(Config.configDir, 'tmp'));
  }

  /** Path of `config.json` inside the config directory. */
  static get configFilePath(): string {
    return path.join(Config.configDir, 'config.json');
  }

  /**
   * Read the config file if present.
   * Never throws: read or parse failures, and content that is not a JSON
   * object, are logged as warnings and yield an empty configuration.
   */
  static loadConfigFile(): DeepPartial<GlobalConfig> {
    try {
      const filePath = Config.configFilePath;
      if (fs.existsSync(filePath)) {
        const raw = fs.readFileSync(filePath, 'utf-8');
        const parsed: unknown = JSON.parse(raw);
        if (!Config.isPlainObject(parsed)) {
          logger.warn(`Failed to load config file: ${filePath} does not contain a JSON object`);
          return {};
        }
        logger.debug(`Loaded config from ${filePath}`);
        return parsed as ConfigFileContents;
      }
    } catch (error) {
      logger.warn(`Failed to load config file: ${error}`);
    }
    return {};
  }

  /** Write `config` to the config file as pretty-printed JSON. */
  static saveConfigFile(config: ConfigFileContents): void {
    const filePath = Config.configFilePath;
    Config.ensureDir(path.dirname(filePath));
    fs.writeFileSync(filePath, JSON.stringify(config, null, 2), 'utf-8');
    logger.info(`Config saved to ${filePath}`);
  }

  /**
   * Heuristic container detection: `/.dockerenv` exists, or `/proc/1/cgroup`
   * mentions `docker` or `kubepods`. Any filesystem error counts as "no".
   */
  static isDocker(): boolean {
    try {
      if (fs.existsSync('/.dockerenv')) return true;
      if (fs.existsSync('/proc/1/cgroup')) {
        const cgroup = fs.readFileSync('/proc/1/cgroup', 'utf-8');
        return cgroup.includes('docker') || cgroup.includes('kubepods');
      }
    } catch {
      // Unreadable probe files are treated as "not Docker".
    }
    return false;
  }

  /**
   * Whether a graphical display is assumed to be available: always on Windows
   * and macOS, otherwise only when `DISPLAY` or `WAYLAND_DISPLAY` is set.
   */
  static hasDisplay(): boolean {
    if (process.platform === 'win32' || process.platform === 'darwin') return true;
    return !!process.env.DISPLAY || !!process.env.WAYLAND_DISPLAY;
  }
}
================================================
FILE: packages/core/src/config/index.ts
================================================
export { Config } from './config.js';
export {
type ViewportConfig,
ViewportConfigSchema,
type AgentConfig,
AgentConfigSchema,
type GlobalConfig,
GlobalConfigSchema,
} from './types.js';
================================================
FILE: packages/core/src/config/types.ts
================================================
import { z } from 'zod';
/** Proxy settings: a required `server` plus optional credentials and bypass list. */
export const ProxyConfigSchema = z.object({
  server: z.string(),
  username: z.string().optional(),
  password: z.string().optional(),
  bypass: z.array(z.string()).optional(),
});

/** Validated output type of {@link ProxyConfigSchema}. */
export type ProxyConfig = z.infer<typeof ProxyConfigSchema>;
/**
 * Browser/viewport settings. Fields with `.default(...)` are filled in by zod
 * when omitted; `.optional()` fields stay undefined.
 */
export const ViewportConfigSchema = z.object({
  headless: z.boolean().default(true),
  relaxedSecurity: z.boolean().default(false),
  extraChromiumArgs: z.array(z.string()).default([]),
  windowWidth: z.number().default(1280),
  windowHeight: z.number().default(1100),
  proxy: ProxyConfigSchema.optional(),
  minWaitPageLoadMs: z.number().default(500),
  waitForNetworkIdleMs: z.number().default(1000),
  maxWaitPageLoadMs: z.number().default(5000),
  cookieFile: z.string().optional(),
  minimumWaitBetweenActions: z.number().default(1000),
  maxErrorLength: z.number().default(400),
  commandsPerStep: z.number().default(10),
  browserBinaryPath: z.string().optional(),
  userDataDir: z.string().optional(),
  persistAfterClose: z.boolean().default(false),
  channelName: z.string().optional(),
  deterministicRendering: z.boolean().default(false),
  maxIframes: z.number().default(3),
  downloadsPath: z.string().optional(),
});

/** Validated output type of {@link ViewportConfigSchema}. */
export type ViewportConfig = z.infer<typeof ViewportConfigSchema>;
/**
 * Agent settings. Fields with `.default(...)` are filled in by zod when
 * omitted; `.optional()` fields stay undefined. `plannerModel` is accepted
 * without validation (`z.any()`).
 */
export const AgentConfigSchema = z.object({
  stepLimit: z.number().default(100),
  commandsPerStep: z.number().default(10),
  failureThreshold: z.number().default(5),
  retryDelay: z.number().default(10),
  enableScreenshots: z.boolean().default(true),
  enableScreenshotsForTextExtraction: z.boolean().default(false),
  contextWindowSize: z.number().default(128000),
  inlineCommands: z.boolean().default(true),
  capturedAttributes: z.array(z.string()).default([
    'title',
    'type',
    'name',
    'role',
    'tabindex',
    'aria-label',
    'placeholder',
    'value',
    'alt',
    'aria-expanded',
  ]),
  commandDelayMs: z.number().default(1),
  allowedUrls: z.array(z.string()).optional(),
  blockedUrls: z.array(z.string()).optional(),
  traceOutputPath: z.string().optional(),
  replayOutputPath: z.string().optional(),
  strategyInterval: z.number().default(0),
  plannerModel: z.any().optional(),
  enableStrategy: z.boolean().default(false),
  enableEvaluation: z.boolean().default(false),
  stepTimeout: z.number().default(60000),
  llmTimeout: z.number().default(30000),
  maxElementsInDom: z.number().default(2000),
  coordinateClicking: z.boolean().default(false),
  compactMode: z.boolean().default(false),
});

/** Validated output type of {@link AgentConfigSchema}. */
export type AgentConfig = z.infer<typeof AgentConfigSchema>;
/**
 * Top-level configuration. `browser` and `agent` default to `{}`, which the
 * nested schemas then expand to their own defaults.
 */
export const GlobalConfigSchema = z.object({
  browser: ViewportConfigSchema.default({}),
  agent: AgentConfigSchema.default({}),
  tracePath: z.string().default('./traces'),
  recordingPath: z.string().default('./recordings'),
});

/** Validated output type of {@link GlobalConfigSchema}. */
export type GlobalConfig = z.infer<typeof GlobalConfigSchema>;
/**
 * Shape of the on-disk JSON config file. Every section and field is
 * optional; missing values fall back to lower-precedence sources.
 */
export interface ConfigFileContents {
  browser?: Partial<ViewportConfig>;
  agent?: Partial<AgentConfig>;
  tracePath?: string;
  recordingPath?: string;
}
================================================
FILE: packages/core/src/errors.ts
================================================
/**
 * Root of the library's error hierarchy. Every error class in this file
 * extends it, directly or indirectly, so `instanceof OpenBrowserError`
 * identifies any of them.
 */
export class OpenBrowserError extends Error {
constructor(message: string, options?: ErrorOptions) {
super(message, options);
// Set explicitly so `name` reports this class rather than "Error".
this.name = 'OpenBrowserError';
}
}
/** Base class for browser/viewport errors (launch, navigation, crash, contextual). */
export class ViewportError extends OpenBrowserError {
constructor(message: string, options?: ErrorOptions) {
super(message, options);
this.name = 'ViewportError';
}
}
/** Viewport error subtype named for launch failures. */
export class LaunchFailedError extends ViewportError {
constructor(message: string, options?: ErrorOptions) {
super(message, options);
this.name = 'LaunchFailedError';
}
}
/**
 * Viewport error for a failed navigation.
 * The target URL is kept on the public, read-only `url` property.
 */
export class NavigationFailedError extends ViewportError {
constructor(
message: string,
// Parameter property: declares and assigns `this.url`.
public readonly url: string,
options?: ErrorOptions,
) {
super(message, options);
this.name = 'NavigationFailedError';
}
}
/** Viewport error for a browser crash; the message defaults to 'Browser has crashed'. */
export class ViewportCrashedError extends ViewportError {
constructor(message = 'Browser has crashed', options?: ErrorOptions) {
super(message, options);
this.name = 'ViewportCrashedError';
}
}
/** Base class for agent errors (stalled agent, step limit exceeded). */
export class AgentError extends OpenBrowserError {
constructor(message: string, options?: ErrorOptions) {
super(message, options);
this.name = 'AgentError';
}
}
/** Agent error for a stuck agent; the message defaults to 'Agent is stuck in a loop'. */
export class AgentStalledError extends AgentError {
constructor(message = 'Agent is stuck in a loop', options?: ErrorOptions) {
super(message, options);
this.name = 'AgentStalledError';
}
}
/**
 * Agent error for reaching the maximum number of steps.
 * The message is generated from the two counters, which are also exposed as
 * read-only properties.
 */
export class StepLimitExceededError extends AgentError {
public readonly stepsTaken: number;
public readonly stepLimit: number;
constructor(stepsTaken: number, stepLimit: number, options?: ErrorOptions) {
super(`Agent reached maximum steps (${stepsTaken}/${stepLimit})`, options);
this.name = 'StepLimitExceededError';
this.stepsTaken = stepsTaken;
this.stepLimit = stepLimit;
}
}
/**
 * Error for a URL that is not allowed.
 * Extends `OpenBrowserError` directly (not `ViewportError`); the message is
 * generated from the URL, which is also exposed as `url`.
 */
export class UrlBlockedError extends OpenBrowserError {
public readonly url: string;
constructor(url: string, options?: ErrorOptions) {
super(`URL not allowed: ${url}`, options);
this.name = 'UrlBlockedError';
this.url = url;
}
}
/** Error subtype named for page extraction failures. */
export class PageExtractionError extends OpenBrowserError {
constructor(message: string, options?: ErrorOptions) {
super(message, options);
this.name = 'PageExtractionError';
}
}
/** Base class for language-model errors (throttling, provider failures). */
export class ModelError extends OpenBrowserError {
constructor(message: string, options?: ErrorOptions) {
super(message, options);
this.name = 'ModelError';
}
}
/**
 * Model error for throttling.
 * `retryAfterMs` is optional and stored as given; presumably the suggested
 * wait before retrying, in milliseconds - confirm against the throwing code.
 */
export class ModelThrottledError extends ModelError {
public readonly retryAfterMs?: number;
constructor(message: string, retryAfterMs?: number, options?: ErrorOptions) {
super(message, options);
this.name = 'ModelThrottledError';
this.retryAfterMs = retryAfterMs;
}
}
/**
 * Error for a failed tool/command.
 * The final message has the form `Tool "<toolName>" failed: <message>`, and
 * the tool name is also exposed as `toolName`.
 */
export class CommandFailedError extends OpenBrowserError {
public readonly toolName: string;
constructor(toolName: string, message: string, options?: ErrorOptions) {
super(`Tool "${toolName}" failed: ${message}`, options);
this.name = 'CommandFailedError';
this.toolName = toolName;
}
}
/**
 * Viewport error annotated with the page URL, page title and step number.
 * The message is prefixed with the step number and suffixed with the URL;
 * the page title is only available through the `pageTitle` property.
 */
export class ContextualViewportError extends ViewportError {
public readonly pageUrl: string;
public readonly pageTitle: string;
public readonly stepNumber: number;
constructor(
message: string,
context: { pageUrl: string; pageTitle: string; stepNumber: number },
options?: ErrorOptions,
) {
super(
`[Step ${context.stepNumber}] ${message} (url: ${context.pageUrl})`,
options,
);
this.name = 'ContextualViewportError';
this.pageUrl = context.pageUrl;
this.pageTitle = context.pageTitle;
this.stepNumber = context.stepNumber;
}
}
/**
 * Model error attributed to a specific provider.
 * The message is prefixed with `[<provider>]`; an optional status code is
 * kept on `statusCode`.
 */
export class ProviderError extends ModelError {
public readonly provider: string;
public readonly statusCode?: number;
constructor(
provider: string,
message: string,
statusCode?: number,
options?: ErrorOptions,
) {
super(`[${provider}] ${message}`, options);
this.name = 'ProviderError';
this.provider = provider;
this.statusCode = statusCode;
}
/**
 * True only when a status code is present and is 429 or at least 500.
 * Errors without a status code are reported as not retryable.
 */
get isRetryable(): boolean {
if (this.statusCode === undefined) return false;
return this.statusCode === 429 || this.statusCode >= 500;
}
}
/**
 * Error for a validation failure on a named field.
 * The message joins the issues with '; '; the field name and the issue list
 * are also exposed as properties.
 */
export class SchemaViolationError extends OpenBrowserError {
public readonly field: string;
public readonly issues: string[];
constructor(field: string, issues: string[], options?: ErrorOptions) {
super(`Validation failed for "${field}": ${issues.join('; ')}`, options);
this.name = 'SchemaViolationError';
this.field = field;
this.issues = issues;
}
}
================================================
FILE: packages/core/src/index.ts
================================================
// ── Core types ──
export {
type TargetId,
type SessionId,
type ElementRef,
type TabId,
targetId,
sessionId,
elementIndex,
tabId,
type Result,
ok,
err,
type Position,
type Rect,
LogLevel,
type DeepPartial,
type Awaitable,
} from './types.js';
// ── Errors ──
export {
OpenBrowserError,
ViewportError,
LaunchFailedError,
NavigationFailedError,
ViewportCrashedError,
ContextualViewportError,
AgentError,
AgentStalledError,
StepLimitExceededError,
UrlBlockedError,
PageExtractionError,
ModelError,
ModelThrottledError,
CommandFailedError,
ProviderError,
SchemaViolationError,
} from './errors.js';
// ── Logging ──
export {
Logger,
createLogger,
setGlobalLogLevel,
getGlobalLogLevel,
setLogColors,
setLogTimestamps,
} from './logging.js';
// ── Observability ──
export {
timed,
withTiming,
Stopwatch,
type TimingResult,
} from './telemetry.js';
// ── Utils ──
export { generateId, matchesUrlPattern, isUrlPermitted, sleep, withDeadline, Timer } from './utils.js';
// ── Config ──
export { Config } from './config/index.js';
export type { ViewportConfig, AgentConfig as AgentConfigSchema, GlobalConfig } from './config/index.js';
// ── LLM ──
export {
type LanguageModel,
type InferenceOptions,
type ModelProvider,
type InferenceResult,
type InferenceUsage,
type Message,
type SystemMessage,
type UserMessage,
type AssistantMessage,
type ToolResultMessage,
type ToolCall,
type ContentPart,
type TextContent,
type ImageContent,
systemMessage,
userMessage,
assistantMessage,
toolResultMessage,
textContent,
imageContent,
VercelModelAdapter,
type VercelModelAdapterOptions,
zodToJsonSchema,
optimizeSchemaForModel,
optimizeJsonSchemaForModel,
type SchemaOptimizationOptions,
} from './model/index.js';
// ── Browser ──
export {
Viewport,
type ViewportOptions,
LaunchProfile,
EventHub,
BaseGuard,
type GuardContext,
VisualTracer,
type VisualTracerOptions,
type TabDescriptor,
type ViewportSnapshot,
type ViewportHistory,
type LaunchOptions,
type PageState,
type ViewportEventMap,
type ViewportRequestMap,
type NavigateEvent,
type ClickEvent,
type InputEvent,
type ScrollEvent,
type ScreenshotEvent,
type ScreenshotResult,
type DownloadEvent,
type PopupEvent,
type SecurityEvent,
type CrashEvent,
} from './viewport/index.js';
// ── DOM ──
export {
PageAnalyzer,
type PageAnalyzerOptions,
SnapshotBuilder,
TreeRenderer,
type RendererOptions,
extractMarkdown,
htmlToMarkdown,
extractTextContent,
extractLinks,
chunkText,
type MarkdownExtractionOptions,
type PageTreeNode,
type SelectorIndex,
type RenderedPageState,
type DOMRect,
type CDPSnapshotResult,
type AXNode,
type TargetInfo,
type TargetAllTrees,
type InteractedElement,
type MatchLevel,
type SimplifiedNode,
} from './page/index.js';
// ── FileAccess ──
export {
FileAccess,
type FileAccessOptions,
type FileInfo,
type FileAccessState,
} from './sandbox/index.js';
// ── Commands ──
export {
CommandExecutor,
type CommandExecutorOptions,
classifyViewportError,
CommandCatalog,
ContentExtractor,
type CatalogEntry,
type CatalogOptions,
CommandSchema,
type Command,
type CommandName,
type CommandResult,
type ExecutionContext,
type CustomCommandSpec,
type ViewportErrorCategory,
type InterpretedViewportError,
TapCommandSchema,
TypeTextCommandSchema,
NavigateCommandSchema,
BackCommandSchema,
ScrollCommandSchema,
PressKeysCommandSchema,
ExtractCommandSchema,
FinishCommandSchema,
FocusTabCommandSchema,
NewTabCommandSchema,
CloseTabCommandSchema,
WebSearchCommandSchema,
UploadCommandSchema,
SelectCommandSchema,
CaptureCommandSchema,
ReadPageCommandSchema,
WaitCommandSchema,
ScrollToCommandSchema,
FindCommandSchema,
SearchCommandSchema,
ListOptionsCommandSchema,
PickOptionCommandSchema,
ExtractStructuredCommandSchema,
} from './commands/index.js';
// ── Agent ──
export {
Agent,
type AgentOptions,
InstructionBuilder,
StepPromptBuilder,
buildCommandDescriptions,
buildContextualCommands,
buildExtractionInstructionBuilder,
buildExtractionUserPrompt,
clearTemplateCache,
type PromptTemplate,
type InstructionBuilderOptions,
type StepInfo,
type StepPromptBuilderOptions,
ConversationManager,
StallDetector,
hashPageTree,
hashTextContent,
type PageSignature,
type StallDetectorConfig,
type StallCheckResult,
ResultEvaluator,
constructEvaluatorMessages,
constructQuickCheckMessages,
ReplayRecorder,
type ReplayRecorderOptions,
type AgentConfig,
type AgentState,
type AgentDecision,
type AgentDecisionCompact,
type AgentDecisionDirect,
type StepRecord,
ExecutionLog,
type RunOutcome,
type Reasoning,
type PlanStep,
type EvaluationResult,
type QuickCheckResult,
type CompactionPolicy,
type StepTelemetry,
type ExtractedVariable,
type AccumulatedCost,
type StepCostBreakdown,
type PricingTable as AgentPricingTable,
type PlanRevision,
AgentDecisionSchema,
AgentDecisionCompactSchema,
AgentDecisionDirectSchema,
ReasoningSchema,
EvaluationResultSchema,
QuickCheckResultSchema,
PlanStepSchema,
StrategyPlanSchema,
PlanRevisionSchema,
PRICING_TABLE,
calculateStepCost,
supportsDeepReasoning,
supportsCoordinateMode,
isCompactModel,
DEFAULT_AGENT_CONFIG,
type ConversationManagerOptions,
type TrackedMessage,
type ConversationManagerState,
type ConversationEntry,
type SerializedTrackedMessage,
type MessageCategory,
estimateTokens,
estimateMessageTokens,
redactSensitiveValues,
redactMessage,
redactMessages,
extractTextContent as extractMessageTextContent,
truncate,
} from './agent/index.js';
// ── Bridge ──
export { BridgeServer, type BridgeServerOptions, BridgeClient, type BridgeClientOptions, BridgeAdapter } from './bridge/index.js';
// ── Metering ──
export {
UsageMeter,
CompositeUsageMeter,
BudgetDepletedError,
estimateTokenCount,
DEFAULT_COST_RATES,
type UsageRecord,
type CostRates,
type PricingTable,
type ModelRole,
type ActionUsageRecord,
type MeteringSummary,
type ModelUsageBreakdown,
type RoleUsageBreakdown,
type BudgetPolicy,
type BudgetState,
} from './metering/index.js';
================================================
FILE: packages/core/src/logging.ts
================================================
import { LogLevel } from './types.js';
/**
 * Label printed in the level column for each log level.
 *
 * Typed as a partial record because LogLevel is declared in ./types.js and may
 * contain members without an entry here; lookups fall back to a default label.
 */
const LEVEL_NAMES: Partial<Record<LogLevel, string>> = {
  [LogLevel.DEBUG]: 'DEBUG',
  [LogLevel.INFO]: 'INFO',
  [LogLevel.WARN]: 'WARN',
  [LogLevel.ERROR]: 'ERROR',
};

/** ANSI foreground color escape used for each level label when colors are on. */
const LEVEL_COLORS: Partial<Record<LogLevel, string>> = {
  [LogLevel.DEBUG]: '\x1b[36m', // cyan
  [LogLevel.INFO]: '\x1b[32m', // green
  [LogLevel.WARN]: '\x1b[33m', // yellow
  [LogLevel.ERROR]: '\x1b[31m', // red
};

// ANSI escape sequences: reset all attributes, dim text, bold text.
const RESET = '\x1b[0m';
const DIM = '\x1b[2m';
const BOLD = '\x1b[1m';

// Module-wide logging configuration, changed through the exported setters.
/** Threshold applied to every logger that has no level of its own. */
let globalLevel: LogLevel = LogLevel.INFO;
/** Whether formatted lines include ANSI color/style escapes. */
let useColors = true;
/** Whether formatted lines start with an HH:MM:SS.mmm timestamp. */
let logTimestamps = true;
/**
 * Set the module-wide log threshold.
 *
 * Loggers that have not been given their own level via `Logger.setLevel`
 * use this value as their effective level.
 *
 * @param level - New global threshold.
 */
export function setGlobalLogLevel(level: LogLevel): void {
globalLevel = level;
}
/**
 * Read the module-wide log threshold.
 *
 * @returns The level currently used by loggers without a level of their own.
 */
export function getGlobalLogLevel(): LogLevel {
return globalLevel;
}
/**
 * Turn ANSI color/style escapes in formatted log lines on or off.
 *
 * @param enabled - `true` to emit escapes, `false` for plain text.
 */
export function setLogColors(enabled: boolean): void {
useColors = enabled;
}
/**
 * Turn the leading timestamp of formatted log lines on or off.
 *
 * @param enabled - `true` to prefix each line with the current time.
 */
export function setLogTimestamps(enabled: boolean): void {
logTimestamps = enabled;
}
/**
 * Render the current local time as `HH:MM:SS.mmm`.
 *
 * @returns Zero-padded hours, minutes and seconds joined by colons, followed
 *          by a dot and the zero-padded milliseconds.
 */
function formatTimestamp(): string {
  const current = new Date();
  const zeroPad = (value: number, width: number): string =>
    value.toString().padStart(width, '0');

  const clock = [current.getHours(), current.getMinutes(), current.getSeconds()]
    .map((part) => zeroPad(part, 2))
    .join(':');

  return `${clock}.${zeroPad(current.getMilliseconds(), 3)}`;
}
/**
 * Assemble one log line from its parts, separated by single spaces.
 *
 * Layout: optional timestamp, level label padded to five characters, the
 * logger name in square brackets, then the message. When colors are enabled
 * the timestamp is dimmed, the label is tinted per level and the name is bold.
 *
 * @param level - Severity of the entry; unknown levels are labelled `UNKNOWN`.
 * @param name - Logger name shown in brackets.
 * @param message - Message text appended verbatim.
 * @returns The formatted line.
 */
function formatMessage(level: LogLevel, name: string, message: string): string {
  const segments: string[] = [];

  if (logTimestamps) {
    const stamp = formatTimestamp();
    segments.push(useColors ? `${DIM}${stamp}${RESET}` : stamp);
  }

  const label = (LEVEL_NAMES[level] ?? 'UNKNOWN').padEnd(5);
  const tint = LEVEL_COLORS[level] ?? '';

  if (!useColors) {
    segments.push(label, `[${name}]`);
  } else {
    segments.push(`${tint}${label}${RESET}`, `${BOLD}[${name}]${RESET}`);
  }

  segments.push(message);
  return segments.join(' ');
}
/**
 * Named logger that writes formatted lines to the console.
 *
 * A logger has no level of its own until `setLevel` is called; until then it
 * follows the module-wide level. Entries below the effective level are dropped.
 */
export class Logger {
  /** Name shown in brackets on every line written by this logger. */
  readonly name: string;
  /** Per-logger threshold; `null` means "follow the global level". */
  private level: LogLevel | null = null;

  constructor(name: string) {
    this.name = name;
  }

  /** Give this logger its own threshold, overriding the global one. */
  setLevel(level: LogLevel): void {
    this.level = level;
  }

  /** Threshold actually applied: the logger's own level, else the global level. */
  getEffectiveLevel(): LogLevel {
    return this.level ?? globalLevel;
  }

  /** Whether an entry of the given level would be written. */
  isEnabled(level: LogLevel): boolean {
    const threshold = this.getEffectiveLevel();
    return level >= threshold;
  }

  debug(message: string, ...args: unknown[]): void {
    this.log(LogLevel.DEBUG, message, ...args);
  }

  info(message: string, ...args: unknown[]): void {
    this.log(LogLevel.INFO, message, ...args);
  }

  warn(message: string, ...args: unknown[]): void {
    this.log(LogLevel.WARN, message, ...args);
  }

  error(message: string, ...args: unknown[]): void {
    this.log(LogLevel.ERROR, message, ...args);
  }

  /**
   * Format and emit one entry if its level is enabled.
   * ERROR goes to `console.error`, WARN to `console.warn`, everything else to
   * `console.log`; extra arguments are forwarded to the console untouched.
   */
  private log(level: LogLevel, message: string, ...args: unknown[]): void {
    if (!this.isEnabled(level)) {
      return;
    }

    const line = formatMessage(level, this.name, message);

    if (level === LogLevel.ERROR) {
      console.error(line, ...args);
    } else if (level === LogLevel.WARN) {
      console.warn(line, ...args);
    } else {
      console.log(line, ...args);
    }
  }
}
/** Loggers already handed out, keyed by name, so each name maps to one instance. */
const loggerCache = new Map<string, Logger>();

/**
 * Get the logger registered under `name`, creating it on first request.
 *
 * Repeated calls with the same name return the same `Logger` instance, so a
 * level set on it is seen by every holder of that logger.
 *
 * @param name - Logger name, also used as the cache key.
 * @returns The cached or newly created logger.
 */
export function createLogger(name: string): Logger {
  let logger = loggerCache.get(name);
  if (!logger) {
    logger = new Logger(name);
    loggerCache.set(name, logger);
  }
  return logger;
}
================================================
FILE: packages/core/src/metering/index.ts
================================================
export { UsageMeter, CompositeUsageMeter, BudgetDepletedError, estimateTokenCount } from './tracker.js';
export {
DEFAULT_COST_RATES,
type UsageRecord,
type CostRates,
type PricingTable,
type ModelRole,
type ActionUsageRecord,
type MeteringSummary,
type ModelUsageBreakdown,
type RoleUsageBreakdown,
type BudgetPolicy,
type BudgetState,
} from './types.js';
================================================
FILE: packages/core/src/metering/tracker.test.ts
================================================
import { test, expect, describe, beforeEach, mock } from 'bun:test';
import {
UsageMeter,
CompositeUsageMeter,
BudgetDepletedError,
estimateTokenCount,
} from './tracker.js';
import type { PricingTable } from './types.js';
// ── Shared pricing for predictable cost calculations ──
// Passed to every meter constructed in this file so the expected dollar
// amounts in the assertions are derived from these rates only.
// Rates are USD per one million tokens.
const TEST_PRICING: PricingTable = {
'gpt-4o': { inputCostPerMillion: 2.5, outputCostPerMillion: 10.0 },
'gpt-4o-mini': { inputCostPerMillion: 0.15, outputCostPerMillion: 0.6 },
'claude-3-5-sonnet': { inputCostPerMillion: 3.0, outputCostPerMillion: 15.0 },
};
// ── UsageMeter ──
// Specs for the single-model meter: accumulation, cost estimation, per-step
// history, summary text, reset, and partial model-ID pricing lookup.
describe('UsageMeter', () => {
let tracker: UsageMeter;
// Fresh gpt-4o meter with the shared test pricing before every test.
beforeEach(() => {
tracker = new UsageMeter('gpt-4o', TEST_PRICING);
});
describe('record and getTotalUsage', () => {
test('records token usage and returns totals', () => {
tracker.record(100, 50);
const usage = tracker.getTotalUsage();
expect(usage.inputTokens).toBe(100);
expect(usage.outputTokens).toBe(50);
expect(usage.totalTokens).toBe(150);
});
test('accumulates across multiple records', () => {
tracker.record(100, 50);
tracker.record(200, 100);
tracker.record(300, 150);
const usage = tracker.getTotalUsage();
expect(usage.inputTokens).toBe(600);
expect(usage.outputTokens).toBe(300);
expect(usage.totalTokens).toBe(900);
});
test('returns a copy of usage object', () => {
tracker.record(100, 50);
const usage1 = tracker.getTotalUsage();
const usage2 = tracker.getTotalUsage();
// Distinct object identity, equal contents.
expect(usage1).not.toBe(usage2);
expect(usage1).toEqual(usage2);
});
});
describe('getEstimatedCost', () => {
test('computes correct cost for gpt-4o', () => {
// gpt-4o: $2.50/M input, $10.00/M output
tracker.record(1_000_000, 500_000);
const cost = tracker.getEstimatedCost();
// input: 1M * 2.5/M = 2.5; output: 0.5M * 10/M = 5.0
expect(cost).toBeCloseTo(7.5, 4);
});
test('returns 0 for unknown model', () => {
// No pricing entry matches this ID, so the estimate falls back to 0.
const unknown = new UsageMeter('unknown-model', TEST_PRICING);
unknown.record(1000, 500);
expect(unknown.getEstimatedCost()).toBe(0);
});
test('formats cost as dollar string', () => {
tracker.record(100_000, 50_000);
const formatted = tracker.getEstimatedCostFormatted();
// "$" followed by a number with exactly four decimals.
expect(formatted).toMatch(/^\$\d+\.\d{4}$/);
});
});
describe('getStepUsages', () => {
test('tracks per-step usage', () => {
tracker.record(100, 50);
tracker.record(200, 100);
const steps = tracker.getStepUsages();
expect(steps).toHaveLength(2);
expect(steps[0]).toEqual({ inputTokens: 100, outputTokens: 50, totalTokens: 150 });
expect(steps[1]).toEqual({ inputTokens: 200, outputTokens: 100, totalTokens: 300 });
});
test('returns a copy of step usages array', () => {
tracker.record(100, 50);
const steps1 = tracker.getStepUsages();
const steps2 = tracker.getStepUsages();
expect(steps1).not.toBe(steps2);
});
});
describe('getSummary', () => {
test('returns formatted summary string', () => {
tracker.record(1000, 500);
const summary = tracker.getSummary();
// Only the line labels are checked; numbers are locale-formatted.
expect(summary).toContain('Model: gpt-4o');
expect(summary).toContain('Steps: 1');
expect(summary).toContain('Input tokens:');
expect(summary).toContain('Output tokens:');
expect(summary).toContain('Total tokens:');
expect(summary).toContain('Estimated cost: $');
});
});
describe('reset', () => {
test('resets all usage data', () => {
tracker.record(1000, 500);
tracker.record(2000, 1000);
tracker.reset();
const usage = tracker.getTotalUsage();
expect(usage.inputTokens).toBe(0);
expect(usage.outputTokens).toBe(0);
expect(usage.totalTokens).toBe(0);
expect(tracker.getStepUsages()).toHaveLength(0);
expect(tracker.getEstimatedCost()).toBe(0);
});
});
describe('partial model matching', () => {
test('matches model by partial ID', () => {
// "gpt-4o" pricing should match "gpt-4o-2024-08-06" via partial match
const versioned = new UsageMeter('gpt-4o-2024-08-06', TEST_PRICING);
versioned.record(1_000_000, 0);
// Should find gpt-4o pricing ($2.50/M input)
expect(versioned.getEstimatedCost()).toBeCloseTo(2.5, 4);
});
});
});
// ── CompositeUsageMeter ──
// Specs for the multi-model meter: aggregation across models, cost totals,
// per-model trackers, budget thresholds/exhaustion, summaries, reset, and the
// session timer.
describe('CompositeUsageMeter', () => {
let multiTracker: CompositeUsageMeter;
// Fresh meter with the shared test pricing and no budget before every test.
beforeEach(() => {
multiTracker = new CompositeUsageMeter(TEST_PRICING);
});
describe('record and getTotalUsage', () => {
test('records usage for a single model', () => {
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 1000,
outputTokens: 500,
});
const usage = multiTracker.getTotalUsage();
expect(usage.inputTokens).toBe(1000);
expect(usage.outputTokens).toBe(500);
expect(usage.totalTokens).toBe(1500);
});
test('aggregates across multiple models', () => {
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 1000,
outputTokens: 500,
});
multiTracker.record({
modelId: 'gpt-4o-mini',
role: 'extraction',
inputTokens: 2000,
outputTokens: 800,
});
const usage = multiTracker.getTotalUsage();
expect(usage.inputTokens).toBe(3000);
expect(usage.outputTokens).toBe(1300);
expect(usage.totalTokens).toBe(4300);
});
test('returns estimated cost for the recorded call', () => {
const cost = multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 1_000_000,
outputTokens: 0,
});
// gpt-4o: $2.50/M input
expect(cost).toBeCloseTo(2.5, 4);
});
});
describe('getTotalCost', () => {
test('sums costs across all models', () => {
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 1_000_000,
outputTokens: 0,
});
multiTracker.record({
modelId: 'gpt-4o-mini',
role: 'extraction',
inputTokens: 1_000_000,
outputTokens: 0,
});
const totalCost = multiTracker.getTotalCost();
// gpt-4o: $2.50; gpt-4o-mini: $0.15
expect(totalCost).toBeCloseTo(2.65, 4);
});
test('formats total cost', () => {
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 100_000,
outputTokens: 50_000,
});
const formatted = multiTracker.getTotalCostFormatted();
// "$" followed by a number with exactly four decimals.
expect(formatted).toMatch(/^\$\d+\.\d{4}$/);
});
});
describe('getTracker', () => {
test('returns per-model tracker', () => {
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 500,
outputTokens: 200,
});
const tracker = multiTracker.getTracker('gpt-4o');
expect(tracker.getTotalUsage().inputTokens).toBe(500);
});
test('creates tracker on first access', () => {
// Nothing was recorded for this model; the tracker is created empty.
const tracker = multiTracker.getTracker('claude-3-5-sonnet');
expect(tracker).toBeDefined();
expect(tracker.getTotalUsage().totalTokens).toBe(0);
});
});
describe('budget alerts', () => {
test('fires threshold callback when cost crosses threshold', () => {
const thresholdCrossed = mock(() => {});
multiTracker.setBudget({
maxCostUsd: 1.0,
thresholds: [0.5, 0.8, 1.0],
onThresholdCrossed: thresholdCrossed,
});
// Record enough to cross 0.5 threshold ($0.50)
// gpt-4o: $2.50/M input -> need 200k tokens for $0.50
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 200_000,
outputTokens: 0,
});
expect(thresholdCrossed).toHaveBeenCalledTimes(1);
// Callback arguments are (currentCost, threshold, maxCost).
const call = (thresholdCrossed as any).mock.calls[0];
expect(call[1]).toBe(0.5); // threshold
expect(call[2]).toBe(1.0); // maxCost
});
test('fires multiple thresholds as cost increases', () => {
const thresholdCrossed = mock(() => {});
multiTracker.setBudget({
maxCostUsd: 1.0,
thresholds: [0.5, 1.0],
onThresholdCrossed: thresholdCrossed,
});
// Cross 0.5 threshold
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 200_000,
outputTokens: 0,
});
// Cross 1.0 threshold
// No onBudgetExhausted is configured, so reaching the limit does not throw.
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 200_000,
outputTokens: 0,
});
expect(thresholdCrossed).toHaveBeenCalledTimes(2);
});
test('does not fire same threshold twice', () => {
const thresholdCrossed = mock(() => {});
multiTracker.setBudget({
maxCostUsd: 1.0,
thresholds: [0.5],
onThresholdCrossed: thresholdCrossed,
});
// Cross 0.5 threshold twice
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 200_000,
outputTokens: 0,
});
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 10_000,
outputTokens: 0,
});
expect(thresholdCrossed).toHaveBeenCalledTimes(1);
});
test('throws BudgetDepletedError when budget exceeded and callback returns false', () => {
multiTracker.setBudget({
maxCostUsd: 0.01,
thresholds: [1.0],
onThresholdCrossed: () => {},
onBudgetExhausted: () => false,
});
// 1M gpt-4o input tokens cost $2.50, far above the $0.01 limit.
expect(() =>
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 1_000_000,
outputTokens: 0,
}),
).toThrow(BudgetDepletedError);
});
test('allows continuing when onBudgetExhausted returns true', () => {
multiTracker.setBudget({
maxCostUsd: 0.01,
thresholds: [1.0],
onThresholdCrossed: () => {},
onBudgetExhausted: () => true,
});
expect(() =>
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 1_000_000,
outputTokens: 0,
}),
).not.toThrow();
});
test('getBudgetState reflects current state', () => {
multiTracker.setBudget({
maxCostUsd: 10.0,
thresholds: [0.5],
onThresholdCrossed: () => {},
});
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 1_000_000,
outputTokens: 0,
});
// $2.50 spent of $10.00 -> 25% used, not exhausted.
const status = multiTracker.getBudgetState();
expect(status.maxCostUsd).toBe(10.0);
expect(status.currentCostUsd).toBeCloseTo(2.5, 2);
expect(status.fractionUsed).toBeCloseTo(0.25, 2);
expect(status.isExhausted).toBe(false);
});
test('clearBudget removes budget configuration', () => {
multiTracker.setBudget({
maxCostUsd: 1.0,
thresholds: [0.5],
onThresholdCrossed: () => {},
});
multiTracker.clearBudget();
const status = multiTracker.getBudgetState();
expect(status.maxCostUsd).toBeUndefined();
expect(status.fractionUsed).toBeUndefined();
expect(status.isExhausted).toBe(false);
});
});
describe('MeteringSummary generation', () => {
test('generates comprehensive summary', () => {
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 1000,
outputTokens: 500,
stepIndex: 0,
actionName: 'tap',
});
multiTracker.record({
modelId: 'gpt-4o-mini',
role: 'extraction',
inputTokens: 2000,
outputTokens: 300,
stepIndex: 1,
actionName: 'extract',
});
const summary = multiTracker.getSummary();
expect(summary.totalInputTokens).toBe(3000);
expect(summary.totalOutputTokens).toBe(800);
expect(summary.totalTokens).toBe(3800);
expect(summary.totalCalls).toBe(2);
expect(summary.totalEstimatedCost).toBeGreaterThan(0);
// By model breakdown
expect(summary.byModel).toHaveLength(2);
const gpt4o = summary.byModel.find((m) => m.modelId === 'gpt-4o');
expect(gpt4o).toBeDefined();
expect(gpt4o!.inputTokens).toBe(1000);
expect(gpt4o!.callCount).toBe(1);
// By role breakdown
expect(summary.byRole).toHaveLength(2);
const mainRole = summary.byRole.find((r) => r.role === 'main');
expect(mainRole).toBeDefined();
expect(mainRole!.callCount).toBe(1);
// Action trace
expect(summary.actionTrace).toHaveLength(2);
expect(summary.actionTrace[0].actionName).toBe('tap');
expect(summary.actionTrace[1].actionName).toBe('extract');
// Duration
expect(summary.durationMs).toBeDefined();
expect(summary.durationMs!).toBeGreaterThanOrEqual(0);
});
test('generates human-readable summary text', () => {
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 10000,
outputTokens: 5000,
});
const text = multiTracker.getSummaryText();
// Only section headings and labels are checked, not the numbers.
expect(text).toContain('Token Usage Summary');
expect(text).toContain('Total:');
expect(text).toContain('Cost:');
expect(text).toContain('Calls:');
expect(text).toContain('Duration:');
expect(text).toContain('By Role');
expect(text).toContain('By Model');
});
test('includes budget info in summary text when configured', () => {
multiTracker.setBudget({
maxCostUsd: 5.0,
thresholds: [],
onThresholdCrossed: () => {},
});
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 100_000,
outputTokens: 0,
});
const text = multiTracker.getSummaryText();
expect(text).toContain('Budget:');
expect(text).toContain('$5.0000');
});
});
describe('reset', () => {
test('clears all tracking data', () => {
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 1000,
outputTokens: 500,
});
multiTracker.record({
modelId: 'gpt-4o-mini',
role: 'extraction',
inputTokens: 500,
outputTokens: 200,
});
multiTracker.reset();
const usage = multiTracker.getTotalUsage();
expect(usage.totalTokens).toBe(0);
expect(multiTracker.getTotalCost()).toBe(0);
const summary = multiTracker.getSummary();
expect(summary.totalCalls).toBe(0);
expect(summary.byModel).toHaveLength(0);
expect(summary.byRole).toHaveLength(0);
// reset() also clears the session timer.
expect(summary.durationMs).toBeUndefined();
});
test('resets budget thresholds', () => {
const thresholdCrossed = mock(() => {});
multiTracker.setBudget({
maxCostUsd: 1.0,
thresholds: [0.5],
onThresholdCrossed: thresholdCrossed,
});
// Cross 0.5 threshold
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 200_000,
outputTokens: 0,
});
multiTracker.reset();
// Record again -- should fire threshold callback again since it was reset
// But reset() clears crossedThresholds AND trackers, so cost starts at 0
// (the budget configuration itself is kept by reset()).
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 200_000,
outputTokens: 0,
});
// Both before and after reset should have fired
expect(thresholdCrossed).toHaveBeenCalledTimes(2);
});
});
describe('auto-start', () => {
test('automatically starts timer on first record', () => {
// Before any record the timer is unset, so no duration is reported.
const summary1 = multiTracker.getSummary();
expect(summary1.durationMs).toBeUndefined();
multiTracker.record({
modelId: 'gpt-4o',
role: 'main',
inputTokens: 100,
outputTokens: 50,
});
const summary2 = multiTracker.getSummary();
expect(summary2.durationMs).toBeDefined();
});
test('explicit start() sets the timer', () => {
multiTracker.start();
const summary = multiTracker.getSummary();
expect(summary.durationMs).toBeDefined();
expect(summary.durationMs!).toBeGreaterThanOrEqual(0);
});
});
});
// ── estimateTokenCount ──
// Specs for the character-based token heuristic: ceil(length / 4).
describe('estimateTokenCount', () => {
test('estimates roughly 1 token per 4 chars', () => {
expect(estimateTokenCount('hello world')).toBe(3); // ceil(11/4)
});
test('returns 0 for empty string', () => {
expect(estimateTokenCount('')).toBe(0);
});
test('rounds up', () => {
expect(estimateTokenCount('a')).toBe(1); // ceil(1/4) = 1
});
});
// ── BudgetDepletedError ──
// Specs for the error thrown when a budget is exhausted: carried fields,
// four-decimal dollar amounts in the message, and Error inheritance.
describe('BudgetDepletedError', () => {
test('has correct properties', () => {
const error = new BudgetDepletedError(5.5, 5.0);
expect(error.name).toBe('BudgetDepletedError');
expect(error.currentCost).toBe(5.5);
expect(error.maxCost).toBe(5.0);
expect(error.message).toContain('$5.5000');
expect(error.message).toContain('$5.0000');
});
test('is instanceof Error', () => {
const error = new BudgetDepletedError(1, 1);
expect(error instanceof Error).toBe(true);
});
});
================================================
FILE: packages/core/src/metering/tracker.ts
================================================
import type {
UsageRecord,
CostRates,
PricingTable,
ModelRole,
ActionUsageRecord,
MeteringSummary,
ModelUsageBreakdown,
RoleUsageBreakdown,
BudgetPolicy,
BudgetState,
} from './types.js';
import { DEFAULT_COST_RATES } from './types.js';
// ── Single-model tracker (unchanged public API) ──
/**
 * Accumulates token usage for a single model and estimates its cost.
 *
 * Every `record` call is kept as one step and added to running totals.
 * Pricing is looked up in the table given at construction (or
 * `DEFAULT_COST_RATES`); a model without a pricing entry has a cost of 0.
 * All getters return copies, so callers cannot alter the meter's state.
 */
export class UsageMeter {
  private usage: UsageRecord = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  private readonly pricing: PricingTable;
  private readonly modelId: string;
  private stepUsages: UsageRecord[] = [];

  /**
   * @param modelId - Model whose usage is tracked; also used for pricing lookup.
   * @param customPricing - Pricing table to use instead of `DEFAULT_COST_RATES`.
   */
  constructor(modelId: string, customPricing?: PricingTable) {
    this.modelId = modelId;
    this.pricing = customPricing ?? DEFAULT_COST_RATES;
  }

  /** Add one step of usage to the history and to the running totals. */
  record(inputTokens: number, outputTokens: number): void {
    const totalTokens = inputTokens + outputTokens;
    this.usage.inputTokens += inputTokens;
    this.usage.outputTokens += outputTokens;
    this.usage.totalTokens += totalTokens;
    this.stepUsages.push({ inputTokens, outputTokens, totalTokens });
  }

  /** Copy of the accumulated totals. */
  getTotalUsage(): UsageRecord {
    return { ...this.usage };
  }

  /**
   * Per-step usage in recording order.
   * Both the array and each record are copies, so mutating the result does
   * not change the meter's history.
   */
  getStepUsages(): UsageRecord[] {
    return this.stepUsages.map((step) => ({ ...step }));
  }

  /** Estimated cost in USD of all recorded usage; 0 when the model has no pricing. */
  getEstimatedCost(): number {
    const cost = this.getModelCost();
    if (!cost) return 0;
    return (
      (this.usage.inputTokens / 1_000_000) * cost.inputCostPerMillion +
      (this.usage.outputTokens / 1_000_000) * cost.outputCostPerMillion
    );
  }

  /** Estimated cost as a dollar string with four decimals, e.g. `$0.7500`. */
  getEstimatedCostFormatted(): string {
    const cost = this.getEstimatedCost();
    return `$${cost.toFixed(4)}`;
  }

  /** Pricing entry for this meter's model, if any. */
  private getModelCost(): CostRates | undefined {
    return resolveModelCost(this.modelId, this.pricing);
  }

  /** Multi-line, human-readable report: model, step count, token totals, cost. */
  getSummary(): string {
    const lines = [
      `Model: ${this.modelId}`,
      `Steps: ${this.stepUsages.length}`,
      `Input tokens: ${this.usage.inputTokens.toLocaleString()}`,
      `Output tokens: ${this.usage.outputTokens.toLocaleString()}`,
      `Total tokens: ${this.usage.totalTokens.toLocaleString()}`,
      `Estimated cost: ${this.getEstimatedCostFormatted()}`,
    ];
    return lines.join('\n');
  }

  /** Discard all recorded usage; the model ID and pricing are kept. */
  reset(): void {
    this.usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
    this.stepUsages = [];
  }
}
// ── Multi-model tracker ──
/**
 * Tracks token usage across multiple LLM roles (main, extraction, judge, compaction)
 * with per-action cost breakdown, budget alerts, and comprehensive summaries.
 *
 * Usage is kept in two forms: one `UsageMeter` per model ID (source of the
 * token and cost totals) and a chronological action trace (source of the
 * per-model / per-role breakdowns and the call count).
 */
export class CompositeUsageMeter {
  private readonly pricing: PricingTable;
  /** Per-model meters, created on first use of a model ID. */
  private readonly trackers = new Map<string, UsageMeter>();
  /** Every recorded call, in recording order. */
  private readonly actionTrace: ActionUsageRecord[] = [];
  private budgetConfig: BudgetPolicy | undefined;
  /** Threshold fractions whose callback has already fired. */
  private crossedThresholds = new Set<number>();
  /** `Date.now()` value at which the session timer started; undefined until started. */
  private startTime: number | undefined;

  /** @param customPricing - Pricing table to use instead of `DEFAULT_COST_RATES`. */
  constructor(customPricing?: PricingTable) {
    this.pricing = customPricing ?? DEFAULT_COST_RATES;
  }

  /** Start the session timer. Called automatically on first record if not called explicitly. */
  start(): void {
    this.startTime = Date.now();
  }

  /**
   * Configure budget alerts. Thresholds default to [0.5, 0.8, 1.0].
   * Any previously crossed thresholds are forgotten.
   * Returns this for chaining.
   */
  setBudget(config: BudgetPolicy): this {
    this.budgetConfig = {
      ...config,
      thresholds: config.thresholds ?? [0.5, 0.8, 1.0],
    };
    this.crossedThresholds.clear();
    return this;
  }

  /** Clear the budget configuration. */
  clearBudget(): void {
    this.budgetConfig = undefined;
    this.crossedThresholds.clear();
  }

  /**
   * Record token usage for a specific model and role.
   * Returns the estimated cost for this single call.
   * Throws if budget is exhausted and onBudgetExhausted returns false.
   *
   * `stepIndex` defaults to the current length of the action trace and
   * `actionName` to `'unknown'`. The usage is stored before the budget check,
   * so it remains recorded even when the check throws.
   *
   * @throws BudgetDepletedError when the budget is exhausted and the
   *         `onBudgetExhausted` callback returns a falsy value.
   */
  record(opts: {
    modelId: string;
    role: ModelRole;
    inputTokens: number;
    outputTokens: number;
    stepIndex?: number;
    actionName?: string;
  }): number {
    // Compare against undefined so a start time of 0 (fake clocks) still counts as started.
    if (this.startTime === undefined) this.start();

    // Get or create per-model tracker
    const tracker = this.getOrCreateTracker(opts.modelId);
    tracker.record(opts.inputTokens, opts.outputTokens);

    // Compute cost for this call
    const cost = computeCost(opts.inputTokens, opts.outputTokens, opts.modelId, this.pricing);

    // Append to action trace
    const entry: ActionUsageRecord = {
      stepIndex: opts.stepIndex ?? this.actionTrace.length,
      actionName: opts.actionName ?? 'unknown',
      role: opts.role,
      modelId: opts.modelId,
      usage: {
        inputTokens: opts.inputTokens,
        outputTokens: opts.outputTokens,
        totalTokens: opts.inputTokens + opts.outputTokens,
      },
      cost,
      timestamp: Date.now(),
    };
    this.actionTrace.push(entry);

    // Check budget thresholds
    this.checkBudget();
    return cost;
  }

  /** Get the per-model UsageMeter (creates one if missing). */
  getTracker(modelId: string): UsageMeter {
    return this.getOrCreateTracker(modelId);
  }

  /** Total estimated cost across all models. */
  getTotalCost(): number {
    let total = 0;
    for (const tracker of this.trackers.values()) {
      total += tracker.getEstimatedCost();
    }
    return total;
  }

  /** Formatted total cost string. */
  getTotalCostFormatted(): string {
    return `$${this.getTotalCost().toFixed(4)}`;
  }

  /** Aggregate token usage across all models. */
  getTotalUsage(): UsageRecord {
    let inputTokens = 0;
    let outputTokens = 0;
    for (const tracker of this.trackers.values()) {
      const u = tracker.getTotalUsage();
      inputTokens += u.inputTokens;
      outputTokens += u.outputTokens;
    }
    return { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens };
  }

  /**
   * Get the current budget status.
   * Without a budget, `maxCostUsd` and `fractionUsed` are undefined and
   * `isExhausted` is false. Crossed thresholds are returned in ascending order.
   */
  getBudgetState(): BudgetState {
    const currentCost = this.getTotalCost();
    const maxCost = this.budgetConfig?.maxCostUsd;
    return {
      currentCostUsd: currentCost,
      maxCostUsd: maxCost,
      fractionUsed: maxCost != null ? currentCost / maxCost : undefined,
      isExhausted: maxCost != null ? currentCost >= maxCost : false,
      crossedThresholds: [...this.crossedThresholds].sort((a, b) => a - b),
    };
  }

  /** Build a full MeteringSummary with per-model and per-role breakdowns. */
  getSummary(): MeteringSummary {
    const totalUsage = this.getTotalUsage();
    return {
      totalInputTokens: totalUsage.inputTokens,
      totalOutputTokens: totalUsage.outputTokens,
      totalTokens: totalUsage.totalTokens,
      totalEstimatedCost: this.getTotalCost(),
      totalCalls: this.actionTrace.length,
      byModel: this.buildModelBreakdown(),
      byRole: this.buildRoleBreakdown(),
      actionTrace: [...this.actionTrace],
      // Undefined until the timer has been started by start() or the first record().
      durationMs: this.startTime !== undefined ? Date.now() - this.startTime : undefined,
    };
  }

  /** Human-readable summary string. */
  getSummaryText(): string {
    const s = this.getSummary();
    const lines: string[] = [
      '=== Token Usage Summary ===',
      `Total: ${s.totalTokens.toLocaleString()} tokens (${s.totalInputTokens.toLocaleString()} in / ${s.totalOutputTokens.toLocaleString()} out)`,
      `Cost: $${s.totalEstimatedCost.toFixed(4)}`,
      `Calls: ${s.totalCalls}`,
    ];
    if (s.durationMs != null) {
      lines.push(`Duration: ${(s.durationMs / 1000).toFixed(1)}s`);
    }
    if (s.byRole.length > 0) {
      lines.push('', '--- By Role ---');
      for (const r of s.byRole) {
        lines.push(
          ` ${r.role}: ${r.totalTokens.toLocaleString()} tokens, $${r.estimatedCost.toFixed(4)} (${r.callCount} calls)`,
        );
      }
    }
    if (s.byModel.length > 0) {
      lines.push('', '--- By Model ---');
      for (const m of s.byModel) {
        lines.push(
          ` ${m.modelId}: ${m.totalTokens.toLocaleString()} tokens, $${m.estimatedCost.toFixed(4)} (${m.callCount} calls)`,
        );
      }
    }
    // The budget line is only present when a budget is configured.
    const budget = this.getBudgetState();
    if (budget.maxCostUsd != null) {
      const pct = ((budget.fractionUsed ?? 0) * 100).toFixed(1);
      lines.push(
        '',
        `Budget: $${budget.currentCostUsd.toFixed(4)} / $${budget.maxCostUsd.toFixed(4)} (${pct}%)`,
      );
    }
    return lines.join('\n');
  }

  /**
   * Reset all tracking data.
   * Clears usage, the action trace, crossed thresholds and the timer; the
   * budget configuration itself is kept.
   */
  reset(): void {
    // Reset each meter first so references previously handed out by getTracker() read zero.
    for (const tracker of this.trackers.values()) {
      tracker.reset();
    }
    this.trackers.clear();
    this.actionTrace.length = 0;
    this.crossedThresholds.clear();
    this.startTime = undefined;
  }

  // ── Private helpers ──

  /** Return the meter for `modelId`, creating and registering it if needed. */
  private getOrCreateTracker(modelId: string): UsageMeter {
    let tracker = this.trackers.get(modelId);
    if (!tracker) {
      tracker = new UsageMeter(modelId, this.pricing);
      this.trackers.set(modelId, tracker);
    }
    return tracker;
  }

  /**
   * Fire callbacks for newly crossed thresholds, then handle exhaustion.
   * Each threshold fires at most once until thresholds are cleared. While the
   * cost is at or above the limit, `onBudgetExhausted` (if provided) is
   * consulted on every call; without it, exhaustion never throws.
   */
  private checkBudget(): void {
    if (!this.budgetConfig) return;
    const currentCost = this.getTotalCost();
    const { maxCostUsd, thresholds, onThresholdCrossed, onBudgetExhausted } = this.budgetConfig;

    // Check each threshold
    for (const threshold of thresholds ?? []) {
      if (this.crossedThresholds.has(threshold)) continue;
      const thresholdCost = maxCostUsd * threshold;
      if (currentCost >= thresholdCost) {
        this.crossedThresholds.add(threshold);
        onThresholdCrossed(currentCost, threshold, maxCostUsd);
      }
    }

    // Check full exhaustion
    if (currentCost >= maxCostUsd) {
      if (onBudgetExhausted) {
        const allow = onBudgetExhausted(currentCost, maxCostUsd);
        if (!allow) {
          throw new BudgetDepletedError(currentCost, maxCostUsd);
        }
      }
    }
  }

  /** Aggregate the action trace by model ID, most expensive model first. */
  private buildModelBreakdown(): ModelUsageBreakdown[] {
    const map = new Map<string, ModelUsageBreakdown>();
    for (const entry of this.actionTrace) {
      let mb = map.get(entry.modelId);
      if (!mb) {
        mb = {
          modelId: entry.modelId,
          inputTokens: 0,
          outputTokens: 0,
          totalTokens: 0,
          estimatedCost: 0,
          callCount: 0,
        };
        map.set(entry.modelId, mb);
      }
      mb.inputTokens += entry.usage.inputTokens;
      mb.outputTokens += entry.usage.outputTokens;
      mb.totalTokens += entry.usage.totalTokens;
      mb.estimatedCost += entry.cost;
      mb.callCount++;
    }
    return [...map.values()].sort((a, b) => b.estimatedCost - a.estimatedCost);
  }

  /** Aggregate the action trace by role, most expensive role first. */
  private buildRoleBreakdown(): RoleUsageBreakdown[] {
    const map = new Map<ModelRole, RoleUsageBreakdown>();
    for (const entry of this.actionTrace) {
      let rb = map.get(entry.role);
      if (!rb) {
        rb = {
          role: entry.role,
          inputTokens: 0,
          outputTokens: 0,
          totalTokens: 0,
          estimatedCost: 0,
          callCount: 0,
        };
        map.set(entry.role, rb);
      }
      rb.inputTokens += entry.usage.inputTokens;
      rb.outputTokens += entry.usage.outputTokens;
      rb.totalTokens += entry.usage.totalTokens;
      rb.estimatedCost += entry.cost;
      rb.callCount++;
    }
    return [...map.values()].sort((a, b) => b.estimatedCost - a.estimatedCost);
  }
}
// ── Budget error ──
/**
 * Error raised when spending has reached the configured budget limit.
 * Carries both amounts; the message renders them with four decimals.
 */
export class BudgetDepletedError extends Error {
  /** Cost in USD at the time the error was created. */
  readonly currentCost: number;
  /** Budget limit in USD that was reached. */
  readonly maxCost: number;

  constructor(currentCost: number, maxCost: number) {
    const spent = currentCost.toFixed(4);
    const limit = maxCost.toFixed(4);
    super(`Token budget exhausted: $${spent} spent, limit is $${limit}`);
    this.name = 'BudgetDepletedError';
    this.currentCost = currentCost;
    this.maxCost = maxCost;
  }
}
// ── Shared utilities ──
/**
 * Rough token estimate for a piece of text: one token per four characters,
 * rounded up.
 *
 * @param text - Text to size.
 * @returns Estimated token count; 0 for the empty string.
 */
export function estimateTokenCount(text: string): number {
  const charsPerToken = 4;
  const characters = text.length;
  return Math.ceil(characters / charsPerToken);
}
/**
 * Resolve pricing for a model ID with exact-match then partial-match fallback.
 *
 * Resolution order:
 * 1. An own entry of the table keyed exactly by `modelId`.
 * 2. The longest table key contained in `modelId`, so a versioned ID such as
 *    "gpt-4o-mini-2024-07-18" resolves to "gpt-4o-mini" rather than to the
 *    shorter "gpt-4o" that merely appears earlier in the table.
 * 3. The shortest table key that contains `modelId` (abbreviated IDs).
 *
 * Ties keep the entry that comes first in the table. An empty `modelId` and
 * empty table keys never take part in partial matching, because the empty
 * string is a substring of everything.
 *
 * @param modelId - Model identifier to price.
 * @param pricing - Table of cost rates keyed by model ID.
 * @returns The matching rates, or `undefined` when nothing matches.
 */
function resolveModelCost(modelId: string, pricing: PricingTable): CostRates | undefined {
  // Own-property check keeps IDs like "toString" from resolving to Object.prototype members.
  if (Object.prototype.hasOwnProperty.call(pricing, modelId) && pricing[modelId]) {
    return pricing[modelId];
  }
  if (modelId.length === 0) return undefined;

  let containedKey: string | undefined; // longest key found inside modelId
  let containingKey: string | undefined; // shortest key that contains modelId
  for (const key of Object.keys(pricing)) {
    if (key.length === 0) continue;
    if (modelId.includes(key)) {
      if (containedKey === undefined || key.length > containedKey.length) {
        containedKey = key;
      }
    } else if (key.includes(modelId)) {
      if (containingKey === undefined || key.length < containingKey.length) {
        containingKey = key;
      }
    }
  }

  const match = containedKey ?? containingKey;
  return match === undefined ? undefined : pricing[match];
}
/**
 * Compute cost in USD for a single call.
 *
 * @param inputTokens - Input tokens consumed by the call.
 * @param outputTokens - Output tokens produced by the call.
 * @param modelId - Model used; resolved against `pricing`.
 * @param pricing - Table of per-million-token rates.
 * @returns Input cost plus output cost, or 0 when the model has no pricing.
 */
function computeCost(
  inputTokens: number,
  outputTokens: number,
  modelId: string,
  pricing: PricingTable,
): number {
  const rates = resolveModelCost(modelId, pricing);
  if (!rates) {
    return 0;
  }
  const inputCost = (inputTokens / 1_000_000) * rates.inputCostPerMillion;
  const outputCost = (outputTokens / 1_000_000) * rates.outputCostPerMillion;
  return inputCost + outputCost;
}
================================================
FILE: packages/core/src/metering/types.ts
================================================
/** Token counts for a single call or for an accumulated total. */
export interface UsageRecord {
/** Number of input tokens. */
inputTokens: number;
/** Number of output tokens. */
outputTokens: number;
/** Input plus output tokens. */
totalTokens: number;
}
/** Price of one model, expressed per one million tokens (USD). */
export interface CostRates {
/** Cost of one million input tokens. */
inputCostPerMillion: number;
/** Cost of one million output tokens. */
outputCostPerMillion: number;
}
/** Cost rates keyed by model ID. */
export interface PricingTable {
[modelId: string]: CostRates;
}
/**
 * Role that a model can serve in the agent pipeline.
 * - main: primary reasoning / action-selection model
 * - extraction: lightweight model for page content extraction
 * - judge: evaluates task completion
 * - compaction: summarizes / compresses conversation history
 *
 * Recorded with every usage entry and used as the grouping key of the
 * per-role breakdown.
 */
export type ModelRole = 'main' | 'extraction' | 'judge' | 'compaction';
/** Token usage attributed to a single agent action (step). */
export interface ActionUsageRecord {
/** Index of the agent step this call belongs to. */
stepIndex: number;
/** Name of the action that triggered the call. */
actionName: string;
/** Pipeline role the model served for this call. */
role: ModelRole;
/** ID of the model that was called. */
modelId: string;
/** Token counts of this call alone. */
usage: UsageRecord;
/** Estimated cost of this call in USD. */
cost: number;
/** Time the call was recorded, in milliseconds since the epoch. */
timestamp: number;
}
/** Per-model aggregated usage. */
export interface ModelUsageBreakdown {
/** Model ID the figures below are aggregated for. */
modelId: string;
/** Sum of input tokens over all calls to this model. */
inputTokens: number;
/** Sum of output tokens over all calls to this model. */
outputTokens: number;
/** Sum of total tokens over all calls to this model. */
totalTokens: number;
/** Sum of the per-call estimated costs, in USD. */
estimatedCost: number;
/** Number of calls made to this model. */
callCount: number;
}
/** Per-role aggregated usage. */
export interface RoleUsageBreakdown {
/** Role the figures below are aggregated for. */
role: ModelRole;
/** Sum of input tokens over all calls made in this role. */
inputTokens: number;
/** Sum of output tokens over all calls made in this role. */
outputTokens: number;
/** Sum of total tokens over all calls made in this role. */
totalTokens: number;
/** Sum of the per-call estimated costs, in USD. */
estimatedCost: number;
/** Number of calls made in this role. */
callCount: number;
}
/** Comprehensive usage summary across all models and roles. */
export interface MeteringSummary {
/** Aggregate across everything. */
totalInputTokens: number;
/** Output tokens summed over all models. */
totalOutputTokens: number;
/** Input plus output tokens summed over all models. */
totalTokens: number;
/** Estimated cost in USD summed over all models. */
totalEstimatedCost: number;
/** Number of recorded calls. */
totalCalls: number;
/** Breakdown by model ID. */
byModel: ModelUsageBreakdown[];
/** Breakdown by role. */
byRole: RoleUsageBreakdown[];
/** Per-action cost trace (chronological). */
actionTrace: ActionUsageRecord[];
/** Wall-clock duration of the tracked session in ms (if available). */
durationMs?: number;
}
/**
 * Configuration for budget alerts.
 *
 * `onThresholdCrossed` is required; `thresholds` and `onBudgetExhausted` are optional.
 * NOTE(review): the behavior when `thresholds` is omitted is decided by the consumer
 * of this policy and is not defined here.
 */
export interface BudgetPolicy {
/** Maximum allowed cost in USD. */
maxCostUsd: number;
/**
 * Warning thresholds as fractions of maxCostUsd (e.g. [0.5, 0.8, 1.0]).
 * Callbacks fire when cost first crosses each threshold.
 */
thresholds?: number[];
/**
 * Called each time a threshold is crossed.
 * Receives the current cost, the threshold that was crossed, and the maximum cost.
 */
onThresholdCrossed: (currentCost: number, threshold: number, maxCost: number) => void;
/** Called when the budget is fully exhausted. Return true to allow continuing. */
onBudgetExhausted?: (currentCost: number, maxCost: number) => boolean;
}
/** Status of budget consumption. */
export interface BudgetState {
/** Cost accumulated so far, in USD. */
currentCostUsd: number;
/** Configured budget ceiling in USD; undefined when no budget is set. */
maxCostUsd: number | undefined;
/** Fraction 0..1+ of budget consumed. undefined if no budget set. */
fractionUsed: number | undefined;
/** Whether the budget is considered used up. */
isExhausted: boolean;
/** Thresholds that have been crossed so far. */
crossedThresholds: number[];
}
// ── Comprehensive default pricing ──
/**
 * Built-in pricing table keyed by model identifier.
 *
 * Each entry gives the input and output price per one million tokens.
 * NOTE(review): these are hard-coded snapshots of provider list prices; verify
 * them against the providers' current pricing pages before relying on the numbers.
 */
export const DEFAULT_COST_RATES: PricingTable = {
// OpenAI
'gpt-4o': { inputCostPerMillion: 2.5, outputCostPerMillion: 10.0 },
'gpt-4o-mini': { inputCostPerMillion: 0.15, outputCostPerMillion: 0.6 },
'gpt-4-turbo': { inputCostPerMillion: 10.0, outputCostPerMillion: 30.0 },
'gpt-4.5-preview': { inputCostPerMillion: 75.0, outputCostPerMillion: 150.0 },
'o1': { inputCostPerMillion: 15.0, outputCostPerMillion: 60.0 },
'o1-mini': { inputCostPerMillion: 3.0, outputCostPerMillion: 12.0 },
'o1-preview': { inputCostPerMillion: 15.0, outputCostPerMillion: 60.0 },
'o3-mini': { inputCostPerMillion: 1.1, outputCostPerMillion: 4.4 },
// Anthropic
'claude-3-5-sonnet': { inputCostPerMillion: 3.0, outputCostPerMillion: 15.0 },
'claude-3-5-haiku': { inputCostPerMillion: 0.8, outputCostPerMillion: 4.0 },
'claude-3-opus': { inputCostPerMillion: 15.0, outputCostPerMillion: 75.0 },
'claude-3-haiku': { inputCostPerMillion: 0.25, outputCostPerMillion: 1.25 },
'claude-4-sonnet': { inputCostPerMillion: 3.0, outputCostPerMillion: 15.0 },
'claude-4-opus': { inputCostPerMillion: 15.0, outputCostPerMillion: 75.0 },
// Google
'gemini-1.5-pro': { inputCostPerMillion: 1.25, outputCostPerMillion: 5.0 },
'gemini-1.5-flash': { inputCostPerMillion: 0.075, outputCostPerMillion: 0.3 },
'gemini-2.0-flash': { inputCostPerMillion: 0.1, outputCostPerMillion: 0.4 },
'gemini-2.0-pro': { inputCostPerMillion: 1.25, outputCostPerMillion: 5.0 },
'gemini-2.5-pro': { inputCostPerMillion: 1.25, outputCostPerMillion: 10.0 },
'gemini-2.5-flash': { inputCostPerMillion: 0.15, outputCostPerMillion: 0.6 },
// Mistral
'mistral-large': { inputCostPerMillion: 2.0, outputCostPerMillion: 6.0 },
'mistral-small': { inputCostPerMillion: 0.2, outputCostPerMillion: 0.6 },
'codestral': { inputCostPerMillion: 0.3, outputCostPerMillion: 0.9 },
// DeepSeek
'deepseek-chat': { inputCostPerMillion: 0.14, outputCostPerMillion: 0.28 },
'deepseek-reasoner': { inputCostPerMillion: 0.55, outputCostPerMillion: 2.19 },
};
================================================
FILE: packages/core/src/model/adapters/vercel.ts
================================================
import { generateObject, type CoreMessage, type CoreUserMessage } from 'ai';
import type { LanguageModelV1 } from 'ai';
import type { ZodType } from 'zod';
import type { LanguageModel, InferenceOptions, ModelProvider } from '../interface.js';
import type { InferenceResult, InferenceUsage } from '../types.js';
import type { Message, ContentPart } from '../messages.js';
import { ModelError, ModelThrottledError } from '../../errors.js';
/** Construction options for `VercelModelAdapter`. */
export interface VercelModelAdapterOptions {
/** The Vercel AI SDK model instance that calls are delegated to. */
model: LanguageModelV1;
/** Override provider detection (otherwise inferred from model.provider or modelId). */
provider?: ModelProvider;
/** Default sampling temperature; the adapter falls back to 0 when omitted. */
temperature?: number;
/** Default max tokens per call; the adapter falls back to 4096 when omitted. */
maxTokens?: number;
/** Retry count handed to the SDK; the adapter falls back to 3 when omitted. */
maxRetries?: number;
}
/**
 * `LanguageModel` implementation that delegates structured generation to the
 * Vercel AI SDK's `generateObject`.
 *
 * Construction options supply defaults (temperature 0, 4096 max tokens,
 * 3 retries); temperature and max tokens can be overridden per `invoke` call.
 */
export class VercelModelAdapter implements LanguageModel {
  private readonly model: LanguageModelV1;
  private readonly defaultTemperature: number;
  private readonly defaultMaxTokens: number;
  private readonly maxRetries: number;
  private readonly _provider: ModelProvider;

  constructor(options: VercelModelAdapterOptions) {
    this.model = options.model;
    this.defaultTemperature = options.temperature ?? 0;
    this.defaultMaxTokens = options.maxTokens ?? 4096;
    this.maxRetries = options.maxRetries ?? 3;
    // An explicit provider wins; otherwise guess from the SDK model's metadata.
    this._provider = options.provider ?? inferProvider(this.model.modelId, this.model.provider);
  }

  /** Identifier of the underlying SDK model. */
  get modelId(): string {
    return this.model.modelId;
  }

  /** Provider resolved at construction time (explicit option or inferred). */
  get provider(): ModelProvider {
    return this._provider;
  }

  /**
   * Run one structured-output call and return the parsed object with token usage.
   *
   * @param options - Messages, response schema and optional per-call overrides.
   * @returns The schema-validated object, token usage and a normalized finish reason.
   * @throws ModelThrottledError when the failure looks like rate limiting (HTTP 429
   *   or a "rate limit" message); carries the Retry-After delay in ms when available.
   * @throws ModelError for every other failure, with the original error as `cause`.
   */
  async invoke<T>(options: InferenceOptions<T>): Promise<InferenceResult<T>> {
    const messages = this.convertMessages(options.messages);
    try {
      const result = await generateObject({
        model: this.model,
        schema: options.responseSchema as ZodType<T>,
        schemaName: options.schemaName ?? 'AgentDecision',
        schemaDescription: options.schemaDescription,
        messages,
        temperature: options.temperature ?? this.defaultTemperature,
        maxTokens: options.maxTokens ?? this.defaultMaxTokens,
        maxRetries: this.maxRetries,
      });

      const inputTokens = result.usage?.promptTokens ?? 0;
      const outputTokens = result.usage?.completionTokens ?? 0;
      const usage: InferenceUsage = {
        inputTokens,
        outputTokens,
        totalTokens: inputTokens + outputTokens,
      };

      return {
        parsed: result.object,
        usage,
        finishReason: mapFinishReason(result.finishReason),
      };
    } catch (error: any) {
      // Once the SDK exhausts its own retries it throws a RetryError whose
      // `lastError` holds the actual API failure (status code, headers).
      const apiError = error?.lastError ?? error;
      const messageText = [error?.message, apiError?.message]
        .filter((text): text is string => typeof text === 'string')
        .join(' ');

      if (apiError?.statusCode === 429 || /rate limit/i.test(messageText)) {
        // AI SDK API errors expose `responseHeaders`; keep `headers` as a fallback
        // for other error shapes.
        const headers = apiError?.responseHeaders ?? apiError?.headers;
        throw new ModelThrottledError(
          error?.message ?? 'Rate limited',
          VercelModelAdapter.parseRetryAfterMs(headers?.['retry-after']),
        );
      }

      throw new ModelError(
        `LLM invocation failed: ${error?.message ?? String(error)}`,
        { cause: error },
      );
    }
  }

  /**
   * Convert a Retry-After header value to a delay in milliseconds.
   *
   * Accepts both forms allowed by HTTP: a number of seconds, or an HTTP-date
   * (converted to the time remaining from now, never negative). Returns
   * undefined when the value is missing or unparseable, so NaN is never
   * passed on as a delay.
   */
  private static parseRetryAfterMs(value: unknown): number | undefined {
    if (typeof value !== 'string' && typeof value !== 'number') return undefined;

    const raw = String(value).trim();
    if (raw === '') return undefined;

    if (/^\d+(\.\d+)?$/.test(raw)) {
      return Math.round(Number(raw) * 1000);
    }

    const retryAt = Date.parse(raw);
    if (Number.isNaN(retryAt)) return undefined;
    return Math.max(0, retryAt - Date.now());
  }

  /**
   * Translate internal messages to the SDK's `CoreMessage` shape.
   *
   * - system / string user content pass through unchanged;
   * - multi-part user content is converted part by part;
   * - non-text parts of assistant messages are replaced by a "[image]" text placeholder;
   * - tool results are sent as user messages prefixed with the tool call id.
   */
  private convertMessages(messages: Message[]): CoreMessage[] {
    return messages.map((msg): CoreMessage => {
      switch (msg.role) {
        case 'system':
          return { role: 'system', content: msg.content };

        case 'user': {
          if (typeof msg.content === 'string') {
            return { role: 'user', content: msg.content };
          }
          return {
            role: 'user',
            content: msg.content.map((part) => this.convertContentPart(part)),
          } as CoreUserMessage;
        }

        case 'assistant': {
          const content = typeof msg.content === 'string'
            ? msg.content
            : msg.content.map((part) => {
                if (part.type === 'text') return { type: 'text' as const, text: part.text };
                return { type: 'text' as const, text: '[image]' };
              });
          return { role: 'assistant', content };
        }

        case 'tool':
          return {
            role: 'user',
            content: `[Tool Result (${msg.toolCallId})]: ${msg.content}`,
          };
      }
    });
  }

  /**
   * Convert one content part to the SDK's text / image part.
   * Base64 images are passed as their data string; URL images as a `URL` object.
   */
  private convertContentPart(
    part: ContentPart,
  ): { type: 'text'; text: string } | { type: 'image'; image: string | URL } {
    switch (part.type) {
      case 'text':
        return { type: 'text', text: part.text };
      case 'image':
        if (part.source.type === 'base64') {
          return {
            type: 'image',
            image: part.source.data,
          };
        }
        return {
          type: 'image',
          image: new URL(part.source.url),
        };
    }
  }
}
/**
 * Normalize a finish reason reported by the SDK.
 *
 * The recognized values ("stop", "length", "content-filter", "tool-calls",
 * "error") are returned unchanged; anything else collapses to "other".
 */
function mapFinishReason(
  reason: string,
): 'stop' | 'length' | 'content-filter' | 'tool-calls' | 'error' | 'other' {
  const recognized = ['stop', 'length', 'content-filter', 'tool-calls', 'error'] as const;
  const match = recognized.find((candidate) => candidate === reason);
  return match === undefined ? 'other' : match;
}
/**
 * Ordered keyword → provider table used for provider inference.
 * Earlier entries win when several patterns match the same text.
 */
const PROVIDER_PATTERNS: Array<[RegExp, ModelProvider]> = [
  [/anthropic|claude/i, 'anthropic'],
  [/openai|gpt|o1|o3/i, 'openai'],
  [/google|gemini/i, 'google'],
  [/mistral/i, 'mistral'],
  [/deepseek/i, 'deepseek'],
  [/groq/i, 'groq'],
  [/fireworks/i, 'fireworks'],
  [/together/i, 'together'],
];

/**
 * Infer the provider for a model.
 *
 * The explicit provider hint is matched first and only then the model id, so
 * a model-name keyword cannot override the provider actually serving the model
 * (e.g. hint "groq.chat" with a "deepseek-…" model id resolves to "groq").
 *
 * @param modelId - Model identifier, used when the hint is absent or unrecognized.
 * @param providerHint - Provider string reported by the SDK model, if any.
 * @returns The matched provider, or "custom" when nothing matches.
 */
function inferProvider(modelId: string, providerHint?: string): ModelProvider {
  const detect = (text: string): ModelProvider | undefined =>
    PROVIDER_PATTERNS.find(([pattern]) => pattern.test(text))?.[1];

  const fromHint = providerHint ? detect(providerHint) : undefined;
  return fromHint ?? detect(modelId) ?? 'custom';
}
================================================
FILE: packages/core/src/model/index.ts
================================================
export { type LanguageModel, type InferenceOptions, type ModelProvider } from './interface.js';
export { type InferenceResult, type InferenceUsage } from './types.js';
export {
type Message,
type SystemMessage,
type UserMessage,
type AssistantMessage,
type ToolResultMessage,
type ToolCall,
type ContentPart,
type TextContent,
type ImageContent,
systemMessage,
userMessage,
assistantMessage,
toolResultMessage,
textContent,
imageContent,
} from './messages.js';
export { VercelModelAdapter, type VercelModelAdapterOptions } from './adapters/vercel.js';
export {
zodToJsonSchema,
optimizeSchemaForModel,
optimizeJsonSchemaForModel,
type SchemaOptimizationOptions,
} from './schema-optimizer.js';
================================================
FILE: packages/core/src/model/interface.ts
================================================
import type { ZodType } from 'zod';
import type { Message } from './messages.js';
import type { InferenceResult } from './types.js';
/**
 * Known LLM provider identifiers.
 *
 * 'custom' is the catch-all: the Vercel adapter's provider inference returns it
 * when neither the provider hint nor the model id matches a known provider.
 */
export type ModelProvider =
| 'anthropic'
| 'openai'
| 'google'
| 'mistral'
| 'deepseek'
| 'groq'
| 'fireworks'
| 'together'
| 'custom';
export interface InferenceOptions {
messages: Message[];
responseSchema: ZodType