Repository: vercel-labs/agent-browser Branch: main Commit: 9837b9c1aaba Files: 208 Total size: 3.3 MB Directory structure: gitextract_aeoh496c/ ├── .changeset/ │ ├── README.md │ └── config.json ├── .claude-plugin/ │ └── marketplace.json ├── .github/ │ └── workflows/ │ ├── ci.yml │ └── release.yml ├── .gitignore ├── .husky/ │ └── pre-commit ├── .prettierrc ├── AGENTS.md ├── CHANGELOG.md ├── LICENSE ├── README.md ├── benchmarks/ │ ├── .gitignore │ ├── README.md │ ├── bench.ts │ ├── package.json │ ├── scenarios.ts │ └── tsconfig.json ├── bin/ │ └── agent-browser.js ├── cli/ │ ├── Cargo.toml │ ├── build.rs │ ├── cdp-protocol/ │ │ ├── browser_protocol.json │ │ └── js_protocol.json │ └── src/ │ ├── color.rs │ ├── commands.rs │ ├── connection.rs │ ├── flags.rs │ ├── install.rs │ ├── main.rs │ ├── native/ │ │ ├── actions.rs │ │ ├── auth.rs │ │ ├── browser.rs │ │ ├── cdp/ │ │ │ ├── chrome.rs │ │ │ ├── client.rs │ │ │ ├── discovery.rs │ │ │ ├── lightpanda.rs │ │ │ ├── mod.rs │ │ │ └── types.rs │ │ ├── cookies.rs │ │ ├── daemon.rs │ │ ├── diff.rs │ │ ├── e2e_tests.rs │ │ ├── element.rs │ │ ├── inspect_server.rs │ │ ├── interaction.rs │ │ ├── mod.rs │ │ ├── network.rs │ │ ├── parity_tests.rs │ │ ├── policy.rs │ │ ├── providers.rs │ │ ├── recording.rs │ │ ├── screenshot.rs │ │ ├── snapshot.rs │ │ ├── state.rs │ │ ├── storage.rs │ │ ├── stream.rs │ │ ├── test_fixtures/ │ │ │ ├── drag_probe.html │ │ │ ├── html5_drag_probe.html │ │ │ └── pointer_capture_probe.html │ │ ├── tracing.rs │ │ └── webdriver/ │ │ ├── appium.rs │ │ ├── backend.rs │ │ ├── client.rs │ │ ├── ios.rs │ │ ├── mod.rs │ │ ├── safari.rs │ │ └── types.rs │ ├── output.rs │ ├── test_utils.rs │ ├── upgrade.rs │ └── validation.rs ├── docker/ │ ├── Dockerfile.build │ └── docker-compose.yml ├── docs/ │ ├── .gitignore │ ├── components.json │ ├── eslint.config.mjs │ ├── mdx-components.tsx │ ├── next.config.mjs │ ├── package.json │ ├── postcss.config.mjs │ ├── src/ │ │ ├── app/ │ │ │ ├── api/ │ │ │ │ ├── docs-chat/ │ │ │ │ │ 
└── route.ts │ │ │ │ ├── docs-markdown/ │ │ │ │ │ └── route.ts │ │ │ │ └── search/ │ │ │ │ └── route.ts │ │ │ ├── cdp-mode/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── changelog/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── commands/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── configuration/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── diffing/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── engines/ │ │ │ │ ├── chrome/ │ │ │ │ │ ├── layout.tsx │ │ │ │ │ └── page.mdx │ │ │ │ └── lightpanda/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── globals.css │ │ │ ├── installation/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── ios/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── layout.tsx │ │ │ ├── native-mode/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── next/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── og/ │ │ │ │ ├── [...slug]/ │ │ │ │ │ └── route.tsx │ │ │ │ ├── og-image.tsx │ │ │ │ └── route.tsx │ │ │ ├── page.mdx │ │ │ ├── profiler/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── providers/ │ │ │ │ ├── browser-use/ │ │ │ │ │ ├── layout.tsx │ │ │ │ │ └── page.mdx │ │ │ │ ├── browserbase/ │ │ │ │ │ ├── layout.tsx │ │ │ │ │ └── page.mdx │ │ │ │ ├── browserless/ │ │ │ │ │ ├── layout.tsx │ │ │ │ │ └── page.mdx │ │ │ │ └── kernel/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── quick-start/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── security/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── selectors/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── sessions/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── skills/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ ├── snapshots/ │ │ │ │ ├── layout.tsx │ │ │ │ └── page.mdx │ │ │ └── streaming/ │ │ │ ├── layout.tsx │ │ │ └── page.mdx │ │ ├── components/ │ │ │ ├── code-block.tsx │ │ │ ├── copy-button.tsx │ │ │ ├── copy-page-button.tsx │ │ │ ├── diff-demo.tsx │ │ │ ├── docs-chat.tsx │ │ │ ├── 
docs-mobile-nav.tsx │ │ │ ├── docs-sidebar.tsx │ │ │ ├── header.tsx │ │ │ ├── search.tsx │ │ │ ├── theme-provider.tsx │ │ │ ├── theme-toggle.tsx │ │ │ └── ui/ │ │ │ ├── dialog.tsx │ │ │ └── sheet.tsx │ │ └── lib/ │ │ ├── docs-navigation.ts │ │ ├── mdx-to-markdown.ts │ │ ├── page-metadata.ts │ │ ├── page-titles.ts │ │ ├── rate-limit.ts │ │ ├── search-index.ts │ │ └── utils.ts │ └── tsconfig.json ├── examples/ │ └── environments/ │ ├── .gitignore │ ├── README.md │ ├── app/ │ │ ├── actions/ │ │ │ └── browse.ts │ │ ├── api/ │ │ │ └── browse/ │ │ │ └── route.ts │ │ ├── globals.css │ │ ├── layout.tsx │ │ └── page.tsx │ ├── components/ │ │ └── ui/ │ │ ├── alert.tsx │ │ ├── badge.tsx │ │ ├── button.tsx │ │ ├── input.tsx │ │ ├── label.tsx │ │ ├── resizable.tsx │ │ ├── select.tsx │ │ ├── separator.tsx │ │ ├── toggle-group.tsx │ │ └── toggle.tsx │ ├── components.json │ ├── lib/ │ │ ├── agent-browser-sandbox.ts │ │ ├── constants.ts │ │ ├── rate-limit.ts │ │ └── utils.ts │ ├── next.config.ts │ ├── package.json │ ├── postcss.config.mjs │ ├── scripts/ │ │ └── create-snapshot.ts │ └── tsconfig.json ├── package.json ├── scripts/ │ ├── build-all-platforms.sh │ ├── check-version-sync.js │ ├── copy-native.js │ ├── postinstall.js │ └── sync-version.js └── skills/ ├── agent-browser/ │ ├── SKILL.md │ ├── references/ │ │ ├── authentication.md │ │ ├── commands.md │ │ ├── profiling.md │ │ ├── proxy-support.md │ │ ├── session-management.md │ │ ├── snapshot-refs.md │ │ └── video-recording.md │ └── templates/ │ ├── authenticated-session.sh │ ├── capture-workflow.sh │ └── form-automation.sh ├── dogfood/ │ ├── SKILL.md │ ├── references/ │ │ └── issue-taxonomy.md │ └── templates/ │ └── dogfood-report-template.md ├── electron/ │ └── SKILL.md ├── slack/ │ ├── SKILL.md │ ├── references/ │ │ └── slack-tasks.md │ └── templates/ │ └── slack-report-template.md └── vercel-sandbox/ └── SKILL.md ================================================ FILE CONTENTS ================================================ 
================================================ FILE: .changeset/README.md ================================================ # Changesets This project uses [Changesets](https://github.com/changesets/changesets) for versioning and changelog generation. ## Adding a changeset When you make a change that should be released, run: ```bash pnpm changeset ``` This will prompt you to: 1. Select the type of change (patch, minor, major) 2. Write a summary of your changes The changeset file will be committed with your PR. ## Release process When changesets are merged to `main`, the release workflow will: 1. Create a "Version Packages" PR that updates version numbers and changelogs 2. When that PR is merged, packages are automatically published to npm ================================================ FILE: .changeset/config.json ================================================ { "$schema": "https://unpkg.com/@changesets/config@3.1.1/schema.json", "changelog": "@changesets/cli/changelog", "commit": false, "fixed": [], "linked": [], "access": "public", "baseBranch": "main", "updateInternalDependencies": "patch", "ignore": [] } ================================================ FILE: .claude-plugin/marketplace.json ================================================ { "$schema": "https://anthropic.com/claude-code/marketplace.schema.json", "name": "agent-browser", "description": "Headless browser automation for AI agents", "owner": { "name": "Vercel", "email": "support@vercel.com" }, "plugins": [ { "name": "agent-browser", "description": "Automates browser interactions for web testing, form filling, screenshots, and data extraction", "source": "./", "strict": false, "skills": ["./skills/agent-browser"], "category": "development" } ] } ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: branches: [main] pull_request: branches: [main] workflow_dispatch: jobs: version-sync: name: Version Sync 
Check runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - name: Check version sync run: node scripts/check-version-sync.js rust: name: Rust runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - name: Setup Rust toolchain uses: dtolnay/rust-toolchain@stable with: components: rustfmt, clippy - name: Cache Rust build artifacts uses: Swatinem/rust-cache@v2 with: workspaces: cli - name: Format check run: cargo fmt --manifest-path cli/Cargo.toml -- --check - name: Clippy check run: cargo clippy --manifest-path cli/Cargo.toml -- -D warnings - name: Run Rust tests run: cargo test --profile ci --manifest-path cli/Cargo.toml rust-cross: name: Rust (${{ matrix.os }} - ${{ matrix.target }}) if: github.event_name != 'pull_request' runs-on: ${{ matrix.os }} strategy: matrix: include: - os: macos-latest target: aarch64-apple-darwin - os: macos-latest target: x86_64-apple-darwin - os: windows-latest target: x86_64-pc-windows-msvc steps: - name: Checkout repository uses: actions/checkout@v4 - name: Setup Rust toolchain uses: dtolnay/rust-toolchain@stable with: targets: ${{ matrix.target }} - name: Cache Rust build artifacts uses: Swatinem/rust-cache@v2 with: workspaces: cli - name: Run Rust tests run: cargo test --profile ci --manifest-path cli/Cargo.toml --target ${{ matrix.target }} native-e2e: name: Native E2E Tests if: github.event_name != 'pull_request' runs-on: ubuntu-latest needs: rust steps: - name: Checkout repository uses: actions/checkout@v4 - name: Setup Rust toolchain uses: dtolnay/rust-toolchain@stable - name: Cache Rust build artifacts uses: Swatinem/rust-cache@v2 with: workspaces: cli - name: Install Chrome run: | cargo run --manifest-path cli/Cargo.toml -- install --with-deps - name: Run e2e tests run: cargo test --profile ci --manifest-path cli/Cargo.toml e2e -- --ignored --test-threads=1 windows-integration: name: Windows Integration Test if: github.event_name != 'pull_request' runs-on: 
windows-latest needs: rust-cross steps: - name: Checkout repository uses: actions/checkout@v4 - name: Setup Rust toolchain uses: dtolnay/rust-toolchain@stable with: targets: x86_64-pc-windows-msvc - name: Cache Rust build artifacts uses: Swatinem/rust-cache@v2 with: workspaces: cli - name: Build Rust CLI run: cargo build --release --manifest-path cli/Cargo.toml --target x86_64-pc-windows-msvc - name: Copy CLI binary to bin directory run: | Copy-Item cli/target/x86_64-pc-windows-msvc/release/agent-browser.exe bin/agent-browser-win32-x64.exe - name: Test agent-browser install command run: | $env:PATH = "$pwd\bin;$env:PATH" for ($i = 1; $i -le 3; $i++) { bin/agent-browser-win32-x64.exe install if ($LASTEXITCODE -eq 0) { exit 0 } Write-Host "Attempt $i failed, retrying in 10 seconds..." Start-Sleep -Seconds 10 } exit 1 shell: pwsh timeout-minutes: 10 - name: Test daemon lifecycle (open, snapshot, close) run: | $env:PATH = "$pwd\bin;$env:PATH" Write-Host "--- Opening page ---" bin/agent-browser-win32-x64.exe open https://example.com if ($LASTEXITCODE -ne 0) { Write-Error "open failed"; exit 1 } Write-Host "--- Taking snapshot ---" $snapshot = bin/agent-browser-win32-x64.exe snapshot if ($LASTEXITCODE -ne 0) { Write-Error "snapshot failed"; exit 1 } Write-Host $snapshot Write-Host "--- Closing browser ---" bin/agent-browser-win32-x64.exe close if ($LASTEXITCODE -ne 0) { Write-Error "close failed"; exit 1 } Write-Host "--- Windows daemon lifecycle test passed ---" shell: pwsh timeout-minutes: 5 global-install: name: Global Install (${{ matrix.os }}) if: github.event_name != 'pull_request' runs-on: ${{ matrix.os }} needs: rust-cross strategy: matrix: include: - os: ubuntu-latest target: x86_64-unknown-linux-gnu binary: agent-browser-linux-x64 - os: macos-latest target: aarch64-apple-darwin binary: agent-browser-darwin-arm64 - os: windows-latest target: x86_64-pc-windows-msvc binary: agent-browser-win32-x64.exe steps: - name: Checkout repository uses: actions/checkout@v4 - 
name: Setup Node.js uses: actions/setup-node@v4 with: node-version: 22 - name: Setup Rust toolchain uses: dtolnay/rust-toolchain@stable with: targets: ${{ matrix.target }} - name: Cache Rust build artifacts uses: Swatinem/rust-cache@v2 with: workspaces: cli - name: Build Rust CLI run: cargo build --release --manifest-path cli/Cargo.toml --target ${{ matrix.target }} - name: Copy CLI binary to bin directory (Unix) if: runner.os != 'Windows' run: cp cli/target/${{ matrix.target }}/release/agent-browser bin/${{ matrix.binary }} - name: Copy CLI binary to bin directory (Windows) if: runner.os == 'Windows' run: Copy-Item cli/target/${{ matrix.target }}/release/agent-browser.exe bin/${{ matrix.binary }} - name: Test npm global install run: | npm pack npm install -g agent-browser-*.tgz agent-browser --version shell: bash - name: Verify symlink points to native binary (Unix) if: runner.os != 'Windows' run: | SYMLINK=$(npm prefix -g)/bin/agent-browser TARGET=$(readlink "$SYMLINK") echo "Symlink: $SYMLINK" echo "Target: $TARGET" if [[ "$TARGET" != *"${{ matrix.binary }}"* ]]; then echo "ERROR: Symlink should point to native binary, not JS wrapper" exit 1 fi echo "Symlink correctly points to native binary" shell: bash - name: Verify shim points to native binary (Windows) if: runner.os == 'Windows' run: | $shimPath = "$(npm prefix -g)\agent-browser.cmd" $content = Get-Content $shimPath -Raw echo "Shim path: $shimPath" echo "Shim content:" echo $content if ($content -notmatch "agent-browser-win32-x64\.exe") { echo "ERROR: Shim should point to native .exe, not JS wrapper" exit 1 } echo "Shim correctly points to native binary" shell: pwsh ================================================ FILE: .github/workflows/release.yml ================================================ name: Release on: push: branches: - main workflow_dispatch: concurrency: ${{ github.workflow }}-${{ github.ref }} permissions: contents: write pull-requests: write jobs: # Build native binaries for all platforms 
first build-binaries: name: Build ${{ matrix.name }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: include: - name: Linux x64 os: ubuntu-latest target: x86_64-unknown-linux-gnu binary: agent-browser-linux-x64 use_zigbuild: true - name: Linux ARM64 os: ubuntu-latest target: aarch64-unknown-linux-gnu binary: agent-browser-linux-arm64 use_zigbuild: true - name: Linux musl x64 os: ubuntu-latest target: x86_64-unknown-linux-musl binary: agent-browser-linux-musl-x64 use_zigbuild: true - name: Linux musl ARM64 os: ubuntu-latest target: aarch64-unknown-linux-musl binary: agent-browser-linux-musl-arm64 use_zigbuild: true - name: Windows x64 os: ubuntu-latest target: x86_64-pc-windows-gnu binary: agent-browser-win32-x64.exe use_zigbuild: false - name: macOS x64 os: macos-latest target: x86_64-apple-darwin binary: agent-browser-darwin-x64 use_zigbuild: false - name: macOS ARM64 os: macos-latest target: aarch64-apple-darwin binary: agent-browser-darwin-arm64 use_zigbuild: false steps: - name: Checkout repository uses: actions/checkout@v4 - name: Setup pnpm uses: pnpm/action-setup@v4 with: version: 9 - name: Setup Node.js uses: actions/setup-node@v4 with: node-version: '22' cache: pnpm - name: Install npm dependencies run: pnpm install --frozen-lockfile - name: Sync version run: pnpm run version:sync - name: Setup Rust toolchain uses: dtolnay/rust-toolchain@stable with: targets: ${{ matrix.target }} - name: Install cross-compilation tools (Linux) if: runner.os == 'Linux' run: | sudo apt-get update sudo apt-get install -y gcc-aarch64-linux-gnu gcc-x86-64-linux-gnu mingw-w64 - name: Install cargo-zigbuild if: matrix.use_zigbuild run: | pip3 install ziglang cargo install cargo-zigbuild - name: Configure Rust linkers if: runner.os == 'Linux' run: | mkdir -p ~/.cargo cat >> ~/.cargo/config.toml << 'EOF' [target.aarch64-unknown-linux-gnu] linker = "aarch64-linux-gnu-gcc" [target.x86_64-pc-windows-gnu] linker = "x86_64-w64-mingw32-gcc" EOF - name: Cache Rust build 
artifacts uses: Swatinem/rust-cache@v2 with: workspaces: cli - name: Build with zigbuild if: matrix.use_zigbuild run: cargo zigbuild --release --manifest-path cli/Cargo.toml --target ${{ matrix.target }} - name: Build with cargo if: '!matrix.use_zigbuild' run: cargo build --release --manifest-path cli/Cargo.toml --target ${{ matrix.target }} - name: Copy binary run: | mkdir -p artifacts if [[ "${{ matrix.target }}" == *"windows"* ]]; then cp cli/target/${{ matrix.target }}/release/agent-browser.exe artifacts/${{ matrix.binary }} else cp cli/target/${{ matrix.target }}/release/agent-browser artifacts/${{ matrix.binary }} chmod +x artifacts/${{ matrix.binary }} fi - name: Upload artifact uses: actions/upload-artifact@v4 with: name: ${{ matrix.binary }} path: artifacts/${{ matrix.binary }} retention-days: 7 # Create release PR or publish to npm (with binaries) release: name: Release needs: build-binaries runs-on: ubuntu-latest outputs: published: ${{ steps.changesets.outputs.published }} publishedPackages: ${{ steps.changesets.outputs.publishedPackages }} steps: - name: Checkout Repo uses: actions/checkout@v4 with: fetch-depth: 0 - name: Setup pnpm uses: pnpm/action-setup@v4 with: version: 9 - name: Setup Node.js uses: actions/setup-node@v4 with: node-version: '22' cache: pnpm registry-url: 'https://registry.npmjs.org' - name: Install Dependencies run: pnpm install --frozen-lockfile - name: Download all binary artifacts uses: actions/download-artifact@v4 with: path: artifacts/ - name: Move binaries to bin directory run: | mkdir -p bin find artifacts -type f -name 'agent-browser-*' -exec mv {} bin/ \; rm -rf artifacts chmod +x bin/agent-browser-* 2>/dev/null || true echo "Binaries in bin/:" ls -la bin/ - name: Verify all binaries exist run: | EXPECTED_BINARIES=( "agent-browser-linux-x64" "agent-browser-linux-arm64" "agent-browser-linux-musl-x64" "agent-browser-linux-musl-arm64" "agent-browser-win32-x64.exe" "agent-browser-darwin-x64" "agent-browser-darwin-arm64" ) 
MIN_SIZE=100000 # Binaries should be at least 100KB ERRORS=0 for binary in "${EXPECTED_BINARIES[@]}"; do if [ ! -f "bin/$binary" ]; then echo "ERROR: Missing bin/$binary" ERRORS=$((ERRORS + 1)) else SIZE=$(stat -c%s "bin/$binary" 2>/dev/null || stat -f%z "bin/$binary") if [ "$SIZE" -lt "$MIN_SIZE" ]; then echo "ERROR: bin/$binary is too small ($SIZE bytes, expected >= $MIN_SIZE)" ERRORS=$((ERRORS + 1)) else echo "OK: bin/$binary ($SIZE bytes)" fi fi done if [ "$ERRORS" -gt 0 ]; then echo "Error: $ERRORS binary issues found" exit 1 fi echo "All 7 platform binaries present and valid" - name: Create Release Pull Request or Publish to npm id: changesets uses: changesets/action@v1 with: version: pnpm ci:version publish: pnpm ci:publish title: 'chore: version packages' commit: 'chore: version packages' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} NODE_AUTH_TOKEN: ${{ secrets.NPM_VERCEL_TOKEN_ELEVATED }} # Create GitHub release with binaries after npm publish github-release: name: Create GitHub Release needs: release if: needs.release.outputs.published == 'true' runs-on: ubuntu-latest steps: - name: Checkout Repo uses: actions/checkout@v4 with: ref: main - name: Download all artifacts uses: actions/download-artifact@v4 with: path: artifacts/ - name: Move binaries to bin directory run: | mkdir -p bin find artifacts -type f -name 'agent-browser-*' -exec mv {} bin/ \; rm -rf artifacts chmod +x bin/agent-browser-* 2>/dev/null || true ls -la bin/ - name: Verify binaries exist run: | BINARY_COUNT=$(ls bin/agent-browser-* 2>/dev/null | wc -l) if [ "$BINARY_COUNT" -lt 7 ]; then echo "Error: Expected 7 binaries, found $BINARY_COUNT" ls -la bin/ exit 1 fi echo "Found $BINARY_COUNT binaries" - name: Create GitHub Release run: | VERSION=$(node -p "require('./package.json').version") TAG="v$VERSION" # Check if release already exists if gh release view "$TAG" &>/dev/null; then echo "Release $TAG already exists, uploading binaries..." 
gh release upload "$TAG" bin/agent-browser-* --clobber else echo "Creating release $TAG..." gh release create "$TAG" \ --title "$TAG" \ --generate-notes \ bin/agent-browser-* fi env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .gitignore ================================================ # Dependencies node_modules/ # Build output dist/ # Native binaries (keep the launcher scripts) bin/agent-browser-* !bin/agent-browser !bin/agent-browser.cmd # Rust build artifacts cli/target/ cli/*.o # Logs *.log npm-debug.log* # IDE .idea/ .vscode/ *.swp *.swo # OS .DS_Store Thumbs.db # Python __pycache__/ # Test artifacts *.png *.jpeg *.jpg *.webm test/e2e/.dogfood-output/ # Package manager package-lock.json yarn.lock # Environment .env .env.local # opensrc - source code for packages opensrc/ # Docs site docs/node_modules/ docs/.next/ docs/out/ docs/package-lock.json # pnpm .pnpm-store/ ================================================ FILE: .husky/pre-commit ================================================ node scripts/sync-version.js git add cli/Cargo.toml cli/Cargo.lock ================================================ FILE: .prettierrc ================================================ { "semi": true, "singleQuote": true, "trailingComma": "es5", "printWidth": 100, "tabWidth": 2 } ================================================ FILE: AGENTS.md ================================================ # AGENTS.md Instructions for AI coding agents working with this codebase. ## Package Manager This project uses **pnpm**. Always use `pnpm` instead of `npm` or `yarn` for installing dependencies, running scripts, etc. (e.g., `pnpm install`, `pnpm run build`). ## Code Style - Do not use emojis in code, output, or documentation. Unicode symbols (✓, ✗, →, ⚠) are acceptable. - CLI colored output uses `cli/src/color.rs`. This module respects the `NO_COLOR` environment variable. Never use hardcoded ANSI color codes. 
- CLI flags must always use kebab-case (e.g., `--auto-connect`, `--allow-file-access`). Never use camelCase for flags (e.g., `--autoConnect` is wrong). ## Documentation When adding or changing user-facing features (new flags, commands, behaviors, environment variables, etc.), update **all** of the following: 1. `cli/src/output.rs` -- `--help` output (flags list, examples, environment variables) 2. `README.md` -- Options table, relevant feature sections, examples 3. `skills/agent-browser/SKILL.md` -- so AI agents know about the feature 4. `docs/src/app/` -- the Next.js docs site (MDX pages) 5. Inline doc comments in the relevant source files This applies to changes that either human users or AI agents would need to know about. Do not skip any of these locations. In the `docs/src/app/` MDX files, always use HTML `<table>` syntax for tables (not markdown pipe tables). This matches the existing convention across the docs site. ## Architecture This is a Rust codebase. The browser automation daemon lives in `cli/src/native/` (daemon, actions, browser, CDP client, snapshot, state). The `--engine` flag selects Chrome vs Lightpanda. The `install` command downloads Chrome from Chrome for Testing directly. ## Testing ### Unit Tests ```bash cd cli && cargo test ``` Runs all unit tests (~320 tests). These are fast and don't require Chrome. ### End-to-End Tests ```bash cd cli && cargo test e2e -- --ignored --test-threads=1 ``` Runs 18 e2e tests that launch real headless Chrome instances and exercise the full native daemon command pipeline. 
Requirements: - Chrome must be installed - Must run serially (`--test-threads=1`) to avoid Chrome instance contention - Tests are `#[ignore]`'d so they don't run during normal `cargo test` The e2e tests live in `cli/src/native/e2e_tests.rs` and cover: launch/close, navigation, snapshots, screenshots, form interaction, cookies, storage, tabs, element queries, viewport/emulation, domain filtering, diff, state management, error handling, and Phase 8 commands. ### Linting and Formatting ```bash cd cli && cargo fmt -- --check # Check formatting cd cli && cargo clippy # Lint ``` ## Source Code Reference Source code for dependencies is available in `opensrc/` for deeper understanding of implementation details. See `opensrc/sources.json` for the list of available packages and their versions. Use this source code when you need to understand how a package works internally, not just its types/interface. ### Fetching Additional Source Code To fetch source code for a package or repository you need to understand, run: ```bash npx opensrc <package> # npm package (e.g., npx opensrc zod) npx opensrc pypi:<package> # Python package (e.g., npx opensrc pypi:requests) npx opensrc crates:<package> # Rust crate (e.g., npx opensrc crates:serde) npx opensrc <owner>/<repo> # GitHub repo (e.g., npx opensrc vercel/ai) ``` ================================================ FILE: CHANGELOG.md ================================================ # agent-browser ## 0.21.2 ### Patch Changes - 757626f: ### Bug Fixes - **Deduplicate text content in snapshots** - Fixed an issue where duplicate text content appeared in page snapshots (#909) - **Native mouse drag state** - Fixed incorrect raw native mouse drag state not being properly tracked across `down`, `move`, and `up` events (#872) - **Chrome headless launch failures** - Fixed browser launch failures caused by the `--enable-unsafe-swiftshader` flag in Chrome headless mode (#915) - **Origin-scoped `--headers` persistence** - Restored correct persistence of origin-scoped headers set via 
`--headers` across navigation commands (#894) - **Relative URLs in WebSocket domain filter** - Fixed handling of relative URLs in the WebSocket domain filter script (#624) ## 0.21.1 ### Patch Changes - 1e7619d: ### New Features - **HAR 1.2 network capture** - Added commands to capture and export network traffic in HAR 1.2 format, including accurate request/response timing, headers, body sizes, and resource types sourced from Chrome DevTools Protocol events (#864) - **Built-in `upgrade` command** - Added `agent-browser upgrade` to self-update the CLI; automatically detects your installation method (npm, Homebrew, or Cargo) and runs the appropriate update command (#898) ### Documentation - Added `upgrade` command to the README command reference and installation guide - Added a dedicated **Updating** section to the README with usage instructions for `agent-browser upgrade` ## 0.21.0 ### Minor Changes - c6de80b: ### New Features - **`batch` command** -- Execute multiple commands from stdin in a single invocation. Accepts a JSON array of string arrays and returns results sequentially. 
Supports `--bail` to stop on first error and `--json` for structured output (#865) - **iframe support** -- CLI interactions and snapshots now traverse into iframe content, enabling automation of cross-frame pages (#869) - **`network har start/stop` command** -- Capture and export network traffic in HAR 1.2 format (#874) - **WebSocket fallback for CDP discovery** -- When HTTP-based CDP endpoint discovery fails, the CLI now falls back to a WebSocket connection automatically (#873) ### Improvements - **`--full`/`-f` refactored to command-level flag** -- Moved from a global flag to a per-command flag for clearer scoping (#877) - **Enhanced Chrome launch** -- Added `--user-data-dir` support and configurable launch timeout for more reliable browser startup (#852) ### Bug Fixes - Fixed `/json/list` fallback when `/json/version` endpoint is unavailable, improving compatibility with non-standard CDP implementations (#861) - Fixed daemon liveness detection for PID namespace isolation (e.g. `unshare`). Uses socket connectivity as the sole liveness check instead of `kill(pid, 0)`, which fails when the caller cannot see the daemon's PID (#879) - Fixed Ubuntu dependency install accidentally removing system packages (#884) ## 0.20.14 ### Patch Changes - c0d4cf6: ### New Features - **Idle timeout for daemon auto-shutdown** - Added `--idle-timeout` CLI flag (and `AGENT_BROWSER_IDLE_TIMEOUT_MS` environment variable) to automatically shut down the daemon after a period of inactivity. 
Accepts human-friendly formats such as `10s`, `3m`, `1h`, or raw milliseconds (#856) - **Cursor-interactive elements in snapshot tree** - Cursor-interactive elements are now embedded directly into the snapshot tree for richer context (#855) ### Bug Fixes - Fixed **remote host support** in CDP discovery, enabling connection to browsers running on non-local hosts (#854) - Fixed **CDP flag propagation** to the daemon process, ensuring reliable CDP reconnection across sessions (#857) - Fixed **Windows auto-connect profiling** to correctly handle browser connection on Windows (#835, #840) - Fixed **Windows transient error detection** by recognising Windows-specific socket error codes (`os error 10061` connection refused, `os error 10054` connection reset) during daemon reconnection attempts ## 0.20.13 ### Patch Changes - eda956b: ### Bug Fixes - **Network idle detection for cached pages** - Fixed an issue where `poll_network_idle` could return immediately when no network events were observed (e.g. pages served from cache). The idle timer is now only satisfied after a consistent **500 ms idle period** has elapsed, preventing false-positive idle detection. The core polling logic has also been extracted into a standalone `poll_network_idle` function to improve testability (#847) ## 0.20.12 ### Patch Changes - 5fa2396: ### Bug Fixes - Fixed **`snapshot -C`** and **`screenshot --annotate`** hanging when connected over WSS (WebSocket Secure) due to sequential CDP round-trips per interactive element (#842) ### Performance - **`snapshot -C` (cursor-interactive mode)** now batches CDP calls instead of issuing N×2 sequential round-trips per cursor-interactive element, preventing timeouts on high-latency WSS connections (#842) - **`screenshot --annotate`** now batches element queries, reducing completion time from potentially 20–40s (e.g. 
50+ buttons over WSS) to within expected bounds (#842) ## 0.20.11 ### Patch Changes - 4b5fc78: ### Bug Fixes - **Material Design checkbox/radio parity** - Restored Playwright-parity behavior for `check`/`uncheck` actions on Material Design controls. These components hide the native `<input>` off-screen and use overlay elements that intercept coordinate-based clicks; the actions now detect this pattern and fall back to a JS `.click()` to correctly toggle state. Also improves `ischecked` to handle nested hidden inputs and ARIA-only checkboxes (#837) - **Punctuation handling in `type` command** - Fixed incorrect virtual key (VK) codes being used for punctuation characters (e.g. `.`, `@`) in the `type` action, which previously caused those characters to be dropped or mistyped (#836) ## 0.20.10 ### Patch Changes - a3d9662: ### Bug Fixes - **Restored WebSocket streaming** - Fixed broken WebSocket streaming in the native daemon by keeping the **StreamServer** instance alive so the broadcast channel remains open, and ensuring CDP session IDs and connection status are correctly propagated to stream clients (#826) - **Filtered internal Chrome targets** - Fixed auto-connect discovery incorrectly attempting to attach to Chrome-internal pages (e.g. `chrome://`, `chrome-extension://`, `devtools://` URLs), which could cause unexpected connection failures (#827) ## 0.20.9 ### Patch Changes - 51d9ab4: ### Bug Fixes - **Appium v3 iOS capabilities** - Added `appium:` vendor prefix to iOS capabilities (e.g., `appium:automationName`, `appium:deviceName`, `appium:platformVersion`) to comply with the Appium v3 WebDriver protocol requirements (#810) - **Snapshot `--selector` scoping** - Fixed `snapshot --selector` so that the output is properly scoped to the matched element's subtree rather than returning the full accessibility tree. 
The selector now resolves the target DOM node's backend IDs and filters the accessibility tree to only include nodes within that subtree (#825) ## 0.20.8 ### Patch Changes - daf7263: ### Bug Fixes - Fixed **video duration** being reported incorrectly when using real-time ffmpeg encoding for screen recording (#812) - Removed obsolete **`BrowserManager` TypeScript API** references that no longer reflect the current CLI-based usage model (#821) ### Documentation - Updated README to replace outdated **`BrowserManager` programmatic API** examples with the current CLI-based approach using `execSync` and `agent-browser` commands (#821) - Removed the **Programmatic API** section covering `BrowserManager` screencast and input injection methods, which are no longer part of the public API (#821) ## 0.20.7 ### Patch Changes - 25a1526: ### New Features - **Brave Browser support** - Added auto-discovery of Brave Browser for CDP connections on macOS, Linux, and Windows. The agent will now automatically detect and connect to Brave alongside Chrome, Chromium, and Canary installations (#817) ### Improvements - **Postinstall message** - The post-install message now detects existing Chrome installations on the system. If a compatible browser is found, it confirms the path and notes it will be used automatically instead of prompting an install. If no browser is detected, the warning is clearer and mentions that installation can be skipped when using `--cdp`, `--provider`, `--engine`, or `--executable-path` (#815) ## 0.20.6 ### Patch Changes - fa91c22: ### Bug Fixes - **Stale accessibility tree reference fallback** - Fixed an issue where interacting with an element whose **`backend_node_id`** had become stale (e.g. after the DOM was replaced) would fail with a `Could not compute box model` CDP error. 
Element resolution now re-queries the accessibility tree using role/name lookup to obtain a fresh node ID before retrying the operation (#806) ## 0.20.5 ### Patch Changes - fc091d2: ### Bug Fixes - **Daemon panic on broken stderr pipe** - Replaced all `eprintln!` calls with `writeln!(std::io::stderr(), ...)` wrapped in `let _ =` to silently discard write errors, preventing the daemon from panicking when the parent process drops the stderr pipe during Chrome launch (#802) ## 0.20.4 ### Patch Changes - e2ebde2: ### Bug Fixes - **Broadcast channel lag handling** - Fixed an issue where **broadcast channel lag** errors were incorrectly treated as stream closure, causing premature termination of event listeners in reload, response body, download, and navigation wait operations. Lagged messages are now skipped and the loop continues instead of breaking (#797) ### Improvements - Removed unused **pnpm setup** steps from the `global-install` CI job, simplifying the workflow configuration (#798) ## 0.20.3 ### Patch Changes - e365909: ### Bug Fixes - **Chrome launch retry** - Chrome will now retry launching up to 3 times with a 500ms delay between attempts, improving resilience against transient startup failures (#791) - **Remote CDP snapshot hang** - Resolved an issue where snapshots would hang indefinitely over remote CDP (WSS) connections by removing WebSocket message and frame size limits to accommodate large responses (e.g. 
`Accessibility.getFullAXTree`), accepting binary frames from remote proxies such as Browserless, and immediately clearing pending commands when the connection closes rather than waiting for the 30-second timeout (#792) ## 0.20.2 ### Patch Changes - 944fa01: ### New Features - **Linux musl (Alpine) builds** - Added pre-built binaries for **linux-musl** targeting both **x64** and **arm64** architectures, enabling native support for Alpine Linux and other musl-based distributions without requiring glibc (#784) ### Improvements - **Consecutive `--auto-connect` commands** - Added support for issuing multiple consecutive `--auto-connect` commands without requiring a full browser relaunch; external connections are now correctly identified and reused (#786) - **External browser disconnect behavior** - When using `--auto-connect` or `--cdp`, closing the agent session now disconnects cleanly without shutting down the user's browser process ### Bug Fixes - **Restored `refs` dict in `--json` snapshot output** - The `refs` map containing role and name metadata for referenced elements is now correctly included in JSON snapshot responses (#787) - Fixed e2e test assertions for `diff_snapshot` and `domain_filter` to correctly reflect expected behavior (#783) - Fixed Chrome temp-dir cleanup test failing on Windows (#766) ## 0.20.1 ### Patch Changes - bd05917: ### Bug Fixes - Fixed **AX tree deserialization** to accept integer `nodeId` and `childIds` values for compatibility with Lightpanda, which sends numeric IDs where Chrome sends strings (#775) - Fixed **misleading SIGPIPE comment** to accurately describe the default Rust SIGPIPE behavior and why it is reset to `SIG_DFL` (#776) - Fixed **WebM recording output** to use the VP9 codec (`libvpx-vp9`) instead of H.264, producing valid WebM files; also adds a padding filter to ensure even frame dimensions (#779) ## 0.20.0 ### Minor Changes - 235fa88: ### Full Native Rust - **100% native Rust** -- Removed the entire Node.js/Playwright 
daemon. The Rust native daemon is now the only implementation. No Node.js runtime or Playwright dependency required. (#754) - **99x smaller install** -- Install size reduced from 710 MB to 7 MB by eliminating the Node.js dependency tree. - **18x less memory** -- Daemon memory usage reduced from 143 MB to 8 MB. - **1.6x faster cold start** -- Cold start time reduced from 1002ms to 617ms. - **Benchmarks** -- Added benchmark suite comparing native vs Node.js daemon performance. - **Chromium installer hardened** -- Fixed zip path traversal vulnerability in Chrome for Testing installer. ### Bug Fixes - Fixed `--headed false` flag not being respected in CLI (#757) - Fixed "not found" error pattern in `to_ai_friendly_error` incorrectly catching non-element errors (#759) - Fixed storage local key lookup parsing and text output (#761) - Fixed Lightpanda engine launch with release binaries (#760) - Hardened Lightpanda startup timeouts (#762) ## 0.19.0 ### Minor Changes - 56bb92b: ### New Features - **Browserless.io provider** -- Added browserless.io as a browser provider, supported in both Node.js and native daemon paths. Connect to remote Browserless instances with `--provider browserless` or `AGENT_BROWSER_PROVIDER=browserless`. Configurable via `BROWSERLESS_API_KEY`, `BROWSERLESS_API_URL`, and `BROWSERLESS_BROWSER_TYPE` environment variables. (#502, #746) - **`clipboard` command** -- Read from and write to the browser clipboard. Supports `read`, `write <text>`, `copy` (simulates Ctrl+C), and `paste` (simulates Ctrl+V) operations. (#749) - **Screenshot output configuration** -- New global flags `--screenshot-dir`, `--screenshot-quality`, `--screenshot-format` and corresponding `AGENT_BROWSER_SCREENSHOT_DIR`, `AGENT_BROWSER_SCREENSHOT_QUALITY`, `AGENT_BROWSER_SCREENSHOT_FORMAT` environment variables for persistent screenshot settings. 
(#749) ### Bug Fixes - Fixed `wait --text` not working in native daemon path (#749) - Fixed `BrowserManager.navigate()` and package entry point (#748) - Fixed extensions not being loaded from `config.json` (#750) - Fixed scroll on page load (#747) - Fixed HTML retrieval by using `browser.getLocator()` for selector operations (#745) ## 0.18.0 ### Minor Changes - 942b8cd: ### New Features - **`inspect` command** - Opens Chrome DevTools for the active page by launching a local proxy server that forwards the DevTools frontend to the browser's CDP WebSocket. Commands continue to work while DevTools is open. Implemented in both Node.js and native paths. (#736) - **`get cdp-url` subcommand** - Retrieve the Chrome DevTools Protocol WebSocket URL for the active page, useful for external debugging tools. (#736) - **Native screenshot annotate** - The `--annotate` flag for screenshots now works in the native Rust daemon, bringing parity with the Node.js path. (#706) ### Improvements - **KERNEL_API_KEY now optional** - External credential injection no longer requires `KERNEL_API_KEY` to be set, making it easier to use Kernel with pre-configured environments. (#687) - **Browserbase simplified** - Removed the `BROWSERBASE_PROJECT_ID` requirement, reducing setup friction for Browserbase users. (#625) ### Bug Fixes - Fixed Browserbase API using incorrect endpoint to release sessions (#707) - Fixed CDP connect paths using hardcoded 10s timeout instead of `getDefaultTimeout()` (#704) - Fixed lone Unicode surrogates causing errors by sanitizing with `toWellFormed()` (#720) - Fixed CDP connection failure on IPv6-first systems (#717) - Fixed recordings not inheriting the current viewport settings (#718) ## 0.17.1 ### Patch Changes - 94cd888: Added support for device scale factor (retina display) in the viewport command via an optional scale parameter. 
Also added webview target type support for better Electron application compatibility, and the pages list now includes target type information. ## 0.17.0 ### Minor Changes - 94521e7: ### New Features - **Lightpanda browser engine support** - Added `--engine <name>` flag to select the browser engine (`chrome` by default, or `lightpanda`), implying `--native` mode. Configurable via `AGENT_BROWSER_ENGINE` environment variable (#646) - **Dialog dismiss command** - Added support for `dismiss` subcommand in dialog command parsing (#605) ### Improvements - **Daemon startup error reporting** - Daemon startup errors are now surfaced directly instead of showing an opaque timeout message (#614) - **CDP port discovery** - Replaced broken hand-rolled HTTP client with `reqwest` for more reliable CDP port discovery (#619) - **Chrome extensions** - Extensions now load correctly by forcing headed mode when extensions are present (#652) - **Google Translate bar suppression** - Suppressed the Google Translate bar in native headless mode to avoid interference (#649) - **Auth cookie persistence** - Auth cookies are now persisted on browser close in native mode (#650) ### Bug Fixes - Fixed native auth login failing due to incompatible encryption format (#648) ### Documentation - Improved snapshot usage guidance and added reproducibility check (#630) - Added `--engine` flag to the README options table ### Performance - Added benchmarks to the CLI codebase (#637) ## 0.16.3 ### Patch Changes - 7d2c895: Fixed an issue where the --native flag was being passed to child processes even when not explicitly specified on the command line. The flag is now only forwarded when the user explicitly provides it, consistent with how other CLI flags like --allow-file-access and --download-path are handled. ## 0.16.2 ### Patch Changes - 01ac557: Added AGENT_BROWSER_HEADED environment variable support for running the browser in headed mode, and improved temporary profile cleanup when launching Chrome directly. 
Also includes documentation clarification that browser extensions work in both headed and headless modes. ## 0.16.1 ### Patch Changes - c4180c8: Improved Chrome launch reliability by automatically detecting containerized environments (Docker, Podman, Kubernetes) and enabling --no-sandbox when needed. Added support for discovering Playwright-installed Chromium browsers and enhanced error messages with helpful diagnostics when Chrome fails to launch. ## 0.16.0 ### Minor Changes - 05018b3: Added experimental native Rust daemon (`--native` flag, `AGENT_BROWSER_NATIVE=1` env, or `"native": true` in config). The native daemon communicates with Chrome directly via CDP, eliminating Node.js and Playwright dependencies. Supports 150+ commands with full parity to the default Node.js daemon. Includes WebDriver backend for Safari/iOS, CDP protocol codegen, request tracking, frame context management, and comprehensive e2e and parity tests. ## 0.15.3 ### Patch Changes - 62241b5: Fixed Windows compatibility issues including proper handling of extended-length path prefixes from canonicalize(), prevention of MSYS/Git Bash path translation that could mangle arguments, and improved daemon startup reliability. Also added ARM64 Windows support in postinstall shims and expanded CI testing with a full daemon lifecycle test on Windows. ## 0.15.2 ### Patch Changes - 6aea316: Documentation site improvements and internal tooling updates including enhanced code blocks, mobile navigation, and docs chat components. CLI connection and output handling refinements. Skill creator reference documentation and scripts have been reorganized. ## 0.15.1 ### Patch Changes - 7bd8ce9: Added support for chrome:// and chrome-extension:// URLs in navigation and recording commands. These special browser URLs are now preserved as-is instead of having https:// incorrectly prepended. 
## 0.15.0 ### Minor Changes - 2e38882: - Added security hardening: authentication vault, content boundary markers, domain allowlist, action policy, action confirmation, and output length limits. - Added `--download-path` flag (and `AGENT_BROWSER_DOWNLOAD_PATH` env / `downloadPath` config key) to set a default download directory. - Added `--selector` flag to `scroll` command for scrolling within specific container elements. ## 0.14.0 ### Minor Changes - b7665e5: - Added `keyboard` command for raw keyboard input -- type with real keystrokes, insert text, and press shortcuts at the currently focused element without needing a selector. - Added `--color-scheme` flag and `AGENT_BROWSER_COLOR_SCHEME` env var for persistent dark/light mode preference across browser sessions. - Fixed IPC EAGAIN errors (os error 35/11) by adding backpressure-aware socket writes, command serialization, and lowering the default Playwright timeout to 25s (configurable via `AGENT_BROWSER_DEFAULT_TIMEOUT`). - Fixed remote debugging (CDP) reconnection. - Fixed state load failing when no browser is running. - Fixed `--annotate` flag warning appearing when not explicitly passed via CLI. ## 0.13.0 ### Minor Changes - ebd8717: Added new diff commands for comparing snapshots, screenshots, and URLs between page states. You can now run visual pixel diffs against baseline images, compare accessibility tree snapshots with customizable depth and selectors, and diff two URLs side-by-side with optional screenshot comparison. ## 0.12.0 ### Minor Changes - 69ffad0: Add annotated screenshots with the new --annotate flag, which overlays numbered labels on interactive elements and prints a legend mapping each label to its element ref. This enables multimodal AI models to reason about visual layout while using the same @eN refs for subsequent interactions. The flag can also be set via the AGENT_BROWSER_ANNOTATE environment variable. 
## 0.11.1 ### Patch Changes - c6fc7df: Added documentation for command chaining with && across README, CLI help output, docs, and skill files, explaining how to efficiently chain multiple agent-browser commands in a single shell invocation since the browser persists via a background daemon. ## 0.11.0 ### Minor Changes - 5dc40b4: Added configuration file support with automatic loading from user and project directories, new profiler commands for Chrome DevTools profiling, computed styles getter, browser extension loading, storage state management, and iOS device emulation. Expanded click command with new-tab option, improved find command with additional actions and filtering options, and enhanced CDP connection to accept WebSocket URLs. Documentation has been significantly expanded with new sections for configuration, profiling, and proxy support. ## 0.10.0 ### Minor Changes - 1112a16: Added session persistence with automatic save/restore of cookies and localStorage across browser restarts using --session-name flag, with optional AES-256-GCM encryption for saved state data. New state management commands allow listing, showing, renaming, clearing, and cleaning up old session files. Also added --new-tab option for click commands to open links in new tabs. ## 0.9.4 ### Patch Changes - 323b6cd: Fix all Clippy lint warnings in the Rust CLI: remove redundant import, use `.first()` instead of `.get(0)`, use `.copied()` instead of `.map(|s| *s)`, use `.contains()` instead of `.iter().any()`, use `then_some` instead of lazy `then`, and simplify redundant match guards. ## 0.9.3 ### Patch Changes - d03e238: Added support for custom executable path in CLI browser launch options. Documentation site received UI improvements including a new chat component with sheet-based interface and updated dependencies. 
## 0.9.2 ### Patch Changes - 76d23db: Documentation site migrated to MDX for improved content authoring, added AI-powered docs chat feature, and updated README with Homebrew installation instructions for macOS users. ## 0.9.1 ### Patch Changes - ae34945: Added --allow-file-access flag to enable opening and interacting with local file:// URLs (PDFs, HTML files) by passing Chromium flags that allow JavaScript access to local files. Added -C/--cursor flag for snapshots to include cursor-interactive elements like divs with onclick handlers or cursor:pointer styles, which is useful for modern web apps using custom clickable elements. ## 0.9.0 ### Minor Changes - 9d021bd: Add iOS Simulator and real device support for mobile Safari testing via Appium. New CLI commands include `device list` to show available simulators, `tap` and `swipe` for touch interactions, and the `--device` flag to specify which iOS device to use. Configure with `-p ios` provider flag or `AGENT_BROWSER_PROVIDER=ios` environment variable. ## 0.8.10 ### Patch Changes - 17dba8f: Add --stdin flag for eval command to read JavaScript from stdin, enabling heredoc usage for multiline scripts - daeede4: Add --stdin flag for the eval command to read JavaScript from stdin, enabling heredoc usage for multiline scripts. Also fix binary permission issues on macOS/Linux when postinstall scripts don't run (e.g., with bun). ## 0.8.9 ### Patch Changes - 0dc36f2: Add --stdin flag for eval command to read JavaScript from stdin, enabling heredoc usage for multiline scripts ## 0.8.8 ### Patch Changes - 2771588: Added base64 encoding support for the eval command with -b/--base64 flag to avoid shell escaping issues when executing JavaScript. Updated documentation with AI agent setup instructions and reorganized the docs structure by consolidating agent mode content into the installation page. 
## 0.8.7 ### Patch Changes - d24f753: Fixed browser launch options not being passed correctly when using persistent profiles, ensuring args, userAgent, proxy, and ignoreHTTPSErrors settings now work properly. Added pre-flight checks for socket path length limits and directory write permissions to provide clearer error messages when daemon startup fails. Improved error handling to properly exit with failure status when browser launch fails. ## 0.8.6 ### Patch Changes - d75350a: Improved daemon connection reliability by adding automatic retry logic for transient errors like connection resets, broken pipes, and temporary resource unavailability. The CLI now cleans up stale socket and PID files before starting a new daemon, and includes better detection of daemon responsiveness to handle race conditions during shutdown. ## 0.8.5 ### Patch Changes - cb2f8c3: Fixed version synchronization to automatically update Cargo.lock alongside Cargo.toml during releases, and made the CLI binary executable. This ensures the Rust CLI version stays in sync with the npm package version. ## 0.8.4 ### Patch Changes - 759302e: Fixed "Daemon not found" error when running through AI agents (e.g., Claude Code) by resolving symlinks in the executable path. Previously, npm global bin symlinks weren't being resolved correctly, causing intermittent daemon discovery failures. ## 0.8.3 ### Patch Changes - 4116a8a: Replaced shell-based CLI wrappers with a cross-platform Node.js wrapper to enable npx support on Windows. Added postinstall logic to patch npm's bin entry on global installs, allowing the native binary to be invoked directly with zero overhead. Added CI tests to verify global installation works correctly across all platforms. ## 0.8.2 ### Patch Changes - 7e6336f: Fixed the Windows CMD wrapper to use the native binary directly instead of routing through Node.js, improving startup performance and reliability. 
Added retry logic to the CI install command to handle transient failures during browser installation. ## 0.8.1 ### Patch Changes - 8eec634: Improved release workflow to validate binary file sizes and ensure binaries are executable after npm install. Updated documentation site with a new mobile navigation system and added v0.8.0 changelog entries. Reformatted CHANGELOG.md for better readability. ## v0.8.0 ### New Features - **Kernel cloud browser provider** - Connect to Kernel (https://kernel.sh) for remote browser infrastructure via `-p kernel` flag or `AGENT_BROWSER_PROVIDER=kernel`. Supports stealth mode, persistent profiles, and automatic profile find-or-create. - **Ignore HTTPS certificate errors** - New `--ignore-https-errors` flag for working with self-signed certificates and development environments - **Enhanced cookie management** - Extended `cookies set` command with `--url`, `--domain`, `--path`, `--httpOnly`, `--secure`, `--sameSite`, and `--expires` flags for setting cookies before page load ### Bug Fixes - Fixed tab list command not recognizing new pages opened via clicks or `target="_blank"` links (#275) - Fixed `check` command hanging indefinitely (#272) - Fixed `set device` not applying deviceScaleFactor - HiDPI screenshots now work correctly (#270) - Fixed state load and profile persistence not working in v0.7.6 (#268) - Screenshots now save to temp directory when no path is provided (#247) ### Security - Daemon and stream server now reject cross-origin connections (#274) ## 0.7.6 ### Patch Changes - a4d0c26: Allow null values for the screenshot selector field. Previously, passing a null selector would fail validation, but now it is properly handled as an optional value. ## 0.7.5 ### Patch Changes - 8c2a6ec: Fix GitHub release workflow to handle existing releases. If a release already exists, binaries are uploaded to it instead of failing. ## 0.7.4 ### Patch Changes - 957b5e5: Fix binary permissions on install. 
npm doesn't preserve execute bits, so postinstall now ensures the native binary is executable. ## 0.7.3 ### Patch Changes - 161d8f5: Fix native binary distribution in npm package. Native binaries for all platforms (Linux x64/arm64, macOS x64/arm64, Windows x64) are now correctly included when publishing. ## 0.7.2 ### Patch Changes - 6afede2: Fix native binary distribution in npm package. Native binaries for all platforms (Linux x64/arm64, macOS x64/arm64, Windows x64) are now included in the npm package. Previously, the release workflow published to npm before building binaries, causing "No binary found" errors on installation. ## 0.7.1 ### Patch Changes - Fix native binary distribution in npm package. Native binaries for all platforms (Linux x64/arm64, macOS x64/arm64, Windows x64) are now included in the npm package. Previously, the release workflow published to npm before building binaries, causing "No binary found" errors on installation. ## 0.7.0 ### Minor Changes - 316e649: ## New Features - **Cloud browser providers** - Connect to Browserbase or Browser Use for remote browser infrastructure via `-p` flag or `AGENT_BROWSER_PROVIDER` env var - **Persistent browser profiles** - Store cookies, localStorage, and login sessions across browser restarts with `--profile` - **Remote CDP WebSocket URLs** - Connect to remote browser services via WebSocket URL (e.g., `--cdp "wss://..."`) - **Download commands** - New `download` command and `wait --download` for file downloads with ref support - **Browser launch configuration** - New `--args`, `--user-agent`, and `--proxy-bypass` flags for fine-grained browser control - **Enhanced skills** - Hierarchical structure with references and templates for Claude Code ## Bug Fixes - Screenshot command now supports refs and has improved error messages - WebSocket URLs work in `connect` command - Fixed socket file location (uses `~/.agent-browser` instead of TMPDIR) - Windows binary path fix (.exe extension) - State load and 
path-based actions now show correct output messages ## Documentation - Added Claude Code marketplace plugin installation instructions - Updated skill documentation with references and templates - Improved error documentation ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2025 Vercel Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # agent-browser Headless browser automation CLI for AI agents. Fast native Rust CLI. ## Installation ### Global Installation (recommended) Installs the native Rust binary: ```bash npm install -g agent-browser agent-browser install # Download Chrome from Chrome for Testing (first time only) ``` ### Project Installation (local dependency) For projects that want to pin the version in `package.json`: ```bash npm install agent-browser agent-browser install ``` Then use via `package.json` scripts or by invoking `agent-browser` directly. 
### Homebrew (macOS) ```bash brew install agent-browser agent-browser install # Download Chrome from Chrome for Testing (first time only) ``` ### Cargo (Rust) ```bash cargo install agent-browser agent-browser install # Download Chrome from Chrome for Testing (first time only) ``` ### From Source ```bash git clone https://github.com/vercel-labs/agent-browser cd agent-browser pnpm install pnpm build pnpm build:native # Requires Rust (https://rustup.rs) pnpm link --global # Makes agent-browser available globally agent-browser install ``` ### Linux Dependencies On Linux, install system dependencies: ```bash agent-browser install --with-deps ``` ### Updating Upgrade to the latest version: ```bash agent-browser upgrade ``` Detects your installation method (npm, Homebrew, or Cargo) and runs the appropriate update command automatically. ### Requirements - **Chrome** - Run `agent-browser install` to download Chrome from [Chrome for Testing](https://developer.chrome.com/blog/chrome-for-testing/) (Google's official automation channel). No Playwright or Node.js required for the daemon. - **Rust** - Only needed when building from source (see From Source above). 
## Quick Start ```bash agent-browser open example.com agent-browser snapshot # Get accessibility tree with refs agent-browser click @e2 # Click by ref from snapshot agent-browser fill @e3 "test@example.com" # Fill by ref agent-browser get text @e1 # Get text by ref agent-browser screenshot page.png agent-browser close ``` ### Traditional Selectors (also supported) ```bash agent-browser click "#submit" agent-browser fill "#email" "test@example.com" agent-browser find role button click --name "Submit" ``` ## Commands ### Core Commands ```bash agent-browser open <url> # Navigate to URL (aliases: goto, navigate) agent-browser click <sel> # Click element (--new-tab to open in new tab) agent-browser dblclick <sel> # Double-click element agent-browser focus <sel> # Focus element agent-browser type <sel> <text> # Type into element agent-browser fill <sel> <text> # Clear and fill agent-browser press <key> # Press key (Enter, Tab, Control+a) (alias: key) agent-browser keyboard type <text> # Type with real keystrokes (no selector, current focus) agent-browser keyboard inserttext <text> # Insert text without key events (no selector) agent-browser keydown <key> # Hold key down agent-browser keyup <key> # Release key agent-browser hover <sel> # Hover element agent-browser select <sel> <val> # Select dropdown option agent-browser check <sel> # Check checkbox agent-browser uncheck <sel> # Uncheck checkbox agent-browser scroll <dir> [px] # Scroll (up/down/left/right, --selector <sel>) agent-browser scrollintoview <sel> # Scroll element into view (alias: scrollinto) agent-browser drag <src> <tgt> # Drag and drop agent-browser upload <sel> <files> # Upload files agent-browser screenshot [path] # Take screenshot (--full for full page, saves to a temporary directory if no path) agent-browser screenshot --annotate # Annotated screenshot with numbered element labels agent-browser screenshot --screenshot-dir ./shots # Save to custom directory agent-browser screenshot --screenshot-format jpeg --screenshot-quality 80 agent-browser pdf <path> # Save as PDF agent-browser snapshot # Accessibility tree with refs (best for AI) agent-browser eval <js> # Run 
JavaScript (-b for base64, --stdin for piped input) agent-browser connect <port> # Connect to browser via CDP agent-browser close # Close browser (aliases: quit, exit) ``` ### Get Info ```bash agent-browser get text <sel> # Get text content agent-browser get html <sel> # Get innerHTML agent-browser get value <sel> # Get input value agent-browser get attr <sel> <attr> # Get attribute agent-browser get title # Get page title agent-browser get url # Get current URL agent-browser get cdp-url # Get CDP WebSocket URL (for DevTools, debugging) agent-browser get count <sel> # Count matching elements agent-browser get box <sel> # Get bounding box agent-browser get styles <sel> # Get computed styles ``` ### Check State ```bash agent-browser is visible <sel> # Check if visible agent-browser is enabled <sel> # Check if enabled agent-browser is checked <sel> # Check if checked ``` ### Find Elements (Semantic Locators) ```bash agent-browser find role <role> <action> [value] # By ARIA role agent-browser find text <text> <action> # By text content agent-browser find label