Showing preview only (943K chars total). Download the full file or copy to clipboard to get everything.
Repository: bytebot-ai/bytebot
Branch: main
Commit: 3d37894ce07e
Files: 316
Total size: 857.6 KB
Directory structure:
gitextract_hhcv8dmu/
├── .github/
│ └── workflows/
│ ├── build-agent.yaml
│ ├── build-desktop.yaml
│ └── build-ui.yaml
├── .gitignore
├── .prettierignore
├── LICENSE
├── README.md
├── docker/
│ ├── bytebot-desktop.Dockerfile
│ ├── docker-compose-claude-code.yml
│ ├── docker-compose.core.yml
│ ├── docker-compose.development.yml
│ ├── docker-compose.proxy.yml
│ └── docker-compose.yml
├── docs/
│ ├── api-reference/
│ │ ├── agent/
│ │ │ ├── tasks.mdx
│ │ │ └── ui.mdx
│ │ ├── computer-use/
│ │ │ ├── examples.mdx
│ │ │ ├── openapi.json
│ │ │ └── unified-endpoint.mdx
│ │ ├── endpoint/
│ │ │ ├── create.mdx
│ │ │ ├── delete.mdx
│ │ │ ├── get.mdx
│ │ │ └── webhook.mdx
│ │ ├── introduction.mdx
│ │ └── openapi.json
│ ├── core-concepts/
│ │ ├── agent-system.mdx
│ │ ├── architecture.mdx
│ │ ├── desktop-environment.mdx
│ │ └── rpa-comparison.mdx
│ ├── deployment/
│ │ ├── helm.mdx
│ │ ├── litellm.mdx
│ │ └── railway.mdx
│ ├── docs.json
│ ├── guides/
│ │ ├── password-management.mdx
│ │ ├── takeover-mode.mdx
│ │ └── task-creation.mdx
│ ├── introduction.mdx
│ ├── quickstart.mdx
│ └── rest-api/
│ ├── computer-use.mdx
│ ├── examples.mdx
│ ├── input-tracking.mdx
│ └── introduction.mdx
├── helm/
│ ├── Chart.yaml
│ ├── README.md
│ ├── charts/
│ │ ├── bytebot-agent/
│ │ │ ├── Chart.yaml
│ │ │ ├── templates/
│ │ │ │ ├── _helpers.tpl
│ │ │ │ ├── deployment.yaml
│ │ │ │ ├── ingress.yaml
│ │ │ │ ├── secret.yaml
│ │ │ │ └── service.yaml
│ │ │ └── values.yaml
│ │ ├── bytebot-desktop/
│ │ │ ├── Chart.yaml
│ │ │ ├── templates/
│ │ │ │ ├── _helpers.tpl
│ │ │ │ ├── deployment.yaml
│ │ │ │ ├── ingress.yaml
│ │ │ │ ├── pvc.yaml
│ │ │ │ └── service.yaml
│ │ │ └── values.yaml
│ │ ├── bytebot-llm-proxy/
│ │ │ ├── Chart.yaml
│ │ │ ├── templates/
│ │ │ │ ├── _helpers.tpl
│ │ │ │ ├── configmap.yaml
│ │ │ │ ├── deployment.yaml
│ │ │ │ ├── ingress.yaml
│ │ │ │ ├── secret.yaml
│ │ │ │ └── service.yaml
│ │ │ └── values.yaml
│ │ ├── bytebot-ui/
│ │ │ ├── Chart.yaml
│ │ │ ├── templates/
│ │ │ │ ├── _helpers.tpl
│ │ │ │ ├── deployment.yaml
│ │ │ │ ├── hpa.yaml
│ │ │ │ ├── ingress.yaml
│ │ │ │ └── service.yaml
│ │ │ └── values.yaml
│ │ └── postgresql/
│ │ ├── Chart.yaml
│ │ ├── templates/
│ │ │ ├── _helpers.tpl
│ │ │ ├── deployment.yaml
│ │ │ ├── secret.yaml
│ │ │ └── service.yaml
│ │ └── values.yaml
│ ├── templates/
│ │ ├── NOTES.txt
│ │ └── ingress.yaml
│ ├── values-proxy.yaml
│ ├── values-simple.yaml
│ └── values.yaml
└── packages/
├── bytebot-agent/
│ ├── .dockerignore
│ ├── .gitignore
│ ├── .prettierrc
│ ├── Dockerfile
│ ├── eslint.config.mjs
│ ├── nest-cli.json
│ ├── package.json
│ ├── prisma/
│ │ ├── migrations/
│ │ │ ├── 20250328022708_initial_migration/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250413053912_message_role/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250522200556_updated_task_structure/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250523162632_add_scheduling/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250529003255_tasks_control/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250530012753_tasks_control/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250619013027_add_better_auth_schema/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250622195148_add_user_to_task/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250706223912_model_picker/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250722041608_files/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250820172813_remove_auth/
│ │ │ │ └── migration.sql
│ │ │ └── migration_lock.toml
│ │ └── schema.prisma
│ ├── src/
│ │ ├── agent/
│ │ │ ├── agent.analytics.ts
│ │ │ ├── agent.computer-use.ts
│ │ │ ├── agent.constants.ts
│ │ │ ├── agent.module.ts
│ │ │ ├── agent.processor.ts
│ │ │ ├── agent.scheduler.ts
│ │ │ ├── agent.tools.ts
│ │ │ ├── agent.types.ts
│ │ │ └── input-capture.service.ts
│ │ ├── anthropic/
│ │ │ ├── anthropic.constants.ts
│ │ │ ├── anthropic.module.ts
│ │ │ ├── anthropic.service.ts
│ │ │ └── anthropic.tools.ts
│ │ ├── app.controller.ts
│ │ ├── app.module.ts
│ │ ├── app.service.ts
│ │ ├── google/
│ │ │ ├── google.constants.ts
│ │ │ ├── google.module.ts
│ │ │ ├── google.service.ts
│ │ │ └── google.tools.ts
│ │ ├── main.ts
│ │ ├── messages/
│ │ │ ├── messages.module.ts
│ │ │ └── messages.service.ts
│ │ ├── openai/
│ │ │ ├── openai.constants.ts
│ │ │ ├── openai.module.ts
│ │ │ ├── openai.service.ts
│ │ │ └── openai.tools.ts
│ │ ├── prisma/
│ │ │ ├── prisma.module.ts
│ │ │ └── prisma.service.ts
│ │ ├── proxy/
│ │ │ ├── proxy.module.ts
│ │ │ ├── proxy.service.ts
│ │ │ └── proxy.tools.ts
│ │ ├── summaries/
│ │ │ ├── summaries.modue.ts
│ │ │ └── summaries.service.ts
│ │ └── tasks/
│ │ ├── dto/
│ │ │ ├── add-task-message.dto.ts
│ │ │ ├── create-task.dto.ts
│ │ │ └── update-task.dto.ts
│ │ ├── tasks.controller.ts
│ │ ├── tasks.gateway.ts
│ │ ├── tasks.module.ts
│ │ └── tasks.service.ts
│ ├── tsconfig.build.json
│ └── tsconfig.json
├── bytebot-agent-cc/
│ ├── .dockerignore
│ ├── .gitignore
│ ├── .prettierrc
│ ├── Dockerfile
│ ├── eslint.config.mjs
│ ├── nest-cli.json
│ ├── package.json
│ ├── prisma/
│ │ ├── migrations/
│ │ │ ├── 20250328022708_initial_migration/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250413053912_message_role/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250522200556_updated_task_structure/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250523162632_add_scheduling/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250529003255_tasks_control/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250530012753_tasks_control/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250619013027_add_better_auth_schema/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250622195148_add_user_to_task/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250706223912_model_picker/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250722041608_files/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250820172813_remove_auth/
│ │ │ │ └── migration.sql
│ │ │ └── migration_lock.toml
│ │ └── schema.prisma
│ ├── src/
│ │ ├── agent/
│ │ │ ├── agent.analytics.ts
│ │ │ ├── agent.computer-use.ts
│ │ │ ├── agent.constants.ts
│ │ │ ├── agent.module.ts
│ │ │ ├── agent.processor.ts
│ │ │ ├── agent.scheduler.ts
│ │ │ ├── agent.tools.ts
│ │ │ ├── agent.types.ts
│ │ │ └── input-capture.service.ts
│ │ ├── app.controller.ts
│ │ ├── app.module.ts
│ │ ├── app.service.ts
│ │ ├── main.ts
│ │ ├── messages/
│ │ │ ├── messages.module.ts
│ │ │ └── messages.service.ts
│ │ ├── prisma/
│ │ │ ├── prisma.module.ts
│ │ │ └── prisma.service.ts
│ │ └── tasks/
│ │ ├── dto/
│ │ │ ├── add-task-message.dto.ts
│ │ │ ├── create-task.dto.ts
│ │ │ └── update-task.dto.ts
│ │ ├── tasks.controller.ts
│ │ ├── tasks.gateway.ts
│ │ ├── tasks.module.ts
│ │ └── tasks.service.ts
│ ├── tsconfig.build.json
│ └── tsconfig.json
├── bytebot-llm-proxy/
│ ├── Dockerfile
│ └── litellm-config.yaml
├── bytebot-ui/
│ ├── .dockerignore
│ ├── .gitignore
│ ├── .prettierrc.json
│ ├── Dockerfile
│ ├── components.json
│ ├── eslint.config.mjs
│ ├── next.config.ts
│ ├── package.json
│ ├── postcss.config.mjs
│ ├── server.ts
│ ├── src/
│ │ ├── app/
│ │ │ ├── api/
│ │ │ │ └── [[...path]]/
│ │ │ │ └── route.ts
│ │ │ ├── desktop/
│ │ │ │ └── page.tsx
│ │ │ ├── globals.css
│ │ │ ├── layout.tsx
│ │ │ ├── page.tsx
│ │ │ └── tasks/
│ │ │ ├── [id]/
│ │ │ │ └── page.tsx
│ │ │ └── page.tsx
│ │ ├── components/
│ │ │ ├── VirtualDesktopStatusHeader.tsx
│ │ │ ├── layout/
│ │ │ │ └── Header.tsx
│ │ │ ├── messages/
│ │ │ │ ├── AssistantMessage.tsx
│ │ │ │ ├── ChatContainer.tsx
│ │ │ │ ├── ChatInput.tsx
│ │ │ │ ├── MessageAvatar.tsx
│ │ │ │ ├── MessageGroup.tsx
│ │ │ │ ├── UserMessage.tsx
│ │ │ │ └── content/
│ │ │ │ ├── ComputerToolContent.tsx
│ │ │ │ ├── ComputerToolContentNormal.tsx
│ │ │ │ ├── ComputerToolContentTakeOver.tsx
│ │ │ │ ├── ComputerToolUtils.tsx
│ │ │ │ ├── ErrorContent.tsx
│ │ │ │ ├── ImageContent.tsx
│ │ │ │ ├── MessageContent.tsx
│ │ │ │ └── TextContent.tsx
│ │ │ ├── screenshot/
│ │ │ │ └── ScreenshotViewer.tsx
│ │ │ ├── tasks/
│ │ │ │ ├── TaskItem.tsx
│ │ │ │ ├── TaskList.tsx
│ │ │ │ └── TaskTabs.tsx
│ │ │ ├── ui/
│ │ │ │ ├── TopicPopover.tsx
│ │ │ │ ├── button.tsx
│ │ │ │ ├── card.tsx
│ │ │ │ ├── copy-button.tsx
│ │ │ │ ├── desktop-container.tsx
│ │ │ │ ├── dropdown-menu.tsx
│ │ │ │ ├── input.tsx
│ │ │ │ ├── label.tsx
│ │ │ │ ├── loader.tsx
│ │ │ │ ├── pagination.tsx
│ │ │ │ ├── popover.tsx
│ │ │ │ ├── scroll-area.tsx
│ │ │ │ ├── select.tsx
│ │ │ │ ├── separator.tsx
│ │ │ │ ├── switch.tsx
│ │ │ │ └── text-shimmer.tsx
│ │ │ └── vnc/
│ │ │ └── VncViewer.tsx
│ │ ├── constants/
│ │ │ └── ui.constants.ts
│ │ ├── hooks/
│ │ │ ├── useChatSession.ts
│ │ │ ├── useScrollScreenshot.ts
│ │ │ └── useWebSocket.ts
│ │ ├── lib/
│ │ │ └── utils.ts
│ │ ├── types/
│ │ │ └── index.ts
│ │ └── utils/
│ │ ├── clipboard.ts
│ │ ├── screenshotUtils.ts
│ │ ├── stringUtils.ts
│ │ └── taskUtils.ts
│ └── tsconfig.json
├── bytebotd/
│ ├── .dockerignore
│ ├── .prettierrc
│ ├── Dockerfile
│ ├── eslint.config.mjs
│ ├── nest-cli.json
│ ├── package.json
│ ├── root/
│ │ ├── etc/
│ │ │ ├── firefox/
│ │ │ │ └── policies/
│ │ │ │ └── policies.json
│ │ │ ├── lightdm/
│ │ │ │ └── lightdm.conf.d/
│ │ │ │ └── 50-autologin.conf
│ │ │ ├── supervisor/
│ │ │ │ └── conf.d/
│ │ │ │ └── supervisord.conf
│ │ │ └── thunderbird/
│ │ │ └── policies/
│ │ │ └── policies.json
│ │ ├── home/
│ │ │ └── user/
│ │ │ ├── .config/
│ │ │ │ └── xfce4/
│ │ │ │ ├── desktop/
│ │ │ │ │ └── icons.screen0-1264x913.rc
│ │ │ │ ├── helpers.rc
│ │ │ │ ├── terminal/
│ │ │ │ │ └── accels.scm
│ │ │ │ └── xfconf/
│ │ │ │ └── xfce-perchannel-xml/
│ │ │ │ ├── displays.xml
│ │ │ │ ├── thunar.xml
│ │ │ │ ├── xfce4-appfinder.xml
│ │ │ │ ├── xfce4-desktop.xml
│ │ │ │ ├── xfce4-keyboard-shortcuts.xml
│ │ │ │ ├── xfce4-notifyd.xml
│ │ │ │ ├── xfce4-panel.xml
│ │ │ │ └── xfwm4.xml
│ │ │ └── .xsessionrc
│ │ └── usr/
│ │ └── share/
│ │ └── applications/
│ │ ├── 1password.desktop
│ │ ├── code.desktop
│ │ ├── firefox.desktop
│ │ ├── terminal.desktop
│ │ └── thunderbird.desktop
│ ├── src/
│ │ ├── app.controller.ts
│ │ ├── app.module.ts
│ │ ├── app.service.ts
│ │ ├── computer-use/
│ │ │ ├── computer-use.controller.ts
│ │ │ ├── computer-use.module.ts
│ │ │ ├── computer-use.service.ts
│ │ │ └── dto/
│ │ │ ├── base.dto.ts
│ │ │ ├── computer-action-validation.pipe.ts
│ │ │ └── computer-action.dto.ts
│ │ ├── input-tracking/
│ │ │ ├── input-tracking.controller.ts
│ │ │ ├── input-tracking.gateway.ts
│ │ │ ├── input-tracking.helpers.ts
│ │ │ ├── input-tracking.module.ts
│ │ │ └── input-tracking.service.ts
│ │ ├── main.ts
│ │ ├── mcp/
│ │ │ ├── bytebot-mcp.module.ts
│ │ │ ├── compressor.ts
│ │ │ ├── computer-use.tools.ts
│ │ │ └── index.ts
│ │ └── nut/
│ │ ├── nut.module.ts
│ │ └── nut.service.ts
│ ├── tsconfig.build.json
│ └── tsconfig.json
└── shared/
├── package.json
├── src/
│ ├── index.ts
│ ├── types/
│ │ ├── computerAction.types.ts
│ │ └── messageContent.types.ts
│ └── utils/
│ ├── computerAction.utils.ts
│ └── messageContent.utils.ts
└── tsconfig.json
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/workflows/build-agent.yaml
================================================
name: Build Agent
on:
push:
branches:
- main
paths:
- "packages/bytebot-agent/**"
- "packages/shared/**"
permissions:
contents: read
packages: write
jobs:
docker:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- uses: docker/setup-qemu-action@v3
- uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/bytebot-ai/bytebot-agent
tags: type=edge
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@v6
env:
BUILDX_NO_DEFAULT_ATTESTATIONS: 1
DOCKER_BUILD_SUMMARY: false
DOCKER_BUILD_RECORD_UPLOAD: false
with:
context: ./packages
file: ./packages/bytebot-agent/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
cache-from: type=gha
cache-to: type=gha,mode=max
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
================================================
FILE: .github/workflows/build-desktop.yaml
================================================
name: Build Desktop
on:
push:
branches:
- main
paths:
- "docker/**"
- "packages/bytebotd/**"
permissions:
contents: read
packages: write
jobs:
docker:
runs-on: ubuntu-22.04
steps:
# 1. Check out code
- uses: actions/checkout@v4
# 2. Enable QEMU so the amd64 runner can cross‑build arm64
- uses: docker/setup-qemu-action@v3
# 3. Set up Buildx builder
- uses: docker/setup-buildx-action@v3
# 4. Generate OCI labels + the single "edge" tag
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/bytebot-ai/bytebot-desktop
tags: type=edge
# 5. Log in to GHCR
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# 6. Build & push a multi‑arch image
- name: Build and push
uses: docker/build-push-action@v6
env:
BUILDX_NO_DEFAULT_ATTESTATIONS: 1 # hide "unknown/unknown" in GHCR
DOCKER_BUILD_SUMMARY: false # keep logs concise
DOCKER_BUILD_RECORD_UPLOAD: false
with:
context: ./packages/
file: ./packages/bytebotd/Dockerfile
platforms: linux/amd64,linux/arm64 # build both archs in one go
push: true
cache-from: type=gha
cache-to: type=gha,mode=max
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
================================================
FILE: .github/workflows/build-ui.yaml
================================================
name: Build UI
on:
push:
branches:
- main
paths:
- "packages/bytebot-ui/**"
- "packages/shared/**"
permissions:
contents: read
packages: write
jobs:
docker:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- uses: docker/setup-qemu-action@v3
- uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/bytebot-ai/bytebot-ui
tags: type=edge
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@v6
env:
BUILDX_NO_DEFAULT_ATTESTATIONS: 1
DOCKER_BUILD_SUMMARY: false
DOCKER_BUILD_RECORD_UPLOAD: false
with:
context: ./packages
file: ./packages/bytebot-ui/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
cache-from: type=gha
cache-to: type=gha,mode=max
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
================================================
FILE: .gitignore
================================================
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional stylelint cache
.stylelintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
out
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Uncomment the public line below if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# vuepress v2.x temp and cache directory
.temp
.cache
# Docusaurus cache and generated files
.docusaurus
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*
*.qcow2
*.iso
*.img
*.vdi
*.vmdk
*.vhdx
*.vhd
# compiled output
agent/dist
agent/node_modules
agent/build
# Logs
logs
*.log
npm-debug.log*
pnpm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# OS
.DS_Store
# Tests
agent/coverage
agent/.nyc_output
# IDEs and editors
agent/.idea
agent/.project
agent/.classpath
.c9/
*.launch
.settings/
*.sublime-workspace
# IDE - VSCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
# dotenv environment variable files
.env.development.local
.env.test.local
.env.production.local
.env.local
# temp directory
.temp
.tmp
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# QEMU
*.qcow2
================================================
FILE: .prettierignore
================================================
# Ignore formatting in docs folder
/docs/**
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
<div align="center">
<img src="docs/images/bytebot-logo.png" width="500" alt="Bytebot Logo">
# Bytebot: Open-Source AI Desktop Agent
<a href="https://trendshift.io/repositories/14624" target="_blank"><img src="https://trendshift.io/api/badge/repositories/14624" alt="bytebot-ai%2Fbytebot | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
**An AI that has its own computer to complete tasks for you**
[Deploy on Railway](https://railway.com/deploy/bytebot?referralCode=L9lKXQ)
[Docker](https://github.com/bytebot-ai/bytebot/tree/main/docker)
[License](LICENSE)
[Discord](https://discord.com/invite/d9ewZkWPTP)
[🌐 Website](https://bytebot.ai) • [📚 Documentation](https://docs.bytebot.ai) • [💬 Discord](https://discord.com/invite/d9ewZkWPTP) • [𝕏 Twitter](https://x.com/bytebot_ai)
<!-- Keep these links. Translations will automatically update with the README. -->
[Deutsch](https://zdoc.app/de/bytebot-ai/bytebot) |
[Español](https://zdoc.app/es/bytebot-ai/bytebot) |
[Français](https://zdoc.app/fr/bytebot-ai/bytebot) |
[日本語](https://zdoc.app/ja/bytebot-ai/bytebot) |
[한국어](https://zdoc.app/ko/bytebot-ai/bytebot) |
[Português](https://zdoc.app/pt/bytebot-ai/bytebot) |
[Русский](https://zdoc.app/ru/bytebot-ai/bytebot) |
[中文](https://zdoc.app/zh/bytebot-ai/bytebot)
</div>
---
https://github.com/user-attachments/assets/f271282a-27a3-43f3-9b99-b34007fdd169
https://github.com/user-attachments/assets/72a43cf2-bd87-44c5-a582-e7cbe176f37f
## What is a Desktop Agent?
A desktop agent is an AI that has its own computer. Unlike browser-only agents or traditional RPA tools, Bytebot comes with a full virtual desktop where it can:
- Use any application (browsers, email clients, office tools, IDEs)
- Download and organize files with its own file system
- Log into websites and applications using password managers
- Read and process documents, PDFs, and spreadsheets
- Complete complex multi-step workflows across different programs
Think of it as a virtual employee with their own computer who can see the screen, move the mouse, type on the keyboard, and complete tasks just like a human would.
## Why Give AI Its Own Computer?
When AI has access to a complete desktop environment, it unlocks capabilities that aren't possible with browser-only agents or API integrations:
### Complete Task Autonomy
Give Bytebot a task like "Download all invoices from our vendor portals and organize them into a folder" and it will:
- Open the browser
- Navigate to each portal
- Handle authentication (including 2FA via password managers)
- Download the files to its local file system
- Organize them into a folder
### Process Documents
Upload files directly to Bytebot's desktop and it can:
- Read entire PDFs into its context
- Extract data from complex documents
- Cross-reference information across multiple files
- Create new documents based on analysis
- Handle formats that APIs can't access
### Use Real Applications
Bytebot isn't limited to web interfaces. It can:
- Use desktop applications like text editors, VS Code, or email clients
- Run scripts and command-line tools
- Install new software as needed
- Configure applications for specific workflows
## Quick Start
### Deploy in 2 Minutes
**Option 1: Railway (Easiest)**
[Deploy on Railway](https://railway.com/deploy/bytebot?referralCode=L9lKXQ)
Just click and add your AI provider API key.
**Option 2: Docker Compose**
```bash
git clone https://github.com/bytebot-ai/bytebot.git
cd bytebot
# Add your AI provider key (choose one)
echo "ANTHROPIC_API_KEY=sk-ant-..." > docker/.env
# Or: echo "OPENAI_API_KEY=sk-..." > docker/.env
# Or: echo "GEMINI_API_KEY=..." > docker/.env
docker-compose -f docker/docker-compose.yml up -d
# Open http://localhost:9992
```
[Full deployment guide →](https://docs.bytebot.ai/quickstart)
## How It Works
Bytebot consists of four integrated components:
1. **Virtual Desktop**: A complete Ubuntu Linux environment with pre-installed applications
2. **AI Agent**: Understands your tasks and controls the desktop to complete them
3. **Task Interface**: Web UI where you create tasks and watch Bytebot work
4. **APIs**: REST endpoints for programmatic task creation and desktop control
### Key Features
- **Natural Language Tasks**: Just describe what you need done
- **File Uploads**: Drop files onto tasks for Bytebot to process
- **Live Desktop View**: Watch Bytebot work in real-time
- **Takeover Mode**: Take control when you need to help or configure something
- **Password Manager Support**: Install 1Password, Bitwarden, etc. for automatic authentication
- **Persistent Environment**: Install programs and they stay available for future tasks
## Example Tasks
### Basic Examples
```
"Go to Wikipedia and create a summary of quantum computing"
"Research flights from NYC to London and create a comparison document"
"Take screenshots of the top 5 news websites"
```
### Document Processing
```
"Read the uploaded contracts.pdf and extract all payment terms and deadlines"
"Process these 5 invoice PDFs and create a summary report"
"Download and analyze the latest financial report and answer: What were the key risks mentioned?"
```
### Multi-Application Workflows
```
"Download last month's bank statements from our three banks and consolidate them"
"Check all our vendor portals for new invoices and create a summary report"
"Log into our CRM, export the customer list, and update records in the ERP system"
```
## Programmatic Control
### Create Tasks via API
```python
import requests

# Simple task
response = requests.post('http://localhost:9991/tasks', json={
    'description': 'Download the latest sales report and create a summary'
})

# Task with file upload. The context manager closes the file handle once the
# request has been sent, instead of leaking it for the life of the process.
with open('contracts.pdf', 'rb') as contract:
    response = requests.post('http://localhost:9991/tasks',
        data={'description': 'Review these contracts for important dates'},
        files={'files': contract}
    )
```
### Direct Desktop Control
```bash
# Take a screenshot
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "screenshot"}'

# Click at specific coordinates
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "click_mouse", "coordinates": {"x": 500, "y": 300}, "button": "left", "clickCount": 1}'
```
[Full API documentation →](https://docs.bytebot.ai/api-reference/introduction)
## Setting Up Your Desktop Agent
### 1. Deploy Bytebot
Use one of the deployment methods above to get Bytebot running.
### 2. Configure the Desktop
Use the Desktop tab in the UI to:
- Install additional programs you need
- Set up password managers for authentication
- Configure applications with your preferences
- Log into websites you want Bytebot to access
### 3. Start Giving Tasks
Create tasks in natural language and watch Bytebot complete them using the configured desktop.
## Use Cases
### Business Process Automation
- Invoice processing and data extraction
- Multi-system data synchronization
- Report generation from multiple sources
- Compliance checking across platforms
### Development & Testing
- Automated UI testing
- Cross-browser compatibility checks
- Documentation generation with screenshots
- Code deployment verification
### Research & Analysis
- Competitive analysis across websites
- Data gathering from multiple sources
- Document analysis and summarization
- Market research compilation
## Architecture
Bytebot is built with:
- **Desktop**: Ubuntu 22.04 with XFCE, Firefox, VS Code, and other tools
- **Agent**: NestJS service that coordinates AI and desktop actions
- **UI**: Next.js application for task management
- **AI Support**: Works with Anthropic Claude, OpenAI GPT, Google Gemini
- **Deployment**: Docker containers for easy self-hosting
## Why Self-Host?
- **Data Privacy**: Everything runs on your infrastructure
- **Full Control**: Customize the desktop environment as needed
- **No Limits**: Use your own AI API keys without platform restrictions
- **Flexibility**: Install any software, access any systems
## Advanced Features
### Multiple AI Providers
Use any AI provider through our [LiteLLM integration](https://docs.bytebot.ai/deployment/litellm):
- Azure OpenAI
- AWS Bedrock
- Local models via Ollama
- 100+ other providers
### Enterprise Deployment
Deploy on Kubernetes with Helm:
```bash
# Clone the repository
git clone https://github.com/bytebot-ai/bytebot.git
cd bytebot
# Install with Helm
helm install bytebot ./helm \
--set agent.env.ANTHROPIC_API_KEY=sk-ant-...
```
[Enterprise deployment guide →](https://docs.bytebot.ai/deployment/helm)
## Community & Support
- **Discord**: [Join our community](https://discord.com/invite/d9ewZkWPTP) for help and discussions
- **Documentation**: Comprehensive guides at [docs.bytebot.ai](https://docs.bytebot.ai)
- **GitHub Issues**: Report bugs and request features
## Contributing
We welcome contributions! Whether it's:
- 🐛 Bug fixes
- ✨ New features
- 📚 Documentation improvements
- 🌐 Translations
Please:
1. Check existing [issues](https://github.com/bytebot-ai/bytebot/issues) first
2. Open an issue to discuss major changes
3. Submit PRs with clear descriptions
4. Join our [Discord](https://discord.com/invite/d9ewZkWPTP) to discuss ideas
## License
Bytebot is open source under the Apache 2.0 license.
---
<div align="center">
**Give your AI its own computer. See what it can do.**
[Deploy on Railway](https://railway.com/deploy/bytebot?referralCode=L9lKXQ)
<sub>Built by [Tantl Labs](https://tantl.com) and the open source community</sub>
</div>
================================================
FILE: docker/bytebot-desktop.Dockerfile
================================================
# Customization layer for the Bytebot desktop: starts from the published
# desktop image so extra software can be added without rebuilding it from source.
# Extend the pre-built bytebot-desktop image
# NOTE(review): `edge` looks like a moving tag; pin a fixed tag or digest if
# reproducible builds are required - confirm which tags are published.
FROM ghcr.io/bytebot-ai/bytebot-desktop:edge
# Add additional packages, applications, or customizations here
# Expose the bytebotd service port
EXPOSE 9990
# Start the bytebotd service
# supervisord is launched with -n (no-daemon) so it stays in the foreground as
# the container's main process.
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf", "-n"]
================================================
FILE: docker/docker-compose-claude-code.yml
================================================
name: bytebot
services:
bytebot-desktop:
# Build from source
build:
context: ../packages/
dockerfile: bytebotd/Dockerfile
# Use pre-built image
image: ghcr.io/bytebot-ai/bytebot-desktop:edge
shm_size: "2g"
container_name: bytebot-desktop
restart: unless-stopped
hostname: computer
privileged: true
ports:
- "9990:9990" # bytebotd service & noVNC
environment:
- DISPLAY=:0
networks:
- bytebot-network
postgres:
image: postgres:16-alpine
container_name: bytebot-postgres
restart: unless-stopped
ports:
- "5432:5432"
environment:
- POSTGRES_PASSWORD=postgres
- POSTGRES_USER=postgres
- POSTGRES_DB=bytebotdb
networks:
- bytebot-network
volumes:
- postgres_data:/var/lib/postgresql/data
bytebot-agent-cc:
build:
context: ../packages/
dockerfile: bytebot-agent-cc/Dockerfile
container_name: bytebot-agent-cc
restart: unless-stopped
ports:
- "9991:9991"
environment:
- DATABASE_URL=${DATABASE_URL:-postgresql://postgres:postgres@postgres:5432/bytebotdb}
- BYTEBOT_DESKTOP_BASE_URL=${BYTEBOT_DESKTOP_BASE_URL:-http://bytebot-desktop:9990}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
depends_on:
- postgres
networks:
- bytebot-network
bytebot-ui:
build:
context: ../packages/
dockerfile: bytebot-ui/Dockerfile
args:
- BYTEBOT_AGENT_BASE_URL=${BYTEBOT_AGENT_BASE_URL:-http://bytebot-agent-cc:9991}
- BYTEBOT_DESKTOP_VNC_URL=${BYTEBOT_DESKTOP_VNC_URL:-http://bytebot-desktop:9990/websockify}
# Use pre-built image
image: ghcr.io/bytebot-ai/bytebot-ui:edge
container_name: bytebot-ui
restart: unless-stopped
ports:
- "9992:9992"
environment:
- NODE_ENV=production
- BYTEBOT_AGENT_BASE_URL=${BYTEBOT_AGENT_BASE_URL:-http://bytebot-agent-cc:9991}
- BYTEBOT_DESKTOP_VNC_URL=${BYTEBOT_DESKTOP_VNC_URL:-http://bytebot-desktop:9990/websockify}
depends_on:
- bytebot-agent-cc
networks:
- bytebot-network
networks:
bytebot-network:
driver: bridge
volumes:
postgres_data:
================================================
FILE: docker/docker-compose.core.yml
================================================
# Core stack: this file defines only the bytebot-desktop service
# (no postgres, agent, or UI services are declared here).
name: bytebot
services:
bytebot-desktop:
# Build from source
# The build context path is resolved relative to this compose file.
build:
context: ../packages/
dockerfile: bytebotd/Dockerfile
# Use pre-built image
# NOTE(review): `build` and `image` are both set on this service; confirm
# whether the published image or a local build is intended to win.
image: ghcr.io/bytebot-ai/bytebot-desktop:edge
shm_size: "2g"
container_name: bytebot-desktop
restart: unless-stopped
hostname: computer
# NOTE(review): privileged mode grants the container broad access to the host;
# confirm it is actually required by the desktop image.
privileged: true
ports:
- "9990:9990" # bytebotd service & noVNC
environment:
- DISPLAY=:0
================================================
FILE: docker/docker-compose.development.yml
================================================
## docker-compose file that spins up a bytebot-desktop container
## and a postgres container. bytebot-ui and bytebot-agent are not included
## in this file, and can be run separately using npm, allowing for
## easier local development.
name: bytebot
services:
bytebot-desktop:
# Build from source
build:
context: ../packages/
dockerfile: bytebotd/Dockerfile
# Use pre-built image
image: ghcr.io/bytebot-ai/bytebot-desktop:edge
shm_size: "2g"
container_name: bytebot-desktop
restart: unless-stopped
hostname: computer
privileged: true
ports:
- "9990:9990" # bytebotd service & noVNC
environment:
- DISPLAY=:0
networks:
- bytebot-network
postgres:
image: postgres:16-alpine
container_name: bytebot-postgres
restart: unless-stopped
ports:
- "5432:5432"
environment:
- POSTGRES_PASSWORD=postgres
- POSTGRES_USER=postgres
- POSTGRES_DB=bytebotdb
networks:
- bytebot-network
volumes:
- postgres_data:/var/lib/postgresql/data
networks:
bytebot-network:
driver: bridge
volumes:
postgres_data:
================================================
FILE: docker/docker-compose.proxy.yml
================================================
name: bytebot
services:
bytebot-desktop:
# Build from source
build:
context: ../packages/
dockerfile: bytebotd/Dockerfile
# Use pre-built image
image: ghcr.io/bytebot-ai/bytebot-desktop:edge
shm_size: "2g"
container_name: bytebot-desktop
restart: unless-stopped
hostname: computer
privileged: true
ports:
- "9990:9990" # bytebotd service & noVNC
environment:
- DISPLAY=:0
networks:
- bytebot-network
postgres:
image: postgres:16-alpine
container_name: bytebot-postgres
restart: unless-stopped
ports:
- "5432:5432"
environment:
- POSTGRES_PASSWORD=postgres
- POSTGRES_USER=postgres
- POSTGRES_DB=bytebotdb
networks:
- bytebot-network
volumes:
- postgres_data:/var/lib/postgresql/data
bytebot-agent:
build:
context: ../packages/
dockerfile: bytebot-agent/Dockerfile
# Use pre-built image
image: ghcr.io/bytebot-ai/bytebot-agent:edge
container_name: bytebot-agent
restart: unless-stopped
ports:
- "9991:9991"
environment:
- DATABASE_URL=${DATABASE_URL:-postgresql://postgres:postgres@postgres:5432/bytebotdb}
- BYTEBOT_DESKTOP_BASE_URL=${BYTEBOT_DESKTOP_BASE_URL:-http://bytebot-desktop:9990}
- BYTEBOT_LLM_PROXY_URL=${BYTEBOT_LLM_PROXY_URL:-http://bytebot-llm-proxy:4000}
depends_on:
- postgres
networks:
- bytebot-network
bytebot-llm-proxy:
build:
context: ../packages/
dockerfile: bytebot-llm-proxy/Dockerfile
ports:
- "4000:4000"
environment:
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- GEMINI_API_KEY=${GEMINI_API_KEY}
networks:
- bytebot-network
bytebot-ui:
build:
context: ../packages/
dockerfile: bytebot-ui/Dockerfile
args:
- BYTEBOT_AGENT_BASE_URL=${BYTEBOT_AGENT_BASE_URL:-http://bytebot-agent:9991}
- BYTEBOT_DESKTOP_VNC_URL=${BYTEBOT_DESKTOP_VNC_URL:-http://bytebot-desktop:9990/websockify}
# Use pre-built image
image: ghcr.io/bytebot-ai/bytebot-ui:edge
container_name: bytebot-ui
restart: unless-stopped
ports:
- "9992:9992"
environment:
- NODE_ENV=production
- BYTEBOT_AGENT_BASE_URL=${BYTEBOT_AGENT_BASE_URL:-http://bytebot-agent:9991}
- BYTEBOT_DESKTOP_VNC_URL=${BYTEBOT_DESKTOP_VNC_URL:-http://bytebot-desktop:9990/websockify}
depends_on:
- bytebot-agent
networks:
- bytebot-network
networks:
bytebot-network:
driver: bridge
volumes:
postgres_data:
================================================
FILE: docker/docker-compose.yml
================================================
name: bytebot
services:
bytebot-desktop:
# Build from source
build:
context: ../packages/
dockerfile: bytebotd/Dockerfile
# Use pre-built image
image: ghcr.io/bytebot-ai/bytebot-desktop:edge
shm_size: "2g"
container_name: bytebot-desktop
restart: unless-stopped
hostname: computer
privileged: true
ports:
- "9990:9990" # bytebotd service & noVNC
environment:
- DISPLAY=:0
networks:
- bytebot-network
postgres:
image: postgres:16-alpine
container_name: bytebot-postgres
restart: unless-stopped
ports:
- "5432:5432"
environment:
- POSTGRES_PASSWORD=postgres
- POSTGRES_USER=postgres
- POSTGRES_DB=bytebotdb
networks:
- bytebot-network
volumes:
- postgres_data:/var/lib/postgresql/data
bytebot-agent:
build:
context: ../packages/
dockerfile: bytebot-agent/Dockerfile
# Use pre-built image
image: ghcr.io/bytebot-ai/bytebot-agent:edge
container_name: bytebot-agent
restart: unless-stopped
ports:
- "9991:9991"
environment:
- DATABASE_URL=${DATABASE_URL:-postgresql://postgres:postgres@postgres:5432/bytebotdb}
- BYTEBOT_DESKTOP_BASE_URL=${BYTEBOT_DESKTOP_BASE_URL:-http://bytebot-desktop:9990}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- GEMINI_API_KEY=${GEMINI_API_KEY}
depends_on:
- postgres
networks:
- bytebot-network
bytebot-ui:
build:
context: ../packages/
dockerfile: bytebot-ui/Dockerfile
args:
- BYTEBOT_AGENT_BASE_URL=${BYTEBOT_AGENT_BASE_URL:-http://bytebot-agent:9991}
- BYTEBOT_DESKTOP_VNC_URL=${BYTEBOT_DESKTOP_VNC_URL:-http://bytebot-desktop:9990/websockify}
# Use pre-built image
image: ghcr.io/bytebot-ai/bytebot-ui:edge
container_name: bytebot-ui
restart: unless-stopped
ports:
- "9992:9992"
environment:
- NODE_ENV=production
- BYTEBOT_AGENT_BASE_URL=${BYTEBOT_AGENT_BASE_URL:-http://bytebot-agent:9991}
- BYTEBOT_DESKTOP_VNC_URL=${BYTEBOT_DESKTOP_VNC_URL:-http://bytebot-desktop:9990/websockify}
depends_on:
- bytebot-agent
networks:
- bytebot-network
networks:
bytebot-network:
driver: bridge
volumes:
postgres_data:
================================================
FILE: docs/api-reference/agent/tasks.mdx
================================================
---
title: 'Tasks API'
description: 'Reference documentation for the Bytebot Agent Tasks API'
---
## Tasks API
The Tasks API allows you to manage tasks in the Bytebot agent system. It's available at `http://localhost:9991/tasks` when running the full agent setup.
## Task Model
```typescript
{
id: string;
description: string;
status: 'PENDING' | 'IN_PROGRESS' | 'NEEDS_HELP' | 'NEEDS_REVIEW' | 'COMPLETED' | 'CANCELLED' | 'FAILED';
priority: 'LOW' | 'MEDIUM' | 'HIGH' | 'URGENT';
createdAt: string;
updatedAt: string;
}
```
## Endpoints
### Create Task
Create a new task for the agent to process.
<Card title="POST /tasks" icon="plus">
Create a new task
</Card>
#### Request Body
```json
{
"description": "This is a description of the task",
"priority": "MEDIUM" // Optional: LOW, MEDIUM, HIGH, URGENT
}
```
#### With File Upload
To upload files with a task, use `multipart/form-data`:
```bash
curl -X POST http://localhost:9991/tasks \
-F "description=Analyze the uploaded contracts and extract key terms" \
-F "priority=HIGH" \
-F "files=@contract1.pdf" \
-F "files=@contract2.pdf"
```
Uploaded files are automatically saved to the desktop and can be referenced in the task description.
#### Response
```json
{
"id": "task-123",
"description": "This is a description of the task",
"status": "PENDING",
"priority": "MEDIUM",
"createdAt": "2025-04-14T12:00:00Z",
"updatedAt": "2025-04-14T12:00:00Z"
}
```
### Get All Tasks
Retrieve a list of all tasks.
<Card title="GET /tasks" icon="list">
Get all tasks
</Card>
#### Response
```json
[
{
"id": "task-123",
"description": "This is a description of the task",
"status": "PENDING",
"priority": "MEDIUM",
"createdAt": "2025-04-14T12:00:00Z",
"updatedAt": "2025-04-14T12:00:00Z"
},
// ...more tasks
]
```
### Get In-Progress Task
Retrieve the currently in-progress task, if any.
<Card title="GET /tasks/in-progress" icon="play">
Get the currently in-progress task
</Card>
#### Response
```json
{
"id": "task-123",
"description": "This is a description of the task",
"status": "IN_PROGRESS",
"priority": "MEDIUM",
"createdAt": "2025-04-14T12:00:00Z",
"updatedAt": "2025-04-14T12:00:00Z"
}
```
If no task is in progress, the response will be `null`.
### Get Task by ID
Retrieve a specific task by its ID.
<Card title="GET /tasks/:id" icon="magnifying-glass">
Get a task by ID
</Card>
#### Response
```json
{
"id": "task-123",
"description": "This is a description of the task",
"status": "PENDING",
"priority": "MEDIUM",
"createdAt": "2025-04-14T12:00:00Z",
"updatedAt": "2025-04-14T12:00:00Z",
"messages": [
{
"id": "msg-456",
"content": [
{
"type": "text",
"text": "This is a message"
}
],
"role": "USER",
"taskId": "task-123",
"createdAt": "2025-04-14T12:05:00Z",
"updatedAt": "2025-04-14T12:05:00Z"
}
// ...more messages
]
}
```
### Update Task
Update an existing task.
<Card title="PATCH /tasks/:id" icon="pen">
Update a task
</Card>
#### Request Body
```json
{
"status": "COMPLETED",
"priority": "HIGH"
}
```
#### Response
```json
{
"id": "task-123",
"description": "This is a description of the task",
"status": "COMPLETED",
"priority": "HIGH",
"createdAt": "2025-04-14T12:00:00Z",
"updatedAt": "2025-04-14T12:01:00Z"
}
```
### Delete Task
Delete a task.
<Card title="DELETE /tasks/:id" icon="trash">
Delete a task
</Card>
#### Response
Status code `204 No Content` with an empty response body.
## Message Content Structure
Messages in the Bytebot agent system use a content block structure compatible with Anthropic's Claude API:
```typescript
type MessageContent = MessageContentBlock[];
interface MessageContentBlock {
type: string;
[key: string]: any;
}
interface TextContentBlock {
type: "text";
text: string;
}
interface ImageContentBlock {
type: "image";
source: {
type: "base64";
media_type: string;
data: string;
};
}
```
## Error Responses
The API may return the following error responses:
| Status Code | Description |
|-------------|--------------------------------------------|
| `400` | Bad Request - Invalid parameters |
| `404` | Not Found - Resource does not exist |
| `500` | Internal Server Error - Server side error |
Example error response:
```json
{
"statusCode": 404,
"message": "Task with ID task-123 not found",
"error": "Not Found"
}
```
## Code Examples
<CodeGroup>
```javascript JavaScript
const axios = require('axios');
async function createTask(description) {
const response = await axios.post('http://localhost:9991/tasks', {
description
});
return response.data;
}
async function findInProgressTask() {
const response = await axios.get('http://localhost:9991/tasks/in-progress');
return response.data;
}
// Example usage
async function main() {
  // Create a new task
  const task = await createTask('Compare React, Vue, and Angular for a new project');
  console.log('Created task:', task);

  // Get current in-progress task
  const inProgressTask = await findInProgressTask();
  console.log('In progress task:', inProgressTask);
}

// Run the example; without this call the snippet defines main() but does nothing.
// The catch handler reports request failures instead of leaving an unhandled rejection.
main().catch((error) => {
  console.error('Request failed:', error.message);
});
```
```python Python
import requests
def create_task(description):
response = requests.post(
"http://localhost:9991/tasks",
json={
"description": description
}
)
return response.json()
def find_in_progress_task():
response = requests.get("http://localhost:9991/tasks/in-progress")
return response.json()
# Example usage
def main():
    # Create a new task
    task = create_task("Compare React, Vue, and Angular for a new project")
    print(f"Created task: {task}")

    # Get current in-progress task
    in_progress_task = find_in_progress_task()
    print(f"In progress task: {in_progress_task}")


# Run the example only when executed as a script; without this the snippet
# defines main() but never calls it.
if __name__ == "__main__":
    main()
```
```bash cURL
# Create a new task
curl -X POST http://localhost:9991/tasks \
-H "Content-Type: application/json" \
-d '{
"description": "Compare React, Vue, and Angular for a new project"
}'
# Get current in-progress task
curl -X GET http://localhost:9991/tasks/in-progress
```
</CodeGroup>
================================================
FILE: docs/api-reference/agent/ui.mdx
================================================
---
title: 'Task UI'
description: 'Documentation for the Bytebot Task UI'
---
## Bytebot Task UI
The Bytebot Task UI provides a web-based interface for interacting with the Bytebot agent system. It combines an action feed with an embedded noVNC viewer, allowing you to watch the agent perform tasks on the desktop in real time.
<img src="/static/chat-ui-overview.png" alt="Bytebot Task Detail" className="w-full max-w-4xl" />
## Accessing the UI
When running the full Bytebot agent system, the Task UI is available at:
```
http://localhost:9992
```
## UI Components
### Task Management Panel
The task management panel allows you to:
- Create new tasks
- View existing tasks
- See task status and priority
- Select a task to work on
<img src="/static/ui-task-management.png" alt="Task Management Panel" className="w-full max-w-4xl" />
### Task Interface
The main task interface provides:
- Task history with the agent
- Support for markdown formatting in messages
- Automatic scrolling to new messages
### Desktop Viewer
The embedded noVNC viewer displays:
- Real-time view of the desktop environment
- Visual feedback of agent actions
- Option to expand to take over the desktop
- Connection status indicator
## Features
### Task Creation
To create a new task:
1. Enter a description for the task
2. Click "Start Task" button (or press Enter)
### Conversation Controls
The task interface supports:
- Text messages with markdown formatting
- Viewing image content in messages
- Displaying tool use actions
- Showing tool results
### Desktop Interaction
While primarily for viewing, the desktop panel allows:
- Taking over the desktop
- Real-time monitoring of agent actions
## Message Types
The task interface displays different types of messages based on Bytebot's content block structure:
- **User Messages**: Your instructions and queries
- **Assistant Messages**: Responses from the agent, which may include:
- **Text Content Blocks**: Markdown-formatted text responses
- **Image Content Blocks**: Images generated or captured
- **Tool Use Content Blocks**: Computer actions being performed
- **Tool Result Content Blocks**: Results of computer actions
The message content structure follows this format:
```typescript
interface Message {
id: string;
content: MessageContentBlock[];
role: Role; // "USER" or "ASSISTANT"
createdAt?: string;
}
interface MessageContentBlock {
type: string;
[key: string]: any;
}
interface TextContentBlock extends MessageContentBlock {
type: "text";
text: string;
}
interface ImageContentBlock extends MessageContentBlock {
type: "image";
source: {
type: "base64";
media_type: string;
data: string;
};
}
```
## Technical Details
The Bytebot Task UI is built with:
- **Next.js**: React framework for the frontend
- **Tailwind CSS**: For styling
- **ReactMarkdown**: For rendering markdown content
- **noVNC**: For the embedded desktop viewer
## Troubleshooting
### Connection Issues
If you experience connection issues:
1. Ensure all Bytebot services are running
2. Check that ports 9990, 9991, and 9992 are accessible
3. Try refreshing the browser
4. Check browser console for error messages
### Desktop Viewer Issues
If the desktop viewer is not displaying:
1. Ensure the Bytebot container is running
2. Check that the noVNC service is accessible at port 9990
### Message Display Issues
If messages are not displaying correctly:
1. Check that the message content is properly formatted
2. Ensure the agent service is processing tasks correctly
3. Check the browser console for any rendering errors
4. Try refreshing the browser
================================================
FILE: docs/api-reference/computer-use/examples.mdx
================================================
---
title: "Computer Use API Examples"
description: "Code examples for common automation scenarios using the Bytebot API"
---
## Basic Examples
Here are some practical examples of how to use the Computer Use API in different programming languages.
### Using cURL
<CodeGroup>
```bash Opening a Web Browser
# Move to Firefox/Chrome icon in the dock and click it
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "move_mouse", "coordinates": {"x": 100, "y": 960}}'
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "click_mouse", "button": "left", "clickCount": 1}'
```
```bash Taking and Saving a Screenshot
# Take a screenshot
response=$(curl -s -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "screenshot"}')

# Extract the base64 image data and save to a file.
# The variable is quoted (and passed via printf) so the JSON reaches jq
# unchanged, without word splitting or glob expansion by the shell.
printf '%s' "$response" | jq -r '.data.image' | base64 -d > screenshot.png
```
```bash Typing and Keyboard Shortcuts
# Type text in a text editor
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "type_text", "text": "Hello, this is an automated test!", "delay": 30}'
# Press Ctrl+S to save
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "press_keys", "key": "s", "modifiers": ["control"]}'
```
</CodeGroup>
### Python Examples
<CodeGroup>
```python Basic Automation
import requests
import json
import base64
import time
from io import BytesIO
from PIL import Image
def control_computer(action, **params):
url = "http://localhost:9990/computer-use"
data = {"action": action, **params}
response = requests.post(url, json=data)
return response.json()
# Open a web browser by clicking an icon
control_computer("move_mouse", coordinates={"x": 100, "y": 960})
control_computer("click_mouse", button="left")
# Wait for the browser to open
control_computer("wait", duration=2000)
# Type a URL
control_computer("type_text", text="https://example.com")
control_computer("press_keys", key="enter")
```
```python Screenshot and Analysis
import requests
import json
import base64
import cv2
import numpy as np
from PIL import Image
from io import BytesIO

def take_screenshot():
    """Capture the desktop and return it as a BGR NumPy array.

    Returns None when the API response reports success as false.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": "screenshot"}
    response = requests.post(url, json=data)
    result = response.json()  # parse the body once and reuse it
    if result["success"]:
        img_data = base64.b64decode(result["data"]["image"])
        # PIL decodes to RGB (or RGBA); OpenCV's cvtColor/imwrite assume BGR.
        # Normalise to 3-channel RGB and swap the channels so colours are not
        # inverted in the saved file or mis-weighted in the grayscale image.
        image = Image.open(BytesIO(img_data)).convert("RGB")
        return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    return None

# Take a screenshot
img = take_screenshot()

# Convert to grayscale for analysis
if img is not None:
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Save the screenshot
    cv2.imwrite("screenshot.png", img)

    # Perform image analysis (example: find edges)
    edges = cv2.Canny(gray, 100, 200)
    cv2.imwrite("edges.png", edges)
```
```python Web Form Automation
import requests
import time
def control_computer(action, **params):
url = "http://localhost:9990/computer-use"
data = {"action": action, **params}
response = requests.post(url, json=data)
return response.json()
def fill_web_form(form_fields):
    """Fill a form field by field, then submit it with Enter.

    form_fields is a list of dicts with "x", "y" and "value" keys. Only the
    first entry's coordinates are used, to focus the first field; the
    remaining fields are reached with Tab.
    """
    if not form_fields:
        return  # nothing to fill; avoids indexing an empty list

    # Click on the first form field. Send only x/y: the field dict also
    # carries "value", which does not belong in the coordinates payload.
    first_field = form_fields[0]
    control_computer(
        "move_mouse",
        coordinates={"x": first_field["x"], "y": first_field["y"]},
    )
    control_computer("click_mouse", button="left")

    # Fill out each field
    for i, field in enumerate(form_fields):
        # Input the field value
        control_computer("type_text", text=field["value"])

        # If not the last field, press Tab to move to next field
        if i < len(form_fields) - 1:
            control_computer("press_keys", key="tab")
            time.sleep(0.5)

    # Submit the form by pressing Enter
    control_computer("press_keys", key="enter")
# Example form fields with coordinates and values
form_fields = [
{"x": 500, "y": 300, "value": "John Doe"},
{"x": 500, "y": 350, "value": "john@example.com"},
{"x": 500, "y": 400, "value": "Password123"}
]
fill_web_form(form_fields)
```
</CodeGroup>
### JavaScript/Node.js Examples
<CodeGroup>
```javascript Basic Automation
const axios = require('axios');
async function controlComputer(action, params = {}) {
const url = "http://localhost:9990/computer-use";
const data = { action, ...params };
try {
const response = await axios.post(url, data);
return response.data;
} catch (error) {
console.error('Error:', error.message);
return { success: false, error: error.message };
}
}
// Example: Automate opening an application and typing
async function automateTextEditor() {
try {
// Open text editor by clicking its icon
await controlComputer("move_mouse", { coordinates: { x: 150, y: 960 } });
await controlComputer("click_mouse", { button: "left" });
// Wait for it to open
await controlComputer("wait", { duration: 2000 });
// Type some text
await controlComputer("type_text", {
text: "This is an automated test using Node.js and Bytebot",
delay: 30
});
console.log("Automation completed successfully");
} catch (error) {
console.error("Automation failed:", error);
}
}
automateTextEditor();
```
```javascript Advanced: Screenshot Comparison
const axios = require('axios');
const fs = require('fs');
const { createCanvas, loadImage } = require('canvas');
const pixelmatch = require('pixelmatch');
async function controlComputer(action, params = {}) {
const url = "http://localhost:9990/computer-use";
const data = { action, ...params };
try {
const response = await axios.post(url, data);
return response.data;
} catch (error) {
console.error('Error:', error.message);
return { success: false, error: error.message };
}
}
async function compareScreenshots() {
try {
// Take first screenshot
const screenshot1 = await controlComputer("screenshot");
// Do some actions
await controlComputer("move_mouse", { coordinates: { x: 500, y: 500 } });
await controlComputer("click_mouse", { button: "left" });
await controlComputer("wait", { duration: 1000 });
// Take second screenshot
const screenshot2 = await controlComputer("screenshot");
// Compare screenshots
if (screenshot1.success && screenshot2.success) {
const img1Data = Buffer.from(screenshot1.data.image, 'base64');
const img2Data = Buffer.from(screenshot2.data.image, 'base64');
fs.writeFileSync('screenshot1.png', img1Data);
fs.writeFileSync('screenshot2.png', img2Data);
// Now you could load and compare these images
// This requires additional image comparison libraries
console.log('Screenshots saved for comparison');
}
} catch (error) {
console.error("Screenshot comparison failed:", error);
}
}
compareScreenshots();
```
</CodeGroup>
## File Operations
### Writing Files
These examples show how to write files to the desktop environment:
<CodeGroup>
```python Python
import requests
import base64
def write_file(path, content):
url = "http://localhost:9990/computer-use"
# Encode content to base64
encoded_content = base64.b64encode(content.encode('utf-8')).decode('utf-8')
data = {
"action": "write_file",
"path": path,
"data": encoded_content
}
response = requests.post(url, json=data)
return response.json()
# Write a text file
result = write_file("/home/user/hello.txt", "Hello, Bytebot!")
print(result) # {'success': True, 'message': 'File written successfully...'}
# Write to desktop (relative path)
result = write_file("report.txt", "Daily report content")
print(result) # File will be written to /home/user/Desktop/report.txt
```
```javascript JavaScript
const axios = require('axios');
async function writeFile(path, content) {
const url = "http://localhost:9990/computer-use";
// Encode content to base64
const encodedContent = Buffer.from(content, 'utf-8').toString('base64');
const data = {
action: "write_file",
path: path,
data: encodedContent
};
const response = await axios.post(url, data);
return response.data;
}
// Write a text file
writeFile("/home/user/notes.txt", "Meeting notes...")
  .then(result => console.log(result))
  .catch(error => console.error(error));

// Write HTML file to desktop
const htmlContent = '<html><body><h1>Hello</h1></body></html>';
writeFile("index.html", htmlContent)
  .then(() => console.log("HTML file created"))
  // Handle failures here too, so a failed request is reported rather than
  // surfacing as an unhandled promise rejection.
  .catch(error => console.error(error));
```
</CodeGroup>
### Reading Files
These examples show how to read files from the desktop environment:
<CodeGroup>
```python Python
import requests
import base64
def read_file(path):
    """Read a file from the desktop and return its decoded text and metadata.

    Returns a dict with 'content', 'name', 'size' and 'mediaType' keys.
    Raises RuntimeError when the API response reports success as false, so
    callers never receive an error payload that lacks those keys.
    """
    url = "http://localhost:9990/computer-use"
    data = {
        "action": "read_file",
        "path": path
    }
    response = requests.post(url, json=data)
    result = response.json()

    if not result['success']:
        raise RuntimeError(result.get('message', f"Failed to read {path}"))

    # Decode the base64 content
    content = base64.b64decode(result['data']).decode('utf-8')
    return {
        'content': content,
        'name': result['name'],
        'size': result['size'],
        'mediaType': result['mediaType']
    }
# Read a text file
file_data = read_file("/home/user/hello.txt")
print(f"Content: {file_data['content']}")
print(f"Size: {file_data['size']} bytes")
print(f"Type: {file_data['mediaType']}")
```
```javascript JavaScript
const axios = require('axios');
async function readFile(path) {
const url = "http://localhost:9990/computer-use";
const data = {
action: "read_file",
path: path
};
const response = await axios.post(url, data);
const result = response.data;
if (result.success) {
// Decode the base64 content
const content = Buffer.from(result.data, 'base64').toString('utf-8');
return {
content: content,
name: result.name,
size: result.size,
mediaType: result.mediaType
};
} else {
throw new Error(result.message);
}
}
// Read a file from desktop
readFile("report.txt")
.then(fileData => {
console.log(`Content: ${fileData.content}`);
console.log(`Size: ${fileData.size} bytes`);
console.log(`Type: ${fileData.mediaType}`);
})
.catch(error => console.error("Error reading file:", error));
```
</CodeGroup>
## Automation Recipes
### Browser Automation
This example demonstrates how to automate browser interactions:
```python
import requests
import time
def control_computer(action, **params):
    """POST one action to the computer-use endpoint and return the parsed JSON reply.

    Args:
        action: Value of the request's ``action`` field.
        **params: Extra fields merged into the request body.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    response = requests.post(url, json=data)
    return response.json()


def automate_browser():
    """Open a browser, load a page, take a screenshot, follow a link and scroll.

    All coordinates are placeholders and must be adjusted to the target screen.
    """
    # Open browser (assuming browser icon is at position x=100, y=960)
    control_computer("move_mouse", coordinates={"x": 100, "y": 960})
    # click_mouse requires both `button` and `clickCount`
    control_computer("click_mouse", button="left", clickCount=1)
    time.sleep(3)  # Wait for browser to open

    # Type URL
    control_computer("type_text", text="https://example.com")
    # type_keys taps the key; press_keys would need a `keys` list plus an
    # explicit `press` direction ('down' / 'up')
    control_computer("type_keys", keys=["enter"])
    time.sleep(2)  # Wait for page to load

    # Take screenshot of the loaded page (the response is not used further here)
    control_computer("screenshot")

    # Click on a link (coordinates would need to be adjusted for your target)
    control_computer("move_mouse", coordinates={"x": 300, "y": 400})
    control_computer("click_mouse", button="left", clickCount=1)
    time.sleep(2)

    # Scroll down
    control_computer("scroll", direction="down", scrollCount=5)
automate_browser()
```
### Form Filling Automation
This example shows how to automate filling out a form in a web application:
```javascript
const axios = require("axios");
/**
 * Send a single action to the computer-use endpoint.
 *
 * @param {string} action - Value of the request's `action` field.
 * @param {object} [params={}] - Extra fields merged into the request body.
 * @returns {Promise<*>} The body of the HTTP response.
 */
async function controlComputer(action, params = {}) {
  const endpoint = "http://localhost:9990/computer-use";
  const { data: body } = await axios.post(endpoint, { action, ...params });
  return body;
}
/**
 * Fill in a three-field form (name, email, message) and submit it.
 *
 * Coordinates are placeholders for the first input field and must be
 * adjusted to the target page.
 *
 * @returns {Promise<void>}
 */
async function fillForm() {
  // type_keys taps a key; press_keys would need a `keys` array plus an
  // explicit `press` direction ('down' / 'up').
  const tapKey = (key) => controlComputer("type_keys", { keys: [key] });

  // Click first input field (click_mouse requires both `button` and `clickCount`)
  await controlComputer("move_mouse", { coordinates: { x: 400, y: 300 } });
  await controlComputer("click_mouse", { button: "left", clickCount: 1 });

  // Type name
  await controlComputer("type_text", { text: "John Doe" });

  // Tab to next field
  await tapKey("tab");

  // Type email
  await controlComputer("type_text", { text: "john@example.com" });

  // Tab to next field
  await tapKey("tab");

  // Type message
  await controlComputer("type_text", {
    text: "This is an automated message sent using Bytebot's Computer Use API",
    delay: 30,
  });

  // Tab to submit button
  await tapKey("tab");

  // Press Enter to submit
  await tapKey("enter");
}
fillForm().catch(console.error);
```
## Integration with Testing Frameworks
The Computer Use API can be integrated with popular testing frameworks:
### Selenium Alternative
Bytebot can serve as an alternative to Selenium for web testing:
```python
import requests
import time
import json
class BytebotWebDriver:
    """Small Selenium-style wrapper around the computer-use endpoint.

    Every helper funnels through ``control_computer``, which POSTs one action
    to ``{base_url}/computer-use``.
    """

    def __init__(self, base_url="http://localhost:9990"):
        self.base_url = base_url

    def control_computer(self, action, **params):
        """POST one action (plus its parameters) and return the parsed JSON reply."""
        url = f"{self.base_url}/computer-use"
        data = {"action": action, **params}
        response = requests.post(url, json=data)
        return response.json()

    def open_browser(self, browser_icon_coords):
        """Click the browser icon at the given ``{"x", "y"}`` coordinates."""
        self.control_computer("move_mouse", coordinates=browser_icon_coords)
        # click_mouse requires both `button` and `clickCount`
        self.control_computer("click_mouse", button="left", clickCount=1)
        time.sleep(3)  # Wait for browser to open

    def navigate_to(self, url):
        """Type ``url`` into the focused field and confirm with Enter."""
        self.control_computer("type_text", text=url)
        # type_keys taps the key; press_keys alone would need a matching release
        self.control_computer("type_keys", keys=["enter"])
        time.sleep(2)  # Wait for page to load

    def click_element(self, coords):
        """Move to ``coords`` and left-click once."""
        self.control_computer("move_mouse", coordinates=coords)
        self.control_computer("click_mouse", button="left", clickCount=1)

    def type_text(self, text):
        """Type ``text`` as a string."""
        self.control_computer("type_text", text=text)

    def press_keys(self, key, modifiers=None):
        """Press ``key`` together with optional ``modifiers``, then release them.

        Args:
            key: Name of the main key.
            modifiers: Optional modifier key name, or list of names, held
                while ``key`` is pressed.
        """
        if isinstance(modifiers, str):
            modifiers = [modifiers]
        # The API takes a `keys` list plus a `press` direction, so a key
        # combination is one 'down' request followed by one 'up' request.
        keys = [*(modifiers or []), key]
        self.control_computer("press_keys", keys=keys, press="down")
        # Release in reverse order so the modifiers are let go last
        self.control_computer("press_keys", keys=keys[::-1], press="up")

    def take_screenshot(self):
        """Return the reply of the ``screenshot`` action."""
        return self.control_computer("screenshot")
# Usage example
driver = BytebotWebDriver()
driver.open_browser({"x": 100, "y": 960})
driver.navigate_to("https://example.com")
driver.click_element({"x": 300, "y": 400})
driver.type_text("Hello Bytebot!")
```
================================================
FILE: docs/api-reference/computer-use/openapi.json
================================================
{
"openapi": "3.1.0",
"info": {
"title": "Bytebot Computer Use API",
"version": "1.0.0",
"description": "Control the Bytebot virtual desktop via a single endpoint"
},
"paths": {
"/computer-use": {
"post": {
"summary": "Execute a computer action",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ComputerAction"
}
}
}
},
"responses": {
"200": {
"description": "Successful response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ComputerActionResponse"
}
}
}
},
"500": {
"description": "Error executing action",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"status": {"type": "string"},
"error": {"type": "string"}
}
}
}
}
}
}
}
}
},
"components": {
"schemas": {
"Coordinates": {
"type": "object",
"properties": {
"x": {"type": "number"},
"y": {"type": "number"}
},
"required": ["x", "y"]
},
"Button": {
"type": "string",
"enum": ["left", "right", "middle"]
},
"Press": {
"type": "string",
"enum": ["up", "down"]
},
"ScrollDirection": {
"type": "string",
"enum": ["up", "down", "left", "right"]
},
"MoveMouseAction": {
"type": "object",
"properties": {
"action": {"enum": ["move_mouse"]},
"coordinates": {"$ref": "#/components/schemas/Coordinates"}
},
"required": ["action", "coordinates"]
},
"TraceMouseAction": {
"type": "object",
"properties": {
"action": {"enum": ["trace_mouse"]},
"path": {
"type": "array",
"items": {"$ref": "#/components/schemas/Coordinates"}
},
"holdKeys": {
"type": "array",
"items": {"type": "string"}
}
},
"required": ["action", "path"]
},
"ClickMouseAction": {
"type": "object",
"properties": {
"action": {"enum": ["click_mouse"]},
"coordinates": {"$ref": "#/components/schemas/Coordinates"},
"button": {"$ref": "#/components/schemas/Button"},
"holdKeys": {
"type": "array",
"items": {"type": "string"}
},
"clickCount": {"type": "integer", "minimum": 1}
},
"required": ["action", "button", "clickCount"]
},
"PressMouseAction": {
"type": "object",
"properties": {
"action": {"enum": ["press_mouse"]},
"coordinates": {"$ref": "#/components/schemas/Coordinates"},
"button": {"$ref": "#/components/schemas/Button"},
"press": {"$ref": "#/components/schemas/Press"}
},
"required": ["action", "button", "press"]
},
"DragMouseAction": {
"type": "object",
"properties": {
"action": {"enum": ["drag_mouse"]},
"path": {
"type": "array",
"items": {"$ref": "#/components/schemas/Coordinates"}
},
"button": {"$ref": "#/components/schemas/Button"},
"holdKeys": {
"type": "array",
"items": {"type": "string"}
}
},
"required": ["action", "path", "button"]
},
"ScrollAction": {
"type": "object",
"properties": {
"action": {"enum": ["scroll"]},
"coordinates": {"$ref": "#/components/schemas/Coordinates"},
"direction": {"$ref": "#/components/schemas/ScrollDirection"},
"scrollCount": {"type": "integer", "minimum": 1},
"holdKeys": {
"type": "array",
"items": {"type": "string"}
}
},
"required": ["action", "direction", "scrollCount"]
},
"TypeKeysAction": {
"type": "object",
"properties": {
"action": {"enum": ["type_keys"]},
"keys": {
"type": "array",
"items": {"type": "string"}
},
"delay": {"type": "integer", "minimum": 0}
},
"required": ["action", "keys"]
},
"PressKeysAction": {
"type": "object",
"properties": {
"action": {"enum": ["press_keys"]},
"keys": {
"type": "array",
"items": {"type": "string"}
},
"press": {"$ref": "#/components/schemas/Press"}
},
"required": ["action", "keys", "press"]
},
"TypeTextAction": {
"type": "object",
"properties": {
"action": {"enum": ["type_text"]},
"text": {"type": "string"},
"delay": {"type": "integer", "minimum": 0}
},
"required": ["action", "text"]
},
"WaitAction": {
"type": "object",
"properties": {
"action": {"enum": ["wait"]},
"duration": {"type": "integer", "minimum": 0}
},
"required": ["action", "duration"]
},
"ScreenshotAction": {
"type": "object",
"properties": {
"action": {"enum": ["screenshot"]}
},
"required": ["action"]
},
"CursorPositionAction": {
"type": "object",
"properties": {
"action": {"enum": ["cursor_position"]}
},
"required": ["action"]
},
"ComputerAction": {
"oneOf": [
{"$ref": "#/components/schemas/MoveMouseAction"},
{"$ref": "#/components/schemas/TraceMouseAction"},
{"$ref": "#/components/schemas/ClickMouseAction"},
{"$ref": "#/components/schemas/PressMouseAction"},
{"$ref": "#/components/schemas/DragMouseAction"},
{"$ref": "#/components/schemas/ScrollAction"},
{"$ref": "#/components/schemas/TypeKeysAction"},
{"$ref": "#/components/schemas/PressKeysAction"},
{"$ref": "#/components/schemas/TypeTextAction"},
{"$ref": "#/components/schemas/WaitAction"},
{"$ref": "#/components/schemas/ScreenshotAction"},
{"$ref": "#/components/schemas/CursorPositionAction"}
],
"discriminator": {
"propertyName": "action",
"mapping": {
"move_mouse": "#/components/schemas/MoveMouseAction",
"trace_mouse": "#/components/schemas/TraceMouseAction",
"click_mouse": "#/components/schemas/ClickMouseAction",
"press_mouse": "#/components/schemas/PressMouseAction",
"drag_mouse": "#/components/schemas/DragMouseAction",
"scroll": "#/components/schemas/ScrollAction",
"type_keys": "#/components/schemas/TypeKeysAction",
"press_keys": "#/components/schemas/PressKeysAction",
"type_text": "#/components/schemas/TypeTextAction",
"wait": "#/components/schemas/WaitAction",
"screenshot": "#/components/schemas/ScreenshotAction",
"cursor_position": "#/components/schemas/CursorPositionAction"
}
}
},
"ScreenshotResponse": {
"type": "object",
"properties": {
"image": {
"type": "string",
"description": "Base64 encoded PNG"
}
},
"required": ["image"]
},
"CursorPosition": {
"type": "object",
"properties": {
"x": {"type": "number"},
"y": {"type": "number"}
},
"required": ["x", "y"]
},
"ComputerActionResponse": {
"type": "object",
"properties": {
"success": {"type": "boolean"},
"data": {
"oneOf": [
{"$ref": "#/components/schemas/ScreenshotResponse"},
{"$ref": "#/components/schemas/CursorPosition"}
]
}
},
"required": ["success"]
}
}
}
}
================================================
FILE: docs/api-reference/computer-use/unified-endpoint.mdx
================================================
---
title: "Unified Computer Actions API"
description: "Control all aspects of the desktop environment with a single endpoint"
---
## Overview
The unified computer action API allows for granular control over all aspects of the Bytebot virtual desktop environment through a single endpoint. It replaces multiple specific endpoints with a unified interface that handles various computer actions like mouse movements, clicks, key presses, and more.
## Endpoint
| Method | URL | Description |
| ------ | ---------------- | ----------------------------------------------- |
| POST | `/computer-use` | Execute computer actions in the virtual desktop |
## Request Format
All requests to the unified endpoint follow this format:
```json
{
"action": "action_name",
...action-specific parameters
}
```
The `action` parameter determines which operation to perform, and the remaining parameters depend on the specific action.
## Available Actions
### move_mouse
Move the mouse cursor to a specific position.
**Parameters:**
| Parameter | Type | Required | Description |
| --------------- | ------ | -------- | --------------------------------- |
| `coordinates` | Object | Yes | The target coordinates to move to |
| `coordinates.x` | Number | Yes | X coordinate |
| `coordinates.y` | Number | Yes | Y coordinate |
**Example:**
```json
{
"action": "move_mouse",
"coordinates": {
"x": 100,
"y": 200
}
}
```
### trace_mouse
Move the mouse along a path of coordinates.
**Parameters:**
| Parameter | Type | Required | Description |
| ------------ | ------ | -------- | ----------------------------------------------- |
| `path` | Array | Yes | Array of coordinate objects for the mouse path |
| `path[].x` | Number | Yes | X coordinate for each point in the path |
| `path[].y` | Number | Yes | Y coordinate for each point in the path |
| `holdKeys` | Array | No | Keys to hold while moving along the path |
**Example:**
```json
{
"action": "trace_mouse",
"path": [
{ "x": 100, "y": 100 },
{ "x": 150, "y": 150 },
{ "x": 200, "y": 200 }
],
"holdKeys": ["shift"]
}
```
### click_mouse
Perform a mouse click at the current or specified position.
**Parameters:**
| Parameter | Type | Required | Description |
| --------------- | ------ | -------- | ----------------------------------------------------- |
| `coordinates` | Object | No | The coordinates to click (uses current if omitted) |
| `coordinates.x` | Number | Yes* | X coordinate — required only when `coordinates` is provided |
| `coordinates.y` | Number | Yes* | Y coordinate — required only when `coordinates` is provided |
| `button` | String | Yes | Mouse button: 'left', 'right', or 'middle' |
| `clickCount` | Number | Yes | Number of clicks to perform |
| `holdKeys` | Array | No | Keys to hold while clicking (e.g., ['ctrl', 'shift']) |
**Example:**
```json
{
"action": "click_mouse",
"coordinates": {
"x": 150,
"y": 250
},
"button": "left",
"clickCount": 2
}
```
### press_mouse
Press or release a mouse button at the current or specified position.
**Parameters:**
| Parameter | Type | Required | Description |
| --------------- | ------ | -------- | -------------------------------------------------------- |
| `coordinates` | Object | No | The coordinates to press/release (uses current if omitted) |
| `coordinates.x` | Number | Yes* | X coordinate — required only when `coordinates` is provided |
| `coordinates.y` | Number | Yes* | Y coordinate — required only when `coordinates` is provided |
| `button` | String | Yes | Mouse button: 'left', 'right', or 'middle' |
| `press` | String | Yes | Action: 'up' or 'down' |
**Example:**
```json
{
"action": "press_mouse",
"coordinates": {
"x": 150,
"y": 250
},
"button": "left",
"press": "down"
}
```
### drag_mouse
Click and drag the mouse from one point to another.
**Parameters:**
| Parameter | Type | Required | Description |
| ------------ | ------ | -------- | --------------------------------------------- |
| `path` | Array | Yes | Array of coordinate objects for the drag path |
| `path[].x` | Number | Yes | X coordinate for each point in the path |
| `path[].y` | Number | Yes | Y coordinate for each point in the path |
| `button` | String | Yes | Mouse button: 'left', 'right', or 'middle' |
| `holdKeys` | Array | No | Keys to hold while dragging |
**Example:**
```json
{
"action": "drag_mouse",
"path": [
{ "x": 100, "y": 100 },
{ "x": 200, "y": 200 }
],
"button": "left"
}
```
### scroll
Scroll up, down, left, or right.
**Parameters:**
| Parameter | Type | Required | Description |
| --------------- | ------ | -------- | ------------------------------------------------------ |
| `coordinates` | Object | No | The coordinates to scroll at (uses current if omitted) |
| `coordinates.x` | Number | Yes* | X coordinate — required only when `coordinates` is provided |
| `coordinates.y` | Number | Yes* | Y coordinate — required only when `coordinates` is provided |
| `direction` | String | Yes | Scroll direction: 'up', 'down', 'left', 'right' |
| `scrollCount` | Number | Yes | Number of scroll steps |
| `holdKeys` | Array | No | Keys to hold while scrolling |
**Example:**
```json
{
"action": "scroll",
"direction": "down",
"scrollCount": 5
}
```
### type_keys
Type a sequence of keyboard keys.
**Parameters:**
| Parameter | Type | Required | Description |
| --------- | ------ | -------- | -------------------------------------- |
| `keys` | Array | Yes | Array of keys to type in sequence |
| `delay` | Number | No | Delay between key presses (ms) |
**Example:**
```json
{
"action": "type_keys",
"keys": ["a", "b", "c", "enter"],
"delay": 50
}
```
### press_keys
Press or release keyboard keys.
**Parameters:**
| Parameter | Type | Required | Description |
| --------- | ------ | -------- | ------------------------------------------ |
| `keys` | Array | Yes | Array of keys to press or release |
| `press` | String | Yes | Action: 'up' or 'down' |
**Example:**
```json
{
"action": "press_keys",
"keys": ["ctrl", "shift", "esc"],
"press": "down"
}
```
### type_text
Type a text string with optional delay.
**Parameters:**
| Parameter | Type | Required | Description |
| --------- | ------ | -------- | ----------------------------------------------------- |
| `text` | String | Yes | The text to type |
| `delay` | Number | No | Delay between characters in milliseconds (default: 0) |
**Example:**
```json
{
"action": "type_text",
"text": "Hello, Bytebot!",
"delay": 50
}
```
### paste_text
Paste text to the current cursor position. This is especially useful for special characters that aren't on the standard keyboard.
**Parameters:**
| Parameter | Type | Required | Description |
| --------- | ------ | -------- | ------------------------------------------------------------------------ |
| `text` | String | Yes | The text to paste, including special characters and emojis |
**Example:**
```json
{
"action": "paste_text",
"text": "Special characters: ©®™€¥£ émojis 🎉"
}
```
### wait
Wait for a specified duration.
**Parameters:**
| Parameter | Type | Required | Description |
| ---------- | ------ | -------- | ----------------------------- |
| `duration` | Number | Yes | Wait duration in milliseconds |
**Example:**
```json
{
"action": "wait",
"duration": 2000
}
```
### screenshot
Capture a screenshot of the desktop.
**Parameters:** None required
**Example:**
```json
{
"action": "screenshot"
}
```
### cursor_position
Get the current position of the mouse cursor.
**Parameters:** None required
**Example:**
```json
{
"action": "cursor_position"
}
```
### application
Switch between different applications or navigate to the desktop/directory.
**Parameters:**
| Parameter | Type | Required | Description |
| ------------- | ------ | -------- | ------------------------------------------------------------------------ |
| `application` | String | Yes | The application to switch to. See available options below. |
**Available Applications:**
- `firefox` - Mozilla Firefox web browser
- `1password` - Password manager
- `thunderbird` - Email client
- `vscode` - Visual Studio Code editor
- `terminal` - Terminal/console application
- `desktop` - Switch to desktop
- `directory` - File manager/directory browser
**Example:**
```json
{
"action": "application",
"application": "firefox"
}
```
### write_file
Write a file to the desktop environment filesystem.
**Parameters:**
| Parameter | Type | Required | Description |
| --------- | ------ | -------- | -------------------------------------------------------------- |
| `path` | String | Yes | File path (absolute or relative to /home/user/Desktop) |
| `data` | String | Yes | Base64 encoded file content |
**Example:**
```json
{
"action": "write_file",
"path": "/home/user/documents/example.txt",
"data": "SGVsbG8gV29ybGQh"
}
```
### read_file
Read a file from the desktop environment filesystem.
**Parameters:**
| Parameter | Type | Required | Description |
| --------- | ------ | -------- | -------------------------------------------------------------- |
| `path` | String | Yes | File path (absolute or relative to /home/user/Desktop) |
**Example:**
```json
{
"action": "read_file",
"path": "/home/user/documents/example.txt"
}
```
## Response Format
The response format varies depending on the action performed.
### Standard Response
Most actions return a simple success response:
```json
{
"success": true
}
```
### Screenshot Response
```json
{
"success": true,
"data": {
"image": "base64_encoded_image_data"
}
}
```
### Cursor Position Response
```json
{
"success": true,
"data": {
"x": 123,
"y": 456
}
}
```
### Write File Response
```json
{
"success": true,
"message": "File written successfully to: /home/user/documents/example.txt"
}
```
### Read File Response
```json
{
"success": true,
"data": "SGVsbG8gV29ybGQh",
"name": "example.txt",
"size": 12,
"mediaType": "text/plain"
}
```
### Error Response
```json
{
"success": false,
"error": "Error message"
}
```
## Code Examples
### JavaScript/Node.js Example
```javascript
const axios = require('axios');
/**
 * Thin client for the unified computer-use endpoint. Every convenience
 * method builds a request body and delegates to `action`.
 */
const bytebot = {
  // The unified endpoint is POST /computer-use
  baseUrl: 'http://localhost:9990/computer-use',

  /**
   * POST a raw action object and return the response body.
   * Request failures are logged and then rethrown to the caller.
   * @param {object} params - Request body; must contain an `action` field.
   * @returns {Promise<*>} The body of the HTTP response.
   */
  async action(params) {
    try {
      const response = await axios.post(this.baseUrl, params);
      return response.data;
    } catch (error) {
      console.error('Error:', error.response?.data || error.message);
      throw error;
    }
  },

  // Convenience methods

  /** Move the cursor to (x, y). */
  async moveMouse(x, y) {
    return this.action({
      action: 'move_mouse',
      coordinates: { x, y }
    });
  },

  /**
   * Click at (x, y).
   * @param {number} x
   * @param {number} y
   * @param {string} [button='left'] - 'left', 'right', or 'middle'.
   * @param {number} [clickCount=1] - Number of clicks; the API requires this field.
   */
  async clickMouse(x, y, button = 'left', clickCount = 1) {
    return this.action({
      action: 'click_mouse',
      coordinates: { x, y },
      button,
      clickCount
    });
  },

  /** Type `text` as a string. */
  async typeText(text) {
    return this.action({
      action: 'type_text',
      text
    });
  },

  /** Paste `text` at the current cursor position. */
  async pasteText(text) {
    return this.action({
      action: 'paste_text',
      text
    });
  },

  /** Switch to the named application (e.g. 'firefox'). */
  async switchApplication(application) {
    return this.action({
      action: 'application',
      application
    });
  },

  /** Capture a screenshot of the desktop. */
  async screenshot() {
    return this.action({ action: 'screenshot' });
  }
};
// Example usage:
/**
 * Walk through a short session: switch to Firefox, open a URL, wait,
 * paste text containing special characters, and take a screenshot.
 *
 * @returns {Promise<void>}
 */
async function example() {
  // Switch to Firefox
  await bytebot.switchApplication('firefox');

  // Navigate to a website
  await bytebot.moveMouse(100, 35);
  await bytebot.clickMouse(100, 35);
  await bytebot.typeText('https://example.com');
  // Tap Enter with type_keys; press_keys with press 'down' would need a
  // matching 'up' request to release the key again.
  await bytebot.action({
    action: 'type_keys',
    keys: ['enter']
  });

  // Wait for page to load
  await bytebot.action({
    action: 'wait',
    duration: 2000
  });

  // Paste some special characters
  await bytebot.pasteText('© 2025 Example Corp™ - €100');

  // Take a screenshot
  await bytebot.screenshot();
  console.log('Screenshot taken!');
}
example().catch(console.error);
```
================================================
FILE: docs/api-reference/endpoint/create.mdx
================================================
---
title: 'Create Plant'
openapi: 'POST /plants'
---
================================================
FILE: docs/api-reference/endpoint/delete.mdx
================================================
---
title: 'Delete Plant'
openapi: 'DELETE /plants/{id}'
---
================================================
FILE: docs/api-reference/endpoint/get.mdx
================================================
---
title: 'Get Plants'
openapi: 'GET /plants'
---
================================================
FILE: docs/api-reference/endpoint/webhook.mdx
================================================
---
title: 'New Plant'
openapi: 'WEBHOOK /plant/webhook'
---
================================================
FILE: docs/api-reference/introduction.mdx
================================================
---
title: "API Reference"
description: "Overview of the Bytebot API endpoints for programmatic control"
---
# Bytebot API Overview
Bytebot provides two main APIs for programmatic control:
## 1. Agent API (Task Management)
The Agent API runs on port 9991 and provides high-level task management:
<CardGroup cols={2}>
<Card
title="Task Management"
icon="list-check"
href="/api-reference/agent/tasks"
>
Create, manage, and monitor AI-powered tasks programmatically
</Card>
<Card
title="UI Integration"
icon="window"
href="/api-reference/agent/ui"
>
WebSocket connections and real-time updates for custom UIs
</Card>
</CardGroup>
### Agent API Base URL
```
http://localhost:9991
```
### Example Task Creation
```bash
curl -X POST http://localhost:9991/tasks \
-H "Content-Type: application/json" \
-d '{
"description": "Download invoices from webmail and organize by date",
"priority": "HIGH"
}'
```
## 2. Desktop API (Direct Control)
The Desktop API runs on port 9990 and provides low-level desktop control:
<CardGroup cols={2}>
<Card
title="Computer Control"
icon="keyboard"
href="/api-reference/computer-use/unified-endpoint"
>
Direct control of mouse, keyboard, and screen capture
</Card>
<Card
title="Usage Examples"
icon="code"
href="/api-reference/computer-use/examples"
>
Code examples for common automation scenarios
</Card>
</CardGroup>
### Desktop API Base URL
```
http://localhost:9990
```
### Example Desktop Control
```bash
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "screenshot"}'
```
### MCP Support
The Desktop API also exposes an MCP (Model Context Protocol) endpoint:
```
http://localhost:9990/mcp
```
Connect your MCP client to access desktop control tools over SSE.
## Authentication
- **Local Access**: No authentication required by default
- **Remote Access**: Configure authentication based on your security requirements
- **Production**: Implement API keys, OAuth, or other authentication methods
## Response Formats
### Agent API Response
```json
{
"id": "task-123",
"status": "RUNNING",
"description": "Your task description",
"messages": [...],
"createdAt": "2024-01-01T00:00:00Z"
}
```
### Desktop API Response
```json
{
"success": true,
"data": { ... }, // Response data specific to the action
"error": null // Error message if success is false
}
```
## Error Handling
Both APIs use standard HTTP status codes:
| Status Code | Description |
| ----------- | ------------------------------------ |
| 200 | Success |
| 201 | Created (new resource) |
| 400 | Bad Request - Invalid parameters |
| 401 | Unauthorized - Authentication failed |
| 404 | Not Found - Resource doesn't exist |
| 500 | Internal Server Error |
## Rate Limiting
- **Agent API**: No hard limits, but consider task queue capacity
- **Desktop API**: No rate limiting, but rapid actions may impact desktop performance
## Best Practices
1. **Use Agent API for high-level automation** - Let the AI handle complexity
2. **Use Desktop API for precise control** - When you need exact actions
3. **Combine both APIs** - Create tasks via Agent API, monitor via Desktop API
4. **Handle errors gracefully** - Implement retry logic for transient failures
5. **Monitor resource usage** - Both APIs can be resource-intensive
## Next Steps
<CardGroup cols={2}>
<Card title="Quick Start" icon="rocket" href="/quickstart">
Get your APIs running
</Card>
<Card title="Task Examples" icon="code" href="/guides/task-creation">
See the APIs in action
</Card>
</CardGroup>
================================================
FILE: docs/api-reference/openapi.json
================================================
{
"openapi": "3.1.0",
"info": {
"title": "OpenAPI Plant Store",
"description": "A sample API that uses a plant store as an example to demonstrate features in the OpenAPI specification",
"license": {
"name": "MIT"
},
"version": "1.0.0"
},
"servers": [
{
"url": "http://sandbox.mintlify.com"
}
],
"security": [
{
"bearerAuth": []
}
],
"paths": {
"/plants": {
"get": {
"description": "Returns all plants from the system that the user has access to",
"parameters": [
{
"name": "limit",
"in": "query",
"description": "The maximum number of results to return",
"schema": {
"type": "integer",
"format": "int32"
}
}
],
"responses": {
"200": {
"description": "Plant response",
"content": {
"application/json": {
"schema": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Plant"
}
}
}
}
},
"400": {
"description": "Unexpected error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
}
}
},
"post": {
"description": "Creates a new plant in the store",
"requestBody": {
"description": "Plant to add to the store",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/NewPlant"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "plant response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Plant"
}
}
}
},
"400": {
"description": "unexpected error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
}
}
}
},
"/plants/{id}": {
"delete": {
"description": "Deletes a single plant based on the ID supplied",
"parameters": [
{
"name": "id",
"in": "path",
"description": "ID of plant to delete",
"required": true,
"schema": {
"type": "integer",
"format": "int64"
}
}
],
"responses": {
"204": {
"description": "Plant deleted",
"content": {}
},
"400": {
"description": "unexpected error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
}
}
}
}
},
"webhooks": {
"/plant/webhook": {
"post": {
"description": "Information about a new plant added to the store",
"requestBody": {
"description": "Plant added to the store",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/NewPlant"
}
}
}
},
"responses": {
"200": {
"description": "Return a 200 status to indicate that the data was received successfully"
}
}
}
}
},
"components": {
"schemas": {
"Plant": {
"required": [
"name"
],
"type": "object",
"properties": {
"name": {
"description": "The name of the plant",
"type": "string"
},
"tag": {
"description": "Tag to specify the type",
"type": "string"
}
}
},
"NewPlant": {
"allOf": [
{
"$ref": "#/components/schemas/Plant"
},
{
"required": [
"id"
],
"type": "object",
"properties": {
"id": {
"description": "Identification number of the plant",
"type": "integer",
"format": "int64"
}
}
}
]
},
"Error": {
"required": [
"error",
"message"
],
"type": "object",
"properties": {
"error": {
"type": "integer",
"format": "int32"
},
"message": {
"type": "string"
}
}
}
},
"securitySchemes": {
"bearerAuth": {
"type": "http",
"scheme": "bearer"
}
}
}
}
================================================
FILE: docs/core-concepts/agent-system.mdx
================================================
---
title: "Agent System"
description: "The AI brain that powers your self-hosted desktop automation"
---
## Overview
The Bytebot Agent System transforms a simple desktop container into an intelligent, autonomous computer user. By combining a large language model (Anthropic Claude by default, with OpenAI GPT and Google Gemini also supported) with structured task management, it can understand natural language requests and execute complex workflows just like a human would.
<img
src="/images/agent-architecture.png"
alt="Bytebot Agent Architecture"
className="w-full max-w-4xl"
/>
## How the AI Agent Works
### The Brain: Multi-Model AI Integration
At the heart of Bytebot is a flexible AI integration that supports multiple models. Choose the AI that best fits your needs:
**Anthropic Claude** (Default):
- Best for complex reasoning and visual understanding
- Excellent at following detailed instructions
- Superior performance on desktop automation tasks
**OpenAI GPT Models**:
- Fast and reliable for general automation
- Strong code understanding and generation
- Cost-effective for routine tasks
**Google Gemini**:
- Efficient for high-volume tasks
- Good balance of speed and capability
- Excellent multilingual support
Whichever model you choose, the agent:
1. **Understands Context**: Processes your natural language requests with full conversation history
2. **Plans Actions**: Breaks down complex tasks into executable computer actions
3. **Adapts in Real-time**: Adjusts its approach based on what it sees on screen
4. **Learns from Feedback**: Improves task execution through conversation
### Conversation Flow
<Steps>
<Step title="You Describe a Task">
"Research competitors for my SaaS product and create a comparison table"
</Step>
<Step title="AI Plans the Approach">
The AI model understands the request and plans: open browser → search → visit sites → extract data → create document
</Step>
<Step title="Executes Actions">
The agent controls the desktop: clicking, typing, taking screenshots, reading content
</Step>
<Step title="Provides Updates">
Provides real-time status updates and asks for clarification when needed
</Step>
<Step title="Delivers Results">
Completes the task and provides the output (files, screenshots, summaries)
</Step>
</Steps>
## Task Management System
### Task Lifecycle
Tasks move through a structured lifecycle:
```mermaid
graph LR
A[Created] --> B[Queued]
B --> C[Running]
C --> D[Needs Help]
C --> E[Completed]
C --> F[Failed]
D --> C
```
### Task Properties
Each task contains:
- **Description**: What needs to be done
- **Priority**: Urgent, High, Medium, or Low
- **Status**: Current state in the lifecycle
- **Type**: Immediate or Scheduled
- **History**: All messages and actions taken
### Smart Task Processing
The agent processes tasks intelligently:
1. **Priority Queue**: Urgent tasks run first
2. **Error Recovery**: Automatically retries failed actions
3. **Human in the Loop**: Asks for help when stuck
4. **Context Preservation**: Maintains conversation history across sessions
## Real-world Capabilities
### What the Agent Can Do
<CardGroup cols={2}>
<Card title="Web Automation" icon="globe">
- Browse websites
- Fill out forms
- Extract data
- Download files
- Monitor changes
</Card>
<Card title="Document Work" icon="file">
- Create documents
- Edit spreadsheets
- Generate reports
- Organize files
- Convert formats
</Card>
<Card title="Email & Communication" icon="envelope">
- Access webmail through browser
- Read and extract information
- Fill contact forms
- Navigate communication portals
- Handle verification flows
</Card>
<Card title="Data Processing" icon="database">
- Extract from PDFs
- Process CSV files
- Create visualizations
- Generate summaries
- Transform data
</Card>
</CardGroup>
## Technical Architecture
### Core Components
1. **NestJS Agent Service**
- Integrates with multiple AI provider APIs (Anthropic, OpenAI, Google)
- Handles WebSocket connections
- Coordinates with desktop API
2. **Message System**
- Structured conversation format
- Supports text and images
- Maintains full context
- Enables rich interactions
3. **Database Schema**
```sql
Tasks: id, description, status, priority, timestamps
Messages: id, task_id, role, content, timestamps
Summaries: id, task_id, content, parent_id
```
4. **Computer Action Bridge**
- Translates AI decisions to desktop actions
- Handles screenshots and feedback
- Manages action timing
- Provides error handling
### API Endpoints
Key endpoints for programmatic control:
```typescript
// Create a new task
POST /tasks
{
"description": "Your task description",
"priority": "HIGH",
"type": "IMMEDIATE"
}
// Get task status
GET /tasks/:id
// Send a message
POST /tasks/:id/messages
{
"content": "Additional instructions"
}
// Get task history
GET /tasks/:id/messages
```
## Chat UI Features
The web interface provides:
### Real-time Interaction
- Live chat with the AI agent
- Instant status updates
- Progress indicators
- Error notifications
### Visual Feedback
- Embedded desktop viewer
- Screenshot history
- Action replay
- Task timeline
### Task Management
- Create and prioritize tasks
- View active and completed tasks
- Export conversation logs
- Manage task queues
## Security & Privacy
### Data Isolation
- All processing happens in your infrastructure
- No data sent to external services (except your chosen AI provider API)
- Conversations stored locally
- Complete audit trail
### Access Control
- Configurable authentication
- API key management
- Network isolation options
## Extending the Agent
### Integration Points
- External API calls via the Agent API
- Custom AI prompts for specialized workflows
- MCP protocol support for tool integration
### Best Practices
1. **Clear Instructions**: Be specific about desired outcomes
2. **Break Down Complex Tasks**: Use multiple smaller tasks for better results
3. **Provide Context**: Include relevant files or URLs
4. **Monitor Progress**: Watch the desktop view for real-time feedback
5. **Review Results**: Verify outputs meet requirements
## Troubleshooting
<AccordionGroup>
<Accordion title="Agent not responding">
- Check your AI provider API key is valid
- Verify agent service is running
- Review logs for errors
- Ensure sufficient API credits/quota with your provider
</Accordion>
<Accordion title="Slow task execution">
- Monitor system resources
- Check network latency
- Reduce screenshot frequency
- Optimize AI prompts for your chosen model
- Consider switching to a faster model (e.g., Gemini Flash)
</Accordion>
</AccordionGroup>
## Next Steps
<CardGroup cols={2}>
<Card title="Quick Start" icon="rocket" href="/quickstart">
Get your agent running
</Card>
<Card title="API Reference" icon="code" href="/api-reference/agent/tasks">
Integrate with your apps
</Card>
<Card title="Use Cases" icon="lightbulb" href="#example-use-cases">
See what's possible
</Card>
<Card title="Best Practices" icon="star" href="#best-practices">
Optimize your workflows
</Card>
</CardGroup>
================================================
FILE: docs/core-concepts/architecture.mdx
================================================
---
title: "Architecture"
description: "How Bytebot's desktop agent works under the hood"
---
## Overview
Bytebot is a self-hosted AI desktop agent built with a modular architecture. It combines a Linux desktop environment with AI to create an autonomous computer user that can perform tasks through natural language instructions.
<img
src="/images/agent-architecture.png"
alt="Bytebot Architecture Diagram"
className="w-full max-w-4xl"
/>
## System Architecture
The system consists of four main components that work together:
### 1. Bytebot Desktop Container
The foundation of the system - a virtual Linux desktop that provides:
- **Ubuntu 22.04 LTS** base for stability and compatibility
- **XFCE4 Desktop** for a lightweight, responsive UI
- **bytebotd Daemon** - The automation service built on nutjs that executes computer actions
- **Pre-installed Applications**: Firefox ESR, Thunderbird, text editors, and development tools
- **noVNC** for remote desktop access
**Key Features:**
- Runs completely isolated from your host system
- Consistent environment across different platforms
- Can be customized with additional software
- Accessible via REST API on port 9990
- MCP SSE endpoint available at `/mcp`
- Uses shared types from `@bytebot/shared` package
### 2. AI Agent Service
The brain of the system - orchestrates tasks using an LLM:
- **NestJS Framework** for robust, scalable backend
- **LLM Integration** supporting Anthropic Claude, OpenAI GPT, and Google Gemini models
- **WebSocket Support** for real-time updates
- **Computer Use API Client** to control the desktop
- **Prisma ORM** for database operations
- **Tool definitions** for computer actions (mouse, keyboard, screenshots)
**Responsibilities:**
- Interprets natural language requests
- Plans sequences of computer actions
- Manages task state and progress
- Handles errors and retries
- Provides real-time task updates via WebSocket
### 3. Web Task Interface
The user interface for interacting with your AI agent:
- **Next.js 15 Application** with TypeScript for type safety
- **Embedded VNC Viewer** to watch the desktop in action
- **Task Management** UI with status badges
- **WebSocket Connections** for live updates
- **Reusable components** for consistent UI
- **API utilities** for streamlined server communication
**Features:**
- Task creation and management interface
- Desktop tab for direct manual control
- Real-time desktop viewer with takeover mode
- Task history and status tracking
- Responsive design for all devices
### 4. PostgreSQL Database
Persistent storage for the agent system:
- **Tasks Table**: Stores task details, status, and metadata
- **Messages Table**: Stores AI conversation history
- **Prisma ORM** for type-safe database access
## Data Flow
### Task Execution Flow
<Steps>
<Step title="User Input">
User describes a task in natural language via the chat UI
</Step>
<Step title="Task Creation">
Agent service creates a task record and adds it to the processing queue
</Step>
<Step title="AI Planning">
The LLM analyzes the task and generates a plan of computer actions
</Step>
<Step title="Action Execution">
Agent sends computer actions to bytebotd via REST API or MCP
</Step>
<Step title="Desktop Automation">
bytebotd executes actions (mouse, keyboard, screenshots) on the desktop
</Step>
<Step title="Result Processing">
Agent receives results, updates task status, and continues or completes
</Step>
<Step title="User Feedback">
Results and status updates are sent back to the user in real-time
</Step>
</Steps>
### Communication Protocols
```mermaid
graph LR
A[Tasks UI] -->|WebSocket| B[Agent Service]
A -->|HTTP Proxy| C[Desktop VNC]
B -->|REST/MCP| D[Desktop API]
B -->|SQL| E[PostgreSQL]
B -->|HTTPS| F[LLM Provider]
D -->|IPC| G[bytebotd]
```
## Security Architecture
### Isolation Layers
1. **Container Isolation**
- Each desktop runs in its own Docker container
- No access to host filesystem by default
- Network isolation with explicit port mapping
2. **Process Isolation**
- bytebotd runs as non-root user
- Separate processes for different services
- Resource limits enforced by Docker
3. **Network Security**
- Services only accessible from localhost by default
- Can be configured with authentication
- HTTPS/WSS for external connections
### API Security
- **Desktop API**: No authentication by default (localhost only). Supports REST and MCP.
- **Agent API**: Can be secured with API keys
- **Database**: Password protected, not exposed externally
<Warning>
Default configuration is for development. For production:
- Enable authentication on all APIs
- Use HTTPS/WSS for all connections
- Implement network policies
- Rotate credentials regularly
</Warning>
## Deployment Patterns
### Single User (Development)
```yaml
Services: All on one machine
Scale: 1 instance each
Use Case: Personal automation, development
Resources: 4GB RAM, 2 CPU cores
```
### Production Deployment
```yaml
Services: All services on dedicated hardware
Scale: Single instance (1 agent, 1 desktop)
Use Case: Business automation
Resources: 8GB+ RAM, 4+ CPU cores
```
### Enterprise Deployment
```yaml
Services: Kubernetes orchestration
Scale: Single instance with high availability
Use Case: Organization-wide automation
Resources: Dedicated nodes
```
## Extension Points
### Custom Tools
Add specialized software to the desktop:
```dockerfile
FROM ghcr.io/bytebot-ai/bytebot-desktop:edge
RUN apt-get update && apt-get install -y \
your-custom-tools
```
### AI Integrations
Extend agent capabilities:
- Custom tools for the LLM
- Additional AI models
- Specialized prompts
- Domain-specific knowledge
## Performance Considerations
### Resource Usage
- **Desktop Container**: ~1GB RAM idle, 2GB+ active
- **Agent Service**: ~256MB RAM
- **UI Service**: ~128MB RAM
- **Database**: ~256MB RAM
### Optimization Tips
1. Allocate sufficient resources to containers
2. Limit concurrent tasks to prevent overload
3. Monitor resource usage regularly
4. Use LiteLLM proxy for provider flexibility
## Next Steps
<CardGroup cols={2}>
<Card title="Agent System" icon="robot" href="/core-concepts/agent-system">
Learn about the AI agent capabilities
</Card>
<Card title="Desktop Environment" icon="desktop" href="/core-concepts/desktop-environment">
Explore the virtual desktop environment
</Card>
<Card title="API Reference" icon="code" href="/api-reference/introduction">
Integrate with your applications
</Card>
<Card title="Deployment Guide" icon="rocket" href="/quickstart">
Deploy your own instance
</Card>
</CardGroup>
================================================
FILE: docs/core-concepts/desktop-environment.mdx
================================================
---
title: "Desktop Environment"
description: "The virtual Linux desktop where Bytebot performs tasks"
---
## Overview
The Bytebot Desktop Environment (also called Bytebot Core) is a complete Linux desktop that runs in a Docker container. This is where Bytebot does its work - clicking buttons, typing text, browsing websites, and using applications just like you would.
<img
src="/images/core-container.png"
alt="Bytebot Desktop Environment"
className="w-full max-w-4xl"
/>
## Why a Virtual Desktop?
### Complete Isolation
- **No Risk to Host**: All actions happen inside the container
- **Sandboxed Environment**: Desktop can't access your host system
- **Easy Reset**: Destroy and recreate in seconds
- **Clean Workspace**: Each restart provides a fresh environment
### Consistency Everywhere
- **Platform Independent**: Same environment on Mac, Windows, or Linux
- **Reproducible**: Identical setup every time
- **Version Control**: Pin specific versions for stability
- **No Dependencies**: Everything included in the container
### Built for Automation
- **Predictable UI**: Consistent element positioning
- **Clean Environment**: No popups or distractions
- **Automation-Ready**: Optimized for programmatic control
- **Fast Startup**: Desktop ready in seconds
## Technical Stack
### Base System
- **Ubuntu 22.04 LTS**: Stable, well-supported Linux distribution
- **XFCE4 Desktop**: Lightweight, responsive desktop environment
- **X11 Display Server**: Standard Linux graphics system
- **supervisord**: Service management
### Pre-installed Software
<CardGroup cols={2}>
<Card title="Web Browser" icon="globe">
- Firefox ESR (Extended Support Release)
- Pre-configured for automation
- Clean profile without distractions
</Card>
<Card title="Productivity Tools" icon="file-lines">
- Text editor
- Office tools
- PDF viewer
- File manager
</Card>
<Card title="Communication" icon="envelope">
- Thunderbird email client
- Terminal emulator
</Card>
<Card title="Security & Development" icon="shield">
- 1Password password manager
- Visual Studio Code (VSCode)
- Git version control
- Python 3 environment
</Card>
</CardGroup>
### Core Services
1. **bytebotd Daemon**
- Runs on port 9990
- Handles all automation requests
- Built on nutjs framework
- Provides REST API
2. **noVNC Web Client**
- Browser-based desktop access
- No client installation needed
- WebSocket proxy included
3. **Supervisor**
- Process management
- Service monitoring
- Automatic restarts
- Log management
## Desktop Features
### Display Configuration
```bash
# Resolution
1920x1080 @ 24-bit color
```
### User Environment
- **Username**: `user`
- **Home Directory**: `/home/user`
- **Sudo Access**: Yes (passwordless)
- **Desktop Session**: Auto-login enabled
### File System
```
/home/user/
├── Desktop/ # Desktop shortcuts
├── Documents/ # User documents
├── Downloads/ # Browser downloads
├── .config/ # Application configs
└── .local/ # User data
```
## Accessing the Desktop
### Web Browser (Recommended)
Navigate to `http://localhost:9990/vnc` for instant access:
- No software installation required
- Works on any device with a browser
- Supports touch devices
- Clipboard sharing
### MCP Control
The core container also exposes an [MCP](https://github.com/rekog-labs/MCP-Nest) endpoint.
Connect your MCP client to `http://localhost:9990/mcp` to invoke the desktop's computer-use tools over SSE.
```json
{
"mcpServers": {
"bytebot": {
"command": "npx",
"args": [
"mcp-remote",
"http://127.0.0.1:9990/mcp",
"--transport",
"http-first"
]
}
}
}
```
### Direct API Control
Most efficient for automation:
```bash
# Take a screenshot
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "screenshot"}'
# Move mouse
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "move_mouse", "coordinate": {"x": 500, "y": 300}}'
```
## Customization
### Adding Software
Create a custom Dockerfile:
```dockerfile
FROM ghcr.io/bytebot-ai/bytebot-desktop:edge
# Install additional packages
RUN apt-get update && apt-get install -y \
slack-desktop \
zoom \
your-custom-app
# Copy configuration files
COPY configs/ /home/user/.config/
```
## Performance Optimization
### Resource Allocation
```yaml
# Recommended settings
deploy:
resources:
limits:
cpus: '2'
memory: 4G
reservations:
cpus: '1'
memory: 2G
```
## Security Hardening
<Warning>
Default configuration prioritizes ease of use. For production, apply these security measures:
</Warning>
### Essential Security Steps
1. **Change Default Passwords**
```bash
# Set user password
passwd user
```
2. **Limit Network Access**
```yaml
# Whitelist specific domains
environment:
- ALLOWED_DOMAINS=company.com,trusted-site.com
# Or bind the port to a specific host IP (Docker does not accept CIDR ranges here)
ports:
- "127.0.0.1:9990:9990"
```
## Troubleshooting
<AccordionGroup>
<Accordion title="Desktop won't start">
Check logs:
```bash
docker logs bytebot-desktop
```
Common issues:
- Insufficient memory
- Port conflicts
- Display server errors
</Accordion>
<Accordion title="Applications crash">
Monitor resources:
```bash
docker stats bytebot-desktop
```
Solutions:
- Increase memory allocation
- Check disk space
- Update container image
</Accordion>
</AccordionGroup>
## Best Practices
1. **Regular Updates**: Keep the base image updated for security patches
2. **Persistent Storage**: Mount volumes for important data
3. **Backup Configurations**: Save customizations outside the container
4. **Monitor Resources**: Track CPU/memory usage
5. **Clean Temporary Files**: Periodic cleanup for performance
## Next Steps
<CardGroup cols={2}>
<Card title="Quick Start" icon="rocket" href="/quickstart">
Deploy your first agent
</Card>
<Card title="API Reference" icon="code" href="/api-reference/computer-use/unified-endpoint">
Control the desktop programmatically
</Card>
<Card title="Agent System" icon="robot" href="/core-concepts/agent-system">
Add AI capabilities
</Card>
<Card title="Password Management" icon="key" href="/guides/password-management">
Set up authentication
</Card>
</CardGroup>
================================================
FILE: docs/core-concepts/rpa-comparison.mdx
================================================
---
title: "Bytebot vs Traditional RPA"
description: "How Bytebot revolutionizes enterprise automation beyond traditional RPA tools"
---
# The Next Generation of Enterprise Automation
Bytebot represents a fundamental shift in how businesses approach process automation. While traditional RPA tools like UiPath, Automation Anywhere, and Blue Prism require extensive scripting and brittle workflows, Bytebot leverages AI to understand and execute tasks like a human would.
## Traditional RPA Limitations
<CardGroup cols={2}>
<Card title="Brittle Selectors" icon="xmark">
Traditional RPA breaks when UI elements change even slightly
</Card>
<Card title="Complex Development" icon="code">
Requires specialized developers and lengthy implementation cycles
</Card>
<Card title="High Maintenance" icon="wrench">
Constant updates needed as applications evolve
</Card>
<Card title="Limited Adaptability" icon="robot">
Can't handle unexpected scenarios or variations
</Card>
</CardGroup>
## How Bytebot is Different
### Visual Intelligence vs Element Mapping
**Traditional RPA:**
```xml
<!-- Brittle selector that breaks with any UI change -->
<Click>
<Selector>
<webctrl id='submit-btn-2947'
class='btn-primary-new'
idx='3'/>
</Selector>
</Click>
```
**Bytebot:**
```
"Click the blue Submit button at the bottom of the form"
```
Bytebot understands interfaces visually, just like a human. It doesn't rely on fragile technical selectors that break with every update.
### Natural Language vs Complex Scripting
**Traditional RPA Workflow:**
- Design in Studio
- Map every element
- Script error handling
- Test extensively
- Deploy with fingers crossed
- Fix when it breaks (often)
**Bytebot Workflow:**
- Describe what you need
- Bytebot figures it out
- Handles errors intelligently
- Adapts to changes automatically
## Real-World Enterprise Examples
### Financial Services Automation
<Tabs>
<Tab title="Traditional RPA">
```csharp
// 500+ lines of code to handle one banking portal
var loginPage = new LoginPageObject();
loginPage.WaitForElement("username", 30);
loginPage.EnterText("username", credentials.User);
loginPage.EnterText("password", credentials.Pass);
// Handle 2FA with complex conditional logic
if (loginPage.Has2FAPrompt()) {
var method = loginPage.Get2FAMethod();
switch(method) {
case "SMS":
// 50 more lines of code
case "Email":
// 50 more lines of code
case "Authenticator":
// 50 more lines of code
}
}
// Download statements with exact selectors
navigation.ClickElement("xpath://div[@id='acct-menu']");
navigation.ClickElement("xpath://a[contains(@href,'statements')]");
// ... continues for hundreds more lines
```
</Tab>
<Tab title="Bytebot">
```
Task: "Log into Chase banking portal, navigate to statements,
download all statements from last month for account ending in 4521,
and save them to Finance/BankStatements/Chase/"
That's it. Bytebot handles everything - including 2FA - automatically.
```
</Tab>
</Tabs>
### Multi-System Integration
A FinTech company needed to automate the work of operators who:
1. Log into multiple banking portals with 2FA
2. Download transaction files
3. Run proprietary scripts on those files
4. Upload results to internal systems
**Traditional RPA Challenge:**
- 6 months to implement
- Breaks monthly with UI changes
- Requires dedicated maintenance team
- Can't handle new banks without development
- Complex 2FA handling logic for each bank
**Bytebot Solution:**
- Deployed in 1 week
- Adapts to UI changes automatically
- 2FA handled automatically via password manager
- New banks added with simple instructions
- Zero manual intervention required
## Performance Comparison
| Metric | Traditional RPA | Bytebot |
|--------|----------------|---------|
| **Implementation Time** | 3-6 months | 1-2 weeks |
| **Developer Requirement** | RPA specialists | Any technical user |
| **Maintenance Effort** | 40% of dev time | Near zero |
| **Handling UI Changes** | Breaks immediately | Adapts automatically |
| **Error Recovery** | Pre-scripted only | Intelligent adaptation |
| **New Process Addition** | Weeks of development | Minutes to describe |
| **Cost** | $100k+ annually | Self-hosted on your infrastructure |
## Common RPA Migration Patterns
### 1. Invoice Processing
**Before (UiPath):**
- 2000+ lines of workflow XML
- Breaks when vendor portal updates
- Requires exact folder structures
- Fails on unexpected popups
**After (Bytebot):**
- One paragraph description
- Handles portal changes
- Asks for help when needed
- Processes variations intelligently
### 2. Compliance Reporting
**Before (Automation Anywhere):**
- Complex bot orchestration
- Separate bots per system
- Rigid scheduling
- No flexibility
**After (Bytebot):**
- Single unified workflow
- Natural language instructions
- Dynamic adaptation
- Human collaboration when needed
### 3. Data Migration
**Before (Blue Prism):**
- Massive process definitions
- Exact field mapping required
- Breaks on data variations
- Limited error handling
**After (Bytebot):**
- Describe the mapping rules
- Handles variations intelligently
- Asks for clarification
- Visual validation included
## Integration with Existing RPA
Bytebot can work alongside existing RPA investments:
```mermaid
graph LR
A[Legacy RPA] -->|Handles stable processes| B[Structured Systems]
C[Bytebot] -->|Handles complex/changing processes| D[Dynamic Systems]
C -->|Takes over when RPA fails| A
E[Human Operator] -->|Guides via takeover mode| C
```
## Enterprise Architecture
### Deployment Options
<CardGroup cols={2}>
<Card title="On-Premise" icon="server">
Deploy in your data center for maximum security and compliance
</Card>
<Card title="Private Cloud" icon="cloud">
Use your AWS/Azure/GCP infrastructure with full control
</Card>
<Card title="Hybrid" icon="arrows-split-up-and-left">
Process sensitive data locally, leverage cloud for scaling
</Card>
<Card title="Air-Gapped" icon="shield">
Completely isolated deployment for classified environments
</Card>
</CardGroup>
### Security & Compliance
- **Data Sovereignty**: All processing on your infrastructure
- **Audit Trails**: Complete logs of every action
- **Access Control**: Integrate with your IAM/SSO
- **Compliance**: SOC2, HIPAA, PCI-DSS compatible deployments
## Getting Started with Migration
<Steps>
<Step title="Identify Processes">
List your current RPA workflows, especially those that:
- Break frequently
- Require regular maintenance
- Handle multiple systems
- Need human decision points
</Step>
<Step title="Start Small">
Pick one problematic workflow:
- Document the business process
- Deploy Bytebot
- Describe the task naturally
- Compare results
</Step>
<Step title="Expand Gradually">
As confidence grows:
- Migrate more complex processes
- Retire brittle RPA bots
- Reduce maintenance overhead
- Scale across departments
</Step>
</Steps>
## Next Steps
<CardGroup cols={2}>
<Card title="Quick Start" icon="rocket" href="/quickstart">
Deploy Bytebot in your environment
</Card>
<Card title="GitHub" icon="github" href="https://github.com/bytebot-ai/bytebot">
View source code and contribute
</Card>
<Card title="Community" icon="users" href="https://discord.gg/zcb5wA2t4u">
Join our Discord for support
</Card>
<Card title="Enterprise Support" icon="users" href="https://discord.gg/zcb5wA2t4u">
Get help with enterprise deployments
</Card>
</CardGroup>
<Note>
**Ready to move beyond traditional RPA?** Bytebot brings human-like intelligence to process automation, eliminating the brittleness and complexity of traditional tools while delivering enterprise-grade reliability and security.
</Note>
================================================
FILE: docs/deployment/helm.mdx
================================================
---
title: "Helm Deployment"
description: "Deploy Bytebot on Kubernetes using Helm charts"
---
# Deploy Bytebot on Kubernetes with Helm
Helm provides a simple way to deploy Bytebot on Kubernetes clusters.
## Prerequisites
- Kubernetes cluster (1.19+)
- Helm 3.x installed
- kubectl configured
- 8GB+ available memory in cluster
## Quick Start
<Steps>
<Step title="Clone Repository">
```bash
git clone https://github.com/bytebot-ai/bytebot.git
cd bytebot
```
</Step>
<Step title="Configure API Keys">
Create a `values.yaml` file with at least one API key:
```yaml
bytebot-agent:
apiKeys:
anthropic:
value: "sk-ant-your-key-here"
# Optional: Add more providers
# openai:
# value: "sk-your-key-here"
# gemini:
# value: "your-key-here"
```
</Step>
<Step title="Install Bytebot">
```bash
helm install bytebot ./helm \
--namespace bytebot \
--create-namespace \
-f values.yaml
```
</Step>
<Step title="Access Bytebot">
```bash
# Port-forward for local access
kubectl port-forward -n bytebot svc/bytebot-ui 9992:9992
# Access at http://localhost:9992
```
</Step>
</Steps>
## Basic Configuration
### API Keys
Configure at least one AI provider:
```yaml
bytebot-agent:
apiKeys:
anthropic:
value: "sk-ant-your-key-here"
openai:
value: "sk-your-key-here"
gemini:
value: "your-key-here"
```
### Resource Limits (Optional)
Adjust resources based on your needs:
```yaml
# Desktop container (where automation runs)
bytebot-desktop:
resources:
requests:
memory: "2Gi"
cpu: "1"
limits:
memory: "4Gi"
cpu: "2"
# Agent (AI orchestration)
bytebot-agent:
resources:
requests:
memory: "1Gi"
cpu: "500m"
```
### External Access (Optional)
Enable ingress for domain-based access:
```yaml
ui:
ingress:
enabled: true
hostname: bytebot.your-domain.com
tls: true
```
## Accessing Bytebot
### Local Access (Recommended)
```bash
kubectl port-forward -n bytebot svc/bytebot-ui 9992:9992
```
Access at: http://localhost:9992
### External Access
If you configured ingress:
- Access at: https://bytebot.your-domain.com
## Verifying Deployment
Check that all pods are running:
```bash
kubectl get pods -n bytebot
```
Expected output:
```
NAME READY STATUS RESTARTS AGE
bytebot-agent-xxxxx 1/1 Running 0 2m
bytebot-desktop-xxxxx 1/1 Running 0 2m
bytebot-postgresql-0 1/1 Running 0 2m
bytebot-ui-xxxxx 1/1 Running 0 2m
```
## Troubleshooting
### Pods Not Starting
Check pod status:
```bash
kubectl describe pod -n bytebot <pod-name>
```
Common issues:
- Insufficient memory/CPU: Check node resources with `kubectl top nodes`
- Missing API keys: Verify your values.yaml configuration
### Connection Issues
Check the agent logs for connection errors:
```bash
kubectl logs -n bytebot deployment/bytebot-agent
```
### View Logs
```bash
# All logs
kubectl logs -n bytebot -l app=bytebot --tail=100
# Specific component
kubectl logs -n bytebot deployment/bytebot-agent
```
## Upgrading
```bash
# Update your values.yaml as needed, then:
helm upgrade bytebot ./helm -n bytebot -f values.yaml
```
## Uninstalling
```bash
# Remove Bytebot
helm uninstall bytebot -n bytebot
# Clean up namespace
kubectl delete namespace bytebot
```
## Advanced Configuration
<AccordionGroup>
<Accordion title="Using External Secrets">
If using Kubernetes secret management (Vault, Sealed Secrets, etc.):
```yaml
bytebot-agent:
apiKeys:
anthropic:
useExisting: true
secretName: "my-api-keys"
secretKey: "anthropic-key"
```
Create the secret manually:
```bash
kubectl create secret generic my-api-keys \
--namespace bytebot \
--from-literal=anthropic-key="sk-ant-your-key"
```
</Accordion>
<Accordion title="LiteLLM Proxy Mode">
For centralized LLM management, use the included LiteLLM proxy:
```bash
helm install bytebot ./helm \
-f values-proxy.yaml \
--namespace bytebot \
--create-namespace \
--set bytebot-llm-proxy.env.ANTHROPIC_API_KEY="your-key"
```
This provides:
- Centralized API key management
- Request routing and load balancing
- Rate limiting and retry logic
</Accordion>
<Accordion title="Custom Storage">
Configure persistent storage:
```yaml
desktop:
persistence:
enabled: true
size: "20Gi"
storageClass: "fast-ssd"
postgresql:
persistence:
size: "20Gi"
storageClass: "fast-ssd"
```
</Accordion>
<Accordion title="Production Security">
```yaml
# Network policies
networkPolicy:
enabled: true
# Pod security
podSecurityContext:
runAsNonRoot: true
runAsUser: 1000
fsGroup: 1000
# Enable authentication
auth:
enabled: true
type: "basic"
username: "admin"
password: "changeme" # Use secrets in production!
```
</Accordion>
</AccordionGroup>
## Next Steps
<CardGroup cols={2}>
<Card title="API Reference" icon="code" href="/api-reference/introduction">
Integrate Bytebot with your applications
</Card>
<Card title="LiteLLM Integration" icon="plug" href="/deployment/litellm">
Use any LLM provider with Bytebot
</Card>
</CardGroup>
<Note>
**Need help?** Join our [Discord community](https://discord.com/invite/d9ewZkWPTP) or check our [GitHub discussions](https://github.com/bytebot-ai/bytebot/discussions).
</Note>
================================================
FILE: docs/deployment/litellm.mdx
================================================
---
title: "LiteLLM Integration"
description: "Use any LLM provider with Bytebot through LiteLLM proxy"
---
# Connect Any LLM to Bytebot with LiteLLM
LiteLLM acts as a unified proxy that lets you use 100+ LLM providers with Bytebot - including Azure OpenAI, AWS Bedrock, Anthropic, Hugging Face, Ollama, and more. This guide shows you how to set up LiteLLM with Bytebot.
## Why Use LiteLLM?
<CardGroup cols={2}>
<Card title="100+ LLM Providers" icon="plug">
Use Azure, AWS, GCP, Anthropic, OpenAI, Cohere, and local models
</Card>
<Card title="Cost Tracking" icon="dollar-sign">
Monitor spending across all providers in one place
</Card>
<Card title="Load Balancing" icon="scale-balanced">
Distribute requests across multiple models and providers
</Card>
<Card title="Fallback Models" icon="shield">
Automatic failover when primary models are unavailable
</Card>
</CardGroup>
## Quick Start with Bytebot's Built-in LiteLLM Proxy
Bytebot includes a pre-configured LiteLLM proxy service that makes it easy to use any LLM provider. Here's how to set it up:
<Steps>
<Step title="Use Docker Compose with Proxy">
The easiest way is to use the proxy-enabled Docker Compose file:
```bash
# Clone Bytebot
git clone https://github.com/bytebot-ai/bytebot.git
cd bytebot
# Set up your API keys in docker/.env
cat > docker/.env << EOF
# Add any combination of these keys
ANTHROPIC_API_KEY=sk-ant-your-key-here
OPENAI_API_KEY=sk-your-key-here
GEMINI_API_KEY=your-key-here
EOF
# Start Bytebot with LiteLLM proxy
docker-compose -f docker/docker-compose.proxy.yml up -d
```
This automatically:
- Starts the `bytebot-llm-proxy` service on port 4000
- Configures the agent to use the proxy via `BYTEBOT_LLM_PROXY_URL`
- Makes all configured models available through the proxy
</Step>
<Step title="Customize Model Configuration">
To add custom models or providers, edit the LiteLLM config:
```yaml
# packages/bytebot-llm-proxy/litellm-config.yaml
model_list:
# Add Azure OpenAI
- model_name: azure-gpt-4o
litellm_params:
model: azure/gpt-4o-deployment
api_base: https://your-resource.openai.azure.com/
api_key: os.environ/AZURE_API_KEY
api_version: "2024-02-15-preview"
# Add AWS Bedrock
- model_name: claude-bedrock
litellm_params:
model: bedrock/anthropic.claude-3-5-sonnet
aws_region_name: us-east-1
# Add local models via Ollama
- model_name: local-llama
litellm_params:
model: ollama/llama3:70b
api_base: http://host.docker.internal:11434
```
Then rebuild:
```bash
docker-compose -f docker/docker-compose.proxy.yml up -d --build
```
</Step>
<Step title="Verify Models are Available">
The Bytebot agent automatically queries the proxy for available models:
```bash
# Check available models through Bytebot API
curl http://localhost:9991/tasks/models
# Or directly from LiteLLM proxy
curl http://localhost:4000/model/info
```
The UI will show all available models in the model selector.
</Step>
</Steps>
## How It Works
### Architecture
```mermaid
graph LR
A[Bytebot UI] -->|Select Model| B[Bytebot Agent]
B -->|BYTEBOT_LLM_PROXY_URL| C[LiteLLM Proxy :4000]
C -->|Route Request| D[Anthropic API]
C -->|Route Request| E[OpenAI API]
C -->|Route Request| F[Google API]
C -->|Route Request| G[Any Provider]
```
### Key Components
1. **bytebot-llm-proxy Service**: A LiteLLM instance running in Docker that:
- Runs on port 4000 within the Bytebot network
- Uses the config from `packages/bytebot-llm-proxy/litellm-config.yaml`
- Inherits API keys from environment variables
2. **Agent Integration**: The Bytebot agent:
- Checks for `BYTEBOT_LLM_PROXY_URL` environment variable
- If set, queries the proxy at `/model/info` for available models
- Routes all LLM requests through the proxy
3. **Pre-configured Models**: Out of the box support for:
- Anthropic: Claude Opus 4, Claude Sonnet 4
- OpenAI: GPT-4.1, GPT-4o
- Google: Gemini 2.5 Pro, Gemini 2.5 Flash
## Provider Configurations
### Azure OpenAI
```yaml
model_list:
- model_name: azure-gpt-4o
litellm_params:
model: azure/gpt-4o-deployment-name
api_base: https://your-resource.openai.azure.com/
api_key: your-azure-key
api_version: "2024-02-15-preview"
- model_name: azure-gpt-4o-vision
litellm_params:
model: azure/gpt-4o-deployment-name
api_base: https://your-resource.openai.azure.com/
api_key: your-azure-key
api_version: "2024-02-15-preview"
supports_vision: true
```
### AWS Bedrock
```yaml
model_list:
- model_name: claude-bedrock
litellm_params:
model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0
aws_region_name: us-east-1
# Uses AWS credentials from environment
- model_name: llama-bedrock
litellm_params:
model: bedrock/meta.llama3-70b-instruct-v1:0
aws_region_name: us-east-1
```
### Google Vertex AI
```yaml
model_list:
- model_name: gemini-vertex
litellm_params:
model: vertex_ai/gemini-1.5-pro
vertex_project: your-gcp-project
vertex_location: us-central1
# Uses GCP credentials from environment
```
### Local Models (Ollama)
```yaml
model_list:
- model_name: local-llama
litellm_params:
model: ollama/llama3:70b
api_base: http://ollama:11434
- model_name: local-mixtral
litellm_params:
model: ollama/mixtral:8x7b
api_base: http://ollama:11434
```
### Hugging Face
```yaml
model_list:
- model_name: hf-llama
litellm_params:
model: huggingface/meta-llama/Llama-3-70b-chat-hf
api_key: hf_your_token
```
## Advanced Features
### Load Balancing
Distribute requests across multiple providers:
```yaml
model_list:
- model_name: gpt-4o
litellm_params:
model: gpt-4o
api_key: sk-openai-key
- model_name: gpt-4o # Same name for load balancing
litellm_params:
model: azure/gpt-4o
api_base: https://azure.openai.azure.com/
api_key: azure-key
router_settings:
routing_strategy: "least-busy" # or "simple-shuffle", "usage-based-routing", "latency-based-routing"
```
### Fallback Models
Configure automatic failover:
```yaml
model_list:
- model_name: primary-model
litellm_params:
model: claude-3-5-sonnet-20241022
api_key: sk-ant-key
- model_name: fallback-model
litellm_params:
model: gpt-4o
api_key: sk-openai-key
router_settings:
fallbacks: [{"primary-model": ["fallback-model"]}]
# Use "primary-model" in Bytebot config; LiteLLM retries on "fallback-model" if it fails
```
### Cost Controls
Set spending limits and track usage:
```yaml
general_settings:
master_key: sk-litellm-master
database_url: "postgresql://user:pass@localhost:5432/litellm"
# Budget limits
max_budget: 100 # $100 monthly limit
budget_duration: "30d"
# Per-model limits
model_max_budget:
gpt-4o: 50
claude-3-5-sonnet: 50
litellm_settings:
callbacks: ["langfuse"] # For detailed tracking
```
### Rate Limiting
Prevent API overuse:
```yaml
model_list:
- model_name: rate-limited-gpt
litellm_params:
model: gpt-4o
api_key: sk-key
rpm: 100 # Requests per minute
tpm: 100000 # Tokens per minute
```
## Alternative Setup: External LiteLLM Proxy
If you prefer to run LiteLLM separately or have an existing LiteLLM deployment:
### Option 1: Modify docker-compose.yml
```yaml
# docker-compose.yml (without built-in proxy)
services:
bytebot-agent:
environment:
# Point to your external LiteLLM instance
- BYTEBOT_LLM_PROXY_URL=http://your-litellm-server:4000
# ... rest of config
```
### Option 2: Use Environment Variable
```bash
# Set the proxy URL before starting
export BYTEBOT_LLM_PROXY_URL=http://your-litellm-server:4000
# Start normally
docker-compose -f docker/docker-compose.yml up -d
```
### Option 3: Run Standalone LiteLLM
```bash
# Run your own LiteLLM instance
docker run -d \
--name litellm-external \
-p 4000:4000 \
-v $(pwd)/custom-config.yaml:/app/config.yaml \
-e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
ghcr.io/berriai/litellm:main \
--config /app/config.yaml
# Then start Bytebot with:
export BYTEBOT_LLM_PROXY_URL=http://localhost:4000
docker-compose -f docker/docker-compose.yml up -d
```
## Kubernetes Setup
Deploy with Helm:
```yaml
# litellm-values.yaml
replicaCount: 2
image:
repository: ghcr.io/berriai/litellm
tag: main
service:
type: ClusterIP
port: 4000
config:
model_list:
- model_name: claude-3-5-sonnet
litellm_params:
model: claude-3-5-sonnet-20241022
api_key: ${ANTHROPIC_API_KEY}
general_settings:
master_key: ${LITELLM_MASTER_KEY}
# Then in Bytebot values.yaml:
agent:
openai:
enabled: true
apiKey: "${LITELLM_MASTER_KEY}"
baseUrl: "http://litellm:4000/v1"
model: "claude-3-5-sonnet"
```
## Monitoring & Debugging
### LiteLLM Dashboard
Access metrics and logs:
```bash
# Port forward to dashboard
kubectl port-forward svc/litellm 4000:4000
# Access at http://localhost:4000/ui
# Login with your master_key
```
### Debug Requests
Enable detailed logging:
```yaml
litellm_settings:
debug: true
detailed_debug: true
general_settings:
master_key: sk-key
store_model_in_db: true # Store request history
```
### Common Issues
<AccordionGroup>
<Accordion title="Model not found">
Check model name matches exactly:
```bash
curl http://localhost:4000/v1/models \
-H "Authorization: Bearer sk-key"
```
</Accordion>
<Accordion title="Authentication errors">
Verify master key in both LiteLLM and Bytebot:
```bash
# Test LiteLLM
curl http://localhost:4000/v1/chat/completions \
-H "Authorization: Bearer sk-key" \
-H "Content-Type: application/json" \
-d '{"model": "your-model", "messages": [{"role": "user", "content": "test"}]}'
```
</Accordion>
<Accordion title="Slow responses">
Check latency per provider:
```yaml
router_settings:
routing_strategy: "latency-based-routing"
enable_pre_call_checks: true
```
</Accordion>
</AccordionGroup>
## Best Practices
### Model Selection for Bytebot
Choose models with strong vision capabilities for best results:
<Tabs>
<Tab title="Recommended">
- Claude 3.5 Sonnet (Best overall)
- GPT-4o (Good vision + reasoning)
- Gemini 1.5 Pro (Large context)
</Tab>
<Tab title="Budget Options">
- Claude 3.5 Haiku (Fast + cheap)
- GPT-4o mini (Good balance)
- Gemini 1.5 Flash (Very fast)
</Tab>
<Tab title="Local Models">
- LLaVA (Vision support)
- Qwen-VL (Vision support)
- CogVLM (Vision support)
</Tab>
</Tabs>
### Performance Optimization
```yaml
# Optimize for Bytebot workloads
router_settings:
routing_strategy: "latency-based-routing"
cooldown_time: 60 # Seconds before retrying failed provider
num_retries: 2
request_timeout: 600 # 10 minutes for complex tasks
# Cache for repeated requests
cache: true
cache_params:
type: "redis"
host: "redis"
port: 6379
ttl: 3600 # 1 hour
```
### Security
```yaml
general_settings:
master_key: ${LITELLM_MASTER_KEY}
# IP allowlist
allowed_ips: ["10.0.0.0/8", "172.16.0.0/12"]
# Audit logging
store_model_in_db: true
# Encryption
encrypt_keys: true
# Headers to forward
forward_headers: ["X-Request-ID", "X-User-ID"]
```
## Next Steps
<CardGroup cols={2}>
<Card title="Supported Models" icon="list" href="https://docs.litellm.ai/docs/providers">
Full list of 100+ providers
</Card>
<Card title="LiteLLM Proxy Docs" icon="server" href="https://docs.litellm.ai/docs/simple_proxy">
Official LiteLLM proxy server documentation
</Card>
<Card title="LiteLLM Docs" icon="book" href="https://docs.litellm.ai">
Complete LiteLLM documentation
</Card>
</CardGroup>
<Note>
**Pro tip:** Start with a single provider, then add more as needed. LiteLLM makes it easy to switch or combine models without changing Bytebot configuration.
</Note>
================================================
FILE: docs/deployment/railway.mdx
================================================
---
title: "Deploying Bytebot on Railway"
description: "Comprehensive guide to deploying the full Bytebot stack on Railway using the official 1-click template"
---
> **TL;DR –** Click the button below, add your AI API key (Anthropic, OpenAI, or Google), and your personal Bytebot instance will be live in ~2 minutes.
[![Deploy on Railway](https://railway.com/button.svg)](https://railway.com/deploy/bytebot?referralCode=L9lKXQ)
---
## Why Railway?
Railway provides a zero-ops PaaS experience with private networking and per-service logs that perfectly fits Bytebot’s multi-container architecture. The official template wires every service together using the latest container images published under the `edge` tag.
---
## What Gets Deployed
| Service | Container Image (edge) | Port | Exposed? | Purpose |
| ---------------- | -------------------------------------------------------------------- | ---- | -------- | ------------------------------------ |
| **bytebot-ui** | `ghcr.io/bytebot-ai/bytebot-ui:edge` | 9992 | **Yes** | Next.js web UI exposed to the public |
| **bytebot-agent**| `ghcr.io/bytebot-ai/bytebot-agent:edge` | 9991 | No | Task orchestration & LLM calls |
| **bytebot-desktop**| `ghcr.io/bytebot-ai/bytebot-desktop:edge` | 9990 | No | Containerised Ubuntu + XFCE desktop |
| **postgres** | `postgres:14-alpine` | 5432 | No | Persistence layer |
All internal traffic flows through Railway’s [private networking](https://docs.railway.com/guides/private-networking). Only `bytebot-ui` is assigned a public domain.
---
## Step-by-Step Walk-through
<Steps>
<Step title="1. Open the Template">
Click the **Deploy on Railway** button above or visit [https://railway.com/deploy/bytebot?referralCode=L9lKXQ](https://railway.com/deploy/bytebot?referralCode=L9lKXQ).
</Step>
<Step title="2. Configure Environment">
For the bytebot-agent resource, add your AI API key (choose at least one):
- **Anthropic**: Paste into `ANTHROPIC_API_KEY` for Claude models
- **OpenAI**: Paste into `OPENAI_API_KEY` for GPT models
- **Google**: Paste into `GEMINI_API_KEY` for Gemini models
Keep other defaults as is.
</Step>
<Step title="3. Kick off the Deployment">
Press **Deploy**. Railway will pull the pre-built images, create the Postgres database and link all services on a private network.
</Step>
<Step title="4. Launch Bytebot">
When the build logs show *"bytebot-ui: ready"*, click the generated URL (e.g. `https://bytebot-ui-prod.up.railway.app`). You should see the task interface. Create a task and watch the desktop stream!
_Tip: You can tail logs for each service from the Railway dashboard._
</Step>
</Steps>
<Note>
The first deploy downloads several container layers – expect ~2 minutes. Subsequent redeploys are much faster.
</Note>
---
## Private Networking & Security
- **Private networking** ensures that the agent, desktop and database can communicate securely without exposing their ports to the internet.
- **Public exposure** is limited to the UI, which serves static assets and proxies WebSocket traffic.
- **Add authentication** by placing the UI behind Railway’s built-in password protection or an external provider (e.g. Cloudflare Access, Auth0, OAuth proxy).
- You can also point a custom domain to the UI from the Railway dashboard and enable Cloudflare for WAF/CDN protection.
---
## Customisation & Scaling
1. **Change images** – Fork the repo, push your own images and edit the template’s `Dockerfile` references.
2. **Increase resources** – Each service has an independent CPU/RAM slider in Railway. Bump up the desktop or agent if you plan heavy automations.
---
## Troubleshooting
| Symptom | Likely Cause | Fix |
| ------- | ------------ | ---- |
| Web UI shows “connecting…” | Desktop not ready or private networking mis-config | Wait for `bytebot-desktop` container to finish starting, or restart service |
| Agent errors `401` or `403` | Missing/invalid API key | Re-enter your AI provider's API key in Railway variables |
| Slow desktop video | Free Railway plan throttling | Upgrade plan or reduce screen resolution in desktop settings |
---
## Next Steps
- Explore the [REST APIs](/api-reference/introduction) to script tasks programmatically.
- Join our [Discord](https://discord.com/invite/d9ewZkWPTP) community to get support and showcase your automations!
================================================
FILE: docs/docs.json
================================================
{
"$schema": "https://mintlify.com/docs.json",
"theme": "mint",
"name": "Bytebot - Self-Hosted AI Desktop Agent",
"colors": {
"primary": "#000000",
"light": "#fbfaf9",
"dark": "#000000"
},
"favicon": "/favicon.svg",
"navigation": {
"tabs": [
{
"tab": "Documentation",
"groups": [
{
"group": "Getting Started",
"pages": ["introduction", "quickstart"]
},
{
"group": "User Guides",
"pages": [
"guides/task-creation",
"guides/password-management",
"guides/takeover-mode"
]
},
{
"group": "Deployment",
"pages": [
"deployment/railway",
"deployment/helm",
"deployment/litellm"
]
},
{
"group": "Core Concepts",
"pages": [
"core-concepts/architecture",
"core-concepts/agent-system",
"core-concepts/desktop-environment",
"core-concepts/rpa-comparison"
]
}
]
},
{
"tab": "API Reference",
"groups": [
{
"group": "Overview",
"pages": ["api-reference/introduction"]
},
{
"group": "Agent API",
"pages": [
"api-reference/agent/tasks",
"api-reference/agent/ui"
]
},
{
"group": "Computer Control API",
"pages": [
"api-reference/computer-use/unified-endpoint",
"api-reference/computer-use/examples"
]
}
]
}
],
"global": {
"anchors": [
{
"anchor": "GitHub",
"href": "https://github.com/bytebot-ai/bytebot",
"icon": "github"
},
{
"anchor": "Discord",
"href": "https://discord.gg/zcb5wA2t4u",
"icon": "discord"
},
{
"anchor": "Twitter",
"href": "https://x.com/bytebot_ai",
"icon": "twitter"
},
{
"anchor": "Blog",
"href": "https://bytebot.ai/blog",
"icon": "newspaper"
}
]
}
},
"logo": {
"light": "/logo/bytebot_transparent_logo_dark.svg",
"dark": "/logo/bytebot_transparent_logo_white.svg"
},
"navbar": {
"links": [
{
"label": "Support",
"href": "https://discord.gg/zcb5wA2t4u"
}
],
"primary": {
"type": "button",
"label": "Get Started",
"href": "https://github.com/bytebot-ai/bytebot"
}
},
"footer": {
"socials": {
"github": "https://github.com/bytebot-ai/bytebot",
"twitter": "https://x.com/bytebot_ai",
"discord": "https://discord.gg/zcb5wA2t4u"
}
},
"metadata": {
"og:title": "Bytebot - Self-Hosted AI Desktop Agent",
"og:description": "Automate any computer task with natural language using your own AI desktop agent",
"og:image": "/images/agent-architecture.png",
"twitter:card": "summary_large_image"
}
}
================================================
FILE: docs/guides/password-management.mdx
================================================
---
title: "Password Management & 2FA"
description: "How Bytebot handles authentication automatically using password managers"
---
# Automated Authentication with Bytebot
Bytebot can handle authentication automatically - including passwords, 2FA, and even complex multi-step authentication flows - when you set up a password manager extension.
<Note>
**Important**: Password manager extensions are not enabled by default. You need to install them manually using the desktop view.
</Note>
## How It Works
Bytebot supports 1Password and any other browser-based password manager extension. Once an extension is installed, Bytebot can:
- Automatically fill passwords from the password manager
- Handle 2FA codes (TOTP/authenticator apps)
- Manage multiple accounts across different systems
- Work with SSO and federated authentication
- Store and use API keys and tokens
## Setting Up Password Management
### Option 1: 1Password (Recommended)
<Steps>
<Step title="Install 1Password Extension">
1. Go to the Desktop tab in Bytebot UI
2. Open Firefox
3. Install the 1Password extension from the Firefox Add-ons store
4. Sign in to your 1Password account (or create a dedicated one for Bytebot)
</Step>
<Step title="Configure Vaults">
In your 1Password admin panel:
1. Create a vault called "Bytebot Automation"
2. Add the credentials Bytebot needs
3. Share the vault with Bytebot's account
4. Set appropriate permissions (read-only recommended)
</Step>
<Step title="Enable Auto-fill">
The 1Password extension will automatically:
- Detect login forms
- Fill credentials
- Handle 2FA codes
- Submit forms
</Step>
</Steps>
### Option 2: Other Password Managers
You can use any browser-based password manager by installing it through the Desktop view:
<Tabs>
<Tab title="Bitwarden">
1. Open Desktop tab
2. Launch Firefox
3. Install Bitwarden extension from Firefox Add-ons
4. Log in to your Bitwarden account
5. Configure auto-fill settings in Bitwarden preferences
</Tab>
<Tab title="LastPass">
1. Open Desktop tab
2. Launch Firefox
3. Install LastPass extension from Firefox Add-ons
4. Log in with your enterprise account
5. Accept any shared folders for automation credentials
</Tab>
<Tab title="KeePass">
1. Open Desktop tab
2. Install KeePassXC application if needed
3. Install KeePassXC browser extension in Firefox
4. Configure browser integration
5. Load your KeePass database
</Tab>
</Tabs>
## Handling Different Authentication Types
### Standard Username/Password
```yaml
# Task description
Task: "Log into our CRM system and export the customer list"
# Bytebot automatically:
1. Navigates to login page
2. Password manager detects form
3. Auto-fills credentials
4. Submits login
5. Proceeds with task
```
### Time-based 2FA (TOTP)
```yaml
# Task description
Task: "Access the banking portal and download statements"
# Bytebot handles:
1. Enters username/password from password manager
2. When 2FA prompt appears
3. Password manager provides TOTP code
4. Enters code automatically
5. Completes authentication
```
### Complex Multi-Step Auth
```yaml
# Task description
Task: "Log into the government portal (uses email verification)"
# Bytebot can:
1. Fill initial credentials
2. Handle "send code to email" flows
3. Access webmail account (also in password manager)
4. Retrieve verification code from webmail
5. Complete authentication
```
## Enterprise Setup Guide
### Centralized Credential Management
<Steps>
<Step title="Create Service Accounts">
Set up dedicated service accounts for Bytebot:
```
- bytebot-finance@company.com (banking portals)
- bytebot-hr@company.com (HR systems)
- bytebot-ops@company.com (operational tools)
```
</Step>
<Step title="Organize Password Vaults">
Structure your password manager:
```
Bytebot Vaults/
├── Financial Systems/
│ ├── Banking Portal A
│ ├── Banking Portal B
│ └── Payment Processor
├── Internal Tools/
│ ├── ERP System
│ ├── CRM Platform
│ └── HR Portal
└── External Services/
├── Vendor Portal 1
├── Government Site
└── Partner System
```
</Step>
<Step title="Set Rotation Policies">
Configure automatic password rotation:
```javascript
// Example automation for password rotation
{
"schedule": "monthly",
"task": "For each credential in 'Rotation Required' vault, update password in the system and save new password"
}
```
</Step>
</Steps>
### Security Best Practices
<CardGroup cols={2}>
<Card title="Least Privilege" icon="shield-halved">
Only share credentials Bytebot needs for specific tasks
</Card>
<Card title="Audit Logging" icon="scroll">
Enable password manager audit logs to track access
</Card>
<Card title="Vault Isolation" icon="lock">
Separate vaults by sensitivity level and department
</Card>
<Card title="Regular Reviews" icon="calendar-check">
Audit Bytebot's credential access monthly
</Card>
</CardGroup>
## Common Authentication Scenarios
### Banking and Financial Systems
```yaml
Scenario: Daily bank reconciliation across 5 banks
Setup:
- Each bank credential in password manager
- 2FA seeds stored for TOTP generation
- Bytebot's IP whitelisted at banks
Task: "Log into each bank account, download yesterday's
transactions, and consolidate into daily report"
Result: Fully automated, no human intervention needed
```
### Government and Compliance Portals
```yaml
Scenario: Weekly regulatory filings
Setup:
- Service account with 2FA enabled
- Password manager has TOTP seed
- Security questions stored as notes
Task: "Log into state tax portal, file weekly sales tax
report using data from tax_data.csv"
Handles: Password, 2FA, security questions, CAPTCHAs
```
### Multi-Tenant SaaS Platforms
```yaml
Scenario: Managing multiple client accounts
Setup:
- Credentials for each tenant/client
- Organized in password manager by client
- Naming convention: client-platform-role
Task: "For each client in client_list.txt, log into their
Shopify account and export this month's orders"
Scales: Handles 100+ accounts seamlessly
```
## Advanced Authentication Features
### SSO and SAML Integration
```yaml
# Bytebot can handle SSO flows
Task: "Log into Salesforce using Okta SSO"
Process:
1. Navigate to Salesforce
2. Click "Log in with SSO"
3. Redirect to Okta
4. Password manager fills Okta credentials
5. Handle any 2FA on Okta
6. Redirect back to Salesforce
7. Continue with task
```
### API Key Management
```yaml
# Store API keys in password manager
Password Entry: "OpenAI API Key"
- Username: "api"
- Password: "sk-proj-..."
- Notes: "Rate limit: 10000/day"
# Use in tasks
Task: "Configure the application to use our OpenAI API key
from the password manager"
```
### Certificate-Based Auth
```yaml
# For systems requiring certificates
Setup:
1. Store certificate password in manager
2. Mount certificate file to Bytebot
3. Configure browser to use certificate
Task: "Access the enterprise portal that requires
client certificate authentication"
```
## Troubleshooting Authentication
<AccordionGroup>
<Accordion title="Password manager not auto-filling">
**Solutions:**
- Ensure extension is installed and logged in
- Check site is saved in password manager
- Verify auto-fill settings are enabled
- Try refreshing the page
</Accordion>
<Accordion title="2FA code rejected">
**Common causes:**
- Time sync issues (check system clock)
- Wrong TOTP seed saved
- Site using non-standard 2FA
**Fix:**
```bash
# Sync system time
docker exec bytebot-desktop ntpdate -s time.nist.gov
```
</Accordion>
<Accordion title="Session expiring during task">
**Solutions:**
- Enable "remember me" if available
- Increase session timeout in target system
- Break long tasks into smaller chunks
- Use API access where possible
</Accordion>
</AccordionGroup>
## Integration Examples
### Finance Automation Script
```python
# Example: Automated invoice collection
tasks = [
{
"description": "Log into vendor portal A and download all pending invoices",
"credentials": "vault://Financial Systems/Vendor Portal A"
},
{
"description": "Log into vendor portal B and download all pending invoices",
"credentials": "vault://Financial Systems/Vendor Portal B"
},
{
"description": "Process all downloaded invoices through our AP system",
"credentials": "vault://Internal Tools/AP System"
}
]
# Bytebot handles all authentication automatically
```
### Compliance Automation
```yaml
Daily Compliance Check:
Morning:
- Log into regulatory portal (2FA enabled)
- Download new compliance updates
- Check our status
If Non-Compliant:
- Log into internal system
- Create compliance ticket
- Notify compliance team
All credentials managed automatically
```
## Best Practices Summary
✅ **DO:**
- Use dedicated service accounts for Bytebot
- Organize credentials in logical vaults
- Enable 2FA on all accounts (Bytebot handles it!)
- Rotate passwords regularly
- Monitor access logs
❌ **DON'T:**
- Share personal credentials with Bytebot
- Store passwords in task descriptions
- Disable 2FA for convenience
- Use the same password across systems
- Ignore authentication errors
## Next Steps
<CardGroup cols={2}>
<Card title="Task Examples" icon="list" href="/guides/task-creation">
See auth in action
</Card>
<Card title="API Integration" icon="code" href="/api-reference/introduction">
Programmatic credential management
</Card>
</CardGroup>
<Note>
**Game Changer**: With proper password manager setup, Bytebot can handle even the most complex authentication flows automatically. No more manual intervention for 2FA, no more sharing passwords insecurely, and no more authentication bottlenecks in your automation workflows!
</Note>
================================================
FILE: docs/guides/takeover-mode.mdx
================================================
---
title: "Takeover Mode"
description: "Take control of the desktop when you need to guide or assist Bytebot"
---
# Takeover Mode: Human-AI Collaboration
Takeover mode lets you take control of the desktop to help Bytebot when needed. There are two ways to use it:
## 1. During Task Execution
In the task detail view, you can hit the takeover button to:
- Interrupt the agent if it's going down the wrong path
- Guide it towards the correct solution
- Resolve issues when it's stumbling on something
## 2. Automatic Activation
Takeover mode is automatically enabled when a task status is set to "needs help" - this happens when the agent realizes it can't accomplish something on its own.
## How Actions Are Recorded
All your actions during takeover (clicks, drags, scrolls, typing, key presses) are automatically logged in the same unified action space that the agent uses. This means Bytebot understands and learns from everything you do.
## Desktop Tab for Setup
Outside of tasks, there's a dedicated **Desktop** tab on the main page that provides:
- Free-ranging access to the desktop
- Nothing is recorded in this mode
- Perfect for:
- Installing programs
- Logging into apps or websites
- Setting up the desktop environment
- General desktop maintenance
## Activating Takeover Mode
### Method 1: Manual Takeover During Tasks
<Steps>
<Step title="Open Task Detail View">
While Bytebot is working on a task, click on the task to open the detail view.
</Step>
<Step title="Click Takeover Button">
Hit the takeover button to interrupt the agent and take control.
</Step>
<Step title="Guide Bytebot">
Perform the necessary actions to get past the obstacle or show the correct path.
</Step>
<Step title="Release Control">
Click to release control and let Bytebot continue from where you left off.
</Step>
</Steps>
### Method 2: Automatic When Help Needed
When Bytebot sets a task status to "needs help":
- Takeover mode is automatically enabled
- You'll see a notification that Bytebot needs assistance
- Take control to help resolve the issue
- Bytebot will continue once you release control
## Common Use Cases
### 1. Complex UI Navigation
<Card title="Custom Applications" icon="window">
**Scenario**: Working with proprietary or complex software
**Steps**:
1. Let Bytebot open the application
2. Take control to navigate complex interfaces
3. Use the chat to explain what you're doing
4. Return control for Bytebot to continue
**Example**: "Open our internal CRM, I'll show you how to navigate to the reports section"
</Card>
### 2. Error Recovery
<Card title="Handling Unexpected Situations" icon="exclamation-triangle">
**Scenario**: Bytebot encounters an error or gets stuck
**Steps**:
1. Notice Bytebot is struggling
2. Take control to resolve the issue
3. Guide it past the problem
4. Explain what went wrong in chat
5. Return control to let Bytebot continue
**Example**: "Let me handle this unexpected popup that's blocking the workflow"
</Card>
### 3. Teaching by Demonstration
<Card title="Show Don't Tell" icon="graduation-cap">
**Scenario**: Complex multi-step processes
**Steps**:
1. Take control when you need to demonstrate
2. Perform the task normally (no need to move slowly)
3. Use chat to explain what you're clicking and why
4. Return control
5. Ask Bytebot to repeat the process
**Example**: "Watch me navigate through our vendor portal to find the invoice section"
</Card>
<Warning>
**Important**: Screenshots are taken for every action during takeover mode. Do not enter any data that you don't want captured in screenshots.
</Warning>
## Best Practices
### Do's ✅
- **Use Chat While Taking Over**: Type messages explaining what you're doing and why
- **Explain Your Clicks**: Share context about UI elements and their purpose
- **Return Control Before Leaving**: Always release control before exiting the task detail view
- **Test Understanding**: Ask Bytebot to summarize what it learned
### Don'ts ❌
- **Enter Data You Don't Want Captured**: Screenshots are taken of all actions
- **Skip Chat Explanations**: Context helps Bytebot learn patterns
- **Leave Task View While in Control**: This will leave the task stuck in takeover mode
- **Assume Knowledge**: Explain application-specific workflows
<Note>
**No Need to Move Slowly**: Bytebot captures the state before and after each action, so you can work at normal speed.
</Note>
## Summary
Takeover mode provides flexibility when you need to guide Bytebot or handle situations it can't manage alone. Whether you're navigating complex interfaces, recovering from errors, or teaching new workflows, takeover mode ensures you're always in control when needed.
================================================
FILE: docs/guides/task-creation.mdx
================================================
---
title: "Task Creation & Management"
description: "Master the art of creating effective tasks and managing them through completion"
---
# Creating and Managing Tasks in Bytebot
This guide will walk you through everything you need to know about creating tasks that Bytebot can execute effectively, and managing them through their lifecycle.
## Understanding Tasks
A task is any job you want Bytebot to complete. Tasks can be:
- **Simple**: "Log in to GitHub" or "Visit example.com" (uses one program)
- **Complex**: "Download invoices from email and save them to a folder" (uses multiple programs)
- **File-based**: "Read the uploaded PDF and extract all email addresses" (processes uploaded files)
- **Collaborative**: "Process invoices, ask me to handle special approvals"
## Working with Files
Bytebot has powerful file handling capabilities that make it perfect for document processing and data analysis tasks.
### Uploading Files with Tasks
When creating a task, you can upload files that will be automatically saved to the desktop instance. This is incredibly useful for:
- **Document Processing**: Upload PDFs, spreadsheets, or documents for Bytebot to analyze
- **Data Analysis**: Provide CSV files or datasets for processing
- **Template Filling**: Upload forms or templates that need to be completed
- **Batch Operations**: Upload multiple files for bulk processing
<Note>
**Game Changer**: Bytebot can read entire files, including PDFs, directly into the LLM context. This means it can process large amounts of data quickly and understand complex documents without manual extraction.
</Note>
### File Upload Examples
<Tabs>
<Tab title="Web UI">
1. Click the attachment button when creating a task
2. Select files to upload (PDFs, CSVs, images, etc.)
3. Files are automatically saved to the desktop
4. Reference them in your task description:
```
"Read the uploaded contracts.pdf and extract all payment terms,
then create a summary spreadsheet with vendor names and terms"
```
</Tab>
<Tab title="API">
```bash
# Upload files with task creation (multipart/form-data)
curl -X POST http://localhost:9991/tasks \
-F "description=Analyze the uploaded financial statements and create a summary" \
-F "priority=HIGH" \
-F "files=@financial_statements_2024.pdf" \
-F "files=@budget_comparison.xlsx"
```
</Tab>
</Tabs>
### File Processing Capabilities
<CardGroup cols={2}>
<Card title="PDF Analysis" icon="file-pdf">
- Extract text from PDFs
- Read entire PDFs into context
- Parse forms and contracts
- Extract tables and data
</Card>
<Card title="Spreadsheet Processing" icon="table">
- Read Excel/CSV files
- Analyze data patterns
- Generate reports
- Cross-reference multiple sheets
</Card>
<Card title="Document Understanding" icon="brain">
- Summarize long documents
- Extract key information
- Compare multiple files
- Answer questions about content
</Card>
<Card title="Batch Operations" icon="layer-group">
- Process multiple files
- Apply same analysis to each
- Consolidate results
- Generate unified reports
</Card>
</CardGroup>
## Creating Your First Task
### Using the Web UI
<Steps>
<Step title="Open Bytebot UI">
Navigate to `http://localhost:9992`
</Step>
<Step title="Enter Your Task">
In the input field on the left side, type what you want done. For example:
```
Log in to my GitHub account and check for new notifications
```
</Step>
<Step title="Start Task">
Press the arrow button or hit Enter. Bytebot will start loading and begin working on your task.
</Step>
</Steps>
### Using the API
```bash
curl -X POST http://localhost:9991/tasks \
-H "Content-Type: application/json" \
-d '{
"description": "Download all PDF invoices from my email and organize by date",
"priority": "HIGH",
"type": "IMMEDIATE"
}'
```
## Writing Effective Task Descriptions
### The Golden Rules
<CardGroup cols={2}>
<Card title="Be Specific" icon="bullseye">
❌ "Do some research"
✅ "Research top 5 CRM tools for small businesses"
</Card>
<Card title="Include Context" icon="info">
❌ "Fill out the form"
✅ "Fill out the contact form on example.com with test data"
</Card>
<Card title="Define Success" icon="check">
❌ "Organize files"
✅ "Organize files in Downloads folder by type into subfolders"
</Card>
<Card title="One Goal Per Task" icon="target">
❌ "Do multiple unrelated things"
✅ "Focus on a single objective with clear steps"
</Card>
</CardGroup>
### Task Description Templates
#### Enterprise Process Automation
```
Log into [system] and:
1. [Navigate to specific section]
2. [Download/Extract data]
3. [Process through other system]
4. [Update records/Generate report]
Handle any [specific scenarios]
Example:
Log into our banking portal and:
1. Navigate to wire transfers section
2. Download all pending wire confirmations
3. Match against our ERP payment records
4. Flag any discrepancies in the reconciliation sheet
(Bytebot handles all authentication including 2FA automatically via password manager)
```
#### Multi-Application Workflow
```
Access [System A] to get [data]
Then in [System B]:
1. [Process the data]
2. [Update records]
Finally in [System C]:
1. [Verify updates]
2. [Generate confirmation]
Example:
Access Salesforce to get list of new customers from today
Then in NetSuite:
1. Create customer records with billing info
2. Set up payment terms
Finally in our shipping system:
1. Verify addresses are valid
2. Generate welcome kit shipping labels
```
#### Compliance & Audit Task
```
For each [entity] in [source]:
1. Check [compliance requirement]
2. Document [specific data]
3. Flag any [violations/issues]
Generate report showing [metrics]
Example:
For each vendor in our approved vendor list:
1. Check their insurance certificates are current
2. Document expiration dates and coverage amounts
3. Flag any expiring within 30 days
Generate report showing compliance percentage by category
```
## Managing Active Tasks
### Task States
<img src="/images/task-lifecycle.png" alt="Task Lifecycle" className="w-full max-w-3xl" />
Tasks move through these states:
1. **Created** → Task is defined but not started
2. **Queued** → Waiting for agent availability
3. **Running** → Actively being worked on
4. **Needs Help** → Requires human input
5. **Completed** → Successfully finished
6. **Failed** → Could not be completed
### Monitoring Progress
#### Real-time Updates
Watch Bytebot work through the task detail viewer:
- **Green dot**: Task is actively running
- **Status messages**: Current step being executed
- **Desktop view**: See what Bytebot sees in real-time
#### Chat Messages
Bytebot provides updates like:
```
Assistant: I'm now searching for project management tools...
Assistant: Found 15 options, filtering by your criteria...
Assistant: Creating the comparison table with 5 tools...
```
### Interacting with Running Tasks
#### Providing Additional Information
```
User: "Also include free tier options in your research"
Assistant: "I'll add a column for free tier availability to the comparison table."
```
#### Clarifying Instructions
```
Assistant: "I found multiple forms on this page. Which one should I fill out?"
User: "Use the 'Contact Sales' form on the right side"
```
#### Modifying Tasks
```
User: "Actually, make it top 10 tools instead of top 5"
Assistant: "I'll expand my research to include 10 tools in the comparison."
```
## Advanced Task Management
### Task Dependencies
Chain tasks that depend on each other:
```
Task 1: "Download the invoice from the vendor portal"
Task 2: "Open the downloaded invoice and extract the total amount"
Task 3: "Enter the amount into our accounting system"
```
## Best Practices
### Do's ✅
1. **Start Simple**: Test with basic tasks before complex ones
2. **Provide Examples**: "Format it like the report from last week"
3. **Include Credentials Safely**: Use takeover mode for passwords
4. **Set Realistic Expectations**: Complex tasks take time
5. **Review Results**: Always verify important outputs
### Don'ts ❌
1. **Overload Single Tasks**: Break complex workflows into steps
2. **Assume Knowledge**: Explain custom applications
3. **Skip Context**: Always provide necessary background
4. **Ignore Errors**: Address issues promptly
5. **Rush Critical Tasks**: Allow time for careful execution
## Task Examples by Category
### 📄 Document Processing & Analysis
```
"Read the uploaded contract.pdf and extract all key terms including payment schedules, deliverables, and termination clauses. Create a summary document with these details."
"Process all the uploaded invoice PDFs, extract vendor names, amounts, and due dates, then create a consolidated Excel spreadsheet sorted by due date."
"Analyze the uploaded financial_report.pdf and answer these questions: What was the revenue growth? What are the main risk factors mentioned? What is the debt-to-equity ratio?"
"Read through the uploaded employee_handbook.pdf and create a checklist of all compliance requirements mentioned in the document."
```
### 🏦 Enterprise Automation (RPA-Style Workflows)
```
"Log into our banking portal, download all transaction files from last month, save them to the Finance/Statements folder, then run the reconciliation script on each file."
(Note: Bytebot can handle authentication, including 2FA, automatically when a password manager extension is installed and configured)
"Access the vendor portal at supplier.example.com, navigate to the invoice section, download all pending invoices, extract the data into our standard template, and upload to the AP system."
"Open our legacy ERP system, export the customer list, then for each customer, look them up in the new CRM and update their status and last contact date."
```
### 📊 Financial Operations & Data Analysis
```
"Read the uploaded bank_statements folder containing 12 monthly PDFs, extract all transactions over $10,000, and create a summary report showing patterns and anomalies."
"Log into each of our 5 bank accounts, download the daily statements, consolidate them into a single cash position report, and save to the shared finance folder."
"Process the uploaded expense_reports.zip file, review all reports over $1,000, create a summary with policy violations flagged, and prepare for approval."
"Navigate to the tax authority website, download all GST/VAT returns for Q4, extract the figures, and populate our tax reconciliation spreadsheet."
```
### 🔄 Multi-System Integration
```
"Pull today's orders from Shopify, create corresponding entries in NetSuite, update inventory in our WMS, and trigger shipping labels in ShipStation."
"Extract employee data from Workday, cross-reference with our access control system, identify discrepancies, and create tickets for IT to resolve."
"Log into our insurance portal, download policy documents for all active policies, extract key dates and coverage amounts, update our risk management database."
```
### 📈 Compliance & Reporting
```
"Access all state regulatory websites for our operating regions, check for new compliance updates since last month, download relevant documents, and create a summary report."
"Log into our various SaaS tools (list provided), export user access reports, consolidate into a single audit trail, and flag any terminated employees still with access."
"Navigate to customer portal, download all SLA performance reports, extract metrics, compare against our internal data, and highlight discrepancies."
```
### 🤝 Development & QA Integration
```
"After the code agent deploys the new feature, test the complete user journey from signup to checkout, take screenshots at each step, and verify against the design specs."
"Run through all test scenarios in our QA checklist, but for any failures, have the code agent analyze the error and attempt a fix, then retest automatically."
"Monitor our staging environment, when a new build is deployed, automatically run the smoke test suite and create a visual regression report comparing to production."
```
## Troubleshooting Common Issues
<AccordionGroup>
<Accordion title="Task stuck in 'Running' state">
**Possible causes**:
- Waiting for slow page/app to load
- Encountered unexpected popup
- Unclear next step
**Solutions**:
- Check desktop viewer for current state
- Provide clarification via chat
- Use takeover mode to help
- Cancel and restart with clearer instructions
</Accordion>
<Accordion title="Task completed but wrong result">
**Possible causes**:
- Ambiguous instructions
- Website/app changed
- Misunderstood context
**Solutions**:
- Review task description for clarity
- Provide specific examples
- Break into smaller subtasks
- Use takeover mode to demonstrate
</Accordion>
<Accordion title="Task failed immediately">
**Possible causes**:
- Invalid URL or application
- Missing prerequisites
- System resource issues
**Solutions**:
- Verify URLs and application names
- Ensure required files/data exist
- Check system resources
- Review error messages in chat
</Accordion>
</AccordionGroup>
## Task Management Tips
### Organizing Multiple Tasks
1. **Use Clear Naming**: Include date, category, or project
2. **Group Related Tasks**: Process similar tasks together
3. **Priority Management**: Reserve 'Urgent' for true emergencies
4. **Regular Reviews**: Check completed tasks for quality
### Performance Optimization
- **Batch Similar Tasks**: Group web research, data entry, etc.
- **Prepare Resources**: Have files/data ready before starting
- **Clear Desktop**: Minimize distractions and popups
- **Stable Environment**: Ensure good internet and system resources
### Learning from Tasks
After each task:
1. Review the approach Bytebot took
2. Note any inefficiencies
3. Refine future task descriptions
4. Build a library of effective prompts
## Next Steps
<CardGroup cols={2}>
<Card title="Takeover Mode" icon="hand" href="/guides/takeover-mode">
Learn human-AI collaboration
</Card>
<Card title="API Integration" icon="code" href="/api-reference/agent/tasks">
Automate task creation
</Card>
</CardGroup>
<Note>
**Pro Tip**: Start with simple tasks to understand Bytebot's capabilities, then gradually increase complexity as you learn what works best.
</Note>
================================================
FILE: docs/introduction.mdx
================================================
---
title: Introduction
description: "Open source AI desktop agent that automates any computer task"
---
<p align="center">
<img
className="block dark:hidden"
src="/logo/bytebot_transparent_logo_dark.svg"
alt="Bytebot Logo"
width="500"
/>
<img
className="hidden dark:block"
src="/logo/bytebot_transparent_logo_white.svg"
alt="Bytebot Logo"
width="500"
/>
</p>
## What is Bytebot?
Bytebot is an open-source AI agent that can control a computer desktop to complete tasks for you. It runs in Docker containers on your own infrastructure, giving you a virtual assistant that can:
- Use any desktop application (browser, email, office tools, etc.)
- Process uploaded files including PDFs, spreadsheets, and documents
- Read entire files directly into the LLM context for rapid analysis
- Automate repetitive tasks like data entry and form filling
- Handle complex workflows that span multiple applications
- Work 24/7 without human supervision
Simply describe what you need done in plain English, and Bytebot will figure out how to do it – clicking buttons, typing text, navigating websites, reading documents, and completing tasks just like a human would.
## Why Bytebot Over Traditional RPA?
<CardGroup cols={2}>
<Card title="No Complex Scripting" icon="code-branch">
Unlike UiPath or similar tools, there is no need to design flowcharts or write scripts – just describe tasks naturally
</Card>
<Card title="Adaptive Intelligence" icon="brain">
AI-powered understanding means Bytebot adapts to UI changes without breaking
</Card>
<Card title="Visual Understanding" icon="eye">
Can read and understand any interface, not just pre-mapped elements
</Card>
<Card title="Human-Like Problem Solving" icon="lightbulb">
Handles unexpected popups, errors, and variations automatically
</Card>
</CardGroup>
## Why Self-Host Bytebot?
<CardGroup cols={2}>
<Card title="Complete Privacy" icon="shield">
Your tasks and data never leave your infrastructure. Everything runs locally
on your servers.
</Card>
<Card title="Full Control" icon="sliders">
Customize the desktop environment, install any applications, and configure
to your exact needs.
</Card>
<Card title="No Usage Limits" icon="infinity">
Use your own LLM API keys without platform restrictions or additional fees.
</Card>
<Card title="Secure Isolation" icon="lock">
Each desktop runs in its own container, completely isolated from your host
system.
</Card>
</CardGroup>
## Real-World Use Cases
### Enterprise Automation (RPA Replacement)
Bytebot is the next generation of RPA (Robotic Process Automation). It handles the same complex workflows as traditional tools like UiPath, but with AI-powered adaptability and automatic authentication:
- **Financial Operations**: Automate banking portal access (including 2FA when password manager extensions are configured), download transaction files, and process them through multiple systems
- **Compliance Workflows**: Navigate government websites, download regulatory documents, extract data, and update compliance tracking systems
- **Multi-System Integration**: Bridge legacy systems that lack APIs by automating the UI interactions between them
- **Vendor Management**: Log into supplier portals, download invoices, reconcile with internal systems, and process payments
### Business Process Automation
- **Data Reconciliation**: Pull reports from multiple SaaS platforms, cross-reference data, and generate consolidated reports
- **Customer Onboarding**: Navigate between CRM, banking, and verification systems to complete new customer setup
- **Purchase Order Processing**: Extract POs from webmail portals, enter into ERP systems, and update inventory databases
- **HR Operations**: Collect employee data from various systems, update records, and ensure consistency across platforms
### Development & QA Integration
Bytebot becomes even more powerful when combined with coding agents:
- **Full-Stack Testing**: Use a coding agent to generate code, then have Bytebot visually test and validate the output
- **Automated Debugging**: Let Bytebot reproduce user-reported issues while a coding agent analyzes and fixes the code
- **End-to-End Development**: Code agents write features, Bytebot tests them, creating a complete development loop
- **Visual Regression Testing**: Automatically detect UI changes across deployments with screenshot comparisons
## How It Works
<Steps>
<Step title="Describe Your Task">
Simply tell Bytebot what you want done in natural language through the tasks
interface
</Step>
<Step title="AI Plans the Actions">
Bytebot understands your request and breaks it down into specific computer
actions
</Step>
<Step title="Executes Actions">
Bytebot executes the task on its virtual desktop using the keyboard
and mouse
</Step>
<Step title="Watch or Walk Away">
Monitor it working in real-time through the task detail view, or let it
complete tasks independently.
</Step>
<Step title="Get Results">
Receive the completed task output, screenshots, or confirmation of
completion
</Step>
</Steps>
## Architecture Overview
Bytebot consists of four integrated components working together:
<img src="/images/agent-architecture.png" alt="Bytebot Agent Architecture" />
<CardGroup cols={2}>
<Card
title="Bytebot Desktop"
icon="desktop"
href="/core-concepts/desktop-environment"
>
Ubuntu 22.04 with XFCE4, VSCode, Firefox, Thunderbird email client, and automation daemon (bytebotd)
</Card>
<Card title="AI Agent" icon="brain" href="/core-concepts/agent-system">
NestJS service that uses LLMs (Anthropic Claude, OpenAI GPT, Google Gemini) to plan and execute tasks
</Card>
<Card
title="Task Interface"
icon="window"
href="/api-reference/agent/ui"
>
Next.js web app for creating and managing tasks
</Card>
<Card title="REST API" icon="code" href="/api-reference/introduction">
Programmatic access to both task management and direct desktop control
</Card>
</CardGroup>
## Getting Started
<CardGroup cols={3}>
<Card title="Quick Start" icon="rocket" href="/quickstart">
Get Bytebot running in 2 minutes
</Card>
<Card title="Architecture" icon="sitemap" href="/core-concepts/architecture">
Understand how it all fits together
</Card>
<Card title="API Reference" icon="book" href="/api-reference/introduction">
Integrate with your applications
</Card>
</CardGroup>
## Key Features
### 🤖 Natural Language Control
Just tell Bytebot what you need done. No coding or complex automation tools required.
### 🖥️ Full Desktop Access
Bytebot can use any application you can install - browsers, office tools, custom software.
### 🔒 Complete Privacy
Runs entirely on your infrastructure. Your data never leaves your servers.
### 🔄 Two Operating Modes
- **Autonomous Mode**: Bytebot completes tasks independently
- **Takeover Mode**: You can step in and take control when needed
### 🖱️ Direct Desktop Access
- **Desktop Tab**: Free-form access to the virtual desktop for setup, installing programs, or manual operations
- **Task View**: Watch and interact with Bytebot during task execution
### 🚀 Easy Deployment
- One-click deployment on Railway
- Docker Compose for self-hosting
- Helm charts for Kubernetes
### 🔌 Developer-Friendly
- REST APIs for programmatic control
- Task management API
- Extensible architecture
- MCP (Model Context Protocol) support
## Community & Support
<CardGroup cols={2}>
<Card
title="Discord Community"
icon="discord"
href="https://discord.com/invite/d9ewZkWPTP"
>
Join our community for help, tips, and discussions
</Card>
<Card
title="GitHub"
icon="github"
href="https://github.com/bytebot-ai/bytebot"
>
Report issues, contribute, or star the project
</Card>
</CardGroup>
<Note>
**Ready to give your AI its own computer?** Start with our [Quick Start
Guide](/quickstart) to have your own AI desktop agent running in minutes.
</Note>
================================================
FILE: docs/quickstart.mdx
================================================
---
title: "Quick Start"
description: "Get your AI desktop agent running in 2 minutes"
---
# Choose Your Deployment Method
Bytebot can be deployed in several ways depending on your needs:
<Tabs>
<Tab title="Railway (Easiest)">
## ☁️ One-click Deploy on Railway
[Deploy on Railway](https://railway.com/deploy/bytebot?referralCode=L9lKXQ)
<Steps>
<Step title="Visit the Template">
Click the Deploy Now button in the Bytebot template on Railway.
</Step>
<Step title="Add Your AI API Key">
Enter your `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, or `GEMINI_API_KEY` for the bytebot-agent resource.
</Step>
<Step title="Deploy & Launch">
Hit **Deploy**. Railway will build the stack, wire the services together via private networking and output a public URL for the UI. Your agent should be ready within a couple of minutes!
</Step>
</Steps>
<Note>
Need more details? See the full <a href="/deployment/railway">Railway deployment guide</a>.
</Note>
</Tab>
<Tab title="Docker Compose">
## 🐳 Self-host with Docker Compose
## Prerequisites
- Docker ≥ 20.10
- Docker Compose
- 4GB+ RAM available
- AI API key from one of these providers:
- Anthropic ([get one here](https://console.anthropic.com)) - Claude models
- OpenAI ([get one here](https://platform.openai.com/api-keys)) - GPT models
- Google ([get one here](https://makersuite.google.com/app/apikey)) - Gemini models
## 🚀 2-Minute Setup
Get your self-hosted AI desktop agent running in just three steps:
<Steps>
<Step title="Clone and Configure">
```bash
git clone https://github.com/bytebot-ai/bytebot.git
cd bytebot
# Configure your AI provider (choose one):
echo "ANTHROPIC_API_KEY=your_api_key_here" > docker/.env # For Claude
# echo "OPENAI_API_KEY=your_api_key_here" > docker/.env # For OpenAI
# echo "GEMINI_API_KEY=your_api_key_here" > docker/.env # For Gemini
```
</Step>
<Step title="Start the Agent Stack">
```bash
docker-compose -f docker/docker-compose.yml up -d
```
This starts all four services:
- **Bytebot Desktop**: Containerized Linux environment
- **AI Agent**: LLM-powered task processor (supports Claude, GPT, or Gemini)
- **Chat UI**: Web interface for interaction
- **Database**: PostgreSQL for persistence
</Step>
<Step title="Open the Chat Interface">
Navigate to [http://localhost:9992](http://localhost:9992) to access the Bytebot UI.
**Two ways to interact:**
1. **Tasks**: Enter task descriptions to have Bytebot work autonomously
2. **Desktop**: Direct access to the virtual desktop for manual control
Try asking:
- "Open Firefox and search for the weather forecast"
- "Take a screenshot of the desktop"
- "Create a text file with today's date"
</Step>
</Steps>
<Note>
**First time?** The initial startup may take 2-3 minutes as Docker downloads
the images. Subsequent starts will be much faster.
</Note>
## 🎯 What You Just Deployed
You now have a complete AI desktop automation system with:
<Note>
**🔐 Password Manager Support**: Bytebot can handle authentication automatically when you install a password manager extension. See our [password management guide](/guides/password-management) for setup instructions.
</Note>
<CardGroup cols={2}>
<Card title="AI Agent" icon="brain">
- Understands natural language
- Plans and executes tasks
- Adapts to errors
- Works autonomously
</Card>
<Card title="Virtual Desktop" icon="desktop">
- Full Ubuntu environment
- Browser, office tools
- File system access
- Application support
</Card>
<Card title="Task Interface" icon="window">
- Create and manage tasks
- Real-time desktop view
- Conversation history
- Takeover mode
</Card>
<Card title="REST APIs" icon="code">
- Programmatic control
- Task management API
- Direct desktop access
- MCP protocol support
</Card>
</CardGroup>
## 🚀 Your First Tasks
Now let's see Bytebot in action! Try these example tasks:
### Simple Tasks (Test the Basics)
<CardGroup cols={2}>
<Card title="Take a Screenshot" icon="camera">
"Take a screenshot of the desktop"
</Card>
<Card title="Open Browser" icon="globe">
"Open Firefox and go to google.com"
</Card>
<Card title="Create File" icon="file">
"Create a text file called 'hello.txt' with today's date"
</Card>
<Card title="System Info" icon="info">
"Check the system information and tell me the OS version"
</Card>
</CardGroup>
### Advanced Tasks (See the Power)
<CardGroup cols={2}>
<Card title="Web Research" icon="magnifying-glass">
"Find the top 5 AI news stories today and create a summary document"
</Card>
<Card title="Data Extraction" icon="table">
"Go to hacker news, find the top 10 stories, and save them to a CSV file"
</Card>
<Card title="Document Processing" icon="file-pdf">
"Upload a PDF contract and extract all payment terms and deadlines"
</Card>
<Card title="Multi-Step Workflow" icon="layers">
"Search for 'machine learning tutorials', open the first 3 results in tabs, and take screenshots of each"
</Card>
</CardGroup>
## Accessing Your Services
| Service | URL | Purpose |
| ---------------- | ------------------------------------------------------------------------ | --------------------------------------------- |
| **Tasks UI** | [http://localhost:9992](http://localhost:9992) | Main interface for interacting with the agent |
| **Agent API** | [http://localhost:9991/tasks](http://localhost:9991/tasks) | REST API for programmatic task creation |
| **Desktop API** | [http://localhost:9990/computer-use](http://localhost:9990/computer-use) | Low-level desktop control API |
| **MCP SSE** | [http://localhost:9990/mcp](http://localhost:9990/mcp) | Connect MCP clients for tool access |
</Tab>
<Tab title="Kubernetes/Helm">
## ☸️ Deploy with Helm
See our [Helm deployment guide](/deployment/helm) for Kubernetes installation.
</Tab>
<Tab title="Desktop Only">
## 🖥️ Desktop Container Only
If you just want the virtual desktop without the AI agent:
```bash
# Using pre-built image (recommended)
docker-compose -f docker/docker-compose.core.yml pull
docker-compose -f docker/docker-compose.core.yml up -d
```
Or build locally:
```bash
docker-compose -f docker/docker-compose.core.yml up -d --build
```
Access the desktop at [http://localhost:9990/vnc](http://localhost:9990/vnc)
</Tab>
</Tabs>
## Managing Your Agent
### View Logs
Monitor what your agent is doing:
```bash
# All services
docker-compose -f docker/docker-compose.yml logs -f
# Just the agent
docker-compose -f docker/docker-compose.yml logs -f bytebot-agent
```
### Stop Services
```bash
docker-compose -f docker/docker-compose.yml down
```
### Update to Latest
```bash
docker-compose -f docker/docker-compose.yml pull
docker-compose -f docker/docker-compose.yml up -d
```
### Reset Everything
Remove all data and start fresh:
```bash
docker-compose -f docker/docker-compose.yml down -v
```
## Quick API Examples
### Create a Task via API
```bash
# Simple task
curl -X POST http://localhost:9991/tasks \
-H "Content-Type: application/json" \
-d '{
"description": "Search for flights from NYC to London next month",
"priority": "MEDIUM"
}'
# Task with file upload
curl -X POST http://localhost:9991/tasks \
-F "description=Read this contract and summarize the key terms" \
-F "priority=HIGH" \
-F "files=@contract.pdf"
```
### Direct Desktop Control
```bash
# Take a screenshot
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "screenshot"}'
# Type text
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "type_text", "text": "Hello, Bytebot!"}'
```
## Troubleshooting
<AccordionGroup>
<Accordion title="Container won't start">
Check Docker is running and you have enough resources:
```bash
docker info
docker-compose -f docker/docker-compose.yml logs
```
</Accordion>
<Accordion title="Can't connect to tasks UI">
Ensure all services are running:
```bash
docker-compose -f docker/docker-compose.yml ps
```
All services should show as "Up".
</Accordion>
<Accordion title="Agent errors or no response">
Check your API key is set correctly:
```bash
cat docker/.env
docker-compose -f docker/docker-compose.yml logs bytebot-agent
```
Ensure you're using a valid API key from Anthropic, OpenAI, or Google.
</Accordion>
</AccordionGroup>
## 📚 Next Steps
<CardGroup cols={2}>
<Card
title="Using the UI"
icon="window"
href="/guides/task-creation"
>
Learn how to create and manage tasks effectively
</Card>
<Card title="Takeover Mode" icon="hand" href="/guides/takeover-mode">
Take control when you need to guide Bytebot
</Card>
<Card title="LiteLLM Integration" icon="plug" href="/deployment/litellm">
Use any LLM provider with Bytebot
</Card>
<Card title="API Integration" icon="code" href="/api-reference/introduction">
Automate Bytebot with your applications
</Card>
</CardGroup>
## 🔧 Configuration Options
### Environment Variables
<AccordionGroup>
<Accordion title="AI Provider Settings">
```bash
# Choose one AI provider:
ANTHROPIC_API_KEY=sk-ant-... # For Claude models
OPENAI_API_KEY=sk-... # For GPT models
GEMINI_API_KEY=... # For Gemini models
# Optional: Use specific models
ANTHROPIC_MODEL=claude-3-5-sonnet-20241022 # Default
OPENAI_MODEL=gpt-4o
GEMINI_MODEL=gemini-1.5-flash
```
</Accordion>
<Accordion title="Port Configuration">
```bash
# Change default ports if needed
# Edit docker-compose.yml ports section:
# bytebot-ui:
gitextract_hhcv8dmu/
├── .github/
│ └── workflows/
│ ├── build-agent.yaml
│ ├── build-desktop.yaml
│ └── build-ui.yaml
├── .gitignore
├── .prettierignore
├── LICENSE
├── README.md
├── docker/
│ ├── bytebot-desktop.Dockerfile
│ ├── docker-compose-claude-code.yml
│ ├── docker-compose.core.yml
│ ├── docker-compose.development.yml
│ ├── docker-compose.proxy.yml
│ └── docker-compose.yml
├── docs/
│ ├── api-reference/
│ │ ├── agent/
│ │ │ ├── tasks.mdx
│ │ │ └── ui.mdx
│ │ ├── computer-use/
│ │ │ ├── examples.mdx
│ │ │ ├── openapi.json
│ │ │ └── unified-endpoint.mdx
│ │ ├── endpoint/
│ │ │ ├── create.mdx
│ │ │ ├── delete.mdx
│ │ │ ├── get.mdx
│ │ │ └── webhook.mdx
│ │ ├── introduction.mdx
│ │ └── openapi.json
│ ├── core-concepts/
│ │ ├── agent-system.mdx
│ │ ├── architecture.mdx
│ │ ├── desktop-environment.mdx
│ │ └── rpa-comparison.mdx
│ ├── deployment/
│ │ ├── helm.mdx
│ │ ├── litellm.mdx
│ │ └── railway.mdx
│ ├── docs.json
│ ├── guides/
│ │ ├── password-management.mdx
│ │ ├── takeover-mode.mdx
│ │ └── task-creation.mdx
│ ├── introduction.mdx
│ ├── quickstart.mdx
│ └── rest-api/
│ ├── computer-use.mdx
│ ├── examples.mdx
│ ├── input-tracking.mdx
│ └── introduction.mdx
├── helm/
│ ├── Chart.yaml
│ ├── README.md
│ ├── charts/
│ │ ├── bytebot-agent/
│ │ │ ├── Chart.yaml
│ │ │ ├── templates/
│ │ │ │ ├── _helpers.tpl
│ │ │ │ ├── deployment.yaml
│ │ │ │ ├── ingress.yaml
│ │ │ │ ├── secret.yaml
│ │ │ │ └── service.yaml
│ │ │ └── values.yaml
│ │ ├── bytebot-desktop/
│ │ │ ├── Chart.yaml
│ │ │ ├── templates/
│ │ │ │ ├── _helpers.tpl
│ │ │ │ ├── deployment.yaml
│ │ │ │ ├── ingress.yaml
│ │ │ │ ├── pvc.yaml
│ │ │ │ └── service.yaml
│ │ │ └── values.yaml
│ │ ├── bytebot-llm-proxy/
│ │ │ ├── Chart.yaml
│ │ │ ├── templates/
│ │ │ │ ├── _helpers.tpl
│ │ │ │ ├── configmap.yaml
│ │ │ │ ├── deployment.yaml
│ │ │ │ ├── ingress.yaml
│ │ │ │ ├── secret.yaml
│ │ │ │ └── service.yaml
│ │ │ └── values.yaml
│ │ ├── bytebot-ui/
│ │ │ ├── Chart.yaml
│ │ │ ├── templates/
│ │ │ │ ├── _helpers.tpl
│ │ │ │ ├── deployment.yaml
│ │ │ │ ├── hpa.yaml
│ │ │ │ ├── ingress.yaml
│ │ │ │ └── service.yaml
│ │ │ └── values.yaml
│ │ └── postgresql/
│ │ ├── Chart.yaml
│ │ ├── templates/
│ │ │ ├── _helpers.tpl
│ │ │ ├── deployment.yaml
│ │ │ ├── secret.yaml
│ │ │ └── service.yaml
│ │ └── values.yaml
│ ├── templates/
│ │ ├── NOTES.txt
│ │ └── ingress.yaml
│ ├── values-proxy.yaml
│ ├── values-simple.yaml
│ └── values.yaml
└── packages/
├── bytebot-agent/
│ ├── .dockerignore
│ ├── .gitignore
│ ├── .prettierrc
│ ├── Dockerfile
│ ├── eslint.config.mjs
│ ├── nest-cli.json
│ ├── package.json
│ ├── prisma/
│ │ ├── migrations/
│ │ │ ├── 20250328022708_initial_migration/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250413053912_message_role/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250522200556_updated_task_structure/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250523162632_add_scheduling/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250529003255_tasks_control/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250530012753_tasks_control/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250619013027_add_better_auth_schema/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250622195148_add_user_to_task/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250706223912_model_picker/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250722041608_files/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250820172813_remove_auth/
│ │ │ │ └── migration.sql
│ │ │ └── migration_lock.toml
│ │ └── schema.prisma
│ ├── src/
│ │ ├── agent/
│ │ │ ├── agent.analytics.ts
│ │ │ ├── agent.computer-use.ts
│ │ │ ├── agent.constants.ts
│ │ │ ├── agent.module.ts
│ │ │ ├── agent.processor.ts
│ │ │ ├── agent.scheduler.ts
│ │ │ ├── agent.tools.ts
│ │ │ ├── agent.types.ts
│ │ │ └── input-capture.service.ts
│ │ ├── anthropic/
│ │ │ ├── anthropic.constants.ts
│ │ │ ├── anthropic.module.ts
│ │ │ ├── anthropic.service.ts
│ │ │ └── anthropic.tools.ts
│ │ ├── app.controller.ts
│ │ ├── app.module.ts
│ │ ├── app.service.ts
│ │ ├── google/
│ │ │ ├── google.constants.ts
│ │ │ ├── google.module.ts
│ │ │ ├── google.service.ts
│ │ │ └── google.tools.ts
│ │ ├── main.ts
│ │ ├── messages/
│ │ │ ├── messages.module.ts
│ │ │ └── messages.service.ts
│ │ ├── openai/
│ │ │ ├── openai.constants.ts
│ │ │ ├── openai.module.ts
│ │ │ ├── openai.service.ts
│ │ │ └── openai.tools.ts
│ │ ├── prisma/
│ │ │ ├── prisma.module.ts
│ │ │ └── prisma.service.ts
│ │ ├── proxy/
│ │ │ ├── proxy.module.ts
│ │ │ ├── proxy.service.ts
│ │ │ └── proxy.tools.ts
│ │ ├── summaries/
│ │ │ ├── summaries.modue.ts
│ │ │ └── summaries.service.ts
│ │ └── tasks/
│ │ ├── dto/
│ │ │ ├── add-task-message.dto.ts
│ │ │ ├── create-task.dto.ts
│ │ │ └── update-task.dto.ts
│ │ ├── tasks.controller.ts
│ │ ├── tasks.gateway.ts
│ │ ├── tasks.module.ts
│ │ └── tasks.service.ts
│ ├── tsconfig.build.json
│ └── tsconfig.json
├── bytebot-agent-cc/
│ ├── .dockerignore
│ ├── .gitignore
│ ├── .prettierrc
│ ├── Dockerfile
│ ├── eslint.config.mjs
│ ├── nest-cli.json
│ ├── package.json
│ ├── prisma/
│ │ ├── migrations/
│ │ │ ├── 20250328022708_initial_migration/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250413053912_message_role/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250522200556_updated_task_structure/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250523162632_add_scheduling/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250529003255_tasks_control/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250530012753_tasks_control/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250619013027_add_better_auth_schema/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250622195148_add_user_to_task/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250706223912_model_picker/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250722041608_files/
│ │ │ │ └── migration.sql
│ │ │ ├── 20250820172813_remove_auth/
│ │ │ │ └── migration.sql
│ │ │ └── migration_lock.toml
│ │ └── schema.prisma
│ ├── src/
│ │ ├── agent/
│ │ │ ├── agent.analytics.ts
│ │ │ ├── agent.computer-use.ts
│ │ │ ├── agent.constants.ts
│ │ │ ├── agent.module.ts
│ │ │ ├── agent.processor.ts
│ │ │ ├── agent.scheduler.ts
│ │ │ ├── agent.tools.ts
│ │ │ ├── agent.types.ts
│ │ │ └── input-capture.service.ts
│ │ ├── app.controller.ts
│ │ ├── app.module.ts
│ │ ├── app.service.ts
│ │ ├── main.ts
│ │ ├── messages/
│ │ │ ├── messages.module.ts
│ │ │ └── messages.service.ts
│ │ ├── prisma/
│ │ │ ├── prisma.module.ts
│ │ │ └── prisma.service.ts
│ │ └── tasks/
│ │ ├── dto/
│ │ │ ├── add-task-message.dto.ts
│ │ │ ├── create-task.dto.ts
│ │ │ └── update-task.dto.ts
│ │ ├── tasks.controller.ts
│ │ ├── tasks.gateway.ts
│ │ ├── tasks.module.ts
│ │ └── tasks.service.ts
│ ├── tsconfig.build.json
│ └── tsconfig.json
├── bytebot-llm-proxy/
│ ├── Dockerfile
│ └── litellm-config.yaml
├── bytebot-ui/
│ ├── .dockerignore
│ ├── .gitignore
│ ├── .prettierrc.json
│ ├── Dockerfile
│ ├── components.json
│ ├── eslint.config.mjs
│ ├── next.config.ts
│ ├── package.json
│ ├── postcss.config.mjs
│ ├── server.ts
│ ├── src/
│ │ ├── app/
│ │ │ ├── api/
│ │ │ │ └── [[...path]]/
│ │ │ │ └── route.ts
│ │ │ ├── desktop/
│ │ │ │ └── page.tsx
│ │ │ ├── globals.css
│ │ │ ├── layout.tsx
│ │ │ ├── page.tsx
│ │ │ └── tasks/
│ │ │ ├── [id]/
│ │ │ │ └── page.tsx
│ │ │ └── page.tsx
│ │ ├── components/
│ │ │ ├── VirtualDesktopStatusHeader.tsx
│ │ │ ├── layout/
│ │ │ │ └── Header.tsx
│ │ │ ├── messages/
│ │ │ │ ├── AssistantMessage.tsx
│ │ │ │ ├── ChatContainer.tsx
│ │ │ │ ├── ChatInput.tsx
│ │ │ │ ├── MessageAvatar.tsx
│ │ │ │ ├── MessageGroup.tsx
│ │ │ │ ├── UserMessage.tsx
│ │ │ │ └── content/
│ │ │ │ ├── ComputerToolContent.tsx
│ │ │ │ ├── ComputerToolContentNormal.tsx
│ │ │ │ ├── ComputerToolContentTakeOver.tsx
│ │ │ │ ├── ComputerToolUtils.tsx
│ │ │ │ ├── ErrorContent.tsx
│ │ │ │ ├── ImageContent.tsx
│ │ │ │ ├── MessageContent.tsx
│ │ │ │ └── TextContent.tsx
│ │ │ ├── screenshot/
│ │ │ │ └── ScreenshotViewer.tsx
│ │ │ ├── tasks/
│ │ │ │ ├── TaskItem.tsx
│ │ │ │ ├── TaskList.tsx
│ │ │ │ └── TaskTabs.tsx
│ │ │ ├── ui/
│ │ │ │ ├── TopicPopover.tsx
│ │ │ │ ├── button.tsx
│ │ │ │ ├── card.tsx
│ │ │ │ ├── copy-button.tsx
│ │ │ │ ├── desktop-container.tsx
│ │ │ │ ├── dropdown-menu.tsx
│ │ │ │ ├── input.tsx
│ │ │ │ ├── label.tsx
│ │ │ │ ├── loader.tsx
│ │ │ │ ├── pagination.tsx
│ │ │ │ ├── popover.tsx
│ │ │ │ ├── scroll-area.tsx
│ │ │ │ ├── select.tsx
│ │ │ │ ├── separator.tsx
│ │ │ │ ├── switch.tsx
│ │ │ │ └── text-shimmer.tsx
│ │ │ └── vnc/
│ │ │ └── VncViewer.tsx
│ │ ├── constants/
│ │ │ └── ui.constants.ts
│ │ ├── hooks/
│ │ │ ├── useChatSession.ts
│ │ │ ├── useScrollScreenshot.ts
│ │ │ └── useWebSocket.ts
│ │ ├── lib/
│ │ │ └── utils.ts
│ │ ├── types/
│ │ │ └── index.ts
│ │ └── utils/
│ │ ├── clipboard.ts
│ │ ├── screenshotUtils.ts
│ │ ├── stringUtils.ts
│ │ └── taskUtils.ts
│ └── tsconfig.json
├── bytebotd/
│ ├── .dockerignore
│ ├── .prettierrc
│ ├── Dockerfile
│ ├── eslint.config.mjs
│ ├── nest-cli.json
│ ├── package.json
│ ├── root/
│ │ ├── etc/
│ │ │ ├── firefox/
│ │ │ │ └── policies/
│ │ │ │ └── policies.json
│ │ │ ├── lightdm/
│ │ │ │ └── lightdm.conf.d/
│ │ │ │ └── 50-autologin.conf
│ │ │ ├── supervisor/
│ │ │ │ └── conf.d/
│ │ │ │ └── supervisord.conf
│ │ │ └── thunderbird/
│ │ │ └── policies/
│ │ │ └── policies.json
│ │ ├── home/
│ │ │ └── user/
│ │ │ ├── .config/
│ │ │ │ └── xfce4/
│ │ │ │ ├── desktop/
│ │ │ │ │ └── icons.screen0-1264x913.rc
│ │ │ │ ├── helpers.rc
│ │ │ │ ├── terminal/
│ │ │ │ │ └── accels.scm
│ │ │ │ └── xfconf/
│ │ │ │ └── xfce-perchannel-xml/
│ │ │ │ ├── displays.xml
│ │ │ │ ├── thunar.xml
│ │ │ │ ├── xfce4-appfinder.xml
│ │ │ │ ├── xfce4-desktop.xml
│ │ │ │ ├── xfce4-keyboard-shortcuts.xml
│ │ │ │ ├── xfce4-notifyd.xml
│ │ │ │ ├── xfce4-panel.xml
│ │ │ │ └── xfwm4.xml
│ │ │ └── .xsessionrc
│ │ └── usr/
│ │ └── share/
│ │ └── applications/
│ │ ├── 1password.desktop
│ │ ├── code.desktop
│ │ ├── firefox.desktop
│ │ ├── terminal.desktop
│ │ └── thunderbird.desktop
│ ├── src/
│ │ ├── app.controller.ts
│ │ ├── app.module.ts
│ │ ├── app.service.ts
│ │ ├── computer-use/
│ │ │ ├── computer-use.controller.ts
│ │ │ ├── computer-use.module.ts
│ │ │ ├── computer-use.service.ts
│ │ │ └── dto/
│ │ │ ├── base.dto.ts
│ │ │ ├── computer-action-validation.pipe.ts
│ │ │ └── computer-action.dto.ts
│ │ ├── input-tracking/
│ │ │ ├── input-tracking.controller.ts
│ │ │ ├── input-tracking.gateway.ts
│ │ │ ├── input-tracking.helpers.ts
│ │ │ ├── input-tracking.module.ts
│ │ │ └── input-tracking.service.ts
│ │ ├── main.ts
│ │ ├── mcp/
│ │ │ ├── bytebot-mcp.module.ts
│ │ │ ├── compressor.ts
│ │ │ ├── computer-use.tools.ts
│ │ │ └── index.ts
│ │ └── nut/
│ │ ├── nut.module.ts
│ │ └── nut.service.ts
│ ├── tsconfig.build.json
│ └── tsconfig.json
└── shared/
├── package.json
├── src/
│ ├── index.ts
│ ├── types/
│ │ ├── computerAction.types.ts
│ │ └── messageContent.types.ts
│ └── utils/
│ ├── computerAction.utils.ts
│ └── messageContent.utils.ts
└── tsconfig.json
SYMBOL INDEX (648 symbols across 143 files)
FILE: packages/bytebot-agent-cc/prisma/migrations/20250328022708_initial_migration/migration.sql
type "Task" (line 11) | CREATE TABLE "Task" (
type "Summary" (line 23) | CREATE TABLE "Summary" (
type "Message" (line 35) | CREATE TABLE "Message" (
FILE: packages/bytebot-agent-cc/prisma/migrations/20250619013027_add_better_auth_schema/migration.sql
type "User" (line 5) | CREATE TABLE "User" (
type "Session" (line 18) | CREATE TABLE "Session" (
type "Account" (line 32) | CREATE TABLE "Account" (
type "Verification" (line 51) | CREATE TABLE "Verification" (
type "User" (line 63) | CREATE UNIQUE INDEX "User_email_key" ON "User"("email")
type "Session" (line 66) | CREATE UNIQUE INDEX "Session_token_key" ON "Session"("token")
type "Account" (line 69) | CREATE UNIQUE INDEX "Account_providerId_accountId_key" ON "Account"("pro...
type "Verification" (line 72) | CREATE UNIQUE INDEX "Verification_identifier_value_key" ON "Verification...
FILE: packages/bytebot-agent-cc/prisma/migrations/20250722041608_files/migration.sql
type "File" (line 2) | CREATE TABLE "File" (
FILE: packages/bytebot-agent-cc/src/agent/agent.analytics.ts
class AgentAnalyticsService (line 8) | class AgentAnalyticsService {
method constructor (line 12) | constructor(
method handleTaskEvent (line 28) | async handleTaskEvent(payload: { taskId: string }) {
FILE: packages/bytebot-agent-cc/src/agent/agent.computer-use.ts
constant BYTEBOT_DESKTOP_BASE_URL (line 26) | const BYTEBOT_DESKTOP_BASE_URL = process.env.BYTEBOT_DESKTOP_BASE_URL as...
function handleComputerToolUse (line 28) | async function handleComputerToolUse(
function moveMouse (line 236) | async function moveMouse(input: { coordinates: Coordinates }): Promise<v...
function traceMouse (line 257) | async function traceMouse(input: {
function clickMouse (line 282) | async function clickMouse(input: {
function pressMouse (line 311) | async function pressMouse(input: {
function dragMouse (line 338) | async function dragMouse(input: {
function scroll (line 365) | async function scroll(input: {
function typeKeys (line 394) | async function typeKeys(input: {
function pressKeys (line 417) | async function pressKeys(input: {
function typeText (line 440) | async function typeText(input: {
function pasteText (line 463) | async function pasteText(input: { text: string }): Promise<void> {
function wait (line 482) | async function wait(input: { duration: number }): Promise<void> {
function cursorPosition (line 501) | async function cursorPosition(): Promise<Coordinates> {
function screenshot (line 521) | async function screenshot(): Promise<string> {
function application (line 552) | async function application(input: { application: string }): Promise<void> {
function readFile (line 571) | async function readFile(input: { path: string }): Promise<{
function writeFile (line 607) | async function writeFile(input: {
FILE: packages/bytebot-agent-cc/src/agent/agent.constants.ts
constant DEFAULT_DISPLAY_SIZE (line 1) | const DEFAULT_DISPLAY_SIZE = {
constant SUMMARIZATION_SYSTEM_PROMPT (line 6) | const SUMMARIZATION_SYSTEM_PROMPT = `You are a helpful assistant that su...
constant AGENT_SYSTEM_PROMPT (line 17) | const AGENT_SYSTEM_PROMPT = `
FILE: packages/bytebot-agent-cc/src/agent/agent.module.ts
class AgentModule (line 20) | class AgentModule {}
FILE: packages/bytebot-agent-cc/src/agent/agent.processor.ts
class AgentProcessor (line 43) | class AgentProcessor {
method constructor (line 52) | constructor(
method isRunning (line 63) | isRunning(): boolean {
method getCurrentTaskId (line 70) | getCurrentTaskId(): string | null {
method handleTaskTakeover (line 75) | handleTaskTakeover({ taskId }: { taskId: string }) {
method handleTaskResume (line 88) | handleTaskResume({ taskId }: { taskId: string }) {
method handleTaskCancel (line 98) | async handleTaskCancel({ taskId }: { taskId: string }) {
method processTask (line 104) | processTask(taskId: string) {
method formatAnthropicResponse (line 123) | private formatAnthropicResponse(
method runIteration (line 164) | private async runIteration(taskId: string): Promise<void> {
method stopProcessing (line 275) | async stopProcessing(): Promise<void> {
FILE: packages/bytebot-agent-cc/src/agent/agent.scheduler.ts
class AgentScheduler (line 9) | class AgentScheduler implements OnModuleInit {
method constructor (line 12) | constructor(
method onModuleInit (line 17) | async onModuleInit() {
method handleCron (line 23) | async handleCron() {
FILE: packages/bytebot-agent-cc/src/agent/agent.types.ts
type BytebotAgentResponse (line 4) | interface BytebotAgentResponse {
type BytebotAgentService (line 13) | interface BytebotAgentService {
type BytebotAgentModel (line 23) | interface BytebotAgentModel {
class BytebotAgentInterrupt (line 30) | class BytebotAgentInterrupt extends Error {
method constructor (line 31) | constructor() {
FILE: packages/bytebot-agent-cc/src/agent/input-capture.service.ts
class InputCaptureService (line 24) | class InputCaptureService {
method constructor (line 29) | constructor(
method isCapturing (line 34) | isCapturing() {
method start (line 38) | start(taskId: string) {
method stop (line 161) | async stop() {
FILE: packages/bytebot-agent-cc/src/app.controller.ts
class AppController (line 5) | class AppController {
method constructor (line 6) | constructor(private readonly appService: AppService) {}
method getHello (line 9) | getHello(): string {
FILE: packages/bytebot-agent-cc/src/app.module.ts
class AppModule (line 27) | class AppModule {}
FILE: packages/bytebot-agent-cc/src/app.service.ts
class AppService (line 4) | class AppService {
method getHello (line 5) | getHello(): string {
FILE: packages/bytebot-agent-cc/src/main.ts
function bootstrap (line 11) | async function bootstrap() {
FILE: packages/bytebot-agent-cc/src/messages/messages.module.ts
class MessagesModule (line 11) | class MessagesModule {}
FILE: packages/bytebot-agent-cc/src/messages/messages.service.ts
type ProcessedMessage (line 18) | interface ProcessedMessage extends Message {
type GroupedMessages (line 22) | interface GroupedMessages {
class MessagesService (line 29) | class MessagesService {
method constructor (line 30) | constructor(
method create (line 36) | async create(data: {
method findEvery (line 54) | async findEvery(taskId: string): Promise<Message[]> {
method findAll (line 65) | async findAll(
method findUnsummarized (line 89) | async findUnsummarized(taskId: string): Promise<Message[]> {
method attachSummary (line 100) | async attachSummary(
method groupBackToBackMessages (line 118) | private groupBackToBackMessages(
method filterMessages (line 164) | private filterMessages(messages: Message[]): ProcessedMessage[] {
method findRawMessages (line 201) | async findRawMessages(
method findProcessedMessages (line 214) | async findProcessedMessages(
FILE: packages/bytebot-agent-cc/src/prisma/prisma.module.ts
class PrismaModule (line 10) | class PrismaModule {}
FILE: packages/bytebot-agent-cc/src/prisma/prisma.service.ts
class PrismaService (line 5) | class PrismaService extends PrismaClient implements OnModuleInit {
method constructor (line 6) | constructor() {
method onModuleInit (line 10) | async onModuleInit() {
FILE: packages/bytebot-agent-cc/src/tasks/dto/add-task-message.dto.ts
class AddTaskMessageDto (line 3) | class AddTaskMessageDto {
FILE: packages/bytebot-agent-cc/src/tasks/dto/create-task.dto.ts
class TaskFileDto (line 13) | class TaskFileDto {
class CreateTaskDto (line 31) | class CreateTaskDto {
FILE: packages/bytebot-agent-cc/src/tasks/dto/update-task.dto.ts
class UpdateTaskDto (line 4) | class UpdateTaskDto {
FILE: packages/bytebot-agent-cc/src/tasks/tasks.controller.ts
class TasksController (line 20) | class TasksController {
method constructor (line 21) | constructor(
method create (line 28) | async create(@Body() createTaskDto: CreateTaskDto): Promise<Task> {
method findAll (line 33) | async findAll(
method getModels (line 54) | async getModels() {
method findById (line 66) | async findById(@Param('id') id: string): Promise<Task> {
method taskMessages (line 71) | async taskMessages(
method addTaskMessage (line 87) | async addTaskMessage(
method taskRawMessages (line 95) | async taskRawMessages(
method taskProcessedMessages (line 109) | async taskProcessedMessages(
method delete (line 124) | async delete(@Param('id') id: string): Promise<void> {
method takeOver (line 130) | async takeOver(@Param('id') taskId: string): Promise<Task> {
method resume (line 136) | async resume(@Param('id') taskId: string): Promise<Task> {
method cancel (line 142) | async cancel(@Param('id') taskId: string): Promise<Task> {
FILE: packages/bytebot-agent-cc/src/tasks/tasks.gateway.ts
class TasksGateway (line 18) | class TasksGateway implements OnGatewayConnection, OnGatewayDisconnect {
method handleConnection (line 22) | handleConnection(client: Socket) {
method handleDisconnect (line 26) | handleDisconnect(client: Socket) {
method handleJoinTask (line 31) | handleJoinTask(client: Socket, taskId: string) {
method handleLeaveTask (line 37) | handleLeaveTask(client: Socket, taskId: string) {
method emitTaskUpdate (line 42) | emitTaskUpdate(taskId: string, task: any) {
method emitNewMessage (line 46) | emitNewMessage(taskId: string, message: any) {
method emitTaskCreated (line 50) | emitTaskCreated(task: any) {
method emitTaskDeleted (line 54) | emitTaskDeleted(taskId: string) {
FILE: packages/bytebot-agent-cc/src/tasks/tasks.module.ts
class TasksModule (line 14) | class TasksModule {}
FILE: packages/bytebot-agent-cc/src/tasks/tasks.service.ts
class TasksService (line 27) | class TasksService {
method constructor (line 30) | constructor(
method create (line 40) | async create(createTaskDto: CreateTaskDto): Promise<Task> {
method findScheduledTasks (line 119) | async findScheduledTasks(): Promise<Task[]> {
method findNextTask (line 131) | async findNextTask(): Promise<(Task & { files: File[] }) | null> {
method findAll (line 158) | async findAll(
method findById (line 192) | async findById(id: string): Promise<Task> {
method update (line 217) | async update(id: string, updateTaskDto: UpdateTaskDto): Promise<Task> {
method delete (line 249) | async delete(id: string): Promise<Task> {
method addTaskMessage (line 263) | async addTaskMessage(taskId: string, addTaskMessageDto: AddTaskMessage...
method resume (line 282) | async resume(taskId: string): Promise<Task> {
method takeOver (line 320) | async takeOver(taskId: string): Promise<Task> {
method cancel (line 359) | async cancel(taskId: string): Promise<Task> {
FILE: packages/bytebot-agent/prisma/migrations/20250328022708_initial_migration/migration.sql
type "Task" (line 11) | CREATE TABLE "Task" (
type "Summary" (line 23) | CREATE TABLE "Summary" (
type "Message" (line 35) | CREATE TABLE "Message" (
FILE: packages/bytebot-agent/prisma/migrations/20250619013027_add_better_auth_schema/migration.sql
type "User" (line 5) | CREATE TABLE "User" (
type "Session" (line 18) | CREATE TABLE "Session" (
type "Account" (line 32) | CREATE TABLE "Account" (
type "Verification" (line 51) | CREATE TABLE "Verification" (
type "User" (line 63) | CREATE UNIQUE INDEX "User_email_key" ON "User"("email")
type "Session" (line 66) | CREATE UNIQUE INDEX "Session_token_key" ON "Session"("token")
type "Account" (line 69) | CREATE UNIQUE INDEX "Account_providerId_accountId_key" ON "Account"("pro...
type "Verification" (line 72) | CREATE UNIQUE INDEX "Verification_identifier_value_key" ON "Verification...
FILE: packages/bytebot-agent/prisma/migrations/20250722041608_files/migration.sql
type "File" (line 2) | CREATE TABLE "File" (
FILE: packages/bytebot-agent/src/agent/agent.analytics.ts
class AgentAnalyticsService (line 8) | class AgentAnalyticsService {
method constructor (line 12) | constructor(
method handleTaskEvent (line 28) | async handleTaskEvent(payload: { taskId: string }) {
FILE: packages/bytebot-agent/src/agent/agent.computer-use.ts
constant BYTEBOT_DESKTOP_BASE_URL (line 26) | const BYTEBOT_DESKTOP_BASE_URL = process.env.BYTEBOT_DESKTOP_BASE_URL as...
function handleComputerToolUse (line 28) | async function handleComputerToolUse(
function moveMouse (line 241) | async function moveMouse(input: { coordinates: Coordinates }): Promise<v...
function traceMouse (line 262) | async function traceMouse(input: {
function clickMouse (line 287) | async function clickMouse(input: {
function pressMouse (line 316) | async function pressMouse(input: {
function dragMouse (line 343) | async function dragMouse(input: {
function scroll (line 370) | async function scroll(input: {
function typeKeys (line 399) | async function typeKeys(input: {
function pressKeys (line 422) | async function pressKeys(input: {
function typeText (line 445) | async function typeText(input: {
function pasteText (line 468) | async function pasteText(input: { text: string }): Promise<void> {
function wait (line 487) | async function wait(input: { duration: number }): Promise<void> {
function cursorPosition (line 506) | async function cursorPosition(): Promise<Coordinates> {
function screenshot (line 526) | async function screenshot(): Promise<string> {
function application (line 557) | async function application(input: { application: string }): Promise<void> {
function readFile (line 576) | async function readFile(input: { path: string }): Promise<{
function writeFile (line 612) | async function writeFile(input: {
FILE: packages/bytebot-agent/src/agent/agent.constants.ts
constant DEFAULT_DISPLAY_SIZE (line 1) | const DEFAULT_DISPLAY_SIZE = {
constant SUMMARIZATION_SYSTEM_PROMPT (line 6) | const SUMMARIZATION_SYSTEM_PROMPT = `You are a helpful assistant that su...
constant AGENT_SYSTEM_PROMPT (line 17) | const AGENT_SYSTEM_PROMPT = `
FILE: packages/bytebot-agent/src/agent/agent.module.ts
class AgentModule (line 34) | class AgentModule {}
FILE: packages/bytebot-agent/src/agent/agent.processor.ts
class AgentProcessor (line 44) | class AgentProcessor {
method constructor (line 51) | constructor(
method isRunning (line 73) | isRunning(): boolean {
method getCurrentTaskId (line 80) | getCurrentTaskId(): string | null {
method handleTaskTakeover (line 85) | handleTaskTakeover({ taskId }: { taskId: string }) {
method handleTaskResume (line 98) | handleTaskResume({ taskId }: { taskId: string }) {
method handleTaskCancel (line 108) | async handleTaskCancel({ taskId }: { taskId: string }) {
method processTask (line 114) | processTask(taskId: string) {
method runIteration (line 134) | private async runIteration(taskId: string): Promise<void> {
method stopProcessing (line 405) | async stopProcessing(): Promise<void> {
FILE: packages/bytebot-agent/src/agent/agent.scheduler.ts
class AgentScheduler (line 9) | class AgentScheduler implements OnModuleInit {
method constructor (line 12) | constructor(
method onModuleInit (line 17) | async onModuleInit() {
method handleCron (line 23) | async handleCron() {
FILE: packages/bytebot-agent/src/agent/agent.types.ts
type BytebotAgentResponse (line 4) | interface BytebotAgentResponse {
type BytebotAgentService (line 13) | interface BytebotAgentService {
type BytebotAgentModel (line 23) | interface BytebotAgentModel {
class BytebotAgentInterrupt (line 30) | class BytebotAgentInterrupt extends Error {
method constructor (line 31) | constructor() {
FILE: packages/bytebot-agent/src/agent/input-capture.service.ts
class InputCaptureService (line 24) | class InputCaptureService {
method constructor (line 29) | constructor(
method isCapturing (line 34) | isCapturing() {
method start (line 38) | start(taskId: string) {
method stop (line 161) | async stop() {
FILE: packages/bytebot-agent/src/anthropic/anthropic.constants.ts
constant ANTHROPIC_MODELS (line 3) | const ANTHROPIC_MODELS: BytebotAgentModel[] = [
constant DEFAULT_MODEL (line 18) | const DEFAULT_MODEL = ANTHROPIC_MODELS[0];
FILE: packages/bytebot-agent/src/anthropic/anthropic.module.ts
class AnthropicModule (line 10) | class AnthropicModule {}
FILE: packages/bytebot-agent/src/anthropic/anthropic.service.ts
class AnthropicService (line 24) | class AnthropicService implements BytebotAgentService {
method constructor (line 28) | constructor(private readonly configService: ConfigService) {
method generateMessage (line 42) | async generateMessage(
method formatMessagesForAnthropic (line 107) | private formatMessagesForAnthropic(
method formatAnthropicResponse (line 159) | private formatAnthropicResponse(
FILE: packages/bytebot-agent/src/anthropic/anthropic.tools.ts
function agentToolToAnthropicTool (line 7) | function agentToolToAnthropicTool(agentTool: any): Anthropic.Tool {
FILE: packages/bytebot-agent/src/app.controller.ts
class AppController (line 5) | class AppController {
method constructor (line 6) | constructor(private readonly appService: AppService) {}
method getHello (line 9) | getHello(): string {
FILE: packages/bytebot-agent/src/app.module.ts
class AppModule (line 37) | class AppModule {}
FILE: packages/bytebot-agent/src/app.service.ts
class AppService (line 4) | class AppService {
method getHello (line 5) | getHello(): string {
FILE: packages/bytebot-agent/src/google/google.constants.ts
constant GOOGLE_MODELS (line 3) | const GOOGLE_MODELS: BytebotAgentModel[] = [
constant DEFAULT_MODEL (line 18) | const DEFAULT_MODEL = GOOGLE_MODELS[0];
FILE: packages/bytebot-agent/src/google/google.module.ts
class GoogleModule (line 10) | class GoogleModule {}
FILE: packages/bytebot-agent/src/google/google.service.ts
class GoogleService (line 30) | class GoogleService implements BytebotAgentService {
method constructor (line 34) | constructor(private readonly configService: ConfigService) {
method generateMessage (line 48) | async generateMessage(
method formatMessagesForGoogle (line 121) | private formatMessagesForGoogle(messages: Message[]): Content[] {
method getToolName (line 237) | private getToolName(
method formatGoogleResponse (line 264) | private formatGoogleResponse(parts: Part[]): MessageContentBlock[] {
FILE: packages/bytebot-agent/src/google/google.tools.ts
function jsonSchemaTypeToGoogleType (line 7) | function jsonSchemaTypeToGoogleType(type: string): Type {
function convertJsonSchemaToGoogleSchema (line 29) | function convertJsonSchemaToGoogleSchema(schema: any): any {
function agentToolToGoogleTool (line 69) | function agentToolToGoogleTool(agentTool: any): FunctionDeclaration {
FILE: packages/bytebot-agent/src/main.ts
function bootstrap (line 11) | async function bootstrap() {
FILE: packages/bytebot-agent/src/messages/messages.module.ts
class MessagesModule (line 11) | class MessagesModule {}
FILE: packages/bytebot-agent/src/messages/messages.service.ts
type ProcessedMessage (line 18) | interface ProcessedMessage extends Message {
type GroupedMessages (line 22) | interface GroupedMessages {
class MessagesService (line 29) | class MessagesService {
method constructor (line 30) | constructor(
method create (line 36) | async create(data: {
method findEvery (line 54) | async findEvery(taskId: string): Promise<Message[]> {
method findAll (line 65) | async findAll(
method findUnsummarized (line 89) | async findUnsummarized(taskId: string): Promise<Message[]> {
method attachSummary (line 100) | async attachSummary(
method groupBackToBackMessages (line 118) | private groupBackToBackMessages(
method filterMessages (line 164) | private filterMessages(messages: Message[]): ProcessedMessage[] {
method findRawMessages (line 201) | async findRawMessages(
method findProcessedMessages (line 214) | async findProcessedMessages(
FILE: packages/bytebot-agent/src/openai/openai.constants.ts
constant OPENAI_MODELS (line 3) | const OPENAI_MODELS: BytebotAgentModel[] = [
constant DEFAULT_MODEL (line 18) | const DEFAULT_MODEL = OPENAI_MODELS[0];
FILE: packages/bytebot-agent/src/openai/openai.module.ts
class OpenAIModule (line 10) | class OpenAIModule {}
FILE: packages/bytebot-agent/src/openai/openai.service.ts
class OpenAIService (line 25) | class OpenAIService implements BytebotAgentService {
method constructor (line 29) | constructor(private readonly configService: ConfigService) {
method generateMessage (line 43) | async generateMessage(
method formatMessagesForOpenAI (line 93) | private formatMessagesForOpenAI(
method formatOpenAIResponse (line 242) | private formatOpenAIResponse(
FILE: packages/bytebot-agent/src/openai/openai.tools.ts
function agentToolToOpenAITool (line 4) | function agentToolToOpenAITool(agentTool: any): OpenAI.Responses.Functio...
FILE: packages/bytebot-agent/src/prisma/prisma.module.ts
class PrismaModule (line 10) | class PrismaModule {}
FILE: packages/bytebot-agent/src/prisma/prisma.service.ts
class PrismaService (line 5) | class PrismaService extends PrismaClient implements OnModuleInit {
method constructor (line 6) | constructor() {
method onModuleInit (line 10) | async onModuleInit() {
FILE: packages/bytebot-agent/src/proxy/proxy.module.ts
class ProxyModule (line 10) | class ProxyModule {}
FILE: packages/bytebot-agent/src/proxy/proxy.service.ts
class ProxyService (line 29) | class ProxyService implements BytebotAgentService {
method constructor (line 33) | constructor(private readonly configService: ConfigService) {
method generateMessage (line 52) | async generateMessage(
method formatMessagesForChatCompletion (line 114) | private formatMessagesForChatCompletion(
method formatChatCompletionResponse (line 272) | private formatChatCompletionResponse(
FILE: packages/bytebot-agent/src/proxy/proxy.tools.ts
function agentToolToChatCompletionTool (line 7) | function agentToolToChatCompletionTool(agentTool: any): ChatCompletionTo...
function convertToCamelCase (line 21) | function convertToCamelCase(name: string): string {
FILE: packages/bytebot-agent/src/summaries/summaries.modue.ts
class SummariesModule (line 10) | class SummariesModule {}
FILE: packages/bytebot-agent/src/summaries/summaries.service.ts
class SummariesService (line 6) | class SummariesService {
method constructor (line 7) | constructor(private prisma: PrismaService) {}
method create (line 9) | async create(data: {
method findLatest (line 23) | async findLatest(taskId: string): Promise<Summary | null> {
method findAll (line 30) | async findAll(taskId: string): Promise<Summary[]> {
FILE: packages/bytebot-agent/src/tasks/dto/add-task-message.dto.ts
class AddTaskMessageDto (line 3) | class AddTaskMessageDto {
FILE: packages/bytebot-agent/src/tasks/dto/create-task.dto.ts
class TaskFileDto (line 13) | class TaskFileDto {
class CreateTaskDto (line 31) | class CreateTaskDto {
FILE: packages/bytebot-agent/src/tasks/dto/update-task.dto.ts
class UpdateTaskDto (line 4) | class UpdateTaskDto {
FILE: packages/bytebot-agent/src/tasks/tasks.controller.ts
class TasksController (line 36) | class TasksController {
method constructor (line 37) | constructor(
method create (line 44) | async create(@Body() createTaskDto: CreateTaskDto): Promise<Task> {
method findAll (line 49) | async findAll(
method getModels (line 70) | async getModels() {
method findById (line 114) | async findById(@Param('id') id: string): Promise<Task> {
method taskMessages (line 119) | async taskMessages(
method addTaskMessage (line 135) | async addTaskMessage(
method taskRawMessages (line 143) | async taskRawMessages(
method taskProcessedMessages (line 157) | async taskProcessedMessages(
method delete (line 172) | async delete(@Param('id') id: string): Promise<void> {
method takeOver (line 178) | async takeOver(@Param('id') taskId: string): Promise<Task> {
method resume (line 184) | async resume(@Param('id') taskId: string): Promise<Task> {
method cancel (line 190) | async cancel(@Param('id') taskId: string): Promise<Task> {
FILE: packages/bytebot-agent/src/tasks/tasks.gateway.ts
class TasksGateway (line 18) | class TasksGateway implements OnGatewayConnection, OnGatewayDisconnect {
method handleConnection (line 22) | handleConnection(client: Socket) {
method handleDisconnect (line 26) | handleDisconnect(client: Socket) {
method handleJoinTask (line 31) | handleJoinTask(client: Socket, taskId: string) {
method handleLeaveTask (line 37) | handleLeaveTask(client: Socket, taskId: string) {
method emitTaskUpdate (line 42) | emitTaskUpdate(taskId: string, task: any) {
method emitNewMessage (line 46) | emitNewMessage(taskId: string, message: any) {
method emitTaskCreated (line 50) | emitTaskCreated(task: any) {
method emitTaskDeleted (line 54) | emitTaskDeleted(taskId: string) {
FILE: packages/bytebot-agent/src/tasks/tasks.module.ts
class TasksModule (line 14) | class TasksModule {}
FILE: packages/bytebot-agent/src/tasks/tasks.service.ts
class TasksService (line 27) | class TasksService {
method constructor (line 30) | constructor(
method create (line 40) | async create(createTaskDto: CreateTaskDto): Promise<Task> {
method findScheduledTasks (line 119) | async findScheduledTasks(): Promise<Task[]> {
method findNextTask (line 131) | async findNextTask(): Promise<(Task & { files: File[] }) | null> {
method findAll (line 158) | async findAll(
method findById (line 192) | async findById(id: string): Promise<Task> {
method update (line 217) | async update(id: string, updateTaskDto: UpdateTaskDto): Promise<Task> {
method delete (line 249) | async delete(id: string): Promise<Task> {
method addTaskMessage (line 263) | async addTaskMessage(taskId: string, addTaskMessageDto: AddTaskMessage...
method resume (line 282) | async resume(taskId: string): Promise<Task> {
method takeOver (line 320) | async takeOver(taskId: string): Promise<Task> {
method cancel (line 359) | async cancel(taskId: string): Promise<Task> {
FILE: packages/bytebot-ui/server.ts
constant BYTEBOT_AGENT_BASE_URL (line 16) | const BYTEBOT_AGENT_BASE_URL = process.env.BYTEBOT_AGENT_BASE_URL;
constant BYTEBOT_DESKTOP_VNC_URL (line 17) | const BYTEBOT_DESKTOP_VNC_URL = process.env.BYTEBOT_DESKTOP_VNC_URL;
FILE: packages/bytebot-ui/src/app/api/[[...path]]/route.ts
function proxy (line 6) | async function proxy(req: NextRequest, path: string[]): Promise<Response> {
type PathParams (line 51) | type PathParams = Promise<{ path?: string[] }>;
function handler (line 53) | async function handler(req: NextRequest, { params }: { params: PathParam...
constant GET (line 58) | const GET = handler;
constant POST (line 59) | const POST = handler;
constant PUT (line 60) | const PUT = handler;
constant PATCH (line 61) | const PATCH = handler;
constant DELETE (line 62) | const DELETE = handler;
constant OPTIONS (line 63) | const OPTIONS = handler;
constant HEAD (line 64) | const HEAD = handler;
FILE: packages/bytebot-ui/src/app/desktop/page.tsx
function DesktopPage (line 7) | function DesktopPage() {
FILE: packages/bytebot-ui/src/app/layout.tsx
function RootLayout (line 13) | function RootLayout({
FILE: packages/bytebot-ui/src/app/page.tsx
type StockPhotoProps (line 19) | interface StockPhotoProps {
type FileWithBase64 (line 37) | interface FileWithBase64 {
function Home (line 44) | function Home() {
FILE: packages/bytebot-ui/src/app/tasks/[id]/page.tsx
function TaskPage (line 25) | function TaskPage() {
FILE: packages/bytebot-ui/src/app/tasks/page.tsx
function TasksPageContent (line 15) | function TasksPageContent() {
function TasksPageFallback (line 176) | function TasksPageFallback() {
function TasksPage (line 185) | function TasksPage() {
FILE: packages/bytebot-ui/src/components/VirtualDesktopStatusHeader.tsx
type VirtualDesktopStatus (line 6) | type VirtualDesktopStatus =
type StatusConfig (line 16) | interface StatusConfig {
type VirtualDesktopStatusHeaderProps (line 146) | interface VirtualDesktopStatusHeaderProps {
FILE: packages/bytebot-ui/src/components/layout/Header.tsx
function Header (line 15) | function Header() {
FILE: packages/bytebot-ui/src/components/messages/AssistantMessage.tsx
type AssistantMessageProps (line 9) | interface AssistantMessageProps {
function AssistantMessage (line 15) | function AssistantMessage({
FILE: packages/bytebot-ui/src/components/messages/ChatContainer.tsx
type ChatContainerProps (line 9) | interface ChatContainerProps {
function ChatContainer (line 26) | function ChatContainer({
FILE: packages/bytebot-ui/src/components/messages/ChatInput.tsx
type FileWithBase64 (line 7) | interface FileWithBase64 {
type ChatInputProps (line 14) | interface ChatInputProps {
function ChatInput (line 24) | function ChatInput({
FILE: packages/bytebot-ui/src/components/messages/MessageAvatar.tsx
type MessageAvatarProps (line 7) | interface MessageAvatarProps {
function MessageAvatar (line 11) | function MessageAvatar({ role }: MessageAvatarProps) {
FILE: packages/bytebot-ui/src/components/messages/MessageGroup.tsx
type MessageGroupProps (line 7) | interface MessageGroupProps {
function MessageGroup (line 13) | function MessageGroup({ group, taskStatus, messageIdToIndex }: MessageGr...
FILE: packages/bytebot-ui/src/components/messages/UserMessage.tsx
type UserMessageProps (line 11) | interface UserMessageProps {
function UserMessage (line 16) | function UserMessage({ group, messageIdToIndex }: UserMessageProps) {
FILE: packages/bytebot-ui/src/components/messages/content/ComputerToolContent.tsx
type ComputerToolContentProps (line 6) | interface ComputerToolContentProps {
function ComputerToolContent (line 11) | function ComputerToolContent({ block, isTakeOver = false }: ComputerTool...
FILE: packages/bytebot-ui/src/components/messages/content/ComputerToolContentNormal.tsx
type ComputerToolContentNormalProps (line 17) | interface ComputerToolContentNormalProps {
function ToolDetailsNormal (line 31) | function ToolDetailsNormal({ block }: { block: ComputerToolUseContentBlo...
function ComputerToolContentNormal (line 99) | function ComputerToolContentNormal({
FILE: packages/bytebot-ui/src/components/messages/content/ComputerToolContentTakeOver.tsx
type ComputerToolContentTakeOverProps (line 13) | interface ComputerToolContentTakeOverProps {
function ToolDetailsTakeOver (line 17) | function ToolDetailsTakeOver({ block }: { block: ComputerToolUseContentB...
function ComputerToolContentTakeOver (line 78) | function ComputerToolContentTakeOver({ block }: ComputerToolContentTakeO...
FILE: packages/bytebot-ui/src/components/messages/content/ComputerToolUtils.tsx
type IconType (line 32) | type IconType =
function getIcon (line 43) | function getIcon(block: ComputerToolUseContentBlock): IconType {
function getLabel (line 91) | function getLabel(block: ComputerToolUseContentBlock) {
FILE: packages/bytebot-ui/src/components/messages/content/ErrorContent.tsx
type ErrorContentProps (line 6) | interface ErrorContentProps {
function ErrorContent (line 10) | function ErrorContent({ block }: ErrorContentProps) {
FILE: packages/bytebot-ui/src/components/messages/content/ImageContent.tsx
type ImageContentProps (line 7) | interface ImageContentProps {
function ImageContent (line 11) | function ImageContent({ block }: ImageContentProps) {
FILE: packages/bytebot-ui/src/components/messages/content/MessageContent.tsx
type MessageContentProps (line 14) | interface MessageContentProps {
function MessageContent (line 19) | function MessageContent({
FILE: packages/bytebot-ui/src/components/messages/content/TextContent.tsx
type TextContentProps (line 5) | interface TextContentProps {
function TextContent (line 9) | function TextContent({ block }: TextContentProps) {
FILE: packages/bytebot-ui/src/components/screenshot/ScreenshotViewer.tsx
type ScreenshotViewerProps (line 5) | interface ScreenshotViewerProps {
function ScreenshotViewer (line 10) | function ScreenshotViewer({ screenshot, className = '' }: ScreenshotView...
FILE: packages/bytebot-ui/src/components/tasks/TaskItem.tsx
type TaskItemProps (line 14) | interface TaskItemProps {
type StatusIconConfig (line 18) | interface StatusIconConfig {
constant STATUS_CONFIGS (line 25) | const STATUS_CONFIGS: Record<TaskStatus, StatusIconConfig> = {
FILE: packages/bytebot-ui/src/components/tasks/TaskList.tsx
type TaskListProps (line 9) | interface TaskListProps {
FILE: packages/bytebot-ui/src/components/tasks/TaskTabs.tsx
type TabKey (line 11) | type TabKey = "ALL" | "ACTIVE" | "COMPLETED" | "CANCELLED_FAILED";
type TaskTabsProps (line 13) | interface TaskTabsProps {
type TabConfig (line 19) | interface TabConfig {
constant TAB_CONFIGS (line 30) | const TAB_CONFIGS: Record<TabKey, TabConfig> = {
FILE: packages/bytebot-ui/src/components/ui/TopicPopover.tsx
type TopicPopoverProps (line 5) | interface TopicPopoverProps {
FILE: packages/bytebot-ui/src/components/ui/button.tsx
type ButtonProps (line 38) | type ButtonProps = React.ComponentProps<"button"> &
function Button (line 45) | function Button({
FILE: packages/bytebot-ui/src/components/ui/card.tsx
function Card (line 5) | function Card({ className, ...props }: React.ComponentProps<"div">) {
function CardHeader (line 18) | function CardHeader({ className, ...props }: React.ComponentProps<"div">) {
function CardTitle (line 31) | function CardTitle({ className, ...props }: React.ComponentProps<"div">) {
function CardDescription (line 41) | function CardDescription({ className, ...props }: React.ComponentProps<"...
function CardAction (line 51) | function CardAction({ className, ...props }: React.ComponentProps<"div">) {
function CardContent (line 64) | function CardContent({ className, ...props }: React.ComponentProps<"div"...
function CardFooter (line 74) | function CardFooter({ className, ...props }: React.ComponentProps<"div">) {
FILE: packages/bytebot-ui/src/components/ui/copy-button.tsx
type CopyButtonProps (line 8) | interface CopyButtonProps {
function CopyButton (line 15) | function CopyButton({
FILE: packages/bytebot-ui/src/components/ui/desktop-container.tsx
type DesktopContainerProps (line 10) | interface DesktopContainerProps {
FILE: packages/bytebot-ui/src/components/ui/input.tsx
function Input (line 5) | function Input({ className, type, ...props }: React.ComponentProps<"inpu...
FILE: packages/bytebot-ui/src/components/ui/loader.tsx
type LoaderProps (line 5) | interface LoaderProps {
FILE: packages/bytebot-ui/src/components/ui/pagination.tsx
type PaginationProps (line 6) | interface PaginationProps {
FILE: packages/bytebot-ui/src/components/ui/scroll-area.tsx
function ScrollArea (line 8) | function ScrollArea({
function ScrollBar (line 31) | function ScrollBar({
FILE: packages/bytebot-ui/src/components/ui/separator.tsx
function Separator (line 8) | function Separator({
FILE: packages/bytebot-ui/src/components/ui/switch.tsx
function Switch (line 8) | function Switch({
FILE: packages/bytebot-ui/src/components/ui/text-shimmer.tsx
type TextShimmerProps (line 6) | type TextShimmerProps = {
function TextShimmerComponent (line 14) | function TextShimmerComponent({
FILE: packages/bytebot-ui/src/components/vnc/VncViewer.tsx
type VncViewerProps (line 5) | interface VncViewerProps {
function VncViewer (line 9) | function VncViewer({ viewOnly = true }: VncViewerProps) {
FILE: packages/bytebot-ui/src/constants/ui.constants.ts
constant UI_CONSTANTS (line 5) | const UI_CONSTANTS = {
type UIConstants (line 32) | type UIConstants = typeof UI_CONSTANTS;
FILE: packages/bytebot-ui/src/hooks/useChatSession.ts
type UseChatSessionProps (line 15) | interface UseChatSessionProps {
function useChatSession (line 19) | function useChatSession({ initialTaskId }: UseChatSessionProps = {}) {
FILE: packages/bytebot-ui/src/hooks/useScrollScreenshot.ts
type UseScrollScreenshotProps (line 5) | interface UseScrollScreenshotProps {
function useScrollScreenshot (line 10) | function useScrollScreenshot({ messages, scrollContainerRef }: UseScroll...
FILE: packages/bytebot-ui/src/hooks/useWebSocket.ts
type UseWebSocketProps (line 5) | interface UseWebSocketProps {
function useWebSocket (line 12) | function useWebSocket({
FILE: packages/bytebot-ui/src/lib/utils.ts
function cn (line 4) | function cn(...inputs: ClassValue[]) {
FILE: packages/bytebot-ui/src/types/index.ts
type Role (line 3) | enum Role {
type Message (line 9) | interface Message {
type GroupedMessages (line 19) | interface GroupedMessages {
type Model (line 25) | interface Model {
type TaskStatus (line 32) | enum TaskStatus {
type TaskPriority (line 42) | enum TaskPriority {
type TaskType (line 49) | enum TaskType {
type FileWithBase64 (line 54) | interface FileWithBase64 {
type File (line 61) | interface File {
type Task (line 72) | interface Task {
FILE: packages/bytebot-ui/src/utils/clipboard.ts
function copyToClipboard (line 6) | async function copyToClipboard(text: string): Promise<boolean> {
FILE: packages/bytebot-ui/src/utils/screenshotUtils.ts
type ScreenshotData (line 4) | interface ScreenshotData {
function extractScreenshots (line 14) | function extractScreenshots(messages: Message[]): ScreenshotData[] {
function getScreenshotForScrollPosition (line 41) | function getScreenshotForScrollPosition(
FILE: packages/bytebot-ui/src/utils/stringUtils.ts
function capitalizeFirstChar (line 6) | function capitalizeFirstChar(str: string): string {
FILE: packages/bytebot-ui/src/utils/taskUtils.ts
constant API_CONFIG (line 6) | const API_CONFIG = {
function apiRequest (line 17) | async function apiRequest<T>(
function buildQueryString (line 47) | function buildQueryString(
function fetchTaskMessages (line 63) | async function fetchTaskMessages(
function fetchTaskRawMessages (line 81) | async function fetchTaskRawMessages(
function fetchTaskProcessedMessages (line 99) | async function fetchTaskProcessedMessages(
function fetchTaskById (line 117) | async function fetchTaskById(taskId: string): Promise<Task | null> {
function startTask (line 124) | async function startTask(data: {
function addMessage (line 138) | async function addMessage(
function fetchTasks (line 151) | async function fetchTasks(options?: {
function fetchTaskCounts (line 177) | async function fetchTaskCounts(): Promise<Record<string, number>> {
function fetchModels (line 223) | async function fetchModels(): Promise<Model[]> {
function takeOverTask (line 243) | async function takeOverTask(taskId: string): Promise<Task | null> {
function resumeTask (line 250) | async function resumeTask(taskId: string): Promise<Task | null> {
function cancelTask (line 257) | async function cancelTask(taskId: string): Promise<Task | null> {
FILE: packages/bytebotd/src/app.controller.ts
class AppController (line 5) | class AppController {
method constructor (line 6) | constructor(private readonly appService: AppService) {}
method redirectToVnc (line 13) | redirectToVnc(@Headers('host') host: string) {
FILE: packages/bytebotd/src/app.module.ts
class AppModule (line 26) | class AppModule {}
FILE: packages/bytebotd/src/app.service.ts
class AppService (line 4) | class AppService {
method getHello (line 5) | getHello(): string {
FILE: packages/bytebotd/src/computer-use/computer-use.controller.ts
class ComputerUseController (line 14) | class ComputerUseController {
method constructor (line 17) | constructor(private readonly computerUseService: ComputerUseService) {}
method action (line 20) | async action(
FILE: packages/bytebotd/src/computer-use/computer-use.module.ts
class ComputerUseModule (line 12) | class ComputerUseModule {}
FILE: packages/bytebotd/src/computer-use/computer-use.service.ts
class ComputerUseService (line 26) | class ComputerUseService {
method constructor (line 29) | constructor(private readonly nutService: NutService) {}
method action (line 31) | async action(params: ComputerAction): Promise<any> {
method moveMouse (line 107) | private async moveMouse(action: MoveMouseAction): Promise<void> {
method traceMouse (line 111) | private async traceMouse(action: TraceMouseAction): Promise<void> {
method clickMouse (line 133) | private async clickMouse(action: ClickMouseAction): Promise<void> {
method pressMouse (line 164) | private async pressMouse(action: PressMouseAction): Promise<void> {
method dragMouse (line 180) | private async dragMouse(action: DragMouseAction): Promise<void> {
method scroll (line 204) | private async scroll(action: ScrollAction): Promise<void> {
method typeKeys (line 229) | private async typeKeys(action: TypeKeysAction): Promise<void> {
method pressKeys (line 234) | private async pressKeys(action: PressKeysAction): Promise<void> {
method typeText (line 239) | private async typeText(action: TypeTextAction): Promise<void> {
method pasteText (line 244) | private async pasteText(action: PasteTextAction): Promise<void> {
method delay (line 249) | private async delay(ms: number): Promise<void> {
method screenshot (line 253) | async screenshot(): Promise<{ image: string }> {
method cursor_position (line 259) | private async cursor_position(): Promise<{ x: number; y: number }> {
method application (line 264) | private async application(action: ApplicationAction): Promise<void> {
method writeFile (line 364) | private async writeFile(
method readFile (line 419) | private async readFile(action: ReadFileAction): Promise<{
FILE: packages/bytebotd/src/computer-use/dto/base.dto.ts
class CoordinatesDto (line 3) | class CoordinatesDto {
type ButtonType (line 11) | enum ButtonType {
type PressType (line 17) | enum PressType {
type ScrollDirection (line 22) | enum ScrollDirection {
type ApplicationName (line 29) | enum ApplicationName {
FILE: packages/bytebotd/src/computer-use/dto/computer-action-validation.pipe.ts
class ComputerActionValidationPipe (line 29) | class ComputerActionValidationPipe implements PipeTransform {
method transform (line 30) | async transform(value: any, metadata: ArgumentMetadata) {
FILE: packages/bytebotd/src/computer-use/dto/computer-action.dto.ts
class MoveMouseActionDto (line 27) | class MoveMouseActionDto extends BaseActionDto {
class TraceMouseActionDto (line 36) | class TraceMouseActionDto extends BaseActionDto {
class ClickMouseActionDto (line 51) | class ClickMouseActionDto extends BaseActionDto {
class PressMouseActionDto (line 73) | class PressMouseActionDto extends BaseActionDto {
class DragMouseActionDto (line 89) | class DragMouseActionDto extends BaseActionDto {
class ScrollActionDto (line 107) | class ScrollActionDto extends BaseActionDto {
class TypeKeysActionDto (line 129) | class TypeKeysActionDto extends BaseActionDto {
class PressKeysActionDto (line 143) | class PressKeysActionDto extends BaseActionDto {
class TypeTextActionDto (line 155) | class TypeTextActionDto extends BaseActionDto {
class PasteTextActionDto (line 168) | class PasteTextActionDto extends BaseActionDto {
class WaitActionDto (line 176) | class WaitActionDto extends BaseActionDto {
class ScreenshotActionDto (line 185) | class ScreenshotActionDto extends BaseActionDto {
class CursorPositionActionDto (line 190) | class CursorPositionActionDto extends BaseActionDto {
class ApplicationActionDto (line 195) | class ApplicationActionDto extends BaseActionDto {
class WriteFileActionDto (line 203) | class WriteFileActionDto extends BaseActionDto {
class ReadFileActionDto (line 214) | class ReadFileActionDto extends BaseActionDto {
type ComputerActionDto (line 223) | type ComputerActionDto =
FILE: packages/bytebotd/src/input-tracking/input-tracking.controller.ts
class InputTrackingController (line 5) | class InputTrackingController {
method constructor (line 6) | constructor(private readonly inputTrackingService: InputTrackingServic...
method start (line 9) | start() {
method stop (line 15) | stop() {
FILE: packages/bytebotd/src/input-tracking/input-tracking.gateway.ts
class InputTrackingGateway (line 18) | class InputTrackingGateway
method handleConnection (line 26) | handleConnection(client: Socket) {
method handleDisconnect (line 30) | handleDisconnect(client: Socket) {
method emitAction (line 34) | emitAction(action: ComputerAction) {
method emitScreenshotAndAction (line 38) | emitScreenshotAndAction(
FILE: packages/bytebotd/src/input-tracking/input-tracking.helpers.ts
type KeyInfo (line 3) | type KeyInfo = {
FILE: packages/bytebotd/src/input-tracking/input-tracking.module.ts
class InputTrackingModule (line 13) | class InputTrackingModule {}
FILE: packages/bytebotd/src/input-tracking/input-tracking.service.ts
class InputTrackingService (line 23) | class InputTrackingService implements OnModuleDestroy {
method constructor (line 47) | constructor(
method onModuleDestroy (line 54) | onModuleDestroy() {
method startTracking (line 58) | startTracking() {
method stopTracking (line 68) | stopTracking() {
method bufferChar (line 79) | private bufferChar(char: string) {
method flushTypingBuffer (line 88) | private async flushTypingBuffer() {
method isModifierKey (line 98) | private isModifierKey(key: UiohookKeyboardEvent) {
method registerListeners (line 102) | private registerListeners() {
method mapButton (line 263) | private mapButton(btn: unknown): Button {
method logAction (line 276) | private async logAction(action: ComputerAction) {
FILE: packages/bytebotd/src/main.ts
function bootstrap (line 7) | async function bootstrap() {
FILE: packages/bytebotd/src/mcp/bytebot-mcp.module.ts
class BytebotMcpModule (line 17) | class BytebotMcpModule {}
FILE: packages/bytebotd/src/mcp/compressor.ts
type CompressionOptions (line 3) | interface CompressionOptions {
type CompressionResult (line 11) | interface CompressionResult {
class Base64ImageCompressor (line 21) | class Base64ImageCompressor {
method compressToSize (line 25) | static async compressToSize(
method compressBuffer (line 95) | private static async compressBuffer(
method compressWithResize (line 142) | static async compressWithResize(
method getBase64SizeInfo (line 194) | static getBase64SizeInfo(base64String: string): {
function compressPngBase64Under1MB (line 219) | async function compressPngBase64Under1MB(
FILE: packages/bytebotd/src/mcp/computer-use.tools.ts
class ComputerUseTools (line 8) | class ComputerUseTools {
method constructor (line 9) | constructor(private readonly computerUse: ComputerUseService) {}
method moveMouse (line 21) | async moveMouse({ coordinates }: { coordinates: { x: number; y: number...
method traceMouse (line 56) | async traceMouse({
method clickMouse (line 106) | async clickMouse({
method pressMouse (line 162) | async pressMouse({
method dragMouse (line 221) | async dragMouse({
method scroll (line 285) | async scroll({
method typeKeys (line 357) | async typeKeys({ keys, delay }: { keys: string[]; delay?: number }) {
method pressKeys (line 412) | async pressKeys({ keys, press }: { keys: string[]; press: 'down' | 'up...
method typeText (line 440) | async typeText({ text, delay }: { text: string; delay?: number }) {
method pasteText (line 464) | async pasteText({ text }: { text: string }) {
method wait (line 490) | async wait({ duration }: { duration: number }) {
method application (line 522) | async application({
method screenshot (line 553) | async screenshot() {
method cursorPosition (line 583) | async cursorPosition() {
method writeFile (line 619) | async writeFile({ path, data }: { path: string; data: string }) {
method readFile (line 654) | async readFile({ path }: { path: string }) {
FILE: packages/bytebotd/src/nut/nut.module.ts
class NutModule (line 8) | class NutModule {}
FILE: packages/bytebotd/src/nut/nut.service.ts
class NutService (line 120) | class NutService {
method constructor (line 124) | constructor() {
method sendKeys (line 148) | async sendKeys(keys: string[], delay: number = 100): Promise<any> {
method holdKeys (line 168) | async holdKeys(keys: string[], down: boolean): Promise<any> {
method validateKey (line 190) | private validateKey(key: string): Key {
method typeText (line 218) | async typeText(text: string, delayMs: number = 0): Promise<void> {
method pasteText (line 246) | async pasteText(text: string): Promise<void> {
method charToKeyInfo (line 284) | private charToKeyInfo(
method mouseMoveEvent (line 354) | async mouseMoveEvent({ x, y }: { x: number; y: number }): Promise<any> {
method mouseClickEvent (line 365) | async mouseClickEvent(button: 'left' | 'right' | 'middle'): Promise<an...
method mouseButtonEvent (line 391) | async mouseButtonEvent(
method mouseWheelEvent (line 438) | async mouseWheelEvent(
method screendump (line 470) | async screendump(): Promise<Buffer> {
method getCursorPosition (line 497) | async getCursorPosition(): Promise<{ x: number; y: number }> {
method delay (line 513) | private async delay(ms: number): Promise<void> {
FILE: packages/shared/src/types/computerAction.types.ts
type Coordinates (line 1) | type Coordinates = { x: number; y: number };
type Button (line 2) | type Button = "left" | "right" | "middle";
type Press (line 3) | type Press = "up" | "down";
type Application (line 4) | type Application =
type MoveMouseAction (line 14) | type MoveMouseAction = {
type TraceMouseAction (line 19) | type TraceMouseAction = {
type ClickMouseAction (line 25) | type ClickMouseAction = {
type PressMouseAction (line 33) | type PressMouseAction = {
type DragMouseAction (line 40) | type DragMouseAction = {
type ScrollAction (line 47) | type ScrollAction = {
type TypeKeysAction (line 55) | type TypeKeysAction = {
type PasteTextAction (line 61) | type PasteTextAction = {
type PressKeysAction (line 66) | type PressKeysAction = {
type TypeTextAction (line 72) | type TypeTextAction = {
type WaitAction (line 79) | type WaitAction = {
type ScreenshotAction (line 84) | type ScreenshotAction = {
type CursorPositionAction (line 88) | type CursorPositionAction = {
type ApplicationAction (line 92) | type ApplicationAction = {
type WriteFileAction (line 97) | type WriteFileAction = {
type ReadFileAction (line 103) | type ReadFileAction = {
type ComputerAction (line 109) | type ComputerAction =
FILE: packages/shared/src/types/messageContent.types.ts
type MessageContentType (line 4) | enum MessageContentType {
type MessageContentBlockBase (line 16) | type MessageContentBlockBase = {
type TextContentBlock (line 21) | type TextContentBlock = {
type ImageContentBlock (line 26) | type ImageContentBlock = {
type DocumentContentBlock (line 35) | type DocumentContentBlock = {
type ThinkingContentBlock (line 46) | type ThinkingContentBlock = {
type RedactedThinkingContentBlock (line 52) | type RedactedThinkingContentBlock = {
type ToolUseContentBlock (line 57) | type ToolUseContentBlock = {
type MoveMouseToolUseBlock (line 64) | type MoveMouseToolUseBlock = ToolUseContentBlock & {
type TraceMouseToolUseBlock (line 71) | type TraceMouseToolUseBlock = ToolUseContentBlock & {
type ClickMouseToolUseBlock (line 79) | type ClickMouseToolUseBlock = ToolUseContentBlock & {
type PressMouseToolUseBlock (line 89) | type PressMouseToolUseBlock = ToolUseContentBlock & {
type DragMouseToolUseBlock (line 98) | type DragMouseToolUseBlock = ToolUseContentBlock & {
type ScrollToolUseBlock (line 107) | type ScrollToolUseBlock = ToolUseContentBlock & {
type TypeKeysToolUseBlock (line 117) | type TypeKeysToolUseBlock = ToolUseContentBlock & {
type PressKeysToolUseBlock (line 125) | type PressKeysToolUseBlock = ToolUseContentBlock & {
type TypeTextToolUseBlock (line 133) | type TypeTextToolUseBlock = ToolUseContentBlock & {
type PasteTextToolUseBlock (line 142) | type PasteTextToolUseBlock = ToolUseContentBlock & {
type WaitToolUseBlock (line 150) | type WaitToolUseBlock = ToolUseContentBlock & {
type ScreenshotToolUseBlock (line 157) | type ScreenshotToolUseBlock = ToolUseContentBlock & {
type CursorPositionToolUseBlock (line 161) | type CursorPositionToolUseBlock = ToolUseContentBlock & {
type ApplicationToolUseBlock (line 165) | type ApplicationToolUseBlock = ToolUseContentBlock & {
type WriteFileToolUseBlock (line 172) | type WriteFileToolUseBlock = ToolUseContentBlock & {
type ReadFileToolUseBlock (line 180) | type ReadFileToolUseBlock = ToolUseContentBlock & {
type ComputerToolUseContentBlock (line 187) | type ComputerToolUseContentBlock =
type UserActionContentBlock (line 205) | type UserActionContentBlock = MessageContentBlockBase & {
type SetTaskStatusToolUseBlock (line 221) | type SetTaskStatusToolUseBlock = ToolUseContentBlock & {
type CreateTaskToolUseBlock (line 229) | type CreateTaskToolUseBlock = ToolUseContentBlock & {
type ToolResultContentBlock (line 240) | type ToolResultContentBlock = {
type MessageContentBlock (line 248) | type MessageContentBlock =
FILE: packages/shared/src/utils/computerAction.utils.ts
function createActionTypeGuard (line 28) | function createActionTypeGuard<T extends ComputerAction>(
function createToolUseBlock (line 71) | function createToolUseBlock(
function conditionallyAdd (line 87) | function conditionallyAdd<T extends Record<string, any>>(
function convertMoveMouseActionToToolUseBlock (line 103) | function convertMoveMouseActionToToolUseBlock(
function convertTraceMouseActionToToolUseBlock (line 112) | function convertTraceMouseActionToToolUseBlock(
function convertClickMouseActionToToolUseBlock (line 125) | function convertClickMouseActionToToolUseBlock(
function convertPressMouseActionToToolUseBlock (line 145) | function convertPressMouseActionToToolUseBlock(
function convertDragMouseActionToToolUseBlock (line 162) | function convertDragMouseActionToToolUseBlock(
function convertScrollActionToToolUseBlock (line 179) | function convertScrollActionToToolUseBlock(
function convertTypeKeysActionToToolUseBlock (line 199) | function convertTypeKeysActionToToolUseBlock(
function convertPressKeysActionToToolUseBlock (line 212) | function convertPressKeysActionToToolUseBlock(
function convertTypeTextActionToToolUseBlock (line 222) | function convertTypeTextActionToToolUseBlock(
function convertPasteTextActionToToolUseBlock (line 236) | function convertPasteTextActionToToolUseBlock(
function convertWaitActionToToolUseBlock (line 245) | function convertWaitActionToToolUseBlock(
function convertScreenshotActionToToolUseBlock (line 254) | function convertScreenshotActionToToolUseBlock(
function convertCursorPositionActionToToolUseBlock (line 261) | function convertCursorPositionActionToToolUseBlock(
function convertApplicationActionToToolUseBlock (line 268) | function convertApplicationActionToToolUseBlock(
function convertWriteFileActionToToolUseBlock (line 277) | function convertWriteFileActionToToolUseBlock(
function convertReadFileActionToToolUseBlock (line 287) | function convertReadFileActionToToolUseBlock(
function convertComputerActionToToolUseBlock (line 299) | function convertComputerActionToToolUseBlock(
FILE: packages/shared/src/utils/messageContent.utils.ts
function isTextContentBlock (line 38) | function isTextContentBlock(obj: unknown): obj is TextContentBlock {
function isThinkingContentBlock (line 49) | function isThinkingContentBlock(
function isRedactedThinkingContentBlock (line 64) | function isRedactedThinkingContentBlock(
function isImageContentBlock (line 83) | function isImageContentBlock(obj: unknown): obj is ImageContentBlock {
function isUserActionContentBlock (line 99) | function isUserActionContentBlock(
function isDocumentContentBlock (line 116) | function isDocumentContentBlock(
function isToolUseContentBlock (line 139) | function isToolUseContentBlock(
function isComputerToolUseContentBlock (line 161) | function isComputerToolUseContentBlock(
function isToolResultContentBlock (line 176) | function isToolResultContentBlock(
function isMessageContentBlock (line 195) | function isMessageContentBlock(
function getMessageContentBlockType (line 216) | function getMessageContentBlockType(obj: unknown): string | null {
function isMoveMouseToolUseBlock (line 268) | function isMoveMouseToolUseBlock(
function isTraceMouseToolUseBlock (line 284) | function isTraceMouseToolUseBlock(
function isClickMouseToolUseBlock (line 300) | function isClickMouseToolUseBlock(
function isCursorPositionToolUseBlock (line 316) | function isCursorPositionToolUseBlock(
function isPressMouseToolUseBlock (line 332) | function isPressMouseToolUseBlock(
function isDragMouseToolUseBlock (line 348) | function isDragMouseToolUseBlock(
function isScrollToolUseBlock (line 364) | function isScrollToolUseBlock(obj: unknown): obj is ScrollToolUseBlock {
function isTypeKeysToolUseBlock (line 378) | function isTypeKeysToolUseBlock(
function isPressKeysToolUseBlock (line 394) | function isPressKeysToolUseBlock(
function isTypeTextToolUseBlock (line 410) | function isTypeTextToolUseBlock(
function isPasteTextToolUseBlock (line 421) | function isPasteTextToolUseBlock(
function isWaitToolUseBlock (line 437) | function isWaitToolUseBlock(obj: unknown): obj is WaitToolUseBlock {
function isScreenshotToolUseBlock (line 451) | function isScreenshotToolUseBlock(
function isApplicationToolUseBlock (line 462) | function isApplicationToolUseBlock(
function isSetTaskStatusToolUseBlock (line 473) | function isSetTaskStatusToolUseBlock(
function isCreateTaskToolUseBlock (line 484) | function isCreateTaskToolUseBlock(
function isWriteFileToolUseBlock (line 495) | function isWriteFileToolUseBlock(
function isReadFileToolUseBlock (line 506) | function isReadFileToolUseBlock(
Condensed preview — 316 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (947K chars).
[
{
"path": ".github/workflows/build-agent.yaml",
"chars": 1287,
"preview": "name: Build Agent\n\non:\n push:\n branches:\n - main\n paths:\n - \"packages/bytebot-agent/**\"\n - \"packag"
},
{
"path": ".github/workflows/build-desktop.yaml",
"chars": 1608,
"preview": "name: Build Desktop\n\non:\n push:\n branches:\n - main\n paths:\n - \"docker/**\"\n - \"packages/bytebotd/**"
},
{
"path": ".github/workflows/build-ui.yaml",
"chars": 1276,
"preview": "name: Build UI\n\non:\n push:\n branches:\n - main\n paths:\n - \"packages/bytebot-ui/**\"\n - \"packages/sha"
},
{
"path": ".gitignore",
"chars": 2836,
"preview": "# Logs\nlogs\n*.log\nnpm-debug.log*\nyarn-debug.log*\nyarn-error.log*\nlerna-debug.log*\n.pnpm-debug.log*\n\n# Diagnostic reports"
},
{
"path": ".prettierignore",
"chars": 44,
"preview": "# Ignore formatting in docs folder\n/docs/**\n"
},
{
"path": "LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 9887,
"preview": "<div align=\"center\">\n\n<img src=\"docs/images/bytebot-logo.png\" width=\"500\" alt=\"Bytebot Logo\">\n\n# Bytebot: Open-Source AI"
},
{
"path": "docker/bytebot-desktop.Dockerfile",
"chars": 317,
"preview": "# Extend the pre-built bytebot-desktop image\nFROM ghcr.io/bytebot-ai/bytebot-desktop:edge\n\n# Add additional packages, ap"
},
{
"path": "docker/docker-compose-claude-code.yml",
"chars": 2195,
"preview": "name: bytebot\n\nservices:\n bytebot-desktop:\n # Build from source\n build:\n context: ../packages/\n dockerf"
},
{
"path": "docker/docker-compose.core.yml",
"chars": 443,
"preview": "name: bytebot\n\nservices:\n bytebot-desktop:\n # Build from source\n build:\n context: ../packages/\n dockerf"
},
{
"path": "docker/docker-compose.development.yml",
"chars": 1144,
"preview": "## docker-compose file that spins up a bytebot-desktop container\n## and a postgres container. bytebot-ui and bytebot-age"
},
{
"path": "docker/docker-compose.proxy.yml",
"chars": 2614,
"preview": "name: bytebot\n\nservices:\n bytebot-desktop:\n # Build from source\n build:\n context: ../packages/\n dockerf"
},
{
"path": "docker/docker-compose.yml",
"chars": 2334,
"preview": "name: bytebot\n\nservices:\n bytebot-desktop:\n # Build from source\n build:\n context: ../packages/\n dockerf"
},
{
"path": "docs/api-reference/agent/tasks.mdx",
"chars": 6290,
"preview": "---\ntitle: 'Tasks API'\ndescription: 'Reference documentation for the Bytebot Agent Tasks API'\n---\n\n## Tasks API\n\nThe Tas"
},
{
"path": "docs/api-reference/agent/ui.mdx",
"chars": 3641,
"preview": "---\ntitle: 'Task UI'\ndescription: 'Documentation for the Bytebot Task UI'\n---\n\n## Bytebot Task UI\n\nThe Bytebot Task UI p"
},
{
"path": "docs/api-reference/computer-use/examples.mdx",
"chars": 14575,
"preview": "---\ntitle: \"Computer Use API Examples\"\ndescription: \"Code examples for common automation scenarios using the Bytebot API"
},
{
"path": "docs/api-reference/computer-use/openapi.json",
"chars": 8399,
"preview": "{\n \"openapi\": \"3.1.0\",\n \"info\": {\n \"title\": \"Bytebot Computer Use API\",\n \"version\": \"1.0.0\",\n \"description\": "
},
{
"path": "docs/api-reference/computer-use/unified-endpoint.mdx",
"chars": 13386,
"preview": "---\ntitle: \"Unified Computer Actions API\"\ndescription: \"Control all aspects of the desktop environment with a single end"
},
{
"path": "docs/api-reference/endpoint/create.mdx",
"chars": 54,
"preview": "---\ntitle: 'Create Plant'\nopenapi: 'POST /plants'\n---\n"
},
{
"path": "docs/api-reference/endpoint/delete.mdx",
"chars": 61,
"preview": "---\ntitle: 'Delete Plant'\nopenapi: 'DELETE /plants/{id}'\n---\n"
},
{
"path": "docs/api-reference/endpoint/get.mdx",
"chars": 51,
"preview": "---\ntitle: 'Get Plants'\nopenapi: 'GET /plants'\n---\n"
},
{
"path": "docs/api-reference/endpoint/webhook.mdx",
"chars": 61,
"preview": "---\ntitle: 'New Plant'\nopenapi: 'WEBHOOK /plant/webhook'\n---\n"
},
{
"path": "docs/api-reference/introduction.mdx",
"chars": 3813,
"preview": "---\ntitle: \"API Reference\"\ndescription: \"Overview of the Bytebot API endpoints for programmatic control\"\n---\n\n# Bytebot "
},
{
"path": "docs/api-reference/openapi.json",
"chars": 5173,
"preview": "{\n \"openapi\": \"3.1.0\",\n \"info\": {\n \"title\": \"OpenAPI Plant Store\",\n \"description\": \"A sample API that uses a pla"
},
{
"path": "docs/core-concepts/agent-system.mdx",
"chars": 7230,
"preview": "---\ntitle: \"Agent System\"\ndescription: \"The AI brain that powers your self-hosted desktop automation\"\n---\n\n## Overview\n\n"
},
{
"path": "docs/core-concepts/architecture.mdx",
"chars": 6702,
"preview": "---\ntitle: \"Architecture\"\ndescription: \"How Bytebot's desktop agent works under the hood\"\n---\n\n## Overview\n\nBytebot is a"
},
{
"path": "docs/core-concepts/desktop-environment.mdx",
"chars": 6516,
"preview": "---\ntitle: \"Desktop Environment\" \ndescription: \"The virtual Linux desktop where Bytebot performs tasks\"\n---\n\n## Overview"
},
{
"path": "docs/core-concepts/rpa-comparison.mdx",
"chars": 7998,
"preview": "---\ntitle: \"Bytebot vs Traditional RPA\"\ndescription: \"How Bytebot revolutionizes enterprise automation beyond traditiona"
},
{
"path": "docs/deployment/helm.mdx",
"chars": 5789,
"preview": "---\ntitle: \"Helm Deployment\"\ndescription: \"Deploy Bytebot on Kubernetes using Helm charts\"\n---\n\n# Deploy Bytebot on Kube"
},
{
"path": "docs/deployment/litellm.mdx",
"chars": 12272,
"preview": "---\ntitle: \"LiteLLM Integration\"\ndescription: \"Use any LLM provider with Bytebot through LiteLLM proxy\"\n---\n\n# Connect A"
},
{
"path": "docs/deployment/railway.mdx",
"chars": 4658,
"preview": "---\ntitle: \"Deploying Bytebot on Railway\"\ndescription: \"Comprehensive guide to deploying the full Bytebot stack on Railw"
},
{
"path": "docs/docs.json",
"chars": 3199,
"preview": "{\n \"$schema\": \"https://mintlify.com/docs.json\",\n \"theme\": \"mint\",\n \"name\": \"Bytebot - Self-Hosted AI Desktop Agent\",\n"
},
{
"path": "docs/guides/password-management.mdx",
"chars": 10144,
"preview": "---\ntitle: \"Password Management & 2FA\"\ndescription: \"How Bytebot handles authentication automatically using password man"
},
{
"path": "docs/guides/takeover-mode.mdx",
"chars": 4781,
"preview": "---\ntitle: \"Takeover Mode\"\ndescription: \"Take control of the desktop when you need to guide or assist Bytebot\"\n---\n\n# Ta"
},
{
"path": "docs/guides/task-creation.mdx",
"chars": 14539,
"preview": "---\ntitle: \"Task Creation & Management\"\ndescription: \"Master the art of creating effective tasks and managing them throu"
},
{
"path": "docs/introduction.mdx",
"chars": 8089,
"preview": "---\ntitle: Introduction\ndescription: \"Open source AI desktop agent that automates any computer task\"\n---\n\n<p align=\"cent"
},
{
"path": "docs/quickstart.mdx",
"chars": 10670,
"preview": "---\ntitle: \"Quick Start\"\ndescription: \"Get your AI desktop agent running in 2 minutes\"\n---\n\n# Choose Your Deployment Met"
},
{
"path": "docs/rest-api/computer-use.mdx",
"chars": 10378,
"preview": "---\ntitle: \"Computer Action\"\nopenapi: \"POST /computer-use\"\ndescription: \"Execute computer actions in the virtual desktop"
},
{
"path": "docs/rest-api/examples.mdx",
"chars": 17651,
"preview": "---\ntitle: \"Usage Examples\"\ndescription: \"Code examples for common automation scenarios using the Bytebot REST API\"\n---\n"
},
{
"path": "docs/rest-api/input-tracking.mdx",
"chars": 980,
"preview": "---\ntitle: \"Input Tracking\"\nopenapi: \"POST /input-tracking/start\"\ndescription: \"Start and stop input tracking on the Byt"
},
{
"path": "docs/rest-api/introduction.mdx",
"chars": 2035,
"preview": "---\ntitle: \"Introduction\"\ndescription: \"Overview of the Bytebot REST API\"\n---\n\n## Bytebot REST API\n\nBytebot's core funct"
},
{
"path": "helm/Chart.yaml",
"chars": 808,
"preview": "apiVersion: v2\nname: bytebot\ndescription: Bytebot - Complete deployment package\ntype: application\nversion: 0.1.0\nappVers"
},
{
"path": "helm/README.md",
"chars": 1196,
"preview": "# Bytebot Helm Charts\n\nThis directory contains Helm charts for deploying Bytebot on Kubernetes.\n\n## Documentation\n\nFor c"
},
{
"path": "helm/charts/bytebot-agent/Chart.yaml",
"chars": 408,
"preview": "apiVersion: v2\nname: bytebot-agent\ndescription: A Helm chart for Bytebot Agent service\ntype: application\nversion: 0.1.0\n"
},
{
"path": "helm/charts/bytebot-agent/templates/_helpers.tpl",
"chars": 2321,
"preview": "{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"bytebot-agent.name\" -}}\n{{- default .Chart.Name .Values.nameOverride"
},
{
"path": "helm/charts/bytebot-agent/templates/deployment.yaml",
"chars": 3987,
"preview": "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: {{ include \"bytebot-agent.fullname\" . }}\n labels:\n {{- includ"
},
{
"path": "helm/charts/bytebot-agent/templates/ingress.yaml",
"chars": 1078,
"preview": "{{- if .Values.ingress.enabled -}}\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: {{ include \"bytebot-"
},
{
"path": "helm/charts/bytebot-agent/templates/secret.yaml",
"chars": 1692,
"preview": "{{- $createSecret := false -}}\n{{- if or .Values.env.ANTHROPIC_API_KEY .Values.env.OPENAI_API_KEY .Values.env.GEMINI_API"
},
{
"path": "helm/charts/bytebot-agent/templates/service.yaml",
"chars": 480,
"preview": "apiVersion: v1\nkind: Service\nmetadata:\n name: {{ include \"bytebot-agent.fullname\" . }}\n labels:\n {{- include \"byteb"
},
{
"path": "helm/charts/bytebot-agent/values.yaml",
"chars": 1767,
"preview": "replicaCount: 1\n\nimage:\n repository: ghcr.io/bytebot-ai/bytebot-agent\n tag: edge\n pullPolicy: IfNotPresent\n\nnameOverr"
},
{
"path": "helm/charts/bytebot-desktop/Chart.yaml",
"chars": 290,
"preview": "apiVersion: v2\nname: bytebot-desktop\ndescription: A Helm chart for Bytebot Desktop service\ntype: application\nversion: 0."
},
{
"path": "helm/charts/bytebot-desktop/templates/_helpers.tpl",
"chars": 1687,
"preview": "{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"bytebot-desktop.name\" -}}\n{{- default .Chart.Name .Values.nameOverri"
},
{
"path": "helm/charts/bytebot-desktop/templates/deployment.yaml",
"chars": 1820,
"preview": "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: {{ include \"bytebot-desktop.fullname\" . }}\n labels:\n {{- incl"
},
{
"path": "helm/charts/bytebot-desktop/templates/ingress.yaml",
"chars": 1084,
"preview": "{{- if .Values.ingress.enabled -}}\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: {{ include \"bytebot-"
},
{
"path": "helm/charts/bytebot-desktop/templates/pvc.yaml",
"chars": 685,
"preview": "{{- if .Values.persistence.enabled -}}\napiVersion: v1\nkind: PersistentVolumeClaim\nmetadata:\n name: {{ include \"bytebot-"
},
{
"path": "helm/charts/bytebot-desktop/templates/service.yaml",
"chars": 486,
"preview": "apiVersion: v1\nkind: Service\nmetadata:\n name: {{ include \"bytebot-desktop.fullname\" . }}\n labels:\n {{- include \"byt"
},
{
"path": "helm/charts/bytebot-desktop/values.yaml",
"chars": 791,
"preview": "replicaCount: 1\n\nimage:\n repository: ghcr.io/bytebot-ai/bytebot-desktop\n tag: edge\n pullPolicy: IfNotPresent\n\nnameOve"
},
{
"path": "helm/charts/bytebot-llm-proxy/Chart.yaml",
"chars": 283,
"preview": "apiVersion: v2\nname: bytebot-llm-proxy\ndescription: A Helm chart for LiteLLM proxy service\ntype: application\nversion: 0."
},
{
"path": "helm/charts/bytebot-llm-proxy/templates/_helpers.tpl",
"chars": 1385,
"preview": "{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"bytebot-llm-proxy.name\" -}}\n{{- default .Chart.Name .Values.nameOver"
},
{
"path": "helm/charts/bytebot-llm-proxy/templates/configmap.yaml",
"chars": 246,
"preview": "apiVersion: v1\nkind: ConfigMap\nmetadata:\n name: {{ include \"bytebot-llm-proxy.fullname\" . }}-config\n labels:\n {{- i"
},
{
"path": "helm/charts/bytebot-llm-proxy/templates/deployment.yaml",
"chars": 2732,
"preview": "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: {{ include \"bytebot-llm-proxy.fullname\" . }}\n labels:\n {{- in"
},
{
"path": "helm/charts/bytebot-llm-proxy/templates/ingress.yaml",
"chars": 1090,
"preview": "{{- if .Values.ingress.enabled -}}\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: {{ include \"bytebot-"
},
{
"path": "helm/charts/bytebot-llm-proxy/templates/secret.yaml",
"chars": 665,
"preview": "{{- if or .Values.env.ANTHROPIC_API_KEY .Values.env.OPENAI_API_KEY .Values.env.GEMINI_API_KEY -}}\napiVersion: v1\nkind: S"
},
{
"path": "helm/charts/bytebot-llm-proxy/templates/service.yaml",
"chars": 492,
"preview": "apiVersion: v1\nkind: Service\nmetadata:\n name: {{ include \"bytebot-llm-proxy.fullname\" . }}\n labels:\n {{- include \"b"
},
{
"path": "helm/charts/bytebot-llm-proxy/values.yaml",
"chars": 1564,
"preview": "replicaCount: 1\n\nimage:\n repository: ghcr.io/berriai/litellm\n tag: main-stable\n pullPolicy: IfNotPresent\n\nnameOverrid"
},
{
"path": "helm/charts/bytebot-ui/Chart.yaml",
"chars": 261,
"preview": "apiVersion: v2\nname: bytebot-ui\ndescription: A Helm chart for Bytebot UI service\ntype: application\nversion: 0.1.0\nappVer"
},
{
"path": "helm/charts/bytebot-ui/templates/_helpers.tpl",
"chars": 1637,
"preview": "{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"bytebot-ui.name\" -}}\n{{- default .Chart.Name .Values.nameOverride | "
},
{
"path": "helm/charts/bytebot-ui/templates/deployment.yaml",
"chars": 1912,
"preview": "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: {{ include \"bytebot-ui.fullname\" . }}\n labels:\n {{- include \""
},
{
"path": "helm/charts/bytebot-ui/templates/hpa.yaml",
"chars": 999,
"preview": "{{- if .Values.autoscaling.enabled }}\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n name: {{ incl"
},
{
"path": "helm/charts/bytebot-ui/templates/ingress.yaml",
"chars": 1069,
"preview": "{{- if .Values.ingress.enabled -}}\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: {{ include \"bytebot-"
},
{
"path": "helm/charts/bytebot-ui/templates/service.yaml",
"chars": 471,
"preview": "apiVersion: v1\nkind: Service\nmetadata:\n name: {{ include \"bytebot-ui.fullname\" . }}\n labels:\n {{- include \"bytebot-"
},
{
"path": "helm/charts/bytebot-ui/values.yaml",
"chars": 962,
"preview": "replicaCount: 1\n\nimage:\n repository: ghcr.io/bytebot-ai/bytebot-ui\n tag: edge\n pullPolicy: IfNotPresent\n\nnameOverride"
},
{
"path": "helm/charts/postgresql/Chart.yaml",
"chars": 271,
"preview": "apiVersion: v2\nname: postgresql\ndescription: A Helm chart for PostgreSQL database\ntype: application\nversion: 0.1.0\nappVe"
},
{
"path": "helm/charts/postgresql/templates/_helpers.tpl",
"chars": 1329,
"preview": "{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"postgresql.name\" -}}\n{{- default .Chart.Name .Values.nameOverride | "
},
{
"path": "helm/charts/postgresql/templates/deployment.yaml",
"chars": 3308,
"preview": "apiVersion: apps/v1\nkind: StatefulSet\nmetadata:\n name: {{ include \"postgresql.fullname\" . }}\n labels:\n {{- include "
},
{
"path": "helm/charts/postgresql/templates/secret.yaml",
"chars": 238,
"preview": "apiVersion: v1\nkind: Secret\nmetadata:\n name: {{ include \"postgresql.fullname\" . }}-secret\n labels:\n {{- include \"po"
},
{
"path": "helm/charts/postgresql/templates/service.yaml",
"chars": 649,
"preview": "apiVersion: v1\nkind: Service\nmetadata:\n name: {{ include \"postgresql.fullname\" . }}\n labels:\n {{- include \"postgres"
},
{
"path": "helm/charts/postgresql/values.yaml",
"chars": 747,
"preview": "replicaCount: 1\n\nimage:\n repository: postgres\n tag: 16-alpine\n pullPolicy: IfNotPresent\n\nnameOverride: \"\"\nfullnameOve"
},
{
"path": "helm/templates/NOTES.txt",
"chars": 520,
"preview": "Thank you for installing {{ .Chart.Name }}!\n\nTo access Bytebot:\n\n1. Port-forward the UI service:\n kubectl port-forward"
},
{
"path": "helm/templates/ingress.yaml",
"chars": 1238,
"preview": "{{- if .Values.ingress.enabled -}}\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: {{ .Release.Name }}-"
},
{
"path": "helm/values-proxy.yaml",
"chars": 1097,
"preview": "# Proxy configuration for Bytebot with LiteLLM\n# This values file enables the LiteLLM proxy and configures the agent to "
},
{
"path": "helm/values-simple.yaml",
"chars": 430,
"preview": "# Simple Bytebot configuration example\n# Copy this file and add your API key(s)\n\nbytebot-agent:\n # Configure at least o"
},
{
"path": "helm/values.yaml",
"chars": 2516,
"preview": "global:\n storageClass: \"\"\n \npostgresql:\n enabled: true\n fullnameOverride: \"bytebot-postgresql\"\n auth:\n username:"
},
{
"path": "packages/bytebot-agent/.dockerignore",
"chars": 125,
"preview": "**/node_modules\n**/dist\n**/.git\n**/.vscode\n**/.env*\n**/npm-debug.log\n**/yarn-debug.log\n**/yarn-error.log\n**/package-lock"
},
{
"path": "packages/bytebot-agent/.gitignore",
"chars": 686,
"preview": "# compiled output\n/dist\n/node_modules\n/build\n\n# Logs\nlogs\n*.log\nnpm-debug.log*\npnpm-debug.log*\nyarn-debug.log*\nyarn-erro"
},
{
"path": "packages/bytebot-agent/.prettierrc",
"chars": 51,
"preview": "{\n \"singleQuote\": true,\n \"trailingComma\": \"all\"\n}"
},
{
"path": "packages/bytebot-agent/Dockerfile",
"chars": 298,
"preview": "# Base image\nFROM node:20-alpine\n\n# Create app directory\nWORKDIR /app\n\n# Copy app source\nCOPY ./shared ./shared\nCOPY ./b"
},
{
"path": "packages/bytebot-agent/eslint.config.mjs",
"chars": 856,
"preview": "// @ts-check\nimport eslint from '@eslint/js';\nimport eslintPluginPrettierRecommended from 'eslint-plugin-prettier/recomm"
},
{
"path": "packages/bytebot-agent/nest-cli.json",
"chars": 171,
"preview": "{\n \"$schema\": \"https://json.schemastore.org/nest-cli\",\n \"collection\": \"@nestjs/schematics\",\n \"sourceRoot\": \"src\",\n \""
},
{
"path": "packages/bytebot-agent/package.json",
"chars": 3105,
"preview": "{\n \"name\": \"bytebot-agent\",\n \"version\": \"0.0.1\",\n \"description\": \"\",\n \"author\": \"\",\n \"private\": true,\n \"license\": "
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250328022708_initial_migration/migration.sql",
"chars": 1981,
"preview": "-- CreateEnum\nCREATE TYPE \"TaskStatus\" AS ENUM ('PENDING', 'IN_PROGRESS', 'NEEDS_HELP', 'NEEDS_REVIEW', 'COMPLETED', 'CA"
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250413053912_message_role/migration.sql",
"chars": 361,
"preview": "/*\n Warnings:\n\n - You are about to drop the column `type` on the `Message` table. All the data in the column will be l"
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250522200556_updated_task_structure/migration.sql",
"chars": 2011,
"preview": "\n-- CreateEnum\nCREATE TYPE \"Role\" AS ENUM ('USER', 'ASSISTANT');\n\n-- CreateEnum\nCREATE TYPE \"TaskType\" AS ENUM ('IMMEDIA"
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250523162632_add_scheduling/migration.sql",
"chars": 117,
"preview": "-- AlterTable\nALTER TABLE \"Task\" ADD COLUMN \"queuedAt\" TIMESTAMP(3),\nADD COLUMN \"scheduledFor\" TIMESTAMP(3);\n"
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250529003255_tasks_control/migration.sql",
"chars": 90,
"preview": "-- AlterTable\nALTER TABLE \"Task\" ADD COLUMN \"control\" \"Role\" NOT NULL DEFAULT 'USER';\n"
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250530012753_tasks_control/migration.sql",
"chars": 81,
"preview": "-- AlterTable\nALTER TABLE \"Task\" ALTER COLUMN \"control\" SET DEFAULT 'ASSISTANT';\n"
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250619013027_add_better_auth_schema/migration.sql",
"chars": 2452,
"preview": "-- AlterTable\nALTER TABLE \"Message\" ADD COLUMN \"userId\" TEXT;\n\n-- CreateTable\nCREATE TABLE \"User\" (\n \"id\" TEXT NO"
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250622195148_add_user_to_task/migration.sql",
"chars": 219,
"preview": "-- AlterTable\nALTER TABLE \"Task\" ADD COLUMN \"userId\" TEXT;\n\n-- AddForeignKey\nALTER TABLE \"Task\" ADD CONSTRAINT \"Task"
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250706223912_model_picker/migration.sql",
"chars": 460,
"preview": "-- AlterTable: add `model` column as JSONB (nullable initially)\nALTER TABLE \"Task\" ADD COLUMN \"model\" JSONB;\n\n-- Backfil"
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250722041608_files/migration.sql",
"chars": 505,
"preview": "-- CreateTable\nCREATE TABLE \"File\" (\n \"id\" TEXT NOT NULL,\n \"name\" TEXT NOT NULL,\n \"type\" TEXT NOT NULL,\n \"si"
},
{
"path": "packages/bytebot-agent/prisma/migrations/20250820172813_remove_auth/migration.sql",
"chars": 1357,
"preview": "/*\n Warnings:\n\n - You are about to drop the column `userId` on the `Message` table. All the data in the column will be"
},
{
"path": "packages/bytebot-agent/prisma/migrations/migration_lock.toml",
"chars": 128,
"preview": "# Please do not edit this file manually\n# It should be added in your version-control system (e.g., Git)\nprovider = \"post"
},
{
"path": "packages/bytebot-agent/prisma/schema.prisma",
"chars": 3113,
"preview": "// This is your Prisma schema file,\n// learn more about it in the docs: https://pris.ly/d/prisma-schema\n\n// Looking for "
},
{
"path": "packages/bytebot-agent/src/agent/agent.analytics.ts",
"chars": 1492,
"preview": "import { Injectable, Logger } from '@nestjs/common';\nimport { OnEvent } from '@nestjs/event-emitter';\nimport { ConfigSer"
},
{
"path": "packages/bytebot-agent/src/agent/agent.computer-use.ts",
"chars": 16687,
"preview": "import {\n Button,\n Coordinates,\n Press,\n ComputerToolUseContentBlock,\n ToolResultContentBlock,\n MessageContentType"
},
{
"path": "packages/bytebot-agent/src/agent/agent.constants.ts",
"chars": 9908,
"preview": "export const DEFAULT_DISPLAY_SIZE = {\n width: 1280,\n height: 960,\n};\n\nexport const SUMMARIZATION_SYSTEM_PROMPT = `You "
},
{
"path": "packages/bytebot-agent/src/agent/agent.module.ts",
"chars": 1076,
"preview": "import { Module } from '@nestjs/common';\nimport { TasksModule } from '../tasks/tasks.module';\nimport { MessagesModule } "
},
{
"path": "packages/bytebot-agent/src/agent/agent.processor.ts",
"chars": 13031,
"preview": "import { TasksService } from '../tasks/tasks.service';\nimport { MessagesService } from '../messages/messages.service';\ni"
},
{
"path": "packages/bytebot-agent/src/agent/agent.scheduler.ts",
"chars": 2049,
"preview": "import { Injectable, Logger, OnModuleInit } from '@nestjs/common';\nimport { Cron, CronExpression } from '@nestjs/schedul"
},
{
"path": "packages/bytebot-agent/src/agent/agent.tools.ts",
"chars": 10104,
"preview": "/**\n * Common schema definitions for reuse\n */\nconst coordinateSchema = {\n type: 'object' as const,\n properties: {\n "
},
{
"path": "packages/bytebot-agent/src/agent/agent.types.ts",
"chars": 802,
"preview": "import { Message } from '@prisma/client';\nimport { MessageContentBlock } from '@bytebot/shared';\n\nexport interface Byteb"
},
{
"path": "packages/bytebot-agent/src/agent/input-capture.service.ts",
"chars": 4908,
"preview": "import { Injectable, Logger } from '@nestjs/common';\nimport { io, Socket } from 'socket.io-client';\nimport { randomUUID "
},
{
"path": "packages/bytebot-agent/src/anthropic/anthropic.constants.ts",
"chars": 430,
"preview": "import { BytebotAgentModel } from '../agent/agent.types';\n\nexport const ANTHROPIC_MODELS: BytebotAgentModel[] = [\n {\n "
},
{
"path": "packages/bytebot-agent/src/anthropic/anthropic.module.ts",
"chars": 281,
"preview": "import { Module } from '@nestjs/common';\nimport { ConfigModule } from '@nestjs/config';\nimport { AnthropicService } from"
},
{
"path": "packages/bytebot-agent/src/anthropic/anthropic.service.ts",
"chars": 5644,
"preview": "import { Injectable, Logger } from '@nestjs/common';\nimport { ConfigService } from '@nestjs/config';\nimport Anthropic, {"
},
{
"path": "packages/bytebot-agent/src/anthropic/anthropic.tools.ts",
"chars": 1841,
"preview": "import Anthropic from '@anthropic-ai/sdk';\nimport { agentTools } from '../agent/agent.tools';\n\n/**\n * Converts an agent "
},
{
"path": "packages/bytebot-agent/src/app.controller.ts",
"chars": 274,
"preview": "import { Controller, Get } from '@nestjs/common';\nimport { AppService } from './app.service';\n\n@Controller()\nexport clas"
},
{
"path": "packages/bytebot-agent/src/app.module.ts",
"chars": 1207,
"preview": "import { Module } from '@nestjs/common';\nimport { AppController } from './app.controller';\nimport { AppService } from '."
},
{
"path": "packages/bytebot-agent/src/app.service.ts",
"chars": 142,
"preview": "import { Injectable } from '@nestjs/common';\n\n@Injectable()\nexport class AppService {\n getHello(): string {\n return "
},
{
"path": "packages/bytebot-agent/src/google/google.constants.ts",
"chars": 402,
"preview": "import { BytebotAgentModel } from '../agent/agent.types';\n\nexport const GOOGLE_MODELS: BytebotAgentModel[] = [\n {\n p"
},
{
"path": "packages/bytebot-agent/src/google/google.module.ts",
"chars": 266,
"preview": "import { Module } from '@nestjs/common';\nimport { ConfigModule } from '@nestjs/config';\nimport { GoogleService } from '."
},
{
"path": "packages/bytebot-agent/src/google/google.service.ts",
"chars": 8754,
"preview": "import { Injectable, Logger } from '@nestjs/common';\nimport { ConfigService } from '@nestjs/config';\nimport {\n isComput"
},
{
"path": "packages/bytebot-agent/src/google/google.tools.ts",
"chars": 3484,
"preview": "import { FunctionDeclaration, Type } from '@google/genai';\nimport { agentTools } from '../agent/agent.tools';\n\n/**\n * Co"
},
{
"path": "packages/bytebot-agent/src/main.ts",
"chars": 895,
"preview": "import { NestFactory } from '@nestjs/core';\nimport { AppModule } from './app.module';\nimport { webcrypto } from 'crypto'"
},
{
"path": "packages/bytebot-agent/src/messages/messages.module.ts",
"chars": 381,
"preview": "import { Module, forwardRef } from '@nestjs/common';\nimport { MessagesService } from './messages.service';\nimport { Pris"
},
{
"path": "packages/bytebot-agent/src/messages/messages.service.ts",
"chars": 6077,
"preview": "import {\n Injectable,\n NotFoundException,\n Inject,\n forwardRef,\n} from '@nestjs/common';\nimport { PrismaService } fr"
},
{
"path": "packages/bytebot-agent/src/openai/openai.constants.ts",
"chars": 382,
"preview": "import { BytebotAgentModel } from 'src/agent/agent.types';\n\nexport const OPENAI_MODELS: BytebotAgentModel[] = [\n {\n "
},
{
"path": "packages/bytebot-agent/src/openai/openai.module.ts",
"chars": 266,
"preview": "import { Module } from '@nestjs/common';\nimport { ConfigModule } from '@nestjs/config';\nimport { OpenAIService } from '."
},
{
"path": "packages/bytebot-agent/src/openai/openai.service.ts",
"chars": 10787,
"preview": "import { Injectable, Logger } from '@nestjs/common';\nimport { ConfigService } from '@nestjs/config';\nimport OpenAI, { AP"
},
{
"path": "packages/bytebot-agent/src/openai/openai.tools.ts",
"chars": 1933,
"preview": "import OpenAI from 'openai';\nimport { agentTools } from '../agent/agent.tools';\n\nfunction agentToolToOpenAITool(agentToo"
},
{
"path": "packages/bytebot-agent/src/prisma/prisma.module.ts",
"chars": 211,
"preview": "import { Global, Module } from '@nestjs/common';\n\nimport { PrismaService } from './prisma.service';\n\n@Global()\n@Module({"
},
{
"path": "packages/bytebot-agent/src/prisma/prisma.service.ts",
"chars": 289,
"preview": "import { Injectable, OnModuleInit } from '@nestjs/common';\nimport { PrismaClient } from '@prisma/client';\n\n@Injectable()"
},
{
"path": "packages/bytebot-agent/src/proxy/proxy.module.ts",
"chars": 260,
"preview": "import { Module } from '@nestjs/common';\nimport { ConfigModule } from '@nestjs/config';\nimport { ProxyService } from './"
},
{
"path": "packages/bytebot-agent/src/proxy/proxy.service.ts",
"chars": 9840,
"preview": "import { Injectable, Logger } from '@nestjs/common';\nimport { ConfigService } from '@nestjs/config';\nimport OpenAI, { AP"
},
{
"path": "packages/bytebot-agent/src/proxy/proxy.tools.ts",
"chars": 2221,
"preview": "import { ChatCompletionTool } from 'openai/resources';\nimport { agentTools } from '../agent/agent.tools';\n\n/**\n * Conver"
},
{
"path": "packages/bytebot-agent/src/summaries/summaries.modue.ts",
"chars": 290,
"preview": "import { Module } from '@nestjs/common';\nimport { PrismaModule } from '../prisma/prisma.module';\nimport { SummariesServi"
},
{
"path": "packages/bytebot-agent/src/summaries/summaries.service.ts",
"chars": 905,
"preview": "import { Injectable } from '@nestjs/common';\nimport { PrismaService } from '../prisma/prisma.service';\nimport { Summary "
},
{
"path": "packages/bytebot-agent/src/tasks/dto/add-task-message.dto.ts",
"chars": 141,
"preview": "import { IsNotEmpty, IsString } from 'class-validator';\n\nexport class AddTaskMessageDto {\n @IsNotEmpty()\n @IsString()\n"
},
{
"path": "packages/bytebot-agent/src/tasks/dto/create-task.dto.ts",
"chars": 894,
"preview": "import {\n IsArray,\n IsDate,\n IsNotEmpty,\n IsNumber,\n IsOptional,\n IsString,\n ValidateNested,\n} from 'class-valida"
},
{
"path": "packages/bytebot-agent/src/tasks/dto/update-task.dto.ts",
"chars": 387,
"preview": "import { IsEnum, IsOptional } from 'class-validator';\nimport { TaskPriority, TaskStatus } from '@prisma/client';\n\nexport"
},
{
"path": "packages/bytebot-agent/src/tasks/tasks.controller.ts",
"chars": 5361,
"preview": "import {\n Controller,\n Get,\n Post,\n Body,\n Param,\n Delete,\n HttpStatus,\n HttpCode,\n Query,\n HttpException,\n} f"
},
{
"path": "packages/bytebot-agent/src/tasks/tasks.gateway.ts",
"chars": 1419,
"preview": "import {\n WebSocketGateway,\n WebSocketServer,\n SubscribeMessage,\n OnGatewayConnection,\n OnGatewayDisconnect,\n} from"
},
{
"path": "packages/bytebot-agent/src/tasks/tasks.module.ts",
"chars": 512,
"preview": "import { Module } from '@nestjs/common';\nimport { TasksController } from './tasks.controller';\nimport { TasksService } f"
},
{
"path": "packages/bytebot-agent/src/tasks/tasks.service.ts",
"chars": 11072,
"preview": "import {\n Injectable,\n NotFoundException,\n Logger,\n BadRequestException,\n Inject,\n forwardRef,\n} from '@nestjs/com"
},
{
"path": "packages/bytebot-agent/tsconfig.build.json",
"chars": 97,
"preview": "{\n \"extends\": \"./tsconfig.json\",\n \"exclude\": [\"node_modules\", \"test\", \"dist\", \"**/*spec.ts\"]\n}\n"
},
{
"path": "packages/bytebot-agent/tsconfig.json",
"chars": 544,
"preview": "{\n \"compilerOptions\": {\n \"module\": \"commonjs\",\n \"declaration\": true,\n \"removeComments\": true,\n \"emitDecorat"
},
{
"path": "packages/bytebot-agent-cc/.dockerignore",
"chars": 125,
"preview": "**/node_modules\n**/dist\n**/.git\n**/.vscode\n**/.env*\n**/npm-debug.log\n**/yarn-debug.log\n**/yarn-error.log\n**/package-lock"
},
{
"path": "packages/bytebot-agent-cc/.gitignore",
"chars": 686,
"preview": "# compiled output\n/dist\n/node_modules\n/build\n\n# Logs\nlogs\n*.log\nnpm-debug.log*\npnpm-debug.log*\nyarn-debug.log*\nyarn-erro"
},
{
"path": "packages/bytebot-agent-cc/.prettierrc",
"chars": 51,
"preview": "{\n \"singleQuote\": true,\n \"trailingComma\": \"all\"\n}"
},
{
"path": "packages/bytebot-agent-cc/Dockerfile",
"chars": 307,
"preview": "# Base image\nFROM node:20-alpine\n\n# Create app directory\nWORKDIR /app\n\n# Copy app source\nCOPY ./shared ./shared\nCOPY ./b"
},
{
"path": "packages/bytebot-agent-cc/eslint.config.mjs",
"chars": 856,
"preview": "// @ts-check\nimport eslint from '@eslint/js';\nimport eslintPluginPrettierRecommended from 'eslint-plugin-prettier/recomm"
},
{
"path": "packages/bytebot-agent-cc/nest-cli.json",
"chars": 171,
"preview": "{\n \"$schema\": \"https://json.schemastore.org/nest-cli\",\n \"collection\": \"@nestjs/schematics\",\n \"sourceRoot\": \"src\",\n \""
},
{
"path": "packages/bytebot-agent-cc/package.json",
"chars": 3150,
"preview": "{\n \"name\": \"bytebot-agent\",\n \"version\": \"0.0.1\",\n \"description\": \"\",\n \"author\": \"\",\n \"private\": true,\n \"license\": "
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250328022708_initial_migration/migration.sql",
"chars": 1981,
"preview": "-- CreateEnum\nCREATE TYPE \"TaskStatus\" AS ENUM ('PENDING', 'IN_PROGRESS', 'NEEDS_HELP', 'NEEDS_REVIEW', 'COMPLETED', 'CA"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250413053912_message_role/migration.sql",
"chars": 361,
"preview": "/*\n Warnings:\n\n - You are about to drop the column `type` on the `Message` table. All the data in the column will be l"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250522200556_updated_task_structure/migration.sql",
"chars": 2011,
"preview": "\n-- CreateEnum\nCREATE TYPE \"Role\" AS ENUM ('USER', 'ASSISTANT');\n\n-- CreateEnum\nCREATE TYPE \"TaskType\" AS ENUM ('IMMEDIA"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250523162632_add_scheduling/migration.sql",
"chars": 117,
"preview": "-- AlterTable\nALTER TABLE \"Task\" ADD COLUMN \"queuedAt\" TIMESTAMP(3),\nADD COLUMN \"scheduledFor\" TIMESTAMP(3);\n"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250529003255_tasks_control/migration.sql",
"chars": 90,
"preview": "-- AlterTable\nALTER TABLE \"Task\" ADD COLUMN \"control\" \"Role\" NOT NULL DEFAULT 'USER';\n"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250530012753_tasks_control/migration.sql",
"chars": 81,
"preview": "-- AlterTable\nALTER TABLE \"Task\" ALTER COLUMN \"control\" SET DEFAULT 'ASSISTANT';\n"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250619013027_add_better_auth_schema/migration.sql",
"chars": 2452,
"preview": "-- AlterTable\nALTER TABLE \"Message\" ADD COLUMN \"userId\" TEXT;\n\n-- CreateTable\nCREATE TABLE \"User\" (\n \"id\" TEXT NO"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250622195148_add_user_to_task/migration.sql",
"chars": 219,
"preview": "-- AlterTable\nALTER TABLE \"Task\" ADD COLUMN \"userId\" TEXT;\n\n-- AddForeignKey\nALTER TABLE \"Task\" ADD CONSTRAINT \"Task"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250706223912_model_picker/migration.sql",
"chars": 460,
"preview": "-- AlterTable: add `model` column as JSONB (nullable initially)\nALTER TABLE \"Task\" ADD COLUMN \"model\" JSONB;\n\n-- Backfil"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250722041608_files/migration.sql",
"chars": 505,
"preview": "-- CreateTable\nCREATE TABLE \"File\" (\n \"id\" TEXT NOT NULL,\n \"name\" TEXT NOT NULL,\n \"type\" TEXT NOT NULL,\n \"si"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/20250820172813_remove_auth/migration.sql",
"chars": 1357,
"preview": "/*\n Warnings:\n\n - You are about to drop the column `userId` on the `Message` table. All the data in the column will be"
},
{
"path": "packages/bytebot-agent-cc/prisma/migrations/migration_lock.toml",
"chars": 128,
"preview": "# Please do not edit this file manually\n# It should be added in your version-control system (e.g., Git)\nprovider = \"post"
},
{
"path": "packages/bytebot-agent-cc/prisma/schema.prisma",
"chars": 3113,
"preview": "// This is your Prisma schema file,\n// learn more about it in the docs: https://pris.ly/d/prisma-schema\n\n// Looking for "
},
{
"path": "packages/bytebot-agent-cc/src/agent/agent.analytics.ts",
"chars": 1492,
"preview": "import { Injectable, Logger } from '@nestjs/common';\nimport { OnEvent } from '@nestjs/event-emitter';\nimport { ConfigSer"
},
{
"path": "packages/bytebot-agent-cc/src/agent/agent.computer-use.ts",
"chars": 16446,
"preview": "import {\n Button,\n Coordinates,\n Press,\n ComputerToolUseContentBlock,\n ToolResultContentBlock,\n MessageContentType"
},
{
"path": "packages/bytebot-agent-cc/src/agent/agent.constants.ts",
"chars": 9093,
"preview": "export const DEFAULT_DISPLAY_SIZE = {\n width: 1280,\n height: 960,\n};\n\nexport const SUMMARIZATION_SYSTEM_PROMPT = `You "
},
{
"path": "packages/bytebot-agent-cc/src/agent/agent.module.ts",
"chars": 668,
"preview": "import { Module } from '@nestjs/common';\nimport { TasksModule } from '../tasks/tasks.module';\nimport { MessagesModule } "
},
{
"path": "packages/bytebot-agent-cc/src/agent/agent.processor.ts",
"chars": 8570,
"preview": "import { TasksService } from '../tasks/tasks.service';\nimport { MessagesService } from '../messages/messages.service';\ni"
},
{
"path": "packages/bytebot-agent-cc/src/agent/agent.scheduler.ts",
"chars": 2049,
"preview": "import { Injectable, Logger, OnModuleInit } from '@nestjs/common';\nimport { Cron, CronExpression } from '@nestjs/schedul"
},
{
"path": "packages/bytebot-agent-cc/src/agent/agent.tools.ts",
"chars": 10104,
"preview": "/**\n * Common schema definitions for reuse\n */\nconst coordinateSchema = {\n type: 'object' as const,\n properties: {\n "
},
{
"path": "packages/bytebot-agent-cc/src/agent/agent.types.ts",
"chars": 802,
"preview": "import { Message } from '@prisma/client';\nimport { MessageContentBlock } from '@bytebot/shared';\n\nexport interface Byteb"
},
{
"path": "packages/bytebot-agent-cc/src/agent/input-capture.service.ts",
"chars": 4908,
"preview": "import { Injectable, Logger } from '@nestjs/common';\nimport { io, Socket } from 'socket.io-client';\nimport { randomUUID "
},
{
"path": "packages/bytebot-agent-cc/src/app.controller.ts",
"chars": 274,
"preview": "import { Controller, Get } from '@nestjs/common';\nimport { AppService } from './app.service';\n\n@Controller()\nexport clas"
},
{
"path": "packages/bytebot-agent-cc/src/app.module.ts",
"chars": 823,
"preview": "import { Module } from '@nestjs/common';\nimport { AppController } from './app.controller';\nimport { AppService } from '."
},
{
"path": "packages/bytebot-agent-cc/src/app.service.ts",
"chars": 142,
"preview": "import { Injectable } from '@nestjs/common';\n\n@Injectable()\nexport class AppService {\n getHello(): string {\n return "
},
{
"path": "packages/bytebot-agent-cc/src/main.ts",
"chars": 968,
"preview": "import { NestFactory } from '@nestjs/core';\nimport { AppModule } from './app.module';\nimport { webcrypto } from 'crypto'"
},
{
"path": "packages/bytebot-agent-cc/src/messages/messages.module.ts",
"chars": 381,
"preview": "import { Module, forwardRef } from '@nestjs/common';\nimport { MessagesService } from './messages.service';\nimport { Pris"
},
{
"path": "packages/bytebot-agent-cc/src/messages/messages.service.ts",
"chars": 6077,
"preview": "import {\n Injectable,\n NotFoundException,\n Inject,\n forwardRef,\n} from '@nestjs/common';\nimport { PrismaService } fr"
},
{
"path": "packages/bytebot-agent-cc/src/prisma/prisma.module.ts",
"chars": 211,
"preview": "import { Global, Module } from '@nestjs/common';\n\nimport { PrismaService } from './prisma.service';\n\n@Global()\n@Module({"
},
{
"path": "packages/bytebot-agent-cc/src/prisma/prisma.service.ts",
"chars": 289,
"preview": "import { Injectable, OnModuleInit } from '@nestjs/common';\nimport { PrismaClient } from '@prisma/client';\n\n@Injectable()"
},
{
"path": "packages/bytebot-agent-cc/src/tasks/dto/add-task-message.dto.ts",
"chars": 141,
"preview": "import { IsNotEmpty, IsString } from 'class-validator';\n\nexport class AddTaskMessageDto {\n @IsNotEmpty()\n @IsString()\n"
},
{
"path": "packages/bytebot-agent-cc/src/tasks/dto/create-task.dto.ts",
"chars": 894,
"preview": "import {\n IsArray,\n IsDate,\n IsNotEmpty,\n IsNumber,\n IsOptional,\n IsString,\n ValidateNested,\n} from 'class-valida"
},
{
"path": "packages/bytebot-agent-cc/src/tasks/dto/update-task.dto.ts",
"chars": 387,
"preview": "import { IsEnum, IsOptional } from 'class-validator';\nimport { TaskPriority, TaskStatus } from '@prisma/client';\n\nexport"
},
{
"path": "packages/bytebot-agent-cc/src/tasks/tasks.controller.ts",
"chars": 3860,
"preview": "import {\n Controller,\n Get,\n Post,\n Body,\n Param,\n Delete,\n HttpStatus,\n HttpCode,\n Query,\n} from '@nestjs/comm"
},
{
"path": "packages/bytebot-agent-cc/src/tasks/tasks.gateway.ts",
"chars": 1419,
"preview": "import {\n WebSocketGateway,\n WebSocketServer,\n SubscribeMessage,\n OnGatewayConnection,\n OnGatewayDisconnect,\n} from"
},
{
"path": "packages/bytebot-agent-cc/src/tasks/tasks.module.ts",
"chars": 512,
"preview": "import { Module } from '@nestjs/common';\nimport { TasksController } from './tasks.controller';\nimport { TasksService } f"
},
{
"path": "packages/bytebot-agent-cc/src/tasks/tasks.service.ts",
"chars": 11072,
"preview": "import {\n Injectable,\n NotFoundException,\n Logger,\n BadRequestException,\n Inject,\n forwardRef,\n} from '@nestjs/com"
},
{
"path": "packages/bytebot-agent-cc/tsconfig.build.json",
"chars": 97,
"preview": "{\n \"extends\": \"./tsconfig.json\",\n \"exclude\": [\"node_modules\", \"test\", \"dist\", \"**/*spec.ts\"]\n}\n"
},
{
"path": "packages/bytebot-agent-cc/tsconfig.json",
"chars": 544,
"preview": "{\n \"compilerOptions\": {\n \"module\": \"commonjs\",\n \"declaration\": true,\n \"removeComments\": true,\n \"emitDecorat"
},
{
"path": "packages/bytebot-llm-proxy/Dockerfile",
"chars": 194,
"preview": "FROM ghcr.io/berriai/litellm:main-stable\n\n# Add custom config into the image\nCOPY ./bytebot-llm-proxy/litellm-config.yam"
},
{
"path": "packages/bytebot-llm-proxy/litellm-config.yaml",
"chars": 837,
"preview": "model_list:\n # Anthropic Models\n - model_name: claude-opus-4\n litellm_params:\n model: anthropic/claude-opus-4-"
},
{
"path": "packages/bytebot-ui/.dockerignore",
"chars": 134,
"preview": "**/node_modules\n**/dist\n**/.next\n**/.git\n**/.vscode\n**/.env*\n**/npm-debug.log\n**/yarn-debug.log\n**/yarn-error.log\n**/pac"
},
{
"path": "packages/bytebot-ui/.gitignore",
"chars": 479,
"preview": "# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.\n\n# dependencies\n/node_modules\n/.pn"
},
{
"path": "packages/bytebot-ui/.prettierrc.json",
"chars": 49,
"preview": "{\n \"plugins\": [\"prettier-plugin-tailwindcss\"]\n}\n"
},
{
"path": "packages/bytebot-ui/Dockerfile",
"chars": 523,
"preview": "# Base image\nFROM node:20-alpine\n\n# Declare build arguments\nARG BYTEBOT_AGENT_BASE_URL\nARG BYTEBOT_DESKTOP_VNC_URL\n\n# Se"
},
{
"path": "packages/bytebot-ui/components.json",
"chars": 428,
"preview": "{\n \"$schema\": \"https://ui.shadcn.com/schema.json\",\n \"style\": \"new-york\",\n \"rsc\": true,\n \"tsx\": true,\n \"tailwind\": {"
},
{
"path": "packages/bytebot-ui/eslint.config.mjs",
"chars": 393,
"preview": "import { dirname } from \"path\";\nimport { fileURLToPath } from \"url\";\nimport { FlatCompat } from \"@eslint/eslintrc\";\n\ncon"
}
]
// ... and 116 more files (download for full content)
About this extraction
This page contains the full source code of the bytebot-ai/bytebot GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 316 files (857.6 KB), approximately 229.5k tokens, and a symbol index with 648 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.