Showing preview only (590K chars total). Download the full file or copy to clipboard to get everything.
Repository: camel-ai/crab
Branch: main
Commit: a8b6d7272385
Files: 230
Total size: 526.1 KB
Directory structure:
gitextract_jurvigyb/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ ├── feature_request.yml
│ │ └── questions.yml
│ ├── actions/
│ │ └── crab_install/
│ │ └── action.yml
│ └── workflows/
│ ├── documentation.yml
│ ├── publish_release.yml
│ └── pytest_package.yml
├── .gitignore
├── .pre-commit-config.yaml
├── README.md
├── crab/
│ ├── __init__.py
│ ├── actions/
│ │ ├── android_actions.py
│ │ ├── crab_actions.py
│ │ ├── desktop_actions.py
│ │ ├── file_actions.py
│ │ ├── system_actions.py
│ │ └── visual_prompt_actions.py
│ ├── agents/
│ │ ├── backend_models/
│ │ │ ├── __init__.py
│ │ │ ├── camel_model.py
│ │ │ ├── claude_model.py
│ │ │ ├── gemini_model.py
│ │ │ └── openai_model.py
│ │ ├── policies/
│ │ │ ├── __init__.py
│ │ │ ├── multi_agent_by_env.py
│ │ │ ├── multi_agent_by_func.py
│ │ │ └── single_agent.py
│ │ └── utils.py
│ ├── benchmarks/
│ │ ├── __init__.py
│ │ └── template.py
│ ├── core/
│ │ ├── __init__.py
│ │ ├── agent_policy.py
│ │ ├── backend_model.py
│ │ ├── benchmark.py
│ │ ├── csv_log.py
│ │ ├── decorators.py
│ │ ├── environment.py
│ │ ├── exceptions.py
│ │ ├── experiment.py
│ │ ├── graph_evaluator.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── action.py
│ │ │ ├── agent_interface.py
│ │ │ ├── benchmark_interface.py
│ │ │ ├── config.py
│ │ │ ├── evaluator.py
│ │ │ └── task.py
│ │ └── task_generator.py
│ ├── environments/
│ │ ├── __init__.py
│ │ └── template.py
│ ├── server/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── config.py
│ │ ├── exception_handlers.py
│ │ ├── logger.py
│ │ ├── main.py
│ │ ├── middleware.py
│ │ └── utils.py
│ └── utils/
│ ├── __init__.py
│ ├── common.py
│ ├── encryption.py
│ └── measure.py
├── crab-benchmark-v0/
│ ├── README.md
│ ├── __init__.py
│ ├── android_env.py
│ ├── dataset/
│ │ ├── android/
│ │ │ ├── 1005c437-50d1-465a-b3fc-833098b22bfc.json
│ │ │ ├── 12333aa0-e76d-4a5c-8657-9f897f62f62d.json
│ │ │ ├── 22b04776-8eec-4303-b3f6-9c981f7f29b8.json
│ │ │ ├── 2ade6a13-c7a6-4df7-8c62-77382687369e.json
│ │ │ ├── 346caf7c-dc74-4c38-962a-aaffb638e0c7.json
│ │ │ ├── 379b9c58-5125-41b3-9cc6-ea925c8b094d.json
│ │ │ ├── 4190c90c-b28c-4bb3-ab5c-af3c4fde0a3d.json
│ │ │ ├── 46d7ccdb-d2e4-4b8a-bead-f2641b5ac23c.json
│ │ │ ├── 483fbf9c-dc78-4ac2-9264-53c4f617f6cc.json
│ │ │ ├── 4893a9b0-6477-495d-a73c-32503326e24a.json
│ │ │ ├── 53010c40-dce4-4d72-a856-842c21059e2b.json
│ │ │ ├── 6d9f6395-de79-4ad0-8a2a-2d674f93f293.json
│ │ │ ├── 71ef7fd2-0ae3-49c8-8238-06b7aa985d25.json
│ │ │ ├── 73f78fc3-1ca5-442d-801f-bc175a0bfb89.json
│ │ │ ├── 764838cc-9359-4130-9bb2-4a75900b2d89.json
│ │ │ ├── 77289141-e52b-48c8-b3a7-1b29520f3e1e.json
│ │ │ ├── 7891ceab-7965-4ddb-a0fc-15740c9a4e44.json
│ │ │ ├── 8bd51440-f959-4edc-baa5-cd03d32a5b0f.json
│ │ │ ├── 94b1836b-3111-40ad-8d07-b8a57efe7438.json
│ │ │ ├── a225f7f8-6d03-4619-b57d-7a08610030d8.json
│ │ │ ├── b077299d-1acb-40f5-89f3-cc08044345bf.json
│ │ │ ├── b3965b07-4683-4445-9de1-a1dedf6c73ad.json
│ │ │ ├── c1b1cfeb-40e7-49a8-a3f5-b8c8ba723601.json
│ │ │ ├── c85f03c9-83c4-417b-93d9-0d7b41022525.json
│ │ │ ├── cf4c496b-fbbd-4701-91ea-4590fe6a66e1.json
│ │ │ ├── d0811e47-d75f-40ce-b34b-e1ee3c8bed3f.json
│ │ │ ├── d2d456bb-c7d1-46af-8263-78d8509fb320.json
│ │ │ ├── d4e0f2b3-d0ff-4efd-856f-9f5e598cfd05.json
│ │ │ ├── d7489d00-0046-4fb1-af5b-1fde7d87312c.json
│ │ │ ├── d92f6c33-e0a7-4101-957d-e7dd218d2565.json
│ │ │ ├── de843952-df8f-4a26-bae9-d0a32ed9a7f5.json
│ │ │ ├── e20fd121-b981-42da-94de-efcd66889c11.json
│ │ │ ├── e55d7a39-7b6b-4852-8711-844cebc88cb8.json
│ │ │ ├── e9268070-91b7-4e8c-9976-1cf8126ba13b.json
│ │ │ ├── fbe6a1b1-63bb-4d4e-8a53-ff4f7839ef61.json
│ │ │ └── fc642cb6-5321-4966-afbf-fb3348bb69ee.json
│ │ ├── android_subtasks.py
│ │ ├── cross/
│ │ │ ├── 05a7633d-b966-471c-8848-e18e69ad265f.json
│ │ │ ├── 1e92db38-501e-429b-ac31-453d1af10a25.json
│ │ │ ├── 43be6e8e-034d-4277-8346-c4ae7553bf68.json
│ │ │ ├── 534be964-269a-4509-b2b8-28cc3ba8dfca.json
│ │ │ ├── 6f95cfa1-e7ae-4a82-912b-0180fc9622f2.json
│ │ │ ├── 760ed27e-b1bd-451f-8659-bdb9845fcb7f.json
│ │ │ ├── 82596760-7d4d-457d-9ca9-9551ab85ec58.json
│ │ │ ├── a956a091-8de4-42ee-b152-913308dfc24b.json
│ │ │ ├── c5929ef3-ac27-4288-b02f-4f261d5871f9.json
│ │ │ └── da5911e3-1a99-4735-ba3e-f08c5ca81fdd.json
│ │ ├── handmade_tasks.py
│ │ ├── ubuntu/
│ │ │ ├── 05d0e137-7d97-4021-9477-6490a2154c81.json
│ │ │ ├── 0a893c2e-eec5-47cc-a930-eb01c5f17683.json
│ │ │ ├── 0d178388-8166-4b66-93c1-278861f9897c.json
│ │ │ ├── 0d7c84d2-bbbd-46ab-80d1-52b3a44f3858.json
│ │ │ ├── 0deafe05-8db5-445f-9031-f6e884569d03.json
│ │ │ ├── 0e80fd90-0b23-454f-a629-7b6d7baa7542.json
│ │ │ ├── 125f7bae-e931-4190-8737-5f1ea7227772.json
│ │ │ ├── 15a150a8-899c-4753-8dc5-05248ccc3640.json
│ │ │ ├── 1ebcd710-f73b-4022-832b-167c0d3f55a2.json
│ │ │ ├── 22787ecc-52b2-4791-aefb-c45800f51414.json
│ │ │ ├── 22f05f6f-6aef-4786-958f-14f559eaf014.json
│ │ │ ├── 28963795-d694-4bb4-adaf-f7708a2c6fe5.json
│ │ │ ├── 299db8f2-81eb-455f-9302-5c8cb30be691.json
│ │ │ ├── 29f099b2-b3a5-463f-b10a-15363bf7e845.json
│ │ │ ├── 355e9660-a355-4b95-8881-ac9da578ea43.json
│ │ │ ├── 35bd7387-4735-4632-8474-e93382004c12.json
│ │ │ ├── 362c5711-3824-42ff-96a0-7801b03b5f1f.json
│ │ │ ├── 4718df9c-97ec-4b54-86ca-bd34e65c5a43.json
│ │ │ ├── 47b75b21-99a2-461c-9d40-6dddc5c206d0.json
│ │ │ ├── 4ae4e35f-d90a-48cc-8fb9-492ac7ae07ee.json
│ │ │ ├── 4bbedade-4d4e-43d5-b650-2702b350ad28.json
│ │ │ ├── 51a288f9-cf2c-4e8e-a98c-596a505af77c.json
│ │ │ ├── 51c91051-3efb-4e92-a967-739b18520714.json
│ │ │ ├── 57b7e8a7-8c17-4cc4-9bb5-4385afde3ad8.json
│ │ │ ├── 58776443-ccf7-4db3-8c60-e188e4b5f90c.json
│ │ │ ├── 5ba74c6a-4513-448b-8b68-ff145ece0652.json
│ │ │ ├── 6428f803-62de-40d2-a345-64e6cf955c9d.json
│ │ │ ├── 64a2c205-c85a-4e56-8edb-5df4f7724441.json
│ │ │ ├── 696ca9bb-89ea-4cd5-b693-f2d749d964b1.json
│ │ │ ├── 6be49e77-e904-4eb0-a36a-7f0fd128ede3.json
│ │ │ ├── 6c3105a2-328c-4190-823d-03d759be0b57.json
│ │ │ ├── 6c560516-ca14-4f97-b51d-16ad81fc29e4.json
│ │ │ ├── 730172f5-894a-4d46-9102-ac7d985a479d.json
│ │ │ ├── 73038efb-ca0f-4d90-a947-fcfd097dd91b.json
│ │ │ ├── 73da97c9-f084-4cab-8697-1151737387ff.json
│ │ │ ├── 77aa4dd3-5a68-4686-9cac-26d0ab77c7b4.json
│ │ │ ├── 78502f1c-879b-4932-a5fd-d85f7f6b0f81.json
│ │ │ ├── 7912f7a5-24b9-4dfe-a7b8-1effc1b7a212.json
│ │ │ ├── 7d5613ec-9b67-4255-b766-d9c6e8466464.json
│ │ │ ├── 7dda7e46-78be-4663-b882-6132dbbff335.json
│ │ │ ├── 7e6c4927-2220-4522-9e3f-36f69adc3e71.json
│ │ │ ├── 82c49e12-3b2f-432e-9069-4b67bafebbf7.json
│ │ │ ├── 87910f23-ab23-4ccc-b115-d71cff6f0162.json
│ │ │ ├── 8afc25eb-7a80-459f-acdc-5c79fc146c29.json
│ │ │ ├── 8cb5ab6d-a56e-43b9-aa83-00a46331e20f.json
│ │ │ ├── 90e09946-7b28-4102-b0ed-f683c01dbbd4.json
│ │ │ ├── 925a3607-2802-48aa-b339-13ebfcef43a2.json
│ │ │ ├── 9506dd30-f58d-4832-b336-8037e83e2689.json
│ │ │ ├── 95e347aa-56ab-4d5d-a94c-350ddfddabf9.json
│ │ │ ├── 98a360d8-0f95-44cd-bb9d-442fca2918d4.json
│ │ │ ├── 9c979fc5-8d60-41f1-a494-904a1d312187.json
│ │ │ ├── 9e08971c-7f83-4853-952e-4c4a4a26333b.json
│ │ │ ├── 9fe4f541-61cf-48e0-a081-4371786659c7.json
│ │ │ ├── a0714ef7-bbdc-4f84-bd2e-c6e611d4db9e.json
│ │ │ ├── a2a34580-cded-4bf8-81d9-b36a4d4402d0.json
│ │ │ ├── a6b67c2d-d448-4e77-904e-dc7c5f21a5fe.json
│ │ │ ├── a70ab903-835f-48b7-8356-2321b8b869d8.json
│ │ │ ├── a78177f5-6cc6-48d7-8c6f-df53399d7759.json
│ │ │ ├── abb16512-27ae-49c0-b12b-7fbf0e95056b.json
│ │ │ ├── b2ca21dc-dde9-49f5-bec7-321fbf769315.json
│ │ │ ├── b57c96c1-071b-40f6-b33b-2a0459fc25bb.json
│ │ │ ├── b73019e0-3ce8-4657-8b13-b3e0ab6cfac8.json
│ │ │ ├── ba5aebcb-999d-44d4-b9bc-241f9884c6dd.json
│ │ │ ├── be6468be-2218-45c1-9b75-b56efec61eb4.json
│ │ │ ├── c4106f9a-9348-4a55-9892-782e6f4b3081.json
│ │ │ ├── c8800e50-3ff4-4dd2-bc90-33688be99659.json
│ │ │ ├── ccf31785-ec13-4981-93c5-ca6c242ac0c3.json
│ │ │ ├── d3478489-70f2-4a82-b7d2-0a47b75986eb.json
│ │ │ ├── d39d40b1-fc26-4169-9d6f-cdf81efe9a3e.json
│ │ │ ├── d3c917ff-406f-447a-87f5-b8d835cba750.json
│ │ │ ├── d6e460e4-c295-40ad-883c-11300d7832f0.json
│ │ │ ├── d9e4e23c-2a2a-4b5c-b034-7deb6036572d.json
│ │ │ ├── e31d4e3b-b753-4deb-b9ad-a0add5d4790e.json
│ │ │ ├── f07a1f32-2f3f-40e7-b12f-8f1b128c41f6.json
│ │ │ ├── f5cce3a0-ba65-4317-95f8-1fc7d9776c78.json
│ │ │ ├── f67a26e4-58dd-4dc6-8859-affbf1d62f94.json
│ │ │ └── f96d7c34-9543-4679-a6ea-89e0c2ef7b1c.json
│ │ └── ubuntu_subtasks.py
│ ├── main.py
│ ├── scripts/
│ │ └── ubuntu_env_init.sh
│ └── ubuntu_env.py
├── docs/
│ ├── Makefile
│ ├── conf.py
│ ├── crab.benchmarks.rst
│ ├── crab.client.rst
│ ├── crab.core.models.rst
│ ├── crab.core.rst
│ ├── crab.environments.rst
│ ├── crab.rst
│ ├── crab.server.controller.rst
│ ├── crab.server.rst
│ ├── crab_benchmark_v0/
│ │ ├── environment_gcp_setup.md
│ │ ├── environment_local_setup.md
│ │ └── get_started.md
│ ├── get_started/
│ │ ├── build_your_own_benchmark.md
│ │ └── quickstart.md
│ ├── index.rst
│ ├── make.bat
│ └── modules.rst
├── examples/
│ ├── multi_env.py
│ └── single_env.py
├── licenses/
│ ├── LICENSE
│ ├── license_template.txt
│ └── update_license.py
├── pyproject.toml
└── test/
├── actions/
│ └── test_visual_prompt_actions.py
├── agents/
│ ├── backend_models/
│ │ ├── test_camel_model.py
│ │ ├── test_claude_model.py
│ │ ├── test_gemini_model.py
│ │ └── test_openai_model.py
│ └── policies/
│ ├── test_multi_agent_by_func.py
│ ├── test_mutli_agent_by_env.py
│ └── test_single_agent.py
├── core/
│ ├── test_action.py
│ ├── test_benchmark.py
│ ├── test_evaluator.py
│ └── test_utils.py
└── server/
└── test_api.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: 🐛 Bug Report
description: File an issue about a bug.
title: "[BUG] "
labels: [bug]
assignees: [dandansamax]
body:
- type: markdown
attributes:
value: |
Please do your best to make the issue as easy to act on as possible, and only submit here if there is clearly a problem with crab (ask in [Discussions](https://github.com/camel-ai/crab/discussions) first if unsure).
- type: input
id: version
attributes:
label: What version of crab are you using?
description: Run command `python3 -c 'print(__import__("crab").__version__)'` in your shell and paste the output here.
placeholder: E.g., 0.1.0
validations:
required: true
- type: textarea
id: system-info
attributes:
label: System information
description: |
Describe the characteristic of your environment:
- Describe how the library was installed (pip, conda, source, ...)
- Python version
- Versions of any other relevant libraries
```python
import sys, crab
print(sys.version, sys.platform)
print(crab.__version__)
```
validations:
required: true
- type: textarea
id: description
attributes:
label: Problem description
description: >-
Provide a short description, state the expected behavior and what actually happens. Include
relevant information like what version of camel you are using, what system you are on,
and any useful commands / output.
validations:
required: true
- type: textarea
id: code
attributes:
label: Reproducible example code
description: >-
The code should be minimal, have minimal external dependencies, and isolate the functions
that cause breakage. Submit matched and complete snippets that can be easily run to diagnose
the issue.
value: |
The Python snippets:
```python
```
Command lines:
```bash
```
Extra dependencies:
```text
```
Steps to reproduce:
1.
2.
3.
validations:
required: true
- type: textarea
id: traceback
attributes:
label: Traceback
description: Put the Python traceback information here.
placeholder: |
Traceback (most recent call last):
File ...
render: pytb
- type: textarea
id: expected
attributes:
label: Expected behavior
description: Provide a clear and concise description of what you expected to happen.
- type: textarea
id: additional-context
attributes:
label: Additional context
description: >-
Add any other context about the problem here. Screenshots may also be helpful.
If you know or suspect the reason for this bug, paste the code lines and suggest modifications.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
name: ✨ Feature Request
description: Suggest an idea for this project.
title: "[Feature Request] "
labels: [enhancement]
assignees: [dandansamax]
body:
- type: checkboxes
id: steps
attributes:
label: Required prerequisites
description: Make sure you've completed the following steps before submitting your issue -- thank you!
options:
- label: I have searched the [Issue Tracker](https://github.com/camel-ai/crab/issues) that this hasn't already been reported. (+1 or comment there if it has.)
required: true
- type: textarea
id: motivation
attributes:
label: Motivation
description: Outline the motivation for the proposal.
value: |
<!-- Please outline the motivation for the proposal.
Is your feature request related to a problem? E.g., "I'm always frustrated when [...]".
If this is related to another issue, please link here too. -->
validations:
required: true
- type: textarea
id: solution
attributes:
label: Solution
description: Provide a clear and concise description of what you want to happen.
- type: textarea
id: additional-context
attributes:
label: Additional context
description: Add any other context about the problem here. Screenshots may also be helpful.
================================================
FILE: .github/ISSUE_TEMPLATE/questions.yml
================================================
name: 🤔 Questions / Help / Support
description: Do you need support?
title: "[Question] "
labels: [question]
assignees: [dandansamax]
body:
- type: checkboxes
id: steps
attributes:
label: Required prerequisites
description: Make sure you've completed the following steps before submitting your issue -- thank you!
options:
# - label: I have read the documentation <https://camel-ai.github.io/camel/camel.html>.
# required: true
- label: I have searched the [Issue Tracker](https://github.com/camel-ai/crab/issues) that this hasn't already been reported. (+1 or comment there if it has.)
required: true
- type: textarea
id: questions
attributes:
label: Questions
description: Describe your questions with relevant resources such as snippets, links, images, etc.
validations:
required: true
================================================
FILE: .github/actions/crab_install/action.yml
================================================
name: 'crab_install'
description: 'Setup python environment and install dependencies for Crab by poetry.'
inputs:
python-version:
description: 'Python version.'
required: true
default: '3.10'
runs:
using: "composite"
steps:
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '${{ inputs.python-version }}'
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup poetry virtual environment
run: |
poetry config virtualenvs.create true --local
poetry config virtualenvs.in-project true --local
shell: bash
- uses: actions/cache/restore@v3
id: cache-restore
name: Restore caches for the virtual environment based on poetry.lock
with:
path: ./.venv
key: venv-${{ hashFiles('poetry.lock') }}
- name: Install the project dependencies
run: poetry install -E client -E server -E camel
shell: bash
- uses: actions/cache/save@v3
name: Save caches based on poetry.lock
if: ${{ !steps.cache-restore.outputs.cache-hit }}
with:
path: ./.venv
key: venv-${{ hashFiles('poetry.lock') }}
================================================
FILE: .github/workflows/documentation.yml
================================================
name: Build and deploy CRAB documents
on:
push:
branches: [ "main" ]
workflow_dispatch:
permissions:
contents: write
jobs:
docs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python environment and install dependencies
uses: ./.github/actions/crab_install
with:
python-version: "3.10"
- name: Sphinx build
run: |
cd docs
poetry run make html
- name: Deploy
uses: peaceiris/actions-gh-pages@v3
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main'}}
with:
publish_branch: gh-pages
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: docs/_build/html/
force_orphan: true
================================================
FILE: .github/workflows/publish_release.yml
================================================
name: Publish CRAB to PyPI / GitHub
on:
push:
tags:
- "v*"
workflow_dispatch:
jobs:
build-n-publish:
name: Build and publish to PyPI
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- uses: actions/checkout@v3
- name: Build and publish to pypi
uses: JRubics/poetry-publish@v1.17
with:
pypi_token: ${{ secrets.PYPI_API_KEY }}
ignore_dev_requirements: "yes"
- name: Create GitHub Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token
with:
tag_name: ${{ github.ref }}
release_name: ${{ github.ref }}
draft: false
prerelease: false
- name: Get Asset name
run: |
export PKG=$(ls dist/ | grep tar)
set -- $PKG
echo "name=$1" >> $GITHUB_ENV
- name: Upload Release Asset (sdist) to GitHub
id: upload-release-asset
uses: actions/upload-release-asset@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ steps.create_release.outputs.upload_url }}
asset_path: dist/${{ env.name }}
asset_name: ${{ env.name }}
asset_content_type: application/zip
================================================
FILE: .github/workflows/pytest_package.yml
================================================
# This workflow will install Python dependencies, run tests
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: Pytest CRAB package
on: push
jobs:
pytest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python environment and install dependencies
uses: ./.github/actions/crab_install
with:
python-version: "3.10"
- name: Run pytest
run: poetry run pytest test/
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
.vagrant/*
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
# docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
.vscode/
.python-version
_build/
# model parameter
*.pth
logs/
.DS_Store
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.6.5
hooks:
# Run the linter.
- id: ruff
# Run the formatter.
- id: ruff-format
- repo: local
hooks:
- id: check-license
name: Check License
entry: python licenses/update_license.py . licenses/license_template.txt
language: system
types: [python]
================================================
FILE: README.md
================================================
# 🦀 CRAB: Cross-platform Agent Benchmark for Multimodal Embodied Language Model Agents
[![arXiv][arxiv-image]][arxiv-url]
[![Slack][slack-image]][slack-url]
[![Discord][discord-image]][discord-url]
[![Wechat][wechat-image]][wechat-url]
[![Twitter][twitter-image]][twitter-url]
<p align="center">
<a href="https://camel-ai.github.io/crab/">Documentation</a> |
<a href="https://crab.camel-ai.org/">Website & Demos</a> |
<a href="https://www.camel-ai.org/post/crab">Blog</a> |
<a href="https://dandansamax.github.io/posts/crab-paper/">Chinese Blog</a> |
<a href="https://www.camel-ai.org/">CAMEL-AI</a>
</p>
<p align="center">
<img src='https://raw.githubusercontent.com/camel-ai/crab/main/assets/CRAB_logo1.png' width=800>
</p>
## Overview
CRAB is a framework for building LLM agent benchmark environments in a Python-centric way.
#### Key Features
🌐 Cross-platform and Multi-environment
* Create and build agent environments that support various deployment options including in-memory, Docker-hosted, virtual machines, or distributed physical machines, provided they are accessible via Python functions.
* Let the agent access all the environments at the same time through a unified interface.
⚙ ️Easy-to-use Configuration
* Add a new action by simply adding a `@action` decorator on a Python function.
* Define the environment by integrating several actions together.
📐 Novel Benchmarking Suite
* Define tasks and the corresponding evaluators in an intuitive Python-native way.
* Introduce a novel graph evaluator method providing fine-grained metrics.
## Installation
#### Prerequisites
- Python 3.10 or newer
```bash
pip install crab-framework[client]
```
## Experiment on CRAB-Benchmark-v0
All datasets and experiment code are in [crab-benchmark-v0](./crab-benchmark-v0/) directory. Please carefully read the [benchmark tutorial](./crab-benchmark-v0/README.md) before using our benchmark.
## Examples
#### Run template environment with openai agent
```bash
export OPENAI_API_KEY=<your api key>
python examples/single_env.py
python examples/multi_env.py
```
## Demo Video
[](https://www.youtube.com/watch?v=PNqrHNQlU6I&ab_channel=CamelAI)
## Cite
Please cite [our paper](https://arxiv.org/abs/2407.01511) if you use anything related in your work:
```
@misc{xu2024crab,
title={CRAB: Cross-environment Agent Benchmark for Multimodal Language Model Agents},
author={Tianqi Xu and Linyao Chen and Dai-Jie Wu and Yanjun Chen and Zecheng Zhang and Xiang Yao and Zhiqiang Xie and Yongchao Chen and Shilong Liu and Bochen Qian and Philip Torr and Bernard Ghanem and Guohao Li},
year={2024},
eprint={2407.01511},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2407.01511},
}
```
## Community
Join us ([*Discord*](https://discord.camel-ai.org/) or [*WeChat*](https://ghli.org/camel/wechat.png)) in pushing the boundaries of finding the scaling laws of agents.
- **WeChat Community:** Scan the QR code below to join our WeChat community.
<div align="center">
<img src="assets/wechatgroup.jpeg" alt="WeChat QR Code" width="50%">
</div>
<br>
[slack-url]: https://join.slack.com/t/camel-kwr1314/shared_invite/zt-1vy8u9lbo-ZQmhIAyWSEfSwLCl2r2eKA
[slack-image]: https://img.shields.io/badge/Slack-CAMEL--AI-blueviolet?logo=slack
[discord-url]: https://discord.gg/CNcNpquyDc
[discord-image]: https://img.shields.io/badge/Discord-CAMEL--AI-7289da?logo=discord&logoColor=white&color=7289da
[wechat-url]: https://ghli.org/camel/wechat.png
[wechat-image]: https://img.shields.io/badge/WeChat-CamelAIOrg-brightgreen?logo=wechat&logoColor=white
[twitter-url]: https://twitter.com/CamelAIOrg
[twitter-image]: https://img.shields.io/twitter/follow/CamelAIOrg?style=social&color=brightgreen&logo=twitter
[arxiv-image]: https://img.shields.io/badge/arXiv-2407.01511-b31b1b.svg
[arxiv-url]: https://arxiv.org/abs/2407.01511
================================================
FILE: crab/__init__.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# ruff: noqa: F403
from .core import *
__version__ = "0.1.2"
================================================
FILE: crab/actions/android_actions.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import base64
import subprocess
from enum import Enum
from time import sleep
from crab import action
from .crab_actions import get_element_position
def execute_adb(adb_command: str, env=None) -> str:
    """Run an adb command, optionally targeting a specific device, and return its
    output.

    Args:
        adb_command: The adb sub-command to run, e.g. "shell wm size".
        env: Environment object whose ``device`` attribute (a serial string)
            selects the target device via ``adb -s``. ``None``, or an env with
            ``device is None``, uses the default device.

    Returns:
        The stripped stdout of the command on success, or the sentinel string
        "ERROR" on a non-zero exit status.
    """
    # Fix: the original dereferenced env.device unconditionally, so the
    # declared default env=None crashed with AttributeError. Guard env itself.
    if env is None or env.device is None:
        full_command = "adb " + adb_command
    else:
        full_command = f"adb -s {env.device} " + adb_command
    result = subprocess.run(
        full_command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if result.returncode == 0:
        return result.stdout.strip()
    print(f"Command execution failed: {full_command}")
    print(result.stderr)
    return "ERROR"
def get_device_size(env) -> tuple[int, int]:
    """Query the device screen size via ``adb shell wm size``.

    Args:
        env: Environment object passed through to :func:`execute_adb`.

    Returns:
        A ``(width, height)`` tuple of ints in pixels, or ``(0, 0)`` if the adb
        call failed.
    """
    result = execute_adb("shell wm size", env)
    if result == "ERROR":
        return 0, 0
    # Fix: the success path previously returned a lazy `map` object while the
    # failure path returned a tuple; return a concrete int tuple in both cases.
    # Output looks like "Physical size: 1080x2400" — parse the WxH pair.
    width, height = result.split(": ")[1].split("x")
    return int(width), int(height)
_DURATION = 1.5
@action
def setup(env) -> None:
    """Cache the device's screen dimensions on the environment object."""
    width, height = get_device_size(env)
    env.width = width
    env.height = height
@action
def screenshot(env) -> str:
    """
    Get the current screenshot of phone screen.
    """
    # Target a specific device with -s when env.device is set.
    device_flag = f"-s {env.device} " if env.device is not None else ""
    command = f"adb {device_flag}exec-out screencap -p"
    proc = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    # Raw PNG bytes from screencap, returned as a base64 string.
    return base64.b64encode(proc.stdout).decode("utf-8")
@action
def tap(element: int, env) -> None:
    """
    Tap an UI element shown on the smartphone screen. A simple use case can be
    tap(5), which taps the UI element labeled with the number 5.

    Args:
        element: A numeric tag assigned to an UI element shown on the smartphone
            screen.
    """
    pos_x, pos_y = get_element_position(element, env)
    execute_adb(f"shell input tap {pos_x} {pos_y}", env)
    sleep(_DURATION)
@action
def long_tap(element: int, env) -> None:
    """
    Press and hold a UI element on the smartphone screen for 1 second, typically
    to access additional menu options. For example, the command long_tap(5)
    simulates a long press on the UI element labeled with the number 5.

    Args:
        element: A numeric tag assigned to an UI element shown on the smartphone
            screen.
    """
    pos_x, pos_y = get_element_position(element, env)
    # A 1000 ms swipe that starts and ends at the same point emulates a long press.
    execute_adb(f"shell input swipe {pos_x} {pos_y} {pos_x} {pos_y} 1000", env)
    sleep(_DURATION)
class SwipeDirection(str, Enum):
    """Swipe directions accepted by the `swipe` action.

    The `str` mixin makes members compare equal to their plain string values
    (e.g. ``SwipeDirection.UP == "up"``), which `swipe` relies on.
    """

    RIGHT = "right"
    LEFT = "left"
    UP = "up"
    DOWN = "down"
class SwipeDist(str, Enum):
    """Swipe distance options for the `swipe` action.

    The `str` mixin makes members compare equal to their plain string values
    (e.g. ``SwipeDist.LONG == "long"``).
    """

    SHORT = "short"
    MEDIUM = "medium"
    LONG = "long"
@action
def swipe(element: int, direction: SwipeDirection, dist: SwipeDist, env) -> None:
    """
    This function is used to swipe an UI element shown on the smartphone screen,
    usually a scroll view or a slide bar. You should choose the appropriate
    direction and distance option according to your need. A simple use case can
    be swipe(21, "up", "medium"), which swipes up the UI element labeled with
    the number 21 for a medium distance.

    Args:
        element: is a numeric tag assigned to an UI element shown on the
            smartphone screen.
        direction: is a string that represents the swipe direction
        dist: determines the distance of the swipe.
    """
    start_x, start_y = get_element_position(element, env)
    # Base step is one tenth of the screen width; scale it by distance choice.
    step = int(env.width / 10)
    if dist == "long":
        step *= 3
    elif dist == "medium":
        step *= 2
    # Vertical swipes cover twice the step of horizontal ones.
    if direction == "up":
        delta = (0, -2 * step)
    elif direction == "down":
        delta = (0, 2 * step)
    elif direction == "left":
        delta = (-1 * step, 0)
    elif direction == "right":
        delta = (step, 0)
    else:
        return "ERROR"
    end_x = start_x + delta[0]
    end_y = start_y + delta[1]
    execute_adb(f"shell input swipe {start_x} {start_y} {end_x} {end_y} 200", env)
    sleep(_DURATION)
@action
def open_app_drawer(env) -> None:
    """Open app drawer to list all the installed applications in this phone.
    For example: you want to open "Messages" application, but you don't know
    where to find it, you can call "open_app_drawer()" and you will see all the
    installed applications through screenshot.
    """
    # Go to the home screen first, then swipe up from the bottom to reveal
    # the drawer; each step waits for the UI animation to settle.
    steps = [
        ("shell input keyevent KEYCODE_HOME", 0.5),
        ("shell input swipe 800 2000 800 100 500", _DURATION),
    ]
    for command, delay in steps:
        execute_adb(command, env)
        sleep(delay)
class AndroidKey(str, Enum):
    """Hardware/navigation keys supported by the `key_press` action."""

    HOME = "home"
    BACK = "back"
@action
def key_press(key: AndroidKey, env):
    """
    Press Android keys. press("home") to go back to main screen. press("back")
    to return to the previous page.

    Args:
        key (str): The pressed key.
    """
    if key == AndroidKey.HOME:
        keycode = "KEYCODE_HOME"
    elif key == AndroidKey.BACK:
        keycode = "KEYCODE_BACK"
    else:
        raise ValueError("Unsupported key")
    execute_adb(f"shell input keyevent {keycode}", env)
    sleep(_DURATION)
@action
def write_text(text: str, env) -> None:
    """
    Typing the specified text.

    Args:
        text (str): The text to be typed.
    """
    # `adb shell input text` interprets "%s" as a space; single quotes are
    # dropped because they would break the un-quoted shell argument.
    sanitized = text.replace(" ", "%s").replace("'", "")
    execute_adb(f"shell input text {sanitized}", env)
    sleep(_DURATION)
@action
def stop_all_apps(env) -> None:
    """
    Stop all running apps.
    """
    # Go home, open the recent-apps switcher, swipe it to reach "Clear all".
    execute_adb("shell input keyevent KEYCODE_HOME", env)
    execute_adb("shell input keyevent KEYCODE_APP_SWITCH", env)
    sleep(0.5)
    # Fixed: `env.height / 2` produced a float (e.g. "1000.0") inside the adb
    # command; `input swipe` expects integer coordinates.
    mid_height = env.height // 2
    command = f"shell input swipe 100 {mid_height} {env.width - 100} {mid_height} 200"
    execute_adb(command, env)
    sleep(0.5)
    # Tap the "Clear all" button — hard-coded position, presumably for the
    # benchmark emulator layout; TODO confirm on other devices.
    execute_adb("shell input tap 300 1400", env)
    sleep(_DURATION)
================================================
FILE: crab/actions/crab_actions.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from time import sleep
from crab import action, evaluator
@action(env_name="root")
def submit(content: str) -> None:
    """Submit your answer through this action. For example, if you are required to
    submit a word "apple", you can use submit(content="apple").

    Args:
        content: the content to submit
    """
    # Intentionally a no-op: check_submit reads the call back from
    # env.trajectory, so recording the invocation is all that is needed.
    pass
@evaluator(env_name="root")
def check_submit(text: str, env) -> bool:
    """Return True when the most recent trajectory entry is a submit of *text*."""
    if not env.trajectory:
        return False
    last_name, last_params, _ = env.trajectory[-1]
    return last_name == "submit" and text in last_params["content"]
@action(env_name="root")
def complete() -> bool:
    """When you think the task is completed, use this action to notify the system.
    For example, if you successfully complete the task, you can use complete().
    """
    # Intentionally empty; returns None despite the bool annotation — the call
    # itself is presumably what the framework checks. TODO confirm.
    pass
@action(env_name="root")
def wait() -> bool:
    """If the environment is still processing your action and you have nothing to do
    in this step, you can use wait().
    """
    # Blocks for a fixed 5 seconds; returns None despite the bool annotation.
    sleep(5)
def get_element_position(element_id, env):
    """Get element position provided by function `zs_object_detection`"""
    left, top, right, bottom = env.element_position_map[element_id]
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    return round(center_x), round(center_y)
================================================
FILE: crab/actions/desktop_actions.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import base64
import time
from enum import Enum
import pyautogui
from mss import mss, tools
from crab import action
from .crab_actions import get_element_position
# Mouse-movement duration and post-action settle delay, in seconds.
DURATION = 0.8
DELAY = 1.0
@action
def click_position(x: int, y: int) -> None:
    """
    click on the current desktop screen.

    Args:
        x: The X coordinate of the click position, in screen pixels.
        y: The Y coordinate of the click position, in screen pixels.
    """
    # Coordinates are absolute pixels (the `click` wrapper passes rounded
    # element centers), not normalized [0.0, 1.0] values as previously stated.
    pyautogui.click(x, y, duration=DURATION)
    time.sleep(DELAY)
@action(local=True)
def click(element: int, env) -> None:
    """
    Click an UI element shown on the desktop screen. A simple use case can be
    click(5), which clicks the UI element labeled with the number 5.

    Args:
        element: A numeric tag assigned to an UI element shown on the screenshot.
    """
    target_x, target_y = get_element_position(element, env)
    env._action_endpoint(click_position, {"x": target_x, "y": target_y})
@action
def right_click_position(x: int, y: int) -> None:
    """
    right-click on the current desktop screen.

    Args:
        x: The X coordinate of the click position, in screen pixels.
        y: The Y coordinate of the click position, in screen pixels.
    """
    # NOTE(review): unlike click_position there is no post-action sleep here;
    # the right_click wrapper sleeps instead.
    pyautogui.click(x, y, duration=DURATION, button="right")
@action(local=True)
def right_click(element: int, env) -> None:
    """
    Right-click an UI element shown on the desktop screen using the mouse, which is
    usually used for opening the menu of the element. A simple use case can be
    right_click(5), which right-clicks the UI element labeled with the number 5 to
    open up menu on it.

    Args:
        element: A numeric tag assigned to an UI element shown on the screenshot.
    """
    target_x, target_y = get_element_position(element, env)
    env._action_endpoint(right_click_position, {"x": target_x, "y": target_y})
    time.sleep(DELAY)
@action
def double_click_position(x: int, y: int) -> None:
    """
    Double-click on the current desktop screen.

    Args:
        x: The X coordinate of the click position, in screen pixels.
        y: The Y coordinate of the click position, in screen pixels.
    """
    # Two clicks 0.2 s apart; no post-action sleep here — the double_click
    # wrapper sleeps instead.
    pyautogui.click(x, y, duration=DURATION, clicks=2, interval=0.2)
@action(local=True)
def double_click(element: int, env) -> None:
    """
    Double-click an UI element shown on the desktop screen using the mouse, which is
    usually used for opening a folder or a file. A simple use case can be
    double_click(5), which double-clicks the UI element labeled with the number 5 to
    open it.

    Args:
        element: A numeric tag assigned to an UI element shown on the screenshot.
    """
    target_x, target_y = get_element_position(element, env)
    env._action_endpoint(double_click_position, {"x": target_x, "y": target_y})
    time.sleep(DELAY)
@action
def mouse_scroll(click: int = 1) -> None:
    """
    Performs a scroll of the mouse scroll wheel.

    Args:
        click(int): The amount of scrolling; positive scrolls up, negative
            scrolls down (pyautogui convention). Default to 1.
    """
    pyautogui.scroll(click)
    time.sleep(DELAY)
class KeyEnum(str, Enum):
    """Keyboard keys accepted by `key_press` and `press_hotkey`.

    Values are the literal strings pyautogui expects: single printable
    characters plus named special keys (modifiers, function keys, arrows).
    NOTE(review): "KET_LT" and "KEY_TLIDE" look like typos for "KEY_LT" and
    "KEY_TILDE"; the member names are kept as-is to avoid breaking references.
    """

    KEY_TAB = "\t"
    KEY_LB = "\n"
    KEY_RR = "\r"
    KEY_SPACE = " "
    KEY_EXCLAMATION = "!"
    KEY_DQUOTE = '"'
    KEY_SHARP = "#"
    KEY_DOLLAR = "$"
    KEY_PER = "%"
    KEY_AND = "&"
    KEY_SQUOTE = "'"
    KEY_LPAR = "("
    KEY_RPAR = ")"
    KEY_MUL = "*"
    KEY_ADD = "+"
    KEY_COMMA = ","
    KEY_MIN = "-"
    KEY_DOT = "."
    KEY_SLASH = "/"
    KEY_0 = "0"
    KEY_1 = "1"
    KEY_2 = "2"
    KEY_3 = "3"
    KEY_4 = "4"
    KEY_5 = "5"
    KEY_6 = "6"
    KEY_7 = "7"
    KEY_8 = "8"
    KEY_9 = "9"
    KEY_COL = ":"
    KEY_SEMICOL = ";"
    KET_LT = "<"
    KEY_EQUAL = "="
    KEY_GT = ">"
    KEY_QM = "?"
    KEY_AT = "@"
    KEY_LBRA = "["
    KEY_RSLASH = "\\"
    KEY_RBRA = "]"
    KEY_CARET = "^"
    KEY_UNDERLINE = "_"
    KEY_BACKTICK = "`"
    KEY_LBRACE = "{"
    KEY_RBRACE = "}"
    KEY_PIPE = "|"
    KEY_TLIDE = "~"
    KEY_A = "a"
    KEY_B = "b"
    KEY_C = "c"
    KEY_D = "d"
    KEY_E = "e"
    KEY_F = "f"
    KEY_G = "g"
    KEY_H = "h"
    KEY_I = "i"
    KEY_J = "j"
    KEY_K = "k"
    KEY_L = "l"
    KEY_M = "m"
    KEY_N = "n"
    KEY_O = "o"
    KEY_P = "p"
    KEY_Q = "q"
    KEY_R = "r"
    KEY_S = "s"
    KEY_T = "t"
    KEY_U = "u"
    KEY_V = "v"
    KEY_W = "w"
    KEY_X = "x"
    KEY_Y = "y"
    KEY_Z = "z"
    KEY_ALT = "alt"
    KEY_SHIFT = "shift"
    KEY_CTRL = "ctrl"
    KEY_WIN = "win"
    KEY_BACKSPACE = "backspace"
    KEY_ENTER = "enter"
    KEY_ESC = "esc"
    KEY_F1 = "f1"
    KEY_F2 = "f2"
    KEY_F3 = "f3"
    KEY_F4 = "f4"
    KEY_F5 = "f5"
    KEY_F6 = "f6"
    KEY_F7 = "f7"
    KEY_F8 = "f8"
    KEY_F9 = "f9"
    KEY_F10 = "f10"
    KEY_F11 = "f11"
    KEY_F12 = "f12"
    KEY_LEFT = "left"
    KEY_UP = "up"
    KEY_RIGHT = "right"
    KEY_DOWN = "down"
@action
def key_press(key: KeyEnum) -> None:
    """
    Performs a keyboard key press down, followed by a release.

    Args:
        key (str): The key to be pressed.
    """
    # Accept both KeyEnum members and plain strings.
    key_name = key.value if isinstance(key, KeyEnum) else key
    pyautogui.press(key_name)
    time.sleep(DELAY)
@action
def press_hotkey(keys: list[KeyEnum]) -> None:
    """
    Press multiple keyboard keys at the same time. For example, if you want to use
    Ctrl-C hotkey to copy the selected text, you can call
    press_hotkey(keys=["ctrl", "c"]).

    Args:
        keys (list[str]): The keys to be pressed together.
    """
    # Fixed: the original converted only when keys[0] was a KeyEnum, so it
    # crashed on an empty list and broke on mixed str/KeyEnum lists; normalize
    # each entry individually instead.
    normalized = [k.value if isinstance(k, KeyEnum) else k for k in keys]
    pyautogui.hotkey(*normalized)
    time.sleep(DELAY)
@action
def write_text(text: str) -> None:
    """
    Typing the specified text. Note: This function does not move the mouse cursor.
    Ensure the cursor focuses in the correct text input field before calling this
    function.

    Args:
        text (str): The text to be typed.
    """
    # 30 ms between characters so the receiving application can keep up.
    pyautogui.write(text, interval=0.03)
    time.sleep(DELAY)
@action
def search_application(name: str) -> None:
    """
    Search an application name. For example, if you want to open an application named
    "slack", you can call search_application(name="slack"). You MUST use this action to
    search for applications.

    Args:
        name: the application name.
    """
    # Dismiss any open menu/popup first so the shortcut below is received.
    pyautogui.press("esc")
    time.sleep(DELAY)
    # "win"+"a" opens the application grid — presumably a GNOME desktop;
    # TODO confirm this shortcut on the target desktop environment.
    pyautogui.hotkey("win", "a")
    time.sleep(DELAY)
    pyautogui.write(name)
    time.sleep(DELAY)
@action
def screenshot() -> str:
    """Get the current screenshot.

    Returns:
        The monitor screenshot as a PNG, base64-encoded into a UTF-8 string.
    """
    with mss() as sct:
        # Get raw pixels from the screen; monitors[1] is the first physical
        # monitor (monitors[0] is the virtual bounding box of all monitors).
        sct_img = sct.grab(sct.monitors[1])
        # Create the Image
        png = tools.to_png(sct_img.rgb, sct_img.size)
        base64_img = base64.b64encode(png).decode("utf-8")
        return base64_img
================================================
FILE: crab/actions/file_actions.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import base64
from io import BytesIO
from PIL import Image
from crab.core import action
@action
def save_base64_image(image: str, path: str = "image.png") -> None:
    """Decode a base64-encoded image and write it to disk.

    Args:
        image: The base64-encoded image data.
        path: Destination file path; the format follows its extension.
    """
    # Use a separate name for the decoded image instead of shadowing the
    # base64-string parameter `image`.
    decoded = Image.open(BytesIO(base64.b64decode(image)))
    decoded.save(path)
================================================
FILE: crab/actions/system_actions.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import subprocess
from time import sleep
from crab.core.decorators import action
@action
def delay(time: float) -> None:
    """Block for the given number of seconds.

    Args:
        time: Seconds to sleep. (The parameter shadows the stdlib `time` module
            name, but that module is not imported here, so it is harmless.)
    """
    sleep(time)
@action
def run_bash_command(command: str) -> str:
    """
    Run a command using bash shell. You can use this command to open any application
    by their name.

    Args:
        command: The command to be run.

    Return:
        stdout and stderr
    """
    # Fixed: `["bash", command]` treats the string as a script *file* path;
    # `bash -c` executes it as a command line, which is what callers pass.
    # text=True decodes stdout/stderr so the result is not a bytes repr.
    p = subprocess.run(["bash", "-c", command], capture_output=True, text=True)
    return f'stdout: "{p.stdout}"\nstderr: "{p.stderr}"'
================================================
FILE: crab/actions/visual_prompt_actions.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import logging
from functools import cache
from typing import Literal
from PIL import Image, ImageDraw, ImageFont
from crab import action
from crab.utils.common import base64_to_image, image_to_base64
logger = logging.getLogger(__name__)
try:
import easyocr
import numpy as np
import torch
from transformers import (
AutoProcessor,
GroundingDinoForObjectDetection,
GroundingDinoProcessor,
)
device = "cuda" if torch.cuda.is_available() else "cpu"
TRANSFORMERS_ENABLE = True
except ImportError:
TRANSFORMERS_ENABLE = False
# An axis-aligned box as (x1, y1, x2, y2) pixel coordinates.
BoxType = tuple[int, int, int, int]
# A box paired with its label text (None when the detector produced no label).
AnnotatedBoxType = tuple[BoxType, str | None]
def check_transformers_import() -> None:
    """Raise ImportError when the optional detection dependencies are absent."""
    if TRANSFORMERS_ENABLE:
        return
    raise ImportError(
        "Please install the required dependencies to use this function by running"
        " `pip install crab-framework[client]`"
    )
def _calculate_iou(box1: BoxType, box2: BoxType) -> float:
xA = max(box1[0], box2[0])
yA = max(box1[1], box2[1])
xB = min(box1[2], box2[2])
yB = min(box1[3], box2[3])
interArea = max(0, xB - xA) * max(0, yB - yA)
box1Area = (box1[2] - box1[0]) * (box1[3] - box1[1])
box2Area = (box2[2] - box2[0]) * (box2[3] - box2[1])
unionArea = box1Area + box2Area - interArea
iou = interArea / unionArea
return iou
def _calculate_center(box: BoxType) -> tuple[float, float]:
    """Return the (x, y) center of a (x1, y1, x2, y2) box as floats."""
    return (box[0] + box[2]) / 2, (box[1] + box[3]) / 2
def _remove_invalid_boxes(
boxes_with_label: AnnotatedBoxType, width: int, height: int
) -> AnnotatedBoxType:
boxes = [box[0] for box in boxes_with_label]
boxes_to_remove = set()
for idx, box in enumerate(boxes):
if box[0] < 0 or box[1] < 0 or box[2] > width or box[3] > height:
boxes_to_remove.add(idx)
continue
if box[0] >= box[2] or box[1] >= box[3]:
boxes_to_remove.add(idx)
continue
boxes_filt = [
box for idx, box in enumerate(boxes_with_label) if idx not in boxes_to_remove
]
return boxes_filt
def _filter_boxes_by_center(
    boxes_with_label: list[AnnotatedBoxType], center_dis_thresh: float
) -> list[AnnotatedBoxType]:
    """Drop boxes whose center is within `center_dis_thresh` of the center of
    an earlier surviving box (first box wins)."""
    all_boxes = [entry[0] for entry in boxes_with_label]
    discarded: set[int] = set()
    threshold_sq = center_dis_thresh**2
    for i, anchor in enumerate(all_boxes):
        if i in discarded:
            continue
        anchor_cx, anchor_cy = _calculate_center(anchor)
        for j in range(i + 1, len(all_boxes)):
            other_cx, other_cy = _calculate_center(all_boxes[j])
            # Compare squared distances to avoid a sqrt per pair.
            if (anchor_cx - other_cx) ** 2 + (anchor_cy - other_cy) ** 2 < threshold_sq:
                discarded.add(j)
    return [
        entry for idx, entry in enumerate(boxes_with_label) if idx not in discarded
    ]
def _box_a_in_b(a: BoxType, b: BoxType) -> bool:
return a[0] >= b[0] and a[1] >= b[1] and a[2] <= b[2] and a[3] <= b[3]
def _filter_boxes_by_overlap(
    boxes_with_label: list[AnnotatedBoxType],
) -> list[AnnotatedBoxType]:
    """Remove every box that fully contains another (surviving) box, keeping
    the more specific inner elements."""
    all_boxes = [entry[0] for entry in boxes_with_label]
    discarded: set[int] = set()
    for i, inner in enumerate(all_boxes):
        if i in discarded:
            continue
        for j, outer in enumerate(all_boxes):
            # When box i sits inside box j, discard the enclosing box j.
            if i != j and _box_a_in_b(inner, outer):
                discarded.add(j)
    return [
        entry for idx, entry in enumerate(boxes_with_label) if idx not in discarded
    ]
def _filter_boxes_by_iou(
    boxes_with_label: list[AnnotatedBoxType], iou_threshold=0.5
) -> list[AnnotatedBoxType]:
    """Greedy de-duplication: a later box overlapping an earlier surviving box
    with IoU >= iou_threshold is dropped."""
    all_boxes = [entry[0] for entry in boxes_with_label]
    discarded: set[int] = set()
    for i, anchor in enumerate(all_boxes):
        if i in discarded:
            continue
        for j in range(i + 1, len(all_boxes)):
            if _calculate_iou(anchor, all_boxes[j]) >= iou_threshold:
                discarded.add(j)
    return [
        entry for idx, entry in enumerate(boxes_with_label) if idx not in discarded
    ]
def _draw_boxes(
    image: Image.Image,
    boxes: list[BoxType],
    font_size: int = 30,
) -> None:
    """Draw each box on *image* in place, tagging it with its index.

    The index tag is placed just left of the box when there is room, otherwise
    just right of it, vertically centered on the box.

    Args:
        image: The image to draw on (modified in place).
        boxes: Boxes as (x1, y1, x2, y2) coordinates.
        font_size: Font size for the index labels.
    """
    draw = ImageDraw.Draw(image)
    # The font is the same for every box; load it once instead of per iteration.
    font = ImageFont.load_default(font_size)
    for idx, box in enumerate(boxes):
        # Mid-range random color keeps the white label text readable.
        color = tuple(np.random.randint(64, 191, size=3).tolist())
        center = _calculate_center(box)
        draw.rectangle([box[0], box[1], box[2], box[3]], outline=color, width=2)
        if hasattr(font, "getbbox"):
            _, _, w, h = draw.textbbox((0, 0), str(idx), font)
        else:
            # Fallback for older Pillow versions without getbbox.
            w, h = draw.textsize(str(idx), font)
        if box[0] >= w:
            bbox = (
                round(box[0] - w),
                round(center[1] - h / 2),
                round(box[0]),
                round(center[1] + h / 2),
            )
        else:
            bbox = (
                round(box[2]),
                round(center[1] - h / 2),
                round(box[2] + w),
                round(center[1] + h / 2),
            )
        draw.rectangle(bbox, fill=color)
        draw.text((bbox[0], bbox[1]), str(idx), fill="white", font=font)
@cache
def _get_grounding_dino_model(
    type: Literal["tiny", "base"] = "tiny",
) -> tuple[GroundingDinoProcessor, GroundingDinoForObjectDetection]:
    """Get the grounding dino model.

    Cached so the model is downloaded and moved to the device only once per
    process. (The parameter shadows the builtin `type`; it is kept for
    backward compatibility with keyword callers.)

    Args:
        type: The version of the Grounding Dino model.

    Returns:
        A tuple (processor, model).
    """
    model_name = f"IDEA-Research/grounding-dino-{type}"
    processor = AutoProcessor.from_pretrained(model_name)
    model = GroundingDinoForObjectDetection.from_pretrained(model_name).to(device)
    return processor, model
@cache
def _get_easyocr_model() -> easyocr.Reader:
    """Return a cached English EasyOCR reader (constructed once per process)."""
    return easyocr.Reader(["en"])
def get_groundingdino_boxes(
    images: Image.Image | list[Image.Image],
    text_prompt: str,
    box_threshold: float = 0.05,
    text_threshold: float = 0.5,
) -> list[list[AnnotatedBoxType]]:
    """Get the bounding boxes of the objects in the image using GroundingDino.

    Args:
        images: The image or list of images.
        text_prompt: The text prompt to use for all the images.
        box_threshold: The box threshold.
        text_threshold: The text threshold.

    Returns:
        The first level list is for each image, and the second level list contains
        tuples (detected boxes, its semantical representation) as the result of the
        image.
    """
    processor, model = _get_grounding_dino_model()
    # Accept a single image by normalizing to a one-element batch.
    if isinstance(images, Image.Image):
        images = [images]
    image_number = len(images)
    images = [image.convert("RGB") for image in images]
    # The same text prompt is repeated for every image in the batch.
    inputs = processor(
        images=images,
        text=[text_prompt] * image_number,
        return_tensors="pt",
    ).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # Post-processing expects (height, width); PIL's size is (width, height).
    target_sizes = [image.size[::-1] for image in images]
    detection_results = processor.post_process_grounded_object_detection(
        outputs,
        inputs.input_ids,
        box_threshold=box_threshold,
        text_threshold=text_threshold,
        target_sizes=target_sizes,
    )
    final_output = []
    for result in detection_results:
        boxes = result["boxes"].cpu().int().tolist()
        labels = result["labels"]
        final_output.append(list(zip(boxes, labels)))
    return final_output
def get_easyocr_boxes(
    image: Image.Image,
) -> list[AnnotatedBoxType]:
    """Get the bounding boxes of the text in the image using EasyOCR.

    Args:
        image: The target image.

    Returns:
        The list of tuple of bounding boxes and their corresponding text.
    """
    reader = _get_easyocr_model()
    result = reader.readtext(np.array(image), text_threshold=0.9)
    boxes = []
    for detect in result:
        # detect[0] is a quadrilateral of corner points; corners 0 and 2 are
        # presumably top-left and bottom-right — TODO confirm for rotated text.
        boxes.append(
            (
                (
                    detect[0][0][0],
                    detect[0][0][1],
                    detect[0][2][0],
                    detect[0][2][1],
                ),
                detect[1],
            )
        )
    return boxes
@action(local=True)
def groundingdino_easyocr(
    input_base64_image: str,
    font_size: int,
    env,
) -> tuple[str, list[AnnotatedBoxType]]:
    """Get the interactive elements in the image.

    Using GroundingDino and EasyOCR to detect the interactive elements in the image.
    Mark the detected elements with bounding boxes and labels. Store the labels and
    boxes in the environment to be used in other actions.

    Args:
        input_base64_image: The base64 encoded image.
        font_size: The font size of the label.

    Returns:
        A tuple (base64_image, boxes), where base64_image is the base64 encoded image
        drawn with bounding boxes and labels, and box is the list of detected boxes and
        labels.
    """
    check_transformers_import()
    image = base64_to_image(input_base64_image)
    # Low box threshold favors recall; duplicates are pruned by IoU below.
    od_boxes = get_groundingdino_boxes(image, "icon . logo .", box_threshold=0.02)[0]
    od_boxes = _filter_boxes_by_iou(od_boxes, iou_threshold=0.5)
    ocr_boxes = get_easyocr_boxes(image)
    # OCR boxes come first so text labels survive the near-duplicate filters.
    boxes_with_label = ocr_boxes + od_boxes
    filtered_boxes = _remove_invalid_boxes(boxes_with_label, image.width, image.height)
    filtered_boxes = _filter_boxes_by_overlap(filtered_boxes)
    # Centers closer than ~1/80 of the longest image side count as duplicates.
    center_dis = round(max(image.height, image.width) / 80.0)
    filtered_boxes = _filter_boxes_by_center(filtered_boxes, center_dis)
    env.element_label_map = [box[1] for box in filtered_boxes]
    result_boxes = [box[0] for box in filtered_boxes]
    _draw_boxes(image, result_boxes, font_size)
    env.element_position_map = result_boxes
    # NOTE(review): OCR fragments are joined without separators; confirm that
    # downstream consumers expect this before changing it.
    env.ocr_results = "".join([box[1] for box in ocr_boxes])
    return image_to_base64(image), filtered_boxes
@action(local=True)
def get_elements_prompt(
    input: tuple[str, list[AnnotatedBoxType]], env
) -> tuple[str, str]:
    """Get the text prompt passing to the agent for the image.

    Args:
        input: The base64 encoded image and the list of detected boxes and labels.

    Returns:
        A tuple (image, prompt) contains the base64 encoded image and the prompt.
    """
    image, boxes = input
    # One "[id|label]" line per labeled box; unlabeled boxes are omitted.
    labels = "".join(
        f"[{idx}|{label}]\n"
        for idx, (_, label) in enumerate(boxes)
        if label is not None
    )
    prompt = (
        "Some elements in the current screenshot have labels. I will give you "
        "these labels by [id|label].\n" + labels
    )
    return image, prompt
================================================
FILE: crab/agents/backend_models/__init__.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# ruff: noqa: F401
from typing import Any, Literal
from pydantic import BaseModel
from crab.core.backend_model import BackendModel
from .camel_model import CamelModel
from .claude_model import ClaudeModel
from .gemini_model import GeminiModel
from .openai_model import OpenAIModel, OpenAIModelJSON, SGlangOpenAIModelJSON
class BackendModelConfig(BaseModel):
    """Declarative configuration consumed by `create_backend_model` to build a
    concrete `BackendModel` instance.

    NOTE(review): "json_structre_output" looks like a typo for
    "json_structured_output"; the field name is kept because callers and
    serialized configs reference it.
    """

    model_class: Literal["openai", "claude", "gemini", "camel", "sglang"]
    """Specify the model class to be used. Different model classes use different
    APIs.
    """

    model_name: str
    """Specify the model name to be used. This value is directly passed to the API,
    check model provider API documentation for more details.
    """

    model_platform: str | None = None
    """Required for CamelModel. Otherwise, it is ignored. Please check CAMEL
    documentation for more details.
    """

    history_messages_len: int = 0
    """Number of rounds of previous messages to be used in the model input. 0 means no
    history.
    """

    parameters: dict[str, Any] = {}
    """Additional parameters to be passed to the model."""

    json_structre_output: bool = False
    """If True, the model generate action through JSON without using "tool call" or
    "function call". SGLang model only supports JSON output. OpenAI model supports both.
    Other models do not support JSON output.
    """

    tool_call_required: bool = True
    """Specify if the model enforce each round to generate tool/function calls."""

    base_url: str | None = None
    """Specify the base URL of the API. Only used in OpenAI and SGLang currently."""

    api_key: str | None = None
    """Specify the API key to be used. Only used in OpenAI and SGLang currently."""
def create_backend_model(model_config: BackendModelConfig) -> BackendModel:
match model_config.model_class:
case "claude":
if model_config.base_url is not None or model_config.api_key is not None:
raise Warning(
"base_url and api_key are not supported for ClaudeModel currently."
)
if model_config.json_structre_output:
raise Warning(
"json_structre_output is not supported for ClaudeModel currently."
)
return ClaudeModel(
model=model_config.model_name,
parameters=model_config.parameters,
history_messages_len=model_config.history_messages_len,
tool_call_required=model_config.tool_call_required,
)
case "gemini":
if model_config.base_url is not None or model_config.api_key is not None:
raise Warning(
"base_url and api_key are not supported for GeminiModel currently."
)
if model_config.json_structre_output:
raise Warning(
"json_structre_output is not supported for GeminiModel currently."
)
return GeminiModel(
model=model_config.model_name,
parameters=model_config.parameters,
history_messages_len=model_config.history_messages_len,
tool_call_required=model_config.tool_call_required,
)
case "openai":
if not model_config.json_structre_output:
return OpenAIModel(
model=model_config.model_name,
parameters=model_config.parameters,
history_messages_len=model_config.history_messages_len,
base_url=model_config.base_url,
api_key=model_config.api_key,
tool_call_required=model_config.tool_call_required,
)
else:
return OpenAIModelJSON(
model=model_config.model_name,
parameters=model_config.parameters,
history_messages_len=model_config.history_messages_len,
base_url=model_config.base_url,
api_key=model_config.api_key,
)
case "sglang":
return SGlangOpenAIModelJSON(
model=model_config.model_name,
parameters=model_config.parameters,
history_messages_len=model_config.history_messages_len,
base_url=model_config.base_url,
api_key=model_config.api_key,
)
case "camel":
return CamelModel(
model=model_config.model_name,
model_platform=model_config.model_platform,
parameters=model_config.parameters,
history_messages_len=model_config.history_messages_len,
tool_call_required=model_config.tool_call_required,
)
case _:
raise ValueError(f"Unsupported model name: {model_config.model_name}")
================================================
FILE: crab/agents/backend_models/camel_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import json
from typing import Any
from openai.types.chat import ChatCompletionMessageToolCall
from PIL import Image
from crab import Action, ActionOutput, BackendModel, BackendOutput, MessageType
from crab.utils.common import base64_to_image
try:
from camel.agents import ChatAgent
from camel.messages import BaseMessage
from camel.models import ModelFactory
from camel.toolkits import OpenAIFunction
from camel.types.enums import ModelPlatformType, ModelType
CAMEL_ENABLED = True
except ImportError:
CAMEL_ENABLED = False
def _get_model_platform_type(model_platform_name: str) -> "ModelPlatformType":
    """Map a platform name string to CAMEL's ModelPlatformType enum member."""
    try:
        return ModelPlatformType(model_platform_name)
    except ValueError:
        # Unknown platform: report every supported value in the error.
        all_models = [member.value for member in ModelPlatformType]
        raise ValueError(
            f"Model {model_platform_name} not found. Supported models are {all_models}"
        )
def _get_model_type(model_name: str) -> "str | ModelType":
    """Map a model name to CAMEL's ModelType enum, falling back to the raw
    string for names the enum does not know about."""
    try:
        return ModelType(model_name)
    except ValueError:
        return model_name
def _convert_action_to_schema(
action_space: list[Action] | None,
) -> "list[OpenAIFunction] | None":
if action_space is None:
return None
schema_list = []
for action in action_space:
new_action = action.to_openai_json_schema()
schema = {"type": "function", "function": new_action}
schema_list.append(OpenAIFunction(action.entry, schema))
return schema_list
def _convert_tool_calls_to_action_list(
tool_calls: list[ChatCompletionMessageToolCall] | None,
) -> list[ActionOutput] | None:
if tool_calls is None:
return None
return [
ActionOutput(
name=call.function.name,
arguments=json.loads(call.function.arguments),
)
for call in tool_calls
]
class CamelModel(BackendModel):
    """Backend model adapter that routes chat through the CAMEL-AI framework."""

    def __init__(
        self,
        model: str,
        model_platform: str,
        parameters: dict[str, Any] | None = None,
        history_messages_len: int = 0,
        tool_call_required: bool = True,
    ) -> None:
        """Store configuration; the ChatAgent client is created lazily in reset().

        Args:
            model: Model name forwarded to CAMEL's ModelFactory.
            model_platform: CAMEL model platform name.
            parameters: Extra model config merged into each request.
            history_messages_len: Message window size for the agent's memory.
            tool_call_required: Whether every round must produce a tool call.

        Raises:
            ImportError: If camel-ai is not installed.
        """
        if not CAMEL_ENABLED:
            raise ImportError("Please install camel-ai to use CamelModel")
        self.model = model
        self.parameters = parameters if parameters is not None else {}
        # Fixed: the original assigned history_messages_len twice; once suffices.
        self.history_messages_len = history_messages_len
        self.model_type = _get_model_type(model)
        self.model_platform_type = _get_model_platform_type(model_platform)
        self.client: ChatAgent | None = None
        self.token_usage = 0
        self.tool_call_required = tool_call_required

    def get_token_usage(self) -> int:
        """Return the total tokens consumed since the last reset()."""
        return self.token_usage

    def reset(self, system_message: str, action_space: list[Action] | None) -> None:
        """Create a fresh ChatAgent with the given system prompt and tools."""
        action_schema = _convert_action_to_schema(action_space)
        config = self.parameters.copy()
        if action_schema is not None:
            config["tool_choice"] = "required" if self.tool_call_required else "auto"
            config["tools"] = [
                schema.get_openai_tool_schema() for schema in action_schema
            ]
        backend_model = ModelFactory.create(
            self.model_platform_type,
            self.model_type,
            model_config_dict=config,
        )
        sysmsg = BaseMessage.make_assistant_message(
            role_name="Assistant",
            content=system_message,
        )
        self.client = ChatAgent(
            model=backend_model,
            system_message=sysmsg,
            external_tools=action_schema,
            message_window_size=self.history_messages_len,
        )
        self.token_usage = 0

    def chat(self, messages: list[tuple[str, MessageType]]) -> BackendOutput:
        """Send one user turn (text plus images) and return the parsed output."""
        # TODO: handle multiple text messages after message refactoring
        image_list: list[Image.Image] = []
        content = ""
        for message in messages:
            if message[1] == MessageType.IMAGE_JPG_BASE64:
                image_list.append(base64_to_image(message[0]))
            else:
                content = message[0]
        usermsg = BaseMessage.make_user_message(
            role_name="User",
            content=content,
            image_list=image_list,
        )
        response = self.client.step(usermsg)
        self.token_usage += response.info["usage"]["total_tokens"]
        tool_call_request = response.info.get("external_tool_request")
        # Fixed: the original always wrapped the request in a list, so a missing
        # request produced [None] and crashed the converter; pass None through.
        if tool_call_request is None:
            action_list = None
        else:
            action_list = _convert_tool_calls_to_action_list([tool_call_request])
        return BackendOutput(
            message=response.msg.content,
            action_list=action_list,
        )
================================================
FILE: crab/agents/backend_models/claude_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from copy import deepcopy
from typing import Any
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from crab import Action, ActionOutput, BackendModel, BackendOutput, Message, MessageType
try:
import anthropic
from anthropic.types import TextBlock, ToolUseBlock
anthropic_model_enable = True
except ImportError:
anthropic_model_enable = False
class ClaudeModel(BackendModel):
def __init__(
self,
model: str,
parameters: dict[str, Any] | None = None,
history_messages_len: int = 0,
tool_call_required: bool = True,
) -> None:
if anthropic_model_enable is False:
raise ImportError("Please install anthropic to use ClaudeModel")
self.model = model
self.parameters = parameters if parameters is not None else {}
self.history_messages_len = history_messages_len
assert self.history_messages_len >= 0
self.client = anthropic.Anthropic()
self.tool_call_required: bool = tool_call_required
self.system_message: str = "You are a helpful assistant."
self.action_space: list[Action] | None = None
self.action_schema: list[dict] | None = None
self.token_usage: int = 0
self.chat_history: list[list[dict]] = []
self.support_tool_call = True
def reset(self, system_message: str, action_space: list[Action] | None) -> None:
self.system_message = system_message
self.action_space = action_space
self.action_schema = _convert_action_to_schema(self.action_space)
self.token_usage = 0
self.chat_history = []
def chat(self, message: list[Message] | Message) -> BackendOutput:
if isinstance(message, tuple):
message = [message]
request = self._fetch_from_memory()
new_message = self._construct_new_message(message)
request.append(new_message)
response_message = self._call_api(request)
self._record_message(new_message, response_message)
return self._generate_backend_output(response_message)
def _construct_new_message(self, message: list[Message]) -> dict[str, Any]:
parts: list[dict] = []
for content, msg_type in message:
match msg_type:
case MessageType.TEXT:
parts.append(
{
"type": "text",
"text": content,
}
)
case MessageType.IMAGE_JPG_BASE64:
parts.append(
{
"type": "image",
"source": {
"data": content,
"type": "base64",
"media_type": "image/png",
},
}
)
return {
"role": "user",
"content": parts,
}
def _fetch_from_memory(self) -> list[dict]:
request: list[dict] = []
if self.history_messages_len > 0:
fetch_history_len = min(self.history_messages_len, len(self.chat_history))
for history_message in self.chat_history[-fetch_history_len:]:
request = request + history_message
return request
def get_token_usage(self):
return self.token_usage
def _record_message(
self, new_message: dict, response_message: anthropic.types.Message
) -> None:
self.chat_history.append([new_message])
self.chat_history[-1].append(
{"role": response_message.role, "content": response_message.content}
)
if self.action_schema:
tool_calls = response_message.content
tool_content = []
for call in tool_calls:
if isinstance(call, ToolUseBlock):
tool_content.append(
{
"type": "tool_result",
"tool_use_id": call.id,
"content": "success",
}
)
self.chat_history[-1].append(
{
"role": "user",
"content": tool_content,
}
)
@retry(
wait=wait_fixed(10),
stop=stop_after_attempt(7),
retry=retry_if_exception_type(
(
anthropic.APITimeoutError,
anthropic.APIConnectionError,
anthropic.InternalServerError,
)
),
)
def _call_api(self, request_messages: list[dict]) -> anthropic.types.Message:
request_messages = _merge_request(request_messages)
if self.action_schema is not None:
response = self.client.messages.create(
system=self.system_message, # <-- system prompt
messages=request_messages, # type: ignore
model=self.model,
max_tokens=4096,
tools=self.action_schema,
tool_choice={"type": "any" if self.tool_call_required else "auto"},
**self.parameters,
)
else:
response = self.client.messages.create(
system=self.system_message, # <-- system prompt
messages=request_messages, # type: ignore
model=self.model,
max_tokens=4096,
**self.parameters,
)
self.token_usage += response.usage.input_tokens + response.usage.output_tokens
return response
def _generate_backend_output(
self, response_message: anthropic.types.Message
) -> BackendOutput:
message = ""
action_list = []
for block in response_message.content:
if isinstance(block, TextBlock):
message += block.text
elif isinstance(block, ToolUseBlock):
action_list.append(
ActionOutput(
name=block.name,
arguments=block.input, # type: ignore
)
)
if not action_list:
return BackendOutput(message=message, action_list=None)
else:
return BackendOutput(
message=message,
action_list=action_list,
)
def _merge_request(request: list[dict]) -> list[dict]:
merge_request = [deepcopy(request[0])]
for idx in range(1, len(request)):
if request[idx]["role"] == merge_request[-1]["role"]:
merge_request[-1]["content"].extend(request[idx]["content"])
else:
merge_request.append(deepcopy(request[idx]))
return merge_request
def _convert_action_to_schema(action_space):
if action_space is None:
return None
actions = []
for action in action_space:
new_action = action.to_openai_json_schema()
new_action["input_schema"] = new_action.pop("parameters")
if "returns" in new_action:
new_action.pop("returns")
if "title" in new_action:
new_action.pop("title")
if "type" in new_action:
new_action["input_schema"]["type"] = new_action.pop("type")
if "required" in new_action:
new_action["input_schema"]["required"] = new_action.pop("required")
actions.append(new_action)
return actions
================================================
FILE: crab/agents/backend_models/gemini_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import os
from typing import Any
from PIL.Image import Image
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from crab import Action, ActionOutput, BackendModel, BackendOutput, Message, MessageType
from crab.utils.common import base64_to_image, json_expand_refs
# Optional dependency: google.generativeai is only needed by GeminiModel, so
# an import failure is recorded in a flag instead of raising at module import.
try:
    import google.generativeai as genai
    from google.ai.generativelanguage_v1beta import (
        Content,
        FunctionDeclaration,
        Part,
        Tool,
    )
    from google.api_core.exceptions import ResourceExhausted
    from google.generativeai.types import content_types

    gemini_model_enable = True
except ImportError:
    gemini_model_enable = False
class GeminiModel(BackendModel):
    """Backend model wrapper around Google's Gemini API (google.generativeai).

    Replays a sliding window of past turns (``history_messages_len``) on each
    request and converts crab Actions into Gemini ``Tool`` /
    ``FunctionDeclaration`` objects for function calling.
    """

    def __init__(
        self,
        model: str,
        parameters: dict[str, Any] | None = None,
        history_messages_len: int = 0,
        tool_call_required: bool = True,
    ) -> None:
        """Configure the Gemini SDK and initialize bookkeeping state.

        Args:
            model: Gemini model name passed to ``GenerativeModel``.
            parameters: Extra generation parameters (currently NOT forwarded
                in ``_call_api`` — see the TODO there).
            history_messages_len: Number of past turns replayed per request.
            tool_call_required: If True, function-calling mode is "ANY".
        """
        if gemini_model_enable is False:
            raise ImportError("Please install google.generativeai to use GeminiModel")
        self.model = model
        self.parameters = parameters if parameters is not None else {}
        self.history_messages_len = history_messages_len
        assert self.history_messages_len >= 0
        # Raises KeyError when GEMINI_API_KEY is missing from the environment.
        genai.configure(api_key=os.environ["GEMINI_API_KEY"])
        self.client = genai
        self.tool_call_required = tool_call_required
        self.system_message: str = "You are a helpful assistant."
        self.action_space: list[Action] | None = None
        self.action_schema: list[Tool] | None = None
        self.token_usage: int = 0
        # Each entry is one turn: [user message dict, model reply dict].
        self.chat_history: list[list[dict]] = []
        self.support_tool_call = True

    def reset(self, system_message: str, action_space: list[Action] | None) -> None:
        """Clear history/usage and install a new system prompt and action space."""
        self.system_message = system_message
        self.action_space = action_space
        self.action_schema = _convert_action_to_schema(self.action_space)
        self.token_usage = 0
        self.chat_history = []

    def chat(self, message: list[Message] | Message) -> BackendOutput:
        """Send one user message (plus the history window) and parse the reply."""
        if isinstance(message, tuple):
            message = [message]
        request = self._fetch_from_memory()
        new_message = self._construct_new_message(message)
        request.append(new_message)
        response_message = self._call_api(request)
        self._record_message(new_message, response_message)
        return self._generate_backend_output(response_message)

    def _construct_new_message(self, message: list[Message]) -> dict[str, Any]:
        """Convert crab messages into a Gemini content dict (text + PIL images)."""
        parts: list[str | Image] = []
        for content, msg_type in message:
            match msg_type:
                case MessageType.TEXT:
                    parts.append(content)
                case MessageType.IMAGE_JPG_BASE64:
                    # Gemini accepts PIL images directly rather than base64.
                    parts.append(base64_to_image(content))
        return {
            "role": "user",
            "parts": parts,
        }

    def _generate_backend_output(self, response_message: Content) -> BackendOutput:
        """Extract text and function calls from the model's Content parts."""
        tool_calls: list[ActionOutput] = []
        for part in response_message.parts:
            if "function_call" in Part.to_dict(part):
                call = Part.to_dict(part)["function_call"]
                tool_calls.append(
                    ActionOutput(
                        name=call["name"],
                        arguments=call["args"],
                    )
                )
        # NOTE(review): assumes parts[0] carries the text; if the first part
        # is a function call its .text is empty, so message becomes None —
        # confirm this is the intended behavior.
        return BackendOutput(
            message=response_message.parts[0].text or None,
            action_list=tool_calls or None,
        )

    def _fetch_from_memory(self) -> list[dict]:
        """Return the last ``history_messages_len`` turns, flattened."""
        request: list[dict] = []
        if self.history_messages_len > 0:
            fetch_history_len = min(self.history_messages_len, len(self.chat_history))
            for history_message in self.chat_history[-fetch_history_len:]:
                request = request + history_message
        return request

    def get_token_usage(self):
        """Token count accumulated since the last reset."""
        return self.token_usage

    def _record_message(
        self, new_message: dict[str, Any], response_message: Content
    ) -> None:
        """Append the user message and model reply to the history window."""
        self.chat_history.append([new_message])
        self.chat_history[-1].append(
            {"role": response_message.role, "parts": response_message.parts}
        )

    @retry(
        wait=wait_fixed(10),
        stop=stop_after_attempt(7),
        retry=retry_if_exception_type(ResourceExhausted),
    )
    def _call_api(self, request_messages: list) -> Content:
        """Call generate_content, retrying on quota errors (ResourceExhausted)."""
        if self.action_schema is not None:
            tool_config = content_types.to_tool_config(
                {
                    "function_calling_config": {
                        "mode": "ANY" if self.tool_call_required else "AUTO"
                    }
                }
            )
            response = self.client.GenerativeModel(
                self.model, system_instruction=self.system_message
            ).generate_content(
                contents=request_messages,
                tools=self.action_schema,
                tool_config=tool_config,
                # **self.parameters, # TODO(Tianqi): Fix this line in the future
            )
        else:
            response = self.client.GenerativeModel(
                self.model, system_instruction=self.system_message
            ).generate_content(
                contents=request_messages,
                # **self.parameters, # TODO(Tianqi): Fix this line in the future
            )
        # NOTE(review): counts only the first candidate's token_count —
        # confirm this reflects total usage for the request.
        self.token_usage += response.candidates[0].token_count
        return response.candidates[0].content
def _convert_action_to_schema(action_space: list[Action] | None) -> list[Tool] | None:
    """Wrap the whole action space into a single Gemini Tool.

    Each crab Action becomes one FunctionDeclaration; Gemini receives them
    grouped inside one Tool object. Returns None when no action space is set.
    """
    if action_space is None:
        return None
    declarations = [_action_to_func_dec(act) for act in action_space]
    return [Tool(function_declarations=declarations)]
def _clear_schema(schema_dict: dict) -> None:
schema_dict.pop("title", None)
p_type = schema_dict.pop("type", None)
for prop in schema_dict.get("properties", {}).values():
_clear_schema(prop)
if p_type is not None:
schema_dict["type_"] = p_type.upper()
if "items" in schema_dict:
_clear_schema(schema_dict["items"])
def _action_to_func_dec(action: Action) -> FunctionDeclaration:
    """Converts crab Action to google FunctionDeclaration."""
    schema = action.parameters.model_json_schema()
    # Inline any $defs references before stripping pydantic metadata.
    if "$defs" in schema:
        schema = json_expand_refs(schema)
    _clear_schema(schema)
    if not schema["properties"]:
        # Parameter-less actions must omit the `parameters` field entirely.
        return FunctionDeclaration(
            name=action.name,
            description=action.description,
        )
    return FunctionDeclaration(
        name=action.name,
        description=action.description,
        parameters=schema,
    )
================================================
FILE: crab/agents/backend_models/openai_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import json
from typing import Any
from crab import Action, ActionOutput, BackendModel, BackendOutput, Message, MessageType
from crab.agents.utils import extract_text_and_code_prompts
# Optional dependency: the openai SDK is only needed by OpenAIModel, so an
# import failure is recorded in a flag instead of raising at module import.
try:
    import openai
    from openai.types.chat import ChatCompletionMessage

    openai_model_enable = True
except ImportError:
    openai_model_enable = False
class OpenAIModel(BackendModel):
    """Backend model wrapper around the OpenAI chat-completions API.

    Replays a sliding window of past turns (``history_messages_len``) on each
    request and converts crab Actions into OpenAI function-calling tools.
    """

    def __init__(
        self,
        model: str,
        parameters: dict[str, Any] | None = None,
        history_messages_len: int = 0,
        tool_call_required: bool = True,
        base_url: str | None = None,
        api_key: str | None = None,
    ) -> None:
        """Create the OpenAI client and initialize bookkeeping state."""
        if not openai_model_enable:
            raise ImportError("Please install openai to use OpenAIModel")
        self.model = model
        self.parameters = {} if parameters is None else parameters
        self.history_messages_len = history_messages_len
        assert self.history_messages_len >= 0
        self.client = openai.OpenAI(api_key=api_key, base_url=base_url)
        self.tool_call_required: bool = tool_call_required
        self.system_message: str = "You are a helpful assistant."
        self.openai_system_message = {
            "role": "system",
            "content": self.system_message,
        }
        self.action_space: list[Action] | None = None
        self.action_schema: list[dict] | None = None
        self.token_usage: int = 0
        # Each entry is one turn: [user msg, assistant msg, *tool responses].
        self.chat_history: list[list[ChatCompletionMessage | dict]] = []
        self.support_tool_call = True

    def reset(self, system_message: str, action_space: list[Action] | None) -> None:
        """Install a new system prompt / action space and clear all history."""
        self.system_message = system_message
        self.openai_system_message = {
            "role": "system",
            "content": system_message,
        }
        self.action_space = action_space
        self.action_schema = _convert_action_to_schema(self.action_space)
        self.token_usage = 0
        self.chat_history = []

    def chat(self, message: list[Message] | Message) -> BackendOutput:
        """Send one user turn (plus the history window) and parse the reply."""
        if isinstance(message, tuple):
            message = [message]
        conversation = self._fetch_from_memory()
        user_turn = self._construct_new_message(message)
        conversation.append(user_turn)
        reply = self._call_api(conversation)
        self._record_message(user_turn, reply)
        return self._generate_backend_output(reply)

    def get_token_usage(self):
        """Total tokens consumed since the last reset."""
        return self.token_usage

    def _record_message(
        self, new_message: dict, response_message: ChatCompletionMessage
    ) -> None:
        """Append the turn to history, answering each tool call with "success"."""
        turn: list[ChatCompletionMessage | dict] = [new_message, response_message]
        if self.action_schema and response_message.tool_calls is not None:
            # The API expects every tool call to get a tool-role response.
            turn.extend(
                {
                    "tool_call_id": tool_call.id,
                    "role": "tool",
                    "name": tool_call.function.name,
                    "content": "success",
                }
                for tool_call in response_message.tool_calls
            )
        self.chat_history.append(turn)

    def _call_api(
        self, request_messages: list[ChatCompletionMessage | dict]
    ) -> ChatCompletionMessage:
        """Issue the chat-completions request and accumulate token usage."""
        tool_kwargs: dict[str, Any] = {}
        if self.action_schema is not None:
            tool_kwargs = {
                "tools": self.action_schema,
                "tool_choice": "required" if self.tool_call_required else "auto",
            }
        response = self.client.chat.completions.create(
            messages=request_messages,  # type: ignore
            model=self.model,
            **tool_kwargs,
            **self.parameters,
        )
        self.token_usage += response.usage.total_tokens
        return response.choices[0].message

    def _fetch_from_memory(self) -> list[ChatCompletionMessage | dict]:
        """Return the system message followed by the recent history window."""
        request: list[ChatCompletionMessage | dict] = [self.openai_system_message]
        if self.history_messages_len > 0:
            for turn in self.chat_history[-self.history_messages_len :]:
                request.extend(turn)
        return request

    def _construct_new_message(self, message: list[Message]) -> dict[str, Any]:
        """Convert crab messages into one OpenAI user-message dict."""
        parts: list[dict[str, Any]] = []
        for payload, part_type in message:
            if part_type == MessageType.TEXT:
                parts.append({"type": "text", "text": payload})
            elif part_type == MessageType.IMAGE_JPG_BASE64:
                parts.append(
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{payload}",
                            "detail": "high",
                        },
                    }
                )
        return {"role": "user", "content": parts}

    def _generate_backend_output(
        self, response_message: ChatCompletionMessage
    ) -> BackendOutput:
        """Translate the assistant reply into a BackendOutput."""
        calls = response_message.tool_calls
        if calls is None:
            return BackendOutput(message=response_message.content, action_list=None)
        return BackendOutput(
            message=response_message.content,
            action_list=[
                ActionOutput(
                    name=call.function.name,
                    arguments=json.loads(call.function.arguments),
                )
                for call in calls
            ],
        )
def _convert_action_to_schema(
action_space: list[Action] | None,
) -> list[dict] | None:
if action_space is None:
return None
actions = []
for action in action_space:
new_action = action.to_openai_json_schema()
actions.append({"type": "function", "function": new_action})
return actions
class OpenAIModelJSON(OpenAIModel):
    """OpenAI backend that avoids native tool calling.

    Actions are parsed out of JSON code blocks in the reply text instead of
    the API's tool-call mechanism, for models/servers without tool support.
    """

    def __init__(
        self,
        model: str,
        parameters: dict[str, Any] | None = None,
        history_messages_len: int = 0,
        base_url: str | None = None,
        api_key: str | None = None,
    ) -> None:
        """Create the client with tool calling disabled.

        BUG FIX: the default for ``parameters`` was ``dict()``, a mutable
        default shared across all instances; ``None`` is used instead and the
        parent normalizes it to a fresh ``{}`` (backward compatible).
        """
        super().__init__(
            model,
            parameters,
            history_messages_len,
            False,
            base_url,
            api_key,
        )
        self.support_tool_call = False

    def reset(self, system_message: str, action_space: list[Action] | None) -> None:
        """Reset like the parent, but never send a tool schema to the API."""
        super().reset(system_message, action_space)
        self.action_schema = None

    def _record_message(
        self, new_message: dict, response_message: ChatCompletionMessage
    ) -> None:
        """Store the reply as a plain dict; no tool-call bookkeeping needed."""
        self.chat_history.append([new_message])
        self.chat_history[-1].append(
            {"role": "assistant", "content": response_message.content}
        )

    def _generate_backend_output(
        self, response_message: ChatCompletionMessage
    ) -> BackendOutput:
        """Parse JSON code blocks out of the reply text into ActionOutputs.

        Raises:
            RuntimeError: if a code block is not valid JSON, or lacks the
                ``name``/``arguments`` keys.
        """
        content = response_message.content
        text_list, code_list = extract_text_and_code_prompts(content)
        action_list = []
        try:
            for code_block in code_list:
                action_object = json.loads(code_block)
                action_list.append(
                    ActionOutput(
                        name=action_object["name"], arguments=action_object["arguments"]
                    )
                )
        except json.JSONDecodeError as e:
            raise RuntimeError(f"Failed to parse code block: {code_block}") from e
        except KeyError as e:
            raise RuntimeError(f"Received invalid action format: {code_block}") from e
        return BackendOutput(
            message="".join(text_list),
            action_list=action_list,
        )
class SGlangOpenAIModelJSON(OpenAIModelJSON):
    """OpenAIModelJSON variant targeting SGLang's OpenAI-compatible server."""

    def _construct_new_message(self, message: list[Message]) -> dict[str, Any]:
        """Build the user message, tagging multi-image requests for SGLang.

        When more than one image is present, each image part carries the
        SGLang-specific "modalities": "multi-images" hint.
        """
        total_images = sum(
            1 for _, part_type in message if part_type == MessageType.IMAGE_JPG_BASE64
        )
        parts: list[dict[str, Any]] = []
        for payload, part_type in message:
            if part_type == MessageType.TEXT:
                parts.append({"type": "text", "text": payload})
            elif part_type == MessageType.IMAGE_JPG_BASE64:
                entry: dict[str, Any] = {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{payload}",
                        "detail": "high",
                    },
                }
                if total_images > 1:
                    entry["modalities"] = "multi-images"
                parts.append(entry)
        return {"role": "user", "content": parts}
================================================
FILE: crab/agents/policies/__init__.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# ruff: noqa: F401
from .multi_agent_by_env import MultiAgentByEnvPolicy
from .multi_agent_by_func import MultiAgentByFuncPolicy
from .single_agent import SingleAgentPolicy
================================================
FILE: crab/agents/policies/multi_agent_by_env.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from crab import Action, ActionOutput
from crab.agents.backend_models import BackendModelConfig, create_backend_model
from crab.agents.utils import generate_action_prompt
from crab.core.agent_policy import AgentPolicy
from crab.core.backend_model import (
BackendModel,
MessageType,
)
class MultiAgentByEnvPolicy(AgentPolicy):
    """Policy with one main planner agent plus one sub-agent per environment.

    Each step, the main agent produces a natural-language instruction; every
    environment sub-agent then turns that instruction (plus its own
    observation, when available) into tool calls for its environment.
    """

    _main_agent_prompt = """You are a main agent, and your goal is to plan and
    give instructions to sub-agents in each environment to complete the final task. Now
    you have to do a task as described below: {task_description}. The description of
    each given environment: {env_description}. For each step, you are required to
    provide high-level instructions detailing the next actions to be taken.
    Additionally, you must specify which sub-agent in the designated environment should
    execute these instructions. If a sub-agent is not needed for a particular step, you
    may instruct it to skip that step."""

    _env_agent_prompt = """You are a sub-agent responsible for the {environment}
    environment. The description of the {environment} environment is:
    {env_description}. Your goal is to assist the main agent in completing the final
    task by performing actions in the {environment} environment according to the
    instructions from the main agent. The final task is described below:
    {task_description}. A unit operation you can perform is called action in a given
    environment. You can only execute action in the {environment} environment. For the
    {environment} environment, you are given a limited action space as function calls:
    {action_descriptions}
    The interactive UI elements on the screenshot are labeled with numeric tags starting
    from 1. For each step, You will receive an instruction telling you what you need to
    do next. After analyzing the instruction you received and the current {environment}
    system, if you think you don't need to do anything in the current {environment}
    system, you should choose SKIP action. Otherwise, you must state what actions to
    take, what the parameters are, and you MUST provide in which environment to perform
    these actions. Your answer must be function calls. Please do not output any other
    information. You must make sure all function calls get their required parameters."""

    _root_agent_prompt = """You are a sub-agent responsible for the crab benchmark root
    environment. Your goal is to assist the main agent in completing the whole task:
    "{task_description}". You can only complete the task or submit the result when the
    main agent tells you the whole task has been completed. Otherwise, you can only call
    SKIP. """

    def __init__(
        self,
        main_agent_model_backend: BackendModelConfig,
        env_agent_model_backend: BackendModelConfig,
    ):
        """Create the main agent; env sub-agents are (re)built in reset()."""
        self.main_agent_model_backend = create_backend_model(main_agent_model_backend)
        self.env_agent_model_backend_config = env_agent_model_backend
        self.reset(task_description="", action_spaces={}, env_descriptions={})

    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> None:
        """Rebuild the main agent and one sub-agent per environment.

        BUG FIX: annotated ``-> None`` — nothing was ever returned despite the
        previous ``-> list`` annotation.
        """
        self.task_description = task_description
        main_agent_system_message = self._main_agent_prompt.format(
            task_description=task_description,
            env_description=str(env_descriptions),
        )
        self.main_agent_model_backend.reset(main_agent_system_message, None)
        root_agent_system_message = self._root_agent_prompt.format(
            task_description=task_description
        )
        self.env_agent_model_backends: dict[str, BackendModel] = {}
        for env in action_spaces:
            backend = create_backend_model(self.env_agent_model_backend_config)
            if env == "root":
                backend.reset(root_agent_system_message, action_spaces[env])
            else:
                # NOTE(review): BackendModel exposes `tool_call_required`, not
                # `require_tool`; this assignment looks ineffective — confirm.
                backend.require_tool = True
                env_agent_system_message = self._env_agent_prompt.format(
                    task_description=task_description,
                    environment=env,
                    env_description=env_descriptions[env],
                    action_descriptions=generate_action_prompt(action_spaces[env]),
                )
                backend.reset(env_agent_system_message, action_spaces[env])
            self.env_agent_model_backends[env] = backend

    def get_token_usage(self):
        """Sum token usage across the main agent and all sub-agents."""
        result = 0
        result += self.main_agent_model_backend.get_token_usage()
        for env_agent in self.env_agent_model_backends.values():
            result += env_agent.get_token_usage()
        return result

    def get_backend_model_name(self):
        """Identify the policy by the main agent's backend class and model."""
        return (
            self.main_agent_model_backend.__class__.__name__
            + "_"
            + self.main_agent_model_backend.model
        )

    def chat(
        self,
        observation: dict[str, list[tuple[str, MessageType]]],
    ) -> list[ActionOutput]:
        """One step: plan with the main agent, then fan out to sub-agents.

        Returns the combined tool calls from all sub-agents, each tagged with
        its environment name.
        """
        main_prompt = []
        for env in observation:
            main_prompt.extend(observation[env])
        main_prompt.append(
            (
                (
                    f"Your target: {self.task_description}\n"
                    "Tell me the next step in each environment."
                ),
                MessageType.TEXT,
            )
        )
        output = self.main_agent_model_backend.chat(main_prompt)
        main_agent_message = (
            f"The instruction from main agent for this step: {output.message}"
        )
        tool_calls = []
        for env in self.env_agent_model_backends:
            backend = self.env_agent_model_backends[env]
            if env in observation:
                output = backend.chat(
                    observation[env] + [(main_agent_message, MessageType.TEXT)]
                )
            else:
                output = backend.chat((main_agent_message, MessageType.TEXT))
            # BUG FIX: a backend may return no tool calls (action_list=None);
            # iterating None previously raised TypeError.
            if output.action_list:
                for action in output.action_list:
                    action.env = env
                tool_calls.extend(output.action_list)
        return tool_calls
================================================
FILE: crab/agents/policies/multi_agent_by_func.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from crab.agents.backend_models import BackendModelConfig, create_backend_model
from crab.agents.utils import (
combine_multi_env_action_space,
decode_combined_action,
generate_action_prompt,
)
from crab.core import Action, ActionOutput
from crab.core.agent_policy import AgentPolicy
from crab.core.backend_model import MessageType
class MultiAgentByFuncPolicy(AgentPolicy):
    """Two-agent policy: a main agent plans in natural language and a tool
    agent translates the plan into function calls."""

    _system_prompt = """You are a helpful assistant. Now you have to do a task as
    described below: {task_description}. And this is the description of each given
    environment: {env_description}. A unit operation you can perform is called action in
    a given environment. For each environment, you are given a limited action space as
    function calls:
    {action_descriptions}
    You may receive a screenshot of the current system. The interactive UI elements on
    the screenshot are labeled with numeric tags starting from 1. For each step, You
    must state what actions to take, what the parameters are, and you MUST provide in
    which environment to perform these actions. """

    _tool_prompt = """You are a helpful assistant in generating function calls. I will
    give you a detailed description of what actions to take next, you should translate
    it into function calls. please do not output any other information.
    """

    def __init__(
        self,
        main_agent_model_backend: BackendModelConfig,
        tool_agent_model_backend: BackendModelConfig,
    ):
        """Build both backends and initialize with an empty task."""
        self.main_agent_model_backend = create_backend_model(main_agent_model_backend)
        self.tool_agent_model_backend = create_backend_model(tool_agent_model_backend)
        self.reset(task_description="", action_spaces=None, env_descriptions={})

    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> None:
        """Install a new task and rebuild both agents' system prompts.

        BUG FIX: annotated ``-> None`` — nothing was ever returned despite the
        previous ``-> list[ActionOutput]`` annotation.
        """
        self.task_description = task_description
        self.action_space = combine_multi_env_action_space(action_spaces)
        main_agent_system_message = self._system_prompt.format(
            task_description=task_description,
            action_descriptions=generate_action_prompt(self.action_space),
            env_description=str(env_descriptions),
        )
        self.main_agent_model_backend.reset(main_agent_system_message, None)
        self.tool_agent_model_backend.reset(self._tool_prompt, self.action_space)

    def get_token_usage(self):
        """Sum token usage across the main agent and the tool agent."""
        return (
            self.main_agent_model_backend.get_token_usage()
            + self.tool_agent_model_backend.get_token_usage()
        )

    def get_backend_model_name(self):
        """Identify the policy by the main agent's backend class and model."""
        return (
            self.main_agent_model_backend.__class__.__name__
            + "_"
            + self.main_agent_model_backend.model
        )

    def chat(
        self,
        observation: dict[str, list[tuple[str, MessageType]]],
    ) -> list[ActionOutput]:
        """One step: ask the main agent for a plan, then the tool agent for calls."""
        prompt = []
        for env in observation:
            prompt.extend(observation[env])
        prompt.append(
            (
                f"Your target: {self.task_description}\nTell me the next action.",
                MessageType.TEXT,
            )
        )
        output = self.main_agent_model_backend.chat(prompt)
        tool_output = self.tool_agent_model_backend.chat(
            (output.message, MessageType.TEXT)
        )
        # Robustness: the tool agent may return no calls (action_list=None);
        # decode an empty list instead of crashing.
        return decode_combined_action(tool_output.action_list or [])
================================================
FILE: crab/agents/policies/single_agent.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import logging
from crab import Action, ActionOutput
from crab.agents.backend_models import BackendModelConfig, create_backend_model
from crab.agents.utils import (
combine_multi_env_action_space,
decode_combined_action,
generate_action_prompt,
)
from crab.core.agent_policy import AgentPolicy
from crab.core.backend_model import (
MessageType,
)
from crab.utils.measure import timed
logger = logging.getLogger(__name__)
class SingleAgentPolicy(AgentPolicy):
    """Agent policy that drives the whole task with a single backend model.

    The one model both reasons about observations and emits actions, either
    through native tool/function calling (when the backend supports it) or by
    writing JSON code blocks that are parsed into actions.
    """

    # System prompt used when the backend supports native function calling.
    _system_prompt_with_function_call = """\
You are a helpful assistant. Now you have to do a task as described below:
**"{task_description}."**
You should never forget this task and always perform actions to achieve this task.
And this is the description of each given environment: {env_description}. A
unit operation you can perform is called Action. You have a limited action space as
function calls:
{action_descriptions}
You may receive a screenshot of the current system. You may receive a screenshot of
a smartphone app. The interactive UI elements on the screenshot are labeled with
numeric tags starting from 1.
In each step, You MUST explain what do you see from the current observation and the
plan of the next action, then use a provided action in each step to achieve the
task. You should state what action to take and what the parameters should be. Your
answer MUST be a least one function call. You SHOULD NEVER ask me to do anything for
you. Always do them by yourself using function calls.
"""

    # System prompt used when actions must be emitted as JSON code blocks.
    _system_prompt_no_function_call = """\
You are a helpful assistant. Now you have to do a task as described below:
**"{task_description}."**
You should never forget this task and always perform actions to achieve this task.
And this is the description of each given environment: {env_description}. You will
receive screenshots of the environments. The interactive UI elements on the
screenshot are labeled with numeric tags starting from 1.
A unit operation you can perform is called Action. You have a limited action space
as function calls: {action_descriptions}. You should generate JSON code blocks to
execute the actions. Each code block MUST contains only one json object, i.e. one
action. You can output multiple code blocks to execute multiple actions in a single
step. You must follow the JSON format below to output the action.
```json
{{"name": "action_name", "arguments": {{"arg1": "value1", "arg2": "value2"}}}}
```
or if not arguments needed:
```json
{{"name": "action_name", "arguments": {{}}}}
```
You MUST use exactly the same "action_name" as I gave to you in the action space.
You SHOULDN'T add any comments in the code blocks.
In each step, You MUST explain what do you see from the current observation and the
plan of the next action, then use a provided action in each step to achieve the
task. You should state what action to take and what the parameters should be. Your
answer MUST contain at least one code block. You SHOULD NEVER ask me to do anything
for you. Always do them by yourself.
"""

    def __init__(
        self,
        model_backend: BackendModelConfig,
        function_call: bool = True,
    ):
        """Create the policy and select the prompting mode.

        Args:
            model_backend: Configuration of the backend model to instantiate.
            function_call: Prefer native tool/function calling. Automatically
                falls back to JSON-code-block mode when the backend does not
                support tool calls.
        """
        self.model_backend = create_backend_model(model_backend)
        self.function_call = function_call
        if not self.model_backend.support_tool_call and self.function_call:
            logger.warning(
                "The backend model does not support tool call: {}".format(
                    model_backend.model_name
                )
                + "\nFallback to no function call mode."
            )
            self.function_call = False
        if self.function_call:
            self.system_prompt = self._system_prompt_with_function_call
        else:
            self.system_prompt = self._system_prompt_no_function_call
        # Initialize with an empty task so the backend is in a known state.
        self.reset(task_description="", action_spaces=None, env_descriptions={})

    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]] | None,
        env_descriptions: dict[str, str],
    ) -> None:
        """Start a new task: rebuild the system prompt and reset the backend.

        Note: the return annotation is ``None`` to match
        :meth:`AgentPolicy.reset` (the previous ``-> list`` annotation was
        incorrect; nothing is returned).

        Args:
            task_description: Natural-language description of the task.
            action_spaces: Per-environment action lists; may be ``None``
                before any task is started (as in ``__init__``).
            env_descriptions: Human-readable description of each environment.
        """
        self.task_description = task_description
        self.action_space = combine_multi_env_action_space(action_spaces)
        system_message = self.system_prompt.format(
            task_description=task_description,
            action_descriptions=generate_action_prompt(
                self.action_space,
                # Without function calling the model needs the full JSON
                # schema of each action spelled out in the prompt.
                expand=not self.function_call,
            ),
            env_description=str(env_descriptions),
        )
        if self.function_call:
            self.model_backend.reset(system_message, self.action_space)
        else:
            self.model_backend.reset(system_message, None)

    def get_token_usage(self):
        """Return the cumulative token usage of the backend model."""
        return self.model_backend.get_token_usage()

    def get_backend_model_name(self):
        """Return ``<BackendClassName>_<model>`` for logging/reporting."""
        return self.model_backend.__class__.__name__ + "_" + self.model_backend.model

    @timed
    def chat(
        self,
        observation: dict[str, list[tuple[str, MessageType]]],
    ) -> list[ActionOutput]:
        """Send all observations plus the task reminder; decode the actions.

        Args:
            observation: Per-environment message chunks (text/image tuples).

        Returns:
            Actions routed back to their originating environments.
        """
        prompt = []
        for env in observation:
            prompt.extend(observation[env])
        prompt.append(
            (
                f"Your target: {self.task_description}\nTell me the next action.",
                MessageType.TEXT,
            )
        )
        output = self.model_backend.chat(prompt)
        return decode_combined_action(output.action_list)
================================================
FILE: crab/agents/utils.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from crab.core import Action, ActionOutput
def combine_multi_env_action_space(
    action_space: dict[str, list[Action]] | None,
) -> list[Action]:
    """Flatten per-environment action spaces into one list for a single agent.

    Each action is copied and renamed to ``<name>_in_<env>`` (and its
    description prefixed with the environment) so the originating environment
    can be recovered later by :func:`decode_combined_action`.
    """
    if not action_space:
        return []
    combined: list[Action] = []
    for env_name, actions in action_space.items():
        for act in actions:
            tagged = act.model_copy()
            tagged.name = f"{tagged.name}_in_{env_name}"
            tagged.description = f"In {env_name} environment, {tagged.description}"
            combined.append(tagged)
    return combined
def decode_combined_action(
    output_actions: list[ActionOutput],
) -> list[ActionOutput]:
    """Decode combined action output to action output with the corresponding
    environment.

    Combined names have the form ``<action_name>_in_<env_name>`` (produced by
    :func:`combine_multi_env_action_space`). Splitting from the right with
    ``rsplit`` keeps action names that themselves contain ``"_in_"`` (for
    example ``log_in_user``) intact, which a plain ``split`` would reject.

    Raises:
        RuntimeError: If an action name does not contain the ``"_in_"``
            separator at all.
    """
    result = []
    for output in output_actions:
        # maxsplit=1 from the right: the env tag is always the last suffix.
        name_env = output.name.rsplit("_in_", 1)
        if len(name_env) != 2:
            raise RuntimeError(
                'The decoded action name should contain the splitter "_in_".'
            )
        new_output = output.model_copy()
        new_output.name = name_env[0]
        new_output.env = name_env[1]
        result.append(new_output)
    return result
def generate_action_prompt(action_space: list[Action], expand: bool = False) -> str:
    """Render the action space as prompt text.

    With ``expand=True`` each entry includes the action's full OpenAI JSON
    schema (needed when the model cannot use native function calling);
    otherwise a compact ``[name: description]`` line per action is produced.
    """
    if expand:
        entries = []
        for act in action_space:
            entries.append(
                f"[**{act.name}**:\n"
                f"action description: {act.description}\n"
                f"action arguments json schema: {act.to_openai_json_schema()}\n"
                "]\n"
            )
        return "".join(entries)
    return "".join(f"[{act.name}: {act.description}]\n" for act in action_space)
def extract_text_and_code_prompts(content: str) -> tuple[list[str], list[str]]:
    r"""Extract text and code prompts from the message content.

    Segments alternate between free text and fenced code blocks delimited by
    lines starting with ```` ``` ```` (optionally indented; any language tag on
    the opening fence is ignored). An unterminated trailing code block is
    dropped rather than raising.

    Returns:
        A tuple (text_list, code_list) where text_list is a list of text and
        code_list is a list of extracted codes both from the content.
    """
    text_prompts: list[str] = []
    code_prompts: list[str] = []
    lines = content.split("\n")
    idx = 0
    start_idx = 0
    while idx < len(lines):
        # Advance to the next opening fence, collecting plain text.
        while idx < len(lines) and (not lines[idx].lstrip().startswith("```")):
            idx += 1
        text_prompts.append("\n".join(lines[start_idx:idx]).strip())
        if idx >= len(lines):
            break
        idx += 1  # skip the opening fence (language tag, if any, is ignored)
        start_idx = idx
        # Bounds check FIRST: the original checked `lines[idx]` before the
        # bound, raising IndexError on an unterminated code fence.
        while idx < len(lines) and (not lines[idx].lstrip().startswith("```")):
            idx += 1
        if idx >= len(lines):
            break  # unterminated code block: drop it
        code_prompts.append("\n".join(lines[start_idx:idx]).strip())
        idx += 1  # skip the closing fence
        start_idx = idx
    return text_prompts, code_prompts
================================================
FILE: crab/benchmarks/__init__.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
================================================
FILE: crab/benchmarks/template.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import networkx as nx
from crab import BenchmarkConfig, Task, action, evaluator
from crab.environments.template import set_state, template_environment_config
# Default-environment evaluator: passes when the environment's `state`
# attribute is truthy (set by the template environment's `set_state` action).
@evaluator
def is_system_state(env) -> bool:
    return env.state
@evaluator(env_name="root")
def check_submit_true(env) -> bool:
if env.trajectory:
action_name, params, _ = env.trajectory[-1]
print(action_name, params)
if action_name == "_submit" and params["content"]:
return True
return False
@action(env_name="root")
def _submit(content: bool) -> None:
"""Submit your answer through this function.
Args:
content: the content to submit
"""
pass
# Example single-environment benchmark: one template environment and two
# sample tasks (flip the state flag; submit an answer).
template_benchmark_config = BenchmarkConfig(
    name="template_benchmark",
    environments=[template_environment_config],
    tasks=[
        Task(
            id="0",
            description="Set the system state to True.",
            evaluator=is_system_state,
            setup=set_state(False),  # ensure the task starts from state False
        ),
        Task(
            id="1",
            description="Submit True.",
            evaluator=check_submit_true,
            extra_action=[_submit],  # expose `_submit` only for this task
        ),
    ],
)
@evaluator(env_name="testenv0")
def check_sys0(env) -> bool:
return env.state
@evaluator(env_name="testenv1")
def check_sys1(env) -> bool:
return env.state
@evaluator(env_name="testenv2")
def check_sys2(env) -> bool:
return env.state
# Evaluator DAG: all three per-environment state checks must pass before the
# final submit check is considered.
eval_g = nx.DiGraph()
eval_g.add_edge(check_sys0, check_submit_true)
eval_g.add_edge(check_sys1, check_submit_true)
eval_g.add_edge(check_sys2, check_submit_true)

# Multi-environment benchmark: three copies of the template environment named
# testenv0..testenv2, evaluated with the graph above.
# NOTE(review): "mutlienv" in the name below looks like a typo for
# "multienv" — confirm nothing references this string before renaming.
multienv_template_benchmark_config = BenchmarkConfig(
    name="mutlienv_template_benchmark",
    environments=[
        template_environment_config.model_copy(update={"name": f"testenv{idx}"})
        for idx in range(3)
    ],
    tasks=[
        Task(
            id="0",
            description=(
                "Set the system state to True in all three environments. "
                "Then submit True to finish the project."
            ),
            evaluator=eval_g,
            extra_action=[_submit],
        )
    ],
    multienv=True,
)
================================================
FILE: crab/core/__init__.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# ruff: noqa: F401, F403
from .agent_policy import AgentPolicy
from .backend_model import BackendModel
from .benchmark import Benchmark, create_benchmark
from .decorators import action, evaluator
from .environment import Environment, create_environment
from .experiment import Experiment
from .graph_evaluator import Evaluator, GraphEvaluator
from .models import *
from .task_generator import TaskGenerator
================================================
FILE: crab/core/agent_policy.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from abc import ABC, abstractmethod
from .models import Action, ActionOutput, Message
class AgentPolicy(ABC):
    """Abstract interface mapping environment observations to agent actions.

    Implementations own the conversation with one or more backend models and
    translate model output into executable actions.
    """

    # Produce the next batch of actions from per-environment observations;
    # keys of `observation` are environment names.
    @abstractmethod
    def chat(
        self,
        observation: dict[str, list[Message]],
    ) -> list[ActionOutput]: ...

    # Begin a new task: receives the task text, the per-environment action
    # spaces, and the per-environment descriptions.
    @abstractmethod
    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> None: ...

    # Total tokens consumed by the underlying backend model(s) so far.
    @abstractmethod
    def get_token_usage(self) -> int: ...

    # Human-readable identifier of the backend model, used in logs/results.
    @abstractmethod
    def get_backend_model_name(self) -> str: ...
================================================
FILE: crab/core/backend_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from abc import ABC, abstractmethod
from .models import Action, BackendOutput, MessageType
class BackendModel(ABC):
    """Abstract wrapper adapting a chat-based model API to a common interface.

    Concrete implementations (OpenAI, Claude, Gemini, CAMEL, ...) implement
    `chat`/`reset`/`get_token_usage` for use by agent policies.
    """

    # Send one multi-part message (text/image chunks) and return structured
    # output (message text plus any parsed actions).
    @abstractmethod
    def chat(self, contents: list[tuple[str, MessageType]]) -> BackendOutput: ...

    # Reinitialize the conversation with a system message and optional tools;
    # `action_space` is None when tool/function calling is disabled.
    @abstractmethod
    def reset(
        self,
        system_message: str,
        action_space: list[Action] | None,
    ): ...

    # Cumulative token usage for this conversation.
    @abstractmethod
    def get_token_usage(self): ...
================================================
FILE: crab/core/benchmark.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import traceback
from time import sleep
from typing import Any
from crab.core.graph_evaluator import GraphEvaluator
from crab.utils.measure import timed
from .environment import Environment, create_environment
from .exceptions import TaskNotFound
from .models import Action, BenchmarkConfig, ClosedAction, MessageType, StepResult, Task
class Benchmark:
    """The crab benchmark controller managing environments and agent evaluation.

    The class manages multiple environments together and provides a simple API
    through :meth:`step`, :meth:`observe` and :meth:`reset` for language model
    agents to perform tasks in multiple environments.

    This class introduces a "root" environment with no action or observation
    capabilities, intended as a utility for evaluations not directly tied to a
    specific environment.

    This class operates in two distinct modes: "multi-environment" and
    "single-environment". In multi-environment mode, observations and action
    results are separated by environment, returned as a dictionary. While in
    single-environment mode, all observations and action outcomes are merged
    under the "root" environment, with actions being appropriately routed to
    their respective environments.
    """

    def __init__(
        self,
        name: str,
        tasks: list[Task],
        environments: list[Environment],
        default_env: str | None = None,
        multienv: bool = False,
        prompting_tools: dict[str, dict[str, Action]] = {},
        root_action_space: list[Action] = [],
        step_limit: int = 30,
        common_setup: list[ClosedAction] = [],
    ) -> None:
        """Initializes the instance.

        Args:
            name: Identifier for the benchmark.
            tasks: Tasks to be executed within the benchmark.
            environments: Environments in which the benchmark is conducted.
            default_env: The default environment name, applied when actions do
                not specify an environment. Defaults to "root" in the
                multi-environment mode and to the environment in the single
                environment mode.
            multienv: Indicates whether to enable multi-environment mode.
                Defaults to :obj:`False`.
            prompting_tools: Prompting tools applied in
                :meth:`observe_with_prompt`. The first level keys are
                environment names, the second level keys are observation
                action names. Defaults to empty.
            root_action_space: The action space executed in the root
                environment.
            step_limit: Maximum number of steps before a task is terminated
                with "reach_max_step".
            common_setup: Actions executed at the start of every task, before
                the task-specific setup.
        """
        self.name = name
        self.tasks = tasks
        self.multienv = multienv
        self.prompting_tools = prompting_tools
        self.step_limit = step_limit
        self.common_setup = common_setup
        if isinstance(environments, Environment):
            environments = [environments]
        else:
            # Copy so appending the root env below does not mutate the
            # caller's list (the original appended in place).
            environments = list(environments)
        self.root_env = Environment(
            name="root",
            action_space=root_action_space,
            observation_space=[],
            description="The crab benchmark root. You can submit your answer or "
            "complete the task using this environment.",
        )
        self.root_env.contained_envs = {env.name: env for env in environments}  # A hack
        environments.append(self.root_env)
        self.environment_map: dict[str, Environment] = {
            env.name: env for env in environments
        }
        # If not multienv, combine all environments' action spaces together.
        if not self.multienv:
            # action_map is used only by the "agent" side, specifically the
            # `step` and `export_action_space` functions.
            self._verify_spaces()
            self._generate_action_map()
        # default_env is used for predefined actions without env_name, like
        # evaluators, setups, teardowns, and so on.
        if default_env is None:
            # Exactly one real env (plus root) in single-env mode: use it.
            if not multienv and len(environments) == 2:
                self.default_env = environments[0].name
            else:
                self.default_env = self.root_env.name
        else:
            self.default_env = default_env
        self.current_task: Task | None = None
        self.current_evaluator: GraphEvaluator | None = None
        self.step_cnt = 0

    def start_task(self, task_id: str) -> tuple[Task, dict[str, list[Action]]]:
        """Initializes and starts a specified task.

        Args:
            task_id: The ID of the task to start.

        Returns:
            A tuple (task, action_space), where task is the started task
            object, and action_space is a dict mapping environment names to
            the corresponding action lists.

        Raises:
            RuntimeError: If another task is already running.
            TaskNotFound: If no task has the given id.
        """
        if self.current_task is not None:
            raise RuntimeError("Another task is running")
        self.current_task = self._get_task_by_id(task_id)
        # Reset all environments before running any setup.
        self._reset_environments()
        for action in self.common_setup:
            self._take_env_action(action)
        # The target environment is selected by Action.env_name.
        for action in self.current_task.setup:
            self._take_env_action(action)
        for task_action in self.current_task.extra_action:
            self._set_env_action(task_action)
        # Reset the evaluator graph for this task.
        self.current_evaluator = GraphEvaluator(self.current_task.evaluator)
        # Put submit action to corresponding env space.
        # For now, only the last node can be the submit task.
        self.step_cnt = 0
        return self.current_task, self.export_action_space()

    def close_task(self) -> None:
        """Cleans up after a task is completed (runs teardown actions)."""
        if self.current_evaluator is None or self.current_task is None:
            raise RuntimeError("There is no started task.")
        for action in self.current_task.teardown:
            self._take_env_action(action)
        self.current_task = None

    def get_env_descriptions(self) -> dict[str, str]:
        """Get environment descriptions as a dict keyed by environment name."""
        return {
            name: self.environment_map[name].description
            for name in self.environment_map
        }

    def observe(self) -> dict[str, dict[str, Any]]:
        """Collects observations from all environments.

        Returns:
            A dict-of-dict with observations from each environment. The first
            level keys are environment names, the second level keys are
            observation action names. In single-env mode everything is merged
            under the default environment.
        """
        env_obs = {env.name: env.observe() for env in self.environment_map.values()}
        if self.multienv:
            return env_obs
        return self._merge_dicts(env_obs)

    @timed
    def observe_with_prompt(
        self,
    ) -> tuple[dict[str, dict[str, Any]], dict[str, tuple[str, MessageType]]]:
        """Collects observations and applies prompting tools.

        Returns:
            A tuple (observations, prompts), where "observations" and
            "prompts" are observations from each environment and the result of
            applying prompting tools on them. The first level keys are
            environment names, the second level keys are observation action
            names. Notice that some dicts can be empty if its prompting tool
            wasn't set.
        """
        observations = {}
        prompts = {}
        for env_name, env in self.environment_map.items():
            if env_name in self.prompting_tools:
                tools = self.prompting_tools[env_name]
            else:
                tools = {}
            observations[env_name], prompts[env_name] = env.observe_with_prompt(tools)
        if self.multienv:
            return observations, prompts
        return self._merge_dicts(observations), self._merge_dicts(prompts)

    def evaluate(self):
        """Advance the evaluator graph one step and return its statistics."""
        self.current_evaluator.step(self.environment_map, self.default_env)
        return self.current_evaluator.stat()

    @timed
    def step(
        self,
        action: str,
        parameters: dict[str, Any] = {},
        env_name: str | None = None,
    ) -> StepResult:
        """Executes a step in the benchmark by performing an action.

        Args:
            action: The action to execute. The special name "complete"
                terminates the task immediately without touching any
                environment.
            parameters: Parameters for the action.
            env_name: The name of the environment; inferred from the action
                name (single-env mode) or the default env when ``None``.

        Returns:
            The result of the step including observations and evaluation
            metrics. Notice that the `truncated` field in the result is not
            meaningful for now.

        Raises:
            RuntimeError: If no task has been started.
        """
        terminated = False
        info = {}
        if self.current_evaluator is None or self.current_task is None:
            raise RuntimeError("There is no started task.")
        if action == "complete":
            info["terminate_reason"] = "agent_complete"
            return StepResult(
                truncated=False,
                terminated=True,
                action_returns=None,
                evaluation_results=self.current_evaluator.stat(),
                info=info,
            )
        try:
            environment = self._get_env(env_name=env_name, action_name=action)
        except Exception:
            print(traceback.format_exc())
            info["terminate_reason"] = "action_format_error"
            info["exception_detail"] = traceback.format_exc()
            # BUGFIX: `environment` is unbound when `_get_env` raises, so the
            # original `environment.reset()` here raised NameError. Reset all
            # environments instead, since the target one could not be resolved.
            self._reset_environments()
            self.close_task()
            return StepResult(
                truncated=False,
                terminated=True,
                action_returns=None,
                evaluation_results=self.current_evaluator.stat(),
                info=info,
            )
        try:
            action_returns = environment.step(action, parameters)
        except Exception:
            print(traceback.format_exc())
            info["terminate_reason"] = "env_exception"
            info["exception_detail"] = traceback.format_exc()
            environment.reset()
            self.close_task()
            return StepResult(
                truncated=False,
                terminated=True,
                action_returns=None,
                evaluation_results=self.current_evaluator.stat(),
                info=info,
            )
        try:
            evaluation_results = self.evaluate()
        except Exception:
            print(traceback.format_exc())
            info["terminate_reason"] = "evaluator_exception"
            info["exception_detail"] = traceback.format_exc()
            environment.reset()
            self.close_task()
            return StepResult(
                truncated=False,
                terminated=True,
                action_returns=action_returns,
                evaluation_results=self.current_evaluator.stat(),
                info=info,
            )
        self.step_cnt += 1
        if self.current_evaluator.is_complete():
            terminated = True
            info["terminate_reason"] = "success"
        if self.step_cnt >= self.step_limit:
            terminated = True
            info["terminate_reason"] = "reach_max_step"
        if terminated:
            environment.reset()
            self.close_task()
        return StepResult(
            truncated=False,
            terminated=terminated,
            action_returns=action_returns,
            evaluation_results=evaluation_results,
            info=info,
        )

    def reset(self) -> None:
        """Resets all environments and clears the current evaluator."""
        self.current_evaluator = None
        self._reset_environments()

    def human_evaluation(self, task_id: str) -> None:
        """Run a task in human mode, polling the evaluator until completion."""
        task, _ = self.start_task(task_id)
        print(task.description)
        self.current_evaluator.human_mode = True
        evaluation_results = self.evaluate()
        print(evaluation_results, end="")
        while evaluation_results["completeness"] != 1.0:
            sleep(2)
            evaluation_results = self.evaluate()
            print("\r" + str(evaluation_results), end="")
        self.close_task()

    def export_action_space(self) -> dict[str, list[Action]]:
        """Returns the action spaces from all environments.

        Returns:
            A dict of action lists for each environment, keyed by environment
            name (merged under the default env in single-env mode).
        """
        result = {env.name: env.action_space for env in self.environment_map.values()}
        if self.multienv:
            return result
        return self._merge_lists(result)

    def _verify_spaces(self) -> None:
        """Make sure all action and observation names are unique."""
        observation_name_set = set()
        action_name_set = set()
        for env in self.environment_map.values():
            for action in env.action_space:
                if action.name in action_name_set:
                    raise ValueError(
                        "Duplicated action names are not allowed in single "
                        "environment benchmark."
                    )
                action_name_set.add(action.name)
            for observation in env.observation_space:
                if observation.name in observation_name_set:
                    raise ValueError(
                        "Duplicated observation names are not allowed in the "
                        "single environment benchmark."
                    )
                observation_name_set.add(observation.name)

    def _generate_action_map(self) -> None:
        # Map each action name to its owning environment (single-env routing).
        self.action_map: dict[str, Environment] = {}
        for env in self.environment_map.values():
            for action in env.action_space:
                self.action_map[action.name] = env

    def _get_env(
        self, env_name: str | None = None, action_name: str | None = None
    ) -> Environment:
        # If env_name is given, use it directly.
        if env_name is not None:
            return self.environment_map[env_name]
        # Otherwise: in single-env mode route by action name; in multienv
        # mode fall back to the default environment.
        if action_name is not None and not self.multienv:
            return self.action_map[action_name]
        return self.environment_map[self.default_env]

    def _take_env_action(self, action: Action) -> Any:
        # Execute a setup/teardown-style action in its declared (or default) env.
        if action.env_name is None:
            env = self.environment_map[self.default_env]
        else:
            env = self.environment_map[action.env_name]
        return env.take_action(action)

    def _set_env_action(self, action: Action) -> None:
        # Register an extra action into its declared (or default) env's space.
        if action.env_name is None:
            env = self.environment_map[self.default_env]
        else:
            env = self.environment_map[action.env_name]
        env.set_action(action)
        if not self.multienv:
            self.action_map[action.name] = env

    def _reset_environments(self):
        for env in self.environment_map.values():
            env.reset()
        # Environments may rebuild their action spaces on reset.
        if not self.multienv:
            self._generate_action_map()

    def _get_task_by_id(self, task_id: str) -> Task:
        result = [task for task in self.tasks if task_id == task.id]
        if len(result) == 0:  # Task not found
            raise TaskNotFound(f"No such task: {task_id}")
        return result[0]

    def _merge_dicts(
        self, env_dict: dict[str, dict[str, Any]]
    ) -> dict[str, dict[str, Any]]:
        "In single environment mode, merge action_space/observation_space in root."
        result = {}
        for dict_value in env_dict.values():
            result.update(dict_value)
        return {self.default_env: result}

    def _merge_lists(self, env_dict: dict[str, list]) -> dict[str, list]:
        "In single environment mode, merge action_space/observation_space in root."
        result = []
        for dict_value in env_dict.values():
            result.extend(dict_value)
        return {self.default_env: result}
def create_benchmark(config: BenchmarkConfig) -> Benchmark:
    """Creates a benchmark by BenchmarkConfig.

    Args:
        config: The benchmark configuration, including environment configs.

    Returns:
        A fully constructed :class:`Benchmark` with instantiated environments.

    Raises:
        ValueError: If *config* is not a :class:`BenchmarkConfig`.
    """
    if not isinstance(config, BenchmarkConfig):
        # Typo fix: was "Unsupport benchmark config type."
        raise ValueError("Unsupported benchmark config type.")
    environments = [
        create_environment(env_config) for env_config in config.environments
    ]
    # Reuse the config's fields as constructor kwargs, swapping the env
    # configs for the instantiated environments.
    parameters = dict(config)
    parameters["environments"] = environments
    return Benchmark(**parameters)
================================================
FILE: crab/core/csv_log.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import csv
from pathlib import Path
from typing import Any
class CSVLog:
    """Append-only CSV logger with a fixed header row.

    The header is written once when the file is first created; later instances
    pointing at the same path append rows without rewriting it.
    """

    def __init__(self, csv_path: Path, headers: list[str]) -> None:
        """Create the log file with *headers* if it does not exist yet.

        Args:
            csv_path: Destination CSV file path.
            headers: Column names; every row must have one value per column.
        """
        self.csv_path = csv_path
        self.header = headers
        if not csv_path.exists():
            with open(csv_path, "w", newline="") as file:
                writer = csv.writer(file)
                writer.writerow(headers)

    def write_row(self, data: list[Any]) -> None:
        """Append one row to the log file.

        Raises:
            ValueError: If *data* does not match the header length.
        """
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if len(data) != len(self.header):
            raise ValueError(
                f"Row has {len(data)} values but header has "
                f"{len(self.header)} columns."
            )
        with open(self.csv_path, "a", newline="") as file:
            writer = csv.writer(file)
            writer.writerow(data)
================================================
FILE: crab/core/decorators.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Callable
from .models import Action, Evaluator
def _decorator(func, cls: type[Action], options: dict | None = None) -> Action:
    """Build a *cls* instance from *func*, then apply optional attribute overrides.

    Shared implementation behind the ``@action`` and ``@evaluator`` decorators.
    """
    wrapped = cls.from_function(func)
    if options:
        for key, value in options.items():
            setattr(wrapped, key, value)
    return wrapped
def action(*args: Callable, env_name: str | None = None, local=False):
"""Use @action to change a function to an Action"""
if args and callable(args[0]):
return _decorator(args[0], Action)
return lambda func: _decorator(func, Action, {"env_name": env_name, "local": local})
def evaluator(
*args: Callable,
require_submit: bool = False,
env_name: str | None = None,
local=False,
):
"""Use @evaluator to change a function to an Evaluator"""
if args and callable(args[0]):
return _decorator(args[0], Evaluator)
return lambda func: _decorator(
func,
Evaluator,
{"require_submit": require_submit, "env_name": env_name, "local": local},
)
================================================
FILE: crab/core/environment.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import json
import logging
from typing import Any
from httpx import Client
from crab.utils import decrypt_message, encrypt_message, generate_key_from_env
from crab.utils.measure import timed
from .exceptions import ActionNotFound
from .models import Action, ClosedAction, EnvironmentConfig
logger = logging.getLogger("crab-server")
class Environment:
    """
    A crab environment for language model agent interaction and evaluation.

    This class supports action execution and observation within a simulated or
    actual ecosystem. The environment is defined by customizable action and
    observation spaces, comprising various crab actions. Actions should include
    comprehensive docstrings to facilitate agent understanding and interaction.

    Typically, users instantiate this class directly to perform actions within
    the local execution context (i.e., the device running the crab framework).
    This class may also serve as a base for specialized environments requiring
    unique action execution processes, such as forwarding actions to remote
    systems for execution. This is achieved by overriding the `take_action`
    method.

    Actions defined in the `action_space`, `observation_space`, or `reset`, as
    well as those invoked through the `take_action` method that include an
    `env` parameter, will have this parameter automatically populated with the
    current environment instance. This allows actions to access and manipulate
    environment states and variables.

    Attributes:
        name (str): The name of the environment.
        description (str): A description of the environment.
        trajectory (list[tuple[str, dict[str, Any], Any]]): A record of actions
            taken, their parameters, and the results.

    Args:
        name (str): The name of the environment.
        action_space (list[Action]): A list of actions that can be executed,
            defining the possible interactions agents can undertake.
        observation_space (list[ClosedAction]): A list of observations defining
            the possible states agents can perceive.
        description (str, optional): A textual description of the environment.
            Defaults to an empty string.
        reset (Action | None, optional): An action to reset the environment to
            its initial state. Defaults to `None`.
        remote_url (str | None, optional): If set, the action will be taken at
            remote machine, by default it will be taken at local. Example:
            `http://192.168.1.1:8000`. Defaults to `None`.
        extra_attributes (dict[str, Any] | None, optional): Extra attributes
            set on the instance so actions can reach them via their `env`
            parameter. Defaults to `None` (no extra attributes).
    """

    def __init__(
        self,
        name: str,
        action_space: list[Action],
        observation_space: list[ClosedAction],
        description: str = "",
        reset: Action | None = None,
        remote_url: str | None = None,
        extra_attributes: dict[str, Any] | None = None,
    ) -> None:
        self.name = name
        self.description = description
        self.trajectory: list[tuple[str, dict[str, Any], Any]] = []
        self.observation_history: list[dict[str, Any]] = []
        self._origin_action_space = action_space
        self._observation_space = observation_space
        self._reset = reset
        self._action_map = {action.name: action for action in action_space}
        self._client: Client | None = None
        if remote_url is not None:
            self._client = Client(base_url=remote_url, timeout=60)
        # `extra_attributes` defaults to None instead of a shared mutable
        # dict; attach each entry on the instance for actions to read.
        for key, value in (extra_attributes or {}).items():
            setattr(self, key, value)
        self._enc_key = generate_key_from_env()

    def step(
        self,
        action_name: str,
        parameters: dict[str, Any] | None = None,
    ):
        """
        Executes an action that is in the action space and recorded to the trajectory.

        Args:
            action_name: Name of the action to execute. Must be in action space.
            parameters (dict[str, Any] | None, optional): Parameters for the
                action. Defaults to `None`, meaning no parameters.

        Returns:
            Any: The result of the action execution.

        Raises:
            ActionNotFound: If the action is not found within the environment's action
                space.
        """
        parameters = {} if parameters is None else parameters
        if action_name not in self._action_map:
            logger.error(f'Env "{self.name}": receives unknown action "{action_name}"')
            raise ActionNotFound(f"Action {action_name} not found in the environment")
        action_handler = self._action_map[action_name]
        result = self.take_action(action_handler, parameters)
        self.trajectory.append((action_handler.name, parameters, result))
        return result

    def take_action(
        self,
        action: Action,
        parameters: dict[str, Any] | None = None,
    ) -> Any:
        """
        Executes an action within the environment.

        Args:
            action (Action): The action to execute.
            parameters (dict[str, Any] | None, optional): Parameters for the
                action. Defaults to `None`, meaning no parameters.

        Returns:
            Any: The result of the action execution.
        """
        parameters = {} if parameters is None else parameters
        try:
            result = self._action_endpoint(action, parameters)
            # Both fragments must be f-strings; previously the second was a
            # plain string, so the literal text "{result}" was logged.
            logger.info(
                f'Env "{self.name}": action: "{action.name}" succeeded. '
                f"result: {result}."
            )
            return result
        except Exception:
            logger.exception(
                f'Env "{self.name}": action: "{action}" failed:', stack_info=True
            )
            raise

    @timed
    def observe(self) -> dict[str, Any]:
        """
        Observes the current state.

        Returns:
            dict[str, Any]: A dictionary containing the current observations. Keys
                represent the names of the observation actions.
        """
        result = {o.name: self.take_action(o) for o in self.observation_space}
        self.observation_history.append(result)
        return result

    @timed
    def observe_with_prompt(
        self, prompt_tools: dict[str, Action]
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """
        Observes the current state with prompt.

        Each observation whose name appears in `prompt_tools` is additionally
        fed (as the tool's single required parameter) through the matching
        prompt action, producing a parallel dict of prompt results.

        Args:
            prompt_tools: Mapping from observation name to the prompt action
                applied to that observation's value.

        Returns:
            tuple[dict[str, Any], dict[str, Any]]: The raw observations and
                the per-observation prompt results.
        """
        observations = self.observe()
        prompts = {}
        for ob_name, value in observations.items():
            if ob_name in prompt_tools:
                action = prompt_tools[ob_name]
                # The prompt tool is assumed to have exactly one required
                # parameter, which receives the observation value.
                key = next(iter(action.get_required_params()))
                prompts[ob_name] = self._action_endpoint(action, {key: value})
        return observations, prompts

    def set_action(self, action: Action) -> None:
        """
        Adds an action in the environment's action space, either replace if the action
        name exist.

        Args:
            action (Action): The action to replace or add.
        """
        self._action_map[action.name] = action

    def start(self) -> None:
        """Starts the environment."""
        pass

    def close(self) -> None:
        """Closes the environment, performing any necessary cleanup."""
        pass

    def reset(self) -> None:
        """Resets the environment based on the provided reset action"""
        self._action_space = self._origin_action_space
        # Fixed: previously this assigned to `self.action_map` (no
        # underscore), leaving `self._action_map` — the mapping actually
        # consulted by `step` — untouched, so the reset never took effect.
        self._action_map = {action.name: action for action in self._action_space}
        if self._reset is not None:
            self.take_action(self._reset)

    @property
    def action_space(self) -> list[Action]:
        # Derived from the live action map so `set_action` updates are visible.
        return list(self._action_map.values())

    @property
    def observation_space(self) -> list[ClosedAction]:
        return self._observation_space

    def _action_endpoint(self, action: Action, parameters: dict[str, Any]):
        """Rewrite to support different environments.

        If a remote client is configured and the action is not marked
        `local`, the serialized action is POSTed to the remote `/raw_action`
        endpoint (encrypted when an encryption key is available); otherwise
        the action runs in-process with `env` injected.
        """
        if self._client is not None and not action.local:
            data = json.dumps(
                {
                    "action": action.to_raw_action(),
                    # Validate/normalize parameters through the action's
                    # pydantic model before shipping them.
                    "parameters": action.parameters(**parameters).model_dump(),
                }
            )
            content_type = "application/json"
            if self._enc_key is not None:
                data = encrypt_message(data, self._enc_key)
                content_type = "text/plain"
            # send action to remote machine
            response = self._client.post(
                "/raw_action",
                content=data,
                headers={"Content-Type": content_type},
            )
            resp_content = response.content.decode("utf-8")
            if self._enc_key is not None:
                resp_content = decrypt_message(resp_content, self._enc_key)
            resp_json = json.loads(resp_content)
            return resp_json["action_returns"]
        else:
            # or directly execute it
            action = action.set_kept_param(env=self)
            return action.run(**parameters)
def create_environment(config):
    """Instantiate an :class:`Environment` from an ``EnvironmentConfig``.

    Args:
        config: The configuration object; must be an ``EnvironmentConfig``.

    Raises:
        ValueError: If *config* is of any other type.
    """
    if not isinstance(config, EnvironmentConfig):
        raise ValueError("Unsupported environment config type.")
    return Environment(**dict(config))
================================================
FILE: crab/core/exceptions.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
class ActionNotFound(ValueError):
    """Raised when a requested action is not in an environment's action space."""
class TaskNotFound(ValueError):
    """Raised when a task id cannot be resolved to a known task."""
================================================
FILE: crab/core/experiment.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import json
import traceback
from datetime import datetime
from pathlib import Path
from time import sleep
from typing import Literal
from crab.utils.common import base64_to_image
from .agent_policy import AgentPolicy
from .benchmark import Benchmark
from .csv_log import CSVLog
from .models import ActionOutput, MessageType
# Column headers for the per-run metrics CSV, one row per agent step
# (see Experiment.write_current_log_row).
CURRENT_EXPERIMENT_COLUMNS = [
    "step",
    "action",
    "total_nodes",
    "complete_nodes",
    "completeness",
    "completeness_per_action",
    "step_to_complete",
    "longest_unfinished_path_length",
    "token_usage",
]
# Column headers for the top-level summary CSV, one row per finished or
# aborted run (see Experiment.write_main_csv_row).
MAIN_LOG_COLUMNS = [
    "time",
    "agent_policy",
    "model",
    "task_id",
    "total_steps",
    "terminate_reason",
    "total_nodes",
    "complete_nodes",
    "completeness",
    "completeness_per_action",
    "step_to_complete",
    "longest_unfinished_path_length",
    "token_usage",
]
class Experiment:
    """Runs one agent policy against one benchmark task.

    Drives the observe -> chat -> act loop, records per-step graph-evaluator
    metrics and the final outcome to CSV logs, and dumps prompts and
    screenshots under ``log_dir`` for later inspection.

    Args:
        benchmark: The benchmark providing environments, tasks, and stepping.
        task_id: Id of the task to run.
        agent_policy: The policy driving the agents, or the literal string
            ``"human"`` to run the benchmark's human evaluation instead.
        log_dir: Root directory for logs; when ``None``, no logs are written.
    """

    def __init__(
        self,
        benchmark: Benchmark,
        task_id: str,
        agent_policy: AgentPolicy | Literal["human"],
        log_dir: Path | None = None,
    ) -> None:
        self.benchmark = benchmark
        self.task_id = task_id
        self.agent_policy = agent_policy
        self.log_dir = log_dir

    def write_message(self, message: str, step: int):
        """Append a free-form message for *step* to the messages log.

        NOTE(review): `self.message_path` is only set by `init_log_dir` when
        `log_dir` is not None — do not call this in no-log runs.
        """
        with open(self.message_path, "a") as file:
            file.write("=" * 20 + f"Step: {step}" + "=" * 20 + "\n" + message + "\n")

    def write_task_info_json(self, task_info_path: Path):
        """Dump a JSON summary of the task and each environment's spaces."""
        envs_info = {}
        for name, env in self.benchmark.environment_map.items():
            actions = {
                name: action.description for name, action in env._action_map.items()
            }
            observations = {
                action.name: action.description for action in env._observation_space
            }
            envs_info[name] = {
                "description": env.description,
                "actions": actions,
                "observations": observations,
            }
        task_info = {
            "benchmark_name": self.benchmark.name,
            "task_id": self.task_id,
            "task_description": self.task.description,
            "envs": envs_info,
        }
        with open(task_info_path, "w") as file:
            json.dump(task_info, file, indent=4)

    def init_log_dir(self):
        """Create the per-run directory tree and CSV logs under `log_dir`.

        Does nothing when `log_dir` is None (all `write_*` helpers also
        no-op in that case).
        """
        if self.log_dir is not None:
            self.log_dir.mkdir(exist_ok=True, parents=True)
            self.main_log = CSVLog(self.log_dir / "main_log.csv", MAIN_LOG_COLUMNS)
            self.task_info_dir = self.log_dir / self.task_id
            self.task_info_dir.mkdir(exist_ok=True, parents=True)
            self.write_task_info_json(self.task_info_dir / "task_info.json")
            self.time_now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
            # Layout: <log_dir>/<task_id>/<Policy(model)>/<timestamp>/
            self.current_experiment_dir = (
                self.task_info_dir / f"{self.agent_policy.__class__.__name__}"
                f"({self.agent_policy.get_backend_model_name()})" / self.time_now
            )
            self.current_experiment_dir.mkdir(parents=True)
            self.current_experiment_log = CSVLog(
                self.current_experiment_dir / "metrics.csv", CURRENT_EXPERIMENT_COLUMNS
            )
            self.prompt_path = self.current_experiment_dir / "prompt"
            self.image_path = self.current_experiment_dir / "images"
            self.prompt_path.mkdir()
            self.image_path.mkdir()
            self.message_path = self.current_experiment_dir / "messages.txt"

    def get_prompt(self) -> dict[str, list[tuple[str, MessageType]]]:
        """Return the current multi-environment observation prompt."""
        return self.benchmark.observe()

    def execute_action(self, response: list[ActionOutput]) -> bool:
        """Execute the agent's actions in order; return True on termination."""
        for action in response:
            benchmark_result = self.benchmark.step(
                action=action.name,
                parameters=action.arguments,
                env_name=action.env,
            )
            self.metrics = benchmark_result.evaluation_results
            if benchmark_result.terminated:
                print("\033[92m" f"Task finished, result: {self.metrics}" "\033[0m")
                self.write_current_log_row(action)
                self.write_main_csv_row(benchmark_result.info["terminate_reason"])
                if "exception_detail" in benchmark_result.info:
                    self.write_exception_detail(
                        benchmark_result.info["exception_detail"]
                    )
                return True
            print(
                "\033[92m"
                f'Action "{action.name}" in env "{action.env}" success. '
                f"Current evaluation results: {self.metrics}\n"
                "\033[0m"
            )
            self.write_current_log_row(action)
            self.step_cnt += 1
        return False

    def log_prompt(self, prompt):
        """Append this step's prompt to each environment's markdown log.

        Image messages are saved as PNGs and referenced from the markdown;
        text messages are written inline.
        """
        for env in prompt:
            with open(self.prompt_path / f"{env}_prompt.md", "a") as prompt_file:
                prompt_file.write(f"### Step {self.step_cnt}\n\n")
                for message, message_type in prompt[env]:
                    if message_type == MessageType.IMAGE_JPG_BASE64:
                        file_name = f"{env}_{self.step_cnt}.png"
                        base64_to_image(message).save(self.image_path / file_name)
                        # Link the saved screenshot into the markdown log.
                        # Previously an empty f-string was written here, so
                        # the saved image was never referenced.
                        prompt_file.write(f"![{file_name}](../images/{file_name})\n\n")
                    else:
                        prompt_file.write(message + "\n\n")

    def step(self, it) -> bool:
        """Run one agent step; return True when the run should terminate."""
        print("=" * 40)
        print(f"Start agent step {self.step_cnt}:")
        prompt = self.get_prompt()
        self.log_prompt(prompt)
        try:
            response = self.agent_policy.chat(prompt)
        except Exception:
            # An agent-side failure terminates the run but is still logged.
            print(traceback.format_exc())
            self.write_main_csv_row("agent_exception")
            self.write_exception_detail(traceback.format_exc())
            return True
        print(f"So agent take action: {response}")
        return self.execute_action(response)

    def start_benchmark(self):
        """Run the full experiment: setup, agent loop (max 50 steps), logging."""
        if self.agent_policy == "human":
            self.benchmark.human_evaluation(self.task_id)
            return
        env_description = {}
        for env in self.benchmark.environment_map:
            env_description[env] = self.benchmark.environment_map[env].description
        self.task, action_space = self.benchmark.start_task(self.task_id)
        self.agent_policy.reset(
            task_description=self.task.description,
            action_spaces=action_space,
            env_descriptions=env_description,
        )
        print(
            f'Start benchmark "{self.benchmark.name}", task id "{self.task.id}": '
            f'"{self.task.description}"'
        )
        self.init_log_dir()
        self.step_cnt = 0
        self.metrics = self.benchmark.evaluate()
        # A fresh task must start with zero completed evaluator nodes.
        if self.metrics["complete_nodes"] != 0:
            print("Graph Evaluator start with non-zero value. Check environment setup.")
            return
        for it in range(50):
            try:
                terminated = self.step(it)
            except KeyboardInterrupt:
                self.write_main_csv_row("keyboard_interrupt")
                return
            if terminated:
                return
            sleep(2)

    def write_exception_detail(self, exception_info: str):
        """Write the failure traceback next to the run's metrics (if logging)."""
        if self.log_dir is None:
            return
        with open(self.current_experiment_dir / "exception_detail.txt", "w") as file:
            file.write(exception_info)

    def write_current_log_row(self, action):
        """Append one per-step metrics row (no-op when logging is disabled)."""
        if self.log_dir is None:
            return
        self.current_experiment_log.write_row(
            [
                self.step_cnt,
                str(action),
                self.metrics["total_nodes"],
                self.metrics["complete_nodes"],
                self.metrics["completeness"],
                self.metrics["completeness_per_action"],
                self.metrics["step_to_complete"],
                self.metrics["longest_unfinished_path_length"],
                self.agent_policy.get_token_usage(),
            ]
        )

    def write_main_csv_row(self, terminate_reason):
        """Append the run's summary row (no-op when logging is disabled)."""
        if self.log_dir is None:
            return
        self.main_log.write_row(
            [
                self.time_now,
                self.agent_policy.__class__.__name__,
                self.agent_policy.get_backend_model_name(),
                self.task_id,
                self.step_cnt,
                terminate_reason,
                self.metrics["total_nodes"],
                self.metrics["complete_nodes"],
                self.metrics["completeness"],
                self.metrics["completeness_per_action"],
                self.metrics["step_to_complete"],
                self.metrics["longest_unfinished_path_length"],
                self.agent_policy.get_token_usage(),
            ]
        )
================================================
FILE: crab/core/graph_evaluator.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from collections import deque
from typing import Any
import networkx as nx
from .environment import Environment
from .models import Evaluator
class GraphEvaluator:
    """Tracks task progress as a DAG of evaluator nodes.

    Each node is an `Evaluator` action; an edge A -> B means A must pass
    before B is checked (unless `enable_shortcut` is set). `step` runs the
    currently-reachable evaluators against the environments and records, per
    node, the step index at which it first passed (`passing_count`). Progress
    statistics are recomputed after every step via `update`.
    """

    def __init__(
        self,
        incoming_graph_data,
        enable_shortcut: bool = False,
    ) -> None:
        self.G = nx.DiGraph(incoming_graph_data)
        # Progress metrics below assume an acyclic graph with >= 1 node.
        assert nx.is_directed_acyclic_graph(self.G)
        self.count: int = 0
        self.total_nodes: int = self.G.number_of_nodes()
        assert self.total_nodes != 0
        self.complete_nodes: int = 0
        self.completeness: float = 0.0
        self.completeness_per_action: float = 0.0
        # Initial pessimistic estimates: all edges remain, longest path full.
        self.step_to_complete: int = self.G.number_of_edges()
        self.longest_unfinished_path_length: int = nx.dag_longest_path_length(self.G)
        # When True, every node is evaluated each step regardless of whether
        # its predecessors have passed yet.
        self.enable_shortcut: bool = enable_shortcut
        # Set the sink node for the DAG:
        sink_nodes: list[Evaluator] = [
            node for node, out_degree in self.G.out_degree() if out_degree == 0
        ]
        if len(sink_nodes) != 1:
            raise ValueError("Graph should have exactly one sink node.")
        self.sink_node: Evaluator = sink_nodes[0]
        # In human mode, `local` evaluators are auto-passed (see `step`).
        self.human_mode = False
        self.reset()

    def reset(self):
        """Clear all pass state so evaluation can start from scratch."""
        self.count = 0
        for node in self.G.nodes():
            # A node becomes evaluable once remaining_predecessors hits 0;
            # passing_count is the step index at which it passed (None = not yet).
            self.G.nodes[node]["remaining_predecessors"] = self.G.in_degree(node)
            self.G.nodes[node]["passing_count"] = None

    def step(
        self,
        envs: dict[str, Environment],
        default_env: str = "root",
    ):
        """Run one evaluation round against *envs*, then refresh statistics.

        Repeatedly evaluates the current frontier of ready nodes; a pass may
        unlock successors, which are evaluated in the same call (each node at
        most once per call). Raises ValueError if already complete.
        """
        if self.is_complete():
            raise ValueError(
                "GraphEvaluator has already completed and "
                "cannot perform another step."
            )
        run_evaluators = set()
        evaluators = self.get_next_source_nodes()
        while evaluators:
            for evaluator in evaluators:
                if evaluator.local and self.human_mode:
                    # Human mode: local evaluators are assumed to pass.
                    result = True
                else:
                    environment = envs[evaluator.env_name or default_env]
                    result = environment.take_action(evaluator)
                if result:
                    self.G.nodes[evaluator]["passing_count"] = self.count
                    self.complete_nodes += 1
                    # Unlock successors of the node that just passed.
                    for _, out_node in self.G.out_edges(evaluator):
                        self.G.nodes[out_node]["remaining_predecessors"] -= 1
            if self.is_complete():
                # Sink passed; count everything as complete and stop early.
                self.complete_nodes = self.total_nodes
                break
            # Avoid re-running nodes already evaluated in this call.
            run_evaluators.update(evaluators)
            evaluators = self.get_next_source_nodes() - run_evaluators
        self.update()

    def get_next_source_nodes(self) -> set[Evaluator]:
        r"""Get next source nodes to evaluate.

        Without shortcut: unpassed nodes whose predecessors have all passed.
        With shortcut: every node in the graph.
        """
        if not self.enable_shortcut:
            source_nodes: list[Evaluator] = []
            for node in self.G.nodes(data=True):
                if (
                    node[1]["passing_count"] is None
                    and node[1]["remaining_predecessors"] == 0
                ):
                    source_nodes.append(node[0])
        else:
            source_nodes = list(self.G.nodes())
        return set(source_nodes)

    def entry(self) -> bool:
        # True when every node in the graph has passed.
        # NOTE(review): unlike is_complete(), this checks all nodes, not just
        # the sink — confirm the intended distinction with callers.
        return all(count is not None for _, count in self.G.nodes(data="passing_count"))

    def update(self):
        """Recompute all progress statistics after a step."""
        self.count += 1
        self.completeness = float(self.complete_nodes / self.total_nodes)
        self.completeness_per_action = self.completeness / self.count
        self.step_to_complete = self.calculate_step_to_complete()
        self.longest_unfinished_path_length = (
            self.calculate_longest_unfinished_path_length()
        )

    def calculate_longest_unfinished_path_length(self) -> int:
        """Length (in edges) of the longest all-unfinished path ending at the sink.

        Walks backwards from the sink over not-yet-passed predecessors.
        Returns 0 once the sink itself has passed.
        """
        longest_path_length: int = 0
        if self.G.nodes[self.sink_node]["passing_count"] is not None:
            return longest_path_length
        # Initialize set to keep track of visited nodes
        visited = set()
        # Initialize queue for BFS
        queue = deque([[self.sink_node]])
        # BFS traversal with path
        while queue:
            path = queue.popleft()
            node = path[0]
            # Mark the node as visited
            visited.add(node)
            # len(path) counts nodes, so edges = len(path) - 1. NOTE(review):
            # this also decrements the previously recorded maximum; it only
            # yields the right final value because BFS pops paths in
            # non-decreasing length order — confirm before refactoring.
            longest_path_length = max(len(path), longest_path_length) - 1
            # Explore predecessor of the current node
            for predecessor in self.G.predecessors(node):
                # If predecessor is complete, skip it
                if self.G.nodes[predecessor]["passing_count"] is not None:
                    continue
                elif predecessor not in visited:
                    # Add path with predecessor to queue
                    queue.append([predecessor] + path)
        return longest_path_length

    def calculate_step_to_complete(self) -> int:
        """Count incoming edges of all unfinished ancestors of the sink.

        Returns 0 once the sink has passed. Used as a rough estimate of the
        remaining work.
        """
        # Initialize count for incomplete edges
        incomplete_edges: int = 0
        if self.G.nodes[self.sink_node]["passing_count"] is not None:
            return incomplete_edges
        # Initialize set to keep track of visited nodes
        visited = set()
        # Initialize queue for BFS
        queue = deque([self.sink_node])
        # BFS traversal
        while queue:
            # Pop node from queue
            node = queue.popleft()
            # Mark the node as visited
            visited.add(node)
            incomplete_edges += len(list(self.G.predecessors(node)))
            # Explore predecessor of the current node
            for predecessor in self.G.predecessors(node):
                # If predecessor is complete, skip it
                if self.G.nodes[predecessor]["passing_count"] is not None:
                    continue
                elif predecessor not in visited:
                    # Add predecessor to queue
                    queue.append(predecessor)
        return incomplete_edges

    def is_complete(self) -> bool:
        """Whether the sink evaluator has passed (i.e. the task is finished)."""
        return self.G.nodes[self.sink_node]["passing_count"] is not None

    def get_completeness(self) -> float:
        return self.completeness

    def get_completeness_per_action(self) -> float:
        return self.completeness_per_action

    def get_step_to_complete(self) -> int:
        return self.step_to_complete

    def get_longest_unfinished_path_length(self) -> int:
        return self.longest_unfinished_path_length

    def stat(self) -> dict[str, Any]:
        """Snapshot of all progress statistics as a plain dict."""
        return {
            "total_nodes": self.total_nodes,
            "complete_nodes": self.complete_nodes,
            "completeness": self.completeness,
            "completeness_per_action": self.completeness_per_action,
            "step_to_complete": self.step_to_complete,
            "longest_unfinished_path_length": self.longest_unfinished_path_length,
        }

    def _check_submit(self, environment: Environment) -> bool:
        """
        Check if the last action is _submit. If yes, return its result, either return
        False.
        """
        if not environment.trajectory:
            return False
        # Trajectory entries are (action_name, parameters, result) tuples.
        last_action = environment.trajectory[-1]
        if last_action[0] != "_submit":
            return False
        return last_action[2]

    def compute_radar_stats(self) -> dict[str, float]:
        """Three normalized [0, 1] metrics for the radar plot in `visualize`.

        NOTE(review): divides by the DAG's longest path length — a
        single-node graph (path length 0) would raise ZeroDivisionError.
        """
        longest_path_length = nx.dag_longest_path_length(self.G)
        return {
            "Completeness": float(self.completeness),
            "Efficiency": float(self.completeness_per_action),
            "Path Completeness Ratio": (
                longest_path_length - self.longest_unfinished_path_length
            )
            / longest_path_length,
        }

    @staticmethod
    def visualize(evaluators: list["GraphEvaluator"], path: str):
        """Render one radar trace per evaluator and write the image to *path*.

        Imports plotly lazily so it is only required when visualizing.
        """
        import plotly.graph_objects as go

        fig = go.Figure()
        for i, evaluator in enumerate(evaluators):
            radar_stats = evaluator.compute_radar_stats()
            fig.add_trace(
                go.Scatterpolar(
                    r=list(radar_stats.values()),
                    theta=list(radar_stats.keys()),
                    fill="toself",
                    name=f"Graph Evaluator {i}",
                )
            )
        fig.update_layout(
            polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
            showlegend=True,
        )
        fig.update_layout(
            margin=dict(l=150, r=150, t=150, b=150),
        )
        fig.write_image(path, scale=12, width=600, height=600)
================================================
FILE: crab/core/models/__init__.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# ruff: noqa: F401
from .action import Action, ClosedAction
from .agent_interface import ActionOutput, BackendOutput, Message, MessageType
from .benchmark_interface import StepResult
from .config import BenchmarkConfig, EnvironmentConfig, VMEnvironmentConfig
from .evaluator import Evaluator
from .task import GeneratedTask, SubTask, SubTaskInstance, Task
__all__ = [
"Action",
"ClosedAction",
"MessageType",
"Message",
"ActionOutput",
"BackendOutput",
"StepResult",
"BenchmarkConfig",
"Task",
"SubTask",
"SubTaskInstance",
"GeneratedTask",
"Evaluator",
"EnvironmentConfig",
"VMEnvironmentConfig",
]
================================================
FILE: crab/core/models/action.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from functools import partial
from inspect import Parameter, Signature, signature
from types import NoneType
from typing import Annotated, Any, Callable, TypeAlias
from docstring_parser import parse
from pydantic import (
AfterValidator,
BaseModel,
ValidationError,
create_model,
model_serializer,
)
from pydantic.fields import FieldInfo
from crab.utils.common import callable_to_base64
try:
from typing import Self
except ImportError:
from typing_extensions import Self
# Parameter names the framework injects at runtime (e.g. the current
# environment); they are stripped from an action's public parameter model
# in `Action.from_function`.
KEPT_PARAMS = ["env"]
# Empty pydantic model used as the parameter schema of zero-argument actions.
EMPTY_MODEL = create_model("Empty")
class Action(BaseModel):
"""
The core operational unit within the Crab system.
This class stores parameters and return type definitions and can be easily converted
into a JSON schema. It supports argument verification and includes a feature for
retaining specific parameters.
Attributes:
name (str): The name of the action.
entry (Callable): The actual entry function of the action.
parameters (type[BaseModel]): Definition of input parameters.
returns (type[BaseModel]): Definition of the return type. Note: The actual
return type is specified by the `returns` attribute in this model.
description (str | None): A clear and concise description of the function's
purpose and behavior. Defaults to None.
kept_params (dict[str, Any]): Parameters retained for internal use by the Crab
system, such as 'env' for storing the current environment. These parameters
do not appear in the `parameters` field and are automatically injected at
runtime. Defaults to an empty dictionary.
env_name (Optinal[str]): Specify the environment the action is associated with.
Defualts to None.
"""
name: str
entry: Callable
parameters: type[BaseModel]
returns: type[BaseModel]
description: str | None = None
kept_params: list[str] = []
env_name: str | None = None
local: bool = False
def __eq__(self, other):
return super().__eq__(other)
def __hash__(self):
return hash(self.entry)
def __call__(self, *args: Any, **kwargs: Any) -> Self:
"""Sets default values for the action.
Direct calling of the action will not actully call the function, yet set
defaults values for the action, so the agent don't need to or only need to
provide part of the parameters.
This method has two mode, full setting and partial setting. Full setting mode is
applied when the user provides positional arguments, where all the required
parameters must be provide and the action parameters will be empty. While if
only keyword arguments are provided, partial setting mode is applied, where the
parameter model will not be changed but only change the default value of the
parameters.
Note:
Full setting mode is not stable.
"""
if args:
# this is closed function
result = self.model_copy(
update={
"entry": partial(self.entry, *args, **kwargs),
"parameters": EMPTY_MODEL,
}
)
if self.description is not None:
result.description = self.description + f" Input: {args} {kwargs}"
return result
else:
# or it should only contain kwargs
for key in kwargs:
# verify the kwargs exist
if key not in self.parameters.model_fields:
raise ValueError(
f'"{key}" is not a parameter of action "{self.name}"'
)
result = self.model_copy(
update={
"entry": partial(self.entry, **kwargs),
}
)
if self.description is not None:
result.description = self.description + f" Input: {args} {kwargs}"
return result
@staticmethod
def _check_combinable(a: "Action", b: "Action") -> None:
if set(a.kept_params) != set(b.kept_params):
raise ValueError("Piped actions should have same kept parameters.")
if a.env_name != b.env_name:
raise ValueError("Piped actions should have same env_name.")
if a.local != b.local:
raise ValueError("Piped actions should have same `local` value.")
def __rshift__(self, other_action: "Action") -> "Action":
"""Uses :obj:`>>` to pipe two actions together to form a new action.
The returned action executes the actions from left to right. The output of the
left action becomes the input to the right action, provided their parameters and
return types are compatible.
"""
required = other_action.get_required_params()
if len(required) != 1:
raise ValueError(
"Return type of the former action must mathces the parameter type "
"of the later action."
)
Action._check_combinable(self, other_action)
a_entry = self.entry
b_entry = other_action.entry
kept_params = self.kept_params.copy()
entry = lambda *args, **kwargs: b_entry(
a_entry(*args, **kwargs),
**{key: kwargs[key] for key in kwargs if key in kept_params},
)
return Action(
name=f"{self.name}_pipe_{other_action.name}",
description=f"First {self.description}. Then use the result of the "
f"former as input, {other_action.description}",
parameters=self.parameters,
returns=other_action.returns,
entry=entry,
kept_params=self.kept_params,
env_name=self.env_name,
local=self.local,
)
def __add__(self, other_action: "Action") -> "Action":
"""Uses :obj:`+` to combine two actions sequetially to form a new action.
The returned action executes the actions from left to right. Its return value
will be the return value of the right action.
Note:
"+" operator only support two action with no required parameters.
"""
self_required = self.get_required_params()
other_required = other_action.get_required_params()
if len(other_required) > 1 or len(self_required) > 1:
raise ValueError(
'"+" operator only support two action with no required parameters.'
)
Action._check_combinable(self, other_action)
a_entry = self.entry
b_entry = other_action.entry
entry = lambda **kwargs: (a_entry(**kwargs), b_entry(**kwargs))[1]
return Action(
name=f"{self.name}_then_{other_action.name}",
description=f"{self.description} Then, {other_action.description}",
parameters=EMPTY_MODEL,
returns=other_action.returns,
entry=entry,
kept_params=self.kept_params,
env_name=self.env_name,
local=self.local,
)
def run(self, **kwargs) -> Any:
"""Varifies the action parameters then runes the action."""
if self.kept_params:
raise RuntimeError("There are unassigned kept parameters.")
try:
kwargs = self.parameters(**kwargs).model_dump()
except ValidationError:
pass # TODO: Exeception handle
return self.entry(**kwargs)
def set_kept_param(self, **params) -> Self:
kept_params = {key: params[key] for key in params if key in self.kept_params}
result = self.model_copy()
result.kept_params = []
result.entry = partial(self.entry, **kept_params)
return result
def get_required_params(self) -> dict[str, FieldInfo]:
return {
name: info
for name, info in self.parameters.model_fields.items()
if info.is_required()
}
@model_serializer
def to_openai_json_schema(self) -> dict:
"""Gets openai json schema from an action"""
return {
"name": self.name,
"description": self.description,
"parameters": self.parameters.model_json_schema(),
# "returns": self.returns.model_json_schema()["properties"]["returns"],
}
def to_raw_action(self) -> dict[str, Any]:
"""Gets serialized action for remote execution"""
return {
"name": self.name,
"dumped_entry": callable_to_base64(self.entry),
"kept_params": list(self.kept_params),
}
@classmethod
def from_function(cls, func: Callable) -> Self:
    """Generates an action from functions annotated by @action.

    Builds the pydantic parameter model from the function signature and
    (if present) its Google-style docstring, plus a one-field model for
    the return value.
    """
    if func.__doc__ is None:
        # No docstring: proceed without any descriptions.
        # raise RuntimeError("The action must have a Google-style docstring.")
        parameters_descriptions = None
        func_description = None
        return_description = None
    else:
        # Parse the Google-style docstring for the summary and per-parameter
        # / return descriptions.
        docstring = parse(func.__doc__)
        parameters_descriptions = {
            param.arg_name: param.description for param in docstring.params
        }
        func_description = docstring.short_description or ""
        if docstring.long_description:
            func_description += "\n" + docstring.long_description
        if docstring.returns:
            return_description = docstring.returns.description
        else:
            return_description = None
    sign = signature(func)
    params = sign.parameters
    fields = {}
    kept_params = []
    for param_name, p in params.items():
        # Don't add kept parameters to the parameters' model; they are bound
        # later via set_kept_param instead of being supplied by the model.
        if param_name in KEPT_PARAMS:
            kept_params.append(param_name)
            continue
        # Variable parameters (*args / **kwargs) are not supported
        if p.kind in [Parameter.VAR_POSITIONAL, Parameter.VAR_KEYWORD]:
            continue
        # If the parameter type is not specified, it defaults to typing.Any
        annotation = Any if p.annotation is Parameter.empty else p.annotation
        # Check if the parameter has a description in the docstring
        param_description = None
        if parameters_descriptions is not None:
            param_description = parameters_descriptions.get(param_name, None)
        # Check if the parameter has a default value; required otherwise.
        # NOTE(review): when a default exists the docstring description is
        # not attached to the field — presumably unintentional; confirm.
        if p.default is Parameter.empty:
            fields[param_name] = (
                annotation,
                FieldInfo(description=param_description),
            )
        else:
            fields[param_name] = (annotation, FieldInfo(default=p.default))
    model: type[BaseModel] = create_model(func.__name__, **fields)  # type: ignore
    # Build a single-field model describing the return value; a missing
    # annotation falls back to typing.Any.
    return_annotation = (
        Any if sign.return_annotation == Signature.empty else sign.return_annotation
    )
    return_model: type[BaseModel] = create_model(
        func.__name__ + "_return",
        returns=(
            return_annotation or NoneType,
            FieldInfo(description=return_description, init=False),  # type: ignore
        ),
    )
    action = cls(
        name=func.__name__,
        entry=func,
        parameters=model,
        returns=return_model,
        description=func_description,
        kept_params=kept_params,
    )
    return action
def _check_no_param(action: Action) -> Action:
    """Validator: reject actions that still require caller-supplied parameters."""
    if len(action.get_required_params()) != 0:
        raise ValueError("ClosedAction should not accept any parameter.")
    return action


ClosedAction: TypeAlias = Annotated[Action, AfterValidator(_check_no_param)]
# Fix: "reuqired" -> "required" in the alias documentation string.
"""The action type alias with no required parameters"""
================================================
FILE: crab/core/models/agent_interface.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from enum import IntEnum
from typing import Any
from pydantic import BaseModel
from .action import Action
class MessageType(IntEnum):
    """Content type of a message exchanged with a backend model."""

    TEXT = 0
    IMAGE_JPG_BASE64 = 1


# A message pairs its content with its type: (content, MessageType).
# For IMAGE_JPG_BASE64, content is the base64-encoded JPEG string.
Message = tuple[str, MessageType]
class ActionOutput(BaseModel):
    """A single action call produced by a backend model."""

    # Name of the action within the environment's action space.
    name: str
    # Keyword arguments the action should be invoked with.
    arguments: dict[str, Any]
    # Target environment name; None presumably means the default
    # environment — confirm against the policy code that consumes this.
    env: str | None = None
class BackendOutput(BaseModel):
    """Parsed response from a backend model."""

    # Free-text part of the response; may be None (no default — must be set).
    message: str | None
    # Action calls extracted from the response; may be None.
    action_list: list[ActionOutput] | None
class EnvironmentInfo(BaseModel):
    """Description of an environment presented to an agent."""

    # Natural-language description of the environment.
    description: str
    # Actions the agent is allowed to take in this environment.
    action_space: list[Action]
================================================
FILE: crab/core/models/benchmark_interface.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any
from pydantic import BaseModel
class StepResult(BaseModel):
    """Outcome of a single benchmark step."""

    # Episode ended by an external cutoff (gym-style semantics —
    # presumably the step limit; confirm against Benchmark.step).
    truncated: bool
    # Episode ended because the task reached a terminal state.
    terminated: bool
    # Raw return value(s) of the executed action(s).
    action_returns: Any
    # Per-evaluator results gathered for this step.
    evaluation_results: dict[str, Any]
    # Additional diagnostic data.
    info: dict[str, Any]
================================================
FILE: crab/core/models/config.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any
from pydantic import BaseModel
from .action import Action, ClosedAction
from .task import Task
class EnvironmentConfig(BaseModel):
    """Declarative configuration for a single environment."""

    # Unique environment name.
    name: str
    # Actions the agent may take in this environment.
    action_space: list[Action]
    # Parameterless actions used to observe the environment state.
    observation_space: list[ClosedAction]
    description: str = ""
    # Optional action executed to reset the environment.
    reset: Action | None = None
    # URL of a remote crab server backing this environment; None for local.
    remote_url: str | None = None
    # Free-form extra data. NOTE: pydantic copies mutable defaults per
    # instance, so this shared ``{}`` literal is safe here.
    extra_attributes: dict[str, Any] = {}
class VMEnvironmentConfig(BaseModel):
    """Configuration for an environment hosted inside a virtual machine."""

    # The environment definition running inside the VM.
    inside_environment: EnvironmentConfig
    # Address of the crab server inside the VM. NOTE(review): 192.168.0.0 is
    # a network address, presumably a placeholder default — confirm.
    remote_url: str = "http://192.168.0.0:8000"
class BenchmarkConfig(BaseModel):
    """Top-level configuration of a benchmark run."""

    # Benchmark name.
    name: str
    # All tasks available in this benchmark.
    tasks: list[Task]
    # Environments the benchmark runs across.
    environments: list[EnvironmentConfig]
    # Name of the environment used when none is specified.
    default_env: str | None = None
    # Whether the benchmark spans multiple environments at once.
    multienv: bool = False
    # Per-environment prompting tool actions, keyed by environment name
    # then tool name — confirm key semantics against Benchmark usage.
    prompting_tools: dict[str, dict[str, Action]] = {}
    # Actions available regardless of environment.
    root_action_space: list[Action] = []
    # Maximum number of steps before an episode is truncated.
    step_limit: int = 30
    # Parameterless actions run once before the benchmark starts.
    common_setup: list[ClosedAction] = []
================================================
FILE: crab/core/models/evaluator.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from pydantic import BaseModel, field_validator
from .action import Action
class Evaluator(Action):
    """An :class:`Action` specialized for task evaluation; must return bool.

    Evaluators compose with ``&`` (both must pass), ``|`` (either passes)
    and ``~`` (negation).
    """

    # When True, this evaluator is only checked after the agent submits.
    require_submit: bool = False

    @field_validator("returns", mode="after")
    @classmethod
    def must_return_bool(cls, v: type[BaseModel]) -> type[BaseModel]:
        """Reject evaluators whose declared return type is not bool."""
        if v.model_fields["returns"].annotation is not bool:
            raise ValueError("Evaluator must return bool.")
        return v

    def __and__(self, other: "Evaluator") -> "Evaluator":
        """Combine two evaluators; the result passes only if both pass."""
        Action._check_combinable(self, other)
        result = self.model_copy()
        # Fix: assign a plain string — the original trailing comma made
        # ``name`` a one-element tuple instead of a str.
        result.name = f"{self.name}_and_{other.name}"
        result.description = f"{self.description} In the same time, {other.description}"
        self_entry = self.entry
        other_entry = other.entry
        result.entry = lambda: self_entry() and other_entry()
        return result

    def __or__(self, other: "Evaluator") -> "Evaluator":
        """Combine two evaluators; the result passes if either passes."""
        Action._check_combinable(self, other)
        result = self.model_copy()
        # Fix: same tuple-instead-of-str bug as in __and__.
        result.name = f"{self.name}_or_{other.name}"
        result.description = (
            f"{self.description} If the previous one fails {other.description}"
        )
        self_entry = self.entry
        other_entry = other.entry
        result.entry = lambda: self_entry() or other_entry()
        return result

    def __invert__(self) -> "Evaluator":
        """Negate this evaluator."""
        result = self.model_copy()
        result.name = f"not_{self.name}"
        result.description = (
            f"Check if the following description is False. {self.description}"
        )
        self_entry = self.entry
        result.entry = lambda: not self_entry()
        return result
================================================
FILE: crab/core/models/task.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any, Callable, Literal
from uuid import uuid4
import networkx as nx
from pydantic import (
BaseModel,
ConfigDict,
Field,
field_validator,
model_serializer,
)
from .action import Action, ClosedAction
from .evaluator import Evaluator
class Task(BaseModel):
    """A benchmark task: a description plus evaluator graph and setup/teardown."""

    # arbitrary_types_allowed is needed because nx.DiGraph is not a pydantic type.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Unique task identifier.
    id: str
    # Natural-language instruction given to the agent.
    description: str
    # Evaluation logic; a bare Evaluator is normalized into a one-node DiGraph.
    evaluator: nx.DiGraph | Evaluator
    # Actions run before/after the task; a single action is wrapped in a list
    # by the validator below.
    setup: list[ClosedAction] | ClosedAction = []
    teardown: list[ClosedAction] | ClosedAction = []
    # Task-specific actions added to the agent's action space.
    extra_action: list[Action] = []

    @field_validator("evaluator")
    @classmethod
    def change_evaluator_to_graph(
        cls, evaluator: nx.DiGraph | Evaluator
    ) -> nx.DiGraph:
        # Annotation fixed: this validator returns a DiGraph, not a str.
        """Wrap a bare Evaluator into a single-node DiGraph."""
        if isinstance(evaluator, Evaluator):
            graph = nx.DiGraph()
            graph.add_node(evaluator)
            return graph
        return evaluator

    @field_validator("setup", "teardown")
    @classmethod
    def to_list(cls, action: Action | list[Action]) -> list[Action]:
        """Normalize a single action into a one-element list."""
        if isinstance(action, Action):
            return [action]
        return action
class SubTask(BaseModel):
    """A composable task fragment used by the task generator.

    Subtasks are chained by matching one subtask's ``output_type`` against
    another's input attribute types.
    """

    # Unique subtask identifier.
    id: str
    # Description template; input attributes appear as {name} placeholders.
    description: str
    # Maps input-attribute name -> accepted type(s); a bare str is expanded
    # to a one-element list by the validator below.
    attribute_dict: dict[str, list[str] | str]
    # Type tag of this subtask's output, matched against other subtasks' inputs.
    output_type: str
    # Produces the output value, "manual" for human-provided, or None.
    output_generator: Callable[[Any], str] | Literal["manual"] | None = None
    # Builds the evaluator graph for a concrete instance of this subtask.
    evaluator_generator: Callable[[Any], nx.DiGraph] | None = None
    setup: list[ClosedAction] | ClosedAction = []
    teardown: list[ClosedAction] | ClosedAction = []
    extra_action: list[Action] = []

    def __hash__(self) -> int:
        # Hash by id so subtasks can be used as graph nodes / set members.
        return hash(self.id)

    @field_validator("attribute_dict")
    @classmethod
    def expand_attribute_type(
        cls,
        attribute_dict: dict[str, list[str] | str],
    ) -> dict[str, list[str]]:
        """Normalize every attribute type entry to a list of strings."""
        # Shallow copy so the caller's dict is not mutated.
        attribute_dict = attribute_dict.copy()
        for key in attribute_dict:
            if isinstance(attribute_dict[key], str):
                attribute_dict[key] = [attribute_dict[key]]
        return attribute_dict
class SubTaskInstance(BaseModel):
    """A :class:`SubTask` bound to concrete attribute values."""

    # The subtask template this instance was created from.
    task: SubTask
    # Concrete values for the subtask's input attributes.
    attribute: dict[str, Any]
    # The produced output value, if any.
    output: str | None = None
    # Fix: ``uuid4()`` returns a UUID object, violating the declared ``str``
    # type (pydantic does not validate defaults by default); stringify it.
    id: str = Field(default_factory=lambda: str(uuid4()))

    def __hash__(self) -> int:
        # Hash by id so instances can be used as graph nodes / set members.
        return hash(self.id)

    @model_serializer
    def dump_model(self) -> dict[str, Any]:
        """Serialize with the subtask's id instead of the full SubTask object."""
        return {
            "task": self.task.id,
            "attribute": self.attribute,
            "output": self.output,
        }
class GeneratedTask(BaseModel):
    """A composite task assembled from chained subtask instances."""

    # Combined natural-language description of the whole task.
    description: str
    # The subtask instances composing this task.
    tasks: list[SubTaskInstance]
    # Evaluator-graph structure encoded as a networkx adjacency list string.
    adjlist: str
    # Fix: ``uuid4()`` returns a UUID object, violating the declared ``str``
    # type (pydantic does not validate defaults by default); stringify it.
    id: str = Field(default_factory=lambda: str(uuid4()))
================================================
FILE: crab/core/task_generator.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# ruff: noqa: E501
import argparse
import importlib
import itertools
import json
import os
import random
from pathlib import Path
import networkx as nx
import yaml
from openai import OpenAI
from termcolor import colored
from .models import GeneratedTask, SubTask, SubTaskInstance, Task
# One-shot prompt for merging a source task description and a target task
# description into a single combined task description.
SYSTEM_PROMPT_SINGLE = """
You are a wise operator who is familiar with both the Ubuntu and Android operating
systems. Our goal is to use the output of the source task as the input for the target
task. You should describe of the task they combined together using several imperative
sentences. You cannot provide any extra information such as detailed operation method,
yet only combined the taks description together in a reasonable way. You shouldn't fill
in the input attribute wrapped by curly brackets.
Source task:
Find out the city located at coordinate (8.65759263086632, 7.520403498426244) via Google Maps.
Target task:
Set the screen background as the first figure of {city_name} in Google.
Answer:
Using Google Maps, find the city located at coordinates (8.65759263086632,7.520403498426244), search Google for the first image of that city, and set this image as the desktop background on an Ubuntu system.
"""
# User-message template for the combine step; filled via .format(task1=..., task2=...).
USER_PROMPT_SINGLE = """
Source task:
{task1}
Target task:
{task2}
Answer:
"""
# User-message template that asks the model to pick one target task;
# filled via .format(source_task=..., target_tasks=...).
SELECT_USER_START = """
Source attribute:
{source_task}
Target tasks:
{target_tasks}
Select a task from target tasks
Answer:
"""
# Few-shot system prompt for the selection step. The model must answer with
# three "======"-separated parts: chosen task index, combined description,
# and explanation.
SELECT_SYSTEM_PROMPT = """
You are a wise operator who is familiar with both the Ubuntu and Android operating
systems. Our goal is to use the output of the source task as the input for the target
task. You should identify the most reasonable target task from the list, explain why you
choose it, and output the description of the task they combined together using several
imperative sentences. It is crucial to establish a connection between the source and
target tasks and select the best one as the output. Remember, you must select at least
one with the crucial output format. You must include the provided value and every
details in each task. You must use "======" to seperate each part (selected task number,
combined task description, and explanation) Here is an example:
Source task:
Find out the city located at coordinate (8.65759263086632, 7.520403498426244) via Google Maps.
Target tasks:
Task 0: Set the screen background as the first figure of {input attribute} in Google.
Task 1: Close the progress of {input attribute} app via task manager.
Task 2: Download {input attribute} from the app store.
Task 3: Create a PowerPoint with one page containing Mount Alps.jpg and named as {input attribute 2}.
Task 4: Send message {input attribute 1} to +81 09074540472.
Answer:
0
======
Using Google Maps, find the city located at coordinates (8.65759263086632,7.520403498426244), search Google for the first image of that city, and set this image as the desktop background on an Ubuntu system.
======
This task is the most relevant and directly utilizes the output of the source task.
Finding the city provides us with a specific location which can easily lead to a visual
representation. Searching for an image of the city to set as a background is a practical
application that visually celebrates the discovery of the city's identity.
"""
# User-message template for the selection step; filled via
# .format(source_task=..., target_tasks=...).
SELECT_USER_PROMPT = """
Source task:
{source_task}
Target tasks:
{target_tasks}
Answer:
"""
class TaskGenerator:
"""Class to generate tasks based on a directed graph of subtasks."""
def __init__(
    self,
    attribute_pool: dict[str, list] | None = None,
    subtasks: list[SubTask] | None = None,
):
    """
    Initializes the TaskGenerator object.

    Parameters:
        attribute_pool (dict | None): A dictionary mapping attribute types to
            lists of possible values. Defaults to an empty dict.
        subtasks (list | None): A list of SubTask objects to be included in
            the task generation graph. Defaults to an empty list.
    """
    # Fix: replace mutable default arguments ({} / []) with None sentinels;
    # the old defaults were shared across every TaskGenerator instance.
    attribute_pool = {} if attribute_pool is None else attribute_pool
    subtasks = [] if subtasks is None else subtasks
    self.G = nx.DiGraph()
    self.attribute_pool = attribute_pool
    self.graph_generation(subtasks)
    self.task_mapping = {task.id: task for task in subtasks}
    # The OpenAI client requires some API key to construct; fall back to a
    # dummy value (e.g. for local OpenAI-compatible servers).
    if not os.getenv("OPENAI_API_KEY"):
        os.environ["OPENAI_API_KEY"] = "EMPTY"
    self.client = OpenAI()
@classmethod
def from_config(cls, config_path: str) -> "TaskGenerator":
    """
    Class method to create a TaskGenerator instance from a configuration file.

    Parameters:
        config_path (str): Path to the YAML configuration file.

    Returns:
        TaskGenerator: An instance of TaskGenerator.
    """
    with open(config_path, "r") as f:
        data = yaml.safe_load(f)
    subtask_list = []
    for raw_subtask in data["subtask"]:
        # Attribute types are written "a/b/c" in the YAML; split into a list.
        attr_dict = {
            key: value.split("/")
            for key, value in raw_subtask["attribute_dict"].items()
        }
        subtask_list.append(
            SubTask(
                id=raw_subtask["id"],
                description=raw_subtask["description"],
                attribute_dict=attr_dict,
                output_type=raw_subtask["output_type"],
            )
        )
    return cls(data["attribute_pool"], subtask_list)
def graph_generation(self, subtask_list: list[SubTask]) -> None:
    """Generates a directed graph from a list of subtasks based on output and input types.

    An edge ``input_node -> output_node`` (carrying ``attribute_name``) is
    added whenever ``input_node``'s output type is accepted by one of
    ``output_node``'s input attributes.
    """
    self.G.add_nodes_from(subtask_list)
    for input_node in self.G.nodes:
        for output_node in self.G.nodes:
            for name, type_list in output_node.attribute_dict.items():
                # Fix: renamed loop variable ``type`` -> ``attr_type`` to
                # avoid shadowing the built-in ``type``.
                for attr_type in type_list:
                    if attr_type == input_node.output_type:
                        self.G.add_edge(
                            input_node, output_node, attribute_name=name
                        )
def combine(self, current_description: str, target_description: str) -> str:
    """
    Combines two task descriptions into a single task description using GPT model.

    Parameters:
        current_description (str): The current task description.
        target_description (str): The target task description to combine.

    Returns:
        str: The combined task description.
    """
    prompt = USER_PROMPT_SINGLE.format(
        task1=current_description, task2=target_description
    )
    chat_messages = [
        {"role": "system", "content": SYSTEM_PROMPT_SINGLE},
        {"role": "user", "content": prompt},
    ]
    completion = self.client.chat.completions.create(
        messages=chat_messages,
        model="gpt-4-turbo-preview",
    )
    return completion.choices[0].message.content
def gpt_choice(
self,
current_description: str,
outgoing_edges: list[tuple[SubTask, SubTask, str]],
) -> tuple[SubTask, dict[str, str], str, str]:
"""
Determines the best task choice from a list of possible target tasks using GPT model.
Parameters:
current_description (str): Description of the current task.
outgoing_edges (list): List of possible outgoing edges representing target tasks.
Returns:
tuple: A tuple containing the chosen SubTask, attributes, new description, and combined description.
"""
target_neighbours = ""
selected_attributes = []
new_descriptions = []
for idx, edge in enumerate(outgoing_edges):
_, node, attribute_name = edge
attributes = self._fill_task_attributes(node, attribute_name)
selected_attributes.append(attributes)
kwargs = attributes.copy()
kwargs[attribute_name] = "{" + attribute_name + "}"
new_description = node.description.format(**kwargs)
new_descriptions.append(new_description)
target_neigh
gitextract_jurvigyb/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ ├── feature_request.yml
│ │ └── questions.yml
│ ├── actions/
│ │ └── crab_install/
│ │ └── action.yml
│ └── workflows/
│ ├── documentation.yml
│ ├── publish_release.yml
│ └── pytest_package.yml
├── .gitignore
├── .pre-commit-config.yaml
├── README.md
├── crab/
│ ├── __init__.py
│ ├── actions/
│ │ ├── android_actions.py
│ │ ├── crab_actions.py
│ │ ├── desktop_actions.py
│ │ ├── file_actions.py
│ │ ├── system_actions.py
│ │ └── visual_prompt_actions.py
│ ├── agents/
│ │ ├── backend_models/
│ │ │ ├── __init__.py
│ │ │ ├── camel_model.py
│ │ │ ├── claude_model.py
│ │ │ ├── gemini_model.py
│ │ │ └── openai_model.py
│ │ ├── policies/
│ │ │ ├── __init__.py
│ │ │ ├── multi_agent_by_env.py
│ │ │ ├── multi_agent_by_func.py
│ │ │ └── single_agent.py
│ │ └── utils.py
│ ├── benchmarks/
│ │ ├── __init__.py
│ │ └── template.py
│ ├── core/
│ │ ├── __init__.py
│ │ ├── agent_policy.py
│ │ ├── backend_model.py
│ │ ├── benchmark.py
│ │ ├── csv_log.py
│ │ ├── decorators.py
│ │ ├── environment.py
│ │ ├── exceptions.py
│ │ ├── experiment.py
│ │ ├── graph_evaluator.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── action.py
│ │ │ ├── agent_interface.py
│ │ │ ├── benchmark_interface.py
│ │ │ ├── config.py
│ │ │ ├── evaluator.py
│ │ │ └── task.py
│ │ └── task_generator.py
│ ├── environments/
│ │ ├── __init__.py
│ │ └── template.py
│ ├── server/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── config.py
│ │ ├── exception_handlers.py
│ │ ├── logger.py
│ │ ├── main.py
│ │ ├── middleware.py
│ │ └── utils.py
│ └── utils/
│ ├── __init__.py
│ ├── common.py
│ ├── encryption.py
│ └── measure.py
├── crab-benchmark-v0/
│ ├── README.md
│ ├── __init__.py
│ ├── android_env.py
│ ├── dataset/
│ │ ├── android/
│ │ │ ├── 1005c437-50d1-465a-b3fc-833098b22bfc.json
│ │ │ ├── 12333aa0-e76d-4a5c-8657-9f897f62f62d.json
│ │ │ ├── 22b04776-8eec-4303-b3f6-9c981f7f29b8.json
│ │ │ ├── 2ade6a13-c7a6-4df7-8c62-77382687369e.json
│ │ │ ├── 346caf7c-dc74-4c38-962a-aaffb638e0c7.json
│ │ │ ├── 379b9c58-5125-41b3-9cc6-ea925c8b094d.json
│ │ │ ├── 4190c90c-b28c-4bb3-ab5c-af3c4fde0a3d.json
│ │ │ ├── 46d7ccdb-d2e4-4b8a-bead-f2641b5ac23c.json
│ │ │ ├── 483fbf9c-dc78-4ac2-9264-53c4f617f6cc.json
│ │ │ ├── 4893a9b0-6477-495d-a73c-32503326e24a.json
│ │ │ ├── 53010c40-dce4-4d72-a856-842c21059e2b.json
│ │ │ ├── 6d9f6395-de79-4ad0-8a2a-2d674f93f293.json
│ │ │ ├── 71ef7fd2-0ae3-49c8-8238-06b7aa985d25.json
│ │ │ ├── 73f78fc3-1ca5-442d-801f-bc175a0bfb89.json
│ │ │ ├── 764838cc-9359-4130-9bb2-4a75900b2d89.json
│ │ │ ├── 77289141-e52b-48c8-b3a7-1b29520f3e1e.json
│ │ │ ├── 7891ceab-7965-4ddb-a0fc-15740c9a4e44.json
│ │ │ ├── 8bd51440-f959-4edc-baa5-cd03d32a5b0f.json
│ │ │ ├── 94b1836b-3111-40ad-8d07-b8a57efe7438.json
│ │ │ ├── a225f7f8-6d03-4619-b57d-7a08610030d8.json
│ │ │ ├── b077299d-1acb-40f5-89f3-cc08044345bf.json
│ │ │ ├── b3965b07-4683-4445-9de1-a1dedf6c73ad.json
│ │ │ ├── c1b1cfeb-40e7-49a8-a3f5-b8c8ba723601.json
│ │ │ ├── c85f03c9-83c4-417b-93d9-0d7b41022525.json
│ │ │ ├── cf4c496b-fbbd-4701-91ea-4590fe6a66e1.json
│ │ │ ├── d0811e47-d75f-40ce-b34b-e1ee3c8bed3f.json
│ │ │ ├── d2d456bb-c7d1-46af-8263-78d8509fb320.json
│ │ │ ├── d4e0f2b3-d0ff-4efd-856f-9f5e598cfd05.json
│ │ │ ├── d7489d00-0046-4fb1-af5b-1fde7d87312c.json
│ │ │ ├── d92f6c33-e0a7-4101-957d-e7dd218d2565.json
│ │ │ ├── de843952-df8f-4a26-bae9-d0a32ed9a7f5.json
│ │ │ ├── e20fd121-b981-42da-94de-efcd66889c11.json
│ │ │ ├── e55d7a39-7b6b-4852-8711-844cebc88cb8.json
│ │ │ ├── e9268070-91b7-4e8c-9976-1cf8126ba13b.json
│ │ │ ├── fbe6a1b1-63bb-4d4e-8a53-ff4f7839ef61.json
│ │ │ └── fc642cb6-5321-4966-afbf-fb3348bb69ee.json
│ │ ├── android_subtasks.py
│ │ ├── cross/
│ │ │ ├── 05a7633d-b966-471c-8848-e18e69ad265f.json
│ │ │ ├── 1e92db38-501e-429b-ac31-453d1af10a25.json
│ │ │ ├── 43be6e8e-034d-4277-8346-c4ae7553bf68.json
│ │ │ ├── 534be964-269a-4509-b2b8-28cc3ba8dfca.json
│ │ │ ├── 6f95cfa1-e7ae-4a82-912b-0180fc9622f2.json
│ │ │ ├── 760ed27e-b1bd-451f-8659-bdb9845fcb7f.json
│ │ │ ├── 82596760-7d4d-457d-9ca9-9551ab85ec58.json
│ │ │ ├── a956a091-8de4-42ee-b152-913308dfc24b.json
│ │ │ ├── c5929ef3-ac27-4288-b02f-4f261d5871f9.json
│ │ │ └── da5911e3-1a99-4735-ba3e-f08c5ca81fdd.json
│ │ ├── handmade_tasks.py
│ │ ├── ubuntu/
│ │ │ ├── 05d0e137-7d97-4021-9477-6490a2154c81.json
│ │ │ ├── 0a893c2e-eec5-47cc-a930-eb01c5f17683.json
│ │ │ ├── 0d178388-8166-4b66-93c1-278861f9897c.json
│ │ │ ├── 0d7c84d2-bbbd-46ab-80d1-52b3a44f3858.json
│ │ │ ├── 0deafe05-8db5-445f-9031-f6e884569d03.json
│ │ │ ├── 0e80fd90-0b23-454f-a629-7b6d7baa7542.json
│ │ │ ├── 125f7bae-e931-4190-8737-5f1ea7227772.json
│ │ │ ├── 15a150a8-899c-4753-8dc5-05248ccc3640.json
│ │ │ ├── 1ebcd710-f73b-4022-832b-167c0d3f55a2.json
│ │ │ ├── 22787ecc-52b2-4791-aefb-c45800f51414.json
│ │ │ ├── 22f05f6f-6aef-4786-958f-14f559eaf014.json
│ │ │ ├── 28963795-d694-4bb4-adaf-f7708a2c6fe5.json
│ │ │ ├── 299db8f2-81eb-455f-9302-5c8cb30be691.json
│ │ │ ├── 29f099b2-b3a5-463f-b10a-15363bf7e845.json
│ │ │ ├── 355e9660-a355-4b95-8881-ac9da578ea43.json
│ │ │ ├── 35bd7387-4735-4632-8474-e93382004c12.json
│ │ │ ├── 362c5711-3824-42ff-96a0-7801b03b5f1f.json
│ │ │ ├── 4718df9c-97ec-4b54-86ca-bd34e65c5a43.json
│ │ │ ├── 47b75b21-99a2-461c-9d40-6dddc5c206d0.json
│ │ │ ├── 4ae4e35f-d90a-48cc-8fb9-492ac7ae07ee.json
│ │ │ ├── 4bbedade-4d4e-43d5-b650-2702b350ad28.json
│ │ │ ├── 51a288f9-cf2c-4e8e-a98c-596a505af77c.json
│ │ │ ├── 51c91051-3efb-4e92-a967-739b18520714.json
│ │ │ ├── 57b7e8a7-8c17-4cc4-9bb5-4385afde3ad8.json
│ │ │ ├── 58776443-ccf7-4db3-8c60-e188e4b5f90c.json
│ │ │ ├── 5ba74c6a-4513-448b-8b68-ff145ece0652.json
│ │ │ ├── 6428f803-62de-40d2-a345-64e6cf955c9d.json
│ │ │ ├── 64a2c205-c85a-4e56-8edb-5df4f7724441.json
│ │ │ ├── 696ca9bb-89ea-4cd5-b693-f2d749d964b1.json
│ │ │ ├── 6be49e77-e904-4eb0-a36a-7f0fd128ede3.json
│ │ │ ├── 6c3105a2-328c-4190-823d-03d759be0b57.json
│ │ │ ├── 6c560516-ca14-4f97-b51d-16ad81fc29e4.json
│ │ │ ├── 730172f5-894a-4d46-9102-ac7d985a479d.json
│ │ │ ├── 73038efb-ca0f-4d90-a947-fcfd097dd91b.json
│ │ │ ├── 73da97c9-f084-4cab-8697-1151737387ff.json
│ │ │ ├── 77aa4dd3-5a68-4686-9cac-26d0ab77c7b4.json
│ │ │ ├── 78502f1c-879b-4932-a5fd-d85f7f6b0f81.json
│ │ │ ├── 7912f7a5-24b9-4dfe-a7b8-1effc1b7a212.json
│ │ │ ├── 7d5613ec-9b67-4255-b766-d9c6e8466464.json
│ │ │ ├── 7dda7e46-78be-4663-b882-6132dbbff335.json
│ │ │ ├── 7e6c4927-2220-4522-9e3f-36f69adc3e71.json
│ │ │ ├── 82c49e12-3b2f-432e-9069-4b67bafebbf7.json
│ │ │ ├── 87910f23-ab23-4ccc-b115-d71cff6f0162.json
│ │ │ ├── 8afc25eb-7a80-459f-acdc-5c79fc146c29.json
│ │ │ ├── 8cb5ab6d-a56e-43b9-aa83-00a46331e20f.json
│ │ │ ├── 90e09946-7b28-4102-b0ed-f683c01dbbd4.json
│ │ │ ├── 925a3607-2802-48aa-b339-13ebfcef43a2.json
│ │ │ ├── 9506dd30-f58d-4832-b336-8037e83e2689.json
│ │ │ ├── 95e347aa-56ab-4d5d-a94c-350ddfddabf9.json
│ │ │ ├── 98a360d8-0f95-44cd-bb9d-442fca2918d4.json
│ │ │ ├── 9c979fc5-8d60-41f1-a494-904a1d312187.json
│ │ │ ├── 9e08971c-7f83-4853-952e-4c4a4a26333b.json
│ │ │ ├── 9fe4f541-61cf-48e0-a081-4371786659c7.json
│ │ │ ├── a0714ef7-bbdc-4f84-bd2e-c6e611d4db9e.json
│ │ │ ├── a2a34580-cded-4bf8-81d9-b36a4d4402d0.json
│ │ │ ├── a6b67c2d-d448-4e77-904e-dc7c5f21a5fe.json
│ │ │ ├── a70ab903-835f-48b7-8356-2321b8b869d8.json
│ │ │ ├── a78177f5-6cc6-48d7-8c6f-df53399d7759.json
│ │ │ ├── abb16512-27ae-49c0-b12b-7fbf0e95056b.json
│ │ │ ├── b2ca21dc-dde9-49f5-bec7-321fbf769315.json
│ │ │ ├── b57c96c1-071b-40f6-b33b-2a0459fc25bb.json
│ │ │ ├── b73019e0-3ce8-4657-8b13-b3e0ab6cfac8.json
│ │ │ ├── ba5aebcb-999d-44d4-b9bc-241f9884c6dd.json
│ │ │ ├── be6468be-2218-45c1-9b75-b56efec61eb4.json
│ │ │ ├── c4106f9a-9348-4a55-9892-782e6f4b3081.json
│ │ │ ├── c8800e50-3ff4-4dd2-bc90-33688be99659.json
│ │ │ ├── ccf31785-ec13-4981-93c5-ca6c242ac0c3.json
│ │ │ ├── d3478489-70f2-4a82-b7d2-0a47b75986eb.json
│ │ │ ├── d39d40b1-fc26-4169-9d6f-cdf81efe9a3e.json
│ │ │ ├── d3c917ff-406f-447a-87f5-b8d835cba750.json
│ │ │ ├── d6e460e4-c295-40ad-883c-11300d7832f0.json
│ │ │ ├── d9e4e23c-2a2a-4b5c-b034-7deb6036572d.json
│ │ │ ├── e31d4e3b-b753-4deb-b9ad-a0add5d4790e.json
│ │ │ ├── f07a1f32-2f3f-40e7-b12f-8f1b128c41f6.json
│ │ │ ├── f5cce3a0-ba65-4317-95f8-1fc7d9776c78.json
│ │ │ ├── f67a26e4-58dd-4dc6-8859-affbf1d62f94.json
│ │ │ └── f96d7c34-9543-4679-a6ea-89e0c2ef7b1c.json
│ │ └── ubuntu_subtasks.py
│ ├── main.py
│ ├── scripts/
│ │ └── ubuntu_env_init.sh
│ └── ubuntu_env.py
├── docs/
│ ├── Makefile
│ ├── conf.py
│ ├── crab.benchmarks.rst
│ ├── crab.client.rst
│ ├── crab.core.models.rst
│ ├── crab.core.rst
│ ├── crab.environments.rst
│ ├── crab.rst
│ ├── crab.server.controller.rst
│ ├── crab.server.rst
│ ├── crab_benchmark_v0/
│ │ ├── environment_gcp_setup.md
│ │ ├── environment_local_setup.md
│ │ └── get_started.md
│ ├── get_started/
│ │ ├── build_your_own_benchmark.md
│ │ └── quickstart.md
│ ├── index.rst
│ ├── make.bat
│ └── modules.rst
├── examples/
│ ├── multi_env.py
│ └── single_env.py
├── licenses/
│ ├── LICENSE
│ ├── license_template.txt
│ └── update_license.py
├── pyproject.toml
└── test/
├── actions/
│ └── test_visual_prompt_actions.py
├── agents/
│ ├── backend_models/
│ │ ├── test_camel_model.py
│ │ ├── test_claude_model.py
│ │ ├── test_gemini_model.py
│ │ └── test_openai_model.py
│ └── policies/
│ ├── test_multi_agent_by_func.py
│ ├── test_mutli_agent_by_env.py
│ └── test_single_agent.py
├── core/
│ ├── test_action.py
│ ├── test_benchmark.py
│ ├── test_evaluator.py
│ └── test_utils.py
└── server/
└── test_api.py
SYMBOL INDEX (463 symbols across 62 files)
FILE: crab-benchmark-v0/dataset/android_subtasks.py
function get_xml_etree (line 27) | def get_xml_etree(env) -> _Element | None:
function check_contain_input_text (line 36) | def check_contain_input_text(text: str, env) -> bool:
function check_contain_input_text_multiple (line 45) | def check_contain_input_text_multiple(text: str, env) -> bool:
function check_contain_contact (line 54) | def check_contain_contact(name: str, env) -> bool:
function check_current_package_name (line 90) | def check_current_package_name(name: str, env) -> bool:
function check_ocr_results (line 98) | def check_ocr_results(text: str, env) -> bool:
function check_current_message_page (line 103) | def check_current_message_page(title: str, env) -> bool:
function check_message_text_box_contain (line 117) | def check_message_text_box_contain(text: str, env) -> bool:
function check_message_text_box_empty (line 131) | def check_message_text_box_empty(env) -> bool:
function check_send_message (line 147) | def check_send_message(title: str, message: str, env) -> bool:
function check_note_content (line 166) | def check_note_content(content: str, env) -> bool:
function check_bluetooth_name (line 188) | def check_bluetooth_name(content: str, env) -> bool:
function check_map_direction_page (line 201) | def check_map_direction_page(from_des: str, to_des: str, env) -> bool:
function check_dial_number (line 215) | def check_dial_number(phone_number: str, env) -> bool:
function check_calendar_registered (line 229) | def check_calendar_registered(date: str, content: str, env) -> bool:
function check_drive_registered (line 250) | def check_drive_registered(content: str, env) -> bool:
function check_contact_registered (line 268) | def check_contact_registered(mail: str, name: str, env) -> bool:
function check_calling_number (line 287) | def check_calling_number(phone_number: str, env) -> bool:
function check_google_tasks_name (line 303) | def check_google_tasks_name(target: str, env) -> bool:
function check_date (line 320) | def check_date(target: str, env) -> bool:
function check_city_clock (line 340) | def check_city_clock(place_name: str, env) -> bool:
function check_event (line 357) | def check_event(date: str, env) -> bool:
function check_event_registered (line 374) | def check_event_registered(date: str, content: str, env) -> bool:
function check_location (line 395) | def check_location(content: str, env) -> bool:
function check_contain_city (line 406) | def check_contain_city(number: str, city: str, env) -> bool:
function check_file (line 426) | def check_file(content: str, env) -> bool:
function check_mail_sent (line 469) | def check_mail_sent(mail: str, content: str, env) -> bool:
function distance_evaluator_generator (line 503) | def distance_evaluator_generator(place_name_1: str, place_name_2: str):
function mail_evaluator_generator (line 513) | def mail_evaluator_generator(mail: str, content: str):
function contact_evaluator_generator (line 523) | def contact_evaluator_generator(mail: str, name: str):
FILE: crab-benchmark-v0/dataset/handmade_tasks.py
function check_calendar_in_today (line 39) | def check_calendar_in_today(env) -> bool:
function get_file_bullet_points (line 61) | def get_file_bullet_points(file_path: str) -> int | None:
function check_blluet_point_match_calendar (line 82) | def check_blluet_point_match_calendar(file_path: str, env) -> bool:
function check_node_exist (line 91) | def check_node_exist(node_query: str, env) -> bool:
function check_new_jpg_files_in_dir (line 102) | def check_new_jpg_files_in_dir(directory) -> bool:
function check_text_list_in_current_window_name (line 119) | def check_text_list_in_current_window_name(texts: list[str]) -> bool:
function check_keep_notes_content (line 133) | def check_keep_notes_content(text: str, env) -> bool:
function check_keep_notes_contain_fd (line 154) | def check_keep_notes_contain_fd(env) -> bool:
function check_alarm_contains (line 178) | def check_alarm_contains(time: str, env) -> bool:
function check_tap_text (line 192) | def check_tap_text(text: str, env) -> bool:
function summarize_ubuntu_evaluator (line 207) | def summarize_ubuntu_evaluator():
function check_calendar_evaluator (line 218) | def check_calendar_evaluator():
function evaluator_97e6f333 (line 228) | def evaluator_97e6f333():
function evaluator_82efbd82 (line 248) | def evaluator_82efbd82():
function evaluator_515a5467 (line 264) | def evaluator_515a5467():
function evaluator_5a1eba49 (line 288) | def evaluator_5a1eba49():
function evaluator_c347f78a (line 302) | def evaluator_c347f78a():
function evaluator_bf83c176 (line 316) | def evaluator_bf83c176():
function evaluator_74bb11dd (line 347) | def evaluator_74bb11dd():
function evaluator_ca79febf (line 381) | def evaluator_ca79febf():
function evaluator_dfabf84c (line 404) | def evaluator_dfabf84c():
function evaluator_aab5555e (line 419) | def evaluator_aab5555e():
function get_root_usage (line 450) | def get_root_usage() -> str:
function check_contain_input_text_and_get_df_result (line 459) | def check_contain_input_text_and_get_df_result(text: str, env) -> bool:
function evaluator_fd0576be (line 473) | def evaluator_fd0576be():
function evaluator_7e08f7d4 (line 483) | def evaluator_7e08f7d4():
function evaluator_4957e964 (line 495) | def evaluator_4957e964():
FILE: crab-benchmark-v0/dataset/ubuntu_subtasks.py
class ImageMatcher (line 43) | class ImageMatcher:
method __init__ (line 49) | def __init__(self, top_k: int = 4096):
method warp_corners_and_draw_matches (line 61) | def warp_corners_and_draw_matches(
method _get_bounding_box (line 119) | def _get_bounding_box(
method _resize_image (line 146) | def _resize_image(
method get_resizing_functions (line 176) | def get_resizing_functions(
method match_images (line 193) | def match_images(
method load_and_convert_image (line 256) | def load_and_convert_image(self, filepath: str) -> np.ndarray:
function from_env_load_and_save_file (line 278) | def from_env_load_and_save_file(env, file_path, output_dir="/tmp/local_s...
function crop_image (line 322) | def crop_image(img: np.ndarray, bbox: List[int]) -> np.ndarray:
function calculate_bbox_center (line 337) | def calculate_bbox_center(bbox: List[int]) -> Tuple[int, int]:
function is_bbox_in_direction (line 353) | def is_bbox_in_direction(bbox_1: List[int], bbox_2: List[int], direction...
function ocr_text_matching (line 381) | def ocr_text_matching(
function convert_file_to_images (line 414) | def convert_file_to_images(file_path: str) -> List[str]:
function cleanup_files (line 473) | def cleanup_files(files: List[str]):
function is_valid_url (line 484) | def is_valid_url(url):
function is_valid_image_data_uri (line 498) | def is_valid_image_data_uri(uri):
function is_github_repo_url (line 507) | def is_github_repo_url(url):
function get_rgb_values_outside_bbox (line 519) | def get_rgb_values_outside_bbox(
function contains_required_strings (line 557) | def contains_required_strings(clipboard_content: str, required_strings: ...
function verify_file_content_with_clipboard (line 575) | def verify_file_content_with_clipboard(file_path: str) -> bool:
function verify_odt_file_content_with_clipboard (line 614) | def verify_odt_file_content_with_clipboard(file_path: str) -> bool:
function verify_combined_image (line 656) | def verify_combined_image(
function is_image_2_brighter (line 716) | def is_image_2_brighter(image_path_1: str, image_path_2: str) -> bool:
function is_img_url_in_clipboard (line 743) | def is_img_url_in_clipboard() -> bool:
function is_github_repo_url_in_clipboard (line 775) | def is_github_repo_url_in_clipboard(keyword: str) -> bool:
function is_software_installed (line 797) | def is_software_installed(package_name: str) -> bool:
function get_file_url_hash (line 810) | def get_file_url_hash(url):
function download_and_verify_file (line 817) | def download_and_verify_file(url: str, file_path: str) -> bool:
function download_from_clipboard_and_verify_file (line 838) | def download_from_clipboard_and_verify_file(file_path: str) -> bool:
function check_color_scheme (line 865) | def check_color_scheme(assmue: str) -> bool:
function check_text_in_current_window_name (line 874) | def check_text_in_current_window_name(text: str) -> bool:
function check_current_window_process (line 885) | def check_current_window_process(assmue: str) -> bool:
function check_file_exist (line 904) | def check_file_exist(file_path: str) -> bool:
function check_file_content (line 909) | def check_file_content(file_path: str, content: str) -> bool:
function empty_evaluator (line 918) | def empty_evaluator() -> bool:
function is_process_open (line 923) | def is_process_open(process_name: str) -> bool:
function check_app_usage_history (line 940) | def check_app_usage_history(app_name: str) -> bool:
function check_process_closed (line 960) | def check_process_closed(app_name: str) -> bool:
function verify_background (line 976) | def verify_background(photo_path: str) -> bool:
function is_torch_matmul_example_copied_correctly (line 1007) | def is_torch_matmul_example_copied_correctly() -> bool:
function check_directory_exists (line 1043) | def check_directory_exists(dir_path: str) -> bool:
function verify_files_copied (line 1049) | def verify_files_copied(source_dir: str, target_dir: str, file_extension...
function check_contain_input_text_list (line 1061) | def check_contain_input_text_list(texts: list[str], env) -> bool:
function is_google_maps_url_in_clipboard (line 1085) | def is_google_maps_url_in_clipboard() -> bool:
function check_contain_input_text (line 1098) | def check_contain_input_text(text: str, env) -> bool:
function verify_country_data_in_ods (line 1120) | def verify_country_data_in_ods(country: str, file_path: str) -> bool:
FILE: crab-benchmark-v0/main.py
class CrabBenchmarkV0 (line 50) | class CrabBenchmarkV0(Experiment):
method __init__ (line 51) | def __init__(
method get_prompt (line 60) | def get_prompt(self):
function get_benchmark (line 82) | def get_benchmark(env: str, ubuntu_url: str):
FILE: crab/actions/android_actions.py
function execute_adb (line 24) | def execute_adb(adb_command: str, env=None):
function get_device_size (line 43) | def get_device_size(env):
function setup (line 55) | def setup(env) -> None:
function screenshot (line 60) | def screenshot(env) -> str:
function tap (line 78) | def tap(element: int, env) -> None:
function long_tap (line 92) | def long_tap(element: int, env) -> None:
class SwipeDirection (line 107) | class SwipeDirection(str, Enum):
class SwipeDist (line 114) | class SwipeDist(str, Enum):
function swipe (line 121) | def swipe(element: int, direction: SwipeDirection, dist: SwipeDist, env)...
function open_app_drawer (line 157) | def open_app_drawer(env) -> None:
class AndroidKey (line 169) | class AndroidKey(str, Enum):
function key_press (line 175) | def key_press(key: AndroidKey, env):
function write_text (line 194) | def write_text(text: str, env) -> None:
function stop_all_apps (line 209) | def stop_all_apps(env) -> None:
FILE: crab/actions/crab_actions.py
function submit (line 20) | def submit(content: str) -> None:
function check_submit (line 31) | def check_submit(text: str, env) -> bool:
function complete (line 40) | def complete() -> bool:
function wait (line 48) | def wait() -> bool:
function get_element_position (line 55) | def get_element_position(element_id, env):
FILE: crab/actions/desktop_actions.py
function click_position (line 30) | def click_position(x: int, y: int) -> None:
function click (line 43) | def click(element: int, env) -> None:
function right_click_position (line 56) | def right_click_position(x: int, y: int) -> None:
function right_click (line 68) | def right_click(element: int, env) -> None:
function double_click_position (line 84) | def double_click_position(x: int, y: int) -> None:
function double_click (line 96) | def double_click(element: int, env) -> None:
function mouse_scroll (line 112) | def mouse_scroll(click: int = 1) -> None:
class KeyEnum (line 123) | class KeyEnum(str, Enum):
function key_press (line 222) | def key_press(key: KeyEnum) -> None:
function press_hotkey (line 237) | def press_hotkey(keys: list[KeyEnum]) -> None:
function write_text (line 253) | def write_text(text: str) -> None:
function search_application (line 267) | def search_application(name: str) -> None:
function screenshot (line 285) | def screenshot() -> str:
FILE: crab/actions/file_actions.py
function save_base64_image (line 23) | def save_base64_image(image: str, path: str = "image.png") -> None:
FILE: crab/actions/system_actions.py
function delay (line 21) | def delay(time: float) -> None:
function run_bash_command (line 26) | def run_bash_command(command: str) -> str:
FILE: crab/actions/visual_prompt_actions.py
function check_transformers_import (line 45) | def check_transformers_import() -> None:
function _calculate_iou (line 53) | def _calculate_iou(box1: BoxType, box2: BoxType) -> float:
function _calculate_center (line 68) | def _calculate_center(box: BoxType) -> tuple[int, int]:
function _remove_invalid_boxes (line 72) | def _remove_invalid_boxes(
function _filter_boxes_by_center (line 91) | def _filter_boxes_by_center(
function _box_a_in_b (line 116) | def _box_a_in_b(a: BoxType, b: BoxType) -> bool:
function _filter_boxes_by_overlap (line 120) | def _filter_boxes_by_overlap(
function _filter_boxes_by_iou (line 138) | def _filter_boxes_by_iou(
function _draw_boxes (line 157) | def _draw_boxes(
function _get_grounding_dino_model (line 194) | def _get_grounding_dino_model(
function _get_easyocr_model (line 212) | def _get_easyocr_model() -> easyocr.Reader:
function get_groundingdino_boxes (line 216) | def get_groundingdino_boxes(
function get_easyocr_boxes (line 264) | def get_easyocr_boxes(
function groundingdino_easyocr (line 294) | def groundingdino_easyocr(
function get_elements_prompt (line 333) | def get_elements_prompt(
FILE: crab/agents/backend_models/__init__.py
class BackendModelConfig (line 27) | class BackendModelConfig(BaseModel):
function create_backend_model (line 67) | def create_backend_model(model_config: BackendModelConfig) -> BackendModel:
FILE: crab/agents/backend_models/camel_model.py
function _get_model_platform_type (line 35) | def _get_model_platform_type(model_platform_name: str) -> "ModelPlatform...
function _get_model_type (line 45) | def _get_model_type(model_name: str) -> "str | ModelType":
function _convert_action_to_schema (line 52) | def _convert_action_to_schema(
function _convert_tool_calls_to_action_list (line 65) | def _convert_tool_calls_to_action_list(
class CamelModel (line 80) | class CamelModel(BackendModel):
method __init__ (line 81) | def __init__(
method get_token_usage (line 102) | def get_token_usage(self) -> int:
method reset (line 105) | def reset(self, system_message: str, action_space: list[Action] | None...
method chat (line 131) | def chat(self, messages: list[tuple[str, MessageType]]) -> BackendOutput:
FILE: crab/agents/backend_models/claude_model.py
class ClaudeModel (line 30) | class ClaudeModel(BackendModel):
method __init__ (line 31) | def __init__(
method reset (line 55) | def reset(self, system_message: str, action_space: list[Action] | None...
method chat (line 62) | def chat(self, message: list[Message] | Message) -> BackendOutput:
method _construct_new_message (line 72) | def _construct_new_message(self, message: list[Message]) -> dict[str, ...
method _fetch_from_memory (line 99) | def _fetch_from_memory(self) -> list[dict]:
method get_token_usage (line 107) | def get_token_usage(self):
method _record_message (line 110) | def _record_message(
method _call_api (line 148) | def _call_api(self, request_messages: list[dict]) -> anthropic.types.M...
method _generate_backend_output (line 172) | def _generate_backend_output(
function _merge_request (line 196) | def _merge_request(request: list[dict]) -> list[dict]:
function _convert_action_to_schema (line 207) | def _convert_action_to_schema(action_space):
FILE: crab/agents/backend_models/gemini_model.py
class GeminiModel (line 39) | class GeminiModel(BackendModel):
method __init__ (line 40) | def __init__(
method reset (line 64) | def reset(self, system_message: str, action_space: list[Action] | None...
method chat (line 71) | def chat(self, message: list[Message] | Message) -> BackendOutput:
method _construct_new_message (line 81) | def _construct_new_message(self, message: list[Message]) -> dict[str, ...
method _generate_backend_output (line 94) | def _generate_backend_output(self, response_message: Content) -> Backe...
method _fetch_from_memory (line 111) | def _fetch_from_memory(self) -> list[dict]:
method get_token_usage (line 119) | def get_token_usage(self):
method _record_message (line 122) | def _record_message(
method _call_api (line 135) | def _call_api(self, request_messages: list) -> Content:
function _convert_action_to_schema (line 164) | def _convert_action_to_schema(action_space: list[Action] | None) -> list...
function _clear_schema (line 177) | def _clear_schema(schema_dict: dict) -> None:
function _action_to_func_dec (line 188) | def _action_to_func_dec(action: Action) -> FunctionDeclaration:
FILE: crab/agents/backend_models/openai_model.py
class OpenAIModel (line 29) | class OpenAIModel(BackendModel):
method __init__ (line 30) | def __init__(
method reset (line 61) | def reset(self, system_message: str, action_space: list[Action] | None...
method chat (line 72) | def chat(self, message: list[Message] | Message) -> BackendOutput:
method get_token_usage (line 82) | def get_token_usage(self):
method _record_message (line 85) | def _record_message(
method _call_api (line 102) | def _call_api(
method _fetch_from_memory (line 123) | def _fetch_from_memory(self) -> list[ChatCompletionMessage | dict]:
method _construct_new_message (line 131) | def _construct_new_message(self, message: list[Message]) -> dict[str, ...
method _generate_backend_output (line 155) | def _generate_backend_output(
function _convert_action_to_schema (line 173) | def _convert_action_to_schema(
class OpenAIModelJSON (line 185) | class OpenAIModelJSON(OpenAIModel):
method __init__ (line 186) | def __init__(
method reset (line 204) | def reset(self, system_message: str, action_space: list[Action] | None...
method _record_message (line 208) | def _record_message(
method _generate_backend_output (line 216) | def _generate_backend_output(
class SGlangOpenAIModelJSON (line 242) | class SGlangOpenAIModelJSON(OpenAIModelJSON):
method _construct_new_message (line 243) | def _construct_new_message(self, message: list[Message]) -> dict[str, ...
FILE: crab/agents/policies/multi_agent_by_env.py
class MultiAgentByEnvPolicy (line 24) | class MultiAgentByEnvPolicy(AgentPolicy):
method __init__ (line 58) | def __init__(
method reset (line 67) | def reset(
method get_token_usage (line 99) | def get_token_usage(self):
method get_backend_model_name (line 106) | def get_backend_model_name(self):
method chat (line 113) | def chat(
FILE: crab/agents/policies/multi_agent_by_func.py
class MultiAgentByFuncPolicy (line 25) | class MultiAgentByFuncPolicy(AgentPolicy):
method __init__ (line 42) | def __init__(
method reset (line 51) | def reset(
method get_token_usage (line 68) | def get_token_usage(self):
method get_backend_model_name (line 74) | def get_backend_model_name(self):
method chat (line 81) | def chat(
FILE: crab/agents/policies/single_agent.py
class SingleAgentPolicy (line 32) | class SingleAgentPolicy(AgentPolicy):
method __init__ (line 86) | def __init__(
method reset (line 107) | def reset(
method get_token_usage (line 128) | def get_token_usage(self):
method get_backend_model_name (line 131) | def get_backend_model_name(self):
method chat (line 135) | def chat(
FILE: crab/agents/utils.py
function combine_multi_env_action_space (line 17) | def combine_multi_env_action_space(
function decode_combined_action (line 33) | def decode_combined_action(
function generate_action_prompt (line 53) | def generate_action_prompt(action_space: list[Action], expand: bool = Fa...
function extract_text_and_code_prompts (line 70) | def extract_text_and_code_prompts(content: str) -> tuple[list[str], list...
FILE: crab/benchmarks/template.py
function is_system_state (line 21) | def is_system_state(env) -> bool:
function check_submit_true (line 26) | def check_submit_true(env) -> bool:
function _submit (line 36) | def _submit(content: bool) -> None:
function check_sys0 (line 66) | def check_sys0(env) -> bool:
function check_sys1 (line 71) | def check_sys1(env) -> bool:
function check_sys2 (line 76) | def check_sys2(env) -> bool:
FILE: crab/core/agent_policy.py
class AgentPolicy (line 19) | class AgentPolicy(ABC):
method chat (line 21) | def chat(
method reset (line 27) | def reset(
method get_token_usage (line 35) | def get_token_usage(self) -> int: ...
method get_backend_model_name (line 38) | def get_backend_model_name(self) -> str: ...
FILE: crab/core/backend_model.py
class BackendModel (line 19) | class BackendModel(ABC):
method chat (line 21) | def chat(self, contents: list[tuple[str, MessageType]]) -> BackendOutp...
method reset (line 24) | def reset(
method get_token_usage (line 31) | def get_token_usage(self): ...
FILE: crab/core/benchmark.py
class Benchmark (line 26) | class Benchmark:
method __init__ (line 45) | def __init__(
method start_task (line 116) | def start_task(self, task_id: str) -> tuple[Task, dict[str, list[Actio...
method close_task (line 152) | def close_task(self) -> None:
method get_env_descriptions (line 160) | def get_env_descriptions(self) -> dict[str, str]:
method observe (line 167) | def observe(self) -> dict[str, dict[str, Any]]:
method observe_with_prompt (line 180) | def observe_with_prompt(
method evaluate (line 204) | def evaluate(self):
method step (line 209) | def step(
method reset (line 310) | def reset(self) -> None:
method human_evaluation (line 315) | def human_evaluation(self, task_id: str) -> None:
method export_action_space (line 329) | def export_action_space(self) -> dict[str, list[Action]]:
method _verify_spaces (line 340) | def _verify_spaces(self) -> None:
method _generate_action_map (line 360) | def _generate_action_map(self) -> None:
method _get_env (line 366) | def _get_env(
method _take_env_action (line 377) | def _take_env_action(self, action: Action) -> Any:
method _set_env_action (line 384) | def _set_env_action(self, action: Action) -> None:
method _reset_environments (line 393) | def _reset_environments(self):
method _get_task_by_id (line 399) | def _get_task_by_id(self, task_id: str) -> Task:
method _merge_dicts (line 405) | def _merge_dicts(
method _merge_lists (line 414) | def _merge_lists(self, env_dict: dict[str, list]) -> dict[str, list]:
function create_benchmark (line 422) | def create_benchmark(config: BenchmarkConfig) -> Benchmark:
FILE: crab/core/csv_log.py
class CSVLog (line 19) | class CSVLog:
method __init__ (line 20) | def __init__(self, csv_path: Path, headers: list[str]) -> None:
method write_row (line 28) | def write_row(self, data: list[Any]):
FILE: crab/core/decorators.py
function _decorator (line 19) | def _decorator(func, cls: type[Action], options: dict | None = None) -> ...
function action (line 28) | def action(*args: Callable, env_name: str | None = None, local=False):
function evaluator (line 36) | def evaluator(
FILE: crab/core/environment.py
class Environment (line 29) | class Environment:
method __init__ (line 70) | def __init__(
method step (line 98) | def step(
method take_action (line 126) | def take_action(
method observe (line 157) | def observe(self) -> dict[str, Any]:
method observe_with_prompt (line 170) | def observe_with_prompt(
method set_action (line 185) | def set_action(self, action: Action) -> None:
method start (line 195) | def start(self) -> None:
method close (line 199) | def close(self) -> None:
method reset (line 203) | def reset(self) -> None:
method action_space (line 211) | def action_space(self) -> list[Action]:
method observation_space (line 215) | def observation_space(self) -> list[ClosedAction]:
method _action_endpoint (line 218) | def _action_endpoint(self, action: Action, parameters: dict[str, Any]):
function create_environment (line 251) | def create_environment(config):
FILE: crab/core/exceptions.py
class ActionNotFound (line 14) | class ActionNotFound(ValueError):
class TaskNotFound (line 18) | class TaskNotFound(ValueError):
FILE: crab/core/experiment.py
class Experiment (line 58) | class Experiment:
method __init__ (line 59) | def __init__(
method write_message (line 71) | def write_message(self, message: str, step: int):
method write_task_info_json (line 75) | def write_task_info_json(self, task_info_path: Path):
method init_log_dir (line 98) | def init_log_dir(self):
method get_prompt (line 126) | def get_prompt(self) -> dict[str, list[tuple[str, MessageType]]]:
method execute_action (line 129) | def execute_action(self, response: list[ActionOutput]) -> bool:
method log_prompt (line 156) | def log_prompt(self, prompt):
method step (line 168) | def step(self, it) -> bool:
method start_benchmark (line 186) | def start_benchmark(self):
method write_exception_detail (line 222) | def write_exception_detail(self, exception_info: str):
method write_current_log_row (line 228) | def write_current_log_row(self, action):
method write_main_csv_row (line 245) | def write_main_csv_row(self, terminate_reason):
FILE: crab/core/graph_evaluator.py
class GraphEvaluator (line 23) | class GraphEvaluator:
method __init__ (line 24) | def __init__(
method reset (line 53) | def reset(self):
method step (line 59) | def step(
method get_next_source_nodes (line 91) | def get_next_source_nodes(self) -> set[Evaluator]:
method entry (line 106) | def entry(self) -> bool:
method update (line 109) | def update(self):
method calculate_longest_unfinished_path_length (line 118) | def calculate_longest_unfinished_path_length(self) -> int:
method calculate_step_to_complete (line 144) | def calculate_step_to_complete(self) -> int:
method is_complete (line 173) | def is_complete(self) -> bool:
method get_completeness (line 176) | def get_completeness(self) -> float:
method get_completeness_per_action (line 179) | def get_completeness_per_action(self) -> float:
method get_step_to_complete (line 182) | def get_step_to_complete(self) -> int:
method get_longest_unfinished_path_length (line 185) | def get_longest_unfinished_path_length(self) -> int:
method stat (line 188) | def stat(self) -> dict[str, Any]:
method _check_submit (line 198) | def _check_submit(self, environment: Environment) -> bool:
method compute_radar_stats (line 211) | def compute_radar_stats(self) -> dict[str, float]:
method visualize (line 223) | def visualize(evaluators: list["GraphEvaluator"], path: str):
FILE: crab/core/models/action.py
class Action (line 41) | class Action(BaseModel):
method __eq__ (line 74) | def __eq__(self, other):
method __hash__ (line 77) | def __hash__(self):
method __call__ (line 80) | def __call__(self, *args: Any, **kwargs: Any) -> Self:
method _check_combinable (line 127) | def _check_combinable(a: "Action", b: "Action") -> None:
method __rshift__ (line 135) | def __rshift__(self, other_action: "Action") -> "Action":
method __add__ (line 169) | def __add__(self, other_action: "Action") -> "Action":
method run (line 200) | def run(self, **kwargs) -> Any:
method set_kept_param (line 210) | def set_kept_param(self, **params) -> Self:
method get_required_params (line 217) | def get_required_params(self) -> dict[str, FieldInfo]:
method to_openai_json_schema (line 225) | def to_openai_json_schema(self) -> dict:
method to_raw_action (line 235) | def to_raw_action(self) -> dict[str, Any]:
method from_function (line 244) | def from_function(cls, func: Callable) -> Self:
function _check_no_param (line 315) | def _check_no_param(action: Action) -> Action:
FILE: crab/core/models/agent_interface.py
class MessageType (line 22) | class MessageType(IntEnum):
class ActionOutput (line 30) | class ActionOutput(BaseModel):
class BackendOutput (line 36) | class BackendOutput(BaseModel):
class EnvironmentInfo (line 41) | class EnvironmentInfo(BaseModel):
FILE: crab/core/models/benchmark_interface.py
class StepResult (line 19) | class StepResult(BaseModel):
FILE: crab/core/models/config.py
class EnvironmentConfig (line 22) | class EnvironmentConfig(BaseModel):
class VMEnvironmentConfig (line 32) | class VMEnvironmentConfig(BaseModel):
class BenchmarkConfig (line 37) | class BenchmarkConfig(BaseModel):
FILE: crab/core/models/evaluator.py
class Evaluator (line 19) | class Evaluator(Action):
method must_return_bool (line 24) | def must_return_bool(cls, v: type[BaseModel]) -> type[BaseModel]:
method __and__ (line 29) | def __and__(self, other: "Evaluator") -> "Evaluator":
method __or__ (line 39) | def __or__(self, other: "Evaluator") -> "Evaluator":
method __invert__ (line 51) | def __invert__(self) -> "Evaluator":
FILE: crab/core/models/task.py
class Task (line 30) | class Task(BaseModel):
method change_evaluator_to_graph (line 41) | def change_evaluator_to_graph(cls, evaluator: nx.DiGraph | Evaluator) ...
method to_list (line 50) | def to_list(cls, action: Action | list[Action]) -> list[Action]:
class SubTask (line 56) | class SubTask(BaseModel):
method __hash__ (line 67) | def __hash__(self) -> int:
method expand_attribute_type (line 72) | def expand_attribute_type(
class SubTaskInstance (line 83) | class SubTaskInstance(BaseModel):
method __hash__ (line 89) | def __hash__(self) -> int:
method dump_model (line 93) | def dump_model(self) -> dict[str, Any]:
class GeneratedTask (line 101) | class GeneratedTask(BaseModel):
FILE: crab/core/task_generator.py
class TaskGenerator (line 108) | class TaskGenerator:
method __init__ (line 111) | def __init__(
method from_config (line 130) | def from_config(cls, config_path: str) -> "TaskGenerator":
method graph_generation (line 158) | def graph_generation(self, subtask_list: list[SubTask]) -> None:
method combine (line 170) | def combine(self, current_description: str, target_description: str) -...
method gpt_choice (line 193) | def gpt_choice(
method random_walk (line 242) | def random_walk(
method _fill_task_attributes (line 270) | def _fill_task_attributes(self, task: SubTask, kept_attribute: str):
method _select_random_attributes (line 285) | def _select_random_attributes(self, type_dict: dict[str, str]) -> dict...
method generate_single_node_task (line 306) | def generate_single_node_task(subtask: SubTask):
method combine_subtask_list (line 328) | def combine_subtask_list(self, subtask_list: list[SubTask]):
method combine_two_subtasks (line 369) | def combine_two_subtasks(
method task_generation (line 462) | def task_generation(
method generate_evaluator (line 508) | def generate_evaluator(
method dump_generated_task (line 543) | def dump_generated_task(
method get_task_from_file (line 576) | def get_task_from_file(self, file_name) -> Task:
function load_subtasks (line 618) | def load_subtasks(version):
function generate_length1_all (line 637) | def generate_length1_all(
function generate_length1_by_id (line 659) | def generate_length1_by_id(generator: TaskGenerator, dir_path: str):
function generate_length2_manual (line 684) | def generate_length2_manual(generator: TaskGenerator, dir_path: str):
function main (line 728) | def main():
FILE: crab/environments/template.py
function set_state (line 18) | def set_state(value: bool, env: Environment) -> None:
function current_state (line 29) | def current_state(env: Environment) -> bool:
FILE: crab/server/api.py
function raw_action (line 32) | async def raw_action(request: Request):
FILE: crab/server/config.py
class Settings (line 19) | class Settings(BaseSettings):
class EnvSettings (line 25) | class EnvSettings(BaseSettings):
function parse_args (line 29) | def parse_args():
FILE: crab/server/exception_handlers.py
function request_validation_exception_handler (line 26) | async def request_validation_exception_handler(
function unhandled_exception_handler (line 44) | async def unhandled_exception_handler(
FILE: crab/server/main.py
function init (line 33) | def init(environment_config: EnvironmentConfig) -> FastAPI:
FILE: crab/server/middleware.py
function log_request_middleware (line 22) | async def log_request_middleware(request: Request, call_next):
FILE: crab/server/utils.py
function get_instances (line 19) | def get_instances(package, class_type):
function get_benchmarks_environments (line 34) | def get_benchmarks_environments():
FILE: crab/utils/common.py
function base64_to_image (line 22) | def base64_to_image(encoded: str) -> Image.Image:
function image_to_base64 (line 26) | def image_to_base64(image: Image.Image) -> str:
function callable_to_base64 (line 32) | def callable_to_base64(func: Callable) -> str:
function base64_to_callable (line 36) | def base64_to_callable(encoded: str) -> Callable:
function json_expand_refs (line 40) | def json_expand_refs(schema: dict | list, defs: dict | None = None):
FILE: crab/utils/encryption.py
function encrypt_message (line 26) | def encrypt_message(plaintext: str, key: bytes) -> str:
function decrypt_message (line 43) | def decrypt_message(encrypted: str, key: bytes) -> str:
function generate_key_from_env (line 64) | def generate_key_from_env() -> Optional[bytes]:
FILE: crab/utils/measure.py
function timed (line 29) | def timed(func):
FILE: examples/multi_env.py
function start_benchmark (line 22) | def start_benchmark(benchmark: Benchmark, agent: SingleAgentPolicy):
FILE: examples/single_env.py
function start_benchmark (line 22) | def start_benchmark(benchmark: Benchmark, agent: SingleAgentPolicy):
FILE: licenses/update_license.py
function fine_license_start_line (line 22) | def fine_license_start_line(lines: List[str], start_with: str) -> int:
function find_license_end_line (line 29) | def find_license_end_line(lines: List[str], start_with: str) -> int:
function update_license_in_file (line 36) | def update_license_in_file(
function update_license_in_directory (line 76) | def update_license_in_directory(
FILE: test/actions/test_visual_prompt_actions.py
function test_get_groundingdino_boxes_single_image (line 28) | def test_get_groundingdino_boxes_single_image():
function test_get_groundingdino_boxes_multi_image (line 42) | def test_get_groundingdino_boxes_multi_image():
function test_groundingdino_easy_ocr (line 64) | def test_groundingdino_easy_ocr(image_name: str):
FILE: test/agents/backend_models/test_camel_model.py
function camel_model (line 21) | def camel_model():
function add (line 34) | def add(a: int, b: int):
function test_action_chat (line 45) | def test_action_chat(camel_model):
FILE: test/agents/backend_models/test_claude_model.py
function claude_model_text (line 23) | def claude_model_text():
function add (line 35) | def add(a: int, b: int):
function test_text_chat (line 46) | def test_text_chat(claude_model_text):
function test_action_chat (line 67) | def test_action_chat(claude_model_text):
FILE: test/agents/backend_models/test_gemini_model.py
function gemini_model_text (line 23) | def gemini_model_text():
function add (line 36) | def add(a: int, b: int):
function test_text_chat (line 47) | def test_text_chat(gemini_model_text):
function test_action_chat (line 68) | def test_action_chat(gemini_model_text):
FILE: test/agents/backend_models/test_openai_model.py
function openai_model_text (line 90) | def openai_model_text():
function add (line 104) | def add(a: int, b: int):
function test_text_chat (line 118) | def test_text_chat(mock_create, openai_model_text):
function test_action_chat (line 147) | def test_action_chat(mock_create, openai_model_text):
FILE: test/agents/policies/test_multi_agent_by_func.py
function policy_fixture (line 23) | def policy_fixture():
function test_policy (line 46) | def test_policy(policy_fixture):
FILE: test/agents/policies/test_mutli_agent_by_env.py
function policy_fixture (line 23) | def policy_fixture():
function test_policy (line 46) | def test_policy(policy_fixture):
FILE: test/agents/policies/test_single_agent.py
function policy_fixture (line 76) | def policy_fixture():
function test_policy (line 100) | def test_policy(mock_create: MagicMock, policy_fixture):
FILE: test/core/test_action.py
function dummy_function (line 19) | def dummy_function(a: int, b: str = "default") -> int:
function dummy_env_action (line 34) | def dummy_env_action(a: int, env: int) -> int:
function test_action_to_openai_json_schema (line 48) | def test_action_to_openai_json_schema():
function test_from_function (line 65) | def test_from_function():
function test_chaining (line 74) | def test_chaining():
function add_a_to_b (line 80) | def add_a_to_b(a: int, b: int = 1) -> int:
function multiply_a_to_b (line 85) | def multiply_a_to_b(a: int, b: int = 1) -> int:
function test_closed_action (line 89) | def test_closed_action():
function test_kwargs_action (line 95) | def test_kwargs_action():
function test_chain_various_actions (line 100) | def test_chain_various_actions():
function test_kept_param (line 109) | def test_kept_param():
FILE: test/core/test_benchmark.py
function benchmark (line 27) | def benchmark(request):
function test_multi_env_benchmark_process (line 51) | def test_multi_env_benchmark_process(benchmark: Benchmark):
function to_str (line 82) | def to_str(input: bool) -> str:
function test_prompting_tool (line 87) | def test_prompting_tool(benchmark: Benchmark):
FILE: test/core/test_evaluator.py
function set_a (line 22) | def set_a(value: int) -> None:
function dummy_evaluator1 (line 28) | def dummy_evaluator1() -> bool:
function dummy_evaluator2 (line 42) | def dummy_evaluator2() -> bool:
function dummy_evaluator3 (line 57) | def dummy_evaluator3() -> bool:
function no_param_evaluator (line 72) | def no_param_evaluator() -> bool:
function root_env (line 77) | def root_env() -> Environment:
function test_evaluator_run (line 86) | def test_evaluator_run():
function test_evaluator_and (line 94) | def test_evaluator_and():
function test_evaluator_or (line 103) | def test_evaluator_or():
function test_evaluator_not (line 112) | def test_evaluator_not():
function test_chain_evaluator (line 119) | def test_chain_evaluator(root_env):
FILE: test/core/test_utils.py
function test_encrypt_decrypt (line 21) | def test_encrypt_decrypt():
FILE: test/server/test_api.py
function mock_env (line 28) | def mock_env():
function test_raw_action_unencrypted (line 36) | def test_raw_action_unencrypted(mock_env):
function test_raw_action_encrypted (line 44) | def test_raw_action_encrypted(mock_env, monkeypatch):
Condensed preview — 230 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (584K chars).
[
{
"path": ".github/ISSUE_TEMPLATE/bug_report.yml",
"chars": 2906,
"preview": "name: 🐛 Bug Report\ndescription: File an issue about a bug.\ntitle: \"[BUG] \"\nlabels: [bug]\nassignees: [dandansamax]\nbody:\n"
},
{
"path": ".github/ISSUE_TEMPLATE/feature_request.yml",
"chars": 1330,
"preview": "name: ✨ Feature Request\ndescription: Suggest an idea for this project.\ntitle: \"[Feature Request] \"\nlabels: [enhancement]"
},
{
"path": ".github/ISSUE_TEMPLATE/questions.yml",
"chars": 886,
"preview": "name: 🤔 Questions / Help / Support\ndescription: Do you need support?\ntitle: \"[Question] \"\nlabels: [question]\nassignees: "
},
{
"path": ".github/actions/crab_install/action.yml",
"chars": 1183,
"preview": "name: 'crab_install'\ndescription: 'Setup python environment and install dependencies for Crab by poetry.'\ninputs:\n pyth"
},
{
"path": ".github/workflows/documentation.yml",
"chars": 747,
"preview": "name: Build and deploy CRAB documents\non:\n push:\n branches: [ \"main\" ]\n workflow_dispatch:\npermissions:\n content"
},
{
"path": ".github/workflows/publish_release.yml",
"chars": 1406,
"preview": "name: Publish CRAB to PyPI / GitHub\n\non:\n push:\n tags:\n - \"v*\"\n\n workflow_dispatch:\n\njobs:\n build-n-publish:\n"
},
{
"path": ".github/workflows/pytest_package.yml",
"chars": 510,
"preview": "# This workflow will install Python dependencies, run tests\n# For more information see: https://docs.github.com/en/actio"
},
{
"path": ".gitignore",
"chars": 3167,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": ".pre-commit-config.yaml",
"chars": 403,
"preview": "repos:\n - repo: https://github.com/astral-sh/ruff-pre-commit\n # Ruff version.\n rev: v0.6.5\n hooks:\n # Run"
},
{
"path": "README.md",
"chars": 4086,
"preview": "# 🦀 CRAB: Cross-platform Agent Benchmark for Multimodal Embodied Language Model Agents\r\n\r\n[![arXiv][arxiv-image]][arxiv-"
},
{
"path": "crab/__init__.py",
"chars": 762,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/actions/android_actions.py",
"chars": 6426,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/actions/crab_actions.py",
"chars": 1966,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/actions/desktop_actions.py",
"chars": 7461,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/actions/file_actions.py",
"chars": 946,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/actions/system_actions.py",
"chars": 1224,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/actions/visual_prompt_actions.py",
"chars": 11322,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/agents/backend_models/__init__.py",
"chars": 5637,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/agents/backend_models/camel_model.py",
"chars": 5375,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/agents/backend_models/claude_model.py",
"chars": 8267,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/agents/backend_models/gemini_model.py",
"chars": 7480,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/agents/backend_models/openai_model.py",
"chars": 10075,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/agents/policies/__init__.py",
"chars": 873,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/agents/policies/multi_agent_by_env.py",
"chars": 6874,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/agents/policies/multi_agent_by_func.py",
"chars": 4163,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/agents/policies/single_agent.py",
"chars": 6379,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/agents/utils.py",
"chars": 3597,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/benchmarks/__init__.py",
"chars": 700,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/benchmarks/template.py",
"chars": 2839,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/__init__.py",
"chars": 1107,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/agent_policy.py",
"chars": 1264,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/backend_model.py",
"chars": 1110,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/benchmark.py",
"chars": 16912,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/csv_log.py",
"chars": 1300,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/decorators.py",
"chars": 1731,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/environment.py",
"chars": 9732,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/exceptions.py",
"chars": 786,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/experiment.py",
"chars": 9752,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/graph_evaluator.py",
"chars": 9131,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/models/__init__.py",
"chars": 1361,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/models/action.py",
"chars": 12600,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/models/agent_interface.py",
"chars": 1205,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/models/benchmark_interface.py",
"chars": 915,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/models/config.py",
"chars": 1548,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/models/evaluator.py",
"chars": 2349,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/models/task.py",
"chars": 3238,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/core/task_generator.py",
"chars": 29434,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/environments/__init__.py",
"chars": 700,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/environments/template.py",
"chars": 1328,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/server/__init__.py",
"chars": 700,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/server/api.py",
"chars": 2037,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/server/config.py",
"chars": 1313,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/server/exception_handlers.py",
"chars": 2655,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/server/logger.py",
"chars": 2120,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/server/main.py",
"chars": 2299,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/server/middleware.py",
"chars": 1756,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/server/utils.py",
"chars": 1556,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/utils/__init__.py",
"chars": 1120,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/utils/common.py",
"chars": 3470,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/utils/encryption.py",
"chars": 2828,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab/utils/measure.py",
"chars": 1654,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab-benchmark-v0/README.md",
"chars": 4427,
"preview": "# Crab Benchmark v0\n\n## Overview\n\n`crab-benchmark-v0` is a benchmark released with the crab framework to provide a stand"
},
{
"path": "crab-benchmark-v0/__init__.py",
"chars": 700,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab-benchmark-v0/android_env.py",
"chars": 1638,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab-benchmark-v0/dataset/android/1005c437-50d1-465a-b3fc-833098b22bfc.json",
"chars": 829,
"preview": "{\n \"description\": \"In the Android operating system, use the \\\"Google Map\\\" app to find the city name corresponding to"
},
{
"path": "crab-benchmark-v0/dataset/android/12333aa0-e76d-4a5c-8657-9f897f62f62d.json",
"chars": 742,
"preview": "{\n \"description\": \"In Android, use the \\\"Google Map\\\" app to find the city name for the postal code \\\"2770885\\\" in Ja"
},
{
"path": "crab-benchmark-v0/dataset/android/22b04776-8eec-4303-b3f6-9c981f7f29b8.json",
"chars": 387,
"preview": "{\n \"description\": \"In Android, Using \\\"Setting\\\" app, rename the device name of bluetooth as \\\"Sydney\\\".\",\n \"tasks"
},
{
"path": "crab-benchmark-v0/dataset/android/2ade6a13-c7a6-4df7-8c62-77382687369e.json",
"chars": 740,
"preview": "{\n \"description\": \"In Android, using the \\\"Contacts\\\" app, find the email of the contact named John Lauphin, then usi"
},
{
"path": "crab-benchmark-v0/dataset/android/346caf7c-dc74-4c38-962a-aaffb638e0c7.json",
"chars": 442,
"preview": "{\n \"description\": \"In Android, Using \\\"Calendar\\\" app, add a new task with text \\\"meeting\\\" in date \\\"June 5th 2024\\\""
},
{
"path": "crab-benchmark-v0/dataset/android/379b9c58-5125-41b3-9cc6-ea925c8b094d.json",
"chars": 461,
"preview": "{\n \"description\": \"In Android, Using Google Map app, Find the city name of corresponding post code \\\"560049\\\" in the "
},
{
"path": "crab-benchmark-v0/dataset/android/4190c90c-b28c-4bb3-ab5c-af3c4fde0a3d.json",
"chars": 460,
"preview": "{\n \"description\": \"In Android, Using Google Map app, Find the city name of corresponding post code \\\"1010021\\\" in the"
},
{
"path": "crab-benchmark-v0/dataset/android/46d7ccdb-d2e4-4b8a-bead-f2641b5ac23c.json",
"chars": 446,
"preview": "{\n \"description\": \"In Android, Using \\\"Contacts\\\" app, add a contact with a mail \\\"{mail}\\\" with a name \\\"{name}\\\".\","
},
{
"path": "crab-benchmark-v0/dataset/android/483fbf9c-dc78-4ac2-9264-53c4f617f6cc.json",
"chars": 732,
"preview": "{\n \"description\": \"Open the calendar app in the Android system and find the title of an event on the date \\\"17 August"
},
{
"path": "crab-benchmark-v0/dataset/android/4893a9b0-6477-495d-a73c-32503326e24a.json",
"chars": 413,
"preview": "{\n \"description\": \"In the Android system, use the calendar app to find the title of an event on the date \\\"16 July 20"
},
{
"path": "crab-benchmark-v0/dataset/android/53010c40-dce4-4d72-a856-842c21059e2b.json",
"chars": 774,
"preview": "{\n \"description\": \"In the Android system, use the calendar app to find the title of an event on the date \\\"16 July 20"
},
{
"path": "crab-benchmark-v0/dataset/android/6d9f6395-de79-4ad0-8a2a-2d674f93f293.json",
"chars": 470,
"preview": "{\n \"description\": \"In Android, Using \\\"Clock\\\" app, set the time of \\\"London\\\" in the clock, check the time gap betwe"
},
{
"path": "crab-benchmark-v0/dataset/android/71ef7fd2-0ae3-49c8-8238-06b7aa985d25.json",
"chars": 930,
"preview": "{\n \"description\": \"Using the \\\"Google Map\\\" app on Android, find the distance of the shortest route from \\\"National U"
},
{
"path": "crab-benchmark-v0/dataset/android/73f78fc3-1ca5-442d-801f-bc175a0bfb89.json",
"chars": 554,
"preview": "{\n \"description\": \"In Android, using \\\"Google Map\\\" App, find the distance of the shortest route from \\\"Southern Univ"
},
{
"path": "crab-benchmark-v0/dataset/android/764838cc-9359-4130-9bb2-4a75900b2d89.json",
"chars": 335,
"preview": "{\n \"description\": \"In Android, call \\\"123456789\\\".\",\n \"tasks\": [\n {\n \"task\": \"955d8773-dd7a-4072"
},
{
"path": "crab-benchmark-v0/dataset/android/77289141-e52b-48c8-b3a7-1b29520f3e1e.json",
"chars": 414,
"preview": "{\n \"description\": \"In Android, Using \\\"Contacts\\\" app, find out the mail of contact named \\\"John Haruhimiya\\\".\",\n "
},
{
"path": "crab-benchmark-v0/dataset/android/7891ceab-7965-4ddb-a0fc-15740c9a4e44.json",
"chars": 718,
"preview": "{\n \"description\": \"In Android, Using \\\"Google Map\\\" app, find the city name of corresponding post code \\\"560049\\\" in "
},
{
"path": "crab-benchmark-v0/dataset/android/8bd51440-f959-4edc-baa5-cd03d32a5b0f.json",
"chars": 781,
"preview": "{\n \"description\": \"In Android, use the \\\"Google Map\\\" app to find the address of the University of Sydney, then using"
},
{
"path": "crab-benchmark-v0/dataset/android/94b1836b-3111-40ad-8d07-b8a57efe7438.json",
"chars": 836,
"preview": "{\n \"description\": \"In an Android system, use the calendar app to find the title of an event on the date \\\"9 August 20"
},
{
"path": "crab-benchmark-v0/dataset/android/a225f7f8-6d03-4619-b57d-7a08610030d8.json",
"chars": 775,
"preview": "{\n \"description\": \"In Android, Using \\\"Google Map\\\" app, Find the address of \\\"University of Oxford\\\" and send \\\"9880"
},
{
"path": "crab-benchmark-v0/dataset/android/b077299d-1acb-40f5-89f3-cc08044345bf.json",
"chars": 399,
"preview": "{\n \"description\": \"Using \\\"Tasks\\\" app, add a new task with text \\\"Watch camel tutorial video\\\".\",\n \"tasks\": [\n "
},
{
"path": "crab-benchmark-v0/dataset/android/b3965b07-4683-4445-9de1-a1dedf6c73ad.json",
"chars": 783,
"preview": "{\n \"description\": \"In Android, Using \\\"Google Map\\\" app, Find the address of \\\"University of Oxford\\\" and send \\\"abcd"
},
{
"path": "crab-benchmark-v0/dataset/android/c1b1cfeb-40e7-49a8-a3f5-b8c8ba723601.json",
"chars": 381,
"preview": "{\n \"description\": \"In Android, Using \\\"Google Drive\\\" app, create a new folder named \\\"Journey\\\".\",\n \"tasks\": [\n "
},
{
"path": "crab-benchmark-v0/dataset/android/c85f03c9-83c4-417b-93d9-0d7b41022525.json",
"chars": 410,
"preview": "{\n \"description\": \"In android system, use the calendar app, find the title of an event in the date \\\"15 June, 2024\\\"."
},
{
"path": "crab-benchmark-v0/dataset/android/cf4c496b-fbbd-4701-91ea-4590fe6a66e1.json",
"chars": 786,
"preview": "{\n \"description\": \"In Android, use the \\\"Google Map\\\" app to find the city name corresponding to the postcode \\\"11015"
},
{
"path": "crab-benchmark-v0/dataset/android/d0811e47-d75f-40ce-b34b-e1ee3c8bed3f.json",
"chars": 785,
"preview": "{\n \"description\": \"In Android, first use the \\\"Files\\\" app to find the creation date of the file /Movies/movie_list.t"
},
{
"path": "crab-benchmark-v0/dataset/android/d2d456bb-c7d1-46af-8263-78d8509fb320.json",
"chars": 457,
"preview": "{\n \"description\": \"In Android, using \\\"Gmail\\\" App, send \\\"abcdcly@qq.com\\\" a message \\\"Hello, nice to meet you!\\\"\",\n"
},
{
"path": "crab-benchmark-v0/dataset/android/d4e0f2b3-d0ff-4efd-856f-9f5e598cfd05.json",
"chars": 445,
"preview": "{\n \"description\": \"In Android, Using \\\"Google Map\\\" app, Find the address of \\\"University of Oxford\\\"\",\n \"tasks\": "
},
{
"path": "crab-benchmark-v0/dataset/android/d7489d00-0046-4fb1-af5b-1fde7d87312c.json",
"chars": 687,
"preview": "{\n \"description\": \"In Android, open the \\\"Contacts\\\" app to find the email address of the contact named Karoon Wei, t"
},
{
"path": "crab-benchmark-v0/dataset/android/d92f6c33-e0a7-4101-957d-e7dd218d2565.json",
"chars": 742,
"preview": "{\n \"description\": \"Using the \\\"Files\\\" app on an Android device, locate the file /Movies/movie_list.txt and determine"
},
{
"path": "crab-benchmark-v0/dataset/android/de843952-df8f-4a26-bae9-d0a32ed9a7f5.json",
"chars": 422,
"preview": "{\n \"description\": \"In Android, Using \\\"Files\\\" app, find the create date of \\\"Downloads/meow.jpg\\\" in the sdk system."
},
{
"path": "crab-benchmark-v0/dataset/android/e20fd121-b981-42da-94de-efcd66889c11.json",
"chars": 462,
"preview": "{\n \"description\": \"In Android, using \\\"Messages\\\", send \\\"The meeting starts from 10am today\\\" to \\\"123456789\\\".\",\n "
},
{
"path": "crab-benchmark-v0/dataset/android/e55d7a39-7b6b-4852-8711-844cebc88cb8.json",
"chars": 459,
"preview": "{\n \"description\": \"In Android, use the \\\"Google Map\\\" app to find the city name corresponding to the postcode \\\"11015"
},
{
"path": "crab-benchmark-v0/dataset/android/e9268070-91b7-4e8c-9976-1cf8126ba13b.json",
"chars": 695,
"preview": "{\n \"description\": \"In the Android system, use the task app to find the title of an event on the date \\\"15 June 2024\\\""
},
{
"path": "crab-benchmark-v0/dataset/android/fbe6a1b1-63bb-4d4e-8a53-ff4f7839ef61.json",
"chars": 764,
"preview": "{\n \"description\": \"In Android, open the \\\"Contacts\\\" app to find the email address of a contact named Luis Martin, th"
},
{
"path": "crab-benchmark-v0/dataset/android/fc642cb6-5321-4966-afbf-fb3348bb69ee.json",
"chars": 468,
"preview": "{\n \"description\": \"In Android, using \\\"Keep Notes\\\" App, record \\\"Camel is the best agent framework in the world!\\\" i"
},
{
"path": "crab-benchmark-v0/dataset/android_subtasks.py",
"chars": 24643,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab-benchmark-v0/dataset/cross/05a7633d-b966-471c-8848-e18e69ad265f.json",
"chars": 804,
"preview": "{\n \"description\": \"In Android, use the \\\"Google Map\\\" app to find the city name corresponding to the postal code \\\"10"
},
{
"path": "crab-benchmark-v0/dataset/cross/1e92db38-501e-429b-ac31-453d1af10a25.json",
"chars": 800,
"preview": "{\n \"description\": \"Open the terminal on Ubuntu, print the content of \\\"/home/crab/Desktop/kolakov.txt\\\" to the comman"
},
{
"path": "crab-benchmark-v0/dataset/cross/43be6e8e-034d-4277-8346-c4ae7553bf68.json",
"chars": 874,
"preview": "{\n \"description\": \"On an Android device, using the Google Map app, find the address of Dignity Health Sports Park, th"
},
{
"path": "crab-benchmark-v0/dataset/cross/534be964-269a-4509-b2b8-28cc3ba8dfca.json",
"chars": 740,
"preview": "{\n \"description\": \"On an Android system, use the calendar app to find the title of an event on the date \\\"18 Septembe"
},
{
"path": "crab-benchmark-v0/dataset/cross/6f95cfa1-e7ae-4a82-912b-0180fc9622f2.json",
"chars": 793,
"preview": "{\n \"description\": \"On an Android system, open the calendar app and find the title of an event scheduled for \\\"15 June"
},
{
"path": "crab-benchmark-v0/dataset/cross/760ed27e-b1bd-451f-8659-bdb9845fcb7f.json",
"chars": 789,
"preview": "{\n \"description\": \"Open the \\\"~/Desktop/contact.txt\\\" file via the command line interface in Ubuntu to view its conte"
},
{
"path": "crab-benchmark-v0/dataset/cross/82596760-7d4d-457d-9ca9-9551ab85ec58.json",
"chars": 698,
"preview": "{\n \"description\": \"Using the \\\"Google Map\\\" app on an Android device, find the city name corresponding to the postal "
},
{
"path": "crab-benchmark-v0/dataset/cross/a956a091-8de4-42ee-b152-913308dfc24b.json",
"chars": 667,
"preview": "{\n \"description\": \"In the \\\"Clock\\\" app on Android, add Yakarta's time, compare it with the current city's time to de"
},
{
"path": "crab-benchmark-v0/dataset/cross/c5929ef3-ac27-4288-b02f-4f261d5871f9.json",
"chars": 774,
"preview": "{\n \"description\": \"In Android, use the \\\"Google Map\\\" app to find the city name corresponding to the postal code \\\"10"
},
{
"path": "crab-benchmark-v0/dataset/cross/da5911e3-1a99-4735-ba3e-f08c5ca81fdd.json",
"chars": 813,
"preview": "{\n \"description\": \"Open a terminal in Ubuntu, print the content of \\\"~/Desktop/contract_reminder.txt\\\", and then, on "
},
{
"path": "crab-benchmark-v0/dataset/handmade_tasks.py",
"chars": 27671,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/05d0e137-7d97-4021-9477-6490a2154c81.json",
"chars": 559,
"preview": "{\n \"description\": \"Open \\\"/home/crab/poem\\\" using vim in a terminal, write \\\"If you shed tears when you miss the sun,"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/0a893c2e-eec5-47cc-a930-eb01c5f17683.json",
"chars": 459,
"preview": "{\n \"description\": \"Submit the following content \\\"If you shed tears when you miss the sun, you also miss the stars.\\\""
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/0d178388-8166-4b66-93c1-278861f9897c.json",
"chars": 497,
"preview": "{\n \"description\": \"Use Firefox to find out a \\\"restaurant\\\" around \\\"kaust\\\" on Google Maps and copy the Google Maps "
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/0d7c84d2-bbbd-46ab-80d1-52b3a44f3858.json",
"chars": 713,
"preview": "{\n \"description\": \"Combine two images from Image 1 \\\"/home/crab/assets/campus.png\\\" and Image 2 \\\"/home/crab/assets/d"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/0deafe05-8db5-445f-9031-f6e884569d03.json",
"chars": 1252,
"preview": "{\n \"description\": \"Create a new directory \\\"/home/crab/jpg_folder\\\", copy all files with the \\\"jpg\\\" extension from \\"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/0e80fd90-0b23-454f-a629-7b6d7baa7542.json",
"chars": 718,
"preview": "{\n \"description\": \"Use Firefox to search for the country \\\"Canada\\\" on Wikipedia, extract the capital city and popula"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/125f7bae-e931-4190-8737-5f1ea7227772.json",
"chars": 967,
"preview": "{\n \"description\": \"Submit content \\\"OpenAI is an American artificial intelligence (AI) research organization founded "
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/15a150a8-899c-4753-8dc5-05248ccc3640.json",
"chars": 1010,
"preview": "{\n \"description\": \"Download the file from \\\"https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/1ebcd710-f73b-4022-832b-167c0d3f55a2.json",
"chars": 509,
"preview": "{\n \"description\": \"Use Firefox to find out a \\\"University\\\" around \\\"Los Angeles\\\" on Google Maps and copy the Google"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/22787ecc-52b2-4791-aefb-c45800f51414.json",
"chars": 667,
"preview": "{\n \"description\": \"Submit content \\\"Jensen Huang cofounded graphics-chip maker Nvidia in 1993, and has served as its "
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/22f05f6f-6aef-4786-958f-14f559eaf014.json",
"chars": 605,
"preview": "{\n \"description\": \"Create a new directory \\\"/home/crab/example_code\\\" and copy all files with the specified \\\"py\\\" ex"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/28963795-d694-4bb4-adaf-f7708a2c6fe5.json",
"chars": 402,
"preview": "{\n \"description\": \"Use Firefox to search for an image using the keyword \\\"Elon Musk\\\" and copy the URL of the image.\""
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/299db8f2-81eb-455f-9302-5c8cb30be691.json",
"chars": 1126,
"preview": "{\n \"description\": \"Combine two images, Image 1 \\\"/home/crab/Pictures/Interstellar.jpg\\\" and Image 2 \\\"/home/crab/Pict"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/29f099b2-b3a5-463f-b10a-15363bf7e845.json",
"chars": 804,
"preview": "{\n \"description\": \"Use Firefox to search for a \\\"garden\\\" around \\\"ETH Zurich\\\" on Google Maps, copy the sharing URL "
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/355e9660-a355-4b95-8881-ac9da578ea43.json",
"chars": 718,
"preview": "{\n \"description\": \"Use Firefox to search for the country \\\"Italy\\\" on Wikipedia, extract the capital city and populat"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/35bd7387-4735-4632-8474-e93382004c12.json",
"chars": 621,
"preview": "{\n \"description\": \"Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from \\\"/home/crab/"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/362c5711-3824-42ff-96a0-7801b03b5f1f.json",
"chars": 477,
"preview": "{\n \"description\": \"Use Firefox to find a code repository about \\\"Open Source Computer Vision Library\\\" in GitHub and "
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/4718df9c-97ec-4b54-86ca-bd34e65c5a43.json",
"chars": 501,
"preview": "{\n \"description\": \"Download a file from \\\"https://arxiv.org/pdf/2303.05499\\\" to \\\"/home/crab/Documents/Grounding_DINO"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/47b75b21-99a2-461c-9d40-6dddc5c206d0.json",
"chars": 407,
"preview": "{\n \"description\": \"Use Firefox to search for an image using the keyword \\\"LLM\\\" and copy the URL of the image to the "
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/4ae4e35f-d90a-48cc-8fb9-492ac7ae07ee.json",
"chars": 459,
"preview": "{\n \"description\": \"Paste clipboard content into LibreOffice Writer and save it as an ODT file at \\\"/home/crab/Documen"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/4bbedade-4d4e-43d5-b650-2702b350ad28.json",
"chars": 789,
"preview": "{\n \"description\": \"Open \\\"/home/crab/assets/1.txt\\\" using vim in a terminal, write \\\"LinkedIn is a business and emplo"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/51a288f9-cf2c-4e8e-a98c-596a505af77c.json",
"chars": 739,
"preview": "{\n \"description\": \"Combine two images from Image 1 \\\"/home/crab/assets/desert.jpg\\\" and Image 2 \\\"/home/crab/assets/c"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/51c91051-3efb-4e92-a967-739b18520714.json",
"chars": 741,
"preview": "{\n \"description\": \"Open Firefox and search for the torch.matmul example provided by the official PyTorch version 1.13"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/57b7e8a7-8c17-4cc4-9bb5-4385afde3ad8.json",
"chars": 1240,
"preview": "{\n \"description\": \"Create a new directory \\\"/home/crab/assets_for_edit\\\" and copy all files with the \\\"png\\\" extensio"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/58776443-ccf7-4db3-8c60-e188e4b5f90c.json",
"chars": 421,
"preview": "{\n \"description\": \"Paste clipboard content into LibreOffice Writer and save it as an ODT file at \\\"/home/crab/paste.o"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/5ba74c6a-4513-448b-8b68-ff145ece0652.json",
"chars": 911,
"preview": "{\n \"description\": \"Download the file from \\\"https://raw.githubusercontent.com/camel-ai/camel/master/README.md\\\" to \\\""
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/6428f803-62de-40d2-a345-64e6cf955c9d.json",
"chars": 1272,
"preview": "{\n \"description\": \"First, use LibreOffice Impress to adjust the brightness of the image located at \\\"/home/crab/Pictu"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/64a2c205-c85a-4e56-8edb-5df4f7724441.json",
"chars": 423,
"preview": "{\n \"description\": \"Find the example provided of \\\"torch.matmul\\\" by official PyTorch version 1.13 documentation using"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/696ca9bb-89ea-4cd5-b693-f2d749d964b1.json",
"chars": 982,
"preview": "{\n \"description\": \"Adjust the brightness of the image located at \\\"/home/crab/assets/campus.png\\\" using GIMP (GNU Ima"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/6be49e77-e904-4eb0-a36a-7f0fd128ede3.json",
"chars": 421,
"preview": "{\n \"description\": \"Use Firefox to find a code repository about \\\"pytorch\\\" in GitHub and copy the URL of the reposito"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/6c3105a2-328c-4190-823d-03d759be0b57.json",
"chars": 771,
"preview": "{\n \"description\": \"Use Firefox to search for an image with the keyword \\\"reinforcement learning,\\\" copy the URL of th"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/6c560516-ca14-4f97-b51d-16ad81fc29e4.json",
"chars": 1140,
"preview": "{\n \"description\": \"Open \\\"/home/crab/assets/a.txt\\\" using vim in a terminal, write \\\"The most recent COMPUTEX was hel"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/730172f5-894a-4d46-9102-ac7d985a479d.json",
"chars": 1241,
"preview": "{\n \"description\": \"Download the image of Jupiter from \\\"https://upload.wikimedia.org/wikipedia/commons/thumb/2/2b/Jup"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/73038efb-ca0f-4d90-a947-fcfd097dd91b.json",
"chars": 767,
"preview": "{\n \"description\": \"Open Firefox and navigate to the official PyTorch version 1.13 documentation to find an example of"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/73da97c9-f084-4cab-8697-1151737387ff.json",
"chars": 938,
"preview": "{\n \"description\": \"Download the file from \\\"https://images.top1market.com/images/cms/uploads/20230928/4950e1db0038feb"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/77aa4dd3-5a68-4686-9cac-26d0ab77c7b4.json",
"chars": 505,
"preview": "{\n \"description\": \"Use Firefox to find out a \\\"hiking trail\\\" around \\\"Munich\\\" on Google Maps and copy the Google Ma"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/78502f1c-879b-4932-a5fd-d85f7f6b0f81.json",
"chars": 949,
"preview": "{\n \"description\": \"Download the file from \\\"https://cemse.kaust.edu.sa/sites/default/files/styles/large/public/2023-0"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/7912f7a5-24b9-4dfe-a7b8-1effc1b7a212.json",
"chars": 741,
"preview": "{\n \"description\": \"Combine two images from Image 1 \\\"/home/crab/assets/campus.png\\\" and Image 2 \\\"/home/crab/assets/d"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/7d5613ec-9b67-4255-b766-d9c6e8466464.json",
"chars": 439,
"preview": "{\n \"description\": \"Paste clipboard content into LibreOffice Writer and save it as an ODT file at \\\"/home/crab/assets/"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/7dda7e46-78be-4663-b882-6132dbbff335.json",
"chars": 958,
"preview": "{\n \"description\": \"Adjust the brightness of the image located at \\\"/home/crab/Pictures/Interstellar.jpg\\\" to a higher"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/7e6c4927-2220-4522-9e3f-36f69adc3e71.json",
"chars": 446,
"preview": "{\n \"description\": \"Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at \\\"/home/crab/as"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/82c49e12-3b2f-432e-9069-4b67bafebbf7.json",
"chars": 853,
"preview": "{\n \"description\": \"Open Firefox to find a coffee shop around the hungarian parliament on Google Maps, copy the sharin"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/87910f23-ab23-4ccc-b115-d71cff6f0162.json",
"chars": 710,
"preview": "{\n \"description\": \"Use Firefox to search for an image with the keyword \\\"patagonia,\\\" copy the URL of the chosen imag"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/8afc25eb-7a80-459f-acdc-5c79fc146c29.json",
"chars": 448,
"preview": "{\n \"description\": \"Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at \\\"/home/crab/as"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/8cb5ab6d-a56e-43b9-aa83-00a46331e20f.json",
"chars": 1276,
"preview": "{\n \"description\": \"Download the image from \\\"https://res.cloudinary.com/simpleview/image/upload/v1648755098/clients/a"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/90e09946-7b28-4102-b0ed-f683c01dbbd4.json",
"chars": 413,
"preview": "{\n \"description\": \"Use Firefox to find a code repository about \\\"W&B\\\" in GitHub and copy the URL of the repository t"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/925a3607-2802-48aa-b339-13ebfcef43a2.json",
"chars": 439,
"preview": "{\n \"description\": \"Use Firefox to find a code repository about \\\"segment-anything\\\" in GitHub and copy the URL of the"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/9506dd30-f58d-4832-b336-8037e83e2689.json",
"chars": 441,
"preview": "{\n \"description\": \"Get the content of \\\"/home/crab/Documents/nba.txt\\\" by printing it to the command line interface t"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/95e347aa-56ab-4d5d-a94c-350ddfddabf9.json",
"chars": 591,
"preview": "{\n \"description\": \"Create a new directory \\\"/home/crab/png_folder\\\" and copy all files with the specified \\\"png\\\" ext"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/98a360d8-0f95-44cd-bb9d-442fca2918d4.json",
"chars": 565,
"preview": "{\n \"description\": \"Download a file from \\\"https://github.com/open-mmlab/mmdetection/archive/refs/tags/v3.3.0.zip\\\" to"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/9c979fc5-8d60-41f1-a494-904a1d312187.json",
"chars": 750,
"preview": "{\n \"description\": \"Use Firefox to search for the country \\\"United Kingdom\\\" on Wikipedia, extract the capital city an"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/9e08971c-7f83-4853-952e-4c4a4a26333b.json",
"chars": 415,
"preview": "{\n \"description\": \"Use Firefox to search for an image using the keyword \\\"Red Sea\\\" and copy the URL of the image to "
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/9fe4f541-61cf-48e0-a081-4371786659c7.json",
"chars": 418,
"preview": "{\n \"description\": \"Set \\\"/home/crab/Pictures/Interstellar.jpg\\\" as the screen background of the system\",\n \"tasks\":"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/a0714ef7-bbdc-4f84-bd2e-c6e611d4db9e.json",
"chars": 419,
"preview": "{\n \"description\": \"Get the content of \\\"/home/crab/ubuntu\\\" by printing it to the command line interface through a te"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/a2a34580-cded-4bf8-81d9-b36a4d4402d0.json",
"chars": 410,
"preview": "{\n \"description\": \"Set \\\"/home/crab/assets/background.png\\\" as the screen background of the system\",\n \"tasks\": [\n "
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/a6b67c2d-d448-4e77-904e-dc7c5f21a5fe.json",
"chars": 435,
"preview": "{\n \"description\": \"Get the content of \\\"/home/crab/crab/README.md\\\" by printing it to the command line interface thro"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/a70ab903-835f-48b7-8356-2321b8b869d8.json",
"chars": 768,
"preview": "{\n \"description\": \"Using Firefox, find the example of torch.matmul provided by the official PyTorch version 1.13 docu"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/a78177f5-6cc6-48d7-8c6f-df53399d7759.json",
"chars": 427,
"preview": "{\n \"description\": \"Use Firefox to search for an image using the keyword \\\"The Colosseum\\\" and copy the URL of the ima"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/abb16512-27ae-49c0-b12b-7fbf0e95056b.json",
"chars": 803,
"preview": "{\n \"description\": \"Paste the clipboard content into Visual Studio Code (VS Code) and save the file as \\\"/home/crab/De"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/b2ca21dc-dde9-49f5-bec7-321fbf769315.json",
"chars": 1297,
"preview": "{\n \"description\": \"Adjust the brightness of the image located at \\\"/home/crab/assets/desert.jpg\\\" to a darker value u"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/b57c96c1-071b-40f6-b33b-2a0459fc25bb.json",
"chars": 641,
"preview": "{\n \"description\": \"Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from \\\"/home/crab/"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/b73019e0-3ce8-4657-8b13-b3e0ab6cfac8.json",
"chars": 563,
"preview": "{\n \"description\": \"Download a file from \\\"https://raw.githubusercontent.com/camel-ai/camel/master/misc/primary_logo.p"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/ba5aebcb-999d-44d4-b9bc-241f9884c6dd.json",
"chars": 639,
"preview": "{\n \"description\": \"Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from \\\"/home/crab/"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/be6468be-2218-45c1-9b75-b56efec61eb4.json",
"chars": 430,
"preview": "{\n \"description\": \"Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at \\\"/home/crab/te"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/c4106f9a-9348-4a55-9892-782e6f4b3081.json",
"chars": 600,
"preview": "{\n \"description\": \"Use LibreOffice Impress to adjust the brightness of the image from \\\"/home/crab/assets/desert.jpg\\"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/c8800e50-3ff4-4dd2-bc90-33688be99659.json",
"chars": 581,
"preview": "{\n \"description\": \"Download a file from \\\"https://raw.githubusercontent.com/facebookresearch/detectron2/main/README.m"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/ccf31785-ec13-4981-93c5-ca6c242ac0c3.json",
"chars": 1157,
"preview": "{\n \"description\": \"Download the flag of Ethiopia image from \\\"https://upload.wikimedia.org/wikipedia/commons/thumb/7/"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/d3478489-70f2-4a82-b7d2-0a47b75986eb.json",
"chars": 1161,
"preview": "{\n \"description\": \"Use Firefox to search for the country \\\"Ethiopia\\\" on Wikipedia, extract the capital city and popu"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/d39d40b1-fc26-4169-9d6f-cdf81efe9a3e.json",
"chars": 738,
"preview": "{\n \"description\": \"Use Firefox to search for the country \\\"Iceland\\\" on Wikipedia, extract the capital city and popul"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/d3c917ff-406f-447a-87f5-b8d835cba750.json",
"chars": 1034,
"preview": "{\n \"description\": \"Combine Image 1 \\\"/home/crab/Pictures/cat.png\\\" and Image 2 \\\"/home/crab/assets/campus.png\\\" using"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/d6e460e4-c295-40ad-883c-11300d7832f0.json",
"chars": 786,
"preview": "{\n \"description\": \"Using Firefox, locate the example provided of torch.matmul by the official PyTorch version 1.13 do"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/d9e4e23c-2a2a-4b5c-b034-7deb6036572d.json",
"chars": 513,
"preview": "{\n \"description\": \"Use Firefox to find out a \\\"amusement park\\\" around \\\"Sentosa\\\" on Google Maps and copy the Google"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/e31d4e3b-b753-4deb-b9ad-a0add5d4790e.json",
"chars": 734,
"preview": "{\n \"description\": \"Use Firefox to search for an image with the keyword \\\"Mission: Impossible\\\", copy the image's URL "
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/f07a1f32-2f3f-40e7-b12f-8f1b128c41f6.json",
"chars": 590,
"preview": "{\n \"description\": \"Create a new directory \\\"/home/crab/assets_copy\\\" and copy all files with the specified \\\"txt\\\" ex"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/f5cce3a0-ba65-4317-95f8-1fc7d9776c78.json",
"chars": 392,
"preview": "{\n \"description\": \"Set \\\"/home/crab/deepmind.png\\\" as the screen background of the system\",\n \"tasks\": [\n {\n"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/f67a26e4-58dd-4dc6-8859-affbf1d62f94.json",
"chars": 1099,
"preview": "{\n \"description\": \"Open \\\"/home/crab/poem\\\" using vim in a terminal, write \\\"Two roads diverged in a yellow wood, and"
},
{
"path": "crab-benchmark-v0/dataset/ubuntu/f96d7c34-9543-4679-a6ea-89e0c2ef7b1c.json",
"chars": 667,
"preview": "{\n \"description\": \"Open \\\"/home/crab/Documents/result\\\" using vim in a terminal, write \\\"Celtics vs. Mavericks odds, "
},
{
"path": "crab-benchmark-v0/dataset/ubuntu_subtasks.py",
"chars": 53118,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab-benchmark-v0/main.py",
"chars": 9441,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "crab-benchmark-v0/scripts/ubuntu_env_init.sh",
"chars": 2034,
"preview": "#!/bin/bash\n\n# Disable screen autolock\ngsettings set org.gnome.desktop.screensaver lock-enabled false\ngsettings set org."
},
{
"path": "crab-benchmark-v0/ubuntu_env.py",
"chars": 1823,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "docs/Makefile",
"chars": 634,
"preview": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the "
},
{
"path": "docs/conf.py",
"chars": 2392,
"preview": "# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========\n# Licensed under the Apache License, Versi"
},
{
"path": "docs/crab.benchmarks.rst",
"chars": 352,
"preview": "crab.benchmarks package\n=======================\n\nSubmodules\n----------\n\ncrab.benchmarks.template module\n----------------"
},
{
"path": "docs/crab.client.rst",
"chars": 488,
"preview": "crab.client package\n===================\n\nSubmodules\n----------\n\ncrab.client.env module\n----------------------\n\n.. automo"
},
{
"path": "docs/crab.core.models.rst",
"chars": 1028,
"preview": "crab.core.models package\n========================\n\nSubmodules\n----------\n\ncrab.core.models.action module\n---------------"
},
{
"path": "docs/crab.core.rst",
"chars": 1340,
"preview": "crab.core package\n=================\n\nSubpackages\n-----------\n\n.. toctree::\n :maxdepth: 4\n\n crab.core.models\n\nSubmodu"
},
{
"path": "docs/crab.environments.rst",
"chars": 686,
"preview": "crab.environments package\n=========================\n\nSubmodules\n----------\n\ncrab.environments.android module\n-----------"
}
]
// ... and 30 more files (download for full content)
About this extraction
This page contains the full source code of the camel-ai/crab GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 230 files (526.1 KB), approximately 140.1k tokens, and a symbol index with 463 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.