Repository: camel-ai/crab Branch: main Commit: a8b6d7272385 Files: 230 Total size: 526.1 KB Directory structure: gitextract_jurvigyb/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── feature_request.yml │ │ └── questions.yml │ ├── actions/ │ │ └── crab_install/ │ │ └── action.yml │ └── workflows/ │ ├── documentation.yml │ ├── publish_release.yml │ └── pytest_package.yml ├── .gitignore ├── .pre-commit-config.yaml ├── README.md ├── crab/ │ ├── __init__.py │ ├── actions/ │ │ ├── android_actions.py │ │ ├── crab_actions.py │ │ ├── desktop_actions.py │ │ ├── file_actions.py │ │ ├── system_actions.py │ │ └── visual_prompt_actions.py │ ├── agents/ │ │ ├── backend_models/ │ │ │ ├── __init__.py │ │ │ ├── camel_model.py │ │ │ ├── claude_model.py │ │ │ ├── gemini_model.py │ │ │ └── openai_model.py │ │ ├── policies/ │ │ │ ├── __init__.py │ │ │ ├── multi_agent_by_env.py │ │ │ ├── multi_agent_by_func.py │ │ │ └── single_agent.py │ │ └── utils.py │ ├── benchmarks/ │ │ ├── __init__.py │ │ └── template.py │ ├── core/ │ │ ├── __init__.py │ │ ├── agent_policy.py │ │ ├── backend_model.py │ │ ├── benchmark.py │ │ ├── csv_log.py │ │ ├── decorators.py │ │ ├── environment.py │ │ ├── exceptions.py │ │ ├── experiment.py │ │ ├── graph_evaluator.py │ │ ├── models/ │ │ │ ├── __init__.py │ │ │ ├── action.py │ │ │ ├── agent_interface.py │ │ │ ├── benchmark_interface.py │ │ │ ├── config.py │ │ │ ├── evaluator.py │ │ │ └── task.py │ │ └── task_generator.py │ ├── environments/ │ │ ├── __init__.py │ │ └── template.py │ ├── server/ │ │ ├── __init__.py │ │ ├── api.py │ │ ├── config.py │ │ ├── exception_handlers.py │ │ ├── logger.py │ │ ├── main.py │ │ ├── middleware.py │ │ └── utils.py │ └── utils/ │ ├── __init__.py │ ├── common.py │ ├── encryption.py │ └── measure.py ├── crab-benchmark-v0/ │ ├── README.md │ ├── __init__.py │ ├── android_env.py │ ├── dataset/ │ │ ├── android/ │ │ │ ├── 1005c437-50d1-465a-b3fc-833098b22bfc.json │ │ │ ├── 12333aa0-e76d-4a5c-8657-9f897f62f62d.json │ │ │ 
├── 22b04776-8eec-4303-b3f6-9c981f7f29b8.json │ │ │ ├── 2ade6a13-c7a6-4df7-8c62-77382687369e.json │ │ │ ├── 346caf7c-dc74-4c38-962a-aaffb638e0c7.json │ │ │ ├── 379b9c58-5125-41b3-9cc6-ea925c8b094d.json │ │ │ ├── 4190c90c-b28c-4bb3-ab5c-af3c4fde0a3d.json │ │ │ ├── 46d7ccdb-d2e4-4b8a-bead-f2641b5ac23c.json │ │ │ ├── 483fbf9c-dc78-4ac2-9264-53c4f617f6cc.json │ │ │ ├── 4893a9b0-6477-495d-a73c-32503326e24a.json │ │ │ ├── 53010c40-dce4-4d72-a856-842c21059e2b.json │ │ │ ├── 6d9f6395-de79-4ad0-8a2a-2d674f93f293.json │ │ │ ├── 71ef7fd2-0ae3-49c8-8238-06b7aa985d25.json │ │ │ ├── 73f78fc3-1ca5-442d-801f-bc175a0bfb89.json │ │ │ ├── 764838cc-9359-4130-9bb2-4a75900b2d89.json │ │ │ ├── 77289141-e52b-48c8-b3a7-1b29520f3e1e.json │ │ │ ├── 7891ceab-7965-4ddb-a0fc-15740c9a4e44.json │ │ │ ├── 8bd51440-f959-4edc-baa5-cd03d32a5b0f.json │ │ │ ├── 94b1836b-3111-40ad-8d07-b8a57efe7438.json │ │ │ ├── a225f7f8-6d03-4619-b57d-7a08610030d8.json │ │ │ ├── b077299d-1acb-40f5-89f3-cc08044345bf.json │ │ │ ├── b3965b07-4683-4445-9de1-a1dedf6c73ad.json │ │ │ ├── c1b1cfeb-40e7-49a8-a3f5-b8c8ba723601.json │ │ │ ├── c85f03c9-83c4-417b-93d9-0d7b41022525.json │ │ │ ├── cf4c496b-fbbd-4701-91ea-4590fe6a66e1.json │ │ │ ├── d0811e47-d75f-40ce-b34b-e1ee3c8bed3f.json │ │ │ ├── d2d456bb-c7d1-46af-8263-78d8509fb320.json │ │ │ ├── d4e0f2b3-d0ff-4efd-856f-9f5e598cfd05.json │ │ │ ├── d7489d00-0046-4fb1-af5b-1fde7d87312c.json │ │ │ ├── d92f6c33-e0a7-4101-957d-e7dd218d2565.json │ │ │ ├── de843952-df8f-4a26-bae9-d0a32ed9a7f5.json │ │ │ ├── e20fd121-b981-42da-94de-efcd66889c11.json │ │ │ ├── e55d7a39-7b6b-4852-8711-844cebc88cb8.json │ │ │ ├── e9268070-91b7-4e8c-9976-1cf8126ba13b.json │ │ │ ├── fbe6a1b1-63bb-4d4e-8a53-ff4f7839ef61.json │ │ │ └── fc642cb6-5321-4966-afbf-fb3348bb69ee.json │ │ ├── android_subtasks.py │ │ ├── cross/ │ │ │ ├── 05a7633d-b966-471c-8848-e18e69ad265f.json │ │ │ ├── 1e92db38-501e-429b-ac31-453d1af10a25.json │ │ │ ├── 43be6e8e-034d-4277-8346-c4ae7553bf68.json │ │ │ ├── 
534be964-269a-4509-b2b8-28cc3ba8dfca.json │ │ │ ├── 6f95cfa1-e7ae-4a82-912b-0180fc9622f2.json │ │ │ ├── 760ed27e-b1bd-451f-8659-bdb9845fcb7f.json │ │ │ ├── 82596760-7d4d-457d-9ca9-9551ab85ec58.json │ │ │ ├── a956a091-8de4-42ee-b152-913308dfc24b.json │ │ │ ├── c5929ef3-ac27-4288-b02f-4f261d5871f9.json │ │ │ └── da5911e3-1a99-4735-ba3e-f08c5ca81fdd.json │ │ ├── handmade_tasks.py │ │ ├── ubuntu/ │ │ │ ├── 05d0e137-7d97-4021-9477-6490a2154c81.json │ │ │ ├── 0a893c2e-eec5-47cc-a930-eb01c5f17683.json │ │ │ ├── 0d178388-8166-4b66-93c1-278861f9897c.json │ │ │ ├── 0d7c84d2-bbbd-46ab-80d1-52b3a44f3858.json │ │ │ ├── 0deafe05-8db5-445f-9031-f6e884569d03.json │ │ │ ├── 0e80fd90-0b23-454f-a629-7b6d7baa7542.json │ │ │ ├── 125f7bae-e931-4190-8737-5f1ea7227772.json │ │ │ ├── 15a150a8-899c-4753-8dc5-05248ccc3640.json │ │ │ ├── 1ebcd710-f73b-4022-832b-167c0d3f55a2.json │ │ │ ├── 22787ecc-52b2-4791-aefb-c45800f51414.json │ │ │ ├── 22f05f6f-6aef-4786-958f-14f559eaf014.json │ │ │ ├── 28963795-d694-4bb4-adaf-f7708a2c6fe5.json │ │ │ ├── 299db8f2-81eb-455f-9302-5c8cb30be691.json │ │ │ ├── 29f099b2-b3a5-463f-b10a-15363bf7e845.json │ │ │ ├── 355e9660-a355-4b95-8881-ac9da578ea43.json │ │ │ ├── 35bd7387-4735-4632-8474-e93382004c12.json │ │ │ ├── 362c5711-3824-42ff-96a0-7801b03b5f1f.json │ │ │ ├── 4718df9c-97ec-4b54-86ca-bd34e65c5a43.json │ │ │ ├── 47b75b21-99a2-461c-9d40-6dddc5c206d0.json │ │ │ ├── 4ae4e35f-d90a-48cc-8fb9-492ac7ae07ee.json │ │ │ ├── 4bbedade-4d4e-43d5-b650-2702b350ad28.json │ │ │ ├── 51a288f9-cf2c-4e8e-a98c-596a505af77c.json │ │ │ ├── 51c91051-3efb-4e92-a967-739b18520714.json │ │ │ ├── 57b7e8a7-8c17-4cc4-9bb5-4385afde3ad8.json │ │ │ ├── 58776443-ccf7-4db3-8c60-e188e4b5f90c.json │ │ │ ├── 5ba74c6a-4513-448b-8b68-ff145ece0652.json │ │ │ ├── 6428f803-62de-40d2-a345-64e6cf955c9d.json │ │ │ ├── 64a2c205-c85a-4e56-8edb-5df4f7724441.json │ │ │ ├── 696ca9bb-89ea-4cd5-b693-f2d749d964b1.json │ │ │ ├── 6be49e77-e904-4eb0-a36a-7f0fd128ede3.json │ │ │ ├── 
6c3105a2-328c-4190-823d-03d759be0b57.json │ │ │ ├── 6c560516-ca14-4f97-b51d-16ad81fc29e4.json │ │ │ ├── 730172f5-894a-4d46-9102-ac7d985a479d.json │ │ │ ├── 73038efb-ca0f-4d90-a947-fcfd097dd91b.json │ │ │ ├── 73da97c9-f084-4cab-8697-1151737387ff.json │ │ │ ├── 77aa4dd3-5a68-4686-9cac-26d0ab77c7b4.json │ │ │ ├── 78502f1c-879b-4932-a5fd-d85f7f6b0f81.json │ │ │ ├── 7912f7a5-24b9-4dfe-a7b8-1effc1b7a212.json │ │ │ ├── 7d5613ec-9b67-4255-b766-d9c6e8466464.json │ │ │ ├── 7dda7e46-78be-4663-b882-6132dbbff335.json │ │ │ ├── 7e6c4927-2220-4522-9e3f-36f69adc3e71.json │ │ │ ├── 82c49e12-3b2f-432e-9069-4b67bafebbf7.json │ │ │ ├── 87910f23-ab23-4ccc-b115-d71cff6f0162.json │ │ │ ├── 8afc25eb-7a80-459f-acdc-5c79fc146c29.json │ │ │ ├── 8cb5ab6d-a56e-43b9-aa83-00a46331e20f.json │ │ │ ├── 90e09946-7b28-4102-b0ed-f683c01dbbd4.json │ │ │ ├── 925a3607-2802-48aa-b339-13ebfcef43a2.json │ │ │ ├── 9506dd30-f58d-4832-b336-8037e83e2689.json │ │ │ ├── 95e347aa-56ab-4d5d-a94c-350ddfddabf9.json │ │ │ ├── 98a360d8-0f95-44cd-bb9d-442fca2918d4.json │ │ │ ├── 9c979fc5-8d60-41f1-a494-904a1d312187.json │ │ │ ├── 9e08971c-7f83-4853-952e-4c4a4a26333b.json │ │ │ ├── 9fe4f541-61cf-48e0-a081-4371786659c7.json │ │ │ ├── a0714ef7-bbdc-4f84-bd2e-c6e611d4db9e.json │ │ │ ├── a2a34580-cded-4bf8-81d9-b36a4d4402d0.json │ │ │ ├── a6b67c2d-d448-4e77-904e-dc7c5f21a5fe.json │ │ │ ├── a70ab903-835f-48b7-8356-2321b8b869d8.json │ │ │ ├── a78177f5-6cc6-48d7-8c6f-df53399d7759.json │ │ │ ├── abb16512-27ae-49c0-b12b-7fbf0e95056b.json │ │ │ ├── b2ca21dc-dde9-49f5-bec7-321fbf769315.json │ │ │ ├── b57c96c1-071b-40f6-b33b-2a0459fc25bb.json │ │ │ ├── b73019e0-3ce8-4657-8b13-b3e0ab6cfac8.json │ │ │ ├── ba5aebcb-999d-44d4-b9bc-241f9884c6dd.json │ │ │ ├── be6468be-2218-45c1-9b75-b56efec61eb4.json │ │ │ ├── c4106f9a-9348-4a55-9892-782e6f4b3081.json │ │ │ ├── c8800e50-3ff4-4dd2-bc90-33688be99659.json │ │ │ ├── ccf31785-ec13-4981-93c5-ca6c242ac0c3.json │ │ │ ├── d3478489-70f2-4a82-b7d2-0a47b75986eb.json │ │ │ ├── 
d39d40b1-fc26-4169-9d6f-cdf81efe9a3e.json │ │ │ ├── d3c917ff-406f-447a-87f5-b8d835cba750.json │ │ │ ├── d6e460e4-c295-40ad-883c-11300d7832f0.json │ │ │ ├── d9e4e23c-2a2a-4b5c-b034-7deb6036572d.json │ │ │ ├── e31d4e3b-b753-4deb-b9ad-a0add5d4790e.json │ │ │ ├── f07a1f32-2f3f-40e7-b12f-8f1b128c41f6.json │ │ │ ├── f5cce3a0-ba65-4317-95f8-1fc7d9776c78.json │ │ │ ├── f67a26e4-58dd-4dc6-8859-affbf1d62f94.json │ │ │ └── f96d7c34-9543-4679-a6ea-89e0c2ef7b1c.json │ │ └── ubuntu_subtasks.py │ ├── main.py │ ├── scripts/ │ │ └── ubuntu_env_init.sh │ └── ubuntu_env.py ├── docs/ │ ├── Makefile │ ├── conf.py │ ├── crab.benchmarks.rst │ ├── crab.client.rst │ ├── crab.core.models.rst │ ├── crab.core.rst │ ├── crab.environments.rst │ ├── crab.rst │ ├── crab.server.controller.rst │ ├── crab.server.rst │ ├── crab_benchmark_v0/ │ │ ├── environment_gcp_setup.md │ │ ├── environment_local_setup.md │ │ └── get_started.md │ ├── get_started/ │ │ ├── build_your_own_benchmark.md │ │ └── quickstart.md │ ├── index.rst │ ├── make.bat │ └── modules.rst ├── examples/ │ ├── multi_env.py │ └── single_env.py ├── licenses/ │ ├── LICENSE │ ├── license_template.txt │ └── update_license.py ├── pyproject.toml └── test/ ├── actions/ │ └── test_visual_prompt_actions.py ├── agents/ │ ├── backend_models/ │ │ ├── test_camel_model.py │ │ ├── test_claude_model.py │ │ ├── test_gemini_model.py │ │ └── test_openai_model.py │ └── policies/ │ ├── test_multi_agent_by_func.py │ ├── test_mutli_agent_by_env.py │ └── test_single_agent.py ├── core/ │ ├── test_action.py │ ├── test_benchmark.py │ ├── test_evaluator.py │ └── test_utils.py └── server/ └── test_api.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: 🐛 Bug Report description: File an issue about a bug. 
title: "[BUG] " labels: [bug] assignees: [dandansamax] body: - type: markdown attributes: value: | Please do your best to make the issue as easy to act on as possible, and only submit here if there is clearly a problem with camel (ask in [Discussions](https://github.com/camel-ai/camel/discussions) first if unsure). - type: input id: version attributes: label: What version of camel are you using? description: Run command `python3 -c 'print(__import__("camel").__version__)'` in your shell and paste the output here. placeholder: E.g., 0.1.0 validations: required: true - type: textarea id: system-info attributes: label: System information description: | Describe the characteristic of your environment: - Describe how the library was installed (pip, conda, source, ...) - Python version - Versions of any other relevant libraries ```python import sys, camel print(sys.version, sys.platform) print(camel.__version__) ``` validations: required: true - type: textarea id: description attributes: label: Problem description description: >- Provide a short description, state the expected behavior and what actually happens. Include relevant information like what version of camel you are using, what system you are on, and any useful commands / output. validations: required: true - type: textarea id: code attributes: label: Reproducible example code description: >- The code should be minimal, have minimal external dependencies, and isolate the functions that cause breakage. Submit matched and complete snippets that can be easily run to diagnose the issue. value: | The Python snippets: ```python ``` Command lines: ```bash ``` Extra dependencies: ```text ``` Steps to reproduce: 1. 2. 3. validations: required: true - type: textarea id: traceback attributes: label: Traceback description: Put the Python traceback information here. placeholder: | Traceback (most recent call last): File ... 
render: pytb - type: textarea id: expected attributes: label: Expected behavior description: Provide a clear and concise description of what you expected to happen. - type: textarea id: additional-context attributes: label: Additional context description: >- Add any other context about the problem here. Screenshots may also be helpful. If you know or suspect the reason for this bug, paste the code lines and suggest modifications. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.yml ================================================ name: ✨ Feature Request description: Suggest an idea for this project. title: "[Feature Request] " labels: [enhancement] assignees: [dandansamax] body: - type: checkboxes id: steps attributes: label: Required prerequisites description: Make sure you've completed the following steps before submitting your issue -- thank you! options: - label: I have searched the [Issue Tracker](https://github.com/camel-ai/crab/issues) that this hasn't already been reported. (+1 or comment there if it has.) required: true - type: textarea id: motivation attributes: label: Motivation description: Outline the motivation for the proposal. value: | validations: required: true - type: textarea id: solution attributes: label: Solution description: Provide a clear and concise description of what you want to happen. - type: textarea id: additional-context attributes: label: Additional context description: Add any other context about the problem here. Screenshots may also be helpful. ================================================ FILE: .github/ISSUE_TEMPLATE/questions.yml ================================================ name: 🤔 Questions / Help / Support description: Do you need support? 
title: "[Question] " labels: [question] assignees: [dandansamax] body: - type: checkboxes id: steps attributes: label: Required prerequisites description: Make sure you've completed the following steps before submitting your issue -- thank you! options: # - label: I have read the documentation . # required: true - label: I have searched the [Issue Tracker](https://github.com/camel-ai/crab/issues) that this hasn't already been reported. (+1 or comment there if it has.) required: true - type: textarea id: questions attributes: label: Questions description: Describe your questions with relevant resources such as snippets, links, images, etc. validations: required: true ================================================ FILE: .github/actions/crab_install/action.yml ================================================ name: 'crab_install' description: 'Setup python environment and install dependencies for Crab by poetry.' inputs: python-version: description: 'Python version.' required: true default: '3.10' runs: using: "composite" steps: - name: Set up Python uses: actions/setup-python@v3 with: python-version: '${{ inputs.python-version }}' - name: Install poetry uses: abatilo/actions-poetry@v2 - name: Setup poetry virtual environment run: | poetry config virtualenvs.create true --local poetry config virtualenvs.in-project true --local shell: bash - uses: actions/cache/restore@v3 id: cache-restore name: Restore caches for the virtual environment based on poetry.lock with: path: ./.venv key: venv-${{ hashFiles('poetry.lock') }} - name: Install the project dependencies run: poetry install -E client -E server -E camel shell: bash - uses: actions/cache/save@v3 name: Save caches based on poetry.lock if: ${{ !steps.cache-restore.outputs.cache-hit }} with: path: ./.venv key: venv-${{ hashFiles('poetry.lock') }} ================================================ FILE: .github/workflows/documentation.yml ================================================ name: Build and deploy CRAB 
documents on: push: branches: [ "main" ] workflow_dispatch: permissions: contents: write jobs: docs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python environment and install dependencies uses: ./.github/actions/crab_install with: python-version: "3.10" - name: Sphinx build run: | cd docs poetry run make html - name: Deploy uses: peaceiris/actions-gh-pages@v3 if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main'}} with: publish_branch: gh-pages github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: docs/_build/html/ force_orphan: true ================================================ FILE: .github/workflows/publish_release.yml ================================================ name: Publish CRAB to PyPI / GitHub on: push: tags: - "v*" workflow_dispatch: jobs: build-n-publish: name: Build and publish to PyPI runs-on: ubuntu-latest permissions: contents: write steps: - uses: actions/checkout@v3 - name: Build and publish to pypi uses: JRubics/poetry-publish@v1.17 with: pypi_token: ${{ secrets.PYPI_API_KEY }} ignore_dev_requirements: "yes" - name: Create GitHub Release id: create_release uses: actions/create-release@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token with: tag_name: ${{ github.ref }} release_name: ${{ github.ref }} draft: false prerelease: false - name: Get Asset name run: | export PKG=$(ls dist/ | grep tar) set -- $PKG echo "name=$1" >> $GITHUB_ENV - name: Upload Release Asset (sdist) to GitHub id: upload-release-asset uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ steps.create_release.outputs.upload_url }} asset_path: dist/${{ env.name }} asset_name: ${{ env.name }} asset_content_type: application/zip ================================================ FILE: .github/workflows/pytest_package.yml ================================================ # This workflow will install 
Python dependencies, run tests # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python name: Pytest CRAB package on: push jobs: pytest: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python environment and install dependencies uses: ./.github/actions/crab_install with: python-version: "3.10" - name: Run pytest run: poetry run pytest test/ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST .vagrant/* # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation # docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
# However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/#use-with-ide .pdm.toml # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
.idea/ .vscode/ .python-version _build/ # model parameter *.pth logs/ .DS_Store ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. rev: v0.6.5 hooks: # Run the linter. - id: ruff # Run the formatter. - id: ruff-format - repo: local hooks: - id: check-license name: Check License entry: python licenses/update_license.py . licenses/license_template.txt language: system types: [python] ================================================ FILE: README.md ================================================ # 🦀 CRAB: Cross-platform Agent Benchmark for Multimodal Embodied Language Model Agents [![arXiv][arxiv-image]][arxiv-url] [![Slack][slack-image]][slack-url] [![Discord][discord-image]][discord-url] [![Wechat][wechat-image]][wechat-url] [![Twitter][twitter-image]][twitter-url]

Documentation | Website & Demos | Blog | Chinese Blog | CAMEL-AI

## Overview CRAB is a framework for building LLM agent benchmark environments in a Python-centric way. #### Key Features 🌐 Cross-platform and Multi-environment * Build agent environments that support various deployment options including in-memory, Docker-hosted, virtual machines, or distributed physical machines, provided they are accessible via Python functions. * Let the agent access all the environments at the same time through a unified interface. ⚙️ Easy-to-use Configuration * Add a new action by simply adding a `@action` decorator on a Python function. * Define the environment by integrating several actions together. 📐 Novel Benchmarking Suite * Define tasks and the corresponding evaluators in an intuitive Python-native way. * Introduce a novel graph evaluator method providing fine-grained metrics. ## Installation #### Prerequisites - Python 3.10 or newer ```bash pip install crab-framework[client] ``` ## Experiment on CRAB-Benchmark-v0 All datasets and experiment code are in the [crab-benchmark-v0](./crab-benchmark-v0/) directory. Please carefully read the [benchmark tutorial](./crab-benchmark-v0/README.md) before using our benchmark.
## Examples #### Run template environment with openai agent ```bash export OPENAI_API_KEY= python examples/single_env.py python examples/multi_env.py ``` ## Demo Video [![demo_video](https://i.ytimg.com/vi_webp/PNqrHNQlU6I/maxresdefault.webp)](https://www.youtube.com/watch?v=PNqrHNQlU6I&ab_channel=CamelAI) ## Cite Please cite [our paper](https://arxiv.org/abs/2407.01511) if you use anything related in your work: ``` @misc{xu2024crab, title={CRAB: Cross-environment Agent Benchmark for Multimodal Language Model Agents}, author={Tianqi Xu and Linyao Chen and Dai-Jie Wu and Yanjun Chen and Zecheng Zhang and Xiang Yao and Zhiqiang Xie and Yongchao Chen and Shilong Liu and Bochen Qian and Philip Torr and Bernard Ghanem and Guohao Li}, year={2024}, eprint={2407.01511}, archivePrefix={arXiv}, primaryClass={cs.AI}, url={https://arxiv.org/abs/2407.01511}, } ``` ## Community Join us ([*Discord*](https://discord.camel-ai.org/) or [*WeChat*](https://ghli.org/camel/wechat.png)) in pushing the boundaries of finding the scaling laws of agents. - **WeChat Community:** Scan the QR code below to join our WeChat community.
WeChat QR Code

[slack-url]: https://join.slack.com/t/camel-kwr1314/shared_invite/zt-1vy8u9lbo-ZQmhIAyWSEfSwLCl2r2eKA [slack-image]: https://img.shields.io/badge/Slack-CAMEL--AI-blueviolet?logo=slack [discord-url]: https://discord.gg/CNcNpquyDc [discord-image]: https://img.shields.io/badge/Discord-CAMEL--AI-7289da?logo=discord&logoColor=white&color=7289da [wechat-url]: https://ghli.org/camel/wechat.png [wechat-image]: https://img.shields.io/badge/WeChat-CamelAIOrg-brightgreen?logo=wechat&logoColor=white [twitter-url]: https://twitter.com/CamelAIOrg [twitter-image]: https://img.shields.io/twitter/follow/CamelAIOrg?style=social&color=brightgreen&logo=twitter [arxiv-image]: https://img.shields.io/badge/arXiv-2407.01511-b31b1b.svg [arxiv-url]: https://arxiv.org/abs/2407.01511 ================================================ FILE: crab/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # ruff: noqa: F403 from .core import * __version__ = "0.1.2" ================================================ FILE: crab/actions/android_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import base64 import subprocess from enum import Enum from time import sleep from crab import action from .crab_actions import get_element_position def execute_adb(adb_command: str, env=None): if env.device is None: adb_command = "adb " + adb_command else: adb_command = f"adb -s {env.device} " + adb_command result = subprocess.run( adb_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) if result.returncode == 0: return result.stdout.strip() print(f"Command execution failed: {adb_command}") print(result.stderr) return "ERROR" def get_device_size(env): adb_command = "shell wm size" result = execute_adb(adb_command, env) if result != "ERROR": return map(int, result.split(": ")[1].split("x")) return 0, 0 _DURATION = 1.5 @action def setup(env) -> None: env.width, env.height = get_device_size(env) @action def screenshot(env) -> str: """ Get the current screenshot of phone screen. """ if env.device is not None: command = f"adb -s {env.device} exec-out screencap -p" else: command = "adb exec-out screencap -p" result = subprocess.run( command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) return base64.b64encode(result.stdout).decode("utf-8") @action def tap(element: int, env) -> None: """ Tap an UI element shown on the smartphone screen. A simple use case can be tap(5), which taps the UI element labeled with the number 5. Args: element: A numeric tag assigned to an UI element shown on the smartphone screen. 
""" x, y = get_element_position(element, env) execute_adb(f"shell input tap {x} {y}", env) sleep(_DURATION) @action def long_tap(element: int, env) -> None: """ Press and hold a UI element on the smartphone screen for 1 second, typically to access additional menu options. For example, the command long_tap(5) simulates a long press on the UI element labeled with the number 5. Args: element: A numeric tag assigned to an UI element shown on the smartphone screen. """ x, y = get_element_position(element, env) adb_command = f"shell input swipe {x} {y} {x} {y} 1000" execute_adb(adb_command, env) sleep(_DURATION) class SwipeDirection(str, Enum): RIGHT = "right" LEFT = "left" UP = "up" DOWN = "down" class SwipeDist(str, Enum): SHORT = "short" MEDIUM = "medium" LONG = "long" @action def swipe(element: int, direction: SwipeDirection, dist: SwipeDist, env) -> None: """ This function is used to swipe an UI element shown on the smartphone screen, usually a scroll view or a slide bar. You should choose the appropriate direction and distance option according to your need. A simple use case can be swipe(21, "up", "medium"), which swipes up the UI element labeled with the number 21 for a medium distance. Args: element: is a numeric tag assigned to an UI element shown on the smartphone screen. direction: is a string that represents the swipe direction dist: determines the distance of the swipe. 
""" x, y = get_element_position(element, env) unit_dist = int(env.width / 10) if dist == "long": unit_dist *= 3 elif dist == "medium": unit_dist *= 2 if direction == "up": offset = 0, -2 * unit_dist elif direction == "down": offset = 0, 2 * unit_dist elif direction == "left": offset = -1 * unit_dist, 0 elif direction == "right": offset = unit_dist, 0 else: return "ERROR" adb_command = f"shell input swipe {x} {y} {x + offset[0]} {y + offset[1]} 200" execute_adb(adb_command, env) sleep(_DURATION) @action def open_app_drawer(env) -> None: """Open app drawer to list all the installed applications in this phone. For exmaple: you want to open "Messages" application, but you don't know where to find it, you can call "open_app_drawer()" and you will see all the installed applications through screenshot. """ execute_adb("shell input keyevent KEYCODE_HOME", env) sleep(0.5) execute_adb("shell input swipe 800 2000 800 100 500", env) sleep(_DURATION) class AndroidKey(str, Enum): HOME = "home" BACK = "back" @action def key_press(key: AndroidKey, env): """ Press Android keys. press("home") to go back to main screen. press("back") to return to the preivous page. Args: key (str): The pressed key. """ if key == AndroidKey.HOME: adb_command = "shell input keyevent KEYCODE_HOME" elif key == AndroidKey.BACK: adb_command = "shell input keyevent KEYCODE_BACK" else: raise ValueError("Unsupported key") execute_adb(adb_command, env) sleep(_DURATION) @action def write_text(text: str, env) -> None: """ Typing the specified text. Args: text (str): The text to be typed. """ text = text.replace(" ", "%s") text = text.replace("'", "") adb_command = f"shell input text {text}" execute_adb(adb_command, env) sleep(_DURATION) @action def stop_all_apps(env) -> None: """ Stop all running apps. 
""" execute_adb("shell input keyevent KEYCODE_HOME", env) execute_adb("shell input keyevent KEYCODE_APP_SWITCH", env) sleep(0.5) command = ( f"shell input swipe 100 {env.height / 2} {env.width - 100} {env.height / 2} 200" ) execute_adb(command, env) sleep(0.5) execute_adb("shell input tap 300 1400", env) sleep(_DURATION) ================================================ FILE: crab/actions/crab_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from time import sleep from crab import action, evaluator @action(env_name="root") def submit(content: str) -> None: """Submit your answer through this action. For exmaple, if you are required to submit a word "apple", you can use submit(content="apple"). Args: content: the content to submit """ pass @evaluator(env_name="root") def check_submit(text: str, env) -> bool: if env.trajectory: action_name, params, _ = env.trajectory[-1] if action_name == "submit" and text in params["content"]: return True return False @action(env_name="root") def complete() -> bool: """When you think the task is completed, use this action to notify the system. For exmaple, if you successfully complete the task, you can use complete(). 
""" pass @action(env_name="root") def wait() -> bool: """If the environment is still processing your action and you have nothing to do in this step, you can use wait(). """ sleep(5) def get_element_position(element_id, env): """Get element position provided by function `zs_object_detection`""" box = env.element_position_map[element_id] x = (box[0] + box[2]) / 2 y = (box[1] + box[3]) / 2 return round(x), round(y) ================================================ FILE: crab/actions/desktop_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import base64 import time from enum import Enum import pyautogui from mss import mss, tools from crab import action from .crab_actions import get_element_position DURATION = 0.8 DELAY = 1.0 @action def click_position(x: int, y: int) -> None: """ click on the current desktop screen. Args: x: The X coordinate, as a floating-point number in the range [0.0, 1.0]. y: The Y coordinate, as a floating-point number in the range [0.0, 1.0]. """ pyautogui.click(x, y, duration=DURATION) time.sleep(DELAY) @action(local=True) def click(element: int, env) -> None: """ Click an UI element shown on the desktop screen. A simple use case can be click(5), which clicks the UI element labeled with the number 5. 
    Args:
        element: A numeric tag assigned to an UI element shown on the
            screenshot.
    """
    x, y = get_element_position(element, env)
    # Delegate to the remote click_position action through the environment.
    env._action_endpoint(click_position, {"x": x, "y": y})


@action
def right_click_position(x: int, y: int) -> None:
    """
    right-click on the current desktop screen.

    Args:
        x: The X coordinate, in screen pixels.
        y: The Y coordinate, in screen pixels.
    """
    # NOTE(review): unlike the other *_position actions, this one does not
    # sleep(DELAY) afterwards -- confirm whether that is intentional.
    pyautogui.click(x, y, duration=DURATION, button="right")


@action(local=True)
def right_click(element: int, env) -> None:
    """
    Right-click an UI element shown on the desktop screen using the mouse,
    which is usually used for opening the menu of the element. A simple use
    case can be right_click(5), which right-clicks the UI element labeled with
    the number 5 to open up menu on it.

    Args:
        element: A numeric tag assigned to an UI element shown on the
            screenshot.
    """
    x, y = get_element_position(element, env)
    env._action_endpoint(right_click_position, {"x": x, "y": y})
    time.sleep(DELAY)


@action
def double_click_position(x: int, y: int) -> None:
    """
    Double-click on the current desktop screen.

    Args:
        x: The X coordinate, in screen pixels.
        y: The Y coordinate, in screen pixels.
    """
    pyautogui.click(x, y, duration=DURATION, clicks=2, interval=0.2)


@action(local=True)
def double_click(element: int, env) -> None:
    """
    Double-click an UI element shown on the desktop screen using the mouse,
    which is usually used for opening a folder or a file. A simple use case
    can be double_click(5), which double-clicks the UI element labeled with
    the number 5 to open it.

    Args:
        element: A numeric tag assigned to an UI element shown on the
            screenshot.
    """
    x, y = get_element_position(element, env)
    env._action_endpoint(double_click_position, {"x": x, "y": y})
    time.sleep(DELAY)


@action
def mouse_scroll(click: int = 1) -> None:
    """
    Performs a scroll of the mouse scroll wheel.
    Args:
        click(int): The amount of scrolling. Default to 1.
    """
    pyautogui.scroll(click)
    time.sleep(DELAY)


class KeyEnum(str, Enum):
    # Keys accepted by key_press/press_hotkey; values are the strings that
    # pyautogui.press / pyautogui.hotkey expect.
    KEY_TAB = "\t"
    KEY_LB = "\n"
    KEY_RR = "\r"
    KEY_SPACE = " "
    KEY_EXCLAMATION = "!"
    KEY_DQUOTE = '"'
    KEY_SHARP = "#"
    KEY_DOLLAR = "$"
    KEY_PER = "%"
    KEY_AND = "&"
    KEY_SQUOTE = "'"
    KEY_LPAR = "("
    KEY_RPAR = ")"
    KEY_MUL = "*"
    KEY_ADD = "+"
    KEY_COMMA = ","
    KEY_MIN = "-"
    KEY_DOT = "."
    KEY_SLASH = "/"
    KEY_0 = "0"
    KEY_1 = "1"
    KEY_2 = "2"
    KEY_3 = "3"
    KEY_4 = "4"
    KEY_5 = "5"
    KEY_6 = "6"
    KEY_7 = "7"
    KEY_8 = "8"
    KEY_9 = "9"
    KEY_COL = ":"
    KEY_SEMICOL = ";"
    # NOTE(review): "KET_LT" looks like a typo for "KEY_LT"; renaming the
    # member would break existing callers, so it is kept as-is.
    KET_LT = "<"
    KEY_EQUAL = "="
    KEY_GT = ">"
    KEY_QM = "?"
    KEY_AT = "@"
    KEY_LBRA = "["
    KEY_RSLASH = "\\"
    KEY_RBRA = "]"
    KEY_CARET = "^"
    KEY_UNDERLINE = "_"
    KEY_BACKTICK = "`"
    KEY_LBRACE = "{"
    KEY_RBRACE = "}"
    KEY_PIPE = "|"
    KEY_TLIDE = "~"
    KEY_A = "a"
    KEY_B = "b"
    KEY_C = "c"
    KEY_D = "d"
    KEY_E = "e"
    KEY_F = "f"
    KEY_G = "g"
    KEY_H = "h"
    KEY_I = "i"
    KEY_J = "j"
    KEY_K = "k"
    KEY_L = "l"
    KEY_M = "m"
    KEY_N = "n"
    KEY_O = "o"
    KEY_P = "p"
    KEY_Q = "q"
    KEY_R = "r"
    KEY_S = "s"
    KEY_T = "t"
    KEY_U = "u"
    KEY_V = "v"
    KEY_W = "w"
    KEY_X = "x"
    KEY_Y = "y"
    KEY_Z = "z"
    KEY_ALT = "alt"
    KEY_SHIFT = "shift"
    KEY_CTRL = "ctrl"
    KEY_WIN = "win"
    KEY_BACKSPACE = "backspace"
    KEY_ENTER = "enter"
    KEY_ESC = "esc"
    KEY_F1 = "f1"
    KEY_F2 = "f2"
    KEY_F3 = "f3"
    KEY_F4 = "f4"
    KEY_F5 = "f5"
    KEY_F6 = "f6"
    KEY_F7 = "f7"
    KEY_F8 = "f8"
    KEY_F9 = "f9"
    KEY_F10 = "f10"
    KEY_F11 = "f11"
    KEY_F12 = "f12"
    KEY_LEFT = "left"
    KEY_UP = "up"
    KEY_RIGHT = "right"
    KEY_DOWN = "down"


@action
def key_press(key: KeyEnum) -> None:
    """
    Performs a keyboard key press down, followed by a release.

    Args:
        key (str): The key to be pressed.
    """
    # Accept either a KeyEnum member or the equivalent plain string.
    if isinstance(key, KeyEnum):
        pyautogui.press(key.value)
    else:
        pyautogui.press(key)
    time.sleep(DELAY)


@action
def press_hotkey(keys: list[KeyEnum]) -> None:
    """
    Press multiple keyboard keys at the same time. For example, if you want to
    use Ctrl-C hotkey to copy the selected text, you can call
    press_hotkey(keys=["ctrl", "c"]).
    Args:
        key (str): The key to be pressed.
    """
    # If the first entry is a KeyEnum, assume the whole list is and unwrap.
    if isinstance(keys[0], KeyEnum):
        keys = [key.value for key in keys]
    pyautogui.hotkey(*keys)
    time.sleep(DELAY)


@action
def write_text(text: str) -> None:
    """
    Typing the specified text. Note: This function does not move the mouse
    cursor. Ensure the cursor focuses in the correct text input field before
    calling this function.

    Args:
        text (str): The text to be typed.
    """
    pyautogui.write(text, interval=0.03)
    time.sleep(DELAY)


@action
def search_application(name: str) -> None:
    """
    Search an application name. For example, if you want to open an
    application named "slack", you can call search_application(name="slack").
    You MUST use this action to search for applications.

    Args:
        name: the application name.
    """
    # Close any open popup, open the desktop search overlay, then type the
    # application name.  NOTE(review): "win"+"a" presumably targets a specific
    # desktop environment's app search -- confirm on the target OS.
    pyautogui.press("esc")
    time.sleep(DELAY)
    pyautogui.hotkey("win", "a")
    time.sleep(DELAY)
    pyautogui.write(name)
    time.sleep(DELAY)


@action
def screenshot() -> str:
    "Get the current screenshot."
    with mss() as sct:
        # Get raw pixels from the screen (monitor 1 = the primary display).
        sct_img = sct.grab(sct.monitors[1])
        # Create the Image
        png = tools.to_png(sct_img.rgb, sct_img.size)
        base64_img = base64.b64encode(png).decode("utf-8")
        return base64_img



================================================
FILE: crab/actions/file_actions.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
=========== import base64 from io import BytesIO from PIL import Image from crab.core import action @action def save_base64_image(image: str, path: str = "image.png") -> None: image = Image.open(BytesIO(base64.b64decode(image))) image.save(path) ================================================ FILE: crab/actions/system_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import subprocess from time import sleep from crab.core.decorators import action @action def delay(time: float) -> None: sleep(time) @action def run_bash_command(command: str) -> str: """ Run a command using bash shell. You can use this command to open any application by their name. Args: command: The commmand to be run. Return: stdout and stderr """ p = subprocess.run(["bash", command], capture_output=True) return f'stdout: "{p.stdout}"\nstderr: "{p.stderr}"' ================================================ FILE: crab/actions/visual_prompt_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import logging
from functools import cache
from typing import Literal

from PIL import Image, ImageDraw, ImageFont

from crab import action
from crab.utils.common import base64_to_image, image_to_base64

logger = logging.getLogger(__name__)

try:
    import easyocr
    import numpy as np
    import torch
    from transformers import (
        AutoProcessor,
        GroundingDinoForObjectDetection,
        GroundingDinoProcessor,
    )

    device = "cuda" if torch.cuda.is_available() else "cpu"
    TRANSFORMERS_ENABLE = True
except ImportError:
    TRANSFORMERS_ENABLE = False

# A bounding box as (x1, y1, x2, y2); the optional str is its label/text.
BoxType = tuple[int, int, int, int]
AnnotatedBoxType = tuple[BoxType, str | None]


def check_transformers_import() -> None:
    """Raise an informative error when optional model deps are missing."""
    if not TRANSFORMERS_ENABLE:
        raise ImportError(
            "Please install the required dependencies to use this function by running"
            " `pip install crab-framework[client]`"
        )


def _calculate_iou(box1: BoxType, box2: BoxType) -> float:
    """Intersection-over-union of two boxes."""
    xA = max(box1[0], box2[0])
    yA = max(box1[1], box2[1])
    xB = min(box1[2], box2[2])
    yB = min(box1[3], box2[3])
    interArea = max(0, xB - xA) * max(0, yB - yA)
    box1Area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2Area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    # NOTE(review): unionArea can be 0 for degenerate boxes, which would
    # raise ZeroDivisionError; upstream filtering removes zero-area boxes.
    unionArea = box1Area + box2Area - interArea
    iou = interArea / unionArea
    return iou


def _calculate_center(box: BoxType) -> tuple[float, float]:
    # Annotation fixed: the halves are floats, not ints.
    return (box[0] + box[2]) / 2, (box[1] + box[3]) / 2


def _remove_invalid_boxes(
    boxes_with_label: list[AnnotatedBoxType], width: int, height: int
) -> list[AnnotatedBoxType]:
    """Drop boxes outside the image bounds or with non-positive extent."""
    boxes = [box[0] for box in boxes_with_label]
    boxes_to_remove = set()
    for idx, box in enumerate(boxes):
        if box[0] < 0 or box[1] < 0 or box[2] > width or box[3] > height:
            boxes_to_remove.add(idx)
            continue
        if box[0] >= box[2] or box[1] >= box[3]:
            boxes_to_remove.add(idx)
            continue
    boxes_filt = [
        box for idx, box in enumerate(boxes_with_label) if idx not in boxes_to_remove
    ]
    return boxes_filt


def _filter_boxes_by_center(
    boxes_with_label: list[AnnotatedBoxType], center_dis_thresh: float
) -> list[AnnotatedBoxType]:
    """Keep only one box among those whose centers are closer than the threshold."""
    boxes = [box[0] for box in boxes_with_label]
    boxes_to_remove = set()
    for i in range(len(boxes)):
        if i in boxes_to_remove:
            continue
        center_i = _calculate_center(boxes[i])
        for j in range(i + 1, len(boxes)):
            center_j = _calculate_center(boxes[j])
            # fmt: off
            center_close = ((center_i[0] - center_j[0]) ** 2
                            + (center_i[1] - center_j[1]) ** 2
                            < center_dis_thresh**2)
            # fmt: on
            if center_close:
                boxes_to_remove.add(j)
    boxes_filt = [
        box for idx, box in enumerate(boxes_with_label) if idx not in boxes_to_remove
    ]
    return boxes_filt


def _box_a_in_b(a: BoxType, b: BoxType) -> bool:
    """True when box a is fully contained in box b."""
    return a[0] >= b[0] and a[1] >= b[1] and a[2] <= b[2] and a[3] <= b[3]


def _filter_boxes_by_overlap(
    boxes_with_label: list[AnnotatedBoxType],
) -> list[AnnotatedBoxType]:
    """Remove boxes that fully contain another (kept) box."""
    boxes = [box[0] for box in boxes_with_label]
    boxes_to_remove = set()
    for i in range(len(boxes)):
        if i in boxes_to_remove:
            continue
        for j in range(len(boxes)):
            if i != j and _box_a_in_b(boxes[i], boxes[j]):
                boxes_to_remove.add(j)
    boxes_filt = [
        box for idx, box in enumerate(boxes_with_label) if idx not in boxes_to_remove
    ]
    return boxes_filt


def _filter_boxes_by_iou(
    boxes_with_label: list[AnnotatedBoxType], iou_threshold: float = 0.5
) -> list[AnnotatedBoxType]:
    """Greedy de-duplication: drop later boxes overlapping an earlier one."""
    boxes = [box[0] for box in boxes_with_label]
    boxes_to_remove = set()
    for i in range(len(boxes)):
        if i in boxes_to_remove:
            continue
        for j in range(i + 1, len(boxes)):
            iou = _calculate_iou(boxes[i], boxes[j])
            if iou >= iou_threshold:
                boxes_to_remove.add(j)
    boxes_filt = [
        box for idx, box in enumerate(boxes_with_label) if idx not in boxes_to_remove
    ]
    return boxes_filt


def _draw_boxes(
    image: Image.Image,
    boxes: list[BoxType],
    font_size: int = 30,
) -> None:
    """Draw each box on the image with its index rendered beside it (in place)."""
    draw = ImageDraw.Draw(image)
    for idx, box in enumerate(boxes):
        color = tuple(np.random.randint(64, 191, size=3).tolist())
        # NOTE(review): the font is reloaded on every iteration; hoisting it
        # out of the loop would be cheaper.
        font = ImageFont.load_default(font_size)
        center = _calculate_center(box)
        draw.rectangle([box[0], box[1], box[2], box[3]], outline=color, width=2)
        # Newer Pillow exposes textbbox; fall back to textsize on older versions.
        if hasattr(font, "getbbox"):
            _, _, w, h = draw.textbbox((0, 0), str(idx), font)
        else:
            w, h = draw.textsize(str(idx), font)
        # Place the label to the left of the box when it fits, else to the right.
        if box[0] >= w:
            bbox = (
                round(box[0] - w),
                round(center[1] - h / 2),
                round(box[0]),
                round(center[1] + h / 2),
            )
        else:
            bbox = (
                round(box[2]),
                round(center[1] - h / 2),
                round(box[2] + w),
                round(center[1] + h / 2),
            )
        draw.rectangle(bbox, fill=color)
        draw.text((bbox[0], bbox[1]), str(idx), fill="white", font=font)


@cache
def _get_grounding_dino_model(
    type: Literal["tiny", "base"] = "tiny",
) -> tuple[GroundingDinoProcessor, GroundingDinoForObjectDetection]:
    """Get the grounding dino model.

    Args:
        type: The version of the Gounding Dino Model.

    Returns:
        A tuple (processor, model).
    """
    model_name = f"IDEA-Research/grounding-dino-{type}"
    processor = AutoProcessor.from_pretrained(model_name)
    model = GroundingDinoForObjectDetection.from_pretrained(model_name).to(device)
    return processor, model


@cache
def _get_easyocr_model() -> easyocr.Reader:
    # English-only OCR reader, cached for the process lifetime.
    return easyocr.Reader(["en"])


def get_groundingdino_boxes(
    images: Image.Image | list[Image.Image],
    text_prompt: str,
    box_threshold: float = 0.05,
    text_threshold: float = 0.5,
) -> list[list[AnnotatedBoxType]]:
    """Get the bounding boxes of the objects in the image using GroundingDino.

    Args:
        images: The image or list of images.
        text_prompt: The text prompt to use for all the images.
        box_threshold: The box threshold.
        text_threshold: The text threshold.

    Returns:
        The first level list is for each image, and the second level list
        contains tuples (detected boxes, its sementical representation) as the
        result of the image.
""" processor, model = _get_grounding_dino_model() if isinstance(images, Image.Image): images = [images] image_number = len(images) images = [image.convert("RGB") for image in images] inputs = processor( images=images, text=[text_prompt] * image_number, return_tensors="pt", ).to(device) with torch.no_grad(): outputs = model(**inputs) target_sizes = [image.size[::-1] for image in images] detection_results = processor.post_process_grounded_object_detection( outputs, inputs.input_ids, box_threshold=box_threshold, text_threshold=text_threshold, target_sizes=target_sizes, ) final_output = [] for result in detection_results: boxes = result["boxes"].cpu().int().tolist() labels = result["labels"] final_output.append(list(zip(boxes, labels))) return final_output def get_easyocr_boxes( image: Image.Image, ) -> list[AnnotatedBoxType]: """Get the bounding boxes of the text in the image using EasyOCR. Args: image: The taget image. Returns: The list of tuple of bounding boxes and their corresponding text. """ reader = _get_easyocr_model() result = reader.readtext(np.array(image), text_threshold=0.9) boxes = [] for detect in result: boxes.append( ( ( detect[0][0][0], detect[0][0][1], detect[0][2][0], detect[0][2][1], ), detect[1], ) ) return boxes @action(local=True) def groundingdino_easyocr( input_base64_image: str, font_size: int, env, ) -> tuple[str, list[AnnotatedBoxType]]: """Get the interative elements in the image. Using GroundingDino and EasyOCR to detect the interactive elements in the image. Mark the detected elements with bounding boxes and labels. Store the labels and boxes in the environment to be used in other actions. Args: input_base64_image: The base64 encoded image. font_size: The font size of the label. Returns: A tuple (base64_image, boxes), where base64_image is the base64 encoded image drawn with bounding boxes and labels, and box is the list of detected boxes and labels. 
""" check_transformers_import() image = base64_to_image(input_base64_image) od_boxes = get_groundingdino_boxes(image, "icon . logo .", box_threshold=0.02)[0] od_boxes = _filter_boxes_by_iou(od_boxes, iou_threshold=0.5) ocr_boxes = get_easyocr_boxes(image) boxes_with_label = ocr_boxes + od_boxes filtered_boxes = _remove_invalid_boxes(boxes_with_label, image.width, image.height) filtered_boxes = _filter_boxes_by_overlap(filtered_boxes) center_dis = round(max(image.height, image.width) / 80.0) filtered_boxes = _filter_boxes_by_center(filtered_boxes, center_dis) env.element_label_map = [box[1] for box in filtered_boxes] result_boxes = [box[0] for box in filtered_boxes] _draw_boxes(image, result_boxes, font_size) env.element_position_map = result_boxes env.ocr_results = "".join([box[1] for box in ocr_boxes]) return image_to_base64(image), filtered_boxes @action(local=True) def get_elements_prompt( input: tuple[str, list[AnnotatedBoxType]], env ) -> tuple[str, str]: """Get the text prompt passing to the agent for the image. Args: input: The base64 encoded image and the list of detected boxes and labels. Returns: A tuple (image, prompt) contains the base64 encoded image and the prompt. """ image, boxes = input labels = "" for id, box in enumerate(boxes): if box[1] is not None: labels += f"[{id}|{box[1]}]\n" prompt = ( "Some elements in the current screenshot have labels. I will give you " "these labels by [id|label].\n" + labels ) return image, prompt ================================================ FILE: crab/agents/backend_models/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # ruff: noqa: F401 from typing import Any, Literal from pydantic import BaseModel from crab.core.backend_model import BackendModel from .camel_model import CamelModel from .claude_model import ClaudeModel from .gemini_model import GeminiModel from .openai_model import OpenAIModel, OpenAIModelJSON, SGlangOpenAIModelJSON class BackendModelConfig(BaseModel): model_class: Literal["openai", "claude", "gemini", "camel", "sglang"] """Specify the model class to be used. Different model classese use different APIs. """ model_name: str """Specify the model name to be used. This value is directly passed to the API, check model provider API documentation for more details. """ model_platform: str | None = None """Required for CamelModel. Otherwise, it is ignored. Please check CAMEL documentation for more details. """ history_messages_len: int = 0 """Number of rounds of previous messages to be used in the model input. 0 means no history. """ parameters: dict[str, Any] = {} """Additional parameters to be passed to the model.""" json_structre_output: bool = False """If True, the model generate action through JSON without using "tool call" or "function call". SGLang model only supports JSON output. OpenAI model supports both. Other models do not support JSON output. """ tool_call_required: bool = True """Specify if the model enforce each round to generate tool/function calls.""" base_url: str | None = None """Specify the base URL of the API. 
Only used in OpenAI and SGLang currently.""" api_key: str | None = None """Specify the API key to be used. Only used in OpenAI and SGLang currently.""" def create_backend_model(model_config: BackendModelConfig) -> BackendModel: match model_config.model_class: case "claude": if model_config.base_url is not None or model_config.api_key is not None: raise Warning( "base_url and api_key are not supported for ClaudeModel currently." ) if model_config.json_structre_output: raise Warning( "json_structre_output is not supported for ClaudeModel currently." ) return ClaudeModel( model=model_config.model_name, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, tool_call_required=model_config.tool_call_required, ) case "gemini": if model_config.base_url is not None or model_config.api_key is not None: raise Warning( "base_url and api_key are not supported for GeminiModel currently." ) if model_config.json_structre_output: raise Warning( "json_structre_output is not supported for GeminiModel currently." 
) return GeminiModel( model=model_config.model_name, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, tool_call_required=model_config.tool_call_required, ) case "openai": if not model_config.json_structre_output: return OpenAIModel( model=model_config.model_name, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, base_url=model_config.base_url, api_key=model_config.api_key, tool_call_required=model_config.tool_call_required, ) else: return OpenAIModelJSON( model=model_config.model_name, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, base_url=model_config.base_url, api_key=model_config.api_key, ) case "sglang": return SGlangOpenAIModelJSON( model=model_config.model_name, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, base_url=model_config.base_url, api_key=model_config.api_key, ) case "camel": return CamelModel( model=model_config.model_name, model_platform=model_config.model_platform, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, tool_call_required=model_config.tool_call_required, ) case _: raise ValueError(f"Unsupported model name: {model_config.model_name}") ================================================ FILE: crab/agents/backend_models/camel_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import json
from typing import Any

from openai.types.chat import ChatCompletionMessageToolCall
from PIL import Image

from crab import Action, ActionOutput, BackendModel, BackendOutput, MessageType
from crab.utils.common import base64_to_image

try:
    from camel.agents import ChatAgent
    from camel.messages import BaseMessage
    from camel.models import ModelFactory
    from camel.toolkits import OpenAIFunction
    from camel.types.enums import ModelPlatformType, ModelType

    CAMEL_ENABLED = True
except ImportError:
    CAMEL_ENABLED = False


def _get_model_platform_type(model_platform_name: str) -> "ModelPlatformType":
    """Map a platform name to CAMEL's ModelPlatformType, with a helpful error."""
    try:
        return ModelPlatformType(model_platform_name)
    except ValueError as err:
        all_models = [platform.value for platform in ModelPlatformType]
        raise ValueError(
            f"Model {model_platform_name} not found. Supported models are {all_models}"
        ) from err


def _get_model_type(model_name: str) -> "str | ModelType":
    """Return the CAMEL ModelType when known, else the raw model name."""
    try:
        return ModelType(model_name)
    except ValueError:
        return model_name


def _convert_action_to_schema(
    action_space: list[Action] | None,
) -> "list[OpenAIFunction] | None":
    """Wrap each crab Action as a CAMEL OpenAIFunction tool schema."""
    if action_space is None:
        return None
    schema_list = []
    for action in action_space:
        new_action = action.to_openai_json_schema()
        schema = {"type": "function", "function": new_action}
        schema_list.append(OpenAIFunction(action.entry, schema))
    return schema_list


def _convert_tool_calls_to_action_list(
    tool_calls: list[ChatCompletionMessageToolCall] | None,
) -> list[ActionOutput] | None:
    """Convert OpenAI-style tool calls into crab ActionOutput records."""
    if tool_calls is None:
        return None
    return [
        ActionOutput(
            name=call.function.name,
            arguments=json.loads(call.function.arguments),
        )
        for call in tool_calls
    ]


class CamelModel(BackendModel):
    """BackendModel adapter that drives a CAMEL ChatAgent."""

    def __init__(
        self,
        model: str,
        model_platform: str,
        parameters: dict[str, Any] | None = None,
        history_messages_len: int = 0,
        tool_call_required: bool = True,
    ) -> None:
        if not CAMEL_ENABLED:
            raise ImportError("Please install camel-ai to use CamelModel")
        self.model = model
        self.parameters = parameters if parameters is not None else {}
        # Fix: history_messages_len was assigned twice; keep a single assignment.
        self.history_messages_len = history_messages_len
        self.model_type = _get_model_type(model)
        self.model_platform_type = _get_model_platform_type(model_platform)
        self.client: ChatAgent | None = None
        self.token_usage = 0
        self.tool_call_required = tool_call_required

    def get_token_usage(self) -> int:
        """Total tokens consumed since the last reset."""
        return self.token_usage

    def reset(self, system_message: str, action_space: list[Action] | None) -> None:
        """Recreate the underlying ChatAgent with a fresh system prompt/tools."""
        action_schema = _convert_action_to_schema(action_space)
        config = self.parameters.copy()
        if action_schema is not None:
            config["tool_choice"] = "required" if self.tool_call_required else "auto"
            config["tools"] = [
                schema.get_openai_tool_schema() for schema in action_schema
            ]
        backend_model = ModelFactory.create(
            self.model_platform_type,
            self.model_type,
            model_config_dict=config,
        )
        sysmsg = BaseMessage.make_assistant_message(
            role_name="Assistant",
            content=system_message,
        )
        self.client = ChatAgent(
            model=backend_model,
            system_message=sysmsg,
            external_tools=action_schema,
            message_window_size=self.history_messages_len,
        )
        self.token_usage = 0

    def chat(self, messages: list[tuple[str, MessageType]]) -> BackendOutput:
        """Send one user turn (text + optional images) and parse the reply."""
        # TODO: handle multiple text messages after message refactoring
        image_list: list[Image.Image] = []
        content = ""
        for message in messages:
            if message[1] == MessageType.IMAGE_JPG_BASE64:
                image = base64_to_image(message[0])
                image_list.append(image)
            else:
                content = message[0]
        usermsg = BaseMessage.make_user_message(
            role_name="User",
            content=content,
            image_list=image_list,
        )
        response = self.client.step(usermsg)
        self.token_usage += response.info["usage"]["total_tokens"]
        tool_call_request = response.info.get("external_tool_request")
        # Fix: the request was previously always wrapped as [tool_call_request],
        # so a missing tool call produced [None] and crashed the converter.
        if tool_call_request is None:
            action_list = None
        else:
            action_list = _convert_tool_calls_to_action_list([tool_call_request])
        return BackendOutput(
            message=response.msg.content,
            action_list=action_list,
        )
================================================
FILE: crab/agents/backend_models/claude_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from copy import deepcopy
from typing import Any

from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed

from crab import Action, ActionOutput, BackendModel, BackendOutput, Message, MessageType

try:
    import anthropic
    from anthropic.types import TextBlock, ToolUseBlock

    anthropic_model_enable = True
except ImportError:
    anthropic_model_enable = False


class ClaudeModel(BackendModel):
    """BackendModel adapter for the Anthropic Messages API."""

    def __init__(
        self,
        model: str,
        parameters: dict[str, Any] | None = None,
        history_messages_len: int = 0,
        tool_call_required: bool = True,
    ) -> None:
        if anthropic_model_enable is False:
            raise ImportError("Please install anthropic to use ClaudeModel")
        self.model = model
        self.parameters = parameters if parameters is not None else {}
        self.history_messages_len = history_messages_len
        assert self.history_messages_len >= 0
        self.client = anthropic.Anthropic()
        self.tool_call_required: bool = tool_call_required
        self.system_message: str = "You are a helpful assistant."
        self.action_space: list[Action] | None = None
        self.action_schema: list[dict] | None = None
        self.token_usage: int = 0
        # Each entry is one round: [user message, assistant reply, tool results].
        self.chat_history: list[list[dict]] = []
        self.support_tool_call = True

    def reset(self, system_message: str, action_space: list[Action] | None) -> None:
        """Clear conversation state and install a new system prompt/tool set."""
        self.system_message = system_message
        self.action_space = action_space
        self.action_schema = _convert_action_to_schema(self.action_space)
        self.token_usage = 0
        self.chat_history = []

    def chat(self, message: list[Message] | Message) -> BackendOutput:
        """Run one request/response round, recording it in chat_history."""
        # A single Message is a tuple; normalize to a list of messages.
        if isinstance(message, tuple):
            message = [message]
        request = self._fetch_from_memory()
        new_message = self._construct_new_message(message)
        request.append(new_message)
        response_message = self._call_api(request)
        self._record_message(new_message, response_message)
        return self._generate_backend_output(response_message)

    def _construct_new_message(self, message: list[Message]) -> dict[str, Any]:
        """Build one Anthropic user message from (content, type) pairs."""
        parts: list[dict] = []
        for content, msg_type in message:
            match msg_type:
                case MessageType.TEXT:
                    parts.append(
                        {
                            "type": "text",
                            "text": content,
                        }
                    )
                case MessageType.IMAGE_JPG_BASE64:
                    # NOTE(review): media_type is "image/png" although the
                    # enum says JPG -- presumably the screenshots really are
                    # PNG; confirm against the producers of these messages.
                    parts.append(
                        {
                            "type": "image",
                            "source": {
                                "data": content,
                                "type": "base64",
                                "media_type": "image/png",
                            },
                        }
                    )
        return {
            "role": "user",
            "content": parts,
        }

    def _fetch_from_memory(self) -> list[dict]:
        """Return the flattened last history_messages_len rounds of history."""
        request: list[dict] = []
        if self.history_messages_len > 0:
            fetch_history_len = min(self.history_messages_len, len(self.chat_history))
            for history_message in self.chat_history[-fetch_history_len:]:
                request = request + history_message
        return request

    def get_token_usage(self):
        """Total input+output tokens consumed since the last reset."""
        return self.token_usage

    def _record_message(
        self, new_message: dict, response_message: anthropic.types.Message
    ) -> None:
        """Append the round to history, acknowledging any tool_use blocks."""
        self.chat_history.append([new_message])
        self.chat_history[-1].append(
            {"role": response_message.role, "content": response_message.content}
        )
        if self.action_schema:
            # The API requires a tool_result for every tool_use block; crab
            # executes actions elsewhere, so report a generic "success".
            tool_calls = response_message.content
            tool_content = []
            for call in tool_calls:
                if isinstance(call, ToolUseBlock):
                    tool_content.append(
                        {
                            "type": "tool_result",
                            "tool_use_id": call.id,
                            "content": "success",
                        }
                    )
            self.chat_history[-1].append(
                {
                    "role": "user",
                    "content": tool_content,
                }
            )

    @retry(
        wait=wait_fixed(10),
        stop=stop_after_attempt(7),
        retry=retry_if_exception_type(
            (
                anthropic.APITimeoutError,
                anthropic.APIConnectionError,
                anthropic.InternalServerError,
            )
        ),
    )
    def _call_api(self, request_messages: list[dict]) -> anthropic.types.Message:
        """Send the merged request to Anthropic, retrying transient failures."""
        request_messages = _merge_request(request_messages)
        if self.action_schema is not None:
            response = self.client.messages.create(
                system=self.system_message,  # <-- system prompt
                messages=request_messages,  # type: ignore
                model=self.model,
                max_tokens=4096,
                tools=self.action_schema,
                tool_choice={"type": "any" if self.tool_call_required else "auto"},
                **self.parameters,
            )
        else:
            response = self.client.messages.create(
                system=self.system_message,  # <-- system prompt
                messages=request_messages,  # type: ignore
                model=self.model,
                max_tokens=4096,
                **self.parameters,
            )
        self.token_usage += response.usage.input_tokens + response.usage.output_tokens
        return response

    def _generate_backend_output(
        self, response_message: anthropic.types.Message
    ) -> BackendOutput:
        """Split the response into plain text and ActionOutput tool calls."""
        message = ""
        action_list = []
        for block in response_message.content:
            if isinstance(block, TextBlock):
                message += block.text
            elif isinstance(block, ToolUseBlock):
                action_list.append(
                    ActionOutput(
                        name=block.name,
                        arguments=block.input,  # type: ignore
                    )
                )
        if not action_list:
            return BackendOutput(message=message, action_list=None)
        else:
            return BackendOutput(
                message=message,
                action_list=action_list,
            )


def _merge_request(request: list[dict]) -> list[dict]:
    """Merge consecutive same-role messages, as the Anthropic API requires
    strictly alternating roles."""
    merge_request = [deepcopy(request[0])]
    for idx in range(1, len(request)):
        if request[idx]["role"] == merge_request[-1]["role"]:
            merge_request[-1]["content"].extend(request[idx]["content"])
        else:
            merge_request.append(deepcopy(request[idx]))
    return merge_request


def _convert_action_to_schema(action_space):
    """Convert crab Actions from OpenAI JSON schema to Anthropic tool schema
    (parameters -> input_schema, dropping OpenAI-only fields)."""
    if action_space is None:
        return None
    actions = []
    for action in action_space:
        new_action = action.to_openai_json_schema()
        new_action["input_schema"] = new_action.pop("parameters")
        if "returns" in new_action:
            new_action.pop("returns")
        if "title" in new_action:
            new_action.pop("title")
        if "type" in new_action:
            new_action["input_schema"]["type"] = new_action.pop("type")
        if "required" in new_action:
            new_action["input_schema"]["required"] = new_action.pop("required")
        actions.append(new_action)
    return actions


================================================
FILE: crab/agents/backend_models/gemini_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
=========== import os from typing import Any from PIL.Image import Image from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed from crab import Action, ActionOutput, BackendModel, BackendOutput, Message, MessageType from crab.utils.common import base64_to_image, json_expand_refs try: import google.generativeai as genai from google.ai.generativelanguage_v1beta import ( Content, FunctionDeclaration, Part, Tool, ) from google.api_core.exceptions import ResourceExhausted from google.generativeai.types import content_types gemini_model_enable = True except ImportError: gemini_model_enable = False class GeminiModel(BackendModel): def __init__( self, model: str, parameters: dict[str, Any] | None = None, history_messages_len: int = 0, tool_call_required: bool = True, ) -> None: if gemini_model_enable is False: raise ImportError("Please install google.generativeai to use GeminiModel") self.model = model self.parameters = parameters if parameters is not None else {} self.history_messages_len = history_messages_len assert self.history_messages_len >= 0 genai.configure(api_key=os.environ["GEMINI_API_KEY"]) self.client = genai self.tool_call_required = tool_call_required self.system_message: str = "You are a helpful assistant." 
self.action_space: list[Action] | None = None self.action_schema: list[Tool] | None = None self.token_usage: int = 0 self.chat_history: list[list[dict]] = [] self.support_tool_call = True def reset(self, system_message: str, action_space: list[Action] | None) -> None: self.system_message = system_message self.action_space = action_space self.action_schema = _convert_action_to_schema(self.action_space) self.token_usage = 0 self.chat_history = [] def chat(self, message: list[Message] | Message) -> BackendOutput: if isinstance(message, tuple): message = [message] request = self._fetch_from_memory() new_message = self._construct_new_message(message) request.append(new_message) response_message = self._call_api(request) self._record_message(new_message, response_message) return self._generate_backend_output(response_message) def _construct_new_message(self, message: list[Message]) -> dict[str, Any]: parts: list[str | Image] = [] for content, msg_type in message: match msg_type: case MessageType.TEXT: parts.append(content) case MessageType.IMAGE_JPG_BASE64: parts.append(base64_to_image(content)) return { "role": "user", "parts": parts, } def _generate_backend_output(self, response_message: Content) -> BackendOutput: tool_calls: list[ActionOutput] = [] for part in response_message.parts: if "function_call" in Part.to_dict(part): call = Part.to_dict(part)["function_call"] tool_calls.append( ActionOutput( name=call["name"], arguments=call["args"], ) ) return BackendOutput( message=response_message.parts[0].text or None, action_list=tool_calls or None, ) def _fetch_from_memory(self) -> list[dict]: request: list[dict] = [] if self.history_messages_len > 0: fetch_history_len = min(self.history_messages_len, len(self.chat_history)) for history_message in self.chat_history[-fetch_history_len:]: request = request + history_message return request def get_token_usage(self): return self.token_usage def _record_message( self, new_message: dict[str, Any], response_message: Content ) 
-> None: self.chat_history.append([new_message]) self.chat_history[-1].append( {"role": response_message.role, "parts": response_message.parts} ) @retry( wait=wait_fixed(10), stop=stop_after_attempt(7), retry=retry_if_exception_type(ResourceExhausted), ) def _call_api(self, request_messages: list) -> Content: if self.action_schema is not None: tool_config = content_types.to_tool_config( { "function_calling_config": { "mode": "ANY" if self.tool_call_required else "AUTO" } } ) response = self.client.GenerativeModel( self.model, system_instruction=self.system_message ).generate_content( contents=request_messages, tools=self.action_schema, tool_config=tool_config, # **self.parameters, # TODO(Tianqi): Fix this line in the future ) else: response = self.client.GenerativeModel( self.model, system_instruction=self.system_message ).generate_content( contents=request_messages, # **self.parameters, # TODO(Tianqi): Fix this line in the future ) self.token_usage += response.candidates[0].token_count return response.candidates[0].content def _convert_action_to_schema(action_space: list[Action] | None) -> list[Tool] | None: if action_space is None: return None actions = [ Tool( function_declarations=[ _action_to_func_dec(action) for action in action_space ] ) ] return actions def _clear_schema(schema_dict: dict) -> None: schema_dict.pop("title", None) p_type = schema_dict.pop("type", None) for prop in schema_dict.get("properties", {}).values(): _clear_schema(prop) if p_type is not None: schema_dict["type_"] = p_type.upper() if "items" in schema_dict: _clear_schema(schema_dict["items"]) def _action_to_func_dec(action: Action) -> FunctionDeclaration: "Converts crab Action to google FunctionDeclaration" p_schema = action.parameters.model_json_schema() if "$defs" in p_schema: p_schema = json_expand_refs(p_schema) _clear_schema(p_schema) if not p_schema["properties"]: return FunctionDeclaration( name=action.name, description=action.description, ) return FunctionDeclaration( 
name=action.name, description=action.description, parameters=p_schema, ) ================================================ FILE: crab/agents/backend_models/openai_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import json from typing import Any from crab import Action, ActionOutput, BackendModel, BackendOutput, Message, MessageType from crab.agents.utils import extract_text_and_code_prompts try: import openai from openai.types.chat import ChatCompletionMessage openai_model_enable = True except ImportError: openai_model_enable = False class OpenAIModel(BackendModel): def __init__( self, model: str, parameters: dict[str, Any] | None = None, history_messages_len: int = 0, tool_call_required: bool = True, base_url: str | None = None, api_key: str | None = None, ) -> None: if not openai_model_enable: raise ImportError("Please install openai to use OpenAIModel") self.model = model self.parameters = parameters if parameters is not None else {} self.history_messages_len = history_messages_len assert self.history_messages_len >= 0 self.client = openai.OpenAI(api_key=api_key, base_url=base_url) self.tool_call_required: bool = tool_call_required self.system_message: str = "You are a helpful assistant." 
self.openai_system_message = { "role": "system", "content": self.system_message, } self.action_space: list[Action] | None = None self.action_schema: list[dict] | None = None self.token_usage: int = 0 self.chat_history: list[list[ChatCompletionMessage | dict]] = [] self.support_tool_call = True def reset(self, system_message: str, action_space: list[Action] | None) -> None: self.system_message = system_message self.openai_system_message = { "role": "system", "content": system_message, } self.action_space = action_space self.action_schema = _convert_action_to_schema(self.action_space) self.token_usage = 0 self.chat_history = [] def chat(self, message: list[Message] | Message) -> BackendOutput: if isinstance(message, tuple): message = [message] request = self._fetch_from_memory() new_message = self._construct_new_message(message) request.append(new_message) response_message = self._call_api(request) self._record_message(new_message, response_message) return self._generate_backend_output(response_message) def get_token_usage(self): return self.token_usage def _record_message( self, new_message: dict, response_message: ChatCompletionMessage ) -> None: self.chat_history.append([new_message]) self.chat_history[-1].append(response_message) if self.action_schema and response_message.tool_calls is not None: for tool_call in response_message.tool_calls: self.chat_history[-1].append( { "tool_call_id": tool_call.id, "role": "tool", "name": tool_call.function.name, "content": "success", } ) # extend conversation with function response def _call_api( self, request_messages: list[ChatCompletionMessage | dict] ) -> ChatCompletionMessage: if self.action_schema is not None: response = self.client.chat.completions.create( messages=request_messages, # type: ignore model=self.model, tools=self.action_schema, tool_choice="required" if self.tool_call_required else "auto", **self.parameters, ) else: response = self.client.chat.completions.create( messages=request_messages, # type: ignore 
model=self.model, **self.parameters, ) self.token_usage += response.usage.total_tokens return response.choices[0].message def _fetch_from_memory(self) -> list[ChatCompletionMessage | dict]: request: list[ChatCompletionMessage | dict] = [self.openai_system_message] if self.history_messages_len > 0: fetch_history_len = min(self.history_messages_len, len(self.chat_history)) for history_message in self.chat_history[-fetch_history_len:]: request = request + history_message return request def _construct_new_message(self, message: list[Message]) -> dict[str, Any]: new_message_content: list[dict[str, Any]] = [] for content, msg_type in message: match msg_type: case MessageType.TEXT: new_message_content.append( { "type": "text", "text": content, } ) case MessageType.IMAGE_JPG_BASE64: new_message_content.append( { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{content}", "detail": "high", }, } ) return {"role": "user", "content": new_message_content} def _generate_backend_output( self, response_message: ChatCompletionMessage ) -> BackendOutput: if response_message.tool_calls is None: return BackendOutput(message=response_message.content, action_list=None) action_list = [ ActionOutput( name=call.function.name, arguments=json.loads(call.function.arguments), ) for call in response_message.tool_calls ] return BackendOutput( message=response_message.content, action_list=action_list, ) def _convert_action_to_schema( action_space: list[Action] | None, ) -> list[dict] | None: if action_space is None: return None actions = [] for action in action_space: new_action = action.to_openai_json_schema() actions.append({"type": "function", "function": new_action}) return actions class OpenAIModelJSON(OpenAIModel): def __init__( self, model: str, parameters: dict[str, Any] = dict(), history_messages_len: int = 0, base_url: str | None = None, api_key: str | None = None, ) -> None: super().__init__( model, parameters, history_messages_len, False, base_url, api_key, ) 
self.support_tool_call = False def reset(self, system_message: str, action_space: list[Action] | None) -> None: super().reset(system_message, action_space) self.action_schema = None def _record_message( self, new_message: dict, response_message: ChatCompletionMessage ) -> None: self.chat_history.append([new_message]) self.chat_history[-1].append( {"role": "assistant", "content": response_message.content} ) def _generate_backend_output( self, response_message: ChatCompletionMessage ) -> BackendOutput: content = response_message.content text_list, code_list = extract_text_and_code_prompts(content) action_list = [] try: for code_block in code_list: action_object = json.loads(code_block) action_list.append( ActionOutput( name=action_object["name"], arguments=action_object["arguments"] ) ) except json.JSONDecodeError as e: raise RuntimeError(f"Failed to parse code block: {code_block}") from e except KeyError as e: raise RuntimeError(f"Received invalid action format: {code_block}") from e return BackendOutput( message="".join(text_list), action_list=action_list, ) class SGlangOpenAIModelJSON(OpenAIModelJSON): def _construct_new_message(self, message: list[Message]) -> dict[str, Any]: new_message_content: list[dict[str, Any]] = [] image_count = 0 for _, msg_type in message: if msg_type == MessageType.IMAGE_JPG_BASE64: image_count += 1 for content, msg_type in message: match msg_type: case MessageType.TEXT: new_message_content.append( { "type": "text", "text": content, } ) case MessageType.IMAGE_JPG_BASE64: image_content = { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{content}", "detail": "high", }, } if image_count > 1: image_content["modalities"] = "multi-images" new_message_content.append(image_content) return {"role": "user", "content": new_message_content} ================================================ FILE: crab/agents/policies/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. 
All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # ruff: noqa: F401 from .multi_agent_by_env import MultiAgentByEnvPolicy from .multi_agent_by_func import MultiAgentByFuncPolicy from .single_agent import SingleAgentPolicy ================================================ FILE: crab/agents/policies/multi_agent_by_env.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
# ===========
from crab import Action, ActionOutput
from crab.agents.backend_models import BackendModelConfig, create_backend_model
from crab.agents.utils import generate_action_prompt
from crab.core.agent_policy import AgentPolicy
from crab.core.backend_model import (
    BackendModel,
    MessageType,
)


class MultiAgentByEnvPolicy(AgentPolicy):
    """Policy with one planning "main" agent plus one sub-agent per environment.

    The main agent produces per-step instructions; each environment sub-agent
    translates the instruction into tool calls for its own environment.
    """

    _main_agent_prompt = """You are a main agent, and your goal is to plan and
give instructions to sub-agents in each environment to complete the final task.
Now you have to do a task as described below:

    {task_description}.

The description of each given environment: {env_description}.

For each step, you are required to provide high-level instructions detailing
the next actions to be taken. Additionally, you must specify which sub-agent
in the designated environment should execute these instructions. If a sub-agent
is not needed for a particular step, you may instruct it to skip that step."""

    _env_agent_prompt = """You are a sub-agent responsible for the {environment}
environment. The description of the {environment} environment is:
{env_description}. Your goal is to assist the main agent in completing the final
task by performing actions in the {environment} environment according to the
instructions from the main agent. The final task is described below:

    {task_description}.

A unit operation you can perform is called action in a given environment. You
can only execute action in the {environment} environment. For the {environment}
environment, you are given a limited action space as function calls:

{action_descriptions}

The interactive UI elements on the screenshot are labeled with numeric tags
starting from 1. For each step, You will receive an instruction telling you
what you need to do next. After analyzing the instruction you received and the
current {environment} system, if you think you don't need to do anything in the
current {environment} system, you should choose SKIP action. Otherwise, you
must state what actions to take, what the parameters are, and you MUST provide
in which environment to perform these actions. Your answer must be function
calls. Please do not output any other information. You must make sure all
function calls get their required parameters."""

    _root_agent_prompt = """You are a sub-agent responsible for the crab
benchmark root environment. Your goal is to assist the main agent in completing
the whole task:

    "{task_description}".

You can only complete the task or submit the result when the main agent tells
you the whole task has been completed. Otherwise, you can only call SKIP.
"""

    def __init__(
        self,
        main_agent_model_backend: BackendModelConfig,
        env_agent_model_backend: BackendModelConfig,
    ):
        self.main_agent_model_backend = create_backend_model(main_agent_model_backend)
        # Sub-agent backends are created lazily in reset(), one per environment.
        self.env_agent_model_backend_config = env_agent_model_backend
        self.reset(task_description="", action_spaces={}, env_descriptions={})

    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> list:
        """Re-create the main agent prompt and one sub-agent per environment."""
        self.task_description = task_description
        main_agent_system_message = self._main_agent_prompt.format(
            task_description=task_description,
            env_description=str(env_descriptions),
        )
        # The main agent never calls tools itself, hence action space None.
        self.main_agent_model_backend.reset(main_agent_system_message, None)
        root_agent_system_message = self._root_agent_prompt.format(
            task_description=task_description
        )
        self.env_agent_model_backends: dict[str, BackendModel] = {}
        for env in action_spaces:
            backend = create_backend_model(self.env_agent_model_backend_config)
            if env == "root":
                backend.reset(root_agent_system_message, action_spaces[env])
            else:
                # NOTE(review): `require_tool` is not an attribute the backend
                # models define (they use `tool_call_required`, fixed at
                # construction) — this assignment looks like it has no effect;
                # kept for behavioral parity, verify intent.
                backend.require_tool = True
                env_agent_system_message = self._env_agent_prompt.format(
                    task_description=task_description,
                    environment=env,
                    env_description=env_descriptions[env],
                    action_descriptions=generate_action_prompt(action_spaces[env]),
                )
                backend.reset(env_agent_system_message, action_spaces[env])
            self.env_agent_model_backends[env] = backend

    def get_token_usage(self):
        """Sum token usage across the main agent and every sub-agent."""
        result = 0
        result += self.main_agent_model_backend.get_token_usage()
        for env_agent in self.env_agent_model_backends.values():
            result += env_agent.get_token_usage()
        return result

    def get_backend_model_name(self):
        return (
            self.main_agent_model_backend.__class__.__name__
            + "_"
            + self.main_agent_model_backend.model
        )

    def chat(
        self,
        observation: dict[str, list[tuple[str, MessageType]]],
    ) -> list[ActionOutput]:
        """Ask the main agent for instructions, then fan them out to sub-agents.

        Returns the concatenated tool calls of all sub-agents, each tagged
        with its environment.
        """
        main_prompt = []
        for env in observation:
            main_prompt.extend(observation[env])
        main_prompt.append(
            (
                (
                    f"Your target: {self.task_description}\n"
                    "Tell me the next step in each environment."
                ),
                MessageType.TEXT,
            )
        )
        output = self.main_agent_model_backend.chat(main_prompt)
        main_agent_message = (
            f"The instruction from main agent for this step: {output.message}"
        )
        tool_calls = []
        for env in self.env_agent_model_backends:
            backend = self.env_agent_model_backends[env]
            if env in observation:
                output = backend.chat(
                    observation[env] + [(main_agent_message, MessageType.TEXT)]
                )
            else:
                output = backend.chat((main_agent_message, MessageType.TEXT))
            # BUG FIX: backends return action_list=None when the model makes
            # no tool call; iterating/extending None raised TypeError here.
            if output.action_list:
                for action in output.action_list:
                    action.env = env
                tool_calls.extend(output.action_list)
        return tool_calls


# ===== FILE: crab/agents/policies/multi_agent_by_func.py =====
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from crab.agents.backend_models import BackendModelConfig, create_backend_model
from crab.agents.utils import (
    combine_multi_env_action_space,
    decode_combined_action,
    generate_action_prompt,
)
from crab.core import Action, ActionOutput
from crab.core.agent_policy import AgentPolicy
from crab.core.backend_model import MessageType


class MultiAgentByFuncPolicy(AgentPolicy):
    """Policy with a planning "main" agent plus a dedicated tool agent that
    translates the main agent's natural-language plan into function calls."""

    _system_prompt = """You are a helpful assistant. Now you have to do a task as
described below:

    {task_description}.

And this is the description of each given environment: {env_description}.

A unit operation you can perform is called action in a given environment. For
each environment, you are given a limited action space as function calls:

{action_descriptions}

You may receive a screenshot of the current system. The interactive UI elements
on the screenshot are labeled with numeric tags starting from 1. For each step,
You must state what actions to take, what the parameters are, and you MUST
provide in which environment to perform these actions.
"""

    _tool_prompt = """You are a helpful assistant in generating function calls. I
will give you a detailed description of what actions to take next, you should
translate it into function calls. please do not output any other information.
"""

    def __init__(
        self,
        main_agent_model_backend: BackendModelConfig,
        tool_agent_model_backend: BackendModelConfig,
    ):
        self.main_agent_model_backend = create_backend_model(main_agent_model_backend)
        self.tool_agent_model_backend = create_backend_model(tool_agent_model_backend)
        self.reset(task_description="", action_spaces=None, env_descriptions={})

    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> list[ActionOutput]:
        """Rebuild both agents' system prompts for a new task."""
        self.task_description = task_description
        # Actions from all environments are merged under "<name>_in_<env>".
        self.action_space = combine_multi_env_action_space(action_spaces)
        main_agent_system_message = self._system_prompt.format(
            task_description=task_description,
            action_descriptions=generate_action_prompt(self.action_space),
            env_description=str(env_descriptions),
        )
        # Only the tool agent gets the action space; the main agent plans in
        # free text.
        self.main_agent_model_backend.reset(main_agent_system_message, None)
        self.tool_agent_model_backend.reset(self._tool_prompt, self.action_space)

    def get_token_usage(self):
        """Sum token usage of both agents."""
        return (
            self.main_agent_model_backend.get_token_usage()
            + self.tool_agent_model_backend.get_token_usage()
        )

    def get_backend_model_name(self):
        return (
            self.main_agent_model_backend.__class__.__name__
            + "_"
            + self.main_agent_model_backend.model
        )

    def chat(
        self,
        observation: dict[str, list[tuple[str, MessageType]]],
    ) -> list[ActionOutput]:
        """Plan with the main agent, then have the tool agent emit actions."""
        prompt = []
        for env in observation:
            prompt.extend(observation[env])
        prompt.append(
            (
                f"Your target: {self.task_description}\nTell me the next action.",
                MessageType.TEXT,
            )
        )
        output = self.main_agent_model_backend.chat(prompt)
        tool_output = self.tool_agent_model_backend.chat(
            (output.message, MessageType.TEXT)
        )
        # BUG FIX: action_list is None when the tool agent makes no call;
        # decode_combined_action would raise iterating None.
        return decode_combined_action(tool_output.action_list or [])


# ===== FILE: crab/agents/policies/single_agent.py =====
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
# ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import logging

from crab import Action, ActionOutput
from crab.agents.backend_models import BackendModelConfig, create_backend_model
from crab.agents.utils import (
    combine_multi_env_action_space,
    decode_combined_action,
    generate_action_prompt,
)
from crab.core.agent_policy import AgentPolicy
from crab.core.backend_model import (
    MessageType,
)
from crab.utils.measure import timed

logger = logging.getLogger(__name__)


class SingleAgentPolicy(AgentPolicy):
    """Policy driven by a single agent, either via native function calling or
    via JSON code blocks when the backend has no tool-call support."""

    _system_prompt_with_function_call = """\
You are a helpful assistant. Now you have to do a task as described below:
**"{task_description}."** You should never forget this task and always perform
actions to achieve this task. And this is the description of each given
environment: {env_description}. A unit operation you can perform is called
Action. You have a limited action space as function calls:
{action_descriptions}
You may receive a screenshot of the current system. You may receive a
screenshot of a smartphone app. The interactive UI elements on the screenshot
are labeled with numeric tags starting from 1. In each step, You MUST explain
what do you see from the current observation and the plan of the next action,
then use a provided action in each step to achieve the task. You should state
what action to take and what the parameters should be. Your answer MUST be a
least one function call. You SHOULD NEVER ask me to do anything for you. Always
do them by yourself using function calls.
"""

    _system_prompt_no_function_call = """\
You are a helpful assistant. Now you have to do a task as described below:
**"{task_description}."** You should never forget this task and always perform
actions to achieve this task. And this is the description of each given
environment: {env_description}. You will receive screenshots of the
environments. The interactive UI elements on the screenshot are labeled with
numeric tags starting from 1.

A unit operation you can perform is called Action. You have a limited action
space as function calls:
{action_descriptions}.

You should generate JSON code blocks to execute the actions. Each code block
MUST contains only one json object, i.e. one action. You can output multiple
code blocks to execute multiple actions in a single step. You must follow the
JSON format below to output the action.

```json
{{"name": "action_name", "arguments": {{"arg1": "value1", "arg2": "value2"}}}}
```

or if not arguments needed:

```json
{{"name": "action_name", "arguments": {{}}}}
```

You MUST use exactly the same "action_name" as I gave to you in the action
space. You SHOULDN'T add any comments in the code blocks.

In each step, You MUST explain what do you see from the current observation and
the plan of the next action, then use a provided action in each step to achieve
the task. You should state what action to take and what the parameters should
be. Your answer MUST contain at least one code block. You SHOULD NEVER ask me
to do anything for you. Always do them by yourself.
"""

    def __init__(
        self,
        model_backend: BackendModelConfig,
        function_call: bool = True,
    ):
        self.model_backend = create_backend_model(model_backend)
        self.function_call = function_call
        # Silently degrade to JSON-code-block mode when the backend cannot
        # make native tool calls.
        if not self.model_backend.support_tool_call and self.function_call:
            logger.warning(
                "The backend model does not support tool call: {}".format(
                    model_backend.model_name
                )
                + "\nFallback to no function call mode."
            )
            self.function_call = False
        if self.function_call:
            self.system_prompt = self._system_prompt_with_function_call
        else:
            self.system_prompt = self._system_prompt_no_function_call
        self.reset(task_description="", action_spaces=None, env_descriptions={})

    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> list:
        """Rebuild the system prompt and action space for a new task."""
        self.task_description = task_description
        # Actions from all environments are merged under "<name>_in_<env>".
        self.action_space = combine_multi_env_action_space(action_spaces)
        system_message = self.system_prompt.format(
            task_description=task_description,
            action_descriptions=generate_action_prompt(
                self.action_space,
                # Expanded JSON-schema descriptions are only needed when the
                # model must emit raw JSON instead of native tool calls.
                expand=not self.function_call,
            ),
            env_description=str(env_descriptions),
        )
        if self.function_call:
            self.model_backend.reset(system_message, self.action_space)
        else:
            self.model_backend.reset(system_message, None)

    def get_token_usage(self):
        return self.model_backend.get_token_usage()

    def get_backend_model_name(self):
        return self.model_backend.__class__.__name__ + "_" + self.model_backend.model

    @timed
    def chat(
        self,
        observation: dict[str, list[tuple[str, MessageType]]],
    ) -> list[ActionOutput]:
        """Send all observations plus the target prompt; decode the actions."""
        prompt = []
        for env in observation:
            prompt.extend(observation[env])
        prompt.append(
            (
                f"Your target: {self.task_description}\nTell me the next action.",
                MessageType.TEXT,
            )
        )
        output = self.model_backend.chat(prompt)
        # print("Agent Message: " + output.message, flush=True)
        # print("Agent Action: " + str(output.action_list), flush=True)
        # BUG FIX: action_list is None when the model makes no tool call;
        # decode_combined_action would raise iterating None.
        return decode_combined_action(output.action_list or [])


# ===== FILE: crab/agents/utils.py
================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from crab.core import Action, ActionOutput def combine_multi_env_action_space( action_space: dict[str, list[Action]] | None, ) -> list[Action]: """Combine multi-env action space together to fit in a single agent.""" result = [] if action_space is None: return result for env in action_space: for action in action_space[env]: new_action = action.model_copy() new_action.name = new_action.name + "_in_" + env new_action.description = f"In {env} environment, " + new_action.description result.append(new_action) return result def decode_combined_action( output_actions: list[ActionOutput], ) -> list[ActionOutput]: """Decode combined action output to action output with the corresponding environment. """ result = [] for output in output_actions: name_env = output.name.split("_in_") if len(name_env) != 2: raise RuntimeError( 'The decoded action name should contain the splitter "_in_".' 
) new_output = output.model_copy() new_output.name = name_env[0] new_output.env = name_env[1] result.append(new_output) return result def generate_action_prompt(action_space: list[Action], expand: bool = False) -> str: if expand: return "".join( [ f"[**{action.name}**:\n" f"action description: {action.description}\n" f"action arguments json schema: {action.to_openai_json_schema()}\n" "]\n" for action in action_space ] ) else: return "".join( [f"[{action.name}: {action.description}]\n" for action in action_space] ) def extract_text_and_code_prompts(content: str) -> tuple[list[str], list[str]]: r"""Extract text and code prompts from the message content. Returns: A tuple (text_list, code_list) where, text_list is a list of text and code_list is a list of extracted codes both from the content. """ text_prompts: list[str] = [] code_prompts: list[str] = [] lines = content.split("\n") idx = 0 start_idx = 0 while idx < len(lines): while idx < len(lines) and (not lines[idx].lstrip().startswith("```")): idx += 1 text = "\n".join(lines[start_idx:idx]).strip() text_prompts.append(text) if idx >= len(lines): break # code_type = lines[idx].strip()[3:].strip() idx += 1 start_idx = idx while not lines[idx].lstrip().startswith("```") and idx < len(lines): idx += 1 if idx >= len(lines): break code = "\n".join(lines[start_idx:idx]).strip() code_prompts.append(code) idx += 1 start_idx = idx return text_prompts, code_prompts ================================================ FILE: crab/benchmarks/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========


================================================
FILE: crab/benchmarks/template.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import networkx as nx

from crab import BenchmarkConfig, Task, action, evaluator
from crab.environments.template import set_state, template_environment_config


@evaluator
def is_system_state(env) -> bool:
    """Pass when the (template) environment's boolean state is truthy."""
    return env.state


@evaluator(env_name="root")
def check_submit_true(env) -> bool:
    """Pass when the most recent action in the trajectory is a `_submit`
    call with a truthy ``content`` argument."""
    if env.trajectory:
        # Trajectory entries are (action_name, parameters, result) tuples.
        action_name, params, _ = env.trajectory[-1]
        print(action_name, params)
        if action_name == "_submit" and params["content"]:
            return True
    return False


@action(env_name="root")
def _submit(content: bool) -> None:
    """Submit your answer through this function.

    Args:
        content: the content to submit
    """
    # Intentionally a no-op: submission is detected by inspecting the
    # trajectory in check_submit_true, not by any side effect here.
    pass


# Single-environment demo benchmark: one state-flip task and one submit task.
template_benchmark_config = BenchmarkConfig(
    name="template_benchmark",
    environments=[template_environment_config],
    tasks=[
        Task(
            id="0",
            description="Set the system state to True.",
            evaluator=is_system_state,
            setup=set_state(False),
        ),
        Task(
            id="1",
            description="Submit True.",
            evaluator=check_submit_true,
            extra_action=[_submit],
        ),
    ],
)


@evaluator(env_name="testenv0")
def check_sys0(env) -> bool:
    """Pass when testenv0's state is truthy."""
    return env.state


@evaluator(env_name="testenv1")
def check_sys1(env) -> bool:
    """Pass when testenv1's state is truthy."""
    return env.state


@evaluator(env_name="testenv2")
def check_sys2(env) -> bool:
    """Pass when testenv2's state is truthy."""
    return env.state


# Evaluator DAG: all three per-env checks must pass before the submit check.
eval_g = nx.DiGraph()
eval_g.add_edge(check_sys0, check_submit_true)
eval_g.add_edge(check_sys1, check_submit_true)
eval_g.add_edge(check_sys2, check_submit_true)

# Multi-environment demo benchmark built from three copies of the template env.
# NOTE(review): "mutlienv" below looks like a typo for "multienv", but the name
# is a runtime identifier — renaming it could break references; confirm first.
multienv_template_benchmark_config = BenchmarkConfig(
    name="mutlienv_template_benchmark",
    environments=[
        template_environment_config.model_copy(update={"name": f"testenv{idx}"})
        for idx in range(3)
    ],
    tasks=[
        Task(
            id="0",
            description=(
                "Set the system state to True in all three environments. "
                "Then submit True to finish the project."
            ),
            evaluator=eval_g,
            extra_action=[_submit],
        )
    ],
    multienv=True,
)


================================================
FILE: crab/core/__init__.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
===========
# ruff: noqa: F401, F403
from .agent_policy import AgentPolicy
from .backend_model import BackendModel
from .benchmark import Benchmark, create_benchmark
from .decorators import action, evaluator
from .environment import Environment, create_environment
from .experiment import Experiment
from .graph_evaluator import Evaluator, GraphEvaluator
from .models import *
from .task_generator import TaskGenerator


================================================
FILE: crab/core/agent_policy.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from abc import ABC, abstractmethod

from .models import Action, ActionOutput, Message


class AgentPolicy(ABC):
    """Abstract interface every agent policy must implement.

    A policy consumes per-environment observations and produces the next
    actions; it is reset per task and reports model usage for logging.
    """

    @abstractmethod
    def chat(
        self,
        observation: dict[str, list[Message]],
    ) -> list[ActionOutput]:
        """Given per-environment observations, return the next actions."""
        ...

    @abstractmethod
    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> None:
        """Re-initialize the policy for a new task."""
        ...

    @abstractmethod
    def get_token_usage(self) -> int:
        """Return cumulative token usage for the current task."""
        ...

    @abstractmethod
    def get_backend_model_name(self) -> str:
        """Return an identifier of the underlying backend model."""
        ...


================================================
FILE: crab/core/backend_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from abc import ABC, abstractmethod

from .models import Action, BackendOutput, MessageType


class BackendModel(ABC):
    """Abstract wrapper around a concrete LLM backend (OpenAI, Claude, ...).

    Concrete implementations hold the conversation state; ``reset`` installs
    a system message and (optionally) a tool-call action space.
    """

    @abstractmethod
    def chat(self, contents: list[tuple[str, MessageType]]) -> BackendOutput:
        """Send one user turn (text/image parts) and return the model output."""
        ...

    @abstractmethod
    def reset(
        self,
        system_message: str,
        action_space: list[Action] | None,
    ):
        """Clear history, set the system message and optional tool schema."""
        ...

    @abstractmethod
    def get_token_usage(self):
        """Return cumulative token usage since the last reset."""
        ...


================================================
FILE: crab/core/benchmark.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
===========
import traceback
from time import sleep
from typing import Any

from crab.core.graph_evaluator import GraphEvaluator
from crab.utils.measure import timed

from .environment import Environment, create_environment
from .exceptions import TaskNotFound
from .models import Action, BenchmarkConfig, ClosedAction, MessageType, StepResult, Task


class Benchmark:
    """The crab benchmark controller managing environments and agent evaluation.

    The class manages multiple environments together and provide the simple API by
    :meth:`step`, :meth:`observe` and :meth:`reset` for language model agents to
    perform tasks in multiple environments. This class introduces a "root"
    environment with no action or observation capabilities, intended as a utility
    for evaluations not directly tied to a specific environment.

    This class operates in two distinct modes: "multi-environment" and
    "single-environment". In multi-environment mode, observations and action
    results are separated by environment, returned as a dictionary. While in
    single-environment mode, all observations and action outcomes are merged under
    the "root" environment, with actions being appropriately routed to their
    respective environments.
    """

    def __init__(
        self,
        name: str,
        tasks: list[Task],
        environments: list[Environment],
        default_env: str | None = None,
        multienv: bool = False,
        prompting_tools: dict[str, dict[str, Action]] = {},
        root_action_space: list[Action] = [],
        step_limit: int = 30,
        common_setup: list[ClosedAction] = [],
    ) -> None:
        """Initializes the instance.

        Args:
            name: Identifier for the benchmark.
            tasks: Tasks to be executed within the benchmark.
            environments: Environments in which the benchmark is conducted.
            default_env: The default environment name, applied when actions do not
                specify an environment. Defaults to "root" in the multi-environment
                mode and to the environment in the single environment mode.
            multienv: Indicates whether to enable multi-environment mode. Defaults
                to :obj:`False`.
            prompting_tools: Prompting tools applied in
                :meth:`observe_with_prompt`. The first level keys are environment
                names, the second level keys are observation action names.
                Defaults to empty.
            root_action_space: The action space executed in the root environment.
        """
        self.name = name
        self.tasks = tasks
        self.multienv = multienv
        self.prompting_tools = prompting_tools
        self.step_limit = step_limit
        self.common_setup = common_setup
        # Accept a bare Environment for convenience and normalize to a list.
        if isinstance(environments, Environment):
            environments = [environments]
        self.root_env = Environment(
            name="root",
            action_space=root_action_space,
            observation_space=[],
            description="The crab benchmark root. You can submit your answer or "
            "complete the task using this environment.",
        )
        self.root_env.contained_envs = {env.name: env for env in environments}  # A hack
        environments.append(self.root_env)
        self.environment_map: dict[str, Environment] = {
            env.name: env for env in environments
        }

        # if not multienv, combine all environments action space together
        if not self.multienv:
            # action_map is used only by "agent", specifically `step` and
            # `export_action_space` functions
            self._verify_spaces()
            self._generate_action_map()

        # default_env is used for predefined actions without env_name or like
        # evaluators setups, teardowns, and so on.
        if default_env is None:
            # Two entries == the single user env plus the appended root env.
            if not multienv and len(environments) == 2:
                self.default_env = environments[0].name
            else:
                self.default_env = self.root_env.name
        else:
            self.default_env = default_env

        self.current_task: Task | None = None
        self.current_evaluator: GraphEvaluator | None = None
        self.step_cnt = 0

    def start_task(self, task_id: str) -> tuple[Task, dict[str, list[Action]]]:
        """Initializes and starts a specified task.

        Args:
            task_id: The ID of the task to start.

        Returns:
            A tuple (task, action_space), where task is the started task object,
            and action_space is a dict mapping action names to the corresponding
            action object.
        """
        if self.current_task is not None:
            raise RuntimeError("Another task is running")
        self.current_task = self._get_task_by_id(task_id)

        # reset all environments
        self._reset_environments()
        for action in self.common_setup:
            self._take_env_action(action)

        # select environment by Action.env_name
        for action in self.current_task.setup:
            self._take_env_action(action)
        for task_action in self.current_task.extra_action:
            self._set_env_action(task_action)

        # reset evaluator
        self.current_evaluator = GraphEvaluator(self.current_task.evaluator)
        # put submit action to corresponding env space
        # For now, only the last node can be the submit task
        self.step_cnt = 0
        return self.current_task, self.export_action_space()

    def close_task(self) -> None:
        """Cleans up after a task is completed."""
        if self.current_evaluator is None or self.current_task is None:
            raise RuntimeError("There is no started task.")
        for action in self.current_task.teardown:
            self._take_env_action(action)
        self.current_task = None

    def get_env_descriptions(self) -> dict[str, str]:
        """Get environment descriptions as a dict structure."""
        return {
            name: self.environment_map[name].description
            for name in self.environment_map
        }

    def observe(self) -> dict[str, dict[str, Any]]:
        """Collects observations from all environments.

        Returns:
            A dict-of-dict with observations from each environment. The first
            level keys are environment names, the second level keys are
            observation action names.
        """
        env_obs = {env.name: env.observe() for env in self.environment_map.values()}
        if self.multienv:
            return env_obs
        return self._merge_dicts(env_obs)

    @timed
    def observe_with_prompt(
        self,
    ) -> tuple[dict[str, dict[str, Any]], dict[str, tuple[str, MessageType]]]:
        """Collects observations and applies prompting tools.

        Returns:
            A tuple (observations, prompts), where "observations" and "prompts"
            are observations from each environment and the result of applying
            prompting tools on them.
The first level keys are environment names, the second level keys are observation action names. Notice that some dicts can be empty if its prompting tool wasn't set. """ observations = {} prompts = {} for env_name, env in self.environment_map.items(): if env_name in self.prompting_tools: tools = self.prompting_tools[env_name] else: tools = {} observations[env_name], prompts[env_name] = env.observe_with_prompt(tools) if self.multienv: return observations, prompts return self._merge_dicts(observations), self._merge_dicts(prompts) def evaluate(self): self.current_evaluator.step(self.environment_map, self.default_env) return self.current_evaluator.stat() @timed def step( self, action: str, parameters: dict[str, Any] = {}, env_name: str | None = None, ) -> StepResult: """Executes a step in the benchmark by performing an action. Args: action: The action to execute. parameters: Parameters for the action. env_name: The name of the environment. Returns: The result of the step including observations and evaluation metrics. Notice that the `truncated` field in the result is not meaningful for now. 
""" terminated = False info = {} if self.current_evaluator is None or self.current_task is None: raise RuntimeError("There is no started task.") if action == "complete": terminated = True info["terminate_reason"] = "agent_complete" return StepResult( truncated=False, terminated=True, action_returns=None, evaluation_results=self.current_evaluator.stat(), info=info, ) try: environment = self._get_env(env_name=env_name, action_name=action) except Exception: print(traceback.format_exc()) terminated = True info["terminate_reason"] = "action_format_error" info["exception_detail"] = traceback.format_exc() environment.reset() self.close_task() return StepResult( truncated=False, terminated=True, action_returns=None, evaluation_results=self.current_evaluator.stat(), info=info, ) try: action_returns = environment.step(action, parameters) except Exception: print(traceback.format_exc()) terminated = True info["terminate_reason"] = "env_exception" info["exception_detail"] = traceback.format_exc() environment.reset() self.close_task() return StepResult( truncated=False, terminated=True, action_returns=None, evaluation_results=self.current_evaluator.stat(), info=info, ) try: evaluation_results = self.evaluate() except Exception: print(traceback.format_exc()) terminated = True info["terminate_reason"] = "evaluator_exception" info["exception_detail"] = traceback.format_exc() environment.reset() self.close_task() return StepResult( truncated=False, terminated=True, action_returns=action_returns, evaluation_results=self.current_evaluator.stat(), info=info, ) self.step_cnt += 1 if self.current_evaluator.is_complete(): terminated = True info["terminate_reason"] = "success" if self.step_cnt >= self.step_limit: terminated = True info["terminate_reason"] = "reach_max_step" if terminated: environment.reset() self.close_task() return StepResult( truncated=False, terminated=terminated, action_returns=action_returns, evaluation_results=evaluation_results, info=info, ) def reset(self) -> None: 
"""Resets all environments and the current task.""" self.current_evaluator = None self._reset_environments() def human_evaluation(self, task_id: str) -> None: task, _ = self.start_task(task_id) print(task.description) self.current_evaluator.human_mode = True evaluation_results = self.evaluate() print(evaluation_results, end="") while evaluation_results["completeness"] != 1.0: sleep(2) evaluation_results = self.evaluate() print("\r" + str(evaluation_results), end="") self.close_task() def export_action_space(self) -> dict[str, list[Action]]: """Returns the action spaces from all environments. Returns: A dict of action lists for each environment, keyed by environment name. """ result = {env.name: env.action_space for env in self.environment_map.values()} if self.multienv: return result return self._merge_lists(result) def _verify_spaces(self) -> None: """Make sure all actions and observations are unique.""" observation_name_set = set() action_name_set = set() for env in self.environment_map.values(): for action in env.action_space: if action.name in action_name_set: raise ValueError( "Dulplicated action names are not allowed in single " "environment benchmark." ) action_name_set.add(action.name) for observation in env.observation_space: if observation.name in observation_name_set: raise ValueError( "Dulplicated observation names are not allowed in the " "single environment benchmark." 
) observation_name_set.add(observation.name) def _generate_action_map(self) -> None: self.action_map: dict[str, Environment] = {} for env in self.environment_map.values(): for action in env.action_space: self.action_map[action.name] = env def _get_env( self, env_name: str | None = None, action_name: str | None = None ) -> Environment: # env_name exists just return it if env_name is not None: return self.environment_map[env_name] # or in multienv use default env, in singlenev use action_name mapping if action_name is not None and not self.multienv: return self.action_map[action_name] return self.environment_map[self.default_env] def _take_env_action(self, action: Action) -> Any: if action.env_name is None: env = self.environment_map[self.default_env] else: env = self.environment_map[action.env_name] return env.take_action(action) def _set_env_action(self, action: Action) -> None: if action.env_name is None: env = self.environment_map[self.default_env] else: env = self.environment_map[action.env_name] env.set_action(action) if not self.multienv: self.action_map[action.name] = env def _reset_environments(self): for env in self.environment_map.values(): env.reset() if not self.multienv: self._generate_action_map() def _get_task_by_id(self, task_id: str) -> Task: result = [task for task in self.tasks if task_id == task.id] if len(result) == 0: # Doesn't find the task raise TaskNotFound(f"No such task: {task_id}") return result[0] def _merge_dicts( self, env_dict: dict[str, dict[str, Any]] ) -> dict[str, dict[str, Any]]: "In single environment mode, merge aciton_space/observation_space in root." result = {} for dict_value in env_dict.values(): result.update(dict_value) return {self.default_env: result} def _merge_lists(self, env_dict: dict[str, list]) -> dict[str, list]: "In single environment mode, merge aciton_space/observation_space in root." 
result = [] for dict_value in env_dict.values(): result.extend(dict_value) return {self.default_env: result} def create_benchmark(config: BenchmarkConfig) -> Benchmark: """Creates a benchmark by BenchmarkConfig""" if isinstance(config, BenchmarkConfig): environments = [ create_environment(env_config) for env_config in config.environments ] parameters = dict(config) parameters["environments"] = environments return Benchmark(**parameters) else: raise ValueError("Unsupport benchmark config type.") ================================================ FILE: crab/core/csv_log.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import csv from pathlib import Path from typing import Any class CSVLog: def __init__(self, csv_path: Path, headers: list[str]) -> None: self.csv_path = csv_path self.header = headers if not csv_path.exists(): with open(csv_path, "w", newline="") as file: writer = csv.writer(file) writer.writerow(headers) def write_row(self, data: list[Any]): assert len(data) == len(self.header) with open(self.csv_path, "a", newline="") as file: writer = csv.writer(file) writer.writerow(data) ================================================ FILE: crab/core/decorators.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from typing import Callable from .models import Action, Evaluator def _decorator(func, cls: type[Action], options: dict | None = None) -> Action: action = cls.from_function(func) if options is not None: for key in options: setattr(action, key, options[key]) return action def action(*args: Callable, env_name: str | None = None, local=False): """Use @action to change a function to an Action""" if args and callable(args[0]): return _decorator(args[0], Action) return lambda func: _decorator(func, Action, {"env_name": env_name, "local": local}) def evaluator( *args: Callable, require_submit: bool = False, env_name: str | None = None, local=False, ): """Use @evaluator to change a function to an Evaluator""" if args and callable(args[0]): return _decorator(args[0], Evaluator) return lambda func: _decorator( func, Evaluator, {"require_submit": require_submit, "env_name": env_name, "local": local}, ) ================================================ FILE: crab/core/environment.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import json
import logging
from typing import Any

from httpx import Client

from crab.utils import decrypt_message, encrypt_message, generate_key_from_env
from crab.utils.measure import timed

from .exceptions import ActionNotFound
from .models import Action, ClosedAction, EnvironmentConfig

logger = logging.getLogger("crab-server")


class Environment:
    """
    A crab environment for language model agent interaction and evaluation.

    This class supports action execution and observation within a simulated or
    actual ecosystem. The environment is defined by customizable action and
    observation spaces, comprising various crab actions. Actions should include
    comprehensive docstrings to facilitate agent understanding and interaction.

    Typically, users instantiate this class directly to perform actions within the
    local execution context (i.e., the device running the crab framework). This
    class may also serve as a base for specialized environments requiring unique
    action execution processes, such as forwarding actions to remote systems for
    execution. This is achieved by overriding the `take_action` method.

    Actions defined in the `action_space`, `observation_space`, or `reset`, as
    well as those invoked through the `take_action` method that include an `env`
    parameter, will have this parameter automatically populated with the current
    environment instance. This allows actions to access and manipulate environment
    states and variables.

    Attributes:
        name (str): The name of the environment.
        description (str): A description of the environment.
        trajectory (List[tuple[str, dict[str, Any], Any]]): A record of actions
            taken, their parameters, and the results.

    Args:
        name (str): The name of the environment.
        action_space (List[Action]): A list of actions that can be executed,
            defining the possible interactions agents can undertake.
        observation_space (List[ClosedAction]): A list of observations defining
            the possible states agents can perceive.
        description (str, optional): A textual description of the environment.
            Defaults to an empty string.
        reset (Action | None, optional): An action to reset the environment to its
            initial state. Defaults to `None`.
        remote_url (Action | None, optional): If set, the action will be taken at
            remote machine, by default it will be taken at local. Example:
            `http://192.168.1.1:8000`. Defaults to `None`.
    """

    def __init__(
        self,
        name: str,
        action_space: list[Action],
        observation_space: list[ClosedAction],
        description: str = "",
        reset: Action | None = None,
        remote_url: str | None = None,
        # NOTE(review): mutable default — harmless because it is only read,
        # never mutated, but a None sentinel would be safer; confirm before
        # changing the signature.
        extra_attributes: dict[str, Any] = {},
    ) -> None:
        self.name = name
        self.description = description
        # (action_name, parameters, result) triples, appended by step().
        self.trajectory: list[tuple[str, dict[str, Any], Any]] = []
        self.observation_history: list[dict[str, Any]] = []
        # Keep the pristine action space so reset() can restore it.
        self._origin_action_space = action_space
        self._observation_space = observation_space
        self._reset = reset
        self._action_map = {action.name: action for action in action_space}
        # HTTP client toward a remote crab server; None means local execution.
        self._client: Client | None = None
        if remote_url is not None:
            self._client = Client(base_url=remote_url, timeout=60)
        for key, value in extra_attributes.items():
            setattr(self, key, value)
        # Optional symmetric key for encrypting remote action payloads.
        self._enc_key = generate_key_from_env()

    def step(
        self,
        action_name: str,
        parameters: dict[str, Any] = {},
    ):
        """
        Executes an action that is in the action space and recorded to the
        trajectory.

        Args:
            action_name: Name of the action to execute. Must be in action space.
            parameters (dict[str, Any], optional): Parameters for the action.
                Defaults to an empty `dict`.
Returns: Any: The result of the action execution. Raises: ActionNotFound: If the action is not found within the environment's action space. """ if action_name not in self._action_map: logger.error(f'Env "{self.name}": receives unkown action "{action_name}"') raise ActionNotFound(f"Action {action_name} not found in the environment") action_handler = self._action_map[action_name] result = self.take_action(action_handler, parameters) self.trajectory.append((action_handler.name, parameters, result)) return result def take_action( self, action: Action, parameters: dict[str, Any] = {}, ) -> Any: """ Executes an action within the environment. Args: action (Action): The action to execute. Can be an action name or an `Action` object. parameters (dict[str, Any], optional): Parameters for the action. Defaults to an empty `dict`. Returns: Any: The result of the action execution. """ try: result = self._action_endpoint(action, parameters) logger.info( f'Env "{self.name}": action: "{action.name}" successed. ' "result: {result}." ) return result except: logger.exception( f'Env "{self.name}": action: "{action}" failed:', stack_info=True ) raise @timed def observe(self) -> dict[str, Any]: """ Observes the current state. Returns: Dict[str, Any]: A dictionary containing the current observations. Keys represent the names of the observation actions. """ result = {o.name: self.take_action(o) for o in self.observation_space} self.observation_history.append(result) return result @timed def observe_with_prompt( self, prompt_tools: dict[str, Action] ) -> tuple[dict[str, Any], dict[str, Any]]: """ Observes the current state with prompt. 
""" observations = self.observe() prompts = {} for ob_name, value in observations.items(): if ob_name in prompt_tools: action = prompt_tools[ob_name] key = next(iter(action.get_required_params())) prompts[ob_name] = self._action_endpoint(action, {key: value}) return observations, prompts def set_action(self, action: Action) -> None: """ Adds an action in the environment's action space, either replace if the action name exist. Args: action (Action): The action to replace or add. """ self._action_map[action.name] = action def start(self) -> None: """Starts the environment.""" pass def close(self) -> None: """Closes the environment, performing any necessary cleanup.""" pass def reset(self) -> None: """Resets the environment based on the provided reset action""" self._action_space = self._origin_action_space self.action_map = {action.name: action for action in self._action_space} if self._reset is not None: self.take_action(self._reset) @property def action_space(self) -> list[Action]: return list(self._action_map.values()) @property def observation_space(self) -> list[ClosedAction]: return self._observation_space def _action_endpoint(self, action: Action, parameters: dict[str, Any]): """Rewrite to support different environments.""" if self._client is not None and not action.local: data = json.dumps( { "action": action.to_raw_action(), "parameters": action.parameters(**parameters).model_dump(), } ) content_type = "application/json" if self._enc_key is not None: data = encrypt_message(data, self._enc_key) content_type = "text/plain" # send action to remote machine response = self._client.post( "/raw_action", content=data, headers={"Content-Type": content_type}, ) resp_content = response.content.decode("utf-8") if self._enc_key is not None: resp_content = decrypt_message(resp_content, self._enc_key) resp_json = json.loads(resp_content) return resp_json["action_returns"] else: # or directly execute it action = action.set_kept_param(env=self) return 
action.run(**parameters) def create_environment(config): if isinstance(config, EnvironmentConfig): return Environment(**dict(config)) else: raise ValueError("Unsupported environment config type.") ================================================ FILE: crab/core/exceptions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== class ActionNotFound(ValueError): pass class TaskNotFound(ValueError): pass ================================================ FILE: crab/core/experiment.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
# ===========

import json
import traceback
from datetime import datetime
from pathlib import Path
from time import sleep
from typing import Literal

from crab.utils.common import base64_to_image

from .agent_policy import AgentPolicy
from .benchmark import Benchmark
from .csv_log import CSVLog
from .models import ActionOutput, MessageType

# Per-step metric columns written to each run's metrics.csv.
CURRENT_EXPERIMENT_COLUMNS = [
    "step",
    "action",
    "total_nodes",
    "complete_nodes",
    "completeness",
    "completeness_per_action",
    "step_to_complete",
    "longest_unfinished_path_length",
    "token_usage",
]

# One-row-per-run summary columns written to the shared main_log.csv.
MAIN_LOG_COLUMNS = [
    "time",
    "agent_policy",
    "model",
    "task_id",
    "total_steps",
    "terminate_reason",
    "total_nodes",
    "complete_nodes",
    "completeness",
    "completeness_per_action",
    "step_to_complete",
    "longest_unfinished_path_length",
    "token_usage",
]


class Experiment:
    """Drives one agent policy through one benchmark task, logging every step.

    Responsibilities: observe via the benchmark, query the agent policy for
    actions, execute them, and persist prompts, images, metrics, and a final
    summary row under ``log_dir`` (when a log directory is provided).
    """

    def __init__(
        self,
        benchmark: Benchmark,
        task_id: str,
        agent_policy: AgentPolicy | Literal["human"],
        log_dir: Path | None = None,
    ) -> None:
        # When agent_policy is the literal "human", start_benchmark delegates
        # to the benchmark's human evaluation flow and no agent runs.
        self.benchmark = benchmark
        self.task_id = task_id
        self.agent_policy = agent_policy
        # log_dir of None disables all file logging (see the write_* methods).
        self.log_dir = log_dir

    def write_message(self, message: str, step: int):
        """Append a step-delimited message to messages.txt for this run."""
        with open(self.message_path, "a") as file:
            file.write("=" * 20 + f"Step: {step}" + "=" * 20 + "\n" + message + "\n")

    def write_task_info_json(self, task_info_path: Path):
        """Dump a static description of the task and its environments to JSON."""
        envs_info = {}
        for name, env in self.benchmark.environment_map.items():
            # Reads the environment's private maps directly; observation
            # actions are keyed by action name.
            actions = {
                name: action.description for name, action in env._action_map.items()
            }
            observations = {
                action.name: action.description for action in env._observation_space
            }
            envs_info[name] = {
                "description": env.description,
                "actions": actions,
                "observations": observations,
            }
        task_info = {
            "benchmark_name": self.benchmark.name,
            "task_id": self.task_id,
            "task_description": self.task.description,
            "envs": envs_info,
        }
        with open(task_info_path, "w") as file:
            json.dump(task_info, file, indent=4)

    def init_log_dir(self):
        """Create the per-run directory tree and CSV logs.

        Layout: log_dir/<task_id>/<PolicyClass>(<model>)/<timestamp>/ with
        metrics.csv, prompt/, images/, and messages.txt inside. No-op when
        log_dir is None.
        """
        if self.log_dir is not None:
            self.log_dir.mkdir(exist_ok=True, parents=True)
            self.main_log = CSVLog(self.log_dir / "main_log.csv", MAIN_LOG_COLUMNS)
            self.task_info_dir = self.log_dir / self.task_id
            self.task_info_dir.mkdir(exist_ok=True, parents=True)
            self.write_task_info_json(self.task_info_dir / "task_info.json")
            self.time_now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
            self.current_experiment_dir = (
                self.task_info_dir
                / f"{self.agent_policy.__class__.__name__}"
                f"({self.agent_policy.get_backend_model_name()})"
                / self.time_now
            )
            self.current_experiment_dir.mkdir(parents=True)
            self.current_experiment_log = CSVLog(
                self.current_experiment_dir / "metrics.csv", CURRENT_EXPERIMENT_COLUMNS
            )
            self.prompt_path = self.current_experiment_dir / "prompt"
            self.image_path = self.current_experiment_dir / "images"
            self.prompt_path.mkdir()
            self.image_path.mkdir()
            self.message_path = self.current_experiment_dir / "messages.txt"

    def get_prompt(self) -> dict[str, list[tuple[str, MessageType]]]:
        """Return the current multi-environment observation as agent prompt."""
        return self.benchmark.observe()

    def execute_action(self, response: list[ActionOutput]) -> bool:
        """Run each action the agent proposed; return True when terminated."""
        for action in response:
            benchmark_result = self.benchmark.step(
                action=action.name,
                parameters=action.arguments,
                env_name=action.env,
            )
            self.metrics = benchmark_result.evaluation_results
            if benchmark_result.terminated:
                print("\033[92m" f"Task finished, result: {self.metrics}" "\033[0m")
                self.write_current_log_row(action)
                self.write_main_csv_row(benchmark_result.info["terminate_reason"])
                if "exception_detail" in benchmark_result.info:
                    self.write_exception_detail(
                        benchmark_result.info["exception_detail"]
                    )
                return True
            print(
                "\033[92m"
                f'Action "{action.name}" in env "{action.env}" success. '
                f"Current evaluation results: {self.metrics}\n"
                "\033[0m"
            )
            self.write_current_log_row(action)
            # step counter advances once per executed (non-terminal) action
            self.step_cnt += 1
        return False

    def log_prompt(self, prompt):
        """Append the prompt to per-env markdown files; save images to disk."""
        for env in prompt:
            with open(self.prompt_path / f"{env}_prompt.md", "a") as prompt_file:
                prompt_file.write(f"### Step {self.step_cnt}\n\n")
                for message, message_type in prompt[env]:
                    if message_type == MessageType.IMAGE_JPG_BASE64:
                        file_name = f"{env}_{self.step_cnt}.png"
                        base64_to_image(message).save(self.image_path / file_name)
                        prompt_file.write(f"![](../images/{file_name})\n\n")
                    else:
                        prompt_file.write(message + "\n\n")

    def step(self, it) -> bool:
        """One observe→chat→act cycle. Returns True when the run should stop."""
        print("=" * 40)
        print(f"Start agent step {self.step_cnt}:")
        prompt = self.get_prompt()
        self.log_prompt(prompt)
        try:
            response = self.agent_policy.chat(prompt)
        except Exception:
            # Agent-side failure terminates the run but is still logged.
            print(traceback.format_exc())
            self.write_main_csv_row("agent_exception")
            self.write_exception_detail(traceback.format_exc())
            return True
        # content = response["content"]
        # self.write_message(str(content), it)
        # print("\033[94m" f"Agent Reponse: {content}" "\033[0m")
        print(f"So agent take action: {response}")
        return self.execute_action(response)

    def start_benchmark(self):
        """Entry point: set up the task and loop `step` up to 50 iterations."""
        if self.agent_policy == "human":
            self.benchmark.human_evaluation(self.task_id)
            return
        env_description = {}
        for env in self.benchmark.environment_map:
            env_description[env] = self.benchmark.environment_map[env].description
        self.task, action_space = self.benchmark.start_task(self.task_id)
        self.agent_policy.reset(
            task_description=self.task.description,
            action_spaces=action_space,
            env_descriptions=env_description,
        )
        print(
            f'Start benchmark "{self.benchmark.name}", task id "{self.task.id}": '
            f'"{self.task.description}"'
        )
        self.init_log_dir()
        self.step_cnt = 0
        self.metrics = self.benchmark.evaluate()
        # Sanity check: a fresh task must start with nothing completed.
        if self.metrics["complete_nodes"] != 0:
            print("Graph Evaluator start with non-zero value. Check environment setup.")
            return
        for it in range(50):
            try:
                terminated = self.step(it)
            except KeyboardInterrupt:
                self.write_main_csv_row("keyboard_interrupt")
                return
            if terminated:
                return
            sleep(2)
            # input("Press enter to do next step:")

    def write_exception_detail(self, exception_info: str):
        """Persist a traceback string; no-op when logging is disabled."""
        if self.log_dir is None:
            return
        with open(self.current_experiment_dir / "exception_detail.txt", "w") as file:
            file.write(exception_info)

    def write_current_log_row(self, action):
        """Append one per-step metrics row; no-op when logging is disabled."""
        if self.log_dir is None:
            return
        self.current_experiment_log.write_row(
            [
                self.step_cnt,
                str(action),
                self.metrics["total_nodes"],
                self.metrics["complete_nodes"],
                self.metrics["completeness"],
                self.metrics["completeness_per_action"],
                self.metrics["step_to_complete"],
                self.metrics["longest_unfinished_path_length"],
                self.agent_policy.get_token_usage(),
            ]
        )

    def write_main_csv_row(self, terminate_reason):
        """Append the run-summary row; no-op when logging is disabled."""
        if self.log_dir is None:
            return
        self.main_log.write_row(
            [
                self.time_now,
                self.agent_policy.__class__.__name__,
                self.agent_policy.get_backend_model_name(),
                self.task_id,
                self.step_cnt,
                terminate_reason,
                self.metrics["total_nodes"],
                self.metrics["complete_nodes"],
                self.metrics["completeness"],
                self.metrics["completeness_per_action"],
                self.metrics["step_to_complete"],
                self.metrics["longest_unfinished_path_length"],
                self.agent_policy.get_token_usage(),
            ]
        )


================================================
FILE: crab/core/graph_evaluator.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========

from collections import deque
from typing import Any

import networkx as nx

from .environment import Environment
from .models import Evaluator


class GraphEvaluator:
    """Tracks task progress as a DAG of Evaluator nodes.

    A node "passes" when its evaluator action returns True; a node becomes
    eligible once all its predecessors have passed (unless `enable_shortcut`
    allows evaluating every node each step). The task is complete when the
    single sink node passes.
    """

    def __init__(
        self,
        incoming_graph_data,
        enable_shortcut: bool = False,
    ) -> None:
        # incoming_graph_data: anything networkx.DiGraph accepts; must be a
        # non-empty DAG with exactly one sink node.
        self.G = nx.DiGraph(incoming_graph_data)
        assert nx.is_directed_acyclic_graph(self.G)
        self.count: int = 0  # number of `step` calls performed so far
        self.total_nodes: int = self.G.number_of_nodes()
        assert self.total_nodes != 0
        self.complete_nodes: int = 0
        self.completeness: float = 0.0
        self.completeness_per_action: float = 0.0
        self.step_to_complete: int = self.G.number_of_edges()
        self.longest_unfinished_path_length: int = nx.dag_longest_path_length(self.G)
        self.enable_shortcut: bool = enable_shortcut
        # Set the sink node for the DAG:
        sink_nodes: list[Evaluator] = [
            node for node, out_degree in self.G.out_degree() if out_degree == 0
        ]
        if len(sink_nodes) != 1:
            raise ValueError("Graph should have exactly one sink node.")
        self.sink_node: Evaluator = sink_nodes[0]
        # In human mode, `local` evaluators are assumed to pass without
        # being executed (see step()).
        self.human_mode = False
        self.reset()

    def reset(self):
        """Clear all pass records and predecessor counters."""
        self.count = 0
        for node in self.G.nodes():
            # remaining_predecessors reaches 0 when a node becomes eligible;
            # passing_count records the step index at which a node passed
            # (None = not yet passed).
            self.G.nodes[node]["remaining_predecessors"] = self.G.in_degree(node)
            self.G.nodes[node]["passing_count"] = None

    def step(
        self,
        envs: dict[str, Environment],
        default_env: str = "root",
    ):
        """Run all currently-eligible evaluators, cascading newly unlocked
        ones within the same step, then refresh the derived metrics.

        Raises:
            ValueError: If called after the graph is already complete.
        """
        if self.is_complete():
            raise ValueError(
                "GraphEvaluator has already completed and "
                "cannot perform another step."
            )
        run_evaluators = set()
        evaluators = self.get_next_source_nodes()
        while evaluators:
            for evaluator in evaluators:
                if evaluator.local and self.human_mode:
                    # Human mode: trust local checks instead of running them.
                    result = True
                else:
                    environment = envs[evaluator.env_name or default_env]
                    result = environment.take_action(evaluator)
                if result:
                    self.G.nodes[evaluator]["passing_count"] = self.count
                    self.complete_nodes += 1
                    for _, out_node in self.G.out_edges(evaluator):
                        self.G.nodes[out_node]["remaining_predecessors"] -= 1
                    if self.is_complete():
                        # Sink passed: count everything as complete and stop.
                        self.complete_nodes = self.total_nodes
                        break
            run_evaluators.update(evaluators)
            # Only evaluate nodes unlocked this round, never re-run ones
            # already executed in this step.
            evaluators = self.get_next_source_nodes() - run_evaluators
        self.update()

    def get_next_source_nodes(self) -> set[Evaluator]:
        r"""Get next source nodes to evaluate."""
        if not self.enable_shortcut:
            # Eligible = not yet passed and all predecessors passed.
            source_nodes: list[Evaluator] = []
            for node in self.G.nodes(data=True):
                if (
                    node[1]["passing_count"] is None
                    and node[1]["remaining_predecessors"] == 0
                ):
                    source_nodes.append(node[0])
        else:
            # Shortcut mode considers every node each step.
            source_nodes = list(self.G.nodes())
        return set(source_nodes)

    def entry(self) -> bool:
        # True when every node in the graph has passed.
        return all(count is not None for _, count in self.G.nodes(data="passing_count"))

    def update(self):
        """Recompute the derived metrics after a step."""
        self.count += 1
        self.completeness = float(self.complete_nodes / self.total_nodes)
        self.completeness_per_action = self.completeness / self.count
        self.step_to_complete = self.calculate_step_to_complete()
        self.longest_unfinished_path_length = (
            self.calculate_longest_unfinished_path_length()
        )

    def calculate_longest_unfinished_path_length(self) -> int:
        """Length (in edges) of the longest all-unfinished path into the sink."""
        longest_path_length: int = 0
        if self.G.nodes[self.sink_node]["passing_count"] is not None:
            return longest_path_length
        # Initialize set to keep track of visited nodes
        visited = set()
        # Initialize queue for BFS
        queue = deque([[self.sink_node]])
        # BFS traversal with path
        while queue:
            path = queue.popleft()
            node = path[0]
            # Mark the node as visited
            visited.add(node)
            # NOTE(review): reads as max(...) - 1 rather than
            # max(len(path) - 1, ...). It still yields len(path) - 1 because
            # BFS dequeues paths in non-decreasing length order, so
            # len(path) >= longest_path_length + 1 here — confirm before
            # touching this line.
            longest_path_length = max(len(path), longest_path_length) - 1
            # Explore predecessor of the current node
            for predecessor in self.G.predecessors(node):
                # If predecessor is complete, skip it
                if self.G.nodes[predecessor]["passing_count"] is not None:
                    continue
                elif predecessor not in visited:
                    # Add path with predecessor to queue
                    queue.append([predecessor] + path)
        return longest_path_length

    def calculate_step_to_complete(self) -> int:
        """Count edges among unfinished ancestors of the sink (work left)."""
        # Initialize count for incomplete edges
        incomplete_edges: int = 0
        if self.G.nodes[self.sink_node]["passing_count"] is not None:
            return incomplete_edges
        # Initialize set to keep track of visited nodes
        visited = set()
        # Initialize queue for BFS
        queue = deque([self.sink_node])
        # BFS traversal
        while queue:
            # Pop node from queue
            node = queue.popleft()
            # Mark the node as visited
            visited.add(node)
            incomplete_edges += len(list(self.G.predecessors(node)))
            # Explore predecessor of the current node
            for predecessor in self.G.predecessors(node):
                # If predecessor is complete, skip it
                if self.G.nodes[predecessor]["passing_count"] is not None:
                    continue
                elif predecessor not in visited:
                    # Add predecessor to queue
                    queue.append(predecessor)
        return incomplete_edges

    def is_complete(self) -> bool:
        """The task is complete exactly when the sink node has passed."""
        return self.G.nodes[self.sink_node]["passing_count"] is not None

    def get_completeness(self) -> float:
        return self.completeness

    def get_completeness_per_action(self) -> float:
        return self.completeness_per_action

    def get_step_to_complete(self) -> int:
        return self.step_to_complete

    def get_longest_unfinished_path_length(self) -> int:
        return self.longest_unfinished_path_length

    def stat(self) -> dict[str, Any]:
        """Snapshot of all metrics as a plain dict (used for logging)."""
        return {
            "total_nodes": self.total_nodes,
            "complete_nodes": self.complete_nodes,
            "completeness": self.completeness,
            "completeness_per_action": self.completeness_per_action,
            "step_to_complete": self.step_to_complete,
            "longest_unfinished_path_length": self.longest_unfinished_path_length,
        }

    def _check_submit(self, environment: Environment) -> bool:
        """Check if the last action is _submit. If yes, return its result,
        otherwise return False.
        """
        if not environment.trajectory:
            return False
        last_action = environment.trajectory[-1]
        if last_action[0] != "_submit":
            return False
        # trajectory entries are (action_name, parameters, result)
        return last_action[2]

    def compute_radar_stats(self) -> dict[str, float]:
        """Three normalized [0, 1] metrics for the radar chart in visualize()."""
        longest_path_length = nx.dag_longest_path_length(self.G)
        return {
            "Completeness": float(self.completeness),
            "Efficiency": float(self.completeness_per_action),
            "Path Completeness Ratio": (
                longest_path_length - self.longest_unfinished_path_length
            )
            / longest_path_length,
        }

    @staticmethod
    def visualize(evaluators: list["GraphEvaluator"], path: str):
        """Render radar charts for several evaluators to an image file."""
        import plotly.graph_objects as go

        fig = go.Figure()
        for i, evaluator in enumerate(evaluators):
            radar_stats = evaluator.compute_radar_stats()
            fig.add_trace(
                go.Scatterpolar(
                    r=list(radar_stats.values()),
                    theta=list(radar_stats.keys()),
                    fill="toself",
                    name=f"Graph Evaluator {i}",
                )
            )
        fig.update_layout(
            polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
            showlegend=True,
        )
        fig.update_layout(
            margin=dict(l=150, r=150, t=150, b=150),
        )
        fig.write_image(path, scale=12, width=600, height=600)


================================================
FILE: crab/core/models/__init__.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========

# ruff: noqa: F401
from .action import Action, ClosedAction
from .agent_interface import ActionOutput, BackendOutput, Message, MessageType
from .benchmark_interface import StepResult
from .config import BenchmarkConfig, EnvironmentConfig, VMEnvironmentConfig
from .evaluator import Evaluator
from .task import GeneratedTask, SubTask, SubTaskInstance, Task

__all__ = [
    "Action",
    "ClosedAction",
    "MessageType",
    "Message",
    "ActionOutput",
    "BackendOutput",
    "StepResult",
    "BenchmarkConfig",
    "Task",
    "SubTask",
    "SubTaskInstance",
    "GeneratedTask",
    "Evaluator",
    "EnvironmentConfig",
    "VMEnvironmentConfig",
]


================================================
FILE: crab/core/models/action.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
# ===========

from functools import partial
from inspect import Parameter, Signature, signature
from types import NoneType
from typing import Annotated, Any, Callable, TypeAlias

from docstring_parser import parse
from pydantic import (
    AfterValidator,
    BaseModel,
    ValidationError,
    create_model,
    model_serializer,
)
from pydantic.fields import FieldInfo

from crab.utils.common import callable_to_base64

try:
    from typing import Self
except ImportError:
    from typing_extensions import Self

# Parameter names that are injected by the framework at runtime and hidden
# from the agent-facing parameter model (see Action.from_function).
KEPT_PARAMS = ["env"]
# Shared empty pydantic model used when an action takes no parameters.
EMPTY_MODEL = create_model("Empty")


class Action(BaseModel):
    """
    The core operational unit within the Crab system.

    This class stores parameters and return type definitions and can be easily
    converted into a JSON schema. It supports argument verification and
    includes a feature for retaining specific parameters.

    Attributes:
        name (str): The name of the action.
        entry (Callable): The actual entry function of the action.
        parameters (type[BaseModel]): Definition of input parameters.
        returns (type[BaseModel]): Definition of the return type. Note: The
            actual return type is specified by the `returns` attribute in this
            model.
        description (str | None): A clear and concise description of the
            function's purpose and behavior. Defaults to None.
        kept_params (dict[str, Any]): Parameters retained for internal use by
            the Crab system, such as 'env' for storing the current environment.
            These parameters do not appear in the `parameters` field and are
            automatically injected at runtime. Defaults to an empty dictionary.
        env_name (Optional[str]): Specify the environment the action is
            associated with. Defaults to None.
    """

    name: str
    entry: Callable
    parameters: type[BaseModel]
    returns: type[BaseModel]
    description: str | None = None
    kept_params: list[str] = []
    env_name: str | None = None
    local: bool = False

    def __eq__(self, other):
        return super().__eq__(other)

    def __hash__(self):
        # NOTE(review): hashes only on `entry`, while equality is pydantic
        # field-wise — two copies wrapping the same function hash alike.
        return hash(self.entry)

    def __call__(self, *args: Any, **kwargs: Any) -> Self:
        """Sets default values for the action.

        Direct calling of the action will not actually call the function, yet
        set defaults values for the action, so the agent don't need to or only
        need to provide part of the parameters. This method has two modes,
        full setting and partial setting. Full setting mode is applied when
        the user provides positional arguments, where all the required
        parameters must be provided and the action parameters will be empty.
        While if only keyword arguments are provided, partial setting mode is
        applied, where the parameter model will not be changed but only change
        the default value of the parameters.

        Note:
            Full setting mode is not stable.
        """
        if args:
            # this is closed function
            result = self.model_copy(
                update={
                    "entry": partial(self.entry, *args, **kwargs),
                    "parameters": EMPTY_MODEL,
                }
            )
            if self.description is not None:
                result.description = self.description + f" Input: {args} {kwargs}"
            return result
        else:
            # or it should only contain kwargs
            for key in kwargs:
                # verify the kwargs exist
                if key not in self.parameters.model_fields:
                    raise ValueError(
                        f'"{key}" is not a parameter of action "{self.name}"'
                    )
            result = self.model_copy(
                update={
                    "entry": partial(self.entry, **kwargs),
                }
            )
            if self.description is not None:
                result.description = self.description + f" Input: {args} {kwargs}"
            return result

    @staticmethod
    def _check_combinable(a: "Action", b: "Action") -> None:
        """Raise ValueError unless two actions can be composed together."""
        if set(a.kept_params) != set(b.kept_params):
            raise ValueError("Piped actions should have same kept parameters.")
        if a.env_name != b.env_name:
            raise ValueError("Piped actions should have same env_name.")
        if a.local != b.local:
            raise ValueError("Piped actions should have same `local` value.")

    def __rshift__(self, other_action: "Action") -> "Action":
        """Uses :obj:`>>` to pipe two actions together to form a new action.

        The returned action executes the actions from left to right. The
        output of the left action becomes the input to the right action,
        provided their parameters and return types are compatible.
        """
        required = other_action.get_required_params()
        if len(required) != 1:
            raise ValueError(
                "Return type of the former action must mathces the parameter type "
                "of the later action."
            )
        Action._check_combinable(self, other_action)
        a_entry = self.entry
        b_entry = other_action.entry
        kept_params = self.kept_params.copy()
        # Kept params are forwarded to the second stage only; the first
        # stage's result becomes the second stage's positional argument.
        entry = lambda *args, **kwargs: b_entry(
            a_entry(*args, **kwargs),
            **{key: kwargs[key] for key in kwargs if key in kept_params},
        )
        return Action(
            name=f"{self.name}_pipe_{other_action.name}",
            description=f"First {self.description}. Then use the result of the "
            f"former as input, {other_action.description}",
            parameters=self.parameters,
            returns=other_action.returns,
            entry=entry,
            kept_params=self.kept_params,
            env_name=self.env_name,
            local=self.local,
        )

    def __add__(self, other_action: "Action") -> "Action":
        """Uses :obj:`+` to combine two actions sequentially to form a new
        action.

        The returned action executes the actions from left to right. Its
        return value will be the return value of the right action.

        Note:
            "+" operator only support two action with no required parameters.
        """
        self_required = self.get_required_params()
        other_required = other_action.get_required_params()
        if len(other_required) > 1 or len(self_required) > 1:
            raise ValueError(
                '"+" operator only support two action with no required parameters.'
            )
        Action._check_combinable(self, other_action)
        a_entry = self.entry
        b_entry = other_action.entry
        # Evaluate both, keep only the second result (tuple index [1]).
        entry = lambda **kwargs: (a_entry(**kwargs), b_entry(**kwargs))[1]
        return Action(
            name=f"{self.name}_then_{other_action.name}",
            description=f"{self.description} Then, {other_action.description}",
            parameters=EMPTY_MODEL,
            returns=other_action.returns,
            entry=entry,
            kept_params=self.kept_params,
            env_name=self.env_name,
            local=self.local,
        )

    def run(self, **kwargs) -> Any:
        """Verifies the action parameters then runs the action."""
        if self.kept_params:
            raise RuntimeError("There are unassigned kept parameters.")
        try:
            # Round-trips kwargs through the parameter model to coerce types.
            kwargs = self.parameters(**kwargs).model_dump()
        except ValidationError:
            # NOTE(review): validation failures are silently ignored and the
            # raw kwargs are passed through unchanged.
            pass  # TODO: Exeception handle
        return self.entry(**kwargs)

    def set_kept_param(self, **params) -> Self:
        """Bind the framework-injected parameters (e.g. env) into `entry`."""
        kept_params = {key: params[key] for key in params if key in self.kept_params}
        result = self.model_copy()
        result.kept_params = []
        result.entry = partial(self.entry, **kept_params)
        return result

    def get_required_params(self) -> dict[str, FieldInfo]:
        """Return the parameter fields that have no default value."""
        return {
            name: info
            for name, info in self.parameters.model_fields.items()
            if info.is_required()
        }

    @model_serializer
    def to_openai_json_schema(self) -> dict:
        """Gets openai json schema from an action"""
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.parameters.model_json_schema(),
            # "returns": self.returns.model_json_schema()["properties"]["returns"],
        }

    def to_raw_action(self) -> dict[str, Any]:
        """Gets serialized action for remote execution"""
        return {
            "name": self.name,
            "dumped_entry": callable_to_base64(self.entry),
            "kept_params": list(self.kept_params),
        }

    @classmethod
    def from_function(cls, func: Callable) -> Self:
        """Generates an action from functions annotated by @action."""
        if func.__doc__ is None:
            # raise RuntimeError("The action must have a Google-style docstring.")
            parameters_descriptions = None
            func_description = None
            return_description = None
        else:
            docstring = parse(func.__doc__)
            parameters_descriptions = {
                param.arg_name: param.description for param in docstring.params
            }
            func_description = docstring.short_description or ""
            if docstring.long_description:
                func_description += "\n" + docstring.long_description
            if docstring.returns:
                return_description = docstring.returns.description
            else:
                return_description = None
        sign = signature(func)
        params = sign.parameters
        fields = {}
        kept_params = []
        for param_name, p in params.items():
            # Don't add kept parameters in parameters' model
            if param_name in KEPT_PARAMS:
                kept_params.append(param_name)
                continue
            # Variable parameters are not supported
            if p.kind in [Parameter.VAR_POSITIONAL, Parameter.VAR_KEYWORD]:
                continue
            # If the parameter type is not specified, it defaults to typing.Any
            annotation = Any if p.annotation is Parameter.empty else p.annotation
            # Check if the parameter has a description
            param_description = None
            if parameters_descriptions is not None:
                param_description = parameters_descriptions.get(param_name, None)
            # Check if the parameter has a default value
            if p.default is Parameter.empty:
                fields[param_name] = (
                    annotation,
                    FieldInfo(description=param_description),
                )
            else:
                fields[param_name] = (annotation, FieldInfo(default=p.default))
        model: type[BaseModel] = create_model(func.__name__, **fields)  # type: ignore
        # insert return to parameters
        return_annotation = (
            Any if sign.return_annotation == Signature.empty else sign.return_annotation
        )
        return_model: type[BaseModel] = create_model(
            func.__name__ + "_return",
            returns=(
                return_annotation or NoneType,
                FieldInfo(description=return_description, init=False),  # type: ignore
            ),
        )
        action = cls(
            name=func.__name__,
            entry=func,
            parameters=model,
            returns=return_model,
            description=func_description,
            kept_params=kept_params,
        )
        return action


def _check_no_param(action: Action) -> Action:
    # Validator for the ClosedAction alias below.
    if len(action.get_required_params()) != 0:
        raise ValueError("ClosedAction should not accept any parameter.")
    return action


ClosedAction: TypeAlias = Annotated[Action, AfterValidator(_check_no_param)]
"""The action type alias with no required parameters"""


================================================
FILE: crab/core/models/agent_interface.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========

from enum import IntEnum
from typing import Any

from pydantic import BaseModel

from .action import Action


class MessageType(IntEnum):
    # How a prompt message payload should be interpreted.
    TEXT = 0
    IMAGE_JPG_BASE64 = 1


# A single prompt message: (payload, how-to-interpret-it).
Message = tuple[str, MessageType]


class ActionOutput(BaseModel):
    """One action the agent decided to take."""

    name: str  # action name within the target environment's action space
    arguments: dict[str, Any]  # keyword arguments for the action
    env: str | None = None  # target environment; None means the default


class BackendOutput(BaseModel):
    """Raw response of a backend model: free text and/or parsed actions."""

    message: str | None
    action_list: list[ActionOutput] | None


class EnvironmentInfo(BaseModel):
    """Description of an environment presented to the agent."""

    description: str
    action_space: list[Action]


================================================
FILE: crab/core/models/benchmark_interface.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from typing import Any from pydantic import BaseModel class StepResult(BaseModel): truncated: bool terminated: bool action_returns: Any evaluation_results: dict[str, Any] info: dict[str, Any] ================================================ FILE: crab/core/models/config.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
# ===========
from typing import Any

from pydantic import BaseModel

from .action import Action, ClosedAction
from .task import Task


class EnvironmentConfig(BaseModel):
    """Declarative configuration for a single environment."""

    name: str
    action_space: list[Action]
    observation_space: list[ClosedAction]
    description: str = ""
    # Action run to reset the environment before/after an episode, if any.
    reset: Action | None = None
    # URL of a remote crab server hosting this environment; None means local.
    remote_url: str | None = None
    # NOTE: pydantic deep-copies field defaults per instance, so the mutable
    # default below is safe here (unlike a plain function default).
    extra_attributes: dict[str, Any] = {}


class VMEnvironmentConfig(BaseModel):
    """An environment that runs inside a VM, reached over HTTP."""

    inside_environment: EnvironmentConfig
    remote_url: str = "http://192.168.0.0:8000"


class BenchmarkConfig(BaseModel):
    """Top-level benchmark definition: tasks plus the environments they run in."""

    name: str
    tasks: list[Task]
    environments: list[EnvironmentConfig]
    # Name of the environment used when an action does not specify one.
    default_env: str | None = None
    # True when tasks span multiple environments simultaneously.
    multienv: bool = False
    # Per-environment visual prompting tools, keyed env name -> tool name.
    prompting_tools: dict[str, dict[str, Action]] = {}
    root_action_space: list[Action] = []
    # Maximum number of agent steps per episode.
    step_limit: int = 30
    # Actions executed once before every task in the benchmark.
    common_setup: list[ClosedAction] = []
================================================ FILE: crab/core/models/evaluator.py ================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
=========== from pydantic import BaseModel, field_validator from .action import Action class Evaluator(Action): require_submit: bool = False @field_validator("returns", mode="after") @classmethod def must_return_bool(cls, v: type[BaseModel]) -> type[BaseModel]: if v.model_fields["returns"].annotation is not bool: raise ValueError("Evaluator must return bool.") return v def __and__(self, other: "Evaluator") -> "Evaluator": Action._check_combinable(self, other) result = self.model_copy() result.name = (f"{self.name}_and_{other.name}",) result.description = f"{self.description} In the same time, {other.description}" self_entry = self.entry other_entry = other.entry result.entry = lambda: self_entry() and other_entry() return result def __or__(self, other: "Evaluator") -> "Evaluator": Action._check_combinable(self, other) result = self.model_copy() result.name = (f"{self.name}_or_{other.name}",) result.description = ( f"{self.description} If the previous one fails {other.description}" ) self_entry = self.entry other_entry = other.entry result.entry = lambda: self_entry() or other_entry() return result def __invert__(self) -> "Evaluator": result = self.model_copy() result.name = f"not_{self.name}" result.description = ( f"Check if the following description is False. {self.description}" ) self_entry = self.entry result.entry = lambda: not self_entry() return result ================================================ FILE: crab/core/models/task.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any, Callable, Literal
from uuid import uuid4

import networkx as nx
from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    field_validator,
    model_serializer,
)

from .action import Action, ClosedAction
from .evaluator import Evaluator


class Task(BaseModel):
    """A benchmark task: description, evaluator graph, and setup/teardown."""

    # arbitrary_types_allowed is required because nx.DiGraph is not a
    # pydantic-aware type.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    id: str
    description: str
    evaluator: nx.DiGraph | Evaluator
    setup: list[ClosedAction] | ClosedAction = []
    teardown: list[ClosedAction] | ClosedAction = []
    extra_action: list[Action] = []

    @field_validator("evaluator")
    @classmethod
    def change_evaluator_to_graph(
        cls, evaluator: nx.DiGraph | Evaluator
    ) -> nx.DiGraph:
        """Normalize a bare Evaluator into a single-node DiGraph.

        (Return annotation corrected: this returns a graph, not a str.)
        """
        if isinstance(evaluator, Evaluator):
            graph = nx.DiGraph()
            graph.add_node(evaluator)
            return graph
        return evaluator

    @field_validator("setup", "teardown")
    @classmethod
    def to_list(cls, action: Action | list[Action]) -> list[Action]:
        """Wrap a single action into a one-element list."""
        if isinstance(action, Action):
            return [action]
        return action


class SubTask(BaseModel):
    """A template task that can be composed with others via typed attributes."""

    id: str
    description: str
    # Maps attribute name -> acceptable input type name(s); a bare str is
    # expanded to a one-element list by the validator below.
    attribute_dict: dict[str, list[str] | str]
    # Type name of this subtask's output, matched against other subtasks'
    # attribute types during graph generation.
    output_type: str
    output_generator: Callable[[Any], str] | Literal["manual"] | None = None
    evaluator_generator: Callable[[Any], nx.DiGraph] | None = None
    setup: list[ClosedAction] | ClosedAction = []
    teardown: list[ClosedAction] | ClosedAction = []
    extra_action: list[Action] = []

    def __hash__(self) -> int:
        # Hash by id so SubTask can be a networkx node / set member.
        return hash(self.id)

    @field_validator("attribute_dict")
    @classmethod
    def expand_attribute_type(
        cls,
        attribute_dict: dict[str, list[str] | str],
    ) -> dict[str, list[str]]:
        """Ensure every attribute maps to a list of type names."""
        # Copy first: mutating the caller's dict inside a validator would be
        # a surprising side effect.
        attribute_dict = attribute_dict.copy()
        for key in attribute_dict:
            if isinstance(attribute_dict[key], str):
                attribute_dict[key] = [attribute_dict[key]]
        return attribute_dict


class SubTaskInstance(BaseModel):
    """A SubTask with concrete attribute values and (optionally) its output."""

    task: SubTask
    attribute: dict[str, Any]
    output: str | None = None
    # NOTE(review): default_factory=uuid4 yields a UUID object although the
    # field is annotated str (defaults are not validated by default in
    # pydantic v2) — confirm whether str(uuid4()) was intended.
    id: str = Field(default_factory=uuid4)

    def __hash__(self) -> int:
        return hash(self.id)

    @model_serializer
    def dump_model(self) -> dict[str, Any]:
        # Serialize only the subtask id (not the whole SubTask) to keep the
        # stored JSON compact; get_task_from_file resolves ids back.
        return {
            "task": self.task.id,
            "attribute": self.attribute,
            "output": self.output,
        }


class GeneratedTask(BaseModel):
    """A composed task: its instances plus the adjacency list linking them."""

    description: str
    tasks: list[SubTaskInstance]
    # networkx adjacency-list text (see nx.generate_adjlist).
    adjlist: str
    id: str = Field(default_factory=uuid4)
================================================ FILE: crab/core/task_generator.py ================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# ruff: noqa: E501
import argparse
import importlib
import itertools
import json
import os
import random
from pathlib import Path

import networkx as nx
import yaml
from openai import OpenAI
from termcolor import colored

from .models import GeneratedTask, SubTask, SubTaskInstance, Task

SYSTEM_PROMPT_SINGLE = """
You are a wise operator who is familiar with both the Ubuntu and Android operating systems.
Our goal is to use the output of the source task as the input for the target task.
You should describe of the task they combined together using several imperative sentences.
You cannot provide any extra information such as detailed operation method, yet only combined the taks description together in a reasonable way.
You shouldn't fill in the input attribute wrapped by curly brackets.
Source task: Find out the city located at coordinate (8.65759263086632, 7.520403498426244) via Google Maps.
Target task: Set the screen background as the first figure of {city_name} in Google.
Answer: Using Google Maps, find the city located at coordinates (8.65759263086632,7.520403498426244), search Google for the first image of that city, and set this image as the desktop background on an Ubuntu system.
"""

USER_PROMPT_SINGLE = """
Source task: {task1}
Target task: {task2}
Answer:
"""

SELECT_USER_START = """
Source attribute: {source_task}
Target tasks: {target_tasks}
Select a task from target tasks
Answer:
"""

SELECT_SYSTEM_PROMPT = """
You are a wise operator who is familiar with both the Ubuntu and Android operating systems.
Our goal is to use the output of the source task as the input for the target task.
You should identify the most reasonable target task from the list, explain why you choose it, and output the description of the task they combined together using several imperative sentences.
It is crucial to establish a connection between the source and target tasks and select the best one as the output.
Remember, you must select at least one with the crucial output format.
You must include the provided value and every details in each task.
You must use "======" to seperate each part (selected task number, combined task description, and explanation)
Here is an example:
Source task: Find out the city located at coordinate (8.65759263086632, 7.520403498426244) via Google Maps.
Target tasks:
Task 0: Set the screen background as the first figure of {input attribute} in Google.
Task 1: Close the progress of {input attribute} app via task manager.
Task 2: Download {input attribute} from the app store.
Task 3: Create a PowerPoint with one page containing Mount Alps.jpg and named as {input attribute 2}.
Task 4: Send message {input attribute 1} to +81 09074540472.
Answer:
0
======
Using Google Maps, find the city located at coordinates (8.65759263086632,7.520403498426244), search Google for the first image of that city, and set this image as the desktop background on an Ubuntu system.
======
This task is the most relevant and directly utilizes the output of the source task. Finding the city provides us with a specific location which can easily lead to a visual representation. Searching for an image of the city to set as a background is a practical application that visually celebrates the discovery of the city's identity.
"""

SELECT_USER_PROMPT = """
Source task: {source_task}
Target tasks: {target_tasks}
Answer:
"""


class TaskGenerator:
    """Class to generate tasks based on a directed graph of subtasks."""

    # NOTE(review): mutable default arguments below are shared across calls;
    # they are only read here, but replacing them with None-sentinels would
    # be safer — confirm before changing the signature.
    def __init__(
        self, attribute_pool: dict[str, list] = {}, subtasks: list[SubTask] = []
    ):
        """
        Initializes the TaskGenerator object.

        Parameters:
            attribute_pool (dict): A dictionary mapping attribute types to lists of possible values.
            subtasks (list): A list of SubTask objects to be included in the task generation graph.
        """
        self.G = nx.DiGraph()
        self.attribute_pool = attribute_pool
        self.graph_generation(subtasks)
        self.task_mapping = {task.id: task for task in subtasks}
        # The OpenAI client requires a key to be present; "EMPTY" lets the
        # client construct against a local/compatible endpoint.
        if not os.getenv("OPENAI_API_KEY"):
            os.environ["OPENAI_API_KEY"] = "EMPTY"
        self.client = OpenAI()

    @classmethod
    def from_config(cls, config_path: str) -> "TaskGenerator":
        """
        Class method to create a TaskGenerator instance from a configuration file.

        Parameters:
            config_path (str): Path to the YAML configuration file.

        Returns:
            TaskGenerator: An instance of TaskGenerator.
        """
        with open(config_path, "r") as f:
            data = yaml.safe_load(f)
        subtask_data = data["subtask"]
        attribute_pool = data["attribute_pool"]
        subtask_list = [
            SubTask(
                id=subtask["id"],
                description=subtask["description"],
                # "a/b" in YAML means the attribute accepts type a or type b.
                attribute_dict={
                    key: subtask["attribute_dict"][key].split("/")
                    for key in subtask["attribute_dict"]
                },
                output_type=subtask["output_type"],
            )
            for subtask in subtask_data
        ]
        return cls(attribute_pool, subtask_list)

    def graph_generation(self, subtask_list: list[SubTask]) -> None:
        """Generates a directed graph from a list of subtasks based on output and input types."""
        self.G.add_nodes_from(subtask_list)
        # Edge input_node -> output_node exists when input_node's output type
        # can feed one of output_node's attributes (stored as attribute_name).
        for input_node in self.G.nodes:
            for output_node in self.G.nodes:
                for name, type_list in output_node.attribute_dict.items():
                    for type in type_list:
                        if type == input_node.output_type:
                            self.G.add_edge(
                                input_node, output_node, attribute_name=name
                            )

    def combine(self, current_description: str, target_description: str) -> str:
        """
        Combines two task descriptions into a single task description using GPT model.

        Parameters:
            current_description (str): The current task description.
            target_description (str): The target task description to combine.

        Returns:
            str: The combined task description.
        """
        user_content = USER_PROMPT_SINGLE.format(
            task1=current_description, task2=target_description
        )
        response = self.client.chat.completions.create(
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT_SINGLE},
                {"role": "user", "content": user_content},
            ],
            model="gpt-4-turbo-preview",
        )
        return response.choices[0].message.content

    def gpt_choice(
        self,
        current_description: str,
        outgoing_edges: list[tuple[SubTask, SubTask, str]],
    ) -> tuple[SubTask, dict[str, str], str, str]:
        """
        Determines the best task choice from a list of possible target tasks using GPT model.

        Parameters:
            current_description (str): Description of the current task.
            outgoing_edges (list): List of possible outgoing edges representing target tasks.

        Returns:
            tuple: A tuple containing the chosen SubTask, attributes, new description, and combined description.
        """
        target_neighbours = ""
        selected_attributes = []
        new_descriptions = []
        for idx, edge in enumerate(outgoing_edges):
            _, node, attribute_name = edge
            attributes = self._fill_task_attributes(node, attribute_name)
            selected_attributes.append(attributes)
            # Keep the connecting attribute as a "{placeholder}" so the model
            # sees where the source task's output will be inserted.
            kwargs = attributes.copy()
            kwargs[attribute_name] = "{" + attribute_name + "}"
            new_description = node.description.format(**kwargs)
            new_descriptions.append(new_description)
            target_neighbours += "Task {0}: {1}\n".format(idx, new_description)
        user_content = SELECT_USER_PROMPT.format(
            source_task=current_description,
            target_tasks=target_neighbours,
        )
        response = self.client.chat.completions.create(
            messages=[
                {"role": "system", "content": SELECT_SYSTEM_PROMPT},
                {"role": "user", "content": user_content},
            ],
            model="gpt-4-turbo-preview",
        )
        response_message = response.choices[0].message
        # Response format (per SELECT_SYSTEM_PROMPT): index ====== combined
        # description ====== explanation.
        answers = response_message.content.split("======")
        index = int(answers[0].strip())
        combined_description = answers[1].strip()
        return (
            outgoing_edges[index][1],
            selected_attributes[index],
            new_descriptions[index],
            combined_description,
        )

    def random_walk(
        self, current_description: str, start_node: SubTask, random_number: int
    ) -> tuple[SubTask, dict[str, str], str, str] | None:
        """
        Performs a random walk from the starting node to generate a task sequence.

        Parameters:
            current_description (str): The current task description.
            start_node (SubTask): The starting subtask node.
            random_number (int): Maximum number of edges to consider.

        Returns:
            tuple | None: The 4-tuple from gpt_choice (next SubTask, attributes,
                new description, combined description) if a next step is
                available, otherwise None. (Annotation corrected to match
                gpt_choice's return.)
        """
        out_edges = list(self.G.out_edges(start_node, data="attribute_name"))
        if len(out_edges) == 0:
            print(colored("\n*** No neighbour points, generation stopped ***\n", "red"))
            return None
        if start_node.output_type == "None":
            print(colored("\n*** Output None, generation will stop ***\n", "red"))
            return None
        # Sample at most random_number candidate edges for the model to rank.
        if random_number <= len(out_edges):
            select_edge_list = random.sample(out_edges, random_number)
        else:
            select_edge_list = out_edges
        return self.gpt_choice(current_description, select_edge_list)

    def _fill_task_attributes(self, task: SubTask, kept_attribute: str):
        """
        Fills the task attributes by randomly selecting values from the attribute pool,
        except the kept attribute.

        Parameters:
            task (SubTask): The task whose attributes need to be filled.
            kept_attribute (str): The attribute to exclude from filling.

        Returns:
            dict: A dictionary of filled attributes.
        """
        attribute_types = task.attribute_dict.copy()
        attribute_types.pop(kept_attribute)
        return self._select_random_attributes(attribute_types)

    def _select_random_attributes(
        self, type_dict: dict[str, list[str]]
    ) -> dict[str, str]:
        """
        Randomly selects attributes for a task from the attribute pool based on the
        type dictionary. (Parameter annotation corrected: values are lists of
        type names, as produced by SubTask.expand_attribute_type.)

        Parameters:
            type_dict (dict): A dictionary of attribute names to attribute type lists.

        Returns:
            dict: A dictionary of selected attributes.
        """
        result = {}
        for attr_name, attr_type_list in type_dict.items():
            # Pool candidate values from every acceptable type, then pick one.
            pool = []
            for attr_type in attr_type_list:
                if attr_type not in self.attribute_pool:
                    raise ValueError(f"{attr_type} not in attribute pool.")
                pool.extend(self.attribute_pool[attr_type])
            result[attr_name] = random.choice(pool)
        return result

    @staticmethod
    def generate_single_node_task(subtask: SubTask):
        """
        Generates a single node task based on a SubTask instance. Interactive:
        attribute values are read from stdin.

        Parameters:
            subtask (SubTask): The subtask to generate a task for.

        Returns:
            tuple: A tuple containing the task description and a directed graph of the task.
        """
        print(colored(f"Generating task: {subtask.description}\n", "green"))
        attributes = {}
        for name, type_name in subtask.attribute_dict.items():
            value = input(
                colored(f'Input attribute "{name}" ({type_name}): ', "yellow")
            )
            attributes[name] = value
        description = subtask.description.format(**attributes)
        result_graph = nx.DiGraph()
        result_graph.add_node(SubTaskInstance(task=subtask, attribute=attributes))
        return description, result_graph

    def combine_subtask_list(self, subtask_list: list[SubTask]):
        """
        Combines a list of subtasks into a single task sequence. Interactive:
        each subtask's output is read from stdin and fed into the next one.

        Parameters:
            subtask_list (list): A list of SubTask instances to combine.

        Returns:
            tuple: A tuple containing the final task description and a directed graph of the task sequence.
        """
        start_node = subtask_list[0]
        attributes = self._select_random_attributes(start_node.attribute_dict)
        result_graph = nx.DiGraph()
        output = input(
            colored(
                f"What is the output of {start_node.description.format(**attributes)}: ",
                "yellow",
            )
        )
        last_node = SubTaskInstance(
            task=start_node, attribute=attributes, output=output or None
        )
        result_graph.add_node(last_node)
        current_description = start_node.description.format(**attributes)
        for task in subtask_list[1:]:
            current_description = self.combine(current_description, task.description)
            # The previous output fills the (single) first attribute of the
            # next subtask.
            key = next(iter(task.attribute_dict.keys()))
            attributes = {key: output}
            output = input(
                colored(
                    f"What is the output of {task.description.format(**attributes)}: ",
                    "yellow",
                )
            )
            current_node = SubTaskInstance(
                task=task, attribute=attributes, output=output or None
            )
            result_graph.add_edge(last_node, current_node)
            last_node = current_node
        return current_description, result_graph

    def combine_two_subtasks(
        self, sub_task_id_1: int, sub_task_id_2: int
    ) -> tuple[str, nx.DiGraph]:
        """
        Combines two subtasks into a single task sequence based on user input.

        Parameters:
            sub_task_id_1 (int): ID of the first subtask.
            sub_task_id_2 (int): ID of the second subtask.

        Returns:
            tuple: A tuple containing the combined task description and a directed graph of the task sequence.
        """
        sub_task_1 = self.task_mapping[sub_task_id_1]
        sub_task_2 = self.task_mapping[sub_task_id_2]
        print(colored(f"\nTask 1: {sub_task_1.description}", "cyan"))
        print(colored(f"Task 2: {sub_task_2.description}\n", "cyan"))
        attributes_1 = {}
        for name, types in sub_task_1.attribute_dict.items():
            value = input(
                colored(
                    f'Input attribute "{name}" ({types}) for the first task: ', "yellow"
                )
            )
            attributes_1[name] = value
        description_1 = sub_task_1.description.format(**attributes_1)
        output_1 = input(
            colored(
                f'What is the output of {description_1} ("{sub_task_1.output_type}"): ',
                "yellow",
            )
        )
        print(
            colored(
                f"\nThe output type of the first subtask is '{sub_task_1.output_type}'.\n",
                "cyan",
            )
        )
        attributes_2 = {}
        for name, types in sub_task_2.attribute_dict.items():
            # Wire task 1's output into task 2 either automatically (type
            # match) or after explicit user confirmation.
            if (
                sub_task_1.output_type in types
                or input(
                    colored(
                        f"Can the output '{sub_task_1.output_type}' be used as the '{name}' ({types}) of the second task? (yes/no): ",
                        "yellow",
                    )
                )
                .strip()
                .lower()
                == "yes"
            ):
                attributes_2[name] = output_1
            else:
                value = input(
                    colored(
                        f'Input attribute "{name}" ({types}) for the second task: ',
                        "yellow",
                    )
                )
                attributes_2[name] = value
        description_2 = sub_task_2.description.format(**attributes_2)
        # Let the user re-roll the GPT combination until satisfied.
        while True:
            combined_description = self.combine(description_1, description_2)
            print(
                colored(f"\n*** Combined Task: {combined_description} ***\n", "green")
            )
            if (
                input(
                    colored(
                        "Do you want to re-generate the combined task? (yes/no): ",
                        "yellow",
                    )
                )
                .strip()
                .lower()
                != "yes"
            ):
                break
        result_graph = nx.DiGraph()
        node1 = SubTaskInstance(
            task=sub_task_1, attribute=attributes_1, output=output_1
        )
        node2 = SubTaskInstance(task=sub_task_2, attribute=attributes_2)
        result_graph.add_node(node1)
        result_graph.add_node(node2)
        result_graph.add_edge(node1, node2)
        return combined_description, result_graph

    def task_generation(
        self,
        start_id: int | None = None,
        max_iter: int = 3,
        random_number: int = 5,
    ) -> tuple[str, list[tuple]]:
        """
        Generates a sequence of tasks starting from a given subtask ID or randomly.

        Parameters:
            start_id (int | None): The ID of the starting subtask or None to choose randomly.
            max_iter (int): The maximum number of iterations to perform in the generation process.
            random_number (int): The maximum number of neighbors to consider for random walk.

        Returns:
            tuple: The final task description and the list of per-step tuples
                (annotation corrected: entries are the tuples appended below,
                not bare SubTask objects).
        """
        description = ""
        task_list = []
        if start_id is None:
            start_node: SubTask = random.choice(list(self.G.nodes))
        else:
            # NOTE(review): if start_id matches no node, start_node stays
            # unbound and the next line raises UnboundLocalError — confirm
            # whether callers guarantee a valid id.
            for node in self.G.nodes:
                if node.id == start_id:
                    start_node: SubTask = node
                    break
        attributes = self._select_random_attributes(start_node.attribute_dict)
        description = start_node.description.format(**attributes)
        task_list.append((start_node, attributes, description))
        current_node = start_node
        for _ in range(max_iter - 1):
            next_node = self.random_walk(
                current_description=description,
                start_node=current_node,
                random_number=random_number,
            )
            if next_node is None:
                break
            task_list.append(next_node)
            # next_node is gpt_choice's 4-tuple: [3] is the combined
            # description, [0] the chosen SubTask.
            description = next_node[3]
            current_node = next_node[0]
        return description, task_list

    @staticmethod
    def generate_evaluator(
        subtasks_graph: nx.DiGraph,
    ):
        """
        Generates an evaluator graph from a directed graph of subtask instances.

        Parameters:
            subtasks_graph (nx.DiGraph): A directed graph of subtask instances.

        Returns:
            nx.DiGraph: A directed graph representing the combined evaluator.
        """
        evaluator_map = {}
        for node in subtasks_graph.nodes:
            evaluator_map[node.id] = node.task.evaluator_generator(**node.attribute)
        combined_evaluator_graph = nx.union_all(list(evaluator_map.values()))
        # Chain evaluators: every sink of the upstream subtask's evaluator
        # connects to every source of the downstream one.
        for from_node, to_node in subtasks_graph.edges:
            from_node_evaluator = evaluator_map[from_node.id]
            sink_nodes = [
                node
                for node, out_degree in from_node_evaluator.out_degree()
                if out_degree == 0
            ]
            to_node_evaluator = evaluator_map[to_node.id]
            start_nodes = [
                node
                for node, in_degree in to_node_evaluator.in_degree()
                if in_degree == 0
            ]
            combined_evaluator_graph.add_edges_from(
                itertools.product(sink_nodes, start_nodes)
            )
        return combined_evaluator_graph

    @staticmethod
    def dump_generated_task(
        description,
        task_instance_graph,
        dir_path=".",
    ):
        """
        Saves a generated task to a file.

        Parameters:
            description (str): The description of the generated task.
            task_instance_graph (nx.DiGraph): The directed graph of the task instance.
            dir_path (str): The directory path where the task file will be saved.
        """
        # Relabel nodes to integer ids so the adjacency list is compact and
        # stable for re-parsing in get_task_from_file.
        mapping = {node: idx for idx, node in enumerate(task_instance_graph.nodes)}
        id_graph = nx.relabel_nodes(task_instance_graph, mapping)
        generated_task = GeneratedTask(
            description=description,
            tasks=list(task_instance_graph.nodes),
            adjlist="\n".join(nx.generate_adjlist(id_graph)),
        )
        file_path = Path(dir_path) / f"{generated_task.id}.json"
        with open(file_path, "w") as f:
            f.write(generated_task.model_dump_json(indent=4))
        print(
            colored(
                "\n====================================================================\n",
                "magenta",
            )
        )
        print(colored(f"Task saved to: {file_path}", "magenta"))

    def get_task_from_file(self, file_name) -> Task:
        """
        Loads a task from a file.

        Parameters:
            file_name (str): The file name containing the task data.

        Returns:
            Task: An instance of Task loaded from the file.
        """
        with open(file_name, "r") as f:
            config = json.load(f)
        description = config["description"]
        graph_map = {}
        for idx, task_config in enumerate(config["tasks"]):
            graph_map[idx] = SubTaskInstance(
                task=self.task_mapping[task_config["task"]],
                attribute=task_config["attribute"],
                output=task_config["output"],
            )
        lines = config["adjlist"].split("\n")
        # NOTE(review): parse_adjlist returns an *undirected* Graph by
        # default; generate_evaluator reads .edges as directed. Confirm
        # whether create_using=nx.DiGraph was intended.
        graph = nx.parse_adjlist(lines, nodetype=int)
        subtask_graph = nx.relabel_nodes(graph, graph_map)
        evaluator = self.generate_evaluator(subtask_graph)
        # Deduplicate setup/teardown/extra actions shared by several subtasks.
        setup_set = set()
        teardown_set = set()
        extra_action_set = set()
        for node in subtask_graph.nodes:
            setup_set.update(node.task.setup)
            teardown_set.update(node.task.teardown)
            extra_action_set.update(node.task.extra_action)
        return Task(
            id=config["id"],
            description=description,
            evaluator=evaluator,
            setup=list(setup_set),
            teardown=list(teardown_set),
            extra_action=list(extra_action_set),
        )


def load_subtasks(version):
    """
    Loads subtasks from specified benchmark version modules.

    Parameters:
        version (str): The version of the benchmark to load subtasks from.

    Returns:
        tuple: A tuple containing two collections of subtasks
            (android first, then ubuntu).
    """
    a_subtasks_module = importlib.import_module(
        f"benchmarks.crab-benchmark-{version}.subtasks.a_subtasks"
    )
    u_subtasks_module = importlib.import_module(
        f"benchmarks.crab-benchmark-{version}.subtasks.u_subtasks"
    )
    return a_subtasks_module.collection, u_subtasks_module.collection


def generate_length1_all(
    generator: TaskGenerator, dir_path: str, subtask_collection: list
):
    """
    Generates tasks for all subtasks in a collection and saves them.

    Parameters:
        generator (TaskGenerator): The task generator instance.
        dir_path (str): The directory path where the tasks will be saved.
        subtask_collection (list): The collection of subtasks to generate tasks for.
    """
    for task in subtask_collection:
        description, graph = generator.generate_single_node_task(task)
        generator.dump_generated_task(description, graph, dir_path)
    print(
        colored(
            "\n==================== Task Generation Completed ====================\n",
            "magenta",
        )
    )


def generate_length1_by_id(generator: TaskGenerator, dir_path: str):
    """
    Generates a single task for a specified subtask ID and saves it.
    Loops forever, prompting for ids; terminate with Ctrl-C.

    Parameters:
        generator (TaskGenerator): The task generator instance.
        dir_path (str): The directory path where the task will be saved.
    """
    while True:
        subtask_id = input(colored("Please input the subtask ID: ", "yellow"))
        if subtask_id in generator.task_mapping:
            task = generator.task_mapping[subtask_id]
            print()
            description, graph = generator.generate_single_node_task(task)
            generator.dump_generated_task(description, graph, dir_path)
            print(
                colored(
                    "\n==================== Task Generation Completed ====================\n",
                    "magenta",
                )
            )
        else:
            print(colored("Invalid subtask ID. Please try again.", "red"))


def generate_length2_manual(generator: TaskGenerator, dir_path: str):
    """
    Manually generates a two-step task sequence from user-specified subtask IDs and
    saves it. Loops forever, prompting for id pairs; terminate with Ctrl-C.

    Parameters:
        generator (TaskGenerator): The task generator instance.
        dir_path (str): The directory path where the task sequence will be saved.
    """
    while True:
        sub_task_id_1 = input(
            colored("Please input the id of the first subtask: ", "yellow")
        )
        sub_task_id_2 = input(
            colored("Please input the id of the second subtask: ", "yellow")
        )
        if (
            sub_task_id_1 in generator.task_mapping
            and sub_task_id_2 in generator.task_mapping
        ):
            description, graph = generator.combine_two_subtasks(
                sub_task_id_1=sub_task_id_1, sub_task_id_2=sub_task_id_2
            )
            generator.dump_generated_task(description, graph, dir_path)
            print(
                colored(
                    "\n==================== Task Composition Completed ====================\n",
                    "magenta",
                )
            )
        else:
            missing_ids = [
                id
                for id in [sub_task_id_1, sub_task_id_2]
                if id not in generator.task_mapping
            ]
            print(
                colored(
                    f"Invalid input: ID {', '.join(missing_ids)} not found. Please try again.",
                    "red",
                )
            )


def main():
    """CLI entry point: parse arguments and dispatch to a generation mode."""
    parser = argparse.ArgumentParser(description="Task Generator for CRAB Benchmarks")
    parser.add_argument(
        "--version", type=str, default="v0", help="Benchmark version (e.g., v0, v1)"
    )
    parser.add_argument(
        "--mode",
        type=str,
        choices=[
            "generate_length1_all",
            "generate_length2_manual",
            "generate_length1_by_id",
        ],
        help="Mode to run the task generator",
    )
    parser.add_argument(
        "--dir_path", type=str, help="Directory path to save the generated tasks"
    )
    parser.add_argument(
        "--config_path", type=str, help="Path to the task generation configuration file"
    )
    args = parser.parse_args()
    Path(args.dir_path).mkdir(parents=True, exist_ok=True)
    a_collection, u_collection = load_subtasks(args.version)
    all_collection = u_collection + a_collection
    print(
        colored(
            "\n==================== Task Generation Starting ====================\n",
            "magenta",
        )
    )
    if args.mode == "generate_length1_all":
        generator = TaskGenerator(subtasks=all_collection)
        generate_length1_all(generator, args.dir_path, all_collection)
    elif args.mode == "generate_length2_manual":
        # Only this mode needs the attribute pool from the config file.
        with open(args.config_path, "r") as f:
            data = yaml.safe_load(f)
        attribute_pool = data["attribute_pool"]
        generator = TaskGenerator(attribute_pool, all_collection)
        generate_length2_manual(generator, args.dir_path)
    elif args.mode == "generate_length1_by_id":
        generator = TaskGenerator(subtasks=all_collection)
        generate_length1_by_id(generator, args.dir_path)
    else:
        print(
            colored(
                "Invalid mode selected. Please choose 'generate_length1_all', 'generate_length2_manual', or 'generate_length1_by_id'.",
                "red",
            )
        )


if __name__ == "__main__":
    main()
================================================ FILE: crab/environments/__init__.py ================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
================================================ FILE: crab/environments/template.py ================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from crab.core import Environment, EnvironmentConfig, action @action def set_state(value: bool, env: Environment) -> None: """ Set system state to the given value. Args: value (bool): The given value to set the system state. """ env.state = value @action def current_state(env: Environment) -> bool: """ Get current system state. """ return env.state template_environment_config = EnvironmentConfig( name="template_env", action_space=[set_state], observation_space=[current_state], description="A test environment", info=None, reset=set_state(False), ) ================================================ FILE: crab/server/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== ================================================ FILE: crab/server/api.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
import json

from fastapi import APIRouter, Request
from fastapi.responses import JSONResponse, PlainTextResponse

from crab.utils import (
    base64_to_callable,
    decrypt_message,
    encrypt_message,
    generate_key_from_env,
)

from .logger import crab_logger as logger

api_router = APIRouter()


@api_router.post("/raw_action")
async def raw_action(request: Request):
    """Perform the specified action with given parameters.

    The request body is a JSON document (optionally AES-encrypted when
    `CRAB_ENC_KEY` is configured) holding an `action` descriptor with a
    dill-serialized entry point and a `parameters` mapping. The response is
    encrypted if and only if the request was.
    """
    enc_key = generate_key_from_env()
    # Read the raw body; decrypt it first when an encryption key is set.
    body_text = (await request.body()).decode("utf-8")
    if enc_key is not None:
        body_text = decrypt_message(body_text, enc_key)
    payload = json.loads(body_text)
    action = payload["action"]
    parameters = payload["parameters"]
    # Rebuild the callable shipped by the client.
    entry = base64_to_callable(action["dumped_entry"])
    logger.info(f"remote action: {action['name']} received. parameters: {parameters}")
    # Inject the server-side environment when the action declared it kept
    # an `env` parameter.
    if "env" in action["kept_params"]:
        parameters["env"] = request.app.environment
    resp_data = {"action_returns": entry(**parameters)}
    if enc_key is None:
        return JSONResponse(content=resp_data)
    encrypted = encrypt_message(json.dumps(resp_data), enc_key)
    return PlainTextResponse(content=encrypted)
import argparse

from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    # Server bind address/port and the name of the environment config to load.
    HOST: str = "127.0.0.1"
    PORT: int = 8000
    ENVIRONMENT: str = "template_environment_config"


class EnvSettings(BaseSettings):
    # X display exported to the process environment before startup.
    DISPLAY: str = ":0"


def parse_args():
    """Parse command-line overrides for the server settings.

    Returns:
        argparse.Namespace: HOST, PORT and ENVIRONMENT (each None when the
        corresponding flag was not supplied).
    """
    parser = argparse.ArgumentParser(description="Application settings")
    option_specs = (
        ("--HOST", str, "Host of the application"),
        ("--PORT", int, "Port of the application"),
        ("--ENVIRONMENT", str, "Environment to be loaded"),
    )
    for flag, flag_type, help_text in option_specs:
        parser.add_argument(flag, type=flag_type, help=help_text)
    return parser.parse_args()
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import sys

from fastapi import Request
from fastapi.exception_handlers import (
    request_validation_exception_handler as _request_validation_exception_handler,
)
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse, PlainTextResponse

from .logger import crab_logger as logger


async def request_validation_exception_handler(
    request: Request, exc: RequestValidationError
) -> JSONResponse:
    """
    This is a wrapper to the default RequestValidationException handler of FastAPI.
    This function will be called when client input is not valid.

    Logs the validation errors together with the offending body and query
    parameters before delegating to FastAPI's default handler.
    """
    body = await request.body()
    query_params = request.query_params._dict  # pylint: disable=protected-access
    detail = {
        "errors": exc.errors(),
        "body": body.decode(),
        "query_params": query_params,
    }
    logger.info(detail)
    return await _request_validation_exception_handler(request, exc)


async def unhandled_exception_handler(
    request: Request, exc: Exception
) -> JSONResponse:
    """
    This middleware will log all unhandled exceptions.
    Unhandled exceptions are all exceptions that are not HTTPExceptions or
    RequestValidationErrors.

    Fix: the return annotation previously claimed PlainTextResponse while the
    function actually returns a JSONResponse.
    """
    # Client address may be absent (e.g. in tests), hence the getattr chain.
    client = getattr(request, "client", None)
    host = getattr(client, "host", None)
    port = getattr(client, "port", None)
    url = (
        f"{request.url.path}?{request.query_params}"
        if request.query_params
        else request.url.path
    )
    # sys.exc_info() still holds the in-flight exception at this point.
    exception_type, exception_value, exception_traceback = sys.exc_info()
    exception_name = getattr(exception_type, "__name__", None)
    logger.error(
        f'{host}:{port} - "{request.method} {url}" 500 Internal Server Error '
        f"<{exception_name}: {exception_value}>"
    )
    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal Server Error",
            "message": "An unexpected error occurred.",
        },
    )
import logging


def _configured_logger(name: str) -> logging.Logger:
    """Return the named logger forced to INFO level."""
    log = logging.getLogger(name)
    log.setLevel(logging.INFO)
    return log


# Loggers used by the server: uvicorn's own and the crab application logger.
uvicorn_logger = _configured_logger("uvicorn")
crab_logger = _configured_logger("crab-server")

# One shared line format for both console and file output.
_LOG_FORMAT = (
    "[%(asctime)s %(process)d:%(threadName)s] %(name)s - "
    "%(levelname)s - %(message)s | %(filename)s:%(lineno)d"
)

# dictConfig schema handed to uvicorn.run(log_config=...): everything goes to
# stderr and is appended to info.log.
LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "default": {
            "()": "uvicorn.logging.DefaultFormatter",
            "format": _LOG_FORMAT,
        },
        "logformat": {"format": _LOG_FORMAT},
    },
    "handlers": {
        "file_handler": {
            "class": "logging.FileHandler",
            "level": "INFO",
            "formatter": "logformat",
            "filename": "info.log",
            "encoding": "utf8",
            "mode": "a",
        },
        "default": {
            "formatter": "default",
            "class": "logging.StreamHandler",
            "stream": "ext://sys.stderr",
        },
    },
    "loggers": {
        "uvicorn.error": {
            "level": "INFO",
            "handlers": ["default", "file_handler"],
            "propagate": False,
        }
    },
    "root": {
        "level": "INFO",
        "handlers": ["default", "file_handler"],
        "propagate": False,
    },
}
import os

import uvicorn
from fastapi import FastAPI
from fastapi.exceptions import RequestValidationError

from crab import EnvironmentConfig, create_environment

from .api import api_router
from .config import EnvSettings, Settings, parse_args
from .exception_handlers import (
    request_validation_exception_handler,
    unhandled_exception_handler,
)
from .logger import LOGGING_CONFIG
from .middleware import log_request_middleware
from .utils import get_benchmarks_environments


def init(environment_config: EnvironmentConfig) -> FastAPI:
    """Build the FastAPI application serving the given environment.

    Wires up request logging, validation/unhandled exception handlers and the
    API routes, then instantiates the environment and attaches it to the app
    so endpoints can reach it via `request.app.environment`.
    """
    app = FastAPI(title="Desktop Agent Benchmark Environment Server")
    app.middleware("http")(log_request_middleware)
    app.add_exception_handler(
        RequestValidationError, request_validation_exception_handler
    )
    app.add_exception_handler(Exception, unhandled_exception_handler)
    app.include_router(api_router)
    app.environment = create_environment(environment_config)
    return app


if __name__ == "__main__":
    # Export every EnvSettings field (e.g. DISPLAY) into the process
    # environment so libraries launched later pick them up.
    env_settings = EnvSettings()
    for field in env_settings.model_fields.keys():
        value = getattr(env_settings, field)
        # NOTE(review): assumes every EnvSettings field is a str — confirm
        # if non-string fields are ever added.
        os.environ[field] = value
    # Command-line flags (when given) override the pydantic Settings defaults.
    args = parse_args()
    kwargs = {k: v for k, v in vars(args).items() if v is not None}
    settings = Settings(**kwargs)
    benchmarks, environments = get_benchmarks_environments()
    # Look up the environment config by name and start serving it.
    app = init(environment_config=environments[settings.ENVIRONMENT])
    app.server_settings = settings
    uvicorn.run(
        app,
        host=settings.HOST,
        port=settings.PORT,
        access_log=False,
        log_config=LOGGING_CONFIG,
    )


# ---- crab/server/middleware.py ----
import http
import time

from fastapi import Request

from .logger import uvicorn_logger as logger


async def log_request_middleware(request: Request, call_next):
    """
    This middleware will log all requests and their processing time.
    E.g. log: 0.0.0.0:1234 - GET /ping 200 OK 1.00ms
    """
    url = (
        f"{request.url.path}?{request.query_params}"
        if request.query_params
        else request.url.path
    )
    start_time = time.time()
    response = await call_next(request)
    # Wall-clock processing time in milliseconds, rendered with two decimals.
    process_time = (time.time() - start_time) * 1000
    formatted_process_time = "{0:.2f}".format(process_time)
    # Client address may be absent (e.g. under test clients).
    host = getattr(getattr(request, "client", None), "host", None)
    port = getattr(getattr(request, "client", None), "port", None)
    try:
        status_phrase = http.HTTPStatus(response.status_code).phrase
    except ValueError:
        # Non-standard status codes have no registered phrase.
        status_phrase = ""
    logger.info(
        f'{host}:{port} - "{request.method} {url}" {response.status_code} '
        f"{status_phrase} {formatted_process_time}ms"
    )
    return response


# ---- crab/server/utils.py ----
import importlib
import inspect
import pkgutil


def get_instances(package, class_type):
    """Collect module-level instances of `class_type` across `package`'s
    direct (non-package) submodules.

    Returns:
        dict: attribute name -> matching instance.
    """
    instance_dict = {}
    # Iterate through all modules in the specified package
    for _, name, ispkg in pkgutil.iter_modules(
        package.__path__, package.__name__ + "."
    ):
        if ispkg:
            continue  # Skip subpackages
        module = importlib.import_module(name)
        # NOTE(review): the inner `name` shadows the outer module name from
        # the enclosing loop; harmless today because the outer loop
        # reassigns it each iteration, but worth renaming.
        for name, obj in inspect.getmembers(module):
            if isinstance(obj, class_type):
                instance_dict[name] = obj
    return instance_dict


def get_benchmarks_environments():
    """Return (benchmark_configs, environment_configs) discovered in the
    `crab.benchmarks` and `crab.environments` packages.
    """
    # Imported inside the function — presumably to avoid a circular import
    # at module load time; confirm before moving to the top level.
    from crab import BenchmarkConfig, EnvironmentConfig, benchmarks, environments

    benchmark_configs = get_instances(benchmarks, BenchmarkConfig)
    environment_configs = get_instances(environments, EnvironmentConfig)
    return benchmark_configs, environment_configs


# ---- crab/utils/__init__.py ----
from crab.utils.common import (
    base64_to_callable,
    base64_to_image,
    callable_to_base64,
    image_to_base64,
)
from crab.utils.encryption import (
    decrypt_message,
    encrypt_message,
    generate_key_from_env,
)

# Public API of crab.utils.
__all__ = [
    "base64_to_image",
    "image_to_base64",
    "callable_to_base64",
    "base64_to_callable",
    "decrypt_message",
    "encrypt_message",
    "generate_key_from_env",
]
=========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import base64 from io import BytesIO from typing import Callable import dill from PIL import Image def base64_to_image(encoded: str) -> Image.Image: return Image.open(BytesIO(base64.b64decode(encoded))) def image_to_base64(image: Image.Image) -> str: img_byte_arr = BytesIO() image.save(img_byte_arr, format="png") return base64.b64encode(img_byte_arr.getvalue()).decode("utf-8") def callable_to_base64(func: Callable) -> str: return base64.b64encode(dill.dumps(func, recurse=True)).decode("utf-8") def base64_to_callable(encoded: str) -> Callable: return dill.loads(base64.b64decode(encoded)) def json_expand_refs(schema: dict | list, defs: dict | None = None): """Recursively expand `$ref` and `allOf` in the JSON. This function walks through the schema object, replacing any `$ref` with its corresponding definition found in `$defs`. It also expands subschemas defined in `allOf` by merging their resolved definitions into a single schema. Args: schema: The JSON schema (or sub-schema). defs: The collection of definitions for `$ref` expansion. If None, it will look for `$defs` at the root of the schema. Returns: The schema with all `$ref` and `allOf` expanded. Raises: ValueError: If a reference cannot be resolved with the provided `$defs`. 
""" # If defs is None, it means we're at the root of the schema if defs is None: defs = schema.pop("$defs", {}) if isinstance(schema, dict): # Process `$ref` by replacing it with the referenced definition if "$ref" in schema: ref_path = schema["$ref"].split("/") ref_name = ref_path[-1] if ref_name in defs: return json_expand_refs(defs[ref_name], defs) else: raise ValueError(f"Reference {schema['$ref']} not found in $defs.") # Process `allOf` by combining all subschemas elif "allOf" in schema: combined_schema = {} for subschema in schema["allOf"]: expanded_subschema = json_expand_refs(subschema, defs) # Merge the expanded subschema into the combined_schema for key, value in expanded_subschema.items(): combined_schema[key] = value return combined_schema # Recursively process all keys in the dictionary else: return {key: json_expand_refs(value, defs) for key, value in schema.items()} elif isinstance(schema, list): # Recursively process each item in the list return [json_expand_refs(item, defs) for item in schema] # If it's neither a dict nor a list, return it as is (e.g., int, str) return schema ================================================ FILE: crab/utils/encryption.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
import base64
import hashlib
import logging
import os
from typing import Optional

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

logger = logging.getLogger("encryption")

# Wire layout of an encrypted message: 12-byte nonce | ciphertext | 16-byte tag.
_NONCE_LEN = 12
_TAG_LEN = 16


def encrypt_message(plaintext: str, key: bytes) -> str:
    """Encrypts a message using a key with AES 256 encryption.

    Args:
        plaintext (str): The message to encrypt.
        key (bytes): The encryption key, should be 256 bits.

    Returns:
        str: The encrypted message encoded in base64.
    """
    nonce = os.urandom(_NONCE_LEN)
    encryptor = Cipher(
        algorithms.AES(key), modes.GCM(nonce), backend=default_backend()
    ).encryptor()
    ciphertext = encryptor.update(plaintext.encode()) + encryptor.finalize()
    # nonce + ciphertext + GCM tag, base64-encoded for transport.
    return base64.b64encode(nonce + ciphertext + encryptor.tag).decode("utf-8")


def decrypt_message(encrypted: str, key: bytes) -> str:
    """Decrypts an encrypted message using a key with AES 256 encryption.

    Args:
        encrypted (str): The encrypted message encoded in base64.
        key (bytes): The encryption key, should be 256 bits.

    Returns:
        str: The decrypted message.
    """
    raw = base64.b64decode(encrypted)
    nonce = raw[:_NONCE_LEN]
    ciphertext = raw[_NONCE_LEN:-_TAG_LEN]
    tag = raw[-_TAG_LEN:]
    decryptor = Cipher(
        algorithms.AES(key), modes.GCM(nonce, tag), backend=default_backend()
    ).decryptor()
    return (decryptor.update(ciphertext) + decryptor.finalize()).decode("utf-8")


def generate_key_from_env() -> Optional[bytes]:
    """Generate the encryption key from the environment variable `CRAB_ENC_KEY`.

    Returns:
        Optional[bytes]: The encryption key. If the environment variable is
            not set or empty, return None.
    """
    enc_key = os.environ.get("CRAB_ENC_KEY")
    # don't encrypt as long as the key is an empty value
    if not enc_key:
        logger.warning("CRAB_ENC_KEY is not set, connection will not be encrypted.")
        return None
    return hashlib.sha256(enc_key.encode("utf-8")).digest()


# ---- crab/utils/measure.py ----
import logging
import time
from functools import wraps

logger = logging.getLogger(__name__)

# Misc logger setup so a debug log statement gets printed on stdout.
import logging
import time
from functools import wraps

logger = logging.getLogger(__name__)

# Misc logger setup so a debug log statement gets printed on stdout.
handler = logging.StreamHandler()
log_format = "%(asctime)s %(levelname)s -- %(message)s"
formatter = logging.Formatter(log_format)
handler.setFormatter(formatter)
logger.addHandler(handler)


def timed(func):
    """This decorator logs the execution time for the decorated function.

    Args:
        func: The callable to wrap.

    Returns:
        The wrapped callable; it behaves identically but logs an INFO line
        with the elapsed time (and, for methods, the receiver's class name
        and `.name` attribute when present).
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        end = time.time()
        # For bound methods args[0] is the receiver; report its class name.
        func_class = args[0].__class__.__name__ if args else ""
        info = "{}.{} ran in {}s".format(
            func_class,
            func.__name__,
            round(end - start, 2),
        )
        # Bug fix: guard `args` before indexing — the original accessed
        # args[0] unconditionally here and raised IndexError for any
        # zero-argument decorated function.
        if args and hasattr(args[0], "name"):
            info += f" with name {args[0].name}"
        logger.info(info)
        return result

    return wrapper
Specifically, we publish a disk image that contains all required software and configurations on Google Cloud; you can use your own Google account to create a cloud computer from this disk image and use [google remote desktop](https://remotedesktop.google.com/access/) to connect to it. This method doesn't have any hardware limitations, and once you set it up you can run the experiment immediately. As a tradeoff, the cloud computer that meets the minimum hardware requirement costs around $0.4 per hour (depending on the machine zone).

We connect to the Android environment via ADB, so any Android device, from an emulator to a physical smartphone, will work. You should ensure ADB is installed on your system and can be called directly from the command line. In our experiment, we used the built-in emulator of [Android Studio](https://developer.android.com/studio) to create a Google Pixel 8 Pro virtual device with the release name *R* and installed the necessary extra apps.

#### Tasks

We manage our task dataset using a CRAB-recommended method. Sub-tasks are defined through Pydantic models written in Python code, and composed tasks are defined in JSON format, typically combining several sub-tasks. The sub-tasks are defined in [android_subtasks](./dataset/android_subtasks.py) and [ubuntu_subtasks](./dataset/ubuntu_subtasks.py). The JSON files storing composed tasks are categorized into [android](./dataset/android/), [ubuntu](./dataset/ubuntu/), and [cross-platform](./dataset/cross/). The tasks in the android and ubuntu directories are single-environment tasks, and those in the cross directory are cross-environment tasks. Additionally, we create several tasks by hand instead of composing sub-tasks to provide semantically more meaningful tasks; these are found in [handmade tasks](./dataset/handmade_tasks.py).

## Experiment

After setting up the environment, you can start the experiment. A brief overview of the experiment is as follows:

1.
Open the Ubuntu environment virtual machine and the Android environment emulator. 2. Start the CRAB server in the Ubuntu environment and get its IP address and port. Let's say they are `192.168.122.72` and `8000`. 3. Choose a task. As an example, we take the task with ID `a3476778-e512-40ca-b1c0-d7aab0c7f18b` from [handmade_tasks](./dataset/handmade_tasks.py). The task is: "Open the 'Tasks' app on Android, check the first incomplete task, then perform the task according to its description." 4. Run [main.py](./main.py) with the command `poetry run python -m crab-benchmark-v0.main --model gpt4o --policy single --remote-url http://192.168.122.72:8000 --task-id a3476778-e512-40ca-b1c0-d7aab0c7f18b`. In this command, `--model gpt4o` and `--policy single` determine the agent system, `--remote-url` specifies the Ubuntu environment interface, and `--task-id` indicates the task to be performed. #### Model For open source models, we use [VLLM](https://github.com/vllm-project/vllm) to host Pixtral model, check [here](https://docs.vllm.ai/en/latest/models/vlm.html#online-inference) for the setup commands; [SGLang](https://github.com/sgl-project/sglang) to host LLaVa-OneVision model, check [here](https://github.com/sgl-project/sglang?tab=readme-ov-file#supported-models) for the setup commands. ================================================ FILE: crab-benchmark-v0/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== ================================================ FILE: crab-benchmark-v0/android_env.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from crab import EnvironmentConfig from crab.actions.android_actions import ( key_press, long_tap, open_app_drawer, screenshot, setup, swipe, tap, write_text, ) ANDROID_ENV = EnvironmentConfig( name="android", action_space=[tap, key_press, long_tap, write_text, swipe, open_app_drawer], observation_space=[screenshot], description="""A Google Pixel smartphone runs on the Android operating system. \ The interface displays a current screenshot at each step and primarily \ supports interaction through tapping and typing. This device offers a suite \ of standard applications including Phone, Photos, Camera, Chrome, and \ Calendar, among others. Access the app drawer to view all installed \ applications on the device. 
The Google account is pre-logged in, synchronized \ with the same account used in the Ubuntu environment.""", extra_attributes={"device": None}, reset=setup, ) ================================================ FILE: crab-benchmark-v0/dataset/android/1005c437-50d1-465a-b3fc-833098b22bfc.json ================================================ { "description": "In the Android operating system, use the \"Google Map\" app to find the city name corresponding to the postal code \"63002\" in South Korea, then use the \"Calendar\" app to add a new all-day event for 1 January 2025 with the text of the found city name.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "63002", "country": "South Korea" }, "output": "Jeju" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ac", "attribute": { "content": "Jeju", "date": "1 January 2025" }, "output": null } ], "adjlist": "0 1\n1", "id": "1005c437-50d1-465a-b3fc-833098b22bfc" } ================================================ FILE: crab-benchmark-v0/dataset/android/12333aa0-e76d-4a5c-8657-9f897f62f62d.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the city name for the postal code \"2770885\" in Japan, and then, using the \"Keep Notes\" app, create a new note without a title to record the city name you found.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "2770885", "country": "Japan" }, "output": "Chiba" }, { "task": "eb92a1e6-4c86-4d56-baac-95fc8397732e", "attribute": { "content": "Chiba" }, "output": null } ], "adjlist": "0 1\n1", "id": "12333aa0-e76d-4a5c-8657-9f897f62f62d" } ================================================ FILE: crab-benchmark-v0/dataset/android/22b04776-8eec-4303-b3f6-9c981f7f29b8.json ================================================ { "description": "In Android, Using \"Setting\" app, rename the device name of bluetooth as \"Sydney\".", "tasks": [ { "task": 
"a3d11574-2acf-4b26-a569-a5dbc9d548an", "attribute": { "content": "Sydney" }, "output": null } ], "adjlist": "0", "id": "22b04776-8eec-4303-b3f6-9c981f7f29b8" } ================================================ FILE: crab-benchmark-v0/dataset/android/2ade6a13-c7a6-4df7-8c62-77382687369e.json ================================================ { "description": "In Android, using the \"Contacts\" app, find the email of the contact named John Lauphin, then using the \"Gmail\" app, send an email to that contact with the subject \"Hello John.\"", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ap", "attribute": { "name": "John Lauphin" }, "output": "crabbb@gmail.com" }, { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "Hello John", "mail": "crabbb@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "2ade6a13-c7a6-4df7-8c62-77382687369e" } ================================================ FILE: crab-benchmark-v0/dataset/android/346caf7c-dc74-4c38-962a-aaffb638e0c7.json ================================================ { "description": "In Android, Using \"Calendar\" app, add a new task with text \"meeting\" in date \"June 5th 2024\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ac", "attribute": { "content": "meeting", "date": "05 June 2024" }, "output": null } ], "adjlist": "0", "id": "346caf7c-dc74-4c38-962a-aaffb638e0c7" } ================================================ FILE: crab-benchmark-v0/dataset/android/379b9c58-5125-41b3-9cc6-ea925c8b094d.json ================================================ { "description": "In Android, Using Google Map app, Find the city name of corresponding post code \"560049\" in the country \"India\".", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "country": "India", "number": "560049" }, "output": "Bengaluru" } ], "adjlist": "0", "id": "379b9c58-5125-41b3-9cc6-ea925c8b094d" } ================================================ FILE: 
crab-benchmark-v0/dataset/android/4190c90c-b28c-4bb3-ab5c-af3c4fde0a3d.json ================================================ { "description": "In Android, Using Google Map app, Find the city name of corresponding post code \"1010021\" in the country \"Japan\".", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "country": "Japan", "number": "101-0021" }, "output": "Tokyo" } ], "adjlist": "0", "id": "4190c90c-b28c-4bb3-ab5c-af3c4fde0a3d" } ================================================ FILE: crab-benchmark-v0/dataset/android/46d7ccdb-d2e4-4b8a-bead-f2641b5ac23c.json ================================================ { "description": "In Android, Using \"Contacts\" app, add a contact with a mail \"{mail}\" with a name \"{name}\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ag", "attribute": { "mail": "abcdcly@qq.com", "name": "John Haruhimiya" }, "output": null } ], "adjlist": "0", "id": "46d7ccdb-d2e4-4b8a-bead-f2641b5ac23c" } ================================================ FILE: crab-benchmark-v0/dataset/android/483fbf9c-dc78-4ac2-9264-53c4f617f6cc.json ================================================ { "description": "Open the calendar app in the Android system and find the title of an event on the date \"17 August 2024,\" then using the \"Google Drive\" app on the same Android device, create a new folder with the founded name", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "17 August 2024" }, "output": "Travel to Paris" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ar", "attribute": { "content": "Travel to Paris" }, "output": null } ], "adjlist": "0 1\n1", "id": "483fbf9c-dc78-4ac2-9264-53c4f617f6cc" } ================================================ FILE: crab-benchmark-v0/dataset/android/4893a9b0-6477-495d-a73c-32503326e24a.json ================================================ { "description": "In the Android system, use the calendar app to find the title of an event on the date 
\"16 July 2024,\".", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "16 July 2024" }, "output": "Japan" } ], "adjlist": "0", "id": "4893a9b0-6477-495d-a73c-32503326e24a" } ================================================ FILE: crab-benchmark-v0/dataset/android/53010c40-dce4-4d72-a856-842c21059e2b.json ================================================ { "description": "In the Android system, use the calendar app to find the title of an event on the date \"16 July 2024,\" then, using the Google Map app, find the city name of the corresponding post code \"113-8654\" in the country with same name as title.", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "16 July 2024" }, "output": "Japan" }, { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "113-8654", "country": "Japan" }, "output": null } ], "adjlist": "0 1\n1", "id": "53010c40-dce4-4d72-a856-842c21059e2b" } ================================================ FILE: crab-benchmark-v0/dataset/android/6d9f6395-de79-4ad0-8a2a-2d674f93f293.json ================================================ { "description": "In Android, Using \"Clock\" app, set the time of \"London\" in the clock, check the time gap between the city and current city.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ah", "attribute": { "place_name": "London" }, "output": "7 hours behind" } ], "adjlist": "0", "test_finished":"1", "id": "6d9f6395-de79-4ad0-8a2a-2d674f93f293" } ================================================ FILE: crab-benchmark-v0/dataset/android/71ef7fd2-0ae3-49c8-8238-06b7aa985d25.json ================================================ { "description": "Using the \"Google Map\" app on Android, find the distance of the shortest route from \"National University of Singapore\" to \"Nanyang Technology University,\" then using the \"Calendar\" app, add a new event with the text representing the found distance on the date 21 June 2024 as an 
all-day event.", "tasks": [ { "task": "1a1b72d7-78c9-4027-8278-86083ae01045", "attribute": { "place_name_1": "National University of Singapore", "place_name_2": "Nanyang Technology University" }, "output": "13km" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ac", "attribute": { "content": "13km", "date": "21 June 2024" }, "output": null } ], "adjlist": "0 1\n1", "id": "71ef7fd2-0ae3-49c8-8238-06b7aa985d25" } ================================================ FILE: crab-benchmark-v0/dataset/android/73f78fc3-1ca5-442d-801f-bc175a0bfb89.json ================================================ { "description": "In Android, using \"Google Map\" App, find the distance of the shortest route from \"Southern University of Science and Technology\" to \"Lianhuashan Park\"", "tasks": [ { "task": "1a1b72d7-78c9-4027-8278-86083ae01045", "attribute": { "place_name_1": "Southern University of Science and Technology", "place_name_2": "Lianhuashan Park" }, "output": null } ], "adjlist": "0", "id": "73f78fc3-1ca5-442d-801f-bc175a0bfb89" } ================================================ FILE: crab-benchmark-v0/dataset/android/764838cc-9359-4130-9bb2-4a75900b2d89.json ================================================ { "description": "In Android, call \"123456789\".", "tasks": [ { "task": "955d8773-dd7a-4072-b87c-7e546be7de4e", "attribute": { "number": "123456789" }, "output": null } ], "adjlist": "0", "id": "764838cc-9359-4130-9bb2-4a75900b2d89" } ================================================ FILE: crab-benchmark-v0/dataset/android/77289141-e52b-48c8-b3a7-1b29520f3e1e.json ================================================ { "description": "In Android, Using \"Contacts\" app, find out the mail of contact named \"John Haruhimiya\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ap", "attribute": { "name": "John Haruhimiya" }, "output": "abcdcly@qq.com" } ], "adjlist": "0", "id": "77289141-e52b-48c8-b3a7-1b29520f3e1e" } ================================================ FILE: 
crab-benchmark-v0/dataset/android/7891ceab-7965-4ddb-a0fc-15740c9a4e44.json ================================================ { "description": "In Android, Using \"Google Map\" app, find the city name of corresponding post code \"560049\" in the country \"India\". Create a folder with the city name in \"Google Drive\" app", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "country": "India", "number": "560049" }, "output": "Bengaluru" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ar", "attribute": { "content": "Bengaluru" }, "output": null } ], "adjlist": "0 1\n1", "id": "7891ceab-7965-4ddb-a0fc-15740c9a4e44" } ================================================ FILE: crab-benchmark-v0/dataset/android/8bd51440-f959-4edc-baa5-cd03d32a5b0f.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the address of the University of Sydney, then using the \"Gmail\" app, send a message to crabbb@gmail.com with the found address.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548aw", "attribute": { "content": "The University of Sydney" }, "output": "Camperdown NSW 2050 Australia" }, { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "Camperdown NSW 2050 Australia", "mail": "crabbb@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "8bd51440-f959-4edc-baa5-cd03d32a5b0f" } ================================================ FILE: crab-benchmark-v0/dataset/android/94b1836b-3111-40ad-8d07-b8a57efe7438.json ================================================ { "description": "In an Android system, use the calendar app to find the title of an event on the date \"9 August 2024\", and then, using the Gmail app, send an email to crabbb@gmail.com with the event title as message.", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "9 August 2024" }, "output": "National Day of Singapore would be a public holiday" }, { "task": 
"0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "National Day of Singapore would be a public holiday", "mail": "crabbb@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "94b1836b-3111-40ad-8d07-b8a57efe7438" } ================================================ FILE: crab-benchmark-v0/dataset/android/a225f7f8-6d03-4619-b57d-7a08610030d8.json ================================================ { "description": "In Android, Using \"Google Map\" app, Find the address of \"University of Oxford\" and send \"98801234\" the address using \"message\" App. ", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548aw", "attribute": { "content": "University of Oxford" }, "output": "Wellington Square, Oxford OX1 2JD, United Kingdom" }, { "task": "caa29623-1811-402d-963a-19f7eecc63d8", "attribute": { "content": "Wellington Square, Oxford OX1 2JD, United Kingdom", "number": "98801234" }, "output": null } ], "adjlist": "0 1\n1", "id": "a225f7f8-6d03-4619-b57d-7a08610030d8" } ================================================ FILE: crab-benchmark-v0/dataset/android/b077299d-1acb-40f5-89f3-cc08044345bf.json ================================================ { "description": "Using \"Tasks\" app, add a new task with text \"Watch camel tutorial video\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548af", "attribute": { "content": "Watch camel tutorial video" }, "output": null } ], "adjlist": "0", "id": "b077299d-1acb-40f5-89f3-cc08044345bf" } ================================================ FILE: crab-benchmark-v0/dataset/android/b3965b07-4683-4445-9de1-a1dedf6c73ad.json ================================================ { "description": "In Android, Using \"Google Map\" app, Find the address of \"University of Oxford\" and send \"abcdcly@qq.com\" the address using \"Gmail\" App. 
", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548aw", "attribute": { "content": "University of Oxford" }, "output": "Wellington Square, Oxford OX1 2JD, United Kingdom" }, { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "Wellington Square, Oxford OX1 2JD, United Kingdom", "mail": "abcdcly@qq.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "b3965b07-4683-4445-9de1-a1dedf6c73ad" } ================================================ FILE: crab-benchmark-v0/dataset/android/c1b1cfeb-40e7-49a8-a3f5-b8c8ba723601.json ================================================ { "description": "In Android, Using \"Google Drive\" app, create a new folder named \"Journey\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ar", "attribute": { "content": "Journey" }, "output": null } ], "adjlist": "0", "id": "c1b1cfeb-40e7-49a8-a3f5-b8c8ba723601" } ================================================ FILE: crab-benchmark-v0/dataset/android/c85f03c9-83c4-417b-93d9-0d7b41022525.json ================================================ { "description": "In android system, use the calendar app, find the title of an event in the date \"15 June, 2024\".", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "15 June 2024" }, "output": "EMNLP ddl" } ], "adjlist": "0", "id": "c85f03c9-83c4-417b-93d9-0d7b41022525" } ================================================ FILE: crab-benchmark-v0/dataset/android/cf4c496b-fbbd-4701-91ea-4590fe6a66e1.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the city name corresponding to the postcode \"110151\" in Colombia, then use the \"Clock\" app to set the time of that city in the clock and check the time gap between that city and your current city.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "110151", "country": "Columbia" }, "output": "Bogota" }, { "task": 
"a3d11574-2acf-4b26-a569-a5dbc9d548ah", "attribute": { "place_name": "Bogota" }, "output": "-5h" } ], "adjlist": "0 1\n1", "id": "cf4c496b-fbbd-4701-91ea-4590fe6a66e1" } ================================================ FILE: crab-benchmark-v0/dataset/android/d0811e47-d75f-40ce-b34b-e1ee3c8bed3f.json ================================================ { "description": "In Android, first use the \"Files\" app to find the creation date of the file /Movies/movie_list.txt, then use the \"Calendar\" app to add a new event titled \"Public Talking\" scheduled for all day on the founded day.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ak", "attribute": { "file_path": "/Movies/movie_list.txt" }, "output": "4 June 2024" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ac", "attribute": { "content": "Public Talking", "date": "4 June 2024" }, "output": null } ], "adjlist": "0 1\n1", "id": "d0811e47-d75f-40ce-b34b-e1ee3c8bed3f" } ================================================ FILE: crab-benchmark-v0/dataset/android/d2d456bb-c7d1-46af-8263-78d8509fb320.json ================================================ { "description": "In Android, using \"Gmail\" App, send \"abcdcly@qq.com\" a message \"Hello, nice to meet you!\"", "tasks": [ { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "Hello, nice to meet you!", "mail": "abcdcly@qq.com" }, "output": null } ], "adjlist": "0", "id": "d2d456bb-c7d1-46af-8263-78d8509fb320" } ================================================ FILE: crab-benchmark-v0/dataset/android/d4e0f2b3-d0ff-4efd-856f-9f5e598cfd05.json ================================================ { "description": "In Android, Using \"Google Map\" app, Find the address of \"University of Oxford\"", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548aw", "attribute": { "content": "University of Oxford" }, "output": "Wellington Square, Oxford OX1 2JD, United Kingdom" } ], "adjlist": "0", "id": "d4e0f2b3-d0ff-4efd-856f-9f5e598cfd05" } 
================================================ FILE: crab-benchmark-v0/dataset/android/d7489d00-0046-4fb1-af5b-1fde7d87312c.json ================================================ { "description": "In Android, open the \"Contacts\" app to find the email address of the contact named Karoon Wei, then use the \"Tasks\" app to add a new task with the email address.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ap", "attribute": { "name": "Karoon Wei" }, "output": "karroonw@gmail.com" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548af", "attribute": { "content": "karroonw@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "d7489d00-0046-4fb1-af5b-1fde7d87312c" } ================================================ FILE: crab-benchmark-v0/dataset/android/d92f6c33-e0a7-4101-957d-e7dd218d2565.json ================================================ { "description": "Using the \"Files\" app on an Android device, locate the file /Movies/movie_list.txt and determine its creation date, then use the Task app in the same Android system to find the title of an event scheduled for the days.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ak", "attribute": { "file_path": "/Movies/movie_list.txt" }, "output": "4 June 2024" }, { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "4 June 2024" }, "output": null } ], "adjlist": "0 1\n1", "id": "d92f6c33-e0a7-4101-957d-e7dd218d2565" } ================================================ FILE: crab-benchmark-v0/dataset/android/de843952-df8f-4a26-bae9-d0a32ed9a7f5.json ================================================ { "description": "In Android, Using \"Files\" app, find the create date of \"Downloads/meow.jpg\" in the sdk system.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ak", "attribute": { "file_path": "Download/meow.jpg.webp" }, "output": "May 28" } ], "adjlist": "0", "id": "de843952-df8f-4a26-bae9-d0a32ed9a7f5" } ================================================ FILE: 
crab-benchmark-v0/dataset/android/e20fd121-b981-42da-94de-efcd66889c11.json ================================================ { "description": "In Android, using \"Messages\", send \"The meeting starts from 10am today\" to \"123456789\".", "tasks": [ { "task": "caa29623-1811-402d-963a-19f7eecc63d8", "attribute": { "content": "The meeting starts from 10am today", "number": "123456789" }, "output": null } ], "adjlist": "0", "id": "e20fd121-b981-42da-94de-efcd66889c11" } ================================================ FILE: crab-benchmark-v0/dataset/android/e55d7a39-7b6b-4852-8711-844cebc88cb8.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the city name corresponding to the postcode \"110151\" in Colombia.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "110151", "country": "Columbia" }, "output": "Bogota" } ], "adjlist": "0", "id": "e55d7a39-7b6b-4852-8711-844cebc88cb8" } ================================================ FILE: crab-benchmark-v0/dataset/android/e9268070-91b7-4e8c-9976-1cf8126ba13b.json ================================================ { "description": "In the Android system, use the task app to find the title of an event on the date \"15 June 2024\", then using the \"Google Drive\" app, create a new folder named as the title we found.", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "15 June 2024" }, "output": "EMNLP24 DDL" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ar", "attribute": { "content": "EMNLP24 DDL" }, "output": null } ], "adjlist": "0 1\n1", "id": "e9268070-91b7-4e8c-9976-1cf8126ba13b" } ================================================ FILE: crab-benchmark-v0/dataset/android/fbe6a1b1-63bb-4d4e-8a53-ff4f7839ef61.json ================================================ { "description": "In Android, open the \"Contacts\" app to find the email address of a contact named Luis Martin, then use the 
\"Messages\" app to send the found email address to the phone number \"04055891132\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ap", "attribute": { "name": "Luis Martin" }, "output": "lmartin0431@gmail.com" }, { "task": "caa29623-1811-402d-963a-19f7eecc63d8", "attribute": { "content": "lmartin0431@gmail.com", "number": "04055891132" }, "output": null } ], "adjlist": "0 1\n1", "id": "fbe6a1b1-63bb-4d4e-8a53-ff4f7839ef61" } ================================================ FILE: crab-benchmark-v0/dataset/android/fc642cb6-5321-4966-afbf-fb3348bb69ee.json ================================================ { "description": "In Android, using \"Keep Notes\" App, record \"Camel is the best agent framework in the world!\" in a new note without title.", "tasks": [ { "task": "eb92a1e6-4c86-4d56-baac-95fc8397732e", "attribute": { "content": "Camel is the best agent framework in the world!" }, "output": null } ], "adjlist": "0", "id": "fc642cb6-5321-4966-afbf-fb3348bb69ee" } ================================================ FILE: crab-benchmark-v0/dataset/android_subtasks.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== # ruff: noqa: E501 import re import networkx as nx from lxml import etree from lxml.etree import _Element from networkx import DiGraph, path_graph from crab import SubTask, evaluator from crab.actions.android_actions import execute_adb def get_xml_etree(env) -> _Element | None: xml_str = execute_adb("exec-out uiautomator dump /dev/tty", env) if "UI hierchary dumped to: /dev/tty" not in xml_str: return None xml_str = xml_str.removesuffix("UI hierchary dumped to: /dev/tty") return etree.fromstring(xml_str.encode("utf-8")) @evaluator(env_name="android", local=True) def check_contain_input_text(text: str, env) -> bool: if env.trajectory: action_name, params, _ = env.trajectory[-1] if action_name == "write_text" and text.lower() in params["text"].lower(): return True return False @evaluator(env_name="android", local=True) def check_contain_input_text_multiple(text: str, env) -> bool: if env.trajectory: for action_name, params, _ in env.trajectory: if action_name == "write_text" and text in params["text"].lower(): return True return False @evaluator(env_name="android") def check_contain_contact(name: str, env) -> bool: root = get_xml_etree(env) if root is None: return False title_node = root.xpath( '//node[@resource-id="com.android.contacts:id/photo_touch_intercept_overlay"]' ) if not title_node: return False if title_node[0].get("content-desc") != name: return False info_node = root.xpath('//*[@class="android.widget.RelativeLayout"]') if not info_node: return False print("info node checked") mail_node = None for node in info_node: desc = node.get("content-desc") if "Email" in desc: mail_node = node if mail_node is None: return False real_mail_node = mail_node.xpath( '//*[@resource-id="com.android.contacts:id/header"]' ) if not real_mail_node: return False context = real_mail_node[0].get("text") print("context get") pattern = re.compile(r"^\w+@\w+.com") if pattern.match(context): return True return False @evaluator(env_name="android") def 
check_current_package_name(name: str, env) -> bool: result = execute_adb( r'shell "dumpsys activity activities | grep mResumedActivity"', env ) return name in result @evaluator(env_name="android", local=True) def check_ocr_results(text: str, env) -> bool: return text in env.ocr_results @evaluator(env_name="android") def check_current_message_page(title: str, env) -> bool: root = get_xml_etree(env) if root is None: return False title_node = root.xpath( '//node[@resource-id="com.google.android.apps.messaging:id/conversation_title"]' ) if title_node: return title == title_node[0].get("text") else: return False @evaluator(env_name="android") def check_message_text_box_contain(text: str, env) -> bool: root = get_xml_etree(env) if root is None: return False text_box_node = root.xpath( '//node[@resource-id="com.google.android.apps.messaging:id/compose_message_text"]' ) if text_box_node: return text.lower() in text_box_node[0].get("text").lower() else: return False @evaluator(env_name="android") def check_message_text_box_empty(env) -> bool: root = get_xml_etree(env) if root is None: return False text_box_node = root.xpath( '//node[@resource-id="com.google.android.apps.messaging:id/compose_message_text"]' ) if not text_box_node: return False if text_box_node[0].get("text").strip() == "Text message": return True else: return False @evaluator(env_name="android") def check_send_message(title: str, message: str, env) -> bool: root = get_xml_etree(env) if root is None: return False title_node = root.xpath( '//node[@resource-id="com.google.android.apps.messaging:id/conversation_title"]' ) if not title_node or title != title_node[0].get("text"): return False messages_node = root.xpath( '//node[@resource-id="com.google.android.apps.messaging:id/message_text"]' ) for node in messages_node: if message in node.get("text"): return True return False @evaluator(env_name="android") def check_note_content(content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False 
title_node = root.xpath( '//node[@resource-id="com.google.android.keep:id/editable_title"]' ) if not title_node: return False if title_node[0].get("text") != "Title": return False node = root.xpath( '//node[@resource-id="com.google.android.keep:id/edit_note_text"]' ) if not node: return False if content in node[0].get("text"): return True return False @evaluator(env_name="android") def check_bluetooth_name(content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False bluetooth_node = root.xpath('//node[@resource-id="android:id/summary"]') if not bluetooth_node: return False if content in bluetooth_node[0].get("text"): return True return False @evaluator(env_name="android") def check_map_direction_page(from_des: str, to_des: str, env) -> bool: root = get_xml_etree(env) if root is None: return False from_node = root.xpath(f'//node[@content-desc="Start location, {from_des}"]') if not from_node: return False to_node = root.xpath(f'//node[@content-desc="Destination, {to_des}"]') if not to_node: return False return True @evaluator(env_name="android") def check_dial_number(phone_number: str, env) -> bool: root = get_xml_etree(env) if root is None: return False dialer_node = root.xpath('//node[@resource-id="com.android.dialer:id/digits"]') if not dialer_node: return False number = dialer_node[0].get("text") number = re.sub("[^0-9]", "", number) target = re.sub("[^0-9]", "", phone_number) return number == target @evaluator(env_name="android") def check_calendar_registered(date: str, content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False calendar_node = root.xpath( '//node[@resource-id="com.google.android.calendar:id/alternate_timeline_fragment_container"]' ) if not calendar_node: return False itr_calendar_node = calendar_node[0].xpath( '//node[@class="android.support.v7.widget.RecyclerView"]' ) if not itr_calendar_node: return False target_nodes = itr_calendar_node[0].xpath('//node[@content-desc="{content}"]') if not 
target_nodes: return False return True @evaluator(env_name="android") def check_drive_registered(content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False entry_node = root.xpath( '//node[@resource-id="com.google.android.apps.docs:id/entry_label"]' ) if not entry_node: return False for node in entry_node: if content == node.get("text") and f"{content} Folder" == node.get( "content-desc" ): return True return False @evaluator(env_name="android") def check_contact_registered(mail: str, name: str, env) -> bool: root = get_xml_etree(env) if root is None: return False name_node = root.xpath('//node[@resource-id="com.android.contacts:id/large_title"]') if not name_node: return False text = name_node[0].get("text") if text not in name: return False mail_node = root.xpath('//node[@resource-id="com.android.contacts:id/header"]') text = mail_node[0].get("text") if text not in mail: return False return True @evaluator(env_name="android") def check_calling_number(phone_number: str, env) -> bool: root = get_xml_etree(env) if root is None: return False dialer_node = root.xpath( '//node[@resource-id="com.android.dialer:id/contactgrid_contact_name"]' ) if not dialer_node: return False number = dialer_node[0].get("text") number = re.sub("[^0-9]", "", number) target = re.sub("[^0-9]", "", phone_number) return number == target @evaluator(env_name="android") def check_google_tasks_name(target: str, env) -> bool: root = get_xml_etree(env) if root is None: return False task_nodes = root.xpath( '//node[@resource-id="com.google.android.apps.tasks:id/task_name"]' ) if not task_nodes: return False for node in task_nodes: task_name = node.get("text") if target in task_name: return True return False @evaluator(env_name="android") def check_date(target: str, env) -> bool: root = get_xml_etree(env) if root is None: return False date_nodes = root.xpath( '//node[@resource-id="com.google.android.apps.photos:id/datetime_item_layout"]' ) if not date_nodes: return False 
prev_node = date_nodes.xpath( '//node[@resource-id="com.google.android.apps.photos:id/label"]' ) time = prev_node.get("text") pattern = re.compile(r"^\w{3},\s\w{3}\s\d{2},\s\d{4}\s•\s\d{1,2}:\d{2}\s[AP]M$") if pattern.match(time): return True return False @evaluator(env_name="android") def check_city_clock(place_name: str, env) -> bool: root = get_xml_etree(env) if root is None: return False city_nodes = root.xpath( '//node[@resource-id="com.google.android.deskclock:id/city_name"]' ) if city_nodes is None: return False for city_node in city_nodes: text = city_node.get("text") if place_name == text: return True return False @evaluator(env_name="android") def check_event(date: str, env) -> bool: root = get_xml_etree(env) if root is None: return False event_nodes = root.xpath('//node[@class="android.support.v7.widget.RecyclerView"]') if event_nodes is None: return False if not event_nodes: return False for node in event_nodes[0]: text = node.get("content-desc") if date in text: return True return False @evaluator(env_name="android") def check_event_registered(date: str, content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False event_nodes = root.xpath('//node[@class="android.support.v7.widget.RecyclerView"]') if not event_nodes: return False time_reg = False content_reg = False for node in event_nodes[0]: text = node.get("content-desc") if date.lower() in text.lower(): time_reg = True if content.lower() in text.lower(): content_reg = True if time_reg and content_reg: return True return False @evaluator(env_name="android") def check_location(content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False checked_node = root.xpath(f'//node[@content-desc="{content}"]') if not checked_node: return False return True @evaluator(env_name="android") def check_contain_city(number: str, city: str, env) -> bool: root = get_xml_etree(env) if root is None: return False business_node = root.xpath( 
'//node[@resource-id="com.google.android.apps.maps:id/search_omnibox_text_box"]' ) if not business_node: return False text = None for node in business_node[0]: text = node.get("text") if text is None: return False if city in text and str(number) in text: return True return False @evaluator(env_name="android") def check_file(content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False name_source_node = root.xpath( '//node[@resource-id="com.google.android.apps.photos:id/exif_item_layout"]' ) if not name_source_node: return False name_nodes = name_source_node[0].xpath( '//node[@resource-id="com.google.android.apps.photos:id/label"]' ) if not name_nodes: return False target_node = None for node in name_nodes: text = node.get("text") if content in text: target_node = node if target_node is None: return False time_source_node = root.xpath( '//node[@resource-id="com.google.android.apps.photos:id/datetime_item_layout"]' ) if not time_source_node: return False time_nodes = time_source_node[0].xpath( '//node[@resource-id="com.google.android.apps.photos:id/label"]' ) if not time_nodes: return False target_node = None for node in time_nodes: text = node.get("text") pattern = re.compile( r"(Tue|Mon|Wed|Thu|Fri|Sat|Sun),\s(May|Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{2},\s\d{4} • \d{2}:\d{2}\s(AM|PM)" ) if pattern.match(text): return True return False @evaluator(env_name="android") def check_mail_sent(mail: str, content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False to_node = root.xpath( '//node[@resource-id="com.google.android.gm:id/peoplekit_chip"]' ) if not to_node: return False checked = False for node in to_node: text = node.get("content-desc") if mail in text: checked = True if not checked: return False # check the mail information-> Done # check the content information body_node = root.xpath( '//node[@resource-id="com.google.android.gm:id/body_wrapper"]' ) if not body_node: return False text_node = 
body_node[0].xpath('//node[@class="android.widget.EditText"]') if not text_node: return False for node in text_node: text = node.get("text") if content in text: return True return False def distance_evaluator_generator(place_name_1: str, place_name_2: str): result = nx.DiGraph() a = check_current_package_name("com.google.android.apps.maps") b = check_contain_input_text(place_name_1) c = check_contain_input_text(place_name_2) d = check_map_direction_page(place_name_1, place_name_2) result.add_edges_from([(a, b), (a, c), (b, d), (c, d)]) return result def mail_evaluator_generator(mail: str, content: str): result = nx.DiGraph() a = check_current_package_name("com.google.android.gm") b = check_contain_input_text(mail) c = check_contain_input_text(content) d = check_mail_sent(mail, content) result.add_edges_from([(a, b), (a, c), (b, d), (c, d)]) return result def contact_evaluator_generator(mail: str, name: str): result = nx.DiGraph() a = check_current_package_name("com.android.contacts") b = check_contain_input_text(mail) c = check_contain_input_text(name) d = check_contact_registered(mail, name) result.add_edges_from([(a, b), (a, c), (b, d), (c, d)]) return result android_subtasks = [ SubTask( id="1a1b72d7-78c9-4027-8278-86083ae01045", description='In Android, using "Google Map" App, find the distance of the shortest route from "{place_name_1}" to "{place_name_2}"', attribute_dict={"place_name_1": "place_name_1", "place_name_2": "place_name_2"}, output_type="number", evaluator_generator=distance_evaluator_generator, ), SubTask( id="eb92a1e6-4c86-4d56-baac-95fc8397732e", description='In Android, using "Keep Notes" App, record "{content}" in a new note without title.', attribute_dict={"content": "content"}, output_type="None", evaluator_generator=lambda content: path_graph( [ check_current_package_name("com.google.android.keep"), check_contain_input_text(content), check_note_content(content), ], create_using=DiGraph, ), ), SubTask( 
id="caa29623-1811-402d-963a-19f7eecc63d8", description='In Android, using "Messages", send "{content}" to "{number}".', attribute_dict={"content": "content", "number": "number"}, output_type="None", evaluator_generator=lambda content, number: path_graph( [ check_current_package_name("com.google.android.apps.messaging"), check_current_message_page(number), check_contain_input_text(content), check_send_message(number, content), ], create_using=DiGraph, ), ), SubTask( id="955d8773-dd7a-4072-b87c-7e546be7de4e", description='In Android, call "{number}".', attribute_dict={"number": "number"}, output_type="None", evaluator_generator=lambda number: path_graph( [ check_current_package_name("com.android.dialer"), check_dial_number(number), check_calling_number(number), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548af", description='Using "Tasks" app, add a new task with text "{content}".', attribute_dict={"content": "content"}, output_type="None", evaluator_generator=lambda content: path_graph( [ check_current_package_name("com.google.android.apps.tasks"), check_contain_input_text(content), check_google_tasks_name(content), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ac", description='In Android, Using "Calendar" app, add a new event with text "{content}" in date "{date}" all day.', attribute_dict={"content": "content", "date": "date"}, output_type="None", evaluator_generator=lambda content, date: path_graph( [ check_current_package_name("com.google.android.calendar"), check_contain_input_text(content), check_event_registered(date, content), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ag", description='In Android, Using "Contacts" app, add a contact with a mail "{mail}" with a name "{name}".', attribute_dict={"mail": "mail", "name": "name"}, output_type="None", evaluator_generator=contact_evaluator_generator, ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ap", 
description='In Android, Using "Contacts" app, find out the mail of contact named {name}.', attribute_dict={"name": "name"}, output_type="mail", evaluator_generator=lambda name: path_graph( [ check_current_package_name("com.android.contact"), check_contain_contact(name), ], create_using=DiGraph, ), ), SubTask( id="0090f116-e02b-4562-a20d-b5df38be963a", description='In Android, Using "Gmail" app, send {mail} a message {content}.', attribute_dict={"content": "content", "mail": "mail"}, output_type="None", evaluator_generator=mail_evaluator_generator, ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ar", description='In Android, Using "Google Drive" app, create a new folder named {content}.', attribute_dict={"content": "content"}, output_type="None", evaluator_generator=lambda content: path_graph( [ check_current_package_name("com.google.android.apps.docs"), check_drive_registered(content), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ak", description='In Android, Using "Files" app, find the create date of {file_path}.', attribute_dict={"file_path": "file_path"}, output_type="Date", evaluator_generator=lambda file_path: path_graph( [ check_current_package_name("com.google.android.apps.photos"), check_file(file_path), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548an", description='In Android, Using "Setting" app, rename the device name of bluetooth as {name}.', attribute_dict={"content": "content"}, output_type="None", evaluator_generator=lambda content: path_graph( [ check_current_package_name("com.android.settings"), check_contain_input_text(content), check_bluetooth_name(content), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ah", description='In Android, Using "Clock" app, set the time of {place_name} in the clock, check the time gap between the city and current city.', attribute_dict={"place_name": "place_name"}, output_type="content", evaluator_generator=lambda 
place_name: path_graph( [ check_current_package_name("com.google.android.deskclock"), check_city_clock(place_name), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548aw", description='In Android, Using "Google Map" app, Find the address of {content}', attribute_dict={"content": "content"}, output_type="content", evaluator_generator=lambda content: path_graph( [ check_current_package_name("com.google.android.apps.maps"), check_location(content), ], create_using=DiGraph, ), ), SubTask( id="51b2463c-9904-4a32-81ba-507bfb89d61f", description='In Android, Using "Google Map" app, Find the city name of corresponding post code "{number}" in the country "{country}".', attribute_dict={"number": "number", "country": "country"}, output_type="content", evaluator_generator=lambda number, country: path_graph( [ check_current_package_name("com.google.android.apps.maps"), check_contain_input_text(country), check_contain_input_text(number), check_contain_city(number, country), ], create_using=DiGraph, ), ), SubTask( id="2394b768-2ca7-45e9-b41e-2aa4e9573192", description='In android system, use the calendar app, find the title of an event in the date "{date}".', attribute_dict={"date": "date"}, output_type="content", evaluator_generator=lambda date: path_graph( [ check_current_package_name("com.google.android.calendar"), check_event(date), ], create_using=DiGraph, ), ), # TODO: The phone number page cannot be accesed by xml. figure out another way. 
# SubTask( # id="fa9c0b01-9835-4932-824d-0990cb20e5f7", # description='Using Settings app, find the phone number of this phone in the "About" panel.', # attribute_dict={}, # output_type="phone_number", # evaluator=lambda: path_graph( # [ # check_current_package_name("com.android.settings"), # ], # create_using=DiGraph, # ), # ), ] ================================================ FILE: crab-benchmark-v0/dataset/cross/05a7633d-b966-471c-8848-e18e69ad265f.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the city name corresponding to the postal code \"1010021\" in Japan, then paste the name into LibreOffice Writer on an Ubuntu system and save it as an ODT file at \"/home/crab/Desktop/target.opt\".", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "44145", "country": "Germany" }, "output": "Dortmund" }, { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/Desktop/target.odt" }, "output": null } ], "adjlist": "0 1\n1", "id": "05a7633d-b966-471c-8848-e18e69ad265f" } ================================================ FILE: crab-benchmark-v0/dataset/cross/1e92db38-501e-429b-ac31-453d1af10a25.json ================================================ { "description": "Open the terminal on Ubuntu, print the content of \"/home/crab/Desktop/kolakov.txt\" to the command line interface, and then, in the Android \"Keep Notes\" app, record the content in a new note without adding a title.", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/Desktop/kolakov.txt" }, "output": "The flight to warsaw is from kolakov" }, { "task": "eb92a1e6-4c86-4d56-baac-95fc8397732e", "attribute": { "content": "The flight to warsaw is from kolakov" }, "output": null } ], "adjlist": "0 1\n1", "id": "1e92db38-501e-429b-ac31-453d1af10a25" } ================================================ FILE: 
crab-benchmark-v0/dataset/cross/43be6e8e-034d-4277-8346-c4ae7553bf68.json ================================================ { "description": "On an Android device, using the Google Map app, find the address of Dignity Health Sports Park, then use Firefox to search for a university around the address on Google Maps, and copy the Google Maps sharing URL of that university to the clipboard.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548aw", "attribute": { "content": "Dignity Health Sports Park" }, "output": "18400 Avalon Blvd, Carson, CA 907, US" }, { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "University", "place_name": "18400 Avalon Blvd, Carson, CA 907, US" }, "output": null } ], "adjlist": "0 1\n1", "id": "43be6e8e-034d-4277-8346-c4ae7553bf68" } ================================================ FILE: crab-benchmark-v0/dataset/cross/534be964-269a-4509-b2b8-28cc3ba8dfca.json ================================================ { "description": "On an Android system, use the calendar app to find the title of an event on the date \"18 September 2024\", then use Firefox to search for an image with the title and copy the URL of the image to the clipboard.", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "18 September 2024" }, "output": "Chile National Day" }, { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "Chile National Day" }, "output": null } ], "adjlist": "0 1\n1", "id": "534be964-269a-4509-b2b8-28cc3ba8dfca" } ================================================ FILE: crab-benchmark-v0/dataset/cross/6f95cfa1-e7ae-4a82-912b-0180fc9622f2.json ================================================ { "description": "On an Android system, open the calendar app and find the title of an event scheduled for \"15 June 2024,\" copy this title, then paste the content into Visual Studio Code (VS Code) on an Ubuntu system and save it as a file named \"reminder.txt\" on the Desktop.", 
"tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "15 June 2024" }, "output": "EMNLP24 DDL" }, { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/Desktop/reminder.txt" }, "output": null } ], "adjlist": "0 1\n1", "id": "6f95cfa1-e7ae-4a82-912b-0180fc9622f2" } ================================================ FILE: crab-benchmark-v0/dataset/cross/760ed27e-b1bd-451f-8659-bdb9845fcb7f.json ================================================ { "description": "Open the \"~/Desktop/contact.txt\" file via the command line interface in Ubuntu to view its content, then use the Gmail app on Android to send a message to crabbb@gmail.com with the content.", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "~/Desktop/contact.txt" }, "output": "crabbb@gmail.com" }, { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "Hello, please send me a message back", "mail": "crabbb@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "760ed27e-b1bd-451f-8659-bdb9845fcb7f" } ================================================ FILE: crab-benchmark-v0/dataset/cross/82596760-7d4d-457d-9ca9-9551ab85ec58.json ================================================ { "description": "Using the \"Google Map\" app on an Android device, find the city name corresponding to the postal code \"10179\" in Germany, and then submit the discovered city name.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "10179", "country": "German" }, "output": "Berlin" }, { "task": "1c3bedc3-ea5a-453c-a15b-223d72ab756d", "attribute": { "content": "Berlin" }, "output": null } ], "adjlist": "0 1\n1", "id": "82596760-7d4d-457d-9ca9-9551ab85ec58" } ================================================ FILE: crab-benchmark-v0/dataset/cross/a956a091-8de4-42ee-b152-913308dfc24b.json ================================================ { "description": "In the \"Clock\" 
app on Android, add Yakarta's time, compare it with the current city's time to determine the time gap, and then submit the information.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ah", "attribute": { "place_name": "yakarta" }, "output": "1 hour behind" }, { "task": "1c3bedc3-ea5a-453c-a15b-223d72ab756d", "attribute": { "content": "1 hour behind" }, "output": null } ], "adjlist": "0 1\n1", "id": "a956a091-8de4-42ee-b152-913308dfc24b" } ================================================ FILE: crab-benchmark-v0/dataset/cross/c5929ef3-ac27-4288-b02f-4f261d5871f9.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the city name corresponding to the postal code \"1010021\" in Japan, then use Firefox to search for a code repository about that city on GitHub and copy the URL of the repository to the clipboard.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "1010021", "country": "Japan" }, "output": "Tokyo" }, { "task": "bcd03c9f-62c9-4001-8d86-78358c59ce22", "attribute": { "keyword": "Tokyo" }, "output": null } ], "adjlist": "0 1\n1", "id": "c5929ef3-ac27-4288-b02f-4f261d5871f9" } ================================================ FILE: crab-benchmark-v0/dataset/cross/da5911e3-1a99-4735-ba3e-f08c5ca81fdd.json ================================================ { "description": "Open a terminal in Ubuntu, print the content of \"~/Desktop/contract_reminder.txt\", and then, on an Android device, use the Gmail app to send an email to crabbb@gmail.com, including the printed information.", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "~/Desktop/contract_reminder.txt" }, "output": "uld be end in three days." 
}, { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "uld be end in three days.", "mail": "crabbb@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "da5911e3-1a99-4735-ba3e-f08c5ca81fdd" } ================================================ FILE: crab-benchmark-v0/dataset/handmade_tasks.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== # ruff: noqa: E501 F405 import os import re import subprocess import time from datetime import datetime import networkx as nx from crab import Task, action, evaluator from .android_subtasks import ( check_current_package_name, check_google_tasks_name, check_message_text_box_contain, check_message_text_box_empty, check_note_content, get_xml_etree, ) from .ubuntu_subtasks import * # noqa: F403 _item_count_cache = None @evaluator(env_name="android") def check_calendar_in_today(env) -> bool: # Get today's date and format it as "Weekday DD Month YYYY" today_date_str = datetime.now().strftime("%A %d %B %Y") root = get_xml_etree(env) if root is None: return False # Construct the desired string with today's date date_string = f"{today_date_str}, Open Schedule View" date_node = root.xpath(f'//node[@content-desc="{date_string}"]') if not date_node or len(date_node) != 1: return False today_nodes = date_node[0].getparent().getchildren() item_count = len(today_nodes) - 2 if item_count < 0: return False global _item_count_cache _item_count_cache = item_count return True @action(env_name="ubuntu") def get_file_bullet_points(file_path: str) -> int | None: # Check if the file exists if not os.path.exists(file_path): return None # Read the markdown text from the file try: with open(file_path, "r") as file: markdown_text = file.read() except Exception: return None # Regex to match empty checkboxes in markdown pattern = r"- \[ \]" # Find all matches matches = re.findall(pattern, markdown_text) # Return the number of empty checkboxes return matches @evaluator(env_name="ubuntu", local=True) def check_blluet_point_match_calendar(file_path: str, env) -> bool: matches = env._action_endpoint(get_file_bullet_points, {"file_path": file_path}) global _item_count_cache if _item_count_cache is None or matches is None: return False return _item_count_cache == len(matches) @evaluator(env_name="android") def check_node_exist(node_query: str, env) -> bool: root = get_xml_etree(env) if 
root is None: return False node = root.xpath(f"//node[{node_query}]") if not node: return False return True @evaluator(env_name="ubuntu") def check_new_jpg_files_in_dir(directory) -> bool: # Get the current time current_time = time.time() # Time limit set to 3 minutes ago time_limit = current_time - 180 # Iterate over files in the specified directory for file in os.listdir(directory): file_path = os.path.join(directory, file) # Check if the file is a .jpg and was modified within the last 3 minutes if file.endswith(".jpg") and os.path.getmtime(file_path) > time_limit: return True return False @evaluator(env_name="ubuntu") def check_text_list_in_current_window_name(texts: list[str]) -> bool: try: out = subprocess.check_output( ["xdotool", "getwindowfocus", "getwindowname"], text=True ).strip() except subprocess.CalledProcessError: return False for text in texts: if text not in out: return False return True @evaluator(env_name="android") def check_keep_notes_content(text: str, env) -> bool: root = get_xml_etree(env) if root is None: return False edit_node = root.xpath( '//node[@resource-id="com.google.android.keep:id/editor_bottom_bar"]' ) if len(edit_node) != 1: return False content_node = root.xpath( '//node[@resource-id="com.google.android.keep:id/browse_note_interior_content"]' ) if len(content_node) != 1: return False text_nodes = content_node[0].getchildren() if len(text_nodes) != 1: return False return text_nodes[0].get("text") == text @evaluator(env_name="android") def check_keep_notes_contain_fd(env) -> bool: global RESULT_fd0576be text = RESULT_fd0576be root = get_xml_etree(env) if root is None or text is None: return False edit_node = root.xpath( '//node[@resource-id="com.google.android.keep:id/editor_bottom_bar"]' ) if len(edit_node) != 1: return False content_node = root.xpath( '//node[@resource-id="com.google.android.keep:id/browse_note_interior_content"]' ) for node in content_node: text_nodes = node.getchildren() if len(text_nodes) != 1: continue if 
text in text_nodes[0].get("text"): return True return False @evaluator(env_name="android") def check_alarm_contains(time: str, env) -> bool: root = get_xml_etree(env) if root is None or time is None: return False clock_node = root.xpath( '//node[@resource-id="com.google.android.deskclock:id/digital_clock"]' ) for node in clock_node: if time == node.get("text"): return True return False @evaluator(env_name="android", local=True) def check_tap_text(text: str, env) -> bool: if env.trajectory: action_name, params, _ = env.trajectory[-1] if action_name == "tap": try: element_id = int(params["element"]) element_label = env.element_label_map[element_id] except TypeError: return False if element_label is None: return False return text.lower() in element_label.lower() return False def summarize_ubuntu_evaluator(): result = nx.DiGraph() a = check_current_window_process("slack") b = check_current_package_name("com.google.android.apps.messaging") c = check_message_text_box_contain("agent") d = check_message_text_box_contain("github") e = check_message_text_box_empty() result.add_edges_from([(a, c), (a, d), (b, c), (b, d), (c, e), (d, e)]) return result def check_calendar_evaluator(): result = nx.DiGraph() a = check_current_package_name("com.google.android.calendar") b = check_calendar_in_today() c = check_file_exist("/home/crab/assets/plan.md") d = check_blluet_point_match_calendar("/home/crab/assets/plan.md") result.add_edges_from([(a, b), (b, d), (c, d)]) return result def evaluator_97e6f333(): result = nx.DiGraph() a = check_current_package_name("com.android.camera2") b = check_node_exist('@resource-id="com.android.camera2:id/rounded_thumbnail_view"') c = check_node_exist('@resource-id="com.android.camera2:id/filmstrip_layout"') d = check_current_package_name( "com.google.android.apps.photos/.upload.intent.UploadContentActivity" ) e = check_node_exist('@resource-id="com.android.camera2:id/filmstrip_layout"') f = check_current_window_process("firefox") g = 
check_text_in_current_window_name("Photos - Google Photos — Mozilla Firefox") h = check_new_jpg_files_in_dir("/home/crab/Downloads") i = check_file_exist("/home/crab/assets/photo.jpg") j = check_text_list_in_current_window_name(["photo", "GIMP"]) result.add_edges_from([(a, b), (b, c), (c, d), (d, e), (e, h)]) result.add_edges_from([(f, g), (g, h)]) result.add_edges_from([(h, i), (i, j)]) return result def evaluator_82efbd82(): result = nx.DiGraph() a = download_and_verify_file( "https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg", "/home/crab/Downloads/raw.jpg", ) b = check_text_in_current_window_name("GNU Image Manipulation Program") c = check_file_exist("/home/crab/Pictures/edited.jpg") d = is_image_2_brighter( "/home/crab/Downloads/raw.jpg", "/home/crab/Pictures/edited.jpg" ) e = verify_background("/home/crab/Pictures/edited.jpg") result.add_edges_from([(a, b), (b, c), (c, d), (d, e)]) return result def evaluator_515a5467(): result = nx.DiGraph() a = download_and_verify_file( "https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg", "/home/crab/Downloads/img_1.jpg", ) b = download_and_verify_file( "https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Flag_of_Ethiopia.svg/250px-Flag_of_Ethiopia.svg.png", "/home/crab/Downloads/img_2.jpg", ) c = check_text_in_current_window_name("GNU Image Manipulation Program") d = check_file_exist("/home/crab/Downloads/combined_editing.jpg") e = verify_combined_image( "/home/crab/Downloads/img_1.jpg", "/home/crab/Downloads/img_2.jpg", "/home/crab/Downloads/combined_editing.jpg", "right", ) f = check_directory_exists("/home/crab/jpg") g = verify_files_copied("/home/crab/Downloads", "/home/crab/jpg", "jpg") result.add_edges_from([(a, c), (b, c), (c, d), (d, e), (e, f), (f, g)]) return result def evaluator_5a1eba49(): result = nx.DiGraph() a = check_text_in_current_window_name("Firefox") 
b = check_contain_input_text("GPU") c = is_img_url_in_clipboard() d = download_from_clipboard_and_verify_file("/home/crab/Pictures/GPU.png") e = check_directory_exists("/home/crab/Pictures/png_files") f = verify_files_copied( "/home/crab/Pictures", "/home/crab/Pictures/png_files", "png" ) result.add_edges_from([(a, b), (b, c), (c, d), (d, e), (e, f)]) return result def evaluator_c347f78a(): file_path = "/home/crab/assets/content.txt" content = "An air quality health advisory is in effect Tuesday for New York City and the lower Hudson Valley, as well as western Connecticut and northern New Jersey, meaning it may not be safe for people with some conditions to be outside long." result = nx.DiGraph() a = check_current_window_process("gnome-terminal-server") b = is_process_open("vim") c = ~is_process_open("vim") d = check_file_content(file_path, content) e = check_contain_input_text("cat " + file_path) f = check_submit(content) result.add_edges_from([(a, b), (b, c), (c, d), (d, e), (e, f)]) return result def evaluator_bf83c176(): result = nx.DiGraph() file_path_1 = "/home/crab/Desktop/waymo.jpg" file_path_2 = "/home/crab/Desktop/tesla.png" output_path = "/home/crab/Documents/self_driving.pdf" # Search for the first image and download it a1 = check_text_in_current_window_name("Firefox") b1 = check_contain_input_text("Waymo") c1 = is_img_url_in_clipboard() d1 = download_from_clipboard_and_verify_file(file_path_1) # Search for the second image and download it a2 = check_text_in_current_window_name("Firefox") b2 = check_contain_input_text("Tesla") c2 = is_img_url_in_clipboard() d2 = download_from_clipboard_and_verify_file(file_path_2) # Combine images into a PDF e = check_text_in_current_window_name("LibreOffice Impress") f = check_file_exist(output_path) g = verify_combined_image(file_path_1, file_path_2, output_path, "left") # Add edges to form the branches and connections result.add_edges_from([(a1, b1), (b1, c1), (c1, d1)]) result.add_edges_from([(d1, a2), (a2, b2), 
(b2, c2), (c2, d2)]) result.add_edges_from([(d2, e), (e, f), (f, g)]) return result def evaluator_74bb11dd(): file_path_1 = "/home/crab/Documents/FR.ods" file_path_2 = "/home/crab/Documents/MX.ods" result = nx.DiGraph() # Search for the first country and save information to an ODS file a1 = check_text_in_current_window_name("Wikipedia — Mozilla Firefox") b1 = check_text_in_current_window_name("LibreOffice Calc") c1 = check_file_exist(file_path_1) d1 = verify_country_data_in_ods("France", file_path_1) # Search for the second country and save information to an ODS file a2 = check_text_in_current_window_name("Wikipedia — Mozilla Firefox") b2 = check_text_in_current_window_name("LibreOffice Calc") c2 = check_file_exist(file_path_2) d2 = verify_country_data_in_ods("Mexico", file_path_2) # Create new directory and copy ODS files to it e = check_directory_exists("/home/crab/Desktop/country_info") f = verify_files_copied( "/home/crab/Documents", "/home/crab/Desktop/country_info", "ods" ) # Add edges to form the branches and connections result.add_edges_from([(a1, b1), (b1, c1), (c1, d1)]) result.add_edges_from([(a2, b2), (b2, c2), (c2, d2)]) result.add_edges_from([(d1, e), (d2, e), (e, f)]) return result TEXT_ca79febf = 'The rapid advancement of conversational and chat-based language models has led to remarkable progress in complex task-solving. However, their success heavily relies on human input to guide the conversation, which can be challenging and time-consuming. This paper explores the potential of building scalable techniques to facilitate autonomous cooperation among communicative agents and provide insight into their "cognitive" processes. To address the challenges of achieving autonomous cooperation, we propose a novel communicative agent framework named role-playing. Our approach involves using inception prompting to guide chat agents toward task completion while maintaining consistency with human intentions. 
We showcase how role-playing can be used to generate conversational data for studying the behaviors and capabilities of chat agents, providing a valuable resource for investigating conversational language models. Our contributions include introducing a novel communicative agent framework, offering a scalable approach for studying the cooperative behaviors and capabilities of multi-agent systems, and open-sourcing our library to support research on communicative agents and beyond. The GitHub repository of this project is made publicly available on: https://github.com/camel-ai/camel.' def evaluator_ca79febf(): result = nx.DiGraph() a = check_current_package_name("com.google.android.keep") b = check_keep_notes_content(TEXT_ca79febf) c = check_tap_text("select") d = check_tap_text("copy") e = check_current_package_name( "com.google.android.apps.docs.editors.docs/com.google.android.apps.docs.editors.homescreen.HomescreenActivity" ) f = check_current_package_name( "com.google.android.apps.docs.editors.docs/com.google.android.apps.docs.editors.kix.KixEditorActivity" ) g = check_tap_text("paste") h = check_current_window_process("firefox") i = check_text_in_current_window_name("Google Docs — Mozilla Firefox") j = check_text_in_current_window_name( "Untitled document - Google Docs — Mozilla Firefox" ) result.add_edges_from([(a, b), (b, c), (c, d), (d, e), (e, f), (f, g), (g, j)]) result.add_edges_from([(h, i), (i, j)]) return result def evaluator_dfabf84c(): result = nx.DiGraph() keyword = "kaust" a = check_text_in_current_window_name("Mozilla Firefox") b = check_contain_input_text(keyword) c = is_img_url_in_clipboard() d = download_from_clipboard_and_verify_file("/home/crab/Desktop/download.jpg") e = check_current_package_name("com.google.android.keep") f = check_contain_input_text(keyword) g = check_note_content(keyword) result.add_edges_from([(a, b), (b, c), (c, d), (d, g)]) result.add_edges_from([(b, e), (e, f), (f, g)]) return result def evaluator_aab5555e(): result = 
nx.DiGraph() a = check_current_window_process("gnome-terminal-server") b = check_contain_input_text("uname -a") d = check_current_package_name("com.google.android.apps.messaging") e = check_message_text_box_contain("ubuntu") f = check_message_text_box_contain("x86") g = check_message_text_box_contain("linux") h = check_message_text_box_contain("crab") sink = check_message_text_box_empty() result.add_edges_from( [ (a, b), (b, sink), (d, e), (d, f), (d, g), (d, h), (e, sink), (f, sink), (g, sink), (h, sink), ] ) return result RESULT_fd0576be = None @action(env_name="ubuntu") def get_root_usage() -> str: try: output = subprocess.check_output(["df", "/"], text=True) return output.split("\n")[1].split()[4][:-1] except Exception: return None @evaluator(env_name="ubuntu", local=True) def check_contain_input_text_and_get_df_result(text: str, env) -> bool: global RESULT_fd0576be RESULT_fd0576be = env._action_endpoint(get_root_usage, parameters={}) if env.trajectory: inputs = [ params["text"].lower() for action_name, params, _ in env.trajectory if action_name == "write_text" ] return any(text.lower() in input_text for input_text in inputs) return False def evaluator_fd0576be(): result = nx.DiGraph() a = check_current_window_process("gnome-terminal-server") b = check_contain_input_text_and_get_df_result("df") c = check_current_package_name("com.google.android.keep") d = check_keep_notes_contain_fd() result.add_edges_from([(a, b), (b, d), (c, d)]) return result def evaluator_7e08f7d4(): result = nx.DiGraph() a = check_text_in_current_window_name("Mozilla Firefox") b = check_contain_input_text( "https://farm9.staticflickr.com/8293/7591378270_76059bc1cf_z.jpg" ) c = check_current_package_name("com.android.deskclock.DeskClock") d = check_alarm_contains("7:00\u200aAM") result.add_edges_from([(a, b), (b, d), (c, d)]) return result def evaluator_4957e964(): result = nx.DiGraph() a = check_current_window_process("gnome-terminal-server") b = check_contain_input_text("wget") c = 
check_contain_input_text( "https://farm8.staticflickr.com/7451/10001676353_fd762e02f0_z.jpg" ) d = check_file_exist("/home/crab/Desktop/download.jpg") e = check_text_in_current_window_name("Image Viewer") f = check_current_package_name("com.google.android.apps.tasks") g = check_google_tasks_name("tennis") result.add_edges_from([(a, b), (b, c), (c, d), (d, e), (e, g), (f, g)]) return result # Hand-made environment setup guide: # Ubuntu # * Make sure the Ubuntu slack login, and the default channel has at least two messages # Andorid # * Make sure the first incomplete task in android "Tasks" application is a instruction to change the system to dark mode. # * Make sure the init page of "Calendar" app is "Day" view. There should be at least one element today. ubuntu_handmade_tasks = [ Task( id="82efbd82-c941-4be9-9ac0-a495dc629e02", description='Download an image file from a given URL "https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg" to "/home/crab/Downloads/raw.jpg", then use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from "/home/crab/Downloads/raw.jpg" to be brighter and save the edited file to "/home/crab/Pictures/edited.jpg", and set the adjusted image "/home/crab/Pictures/edited.jpg" as the screen background of the system.', evaluator=evaluator_82efbd82(), ), Task( id="515a5467-b7ce-4cad-874d-da894361c1a3", description='Download two image files from given URLs "https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg" and "https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Flag_of_Ethiopia.svg/250px-Flag_of_Ethiopia.svg.png" to "/home/crab/Downloads/img_1.jpg" and "/home/crab/Downloads/img_2.jpg", combine the first image ("/home/crab/Downloads/img_1.jpg") with the second image ("/home/crab/Downloads/img_2.jpg") using GIMP (GNU Image Manipulation Program) by placing the first image on 
the right side of the second image, and save the resulting combined image to "/home/crab/Downloads/combined_editing.jpg". Then, create a new directory "/home/crab/jpg" and copy all files with the specified "jpg" extension from "/home/crab/Downloads" to the newly created directory "/home/crab/jpg".', evaluator=evaluator_515a5467(), ), Task( id="5a1eba49-ed2d-4955-a684-32472090a45b", description='Use Firefox to search for an image using the keyword "GPU", copy the URL of the found image to the clipboard, download the image file from the URL stored in the clipboard to "/home/crab/Pictures/GPU.png", and create a new directory "/home/crab/Pictures/png_files" to copy all files with the specified "png" extension from "/home/crab/Pictures" to the newly created directory "/home/crab/Pictures/png_files".', evaluator=evaluator_5a1eba49(), ), Task( id="c347f78a-4643-43c8-b41e-e437b70a2c5e", description='Open a file at "/home/crab/assets/content.txt" using vim in a terminal, write the specified "An air quality health advisory is in effect Tuesday for New York City and the lower Hudson Valley, as well as western Connecticut and northern New Jersey, meaning it may not be safe for people with some conditions to be outside long." to it, then save and exit vim. Print the content of the file by printing it to the command line interface through a terminal, and finally, submit the printed content.', evaluator=evaluator_c347f78a(), ), Task( id="bf83c176-fa15-4057-996f-f75be4338c05", description='Use Firefox to search for an image using the keyword "Waymo" first, copy the URL of the image to the clipboard, and download the image to "/home/crab/Desktop/waymo.jpg". Then, search for another image using the keyword "Tesla", copy the URL of the image to the clipboard, and download the image to "/home/crab/Desktop/tesla.png". 
Finally, combine the two images using LibreOffice Impress, placing Image 1 from "/home/crab/Desktop/waymo.jpg" on the left side of Image 2 "/home/crab/Desktop/tesla.png", and save the resulting file in PDF format to "/home/crab/Documents/self_driving.pdf".', evaluator=evaluator_bf83c176(), ), Task( id="74bb11dd-89ca-43d0-8edf-fe7b5201ecf7", description='Use Firefox to search for information about the country "France" on Wikipedia. Extract the capital city and population, and save this information in an ODS file at "/home/crab/Documents/FR.ods" using LibreOffice Calc. Then, search for information about the country "Mexico" on Wikipedia, extract the capital city and population, and save this information in a separate ODS file at "/home/crab/Documents/MX.ods" using LibreOffice Calc. The format of the file are, first column for the country name, the second for the capital city name, and the third for the population without any header. Finally, create a new directory "/home/crab/Desktop/country_info" and copy all files with the specified "ods" extension from "/home/crab/Documents" to the newly created directory "/home/crab/Desktop/country_info".', evaluator=evaluator_74bb11dd(), ), ] corss_environment_tasks = [ Task( id="79832e15-5fd3-43b8-b3e3-66249edfe1db", description='Open slack in Ubuntu desktop, summarize the last two messages in current channel, then use "Messages" app in android phone to send the summary to the first contact in the list.', evaluator=summarize_ubuntu_evaluator(), ), Task( id="a3476778-e512-40ca-b1c0-d7aab0c7f18b", # You must set the first incomplete task to "In Ubuntu, switch the system to dark mode by "Settings" application" description='Open "Tasks" app on Android, check the first incomplete task, then perform the task according to its description', evaluator=nx.path_graph( [ check_current_package_name("com.google.android.apps.tasks"), check_current_window_process("gnome-control-center"), check_color_scheme("prefer-dark"), ], 
create_using=nx.DiGraph, ), ), Task( id="914e6a48-8430-4a68-8328-c4e01db8926e", # You must create several tasks in google calendar today's view. description='Open "Calendar" app on Android, summarize all schedules today. Then, create a markdown file in Ubuntu at "/home/crab/assets/plan.md" with each event as a checkbox bullet point.', evaluator=check_calendar_evaluator(), ), Task( id="97e6f333-bedb-429b-8dd6-1855f99c312d", description="Take a photo through Android Camera, then upload it to Google Photos inside Camera App. Use Firefox inside Ubuntu desktop to download the photo to local disk, move it as `/home/crab/assets/photo.jpg`, finally open the photo in GIMP.", evaluator=evaluator_97e6f333(), ), Task( id="ca79febf-cae7-4669-8812-d3ec85ee2868", description="Open the first note in the Keep Notes app on Android, copy its contents, and paste them into a new document in Google docs. Then, open the newly created document in Firefox on Ubuntu.", evaluator=evaluator_ca79febf(), ), Task( id="dfabf84c-d05f-4e25-9f21-ba0f08107bd5", description='Use Firefox to search for an image using the keyword "kaust" and copy the URL of the image to the clipboard. Download a file from the URL stored in the clipboard to "/home/crab/Desktop/download.jpg". 
Then describe this image and save it in the Android Keep Notes app.', evaluator=evaluator_dfabf84c(), ), Task( id="aab5555e-4b72-4ebf-816a-59c1da2cec86", description="Check the all uname information of the system in Ubuntu, then explain the information to the first contact in the list of the Messages app in Android.", evaluator=evaluator_aab5555e(), ), Task( id="fd0576be-8b2c-45ce-b4a2-78659740879b", description="Check the current disk usage through command line in Ubuntu, check the root directory usage in percentage and save the information to a note in Keep Notes app in Android.", evaluator=evaluator_fd0576be(), ), Task( id="7e08f7d4-9b11-4aec-9b42-6cbde083fb4c", description='Use firefox on Ubuntu to openup the image "https://farm9.staticflickr.com/8293/7591378270_76059bc1cf_z.jpg", check the time of the clock in the image, then open the clock app in Android and set an alarm to the same as the image.', evaluator=evaluator_7e08f7d4(), ), Task( id="4957e964-5dd5-42f6-9d5d-f6a53a9a5d94", description='Use wget to download the image "https://farm8.staticflickr.com/7451/10001676353_fd762e02f0_z.jpg" to /home/crab/Desktop/download.jpg, what does the people in the image do? Create a task in the Tasks app in Android to remind you to do the same thing.', evaluator=evaluator_4957e964(), ), ] handmade_tasks = ubuntu_handmade_tasks + corss_environment_tasks ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/05d0e137-7d97-4021-9477-6490a2154c81.json ================================================ { "description": "Open \"/home/crab/poem\" using vim in a terminal, write \"If you shed tears when you miss the sun, you also miss the stars.\", then save and exit vim.", "tasks": [ { "task": "0f589bf9-9b26-4581-8b78-2961b115ab49", "attribute": { "file_path": "/home/crab/poem", "content": "If you shed tears when you miss the sun, you also miss the stars." 
}, "output": null } ], "adjlist": "0", "id": "05d0e137-7d97-4021-9477-6490a2154c81" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/0a893c2e-eec5-47cc-a930-eb01c5f17683.json ================================================ { "description": "Submit the following content \"If you shed tears when you miss the sun, you also miss the stars.\"", "tasks": [ { "task": "1c3bedc3-ea5a-453c-a15b-223d72ab756d", "attribute": { "content": "If you shed tears when you miss the sun, you also miss the stars." }, "output": null } ], "adjlist": "0", "id": "0a893c2e-eec5-47cc-a930-eb01c5f17683" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/0d178388-8166-4b66-93c1-278861f9897c.json ================================================ { "description": "Use Firefox to find out a \"restaurant\" around \"kaust\" on Google Maps and copy the Google Maps sharing URL of that \"restaurant\" to the clipboard", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "restaurant", "place_name": "kaust" }, "output": null } ], "adjlist": "0", "id": "0d178388-8166-4b66-93c1-278861f9897c" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/0d7c84d2-bbbd-46ab-80d1-52b3a44f3858.json ================================================ { "description": "Combine two images from Image 1 \"/home/crab/assets/campus.png\" and Image 2 \"/home/crab/assets/desert.jpg\" using LibreOffice Writer and save the resulting ODT file to \"/home/crab/assets/campus_desert.odt\". 
Image 1 should be placed above Image 2.", "tasks": [ { "task": "0111384f-38ca-41a2-9504-cb1c55002b3c", "attribute": { "image_path_1": "/home/crab/assets/campus.png", "image_path_2": "/home/crab/assets/desert.jpg", "output_path": "/home/crab/assets/campus_desert.odt" }, "output": null } ], "adjlist": "0", "id": "0d7c84d2-bbbd-46ab-80d1-52b3a44f3858" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/0deafe05-8db5-445f-9031-f6e884569d03.json ================================================ { "description": "Create a new directory \"/home/crab/jpg_folder\", copy all files with the \"jpg\" extension from \"/home/crab/Pictures\" to this newly created directory, then open LibreOffice Impress to combine the two images located at \"/home/crab/jpg_folder/dog.jpg\" (Image 1) and \"/home/crab/jpg_folder/Interstellar.jpg\" (Image 2), placing Image 1 on the right side of Image 2, and save the combined image in PDF format to \"/home/crab/Documents/combination.pdf\".", "tasks": [ { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "jpg", "source_dir": "/home/crab/Pictures", "target_dir": "/home/crab/jpg_folder" }, "output": "/home/crab/jpg_folder" }, { "task": "467f17a6-c42f-4eda-996f-a53385eb3efd", "attribute": { "image_path_1": "/home/crab/jpg_folder/dog.jpg", "image_path_2": "/home/crab/jpg_folder/Interstellar.jpg", "output_path": "/home/crab/Documents/combination.pdf" }, "output": null } ], "adjlist": "0 1\n1", "id": "0deafe05-8db5-445f-9031-f6e884569d03" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/0e80fd90-0b23-454f-a629-7b6d7baa7542.json ================================================ { "description": "Use Firefox to search for the country \"Canada\" on Wikipedia, extract the capital city and population, and save this information in an ODS file at \"/home/crab/canada.ods\" with LibreOffice Calc. 
The first column will save the country name, the second will save the capital city name, and the third will save the population. No header is needed in the ODS file.", "tasks": [ { "task": "1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", "attribute": { "country": "Canada", "file_path": "/home/crab/canada.ods" }, "output": null } ], "adjlist": "0", "id": "0e80fd90-0b23-454f-a629-7b6d7baa7542" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/125f7bae-e931-4190-8737-5f1ea7227772.json ================================================ { "description": "Submit content \"OpenAI is an American artificial intelligence (AI) research organization founded in December 2015, researching artificial intelligence with the goal of developing \"safe and beneficial\" artificial general intelligence, which it defines as \"highly autonomous systems that outperform humans at most economically valuable work.\"", "tasks": [ { "task": "1c3bedc3-ea5a-453c-a15b-223d72ab756d", "attribute": { "content": "OpenAI is an American artificial intelligence (AI) research organization founded in December 2015, researching artificial intelligence with the goal of developing \"safe and beneficial\" artificial general intelligence, which it defines as \"highly autonomous systems that outperform humans at most economically valuable work." 
}, "output": null } ], "adjlist": "0", "id": "125f7bae-e931-4190-8737-5f1ea7227772" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/15a150a8-899c-4753-8dc5-05248ccc3640.json ================================================ { "description": "Download the file from \"https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg\" to the location \"/home/crab/Downloads/fiji.png\", and then set \"/home/crab/Downloads/fiji.png\" as the desktop background on the system.", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg", "file_path": "/home/crab/Downloads/fiji.png" }, "output": "/home/crab/Downloads/fiji.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Downloads/fiji.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "15a150a8-899c-4753-8dc5-05248ccc3640" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/1ebcd710-f73b-4022-832b-167c0d3f55a2.json ================================================ { "description": "Use Firefox to find out a \"University\" around \"Los Angeles\" on Google Maps and copy the Google Maps sharing URL of that \"University\" to the clipboard", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "University", "place_name": "Los Angeles" }, "output": null } ], "adjlist": "0", "id": "1ebcd710-f73b-4022-832b-167c0d3f55a2" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/22787ecc-52b2-4791-aefb-c45800f51414.json ================================================ { "description": "Submit content \"Jensen Huang cofounded graphics-chip maker Nvidia in 1993, and has served as its CEO and president ever since. 
Huang owns approximately 3% of Nvidia, which went public in 1999.\"", "tasks": [ { "task": "1c3bedc3-ea5a-453c-a15b-223d72ab756d", "attribute": { "content": "Jensen Huang cofounded graphics-chip maker Nvidia in 1993, and has served as its CEO and president ever since. Huang owns approximately 3% of Nvidia, which went public in 1999." }, "output": null } ], "adjlist": "0", "id": "22787ecc-52b2-4791-aefb-c45800f51414" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/22f05f6f-6aef-4786-958f-14f559eaf014.json ================================================ { "description": "Create a new directory \"/home/crab/example_code\" and copy all files with the specified \"py\" extension from \"/home/crab/crab/examples\" to the directory \"/home/crab/example_code\".", "tasks": [ { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "py", "source_dir": "/home/crab/crab/examples", "target_dir": "/home/crab/example_code" }, "output": null } ], "adjlist": "0", "id": "22f05f6f-6aef-4786-958f-14f559eaf014" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/28963795-d694-4bb4-adaf-f7708a2c6fe5.json ================================================ { "description": "Use Firefox to search for an image using the keyword \"Elon Musk\" and copy the URL of the image.", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "Elon Musk" }, "output": null } ], "adjlist": "0", "id": "28963795-d694-4bb4-adaf-f7708a2c6fe5" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/299db8f2-81eb-455f-9302-5c8cb30be691.json ================================================ { "description": "Combine two images, Image 1 \"/home/crab/Pictures/Interstellar.jpg\" and Image 2 \"/home/crab/Pictures/cat.png\", using GIMP (GNU Image Manipulation Program) with Image 1 placed on the left side of Image 2, and save the resulting image to 
\"/home/crab/Pictures/edited_background.png\". Then, set \"/home/crab/Pictures/edited_background.png\" as the desktop background on the system.", "tasks": [ { "task": "4cf246ea-0a7f-43da-84b6-61d74a2699af", "attribute": { "image_path_1": "/home/crab/Pictures/Interstellar.jpg", "image_path_2": "/home/crab/Pictures/cat.png", "output_path": "/home/crab/Pictures/edited_background.png" }, "output": "/home/crab/Pictures/edited_background.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Pictures/edited_background.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "299db8f2-81eb-455f-9302-5c8cb30be691" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/29f099b2-b3a5-463f-b10a-15363bf7e845.json ================================================ { "description": "Use Firefox to search for a \"garden\" around \"ETH Zurich\" on Google Maps, copy the sharing URL of that \"garden\" to the clipboard, then paste the content into Visual Studio Code (VS Code) and save the file at \"/home/crab/eth_garden.txt\".", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "garden", "place_name": "ETH Zurich" }, "output": null }, { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/eth_garden.txt" }, "output": null } ], "adjlist": "0 1\n1", "id": "29f099b2-b3a5-463f-b10a-15363bf7e845" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/355e9660-a355-4b95-8881-ac9da578ea43.json ================================================ { "description": "Use Firefox to search for the country \"Italy\" on Wikipedia, extract the capital city and population, and save this information in an ODS file at \"/home/crab/country.ods\" with LibreOffice Calc. The first column will save the country name, the second will save the capital city name, and the third will save the population. 
No header is needed in the ODS file.", "tasks": [ { "task": "1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", "attribute": { "country": "Italy", "file_path": "/home/crab/country.ods" }, "output": null } ], "adjlist": "0", "id": "355e9660-a355-4b95-8881-ac9da578ea43" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/35bd7387-4735-4632-8474-e93382004c12.json ================================================ { "description": "Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from \"/home/crab/assets/campus.png\" to a higher value (brighter) and save it to \"/home/crab/assets/campus_edited.png\".", "tasks": [ { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { "image_path_before_edit": "/home/crab/assets/campus.png", "image_path_after_edit": "/home/crab/assets/campus_edited.png" }, "output": null } ], "adjlist": "0", "id": "35bd7387-4735-4632-8474-e93382004c12" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/362c5711-3824-42ff-96a0-7801b03b5f1f.json ================================================ { "description": "Use Firefox to find a code repository about \"Open Source Computer Vision Library\" in GitHub and copy the URL of the repository to the clipboard.", "tasks": [ { "task": "bcd03c9f-62c9-4001-8d86-78358c59ce22", "attribute": { "keyword": "Open Source Computer Vision Library" }, "output": null } ], "adjlist": "0", "id": "362c5711-3824-42ff-96a0-7801b03b5f1f" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/4718df9c-97ec-4b54-86ca-bd34e65c5a43.json ================================================ { "description": "Download a file from \"https://arxiv.org/pdf/2303.05499\" to \"/home/crab/Documents/Grounding_DINO.pdf\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://arxiv.org/pdf/2303.05499", "file_path": "/home/crab/Documents/Grounding_DINO.pdf" }, "output": null } 
], "adjlist": "0", "id": "4718df9c-97ec-4b54-86ca-bd34e65c5a43" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/47b75b21-99a2-461c-9d40-6dddc5c206d0.json ================================================ { "description": "Use Firefox to search for an image using the keyword \"LLM\" and copy the URL of the image to the clipboard.", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "LLM" }, "output": null } ], "adjlist": "0", "id": "47b75b21-99a2-461c-9d40-6dddc5c206d0" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/4ae4e35f-d90a-48cc-8fb9-492ac7ae07ee.json ================================================ { "description": "Paste clipboard content into LibreOffice Writer and save it as an ODT file at \"/home/crab/Documents/clipboard_text.odt\".", "tasks": [ { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/Documents/clipboard_text.odt" }, "output": null } ], "adjlist": "0", "id": "4ae4e35f-d90a-48cc-8fb9-492ac7ae07ee" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/4bbedade-4d4e-43d5-b650-2702b350ad28.json ================================================ { "description": "Open \"/home/crab/assets/1.txt\" using vim in a terminal, write \"LinkedIn is a business and employment-focused social media platform that works through websites and mobile apps. It was launched on May 5, 2003 by Reid Hoffman and Eric Ly.\", then save and exit vim.", "tasks": [ { "task": "0f589bf9-9b26-4581-8b78-2961b115ab49", "attribute": { "file_path": "/home/crab/assets/1.txt", "content": "LinkedIn is a business and employment-focused social media platform that works through websites and mobile apps. It was launched on May 5, 2003 by Reid Hoffman and Eric Ly." 
}, "output": null } ], "adjlist": "0", "id": "4bbedade-4d4e-43d5-b650-2702b350ad28" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/51a288f9-cf2c-4e8e-a98c-596a505af77c.json ================================================ { "description": "Combine two images from Image 1 \"/home/crab/assets/desert.jpg\" and Image 2 \"/home/crab/assets/campus.png\" using LibreOffice Impress and save the resulting file in PDF format to \"/home/crab/assets/desert_campus.pdf\". Image 1 should be placed on the right side of Image 2.", "tasks": [ { "task": "467f17a6-c42f-4eda-996f-a53385eb3efd", "attribute": { "image_path_1": "/home/crab/assets/desert.jpg", "image_path_2": "/home/crab/assets/campus.png", "output_path": "/home/crab/assets/desert_campus.pdf" }, "output": null } ], "adjlist": "0", "id": "51a288f9-cf2c-4e8e-a98c-596a505af77c" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/51c91051-3efb-4e92-a967-739b18520714.json ================================================ { "description": "Open Firefox and search for the torch.matmul example provided by the official PyTorch version 1.13 documentation, copy all the lines of code from the example, open Visual Studio Code (VS Code), paste the clipboard content into a new file, and save it as \"/home/crab/example.py\".", "tasks": [ { "task": "49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", "attribute": {}, "output": null }, { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/example.py" }, "output": null } ], "adjlist": "0 1\n1", "id": "51c91051-3efb-4e92-a967-739b18520714" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/57b7e8a7-8c17-4cc4-9bb5-4385afde3ad8.json ================================================ { "description": "Create a new directory \"/home/crab/assets_for_edit\" and copy all files with the \"png\" extension from \"/home/crab/assets\" to this new directory. 
Then, combining Image 1 \"/home/crab/assets_for_edit/background.png\" and Image 2 \"/home/crab/assets_for_edit/campus.png\" with LibreOffice Writer, place Image 1 above Image 2, and save the file in the ODT format to \"/home/crab/assets_for_edit/back_n_campus.odt\".", "tasks": [ { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "png", "source_dir": "/home/crab/assets", "target_dir": "/home/crab/assets_for_edit" }, "output": "/home/crab/assets_for_edit" }, { "task": "0111384f-38ca-41a2-9504-cb1c55002b3c", "attribute": { "image_path_1": "/home/crab/assets_for_edit/background.png", "image_path_2": "/home/crab/assets_for_edit/campus.png", "output_path": "/home/crab/assets_for_edit/back_n_campus.odt" }, "output": null } ], "adjlist": "0 1\n1", "id": "57b7e8a7-8c17-4cc4-9bb5-4385afde3ad8" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/58776443-ccf7-4db3-8c60-e188e4b5f90c.json ================================================ { "description": "Paste clipboard content into LibreOffice Writer and save it as an ODT file at \"/home/crab/paste.odt\".", "tasks": [ { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/paste.odt" }, "output": null } ], "adjlist": "0", "id": "58776443-ccf7-4db3-8c60-e188e4b5f90c" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/5ba74c6a-4513-448b-8b68-ff145ece0652.json ================================================ { "description": "Download the file from \"https://raw.githubusercontent.com/camel-ai/camel/master/README.md\" to \"/home/crab/Documents/README.md\", and then print the content of \"/home/crab/Documents/README.md\" to the command line interface through a terminal.", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://raw.githubusercontent.com/camel-ai/camel/master/README.md", "file_path": "/home/crab/Documents/README.md" }, "output": 
"/home/crab/Documents/README.md" }, { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/Documents/README.md" }, "output": null } ], "adjlist": "0 1\n1", "id": "5ba74c6a-4513-448b-8b68-ff145ece0652" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/6428f803-62de-40d2-a345-64e6cf955c9d.json ================================================ { "description": "First, use LibreOffice Impress to adjust the brightness of the image located at \"/home/crab/Pictures/cat.png\" to make it darker, and save the edited image as \"/home/crab/Pictures/cat_edited.png\". Then, using GIMP (GNU Image Manipulation Program), combine the image \"/home/crab/Pictures/dog.png\" with \"/home/crab/Pictures/cat_edited.png\" by placing the dog image on the left side of the cat image, and save the merged image to \"/home/crab/Pictures/dog_cat.png\".", "tasks": [ { "task": "434402f3-647a-4a9a-9d8f-10f5bb6c7cf0", "attribute": { "image_path_before_edit": "/home/crab/Pictures/cat.png", "image_path_after_edit": "/home/crab/Pictures/cat_edited.png" }, "output": "/home/crab/Pictures/cat_edited.png" }, { "task": "4cf246ea-0a7f-43da-84b6-61d74a2699af", "attribute": { "image_path_1": "/home/crab/Pictures/dog.png", "image_path_2": "/home/crab/Pictures/cat_edited.png", "output_path": "/home/crab/Pictures/dog_cat.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "6428f803-62de-40d2-a345-64e6cf955c9d" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/64a2c205-c85a-4e56-8edb-5df4f7724441.json ================================================ { "description": "Find the example provided of \"torch.matmul\" by official PyTorch version 1.13 documentation using Firefox and copy all the lines of code in the example to the clipboard.", "tasks": [ { "task": "49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", "attribute": {}, "output": null } ], "adjlist": "0", "id": "64a2c205-c85a-4e56-8edb-5df4f7724441" } 
================================================ FILE: crab-benchmark-v0/dataset/ubuntu/696ca9bb-89ea-4cd5-b693-f2d749d964b1.json ================================================ { "description": "Adjust the brightness of the image located at \"/home/crab/assets/campus.png\" using GIMP (GNU Image Manipulation Program) to make it brighter, save the adjusted image to \"/home/crab/Pictures/campus_brighter.png\", and then set this enhanced image as the desktop background on an Ubuntu system.", "tasks": [ { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { "image_path_before_edit": "/home/crab/assets/campus.png", "image_path_after_edit": "/home/crab/Pictures/campus_brighter.png" }, "output": "/home/crab/Pictures/campus_brighter.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Pictures/campus_brighter.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "696ca9bb-89ea-4cd5-b693-f2d749d964b1" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/6be49e77-e904-4eb0-a36a-7f0fd128ede3.json ================================================ { "description": "Use Firefox to find a code repository about \"pytorch\" in GitHub and copy the URL of the repository to the clipboard.", "tasks": [ { "task": "bcd03c9f-62c9-4001-8d86-78358c59ce22", "attribute": { "keyword": "pytorch" }, "output": null } ], "adjlist": "0", "id": "6be49e77-e904-4eb0-a36a-7f0fd128ede3" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/6c3105a2-328c-4190-823d-03d759be0b57.json ================================================ { "description": "Use Firefox to search for an image with the keyword \"reinforcement learning,\" copy the URL of the chosen image to the clipboard, and download the image from the URL in the clipboard to \"/home/crab/Downloads/RL.png\" on an Ubuntu system.", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": 
"reinforcement learning" }, "output": null }, { "task": "a313ea4d-e501-4971-b4fe-db2aad19acsd", "attribute": { "file_path": "/home/crab/Downloads/RL.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "6c3105a2-328c-4190-823d-03d759be0b57" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/6c560516-ca14-4f97-b51d-16ad81fc29e4.json ================================================ { "description": "Open \"/home/crab/assets/a.txt\" using vim in a terminal, write \"The most recent COMPUTEX was held from 30 May to 2 June 2023 with sessions about such topics as high-performance computing, artificial intelligence, next-gen connectivity and sustainability.\", then save and exit vim, and print the content of \"/home/crab/assets/a.txt\" to the command line interface.", "tasks": [ { "task": "0f589bf9-9b26-4581-8b78-2961b115ab49", "attribute": { "file_path": "/home/crab/assets/a.txt", "content": "The most recent COMPUTEX was held from 30 May to 2 June 2023 with sessions about such topics as high-performance computing, artificial intelligence, next-gen connectivity and sustainability." 
}, "output": "/home/crab/assets/a.txt" }, { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/assets/a.txt" }, "output": null } ], "adjlist": "0 1\n1", "id": "6c560516-ca14-4f97-b51d-16ad81fc29e4" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/730172f5-894a-4d46-9102-ac7d985a479d.json ================================================ { "description": "Download the image of Jupiter from \"https://upload.wikimedia.org/wikipedia/commons/thumb/2/2b/Jupiter_and_its_shrunken_Great_Red_Spot.jpg/640px-Jupiter_and_its_shrunken_Great_Red_Spot.jpg\" to \"/home/crab/Pictures/jupiter.jpg\", then use LibreOffice Impress to adjust the brightness of this image to make it darker and save the edited version as \"/home/crab/Pictures/jupiter_edited.jpg\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/2/2b/Jupiter_and_its_shrunken_Great_Red_Spot.jpg/640px-Jupiter_and_its_shrunken_Great_Red_Spot.jpg", "file_path": "/home/crab/Pictures/jupiter.jpg" }, "output": "/home/crab/Pictures/jupiter.jpg" }, { "task": "434402f3-647a-4a9a-9d8f-10f5bb6c7cf0", "attribute": { "image_path_before_edit": "/home/crab/Pictures/jupiter.jpg", "image_path_after_edit": "/home/crab/Pictures/jupiter_edited.jpg" }, "output": null } ], "adjlist": "0 1\n1", "id": "730172f5-894a-4d46-9102-ac7d985a479d" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/73038efb-ca0f-4d90-a947-fcfd097dd91b.json ================================================ { "description": "Open Firefox and navigate to the official PyTorch version 1.13 documentation to find an example of `torch.matmul`. Copy all the lines of code in the example to the clipboard. 
Then, paste the clipboard content into Visual Studio Code (VS Code) and save it as a file at \"/home/crab/example_code.txt\".", "tasks": [ { "task": "49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", "attribute": {}, "output": null }, { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/example_code.txt" }, "output": null } ], "adjlist": "0 1\n1", "id": "73038efb-ca0f-4d90-a947-fcfd097dd91b" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/73da97c9-f084-4cab-8697-1151737387ff.json ================================================ { "description": "Download the file from \"https://images.top1market.com/images/cms/uploads/20230928/4950e1db0038feb506fdcfa0c936fd8e.png\" to \"/home/crab/Desktop/meta.png\", then set this image, \"/home/crab/Desktop/meta.png\", as the desktop background on the system.", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://images.top1market.com/images/cms/uploads/20230928/4950e1db0038feb506fdcfa0c936fd8e.png", "file_path": "/home/crab/Desktop/meta.png" }, "output": "/home/crab/Desktop/meta.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Desktop/meta.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "73da97c9-f084-4cab-8697-1151737387ff" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/77aa4dd3-5a68-4686-9cac-26d0ab77c7b4.json ================================================ { "description": "Use Firefox to find out a \"hiking trail\" around \"Munich\" on Google Maps and copy the Google Maps sharing URL of that \"hiking trail\" to the clipboard", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "hiking trail", "place_name": "Munich" }, "output": null } ], "adjlist": "0", "id": "77aa4dd3-5a68-4686-9cac-26d0ab77c7b4" } ================================================ FILE: 
crab-benchmark-v0/dataset/ubuntu/78502f1c-879b-4932-a5fd-d85f7f6b0f81.json ================================================ { "description": "Download the file from \"https://cemse.kaust.edu.sa/sites/default/files/styles/large/public/2023-04/Web%20banner.jpg?itok=d1TvGUKY\" to \"/home/crab/Pictures/KAUST_AI.png\" and then set this image as the desktop background on the system.", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://cemse.kaust.edu.sa/sites/default/files/styles/large/public/2023-04/Web%20banner.jpg?itok=d1TvGUKY", "file_path": "/home/crab/Pictures/KAUST_AI.png" }, "output": "/home/crab/Pictures/KAUST_AI.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Pictures/KAUST_AI.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "78502f1c-879b-4932-a5fd-d85f7f6b0f81" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/7912f7a5-24b9-4dfe-a7b8-1effc1b7a212.json ================================================ { "description": "Combine two images from Image 1 \"/home/crab/assets/campus.png\" and Image 2 \"/home/crab/assets/desert.jpg\" using GIMP (GNU Image Manipulation Program) and save the resulting image to \"/home/crab/assets/campus_desert.png\". 
Image 1 should be placed on the left side of Image 2.", "tasks": [ { "task": "4cf246ea-0a7f-43da-84b6-61d74a2699af", "attribute": { "image_path_1": "/home/crab/assets/campus.png", "image_path_2": "/home/crab/assets/desert.jpg", "output_path": "/home/crab/assets/campus_desert.png" }, "output": null } ], "adjlist": "0", "id": "7912f7a5-24b9-4dfe-a7b8-1effc1b7a212" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/7d5613ec-9b67-4255-b766-d9c6e8466464.json ================================================ { "description": "Paste clipboard content into LibreOffice Writer and save it as an ODT file at \"/home/crab/assets/content.odt\".", "tasks": [ { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/assets/content.odt" }, "output": null } ], "adjlist": "0", "id": "7d5613ec-9b67-4255-b766-d9c6e8466464" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/7dda7e46-78be-4663-b882-6132dbbff335.json ================================================ { "description": "Adjust the brightness of the image located at \"/home/crab/Pictures/Interstellar.jpg\" to a higher value using GIMP (GNU Image Manipulation Program), save the edited image as \"/home/crab/edited_background.png\", and then set this edited image as the desktop background on the system.", "tasks": [ { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { "image_path_before_edit": "/home/crab/Pictures/Interstellar.jpg", "image_path_after_edit": "/home/crab/edited_background.png" }, "output": "/home/crab/edited_background.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/edited_background.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "7dda7e46-78be-4663-b882-6132dbbff335" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/7e6c4927-2220-4522-9e3f-36f69adc3e71.json 
================================================ { "description": "Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at \"/home/crab/assets/clipboard.md\".", "tasks": [ { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/assets/clipboard.md" }, "output": null } ], "adjlist": "0", "id": "7e6c4927-2220-4522-9e3f-36f69adc3e71" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/82c49e12-3b2f-432e-9069-4b67bafebbf7.json ================================================ { "description": "Open Firefox to find a coffee shop around the hungarian parliament on Google Maps, copy the sharing URL of the coffee shop to the clipboard, then paste the clipboard content into Visual Studio Code (VS Code), and save the content as a file at \"/home/crab/Downloads/coffee\".", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "coffee shop", "place_name": "hungarian parliament" }, "output": null }, { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/Downloads/coffee" }, "output": null } ], "adjlist": "0 1\n1", "id": "82c49e12-3b2f-432e-9069-4b67bafebbf7" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/87910f23-ab23-4ccc-b115-d71cff6f0162.json ================================================ { "description": "Use Firefox to search for an image with the keyword \"patagonia,\" copy the URL of the chosen image to the clipboard, and download the file from that URL to \"/home/crab/Desktop/brand.jpg\".", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "patagonia" }, "output": null }, { "task": "a313ea4d-e501-4971-b4fe-db2aad19acsd", "attribute": { "file_path": "/home/crab/Desktop/brand.jpg" }, "output": null } ], "adjlist": "0 1\n1", "id": "87910f23-ab23-4ccc-b115-d71cff6f0162" } ================================================ FILE: 
crab-benchmark-v0/dataset/ubuntu/8afc25eb-7a80-459f-acdc-5c79fc146c29.json ================================================ { "description": "Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at \"/home/crab/assets/content_2.txt\".", "tasks": [ { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/assets/content_2.txt" }, "output": null } ], "adjlist": "0", "id": "8afc25eb-7a80-459f-acdc-5c79fc146c29" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/8cb5ab6d-a56e-43b9-aa83-00a46331e20f.json ================================================ { "description": "Download the image from \"https://res.cloudinary.com/simpleview/image/upload/v1648755098/clients/austin/Austin_Skyline_Credit_Christopher_Sherman_lifetime__4f60343d-9f69-450c-8ad3-fa636761786d.jpg\" to \"/home/crab/Downloads/Austin.jpg\", then use GIMP (GNU Image Manipulation Program) to adjust its brightness to a higher value and save the modified image as \"/home/crab/Downloads/brighter_austin.jpg\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://res.cloudinary.com/simpleview/image/upload/v1648755098/clients/austin/Austin_Skyline_Credit_Christopher_Sherman_lifetime__4f60343d-9f69-450c-8ad3-fa636761786d.jpg", "file_path": "/home/crab/Downloads/Austin.jpg" }, "output": "/home/crab/Downloads/Austin.jpg" }, { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { "image_path_before_edit": "/home/crab/Downloads/Austin.jpg", "image_path_after_edit": "/home/crab/Downloads/brighter_austin.jpg" }, "output": null } ], "adjlist": "0 1\n1", "id": "8cb5ab6d-a56e-43b9-aa83-00a46331e20f" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/90e09946-7b28-4102-b0ed-f683c01dbbd4.json ================================================ { "description": "Use Firefox to find a code repository about \"W&B\" in GitHub and copy the URL of the 
repository to the clipboard.", "tasks": [ { "task": "bcd03c9f-62c9-4001-8d86-78358c59ce22", "attribute": { "keyword": "W&B" }, "output": null } ], "adjlist": "0", "id": "90e09946-7b28-4102-b0ed-f683c01dbbd4" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/925a3607-2802-48aa-b339-13ebfcef43a2.json ================================================ { "description": "Use Firefox to find a code repository about \"segment-anything\" in GitHub and copy the URL of the repository to the clipboard.", "tasks": [ { "task": "bcd03c9f-62c9-4001-8d86-78358c59ce22", "attribute": { "keyword": "segment-anything" }, "output": null } ], "adjlist": "0", "id": "925a3607-2802-48aa-b339-13ebfcef43a2" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/9506dd30-f58d-4832-b336-8037e83e2689.json ================================================ { "description": "Get the content of \"/home/crab/Documents/nba.txt\" by printing it to the command line interface through a terminal", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/Documents/nba.txt" }, "output": null } ], "adjlist": "0", "id": "9506dd30-f58d-4832-b336-8037e83e2689" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/95e347aa-56ab-4d5d-a94c-350ddfddabf9.json ================================================ { "description": "Create a new directory \"/home/crab/png_folder\" and copy all files with the specified \"png\" extension from \"/home/crab/Pictures\" to the directory \"/home/crab/png_folder\".", "tasks": [ { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "png", "source_dir": "/home/crab/Pictures", "target_dir": "/home/crab/png_folder" }, "output": null } ], "adjlist": "0", "id": "95e347aa-56ab-4d5d-a94c-350ddfddabf9" } ================================================ FILE: 
crab-benchmark-v0/dataset/ubuntu/98a360d8-0f95-44cd-bb9d-442fca2918d4.json ================================================ { "description": "Download a file from \"https://github.com/open-mmlab/mmdetection/archive/refs/tags/v3.3.0.zip\" to \"/home/crab/mmdetection_v3.3.0.zip\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://github.com/open-mmlab/mmdetection/archive/refs/tags/v3.3.0.zip", "file_path": "/home/crab/mmdetection_v3.3.0.zip" }, "output": null } ], "adjlist": "0", "id": "98a360d8-0f95-44cd-bb9d-442fca2918d4" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/9c979fc5-8d60-41f1-a494-904a1d312187.json ================================================ { "description": "Use Firefox to search for the country \"United Kingdom\" on Wikipedia, extract the capital city and population, and save this information in an ODS file at \"/home/crab/assets/content.ods\" with LibreOffice Calc. The first column will save the country name, the second will save the capital city name, and the third will save the population. 
No header is needed in the ODS file.", "tasks": [ { "task": "1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", "attribute": { "country": "United Kingdom", "file_path": "/home/crab/assets/content.ods" }, "output": null } ], "adjlist": "0", "id": "9c979fc5-8d60-41f1-a494-904a1d312187" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/9e08971c-7f83-4853-952e-4c4a4a26333b.json ================================================ { "description": "Use Firefox to search for an image using the keyword \"Red Sea\" and copy the URL of the image to the clipboard.", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "Red Sea" }, "output": null } ], "adjlist": "0", "id": "9e08971c-7f83-4853-952e-4c4a4a26333b" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/9fe4f541-61cf-48e0-a081-4371786659c7.json ================================================ { "description": "Set \"/home/crab/Pictures/Interstellar.jpg\" as the screen background of the system", "tasks": [ { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Pictures/Interstellar.jpg" }, "output": null } ], "adjlist": "0", "id": "9fe4f541-61cf-48e0-a081-4371786659c7" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/a0714ef7-bbdc-4f84-bd2e-c6e611d4db9e.json ================================================ { "description": "Get the content of \"/home/crab/ubuntu\" by printing it to the command line interface through a terminal", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/ubuntu" }, "output": null } ], "adjlist": "0", "id": "a0714ef7-bbdc-4f84-bd2e-c6e611d4db9e" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/a2a34580-cded-4bf8-81d9-b36a4d4402d0.json ================================================ { "description": "Set 
\"/home/crab/assets/background.png\" as the screen background of the system", "tasks": [ { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/assets/background.png" }, "output": null } ], "adjlist": "0", "id": "a2a34580-cded-4bf8-81d9-b36a4d4402d0" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/a6b67c2d-d448-4e77-904e-dc7c5f21a5fe.json ================================================ { "description": "Get the content of \"/home/crab/crab/README.md\" by printing it to the command line interface through a terminal", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/crab/README.md" }, "output": null } ], "adjlist": "0", "id": "a6b67c2d-d448-4e77-904e-dc7c5f21a5fe" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/a70ab903-835f-48b7-8356-2321b8b869d8.json ================================================ { "description": "Using Firefox, find the example of torch.matmul provided by the official PyTorch version 1.13 documentation and copy all the lines of code in the example to the clipboard, then paste the clipboard content into LibreOffice Writer and save it as an ODT file at \"/home/crab/Desktop/doc_torch.odt\".", "tasks": [ { "task": "49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", "attribute": {}, "output": null }, { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/Desktop/doc_torch.odt" }, "output": null } ], "adjlist": "0 1\n1", "id": "a70ab903-835f-48b7-8356-2321b8b869d8" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/a78177f5-6cc6-48d7-8c6f-df53399d7759.json ================================================ { "description": "Use Firefox to search for an image using the keyword \"The Colosseum\" and copy the URL of the image to the clipboard.", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { 
"keyword": "The Colosseum" }, "output": null } ], "adjlist": "0", "id": "a78177f5-6cc6-48d7-8c6f-df53399d7759" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/abb16512-27ae-49c0-b12b-7fbf0e95056b.json ================================================ { "description": "Paste the clipboard content into Visual Studio Code (VS Code) and save the file as \"/home/crab/Desktop/content.txt\", then open a terminal and print the content of \"/home/crab/Desktop/content.txt\" to the command line interface.", "tasks": [ { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/Desktop/content.txt" }, "output": "/home/crab/Desktop/content.txt" }, { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/Desktop/content.txt" }, "output": null } ], "adjlist": "0 1\n1", "id": "abb16512-27ae-49c0-b12b-7fbf0e95056b" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/b2ca21dc-dde9-49f5-bec7-321fbf769315.json ================================================ { "description": "Adjust the brightness of the image located at \"/home/crab/assets/desert.jpg\" to a darker value using LibreOffice Impress and save it as \"/home/crab/assets/darker_desert.jpg\", then use GIMP (GNU Image Manipulation Program) to combine this adjusted image with the original image at \"/home/crab/assets/desert.jpg\", placing the darker image on the left side and the original on the right, finally save the resulting comparison image to \"/home/crab/assets/desert_comparison.jpg\".", "tasks": [ { "task": "434402f3-647a-4a9a-9d8f-10f5bb6c7cf0", "attribute": { "image_path_before_edit": "/home/crab/assets/desert.jpg", "image_path_after_edit": "/home/crab/assets/darker_desert.jpg" }, "output": "/home/crab/assets/darker_desert.jpg" }, { "task": "4cf246ea-0a7f-43da-84b6-61d74a2699af", "attribute": { "image_path_1": "/home/crab/assets/darker_desert.jpg", "image_path_2": 
"/home/crab/assets/desert.jpg", "output_path": "/home/crab/assets/desert_comparison.jpg" }, "output": null } ], "adjlist": "0 1\n1", "id": "b2ca21dc-dde9-49f5-bec7-321fbf769315" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/b57c96c1-071b-40f6-b33b-2a0459fc25bb.json ================================================ { "description": "Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from \"/home/crab/assets/background.png\" to a higher value (brighter) and save it to \"/home/crab/Pictures/background_edited.jpg\".", "tasks": [ { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { "image_path_before_edit": "/home/crab/assets/background.png", "image_path_after_edit": "/home/crab/Pictures/background_edited.jpg" }, "output": null } ], "adjlist": "0", "id": "b57c96c1-071b-40f6-b33b-2a0459fc25bb" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/b73019e0-3ce8-4657-8b13-b3e0ab6cfac8.json ================================================ { "description": "Download a file from \"https://raw.githubusercontent.com/camel-ai/camel/master/misc/primary_logo.png\" to \"/home/crab/camel-logo.png\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://raw.githubusercontent.com/camel-ai/camel/master/misc/primary_logo.png", "file_path": "/home/crab/camel-logo.png" }, "output": null } ], "adjlist": "0", "id": "b73019e0-3ce8-4657-8b13-b3e0ab6cfac8" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/ba5aebcb-999d-44d4-b9bc-241f9884c6dd.json ================================================ { "description": "Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from \"/home/crab/Pictures/Interstellar.jpg\" to a higher value (brighter) and save it to \"/home/crab/interstellar_brighter.jpg\".", "tasks": [ { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { 
"image_path_before_edit": "/home/crab/Pictures/Interstellar.jpg", "image_path_after_edit": "/home/crab/interstellar_brighter.jpg" }, "output": null } ], "adjlist": "0", "id": "ba5aebcb-999d-44d4-b9bc-241f9884c6dd" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/be6468be-2218-45c1-9b75-b56efec61eb4.json ================================================ { "description": "Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at \"/home/crab/text_result\".", "tasks": [ { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/text_result" }, "output": null } ], "adjlist": "0", "id": "be6468be-2218-45c1-9b75-b56efec61eb4" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/c4106f9a-9348-4a55-9892-782e6f4b3081.json ================================================ { "description": "Use LibreOffice Impress to adjust the brightness of the image from \"/home/crab/assets/desert.jpg\" to a lower value (darker) and save it to \"/home/crab/assets/desert_edited.png\".", "tasks": [ { "task": "434402f3-647a-4a9a-9d8f-10f5bb6c7cf0", "attribute": { "image_path_before_edit": "/home/crab/assets/desert.jpg", "image_path_after_edit": "/home/crab/assets/desert_edited.png" }, "output": null } ], "adjlist": "0", "id": "c4106f9a-9348-4a55-9892-782e6f4b3081" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/c8800e50-3ff4-4dd2-bc90-33688be99659.json ================================================ { "description": "Download a file from \"https://raw.githubusercontent.com/facebookresearch/detectron2/main/README.md\" to \"/home/crab/Documents/detectron2.txt\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://raw.githubusercontent.com/facebookresearch/detectron2/main/README.md", "file_path": "/home/crab/Documents/detectron2.txt" }, "output": null } ], "adjlist": "0", "id": 
"c8800e50-3ff4-4dd2-bc90-33688be99659" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/ccf31785-ec13-4981-93c5-ca6c242ac0c3.json ================================================ { "description": "Download the flag of Ethiopia image from \"https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Flag_of_Ethiopia.svg/250px-Flag_of_Ethiopia.svg.png\" to \"/home/crab/Pictures/flag.png\", create a new directory named \"/home/crab/Pictures/png_\", and copy all PNG files from \"/home/crab/Pictures\" to the newly created directory \"/home/crab/Pictures/png_\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Flag_of_Ethiopia.svg/250px-Flag_of_Ethiopia.svg.png", "file_path": "/home/crab/Pictures/flag.png" }, "output": "/home/crab/Pictures/flag.png" }, { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "png", "source_dir": "/home/crab/Pictures", "target_dir": "/home/crab/Pictures/png_" }, "output": null } ], "adjlist": "0 1\n1", "id": "ccf31785-ec13-4981-93c5-ca6c242ac0c3" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/d3478489-70f2-4a82-b7d2-0a47b75986eb.json ================================================ { "description": "Use Firefox to search for the country \"Ethiopia\" on Wikipedia, extract the capital city and population, save this information in an ODS file at \"/home/crab/Documents/africa.ods\" with LibreOffice Calc with the first column for the country name, the second for the capital city name, and the third for the population without any header, then create a new directory \"/home/crab/sheet\" and copy all ODS files from \"/home/crab/Documents\" to \"/home/crab/sheet\".", "tasks": [ { "task": "1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", "attribute": { "country": "Ethiopia", "file_path": "/home/crab/Documents/africa.ods" }, "output": 
"/home/crab/Documents/africa.ods" }, { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "ods", "source_dir": "/home/crab/Documents", "target_dir": "/home/crab/sheet" }, "output": null } ], "adjlist": "0 1\n1", "id": "d3478489-70f2-4a82-b7d2-0a47b75986eb" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/d39d40b1-fc26-4169-9d6f-cdf81efe9a3e.json ================================================ { "description": "Use Firefox to search for the country \"Iceland\" on Wikipedia, extract the capital city and population, and save this information in an ODS file at \"/home/crab/country_iceland.ods\" with LibreOffice Calc. The first column will save the country name, the second will save the capital city name, and the third will save the population. No header is needed in the ODS file.", "tasks": [ { "task": "1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", "attribute": { "country": "Iceland", "file_path": "/home/crab/country_iceland.ods" }, "output": null } ], "adjlist": "0", "id": "d39d40b1-fc26-4169-9d6f-cdf81efe9a3e" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/d3c917ff-406f-447a-87f5-b8d835cba750.json ================================================ { "description": "Combine Image 1 \"/home/crab/Pictures/cat.png\" and Image 2 \"/home/crab/assets/campus.png\" using GIMP (GNU Image Manipulation Program), placing Image 1 on the left side of Image 2, and save the combined image to \"/home/crab/Desktop/background.png\". 
Then, set this combined image as the screen background of the system.", "tasks": [ { "task": "4cf246ea-0a7f-43da-84b6-61d74a2699af", "attribute": { "image_path_1": "/home/crab/Pictures/cat.png", "image_path_2": "/home/crab/assets/campus.png", "output_path": "/home/crab/Desktop/background.png" }, "output": "/home/crab/Desktop/background.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Desktop/background.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "d3c917ff-406f-447a-87f5-b8d835cba750" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/d6e460e4-c295-40ad-883c-11300d7832f0.json ================================================ { "description": "Using Firefox, locate the example provided of torch.matmul by the official PyTorch version 1.13 documentation and copy all the lines of code to the clipboard, then open LibreOffice Writer, paste the content from the clipboard, and save the document as an ODT file at \"/home/crab/Documents/torch_matmul.odt\".", "tasks": [ { "task": "49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", "attribute": {}, "output": null }, { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/Documents/torch_matmul.odt" }, "output": null } ], "adjlist": "0 1\n1", "id": "d6e460e4-c295-40ad-883c-11300d7832f0" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/d9e4e23c-2a2a-4b5c-b034-7deb6036572d.json ================================================ { "description": "Use Firefox to find out a \"amusement park\" around \"Sentosa\" on Google Maps and copy the Google Maps sharing URL of that \"amusement park\" to the clipboard", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "amusement park", "place_name": "Sentosa" }, "output": null } ], "adjlist": "0", "id": "d9e4e23c-2a2a-4b5c-b034-7deb6036572d" } ================================================ FILE: 
crab-benchmark-v0/dataset/ubuntu/e31d4e3b-b753-4deb-b9ad-a0add5d4790e.json ================================================ { "description": "Use Firefox to search for an image with the keyword \"Mission: Impossible\", copy the image's URL to the clipboard, and then download the file from the clipboard's URL to \"/home/crab/Pictures/movie.jpg\".", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "Mission: Impossible" }, "output": "" }, { "task": "a313ea4d-e501-4971-b4fe-db2aad19acsd", "attribute": { "file_path": "/home/crab/Pictures/movie.jpg" }, "output": null } ], "adjlist": "0 1\n1", "id": "e31d4e3b-b753-4deb-b9ad-a0add5d4790e" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/f07a1f32-2f3f-40e7-b12f-8f1b128c41f6.json ================================================ { "description": "Create a new directory \"/home/crab/assets_copy\" and copy all files with the specified \"txt\" extension from \"/home/crab/assets\" to the directory \"/home/crab/assets_copy\".", "tasks": [ { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "txt", "source_dir": "/home/crab/assets", "target_dir": "/home/crab/assets_copy" }, "output": null } ], "adjlist": "0", "id": "f07a1f32-2f3f-40e7-b12f-8f1b128c41f6" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/f5cce3a0-ba65-4317-95f8-1fc7d9776c78.json ================================================ { "description": "Set \"/home/crab/deepmind.png\" as the screen background of the system", "tasks": [ { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/deepmind.png" }, "output": null } ], "adjlist": "0", "id": "f5cce3a0-ba65-4317-95f8-1fc7d9776c78" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/f67a26e4-58dd-4dc6-8859-affbf1d62f94.json ================================================ { "description": "Open 
\"/home/crab/poem\" using vim in a terminal, write \"Two roads diverged in a yellow wood, and sorry I could not travel both and be one traveler, long I stood and looked down one as far as I could to where it bent in the undergrowth.\", save and exit vim, and then print the content of \"/home/crab/poem\" to the command line interface through the terminal.", "tasks": [ { "task": "0f589bf9-9b26-4581-8b78-2961b115ab49", "attribute": { "file_path": "/home/crab/poem", "content": "Two roads diverged in a yellow wood, and sorry I could not travel both and be one traveler, long I stood and looked down one as far as I could to where it bent in the undergrowth." }, "output": "/home/crab/poem" }, { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/poem" }, "output": null } ], "adjlist": "0 1\n1", "id": "f67a26e4-58dd-4dc6-8859-affbf1d62f94" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/f96d7c34-9543-4679-a6ea-89e0c2ef7b1c.json ================================================ { "description": "Open \"/home/crab/Documents/result\" using vim in a terminal, write \"Celtics vs. Mavericks odds, score prediction, time: 2024 NBA Finals picks, Game 1 best bets by proven model\", then save and exit vim.", "tasks": [ { "task": "0f589bf9-9b26-4581-8b78-2961b115ab49", "attribute": { "file_path": "/home/crab/Documents/result", "content": "Celtics vs. Mavericks odds, score prediction, time: 2024 NBA Finals picks, Game 1 best bets by proven model" }, "output": null } ], "adjlist": "0", "id": "f96d7c34-9543-4679-a6ea-89e0c2ef7b1c" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu_subtasks.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# ruff: noqa: E501
import base64
import hashlib
import io
import os
import re
import subprocess
import time
from collections import Counter
from functools import cache
from typing import Callable, List, Optional, Tuple

import cv2
import easyocr
import imageio as imio
import networkx as nx
import numpy as np
import psutil
import pyperclip
import requests
import torch
from networkx import DiGraph, path_graph
from numpy.linalg import norm
from PIL import Image

from crab import SubTask, TaskGenerator, action, evaluator
from crab.actions.crab_actions import check_submit, submit


class ImageMatcher:
    """
    A class to handle image matching, resizing, and cropping operations using
    accelerated feature matching.
    See https://github.com/verlab/accelerated_features.
    """

    def __init__(self, top_k: int = 4096):
        """
        Initializes the ImageMatcher with a pretrained XFeat model.

        Parameters:
            top_k (int): The number of top features to use for matching.
        """
        # Downloads/loads the XFeat model through torch.hub at construction
        # time, so instantiation requires network access on first use.
        self.xfeat = torch.hub.load(
            "verlab/accelerated_features", "XFeat", pretrained=True, top_k=top_k
        )
        self.top_k = top_k

    def warp_corners_and_draw_matches(
        self,
        ref_points: np.ndarray,
        dst_points: np.ndarray,
        img1: np.ndarray,
        img2: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Calculates the homography matrix and warps the corners of the first
        image to the second image space.

        Parameters:
            ref_points (np.ndarray): Reference points from the first image.
            dst_points (np.ndarray): Destination points from the second image.
            img1 (np.ndarray): The first image.
            img2 (np.ndarray): The second image.

        Returns:
            Tuple[np.ndarray, np.ndarray]: Image with warped corners drawn and
            the warped corner coordinates.
        """
        # Robust homography estimation (MAGSAC) from the matched keypoints.
        H, mask = cv2.findHomography(
            ref_points,
            dst_points,
            cv2.USAC_MAGSAC,
            3.5,
            maxIters=1000,
            confidence=0.999,
        )
        mask = mask.flatten()
        h, w = img1.shape[:2]
        corners_img1 = np.array(
            [[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]], dtype=np.float32
        ).reshape(-1, 1, 2)
        warped_corners = cv2.perspectiveTransform(corners_img1, H)
        img2_with_corners = img2.copy()
        # i-1 at i=0 wraps to the last corner, closing the quadrilateral.
        for i in range(len(warped_corners)):
            start_point = tuple(warped_corners[i - 1][0].astype(int))
            end_point = tuple(warped_corners[i][0].astype(int))
            cv2.line(img2_with_corners, start_point, end_point, (0, 255, 0), 4)
        keypoints1 = [cv2.KeyPoint(p[0], p[1], 5) for p in ref_points]
        keypoints2 = [cv2.KeyPoint(p[0], p[1], 5) for p in dst_points]
        # Only inlier matches (mask == 1) are drawn.
        matches = [cv2.DMatch(i, i, 0) for i in range(len(mask)) if mask[i]]
        img_matches = cv2.drawMatches(
            img1,
            keypoints1,
            img2_with_corners,
            keypoints2,
            matches,
            None,
            matchColor=(0, 255, 0),
            flags=2,
        )
        return img_matches, warped_corners

    def _get_bounding_box(
        self, warped_corners: np.ndarray, img_shape: Tuple[int, int]
    ) -> List[int]:
        """
        Computes the axis-aligned bounding box around the warped corners,
        clamped to the image bounds.

        Parameters:
            warped_corners (np.ndarray): The warped corner coordinates,
                shaped (N, 1, 2) as returned by cv2.perspectiveTransform.
            img_shape (Tuple[int, int]): The image shape as (height, width).

        Returns:
            List[int]: Bounding box coordinates [x_min, x_max, y_min, y_max].
        """
        h, w = img_shape
        x_min = np.min(warped_corners[:, 0, 0])
        x_max = np.max(warped_corners[:, 0, 0])
        y_min = np.min(warped_corners[:, 0, 1])
        y_max = np.max(warped_corners[:, 0, 1])
        # Clamp to valid pixel coordinates.
        x_min = max(0, x_min)
        x_max = min(w - 1, x_max)
        y_min = max(0, y_min)
        y_max = min(h - 1, y_max)
        return [int(x_min), int(x_max), int(y_min), int(y_max)]

    def _resize_image(
        self, img1: np.ndarray, img2: np.ndarray, scale: float, match_dimension: str
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Resizes img1 (preserving aspect ratio) so that one dimension matches a
        scaled dimension of img2.

        Parameters:
            img1 (np.ndarray): The image to be resized.
            img2 (np.ndarray): The reference image (returned unchanged).
            scale (float): The scale factor (e.g. 0.5 for half size).
            match_dimension (str): The dimension to match ('height' or 'width').

        Returns:
            Tuple[np.ndarray, np.ndarray]: Resized img1 and original img2.

        Raises:
            ValueError: If match_dimension is not 'height' or 'width'.
        """
        h1, w1 = img1.shape[:2]
        h2, w2 = img2.shape[:2]
        if match_dimension == "height":
            new_height = int(h2 * scale)
            new_width = int(w1 * (new_height / h1))
        elif match_dimension == "width":
            new_width = int(w2 * scale)
            new_height = int(h1 * (new_width / w1))
        else:
            raise ValueError("match_dimension must be either 'height' or 'width'.")
        resized_img1 = cv2.resize(img1, (new_width, new_height))
        return resized_img1, img2

    def get_resizing_functions(
        self,
    ) -> List[Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]]:
        """
        Provides the candidate resizing strategies tried by match_images:
        identity, match height/width at full scale, match height/width at
        half scale.

        Returns:
            List[Callable]: List of (img1, img2) -> (img1', img2) functions.
        """
        return [
            lambda x, y: (x, y),
            lambda x, y: self._resize_image(x, y, 1.0, "height"),
            lambda x, y: self._resize_image(x, y, 1.0, "width"),
            lambda x, y: self._resize_image(x, y, 0.5, "height"),
            lambda x, y: self._resize_image(x, y, 0.5, "width"),
        ]

    def match_images(
        self,
        im1_path: str,
        im2_path: str,
        top_k: int = 4096,
        match_num_threshold: int = 80,
    ) -> Tuple[Optional[List[int]], Optional[np.ndarray], int]:
        """
        Matches two images, trying several resize strategies, and finds the
        bounding box around the matched area if sufficient matches are found.

        Parameters:
            im1_path (str): Path to the first image (the template).
            im2_path (str): Path to the second image (searched in).
            top_k (int): The number of top features to use for matching.
            match_num_threshold (int): Minimum number of matches required to
                consider the match valid.

        Returns:
            Tuple[Optional[List[int]], Optional[np.ndarray], int]: Bounding
            box [x_min, x_max, y_min, y_max] (or None), visualization image
            with matches drawn (or None), and the number of matches found.
        """
        im1 = self.load_and_convert_image(im1_path)
        im2 = self.load_and_convert_image(im2_path)
        best_matches = {
            "count": 0,
            "im1_resized": None,
            "im2_resized": None,
            "mkpts_0": None,
            "mkpts_1": None,
        }
        for resize_func in self.get_resizing_functions():
            try:
                im1_resized, im2_resized = resize_func(im1, im2)
                mkpts_0, mkpts_1 = self.xfeat.match_xfeat_star(
                    im1_resized, im2_resized, top_k=top_k
                )
                if len(mkpts_0) > best_matches["count"]:
                    best_matches.update(
                        {
                            "count": len(mkpts_0),
                            "im1_resized": im1_resized,
                            "im2_resized": im2_resized,
                            "mkpts_0": mkpts_0,
                            "mkpts_1": mkpts_1,
                        }
                    )
            except Exception:
                # Best-effort: a failing resize strategy is simply skipped.
                continue
        if best_matches["count"] >= match_num_threshold:
            canvas, warped_corners = self.warp_corners_and_draw_matches(
                best_matches["mkpts_0"],
                best_matches["mkpts_1"],
                best_matches["im1_resized"],
                best_matches["im2_resized"],
            )
            # BUG FIX: the bounding box must be clamped to the shape of the
            # image that produced the *best* match, not to `im2_resized` left
            # over from the last loop iteration (which may correspond to a
            # different resize strategy).
            bbox = self._get_bounding_box(
                warped_corners, best_matches["im2_resized"].shape[:2]
            )
        else:
            bbox, canvas = None, None
        return bbox, canvas, best_matches["count"]

    def load_and_convert_image(self, filepath: str) -> np.ndarray:
        """
        Loads an image from a file and converts it to JPG format if necessary.

        Parameters:
            filepath (str): The path to the image file.

        Returns:
            np.ndarray: The loaded image as a BGR array (channels reversed
            from the RGB JPEG re-encode).
        """
        image = Image.open(filepath)
        if image.mode != "RGB":
            image = image.convert("RGB")
        with io.BytesIO() as output:
            image.save(output, format="JPEG")
            converted_image = np.copy(imio.v2.imread(output)[..., ::-1])
        return converted_image


image_matcher = ImageMatcher()


def from_env_load_and_save_file(env, file_path, output_dir="/tmp/local_save"):
    """
    Load a file from the remote environment, transfer it base64-encoded, and
    save it to a local directory with the same basename.

    Args:
        env: The environment object with the _action_endpoint method.
        file_path (str): The path to the file to be loaded.
        output_dir (str): The directory where the file should be saved
            (default is "/tmp/local_save").

    Returns:
        str: The path to the saved file.
    """

    @action(env_name="ubuntu")
    def get_encoded_file(file_path: str) -> bytes | None:
        try:
            with open(file_path, "rb") as file:
                file_bytes = file.read()
            encoded_string = base64.b64encode(file_bytes).decode("utf-8")
        except Exception:
            return None
        return encoded_string

    # Create output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)

    # Load the file from the environment as a base64 string.
    encoded_string = env._action_endpoint(get_encoded_file, {"file_path": file_path})

    # NOTE(review): if the remote read failed, encoded_string is None and the
    # decode below raises AttributeError — callers appear to assume the file
    # exists; confirm.
    decoded_bytes = base64.b64decode(encoded_string.encode("utf-8"))

    # Save the decoded bytes under the same basename locally.
    file_name = os.path.basename(file_path)
    output_file_path = os.path.join(output_dir, file_name)
    with open(output_file_path, "wb") as file:
        file.write(decoded_bytes)
    return output_file_path
""" @action(env_name="ubuntu") def get_encoded_file(file_path: str) -> bytes | None: try: with open(file_path, "rb") as file: file_bytes = file.read() encoded_string = base64.b64encode(file_bytes).decode("utf-8") except Exception: return None return encoded_string # Create output directory if it does not exist os.makedirs(output_dir, exist_ok=True) # Load the file and convert to bytes encoded_string = env._action_endpoint(get_encoded_file, {"file_path": file_path}) # Decode the Base64 string back to bytes decoded_bytes = base64.b64decode(encoded_string.encode("utf-8")) # Create the output file path file_name = os.path.basename(file_path) output_file_path = os.path.join(output_dir, file_name) # Save the decoded bytes to the output path with open(output_file_path, "wb") as file: file.write(decoded_bytes) return output_file_path def crop_image(img: np.ndarray, bbox: List[int]) -> np.ndarray: """ Crops the image based on the bounding box coordinates. Parameters: img (np.ndarray): The input image. bbox (List[int]): Bounding box coordinates [x_min, x_max, y_min, y_max]. Returns: np.ndarray: The cropped image. """ x_min, x_max, y_min, y_max = bbox return img[y_min:y_max, x_min:x_max] def calculate_bbox_center(bbox: List[int]) -> Tuple[int, int]: """ Calculates the center of a bounding box. Parameters: bbox (List[int]): The bounding box coordinates [x_min, x_max, y_min, y_max]. Returns: Tuple[int, int]: The center coordinates (x, y). """ x_min, x_max, y_min, y_max = bbox x_center = (x_min + x_max) // 2 y_center = (y_min + y_max) // 2 return x_center, y_center def is_bbox_in_direction(bbox_1: List[int], bbox_2: List[int], direction: str) -> bool: """ Check if the center of bbox_1 is in the specified direction relative to the center of bbox_2. Args: bbox_1 (List[int]): The bounding box coordinates [x_min, x_max, y_min, y_max] of the first bounding box. bbox_2 (List[int]): The bounding box coordinates [x_min, x_max, y_min, y_max] of the second bounding box. 
def ocr_text_matching(
    image_path: str, text: str
) -> Optional[Tuple[List[int], str, float]]:
    """
    Run OCR on an image and look for a given substring.

    Parameters:
        image_path (str): The path to the image file.
        text (str): The text string to search for in the image.

    Returns:
        Optional[Tuple[List[int], str, float]]: The bounding box
        [x_min, x_max, y_min, y_max], the matched text, and the OCR
        confidence for the first hit, otherwise None.
    """
    for quad, detected_text, confidence in easyocr.Reader(["en"]).readtext(image_path):
        if text not in detected_text:
            continue
        # The OCR result gives four corner points; reduce to an axis-aligned box.
        xs = [point[0] for point in quad]
        ys = [point[1] for point in quad]
        return (
            [int(min(xs)), int(max(xs)), int(min(ys)), int(max(ys))],
            detected_text,
            confidence,
        )
    return None


def convert_file_to_images(file_path: str) -> List[str]:
    """
    Convert a document to JPG images using LibreOffice.

    Args:
        file_path (str): The path to the file.

    Returns:
        List[str]: Paths of generated images whose names contain the source
        file's basename.

    Raises:
        RuntimeError: If the LibreOffice conversion fails.
        FileNotFoundError: If no converted images can be located.
    """
    output_format = "jpg"
    output_dir = "/tmp/converted_images"
    os.makedirs(output_dir, exist_ok=True)

    # Headless LibreOffice does the actual conversion.
    result = subprocess.run(
        [
            "libreoffice",
            "--headless",
            "--convert-to",
            output_format,
            "--outdir",
            output_dir,
            file_path,
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f"Conversion failed: {result.stderr}")

    image_files = [
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if name.endswith(f".{output_format}")
    ]
    if not image_files:
        raise FileNotFoundError(
            f"No {output_format} files found in the output directory"
        )

    # Keep only outputs derived from this particular input file (the output
    # directory is shared between conversions).
    file_basename = os.path.splitext(os.path.basename(file_path))[0]
    matching_images = [
        name for name in image_files if file_basename in os.path.basename(name)
    ]
    if not matching_images:
        raise FileNotFoundError(
            f"No images found with basename matching the original file: {file_basename}"
        )
    return matching_images
""" output_format = "jpg" output_dir = "/tmp/converted_images" os.makedirs(output_dir, exist_ok=True) # Run LibreOffice conversion command result = subprocess.run( [ "libreoffice", "--headless", "--convert-to", output_format, "--outdir", output_dir, file_path, ], capture_output=True, text=True, ) # Check if the conversion was successful if result.returncode != 0: raise RuntimeError(f"Conversion failed: {result.stderr}") # Collect the generated image file paths image_files = [ os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(f".{output_format}") ] # Verify if the files were successfully saved if not image_files: raise FileNotFoundError( f"No {output_format} files found in the output directory" ) # Get the basename of the original file (without extension) file_basename = os.path.splitext(os.path.basename(file_path))[0] # Check if any of the images match the basename of the original file matching_images = [f for f in image_files if file_basename in os.path.basename(f)] if not matching_images: raise FileNotFoundError( f"No images found with basename matching the original file: {file_basename}" ) return matching_images def cleanup_files(files: List[str]): """ Delete the list of files. Args: files (List[str]): List of paths to the files to be deleted. """ for file in files: os.remove(file) def is_valid_url(url): # Regular expression to check if the string is a valid HTTP/HTTPS URL url_pattern = re.compile( r"^(https?://)" # http:// or https:// r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|" # domain r"localhost|" # localhost... r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # ...or ip r"(?::\d+)?" 
def is_valid_image_data_uri(uri):
    """Return True when the string is a base64 Data URI for a common image format."""
    data_uri_pattern = re.compile(
        r"^data:image/(png|jpeg|gif|svg\+xml|bmp|webp);base64,[A-Za-z0-9+/]+={0,2}$",
        re.IGNORECASE,
    )
    return bool(re.match(data_uri_pattern, uri))


def is_github_repo_url(url):
    """Return True when the URL points at a GitHub repository root (user/repo)."""
    github_repo_pattern = re.compile(
        r"^https?://"  # Protocol
        r"github\.com/"  # Domain
        r"[^/]+/"  # Username
        r"[^/]+/?$",  # Repository name, optional trailing slash
        re.IGNORECASE,
    )
    return bool(re.match(github_repo_pattern, url))


def get_rgb_values_outside_bbox(
    img: np.ndarray, bbox: List[int], margin: int = 10
) -> List[int]:
    """
    Find the most frequent pixel color outside a bounding box (expanded by a
    margin) and return it channel-reversed.

    FIX: the previous return annotation/docstring claimed a
    Tuple[np.ndarray, Tuple[int, int, int]] return, but the function has
    always returned a single reversed 3-element list; annotation and docs
    now match the behavior (code unchanged).

    Parameters:
        img (np.ndarray): The input image (H x W x 3).
        bbox (List[int]): Bounding box coordinates [x_min, x_max, y_min, y_max].
        margin (int): The margin to add outside the bounding box. Default is 10.

    Returns:
        List[int]: The most frequent pixel value outside the box, with the
        channel order reversed (BGR input -> RGB output, presumably — the
        caller's channel order is not visible here; confirm).
    """
    x_min, x_max, y_min, y_max = bbox

    # Ensure the coordinates with margin are within image dimensions
    x_min_with_margin = max(0, x_min - margin)
    x_max_with_margin = min(img.shape[1], x_max + margin)
    y_min_with_margin = max(0, y_min - margin)
    y_max_with_margin = min(img.shape[0], y_max + margin)

    # Mask out the (expanded) bounding box area; True marks "outside" pixels.
    mask = np.ones(img.shape[:2], dtype=bool)
    mask[y_min_with_margin:y_max_with_margin, x_min_with_margin:x_max_with_margin] = (
        False
    )

    # Extract the pixel values outside the bounding box with margin
    rgb_values = img[mask]

    # Find the most frequent pixel value.
    rgb_values_tuple = [tuple(rgb) for rgb in rgb_values]
    most_common_rgb = Counter(rgb_values_tuple).most_common(1)[0][0]
    return list(most_common_rgb)[::-1]
""" x_min, x_max, y_min, y_max = bbox # Ensure the coordinates with margin are within image dimensions x_min_with_margin = max(0, x_min - margin) x_max_with_margin = min(img.shape[1], x_max + margin) y_min_with_margin = max(0, y_min - margin) y_max_with_margin = min(img.shape[0], y_max + margin) # Create a mask for the bounding box area with margin mask = np.ones(img.shape[:2], dtype=bool) mask[y_min_with_margin:y_max_with_margin, x_min_with_margin:x_max_with_margin] = ( False ) # Extract the RGB values outside the bounding box with margin rgb_values = img[mask] # Find the most frequent RGB value rgb_values_tuple = [tuple(rgb) for rgb in rgb_values] most_common_rgb = Counter(rgb_values_tuple).most_common(1)[0][0] return list(most_common_rgb)[::-1] def contains_required_strings(clipboard_content: str, required_strings: list) -> bool: """ Check if all required strings are present in the clipboard content. Args: clipboard_content (str): The content from the clipboard. required_strings (list): A list of required strings to check. Returns: bool: True if all required strings are found in the clipboard content, False otherwise. """ for string in required_strings: if string not in clipboard_content: return False return True @evaluator(env_name="ubuntu") def verify_file_content_with_clipboard(file_path: str) -> bool: """ Verify that the content of the file matches the clipboard content line by line. Args: file_path (str): The path to the file to verify. Returns: bool: True if the file content matches the clipboard content, False otherwise. """ def verify_content_with_clipboard(file_content: str) -> bool: """ Verify that the provided file content matches the clipboard content line by line. Args: file_content (str): The content of the file to verify. Returns: bool: True if the file content matches the clipboard content, False otherwise. 
""" clipboard_content = pyperclip.paste() clipboard_lines = clipboard_content.split("\n") file_lines = file_content.split("\n") # Check if each line from the clipboard content is in the corresponding line in the file content for clipboard_line, file_line in zip(clipboard_lines, file_lines): if clipboard_line not in file_line: return False return True with open(file_path, "r") as file: file_content = file.read() return verify_content_with_clipboard(file_content) @evaluator(env_name="ubuntu") def verify_odt_file_content_with_clipboard(file_path: str) -> bool: """ Verify that the content of the ODT file matches the clipboard content. Args: file_path (str): The path to the ODT file to verify. Returns: bool: True if the ODT file content matches the clipboard content, False otherwise. """ from odf import teletype, text from odf.opendocument import load def verify_content_with_clipboard(file_content: str) -> bool: """ Verify that the provided file content matches the clipboard content line by line. Args: file_content (str): The content of the file to verify. Returns: bool: True if the file content matches the clipboard content, False otherwise. """ clipboard_content = pyperclip.paste() clipboard_lines = clipboard_content.split("\n") file_lines = file_content.split("\n") # Check if each line from the clipboard content is in the corresponding line in the file content for clipboard_line, file_line in zip(clipboard_lines, file_lines): if clipboard_line not in file_line: return False return True textdoc = load(file_path) allparas = textdoc.getElementsByType(text.P) odt_content = "\n".join([teletype.extractText(p) for p in allparas]) return verify_content_with_clipboard(odt_content) @evaluator(env_name="ubuntu", local=True) def verify_combined_image( image_path_1: str, image_path_2: str, file_path: str, direction: str, env ) -> bool: """ Check if the combined file contains both input images without overlay and in the specified direction. 
@evaluator(env_name="ubuntu")
def is_image_2_brighter(image_path_1: str, image_path_2: str) -> bool:
    """
    Check if the second image is brighter than the first image.

    Args:
        image_path_1 (str): The path to the first image.
        image_path_2 (str): The path to the second image.

    Returns:
        bool: True when the second image's mean brightness is strictly higher.
    """

    def brightness(path: str) -> float:
        # Color images: per-pixel euclidean norm over the channels,
        # normalized by sqrt(3); grayscale: plain mean. (Do not feed HSV
        # images to this helper.)
        img = cv2.imread(path)
        if len(img.shape) == 3:
            return float(np.average(norm(img, axis=2)) / np.sqrt(3))
        return float(np.average(img))

    return brightness(image_path_2) > brightness(image_path_1)
""" def brightness(image_path: str) -> float: # Load the image img = cv2.imread(image_path) if len(img.shape) == 3: # Colored RGB or BGR (*Do Not* use HSV images with this function) # create brightness with euclidean norm return float(np.average(norm(img, axis=2)) / np.sqrt(3)) else: # Grayscale return float(np.average(img)) brightness_1 = brightness(image_path_1) brightness_2 = brightness(image_path_2) return brightness_2 > brightness_1 @evaluator(env_name="ubuntu") def is_img_url_in_clipboard() -> bool: """ Check if the clipboard contains a valid URL or a Data URI that is specific to images. Args: env (Environment): The current testing environment, used to simulate clipboard functionality. Returns: bool: True if a valid URL or Data URI specific to images is found in the clipboard, False otherwise. """ clipboard_content = pyperclip.paste() # Simulate clipboard paste action data_uri_pattern = re.compile( r"^data:image/(png|jpeg|gif|svg\+xml|bmp|webp);base64,[A-Za-z0-9+/]+={0,2}$", re.IGNORECASE, ) is_valid_image_data = bool(re.match(data_uri_pattern, clipboard_content)) url_pattern = re.compile( r"^(https?://)" # http:// or https:// r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|" # domain r"localhost|" # localhost... r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # ...or ip r"(?::\d+)?" # optional port r"(?:/?|[/?]\S+)$", re.IGNORECASE, ) is_valid_url = bool(re.match(url_pattern, clipboard_content)) if is_valid_url or is_valid_image_data: return True return False @evaluator(env_name="ubuntu") def is_github_repo_url_in_clipboard(keyword: str) -> bool: """ Check if the clipboard contains a valid GitHub repository URL. Returns: bool: True if the clipboard content is a valid GitHub repository URL, False otherwise. 
""" clipboard_content = pyperclip.paste() # Access the clipboard content if keyword.lower() not in clipboard_content: return False github_repo_pattern = re.compile( r"^https?://" # Protocol r"github\.com/" # Domain r"[^/]+/" # Username r"[^/]+/?$", # Repository name, optional trailing slash re.IGNORECASE, ) return bool(re.match(github_repo_pattern, clipboard_content)) # return is_github_repo_url(clipboard_content) @evaluator(env_name="ubuntu") def is_software_installed(package_name: str) -> bool: try: subprocess.check_call( ["dpkg", "-s", package_name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) return True except subprocess.CalledProcessError: return False @cache def get_file_url_hash(url): response = requests.get(url) response.raise_for_status() return hashlib.sha256(response.content).hexdigest() @evaluator(env_name="ubuntu") def download_and_verify_file(url: str, file_path: str) -> bool: # Check if the file was downloaded if not os.path.isfile(file_path): return False # Calculate the hash of the downloaded file with open(file_path, "rb") as f: file_data = f.read() downloaded_file_hash = hashlib.sha256(file_data).hexdigest() # Get the file content directly from the URL try: original_file_hash = get_file_url_hash(url) except requests.RequestException: return False # Compare the hashes return downloaded_file_hash == original_file_hash @evaluator(env_name="ubuntu") def download_from_clipboard_and_verify_file(file_path: str) -> bool: # Check if the file was downloaded if not os.path.isfile(file_path): return False # Calculate the hash of the downloaded file with open(file_path, "rb") as f: file_data = f.read() downloaded_file_hash = hashlib.sha256(file_data).hexdigest() # Get the url from clipboard content = pyperclip.paste() """ Problem: 1. There exist infinite possibilities of the downloable format in the clipboard. Not sure if we need to verify the format. 
""" # Get the file content directly from the URL try: original_file_hash = get_file_url_hash(content) except requests.RequestException: return False # Compare the hashes return downloaded_file_hash == original_file_hash @evaluator(env_name="ubuntu") def check_color_scheme(assmue: str) -> bool: out = subprocess.check_output( ["gsettings", "get", "org.gnome.desktop.interface", "color-scheme"], text=True, ) return assmue in out @evaluator(env_name="ubuntu") def check_text_in_current_window_name(text: str) -> bool: try: out = subprocess.check_output( ["xdotool", "getwindowfocus", "getwindowname"], text=True ).strip() except subprocess.CalledProcessError: return False return text in out @evaluator(env_name="ubuntu") def check_current_window_process(assmue: str) -> bool: try: out = subprocess.check_output( ["xdotool", "getwindowfocus", "getwindowpid"], text=True ).strip() if not out.isdigit(): return False process = psutil.Process(int(out)) except ( psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess, subprocess.CalledProcessError, ): return False return assmue.strip() == process.name() @evaluator(env_name="ubuntu") def check_file_exist(file_path: str) -> bool: return os.path.isfile(file_path) @evaluator(env_name="ubuntu") def check_file_content(file_path: str, content: str) -> bool: if not os.path.isfile(file_path): return False with open(file_path, "r") as f: file_content = f.read() return content in file_content @evaluator(env_name="ubuntu") def empty_evaluator() -> bool: return False @evaluator(env_name="ubuntu") def is_process_open(process_name: str) -> bool: """ Check if the given process is currently running. Args: process_name(str): The process name to check. 
""" for process in psutil.process_iter(["name"]): try: if process_name.lower() in process.info["name"].lower(): # type: ignore return True except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): pass return False @evaluator(env_name="ubuntu") def check_app_usage_history(app_name: str) -> bool: """ Check if the given application has been in the usage history. Args: app_name(str): The name of the application to check. Returns: bool: True if the app was recently used, False otherwise. """ for process in psutil.process_iter(["name", "create_time"]): try: if app_name.lower() in process.info["name"].lower(): # Assuming 'recently used' implies a running process was started within the last hour if time.time() - process.info["create_time"] < 3600: return True except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): continue return False @evaluator(env_name="ubuntu") def check_process_closed(app_name: str) -> bool: """ Verify that the specified process is not running. Args: app_name(str): The application name to check for its absence. Returns: bool: True if the app is not running, False otherwise. """ return not any( app_name.lower() in proc.info["name"].lower() for proc in psutil.process_iter(["name"]) if proc.is_running() ) @evaluator(env_name="ubuntu") def verify_background(photo_path: str) -> bool: """ Verify that the specified photo is currently set as the desktop background. Args: photo_path (str): The path to the photo file. Returns: bool: True if the photo is the current background, False otherwise. 
""" out = subprocess.check_output( ["gsettings", "get", "org.gnome.desktop.background", "picture-uri"], universal_newlines=True, ) current_background = ( out.strip().split("'")[1].split("file:/")[1] ) # Extract the path # Compute hashes to compare files if os.path.exists(photo_path) and os.path.exists(current_background): with open(photo_path, "rb") as f: original_hash = hashlib.sha256(f.read()).hexdigest() with open(current_background, "rb") as f: current_hash = hashlib.sha256(f.read()).hexdigest() return original_hash == current_hash return False @evaluator(env_name="ubuntu") def is_torch_matmul_example_copied_correctly() -> bool: """ Verify if the clipboard contains the correct torch.matmul example snippets from PyTorch 1.13 documentation. """ def contains_required_strings( clipboard_content: str, required_strings: list ) -> bool: """ Check if all required strings are present in the clipboard content. Args: clipboard_content (str): The content from the clipboard. required_strings (list): A list of required strings to check. Returns: bool: True if all required strings are found in the clipboard content, False otherwise. 
""" for string in required_strings: if string not in clipboard_content: return False return True required_strings = [ "tensor1 = torch.randn", "tensor2 = torch.randn", "torch.matmul(tensor1, tensor2).size()", ] clipboard_content = pyperclip.paste().strip() if not clipboard_content: return False return contains_required_strings(clipboard_content, required_strings) @evaluator(env_name="ubuntu") def check_directory_exists(dir_path: str) -> bool: """Check if the specified directory exists.""" return os.path.isdir(dir_path) @evaluator(env_name="ubuntu") def verify_files_copied(source_dir: str, target_dir: str, file_extension: str) -> bool: """Verify that files were copied correctly.""" source_files = { file for file in os.listdir(source_dir) if file.endswith(f".{file_extension}") } target_files = { file for file in os.listdir(target_dir) if file.endswith(f".{file_extension}") } return source_files == target_files @evaluator(env_name="ubuntu", local=True) def check_contain_input_text_list(texts: list[str], env) -> bool: """ Check if all provided search terms were entered in the browser. Args: search_terms: A list of strings, each representing a search term that needs to be verified. env: The current testing environment, used to simulate browser interactions. Returns: bool: True if all search terms are found in the written text, False otherwise. """ if env.trajectory: inputs = [ params["text"].lower() for action_name, params, _ in env.trajectory if action_name == "write_text" ] return all( any(term.lower() in input_text for input_text in inputs) for term in texts ) return False @evaluator(env_name="ubuntu") def is_google_maps_url_in_clipboard() -> bool: """ Check if the clipboard contains a valid shortened Google Maps URL. 
""" clipboard_content = pyperclip.paste() maps_url_pattern = re.compile( r"^https://maps\.app\.goo\.gl/[A-Za-z0-9]+$", re.IGNORECASE, ) return bool(re.match(maps_url_pattern, clipboard_content)) @evaluator(env_name="ubuntu", local=True) def check_contain_input_text(text: str, env) -> bool: """ Check if the input text is contained in the written text action in a case-insensitive manner. Args: text (str): The text to check for. env: The current testing environment, used to access the trajectory. Returns: bool: True if the input text is found in the written text action, False otherwise. """ if env.trajectory: inputs = [ params["text"].lower() for action_name, params, _ in env.trajectory if action_name == "write_text" ] return any(text.lower() in input_text for input_text in inputs) return False @evaluator(env_name="ubuntu") def verify_country_data_in_ods(country: str, file_path: str) -> bool: from bs4 import BeautifulSoup from pyexcel_ods import get_data def extract_population(text): # Use regex to extract the first sequence of numbers which possibly contains commas if text: match = re.search(r"\d{1,3}(?:,\d{3})*(?=\[|$)", text) if match: return match.group(0).replace(",", "") # Remove commas return "0" def normalize_population(text): # Ensure the input is treated as a string, whether it's originally an int or str text = str(text) # Normalize the population string by removing non-digit characters return "".join(filter(str.isdigit, text)) def fetch_country_data(country): country_norm = country.replace(" ", "_") # Replace spaces with underscores url = f"https://en.wikipedia.org/wiki/{country_norm}" response = requests.get(url) soup = BeautifulSoup(response.content, "html.parser") infobox = soup.find("table", {"class": "infobox"}) capital_city = None population = None if infobox: for row in infobox.find_all("tr"): header = row.find("th") if header: header_text = header.text.strip() if "Capital" in header_text: capital_city = row.find("td").text.strip() capital_city = " 
".join( capital_city.split() ) # Normalize and clean up text if "Population" in header_text: if row.find("td"): population_text = row.find("td").text.strip() else: next_row = row.find_next_sibling("tr") if next_row and next_row.find("td"): population_text = next_row.find("td").text.strip() population = extract_population(population_text) return capital_city, population capital_city, population = fetch_country_data(country) if not capital_city or not population: return False # Load data from ODS file data = get_data(file_path) sheet = data[list(data.keys())[0]] # Assume data is in the first sheet # Search for country and verify data for row in sheet: if row[0].lower() == country.lower(): recorded_capital_city = row[1] recorded_population = normalize_population(row[2]) # Check if the capital city and population in the sheet match Wikipedia if ( recorded_capital_city in capital_city and recorded_population == population ): return True else: return False return True ubuntu_subtasks = [ SubTask( id="0f589bf9-9b26-4581-8b78-2961b115ab49", description='Open "{file_path}" using vim in a terminal, write "{content}", then save and exit vim.', attribute_dict={"file_path": "file_path", "content": "message"}, output_type="file_path", output_generator=lambda file_path, content: file_path, evaluator_generator=lambda file_path, content: nx.path_graph( [ check_current_window_process("gnome-terminal-server"), is_process_open("vim"), ~is_process_open("vim"), check_file_content(file_path, content), ], create_using=nx.DiGraph, ), ), SubTask( id="5b527839-0e58-426d-bab6-7160200b0d24", description='Get the content of "{file_path}" by printing it to the command line interface through a terminal', attribute_dict={"file_path": "file_path"}, output_type="message", output_generator="manual", evaluator_generator=lambda file_path: nx.path_graph( [ check_current_window_process("gnome-terminal-server"), check_contain_input_text("cat " + file_path), ], create_using=nx.DiGraph, ), ), SubTask( 
id="1c3bedc3-ea5a-453c-a15b-223d72ab756d", description='Submit content "{content}"', attribute_dict={"content": "message"}, output_type="None", output_generator="manual", evaluator_generator=lambda content: nx.path_graph( [ check_submit(content), ], create_using=nx.DiGraph, ), extra_action=[submit], ), SubTask( id="a313ea4d-e501-4971-b4fe-db2aad19eac1", description='Download a file from "{url}" to "{file_path}".', attribute_dict={"url": "url", "file_path": "file_path"}, output_type="file_path", output_generator=lambda file_path, content: file_path, evaluator_generator=lambda url, file_path: nx.path_graph( [ download_and_verify_file(url, file_path), ], create_using=nx.DiGraph, ), ), SubTask( id="a313ea4d-e501-4971-b4fe-db2aad19acsd", description='Download a file from the URL stored in the clipboard to "{file_path}".', attribute_dict={"file_path": "file_path"}, output_type="file_path", output_generator=lambda file_path, content: file_path, evaluator_generator=lambda file_path: nx.path_graph( [ download_from_clipboard_and_verify_file(file_path), ], create_using=nx.DiGraph, ), ), SubTask( id="017102b6-d2c3-466b-96f7-37c8bcddc41a", description='Use Firefox to search for an image using the keyword "{keyword}" and copy the URL of the image to the clipboard.', attribute_dict={"keyword": "keyword"}, output_type="None", evaluator_generator=lambda keyword: path_graph( [ check_text_in_current_window_name("Mozilla Firefox"), check_contain_input_text(keyword), is_img_url_in_clipboard(), ], create_using=DiGraph, ), ), SubTask( id="bcd03c9f-62c9-4001-8d86-78358c59ce22", description='Use Firefox to find a code repository about "{keyword}" in GitHub and copy the URL of the repository to the clipboard.', attribute_dict={"keyword": "keyword"}, output_type="None", evaluator_generator=lambda keyword: path_graph( [ check_text_in_current_window_name("GitHub — Mozilla Firefox"), check_contain_input_text(keyword), is_github_repo_url_in_clipboard(keyword), ], create_using=DiGraph, ), ), 
SubTask( id="a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", description='Set "{photo_path}" as the screen background of the system', attribute_dict={"photo_path": "photo_path"}, output_type="None", evaluator_generator=lambda photo_path: path_graph( [verify_background(photo_path)], create_using=DiGraph, ), ), SubTask( id="217ababc-ccc7-4b9f-af07-c239d92848fe", description='Create a new directory "{target_dir}" and copy all files with the specified "{file_extension}" extension from "{source_dir}" to the directory "{target_dir}".', attribute_dict={ "file_extension": "file_extension", "source_dir": "dir_path", "target_dir": "dir_path", }, output_type="message", evaluator_generator=lambda file_extension, source_dir, target_dir: nx.path_graph( [ check_directory_exists(target_dir), verify_files_copied(source_dir, target_dir, file_extension), ], create_using=nx.DiGraph, ), ), SubTask( id="2b189dc2-c77f-4fa3-8432-ba4355cc294c", description='Use Firefox to find out a "{place_type}" around "{place_name}" on Google Maps and copy the Google Maps sharing URL of that "{place_type}" to the clipboard', attribute_dict={"place_type": "place_type", "place_name": "place_name"}, output_type="None", evaluator_generator=lambda place_type, place_name: path_graph( [ # check_current_window_process("firefox"), check_text_in_current_window_name("Google Maps — Mozilla Firefox"), check_contain_input_text_list([place_name, place_type]), is_google_maps_url_in_clipboard(), ], create_using=DiGraph, ), ), SubTask( id="cc1adae7-bef9-4c8a-865d-00d44486dd69", description='Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from "{image_path_before_edit}" to a higher value (brighter) and save it to "{image_path_after_edit}".', attribute_dict={ "image_path_before_edit": "photo_path", "image_path_after_edit": "photo_path", }, output_type="photo_path", evaluator_generator=lambda image_path_before_edit, image_path_after_edit: nx.path_graph( [ check_text_in_current_window_name("GNU Image 
Manipulation Program"), check_file_exist(image_path_after_edit), is_image_2_brighter(image_path_before_edit, image_path_after_edit), ], create_using=nx.DiGraph, ), ), SubTask( id="434402f3-647a-4a9a-9d8f-10f5bb6c7cf0", description='Use LibreOffice Impress to adjust the brightness of the image from "{image_path_before_edit}" to a lower value (darker) and save it to "{image_path_after_edit}".', attribute_dict={ "image_path_before_edit": "photo_path", "image_path_after_edit": "photo_path", }, output_type="photo_path", evaluator_generator=lambda image_path_before_edit, image_path_after_edit: nx.path_graph( [ check_text_in_current_window_name("LibreOffice Impress"), check_file_exist(image_path_after_edit), ~is_image_2_brighter(image_path_before_edit, image_path_after_edit), ], create_using=nx.DiGraph, ), ), SubTask( id="4cf246ea-0a7f-43da-84b6-61d74a2699af", description='Combine two images from Image 1 "{image_path_1}" and Image 2 "{image_path_2} using GIMP (GNU Image Manipulation Program) and save the resulting image to "{output_path}". Image 1 should be placed on the left side of Image 2.', attribute_dict={ "image_path_1": "photo_path_1", "image_path_2": "photo_path_2", "output_path": "photo_path_ouput", }, output_type="photo_path", evaluator_generator=lambda image_path_1, image_path_2, output_path: nx.path_graph( [ check_text_in_current_window_name("GNU Image Manipulation Program"), check_file_exist(output_path), verify_combined_image(image_path_1, image_path_2, output_path, "left"), ], create_using=nx.DiGraph, ), ), SubTask( id="0111384f-38ca-41a2-9504-cb1c55002b3c", description='Combine two images from Image 1 "{image_path_1}" and Image 2 "{image_path_2}" using LibreOffice Writer and save the resulting ODT file to "{output_path}". 
Image 1 should be placed above Image 2.', attribute_dict={ "image_path_1": "photo_path_1", "image_path_2": "photo_path_2", "output_path": "file_path", }, output_type="file_path", evaluator_generator=lambda image_path_1, image_path_2, output_path: nx.path_graph( [ check_text_in_current_window_name("LibreOffice Writer"), check_file_exist(output_path), verify_combined_image(image_path_1, image_path_2, output_path, "above"), ], create_using=nx.DiGraph, ), ), SubTask( id="467f17a6-c42f-4eda-996f-a53385eb3efd", description='Combine two images from Image 1 "{image_path_1}" and Image 2 "{image_path_2}" using LibreOffice Impress and save the resulting file in PDF format to "{output_path}". Image 1 should be placed on the right side of Image 2.', attribute_dict={ "image_path_1": "photo_path_1", "image_path_2": "photo_path_2", "output_path": "file_path", }, output_type="file_path", evaluator_generator=lambda image_path_1, image_path_2, output_path: nx.path_graph( [ check_text_in_current_window_name("LibreOffice Impress"), check_file_exist(output_path), verify_combined_image(image_path_1, image_path_2, output_path, "right"), ], create_using=nx.DiGraph, ), ), SubTask( id="49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", description="Find the example provided of torch.matmul by official PyTorch version 1.13 documentation using Firefox and copy all the lines of code in the example to the clipboard.", attribute_dict={}, output_type="None", evaluator_generator=lambda: nx.path_graph( [ check_text_in_current_window_name( "torch.matmul — PyTorch 1.13 documentation — Mozilla Firefox" ), is_torch_matmul_example_copied_correctly(), ], create_using=DiGraph, ), ), SubTask( id="76de4bdb-c980-4b3a-9bd3-c87db467dffe", description='Paste clipboard content into LibreOffice Writer and save it as an ODT file at "{file_path}".', attribute_dict={"file_path": "file_path"}, output_type="file_path", evaluator_generator=lambda file_path: path_graph( [ check_text_in_current_window_name("LibreOffice Writer"), 
check_file_exist(file_path), verify_odt_file_content_with_clipboard(file_path), ], create_using=DiGraph, ), ), SubTask( id="8491e674-596b-452b-9e0e-58a44d90f947", description='Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at "{file_path}".', attribute_dict={"file_path": "file_path"}, output_type="file_path", evaluator_generator=lambda file_path: path_graph( [ check_text_in_current_window_name("Visual Studio Code"), check_file_exist(file_path), verify_file_content_with_clipboard(file_path), ], create_using=DiGraph, ), ), SubTask( id="1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", description='Use Firefox to search for the country "{country}" on Wikipedia, extract the capital city and population, and save this information in an ODS file at "{file_path}" with LibreOffice Calc. The first column will save the country name, the second will save the capital city name, and the third will save the population. No header is needed in the ODS file.', attribute_dict={"country": "country", "file_path": "file_path"}, output_type="file_path", evaluator_generator=lambda country, file_path: nx.path_graph( [ check_text_in_current_window_name("Wikipedia — Mozilla Firefox"), check_text_in_current_window_name("LibreOffice Calc"), check_file_exist(file_path), verify_country_data_in_ods(country, file_path), ], create_using=nx.DiGraph, ), ), ] if __name__ == "__main__": generator = TaskGenerator(attribute_pool={}) ================================================ FILE: crab-benchmark-v0/main.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import argparse import logging import warnings from pathlib import Path from typing import Literal from crab import ( BenchmarkConfig, Experiment, MessageType, TaskGenerator, create_benchmark, ) from crab.actions.crab_actions import complete, wait from crab.actions.visual_prompt_actions import ( get_elements_prompt, groundingdino_easyocr, ) from crab.agents.backend_models import BackendModelConfig from crab.agents.policies import ( MultiAgentByEnvPolicy, MultiAgentByFuncPolicy, SingleAgentPolicy, ) from crab.core.agent_policy import AgentPolicy from crab.core.benchmark import Benchmark from .android_env import ANDROID_ENV from .dataset.android_subtasks import android_subtasks from .dataset.handmade_tasks import handmade_tasks from .dataset.ubuntu_subtasks import ubuntu_subtasks from .ubuntu_env import UBUNTU_ENV warnings.filterwarnings("ignore") class CrabBenchmarkV0(Experiment): def __init__( self, benchmark: Benchmark, task_id: str, agent_policy: AgentPolicy | Literal["human"], log_dir: Path | None = None, ) -> None: super().__init__(benchmark, task_id, agent_policy, log_dir) def get_prompt(self): observation, ob_prompt = self.benchmark.observe_with_prompt() # construct prompt result_prompt = {} for env in ob_prompt: if env == "root": continue screenshot = observation[env]["screenshot"] marked_screenshot, _ = ob_prompt[env]["screenshot"] result_prompt[env] = [ (f"Here is the current screenshot of {env}:", MessageType.TEXT), (screenshot, MessageType.IMAGE_JPG_BASE64), ( f"Here is the 
screenshot with element labels of {env}:", MessageType.TEXT, ), (marked_screenshot, MessageType.IMAGE_JPG_BASE64), ] return result_prompt def get_benchmark(env: str, ubuntu_url: str): ubuntu_env = UBUNTU_ENV.model_copy() ubuntu_env.remote_url = ubuntu_url ubuntu_tool = { "screenshot": groundingdino_easyocr(font_size=16) >> get_elements_prompt } android_tool = { "screenshot": groundingdino_easyocr(font_size=40) >> get_elements_prompt } if env == "ubuntu": prompting_tools = {"ubuntu": ubuntu_tool} benchmark_config = BenchmarkConfig( name="ubuntu_benchmark", tasks=[], environments=[ubuntu_env], prompting_tools=prompting_tools, root_action_space=[complete, wait], multienv=True, ) elif env == "android": prompting_tools = {"android": android_tool} benchmark_config = BenchmarkConfig( name="android_benchmark", tasks=[], environments=[ANDROID_ENV], prompting_tools=prompting_tools, root_action_space=[complete, wait], multienv=True, ) elif env == "cross": prompting_tools = { "android": android_tool, "ubuntu": ubuntu_tool, } benchmark_config = BenchmarkConfig( name="ubuntu_android_benchmark", tasks=[], environments=[ubuntu_env, ANDROID_ENV], prompting_tools=prompting_tools, root_action_space=[complete, wait], multienv=True, ) else: raise ValueError("Env not support") # Load from json config files by combining sub-tasks generator = TaskGenerator(subtasks=android_subtasks + ubuntu_subtasks) dir_path = (Path(__file__).parent / "dataset").resolve() tasks = [] for task_json_files in dir_path.rglob("*.json"): task = generator.get_task_from_file(task_json_files) tasks.append(task) benchmark_config.tasks.extend(tasks) # Load from handmade tasks benchmark_config.tasks.extend(handmade_tasks) benchmark_config.step_limit = 20 return create_benchmark(benchmark_config) if __name__ == "__main__": parser = argparse.ArgumentParser( description="Script for running benchmark with an agent." 
) parser.add_argument( "--model", type=str, help="gpt4o, gpt4turbo, gemini, claude or human", default="gpt4o", ) parser.add_argument( "--policy", type=str, help="single, multi-by-func, or multi-by-env", default="single", ) parser.add_argument( "--ubuntu-url", type=str, help="remote url of Ubunutu environment", default="http://127.0.0.1:8000", ) parser.add_argument( "--env", type=str, help="ubuntu, android or cross", default="cross", ) parser.add_argument("--task-id", type=str, help="task id") parser.add_argument( "--model-base-url", type=str, help="URL of the model API", default="http://127.0.0.1:8000/v1", ) parser.add_argument( "--model-api-key", type=str, help="API key of the model API", default="EMPTY", ) parser.add_argument( "--loglevel", type=str, help="logger level, debug, info, warning, or error", default="warning", ) parser.add_argument( "--history-messages-len", type=int, help="The number of rounds of chat history to provide to the model", default=2, ) args = parser.parse_args() loglevel = args.loglevel numeric_level = getattr(logging, loglevel.upper(), None) if not isinstance(numeric_level, int): raise ValueError("Invalid log level: %s" % loglevel) logging.basicConfig(level=numeric_level) benchmark = get_benchmark(args.env, args.ubuntu_url) if args.model == "human": expeirment = CrabBenchmarkV0( benchmark=benchmark, task_id=args.task_id, agent_policy="human", ) expeirment.start_benchmark() exit() if args.model == "gpt4o": model = BackendModelConfig( model_class="openai", model_name="gpt-4o", history_messages_len=args.history_messages_len, ) elif args.model == "gpt4turbo": model = BackendModelConfig( model_class="openai", model_name="gpt-4-turbo", history_messages_len=args.history_messages_len, ) elif args.model == "gemini": model = BackendModelConfig( model_class="gemini", model_name="gemini-1.5-pro-latest", history_messages_len=args.history_messages_len, ) elif args.model == "claude": model = BackendModelConfig( model_class="claude", 
model_name="claude-3-opus-20240229", history_messages_len=args.history_messages_len, ) elif args.model == "pixtral": model = BackendModelConfig( model_class="openai", model_name="mistralai/Pixtral-12B-2409", json_structre_output=True, history_messages_len=args.history_messages_len, base_url=args.model_base_url, api_key=args.model_api_key, ) elif args.model == "gpt4o-wofc": model = BackendModelConfig( model_class="openai", model_name="gpt-4o", json_structre_output=True, history_messages_len=args.history_messages_len, ) elif args.model == "llava-ov72b": model = BackendModelConfig( model_class="sglang", model_name="lmms-lab/llava-onevision-qwen2-72b-ov-chat", json_structre_output=True, history_messages_len=args.history_messages_len, base_url=args.model_base_url, api_key=args.model_api_key, ) else: print("Unsupported model: ", args.model) exit() if args.policy == "single": agent_policy = SingleAgentPolicy(model_backend=model) elif args.policy == "multi-by-func": agent_policy = MultiAgentByFuncPolicy( main_agent_model_backend=model, tool_agent_model_backend=model ) elif args.policy == "multi-by-env": agent_policy = MultiAgentByEnvPolicy( main_agent_model_backend=model, env_agent_model_backend=model ) else: print("Unsupported policy: ", args.policy) exit() log_dir = (Path(__file__).parent / "tianqi_logs").resolve() expeirment = CrabBenchmarkV0( benchmark=benchmark, task_id=args.task_id, agent_policy=agent_policy, log_dir=log_dir, ) expeirment.start_benchmark() ================================================ FILE: crab-benchmark-v0/scripts/ubuntu_env_init.sh ================================================ #!/bin/bash # Disable screen autolock gsettings set org.gnome.desktop.screensaver lock-enabled false gsettings set org.gnome.desktop.session idle-delay 0 # Disable automatic updates sudo bash -c 'cat < /etc/apt/apt.conf.d/20auto-upgrades APT::Periodic::Update-Package-Lists "0"; APT::Periodic::Unattended-Upgrade "0"; EOF' # Allow sudo without password for the current 
user
CURRENT_USER=$(whoami)
sudo bash -c "echo \"$CURRENT_USER ALL=(ALL) NOPASSWD: ALL\" | tee /etc/sudoers.d/$CURRENT_USER"

# Install required packages
sudo apt update
sudo apt install -y openssh-server git vim python3-pip xdotool python3-tk python3.10-venv

# Install pipx
python3 -m pip install pipx
python3 -m pipx ensurepath

# Modify .bashrc to alias python to python3 for the current user
echo 'alias python=python3' >> /home/$CURRENT_USER/.bashrc

# Reload .bashrc for the current user
source /home/$CURRENT_USER/.bashrc

# Install poetry using pipx
pipx install poetry

# Pull CRAB repo
if [ ! -d "/home/$CURRENT_USER/crab" ]; then
    git clone https://github.com/camel-ai/crab.git /home/$CURRENT_USER/crab/
fi

# Create poetry environment
cd /home/$CURRENT_USER/crab
poetry install -E server

# Change to X11 from Wayland
sudo sed -i 's/#WaylandEnable=false/WaylandEnable=false/g' /etc/gdm3/custom.conf
touch /home/$CURRENT_USER/.Xauthority

# Create the crab.service file with dynamic user and group
# NOTE(review): "cat < /etc/systemd/system/crab.service" READS the file; this
# was almost certainly "cat <<EOF > /etc/systemd/system/crab.service" (heredoc
# write) before extraction mangled it — confirm against the upstream script.
sudo bash -c "cat < /etc/systemd/system/crab.service
[Unit]
Description=My Python Script Service
After=network.target

[Service]
WorkingDirectory=/home/$CURRENT_USER/crab/
ExecStart=/home/$CURRENT_USER/.local/bin/poetry run python -m crab.server.main --HOST 0.0.0.0
Restart=always
User=$CURRENT_USER
Group=$CURRENT_USER

[Install]
WantedBy=multi-user.target
EOF"

# Reload systemd to recognize the new service
sudo systemctl daemon-reload

# Enable and start the crab service
sudo systemctl enable crab.service

# Reboot the system to apply changes for X11
echo "System will reboot in 10 seconds to apply changes..."
sleep 10
sudo reboot


================================================
FILE: crab-benchmark-v0/ubuntu_env.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from crab.actions.desktop_actions import (
    click,
    double_click,
    key_press,
    press_hotkey,
    right_click,
    screenshot,
    search_application,
    write_text,
)
from crab.core import EnvironmentConfig

# Environment definition for the Ubuntu desktop VM used by crab-benchmark-v0:
# mouse/keyboard actions as the action space, a screenshot as the only
# observation, and a natural-language description handed to the agent.
UBUNTU_ENV = EnvironmentConfig(
    name="ubuntu",
    action_space=[
        click,
        key_press,
        write_text,
        press_hotkey,
        search_application,
        right_click,
        double_click,
    ],
    observation_space=[screenshot],
    description="""An Ubuntu 22.04 Linux desktop operating system. The interface \
displays a current screenshot at each step and primarily supports interaction \
via mouse and keyboard. You must use searching functionality to open any \
application in the system. This device includes system-related applications \
including Terminal, Files, Text Editor, Vim, and Settings. It also features \
Firefox as the web browser, and the LibreOffice suite—Writer, Calc, and \
Impress. For communication, Slack is available. The Google account is \
pre-logged in on Firefox, synchronized with the same account used in the \
Android environment.""",
)


================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/conf.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

# Make the repository root importable so autodoc can find the "crab" package.
sys.path.insert(0, os.path.abspath('..'))

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'CRAB'
copyright = '2024, CAMEL-AI.org'
author = 'CAMEL-AI.org'
version = '0.1'
release = '0.1.2'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.viewcode',
    'sphinx.ext.napoleon',
    'myst_parser',
]

templates_path = ['_templates']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'sphinx_book_theme'
html_favicon = '_static/favicon.png'
html_static_path = ['_static']
html_logo = "_static/CRAB_logo1.png"
html_title = "CRAB Documentation"
html_theme_options = {
    "repository_url": "https://github.com/camel-ai/crab",
    "use_repository_button": True,
}


================================================
FILE: docs/crab.benchmarks.rst
================================================
crab.benchmarks package
=======================

Submodules
----------

crab.benchmarks.template module
-------------------------------

.. automodule:: crab.benchmarks.template
   :members:
   :undoc-members:
   :show-inheritance:

Module contents
---------------

.. automodule:: crab.benchmarks
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/crab.client.rst
================================================
crab.client package
===================

Submodules
----------

crab.client.env module
----------------------

.. automodule:: crab.client.env
   :members:
   :undoc-members:
   :show-inheritance:

crab.client.openai\_interface module
------------------------------------

..
automodule:: crab.client.openai_interface :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.client :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.core.models.rst ================================================ crab.core.models package ======================== Submodules ---------- crab.core.models.action module ------------------------------ .. automodule:: crab.core.models.action :members: :undoc-members: :show-inheritance: crab.core.models.benchmark\_interface module -------------------------------------------- .. automodule:: crab.core.models.benchmark_interface :members: :undoc-members: :show-inheritance: crab.core.models.config module ------------------------------ .. automodule:: crab.core.models.config :members: :undoc-members: :show-inheritance: crab.core.models.evaluator module --------------------------------- .. automodule:: crab.core.models.evaluator :members: :undoc-members: :show-inheritance: crab.core.models.task module ---------------------------- .. automodule:: crab.core.models.task :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.core.models :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.core.rst ================================================ crab.core package ================= Subpackages ----------- .. toctree:: :maxdepth: 4 crab.core.models Submodules ---------- crab.core.benchmark module -------------------------- .. automodule:: crab.core.benchmark :members: :undoc-members: :show-inheritance: crab.core.decorators module --------------------------- .. automodule:: crab.core.decorators :members: :undoc-members: :show-inheritance: crab.core.environment module ---------------------------- .. 
automodule:: crab.core.environment :members: :undoc-members: :show-inheritance: crab.core.exceptions module --------------------------- .. automodule:: crab.core.exceptions :members: :undoc-members: :show-inheritance: crab.core.graph\_evaluator module --------------------------------- .. automodule:: crab.core.graph_evaluator :members: :undoc-members: :show-inheritance: crab.core.task\_generator module -------------------------------- .. automodule:: crab.core.task_generator :members: :undoc-members: :show-inheritance: crab.core.vagrant\_manager module --------------------------------- .. automodule:: crab.core.vagrant_manager :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.core :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.environments.rst ================================================ crab.environments package ========================= Submodules ---------- crab.environments.android module -------------------------------- .. automodule:: crab.environments.android :members: :undoc-members: :show-inheritance: crab.environments.linux module ------------------------------ .. automodule:: crab.environments.linux :members: :undoc-members: :show-inheritance: crab.environments.template module --------------------------------- .. automodule:: crab.environments.template :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.environments :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.rst ================================================ crab package ============ Subpackages ----------- .. toctree:: :maxdepth: 4 crab.benchmarks crab.client crab.core crab.environments crab.server Module contents --------------- .. 
automodule:: crab :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.server.controller.rst ================================================ crab.server.controller package ============================== Submodules ---------- crab.server.controller.benchmark module --------------------------------------- .. automodule:: crab.server.controller.benchmark :members: :undoc-members: :show-inheritance: crab.server.controller.environment module ----------------------------------------- .. automodule:: crab.server.controller.environment :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.server.controller :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.server.rst ================================================ crab.server package =================== Subpackages ----------- .. toctree:: :maxdepth: 4 crab.server.controller Submodules ---------- crab.server.api module ---------------------- .. automodule:: crab.server.api :members: :undoc-members: :show-inheritance: crab.server.config module ------------------------- .. automodule:: crab.server.config :members: :undoc-members: :show-inheritance: crab.server.data module ----------------------- .. automodule:: crab.server.data :members: :undoc-members: :show-inheritance: crab.server.exception\_handlers module -------------------------------------- .. automodule:: crab.server.exception_handlers :members: :undoc-members: :show-inheritance: crab.server.logger module ------------------------- .. automodule:: crab.server.logger :members: :undoc-members: :show-inheritance: crab.server.main module ----------------------- .. automodule:: crab.server.main :members: :undoc-members: :show-inheritance: crab.server.middleware module ----------------------------- .. 
automodule:: crab.server.middleware :members: :undoc-members: :show-inheritance: crab.server.utils module ------------------------ .. automodule:: crab.server.utils :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.server :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab_benchmark_v0/environment_gcp_setup.md ================================================ # Google cloud platform setup ## Setup and Start the VM Instance The development image is hosted in the project `capable-vista-420022` with image name `crab-benchmark-v0-1`. You can use [gcloud](https://cloud.google.com/sdk/docs/install) to create an instance from this image. First install [gcloud](https://cloud.google.com/sdk/docs/install), then create an instance using the following command: ```bash gcloud compute instances create \ crab-instance \ --zone=us-central1-a \ --machine-type=n2-standard-8 \ --image=https://www.googleapis.com/compute/v1/projects/capable-vista-420022/global/images/crab-benchmark-v0-1 \ --enable-nested-virtualization # You can change instance name, zone, machine type as you want. # Remember that the CPU must support nested virtualization and should have at least 32G memory. # This setting costs around 0.4$ per hour. ``` After creating the instance, you can connect it using SSH. User account information: * user: `root`; password: `crab` * user: `crab`; password: `crab` **IMPORTANT: You must switch to user `crab` before setting up remote desktop.** Use `sudo su crab`. ## Connect the Instance through a remote desktop service You need to connect the server to a display to set up the experiment environment because the Ubuntu virtual machine and the Android emulator require GUI operations. There are many possible remote desktop products you can use. Here, we provide instructions for [Google Remote Desktop](https://remotedesktop.google.com/access/), which was used to run our experiment. 
1. Go to [Google Remote Desktop Headless](https://remotedesktop.google.com/headless). Click **Begin** -> **Next** -> **Authorize**. On the resulting page, copy the command from the `Debian Linux` section.
2. Connect to the VM instance through SSH, paste the copied command, and run it. You will be prompted to set a six-digit PIN.
3. Go to [Google Remote Desktop Access](https://remotedesktop.google.com/access). You should see a remote device marked as online. Click it and enter the PIN. You will then see the desktop of the VM instance.

================================================
FILE: docs/crab_benchmark_v0/environment_local_setup.md
================================================
# Local setup

## Install CRAB

First you should install `poetry`, a modern Python dependency management tool. Then pull the crab repo and install:

```bash
git clone https://github.com/camel-ai/crab
cd crab
poetry install -E client
```

## Install Ubuntu VM

**IMPORTANT: If you are using an Ubuntu VM, the Python version in the VM must match the Python version on the host machine. If you follow this instruction to install Ubuntu, the Python version in the VM will be 3.10.12. Consider using `conda` or `pyenv` to install the same Python version on the host machine.**

Install `virt-manager`. If you are using Ubuntu or Debian, try `sudo apt install virt-manager`. Download the [Ubuntu 22.04 image](https://releases.ubuntu.com/jammy/ubuntu-22.04.4-desktop-amd64.iso), then create a new machine with at least 8G RAM and a 30G disk in virt-manager using the image. Follow the instructions and complete the installation. (It's better to use `crab` as the main user name.)

After installing Ubuntu, you should install crab-server on it and do the necessary initialization. In the Ubuntu VM, run

```bash
git clone https://github.com/camel-ai/crab.git ~/crab/
cd ~/crab/crab-benchmark-v0/scripts
chmod +x ubuntu_env_init.sh
./ubuntu_env_init.sh
```

The VM will reboot after initialization. After rebooting, remember its IP address.
## Install ADB Download and install ADB from its [official website](https://developer.android.com/tools/releases/platform-tools). ## Install Android Emulator You can use emulators in [Android Studio](https://developer.android.com/studio) to simulate an Android device if you don't want to use a physical one. To create a new virtual device, open Android Studio and use its built-in device manager to create a Pixel 8 Pro with system image release "R". > Note that the benchmark on our side runs on a Google Pixel 8 Pro with system image release "R". However, cases are > noticed that Google API Level 30 may not work properly when trying to enable USB debugging mode. If such issues are > encountered, you can try switch to releases of lower API levels (e.g. "Q"). ![](./assets/android_1.png) ![](./assets/android_2.png) Then you can boot the device. To check if it's all set, run ```shell adb devices ``` You should see the device in the list. > Important: ADB won't work normally if you see an `unauthorized` tag after the device ID. To solve this, enable both > the developer mode and USB debugging mode in the device. ================================================ FILE: docs/crab_benchmark_v0/get_started.md ================================================ # Get started `crab-benchmark-v0` is a benchmark released with the crab framework to provide a standard usage. It includes two virtual machine environments: an Android smartphone and an Ubuntu desktop computer, with 100 tasks and 59 different evaluator functions in the dataset. It effectively evaluates the MLM-based agents' performance on operating real-world tasks across multiple platforms. ## Concept Our benchmark contains two important parts: **Environments** and **Tasks**. #### Environment Since our Ubuntu environment is built upon KVM, setting it up locally requires you an experienced Linux user to deal with many small and miscellaneous issues. 
Therefore, we provide two environment setup methods:

* [Local setup](./environment_local_setup.md) provides a step-by-step guide to build environments on a Linux machine with **at least one monitor and 32G memory**, but it doesn't cover details like how to install KVM on your machine because they vary across different Linux distros.
* For those who want a quicker setup, we also provide a setup through the [Google Cloud Platform](./environment_gcp_setup.md). Specifically, we publish a disk image that contains all required software and configurations on Google Cloud; you can use your own Google account to create a cloud computer from this disk image and use [google remote desktop](https://remotedesktop.google.com/access/) to connect to it. This method doesn't have any hardware limitations, and once you set it up you can run the experiment immediately. As a tradeoff, the cloud computer that meets the minimum hardware requirement costs around $0.4 per hour (depending on the machine zone).

We connect to the Android environment via ADB, so any Android device, from an emulator to a physical smartphone, will work. You should ensure ADB is installed on your system and can be directly called through the command line. In our experiment, we used the built-in emulator of [Android Studio](https://developer.android.com/studio) to create a Google Pixel 8 Pro virtual device with the release name *R* and installed the necessary extra apps.

#### Task

We manage our task dataset using a CRAB-recommended method. Sub-tasks are defined through Pydantic models written in Python code, and composed tasks are defined in JSON format, typically combining several sub-tasks. The sub-tasks are defined in [android_subtasks](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/android_subtasks.py) and [ubuntu_subtasks](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/ubuntu_subtasks.py).
The JSON files storing composed tasks are categorized into [android](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/android/), [ubuntu](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/ubuntu/), and [cross-platform](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/cross/). The tasks in android and ubuntu directories are single-environment task and those in cross directory are cross-environment tasks. Additionally, we create several tasks by hand instead of composing sub-tasks to provide semantically more meaningful tasks, which are found in [handmade tasks](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/handmade_tasks.py). ## Experiment After setting up the environment, you can start the experiment. A brief overview of the experiment is as follows: 1. Open the Ubuntu environment virtual machine and the Android environment emulator. 2. Start the CRAB server in the Ubuntu environment and get its IP address and port. Let's say they are `192.168.122.72` and `8000`. 3. Choose a task. As an example, we take the task with ID `a3476778-e512-40ca-b1c0-d7aab0c7f18b` from [handmade_tasks](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/handmade_tasks.py). The task is: "Open the 'Tasks' app on Android, check the first incomplete task, then perform the task according to its description." 4. Run [main.py](./main.py) with the command `poetry run python -m crab-benchmark-v0.main --model gpt4o --policy single --remote-url http://192.168.122.72:8000 --task-id a3476778-e512-40ca-b1c0-d7aab0c7f18b`. In this command, `--model gpt4o` and `--policy single` determine the agent system, `--remote-url` specifies the Ubuntu environment interface, and `--task-id` indicates the task to be performed. 
================================================
FILE: docs/get_started/build_your_own_benchmark.md
================================================
# Build your own benchmark

## Overview

![](../assets/benchmark_config.png)

The Crab benchmark system mainly consists of five types of components:

* `Action`: The fundamental building block of the Crab framework, which represents a unit operation that can be taken by an agent or used as a fixed process that is called multiple times in a benchmark.
* `Evaluator`: A specific type of `Action` that assesses whether an agent has achieved its goal. Multiple evaluators can be combined together as a graph to enable complex evaluation.
* `Environment`: An abstraction of an environment in which the agent can take actions and observe in a given action and observation space. An environment can be launched on the local machine, a physical remote machine, or a virtual machine.
* `Task`: A task with a natural language description to instruct the agent to perform. It can include interaction with multiple environments. Notice that in the benchmark, a task should have a graph evaluator to judge the task progress.
* `Benchmark`: The main body of the crab system that contains all required components to build a benchmark, including environments, tasks, and the prompting method. It controls several of these components to run the benchmark.

## Actions

Actions are the fundamental building blocks of the Crab system's operations. Each action is encapsulated as an instance of the `Action` class. An action can be converted into a JSON schema for language model agents to use. An action is characterized by the following attributes:

- **Name**: A string identifier that uniquely represents the action.
- **Entry**: A callable entry point to the actual Python function that executes the action.
- **Parameters**: A Pydantic model class that defines the input parameters the action accepts.
- **Returns**: A Pydantic model class that defines the structure of the return type the action produces.
- **Description**: A string providing a clear and concise description of what the action does and how it behaves.
- **Kept Parameters**: A list of parameters retained for internal use by the Crab system, which do not appear in the action's parameter list but are injected automatically at runtime. For example, we use `env` to represent the current environment object that the action is taken in.
- **Environment Name**: An optional string that can specify the environment the action is associated with. Usually this attribute is only used by predefined actions like `setup` in an environment.

Here is an example of creating an action through a Python function:

```python
@action
def click(x: float, y: float) -> None:
    """
    click on the current desktop screen.

    Args:
        x (float): The X coordinate, as a floating-point number in the range [0.0, 1.0].
        y (float): The Y coordinate, as a floating-point number in the range [0.0, 1.0].
    """
    import pyautogui
    pyautogui.click(x,y)
```

The `@action` decorator transforms the `click` function into an `Action` with these mappings:

- The function name `click` becomes the action **name**.
- The parameters `x: float, y: float` with their type hints become the action **parameters**.
- The return type hint `-> None` is used for the action's **returns** field, indicating no value returned.
- The function's docstring provides a **description** for the action and its parameters, utilized in the JSON schema for the agent.
- The function body defines the action's behavior, executed when the action is called.

The `Action` class allows for different combination operations such as:

- **Pipe**: Using the `>>` operator, actions can be piped together, where the output of one action becomes the input to another, provided their parameters and return types are compatible.
- **Sequential Combination**: The `+` operator allows for two actions to be combined sequentially, executing one after the other.
## Evaluators Evaluators in the Crab system are a specific type of `Action` that assess whether an agent has achieved its goal. They should return a boolean value, indicating whether the task's objective has been met. Multiple evaluators can be connected into a graph using the `networkx` package, enabling multi-stage evaluation, where different conditions can be checked in sequence or in parallel. An example evaluator `check_file_exist` confirms the presence of a file at a given path, using the `os.path.isfile` method to return `True` if the file exists or `False` otherwise: ```python @evaluator def check_file_exist(file_path: str) -> bool: return os.path.isfile(file_path) ``` Extra attributes of evaluators: - **Require Submit**: Indicates if the evaluator awaits a specific submission to carry out its assessment. Logical operators allow for evaluator combinations: - **AND (&)**: Requires all evaluators to succeed for a task to pass. - **OR (|)**: Passes if any of the evaluators succeed. - **NOT (~)**: Reverses the evaluation outcome. The combined evaluator is still considered as **one evaluator** rather than a graph evaluator. ================================================ FILE: docs/get_started/quickstart.md ================================================ # Quickstart The `Benchmark` class is a comprehensive framework for evaluating language model agents across various tasks and environments. It provides a flexible structure to manage multiple environments and tasks, offering single and multi-environment execution modes. The following image shows an overview of how `Benchmark` works. ![](../assets/crab_overview.png) ## Basic Usage ### Step 1: Importing the Benchmark Begin by importing the predefined benchmark from the `crab.benchmarks` module. 
For exmple, here we import `template_benchmark_config`: ```python from crab.benchmarks import template_benchmark_config ``` ### Step 2: Creating the Benchmark Use the `create_benchmark` function to create an instance of a `Benchmark` class based on the imported benchmark configuration: ```python from crab import create_benchmark benchmark = create_benchmark(template_benchmark_config) ``` ### Step 3: Starting a Task Select a task to start within the benchmark. The task ID should correspond to one of the predefined tasks in the benchmark configuration. Use the `start_task` method to initialize and begin the task: ```python # Starting the task with ID "0" task, action_space = benchmark.start_task("0") ``` ### Step 4: Running the Benchmark Loop Execute actions and observe the results using the `step` and `observe` methods: ```python from crab.client.openai_interface import OpenAIAgent # Initialize the agent by benchmark task and action_space agent = OpenAIAgent(task, action_space) # Define a function to run the benchmark def run_benchmark(benchmark, agent): for step in range(20): # Define the number of steps as per your requirements print("=" * 40) print(f"Starting step {step}:") # Get the current observations and prompts observation = benchmark.observe() # Process the observations and determine the next action action_result = agent.determine_next_action(observation) # Execute the action and get the result step_result = benchmark.step(action_result.action, action_result.parameters) # Check current evaluation result. 
print(step_result.evaluation_results) # Check if the task is terminated and break the loop if so if step_result.terminated: print("Task completed successfully.") print(step_result.evaluation_results) break run_benchmark(benchmark, agent) ``` ### Step 5: Completing the Benchmark Clean up and reset the benchmark after completion using the`reset`: ```python benchmark.reset() ``` ================================================ FILE: docs/index.rst ================================================ .. Crab documentation master file, created by sphinx-quickstart on Thu May 2 10:58:47 2024. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to Crab's documentation! ================================ .. toctree:: :maxdepth: 1 :caption: Get Started with CRAB: :name: get_started get_started/quickstart.md get_started/build_your_own_benchmark.md .. toctree:: :maxdepth: 1 :caption: CRAB Benchmark-v0: :name: crab_benchmark_v0 crab_benchmark_v0/get_started.md crab_benchmark_v0/environment_gcp_setup.md crab_benchmark_v0/environment_local_setup.md .. toctree:: :maxdepth: 2 :caption: API Reference: modules Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. 
echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd ================================================ FILE: docs/modules.rst ================================================ crab ==== .. toctree:: :maxdepth: 4 crab ================================================ FILE: examples/multi_env.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
===========

from termcolor import colored

from crab import Benchmark, create_benchmark
from crab.agents.backend_models import OpenAIModel
from crab.agents.policies import SingleAgentPolicy
from crab.benchmarks.template import multienv_template_benchmark_config


def start_benchmark(benchmark: Benchmark, agent: SingleAgentPolicy):
    """Drive the agent/benchmark interaction loop over multiple environments.

    Runs at most 20 agent steps. Each step observes every environment,
    builds a per-environment text prompt from the reported state, asks the
    agent for actions, and executes them one by one. Returns as soon as the
    benchmark reports termination.
    """
    for step in range(20):
        print("=" * 40)
        print(f"Start agent step {step}:")
        observation = benchmark.observe()
        print(f"Current enviornment observation: {observation}")
        prompt = {}
        for env, obs in observation.items():
            # The "root" entry is skipped when building prompts — presumably
            # an aggregate/meta environment rather than a real one; confirm
            # against Benchmark.observe.
            if env == "root":
                continue
            state = obs["current_state"]
            # Prompt format: {env_name: [(message, message_type), ...]};
            # 0 appears to be a plain-text message tag — TODO confirm.
            prompt[env] = [(f"The state of {env} is {state}", 0)]
        response = agent.chat(observation=prompt)
        print(colored(f"Agent take action: {response}", "blue"))
        # NOTE(review): `response` is rebound to the step result inside the
        # loop, shadowing the agent's action list; iteration is unaffected
        # because the for-loop holds its own reference to the original list.
        for action in response:
            response = benchmark.step(
                action=action.name,
                parameters=action.arguments,
                env_name=action.env,
            )
            print(
                colored(
                    f'Action "{action.name}" success, stat: '
                    f"{response.evaluation_results}",
                    "green",
                )
            )
            if response.terminated:
                print("=" * 40)
                print(
                    colored(
                        f"Task finished, result: {response.evaluation_results}", "green"
                    )
                )
                return


if __name__ == "__main__":
    # Example entry point: run task "0" of the multi-environment template
    # benchmark with a single-agent policy backed by GPT-4o.
    benchmark = create_benchmark(multienv_template_benchmark_config)
    task, action_space = benchmark.start_task("0")
    env_descriptions = benchmark.get_env_descriptions()
    agent = SingleAgentPolicy(model_backend=OpenAIModel("gpt-4o"))
    agent.reset(task.description, action_space, env_descriptions)
    print("Start performing task: " + colored(f'"{task.description}"', "green"))
    start_benchmark(benchmark, agent)
    benchmark.reset()


================================================
FILE: examples/single_env.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from termcolor import colored from crab import Benchmark, create_benchmark from crab.agents.backend_models import OpenAIModel from crab.agents.policies import SingleAgentPolicy from crab.benchmarks.template import template_benchmark_config def start_benchmark(benchmark: Benchmark, agent: SingleAgentPolicy): for step in range(20): print("=" * 40) print(f"Start agent step {step}:") observation = benchmark.observe()["template_env"] print(f"Current enviornment observation: {observation}") response = agent.chat( { "template_env": [ (f"Current enviornment observation: {observation}", 0), ] } ) print(colored(f"Agent take action: {response}", "blue")) for action in response: response = benchmark.step( action=action.name, parameters=action.arguments, env_name=action.env, ) print( colored( f'Action "{action.name}" success, stat: ' f"{response.evaluation_results}", "green", ) ) if response.terminated: print("=" * 40) print( colored( f"Task finished, result: {response.evaluation_results}", "green" ) ) return if __name__ == "__main__": benchmark = create_benchmark(template_benchmark_config) task, action_space = benchmark.start_task("0") env_descriptions = benchmark.get_env_descriptions() agent = SingleAgentPolicy(model_backend=OpenAIModel("gpt-4o")) agent.reset(task.description, action_space, env_descriptions) print("Start performing task: " + colored(f'"{task.description}"', "green")) start_benchmark(benchmark, agent) benchmark.reset() ================================================ FILE: 
licenses/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2023 @ CAMEL-AI.org Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: licenses/license_template.txt ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== ================================================ FILE: licenses/update_license.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import os import re import sys from pathlib import Path from typing import List # The license template file is hard-coded with specific start and end lines def fine_license_start_line(lines: List[str], start_with: str) -> int: for i in range(len(lines)): if lines[i].startswith(start_with): return i return None def find_license_end_line(lines: List[str], start_with: str) -> int: for i in range(len(lines) - 1, -1, -1): if lines[i].startswith(start_with): return i return None def update_license_in_file( file_path: str, license_template_path: str, start_line_start_with: str, end_line_start_with: str, ) -> bool: with open(file_path, "r") as f: content = f.read() with open(license_template_path, "r") as f: new_license = f.read().strip() maybe_existing_licenses = re.findall( r"^#.*?(?=\n)", content, re.MULTILINE | re.DOTALL ) start_index = fine_license_start_line( maybe_existing_licenses, start_line_start_with ) end_index = find_license_end_line(maybe_existing_licenses, end_line_start_with) if start_index is not None and end_index is not None: maybe_existing_licenses = maybe_existing_licenses[start_index : end_index + 1] else: 
maybe_existing_licenses = None if maybe_existing_licenses: maybe_old_licenses = "\n".join(maybe_existing_licenses) if maybe_old_licenses.strip() != new_license.strip(): replaced_content = content.replace(maybe_old_licenses, new_license) with open(file_path, "w") as f: f.write(replaced_content) print(f"Replaced license in {file_path}") return True else: return False else: with open(file_path, "w") as f: f.write(new_license + "\n" + content) print(f"Added license to {file_path}") return True def update_license_in_directory( directory_path: str, license_template_path: str, start_line_start_with: str, end_line_start_with: str, ) -> None: # Check if directory exists if not os.path.isdir(directory_path): raise NotADirectoryError(f"{directory_path} is not a directory") # Check if license template exists if not os.path.isfile(license_template_path): raise FileNotFoundError(f"{license_template_path} not found") file_count = 0 for py_files in Path(directory_path).rglob("*.py"): if py_files.name.startswith("."): continue if any(part.startswith(".") for part in py_files.parts): continue if any(part == "thirdparty" for part in py_files.parts): continue if update_license_in_file( py_files, license_template_path, start_line_start_with, end_line_start_with, ): file_count += 1 print(f"License updated in {file_count} files") if __name__ == "__main__": if len(sys.argv) < 3: print( "Usage from command line: " "python update_license.py " "No valid input arguments found, please enter manually." 
) directory_path = input("Enter directory path: ") license_template_path = input("Enter license template path: ") else: directory_path = sys.argv[1] license_template_path = sys.argv[2] start_line_start_with = "# =========== Copyright" end_line_start_with = "# =========== Copyright" update_license_in_directory( directory_path, license_template_path, start_line_start_with, end_line_start_with, ) ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["poetry-core>=1.2.0", "wheel"] build-backend = "poetry.core.masonry.api" [tool.poetry] name = "crab-framework" version = "0.1.2" description = "Cross-platform Agent Benchmark for Multimodal Embodied Language Model Agents." authors = ["CAMEL-AI.org"] maintainers = ["Tianqi Xu "] packages = [{ include = "crab" }] readme = "README.md" license = "Apache License 2.0" repository = "https://github.com/camel-ai/crab" [tool.poetry.dependencies] python = "^3.10, <3.12" # core docstring-parser = "^0" networkx = "^3" dill = "^0.3.8" pydantic = "^2.6" lxml = "^5.2.2" openai = "^1.12.0" cryptography = "^43.0.0" setuptools = "^73.0.1" tenacity = "^9.0.0" # desktop actions pillow = "^10.2.0" mss = "^9.0.1" psutil = "^5.9.8" pyautogui = "^0.9.3" pyperclip = "^1.8.2" # environment python-vagrant = "^1.0.0" # evaluation pyexcel-ods = "^0.6.0" odfpy = "^1.4.1" beautifulsoup4 = "^4.12.3" termcolor = "^2.4.0" opencv-python = "^4.9.0.80" # client httpx = { version = "*", optional = true } # agent google-generativeai = { version = "^0.6.0", optional = true } anthropic = { version = "^0.29.0", optional = true } groq = { version = "^0.5.0", optional = true } ollama = { version = "^0.2.0", optional = true } camel-ai = { version = "^0.2", extras = ["all"], optional = true } # text ocr easyocr = { version = "^1.7.1", optional = true } # visual prompt transformers = { version = "4.44.1", optional = true } torch = { version = "^2.4.0", optional = true } # server 
fastapi = { extras = ["all"], version = "0.109.1", optional = true } pydantic-settings = { version = "^2", optional = true } uvicorn = { extras = ["standard"], version = "^0.27.0.post1", optional = true } # radar plot plotly = { version = "^5.20.0", optional = true } # types types-pyautogui = "^0.9.3.20240106" types-psutil = "^5.9.5.20240205" types-networkx = "^3.2.1.20240210" [tool.poetry.extras] server = ["fastapi", "pydantic-settings", "uvicorn"] client = [ "httpx", "openai", "google-generativeai", "anthropic", "groq", "ollama", "easyocr", "plotly", "torch", "torchvision", "numpy", "opencv-python", "transformers", "addict", "yapf", "matplotlib", "pycocotools", "timm", ] camel = ["camel-ai"] [tool.poetry.group.dev.dependencies] mypy = "^1.8.0" pytest = "^8.0.0" ruff = "^0.6.5" ipykernel = "^6.29.3" pandas = "^2.2.2" sphinx = "^7" myst-parser = "^4" sphinx-book-theme = "*" pre-commit = "^3.7.0" certifi = "^2024.2.2" [tool.ruff] lint.select = ["E501", "E4", "E7", "E9", "F", "I"] lint.ignore = ["E731"] exclude = ["docs/"] [[tool.mypy.overrides]] module = ["dill", "easyocr", "google.generativeai.*"] ignore_missing_imports = true ================================================ FILE: test/actions/test_visual_prompt_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== from pathlib import Path import pytest import requests from PIL import Image from crab.actions.visual_prompt_actions import ( get_groundingdino_boxes, groundingdino_easyocr, ) from crab.utils import image_to_base64 @pytest.mark.skip(reason="Too slow") def test_get_groundingdino_boxes_single_image(): url = "http://images.cocodataset.org/val2017/000000039769.jpg" image = Image.open(requests.get(url, stream=True).raw) text = "a cat." box_threshold = 0.4 text_threshold = 0.3 result = get_groundingdino_boxes(image, text, box_threshold, text_threshold) assert len(result) == 1 assert len(result[0]) > 0 assert len(result[0][0]) == 2 @pytest.mark.skip(reason="Too slow") def test_get_groundingdino_boxes_multi_image(): url1 = "http://images.cocodataset.org/val2017/000000039769.jpg" url2 = "https://farm5.staticflickr.com/4005/4666183752_c5b79faa17_z.jpg" image1 = Image.open(requests.get(url1, stream=True).raw) image2 = Image.open(requests.get(url2, stream=True).raw) text = "a cat. a car." box_threshold = 0.4 text_threshold = 0.3 result = get_groundingdino_boxes( [image1, image2], text, box_threshold, text_threshold ) assert len(result) == 2 assert len(result[0]) > 0 assert len(result[1]) > 0 assert len(result[0][0]) == 2 @pytest.mark.skip(reason="Too slow") @pytest.mark.parametrize( "image_name", ["ubuntu_screenshot.png", "android_screenshot.png"] ) def test_groundingdino_easy_ocr(image_name: str): class A: pass temp = A() test_dir = Path(__file__).parent.parent image_path = test_dir / "_assets" / image_name image = Image.open(image_path) image_base64 = image_to_base64(image) visual_prompt = groundingdino_easyocr(font_size=40).set_kept_param(env=temp) result_image, boxes = visual_prompt.run(input_base64_image=image_base64) assert result_image != image_base64 assert boxes ================================================ FILE: test/agents/backend_models/test_camel_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. 
All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import pytest from crab import action from crab.agents.backend_models import BackendModelConfig, create_backend_model @pytest.fixture def camel_model(): return create_backend_model( BackendModelConfig( model_class="camel", model_name="gpt-4o", model_platform="openai", parameters={"max_tokens": 3000}, history_messages_len=1, ) ) @action def add(a: int, b: int): """Add up two integers. Args: a: An addend b: Another addend """ return a + b @pytest.mark.skip(reason="Mock data to be added") def test_action_chat(camel_model): camel_model.reset("You are a helpful assistant.", [add]) message = ( "I had 10 dollars. Miss Polaris gave me 15 dollars. " "How many money do I have now.", 0, ) output = camel_model.chat([message]) assert not output.message assert len(output.action_list) == 1 assert output.action_list[0].arguments == {"a": 10, "b": 15} assert output.action_list[0].name == "add" assert camel_model.token_usage > 0 ================================================ FILE: test/agents/backend_models/test_claude_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import pytest from crab import MessageType, action from crab.agents.backend_models import BackendModelConfig, create_backend_model # TODO: Add mock data @pytest.fixture def claude_model_text(): return create_backend_model( BackendModelConfig( model_class="claude", model_name="claude-3-opus-20240229", parameters={"max_tokens": 3000}, history_messages_len=1, ) ) @action def add(a: int, b: int): """Add up two integers. Args: a: An addend b: Another addend """ return a + b @pytest.mark.skip(reason="Mock data to be added") def test_text_chat(claude_model_text): message = ("Hello!", MessageType.TEXT) output = claude_model_text.chat(message) assert output.message assert output.action_list is None assert claude_model_text.token_usage > 0 # Send another message to check accumulated tokens and history length message2 = ("Give me five!", MessageType.TEXT) output = claude_model_text.chat(message2) assert claude_model_text.token_usage > 0 assert output.message assert len(claude_model_text.chat_history) == 2 # Send another message to check accumulated tokens and chat history output = claude_model_text.chat(message2) assert output.message assert len(claude_model_text.chat_history) == 3 @pytest.mark.skip(reason="Mock data to be added") def test_action_chat(claude_model_text): claude_model_text.reset("You are a helpful assistant.", [add]) message = ( ( "I had 10 dollars. Miss Polaris gave me 15 dollars." " How many money do I have now." 
), 0, ) output = claude_model_text.chat(message) assert len(output.action_list) == 1 args = output.action_list[0].arguments assert args["a"] + args["b"] == 25 assert output.action_list[0].name == "add" assert claude_model_text.token_usage > 0 ================================================ FILE: test/agents/backend_models/test_gemini_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import pytest from crab import MessageType, action from crab.agents.backend_models import BackendModelConfig, create_backend_model # TODO: Add mock data @pytest.fixture def gemini_model_text(): return create_backend_model( BackendModelConfig( model_class="gemini", model_name="gemini-1.5-pro-latest", parameters={"max_tokens": 3000}, history_messages_len=1, tool_call_required=False, ) ) @action def add(a: int, b: int): """Add up two integers. 
Args: a: An addend b: Another addend """ return a + b @pytest.mark.skip(reason="Mock data to be added") def test_text_chat(gemini_model_text): message = ("Hello!", MessageType.TEXT) output = gemini_model_text.chat(message) assert output.message assert output.action_list is None # assert gemini_model_text.token_usage > 0 # Send another message to check accumulated tokens and history length message2 = ("Give me five!", MessageType.TEXT) output = gemini_model_text.chat(message2) # assert gemini_model_text.token_usage > 0 assert output.message assert len(gemini_model_text.chat_history) == 2 # Send another message to check accumulated tokens and chat history output = gemini_model_text.chat(message2) assert output.message assert len(gemini_model_text.chat_history) == 3 @pytest.mark.skip(reason="Mock data to be added") def test_action_chat(gemini_model_text): gemini_model_text.reset("You are a helpful assistant.", [add]) message = ( ( "I had 10 dollars. Miss Polaris gave me 15 dollars. " "How many money do I have now." ), 0, ) output = gemini_model_text.chat(message) assert output.message is None assert len(output.action_list) == 1 assert output.action_list[0].arguments == {"a": 10, "b": 15} assert output.action_list[0].name == "add" ================================================ FILE: test/agents/backend_models/test_openai_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import os from unittest.mock import MagicMock, patch import pytest from openai.types.chat.chat_completion_message_tool_call import Function from crab import action from crab.agents.backend_models import BackendModelConfig, create_backend_model from crab.agents.backend_models.openai_model import MessageType # Mock data for the OpenAI API response openai_mock_response = MagicMock( choices=[ MagicMock( finish_reason="stop", index=0, logprobs=None, message=MagicMock( content="Hi there! How can I assist you today?", role="assistant", function_call=None, tool_calls=None, ), ) ], model="gpt-4o-2024-05-13", object="chat.completion", usage=MagicMock(completion_tokens=10, prompt_tokens=19, total_tokens=29), ) openai_mock_response2 = MagicMock( choices=[ MagicMock( finish_reason="stop", index=0, logprobs=None, message=MagicMock( content="Sure thing! 
✋ How can I help you today?", role="assistant", function_call=None, tool_calls=None, ), ) ], model="gpt-4o-2024-05-13", object="chat.completion", usage=MagicMock(completion_tokens=12, prompt_tokens=41, total_tokens=53), ) openai_mock_response3 = MagicMock( choices=[ MagicMock( finish_reason="stop", index=0, logprobs=None, message=MagicMock( content=None, role="assistant", function_call=None, tool_calls=[ MagicMock( id="call_ceE9IX1uYeRqGShYYlHYrCCF", function=Function(arguments='{"a":10,"b":15}', name="add"), type="function", ) ], ), ) ], model="gpt-4o-2024-05-13", object="chat.completion", usage=MagicMock(completion_tokens=15, prompt_tokens=93, total_tokens=108), ) @pytest.fixture def openai_model_text(): os.environ["OPENAI_API_KEY"] = "MOCK" return create_backend_model( BackendModelConfig( model_class="openai", model_name="gpt-4o", parameters={"max_tokens": 3000}, history_messages_len=1, tool_call_required=False, ) ) @action def add(a: int, b: int): """Add up two integers. Args: a: An addend b: Another addend """ return a + b @patch( "openai.resources.chat.completions.Completions.create", return_value=openai_mock_response, ) def test_text_chat(mock_create, openai_model_text): message = ("Hello!", MessageType.TEXT) output = openai_model_text.chat(message) assert len(mock_create.call_args.kwargs["messages"]) == 2 assert output.message == "Hi there! How can I assist you today?" assert output.action_list is None assert openai_model_text.token_usage == 29 # Send another message to check accumulated tokens and history length message2 = ("Give me five!", MessageType.TEXT) mock_create.return_value = openai_mock_response2 output = openai_model_text.chat(message2) assert len(mock_create.call_args.kwargs["messages"]) == 4 assert openai_model_text.token_usage == 29 + 53 assert output.message == "Sure thing! ✋ How can I help you today?" 
assert len(openai_model_text.chat_history) == 2 # Send another message to check accumulated tokens and chat history output = openai_model_text.chat(message2) assert len(mock_create.call_args.kwargs["messages"]) == 4 assert openai_model_text.token_usage == 29 + 53 + 53 assert output.message == "Sure thing! ✋ How can I help you today?" assert len(openai_model_text.chat_history) == 3 @patch( "openai.resources.chat.completions.Completions.create", return_value=openai_mock_response3, ) def test_action_chat(mock_create, openai_model_text): openai_model_text.reset("You are a helpful assistant.", [add]) message = ( ( "I had 10 dollars. Miss Polaris gave me 15 dollars. " "How many money do I have now." ), 0, ) output = openai_model_text.chat(message) assert output.message is None assert len(output.action_list) == 1 assert output.action_list[0].arguments == {"a": 10, "b": 15} assert output.action_list[0].name == "add" assert openai_model_text.token_usage == 108 ================================================ FILE: test/agents/policies/test_multi_agent_by_func.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== import pytest from crab import create_benchmark from crab.agents.backend_models import BackendModelConfig from crab.agents.policies.multi_agent_by_func import MultiAgentByFuncPolicy from crab.benchmarks.template import multienv_template_benchmark_config @pytest.fixture def policy_fixture(): model = BackendModelConfig( model_class="openai", model_name="gpt-4o", parameters={"max_tokens": 3000}, history_messages_len=1, ) benchmark_config = multienv_template_benchmark_config benchmark = create_benchmark(benchmark_config) task, action_spaces = benchmark.start_task("0") policy = MultiAgentByFuncPolicy( main_agent_model_backend=model, tool_agent_model_backend=model, ) policy.reset( task_description=task.description, action_spaces=action_spaces, env_descriptions=benchmark.get_env_descriptions(), ) return policy, benchmark @pytest.mark.skip(reason="Mock data to be added") def test_policy(policy_fixture): policy, benchmark = policy_fixture observations = benchmark.observe() agent_observation = {} for env in observations: if env == "root": continue agent_observation[env] = [ ( f'The current state of "{env}" is ' + str(observations[env]["current_state"]) + ". ", 0, ) ] action_list = policy.chat(agent_observation) assert action_list ================================================ FILE: test/agents/policies/test_mutli_agent_by_env.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import pytest from crab import create_benchmark from crab.agents.backend_models import BackendModelConfig from crab.agents.policies.multi_agent_by_env import MultiAgentByEnvPolicy from crab.benchmarks.template import multienv_template_benchmark_config @pytest.fixture def policy_fixture(): model = BackendModelConfig( model_class="openai", model_name="gpt-4o", parameters={"max_tokens": 3000}, history_messages_len=1, ) benchmark_config = multienv_template_benchmark_config benchmark = create_benchmark(benchmark_config) task, action_spaces = benchmark.start_task("0") policy = MultiAgentByEnvPolicy( main_agent_model_backend=model, env_agent_model_backend=model, ) policy.reset( task_description=task.description, action_spaces=action_spaces, env_descriptions=benchmark.get_env_descriptions(), ) return policy, benchmark @pytest.mark.skip(reason="Mock data to be added") def test_policy(policy_fixture): policy, benchmark = policy_fixture observations = benchmark.observe() agent_observation = {} for env in observations: if env == "root": continue agent_observation[env] = [ ( f'The current state of "{env}" is ' + str(observations[env]["current_state"]) + ". ", 0, ) ] action_list = policy.chat(agent_observation) assert action_list ================================================ FILE: test/agents/policies/test_single_agent.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import os from unittest.mock import MagicMock, patch import pytest from openai.types.chat.chat_completion import ( ChatCompletionMessage, Choice, CompletionUsage, ) from openai.types.chat.chat_completion_message_tool_call import ( ChatCompletionMessageToolCall, Function, ) from crab import create_benchmark from crab.agents.backend_models import BackendModelConfig from crab.agents.policies.single_agent import SingleAgentPolicy from crab.benchmarks.template import multienv_template_benchmark_config openai_mock_response = MagicMock( choices=[ Choice( finish_reason="stop", index=0, logprobs=None, message=ChatCompletionMessage( content=None, role="assistant", function_call=None, tool_calls=[ ChatCompletionMessageToolCall( id="call_3YIJZhrC5smSjAJKOeFcQxRf", function=Function( arguments='{"value": true}', name="set_state__in__testenv0" ), type="function", ), ChatCompletionMessageToolCall( id="call_mA9Z9HQfmYn2TbzeGsEVcCr7", function=Function( arguments='{"value": true}', name="set_state__in__testenv1" ), type="function", ), ChatCompletionMessageToolCall( id="call_GgxbBTd6afj2iDyOewaNattB", function=Function( arguments='{"value": true}', name="set_state__in__testenv2" ), type="function", ), ], ), ) ], model="gpt-4o-2024-05-13", object="chat.completion", usage=CompletionUsage(completion_tokens=74, prompt_tokens=648, total_tokens=722), ) @pytest.fixture def policy_fixture(): os.environ["OPENAI_API_KEY"] = "MOCK" model = BackendModelConfig( model_class="openai", model_name="gpt-4o", 
parameters={"max_tokens": 3000}, history_messages_len=1, ) benchmark_config = multienv_template_benchmark_config benchmark = create_benchmark(benchmark_config) task, action_spaces = benchmark.start_task("0") policy = SingleAgentPolicy(model_backend=model) policy.reset( task_description=task.description, action_spaces=action_spaces, env_descriptions=benchmark.get_env_descriptions(), ) return policy, benchmark @patch( "openai.resources.chat.completions.Completions.create", return_value=openai_mock_response, ) def test_policy(mock_create: MagicMock, policy_fixture): policy, benchmark = policy_fixture observation = benchmark.observe() for env in observation: if env == "root": continue observation[env] = [ ( 'The current state of "{env}" is ' + str(observation[env]["current_state"]) + ". ", 0, ) ] action_list = policy.chat(observation) mock_create.assert_called_once() assert action_list ================================================ FILE: test/core/test_action.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from crab.core import Action, action from crab.core.models.action import _check_no_param @action def dummy_function(a: int, b: str = "default") -> int: """ This is a test function. Args: a (int): The first parameter. b (str, optional): The second parameter. 
Defaults to "default". Returns: int: The result. """ return a + 1 @action def dummy_env_action(a: int, env: int) -> int: """ This is a kept parameter test function. Args: a (int): The first parameter. env (int): The current environemnt. Should not be appeared in the parameters. Returns: int: The result. """ return a + env def test_action_to_openai_json_schema(): result = dummy_function.to_openai_json_schema() assert result["name"] assert result["description"] assert result["parameters"] parameters = result["parameters"] assert "properties" in parameters assert "a" in parameters["properties"] assert parameters["properties"]["a"]["type"] == "integer" assert "b" in parameters["properties"] assert parameters["properties"]["b"]["type"] == "string" assert parameters["properties"]["b"]["default"] == "default" assert "required" in parameters assert "a" in parameters["required"] def test_from_function(): action_instance: Action = dummy_function assert action_instance.description == "This is a test function." 
assert action_instance.name == "dummy_function" assert "a" in action_instance.parameters.model_fields assert "b" in action_instance.parameters.model_fields assert action_instance.name == "dummy_function" def test_chaining(): dummy_x2 = dummy_function >> dummy_function assert dummy_x2.entry(1) == 3 @action def add_a_to_b(a: int, b: int = 1) -> int: return a + b @action def multiply_a_to_b(a: int, b: int = 1) -> int: return a * b def test_closed_action(): action = add_a_to_b(5) assert action.entry() == 6 assert _check_no_param(action) def test_kwargs_action(): action = add_a_to_b(b=6) assert action.entry(1) == 7 def test_chain_various_actions(): action = add_a_to_b(b=10) >> multiply_a_to_b(b=10) >> add_a_to_b() assert action.entry(0) == 101 action = add_a_to_b(a=1, b=10) >> multiply_a_to_b(b=10) >> add_a_to_b() assert action.entry() == 111 action = add_a_to_b(1, b=10) >> multiply_a_to_b(b=10) >> add_a_to_b() assert action.entry() == 111 def test_kept_param(): action = dummy_env_action.set_kept_param(env=10) assert action.run(a=10) == 20 ================================================ FILE: test/core/test_benchmark.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
===========
import pytest
from fastapi.testclient import TestClient

from crab import Benchmark, action, create_benchmark
from crab.benchmarks.template import (
    multienv_template_benchmark_config,
    template_benchmark_config,
    template_environment_config,
)
from crab.server.main import init


@pytest.fixture
def benchmark(request):
    """Indirectly-parametrized benchmark fixture.

    Supported params:
      * "multienv"        — local multi-environment benchmark.
      * "multienv-remote" — multi-environment benchmark whose environments
        talk to in-process FastAPI apps through TestClient instances.
      * "singleenv"       — local single-environment benchmark.
    """
    if request.param == "multienv":
        yield create_benchmark(multienv_template_benchmark_config)
    elif request.param == "multienv-remote":
        # TODO: fix multienv remote — the remote envs currently share the
        # same underlying template environment config.
        # One FastAPI app + TestClient per simulated remote environment.
        app0 = init(environment_config=template_environment_config)
        client0 = TestClient(app0)
        app1 = init(environment_config=template_environment_config)
        client1 = TestClient(app1)
        app2 = init(environment_config=template_environment_config)
        client2 = TestClient(app2)
        # Copy the config so the module-level template stays untouched.
        proxy_config = multienv_template_benchmark_config.model_copy()
        for env in proxy_config.environments:
            # Any URL works: the HTTP layer is bypassed by injecting the
            # TestClient directly below.
            env.remote_url = "http://127.0.0.1:8000"
        benchmark = create_benchmark(proxy_config)
        benchmark.environment_map["testenv0"]._client = client0
        benchmark.environment_map["testenv1"]._client = client1
        benchmark.environment_map["testenv2"]._client = client2
        yield benchmark
    elif request.param == "singleenv":
        yield create_benchmark(template_benchmark_config)


@pytest.mark.parametrize("benchmark", ["multienv", "multienv-remote"], indirect=True)
def test_multi_env_benchmark_process(benchmark: Benchmark):
    # Walk task "0" to completion: setting the state in each of the three
    # test envs raises completeness by 0.25, and a final root "_submit"
    # terminates the task at 1.0.
    assert benchmark.multienv
    task, actions = benchmark.start_task(task_id="0")
    assert benchmark.current_task == task
    # Three test envs plus the "root" env, which only exposes _submit.
    assert len(actions) == 4
    assert len(actions["root"]) == 1
    assert actions["root"][0].name == "_submit"
    result = benchmark.step(
        action="set_state", parameters={"value": True}, env_name="testenv0"
    )
    assert result.evaluation_results["completeness"] == 0.25
    result = benchmark.step(
        action="set_state", parameters={"value": True}, env_name="testenv1"
    )
    assert result.evaluation_results["completeness"] == 0.5
    result = benchmark.step(
        action="set_state",
        parameters={"value": True},
        env_name="testenv2",
    )
    assert result.evaluation_results["completeness"] == 0.75
    result = benchmark.step(
        action="_submit", parameters={"content": True}, env_name="root"
    )
    assert result.terminated
    assert result.evaluation_results["completeness"] == 1.0


@action
def to_str(input: bool) -> str:
    return f"The current state is {input}"


@pytest.mark.parametrize("benchmark", ["singleenv"], indirect=True)
def test_prompting_tool(benchmark: Benchmark):
    # A prompting tool post-processes an observation into a prompt string.
    benchmark.prompting_tools = {"template_env": {"current_state": to_str}}
    benchmark.start_task("0")
    observe, prompt = benchmark.observe_with_prompt()
    assert observe["template_env"]["current_state"] is False
    assert prompt["template_env"]["current_state"] == "The current state is False"
    benchmark.close_task()



================================================
FILE: test/core/test_evaluator.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import networkx as nx
import pytest

from crab.core import Environment, Evaluator, GraphEvaluator, evaluator

# Module-level state the dummy evaluators read; mutated through set_a().
a = None


def set_a(value: int) -> None:
    global a
    a = value


@evaluator
def dummy_evaluator1() -> bool:
    """
    This is a test evaluator.

    Passes when the module-global ``a`` is positive.

    Returns:
        bool: The result.
    """
    return a > 0


@evaluator
def dummy_evaluator2() -> bool:
    """
    This is a test evaluator.
    Args:
        a (int): The first parameter.
        b (str, optional): The second parameter. Defaults to "default".

    Returns:
        bool: The result.
    """
    return a < 2


@evaluator
def dummy_evaluator3() -> bool:
    """
    This is a test evaluator.

    Passes when the module-global ``a`` exceeds 100.

    Returns:
        bool: The result.
    """
    return a > 100


@evaluator
def no_param_evaluator() -> bool:
    # Always passes; used as a trivial node in graph-evaluator tests.
    return True


@pytest.fixture
def root_env() -> Environment:
    """A bare "root" environment with empty action/observation spaces."""
    return Environment(
        name="root",
        action_space=[],
        observation_space=[],
        description="The crab root server",
    )


def test_evaluator_run():
    # @evaluator wraps the function into an Evaluator; entry() evaluates it
    # against the current module-global state.
    assert isinstance(dummy_evaluator1, Evaluator)
    set_a(3)
    assert dummy_evaluator1.entry()
    set_a(-1)
    assert not dummy_evaluator1.entry()


def test_evaluator_and():
    # "&" combines evaluators with logical AND (a > 0 and a < 2).
    set_a(1)
    assert (dummy_evaluator1 & dummy_evaluator2).entry()
    set_a(-1)
    assert not (dummy_evaluator1 & dummy_evaluator2).entry()
    set_a(3)
    assert not (dummy_evaluator1 & dummy_evaluator2).entry()


def test_evaluator_or():
    # "|" combines evaluators with logical OR (a > 0 or a < 2) — always true
    # for the sampled values.
    set_a(1)
    assert (dummy_evaluator1 | dummy_evaluator2).entry()
    set_a(-1)
    assert (dummy_evaluator1 | dummy_evaluator2).entry()
    set_a(3)
    assert (dummy_evaluator1 | dummy_evaluator2).entry()


def test_evaluator_not():
    # "~" negates an evaluator.
    set_a(3)
    assert not (~dummy_evaluator1).entry()
    set_a(-1)
    assert (~dummy_evaluator1).entry()


def test_chain_evaluator(root_env):
    # Build a three-node path graph: evaluator1 -> evaluator2 -> no_param.
    graph_evaluator = GraphEvaluator(
        nx.path_graph(
            [dummy_evaluator1, dummy_evaluator2, no_param_evaluator],
            create_using=nx.DiGraph,
        )
    )
    graph_evaluator.reset()
    assert graph_evaluator.count == 0
    # Only the source node starts with no unmet predecessors.
    assert graph_evaluator.G.nodes[dummy_evaluator1]["remaining_predecessors"] == 0
    assert graph_evaluator.G.nodes[dummy_evaluator2]["remaining_predecessors"] == 1
    assert graph_evaluator.G.nodes[no_param_evaluator]["remaining_predecessors"] == 1
    set_a(3)
    graph_evaluator.step({"root": root_env})
    assert graph_evaluator.count == 1
    # evaluator1 (a > 0) passed on step 0, unlocking evaluator2.
    assert graph_evaluator.G.nodes[dummy_evaluator1]["passing_count"] == 0
    assert graph_evaluator.G.nodes[dummy_evaluator2]["remaining_predecessors"] == 0
    set_a(3)
    graph_evaluator.step({"root": root_env})
    assert graph_evaluator.count == 2
    # evaluator2 (a < 2) has not passed yet with a == 3.
    assert graph_evaluator.G.nodes[dummy_evaluator2]["remaining_predecessors"] == 0
    assert graph_evaluator.G.nodes[dummy_evaluator2]["passing_count"] is None
    set_a(-1)
    graph_evaluator.step({"root": root_env})
    assert graph_evaluator.count == 3
    # evaluator2 passed on step 2, unlocking the final node.
    assert graph_evaluator.G.nodes[dummy_evaluator2]["passing_count"] == 2
    assert graph_evaluator.G.nodes[no_param_evaluator]["remaining_predecessors"] == 0



================================================
FILE: test/core/test_utils.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import os

from crab.utils import decrypt_message, encrypt_message


def test_encrypt_decrypt():
    # Round-trip: decrypting what was encrypted with the same 32-byte key
    # must recover the original message.
    message = "Hello, World!"
    key = os.urandom(32)
    encrypted_message = encrypt_message(message, key)
    decrypted_message = decrypt_message(encrypted_message, key)
    assert decrypted_message == message



================================================
FILE: test/server/test_api.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import pytest from fastapi.testclient import TestClient from crab import create_environment from crab.environments.template import ( current_state, set_state, template_environment_config, ) from crab.server.main import init @pytest.fixture def mock_env(): mock_app = init(template_environment_config) mock_cli = TestClient(mock_app) mock_env = create_environment(template_environment_config) mock_env._client = mock_cli return mock_env def test_raw_action_unencrypted(mock_env): assert mock_env._action_endpoint(set_state, {"value": True}) is None assert mock_env._action_endpoint(current_state, {}) is True assert mock_env._action_endpoint(set_state(True), {}) is None assert mock_env._action_endpoint(current_state >> set_state, {}) is None assert mock_env._action_endpoint(set_state(True) + current_state, {}) is True def test_raw_action_encrypted(mock_env, monkeypatch): monkeypatch.setenv("ENCRYPTION_KEY", "the-cake-is-a-lie") assert mock_env._action_endpoint(set_state, {"value": True}) is None assert mock_env._action_endpoint(current_state, {}) is True assert mock_env._action_endpoint(set_state(True), {}) is None assert mock_env._action_endpoint(current_state >> set_state, {}) is None assert mock_env._action_endpoint(set_state(True) + current_state, {}) is True