Repository: camel-ai/crab Branch: main Commit: a8b6d7272385 Files: 230 Total size: 526.1 KB Directory structure: gitextract_jurvigyb/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── feature_request.yml │ │ └── questions.yml │ ├── actions/ │ │ └── crab_install/ │ │ └── action.yml │ └── workflows/ │ ├── documentation.yml │ ├── publish_release.yml │ └── pytest_package.yml ├── .gitignore ├── .pre-commit-config.yaml ├── README.md ├── crab/ │ ├── __init__.py │ ├── actions/ │ │ ├── android_actions.py │ │ ├── crab_actions.py │ │ ├── desktop_actions.py │ │ ├── file_actions.py │ │ ├── system_actions.py │ │ └── visual_prompt_actions.py │ ├── agents/ │ │ ├── backend_models/ │ │ │ ├── __init__.py │ │ │ ├── camel_model.py │ │ │ ├── claude_model.py │ │ │ ├── gemini_model.py │ │ │ └── openai_model.py │ │ ├── policies/ │ │ │ ├── __init__.py │ │ │ ├── multi_agent_by_env.py │ │ │ ├── multi_agent_by_func.py │ │ │ └── single_agent.py │ │ └── utils.py │ ├── benchmarks/ │ │ ├── __init__.py │ │ └── template.py │ ├── core/ │ │ ├── __init__.py │ │ ├── agent_policy.py │ │ ├── backend_model.py │ │ ├── benchmark.py │ │ ├── csv_log.py │ │ ├── decorators.py │ │ ├── environment.py │ │ ├── exceptions.py │ │ ├── experiment.py │ │ ├── graph_evaluator.py │ │ ├── models/ │ │ │ ├── __init__.py │ │ │ ├── action.py │ │ │ ├── agent_interface.py │ │ │ ├── benchmark_interface.py │ │ │ ├── config.py │ │ │ ├── evaluator.py │ │ │ └── task.py │ │ └── task_generator.py │ ├── environments/ │ │ ├── __init__.py │ │ └── template.py │ ├── server/ │ │ ├── __init__.py │ │ ├── api.py │ │ ├── config.py │ │ ├── exception_handlers.py │ │ ├── logger.py │ │ ├── main.py │ │ ├── middleware.py │ │ └── utils.py │ └── utils/ │ ├── __init__.py │ ├── common.py │ ├── encryption.py │ └── measure.py ├── crab-benchmark-v0/ │ ├── README.md │ ├── __init__.py │ ├── android_env.py │ ├── dataset/ │ │ ├── android/ │ │ │ ├── 1005c437-50d1-465a-b3fc-833098b22bfc.json │ │ │ ├── 12333aa0-e76d-4a5c-8657-9f897f62f62d.json │ │ │ 
├── 22b04776-8eec-4303-b3f6-9c981f7f29b8.json │ │ │ ├── 2ade6a13-c7a6-4df7-8c62-77382687369e.json │ │ │ ├── 346caf7c-dc74-4c38-962a-aaffb638e0c7.json │ │ │ ├── 379b9c58-5125-41b3-9cc6-ea925c8b094d.json │ │ │ ├── 4190c90c-b28c-4bb3-ab5c-af3c4fde0a3d.json │ │ │ ├── 46d7ccdb-d2e4-4b8a-bead-f2641b5ac23c.json │ │ │ ├── 483fbf9c-dc78-4ac2-9264-53c4f617f6cc.json │ │ │ ├── 4893a9b0-6477-495d-a73c-32503326e24a.json │ │ │ ├── 53010c40-dce4-4d72-a856-842c21059e2b.json │ │ │ ├── 6d9f6395-de79-4ad0-8a2a-2d674f93f293.json │ │ │ ├── 71ef7fd2-0ae3-49c8-8238-06b7aa985d25.json │ │ │ ├── 73f78fc3-1ca5-442d-801f-bc175a0bfb89.json │ │ │ ├── 764838cc-9359-4130-9bb2-4a75900b2d89.json │ │ │ ├── 77289141-e52b-48c8-b3a7-1b29520f3e1e.json │ │ │ ├── 7891ceab-7965-4ddb-a0fc-15740c9a4e44.json │ │ │ ├── 8bd51440-f959-4edc-baa5-cd03d32a5b0f.json │ │ │ ├── 94b1836b-3111-40ad-8d07-b8a57efe7438.json │ │ │ ├── a225f7f8-6d03-4619-b57d-7a08610030d8.json │ │ │ ├── b077299d-1acb-40f5-89f3-cc08044345bf.json │ │ │ ├── b3965b07-4683-4445-9de1-a1dedf6c73ad.json │ │ │ ├── c1b1cfeb-40e7-49a8-a3f5-b8c8ba723601.json │ │ │ ├── c85f03c9-83c4-417b-93d9-0d7b41022525.json │ │ │ ├── cf4c496b-fbbd-4701-91ea-4590fe6a66e1.json │ │ │ ├── d0811e47-d75f-40ce-b34b-e1ee3c8bed3f.json │ │ │ ├── d2d456bb-c7d1-46af-8263-78d8509fb320.json │ │ │ ├── d4e0f2b3-d0ff-4efd-856f-9f5e598cfd05.json │ │ │ ├── d7489d00-0046-4fb1-af5b-1fde7d87312c.json │ │ │ ├── d92f6c33-e0a7-4101-957d-e7dd218d2565.json │ │ │ ├── de843952-df8f-4a26-bae9-d0a32ed9a7f5.json │ │ │ ├── e20fd121-b981-42da-94de-efcd66889c11.json │ │ │ ├── e55d7a39-7b6b-4852-8711-844cebc88cb8.json │ │ │ ├── e9268070-91b7-4e8c-9976-1cf8126ba13b.json │ │ │ ├── fbe6a1b1-63bb-4d4e-8a53-ff4f7839ef61.json │ │ │ └── fc642cb6-5321-4966-afbf-fb3348bb69ee.json │ │ ├── android_subtasks.py │ │ ├── cross/ │ │ │ ├── 05a7633d-b966-471c-8848-e18e69ad265f.json │ │ │ ├── 1e92db38-501e-429b-ac31-453d1af10a25.json │ │ │ ├── 43be6e8e-034d-4277-8346-c4ae7553bf68.json │ │ │ ├── 
534be964-269a-4509-b2b8-28cc3ba8dfca.json │ │ │ ├── 6f95cfa1-e7ae-4a82-912b-0180fc9622f2.json │ │ │ ├── 760ed27e-b1bd-451f-8659-bdb9845fcb7f.json │ │ │ ├── 82596760-7d4d-457d-9ca9-9551ab85ec58.json │ │ │ ├── a956a091-8de4-42ee-b152-913308dfc24b.json │ │ │ ├── c5929ef3-ac27-4288-b02f-4f261d5871f9.json │ │ │ └── da5911e3-1a99-4735-ba3e-f08c5ca81fdd.json │ │ ├── handmade_tasks.py │ │ ├── ubuntu/ │ │ │ ├── 05d0e137-7d97-4021-9477-6490a2154c81.json │ │ │ ├── 0a893c2e-eec5-47cc-a930-eb01c5f17683.json │ │ │ ├── 0d178388-8166-4b66-93c1-278861f9897c.json │ │ │ ├── 0d7c84d2-bbbd-46ab-80d1-52b3a44f3858.json │ │ │ ├── 0deafe05-8db5-445f-9031-f6e884569d03.json │ │ │ ├── 0e80fd90-0b23-454f-a629-7b6d7baa7542.json │ │ │ ├── 125f7bae-e931-4190-8737-5f1ea7227772.json │ │ │ ├── 15a150a8-899c-4753-8dc5-05248ccc3640.json │ │ │ ├── 1ebcd710-f73b-4022-832b-167c0d3f55a2.json │ │ │ ├── 22787ecc-52b2-4791-aefb-c45800f51414.json │ │ │ ├── 22f05f6f-6aef-4786-958f-14f559eaf014.json │ │ │ ├── 28963795-d694-4bb4-adaf-f7708a2c6fe5.json │ │ │ ├── 299db8f2-81eb-455f-9302-5c8cb30be691.json │ │ │ ├── 29f099b2-b3a5-463f-b10a-15363bf7e845.json │ │ │ ├── 355e9660-a355-4b95-8881-ac9da578ea43.json │ │ │ ├── 35bd7387-4735-4632-8474-e93382004c12.json │ │ │ ├── 362c5711-3824-42ff-96a0-7801b03b5f1f.json │ │ │ ├── 4718df9c-97ec-4b54-86ca-bd34e65c5a43.json │ │ │ ├── 47b75b21-99a2-461c-9d40-6dddc5c206d0.json │ │ │ ├── 4ae4e35f-d90a-48cc-8fb9-492ac7ae07ee.json │ │ │ ├── 4bbedade-4d4e-43d5-b650-2702b350ad28.json │ │ │ ├── 51a288f9-cf2c-4e8e-a98c-596a505af77c.json │ │ │ ├── 51c91051-3efb-4e92-a967-739b18520714.json │ │ │ ├── 57b7e8a7-8c17-4cc4-9bb5-4385afde3ad8.json │ │ │ ├── 58776443-ccf7-4db3-8c60-e188e4b5f90c.json │ │ │ ├── 5ba74c6a-4513-448b-8b68-ff145ece0652.json │ │ │ ├── 6428f803-62de-40d2-a345-64e6cf955c9d.json │ │ │ ├── 64a2c205-c85a-4e56-8edb-5df4f7724441.json │ │ │ ├── 696ca9bb-89ea-4cd5-b693-f2d749d964b1.json │ │ │ ├── 6be49e77-e904-4eb0-a36a-7f0fd128ede3.json │ │ │ ├── 
6c3105a2-328c-4190-823d-03d759be0b57.json │ │ │ ├── 6c560516-ca14-4f97-b51d-16ad81fc29e4.json │ │ │ ├── 730172f5-894a-4d46-9102-ac7d985a479d.json │ │ │ ├── 73038efb-ca0f-4d90-a947-fcfd097dd91b.json │ │ │ ├── 73da97c9-f084-4cab-8697-1151737387ff.json │ │ │ ├── 77aa4dd3-5a68-4686-9cac-26d0ab77c7b4.json │ │ │ ├── 78502f1c-879b-4932-a5fd-d85f7f6b0f81.json │ │ │ ├── 7912f7a5-24b9-4dfe-a7b8-1effc1b7a212.json │ │ │ ├── 7d5613ec-9b67-4255-b766-d9c6e8466464.json │ │ │ ├── 7dda7e46-78be-4663-b882-6132dbbff335.json │ │ │ ├── 7e6c4927-2220-4522-9e3f-36f69adc3e71.json │ │ │ ├── 82c49e12-3b2f-432e-9069-4b67bafebbf7.json │ │ │ ├── 87910f23-ab23-4ccc-b115-d71cff6f0162.json │ │ │ ├── 8afc25eb-7a80-459f-acdc-5c79fc146c29.json │ │ │ ├── 8cb5ab6d-a56e-43b9-aa83-00a46331e20f.json │ │ │ ├── 90e09946-7b28-4102-b0ed-f683c01dbbd4.json │ │ │ ├── 925a3607-2802-48aa-b339-13ebfcef43a2.json │ │ │ ├── 9506dd30-f58d-4832-b336-8037e83e2689.json │ │ │ ├── 95e347aa-56ab-4d5d-a94c-350ddfddabf9.json │ │ │ ├── 98a360d8-0f95-44cd-bb9d-442fca2918d4.json │ │ │ ├── 9c979fc5-8d60-41f1-a494-904a1d312187.json │ │ │ ├── 9e08971c-7f83-4853-952e-4c4a4a26333b.json │ │ │ ├── 9fe4f541-61cf-48e0-a081-4371786659c7.json │ │ │ ├── a0714ef7-bbdc-4f84-bd2e-c6e611d4db9e.json │ │ │ ├── a2a34580-cded-4bf8-81d9-b36a4d4402d0.json │ │ │ ├── a6b67c2d-d448-4e77-904e-dc7c5f21a5fe.json │ │ │ ├── a70ab903-835f-48b7-8356-2321b8b869d8.json │ │ │ ├── a78177f5-6cc6-48d7-8c6f-df53399d7759.json │ │ │ ├── abb16512-27ae-49c0-b12b-7fbf0e95056b.json │ │ │ ├── b2ca21dc-dde9-49f5-bec7-321fbf769315.json │ │ │ ├── b57c96c1-071b-40f6-b33b-2a0459fc25bb.json │ │ │ ├── b73019e0-3ce8-4657-8b13-b3e0ab6cfac8.json │ │ │ ├── ba5aebcb-999d-44d4-b9bc-241f9884c6dd.json │ │ │ ├── be6468be-2218-45c1-9b75-b56efec61eb4.json │ │ │ ├── c4106f9a-9348-4a55-9892-782e6f4b3081.json │ │ │ ├── c8800e50-3ff4-4dd2-bc90-33688be99659.json │ │ │ ├── ccf31785-ec13-4981-93c5-ca6c242ac0c3.json │ │ │ ├── d3478489-70f2-4a82-b7d2-0a47b75986eb.json │ │ │ ├── 
d39d40b1-fc26-4169-9d6f-cdf81efe9a3e.json │ │ │ ├── d3c917ff-406f-447a-87f5-b8d835cba750.json │ │ │ ├── d6e460e4-c295-40ad-883c-11300d7832f0.json │ │ │ ├── d9e4e23c-2a2a-4b5c-b034-7deb6036572d.json │ │ │ ├── e31d4e3b-b753-4deb-b9ad-a0add5d4790e.json │ │ │ ├── f07a1f32-2f3f-40e7-b12f-8f1b128c41f6.json │ │ │ ├── f5cce3a0-ba65-4317-95f8-1fc7d9776c78.json │ │ │ ├── f67a26e4-58dd-4dc6-8859-affbf1d62f94.json │ │ │ └── f96d7c34-9543-4679-a6ea-89e0c2ef7b1c.json │ │ └── ubuntu_subtasks.py │ ├── main.py │ ├── scripts/ │ │ └── ubuntu_env_init.sh │ └── ubuntu_env.py ├── docs/ │ ├── Makefile │ ├── conf.py │ ├── crab.benchmarks.rst │ ├── crab.client.rst │ ├── crab.core.models.rst │ ├── crab.core.rst │ ├── crab.environments.rst │ ├── crab.rst │ ├── crab.server.controller.rst │ ├── crab.server.rst │ ├── crab_benchmark_v0/ │ │ ├── environment_gcp_setup.md │ │ ├── environment_local_setup.md │ │ └── get_started.md │ ├── get_started/ │ │ ├── build_your_own_benchmark.md │ │ └── quickstart.md │ ├── index.rst │ ├── make.bat │ └── modules.rst ├── examples/ │ ├── multi_env.py │ └── single_env.py ├── licenses/ │ ├── LICENSE │ ├── license_template.txt │ └── update_license.py ├── pyproject.toml └── test/ ├── actions/ │ └── test_visual_prompt_actions.py ├── agents/ │ ├── backend_models/ │ │ ├── test_camel_model.py │ │ ├── test_claude_model.py │ │ ├── test_gemini_model.py │ │ └── test_openai_model.py │ └── policies/ │ ├── test_multi_agent_by_func.py │ ├── test_mutli_agent_by_env.py │ └── test_single_agent.py ├── core/ │ ├── test_action.py │ ├── test_benchmark.py │ ├── test_evaluator.py │ └── test_utils.py └── server/ └── test_api.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: 🐛 Bug Report description: File an issue about a bug. 
title: "[BUG] " labels: [bug] assignees: [dandansamax] body: - type: markdown attributes: value: | Please do your best to make the issue as easy to act on as possible, and only submit here if there is clearly a problem with camel (ask in [Discussions](https://github.com/camel-ai/camel/discussions) first if unsure). - type: input id: version attributes: label: What version of camel are you using? description: Run command `python3 -c 'print(__import__("camel").__version__)'` in your shell and paste the output here. placeholder: E.g., 0.1.0 validations: required: true - type: textarea id: system-info attributes: label: System information description: | Describe the characteristic of your environment: - Describe how the library was installed (pip, conda, source, ...) - Python version - Versions of any other relevant libraries ```python import sys, camel print(sys.version, sys.platform) print(camel.__version__) ``` validations: required: true - type: textarea id: description attributes: label: Problem description description: >- Provide a short description, state the expected behavior and what actually happens. Include relevant information like what version of camel you are using, what system you are on, and any useful commands / output. validations: required: true - type: textarea id: code attributes: label: Reproducible example code description: >- The code should be minimal, have minimal external dependencies, and isolate the functions that cause breakage. Submit matched and complete snippets that can be easily run to diagnose the issue. value: | The Python snippets: ```python ``` Command lines: ```bash ``` Extra dependencies: ```text ``` Steps to reproduce: 1. 2. 3. validations: required: true - type: textarea id: traceback attributes: label: Traceback description: Put the Python traceback information here. placeholder: | Traceback (most recent call last): File ... 
render: pytb - type: textarea id: expected attributes: label: Expected behavior description: Provide a clear and concise description of what you expected to happen. - type: textarea id: additional-context attributes: label: Additional context description: >- Add any other context about the problem here. Screenshots may also be helpful. If you know or suspect the reason for this bug, paste the code lines and suggest modifications. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.yml ================================================ name: ✨ Feature Request description: Suggest an idea for this project. title: "[Feature Request] " labels: [enhancement] assignees: [dandansamax] body: - type: checkboxes id: steps attributes: label: Required prerequisites description: Make sure you've completed the following steps before submitting your issue -- thank you! options: - label: I have searched the [Issue Tracker](https://github.com/camel-ai/crab/issues) that this hasn't already been reported. (+1 or comment there if it has.) required: true - type: textarea id: motivation attributes: label: Motivation description: Outline the motivation for the proposal. value: | validations: required: true - type: textarea id: solution attributes: label: Solution description: Provide a clear and concise description of what you want to happen. - type: textarea id: additional-context attributes: label: Additional context description: Add any other context about the problem here. Screenshots may also be helpful. ================================================ FILE: .github/ISSUE_TEMPLATE/questions.yml ================================================ name: 🤔 Questions / Help / Support description: Do you need support? 
title: "[Question] " labels: [question] assignees: [dandansamax] body: - type: checkboxes id: steps attributes: label: Required prerequisites description: Make sure you've completed the following steps before submitting your issue -- thank you! options: # - label: I have read the documentation . # required: true - label: I have searched the [Issue Tracker](https://github.com/camel-ai/crab/issues) that this hasn't already been reported. (+1 or comment there if it has.) required: true - type: textarea id: questions attributes: label: Questions description: Describe your questions with relevant resources such as snippets, links, images, etc. validations: required: true ================================================ FILE: .github/actions/crab_install/action.yml ================================================ name: 'crab_install' description: 'Setup python environment and install dependencies for Crab by poetry.' inputs: python-version: description: 'Python version.' required: true default: '3.10' runs: using: "composite" steps: - name: Set up Python uses: actions/setup-python@v3 with: python-version: '${{ inputs.python-version }}' - name: Install poetry uses: abatilo/actions-poetry@v2 - name: Setup poetry virtual environment run: | poetry config virtualenvs.create true --local poetry config virtualenvs.in-project true --local shell: bash - uses: actions/cache/restore@v3 id: cache-restore name: Restore caches for the virtual environment based on poetry.lock with: path: ./.venv key: venv-${{ hashFiles('poetry.lock') }} - name: Install the project dependencies run: poetry install -E client -E server -E camel shell: bash - uses: actions/cache/save@v3 name: Save caches based on poetry.lock if: ${{ !steps.cache-restore.outputs.cache-hit }} with: path: ./.venv key: venv-${{ hashFiles('poetry.lock') }} ================================================ FILE: .github/workflows/documentation.yml ================================================ name: Build and deploy CRAB 
documents on: push: branches: [ "main" ] workflow_dispatch: permissions: contents: write jobs: docs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python environment and install dependencies uses: ./.github/actions/crab_install with: python-version: "3.10" - name: Sphinx build run: | cd docs poetry run make html - name: Deploy uses: peaceiris/actions-gh-pages@v3 if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main'}} with: publish_branch: gh-pages github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: docs/_build/html/ force_orphan: true ================================================ FILE: .github/workflows/publish_release.yml ================================================ name: Publish CRAB to PyPI / GitHub on: push: tags: - "v*" workflow_dispatch: jobs: build-n-publish: name: Build and publish to PyPI runs-on: ubuntu-latest permissions: contents: write steps: - uses: actions/checkout@v3 - name: Build and publish to pypi uses: JRubics/poetry-publish@v1.17 with: pypi_token: ${{ secrets.PYPI_API_KEY }} ignore_dev_requirements: "yes" - name: Create GitHub Release id: create_release uses: actions/create-release@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token with: tag_name: ${{ github.ref }} release_name: ${{ github.ref }} draft: false prerelease: false - name: Get Asset name run: | export PKG=$(ls dist/ | grep tar) set -- $PKG echo "name=$1" >> $GITHUB_ENV - name: Upload Release Asset (sdist) to GitHub id: upload-release-asset uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ steps.create_release.outputs.upload_url }} asset_path: dist/${{ env.name }} asset_name: ${{ env.name }} asset_content_type: application/zip ================================================ FILE: .github/workflows/pytest_package.yml ================================================ # This workflow will install 
Python dependencies, run tests # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python name: Pytest CRAB package on: push jobs: pytest: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python environment and install dependencies uses: ./.github/actions/crab_install with: python-version: "3.10" - name: Run pytest run: poetry run pytest test/ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST .vagrant/* # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation # docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
# However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/#use-with-ide .pdm.toml # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
.idea/ .vscode/ .python-version _build/ # model parameter *.pth logs/ .DS_Store ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. rev: v0.6.5 hooks: # Run the linter. - id: ruff # Run the formatter. - id: ruff-format - repo: local hooks: - id: check-license name: Check License entry: python licenses/update_license.py . licenses/license_template.txt language: system types: [python] ================================================ FILE: README.md ================================================ # 🦀 CRAB: Cross-platform Agent Benchmark for Multimodal Embodied Language Model Agents [![arXiv][arxiv-image]][arxiv-url] [![Slack][slack-image]][slack-url] [![Discord][discord-image]][discord-url] [![Wechat][wechat-image]][wechat-url] [![Twitter][twitter-image]][twitter-url]

Documentation | Website & Demos | Blog | Chinese Blog | CAMEL-AI

## Overview CRAB is a framework for building LLM agent benchmark environments in a Python-centric way. #### Key Features 🌐 Cross-platform and Multi-environment * Build agent environments that support various deployment options including in-memory, Docker-hosted, virtual machines, or distributed physical machines, provided they are accessible via Python functions. * Let the agent access all the environments at the same time through a unified interface. ⚙️ Easy-to-use Configuration * Add a new action by simply adding a `@action` decorator on a Python function. * Define the environment by integrating several actions together. 📐 Novel Benchmarking Suite * Define tasks and the corresponding evaluators in an intuitive Python-native way. * Introduce a novel graph evaluator method providing fine-grained metrics. ## Installation #### Prerequisites - Python 3.10 or newer ```bash pip install crab-framework[client] ``` ## Experiment on CRAB-Benchmark-v0 All datasets and experiment code are in the [crab-benchmark-v0](./crab-benchmark-v0/) directory. Please carefully read the [benchmark tutorial](./crab-benchmark-v0/README.md) before using our benchmark.
## Examples #### Run template environment with openai agent ```bash export OPENAI_API_KEY= python examples/single_env.py python examples/multi_env.py ``` ## Demo Video [![demo_video](https://i.ytimg.com/vi_webp/PNqrHNQlU6I/maxresdefault.webp)](https://www.youtube.com/watch?v=PNqrHNQlU6I&ab_channel=CamelAI) ## Cite Please cite [our paper](https://arxiv.org/abs/2407.01511) if you use anything related in your work: ``` @misc{xu2024crab, title={CRAB: Cross-environment Agent Benchmark for Multimodal Language Model Agents}, author={Tianqi Xu and Linyao Chen and Dai-Jie Wu and Yanjun Chen and Zecheng Zhang and Xiang Yao and Zhiqiang Xie and Yongchao Chen and Shilong Liu and Bochen Qian and Philip Torr and Bernard Ghanem and Guohao Li}, year={2024}, eprint={2407.01511}, archivePrefix={arXiv}, primaryClass={cs.AI}, url={https://arxiv.org/abs/2407.01511}, } ``` ## Community Join us ([*Discord*](https://discord.camel-ai.org/) or [*WeChat*](https://ghli.org/camel/wechat.png)) in pushing the boundaries of finding the scaling laws of agents. - **WeChat Community:** Scan the QR code below to join our WeChat community.
WeChat QR Code

[slack-url]: https://join.slack.com/t/camel-kwr1314/shared_invite/zt-1vy8u9lbo-ZQmhIAyWSEfSwLCl2r2eKA [slack-image]: https://img.shields.io/badge/Slack-CAMEL--AI-blueviolet?logo=slack [discord-url]: https://discord.gg/CNcNpquyDc [discord-image]: https://img.shields.io/badge/Discord-CAMEL--AI-7289da?logo=discord&logoColor=white&color=7289da [wechat-url]: https://ghli.org/camel/wechat.png [wechat-image]: https://img.shields.io/badge/WeChat-CamelAIOrg-brightgreen?logo=wechat&logoColor=white [twitter-url]: https://twitter.com/CamelAIOrg [twitter-image]: https://img.shields.io/twitter/follow/CamelAIOrg?style=social&color=brightgreen&logo=twitter [arxiv-image]: https://img.shields.io/badge/arXiv-2407.01511-b31b1b.svg [arxiv-url]: https://arxiv.org/abs/2407.01511 ================================================ FILE: crab/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # ruff: noqa: F403 from .core import * __version__ = "0.1.2" ================================================ FILE: crab/actions/android_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import base64 import subprocess from enum import Enum from time import sleep from crab import action from .crab_actions import get_element_position def execute_adb(adb_command: str, env=None): if env.device is None: adb_command = "adb " + adb_command else: adb_command = f"adb -s {env.device} " + adb_command result = subprocess.run( adb_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) if result.returncode == 0: return result.stdout.strip() print(f"Command execution failed: {adb_command}") print(result.stderr) return "ERROR" def get_device_size(env): adb_command = "shell wm size" result = execute_adb(adb_command, env) if result != "ERROR": return map(int, result.split(": ")[1].split("x")) return 0, 0 _DURATION = 1.5 @action def setup(env) -> None: env.width, env.height = get_device_size(env) @action def screenshot(env) -> str: """ Get the current screenshot of phone screen. """ if env.device is not None: command = f"adb -s {env.device} exec-out screencap -p" else: command = "adb exec-out screencap -p" result = subprocess.run( command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) return base64.b64encode(result.stdout).decode("utf-8") @action def tap(element: int, env) -> None: """ Tap an UI element shown on the smartphone screen. A simple use case can be tap(5), which taps the UI element labeled with the number 5. Args: element: A numeric tag assigned to an UI element shown on the smartphone screen. 
""" x, y = get_element_position(element, env) execute_adb(f"shell input tap {x} {y}", env) sleep(_DURATION) @action def long_tap(element: int, env) -> None: """ Press and hold a UI element on the smartphone screen for 1 second, typically to access additional menu options. For example, the command long_tap(5) simulates a long press on the UI element labeled with the number 5. Args: element: A numeric tag assigned to an UI element shown on the smartphone screen. """ x, y = get_element_position(element, env) adb_command = f"shell input swipe {x} {y} {x} {y} 1000" execute_adb(adb_command, env) sleep(_DURATION) class SwipeDirection(str, Enum): RIGHT = "right" LEFT = "left" UP = "up" DOWN = "down" class SwipeDist(str, Enum): SHORT = "short" MEDIUM = "medium" LONG = "long" @action def swipe(element: int, direction: SwipeDirection, dist: SwipeDist, env) -> None: """ This function is used to swipe an UI element shown on the smartphone screen, usually a scroll view or a slide bar. You should choose the appropriate direction and distance option according to your need. A simple use case can be swipe(21, "up", "medium"), which swipes up the UI element labeled with the number 21 for a medium distance. Args: element: is a numeric tag assigned to an UI element shown on the smartphone screen. direction: is a string that represents the swipe direction dist: determines the distance of the swipe. 
""" x, y = get_element_position(element, env) unit_dist = int(env.width / 10) if dist == "long": unit_dist *= 3 elif dist == "medium": unit_dist *= 2 if direction == "up": offset = 0, -2 * unit_dist elif direction == "down": offset = 0, 2 * unit_dist elif direction == "left": offset = -1 * unit_dist, 0 elif direction == "right": offset = unit_dist, 0 else: return "ERROR" adb_command = f"shell input swipe {x} {y} {x + offset[0]} {y + offset[1]} 200" execute_adb(adb_command, env) sleep(_DURATION) @action def open_app_drawer(env) -> None: """Open app drawer to list all the installed applications in this phone. For exmaple: you want to open "Messages" application, but you don't know where to find it, you can call "open_app_drawer()" and you will see all the installed applications through screenshot. """ execute_adb("shell input keyevent KEYCODE_HOME", env) sleep(0.5) execute_adb("shell input swipe 800 2000 800 100 500", env) sleep(_DURATION) class AndroidKey(str, Enum): HOME = "home" BACK = "back" @action def key_press(key: AndroidKey, env): """ Press Android keys. press("home") to go back to main screen. press("back") to return to the preivous page. Args: key (str): The pressed key. """ if key == AndroidKey.HOME: adb_command = "shell input keyevent KEYCODE_HOME" elif key == AndroidKey.BACK: adb_command = "shell input keyevent KEYCODE_BACK" else: raise ValueError("Unsupported key") execute_adb(adb_command, env) sleep(_DURATION) @action def write_text(text: str, env) -> None: """ Typing the specified text. Args: text (str): The text to be typed. """ text = text.replace(" ", "%s") text = text.replace("'", "") adb_command = f"shell input text {text}" execute_adb(adb_command, env) sleep(_DURATION) @action def stop_all_apps(env) -> None: """ Stop all running apps. 
""" execute_adb("shell input keyevent KEYCODE_HOME", env) execute_adb("shell input keyevent KEYCODE_APP_SWITCH", env) sleep(0.5) command = ( f"shell input swipe 100 {env.height / 2} {env.width - 100} {env.height / 2} 200" ) execute_adb(command, env) sleep(0.5) execute_adb("shell input tap 300 1400", env) sleep(_DURATION) ================================================ FILE: crab/actions/crab_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from time import sleep from crab import action, evaluator @action(env_name="root") def submit(content: str) -> None: """Submit your answer through this action. For exmaple, if you are required to submit a word "apple", you can use submit(content="apple"). Args: content: the content to submit """ pass @evaluator(env_name="root") def check_submit(text: str, env) -> bool: if env.trajectory: action_name, params, _ = env.trajectory[-1] if action_name == "submit" and text in params["content"]: return True return False @action(env_name="root") def complete() -> bool: """When you think the task is completed, use this action to notify the system. For exmaple, if you successfully complete the task, you can use complete(). 
""" pass @action(env_name="root") def wait() -> bool: """If the environment is still processing your action and you have nothing to do in this step, you can use wait(). """ sleep(5) def get_element_position(element_id, env): """Get element position provided by function `zs_object_detection`""" box = env.element_position_map[element_id] x = (box[0] + box[2]) / 2 y = (box[1] + box[3]) / 2 return round(x), round(y) ================================================ FILE: crab/actions/desktop_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import base64 import time from enum import Enum import pyautogui from mss import mss, tools from crab import action from .crab_actions import get_element_position DURATION = 0.8 DELAY = 1.0 @action def click_position(x: int, y: int) -> None: """ click on the current desktop screen. Args: x: The X coordinate, as a floating-point number in the range [0.0, 1.0]. y: The Y coordinate, as a floating-point number in the range [0.0, 1.0]. """ pyautogui.click(x, y, duration=DURATION) time.sleep(DELAY) @action(local=True) def click(element: int, env) -> None: """ Click an UI element shown on the desktop screen. A simple use case can be click(5), which clicks the UI element labeled with the number 5. 
    Args:
        element: A numeric tag assigned to an UI element shown on the
            screenshot.
    """
    x, y = get_element_position(element, env)
    # Delegate to the remote click_position action through the environment.
    env._action_endpoint(click_position, {"x": x, "y": y})


@action
def right_click_position(x: int, y: int) -> None:
    """
    right-click on the current desktop screen.

    Args:
        x: The X coordinate, in screen pixels.
        y: The Y coordinate, in screen pixels.
    """
    # NOTE(review): unlike the other *_position actions, this one does not
    # sleep(DELAY) afterwards -- confirm whether that is intentional.
    pyautogui.click(x, y, duration=DURATION, button="right")


@action(local=True)
def right_click(element: int, env) -> None:
    """
    Right-click an UI element shown on the desktop screen using the mouse,
    which is usually used for opening the menu of the element. A simple use
    case can be right_click(5), which right-clicks the UI element labeled with
    the number 5 to open up menu on it.

    Args:
        element: A numeric tag assigned to an UI element shown on the
            screenshot.
    """
    x, y = get_element_position(element, env)
    env._action_endpoint(right_click_position, {"x": x, "y": y})
    time.sleep(DELAY)


@action
def double_click_position(x: int, y: int) -> None:
    """
    Double-click on the current desktop screen.

    Args:
        x: The X coordinate, in screen pixels.
        y: The Y coordinate, in screen pixels.
    """
    pyautogui.click(x, y, duration=DURATION, clicks=2, interval=0.2)


@action(local=True)
def double_click(element: int, env) -> None:
    """
    Double-click an UI element shown on the desktop screen using the mouse,
    which is usually used for opening a folder or a file. A simple use case
    can be double_click(5), which double-clicks the UI element labeled with
    the number 5 to open it.

    Args:
        element: A numeric tag assigned to an UI element shown on the
            screenshot.
    """
    x, y = get_element_position(element, env)
    env._action_endpoint(double_click_position, {"x": x, "y": y})
    time.sleep(DELAY)


@action
def mouse_scroll(click: int = 1) -> None:
    """
    Performs a scroll of the mouse scroll wheel.
    Args:
        click(int): The amount of scrolling. Default to 1.
    """
    pyautogui.scroll(click)
    time.sleep(DELAY)


class KeyEnum(str, Enum):
    # Keys accepted by key_press/press_hotkey; values are the strings that
    # pyautogui.press / pyautogui.hotkey expect.
    KEY_TAB = "\t"
    KEY_LB = "\n"
    KEY_RR = "\r"
    KEY_SPACE = " "
    KEY_EXCLAMATION = "!"
    KEY_DQUOTE = '"'
    KEY_SHARP = "#"
    KEY_DOLLAR = "$"
    KEY_PER = "%"
    KEY_AND = "&"
    KEY_SQUOTE = "'"
    KEY_LPAR = "("
    KEY_RPAR = ")"
    KEY_MUL = "*"
    KEY_ADD = "+"
    KEY_COMMA = ","
    KEY_MIN = "-"
    KEY_DOT = "."
    KEY_SLASH = "/"
    KEY_0 = "0"
    KEY_1 = "1"
    KEY_2 = "2"
    KEY_3 = "3"
    KEY_4 = "4"
    KEY_5 = "5"
    KEY_6 = "6"
    KEY_7 = "7"
    KEY_8 = "8"
    KEY_9 = "9"
    KEY_COL = ":"
    KEY_SEMICOL = ";"
    # NOTE(review): "KET_LT" looks like a typo for "KEY_LT"; renaming the
    # member would break existing callers, so it is kept as-is.
    KET_LT = "<"
    KEY_EQUAL = "="
    KEY_GT = ">"
    KEY_QM = "?"
    KEY_AT = "@"
    KEY_LBRA = "["
    KEY_RSLASH = "\\"
    KEY_RBRA = "]"
    KEY_CARET = "^"
    KEY_UNDERLINE = "_"
    KEY_BACKTICK = "`"
    KEY_LBRACE = "{"
    KEY_RBRACE = "}"
    KEY_PIPE = "|"
    KEY_TLIDE = "~"
    KEY_A = "a"
    KEY_B = "b"
    KEY_C = "c"
    KEY_D = "d"
    KEY_E = "e"
    KEY_F = "f"
    KEY_G = "g"
    KEY_H = "h"
    KEY_I = "i"
    KEY_J = "j"
    KEY_K = "k"
    KEY_L = "l"
    KEY_M = "m"
    KEY_N = "n"
    KEY_O = "o"
    KEY_P = "p"
    KEY_Q = "q"
    KEY_R = "r"
    KEY_S = "s"
    KEY_T = "t"
    KEY_U = "u"
    KEY_V = "v"
    KEY_W = "w"
    KEY_X = "x"
    KEY_Y = "y"
    KEY_Z = "z"
    KEY_ALT = "alt"
    KEY_SHIFT = "shift"
    KEY_CTRL = "ctrl"
    KEY_WIN = "win"
    KEY_BACKSPACE = "backspace"
    KEY_ENTER = "enter"
    KEY_ESC = "esc"
    KEY_F1 = "f1"
    KEY_F2 = "f2"
    KEY_F3 = "f3"
    KEY_F4 = "f4"
    KEY_F5 = "f5"
    KEY_F6 = "f6"
    KEY_F7 = "f7"
    KEY_F8 = "f8"
    KEY_F9 = "f9"
    KEY_F10 = "f10"
    KEY_F11 = "f11"
    KEY_F12 = "f12"
    KEY_LEFT = "left"
    KEY_UP = "up"
    KEY_RIGHT = "right"
    KEY_DOWN = "down"


@action
def key_press(key: KeyEnum) -> None:
    """
    Performs a keyboard key press down, followed by a release.

    Args:
        key (str): The key to be pressed.
    """
    # Accept either a KeyEnum member or the equivalent plain string.
    if isinstance(key, KeyEnum):
        pyautogui.press(key.value)
    else:
        pyautogui.press(key)
    time.sleep(DELAY)


@action
def press_hotkey(keys: list[KeyEnum]) -> None:
    """
    Press multiple keyboard keys at the same time. For example, if you want to
    use Ctrl-C hotkey to copy the selected text, you can call
    press_hotkey(keys=["ctrl", "c"]).
    Args:
        key (str): The key to be pressed.
    """
    # If the first entry is a KeyEnum, assume the whole list is and unwrap.
    if isinstance(keys[0], KeyEnum):
        keys = [key.value for key in keys]
    pyautogui.hotkey(*keys)
    time.sleep(DELAY)


@action
def write_text(text: str) -> None:
    """
    Typing the specified text. Note: This function does not move the mouse
    cursor. Ensure the cursor focuses in the correct text input field before
    calling this function.

    Args:
        text (str): The text to be typed.
    """
    pyautogui.write(text, interval=0.03)
    time.sleep(DELAY)


@action
def search_application(name: str) -> None:
    """
    Search an application name. For example, if you want to open an
    application named "slack", you can call search_application(name="slack").
    You MUST use this action to search for applications.

    Args:
        name: the application name.
    """
    # Close any open popup, open the desktop search overlay, then type the
    # application name.  NOTE(review): "win"+"a" presumably targets a specific
    # desktop environment's app search -- confirm on the target OS.
    pyautogui.press("esc")
    time.sleep(DELAY)
    pyautogui.hotkey("win", "a")
    time.sleep(DELAY)
    pyautogui.write(name)
    time.sleep(DELAY)


@action
def screenshot() -> str:
    "Get the current screenshot."
    with mss() as sct:
        # Get raw pixels from the screen (monitor 1 = the primary display).
        sct_img = sct.grab(sct.monitors[1])
        # Create the Image
        png = tools.to_png(sct_img.rgb, sct_img.size)
        base64_img = base64.b64encode(png).decode("utf-8")
        return base64_img



================================================
FILE: crab/actions/file_actions.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
=========== import base64 from io import BytesIO from PIL import Image from crab.core import action @action def save_base64_image(image: str, path: str = "image.png") -> None: image = Image.open(BytesIO(base64.b64decode(image))) image.save(path) ================================================ FILE: crab/actions/system_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import subprocess from time import sleep from crab.core.decorators import action @action def delay(time: float) -> None: sleep(time) @action def run_bash_command(command: str) -> str: """ Run a command using bash shell. You can use this command to open any application by their name. Args: command: The commmand to be run. Return: stdout and stderr """ p = subprocess.run(["bash", command], capture_output=True) return f'stdout: "{p.stdout}"\nstderr: "{p.stderr}"' ================================================ FILE: crab/actions/visual_prompt_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import logging
from functools import cache
from typing import Literal

from PIL import Image, ImageDraw, ImageFont

from crab import action
from crab.utils.common import base64_to_image, image_to_base64

logger = logging.getLogger(__name__)

try:
    import easyocr
    import numpy as np
    import torch
    from transformers import (
        AutoProcessor,
        GroundingDinoForObjectDetection,
        GroundingDinoProcessor,
    )

    device = "cuda" if torch.cuda.is_available() else "cpu"
    TRANSFORMERS_ENABLE = True
except ImportError:
    TRANSFORMERS_ENABLE = False

# A bounding box as (x1, y1, x2, y2); the optional str is its label/text.
BoxType = tuple[int, int, int, int]
AnnotatedBoxType = tuple[BoxType, str | None]


def check_transformers_import() -> None:
    """Raise an informative error when optional model deps are missing."""
    if not TRANSFORMERS_ENABLE:
        raise ImportError(
            "Please install the required dependencies to use this function by running"
            " `pip install crab-framework[client]`"
        )


def _calculate_iou(box1: BoxType, box2: BoxType) -> float:
    """Intersection-over-union of two boxes."""
    xA = max(box1[0], box2[0])
    yA = max(box1[1], box2[1])
    xB = min(box1[2], box2[2])
    yB = min(box1[3], box2[3])
    interArea = max(0, xB - xA) * max(0, yB - yA)
    box1Area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2Area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    # NOTE(review): unionArea can be 0 for degenerate boxes, which would
    # raise ZeroDivisionError; upstream filtering removes zero-area boxes.
    unionArea = box1Area + box2Area - interArea
    iou = interArea / unionArea
    return iou


def _calculate_center(box: BoxType) -> tuple[float, float]:
    # Annotation fixed: the halves are floats, not ints.
    return (box[0] + box[2]) / 2, (box[1] + box[3]) / 2


def _remove_invalid_boxes(
    boxes_with_label: list[AnnotatedBoxType], width: int, height: int
) -> list[AnnotatedBoxType]:
    """Drop boxes outside the image bounds or with non-positive extent."""
    boxes = [box[0] for box in boxes_with_label]
    boxes_to_remove = set()
    for idx, box in enumerate(boxes):
        if box[0] < 0 or box[1] < 0 or box[2] > width or box[3] > height:
            boxes_to_remove.add(idx)
            continue
        if box[0] >= box[2] or box[1] >= box[3]:
            boxes_to_remove.add(idx)
            continue
    boxes_filt = [
        box for idx, box in enumerate(boxes_with_label) if idx not in boxes_to_remove
    ]
    return boxes_filt


def _filter_boxes_by_center(
    boxes_with_label: list[AnnotatedBoxType], center_dis_thresh: float
) -> list[AnnotatedBoxType]:
    """Keep only one box among those whose centers are closer than the threshold."""
    boxes = [box[0] for box in boxes_with_label]
    boxes_to_remove = set()
    for i in range(len(boxes)):
        if i in boxes_to_remove:
            continue
        center_i = _calculate_center(boxes[i])
        for j in range(i + 1, len(boxes)):
            center_j = _calculate_center(boxes[j])
            # fmt: off
            center_close = ((center_i[0] - center_j[0]) ** 2
                            + (center_i[1] - center_j[1]) ** 2
                            < center_dis_thresh**2)
            # fmt: on
            if center_close:
                boxes_to_remove.add(j)
    boxes_filt = [
        box for idx, box in enumerate(boxes_with_label) if idx not in boxes_to_remove
    ]
    return boxes_filt


def _box_a_in_b(a: BoxType, b: BoxType) -> bool:
    """True when box a is fully contained in box b."""
    return a[0] >= b[0] and a[1] >= b[1] and a[2] <= b[2] and a[3] <= b[3]


def _filter_boxes_by_overlap(
    boxes_with_label: list[AnnotatedBoxType],
) -> list[AnnotatedBoxType]:
    """Remove boxes that fully contain another (kept) box."""
    boxes = [box[0] for box in boxes_with_label]
    boxes_to_remove = set()
    for i in range(len(boxes)):
        if i in boxes_to_remove:
            continue
        for j in range(len(boxes)):
            if i != j and _box_a_in_b(boxes[i], boxes[j]):
                boxes_to_remove.add(j)
    boxes_filt = [
        box for idx, box in enumerate(boxes_with_label) if idx not in boxes_to_remove
    ]
    return boxes_filt


def _filter_boxes_by_iou(
    boxes_with_label: list[AnnotatedBoxType], iou_threshold: float = 0.5
) -> list[AnnotatedBoxType]:
    """Greedy de-duplication: drop later boxes overlapping an earlier one."""
    boxes = [box[0] for box in boxes_with_label]
    boxes_to_remove = set()
    for i in range(len(boxes)):
        if i in boxes_to_remove:
            continue
        for j in range(i + 1, len(boxes)):
            iou = _calculate_iou(boxes[i], boxes[j])
            if iou >= iou_threshold:
                boxes_to_remove.add(j)
    boxes_filt = [
        box for idx, box in enumerate(boxes_with_label) if idx not in boxes_to_remove
    ]
    return boxes_filt


def _draw_boxes(
    image: Image.Image,
    boxes: list[BoxType],
    font_size: int = 30,
) -> None:
    """Draw each box on the image with its index rendered beside it (in place)."""
    draw = ImageDraw.Draw(image)
    for idx, box in enumerate(boxes):
        color = tuple(np.random.randint(64, 191, size=3).tolist())
        # NOTE(review): the font is reloaded on every iteration; hoisting it
        # out of the loop would be cheaper.
        font = ImageFont.load_default(font_size)
        center = _calculate_center(box)
        draw.rectangle([box[0], box[1], box[2], box[3]], outline=color, width=2)
        # Newer Pillow exposes textbbox; fall back to textsize on older versions.
        if hasattr(font, "getbbox"):
            _, _, w, h = draw.textbbox((0, 0), str(idx), font)
        else:
            w, h = draw.textsize(str(idx), font)
        # Place the label to the left of the box when it fits, else to the right.
        if box[0] >= w:
            bbox = (
                round(box[0] - w),
                round(center[1] - h / 2),
                round(box[0]),
                round(center[1] + h / 2),
            )
        else:
            bbox = (
                round(box[2]),
                round(center[1] - h / 2),
                round(box[2] + w),
                round(center[1] + h / 2),
            )
        draw.rectangle(bbox, fill=color)
        draw.text((bbox[0], bbox[1]), str(idx), fill="white", font=font)


@cache
def _get_grounding_dino_model(
    type: Literal["tiny", "base"] = "tiny",
) -> tuple[GroundingDinoProcessor, GroundingDinoForObjectDetection]:
    """Get the grounding dino model.

    Args:
        type: The version of the Gounding Dino Model.

    Returns:
        A tuple (processor, model).
    """
    model_name = f"IDEA-Research/grounding-dino-{type}"
    processor = AutoProcessor.from_pretrained(model_name)
    model = GroundingDinoForObjectDetection.from_pretrained(model_name).to(device)
    return processor, model


@cache
def _get_easyocr_model() -> easyocr.Reader:
    # English-only OCR reader, cached for the process lifetime.
    return easyocr.Reader(["en"])


def get_groundingdino_boxes(
    images: Image.Image | list[Image.Image],
    text_prompt: str,
    box_threshold: float = 0.05,
    text_threshold: float = 0.5,
) -> list[list[AnnotatedBoxType]]:
    """Get the bounding boxes of the objects in the image using GroundingDino.

    Args:
        images: The image or list of images.
        text_prompt: The text prompt to use for all the images.
        box_threshold: The box threshold.
        text_threshold: The text threshold.

    Returns:
        The first level list is for each image, and the second level list
        contains tuples (detected boxes, its sementical representation) as the
        result of the image.
""" processor, model = _get_grounding_dino_model() if isinstance(images, Image.Image): images = [images] image_number = len(images) images = [image.convert("RGB") for image in images] inputs = processor( images=images, text=[text_prompt] * image_number, return_tensors="pt", ).to(device) with torch.no_grad(): outputs = model(**inputs) target_sizes = [image.size[::-1] for image in images] detection_results = processor.post_process_grounded_object_detection( outputs, inputs.input_ids, box_threshold=box_threshold, text_threshold=text_threshold, target_sizes=target_sizes, ) final_output = [] for result in detection_results: boxes = result["boxes"].cpu().int().tolist() labels = result["labels"] final_output.append(list(zip(boxes, labels))) return final_output def get_easyocr_boxes( image: Image.Image, ) -> list[AnnotatedBoxType]: """Get the bounding boxes of the text in the image using EasyOCR. Args: image: The taget image. Returns: The list of tuple of bounding boxes and their corresponding text. """ reader = _get_easyocr_model() result = reader.readtext(np.array(image), text_threshold=0.9) boxes = [] for detect in result: boxes.append( ( ( detect[0][0][0], detect[0][0][1], detect[0][2][0], detect[0][2][1], ), detect[1], ) ) return boxes @action(local=True) def groundingdino_easyocr( input_base64_image: str, font_size: int, env, ) -> tuple[str, list[AnnotatedBoxType]]: """Get the interative elements in the image. Using GroundingDino and EasyOCR to detect the interactive elements in the image. Mark the detected elements with bounding boxes and labels. Store the labels and boxes in the environment to be used in other actions. Args: input_base64_image: The base64 encoded image. font_size: The font size of the label. Returns: A tuple (base64_image, boxes), where base64_image is the base64 encoded image drawn with bounding boxes and labels, and box is the list of detected boxes and labels. 
""" check_transformers_import() image = base64_to_image(input_base64_image) od_boxes = get_groundingdino_boxes(image, "icon . logo .", box_threshold=0.02)[0] od_boxes = _filter_boxes_by_iou(od_boxes, iou_threshold=0.5) ocr_boxes = get_easyocr_boxes(image) boxes_with_label = ocr_boxes + od_boxes filtered_boxes = _remove_invalid_boxes(boxes_with_label, image.width, image.height) filtered_boxes = _filter_boxes_by_overlap(filtered_boxes) center_dis = round(max(image.height, image.width) / 80.0) filtered_boxes = _filter_boxes_by_center(filtered_boxes, center_dis) env.element_label_map = [box[1] for box in filtered_boxes] result_boxes = [box[0] for box in filtered_boxes] _draw_boxes(image, result_boxes, font_size) env.element_position_map = result_boxes env.ocr_results = "".join([box[1] for box in ocr_boxes]) return image_to_base64(image), filtered_boxes @action(local=True) def get_elements_prompt( input: tuple[str, list[AnnotatedBoxType]], env ) -> tuple[str, str]: """Get the text prompt passing to the agent for the image. Args: input: The base64 encoded image and the list of detected boxes and labels. Returns: A tuple (image, prompt) contains the base64 encoded image and the prompt. """ image, boxes = input labels = "" for id, box in enumerate(boxes): if box[1] is not None: labels += f"[{id}|{box[1]}]\n" prompt = ( "Some elements in the current screenshot have labels. I will give you " "these labels by [id|label].\n" + labels ) return image, prompt ================================================ FILE: crab/agents/backend_models/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # ruff: noqa: F401 from typing import Any, Literal from pydantic import BaseModel from crab.core.backend_model import BackendModel from .camel_model import CamelModel from .claude_model import ClaudeModel from .gemini_model import GeminiModel from .openai_model import OpenAIModel, OpenAIModelJSON, SGlangOpenAIModelJSON class BackendModelConfig(BaseModel): model_class: Literal["openai", "claude", "gemini", "camel", "sglang"] """Specify the model class to be used. Different model classese use different APIs. """ model_name: str """Specify the model name to be used. This value is directly passed to the API, check model provider API documentation for more details. """ model_platform: str | None = None """Required for CamelModel. Otherwise, it is ignored. Please check CAMEL documentation for more details. """ history_messages_len: int = 0 """Number of rounds of previous messages to be used in the model input. 0 means no history. """ parameters: dict[str, Any] = {} """Additional parameters to be passed to the model.""" json_structre_output: bool = False """If True, the model generate action through JSON without using "tool call" or "function call". SGLang model only supports JSON output. OpenAI model supports both. Other models do not support JSON output. """ tool_call_required: bool = True """Specify if the model enforce each round to generate tool/function calls.""" base_url: str | None = None """Specify the base URL of the API. 
Only used in OpenAI and SGLang currently.""" api_key: str | None = None """Specify the API key to be used. Only used in OpenAI and SGLang currently.""" def create_backend_model(model_config: BackendModelConfig) -> BackendModel: match model_config.model_class: case "claude": if model_config.base_url is not None or model_config.api_key is not None: raise Warning( "base_url and api_key are not supported for ClaudeModel currently." ) if model_config.json_structre_output: raise Warning( "json_structre_output is not supported for ClaudeModel currently." ) return ClaudeModel( model=model_config.model_name, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, tool_call_required=model_config.tool_call_required, ) case "gemini": if model_config.base_url is not None or model_config.api_key is not None: raise Warning( "base_url and api_key are not supported for GeminiModel currently." ) if model_config.json_structre_output: raise Warning( "json_structre_output is not supported for GeminiModel currently." 
) return GeminiModel( model=model_config.model_name, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, tool_call_required=model_config.tool_call_required, ) case "openai": if not model_config.json_structre_output: return OpenAIModel( model=model_config.model_name, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, base_url=model_config.base_url, api_key=model_config.api_key, tool_call_required=model_config.tool_call_required, ) else: return OpenAIModelJSON( model=model_config.model_name, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, base_url=model_config.base_url, api_key=model_config.api_key, ) case "sglang": return SGlangOpenAIModelJSON( model=model_config.model_name, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, base_url=model_config.base_url, api_key=model_config.api_key, ) case "camel": return CamelModel( model=model_config.model_name, model_platform=model_config.model_platform, parameters=model_config.parameters, history_messages_len=model_config.history_messages_len, tool_call_required=model_config.tool_call_required, ) case _: raise ValueError(f"Unsupported model name: {model_config.model_name}") ================================================ FILE: crab/agents/backend_models/camel_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import json
from typing import Any

from openai.types.chat import ChatCompletionMessageToolCall
from PIL import Image

from crab import Action, ActionOutput, BackendModel, BackendOutput, MessageType
from crab.utils.common import base64_to_image

try:
    from camel.agents import ChatAgent
    from camel.messages import BaseMessage
    from camel.models import ModelFactory
    from camel.toolkits import OpenAIFunction
    from camel.types.enums import ModelPlatformType, ModelType

    CAMEL_ENABLED = True
except ImportError:
    CAMEL_ENABLED = False


def _get_model_platform_type(model_platform_name: str) -> "ModelPlatformType":
    """Map a platform name to CAMEL's ModelPlatformType, with a helpful error."""
    try:
        return ModelPlatformType(model_platform_name)
    except ValueError as err:
        all_models = [platform.value for platform in ModelPlatformType]
        raise ValueError(
            f"Model {model_platform_name} not found. Supported models are {all_models}"
        ) from err


def _get_model_type(model_name: str) -> "str | ModelType":
    """Return the CAMEL ModelType when known, else the raw model name."""
    try:
        return ModelType(model_name)
    except ValueError:
        return model_name


def _convert_action_to_schema(
    action_space: list[Action] | None,
) -> "list[OpenAIFunction] | None":
    """Wrap each crab Action as a CAMEL OpenAIFunction tool schema."""
    if action_space is None:
        return None
    schema_list = []
    for action in action_space:
        new_action = action.to_openai_json_schema()
        schema = {"type": "function", "function": new_action}
        schema_list.append(OpenAIFunction(action.entry, schema))
    return schema_list


def _convert_tool_calls_to_action_list(
    tool_calls: list[ChatCompletionMessageToolCall] | None,
) -> list[ActionOutput] | None:
    """Convert OpenAI-style tool calls into crab ActionOutput records."""
    if tool_calls is None:
        return None
    return [
        ActionOutput(
            name=call.function.name,
            arguments=json.loads(call.function.arguments),
        )
        for call in tool_calls
    ]


class CamelModel(BackendModel):
    """BackendModel adapter that drives a CAMEL ChatAgent."""

    def __init__(
        self,
        model: str,
        model_platform: str,
        parameters: dict[str, Any] | None = None,
        history_messages_len: int = 0,
        tool_call_required: bool = True,
    ) -> None:
        if not CAMEL_ENABLED:
            raise ImportError("Please install camel-ai to use CamelModel")
        self.model = model
        self.parameters = parameters if parameters is not None else {}
        # Fix: history_messages_len was assigned twice; keep a single assignment.
        self.history_messages_len = history_messages_len
        self.model_type = _get_model_type(model)
        self.model_platform_type = _get_model_platform_type(model_platform)
        self.client: ChatAgent | None = None
        self.token_usage = 0
        self.tool_call_required = tool_call_required

    def get_token_usage(self) -> int:
        """Total tokens consumed since the last reset."""
        return self.token_usage

    def reset(self, system_message: str, action_space: list[Action] | None) -> None:
        """Recreate the underlying ChatAgent with a fresh system prompt/tools."""
        action_schema = _convert_action_to_schema(action_space)
        config = self.parameters.copy()
        if action_schema is not None:
            config["tool_choice"] = "required" if self.tool_call_required else "auto"
            config["tools"] = [
                schema.get_openai_tool_schema() for schema in action_schema
            ]
        backend_model = ModelFactory.create(
            self.model_platform_type,
            self.model_type,
            model_config_dict=config,
        )
        sysmsg = BaseMessage.make_assistant_message(
            role_name="Assistant",
            content=system_message,
        )
        self.client = ChatAgent(
            model=backend_model,
            system_message=sysmsg,
            external_tools=action_schema,
            message_window_size=self.history_messages_len,
        )
        self.token_usage = 0

    def chat(self, messages: list[tuple[str, MessageType]]) -> BackendOutput:
        """Send one user turn (text + optional images) and parse the reply."""
        # TODO: handle multiple text messages after message refactoring
        image_list: list[Image.Image] = []
        content = ""
        for message in messages:
            if message[1] == MessageType.IMAGE_JPG_BASE64:
                image = base64_to_image(message[0])
                image_list.append(image)
            else:
                content = message[0]
        usermsg = BaseMessage.make_user_message(
            role_name="User",
            content=content,
            image_list=image_list,
        )
        response = self.client.step(usermsg)
        self.token_usage += response.info["usage"]["total_tokens"]
        tool_call_request = response.info.get("external_tool_request")
        # Fix: the request was previously always wrapped as [tool_call_request],
        # so a missing tool call produced [None] and crashed the converter.
        if tool_call_request is None:
            action_list = None
        else:
            action_list = _convert_tool_calls_to_action_list([tool_call_request])
        return BackendOutput(
            message=response.msg.content,
            action_list=action_list,
        )
================================================
FILE: crab/agents/backend_models/claude_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from copy import deepcopy
from typing import Any

from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed

from crab import Action, ActionOutput, BackendModel, BackendOutput, Message, MessageType

try:
    import anthropic
    from anthropic.types import TextBlock, ToolUseBlock

    anthropic_model_enable = True
except ImportError:
    anthropic_model_enable = False


class ClaudeModel(BackendModel):
    """BackendModel adapter for the Anthropic Messages API."""

    def __init__(
        self,
        model: str,
        parameters: dict[str, Any] | None = None,
        history_messages_len: int = 0,
        tool_call_required: bool = True,
    ) -> None:
        if anthropic_model_enable is False:
            raise ImportError("Please install anthropic to use ClaudeModel")
        self.model = model
        self.parameters = parameters if parameters is not None else {}
        self.history_messages_len = history_messages_len
        assert self.history_messages_len >= 0
        self.client = anthropic.Anthropic()
        self.tool_call_required: bool = tool_call_required
        self.system_message: str = "You are a helpful assistant."
        self.action_space: list[Action] | None = None
        self.action_schema: list[dict] | None = None
        self.token_usage: int = 0
        # Each entry is one round: [user message, assistant reply, tool results].
        self.chat_history: list[list[dict]] = []
        self.support_tool_call = True

    def reset(self, system_message: str, action_space: list[Action] | None) -> None:
        """Clear conversation state and install a new system prompt/tool set."""
        self.system_message = system_message
        self.action_space = action_space
        self.action_schema = _convert_action_to_schema(self.action_space)
        self.token_usage = 0
        self.chat_history = []

    def chat(self, message: list[Message] | Message) -> BackendOutput:
        """Run one request/response round, recording it in chat_history."""
        # A single Message is a tuple; normalize to a list of messages.
        if isinstance(message, tuple):
            message = [message]
        request = self._fetch_from_memory()
        new_message = self._construct_new_message(message)
        request.append(new_message)
        response_message = self._call_api(request)
        self._record_message(new_message, response_message)
        return self._generate_backend_output(response_message)

    def _construct_new_message(self, message: list[Message]) -> dict[str, Any]:
        """Build one Anthropic user message from (content, type) pairs."""
        parts: list[dict] = []
        for content, msg_type in message:
            match msg_type:
                case MessageType.TEXT:
                    parts.append(
                        {
                            "type": "text",
                            "text": content,
                        }
                    )
                case MessageType.IMAGE_JPG_BASE64:
                    # NOTE(review): media_type is "image/png" although the
                    # enum says JPG -- presumably the screenshots really are
                    # PNG; confirm against the producers of these messages.
                    parts.append(
                        {
                            "type": "image",
                            "source": {
                                "data": content,
                                "type": "base64",
                                "media_type": "image/png",
                            },
                        }
                    )
        return {
            "role": "user",
            "content": parts,
        }

    def _fetch_from_memory(self) -> list[dict]:
        """Return the flattened last history_messages_len rounds of history."""
        request: list[dict] = []
        if self.history_messages_len > 0:
            fetch_history_len = min(self.history_messages_len, len(self.chat_history))
            for history_message in self.chat_history[-fetch_history_len:]:
                request = request + history_message
        return request

    def get_token_usage(self):
        """Total input+output tokens consumed since the last reset."""
        return self.token_usage

    def _record_message(
        self, new_message: dict, response_message: anthropic.types.Message
    ) -> None:
        """Append the round to history, acknowledging any tool_use blocks."""
        self.chat_history.append([new_message])
        self.chat_history[-1].append(
            {"role": response_message.role, "content": response_message.content}
        )
        if self.action_schema:
            # The API requires a tool_result for every tool_use block; crab
            # executes actions elsewhere, so report a generic "success".
            tool_calls = response_message.content
            tool_content = []
            for call in tool_calls:
                if isinstance(call, ToolUseBlock):
                    tool_content.append(
                        {
                            "type": "tool_result",
                            "tool_use_id": call.id,
                            "content": "success",
                        }
                    )
            self.chat_history[-1].append(
                {
                    "role": "user",
                    "content": tool_content,
                }
            )

    @retry(
        wait=wait_fixed(10),
        stop=stop_after_attempt(7),
        retry=retry_if_exception_type(
            (
                anthropic.APITimeoutError,
                anthropic.APIConnectionError,
                anthropic.InternalServerError,
            )
        ),
    )
    def _call_api(self, request_messages: list[dict]) -> anthropic.types.Message:
        """Send the merged request to Anthropic, retrying transient failures."""
        request_messages = _merge_request(request_messages)
        if self.action_schema is not None:
            response = self.client.messages.create(
                system=self.system_message,  # <-- system prompt
                messages=request_messages,  # type: ignore
                model=self.model,
                max_tokens=4096,
                tools=self.action_schema,
                tool_choice={"type": "any" if self.tool_call_required else "auto"},
                **self.parameters,
            )
        else:
            response = self.client.messages.create(
                system=self.system_message,  # <-- system prompt
                messages=request_messages,  # type: ignore
                model=self.model,
                max_tokens=4096,
                **self.parameters,
            )
        self.token_usage += response.usage.input_tokens + response.usage.output_tokens
        return response

    def _generate_backend_output(
        self, response_message: anthropic.types.Message
    ) -> BackendOutput:
        """Split the response into plain text and ActionOutput tool calls."""
        message = ""
        action_list = []
        for block in response_message.content:
            if isinstance(block, TextBlock):
                message += block.text
            elif isinstance(block, ToolUseBlock):
                action_list.append(
                    ActionOutput(
                        name=block.name,
                        arguments=block.input,  # type: ignore
                    )
                )
        if not action_list:
            return BackendOutput(message=message, action_list=None)
        else:
            return BackendOutput(
                message=message,
                action_list=action_list,
            )


def _merge_request(request: list[dict]) -> list[dict]:
    """Merge consecutive same-role messages, as the Anthropic API requires
    strictly alternating roles."""
    merge_request = [deepcopy(request[0])]
    for idx in range(1, len(request)):
        if request[idx]["role"] == merge_request[-1]["role"]:
            merge_request[-1]["content"].extend(request[idx]["content"])
        else:
            merge_request.append(deepcopy(request[idx]))
    return merge_request


def _convert_action_to_schema(action_space):
    """Convert crab Actions from OpenAI JSON schema to Anthropic tool schema
    (parameters -> input_schema, dropping OpenAI-only fields)."""
    if action_space is None:
        return None
    actions = []
    for action in action_space:
        new_action = action.to_openai_json_schema()
        new_action["input_schema"] = new_action.pop("parameters")
        if "returns" in new_action:
            new_action.pop("returns")
        if "title" in new_action:
            new_action.pop("title")
        if "type" in new_action:
            new_action["input_schema"]["type"] = new_action.pop("type")
        if "required" in new_action:
            new_action["input_schema"]["required"] = new_action.pop("required")
        actions.append(new_action)
    return actions


================================================
FILE: crab/agents/backend_models/gemini_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
=========== import os from typing import Any from PIL.Image import Image from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed from crab import Action, ActionOutput, BackendModel, BackendOutput, Message, MessageType from crab.utils.common import base64_to_image, json_expand_refs try: import google.generativeai as genai from google.ai.generativelanguage_v1beta import ( Content, FunctionDeclaration, Part, Tool, ) from google.api_core.exceptions import ResourceExhausted from google.generativeai.types import content_types gemini_model_enable = True except ImportError: gemini_model_enable = False class GeminiModel(BackendModel): def __init__( self, model: str, parameters: dict[str, Any] | None = None, history_messages_len: int = 0, tool_call_required: bool = True, ) -> None: if gemini_model_enable is False: raise ImportError("Please install google.generativeai to use GeminiModel") self.model = model self.parameters = parameters if parameters is not None else {} self.history_messages_len = history_messages_len assert self.history_messages_len >= 0 genai.configure(api_key=os.environ["GEMINI_API_KEY"]) self.client = genai self.tool_call_required = tool_call_required self.system_message: str = "You are a helpful assistant." 
self.action_space: list[Action] | None = None self.action_schema: list[Tool] | None = None self.token_usage: int = 0 self.chat_history: list[list[dict]] = [] self.support_tool_call = True def reset(self, system_message: str, action_space: list[Action] | None) -> None: self.system_message = system_message self.action_space = action_space self.action_schema = _convert_action_to_schema(self.action_space) self.token_usage = 0 self.chat_history = [] def chat(self, message: list[Message] | Message) -> BackendOutput: if isinstance(message, tuple): message = [message] request = self._fetch_from_memory() new_message = self._construct_new_message(message) request.append(new_message) response_message = self._call_api(request) self._record_message(new_message, response_message) return self._generate_backend_output(response_message) def _construct_new_message(self, message: list[Message]) -> dict[str, Any]: parts: list[str | Image] = [] for content, msg_type in message: match msg_type: case MessageType.TEXT: parts.append(content) case MessageType.IMAGE_JPG_BASE64: parts.append(base64_to_image(content)) return { "role": "user", "parts": parts, } def _generate_backend_output(self, response_message: Content) -> BackendOutput: tool_calls: list[ActionOutput] = [] for part in response_message.parts: if "function_call" in Part.to_dict(part): call = Part.to_dict(part)["function_call"] tool_calls.append( ActionOutput( name=call["name"], arguments=call["args"], ) ) return BackendOutput( message=response_message.parts[0].text or None, action_list=tool_calls or None, ) def _fetch_from_memory(self) -> list[dict]: request: list[dict] = [] if self.history_messages_len > 0: fetch_history_len = min(self.history_messages_len, len(self.chat_history)) for history_message in self.chat_history[-fetch_history_len:]: request = request + history_message return request def get_token_usage(self): return self.token_usage def _record_message( self, new_message: dict[str, Any], response_message: Content ) 
-> None: self.chat_history.append([new_message]) self.chat_history[-1].append( {"role": response_message.role, "parts": response_message.parts} ) @retry( wait=wait_fixed(10), stop=stop_after_attempt(7), retry=retry_if_exception_type(ResourceExhausted), ) def _call_api(self, request_messages: list) -> Content: if self.action_schema is not None: tool_config = content_types.to_tool_config( { "function_calling_config": { "mode": "ANY" if self.tool_call_required else "AUTO" } } ) response = self.client.GenerativeModel( self.model, system_instruction=self.system_message ).generate_content( contents=request_messages, tools=self.action_schema, tool_config=tool_config, # **self.parameters, # TODO(Tianqi): Fix this line in the future ) else: response = self.client.GenerativeModel( self.model, system_instruction=self.system_message ).generate_content( contents=request_messages, # **self.parameters, # TODO(Tianqi): Fix this line in the future ) self.token_usage += response.candidates[0].token_count return response.candidates[0].content def _convert_action_to_schema(action_space: list[Action] | None) -> list[Tool] | None: if action_space is None: return None actions = [ Tool( function_declarations=[ _action_to_func_dec(action) for action in action_space ] ) ] return actions def _clear_schema(schema_dict: dict) -> None: schema_dict.pop("title", None) p_type = schema_dict.pop("type", None) for prop in schema_dict.get("properties", {}).values(): _clear_schema(prop) if p_type is not None: schema_dict["type_"] = p_type.upper() if "items" in schema_dict: _clear_schema(schema_dict["items"]) def _action_to_func_dec(action: Action) -> FunctionDeclaration: "Converts crab Action to google FunctionDeclaration" p_schema = action.parameters.model_json_schema() if "$defs" in p_schema: p_schema = json_expand_refs(p_schema) _clear_schema(p_schema) if not p_schema["properties"]: return FunctionDeclaration( name=action.name, description=action.description, ) return FunctionDeclaration( 
name=action.name, description=action.description, parameters=p_schema, ) ================================================ FILE: crab/agents/backend_models/openai_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import json from typing import Any from crab import Action, ActionOutput, BackendModel, BackendOutput, Message, MessageType from crab.agents.utils import extract_text_and_code_prompts try: import openai from openai.types.chat import ChatCompletionMessage openai_model_enable = True except ImportError: openai_model_enable = False class OpenAIModel(BackendModel): def __init__( self, model: str, parameters: dict[str, Any] | None = None, history_messages_len: int = 0, tool_call_required: bool = True, base_url: str | None = None, api_key: str | None = None, ) -> None: if not openai_model_enable: raise ImportError("Please install openai to use OpenAIModel") self.model = model self.parameters = parameters if parameters is not None else {} self.history_messages_len = history_messages_len assert self.history_messages_len >= 0 self.client = openai.OpenAI(api_key=api_key, base_url=base_url) self.tool_call_required: bool = tool_call_required self.system_message: str = "You are a helpful assistant." 
self.openai_system_message = { "role": "system", "content": self.system_message, } self.action_space: list[Action] | None = None self.action_schema: list[dict] | None = None self.token_usage: int = 0 self.chat_history: list[list[ChatCompletionMessage | dict]] = [] self.support_tool_call = True def reset(self, system_message: str, action_space: list[Action] | None) -> None: self.system_message = system_message self.openai_system_message = { "role": "system", "content": system_message, } self.action_space = action_space self.action_schema = _convert_action_to_schema(self.action_space) self.token_usage = 0 self.chat_history = [] def chat(self, message: list[Message] | Message) -> BackendOutput: if isinstance(message, tuple): message = [message] request = self._fetch_from_memory() new_message = self._construct_new_message(message) request.append(new_message) response_message = self._call_api(request) self._record_message(new_message, response_message) return self._generate_backend_output(response_message) def get_token_usage(self): return self.token_usage def _record_message( self, new_message: dict, response_message: ChatCompletionMessage ) -> None: self.chat_history.append([new_message]) self.chat_history[-1].append(response_message) if self.action_schema and response_message.tool_calls is not None: for tool_call in response_message.tool_calls: self.chat_history[-1].append( { "tool_call_id": tool_call.id, "role": "tool", "name": tool_call.function.name, "content": "success", } ) # extend conversation with function response def _call_api( self, request_messages: list[ChatCompletionMessage | dict] ) -> ChatCompletionMessage: if self.action_schema is not None: response = self.client.chat.completions.create( messages=request_messages, # type: ignore model=self.model, tools=self.action_schema, tool_choice="required" if self.tool_call_required else "auto", **self.parameters, ) else: response = self.client.chat.completions.create( messages=request_messages, # type: ignore 
model=self.model, **self.parameters, ) self.token_usage += response.usage.total_tokens return response.choices[0].message def _fetch_from_memory(self) -> list[ChatCompletionMessage | dict]: request: list[ChatCompletionMessage | dict] = [self.openai_system_message] if self.history_messages_len > 0: fetch_history_len = min(self.history_messages_len, len(self.chat_history)) for history_message in self.chat_history[-fetch_history_len:]: request = request + history_message return request def _construct_new_message(self, message: list[Message]) -> dict[str, Any]: new_message_content: list[dict[str, Any]] = [] for content, msg_type in message: match msg_type: case MessageType.TEXT: new_message_content.append( { "type": "text", "text": content, } ) case MessageType.IMAGE_JPG_BASE64: new_message_content.append( { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{content}", "detail": "high", }, } ) return {"role": "user", "content": new_message_content} def _generate_backend_output( self, response_message: ChatCompletionMessage ) -> BackendOutput: if response_message.tool_calls is None: return BackendOutput(message=response_message.content, action_list=None) action_list = [ ActionOutput( name=call.function.name, arguments=json.loads(call.function.arguments), ) for call in response_message.tool_calls ] return BackendOutput( message=response_message.content, action_list=action_list, ) def _convert_action_to_schema( action_space: list[Action] | None, ) -> list[dict] | None: if action_space is None: return None actions = [] for action in action_space: new_action = action.to_openai_json_schema() actions.append({"type": "function", "function": new_action}) return actions class OpenAIModelJSON(OpenAIModel): def __init__( self, model: str, parameters: dict[str, Any] = dict(), history_messages_len: int = 0, base_url: str | None = None, api_key: str | None = None, ) -> None: super().__init__( model, parameters, history_messages_len, False, base_url, api_key, ) 
self.support_tool_call = False def reset(self, system_message: str, action_space: list[Action] | None) -> None: super().reset(system_message, action_space) self.action_schema = None def _record_message( self, new_message: dict, response_message: ChatCompletionMessage ) -> None: self.chat_history.append([new_message]) self.chat_history[-1].append( {"role": "assistant", "content": response_message.content} ) def _generate_backend_output( self, response_message: ChatCompletionMessage ) -> BackendOutput: content = response_message.content text_list, code_list = extract_text_and_code_prompts(content) action_list = [] try: for code_block in code_list: action_object = json.loads(code_block) action_list.append( ActionOutput( name=action_object["name"], arguments=action_object["arguments"] ) ) except json.JSONDecodeError as e: raise RuntimeError(f"Failed to parse code block: {code_block}") from e except KeyError as e: raise RuntimeError(f"Received invalid action format: {code_block}") from e return BackendOutput( message="".join(text_list), action_list=action_list, ) class SGlangOpenAIModelJSON(OpenAIModelJSON): def _construct_new_message(self, message: list[Message]) -> dict[str, Any]: new_message_content: list[dict[str, Any]] = [] image_count = 0 for _, msg_type in message: if msg_type == MessageType.IMAGE_JPG_BASE64: image_count += 1 for content, msg_type in message: match msg_type: case MessageType.TEXT: new_message_content.append( { "type": "text", "text": content, } ) case MessageType.IMAGE_JPG_BASE64: image_content = { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{content}", "detail": "high", }, } if image_count > 1: image_content["modalities"] = "multi-images" new_message_content.append(image_content) return {"role": "user", "content": new_message_content} ================================================ FILE: crab/agents/policies/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. 
All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # ruff: noqa: F401 from .multi_agent_by_env import MultiAgentByEnvPolicy from .multi_agent_by_func import MultiAgentByFuncPolicy from .single_agent import SingleAgentPolicy ================================================ FILE: crab/agents/policies/multi_agent_by_env.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
# ===========
from crab import Action, ActionOutput
from crab.agents.backend_models import BackendModelConfig, create_backend_model
from crab.agents.utils import generate_action_prompt
from crab.core.agent_policy import AgentPolicy
from crab.core.backend_model import (
    BackendModel,
    MessageType,
)


class MultiAgentByEnvPolicy(AgentPolicy):
    """Policy with one planning "main" agent plus one sub-agent per environment.

    The main agent produces per-step instructions; each environment sub-agent
    translates the instruction into tool calls for its own environment.
    """

    _main_agent_prompt = """You are a main agent, and your goal is to plan and
give instructions to sub-agents in each environment to complete the final task.
Now you have to do a task as described below:

    {task_description}.

The description of each given environment: {env_description}.

For each step, you are required to provide high-level instructions detailing
the next actions to be taken. Additionally, you must specify which sub-agent
in the designated environment should execute these instructions. If a sub-agent
is not needed for a particular step, you may instruct it to skip that step."""

    _env_agent_prompt = """You are a sub-agent responsible for the {environment}
environment. The description of the {environment} environment is:
{env_description}. Your goal is to assist the main agent in completing the final
task by performing actions in the {environment} environment according to the
instructions from the main agent. The final task is described below:

    {task_description}.

A unit operation you can perform is called action in a given environment. You
can only execute action in the {environment} environment. For the {environment}
environment, you are given a limited action space as function calls:

{action_descriptions}

The interactive UI elements on the screenshot are labeled with numeric tags
starting from 1. For each step, You will receive an instruction telling you
what you need to do next. After analyzing the instruction you received and the
current {environment} system, if you think you don't need to do anything in the
current {environment} system, you should choose SKIP action. Otherwise, you
must state what actions to take, what the parameters are, and you MUST provide
in which environment to perform these actions. Your answer must be function
calls. Please do not output any other information. You must make sure all
function calls get their required parameters."""

    _root_agent_prompt = """You are a sub-agent responsible for the crab
benchmark root environment. Your goal is to assist the main agent in completing
the whole task:

    "{task_description}".

You can only complete the task or submit the result when the main agent tells
you the whole task has been completed. Otherwise, you can only call SKIP.
"""

    def __init__(
        self,
        main_agent_model_backend: BackendModelConfig,
        env_agent_model_backend: BackendModelConfig,
    ):
        self.main_agent_model_backend = create_backend_model(main_agent_model_backend)
        # Sub-agent backends are created lazily in reset(), one per environment.
        self.env_agent_model_backend_config = env_agent_model_backend
        self.reset(task_description="", action_spaces={}, env_descriptions={})

    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> list:
        """Re-create the main agent prompt and one sub-agent per environment."""
        self.task_description = task_description
        main_agent_system_message = self._main_agent_prompt.format(
            task_description=task_description,
            env_description=str(env_descriptions),
        )
        # The main agent never calls tools itself, hence action space None.
        self.main_agent_model_backend.reset(main_agent_system_message, None)
        root_agent_system_message = self._root_agent_prompt.format(
            task_description=task_description
        )
        self.env_agent_model_backends: dict[str, BackendModel] = {}
        for env in action_spaces:
            backend = create_backend_model(self.env_agent_model_backend_config)
            if env == "root":
                backend.reset(root_agent_system_message, action_spaces[env])
            else:
                # NOTE(review): `require_tool` is not an attribute the backend
                # models define (they use `tool_call_required`, fixed at
                # construction) — this assignment looks like it has no effect;
                # kept for behavioral parity, verify intent.
                backend.require_tool = True
                env_agent_system_message = self._env_agent_prompt.format(
                    task_description=task_description,
                    environment=env,
                    env_description=env_descriptions[env],
                    action_descriptions=generate_action_prompt(action_spaces[env]),
                )
                backend.reset(env_agent_system_message, action_spaces[env])
            self.env_agent_model_backends[env] = backend

    def get_token_usage(self):
        """Sum token usage across the main agent and every sub-agent."""
        result = 0
        result += self.main_agent_model_backend.get_token_usage()
        for env_agent in self.env_agent_model_backends.values():
            result += env_agent.get_token_usage()
        return result

    def get_backend_model_name(self):
        return (
            self.main_agent_model_backend.__class__.__name__
            + "_"
            + self.main_agent_model_backend.model
        )

    def chat(
        self,
        observation: dict[str, list[tuple[str, MessageType]]],
    ) -> list[ActionOutput]:
        """Ask the main agent for instructions, then fan them out to sub-agents.

        Returns the concatenated tool calls of all sub-agents, each tagged
        with its environment.
        """
        main_prompt = []
        for env in observation:
            main_prompt.extend(observation[env])
        main_prompt.append(
            (
                (
                    f"Your target: {self.task_description}\n"
                    "Tell me the next step in each environment."
                ),
                MessageType.TEXT,
            )
        )
        output = self.main_agent_model_backend.chat(main_prompt)
        main_agent_message = (
            f"The instruction from main agent for this step: {output.message}"
        )
        tool_calls = []
        for env in self.env_agent_model_backends:
            backend = self.env_agent_model_backends[env]
            if env in observation:
                output = backend.chat(
                    observation[env] + [(main_agent_message, MessageType.TEXT)]
                )
            else:
                output = backend.chat((main_agent_message, MessageType.TEXT))
            # BUG FIX: backends return action_list=None when the model makes
            # no tool call; iterating/extending None raised TypeError here.
            if output.action_list:
                for action in output.action_list:
                    action.env = env
                tool_calls.extend(output.action_list)
        return tool_calls


# ===== FILE: crab/agents/policies/multi_agent_by_func.py =====
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from crab.agents.backend_models import BackendModelConfig, create_backend_model
from crab.agents.utils import (
    combine_multi_env_action_space,
    decode_combined_action,
    generate_action_prompt,
)
from crab.core import Action, ActionOutput
from crab.core.agent_policy import AgentPolicy
from crab.core.backend_model import MessageType


class MultiAgentByFuncPolicy(AgentPolicy):
    """Policy with a planning "main" agent plus a dedicated tool agent that
    translates the main agent's natural-language plan into function calls."""

    _system_prompt = """You are a helpful assistant. Now you have to do a task as
described below:

    {task_description}.

And this is the description of each given environment: {env_description}.

A unit operation you can perform is called action in a given environment. For
each environment, you are given a limited action space as function calls:

{action_descriptions}

You may receive a screenshot of the current system. The interactive UI elements
on the screenshot are labeled with numeric tags starting from 1. For each step,
You must state what actions to take, what the parameters are, and you MUST
provide in which environment to perform these actions.
"""

    _tool_prompt = """You are a helpful assistant in generating function calls. I
will give you a detailed description of what actions to take next, you should
translate it into function calls. please do not output any other information.
"""

    def __init__(
        self,
        main_agent_model_backend: BackendModelConfig,
        tool_agent_model_backend: BackendModelConfig,
    ):
        self.main_agent_model_backend = create_backend_model(main_agent_model_backend)
        self.tool_agent_model_backend = create_backend_model(tool_agent_model_backend)
        self.reset(task_description="", action_spaces=None, env_descriptions={})

    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> list[ActionOutput]:
        """Rebuild both agents' system prompts for a new task."""
        self.task_description = task_description
        # Actions from all environments are merged under "<name>_in_<env>".
        self.action_space = combine_multi_env_action_space(action_spaces)
        main_agent_system_message = self._system_prompt.format(
            task_description=task_description,
            action_descriptions=generate_action_prompt(self.action_space),
            env_description=str(env_descriptions),
        )
        # Only the tool agent gets the action space; the main agent plans in
        # free text.
        self.main_agent_model_backend.reset(main_agent_system_message, None)
        self.tool_agent_model_backend.reset(self._tool_prompt, self.action_space)

    def get_token_usage(self):
        """Sum token usage of both agents."""
        return (
            self.main_agent_model_backend.get_token_usage()
            + self.tool_agent_model_backend.get_token_usage()
        )

    def get_backend_model_name(self):
        return (
            self.main_agent_model_backend.__class__.__name__
            + "_"
            + self.main_agent_model_backend.model
        )

    def chat(
        self,
        observation: dict[str, list[tuple[str, MessageType]]],
    ) -> list[ActionOutput]:
        """Plan with the main agent, then have the tool agent emit actions."""
        prompt = []
        for env in observation:
            prompt.extend(observation[env])
        prompt.append(
            (
                f"Your target: {self.task_description}\nTell me the next action.",
                MessageType.TEXT,
            )
        )
        output = self.main_agent_model_backend.chat(prompt)
        tool_output = self.tool_agent_model_backend.chat(
            (output.message, MessageType.TEXT)
        )
        # BUG FIX: action_list is None when the tool agent makes no call;
        # decode_combined_action would raise iterating None.
        return decode_combined_action(tool_output.action_list or [])


# ===== FILE: crab/agents/policies/single_agent.py =====
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
# ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import logging

from crab import Action, ActionOutput
from crab.agents.backend_models import BackendModelConfig, create_backend_model
from crab.agents.utils import (
    combine_multi_env_action_space,
    decode_combined_action,
    generate_action_prompt,
)
from crab.core.agent_policy import AgentPolicy
from crab.core.backend_model import (
    MessageType,
)
from crab.utils.measure import timed

logger = logging.getLogger(__name__)


class SingleAgentPolicy(AgentPolicy):
    """Policy driven by a single agent, either via native function calling or
    via JSON code blocks when the backend has no tool-call support."""

    _system_prompt_with_function_call = """\
You are a helpful assistant. Now you have to do a task as described below:
**"{task_description}."** You should never forget this task and always perform
actions to achieve this task. And this is the description of each given
environment: {env_description}. A unit operation you can perform is called
Action. You have a limited action space as function calls:
{action_descriptions}
You may receive a screenshot of the current system. You may receive a
screenshot of a smartphone app. The interactive UI elements on the screenshot
are labeled with numeric tags starting from 1. In each step, You MUST explain
what do you see from the current observation and the plan of the next action,
then use a provided action in each step to achieve the task. You should state
what action to take and what the parameters should be. Your answer MUST be a
least one function call. You SHOULD NEVER ask me to do anything for you. Always
do them by yourself using function calls.
"""

    _system_prompt_no_function_call = """\
You are a helpful assistant. Now you have to do a task as described below:
**"{task_description}."** You should never forget this task and always perform
actions to achieve this task. And this is the description of each given
environment: {env_description}. You will receive screenshots of the
environments. The interactive UI elements on the screenshot are labeled with
numeric tags starting from 1.

A unit operation you can perform is called Action. You have a limited action
space as function calls:
{action_descriptions}.

You should generate JSON code blocks to execute the actions. Each code block
MUST contains only one json object, i.e. one action. You can output multiple
code blocks to execute multiple actions in a single step. You must follow the
JSON format below to output the action.

```json
{{"name": "action_name", "arguments": {{"arg1": "value1", "arg2": "value2"}}}}
```

or if not arguments needed:

```json
{{"name": "action_name", "arguments": {{}}}}
```

You MUST use exactly the same "action_name" as I gave to you in the action
space. You SHOULDN'T add any comments in the code blocks.

In each step, You MUST explain what do you see from the current observation and
the plan of the next action, then use a provided action in each step to achieve
the task. You should state what action to take and what the parameters should
be. Your answer MUST contain at least one code block. You SHOULD NEVER ask me
to do anything for you. Always do them by yourself.
"""

    def __init__(
        self,
        model_backend: BackendModelConfig,
        function_call: bool = True,
    ):
        self.model_backend = create_backend_model(model_backend)
        self.function_call = function_call
        # Silently degrade to JSON-code-block mode when the backend cannot
        # make native tool calls.
        if not self.model_backend.support_tool_call and self.function_call:
            logger.warning(
                "The backend model does not support tool call: {}".format(
                    model_backend.model_name
                )
                + "\nFallback to no function call mode."
            )
            self.function_call = False
        if self.function_call:
            self.system_prompt = self._system_prompt_with_function_call
        else:
            self.system_prompt = self._system_prompt_no_function_call
        self.reset(task_description="", action_spaces=None, env_descriptions={})

    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> list:
        """Rebuild the system prompt and action space for a new task."""
        self.task_description = task_description
        # Actions from all environments are merged under "<name>_in_<env>".
        self.action_space = combine_multi_env_action_space(action_spaces)
        system_message = self.system_prompt.format(
            task_description=task_description,
            action_descriptions=generate_action_prompt(
                self.action_space,
                # Expanded JSON-schema descriptions are only needed when the
                # model must emit raw JSON instead of native tool calls.
                expand=not self.function_call,
            ),
            env_description=str(env_descriptions),
        )
        if self.function_call:
            self.model_backend.reset(system_message, self.action_space)
        else:
            self.model_backend.reset(system_message, None)

    def get_token_usage(self):
        return self.model_backend.get_token_usage()

    def get_backend_model_name(self):
        return self.model_backend.__class__.__name__ + "_" + self.model_backend.model

    @timed
    def chat(
        self,
        observation: dict[str, list[tuple[str, MessageType]]],
    ) -> list[ActionOutput]:
        """Send all observations plus the target prompt; decode the actions."""
        prompt = []
        for env in observation:
            prompt.extend(observation[env])
        prompt.append(
            (
                f"Your target: {self.task_description}\nTell me the next action.",
                MessageType.TEXT,
            )
        )
        output = self.model_backend.chat(prompt)
        # print("Agent Message: " + output.message, flush=True)
        # print("Agent Action: " + str(output.action_list), flush=True)
        # BUG FIX: action_list is None when the model makes no tool call;
        # decode_combined_action would raise iterating None.
        return decode_combined_action(output.action_list or [])


# ===== FILE: crab/agents/utils.py
================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from crab.core import Action, ActionOutput def combine_multi_env_action_space( action_space: dict[str, list[Action]] | None, ) -> list[Action]: """Combine multi-env action space together to fit in a single agent.""" result = [] if action_space is None: return result for env in action_space: for action in action_space[env]: new_action = action.model_copy() new_action.name = new_action.name + "_in_" + env new_action.description = f"In {env} environment, " + new_action.description result.append(new_action) return result def decode_combined_action( output_actions: list[ActionOutput], ) -> list[ActionOutput]: """Decode combined action output to action output with the corresponding environment. """ result = [] for output in output_actions: name_env = output.name.split("_in_") if len(name_env) != 2: raise RuntimeError( 'The decoded action name should contain the splitter "_in_".' 
) new_output = output.model_copy() new_output.name = name_env[0] new_output.env = name_env[1] result.append(new_output) return result def generate_action_prompt(action_space: list[Action], expand: bool = False) -> str: if expand: return "".join( [ f"[**{action.name}**:\n" f"action description: {action.description}\n" f"action arguments json schema: {action.to_openai_json_schema()}\n" "]\n" for action in action_space ] ) else: return "".join( [f"[{action.name}: {action.description}]\n" for action in action_space] ) def extract_text_and_code_prompts(content: str) -> tuple[list[str], list[str]]: r"""Extract text and code prompts from the message content. Returns: A tuple (text_list, code_list) where, text_list is a list of text and code_list is a list of extracted codes both from the content. """ text_prompts: list[str] = [] code_prompts: list[str] = [] lines = content.split("\n") idx = 0 start_idx = 0 while idx < len(lines): while idx < len(lines) and (not lines[idx].lstrip().startswith("```")): idx += 1 text = "\n".join(lines[start_idx:idx]).strip() text_prompts.append(text) if idx >= len(lines): break # code_type = lines[idx].strip()[3:].strip() idx += 1 start_idx = idx while not lines[idx].lstrip().startswith("```") and idx < len(lines): idx += 1 if idx >= len(lines): break code = "\n".join(lines[start_idx:idx]).strip() code_prompts.append(code) idx += 1 start_idx = idx return text_prompts, code_prompts ================================================ FILE: crab/benchmarks/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========


================================================
FILE: crab/benchmarks/template.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import networkx as nx

from crab import BenchmarkConfig, Task, action, evaluator
from crab.environments.template import set_state, template_environment_config


@evaluator
def is_system_state(env) -> bool:
    """Pass when the (template) environment's boolean state is truthy."""
    return env.state


@evaluator(env_name="root")
def check_submit_true(env) -> bool:
    """Pass when the most recent action in the trajectory is a `_submit`
    call with a truthy ``content`` argument."""
    if env.trajectory:
        # Trajectory entries are (action_name, parameters, result) tuples.
        action_name, params, _ = env.trajectory[-1]
        print(action_name, params)
        if action_name == "_submit" and params["content"]:
            return True
    return False


@action(env_name="root")
def _submit(content: bool) -> None:
    """Submit your answer through this function.

    Args:
        content: the content to submit
    """
    # Intentionally a no-op: submission is detected by inspecting the
    # trajectory in check_submit_true, not by any side effect here.
    pass


# Single-environment demo benchmark: one state-flip task and one submit task.
template_benchmark_config = BenchmarkConfig(
    name="template_benchmark",
    environments=[template_environment_config],
    tasks=[
        Task(
            id="0",
            description="Set the system state to True.",
            evaluator=is_system_state,
            setup=set_state(False),
        ),
        Task(
            id="1",
            description="Submit True.",
            evaluator=check_submit_true,
            extra_action=[_submit],
        ),
    ],
)


@evaluator(env_name="testenv0")
def check_sys0(env) -> bool:
    """Pass when testenv0's state is truthy."""
    return env.state


@evaluator(env_name="testenv1")
def check_sys1(env) -> bool:
    """Pass when testenv1's state is truthy."""
    return env.state


@evaluator(env_name="testenv2")
def check_sys2(env) -> bool:
    """Pass when testenv2's state is truthy."""
    return env.state


# Evaluator DAG: all three per-env checks must pass before the submit check.
eval_g = nx.DiGraph()
eval_g.add_edge(check_sys0, check_submit_true)
eval_g.add_edge(check_sys1, check_submit_true)
eval_g.add_edge(check_sys2, check_submit_true)

# Multi-environment demo benchmark built from three copies of the template env.
# NOTE(review): "mutlienv" below looks like a typo for "multienv", but the name
# is a runtime identifier — renaming it could break references; confirm first.
multienv_template_benchmark_config = BenchmarkConfig(
    name="mutlienv_template_benchmark",
    environments=[
        template_environment_config.model_copy(update={"name": f"testenv{idx}"})
        for idx in range(3)
    ],
    tasks=[
        Task(
            id="0",
            description=(
                "Set the system state to True in all three environments. "
                "Then submit True to finish the project."
            ),
            evaluator=eval_g,
            extra_action=[_submit],
        )
    ],
    multienv=True,
)


================================================
FILE: crab/core/__init__.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
===========
# ruff: noqa: F401, F403
from .agent_policy import AgentPolicy
from .backend_model import BackendModel
from .benchmark import Benchmark, create_benchmark
from .decorators import action, evaluator
from .environment import Environment, create_environment
from .experiment import Experiment
from .graph_evaluator import Evaluator, GraphEvaluator
from .models import *
from .task_generator import TaskGenerator


================================================
FILE: crab/core/agent_policy.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from abc import ABC, abstractmethod

from .models import Action, ActionOutput, Message


class AgentPolicy(ABC):
    """Abstract interface every agent policy must implement.

    A policy consumes per-environment observations and produces the next
    actions; it is reset per task and reports model usage for logging.
    """

    @abstractmethod
    def chat(
        self,
        observation: dict[str, list[Message]],
    ) -> list[ActionOutput]:
        """Given per-environment observations, return the next actions."""
        ...

    @abstractmethod
    def reset(
        self,
        task_description: str,
        action_spaces: dict[str, list[Action]],
        env_descriptions: dict[str, str],
    ) -> None:
        """Re-initialize the policy for a new task."""
        ...

    @abstractmethod
    def get_token_usage(self) -> int:
        """Return cumulative token usage for the current task."""
        ...

    @abstractmethod
    def get_backend_model_name(self) -> str:
        """Return an identifier of the underlying backend model."""
        ...


================================================
FILE: crab/core/backend_model.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from abc import ABC, abstractmethod

from .models import Action, BackendOutput, MessageType


class BackendModel(ABC):
    """Abstract wrapper around a concrete LLM backend (OpenAI, Claude, ...).

    Concrete implementations hold the conversation state; ``reset`` installs
    a system message and (optionally) a tool-call action space.
    """

    @abstractmethod
    def chat(self, contents: list[tuple[str, MessageType]]) -> BackendOutput:
        """Send one user turn (text/image parts) and return the model output."""
        ...

    @abstractmethod
    def reset(
        self,
        system_message: str,
        action_space: list[Action] | None,
    ):
        """Clear history, set the system message and optional tool schema."""
        ...

    @abstractmethod
    def get_token_usage(self):
        """Return cumulative token usage since the last reset."""
        ...


================================================
FILE: crab/core/benchmark.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
===========
import traceback
from time import sleep
from typing import Any

from crab.core.graph_evaluator import GraphEvaluator
from crab.utils.measure import timed

from .environment import Environment, create_environment
from .exceptions import TaskNotFound
from .models import Action, BenchmarkConfig, ClosedAction, MessageType, StepResult, Task


class Benchmark:
    """The crab benchmark controller managing environments and agent evaluation.

    The class manages multiple environments together and provide the simple API by
    :meth:`step`, :meth:`observe` and :meth:`reset` for language model agents to
    perform tasks in multiple environments. This class introduces a "root"
    environment with no action or observation capabilities, intended as a utility
    for evaluations not directly tied to a specific environment.

    This class operates in two distinct modes: "multi-environment" and
    "single-environment". In multi-environment mode, observations and action
    results are separated by environment, returned as a dictionary. While in
    single-environment mode, all observations and action outcomes are merged under
    the "root" environment, with actions being appropriately routed to their
    respective environments.
    """

    def __init__(
        self,
        name: str,
        tasks: list[Task],
        environments: list[Environment],
        default_env: str | None = None,
        multienv: bool = False,
        prompting_tools: dict[str, dict[str, Action]] = {},
        root_action_space: list[Action] = [],
        step_limit: int = 30,
        common_setup: list[ClosedAction] = [],
    ) -> None:
        """Initializes the instance.

        Args:
            name: Identifier for the benchmark.
            tasks: Tasks to be executed within the benchmark.
            environments: Environments in which the benchmark is conducted.
            default_env: The default environment name, applied when actions do not
                specify an environment. Defaults to "root" in the multi-environment
                mode and to the environment in the single environment mode.
            multienv: Indicates whether to enable multi-environment mode. Defaults
                to :obj:`False`.
            prompting_tools: Prompting tools applied in
                :meth:`observe_with_prompt`. The first level keys are environment
                names, the second level keys are observation action names.
                Defaults to empty.
            root_action_space: The action space executed in the root environment.
        """
        self.name = name
        self.tasks = tasks
        self.multienv = multienv
        self.prompting_tools = prompting_tools
        self.step_limit = step_limit
        self.common_setup = common_setup
        # Accept a bare Environment for convenience and normalize to a list.
        if isinstance(environments, Environment):
            environments = [environments]
        self.root_env = Environment(
            name="root",
            action_space=root_action_space,
            observation_space=[],
            description="The crab benchmark root. You can submit your answer or "
            "complete the task using this environment.",
        )
        self.root_env.contained_envs = {env.name: env for env in environments}  # A hack
        environments.append(self.root_env)
        self.environment_map: dict[str, Environment] = {
            env.name: env for env in environments
        }

        # if not multienv, combine all environments action space together
        if not self.multienv:
            # action_map is used only by "agent", specifically `step` and
            # `export_action_space` functions
            self._verify_spaces()
            self._generate_action_map()

        # default_env is used for predefined actions without env_name or like
        # evaluators setups, teardowns, and so on.
        if default_env is None:
            # Two entries == the single user env plus the appended root env.
            if not multienv and len(environments) == 2:
                self.default_env = environments[0].name
            else:
                self.default_env = self.root_env.name
        else:
            self.default_env = default_env

        self.current_task: Task | None = None
        self.current_evaluator: GraphEvaluator | None = None
        self.step_cnt = 0

    def start_task(self, task_id: str) -> tuple[Task, dict[str, list[Action]]]:
        """Initializes and starts a specified task.

        Args:
            task_id: The ID of the task to start.

        Returns:
            A tuple (task, action_space), where task is the started task object,
            and action_space is a dict mapping action names to the corresponding
            action object.
        """
        if self.current_task is not None:
            raise RuntimeError("Another task is running")
        self.current_task = self._get_task_by_id(task_id)

        # reset all environments
        self._reset_environments()
        for action in self.common_setup:
            self._take_env_action(action)

        # select environment by Action.env_name
        for action in self.current_task.setup:
            self._take_env_action(action)
        for task_action in self.current_task.extra_action:
            self._set_env_action(task_action)

        # reset evaluator
        self.current_evaluator = GraphEvaluator(self.current_task.evaluator)
        # put submit action to corresponding env space
        # For now, only the last node can be the submit task
        self.step_cnt = 0
        return self.current_task, self.export_action_space()

    def close_task(self) -> None:
        """Cleans up after a task is completed."""
        if self.current_evaluator is None or self.current_task is None:
            raise RuntimeError("There is no started task.")
        for action in self.current_task.teardown:
            self._take_env_action(action)
        self.current_task = None

    def get_env_descriptions(self) -> dict[str, str]:
        """Get environment descriptions as a dict structure."""
        return {
            name: self.environment_map[name].description
            for name in self.environment_map
        }

    def observe(self) -> dict[str, dict[str, Any]]:
        """Collects observations from all environments.

        Returns:
            A dict-of-dict with observations from each environment. The first
            level keys are environment names, the second level keys are
            observation action names.
        """
        env_obs = {env.name: env.observe() for env in self.environment_map.values()}
        if self.multienv:
            return env_obs
        return self._merge_dicts(env_obs)

    @timed
    def observe_with_prompt(
        self,
    ) -> tuple[dict[str, dict[str, Any]], dict[str, tuple[str, MessageType]]]:
        """Collects observations and applies prompting tools.

        Returns:
            A tuple (observations, prompts), where "observations" and "prompts"
            are observations from each environment and the result of applying
            prompting tools on them.
The first level keys are environment names, the second level keys are observation action names. Notice that some dicts can be empty if its prompting tool wasn't set. """ observations = {} prompts = {} for env_name, env in self.environment_map.items(): if env_name in self.prompting_tools: tools = self.prompting_tools[env_name] else: tools = {} observations[env_name], prompts[env_name] = env.observe_with_prompt(tools) if self.multienv: return observations, prompts return self._merge_dicts(observations), self._merge_dicts(prompts) def evaluate(self): self.current_evaluator.step(self.environment_map, self.default_env) return self.current_evaluator.stat() @timed def step( self, action: str, parameters: dict[str, Any] = {}, env_name: str | None = None, ) -> StepResult: """Executes a step in the benchmark by performing an action. Args: action: The action to execute. parameters: Parameters for the action. env_name: The name of the environment. Returns: The result of the step including observations and evaluation metrics. Notice that the `truncated` field in the result is not meaningful for now. 
""" terminated = False info = {} if self.current_evaluator is None or self.current_task is None: raise RuntimeError("There is no started task.") if action == "complete": terminated = True info["terminate_reason"] = "agent_complete" return StepResult( truncated=False, terminated=True, action_returns=None, evaluation_results=self.current_evaluator.stat(), info=info, ) try: environment = self._get_env(env_name=env_name, action_name=action) except Exception: print(traceback.format_exc()) terminated = True info["terminate_reason"] = "action_format_error" info["exception_detail"] = traceback.format_exc() environment.reset() self.close_task() return StepResult( truncated=False, terminated=True, action_returns=None, evaluation_results=self.current_evaluator.stat(), info=info, ) try: action_returns = environment.step(action, parameters) except Exception: print(traceback.format_exc()) terminated = True info["terminate_reason"] = "env_exception" info["exception_detail"] = traceback.format_exc() environment.reset() self.close_task() return StepResult( truncated=False, terminated=True, action_returns=None, evaluation_results=self.current_evaluator.stat(), info=info, ) try: evaluation_results = self.evaluate() except Exception: print(traceback.format_exc()) terminated = True info["terminate_reason"] = "evaluator_exception" info["exception_detail"] = traceback.format_exc() environment.reset() self.close_task() return StepResult( truncated=False, terminated=True, action_returns=action_returns, evaluation_results=self.current_evaluator.stat(), info=info, ) self.step_cnt += 1 if self.current_evaluator.is_complete(): terminated = True info["terminate_reason"] = "success" if self.step_cnt >= self.step_limit: terminated = True info["terminate_reason"] = "reach_max_step" if terminated: environment.reset() self.close_task() return StepResult( truncated=False, terminated=terminated, action_returns=action_returns, evaluation_results=evaluation_results, info=info, ) def reset(self) -> None: 
"""Resets all environments and the current task.""" self.current_evaluator = None self._reset_environments() def human_evaluation(self, task_id: str) -> None: task, _ = self.start_task(task_id) print(task.description) self.current_evaluator.human_mode = True evaluation_results = self.evaluate() print(evaluation_results, end="") while evaluation_results["completeness"] != 1.0: sleep(2) evaluation_results = self.evaluate() print("\r" + str(evaluation_results), end="") self.close_task() def export_action_space(self) -> dict[str, list[Action]]: """Returns the action spaces from all environments. Returns: A dict of action lists for each environment, keyed by environment name. """ result = {env.name: env.action_space for env in self.environment_map.values()} if self.multienv: return result return self._merge_lists(result) def _verify_spaces(self) -> None: """Make sure all actions and observations are unique.""" observation_name_set = set() action_name_set = set() for env in self.environment_map.values(): for action in env.action_space: if action.name in action_name_set: raise ValueError( "Dulplicated action names are not allowed in single " "environment benchmark." ) action_name_set.add(action.name) for observation in env.observation_space: if observation.name in observation_name_set: raise ValueError( "Dulplicated observation names are not allowed in the " "single environment benchmark." 
) observation_name_set.add(observation.name) def _generate_action_map(self) -> None: self.action_map: dict[str, Environment] = {} for env in self.environment_map.values(): for action in env.action_space: self.action_map[action.name] = env def _get_env( self, env_name: str | None = None, action_name: str | None = None ) -> Environment: # env_name exists just return it if env_name is not None: return self.environment_map[env_name] # or in multienv use default env, in singlenev use action_name mapping if action_name is not None and not self.multienv: return self.action_map[action_name] return self.environment_map[self.default_env] def _take_env_action(self, action: Action) -> Any: if action.env_name is None: env = self.environment_map[self.default_env] else: env = self.environment_map[action.env_name] return env.take_action(action) def _set_env_action(self, action: Action) -> None: if action.env_name is None: env = self.environment_map[self.default_env] else: env = self.environment_map[action.env_name] env.set_action(action) if not self.multienv: self.action_map[action.name] = env def _reset_environments(self): for env in self.environment_map.values(): env.reset() if not self.multienv: self._generate_action_map() def _get_task_by_id(self, task_id: str) -> Task: result = [task for task in self.tasks if task_id == task.id] if len(result) == 0: # Doesn't find the task raise TaskNotFound(f"No such task: {task_id}") return result[0] def _merge_dicts( self, env_dict: dict[str, dict[str, Any]] ) -> dict[str, dict[str, Any]]: "In single environment mode, merge aciton_space/observation_space in root." result = {} for dict_value in env_dict.values(): result.update(dict_value) return {self.default_env: result} def _merge_lists(self, env_dict: dict[str, list]) -> dict[str, list]: "In single environment mode, merge aciton_space/observation_space in root." 
result = [] for dict_value in env_dict.values(): result.extend(dict_value) return {self.default_env: result} def create_benchmark(config: BenchmarkConfig) -> Benchmark: """Creates a benchmark by BenchmarkConfig""" if isinstance(config, BenchmarkConfig): environments = [ create_environment(env_config) for env_config in config.environments ] parameters = dict(config) parameters["environments"] = environments return Benchmark(**parameters) else: raise ValueError("Unsupport benchmark config type.") ================================================ FILE: crab/core/csv_log.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import csv from pathlib import Path from typing import Any class CSVLog: def __init__(self, csv_path: Path, headers: list[str]) -> None: self.csv_path = csv_path self.header = headers if not csv_path.exists(): with open(csv_path, "w", newline="") as file: writer = csv.writer(file) writer.writerow(headers) def write_row(self, data: list[Any]): assert len(data) == len(self.header) with open(self.csv_path, "a", newline="") as file: writer = csv.writer(file) writer.writerow(data) ================================================ FILE: crab/core/decorators.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from typing import Callable from .models import Action, Evaluator def _decorator(func, cls: type[Action], options: dict | None = None) -> Action: action = cls.from_function(func) if options is not None: for key in options: setattr(action, key, options[key]) return action def action(*args: Callable, env_name: str | None = None, local=False): """Use @action to change a function to an Action""" if args and callable(args[0]): return _decorator(args[0], Action) return lambda func: _decorator(func, Action, {"env_name": env_name, "local": local}) def evaluator( *args: Callable, require_submit: bool = False, env_name: str | None = None, local=False, ): """Use @evaluator to change a function to an Evaluator""" if args and callable(args[0]): return _decorator(args[0], Evaluator) return lambda func: _decorator( func, Evaluator, {"require_submit": require_submit, "env_name": env_name, "local": local}, ) ================================================ FILE: crab/core/environment.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import json
import logging
from typing import Any

from httpx import Client

from crab.utils import decrypt_message, encrypt_message, generate_key_from_env
from crab.utils.measure import timed

from .exceptions import ActionNotFound
from .models import Action, ClosedAction, EnvironmentConfig

logger = logging.getLogger("crab-server")


class Environment:
    """
    A crab environment for language model agent interaction and evaluation.

    This class supports action execution and observation within a simulated or
    actual ecosystem. The environment is defined by customizable action and
    observation spaces, comprising various crab actions. Actions should include
    comprehensive docstrings to facilitate agent understanding and interaction.

    Typically, users instantiate this class directly to perform actions within the
    local execution context (i.e., the device running the crab framework). This
    class may also serve as a base for specialized environments requiring unique
    action execution processes, such as forwarding actions to remote systems for
    execution. This is achieved by overriding the `take_action` method.

    Actions defined in the `action_space`, `observation_space`, or `reset`, as
    well as those invoked through the `take_action` method that include an `env`
    parameter, will have this parameter automatically populated with the current
    environment instance. This allows actions to access and manipulate environment
    states and variables.

    Attributes:
        name (str): The name of the environment.
        description (str): A description of the environment.
        trajectory (List[tuple[str, dict[str, Any], Any]]): A record of actions
            taken, their parameters, and the results.

    Args:
        name (str): The name of the environment.
        action_space (List[Action]): A list of actions that can be executed,
            defining the possible interactions agents can undertake.
        observation_space (List[ClosedAction]): A list of observations defining
            the possible states agents can perceive.
        description (str, optional): A textual description of the environment.
            Defaults to an empty string.
        reset (Action | None, optional): An action to reset the environment to its
            initial state. Defaults to `None`.
        remote_url (Action | None, optional): If set, the action will be taken at
            remote machine, by default it will be taken at local. Example:
            `http://192.168.1.1:8000`. Defaults to `None`.
    """

    def __init__(
        self,
        name: str,
        action_space: list[Action],
        observation_space: list[ClosedAction],
        description: str = "",
        reset: Action | None = None,
        remote_url: str | None = None,
        # NOTE(review): mutable default — harmless because it is only read,
        # never mutated, but a None sentinel would be safer; confirm before
        # changing the signature.
        extra_attributes: dict[str, Any] = {},
    ) -> None:
        self.name = name
        self.description = description
        # (action_name, parameters, result) triples, appended by step().
        self.trajectory: list[tuple[str, dict[str, Any], Any]] = []
        self.observation_history: list[dict[str, Any]] = []
        # Keep the pristine action space so reset() can restore it.
        self._origin_action_space = action_space
        self._observation_space = observation_space
        self._reset = reset
        self._action_map = {action.name: action for action in action_space}
        # HTTP client toward a remote crab server; None means local execution.
        self._client: Client | None = None
        if remote_url is not None:
            self._client = Client(base_url=remote_url, timeout=60)
        for key, value in extra_attributes.items():
            setattr(self, key, value)
        # Optional symmetric key for encrypting remote action payloads.
        self._enc_key = generate_key_from_env()

    def step(
        self,
        action_name: str,
        parameters: dict[str, Any] = {},
    ):
        """
        Executes an action that is in the action space and recorded to the
        trajectory.

        Args:
            action_name: Name of the action to execute. Must be in action space.
            parameters (dict[str, Any], optional): Parameters for the action.
                Defaults to an empty `dict`.
Returns: Any: The result of the action execution. Raises: ActionNotFound: If the action is not found within the environment's action space. """ if action_name not in self._action_map: logger.error(f'Env "{self.name}": receives unkown action "{action_name}"') raise ActionNotFound(f"Action {action_name} not found in the environment") action_handler = self._action_map[action_name] result = self.take_action(action_handler, parameters) self.trajectory.append((action_handler.name, parameters, result)) return result def take_action( self, action: Action, parameters: dict[str, Any] = {}, ) -> Any: """ Executes an action within the environment. Args: action (Action): The action to execute. Can be an action name or an `Action` object. parameters (dict[str, Any], optional): Parameters for the action. Defaults to an empty `dict`. Returns: Any: The result of the action execution. """ try: result = self._action_endpoint(action, parameters) logger.info( f'Env "{self.name}": action: "{action.name}" successed. ' "result: {result}." ) return result except: logger.exception( f'Env "{self.name}": action: "{action}" failed:', stack_info=True ) raise @timed def observe(self) -> dict[str, Any]: """ Observes the current state. Returns: Dict[str, Any]: A dictionary containing the current observations. Keys represent the names of the observation actions. """ result = {o.name: self.take_action(o) for o in self.observation_space} self.observation_history.append(result) return result @timed def observe_with_prompt( self, prompt_tools: dict[str, Action] ) -> tuple[dict[str, Any], dict[str, Any]]: """ Observes the current state with prompt. 
""" observations = self.observe() prompts = {} for ob_name, value in observations.items(): if ob_name in prompt_tools: action = prompt_tools[ob_name] key = next(iter(action.get_required_params())) prompts[ob_name] = self._action_endpoint(action, {key: value}) return observations, prompts def set_action(self, action: Action) -> None: """ Adds an action in the environment's action space, either replace if the action name exist. Args: action (Action): The action to replace or add. """ self._action_map[action.name] = action def start(self) -> None: """Starts the environment.""" pass def close(self) -> None: """Closes the environment, performing any necessary cleanup.""" pass def reset(self) -> None: """Resets the environment based on the provided reset action""" self._action_space = self._origin_action_space self.action_map = {action.name: action for action in self._action_space} if self._reset is not None: self.take_action(self._reset) @property def action_space(self) -> list[Action]: return list(self._action_map.values()) @property def observation_space(self) -> list[ClosedAction]: return self._observation_space def _action_endpoint(self, action: Action, parameters: dict[str, Any]): """Rewrite to support different environments.""" if self._client is not None and not action.local: data = json.dumps( { "action": action.to_raw_action(), "parameters": action.parameters(**parameters).model_dump(), } ) content_type = "application/json" if self._enc_key is not None: data = encrypt_message(data, self._enc_key) content_type = "text/plain" # send action to remote machine response = self._client.post( "/raw_action", content=data, headers={"Content-Type": content_type}, ) resp_content = response.content.decode("utf-8") if self._enc_key is not None: resp_content = decrypt_message(resp_content, self._enc_key) resp_json = json.loads(resp_content) return resp_json["action_returns"] else: # or directly execute it action = action.set_kept_param(env=self) return 
action.run(**parameters) def create_environment(config): if isinstance(config, EnvironmentConfig): return Environment(**dict(config)) else: raise ValueError("Unsupported environment config type.") ================================================ FILE: crab/core/exceptions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== class ActionNotFound(ValueError): pass class TaskNotFound(ValueError): pass ================================================ FILE: crab/core/experiment.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
# ===========

import json
import traceback
from datetime import datetime
from pathlib import Path
from time import sleep
from typing import Literal

from crab.utils.common import base64_to_image

from .agent_policy import AgentPolicy
from .benchmark import Benchmark
from .csv_log import CSVLog
from .models import ActionOutput, MessageType

# Per-step metric columns written to each run's metrics.csv.
CURRENT_EXPERIMENT_COLUMNS = [
    "step",
    "action",
    "total_nodes",
    "complete_nodes",
    "completeness",
    "completeness_per_action",
    "step_to_complete",
    "longest_unfinished_path_length",
    "token_usage",
]

# One-row-per-run summary columns written to the shared main_log.csv.
MAIN_LOG_COLUMNS = [
    "time",
    "agent_policy",
    "model",
    "task_id",
    "total_steps",
    "terminate_reason",
    "total_nodes",
    "complete_nodes",
    "completeness",
    "completeness_per_action",
    "step_to_complete",
    "longest_unfinished_path_length",
    "token_usage",
]


class Experiment:
    """Drives one agent policy through one benchmark task, logging every step.

    Responsibilities: observe via the benchmark, query the agent policy for
    actions, execute them, and persist prompts, images, metrics, and a final
    summary row under ``log_dir`` (when a log directory is provided).
    """

    def __init__(
        self,
        benchmark: Benchmark,
        task_id: str,
        agent_policy: AgentPolicy | Literal["human"],
        log_dir: Path | None = None,
    ) -> None:
        # When agent_policy is the literal "human", start_benchmark delegates
        # to the benchmark's human evaluation flow and no agent runs.
        self.benchmark = benchmark
        self.task_id = task_id
        self.agent_policy = agent_policy
        # log_dir of None disables all file logging (see the write_* methods).
        self.log_dir = log_dir

    def write_message(self, message: str, step: int):
        """Append a step-delimited message to messages.txt for this run."""
        with open(self.message_path, "a") as file:
            file.write("=" * 20 + f"Step: {step}" + "=" * 20 + "\n" + message + "\n")

    def write_task_info_json(self, task_info_path: Path):
        """Dump a static description of the task and its environments to JSON."""
        envs_info = {}
        for name, env in self.benchmark.environment_map.items():
            # Reads the environment's private maps directly; observation
            # actions are keyed by action name.
            actions = {
                name: action.description for name, action in env._action_map.items()
            }
            observations = {
                action.name: action.description for action in env._observation_space
            }
            envs_info[name] = {
                "description": env.description,
                "actions": actions,
                "observations": observations,
            }
        task_info = {
            "benchmark_name": self.benchmark.name,
            "task_id": self.task_id,
            "task_description": self.task.description,
            "envs": envs_info,
        }
        with open(task_info_path, "w") as file:
            json.dump(task_info, file, indent=4)

    def init_log_dir(self):
        """Create the per-run directory tree and CSV logs.

        Layout: log_dir/<task_id>/<PolicyClass>(<model>)/<timestamp>/ with
        metrics.csv, prompt/, images/, and messages.txt inside. No-op when
        log_dir is None.
        """
        if self.log_dir is not None:
            self.log_dir.mkdir(exist_ok=True, parents=True)
            self.main_log = CSVLog(self.log_dir / "main_log.csv", MAIN_LOG_COLUMNS)
            self.task_info_dir = self.log_dir / self.task_id
            self.task_info_dir.mkdir(exist_ok=True, parents=True)
            self.write_task_info_json(self.task_info_dir / "task_info.json")
            self.time_now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
            self.current_experiment_dir = (
                self.task_info_dir
                / f"{self.agent_policy.__class__.__name__}"
                f"({self.agent_policy.get_backend_model_name()})"
                / self.time_now
            )
            self.current_experiment_dir.mkdir(parents=True)
            self.current_experiment_log = CSVLog(
                self.current_experiment_dir / "metrics.csv", CURRENT_EXPERIMENT_COLUMNS
            )
            self.prompt_path = self.current_experiment_dir / "prompt"
            self.image_path = self.current_experiment_dir / "images"
            self.prompt_path.mkdir()
            self.image_path.mkdir()
            self.message_path = self.current_experiment_dir / "messages.txt"

    def get_prompt(self) -> dict[str, list[tuple[str, MessageType]]]:
        """Return the current multi-environment observation as agent prompt."""
        return self.benchmark.observe()

    def execute_action(self, response: list[ActionOutput]) -> bool:
        """Run each action the agent proposed; return True when terminated."""
        for action in response:
            benchmark_result = self.benchmark.step(
                action=action.name,
                parameters=action.arguments,
                env_name=action.env,
            )
            self.metrics = benchmark_result.evaluation_results
            if benchmark_result.terminated:
                print("\033[92m" f"Task finished, result: {self.metrics}" "\033[0m")
                self.write_current_log_row(action)
                self.write_main_csv_row(benchmark_result.info["terminate_reason"])
                if "exception_detail" in benchmark_result.info:
                    self.write_exception_detail(
                        benchmark_result.info["exception_detail"]
                    )
                return True
            print(
                "\033[92m"
                f'Action "{action.name}" in env "{action.env}" success. '
                f"Current evaluation results: {self.metrics}\n"
                "\033[0m"
            )
            self.write_current_log_row(action)
            # step counter advances once per executed (non-terminal) action
            self.step_cnt += 1
        return False

    def log_prompt(self, prompt):
        """Append the prompt to per-env markdown files; save images to disk."""
        for env in prompt:
            with open(self.prompt_path / f"{env}_prompt.md", "a") as prompt_file:
                prompt_file.write(f"### Step {self.step_cnt}\n\n")
                for message, message_type in prompt[env]:
                    if message_type == MessageType.IMAGE_JPG_BASE64:
                        file_name = f"{env}_{self.step_cnt}.png"
                        base64_to_image(message).save(self.image_path / file_name)
                        prompt_file.write(f"![](../images/{file_name})\n\n")
                    else:
                        prompt_file.write(message + "\n\n")

    def step(self, it) -> bool:
        """One observe→chat→act cycle. Returns True when the run should stop."""
        print("=" * 40)
        print(f"Start agent step {self.step_cnt}:")
        prompt = self.get_prompt()
        self.log_prompt(prompt)
        try:
            response = self.agent_policy.chat(prompt)
        except Exception:
            # Agent-side failure terminates the run but is still logged.
            print(traceback.format_exc())
            self.write_main_csv_row("agent_exception")
            self.write_exception_detail(traceback.format_exc())
            return True
        # content = response["content"]
        # self.write_message(str(content), it)
        # print("\033[94m" f"Agent Reponse: {content}" "\033[0m")
        print(f"So agent take action: {response}")
        return self.execute_action(response)

    def start_benchmark(self):
        """Entry point: set up the task and loop `step` up to 50 iterations."""
        if self.agent_policy == "human":
            self.benchmark.human_evaluation(self.task_id)
            return
        env_description = {}
        for env in self.benchmark.environment_map:
            env_description[env] = self.benchmark.environment_map[env].description
        self.task, action_space = self.benchmark.start_task(self.task_id)
        self.agent_policy.reset(
            task_description=self.task.description,
            action_spaces=action_space,
            env_descriptions=env_description,
        )
        print(
            f'Start benchmark "{self.benchmark.name}", task id "{self.task.id}": '
            f'"{self.task.description}"'
        )
        self.init_log_dir()
        self.step_cnt = 0
        self.metrics = self.benchmark.evaluate()
        # Sanity check: a fresh task must start with nothing completed.
        if self.metrics["complete_nodes"] != 0:
            print("Graph Evaluator start with non-zero value. Check environment setup.")
            return
        for it in range(50):
            try:
                terminated = self.step(it)
            except KeyboardInterrupt:
                self.write_main_csv_row("keyboard_interrupt")
                return
            if terminated:
                return
            sleep(2)
            # input("Press enter to do next step:")

    def write_exception_detail(self, exception_info: str):
        """Persist a traceback string; no-op when logging is disabled."""
        if self.log_dir is None:
            return
        with open(self.current_experiment_dir / "exception_detail.txt", "w") as file:
            file.write(exception_info)

    def write_current_log_row(self, action):
        """Append one per-step metrics row; no-op when logging is disabled."""
        if self.log_dir is None:
            return
        self.current_experiment_log.write_row(
            [
                self.step_cnt,
                str(action),
                self.metrics["total_nodes"],
                self.metrics["complete_nodes"],
                self.metrics["completeness"],
                self.metrics["completeness_per_action"],
                self.metrics["step_to_complete"],
                self.metrics["longest_unfinished_path_length"],
                self.agent_policy.get_token_usage(),
            ]
        )

    def write_main_csv_row(self, terminate_reason):
        """Append the run-summary row; no-op when logging is disabled."""
        if self.log_dir is None:
            return
        self.main_log.write_row(
            [
                self.time_now,
                self.agent_policy.__class__.__name__,
                self.agent_policy.get_backend_model_name(),
                self.task_id,
                self.step_cnt,
                terminate_reason,
                self.metrics["total_nodes"],
                self.metrics["complete_nodes"],
                self.metrics["completeness"],
                self.metrics["completeness_per_action"],
                self.metrics["step_to_complete"],
                self.metrics["longest_unfinished_path_length"],
                self.agent_policy.get_token_usage(),
            ]
        )


================================================
FILE: crab/core/graph_evaluator.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========

from collections import deque
from typing import Any

import networkx as nx

from .environment import Environment
from .models import Evaluator


class GraphEvaluator:
    """Tracks task progress as a DAG of Evaluator nodes.

    A node "passes" when its evaluator action returns True; a node becomes
    eligible once all its predecessors have passed (unless `enable_shortcut`
    allows evaluating every node each step). The task is complete when the
    single sink node passes.
    """

    def __init__(
        self,
        incoming_graph_data,
        enable_shortcut: bool = False,
    ) -> None:
        # incoming_graph_data: anything networkx.DiGraph accepts; must be a
        # non-empty DAG with exactly one sink node.
        self.G = nx.DiGraph(incoming_graph_data)
        assert nx.is_directed_acyclic_graph(self.G)
        self.count: int = 0  # number of `step` calls performed so far
        self.total_nodes: int = self.G.number_of_nodes()
        assert self.total_nodes != 0
        self.complete_nodes: int = 0
        self.completeness: float = 0.0
        self.completeness_per_action: float = 0.0
        self.step_to_complete: int = self.G.number_of_edges()
        self.longest_unfinished_path_length: int = nx.dag_longest_path_length(self.G)
        self.enable_shortcut: bool = enable_shortcut
        # Set the sink node for the DAG:
        sink_nodes: list[Evaluator] = [
            node for node, out_degree in self.G.out_degree() if out_degree == 0
        ]
        if len(sink_nodes) != 1:
            raise ValueError("Graph should have exactly one sink node.")
        self.sink_node: Evaluator = sink_nodes[0]
        # In human mode, `local` evaluators are assumed to pass without
        # being executed (see step()).
        self.human_mode = False
        self.reset()

    def reset(self):
        """Clear all pass records and predecessor counters."""
        self.count = 0
        for node in self.G.nodes():
            # remaining_predecessors reaches 0 when a node becomes eligible;
            # passing_count records the step index at which a node passed
            # (None = not yet passed).
            self.G.nodes[node]["remaining_predecessors"] = self.G.in_degree(node)
            self.G.nodes[node]["passing_count"] = None

    def step(
        self,
        envs: dict[str, Environment],
        default_env: str = "root",
    ):
        """Run all currently-eligible evaluators, cascading newly unlocked
        ones within the same step, then refresh the derived metrics.

        Raises:
            ValueError: If called after the graph is already complete.
        """
        if self.is_complete():
            raise ValueError(
                "GraphEvaluator has already completed and "
                "cannot perform another step."
            )
        run_evaluators = set()
        evaluators = self.get_next_source_nodes()
        while evaluators:
            for evaluator in evaluators:
                if evaluator.local and self.human_mode:
                    # Human mode: trust local checks instead of running them.
                    result = True
                else:
                    environment = envs[evaluator.env_name or default_env]
                    result = environment.take_action(evaluator)
                if result:
                    self.G.nodes[evaluator]["passing_count"] = self.count
                    self.complete_nodes += 1
                    for _, out_node in self.G.out_edges(evaluator):
                        self.G.nodes[out_node]["remaining_predecessors"] -= 1
                    if self.is_complete():
                        # Sink passed: count everything as complete and stop.
                        self.complete_nodes = self.total_nodes
                        break
            run_evaluators.update(evaluators)
            # Only evaluate nodes unlocked this round, never re-run ones
            # already executed in this step.
            evaluators = self.get_next_source_nodes() - run_evaluators
        self.update()

    def get_next_source_nodes(self) -> set[Evaluator]:
        r"""Get next source nodes to evaluate."""
        if not self.enable_shortcut:
            # Eligible = not yet passed and all predecessors passed.
            source_nodes: list[Evaluator] = []
            for node in self.G.nodes(data=True):
                if (
                    node[1]["passing_count"] is None
                    and node[1]["remaining_predecessors"] == 0
                ):
                    source_nodes.append(node[0])
        else:
            # Shortcut mode considers every node each step.
            source_nodes = list(self.G.nodes())
        return set(source_nodes)

    def entry(self) -> bool:
        # True when every node in the graph has passed.
        return all(count is not None for _, count in self.G.nodes(data="passing_count"))

    def update(self):
        """Recompute the derived metrics after a step."""
        self.count += 1
        self.completeness = float(self.complete_nodes / self.total_nodes)
        self.completeness_per_action = self.completeness / self.count
        self.step_to_complete = self.calculate_step_to_complete()
        self.longest_unfinished_path_length = (
            self.calculate_longest_unfinished_path_length()
        )

    def calculate_longest_unfinished_path_length(self) -> int:
        """Length (in edges) of the longest all-unfinished path into the sink."""
        longest_path_length: int = 0
        if self.G.nodes[self.sink_node]["passing_count"] is not None:
            return longest_path_length
        # Initialize set to keep track of visited nodes
        visited = set()
        # Initialize queue for BFS
        queue = deque([[self.sink_node]])
        # BFS traversal with path
        while queue:
            path = queue.popleft()
            node = path[0]
            # Mark the node as visited
            visited.add(node)
            # NOTE(review): reads as max(...) - 1 rather than
            # max(len(path) - 1, ...). It still yields len(path) - 1 because
            # BFS dequeues paths in non-decreasing length order, so
            # len(path) >= longest_path_length + 1 here — confirm before
            # touching this line.
            longest_path_length = max(len(path), longest_path_length) - 1
            # Explore predecessor of the current node
            for predecessor in self.G.predecessors(node):
                # If predecessor is complete, skip it
                if self.G.nodes[predecessor]["passing_count"] is not None:
                    continue
                elif predecessor not in visited:
                    # Add path with predecessor to queue
                    queue.append([predecessor] + path)
        return longest_path_length

    def calculate_step_to_complete(self) -> int:
        """Count edges among unfinished ancestors of the sink (work left)."""
        # Initialize count for incomplete edges
        incomplete_edges: int = 0
        if self.G.nodes[self.sink_node]["passing_count"] is not None:
            return incomplete_edges
        # Initialize set to keep track of visited nodes
        visited = set()
        # Initialize queue for BFS
        queue = deque([self.sink_node])
        # BFS traversal
        while queue:
            # Pop node from queue
            node = queue.popleft()
            # Mark the node as visited
            visited.add(node)
            incomplete_edges += len(list(self.G.predecessors(node)))
            # Explore predecessor of the current node
            for predecessor in self.G.predecessors(node):
                # If predecessor is complete, skip it
                if self.G.nodes[predecessor]["passing_count"] is not None:
                    continue
                elif predecessor not in visited:
                    # Add predecessor to queue
                    queue.append(predecessor)
        return incomplete_edges

    def is_complete(self) -> bool:
        """The task is complete exactly when the sink node has passed."""
        return self.G.nodes[self.sink_node]["passing_count"] is not None

    def get_completeness(self) -> float:
        return self.completeness

    def get_completeness_per_action(self) -> float:
        return self.completeness_per_action

    def get_step_to_complete(self) -> int:
        return self.step_to_complete

    def get_longest_unfinished_path_length(self) -> int:
        return self.longest_unfinished_path_length

    def stat(self) -> dict[str, Any]:
        """Snapshot of all metrics as a plain dict (used for logging)."""
        return {
            "total_nodes": self.total_nodes,
            "complete_nodes": self.complete_nodes,
            "completeness": self.completeness,
            "completeness_per_action": self.completeness_per_action,
            "step_to_complete": self.step_to_complete,
            "longest_unfinished_path_length": self.longest_unfinished_path_length,
        }

    def _check_submit(self, environment: Environment) -> bool:
        """Check if the last action is _submit. If yes, return its result,
        otherwise return False.
        """
        if not environment.trajectory:
            return False
        last_action = environment.trajectory[-1]
        if last_action[0] != "_submit":
            return False
        # trajectory entries are (action_name, parameters, result)
        return last_action[2]

    def compute_radar_stats(self) -> dict[str, float]:
        """Three normalized [0, 1] metrics for the radar chart in visualize()."""
        longest_path_length = nx.dag_longest_path_length(self.G)
        return {
            "Completeness": float(self.completeness),
            "Efficiency": float(self.completeness_per_action),
            "Path Completeness Ratio": (
                longest_path_length - self.longest_unfinished_path_length
            )
            / longest_path_length,
        }

    @staticmethod
    def visualize(evaluators: list["GraphEvaluator"], path: str):
        """Render radar charts for several evaluators to an image file."""
        import plotly.graph_objects as go

        fig = go.Figure()
        for i, evaluator in enumerate(evaluators):
            radar_stats = evaluator.compute_radar_stats()
            fig.add_trace(
                go.Scatterpolar(
                    r=list(radar_stats.values()),
                    theta=list(radar_stats.keys()),
                    fill="toself",
                    name=f"Graph Evaluator {i}",
                )
            )
        fig.update_layout(
            polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
            showlegend=True,
        )
        fig.update_layout(
            margin=dict(l=150, r=150, t=150, b=150),
        )
        fig.write_image(path, scale=12, width=600, height=600)


================================================
FILE: crab/core/models/__init__.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========

# ruff: noqa: F401
from .action import Action, ClosedAction
from .agent_interface import ActionOutput, BackendOutput, Message, MessageType
from .benchmark_interface import StepResult
from .config import BenchmarkConfig, EnvironmentConfig, VMEnvironmentConfig
from .evaluator import Evaluator
from .task import GeneratedTask, SubTask, SubTaskInstance, Task

__all__ = [
    "Action",
    "ClosedAction",
    "MessageType",
    "Message",
    "ActionOutput",
    "BackendOutput",
    "StepResult",
    "BenchmarkConfig",
    "Task",
    "SubTask",
    "SubTaskInstance",
    "GeneratedTask",
    "Evaluator",
    "EnvironmentConfig",
    "VMEnvironmentConfig",
]


================================================
FILE: crab/core/models/action.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
# ===========

from functools import partial
from inspect import Parameter, Signature, signature
from types import NoneType
from typing import Annotated, Any, Callable, TypeAlias

from docstring_parser import parse
from pydantic import (
    AfterValidator,
    BaseModel,
    ValidationError,
    create_model,
    model_serializer,
)
from pydantic.fields import FieldInfo

from crab.utils.common import callable_to_base64

try:
    from typing import Self
except ImportError:
    from typing_extensions import Self

# Parameter names that are injected by the framework at runtime and hidden
# from the agent-facing parameter model (see Action.from_function).
KEPT_PARAMS = ["env"]
# Shared empty pydantic model used when an action takes no parameters.
EMPTY_MODEL = create_model("Empty")


class Action(BaseModel):
    """
    The core operational unit within the Crab system.

    This class stores parameters and return type definitions and can be easily
    converted into a JSON schema. It supports argument verification and
    includes a feature for retaining specific parameters.

    Attributes:
        name (str): The name of the action.
        entry (Callable): The actual entry function of the action.
        parameters (type[BaseModel]): Definition of input parameters.
        returns (type[BaseModel]): Definition of the return type. Note: The
            actual return type is specified by the `returns` attribute in this
            model.
        description (str | None): A clear and concise description of the
            function's purpose and behavior. Defaults to None.
        kept_params (dict[str, Any]): Parameters retained for internal use by
            the Crab system, such as 'env' for storing the current environment.
            These parameters do not appear in the `parameters` field and are
            automatically injected at runtime. Defaults to an empty dictionary.
        env_name (Optional[str]): Specify the environment the action is
            associated with. Defaults to None.
    """

    name: str
    entry: Callable
    parameters: type[BaseModel]
    returns: type[BaseModel]
    description: str | None = None
    kept_params: list[str] = []
    env_name: str | None = None
    local: bool = False

    def __eq__(self, other):
        return super().__eq__(other)

    def __hash__(self):
        # NOTE(review): hashes only on `entry`, while equality is pydantic
        # field-wise — two copies wrapping the same function hash alike.
        return hash(self.entry)

    def __call__(self, *args: Any, **kwargs: Any) -> Self:
        """Sets default values for the action.

        Direct calling of the action will not actually call the function, yet
        set defaults values for the action, so the agent don't need to or only
        need to provide part of the parameters. This method has two modes,
        full setting and partial setting. Full setting mode is applied when
        the user provides positional arguments, where all the required
        parameters must be provided and the action parameters will be empty.
        While if only keyword arguments are provided, partial setting mode is
        applied, where the parameter model will not be changed but only change
        the default value of the parameters.

        Note:
            Full setting mode is not stable.
        """
        if args:
            # this is closed function
            result = self.model_copy(
                update={
                    "entry": partial(self.entry, *args, **kwargs),
                    "parameters": EMPTY_MODEL,
                }
            )
            if self.description is not None:
                result.description = self.description + f" Input: {args} {kwargs}"
            return result
        else:
            # or it should only contain kwargs
            for key in kwargs:
                # verify the kwargs exist
                if key not in self.parameters.model_fields:
                    raise ValueError(
                        f'"{key}" is not a parameter of action "{self.name}"'
                    )
            result = self.model_copy(
                update={
                    "entry": partial(self.entry, **kwargs),
                }
            )
            if self.description is not None:
                result.description = self.description + f" Input: {args} {kwargs}"
            return result

    @staticmethod
    def _check_combinable(a: "Action", b: "Action") -> None:
        """Raise ValueError unless two actions can be composed together."""
        if set(a.kept_params) != set(b.kept_params):
            raise ValueError("Piped actions should have same kept parameters.")
        if a.env_name != b.env_name:
            raise ValueError("Piped actions should have same env_name.")
        if a.local != b.local:
            raise ValueError("Piped actions should have same `local` value.")

    def __rshift__(self, other_action: "Action") -> "Action":
        """Uses :obj:`>>` to pipe two actions together to form a new action.

        The returned action executes the actions from left to right. The
        output of the left action becomes the input to the right action,
        provided their parameters and return types are compatible.
        """
        required = other_action.get_required_params()
        if len(required) != 1:
            raise ValueError(
                "Return type of the former action must mathces the parameter type "
                "of the later action."
            )
        Action._check_combinable(self, other_action)
        a_entry = self.entry
        b_entry = other_action.entry
        kept_params = self.kept_params.copy()
        # Kept params are forwarded to the second stage only; the first
        # stage's result becomes the second stage's positional argument.
        entry = lambda *args, **kwargs: b_entry(
            a_entry(*args, **kwargs),
            **{key: kwargs[key] for key in kwargs if key in kept_params},
        )
        return Action(
            name=f"{self.name}_pipe_{other_action.name}",
            description=f"First {self.description}. Then use the result of the "
            f"former as input, {other_action.description}",
            parameters=self.parameters,
            returns=other_action.returns,
            entry=entry,
            kept_params=self.kept_params,
            env_name=self.env_name,
            local=self.local,
        )

    def __add__(self, other_action: "Action") -> "Action":
        """Uses :obj:`+` to combine two actions sequentially to form a new
        action.

        The returned action executes the actions from left to right. Its
        return value will be the return value of the right action.

        Note:
            "+" operator only support two action with no required parameters.
        """
        self_required = self.get_required_params()
        other_required = other_action.get_required_params()
        if len(other_required) > 1 or len(self_required) > 1:
            raise ValueError(
                '"+" operator only support two action with no required parameters.'
            )
        Action._check_combinable(self, other_action)
        a_entry = self.entry
        b_entry = other_action.entry
        # Evaluate both, keep only the second result (tuple index [1]).
        entry = lambda **kwargs: (a_entry(**kwargs), b_entry(**kwargs))[1]
        return Action(
            name=f"{self.name}_then_{other_action.name}",
            description=f"{self.description} Then, {other_action.description}",
            parameters=EMPTY_MODEL,
            returns=other_action.returns,
            entry=entry,
            kept_params=self.kept_params,
            env_name=self.env_name,
            local=self.local,
        )

    def run(self, **kwargs) -> Any:
        """Verifies the action parameters then runs the action."""
        if self.kept_params:
            raise RuntimeError("There are unassigned kept parameters.")
        try:
            # Round-trips kwargs through the parameter model to coerce types.
            kwargs = self.parameters(**kwargs).model_dump()
        except ValidationError:
            # NOTE(review): validation failures are silently ignored and the
            # raw kwargs are passed through unchanged.
            pass  # TODO: Exeception handle
        return self.entry(**kwargs)

    def set_kept_param(self, **params) -> Self:
        """Bind the framework-injected parameters (e.g. env) into `entry`."""
        kept_params = {key: params[key] for key in params if key in self.kept_params}
        result = self.model_copy()
        result.kept_params = []
        result.entry = partial(self.entry, **kept_params)
        return result

    def get_required_params(self) -> dict[str, FieldInfo]:
        """Return the parameter fields that have no default value."""
        return {
            name: info
            for name, info in self.parameters.model_fields.items()
            if info.is_required()
        }

    @model_serializer
    def to_openai_json_schema(self) -> dict:
        """Gets openai json schema from an action"""
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.parameters.model_json_schema(),
            # "returns": self.returns.model_json_schema()["properties"]["returns"],
        }

    def to_raw_action(self) -> dict[str, Any]:
        """Gets serialized action for remote execution"""
        return {
            "name": self.name,
            "dumped_entry": callable_to_base64(self.entry),
            "kept_params": list(self.kept_params),
        }

    @classmethod
    def from_function(cls, func: Callable) -> Self:
        """Generates an action from functions annotated by @action."""
        if func.__doc__ is None:
            # raise RuntimeError("The action must have a Google-style docstring.")
            parameters_descriptions = None
            func_description = None
            return_description = None
        else:
            docstring = parse(func.__doc__)
            parameters_descriptions = {
                param.arg_name: param.description for param in docstring.params
            }
            func_description = docstring.short_description or ""
            if docstring.long_description:
                func_description += "\n" + docstring.long_description
            if docstring.returns:
                return_description = docstring.returns.description
            else:
                return_description = None
        sign = signature(func)
        params = sign.parameters
        fields = {}
        kept_params = []
        for param_name, p in params.items():
            # Don't add kept parameters in parameters' model
            if param_name in KEPT_PARAMS:
                kept_params.append(param_name)
                continue
            # Variable parameters are not supported
            if p.kind in [Parameter.VAR_POSITIONAL, Parameter.VAR_KEYWORD]:
                continue
            # If the parameter type is not specified, it defaults to typing.Any
            annotation = Any if p.annotation is Parameter.empty else p.annotation
            # Check if the parameter has a description
            param_description = None
            if parameters_descriptions is not None:
                param_description = parameters_descriptions.get(param_name, None)
            # Check if the parameter has a default value
            if p.default is Parameter.empty:
                fields[param_name] = (
                    annotation,
                    FieldInfo(description=param_description),
                )
            else:
                fields[param_name] = (annotation, FieldInfo(default=p.default))
        model: type[BaseModel] = create_model(func.__name__, **fields)  # type: ignore
        # insert return to parameters
        return_annotation = (
            Any if sign.return_annotation == Signature.empty else sign.return_annotation
        )
        return_model: type[BaseModel] = create_model(
            func.__name__ + "_return",
            returns=(
                return_annotation or NoneType,
                FieldInfo(description=return_description, init=False),  # type: ignore
            ),
        )
        action = cls(
            name=func.__name__,
            entry=func,
            parameters=model,
            returns=return_model,
            description=func_description,
            kept_params=kept_params,
        )
        return action


def _check_no_param(action: Action) -> Action:
    # Validator for the ClosedAction alias below.
    if len(action.get_required_params()) != 0:
        raise ValueError("ClosedAction should not accept any parameter.")
    return action


ClosedAction: TypeAlias = Annotated[Action, AfterValidator(_check_no_param)]
"""The action type alias with no required parameters"""


================================================
FILE: crab/core/models/agent_interface.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========

from enum import IntEnum
from typing import Any

from pydantic import BaseModel

from .action import Action


class MessageType(IntEnum):
    # How a prompt message payload should be interpreted.
    TEXT = 0
    IMAGE_JPG_BASE64 = 1


# A single prompt message: (payload, how-to-interpret-it).
Message = tuple[str, MessageType]


class ActionOutput(BaseModel):
    """One action the agent decided to take."""

    name: str  # action name within the target environment's action space
    arguments: dict[str, Any]  # keyword arguments for the action
    env: str | None = None  # target environment; None means the default


class BackendOutput(BaseModel):
    """Raw response of a backend model: free text and/or parsed actions."""

    message: str | None
    action_list: list[ActionOutput] | None


class EnvironmentInfo(BaseModel):
    """Description of an environment presented to the agent."""

    description: str
    action_space: list[Action]


================================================
FILE: crab/core/models/benchmark_interface.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from typing import Any from pydantic import BaseModel class StepResult(BaseModel): truncated: bool terminated: bool action_returns: Any evaluation_results: dict[str, Any] info: dict[str, Any] ================================================ FILE: crab/core/models/config.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
# ===========
from typing import Any

from pydantic import BaseModel

from .action import Action, ClosedAction
from .task import Task


class EnvironmentConfig(BaseModel):
    """Declarative configuration for a single environment."""

    name: str
    action_space: list[Action]
    observation_space: list[ClosedAction]
    description: str = ""
    # Action run to reset the environment before/after an episode, if any.
    reset: Action | None = None
    # URL of a remote crab server hosting this environment; None means local.
    remote_url: str | None = None
    # NOTE: pydantic deep-copies field defaults per instance, so the mutable
    # default below is safe here (unlike a plain function default).
    extra_attributes: dict[str, Any] = {}


class VMEnvironmentConfig(BaseModel):
    """An environment that runs inside a VM, reached over HTTP."""

    inside_environment: EnvironmentConfig
    remote_url: str = "http://192.168.0.0:8000"


class BenchmarkConfig(BaseModel):
    """Top-level benchmark definition: tasks plus the environments they run in."""

    name: str
    tasks: list[Task]
    environments: list[EnvironmentConfig]
    # Name of the environment used when an action does not specify one.
    default_env: str | None = None
    # True when tasks span multiple environments simultaneously.
    multienv: bool = False
    # Per-environment visual prompting tools, keyed env name -> tool name.
    prompting_tools: dict[str, dict[str, Action]] = {}
    root_action_space: list[Action] = []
    # Maximum number of agent steps per episode.
    step_limit: int = 30
    # Actions executed once before every task in the benchmark.
    common_setup: list[ClosedAction] = []
================================================ FILE: crab/core/models/evaluator.py ================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved.
=========== from pydantic import BaseModel, field_validator from .action import Action class Evaluator(Action): require_submit: bool = False @field_validator("returns", mode="after") @classmethod def must_return_bool(cls, v: type[BaseModel]) -> type[BaseModel]: if v.model_fields["returns"].annotation is not bool: raise ValueError("Evaluator must return bool.") return v def __and__(self, other: "Evaluator") -> "Evaluator": Action._check_combinable(self, other) result = self.model_copy() result.name = (f"{self.name}_and_{other.name}",) result.description = f"{self.description} In the same time, {other.description}" self_entry = self.entry other_entry = other.entry result.entry = lambda: self_entry() and other_entry() return result def __or__(self, other: "Evaluator") -> "Evaluator": Action._check_combinable(self, other) result = self.model_copy() result.name = (f"{self.name}_or_{other.name}",) result.description = ( f"{self.description} If the previous one fails {other.description}" ) self_entry = self.entry other_entry = other.entry result.entry = lambda: self_entry() or other_entry() return result def __invert__(self) -> "Evaluator": result = self.model_copy() result.name = f"not_{self.name}" result.description = ( f"Check if the following description is False. {self.description}" ) self_entry = self.entry result.entry = lambda: not self_entry() return result ================================================ FILE: crab/core/models/task.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any, Callable, Literal
from uuid import uuid4

import networkx as nx
from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    field_validator,
    model_serializer,
)

from .action import Action, ClosedAction
from .evaluator import Evaluator


class Task(BaseModel):
    """A benchmark task: description, evaluator graph, and setup/teardown."""

    # arbitrary_types_allowed is required because nx.DiGraph is not a
    # pydantic-aware type.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    id: str
    description: str
    evaluator: nx.DiGraph | Evaluator
    setup: list[ClosedAction] | ClosedAction = []
    teardown: list[ClosedAction] | ClosedAction = []
    extra_action: list[Action] = []

    @field_validator("evaluator")
    @classmethod
    def change_evaluator_to_graph(
        cls, evaluator: nx.DiGraph | Evaluator
    ) -> nx.DiGraph:
        """Normalize a bare Evaluator into a single-node DiGraph.

        (Return annotation corrected: this returns a graph, not a str.)
        """
        if isinstance(evaluator, Evaluator):
            graph = nx.DiGraph()
            graph.add_node(evaluator)
            return graph
        return evaluator

    @field_validator("setup", "teardown")
    @classmethod
    def to_list(cls, action: Action | list[Action]) -> list[Action]:
        """Wrap a single action into a one-element list."""
        if isinstance(action, Action):
            return [action]
        return action


class SubTask(BaseModel):
    """A template task that can be composed with others via typed attributes."""

    id: str
    description: str
    # Maps attribute name -> acceptable input type name(s); a bare str is
    # expanded to a one-element list by the validator below.
    attribute_dict: dict[str, list[str] | str]
    # Type name of this subtask's output, matched against other subtasks'
    # attribute types during graph generation.
    output_type: str
    output_generator: Callable[[Any], str] | Literal["manual"] | None = None
    evaluator_generator: Callable[[Any], nx.DiGraph] | None = None
    setup: list[ClosedAction] | ClosedAction = []
    teardown: list[ClosedAction] | ClosedAction = []
    extra_action: list[Action] = []

    def __hash__(self) -> int:
        # Hash by id so SubTask can be a networkx node / set member.
        return hash(self.id)

    @field_validator("attribute_dict")
    @classmethod
    def expand_attribute_type(
        cls,
        attribute_dict: dict[str, list[str] | str],
    ) -> dict[str, list[str]]:
        """Ensure every attribute maps to a list of type names."""
        # Copy first: mutating the caller's dict inside a validator would be
        # a surprising side effect.
        attribute_dict = attribute_dict.copy()
        for key in attribute_dict:
            if isinstance(attribute_dict[key], str):
                attribute_dict[key] = [attribute_dict[key]]
        return attribute_dict


class SubTaskInstance(BaseModel):
    """A SubTask with concrete attribute values and (optionally) its output."""

    task: SubTask
    attribute: dict[str, Any]
    output: str | None = None
    # NOTE(review): default_factory=uuid4 yields a UUID object although the
    # field is annotated str (defaults are not validated by default in
    # pydantic v2) — confirm whether str(uuid4()) was intended.
    id: str = Field(default_factory=uuid4)

    def __hash__(self) -> int:
        return hash(self.id)

    @model_serializer
    def dump_model(self) -> dict[str, Any]:
        # Serialize only the subtask id (not the whole SubTask) to keep the
        # stored JSON compact; get_task_from_file resolves ids back.
        return {
            "task": self.task.id,
            "attribute": self.attribute,
            "output": self.output,
        }


class GeneratedTask(BaseModel):
    """A composed task: its instances plus the adjacency list linking them."""

    description: str
    tasks: list[SubTaskInstance]
    # networkx adjacency-list text (see nx.generate_adjlist).
    adjlist: str
    id: str = Field(default_factory=uuid4)
================================================ FILE: crab/core/task_generator.py ================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# ruff: noqa: E501
import argparse
import importlib
import itertools
import json
import os
import random
from pathlib import Path

import networkx as nx
import yaml
from openai import OpenAI
from termcolor import colored

from .models import GeneratedTask, SubTask, SubTaskInstance, Task

SYSTEM_PROMPT_SINGLE = """
You are a wise operator who is familiar with both the Ubuntu and Android operating systems.
Our goal is to use the output of the source task as the input for the target task.
You should describe of the task they combined together using several imperative sentences.
You cannot provide any extra information such as detailed operation method, yet only combined the taks description together in a reasonable way.
You shouldn't fill in the input attribute wrapped by curly brackets.
Source task: Find out the city located at coordinate (8.65759263086632, 7.520403498426244) via Google Maps.
Target task: Set the screen background as the first figure of {city_name} in Google.
Answer: Using Google Maps, find the city located at coordinates (8.65759263086632,7.520403498426244), search Google for the first image of that city, and set this image as the desktop background on an Ubuntu system.
"""

USER_PROMPT_SINGLE = """
Source task: {task1}
Target task: {task2}
Answer:
"""

SELECT_USER_START = """
Source attribute: {source_task}
Target tasks: {target_tasks}
Select a task from target tasks
Answer:
"""

SELECT_SYSTEM_PROMPT = """
You are a wise operator who is familiar with both the Ubuntu and Android operating systems.
Our goal is to use the output of the source task as the input for the target task.
You should identify the most reasonable target task from the list, explain why you choose it, and output the description of the task they combined together using several imperative sentences.
It is crucial to establish a connection between the source and target tasks and select the best one as the output.
Remember, you must select at least one with the crucial output format.
You must include the provided value and every details in each task.
You must use "======" to seperate each part (selected task number, combined task description, and explanation)
Here is an example:
Source task: Find out the city located at coordinate (8.65759263086632, 7.520403498426244) via Google Maps.
Target tasks:
Task 0: Set the screen background as the first figure of {input attribute} in Google.
Task 1: Close the progress of {input attribute} app via task manager.
Task 2: Download {input attribute} from the app store.
Task 3: Create a PowerPoint with one page containing Mount Alps.jpg and named as {input attribute 2}.
Task 4: Send message {input attribute 1} to +81 09074540472.
Answer:
0
======
Using Google Maps, find the city located at coordinates (8.65759263086632,7.520403498426244), search Google for the first image of that city, and set this image as the desktop background on an Ubuntu system.
======
This task is the most relevant and directly utilizes the output of the source task. Finding the city provides us with a specific location which can easily lead to a visual representation. Searching for an image of the city to set as a background is a practical application that visually celebrates the discovery of the city's identity.
"""

SELECT_USER_PROMPT = """
Source task: {source_task}
Target tasks: {target_tasks}
Answer:
"""


class TaskGenerator:
    """Class to generate tasks based on a directed graph of subtasks."""

    # NOTE(review): mutable default arguments below are shared across calls;
    # they are only read here, but replacing them with None-sentinels would
    # be safer — confirm before changing the signature.
    def __init__(
        self, attribute_pool: dict[str, list] = {}, subtasks: list[SubTask] = []
    ):
        """
        Initializes the TaskGenerator object.

        Parameters:
            attribute_pool (dict): A dictionary mapping attribute types to lists of possible values.
            subtasks (list): A list of SubTask objects to be included in the task generation graph.
        """
        self.G = nx.DiGraph()
        self.attribute_pool = attribute_pool
        self.graph_generation(subtasks)
        self.task_mapping = {task.id: task for task in subtasks}
        # The OpenAI client requires a key to be present; "EMPTY" lets the
        # client construct against a local/compatible endpoint.
        if not os.getenv("OPENAI_API_KEY"):
            os.environ["OPENAI_API_KEY"] = "EMPTY"
        self.client = OpenAI()

    @classmethod
    def from_config(cls, config_path: str) -> "TaskGenerator":
        """
        Class method to create a TaskGenerator instance from a configuration file.

        Parameters:
            config_path (str): Path to the YAML configuration file.

        Returns:
            TaskGenerator: An instance of TaskGenerator.
        """
        with open(config_path, "r") as f:
            data = yaml.safe_load(f)
        subtask_data = data["subtask"]
        attribute_pool = data["attribute_pool"]
        subtask_list = [
            SubTask(
                id=subtask["id"],
                description=subtask["description"],
                # "a/b" in YAML means the attribute accepts type a or type b.
                attribute_dict={
                    key: subtask["attribute_dict"][key].split("/")
                    for key in subtask["attribute_dict"]
                },
                output_type=subtask["output_type"],
            )
            for subtask in subtask_data
        ]
        return cls(attribute_pool, subtask_list)

    def graph_generation(self, subtask_list: list[SubTask]) -> None:
        """Generates a directed graph from a list of subtasks based on output and input types."""
        self.G.add_nodes_from(subtask_list)
        # Edge input_node -> output_node exists when input_node's output type
        # can feed one of output_node's attributes (stored as attribute_name).
        for input_node in self.G.nodes:
            for output_node in self.G.nodes:
                for name, type_list in output_node.attribute_dict.items():
                    for type in type_list:
                        if type == input_node.output_type:
                            self.G.add_edge(
                                input_node, output_node, attribute_name=name
                            )

    def combine(self, current_description: str, target_description: str) -> str:
        """
        Combines two task descriptions into a single task description using GPT model.

        Parameters:
            current_description (str): The current task description.
            target_description (str): The target task description to combine.

        Returns:
            str: The combined task description.
        """
        user_content = USER_PROMPT_SINGLE.format(
            task1=current_description, task2=target_description
        )
        response = self.client.chat.completions.create(
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT_SINGLE},
                {"role": "user", "content": user_content},
            ],
            model="gpt-4-turbo-preview",
        )
        return response.choices[0].message.content

    def gpt_choice(
        self,
        current_description: str,
        outgoing_edges: list[tuple[SubTask, SubTask, str]],
    ) -> tuple[SubTask, dict[str, str], str, str]:
        """
        Determines the best task choice from a list of possible target tasks using GPT model.

        Parameters:
            current_description (str): Description of the current task.
            outgoing_edges (list): List of possible outgoing edges representing target tasks.

        Returns:
            tuple: A tuple containing the chosen SubTask, attributes, new description, and combined description.
        """
        target_neighbours = ""
        selected_attributes = []
        new_descriptions = []
        for idx, edge in enumerate(outgoing_edges):
            _, node, attribute_name = edge
            attributes = self._fill_task_attributes(node, attribute_name)
            selected_attributes.append(attributes)
            # Keep the connecting attribute as a "{placeholder}" so the model
            # sees where the source task's output will be inserted.
            kwargs = attributes.copy()
            kwargs[attribute_name] = "{" + attribute_name + "}"
            new_description = node.description.format(**kwargs)
            new_descriptions.append(new_description)
            target_neighbours += "Task {0}: {1}\n".format(idx, new_description)
        user_content = SELECT_USER_PROMPT.format(
            source_task=current_description,
            target_tasks=target_neighbours,
        )
        response = self.client.chat.completions.create(
            messages=[
                {"role": "system", "content": SELECT_SYSTEM_PROMPT},
                {"role": "user", "content": user_content},
            ],
            model="gpt-4-turbo-preview",
        )
        response_message = response.choices[0].message
        # Response format (per SELECT_SYSTEM_PROMPT): index ====== combined
        # description ====== explanation.
        answers = response_message.content.split("======")
        index = int(answers[0].strip())
        combined_description = answers[1].strip()
        return (
            outgoing_edges[index][1],
            selected_attributes[index],
            new_descriptions[index],
            combined_description,
        )

    def random_walk(
        self, current_description: str, start_node: SubTask, random_number: int
    ) -> tuple[SubTask, dict[str, str], str, str] | None:
        """
        Performs a random walk from the starting node to generate a task sequence.

        Parameters:
            current_description (str): The current task description.
            start_node (SubTask): The starting subtask node.
            random_number (int): Maximum number of edges to consider.

        Returns:
            tuple | None: The 4-tuple from gpt_choice (next SubTask, attributes,
                new description, combined description) if a next step is
                available, otherwise None. (Annotation corrected to match
                gpt_choice's return.)
        """
        out_edges = list(self.G.out_edges(start_node, data="attribute_name"))
        if len(out_edges) == 0:
            print(colored("\n*** No neighbour points, generation stopped ***\n", "red"))
            return None
        if start_node.output_type == "None":
            print(colored("\n*** Output None, generation will stop ***\n", "red"))
            return None
        # Sample at most random_number candidate edges for the model to rank.
        if random_number <= len(out_edges):
            select_edge_list = random.sample(out_edges, random_number)
        else:
            select_edge_list = out_edges
        return self.gpt_choice(current_description, select_edge_list)

    def _fill_task_attributes(self, task: SubTask, kept_attribute: str):
        """
        Fills the task attributes by randomly selecting values from the attribute pool,
        except the kept attribute.

        Parameters:
            task (SubTask): The task whose attributes need to be filled.
            kept_attribute (str): The attribute to exclude from filling.

        Returns:
            dict: A dictionary of filled attributes.
        """
        attribute_types = task.attribute_dict.copy()
        attribute_types.pop(kept_attribute)
        return self._select_random_attributes(attribute_types)

    def _select_random_attributes(
        self, type_dict: dict[str, list[str]]
    ) -> dict[str, str]:
        """
        Randomly selects attributes for a task from the attribute pool based on the
        type dictionary. (Parameter annotation corrected: values are lists of
        type names, as produced by SubTask.expand_attribute_type.)

        Parameters:
            type_dict (dict): A dictionary of attribute names to attribute type lists.

        Returns:
            dict: A dictionary of selected attributes.
        """
        result = {}
        for attr_name, attr_type_list in type_dict.items():
            # Pool candidate values from every acceptable type, then pick one.
            pool = []
            for attr_type in attr_type_list:
                if attr_type not in self.attribute_pool:
                    raise ValueError(f"{attr_type} not in attribute pool.")
                pool.extend(self.attribute_pool[attr_type])
            result[attr_name] = random.choice(pool)
        return result

    @staticmethod
    def generate_single_node_task(subtask: SubTask):
        """
        Generates a single node task based on a SubTask instance. Interactive:
        attribute values are read from stdin.

        Parameters:
            subtask (SubTask): The subtask to generate a task for.

        Returns:
            tuple: A tuple containing the task description and a directed graph of the task.
        """
        print(colored(f"Generating task: {subtask.description}\n", "green"))
        attributes = {}
        for name, type_name in subtask.attribute_dict.items():
            value = input(
                colored(f'Input attribute "{name}" ({type_name}): ', "yellow")
            )
            attributes[name] = value
        description = subtask.description.format(**attributes)
        result_graph = nx.DiGraph()
        result_graph.add_node(SubTaskInstance(task=subtask, attribute=attributes))
        return description, result_graph

    def combine_subtask_list(self, subtask_list: list[SubTask]):
        """
        Combines a list of subtasks into a single task sequence. Interactive:
        each subtask's output is read from stdin and fed into the next one.

        Parameters:
            subtask_list (list): A list of SubTask instances to combine.

        Returns:
            tuple: A tuple containing the final task description and a directed graph of the task sequence.
        """
        start_node = subtask_list[0]
        attributes = self._select_random_attributes(start_node.attribute_dict)
        result_graph = nx.DiGraph()
        output = input(
            colored(
                f"What is the output of {start_node.description.format(**attributes)}: ",
                "yellow",
            )
        )
        last_node = SubTaskInstance(
            task=start_node, attribute=attributes, output=output or None
        )
        result_graph.add_node(last_node)
        current_description = start_node.description.format(**attributes)
        for task in subtask_list[1:]:
            current_description = self.combine(current_description, task.description)
            # The previous output fills the (single) first attribute of the
            # next subtask.
            key = next(iter(task.attribute_dict.keys()))
            attributes = {key: output}
            output = input(
                colored(
                    f"What is the output of {task.description.format(**attributes)}: ",
                    "yellow",
                )
            )
            current_node = SubTaskInstance(
                task=task, attribute=attributes, output=output or None
            )
            result_graph.add_edge(last_node, current_node)
            last_node = current_node
        return current_description, result_graph

    def combine_two_subtasks(
        self, sub_task_id_1: int, sub_task_id_2: int
    ) -> tuple[str, nx.DiGraph]:
        """
        Combines two subtasks into a single task sequence based on user input.

        Parameters:
            sub_task_id_1 (int): ID of the first subtask.
            sub_task_id_2 (int): ID of the second subtask.

        Returns:
            tuple: A tuple containing the combined task description and a directed graph of the task sequence.
        """
        sub_task_1 = self.task_mapping[sub_task_id_1]
        sub_task_2 = self.task_mapping[sub_task_id_2]
        print(colored(f"\nTask 1: {sub_task_1.description}", "cyan"))
        print(colored(f"Task 2: {sub_task_2.description}\n", "cyan"))
        attributes_1 = {}
        for name, types in sub_task_1.attribute_dict.items():
            value = input(
                colored(
                    f'Input attribute "{name}" ({types}) for the first task: ', "yellow"
                )
            )
            attributes_1[name] = value
        description_1 = sub_task_1.description.format(**attributes_1)
        output_1 = input(
            colored(
                f'What is the output of {description_1} ("{sub_task_1.output_type}"): ',
                "yellow",
            )
        )
        print(
            colored(
                f"\nThe output type of the first subtask is '{sub_task_1.output_type}'.\n",
                "cyan",
            )
        )
        attributes_2 = {}
        for name, types in sub_task_2.attribute_dict.items():
            # Wire task 1's output into task 2 either automatically (type
            # match) or after explicit user confirmation.
            if (
                sub_task_1.output_type in types
                or input(
                    colored(
                        f"Can the output '{sub_task_1.output_type}' be used as the '{name}' ({types}) of the second task? (yes/no): ",
                        "yellow",
                    )
                )
                .strip()
                .lower()
                == "yes"
            ):
                attributes_2[name] = output_1
            else:
                value = input(
                    colored(
                        f'Input attribute "{name}" ({types}) for the second task: ',
                        "yellow",
                    )
                )
                attributes_2[name] = value
        description_2 = sub_task_2.description.format(**attributes_2)
        # Let the user re-roll the GPT combination until satisfied.
        while True:
            combined_description = self.combine(description_1, description_2)
            print(
                colored(f"\n*** Combined Task: {combined_description} ***\n", "green")
            )
            if (
                input(
                    colored(
                        "Do you want to re-generate the combined task? (yes/no): ",
                        "yellow",
                    )
                )
                .strip()
                .lower()
                != "yes"
            ):
                break
        result_graph = nx.DiGraph()
        node1 = SubTaskInstance(
            task=sub_task_1, attribute=attributes_1, output=output_1
        )
        node2 = SubTaskInstance(task=sub_task_2, attribute=attributes_2)
        result_graph.add_node(node1)
        result_graph.add_node(node2)
        result_graph.add_edge(node1, node2)
        return combined_description, result_graph

    def task_generation(
        self,
        start_id: int | None = None,
        max_iter: int = 3,
        random_number: int = 5,
    ) -> tuple[str, list[tuple]]:
        """
        Generates a sequence of tasks starting from a given subtask ID or randomly.

        Parameters:
            start_id (int | None): The ID of the starting subtask or None to choose randomly.
            max_iter (int): The maximum number of iterations to perform in the generation process.
            random_number (int): The maximum number of neighbors to consider for random walk.

        Returns:
            tuple: The final task description and the list of per-step tuples
                (annotation corrected: entries are the tuples appended below,
                not bare SubTask objects).
        """
        description = ""
        task_list = []
        if start_id is None:
            start_node: SubTask = random.choice(list(self.G.nodes))
        else:
            # NOTE(review): if start_id matches no node, start_node stays
            # unbound and the next line raises UnboundLocalError — confirm
            # whether callers guarantee a valid id.
            for node in self.G.nodes:
                if node.id == start_id:
                    start_node: SubTask = node
                    break
        attributes = self._select_random_attributes(start_node.attribute_dict)
        description = start_node.description.format(**attributes)
        task_list.append((start_node, attributes, description))
        current_node = start_node
        for _ in range(max_iter - 1):
            next_node = self.random_walk(
                current_description=description,
                start_node=current_node,
                random_number=random_number,
            )
            if next_node is None:
                break
            task_list.append(next_node)
            # next_node is gpt_choice's 4-tuple: [3] is the combined
            # description, [0] the chosen SubTask.
            description = next_node[3]
            current_node = next_node[0]
        return description, task_list

    @staticmethod
    def generate_evaluator(
        subtasks_graph: nx.DiGraph,
    ):
        """
        Generates an evaluator graph from a directed graph of subtask instances.

        Parameters:
            subtasks_graph (nx.DiGraph): A directed graph of subtask instances.

        Returns:
            nx.DiGraph: A directed graph representing the combined evaluator.
        """
        evaluator_map = {}
        for node in subtasks_graph.nodes:
            evaluator_map[node.id] = node.task.evaluator_generator(**node.attribute)
        combined_evaluator_graph = nx.union_all(list(evaluator_map.values()))
        # Chain evaluators: every sink of the upstream subtask's evaluator
        # connects to every source of the downstream one.
        for from_node, to_node in subtasks_graph.edges:
            from_node_evaluator = evaluator_map[from_node.id]
            sink_nodes = [
                node
                for node, out_degree in from_node_evaluator.out_degree()
                if out_degree == 0
            ]
            to_node_evaluator = evaluator_map[to_node.id]
            start_nodes = [
                node
                for node, in_degree in to_node_evaluator.in_degree()
                if in_degree == 0
            ]
            combined_evaluator_graph.add_edges_from(
                itertools.product(sink_nodes, start_nodes)
            )
        return combined_evaluator_graph

    @staticmethod
    def dump_generated_task(
        description,
        task_instance_graph,
        dir_path=".",
    ):
        """
        Saves a generated task to a file.

        Parameters:
            description (str): The description of the generated task.
            task_instance_graph (nx.DiGraph): The directed graph of the task instance.
            dir_path (str): The directory path where the task file will be saved.
        """
        # Relabel nodes to integer ids so the adjacency list is compact and
        # stable for re-parsing in get_task_from_file.
        mapping = {node: idx for idx, node in enumerate(task_instance_graph.nodes)}
        id_graph = nx.relabel_nodes(task_instance_graph, mapping)
        generated_task = GeneratedTask(
            description=description,
            tasks=list(task_instance_graph.nodes),
            adjlist="\n".join(nx.generate_adjlist(id_graph)),
        )
        file_path = Path(dir_path) / f"{generated_task.id}.json"
        with open(file_path, "w") as f:
            f.write(generated_task.model_dump_json(indent=4))
        print(
            colored(
                "\n====================================================================\n",
                "magenta",
            )
        )
        print(colored(f"Task saved to: {file_path}", "magenta"))

    def get_task_from_file(self, file_name) -> Task:
        """
        Loads a task from a file.

        Parameters:
            file_name (str): The file name containing the task data.

        Returns:
            Task: An instance of Task loaded from the file.
        """
        with open(file_name, "r") as f:
            config = json.load(f)
        description = config["description"]
        graph_map = {}
        for idx, task_config in enumerate(config["tasks"]):
            graph_map[idx] = SubTaskInstance(
                task=self.task_mapping[task_config["task"]],
                attribute=task_config["attribute"],
                output=task_config["output"],
            )
        lines = config["adjlist"].split("\n")
        # NOTE(review): parse_adjlist returns an *undirected* Graph by
        # default; generate_evaluator reads .edges as directed. Confirm
        # whether create_using=nx.DiGraph was intended.
        graph = nx.parse_adjlist(lines, nodetype=int)
        subtask_graph = nx.relabel_nodes(graph, graph_map)
        evaluator = self.generate_evaluator(subtask_graph)
        # Deduplicate setup/teardown/extra actions shared by several subtasks.
        setup_set = set()
        teardown_set = set()
        extra_action_set = set()
        for node in subtask_graph.nodes:
            setup_set.update(node.task.setup)
            teardown_set.update(node.task.teardown)
            extra_action_set.update(node.task.extra_action)
        return Task(
            id=config["id"],
            description=description,
            evaluator=evaluator,
            setup=list(setup_set),
            teardown=list(teardown_set),
            extra_action=list(extra_action_set),
        )


def load_subtasks(version):
    """
    Loads subtasks from specified benchmark version modules.

    Parameters:
        version (str): The version of the benchmark to load subtasks from.

    Returns:
        tuple: A tuple containing two collections of subtasks
            (android first, then ubuntu).
    """
    a_subtasks_module = importlib.import_module(
        f"benchmarks.crab-benchmark-{version}.subtasks.a_subtasks"
    )
    u_subtasks_module = importlib.import_module(
        f"benchmarks.crab-benchmark-{version}.subtasks.u_subtasks"
    )
    return a_subtasks_module.collection, u_subtasks_module.collection


def generate_length1_all(
    generator: TaskGenerator, dir_path: str, subtask_collection: list
):
    """
    Generates tasks for all subtasks in a collection and saves them.

    Parameters:
        generator (TaskGenerator): The task generator instance.
        dir_path (str): The directory path where the tasks will be saved.
        subtask_collection (list): The collection of subtasks to generate tasks for.
    """
    for task in subtask_collection:
        description, graph = generator.generate_single_node_task(task)
        generator.dump_generated_task(description, graph, dir_path)
    print(
        colored(
            "\n==================== Task Generation Completed ====================\n",
            "magenta",
        )
    )


def generate_length1_by_id(generator: TaskGenerator, dir_path: str):
    """
    Generates a single task for a specified subtask ID and saves it.
    Loops forever, prompting for ids; terminate with Ctrl-C.

    Parameters:
        generator (TaskGenerator): The task generator instance.
        dir_path (str): The directory path where the task will be saved.
    """
    while True:
        subtask_id = input(colored("Please input the subtask ID: ", "yellow"))
        if subtask_id in generator.task_mapping:
            task = generator.task_mapping[subtask_id]
            print()
            description, graph = generator.generate_single_node_task(task)
            generator.dump_generated_task(description, graph, dir_path)
            print(
                colored(
                    "\n==================== Task Generation Completed ====================\n",
                    "magenta",
                )
            )
        else:
            print(colored("Invalid subtask ID. Please try again.", "red"))


def generate_length2_manual(generator: TaskGenerator, dir_path: str):
    """
    Manually generates a two-step task sequence from user-specified subtask IDs and
    saves it. Loops forever, prompting for id pairs; terminate with Ctrl-C.

    Parameters:
        generator (TaskGenerator): The task generator instance.
        dir_path (str): The directory path where the task sequence will be saved.
    """
    while True:
        sub_task_id_1 = input(
            colored("Please input the id of the first subtask: ", "yellow")
        )
        sub_task_id_2 = input(
            colored("Please input the id of the second subtask: ", "yellow")
        )
        if (
            sub_task_id_1 in generator.task_mapping
            and sub_task_id_2 in generator.task_mapping
        ):
            description, graph = generator.combine_two_subtasks(
                sub_task_id_1=sub_task_id_1, sub_task_id_2=sub_task_id_2
            )
            generator.dump_generated_task(description, graph, dir_path)
            print(
                colored(
                    "\n==================== Task Composition Completed ====================\n",
                    "magenta",
                )
            )
        else:
            missing_ids = [
                id
                for id in [sub_task_id_1, sub_task_id_2]
                if id not in generator.task_mapping
            ]
            print(
                colored(
                    f"Invalid input: ID {', '.join(missing_ids)} not found. Please try again.",
                    "red",
                )
            )


def main():
    """CLI entry point: parse arguments and dispatch to a generation mode."""
    parser = argparse.ArgumentParser(description="Task Generator for CRAB Benchmarks")
    parser.add_argument(
        "--version", type=str, default="v0", help="Benchmark version (e.g., v0, v1)"
    )
    parser.add_argument(
        "--mode",
        type=str,
        choices=[
            "generate_length1_all",
            "generate_length2_manual",
            "generate_length1_by_id",
        ],
        help="Mode to run the task generator",
    )
    parser.add_argument(
        "--dir_path", type=str, help="Directory path to save the generated tasks"
    )
    parser.add_argument(
        "--config_path", type=str, help="Path to the task generation configuration file"
    )
    args = parser.parse_args()
    Path(args.dir_path).mkdir(parents=True, exist_ok=True)
    a_collection, u_collection = load_subtasks(args.version)
    all_collection = u_collection + a_collection
    print(
        colored(
            "\n==================== Task Generation Starting ====================\n",
            "magenta",
        )
    )
    if args.mode == "generate_length1_all":
        generator = TaskGenerator(subtasks=all_collection)
        generate_length1_all(generator, args.dir_path, all_collection)
    elif args.mode == "generate_length2_manual":
        # Only this mode needs the attribute pool from the config file.
        with open(args.config_path, "r") as f:
            data = yaml.safe_load(f)
        attribute_pool = data["attribute_pool"]
        generator = TaskGenerator(attribute_pool, all_collection)
        generate_length2_manual(generator, args.dir_path)
    elif args.mode == "generate_length1_by_id":
        generator = TaskGenerator(subtasks=all_collection)
        generate_length1_by_id(generator, args.dir_path)
    else:
        print(
            colored(
                "Invalid mode selected. Please choose 'generate_length1_all', 'generate_length2_manual', or 'generate_length1_by_id'.",
                "red",
            )
        )


if __name__ == "__main__":
    main()
================================================ FILE: crab/environments/__init__.py ================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
================================================ FILE: crab/environments/template.py ================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from crab.core import Environment, EnvironmentConfig, action @action def set_state(value: bool, env: Environment) -> None: """ Set system state to the given value. Args: value (bool): The given value to set the system state. """ env.state = value @action def current_state(env: Environment) -> bool: """ Get current system state. """ return env.state template_environment_config = EnvironmentConfig( name="template_env", action_space=[set_state], observation_space=[current_state], description="A test environment", info=None, reset=set_state(False), ) ================================================ FILE: crab/server/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== ================================================ FILE: crab/server/api.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
import json

from fastapi import APIRouter, Request
from fastapi.responses import JSONResponse, PlainTextResponse

from crab.utils import (
    base64_to_callable,
    decrypt_message,
    encrypt_message,
    generate_key_from_env,
)

from .logger import crab_logger as logger

api_router = APIRouter()


@api_router.post("/raw_action")
async def raw_action(request: Request):
    """Perform the specified action with given parameters.

    The request body is a JSON document (optionally AES-encrypted when
    `CRAB_ENC_KEY` is configured) holding an `action` descriptor with a
    dill-serialized entry point and a `parameters` mapping. The response is
    encrypted if and only if the request was.
    """
    enc_key = generate_key_from_env()
    # Read the raw body; decrypt it first when an encryption key is set.
    body_text = (await request.body()).decode("utf-8")
    if enc_key is not None:
        body_text = decrypt_message(body_text, enc_key)
    payload = json.loads(body_text)
    action = payload["action"]
    parameters = payload["parameters"]
    # Rebuild the callable shipped by the client.
    entry = base64_to_callable(action["dumped_entry"])
    logger.info(f"remote action: {action['name']} received. parameters: {parameters}")
    # Inject the server-side environment when the action declared it kept
    # an `env` parameter.
    if "env" in action["kept_params"]:
        parameters["env"] = request.app.environment
    resp_data = {"action_returns": entry(**parameters)}
    if enc_key is None:
        return JSONResponse(content=resp_data)
    encrypted = encrypt_message(json.dumps(resp_data), enc_key)
    return PlainTextResponse(content=encrypted)
import argparse

from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    # Server bind address/port and the name of the environment config to load.
    HOST: str = "127.0.0.1"
    PORT: int = 8000
    ENVIRONMENT: str = "template_environment_config"


class EnvSettings(BaseSettings):
    # X display exported to the process environment before startup.
    DISPLAY: str = ":0"


def parse_args():
    """Parse command-line overrides for the server settings.

    Returns:
        argparse.Namespace: HOST, PORT and ENVIRONMENT (each None when the
        corresponding flag was not supplied).
    """
    parser = argparse.ArgumentParser(description="Application settings")
    option_specs = (
        ("--HOST", str, "Host of the application"),
        ("--PORT", int, "Port of the application"),
        ("--ENVIRONMENT", str, "Environment to be loaded"),
    )
    for flag, flag_type, help_text in option_specs:
        parser.add_argument(flag, type=flag_type, help=help_text)
    return parser.parse_args()
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import sys

from fastapi import Request
from fastapi.exception_handlers import (
    request_validation_exception_handler as _request_validation_exception_handler,
)
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse, PlainTextResponse

from .logger import crab_logger as logger


async def request_validation_exception_handler(
    request: Request, exc: RequestValidationError
) -> JSONResponse:
    """
    This is a wrapper to the default RequestValidationException handler of FastAPI.
    This function will be called when client input is not valid.

    Logs the validation errors together with the offending body and query
    parameters before delegating to FastAPI's default handler.
    """
    body = await request.body()
    query_params = request.query_params._dict  # pylint: disable=protected-access
    detail = {
        "errors": exc.errors(),
        "body": body.decode(),
        "query_params": query_params,
    }
    logger.info(detail)
    return await _request_validation_exception_handler(request, exc)


async def unhandled_exception_handler(
    request: Request, exc: Exception
) -> JSONResponse:
    """
    This middleware will log all unhandled exceptions.
    Unhandled exceptions are all exceptions that are not HTTPExceptions or
    RequestValidationErrors.

    Fix: the return annotation previously claimed PlainTextResponse while the
    function actually returns a JSONResponse.
    """
    # Client address may be absent (e.g. in tests), hence the getattr chain.
    client = getattr(request, "client", None)
    host = getattr(client, "host", None)
    port = getattr(client, "port", None)
    url = (
        f"{request.url.path}?{request.query_params}"
        if request.query_params
        else request.url.path
    )
    # sys.exc_info() still holds the in-flight exception at this point.
    exception_type, exception_value, exception_traceback = sys.exc_info()
    exception_name = getattr(exception_type, "__name__", None)
    logger.error(
        f'{host}:{port} - "{request.method} {url}" 500 Internal Server Error '
        f"<{exception_name}: {exception_value}>"
    )
    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal Server Error",
            "message": "An unexpected error occurred.",
        },
    )
import logging


def _configured_logger(name: str) -> logging.Logger:
    """Return the named logger forced to INFO level."""
    log = logging.getLogger(name)
    log.setLevel(logging.INFO)
    return log


# Loggers used by the server: uvicorn's own and the crab application logger.
uvicorn_logger = _configured_logger("uvicorn")
crab_logger = _configured_logger("crab-server")

# One shared line format for both console and file output.
_LOG_FORMAT = (
    "[%(asctime)s %(process)d:%(threadName)s] %(name)s - "
    "%(levelname)s - %(message)s | %(filename)s:%(lineno)d"
)

# dictConfig schema handed to uvicorn.run(log_config=...): everything goes to
# stderr and is appended to info.log.
LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "default": {
            "()": "uvicorn.logging.DefaultFormatter",
            "format": _LOG_FORMAT,
        },
        "logformat": {"format": _LOG_FORMAT},
    },
    "handlers": {
        "file_handler": {
            "class": "logging.FileHandler",
            "level": "INFO",
            "formatter": "logformat",
            "filename": "info.log",
            "encoding": "utf8",
            "mode": "a",
        },
        "default": {
            "formatter": "default",
            "class": "logging.StreamHandler",
            "stream": "ext://sys.stderr",
        },
    },
    "loggers": {
        "uvicorn.error": {
            "level": "INFO",
            "handlers": ["default", "file_handler"],
            "propagate": False,
        }
    },
    "root": {
        "level": "INFO",
        "handlers": ["default", "file_handler"],
        "propagate": False,
    },
}
import os

import uvicorn
from fastapi import FastAPI
from fastapi.exceptions import RequestValidationError

from crab import EnvironmentConfig, create_environment

from .api import api_router
from .config import EnvSettings, Settings, parse_args
from .exception_handlers import (
    request_validation_exception_handler,
    unhandled_exception_handler,
)
from .logger import LOGGING_CONFIG
from .middleware import log_request_middleware
from .utils import get_benchmarks_environments


def init(environment_config: EnvironmentConfig) -> FastAPI:
    """Build the FastAPI application serving the given environment.

    Wires up request logging, validation/unhandled exception handlers and the
    API routes, then instantiates the environment and attaches it to the app
    so endpoints can reach it via `request.app.environment`.
    """
    app = FastAPI(title="Desktop Agent Benchmark Environment Server")
    app.middleware("http")(log_request_middleware)
    app.add_exception_handler(
        RequestValidationError, request_validation_exception_handler
    )
    app.add_exception_handler(Exception, unhandled_exception_handler)
    app.include_router(api_router)
    app.environment = create_environment(environment_config)
    return app


if __name__ == "__main__":
    # Export every EnvSettings field (e.g. DISPLAY) into the process
    # environment so libraries launched later pick them up.
    env_settings = EnvSettings()
    for field in env_settings.model_fields.keys():
        value = getattr(env_settings, field)
        # NOTE(review): assumes every EnvSettings field is a str — confirm
        # if non-string fields are ever added.
        os.environ[field] = value
    # Command-line flags (when given) override the pydantic Settings defaults.
    args = parse_args()
    kwargs = {k: v for k, v in vars(args).items() if v is not None}
    settings = Settings(**kwargs)
    benchmarks, environments = get_benchmarks_environments()
    # Look up the environment config by name and start serving it.
    app = init(environment_config=environments[settings.ENVIRONMENT])
    app.server_settings = settings
    uvicorn.run(
        app,
        host=settings.HOST,
        port=settings.PORT,
        access_log=False,
        log_config=LOGGING_CONFIG,
    )


# ---- crab/server/middleware.py ----
import http
import time

from fastapi import Request

from .logger import uvicorn_logger as logger


async def log_request_middleware(request: Request, call_next):
    """
    This middleware will log all requests and their processing time.
    E.g. log: 0.0.0.0:1234 - GET /ping 200 OK 1.00ms
    """
    url = (
        f"{request.url.path}?{request.query_params}"
        if request.query_params
        else request.url.path
    )
    start_time = time.time()
    response = await call_next(request)
    # Wall-clock processing time in milliseconds, rendered with two decimals.
    process_time = (time.time() - start_time) * 1000
    formatted_process_time = "{0:.2f}".format(process_time)
    # Client address may be absent (e.g. under test clients).
    host = getattr(getattr(request, "client", None), "host", None)
    port = getattr(getattr(request, "client", None), "port", None)
    try:
        status_phrase = http.HTTPStatus(response.status_code).phrase
    except ValueError:
        # Non-standard status codes have no registered phrase.
        status_phrase = ""
    logger.info(
        f'{host}:{port} - "{request.method} {url}" {response.status_code} '
        f"{status_phrase} {formatted_process_time}ms"
    )
    return response


# ---- crab/server/utils.py ----
import importlib
import inspect
import pkgutil


def get_instances(package, class_type):
    """Collect module-level instances of `class_type` across `package`'s
    direct (non-package) submodules.

    Returns:
        dict: attribute name -> matching instance.
    """
    instance_dict = {}
    # Iterate through all modules in the specified package
    for _, name, ispkg in pkgutil.iter_modules(
        package.__path__, package.__name__ + "."
    ):
        if ispkg:
            continue  # Skip subpackages
        module = importlib.import_module(name)
        # NOTE(review): the inner `name` shadows the outer module name from
        # the enclosing loop; harmless today because the outer loop
        # reassigns it each iteration, but worth renaming.
        for name, obj in inspect.getmembers(module):
            if isinstance(obj, class_type):
                instance_dict[name] = obj
    return instance_dict


def get_benchmarks_environments():
    """Return (benchmark_configs, environment_configs) discovered in the
    `crab.benchmarks` and `crab.environments` packages.
    """
    # Imported inside the function — presumably to avoid a circular import
    # at module load time; confirm before moving to the top level.
    from crab import BenchmarkConfig, EnvironmentConfig, benchmarks, environments

    benchmark_configs = get_instances(benchmarks, BenchmarkConfig)
    environment_configs = get_instances(environments, EnvironmentConfig)
    return benchmark_configs, environment_configs


# ---- crab/utils/__init__.py ----
from crab.utils.common import (
    base64_to_callable,
    base64_to_image,
    callable_to_base64,
    image_to_base64,
)
from crab.utils.encryption import (
    decrypt_message,
    encrypt_message,
    generate_key_from_env,
)

# Public API of crab.utils.
__all__ = [
    "base64_to_image",
    "image_to_base64",
    "callable_to_base64",
    "base64_to_callable",
    "decrypt_message",
    "encrypt_message",
    "generate_key_from_env",
]
=========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import base64 from io import BytesIO from typing import Callable import dill from PIL import Image def base64_to_image(encoded: str) -> Image.Image: return Image.open(BytesIO(base64.b64decode(encoded))) def image_to_base64(image: Image.Image) -> str: img_byte_arr = BytesIO() image.save(img_byte_arr, format="png") return base64.b64encode(img_byte_arr.getvalue()).decode("utf-8") def callable_to_base64(func: Callable) -> str: return base64.b64encode(dill.dumps(func, recurse=True)).decode("utf-8") def base64_to_callable(encoded: str) -> Callable: return dill.loads(base64.b64decode(encoded)) def json_expand_refs(schema: dict | list, defs: dict | None = None): """Recursively expand `$ref` and `allOf` in the JSON. This function walks through the schema object, replacing any `$ref` with its corresponding definition found in `$defs`. It also expands subschemas defined in `allOf` by merging their resolved definitions into a single schema. Args: schema: The JSON schema (or sub-schema). defs: The collection of definitions for `$ref` expansion. If None, it will look for `$defs` at the root of the schema. Returns: The schema with all `$ref` and `allOf` expanded. Raises: ValueError: If a reference cannot be resolved with the provided `$defs`. 
""" # If defs is None, it means we're at the root of the schema if defs is None: defs = schema.pop("$defs", {}) if isinstance(schema, dict): # Process `$ref` by replacing it with the referenced definition if "$ref" in schema: ref_path = schema["$ref"].split("/") ref_name = ref_path[-1] if ref_name in defs: return json_expand_refs(defs[ref_name], defs) else: raise ValueError(f"Reference {schema['$ref']} not found in $defs.") # Process `allOf` by combining all subschemas elif "allOf" in schema: combined_schema = {} for subschema in schema["allOf"]: expanded_subschema = json_expand_refs(subschema, defs) # Merge the expanded subschema into the combined_schema for key, value in expanded_subschema.items(): combined_schema[key] = value return combined_schema # Recursively process all keys in the dictionary else: return {key: json_expand_refs(value, defs) for key, value in schema.items()} elif isinstance(schema, list): # Recursively process each item in the list return [json_expand_refs(item, defs) for item in schema] # If it's neither a dict nor a list, return it as is (e.g., int, str) return schema ================================================ FILE: crab/utils/encryption.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
import base64
import hashlib
import logging
import os
from typing import Optional

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

logger = logging.getLogger("encryption")

# Wire layout of an encrypted message: 12-byte nonce | ciphertext | 16-byte tag.
_NONCE_LEN = 12
_TAG_LEN = 16


def encrypt_message(plaintext: str, key: bytes) -> str:
    """Encrypts a message using a key with AES 256 encryption.

    Args:
        plaintext (str): The message to encrypt.
        key (bytes): The encryption key, should be 256 bits.

    Returns:
        str: The encrypted message encoded in base64.
    """
    nonce = os.urandom(_NONCE_LEN)
    encryptor = Cipher(
        algorithms.AES(key), modes.GCM(nonce), backend=default_backend()
    ).encryptor()
    ciphertext = encryptor.update(plaintext.encode()) + encryptor.finalize()
    # nonce + ciphertext + GCM tag, base64-encoded for transport.
    return base64.b64encode(nonce + ciphertext + encryptor.tag).decode("utf-8")


def decrypt_message(encrypted: str, key: bytes) -> str:
    """Decrypts an encrypted message using a key with AES 256 encryption.

    Args:
        encrypted (str): The encrypted message encoded in base64.
        key (bytes): The encryption key, should be 256 bits.

    Returns:
        str: The decrypted message.
    """
    raw = base64.b64decode(encrypted)
    nonce = raw[:_NONCE_LEN]
    ciphertext = raw[_NONCE_LEN:-_TAG_LEN]
    tag = raw[-_TAG_LEN:]
    decryptor = Cipher(
        algorithms.AES(key), modes.GCM(nonce, tag), backend=default_backend()
    ).decryptor()
    return (decryptor.update(ciphertext) + decryptor.finalize()).decode("utf-8")


def generate_key_from_env() -> Optional[bytes]:
    """Generate the encryption key from the environment variable `CRAB_ENC_KEY`.

    Returns:
        Optional[bytes]: The encryption key. If the environment variable is
            not set or empty, return None.
    """
    enc_key = os.environ.get("CRAB_ENC_KEY")
    # don't encrypt as long as the key is an empty value
    if not enc_key:
        logger.warning("CRAB_ENC_KEY is not set, connection will not be encrypted.")
        return None
    return hashlib.sha256(enc_key.encode("utf-8")).digest()


# ---- crab/utils/measure.py ----
import logging
import time
from functools import wraps

logger = logging.getLogger(__name__)

# Misc logger setup so a debug log statement gets printed on stdout.
import logging
import time
from functools import wraps

logger = logging.getLogger(__name__)

# Misc logger setup so a debug log statement gets printed on stdout.
handler = logging.StreamHandler()
log_format = "%(asctime)s %(levelname)s -- %(message)s"
formatter = logging.Formatter(log_format)
handler.setFormatter(formatter)
logger.addHandler(handler)


def timed(func):
    """This decorator logs the execution time for the decorated function.

    Args:
        func: The callable to wrap.

    Returns:
        The wrapped callable; it behaves identically but logs an INFO line
        with the elapsed time (and, for methods, the receiver's class name
        and `.name` attribute when present).
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        end = time.time()
        # For bound methods args[0] is the receiver; report its class name.
        func_class = args[0].__class__.__name__ if args else ""
        info = "{}.{} ran in {}s".format(
            func_class,
            func.__name__,
            round(end - start, 2),
        )
        # Bug fix: guard `args` before indexing — the original accessed
        # args[0] unconditionally here and raised IndexError for any
        # zero-argument decorated function.
        if args and hasattr(args[0], "name"):
            info += f" with name {args[0].name}"
        logger.info(info)
        return result

    return wrapper
Specifically, we publish a disk image that contains all required software and configurations on Google Cloud; you can use your own Google account to create a cloud computer from this disk image and use [google remote desktop](https://remotedesktop.google.com/access/) to connect to it. This method doesn't have any hardware limitations, and once you set it up you can run the experiment immediately. As a tradeoff, the cloud computer that meets the minimum hardware requirement costs around $0.4 per hour (depending on the machine zone).

We connect to the Android environment via ADB, so any Android device, from an emulator to a physical smartphone, will work. You should ensure ADB is installed on your system and can be called directly from the command line. In our experiment, we used the built-in emulator of [Android Studio](https://developer.android.com/studio) to create a Google Pixel 8 Pro virtual device with the release name *R* and installed the necessary extra apps.

#### Tasks

We manage our task dataset using a CRAB-recommended method. Sub-tasks are defined through Pydantic models written in Python code, and composed tasks are defined in JSON format, typically combining several sub-tasks. The sub-tasks are defined in [android_subtasks](./dataset/android_subtasks.py) and [ubuntu_subtasks](./dataset/ubuntu_subtasks.py). The JSON files storing composed tasks are categorized into [android](./dataset/android/), [ubuntu](./dataset/ubuntu/), and [cross-platform](./dataset/cross/). The tasks in the android and ubuntu directories are single-environment tasks, and those in the cross directory are cross-environment tasks. Additionally, we create several tasks by hand instead of composing sub-tasks to provide semantically more meaningful tasks; these are found in [handmade tasks](./dataset/handmade_tasks.py).

## Experiment

After setting up the environment, you can start the experiment. A brief overview of the experiment is as follows:

1.
Open the Ubuntu environment virtual machine and the Android environment emulator. 2. Start the CRAB server in the Ubuntu environment and get its IP address and port. Let's say they are `192.168.122.72` and `8000`. 3. Choose a task. As an example, we take the task with ID `a3476778-e512-40ca-b1c0-d7aab0c7f18b` from [handmade_tasks](./dataset/handmade_tasks.py). The task is: "Open the 'Tasks' app on Android, check the first incomplete task, then perform the task according to its description." 4. Run [main.py](./main.py) with the command `poetry run python -m crab-benchmark-v0.main --model gpt4o --policy single --remote-url http://192.168.122.72:8000 --task-id a3476778-e512-40ca-b1c0-d7aab0c7f18b`. In this command, `--model gpt4o` and `--policy single` determine the agent system, `--remote-url` specifies the Ubuntu environment interface, and `--task-id` indicates the task to be performed. #### Model For open source models, we use [VLLM](https://github.com/vllm-project/vllm) to host Pixtral model, check [here](https://docs.vllm.ai/en/latest/models/vlm.html#online-inference) for the setup commands; [SGLang](https://github.com/sgl-project/sglang) to host LLaVa-OneVision model, check [here](https://github.com/sgl-project/sglang?tab=readme-ov-file#supported-models) for the setup commands. ================================================ FILE: crab-benchmark-v0/__init__.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== ================================================ FILE: crab-benchmark-v0/android_env.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from crab import EnvironmentConfig from crab.actions.android_actions import ( key_press, long_tap, open_app_drawer, screenshot, setup, swipe, tap, write_text, ) ANDROID_ENV = EnvironmentConfig( name="android", action_space=[tap, key_press, long_tap, write_text, swipe, open_app_drawer], observation_space=[screenshot], description="""A Google Pixel smartphone runs on the Android operating system. \ The interface displays a current screenshot at each step and primarily \ supports interaction through tapping and typing. This device offers a suite \ of standard applications including Phone, Photos, Camera, Chrome, and \ Calendar, among others. Access the app drawer to view all installed \ applications on the device. 
The Google account is pre-logged in, synchronized \ with the same account used in the Ubuntu environment.""", extra_attributes={"device": None}, reset=setup, ) ================================================ FILE: crab-benchmark-v0/dataset/android/1005c437-50d1-465a-b3fc-833098b22bfc.json ================================================ { "description": "In the Android operating system, use the \"Google Map\" app to find the city name corresponding to the postal code \"63002\" in South Korea, then use the \"Calendar\" app to add a new all-day event for 1 January 2025 with the text of the found city name.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "63002", "country": "South Korea" }, "output": "Jeju" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ac", "attribute": { "content": "Jeju", "date": "1 January 2025" }, "output": null } ], "adjlist": "0 1\n1", "id": "1005c437-50d1-465a-b3fc-833098b22bfc" } ================================================ FILE: crab-benchmark-v0/dataset/android/12333aa0-e76d-4a5c-8657-9f897f62f62d.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the city name for the postal code \"2770885\" in Japan, and then, using the \"Keep Notes\" app, create a new note without a title to record the city name you found.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "2770885", "country": "Japan" }, "output": "Chiba" }, { "task": "eb92a1e6-4c86-4d56-baac-95fc8397732e", "attribute": { "content": "Chiba" }, "output": null } ], "adjlist": "0 1\n1", "id": "12333aa0-e76d-4a5c-8657-9f897f62f62d" } ================================================ FILE: crab-benchmark-v0/dataset/android/22b04776-8eec-4303-b3f6-9c981f7f29b8.json ================================================ { "description": "In Android, Using \"Setting\" app, rename the device name of bluetooth as \"Sydney\".", "tasks": [ { "task": 
"a3d11574-2acf-4b26-a569-a5dbc9d548an", "attribute": { "content": "Sydney" }, "output": null } ], "adjlist": "0", "id": "22b04776-8eec-4303-b3f6-9c981f7f29b8" } ================================================ FILE: crab-benchmark-v0/dataset/android/2ade6a13-c7a6-4df7-8c62-77382687369e.json ================================================ { "description": "In Android, using the \"Contacts\" app, find the email of the contact named John Lauphin, then using the \"Gmail\" app, send an email to that contact with the subject \"Hello John.\"", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ap", "attribute": { "name": "John Lauphin" }, "output": "crabbb@gmail.com" }, { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "Hello John", "mail": "crabbb@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "2ade6a13-c7a6-4df7-8c62-77382687369e" } ================================================ FILE: crab-benchmark-v0/dataset/android/346caf7c-dc74-4c38-962a-aaffb638e0c7.json ================================================ { "description": "In Android, Using \"Calendar\" app, add a new task with text \"meeting\" in date \"June 5th 2024\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ac", "attribute": { "content": "meeting", "date": "05 June 2024" }, "output": null } ], "adjlist": "0", "id": "346caf7c-dc74-4c38-962a-aaffb638e0c7" } ================================================ FILE: crab-benchmark-v0/dataset/android/379b9c58-5125-41b3-9cc6-ea925c8b094d.json ================================================ { "description": "In Android, Using Google Map app, Find the city name of corresponding post code \"560049\" in the country \"India\".", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "country": "India", "number": "560049" }, "output": "Bengaluru" } ], "adjlist": "0", "id": "379b9c58-5125-41b3-9cc6-ea925c8b094d" } ================================================ FILE: 
crab-benchmark-v0/dataset/android/4190c90c-b28c-4bb3-ab5c-af3c4fde0a3d.json ================================================ { "description": "In Android, Using Google Map app, Find the city name of corresponding post code \"1010021\" in the country \"Japan\".", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "country": "Japan", "number": "101-0021" }, "output": "Tokyo" } ], "adjlist": "0", "id": "4190c90c-b28c-4bb3-ab5c-af3c4fde0a3d" } ================================================ FILE: crab-benchmark-v0/dataset/android/46d7ccdb-d2e4-4b8a-bead-f2641b5ac23c.json ================================================ { "description": "In Android, Using \"Contacts\" app, add a contact with a mail \"{mail}\" with a name \"{name}\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ag", "attribute": { "mail": "abcdcly@qq.com", "name": "John Haruhimiya" }, "output": null } ], "adjlist": "0", "id": "46d7ccdb-d2e4-4b8a-bead-f2641b5ac23c" } ================================================ FILE: crab-benchmark-v0/dataset/android/483fbf9c-dc78-4ac2-9264-53c4f617f6cc.json ================================================ { "description": "Open the calendar app in the Android system and find the title of an event on the date \"17 August 2024,\" then using the \"Google Drive\" app on the same Android device, create a new folder with the founded name", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "17 August 2024" }, "output": "Travel to Paris" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ar", "attribute": { "content": "Travel to Paris" }, "output": null } ], "adjlist": "0 1\n1", "id": "483fbf9c-dc78-4ac2-9264-53c4f617f6cc" } ================================================ FILE: crab-benchmark-v0/dataset/android/4893a9b0-6477-495d-a73c-32503326e24a.json ================================================ { "description": "In the Android system, use the calendar app to find the title of an event on the date 
\"16 July 2024,\".", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "16 July 2024" }, "output": "Japan" } ], "adjlist": "0", "id": "4893a9b0-6477-495d-a73c-32503326e24a" } ================================================ FILE: crab-benchmark-v0/dataset/android/53010c40-dce4-4d72-a856-842c21059e2b.json ================================================ { "description": "In the Android system, use the calendar app to find the title of an event on the date \"16 July 2024,\" then, using the Google Map app, find the city name of the corresponding post code \"113-8654\" in the country with same name as title.", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "16 July 2024" }, "output": "Japan" }, { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "113-8654", "country": "Japan" }, "output": null } ], "adjlist": "0 1\n1", "id": "53010c40-dce4-4d72-a856-842c21059e2b" } ================================================ FILE: crab-benchmark-v0/dataset/android/6d9f6395-de79-4ad0-8a2a-2d674f93f293.json ================================================ { "description": "In Android, Using \"Clock\" app, set the time of \"London\" in the clock, check the time gap between the city and current city.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ah", "attribute": { "place_name": "London" }, "output": "7 hours behind" } ], "adjlist": "0", "test_finished":"1", "id": "6d9f6395-de79-4ad0-8a2a-2d674f93f293" } ================================================ FILE: crab-benchmark-v0/dataset/android/71ef7fd2-0ae3-49c8-8238-06b7aa985d25.json ================================================ { "description": "Using the \"Google Map\" app on Android, find the distance of the shortest route from \"National University of Singapore\" to \"Nanyang Technology University,\" then using the \"Calendar\" app, add a new event with the text representing the found distance on the date 21 June 2024 as an 
all-day event.", "tasks": [ { "task": "1a1b72d7-78c9-4027-8278-86083ae01045", "attribute": { "place_name_1": "National University of Singapore", "place_name_2": "Nanyang Technology University" }, "output": "13km" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ac", "attribute": { "content": "13km", "date": "21 June 2024" }, "output": null } ], "adjlist": "0 1\n1", "id": "71ef7fd2-0ae3-49c8-8238-06b7aa985d25" } ================================================ FILE: crab-benchmark-v0/dataset/android/73f78fc3-1ca5-442d-801f-bc175a0bfb89.json ================================================ { "description": "In Android, using \"Google Map\" App, find the distance of the shortest route from \"Southern University of Science and Technology\" to \"Lianhuashan Park\"", "tasks": [ { "task": "1a1b72d7-78c9-4027-8278-86083ae01045", "attribute": { "place_name_1": "Southern University of Science and Technology", "place_name_2": "Lianhuashan Park" }, "output": null } ], "adjlist": "0", "id": "73f78fc3-1ca5-442d-801f-bc175a0bfb89" } ================================================ FILE: crab-benchmark-v0/dataset/android/764838cc-9359-4130-9bb2-4a75900b2d89.json ================================================ { "description": "In Android, call \"123456789\".", "tasks": [ { "task": "955d8773-dd7a-4072-b87c-7e546be7de4e", "attribute": { "number": "123456789" }, "output": null } ], "adjlist": "0", "id": "764838cc-9359-4130-9bb2-4a75900b2d89" } ================================================ FILE: crab-benchmark-v0/dataset/android/77289141-e52b-48c8-b3a7-1b29520f3e1e.json ================================================ { "description": "In Android, Using \"Contacts\" app, find out the mail of contact named \"John Haruhimiya\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ap", "attribute": { "name": "John Haruhimiya" }, "output": "abcdcly@qq.com" } ], "adjlist": "0", "id": "77289141-e52b-48c8-b3a7-1b29520f3e1e" } ================================================ FILE: 
crab-benchmark-v0/dataset/android/7891ceab-7965-4ddb-a0fc-15740c9a4e44.json ================================================ { "description": "In Android, Using \"Google Map\" app, find the city name of corresponding post code \"560049\" in the country \"India\". Create a folder with the city name in \"Google Drive\" app", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "country": "India", "number": "560049" }, "output": "Bengaluru" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ar", "attribute": { "content": "Bengaluru" }, "output": null } ], "adjlist": "0 1\n1", "id": "7891ceab-7965-4ddb-a0fc-15740c9a4e44" } ================================================ FILE: crab-benchmark-v0/dataset/android/8bd51440-f959-4edc-baa5-cd03d32a5b0f.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the address of the University of Sydney, then using the \"Gmail\" app, send a message to crabbb@gmail.com with the found address.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548aw", "attribute": { "content": "The University of Sydney" }, "output": "Camperdown NSW 2050 Australia" }, { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "Camperdown NSW 2050 Australia", "mail": "crabbb@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "8bd51440-f959-4edc-baa5-cd03d32a5b0f" } ================================================ FILE: crab-benchmark-v0/dataset/android/94b1836b-3111-40ad-8d07-b8a57efe7438.json ================================================ { "description": "In an Android system, use the calendar app to find the title of an event on the date \"9 August 2024\", and then, using the Gmail app, send an email to crabbb@gmail.com with the event title as message.", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "9 August 2024" }, "output": "National Day of Singapore would be a public holiday" }, { "task": 
"0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "National Day of Singapore would be a public holiday", "mail": "crabbb@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "94b1836b-3111-40ad-8d07-b8a57efe7438" } ================================================ FILE: crab-benchmark-v0/dataset/android/a225f7f8-6d03-4619-b57d-7a08610030d8.json ================================================ { "description": "In Android, Using \"Google Map\" app, Find the address of \"University of Oxford\" and send \"98801234\" the address using \"message\" App. ", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548aw", "attribute": { "content": "University of Oxford" }, "output": "Wellington Square, Oxford OX1 2JD, United Kingdom" }, { "task": "caa29623-1811-402d-963a-19f7eecc63d8", "attribute": { "content": "Wellington Square, Oxford OX1 2JD, United Kingdom", "number": "98801234" }, "output": null } ], "adjlist": "0 1\n1", "id": "a225f7f8-6d03-4619-b57d-7a08610030d8" } ================================================ FILE: crab-benchmark-v0/dataset/android/b077299d-1acb-40f5-89f3-cc08044345bf.json ================================================ { "description": "Using \"Tasks\" app, add a new task with text \"Watch camel tutorial video\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548af", "attribute": { "content": "Watch camel tutorial video" }, "output": null } ], "adjlist": "0", "id": "b077299d-1acb-40f5-89f3-cc08044345bf" } ================================================ FILE: crab-benchmark-v0/dataset/android/b3965b07-4683-4445-9de1-a1dedf6c73ad.json ================================================ { "description": "In Android, Using \"Google Map\" app, Find the address of \"University of Oxford\" and send \"abcdcly@qq.com\" the address using \"Gmail\" App. 
", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548aw", "attribute": { "content": "University of Oxford" }, "output": "Wellington Square, Oxford OX1 2JD, United Kingdom" }, { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "Wellington Square, Oxford OX1 2JD, United Kingdom", "mail": "abcdcly@qq.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "b3965b07-4683-4445-9de1-a1dedf6c73ad" } ================================================ FILE: crab-benchmark-v0/dataset/android/c1b1cfeb-40e7-49a8-a3f5-b8c8ba723601.json ================================================ { "description": "In Android, Using \"Google Drive\" app, create a new folder named \"Journey\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ar", "attribute": { "content": "Journey" }, "output": null } ], "adjlist": "0", "id": "c1b1cfeb-40e7-49a8-a3f5-b8c8ba723601" } ================================================ FILE: crab-benchmark-v0/dataset/android/c85f03c9-83c4-417b-93d9-0d7b41022525.json ================================================ { "description": "In android system, use the calendar app, find the title of an event in the date \"15 June, 2024\".", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "15 June 2024" }, "output": "EMNLP ddl" } ], "adjlist": "0", "id": "c85f03c9-83c4-417b-93d9-0d7b41022525" } ================================================ FILE: crab-benchmark-v0/dataset/android/cf4c496b-fbbd-4701-91ea-4590fe6a66e1.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the city name corresponding to the postcode \"110151\" in Colombia, then use the \"Clock\" app to set the time of that city in the clock and check the time gap between that city and your current city.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "110151", "country": "Columbia" }, "output": "Bogota" }, { "task": 
"a3d11574-2acf-4b26-a569-a5dbc9d548ah", "attribute": { "place_name": "Bogota" }, "output": "-5h" } ], "adjlist": "0 1\n1", "id": "cf4c496b-fbbd-4701-91ea-4590fe6a66e1" } ================================================ FILE: crab-benchmark-v0/dataset/android/d0811e47-d75f-40ce-b34b-e1ee3c8bed3f.json ================================================ { "description": "In Android, first use the \"Files\" app to find the creation date of the file /Movies/movie_list.txt, then use the \"Calendar\" app to add a new event titled \"Public Talking\" scheduled for all day on the founded day.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ak", "attribute": { "file_path": "/Movies/movie_list.txt" }, "output": "4 June 2024" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ac", "attribute": { "content": "Public Talking", "date": "4 June 2024" }, "output": null } ], "adjlist": "0 1\n1", "id": "d0811e47-d75f-40ce-b34b-e1ee3c8bed3f" } ================================================ FILE: crab-benchmark-v0/dataset/android/d2d456bb-c7d1-46af-8263-78d8509fb320.json ================================================ { "description": "In Android, using \"Gmail\" App, send \"abcdcly@qq.com\" a message \"Hello, nice to meet you!\"", "tasks": [ { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "Hello, nice to meet you!", "mail": "abcdcly@qq.com" }, "output": null } ], "adjlist": "0", "id": "d2d456bb-c7d1-46af-8263-78d8509fb320" } ================================================ FILE: crab-benchmark-v0/dataset/android/d4e0f2b3-d0ff-4efd-856f-9f5e598cfd05.json ================================================ { "description": "In Android, Using \"Google Map\" app, Find the address of \"University of Oxford\"", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548aw", "attribute": { "content": "University of Oxford" }, "output": "Wellington Square, Oxford OX1 2JD, United Kingdom" } ], "adjlist": "0", "id": "d4e0f2b3-d0ff-4efd-856f-9f5e598cfd05" } 
================================================ FILE: crab-benchmark-v0/dataset/android/d7489d00-0046-4fb1-af5b-1fde7d87312c.json ================================================ { "description": "In Android, open the \"Contacts\" app to find the email address of the contact named Karoon Wei, then use the \"Tasks\" app to add a new task with the email address.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ap", "attribute": { "name": "Karoon Wei" }, "output": "karroonw@gmail.com" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548af", "attribute": { "content": "karroonw@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "d7489d00-0046-4fb1-af5b-1fde7d87312c" } ================================================ FILE: crab-benchmark-v0/dataset/android/d92f6c33-e0a7-4101-957d-e7dd218d2565.json ================================================ { "description": "Using the \"Files\" app on an Android device, locate the file /Movies/movie_list.txt and determine its creation date, then use the Task app in the same Android system to find the title of an event scheduled for the days.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ak", "attribute": { "file_path": "/Movies/movie_list.txt" }, "output": "4 June 2024" }, { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "4 June 2024" }, "output": null } ], "adjlist": "0 1\n1", "id": "d92f6c33-e0a7-4101-957d-e7dd218d2565" } ================================================ FILE: crab-benchmark-v0/dataset/android/de843952-df8f-4a26-bae9-d0a32ed9a7f5.json ================================================ { "description": "In Android, Using \"Files\" app, find the create date of \"Downloads/meow.jpg\" in the sdk system.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ak", "attribute": { "file_path": "Download/meow.jpg.webp" }, "output": "May 28" } ], "adjlist": "0", "id": "de843952-df8f-4a26-bae9-d0a32ed9a7f5" } ================================================ FILE: 
crab-benchmark-v0/dataset/android/e20fd121-b981-42da-94de-efcd66889c11.json ================================================ { "description": "In Android, using \"Messages\", send \"The meeting starts from 10am today\" to \"123456789\".", "tasks": [ { "task": "caa29623-1811-402d-963a-19f7eecc63d8", "attribute": { "content": "The meeting starts from 10am today", "number": "123456789" }, "output": null } ], "adjlist": "0", "id": "e20fd121-b981-42da-94de-efcd66889c11" } ================================================ FILE: crab-benchmark-v0/dataset/android/e55d7a39-7b6b-4852-8711-844cebc88cb8.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the city name corresponding to the postcode \"110151\" in Colombia.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "110151", "country": "Columbia" }, "output": "Bogota" } ], "adjlist": "0", "id": "e55d7a39-7b6b-4852-8711-844cebc88cb8" } ================================================ FILE: crab-benchmark-v0/dataset/android/e9268070-91b7-4e8c-9976-1cf8126ba13b.json ================================================ { "description": "In the Android system, use the task app to find the title of an event on the date \"15 June 2024\", then using the \"Google Drive\" app, create a new folder named as the title we found.", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "15 June 2024" }, "output": "EMNLP24 DDL" }, { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ar", "attribute": { "content": "EMNLP24 DDL" }, "output": null } ], "adjlist": "0 1\n1", "id": "e9268070-91b7-4e8c-9976-1cf8126ba13b" } ================================================ FILE: crab-benchmark-v0/dataset/android/fbe6a1b1-63bb-4d4e-8a53-ff4f7839ef61.json ================================================ { "description": "In Android, open the \"Contacts\" app to find the email address of a contact named Luis Martin, then use the 
\"Messages\" app to send the found email address to the phone number \"04055891132\".", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ap", "attribute": { "name": "Luis Martin" }, "output": "lmartin0431@gmail.com" }, { "task": "caa29623-1811-402d-963a-19f7eecc63d8", "attribute": { "content": "lmartin0431@gmail.com", "number": "04055891132" }, "output": null } ], "adjlist": "0 1\n1", "id": "fbe6a1b1-63bb-4d4e-8a53-ff4f7839ef61" } ================================================ FILE: crab-benchmark-v0/dataset/android/fc642cb6-5321-4966-afbf-fb3348bb69ee.json ================================================ { "description": "In Android, using \"Keep Notes\" App, record \"Camel is the best agent framework in the world!\" in a new note without title.", "tasks": [ { "task": "eb92a1e6-4c86-4d56-baac-95fc8397732e", "attribute": { "content": "Camel is the best agent framework in the world!" }, "output": null } ], "adjlist": "0", "id": "fc642cb6-5321-4966-afbf-fb3348bb69ee" } ================================================ FILE: crab-benchmark-v0/dataset/android_subtasks.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== # ruff: noqa: E501 import re import networkx as nx from lxml import etree from lxml.etree import _Element from networkx import DiGraph, path_graph from crab import SubTask, evaluator from crab.actions.android_actions import execute_adb def get_xml_etree(env) -> _Element | None: xml_str = execute_adb("exec-out uiautomator dump /dev/tty", env) if "UI hierchary dumped to: /dev/tty" not in xml_str: return None xml_str = xml_str.removesuffix("UI hierchary dumped to: /dev/tty") return etree.fromstring(xml_str.encode("utf-8")) @evaluator(env_name="android", local=True) def check_contain_input_text(text: str, env) -> bool: if env.trajectory: action_name, params, _ = env.trajectory[-1] if action_name == "write_text" and text.lower() in params["text"].lower(): return True return False @evaluator(env_name="android", local=True) def check_contain_input_text_multiple(text: str, env) -> bool: if env.trajectory: for action_name, params, _ in env.trajectory: if action_name == "write_text" and text in params["text"].lower(): return True return False @evaluator(env_name="android") def check_contain_contact(name: str, env) -> bool: root = get_xml_etree(env) if root is None: return False title_node = root.xpath( '//node[@resource-id="com.android.contacts:id/photo_touch_intercept_overlay"]' ) if not title_node: return False if title_node[0].get("content-desc") != name: return False info_node = root.xpath('//*[@class="android.widget.RelativeLayout"]') if not info_node: return False print("info node checked") mail_node = None for node in info_node: desc = node.get("content-desc") if "Email" in desc: mail_node = node if mail_node is None: return False real_mail_node = mail_node.xpath( '//*[@resource-id="com.android.contacts:id/header"]' ) if not real_mail_node: return False context = real_mail_node[0].get("text") print("context get") pattern = re.compile(r"^\w+@\w+.com") if pattern.match(context): return True return False @evaluator(env_name="android") def 
check_current_package_name(name: str, env) -> bool: result = execute_adb( r'shell "dumpsys activity activities | grep mResumedActivity"', env ) return name in result @evaluator(env_name="android", local=True) def check_ocr_results(text: str, env) -> bool: return text in env.ocr_results @evaluator(env_name="android") def check_current_message_page(title: str, env) -> bool: root = get_xml_etree(env) if root is None: return False title_node = root.xpath( '//node[@resource-id="com.google.android.apps.messaging:id/conversation_title"]' ) if title_node: return title == title_node[0].get("text") else: return False @evaluator(env_name="android") def check_message_text_box_contain(text: str, env) -> bool: root = get_xml_etree(env) if root is None: return False text_box_node = root.xpath( '//node[@resource-id="com.google.android.apps.messaging:id/compose_message_text"]' ) if text_box_node: return text.lower() in text_box_node[0].get("text").lower() else: return False @evaluator(env_name="android") def check_message_text_box_empty(env) -> bool: root = get_xml_etree(env) if root is None: return False text_box_node = root.xpath( '//node[@resource-id="com.google.android.apps.messaging:id/compose_message_text"]' ) if not text_box_node: return False if text_box_node[0].get("text").strip() == "Text message": return True else: return False @evaluator(env_name="android") def check_send_message(title: str, message: str, env) -> bool: root = get_xml_etree(env) if root is None: return False title_node = root.xpath( '//node[@resource-id="com.google.android.apps.messaging:id/conversation_title"]' ) if not title_node or title != title_node[0].get("text"): return False messages_node = root.xpath( '//node[@resource-id="com.google.android.apps.messaging:id/message_text"]' ) for node in messages_node: if message in node.get("text"): return True return False @evaluator(env_name="android") def check_note_content(content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False 
title_node = root.xpath( '//node[@resource-id="com.google.android.keep:id/editable_title"]' ) if not title_node: return False if title_node[0].get("text") != "Title": return False node = root.xpath( '//node[@resource-id="com.google.android.keep:id/edit_note_text"]' ) if not node: return False if content in node[0].get("text"): return True return False @evaluator(env_name="android") def check_bluetooth_name(content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False bluetooth_node = root.xpath('//node[@resource-id="android:id/summary"]') if not bluetooth_node: return False if content in bluetooth_node[0].get("text"): return True return False @evaluator(env_name="android") def check_map_direction_page(from_des: str, to_des: str, env) -> bool: root = get_xml_etree(env) if root is None: return False from_node = root.xpath(f'//node[@content-desc="Start location, {from_des}"]') if not from_node: return False to_node = root.xpath(f'//node[@content-desc="Destination, {to_des}"]') if not to_node: return False return True @evaluator(env_name="android") def check_dial_number(phone_number: str, env) -> bool: root = get_xml_etree(env) if root is None: return False dialer_node = root.xpath('//node[@resource-id="com.android.dialer:id/digits"]') if not dialer_node: return False number = dialer_node[0].get("text") number = re.sub("[^0-9]", "", number) target = re.sub("[^0-9]", "", phone_number) return number == target @evaluator(env_name="android") def check_calendar_registered(date: str, content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False calendar_node = root.xpath( '//node[@resource-id="com.google.android.calendar:id/alternate_timeline_fragment_container"]' ) if not calendar_node: return False itr_calendar_node = calendar_node[0].xpath( '//node[@class="android.support.v7.widget.RecyclerView"]' ) if not itr_calendar_node: return False target_nodes = itr_calendar_node[0].xpath('//node[@content-desc="{content}"]') if not 
target_nodes: return False return True @evaluator(env_name="android") def check_drive_registered(content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False entry_node = root.xpath( '//node[@resource-id="com.google.android.apps.docs:id/entry_label"]' ) if not entry_node: return False for node in entry_node: if content == node.get("text") and f"{content} Folder" == node.get( "content-desc" ): return True return False @evaluator(env_name="android") def check_contact_registered(mail: str, name: str, env) -> bool: root = get_xml_etree(env) if root is None: return False name_node = root.xpath('//node[@resource-id="com.android.contacts:id/large_title"]') if not name_node: return False text = name_node[0].get("text") if text not in name: return False mail_node = root.xpath('//node[@resource-id="com.android.contacts:id/header"]') text = mail_node[0].get("text") if text not in mail: return False return True @evaluator(env_name="android") def check_calling_number(phone_number: str, env) -> bool: root = get_xml_etree(env) if root is None: return False dialer_node = root.xpath( '//node[@resource-id="com.android.dialer:id/contactgrid_contact_name"]' ) if not dialer_node: return False number = dialer_node[0].get("text") number = re.sub("[^0-9]", "", number) target = re.sub("[^0-9]", "", phone_number) return number == target @evaluator(env_name="android") def check_google_tasks_name(target: str, env) -> bool: root = get_xml_etree(env) if root is None: return False task_nodes = root.xpath( '//node[@resource-id="com.google.android.apps.tasks:id/task_name"]' ) if not task_nodes: return False for node in task_nodes: task_name = node.get("text") if target in task_name: return True return False @evaluator(env_name="android") def check_date(target: str, env) -> bool: root = get_xml_etree(env) if root is None: return False date_nodes = root.xpath( '//node[@resource-id="com.google.android.apps.photos:id/datetime_item_layout"]' ) if not date_nodes: return False 
prev_node = date_nodes.xpath( '//node[@resource-id="com.google.android.apps.photos:id/label"]' ) time = prev_node.get("text") pattern = re.compile(r"^\w{3},\s\w{3}\s\d{2},\s\d{4}\s•\s\d{1,2}:\d{2}\s[AP]M$") if pattern.match(time): return True return False @evaluator(env_name="android") def check_city_clock(place_name: str, env) -> bool: root = get_xml_etree(env) if root is None: return False city_nodes = root.xpath( '//node[@resource-id="com.google.android.deskclock:id/city_name"]' ) if city_nodes is None: return False for city_node in city_nodes: text = city_node.get("text") if place_name == text: return True return False @evaluator(env_name="android") def check_event(date: str, env) -> bool: root = get_xml_etree(env) if root is None: return False event_nodes = root.xpath('//node[@class="android.support.v7.widget.RecyclerView"]') if event_nodes is None: return False if not event_nodes: return False for node in event_nodes[0]: text = node.get("content-desc") if date in text: return True return False @evaluator(env_name="android") def check_event_registered(date: str, content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False event_nodes = root.xpath('//node[@class="android.support.v7.widget.RecyclerView"]') if not event_nodes: return False time_reg = False content_reg = False for node in event_nodes[0]: text = node.get("content-desc") if date.lower() in text.lower(): time_reg = True if content.lower() in text.lower(): content_reg = True if time_reg and content_reg: return True return False @evaluator(env_name="android") def check_location(content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False checked_node = root.xpath(f'//node[@content-desc="{content}"]') if not checked_node: return False return True @evaluator(env_name="android") def check_contain_city(number: str, city: str, env) -> bool: root = get_xml_etree(env) if root is None: return False business_node = root.xpath( 
'//node[@resource-id="com.google.android.apps.maps:id/search_omnibox_text_box"]' ) if not business_node: return False text = None for node in business_node[0]: text = node.get("text") if text is None: return False if city in text and str(number) in text: return True return False @evaluator(env_name="android") def check_file(content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False name_source_node = root.xpath( '//node[@resource-id="com.google.android.apps.photos:id/exif_item_layout"]' ) if not name_source_node: return False name_nodes = name_source_node[0].xpath( '//node[@resource-id="com.google.android.apps.photos:id/label"]' ) if not name_nodes: return False target_node = None for node in name_nodes: text = node.get("text") if content in text: target_node = node if target_node is None: return False time_source_node = root.xpath( '//node[@resource-id="com.google.android.apps.photos:id/datetime_item_layout"]' ) if not time_source_node: return False time_nodes = time_source_node[0].xpath( '//node[@resource-id="com.google.android.apps.photos:id/label"]' ) if not time_nodes: return False target_node = None for node in time_nodes: text = node.get("text") pattern = re.compile( r"(Tue|Mon|Wed|Thu|Fri|Sat|Sun),\s(May|Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{2},\s\d{4} • \d{2}:\d{2}\s(AM|PM)" ) if pattern.match(text): return True return False @evaluator(env_name="android") def check_mail_sent(mail: str, content: str, env) -> bool: root = get_xml_etree(env) if root is None: return False to_node = root.xpath( '//node[@resource-id="com.google.android.gm:id/peoplekit_chip"]' ) if not to_node: return False checked = False for node in to_node: text = node.get("content-desc") if mail in text: checked = True if not checked: return False # check the mail information-> Done # check the content information body_node = root.xpath( '//node[@resource-id="com.google.android.gm:id/body_wrapper"]' ) if not body_node: return False text_node = 
body_node[0].xpath('//node[@class="android.widget.EditText"]') if not text_node: return False for node in text_node: text = node.get("text") if content in text: return True return False def distance_evaluator_generator(place_name_1: str, place_name_2: str): result = nx.DiGraph() a = check_current_package_name("com.google.android.apps.maps") b = check_contain_input_text(place_name_1) c = check_contain_input_text(place_name_2) d = check_map_direction_page(place_name_1, place_name_2) result.add_edges_from([(a, b), (a, c), (b, d), (c, d)]) return result def mail_evaluator_generator(mail: str, content: str): result = nx.DiGraph() a = check_current_package_name("com.google.android.gm") b = check_contain_input_text(mail) c = check_contain_input_text(content) d = check_mail_sent(mail, content) result.add_edges_from([(a, b), (a, c), (b, d), (c, d)]) return result def contact_evaluator_generator(mail: str, name: str): result = nx.DiGraph() a = check_current_package_name("com.android.contacts") b = check_contain_input_text(mail) c = check_contain_input_text(name) d = check_contact_registered(mail, name) result.add_edges_from([(a, b), (a, c), (b, d), (c, d)]) return result android_subtasks = [ SubTask( id="1a1b72d7-78c9-4027-8278-86083ae01045", description='In Android, using "Google Map" App, find the distance of the shortest route from "{place_name_1}" to "{place_name_2}"', attribute_dict={"place_name_1": "place_name_1", "place_name_2": "place_name_2"}, output_type="number", evaluator_generator=distance_evaluator_generator, ), SubTask( id="eb92a1e6-4c86-4d56-baac-95fc8397732e", description='In Android, using "Keep Notes" App, record "{content}" in a new note without title.', attribute_dict={"content": "content"}, output_type="None", evaluator_generator=lambda content: path_graph( [ check_current_package_name("com.google.android.keep"), check_contain_input_text(content), check_note_content(content), ], create_using=DiGraph, ), ), SubTask( 
id="caa29623-1811-402d-963a-19f7eecc63d8", description='In Android, using "Messages", send "{content}" to "{number}".', attribute_dict={"content": "content", "number": "number"}, output_type="None", evaluator_generator=lambda content, number: path_graph( [ check_current_package_name("com.google.android.apps.messaging"), check_current_message_page(number), check_contain_input_text(content), check_send_message(number, content), ], create_using=DiGraph, ), ), SubTask( id="955d8773-dd7a-4072-b87c-7e546be7de4e", description='In Android, call "{number}".', attribute_dict={"number": "number"}, output_type="None", evaluator_generator=lambda number: path_graph( [ check_current_package_name("com.android.dialer"), check_dial_number(number), check_calling_number(number), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548af", description='Using "Tasks" app, add a new task with text "{content}".', attribute_dict={"content": "content"}, output_type="None", evaluator_generator=lambda content: path_graph( [ check_current_package_name("com.google.android.apps.tasks"), check_contain_input_text(content), check_google_tasks_name(content), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ac", description='In Android, Using "Calendar" app, add a new event with text "{content}" in date "{date}" all day.', attribute_dict={"content": "content", "date": "date"}, output_type="None", evaluator_generator=lambda content, date: path_graph( [ check_current_package_name("com.google.android.calendar"), check_contain_input_text(content), check_event_registered(date, content), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ag", description='In Android, Using "Contacts" app, add a contact with a mail "{mail}" with a name "{name}".', attribute_dict={"mail": "mail", "name": "name"}, output_type="None", evaluator_generator=contact_evaluator_generator, ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ap", 
description='In Android, Using "Contacts" app, find out the mail of contact named {name}.', attribute_dict={"name": "name"}, output_type="mail", evaluator_generator=lambda name: path_graph( [ check_current_package_name("com.android.contact"), check_contain_contact(name), ], create_using=DiGraph, ), ), SubTask( id="0090f116-e02b-4562-a20d-b5df38be963a", description='In Android, Using "Gmail" app, send {mail} a message {content}.', attribute_dict={"content": "content", "mail": "mail"}, output_type="None", evaluator_generator=mail_evaluator_generator, ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ar", description='In Android, Using "Google Drive" app, create a new folder named {content}.', attribute_dict={"content": "content"}, output_type="None", evaluator_generator=lambda content: path_graph( [ check_current_package_name("com.google.android.apps.docs"), check_drive_registered(content), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ak", description='In Android, Using "Files" app, find the create date of {file_path}.', attribute_dict={"file_path": "file_path"}, output_type="Date", evaluator_generator=lambda file_path: path_graph( [ check_current_package_name("com.google.android.apps.photos"), check_file(file_path), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548an", description='In Android, Using "Setting" app, rename the device name of bluetooth as {name}.', attribute_dict={"content": "content"}, output_type="None", evaluator_generator=lambda content: path_graph( [ check_current_package_name("com.android.settings"), check_contain_input_text(content), check_bluetooth_name(content), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548ah", description='In Android, Using "Clock" app, set the time of {place_name} in the clock, check the time gap between the city and current city.', attribute_dict={"place_name": "place_name"}, output_type="content", evaluator_generator=lambda 
place_name: path_graph( [ check_current_package_name("com.google.android.deskclock"), check_city_clock(place_name), ], create_using=DiGraph, ), ), SubTask( id="a3d11574-2acf-4b26-a569-a5dbc9d548aw", description='In Android, Using "Google Map" app, Find the address of {content}', attribute_dict={"content": "content"}, output_type="content", evaluator_generator=lambda content: path_graph( [ check_current_package_name("com.google.android.apps.maps"), check_location(content), ], create_using=DiGraph, ), ), SubTask( id="51b2463c-9904-4a32-81ba-507bfb89d61f", description='In Android, Using "Google Map" app, Find the city name of corresponding post code "{number}" in the country "{country}".', attribute_dict={"number": "number", "country": "country"}, output_type="content", evaluator_generator=lambda number, country: path_graph( [ check_current_package_name("com.google.android.apps.maps"), check_contain_input_text(country), check_contain_input_text(number), check_contain_city(number, country), ], create_using=DiGraph, ), ), SubTask( id="2394b768-2ca7-45e9-b41e-2aa4e9573192", description='In android system, use the calendar app, find the title of an event in the date "{date}".', attribute_dict={"date": "date"}, output_type="content", evaluator_generator=lambda date: path_graph( [ check_current_package_name("com.google.android.calendar"), check_event(date), ], create_using=DiGraph, ), ), # TODO: The phone number page cannot be accesed by xml. figure out another way. 
# SubTask( # id="fa9c0b01-9835-4932-824d-0990cb20e5f7", # description='Using Settings app, find the phone number of this phone in the "About" panel.', # attribute_dict={}, # output_type="phone_number", # evaluator=lambda: path_graph( # [ # check_current_package_name("com.android.settings"), # ], # create_using=DiGraph, # ), # ), ] ================================================ FILE: crab-benchmark-v0/dataset/cross/05a7633d-b966-471c-8848-e18e69ad265f.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the city name corresponding to the postal code \"1010021\" in Japan, then paste the name into LibreOffice Writer on an Ubuntu system and save it as an ODT file at \"/home/crab/Desktop/target.opt\".", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "44145", "country": "Germany" }, "output": "Dortmund" }, { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/Desktop/target.odt" }, "output": null } ], "adjlist": "0 1\n1", "id": "05a7633d-b966-471c-8848-e18e69ad265f" } ================================================ FILE: crab-benchmark-v0/dataset/cross/1e92db38-501e-429b-ac31-453d1af10a25.json ================================================ { "description": "Open the terminal on Ubuntu, print the content of \"/home/crab/Desktop/kolakov.txt\" to the command line interface, and then, in the Android \"Keep Notes\" app, record the content in a new note without adding a title.", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/Desktop/kolakov.txt" }, "output": "The flight to warsaw is from kolakov" }, { "task": "eb92a1e6-4c86-4d56-baac-95fc8397732e", "attribute": { "content": "The flight to warsaw is from kolakov" }, "output": null } ], "adjlist": "0 1\n1", "id": "1e92db38-501e-429b-ac31-453d1af10a25" } ================================================ FILE: 
crab-benchmark-v0/dataset/cross/43be6e8e-034d-4277-8346-c4ae7553bf68.json ================================================ { "description": "On an Android device, using the Google Map app, find the address of Dignity Health Sports Park, then use Firefox to search for a university around the address on Google Maps, and copy the Google Maps sharing URL of that university to the clipboard.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548aw", "attribute": { "content": "Dignity Health Sports Park" }, "output": "18400 Avalon Blvd, Carson, CA 907, US" }, { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "University", "place_name": "18400 Avalon Blvd, Carson, CA 907, US" }, "output": null } ], "adjlist": "0 1\n1", "id": "43be6e8e-034d-4277-8346-c4ae7553bf68" } ================================================ FILE: crab-benchmark-v0/dataset/cross/534be964-269a-4509-b2b8-28cc3ba8dfca.json ================================================ { "description": "On an Android system, use the calendar app to find the title of an event on the date \"18 September 2024\", then use Firefox to search for an image with the title and copy the URL of the image to the clipboard.", "tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "18 September 2024" }, "output": "Chile National Day" }, { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "Chile National Day" }, "output": null } ], "adjlist": "0 1\n1", "id": "534be964-269a-4509-b2b8-28cc3ba8dfca" } ================================================ FILE: crab-benchmark-v0/dataset/cross/6f95cfa1-e7ae-4a82-912b-0180fc9622f2.json ================================================ { "description": "On an Android system, open the calendar app and find the title of an event scheduled for \"15 June 2024,\" copy this title, then paste the content into Visual Studio Code (VS Code) on an Ubuntu system and save it as a file named \"reminder.txt\" on the Desktop.", 
"tasks": [ { "task": "2394b768-2ca7-45e9-b41e-2aa4e9573192", "attribute": { "date": "15 June 2024" }, "output": "EMNLP24 DDL" }, { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/Desktop/reminder.txt" }, "output": null } ], "adjlist": "0 1\n1", "id": "6f95cfa1-e7ae-4a82-912b-0180fc9622f2" } ================================================ FILE: crab-benchmark-v0/dataset/cross/760ed27e-b1bd-451f-8659-bdb9845fcb7f.json ================================================ { "description": "Open the \"~/Desktop/contact.txt\" file via the command line interface in Ubuntu to view its content, then use the Gmail app on Android to send a message to crabbb@gmail.com with the content.", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "~/Desktop/contact.txt" }, "output": "crabbb@gmail.com" }, { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "Hello, please send me a message back", "mail": "crabbb@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "760ed27e-b1bd-451f-8659-bdb9845fcb7f" } ================================================ FILE: crab-benchmark-v0/dataset/cross/82596760-7d4d-457d-9ca9-9551ab85ec58.json ================================================ { "description": "Using the \"Google Map\" app on an Android device, find the city name corresponding to the postal code \"10179\" in Germany, and then submit the discovered city name.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "10179", "country": "German" }, "output": "Berlin" }, { "task": "1c3bedc3-ea5a-453c-a15b-223d72ab756d", "attribute": { "content": "Berlin" }, "output": null } ], "adjlist": "0 1\n1", "id": "82596760-7d4d-457d-9ca9-9551ab85ec58" } ================================================ FILE: crab-benchmark-v0/dataset/cross/a956a091-8de4-42ee-b152-913308dfc24b.json ================================================ { "description": "In the \"Clock\" 
app on Android, add Yakarta's time, compare it with the current city's time to determine the time gap, and then submit the information.", "tasks": [ { "task": "a3d11574-2acf-4b26-a569-a5dbc9d548ah", "attribute": { "place_name": "yakarta" }, "output": "1 hour behind" }, { "task": "1c3bedc3-ea5a-453c-a15b-223d72ab756d", "attribute": { "content": "1 hour behind" }, "output": null } ], "adjlist": "0 1\n1", "id": "a956a091-8de4-42ee-b152-913308dfc24b" } ================================================ FILE: crab-benchmark-v0/dataset/cross/c5929ef3-ac27-4288-b02f-4f261d5871f9.json ================================================ { "description": "In Android, use the \"Google Map\" app to find the city name corresponding to the postal code \"1010021\" in Japan, then use Firefox to search for a code repository about that city on GitHub and copy the URL of the repository to the clipboard.", "tasks": [ { "task": "51b2463c-9904-4a32-81ba-507bfb89d61f", "attribute": { "number": "1010021", "country": "Japan" }, "output": "Tokyo" }, { "task": "bcd03c9f-62c9-4001-8d86-78358c59ce22", "attribute": { "keyword": "Tokyo" }, "output": null } ], "adjlist": "0 1\n1", "id": "c5929ef3-ac27-4288-b02f-4f261d5871f9" } ================================================ FILE: crab-benchmark-v0/dataset/cross/da5911e3-1a99-4735-ba3e-f08c5ca81fdd.json ================================================ { "description": "Open a terminal in Ubuntu, print the content of \"~/Desktop/contract_reminder.txt\", and then, on an Android device, use the Gmail app to send an email to crabbb@gmail.com, including the printed information.", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "~/Desktop/contract_reminder.txt" }, "output": "uld be end in three days." 
}, { "task": "0090f116-e02b-4562-a20d-b5df38be963a", "attribute": { "content": "uld be end in three days.", "mail": "crabbb@gmail.com" }, "output": null } ], "adjlist": "0 1\n1", "id": "da5911e3-1a99-4735-ba3e-f08c5ca81fdd" } ================================================ FILE: crab-benchmark-v0/dataset/handmade_tasks.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== # ruff: noqa: E501 F405 import os import re import subprocess import time from datetime import datetime import networkx as nx from crab import Task, action, evaluator from .android_subtasks import ( check_current_package_name, check_google_tasks_name, check_message_text_box_contain, check_message_text_box_empty, check_note_content, get_xml_etree, ) from .ubuntu_subtasks import * # noqa: F403 _item_count_cache = None @evaluator(env_name="android") def check_calendar_in_today(env) -> bool: # Get today's date and format it as "Weekday DD Month YYYY" today_date_str = datetime.now().strftime("%A %d %B %Y") root = get_xml_etree(env) if root is None: return False # Construct the desired string with today's date date_string = f"{today_date_str}, Open Schedule View" date_node = root.xpath(f'//node[@content-desc="{date_string}"]') if not date_node or len(date_node) != 1: return False today_nodes = date_node[0].getparent().getchildren() item_count = len(today_nodes) - 2 if item_count < 0: return False global _item_count_cache _item_count_cache = item_count return True @action(env_name="ubuntu") def get_file_bullet_points(file_path: str) -> int | None: # Check if the file exists if not os.path.exists(file_path): return None # Read the markdown text from the file try: with open(file_path, "r") as file: markdown_text = file.read() except Exception: return None # Regex to match empty checkboxes in markdown pattern = r"- \[ \]" # Find all matches matches = re.findall(pattern, markdown_text) # Return the number of empty checkboxes return matches @evaluator(env_name="ubuntu", local=True) def check_blluet_point_match_calendar(file_path: str, env) -> bool: matches = env._action_endpoint(get_file_bullet_points, {"file_path": file_path}) global _item_count_cache if _item_count_cache is None or matches is None: return False return _item_count_cache == len(matches) @evaluator(env_name="android") def check_node_exist(node_query: str, env) -> bool: root = get_xml_etree(env) if 
root is None: return False node = root.xpath(f"//node[{node_query}]") if not node: return False return True @evaluator(env_name="ubuntu") def check_new_jpg_files_in_dir(directory) -> bool: # Get the current time current_time = time.time() # Time limit set to 3 minutes ago time_limit = current_time - 180 # Iterate over files in the specified directory for file in os.listdir(directory): file_path = os.path.join(directory, file) # Check if the file is a .jpg and was modified within the last 3 minutes if file.endswith(".jpg") and os.path.getmtime(file_path) > time_limit: return True return False @evaluator(env_name="ubuntu") def check_text_list_in_current_window_name(texts: list[str]) -> bool: try: out = subprocess.check_output( ["xdotool", "getwindowfocus", "getwindowname"], text=True ).strip() except subprocess.CalledProcessError: return False for text in texts: if text not in out: return False return True @evaluator(env_name="android") def check_keep_notes_content(text: str, env) -> bool: root = get_xml_etree(env) if root is None: return False edit_node = root.xpath( '//node[@resource-id="com.google.android.keep:id/editor_bottom_bar"]' ) if len(edit_node) != 1: return False content_node = root.xpath( '//node[@resource-id="com.google.android.keep:id/browse_note_interior_content"]' ) if len(content_node) != 1: return False text_nodes = content_node[0].getchildren() if len(text_nodes) != 1: return False return text_nodes[0].get("text") == text @evaluator(env_name="android") def check_keep_notes_contain_fd(env) -> bool: global RESULT_fd0576be text = RESULT_fd0576be root = get_xml_etree(env) if root is None or text is None: return False edit_node = root.xpath( '//node[@resource-id="com.google.android.keep:id/editor_bottom_bar"]' ) if len(edit_node) != 1: return False content_node = root.xpath( '//node[@resource-id="com.google.android.keep:id/browse_note_interior_content"]' ) for node in content_node: text_nodes = node.getchildren() if len(text_nodes) != 1: continue if 
text in text_nodes[0].get("text"): return True return False @evaluator(env_name="android") def check_alarm_contains(time: str, env) -> bool: root = get_xml_etree(env) if root is None or time is None: return False clock_node = root.xpath( '//node[@resource-id="com.google.android.deskclock:id/digital_clock"]' ) for node in clock_node: if time == node.get("text"): return True return False @evaluator(env_name="android", local=True) def check_tap_text(text: str, env) -> bool: if env.trajectory: action_name, params, _ = env.trajectory[-1] if action_name == "tap": try: element_id = int(params["element"]) element_label = env.element_label_map[element_id] except TypeError: return False if element_label is None: return False return text.lower() in element_label.lower() return False def summarize_ubuntu_evaluator(): result = nx.DiGraph() a = check_current_window_process("slack") b = check_current_package_name("com.google.android.apps.messaging") c = check_message_text_box_contain("agent") d = check_message_text_box_contain("github") e = check_message_text_box_empty() result.add_edges_from([(a, c), (a, d), (b, c), (b, d), (c, e), (d, e)]) return result def check_calendar_evaluator(): result = nx.DiGraph() a = check_current_package_name("com.google.android.calendar") b = check_calendar_in_today() c = check_file_exist("/home/crab/assets/plan.md") d = check_blluet_point_match_calendar("/home/crab/assets/plan.md") result.add_edges_from([(a, b), (b, d), (c, d)]) return result def evaluator_97e6f333(): result = nx.DiGraph() a = check_current_package_name("com.android.camera2") b = check_node_exist('@resource-id="com.android.camera2:id/rounded_thumbnail_view"') c = check_node_exist('@resource-id="com.android.camera2:id/filmstrip_layout"') d = check_current_package_name( "com.google.android.apps.photos/.upload.intent.UploadContentActivity" ) e = check_node_exist('@resource-id="com.android.camera2:id/filmstrip_layout"') f = check_current_window_process("firefox") g = 
check_text_in_current_window_name("Photos - Google Photos — Mozilla Firefox") h = check_new_jpg_files_in_dir("/home/crab/Downloads") i = check_file_exist("/home/crab/assets/photo.jpg") j = check_text_list_in_current_window_name(["photo", "GIMP"]) result.add_edges_from([(a, b), (b, c), (c, d), (d, e), (e, h)]) result.add_edges_from([(f, g), (g, h)]) result.add_edges_from([(h, i), (i, j)]) return result def evaluator_82efbd82(): result = nx.DiGraph() a = download_and_verify_file( "https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg", "/home/crab/Downloads/raw.jpg", ) b = check_text_in_current_window_name("GNU Image Manipulation Program") c = check_file_exist("/home/crab/Pictures/edited.jpg") d = is_image_2_brighter( "/home/crab/Downloads/raw.jpg", "/home/crab/Pictures/edited.jpg" ) e = verify_background("/home/crab/Pictures/edited.jpg") result.add_edges_from([(a, b), (b, c), (c, d), (d, e)]) return result def evaluator_515a5467(): result = nx.DiGraph() a = download_and_verify_file( "https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg", "/home/crab/Downloads/img_1.jpg", ) b = download_and_verify_file( "https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Flag_of_Ethiopia.svg/250px-Flag_of_Ethiopia.svg.png", "/home/crab/Downloads/img_2.jpg", ) c = check_text_in_current_window_name("GNU Image Manipulation Program") d = check_file_exist("/home/crab/Downloads/combined_editing.jpg") e = verify_combined_image( "/home/crab/Downloads/img_1.jpg", "/home/crab/Downloads/img_2.jpg", "/home/crab/Downloads/combined_editing.jpg", "right", ) f = check_directory_exists("/home/crab/jpg") g = verify_files_copied("/home/crab/Downloads", "/home/crab/jpg", "jpg") result.add_edges_from([(a, c), (b, c), (c, d), (d, e), (e, f), (f, g)]) return result def evaluator_5a1eba49(): result = nx.DiGraph() a = check_text_in_current_window_name("Firefox") 
b = check_contain_input_text("GPU") c = is_img_url_in_clipboard() d = download_from_clipboard_and_verify_file("/home/crab/Pictures/GPU.png") e = check_directory_exists("/home/crab/Pictures/png_files") f = verify_files_copied( "/home/crab/Pictures", "/home/crab/Pictures/png_files", "png" ) result.add_edges_from([(a, b), (b, c), (c, d), (d, e), (e, f)]) return result def evaluator_c347f78a(): file_path = "/home/crab/assets/content.txt" content = "An air quality health advisory is in effect Tuesday for New York City and the lower Hudson Valley, as well as western Connecticut and northern New Jersey, meaning it may not be safe for people with some conditions to be outside long." result = nx.DiGraph() a = check_current_window_process("gnome-terminal-server") b = is_process_open("vim") c = ~is_process_open("vim") d = check_file_content(file_path, content) e = check_contain_input_text("cat " + file_path) f = check_submit(content) result.add_edges_from([(a, b), (b, c), (c, d), (d, e), (e, f)]) return result def evaluator_bf83c176(): result = nx.DiGraph() file_path_1 = "/home/crab/Desktop/waymo.jpg" file_path_2 = "/home/crab/Desktop/tesla.png" output_path = "/home/crab/Documents/self_driving.pdf" # Search for the first image and download it a1 = check_text_in_current_window_name("Firefox") b1 = check_contain_input_text("Waymo") c1 = is_img_url_in_clipboard() d1 = download_from_clipboard_and_verify_file(file_path_1) # Search for the second image and download it a2 = check_text_in_current_window_name("Firefox") b2 = check_contain_input_text("Tesla") c2 = is_img_url_in_clipboard() d2 = download_from_clipboard_and_verify_file(file_path_2) # Combine images into a PDF e = check_text_in_current_window_name("LibreOffice Impress") f = check_file_exist(output_path) g = verify_combined_image(file_path_1, file_path_2, output_path, "left") # Add edges to form the branches and connections result.add_edges_from([(a1, b1), (b1, c1), (c1, d1)]) result.add_edges_from([(d1, a2), (a2, b2), 
(b2, c2), (c2, d2)]) result.add_edges_from([(d2, e), (e, f), (f, g)]) return result def evaluator_74bb11dd(): file_path_1 = "/home/crab/Documents/FR.ods" file_path_2 = "/home/crab/Documents/MX.ods" result = nx.DiGraph() # Search for the first country and save information to an ODS file a1 = check_text_in_current_window_name("Wikipedia — Mozilla Firefox") b1 = check_text_in_current_window_name("LibreOffice Calc") c1 = check_file_exist(file_path_1) d1 = verify_country_data_in_ods("France", file_path_1) # Search for the second country and save information to an ODS file a2 = check_text_in_current_window_name("Wikipedia — Mozilla Firefox") b2 = check_text_in_current_window_name("LibreOffice Calc") c2 = check_file_exist(file_path_2) d2 = verify_country_data_in_ods("Mexico", file_path_2) # Create new directory and copy ODS files to it e = check_directory_exists("/home/crab/Desktop/country_info") f = verify_files_copied( "/home/crab/Documents", "/home/crab/Desktop/country_info", "ods" ) # Add edges to form the branches and connections result.add_edges_from([(a1, b1), (b1, c1), (c1, d1)]) result.add_edges_from([(a2, b2), (b2, c2), (c2, d2)]) result.add_edges_from([(d1, e), (d2, e), (e, f)]) return result TEXT_ca79febf = 'The rapid advancement of conversational and chat-based language models has led to remarkable progress in complex task-solving. However, their success heavily relies on human input to guide the conversation, which can be challenging and time-consuming. This paper explores the potential of building scalable techniques to facilitate autonomous cooperation among communicative agents and provide insight into their "cognitive" processes. To address the challenges of achieving autonomous cooperation, we propose a novel communicative agent framework named role-playing. Our approach involves using inception prompting to guide chat agents toward task completion while maintaining consistency with human intentions. 
We showcase how role-playing can be used to generate conversational data for studying the behaviors and capabilities of chat agents, providing a valuable resource for investigating conversational language models. Our contributions include introducing a novel communicative agent framework, offering a scalable approach for studying the cooperative behaviors and capabilities of multi-agent systems, and open-sourcing our library to support research on communicative agents and beyond. The GitHub repository of this project is made publicly available on: https://github.com/camel-ai/camel.' def evaluator_ca79febf(): result = nx.DiGraph() a = check_current_package_name("com.google.android.keep") b = check_keep_notes_content(TEXT_ca79febf) c = check_tap_text("select") d = check_tap_text("copy") e = check_current_package_name( "com.google.android.apps.docs.editors.docs/com.google.android.apps.docs.editors.homescreen.HomescreenActivity" ) f = check_current_package_name( "com.google.android.apps.docs.editors.docs/com.google.android.apps.docs.editors.kix.KixEditorActivity" ) g = check_tap_text("paste") h = check_current_window_process("firefox") i = check_text_in_current_window_name("Google Docs — Mozilla Firefox") j = check_text_in_current_window_name( "Untitled document - Google Docs — Mozilla Firefox" ) result.add_edges_from([(a, b), (b, c), (c, d), (d, e), (e, f), (f, g), (g, j)]) result.add_edges_from([(h, i), (i, j)]) return result def evaluator_dfabf84c(): result = nx.DiGraph() keyword = "kaust" a = check_text_in_current_window_name("Mozilla Firefox") b = check_contain_input_text(keyword) c = is_img_url_in_clipboard() d = download_from_clipboard_and_verify_file("/home/crab/Desktop/download.jpg") e = check_current_package_name("com.google.android.keep") f = check_contain_input_text(keyword) g = check_note_content(keyword) result.add_edges_from([(a, b), (b, c), (c, d), (d, g)]) result.add_edges_from([(b, e), (e, f), (f, g)]) return result def evaluator_aab5555e(): result = 
nx.DiGraph() a = check_current_window_process("gnome-terminal-server") b = check_contain_input_text("uname -a") d = check_current_package_name("com.google.android.apps.messaging") e = check_message_text_box_contain("ubuntu") f = check_message_text_box_contain("x86") g = check_message_text_box_contain("linux") h = check_message_text_box_contain("crab") sink = check_message_text_box_empty() result.add_edges_from( [ (a, b), (b, sink), (d, e), (d, f), (d, g), (d, h), (e, sink), (f, sink), (g, sink), (h, sink), ] ) return result RESULT_fd0576be = None @action(env_name="ubuntu") def get_root_usage() -> str: try: output = subprocess.check_output(["df", "/"], text=True) return output.split("\n")[1].split()[4][:-1] except Exception: return None @evaluator(env_name="ubuntu", local=True) def check_contain_input_text_and_get_df_result(text: str, env) -> bool: global RESULT_fd0576be RESULT_fd0576be = env._action_endpoint(get_root_usage, parameters={}) if env.trajectory: inputs = [ params["text"].lower() for action_name, params, _ in env.trajectory if action_name == "write_text" ] return any(text.lower() in input_text for input_text in inputs) return False def evaluator_fd0576be(): result = nx.DiGraph() a = check_current_window_process("gnome-terminal-server") b = check_contain_input_text_and_get_df_result("df") c = check_current_package_name("com.google.android.keep") d = check_keep_notes_contain_fd() result.add_edges_from([(a, b), (b, d), (c, d)]) return result def evaluator_7e08f7d4(): result = nx.DiGraph() a = check_text_in_current_window_name("Mozilla Firefox") b = check_contain_input_text( "https://farm9.staticflickr.com/8293/7591378270_76059bc1cf_z.jpg" ) c = check_current_package_name("com.android.deskclock.DeskClock") d = check_alarm_contains("7:00\u200aAM") result.add_edges_from([(a, b), (b, d), (c, d)]) return result def evaluator_4957e964(): result = nx.DiGraph() a = check_current_window_process("gnome-terminal-server") b = check_contain_input_text("wget") c = 
check_contain_input_text( "https://farm8.staticflickr.com/7451/10001676353_fd762e02f0_z.jpg" ) d = check_file_exist("/home/crab/Desktop/download.jpg") e = check_text_in_current_window_name("Image Viewer") f = check_current_package_name("com.google.android.apps.tasks") g = check_google_tasks_name("tennis") result.add_edges_from([(a, b), (b, c), (c, d), (d, e), (e, g), (f, g)]) return result # Hand-made environment setup guide: # Ubuntu # * Make sure the Ubuntu slack login, and the default channel has at least two messages # Andorid # * Make sure the first incomplete task in android "Tasks" application is a instruction to change the system to dark mode. # * Make sure the init page of "Calendar" app is "Day" view. There should be at least one element today. ubuntu_handmade_tasks = [ Task( id="82efbd82-c941-4be9-9ac0-a495dc629e02", description='Download an image file from a given URL "https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg" to "/home/crab/Downloads/raw.jpg", then use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from "/home/crab/Downloads/raw.jpg" to be brighter and save the edited file to "/home/crab/Pictures/edited.jpg", and set the adjusted image "/home/crab/Pictures/edited.jpg" as the screen background of the system.', evaluator=evaluator_82efbd82(), ), Task( id="515a5467-b7ce-4cad-874d-da894361c1a3", description='Download two image files from given URLs "https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg" and "https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Flag_of_Ethiopia.svg/250px-Flag_of_Ethiopia.svg.png" to "/home/crab/Downloads/img_1.jpg" and "/home/crab/Downloads/img_2.jpg", combine the first image ("/home/crab/Downloads/img_1.jpg") with the second image ("/home/crab/Downloads/img_2.jpg") using GIMP (GNU Image Manipulation Program) by placing the first image on 
the right side of the second image, and save the resulting combined image to "/home/crab/Downloads/combined_editing.jpg". Then, create a new directory "/home/crab/jpg" and copy all files with the specified "jpg" extension from "/home/crab/Downloads" to the newly created directory "/home/crab/jpg".', evaluator=evaluator_515a5467(), ), Task( id="5a1eba49-ed2d-4955-a684-32472090a45b", description='Use Firefox to search for an image using the keyword "GPU", copy the URL of the found image to the clipboard, download the image file from the URL stored in the clipboard to "/home/crab/Pictures/GPU.png", and create a new directory "/home/crab/Pictures/png_files" to copy all files with the specified "png" extension from "/home/crab/Pictures" to the newly created directory "/home/crab/Pictures/png_files".', evaluator=evaluator_5a1eba49(), ), Task( id="c347f78a-4643-43c8-b41e-e437b70a2c5e", description='Open a file at "/home/crab/assets/content.txt" using vim in a terminal, write the specified "An air quality health advisory is in effect Tuesday for New York City and the lower Hudson Valley, as well as western Connecticut and northern New Jersey, meaning it may not be safe for people with some conditions to be outside long." to it, then save and exit vim. Print the content of the file by printing it to the command line interface through a terminal, and finally, submit the printed content.', evaluator=evaluator_c347f78a(), ), Task( id="bf83c176-fa15-4057-996f-f75be4338c05", description='Use Firefox to search for an image using the keyword "Waymo" first, copy the URL of the image to the clipboard, and download the image to "/home/crab/Desktop/waymo.jpg". Then, search for another image using the keyword "Tesla", copy the URL of the image to the clipboard, and download the image to "/home/crab/Desktop/tesla.png". 
Finally, combine the two images using LibreOffice Impress, placing Image 1 from "/home/crab/Desktop/waymo.jpg" on the left side of Image 2 "/home/crab/Desktop/tesla.png", and save the resulting file in PDF format to "/home/crab/Documents/self_driving.pdf".', evaluator=evaluator_bf83c176(), ), Task( id="74bb11dd-89ca-43d0-8edf-fe7b5201ecf7", description='Use Firefox to search for information about the country "France" on Wikipedia. Extract the capital city and population, and save this information in an ODS file at "/home/crab/Documents/FR.ods" using LibreOffice Calc. Then, search for information about the country "Mexico" on Wikipedia, extract the capital city and population, and save this information in a separate ODS file at "/home/crab/Documents/MX.ods" using LibreOffice Calc. The format of the file are, first column for the country name, the second for the capital city name, and the third for the population without any header. Finally, create a new directory "/home/crab/Desktop/country_info" and copy all files with the specified "ods" extension from "/home/crab/Documents" to the newly created directory "/home/crab/Desktop/country_info".', evaluator=evaluator_74bb11dd(), ), ] corss_environment_tasks = [ Task( id="79832e15-5fd3-43b8-b3e3-66249edfe1db", description='Open slack in Ubuntu desktop, summarize the last two messages in current channel, then use "Messages" app in android phone to send the summary to the first contact in the list.', evaluator=summarize_ubuntu_evaluator(), ), Task( id="a3476778-e512-40ca-b1c0-d7aab0c7f18b", # You must set the first incomplete task to "In Ubuntu, switch the system to dark mode by "Settings" application" description='Open "Tasks" app on Android, check the first incomplete task, then perform the task according to its description', evaluator=nx.path_graph( [ check_current_package_name("com.google.android.apps.tasks"), check_current_window_process("gnome-control-center"), check_color_scheme("prefer-dark"), ], 
create_using=nx.DiGraph, ), ), Task( id="914e6a48-8430-4a68-8328-c4e01db8926e", # You must create several tasks in google calendar today's view. description='Open "Calendar" app on Android, summarize all schedules today. Then, create a markdown file in Ubuntu at "/home/crab/assets/plan.md" with each event as a checkbox bullet point.', evaluator=check_calendar_evaluator(), ), Task( id="97e6f333-bedb-429b-8dd6-1855f99c312d", description="Take a photo through Android Camera, then upload it to Google Photos inside Camera App. Use Firefox inside Ubuntu desktop to download the photo to local disk, move it as `/home/crab/assets/photo.jpg`, finally open the photo in GIMP.", evaluator=evaluator_97e6f333(), ), Task( id="ca79febf-cae7-4669-8812-d3ec85ee2868", description="Open the first note in the Keep Notes app on Android, copy its contents, and paste them into a new document in Google docs. Then, open the newly created document in Firefox on Ubuntu.", evaluator=evaluator_ca79febf(), ), Task( id="dfabf84c-d05f-4e25-9f21-ba0f08107bd5", description='Use Firefox to search for an image using the keyword "kaust" and copy the URL of the image to the clipboard. Download a file from the URL stored in the clipboard to "/home/crab/Desktop/download.jpg". 
Then describe this image and save it in the Android Keep Notes app.', evaluator=evaluator_dfabf84c(), ), Task( id="aab5555e-4b72-4ebf-816a-59c1da2cec86", description="Check the all uname information of the system in Ubuntu, then explain the information to the first contact in the list of the Messages app in Android.", evaluator=evaluator_aab5555e(), ), Task( id="fd0576be-8b2c-45ce-b4a2-78659740879b", description="Check the current disk usage through command line in Ubuntu, check the root directory usage in percentage and save the information to a note in Keep Notes app in Android.", evaluator=evaluator_fd0576be(), ), Task( id="7e08f7d4-9b11-4aec-9b42-6cbde083fb4c", description='Use firefox on Ubuntu to openup the image "https://farm9.staticflickr.com/8293/7591378270_76059bc1cf_z.jpg", check the time of the clock in the image, then open the clock app in Android and set an alarm to the same as the image.', evaluator=evaluator_7e08f7d4(), ), Task( id="4957e964-5dd5-42f6-9d5d-f6a53a9a5d94", description='Use wget to download the image "https://farm8.staticflickr.com/7451/10001676353_fd762e02f0_z.jpg" to /home/crab/Desktop/download.jpg, what does the people in the image do? Create a task in the Tasks app in Android to remind you to do the same thing.', evaluator=evaluator_4957e964(), ), ] handmade_tasks = ubuntu_handmade_tasks + corss_environment_tasks ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/05d0e137-7d97-4021-9477-6490a2154c81.json ================================================ { "description": "Open \"/home/crab/poem\" using vim in a terminal, write \"If you shed tears when you miss the sun, you also miss the stars.\", then save and exit vim.", "tasks": [ { "task": "0f589bf9-9b26-4581-8b78-2961b115ab49", "attribute": { "file_path": "/home/crab/poem", "content": "If you shed tears when you miss the sun, you also miss the stars." 
}, "output": null } ], "adjlist": "0", "id": "05d0e137-7d97-4021-9477-6490a2154c81" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/0a893c2e-eec5-47cc-a930-eb01c5f17683.json ================================================ { "description": "Submit the following content \"If you shed tears when you miss the sun, you also miss the stars.\"", "tasks": [ { "task": "1c3bedc3-ea5a-453c-a15b-223d72ab756d", "attribute": { "content": "If you shed tears when you miss the sun, you also miss the stars." }, "output": null } ], "adjlist": "0", "id": "0a893c2e-eec5-47cc-a930-eb01c5f17683" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/0d178388-8166-4b66-93c1-278861f9897c.json ================================================ { "description": "Use Firefox to find out a \"restaurant\" around \"kaust\" on Google Maps and copy the Google Maps sharing URL of that \"restaurant\" to the clipboard", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "restaurant", "place_name": "kaust" }, "output": null } ], "adjlist": "0", "id": "0d178388-8166-4b66-93c1-278861f9897c" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/0d7c84d2-bbbd-46ab-80d1-52b3a44f3858.json ================================================ { "description": "Combine two images from Image 1 \"/home/crab/assets/campus.png\" and Image 2 \"/home/crab/assets/desert.jpg\" using LibreOffice Writer and save the resulting ODT file to \"/home/crab/assets/campus_desert.odt\". 
Image 1 should be placed above Image 2.", "tasks": [ { "task": "0111384f-38ca-41a2-9504-cb1c55002b3c", "attribute": { "image_path_1": "/home/crab/assets/campus.png", "image_path_2": "/home/crab/assets/desert.jpg", "output_path": "/home/crab/assets/campus_desert.odt" }, "output": null } ], "adjlist": "0", "id": "0d7c84d2-bbbd-46ab-80d1-52b3a44f3858" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/0deafe05-8db5-445f-9031-f6e884569d03.json ================================================ { "description": "Create a new directory \"/home/crab/jpg_folder\", copy all files with the \"jpg\" extension from \"/home/crab/Pictures\" to this newly created directory, then open LibreOffice Impress to combine the two images located at \"/home/crab/jpg_folder/dog.jpg\" (Image 1) and \"/home/crab/jpg_folder/Interstellar.jpg\" (Image 2), placing Image 1 on the right side of Image 2, and save the combined image in PDF format to \"/home/crab/Documents/combination.pdf\".", "tasks": [ { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "jpg", "source_dir": "/home/crab/Pictures", "target_dir": "/home/crab/jpg_folder" }, "output": "/home/crab/jpg_folder" }, { "task": "467f17a6-c42f-4eda-996f-a53385eb3efd", "attribute": { "image_path_1": "/home/crab/jpg_folder/dog.jpg", "image_path_2": "/home/crab/jpg_folder/Interstellar.jpg", "output_path": "/home/crab/Documents/combination.pdf" }, "output": null } ], "adjlist": "0 1\n1", "id": "0deafe05-8db5-445f-9031-f6e884569d03" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/0e80fd90-0b23-454f-a629-7b6d7baa7542.json ================================================ { "description": "Use Firefox to search for the country \"Canada\" on Wikipedia, extract the capital city and population, and save this information in an ODS file at \"/home/crab/canada.ods\" with LibreOffice Calc. 
The first column will save the country name, the second will save the capital city name, and the third will save the population. No header is needed in the ODS file.", "tasks": [ { "task": "1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", "attribute": { "country": "Canada", "file_path": "/home/crab/canada.ods" }, "output": null } ], "adjlist": "0", "id": "0e80fd90-0b23-454f-a629-7b6d7baa7542" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/125f7bae-e931-4190-8737-5f1ea7227772.json ================================================ { "description": "Submit content \"OpenAI is an American artificial intelligence (AI) research organization founded in December 2015, researching artificial intelligence with the goal of developing \"safe and beneficial\" artificial general intelligence, which it defines as \"highly autonomous systems that outperform humans at most economically valuable work.\"", "tasks": [ { "task": "1c3bedc3-ea5a-453c-a15b-223d72ab756d", "attribute": { "content": "OpenAI is an American artificial intelligence (AI) research organization founded in December 2015, researching artificial intelligence with the goal of developing \"safe and beneficial\" artificial general intelligence, which it defines as \"highly autonomous systems that outperform humans at most economically valuable work." 
}, "output": null } ], "adjlist": "0", "id": "125f7bae-e931-4190-8737-5f1ea7227772" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/15a150a8-899c-4753-8dc5-05248ccc3640.json ================================================ { "description": "Download the file from \"https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg\" to the location \"/home/crab/Downloads/fiji.png\", and then set \"/home/crab/Downloads/fiji.png\" as the desktop background on the system.", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://media.cntraveller.com/photos/642aa1ad770beda2d4f5cc22/4:3/w_2664,h_1998,c_limit/Fiji-march2023issue-JackJohns15.jpg", "file_path": "/home/crab/Downloads/fiji.png" }, "output": "/home/crab/Downloads/fiji.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Downloads/fiji.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "15a150a8-899c-4753-8dc5-05248ccc3640" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/1ebcd710-f73b-4022-832b-167c0d3f55a2.json ================================================ { "description": "Use Firefox to find out a \"University\" around \"Los Angeles\" on Google Maps and copy the Google Maps sharing URL of that \"University\" to the clipboard", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "University", "place_name": "Los Angeles" }, "output": null } ], "adjlist": "0", "id": "1ebcd710-f73b-4022-832b-167c0d3f55a2" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/22787ecc-52b2-4791-aefb-c45800f51414.json ================================================ { "description": "Submit content \"Jensen Huang cofounded graphics-chip maker Nvidia in 1993, and has served as its CEO and president ever since. 
Huang owns approximately 3% of Nvidia, which went public in 1999.\"", "tasks": [ { "task": "1c3bedc3-ea5a-453c-a15b-223d72ab756d", "attribute": { "content": "Jensen Huang cofounded graphics-chip maker Nvidia in 1993, and has served as its CEO and president ever since. Huang owns approximately 3% of Nvidia, which went public in 1999." }, "output": null } ], "adjlist": "0", "id": "22787ecc-52b2-4791-aefb-c45800f51414" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/22f05f6f-6aef-4786-958f-14f559eaf014.json ================================================ { "description": "Create a new directory \"/home/crab/example_code\" and copy all files with the specified \"py\" extension from \"/home/crab/crab/examples\" to the directory \"/home/crab/example_code\".", "tasks": [ { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "py", "source_dir": "/home/crab/crab/examples", "target_dir": "/home/crab/example_code" }, "output": null } ], "adjlist": "0", "id": "22f05f6f-6aef-4786-958f-14f559eaf014" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/28963795-d694-4bb4-adaf-f7708a2c6fe5.json ================================================ { "description": "Use Firefox to search for an image using the keyword \"Elon Musk\" and copy the URL of the image.", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "Elon Musk" }, "output": null } ], "adjlist": "0", "id": "28963795-d694-4bb4-adaf-f7708a2c6fe5" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/299db8f2-81eb-455f-9302-5c8cb30be691.json ================================================ { "description": "Combine two images, Image 1 \"/home/crab/Pictures/Interstellar.jpg\" and Image 2 \"/home/crab/Pictures/cat.png\", using GIMP (GNU Image Manipulation Program) with Image 1 placed on the left side of Image 2, and save the resulting image to 
\"/home/crab/Pictures/edited_background.png\". Then, set \"/home/crab/Pictures/edited_background.png\" as the desktop background on the system.", "tasks": [ { "task": "4cf246ea-0a7f-43da-84b6-61d74a2699af", "attribute": { "image_path_1": "/home/crab/Pictures/Interstellar.jpg", "image_path_2": "/home/crab/Pictures/cat.png", "output_path": "/home/crab/Pictures/edited_background.png" }, "output": "/home/crab/Pictures/edited_background.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Pictures/edited_background.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "299db8f2-81eb-455f-9302-5c8cb30be691" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/29f099b2-b3a5-463f-b10a-15363bf7e845.json ================================================ { "description": "Use Firefox to search for a \"garden\" around \"ETH Zurich\" on Google Maps, copy the sharing URL of that \"garden\" to the clipboard, then paste the content into Visual Studio Code (VS Code) and save the file at \"/home/crab/eth_garden.txt\".", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "garden", "place_name": "ETH Zurich" }, "output": null }, { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/eth_garden.txt" }, "output": null } ], "adjlist": "0 1\n1", "id": "29f099b2-b3a5-463f-b10a-15363bf7e845" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/355e9660-a355-4b95-8881-ac9da578ea43.json ================================================ { "description": "Use Firefox to search for the country \"Italy\" on Wikipedia, extract the capital city and population, and save this information in an ODS file at \"/home/crab/country.ods\" with LibreOffice Calc. The first column will save the country name, the second will save the capital city name, and the third will save the population. 
No header is needed in the ODS file.", "tasks": [ { "task": "1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", "attribute": { "country": "Italy", "file_path": "/home/crab/country.ods" }, "output": null } ], "adjlist": "0", "id": "355e9660-a355-4b95-8881-ac9da578ea43" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/35bd7387-4735-4632-8474-e93382004c12.json ================================================ { "description": "Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from \"/home/crab/assets/campus.png\" to a higher value (brighter) and save it to \"/home/crab/assets/campus_edited.png\".", "tasks": [ { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { "image_path_before_edit": "/home/crab/assets/campus.png", "image_path_after_edit": "/home/crab/assets/campus_edited.png" }, "output": null } ], "adjlist": "0", "id": "35bd7387-4735-4632-8474-e93382004c12" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/362c5711-3824-42ff-96a0-7801b03b5f1f.json ================================================ { "description": "Use Firefox to find a code repository about \"Open Source Computer Vision Library\" in GitHub and copy the URL of the repository to the clipboard.", "tasks": [ { "task": "bcd03c9f-62c9-4001-8d86-78358c59ce22", "attribute": { "keyword": "Open Source Computer Vision Library" }, "output": null } ], "adjlist": "0", "id": "362c5711-3824-42ff-96a0-7801b03b5f1f" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/4718df9c-97ec-4b54-86ca-bd34e65c5a43.json ================================================ { "description": "Download a file from \"https://arxiv.org/pdf/2303.05499\" to \"/home/crab/Documents/Grounding_DINO.pdf\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://arxiv.org/pdf/2303.05499", "file_path": "/home/crab/Documents/Grounding_DINO.pdf" }, "output": null } 
], "adjlist": "0", "id": "4718df9c-97ec-4b54-86ca-bd34e65c5a43" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/47b75b21-99a2-461c-9d40-6dddc5c206d0.json ================================================ { "description": "Use Firefox to search for an image using the keyword \"LLM\" and copy the URL of the image to the clipboard.", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "LLM" }, "output": null } ], "adjlist": "0", "id": "47b75b21-99a2-461c-9d40-6dddc5c206d0" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/4ae4e35f-d90a-48cc-8fb9-492ac7ae07ee.json ================================================ { "description": "Paste clipboard content into LibreOffice Writer and save it as an ODT file at \"/home/crab/Documents/clipboard_text.odt\".", "tasks": [ { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/Documents/clipboard_text.odt" }, "output": null } ], "adjlist": "0", "id": "4ae4e35f-d90a-48cc-8fb9-492ac7ae07ee" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/4bbedade-4d4e-43d5-b650-2702b350ad28.json ================================================ { "description": "Open \"/home/crab/assets/1.txt\" using vim in a terminal, write \"LinkedIn is a business and employment-focused social media platform that works through websites and mobile apps. It was launched on May 5, 2003 by Reid Hoffman and Eric Ly.\", then save and exit vim.", "tasks": [ { "task": "0f589bf9-9b26-4581-8b78-2961b115ab49", "attribute": { "file_path": "/home/crab/assets/1.txt", "content": "LinkedIn is a business and employment-focused social media platform that works through websites and mobile apps. It was launched on May 5, 2003 by Reid Hoffman and Eric Ly." 
}, "output": null } ], "adjlist": "0", "id": "4bbedade-4d4e-43d5-b650-2702b350ad28" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/51a288f9-cf2c-4e8e-a98c-596a505af77c.json ================================================ { "description": "Combine two images from Image 1 \"/home/crab/assets/desert.jpg\" and Image 2 \"/home/crab/assets/campus.png\" using LibreOffice Impress and save the resulting file in PDF format to \"/home/crab/assets/desert_campus.pdf\". Image 1 should be placed on the right side of Image 2.", "tasks": [ { "task": "467f17a6-c42f-4eda-996f-a53385eb3efd", "attribute": { "image_path_1": "/home/crab/assets/desert.jpg", "image_path_2": "/home/crab/assets/campus.png", "output_path": "/home/crab/assets/desert_campus.pdf" }, "output": null } ], "adjlist": "0", "id": "51a288f9-cf2c-4e8e-a98c-596a505af77c" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/51c91051-3efb-4e92-a967-739b18520714.json ================================================ { "description": "Open Firefox and search for the torch.matmul example provided by the official PyTorch version 1.13 documentation, copy all the lines of code from the example, open Visual Studio Code (VS Code), paste the clipboard content into a new file, and save it as \"/home/crab/example.py\".", "tasks": [ { "task": "49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", "attribute": {}, "output": null }, { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/example.py" }, "output": null } ], "adjlist": "0 1\n1", "id": "51c91051-3efb-4e92-a967-739b18520714" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/57b7e8a7-8c17-4cc4-9bb5-4385afde3ad8.json ================================================ { "description": "Create a new directory \"/home/crab/assets_for_edit\" and copy all files with the \"png\" extension from \"/home/crab/assets\" to this new directory. 
Then, combining Image 1 \"/home/crab/assets_for_edit/background.png\" and Image 2 \"/home/crab/assets_for_edit/campus.png\" with LibreOffice Writer, place Image 1 above Image 2, and save the file in the ODT format to \"/home/crab/assets_for_edit/back_n_campus.odt\".", "tasks": [ { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "png", "source_dir": "/home/crab/assets", "target_dir": "/home/crab/assets_for_edit" }, "output": "/home/crab/assets_for_edit" }, { "task": "0111384f-38ca-41a2-9504-cb1c55002b3c", "attribute": { "image_path_1": "/home/crab/assets_for_edit/background.png", "image_path_2": "/home/crab/assets_for_edit/campus.png", "output_path": "/home/crab/assets_for_edit/back_n_campus.odt" }, "output": null } ], "adjlist": "0 1\n1", "id": "57b7e8a7-8c17-4cc4-9bb5-4385afde3ad8" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/58776443-ccf7-4db3-8c60-e188e4b5f90c.json ================================================ { "description": "Paste clipboard content into LibreOffice Writer and save it as an ODT file at \"/home/crab/paste.odt\".", "tasks": [ { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/paste.odt" }, "output": null } ], "adjlist": "0", "id": "58776443-ccf7-4db3-8c60-e188e4b5f90c" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/5ba74c6a-4513-448b-8b68-ff145ece0652.json ================================================ { "description": "Download the file from \"https://raw.githubusercontent.com/camel-ai/camel/master/README.md\" to \"/home/crab/Documents/README.md\", and then print the content of \"/home/crab/Documents/README.md\" to the command line interface through a terminal.", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://raw.githubusercontent.com/camel-ai/camel/master/README.md", "file_path": "/home/crab/Documents/README.md" }, "output": 
"/home/crab/Documents/README.md" }, { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/Documents/README.md" }, "output": null } ], "adjlist": "0 1\n1", "id": "5ba74c6a-4513-448b-8b68-ff145ece0652" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/6428f803-62de-40d2-a345-64e6cf955c9d.json ================================================ { "description": "First, use LibreOffice Impress to adjust the brightness of the image located at \"/home/crab/Pictures/cat.png\" to make it darker, and save the edited image as \"/home/crab/Pictures/cat_edited.png\". Then, using GIMP (GNU Image Manipulation Program), combine the image \"/home/crab/Pictures/dog.png\" with \"/home/crab/Pictures/cat_edited.png\" by placing the dog image on the left side of the cat image, and save the merged image to \"/home/crab/Pictures/dog_cat.png\".", "tasks": [ { "task": "434402f3-647a-4a9a-9d8f-10f5bb6c7cf0", "attribute": { "image_path_before_edit": "/home/crab/Pictures/cat.png", "image_path_after_edit": "/home/crab/Pictures/cat_edited.png" }, "output": "/home/crab/Pictures/cat_edited.png" }, { "task": "4cf246ea-0a7f-43da-84b6-61d74a2699af", "attribute": { "image_path_1": "/home/crab/Pictures/dog.png", "image_path_2": "/home/crab/Pictures/cat_edited.png", "output_path": "/home/crab/Pictures/dog_cat.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "6428f803-62de-40d2-a345-64e6cf955c9d" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/64a2c205-c85a-4e56-8edb-5df4f7724441.json ================================================ { "description": "Find the example provided of \"torch.matmul\" by official PyTorch version 1.13 documentation using Firefox and copy all the lines of code in the example to the clipboard.", "tasks": [ { "task": "49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", "attribute": {}, "output": null } ], "adjlist": "0", "id": "64a2c205-c85a-4e56-8edb-5df4f7724441" } 
================================================ FILE: crab-benchmark-v0/dataset/ubuntu/696ca9bb-89ea-4cd5-b693-f2d749d964b1.json ================================================ { "description": "Adjust the brightness of the image located at \"/home/crab/assets/campus.png\" using GIMP (GNU Image Manipulation Program) to make it brighter, save the adjusted image to \"/home/crab/Pictures/campus_brighter.png\", and then set this enhanced image as the desktop background on an Ubuntu system.", "tasks": [ { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { "image_path_before_edit": "/home/crab/assets/campus.png", "image_path_after_edit": "/home/crab/Pictures/campus_brighter.png" }, "output": "/home/crab/Pictures/campus_brighter.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Pictures/campus_brighter.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "696ca9bb-89ea-4cd5-b693-f2d749d964b1" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/6be49e77-e904-4eb0-a36a-7f0fd128ede3.json ================================================ { "description": "Use Firefox to find a code repository about \"pytorch\" in GitHub and copy the URL of the repository to the clipboard.", "tasks": [ { "task": "bcd03c9f-62c9-4001-8d86-78358c59ce22", "attribute": { "keyword": "pytorch" }, "output": null } ], "adjlist": "0", "id": "6be49e77-e904-4eb0-a36a-7f0fd128ede3" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/6c3105a2-328c-4190-823d-03d759be0b57.json ================================================ { "description": "Use Firefox to search for an image with the keyword \"reinforcement learning,\" copy the URL of the chosen image to the clipboard, and download the image from the URL in the clipboard to \"/home/crab/Downloads/RL.png\" on an Ubuntu system.", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": 
"reinforcement learning" }, "output": null }, { "task": "a313ea4d-e501-4971-b4fe-db2aad19acsd", "attribute": { "file_path": "/home/crab/Downloads/RL.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "6c3105a2-328c-4190-823d-03d759be0b57" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/6c560516-ca14-4f97-b51d-16ad81fc29e4.json ================================================ { "description": "Open \"/home/crab/assets/a.txt\" using vim in a terminal, write \"The most recent COMPUTEX was held from 30 May to 2 June 2023 with sessions about such topics as high-performance computing, artificial intelligence, next-gen connectivity and sustainability.\", then save and exit vim, and print the content of \"/home/crab/assets/a.txt\" to the command line interface.", "tasks": [ { "task": "0f589bf9-9b26-4581-8b78-2961b115ab49", "attribute": { "file_path": "/home/crab/assets/a.txt", "content": "The most recent COMPUTEX was held from 30 May to 2 June 2023 with sessions about such topics as high-performance computing, artificial intelligence, next-gen connectivity and sustainability." 
}, "output": "/home/crab/assets/a.txt" }, { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/assets/a.txt" }, "output": null } ], "adjlist": "0 1\n1", "id": "6c560516-ca14-4f97-b51d-16ad81fc29e4" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/730172f5-894a-4d46-9102-ac7d985a479d.json ================================================ { "description": "Download the image of Jupiter from \"https://upload.wikimedia.org/wikipedia/commons/thumb/2/2b/Jupiter_and_its_shrunken_Great_Red_Spot.jpg/640px-Jupiter_and_its_shrunken_Great_Red_Spot.jpg\" to \"/home/crab/Pictures/jupiter.jpg\", then use LibreOffice Impress to adjust the brightness of this image to make it darker and save the edited version as \"/home/crab/Pictures/jupiter_edited.jpg\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/2/2b/Jupiter_and_its_shrunken_Great_Red_Spot.jpg/640px-Jupiter_and_its_shrunken_Great_Red_Spot.jpg", "file_path": "/home/crab/Pictures/jupiter.jpg" }, "output": "/home/crab/Pictures/jupiter.jpg" }, { "task": "434402f3-647a-4a9a-9d8f-10f5bb6c7cf0", "attribute": { "image_path_before_edit": "/home/crab/Pictures/jupiter.jpg", "image_path_after_edit": "/home/crab/Pictures/jupiter_edited.jpg" }, "output": null } ], "adjlist": "0 1\n1", "id": "730172f5-894a-4d46-9102-ac7d985a479d" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/73038efb-ca0f-4d90-a947-fcfd097dd91b.json ================================================ { "description": "Open Firefox and navigate to the official PyTorch version 1.13 documentation to find an example of `torch.matmul`. Copy all the lines of code in the example to the clipboard. 
Then, paste the clipboard content into Visual Studio Code (VS Code) and save it as a file at \"/home/crab/example_code.txt\".", "tasks": [ { "task": "49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", "attribute": {}, "output": null }, { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/example_code.txt" }, "output": null } ], "adjlist": "0 1\n1", "id": "73038efb-ca0f-4d90-a947-fcfd097dd91b" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/73da97c9-f084-4cab-8697-1151737387ff.json ================================================ { "description": "Download the file from \"https://images.top1market.com/images/cms/uploads/20230928/4950e1db0038feb506fdcfa0c936fd8e.png\" to \"/home/crab/Desktop/meta.png\", then set this image, \"/home/crab/Desktop/meta.png\", as the desktop background on the system.", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://images.top1market.com/images/cms/uploads/20230928/4950e1db0038feb506fdcfa0c936fd8e.png", "file_path": "/home/crab/Desktop/meta.png" }, "output": "/home/crab/Desktop/meta.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Desktop/meta.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "73da97c9-f084-4cab-8697-1151737387ff" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/77aa4dd3-5a68-4686-9cac-26d0ab77c7b4.json ================================================ { "description": "Use Firefox to find out a \"hiking trail\" around \"Munich\" on Google Maps and copy the Google Maps sharing URL of that \"hiking trail\" to the clipboard", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "hiking trail", "place_name": "Munich" }, "output": null } ], "adjlist": "0", "id": "77aa4dd3-5a68-4686-9cac-26d0ab77c7b4" } ================================================ FILE: 
crab-benchmark-v0/dataset/ubuntu/78502f1c-879b-4932-a5fd-d85f7f6b0f81.json ================================================ { "description": "Download the file from \"https://cemse.kaust.edu.sa/sites/default/files/styles/large/public/2023-04/Web%20banner.jpg?itok=d1TvGUKY\" to \"/home/crab/Pictures/KAUST_AI.png\" and then set this image as the desktop background on the system.", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://cemse.kaust.edu.sa/sites/default/files/styles/large/public/2023-04/Web%20banner.jpg?itok=d1TvGUKY", "file_path": "/home/crab/Pictures/KAUST_AI.png" }, "output": "/home/crab/Pictures/KAUST_AI.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Pictures/KAUST_AI.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "78502f1c-879b-4932-a5fd-d85f7f6b0f81" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/7912f7a5-24b9-4dfe-a7b8-1effc1b7a212.json ================================================ { "description": "Combine two images from Image 1 \"/home/crab/assets/campus.png\" and Image 2 \"/home/crab/assets/desert.jpg\" using GIMP (GNU Image Manipulation Program) and save the resulting image to \"/home/crab/assets/campus_desert.png\". 
Image 1 should be placed on the left side of Image 2.", "tasks": [ { "task": "4cf246ea-0a7f-43da-84b6-61d74a2699af", "attribute": { "image_path_1": "/home/crab/assets/campus.png", "image_path_2": "/home/crab/assets/desert.jpg", "output_path": "/home/crab/assets/campus_desert.png" }, "output": null } ], "adjlist": "0", "id": "7912f7a5-24b9-4dfe-a7b8-1effc1b7a212" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/7d5613ec-9b67-4255-b766-d9c6e8466464.json ================================================ { "description": "Paste clipboard content into LibreOffice Writer and save it as an ODT file at \"/home/crab/assets/content.odt\".", "tasks": [ { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/assets/content.odt" }, "output": null } ], "adjlist": "0", "id": "7d5613ec-9b67-4255-b766-d9c6e8466464" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/7dda7e46-78be-4663-b882-6132dbbff335.json ================================================ { "description": "Adjust the brightness of the image located at \"/home/crab/Pictures/Interstellar.jpg\" to a higher value using GIMP (GNU Image Manipulation Program), save the edited image as \"/home/crab/edited_background.png\", and then set this edited image as the desktop background on the system.", "tasks": [ { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { "image_path_before_edit": "/home/crab/Pictures/Interstellar.jpg", "image_path_after_edit": "/home/crab/edited_background.png" }, "output": "/home/crab/edited_background.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/edited_background.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "7dda7e46-78be-4663-b882-6132dbbff335" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/7e6c4927-2220-4522-9e3f-36f69adc3e71.json 
================================================ { "description": "Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at \"/home/crab/assets/clipboard.md\".", "tasks": [ { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/assets/clipboard.md" }, "output": null } ], "adjlist": "0", "id": "7e6c4927-2220-4522-9e3f-36f69adc3e71" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/82c49e12-3b2f-432e-9069-4b67bafebbf7.json ================================================ { "description": "Open Firefox to find a coffee shop around the hungarian parliament on Google Maps, copy the sharing URL of the coffee shop to the clipboard, then paste the clipboard content into Visual Studio Code (VS Code), and save the content as a file at \"/home/crab/Downloads/coffee\".", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "coffee shop", "place_name": "hungarian parliament" }, "output": null }, { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/Downloads/coffee" }, "output": null } ], "adjlist": "0 1\n1", "id": "82c49e12-3b2f-432e-9069-4b67bafebbf7" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/87910f23-ab23-4ccc-b115-d71cff6f0162.json ================================================ { "description": "Use Firefox to search for an image with the keyword \"patagonia,\" copy the URL of the chosen image to the clipboard, and download the file from that URL to \"/home/crab/Desktop/brand.jpg\".", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "patagonia" }, "output": null }, { "task": "a313ea4d-e501-4971-b4fe-db2aad19acsd", "attribute": { "file_path": "/home/crab/Desktop/brand.jpg" }, "output": null } ], "adjlist": "0 1\n1", "id": "87910f23-ab23-4ccc-b115-d71cff6f0162" } ================================================ FILE: 
crab-benchmark-v0/dataset/ubuntu/8afc25eb-7a80-459f-acdc-5c79fc146c29.json ================================================ { "description": "Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at \"/home/crab/assets/content_2.txt\".", "tasks": [ { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/assets/content_2.txt" }, "output": null } ], "adjlist": "0", "id": "8afc25eb-7a80-459f-acdc-5c79fc146c29" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/8cb5ab6d-a56e-43b9-aa83-00a46331e20f.json ================================================ { "description": "Download the image from \"https://res.cloudinary.com/simpleview/image/upload/v1648755098/clients/austin/Austin_Skyline_Credit_Christopher_Sherman_lifetime__4f60343d-9f69-450c-8ad3-fa636761786d.jpg\" to \"/home/crab/Downloads/Austin.jpg\", then use GIMP (GNU Image Manipulation Program) to adjust its brightness to a higher value and save the modified image as \"/home/crab/Downloads/brighter_austin.jpg\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://res.cloudinary.com/simpleview/image/upload/v1648755098/clients/austin/Austin_Skyline_Credit_Christopher_Sherman_lifetime__4f60343d-9f69-450c-8ad3-fa636761786d.jpg", "file_path": "/home/crab/Downloads/Austin.jpg" }, "output": "/home/crab/Downloads/Austin.jpg" }, { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { "image_path_before_edit": "/home/crab/Downloads/Austin.jpg", "image_path_after_edit": "/home/crab/Downloads/brighter_austin.jpg" }, "output": null } ], "adjlist": "0 1\n1", "id": "8cb5ab6d-a56e-43b9-aa83-00a46331e20f" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/90e09946-7b28-4102-b0ed-f683c01dbbd4.json ================================================ { "description": "Use Firefox to find a code repository about \"W&B\" in GitHub and copy the URL of the 
repository to the clipboard.", "tasks": [ { "task": "bcd03c9f-62c9-4001-8d86-78358c59ce22", "attribute": { "keyword": "W&B" }, "output": null } ], "adjlist": "0", "id": "90e09946-7b28-4102-b0ed-f683c01dbbd4" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/925a3607-2802-48aa-b339-13ebfcef43a2.json ================================================ { "description": "Use Firefox to find a code repository about \"segment-anything\" in GitHub and copy the URL of the repository to the clipboard.", "tasks": [ { "task": "bcd03c9f-62c9-4001-8d86-78358c59ce22", "attribute": { "keyword": "segment-anything" }, "output": null } ], "adjlist": "0", "id": "925a3607-2802-48aa-b339-13ebfcef43a2" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/9506dd30-f58d-4832-b336-8037e83e2689.json ================================================ { "description": "Get the content of \"/home/crab/Documents/nba.txt\" by printing it to the command line interface through a terminal", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/Documents/nba.txt" }, "output": null } ], "adjlist": "0", "id": "9506dd30-f58d-4832-b336-8037e83e2689" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/95e347aa-56ab-4d5d-a94c-350ddfddabf9.json ================================================ { "description": "Create a new directory \"/home/crab/png_folder\" and copy all files with the specified \"png\" extension from \"/home/crab/Pictures\" to the directory \"/home/crab/png_folder\".", "tasks": [ { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "png", "source_dir": "/home/crab/Pictures", "target_dir": "/home/crab/png_folder" }, "output": null } ], "adjlist": "0", "id": "95e347aa-56ab-4d5d-a94c-350ddfddabf9" } ================================================ FILE: 
crab-benchmark-v0/dataset/ubuntu/98a360d8-0f95-44cd-bb9d-442fca2918d4.json ================================================ { "description": "Download a file from \"https://github.com/open-mmlab/mmdetection/archive/refs/tags/v3.3.0.zip\" to \"/home/crab/mmdetection_v3.3.0.zip\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://github.com/open-mmlab/mmdetection/archive/refs/tags/v3.3.0.zip", "file_path": "/home/crab/mmdetection_v3.3.0.zip" }, "output": null } ], "adjlist": "0", "id": "98a360d8-0f95-44cd-bb9d-442fca2918d4" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/9c979fc5-8d60-41f1-a494-904a1d312187.json ================================================ { "description": "Use Firefox to search for the country \"United Kingdom\" on Wikipedia, extract the capital city and population, and save this information in an ODS file at \"/home/crab/assets/content.ods\" with LibreOffice Calc. The first column will save the country name, the second will save the capital city name, and the third will save the population. 
No header is needed in the ODS file.", "tasks": [ { "task": "1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", "attribute": { "country": "United Kingdom", "file_path": "/home/crab/assets/content.ods" }, "output": null } ], "adjlist": "0", "id": "9c979fc5-8d60-41f1-a494-904a1d312187" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/9e08971c-7f83-4853-952e-4c4a4a26333b.json ================================================ { "description": "Use Firefox to search for an image using the keyword \"Red Sea\" and copy the URL of the image to the clipboard.", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "Red Sea" }, "output": null } ], "adjlist": "0", "id": "9e08971c-7f83-4853-952e-4c4a4a26333b" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/9fe4f541-61cf-48e0-a081-4371786659c7.json ================================================ { "description": "Set \"/home/crab/Pictures/Interstellar.jpg\" as the screen background of the system", "tasks": [ { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Pictures/Interstellar.jpg" }, "output": null } ], "adjlist": "0", "id": "9fe4f541-61cf-48e0-a081-4371786659c7" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/a0714ef7-bbdc-4f84-bd2e-c6e611d4db9e.json ================================================ { "description": "Get the content of \"/home/crab/ubuntu\" by printing it to the command line interface through a terminal", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/ubuntu" }, "output": null } ], "adjlist": "0", "id": "a0714ef7-bbdc-4f84-bd2e-c6e611d4db9e" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/a2a34580-cded-4bf8-81d9-b36a4d4402d0.json ================================================ { "description": "Set 
\"/home/crab/assets/background.png\" as the screen background of the system", "tasks": [ { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/assets/background.png" }, "output": null } ], "adjlist": "0", "id": "a2a34580-cded-4bf8-81d9-b36a4d4402d0" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/a6b67c2d-d448-4e77-904e-dc7c5f21a5fe.json ================================================ { "description": "Get the content of \"/home/crab/crab/README.md\" by printing it to the command line interface through a terminal", "tasks": [ { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/crab/README.md" }, "output": null } ], "adjlist": "0", "id": "a6b67c2d-d448-4e77-904e-dc7c5f21a5fe" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/a70ab903-835f-48b7-8356-2321b8b869d8.json ================================================ { "description": "Using Firefox, find the example of torch.matmul provided by the official PyTorch version 1.13 documentation and copy all the lines of code in the example to the clipboard, then paste the clipboard content into LibreOffice Writer and save it as an ODT file at \"/home/crab/Desktop/doc_torch.odt\".", "tasks": [ { "task": "49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", "attribute": {}, "output": null }, { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/Desktop/doc_torch.odt" }, "output": null } ], "adjlist": "0 1\n1", "id": "a70ab903-835f-48b7-8356-2321b8b869d8" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/a78177f5-6cc6-48d7-8c6f-df53399d7759.json ================================================ { "description": "Use Firefox to search for an image using the keyword \"The Colosseum\" and copy the URL of the image to the clipboard.", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { 
"keyword": "The Colosseum" }, "output": null } ], "adjlist": "0", "id": "a78177f5-6cc6-48d7-8c6f-df53399d7759" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/abb16512-27ae-49c0-b12b-7fbf0e95056b.json ================================================ { "description": "Paste the clipboard content into Visual Studio Code (VS Code) and save the file as \"/home/crab/Desktop/content.txt\", then open a terminal and print the content of \"/home/crab/Desktop/content.txt\" to the command line interface.", "tasks": [ { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/Desktop/content.txt" }, "output": "/home/crab/Desktop/content.txt" }, { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/Desktop/content.txt" }, "output": null } ], "adjlist": "0 1\n1", "id": "abb16512-27ae-49c0-b12b-7fbf0e95056b" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/b2ca21dc-dde9-49f5-bec7-321fbf769315.json ================================================ { "description": "Adjust the brightness of the image located at \"/home/crab/assets/desert.jpg\" to a darker value using LibreOffice Impress and save it as \"/home/crab/assets/darker_desert.jpg\", then use GIMP (GNU Image Manipulation Program) to combine this adjusted image with the original image at \"/home/crab/assets/desert.jpg\", placing the darker image on the left side and the original on the right, finally save the resulting comparison image to \"/home/crab/assets/desert_comparison.jpg\".", "tasks": [ { "task": "434402f3-647a-4a9a-9d8f-10f5bb6c7cf0", "attribute": { "image_path_before_edit": "/home/crab/assets/desert.jpg", "image_path_after_edit": "/home/crab/assets/darker_desert.jpg" }, "output": "/home/crab/assets/darker_desert.jpg" }, { "task": "4cf246ea-0a7f-43da-84b6-61d74a2699af", "attribute": { "image_path_1": "/home/crab/assets/darker_desert.jpg", "image_path_2": 
"/home/crab/assets/desert.jpg", "output_path": "/home/crab/assets/desert_comparison.jpg" }, "output": null } ], "adjlist": "0 1\n1", "id": "b2ca21dc-dde9-49f5-bec7-321fbf769315" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/b57c96c1-071b-40f6-b33b-2a0459fc25bb.json ================================================ { "description": "Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from \"/home/crab/assets/background.png\" to a higher value (brighter) and save it to \"/home/crab/Pictures/background_edited.jpg\".", "tasks": [ { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { "image_path_before_edit": "/home/crab/assets/background.png", "image_path_after_edit": "/home/crab/Pictures/background_edited.jpg" }, "output": null } ], "adjlist": "0", "id": "b57c96c1-071b-40f6-b33b-2a0459fc25bb" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/b73019e0-3ce8-4657-8b13-b3e0ab6cfac8.json ================================================ { "description": "Download a file from \"https://raw.githubusercontent.com/camel-ai/camel/master/misc/primary_logo.png\" to \"/home/crab/camel-logo.png\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://raw.githubusercontent.com/camel-ai/camel/master/misc/primary_logo.png", "file_path": "/home/crab/camel-logo.png" }, "output": null } ], "adjlist": "0", "id": "b73019e0-3ce8-4657-8b13-b3e0ab6cfac8" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/ba5aebcb-999d-44d4-b9bc-241f9884c6dd.json ================================================ { "description": "Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from \"/home/crab/Pictures/Interstellar.jpg\" to a higher value (brighter) and save it to \"/home/crab/interstellar_brighter.jpg\".", "tasks": [ { "task": "cc1adae7-bef9-4c8a-865d-00d44486dd69", "attribute": { 
"image_path_before_edit": "/home/crab/Pictures/Interstellar.jpg", "image_path_after_edit": "/home/crab/interstellar_brighter.jpg" }, "output": null } ], "adjlist": "0", "id": "ba5aebcb-999d-44d4-b9bc-241f9884c6dd" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/be6468be-2218-45c1-9b75-b56efec61eb4.json ================================================ { "description": "Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at \"/home/crab/text_result\".", "tasks": [ { "task": "8491e674-596b-452b-9e0e-58a44d90f947", "attribute": { "file_path": "/home/crab/text_result" }, "output": null } ], "adjlist": "0", "id": "be6468be-2218-45c1-9b75-b56efec61eb4" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/c4106f9a-9348-4a55-9892-782e6f4b3081.json ================================================ { "description": "Use LibreOffice Impress to adjust the brightness of the image from \"/home/crab/assets/desert.jpg\" to a lower value (darker) and save it to \"/home/crab/assets/desert_edited.png\".", "tasks": [ { "task": "434402f3-647a-4a9a-9d8f-10f5bb6c7cf0", "attribute": { "image_path_before_edit": "/home/crab/assets/desert.jpg", "image_path_after_edit": "/home/crab/assets/desert_edited.png" }, "output": null } ], "adjlist": "0", "id": "c4106f9a-9348-4a55-9892-782e6f4b3081" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/c8800e50-3ff4-4dd2-bc90-33688be99659.json ================================================ { "description": "Download a file from \"https://raw.githubusercontent.com/facebookresearch/detectron2/main/README.md\" to \"/home/crab/Documents/detectron2.txt\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://raw.githubusercontent.com/facebookresearch/detectron2/main/README.md", "file_path": "/home/crab/Documents/detectron2.txt" }, "output": null } ], "adjlist": "0", "id": 
"c8800e50-3ff4-4dd2-bc90-33688be99659" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/ccf31785-ec13-4981-93c5-ca6c242ac0c3.json ================================================ { "description": "Download the flag of Ethiopia image from \"https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Flag_of_Ethiopia.svg/250px-Flag_of_Ethiopia.svg.png\" to \"/home/crab/Pictures/flag.png\", create a new directory named \"/home/crab/Pictures/png_\", and copy all PNG files from \"/home/crab/Pictures\" to the newly created directory \"/home/crab/Pictures/png_\".", "tasks": [ { "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", "attribute": { "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Flag_of_Ethiopia.svg/250px-Flag_of_Ethiopia.svg.png", "file_path": "/home/crab/Pictures/flag.png" }, "output": "/home/crab/Pictures/flag.png" }, { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "png", "source_dir": "/home/crab/Pictures", "target_dir": "/home/crab/Pictures/png_" }, "output": null } ], "adjlist": "0 1\n1", "id": "ccf31785-ec13-4981-93c5-ca6c242ac0c3" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/d3478489-70f2-4a82-b7d2-0a47b75986eb.json ================================================ { "description": "Use Firefox to search for the country \"Ethiopia\" on Wikipedia, extract the capital city and population, save this information in an ODS file at \"/home/crab/Documents/africa.ods\" with LibreOffice Calc with the first column for the country name, the second for the capital city name, and the third for the population without any header, then create a new directory \"/home/crab/sheet\" and copy all ODS files from \"/home/crab/Documents\" to \"/home/crab/sheet\".", "tasks": [ { "task": "1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", "attribute": { "country": "Ethiopia", "file_path": "/home/crab/Documents/africa.ods" }, "output": 
"/home/crab/Documents/africa.ods" }, { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "ods", "source_dir": "/home/crab/Documents", "target_dir": "/home/crab/sheet" }, "output": null } ], "adjlist": "0 1\n1", "id": "d3478489-70f2-4a82-b7d2-0a47b75986eb" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/d39d40b1-fc26-4169-9d6f-cdf81efe9a3e.json ================================================ { "description": "Use Firefox to search for the country \"Iceland\" on Wikipedia, extract the capital city and population, and save this information in an ODS file at \"/home/crab/country_iceland.ods\" with LibreOffice Calc. The first column will save the country name, the second will save the capital city name, and the third will save the population. No header is needed in the ODS file.", "tasks": [ { "task": "1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", "attribute": { "country": "Iceland", "file_path": "/home/crab/country_iceland.ods" }, "output": null } ], "adjlist": "0", "id": "d39d40b1-fc26-4169-9d6f-cdf81efe9a3e" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/d3c917ff-406f-447a-87f5-b8d835cba750.json ================================================ { "description": "Combine Image 1 \"/home/crab/Pictures/cat.png\" and Image 2 \"/home/crab/assets/campus.png\" using GIMP (GNU Image Manipulation Program), placing Image 1 on the left side of Image 2, and save the combined image to \"/home/crab/Desktop/background.png\". 
Then, set this combined image as the screen background of the system.", "tasks": [ { "task": "4cf246ea-0a7f-43da-84b6-61d74a2699af", "attribute": { "image_path_1": "/home/crab/Pictures/cat.png", "image_path_2": "/home/crab/assets/campus.png", "output_path": "/home/crab/Desktop/background.png" }, "output": "/home/crab/Desktop/background.png" }, { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/Desktop/background.png" }, "output": null } ], "adjlist": "0 1\n1", "id": "d3c917ff-406f-447a-87f5-b8d835cba750" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/d6e460e4-c295-40ad-883c-11300d7832f0.json ================================================ { "description": "Using Firefox, locate the example provided of torch.matmul by the official PyTorch version 1.13 documentation and copy all the lines of code to the clipboard, then open LibreOffice Writer, paste the content from the clipboard, and save the document as an ODT file at \"/home/crab/Documents/torch_matmul.odt\".", "tasks": [ { "task": "49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", "attribute": {}, "output": null }, { "task": "76de4bdb-c980-4b3a-9bd3-c87db467dffe", "attribute": { "file_path": "/home/crab/Documents/torch_matmul.odt" }, "output": null } ], "adjlist": "0 1\n1", "id": "d6e460e4-c295-40ad-883c-11300d7832f0" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/d9e4e23c-2a2a-4b5c-b034-7deb6036572d.json ================================================ { "description": "Use Firefox to find out a \"amusement park\" around \"Sentosa\" on Google Maps and copy the Google Maps sharing URL of that \"amusement park\" to the clipboard", "tasks": [ { "task": "2b189dc2-c77f-4fa3-8432-ba4355cc294c", "attribute": { "place_type": "amusement park", "place_name": "Sentosa" }, "output": null } ], "adjlist": "0", "id": "d9e4e23c-2a2a-4b5c-b034-7deb6036572d" } ================================================ FILE: 
crab-benchmark-v0/dataset/ubuntu/e31d4e3b-b753-4deb-b9ad-a0add5d4790e.json ================================================ { "description": "Use Firefox to search for an image with the keyword \"Mission: Impossible\", copy the image's URL to the clipboard, and then download the file from the clipboard's URL to \"/home/crab/Pictures/movie.jpg\".", "tasks": [ { "task": "017102b6-d2c3-466b-96f7-37c8bcddc41a", "attribute": { "keyword": "Mission: Impossible" }, "output": "" }, { "task": "a313ea4d-e501-4971-b4fe-db2aad19acsd", "attribute": { "file_path": "/home/crab/Pictures/movie.jpg" }, "output": null } ], "adjlist": "0 1\n1", "id": "e31d4e3b-b753-4deb-b9ad-a0add5d4790e" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/f07a1f32-2f3f-40e7-b12f-8f1b128c41f6.json ================================================ { "description": "Create a new directory \"/home/crab/assets_copy\" and copy all files with the specified \"txt\" extension from \"/home/crab/assets\" to the directory \"/home/crab/assets_copy\".", "tasks": [ { "task": "217ababc-ccc7-4b9f-af07-c239d92848fe", "attribute": { "file_extension": "txt", "source_dir": "/home/crab/assets", "target_dir": "/home/crab/assets_copy" }, "output": null } ], "adjlist": "0", "id": "f07a1f32-2f3f-40e7-b12f-8f1b128c41f6" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/f5cce3a0-ba65-4317-95f8-1fc7d9776c78.json ================================================ { "description": "Set \"/home/crab/deepmind.png\" as the screen background of the system", "tasks": [ { "task": "a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", "attribute": { "photo_path": "/home/crab/deepmind.png" }, "output": null } ], "adjlist": "0", "id": "f5cce3a0-ba65-4317-95f8-1fc7d9776c78" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/f67a26e4-58dd-4dc6-8859-affbf1d62f94.json ================================================ { "description": "Open 
\"/home/crab/poem\" using vim in a terminal, write \"Two roads diverged in a yellow wood, and sorry I could not travel both and be one traveler, long I stood and looked down one as far as I could to where it bent in the undergrowth.\", save and exit vim, and then print the content of \"/home/crab/poem\" to the command line interface through the terminal.", "tasks": [ { "task": "0f589bf9-9b26-4581-8b78-2961b115ab49", "attribute": { "file_path": "/home/crab/poem", "content": "Two roads diverged in a yellow wood, and sorry I could not travel both and be one traveler, long I stood and looked down one as far as I could to where it bent in the undergrowth." }, "output": "/home/crab/poem" }, { "task": "5b527839-0e58-426d-bab6-7160200b0d24", "attribute": { "file_path": "/home/crab/poem" }, "output": null } ], "adjlist": "0 1\n1", "id": "f67a26e4-58dd-4dc6-8859-affbf1d62f94" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu/f96d7c34-9543-4679-a6ea-89e0c2ef7b1c.json ================================================ { "description": "Open \"/home/crab/Documents/result\" using vim in a terminal, write \"Celtics vs. Mavericks odds, score prediction, time: 2024 NBA Finals picks, Game 1 best bets by proven model\", then save and exit vim.", "tasks": [ { "task": "0f589bf9-9b26-4581-8b78-2961b115ab49", "attribute": { "file_path": "/home/crab/Documents/result", "content": "Celtics vs. Mavericks odds, score prediction, time: 2024 NBA Finals picks, Game 1 best bets by proven model" }, "output": null } ], "adjlist": "0", "id": "f96d7c34-9543-4679-a6ea-89e0c2ef7b1c" } ================================================ FILE: crab-benchmark-v0/dataset/ubuntu_subtasks.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# ruff: noqa: E501
import base64
import hashlib
import io
import os
import re
import subprocess
import time
from collections import Counter
from functools import cache
from typing import Callable, List, Optional, Tuple

import cv2
import easyocr
import imageio as imio
import networkx as nx
import numpy as np
import psutil
import pyperclip
import requests
import torch
from networkx import DiGraph, path_graph
from numpy.linalg import norm
from PIL import Image

from crab import SubTask, TaskGenerator, action, evaluator
from crab.actions.crab_actions import check_submit, submit


class ImageMatcher:
    """
    A class to handle image matching, resizing, and cropping operations using
    accelerated feature matching.
    See https://github.com/verlab/accelerated_features.
    """

    def __init__(self, top_k: int = 4096):
        """
        Initializes the ImageMatcher with a pretrained XFeat model.

        Parameters:
            top_k (int): The number of top features to use for matching.
        """
        # Downloads/loads the XFeat model through torch.hub at construction
        # time, so instantiation requires network access on first use.
        self.xfeat = torch.hub.load(
            "verlab/accelerated_features", "XFeat", pretrained=True, top_k=top_k
        )
        self.top_k = top_k

    def warp_corners_and_draw_matches(
        self,
        ref_points: np.ndarray,
        dst_points: np.ndarray,
        img1: np.ndarray,
        img2: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Calculates the homography matrix and warps the corners of the first
        image to the second image space.

        Parameters:
            ref_points (np.ndarray): Reference points from the first image.
            dst_points (np.ndarray): Destination points from the second image.
            img1 (np.ndarray): The first image.
            img2 (np.ndarray): The second image.

        Returns:
            Tuple[np.ndarray, np.ndarray]: Image with warped corners drawn and
            the warped corner coordinates.
        """
        # Robust homography estimation (MAGSAC) from the matched keypoints.
        H, mask = cv2.findHomography(
            ref_points,
            dst_points,
            cv2.USAC_MAGSAC,
            3.5,
            maxIters=1000,
            confidence=0.999,
        )
        mask = mask.flatten()
        h, w = img1.shape[:2]
        corners_img1 = np.array(
            [[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]], dtype=np.float32
        ).reshape(-1, 1, 2)
        warped_corners = cv2.perspectiveTransform(corners_img1, H)
        img2_with_corners = img2.copy()
        # i-1 at i=0 wraps to the last corner, closing the quadrilateral.
        for i in range(len(warped_corners)):
            start_point = tuple(warped_corners[i - 1][0].astype(int))
            end_point = tuple(warped_corners[i][0].astype(int))
            cv2.line(img2_with_corners, start_point, end_point, (0, 255, 0), 4)
        keypoints1 = [cv2.KeyPoint(p[0], p[1], 5) for p in ref_points]
        keypoints2 = [cv2.KeyPoint(p[0], p[1], 5) for p in dst_points]
        # Only inlier matches (mask == 1) are drawn.
        matches = [cv2.DMatch(i, i, 0) for i in range(len(mask)) if mask[i]]
        img_matches = cv2.drawMatches(
            img1,
            keypoints1,
            img2_with_corners,
            keypoints2,
            matches,
            None,
            matchColor=(0, 255, 0),
            flags=2,
        )
        return img_matches, warped_corners

    def _get_bounding_box(
        self, warped_corners: np.ndarray, img_shape: Tuple[int, int]
    ) -> List[int]:
        """
        Computes the axis-aligned bounding box around the warped corners,
        clamped to the image bounds.

        Parameters:
            warped_corners (np.ndarray): The warped corner coordinates,
                shaped (N, 1, 2) as returned by cv2.perspectiveTransform.
            img_shape (Tuple[int, int]): The image shape as (height, width).

        Returns:
            List[int]: Bounding box coordinates [x_min, x_max, y_min, y_max].
        """
        h, w = img_shape
        x_min = np.min(warped_corners[:, 0, 0])
        x_max = np.max(warped_corners[:, 0, 0])
        y_min = np.min(warped_corners[:, 0, 1])
        y_max = np.max(warped_corners[:, 0, 1])
        # Clamp to valid pixel coordinates.
        x_min = max(0, x_min)
        x_max = min(w - 1, x_max)
        y_min = max(0, y_min)
        y_max = min(h - 1, y_max)
        return [int(x_min), int(x_max), int(y_min), int(y_max)]

    def _resize_image(
        self, img1: np.ndarray, img2: np.ndarray, scale: float, match_dimension: str
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Resizes img1 (preserving aspect ratio) so that one dimension matches a
        scaled dimension of img2.

        Parameters:
            img1 (np.ndarray): The image to be resized.
            img2 (np.ndarray): The reference image (returned unchanged).
            scale (float): The scale factor (e.g. 0.5 for half size).
            match_dimension (str): The dimension to match ('height' or 'width').

        Returns:
            Tuple[np.ndarray, np.ndarray]: Resized img1 and original img2.

        Raises:
            ValueError: If match_dimension is not 'height' or 'width'.
        """
        h1, w1 = img1.shape[:2]
        h2, w2 = img2.shape[:2]
        if match_dimension == "height":
            new_height = int(h2 * scale)
            new_width = int(w1 * (new_height / h1))
        elif match_dimension == "width":
            new_width = int(w2 * scale)
            new_height = int(h1 * (new_width / w1))
        else:
            raise ValueError("match_dimension must be either 'height' or 'width'.")
        resized_img1 = cv2.resize(img1, (new_width, new_height))
        return resized_img1, img2

    def get_resizing_functions(
        self,
    ) -> List[Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]]:
        """
        Provides the candidate resizing strategies tried by match_images:
        identity, match height/width at full scale, match height/width at
        half scale.

        Returns:
            List[Callable]: List of (img1, img2) -> (img1', img2) functions.
        """
        return [
            lambda x, y: (x, y),
            lambda x, y: self._resize_image(x, y, 1.0, "height"),
            lambda x, y: self._resize_image(x, y, 1.0, "width"),
            lambda x, y: self._resize_image(x, y, 0.5, "height"),
            lambda x, y: self._resize_image(x, y, 0.5, "width"),
        ]

    def match_images(
        self,
        im1_path: str,
        im2_path: str,
        top_k: int = 4096,
        match_num_threshold: int = 80,
    ) -> Tuple[Optional[List[int]], Optional[np.ndarray], int]:
        """
        Matches two images, trying several resize strategies, and finds the
        bounding box around the matched area if sufficient matches are found.

        Parameters:
            im1_path (str): Path to the first image (the template).
            im2_path (str): Path to the second image (searched in).
            top_k (int): The number of top features to use for matching.
            match_num_threshold (int): Minimum number of matches required to
                consider the match valid.

        Returns:
            Tuple[Optional[List[int]], Optional[np.ndarray], int]: Bounding
            box [x_min, x_max, y_min, y_max] (or None), visualization image
            with matches drawn (or None), and the number of matches found.
        """
        im1 = self.load_and_convert_image(im1_path)
        im2 = self.load_and_convert_image(im2_path)
        best_matches = {
            "count": 0,
            "im1_resized": None,
            "im2_resized": None,
            "mkpts_0": None,
            "mkpts_1": None,
        }
        for resize_func in self.get_resizing_functions():
            try:
                im1_resized, im2_resized = resize_func(im1, im2)
                mkpts_0, mkpts_1 = self.xfeat.match_xfeat_star(
                    im1_resized, im2_resized, top_k=top_k
                )
                if len(mkpts_0) > best_matches["count"]:
                    best_matches.update(
                        {
                            "count": len(mkpts_0),
                            "im1_resized": im1_resized,
                            "im2_resized": im2_resized,
                            "mkpts_0": mkpts_0,
                            "mkpts_1": mkpts_1,
                        }
                    )
            except Exception:
                # Best-effort: a failing resize strategy is simply skipped.
                continue
        if best_matches["count"] >= match_num_threshold:
            canvas, warped_corners = self.warp_corners_and_draw_matches(
                best_matches["mkpts_0"],
                best_matches["mkpts_1"],
                best_matches["im1_resized"],
                best_matches["im2_resized"],
            )
            # BUG FIX: the bounding box must be clamped to the shape of the
            # image that produced the *best* match, not to `im2_resized` left
            # over from the last loop iteration (which may correspond to a
            # different resize strategy).
            bbox = self._get_bounding_box(
                warped_corners, best_matches["im2_resized"].shape[:2]
            )
        else:
            bbox, canvas = None, None
        return bbox, canvas, best_matches["count"]

    def load_and_convert_image(self, filepath: str) -> np.ndarray:
        """
        Loads an image from a file and converts it to JPG format if necessary.

        Parameters:
            filepath (str): The path to the image file.

        Returns:
            np.ndarray: The loaded image as a BGR array (channels reversed
            from the RGB JPEG re-encode).
        """
        image = Image.open(filepath)
        if image.mode != "RGB":
            image = image.convert("RGB")
        with io.BytesIO() as output:
            image.save(output, format="JPEG")
            converted_image = np.copy(imio.v2.imread(output)[..., ::-1])
        return converted_image


image_matcher = ImageMatcher()


def from_env_load_and_save_file(env, file_path, output_dir="/tmp/local_save"):
    """
    Load a file from the remote environment, transfer it base64-encoded, and
    save it to a local directory with the same basename.

    Args:
        env: The environment object with the _action_endpoint method.
        file_path (str): The path to the file to be loaded.
        output_dir (str): The directory where the file should be saved
            (default is "/tmp/local_save").

    Returns:
        str: The path to the saved file.
    """

    @action(env_name="ubuntu")
    def get_encoded_file(file_path: str) -> bytes | None:
        try:
            with open(file_path, "rb") as file:
                file_bytes = file.read()
            encoded_string = base64.b64encode(file_bytes).decode("utf-8")
        except Exception:
            return None
        return encoded_string

    # Create output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)

    # Load the file from the environment as a base64 string.
    encoded_string = env._action_endpoint(get_encoded_file, {"file_path": file_path})

    # NOTE(review): if the remote read failed, encoded_string is None and the
    # decode below raises AttributeError — callers appear to assume the file
    # exists; confirm.
    decoded_bytes = base64.b64decode(encoded_string.encode("utf-8"))

    # Save the decoded bytes under the same basename locally.
    file_name = os.path.basename(file_path)
    output_file_path = os.path.join(output_dir, file_name)
    with open(output_file_path, "wb") as file:
        file.write(decoded_bytes)
    return output_file_path
""" @action(env_name="ubuntu") def get_encoded_file(file_path: str) -> bytes | None: try: with open(file_path, "rb") as file: file_bytes = file.read() encoded_string = base64.b64encode(file_bytes).decode("utf-8") except Exception: return None return encoded_string # Create output directory if it does not exist os.makedirs(output_dir, exist_ok=True) # Load the file and convert to bytes encoded_string = env._action_endpoint(get_encoded_file, {"file_path": file_path}) # Decode the Base64 string back to bytes decoded_bytes = base64.b64decode(encoded_string.encode("utf-8")) # Create the output file path file_name = os.path.basename(file_path) output_file_path = os.path.join(output_dir, file_name) # Save the decoded bytes to the output path with open(output_file_path, "wb") as file: file.write(decoded_bytes) return output_file_path def crop_image(img: np.ndarray, bbox: List[int]) -> np.ndarray: """ Crops the image based on the bounding box coordinates. Parameters: img (np.ndarray): The input image. bbox (List[int]): Bounding box coordinates [x_min, x_max, y_min, y_max]. Returns: np.ndarray: The cropped image. """ x_min, x_max, y_min, y_max = bbox return img[y_min:y_max, x_min:x_max] def calculate_bbox_center(bbox: List[int]) -> Tuple[int, int]: """ Calculates the center of a bounding box. Parameters: bbox (List[int]): The bounding box coordinates [x_min, x_max, y_min, y_max]. Returns: Tuple[int, int]: The center coordinates (x, y). """ x_min, x_max, y_min, y_max = bbox x_center = (x_min + x_max) // 2 y_center = (y_min + y_max) // 2 return x_center, y_center def is_bbox_in_direction(bbox_1: List[int], bbox_2: List[int], direction: str) -> bool: """ Check if the center of bbox_1 is in the specified direction relative to the center of bbox_2. Args: bbox_1 (List[int]): The bounding box coordinates [x_min, x_max, y_min, y_max] of the first bounding box. bbox_2 (List[int]): The bounding box coordinates [x_min, x_max, y_min, y_max] of the second bounding box. 
def ocr_text_matching(
    image_path: str, text: str
) -> Optional[Tuple[List[int], str, float]]:
    """
    Run OCR on an image and look for a given substring.

    Parameters:
        image_path (str): The path to the image file.
        text (str): The text string to search for in the image.

    Returns:
        Optional[Tuple[List[int], str, float]]: The bounding box
        [x_min, x_max, y_min, y_max], the matched text, and the OCR
        confidence for the first hit, otherwise None.
    """
    for quad, detected_text, confidence in easyocr.Reader(["en"]).readtext(image_path):
        if text not in detected_text:
            continue
        # The OCR result gives four corner points; reduce to an axis-aligned box.
        xs = [point[0] for point in quad]
        ys = [point[1] for point in quad]
        return (
            [int(min(xs)), int(max(xs)), int(min(ys)), int(max(ys))],
            detected_text,
            confidence,
        )
    return None


def convert_file_to_images(file_path: str) -> List[str]:
    """
    Convert a document to JPG images using LibreOffice.

    Args:
        file_path (str): The path to the file.

    Returns:
        List[str]: Paths of generated images whose names contain the source
        file's basename.

    Raises:
        RuntimeError: If the LibreOffice conversion fails.
        FileNotFoundError: If no converted images can be located.
    """
    output_format = "jpg"
    output_dir = "/tmp/converted_images"
    os.makedirs(output_dir, exist_ok=True)

    # Headless LibreOffice does the actual conversion.
    result = subprocess.run(
        [
            "libreoffice",
            "--headless",
            "--convert-to",
            output_format,
            "--outdir",
            output_dir,
            file_path,
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f"Conversion failed: {result.stderr}")

    image_files = [
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if name.endswith(f".{output_format}")
    ]
    if not image_files:
        raise FileNotFoundError(
            f"No {output_format} files found in the output directory"
        )

    # Keep only outputs derived from this particular input file (the output
    # directory is shared between conversions).
    file_basename = os.path.splitext(os.path.basename(file_path))[0]
    matching_images = [
        name for name in image_files if file_basename in os.path.basename(name)
    ]
    if not matching_images:
        raise FileNotFoundError(
            f"No images found with basename matching the original file: {file_basename}"
        )
    return matching_images
""" output_format = "jpg" output_dir = "/tmp/converted_images" os.makedirs(output_dir, exist_ok=True) # Run LibreOffice conversion command result = subprocess.run( [ "libreoffice", "--headless", "--convert-to", output_format, "--outdir", output_dir, file_path, ], capture_output=True, text=True, ) # Check if the conversion was successful if result.returncode != 0: raise RuntimeError(f"Conversion failed: {result.stderr}") # Collect the generated image file paths image_files = [ os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(f".{output_format}") ] # Verify if the files were successfully saved if not image_files: raise FileNotFoundError( f"No {output_format} files found in the output directory" ) # Get the basename of the original file (without extension) file_basename = os.path.splitext(os.path.basename(file_path))[0] # Check if any of the images match the basename of the original file matching_images = [f for f in image_files if file_basename in os.path.basename(f)] if not matching_images: raise FileNotFoundError( f"No images found with basename matching the original file: {file_basename}" ) return matching_images def cleanup_files(files: List[str]): """ Delete the list of files. Args: files (List[str]): List of paths to the files to be deleted. """ for file in files: os.remove(file) def is_valid_url(url): # Regular expression to check if the string is a valid HTTP/HTTPS URL url_pattern = re.compile( r"^(https?://)" # http:// or https:// r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|" # domain r"localhost|" # localhost... r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # ...or ip r"(?::\d+)?" 
def is_valid_image_data_uri(uri):
    """Return True when the string is a base64 Data URI for a common image format."""
    data_uri_pattern = re.compile(
        r"^data:image/(png|jpeg|gif|svg\+xml|bmp|webp);base64,[A-Za-z0-9+/]+={0,2}$",
        re.IGNORECASE,
    )
    return bool(re.match(data_uri_pattern, uri))


def is_github_repo_url(url):
    """Return True when the URL points at a GitHub repository root (user/repo)."""
    github_repo_pattern = re.compile(
        r"^https?://"  # Protocol
        r"github\.com/"  # Domain
        r"[^/]+/"  # Username
        r"[^/]+/?$",  # Repository name, optional trailing slash
        re.IGNORECASE,
    )
    return bool(re.match(github_repo_pattern, url))


def get_rgb_values_outside_bbox(
    img: np.ndarray, bbox: List[int], margin: int = 10
) -> List[int]:
    """
    Find the most frequent pixel color outside a bounding box (expanded by a
    margin) and return it channel-reversed.

    FIX: the previous return annotation/docstring claimed a
    Tuple[np.ndarray, Tuple[int, int, int]] return, but the function has
    always returned a single reversed 3-element list; annotation and docs
    now match the behavior (code unchanged).

    Parameters:
        img (np.ndarray): The input image (H x W x 3).
        bbox (List[int]): Bounding box coordinates [x_min, x_max, y_min, y_max].
        margin (int): The margin to add outside the bounding box. Default is 10.

    Returns:
        List[int]: The most frequent pixel value outside the box, with the
        channel order reversed (BGR input -> RGB output, presumably — the
        caller's channel order is not visible here; confirm).
    """
    x_min, x_max, y_min, y_max = bbox

    # Ensure the coordinates with margin are within image dimensions
    x_min_with_margin = max(0, x_min - margin)
    x_max_with_margin = min(img.shape[1], x_max + margin)
    y_min_with_margin = max(0, y_min - margin)
    y_max_with_margin = min(img.shape[0], y_max + margin)

    # Mask out the (expanded) bounding box area; True marks "outside" pixels.
    mask = np.ones(img.shape[:2], dtype=bool)
    mask[y_min_with_margin:y_max_with_margin, x_min_with_margin:x_max_with_margin] = (
        False
    )

    # Extract the pixel values outside the bounding box with margin
    rgb_values = img[mask]

    # Find the most frequent pixel value.
    rgb_values_tuple = [tuple(rgb) for rgb in rgb_values]
    most_common_rgb = Counter(rgb_values_tuple).most_common(1)[0][0]
    return list(most_common_rgb)[::-1]
""" x_min, x_max, y_min, y_max = bbox # Ensure the coordinates with margin are within image dimensions x_min_with_margin = max(0, x_min - margin) x_max_with_margin = min(img.shape[1], x_max + margin) y_min_with_margin = max(0, y_min - margin) y_max_with_margin = min(img.shape[0], y_max + margin) # Create a mask for the bounding box area with margin mask = np.ones(img.shape[:2], dtype=bool) mask[y_min_with_margin:y_max_with_margin, x_min_with_margin:x_max_with_margin] = ( False ) # Extract the RGB values outside the bounding box with margin rgb_values = img[mask] # Find the most frequent RGB value rgb_values_tuple = [tuple(rgb) for rgb in rgb_values] most_common_rgb = Counter(rgb_values_tuple).most_common(1)[0][0] return list(most_common_rgb)[::-1] def contains_required_strings(clipboard_content: str, required_strings: list) -> bool: """ Check if all required strings are present in the clipboard content. Args: clipboard_content (str): The content from the clipboard. required_strings (list): A list of required strings to check. Returns: bool: True if all required strings are found in the clipboard content, False otherwise. """ for string in required_strings: if string not in clipboard_content: return False return True @evaluator(env_name="ubuntu") def verify_file_content_with_clipboard(file_path: str) -> bool: """ Verify that the content of the file matches the clipboard content line by line. Args: file_path (str): The path to the file to verify. Returns: bool: True if the file content matches the clipboard content, False otherwise. """ def verify_content_with_clipboard(file_content: str) -> bool: """ Verify that the provided file content matches the clipboard content line by line. Args: file_content (str): The content of the file to verify. Returns: bool: True if the file content matches the clipboard content, False otherwise. 
""" clipboard_content = pyperclip.paste() clipboard_lines = clipboard_content.split("\n") file_lines = file_content.split("\n") # Check if each line from the clipboard content is in the corresponding line in the file content for clipboard_line, file_line in zip(clipboard_lines, file_lines): if clipboard_line not in file_line: return False return True with open(file_path, "r") as file: file_content = file.read() return verify_content_with_clipboard(file_content) @evaluator(env_name="ubuntu") def verify_odt_file_content_with_clipboard(file_path: str) -> bool: """ Verify that the content of the ODT file matches the clipboard content. Args: file_path (str): The path to the ODT file to verify. Returns: bool: True if the ODT file content matches the clipboard content, False otherwise. """ from odf import teletype, text from odf.opendocument import load def verify_content_with_clipboard(file_content: str) -> bool: """ Verify that the provided file content matches the clipboard content line by line. Args: file_content (str): The content of the file to verify. Returns: bool: True if the file content matches the clipboard content, False otherwise. """ clipboard_content = pyperclip.paste() clipboard_lines = clipboard_content.split("\n") file_lines = file_content.split("\n") # Check if each line from the clipboard content is in the corresponding line in the file content for clipboard_line, file_line in zip(clipboard_lines, file_lines): if clipboard_line not in file_line: return False return True textdoc = load(file_path) allparas = textdoc.getElementsByType(text.P) odt_content = "\n".join([teletype.extractText(p) for p in allparas]) return verify_content_with_clipboard(odt_content) @evaluator(env_name="ubuntu", local=True) def verify_combined_image( image_path_1: str, image_path_2: str, file_path: str, direction: str, env ) -> bool: """ Check if the combined file contains both input images without overlay and in the specified direction. 
@evaluator(env_name="ubuntu")
def is_image_2_brighter(image_path_1: str, image_path_2: str) -> bool:
    """
    Check if the second image is brighter than the first image.

    Args:
        image_path_1 (str): The path to the first image.
        image_path_2 (str): The path to the second image.

    Returns:
        bool: True when the second image's mean brightness is strictly higher.
    """

    def brightness(path: str) -> float:
        # Color images: per-pixel euclidean norm over the channels,
        # normalized by sqrt(3); grayscale: plain mean. (Do not feed HSV
        # images to this helper.)
        img = cv2.imread(path)
        if len(img.shape) == 3:
            return float(np.average(norm(img, axis=2)) / np.sqrt(3))
        return float(np.average(img))

    return brightness(image_path_2) > brightness(image_path_1)
""" def brightness(image_path: str) -> float: # Load the image img = cv2.imread(image_path) if len(img.shape) == 3: # Colored RGB or BGR (*Do Not* use HSV images with this function) # create brightness with euclidean norm return float(np.average(norm(img, axis=2)) / np.sqrt(3)) else: # Grayscale return float(np.average(img)) brightness_1 = brightness(image_path_1) brightness_2 = brightness(image_path_2) return brightness_2 > brightness_1 @evaluator(env_name="ubuntu") def is_img_url_in_clipboard() -> bool: """ Check if the clipboard contains a valid URL or a Data URI that is specific to images. Args: env (Environment): The current testing environment, used to simulate clipboard functionality. Returns: bool: True if a valid URL or Data URI specific to images is found in the clipboard, False otherwise. """ clipboard_content = pyperclip.paste() # Simulate clipboard paste action data_uri_pattern = re.compile( r"^data:image/(png|jpeg|gif|svg\+xml|bmp|webp);base64,[A-Za-z0-9+/]+={0,2}$", re.IGNORECASE, ) is_valid_image_data = bool(re.match(data_uri_pattern, clipboard_content)) url_pattern = re.compile( r"^(https?://)" # http:// or https:// r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|" # domain r"localhost|" # localhost... r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # ...or ip r"(?::\d+)?" # optional port r"(?:/?|[/?]\S+)$", re.IGNORECASE, ) is_valid_url = bool(re.match(url_pattern, clipboard_content)) if is_valid_url or is_valid_image_data: return True return False @evaluator(env_name="ubuntu") def is_github_repo_url_in_clipboard(keyword: str) -> bool: """ Check if the clipboard contains a valid GitHub repository URL. Returns: bool: True if the clipboard content is a valid GitHub repository URL, False otherwise. 
""" clipboard_content = pyperclip.paste() # Access the clipboard content if keyword.lower() not in clipboard_content: return False github_repo_pattern = re.compile( r"^https?://" # Protocol r"github\.com/" # Domain r"[^/]+/" # Username r"[^/]+/?$", # Repository name, optional trailing slash re.IGNORECASE, ) return bool(re.match(github_repo_pattern, clipboard_content)) # return is_github_repo_url(clipboard_content) @evaluator(env_name="ubuntu") def is_software_installed(package_name: str) -> bool: try: subprocess.check_call( ["dpkg", "-s", package_name], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) return True except subprocess.CalledProcessError: return False @cache def get_file_url_hash(url): response = requests.get(url) response.raise_for_status() return hashlib.sha256(response.content).hexdigest() @evaluator(env_name="ubuntu") def download_and_verify_file(url: str, file_path: str) -> bool: # Check if the file was downloaded if not os.path.isfile(file_path): return False # Calculate the hash of the downloaded file with open(file_path, "rb") as f: file_data = f.read() downloaded_file_hash = hashlib.sha256(file_data).hexdigest() # Get the file content directly from the URL try: original_file_hash = get_file_url_hash(url) except requests.RequestException: return False # Compare the hashes return downloaded_file_hash == original_file_hash @evaluator(env_name="ubuntu") def download_from_clipboard_and_verify_file(file_path: str) -> bool: # Check if the file was downloaded if not os.path.isfile(file_path): return False # Calculate the hash of the downloaded file with open(file_path, "rb") as f: file_data = f.read() downloaded_file_hash = hashlib.sha256(file_data).hexdigest() # Get the url from clipboard content = pyperclip.paste() """ Problem: 1. There exist infinite possibilities of the downloable format in the clipboard. Not sure if we need to verify the format. 
""" # Get the file content directly from the URL try: original_file_hash = get_file_url_hash(content) except requests.RequestException: return False # Compare the hashes return downloaded_file_hash == original_file_hash @evaluator(env_name="ubuntu") def check_color_scheme(assmue: str) -> bool: out = subprocess.check_output( ["gsettings", "get", "org.gnome.desktop.interface", "color-scheme"], text=True, ) return assmue in out @evaluator(env_name="ubuntu") def check_text_in_current_window_name(text: str) -> bool: try: out = subprocess.check_output( ["xdotool", "getwindowfocus", "getwindowname"], text=True ).strip() except subprocess.CalledProcessError: return False return text in out @evaluator(env_name="ubuntu") def check_current_window_process(assmue: str) -> bool: try: out = subprocess.check_output( ["xdotool", "getwindowfocus", "getwindowpid"], text=True ).strip() if not out.isdigit(): return False process = psutil.Process(int(out)) except ( psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess, subprocess.CalledProcessError, ): return False return assmue.strip() == process.name() @evaluator(env_name="ubuntu") def check_file_exist(file_path: str) -> bool: return os.path.isfile(file_path) @evaluator(env_name="ubuntu") def check_file_content(file_path: str, content: str) -> bool: if not os.path.isfile(file_path): return False with open(file_path, "r") as f: file_content = f.read() return content in file_content @evaluator(env_name="ubuntu") def empty_evaluator() -> bool: return False @evaluator(env_name="ubuntu") def is_process_open(process_name: str) -> bool: """ Check if the given process is currently running. Args: process_name(str): The process name to check. 
""" for process in psutil.process_iter(["name"]): try: if process_name.lower() in process.info["name"].lower(): # type: ignore return True except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): pass return False @evaluator(env_name="ubuntu") def check_app_usage_history(app_name: str) -> bool: """ Check if the given application has been in the usage history. Args: app_name(str): The name of the application to check. Returns: bool: True if the app was recently used, False otherwise. """ for process in psutil.process_iter(["name", "create_time"]): try: if app_name.lower() in process.info["name"].lower(): # Assuming 'recently used' implies a running process was started within the last hour if time.time() - process.info["create_time"] < 3600: return True except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): continue return False @evaluator(env_name="ubuntu") def check_process_closed(app_name: str) -> bool: """ Verify that the specified process is not running. Args: app_name(str): The application name to check for its absence. Returns: bool: True if the app is not running, False otherwise. """ return not any( app_name.lower() in proc.info["name"].lower() for proc in psutil.process_iter(["name"]) if proc.is_running() ) @evaluator(env_name="ubuntu") def verify_background(photo_path: str) -> bool: """ Verify that the specified photo is currently set as the desktop background. Args: photo_path (str): The path to the photo file. Returns: bool: True if the photo is the current background, False otherwise. 
""" out = subprocess.check_output( ["gsettings", "get", "org.gnome.desktop.background", "picture-uri"], universal_newlines=True, ) current_background = ( out.strip().split("'")[1].split("file:/")[1] ) # Extract the path # Compute hashes to compare files if os.path.exists(photo_path) and os.path.exists(current_background): with open(photo_path, "rb") as f: original_hash = hashlib.sha256(f.read()).hexdigest() with open(current_background, "rb") as f: current_hash = hashlib.sha256(f.read()).hexdigest() return original_hash == current_hash return False @evaluator(env_name="ubuntu") def is_torch_matmul_example_copied_correctly() -> bool: """ Verify if the clipboard contains the correct torch.matmul example snippets from PyTorch 1.13 documentation. """ def contains_required_strings( clipboard_content: str, required_strings: list ) -> bool: """ Check if all required strings are present in the clipboard content. Args: clipboard_content (str): The content from the clipboard. required_strings (list): A list of required strings to check. Returns: bool: True if all required strings are found in the clipboard content, False otherwise. 
""" for string in required_strings: if string not in clipboard_content: return False return True required_strings = [ "tensor1 = torch.randn", "tensor2 = torch.randn", "torch.matmul(tensor1, tensor2).size()", ] clipboard_content = pyperclip.paste().strip() if not clipboard_content: return False return contains_required_strings(clipboard_content, required_strings) @evaluator(env_name="ubuntu") def check_directory_exists(dir_path: str) -> bool: """Check if the specified directory exists.""" return os.path.isdir(dir_path) @evaluator(env_name="ubuntu") def verify_files_copied(source_dir: str, target_dir: str, file_extension: str) -> bool: """Verify that files were copied correctly.""" source_files = { file for file in os.listdir(source_dir) if file.endswith(f".{file_extension}") } target_files = { file for file in os.listdir(target_dir) if file.endswith(f".{file_extension}") } return source_files == target_files @evaluator(env_name="ubuntu", local=True) def check_contain_input_text_list(texts: list[str], env) -> bool: """ Check if all provided search terms were entered in the browser. Args: search_terms: A list of strings, each representing a search term that needs to be verified. env: The current testing environment, used to simulate browser interactions. Returns: bool: True if all search terms are found in the written text, False otherwise. """ if env.trajectory: inputs = [ params["text"].lower() for action_name, params, _ in env.trajectory if action_name == "write_text" ] return all( any(term.lower() in input_text for input_text in inputs) for term in texts ) return False @evaluator(env_name="ubuntu") def is_google_maps_url_in_clipboard() -> bool: """ Check if the clipboard contains a valid shortened Google Maps URL. 
""" clipboard_content = pyperclip.paste() maps_url_pattern = re.compile( r"^https://maps\.app\.goo\.gl/[A-Za-z0-9]+$", re.IGNORECASE, ) return bool(re.match(maps_url_pattern, clipboard_content)) @evaluator(env_name="ubuntu", local=True) def check_contain_input_text(text: str, env) -> bool: """ Check if the input text is contained in the written text action in a case-insensitive manner. Args: text (str): The text to check for. env: The current testing environment, used to access the trajectory. Returns: bool: True if the input text is found in the written text action, False otherwise. """ if env.trajectory: inputs = [ params["text"].lower() for action_name, params, _ in env.trajectory if action_name == "write_text" ] return any(text.lower() in input_text for input_text in inputs) return False @evaluator(env_name="ubuntu") def verify_country_data_in_ods(country: str, file_path: str) -> bool: from bs4 import BeautifulSoup from pyexcel_ods import get_data def extract_population(text): # Use regex to extract the first sequence of numbers which possibly contains commas if text: match = re.search(r"\d{1,3}(?:,\d{3})*(?=\[|$)", text) if match: return match.group(0).replace(",", "") # Remove commas return "0" def normalize_population(text): # Ensure the input is treated as a string, whether it's originally an int or str text = str(text) # Normalize the population string by removing non-digit characters return "".join(filter(str.isdigit, text)) def fetch_country_data(country): country_norm = country.replace(" ", "_") # Replace spaces with underscores url = f"https://en.wikipedia.org/wiki/{country_norm}" response = requests.get(url) soup = BeautifulSoup(response.content, "html.parser") infobox = soup.find("table", {"class": "infobox"}) capital_city = None population = None if infobox: for row in infobox.find_all("tr"): header = row.find("th") if header: header_text = header.text.strip() if "Capital" in header_text: capital_city = row.find("td").text.strip() capital_city = " 
".join( capital_city.split() ) # Normalize and clean up text if "Population" in header_text: if row.find("td"): population_text = row.find("td").text.strip() else: next_row = row.find_next_sibling("tr") if next_row and next_row.find("td"): population_text = next_row.find("td").text.strip() population = extract_population(population_text) return capital_city, population capital_city, population = fetch_country_data(country) if not capital_city or not population: return False # Load data from ODS file data = get_data(file_path) sheet = data[list(data.keys())[0]] # Assume data is in the first sheet # Search for country and verify data for row in sheet: if row[0].lower() == country.lower(): recorded_capital_city = row[1] recorded_population = normalize_population(row[2]) # Check if the capital city and population in the sheet match Wikipedia if ( recorded_capital_city in capital_city and recorded_population == population ): return True else: return False return True ubuntu_subtasks = [ SubTask( id="0f589bf9-9b26-4581-8b78-2961b115ab49", description='Open "{file_path}" using vim in a terminal, write "{content}", then save and exit vim.', attribute_dict={"file_path": "file_path", "content": "message"}, output_type="file_path", output_generator=lambda file_path, content: file_path, evaluator_generator=lambda file_path, content: nx.path_graph( [ check_current_window_process("gnome-terminal-server"), is_process_open("vim"), ~is_process_open("vim"), check_file_content(file_path, content), ], create_using=nx.DiGraph, ), ), SubTask( id="5b527839-0e58-426d-bab6-7160200b0d24", description='Get the content of "{file_path}" by printing it to the command line interface through a terminal', attribute_dict={"file_path": "file_path"}, output_type="message", output_generator="manual", evaluator_generator=lambda file_path: nx.path_graph( [ check_current_window_process("gnome-terminal-server"), check_contain_input_text("cat " + file_path), ], create_using=nx.DiGraph, ), ), SubTask( 
id="1c3bedc3-ea5a-453c-a15b-223d72ab756d", description='Submit content "{content}"', attribute_dict={"content": "message"}, output_type="None", output_generator="manual", evaluator_generator=lambda content: nx.path_graph( [ check_submit(content), ], create_using=nx.DiGraph, ), extra_action=[submit], ), SubTask( id="a313ea4d-e501-4971-b4fe-db2aad19eac1", description='Download a file from "{url}" to "{file_path}".', attribute_dict={"url": "url", "file_path": "file_path"}, output_type="file_path", output_generator=lambda file_path, content: file_path, evaluator_generator=lambda url, file_path: nx.path_graph( [ download_and_verify_file(url, file_path), ], create_using=nx.DiGraph, ), ), SubTask( id="a313ea4d-e501-4971-b4fe-db2aad19acsd", description='Download a file from the URL stored in the clipboard to "{file_path}".', attribute_dict={"file_path": "file_path"}, output_type="file_path", output_generator=lambda file_path, content: file_path, evaluator_generator=lambda file_path: nx.path_graph( [ download_from_clipboard_and_verify_file(file_path), ], create_using=nx.DiGraph, ), ), SubTask( id="017102b6-d2c3-466b-96f7-37c8bcddc41a", description='Use Firefox to search for an image using the keyword "{keyword}" and copy the URL of the image to the clipboard.', attribute_dict={"keyword": "keyword"}, output_type="None", evaluator_generator=lambda keyword: path_graph( [ check_text_in_current_window_name("Mozilla Firefox"), check_contain_input_text(keyword), is_img_url_in_clipboard(), ], create_using=DiGraph, ), ), SubTask( id="bcd03c9f-62c9-4001-8d86-78358c59ce22", description='Use Firefox to find a code repository about "{keyword}" in GitHub and copy the URL of the repository to the clipboard.', attribute_dict={"keyword": "keyword"}, output_type="None", evaluator_generator=lambda keyword: path_graph( [ check_text_in_current_window_name("GitHub — Mozilla Firefox"), check_contain_input_text(keyword), is_github_repo_url_in_clipboard(keyword), ], create_using=DiGraph, ), ), 
SubTask( id="a207ef38-b3b2-4c6c-a1e3-75c38162f5ba", description='Set "{photo_path}" as the screen background of the system', attribute_dict={"photo_path": "photo_path"}, output_type="None", evaluator_generator=lambda photo_path: path_graph( [verify_background(photo_path)], create_using=DiGraph, ), ), SubTask( id="217ababc-ccc7-4b9f-af07-c239d92848fe", description='Create a new directory "{target_dir}" and copy all files with the specified "{file_extension}" extension from "{source_dir}" to the directory "{target_dir}".', attribute_dict={ "file_extension": "file_extension", "source_dir": "dir_path", "target_dir": "dir_path", }, output_type="message", evaluator_generator=lambda file_extension, source_dir, target_dir: nx.path_graph( [ check_directory_exists(target_dir), verify_files_copied(source_dir, target_dir, file_extension), ], create_using=nx.DiGraph, ), ), SubTask( id="2b189dc2-c77f-4fa3-8432-ba4355cc294c", description='Use Firefox to find out a "{place_type}" around "{place_name}" on Google Maps and copy the Google Maps sharing URL of that "{place_type}" to the clipboard', attribute_dict={"place_type": "place_type", "place_name": "place_name"}, output_type="None", evaluator_generator=lambda place_type, place_name: path_graph( [ # check_current_window_process("firefox"), check_text_in_current_window_name("Google Maps — Mozilla Firefox"), check_contain_input_text_list([place_name, place_type]), is_google_maps_url_in_clipboard(), ], create_using=DiGraph, ), ), SubTask( id="cc1adae7-bef9-4c8a-865d-00d44486dd69", description='Use GIMP (GNU Image Manipulation Program) to adjust the brightness of the image from "{image_path_before_edit}" to a higher value (brighter) and save it to "{image_path_after_edit}".', attribute_dict={ "image_path_before_edit": "photo_path", "image_path_after_edit": "photo_path", }, output_type="photo_path", evaluator_generator=lambda image_path_before_edit, image_path_after_edit: nx.path_graph( [ check_text_in_current_window_name("GNU Image 
Manipulation Program"), check_file_exist(image_path_after_edit), is_image_2_brighter(image_path_before_edit, image_path_after_edit), ], create_using=nx.DiGraph, ), ), SubTask( id="434402f3-647a-4a9a-9d8f-10f5bb6c7cf0", description='Use LibreOffice Impress to adjust the brightness of the image from "{image_path_before_edit}" to a lower value (darker) and save it to "{image_path_after_edit}".', attribute_dict={ "image_path_before_edit": "photo_path", "image_path_after_edit": "photo_path", }, output_type="photo_path", evaluator_generator=lambda image_path_before_edit, image_path_after_edit: nx.path_graph( [ check_text_in_current_window_name("LibreOffice Impress"), check_file_exist(image_path_after_edit), ~is_image_2_brighter(image_path_before_edit, image_path_after_edit), ], create_using=nx.DiGraph, ), ), SubTask( id="4cf246ea-0a7f-43da-84b6-61d74a2699af", description='Combine two images from Image 1 "{image_path_1}" and Image 2 "{image_path_2} using GIMP (GNU Image Manipulation Program) and save the resulting image to "{output_path}". Image 1 should be placed on the left side of Image 2.', attribute_dict={ "image_path_1": "photo_path_1", "image_path_2": "photo_path_2", "output_path": "photo_path_ouput", }, output_type="photo_path", evaluator_generator=lambda image_path_1, image_path_2, output_path: nx.path_graph( [ check_text_in_current_window_name("GNU Image Manipulation Program"), check_file_exist(output_path), verify_combined_image(image_path_1, image_path_2, output_path, "left"), ], create_using=nx.DiGraph, ), ), SubTask( id="0111384f-38ca-41a2-9504-cb1c55002b3c", description='Combine two images from Image 1 "{image_path_1}" and Image 2 "{image_path_2}" using LibreOffice Writer and save the resulting ODT file to "{output_path}". 
Image 1 should be placed above Image 2.', attribute_dict={ "image_path_1": "photo_path_1", "image_path_2": "photo_path_2", "output_path": "file_path", }, output_type="file_path", evaluator_generator=lambda image_path_1, image_path_2, output_path: nx.path_graph( [ check_text_in_current_window_name("LibreOffice Writer"), check_file_exist(output_path), verify_combined_image(image_path_1, image_path_2, output_path, "above"), ], create_using=nx.DiGraph, ), ), SubTask( id="467f17a6-c42f-4eda-996f-a53385eb3efd", description='Combine two images from Image 1 "{image_path_1}" and Image 2 "{image_path_2}" using LibreOffice Impress and save the resulting file in PDF format to "{output_path}". Image 1 should be placed on the right side of Image 2.', attribute_dict={ "image_path_1": "photo_path_1", "image_path_2": "photo_path_2", "output_path": "file_path", }, output_type="file_path", evaluator_generator=lambda image_path_1, image_path_2, output_path: nx.path_graph( [ check_text_in_current_window_name("LibreOffice Impress"), check_file_exist(output_path), verify_combined_image(image_path_1, image_path_2, output_path, "right"), ], create_using=nx.DiGraph, ), ), SubTask( id="49b614c5-c4bb-4c20-aab8-ab9dcc7de1b5", description="Find the example provided of torch.matmul by official PyTorch version 1.13 documentation using Firefox and copy all the lines of code in the example to the clipboard.", attribute_dict={}, output_type="None", evaluator_generator=lambda: nx.path_graph( [ check_text_in_current_window_name( "torch.matmul — PyTorch 1.13 documentation — Mozilla Firefox" ), is_torch_matmul_example_copied_correctly(), ], create_using=DiGraph, ), ), SubTask( id="76de4bdb-c980-4b3a-9bd3-c87db467dffe", description='Paste clipboard content into LibreOffice Writer and save it as an ODT file at "{file_path}".', attribute_dict={"file_path": "file_path"}, output_type="file_path", evaluator_generator=lambda file_path: path_graph( [ check_text_in_current_window_name("LibreOffice Writer"), 
check_file_exist(file_path), verify_odt_file_content_with_clipboard(file_path), ], create_using=DiGraph, ), ), SubTask( id="8491e674-596b-452b-9e0e-58a44d90f947", description='Paste clipboard content into Visual Studio Code (VS Code) and save it as a file at "{file_path}".', attribute_dict={"file_path": "file_path"}, output_type="file_path", evaluator_generator=lambda file_path: path_graph( [ check_text_in_current_window_name("Visual Studio Code"), check_file_exist(file_path), verify_file_content_with_clipboard(file_path), ], create_using=DiGraph, ), ), SubTask( id="1cd6519a-9ee0-442b-ba5a-9238aeb00ff6", description='Use Firefox to search for the country "{country}" on Wikipedia, extract the capital city and population, and save this information in an ODS file at "{file_path}" with LibreOffice Calc. The first column will save the country name, the second will save the capital city name, and the third will save the population. No header is needed in the ODS file.', attribute_dict={"country": "country", "file_path": "file_path"}, output_type="file_path", evaluator_generator=lambda country, file_path: nx.path_graph( [ check_text_in_current_window_name("Wikipedia — Mozilla Firefox"), check_text_in_current_window_name("LibreOffice Calc"), check_file_exist(file_path), verify_country_data_in_ods(country, file_path), ], create_using=nx.DiGraph, ), ), ] if __name__ == "__main__": generator = TaskGenerator(attribute_pool={}) ================================================ FILE: crab-benchmark-v0/main.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import argparse import logging import warnings from pathlib import Path from typing import Literal from crab import ( BenchmarkConfig, Experiment, MessageType, TaskGenerator, create_benchmark, ) from crab.actions.crab_actions import complete, wait from crab.actions.visual_prompt_actions import ( get_elements_prompt, groundingdino_easyocr, ) from crab.agents.backend_models import BackendModelConfig from crab.agents.policies import ( MultiAgentByEnvPolicy, MultiAgentByFuncPolicy, SingleAgentPolicy, ) from crab.core.agent_policy import AgentPolicy from crab.core.benchmark import Benchmark from .android_env import ANDROID_ENV from .dataset.android_subtasks import android_subtasks from .dataset.handmade_tasks import handmade_tasks from .dataset.ubuntu_subtasks import ubuntu_subtasks from .ubuntu_env import UBUNTU_ENV warnings.filterwarnings("ignore") class CrabBenchmarkV0(Experiment): def __init__( self, benchmark: Benchmark, task_id: str, agent_policy: AgentPolicy | Literal["human"], log_dir: Path | None = None, ) -> None: super().__init__(benchmark, task_id, agent_policy, log_dir) def get_prompt(self): observation, ob_prompt = self.benchmark.observe_with_prompt() # construct prompt result_prompt = {} for env in ob_prompt: if env == "root": continue screenshot = observation[env]["screenshot"] marked_screenshot, _ = ob_prompt[env]["screenshot"] result_prompt[env] = [ (f"Here is the current screenshot of {env}:", MessageType.TEXT), (screenshot, MessageType.IMAGE_JPG_BASE64), ( f"Here is the 
screenshot with element labels of {env}:", MessageType.TEXT, ), (marked_screenshot, MessageType.IMAGE_JPG_BASE64), ] return result_prompt def get_benchmark(env: str, ubuntu_url: str): ubuntu_env = UBUNTU_ENV.model_copy() ubuntu_env.remote_url = ubuntu_url ubuntu_tool = { "screenshot": groundingdino_easyocr(font_size=16) >> get_elements_prompt } android_tool = { "screenshot": groundingdino_easyocr(font_size=40) >> get_elements_prompt } if env == "ubuntu": prompting_tools = {"ubuntu": ubuntu_tool} benchmark_config = BenchmarkConfig( name="ubuntu_benchmark", tasks=[], environments=[ubuntu_env], prompting_tools=prompting_tools, root_action_space=[complete, wait], multienv=True, ) elif env == "android": prompting_tools = {"android": android_tool} benchmark_config = BenchmarkConfig( name="android_benchmark", tasks=[], environments=[ANDROID_ENV], prompting_tools=prompting_tools, root_action_space=[complete, wait], multienv=True, ) elif env == "cross": prompting_tools = { "android": android_tool, "ubuntu": ubuntu_tool, } benchmark_config = BenchmarkConfig( name="ubuntu_android_benchmark", tasks=[], environments=[ubuntu_env, ANDROID_ENV], prompting_tools=prompting_tools, root_action_space=[complete, wait], multienv=True, ) else: raise ValueError("Env not support") # Load from json config files by combining sub-tasks generator = TaskGenerator(subtasks=android_subtasks + ubuntu_subtasks) dir_path = (Path(__file__).parent / "dataset").resolve() tasks = [] for task_json_files in dir_path.rglob("*.json"): task = generator.get_task_from_file(task_json_files) tasks.append(task) benchmark_config.tasks.extend(tasks) # Load from handmade tasks benchmark_config.tasks.extend(handmade_tasks) benchmark_config.step_limit = 20 return create_benchmark(benchmark_config) if __name__ == "__main__": parser = argparse.ArgumentParser( description="Script for running benchmark with an agent." 
) parser.add_argument( "--model", type=str, help="gpt4o, gpt4turbo, gemini, claude or human", default="gpt4o", ) parser.add_argument( "--policy", type=str, help="single, multi-by-func, or multi-by-env", default="single", ) parser.add_argument( "--ubuntu-url", type=str, help="remote url of Ubunutu environment", default="http://127.0.0.1:8000", ) parser.add_argument( "--env", type=str, help="ubuntu, android or cross", default="cross", ) parser.add_argument("--task-id", type=str, help="task id") parser.add_argument( "--model-base-url", type=str, help="URL of the model API", default="http://127.0.0.1:8000/v1", ) parser.add_argument( "--model-api-key", type=str, help="API key of the model API", default="EMPTY", ) parser.add_argument( "--loglevel", type=str, help="logger level, debug, info, warning, or error", default="warning", ) parser.add_argument( "--history-messages-len", type=int, help="The number of rounds of chat history to provide to the model", default=2, ) args = parser.parse_args() loglevel = args.loglevel numeric_level = getattr(logging, loglevel.upper(), None) if not isinstance(numeric_level, int): raise ValueError("Invalid log level: %s" % loglevel) logging.basicConfig(level=numeric_level) benchmark = get_benchmark(args.env, args.ubuntu_url) if args.model == "human": expeirment = CrabBenchmarkV0( benchmark=benchmark, task_id=args.task_id, agent_policy="human", ) expeirment.start_benchmark() exit() if args.model == "gpt4o": model = BackendModelConfig( model_class="openai", model_name="gpt-4o", history_messages_len=args.history_messages_len, ) elif args.model == "gpt4turbo": model = BackendModelConfig( model_class="openai", model_name="gpt-4-turbo", history_messages_len=args.history_messages_len, ) elif args.model == "gemini": model = BackendModelConfig( model_class="gemini", model_name="gemini-1.5-pro-latest", history_messages_len=args.history_messages_len, ) elif args.model == "claude": model = BackendModelConfig( model_class="claude", 
model_name="claude-3-opus-20240229", history_messages_len=args.history_messages_len, ) elif args.model == "pixtral": model = BackendModelConfig( model_class="openai", model_name="mistralai/Pixtral-12B-2409", json_structre_output=True, history_messages_len=args.history_messages_len, base_url=args.model_base_url, api_key=args.model_api_key, ) elif args.model == "gpt4o-wofc": model = BackendModelConfig( model_class="openai", model_name="gpt-4o", json_structre_output=True, history_messages_len=args.history_messages_len, ) elif args.model == "llava-ov72b": model = BackendModelConfig( model_class="sglang", model_name="lmms-lab/llava-onevision-qwen2-72b-ov-chat", json_structre_output=True, history_messages_len=args.history_messages_len, base_url=args.model_base_url, api_key=args.model_api_key, ) else: print("Unsupported model: ", args.model) exit() if args.policy == "single": agent_policy = SingleAgentPolicy(model_backend=model) elif args.policy == "multi-by-func": agent_policy = MultiAgentByFuncPolicy( main_agent_model_backend=model, tool_agent_model_backend=model ) elif args.policy == "multi-by-env": agent_policy = MultiAgentByEnvPolicy( main_agent_model_backend=model, env_agent_model_backend=model ) else: print("Unsupported policy: ", args.policy) exit() log_dir = (Path(__file__).parent / "tianqi_logs").resolve() expeirment = CrabBenchmarkV0( benchmark=benchmark, task_id=args.task_id, agent_policy=agent_policy, log_dir=log_dir, ) expeirment.start_benchmark() ================================================ FILE: crab-benchmark-v0/scripts/ubuntu_env_init.sh ================================================ #!/bin/bash # Disable screen autolock gsettings set org.gnome.desktop.screensaver lock-enabled false gsettings set org.gnome.desktop.session idle-delay 0 # Disable automatic updates sudo bash -c 'cat < /etc/apt/apt.conf.d/20auto-upgrades APT::Periodic::Update-Package-Lists "0"; APT::Periodic::Unattended-Upgrade "0"; EOF' # Allow sudo without password for the current 
user
CURRENT_USER=$(whoami)
sudo bash -c "echo \"$CURRENT_USER ALL=(ALL) NOPASSWD: ALL\" | tee /etc/sudoers.d/$CURRENT_USER"

# Install required packages
sudo apt update
sudo apt install -y openssh-server git vim python3-pip xdotool python3-tk python3.10-venv

# Install pipx
python3 -m pip install pipx
python3 -m pipx ensurepath

# Modify .bashrc to alias python to python3 for the current user
echo 'alias python=python3' >> /home/$CURRENT_USER/.bashrc

# Reload .bashrc for the current user
source /home/$CURRENT_USER/.bashrc

# Install poetry using pipx
pipx install poetry

# Pull CRAB repo
if [ ! -d "/home/$CURRENT_USER/crab" ]; then
    git clone https://github.com/camel-ai/crab.git /home/$CURRENT_USER/crab/
fi

# Create poetry environment
cd /home/$CURRENT_USER/crab
poetry install -E server

# Change to X11 from Wayland
sudo sed -i 's/#WaylandEnable=false/WaylandEnable=false/g' /etc/gdm3/custom.conf
touch /home/$CURRENT_USER/.Xauthority

# Create the crab.service file with dynamic user and group
# NOTE(review): "cat < /etc/systemd/system/crab.service" READS the file; this
# was almost certainly "cat <<EOF > /etc/systemd/system/crab.service" (heredoc
# write) before extraction mangled it — confirm against the upstream script.
sudo bash -c "cat < /etc/systemd/system/crab.service
[Unit]
Description=My Python Script Service
After=network.target

[Service]
WorkingDirectory=/home/$CURRENT_USER/crab/
ExecStart=/home/$CURRENT_USER/.local/bin/poetry run python -m crab.server.main --HOST 0.0.0.0
Restart=always
User=$CURRENT_USER
Group=$CURRENT_USER

[Install]
WantedBy=multi-user.target
EOF"

# Reload systemd to recognize the new service
sudo systemctl daemon-reload

# Enable and start the crab service
sudo systemctl enable crab.service

# Reboot the system to apply changes for X11
echo "System will reboot in 10 seconds to apply changes..."
sleep 10
sudo reboot


================================================
FILE: crab-benchmark-v0/ubuntu_env.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
from crab.actions.desktop_actions import (
    click,
    double_click,
    key_press,
    press_hotkey,
    right_click,
    screenshot,
    search_application,
    write_text,
)
from crab.core import EnvironmentConfig

# Environment definition for the Ubuntu desktop VM used by crab-benchmark-v0:
# mouse/keyboard actions as the action space, a screenshot as the only
# observation, and a natural-language description handed to the agent.
UBUNTU_ENV = EnvironmentConfig(
    name="ubuntu",
    action_space=[
        click,
        key_press,
        write_text,
        press_hotkey,
        search_application,
        right_click,
        double_click,
    ],
    observation_space=[screenshot],
    description="""An Ubuntu 22.04 Linux desktop operating system. The interface \
displays a current screenshot at each step and primarily supports interaction \
via mouse and keyboard. You must use searching functionality to open any \
application in the system. This device includes system-related applications \
including Terminal, Files, Text Editor, Vim, and Settings. It also features \
Firefox as the web browser, and the LibreOffice suite—Writer, Calc, and \
Impress. For communication, Slack is available. The Google account is \
pre-logged in on Firefox, synchronized with the same account used in the \
Android environment.""",
)


================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/conf.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

# Make the repository root importable so autodoc can find the "crab" package.
sys.path.insert(0, os.path.abspath('..'))

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'CRAB'
copyright = '2024, CAMEL-AI.org'
author = 'CAMEL-AI.org'
version = '0.1'
release = '0.1.2'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.viewcode',
    'sphinx.ext.napoleon',
    'myst_parser',
]

templates_path = ['_templates']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'sphinx_book_theme'
html_favicon = '_static/favicon.png'
html_static_path = ['_static']
html_logo = "_static/CRAB_logo1.png"
html_title = "CRAB Documentation"
html_theme_options = {
    "repository_url": "https://github.com/camel-ai/crab",
    "use_repository_button": True,
}


================================================
FILE: docs/crab.benchmarks.rst
================================================
crab.benchmarks package
=======================

Submodules
----------

crab.benchmarks.template module
-------------------------------

.. automodule:: crab.benchmarks.template
   :members:
   :undoc-members:
   :show-inheritance:

Module contents
---------------

.. automodule:: crab.benchmarks
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/crab.client.rst
================================================
crab.client package
===================

Submodules
----------

crab.client.env module
----------------------

.. automodule:: crab.client.env
   :members:
   :undoc-members:
   :show-inheritance:

crab.client.openai\_interface module
------------------------------------

..
automodule:: crab.client.openai_interface :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.client :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.core.models.rst ================================================ crab.core.models package ======================== Submodules ---------- crab.core.models.action module ------------------------------ .. automodule:: crab.core.models.action :members: :undoc-members: :show-inheritance: crab.core.models.benchmark\_interface module -------------------------------------------- .. automodule:: crab.core.models.benchmark_interface :members: :undoc-members: :show-inheritance: crab.core.models.config module ------------------------------ .. automodule:: crab.core.models.config :members: :undoc-members: :show-inheritance: crab.core.models.evaluator module --------------------------------- .. automodule:: crab.core.models.evaluator :members: :undoc-members: :show-inheritance: crab.core.models.task module ---------------------------- .. automodule:: crab.core.models.task :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.core.models :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.core.rst ================================================ crab.core package ================= Subpackages ----------- .. toctree:: :maxdepth: 4 crab.core.models Submodules ---------- crab.core.benchmark module -------------------------- .. automodule:: crab.core.benchmark :members: :undoc-members: :show-inheritance: crab.core.decorators module --------------------------- .. automodule:: crab.core.decorators :members: :undoc-members: :show-inheritance: crab.core.environment module ---------------------------- .. 
automodule:: crab.core.environment :members: :undoc-members: :show-inheritance: crab.core.exceptions module --------------------------- .. automodule:: crab.core.exceptions :members: :undoc-members: :show-inheritance: crab.core.graph\_evaluator module --------------------------------- .. automodule:: crab.core.graph_evaluator :members: :undoc-members: :show-inheritance: crab.core.task\_generator module -------------------------------- .. automodule:: crab.core.task_generator :members: :undoc-members: :show-inheritance: crab.core.vagrant\_manager module --------------------------------- .. automodule:: crab.core.vagrant_manager :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.core :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.environments.rst ================================================ crab.environments package ========================= Submodules ---------- crab.environments.android module -------------------------------- .. automodule:: crab.environments.android :members: :undoc-members: :show-inheritance: crab.environments.linux module ------------------------------ .. automodule:: crab.environments.linux :members: :undoc-members: :show-inheritance: crab.environments.template module --------------------------------- .. automodule:: crab.environments.template :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.environments :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.rst ================================================ crab package ============ Subpackages ----------- .. toctree:: :maxdepth: 4 crab.benchmarks crab.client crab.core crab.environments crab.server Module contents --------------- .. 
automodule:: crab :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.server.controller.rst ================================================ crab.server.controller package ============================== Submodules ---------- crab.server.controller.benchmark module --------------------------------------- .. automodule:: crab.server.controller.benchmark :members: :undoc-members: :show-inheritance: crab.server.controller.environment module ----------------------------------------- .. automodule:: crab.server.controller.environment :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.server.controller :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab.server.rst ================================================ crab.server package =================== Subpackages ----------- .. toctree:: :maxdepth: 4 crab.server.controller Submodules ---------- crab.server.api module ---------------------- .. automodule:: crab.server.api :members: :undoc-members: :show-inheritance: crab.server.config module ------------------------- .. automodule:: crab.server.config :members: :undoc-members: :show-inheritance: crab.server.data module ----------------------- .. automodule:: crab.server.data :members: :undoc-members: :show-inheritance: crab.server.exception\_handlers module -------------------------------------- .. automodule:: crab.server.exception_handlers :members: :undoc-members: :show-inheritance: crab.server.logger module ------------------------- .. automodule:: crab.server.logger :members: :undoc-members: :show-inheritance: crab.server.main module ----------------------- .. automodule:: crab.server.main :members: :undoc-members: :show-inheritance: crab.server.middleware module ----------------------------- .. 
automodule:: crab.server.middleware :members: :undoc-members: :show-inheritance: crab.server.utils module ------------------------ .. automodule:: crab.server.utils :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: crab.server :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/crab_benchmark_v0/environment_gcp_setup.md ================================================ # Google cloud platform setup ## Setup and Start the VM Instance The development image is hosted in the project `capable-vista-420022` with image name `crab-benchmark-v0-1`. You can use [gcloud](https://cloud.google.com/sdk/docs/install) to create an instance from this image. First install [gcloud](https://cloud.google.com/sdk/docs/install), then create an instance using the following command: ```bash gcloud compute instances create \ crab-instance \ --zone=us-central1-a \ --machine-type=n2-standard-8 \ --image=https://www.googleapis.com/compute/v1/projects/capable-vista-420022/global/images/crab-benchmark-v0-1 \ --enable-nested-virtualization # You can change instance name, zone, machine type as you want. # Remember that the CPU must support nested virtualization and should have at least 32G memory. # This setting costs around 0.4$ per hour. ``` After creating the instance, you can connect it using SSH. User account information: * user: `root`; password: `crab` * user: `crab`; password: `crab` **IMPORTANT: You must switch to user `crab` before setting up remote desktop.** Use `sudo su crab`. ## Connect the Instance through a remote desktop service You need to connect the server to a display to set up the experiment environment because the Ubuntu virtual machine and the Android emulator require GUI operations. There are many possible remote desktop products you can use. Here, we provide instructions for [Google Remote Desktop](https://remotedesktop.google.com/access/), which was used to run our experiment. 
1. Go to [Google Remote Desktop Headless](https://remotedesktop.google.com/headless). Click **Begin** -> **Next** -> **Authorize**. On the resulting page, copy the command from the `Debian Linux` section.
2. Connect to the VM instance through SSH, paste the copied command, and run it. You will be prompted to set a six-digit PIN.
3. Go to [Google Remote Desktop Access](https://remotedesktop.google.com/access). You should see a remote device marked as online. Click it and enter the PIN. You will then see the desktop of the VM instance.

================================================
FILE: docs/crab_benchmark_v0/environment_local_setup.md
================================================
# Local setup

## Install CRAB

First you should install `poetry`, a modern Python dependency management tool. Then pull the crab repo and install:

```bash
git clone https://github.com/camel-ai/crab
cd crab
poetry install -E client
```

## Install Ubuntu VM

**IMPORTANT: If you are using an Ubuntu VM, the Python version in the VM must match the Python version on the host machine. If you follow this instruction to install Ubuntu, the Python version in the VM will be 3.10.12. Consider using `conda` or `pyenv` to install the same Python version on the host machine.**

Install `virt-manager`. If you are using Ubuntu or Debian, try `sudo apt install virt-manager`. Download the [Ubuntu 22.04 image](https://releases.ubuntu.com/jammy/ubuntu-22.04.4-desktop-amd64.iso), then create a new machine with at least 8G RAM and a 30G disk in virt-manager using the image. Follow the instructions and complete the installation. (It's better to use `crab` as the main user name.)

After installing Ubuntu, you should install crab-server on it and do the necessary initialization. In the Ubuntu VM, run

```bash
git clone https://github.com/camel-ai/crab.git ~/crab/
cd ~/crab/crab-benchmark-v0/scripts
chmod +x ubuntu_env_init.sh
./ubuntu_env_init.sh
```

The VM will reboot after initialization. After rebooting, remember its IP address.
## Install ADB Download and install ADB from its [official website](https://developer.android.com/tools/releases/platform-tools). ## Install Android Emulator You can use emulators in [Android Studio](https://developer.android.com/studio) to simulate an Android device if you don't want to use a physical one. To create a new virtual device, open Android Studio and use its built-in device manager to create a Pixel 8 Pro with system image release "R". > Note that the benchmark on our side runs on a Google Pixel 8 Pro with system image release "R". However, cases are > noticed that Google API Level 30 may not work properly when trying to enable USB debugging mode. If such issues are > encountered, you can try switch to releases of lower API levels (e.g. "Q"). ![](./assets/android_1.png) ![](./assets/android_2.png) Then you can boot the device. To check if it's all set, run ```shell adb devices ``` You should see the device in the list. > Important: ADB won't work normally if you see an `unauthorized` tag after the device ID. To solve this, enable both > the developer mode and USB debugging mode in the device. ================================================ FILE: docs/crab_benchmark_v0/get_started.md ================================================ # Get started `crab-benchmark-v0` is a benchmark released with the crab framework to provide a standard usage. It includes two virtual machine environments: an Android smartphone and an Ubuntu desktop computer, with 100 tasks and 59 different evaluator functions in the dataset. It effectively evaluates the MLM-based agents' performance on operating real-world tasks across multiple platforms. ## Concept Our benchmark contains two important parts: **Environments** and **Tasks**. #### Environment Since our Ubuntu environment is built upon KVM, setting it up locally requires you an experienced Linux user to deal with many small and miscellaneous issues. 
Therefore, we provide two environment setup methods:

* [Local setup](./environment_local_setup.md) provides a step-by-step guide to build environments on a Linux machine with **at least one monitor and 32G memory**, but it doesn't cover details like how to install KVM on your machine because they vary across different Linux distros.
* For those who want a quicker setup, we also provide a setup through the [Google Cloud Platform](./environment_gcp_setup.md). Specifically, we publish a disk image that contains all required software and configurations on Google Cloud; you can use your own Google account to create a cloud computer from this disk image and use [google remote desktop](https://remotedesktop.google.com/access/) to connect to it. This method doesn't have any hardware limitations, and once you set it up you can run the experiment immediately. As a tradeoff, the cloud computer that meets the minimum hardware requirement costs around $0.4 per hour (depending on the machine zone).

We connect to the Android environment via ADB, so any Android device, from an emulator to a physical smartphone, will work. You should ensure ADB is installed on your system and can be directly called through the command line. In our experiment, we used the built-in emulator of [Android Studio](https://developer.android.com/studio) to create a Google Pixel 8 Pro virtual device with the release name *R* and installed the necessary extra apps.

#### Task

We manage our task dataset using a CRAB-recommended method. Sub-tasks are defined through Pydantic models written in Python code, and composed tasks are defined in JSON format, typically combining several sub-tasks. The sub-tasks are defined in [android_subtasks](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/android_subtasks.py) and [ubuntu_subtasks](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/ubuntu_subtasks.py).
The JSON files storing composed tasks are categorized into [android](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/android/), [ubuntu](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/ubuntu/), and [cross-platform](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/cross/). The tasks in android and ubuntu directories are single-environment task and those in cross directory are cross-environment tasks. Additionally, we create several tasks by hand instead of composing sub-tasks to provide semantically more meaningful tasks, which are found in [handmade tasks](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/handmade_tasks.py). ## Experiment After setting up the environment, you can start the experiment. A brief overview of the experiment is as follows: 1. Open the Ubuntu environment virtual machine and the Android environment emulator. 2. Start the CRAB server in the Ubuntu environment and get its IP address and port. Let's say they are `192.168.122.72` and `8000`. 3. Choose a task. As an example, we take the task with ID `a3476778-e512-40ca-b1c0-d7aab0c7f18b` from [handmade_tasks](https://github.com/camel-ai/crab/tree/main/crab-benchmark-v0/dataset/handmade_tasks.py). The task is: "Open the 'Tasks' app on Android, check the first incomplete task, then perform the task according to its description." 4. Run [main.py](./main.py) with the command `poetry run python -m crab-benchmark-v0.main --model gpt4o --policy single --remote-url http://192.168.122.72:8000 --task-id a3476778-e512-40ca-b1c0-d7aab0c7f18b`. In this command, `--model gpt4o` and `--policy single` determine the agent system, `--remote-url` specifies the Ubuntu environment interface, and `--task-id` indicates the task to be performed. 
================================================
FILE: docs/get_started/build_your_own_benchmark.md
================================================
# Build your own benchmark

## Overview

![](../assets/benchmark_config.png)

The Crab benchmark system mainly consists of five types of components:

* `Action`: The fundamental building block of the Crab framework, which represents a unit operation that can be taken by an agent or used as a fixed process that is called multiple times in a benchmark.
* `Evaluator`: A specific type of `Action` that assesses whether an agent has achieved its goal. Multiple evaluators can be combined together as a graph to enable complex evaluation.
* `Environment`: An abstraction of an environment in which the agent can take actions and observe in a given action and observation space. An environment can be launched on the local machine, a physical remote machine, or a virtual machine.
* `Task`: A task with a natural language description to instruct the agent to perform. It can include interaction with multiple environments. Notice that in the benchmark, a task should have a graph evaluator to judge the task progress.
* `Benchmark`: The main body of the crab system that contains all required components to build a benchmark, including environments, tasks, and the prompting method. It controls several of these components to run the benchmark.

## Actions

Actions are the fundamental building blocks of the Crab system's operations. Each action is encapsulated as an instance of the `Action` class. An action can be converted into a JSON schema for language model agents to use. An action is characterized by the following attributes:

- **Name**: A string identifier that uniquely represents the action.
- **Entry**: A callable entry point to the actual Python function that executes the action.
- **Parameters**: A Pydantic model class that defines the input parameters the action accepts.
- **Returns**: A Pydantic model class that defines the structure of the return type the action produces.
- **Description**: A string providing a clear and concise description of what the action does and how it behaves.
- **Kept Parameters**: A list of parameters retained for internal use by the Crab system, which do not appear in the action's parameter list but are injected automatically at runtime. For example, we use `env` to represent the current environment object that the action is taken in.
- **Environment Name**: An optional string that can specify the environment the action is associated with. Usually this attribute is only used by predefined actions like `setup` in an environment.

Here is an example of creating an action through a Python function:

```python
@action
def click(x: float, y: float) -> None:
    """
    click on the current desktop screen.

    Args:
        x (float): The X coordinate, as a floating-point number in the range [0.0, 1.0].
        y (float): The Y coordinate, as a floating-point number in the range [0.0, 1.0].
    """
    import pyautogui
    pyautogui.click(x,y)
```

The `@action` decorator transforms the `click` function into an `Action` with these mappings:

- The function name `click` becomes the action **name**.
- The parameters `x: float, y: float` with their type hints become the action **parameters**.
- The return type hint `-> None` is used for the action's **returns** field, indicating no value returned.
- The function's docstring provides a **description** for the action and its parameters, utilized in the JSON schema for the agent.
- The function body defines the action's behavior, executed when the action is called.

The `Action` class allows for different combination operations such as:

- **Pipe**: Using the `>>` operator, actions can be piped together, where the output of one action becomes the input to another, provided their parameters and return types are compatible.
- **Sequential Combination**: The `+` operator allows for two actions to be combined sequentially, executing one after the other.
## Evaluators Evaluators in the Crab system are a specific type of `Action` that assess whether an agent has achieved its goal. They should return a boolean value, indicating whether the task's objective has been met. Multiple evaluators can be connected into a graph using the `networkx` package, enabling multi-stage evaluation, where different conditions can be checked in sequence or in parallel. An example evaluator `check_file_exist` confirms the presence of a file at a given path, using the `os.path.isfile` method to return `True` if the file exists or `False` otherwise: ```python @evaluator def check_file_exist(file_path: str) -> bool: return os.path.isfile(file_path) ``` Extra attributes of evaluators: - **Require Submit**: Indicates if the evaluator awaits a specific submission to carry out its assessment. Logical operators allow for evaluator combinations: - **AND (&)**: Requires all evaluators to succeed for a task to pass. - **OR (|)**: Passes if any of the evaluators succeed. - **NOT (~)**: Reverses the evaluation outcome. The combined evaluator is still considered as **one evaluator** rather than a graph evaluator. ================================================ FILE: docs/get_started/quickstart.md ================================================ # Quickstart The `Benchmark` class is a comprehensive framework for evaluating language model agents across various tasks and environments. It provides a flexible structure to manage multiple environments and tasks, offering single and multi-environment execution modes. The following image shows an overview of how `Benchmark` works. ![](../assets/crab_overview.png) ## Basic Usage ### Step 1: Importing the Benchmark Begin by importing the predefined benchmark from the `crab.benchmarks` module. 
For exmple, here we import `template_benchmark_config`: ```python from crab.benchmarks import template_benchmark_config ``` ### Step 2: Creating the Benchmark Use the `create_benchmark` function to create an instance of a `Benchmark` class based on the imported benchmark configuration: ```python from crab import create_benchmark benchmark = create_benchmark(template_benchmark_config) ``` ### Step 3: Starting a Task Select a task to start within the benchmark. The task ID should correspond to one of the predefined tasks in the benchmark configuration. Use the `start_task` method to initialize and begin the task: ```python # Starting the task with ID "0" task, action_space = benchmark.start_task("0") ``` ### Step 4: Running the Benchmark Loop Execute actions and observe the results using the `step` and `observe` methods: ```python from crab.client.openai_interface import OpenAIAgent # Initialize the agent by benchmark task and action_space agent = OpenAIAgent(task, action_space) # Define a function to run the benchmark def run_benchmark(benchmark, agent): for step in range(20): # Define the number of steps as per your requirements print("=" * 40) print(f"Starting step {step}:") # Get the current observations and prompts observation = benchmark.observe() # Process the observations and determine the next action action_result = agent.determine_next_action(observation) # Execute the action and get the result step_result = benchmark.step(action_result.action, action_result.parameters) # Check current evaluation result. 
print(step_result.evaluation_results) # Check if the task is terminated and break the loop if so if step_result.terminated: print("Task completed successfully.") print(step_result.evaluation_results) break run_benchmark(benchmark, agent) ``` ### Step 5: Completing the Benchmark Clean up and reset the benchmark after completion using the`reset`: ```python benchmark.reset() ``` ================================================ FILE: docs/index.rst ================================================ .. Crab documentation master file, created by sphinx-quickstart on Thu May 2 10:58:47 2024. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to Crab's documentation! ================================ .. toctree:: :maxdepth: 1 :caption: Get Started with CRAB: :name: get_started get_started/quickstart.md get_started/build_your_own_benchmark.md .. toctree:: :maxdepth: 1 :caption: CRAB Benchmark-v0: :name: crab_benchmark_v0 crab_benchmark_v0/get_started.md crab_benchmark_v0/environment_gcp_setup.md crab_benchmark_v0/environment_local_setup.md .. toctree:: :maxdepth: 2 :caption: API Reference: modules Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. 
echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd ================================================ FILE: docs/modules.rst ================================================ crab ==== .. toctree:: :maxdepth: 4 crab ================================================ FILE: examples/multi_env.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
===========

from termcolor import colored

from crab import Benchmark, create_benchmark
from crab.agents.backend_models import OpenAIModel
from crab.agents.policies import SingleAgentPolicy
from crab.benchmarks.template import multienv_template_benchmark_config


def start_benchmark(benchmark: Benchmark, agent: SingleAgentPolicy):
    """Drive the agent/benchmark interaction loop over multiple environments.

    Runs at most 20 agent steps. Each step observes every environment,
    builds a per-environment text prompt from the reported state, asks the
    agent for actions, and executes them one by one. Returns as soon as the
    benchmark reports termination.
    """
    for step in range(20):
        print("=" * 40)
        print(f"Start agent step {step}:")
        observation = benchmark.observe()
        print(f"Current enviornment observation: {observation}")
        prompt = {}
        for env, obs in observation.items():
            # The "root" entry is skipped when building prompts — presumably
            # an aggregate/meta environment rather than a real one; confirm
            # against Benchmark.observe.
            if env == "root":
                continue
            state = obs["current_state"]
            # Prompt format: {env_name: [(message, message_type), ...]};
            # 0 appears to be a plain-text message tag — TODO confirm.
            prompt[env] = [(f"The state of {env} is {state}", 0)]
        response = agent.chat(observation=prompt)
        print(colored(f"Agent take action: {response}", "blue"))
        # NOTE(review): `response` is rebound to the step result inside the
        # loop, shadowing the agent's action list; iteration is unaffected
        # because the for-loop holds its own reference to the original list.
        for action in response:
            response = benchmark.step(
                action=action.name,
                parameters=action.arguments,
                env_name=action.env,
            )
            print(
                colored(
                    f'Action "{action.name}" success, stat: '
                    f"{response.evaluation_results}",
                    "green",
                )
            )
            if response.terminated:
                print("=" * 40)
                print(
                    colored(
                        f"Task finished, result: {response.evaluation_results}", "green"
                    )
                )
                return


if __name__ == "__main__":
    # Example entry point: run task "0" of the multi-environment template
    # benchmark with a single-agent policy backed by GPT-4o.
    benchmark = create_benchmark(multienv_template_benchmark_config)
    task, action_space = benchmark.start_task("0")
    env_descriptions = benchmark.get_env_descriptions()
    agent = SingleAgentPolicy(model_backend=OpenAIModel("gpt-4o"))
    agent.reset(task.description, action_space, env_descriptions)
    print("Start performing task: " + colored(f'"{task.description}"', "green"))
    start_benchmark(benchmark, agent)
    benchmark.reset()


================================================
FILE: examples/single_env.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from termcolor import colored from crab import Benchmark, create_benchmark from crab.agents.backend_models import OpenAIModel from crab.agents.policies import SingleAgentPolicy from crab.benchmarks.template import template_benchmark_config def start_benchmark(benchmark: Benchmark, agent: SingleAgentPolicy): for step in range(20): print("=" * 40) print(f"Start agent step {step}:") observation = benchmark.observe()["template_env"] print(f"Current enviornment observation: {observation}") response = agent.chat( { "template_env": [ (f"Current enviornment observation: {observation}", 0), ] } ) print(colored(f"Agent take action: {response}", "blue")) for action in response: response = benchmark.step( action=action.name, parameters=action.arguments, env_name=action.env, ) print( colored( f'Action "{action.name}" success, stat: ' f"{response.evaluation_results}", "green", ) ) if response.terminated: print("=" * 40) print( colored( f"Task finished, result: {response.evaluation_results}", "green" ) ) return if __name__ == "__main__": benchmark = create_benchmark(template_benchmark_config) task, action_space = benchmark.start_task("0") env_descriptions = benchmark.get_env_descriptions() agent = SingleAgentPolicy(model_backend=OpenAIModel("gpt-4o")) agent.reset(task.description, action_space, env_descriptions) print("Start performing task: " + colored(f'"{task.description}"', "green")) start_benchmark(benchmark, agent) benchmark.reset() ================================================ FILE: 
licenses/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2023 @ CAMEL-AI.org Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: licenses/license_template.txt ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== ================================================ FILE: licenses/update_license.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import os import re import sys from pathlib import Path from typing import List # The license template file is hard-coded with specific start and end lines def fine_license_start_line(lines: List[str], start_with: str) -> int: for i in range(len(lines)): if lines[i].startswith(start_with): return i return None def find_license_end_line(lines: List[str], start_with: str) -> int: for i in range(len(lines) - 1, -1, -1): if lines[i].startswith(start_with): return i return None def update_license_in_file( file_path: str, license_template_path: str, start_line_start_with: str, end_line_start_with: str, ) -> bool: with open(file_path, "r") as f: content = f.read() with open(license_template_path, "r") as f: new_license = f.read().strip() maybe_existing_licenses = re.findall( r"^#.*?(?=\n)", content, re.MULTILINE | re.DOTALL ) start_index = fine_license_start_line( maybe_existing_licenses, start_line_start_with ) end_index = find_license_end_line(maybe_existing_licenses, end_line_start_with) if start_index is not None and end_index is not None: maybe_existing_licenses = maybe_existing_licenses[start_index : end_index + 1] else: 
maybe_existing_licenses = None if maybe_existing_licenses: maybe_old_licenses = "\n".join(maybe_existing_licenses) if maybe_old_licenses.strip() != new_license.strip(): replaced_content = content.replace(maybe_old_licenses, new_license) with open(file_path, "w") as f: f.write(replaced_content) print(f"Replaced license in {file_path}") return True else: return False else: with open(file_path, "w") as f: f.write(new_license + "\n" + content) print(f"Added license to {file_path}") return True def update_license_in_directory( directory_path: str, license_template_path: str, start_line_start_with: str, end_line_start_with: str, ) -> None: # Check if directory exists if not os.path.isdir(directory_path): raise NotADirectoryError(f"{directory_path} is not a directory") # Check if license template exists if not os.path.isfile(license_template_path): raise FileNotFoundError(f"{license_template_path} not found") file_count = 0 for py_files in Path(directory_path).rglob("*.py"): if py_files.name.startswith("."): continue if any(part.startswith(".") for part in py_files.parts): continue if any(part == "thirdparty" for part in py_files.parts): continue if update_license_in_file( py_files, license_template_path, start_line_start_with, end_line_start_with, ): file_count += 1 print(f"License updated in {file_count} files") if __name__ == "__main__": if len(sys.argv) < 3: print( "Usage from command line: " "python update_license.py " "No valid input arguments found, please enter manually." 
) directory_path = input("Enter directory path: ") license_template_path = input("Enter license template path: ") else: directory_path = sys.argv[1] license_template_path = sys.argv[2] start_line_start_with = "# =========== Copyright" end_line_start_with = "# =========== Copyright" update_license_in_directory( directory_path, license_template_path, start_line_start_with, end_line_start_with, ) ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["poetry-core>=1.2.0", "wheel"] build-backend = "poetry.core.masonry.api" [tool.poetry] name = "crab-framework" version = "0.1.2" description = "Cross-platform Agent Benchmark for Multimodal Embodied Language Model Agents." authors = ["CAMEL-AI.org"] maintainers = ["Tianqi Xu "] packages = [{ include = "crab" }] readme = "README.md" license = "Apache License 2.0" repository = "https://github.com/camel-ai/crab" [tool.poetry.dependencies] python = "^3.10, <3.12" # core docstring-parser = "^0" networkx = "^3" dill = "^0.3.8" pydantic = "^2.6" lxml = "^5.2.2" openai = "^1.12.0" cryptography = "^43.0.0" setuptools = "^73.0.1" tenacity = "^9.0.0" # desktop actions pillow = "^10.2.0" mss = "^9.0.1" psutil = "^5.9.8" pyautogui = "^0.9.3" pyperclip = "^1.8.2" # environment python-vagrant = "^1.0.0" # evaluation pyexcel-ods = "^0.6.0" odfpy = "^1.4.1" beautifulsoup4 = "^4.12.3" termcolor = "^2.4.0" opencv-python = "^4.9.0.80" # client httpx = { version = "*", optional = true } # agent google-generativeai = { version = "^0.6.0", optional = true } anthropic = { version = "^0.29.0", optional = true } groq = { version = "^0.5.0", optional = true } ollama = { version = "^0.2.0", optional = true } camel-ai = { version = "^0.2", extras = ["all"], optional = true } # text ocr easyocr = { version = "^1.7.1", optional = true } # visual prompt transformers = { version = "4.44.1", optional = true } torch = { version = "^2.4.0", optional = true } # server 
fastapi = { extras = ["all"], version = "0.109.1", optional = true } pydantic-settings = { version = "^2", optional = true } uvicorn = { extras = ["standard"], version = "^0.27.0.post1", optional = true } # radar plot plotly = { version = "^5.20.0", optional = true } # types types-pyautogui = "^0.9.3.20240106" types-psutil = "^5.9.5.20240205" types-networkx = "^3.2.1.20240210" [tool.poetry.extras] server = ["fastapi", "pydantic-settings", "uvicorn"] client = [ "httpx", "openai", "google-generativeai", "anthropic", "groq", "ollama", "easyocr", "plotly", "torch", "torchvision", "numpy", "opencv-python", "transformers", "addict", "yapf", "matplotlib", "pycocotools", "timm", ] camel = ["camel-ai"] [tool.poetry.group.dev.dependencies] mypy = "^1.8.0" pytest = "^8.0.0" ruff = "^0.6.5" ipykernel = "^6.29.3" pandas = "^2.2.2" sphinx = "^7" myst-parser = "^4" sphinx-book-theme = "*" pre-commit = "^3.7.0" certifi = "^2024.2.2" [tool.ruff] lint.select = ["E501", "E4", "E7", "E9", "F", "I"] lint.ignore = ["E731"] exclude = ["docs/"] [[tool.mypy.overrides]] module = ["dill", "easyocr", "google.generativeai.*"] ignore_missing_imports = true ================================================ FILE: test/actions/test_visual_prompt_actions.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== from pathlib import Path import pytest import requests from PIL import Image from crab.actions.visual_prompt_actions import ( get_groundingdino_boxes, groundingdino_easyocr, ) from crab.utils import image_to_base64 @pytest.mark.skip(reason="Too slow") def test_get_groundingdino_boxes_single_image(): url = "http://images.cocodataset.org/val2017/000000039769.jpg" image = Image.open(requests.get(url, stream=True).raw) text = "a cat." box_threshold = 0.4 text_threshold = 0.3 result = get_groundingdino_boxes(image, text, box_threshold, text_threshold) assert len(result) == 1 assert len(result[0]) > 0 assert len(result[0][0]) == 2 @pytest.mark.skip(reason="Too slow") def test_get_groundingdino_boxes_multi_image(): url1 = "http://images.cocodataset.org/val2017/000000039769.jpg" url2 = "https://farm5.staticflickr.com/4005/4666183752_c5b79faa17_z.jpg" image1 = Image.open(requests.get(url1, stream=True).raw) image2 = Image.open(requests.get(url2, stream=True).raw) text = "a cat. a car." box_threshold = 0.4 text_threshold = 0.3 result = get_groundingdino_boxes( [image1, image2], text, box_threshold, text_threshold ) assert len(result) == 2 assert len(result[0]) > 0 assert len(result[1]) > 0 assert len(result[0][0]) == 2 @pytest.mark.skip(reason="Too slow") @pytest.mark.parametrize( "image_name", ["ubuntu_screenshot.png", "android_screenshot.png"] ) def test_groundingdino_easy_ocr(image_name: str): class A: pass temp = A() test_dir = Path(__file__).parent.parent image_path = test_dir / "_assets" / image_name image = Image.open(image_path) image_base64 = image_to_base64(image) visual_prompt = groundingdino_easyocr(font_size=40).set_kept_param(env=temp) result_image, boxes = visual_prompt.run(input_base64_image=image_base64) assert result_image != image_base64 assert boxes ================================================ FILE: test/agents/backend_models/test_camel_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. 
All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import pytest from crab import action from crab.agents.backend_models import BackendModelConfig, create_backend_model @pytest.fixture def camel_model(): return create_backend_model( BackendModelConfig( model_class="camel", model_name="gpt-4o", model_platform="openai", parameters={"max_tokens": 3000}, history_messages_len=1, ) ) @action def add(a: int, b: int): """Add up two integers. Args: a: An addend b: Another addend """ return a + b @pytest.mark.skip(reason="Mock data to be added") def test_action_chat(camel_model): camel_model.reset("You are a helpful assistant.", [add]) message = ( "I had 10 dollars. Miss Polaris gave me 15 dollars. " "How many money do I have now.", 0, ) output = camel_model.chat([message]) assert not output.message assert len(output.action_list) == 1 assert output.action_list[0].arguments == {"a": 10, "b": 15} assert output.action_list[0].name == "add" assert camel_model.token_usage > 0 ================================================ FILE: test/agents/backend_models/test_claude_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import pytest from crab import MessageType, action from crab.agents.backend_models import BackendModelConfig, create_backend_model # TODO: Add mock data @pytest.fixture def claude_model_text(): return create_backend_model( BackendModelConfig( model_class="claude", model_name="claude-3-opus-20240229", parameters={"max_tokens": 3000}, history_messages_len=1, ) ) @action def add(a: int, b: int): """Add up two integers. Args: a: An addend b: Another addend """ return a + b @pytest.mark.skip(reason="Mock data to be added") def test_text_chat(claude_model_text): message = ("Hello!", MessageType.TEXT) output = claude_model_text.chat(message) assert output.message assert output.action_list is None assert claude_model_text.token_usage > 0 # Send another message to check accumulated tokens and history length message2 = ("Give me five!", MessageType.TEXT) output = claude_model_text.chat(message2) assert claude_model_text.token_usage > 0 assert output.message assert len(claude_model_text.chat_history) == 2 # Send another message to check accumulated tokens and chat history output = claude_model_text.chat(message2) assert output.message assert len(claude_model_text.chat_history) == 3 @pytest.mark.skip(reason="Mock data to be added") def test_action_chat(claude_model_text): claude_model_text.reset("You are a helpful assistant.", [add]) message = ( ( "I had 10 dollars. Miss Polaris gave me 15 dollars." " How many money do I have now." 
), 0, ) output = claude_model_text.chat(message) assert len(output.action_list) == 1 args = output.action_list[0].arguments assert args["a"] + args["b"] == 25 assert output.action_list[0].name == "add" assert claude_model_text.token_usage > 0 ================================================ FILE: test/agents/backend_models/test_gemini_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import pytest from crab import MessageType, action from crab.agents.backend_models import BackendModelConfig, create_backend_model # TODO: Add mock data @pytest.fixture def gemini_model_text(): return create_backend_model( BackendModelConfig( model_class="gemini", model_name="gemini-1.5-pro-latest", parameters={"max_tokens": 3000}, history_messages_len=1, tool_call_required=False, ) ) @action def add(a: int, b: int): """Add up two integers. 
Args: a: An addend b: Another addend """ return a + b @pytest.mark.skip(reason="Mock data to be added") def test_text_chat(gemini_model_text): message = ("Hello!", MessageType.TEXT) output = gemini_model_text.chat(message) assert output.message assert output.action_list is None # assert gemini_model_text.token_usage > 0 # Send another message to check accumulated tokens and history length message2 = ("Give me five!", MessageType.TEXT) output = gemini_model_text.chat(message2) # assert gemini_model_text.token_usage > 0 assert output.message assert len(gemini_model_text.chat_history) == 2 # Send another message to check accumulated tokens and chat history output = gemini_model_text.chat(message2) assert output.message assert len(gemini_model_text.chat_history) == 3 @pytest.mark.skip(reason="Mock data to be added") def test_action_chat(gemini_model_text): gemini_model_text.reset("You are a helpful assistant.", [add]) message = ( ( "I had 10 dollars. Miss Polaris gave me 15 dollars. " "How many money do I have now." ), 0, ) output = gemini_model_text.chat(message) assert output.message is None assert len(output.action_list) == 1 assert output.action_list[0].arguments == {"a": 10, "b": 15} assert output.action_list[0].name == "add" ================================================ FILE: test/agents/backend_models/test_openai_model.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import os from unittest.mock import MagicMock, patch import pytest from openai.types.chat.chat_completion_message_tool_call import Function from crab import action from crab.agents.backend_models import BackendModelConfig, create_backend_model from crab.agents.backend_models.openai_model import MessageType # Mock data for the OpenAI API response openai_mock_response = MagicMock( choices=[ MagicMock( finish_reason="stop", index=0, logprobs=None, message=MagicMock( content="Hi there! How can I assist you today?", role="assistant", function_call=None, tool_calls=None, ), ) ], model="gpt-4o-2024-05-13", object="chat.completion", usage=MagicMock(completion_tokens=10, prompt_tokens=19, total_tokens=29), ) openai_mock_response2 = MagicMock( choices=[ MagicMock( finish_reason="stop", index=0, logprobs=None, message=MagicMock( content="Sure thing! 
✋ How can I help you today?", role="assistant", function_call=None, tool_calls=None, ), ) ], model="gpt-4o-2024-05-13", object="chat.completion", usage=MagicMock(completion_tokens=12, prompt_tokens=41, total_tokens=53), ) openai_mock_response3 = MagicMock( choices=[ MagicMock( finish_reason="stop", index=0, logprobs=None, message=MagicMock( content=None, role="assistant", function_call=None, tool_calls=[ MagicMock( id="call_ceE9IX1uYeRqGShYYlHYrCCF", function=Function(arguments='{"a":10,"b":15}', name="add"), type="function", ) ], ), ) ], model="gpt-4o-2024-05-13", object="chat.completion", usage=MagicMock(completion_tokens=15, prompt_tokens=93, total_tokens=108), ) @pytest.fixture def openai_model_text(): os.environ["OPENAI_API_KEY"] = "MOCK" return create_backend_model( BackendModelConfig( model_class="openai", model_name="gpt-4o", parameters={"max_tokens": 3000}, history_messages_len=1, tool_call_required=False, ) ) @action def add(a: int, b: int): """Add up two integers. Args: a: An addend b: Another addend """ return a + b @patch( "openai.resources.chat.completions.Completions.create", return_value=openai_mock_response, ) def test_text_chat(mock_create, openai_model_text): message = ("Hello!", MessageType.TEXT) output = openai_model_text.chat(message) assert len(mock_create.call_args.kwargs["messages"]) == 2 assert output.message == "Hi there! How can I assist you today?" assert output.action_list is None assert openai_model_text.token_usage == 29 # Send another message to check accumulated tokens and history length message2 = ("Give me five!", MessageType.TEXT) mock_create.return_value = openai_mock_response2 output = openai_model_text.chat(message2) assert len(mock_create.call_args.kwargs["messages"]) == 4 assert openai_model_text.token_usage == 29 + 53 assert output.message == "Sure thing! ✋ How can I help you today?" 
assert len(openai_model_text.chat_history) == 2 # Send another message to check accumulated tokens and chat history output = openai_model_text.chat(message2) assert len(mock_create.call_args.kwargs["messages"]) == 4 assert openai_model_text.token_usage == 29 + 53 + 53 assert output.message == "Sure thing! ✋ How can I help you today?" assert len(openai_model_text.chat_history) == 3 @patch( "openai.resources.chat.completions.Completions.create", return_value=openai_mock_response3, ) def test_action_chat(mock_create, openai_model_text): openai_model_text.reset("You are a helpful assistant.", [add]) message = ( ( "I had 10 dollars. Miss Polaris gave me 15 dollars. " "How many money do I have now." ), 0, ) output = openai_model_text.chat(message) assert output.message is None assert len(output.action_list) == 1 assert output.action_list[0].arguments == {"a": 10, "b": 15} assert output.action_list[0].name == "add" assert openai_model_text.token_usage == 108 ================================================ FILE: test/agents/policies/test_multi_agent_by_func.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
=========== import pytest from crab import create_benchmark from crab.agents.backend_models import BackendModelConfig from crab.agents.policies.multi_agent_by_func import MultiAgentByFuncPolicy from crab.benchmarks.template import multienv_template_benchmark_config @pytest.fixture def policy_fixture(): model = BackendModelConfig( model_class="openai", model_name="gpt-4o", parameters={"max_tokens": 3000}, history_messages_len=1, ) benchmark_config = multienv_template_benchmark_config benchmark = create_benchmark(benchmark_config) task, action_spaces = benchmark.start_task("0") policy = MultiAgentByFuncPolicy( main_agent_model_backend=model, tool_agent_model_backend=model, ) policy.reset( task_description=task.description, action_spaces=action_spaces, env_descriptions=benchmark.get_env_descriptions(), ) return policy, benchmark @pytest.mark.skip(reason="Mock data to be added") def test_policy(policy_fixture): policy, benchmark = policy_fixture observations = benchmark.observe() agent_observation = {} for env in observations: if env == "root": continue agent_observation[env] = [ ( f'The current state of "{env}" is ' + str(observations[env]["current_state"]) + ". ", 0, ) ] action_list = policy.chat(agent_observation) assert action_list ================================================ FILE: test/agents/policies/test_mutli_agent_by_env.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import pytest from crab import create_benchmark from crab.agents.backend_models import BackendModelConfig from crab.agents.policies.multi_agent_by_env import MultiAgentByEnvPolicy from crab.benchmarks.template import multienv_template_benchmark_config @pytest.fixture def policy_fixture(): model = BackendModelConfig( model_class="openai", model_name="gpt-4o", parameters={"max_tokens": 3000}, history_messages_len=1, ) benchmark_config = multienv_template_benchmark_config benchmark = create_benchmark(benchmark_config) task, action_spaces = benchmark.start_task("0") policy = MultiAgentByEnvPolicy( main_agent_model_backend=model, env_agent_model_backend=model, ) policy.reset( task_description=task.description, action_spaces=action_spaces, env_descriptions=benchmark.get_env_descriptions(), ) return policy, benchmark @pytest.mark.skip(reason="Mock data to be added") def test_policy(policy_fixture): policy, benchmark = policy_fixture observations = benchmark.observe() agent_observation = {} for env in observations: if env == "root": continue agent_observation[env] = [ ( f'The current state of "{env}" is ' + str(observations[env]["current_state"]) + ". ", 0, ) ] action_list = policy.chat(agent_observation) assert action_list ================================================ FILE: test/agents/policies/test_single_agent.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import os from unittest.mock import MagicMock, patch import pytest from openai.types.chat.chat_completion import ( ChatCompletionMessage, Choice, CompletionUsage, ) from openai.types.chat.chat_completion_message_tool_call import ( ChatCompletionMessageToolCall, Function, ) from crab import create_benchmark from crab.agents.backend_models import BackendModelConfig from crab.agents.policies.single_agent import SingleAgentPolicy from crab.benchmarks.template import multienv_template_benchmark_config openai_mock_response = MagicMock( choices=[ Choice( finish_reason="stop", index=0, logprobs=None, message=ChatCompletionMessage( content=None, role="assistant", function_call=None, tool_calls=[ ChatCompletionMessageToolCall( id="call_3YIJZhrC5smSjAJKOeFcQxRf", function=Function( arguments='{"value": true}', name="set_state__in__testenv0" ), type="function", ), ChatCompletionMessageToolCall( id="call_mA9Z9HQfmYn2TbzeGsEVcCr7", function=Function( arguments='{"value": true}', name="set_state__in__testenv1" ), type="function", ), ChatCompletionMessageToolCall( id="call_GgxbBTd6afj2iDyOewaNattB", function=Function( arguments='{"value": true}', name="set_state__in__testenv2" ), type="function", ), ], ), ) ], model="gpt-4o-2024-05-13", object="chat.completion", usage=CompletionUsage(completion_tokens=74, prompt_tokens=648, total_tokens=722), ) @pytest.fixture def policy_fixture(): os.environ["OPENAI_API_KEY"] = "MOCK" model = BackendModelConfig( model_class="openai", model_name="gpt-4o", 
parameters={"max_tokens": 3000}, history_messages_len=1, ) benchmark_config = multienv_template_benchmark_config benchmark = create_benchmark(benchmark_config) task, action_spaces = benchmark.start_task("0") policy = SingleAgentPolicy(model_backend=model) policy.reset( task_description=task.description, action_spaces=action_spaces, env_descriptions=benchmark.get_env_descriptions(), ) return policy, benchmark @patch( "openai.resources.chat.completions.Completions.create", return_value=openai_mock_response, ) def test_policy(mock_create: MagicMock, policy_fixture): policy, benchmark = policy_fixture observation = benchmark.observe() for env in observation: if env == "root": continue observation[env] = [ ( 'The current state of "{env}" is ' + str(observation[env]["current_state"]) + ". ", 0, ) ] action_list = policy.chat(observation) mock_create.assert_called_once() assert action_list ================================================ FILE: test/core/test_action.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== from crab.core import Action, action from crab.core.models.action import _check_no_param @action def dummy_function(a: int, b: str = "default") -> int: """ This is a test function. Args: a (int): The first parameter. b (str, optional): The second parameter. 
Defaults to "default". Returns: int: The result. """ return a + 1 @action def dummy_env_action(a: int, env: int) -> int: """ This is a kept parameter test function. Args: a (int): The first parameter. env (int): The current environemnt. Should not be appeared in the parameters. Returns: int: The result. """ return a + env def test_action_to_openai_json_schema(): result = dummy_function.to_openai_json_schema() assert result["name"] assert result["description"] assert result["parameters"] parameters = result["parameters"] assert "properties" in parameters assert "a" in parameters["properties"] assert parameters["properties"]["a"]["type"] == "integer" assert "b" in parameters["properties"] assert parameters["properties"]["b"]["type"] == "string" assert parameters["properties"]["b"]["default"] == "default" assert "required" in parameters assert "a" in parameters["required"] def test_from_function(): action_instance: Action = dummy_function assert action_instance.description == "This is a test function." 
assert action_instance.name == "dummy_function" assert "a" in action_instance.parameters.model_fields assert "b" in action_instance.parameters.model_fields assert action_instance.name == "dummy_function" def test_chaining(): dummy_x2 = dummy_function >> dummy_function assert dummy_x2.entry(1) == 3 @action def add_a_to_b(a: int, b: int = 1) -> int: return a + b @action def multiply_a_to_b(a: int, b: int = 1) -> int: return a * b def test_closed_action(): action = add_a_to_b(5) assert action.entry() == 6 assert _check_no_param(action) def test_kwargs_action(): action = add_a_to_b(b=6) assert action.entry(1) == 7 def test_chain_various_actions(): action = add_a_to_b(b=10) >> multiply_a_to_b(b=10) >> add_a_to_b() assert action.entry(0) == 101 action = add_a_to_b(a=1, b=10) >> multiply_a_to_b(b=10) >> add_a_to_b() assert action.entry() == 111 action = add_a_to_b(1, b=10) >> multiply_a_to_b(b=10) >> add_a_to_b() assert action.entry() == 111 def test_kept_param(): action = dummy_env_action.set_kept_param(env=10) assert action.run(a=10) == 20 ================================================ FILE: test/core/test_benchmark.py ================================================ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # Licensed under the Apache License, Version 2.0 (the “License”); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
===========
import pytest
from fastapi.testclient import TestClient

from crab import Benchmark, action, create_benchmark
from crab.benchmarks.template import (
    multienv_template_benchmark_config,
    template_benchmark_config,
    template_environment_config,
)
from crab.server.main import init


@pytest.fixture
def benchmark(request):
    """Indirectly-parametrized benchmark fixture.

    Supported params:
      * "multienv"        — local multi-environment benchmark.
      * "multienv-remote" — multi-environment benchmark whose environments
        talk to in-process FastAPI apps through TestClient instances.
      * "singleenv"       — local single-environment benchmark.
    """
    if request.param == "multienv":
        yield create_benchmark(multienv_template_benchmark_config)
    elif request.param == "multienv-remote":
        # TODO: fix multienv remote — the remote envs currently share the
        # same underlying template environment config.
        # One FastAPI app + TestClient per simulated remote environment.
        app0 = init(environment_config=template_environment_config)
        client0 = TestClient(app0)
        app1 = init(environment_config=template_environment_config)
        client1 = TestClient(app1)
        app2 = init(environment_config=template_environment_config)
        client2 = TestClient(app2)
        # Copy the config so the module-level template stays untouched.
        proxy_config = multienv_template_benchmark_config.model_copy()
        for env in proxy_config.environments:
            # Any URL works: the HTTP layer is bypassed by injecting the
            # TestClient directly below.
            env.remote_url = "http://127.0.0.1:8000"
        benchmark = create_benchmark(proxy_config)
        benchmark.environment_map["testenv0"]._client = client0
        benchmark.environment_map["testenv1"]._client = client1
        benchmark.environment_map["testenv2"]._client = client2
        yield benchmark
    elif request.param == "singleenv":
        yield create_benchmark(template_benchmark_config)


@pytest.mark.parametrize("benchmark", ["multienv", "multienv-remote"], indirect=True)
def test_multi_env_benchmark_process(benchmark: Benchmark):
    # Walk task "0" to completion: setting the state in each of the three
    # test envs raises completeness by 0.25, and a final root "_submit"
    # terminates the task at 1.0.
    assert benchmark.multienv
    task, actions = benchmark.start_task(task_id="0")
    assert benchmark.current_task == task
    # Three test envs plus the "root" env, which only exposes _submit.
    assert len(actions) == 4
    assert len(actions["root"]) == 1
    assert actions["root"][0].name == "_submit"
    result = benchmark.step(
        action="set_state", parameters={"value": True}, env_name="testenv0"
    )
    assert result.evaluation_results["completeness"] == 0.25
    result = benchmark.step(
        action="set_state", parameters={"value": True}, env_name="testenv1"
    )
    assert result.evaluation_results["completeness"] == 0.5
    result = benchmark.step(
        action="set_state",
        parameters={"value": True},
        env_name="testenv2",
    )
    assert result.evaluation_results["completeness"] == 0.75
    result = benchmark.step(
        action="_submit", parameters={"content": True}, env_name="root"
    )
    assert result.terminated
    assert result.evaluation_results["completeness"] == 1.0


@action
def to_str(input: bool) -> str:
    return f"The current state is {input}"


@pytest.mark.parametrize("benchmark", ["singleenv"], indirect=True)
def test_prompting_tool(benchmark: Benchmark):
    # A prompting tool post-processes an observation into a prompt string.
    benchmark.prompting_tools = {"template_env": {"current_state": to_str}}
    benchmark.start_task("0")
    observe, prompt = benchmark.observe_with_prompt()
    assert observe["template_env"]["current_state"] is False
    assert prompt["template_env"]["current_state"] == "The current state is False"
    benchmark.close_task()



================================================
FILE: test/core/test_evaluator.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import networkx as nx
import pytest

from crab.core import Environment, Evaluator, GraphEvaluator, evaluator

# Module-level state the dummy evaluators read; mutated through set_a().
a = None


def set_a(value: int) -> None:
    global a
    a = value


@evaluator
def dummy_evaluator1() -> bool:
    """
    This is a test evaluator.

    Passes when the module-global ``a`` is positive.

    Returns:
        bool: The result.
    """
    return a > 0


@evaluator
def dummy_evaluator2() -> bool:
    """
    This is a test evaluator.
    Args:
        a (int): The first parameter.
        b (str, optional): The second parameter. Defaults to "default".

    Returns:
        bool: The result.
    """
    return a < 2


@evaluator
def dummy_evaluator3() -> bool:
    """
    This is a test evaluator.

    Passes when the module-global ``a`` exceeds 100.

    Returns:
        bool: The result.
    """
    return a > 100


@evaluator
def no_param_evaluator() -> bool:
    # Always passes; used as a trivial node in graph-evaluator tests.
    return True


@pytest.fixture
def root_env() -> Environment:
    """A bare "root" environment with empty action/observation spaces."""
    return Environment(
        name="root",
        action_space=[],
        observation_space=[],
        description="The crab root server",
    )


def test_evaluator_run():
    # @evaluator wraps the function into an Evaluator; entry() evaluates it
    # against the current module-global state.
    assert isinstance(dummy_evaluator1, Evaluator)
    set_a(3)
    assert dummy_evaluator1.entry()
    set_a(-1)
    assert not dummy_evaluator1.entry()


def test_evaluator_and():
    # "&" combines evaluators with logical AND (a > 0 and a < 2).
    set_a(1)
    assert (dummy_evaluator1 & dummy_evaluator2).entry()
    set_a(-1)
    assert not (dummy_evaluator1 & dummy_evaluator2).entry()
    set_a(3)
    assert not (dummy_evaluator1 & dummy_evaluator2).entry()


def test_evaluator_or():
    # "|" combines evaluators with logical OR (a > 0 or a < 2) — always true
    # for the sampled values.
    set_a(1)
    assert (dummy_evaluator1 | dummy_evaluator2).entry()
    set_a(-1)
    assert (dummy_evaluator1 | dummy_evaluator2).entry()
    set_a(3)
    assert (dummy_evaluator1 | dummy_evaluator2).entry()


def test_evaluator_not():
    # "~" negates an evaluator.
    set_a(3)
    assert not (~dummy_evaluator1).entry()
    set_a(-1)
    assert (~dummy_evaluator1).entry()


def test_chain_evaluator(root_env):
    # Build a three-node path graph: evaluator1 -> evaluator2 -> no_param.
    graph_evaluator = GraphEvaluator(
        nx.path_graph(
            [dummy_evaluator1, dummy_evaluator2, no_param_evaluator],
            create_using=nx.DiGraph,
        )
    )
    graph_evaluator.reset()
    assert graph_evaluator.count == 0
    # Only the source node starts with no unmet predecessors.
    assert graph_evaluator.G.nodes[dummy_evaluator1]["remaining_predecessors"] == 0
    assert graph_evaluator.G.nodes[dummy_evaluator2]["remaining_predecessors"] == 1
    assert graph_evaluator.G.nodes[no_param_evaluator]["remaining_predecessors"] == 1
    set_a(3)
    graph_evaluator.step({"root": root_env})
    assert graph_evaluator.count == 1
    # evaluator1 (a > 0) passed on step 0, unlocking evaluator2.
    assert graph_evaluator.G.nodes[dummy_evaluator1]["passing_count"] == 0
    assert graph_evaluator.G.nodes[dummy_evaluator2]["remaining_predecessors"] == 0
    set_a(3)
    graph_evaluator.step({"root": root_env})
    assert graph_evaluator.count == 2
    # evaluator2 (a < 2) has not passed yet with a == 3.
    assert graph_evaluator.G.nodes[dummy_evaluator2]["remaining_predecessors"] == 0
    assert graph_evaluator.G.nodes[dummy_evaluator2]["passing_count"] is None
    set_a(-1)
    graph_evaluator.step({"root": root_env})
    assert graph_evaluator.count == 3
    # evaluator2 passed on step 2, unlocking the final node.
    assert graph_evaluator.G.nodes[dummy_evaluator2]["passing_count"] == 2
    assert graph_evaluator.G.nodes[no_param_evaluator]["remaining_predecessors"] == 0



================================================
FILE: test/core/test_utils.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import os

from crab.utils import decrypt_message, encrypt_message


def test_encrypt_decrypt():
    # Round-trip: decrypting what was encrypted with the same 32-byte key
    # must recover the original message.
    message = "Hello, World!"
    key = os.urandom(32)
    encrypted_message = encrypt_message(message, key)
    decrypted_message = decrypt_message(encrypted_message, key)
    assert decrypted_message == message



================================================
FILE: test/server/test_api.py
================================================
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an “AS IS” BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== import pytest from fastapi.testclient import TestClient from crab import create_environment from crab.environments.template import ( current_state, set_state, template_environment_config, ) from crab.server.main import init @pytest.fixture def mock_env(): mock_app = init(template_environment_config) mock_cli = TestClient(mock_app) mock_env = create_environment(template_environment_config) mock_env._client = mock_cli return mock_env def test_raw_action_unencrypted(mock_env): assert mock_env._action_endpoint(set_state, {"value": True}) is None assert mock_env._action_endpoint(current_state, {}) is True assert mock_env._action_endpoint(set_state(True), {}) is None assert mock_env._action_endpoint(current_state >> set_state, {}) is None assert mock_env._action_endpoint(set_state(True) + current_state, {}) is True def test_raw_action_encrypted(mock_env, monkeypatch): monkeypatch.setenv("ENCRYPTION_KEY", "the-cake-is-a-lie") assert mock_env._action_endpoint(set_state, {"value": True}) is None assert mock_env._action_endpoint(current_state, {}) is True assert mock_env._action_endpoint(set_state(True), {}) is None assert mock_env._action_endpoint(current_state >> set_state, {}) is None assert mock_env._action_endpoint(set_state(True) + current_state, {}) is True