Repository: DigiRL-agent/digirl Branch: master Commit: c74ec34029eb Files: 52 Total size: 270.4 KB Directory structure: gitextract_68uiztdu/ ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── digirl/ │ ├── algorithms/ │ │ ├── __init__.py │ │ ├── digirl/ │ │ │ ├── __init__.py │ │ │ └── trainer.py │ │ ├── eval_loop.py │ │ ├── filteredbc/ │ │ │ ├── __init__.py │ │ │ └── trainer.py │ │ ├── offpolicy_train_loop.py │ │ ├── parallel_utils.py │ │ └── worker_collect_loop.py │ ├── data/ │ │ ├── __init__.py │ │ └── utils.py │ ├── environment/ │ │ ├── __init__.py │ │ ├── android/ │ │ │ ├── __init__.py │ │ │ ├── assets/ │ │ │ │ └── task_set/ │ │ │ │ ├── general_test.txt │ │ │ │ ├── general_train.txt │ │ │ │ ├── webshop_test.txt │ │ │ │ └── webshop_train.txt │ │ │ ├── autoui_utils.py │ │ │ ├── client.py │ │ │ ├── env.py │ │ │ └── evaluate.py │ │ └── env_utils.py │ ├── misc.py │ └── models/ │ ├── __init__.py │ ├── autoui_agent.py │ ├── cog_agent.py │ ├── critic.py │ ├── infer_utils.py │ └── model.py ├── env_setup/ │ ├── README.md │ ├── config.ini │ └── screenshot.py ├── multimachine/ │ └── README.md ├── push.sh ├── requirements.txt ├── scripts/ │ ├── config/ │ │ ├── accelerate_config/ │ │ │ └── default_config.yaml │ │ ├── cogagent/ │ │ │ ├── default.yaml │ │ │ └── eval_only.yaml │ │ ├── main/ │ │ │ ├── default.yaml │ │ │ ├── digirl_off2on.yaml │ │ │ ├── digirl_offline.yaml │ │ │ ├── digirl_online.yaml │ │ │ └── eval_only.yaml │ │ └── multimachine/ │ │ ├── default.yaml │ │ ├── host.yaml │ │ └── worker.yaml │ └── run.py └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Ignore __pycache__ directories __pycache__/ # Ignore .egg-info directories *.egg-info/ # Ignore outputs directories scripts/outputs/ # Ignore wandb directories scripts/wandb/ 
================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================

🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉
Check out our latest progress: a new offline RL algorithm for Android, DigiQ, and autonomous skill discovery for web agents, Proposer-Agent-Evaluator.
🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

logo

DigiRL: Training In-The-Wild Device-Control Agents with Autonomous Reinforcement Learning
Oral @ FM Wild, ICML
NeurIPS 2024

| Website | Demo | Results | Paper | Checkpoints | Data |

--- Research Code for preprint "DigiRL: Training In-The-Wild Device-Control Agents with Autonomous Reinforcement Learning". [Hao Bai*](https://jackgethome.com), [Yifei Zhou*](https://02.github.io/), [Mert Cemri](https://scholar.google.com/citations?user=sMEFwf8AAAAJ&hl=en), [Jiayi Pan](https://www.jiayipan.me/), [Alane Suhr](https://www.alanesuhr.com/), [Sergey Levine](https://people.eecs.berkeley.edu/~svlevine/), [Aviral Kumar](https://aviralkumar2907.github.io/)
UC Berkeley, UIUC, Google DeepMind
*Equal contribution, alphabetical order; work done at UC Berkeley

digirl-diagram

## 🍩 Features ### Environment Features - Auto-adaptive error handling support. - Multi-machine [emulation parallel](multimachine/README.md) support. - Checkpoint resuming support. - Trajectory video recording support. ### Approach Features - Two training algorithms proposed in the paper - DigiRL (automatic curriculum + doubly robust estimator filtering). - Filtered Behavior Cloning (reward-based filtering). - Three training modes: - Offline-only training: baseline apporach - use the AutoUI checkpoint to collect data (we have this data ready for you), then train with these pre-collected sub-optimal trajectories. This mode only allows evaluation using the checkpoint. - Online-only training: traditional RL approach - the AutoUI checkpoint simultaneously interacts with the environment learns online. This mode allows interactive training. - Offline-to-online training: the most powerful approach as evaluated in paper - the AutoUI checkpoint first learns the pre-collected data, then simultanesouly interacts with the environment and do online learning starting from this checkpoint. This mode allows interactive training - Two agents: - [AutoUI](https://arxiv.org/abs/2309.11436): we support both training (2 algorithms x 3 paradigms) and evaluation. - [CogAgent](https://arxiv.org/abs/2312.08914): current only support evaluation, no training pipeline is supported. - Two [Android-in-the-Wild](https://arxiv.org/abs/2307.10088) task sets: - AitW General: general browsing, opening apps. - AitW Web Shopping: shopping on popular shopping websites. - It'll also be interesting to explore the [other AitW subsets](https://github.com/google-research/google-research/tree/master/android_in_the_wild) or other task sets if you have good candidates, please propose one in the issue. - DDP Multi-GPU training: - We support `accelerate` for multi-GPU training. You can turn off this feature if you only have 1 GPU. 
It only takes **12GB** of GPU memory for AutoUI running the DigiRL algorithm, but we provide this feature in case you want to play with something larger. ## 🚀 Quick Start ### Dependencies First, create a [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) environment and install all pip package requirements. ```bash conda create -n digirl python==3.10 conda activate digirl git clone https://github.com/DigiRL-agent/digirl.git cd digirl pip install -e . ``` ### Environment Setup To set up the Android environment for the DigiRL/filtered BC to interact with, refer to [the environment README](./env_setup/README.md). Before moving on, you should be able to view [this screenshot](./env_setup/screenshot.png) by running [this script](./env_setup/screenshot.py). ### Model checkpoint and Datasets The SFT checkpoint of the AutoUI model was released here and we use it: - [AutoUI SFT checkpoint](https://huggingface.co/cooelf/Auto-UI) Simply download `Auto-UI-Base.zip`, then unzip to a directory. ```bash cd wget https://huggingface.co/cooelf/Auto-UI/resolve/main/Auto-UI-Base.zip unzip Auto-UI-Base.zip # wait... 
ls Auto-UI-Base # config.json pytorch_model.bin tokenizer.json training_args.bin # generation_config.json special_tokens_map.json tokenizer_config.json ``` We provide the pre-collected trajectories using this SFT checkpoint: - [Trajectories of SFT'ed AutoUI](https://drive.google.com/drive/folders/1ud1XyzCfh0257CixxdgLjjpX59jYbhfU?usp=sharing) The Google Drive folder contains 4 files, with stats below (you can use `gdown` to download the checkpoint you want): | File Name | #Trajectories | Horizon | File Size | |-----------|---------------|---------|-----------| | `general-off2on-zeroshot-trajectories.pt` | 608 | 10 | 95.5M | | `general-offline-zeroshot-trajectories.pt` | 1552 | 10 | 243.9M | | `webshop-off2on-zeroshot-trajectories.pt` | 528 | 20 | 115.2M | | `webshop-offline-zeroshot-trajectories.pt` | 1296 | 20 | 297.5M | where `general/webshop` mean the AitW General/Web Shopping subset, `off2on/offline` means whether the data is used for offline learning or offline-to-online learning. To make a fair comparison, offline learning should use the similar amount of data that offline-to-online learning finally uses. Store these files into a directory: ```bash mkdir ~/data && cd ~/data # copy the .pt file here ``` If you want to use our final offline-to-online checkpoints to reproduce scores in the paper, you can also download from Google Drive. 
We release the first offline-to-online checkpoint (`run1` in paper) for each algorithm in each environment: - [AutoUI DigiRL & online filtered BC checkpoint](https://drive.google.com/drive/folders/13jkIgWQ6JCcaTsfG_AWdgxE1qO4c2imJ?usp=sharing) The Google Drive folder also contains 4 files: | File Name | Index in Paper | Test Set Score | File Size | |-----------|---------------|---------|---------| | `general-off2on-digirl.zip` | `run1` | 70.8 | 1.9G | | `general-off2on-filteredbc.zip` | `run1` | 59.4 | 1.9G | | `webshop-off2on-digirl.zip` | `run1` | 75.0 | 1.9G | | `webshop-off2on-filteredbc.zip` | `run1` | 55.2 | 1.9G | You can also access through [Huggingface](https://huggingface.co/collections/JackBAI/digirl-checkpoints-6682ea42bdfb5af9bfc5f29f). Note that these checkpoints only allows evaluation because we only release the AutoUI checkpoint, not the optimizer states. ### Modify Configurations Then change the `huggingface_token`, `wandb_token`, `gemini_token`, etc. in `scripts/config/main/default.yaml`, note that you need to specify **all entries** left blank or `` for you in this file. This config is the default configuration - you also need to specify the subconfiguration - for example, if you want to run the online algorithm, you should also examine what to modify in `scripts/config/main/digirl_online`. Feel free to DIY your configs and play with the code! **Note: to load existing checkpoints, modify `save_path` instead of `policy_lm`.** That is, `policy_lm` should still be the path to the AutoUI checkpoint. ### Run Experiments After modifying the config to what you like, you can now run experiments with the following commands: ```bash cd scripts python run.py --config-path config/main --config-name digirl_online ``` The file `run.py` is the entrance of the program, and you can pass the config name to run different experiments. The config file is in `scripts/config/` directory. 
### Main Results Reproduction To reproduce the results in Table 1 of our paper, first download the corresponding checkpoints as described above. As the results in the training set are obtained by randomly sampling tasks, we recommend reproducing the test results (which are obtained by sequentially sampling the first 96 trajectories). To do this, modify the [`eval_only.yaml`](https://github.com/DigiRL-agent/digirl/blob/master/scripts/config/main/default.yaml) config file and its parent ['default.yaml'](https://github.com/DigiRL-agent/digirl/blob/master/scripts/config/main/default.yaml) config file to experiment settings. For instance, you can modify these configs for reproduction: 1. `default.yaml` 1. Set `task_split: "test"` and `eval_sample_mode: "sequential"` 2. Don't forget to increase `max_steps` to `20` if `task_set` is set to `webshop` (as the webshop tasks usually need more steps than the general tasks to complete). 2. `eval_only.yaml` 1. Make sure `rollout_size` (in `default.yaml`) * `eval_iterations` (in `eval_only.yaml`) = 96. For example, `rollout_size (16) * eval_iterations (6) = 96`. ### (Optional) CogAgent server The way we set CogAgent up is using a Gradio-based API approach, which means that you need to setup CogAgent inference service on a server, then use our code to query that API. To set up CogAgent, refer to the GitHub Page of project [AutoEval](https://github.com/Berkeley-NLP/Agent-Eval-Refine/blob/main/exps/android_exp/README.md) by [Jiayi Pan](https://www.jiayipan.me/). Grab the link and modify that in `scripts/config/cogagent/default.yaml` file. You need at least one GPU with 48GB memory to host CogAgent for inference. ### (Optional) Multi-machine Emulation Parallel If you want to launch large scale emulation (say more than 32 emulators running at the same time), you'll need multiple machines that collects trajectories at the same time. Refer to the [multimachine-training README](multimachine/README.md) for details. 
### (Optional) Multi-GPU DDP Training We use `accelerate` for multi-GPU DDP training. To enable, you need to identify the number of GPUs on your machine in the [accelerate config](scripts/config/accelerate_config/default_config.yaml). If you model is extremely large, it's also possible to do multi-machine DDP training but we currently don't support it. To enable this, the only thing you need to do is to replace `python run.py` with `accelerate launch --config_file run.py`. An example below: ``` accelerate launch --config_file config/accelerate_config/default_config.yaml run.py --config-path config/main --config-name digirl_off2on ``` You should be able to see a much faster learning speed if you've successfully set this up. ## Trouble Shooting (IMPORTANT) 1. If you frequently get the `Error in environment reset` error, you can try increasing the timeout at [this line](https://github.com/DigiRL-agent/digirl/blob/5b77663c3c3f19932cdb9ceb6fe0474c7b28a0b7/digirl/environment/env_utils.py#L59). 2. If you frequently get the `409 resource exhausted` error, try adding a `sleep()` function within the `call_gemini()` function [here](https://github.com/DigiRL-agent/digirl/blob/3896fda9d2e31081234f8b716e9049f6a2d6a7f8/digirl/environment/android/evaluate.py#L161). FYI, a free-tier Gemini API fits `sleep(2)` very well. 3. If you see AVD copying errors (started with `shutil.error`), you can safely ignore it unless the location copying to is empty. ## 🌟 Contribution We welcome the open-source community to contribute to this project. If you invented an algorithm, or you support other types of base models, please propose a PR or issue. Example topics: - [ ] Other algorithms like PPO or any algorithm you invented. - [ ] Other base models like LLaVA. - [ ] Other task sets like WebArena. - [ ] Potential sub-optimal implementations. 
## 📄 License

All content of this work is under [Apache License v2.0](https://github.com/DigiRL-agent/digirl/blob/master/LICENSE), including codebase, data, and model checkpoints.

## 📚 Citation

Consider citing our paper!

```
@article{bai2024digirl,
  title={DigiRL: Training In-The-Wild Device-Control Agents with Autonomous Reinforcement Learning},
  author={Bai, Hao and Zhou, Yifei and Cemri, Mert and Pan, Jiayi and Suhr, Alane and Levine, Sergey and Kumar, Aviral},
  journal={arXiv preprint arXiv:2406.11896},
  year={2024}
}
```

================================================
FILE: __init__.py
================================================


================================================
FILE: digirl/algorithms/__init__.py
================================================
from .offpolicy_train_loop import offpolicy_train_loop
from .eval_loop import eval_loop
from .worker_collect_loop import worker_collect_loop
from .parallel_utils import remote_collect_trajectories


================================================
FILE: digirl/algorithms/digirl/__init__.py
================================================
from .trainer import DigiRLTrainer


================================================
FILE: digirl/algorithms/digirl/trainer.py
================================================
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader
from digirl.data import DummyDataset
import random


def dict_mean(dict_list):
    # Aggregate a list of per-batch metric dicts into a single summary dict:
    # keys containing "min" are reduced with min(), keys containing "max"
    # with max(), and every other key with the arithmetic mean.
    # Returns an empty dict when dict_list is empty.
    mean_dict = {}
    if len(dict_list) > 0:
        for key in dict_list[0].keys():
            if "min" in key:
                mean_dict[key] = min(d[key] for d in dict_list)
            elif "max" in key:
                mean_dict[key] = max(d[key] for d in dict_list)
            else:
                mean_dict[key] = sum(d[key] for d in dict_list) / len(dict_list)
    return mean_dict


class DigiRLTrainer():
    """Trainer for the DigiRL algorithm.

    Optimizes three components of ``agent`` with separate Adam optimizers:
      - ``agent.model``: the policy language model (actor),
      - ``agent.critic``: a step-level critic producing 2-class logits,
      - ``agent.trajectory_critic``: a trajectory-level critic producing
        2-class logits over whole-trajectory success.

    Both critics are trained as binary classifiers (CrossEntropyLoss) against
    the target ``mc_return > 0``; the actor is trained with a 0/1 advantage
    weight on its log-probabilities, i.e. advantage-filtered behavior cloning.
    All backward passes, gradient clipping, and optimizer preparation go
    through a HuggingFace ``accelerate`` Accelerator.
    """

    def __init__(self, agent,
                 accelerator,
                 tokenizer,
                 critic_lr: float = 1e-3,
                 lm_lr: float = 1e-5,
                 grad_accum_steps: int = 8,
                 gamma: float = 0.9,
                 tau: float = 0.1,
                 epochs: int = 3,
                 max_grad_norm: float = 0.01,
                 actor_epochs: int = 3,
                 trajectory_critic_epochs: int = 3,):
        """
        Args:
            agent: the agent holding ``model`` (policy), ``critic``, and
                ``trajectory_critic`` submodules, plus ``get_log_prob``.
            accelerator: ``accelerate`` Accelerator used for device placement,
                backward, gradient clipping, and checkpointing.
            tokenizer: tokenizer for the policy LM (stored, not used here).
            critic_lr: learning rate for both critic optimizers.
            lm_lr: learning rate for the policy LM optimizer.
            grad_accum_steps: number of batches accumulated per optimizer step.
            gamma: discount factor (stored; not referenced by any method in
                this class).
            tau: soft-update coefficient (stored; not referenced by any method
                in this class).
            epochs: number of optimizer steps per ``update_critic`` call.
            max_grad_norm: gradient-clipping norm applied before each step.
            actor_epochs: number of optimizer steps per ``update_policy`` call.
            trajectory_critic_epochs: number of optimizer steps per
                ``update_trajectory_critic`` call.
        """
        super().__init__()
        self.agent = agent
        self.tokenizer = tokenizer
        self.lm_optimizer = torch.optim.Adam(agent.model.parameters(), lr = lm_lr)
        self.critic_optimizer = torch.optim.Adam(agent.critic.parameters(), lr = critic_lr)
        self.trajectory_critic_optimizer = torch.optim.Adam(agent.trajectory_critic.parameters(), lr = critic_lr)
        self.criterion = torch.nn.CrossEntropyLoss()
        self.grad_accum_steps = grad_accum_steps
        self.actor_epochs = actor_epochs
        self.gamma = gamma
        self.epochs = epochs
        self.trajectory_critic_epochs = trajectory_critic_epochs
        self.step = 0
        self.tau = tau
        self.max_grad_norm = max_grad_norm
        self.accelerator = accelerator
        # Softmax over the last dim converts the critics' 2-class logits into
        # probabilities; index [:, 1] is then the "success" probability.
        self.softmax = torch.nn.Softmax(dim = -1)

    def prepare(self):
        # Wrap the three optimizers with accelerate so their state follows the
        # distributed/device setup. Dataloaders are prepared per-update instead.
        self.lm_optimizer = self.accelerator.prepare(self.lm_optimizer)
        self.critic_optimizer = self.accelerator.prepare(self.critic_optimizer)
        self.trajectory_critic_optimizer = self.accelerator.prepare(self.trajectory_critic_optimizer)

    def trajectory_critic_loss(self, observation, mc_return, validation = False, **kwargs):
        """Classification loss for the trajectory-level critic.

        Trains ``agent.trajectory_critic(observation)`` (2-class logits) to
        predict whether the trajectory's Monte-Carlo return is positive.
        When ``validation`` is False the loss is backpropagated (gradients
        accumulate on the trajectory critic); in validation mode no backward
        pass is done and all returned keys are prefixed with "validation.".
        Extra ``**kwargs`` from the batch dict are accepted and ignored.
        """
        with torch.autograd.set_detect_anomaly(True):
            # Move the MC returns onto the policy model's device/dtype.
            mc_return = torch.Tensor(mc_return).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()
            v = self.agent.trajectory_critic(observation, detach_model=False)
            # Binary target: 1 iff the trajectory return is positive.
            regression_target = (mc_return > 0).long()
            v_loss = self.criterion(v, regression_target)
            v_acc = (v.argmax(dim = 1) == regression_target).float().mean()
            if not validation:
                self.accelerator.backward(v_loss)
            v_loss = v_loss.detach().cpu()
            v_acc = v_acc.detach().cpu()
            mc_return = mc_return.detach().cpu()
            # Convert logits to the probability of the positive class for logging.
            v = self.softmax(v)[:, 1]
            info = {"trajectory.v1.loss": v_loss,
                    "trajectory.v1.acc": v_acc,
                    "trajectory.v1.mean": torch.mean(v),
                    "trajectory.v1.min": torch.min(v),
                    "trajectory.v1.max": torch.max(v),
                    "trajectory.v1.std": torch.std(v),
                    "mc_return.mean": torch.mean(mc_return),
                    "mc_return.max": torch.max(mc_return),
                    "mc_return.min": torch.min(mc_return),
                    "mc_return.std": torch.std(mc_return),
                    }
            if validation:
                validation_info = {}
                for k, v in info.items():
                    validation_info["validation."+k] = v
                return validation_info
            return info

    def critic_loss(self, observation, image_features, action, reward, next_observation, next_image_features, done, mc_return, validation = False, **kwargs):
        """Classification loss for the twin step-level critics.

        Both critic heads (v1, v2) are trained on the current step and, in the
        same pass, on the next step (nv1, nv2), all toward the same binary
        target ``mc_return > 0``.
        NOTE(review): the next-state values are regressed toward the *current*
        step's target, and ``reward``/``done`` are prepared but not used in the
        loss itself — presumably intentional for this doubly-robust setup, but
        worth confirming against the paper.
        In validation mode no backward pass is done and returned keys are
        prefixed with "validation.".
        """
        reward = torch.Tensor(reward).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()
        done = torch.Tensor(done).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()
        mc_return = torch.Tensor(mc_return).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()
        v1, v2 = self.agent.critic(observation, image_features, action, detach_model=False)
        nv1, nv2 = self.agent.critic(next_observation, next_image_features, action, detach_model=False)
        # Reshape each head's output to (batch, 2) class logits.
        v1 = v1.reshape(-1, 2)
        v2 = v2.reshape(-1, 2)
        nv1 = nv1.reshape(-1, 2)
        nv2 = nv2.reshape(-1, 2)
        regression_target = (mc_return > 0).long()
        v1_loss = self.criterion(v1, regression_target)
        v1_acc = (v1.argmax(dim = 1) == regression_target).float().mean()
        v2_loss = self.criterion(v2, regression_target)
        v2_acc = (v2.argmax(dim = 1) == regression_target).float().mean()
        nv1_loss = self.criterion(nv1, regression_target)
        nv2_loss = self.criterion(nv2, regression_target)
        if not validation:
            # Single backward over the sum of all four head losses; gradients
            # accumulate until the caller steps the critic optimizer.
            self.accelerator.backward(v1_loss + v2_loss + nv1_loss + nv2_loss)
        v1_loss, v2_loss = v1_loss.detach().cpu(), v2_loss.detach().cpu()
        v1_acc, v2_acc = v1_acc.detach().cpu(), v2_acc.detach().cpu()
        #calculate the probability for logging purpose
        v1 = self.softmax(v1)[:, 1]
        v2 = self.softmax(v2)[:, 1]
        info = {"v1.loss": v1_loss,
                "v2.loss": v2_loss,
                "v1.acc": v1_acc,
                "v2.acc": v2_acc,
                "v1.mean": torch.mean(v1),
                "v1.min": torch.min(v1),
                "v1.max": torch.max(v1),
                "v1.std": torch.std(v1),
                "v2.mean": torch.mean(v2),
                "v2.max": torch.max(v2),
                "v2.min": torch.min(v2),
                "v2.std": torch.std(v2),
                }
        if validation:
            validation_info = {}
            for k, v in info.items():
                validation_info["validation."+k] = v
            return validation_info
        return info

    def actor_loss(self, observation, action, image_features, next_observation, next_image_features, mc_return, pi_action, advantage, reward, validation=False, **kwargs):
        """Advantage-filtered policy-gradient (behavior-cloning) loss.

        A step-level advantage is estimated from the frozen twin critics as
        ``nv - v - 0.05 + reward + mc_return``, clamped to [0, 1] and then
        binarized, so the policy loss reduces to negative log-likelihood on
        the steps whose advantage is positive.
        The ``pi_action`` and ``advantage`` parameters are accepted for
        interface compatibility but immediately overwritten/ignored here.
        ``values``/``residual_advantages`` entries in the returned info are
        zero placeholders kept for logging-schema compatibility.
        """
        mc_return = torch.Tensor(mc_return).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()
        reward = torch.Tensor(reward).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()
        # Critic evaluation only — no gradients flow into the critics here.
        with torch.no_grad():
            v1, v2 = self.agent.critic(observation, image_features, action, detach_model=False)
            nv1, nv2 = self.agent.critic(next_observation, next_image_features, action, detach_model=False)
            # Success probabilities from the 2-class logits; take the
            # pessimistic (elementwise minimum) of the twin heads.
            v1 = self.softmax(v1)[:, 1]
            v2 = self.softmax(v2)[:, 1]
            nv1 = self.softmax(nv1)[:, 1]
            nv2 = self.softmax(nv2)[:, 1]
            v = torch.minimum(v1, v2).flatten()
            nv = torch.minimum(nv1, nv2).flatten()
            #TODO: set +1 so that the advantage is always positive
            advantage = nv - v - 0.05 + reward + mc_return
            advantage = torch.clamp(advantage, 0, 1)
        # Binarize: each step gets weight 1 if its advantage is positive, else 0.
        advantage = (advantage > 0).to(dtype = self.accelerator.unwrap_model(self.agent.model).dtype)
        image_features = image_features.to(self.agent.device)
        # Per-step log-prob of the taken action (summed over tokens).
        log_prob = self.agent.get_log_prob(observation, image_features, action).sum(dim = 1).flatten()
        advantage = torch.Tensor(advantage).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype)
        advantages = advantage.flatten()
        values = torch.zeros_like(advantages)
        residual_advantage = torch.zeros_like(advantages)
        # Weighted negative log-likelihood; with 0/1 weights this is filtered BC.
        pg_loss = -torch.mean(log_prob.flatten()*advantages)
        value_loss = torch.zeros_like(pg_loss)
        if not validation:
            self.accelerator.backward(pg_loss+value_loss)
        advantages = advantages.detach().cpu()
        info = {"pg.loss": pg_loss.detach().cpu().item(),
                "values.loss": value_loss.detach().cpu().item(),
                "values.mean": values.mean(),
                "values.max": torch.max(values),
                "values.min": torch.min(values),
                "values.std": torch.std(values),
                "advantages.mean": advantages.mean(),
                "advantages.max": torch.max(advantages),
                "advantages.min": torch.min(advantages),
                "advantages.std": torch.std(advantages),
                "residual_advantages.mean": residual_advantage.mean(),
                "residual_advantages.max": torch.max(residual_advantage),
                "residual_advantages.min": torch.min(residual_advantage),
                "residual_advantages.std": torch.std(residual_advantage),}
        if validation:
            validation_info = {}
            for k, v in info.items():
                validation_info["validation."+k] = v
            return validation_info
        return info

    def update_trajectory_critic(self, trajectories, validation_trajectories = None):
        """Train the trajectory-level critic on (first observation, final MC return) pairs.

        Each epoch resamples ``grad_accum_steps * batch_size`` trajectories
        with replacement, accumulates gradients over the whole dataloader,
        clips, and takes one optimizer step. If ``validation_trajectories``
        is given, a no-grad validation pass is appended to the returned info.
        """
        info = {}
        info_list = []
        batch_size = 8
        with torch.autograd.set_detect_anomaly(True):
            for _ in tqdm(range(self.trajectory_critic_epochs), disable= not self.accelerator.is_main_process):
                # One training example per trajectory: the initial observation
                # paired with the trajectory's final Monte-Carlo return.
                data = [{"observation": traj[0]["observation"], "mc_return": traj[-1]["mc_return"]} for traj in trajectories]
                # Sample with replacement to fill one accumulation cycle.
                data = [random.sample(data, 1)[0] for _ in range(self.grad_accum_steps*batch_size)]
                dataloader = DataLoader(DummyDataset(data), batch_size=batch_size)
                dataloader = self.accelerator.prepare(dataloader)
                self.trajectory_critic_optimizer.zero_grad()
                for batch in tqdm(dataloader, disable=True):
                    info_list.append(self.trajectory_critic_loss(**batch))
                self.accelerator.clip_grad_norm_(self.agent.parameters(), self.max_grad_norm)
                self.trajectory_critic_optimizer.step()
        info.update(dict_mean(info_list))
        if validation_trajectories is not None:
            info_list = []
            data = [{"observation": traj[0]["observation"], "mc_return": traj[-1]["mc_return"]} for traj in validation_trajectories]
            data = [random.sample(data, 1)[0] for _ in range(self.grad_accum_steps*batch_size)]
            dataloader = DataLoader(DummyDataset(data), batch_size=batch_size)
            dataloader = self.accelerator.prepare(dataloader)
            with torch.no_grad():
                for batch in tqdm(dataloader, disable=True):
                    info_list.append(self.trajectory_critic_loss(validation=True, **batch))
            info.update(dict_mean(info_list))
        return info

    def update_critic(self, replay_buffer, validation_buffer = None):
        """Train the step-level critics from replay-buffer samples.

        Each epoch draws ``grad_accum_steps * batch_size`` single-step samples,
        accumulates gradients across the dataloader, clips, and steps once.
        An optional no-grad validation pass over ``validation_buffer`` adds
        "validation."-prefixed metrics to the returned info dict.
        """
        self.step += 1
        info = {}
        info_list = []
        with torch.autograd.set_detect_anomaly(True):
            for _ in tqdm(range(self.epochs), disable= not self.accelerator.is_main_process):
                data = [replay_buffer.sample(1) for _ in range(self.grad_accum_steps*replay_buffer.batch_size)]
                # sample(1) returns length-1 sequences per key; unwrap them.
                for d in data:
                    for k, v in d.items():
                        d[k] = v[0]
                dataloader = DataLoader(DummyDataset(data), batch_size=replay_buffer.batch_size)
                dataloader = self.accelerator.prepare(dataloader)
                self.critic_optimizer.zero_grad()
                for batch in tqdm(dataloader, disable=True):
                    info_list.append(self.critic_loss(**batch))
                self.accelerator.clip_grad_norm_(self.agent.parameters(), self.max_grad_norm)
                self.critic_optimizer.step()
        info.update(dict_mean(info_list))
        if validation_buffer is not None:
            info_list = []
            data = [validation_buffer.sample(1) for _ in range(self.grad_accum_steps*replay_buffer.batch_size)]
            for d in data:
                for k, v in d.items():
                    d[k] = v[0]
            dataloader = DataLoader(DummyDataset(data), batch_size=replay_buffer.batch_size)
            dataloader = self.accelerator.prepare(dataloader)
            with torch.no_grad():
                for batch in tqdm(dataloader, disable=True):
                    info_list.append(self.critic_loss(validation=True, **batch))
            info.update(dict_mean(info_list))
        return info

    def update_policy(self, replay_buffer, validation_buffer = None, no_update_actor=False):
        """Train the policy LM with the advantage-filtered actor loss.

        Skipped entirely (returns empty info) when ``no_update_actor`` is True.
        Uses a smaller per-batch size for 'mistral' policies due to memory
        concerns; otherwise the replay buffer's batch size.
        """
        self.step += 1
        info = {}
        info_list = []
        # Batch size for the actor: reduced to 2 for mistral policies
        # because of memory constraints.
        action_bsize = 2 if 'mistral' in self.agent.policy_lm else replay_buffer.batch_size
        #update actor
        if not no_update_actor:
            print(">>>updating actor")
            for _ in tqdm(range(self.actor_epochs), disable= not self.accelerator.is_main_process):
                data = [replay_buffer.sample(1) for _ in range(self.grad_accum_steps*replay_buffer.batch_size)]
                grad_index = 0  # NOTE(review): assigned but never used below
                for d in data:
                    for k, v in d.items():
                        d[k] = v[0]
                dataloader = DataLoader(DummyDataset(data), batch_size=action_bsize, shuffle=False)
                all_pi_actions = []   # NOTE(review): collected nowhere — kept for interface parity
                all_advantages = []
                dataloader = self.accelerator.prepare(dataloader)
                self.lm_optimizer.zero_grad()
                for batch in dataloader:
                    # pi_action/advantage are recomputed inside actor_loss;
                    # pass None placeholders.
                    pi_action = None
                    advantages = None
                    info_list.append(self.actor_loss(**batch, pi_action=pi_action, advantage=advantages))
                self.accelerator.clip_grad_norm_(self.agent.parameters(), self.max_grad_norm)
                self.lm_optimizer.step()
            info.update(dict_mean(info_list))
            if validation_buffer is not None:
                info_list = []
                data = [validation_buffer.sample(1) for _ in range(self.grad_accum_steps*replay_buffer.batch_size)]
                for d in data:
                    for k, v in d.items():
                        d[k] = v[0]
                dataloader = DataLoader(DummyDataset(data), batch_size=action_bsize, shuffle=False)
                dataloader = self.accelerator.prepare(dataloader)
                with torch.no_grad():
                    for batch in tqdm(dataloader, disable=True):
                        info_list.append(self.actor_loss(validation=True, pi_action=None, advantage=None, **batch))
                info.update(dict_mean(info_list))
                return info
            return info
        return info

    def update(self, replay_buffer, validation_buffer = None, filtered_buffer = None, filtered_validation_buffer = None, no_update_actor=False):
        """Run one full DigiRL update: critics on the raw buffer, actor on the filtered one.

        The filtered buffers default to the unfiltered ones when not provided,
        so callers without a filtering stage still work.
        """
        if filtered_validation_buffer is None:
            filtered_validation_buffer = validation_buffer
        if filtered_buffer is None:
            filtered_buffer = replay_buffer
        info = {}
        info.update(self.update_critic(replay_buffer, validation_buffer))
        info.update(self.update_policy(filtered_buffer, filtered_validation_buffer, no_update_actor=no_update_actor))
        return info

    def save(self, path):
        # Checkpoint all accelerate-managed state (models + optimizers) to path.
        self.accelerator.save_state(path, safe_serialization=False)

    def load(self, path):
        # Restore a checkpoint previously written by save().
        self.accelerator.load_state(path)

================================================
FILE:
digirl/algorithms/eval_loop.py ================================================ from digirl.environment import batch_interact_environment from digirl.algorithms.digirl import DigiRLTrainer from digirl.algorithms.filteredbc import BCTrainer import numpy as np from digirl.misc import colorful_print import wandb import os import torch import time def eval_loop(env,\ agent,\ accelerator,\ tokenizer,\ critic_lr,\ lm_lr,\ tau,\ epochs,\ actor_epochs,\ grad_accum_steps,\ max_grad_norm, trajectory_critic_epochs, gamma=None,\ train_algorithm=None,\ rollout_size: int = 50,\ eval_iterations: int = 10,\ use_wandb: bool = False, save_path: str = None, decode_f: callable = lambda x: x, **kwargs): if train_algorithm == "digirl": print(">>> Using DigiRL trainer") trainer = DigiRLTrainer(agent=agent,\ accelerator=accelerator,\ tokenizer=tokenizer,\ critic_lr = critic_lr,\ lm_lr = lm_lr,\ gamma = gamma,\ tau = tau,\ epochs = epochs,\ actor_epochs = actor_epochs, grad_accum_steps=grad_accum_steps, max_grad_norm=max_grad_norm, trajectory_critic_epochs = trajectory_critic_epochs) elif train_algorithm == "filteredbc": print(">>> Using Filtered BC trainer") trainer = BCTrainer(agent=agent,\ tokenizer=tokenizer,\ accelerator=accelerator, lm_lr = lm_lr,\ epochs = actor_epochs,\ grad_accum_steps=grad_accum_steps, max_grad_norm=max_grad_norm) agent.prepare() # evaluation does not require optimizer # trainer.prepare() if os.path.exists(os.path.join(save_path, 'trainer.pt')): print(">>> Loading from previous checkpoint") trainer.load(os.path.join(save_path, 'trainer.pt')) else: print(">>> No previous checkpoint found") colorful_print(">>> Evaluating Agent", fg='blue') all_trajectories = [] for i in range(eval_iterations): trajectories = batch_interact_environment(agent = agent,\ env = env,\ num_trajectories= rollout_size,\ accelerator = accelerator,\ use_tqdm=False, decode_f = decode_f, gamma = gamma, iter=i) if accelerator.is_main_process: info = {"iteration": i,\ "rollout.mean": 
np.mean([d[0]["trajectory_reward"] if len(d) > 0 else 0 for d in trajectories]),\ "rollout.max": np.max([d[0]["trajectory_reward"] if len(d) > 0 else 0 for d in trajectories]),\ "rollout.min": np.min([d[0]["trajectory_reward"] if len(d) > 0 else 0 for d in trajectories]),\ "walltime": time.time()} all_trajectories += trajectories torch.save(all_trajectories, os.path.join(save_path, 'trajectories_eval.pt')) time.sleep(15) else: info = {} accelerator.wait_for_everyone() all_trajectories = torch.load(os.path.join(save_path, 'trajectories_eval.pt')) if use_wandb and accelerator.is_main_process: wandb.log(info) ================================================ FILE: digirl/algorithms/filteredbc/__init__.py ================================================ from .trainer import BCTrainer ================================================ FILE: digirl/algorithms/filteredbc/trainer.py ================================================ import torch import transformers from tqdm import tqdm import copy import random from torch.utils.data import DataLoader from digirl.data import DummyDataset def dict_mean(dict_list): mean_dict = {} if len(dict_list) > 0: for key in dict_list[0].keys(): mean_dict[key] = sum(d[key] for d in dict_list) / len(dict_list) return mean_dict class BCTrainer(): def __init__(self, agent,\ tokenizer,\ accelerator,\ lm_lr: float = 1e-5,\ epochs: int = 3, max_grad_norm: float=0.01, grad_accum_steps: int = 8): """ beta: coefficient for the bc loss """ super().__init__() self.agent = agent self.tokenizer = tokenizer self.lm_optimizer = torch.optim.Adam(agent.model.parameters(), lr = lm_lr) self.criterion = torch.nn.MSELoss() self.grad_accum_steps = grad_accum_steps self.epochs = epochs self.step = 0 self.max_grad_norm = max_grad_norm self.accelerator = accelerator def prepare(self): self.lm_optimizer = self.accelerator.prepare(self.lm_optimizer) def actor_loss(self, observation, image_features, action, **kwargs): # loss = plain_bc_loss(self.agent.model, 
self.tokenizer, observation, action) image_features = image_features.to(self.agent.device) loss = -self.agent.get_log_prob(observation, image_features, action).sum(dim = 1).mean() self.accelerator.backward(loss) return {"bc.loss": loss.detach().cpu().item()} def update(self, replay_buffer, no_update_actor=False): self.step += 1 info = {} info_list = [] #update actor if not no_update_actor: action_bsize = 1 if 'llama' in self.agent.policy_lm else replay_buffer.batch_size for _ in range(self.epochs): self.lm_optimizer.zero_grad() data = [replay_buffer.sample(1) for _ in range(self.grad_accum_steps*replay_buffer.batch_size)] grad_index = 0 for d in data: for k,v in d.items(): d[k] = v[0] dataloader = DataLoader(DummyDataset(data), batch_size=action_bsize, shuffle=False) dataloader = self.accelerator.prepare(dataloader) for batch in dataloader: info_list.append(self.actor_loss(**batch)) self.accelerator.clip_grad_norm_(self.agent.parameters(), self.max_grad_norm) self.lm_optimizer.step() info.update(dict_mean(info_list)) return info def save(self, path): self.accelerator.save_state(path, safe_serialization=False) # torch.save({'model_state_dict': self.accelerator.unwrap_model(self.agent.model).state_dict(), # 'critic_state_dict': self.accelerator.unwrap_model(self.agent.critic).state_dict(), # 'target_critic_state_dict': self.accelerator.unwrap_model(self.agent.target_critic).state_dict(), # 'critic_optimizer_state_dict': self.critic_optimizer.state_dict(), # 'lm_optimizer_state_dict': self.lm_optimizer.state_dict()}, path) def load(self, path): self.accelerator.load_state(path) ================================================ FILE: digirl/algorithms/offpolicy_train_loop.py ================================================ from digirl.environment import batch_interact_environment from digirl.data import ReplayBuffer import numpy as np from tqdm import tqdm from digirl.algorithms.digirl import DigiRLTrainer from digirl.algorithms.filteredbc import BCTrainer from 
digirl.misc import colorful_print import wandb import os import torch import time import copy from digirl.environment.env_utils import add_mc_return from digirl.algorithms.parallel_utils import remote_collect_trajectories def label_trajectories(trajectories, agent): print("Labeling Trajectories") baselines = [] for i in range(0, len(trajectories), 16): observations = [t[0]["observation"] for t in trajectories[i:i+16]] with torch.no_grad(): v = agent.trajectory_critic(observations) v = torch.nn.Softmax(dim = -1)(v)[:,1] baselines.append(v.flatten()) baselines = torch.cat(baselines, dim = -1) print("Done Labeling Trajectories") return torch.clamp(baselines.cpu(), 1e-4, 1-1e-4) def framestack(all_trajectories): new_trajectories = copy.deepcopy(all_trajectories) for trajectory, new_trajectory in zip(all_trajectories, new_trajectories): for i,(t, nt) in enumerate(zip(trajectory, new_trajectory)): if i == 0: nt["image_features"] = np.concatenate([t["image_features"], t["image_features"]], axis = -1) else: nt["image_features"] = np.concatenate([trajectory[i-1]["image_features"], t["image_features"]], axis = -1) nt["next_image_features"] = np.concatenate([t["image_features"], t["next_image_features"]], axis = -1) return new_trajectories def filterbc_buffer(all_trajectories, batch_size, capacity, agent): trajectory_rewards = np.array([t[0]["trajectory_reward"] if len(t) > 0 else 0 for t in all_trajectories]).flatten() cutoff = np.quantile(trajectory_rewards, 1 - 0.1) top10 = np.argsort(trajectory_rewards)[-10:] print("Top 10 Trajectories: ") for i in top10: print(all_trajectories[i][0]["observation"]) print(trajectory_rewards[i]) print("Cutoff: ", cutoff) filtered_trajectories = [] for t, b in zip(all_trajectories, trajectory_rewards): if b >= cutoff: filtered_trajectories.append(t) data = sum(filtered_trajectories, []) filtered_buffer= ReplayBuffer(batch_size= batch_size, capacity=capacity) for d in data: filtered_buffer.insert(**d) return filtered_buffer def 
filter_buffer(all_trajectories, batch_size, capacity, agent): baselines = label_trajectories(all_trajectories, agent).numpy().flatten() trajectory_rewards = np.array([t[0]["trajectory_reward"] if len(t) > 0 else 0 for t in all_trajectories]).flatten() baselines = trajectory_rewards - baselines cutoff = np.quantile(baselines, 1 - 0.1) top10 = np.argsort(baselines)[-10:] print("Top 10 Trajectories: ") for i in top10: print(all_trajectories[i][0]["observation"]) print(baselines[i]) print("Cutoff: ", cutoff) filtered_trajectories = [] for t, b in zip(all_trajectories, baselines): if b >= cutoff: filtered_trajectories.append(t) data = sum(filtered_trajectories, []) filtered_buffer= ReplayBuffer(batch_size= batch_size, capacity=capacity) for d in data: filtered_buffer.insert(**d) return filtered_buffer def offpolicy_train_loop(env,\ agent,\ tokenizer,\ accelerator,\ warmup_iter: int = 20, rollout_size: int = 50,\ batch_size: int = 2, capacity: int = 500000, train_iterations: int = 10,\ epochs:int = 3, \ grad_accum_steps: int = 1,\ critic_lr: float= 1e-3,\ lm_lr: float = 1e-5,\ gamma: float = 0.9, tau: float = 0.1, use_wandb: bool = False, actor_epochs: int = 3, train_mode: str = None, max_grad_norm: float = 0.01, save_path: str = None, save_freq: int = 25, train_algorithm: str = "digirl", decode_f: callable = lambda x: x, offline_data_path: str = None, offline_actor_iterations: int = 20, offline_critic_iterations: int = 20, offline_trajectory_critic_iterations: int = 20, trajectory_critic_epochs: int = 5, parallel: str = 'single', worker_temp_path=None, worker_run_path=None, worker_ips=[], worker_username=None, **kwargs): if train_algorithm == "digirl": trainer = DigiRLTrainer(agent=agent,\ accelerator=accelerator,\ tokenizer=tokenizer,\ critic_lr = critic_lr,\ lm_lr = lm_lr,\ gamma = gamma,\ tau = tau,\ epochs = epochs,\ actor_epochs = actor_epochs, grad_accum_steps=grad_accum_steps, max_grad_norm=max_grad_norm, trajectory_critic_epochs = trajectory_critic_epochs) elif 
train_algorithm == "filteredbc": trainer = BCTrainer(agent=agent,\ tokenizer=tokenizer,\ accelerator=accelerator, lm_lr = lm_lr,\ epochs = actor_epochs,\ grad_accum_steps=grad_accum_steps, max_grad_norm=max_grad_norm) replay_buffer= ReplayBuffer(batch_size= batch_size, capacity=capacity) all_trajectories = [] # prepare the model agent.prepare() # prepare the optimizers trainer.prepare() loaded_trajs = False # off-to-on # no offline ckpt, no online ckpt -> offline training # offline ckpt, no online ckpt -> online training # offline ckpt, online ckpt -> resume online training # offline # no resume supported # online # no online ckpt -> online training # online ckpt -> resume online training # omit this for online training if offline_data_path is not None and train_mode != "online": all_trajectories = torch.load(offline_data_path) all_trajectories = framestack(all_trajectories) print(f"The number of offline trajectories is {len(all_trajectories)}") all_trajectories = [add_mc_return(t, gamma=gamma) for t in all_trajectories] train_trajectories = all_trajectories[:int(len(all_trajectories)*0.8)] val_trajectories = all_trajectories[int(len(all_trajectories)*0.8):] loaded_trajs = 'scratch' # resume training from the saved checkpoint if os.path.exists(os.path.join(save_path, 'trainer.pt')): assert train_mode != "offline", "Only online/off2on training can be resumed" trainer.load(os.path.join(save_path, 'trainer.pt')) replay_buffer = torch.load(os.path.join(save_path, 'replay_buffer.pt')) all_trajectories = torch.load(os.path.join(save_path, 'trajectories.pt')) train_trajectories = torch.load(os.path.join(save_path, 'train_trajectories.pt')) val_trajectories = torch.load(os.path.join(save_path, 'val_trajectories.pt')) print(f"The number of online trajectories is {len(all_trajectories)}") if use_wandb and accelerator.is_main_process: print("Loading from checkpoint") loaded_trajs = 'resume' if not loaded_trajs: train_trajectories = [] val_trajectories = [] all_trajectories = 
[] replay_buffer = ReplayBuffer(batch_size= batch_size, capacity=capacity) validation_buffer = ReplayBuffer(batch_size= batch_size, capacity=capacity) data = sum(train_trajectories, []) val_data = sum(val_trajectories, []) for d in data: replay_buffer.insert(**d) for d in val_data: validation_buffer.insert(**d) # offline training if not os.path.exists(os.path.join(save_path, 'trainer.pt')): #if nothing in the trainer only the offline trainer is saved if os.path.exists(os.path.join(save_path, 'trainer_offline.pt')): trainer.load(os.path.join(save_path, 'trainer_offline.pt')) print("Loading from offline trainer") else: if offline_data_path is not None and train_mode != "online": print(">>>Training Offline") info = {} # offline training will never use the trajectory-level critic filter, so please use filterbc_buffer filtered_buffer = filterbc_buffer(train_trajectories, batch_size, capacity, agent) filtered_validation_buffer = filterbc_buffer(val_trajectories, batch_size, capacity, agent) if train_algorithm == "filteredbc": # filtered BC training phase for i in tqdm(range(offline_actor_iterations), disable=not accelerator.is_main_process): info.update(trainer.update(filtered_buffer)) if use_wandb and accelerator.is_main_process: wandb.log(info) elif train_algorithm == "digirl": # digirl training phase for i in tqdm(range(offline_trajectory_critic_iterations), disable=not accelerator.is_main_process): info.update(trainer.update_trajectory_critic(train_trajectories, val_trajectories)) if use_wandb and accelerator.is_main_process: wandb.log(info) for i in tqdm(range(offline_critic_iterations), disable=not accelerator.is_main_process): info.update(trainer.update_critic(replay_buffer, validation_buffer)) if use_wandb and accelerator.is_main_process: wandb.log(info) print(">>>Training Policy") for i in tqdm(range(offline_actor_iterations), disable=not accelerator.is_main_process): info.update(trainer.update_policy(filtered_buffer, filtered_validation_buffer)) if use_wandb 
and accelerator.is_main_process: wandb.log(info) if accelerator.is_main_process: trainer.save(os.path.join(save_path, 'trainer_offline.pt')) if accelerator.is_main_process: print(">>>start iterations") if loaded_trajs == "resume": resume_iter = len(all_trajectories) // rollout_size else: resume_iter = 0 progress_bar = tqdm(total=train_iterations, initial=resume_iter) for i in range(resume_iter, train_iterations): assert train_mode != "offline", "Only online/off2on need to iteractively train; offline should directly go to eval loop after training" if parallel == 'single': trajectories = batch_interact_environment(agent = agent,\ env = env,\ num_trajectories= rollout_size,\ accelerator = accelerator,\ use_tqdm=False, decode_f = decode_f, gamma = gamma, iter=i) elif parallel == 'host': if i == 0: if not os.path.exists(save_path): os.makedirs(save_path) trajectories = remote_collect_trajectories(save_path=save_path, worker_temp_path=worker_temp_path, worker_run_path=worker_run_path, worker_ips=worker_ips, worker_username=worker_username, trainer=trainer) trajectories = framestack(trajectories) if accelerator.is_main_process: info = {"iteration": i,\ "rollout.mean": np.mean([d[0]["trajectory_reward"] if len(d) > 0 else 0 for d in trajectories]),\ "rollout.max": np.max([d[0]["trajectory_reward"] if len(d) > 0 else 0 for d in trajectories]),\ "rollout.min": np.min([d[0]["trajectory_reward"] if len(d) > 0 else 0 for d in trajectories]),\ "walltime": time.time()} all_trajectories += trajectories colorful_print(f">>> length of all_trajectories: {len(trajectories)}", fg='green') new_train_trajectories = trajectories[:int(len(trajectories)*0.8)] new_val_trajectories = trajectories[int(len(trajectories)*0.8):] train_trajectories += new_train_trajectories val_trajectories += new_val_trajectories data = sum(new_train_trajectories, []) val_data = sum(new_val_trajectories, []) for d in data: replay_buffer.insert(**d) for d in val_data: validation_buffer.insert(**d) 
info.update({"rollout.reward.mean": np.mean([d["reward"] for d in data]),\ "rollout.reward.max": np.max([d["reward"] for d in data]),\ "rollout.reward.min": np.min([d["reward"] for d in data])}) print(">>> Saving Replay Buffer") torch.save(replay_buffer, os.path.join(save_path, 'replay_buffer.pt')) torch.save(all_trajectories, os.path.join(save_path, 'trajectories.pt')) torch.save(train_trajectories, os.path.join(save_path, 'train_trajectories.pt')) torch.save(val_trajectories, os.path.join(save_path, 'val_trajectories.pt')) print(">>> Saved Replay Buffer") time.sleep(15) else: info = {} accelerator.wait_for_everyone() train_trajectories = torch.load(os.path.join(save_path, 'train_trajectories.pt')) val_trajectories = torch.load(os.path.join(save_path, 'val_trajectories.pt')) all_trajectories = torch.load(os.path.join(save_path, 'trajectories.pt')) replay_buffer = torch.load(os.path.join(save_path, 'replay_buffer.pt')) assert train_algorithm in ['digirl', 'filteredbc'], "Only digirl and filteredbc are supported" if train_algorithm == "filteredbc": filtered_buffer = filterbc_buffer(train_trajectories, batch_size, capacity, agent) filtered_validation_buffer = filterbc_buffer(val_trajectories, batch_size, capacity, agent) elif train_algorithm == 'digirl': filtered_buffer = filter_buffer(train_trajectories, batch_size, capacity, agent) filtered_validation_buffer = filter_buffer(val_trajectories, batch_size, capacity, agent) print("Training") if 'filtered' in train_algorithm: info.update(trainer.update(filtered_buffer, no_update_actor = (i < warmup_iter))) del filtered_buffer else: info.update(trainer.update_trajectory_critic(train_trajectories, val_trajectories)) info.update(trainer.update(replay_buffer, validation_buffer, filtered_buffer, filtered_validation_buffer, no_update_actor = (i < warmup_iter))) if use_wandb and accelerator.is_main_process: wandb.log(info) if (i+1) % save_freq == 0 and save_path is not None and accelerator.is_main_process: print("Saving") 
trainer.save(os.path.join(save_path, 'trainer.pt')) torch.save(replay_buffer, os.path.join(save_path, 'replay_buffer.pt')) if accelerator.is_main_process: progress_bar.update(1) ================================================ FILE: digirl/algorithms/parallel_utils.py ================================================ from digirl.misc import colorful_print import threading import os import torch import time def remote_collect_trajectories(save_path, worker_temp_path, worker_run_path, worker_ips, worker_username, trainer): # add all workers into known hosts if not already colorful_print("Adding all workers to known hosts", fg='green') for worker_ip in worker_ips: print("worker_ip", worker_ip) os.system(f"ssh-keyscan -H {worker_ip} >> ~/.ssh/known_hosts") # kill all processes for worker_ip in worker_ips: os.system(f"ssh {worker_username}@{worker_ip} 'pkill -U {worker_username}'") time.sleep(5) for worker_ip in worker_ips: os.system(f"ssh {worker_username}@{worker_ip} 'skill -u {worker_username}'") time.sleep(5) # copying the agent to all remote workers # save the current trainer, NO MATTER it's zero-shot or offline or online colorful_print("Saving the current trainer", fg='green') trainer.save(os.path.join(save_path, "trainer_current.pt")) colorful_print("Copying the current trainer to all workers", fg='green') command = f"rm -rf {worker_temp_path} && mkdir -p {worker_temp_path} && exit" # parallely execute this command in all remote workser and wait for the command to finish threads = [] colorful_print("Starting all trajectory collections", fg='green') for worker_ip in worker_ips: t = threading.Thread(target=os.system, args=(f"""ssh -tt {worker_username}@{worker_ip} << EOF {command} EOF """,)) threads.append(t) t.start() for t in threads: t.join() colorful_print("Trajectory collection finished", fg='green') for worker_ip in worker_ips: command = f"scp -r {save_path}/trainer_current.pt {worker_username}@{worker_ip}:{worker_temp_path}" os.system(command) command = 
f"conda activate digirl && cd {worker_run_path} && python run.py --config-path config/multimachine --config-name worker && exit" for worker_ip in worker_ips: t = threading.Thread(target=os.system, args=(f"""ssh -tt {worker_username}@{worker_ip} << EOF {command} EOF """,)) threads.append(t) t.start() for t in threads: t.join() colorful_print("Trajectory collection finished", fg='green') for worker_ip in worker_ips: os.system(f"scp {worker_username}@{worker_ip}:{worker_temp_path}/trajectories.pt {save_path}/{worker_ip}") # wait for all trajs to be scp'ed to this host machine while True: if all([os.path.exists(f"{save_path}/{worker_ip}") for worker_ip in worker_ips]): break time.sleep(5) # load all trajs in the remote machine trajectories_list = [torch.load(f"{save_path}/{worker_ip}") for worker_ip in worker_ips] # aggregate all trajs trajectories = [] for traj_list in trajectories_list: for traj in traj_list: trajectories.append(traj) return trajectories ================================================ FILE: digirl/algorithms/worker_collect_loop.py ================================================ from digirl.environment import batch_interact_environment from digirl.data import ReplayBuffer from digirl.algorithms.digirl import DigiRLTrainer from digirl.algorithms.filteredbc import BCTrainer from digirl.misc import colorful_print import os import torch def worker_collect_loop(env,\ agent,\ tokenizer,\ accelerator,\ warmup_iter: int = 20, rollout_size: int = 50,\ batch_size: int = 2, capacity: int = 500000, train_iterations: int = 1,\ epochs:int = 3, \ grad_accum_steps: int = 1,\ do_sample: bool = False,\ temperature: float = 2.0,\ critic_lr: float= 1e-3,\ lm_lr: float = 1e-5,\ gamma: float = 0.9, tau: float = 0.1, use_wandb: bool = False, env_load_path: str = '', actor_epochs: int = 3, max_grad_norm: float = 0.01, save_path: str = None, save_freq: int = 25, train_algorithm: str = "digirl", decode_f: callable = lambda x: x, offline_data_path: str = None, 
offline_actor_iterations: int = 20, offline_critic_iterations: int = 20, offline_trajectory_critic_iterations: int = 20, trajectory_critic_epochs: int = 5, **kwargs): if train_algorithm == "digirl": trainer = DigiRLTrainer(agent=agent,\ accelerator=accelerator,\ tokenizer=tokenizer,\ critic_lr = critic_lr,\ lm_lr = lm_lr,\ gamma = gamma,\ tau = tau,\ epochs = epochs,\ actor_epochs = actor_epochs, grad_accum_steps=grad_accum_steps, max_grad_norm=max_grad_norm, trajectory_critic_epochs = trajectory_critic_epochs) elif train_algorithm == "filteredbc": trainer = BCTrainer(agent=agent,\ tokenizer=tokenizer,\ accelerator=accelerator, lm_lr = lm_lr,\ epochs = actor_epochs,\ grad_accum_steps=grad_accum_steps, max_grad_norm=max_grad_norm) replay_buffer= ReplayBuffer(batch_size= batch_size, capacity=capacity) all_trajectories = [] #prepare the model and optimizers agent.prepare() trainer.prepare() colorful_print(">>> Loading Current Trainer from Host", fg='blue') trainer.load(os.path.join(save_path, 'trainer_current.pt')) colorful_print(">>> Worker Collecting Online Data", fg='blue') for i in range(train_iterations): trajectories = batch_interact_environment(agent = agent,\ env = env,\ num_trajectories= rollout_size,\ accelerator = accelerator,\ use_tqdm=False, decode_f = decode_f, gamma = gamma, iter=i) torch.save(trajectories, os.path.join(save_path, 'trajectories.pt')) ================================================ FILE: digirl/data/__init__.py ================================================ from .utils import DummyDataset, ReplayBuffer ================================================ FILE: digirl/data/utils.py ================================================ from torch.utils.data import Dataset, DataLoader import numpy as np class DummyDataset(Dataset): def __init__(self, buffer): self.buffer = buffer def __len__(self): return len(self.buffer) def __getitem__(self, idx): return self.buffer[idx] class ReplayBuffer: def __init__(self, batch_size=2, capacity=10000): 
self.max_size = capacity self.size = 0 self.observations = None self.rewards = None self.next_observations = None self.dones = None self.batch_size = batch_size self.actions = None self.mc_returns = None self.image_features = None self.next_image_features = None def sample(self, batch_size=None): if batch_size is None: batch_size = self.batch_size rand_indices = np.random.randint(0, self.size, size=(batch_size,)) % self.max_size return { "observation": self.observations[rand_indices], "action": self.actions[rand_indices], "image_features": self.image_features[rand_indices], "next_image_features": self.next_image_features[rand_indices], "reward": self.rewards[rand_indices], "next_observation": self.next_observations[rand_indices], "done": self.dones[rand_indices], "mc_return": self.mc_returns[rand_indices], } def __len__(self): return self.size def insert( self, /, observation, action, image_features: np.ndarray, next_image_features: np.ndarray, reward: np.ndarray, next_observation, done: np.ndarray, mc_return, **kwargs ): """ Insert a single transition into the replay buffer. 
Use like: replay_buffer.insert( observation=observation, action=action, reward=reward, next_observation=next_observation, done=done, ) """ if isinstance(reward, (float, int)): reward = np.array(reward) if isinstance(mc_return, (float, int)): mc_return = np.array(mc_return) if isinstance(done, bool): done = np.array(done) # print(next_observation) # if isinstance(prompt_actionaction, int): # action = np.array(action, dtype=np.int64) if self.observations is None: self.observations = np.array(['']*self.max_size, dtype = 'object') self.actions = np.array(['']*self.max_size, dtype = 'object') self.image_features = np.empty((self.max_size, *image_features.shape), dtype=image_features.dtype) self.next_image_features = np.empty((self.max_size, *next_image_features.shape), dtype=next_image_features.dtype) self.rewards = np.empty((self.max_size, *reward.shape), dtype=reward.dtype) self.next_observations = np.array(['']*self.max_size, dtype = 'object') self.dones = np.empty((self.max_size, *done.shape), dtype=done.dtype) self.mc_returns = np.empty((self.max_size, *mc_return.shape), dtype=mc_return.dtype) assert reward.shape == () assert done.shape == () self.observations[self.size % self.max_size] = observation self.image_features[self.size % self.max_size] = image_features self.next_image_features[self.size % self.max_size] = next_image_features self.actions[self.size % self.max_size] = action self.rewards[self.size % self.max_size] = reward self.next_observations[self.size % self.max_size] = next_observation self.dones[self.size % self.max_size] = done self.mc_returns[self.size % self.max_size] = mc_return self.size += 1 ================================================ FILE: digirl/environment/__init__.py ================================================ from .env_utils import batch_interact_environment from .android import BatchedAndroidEnv ================================================ FILE: digirl/environment/android/__init__.py 
================================================ from .env import BatchedAndroidEnv from .evaluate import EndResultEvaluator from .autoui_utils import cogagent_translate_action, autoui_translate_action, autoui_prepare_prompt ================================================ FILE: digirl/environment/android/assets/task_set/general_test.txt ================================================ Search for hotels in Washington DC What's the news in India? How much does a 2 bedroom apartment rent for in Philadelphia? Open a new tab in Chrome Set an alarm for 6pm Search for flights from Sydney to Helsinki What's the news in Japan? Open the clock How do I get to the nearest Lowe's? Search for hotels in Philadelphia What's the latest video from GameSpot News? What's a good restaurant in New Jersey? What's the weather like in New York? What's a good restaurant in Seattle? What's on the menu at Burger King? Install the Calendar app What's a good restaurant in New York? Play some music on YouTube Search for hotels in Atlanta Open a new Chrome incognito tab What's the latest news in space exploration? What's the news in the Dominican Republic? Search for flights from NYC to Mexico city Search for flights from Seoul to Barcelona What's the latest news in astrophysics? What's the news in Jamaica? What is the capital of Switzerland? What's a good restaurant in Sacramento? Where can I buy a nice beach towel? Search for a new perfume Show me some nice wallpapers for my tablet Search for vegetarian restaurants on Maps Search for flights from Buenos aires to Tokyo Search for a new blush on Sephora What's the price of the Hisense TV? Open a new Chrome private window Search for hotels in Chicago Search for hotels in Austin Set an alarm for 4pm Install the Reddit app How much does a 2x4x8 board cost at Lowes? 
Check the settings for the YouTube app Search for good Greek restaurants Play the new Drake video on YouTube Open the files app Check the settings for the Google Maps app Who is the prime minister of the United Kingdom? Find the nearest grocery store Search for hotels in Paris How much does a 3 bedroom apartment rent for in Dallas? How much does a 2 bedroom apartment rent for in Miami? Find the nearest electronics store that's open tomorrow Open a new incognito window in the chrome app Search for good Korean restaurants Search for flights from London to Paris What's the news in Sri Lanka? What are the new products by Samsung? What is the capital of Sweden? How much does the LG TV cost? Search for flights from NYC to Buenos aires What's a good restaurant in Las Vegas? What is the capital of Norway? What is the capital of Italy? What is the speed of a rocket? How do I get to the nearest Target? Show me some nice wallpapers for my phone What time is it in New York? Search for flights from NYC to Tokyo Check the settings for the Amazon Alexa app What's the weather like inToronto? Play the new Bruno Mars video on YouTube What's a good restaurant in San Diego? What's the price of the LG TV? Google the capital of Argentina Google the capital of the United States What's the news in Argentina? Search for flights from NYC to Sydney Where can I buy a nice beach umbrella? Show me some nice wallpapers for my computer Open a new incognito tab in the chrome app Set an alarm for 7pm Search for flights from Barcelona to Boston How much does a 3 bedroom apartment rent for in Miami? What's a good restaurant in Los Angeles? Google the capital of Panama Search for a new eyeshadow How much does a 3 bedroom apartment rent for in Washington DC? What's the weather like in London? What's the news in Chile? Play the latest video from the BBC Check the settings for the Twitter app How do I get to the nearest Verizon Store? 
Open Reddit Search for a new mascara on Sephora Search for top rated sushi restaurants on Maps What time is it in Moscow? How much does a 3 bedroom apartment rent for in New York? How much does a 2 bedroom apartment rent for in Denver? Check the settings for the Google Play Music app What's on the menu at McDonalds? What's US dollar exchange rate against the British Pound? What is the capital of Canada? What's on the menu at Panera? How much does a 3 bedroom apartment rent for in Seattle? Search for a new mascara What is the capital of Spain? What is the speed of a jet? What's the news about the US dollar exchange rate? Install the Yelp app What is the capital of the United Kingdom? What's on my calendar for the rest of the month? Show me some nice wallpapers for my desktop Check the settings for the Amazon Music app Install the Yahoo app How much does a 2 bedroom apartment rent for in San Francisco? What's US dollar exchange rate against the South Korean Won? What's the top post on reddit? What is the speed of a plane? Searchfor good French restaurants How much does the HisenseTV cost? Search for a good pizza place on Maps Search for flights from Helsinki to Tokyo What are the best selling refrigerators at Home Depot? What is the speed of a tiger? Install the Pandora app Open the SMS app What's the price of the Vizio TV? How much does a 2 bedroom apartment rent for in New York? What's the price of the Samsung TV? Find the nearest electronics store that's open today Check the settings for the Amazon Prime Video app Where can I buy a nice beach tote? What's the news in Paraguay? Search for a new foundation (skincare) product Who is the president of the United States? What time is it in Sydney? Search for good Italian restaurants Open the calendar and show me this week's events? How big is the universe? 
Search for flights from Mexico city to Sydney Search for flights from Helsinki to Seoul Search for top rated burger restaurants on Maps Install the Facebook app How big is the earth? What's the price of the Sony TV? Open a new private tab in Chrome What's on the menu at Denny's? How do I get to the nearest electronics store? How much does a 3 bedroom apartment rent for in Boston? What is the speed of sound? Open the calculator What's the price of the 1000-Watt EGO Power+ Snow Blower? Search for good Indian restaurants What's the latest news in space science? Install the Spotify app Open a new Chrome incognito window How much does a 2 bedroom apartment rent for in Chicago? What's a good restaurant in Philadelphia? What's the weather like in Chicago? What's a good restaurant in Portland? What's a good restaurant in San Francisco? Install the Weather app Search for flights from NYC to Paris Search for hotels in Las Vegas Play the latest video from the Wall Street Journal Search for flights from Zurich to Buenos aires What's the news in China? Install the Uber app Where can I buy a nice beach tent? Check the settings for the Google Play Books app What's the latest technology news? What's the news in Ecuador? Search for a new skincare product What's on my calendar for the rest of the week? Check the settings for the Google Chrome app Open the contacts Google the capital of Paraguay Search for flights from Mexico city to Boston Search for top rated seafood restaurants on Google Maps How much does a3 bedroom apartment rent for in Portland? What's the news about the US economy? Where can I buy a nice beach sandals? Search for a new eyeliner What's the latest video from GameXplain? Where can I buy a nice beach chair? What's the news about the US dollar? Play the new Katy Perry video on YouTube Open a new incognito window in Chrome Search for hotels in Sydney How big is the moon? What's on the menu at Taco Bell? What is the capital of France? 
Play the latest video from the Washington Post Search for the best pizza restaurants on Maps How do I get to the nearest McDonalds? Search for hotels in New York What's the news in the Bahamas? What's the latest video from GameSpot Reviews? What's the news in Singapore? Check my email Check the settings for the Spotify app How much does the new iPad cost on eBay? What's the weather like in Beijing? Turn on notifications for the Google Maps app Open the camera How do I get to the nearest Best Buy? What is the speed of a train? How do I get to the nearest Nordstrom? How big is the sun? Who is the president of France? What's the price of the 2x4x8 boards at Home Depot? What's the time in San Francisco? What's the weather like in Moscow? Check the settings for the Instagram app What's the weather like in Mexico City? What time is it in London? What's on the menu at In-N-Out? What's the news in Barbados? Open the calculator app What's the news in South Korea? What's the weather like in Rio de Janeiro? What is the speed of a bicycle? What time is it in Beijing? What's the news this afternoon? How much does a 2 bedroom apartment rent for in Seattle? Check the settings for the Lyft app What's a good restaurant near me? What's the price of the new iPhone on eBay? How much does a 2 bedroom apartment rent for in Washington DC? What's US dollar exchange rateagainst the Mexican Peso? What's the price of the Galaxy phone on eBay? What's the news in Suriname? How do I get to the nearest IKEA? How much does the new iPad cost? Open the Google play store app Check the settings for the Amazon Shopping app What's the news in the Philippines? What's a good restaurant in Miami? Search for hotels in San Francisco Set an alarm for 6am Open the settings How big is a dinosaur? Search for good Chinese restaurants What is the capital of Japan? What's the top post on reddit right now? Search for good Italian restaurants on Maps What's the news in Pakistan? What is the capital of Brazil? 
What's the news in Cambodia? Google the capital of Bolivia Search for a new blush What is the speed of light? What's on the menu at Domino's? What's the top post on reddit today? How much does the TCL TV cost? Google the capital of Uruguay Search for hotels in Buenos aires Search for flights from Sydney to Zurich What are the best selling refrigerators at Lowes? Search for hotels in Orlando What is the capital of Germany? Open a new window in the chrome app Open a new Chrome tab Search for flights from Barcelona to Mexico city What's a good restaurant in Atlanta? Google the capital of Chile Play the new Beyonce video on YouTube What's on the menu at IHOP? Search for flights from Buenos aires to Seoul Open a new tab in the chrome app How much does a 3 bedroom apartment rent for in Austin? What is the capital of India? How much does a 3 bedroom apartment rent for in Los Angeles? How do I get to the nearest Home Depot? Check the settings for the Google Play Movies app Search for hotels in NYC Set an alarm for 3pm Search for good Japanese restaurants What's the news in Brunei? Set an alarm for 11am Install the eBay app Google the capital of Canada Search for flights from Tokyo to NYC What's the price of the TCL TV? What's the weather like in San Francisco? How do I get to the nearest Starbucks? How much does a 3 bedroom apartment rent for in Atlanta? Check my gmail Play the new Ariana Grande video on YouTube How much does a 2 bedroom apartment rent for in Atlanta? What's the price of the new iPhone Find the nearest electronics store that's open Show me my notifications Search for flights from NYC to San Diego What's the weather like in Seoul? What's the news this morning? Google the capital of Peru What's a good restaurant in Denver? Google the capital of Mexico Search for top rated sushi restaurant What's the weather like in Tokyo? Search for top rated pizza restaurants on Maps Search for a new highlighter How do I get to the nearest JCPenney? 
What's the news in Thailand? What's the news this month? Set an alarm for 8pm What's the news this week? Search for a new hair product What's the news in Taiwan? How old is the earth? Check the settings for the Netflix app What's the weather like in Johannesburg? How do I get to the nearest Burger King? What is the capital of Argentina? How big is a giraffe? Open a new Chrome private tab What's the US dollar exchange rate against the Canadian Dollar? Play the new Justin Bieber video on YouTube How much does the Vizio TV cost? Set an alarm for 1pm How much does a 2 bedroom apartment rent for in Austin? What's on the menu at Papa Murphy's? Search for flights from Buenos aires to Helsinki What's the latest news in space technology? Find coffee shops on Maps Install the Wikipedia app What's the news in Nepal? Set an alarm for 2pm What's the news this evening? What's the weather like in Paris? What's the news in Uruguay? What's on the menu at Subway? How do I get to the nearest Sprint Store? What time is it in Berlin? What time is it? How big is a lion? Search for flights from Chicago to London Search for hotels in Tokyo What's the time in New York? Open the clock app Open the downloads What's the news in Puerto Rico? What time is it in Tokyo? What's the news in Bangladesh? What time is it in San Francisco? Search for hotels in London Search for flights from Tokyo to Seoul What's on the menu at Cheesecake Factory? Search for flights from Boston to Zurich What's the news in French Guiana? What's the speed of light? How much does the Samsung TV cost? Open a new Chrome window What's the news about the US? Open the music app Play the new Taylor Swift video on YouTube What's the latest news in planetary science? What's the news in Laos? Set an alarm for 8am Search for hotels in Seattle What's on the menu at Five Guys? Google the capital of Brazil What's the news in Guyana? What's a good restaurant in San Jose? How much does a 3 bedroom apartment rent for in San Francisco? 
Play the latest video from the Huffington Post Search for a new bronzer Search for hotels in Zurich Install the Google app What's US dollar exchange rate against the Chinese Yuan? Install the Starbucks app What's a good restaurant in Dallas? What's on Reddit this week Find a good burger place on Maps How much does a 2 bedroom apartment rent for in Los Angeles? Set an alarmfor 5pm What's the news in Peru? What is the capital of China? What's the news in Indonesia? How much does a 3 bedroom apartment rent for in Houston? How much does a 2 bedroom apartment rent for in Portland? What's the latest video from GameSpot Trailers? What's on Reddit Install the News app What's the weather like in Sydney? What's a good restaurant in Chicago? Search for hotels in Denver Turn off notifications for the Google Maps app Search for flights from Sydney to Buenos aires Check the settings for the Facebook app Go to Reddit What's on the menu at Chick-fil-A? What does the iPhone 8 look like on eBay? What's the price of the Galaxy phone? Search for the best burger restaurants on Maps Search for hotels in Miami How much does a 2 bedroom apartment rent for in Boston? Search for a new foundation on Sephora Google the capital of Colombia What's the news in Malaysia? What's on the menu at Olive Garden? What's the latest video from GameTrailers? What's the latest news in space? What's the weather like in Singapore? Search for flights from NYC to London Open the play store What's the weather like in Los Angeles? How much does a 3 bedroom apartment rent for in Philadelphia? Check the settings for the Google Photos app Open the calendar What's a goodrestaurant in Austin? Open a new private window in Chrome Find the nearest electronics store that's open now Install the Twitter app Open the contacts app What's the news in Vietnam? What's a good restaurant in Houston? What's the latest news in cosmology? 
Search for hotels in Boston Search for flights from San Francisco to Tokyo What is the speed of a skateboard? Search for a new nail polish What's the latest video from GameSpot? Where can I buy a nice beach hat? What's the news in theFalkland Islands? Search for a new lipstick on Sephora What does the iPhone 8 look like? Install the CNN app Play the latest video from the New York Times What is the capital of Mexico? Google the capital of Venezuela ================================================ FILE: digirl/environment/android/assets/task_set/general_train.txt ================================================ Check the settings for the Pandora app What's a good restaurant in Los Angeles? Show me some nice wallpapers for my computer Play the new Bruno Mars video on YouTube How do I get to the nearest Verizon Store? What's the latest video from GameSpot Reviews? How much does the Samsung TV cost? Check the settings for the YouTube app What's a good restaurant in Denver? Search for 5 star sushi restaurants on Maps What's a good restaurant in New Jersey? Check the settings for the Facebook app Where can I buy a nice beach hat? Play the latest video from the Wall Street Journal Google the capital of Colombia Play the new Taylor Swift video on YouTube What's on the menu at Cheesecake Factory? How do I get to the nearest grocery store? What's US dollar exchange rate against the Chinese Yuan? What's the weather like in Hong Kong? What is the price of a 12' ladder at Home Depot? What's the news this week? How do I get to the nearest IKEA? Check the settings for the Google Play Movies app What's the news in Malaysia? What's the weather like in Johannesburg? Search for good Indian restaurants Search for flights from NYC to Mexico city How big is the sun? Open Reddit What is the capital of Germany? Search for flights from NYC to San Francisco Search for flights from Sydney to Buenos aires What's a good restaurant near me? What's the latest video from GameSpot Trailers? 
What's a good restaurant in Seattle? What's the latest tech news? What's US dollar exchange rate against the South Korean Won? Install the News app Check my email Search for flights from Zurich to Helsinki Search for top rated sushi restaurant Play the latest video from the New York Times What is the average speed of a car? What's the price of the Galaxy phone on eBay? Find a nice sofa on eBay What is the capital of China? Install the Pandora app How much does a 2 bedroom apartment rent for in Washington DC? Search for a new lipgloss How big is a blue whale? Check the settings for the Spotify app Search for flights from Barcelona to Mexico city How do I get to the nearest Apple Store? What's US dollar exchange rate against the British Pound? Search for flights from Tokyo to Mexico city How much does a 3 bedroom apartment rent for in Atlanta? Open a new incognito window in the chrome app Check the settings for the Google Play Store app Install the Calendar app What's the latest video from GameXplain? Search for top rated sushi restaurants on Maps What's the news in Indonesia? Set an alarm for 3pm Show me some nice wallpapers for my phone Search for hotels in Las Vegas What's the price of the TCL TV? How big is the earth? Google the capital of Uruguay Open the Google play store app What's the news in Myanmar? How far is the moon? What's the news today? How much does a 3 bedroom apartment rent for in Chicago? What is the speed of a skateboard? What's the price of the new iPhone on eBay? What's the top post on reddit today? What's the news in Puerto Rico? Open the play store Play the latest video from the Washington Post Search for flights from Helsinki to Seoul Search for flights from Seoul to Mexico city What's the weather like in Moscow? How do I get to the nearest JCPenney? What's on the menu at Domino's? What's a good restaurant in Atlanta? Search for good Thai restaurants Search for a new blush How much does a 3 bedroom apartment rent for in Seattle? 
Install the Twitter app What's the news this afternoon? Search for a new foundation (skincare) product Open a new Chrome incognito tab Search for good Italian restaurants on Maps Play the new Beyonce video on YouTube Search for flights from NYC to Tokyo Find the nearest electronics store that's open now What's the weather like in Seoul? Show me my notifications Install the Instagram app What's the US dollar exchange rate against the Brazilian Real? What's a good restaurant in San Francisco? How do I get to the nearest Home Depot? Install the CNN app Search for a new eyeshadow Find coffee shops on Maps Search for top rated seafood restaurants on Google Maps Search for the best Mexican restaurants Set an alarm for 2pm Open a new incognito window in Chrome What are the best selling refrigerators at Home Depot? What's the news in Singapore? How much does a 2 bedroom apartment rent for in Houston? Search for hotels in Paris Install the ESPN app Search for good Chinese restaurants What's the latest news in space science? What's the time in New York? What's on the menu at Red Lobster? What time is it in Moscow? Google the capital of Panama Show me some nice wallpapers for my laptop What's a good restaurant in Boston? What time is it in Beijing? What time is it in Sydney? How much does a 2 bedroom apartment rent for in Denver? Search for flights from Tokyo to NYC Search for the best burger restaurants on Maps Play the new Ariana Grande video on YouTube How much does the Vizio TV cost? What's the price of the new iPhone Search for good pizza restaurants on Maps How much does a 2 bedroom apartment rent for in Chicago? What's the US dollar exchange rate against the Australian Dollar? Search for a new eyeliner How do I get to the nearest AT&T Store? What's the news in Paraguay? What's the latest news in tech? 
Search for flights from Zurich to Buenos aires Search for hotels in San Diego Set an alarm for 10am Search for good Italian restaurants Open a new Chrome incognito window Open the contacts What's the weather like in Mexico City? Find the nearest electronics store that's open tomorrow Set an alarm for 8pm What's the news in Suriname? Search for a new eyeshadow on Sephora What's the price of the 1000-Watt EGO Power+ Snow Blower? Where can I buy a nice beach bag? Open a new window in the chrome app What's the news in Trinidad and Tobago? What is the speed of a jet? Check the settings for the Google Photos app What's the news this weekend? How do I get to the nearest Best Buy? What's the news in the Philippines? What's the weather like inToronto? What's the latest video from GameSpot eSports? What is the capital of Spain? What is the capital of Japan? What's a goodrestaurant in Austin? Search for flights from NYC to Barcelona Check my email inbox Search for hotels in Miami What's on the menu at Denny's? What is the capital of India? What's the news in Cambodia? What's on the menu at Panera? Search for hotels in Buenos aires What is the capital of Brazil? Google the capital of Peru Search for flights from Buenos aires to Tokyo Search for flights from NYC to London What's the top post on reddit right now? What time is it? Toggle notifications for the Google Photos app What's on the menu at Chipotle? Search for flights from Helsinki to Tokyo Open the calendar and show me this week's events? What's the latest news in space exploration? What's the latest video from IGN? What's the latest news in cosmology? What's the news in South Korea? What's on the menu at Chick-fil-A? Search for flights from Mexico city to Seattle Open a new private window in Chrome Play the new Ed Sheeran video on YouTube Open a new incognito tab in Chrome What's the latest news in astrophysics? What is the speed of sound? 
Search for good BBQ restaurants Search for hotels in Los Angeles What time is it in San Francisco? Check the settings for the Amazon Prime Video app Play the new Justin Bieber video on YouTube What is the capital of France? Search for a new perfume What's the US dollar exchange rate against the Euro? How big is a giraffe? What's on the menu at Subway? What's the latest technology news? What is the capital of Switzerland? What's the news in Venezuela? Search for a new blush on Sephora What's the news in Bolivia? Search for hotels in San Francisco Check the settings for the Google Play Books app Searchfor good French restaurants What's the news about the US president? What's the news in Pakistan? What's the news in Argentina? Search for good Greek restaurants How do I get to the nearest Burger King? What's the news in Taiwan? What's a good restaurant in Sacramento? Where can I buy a nice beach sandals? What time is it in London? What's the news this evening? How do I get to the nearest Starbucks? How much does a 3 bedroom apartment rent for in Denver? Search for flights from Mexico city to Boston What's the latest video from Game Informer? Search for hotels in New York Check the settings for the Uber app What is the capital of England? Search for hotels in Philadelphia Where can I buy a nice beach blanket? Open the settings What's the latest video from GameSpot? What's a good restaurant in Miami? How much does a 3 bedroom apartment rent for in Austin? Search for flights from San Francisco to Tokyo How do I get to the nearest Subway? Where can I buy a nice beach towel? What's US dollar exchange rate against the Indian Rupee? What's on the menu at Starbucks? What's a good restaurant in New York? Toggle notifications for the Google Maps app Search for hotels in Atlanta Open the calendar app What's the news in Chile? Install the eBay app Where can I buy a nice beach chair? Google the capital of Chile What's a good restaurant in Chicago? 
Install the Amazon app Search for hotels in Sydney What's the time in San Francisco? Open the downloads What's a good restaurant in Phoenix? Search for flights from Chicago to London What's the weather like in Beijing? What's the news about the US stock market? Play the new Maroon 5 video on YouTube Search for flights from NYC to San Diego What's the news in the Bahamas? How much does a 2 bedroom apartment rent for in Atlanta? What's the price of the Vizio TV? Open the files app Search for good Japanese restaurants Install the Lyft app What's the news about the US economy? What's the news in Barbados? Check the settings for the Lyft app Find the nearest grocery store What's the news in Brazil? What's the news in Colombia? How do I get to the nearest Target? What's on the menu at Papa Murphy's? What's the news in Nepal? Open a new tab in Chrome How much does a 2x4x8 board cost at Lowes? What's on the menu at Five Guys? How big is a tiger? What's on the menu at McDonalds? What's the news in Japan? What's the weather like in Los Angeles? Checkthe settings for the Amazon Prime Music app Search for hotels in Mexico city Play the new Selena Gomez video on YouTube Install the Starbucks app What's the speed of light? Set an alarm for 1pm How much does a 2 bedroom apartment rent for in New York? What's US dollar exchange rate against the Japanese Yen? Open a new private tab in Chrome Search for flights from San Diego to Seattle What's the news in Vietnam? What's the weather like in Tokyo? Search for a new highlighter How much does the HisenseTV cost? Check the settings for the Amazon Shopping app How much does a 3 bedroom apartment rent for in Dallas? What's the news in theFalkland Islands? How do I get to the nearest T-Mobile Store? Search for flights from Buenos aires to Helsinki How much does a 2 bedroom apartment rent for in Boston? Open a new private window in the chrome app What's on my calendar for the rest of the week? WHat are the new products by Samsung on eBay? 
What's the price of the 2x4x8 boards at Home Depot? Search for the best pizza restaurants on Maps What's the latest news in astronomy? Search for flights from NYC to Chicago Search for hotels in Austin Search for top rated pizza restaurants on Maps How big is the moon? What's the latest video from GameSpot News? What's the weather like in London? Check the settings for the Netflix app What's on the menu at In-N-Out? Set an alarm for 6am What time is it in New York? What's the price of the EGO 14-in 56-Volt Brushless Cordless Chainsaw? Where can I buy a nice beach tote? Search for flights from Mexico city to Sydney Play the new Demi Lovato video on YouTube How do I get to the nearest McDonalds? What's the news in Ecuador? Set an alarm for 7pm What are the best selling refrigerators at Lowes? Search for flights from Buenos aires to Seoul Search for flights from London to Paris Install the Google app Check my gmail Open the calendar How big is a lion? Search for a new skincare product Search for flights from NYC to Sydney What's the latest video from Gameranx? What is the speed of a plane? What's on the menu at Pizza Hut? Search for hotels in Denver Search for hotels in NYC What's the latest news in space technology? Who is the prime minister of Canada? What's on the menu at Burger King? Install the Uber app Open the calculator Search for flights from Seoul to Barcelona Check the settings for the Twitter app What's the weather like in Rio de Janeiro? Open the music app What's the news in Bangladesh? How much does a 2 bedroom apartment rent for in Miami? How much does a 2 bedroom apartment rent for in Austin? Google the capital of Paraguay Where can I buy a nice beach cooler? Open a new Chrome tab Set an alarm for 7am Search for flights from NYC to Paris What's the news this morning? How much does a 3 bedroom apartment rent for in Boston? Play the latest video from the BBC Set an alarmfor 5pm What's on the menu at Olive Garden? 
Search for top rated burger restaurants on Google Maps How do I get to the nearest Sprint Store? How do I get to the nearest Chipotle? Search for vegetarian restaurants on Maps What's the price of the Galaxy phone? Google the capital of the United States What's the weather like in Delhi? Find a good burger place on Maps What are the new products by Samsung? How much does a 3 bedroom apartment rent for in Philadelphia? What's the news in India? Where can I buy a nice beach umbrella? Where can I buy a nice beach tent? What's a good restaurant in Portland? Search for a good pizza place on Maps What's the price of the Hisense TV? Search for flights from NYC to Buenos aires What's the US dollar exchange rateagainst the Swiss Franc? What is the capital of Mexico? How do I get to the nearest Lowe's? Install the Yahoo app What does the iPhone 8 look like? Search for hotels in Boston What's the weather like in Paris? Check the settings for the Google Play Music app What's a good restaurant in San Diego? Search for a new mascara on Sephora What is the speed of a rocket? What is the price of a 12' ladder at Lowes? What's the weather like in Sydney? What is the speed of light? What's the price of the Samsung TV? What's the news in Thailand? What's on my calendar for the rest of the month? What's the news in Guyana? Google the capital of Brazil What's the latest news in planetary science? Search for flights from Boston to Zurich What is the speed of a tiger? What is the speed of a train? Check the settings for the Instagram app Set an alarm for 4pm What is the capital of Canada? Google the capital of Ecuador Google the capital of Mexico Find the nearest electronics store that's open Set an alarm for 8am What's the weather like in Singapore? Search for flights from Barcelona to Boston Search for a new hair product Open a new Chrome private window Search for flights from Mexico city to Zurich Open the gallery What is the capital of the United Kingdom? 
Search for hotels in Zurich How do I get to the nearest Walmart? What's on the menu at Papa John's? Search for a new nail polish What's the news in Peru? What's a good restaurant in Dallas? Go to Reddit What's US dollar exchange rateagainst the Mexican Peso? Google the capital of Venezuela How much does a 3 bedroom apartment rent for in Washington DC? What's the top post on reddit? Search for flights from Boston to Sydney Google the capital of Bolivia What's a good restaurant in San Jose? What time is it in Berlin? What's the news? Open the calculator app Open a new tab in the chrome app What time is it in Los Angeles? How much does the new iPad cost? How do I get to the nearest Macy's? What's on the menu at IHOP? How much does a 3 bedroom apartment rent for in Los Angeles? What's the latest news in space? Search for a new mascara What's on Reddit today Set an alarm for 6pm Search for flights from Tokyo to Seoul Open the camera Check the settings for the Google Maps app What's the news in China? How much does a 3 bedroom apartment rent for in San Francisco? What's the news about the US dollar? Play the latest video from the Huffington Post Open a new window in Chrome Check my gmail inbox Turn off notifications for the Google Maps app What's on Reddit Install the Weather app Install the Wikipedia app Install the Yelp app What's the US dollar exchange rate against the Canadian Dollar? What's a good restaurant in Houston? What is the capital of Italy? How much does the new iPad cost on eBay? What's a good restaurant in Philadelphia? How much does a 3 bedroom apartment rent for in Houston? What's the news in the Dominican Republic? How much does the LG TV cost? Search for hotels in Orlando Search for a new foundation on Sephora Play some music on YouTube How much does a 2 bedroom apartment rent for in Los Angeles? Open a new Chrome window How big is the universe? What's the news about the US dollar exchange rate? What time is it in Tokyo? 
How much does a3 bedroom apartment rent for in Portland? Search for a new lipstick on Sephora What's the weather like in San Francisco? What's the news about the US? Open the SMS app Search for hotels in Chicago What's on the menu at Taco Bell? Open the clock Search for hotels in Washington DC What's the news in Laos? Search for hotels in Seattle What is the capital of Argentina? What's the weather like in Chicago? Install the Reddit app Check the settings for the Amazon Alexa app What's the weather like in New York? Search for hotels in Tokyo Check the settings for the Google Chrome app Turn on notifications for the Google Maps app Open the clock app Install the Spotify app Set an alarm for 11am Set an alarm for 12pm How do I get to the nearest Nordstrom? Search for hotels in London Who is the president of France? How much does a 3 bedroom apartment rent for in Miami? Show me some nice wallpapers for my desktop How much does the Sony TV cost? How much does the TCL TV cost? What's on Reddit this week What's the news in French Guiana? Find the nearest electronics store that's open today What's the news in Brunei? Search for top rated burger restaurants on Maps Show me some nice wallpapers for my tablet Who is the president of the United States? What's the price of the Sony TV? Install the Facebook app What's the time? What's a good restaurant in Las Vegas? What is the capital of Sweden? Google the capital of Argentina How much does a 2 bedroom apartment rent for in Philadelphia? How do I get to the nearest electronics store? How much does a 3 bedroom apartment rent for in New York? Set an alarm for 9am Check the settings for the Amazon Music app What's the news in Jamaica? Open a new incognito tab in the chrome app Search for good Korean restaurants What's the weather like in Mumbai? What's the latest video from GameTrailers? What time is it in Paris? How much does a 2 bedroom apartment rent for in Portland? What does the iPhone 8 look like on eBay? 
Play the new Drake video on YouTube What is the speed of a cheetah? Open the contacts app Search for flights from Sydney to Zurich What's the news in Uruguay? Search for flights from Sydney to Helsinki Search for a new bronzer What's the news this month? How big is a dinosaur? Who is the prime minister of the United Kingdom? How much does a 2 bedroom apartment rent for in Seattle? What's the news in Sri Lanka? Open a new Chrome private tab How old is the earth? What's the price of the LG TV? Play the new Katy Perry video on YouTube How much does a 2 bedroom apartment rent for in San Francisco? What is the capital of Norway? Google the capital of Canada What is the speed of a bicycle? ================================================ FILE: digirl/environment/android/assets/task_set/webshop_test.txt ================================================ Go to ebay.com, search for 'apple airpods', and select the first entry Go to ebay.com Go to ebay.com, search for 'apple airpods' Go to costco.com, search for 'razer blade', and select the first entry Go to costco.com Go to costco.com, search for 'razer blade' Go to costco.com, search for 'asus zenbook', and select the first entry Go to costco.com Go to costco.com, search for 'asus zenbook' Go to walmart.com, search for 'logitech g933', and select the first entry Go to walmart.com Go to walmart.com, search for 'logitech g933' Go to ebay.com, search for 'corsair k70', and select the first entry Go to ebay.com Go to ebay.com, search for 'corsair k70' Go to newegg.com, search for 'alienware area 51', and select the first entry Go to newegg.com Go to newegg.com, search for 'alienware area 51' Go to walmart.com, search for 'macbook pro 15 inch', and select the first entry Go to walmart.com Go to walmart.com, search for 'macbook pro 15 inch' Go to newegg.com, search for 'duracell triple a', and select the first entry Go to newegg.com Go to newegg.com, search for 'duracell triple a' Go to bestbuy.com, search for 'energizer triple 
a', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'energizer triple a' Go to ebay.com, search for 'duracell triple a', and select the first entry Go to ebay.com Go to ebay.com, search for 'duracell triple a' Go to costco.com, search for 'usb-c to usb-a', and select the first entry Go to costco.com Go to costco.com, search for 'usb-c to usb-a' Go to newegg.com, search for 'razer blade', and select the first entry Go to newegg.com Go to newegg.com, search for 'razer blade' Go to costco.com, search for 'razer blade', and select the first entry Go to costco.com Go to costco.com, search for 'razer blade' Go to bestbuy.com, search for 'logitech g933', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'logitech g933' Go to walmart.com, search for 'razer kraken', and select the first entry Go to walmart.com Go to walmart.com, search for 'razer kraken' Go to walmart.com, search for 'macbook pro', and select the first entry Go to walmart.com Go to walmart.com, search for 'macbook pro' Go to bestbuy.com, search for 'rayovac triple a', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'rayovac triple a' Go to ebay.com, search for 'sony triple a', and select the first entry Go to ebay.com Go to ebay.com, search for 'sony triple a' Go to ebay.com, search for 'jbl flip 4', and select the first entry Go to ebay.com Go to ebay.com, search for 'jbl flip 4' Go to ebay.com, search for 'logitech g pro', and select the first entry Go to ebay.com Go to ebay.com, search for 'logitech g pro' Go to costco.com, search for 'acer predator', and select the first entry Go to costco.com Go to costco.com, search for 'acer predator' Go to newegg.com, search for 'usb-a', and select the first entry Go to newegg.com Go to newegg.com, search for 'usb-a' Go to newegg.com, search for 'razer thresher', and select the first entry Go to newegg.com Go to newegg.com, search for 'razer thresher' Go to costco.com, search for 'acer 
nitro', and select the first entry Go to costco.com Go to costco.com, search for 'acer nitro' Go to bestbuy.com, search for 'razer thresher', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'razer thresher' Go to walmart.com, search for 'dell xps', and select the first entry Go to walmart.com Go to walmart.com, search for 'dell xps' Go to bestbuy.com, search for 'razer nari', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'razer nari' Go to ebay.com, search for 'macbook air', and select the first entry Go to ebay.com Go to ebay.com, search for 'macbook air' Go to costco.com, search for 'macbook pro', and select the first entry Go to costco.com Go to costco.com, search for 'macbook pro' Go to costco.com, search for 'razer blackwidow', and select the first entry Go to costco.com Go to costco.com, search for 'razer blackwidow' Go to ebay.com, search for 'lenovo thinkpad', and select the first entry Go to ebay.com Go to ebay.com, search for 'lenovo thinkpad' Go to ebay.com, search for 'razer blade', and select the first entry Go to ebay.com Go to ebay.com, search for 'razer blade' Go to newegg.com, search for 'acer predator', and select the first entry Go to newegg.com Go to newegg.com, search for 'acer predator' Go to walmart.com, search for 'logitech g502', and select the first entry Go to walmart.com Go to walmart.com, search for 'logitech g502' Go to walmart.com, search for 'usb-a', and select the first entry Go to walmart.com Go to walmart.com, search for 'usb-a' Go to walmart.com, search for 'panasonic triple a', and select the first entry Go to walmart.com Go to walmart.com, search for 'panasonic triple a' Go to costco.com, search for 'razer thresher', and select the first entry Go to costco.com Go to costco.com, search for 'razer thresher' Go to newegg.com, search for 'razer nari', and select the first entry Go to newegg.com Go to newegg.com, search for 'razer nari' Go to costco.com, search for 'logitech 
g910', and select the first entry Go to costco.com Go to costco.com, search for 'logitech g910' Go to walmart.com, search for 'dell xps', and select the first entry Go to walmart.com Go to walmart.com, search for 'dell xps' Go to costco.com, search for 'bose soundlink', and select the first entry Go to costco.com Go to costco.com, search for 'bose soundlink' Go to costco.com, search for 'duracell triple a', and select the first entry Go to costco.com Go to costco.com, search for 'duracell triple a' Go to bestbuy.com, search for 'asus rog', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'asus rog' Go to walmart.com, search for 'dell xps', and select the first entry Go to walmart.com Go to walmart.com, search for 'dell xps' Go to costco.com, search for 'duracell triple a', and select the first entry Go to costco.com Go to costco.com, search for 'duracell triple a' Go to ebay.com, search for 'alienware area 51', and select the first entry Go to ebay.com Go to ebay.com, search for 'alienware area 51' Go to newegg.com, search for 'macbook air', and select the first entry Go to newegg.com Go to newegg.com, search for 'macbook air' Go to costco.com, search for 'macbook pro 13 inch', and select the first entry Go to costco.com Go to costco.com, search for 'macbook pro 13 inch' Go to bestbuy.com, search for 'logitech g910', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'logitech g910' Go to ebay.com, search for 'sony triple a', and select the first entry Go to ebay.com Go to ebay.com, search for 'sony triple a' Go to walmart.com, search for 'razer thresher', and select the first entry Go to walmart.com Go to walmart.com, search for 'razer thresher' Go to walmart.com, search for 'apple airpods', and select the first entry Go to walmart.com Go to walmart.com, search for 'apple airpods' Go to costco.com, search for 'usb-b', and select the first entry Go to costco.com Go to costco.com, search for 'usb-b' Go to ebay.com, 
search for 'logitech g502', and select the first entry Go to ebay.com Go to ebay.com, search for 'logitech g502' Go to costco.com, search for 'acer nitro', and select the first entry Go to costco.com Go to costco.com, search for 'acer nitro' Go to newegg.com, search for 'usb-a', and select the first entry Go to newegg.com Go to newegg.com, search for 'usb-a' Go to bestbuy.com, search for 'beats solo 3', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'beats solo 3' Go to walmart.com, search for 'dell alienware', and select the first entry Go to walmart.com Go to walmart.com, search for 'dell alienware' Go to walmart.com, search for 'alienware area 51', and select the first entry Go to walmart.com Go to walmart.com, search for 'alienware area 51' Go to newegg.com, search for 'macbook pro 15 inch', and select the first entry Go to newegg.com Go to newegg.com, search for 'macbook pro 15 inch' Go to costco.com, search for 'macbook air', and select the first entry Go to costco.com Go to costco.com, search for 'macbook air' Go to newegg.com, search for 'macbook', and select the first entry Go to newegg.com Go to newegg.com, search for 'macbook' Go to costco.com, search for 'panasonic triple a', and select the first entry Go to costco.com Go to costco.com, search for 'panasonic triple a' Go to bestbuy.com, search for 'usb-a', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'usb-a' Go to bestbuy.com, search for 'bestbuy', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'bestbuy' Go to costco.com, search for 'logitech g pro', and select the first entry Go to costco.com Go to costco.com, search for 'logitech g pro' Go to bestbuy.com, search for 'apple airpods pro', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'apple airpods pro' Go to bestbuy.com, search for 'usb-a', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'usb-a' Go to newegg.com, 
search for 'razer kraken', and select the first entry Go to newegg.com Go to newegg.com, search for 'razer kraken' Go to newegg.com, search for 'macbook pro', and select the first entry Go to newegg.com Go to newegg.com, search for 'macbook pro' Go to newegg.com, search for 'usb-c', and select the first entry Go to newegg.com Go to newegg.com, search for 'usb-c' Go to ebay.com, search for 'bose soundlink mini', and select the first entry Go to ebay.com Go to ebay.com, search for 'bose soundlink mini' Go to costco.com, search for 'logitech g933', and select the first entry Go to costco.com Go to costco.com, search for 'logitech g933' Go to costco.com, search for 'macbook air', and select the first entry Go to costco.com Go to costco.com, search for 'macbook air' Go to bestbuy.com, search for 'lg ultragear', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'lg ultragear' Go to bestbuy.com, search for 'razer kraken', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'razer kraken' Go to walmart.com, search for 'bose soundlink', and select the first entry Go to walmart.com Go to walmart.com, search for 'bose soundlink' Go to bestbuy.com, search for 'logitech g pro', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'logitech g pro' Go to bestbuy.com, search for 'macbook pro', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'macbook pro' Go to newegg.com, search for 'bose soundlink', and select the first entry Go to newegg.com Go to newegg.com, search for 'bose soundlink' Go to bestbuy.com, search for 'usb-c to usb-a', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'usb-c to usb-a' Go to newegg.com, search for 'alienware aurora', and select the first entry Go to newegg.com Go to newegg.com, search for 'alienware aurora' Go to costco.com, search for 'usb-c', and select the first entry Go to costco.com Go to costco.com, search for 'usb-c' Go 
to walmart.com, search for 'razer thresher', and select the first entry Go to walmart.com Go to walmart.com, search for 'razer thresher' Go to ebay.com, search for 'usb-c to usb-a', and select the first entry Go to ebay.com Go to ebay.com, search for 'usb-c to usb-a' Go to bestbuy.com, search for 'macbook air', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'macbook air' Go to bestbuy.com, search for 'beats solo 3', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'beats solo 3' Go to bestbuy.com, search for 'usb-c to usb-b', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'usb-c to usb-b' Go to newegg.com, search for 'rayovac triple a', and select the first entry Go to newegg.com Go to newegg.com, search for 'rayovac triple a' Go to bestbuy.com, search for 'logitech g502', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'logitech g502' Go to walmart.com, search for 'duracell triple a', and select the first entry Go to walmart.com Go to walmart.com, search for 'duracell triple a' Go to newegg.com, search for 'razer blade', and select the first entry Go to newegg.com Go to newegg.com, search for 'razer blade' Go to walmart.com, search for 'logitech g pro', and select the first entry Go to walmart.com Go to walmart.com, search for 'logitech g pro' Go to bestbuy.com, search for 'usb-c to usb-b', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'usb-c to usb-b' Go to costco.com, search for 'usb-c to usb-a', and select the first entry Go to costco.com Go to costco.com, search for 'usb-c to usb-a' Go to bestbuy.com, search for 'logitech g910', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'logitech g910' Go to bestbuy.com, search for 'apple airpods', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'apple airpods' Go to costco.com, search for 'logitech g910', and select the first entry 
Go to costco.com Go to costco.com, search for 'logitech g910' Go to ebay.com, search for 'jbl charge 4', and select the first entry Go to ebay.com Go to ebay.com, search for 'jbl charge 4' Go to ebay.com, search for 'energizer triple a', and select the first entry Go to ebay.com Go to ebay.com, search for 'energizer triple a' Go to newegg.com, search for 'asus rog', and select the first entry Go to newegg.com Go to newegg.com, search for 'asus rog' Go to walmart.com, search for 'dell alienware', and select the first entry Go to walmart.com Go to walmart.com, search for 'dell alienware' Go to bestbuy.com, search for 'bose quietcomfort 35', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'bose quietcomfort 35' Go to costco.com, search for 'macbook pro', and select the first entry Go to costco.com Go to costco.com, search for 'macbook pro' Go to costco.com, search for 'macbook pro 13 inch', and select the first entry Go to costco.com Go to costco.com, search for 'macbook pro 13 inch' Go to newegg.com, search for 'razer thresher', and select the first entry Go to newegg.com Go to newegg.com, search for 'razer thresher' Go to ebay.com, search for 'macbook air', and select the first entry Go to ebay.com Go to ebay.com, search for 'macbook air' Go to costco.com, search for 'soundlink mini', and select the first entry Go to costco.com Go to costco.com, search for 'soundlink mini' Go to newegg.com, search for 'razer blackwidow', and select the first entry Go to newegg.com Go to newegg.com, search for 'razer blackwidow' Go to walmart.com, search for 'macbook pro', and select the first entry Go to walmart.com Go to walmart.com, search for 'macbook pro' Go to walmart.com, search for 'razer nari', and select the first entry Go to walmart.com Go to walmart.com, search for 'razer nari' Go to bestbuy.com, search for 'logitech g910', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'logitech g910' Go to ebay.com, search for 
'alienware area 51', and select the first entry Go to ebay.com Go to ebay.com, search for 'alienware area 51' Go to bestbuy.com, search for 'usb-a to usb-b', and select the first entry Go to bestbuy.com Go to bestbuy.com, search for 'usb-a to usb-b' Go to walmart.com, search for 'bose soundsport free', and select the first entry Go to walmart.com Go to walmart.com, search for 'bose soundsport free' Go to ebay.com, search for 'dell xps', and select the first entry Go to ebay.com Go to ebay.com, search for 'dell xps' Go to walmart.com, search for 'razer kraken', and select the first entry Go to walmart.com Go to walmart.com, search for 'razer kraken' Go to ebay.com, search for 'bose soundsport free', and select the first entry Go to ebay.com Go to ebay.com, search for 'bose soundsport free' Go to newegg.com, search for 'usb-b', and select the first entry Go to newegg.com Go to newegg.com, search for 'usb-b' Go to newegg.com, search for 'razer blade', and select the first entry Go to newegg.com Go to newegg.com, search for 'razer blade' ================================================ FILE: digirl/environment/android/assets/task_set/webshop_train.txt ================================================ Go to newegg.com Go to newegg.com, search for "macbook air" Go to newegg.com, search for "macbook air", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "logitech g910" Go to bestbuy.com, search for "logitech g910", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "logitech g910" Go to bestbuy.com, search for "logitech g910", and select the first entry Go to costco.com Go to costco.com, search for "bose soundsport free" Go to costco.com, search for "bose soundsport free", and select the first entry Go to walmart.com Go to walmart.com, search for "logitech g910" Go to walmart.com, search for "logitech g910", and select the first entry Go to ebay.com Go to ebay.com, search for "asus zenbook" Go to ebay.com, search for "asus 
zenbook", and select the first entry Go to newegg.com Go to newegg.com, search for "rayovac triple a" Go to newegg.com, search for "rayovac triple a", and select the first entry Go to ebay.com Go to ebay.com, search for "razer blade" Go to ebay.com, search for "razer blade", and select the first entry Go to costco.com Go to costco.com, search for "razer blackwidow" Go to costco.com, search for "razer blackwidow", and select the first entry Go to costco.com Go to costco.com, search for "beats solo 3" Go to costco.com, search for "beats solo 3", and select the first entry Go to costco.com Go to costco.com, search for "usb-c to usb-b" Go to costco.com, search for "usb-c to usb-b", and select the first entry Go to costco.com Go to costco.com, search for "bose soundsport free" Go to costco.com, search for "bose soundsport free", and select the first entry Go to costco.com Go to costco.com, search for "bose soundlink mini" Go to costco.com, search for "bose soundlink mini", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "bose quietcomfort 35" Go to bestbuy.com, search for "bose quietcomfort 35", and select the first entry Go to walmart.com Go to walmart.com, search for "usb-a" Go to walmart.com, search for "usb-a", and select the first entry Go to walmart.com Go to walmart.com, search for "razer blade" Go to walmart.com, search for "razer blade", and select the first entry Go to costco.com Go to costco.com, search for "logitech g pro" Go to costco.com, search for "logitech g pro", and select the first entry Go to ebay.com Go to ebay.com, search for "lg ultragear" Go to ebay.com, search for "lg ultragear", and select the first entry Go to ebay.com Go to ebay.com, search for "razer deathadder" Go to ebay.com, search for "razer deathadder", and select the first entry Go to walmart.com Go to walmart.com, search for "razer kraken" Go to walmart.com, search for "razer kraken", and select the first entry Go to newegg.com Go to newegg.com, search for 
"asus zenbook" Go to newegg.com, search for "asus zenbook", and select the first entry Go to newegg.com Go to newegg.com, search for "bose soundlink mini" Go to newegg.com, search for "bose soundlink mini", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "dell xps" Go to bestbuy.com, search for "dell xps", and select the first entry Go to newegg.com Go to newegg.com, search for "alienware aurora" Go to newegg.com, search for "alienware aurora", and select the first entry Go to walmart.com Go to walmart.com, search for "acer predator" Go to walmart.com, search for "acer predator", and select the first entry Go to walmart.com Go to walmart.com, search for "macbook air" Go to walmart.com, search for "macbook air", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "macbook" Go to bestbuy.com, search for "macbook", and select the first entry Go to walmart.com Go to walmart.com, search for "bose soundlink" Go to walmart.com, search for "bose soundlink", and select the first entry Go to newegg.com Go to newegg.com, search for "macbook pro" Go to newegg.com, search for "macbook pro", and select the first entry Go to walmart.com Go to walmart.com, search for "dell alienware" Go to walmart.com, search for "dell alienware", and select the first entry Go to costco.com Go to costco.com, search for "logitech g pro" Go to costco.com, search for "logitech g pro", and select the first entry Go to ebay.com Go to ebay.com, search for "lenovo thinkpad" Go to ebay.com, search for "lenovo thinkpad", and select the first entry Go to ebay.com Go to ebay.com, search for "acer predator" Go to ebay.com, search for "acer predator", and select the first entry Go to costco.com Go to costco.com, search for "corsair k70" Go to costco.com, search for "corsair k70", and select the first entry Go to costco.com Go to costco.com, search for "sony triple a" Go to costco.com, search for "sony triple a", and select the first entry Go to newegg.com Go to 
newegg.com, search for "razer thresher" Go to newegg.com, search for "razer thresher", and select the first entry Go to costco.com Go to costco.com, search for "macbook pro 13 inch" Go to costco.com, search for "macbook pro 13 inch", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "razer kraken" Go to bestbuy.com, search for "razer kraken", and select the first entry Go to newegg.com Go to newegg.com, search for "lenovo thinkpad" Go to newegg.com, search for "lenovo thinkpad", and select the first entry Go to costco.com Go to costco.com, search for "macbook" Go to costco.com, search for "macbook", and select the first entry Go to walmart.com Go to walmart.com, search for "corsair k70" Go to walmart.com, search for "corsair k70", and select the first entry Go to ebay.com Go to ebay.com, search for "usb-c to usb-a" Go to ebay.com, search for "usb-c to usb-a", and select the first entry Go to newegg.com Go to newegg.com, search for "logitech g pro" Go to newegg.com, search for "logitech g pro", and select the first entry Go to ebay.com Go to ebay.com, search for "bose soundlink" Go to ebay.com, search for "bose soundlink", and select the first entry Go to ebay.com Go to ebay.com, search for "razer blackwidow" Go to ebay.com, search for "razer blackwidow", and select the first entry Go to ebay.com Go to ebay.com, search for "logitech g903" Go to ebay.com, search for "logitech g903", and select the first entry Go to walmart.com Go to walmart.com, search for "jbl flip 4" Go to walmart.com, search for "jbl flip 4", and select the first entry Go to costco.com Go to costco.com, search for "razer deathadder" Go to costco.com, search for "razer deathadder", and select the first entry Go to costco.com Go to costco.com, search for "logitech g903" Go to costco.com, search for "logitech g903", and select the first entry Go to walmart.com Go to walmart.com, search for "asus rog" Go to walmart.com, search for "asus rog", and select the first entry Go to 
ebay.com Go to ebay.com, search for "razer blade" Go to ebay.com, search for "razer blade", and select the first entry Go to ebay.com Go to ebay.com, search for "usb-a to usb-b" Go to ebay.com, search for "usb-a to usb-b", and select the first entry Go to costco.com Go to costco.com, search for "rayovac triple a" Go to costco.com, search for "rayovac triple a", and select the first entry Go to ebay.com Go to ebay.com, search for "razer blade" Go to ebay.com, search for "razer blade", and select the first entry Go to newegg.com Go to newegg.com, search for "dell xps" Go to newegg.com, search for "dell xps", and select the first entry Go to newegg.com Go to newegg.com, search for "logitech g903" Go to newegg.com, search for "logitech g903", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "logitech g pro" Go to bestbuy.com, search for "logitech g pro", and select the first entry Go to newegg.com Go to newegg.com, search for "bose soundsport free" Go to newegg.com, search for "bose soundsport free", and select the first entry Go to newegg.com Go to newegg.com, search for "bose quietcomfort 35" Go to newegg.com, search for "bose quietcomfort 35", and select the first entry Go to walmart.com Go to walmart.com, search for "lenovo thinkpad" Go to walmart.com, search for "lenovo thinkpad", and select the first entry Go to ebay.com Go to ebay.com, search for "logitech g910" Go to ebay.com, search for "logitech g910", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "jbl charge 4" Go to bestbuy.com, search for "jbl charge 4", and select the first entry Go to newegg.com Go to newegg.com, search for "logitech g910" Go to newegg.com, search for "logitech g910", and select the first entry Go to costco.com Go to costco.com, search for "lg ultragear" Go to costco.com, search for "lg ultragear", and select the first entry Go to ebay.com Go to ebay.com, search for "alienware area 51" Go to ebay.com, search for "alienware area 51", 
and select the first entry Go to walmart.com Go to walmart.com, search for "razer blade" Go to walmart.com, search for "razer blade", and select the first entry Go to costco.com Go to costco.com, search for "usb-b" Go to costco.com, search for "usb-b", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "alienware aurora" Go to bestbuy.com, search for "alienware aurora", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "usb-b" Go to bestbuy.com, search for "usb-b", and select the first entry Go to walmart.com Go to walmart.com, search for "macbook" Go to walmart.com, search for "macbook", and select the first entry Go to newegg.com Go to newegg.com, search for "razer nari" Go to newegg.com, search for "razer nari", and select the first entry Go to costco.com Go to costco.com, search for "usb-a to usb-b" Go to costco.com, search for "usb-a to usb-b", and select the first entry Go to walmart.com Go to walmart.com, search for "usb-c to usb-b" Go to walmart.com, search for "usb-c to usb-b", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "corsair k70" Go to bestbuy.com, search for "corsair k70", and select the first entry Go to ebay.com Go to ebay.com, search for "logitech g933" Go to ebay.com, search for "logitech g933", and select the first entry Go to newegg.com Go to newegg.com, search for "beats solo 3" Go to newegg.com, search for "beats solo 3", and select the first entry Go to ebay.com Go to ebay.com, search for "macbook pro 15 inch" Go to ebay.com, search for "macbook pro 15 inch", and select the first entry Go to costco.com Go to costco.com, search for "lenovo thinkpad" Go to costco.com, search for "lenovo thinkpad", and select the first entry Go to ebay.com Go to ebay.com, search for "macbook pro" Go to ebay.com, search for "macbook pro", and select the first entry Go to walmart.com Go to walmart.com, search for "logitech g903" Go to walmart.com, search for "logitech g903", and 
select the first entry Go to walmart.com Go to walmart.com, search for "macbook" Go to walmart.com, search for "macbook", and select the first entry Go to ebay.com Go to ebay.com, search for "usb-a to usb-b" Go to ebay.com, search for "usb-a to usb-b", and select the first entry Go to newegg.com Go to newegg.com, search for "usb-a to usb-b" Go to newegg.com, search for "usb-a to usb-b", and select the first entry Go to ebay.com Go to ebay.com, search for "duracell triple a" Go to ebay.com, search for "duracell triple a", and select the first entry Go to costco.com Go to costco.com, search for "razer huntsman" Go to costco.com, search for "razer huntsman", and select the first entry Go to walmart.com Go to walmart.com, search for "razer blackwidow" Go to walmart.com, search for "razer blackwidow", and select the first entry Go to walmart.com Go to walmart.com, search for "macbook air" Go to walmart.com, search for "macbook air", and select the first entry Go to ebay.com Go to ebay.com, search for "logitech g pro" Go to ebay.com, search for "logitech g pro", and select the first entry Go to walmart.com Go to walmart.com, search for "beats solo 3" Go to walmart.com, search for "beats solo 3", and select the first entry Go to walmart.com Go to walmart.com, search for "razer huntsman" Go to walmart.com, search for "razer huntsman", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "razer blackwidow" Go to bestbuy.com, search for "razer blackwidow", and select the first entry Go to newegg.com Go to newegg.com, search for "razer huntsman" Go to newegg.com, search for "razer huntsman", and select the first entry Go to ebay.com Go to ebay.com, search for "corsair k70" Go to ebay.com, search for "corsair k70", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "corsair k70" Go to bestbuy.com, search for "corsair k70", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "usb-c to usb-a" Go to bestbuy.com, 
search for "usb-c to usb-a", and select the first entry Go to ebay.com Go to ebay.com, search for "razer thresher" Go to ebay.com, search for "razer thresher", and select the first entry Go to costco.com Go to costco.com, search for "macbook" Go to costco.com, search for "macbook", and select the first entry Go to walmart.com Go to walmart.com, search for "lenovo thinkpad" Go to walmart.com, search for "lenovo thinkpad", and select the first entry Go to ebay.com Go to ebay.com, search for "alienware aurora" Go to ebay.com, search for "alienware aurora", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "dell xps" Go to bestbuy.com, search for "dell xps", and select the first entry Go to costco.com Go to costco.com, search for "macbook air" Go to costco.com, search for "macbook air", and select the first entry Go to costco.com Go to costco.com, search for "lg ultragear" Go to costco.com, search for "lg ultragear", and select the first entry Go to costco.com Go to costco.com, search for "razer kraken" Go to costco.com, search for "razer kraken", and select the first entry Go to ebay.com Go to ebay.com, search for "apple airpods" Go to ebay.com, search for "apple airpods", and select the first entry Go to walmart.com Go to walmart.com, search for "dell alienware" Go to walmart.com, search for "dell alienware", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "duracell triple a" Go to bestbuy.com, search for "duracell triple a", and select the first entry Go to costco.com Go to costco.com, search for "razer kraken" Go to costco.com, search for "razer kraken", and select the first entry Go to costco.com Go to costco.com, search for "razer deathadder" Go to costco.com, search for "razer deathadder", and select the first entry Go to newegg.com Go to newegg.com, search for "sony triple a" Go to newegg.com, search for "sony triple a", and select the first entry Go to costco.com Go to costco.com, search for "razer 
blackwidow" Go to costco.com, search for "razer blackwidow", and select the first entry Go to ebay.com Go to ebay.com, search for "panasonic triple a" Go to ebay.com, search for "panasonic triple a", and select the first entry Go to walmart.com Go to walmart.com, search for "lg ultragear" Go to walmart.com, search for "lg ultragear", and select the first entry Go to costco.com Go to costco.com, search for "panasonic triple a" Go to costco.com, search for "panasonic triple a", and select the first entry Go to costco.com Go to costco.com, search for "alienware area 51" Go to costco.com, search for "alienware area 51", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "energizer triple a" Go to bestbuy.com, search for "energizer triple a", and select the first entry Go to newegg.com Go to newegg.com, search for "lenovo thinkpad" Go to newegg.com, search for "lenovo thinkpad", and select the first entry Go to walmart.com Go to walmart.com, search for "razer naga" Go to walmart.com, search for "razer naga", and select the first entry Go to walmart.com Go to walmart.com, search for "usb-c to usb-b" Go to walmart.com, search for "usb-c to usb-b", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "jbl charge 4" Go to bestbuy.com, search for "jbl charge 4", and select the first entry Go to newegg.com Go to newegg.com, search for "alienware area 51" Go to newegg.com, search for "alienware area 51", and select the first entry Go to newegg.com Go to newegg.com, search for "razer kraken" Go to newegg.com, search for "razer kraken", and select the first entry Go to newegg.com Go to newegg.com, search for "dell xps" Go to newegg.com, search for "dell xps", and select the first entry Go to costco.com Go to costco.com, search for "logitech g502" Go to costco.com, search for "logitech g502", and select the first entry Go to walmart.com Go to walmart.com, search for "acer nitro" Go to walmart.com, search for "acer nitro", and select 
the first entry Go to bestbuy.com Go to bestbuy.com, search for "beats solo 3" Go to bestbuy.com, search for "beats solo 3", and select the first entry Go to walmart.com Go to walmart.com, search for "bose quitecomfort 35" Go to walmart.com, search for "bose quitecomfort 35", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "acer nitro" Go to bestbuy.com, search for "acer nitro", and select the first entry Go to ebay.com Go to ebay.com, search for "asus rog" Go to ebay.com, search for "asus rog", and select the first entry Go to ebay.com Go to ebay.com, search for "duracell triple a" Go to ebay.com, search for "duracell triple a", and select the first entry Go to ebay.com Go to ebay.com, search for "usb-c" Go to ebay.com, search for "usb-c", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "razer huntsman" Go to bestbuy.com, search for "razer huntsman", and select the first entry Go to costco.com Go to costco.com, search for "beats solo 3" Go to costco.com, search for "beats solo 3", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "acer nitro" Go to bestbuy.com, search for "acer nitro", and select the first entry Go to bestbuy.com Go to bestbuy.com, search for "lenovo thinkpad" Go to bestbuy.com, search for "lenovo thinkpad", and select the first entry Go to ebay.com Go to ebay.com, search for "logitech g502" Go to ebay.com, search for "logitech g502", and select the first entry Go to ebay.com Go to ebay.com, search for "logitech g933" Go to ebay.com, search for "logitech g933", and select the first entry Go to ebay.com Go to ebay.com, search for "logitech g pro" Go to ebay.com, search for "logitech g pro", and select the first entry Go to costco.com Go to costco.com, search for "panasonic triple a" Go to costco.com, search for "panasonic triple a", and select the first entry Go to ebay.com Go to ebay.com, search for "dell xps" Go to ebay.com, search for "dell xps", and select the first 
class ImageFeatureExtractor:
    """Turns screenshots into pooled BLIP-2 feature vectors.

    The vision tower of Salesforce BLIP-2 is loaded on CPU and its language
    model is discarded, since only image features are needed.
    """

    def __init__(self, device):
        # Remember the requested device (the model itself stays on CPU).
        self.device = device
        # Load BLIP-2 and drop the language model to save memory.
        self.model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b").cpu()
        self.model.language_model = None
        self.processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")

    def to_feat(self, image: Image.Image):
        """Converts a PIL image to a feature representation using the BLIP2 model.

        Args:
            image: A PIL.Image object representing the image to convert.

        Returns:
            A tensor (on CPU, detached) representing the pooled image feature.
        """
        with torch.no_grad():
            # Preprocess and send the pixel values to wherever the model lives.
            batch = self.processor(images=image, return_tensors="pt").to(self.model.device)
            # Pooled output of the vision tower is the per-image embedding.
            feature = self.model.get_image_features(**batch).pooler_output[0]
            return feature.detach().cpu()
def cogagent_translate_action(out):
    """Parse a CogAgent 'Grounded Operation:' output string into an AndroidAction.

    Recognizes taps (with [[x,y]] coordinates scaled by 1000), typing, the
    hardware keys, task status markers, and the four swipe directions. Any
    unknown or malformed operation falls back to an Idle action.
    """
    raw_action = out
    try:
        raw_action = raw_action.split('Grounded Operation:')[1]
        action = raw_action.split(" ")[0]
        if action == 'tap':
            # Coordinates come as "[[x,y]]" in a 0..1000 grid; normalize to 0..1.
            coords = raw_action.split('[[')[1].split(',')
            x = int(coords[0])
            y = int(coords[1].split(']]')[0])
            point = (x / 1000, y / 1000)
            return AndroidAction(action_type=ActionType.DualPoint,
                                 touch_point=point, lift_point=point)
        if "type" in action:
            # The typed text is the first double-quoted span.
            return AndroidAction(action_type=ActionType.Type,
                                 typed_text=raw_action.split('"')[1])
        # Keyword phrases, checked in the same order as the original chain.
        for phrase, kind in (("press home", ActionType.GoHome),
                             ("press back", ActionType.GoBack),
                             ("press enter", ActionType.Enter),
                             ("task complete", ActionType.TaskComplete),
                             ("task impossible", ActionType.TaskImpossible)):
            if phrase in raw_action:
                return AndroidAction(action_type=kind)
        # Fixed-geometry swipes expressed as touch/lift point pairs.
        swipes = {
            "swipe up": ((0.5, 0.5), (0.5, 0.2)),
            "swipe down": ((0.5, 0.2), (0.5, 0.5)),
            "swipe left": ((0.8, 0.5), (0.2, 0.5)),
            "swipe right": ((0.2, 0.5), (0.8, 0.5)),
        }
        for phrase, (touch, lift) in swipes.items():
            if phrase in raw_action:
                return AndroidAction(action_type=ActionType.DualPoint,
                                     touch_point=touch, lift_point=lift)
        print(f"Action {raw_action} not supported yet.")
        return AndroidAction(action_type=ActionType.Idle)
    except Exception as e:
        print(f"Action {raw_action} Parsing Error: {e}")
        return AndroidAction(action_type=ActionType.Idle)
"touch_point": "[{act.touch_point[1]:.4f}, {act.touch_point[0]:.4f}]", "lift_point": "[{act.lift_point[1]:.4f}, {act.lift_point[0]:.4f}]", "typed_text": ""' elif act.action_type == ActionType.Type: return f'"action_type": "TYPE", "touch_point": "[-1.0, -1.0]", "lift_point": "[-1.0, -1.0]", "typed_text": "{act.typed_text}"' elif act.action_type == ActionType.GoBack: return f'"action_type": "PRESS_BACK", "touch_point": "[-1.0, -1.0]", "lift_point": "[-1.0, -1.0]", "typed_text": ""' elif act.action_type == ActionType.GoHome: return f'"action_type": "PRESS_HOME", "touch_point": "[-1.0, -1.0]", "lift_point": "[-1.0, -1.0]", "typed_text": ""' elif act.action_type == ActionType.Enter: return f'"action_type": "PRESS_ENTER", "touch_point": "[-1.0, -1.0]", "lift_point": "[-1.0, -1.0]", "typed_text": ""' elif act.action_type == ActionType.TaskComplete or act.action_type == ActionType.TaskImpossible: return f'"action_type": "STATUS_TASK_COMPLETE", "touch_point": "[-1.0, -1.0]", "lift_point": "[-1.0, -1.0]", "typed_text": ""' else: print(f"Action {act} not supported yet.") return "" def autoui_prepare_prompt(task, history): prompt = "Previous Actions: " for act in history[-1:]: prompt += f"{to_autoui(act)} " prompt += f"Goal: {task}" return prompt ================================================ FILE: digirl/environment/android/client.py ================================================ from gradio_client import Client from PIL import Image from .env import AndroidAction, ActionType from typing import Dict, Union from time import sleep from abc import ABC, abstractmethod class AbstractAgent(ABC): @abstractmethod def act(self, task:str, image_path:str)->Union[AndroidAction, Dict]: pass class AutoUI: def __init__(self, url): self.client = Client(url) self.reset_history() def predict(self, text:str, image_path:str)->str: for _ in range(3): try: out = self.client.predict(text, image_path) break except: sleep(1) return out @classmethod def to_autoui(self, act: AndroidAction): if 
def _translate_action(self, out):
    """Parse the model's "Action Decision: ..." string into an AndroidAction.

    The decision is expected to contain exactly six ", "-separated fields:
    action_type, the two touch-point halves, the two lift-point halves, and
    typed_text. Coordinates arrive as [y, x] and are reversed to (x, y).
    Unknown types or parse failures produce an Idle action; a malformed
    field count raises before the try block, matching the original.
    """
    decision = out.split("Action Decision: ")[1]
    # NOTE: this unpack (exactly six fields) intentionally happens outside
    # the try, so a wrong field count propagates as in the original code.
    kind_field, tp_a, tp_b, lp_a, lp_b, typed_field = decision.split(", ")
    touch_field = tp_a + ", " + tp_b
    lift_field = lp_a + ", " + lp_b
    try:
        kind = kind_field.split(": ")[1].strip('"')
        if kind == 'DUAL_POINT':
            def parse_yx(field):
                # '"touch_point": "[0.1, 0.2]"' -> [0.1, 0.2]
                body = field.split(": ")[1].strip('[]"')
                return [float(v) for v in body.split(", ")]
            touch_yx = parse_yx(touch_field)
            lift_yx = parse_yx(lift_field)
            return AndroidAction(action_type=ActionType.DualPoint,
                                 touch_point=touch_yx[::-1],
                                 lift_point=lift_yx[::-1])
        if kind == 'TYPE':
            text = typed_field.split(": ")[1].strip('"')
            return AndroidAction(action_type=ActionType.Type, typed_text=text)
        simple_kinds = {
            'PRESS_HOME': ActionType.GoHome,
            'PRESS_BACK': ActionType.GoBack,
            'PRESS_ENTER': ActionType.Enter,
            'STATUS_TASK_COMPLETE': ActionType.TaskComplete,
            'TASK_IMPOSSIBLE': ActionType.TaskImpossible,
        }
        if kind in simple_kinds:
            return AndroidAction(action_type=simple_kinds[kind])
        print(f"Action {out} not supported yet.")
        return AndroidAction(action_type=ActionType.Idle)
    except Exception as e:
        print(f"Action {out} Parsing Error: {e}")
        return AndroidAction(action_type=ActionType.Idle)
characters by adding a backslash before them for char in chars_to_escape: text = text.replace(char, '\\' + char) text = text.replace(" ", "%s") return text def kill_all_emulators(adb_path, emulators=None): # Get the list of connected devices result = subprocess.run([adb_path, 'devices'], stdout=subprocess.PIPE) devices_output = result.stdout.decode('utf-8') # Find all emulator device names using a regular expression running_emulators = re.findall(r'emulator-\d+', devices_output) # Shut down each emulator found for emulator in emulators: if emulator not in running_emulators: continue subprocess.run([adb_path, '-s', emulator, 'emu', 'kill']) print(f'{emulator} has been shut down.') if not emulators: print("No running emulators found.") def clone_avd(src_avd_name, tar_avd_name, android_avd_home): """ Clone the source AVD to the target AVD. Parameters: - src_avd_name: The name of the source AVD folder. - tar_avd_name: The name of the target AVD folder. - android_avd_home: The path to the .android/avd directory. This function copies the source AVD folder and its .ini file to a new target AVD and updates the paths inside the .ini files accordingly. 
""" # Paths for source and target AVD directories and .ini files src_avd_dir = os.path.join(android_avd_home, src_avd_name + '.avd') tar_avd_dir = os.path.join(android_avd_home, tar_avd_name + '.avd') src_ini_file = os.path.join(android_avd_home, src_avd_name + '.ini') tar_ini_file = os.path.join(android_avd_home, tar_avd_name + '.ini') # Copy the AVD folder colorful_print(f"Copying the AVD folder from {src_avd_dir} to {tar_avd_dir}", "green") if not os.path.exists(tar_avd_dir): shutil.copytree(src_avd_dir, tar_avd_dir) # Copy the .ini file and modify it for the new AVD with open(src_ini_file, 'r') as src_ini, open(tar_ini_file, 'w') as tar_ini: for line in src_ini: tar_ini.write(line.replace(src_avd_name, tar_avd_name)) # Update paths inside the target AVD's .ini files for ini_name in ['config.ini', 'hardware-qemu.ini']: ini_path = os.path.join(tar_avd_dir, ini_name) if os.path.exists(ini_path): with open(ini_path, 'r') as file: lines = file.readlines() with open(ini_path, 'w') as file: for line in lines: # Update paths and AVD name/ID new_line = line.replace(src_avd_name, tar_avd_name) file.write(new_line) # Update the snapshots' hardware.ini file if it exists snapshots_hw_ini = os.path.join(tar_avd_dir, 'snapshots', 'default_boot', 'hardware.ini') if os.path.exists(snapshots_hw_ini): with open(snapshots_hw_ini, 'r') as file: lines = file.readlines() with open(snapshots_hw_ini, 'w') as file: for line in lines: # Update AVD name/ID new_line = line.replace(src_avd_name, tar_avd_name) file.write(new_line) class AndroidEmulator(): def __init__(self, avd_name, max_steps, temp_path, evaluator, emulator_path="~/Android/Sdk/emulator/emulator", appium_server_url='http://localhost:4723', no_window=False, udid = None, feature_extractor = None, all_tasks = None, prepare_prompt = autoui_prepare_prompt, translate_action = None, save_images = False, task_id=None, task_split="train", sample_mode=None, record=False): """ temp_path temporary path to store the images for evaluation 
""" self.temp_path = temp_path if not os.path.exists(temp_path): os.makedirs(temp_path) self.emulator_path = os.path.expanduser(emulator_path) self.avd_name = avd_name self.save_images = save_images self.image_id = str(time.time()) port_number = udid.split("-")[-1] self.udid = udid cprint(colored(f"Starting the Emulator", "green")) command = f"""{self.emulator_path} -avd {self.avd_name} "-no-audio" "-skip-adb-auth" "-no-boot-anim" "-gpu" "auto" "-no-snapshot-save" -port {port_number}""" if no_window: command += " -no-window" print(f"executing command {command}") self.emulator_process = subprocess.Popen(command, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) sleep(30) self.record = record if self.record: self.record_random_id = random.randint(0, 100000) try_record_command = f"""adb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_1.mp4""" # redirect the output and error to the output of the main process import sys print(f"Trying to record the screen of {self.udid}") self.try_record_process = subprocess.Popen(try_record_command, shell=True, stdout=sys.stdout, stderr=sys.stderr) sleep(20) self.try_record_process.terminate() try: self.try_record_process.wait(timeout=20) except subprocess.TimeoutExpired: self.try_record_process.kill() self.try_record_process.wait() sleep(5) print(f"Recording the screen of {self.udid}") do_record_command = f"""adb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_1.mp4 && adb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_2.mp4 && adb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_3.mp4 && adb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_4.mp4 && adb -s {self.udid} shell screenrecord --size 540x1140 
--bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_5.mp4 && adb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_6.mp4""" self.record_process = subprocess.Popen(do_record_command, shell=True, stdout=subprocess.PIPE, preexec_fn=os.setsid) # should be good the second time sleep(5) capabilities = dict( platformName='Android', automationName='uiautomator2', deviceName='Android', newCommandTimeout="120000", adbExecTimeout="120000", uiautomator2ServerInstallTimeout="120000", uiautomator2ServerLaunchTimeout="120000", uiautomator2ServerReadTimeout="120000", noSign=True ) if udid: capabilities["udid"] = udid self.options = UiAutomator2Options().load_capabilities(capabilities) self.appium_server_url = appium_server_url for i in range(3): try: self.driver = webdriver.Remote(self.appium_server_url, options=self.options) print("connected!") break except Exception as e: cprint(colored(f"Failed to connect to the appium server: {e}\n Retrying", "red")) if i == 3: raise Exception("Failed to connect to the appium server") sleep(20) self.terminated = False self.max_steps = max_steps self.steps = 0 self.feature_extractor = feature_extractor screen_size = self.driver.get_window_size() self.screen_size = (screen_size["width"], screen_size["height"]) if sample_mode == "random": # randomly sample a task from the task set self.current_task = random.choice(all_tasks) elif sample_mode == "sequential": self.current_task = all_tasks[task_id] else: print("Invalid sample mode") self.prepare_prompt = prepare_prompt self.translate_action = translate_action self.history = [] self.evaluator = evaluator def terminate(self): if self.record: # send sigterm to the record process os.killpg(os.getpgid(self.record_process.pid), signal.SIGINT) sleep(5) os.system(f"adb -s {self.udid} pull /sdcard/video_{self.image_id}_1.mp4 {self.temp_path}") os.system(f"adb -s {self.udid} pull /sdcard/video_{self.image_id}_2.mp4 {self.temp_path}") 
os.system(f"adb -s {self.udid} pull /sdcard/video_{self.image_id}_3.mp4 {self.temp_path}") os.system(f"adb -s {self.udid} pull /sdcard/video_{self.image_id}_4.mp4 {self.temp_path}") os.system(f"adb -s {self.udid} pull /sdcard/video_{self.image_id}_5.mp4 {self.temp_path}") os.system(f"adb -s {self.udid} pull /sdcard/video_{self.image_id}_6.mp4 {self.temp_path}") print("it's okay if you see errros like failed to stat remote object '/sdcard/video_1718747809.256034_{i}.mp4' where i is larger than 1.") sleep(5) self.emulator_process.terminate() try: self.emulator_process.wait(timeout=20) except subprocess.TimeoutExpired: self.emulator_process.kill() self.emulator_process.wait() self.terminated = True def refresh_driver(self): self.driver.quit() self.driver = webdriver.Remote(self.appium_server_url, options=self.options) def count_white_pixels(self, img): # Convert the image to RGB format if it's not img = img.convert('RGB') # Convert image to numpy array data = np.array(img) # Count white pixels # Assuming 'white' is (255, 255, 255) white_count = np.sum(np.all(data > 240, axis=-1)) return white_count > 2_300_000 def get_obs(self): for _ in range(3): try: is_white = True for _ in range(5): if not is_white: break sleep(5) screenshot_str = self.driver.get_screenshot_as_base64() imgdata = base64.b64decode(screenshot_str) image = Image.open(BytesIO(imgdata)) is_white = self.count_white_pixels(image) # print("Saving observation!") image.save(os.path.join(self.temp_path, f"{self.image_id}_{self.steps}.png")) # Assuming 'image' is your PIL Image object in RGBA mode if image.mode == 'RGBA': image = image.convert('RGB') if self.feature_extractor is not None: image = self.feature_extractor.to_feat(image) # colorful_print(f"history: {self.history}", "green") # colorful_print(f"prompt: {self.prepare_prompt(self.current_task, self.history)}", "yellow") return {"prompt": self.prepare_prompt(self.current_task, self.history), "image_feature": image, "task": self.current_task, 
"image_path": os.path.join(self.temp_path, f"{self.image_id}_{self.steps}.png"), "video_path": os.path.join(self.temp_path, f"video_{self.record_random_id}.mp4") if self.record else None } except Exception as e: print(f"Exception happened during screenshotting") print(e) print(traceback.format_exc()) sleep(6) continue def step(self, raw_action: str): if self.terminated: return None try: # colorful_print(f"raw action: {raw_action}", "green") action = self.translate_action(raw_action) # colorful_print(f"translated action: {action}", "green") except Exception as e: print(e) print(f"Failed to translate action: {raw_action}, terminating the environment") action = AndroidAction(action_type=ActionType.TaskImpossible) self.history.append(action) self.steps += 1 if self.steps > self.max_steps: action = AndroidAction(action_type=ActionType.TaskImpossible) cprint(colored(f"Terminate the Emulator: Max Steps Exceeded {self.max_steps}.", "red")) screenshot = None info = {} for i in range(2): try: if action.action_type == ActionType.DualPoint: assert len(action.touch_point) == 2 assert len(action.lift_point) == 2 touch_x = action.touch_point[0] * self.screen_size[0] touch_y = action.touch_point[1] * self.screen_size[1] lift_x = action.lift_point[0] * self.screen_size[0] lift_y = action.lift_point[1] * self.screen_size[1] if (touch_x - lift_x)**2 + (touch_y - lift_y)**2 < 10: self.driver.tap([(touch_x, touch_y)]) else: self.driver.swipe(touch_x, touch_y, lift_x, lift_y) elif action.action_type == ActionType.Type: # This doesn't work well because of active element for i in range(2): try: sleep(4) element = self.driver.switch_to.active_element element.send_keys(action.typed_text) break except Exception as e: cprint(f"The element is not loaded yet or agent did not click anything", "red") elif action.action_type == ActionType.GoBack: self.driver.back() elif action.action_type == ActionType.GoHome: self.driver.press_keycode(3) elif action.action_type == ActionType.Enter: 
self.driver.press_keycode(66) elif action.action_type == ActionType.TaskComplete: self.terminated = True elif action.action_type == ActionType.TaskImpossible: self.terminated = True elif action.action_type == ActionType.Idle: pass else: raise Exception(f"Unknown action type: {action.action_type}") action_success = True screenshot = self.get_obs() break except Exception as e: cprint(colored("an Exception occurred during environment interaction", "red")) print(e) cprint(colored("Retrying", "red")) sleep(10) if i == 1: action_success = False info["error"] = str(e) self.driver.quit() self.terminate() return None continue r = 0 if screenshot is not None and self.evaluator is not None: r = self.evaluator([os.path.join(self.temp_path, f"{self.image_id}_{self.steps-1}.png"), os.path.join(self.temp_path, f"{self.image_id}_{self.steps}.png")], self.current_task) info["action_success"] = action_success #terminate the environment if there is a success if r >= 1 or self.terminated: self.driver.quit() self.terminate() if self.terminated and not self.save_images: os.system(f"rm -rf {self.temp_path}/*") return screenshot, r, self.terminated class BatchedAndroidEnv(): """ This class wraps around the android emulator and provides a more infrastructure for free-form GUI navigation This is a batched version for Android Env cache_avd is the avd to be used the avd is the initial one """ def __init__(self, avd_name, cache_avd_names, udids, appium_base_port, android_avd_home: str = '/nfs/kun2/users/yifei/openended/.android/android_avd/avd', emulator_path: str = '~/Android/Sdk/emulator/emulator', adb_path: str = "~/Library/Android/sdk/platform-tools/adb", run_headless: bool = False, max_steps: int = 10, use_feature_extractor = False, evaluators = None, prepare_prompt = autoui_prepare_prompt, translate_action = None, device = "cuda:2", temp_path = "/nfs/kun2/users/yifei/openended/logs/images", save_images = False, all_tasks = None, task_split = "train", sample_mode = None, record = False): 
self.android_avd_home = os.path.expanduser(android_avd_home) self.emulator_path = os.path.expanduser(emulator_path) self.adb_path = os.path.expanduser(adb_path) self.avd_name = avd_name self.save_images = save_images self.bsize = len(cache_avd_names) self.cache_avd_names = cache_avd_names self.run_headless = run_headless self.max_steps = max_steps self.emulator_group_offset = 0 if use_feature_extractor: self.feature_extractor = ImageFeatureExtractor("cpu") else: self.feature_extractor = None self.device = device self.record = record self.all_tasks = all_tasks self.task_split = task_split self.prepare_prompt = prepare_prompt self.translate_action = translate_action self.temp_path = temp_path if evaluators is None: evaluators = [None for _ in range(self.bsize)] self.evaluators = evaluators if not os.path.exists(temp_path): os.makedirs(temp_path) self.udids = udids self.base_port = appium_base_port self.appium_processes = [] self.sample_mode = sample_mode # Start the appium servers for i in range(self.base_port, self.base_port+self.bsize): self.appium_processes.append(subprocess.Popen(f"appium --relaxed-security -p {i} > /dev/null", stdout=subprocess.DEVNULL, shell=True)) print("starting appium server at port ", i) self.appium_server_urls = [f"http://localhost:{i}" for i in range(self.base_port, self.base_port+self.bsize)] def reset_appium(self): for p in self.appium_processes: p.terminate() try: p.wait(timeout=20) except subprocess.TimeoutExpired: p.kill() p.wait() os.system("pkill -f appium") self.base_port = self.base_port + self.bsize * 2 self.appium_processes = [] for i in range(self.base_port, self.base_port+self.bsize): self.appium_processes.append(subprocess.Popen(f"appium --relaxed-security -p {i} > /dev/null", stdout=subprocess.DEVNULL, shell=True)) # sleep(10) self.appium_server_urls = [f"http://localhost:{i}" for i in range(self.base_port, self.base_port+self.bsize)] def reset(self): """ Reset the emulator to a clean state """ # If the emulator is already 
running, kill it, # Then delete the cache AVD kill_all_emulators(self.adb_path, emulators=self.udids) if hasattr(self, "emulator_process"): self.emulator_process.send_signal(signal.SIGINT) self.emulator_process.wait() self.emulators = [] for cache_avd_name in self.cache_avd_names: # print(cache_avd_name) for _ in range(3): try: cache_avd_path = os.path.join(self.android_avd_home, cache_avd_name + ".avd") cache_avd_ini_path = os.path.join(self.android_avd_home, cache_avd_name + ".ini") if os.path.exists(cache_avd_path): shutil.rmtree(cache_avd_path, ignore_errors=True) if os.path.exists(cache_avd_ini_path): os.remove(cache_avd_ini_path) sleep(2) # Clone the source AVD and start the emulator clone_avd(self.avd_name, cache_avd_name, self.android_avd_home) break except OSError as e: print(f"Failed to reset the emulator: {e}") import traceback print(traceback.format_exc()) sleep(20) # # use parallel version only when you've got nice CPUs, or it will error out # def reset_emulator(cache_avd_name, avd_name, android_avd_home): # for _ in range(3): # try: # cache_avd_path = os.path.join(android_avd_home, cache_avd_name + ".avd") # cache_avd_ini_path = os.path.join(android_avd_home, cache_avd_name + ".ini") # if os.path.exists(cache_avd_path): # shutil.rmtree(cache_avd_path, ignore_errors=True) # if os.path.exists(cache_avd_ini_path): # os.remove(cache_avd_ini_path) # sleep(2) # # Clone the source AVD and start the emulator # clone_avd(avd_name, cache_avd_name, android_avd_home) # break # except OSError as e: # print(f"Failed to reset the emulator: {e}") # import traceback # print(traceback.format_exc()) # sleep(20) # with concurrent.futures.ThreadPoolExecutor() as executor: # futures = [executor.submit(reset_emulator, cache_avd_name, self.avd_name, self.android_avd_home) for cache_avd_name in self.cache_avd_names] # for future in futures: # future.result() def emulator_constructor(udid, appium_server_url, cache_avd_name, evaluator, task_id, task_split): return 
AndroidEmulator(avd_name=cache_avd_name, max_steps=self.max_steps, emulator_path=self.emulator_path, appium_server_url=appium_server_url, no_window=self.run_headless, udid = udid, feature_extractor = self.feature_extractor, prepare_prompt = self.prepare_prompt, translate_action = self.translate_action, all_tasks = self.all_tasks, evaluator = evaluator, temp_path = os.path.join(self.temp_path, cache_avd_name), save_images = self.save_images, task_id=task_id, task_split=task_split, sample_mode=self.sample_mode, record=self.record) with concurrent.futures.ThreadPoolExecutor() as executor: jobs = [executor.submit(emulator_constructor, udid, appium_server_url, cache_avd_name, evaluator, task_id, self.task_split) for udid, appium_server_url, cache_avd_name, evaluator, task_id in zip(self.udids, self.appium_server_urls, self.cache_avd_names, self.evaluators, range(self.emulator_group_offset, self.emulator_group_offset+self.bsize))] self.emulators = [job.result() for job in jobs] with concurrent.futures.ThreadPoolExecutor() as executor: jobs = [executor.submit(emulator.get_obs) for emulator in self.emulators] # for i, job in enumerate(jobs): # colorful_print(f"Getting observation from emulator {i}: {job.result()}", "green") return [job.result() for job in jobs] def step(self, actions): if not self.emulators: raise Exception("Please call reset() before calling step()") with concurrent.futures.ThreadPoolExecutor() as executor: jobs = [executor.submit(emulator.step, action) for emulator, action in zip(self.emulators, actions)] results = [job.result() for job in jobs] return results ================================================ FILE: digirl/environment/android/evaluate.py ================================================ import json import requests from PIL import Image from typing import List, Tuple from gradio_client import Client from transformers import AutoTokenizer import numpy as np from gradio_client.utils import QueueError, file from time import sleep import re 
def extract_status(text):
    """Return the first word following a 'Status:' marker in *text*.

    The evaluator prompts ask the model to end its answer with a line like
    "Status: success" or "Status: failure"; this pulls that verdict out.
    Returns None when no such marker is present.
    """
    found = re.search(r'Status:\s*(\w+)', text)
    return found.group(1) if found else None
The screenshot shows I'm in the eBay ebay website with some search suggestions for "corsair k70", but it does not show search results of the product, which usually includes price and the product details. Status: failure""", #3 f"""Task: Go to walmart.com, search for "macbook air" Q: What should I expect to see on the screenshot if I've gone to walmart.com AND searched for "macbook air"? A: I should expect to see I'm in the Walmart website and search results for "razer huntsman". The screenshot shows I'm in Google search with some search suggestions for "macbook air", not Walmart. Status: failure""", #4 f"""Task: Go to walmart.com, search for "razer huntsman" Q: What should I expect to see on the screenshot if I've gone to walmart.com AND searched for "razer huntsman"? A: I should expect to see I'm in the Walmart website and search results for "razer huntsman". The screenshot shows I'm in the Walmart website, but there's no search results for "razer huntsman", which usually includes the product details and price. Status: failure""", #5 f"""Task: Go to ebay.com, search for "lenovo thinkpad" Q: What should I expect to see on the screenshot if I've gone to ebay.com AND searched for "lenovo thinkpad"? A: I should expect to see I'm in the eBay website and search results for "lenovo thinkpad". The screenshot shows I'm in the eBay website and have several search results for "lenovo thinkpad". Status: success""", #6 f"""Task: Go to ebay.com, search for "razer thresher", select the first entry Q: What should I expect to see on the screenshot if I've gone to ebay.com AND going to the first entry of the search results of "razer thresher"? A: I should expect to see I'm in the eBay website and detailed information of a razer thresher product, like a big image of the product, the price, and the product details. 
The screenshot shows I'm in the eBay website but with more than one search results for "razer thresher", which means the user has not selected the first entry of the search results. Status: failure""", #7 f"""Task: Go to target.com, search for "razer kraken", and select the first entry Q: What should I expect to see on the screenshot if I've gone to target.com AND gone to the first entry of the search results of "razer kraken"? A: I should expect to see I'm in the Target website and can see detailed information of a razer thresher product, like a big image of the product, the price, and the product details. The screenshot shows I'm in Google Search, not in the Target website. Status: failure""", #8 f"""Task: Go to ebay.com, search for "acer predator", and select the first entry Q: What should I expect to see on the screenshot if I've gone to ebay.com AND gone to the first entry of the search results of "acer predator"? A: I should expect to see I'm in the eBay website with detailed information of an acer predator product, like a big image of the product, the price, and the product details. The screenshot shows I'm in the eBay website and have more than one search results for "acer predator", which means the user has not selected the first entry of the search results. Status: failure""", #9 f"""Task: Go to bestbuy.com, search for "macbook", select the first entry Q: What should I expect to see on the screenshot if I've gone to bestbuy.com AND gone to the first entry of the search results of "macbook"? A: I should expect to see I'm in the eBay website and detailed information of a macbook product, like a big image of the product, the price, and the product details. The screenshot shows I'm in the eBay website and have detailed information of Macbook Air, including the price and the product details. Status: success""", #10 f"""Task: {intent} Respond in this format: Q: What should I expect to see on the screenshot if I've ? 
A: I should expect to see Status: success or failure (don't return anything else) Start with "Q:"."""] image_paths = os.path.join(os.path.dirname(__file__), "assets", "images") cot_image_list = [os.path.join(image_paths, "step1_bestbuy.png"), # 0 os.path.join(image_paths, "step1_costco.png"), # 1 os.path.join(image_paths, "step2_bestbuy.png"), # 2 os.path.join(image_paths, "step2_ebay.png"), # 3 os.path.join(image_paths, "step2_walmart.png"), # 4 os.path.join(image_paths, "step2_walmart2.png"), # 5 os.path.join(image_paths, "step2_ebay2.png"), # 6 os.path.join(image_paths, "step3_ebay.png"), # 7 os.path.join(image_paths, "step3_target.png"), # 8 os.path.join(image_paths, "step3_ebay2.png"), # 9 os.path.join(image_paths, "step3_bestbuy.png"), # 10 "" # -1 ] return system_msg, prompt, cot_image_list def build_prompt_general(intent) -> Tuple[str, str]: system_msg = """You're an expert in evaluating whether the Screenshot successfully completes the Task.""" prompt = [f"""Task: Open the settings. Q: What should I expect to see on the screenshot if I've opened the settings? A: I should expect to see I'm in the settings app. The screenshot shows the home screen of a mobile device, with various app icons displayed, including the settings app icon, but the settings app is not opened. Status: failure""", #0 f"""Task: Find hotels in washington dc Q: What should I expect to see on the screenshot if I've searched for hotels in Washington, DC? A: I should expect to see I'm in a search results page for hotels in Washington, DC. The screenshot shows a Google search page with the search field populated with the query "hotels in washington dc" and a list of suggested searches related to hotels in Washington, DC, but it does not show any search results for hotels in Washington, DC. Status: failure""", #1 f"""Task: What's a good restaurant in Portland? Q: What should I expect to see on the screenshot if I've searched for a good restaurant in Portland? 
A: I should expect to see I'm in a search results page for a good restaurant in Portland. The screenshot shows a Google search page with a search input field for "good restaurant in portland" and a map results preview showing business locations near Portland, like "Li Pigeon", "Portland City Grill", and "Higgins", Status: success""", #2 f"""Task: What's on the menu at In-N-Out? Q: What should I expect to see on the screenshot if I've searched for the menu at In-N-Out? A: I should expect to see a menu page for In-N-Out, including product names, thumbnails and prices. The screenshot shows a Google search page with a search input field for "In-N-Out menu" and some page snippets of In-N-Out indicating potential menu items, but does not actually show the actual menu. Status: failure""", #3 f"""Task: What's the news in Suriname? Q: What should I expect to see on the screenshot if I've searched for the news in Suriname? A: I should expect to see some news in Suriname, such as someone did something or some accident happens in Suriname. The screenshot shows a Google search page with a search input field for "Suriname news today" and some page snippets indicating potential news items, but does not actually show the news. Status: failure""", #4 f"""Task: What's the weather like in Chicago? Q: What should I expect to see on the screenshot if I've searched for the weather in Chicago? A: I should expect to see some exact values like temperature, humidity, wind speed, and weather condition in Chicago. The screenshot shows a Google search page with a search input field for "weather in Chicago" and some page snippets indicating potential weather information. Although one page snippet contains some weather information, the information is not comprehensive enough to determine the weather in Chicago. Status: failure""", #5 f"""Task: Set an alarm for 6pm. Q: What should I expect to see on the screenshot if I've set an alarm for 6pm? 
A: I should expect to see some alarms including a 6pm alarm activated in the clock app. The screenshot shows an attempt to set an alarm for 6pm in the clock app, but the alarm is not set yet. Status: failure""", #6 f"""Task: What's the news in French today? Q: What should I expect to see on the screenshot if I've searched for the news in French today? A: I should expect to see some news in French today, such as someone did something or some accident happens in French today. The screenshot shows I'm in the website france24.com but blocked with a cookie consent banner. Status: failure""", #7 f"""Task: What's the news in French today? Q: What should I expect to see on the screenshot if I've searched for the news in French today? A: I should expect to see some news in French today, such as someone did something or some accident happens in French today. The screenshot shows I'm in the website france24.com and can see the news, like something about the Olympic flame. Status: success""", #8 f"""Task: {intent} Respond in this format: Q: What should I expect to see on the screenshot if I've ? 
@retry(wait=wait_chain(*[wait_fixed(1) for i in range(3)] + [wait_fixed(3) for i in range(2)] + [wait_fixed(5)]), stop=stop_after_attempt(5))
def call_gemini(client, system_msg, prompt, image_list, image_path):
    """Query a Gemini model to judge whether a screenshot completes a task.

    Retried on any exception, up to 5 attempts total; waits between attempts
    follow the chain 1s, 1s, 1s, 3s (3s/5s entries cover later retries).

    Args:
        client: a genai.GenerativeModel instance.
        system_msg: system instruction prepended to the request.
        prompt: either a list (few-shot mode — prompt[i] pairs with
            image_list[i] for every example, and prompt[-1] is the query for
            image_path) or a single string (zero-shot mode).
        image_list: paths of few-shot example screenshots; the last entry is
            a placeholder and is never opened (the loop stops at len-1).
        image_path: path of the screenshot being evaluated.

    Returns:
        The model's response text.
    """
    if type(prompt) == list:
        # Few-shot: interleave each example screenshot with its worked answer.
        input_msg = [system_msg + "\n" + "=====Examples====="]
        for i in range(len(image_list)-1):
            input_msg += [
                "\nScreenshot:",
                process_image(image_list[i]),
                prompt[i]
            ]
        # Then the screenshot actually under evaluation.
        input_msg += [
            "=====Your Turn=====",
            "\nScreenshot: ",
            process_image(image_path),
            prompt[-1]
        ]
        response = client.generate_content(
            input_msg
        )
    else:
        # Zero-shot: a single prompt string plus the screenshot.
        response = client.generate_content(
            [
                system_msg + "\n" + prompt,
                process_image(image_path)
            ]
        )
    # resolve() blocks until the (possibly streamed) response is complete.
    response.resolve()
    response_text = response.text
    return response_text
def __call__(self, last_two_images, intent: str) -> int:
    """Judge whether the latest screenshot completes the task.

    last_two_images: a list of two image paths, [last_image_path, this_image_path]
    intent: a string representing the user's intent

    Returns:
    - 1 if the task is judged complete
    - 0 otherwise — including when the evaluation is skipped because the
      screen did not change or the screenshot was seen before
    (note: ints, not booleans, despite the original annotation)

    If there's an error, the retry wrapper around the Gemini call handles it;
    a persistent failure propagates from _evaluate.
    """
    with Image.open(last_two_images[0]) as img1_src, Image.open(last_two_images[1]) as img2_src:
        img1 = np.array(img1_src)
        img2 = np.array(img2_src)
    # If the screen barely changed between the two steps, the action was a
    # no-op: skip the (slow, paid) Gemini call entirely.
    if np.mean((img1.astype(np.float64) - img2.astype(np.float64))**2) < self.threshold:
        print("skipping evaluation due to same images")
        return 0
    # Cache of previously-judged screenshots: this is an approximation, but it
    # should be fine to add frequently viewed false negatives.
    if self.img_matrix is None:
        # First screenshot ever seen — start the cache.
        self.img_matrix = np.expand_dims(img2, axis = 0)
    # the else branch will always trigger after the first time
    else:
        distances = np.mean((self.img_matrix.astype(np.float64) - img2.astype(np.float64))**2, axis = (1,2,3))
        if np.min(distances) < self.threshold:
            print("skipping evaluation due to previously seen image, current img_matrix size: ", self.img_matrix.shape[0])
            return 0
        elif self.img_matrix.shape[0] < self.cache_max:
            # Remember up to cache_max distinct screenshots.
            self.img_matrix = np.concatenate([self.img_matrix, np.expand_dims(img2, axis = 0)], axis = 0)
    print(f"Task: {intent}, image: {last_two_images[1]}")
    eval_res = self._evaluate(intent, last_two_images[1])
    del img1, img2
    return eval_res
def add_mc_return(trajectory, gamma = 0.95):
    """Annotate every step of *trajectory* with its discounted Monte-Carlo return.

    For step i, mc_return = sum over j >= i of gamma**(j-i) * reward_j.
    Mutates the step dicts in place and returns the same trajectory list.
    """
    rewards = np.array([step["reward"] for step in trajectory]).reshape(1, -1)
    horizon = rewards.shape[1]
    # discounts[j] == gamma**(j+1); the ratio discounts[j]/discounts[i]
    # therefore equals gamma**(j-i).
    discounts = np.cumprod(np.full((1, horizon), gamma))
    # Upper-triangular matrix of gamma**(j-i): row i weights rewards j >= i.
    discount_matrix = np.triu(discounts.reshape(1, -1) / discounts.reshape(-1, 1))
    returns = (rewards * discount_matrix).sum(axis=1)
    for step, ret in zip(trajectory, returns):
        step["mc_return"] = ret
    return trajectory
post_f: function to add additional attributes to the trajectory """ # broadcast the batch size bsize = torch.Tensor([0,]).to(accelerator.device) if accelerator.is_main_process: bsize[0] = env.bsize accelerate.utils.broadcast(bsize) bsize = int(bsize.item()) all_trajectories = [] if accelerator.is_main_process: if hasattr(agent, "critic"): env.feature_extractor.model = env.feature_extractor.model.to(env.device) agent.critic.to("cpu") for num_t in tqdm(range(num_trajectories//bsize), disable = not use_tqdm): if accelerator.is_main_process: env.emulator_group_offset = iter * num_trajectories + num_t * bsize for _ in range(3): try: done = False trajectories = [[] for _ in range(bsize)] #handle the case where the reset fails and timeouts reset_success = torch.Tensor([False,]).to(accelerator.device) while not all(reset_success): for _ in range(5): try: if accelerator.is_main_process: with timeout(seconds=240): # change this if frequently timeout batch_obs = env.reset() #the observation space is now a tuple of (text, image) if type(batch_obs[0]['image_feature']) == torch.Tensor: batch_img = [obs["image_feature"] for obs in batch_obs] else: batch_img = ["Image feature is not a tensor" for _ in range(bsize)] if env.feature_extractor is not None: # colorful_print("autoui has critic, so batch_obs being refractored", "red") batch_obs = [obs["prompt"] for obs in batch_obs] reset_success[0] = True accelerate.utils.broadcast(reset_success) break except Exception as e: print(f"Error in environment reset") print(e) if hasattr(env, "reset_appium"): print("Resetting appium") env.reset_appium() accelerate.utils.broadcast(reset_success) continue batch_done = torch.Tensor([False,]*bsize).to(accelerator.device) accelerate.utils.broadcast(batch_done) steps = 0 while not all(batch_done): steps += 1 if accelerator.is_main_process: # print(f"Environment stpes {str(steps)}") # print("getting actions!") if env.feature_extractor is not None: action = agent.get_action(batch_obs, 
torch.cat([i.unsqueeze(0) for i in batch_img], dim = 0)) else: action = agent.get_action(batch_obs, None) # import IPython; IPython.embed(); exit(1) with timeout(seconds=5*60): batch_return = env.step(decode_f(action)) # batch_return = env.step(decode_f(action)) # import IPython; IPython.embed() for i,result in zip(range(bsize), batch_return): if result is None: batch_done[i] = True continue obs_dict, r, done = result next_img = obs_dict["image_feature"] next_obs = obs_dict["prompt"] if not hasattr(agent, "critic"): trajectories[i].append({"observation": batch_obs[i], \ "next_observation": next_obs, \ "image_features": None, \ "image_path": obs_dict["image_path"], \ "next_image_features": None, \ "task": obs_dict["task"],\ "reward": r, \ "done": done, \ "action": action[i]}) batch_obs[i] = obs_dict else: trajectories[i].append({"observation": batch_obs[i], \ "next_observation": next_obs, \ "image_features": batch_img[i].cpu().numpy(), \ "image_path": obs_dict["image_path"], \ "video_path": obs_dict["video_path"], \ "next_image_features": next_img.cpu().numpy(), \ "task": obs_dict["task"],\ "reward": r, \ "done": done, \ "action": action[i]}) batch_obs[i] = next_obs batch_img[i] = next_img batch_done[i] = done accelerate.utils.broadcast(batch_done) # print("waiting for everyone") # accelerator.wait_for_everyone() # obs = next_obs if accelerator.is_main_process: print(trajectories[0][-1]["next_observation"]) all_trajectories += [post_f(add_mc_return(add_trajectory_reward(trajectory), gamma=gamma))\ for trajectory in trajectories] break except Exception as e: print(f"Error in environment interaction") import traceback print(traceback.format_exc()) print(e) if hasattr(env, "reset_appium"): print("Resetting appium") env.reset_appium() continue if accelerator.is_main_process: if env.feature_extractor is not None: env.feature_extractor.model = env.feature_extractor.model.to("cpu") if hasattr(agent, "critic"): agent.critic.to(agent.device) return all_trajectories 
class timeout:
    """Context manager that raises TimeoutError when its body runs too long.

    Implemented with SIGALRM, so it only works on Unix and only in the main
    thread, with whole-second granularity.

    Usage:
        with timeout(seconds=60):
            slow_call()
    """

    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message

    def handle_timeout(self, signum, frame):
        # SIGALRM handler: abort whatever the guarded body was doing.
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # Install the handler first, then arm the alarm clock.
        signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.alarm(self.seconds)

    def __exit__(self, exc_type, exc_value, tb):
        # Always disarm the pending alarm, whether or not the body raised.
        signal.alarm(0)
def prepare(self):
    """Wrap the policy model and both critics with `accelerate`.

    Must be called once before distributed / mixed-precision training so the
    accelerator can place and (if configured) DDP-wrap each module.
    """
    self.model = self.accelerator.prepare(self.model)
    self.critic = self.accelerator.prepare(self.critic)
    self.trajectory_critic = self.accelerator.prepare(self.trajectory_critic)
def get_log_prob(self, observation, image_features, action):
    """Per-token log-probabilities of *action* under the policy.

    Args:
        observation: list of prompt strings (templated if self.template set).
        image_features: tensor whose trailing dim is sliced to the last 1408
            entries (the BLIP-2 pooled-feature size used by the policy).
        action: list of action strings whose likelihood is scored.

    Returns:
        Tensor of shape (batch, action_len): log P(token) masked by the
        action attention mask (padding positions contribute 0).
    """
    image_features = image_features[...,-1408:]
    if self.template is not None:
        observation = [self.template.replace("{obs}", obs) for obs in observation]
    obs_ids = self.tokenizer(observation, return_tensors='pt', padding=True, max_length=512, truncation = True).to(self.device)
    action_ids = self.tokenizer(action, return_tensors='pt', padding=True, max_length=512, truncation = True).to(self.device)
    # Teacher-forced forward pass: labels=action ids yields logits aligned
    # with the action tokens.
    outputs = self.model(input_ids = obs_ids["input_ids"], image_ids = image_features, attention_mask = obs_ids["attention_mask"], labels = action_ids["input_ids"])
    ## TODO: need to check if token shifting is done correctly
    prediction_probs = self.softmax(outputs.logits)
    # Pick the probability assigned to each realized action token.
    selected_prediction_probs = torch.take_along_dim(prediction_probs,\
        action_ids["input_ids"].unsqueeze(2), dim=2).squeeze(2)
    # Clamp away 0/1 so the log below stays finite and gradients bounded.
    selected_prediction_probs = torch.clamp(selected_prediction_probs, min=0.001, max=0.99)
    return torch.log(selected_prediction_probs)*action_ids["attention_mask"]
def _get_a_action(pair):
    """Query one CogAgent gradio endpoint for one observation.

    Args:
        pair: (client, obs) tuple — client is a gradio Client, obs is a dict
            with at least "task" (instruction string) and "image_path"
            (screenshot file to upload).

    Returns:
        The server's raw prediction string, or None after 3 failed attempts.
    """
    client, obs = pair
    text = f'What steps do I need to take to "{obs["task"]}"?(with grounding)'
    for attempt in range(3):
        try:
            out = client.predict(input_text=text, image_prompt=handle_file(obs['image_path']), api_name="/predict")
            return out
        except Exception as e:
            # FIX: narrowed from a bare `except:` so KeyboardInterrupt /
            # SystemExit propagate, and the failure is no longer silent.
            print(f"CogAgent request failed (attempt {attempt + 1}/3): {e}")
            sleep(1)
    return None
class TrajectoryCritic(torch.nn.Module):
    """Text-only critic that scores a trajectory from its observation string.

    Unlike VLMDoubleCritic, no image features are consumed here — the pooled
    LM state alone is mapped to 2 logits by a single linear head.
    """

    def __init__(self, device, accelerator, critic_lm, cache_dir, in_dim, out_dim):
        """
        Args:
            device: torch device for the LM and the linear head.
            accelerator: accelerate.Accelerator (stored, not used here).
            critic_lm: HF model name for the text encoder (e.g. roberta-base).
            cache_dir: HF cache directory.
            in_dim: pooled hidden size of the encoder.
            out_dim: NOTE(review) — ignored; overwritten with 2 below.
        """
        super(TrajectoryCritic, self).__init__()
        self.device = device
        self.accelerator = accelerator
        self.base_lm = AutoModel.from_pretrained(critic_lm, cache_dir=cache_dir).to(device)
        self.base_tokenizer = AutoTokenizer.from_pretrained(critic_lm, cache_dir=cache_dir)
        # Keep the most recent tokens when the observation exceeds max_length.
        self.base_tokenizer.truncation_side = 'left'
        out_dim = 2
        self.critic = nn.Linear(in_dim, out_dim).to(device)

    def forward(self, observation, detach_model=False):
        """Return 2 logits for each observation string.

        NOTE(review): `detach_model` is forcibly reset to False below, so the
        base LM always receives gradients through this critic.
        """
        detach_model = False
        obs_ids = self.base_tokenizer(observation, padding = True, return_tensors='pt', max_length=512, truncation = True).to(self.device)
        if detach_model:
            with torch.no_grad():
                lm_states = self.base_lm(**obs_ids).pooler_output
        else:
            lm_states = self.base_lm(**obs_ids).pooler_output
        return self.critic(lm_states)
class ImageFeatureExtractor:
    """Extracts a pooled BLIP-2 vision feature (length 1408) from a PIL image."""

    def __init__(self):
        # Set device based on CUDA availability
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # FIX: load the vision model by hub id instead of a hard-coded personal
        # NFS snapshot path ("/nfs/kun2/users/yifei/..."), so this works on any
        # machine; the standard HF cache still avoids re-downloads. This also
        # matches how the processor below is loaded.
        self.model = Blip2VisionModel.from_pretrained("Salesforce/blip2-opt-2.7b").to(self.device)
        self.processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")

    def to_feat(self, image: Image.Image):
        """Converts a PIL image to a feature representation using the BLIP2 model.

        Args:
            image: A PIL.Image object representing the image to convert.

        Returns:
            A 1-D CPU tensor of length 1408 (the vision model's pooled output).
        """
        with torch.no_grad():
            # Preprocess the image and move to the correct device
            inputs = self.processor(images=image, return_tensors="pt").to(self.device)
            # FIX: the original passed return_dict=False (tuple output) and then
            # accessed .pooler_output, which a tuple does not have. Use the
            # default dict-style output and read pooler_output from it.
            outputs = self.model(**inputs, output_attentions=False, output_hidden_states=False)
            image_features = outputs.pooler_output[0]  # size is 1408
            # Detach the tensor from the graph and move it to CPU
            image_features = image_features.detach().cpu()
        return image_features
r"decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight", ] def __init__(self, config: T5Config, img_dim=1408, num_actions=12, use_lm_head = True): super().__init__(config) self.model_dim = config.d_model self.shared = nn.Embedding(config.vocab_size, config.d_model) self.image_dense = nn.Linear(img_dim, config.d_model) self.mha_layer = torch.nn.MultiheadAttention(embed_dim=config.hidden_size, kdim=config.hidden_size, vdim=config.hidden_size, num_heads=1, batch_first=True) self.gate_dense = nn.Linear(2*config.hidden_size, config.hidden_size) self.sigmoid = nn.Sigmoid() encoder_config = copy.deepcopy(config) encoder_config.is_decoder = False encoder_config.use_cache = False encoder_config.is_encoder_decoder = False self.encoder = T5Stack(encoder_config, self.shared) decoder_config = copy.deepcopy(config) decoder_config.is_decoder = True decoder_config.is_encoder_decoder = False decoder_config.num_layers = config.num_decoder_layers self.decoder = T5Stack(decoder_config, self.shared) self.use_lm_head = use_lm_head if self.use_lm_head: self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False) # Initialize weights and apply final processing self.post_init() # Model parallel self.model_parallel = False self.device_map = None def remove_lm_head(self): self.use_lm_head = False self.lm_head = None def forward( self, input_ids: Optional[torch.LongTensor] = None, image_ids=None, attention_mask: Optional[torch.FloatTensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.BoolTensor] = None, head_mask: Optional[torch.FloatTensor] = None, decoder_head_mask: Optional[torch.FloatTensor] = None, cross_attn_head_mask: Optional[torch.Tensor] = None, encoder_outputs: Optional[Tuple[Tuple[torch.Tensor]]] = None, past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, decoder_inputs_embeds: Optional[torch.FloatTensor] = None, labels: 
Optional[torch.LongTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple[torch.FloatTensor], Seq2SeqLMOutput]: use_cache = use_cache if use_cache is not None else self.config.use_cache return_dict = return_dict if return_dict is not None else self.config.use_return_dict # FutureWarning: head_mask was separated into two input args - head_mask, decoder_head_mask if head_mask is not None and decoder_head_mask is None: if self.config.num_layers == self.config.num_decoder_layers: warnings.warn(__HEAD_MASK_WARNING_MSG, FutureWarning) decoder_head_mask = head_mask # Encode if needed (training, first prediction pass) if encoder_outputs is None: # Convert encoder inputs in embeddings if needed encoder_outputs = self.encoder( input_ids=input_ids, attention_mask=attention_mask, inputs_embeds=inputs_embeds, head_mask=head_mask, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict, ) elif return_dict and not isinstance(encoder_outputs, BaseModelOutput): encoder_outputs = BaseModelOutput( last_hidden_state=encoder_outputs[0], hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None, attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None, ) hidden_states = encoder_outputs[0] image_embedding = self.image_dense(image_ids) # use pooled image features if len(image_embedding.size()) == 2: image_embedding = image_embedding.unsqueeze(1) image_att, _ = self.mha_layer(hidden_states, image_embedding, image_embedding) merge = torch.cat([hidden_states, image_att], dim=-1) gate = self.sigmoid(self.gate_dense(merge)) hidden_states = (1 - gate) * hidden_states + gate * image_att if self.model_parallel: torch.cuda.set_device(self.decoder.first_device) if labels is not None and decoder_input_ids is None and decoder_inputs_embeds is None: # get decoder inputs from shifting lm labels to 
the right decoder_input_ids = self._shift_right(labels) # Set device for model parallelism if self.model_parallel: torch.cuda.set_device(self.decoder.first_device) hidden_states = hidden_states.to(self.decoder.first_device) if decoder_input_ids is not None: decoder_input_ids = decoder_input_ids.to(self.decoder.first_device) if attention_mask is not None: attention_mask = attention_mask.to(self.decoder.first_device) if decoder_attention_mask is not None: decoder_attention_mask = decoder_attention_mask.to(self.decoder.first_device) # Decode decoder_outputs = self.decoder( input_ids=decoder_input_ids, attention_mask=decoder_attention_mask, inputs_embeds=decoder_inputs_embeds, past_key_values=past_key_values, encoder_hidden_states=hidden_states, encoder_attention_mask=attention_mask, head_mask=decoder_head_mask, cross_attn_head_mask=cross_attn_head_mask, use_cache=use_cache, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict, ) sequence_output = decoder_outputs[0] # Set device for model parallelism if self.model_parallel: torch.cuda.set_device(self.encoder.first_device) if self.use_lm_head: self.lm_head = self.lm_head.to(self.encoder.first_device) sequence_output = sequence_output.to(self.lm_head.weight.device) if self.config.tie_word_embeddings: # Rescale output before projecting on vocab # See https://github.com/tensorflow/mesh/blob/fa19d69eafc9a482aff0b59ddd96b025c0cb207d/mesh_tensorflow/transformer/transformer.py#L586 sequence_output = sequence_output * (self.model_dim**-0.5) if self.use_lm_head: lm_logits = self.lm_head(sequence_output) else: lm_logits = None loss = None if labels is not None and self.use_lm_head: loss_fct = CrossEntropyLoss(ignore_index=-100) loss = loss_fct(lm_logits.view(-1, lm_logits.size(-1)), labels.view(-1)) # TODO(thom): Add z_loss https://github.com/tensorflow/mesh/blob/fa19d69eafc9a482aff0b59ddd96b025c0cb207d/mesh_tensorflow/layers.py#L666 if not return_dict: output = (lm_logits,) + 
decoder_outputs[1:] + encoder_outputs return ((loss,) + output) if loss is not None else output return Seq2SeqLMOutput( loss=loss, logits=lm_logits, past_key_values=decoder_outputs.past_key_values, decoder_hidden_states=decoder_outputs.hidden_states, decoder_attentions=decoder_outputs.attentions, cross_attentions=decoder_outputs.cross_attentions, encoder_last_hidden_state=encoder_outputs.last_hidden_state, encoder_hidden_states=encoder_outputs.hidden_states, encoder_attentions=encoder_outputs.attentions, ) def prepare_inputs_for_generation( self, decoder_input_ids, past=None, attention_mask=None, use_cache=None, encoder_outputs=None, **kwargs ): # cut decoder_input_ids if past is used if past is not None: decoder_input_ids = decoder_input_ids[:, -1:] output = { "input_ids": None, # encoder_outputs is defined. input_ids not needed "encoder_outputs": encoder_outputs, "past_key_values": past, "decoder_input_ids": decoder_input_ids, "attention_mask": attention_mask, "use_cache": use_cache, # change this to avoid caching (presumably for debugging) } output["image_ids"] = kwargs['image_ids'] return output def test_step(self, tokenizer, batch, **kwargs): device = next(self.parameters()).device input_ids = batch['input_ids'].to(device) image_ids = batch['image_ids'].to(device) output = self.generate( input_ids=input_ids, image_ids=image_ids, **kwargs ) generated_sents = tokenizer.batch_decode(output, skip_special_tokens=True) targets = tokenizer.batch_decode(batch['labels'], skip_special_tokens=True) result = {} result['preds'] = generated_sents result['targets'] = targets return result ================================================ FILE: env_setup/README.md ================================================ # Environment Installation Guide We recommend using the Linux environment. Support to Windows and MacOS are not provided, but we welcome contributions. 
## Android Software Development Kit (SDK) Part of this tutorial is based on this [GitHub Gist](https://gist.github.com/nhtua/2d294f276dc1e110a7ac14d69c37904f). ### Install Java (JDK 8) Download a Java Development Kit 8 (v1.8.0) release version from the open-source Java releaser [OpenLogic](https://www.oracle.com/java/technologies/downloads/). Install using your Linux package installer, like `apt` or `rpm`. For example, on a Debian server: ```bash sudo apt-get update cd ~ && mkdir install-android/ && cd install-android wget https://builds.openlogic.com/downloadJDK/openlogic-openjdk/8u412-b08/openlogic-openjdk-8u412-b08-linux-x64-deb.deb sudo apt install ./openlogic-openjdk-8u412-b08-linux-x64-deb.deb ``` If you already has a java binary previously, you should also do this: ```bash sudo update-alternatives --config java # select /usr/lib/jvm/openlogic-openjdk-8-hotspot-amd64/bin/java ``` Check whether the installation is successful by `java -version`. You should expect the output shows version 1.8.0. Higher versions makes `sdkmanager` crash. ```bash java -version # openjdk version "1.8.0_412-412" # OpenJDK Runtime Environment (build 1.8.0_412-412-b08) # OpenJDK 64-Bit Server VM (build 25.412-b08, mixed mode) ``` ### Install SDK Manager Download the Android SDK for Linux from the [official website](https://developer.android.com/studio/index.html#downloads). For your convenience, you can also directly download the [installation package](https://dl.google.com/android/repository/sdk-tools-linux-4333796.zip). ```bash wget https://dl.google.com/android/repository/sdk-tools-linux-4333796.zip ``` Now specify the android installation path and unzip the installation package to that path. It's recommended to use `/home//.android` as the default installation path. ```bash export ANDROID_HOME= # recommended: /home//.android mkdir -p $ANDROID_HOME unzip sdk-tools-linux-4333796.zip -d $ANDROID_HOME ``` Make sure you have `unzip` installed. 
For example, use `sudo apt install unzip -y` to install on Debian servers. To check whether the unzip is successful: ```bash ls $ANDROID_HOME # tools ``` ### SDK Emulator Prior to install the SDK emulators, set the environment variables: ```bash echo "export ANDROID_HOME=$ANDROID_HOME" >> ~/.bashrc echo 'export SDK=$ANDROID_HOME' >> ~/.bashrc echo 'export ANDROID_SDK_ROOT=$ANDROID_HOME' >> ~/.bashrc echo 'export PATH=$SDK/emulator:$SDK/tools:$SDK/tools/bin:$SDK/platform-tools:$PATH' >> ~/.bashrc source ~/.bashrc ``` Now you should be able to locate the `sdkmanager` binary: ```bash which sdkmanager # .../tools/bin/sdkmanager ``` Then install the Android emulator 28 (other versions should also work, but the offline data we provided is in version 28): ```bash yes | sdkmanager "platform-tools" "platforms;android-28" "emulator" yes | sdkmanager "system-images;android-28;google_apis;x86_64" yes | sdkmanager "build-tools;28.0.0" ``` Now you should be able to view the version of the emulator: ```bash emulator -version # INFO | Storing crashdata in: /tmp/android-/emu-crash-34.2.14.db, detection is enabled for process: 16670 # INFO | Android emulator version 34.2.14.0 (build_id 11834374) (CL:N/A) # INFO | Storing crashdata in: /tmp/android-/emu-crash-34.2.14.db, detection is enabled for process: 16670 # INFO | Duplicate loglines will be removed, if you wish to see each individual line launch with the -log-nofilter flag. # ... ``` ## Android Virtual Device (AVD) Initialization In the next step, we create an AVD snapshot as the environment. ### Device Creation Download the device image [here](https://drive.google.com/drive/folders/1ZGKrWiSoGqg8_NoIGT7rWmiZ8CXToaBF?usp=sharing). Unzip the device image to `$ANDROID_HOME/avd`. ```bash cd $ANDROID_HOME mkdir avd cd avd unzip test_Android.zip ``` You have now successfully copied the Pixel 28 device that we use for our research. ### KVM Acceleration In order to launch the emulator, check whether `kvm` is reachable on your machine. 
Simply run this command to check: ```bash ls /dev/kvm # /dev/kvm -> you have KVM support # ls: cannot access '/dev/kvm': No such file or directory -> you don't have KVM support ``` If you don't have KVM support, try to enable it. During our experiments, we find that KVM virtualization makes the emulator at least 5x faster (in all aspects, including bootstrapping and interactions). **Again, failure to set up KVM is likely to backfire your research by significantly increasing the interaction time during reinforcement learning.** You can check whether you can virtualize your machine via ```bash sudo apt-get install cpu-checker sudo kvm-ok # yes means your machine supports virtualization ``` If your machine doesn't support virtualization, first enable this feature (this can be enabled on most virtual server providers). On GCP, for example, refer to [this guide](https://cloud.google.com/compute/docs/instances/nested-virtualization/enabling). To best of our knowledge, AWS only allows virtualization on bare metal machines, so try to set up bare metals for this research. After checking that your machine supports virtualization, enable KVM by referring to [this guide](https://developer.android.com/studio/run/emulator-acceleration#vm-linux). If you have done all steps in this guide and you still can't set up KVM, try rebooting your machine. ### Device Bootstrapping Now check whether you can successfully run an AVD instance with KVM acceleration by starting an emulator: ```bash emulator -avd test_Android "-no-window" "-no-audio" "-skip-adb-auth" "-no-boot-anim" "-gpu" "auto" "-no-snapshot-load" # ... # Cold boot: requested by the user # INFO | Boot completed in 12579 ms ``` A successful launch should show `Cold boot: requested by the user` in the end. 
Now open a new terminal tab; you should be able to see an online device through `adb`:
# screenshot saved to /screenshot.png ``` You should now see a screenshot like this: screenshot1 Now go back to the emulator terminal tab. Use `ctrl+c` to exit the emulator, and you should see ```bash ctrl+c # INFO | Saving with gfxstream=1 # ERROR | stop: Not implemented (ignore this error) ``` Now execute this command to check whether the snapshot is successfully saved: ```bash emulator -avd test_Android "-no-window" "-no-audio" "-skip-adb-auth" "-no-boot-anim" "-gpu" "auto" "-no-snapshot-save" # Successfully loaded snapshot 'default_boot' ``` Congratulations! You're good to go now. Close all tabs and move on the main README for the experiments. ## Optional (Not Recommended): Create a Device from Scratch Alternatively, you can create a device from scratch, not using our device. This may result in different behavior as our experiments, so it's not recommended for reproducing our results ### Device Creation Create a Android 28 device from `google_apis`: ```bash echo no | avdmanager create avd -n test_Android -k "system-images;android-28;google_apis;x86_64" ``` You should now see a directory at `$ANDOIRD_HOME/avd`. For simplicity, it's advised to set the environment variable `ANDROID_AVD_HOME` to this path: ```bash echo 'export ANDROID_AVD_HOME=$ANDROID_HOME/avd' >> ~/.bashrc source ~/.bashrc ``` You should now be able to see the empty AVD instance at `$ANDROID_AVD_HOME`: ```bash ls $ANDROID_AVD_HOME # test_Android.avd test_Android.ini ``` To align with our experiments, replace the `$ANDROID_AVD_HOME/test_Android.avd/config.ini` file with [our released config file](./config.ini). ```bash cp /env_setup/config.ini $ANDROID_AVD_HOME/test_Android.avd/config.ini ``` You have now successfully created an empty Pixel device image of the same configuration as the data collected in our research. 
### Device Bootstrapping Now check whether you can successfully run an AVD instance with KVM acceleration by starting an emulator: ```bash emulator -avd test_Android "-no-window" "-no-audio" "-skip-adb-auth" "-no-boot-anim" "-gpu" "auto" "-no-snapshot-load" # ... # Cold boot: requested by the user ``` A successful launch should show `Cold boot: requested by the user` in the end. Now open a new terminal tab, you should be able to see an online devices through `adb`: ```bash adb devices # List of devices attached # emulator-5554 device ``` ### Install Device Interface for Appium First, launch `appium`: ```bash appium --relaxed-security ``` Then open a new terminal tab (now you should have 3 tabs, one running Android emulator, one running appium, and this new one) and execute the screenshot script: ```bash # wait for half a minute... (if you do screenshot right away, you will get errors cmd: Can't find service: settings. allow some time for emulator to install the packages.) python /env_setup/screenshot.py # keep trying this command till it no longer raises errors # wait for half a minute... # screenshot saved to /screenshot.png ``` You should now see a screenshot like this: screenshot1 ### Click Google Login in Chrome and Update Chrome As we frequently use Chrome for our experiments, we need to ensure Chrome isn't blocked by the login interface. To complete that, we need to manually remove that in the device with ADB commands: ```bash adb shell input tap 739 1828 # click the Chrome icon sleep 2 adb shell input tap 654 2043 # click "accept & continue" sleep 2 adb shell input tap 197 2066 # click "no thanks" ``` Now do the screenshot again. You should see the produced screenshot like this: screenshot2 ```bash python /env_setup/screenshot.py # should work on the first try ``` ``` adb shell input tap 322 719 # click the search bar sleep 2 adb shell input tap 559 2063 # click "no" ``` Now do the screenshot again. 
You should see the produced screenshot like this: screenshot2 ```bash python /env_setup/screenshot.py # should work on the first try ``` Then reboot the device and click the Chrome icon again. Chrome will automatically update its main page. ```bash adb reboot adb devices # ...wait until `adb device` gives `device` instead of `offline` adb shell input tap 739 1828 # click the Chrome icon ``` Do a screenshot again. You should expect the screenshot to change to this: screenshot3 ```bash python /env_setup/screenshot.py # should work on the first try ``` Now go back to home screen and save a snapshot of the device: ```bash adb shell input tap 551 2228 # click the home button # you can also use `adb shell input keyevent KEYCODE_HOME` sleep 2 adb emu avd snapshot save default_boot # OK ``` Now go back to the emulator tab. Use `ctrl+c` to exit the emulator, and you should see ```bash ctrl+c # INFO | Saving with gfxstream=1 # ERROR | stop: Not implemented (ignore this error) ``` Now execute this command to check whether the snapshot is successfully saved: ```bash emulator -avd test_Android "-no-window" "-no-audio" "-skip-adb-auth" "-no-boot-anim" "-gpu" "auto" "-no-snapshot-save" # Successfully loaded snapshot 'default_boot' ``` Congratulations! You're good to go now. Close all tabs and move on the main README for the experiments. 
## Optional: Useful ADB Commands Feel free to play around ADB with these commands: |Function|Command| |--------|-------| Manually save a snapshot | `adb emu avd snapshot save default_boot` Click | `adb shell input tap 160 240` Scroll Down | `adb shell input swipe 500 1000 300 300` Screenshot | `adb exec-out screencap -p > screenshot.png` Keyboard input|`adb shell input text "insert%syour%stext%shere" && adb shell input keyevent KEYCODE_ENTER` Open Chrome | `adb shell am start -n com.android.chrome/com.google.android.apps.chrome.Main` Open a website in Chrome |`adb shell am start -a android.intent.action.VIEW -d http://www.stackoverflow.com` Uninstall a package | `adb shell pm uninstall --user 0 com.example.yourapp` Install a package | `adb install -r /path/to/package` Check application version | `adb shell dumpsys package com.example.yourapp | grep versionName` Check Android version | `adb shell getprop ro.build.version.release` Screen recording (high frame rate) | `adb shell screenrecord --size 540x1140 --bit-rate 4M /sdcard/video.mp4` Pull screenshot to computer | `adb pull /sdcard/video.mp4` ### Update Google Version Check Chrome internal version: ```bash adb shell input tap 739 1828 # click the Chrome icon adb shell input tap 1004 144 # click dots adb shell input tap 510 1382 # click settings adb shell input swipe 500 1000 300 300 # swipe down adb shell input tap 191 2093 # click about Chrome python env_setup/screenshot.py ``` Install a new version: you must use architecture-specific binaries. In our setup, we use the `x86/x86_64` architecture. If you use a different architecture, you must download the corresponding binaries. 
For example, you can download the `x86 or x86_64` version from [this page](https://www.apkmirror.com/apk/google-inc/chrome/chrome-124-0-6367-172-release/google-chrome-124-0-6367-172-6-android-apk-download/download/?key=6b3dc806b877aa88cb664103bd5e596284b12b4d&forcebaseapk=true), which will be Chrome version 124 (our paper uses Chrome 69 thought). ## Troubleshoot ### Java ```bash sdkmanager # Exception in thread "main" java.lang.NoClassDefFoundError: javax/xml/bind/annotation/XmlSchema # at com.android.repository.api.SchemaModule$SchemaModuleVersion.(SchemaModule.java:156) # at com.android.repository.api.SchemaModule.(SchemaModule.java:75) # at com.android.sdklib.repository.AndroidSdkHandler.(AndroidSdkHandler.java:81) # at com.android.sdklib.tool.sdkmanager.SdkManagerCli.main(SdkManagerCli.java:73) # at com.android.sdklib.tool.sdkmanager.SdkManagerCli.main(SdkManagerCli.java:48) # Caused by: java.lang.ClassNotFoundException: javax.xml.bind.annotation.XmlSchema # at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:641) # at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:188) # at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:525) # ... 5 more ``` This error shows because your `java` version is too new. Downgrade to Java 8. ### Emulator ```bash emulator -avd test_Android "-no-window" "-no-audio" "-skip-adb-auth" "-no-boot-anim" "-gpu" "auto" "-no-snapshot-save" # INFO | Storing crashdata in: /tmp/android-/emu-crash-34.2.14.db, detection is enabled for process: 29307 # INFO | Android emulator version 34.2.14.0 (build_id 11834374) (CL:N/A) # INFO | Found systemPath /home//.android/system-images/android-28/google_apis/x86_64/ # INFO | Storing crashdata in: /tmp/android-/emu-crash-34.2.14.db, detection is enabled for process: 29307 # INFO | Duplicate loglines will be removed, if you wish to see each individual line launch with the -log-nofilter flag. 
# INFO | Changing default hw.initialOrientation to portrait # INFO | Increasing RAM size to 1536MB # ERROR | x86_64 emulation currently requires hardware acceleration! # CPU acceleration status: KVM requires a CPU that supports vmx or svm # More info on configuring VM acceleration on Linux: # https://developer.android.com/studio/run/emulator-acceleration#vm-linux # General information on acceleration: https://developer.android.com/studio/run/emulator-acceleration. ``` This error shows you've not installed KVM or your machine does not support KVM. Enable KVM in this case. ```bash emulator -avd test_Android "-no-window" "-no-audio" "-skip-adb-auth" "-no-boot-anim" "-gpu" "auto" "-no-snapshot-load" # ... # ProbeKVM: This user doesn't have permissions to use KVM (/dev/kvm). ``` Try two things: ```bash sudo chown -R /dev/kvm sudo gpasswd -a $USER kvm ``` ### Appium ```bash python screenshot.py # selenium.common.exceptions.WebDriverException: Message: An unknown server-side error occurred while processing the command. Original error: Error executing adbExec. Original error: 'Command '/home//.android/platform-tools/adb -P 5037 -s emulator-5554 shell 'settings delete global hidden_api_policy_pre_p_apps;settings delete global hidden_api_policy_p_apps;settings delete global hidden_api_policy'' exited with code 20'; Command output: cmd: Can't find service: settings ``` Device is installing the Appium interface. Wait for a minute then try again. 
================================================ FILE: env_setup/config.ini ================================================ PlayStore.enabled = false abi.type = x86_64 avd.ini.encoding = UTF-8 fastboot.chosenSnapshotFile= fastboot.forceChosenSnapshotBoot=no fastboot.forceColdBoot=no fastboot.forceFastBoot=yes hw.accelerometer=yes hw.arc=false hw.audioInput=yes hw.battery=yes hw.camera.back=virtualscene hw.dPad=no hw.device.hash2=MD5:d37b758e1a2b070e348378c2efde849a hw.device.manufacturer=Generic hw.gps=yes hw.gpu.enabled=yes hw.gpu.mode=auto hw.initialOrientation=Portrait hw.keyboard=yes hw.lcd.width = 1080 hw.lcd.height = 2280 hw.lcd.depth = 16 hw.lcd.circular = false hw.lcd.density = 440 hw.lcd.backlight = true hw.lcd.vsync = 60 hw.mainKeys=no hw.trackBall=no hw.cpu.arch = x86_64 image.sysdir.1 = system-images/android-28/google_apis/x86_64/ tag.display = Google APIs tag.id = google_apis disk.dataPartition.size = 6442450944 ================================================ FILE: env_setup/screenshot.py ================================================ ''' Taking Screenshots with Appium I'll be using Python and a sample iOS application from Apple's Developer Library This tutorial assumes you understand how to run, launch, and interact with your application. 
''' from appium import webdriver import os desired_capabilities = {} desired_capabilities['deviceName'] = 'Simulator' capabilities = dict( platformName='Android', automationName='uiautomator2', deviceName='Android', newCommandTimeout="120000", adbExecTimeout="120000", noReset=True, uiautomator2ServerInstallTimeout="120000", uiautomator2ServerLaunchTimeout="120000", uiautomator2ServerReadTimeout="120000", ) capabilities["udid"] = "emulator-5554" from appium.options.android import UiAutomator2Options options = UiAutomator2Options().load_capabilities(capabilities) directory = '%s/' % os.getcwd() appium_server_url = "http://0.0.0.0:4723" driver = webdriver.Remote(appium_server_url, options=options) file_name = 'screenshot.png' driver.save_screenshot(directory + file_name) print("screenshot saved to", directory + file_name) ================================================ FILE: multimachine/README.md ================================================ # Guidelines on Multi-Machine Emulation Parallel We support the feature to distributedly run environment on many machines and synchronize when all collections are done. The design works as below:

digirl-diagram

You need a host machine (with GPU) to synchronize the emulation process and train the agent, which subsequently sends the checkpoints to each worker machine (no GPU required) to simply interact with the environment to get trajectories. ## Hands-on Guideline **Make sure you're able to run the single-machine pipeline before you move on to multi-machine setup.** ### Configuring your cluster First, start 1 host machine with GPU support. Then start as many worker machines as you want, where no GPU is required on these worker machines. Then you need to make sure all **worker machines** support to interact with the emulator (the host machine does not interact with the environment, so no need to support this). We suggest using virtual machines, so you can copy the environment directly. Make sure that the host machine can access all worker machines by simply `ssh` them - this can be done by adding the public keys of the worker machines to the `~/.ssh/authorized_keys` file of the host machine. This step is **mandatory** because the host machine will send the checkpoint to the worker machines and get the collected trajectories from them. ### Code Setup Again, make sure you're able to run the single-machine pipeline on each machine before you move on. Now copy the source code you've modified (including all the individualized changes) to each and every machine (including both host and worker machine). Note that you must set `save_freq` to `1` in multi-machine training mode (which is already set for you). Then modify the paths containing `` on each machine according to their username, respectively. Then simply run ``` cd ./script python run.py --config-path config/multimachine --config-name host ``` The script will handle everything for you, you don't need to access any worker machine when running multi-machine emulation. Note that `bsize` in the configuration files means the number of parallel emulators on **each machine**. 
When there are multiple worker machines collecting trajectories in parallel, the total number of parallel emulators will be the sum of `bsize` over all machines.
tpu_use_cluster: false tpu_use_sudo: false use_cpu: false ================================================ FILE: scripts/config/cogagent/default.yaml ================================================ # =================== # ====== task ====== # =================== task_set: "general" # "general" or "webshop" task_split: "train" # "train" or "test" eval_sample_mode: "random" # "random" or "sequential" # max_steps of AitW General should be 10; of AitW Web Shopping should max_steps: 10 # =================== # ====== token ====== # =================== huggingface_token: '' wandb_key: '' gemini_key: '' # =================== # ====== placeholder (will not be used for cogagent) ====== # =================== policy_lm: '/home//Auto-UI-Base' critic_lm: 'roberta-base' capacity: 2000 # replay buffer size epochs: 5 # number of epochs for the critic each witeration batch_size: 4 grad_accum_steps: 32 warmup_iter: 0 # how many iterations to only collect data and evaluate before training actor_epochs: 20 # number of epochs for training the actor each iteration trajectory_critic_epochs: 5 lm_lr: 1e-4 critic_lr: 1e-4 max_grad_norm: 0.01 gamma: 0.5 # =================== # ====== algo ====== # =================== bsize: 1 # should be the same as the number of gradio urls provided for cogagent rollout_size: 16 # how many trajectories to collect between training iterations (should be multiple of bsize) # =================== # ====== agent ====== # =================== use_lora: False agent_name: "cogagent" # "autoui" or "cogagent" >>> the eval method must be "autoui" if train_algorithm is not none <<< do_sample: True temperature: 1.0 tau: 0.01 # soft update parameter max_new_tokens: 128 # =================== # ====== log ====== # =================== record: False # whether you want to record mp4 videos use_wandb: True entity_name: '' project_name: '' # =================== # ====== path ====== # =================== # query by: echo $ANDROID_AVD_HOME android_avd_home: '/home//.android/avd' # 
query by: which emulator emulator_path: '/home//.android/emulator/emulator' # query by: which adb adb_path: '/home//.android/platform-tools/adb' cache_dir: '/home//.cache' assets_path: '/home//digirl/digirl/environment/android/assets/task_set' ================================================ FILE: scripts/config/cogagent/eval_only.yaml ================================================ defaults: - default - _self_ # no train algorithm here save_path: '/home//logs/ckpts/general-off2on-cogagent/' # the interacted trajectories will be saved to this path run_name: 'cogagent-general-off2on' cogagent_url: - # training task_mode: "evaluate" # "train" or "evaluate" - "train" will do training when evaluating, "evaluate" will NOT do training parallel: "single" # "single" or "host" or "worker" # evaluation-only mode doesn't require any training # train_mode: "off2on" # "offline" or "online" or "off2on" # offline_data_path: "/home//logs/general-off2on-sft-trajectories.pt" # offline_actor_iterations: 30 # offline_critic_iterations: 20 # offline_trajectory_critic_iterations: 20 # train_iterations: 400 eval_iterations: 1000 save_freq: 3 ================================================ FILE: scripts/config/main/default.yaml ================================================ # =================== # ====== task ====== # =================== task_set: "general" # "general" or "webshop" task_split: "train" # "train" or "test" eval_sample_mode: "random" # "random" or "sequential" # max_steps of AitW General should be 10; of AitW Web Shopping should max_steps: 10 # =================== # ====== token ====== # =================== huggingface_token: '' wandb_key: '' gemini_key: '' # =================== # ====== algo ====== # =================== policy_lm: '/home//Auto-UI-Base' # do NOT modify this. To load existing checkpoints, modify `save_path` in children config files. 
critic_lm: 'roberta-base' capacity: 2000 # replay buffer size epochs: 5 # number of epochs for the critic each witeration batch_size: 4 bsize: 8 # number of emulators parallelly on the machine rollout_size: 16 # how many trajectories to collect between training iterations (should be multiple of bsize) grad_accum_steps: 32 warmup_iter: 0 # how many iterations to only collect data and evaluate before training actor_epochs: 20 # number of epochs for training the actor each iteration trajectory_critic_epochs: 5 lm_lr: 1e-4 critic_lr: 1e-4 max_grad_norm: 0.01 gamma: 0.5 # =================== # ====== agent ====== # =================== use_lora: False agent_name: "autoui" # "autoui" or "cogagent" >>> the eval method must be "autoui" if train_algorithm is not none <<< do_sample: True temperature: 1.0 tau: 0.01 #soft update parameter max_new_tokens: 128 # =================== # ====== log ====== # =================== record: False # whether you want to record mp4 videos use_wandb: True entity_name: '' project_name: '' # =================== # ====== path ====== # =================== # query by: echo $ANDROID_AVD_HOME android_avd_home: '/home//.android/avd' # query by: which emulator emulator_path: '/home//.android/emulator/emulator' # query by: which adb adb_path: '/home//.android/platform-tools/adb' cache_dir: '/home//.cache' assets_path: '/home//digirl/digirl/environment/android/assets/task_set' ================================================ FILE: scripts/config/main/digirl_off2on.yaml ================================================ defaults: - default - _self_ save_path: '/home//logs/digirl-general-off2on/' run_name: 'digirl-general-off2on' # training train_algorithm: "digirl" # "digirl" of "filteredbc" train_mode: "off2on" # "offline" or "online" or "off2on" task_mode: "train" # "train" or "evaluate" - "train" will do training when evaluating, "evaluate" will NOT do training parallel: "single" # "single" or "host" or "worker" # offline config, uncomment 
offline_data_path as long as train_mode is offline or off2on
save_freq: 3 ================================================ FILE: scripts/config/main/eval_only.yaml ================================================ defaults: - default - _self_ save_path: '/home//logs/ckpts/general-off2on-digirl/' run_name: 'autoui-general-eval-only' # training train_algorithm: "digirl" # "digirl" of "filteredbc", should be same as the evaluation checkpoint task_mode: "evaluate" # "train" or "evaluate" - "train" will do training when evaluating, "evaluate" will NOT do training parallel: "single" # "single" or "host" or "worker" # evaluation-only mode doesn't require any training # train_mode: "off2on" # "offline" or "online" or "off2on" # offline_data_path: "/home//logs/general-off2on-sft-trajectories.pt" # offline_actor_iterations: 30 # offline_critic_iterations: 20 # offline_trajectory_critic_iterations: 20 # train_iterations: 400 eval_iterations: 1000 save_freq: 3 ================================================ FILE: scripts/config/multimachine/default.yaml ================================================ # =================== # ====== task ====== # =================== task_set: "general" # "general" or "webshop" task_split: "train" # "train" or "test" eval_sample_mode: "random" # "random" or "sequential" # max_steps of AitW General should be 10; of AitW Web Shopping should max_steps: 10 # =================== # ====== token ====== # =================== huggingface_token: '' wandb_key: '' gemini_key: '' # =================== # ====== algo ====== # =================== policy_lm: '/home//Auto-UI-Base' critic_lm: 'roberta-base' capacity: 2000 # replay buffer size epochs: 5 # number of epochs for the critic each witeration batch_size: 4 bsize: 8 # number of emulators parallelly on the machine rollout_size: 16 # how many trajectories to collect between training iterations (should be multiple of bsize) grad_accum_steps: 32 warmup_iter: 0 # how many iterations to only collect data and evaluate before training actor_epochs: 20 # number of epochs for 
training the actor each iteration lm_lr: 1e-4 critic_lr: 1e-4 max_grad_norm: 0.01 gamma: 0.5 # =================== # ====== agent ====== # =================== use_lora: False agent_name: "autoui" # "autoui" or "cogagent" >>> the eval method must be "autoui" if train_algorithm is not none <<< do_sample: True temperature: 1.0 tau: 0.01 #soft update parameter max_new_tokens: 128 # =================== # ====== log ====== # =================== record: False # whether you want to record mp4 videos use_wandb: True entity_name: '' project_name: '' # =================== # ====== path ====== # =================== # query by: echo $ANDROID_AVD_HOME android_avd_home: '/home//.android/avd' # query by: which emulator emulator_path: '/home//.android/emulator/emulator' # query by: which adb adb_path: '/home//.android/platform-tools/adb' assets_path: '/home//digirl/digirl/environment/android/assets/task_set' cache_dir: '/home//.cache' # offline config, uncomment offline_data_path as long as train_model is offline or off2on offline_data_path: "/home//logs/general-off2on-sft-trajectories.pt" offline_actor_iterations: 30 offline_critic_iterations: 20 offline_trajectory_critic_iterations: 20 ================================================ FILE: scripts/config/multimachine/host.yaml ================================================ defaults: - default - _self_ parallel: "host" run_name: 'general-digirl-off2on-host' save_path: "/home//logs/multimachine" # path that saves checkpoint, trajectories, and images worker_temp_path: "/home//logs/worker" # MUST BE identical to save_path on the worker machine worker_run_path: "/home//digirl/scripts" # where the script dir is on the worker machine worker_ips: ["34.45.185.211"] worker_username: save_freq: 1 # training train_algorithm: "digirl" # "digirl" of "filteredbc" train_mode: "online" # "offline" or "online" or "off2on" task_mode: "train" # "train" or "evaluate" - "train" will do training when evaluating, "evaluate" will NOT do training 
train_iterations: 400 eval_iterations: 1000 ================================================ FILE: scripts/config/multimachine/worker.yaml ================================================ defaults: - default - _self_ # parallel parallel: "worker" # "single" or "host" or "worker" run_name: 'general-digirl-off2on-worker' save_path: '/home//logs/worker' # training train_algorithm: "digirl" # "digirl" of "filteredbc" train_mode: "online" # "offline" or "online" or "off2on" task_mode: "train" # "train" or "evaluate" - "train" will do training when evaluating, "evaluate" will NOT do training train_iterations: 1 eval_iterations: 0 ================================================ FILE: scripts/run.py ================================================ import transformers from tqdm import tqdm from digirl.environment import BatchedAndroidEnv from digirl.models import AutoUIAgent, CogAgent from digirl.algorithms import offpolicy_train_loop, eval_loop, worker_collect_loop from digirl.misc import colorful_print from digirl.environment.android import EndResultEvaluator from digirl.environment.android import autoui_translate_action, cogagent_translate_action import torch.nn as nn import numpy as np import wandb from omegaconf import DictConfig, OmegaConf import os import hydra from accelerate import Accelerator from datetime import timedelta from accelerate import DistributedDataParallelKwargs, InitProcessGroupKwargs transformers.logging.set_verbosity_error() import torch.distributed as dist import datetime def load_task_file(assets_path, task_set, task_split): all_tasks = [] with open(os.path.join(assets_path, task_set + "_" + task_split + ".txt")) as fb: for line in fb: all_tasks.append(line.replace("\n", "")) return all_tasks @hydra.main(version_base=None, config_path=None, config_name=None) def main(config: "DictConfig"): colorful_print(OmegaConf.to_yaml(config), fg='red') try: from huggingface_hub import login login(token=config.huggingface_token) except: print(">>> 
Huggingface token not found.") ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True) accelerator = Accelerator(InitProcessGroupKwargs(timeout=timedelta(minutes=40)), kwargs_handlers=[ddp_kwargs], project_dir = config.save_path) device = accelerator.device env = None if accelerator.is_main_process: # load environment all_tasks = load_task_file(config.assets_path, config.task_set, config.task_split) bsize = config.bsize base_port = 5554 evaluators = [EndResultEvaluator(config.gemini_key, config.task_set)] * bsize assert len(evaluators) == bsize if config.agent_name == "autoui": translate_action = autoui_translate_action use_feature_extractor = True elif config.agent_name == "cogagent": translate_action = cogagent_translate_action use_feature_extractor = False decode_f = lambda x:x if config.task_mode != "evaluate": assert config.agent_name == "autoui", "Only AutoUI agent is supported for training" colorful_print(">>> Agent: AutoUI", fg='blue') colorful_print(">>> Training algorithm: "+config.train_algorithm, fg='blue') colorful_print(">>> Training mode: "+config.train_mode, fg='blue') else: colorful_print(">>> Agent: "+config.agent_name, fg='blue') colorful_print(">>> Evauation mode", fg='blue') if config.agent_name == "autoui": agent = AutoUIAgent(device=device, accelerator=accelerator, temperature=config.temperature, do_sample=config.do_sample, policy_lm=config.policy_lm, critic_lm=config.critic_lm, cache_dir=config.cache_dir, max_new_tokens=config.max_new_tokens) tokenizer = agent.tokenizer elif config.agent_name == "cogagent": agent = CogAgent(url=config.cogagent_url) tokenizer = None else: raise NotImplementedError("Only AutoUI agent is supported for now") if config.use_wandb and accelerator.is_main_process: wandb.login(key=config.wandb_key) wandb.init(project=config.project_name, entity=config.entity_name, name=config.run_name, config=dict(config)) # this bunch of code should handle these functions: # |-- autoui # |-- online train (eval in the 
end) # |-- offline train (eval in the end) # |-- off2on train (eval in the end) # |-- eval-only # |-- cogagent (eval only) # |-- set-of-marks (eval only) # |-- appagent (eval only) def construct_env(sample_mode): env = BatchedAndroidEnv(avd_name="test_Android", cache_avd_names=[f"test{i}" for i in range(1,1+bsize)], android_avd_home=config.android_avd_home, emulator_path=config.emulator_path, adb_path=config.adb_path, udids = [f"emulator-{base_port+2*i}" for i in range(bsize)], max_steps=config.max_steps-1, # will have 1 dangling step after stop signal is triggered appium_base_port = base_port+1098, run_headless=True, use_feature_extractor=use_feature_extractor, device=accelerator.device, translate_action=translate_action, evaluators=evaluators, temp_path = os.path.join(config.save_path, "images"), save_images=True, all_tasks=all_tasks, task_split=config.task_split, sample_mode=sample_mode, record=config.record, ) return env # autoui will be trained first then evaluated if config.parallel in ["single", "host"]: if config.agent_name == "cogagent" or config.task_mode == "evaluate": if accelerator.is_main_process: env = construct_env(sample_mode=config.eval_sample_mode) eval_loop(env = env, tokenizer=tokenizer, agent = agent, accelerator = accelerator, decode_f=decode_f, **config) elif config.agent_name == "autoui": if accelerator.is_main_process: env = construct_env(sample_mode="random") offpolicy_train_loop(env = env, tokenizer=tokenizer, agent = agent, accelerator = accelerator, decode_f=decode_f, **config) # always do eval after training (unless this is only a worker machine to collect trajectories) if accelerator.is_main_process: env = construct_env(sample_mode=config.eval_sample_mode) eval_loop(env = env, tokenizer=tokenizer, agent = agent, accelerator = accelerator, decode_f=decode_f, **config) elif config.parallel == "worker": if accelerator.is_main_process: env = construct_env(sample_mode="random") worker_collect_loop(env = env, agent = agent, 
tokenizer=tokenizer, accelerator = accelerator, decode_f=decode_f, **config) if __name__ == "__main__": main() ================================================ FILE: setup.py ================================================ import setuptools # if sys.version_info < (3, 7): # sys.exit('Python>=3.7 is required by digirl.') setuptools.setup( name="digirl", version='0.1.0', url="https://github.com/DigiRL-agent/digirl", author=("Hao Bai"), description="Research code for digirl", long_description=open("README.md", "r", encoding='utf-8').read(), long_description_content_type="text/markdown", keywords='digirl', license='MIT', packages=setuptools.find_packages(), install_requires=open("requirements.txt", "r").read().split(), include_package_data=True, python_requires='>=3.9', classifiers=[ 'Intended Audience :: Science/Research', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3', 'Topic :: Scientific/Engineering :: Artificial Intelligence', ], )